7 from python_speech_features
import mfcc
, logfbank
8 import scipy
.io
.wavfile
as wav
12 from keras
.models
import Sequential
13 from keras
.layers
import Dense
, Dropout
# , Activation
14 from keras
import backend
, utils
22 files
= glob
.glob(os
.path
.join(os
.getcwd(), 'textgrid', '*.TextGrid'))
23 # Loop over all datafiles and make wavefile string
24 for i
, tg
in enumerate(files
):
25 num
= re
.match('^.*/(\\d+).TextGrid$', tg
).group(1)
26 yield (tg
, 'wav/{:02d}.wav'.format(int(num
)), int(num
))
def label_from_annotation(ann):
    """Map a TextGrid annotation string to a binary voice label.

    Returns 1 when the annotation carries any non-whitespace text
    (singing present), 0 for an empty/whitespace-only interval (silence).
    """
    # An interval whose stripped text is non-empty is a sung interval.
    if ann.strip():
        return 1
    return 0
31 def features_from_wav(tg
, wavp
, typ
='mfcc', winlen
=0.025, winstep
=0.01):
33 tgob
= pympi
.TextGrid(tg
)
34 intervalit
= tgob
.get_tier('lyrics').get_intervals(sort
=True)
36 (rate
, sig
) = wav
.read(wavp
, mmap
=True)
39 data
= mfcc(sig
, rate
, winlen
=winlen
, winstep
=winstep
, numcep
=13,
42 (data
, energy
) = logfbank(sig
, rate
, winlen
=winlen
, winstep
=winstep
, nfilt
=26)
44 raise ValueError("No such type")
46 (s
, e
, v
) = next(intervalit
)
48 label
= label_from_annotation(v
)
49 labels
= np
.empty(data
.shape
[0], dtype
=int)
52 # If we exceeded the interval, make new one
54 (s
, e
, v
) = next(intervalit
, (s
, e
, v
))
55 label
= label_from_annotation(v
)
61 currentframe
+= winstep
65 def singerfun(num
, l
):
74 raise Exception("halp")
78 def run(typ
, winlen
, winstep
, modelfun
, modelname
, multiclass
=False):
82 for tg
, wavp
, num
in get_datafiles():
83 (d
, l
) = features_from_wav(
84 tg
, wavp
, winlen
=winlen
, winstep
=winstep
, typ
=typ
)
87 labels
.append(list(map(lambda x
: singerfun(int(num
), x
), l
)))
92 datas
= np
.concatenate(datas
)
93 labels
= np
.concatenate(labels
)
94 print(np
.unique(labels
, return_counts
=True))
96 labels
= utils
.to_categorical(labels
, num_classes
=4)
99 rng_state
= np
.random
.get_state()
100 np
.random
.shuffle(datas
)
101 np
.random
.set_state(rng_state
)
102 np
.random
.shuffle(labels
)
104 splitindex
= int(labels
.shape
[0]*testset
)
105 testdata
, traindata
= datas
[:splitindex
], datas
[splitindex
:]
106 testlabels
, trainlabels
= labels
[:splitindex
], labels
[splitindex
:]
109 model
= modelfun(traindata
)
112 model
.fit(traindata
, trainlabels
, epochs
=10, batch_size
=32, shuffle
=False,
116 loss
, acc
= model
.evaluate(testdata
, testlabels
, batch_size
=32,
118 print('{}\t{}\t{}\t{}\t{}\n'.format(
119 winlen
, winstep
, modelname
, loss
, acc
))
124 model
.add(Dense(13, activation
='relu', input_shape
=(d
.shape
[1],)))
125 model
.add(Dense(1, activation
='sigmoid'))
127 # Dense(d.shape[1]*2, input_shape=(d.shape[1],), activation='relu'))
128 # model.add(Dense(13, activation='relu'))
129 # model.add(Dense(1, activation='sigmoid'))
130 model
.compile(optimizer
='rmsprop',
131 loss
='binary_crossentropy',
132 metrics
=['accuracy'])
137 # model.add(Dense(d.shape[1]*2, input_shape=(d.shape[1],), activation='relu'))
138 model
.add(Dense(13, activation
='relu', input_shape
=(d
.shape
[1],)))
139 model
.add(Dense(4, activation
='softmax'))
140 model
.compile(optimizer
='rmsprop',
141 loss
='categorical_crossentropy',
142 metrics
=['accuracy'])
146 if __name__
== '__main__':
147 print('winlen\twinstep\tmodel\tloss\taccuracy\n')
148 with backend
.get_session():
149 for winlen
, winstep
in ((0.025, 0.01), (0.1, 0.04), (0.2, 0.08)):
150 for name
, model
, multi
in reversed((('bottle', bottlemodel
, False), ('multi', multimodel
, True))):
151 m
= run('mfcc', winlen
, winstep
, model
, name
, multi
)
152 fproot
= 'model_{}_{}_{}'.format(winlen
, winstep
, name
)
153 with
open('{}.json'.format(fproot
), 'w') as f
:
155 m
.save_weights('{}.hdf5'.format(fproot
))