7 from python_speech_features
import mfcc
, logfbank
8 import scipy
.io
.wavfile
as wav
12 from keras
.models
import Sequential
13 from keras
.layers
import Dense
# , Dropout # , Activation
14 from keras
import backend
, utils
# NOTE(review): this span uses `yield`, so it is the interior of a generator
# whose `def` line is not visible in this view — presumably
# `def get_datafiles():`, matching the call site in run(). Gaps in the
# original line numbering indicate lines are missing around this fragment.

# Collect every annotation file under <cwd>/textgrid/*.TextGrid.
files = glob.glob(os.path.join(os.getcwd(), 'textgrid', '*.TextGrid'))
# Loop over all datafiles and make wavefile string
for i, tg in enumerate(files):
    # Pull the numeric id out of ".../<num>.TextGrid".
    num = re.match('^.*/(\\d+).TextGrid$', tg).group(1)
    # Pair each TextGrid with its zero-padded wav file, e.g. 'wav/07.wav'.
    yield (tg, 'wav/{:02d}.wav'.format(int(num)), int(num))
def label_from_annotation(ann):
    """Map a TextGrid annotation string to a binary voice label.

    A blank or whitespace-only annotation counts as "no singing" (0);
    anything else counts as "singing" (1).
    """
    if ann.strip():
        return 1
    return 0
def features_from_wav(tg, wavp, typ='mfcc', winlen=0.025, winstep=0.01):
    """Compute per-frame acoustic features and labels for one song.

    NOTE(review): several original lines are missing from this view (the
    `typ` branch headers, the frame loop header, and the return statement),
    so the body below is a FRAGMENT — do not treat it as runnable as-is.

    tg      -- path to a Praat TextGrid containing a 'lyrics' tier
    wavp    -- path to the matching wav file
    typ     -- feature type selector; 'mfcc' or a filterbank mode
    winlen  -- analysis window length in seconds
    winstep -- hop between successive windows in seconds
    """
    tgob = pympi.TextGrid(tg)
    # Annotated intervals of the 'lyrics' tier, sorted by start time.
    intervalit = tgob.get_tier('lyrics').get_intervals(sort=True)
    # mmap=True avoids loading the whole wav file into memory.
    (rate, sig) = wav.read(wavp, mmap=True)
    # MFCC branch (its `if typ == ...` header is not visible here).
    # NOTE(review): this call is truncated — its continuation lines are
    # missing from this view.
    data = mfcc(sig, rate, winlen=winlen, winstep=winstep, numcep=13,
    # Filterbank branch.
    # NOTE(review): python_speech_features.logfbank returns a single
    # array; unpacking into (data, energy) matches fbank's return instead
    # — verify which function was intended.
    (data, energy) = logfbank(
        sig, rate, winlen=winlen, winstep=winstep, nfilt=26)
    # Unknown `typ` values are rejected.
    raise ValueError("No such type")
    # Prime the first annotation interval: (start, end, value).
    (s, e, v) = next(intervalit)
    label = label_from_annotation(v)
    # One integer label per feature frame.
    labels = np.empty(data.shape[0], dtype=int)
    # If we exceeded the interval, make new one
    # (the default keeps the current interval once the iterator runs dry).
    (s, e, v) = next(intervalit, (s, e, v))
    label = label_from_annotation(v)
    # Advance the frame clock by one hop (loop header not visible here).
    currentframe += winstep
def singerfun(num, l):
    """Map a song number and a frame label to a per-singer class.

    NOTE(review): original lines 71-78 (the actual mapping logic) are
    missing from this view; only the fall-through is visible. The
    placeholder message suggests this branch was never expected to fire.
    """
    raise Exception("halp")
def run(typ, winlen, winstep, modelfun, modelname, multiclass=False):
    """Train and evaluate one model configuration; print a TSV result row.

    NOTE(review): several original lines are missing from this view — the
    initialisation of `datas`/`labels`, the definition of `testset`, the
    tails of the model.fit / model.evaluate calls, and any return
    statement (the caller assigns `m = run(...)`). Fragment only.
    """
    for tg, wavp, num in get_datafiles():
        (d, l) = features_from_wav(
            tg, wavp, winlen=winlen, winstep=winstep, typ=typ)
        # Re-label each frame by song/singer number; presumably only used
        # on the multiclass path — the guarding conditional is not visible.
        labels.append(list(map(lambda x: singerfun(int(num), x), l)))
    # Stack the per-file arrays into one dataset.
    datas = np.concatenate(datas)
    labels = np.concatenate(labels)
    # Class-balance diagnostic.
    print(np.unique(labels, return_counts=True))
    # One-hot encode for the 4-way softmax models.
    labels = utils.to_categorical(labels, num_classes=4)
    # Shuffle data and labels identically by replaying the same RNG state.
    rng_state = np.random.get_state()
    np.random.shuffle(datas)
    np.random.set_state(rng_state)
    np.random.shuffle(labels)
    # `testset` (test-set fraction) is defined on a line not visible here.
    splitindex = int(labels.shape[0]*testset)
    testdata, traindata = datas[:splitindex], datas[splitindex:]
    testlabels, trainlabels = labels[:splitindex], labels[splitindex:]
    model = modelfun(traindata)
    # NOTE(review): both calls below are truncated — their continuation
    # lines are missing from this view.
    model.fit(traindata, trainlabels, epochs=10, batch_size=32, shuffle=False,
    loss, acc = model.evaluate(testdata, testlabels, batch_size=32,
    # TSV row matching the header printed in __main__.
    print('{}\t{}\t{}\t{}\t{}\n'.format(
        winlen, winstep, modelname, loss, acc))
def bottlemodel(layers):
    """Build and compile a binary 'bottleneck' classifier.

    NOTE(review): lines are missing from this view — presumably
    `model = Sequential()` before the first .add() and a return/closure at
    the end (the models table calls `bottlemodel(3)` yet run() invokes
    `modelfun(traindata)`). `d` is read from an enclosing scope here;
    verify it is defined at call time.

    layers -- width of the single hidden ReLU layer.
    """
    model.add(Dense(layers, activation='relu', input_shape=(d.shape[1],)))
    # Single sigmoid unit: binary singing / not-singing output.
    model.add(Dense(1, activation='sigmoid'))
    # Dense(d.shape[1]*2, input_shape=(d.shape[1],), activation='relu'))
    # model.add(Dense(13, activation='relu'))
    # model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
def multimodel(layers):
    """Build and compile a 4-class softmax classifier (per-singer).

    NOTE(review): lines are missing from this view — presumably
    `model = Sequential()` before the first .add() and a return/closure at
    the end, mirroring bottlemodel(). `d` is read from an enclosing scope.

    layers -- width of the single hidden ReLU layer.
    """
    # model.add(Dense(d.shape[1]*2, input_shape=(d.shape[1],), activation='relu'))
    model.add(Dense(layers, activation='relu', input_shape=(d.shape[1],)))
    # 4-way softmax: matches utils.to_categorical(..., num_classes=4).
    model.add(Dense(4, activation='softmax'))
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
# NOTE(review): fragment of a module-level list — the opening
# `models = [` line is not visible in this view. Each entry is
# (name, model-builder result, multiclass-flag), consumed by run() via
# the __main__ sweep below.
('bottle3', bottlemodel(3), False),
('bottle5', bottlemodel(5), False),
('bottle8', bottlemodel(8), False),
('bottle13', bottlemodel(13), False),
('multi3', multimodel(3), True),
('multi5', multimodel(5), True),
('multi8', multimodel(8), True),
('multi13', multimodel(13), True)]
if __name__ == '__main__':
    # TSV header matching the result row printed inside run().
    print('winlen\twinstep\tmodel\tloss\taccuracy\n')
    # Keep one Keras/TF backend session open across all runs.
    with backend.get_session():
        # Sweep (window length, hop) pairs, in seconds.
        for winlen, winstep in ((0.025, 0.01), (0.1, 0.04), (0.2, 0.08)):
            for name, model, multi in models:
                m = run('mfcc', winlen, winstep, model, name, multi)
                fproot = 'model_{}_{}_{}'.format(winlen, winstep, name)
                # Persist the architecture as JSON.
                # NOTE(review): the body of this `with` is on a line not
                # visible in this view (presumably f.write(m.to_json())).
                with open('{}.json'.format(fproot), 'w') as f:
                # Weights are saved separately in HDF5.
                m.save_weights('{}.hdf5'.format(fproot))