7 from python_speech_features
import mfcc
, logfbank
8 import scipy
.io
.wavfile
as wav
12 from keras
.models
import Sequential
13 from keras
.layers
import Dense
, Dropout
# , Activation
14 from keras
import backend
22 files
= glob
.glob(os
.path
.join(os
.getcwd(), 'textgrid', '*.TextGrid'))
23 # Loop over all datafiles and make wavefile string
24 for i
, tg
in enumerate(files
):
25 num
= re
.match('^.*/(\\d+).TextGrid$', tg
).group(1)
26 yield (tg
, 'wav/{:02d}.wav'.format(int(num
)))
def label_from_annotation(ann: str) -> int:
    """Turn an interval's annotation text into a binary label.

    Args:
        ann: Annotation text of a single interval.

    Returns:
        0 when the text is empty or consists only of whitespace,
        1 otherwise.
    """
    # A stripped string is falsy exactly when nothing but whitespace was
    # present, so truthiness replaces the explicit comparison with ''.
    return 1 if ann.strip() else 0
31 def features_from_wav(tg
, wavp
, typ
='mfcc', winlen
=0.025, winstep
=0.01):
33 tgob
= pympi
.TextGrid(tg
)
34 intervalit
= tgob
.get_tier('lyrics').get_intervals(sort
=True)
36 (rate
, sig
) = wav
.read(wavp
, mmap
=True)
39 data
= mfcc(sig
, rate
, winlen
=winlen
, winstep
=winstep
, numcep
=13,
42 (data
, energy
) = logfbank(sig
, rate
, winlen
=winlen
, winstep
=winstep
, nfilt
=26)
44 raise ValueError("No such type")
46 (s
, e
, v
) = next(intervalit
)
48 label
= label_from_annotation(v
)
49 labels
= np
.empty(data
.shape
[0], dtype
=int)
52 # If we exceeded the interval, make new one
54 (s
, e
, v
) = next(intervalit
, (s
, e
, v
))
55 label
= label_from_annotation(v
)
61 currentframe
+= winstep
65 def run(typ
, winlen
, winstep
, modelfun
, modelname
):
69 for tg
, wavp
in get_datafiles():
70 (d
, l
) = features_from_wav(
71 tg
, wavp
, winlen
=winlen
, winstep
=winstep
, typ
=typ
)
75 datas
= np
.concatenate(datas
)
76 labels
= np
.concatenate(labels
)
78 rng_state
= np
.random
.get_state()
79 np
.random
.shuffle(datas
)
80 np
.random
.set_state(rng_state
)
81 np
.random
.shuffle(labels
)
83 splitindex
= int(labels
.shape
[0]*testset
)
84 testdata
, traindata
= datas
[:splitindex
], datas
[splitindex
:]
85 testlabels
, trainlabels
= labels
[:splitindex
], labels
[splitindex
:]
88 model
= modelfun(traindata
)
91 model
.fit(traindata
, trainlabels
, epochs
=10, batch_size
=32, shuffle
=False,
95 loss
, acc
= model
.evaluate(testdata
, testlabels
, batch_size
=32,
97 print('{}\t{}\t{}\t{}\t{}\n'.format(
98 winlen
, winstep
, modelname
, loss
, acc
))
104 Dense(d
.shape
[1]*2, input_shape
=(d
.shape
[1],), activation
='relu'))
105 model
.add(Dense(100, activation
='relu'))
106 model
.add(Dense(1, activation
='sigmoid'))
107 model
.compile(optimizer
='rmsprop',
108 loss
='binary_crossentropy',
109 metrics
=['accuracy'])
115 Dense(d
.shape
[1]*2, input_shape
=(d
.shape
[1],), activation
='relu'))
116 model
.add(Dense(13, activation
='relu'))
117 model
.add(Dense(1, activation
='sigmoid'))
118 model
.compile(optimizer
='rmsprop',
119 loss
='binary_crossentropy',
120 metrics
=['accuracy'])
123 if __name__
== '__main__':
124 print('winlen\twinstep\tmodel\tloss\taccuracy\n')
125 with backend
.get_session():
126 for winlen
, winstep
in ((0.025, 0.01), (0.1, 0.04), (0.2, 0.08)):
127 for name
, model
in (('simple', simplemodel
), ('bottle', bottlemodel
)):
128 m
= run('mfcc', winlen
, winstep
, model
, name
)
129 fproot
= 'model_{}_{}_{}'.format(winlen
, winstep
, name
)
130 with
open('{}.json'.format(fproot
), 'w') as f
:
132 m
.save_weights('{}.hdf5'.format(fproot
))