9 from python_speech_features
import mfcc
, fbank
, logfbank
10 import scipy
.io
.wavfile
as wav
14 from keras
.models
import Sequential
15 from keras
.layers
import Dense
, Dropout
# , Activation
23 files
= glob
.glob(os
.path
.join(os
.getcwd(), 'textgrid', '*.TextGrid'))
24 # Loop over all datafiles and make wavefile string
25 for i
, tg
in enumerate(files
):
26 num
= re
.match('^.*/(\\d+).TextGrid$', tg
).group(1)
27 yield (tg
, 'wav/{:02d}.wav'.format(int(num
)))
def label_from_annotation(ann):
    """Turn an annotation string into a binary label.

    :param ann: annotation text; ``None`` is treated like an empty
        annotation instead of raising ``AttributeError``.
    :returns: ``0`` if the annotation is empty or contains only
        whitespace, ``1`` otherwise.
    """
    # A missing or whitespace-only annotation is falsy once stripped.
    return 1 if ann and ann.strip() else 0
32 def features_from_wav(tg
, wavp
, typ
='mfcc', winlen
=0.025, winstep
=0.01):
34 tgob
= pympi
.TextGrid(tg
)
35 intervalit
= tgob
.get_tier('lyrics').get_intervals(sort
=True)
37 (rate
, sig
) = wav
.read(wavp
, mmap
=True)
40 data
= mfcc(sig
, rate
, winlen
=winlen
, winstep
=winstep
, numcep
=13,
43 (data
, energy
) = logfbank(sig
, rate
, winlen
=winlen
, winstep
=winstep
, nfilt
=26)
45 raise ValueError("No such type")
47 (s
, e
, v
) = next(intervalit
)
49 label
= label_from_annotation(v
)
50 labels
= np
.empty(data
.shape
[0], dtype
=int)
53 # If we exceeded the interval, make new one
55 (s
, e
, v
) = next(intervalit
, (s
, e
, v
))
56 label
= label_from_annotation(v
)
62 currentframe
+= winstep
66 def run(typ
, winlen
, winstep
, modelfun
, modelname
):
70 for tg
, wavp
in get_datafiles():
71 (d
, l
) = features_from_wav(
72 tg
, wavp
, winlen
=winlen
, winstep
=winstep
, typ
=typ
)
76 datas
= np
.concatenate(datas
)
77 labels
= np
.concatenate(labels
)
79 rng_state
= np
.random
.get_state()
80 np
.random
.shuffle(datas
)
81 np
.random
.set_state(rng_state
)
82 np
.random
.shuffle(labels
)
84 splitindex
= int(labels
.shape
[0]*testset
)
85 testdata
, traindata
= datas
[:splitindex
], datas
[splitindex
:]
86 testlabels
, trainlabels
= labels
[:splitindex
], labels
[splitindex
:]
89 model
= modelfun(traindata
)
92 model
.fit(traindata
, trainlabels
, epochs
=10, batch_size
=32, shuffle
=False,
96 loss
, acc
= model
.evaluate(testdata
, testlabels
, batch_size
=32,
98 print('{}\t{}\t{}\t{}\t{}\n'.format(
99 winlen
, winstep
, modelname
, loss
, acc
))
104 Dense(d
.shape
[1]*2, input_shape
=(d
.shape
[1],), activation
='relu'))
105 model
.add(Dense(100, activation
='relu'))
106 model
.add(Dense(1, activation
='sigmoid'))
107 model
.compile(optimizer
='rmsprop',
108 loss
='binary_crossentropy',
109 metrics
=['accuracy'])
115 Dense(d
.shape
[1]*2, input_shape
=(d
.shape
[1],), activation
='relu'))
116 model
.add(Dense(13, activation
='relu'))
117 model
.add(Dense(1, activation
='sigmoid'))
118 model
.compile(optimizer
='rmsprop',
119 loss
='binary_crossentropy',
120 metrics
=['accuracy'])
if __name__ == '__main__':
    # Column header for the tab-separated rows printed by run(); the
    # trailing newline is part of the emitted text.
    header = '\t'.join(('winlen', 'winstep', 'model', 'loss', 'accuracy'))
    print(header + '\n')

    # (winlen, winstep) pairs to evaluate.
    window_settings = ((0.025, 0.01), (0.1, 0.04), (0.2, 0.08))
    # (label, model factory) pairs; the label is passed to run() as modelname.
    model_builders = (('simple', simplemodel), ('bottle', bottlemodel))

    # Run every model factory on the 'mfcc' feature type for each window
    # setting, windows outermost.
    for winlen, winstep in window_settings:
        for modelname, modelfun in model_builders:
            run('mfcc', winlen, winstep, modelfun, modelname)