9 from python_speech_features
import mfcc
, fbank
, logfbank
10 import scipy
.io
.wavfile
as wav
14 from keras
.models
import Sequential
15 from keras
.layers
import Dense
, Dropout
# , Activation
22 files
= glob
.glob(os
.path
.join(os
.getcwd(), 'textgrid', '*.TextGrid'))
23 # Loop over all datafiles and make wavefile string
24 for i
, tg
in enumerate(files
):
25 num
= re
.match('^.*/(\\d+).TextGrid$', tg
).group(1)
26 yield (tg
, 'wav/{:02d}.wav'.format(int(num
)))
def label_from_annotation(ann):
    """Map the annotation text of one interval to a binary label.

    Args:
        ann: Annotation string of a single interval.

    Returns:
        0 when the annotation is empty or contains only whitespace,
        1 otherwise.
    """
    # strip() first so an interval holding only spaces/newlines counts as empty.
    return 1 if ann.strip() else 0
31 def features_from_wav(tg
, wavp
, typ
='mfcc', winlen
=0.025, winstep
=0.01):
33 tgob
= pympi
.TextGrid(tg
)
34 intervalit
= tgob
.get_tier('lyrics').get_intervals(sort
=True)
36 (rate
, sig
) = wav
.read(wavp
, mmap
=True)
39 data
= mfcc(sig
, rate
, winlen
=winlen
, winstep
=winstep
, numcep
=13,
42 (data
, energy
) = logfbank(sig
, rate
, winlen
=winlen
, winstep
=winstep
, nfilt
=26)
44 raise ValueError("No such type")
46 (s
, e
, v
) = next(intervalit
)
48 label
= label_from_annotation(v
)
49 labels
= np
.empty(data
.shape
[0], dtype
=int)
52 # If we exceeded the interval, make new one
54 (s
, e
, v
) = next(intervalit
, (s
, e
, v
))
55 label
= label_from_annotation(v
)
61 currentframe
+= winstep
65 def run(typ
, winlen
, winstep
, modelfun
):
69 for tg
, wavp
in get_datafiles():
70 (d
, l
) = features_from_wav(tg
, wavp
, winlen
=winlen
, winstep
=winstep
, typ
=typ
)
74 datas
= np
.concatenate(datas
)
75 labels
= np
.concatenate(labels
)
77 rng_state
= np
.random
.get_state()
78 np
.random
.shuffle(datas
)
79 np
.random
.set_state(rng_state
)
80 np
.random
.shuffle(labels
)
82 splitindex
= int(labels
.shape
[0]*testset
)
83 testdata
, traindata
= datas
[:splitindex
], datas
[splitindex
:]
84 testlabels
, trainlabels
= labels
[:splitindex
], labels
[splitindex
:]
87 model
= modelfun(traindata
)
90 model
.fit(traindata
, trainlabels
, epochs
=10, batch_size
=32, shuffle
=False,
94 return model
.evaluate(testdata
, testlabels
, batch_size
=32, verbose
=0)
98 model
.add(Dense(d
.shape
[1]*2, input_shape
=(d
.shape
[1],), activation
='relu'))
99 model
.add(Dense(100, activation
='relu'))
100 model
.add(Dense(1, activation
='sigmoid'))
101 model
.compile(optimizer
='rmsprop',
102 loss
='binary_crossentropy',
103 metrics
=['accuracy'])
108 model
.add(Dense(d
.shape
[1]*2, input_shape
=(d
.shape
[1],), activation
='relu'))
109 model
.add(Dense(13, activation
='relu'))
110 model
.add(Dense(1, activation
='sigmoid'))
111 model
.compile(optimizer
='rmsprop',
112 loss
='binary_crossentropy',
113 metrics
=['accuracy'])
if __name__ == '__main__':
    # Evaluate one model builder over several (winlen, winstep) framings and
    # print whatever run() returns for each, in this order.
    # Swap `bottlemodel` for `simplemodel` to evaluate the other builder.
    for winlen, winstep in ((0.025, 0.01), (0.1, 0.04), (0.2, 0.08)):
        print(run('mfcc', winlen, winstep, bottlemodel))