# Script: run a pre-trained Keras model over MFCC features of a WAV file and
# emit (a) 'class.wav' — the per-frame predictions rescaled to uint8 samples —
# and (b) a pympi TextGrid on stdout.
#
# NOTE(review): this chunk is a damaged extract. Each statement is split over
# several physical lines, the leading integers (4, 8, 10, ...) are original
# line numbers fused into the text, and several original lines are missing
# (e.g. 18, 27-32), so some behavior below can only be inferred — hedged
# comments mark those spots.
#
# Usage banner. NOTE(review): presumably preceded by an argc check on a
# missing line; as shown it would print unconditionally — confirm upstream.
4 print("Usage: {} MODEL.json AUDIO.wav".format(sys
.argv
[0]))
# scipy WAV reader/writer, used below as wav.read(...) / wav.write(...).
8 import scipy
.io
.wavfile
as wav
# MFCC feature extraction (python_speech_features).
10 from python_speech_features
import mfcc
# Deserialize a Keras model architecture from its JSON description.
12 from keras
.models
import model_from_json
# argv[1] is the model architecture file (MODEL.json).
14 modelfile
= sys
.argv
[1]
# Weights file derived from the model path: strip the 5-char '.json'
# suffix ([:-5]) and append '.hdf5'.
15 hdf5file
= '{}.hdf5'.format(modelfile
[:-5])
# Read the model JSON. NOTE(review): original line 18 (the with-body,
# presumably `json = f.read()`) is missing from this view — the bare name
# `json` used below must be bound there; confirm against the full file.
17 with
open(modelfile
, 'r', encoding
='utf-8') as f
:
# Rebuild the model from the JSON text read above, then load its weights.
19 model
= model_from_json(json
)
20 model
.load_weights(hdf5file
)
# The model filename encodes hyperparameters, underscore-separated:
# <prefix>_<winlen>_<winstep>_<name>. NOTE(review): breaks if the path
# contains extra underscores — confirm the naming convention.
22 (_
, winlen
, winstep
, name
) = modelfile
.split('_')
# Window length and step arrive as strings; convert to seconds (float).
23 winlen
= float(winlen
)
24 winstep
= float(winstep
)
# Model flavor flag from the filename suffix; 'multi' => multiclass.
26 multiclass
= name
== 'multi'
# NOTE(review): orphaned fragment of a larger call (likely model.compile)
# whose surrounding lines 27-32 are missing from this view.
29 loss
='binary_crossentropy',
# Load the audio: argv[2] is the WAV path; mmap=True avoids reading the
# whole file into memory (scipy.io.wavfile.read).
33 (rate
, sig
) = wav
.read(sys
.argv
[2], mmap
=True)
# Frame the signal into MFCC features with the window parameters recovered
# from the model filename; 13 cepstral coefficients, energy in coeff 0.
34 data
= mfcc(sig
, rate
, winlen
, winstep
, numcep
=13, appendEnergy
=True)
# TextGrid spanning the analyzed duration: one frame every winstep seconds,
# so xmax = winstep * number_of_frames.
35 tgob
= pympi
.TextGrid(xmax
=winstep
*len(data
))
# NOTE(review): the tier is created but no intervals are added in the lines
# visible here — confirm whether annotation happens on missing lines.
36 tier
= tgob
.add_tier('lyrics')
# Frames per second of output; also used as the smoothing window size in
# the commented-out code below.
38 window_len
= int(1.0/winstep
)
# Per-frame prediction for every MFCC frame.
40 x
= model
.predict(data
, batch_size
=32, verbose
=0)
# Disabled Hanning-window smoothing of the prediction curve (kept for
# reference; the unsmoothed predictions are written out instead).
41 #s = np.r_[x[window_len-1:0:-1],x,x[-2:-window_len-1:-1]]
42 #w = np.hanning(window_len)
44 #smoothed = np.convolve(w/w.sum(), s[:,0], mode='valid')
45 #wavdata = np.uint8(list(map(int,
46 #  smoothed*255))[int(window_len/2):-1*(int(window_len/2))])
# Scale predictions (presumably in [0,1] from a sigmoid — confirm) to
# 0-255 uint8 samples so the curve can be inspected as audio.
47 wavdata
= np
.uint8(x
*255)
# Diagnostics: effective sample rate and array lengths.
49 print('sr: ', int(1.0/winstep
))
50 print("len(wavdata): ", len(wavdata
))
51 print("len(x): ", len(x
))
# Write the prediction curve as a WAV at one sample per analysis frame.
52 wav
.write('class.wav', int(1.0/winstep
), wavdata
)
# Emit the (possibly empty — see note above) TextGrid to stdout.
54 tgob
.to_file('/dev/stdout')