# Print the command-line usage banner.
# NOTE(review): presumably guarded by an argv-length check on a line not
# visible in this chunk — confirm against the full file.
print("Usage: {} MODEL.json AUDIO.wav".format(sys.argv[0]))
import os

import scipy.io.wavfile as wav
from python_speech_features import mfcc
from keras.models import model_from_json
# Path to the serialized model architecture (JSON), given on the command line.
modelfile = sys.argv[1]
# Trained weights live beside the architecture file with an .hdf5 extension.
# Use splitext instead of slicing off the last five characters, so the path
# is handled correctly even when the extension is not exactly ".json".
hdf5file = '{}.hdf5'.format(os.path.splitext(modelfile)[0])
# Read the model architecture JSON from disk.
# NOTE(review): the body of this `with` statement (the line that reads the
# file into the local `json` — which shadows the stdlib module) falls outside
# this chunk; confirm against the full file.
with
open(modelfile
, 'r', encoding
='utf-8') as f
:
# Rebuild the Keras model from its JSON architecture string, then restore
# the trained weights saved alongside it.
model = model_from_json(json)
model.load_weights(hdf5file)
# Decode hyper-parameters encoded in the model file name, expected to follow
# the pattern <prefix>_<winlen>_<winstep>_<name>.json.
# Split the extension-stripped stem: splitting the raw path would leave
# name == 'multi.json', so the multiclass test below could never be true.
(_, winlen, winstep, name) = os.path.splitext(modelfile)[0].split('_')
winlen = float(winlen)    # analysis window length (seconds)
winstep = float(winstep)  # step between successive windows (seconds)
# Whether the model was trained as a multi-class classifier.
multiclass = name == 'multi'
# NOTE(review): fragment of a model.compile(...) call whose surrounding
# lines fall outside this chunk — do not edit in isolation.
loss
='binary_crossentropy',
# Read the audio file named on the command line (memory-mapped, so the whole
# signal is not pulled into RAM) and convert it to MFCC feature frames using
# the window length / step recovered from the model file name.
(rate, sig) = wav.read(sys.argv[2], mmap=True)
data = mfcc(sig, rate, winlen=winlen, winstep=winstep,
            numcep=13, appendEnergy=True)
# TextGrid annotation spanning the classified audio: one MFCC frame covers
# winstep seconds, so xmax is the total analysed duration.
tgob = pympi.TextGrid(xmax=winstep * len(data))
tier = tgob.add_tier('lyrics')
# Frames per second of audio; reused below as the smoothing window length.
window_len = int(1.0 / winstep)
# Per-frame class probabilities predicted by the network.
x = model.predict(data, batch_size=32, verbose=0)
# Smooth the first output channel with a normalized Hanning window, then
# rescale it to 8-bit samples for the classification WAV track.
half = int(window_len / 2)
# Mirror-pad the prediction track at both ends so the convolution stays
# centred on the original samples (standard signal-smoothing recipe).
s = np.r_[x[window_len - 1:0:-1], x, x[-2:-window_len - 1:-1]]
w = np.hanning(window_len)
smoothed = np.convolve(w / w.sum(), s[:, 0], mode='valid')
# Vectorized replacement for list(map(int, smoothed*255))[half:-half]:
# slice the padding off first, then scale and truncate to uint8 in one
# NumPy pass (identical truncation for these non-negative probabilities).
wavdata = (smoothed[half:-half] * 255).astype(np.uint8)
# The classification track has one sample per analysis step; compute its
# sample rate once instead of repeating the expression.
sr = int(1.0 / winstep)
# Diagnostics for eyeballing that the smoothed track matches the input.
print('sr: ', sr)
print("len(wavdata): ", len(wavdata))
print("len(x): ", len(x))
# Write the smoothed per-frame classification as an 8-bit mono WAV.
wav.write('class.wav', sr, wavdata)
# Dump the TextGrid annotation to standard output.
# NOTE(review): no intervals appear to be added to its tier within this
# chunk; annotation code may exist on lines not visible here — confirm.
tgob.to_file('/dev/stdout')