b561c6511110c90ae6ee5b56e12335d03a45d43a
[asr1617data.git] / predict.py
1 import numpy as np
2 import sys
3
4 import pympi
5 import scipy.io.wavfile as wav
6 import numpy as np
7 from python_speech_features import mfcc
8
9 from keras.models import model_from_json
10
def segment_labels(predictions, winstep, threshold=0.5, min_duration=0.5):
    """Turn per-frame scores into a list of labelled time intervals.

    Frame ``k`` is taken to start at ``k * winstep`` seconds. A frame is
    "on" when its score is greater than ``threshold``. A change of label is
    only accepted once the current segment has lasted more than
    ``min_duration`` seconds; earlier changes are ignored, so the boundary
    is placed at the first later frame that still disagrees with the
    current label.

    Args:
        predictions: iterable of per-frame scores; each item must be usable
            in ``bool(item > threshold)`` (a float or a one-element array).
        winstep: time between successive frames, in seconds.
        threshold: score above which a frame counts as "on".
        min_duration: minimum length in seconds a segment must reach before
            a label change may close it.

    Returns:
        A list of ``(start, end, text)`` tuples covering the span from 0 to
        ``n_frames * winstep``, where ``text`` is ``'*'`` for "on" segments
        and ``''`` otherwise. Empty input yields an empty list.
    """
    intervals = []
    current = False  # the signal is assumed to start in the "off" state
    start = 0.0
    n_frames = 0
    for index, score in enumerate(predictions):
        n_frames = index + 1
        # Derive the time from the index instead of accumulating winstep,
        # so rounding errors do not build up over long recordings.
        time = index * winstep
        label = bool(score > threshold)
        if label != current and time - start > min_duration:
            intervals.append((start, time, '*' if current else ''))
            current = label
            start = time

    # Flush the segment that is still open when the frames run out;
    # without this the last segment was never written to the tier.
    end = n_frames * winstep
    if end > start:
        intervals.append((start, end, '*' if current else ''))
    return intervals


def main(argv):
    """Label a wav file with a trained model and print a TextGrid.

    Args:
        argv: command line; ``argv[1]`` is the model JSON file, whose
            basename must look like ``<name>_<winlen>_<winstep>_<rest>``,
            and ``argv[2]`` is the wav file to label. The weights are read
            from the file with the same stem and a ``.hdf5`` extension.
    """
    import os

    if len(argv) < 3:
        sys.exit('usage: {} MODEL.json AUDIO.wav'.format(argv[0]))

    modelfile = argv[1]
    hdf5file = '{}.hdf5'.format(os.path.splitext(modelfile)[0])

    with open(modelfile, 'r', encoding='utf-8') as f:
        model = model_from_json(f.read())
    model.load_weights(hdf5file)

    # The window parameters are encoded in the file name. Only the basename
    # is parsed so that underscores in directory names do not break the
    # unpacking.
    (_, winlen, winstep, _) = os.path.basename(modelfile).split('_')
    winlen = float(winlen)
    winstep = float(winstep)

    model.compile(
        loss='binary_crossentropy',
        optimizer='rmsprop',
        metrics=['accuracy'])

    (rate, sig) = wav.read(argv[2], mmap=True)
    data = mfcc(sig, rate, winlen, winstep, numcep=13, appendEnergy=True)
    tgob = pympi.TextGrid(xmax=winstep*len(data))
    tier = tgob.add_tier('lyrics')

    predictions = model.predict(data, batch_size=32, verbose=0)
    for (begin, end, text) in segment_labels(predictions, winstep):
        tier.add_interval(begin, end, text)

    tgob.to_file('/dev/stdout')


if __name__ == '__main__':
    main(sys.argv)