-import numpy as np
import sys
+if len(sys.argv) != 3:
+    # Exactly two arguments are required. The usage text goes to stderr so it
+    # never mixes with the TextGrid this script writes to /dev/stdout.
+    sys.stderr.write("Usage: {} MODEL.json AUDIO.wav\n".format(sys.argv[0]))
+    sys.exit(1)
+
import pympi
import scipy.io.wavfile as wav
import numpy as np
model = model_from_json(json)
model.load_weights(hdf5file)
-(_, winlen, winstep, _) = modelfile.split('_')
+# modelfile must split on '_' into exactly four fields (otherwise the
+# unpacking raises ValueError); the 2nd and 3rd are parsed as floats below
+# and the 4th is kept as the model name.
+(_, winlen, winstep, name) = modelfile.split('_')
winlen = float(winlen)
winstep = float(winstep)
+# True only when the name field is exactly 'multi'.
+# NOTE(review): no use of multiclass is visible in this patch - confirm it
+# is consumed elsewhere.
+multiclass = name == 'multi'
+
model.compile(
loss='binary_crossentropy',
optimizer='rmsprop',
tgob = pympi.TextGrid(xmax=winstep*len(data))
tier = tgob.add_tier('lyrics')
-time = 0.0
-lastlabel = False
-lasttime = 0.0
-for i in model.predict(data, batch_size=32, verbose=0):
-# print('{}\t{}'.format(time, i))
- label = i > 0.5
- if label != lastlabel and time-lasttime > 0.5:
- tier.add_interval(lasttime, time, '*' if lastlabel else '')
- lastlabel = label
- lasttime = time
-
- time += winstep
+# Smooth the per-frame predictions with a Hann window of int(1/winstep)
+# frames and dump them as an 8-bit WAV whose sample rate is that same value.
+window_len = int(1.0/winstep)
+
+x = model.predict(data, batch_size=32, verbose=0)
+# Mirror-pad both ends so the 'valid' convolution still covers every frame.
+s = np.r_[x[window_len-1:0:-1], x, x[-2:-window_len-1:-1]]
+w = np.hanning(window_len)
+if not w.sum():
+    # np.hanning(2) is all zeros; use a flat window instead of dividing by 0.
+    w = np.ones(window_len)
+# Only the first output column is smoothed.
+smoothed = np.convolve(w/w.sum(), s[:, 0], mode='valid')
+# Drop the padding but keep exactly len(x) samples. The previous symmetric
+# slice [half:-half] lost one frame for even windows and returned nothing
+# when half == 0.
+half = window_len // 2
+scaled = smoothed[half:half + len(x)] * 255
+# Clip before the cast so rounding noise cannot wrap around in uint8.
+wavdata = np.clip(scaled, 0, 255).astype(np.uint8)
+
+# Diagnostics go to stderr: stdout carries the TextGrid written at the end
+# of the script.
+sys.stderr.write('sr:  {}\n'.format(window_len))
+sys.stderr.write('len(wavdata):  {}\n'.format(len(wavdata)))
+sys.stderr.write('len(x):  {}\n'.format(len(x)))
+wav.write('class.wav', window_len, wavdata)
+
tgob.to_file('/dev/stdout')