From ce6a177b6cb5dc726ea075bb47190b21ce0995d3 Mon Sep 17 00:00:00 2001 From: Mart Lubbers Date: Tue, 16 May 2017 12:16:56 +0200 Subject: [PATCH] update data --- .gitignore | 1 + makevenv.sh | 2 +- predict.py | 41 ++++++++++++++++++++++++++++------------- 3 files changed, 30 insertions(+), 14 deletions(-) diff --git a/.gitignore b/.gitignore index 2146a79..a23102a 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,4 @@ test.txt train.txt *.hdf5 *.json +class.wav diff --git a/makevenv.sh b/makevenv.sh index d75000f..0146460 100755 --- a/makevenv.sh +++ b/makevenv.sh @@ -3,5 +3,5 @@ deactivate || true virtualenv -p python3 --system-site-packages . . bin/activate pip install --upgrade keras h5py python_speech_features pympi-ling scipy -pip install --upgrade https://storage.googleapis.com/tensorflow/linux/cpu/protobuf-3.1.0-cp35-none-linux_x86_64.whl +#pip install --upgrade https://storage.googleapis.com/tensorflow/linux/cpu/protobuf-3.1.0-cp36-none-linux_x86_64.whl deactivate diff --git a/predict.py b/predict.py index b561c65..18a84e7 100644 --- a/predict.py +++ b/predict.py @@ -1,6 +1,9 @@ -import numpy as np import sys +if len(sys.argv) != 3: + print("Usage: {} MODEL.json AUDIO.wav".format(sys.argv[0])) + sys.exit(1) + import pympi import scipy.io.wavfile as wav import numpy as np @@ -30,16 +33,28 @@ data = mfcc(sig, rate, winlen, winstep, numcep=13, appendEnergy=True) tgob = pympi.TextGrid(xmax=winstep*len(data)) tier = tgob.add_tier('lyrics') -time = 0.0 -lastlabel = False -lasttime = 0.0 -for i in model.predict(data, batch_size=32, verbose=0): -# print('{}\t{}'.format(time, i)) - label = i > 0.5 - if label != lastlabel and time-lasttime > 0.5: - tier.add_interval(lasttime, time, '*' if lastlabel else '') - lastlabel = label - lasttime = time - - time += winstep +window_len = int(1.0/winstep) + +x = model.predict(data, batch_size=32, verbose=0) +s = np.r_[x[window_len-1:0:-1],x,x[-2:-window_len-1:-1]] +w = np.hanning(window_len) + +smoothed = np.convolve(w/w.sum(), s[:,0], mode='valid') + +wavdata = np.uint8(list(map(int, + smoothed*255))[int(window_len/2):-1*(int(window_len/2))]) + + +print('sr: ', int(1.0/winstep)) +print("len(wavdata): ", len(wavdata)) +print("len(x): ", len(x)) +wav.write('class.wav', int(1.0/winstep), wavdata) +#for i in smoothed: +# print(int(i*255)) + + + + + + tgob.to_file('/dev/stdout') -- 2.20.1