From b614ef4e5b306b74316fe798fecd13b7cb8d8cf3 Mon Sep 17 00:00:00 2001
From: Mart Lubbers
Date: Tue, 25 Apr 2017 14:00:23 +0200
Subject: [PATCH] update scripts and merge

---
 data.txt       |   0
 experiments.py | 123 +++++++++++++++++++++++++++++++++++++++++++++++++
 mfcc.py        |   7 ---
 preprocess.sh  |   4 +-
 segment.py     |  60 ------------------------
 test.py        |  19 --------
 train.py       |  28 -----------
 7 files changed, 125 insertions(+), 116 deletions(-)
 delete mode 100644 data.txt
 create mode 100644 experiments.py
 delete mode 100644 mfcc.py
 delete mode 100644 segment.py
 delete mode 100644 test.py
 delete mode 100644 train.py

diff --git a/data.txt b/data.txt
deleted file mode 100644
index e69de29..0000000
diff --git a/experiments.py b/experiments.py
new file mode 100644
index 0000000..708f0ee
--- /dev/null
+++ b/experiments.py
@@ -0,0 +1,123 @@
+import sys
+import pympi
+import random
+import glob
+import re
+import os
+
+# mfcc
+from python_speech_features import mfcc, fbank, logfbank
+import scipy.io.wavfile as wav
+import numpy as np
+
+#keras
+from keras.models import Sequential
+from keras.layers import Dense, Dropout # , Activation
+
+# Testset ratio
+testset = 0.10
+samplerate = 16000
+
+def get_datafiles():
+    files = glob.glob(os.path.join(os.getcwd(), 'textgrid', '*.TextGrid'))
+    # Loop over all datafiles and make wavefile string
+    for i, tg in enumerate(files):
+        num = re.match('^.*/(\\d+).TextGrid$', tg).group(1)
+        yield (tg, 'wav/{:02d}.wav'.format(int(num)))
+
+def label_from_annotation(ann):
+    return 0 if ann.strip() == '' else 1
+
+def features_from_wav(tg, wavp, typ='mfcc', winlen=0.025, winstep=0.01):
+    # Load textgrid
+    tgob = pympi.TextGrid(tg)
+    intervalit = tgob.get_tier('lyrics').get_intervals(sort=True)
+    # Load wav
+    (rate, sig) = wav.read(wavp, mmap=True)
+
+    if typ == 'mfcc':
+        data = mfcc(sig, rate, winlen=winlen, winstep=winstep, numcep=13,
+                    appendEnergy=True)
+    elif typ == 'fbank':
+        data = logfbank(sig, rate, winlen=winlen, winstep=winstep, nfilt=26)
+    else:
+        raise ValueError("No such type")
+
+    (s, e, v) = next(intervalit)
+    currentframe = 0.0
+    label = label_from_annotation(v)
+    labels = np.empty(data.shape[0], dtype=int)
+    i = 0
+    for d in data:
+        # If we exceeded the interval, make new one
+        if currentframe > e:
+            (s, e, v) = next(intervalit, (s, e, v))
+            label = label_from_annotation(v)
+
+        # Store datapoint
+        labels[i] = label
+
+        # Increase frame
+        currentframe += winstep
+        i += 1
+    return (data, labels)
+
+def run(typ, winlen, winstep, modelfun):
+    datas = []
+    labels = []
+
+    for tg, wavp in get_datafiles():
+        (d, l) = features_from_wav(tg, wavp, winlen=winlen, winstep=winstep, typ=typ)
+        datas.append(d)
+        labels.append(l)
+
+    datas = np.concatenate(datas)
+    labels = np.concatenate(labels)
+
+    rng_state = np.random.get_state()
+    np.random.shuffle(datas)
+    np.random.set_state(rng_state)
+    np.random.shuffle(labels)
+
+    splitindex = int(labels.shape[0]*testset)
+    testdata, traindata = datas[:splitindex], datas[splitindex:]
+    testlabels, trainlabels = labels[:splitindex], labels[splitindex:]
+    del datas, labels
+
+    model = modelfun(traindata)
+
+    #Train
+    model.fit(traindata, trainlabels, epochs=10, batch_size=32, shuffle=False,
+              verbose=0)
+
+    #Test
+    return model.evaluate(testdata, testlabels, batch_size=32, verbose=0)
+
+def simplemodel(d):
+    model = Sequential()
+    model.add(Dense(d.shape[1]*2, input_shape=(d.shape[1],), activation='relu'))
+    model.add(Dense(100, activation='relu'))
+    model.add(Dense(1, activation='sigmoid'))
+    model.compile(optimizer='rmsprop',
+                  loss='binary_crossentropy',
+                  metrics=['accuracy'])
+    return model
+
+def bottlemodel(d):
+    model = Sequential()
+    model.add(Dense(d.shape[1]*2, input_shape=(d.shape[1],), activation='relu'))
+    model.add(Dense(13, activation='relu'))
+    model.add(Dense(1, activation='sigmoid'))
+    model.compile(optimizer='rmsprop',
+                  loss='binary_crossentropy',
+                  metrics=['accuracy'])
+    return model
+
+if __name__ == '__main__':
+    #print(run('mfcc', 0.025, 0.01, simplemodel))
+    #print(run('mfcc', 0.1, 0.04, simplemodel))
+    #print(run('mfcc', 0.2, 0.08, simplemodel))
+
+    print(run('mfcc', 0.025, 0.01, bottlemodel))
+    print(run('mfcc', 0.1, 0.04, bottlemodel))
+    print(run('mfcc', 0.2, 0.08, bottlemodel))
diff --git a/mfcc.py b/mfcc.py
deleted file mode 100644
index 4bc7033..0000000
--- a/mfcc.py
+++ /dev/null
@@ -1,7 +0,0 @@
-from python_speech_features import mfcc
-import scipy.io.wavfile as wav
-import sys
-
-(rate, sig) = wav.read(sys.stdin.buffer)
-for i in mfcc(sig, rate, winlen=0.025, winstep=0.01, numcep=13, appendEnergy=True):
-    print(*i, sep='\t')
diff --git a/preprocess.sh b/preprocess.sh
index bf43c7c..58db369 100644
--- a/preprocess.sh
+++ b/preprocess.sh
@@ -4,6 +4,7 @@ set -e
 MAXPROCS=4
 FREQUENCY=44100
 FREQUENCY=22050
+FREQUENCY=16000
 
 rm -rf wav mfcc
 mkdir -p wav mfcc
@@ -18,8 +19,7 @@ for f in orig/*/*.flac; do
 	MFCC="mfcc/$NUM.mfcc"
 	(
 		echo "Processing $f" &&
-			sox "$f" -V1 -c 1 -r $FREQUENCY $WAV &&
-			python mfcc.py < "$WAV" > "$MFCC"
+			sox "$f" -V1 -c 1 -r $FREQUENCY $WAV
 	) &
 	i=$((i+1))
 done
diff --git a/segment.py b/segment.py
deleted file mode 100644
index dc803ef..0000000
--- a/segment.py
+++ /dev/null
@@ -1,60 +0,0 @@
-import sys
-import pympi
-import random
-import glob
-import re
-import os
-
-testset = 0.10
-data = []
-
-
-def process(num):
-    num = re.match('^.*/(\\d+).TextGrid$', num).group(1)
-    tg = 'textgrid/{:02d}.TextGrid'.format(int(num))
-
-    tgob = pympi.TextGrid(tg)
-    intervalit = tgob.get_tier('lyrics').get_intervals(sort=True)
-
-    with open('mfcc/{:02d}.mfcc'.format(int(num))) as mfcc:
-        (s, e, v) = next(intervalit)
-        currentframe = 0.0
-        for l in mfcc:
-            # Go to next interval
-            if currentframe > e:
-                try:
-                    (s, e, v) = next(intervalit)
-                except StopIteration:
-                    pass
-
-            label = 1 if v == '' else 0
-            data.append([label] + l.split('\t'))
-
-            # Increase time
-            currentframe += 0.01
-
-
-if __name__ == '__main__':
-    datafiles = []
-    for fl in glob.glob(os.path.join(os.getcwd(), 'textgrid', '*.TextGrid')):
-        sys.stderr.write('Segment {}\n'.format(fl))
-        process(fl)
-
-    sys.stderr.write('Shuffling {} samples\n'.format(len(data)))
-    random.shuffle(data)
-
-    splitpoint = int(len(data)*testset)
-    testset = data[:splitpoint]
-    trainset = data[splitpoint:]
-    del(data)
-
-    sys.stderr.write('Write testset: {} items\n'.format(splitpoint))
-    with open('test.txt', 'w') as f:
-        for d in testset:
-            f.write('\t'.join(map(str, d)))
-
-    sys.stderr.write('Write trainingset: {:d} items\n'.format(9*splitpoint))
-    with open('train.txt', 'w') as f:
-        for d in trainset:
-            f.write('\t'.join(map(str, d)))
-    f.close()
diff --git a/test.py b/test.py
deleted file mode 100644
index 7378596..0000000
--- a/test.py
+++ /dev/null
@@ -1,19 +0,0 @@
-import numpy as np
-from keras.models import model_from_json
-
-with open('model.json', 'r') as f:
-    json = f.read()
-
-model = model_from_json(json)
-model.load_weights('./model.hdf5')
-model.compile(
-    loss='binary_crossentropy',
-    optimizer='rmsprop',
-    metrics=['accuracy'])
-
-model.summary()
-
-dat = np.genfromtxt('test.txt', dtype=float, delimiter='\t', usecols=range(1, 14))
-lab = np.genfromtxt('test.txt', dtype=int, delimiter='\t', usecols=[0])
-
-print(model.evaluate(dat, lab, batch_size=32))
diff --git a/train.py b/train.py
deleted file mode 100644
index 28902f9..0000000
--- a/train.py
+++ /dev/null
@@ -1,28 +0,0 @@
-import numpy as np
-from keras.models import Sequential
-from keras.layers import Dense, Dropout # , Activation
-
-model = Sequential()
-
-model.add(Dense(26, input_shape=(13,), activation='relu'))
-#model.add(Dense(100, activation='relu'))
-#model.add(Dropout(0.25))
-model.add(Dense(100, activation='relu'))
-model.add(Dense(26, activation='relu'))
-model.add(Dense(1, activation='sigmoid'))
-
-model.compile(
-    loss='binary_crossentropy',
-    optimizer='rmsprop',
-    metrics=['accuracy'])
-
-model.summary()
-
-dat = np.genfromtxt('train.txt', dtype=float, delimiter='\t', usecols=range(1, 14))
-lab = np.genfromtxt('train.txt', dtype=int, delimiter='\t', usecols=[0])
-
-model.fit(dat, lab, epochs=10, batch_size=32)
-
-with open('model.json', 'w') as f:
-    f.write(model.to_json())
-model.save_weights('model.hdf5')
-- 
2.20.1
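
Note (not part of the patch): the least obvious step in the new run() is shuffling the feature matrix and the label vector "in unison" by restoring the NumPy RNG state between the two shuffles. A minimal standalone sketch of that trick, with toy arrays standing in for the real MFCC frames and labels:

import numpy as np

# Toy stand-ins for the MFCC frames and their singing/no-singing labels:
# row i of `features` is [2*i, 2*i + 1] and labels[i] is i.
features = np.arange(20, dtype=float).reshape(10, 2)
labels = np.arange(10)

# Shuffling both arrays from the same saved RNG state applies the same
# permutation to each, so features[i] still belongs to labels[i] afterwards.
rng_state = np.random.get_state()
np.random.shuffle(features)
np.random.set_state(rng_state)
np.random.shuffle(labels)

# The pairing survives the shuffle: each row's first value is still 2*label.
assert np.array_equal(features[:, 0], 2.0 * labels)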