update scripts and merge
authorMart Lubbers <mart@martlubbers.net>
Tue, 25 Apr 2017 12:00:23 +0000 (14:00 +0200)
committerMart Lubbers <mart@martlubbers.net>
Tue, 25 Apr 2017 12:00:31 +0000 (14:00 +0200)
data.txt [deleted file]
experiments.py [new file with mode: 0644]
mfcc.py [deleted file]
preprocess.sh
segment.py [deleted file]
test.py [deleted file]
train.py [deleted file]

diff --git a/data.txt b/data.txt
deleted file mode 100644 (file)
index e69de29..0000000
diff --git a/experiments.py b/experiments.py
new file mode 100644 (file)
index 0000000..708f0ee
--- /dev/null
@@ -0,0 +1,123 @@
+import sys
+import pympi
+import random
+import glob
+import re
+import os
+
+# mfcc
+from python_speech_features import mfcc, fbank, logfbank
+import scipy.io.wavfile as wav
+import numpy as np
+
+# keras
+from keras.models import Sequential
+from keras.layers import Dense, Dropout  # , Activation
+
+# Testset ratio
+testset = 0.10
+samplerate = 16000
+
def get_datafiles():
    """Yield (textgrid_path, wav_path) pairs for the data set.

    Globs every ``*.TextGrid`` file under ``./textgrid`` in the current
    working directory and pairs it with the matching wav file, whose name
    is the numeric part of the TextGrid filename zero-padded to two digits
    (e.g. ``.../3.TextGrid`` -> ``wav/03.wav``).

    Yields:
        tuple[str, str]: (TextGrid path, relative wav path).
    """
    files = glob.glob(os.path.join(os.getcwd(), 'textgrid', '*.TextGrid'))
    # Loop over all datafiles and make wavefile string
    # (the original enumerate() index was unused, so it is dropped)
    for tg in files:
        num = re.match('^.*/(\\d+).TextGrid$', tg).group(1)
        yield (tg, 'wav/{:02d}.wav'.format(int(num)))
+
def label_from_annotation(ann):
    """Map an annotation string to a binary label.

    Returns 1 when the annotation contains any non-whitespace text
    (i.e. the frame is annotated), 0 for an empty/blank annotation.
    """
    stripped = ann.strip()
    if stripped == '':
        return 0
    return 1
+
def features_from_wav(tg, wavp, typ='mfcc', winlen=0.025, winstep=0.01):
    """Compute frame-level features and binary labels for one recording.

    Args:
        tg: Path to the pympi-readable TextGrid with a 'lyrics' tier.
        wavp: Path to the corresponding wav file.
        typ: Feature type, either 'mfcc' (13 cepstra incl. energy) or
            'fbank' (26 log filterbank energies).
        winlen: Analysis window length in seconds.
        winstep: Step between successive windows in seconds.

    Returns:
        (data, labels): the (n_frames, n_features) feature matrix and an
        int array of per-frame labels (1 = annotated, 0 = blank).

    Raises:
        ValueError: if ``typ`` is neither 'mfcc' nor 'fbank'.
    """
    # Load textgrid
    tgob = pympi.TextGrid(tg)
    intervalit = tgob.get_tier('lyrics').get_intervals(sort=True)
    # Load wav
    (rate, sig) = wav.read(wavp, mmap=True)

    if typ == 'mfcc':
        data = mfcc(sig, rate, winlen=winlen, winstep=winstep, numcep=13,
                    appendEnergy=True)
    elif typ == 'fbank':
        # BUGFIX: logfbank returns only the feature array; it is fbank()
        # that returns a (features, energy) tuple. Unpacking logfbank's
        # result into two names raised a ValueError at runtime.
        data = logfbank(sig, rate, winlen=winlen, winstep=winstep, nfilt=26)
    else:
        raise ValueError("No such type")

    (s, e, v) = next(intervalit)
    currentframe = 0.0
    label = label_from_annotation(v)
    labels = np.empty(data.shape[0], dtype=int)
    for i in range(data.shape[0]):
        # If we exceeded the interval, advance to the next one; once the
        # iterator is exhausted, keep reusing the final interval's label.
        if currentframe > e:
            (s, e, v) = next(intervalit, (s, e, v))
            label = label_from_annotation(v)

        labels[i] = label

        # Advance the frame clock by one window step
        currentframe += winstep
    return (data, labels)
+
def run(typ, winlen, winstep, modelfun):
    """Train and evaluate a model on all data files.

    Extracts ``typ`` features from every (TextGrid, wav) pair, shuffles
    frames and labels in lockstep, splits off the module-level ``testset``
    fraction as held-out data, trains the model built by ``modelfun`` for
    10 epochs, and returns the Keras evaluation result on the test split.
    """
    feature_parts = []
    label_parts = []

    for tg, wavp in get_datafiles():
        feats, labs = features_from_wav(tg, wavp, winlen=winlen,
                                        winstep=winstep, typ=typ)
        feature_parts.append(feats)
        label_parts.append(labs)

    all_feats = np.concatenate(feature_parts)
    all_labels = np.concatenate(label_parts)

    # Shuffle features and labels identically by replaying the RNG state
    rng_state = np.random.get_state()
    np.random.shuffle(all_feats)
    np.random.set_state(rng_state)
    np.random.shuffle(all_labels)

    splitindex = int(all_labels.shape[0] * testset)
    testdata, traindata = all_feats[:splitindex], all_feats[splitindex:]
    testlabels, trainlabels = all_labels[:splitindex], all_labels[splitindex:]
    # Free the full concatenated arrays before training
    del all_feats, all_labels

    model = modelfun(traindata)

    # Train (shuffle=False: the data was already shuffled above)
    model.fit(traindata, trainlabels, epochs=10, batch_size=32, shuffle=False,
            verbose=0)

    # Test
    return model.evaluate(testdata, testlabels, batch_size=32, verbose=0)
+
def simplemodel(d):
    """Build a wide binary classifier sized to the feature dimension of ``d``.

    Layers: 2*n_features relu -> 100 relu -> 1 sigmoid, compiled with
    rmsprop / binary cross-entropy / accuracy.
    """
    n_features = d.shape[1]
    net = Sequential()
    net.add(Dense(n_features * 2, input_shape=(n_features,),
                  activation='relu'))
    net.add(Dense(100, activation='relu'))
    net.add(Dense(1, activation='sigmoid'))
    net.compile(optimizer='rmsprop',
                loss='binary_crossentropy',
                metrics=['accuracy'])
    return net
+
def bottlemodel(d):
    """Build a bottleneck binary classifier sized to the feature dim of ``d``.

    Layers: 2*n_features relu -> 13-unit bottleneck relu -> 1 sigmoid,
    compiled with rmsprop / binary cross-entropy / accuracy.
    """
    n_features = d.shape[1]
    net = Sequential()
    net.add(Dense(n_features * 2, input_shape=(n_features,),
                  activation='relu'))
    net.add(Dense(13, activation='relu'))
    net.add(Dense(1, activation='sigmoid'))
    net.compile(optimizer='rmsprop',
                loss='binary_crossentropy',
                metrics=['accuracy'])
    return net
+
if __name__ == '__main__':
    # Evaluate the bottleneck model at three window configurations.
    # (Equivalent simplemodel runs were tried earlier and left disabled.)
    for winlen, winstep in ((0.025, 0.01), (0.1, 0.04), (0.2, 0.08)):
        print(run('mfcc', winlen, winstep, bottlemodel))
diff --git a/mfcc.py b/mfcc.py
deleted file mode 100644 (file)
index 4bc7033..0000000
--- a/mfcc.py
+++ /dev/null
@@ -1,7 +0,0 @@
-from python_speech_features import mfcc
-import scipy.io.wavfile as wav
-import sys
-
-(rate, sig) = wav.read(sys.stdin.buffer)
-for i in mfcc(sig, rate, winlen=0.025, winstep=0.01, numcep=13, appendEnergy=True):
-    print(*i, sep='\t')
index bf43c7c..58db369 100644 (file)
@@ -4,6 +4,7 @@ set -e
 MAXPROCS=4
 FREQUENCY=44100
 FREQUENCY=22050
+FREQUENCY=16000
 
 rm -rf wav mfcc
 mkdir -p wav mfcc
@@ -18,8 +19,7 @@ for f in orig/*/*.flac; do
        MFCC="mfcc/$NUM.mfcc"
 
        (       echo "Processing $f" &&
-               sox "$f" -V1 -c 1 -r $FREQUENCY $WAV &&
-               python mfcc.py < "$WAV" > "$MFCC"
+               sox "$f" -V1 -c 1 -r $FREQUENCY $WAV
        ) &
        i=$((i+1))
 done
diff --git a/segment.py b/segment.py
deleted file mode 100644 (file)
index dc803ef..0000000
+++ /dev/null
@@ -1,60 +0,0 @@
-import sys
-import pympi
-import random
-import glob
-import re
-import os
-
-testset = 0.10
-data = []
-
-
-def process(num):
-    num = re.match('^.*/(\\d+).TextGrid$', num).group(1)
-    tg = 'textgrid/{:02d}.TextGrid'.format(int(num))
-
-    tgob = pympi.TextGrid(tg)
-    intervalit = tgob.get_tier('lyrics').get_intervals(sort=True)
-
-    with open('mfcc/{:02d}.mfcc'.format(int(num))) as mfcc:
-        (s, e, v) = next(intervalit)
-        currentframe = 0.0
-        for l in mfcc:
-            # Go to next interval
-            if currentframe > e:
-                try:
-                    (s, e, v) = next(intervalit)
-                except StopIteration:
-                    pass
-
-            label = 1 if v == '' else 0
-            data.append([label] + l.split('\t'))
-
-            # Increase time
-            currentframe += 0.01
-
-
-if __name__ == '__main__':
-    datafiles = []
-    for fl in glob.glob(os.path.join(os.getcwd(), 'textgrid', '*.TextGrid')):
-        sys.stderr.write('Segment {}\n'.format(fl))
-        process(fl)
-
-    sys.stderr.write('Shuffling {} samples\n'.format(len(data)))
-    random.shuffle(data)
-
-    splitpoint = int(len(data)*testset)
-    testset = data[:splitpoint]
-    trainset = data[splitpoint:]
-    del(data)
-
-    sys.stderr.write('Write testset: {} items\n'.format(splitpoint))
-    with open('test.txt', 'w') as f:
-        for d in testset:
-            f.write('\t'.join(map(str, d)))
-
-    sys.stderr.write('Write trainingset: {:d} items\n'.format(9*splitpoint))
-    with open('train.txt', 'w') as f:
-        for d in trainset:
-            f.write('\t'.join(map(str, d)))
-    f.close()
diff --git a/test.py b/test.py
deleted file mode 100644 (file)
index 7378596..0000000
--- a/test.py
+++ /dev/null
@@ -1,19 +0,0 @@
-import numpy as np
-from keras.models import model_from_json
-
-with open('model.json', 'r') as f:
-    json = f.read()
-
-model = model_from_json(json)
-model.load_weights('./model.hdf5')
-model.compile(
-    loss='binary_crossentropy',
-    optimizer='rmsprop',
-    metrics=['accuracy'])
-
-model.summary()
-
-dat = np.genfromtxt('test.txt', dtype=float, delimiter='\t', usecols=range(1, 14))
-lab = np.genfromtxt('test.txt', dtype=int, delimiter='\t', usecols=[0])
-
-print(model.evaluate(dat, lab, batch_size=32))
diff --git a/train.py b/train.py
deleted file mode 100644 (file)
index 28902f9..0000000
--- a/train.py
+++ /dev/null
@@ -1,28 +0,0 @@
-import numpy as np
-from keras.models import Sequential
-from keras.layers import Dense, Dropout  # , Activation
-
-model = Sequential()
-
-model.add(Dense(26, input_shape=(13,), activation='relu'))
-#model.add(Dense(100, activation='relu'))
-#model.add(Dropout(0.25))
-model.add(Dense(100, activation='relu'))
-model.add(Dense(26, activation='relu'))
-model.add(Dense(1, activation='sigmoid'))
-
-model.compile(
-    loss='binary_crossentropy',
-    optimizer='rmsprop',
-    metrics=['accuracy'])
-
-model.summary()
-
-dat = np.genfromtxt('train.txt', dtype=float, delimiter='\t', usecols=range(1, 14))
-lab = np.genfromtxt('train.txt', dtype=int, delimiter='\t', usecols=[0])
-
-model.fit(dat, lab, epochs=10, batch_size=32)
-
-with open('model.json', 'w') as f:
-    f.write(model.to_json())
-model.save_weights('model.hdf5')