--- /dev/null
+import sys
+import pympi
+import random
+import glob
+import re
+import os
+
+# mfcc
+from python_speech_features import mfcc, fbank, logfbank
+import scipy.io.wavfile as wav
+import numpy as np
+
+#keras
+from keras.models import Sequential
+from keras.layers import Dense, Dropout  # , Activation
+
+# Testset ratio
+testset = 0.10
+samplerate = 16000
+
def get_datafiles(base=None):
    """Yield (textgrid_path, wav_path) pairs for every numbered TextGrid.

    Looks for ``<base>/textgrid/NN.TextGrid`` files and pairs each with the
    relative path ``wav/NN.wav`` (two-digit, zero-padded).

    Args:
        base: directory containing the ``textgrid`` folder; defaults to the
            current working directory (the original behavior).

    Yields:
        (textgrid_path, wav_path) tuples.
    """
    base = os.getcwd() if base is None else base
    for tg in glob.glob(os.path.join(base, 'textgrid', '*.TextGrid')):
        # Escape the dot so e.g. '12xTextGrid' cannot match, and skip
        # files whose stem is not purely numeric instead of crashing on
        # a failed match (re.match returns None -> AttributeError before).
        m = re.match(r'^.*/(\d+)\.TextGrid$', tg)
        if m is None:
            continue
        yield (tg, 'wav/{:02d}.wav'.format(int(m.group(1))))
+
def label_from_annotation(ann):
    """Map a TextGrid annotation to a binary label.

    Blank (empty or whitespace-only) annotations are treated as the
    negative class (0); any other text is the positive class (1).
    """
    return 1 if ann.strip() else 0
+
def features_from_wav(tg, wavp, typ='mfcc', winlen=0.025, winstep=0.01):
    """Extract frame-level features and per-frame labels for one recording.

    Args:
        tg: path to the pympi TextGrid with a 'lyrics' tier of intervals.
        wavp: path to the corresponding wav file.
        typ: 'mfcc' (13 cepstra, energy appended) or 'fbank'
            (26 log filterbank energies).
        winlen: analysis window length in seconds.
        winstep: hop between successive windows in seconds.

    Returns:
        (data, labels): the 2-D feature matrix and an int array with one
        binary label per frame, derived from the interval each frame
        falls into.

    Raises:
        ValueError: if `typ` is not one of the supported feature types.
    """
    # Load the annotation intervals, sorted by time.
    tgob = pympi.TextGrid(tg)
    intervalit = tgob.get_tier('lyrics').get_intervals(sort=True)
    # Memory-map the audio to avoid loading the whole file.
    (rate, sig) = wav.read(wavp, mmap=True)

    if typ == 'mfcc':
        data = mfcc(sig, rate, winlen=winlen, winstep=winstep, numcep=13,
                    appendEnergy=True)
    elif typ == 'fbank':
        # BUG FIX: logfbank returns a single feature array (only fbank
        # returns a (features, energy) tuple), so the original two-name
        # unpacking raised unless the array coincidentally had 2 rows.
        data = logfbank(sig, rate, winlen=winlen, winstep=winstep, nfilt=26)
    else:
        raise ValueError("No such type")

    # Walk frames and intervals in lockstep: each frame gets the label of
    # the interval it falls in; when the iterator is exhausted the last
    # interval's label is reused for the remaining frames.
    (s, e, v) = next(intervalit)
    label = label_from_annotation(v)
    labels = np.empty(data.shape[0], dtype=int)
    currentframe = 0.0
    for i in range(data.shape[0]):
        if currentframe > e:
            (s, e, v) = next(intervalit, (s, e, v))
            label = label_from_annotation(v)
        labels[i] = label
        currentframe += winstep
    return (data, labels)
+
def run(typ, winlen, winstep, modelfun):
    """Build a dataset from all recordings, train a model, and score it.

    Args:
        typ: feature type passed through to features_from_wav.
        winlen: analysis window length in seconds.
        winstep: hop between windows in seconds.
        modelfun: callable taking the training matrix and returning a
            compiled Keras model.

    Returns:
        The result of model.evaluate on the held-out test split.
    """
    feature_chunks = []
    label_chunks = []
    for tg_path, wav_path in get_datafiles():
        feats, labs = features_from_wav(tg_path, wav_path, winlen=winlen,
                                        winstep=winstep, typ=typ)
        feature_chunks.append(feats)
        label_chunks.append(labs)

    all_feats = np.concatenate(feature_chunks)
    all_labels = np.concatenate(label_chunks)

    # Shuffle features and labels in unison by replaying the RNG state
    # for the second shuffle.
    state = np.random.get_state()
    np.random.shuffle(all_feats)
    np.random.set_state(state)
    np.random.shuffle(all_labels)

    # First `testset` fraction becomes the held-out split.
    cut = int(all_labels.shape[0] * testset)
    testdata, traindata = all_feats[:cut], all_feats[cut:]
    testlabels, trainlabels = all_labels[:cut], all_labels[cut:]
    del all_feats, all_labels  # free before training

    model = modelfun(traindata)

    # Data is already shuffled above, so Keras need not reshuffle.
    model.fit(traindata, trainlabels, epochs=10, batch_size=32,
              shuffle=False, verbose=0)

    return model.evaluate(testdata, testlabels, batch_size=32, verbose=0)
+
def simplemodel(d):
    """Return a compiled feed-forward binary classifier sized from `d`.

    Architecture: input -> 2*n_features relu -> 100 relu -> 1 sigmoid,
    trained with rmsprop on binary cross-entropy.
    """
    n_in = d.shape[1]
    net = Sequential()
    net.add(Dense(2 * n_in, input_shape=(n_in,), activation='relu'))
    net.add(Dense(100, activation='relu'))
    net.add(Dense(1, activation='sigmoid'))
    net.compile(loss='binary_crossentropy',
                optimizer='rmsprop',
                metrics=['accuracy'])
    return net
+
def bottlemodel(d):
    """Return a compiled classifier with a 13-unit bottleneck layer.

    Architecture: input -> 2*n_features relu -> 13 relu (bottleneck) ->
    1 sigmoid, trained with rmsprop on binary cross-entropy.
    """
    n_in = d.shape[1]
    net = Sequential()
    net.add(Dense(2 * n_in, input_shape=(n_in,), activation='relu'))
    net.add(Dense(13, activation='relu'))
    net.add(Dense(1, activation='sigmoid'))
    net.compile(loss='binary_crossentropy',
                optimizer='rmsprop',
                metrics=['accuracy'])
    return net
+
if __name__ == '__main__':
    # Evaluate the bottleneck model at three window-length/step settings.
    for winlen, winstep in ((0.025, 0.01), (0.1, 0.04), (0.2, 0.08)):
        print(run('mfcc', winlen, winstep, bottlemodel))
 
+++ /dev/null
-import sys
-import pympi
-import random
-import glob
-import re
-import os
-
-testset = 0.10
-data = []
-
-
-def process(num):
-    num = re.match('^.*/(\\d+).TextGrid$', num).group(1)
-    tg = 'textgrid/{:02d}.TextGrid'.format(int(num))
-
-    tgob = pympi.TextGrid(tg)
-    intervalit = tgob.get_tier('lyrics').get_intervals(sort=True)
-
-    with open('mfcc/{:02d}.mfcc'.format(int(num))) as mfcc:
-        (s, e, v) = next(intervalit)
-        currentframe = 0.0
-        for l in mfcc:
-            # Go to next interval
-            if currentframe > e:
-                try:
-                    (s, e, v) = next(intervalit)
-                except StopIteration:
-                    pass
-
-            label = 1 if v == '' else 0
-            data.append([label] + l.split('\t'))
-
-            # Increase time
-            currentframe += 0.01
-
-
-if __name__ == '__main__':
-    datafiles = []
-    for fl in glob.glob(os.path.join(os.getcwd(), 'textgrid', '*.TextGrid')):
-        sys.stderr.write('Segment {}\n'.format(fl))
-        process(fl)
-
-    sys.stderr.write('Shuffling {} samples\n'.format(len(data)))
-    random.shuffle(data)
-
-    splitpoint = int(len(data)*testset)
-    testset = data[:splitpoint]
-    trainset = data[splitpoint:]
-    del(data)
-
-    sys.stderr.write('Write testset: {} items\n'.format(splitpoint))
-    with open('test.txt', 'w') as f:
-        for d in testset:
-            f.write('\t'.join(map(str, d)))
-
-    sys.stderr.write('Write trainingset: {:d} items\n'.format(9*splitpoint))
-    with open('train.txt', 'w') as f:
-        for d in trainset:
-            f.write('\t'.join(map(str, d)))
-    f.close()
 
+++ /dev/null
-import numpy as np
-from keras.models import Sequential
-from keras.layers import Dense, Dropout  # , Activation
-
-model = Sequential()
-
-model.add(Dense(26, input_shape=(13,), activation='relu'))
-#model.add(Dense(100, activation='relu'))
-#model.add(Dropout(0.25))
-model.add(Dense(100, activation='relu'))
-model.add(Dense(26, activation='relu'))
-model.add(Dense(1, activation='sigmoid'))
-
-model.compile(
-    loss='binary_crossentropy',
-    optimizer='rmsprop',
-    metrics=['accuracy'])
-
-model.summary()
-
-dat = np.genfromtxt('train.txt', dtype=float, delimiter='\t', usecols=range(1, 14))
-lab = np.genfromtxt('train.txt', dtype=int, delimiter='\t', usecols=[0])
-
-model.fit(dat, lab, epochs=10, batch_size=32)
-
-with open('model.json', 'w') as f:
-    f.write(model.to_json())
-model.save_weights('model.hdf5')