--- /dev/null
+import sys
+import pympi
+import random
+import glob
+import re
+import os
+
+# mfcc
+from python_speech_features import mfcc, fbank, logfbank
+import scipy.io.wavfile as wav
+import numpy as np
+
+#keras
+from keras.models import Sequential
+from keras.layers import Dense, Dropout # , Activation
+
# Fraction of the shuffled dataset held out for evaluation.
testset = 0.10
# Expected wav sample rate in Hz.
# NOTE(review): not referenced anywhere below — confirm whether the input
# wavs are resampled/validated elsewhere, otherwise this is dead config.
samplerate = 16000
+
def get_datafiles():
    """Yield ``(textgrid_path, wav_path)`` pairs for every numbered TextGrid.

    Scans ``<cwd>/textgrid/*.TextGrid`` and pairs each file with the
    relative path ``wav/<num>.wav`` (number zero-padded to two digits).

    Yields:
        tuple[str, str]: absolute TextGrid path and relative wav path.
    """
    # Pre-compiled, raw pattern: the old '^.*/(\d+).TextGrid$' left the dot
    # unescaped and assumed '/' separators, and .group(1) crashed with
    # AttributeError on any non-matching filename.
    pattern = re.compile(r'(\d+)\.TextGrid$')
    for tg in glob.glob(os.path.join(os.getcwd(), 'textgrid', '*.TextGrid')):
        # Match on the basename so the OS path separator is irrelevant.
        m = pattern.search(os.path.basename(tg))
        if m is None:
            # Skip stray files instead of raising AttributeError.
            continue
        yield (tg, 'wav/{:02d}.wav'.format(int(m.group(1))))
+
def label_from_annotation(ann):
    """Map a TextGrid annotation to a binary label: 1 = lyrics, 0 = silence.

    Whitespace-only annotations count as empty (no lyrics).
    """
    return 1 if ann.strip() else 0
+
def features_from_wav(tg, wavp, typ='mfcc', winlen=0.025, winstep=0.01):
    """Extract per-frame acoustic features and lyric labels for one recording.

    Args:
        tg: path to the TextGrid annotation file (needs a 'lyrics' tier).
        wavp: path to the corresponding wav file.
        typ: 'mfcc' (13 cepstra + energy) or 'fbank' (26 log filterbanks).
        winlen: analysis window length in seconds.
        winstep: window step in seconds (also the label frame step).

    Returns:
        (data, labels): feature matrix of shape (n_frames, n_features) and an
        int label array of shape (n_frames,) — 1 where the frame falls inside
        a non-empty 'lyrics' interval, 0 otherwise.

    Raises:
        ValueError: if typ is neither 'mfcc' nor 'fbank'.
    """
    # Load textgrid and its sorted lyric intervals.
    tgob = pympi.TextGrid(tg)
    intervalit = tgob.get_tier('lyrics').get_intervals(sort=True)
    # Load wav (mmap avoids reading the whole file into memory).
    (rate, sig) = wav.read(wavp, mmap=True)

    if typ == 'mfcc':
        data = mfcc(sig, rate, winlen=winlen, winstep=winstep, numcep=13,
                    appendEnergy=True)
    elif typ == 'fbank':
        # BUG FIX: logfbank returns a single feature array, not an
        # (features, energy) tuple — the old `(data, energy) = logfbank(...)`
        # unpack only worked by accident on two-frame signals.
        data = logfbank(sig, rate, winlen=winlen, winstep=winstep, nfilt=26)
    else:
        raise ValueError("No such type")

    (s, e, v) = next(intervalit)
    label = label_from_annotation(v)
    labels = np.empty(data.shape[0], dtype=int)
    for i in range(data.shape[0]):
        # Frame start time, derived from the index so repeated float
        # accumulation cannot drift over long recordings.
        currentframe = i * winstep
        # Advance past every interval that ended before this frame. A
        # `while` (the old `if` advanced at most one interval per frame,
        # mislabelling frames after any interval shorter than winstep);
        # once the iterator is exhausted the last label is kept.
        while currentframe > e:
            nxt = next(intervalit, None)
            if nxt is None:
                break
            (s, e, v) = nxt
            label = label_from_annotation(v)
        labels[i] = label
    return (data, labels)
+
def run(typ, winlen, winstep, modelfun):
    """Train and evaluate a model over every annotated recording.

    Args:
        typ, winlen, winstep: forwarded to features_from_wav.
        modelfun: callable receiving the training data, returning a
            compiled keras model.

    Returns:
        The result of model.evaluate on the held-out test split.
    """
    feature_chunks = []
    label_chunks = []
    for tg_path, wav_path in get_datafiles():
        feats, labs = features_from_wav(tg_path, wav_path,
                                        winlen=winlen, winstep=winstep, typ=typ)
        feature_chunks.append(feats)
        label_chunks.append(labs)

    features = np.concatenate(feature_chunks)
    targets = np.concatenate(label_chunks)

    # Shuffle features and labels with the SAME permutation by replaying
    # the RNG state for the second shuffle.
    state = np.random.get_state()
    np.random.shuffle(features)
    np.random.set_state(state)
    np.random.shuffle(targets)

    # First `testset` fraction becomes the test split, the rest trains.
    cut = int(targets.shape[0] * testset)
    testdata, traindata = features[:cut], features[cut:]
    testlabels, trainlabels = targets[:cut], targets[cut:]
    del features, targets

    model = modelfun(traindata)

    # Train. The data is already shuffled once above, so keras' per-epoch
    # reshuffling is disabled.
    model.fit(traindata, trainlabels, epochs=10, batch_size=32, shuffle=False,
              verbose=0)

    # Test
    return model.evaluate(testdata, testlabels, batch_size=32, verbose=0)
+
def simplemodel(d):
    """Build a compiled feed-forward binary classifier sized to *d*.

    Architecture: Dense(2*n_features, relu) -> Dense(100, relu) ->
    Dense(1, sigmoid), compiled with rmsprop / binary cross-entropy.
    """
    n_features = d.shape[1]
    model = Sequential([
        Dense(n_features * 2, input_shape=(n_features,), activation='relu'),
        Dense(100, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model
+
def bottlemodel(d):
    """Build a compiled classifier with a 13-unit bottleneck hidden layer.

    Same contract as simplemodel, but the second hidden layer is squeezed
    to 13 units: Dense(2*n_features, relu) -> Dense(13, relu) ->
    Dense(1, sigmoid), compiled with rmsprop / binary cross-entropy.
    """
    n_features = d.shape[1]
    model = Sequential([
        Dense(n_features * 2, input_shape=(n_features,), activation='relu'),
        Dense(13, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model
+
if __name__ == '__main__':
    # Earlier experiments with the wider 'simplemodel', kept for reference:
    #print(run('mfcc', 0.025, 0.01, simplemodel))
    #print(run('mfcc', 0.1, 0.04, simplemodel))
    #print(run('mfcc', 0.2, 0.08, simplemodel))

    # Evaluate the bottleneck model at three (winlen, winstep) settings.
    print(run('mfcc', 0.025, 0.01, bottlemodel))
    print(run('mfcc', 0.1, 0.04, bottlemodel))
    print(run('mfcc', 0.2, 0.08, bottlemodel))
+++ /dev/null
-import sys
-import pympi
-import random
-import glob
-import re
-import os
-
-testset = 0.10
-data = []
-
-
-def process(num):
- num = re.match('^.*/(\\d+).TextGrid$', num).group(1)
- tg = 'textgrid/{:02d}.TextGrid'.format(int(num))
-
- tgob = pympi.TextGrid(tg)
- intervalit = tgob.get_tier('lyrics').get_intervals(sort=True)
-
- with open('mfcc/{:02d}.mfcc'.format(int(num))) as mfcc:
- (s, e, v) = next(intervalit)
- currentframe = 0.0
- for l in mfcc:
- # Go to next interval
- if currentframe > e:
- try:
- (s, e, v) = next(intervalit)
- except StopIteration:
- pass
-
- label = 1 if v == '' else 0
- data.append([label] + l.split('\t'))
-
- # Increase time
- currentframe += 0.01
-
-
-if __name__ == '__main__':
- datafiles = []
- for fl in glob.glob(os.path.join(os.getcwd(), 'textgrid', '*.TextGrid')):
- sys.stderr.write('Segment {}\n'.format(fl))
- process(fl)
-
- sys.stderr.write('Shuffling {} samples\n'.format(len(data)))
- random.shuffle(data)
-
- splitpoint = int(len(data)*testset)
- testset = data[:splitpoint]
- trainset = data[splitpoint:]
- del(data)
-
- sys.stderr.write('Write testset: {} items\n'.format(splitpoint))
- with open('test.txt', 'w') as f:
- for d in testset:
- f.write('\t'.join(map(str, d)))
-
- sys.stderr.write('Write trainingset: {:d} items\n'.format(9*splitpoint))
- with open('train.txt', 'w') as f:
- for d in trainset:
- f.write('\t'.join(map(str, d)))
- f.close()
+++ /dev/null
-import numpy as np
-from keras.models import Sequential
-from keras.layers import Dense, Dropout # , Activation
-
-model = Sequential()
-
-model.add(Dense(26, input_shape=(13,), activation='relu'))
-#model.add(Dense(100, activation='relu'))
-#model.add(Dropout(0.25))
-model.add(Dense(100, activation='relu'))
-model.add(Dense(26, activation='relu'))
-model.add(Dense(1, activation='sigmoid'))
-
-model.compile(
- loss='binary_crossentropy',
- optimizer='rmsprop',
- metrics=['accuracy'])
-
-model.summary()
-
-dat = np.genfromtxt('train.txt', dtype=float, delimiter='\t', usecols=range(1, 14))
-lab = np.genfromtxt('train.txt', dtype=int, delimiter='\t', usecols=[0])
-
-model.fit(dat, lab, epochs=10, batch_size=32)
-
-with open('model.json', 'w') as f:
- f.write(model.to_json())
-model.save_weights('model.hdf5')