-import sys
import pympi
-import random
import glob
import re
import os
# mfcc
-from python_speech_features import mfcc, fbank, logfbank
+from python_speech_features import mfcc, logfbank
import scipy.io.wavfile as wav
import numpy as np
-#keras
+# keras
from keras.models import Sequential
-from keras.layers import Dense, Dropout # , Activation
+from keras.layers import Dense # , Dropout # , Activation
+from keras import backend, utils
# Test-set ratio. The code that consumes it is not visible in this excerpt;
# presumably the fraction of samples held out for evaluation -- TODO confirm.
testset = 0.10
# NOTE(review): presumably the audio sample rate in Hz; its use is not
# visible in this excerpt -- confirm against the wav loading code.
samplerate = 16000
# Passed as ``verbose=`` to the model's fit() and evaluate() calls in run().
+verbosity = 0
+
def get_datafiles():
    """Yield ``(textgrid_path, wav_path, number)`` for each numbered TextGrid.

    Looks for ``*.TextGrid`` files in the ``textgrid`` directory under the
    current working directory. The numeric base name of each file selects
    the wave file ``wav/NN.wav`` (zero-padded to two digits) and is also
    yielded as an ``int``.

    Files whose base name is not purely numeric are skipped instead of
    crashing on a failed regex match, and results are produced in ascending
    numeric order so that repeated runs see the files in the same order.
    """
    pattern = os.path.join(os.getcwd(), 'textgrid', '*.TextGrid')
    numbered = []
    for tg in glob.glob(pattern):
        # Match on the base name so the check does not depend on the
        # platform's path separator; the dot is escaped to be literal.
        found = re.match(r'^(\d+)\.TextGrid$', os.path.basename(tg))
        if found is None:
            continue
        numbered.append((int(found.group(1)), tg))
    for num, tg in sorted(numbered):
        yield (tg, 'wav/{:02d}.wav'.format(num), num)
+
def label_from_annotation(ann):
    """Return 1 if the annotation contains non-whitespace text, else 0."""
    if ann.strip():
        return 1
    return 0
+
def features_from_wav(tg, wavp, typ='mfcc', winlen=0.025, winstep=0.01):
# Load textgrid
tgob = pympi.TextGrid(tg)
data = mfcc(sig, rate, winlen=winlen, winstep=winstep, numcep=13,
appendEnergy=True)
elif typ == 'fbank':
- (data, energy) = logfbank(sig, rate, winlen=winlen, winstep=winstep, nfilt=26)
+ (data, energy) = logfbank(
+ sig, rate, winlen=winlen, winstep=winstep, nfilt=26)
else:
raise ValueError("No such type")
i += 1
return (data, labels)
-def run(typ, winlen, winstep, modelfun):
+
def singerfun(num, l):
    """Map a binary frame label to a class that depends on the file number.

    Args:
        num: Number of the data file the frame came from.
        l: Binary label of the frame; only ``1`` is mapped to a group class.

    Returns:
        ``0`` when ``l`` is not ``1``; otherwise ``1`` for files 0-11,
        ``2`` for files 12-21 and ``3`` for files 22-28.

    Raises:
        ValueError: If ``l`` is ``1`` and ``num`` is outside 0-28.
    """
    if l != 1:
        return 0
    if 0 <= num <= 11:
        return 1
    if 12 <= num <= 21:
        return 2
    if 22 <= num <= 28:
        return 3
    # ValueError still satisfies callers that catch Exception, but tells the
    # user which file number has no class assigned.
    raise ValueError(
        'no singer class defined for file number {!r}'.format(num))
+
+
+def run(typ, winlen, winstep, modelfun, modelname, multiclass=False):
datas = []
labels = []
- for tg, wavp in get_datafiles():
- (d, l) = features_from_wav(tg, wavp, winlen=winlen, winstep=winstep, typ=typ)
+ for tg, wavp, num in get_datafiles():
+ (d, l) = features_from_wav(
+ tg, wavp, winlen=winlen, winstep=winstep, typ=typ)
datas.append(d)
- labels.append(l)
+ if multiclass:
+ labels.append(list(map(lambda x: singerfun(int(num), x), l)))
+ else:
+ labels.append(l)
datas = np.concatenate(datas)
labels = np.concatenate(labels)
+ print(np.unique(labels, return_counts=True))
+ if multiclass:
+ labels = utils.to_categorical(labels, num_classes=4)
rng_state = np.random.get_state()
np.random.shuffle(datas)
model = modelfun(traindata)
- #Train
+ # Train
model.fit(traindata, trainlabels, epochs=10, batch_size=32, shuffle=False,
- verbose=0)
-
- #Test
- return model.evaluate(testdata, testlabels, batch_size=32, verbose=0)
-
-def simplemodel(d):
- model = Sequential()
- model.add(Dense(d.shape[1]*2, input_shape=(d.shape[1],), activation='relu'))
- model.add(Dense(100, activation='relu'))
- model.add(Dense(1, activation='sigmoid'))
- model.compile(optimizer='rmsprop',
- loss='binary_crossentropy',
- metrics=['accuracy'])
- return model
+ verbose=verbosity)
-def bottlemodel(d):
- model = Sequential()
- model.add(Dense(d.shape[1]*2, input_shape=(d.shape[1],), activation='relu'))
- model.add(Dense(13, activation='relu'))
- model.add(Dense(1, activation='sigmoid'))
- model.compile(optimizer='rmsprop',
- loss='binary_crossentropy',
- metrics=['accuracy'])
+ # Test
+ loss, acc = model.evaluate(testdata, testlabels, batch_size=32,
+ verbose=verbosity)
+ print('{}\t{}\t{}\t{}\t{}\n'.format(
+ winlen, winstep, modelname, loss, acc))
return model
-if __name__ == '__main__':
- #print(run('mfcc', 0.025, 0.01, simplemodel))
- #print(run('mfcc', 0.1, 0.04, simplemodel))
- #print(run('mfcc', 0.2, 0.08, simplemodel))
- print(run('mfcc', 0.025, 0.01, bottlemodel))
- print(run('mfcc', 0.1, 0.04, bottlemodel))
- print(run('mfcc', 0.2, 0.08, bottlemodel))
def bottlemodel(layers):
    """Return a builder for a single-hidden-layer binary classifier.

    ``layers`` is the number of units in the hidden ReLU layer. The returned
    function takes the training data ``d`` and uses ``d.shape[1]`` as the
    input width of the network.
    """
    def fun(d):
        hidden = Dense(layers, activation='relu', input_shape=(d.shape[1],))
        output = Dense(1, activation='sigmoid')
        model = Sequential()
        for layer in (hidden, output):
            model.add(layer)
        model.compile(
            optimizer='rmsprop',
            loss='binary_crossentropy',
            metrics=['accuracy'])
        return model
    return fun
+
+
def multimodel(layers, num_classes=4):
    """Return a builder for a single-hidden-layer multiclass classifier.

    Args:
        layers: Number of units in the hidden ReLU layer.
        num_classes: Width of the softmax output layer. Defaults to 4, the
            value that was previously hard-coded.

    Returns:
        A function that takes the training data ``d``, uses ``d.shape[1]``
        as the input width and returns the compiled Keras model.
    """
    def fun(d):
        model = Sequential()
        model.add(Dense(layers, activation='relu', input_shape=(d.shape[1],)))
        model.add(Dense(num_classes, activation='softmax'))
        model.compile(optimizer='rmsprop',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        return model
    return fun
+
+
# Experiments to run, as (name, model builder, multiclass flag) tuples: every
# hidden-layer size is tried first with the binary "bottle" model and then
# with the multiclass "multi" model.
models = (
    [('bottle{}'.format(n), bottlemodel(n), False) for n in (3, 5, 8, 13)] +
    [('multi{}'.format(n), multimodel(n), True) for n in (3, 5, 8, 13)])
+
if __name__ == '__main__':
    # Header for the tab-separated result rows that run() prints.
    print('winlen\twinstep\tmodel\tloss\taccuracy\n')
    window_settings = ((0.025, 0.01), (0.1, 0.04), (0.2, 0.08))
    with backend.get_session():
        for winlen, winstep in window_settings:
            for name, builder, multiclass in models:
                trained = run('mfcc', winlen, winstep, builder, name,
                              multiclass)
                fproot = 'model_{}_{}_{}'.format(winlen, winstep, name)
                print(fproot)
                # Architecture goes to JSON and weights to HDF5, both under
                # the same file stem.
                with open('{}.json'.format(fproot), 'w') as out:
                    out.write(trained.to_json())
                trained.save_weights('{}.hdf5'.format(fproot))