# keras
from keras.models import Sequential
-from keras.layers import Dense, Dropout # , Activation
+from keras.layers import Dense # , Dropout # , Activation
from keras import backend, utils
# Test-set ratio.
testset = 0.10
# Audio sample rate; presumably in Hz -- TODO confirm against the wav files.
samplerate = 16000
# Passed as ``verbose=`` to the Keras fit/evaluate calls in run().
verbosity = 0
+
def get_datafiles():
files = glob.glob(os.path.join(os.getcwd(), 'textgrid', '*.TextGrid'))
num = re.match('^.*/(\\d+).TextGrid$', tg).group(1)
yield (tg, 'wav/{:02d}.wav'.format(int(num)), int(num))
+
def label_from_annotation(ann):
    """Map an annotation to a binary label.

    Args:
        ann: Annotation text, or None when there is no annotation.

    Returns:
        0 when the annotation is None, empty or whitespace-only,
        1 otherwise.
    """
    # A missing annotation is treated like an empty one instead of
    # raising AttributeError on ``None.strip()``.
    if ann is None:
        return 0
    return 1 if ann.strip() else 0
+
def features_from_wav(tg, wavp, typ='mfcc', winlen=0.025, winstep=0.01):
# Load textgrid
tgob = pympi.TextGrid(tg)
data = mfcc(sig, rate, winlen=winlen, winstep=winstep, numcep=13,
appendEnergy=True)
elif typ == 'fbank':
- (data, energy) = logfbank(sig, rate, winlen=winlen, winstep=winstep, nfilt=26)
+ (data, energy) = logfbank(
+ sig, rate, winlen=winlen, winstep=winstep, nfilt=26)
else:
raise ValueError("No such type")
i += 1
return (data, labels)
+
def singerfun(num, l):
if l == 1:
if 0 <= num <= 11:
else:
return 0
+
def run(typ, winlen, winstep, modelfun, modelname, multiclass=False):
datas = []
labels = []
else:
labels.append(l)
-
datas = np.concatenate(datas)
labels = np.concatenate(labels)
print(np.unique(labels, return_counts=True))
if multiclass:
labels = utils.to_categorical(labels, num_classes=4)
-
rng_state = np.random.get_state()
np.random.shuffle(datas)
np.random.set_state(rng_state)
model = modelfun(traindata)
- #Train
+ # Train
model.fit(traindata, trainlabels, epochs=10, batch_size=32, shuffle=False,
verbose=verbosity)
- #Test
+ # Test
loss, acc = model.evaluate(testdata, testlabels, batch_size=32,
verbose=verbosity)
print('{}\t{}\t{}\t{}\t{}\n'.format(
winlen, winstep, modelname, loss, acc))
return model
def bottlemodel(layers):
    """Create a builder for a binary classifier with one hidden layer.

    Args:
        layers: Number of units in the hidden ReLU layer (despite the
            name, this is a layer width, not a layer count).

    Returns:
        A function ``fun(d)`` that builds and compiles a Keras
        ``Sequential`` model whose input width is ``d.shape[1]`` and whose
        output is a single sigmoid unit trained with binary cross-entropy.

    Raises:
        ValueError: If ``layers`` is smaller than 1.
    """
    # Reject an unusable size when the factory is called, not later when
    # the returned builder is finally invoked.
    if layers < 1:
        raise ValueError(
            'hidden layer size must be at least 1, got {!r}'.format(layers))

    def fun(d):
        model = Sequential()
        model.add(Dense(layers, activation='relu',
                        input_shape=(d.shape[1],)))
        model.add(Dense(1, activation='sigmoid'))
        model.compile(optimizer='rmsprop',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])
        return model
    return fun
+
+
def multimodel(layers):
    """Create a builder for a 4-class classifier with one hidden layer.

    Args:
        layers: Number of units in the hidden ReLU layer (despite the
            name, this is a layer width, not a layer count).

    Returns:
        A function ``fun(d)`` that builds and compiles a Keras
        ``Sequential`` model whose input width is ``d.shape[1]`` and whose
        output is a 4-way softmax trained with categorical cross-entropy.

    Raises:
        ValueError: If ``layers`` is smaller than 1.
    """
    # Reject an unusable size when the factory is called, not later when
    # the returned builder is finally invoked.
    if layers < 1:
        raise ValueError(
            'hidden layer size must be at least 1, got {!r}'.format(layers))

    def fun(d):
        model = Sequential()
        model.add(Dense(layers, activation='relu',
                        input_shape=(d.shape[1],)))
        model.add(Dense(4, activation='softmax'))
        model.compile(optimizer='rmsprop',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        return model
    return fun
+
+
# Model configurations to evaluate, as (name, builder, multiclass) tuples.
# The numeric suffix in each name is the hidden-layer size handed to the
# factory; the flag is passed to run() as its ``multiclass`` argument.
models = [
    ('bottle3', bottlemodel(3), False),
    ('bottle5', bottlemodel(5), False),
    ('bottle8', bottlemodel(8), False),
    ('bottle13', bottlemodel(13), False),
    ('multi3', multimodel(3), True),
    ('multi5', multimodel(5), True),
    ('multi8', multimodel(8), True),
    ('multi13', multimodel(13), True),
]
if __name__ == '__main__':
    # Header row for the tab-separated result lines printed by run().
    print('winlen\twinstep\tmodel\tloss\taccuracy\n')
    with backend.get_session():
        # Train every configured model for each (window length, window
        # step) pair.
        for winlen, winstep in ((0.025, 0.01), (0.1, 0.04), (0.2, 0.08)):
            for name, model, multi in models:
                m = run('mfcc', winlen, winstep, model, name, multi)
                fproot = 'model_{}_{}_{}'.format(winlen, winstep, name)
                print(fproot)
                # Architecture goes to <fproot>.json, weights to
                # <fproot>.hdf5.
                with open('{}.json'.format(fproot), 'w') as f:
                    f.write(m.to_json())
                m.save_weights('{}.hdf5'.format(fproot))
--- /dev/null
+winlen winstep model loss accuracy
+
+(array([0, 1]), array([476266, 325691]))
+0.025 0.01 bottle3 0.3389156284828169 0.8597792879855353
+
+model_0.025_0.01_bottle3
+(array([0, 1]), array([476266, 325691]))
+0.025 0.01 bottle5 0.30842042285169746 0.874156742939086
+
+model_0.025_0.01_bottle5
+(array([0, 1]), array([476266, 325691]))
+0.025 0.01 bottle8 0.2965808613601041 0.8830849803611148
+
+model_0.025_0.01_bottle8
+(array([0, 1]), array([476266, 325691]))
+0.025 0.01 bottle13 0.28287798926393115 0.889955732901797
+
+model_0.025_0.01_bottle13
+(array([0, 1, 2, 3]), array([476266, 125684, 151977, 48030]))
+0.025 0.01 multi3 0.48447136703078025 0.8319970072947191
+
+model_0.025_0.01_multi3
+(array([0, 1, 2, 3]), array([476266, 125684, 151977, 48030]))
+0.025 0.01 multi5 0.43238697158434836 0.8497038468739897
+
+model_0.025_0.01_multi5
+(array([0, 1, 2, 3]), array([476266, 125684, 151977, 48030]))
+0.025 0.01 multi8 0.4072758802538003 0.8624353139223143
+
+model_0.025_0.01_multi8
+(array([0, 1, 2, 3]), array([476266, 125684, 151977, 48030]))
+0.025 0.01 multi13 0.3707011521600716 0.868520481326766
+
+model_0.025_0.01_multi13
+(array([0, 1]), array([119037, 81431]))
+0.1 0.04 bottle3 0.32152209714074387 0.8698992317728829
+
+model_0.1_0.04_bottle3
+(array([0, 1]), array([119037, 81431]))
+0.1 0.04 bottle5 0.29785478306865665 0.879028235071257
+
+model_0.1_0.04_bottle5
+(array([0, 1]), array([119037, 81431]))
+0.1 0.04 bottle8 0.31144997012195363 0.8783797266109135
+
+model_0.1_0.04_bottle8
+(array([0, 1]), array([119037, 81431]))
+0.1 0.04 bottle13 0.28727721201615886 0.8858126309547645
+
+model_0.1_0.04_bottle13
+(array([0, 1, 2, 3]), array([119037, 31435, 37996, 12000]))
+0.1 0.04 multi3 0.47640502210221436 0.824603412157997
+
+model_0.1_0.04_multi3
+(array([0, 1, 2, 3]), array([119037, 31435, 37996, 12000]))
+0.1 0.04 multi5 0.44105214603370885 0.8400678439588946
+
+model_0.1_0.04_multi5
+(array([0, 1, 2, 3]), array([119037, 31435, 37996, 12000]))
+0.1 0.04 multi8 0.3903473072779056 0.8636635737803053
+
+model_0.1_0.04_multi8
+(array([0, 1, 2, 3]), array([119037, 31435, 37996, 12000]))
+0.1 0.04 multi13 0.3752127004474934 0.8698493464852026
+
+model_0.1_0.04_multi13
+(array([0, 1]), array([59493, 40723]))
+0.2 0.08 bottle3 0.35470913447088 0.847719788444267
+
+model_0.2_0.08_bottle3
+(array([0, 1]), array([59493, 40723]))
+0.2 0.08 bottle5 0.32276059763122966 0.870272427901407
+
+model_0.2_0.08_bottle5
+(array([0, 1]), array([59493, 40723]))
+0.2 0.08 bottle8 0.2949916362783269 0.8849416225925556
+
+model_0.2_0.08_bottle8
+(array([0, 1]), array([59493, 40723]))
+0.2 0.08 bottle13 0.3005677448865742 0.8803512623490669
+
+model_0.2_0.08_bottle13
+(array([0, 1, 2, 3]), array([59493, 15726, 18993, 6004]))
+0.2 0.08 multi3 0.4804011737202328 0.8227721784312548
+
+model_0.2_0.08_multi3
+(array([0, 1, 2, 3]), array([59493, 15726, 18993, 6004]))
+0.2 0.08 multi5 0.4430828566863061 0.8434287995210059
+
+model_0.2_0.08_multi5
+(array([0, 1, 2, 3]), array([59493, 15726, 18993, 6004]))
+0.2 0.08 multi8 0.39796411239295504 0.8585969464244296
+
+model_0.2_0.08_multi8
+(array([0, 1, 2, 3]), array([59493, 15726, 18993, 6004]))
+0.2 0.08 multi13 0.391048318939632 0.8592954794930646
+
+model_0.2_0.08_multi13
+