local tensorflow installation, add multiclass
[asr1617data.git] / experiments.py
1 import pympi
2 import glob
3 import re
4 import os
5
6 # mfcc
7 from python_speech_features import mfcc, logfbank
8 import scipy.io.wavfile as wav
9 import numpy as np
10
11 # keras
12 from keras.models import Sequential
13 from keras.layers import Dense, Dropout # , Activation
14 from keras import backend, utils
15
# Fraction of all frames that run() holds out as the test set.
testset = 0.10
# Presumably the expected wav sample rate in Hz.
# NOTE(review): not referenced anywhere in the visible code -- the rate
# actually used is the one read from each wav file; confirm before relying
# on this value.
samplerate = 16000
# Verbosity level handed to the Keras fit() and evaluate() calls in run().
verbosity = 1
20
def get_datafiles():
    """Yield one ``(textgrid_path, wav_path, number)`` tuple per annotation.

    Looks for ``<cwd>/textgrid/<number>.TextGrid`` files and pairs each one
    with the relative path ``wav/<number zero padded to 2 digits>.wav``.
    Files are visited in sorted path order so repeated runs see the same
    sequence. Files whose base name is not purely numeric are skipped,
    because no wav file name can be derived from them.

    Yields:
        tuple: the TextGrid path as returned by glob (str), the relative
        wav path (str) and the number parsed from the file name (int).
    """
    pattern = os.path.join(os.getcwd(), 'textgrid', '*.TextGrid')
    for tg in sorted(glob.glob(pattern)):
        # Match on the base name only so the check does not depend on the
        # platform's path separator; the dot before the extension is
        # escaped so it only matches a literal dot.
        match = re.match(r'^(\d+)\.TextGrid$', os.path.basename(tg))
        if match is None:
            continue
        num = int(match.group(1))
        yield (tg, 'wav/{:02d}.wav'.format(num), num)
27
def label_from_annotation(ann):
    """Map an annotation text to a binary label.

    Returns 0 when ``ann`` is empty or consists of whitespace only, and 1
    for any other text.
    """
    stripped = ann.strip()
    if stripped == '':
        return 0
    return 1
30
def _fft_size(rate, winlen, minimum=512):
    """Return the smallest power of two covering one analysis window.

    The result is never below ``minimum``, which keeps a 25 ms window at
    16 kHz (400 samples) on a 512-point FFT.
    """
    framelen = int(round(winlen * rate))
    return max(minimum, 1 << max(framelen - 1, 0).bit_length())


def _frame_labels(intervals, nframes, winstep):
    """Label ``nframes`` frames spaced ``winstep`` seconds apart.

    ``intervals`` is an iterator of ``(start, end, text)`` tuples in time
    order. Frame ``i`` is placed at ``i * winstep`` seconds and receives
    the label of the interval the cursor is on at that time. Frames that
    lie past the last interval keep the last interval's label.
    """
    labels = np.empty(nframes, dtype=int)
    current = next(intervals, None)
    if current is None:
        raise ValueError("tier contains no intervals")
    end = current[1]
    label = label_from_annotation(current[2])
    for i in range(nframes):
        # Multiply instead of accumulating to avoid floating point drift.
        frametime = i * winstep
        # Skip every interval that ended before this frame; a single step
        # is not enough when an interval is shorter than winstep.
        while frametime > end:
            current = next(intervals, None)
            if current is None:
                # Ran out of intervals: stay on the last label for good.
                end = float('inf')
                break
            end = current[1]
            label = label_from_annotation(current[2])
        labels[i] = label
    return labels


def features_from_wav(tg, wavp, typ='mfcc', winlen=0.025, winstep=0.01):
    """Extract per-frame features and labels for one recording.

    Args:
        tg: path of the TextGrid file; its ``lyrics`` tier is read.
        wavp: path of the wav file.
        typ: ``'mfcc'`` for 13 cepstral coefficients (energy appended) or
            ``'fbank'`` for 26 log filterbank energies.
        winlen: analysis window length in seconds.
        winstep: step between successive windows in seconds.

    Returns:
        tuple: ``(data, labels)`` where ``data`` is the feature array with
        one row per frame and ``labels`` is an int array with one entry
        per frame (see label_from_annotation).

    Raises:
        ValueError: if ``typ`` is unknown or the tier has no intervals.
    """
    if typ not in ('mfcc', 'fbank'):
        raise ValueError("No such type")

    # Load textgrid
    tgob = pympi.TextGrid(tg)
    intervalit = tgob.get_tier('lyrics').get_intervals(sort=True)
    # Load wav
    (rate, sig) = wav.read(wavp, mmap=True)

    # Pass an FFT size that covers the whole window; with a fixed
    # 512-point FFT longer windows would be truncated by the library.
    nfft = _fft_size(rate, winlen)
    if typ == 'mfcc':
        data = mfcc(sig, rate, winlen=winlen, winstep=winstep, numcep=13,
                    nfft=nfft, appendEnergy=True)
    else:
        # logfbank returns the feature array only (no separate energy).
        data = logfbank(sig, rate, winlen=winlen, winstep=winstep,
                        nfilt=26, nfft=nfft)

    labels = _frame_labels(intervalit, data.shape[0], winstep)
    return (data, labels)
64
def singerfun(num, l):
    """Return the multiclass label for one frame.

    Args:
        num: number of the recording the frame belongs to.
        l: binary frame label; only the value 1 selects a singer class.

    Returns:
        int: 0 when ``l`` is not 1; otherwise 1 for recordings 0-11,
        2 for recordings 12-21 and 3 for recordings 22-28.

    Raises:
        ValueError: if ``l`` is 1 and ``num`` lies outside 0-28.
    """
    if l != 1:
        return 0
    if 0 <= num <= 11:
        return 1
    if 12 <= num <= 21:
        return 2
    if 22 <= num <= 28:
        return 3
    raise ValueError(
        'no singer class known for recording number {!r}'.format(num))
77
def run(typ, winlen, winstep, modelfun, modelname, multiclass=False):
    """Train and evaluate one model on the frames of all data files.

    Args:
        typ: feature type forwarded to features_from_wav.
        winlen: analysis window length in seconds.
        winstep: step between windows in seconds.
        modelfun: callable that receives the training data array and
            returns a compiled model.
        modelname: name printed in the result line.
        multiclass: when true, frame labels are mapped through singerfun
            and one-hot encoded into 4 classes.

    Returns:
        The trained model.

    Raises:
        ValueError: if get_datafiles yields no files.

    NOTE(review): frames are shuffled across all recordings before the
    train/test split, so both sets can contain frames of the same
    recording -- confirm this is intended.
    """
    datas = []
    labels = []

    for tg, wavp, num in get_datafiles():
        (d, l) = features_from_wav(
            tg, wavp, winlen=winlen, winstep=winstep, typ=typ)
        datas.append(d)
        if multiclass:
            l = [singerfun(num, x) for x in l]
        labels.append(l)

    if not datas:
        raise ValueError('no data files found; nothing to train on')

    datas = np.concatenate(datas)
    labels = np.concatenate(labels)
    # Show the class distribution before any encoding.
    print(np.unique(labels, return_counts=True))
    if multiclass:
        labels = utils.to_categorical(labels, num_classes=4)

    # Apply one shared permutation to features and labels so every feature
    # row stays paired with its own label.
    order = np.random.permutation(labels.shape[0])
    datas = datas[order]
    labels = labels[order]

    # The first `testset` fraction of the shuffled frames is the test set.
    splitindex = int(labels.shape[0]*testset)
    testdata, traindata = datas[:splitindex], datas[splitindex:]
    testlabels, trainlabels = labels[:splitindex], labels[splitindex:]
    del datas, labels

    model = modelfun(traindata)

    # Train (the data is already shuffled, hence shuffle=False)
    model.fit(traindata, trainlabels, epochs=10, batch_size=32, shuffle=False,
              verbose=verbosity)

    # Test
    loss, acc = model.evaluate(testdata, testlabels, batch_size=32,
                               verbose=verbosity)
    print('{}\t{}\t{}\t{}\t{}\n'.format(
        winlen, winstep, modelname, loss, acc))
    return model
121
def bottlemodel(d):
    """Build and compile the binary (single sigmoid output) classifier.

    Only ``d.shape[1]`` is read from ``d``; it sets the input width of the
    first layer. The network is a 13-unit ReLU layer followed by one
    sigmoid unit, compiled with rmsprop, binary cross-entropy and the
    accuracy metric.
    """
    nfeatures = d.shape[1]
    # Alternative layout that is disabled in the original source: a
    # Dense(d.shape[1]*2) ReLU input layer, then Dense(13) ReLU, then the
    # same sigmoid output.
    stack = (
        Dense(13, activation='relu', input_shape=(nfeatures,)),
        Dense(1, activation='sigmoid'),
    )
    net = Sequential()
    for layer in stack:
        net.add(layer)
    net.compile(loss='binary_crossentropy',
                optimizer='rmsprop',
                metrics=['accuracy'])
    return net
134
def multimodel(d):
    """Build and compile the 4-class (softmax output) classifier.

    Only ``d.shape[1]`` is read from ``d``; it sets the input width of the
    first layer. The network is a 13-unit ReLU layer followed by a 4-unit
    softmax layer, compiled with rmsprop, categorical cross-entropy and
    the accuracy metric.
    """
    nfeatures = d.shape[1]
    # Alternative first layer that is disabled in the original source:
    # Dense(d.shape[1]*2) with ReLU activation.
    stack = (
        Dense(13, activation='relu', input_shape=(nfeatures,)),
        Dense(4, activation='softmax'),
    )
    net = Sequential()
    for layer in stack:
        net.add(layer)
    net.compile(loss='categorical_crossentropy',
                optimizer='rmsprop',
                metrics=['accuracy'])
    return net
144
145
if __name__ == '__main__':
    # Header of the tab separated result table; run() prints the rows.
    print('winlen\twinstep\tmodel\tloss\taccuracy\n')
    # (window length, window step) pairs in seconds.
    windows = ((0.025, 0.01), (0.1, 0.04), (0.2, 0.08))
    # Written in execution order: the multiclass model runs first.
    experiments = (('multi', multimodel, True),
                   ('bottle', bottlemodel, False))
    with backend.get_session():
        for winlen, winstep in windows:
            for name, builder, multi in experiments:
                trained = run('mfcc', winlen, winstep, builder, name, multi)
                # Architecture goes to <root>.json, weights to <root>.hdf5.
                fproot = 'model_{}_{}_{}'.format(winlen, winstep, name)
                with open('{}.json'.format(fproot), 'w') as out:
                    out.write(trained.to_json())
                trained.save_weights('{}.hdf5'.format(fproot))