predict smoothing
[asr1617data.git] / experiments.py
1 import pympi
2 import glob
3 import re
4 import os
5
6 # mfcc
7 from python_speech_features import mfcc, logfbank
8 import scipy.io.wavfile as wav
9 import numpy as np
10
11 # keras
12 from keras.models import Sequential
13 from keras.layers import Dense # , Dropout # , Activation
14 from keras import backend, utils
15
# Fraction of all (shuffled) frames that run() holds out for evaluation.
testset = 0.10

# Nominal sample rate in Hz.  NOTE(review): not referenced by the code visible
# in this file; the actual rate is read from each wav file -- confirm before
# relying on it.
samplerate = 16000

# Passed as ``verbose`` to the Keras fit/evaluate calls in run().
verbosity = 0
20
21
def get_datafiles():
    """Yield ``(textgrid_path, wav_path, number)`` for each numbered TextGrid.

    Looks for ``textgrid/<number>.TextGrid`` under the current working
    directory.  The wav path is the relative string ``wav/<NN>.wav`` with the
    number zero-padded to two digits.  Files whose base name is not purely
    numeric are skipped instead of crashing on a failed regex match, and the
    results are yielded in ascending numeric order so runs are reproducible
    regardless of directory listing order.
    """
    # Match on the base name only so the pattern does not depend on the
    # platform's path separator; the dot is escaped to be a literal dot.
    pattern = re.compile(r'^(\d+)\.TextGrid$')
    found = []
    for tg in glob.glob(os.path.join(os.getcwd(), 'textgrid', '*.TextGrid')):
        match = pattern.match(os.path.basename(tg))
        if match is None:
            continue
        found.append((int(match.group(1)), tg))

    for num, tg in sorted(found):
        yield (tg, 'wav/{:02d}.wav'.format(num), num)
28
29
def label_from_annotation(ann):
    """Return 1 if the annotation text has non-whitespace content, else 0."""
    stripped = ann.strip()
    if stripped:
        return 1
    return 0
32
33
def _frame_labels(intervals, nframes, winstep):
    """Return one 0/1 label per frame from time-sorted annotation intervals."""
    intervals = iter(intervals)
    first = next(intervals, None)
    if first is None:
        raise ValueError("annotation tier contains no intervals")
    (_, end, text) = first
    label = label_from_annotation(text)

    labels = np.empty(nframes, dtype=int)
    for i in range(nframes):
        # Compute the frame start by multiplication; repeatedly adding
        # winstep accumulates floating point error over long recordings.
        frame_start = i * winstep
        # Skip *every* interval that ended before this frame, not just one,
        # so intervals shorter than winstep cannot make the labels lag.
        while frame_start > end:
            following = next(intervals, None)
            if following is None:
                # Past the last interval: keep its label for the remainder.
                end = float('inf')
                break
            (_, end, text) = following
            label = label_from_annotation(text)
        labels[i] = label
    return labels


def features_from_wav(tg, wavp, typ='mfcc', winlen=0.025, winstep=0.01):
    """Extract per-frame features and 0/1 labels for one recording.

    :param tg: path of the TextGrid file; its ``lyrics`` tier is read.
    :param wavp: path of the wav file.
    :param typ: ``'mfcc'`` (13 cepstra, energy appended) or ``'fbank'``
        (26 log filterbank energies).
    :param winlen: analysis window length in seconds.
    :param winstep: step between successive windows in seconds.
    :returns: ``(data, labels)`` where ``labels`` is an int array with one
        entry per row of ``data``; a frame is labelled 1 when the interval
        it starts in has non-blank text.
    :raises ValueError: for an unknown ``typ`` or an empty ``lyrics`` tier.
    """
    # Load textgrid
    tgob = pympi.TextGrid(tg)
    intervalit = tgob.get_tier('lyrics').get_intervals(sort=True)
    # Load wav
    (rate, sig) = wav.read(wavp, mmap=True)

    # python_speech_features defaults to nfft=512 and truncates frames longer
    # than that, so windows such as 0.1 s at 16 kHz would lose most of their
    # samples.  Use the next power of two covering the window (never below
    # the library default, so the default window is unaffected).
    nfft = max(512, 1 << (int(round(winlen * rate)) - 1).bit_length())

    if typ == 'mfcc':
        data = mfcc(sig, rate, winlen=winlen, winstep=winstep, numcep=13,
                    nfft=nfft, appendEnergy=True)
    elif typ == 'fbank':
        # logfbank returns a single feature array (only fbank returns a
        # (features, energy) pair), so it must not be tuple-unpacked.
        data = logfbank(
            sig, rate, winlen=winlen, winstep=winstep, nfilt=26, nfft=nfft)
    else:
        raise ValueError("No such type")

    labels = _frame_labels(intervalit, data.shape[0], winstep)
    return (data, labels)
68
69
def singerfun(num, l):
    """Map a binary frame label to a singer class for recording ``num``.

    :param num: recording number.
    :param l: binary frame label; anything other than 1 maps to class 0.
    :returns: 0 when ``l`` is not 1; otherwise 1 for recordings 0-11,
        2 for recordings 12-21 and 3 for recordings 22-28.
    :raises ValueError: when ``l`` is 1 and ``num`` is outside 0-28.
    """
    if l != 1:
        return 0
    # (first recording, last recording, singer class)
    for low, high, singer in ((0, 11, 1), (12, 21, 2), (22, 28, 3)):
        if low <= num <= high:
            return singer
    raise ValueError(
        'no singer known for recording number {!r} (expected 0-28)'.format(
            num))
82
83
def run(typ, winlen, winstep, modelfun, modelname, multiclass=False):
    """Train and evaluate one model on the frames of all recordings.

    :param typ: feature type passed to ``features_from_wav``.
    :param winlen: analysis window length in seconds.
    :param winstep: analysis window step in seconds.
    :param modelfun: callable that receives the training data array and
        returns a compiled Keras model.
    :param modelname: name printed in the result line.
    :param multiclass: when true, frames labelled 1 are relabelled with the
        singer class of their recording and the labels are one-hot encoded
        over 4 classes.
    :returns: the trained model.
    :raises ValueError: when no data files are found.
    """
    datas = []
    labels = []

    for tg, wavp, num in get_datafiles():
        (d, l) = features_from_wav(
            tg, wavp, winlen=winlen, winstep=winstep, typ=typ)
        datas.append(d)
        if multiclass:
            # The singer is constant per recording, so look it up once and
            # broadcast instead of calling singerfun for every frame.  Only
            # look it up when needed so an unknown recording without any
            # positive frame still does not raise.
            voiced = np.asarray(l) == 1
            singer = singerfun(int(num), 1) if voiced.any() else 0
            labels.append(np.where(voiced, singer, 0))
        else:
            labels.append(l)

    if not datas:
        raise ValueError('no data files found; nothing to train on')

    datas = np.concatenate(datas)
    labels = np.concatenate(labels)
    print(np.unique(labels, return_counts=True))
    if multiclass:
        labels = utils.to_categorical(labels, num_classes=4)

    # Shuffle data and labels with one shared permutation.  Shuffling the two
    # arrays separately after rewinding the RNG state only keeps them aligned
    # if numpy happens to draw identically for arrays of different rank.
    order = np.random.permutation(labels.shape[0])
    splitindex = int(labels.shape[0]*testset)
    test_idx, train_idx = order[:splitindex], order[splitindex:]
    testdata, traindata = datas[test_idx], datas[train_idx]
    testlabels, trainlabels = labels[test_idx], labels[train_idx]
    del datas, labels

    model = modelfun(traindata)

    # Train
    model.fit(traindata, trainlabels, epochs=10, batch_size=32, shuffle=False,
              verbose=verbosity)

    # Test
    loss, acc = model.evaluate(testdata, testlabels, batch_size=32,
                               verbose=verbosity)
    print('{}\t{}\t{}\t{}\t{}\n'.format(
        winlen, winstep, modelname, loss, acc))
    return model
125
126
def bottlemodel(layers):
    """Return a builder for a binary classifier with one hidden layer.

    ``layers`` is the number of units in the hidden ReLU layer.  The returned
    callable takes the training data array, sizes the input from its second
    dimension, and returns a compiled model with a single sigmoid output.
    """
    def build(d):
        net = Sequential([
            Dense(layers, activation='relu', input_shape=(d.shape[1],)),
            Dense(1, activation='sigmoid'),
        ])
        net.compile(
            loss='binary_crossentropy',
            optimizer='rmsprop',
            metrics=['accuracy'])
        return net
    return build
141
142
def multimodel(layers):
    """Return a builder for a 4-class classifier with one hidden layer.

    ``layers`` is the number of units in the hidden ReLU layer.  The returned
    callable takes the training data array, sizes the input from its second
    dimension, and returns a compiled model with a 4-way softmax output.
    """
    def build(d):
        net = Sequential([
            Dense(layers, activation='relu', input_shape=(d.shape[1],)),
            Dense(4, activation='softmax'),
        ])
        net.compile(
            loss='categorical_crossentropy',
            optimizer='rmsprop',
            metrics=['accuracy'])
        return net
    return build
154
155
# Experiment table: (name, model builder, multiclass flag).  The binary
# models come first, then the multiclass ones, each with hidden layer widths
# 3, 5, 8 and 13.
models = (
    [('bottle{}'.format(width), bottlemodel(width), False)
     for width in (3, 5, 8, 13)]
    + [('multi{}'.format(width), multimodel(width), True)
       for width in (3, 5, 8, 13)])
165
if __name__ == '__main__':
    # Header for the tab-separated result lines printed by run().
    print('winlen\twinstep\tmodel\tloss\taccuracy\n')
    # (window length, window step) pairs in seconds.
    windows = ((0.025, 0.01), (0.1, 0.04), (0.2, 0.08))
    with backend.get_session():
        for winlen, winstep in windows:
            for name, model, multi in models:
                trained = run('mfcc', winlen, winstep, model, name, multi)
                stem = 'model_{}_{}_{}'.format(winlen, winstep, name)
                print(stem)
                # Architecture goes to JSON, weights to a separate HDF5 file.
                with open('{}.json'.format(stem), 'w') as out:
                    out.write(trained.to_json())
                trained.save_weights('{}.hdf5'.format(stem))