update scripts and merge
[asr1617data.git] / experiments.py
1 import sys
2 import pympi
3 import random
4 import glob
5 import re
6 import os
7
8 # mfcc
9 from python_speech_features import mfcc, fbank, logfbank
10 import scipy.io.wavfile as wav
11 import numpy as np
12
13 #keras
14 from keras.models import Sequential
15 from keras.layers import Dense, Dropout # , Activation
16
# Fraction of the shuffled frames that run() holds out for evaluation
testset = 0.1
# Audio sample rate; not referenced by the code visible in this file
# (presumably Hz -- confirm against the wav files)
samplerate = 16000
20
def get_datafiles():
    """Yield ``(textgrid_path, wav_path)`` pairs for the annotated recordings.

    Looks for ``*.TextGrid`` files in the ``textgrid`` directory below the
    current working directory.  Only files whose base name is purely numeric
    (e.g. ``7.TextGrid``) are used; any other file is skipped.

    Yields:
        tuple: the TextGrid path as returned by ``glob`` and the matching
        relative wav path ``'wav/NN.wav'`` with the number zero-padded to at
        least two digits.
    """
    pattern = os.path.join(os.getcwd(), 'textgrid', '*.TextGrid')
    # Match on the base name so the check does not depend on the platform's
    # path separator; the dot before the extension is matched literally.
    name_re = re.compile(r'(\d+)\.TextGrid')
    # Sorted so the files are visited in a reproducible order
    for tg in sorted(glob.glob(pattern)):
        found = name_re.fullmatch(os.path.basename(tg))
        if found is None:
            # Not a numbered recording, so there is no wav file to pair it with
            continue
        yield (tg, 'wav/{:02d}.wav'.format(int(found.group(1))))
27
def label_from_annotation(ann):
    """Map an annotation string to a binary label.

    Returns 0 when ``ann`` is empty or consists only of whitespace, and 1
    for any other text.
    """
    stripped = ann.strip()
    return int(bool(stripped))
30
def features_from_wav(tg, wavp, typ='mfcc', winlen=0.025, winstep=0.01):
    """Compute per-frame features for a wav file and label every frame.

    The labels come from the ``lyrics`` tier of the TextGrid: a frame gets
    the label of the interval it starts in, as computed by
    ``label_from_annotation`` (0 for an empty annotation, 1 otherwise).

    Args:
        tg: path of the TextGrid file holding the ``lyrics`` tier.
        wavp: path of the wav file to analyse.
        typ: ``'mfcc'`` for 13 cepstral coefficients or ``'fbank'`` for 26
            log filterbank energies.
        winlen: analysis window length passed to the feature extractor.
        winstep: step between successive windows; also used as the time
            increment per frame when walking the intervals, so it must be in
            the same unit as the TextGrid times (presumably seconds).

    Returns:
        tuple: ``(data, labels)`` where ``data`` is the feature matrix with
        one row per frame and ``labels`` is an int array with one entry per
        row of ``data``.

    Raises:
        ValueError: if ``typ`` is neither ``'mfcc'`` nor ``'fbank'``.
    """
    # Load textgrid
    tgob = pympi.TextGrid(tg)
    intervalit = iter(tgob.get_tier('lyrics').get_intervals(sort=True))
    # Load wav
    (rate, sig) = wav.read(wavp, mmap=True)

    if typ == 'mfcc':
        data = mfcc(sig, rate, winlen=winlen, winstep=winstep, numcep=13,
                    appendEnergy=True)
    elif typ == 'fbank':
        # logfbank returns only the feature matrix (it is fbank that returns
        # a (features, energy) pair), so there is nothing to unpack here.
        data = logfbank(sig, rate, winlen=winlen, winstep=winstep, nfilt=26)
    else:
        raise ValueError("No such type")

    (_, end, text) = next(intervalit)
    label = label_from_annotation(text)
    nframes = data.shape[0]
    labels = np.empty(nframes, dtype=int)
    exhausted = False
    for i in range(nframes):
        # Derive the frame start from the index instead of accumulating
        # winstep, which would drift through floating point rounding.
        currentframe = i * winstep
        # Skip every interval that ends before this frame; a single frame
        # step can span several short intervals.
        while not exhausted and currentframe > end:
            following = next(intervalit, None)
            if following is None:
                # Past the last interval: keep using its label
                exhausted = True
            else:
                (_, end, text) = following
                label = label_from_annotation(text)
        labels[i] = label
    return (data, labels)
64
def run(typ, winlen, winstep, modelfun):
    """Train and evaluate one model on the features of all data files.

    Features and labels of every file from ``get_datafiles`` are
    concatenated, shuffled in unison, and split so that the first
    ``testset`` fraction becomes the test set and the rest the training set.

    Args:
        typ: feature type forwarded to ``features_from_wav``.
        winlen: window length forwarded to ``features_from_wav``.
        winstep: window step forwarded to ``features_from_wav``.
        modelfun: callable that receives the training data and returns a
            compiled model offering ``fit`` and ``evaluate``.

    Returns:
        Whatever ``model.evaluate`` returns for the test set.
    """
    extracted = [
        features_from_wav(tg, wavp, winlen=winlen, winstep=winstep, typ=typ)
        for tg, wavp in get_datafiles()
    ]
    features = np.concatenate([feats for feats, _ in extracted])
    targets = np.concatenate([labs for _, labs in extracted])
    del extracted

    # Shuffle both arrays in place with the same permutation: rewinding the
    # RNG to the saved state makes the second shuffle repeat the first one.
    saved_state = np.random.get_state()
    np.random.shuffle(features)
    np.random.set_state(saved_state)
    np.random.shuffle(targets)

    # The leading slice is the test set, the remainder the training set
    cut = int(targets.shape[0] * testset)
    testdata, testlabels = features[:cut], targets[:cut]
    traindata, trainlabels = features[cut:], targets[cut:]
    del features, targets

    model = modelfun(traindata)

    # Train
    model.fit(traindata, trainlabels, epochs=10, batch_size=32,
              shuffle=False, verbose=0)

    # Test
    return model.evaluate(testdata, testlabels, batch_size=32, verbose=0)
95
def simplemodel(d):
    """Build and compile a binary classifier with a 100-unit second layer.

    Args:
        d: data whose ``shape[1]`` gives the number of input features.

    Returns:
        A compiled ``Sequential`` model: a ReLU layer twice as wide as the
        input, a 100-unit ReLU layer and a single sigmoid output, compiled
        with rmsprop, binary cross-entropy loss and the accuracy metric.
    """
    nfeatures = d.shape[1]
    layers = [
        Dense(nfeatures * 2, input_shape=(nfeatures,), activation='relu'),
        Dense(100, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    network = Sequential(layers)
    network.compile(loss='binary_crossentropy',
                    optimizer='rmsprop',
                    metrics=['accuracy'])
    return network
105
def bottlemodel(d):
    """Build and compile a binary classifier with a 13-unit second layer.

    Args:
        d: data whose ``shape[1]`` gives the number of input features.

    Returns:
        A compiled ``Sequential`` model: a ReLU layer twice as wide as the
        input, a 13-unit ReLU layer and a single sigmoid output, compiled
        with rmsprop, binary cross-entropy loss and the accuracy metric.
    """
    nfeatures = d.shape[1]
    layers = [
        Dense(nfeatures * 2, input_shape=(nfeatures,), activation='relu'),
        Dense(13, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    network = Sequential(layers)
    network.compile(loss='binary_crossentropy',
                    optimizer='rmsprop',
                    metrics=['accuracy'])
    return network
115
if __name__ == '__main__':
    # (winlen, winstep) combinations to evaluate, in order
    window_settings = ((0.025, 0.01), (0.1, 0.04), (0.2, 0.08))
    # Pass simplemodel instead of bottlemodel to repeat the same sweep with
    # the wider network.
    for winlen, winstep in window_settings:
        print(run('mfcc', winlen, winstep, bottlemodel))