# annotate a new album
# [asr1617data.git] / experiments.py
1 import sys
2 import pympi
3 import random
4 import glob
5 import re
6 import os
7
8 # mfcc
9 from python_speech_features import mfcc, fbank, logfbank
10 import scipy.io.wavfile as wav
11 import numpy as np
12
13 #keras
14 from keras.models import Sequential
15 from keras.layers import Dense, Dropout # , Activation
16
# Fraction of all frames that is held out as the test set.
testset = 0.1
# Sample rate in Hz -- presumably that of the wav files; it is not read by
# the code visible in this file (TODO confirm against the data).
samplerate = 16000
# Forwarded as the ``verbose`` argument to Keras ``fit`` and ``evaluate``.
verbosity = 1
21
def get_datafiles():
    """Yield ``(textgrid_path, wav_path)`` pairs for the annotated tracks.

    Looks for ``<cwd>/textgrid/<number>.TextGrid`` files.  For each one the
    matching wav path is built as ``wav/<number>.wav`` with the number
    zero-padded to at least two digits (relative to the working directory).

    Files whose base name is not purely numeric are skipped instead of
    crashing, and the files are visited in sorted path order so that
    consecutive runs see the data in the same order.

    Yields:
        tuple: ``(absolute TextGrid path, relative wav path)``.
    """
    pattern = os.path.join(os.getcwd(), 'textgrid', '*.TextGrid')
    # glob returns files in filesystem order; sort for reproducibility.
    for tg in sorted(glob.glob(pattern)):
        # Match on the base name only so the check does not depend on the
        # platform's path separator.
        match = re.match(r'^(\d+)\.TextGrid$', os.path.basename(tg))
        if match is None:
            continue
        yield (tg, 'wav/{:02d}.wav'.format(int(match.group(1))))
28
def label_from_annotation(ann):
    """Map an annotation string to a binary label.

    Returns 0 when the annotation is empty or whitespace-only, 1 otherwise.
    """
    if ann.strip() == '':
        return 0
    return 1
31
def features_from_wav(tg, wavp, typ='mfcc', winlen=0.025, winstep=0.01):
    """Extract per-frame features from a wav file and label every frame.

    The labels come from the ``'lyrics'`` tier of the TextGrid: a frame gets
    the label of the interval its start time falls in (see
    ``label_from_annotation``).  Frames that start after the end of the last
    interval keep the label of that last interval.

    Args:
        tg: Path of the TextGrid file; it must contain a ``'lyrics'`` tier.
        wavp: Path of the wav file.
        typ: ``'mfcc'`` (13 cepstra, energy appended) or ``'fbank'``
            (log filterbank energies, 26 filters).
        winlen: Analysis window length, passed on to the feature extractor.
        winstep: Step between successive windows, passed on to the feature
            extractor and used as the time between frames when labelling.

    Returns:
        tuple: ``(data, labels)`` where ``data`` is the feature matrix and
        ``labels`` is an int array with one entry per row of ``data``.

    Raises:
        ValueError: If ``typ`` is not one of the supported feature types.
    """
    # Load textgrid
    tgob = pympi.TextGrid(tg)
    intervalit = tgob.get_tier('lyrics').get_intervals(sort=True)
    # Load wav
    (rate, sig) = wav.read(wavp, mmap=True)

    if typ == 'mfcc':
        data = mfcc(sig, rate, winlen=winlen, winstep=winstep, numcep=13,
                    appendEnergy=True)
    elif typ == 'fbank':
        # logfbank returns a single feature array (only fbank returns a
        # (features, energy) pair), so it must not be tuple-unpacked.
        data = logfbank(sig, rate, winlen=winlen, winstep=winstep, nfilt=26)
    else:
        raise ValueError("No such type")

    nframes = data.shape[0]
    labels = np.empty(nframes, dtype=int)

    (_, e, v) = next(intervalit)
    label = label_from_annotation(v)
    exhausted = False
    for i in range(nframes):
        # Derive the frame time from the index so rounding errors do not
        # accumulate over long recordings.
        framestart = i * winstep
        # Skip every interval that ended before this frame; a single frame
        # step may jump over more than one short interval.
        while not exhausted and framestart > e:
            following = next(intervalit, None)
            if following is None:
                # Past the last interval: keep its label for the rest.
                exhausted = True
            else:
                (_, e, v) = following
                label = label_from_annotation(v)
        labels[i] = label
    return (data, labels)
65
def run(typ, winlen, winstep, modelfun, modelname):
    """Train and evaluate one model on one feature configuration.

    Collects the features and labels of every data file, shuffles them,
    holds out the first ``testset`` fraction for testing, fits the model
    built by ``modelfun`` on the remainder and prints one tab-separated
    result line (winlen, winstep, model name, loss, accuracy).

    Args:
        typ: Feature type, forwarded to ``features_from_wav``.
        winlen: Window length, forwarded to ``features_from_wav``.
        winstep: Window step, forwarded to ``features_from_wav``.
        modelfun: Callable that receives the training data and returns a
            compiled model.
        modelname: Name printed in the result line.
    """
    featureparts = []
    labelparts = []
    for gridpath, wavpath in get_datafiles():
        feats, targets = features_from_wav(
            gridpath, wavpath, winlen=winlen, winstep=winstep, typ=typ)
        featureparts.append(feats)
        labelparts.append(targets)

    allfeats = np.concatenate(featureparts)
    alllabels = np.concatenate(labelparts)
    # The per-file pieces are no longer needed once concatenated.
    del featureparts, labelparts

    # Shuffle both arrays from the same RNG state so that features and
    # labels receive the same permutation and stay aligned.
    saved_state = np.random.get_state()
    np.random.shuffle(allfeats)
    np.random.set_state(saved_state)
    np.random.shuffle(alllabels)

    # The first part becomes the test set, the rest the training set.
    ntest = int(alllabels.shape[0] * testset)
    testdata = allfeats[:ntest]
    traindata = allfeats[ntest:]
    testlabels = alllabels[:ntest]
    trainlabels = alllabels[ntest:]
    del allfeats, alllabels

    model = modelfun(traindata)

    # Train
    model.fit(traindata, trainlabels, epochs=10, batch_size=32,
              shuffle=False, verbose=verbosity)

    # Test
    loss, acc = model.evaluate(
        testdata, testlabels, batch_size=32, verbose=verbosity)
    report = '{}\t{}\t{}\t{}\t{}\n'.format(
        winlen, winstep, modelname, loss, acc)
    print(report)
100
def simplemodel(d):
    """Build and compile a three-layer dense binary classifier.

    The input width is ``d.shape[1]``; the layers are a ReLU layer of twice
    that width, a ReLU layer of 100 units and a single sigmoid output unit.

    Args:
        d: Training data; only ``d.shape[1]`` is read.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    nfeatures = d.shape[1]
    net = Sequential([
        Dense(nfeatures * 2, input_shape=(nfeatures,), activation='relu'),
        Dense(100, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    net.compile(
        optimizer='rmsprop',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return net
111
def bottlemodel(d):
    """Build and compile a dense binary classifier with a narrow middle layer.

    The input width is ``d.shape[1]``; the layers are a ReLU layer of twice
    that width, a ReLU layer of 13 units and a single sigmoid output unit.

    Args:
        d: Training data; only ``d.shape[1]`` is read.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    nfeatures = d.shape[1]
    net = Sequential([
        Dense(nfeatures * 2, input_shape=(nfeatures,), activation='relu'),
        Dense(13, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    net.compile(
        optimizer='rmsprop',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return net
122
if __name__ == '__main__':
    # Header of the tab-separated result table; run() prints one row per
    # (window configuration, model) combination.
    print('winlen\twinstep\tmodel\tloss\taccuracy\n')
    windowconfigs = ((0.025, 0.01), (0.1, 0.04), (0.2, 0.08))
    builders = (('simple', simplemodel), ('bottle', bottlemodel))
    for winlen, winstep in windowconfigs:
        for name, builder in builders:
            run('mfcc', winlen, winstep, builder, name)