# add prediction
# [asr1617data.git] / experiments.py
1 import pympi
2 import glob
3 import re
4 import os
5
6 # mfcc
7 from python_speech_features import mfcc, logfbank
8 import scipy.io.wavfile as wav
9 import numpy as np
10
11 # keras
12 from keras.models import Sequential
13 from keras.layers import Dense, Dropout # , Activation
14 from keras import backend
15
# Fraction of the pooled, shuffled dataset held out as the test split.
testset = 0.10
# Expected audio sample rate in Hz. NOTE(review): never referenced in this
# file — wav.read() supplies the actual rate; presumably documents the corpus.
samplerate = 16000
# Keras verbosity passed to fit()/evaluate() (1 = progress bar).
verbosity = 1
20
def get_datafiles():
    """Yield (textgrid_path, wav_path) pairs for every TextGrid in ./textgrid.

    The wav path is derived from the numeric stem of the TextGrid filename,
    zero-padded to two digits (e.g. ``3.TextGrid`` -> ``wav/03.wav``).
    Files whose stem is not purely numeric are skipped.
    """
    files = glob.glob(os.path.join(os.getcwd(), 'textgrid', '*.TextGrid'))
    for tg in files:
        # Match on the basename: portable across path separators, and the
        # dot before the extension is escaped (the original pattern's bare
        # '.' matched any character).
        m = re.match(r'(\d+)\.TextGrid$', os.path.basename(tg))
        if m is None:
            # Non-numeric filename: skip instead of crashing on .group(1).
            continue
        yield (tg, 'wav/{:02d}.wav'.format(int(m.group(1))))
27
def label_from_annotation(ann):
    """Binary label for a TextGrid annotation: 1 if it has text, else 0."""
    has_text = bool(ann.strip())
    return int(has_text)
30
def features_from_wav(tg, wavp, typ='mfcc', winlen=0.025, winstep=0.01):
    """Extract frame-level features and per-frame binary labels.

    Parameters:
        tg: path to a Praat TextGrid with a 'lyrics' tier of intervals.
        wavp: path to the matching wav file.
        typ: 'mfcc' (13 cepstra, energy appended) or 'fbank'
             (26 log filterbank energies).
        winlen, winstep: analysis window length and step in seconds.

    Returns:
        (data, labels): data is a (frames, features) array, labels a
        (frames,) int array where each frame gets the label of the
        'lyrics' interval containing its start time.

    Raises:
        ValueError: if typ is neither 'mfcc' nor 'fbank'.
    """
    # Load textgrid and iterate its 'lyrics' intervals in time order.
    tgob = pympi.TextGrid(tg)
    intervalit = tgob.get_tier('lyrics').get_intervals(sort=True)
    # Load wav (mmap avoids reading the whole file into memory).
    (rate, sig) = wav.read(wavp, mmap=True)

    if typ == 'mfcc':
        data = mfcc(sig, rate, winlen=winlen, winstep=winstep, numcep=13,
                    appendEnergy=True)
    elif typ == 'fbank':
        # BUG FIX: logfbank returns a single array (only fbank returns a
        # (features, energy) tuple); unpacking it into two names raised
        # ValueError for any signal with more than two frames.
        data = logfbank(sig, rate, winlen=winlen, winstep=winstep, nfilt=26)
    else:
        raise ValueError("No such type")

    # Walk frames and intervals in lockstep: advance to the next interval
    # once the current frame's start time passes the interval end. The
    # default in next() keeps the last interval's label past the final one.
    (s, e, v) = next(intervalit)
    currentframe = 0.0
    label = label_from_annotation(v)
    labels = np.empty(data.shape[0], dtype=int)
    for i in range(data.shape[0]):
        if currentframe > e:
            (s, e, v) = next(intervalit, (s, e, v))
            label = label_from_annotation(v)
        labels[i] = label
        currentframe += winstep
    return (data, labels)
64
def run(typ, winlen, winstep, modelfun, modelname):
    """Train and evaluate one model configuration.

    Pools features/labels from every datafile, shuffles them jointly,
    splits off a test set, trains the model built by ``modelfun`` on the
    remainder, prints a tab-separated result line and returns the model.
    """
    # Extract (features, labels) per file, then pool into flat arrays.
    extracted = [
        features_from_wav(tg, wavp, winlen=winlen, winstep=winstep, typ=typ)
        for tg, wavp in get_datafiles()
    ]
    datas = np.concatenate([feats for feats, _ in extracted])
    labels = np.concatenate([labs for _, labs in extracted])

    # Shuffle both arrays identically by replaying the same RNG state.
    state = np.random.get_state()
    np.random.shuffle(datas)
    np.random.set_state(state)
    np.random.shuffle(labels)

    # Carve off the first `testset` fraction as the held-out split.
    cut = int(labels.shape[0]*testset)
    testdata, traindata = datas[:cut], datas[cut:]
    testlabels, trainlabels = labels[:cut], labels[cut:]
    del datas, labels

    model = modelfun(traindata)

    # Train (data is already shuffled, hence shuffle=False).
    model.fit(traindata, trainlabels, epochs=10, batch_size=32, shuffle=False,
              verbose=verbosity)

    # Evaluate and report one tab-separated line.
    loss, acc = model.evaluate(testdata, testlabels, batch_size=32,
                               verbose=verbosity)
    print('{}\t{}\t{}\t{}\t{}\n'.format(
        winlen, winstep, modelname, loss, acc))
    return model
100
def simplemodel(d):
    """Build a dense net: 2x-width expansion -> 100 -> sigmoid binary output."""
    n_features = d.shape[1]
    net = Sequential([
        Dense(n_features * 2, input_shape=(n_features,), activation='relu'),
        Dense(100, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    net.compile(optimizer='rmsprop',
                loss='binary_crossentropy',
                metrics=['accuracy'])
    return net
111
def bottlemodel(d):
    """Build a dense net with a 13-unit bottleneck before the sigmoid output."""
    n_features = d.shape[1]
    net = Sequential([
        Dense(n_features * 2, input_shape=(n_features,), activation='relu'),
        Dense(13, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    net.compile(optimizer='rmsprop',
                loss='binary_crossentropy',
                metrics=['accuracy'])
    return net
122
if __name__ == '__main__':
    # Sweep window settings x model architectures; save each trained model.
    print('winlen\twinstep\tmodel\tloss\taccuracy\n')
    window_settings = ((0.025, 0.01), (0.1, 0.04), (0.2, 0.08))
    builders = (('simple', simplemodel), ('bottle', bottlemodel))
    with backend.get_session():
        for winlen, winstep in window_settings:
            for name, builder in builders:
                trained = run('mfcc', winlen, winstep, builder, name)
                # Persist architecture (JSON) and weights (HDF5) per config.
                fproot = 'model_{}_{}_{}'.format(winlen, winstep, name)
                with open('{}.json'.format(fproot), 'w') as f:
                    f.write(trained.to_json())
                trained.save_weights('{}.hdf5'.format(fproot))