experiments.py

   1 import sys
   2 import pympi
   3 import random
   4 import glob
   5 import re
   6 import os
   7
   8 # mfcc
   9 from python_speech_features import mfcc, fbank, logfbank
  10 import scipy.io.wavfile as wav
  11 import numpy as np
  12
  13 #keras
  14 from keras.models import Sequential
  15 from keras.layers import Dense, Dropout  # , Activation
  16
  17 # Testset ratio
  18 testset = 0.10
  19 samplerate = 16000
  20 verbosity = 1
  21
  22 def get_datafiles():
  23     files = glob.glob(os.path.join(os.getcwd(), 'textgrid', '*.TextGrid'))
  24     # Loop over all datafiles and make wavefile string
  25     for i, tg in enumerate(files):
  26         num = re.match('^.*/(\\d+).TextGrid$', tg).group(1)
  27         yield (tg, 'wav/{:02d}.wav'.format(int(num)))
  28
  29 def label_from_annotation(ann):
  30     return 0 if ann.strip() == '' else 1
  31
  32 def features_from_wav(tg, wavp, typ='mfcc', winlen=0.025, winstep=0.01):
  33     # Load textgrid
  34     tgob = pympi.TextGrid(tg)
  35     intervalit = tgob.get_tier('lyrics').get_intervals(sort=True)
  36     # Load wav
  37     (rate, sig) = wav.read(wavp, mmap=True)
  38
  39     if typ == 'mfcc':
  40         data = mfcc(sig, rate, winlen=winlen, winstep=winstep, numcep=13,
  41                     appendEnergy=True)
  42     elif typ == 'fbank':
  43         (data, energy) = logfbank(sig, rate, winlen=winlen, winstep=winstep, nfilt=26)
  44     else:
  45         raise ValueError("No such type")
  46
  47     (s, e, v) = next(intervalit)
  48     currentframe = 0.0
  49     label = label_from_annotation(v)
  50     labels = np.empty(data.shape[0], dtype=int)
  51     i = 0
  52     for d in data:
  53         # If we exceeded the interval, make new one
  54         if currentframe > e:
  55             (s, e, v) = next(intervalit, (s, e, v))
  56             label = label_from_annotation(v)
  57
  58         # Yield datapoint
  59         labels[i] = label
  60
  61         # Increase frame
  62         currentframe += winstep
  63         i += 1
  64     return (data, labels)
  65
  66 def run(typ, winlen, winstep, modelfun, modelname):
  67     datas = []
  68     labels = []
  69
  70     for tg, wavp in get_datafiles():
  71         (d, l) = features_from_wav(
  72             tg, wavp, winlen=winlen, winstep=winstep, typ=typ)
  73         datas.append(d)
  74         labels.append(l)
  75
  76     datas = np.concatenate(datas)
  77     labels = np.concatenate(labels)
  78
  79     rng_state = np.random.get_state()
  80     np.random.shuffle(datas)
  81     np.random.set_state(rng_state)
  82     np.random.shuffle(labels)
  83
  84     splitindex = int(labels.shape[0]*testset)
  85     testdata, traindata = datas[:splitindex], datas[splitindex:]
  86     testlabels, trainlabels = labels[:splitindex], labels[splitindex:]
  87     del datas, labels
  88
  89     model = modelfun(traindata)
  90
  91     #Train
  92     model.fit(traindata, trainlabels, epochs=10, batch_size=32, shuffle=False,
  93               verbose=verbosity)
  94
  95     #Test
  96     loss, acc = model.evaluate(testdata, testlabels, batch_size=32,
  97                                verbose=verbosity)
  98     print('{}\t{}\t{}\t{}\t{}\n'.format(
  99         winlen, winstep, modelname, loss, acc))
 100
 101 def simplemodel(d):
 102     model = Sequential()
 103     model.add(
 104         Dense(d.shape[1]*2, input_shape=(d.shape[1],), activation='relu'))
 105     model.add(Dense(100, activation='relu'))
 106     model.add(Dense(1, activation='sigmoid'))
 107     model.compile(optimizer='rmsprop',
 108                   loss='binary_crossentropy',
 109                   metrics=['accuracy'])
 110     return model
 111
 112 def bottlemodel(d):
 113     model = Sequential()
 114     model.add(
 115         Dense(d.shape[1]*2, input_shape=(d.shape[1],), activation='relu'))
 116     model.add(Dense(13, activation='relu'))
 117     model.add(Dense(1, activation='sigmoid'))
 118     model.compile(optimizer='rmsprop',
 119                   loss='binary_crossentropy',
 120                   metrics=['accuracy'])
 121     return model
 122
 123 if __name__ == '__main__':
 124     print('winlen\twinstep\tmodel\tloss\taccuracy\n')
 125     for winlen, winstep in ((0.025, 0.01), (0.1, 0.04), (0.2, 0.08)):
 126         for name, model in (('simple', simplemodel), ('bottle', bottlemodel)):
 127             run('mfcc', winlen, winstep, model, name)