experiments.py

   1 import sys
   2 import pympi
   3 import random
   4 import glob
   5 import re
   6 import os
   7
   8 # mfcc
   9 from python_speech_features import mfcc, fbank, logfbank
  10 import scipy.io.wavfile as wav
  11 import numpy as np
  12
  13 #keras
  14 from keras.models import Sequential
  15 from keras.layers import Dense, Dropout  # , Activation
  16
  17 # Testset ratio
  18 testset = 0.10
  19 samplerate = 16000
  20
  21 def get_datafiles():
  22     files = glob.glob(os.path.join(os.getcwd(), 'textgrid', '*.TextGrid'))
  23     # Loop over all datafiles and make wavefile string
  24     for i, tg in enumerate(files):
  25         num = re.match('^.*/(\\d+).TextGrid$', tg).group(1)
  26         yield (tg, 'wav/{:02d}.wav'.format(int(num)))
  27
  28 def label_from_annotation(ann):
  29     return 0 if ann.strip() == '' else 1
  30
  31 def features_from_wav(tg, wavp, typ='mfcc', winlen=0.025, winstep=0.01):
  32     # Load textgrid
  33     tgob = pympi.TextGrid(tg)
  34     intervalit = tgob.get_tier('lyrics').get_intervals(sort=True)
  35     # Load wav
  36     (rate, sig) = wav.read(wavp, mmap=True)
  37
  38     if typ == 'mfcc':
  39         data = mfcc(sig, rate, winlen=winlen, winstep=winstep, numcep=13,
  40                     appendEnergy=True)
  41     elif typ == 'fbank':
  42         (data, energy) = logfbank(sig, rate, winlen=winlen, winstep=winstep, nfilt=26)
  43     else:
  44         raise ValueError("No such type")
  45
  46     (s, e, v) = next(intervalit)
  47     currentframe = 0.0
  48     label = label_from_annotation(v)
  49     labels = np.empty(data.shape[0], dtype=int)
  50     i = 0
  51     for d in data:
  52         # If we exceeded the interval, make new one
  53         if currentframe > e:
  54             (s, e, v) = next(intervalit, (s, e, v))
  55             label = label_from_annotation(v)
  56
  57         # Yield datapoint
  58         labels[i] = label
  59
  60         # Increase frame
  61         currentframe += winstep
  62         i += 1
  63     return (data, labels)
  64
  65 def run(typ, winlen, winstep, modelfun, modelname):
  66     datas = []
  67     labels = []
  68
  69     for tg, wavp in get_datafiles():
  70         (d, l) = features_from_wav(tg, wavp, winlen=winlen, winstep=winstep, typ=typ)
  71         datas.append(d)
  72         labels.append(l)
  73
  74     datas = np.concatenate(datas)
  75     labels = np.concatenate(labels)
  76
  77     rng_state = np.random.get_state()
  78     np.random.shuffle(datas)
  79     np.random.set_state(rng_state)
  80     np.random.shuffle(labels)
  81
  82     splitindex = int(labels.shape[0]*testset)
  83     testdata, traindata = datas[:splitindex], datas[splitindex:]
  84     testlabels, trainlabels = labels[:splitindex], labels[splitindex:]
  85     del datas, labels
  86
  87     model = modelfun(traindata)
  88
  89     #Train
  90     model.fit(traindata, trainlabels, epochs=10, batch_size=32, shuffle=False,
  91             verbose=0)
  92
  93     #Test
  94     loss, acc = model.evaluate(testdata, testlabels, batch_size=32, verbose=0)
  95     print('{}\t{}\t{}\t{}\t{}\n'.format(
  96         winlen, winstep, modelname, loss, acc))
  97
  98 def simplemodel(d):
  99     model = Sequential()
 100     model.add(Dense(d.shape[1]*2, input_shape=(d.shape[1],), activation='relu'))
 101     model.add(Dense(100, activation='relu'))
 102     model.add(Dense(1, activation='sigmoid'))
 103     model.compile(optimizer='rmsprop',
 104                   loss='binary_crossentropy',
 105                   metrics=['accuracy'])
 106     return model
 107
 108 def bottlemodel(d):
 109     model = Sequential()
 110     model.add(Dense(d.shape[1]*2, input_shape=(d.shape[1],), activation='relu'))
 111     model.add(Dense(13, activation='relu'))
 112     model.add(Dense(1, activation='sigmoid'))
 113     model.compile(optimizer='rmsprop',
 114                   loss='binary_crossentropy',
 115                   metrics=['accuracy'])
 116     return model
 117
 118 if __name__ == '__main__':
 119     print('winlen\twinstep\tmodel\tloss\taccuracy\n')
 120     for winlen, winstep in ((0.025, 0.01), (0.1, 0.04), (0.2, 0.08)):
 121         for name, model in (('simple', simplemodel), ('bottle', bottlemodel)):
 122             run('mfcc', winlen, winstep, model, name)