experiments.py

   1 import pympi
   2 import glob
   3 import re
   4 import os
   5
   6 # mfcc
   7 from python_speech_features import mfcc, logfbank
   8 import scipy.io.wavfile as wav
   9 import numpy as np
  10
  11 # keras
  12 from keras.models import Sequential
  13 from keras.layers import Dense  # , Dropout  # , Activation
  14 from keras import backend, utils
  15
  16 # Testset ratio
  17 testset = 0.10
  18 samplerate = 16000
  19 verbosity = 0
  20
  21
  22 def get_datafiles():
  23     files = glob.glob(os.path.join(os.getcwd(), 'textgrid', '*.TextGrid'))
  24     # Loop over all datafiles and make wavefile string
  25     for i, tg in enumerate(files):
  26         num = re.match('^.*/(\\d+).TextGrid$', tg).group(1)
  27         yield (tg, 'wav/{:02d}.wav'.format(int(num)), int(num))
  28
  29
  30 def label_from_annotation(ann):
  31     return 0 if ann.strip() == '' else 1
  32
  33
  34 def features_from_wav(tg, wavp, typ='mfcc', winlen=0.025, winstep=0.01):
  35     # Load textgrid
  36     tgob = pympi.TextGrid(tg)
  37     intervalit = tgob.get_tier('lyrics').get_intervals(sort=True)
  38     # Load wav
  39     (rate, sig) = wav.read(wavp, mmap=True)
  40
  41     if typ == 'mfcc':
  42         data = mfcc(sig, rate, winlen=winlen, winstep=winstep, numcep=13,
  43                     appendEnergy=True)
  44     elif typ == 'fbank':
  45         (data, energy) = logfbank(
  46             sig, rate, winlen=winlen, winstep=winstep, nfilt=26)
  47     else:
  48         raise ValueError("No such type")
  49
  50     (s, e, v) = next(intervalit)
  51     currentframe = 0.0
  52     label = label_from_annotation(v)
  53     labels = np.empty(data.shape[0], dtype=int)
  54     i = 0
  55     for d in data:
  56         # If we exceeded the interval, make new one
  57         if currentframe > e:
  58             (s, e, v) = next(intervalit, (s, e, v))
  59             label = label_from_annotation(v)
  60
  61         # Yield datapoint
  62         labels[i] = label
  63
  64         # Increase frame
  65         currentframe += winstep
  66         i += 1
  67     return (data, labels)
  68
  69
  70 def singerfun(num, l):
  71     if l == 1:
  72         if 0 <= num <= 11:
  73             return 1
  74         elif 12 <= num <= 21:
  75             return 2
  76         elif 22 <= num <= 28:
  77             return 3
  78         else:
  79             raise Exception("halp")
  80     else:
  81         return 0
  82
  83
  84 def run(typ, winlen, winstep, modelfun, modelname, multiclass=False):
  85     datas = []
  86     labels = []
  87
  88     for tg, wavp, num in get_datafiles():
  89         (d, l) = features_from_wav(
  90             tg, wavp, winlen=winlen, winstep=winstep, typ=typ)
  91         datas.append(d)
  92         if multiclass:
  93             labels.append(list(map(lambda x: singerfun(int(num), x), l)))
  94         else:
  95             labels.append(l)
  96
  97     datas = np.concatenate(datas)
  98     labels = np.concatenate(labels)
  99     print(np.unique(labels, return_counts=True))
 100     if multiclass:
 101         labels = utils.to_categorical(labels, num_classes=4)
 102
 103     rng_state = np.random.get_state()
 104     np.random.shuffle(datas)
 105     np.random.set_state(rng_state)
 106     np.random.shuffle(labels)
 107
 108     splitindex = int(labels.shape[0]*testset)
 109     testdata, traindata = datas[:splitindex], datas[splitindex:]
 110     testlabels, trainlabels = labels[:splitindex], labels[splitindex:]
 111     del datas, labels
 112
 113     model = modelfun(traindata)
 114
 115     # Train
 116     model.fit(traindata, trainlabels, epochs=10, batch_size=32, shuffle=False,
 117               verbose=verbosity)
 118
 119     # Test
 120     loss, acc = model.evaluate(testdata, testlabels, batch_size=32,
 121                                verbose=verbosity)
 122     print('{}\t{}\t{}\t{}\t{}\n'.format(
 123         winlen, winstep, modelname, loss, acc))
 124     return model
 125
 126
 127 def bottlemodel(layers):
 128     def fun(d):
 129         model = Sequential()
 130         model.add(Dense(layers, activation='relu', input_shape=(d.shape[1],)))
 131         model.add(Dense(1, activation='sigmoid'))
 132 #    model.add(
 133 #        Dense(d.shape[1]*2, input_shape=(d.shape[1],), activation='relu'))
 134 #    model.add(Dense(13, activation='relu'))
 135 #    model.add(Dense(1, activation='sigmoid'))
 136         model.compile(optimizer='rmsprop',
 137                       loss='binary_crossentropy',
 138                       metrics=['accuracy'])
 139         return model
 140     return fun
 141
 142
 143 def multimodel(layers):
 144     def fun(d):
 145         model = Sequential()
 146 #  model.add(Dense(d.shape[1]*2, input_shape=(d.shape[1],), activation='relu'))
 147         model.add(Dense(layers, activation='relu', input_shape=(d.shape[1],)))
 148         model.add(Dense(4, activation='softmax'))
 149         model.compile(optimizer='rmsprop',
 150                       loss='categorical_crossentropy',
 151                       metrics=['accuracy'])
 152         return model
 153     return fun
 154
 155
 156 models = [
 157     ('bottle3', bottlemodel(3), False),
 158     ('bottle5', bottlemodel(5), False),
 159     ('bottle8', bottlemodel(8), False),
 160     ('bottle13', bottlemodel(13), False),
 161     ('multi3', multimodel(3), True),
 162     ('multi5', multimodel(5), True),
 163     ('multi8', multimodel(8), True),
 164     ('multi13', multimodel(13), True)]
 165
 166 if __name__ == '__main__':
 167     print('winlen\twinstep\tmodel\tloss\taccuracy\n')
 168     with backend.get_session():
 169         for winlen, winstep in ((0.025, 0.01), (0.1, 0.04), (0.2, 0.08)):
 170             for name, model, multi in models:
 171                 m = run('mfcc', winlen, winstep, model, name, multi)
 172                 fproot = 'model_{}_{}_{}'.format(winlen, winstep, name)
 173                 print(fproot);
 174                 with open('{}.json'.format(fproot), 'w') as f:
 175                     f.write(m.to_json())
 176                 m.save_weights('{}.hdf5'.format(fproot))