experiments.py

   1 import pympi
   2 import glob
   3 import re
   4 import os
   5
   6 # mfcc
   7 from python_speech_features import mfcc, logfbank
   8 import scipy.io.wavfile as wav
   9 import numpy as np
  10
  11 # keras
  12 from keras.models import Sequential
  13 from keras.layers import Dense, Dropout  # , Activation
  14 from keras import backend, utils
  15
  16 # Fraction of all frames held out as the test set (read by run())
  17 testset = 0.10
  18 samplerate = 16000  # NOTE(review): not referenced in the visible code; the rate used comes from each wav file
  19 verbosity = 1  # forwarded as `verbose` to model.fit and model.evaluate in run()
  20
  21 def get_datafiles():
  22     files = glob.glob(os.path.join(os.getcwd(), 'textgrid', '*.TextGrid'))
  23     # Loop over all datafiles and make wavefile string
  24     for i, tg in enumerate(files):
  25         num = re.match('^.*/(\\d+).TextGrid$', tg).group(1)
  26         yield (tg, 'wav/{:02d}.wav'.format(int(num)), int(num))
  27
  28 def label_from_annotation(ann):
  29     return 0 if ann.strip() == '' else 1
  30
  31 def features_from_wav(tg, wavp, typ='mfcc', winlen=0.025, winstep=0.01):
  32     # Load textgrid
  33     tgob = pympi.TextGrid(tg)
  34     intervalit = tgob.get_tier('lyrics').get_intervals(sort=True)
  35     # Load wav
  36     (rate, sig) = wav.read(wavp, mmap=True)
  37
  38     if typ == 'mfcc':
  39         data = mfcc(sig, rate, winlen=winlen, winstep=winstep, numcep=13,
  40                     appendEnergy=True)
  41     elif typ == 'fbank':
  42         (data, energy) = logfbank(sig, rate, winlen=winlen, winstep=winstep, nfilt=26)
  43     else:
  44         raise ValueError("No such type")
  45
  46     (s, e, v) = next(intervalit)
  47     currentframe = 0.0
  48     label = label_from_annotation(v)
  49     labels = np.empty(data.shape[0], dtype=int)
  50     i = 0
  51     for d in data:
  52         # If we exceeded the interval, make new one
  53         if currentframe > e:
  54             (s, e, v) = next(intervalit, (s, e, v))
  55             label = label_from_annotation(v)
  56
  57         # Yield datapoint
  58         labels[i] = label
  59
  60         # Increase frame
  61         currentframe += winstep
  62         i += 1
  63     return (data, labels)
  64
  65 def singerfun(num, l):
  66     if l == 1:
  67         if 0 <= num <= 11:
  68             return 1
  69         elif 12 <= num <= 21:
  70             return 2
  71         elif 22 <= num <= 28:
  72             return 3
  73         else:
  74             raise Exception("halp")
  75     else:
  76         return 0
  77
  78 def run(typ, winlen, winstep, modelfun, modelname, multiclass=False):
  79     datas = []
  80     labels = []
  81
  82     for tg, wavp, num in get_datafiles():
  83         (d, l) = features_from_wav(
  84             tg, wavp, winlen=winlen, winstep=winstep, typ=typ)
  85         datas.append(d)
  86         if multiclass:
  87             labels.append(list(map(lambda x: singerfun(int(num), x), l)))
  88         else:
  89             labels.append(l)
  90
  91
  92     datas = np.concatenate(datas)
  93     labels = np.concatenate(labels)
  94     print(np.unique(labels, return_counts=True))
  95     if multiclass:
  96         labels = utils.to_categorical(labels, num_classes=4)
  97
  98
  99     rng_state = np.random.get_state()
 100     np.random.shuffle(datas)
 101     np.random.set_state(rng_state)
 102     np.random.shuffle(labels)
 103
 104     splitindex = int(labels.shape[0]*testset)
 105     testdata, traindata = datas[:splitindex], datas[splitindex:]
 106     testlabels, trainlabels = labels[:splitindex], labels[splitindex:]
 107     del datas, labels
 108
 109     model = modelfun(traindata)
 110
 111     #Train
 112     model.fit(traindata, trainlabels, epochs=10, batch_size=32, shuffle=False,
 113               verbose=verbosity)
 114
 115     #Test
 116     loss, acc = model.evaluate(testdata, testlabels, batch_size=32,
 117                                verbose=verbosity)
 118     print('{}\t{}\t{}\t{}\t{}\n'.format(
 119         winlen, winstep, modelname, loss, acc))
 120     return model
 121
 122 def bottlemodel(d):
 123     model = Sequential()
 124     model.add(Dense(13, activation='relu', input_shape=(d.shape[1],)))
 125     model.add(Dense(1, activation='sigmoid'))
 126 #    model.add(
 127 #        Dense(d.shape[1]*2, input_shape=(d.shape[1],), activation='relu'))
 128 #    model.add(Dense(13, activation='relu'))
 129 #    model.add(Dense(1, activation='sigmoid'))
 130     model.compile(optimizer='rmsprop',
 131                   loss='binary_crossentropy',
 132                   metrics=['accuracy'])
 133     return model
 134
 135 def multimodel(d):
 136     model = Sequential()
 137 #    model.add(Dense(d.shape[1]*2, input_shape=(d.shape[1],), activation='relu'))
 138     model.add(Dense(13, activation='relu', input_shape=(d.shape[1],)))
 139     model.add(Dense(4, activation='softmax'))
 140     model.compile(optimizer='rmsprop',
 141                   loss='categorical_crossentropy',
 142                   metrics=['accuracy'])
 143     return model
 144
 145
 146 if __name__ == '__main__':
 147     print('winlen\twinstep\tmodel\tloss\taccuracy\n')
 148     with backend.get_session():
 149         for winlen, winstep in ((0.025, 0.01), (0.1, 0.04), (0.2, 0.08)):
 150             for name, model, multi in reversed((('bottle', bottlemodel, False), ('multi', multimodel, True))):
 151                 m = run('mfcc', winlen, winstep, model, name, multi)
 152                 fproot = 'model_{}_{}_{}'.format(winlen, winstep, name)
 153                 with open('{}.json'.format(fproot), 'w') as f:
 154                     f.write(m.to_json())
 155                 m.save_weights('{}.hdf5'.format(fproot))