13 num
= re
.match('^.*/(\\d+).TextGrid$', num
).group(1)
14 tg
= 'textgrid/{:02d}.TextGrid'.format(int(num
))
16 tgob
= pympi
.TextGrid(tg
)
17 intervalit
= tgob
.get_tier('lyrics').get_intervals(sort
=True)
19 with
open('mfcc/{:02d}.mfcc'.format(int(num
))) as mfcc
:
20 (s
, e
, v
) = next(intervalit
)
26 (s
, e
, v
) = next(intervalit
)
30 label
= 1 if v
== '' else 0
31 data
.append([label
] + l
.split('\t'))
37 if __name__
== '__main__':
39 for fl
in glob
.glob(os
.path
.join(os
.getcwd(), 'textgrid', '*.TextGrid')):
40 sys
.stderr
.write('Segment {}\n'.format(fl
))
43 sys
.stderr
.write('Shuffling {} samples\n'.format(len(data
)))
46 splitpoint
= int(len(data
)*testset
)
47 testset
= data
[:splitpoint
]
48 trainset
= data
[splitpoint
:]
51 sys
.stderr
.write('Write testset: {} items\n'.format(splitpoint
))
52 with
open('test.txt', 'w') as f
:
54 f
.write('\t'.join(map(str, d
)))
56 sys
.stderr
.write('Write trainingset: {:d} items\n'.format(9*splitpoint
))
57 with
open('train.txt', 'w') as f
:
59 f
.write('\t'.join(map(str, d
)))