5454ad0f2995da9fb0d5af9fbf48fed5bd19909c
9 def process(num
, *args
):
10 tg
= 'textgrid/{:02d}.TextGrid'.format(int(num
))
12 tgob
= pympi
.TextGrid(tg
)
13 intervalit
= tgob
.get_tier('lyrics').get_intervals(sort
=True)
15 with
open('mfcc/{:02d}.mfcc'.format(int(num
))) as mfcc
:
16 (s
, e
, v
) = next(intervalit
)
22 (s
, e
, v
) = next(intervalit
)
26 label
= 1 if v
== '' else 0
27 data
.append([label
] + l
.split('\t'))
33 if __name__
== '__main__':
35 with
open('data.txt', 'r') as f
:
37 s
= l
.strip().split('\t')
38 sys
.stderr
.write('Processing {}: {}\n'.format(s
[0], s
[1]))
41 sys
.stderr
.write('Shuffling {} samples\n'.format(len(data
)))
44 splitpoint
= int(len(data
)*testset
)
45 testset
= data
[:splitpoint
]
46 trainset
= data
[splitpoint
:]
49 sys
.stderr
.write('Write testset: {} items\n'.format(splitpoint
))
50 with
open('test.txt', 'w') as f
:
52 f
.write('\t'.join(map(str, d
)))
54 sys
.stderr
.write('Write trainingset: {:d} items\n'.format(9*splitpoint
))
55 with
open('train.txt', 'w') as f
:
57 f
.write('\t'.join(map(str, d
)))