Skip to content

Commit e12e461

Browse files
committed
changes for VTL2.3 version, which now includes a lot of the functionality that was implemented here beforehand
1 parent 4a7ea3a commit e12e461

File tree

8 files changed

+1975
-23
lines changed

8 files changed

+1975
-23
lines changed

.gitignore

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
11
__pycache__/
2-
bin/VocalTractLabApi.so
32
bin/targetoptimizer
4-
bin/JD2.speaker
53
vtl_corpus1.0/

README.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,10 @@ This python tool is based on the work and on the Matlab code on Yingming Gao.
1313

1414
The overall logic is in ``create_corpus.py``, which executes the appropriate functions from top to bottom. The functions are supplied by the other files.
1515

16+
17+
Copyright
18+
=========
19+
As VocalTractLabApi.so and JD2.speaker are licensed under GPL v3, the rest of the code
20+
here is GPL as well. If the code no longer depends on VTL, you can use
21+
it under the MIT license.
22+

bin/JD2.speaker

Lines changed: 1858 additions & 0 deletions
Large diffs are not rendered by default.

bin/VocalTractLabApi.so

12 MB
Binary file not shown.

create_corpus.py

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,21 @@
33
import shutil
44
from multiprocessing import Pool
55

6-
from . import tgwav2utt, pitch_contour, sampa2ges
6+
from . import tgwav2utt, pitch_contour, sampa2seg, seg2ges
77

88
def create_corpus(version, *, geco_path=None):
99
"""
1010
creates the vtl corpus.
1111
1212
"""
1313
if not geco_path:
14-
raise ValueError("You need to specify the path where all GECO corpus files live that you have downloaded seperately.")
14+
pass
15+
#raise ValueError("You need to specify the path where all GECO corpus files live that you have downloaded seperately.")
1516
VERSION = version
1617
GECO_PATH = geco_path
1718
WAV_ORIG_DIR = f"vtl_corpus{VERSION}/wav_original"
1819
WAV_SYNTH_DIR = f"vtl_corpus{VERSION}/wav_synthesized"
20+
SEG_DIR = f"vtl_corpus{VERSION}/segments"
1921
PLAIN_GES_DIR = f"vtl_corpus{VERSION}/plain_gestures"
2022
FIXED_GES_DIR = f"vtl_corpus{VERSION}/fixed_gestures"
2123
CP_DIR = f"vtl_corpus{VERSION}/control_parameters"
@@ -26,14 +28,15 @@ def create_corpus(version, *, geco_path=None):
2628

2729
## create folders
2830
#os.makedirs(WAV_ORIG_DIR)
29-
#os.makedirs(WAV_SYNTH_DIR)
30-
#os.makedirs(PLAIN_GES_DIR)
31+
os.makedirs(WAV_SYNTH_DIR)
32+
os.makedirs(SEG_DIR)
33+
os.makedirs(PLAIN_GES_DIR)
3134
#os.makedirs(FIXED_GES_DIR)
32-
#os.makedirs(CP_DIR)
35+
os.makedirs(CP_DIR)
3336
#os.makedirs(PITCH_DIR)
3437
#os.makedirs(TG_DIR)
35-
#
36-
#
38+
39+
3740
## text grid + wave -> utterance, wav, text grids
3841
#base_names = [os.path.splitext(f)[0] for f in os.listdir(f"{GECO_PATH}/textgrids") if f.endswith('.textGrid')]
3942
#ii = 0
@@ -47,18 +50,21 @@ def create_corpus(version, *, geco_path=None):
4750
#pitch_contour.extract_pitch_tier(os.path.abspath(WAV_ORIG_DIR), os.path.abspath(PITCH_DIR), praat_script_path="./extractpitch.praat", n_jobs=8)
4851
#
4952
#pitch_contour.fit_f0(os.path.abspath(PITCH_DIR), os.path.abspath(TG_DIR), "./bin/targetoptimizer", n_jobs=8)
50-
#
51-
#
52-
## heuristacally create ges files
53-
#sampa2ges.sampa_to_ges(UTT_NAME, PLAIN_GES_DIR, phone_attributes='./phone_attributes.txt')
54-
#
55-
#
56-
## insert f0 fit into ges files
53+
54+
55+
# create segmentfiles
56+
sampa2seg.sampa_to_seg(UTT_NAME, SEG_DIR)
57+
58+
# create gesture files
59+
seg2ges.seg_to_ges(SEG_DIR, PLAIN_GES_DIR)
60+
61+
# insert f0 fit into ges files
5762
#pitch_contour.fix_all_ges(PLAIN_GES_DIR, PITCH_DIR, FIXED_GES_DIR)
5863

5964

6065

6166
# synthesize wav
67+
FIXED_GES_DIR = PLAIN_GES_DIR
6268
ges_files = [os.path.splitext(f)[0] for f in os.listdir(FIXED_GES_DIR) if f.endswith('.ges')]
6369

6470
commands = []

ges2wav.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
import os
55

66
VTL = ctypes.cdll.LoadLibrary(os.path.dirname(__file__) + '/bin/VocalTractLabApi.so')
7-
speaker_file_name = ctypes.c_char_p((os.path.dirname(__file__) + '/bin/CK_female.speaker').encode())
7+
#speaker_file_name = ctypes.c_char_p((os.path.dirname(__file__) + '/bin/CK_female.speaker').encode())
8+
speaker_file_name = ctypes.c_char_p((os.path.dirname(__file__) + '/bin/JD2.speaker').encode())
89

910
name = sys.argv[1]
1011
ges_dir = sys.argv[2]
@@ -13,16 +14,27 @@
1314

1415
gesture_file_name = ctypes.c_char_p(f'{ges_dir}/{name}.ges'.encode())
1516
wav_file_name = ctypes.c_char_p(f'{wav_dir}/{name}.wav'.encode())
16-
feedback_file_name = ctypes.c_char_p(f'{cp_dir}/{name}.txt'.encode())
17+
tract_sequence_file_name = ctypes.c_char_p(f'{cp_dir}/{name}.txt'.encode())
1718

18-
failure = VTL.vtlGesToWav(speaker_file_name, # input
19-
gesture_file_name, # input
20-
wav_file_name, # output
21-
feedback_file_name) # output
19+
# API 2.2
20+
#failure = VTL.vtlGesToWav(speaker_file_name, # input
21+
# gesture_file_name, # input
22+
# wav_file_name, # output
23+
# feedback_file_name) # output
2224

25+
# API 2.3
26+
failure = VTL.vtlInitialize(speaker_file_name)
27+
if failure != 0:
28+
raise ValueError('Error in vtlInitialize! Errorcode: %i' % failure)
29+
30+
failure = VTL.vtlGesturalScoreToTractSequence(gesture_file_name, tract_sequence_file_name)
31+
if failure != 0:
32+
raise ValueError('Error in vtlGesturalScoreToTractSequence! Errorcode: %i' % failure)
33+
34+
failure = VTL.vtlTractSequenceToAudio(tract_sequence_file_name, wav_file_name, None, None)
2335
if failure != 0:
2436
#raise ValueError('Error in vtlGesToWav! Errorcode: %i' % failure)
25-
print('Error in vtlGesToWav! Errorcode: %i' % failure)
37+
print('Error in vtlTractSequenceToAudio! Errorcode: %i' % failure)
2638
print(f'move "{name}" to ./BAD/ folder')
2739
shutil.copy(f'{ges_dir}/{name}.ges', f'./BAD/{name}.ges')
2840

sampa2seg.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import os
2+
3+
def sampa_to_seg(utt_name, seg_dir):
    """
    Write one segment file per utterance listed in the file ``utt_name``.

    The first line of ``utt_name`` is skipped as a header. Every following
    line must consist of three tab-separated columns: the utterance, its
    SAMPA transcription (surrounding ``/`` are stripped) and the
    whitespace-separated durations, one per SAMPA symbol. The durations
    column may be empty, in which case ``None`` is handed to ``write_seg``.

    Each segment file is written by ``write_seg`` to
    ``{seg_dir}/{index:06d}-{first 16 characters of the utterance}.seg``,
    where ``index`` counts the data lines starting at 0.

    Parameters
    ----------
    utt_name : path of the tab-separated utterance file
    seg_dir : directory that receives the ``.seg`` files

    Raises
    ------
    ValueError
        if a line does not have exactly three columns, if the number of
        durations differs from the number of SAMPA symbols, or if the SAMPA
        transcription is empty. The message contains the 1-based line number.
    IndexError
        re-raised from ``write_seg`` after printing the affected base name.

    A ``KeyError`` from ``write_seg`` is printed and the utterance skipped.
    """
    with open(utt_name, 'rt') as uttfile:

        uttfile.readline()  # skip header

        for ii, line in enumerate(uttfile):
            # line numbers in messages are 1-based and line 1 is the header
            line_number = ii + 2

            columns = line.split('\t')
            if len(columns) != 3:
                raise ValueError(
                    f"expected 3 tab-separated columns in line {line_number}, "
                    f"found {len(columns)}")
            utterance, sampa, durations = columns

            sampa = sampa.strip('/').split()
            durations = durations.split()
            if not durations:  # if no durations are given set to None
                durations = None
            elif len(sampa) != len(durations):
                # explicit raise instead of assert: asserts vanish under -O
                raise ValueError(
                    f"{len(sampa)} sampa symbols but {len(durations)} "
                    f"durations in line {line_number}")
            if not sampa:
                raise ValueError(f"sampa transcription is missing in line {line_number}")

            base_name = f'{ii:06d}-{utterance:.16s}'

            try:
                write_seg(f'{seg_dir}/{base_name}.seg', sampa, durations)
            except KeyError as e:
                # best effort: report the problem and go on with the next line
                print(e)
                continue
            except IndexError:
                print(base_name)
                raise
31+
32+
33+
def write_seg(segment_file_name, sampa, durations):
    """
    Write a segment file with one line per phone.

    The file starts and ends with an unnamed segment of 0.05 s. In between,
    every phone in ``sampa`` except ``'.'`` produces a line
    ``name = <phone>; duration_s = <duration>; `` with the duration
    formatted to six decimals. Lines are terminated with ``\\r\\n``.

    All lines are built before the file is opened, so invalid input never
    leaves a truncated segment file behind.

    Parameters
    ----------
    segment_file_name : path of the segment file to create or overwrite
    sampa : sequence of phone symbols
    durations : sequence of durations, one per entry of ``sampa``; every
        entry must be convertible with ``float``

    Raises
    ------
    TypeError
        if ``durations`` is ``None``.
    ValueError
        if ``sampa`` and ``durations`` differ in length or a duration cannot
        be converted to ``float``.
    """
    if durations is None:
        raise TypeError(
            "durations must contain one entry per phone, not None")

    sampa = list(sampa)
    durations = list(durations)
    if len(sampa) != len(durations):
        # zip() would silently drop the surplus entries
        raise ValueError(
            f"{len(sampa)} phones but {len(durations)} durations "
            f"for {segment_file_name}")

    silence = 'name = ; duration_s = 0.05000; \r\n'
    lines = [silence]
    for phone, duration in zip(sampa, durations):
        duration = float(duration)
        if phone == '.':
            continue
        lines.append(f'name = {phone}; duration_s = {duration:.6f}; \r\n')
    lines.append(silence)

    # newline='' writes the explicit '\r\n' unchanged on every platform
    with open(segment_file_name, 'wt', newline='') as seg_file:
        seg_file.writelines(lines)
42+

seg2ges.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import ctypes
2+
import sys
3+
import shutil
4+
import os
5+
6+
# Load the VocalTractLab shared library from the bin/ directory next to this module.
VTL = ctypes.cdll.LoadLibrary(os.path.dirname(__file__) + '/bin/VocalTractLabApi.so')
7+
#speaker_file_name = ctypes.c_char_p((os.path.dirname(__file__) + '/bin/CK_female.speaker').encode())
8+
# Encoded path of bin/JD2.speaker next to this module, wrapped as a C string for vtlInitialize.
speaker_file_name = ctypes.c_char_p((os.path.dirname(__file__) + '/bin/JD2.speaker').encode())
9+
10+
11+
def seg_to_ges(seg_dir, ges_dir):
    """
    Convert every ``.seg`` file in ``seg_dir`` into a ``.ges`` file in ``ges_dir``.

    The VTL library is initialised with the module-level speaker file, then
    ``vtlSegmentSequenceToGesturalScore`` is called once per segment file in
    sorted order. The output file keeps the base name of the input file.
    ``vtlClose`` is called once initialisation has succeeded, even when a
    later step raises.

    Parameters
    ----------
    seg_dir : directory that is scanned for ``*.seg`` files
    ges_dir : directory that receives the ``*.ges`` files

    Raises
    ------
    ValueError
        if ``vtlInitialize`` or ``vtlSegmentSequenceToGesturalScore`` returns
        a non-zero code. For the latter, the base name of the offending file
        is printed first.
    """
    failure = VTL.vtlInitialize(speaker_file_name)
    if failure != 0:
        raise ValueError('Error in vtlInitialize! Errorcode: %i' % failure)

    # from here on the library is initialised; always close it again
    try:
        seg_files = sorted(
            os.path.splitext(f)[0]
            for f in os.listdir(seg_dir)
            if f.endswith('.seg')
        )

        for name in seg_files:
            segment_file_name = ctypes.c_char_p(f'{seg_dir}/{name}.seg'.encode())
            gesture_file_name = ctypes.c_char_p(f'{ges_dir}/{name}.ges'.encode())

            failure = VTL.vtlSegmentSequenceToGesturalScore(segment_file_name, gesture_file_name)
            if failure != 0:
                print(name)
                raise ValueError('Error in vtlSegmentSequenceToGesturalScore! Errorcode: %i' % failure)
    finally:
        VTL.vtlClose()
29+

0 commit comments

Comments
 (0)