Changes for VTL version 2.3, which now includes much of the functionality that was previously implemented here

derNarr · derNarr · commit e12e461f111a · 2020-09-06T00:06:38.000+02:00
diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,3 @@
 __pycache__/
-bin/VocalTractLabApi.so
 bin/targetoptimizer
-bin/JD2.speaker
 vtl_corpus1.0/
diff --git a/README.rst b/README.rst
@@ -13,3 +13,10 @@ This python tool is based on the work and on the Matlab code on Yingming Gao.
 
 The overall logic is in ``create_corpus.py`` which executes the approriate functions from top to bottom. The functions are supplied by the other files.
 
+
+Copyright
+=========
+As VocalTractLabApi.so and JD2.speaker are licensed under GPL v3, the rest of
+the code here is GPL v3 as well.  If the code no longer depends on VTL, you
+can use it under the MIT license.
+
diff --git a/bin/JD2.speaker b/bin/JD2.speaker
diff --git a/bin/VocalTractLabApi.so b/bin/VocalTractLabApi.so
diff --git a/create_corpus.py b/create_corpus.py
@@ -3,19 +3,21 @@
 import shutil
 from multiprocessing import Pool
 
-from . import tgwav2utt, pitch_contour, sampa2ges
+from . import tgwav2utt, pitch_contour, sampa2seg, seg2ges
 
 def create_corpus(version, *, geco_path=None):
     """
     creates the vtl corpus.
 
     """
     if not geco_path:
-        raise ValueError("You need to specify the path where all GECO corpus files live that you have downloaded seperately.")
+        pass
+        #raise ValueError("You need to specify the path where all GECO corpus files live that you have downloaded seperately.")
     VERSION = version
     GECO_PATH = geco_path
     WAV_ORIG_DIR = f"vtl_corpus{VERSION}/wav_original"
     WAV_SYNTH_DIR = f"vtl_corpus{VERSION}/wav_synthesized"
+    SEG_DIR = f"vtl_corpus{VERSION}/segments"
     PLAIN_GES_DIR = f"vtl_corpus{VERSION}/plain_gestures"
     FIXED_GES_DIR = f"vtl_corpus{VERSION}/fixed_gestures"
     CP_DIR = f"vtl_corpus{VERSION}/control_parameters"
@@ -26,14 +28,15 @@ def create_corpus(version, *, geco_path=None):
 
     ## create folders
     #os.makedirs(WAV_ORIG_DIR)
-    #os.makedirs(WAV_SYNTH_DIR)
-    #os.makedirs(PLAIN_GES_DIR)
+    os.makedirs(WAV_SYNTH_DIR)
+    os.makedirs(SEG_DIR)
+    os.makedirs(PLAIN_GES_DIR)
     #os.makedirs(FIXED_GES_DIR)
-    #os.makedirs(CP_DIR)
+    os.makedirs(CP_DIR)
     #os.makedirs(PITCH_DIR)
     #os.makedirs(TG_DIR)
-    #
-    #
+
+
     ## text grid + wave -> utterance, wav, text grids
     #base_names = [os.path.splitext(f)[0] for f in os.listdir(f"{GECO_PATH}/textgrids") if f.endswith('.textGrid')]
     #ii = 0
@@ -47,18 +50,21 @@ def create_corpus(version, *, geco_path=None):
     #pitch_contour.extract_pitch_tier(os.path.abspath(WAV_ORIG_DIR), os.path.abspath(PITCH_DIR), praat_script_path="./extractpitch.praat",  n_jobs=8)
     #
     #pitch_contour.fit_f0(os.path.abspath(PITCH_DIR), os.path.abspath(TG_DIR), "./bin/targetoptimizer", n_jobs=8)
-    #
-    #
-    ## heuristacally create ges files
-    #sampa2ges.sampa_to_ges(UTT_NAME, PLAIN_GES_DIR, phone_attributes='./phone_attributes.txt')
-    #
-    #
-    ## insert f0 fit into ges files
+
+
+    # create segmentfiles
+    sampa2seg.sampa_to_seg(UTT_NAME, SEG_DIR)
+
+    # create gesture files
+    seg2ges.seg_to_ges(SEG_DIR, PLAIN_GES_DIR)
+
+    # insert f0 fit into ges files
     #pitch_contour.fix_all_ges(PLAIN_GES_DIR, PITCH_DIR, FIXED_GES_DIR)
 
 
 
     # synthesize wav
+    FIXED_GES_DIR = PLAIN_GES_DIR
     ges_files = [os.path.splitext(f)[0] for f in os.listdir(FIXED_GES_DIR) if f.endswith('.ges')]
 
     commands = []
diff --git a/ges2wav.py b/ges2wav.py
@@ -4,7 +4,8 @@
 import os
 
 VTL = ctypes.cdll.LoadLibrary(os.path.dirname(__file__) + '/bin/VocalTractLabApi.so')
-speaker_file_name = ctypes.c_char_p((os.path.dirname(__file__) + '/bin/CK_female.speaker').encode())
+#speaker_file_name = ctypes.c_char_p((os.path.dirname(__file__) + '/bin/CK_female.speaker').encode())
+speaker_file_name = ctypes.c_char_p((os.path.dirname(__file__) + '/bin/JD2.speaker').encode())
 
 name = sys.argv[1]
 ges_dir = sys.argv[2]
@@ -13,16 +14,27 @@
 
 gesture_file_name = ctypes.c_char_p(f'{ges_dir}/{name}.ges'.encode())
 wav_file_name = ctypes.c_char_p(f'{wav_dir}/{name}.wav'.encode())
-feedback_file_name = ctypes.c_char_p(f'{cp_dir}/{name}.txt'.encode())
+tract_sequence_file_name = ctypes.c_char_p(f'{cp_dir}/{name}.txt'.encode())
 
-failure = VTL.vtlGesToWav(speaker_file_name,  # input
-                          gesture_file_name,  # input
-                          wav_file_name,  # output
-                          feedback_file_name)  # output
+# API 2.2
+#failure = VTL.vtlGesToWav(speaker_file_name,  # input
+#                          gesture_file_name,  # input
+#                          wav_file_name,  # output
+#                          feedback_file_name)  # output
 
+# API 2.3
+failure = VTL.vtlInitialize(speaker_file_name)
+if failure != 0:
+    raise ValueError('Error in vtlInitialize! Errorcode: %i' % failure)
+
+failure = VTL.vtlGesturalScoreToTractSequence(gesture_file_name, tract_sequence_file_name)
+if failure != 0:
+    raise ValueError('Error in vtlGesturalScoreToTractSequence! Errorcode: %i' % failure)
+
+failure = VTL.vtlTractSequenceToAudio(tract_sequence_file_name, wav_file_name, None, None)
 if failure != 0:
     #raise ValueError('Error in vtlGesToWav! Errorcode: %i' % failure)
-    print('Error in vtlGesToWav! Errorcode: %i' % failure)
+    print('Error in vtlTractSequenceToAudio! Errorcode: %i' % failure)
     print(f'move "{name}" to ./BAD/ folder')
     shutil.copy(f'{ges_dir}/{name}.ges', f'./BAD/{name}.ges')
 
diff --git a/sampa2seg.py b/sampa2seg.py
@@ -0,0 +1,85 @@
+import os
+
+
+def sampa_to_seg(utt_name, seg_dir):
+    """
+    writes one segment file per utterance listed in the file `utt_name`.
+
+    The first line of `utt_name` is skipped as header.  Every other line holds
+    three tab separated columns: the utterance, its sampa transcription
+    (whitespace separated phones, optionally enclosed in '/') and the
+    whitespace separated durations, one per phone.  Empty lines are ignored.
+
+    The segment file of the data line with zero-based index ``ii`` is written
+    to ``{seg_dir}/{ii:06d}-{utterance:.16s}.seg`` by `write_seg`.
+
+    Raises a ValueError that names the line in `utt_name` if the sampa
+    transcription or the durations are missing or if their lengths differ.
+
+    """
+    with open(utt_name, 'rt') as uttfile:
+
+        uttfile.readline()  # skip header
+
+        for ii, line in enumerate(uttfile):
+            if not line.rstrip('\r\n'):  # tolerate empty lines, e. g. at the end of the file
+                continue
+            line_number = ii + 2  # one-based and the header line counts as well
+
+            utterance, sampa, durations = line.split('\t')
+            sampa = sampa.strip('/').split()
+            durations = durations.split()
+
+            # raise proper exceptions here, as an `assert` is dropped when python runs with -O
+            if not sampa:
+                raise ValueError(f"sampa transcription is missing in line {line_number}")
+            if not durations:
+                raise ValueError(f"durations are missing in line {line_number}")
+            if len(sampa) != len(durations):
+                raise ValueError(f"{len(sampa)} phones but {len(durations)} durations in line {line_number}")
+
+            base_name = f'{ii:06d}-{utterance:.16s}'
+
+            try:
+                write_seg(f'{seg_dir}/{base_name}.seg', sampa, durations)
+            except KeyError as e:
+                print(e)
+                continue
+            except IndexError:
+                print(base_name)
+                raise
+
+
+def write_seg(segment_file_name, sampa, durations):
+    """
+    writes the phones in `sampa` and their `durations` to `segment_file_name`.
+
+    The file starts and ends with a line with empty name and a duration_s of
+    0.05000.  In between one line per phone is written, phones equal to '.'
+    are left out.  Every entry of `durations` has to be convertible with
+    `float`.  All lines end with CRLF.
+
+    If `sampa` and `durations` differ in length the surplus entries of the
+    longer one are ignored.
+
+    Raises a TypeError if `durations` is None and a ValueError if a duration
+    string cannot be parsed as float; in both cases no file is created.
+
+    """
+    if durations is None:
+        raise TypeError("durations must hold one duration per phone, but is None")
+
+    # convert before the file is opened, so that bad input does not leave a
+    # truncated segment file behind
+    segments = [(phone, float(duration)) for phone, duration in zip(sampa, durations)]
+
+    # newline='' switches off the newline translation, which would turn the
+    # explicit '\r\n' into '\r\r\n' on Windows
+    with open(segment_file_name, 'wt', newline='') as seg_file:
+        seg_file.write('name = ; duration_s = 0.05000; \r\n')
+        for phone, duration in segments:
+            if phone == '.':
+                continue
+            seg_file.write(f'name = {phone}; duration_s = {duration:.6f}; \r\n')
+        seg_file.write('name = ; duration_s = 0.05000; \r\n')
+
diff --git a/seg2ges.py b/seg2ges.py
@@ -0,0 +1,46 @@
+import ctypes
+import sys
+import shutil
+import os
+
+# folder with the VocalTractLab library and the speaker file; the abspath
+# keeps the paths valid if __file__ comes without a directory part
+_BIN_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'bin')
+
+VTL = ctypes.cdll.LoadLibrary(os.path.join(_BIN_DIR, 'VocalTractLabApi.so'))
+#speaker_file_name = ctypes.c_char_p(os.path.join(_BIN_DIR, 'CK_female.speaker').encode())
+speaker_file_name = ctypes.c_char_p(os.path.join(_BIN_DIR, 'JD2.speaker').encode())
+
+
+def seg_to_ges(seg_dir, ges_dir):
+    """
+    converts all segment files in `seg_dir` to gestural score files in `ges_dir`.
+
+    Every ``{name}.seg`` file in `seg_dir` is passed to
+    ``vtlSegmentSequenceToGesturalScore`` together with the output file name
+    ``{ges_dir}/{name}.ges``.  The files are processed in sorted order.
+
+    Raises a ValueError if ``vtlInitialize`` or the conversion of a segment
+    file returns a non-zero error code.
+
+    """
+    failure = VTL.vtlInitialize(speaker_file_name)
+    if failure != 0:
+        raise ValueError('Error in vtlInitialize! Errorcode: %i' % failure)
+
+    # close VTL even if a conversion fails, so that the vtlInitialize and
+    # vtlClose calls always stay paired
+    try:
+        seg_files = sorted(os.path.splitext(f)[0] for f in os.listdir(seg_dir) if f.endswith('.seg'))
+
+        for name in seg_files:
+            segment_file_name = ctypes.c_char_p(f'{seg_dir}/{name}.seg'.encode())
+            gesture_file_name = ctypes.c_char_p(f'{ges_dir}/{name}.ges'.encode())
+
+            failure = VTL.vtlSegmentSequenceToGesturalScore(segment_file_name, gesture_file_name)
+            if failure != 0:
+                print(name)
+                raise ValueError('Error in vtlSegmentSequenceToGesturalScore! Errorcode: %i' % failure)
+    finally:
+        VTL.vtlClose()
+