download libritts test

saveriyo · saveriyo · commit 107c90c5f622 · 2024-09-11T10:52:21.000-07:00
diff --git a/wavtokenizer/prepare_libritts.py b/wavtokenizer/prepare_libritts.py
@@ -41,12 +41,15 @@ def main():
     # Define download URLs
     train_url = "http://www.openslr.org/resources/60/train-clean-100.tar.gz"
     val_url = "http://www.openslr.org/resources/60/dev-clean.tar.gz"
+    test_url = "http://www.openslr.org/resources/60/test-clean.tar.gz"
 
     # Define target directories and extraction subdirectories
     train_dir = "./data/train"
     val_dir = "./data/infer"
+    test_dir = "./data/infer"
     train_extraction_subdir = os.path.join(train_dir, "LibriTTS", "train-clean-100")
     val_extraction_subdir = os.path.join(val_dir, "LibriTTS", "dev-clean")
+    test_extraction_subdir = os.path.join(test_dir, "LibriTTS", "test-clean")
 
     # Skip processing if the train or infer directory already exists
     if os.path.exists(train_extraction_subdir) and os.listdir(train_extraction_subdir):
@@ -63,6 +66,13 @@ def main():
         download_and_extract(val_url, val_dir)
         generate_filelist(val_extraction_subdir, os.path.join(val_dir, "libritts_val"))
 
+    if os.path.exists(test_extraction_subdir) and os.listdir(test_extraction_subdir):
+        print(f"{test_extraction_subdir} already exists, skipping test data preparation.")
+    else:
+        print(f"Checking test data in {test_extraction_subdir}...")
+        download_and_extract(test_url, test_dir)
+        generate_filelist(test_extraction_subdir, os.path.join(test_dir, "libritts_testclean"))
+
     print("Dataset preparation complete.")
 
 if __name__ == "__main__":
@@ -84,10 +94,7 @@ def main():
 │   └── infer/
 │       ├── LibriTTS/
 │       │   └── dev-clean/
-│       │       ├── <speaker_id>/
-│       │       │   ├── <chapter_id>/
-│       │       │   │   ├── <audio_files>.wav
-│       │       │   │   └── ...
-│       │       └── ...
-│       └── libritts_val  # File list generated here
+│       │   └── test-clean/
+│       ├── libritts_val
+│       └── libritts_testclean
 """