feat: implement --title; format audio player bar

2025-09-04 13:16:13 +02:00 · 2025-09-04 13:16:13 +02:00 · ac24db96a1
commit ac24db96a1
parent 2c44ca026a
2 changed files with 30 additions and 4 deletions
--- a/README.md
+++ b/README.md
@ -100,6 +100,18 @@ $ python tts.py --clipboard
 $ python tts.py -c
 ```

+### Labeling your outputs
+
+You can specify a title to be used as a label (i.e., file name prefix and directory name) for the generated outputs using `--title`.
+
+```bash
+# This will put the generated files in ./outputs/simple-greeting/
+$ python tts.py "Hello there!" --title "simple-greeting"
+
+# or shorter
+$ python tts.py "Hello there!" -t "simple-greeting"
+```
+
 ### Voices

 Optionally, you can indicate a voice you want to use with the `--voice` flag. See [all voices available](https://huggingface.co/hexgrad/Kokoro-82M/blob/main/VOICES.md).
--- a/tts.py
+++ b/tts.py
@ -31,6 +31,14 @@ def parse_args():
        default="",
        help="Text to read",
    )
+    parser.add_argument(
+        "--title",
+        "-t",
+        required=False,
+        type=str,
+        default="generated",
+        help="Title to use as label to the generated outputs",
+    )
    parser.add_argument(
        "--voice",
        "-v",
@ -76,7 +84,7 @@ def generate_audio(generator, name, voice, device):
    output_files = []
    print(f"Using {device} device...")
    for i, (gs, ps, audio) in enumerate(generator):
-        output_file_name=f'outputs/{name}-{voice}-{i}.wav'
+        output_file_name=f'outputs/{name}/{name}-{voice}-{i}.wav'
        os.makedirs(os.path.dirname(output_file_name), exist_ok=True)
        output_files.append(output_file_name)
        sf.write(output_file_name, audio, 24000)
@ -95,8 +103,11 @@ def play_audio(output_files):
        sleep(0.1)
        duration=media.get_length() / 1000
        chunk=f"{i+1}/{length} " if length > 1 else ""
-        description = f"\u25B6 {chunk}({'{0:0>5.2f}'.format(duration)}s)"
-        for i in tqdm(range(100), desc=description):
+        description = f"\u25B6 {chunk}"
+        for i in tqdm(range(100),
+            desc=description,
+            bar_format='{l_bar} {elapsed} {bar} {remaining}',
+            colour='yellow'):
            sleep(duration / 100)

 def main():
@ -124,7 +135,10 @@ def main():
        name = "chat"
        text = args.input_text

-    generator = pipeline(text, voice=voice, split_pattern=r':\n+')
+    if args.title:
+        name = args.title
+
+    generator = pipeline(text, voice=voice, split_pattern=r'[:.?!;]\n+')
    output_files = generate_audio(generator, name, voice, args.device)
    if args.skip_play:
        print("Audio player disabled.", f"{name}-{voice}-#.wav")