From ac24db96a1a576bce06a036e38a8f6be756c161a Mon Sep 17 00:00:00 2001 From: Ayo Date: Thu, 4 Sep 2025 13:16:13 +0200 Subject: [PATCH] feat: implement --title; format audio player bar --- README.md | 12 ++++++++++++ tts.py | 22 ++++++++++++++++++---- 2 files changed, 30 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 29a6e7a..3ac3d26 100644 --- a/README.md +++ b/README.md @@ -100,6 +100,18 @@ $ python tts.py --clipboard $ python tts.py -c ``` +### Labeling your outputs + +You can indicate a title to be used as a label (i.e., file name prefix and directory name) for the generated outputs using `--title` + +```bash +# This will put the generated files in ./outputs/simple-greeting/ +$ python tts.py "Hello there!" --title "simple-greeting" + +# or shorter +$ python tts.py "Hello there!" -t "simple-greeting" +``` + ### Voices Optionally, you can indicate a voice you want to use with the `--voice` flag. See [all voices available](https://huggingface.co/hexgrad/Kokoro-82M/blob/main/VOICES.md). 
diff --git a/tts.py b/tts.py index 541f771..40ee381 100644 --- a/tts.py +++ b/tts.py @@ -31,6 +31,14 @@ def parse_args(): default="", help="Text to read", ) + parser.add_argument( + "--title", + "-t", + required=False, + type=str, + default="generated", + help="Title to use as label to the generated outputs", + ) parser.add_argument( "--voice", "-v", @@ -76,7 +84,7 @@ def generate_audio(generator, name, voice, device): output_files = [] print(f"Using {device} device...") for i, (gs, ps, audio) in enumerate(generator): - output_file_name=f'outputs/{name}-{voice}-{i}.wav' + output_file_name=f'outputs/{name}/{name}-{voice}-{i}.wav' os.makedirs(os.path.dirname(output_file_name), exist_ok=True) output_files.append(output_file_name) sf.write(output_file_name, audio, 24000) @@ -95,8 +103,11 @@ def play_audio(output_files): sleep(0.1) duration=media.get_length() / 1000 chunk=f"{i+1}/{length} " if length > 1 else "" - description = f"\u25B6 {chunk}({'{0:0>5.2f}'.format(duration)}s)" - for i in tqdm(range(100), desc=description): + description = f"\u25B6 {chunk}" + for i in tqdm(range(100), + desc=description, + bar_format='{l_bar} {elapsed} {bar} {remaining}', + colour='yellow'): sleep(duration / 100) def main(): @@ -124,7 +135,10 @@ def main(): name = "chat" text = args.input_text - generator = pipeline(text, voice=voice, split_pattern=r':\n+') + if args.title: + name = args.title + + generator = pipeline(text, voice=voice, split_pattern=r'[:.?!;]\n+') output_files = generate_audio(generator, name, voice, args.device) if args.skip_play: print("Audio player disabled.", f"{name}-{voice}-#.wav")