feat: show fun preparation texts (while generating audio)

2025-09-04 21:40:39 +02:00 · 2025-09-04 21:40:39 +02:00 · 79d5352329
commit 79d5352329
parent 6a0105ff1c
1 changed files with 83 additions and 58 deletions
--- a/tts.py
+++ b/tts.py
@ -2,6 +2,7 @@ import os
 from time import sleep, time
 import warnings
 import importlib
 import random
 import torch
 import argparse
@ -19,6 +20,20 @@ voices = {
    'brit': 'bf_emma'
 }
 prep_texts = [
    "Check mic, 1-2-3...",
    "*Tap* *tap* ... Is this thing on?",
    "Ready, set... *Ahem!*",
    "Mic's on, lights are set, I'm ready to roll.",
    "All set? Let's make it a good one.",
    "Ready, set, go—now that's the real countdown.",
    "Checking the mic, one, two, three.",
    "Lights, mic, action—now let's do this.",
    "Hold tight—this is about to get interesting.",
    "If the mic works, we're good to go.",
    "All systems green—let's make this a good one."
 ]
 def parse_args():
    parser = argparse.ArgumentParser(description="Simple TTS", allow_abbrev=False)
    parser.add_argument(
@ -83,12 +98,12 @@ def parse_args():
 def generate_audio(generator, name, voice):
    output_files = []
-    with yaspin():
+
-        for i, (gs, ps, audio) in enumerate(generator):
+    for i, (gs, ps, audio) in enumerate(generator):
-            output_file_name=f'outputs/{name}/{name}-{voice}-{i}.wav'
+        output_file_name=f'outputs/{name}/{name}-{voice}-{i}.wav'
-            os.makedirs(os.path.dirname(output_file_name), exist_ok=True)
+        os.makedirs(os.path.dirname(output_file_name), exist_ok=True)
-            output_files.append(output_file_name)
+        output_files.append(output_file_name)
-            sf.write(output_file_name, audio, 24000)
+        sf.write(output_file_name, audio, 24000)
    return output_files
 def play_audio(output_files):
@ -108,70 +123,80 @@ def play_audio(output_files):
            colour='yellow'):
            sleep(duration / 100)
 def main():
    args=parse_args()
-    if not args.verbose:
+    # Get a random "preparing" text
-        # Disable all warnings
+    spinner_text = random.choice(prep_texts)
        warnings.filterwarnings("ignore")
-    pipeline = KPipeline(lang_code='a', device=args.device, repo_id='hexgrad/Kokoro-82M')
+    # Generate audio
-    if args.voice in voices:
+    with yaspin() as spinner:
-        voice=voices[args.voice]
+        spinner.text = spinner_text
-    else:
+ 
-        voice=voices['pro'] if args.voice is None else args.voice
+        args=parse_args()
-    # filename argument
+        if not args.verbose:
-    if args.input_text == "":
+            # Disable all warnings
-        if args.clipboard:
+            warnings.filterwarnings("ignore")
-            # use copied text
+ 
-            text = pyperclip.paste()
+        pipeline = KPipeline(lang_code='a', device=args.device, repo_id='hexgrad/Kokoro-82M')
-            name = 'copied'
+        if args.voice in voices:
            voice=voices[args.voice]
        else:
-            file_path = args.input_file
+            voice=voices['pro'] if args.voice is None else args.voice
            directory, file_name = os.path.split(file_path)
            name = '.'.join(file_name.split('.')[:-1])
            file = open(file_path, "r")
            text = file.read()
    else:
        name = "chat"
        text = args.input_text
-    if args.title:
+        # filename argument
-        name = args.title
+        if args.input_text == "":
            if args.clipboard:
                # use copied text
                text = pyperclip.paste()
                name = 'copied'
            else:
                file_path = args.input_file
                directory, file_name = os.path.split(file_path)
                name = '.'.join(file_name.split('.')[:-1])
                file = open(file_path, "r")
                text = file.read()
        else:
            name = "chat"
            text = args.input_text
-    # make safe for filenames
+        if args.title:
-    name = name.replace(" ", "_")
+            name = args.title
    name = name.replace("\\", "_")
    name = name.replace("/", "_")
-    '''
+        # make safe for filenames
-        Split patterns:
+        name = name.replace(" ", "_")
-        - only multiple consecutive new line (to handle wrapped statements)
+        name = name.replace("\\", "_")
-        - statements ending in punctuations (:.?!;)
+        name = name.replace("/", "_")
        - list items starting in '-' or '*'
        - numbered items starting with a digit followed by a dot '.'
    '''
    generator = pipeline(
        text,
        voice=voice,
        split_pattern=r'\n{2,}|[:.?!;]\n+|\n[\*\-(\d+\.)]'
    )
-    if args.verbose:
+        '''
-        print(f"[TTS] Using device: \"{args.device}\", voice: \"{voice}\", output label: \"{name}\"")
+            Split patterns:
-        if args.clipboard:
+            - only multiple consecutive new line (to handle wrapped statements)
-            print('[TTS] Using copied text as input.')
+            - statements ending in punctuations (:.?!;)
            - list items starting in '-' or '*'
            - numbered items starting with a digit followed by a dot '.'
        '''
        generator = pipeline(
            text,
            voice=voice,
            split_pattern=r'\n{2,}|[:.?!;]\n+|\n[\*\-(\d+\.)]'
        )
-    start_time = time()
+        if args.verbose:
-    output_files = generate_audio(generator, name, voice)
+            print(f"[TTS] Using device: \"{args.device}\", voice: \"{voice}\", output label: \"{name}\"")
-    generation_time = time() - start_time
+            if args.clipboard:
-    directory,f = os.path.split(output_files[0])
+                print('[TTS] Using copied text as input.')
-    if args.verbose:
+        start_time = time()
-        print(f"[TTS] {len(output_files)} chunks generated in {generation_time:.2f} seconds")
+        output_files = generate_audio(generator, name, voice)
-        print(f"[TTS] Output files are in: {directory}/*")
+        generation_time = time() - start_time
        directory,f = os.path.split(output_files[0])
        if args.verbose:
            print(f"[TTS] {len(output_files)} chunks generated in {generation_time:.2f} seconds")
            print(f"[TTS] Output files are in: {directory}/*")
    # Play audio
    if args.skip_play:
        print(f"[TTS] Audio player disabled: {directory}/*")
    else: