From 79d5352329702494629dc0ad33329946d4f1f005 Mon Sep 17 00:00:00 2001
From: Ayo <ayo@ayco.io>
Date: Thu, 4 Sep 2025 21:40:39 +0200
Subject: [PATCH] feat: show fun preparation texts (while generating audio)

---
 tts.py | 141 +++++++++++++++++++++++++++++++++------------------------
 1 file changed, 83 insertions(+), 58 deletions(-)

diff --git a/tts.py b/tts.py
index 43fd8f0..1acb4eb 100644
--- a/tts.py
+++ b/tts.py
@@ -2,6 +2,7 @@ import os
 from time import sleep, time
 import warnings
 import importlib
+import random
 
 import torch
 import argparse
@@ -19,6 +20,20 @@ voices = {
     'brit': 'bf_emma'
 }
 
+prep_texts = [
+    "Check mic, 1-2-3...",
+    "*Tap* *tap* ... Is this thing on?",
+    "Ready, set... *Ahem!*",
+    "Mic's on, lights are set, I'm ready to roll.",
+    "All set? Let's make it a good one.",
+    "Ready, set, go—now that's the real countdown.",
+    "Checking the mic, one, two, three.",
+    "Lights, mic, action—now let's do this.",
+    "Hold tight—this is about to get interesting.",
+    "If the mic works, we're good to go.",
+    "All systems green—let's make this a good one."
+]
+
 def parse_args():
     parser = argparse.ArgumentParser(description="Simple TTS", allow_abbrev=False)
     parser.add_argument(
@@ -83,12 +98,12 @@ def parse_args():
 
 def generate_audio(generator, name, voice):
     output_files = []
-    with yaspin():
-        for i, (gs, ps, audio) in enumerate(generator):
-            output_file_name=f'outputs/{name}/{name}-{voice}-{i}.wav'
-            os.makedirs(os.path.dirname(output_file_name), exist_ok=True)
-            output_files.append(output_file_name)
-            sf.write(output_file_name, audio, 24000)
+
+    for i, (gs, ps, audio) in enumerate(generator):
+        output_file_name=f'outputs/{name}/{name}-{voice}-{i}.wav'
+        os.makedirs(os.path.dirname(output_file_name), exist_ok=True)
+        output_files.append(output_file_name)
+        sf.write(output_file_name, audio, 24000)
     return output_files
 
 def play_audio(output_files):
@@ -108,70 +123,80 @@ def play_audio(output_files):
             colour='yellow'):
             sleep(duration / 100)
 
+
 def main():
-    args=parse_args()
 
-    if not args.verbose:
-        # Disable all warnings
-        warnings.filterwarnings("ignore")
+    # Get a randome "preparing" text
+    spinner_text = random.choice(prep_texts)
 
-    pipeline = KPipeline(lang_code='a', device=args.device, repo_id='hexgrad/Kokoro-82M')
-    if args.voice in voices:
-        voice=voices[args.voice]
-    else:
-        voice=voices['pro'] if args.voice is None else args.voice
+    # Generate audio
+    with yaspin() as spinner:
+        spinner.text = spinner_text
+ 
+        args=parse_args()
 
-    # filename argument
-    if args.input_text == "":
-        if args.clipboard:
-            # use copied text
-            text = pyperclip.paste()
-            name = 'copied'
+        if not args.verbose:
+            # Disable all warnings
+            warnings.filterwarnings("ignore")
+ 
+        pipeline = KPipeline(lang_code='a', device=args.device, repo_id='hexgrad/Kokoro-82M')
+        if args.voice in voices:
+            voice=voices[args.voice]
         else:
-            file_path = args.input_file
-            directory, file_name = os.path.split(file_path)
-            name = '.'.join(file_name.split('.')[:-1])
-            file = open(file_path, "r")
-            text = file.read()
-    else:
-        name = "chat"
-        text = args.input_text
+            voice=voices['pro'] if args.voice is None else args.voice
 
-    if args.title:
-        name = args.title
+        # filename argument
+        if args.input_text == "":
+            if args.clipboard:
+                # use copied text
+                text = pyperclip.paste()
+                name = 'copied'
+            else:
+                file_path = args.input_file
+                directory, file_name = os.path.split(file_path)
+                name = '.'.join(file_name.split('.')[:-1])
+                file = open(file_path, "r")
+                text = file.read()
+        else:
+            name = "chat"
+            text = args.input_text
 
-    # make safe for filenames
-    name = name.replace(" ", "_")
-    name = name.replace("\\", "_")
-    name = name.replace("/", "_")
+        if args.title:
+            name = args.title
 
-    '''
-        Split patterns:
-        - only multiple consecutive new line (to handle wrapped statements)
-        - statements ending in punctuations (:.?!;)
-        - list items starting in '-' or '*'
-        - numbered items starting with a digit followed by a dot '.'
-    '''
-    generator = pipeline(
-        text,
-        voice=voice,
-        split_pattern=r'\n{2,}|[:.?!;]\n+|\n[\*\-(\d+\.)]'
-    )
+        # make safe for filenames
+        name = name.replace(" ", "_")
+        name = name.replace("\\", "_")
+        name = name.replace("/", "_")
 
-    if args.verbose:
-        print(f"[TTS] Using device: \"{args.device}\", voice: \"{voice}\", output label: \"{name}\"")
-        if args.clipboard:
-            print('[TTS] Using copied text as input.')
+        '''
+            Split patterns:
+            - only multiple consecutive new line (to handle wrapped statements)
+            - statements ending in punctuations (:.?!;)
+            - list items starting in '-' or '*'
+            - numbered items starting with a digit followed by a dot '.'
+        '''
+        generator = pipeline(
+            text,
+            voice=voice,
+            split_pattern=r'\n{2,}|[:.?!;]\n+|\n[\*\-(\d+\.)]'
+        )
 
-    start_time = time()
-    output_files = generate_audio(generator, name, voice)
-    generation_time = time() - start_time
-    directory,f = os.path.split(output_files[0])
+        if args.verbose:
+            print(f"[TTS] Using device: \"{args.device}\", voice: \"{voice}\", output label: \"{name}\"")
+            if args.clipboard:
+                print('[TTS] Using copied text as input.')
 
-    if args.verbose:
-        print(f"[TTS] {len(output_files)} chunks generated in {generation_time:.2f} seconds")
-        print(f"[TTS] Output files are in: {directory}/*")
+        start_time = time()
+        output_files = generate_audio(generator, name, voice)
+        generation_time = time() - start_time
+        directory,f = os.path.split(output_files[0])
 
+        if args.verbose:
+            print(f"[TTS] {len(output_files)} chunks generated in {generation_time:.2f} seconds")
+            print(f"[TTS] Output files are in: {directory}/*")
+
+    # Play audio
     if args.skip_play:
         print(f"[TTS] Audio player disabled: {directory}/*")
     else: