feat: show fun preparation texts (while generating audio)

This commit is contained in:
Ayo Ayco 2025-09-04 21:40:39 +02:00
parent 6a0105ff1c
commit 79d5352329

141
tts.py
View file

@ -2,6 +2,7 @@ import os
from time import sleep, time from time import sleep, time
import warnings import warnings
import importlib import importlib
import random
import torch import torch
import argparse import argparse
@ -19,6 +20,20 @@ voices = {
'brit': 'bf_emma' 'brit': 'bf_emma'
} }
prep_texts = [
"Check mic, 1-2-3...",
"*Tap* *tap* ... Is this thing on?",
"Ready, set... *Ahem!*",
"Mic's on, lights are set, I'm ready to roll.",
"All set? Let's make it a good one.",
"Ready, set, go—now that's the real countdown.",
"Checking the mic, one, two, three.",
"Lights, mic, action—now let's do this.",
"Hold tight—this is about to get interesting.",
"If the mic works, we're good to go.",
"All systems green—let's make this a good one."
]
def parse_args(): def parse_args():
parser = argparse.ArgumentParser(description="Simple TTS", allow_abbrev=False) parser = argparse.ArgumentParser(description="Simple TTS", allow_abbrev=False)
parser.add_argument( parser.add_argument(
@ -83,12 +98,12 @@ def parse_args():
def generate_audio(generator, name, voice): def generate_audio(generator, name, voice):
output_files = [] output_files = []
with yaspin():
for i, (gs, ps, audio) in enumerate(generator): for i, (gs, ps, audio) in enumerate(generator):
output_file_name=f'outputs/{name}/{name}-{voice}-{i}.wav' output_file_name=f'outputs/{name}/{name}-{voice}-{i}.wav'
os.makedirs(os.path.dirname(output_file_name), exist_ok=True) os.makedirs(os.path.dirname(output_file_name), exist_ok=True)
output_files.append(output_file_name) output_files.append(output_file_name)
sf.write(output_file_name, audio, 24000) sf.write(output_file_name, audio, 24000)
return output_files return output_files
def play_audio(output_files): def play_audio(output_files):
@ -108,70 +123,80 @@ def play_audio(output_files):
colour='yellow'): colour='yellow'):
sleep(duration / 100) sleep(duration / 100)
def main(): def main():
args=parse_args()
if not args.verbose: # Get a random "preparing" text
# Disable all warnings spinner_text = random.choice(prep_texts)
warnings.filterwarnings("ignore")
pipeline = KPipeline(lang_code='a', device=args.device, repo_id='hexgrad/Kokoro-82M') # Generate audio
if args.voice in voices: with yaspin() as spinner:
voice=voices[args.voice] spinner.text = spinner_text
else:
voice=voices['pro'] if args.voice is None else args.voice args=parse_args()
# filename argument if not args.verbose:
if args.input_text == "": # Disable all warnings
if args.clipboard: warnings.filterwarnings("ignore")
# use copied text
text = pyperclip.paste() pipeline = KPipeline(lang_code='a', device=args.device, repo_id='hexgrad/Kokoro-82M')
name = 'copied' if args.voice in voices:
voice=voices[args.voice]
else: else:
file_path = args.input_file voice=voices['pro'] if args.voice is None else args.voice
directory, file_name = os.path.split(file_path)
name = '.'.join(file_name.split('.')[:-1])
file = open(file_path, "r")
text = file.read()
else:
name = "chat"
text = args.input_text
if args.title: # filename argument
name = args.title if args.input_text == "":
if args.clipboard:
# use copied text
text = pyperclip.paste()
name = 'copied'
else:
file_path = args.input_file
directory, file_name = os.path.split(file_path)
name = '.'.join(file_name.split('.')[:-1])
file = open(file_path, "r")
text = file.read()
else:
name = "chat"
text = args.input_text
# make safe for filenames if args.title:
name = name.replace(" ", "_") name = args.title
name = name.replace("\\", "_")
name = name.replace("/", "_")
''' # make safe for filenames
Split patterns: name = name.replace(" ", "_")
- only multiple consecutive new line (to handle wrapped statements) name = name.replace("\\", "_")
- statements ending in punctuations (:.?!;) name = name.replace("/", "_")
- list items starting in '-' or '*'
- numbered items starting with a digit followed by a dot '.'
'''
generator = pipeline(
text,
voice=voice,
split_pattern=r'\n{2,}|[:.?!;]\n+|\n[\*\-(\d+\.)]'
)
if args.verbose: '''
print(f"[TTS] Using device: \"{args.device}\", voice: \"{voice}\", output label: \"{name}\"") Split patterns:
if args.clipboard: - only multiple consecutive new line (to handle wrapped statements)
print('[TTS] Using copied text as input.') - statements ending in punctuations (:.?!;)
- list items starting in '-' or '*'
- numbered items starting with a digit followed by a dot '.'
'''
generator = pipeline(
text,
voice=voice,
split_pattern=r'\n{2,}|[:.?!;]\n+|\n[\*\-(\d+\.)]'
)
start_time = time() if args.verbose:
output_files = generate_audio(generator, name, voice) print(f"[TTS] Using device: \"{args.device}\", voice: \"{voice}\", output label: \"{name}\"")
generation_time = time() - start_time if args.clipboard:
directory,f = os.path.split(output_files[0]) print('[TTS] Using copied text as input.')
if args.verbose: start_time = time()
print(f"[TTS] {len(output_files)} chunks generated in {generation_time:.2f} seconds") output_files = generate_audio(generator, name, voice)
print(f"[TTS] Output files are in: {directory}/*") generation_time = time() - start_time
directory,f = os.path.split(output_files[0])
if args.verbose:
print(f"[TTS] {len(output_files)} chunks generated in {generation_time:.2f} seconds")
print(f"[TTS] Output files are in: {directory}/*")
# Play audio
if args.skip_play: if args.skip_play:
print(f"[TTS] Audio player disabled: {directory}/*") print(f"[TTS] Audio player disabled: {directory}/*")
else: else: