feat: show spinner while generating; implement --verbose

This commit is contained in:
Ayo Ayco 2025-09-04 20:54:49 +02:00
parent 09ae3ae0fc
commit 618a833881
2 changed files with 34 additions and 16 deletions

View file

@@ -5,3 +5,4 @@ tqdm
argparse argparse
torch torch
pyperclip pyperclip
yaspin

37
tts.py
View file

@@ -10,9 +10,9 @@ import soundfile as sf
# import vlc # import vlc
from tqdm import tqdm from tqdm import tqdm
import pyperclip import pyperclip
from yaspin import yaspin
# Disable all warnings
warnings.filterwarnings("ignore")
# See voices: https://huggingface.co/hexgrad/Kokoro-82M/blob/main/VOICES.md # See voices: https://huggingface.co/hexgrad/Kokoro-82M/blob/main/VOICES.md
voices = { voices = {
@@ -54,6 +54,12 @@ def parse_args():
default="demo/tongue-twister.txt", default="demo/tongue-twister.txt",
help="Path to the input text file", help="Path to the input text file",
) )
parser.add_argument(
"--verbose",
default=False,
action="store_true",
help="Show verbose reports",
)
parser.add_argument( parser.add_argument(
"--clipboard", "--clipboard",
"-c", "-c",
@@ -78,28 +84,24 @@ def parse_args():
) )
return parser.parse_args() return parser.parse_args()
def generate_audio(generator, name, voice, device): def generate_audio(generator, name, voice):
start_time = time()
output_files = [] output_files = []
print(f"Using {device} device...") with yaspin():
for i, (gs, ps, audio) in enumerate(generator): for i, (gs, ps, audio) in enumerate(generator):
output_file_name=f'outputs/{name}/{name}-{voice}-{i}.wav' output_file_name=f'outputs/{name}/{name}-{voice}-{i}.wav'
os.makedirs(os.path.dirname(output_file_name), exist_ok=True) os.makedirs(os.path.dirname(output_file_name), exist_ok=True)
output_files.append(output_file_name) output_files.append(output_file_name)
sf.write(output_file_name, audio, 24000) sf.write(output_file_name, audio, 24000)
generation_time = time() - start_time
print(f"{len(output_files)} chunks generated in {generation_time:.2f} seconds")
return output_files return output_files
def play_audio(output_files): def play_audio(output_files):
vlc_module = importlib.import_module("vlc") vlc_module = importlib.import_module("vlc")
print("Now playing generated audio...")
length = len(output_files) length = len(output_files)
for i, output in enumerate(output_files): for i, output in enumerate(output_files):
full_path = os.path.abspath(output) full_path = os.path.abspath(output)
media = vlc_module.MediaPlayer(f"file://{full_path}") media = vlc_module.MediaPlayer(f"file://{full_path}")
media.play() media.play()
sleep(0.1) sleep(0.01)
duration=media.get_length() / 1000 duration=media.get_length() / 1000
chunk=f"{i+1}/{length} " if length > 1 else "" chunk=f"{i+1}/{length} " if length > 1 else ""
description = f"\u25B6 {chunk}" description = f"\u25B6 {chunk}"
@@ -111,6 +113,11 @@ def play_audio(output_files):
def main(): def main():
args=parse_args() args=parse_args()
if not args.verbose:
# Disable all warnings
warnings.filterwarnings("ignore")
pipeline = KPipeline(lang_code='a', device=args.device, repo_id='hexgrad/Kokoro-82M') pipeline = KPipeline(lang_code='a', device=args.device, repo_id='hexgrad/Kokoro-82M')
if args.voice in voices: if args.voice in voices:
voice=voices[args.voice] voice=voices[args.voice]
@@ -155,8 +162,18 @@ def main():
split_pattern=r'\n{2,}|[:.?!;]\n+|\n[\*\-(\d+\.)]' split_pattern=r'\n{2,}|[:.?!;]\n+|\n[\*\-(\d+\.)]'
) )
output_files = generate_audio(generator, name, voice, args.device) if (args.verbose):
print(f"Using {args.device} device.")
start_time = time()
output_files = generate_audio(generator, name, voice)
generation_time = time() - start_time
directory, output_file_name = os.path.split(output_files[0]) directory, output_file_name = os.path.split(output_files[0])
if args.verbose:
print(f"{len(output_files)} chunks generated in {generation_time:.2f} seconds")
print("Now playing generated audio...")
if args.skip_play: if args.skip_play:
print(f"Audio player disabled: {directory}/*") print(f"Audio player disabled: {directory}/*")
else: else: