diff --git a/README.md b/README.md index 1a0e2b0..36be6bb 100644 --- a/README.md +++ b/README.md @@ -69,10 +69,16 @@ Running the program without arguments will use the demo text `tongue-twister.txt $ python tts.py # will use default arguments ``` -To run the program with an input file, use flag `--input`. +You can pass a string as first argument: ```bash -$ python tts.py --input demo/tongue-twister.txt +$ python tts.py "Hello world!" # will be read by the default voice +``` + +To run the program with an input file, use flag `--input_file`. + +```bash +$ python tts.py --input_file demo/tongue-twister.txt ``` ### Voices diff --git a/tts.py b/tts.py index 3dfa817..e307e73 100644 --- a/tts.py +++ b/tts.py @@ -1,6 +1,7 @@ import sys import os from time import sleep, time +import warnings import torch import argparse @@ -9,6 +10,8 @@ import soundfile as sf import vlc from tqdm import tqdm +# Disable all warnings +warnings.filterwarnings("ignore") # See voices: https://huggingface.co/hexgrad/Kokoro-82M/blob/main/VOICES.md voices = { @@ -18,23 +21,32 @@ voices = { 'brit': 'bf_emma' } - def parse_args(): parser = argparse.ArgumentParser(description="Simple TTS") + parser.add_argument( + "input_text", + type=str, + nargs='?', + default="", + help="Text to read", + ) parser.add_argument( "--voice", + required=False, type=str, default="pro", help="Voice to use (pro, hot, asmr, brit)", ) parser.add_argument( - "--input", + "--input_file", + required=False, type=str, default="demo/tongue-twister.txt", - help="Voice to use (pro, hot, asmr, brit)", + help="Path to the input text file", ) parser.add_argument( "--device", + required=False, type=str, default=("cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else ("xpu" if torch.xpu.is_available() else "cpu"))), help="Device for inference: cuda | mps | cpu", @@ -50,28 +62,30 @@ def main(): voice=voices['pro'] if args.voice is None else args.voice # filename argument - file_path = args.input - directory, file_name = os.path.split(file_path) + if args.input_text == "": + file_path = args.input_file + directory, file_name = os.path.split(file_path) + name = '.'.join(file_name.split('.')[:-1]) + file = open(file_path, "r") + text = file.read() + else: + name = "chat" + text = args.input_text - name = '.'.join(file_name.split('.')[:-1]) - - file = open(file_path, "r") - text = file.read() generator = pipeline(text, voice=voice) - output_files = [] length = 0 start_time = time() + print("Generating...") for i, (gs, ps, audio) in enumerate(generator): output_file_name=f'outputs/{name}-{voice}-{i}.wav' os.makedirs(os.path.dirname(output_file_name), exist_ok=True) output_files.append(output_file_name) sf.write(output_file_name, audio, 24000) - print(u'\u2713', output_file_name) length = length + 1 generation_time = time() - start_time - print(f"Generation time: {generation_time:.2f} seconds") + print(f"Done in {generation_time:.2f} seconds") for i, output in enumerate(output_files): full_path = os.path.abspath(output) @@ -84,4 +98,4 @@ def main(): sleep(duration / 100) if __name__ == "__main__": - main() \ No newline at end of file + main()