feat: implement --title; format audio player bar
This commit is contained in:
parent
2c44ca026a
commit
ac24db96a1
2 changed files with 30 additions and 4 deletions
12
README.md
12
README.md
|
@ -100,6 +100,18 @@ $ python tts.py --clipboard
|
||||||
$ python tts.py -c
|
$ python tts.py -c
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Labeling your outputs
|
||||||
|
|
||||||
|
You can specify a title to be used as a label (i.e., file name prefix and directory name) for the generated outputs using `--title`.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# This will put the generated files in ./outputs/simple-greeting/
|
||||||
|
$ python tts.py "Hello there!" --title "simple-greeting"
|
||||||
|
|
||||||
|
# or shorter
|
||||||
|
$ python tts.py "Hello there!" -t "simple-greeting"
|
||||||
|
```
|
||||||
|
|
||||||
### Voices
|
### Voices
|
||||||
|
|
||||||
Optionally, you can indicate a voice you want to use with the `--voice` flag. See [all voices available](https://huggingface.co/hexgrad/Kokoro-82M/blob/main/VOICES.md).
|
Optionally, you can indicate a voice you want to use with the `--voice` flag. See [all voices available](https://huggingface.co/hexgrad/Kokoro-82M/blob/main/VOICES.md).
|
||||||
|
|
22
tts.py
22
tts.py
|
@ -31,6 +31,14 @@ def parse_args():
|
||||||
default="",
|
default="",
|
||||||
help="Text to read",
|
help="Text to read",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--title",
|
||||||
|
"-t",
|
||||||
|
required=False,
|
||||||
|
type=str,
|
||||||
|
default="generated",
|
||||||
|
help="Title to use as label to the generated outputs",
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--voice",
|
"--voice",
|
||||||
"-v",
|
"-v",
|
||||||
|
@ -76,7 +84,7 @@ def generate_audio(generator, name, voice, device):
|
||||||
output_files = []
|
output_files = []
|
||||||
print(f"Using {device} device...")
|
print(f"Using {device} device...")
|
||||||
for i, (gs, ps, audio) in enumerate(generator):
|
for i, (gs, ps, audio) in enumerate(generator):
|
||||||
output_file_name=f'outputs/{name}-{voice}-{i}.wav'
|
output_file_name=f'outputs/{name}/{name}-{voice}-{i}.wav'
|
||||||
os.makedirs(os.path.dirname(output_file_name), exist_ok=True)
|
os.makedirs(os.path.dirname(output_file_name), exist_ok=True)
|
||||||
output_files.append(output_file_name)
|
output_files.append(output_file_name)
|
||||||
sf.write(output_file_name, audio, 24000)
|
sf.write(output_file_name, audio, 24000)
|
||||||
|
@ -95,8 +103,11 @@ def play_audio(output_files):
|
||||||
sleep(0.1)
|
sleep(0.1)
|
||||||
duration=media.get_length() / 1000
|
duration=media.get_length() / 1000
|
||||||
chunk=f"{i+1}/{length} " if length > 1 else ""
|
chunk=f"{i+1}/{length} " if length > 1 else ""
|
||||||
description = f"\u25B6 {chunk}({'{0:0>5.2f}'.format(duration)}s)"
|
description = f"\u25B6 {chunk}"
|
||||||
for i in tqdm(range(100), desc=description):
|
for i in tqdm(range(100),
|
||||||
|
desc=description,
|
||||||
|
bar_format='{l_bar} {elapsed} {bar} {remaining}',
|
||||||
|
colour='yellow'):
|
||||||
sleep(duration / 100)
|
sleep(duration / 100)
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
@ -124,7 +135,10 @@ def main():
|
||||||
name = "chat"
|
name = "chat"
|
||||||
text = args.input_text
|
text = args.input_text
|
||||||
|
|
||||||
generator = pipeline(text, voice=voice, split_pattern=r':\n+')
|
if args.title:
|
||||||
|
name = args.title
|
||||||
|
|
||||||
|
generator = pipeline(text, voice=voice, split_pattern=r'[:.?!;]\n+')
|
||||||
output_files = generate_audio(generator, name, voice, args.device)
|
output_files = generate_audio(generator, name, voice, args.device)
|
||||||
if args.skip_play:
|
if args.skip_play:
|
||||||
print("Audio player disabled.", f"{name}-{voice}-#.wav")
|
print("Audio player disabled.", f"{name}-{voice}-#.wav")
|
||||||
|
|
Loading…
Reference in a new issue