feat: implement --voice & --input
This commit is contained in:
parent
da6faa61fe
commit
b212a108df
4 changed files with 82 additions and 51 deletions
|
@ -51,8 +51,8 @@ $ . env.sh
|
|||
|
||||
## Usage
|
||||
|
||||
To run the program it needs an input file. For example, using `input.txt`
|
||||
To run the program it needs an input file using the flag `--input`. Optionally, you can indicate a [voice](https://huggingface.co/hexgrad/Kokoro-82M/blob/main/VOICES.md) you want to use with `--voice`.
|
||||
|
||||
```bash
|
||||
$ python main.py input.txt
|
||||
$ python tts.py --input demo/tongue-twister.txt --voice asmr
|
||||
```
|
||||
|
|
49
main.py
49
main.py
|
@ -1,49 +0,0 @@
|
|||
import sys
|
||||
import os
|
||||
from time import sleep
|
||||
|
||||
from kokoro import KPipeline
|
||||
import soundfile as sf
|
||||
import vlc
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
# See voices: https://huggingface.co/hexgrad/Kokoro-82M/blob/main/VOICES.md
|
||||
voices = {
|
||||
'pro': 'af_heart',
|
||||
'hot': 'af_bella',
|
||||
'asmr':'af_nicole',
|
||||
'brit': 'bf_emma'
|
||||
}
|
||||
pipeline = KPipeline(lang_code='a', device='xpu', repo_id='hexgrad/Kokoro-82M')
|
||||
|
||||
# filename argument
|
||||
file_path = sys.argv[1]
|
||||
directory, file_name = os.path.split(file_path)
|
||||
|
||||
name = '.'.join(file_name.split('.')[:-1])
|
||||
|
||||
file = open(file_path, "r")
|
||||
text = file.read()
|
||||
generator = pipeline(text, voice=voices['pro'])
|
||||
|
||||
output_files = []
|
||||
length = 0
|
||||
|
||||
for i, (gs, ps, audio) in enumerate(generator):
|
||||
output_file_name=f'outputs/{name}-{i}.wav'
|
||||
os.makedirs(os.path.dirname(output_file_name), exist_ok=True)
|
||||
output_files.append(output_file_name)
|
||||
sf.write(output_file_name, audio, 24000)
|
||||
print(u'\u2713', output_file_name)
|
||||
length = length + 1
|
||||
|
||||
for i, output in enumerate(output_files):
|
||||
full_path = os.path.abspath(output)
|
||||
media = vlc.MediaPlayer(f"file://{full_path}")
|
||||
media.play()
|
||||
sleep(0.1)
|
||||
duration=media.get_length() / 1000
|
||||
description = f"\u25B6 {i+1}/{length} ({'{0:0>5.2f}'.format(duration)}s)"
|
||||
for i in tqdm(range(100), desc=description):
|
||||
sleep(duration / 100)
|
|
@ -2,3 +2,4 @@ kokoro
|
|||
soundfile
|
||||
python-vlc
|
||||
tqdm
|
||||
argparse
|
||||
|
|
79
tts.py
Normal file
79
tts.py
Normal file
|
@ -0,0 +1,79 @@
|
|||
import sys
|
||||
import os
|
||||
from time import sleep
|
||||
|
||||
import argparse
|
||||
from kokoro import KPipeline
|
||||
import soundfile as sf
|
||||
import vlc
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
# See voices: https://huggingface.co/hexgrad/Kokoro-82M/blob/main/VOICES.md
|
||||
voices = {
|
||||
'pro': 'af_heart',
|
||||
'hot': 'af_bella',
|
||||
'asmr':'af_nicole',
|
||||
'brit': 'bf_emma'
|
||||
}
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(description="Simple TTS")
|
||||
parser.add_argument(
|
||||
"--voice",
|
||||
type=str,
|
||||
default="pro",
|
||||
help="Voice to use (pro, hot, asmr, brit)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--input",
|
||||
type=str,
|
||||
default="demo/tongue-twister.txt",
|
||||
help="Voice to use (pro, hot, asmr, brit)",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
def main():
|
||||
args=parse_args()
|
||||
pipeline = KPipeline(lang_code='a', device='xpu', repo_id='hexgrad/Kokoro-82M')
|
||||
voice=voices[args.voice]
|
||||
if voice is None:
|
||||
if args.voice is None:
|
||||
voice=voices['pro']
|
||||
else:
|
||||
voice=args.voice
|
||||
|
||||
# filename argument
|
||||
file_path = args.input
|
||||
directory, file_name = os.path.split(file_path)
|
||||
|
||||
name = '.'.join(file_name.split('.')[:-1])
|
||||
|
||||
file = open(file_path, "r")
|
||||
text = file.read()
|
||||
generator = pipeline(text, voice=voices[args.voice])
|
||||
|
||||
output_files = []
|
||||
length = 0
|
||||
|
||||
for i, (gs, ps, audio) in enumerate(generator):
|
||||
output_file_name=f'outputs/{name}-{i}.wav'
|
||||
os.makedirs(os.path.dirname(output_file_name), exist_ok=True)
|
||||
output_files.append(output_file_name)
|
||||
sf.write(output_file_name, audio, 24000)
|
||||
print(u'\u2713', output_file_name)
|
||||
length = length + 1
|
||||
|
||||
for i, output in enumerate(output_files):
|
||||
full_path = os.path.abspath(output)
|
||||
media = vlc.MediaPlayer(f"file://{full_path}")
|
||||
media.play()
|
||||
sleep(0.1)
|
||||
duration=media.get_length() / 1000
|
||||
description = f"\u25B6 {i+1}/{length} ({'{0:0>5.2f}'.format(duration)}s)"
|
||||
for i in tqdm(range(100), desc=description):
|
||||
sleep(duration / 100)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
Loading…
Reference in a new issue