commit 42193d46f384027b3ae182cd70d74c57c986038e Author: Ayo Date: Wed Sep 3 20:59:57 2025 +0200 initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0ce5f9a --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +*~ +*swp +*swo + +outputs/ +inputs/ +*.wav diff --git a/README.md b/README.md new file mode 100644 index 0000000..3c64d25 --- /dev/null +++ b/README.md @@ -0,0 +1,34 @@ +# Simple TTTS + +A simple text-to-speech tool powered by [kokoro](https://huggingface.co/hexgrad/Kokoro-82M). + +## Setup + +Create a new environment. Here I use `conda`: + +```bash +$ conda create -n tts +``` + +Because I use an Intel-based laptop, I use the [ipex-llm environment with pytorch 2.6](https://git.ayo.run/ayo/ipex-llm/src/branch/main/docs/mddocs/Quickstart/install_pytorch26_gpu.md): + +```bash +### for Intel XPU specific device usage: +$ conda create -n tts --clone llm-pt26 +``` + +Activate the environment and install the dependencies: + +```bash +$ conda activate tts +$ python -m pip install -r requirements.txt +``` + +Because `vlc` is used to automatically play the generated audio, you will have to install it: + +```bash +sudo apt update +sudo apt install vlc +``` + +Note: installing `vlc` via flatpak or snap will not work, as the code needs access to `libvlc`. 
diff --git a/env.sh b/env.sh
new file mode 100644
index 0000000..38b1713
--- /dev/null
+++ b/env.sh
@@ -0,0 +1,5 @@
+conda activate tts
+
+unset OCL_ICD_VENDORS
+export SYCL_CACHE_PERSISTENT=1
+export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..01ec3cf
--- /dev/null
+++ b/main.py
@@ -0,0 +1,68 @@
+"""Convert a text file to speech with Kokoro and play the generated audio.
+
+Usage: python main.py <text-file>
+
+One ``<name>-<i>.wav`` file is written to the current directory for every
+segment the pipeline yields; the files are then played back in order.
+"""
+from kokoro import KPipeline
+from IPython.display import display, Audio
+import soundfile as sf
+import torch
+import sys
+import os
+import vlc
+from pathlib import Path
+from time import sleep
+
+SAMPLE_RATE = 24000  # used for both the inline preview and the .wav files
+
+
+def synthesize(text, name, voice='af_bella'):
+    """Synthesize *text* and return the names of the .wav files written."""
+    pipeline = KPipeline(lang_code='a', device='xpu')
+    output_files = []
+    for i, (gs, ps, audio) in enumerate(pipeline(text, voice=voice)):
+        display(Audio(data=audio, rate=SAMPLE_RATE, autoplay=i == 0))
+        output_file_name = f'{name}-{i}.wav'
+        sf.write(output_file_name, audio, SAMPLE_RATE)
+        print(f"Done generating audio: {output_file_name}")
+        output_files.append(output_file_name)
+    return output_files
+
+
+def play(output):
+    """Play one audio file with VLC and block until it should have finished."""
+    print(f"Playing: {output}")
+    # as_uri() percent-encodes spaces and other special characters in the path.
+    media = vlc.MediaPlayer(Path(output).resolve().as_uri())
+    media.play()
+    # get_length() may still report 0 or -1 right after play(), so poll for a
+    # positive value (up to ~5s) instead of trusting one fixed delay; a
+    # negative duration would make sleep() raise ValueError.
+    length_ms = media.get_length()
+    for _ in range(50):
+        if length_ms > 0:
+            break
+        sleep(0.1)
+        length_ms = media.get_length()
+    duration = max(length_ms, 0) / 1000
+    print(f"duration: {duration}s")
+    sleep(duration)
+
+
+def main():
+    """Read the file named on the command line, synthesize it, and play it."""
+    if len(sys.argv) < 2:
+        sys.exit(f"usage: {sys.argv[0]} <text-file>")
+    file_path = Path(sys.argv[1])
+    # The context manager closes the input file even if reading fails.
+    with open(file_path, "r", encoding="utf-8") as file:
+        text = file.read()
+    # .stem keeps the whole name when the input file has no extension.
+    for output in synthesize(text, file_path.stem):
+        play(output)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..9ca664c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+kokoro
+IPython
+soundfile
+torch
+vlc