initial commit

2025-09-03 20:59:57 +02:00 · 2025-09-03 20:59:57 +02:00 · 42193d46f3
commit 42193d46f3
5 changed files with 91 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,7 @@
+*~
+*swp
+*swo
+
+outputs/
+inputs/
+*.wav
--- a/README.md
+++ b/README.md
@ -0,0 +1,34 @@
+# Simple TTS
+
+A simple text to speech powered by [kokoro](https://huggingface.co/hexgrad/Kokoro-82M).
+
+## Setup
+
+Create a new environment. Here I use `conda`:
+
+```bash
+$ conda create -n tts
+```
+
+Because I use an Intel-based laptop, I use the [ipex-llm environment with PyTorch 2.6](https://git.ayo.run/ayo/ipex-llm/src/branch/main/docs/mddocs/Quickstart/install_pytorch26_gpu.md):
+
+```bash
+### for Intel XPU specific device usage:
+$ conda create -n tts --clone llm-pt26
+```
+
+Activate the environment and install the dependencies
+
+```bash
+$ conda activate tts
+$ python -m pip install -r requirements.txt
+```
+
+Because `vlc` is used to automatically play the generated audio, you will have to install it:
+
+```bash
+sudo apt update
+sudo apt install vlc
+```
+
+Note: installing `vlc` via flatpak or snap will not work, as the code needs access to `libvlc`.
--- a/env.sh
+++ b/env.sh
@ -0,0 +1,5 @@
+conda activate tts
+
+unset OCL_ICD_VENDORS
+export SYCL_CACHE_PERSISTENT=1
+export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
--- a/main.py
+++ b/main.py
@ -0,0 +1,40 @@
+from kokoro import KPipeline
+from IPython.display import display, Audio
+import soundfile as sf
+import torch
+import sys
+import os
+import vlc
+from time import sleep
+
+pipeline = KPipeline(lang_code='a', device='xpu')
+
+# filename argument
+file_path = sys.argv[1]
+directory, file_name = os.path.split(file_path)
+
+name = '.'.join(file_name.split('.')[:-1])
+
+file = open(file_path, "r")
+text = file.read()
+generator = pipeline(text, voice='af_bella')
+
+output_files = []
+
+for i, (gs, ps, audio) in enumerate(generator):
+    # print(i, gs, ps)
+    display(Audio(data=audio, rate=24000, autoplay=i==0))
+    output_file_name=f'{name}-{i}.wav'
+    print(f"Done generating audio: {output_file_name}")
+    sf.write(output_file_name, audio, 24000)
+    output_files.append(output_file_name)
+
+for output in output_files:
+    full_path = os.path.abspath(output)
+    print(f"Playing: {output}")
+    media = vlc.MediaPlayer(f"file://{full_path}")
+    media.play()
+    sleep(0.1)
+    duration=media.get_length() / 1000
+    print(f"duration: {duration}s")
+    sleep(duration)
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,5 @@
+kokoro
+IPython
+soundfile
+torch
+vlc