From a5a32903e05ee3822bec10d189e838a5b985a512 Mon Sep 17 00:00:00 2001
From: Ayo <ayo@ayco.io>
Date: Thu, 4 Sep 2025 15:21:45 +0200
Subject: [PATCH] feat: better splitting for common markdown syntaxes

- list items
- multiple new lines
- statements ending with punctuation
---
 tts.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/tts.py b/tts.py
index b202726..d89a0ee 100644
--- a/tts.py
+++ b/tts.py
@@ -143,7 +143,19 @@ def main():
     name = name.replace("\\", "_")
     name = name.replace("/", "_")
 
-    generator = pipeline(text, voice=voice, split_pattern=r'[:.?!;]\n+|\n[*-]')
+    # Split patterns (alternatives are tried left to right):
+    # - two or more consecutive newlines; a lone newline matching none of
+    #   the cases below is kept, so hard-wrapped statements stay together
+    # - a statement-ending punctuation mark (:.?!;) followed by newline(s)
+    # - a newline followed by a list marker '-' or '*'
+    # - a newline followed by a numbered-item marker: digits then a dot '.'
+    #   (must be a non-capturing group; inside [...] it matches single chars)
+    generator = pipeline(
+        text,
+        voice=voice,
+        split_pattern=r'\n{2,}|[:.?!;]\n+|\n(?:[*-]|\d+\.)',
+    )
+
     output_files = generate_audio(generator, name, voice, args.device)
     directory, output_file_name = os.path.split(output_files[0])
     if args.skip_play: