import os
from time import sleep, time
import warnings
import importlib

import torch
import argparse
from kokoro import KPipeline
import soundfile as sf
from tqdm import tqdm
import pyperclip
from yaspin import yaspin

# See voices: https://huggingface.co/hexgrad/Kokoro-82M/blob/main/VOICES.md
voices = {
    'pro': 'af_heart',
    'hot': 'af_bella',
    'asmr':'af_nicole',
    'brit': 'bf_emma'
}

def parse_args():
    parser = argparse.ArgumentParser(description="Simple TTS", allow_abbrev=False)
    parser.add_argument(
        "input_text",
        type=str,
        nargs='?',
        default="",
        help="Text to read",
    )
    parser.add_argument(
        "--title",
        "-t",
        required=False,
        type=str,
        help="Title to use as label to the generated outputs",
    )
    parser.add_argument(
        "--voice",
        "-v",
        required=False,
        type=str,
        default="pro",
        help="Voice to use (pro, hot, asmr, brit)",
    )
    parser.add_argument(
        "--input_file",
        "-i",
        required=False,
        type=str,
        default="demo/tongue-twister.txt",
        help="Path to the input text file",
    )
    parser.add_argument(
        "--verbose",
        default=False,
        action="store_true",
        help="Show verbose reports",
    )
    parser.add_argument(
        "--clipboard",
        "-c",
        required=False,
        action="store_true",
        help="Use text from the clipboard (i.e., copied text)",
    )
    parser.add_argument(
        "--device",
        "-d",
        required=False,
        type=str,
        default=("cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else ("xpu" if torch.xpu.is_available() else "cpu"))),
        help="Device for inference: cuda | mps | cpu",
    )
    parser.add_argument(
        "--skip_play",
        "-s",
        required=False,
        action="store_true",
        help="Prevent playing the generated audio",
    )
    return parser.parse_args()

def generate_audio(generator, name, voice):
    output_files = []
    with yaspin():
        for i, (gs, ps, audio) in enumerate(generator):
            output_file_name=f'outputs/{name}/{name}-{voice}-{i}.wav'
            os.makedirs(os.path.dirname(output_file_name), exist_ok=True)
            output_files.append(output_file_name)
            sf.write(output_file_name, audio, 24000)
    return output_files

def play_audio(output_files):
    vlc_module = importlib.import_module("vlc")
    length = len(output_files)
    for i, output in enumerate(output_files):
        full_path = os.path.abspath(output)
        media = vlc_module.MediaPlayer(f"file://{full_path}")
        media.play()
        sleep(0.01)
        duration=media.get_length() / 1000
        chunk=f"{i+1}/{length} " if length > 1 else ""
        description = f"\u25B6 {chunk}"
        for i in tqdm(range(100),
            desc=description,
            bar_format='{l_bar} {elapsed} {bar} {remaining}',
            colour='yellow'):
            sleep(duration / 100)

def main():
    args=parse_args()

    if not args.verbose:
        # Disable all warnings
        warnings.filterwarnings("ignore")

    pipeline = KPipeline(lang_code='a', device=args.device, repo_id='hexgrad/Kokoro-82M')
    if args.voice in voices:
        voice=voices[args.voice]
    else:
        voice=voices['pro'] if args.voice is None else args.voice

    # filename argument
    if args.input_text == "":
        if args.clipboard:
            # use copied text
            text = pyperclip.paste()
            name = 'copied'
        else:
            file_path = args.input_file
            directory, file_name = os.path.split(file_path)
            name = '.'.join(file_name.split('.')[:-1])
            file = open(file_path, "r")
            text = file.read()
    else:
        name = "chat"
        text = args.input_text

    if args.title:
        name = args.title

    # make safe for filenames
    name = name.replace(" ", "_")
    name = name.replace("\\", "_")
    name = name.replace("/", "_")

    '''
        Split patterns:
        - only multiple consecutive new line (to handle wrapped statements)
        - statements ending in punctuations (:.?!;)
        - list items starting in '-' or '*'
        - numbered items starting with a digit followed by a dot '.'
    '''
    generator = pipeline(
        text,
        voice=voice,
        split_pattern=r'\n{2,}|[:.?!;]\n+|\n[\*\-(\d+\.)]'
    )

    if args.verbose:
        print(f"[TTS] Using device: \"{args.device}\", voice: \"{voice}\", output label: \"{name}\"")
        if args.clipboard:
            print('[TTS] Using copied text as input.')

    start_time = time()
    output_files = generate_audio(generator, name, voice)
    generation_time = time() - start_time
    directory,f = os.path.split(output_files[0])

    if args.verbose:
        print(f"[TTS] {len(output_files)} chunks generated in {generation_time:.2f} seconds")
        print(f"[TTS] Output files are in: {directory}/*")

    if args.skip_play:
        print(f"[TTS] Audio player disabled: {directory}/*")
    else:
        try:
            play_audio(output_files)
        except:
            print(f"[TTS] Something went wrong when trying to play the audio. Play the output files manually: {directory}/*")

if __name__ == "__main__":
    main()