# xtts.py - voice-cloning text-to-speech helpers built on the Coqui XTTS v2 model.
import asyncio
import os
import librosa
import math
import uuid
from TTS.api import TTS
import soundfile as sf
from transformers.utils import logging
# Restrict Hugging Face transformers log output to errors only, so model
# loading does not flood the console with info/warning messages.
logging.set_verbosity_error()
async def axtts(text: str, reference_wav: str, language: str = "hi", duration: float = None, device: str = "cpu") -> str:
    """Synthesize ``text`` to a wav file with XTTS v2, cloning ``reference_wav``.

    Although declared ``async``, this coroutine never awaits: model loading
    and synthesis run synchronously and block the event loop while they work.
    The XTTS model is instantiated on every call.

    Args:
        text: Text to speak; coerced to ``str`` before synthesis.
        reference_wav: Path to the speaker reference audio passed to the
            model as ``speaker_wav``.
        language: Language code forwarded to the model (default ``"hi"``).
        duration: Optional target length in seconds. When given, the
            synthesized audio is time-stretched via ``speed_up_audio`` and
            the stretched file is returned instead of the raw synthesis.
        device: Device the model is moved to, e.g. ``"cpu"`` or ``"cuda"``.

    Returns:
        Path of the generated wav file inside ``./tts_files``.
    """
    # Make sure the output directory exists.
    folder_path = "./tts_files"
    if os.path.exists(folder_path):
        print(f"Directory '{folder_path}' already exists.")
    else:
        # exist_ok avoids a crash if something else creates the directory
        # between the check above and this call.
        os.makedirs(folder_path, exist_ok=True)
        print(f"Directory '{folder_path}' created.")

    text = str(text)
    gpu = device == "cuda"

    # Init TTS
    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=gpu).to(device)

    # A random name keeps concurrent/repeated calls from overwriting each other.
    file_name = folder_path + "/" + str(uuid.uuid4()) + ".wav"

    # Text to speech to a file. Forward the caller's language instead of a
    # hard-coded "hi" so the ``language`` parameter actually takes effect.
    tts.tts_to_file(text=text, speaker_wav=reference_wav, language=language, file_path=file_name)

    if duration is not None:
        file_name = speed_up_audio(file_name, duration, folder_path)

    return file_name
def speed_up_audio(wav_file: str, expected_duration: float, folder_path: str) -> str:
    """Time-stretch ``wav_file`` so it lasts about ``expected_duration`` seconds.

    The target duration is rounded up to a whole second before the stretch
    rate is computed. The stretched audio is written to a new randomly named
    wav file in ``folder_path`` and the input file is deleted.

    Args:
        wav_file: Path of the audio file to stretch; removed on success.
        expected_duration: Desired length in seconds; must be positive.
        folder_path: Directory that receives the output file.

    Returns:
        Path of the newly written wav file.

    Raises:
        ValueError: If ``expected_duration`` is not greater than zero.
    """
    # Reject non-positive targets up front: after rounding they would cause a
    # division by zero or a negative stretch rate.
    if expected_duration <= 0:
        raise ValueError(
            f"expected_duration must be positive, got {expected_duration!r}"
        )

    # Load the audio file once. librosa.load is used with its defaults, which
    # (per the librosa documentation) resample to 22050 Hz mono.
    y, sr = librosa.load(wav_file)

    # Derive the duration from the loaded samples rather than reading the
    # file from disk a second time.
    current_duration = librosa.get_duration(y=y, sr=sr)
    expected_duration = math.ceil(expected_duration)

    # rate > 1 shortens the audio, rate < 1 lengthens it.
    speed = current_duration / expected_duration
    print(f"Speed: {speed}")

    # Use librosa's time-stretch function to change the speed without altering the pitch
    y_fast = librosa.effects.time_stretch(y, rate=speed)

    # Save the modified audio
    output_path = folder_path + "/" + str(uuid.uuid4()) + ".wav"
    sf.write(output_path, y_fast, sr)

    # The unstretched file is no longer needed once the new one is written.
    os.remove(wav_file)

    return output_path
if __name__ == "__main__":
    # Demo entry point: synthesize a fixed Hindi sentence in the voice of the
    # reference recording given on the command line.
    import sys

    # axtts requires a speaker reference wav; without one the call would fail
    # with a TypeError, so ask for it explicitly.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} REFERENCE_WAV")

    asyncio.run(
        axtts(
            "यदि आप इस ऐप को कैसे बनाया गया है के बारे में सभी विवरण चाहते हैं",
            reference_wav=sys.argv[1],
        )
    )