Add VITS model

robinhad · Jan 14, 2022 · 16f4a86
1 parent ecf0e33
commit 16f4a86
Show file tree

Hide file tree

Showing 4 changed files with 235 additions and 174 deletions.
diff --git a/.gitignore b/.gitignore
@@ -127,3 +127,6 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+
+# model files
+*.pth.tar
diff --git a/README.md b/README.md
@@ -1,6 +1,6 @@
 ---
 title: "Ukrainian TTS"
-emoji: 🐸
+emoji: 🇺🇦
 colorFrom: green
 colorTo: green
 sdk: gradio
@@ -11,11 +11,11 @@ pinned: false
 # Ukrainian TTS 📢🤖
 Ukrainian TTS (text-to-speech) using Coqui TTS.
 
-Trained on [M-AILABS Ukrainian dataset](https://www.caito.de/2019/01/the-m-ailabs-speech-dataset/) using `sumska` voice.  
+Trained on [M-AILABS Ukrainian dataset](https://www.caito.de/2019/01/the-m-ailabs-speech-dataset/).  
 
 Link to online demo -> [https://huggingface.co/spaces/robinhad/ukrainian-tts](https://huggingface.co/spaces/robinhad/ukrainian-tts)
 # Support
-If you like my work, please support -> [SUPPORT LINK](https://send.monobank.ua/jar/48iHq4xAXm)
+If you like my work, please support -> ![mono](https://www.monobank.ua/favicon.ico) [SUPPORT LINK](https://send.monobank.ua/jar/48iHq4xAXm)
 # Example
 
 https://user-images.githubusercontent.com/5759207/140622395-9e734c95-159c-4d72-9f56-e8d1f1ac66c2.mp4

diff --git a/app.py b/app.py
@@ -1,16 +1,14 @@
 import tempfile
-from typing import Optional
 
 import gradio as gr
-import numpy as np
 
 from TTS.utils.manage import ModelManager
 from TTS.utils.synthesizer import Synthesizer
 import requests
 from os.path import exists
 
 MODEL_NAMES = [
-    "uk/mai/glow-tts"
+    "uk/mai/vits-tts"
 ]
 MODELS = {}
 
@@ -29,21 +27,18 @@ def download(url, file_name):
 
 for MODEL_NAME in MODEL_NAMES:
     print(f"downloading {MODEL_NAME}")
-    model_path, config_path, model_item = manager.download_model(
-        f"tts_models/{MODEL_NAME}")
-    vocoder_name: Optional[str] = model_item["default_vocoder"]
-    release_number = "0.0.1"
-    vocoder_link = f"https://github.com/robinhad/ukrainian-tts/releases/download/v{release_number}/vocoder.pth.tar"
-    vocoder_config_link = f"https://github.com/robinhad/ukrainian-tts/releases/download/v{release_number}/vocoder_config.json"
+    release_number = "1.0.0"
+    model_link = f"https://github.com/robinhad/ukrainian-tts/releases/download/v{release_number}/model.pth.tar"
+    config_link = f"https://github.com/robinhad/ukrainian-tts/releases/download/v{release_number}/config.json"
 
-    vocoder_path = "vocoder.pth.tar"
-    vocoder_config_path = "vocoder_config.json"
+    model_path = "model.pth.tar"
+    config_path = "config.json"
 
-    download(vocoder_link, vocoder_path)
-    download(vocoder_config_link, vocoder_config_path)
+    download(model_link, model_path)
+    download(config_link, config_path)
 
     synthesizer = Synthesizer(
-        model_path, config_path, None, vocoder_path, vocoder_config_path,
+        model_path, config_path, None, None, None,
     )
     MODELS[MODEL_NAME] = synthesizer