diff --git a/build/lib/ovos_tts_plugin_mimic2/__init__.py b/build/lib/ovos_tts_plugin_mimic2/__init__.py new file mode 100644 index 0000000..8e9aa16 --- /dev/null +++ b/build/lib/ovos_tts_plugin_mimic2/__init__.py @@ -0,0 +1,243 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import base64 +import math +import random +import re + +import requests + +from ovos_plugin_manager.templates.tts import TTS, TTSValidator, RemoteTTSException +from ovos_utils.lang.visimes import VISIMES + + +class Mimic2TTSPlugin(TTS): + """Interface to Mimic2 TTS.""" + # Heuristic value, caps character length of a chunk of text + # to be spoken as a work around for current Tacotron implementation limits. + max_sentence_size = 170 + + def __init__(self, lang="en-us", config=None): + config = config or {} + super(Mimic2TTSPlugin, self).__init__(lang, config, + Mimic2TTSValidator(self), 'wav') + self.voice = self.voice.lower() + self._visemes = False + self.cache.persist = True # save synths to avoid repeat queries + if self.config.get("url"): # self hosted + self.url = self.config["url"] + # TODO disable cache to avoid filename conflicts with other voices + if not self.voice or self.voice == "default": + self.voice = f"selfhosted{random.randint(0, 9999999)}" + self.cache.persist = False + elif self.voice == "kusal" or self.voice == "default": + self.url = "https://mimic-api.mycroft.ai/synthesize" + self._visemes = True + elif self.voice == "nancy": + self.url = "https://nancy.2022.us/synthesize" + elif self.voice == "ljspeech": + self.url = "https://ljspeech.2022.us/synthesize" + else: + self.voice = "kusal" + self.url = "https://mimic-api.mycroft.ai/synthesize" + + def get_tts(self, sentence, wav_file, lang=None): + """Fetch tts audio using tacotron endpoint. + + Arguments: + sentence (str): Sentence to generate audio for + wav_file (str): output file path + Returns: + Tuple ((str) written file, None) + """ + params = {"text": sentence, "visimes": self._visemes} + r = requests.get(self.url, params=params) + if not r.ok: + raise RemoteTTSException(f"Mimic2 server error: {r.reason}") + if not self._visemes: + audio_data = r.content + phonemes = None + else: + results = r.json() + audio_data = base64.b64decode(results['audio_base64']) + phonemes = results['visimes'] + with open(wav_file, "wb") as f: + f.write(audio_data) + return (wav_file, phonemes) # No phonemes + + def viseme(self, phonemes): + """Maps phonemes to appropriate viseme encoding + + Arguments: + phonemes (list): list of tuples (phoneme, time_start) + + Returns: + list: list of tuples (viseme_encoding, time_start) + """ + visemes = [] + for pair in phonemes: + if pair[0]: + phone = pair[0].lower() + else: + # if phoneme doesn't exist use + # this as placeholder since it + # is the most common one "3" + phone = 'z' + vis = VISIMES.get(phone) + vis_dur = float(pair[1]) + visemes.append((vis, vis_dur)) + return visemes + + # below are helpers to split sentence in chunks that tacotron can synth + # there is a limit for 150 chars + def _preprocess_sentence(self, sentence): + """Split sentence in chunks better suited for mimic2. """ + return self._split_sentences(sentence) + + @staticmethod + def _split_sentences(text): + """Split text into smaller chunks for TTS generation. + NOTE: The smaller chunks are needed due to current Catotron TTS limitations. + This stage can be removed once Catotron can generate longer sentences. + Arguments: + text (str): text to split + chunk_size (int): size of each chunk + split_by_punc (bool, optional): Defaults to True. + Returns: + list: list of text chunks + """ + if len(text) <= Mimic2TTSPlugin.max_sentence_size: + return [Mimic2TTSPlugin._add_punctuation(text)] + + # first split by punctuations that are major pauses + first_splits = Mimic2TTSPlugin._split_by_punctuation( + text, + puncs=[r'\.', r'\!', r'\?', r'\:', r'\;'] + ) + + # if chunks are too big, split by minor pauses (comma, hyphen) + second_splits = [] + for chunk in first_splits: + if len(chunk) > Mimic2TTSPlugin.max_sentence_size: + second_splits += Mimic2TTSPlugin._split_by_punctuation( + chunk, puncs=[r'\,', '--', '-']) + else: + second_splits.append(chunk) + + # if chunks are still too big, chop into pieces of at most 20 words + third_splits = [] + for chunk in second_splits: + if len(chunk) > Mimic2TTSPlugin.max_sentence_size: + third_splits += Mimic2TTSPlugin._split_by_chunk_size( + chunk, 20) + else: + third_splits.append(chunk) + + return [Mimic2TTSPlugin._add_punctuation(chunk) + for chunk in third_splits] + + @staticmethod + def _break_chunks(l, n): + """Yield successive n-sized chunks + Arguments: + l (list): text (str) to split + chunk_size (int): chunk size + """ + for i in range(0, len(l), n): + yield " ".join(l[i:i + n]) + + @staticmethod + def _split_by_chunk_size(text, chunk_size): + """Split text into word chunks by chunk_size size + Arguments: + text (str): text to split + chunk_size (int): chunk size + Returns: + list: list of text chunks + """ + text_list = text.split() + + if len(text_list) <= chunk_size: + return [text] + + if chunk_size < len(text_list) < (chunk_size * 2): + return list(Mimic2TTSPlugin._break_chunks( + text_list, + int(math.ceil(len(text_list) / 2)) + )) + elif (chunk_size * 2) < len(text_list) < (chunk_size * 3): + return list(Mimic2TTSPlugin._break_chunks( + text_list, + int(math.ceil(len(text_list) / 3)) + )) + elif (chunk_size * 3) < len(text_list) < (chunk_size * 4): + return list(Mimic2TTSPlugin._break_chunks( + text_list, + int(math.ceil(len(text_list) / 4)) + )) + else: + return list(Mimic2TTSPlugin._break_chunks( + text_list, + int(math.ceil(len(text_list) / 5)) + )) + + @staticmethod + def _split_by_punctuation(chunks, puncs): + """Splits text by various punctionations + e.g. hello, world => [hello, world] + Arguments: + chunks (list or str): text (str) to split + puncs (list): list of punctuations used to split text + Returns: + list: list with split text + """ + if isinstance(chunks, str): + out = [chunks] + else: + out = chunks + + for punc in puncs: + splits = [] + for t in out: + # Split text by punctuation, but not embedded punctuation. E.g. + # Split: "Short sentence. Longer sentence." + # But not at: "I.B.M." or "3.424", "3,424" or "what's-his-name." + splits += re.split(r'(?= 1 and text[-1] not in punctuation: + return text + ', ' + else: + return text + + +class Mimic2TTSValidator(TTSValidator): + def __init__(self, tts): + super(Mimic2TTSValidator, self).__init__(tts) + + def validate_lang(self): + lang = self.tts.lang.lower() + assert lang.startswith("en") + + def validate_connection(self): + pass + + def get_tts_class(self): + return Mimic2TTSPlugin diff --git a/build/lib/ovos_tts_plugin_mimic2/version.py b/build/lib/ovos_tts_plugin_mimic2/version.py new file mode 100644 index 0000000..b377648 --- /dev/null +++ b/build/lib/ovos_tts_plugin_mimic2/version.py @@ -0,0 +1,7 @@ +# The following lines are replaced during the release process. +# START_VERSION_BLOCK +VERSION_MAJOR = 0 +VERSION_MINOR = 1 +VERSION_BUILD = 4 +VERSION_ALPHA = 0 +# END_VERSION_BLOCK diff --git a/dist/ovos_tts_plugin_mimic2-0.1.4-py3-none-any.whl b/dist/ovos_tts_plugin_mimic2-0.1.4-py3-none-any.whl new file mode 100644 index 0000000..88b9adf Binary files /dev/null and b/dist/ovos_tts_plugin_mimic2-0.1.4-py3-none-any.whl differ diff --git a/ovos_tts_plugin_mimic2.egg-info/PKG-INFO b/ovos_tts_plugin_mimic2.egg-info/PKG-INFO new file mode 100644 index 0000000..229a5a1 --- /dev/null +++ b/ovos_tts_plugin_mimic2.egg-info/PKG-INFO @@ -0,0 +1,25 @@ +Metadata-Version: 1.1 +Name: ovos-tts-plugin-mimic2 +Version: 0.1.4 +Summary: Mimic2 (tacotron based tts) plugin for OpenVoiceOS +Home-page: https://github.com/OpenVoiceOS/ovos-tts-plugin-mimic2 +Author: JarbasAi +Author-email: jarbasai@mailfence.com +License: Apache-2.0 +Description: UNKNOWN +Keywords: mycroft ovos plugin tts +Platform: UNKNOWN +Classifier: Development Status :: 3 - Alpha +Classifier: Intended Audience :: Developers +Classifier: Topic :: Text Processing :: Linguistic +Classifier: License :: OSI Approved :: Apache Software License +Classifier: Programming Language :: Python :: 2 +Classifier: Programming Language :: Python :: 2.7 +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.0 +Classifier: Programming Language :: Python :: 3.1 +Classifier: Programming Language :: Python :: 3.2 +Classifier: Programming Language :: Python :: 3.3 +Classifier: Programming Language :: Python :: 3.4 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 diff --git a/ovos_tts_plugin_mimic2.egg-info/SOURCES.txt b/ovos_tts_plugin_mimic2.egg-info/SOURCES.txt new file mode 100644 index 0000000..9b2b7b3 --- /dev/null +++ b/ovos_tts_plugin_mimic2.egg-info/SOURCES.txt @@ -0,0 +1,14 @@ +CHANGELOG.md +LICENSE +MANIFEST.in +setup.py +ovos_tts_plugin_mimic2/__init__.py +ovos_tts_plugin_mimic2/version.py +ovos_tts_plugin_mimic2.egg-info/PKG-INFO +ovos_tts_plugin_mimic2.egg-info/SOURCES.txt +ovos_tts_plugin_mimic2.egg-info/dependency_links.txt +ovos_tts_plugin_mimic2.egg-info/entry_points.txt +ovos_tts_plugin_mimic2.egg-info/requires.txt +ovos_tts_plugin_mimic2.egg-info/top_level.txt +ovos_tts_plugin_mimic2.egg-info/zip-safe +requirements/requirements.txt \ No newline at end of file diff --git a/ovos_tts_plugin_mimic2.egg-info/dependency_links.txt b/ovos_tts_plugin_mimic2.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/ovos_tts_plugin_mimic2.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/ovos_tts_plugin_mimic2.egg-info/entry_points.txt b/ovos_tts_plugin_mimic2.egg-info/entry_points.txt new file mode 100644 index 0000000..2c377bc --- /dev/null +++ b/ovos_tts_plugin_mimic2.egg-info/entry_points.txt @@ -0,0 +1,3 @@ +[mycroft.plugin.tts] +ovos-tts-plugin-mimic2 = ovos_tts_plugin_mimic2:Mimic2TTSPlugin + diff --git a/ovos_tts_plugin_mimic2.egg-info/requires.txt b/ovos_tts_plugin_mimic2.egg-info/requires.txt new file mode 100644 index 0000000..4ca8768 --- /dev/null +++ b/ovos_tts_plugin_mimic2.egg-info/requires.txt @@ -0,0 +1,3 @@ +requests~=2.26 +ovos-plugin-manager>=0.0.1 +ovos-utils>=0.0.14 diff --git a/ovos_tts_plugin_mimic2.egg-info/top_level.txt b/ovos_tts_plugin_mimic2.egg-info/top_level.txt new file mode 100644 index 0000000..2021e0b --- /dev/null +++ b/ovos_tts_plugin_mimic2.egg-info/top_level.txt @@ -0,0 +1 @@ +ovos_tts_plugin_mimic2 diff --git a/ovos_tts_plugin_mimic2.egg-info/zip-safe b/ovos_tts_plugin_mimic2.egg-info/zip-safe new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/ovos_tts_plugin_mimic2.egg-info/zip-safe @@ -0,0 +1 @@ + diff --git a/ovos_tts_plugin_mimic2/version.py b/ovos_tts_plugin_mimic2/version.py index b377648..de067f2 100644 --- a/ovos_tts_plugin_mimic2/version.py +++ b/ovos_tts_plugin_mimic2/version.py @@ -2,6 +2,6 @@ # START_VERSION_BLOCK VERSION_MAJOR = 0 VERSION_MINOR = 1 -VERSION_BUILD = 4 +VERSION_BUILD = 5 VERSION_ALPHA = 0 # END_VERSION_BLOCK