Prepare Next Version

OpenVoiceOS · Feb 28, 2022 · 38afc96 · 38afc96
1 parent a43421a
commit 38afc96
Show file tree

Hide file tree

Showing 11 changed files with 299 additions and 1 deletion.
diff --git a/build/lib/ovos_tts_plugin_mimic2/__init__.py b/build/lib/ovos_tts_plugin_mimic2/__init__.py
@@ -0,0 +1,243 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import base64
+import math
+import random
+import re
+
+import requests
+
+from ovos_plugin_manager.templates.tts import TTS, TTSValidator, RemoteTTSException
+from ovos_utils.lang.visimes import VISIMES
+
+
+class Mimic2TTSPlugin(TTS):
+    """Interface to Mimic2 TTS.
+
+    Text is sent to a Mimic2 HTTP endpoint and the returned audio is written
+    to a wav file. The endpoint is the "url" config entry when present,
+    otherwise one of the built-in voice servers selected by voice name.
+    """
+    # Heuristic value, caps character length of a chunk of text
+    # to be spoken as a workaround for current Tacotron implementation limits.
+    max_sentence_size = 170
+
+    def __init__(self, lang="en-us", config=None):
+        """Select the synthesis endpoint from the configured url or voice.
+
+        Arguments:
+            lang (str): language code, forwarded to the TTS base class
+            config (dict, optional): plugin configuration; a "url" entry
+                selects a self hosted server
+        """
+        config = config or {}
+        super().__init__(lang, config, Mimic2TTSValidator(self), 'wav')
+        # tolerate an unset voice, the self hosted branch below checks for it
+        self.voice = (self.voice or "").lower()
+        self._visemes = False
+        self.cache.persist = True  # save synths to avoid repeat queries
+        if self.config.get("url"):  # self hosted
+            self.url = self.config["url"]
+            # TODO disable cache to avoid filename conflicts with other voices
+            if not self.voice or self.voice == "default":
+                self.voice = f"selfhosted{random.randint(0, 9999999)}"
+                self.cache.persist = False
+        elif self.voice == "kusal" or self.voice == "default":
+            self.url = "https://mimic-api.mycroft.ai/synthesize"
+            self._visemes = True
+        elif self.voice == "nancy":
+            self.url = "https://nancy.2022.us/synthesize"
+        elif self.voice == "ljspeech":
+            self.url = "https://ljspeech.2022.us/synthesize"
+        else:
+            # unknown voice name, fall back to the default voice
+            # NOTE(review): unlike the explicit "kusal" branch this fallback
+            # leaves visemes disabled - confirm that this is intended
+            self.voice = "kusal"
+            self.url = "https://mimic-api.mycroft.ai/synthesize"
+
+    def get_tts(self, sentence, wav_file, lang=None):
+        """Fetch tts audio using tacotron endpoint.
+
+        Arguments:
+            sentence (str): Sentence to generate audio for
+            wav_file (str): output file path
+            lang (str, optional): not used by this implementation
+        Returns:
+            Tuple ((str) written file, phonemes), phonemes is the "visimes"
+            entry of the server reply when visemes were requested,
+            otherwise None
+        Raises:
+            RemoteTTSException: if the server replies with an error status
+        """
+        params = {"text": sentence, "visimes": self._visemes}
+        # bound the request so an unresponsive server can not block forever,
+        # the "timeout" config entry (seconds) overrides the default
+        timeout = self.config.get("timeout", 60)
+        r = requests.get(self.url, params=params, timeout=timeout)
+        if not r.ok:
+            raise RemoteTTSException(f"Mimic2 server error: {r.reason}")
+        if self._visemes:
+            # viseme mode: json reply carrying base64 audio and the visemes
+            results = r.json()
+            audio_data = base64.b64decode(results['audio_base64'])
+            phonemes = results['visimes']
+        else:
+            # plain mode: the response body is the audio itself
+            audio_data = r.content
+            phonemes = None
+        with open(wav_file, "wb") as f:
+            f.write(audio_data)
+        return (wav_file, phonemes)
+
+    def viseme(self, phonemes):
+        """Maps phonemes to appropriate viseme encoding
+
+        Arguments:
+            phonemes (list): list of tuples (phoneme, time_start)
+
+        Returns:
+            list: list of tuples (viseme_encoding, time_start), the encoding
+            is None for a phoneme that has no entry in VISIMES
+        """
+        visemes = []
+        for pair in phonemes:
+            # if phoneme doesn't exist use 'z' as placeholder since it
+            # is the most common one "3"
+            phone = pair[0].lower() if pair[0] else 'z'
+            visemes.append((VISIMES.get(phone), float(pair[1])))
+        return visemes
+
+    # below are helpers to split sentence in chunks that tacotron can synth
+    # text longer than max_sentence_size characters gets broken up
+    def _preprocess_sentence(self, sentence):
+        """Split sentence in chunks better suited for mimic2. """
+        return self._split_sentences(sentence)
+
+    @staticmethod
+    def _split_sentences(text):
+        """Split text into smaller chunks for TTS generation.
+        NOTE: The smaller chunks are needed due to current Tacotron TTS limitations.
+        This stage can be removed once Tacotron can generate longer sentences.
+        Arguments:
+            text (str): text to split
+        Returns:
+            list: list of text chunks
+        """
+        if len(text) <= Mimic2TTSPlugin.max_sentence_size:
+            return [Mimic2TTSPlugin._add_punctuation(text)]
+
+        # first split by punctuations that are major pauses
+        first_splits = Mimic2TTSPlugin._split_by_punctuation(
+            text,
+            puncs=[r'\.', r'\!', r'\?', r'\:', r'\;']
+        )
+
+        # if chunks are too big, split by minor pauses (comma, hyphen)
+        second_splits = []
+        for chunk in first_splits:
+            if len(chunk) > Mimic2TTSPlugin.max_sentence_size:
+                second_splits += Mimic2TTSPlugin._split_by_punctuation(
+                    chunk, puncs=[r'\,', '--', '-'])
+            else:
+                second_splits.append(chunk)
+
+        # if chunks are still too big, chop into pieces of at most 20 words
+        third_splits = []
+        for chunk in second_splits:
+            if len(chunk) > Mimic2TTSPlugin.max_sentence_size:
+                third_splits += Mimic2TTSPlugin._split_by_chunk_size(
+                    chunk, 20)
+            else:
+                third_splits.append(chunk)
+
+        return [Mimic2TTSPlugin._add_punctuation(chunk)
+                for chunk in third_splits]
+
+    @staticmethod
+    def _break_chunks(l, n):
+        """Yield successive groups of n items joined by a single space
+        Arguments:
+            l (list): words (str) to group
+            n (int): number of words per group
+        """
+        for i in range(0, len(l), n):
+            yield " ".join(l[i:i + n])
+
+    @staticmethod
+    def _split_by_chunk_size(text, chunk_size):
+        """Split text into evenly sized chunks of at most chunk_size words
+        Arguments:
+            text (str): text to split
+            chunk_size (int): maximum number of words per chunk, values
+                below 1 are treated as 1
+        Returns:
+            list: list of text chunks
+        """
+        text_list = text.split()
+
+        if len(text_list) <= chunk_size:
+            return [text]
+
+        # use the smallest number of pieces that keeps every piece within
+        # chunk_size words, then spread the words evenly across them; this
+        # also covers exact multiples of chunk_size and arbitrarily long text
+        n_chunks = math.ceil(len(text_list) / max(chunk_size, 1))
+        return list(Mimic2TTSPlugin._break_chunks(
+            text_list,
+            math.ceil(len(text_list) / n_chunks)
+        ))
+
+    @staticmethod
+    def _split_by_punctuation(chunks, puncs):
+        """Splits text by various punctuations
+        e.g. hello, world => [hello, world]
+        The matched punctuation is not kept and empty pieces are dropped.
+        Arguments:
+            chunks (list or str): text (str) to split
+            puncs (list): list of punctuations (regex fragments) used to
+                split text
+        Returns:
+            list: list with split text
+        """
+        if isinstance(chunks, str):
+            out = [chunks]
+        else:
+            out = chunks
+
+        for punc in puncs:
+            splits = []
+            for t in out:
+                # Split text by punctuation, but not embedded punctuation.  E.g.
+                # Split:  "Short sentence.  Longer sentence."
+                # But not at: "I.B.M." or "3.424", "3,424" or "what's-his-name."
+                splits += re.split(r'(?<!\.\S)' + punc + r'\s', t)
+            out = splits
+        # a separator at the very start or end leaves an empty piece behind,
+        # there is nothing to synthesize for those
+        stripped = (t.strip() for t in out)
+        return [t for t in stripped if t]
+
+    @staticmethod
+    def _add_punctuation(text):
+        """Add punctuation at the end of each chunk.
+        Tacotron expects some form of punctuation at the end of a sentence.
+        Text that is empty or already ends in one of . ? ! ; is returned
+        unchanged, anything else gets ', ' appended.
+        """
+        if text and not text.endswith(('.', '?', '!', ';')):
+            return text + ', '
+        return text
+
+
+class Mimic2TTSValidator(TTSValidator):
+    """Validator for Mimic2TTSPlugin, only checks the language."""
+
+    def __init__(self, tts):
+        super().__init__(tts)
+
+    def validate_lang(self):
+        """Accept only language codes starting with "en".
+
+        Raises:
+            AssertionError: if the configured language is not english
+        """
+        lang = self.tts.lang.lower()
+        # raised explicitly instead of using an assert statement so the check
+        # still runs when python is started with -O
+        if not lang.startswith("en"):
+            raise AssertionError(f"Mimic2 only supports english, got: {lang}")
+
+    def validate_connection(self):
+        """No connection check is performed."""
+        pass
+
+    def get_tts_class(self):
+        """Return the TTS class this validator belongs to."""
+        return Mimic2TTSPlugin
diff --git a/build/lib/ovos_tts_plugin_mimic2/version.py b/build/lib/ovos_tts_plugin_mimic2/version.py
@@ -0,0 +1,7 @@
+# The following lines are replaced during the release process.
+# START_VERSION_BLOCK
+VERSION_MAJOR = 0
+VERSION_MINOR = 1
+VERSION_BUILD = 4
+VERSION_ALPHA = 0
+# END_VERSION_BLOCK
diff --git a/dist/ovos_tts_plugin_mimic2-0.1.4-py3-none-any.whl b/dist/ovos_tts_plugin_mimic2-0.1.4-py3-none-any.whl
diff --git a/ovos_tts_plugin_mimic2.egg-info/PKG-INFO b/ovos_tts_plugin_mimic2.egg-info/PKG-INFO
@@ -0,0 +1,25 @@
+Metadata-Version: 1.1
+Name: ovos-tts-plugin-mimic2
+Version: 0.1.4
+Summary: Mimic2 (tacotron based tts) plugin for OpenVoiceOS
+Home-page: https://github.com/OpenVoiceOS/ovos-tts-plugin-mimic2
+Author: JarbasAi
+Author-email: [email protected]
+License: Apache-2.0
+Description: UNKNOWN
+Keywords: mycroft ovos plugin tts
+Platform: UNKNOWN
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: Topic :: Text Processing :: Linguistic
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Programming Language :: Python :: 2
+Classifier: Programming Language :: Python :: 2.7
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.0
+Classifier: Programming Language :: Python :: 3.1
+Classifier: Programming Language :: Python :: 3.2
+Classifier: Programming Language :: Python :: 3.3
+Classifier: Programming Language :: Python :: 3.4
+Classifier: Programming Language :: Python :: 3.5
+Classifier: Programming Language :: Python :: 3.6
diff --git a/ovos_tts_plugin_mimic2.egg-info/SOURCES.txt b/ovos_tts_plugin_mimic2.egg-info/SOURCES.txt
@@ -0,0 +1,14 @@
+CHANGELOG.md
+LICENSE
+MANIFEST.in
+setup.py
+ovos_tts_plugin_mimic2/__init__.py
+ovos_tts_plugin_mimic2/version.py
+ovos_tts_plugin_mimic2.egg-info/PKG-INFO
+ovos_tts_plugin_mimic2.egg-info/SOURCES.txt
+ovos_tts_plugin_mimic2.egg-info/dependency_links.txt
+ovos_tts_plugin_mimic2.egg-info/entry_points.txt
+ovos_tts_plugin_mimic2.egg-info/requires.txt
+ovos_tts_plugin_mimic2.egg-info/top_level.txt
+ovos_tts_plugin_mimic2.egg-info/zip-safe
+requirements/requirements.txt
diff --git a/ovos_tts_plugin_mimic2.egg-info/dependency_links.txt b/ovos_tts_plugin_mimic2.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/ovos_tts_plugin_mimic2.egg-info/entry_points.txt b/ovos_tts_plugin_mimic2.egg-info/entry_points.txt
@@ -0,0 +1,3 @@
+[mycroft.plugin.tts]
+ovos-tts-plugin-mimic2 = ovos_tts_plugin_mimic2:Mimic2TTSPlugin
+
diff --git a/ovos_tts_plugin_mimic2.egg-info/requires.txt b/ovos_tts_plugin_mimic2.egg-info/requires.txt
@@ -0,0 +1,3 @@
+requests~=2.26
+ovos-plugin-manager>=0.0.1
+ovos-utils>=0.0.14
diff --git a/ovos_tts_plugin_mimic2.egg-info/top_level.txt b/ovos_tts_plugin_mimic2.egg-info/top_level.txt
@@ -0,0 +1 @@
+ovos_tts_plugin_mimic2
diff --git a/ovos_tts_plugin_mimic2.egg-info/zip-safe b/ovos_tts_plugin_mimic2.egg-info/zip-safe
@@ -0,0 +1 @@
+
diff --git a/ovos_tts_plugin_mimic2/version.py b/ovos_tts_plugin_mimic2/version.py
@@ -2,6 +2,6 @@
 # START_VERSION_BLOCK
 VERSION_MAJOR = 0
 VERSION_MINOR = 1
-VERSION_BUILD = 4
+VERSION_BUILD = 5
 VERSION_ALPHA = 0
 # END_VERSION_BLOCK
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		[mycroft.plugin.tts]
		ovos-tts-plugin-mimic2 = ovos_tts_plugin_mimic2:Mimic2TTSPlugin