Skip to content
This repository has been archived by the owner on Feb 5, 2024. It is now read-only.

Commit

Permalink
Prepare Next Version
Browse files Browse the repository at this point in the history
  • Loading branch information
JarbasAl authored and actions-user committed Feb 28, 2022
1 parent a43421a commit 38afc96
Show file tree
Hide file tree
Showing 11 changed files with 299 additions and 1 deletion.
243 changes: 243 additions & 0 deletions build/lib/ovos_tts_plugin_mimic2/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import base64
import math
import random
import re

import requests

from ovos_plugin_manager.templates.tts import TTS, TTSValidator, RemoteTTSException
from ovos_utils.lang.visimes import VISIMES


class Mimic2TTSPlugin(TTS):
"""Interface to Mimic2 TTS."""
# Heuristic value, caps character length of a chunk of text
# to be spoken as a work around for current Tacotron implementation limits.
max_sentence_size = 170

def __init__(self, lang="en-us", config=None):
config = config or {}
super(Mimic2TTSPlugin, self).__init__(lang, config,
Mimic2TTSValidator(self), 'wav')
self.voice = self.voice.lower()
self._visemes = False
self.cache.persist = True # save synths to avoid repeat queries
if self.config.get("url"): # self hosted
self.url = self.config["url"]
# TODO disable cache to avoid filename conflicts with other voices
if not self.voice or self.voice == "default":
self.voice = f"selfhosted{random.randint(0, 9999999)}"
self.cache.persist = False
elif self.voice == "kusal" or self.voice == "default":
self.url = "https://mimic-api.mycroft.ai/synthesize"
self._visemes = True
elif self.voice == "nancy":
self.url = "https://nancy.2022.us/synthesize"
elif self.voice == "ljspeech":
self.url = "https://ljspeech.2022.us/synthesize"
else:
self.voice = "kusal"
self.url = "https://mimic-api.mycroft.ai/synthesize"

def get_tts(self, sentence, wav_file, lang=None):
"""Fetch tts audio using tacotron endpoint.
Arguments:
sentence (str): Sentence to generate audio for
wav_file (str): output file path
Returns:
Tuple ((str) written file, None)
"""
params = {"text": sentence, "visimes": self._visemes}
r = requests.get(self.url, params=params)
if not r.ok:
raise RemoteTTSException(f"Mimic2 server error: {r.reason}")
if not self._visemes:
audio_data = r.content
phonemes = None
else:
results = r.json()
audio_data = base64.b64decode(results['audio_base64'])
phonemes = results['visimes']
with open(wav_file, "wb") as f:
f.write(audio_data)
return (wav_file, phonemes) # No phonemes

def viseme(self, phonemes):
"""Maps phonemes to appropriate viseme encoding
Arguments:
phonemes (list): list of tuples (phoneme, time_start)
Returns:
list: list of tuples (viseme_encoding, time_start)
"""
visemes = []
for pair in phonemes:
if pair[0]:
phone = pair[0].lower()
else:
# if phoneme doesn't exist use
# this as placeholder since it
# is the most common one "3"
phone = 'z'
vis = VISIMES.get(phone)
vis_dur = float(pair[1])
visemes.append((vis, vis_dur))
return visemes

# below are helpers to split sentence in chunks that tacotron can synth
# there is a limit for 150 chars
def _preprocess_sentence(self, sentence):
"""Split sentence in chunks better suited for mimic2. """
return self._split_sentences(sentence)

@staticmethod
def _split_sentences(text):
"""Split text into smaller chunks for TTS generation.
NOTE: The smaller chunks are needed due to current Catotron TTS limitations.
This stage can be removed once Catotron can generate longer sentences.
Arguments:
text (str): text to split
chunk_size (int): size of each chunk
split_by_punc (bool, optional): Defaults to True.
Returns:
list: list of text chunks
"""
if len(text) <= Mimic2TTSPlugin.max_sentence_size:
return [Mimic2TTSPlugin._add_punctuation(text)]

# first split by punctuations that are major pauses
first_splits = Mimic2TTSPlugin._split_by_punctuation(
text,
puncs=[r'\.', r'\!', r'\?', r'\:', r'\;']
)

# if chunks are too big, split by minor pauses (comma, hyphen)
second_splits = []
for chunk in first_splits:
if len(chunk) > Mimic2TTSPlugin.max_sentence_size:
second_splits += Mimic2TTSPlugin._split_by_punctuation(
chunk, puncs=[r'\,', '--', '-'])
else:
second_splits.append(chunk)

# if chunks are still too big, chop into pieces of at most 20 words
third_splits = []
for chunk in second_splits:
if len(chunk) > Mimic2TTSPlugin.max_sentence_size:
third_splits += Mimic2TTSPlugin._split_by_chunk_size(
chunk, 20)
else:
third_splits.append(chunk)

return [Mimic2TTSPlugin._add_punctuation(chunk)
for chunk in third_splits]

@staticmethod
def _break_chunks(l, n):
"""Yield successive n-sized chunks
Arguments:
l (list): text (str) to split
chunk_size (int): chunk size
"""
for i in range(0, len(l), n):
yield " ".join(l[i:i + n])

@staticmethod
def _split_by_chunk_size(text, chunk_size):
"""Split text into word chunks by chunk_size size
Arguments:
text (str): text to split
chunk_size (int): chunk size
Returns:
list: list of text chunks
"""
text_list = text.split()

if len(text_list) <= chunk_size:
return [text]

if chunk_size < len(text_list) < (chunk_size * 2):
return list(Mimic2TTSPlugin._break_chunks(
text_list,
int(math.ceil(len(text_list) / 2))
))
elif (chunk_size * 2) < len(text_list) < (chunk_size * 3):
return list(Mimic2TTSPlugin._break_chunks(
text_list,
int(math.ceil(len(text_list) / 3))
))
elif (chunk_size * 3) < len(text_list) < (chunk_size * 4):
return list(Mimic2TTSPlugin._break_chunks(
text_list,
int(math.ceil(len(text_list) / 4))
))
else:
return list(Mimic2TTSPlugin._break_chunks(
text_list,
int(math.ceil(len(text_list) / 5))
))

@staticmethod
def _split_by_punctuation(chunks, puncs):
"""Splits text by various punctionations
e.g. hello, world => [hello, world]
Arguments:
chunks (list or str): text (str) to split
puncs (list): list of punctuations used to split text
Returns:
list: list with split text
"""
if isinstance(chunks, str):
out = [chunks]
else:
out = chunks

for punc in puncs:
splits = []
for t in out:
# Split text by punctuation, but not embedded punctuation. E.g.
# Split: "Short sentence. Longer sentence."
# But not at: "I.B.M." or "3.424", "3,424" or "what's-his-name."
splits += re.split(r'(?<!\.\S)' + punc + r'\s', t)
out = splits
return [t.strip() for t in out]

@staticmethod
def _add_punctuation(text):
"""Add punctuation at the end of each chunk.
Catotron expects some form of punctuation at the end of a sentence.
"""
punctuation = ['.', '?', '!', ';']
if len(text) >= 1 and text[-1] not in punctuation:
return text + ', '
else:
return text


class Mimic2TTSValidator(TTSValidator):
def __init__(self, tts):
super(Mimic2TTSValidator, self).__init__(tts)

def validate_lang(self):
lang = self.tts.lang.lower()
assert lang.startswith("en")

def validate_connection(self):
pass

def get_tts_class(self):
return Mimic2TTSPlugin
7 changes: 7 additions & 0 deletions build/lib/ovos_tts_plugin_mimic2/version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# The following lines are replaced during the release process.
# START_VERSION_BLOCK
VERSION_MAJOR = 0
VERSION_MINOR = 1
VERSION_BUILD = 4
VERSION_ALPHA = 0
# END_VERSION_BLOCK
Binary file not shown.
25 changes: 25 additions & 0 deletions ovos_tts_plugin_mimic2.egg-info/PKG-INFO
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
Metadata-Version: 1.1
Name: ovos-tts-plugin-mimic2
Version: 0.1.4
Summary: Mimic2 (tacotron based tts) plugin for OpenVoiceOS
Home-page: https://github.com/OpenVoiceOS/ovos-tts-plugin-mimic2
Author: JarbasAi
Author-email: [email protected]
License: Apache-2.0
Description: UNKNOWN
Keywords: mycroft ovos plugin tts
Platform: UNKNOWN
Classifier: Development Status :: 3 - Alpha
Classifier: Intended Audience :: Developers
Classifier: Topic :: Text Processing :: Linguistic
Classifier: License :: OSI Approved :: Apache Software License
Classifier: Programming Language :: Python :: 2
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.0
Classifier: Programming Language :: Python :: 3.1
Classifier: Programming Language :: Python :: 3.2
Classifier: Programming Language :: Python :: 3.3
Classifier: Programming Language :: Python :: 3.4
Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: 3.6
14 changes: 14 additions & 0 deletions ovos_tts_plugin_mimic2.egg-info/SOURCES.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
CHANGELOG.md
LICENSE
MANIFEST.in
setup.py
ovos_tts_plugin_mimic2/__init__.py
ovos_tts_plugin_mimic2/version.py
ovos_tts_plugin_mimic2.egg-info/PKG-INFO
ovos_tts_plugin_mimic2.egg-info/SOURCES.txt
ovos_tts_plugin_mimic2.egg-info/dependency_links.txt
ovos_tts_plugin_mimic2.egg-info/entry_points.txt
ovos_tts_plugin_mimic2.egg-info/requires.txt
ovos_tts_plugin_mimic2.egg-info/top_level.txt
ovos_tts_plugin_mimic2.egg-info/zip-safe
requirements/requirements.txt
1 change: 1 addition & 0 deletions ovos_tts_plugin_mimic2.egg-info/dependency_links.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

3 changes: 3 additions & 0 deletions ovos_tts_plugin_mimic2.egg-info/entry_points.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[mycroft.plugin.tts]
ovos-tts-plugin-mimic2 = ovos_tts_plugin_mimic2:Mimic2TTSPlugin

3 changes: 3 additions & 0 deletions ovos_tts_plugin_mimic2.egg-info/requires.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
requests~=2.26
ovos-plugin-manager>=0.0.1
ovos-utils>=0.0.14
1 change: 1 addition & 0 deletions ovos_tts_plugin_mimic2.egg-info/top_level.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ovos_tts_plugin_mimic2
1 change: 1 addition & 0 deletions ovos_tts_plugin_mimic2.egg-info/zip-safe
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

2 changes: 1 addition & 1 deletion ovos_tts_plugin_mimic2/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@
# START_VERSION_BLOCK
VERSION_MAJOR = 0
VERSION_MINOR = 1
VERSION_BUILD = 4
VERSION_BUILD = 5
VERSION_ALPHA = 0
# END_VERSION_BLOCK

0 comments on commit 38afc96

Please sign in to comment.