From 8c2aedfbd29bf72160439e216dace5e833ffec44 Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Sat, 14 Dec 2024 16:28:30 +0800 Subject: [PATCH 1/8] Make continuous reading work when using SAPI5 voices without bookmark support --- source/synthDrivers/sapi5.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/source/synthDrivers/sapi5.py b/source/synthDrivers/sapi5.py index a61e5ad9ee2..3dc21ff9d6c 100644 --- a/source/synthDrivers/sapi5.py +++ b/source/synthDrivers/sapi5.py @@ -7,7 +7,7 @@ from typing import Optional from enum import IntEnum import locale -from collections import OrderedDict +from collections import OrderedDict, deque import comtypes.client from comtypes import COMError import winreg @@ -78,12 +78,23 @@ def Bookmark(self, streamNum, pos, bookmark, bookmarkId): log.debugWarning("Called Bookmark method on SapiSink while driver is dead") return synthIndexReached.notify(synth=synth, index=bookmarkId) + # remove already triggered bookmarks + if streamNum in synth._streamBookmarks: + bookmarks = synth._streamBookmarks[streamNum] + while bookmarks: + if bookmarks.popleft() == bookmarkId: + break def EndStream(self, streamNum, pos): synth = self.synthRef() if synth is None: log.debugWarning("Called Bookmark method on EndStream while driver is dead") return + # trigger all untriggered bookmarks + if streamNum in synth._streamBookmarks: + for bookmark in synth._streamBookmarks[streamNum]: + synthIndexReached.notify(synth=synth, index=bookmark) + del synth._streamBookmarks[streamNum] synthDoneSpeaking.notify(synth=synth) if synth._audioDucker: if audioDucking._isDebug(): @@ -138,6 +149,7 @@ def __init__(self, _defaultVoiceToken=None): self._audioDucker = audioDucking.AudioDucker() self._pitch = 50 self._initTts(_defaultVoiceToken) + self._streamBookmarks = dict() # key = stream num, value = deque of bookmarks def terminate(self): self._eventsConnection = None @@ -263,6 +275,7 @@ 
def _convertPhoneme(self, ipa): def speak(self, speechSequence): textList = [] + bookmarks = deque() # NVDA SpeechCommands are linear, but XML is hierarchical. # Therefore, we track values for non-empty tags. @@ -298,6 +311,7 @@ def outputTags(): textList.append(item.replace("<", "&lt;")) elif isinstance(item, IndexCommand): textList.append('<Bookmark Mark="%d" />' % item.index) + bookmarks.append(item.index) elif isinstance(item, CharacterModeCommand): if item.state: tags["spell"] = {} @@ -397,7 +411,8 @@ def outputTags(): log.debug("Enabling audio ducking due to speak call") tempAudioDucker.enable() try: - self.tts.Speak(text, flags) + streamNum = self.tts.Speak(text, flags) + self._streamBookmarks[streamNum] = bookmarks finally: if tempAudioDucker: if audioDucking._isDebug(): From dbd087c576532204abcdb3fdde9fd9d6dc412d86 Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Mon, 16 Dec 2024 10:41:11 +0800 Subject: [PATCH 2/8] Add changelog entry --- user_docs/en/changes.md | 1 + 1 file changed, 1 insertion(+) diff --git a/user_docs/en/changes.md b/user_docs/en/changes.md index 37ab4b9e3b8..e42f42239a2 100644 --- a/user_docs/en/changes.md +++ b/user_docs/en/changes.md @@ -81,6 +81,7 @@ Specifically, MathML inside of span and other elements that have the attribute ` In any document, if the cursor is on the last line, it will be moved to the end when using this command. (#17251, #17430, @nvdaes) * In web browsers, changes to text selection no longer sometimes fail to be reported in editable text controls. (#17501, @jcsteh) +* Fixed an issue where continuous reading (say all) stopped at the end of the first sentence when using some SAPI5 synthesizers. 
(#16691, @gexgd0419) ### Changes for Developers From d963096ff5236439368314f7c45a903fb8408f72 Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Mon, 16 Dec 2024 16:39:58 +0800 Subject: [PATCH 3/8] Fix --- source/synthDrivers/sapi5.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/source/synthDrivers/sapi5.py b/source/synthDrivers/sapi5.py index 6464ae38a03..c6bd0725b5b 100644 --- a/source/synthDrivers/sapi5.py +++ b/source/synthDrivers/sapi5.py @@ -66,6 +66,10 @@ def StartStream(self, streamNum, pos): if synth is None: log.debugWarning("Called StartStream method on SapiSink while driver is dead") return + # The stream has been started. Move the bookmark list to _streamBookmarks. + if streamNum in synth._streamBookmarksNew: + synth._streamBookmarks[streamNum] = synth._streamBookmarksNew[streamNum] + del synth._streamBookmarksNew[streamNum] if synth._audioDucker: if audioDucking._isDebug(): log.debug("Enabling audio ducking due to starting speech stream") @@ -148,7 +152,9 @@ def __init__(self, _defaultVoiceToken=None): self._audioDucker = audioDucking.AudioDucker() self._pitch = 50 self._initTts(_defaultVoiceToken) - self._streamBookmarks = dict() # key = stream num, value = deque of bookmarks + # key = stream num, value = deque of bookmarks + self._streamBookmarks = dict() # bookmarks in currently speaking streams + self._streamBookmarksNew = dict() # bookmarks for streams that haven't been started def terminate(self): self._eventsConnection = None @@ -412,7 +418,9 @@ def outputTags(): tempAudioDucker.enable() try: streamNum = self.tts.Speak(text, flags) - self._streamBookmarks[streamNum] = bookmarks + # When Speak returns, the previous stream may not have been ended. + # So the bookmark list is stored in another dict until this stream starts. 
+ self._streamBookmarksNew[streamNum] = bookmarks finally: if tempAudioDucker: if audioDucking._isDebug(): From d38197b302aeb8eb107db4663cd9a91023ea8978 Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Tue, 17 Dec 2024 17:33:22 +0800 Subject: [PATCH 4/8] Fix SAPI4 continuous reading --- source/synthDrivers/sapi4.py | 58 +++++++++++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py index b970dd11ba2..1f7bd9a743d 100755 --- a/source/synthDrivers/sapi4.py +++ b/source/synthDrivers/sapi4.py @@ -4,7 +4,7 @@ # See the file COPYING for more details. import locale -from collections import OrderedDict +from collections import OrderedDict, deque import winreg from comtypes import CoCreateInstance, COMObject, COMError, GUID from ctypes import byref, c_ulong, POINTER @@ -20,6 +20,7 @@ ITTSBufNotifySink, ITTSCentralW, ITTSEnumW, + ITTSNotifySinkW, TextSDATA, TTSATTR_MAXPITCH, TTSATTR_MAXSPEED, @@ -70,6 +71,10 @@ def ITTSBufNotifySink_BookMark(self, this, qTimeStamp, dwMarkNum): if synth._finalIndex == dwMarkNum: synth._finalIndex = None synthDoneSpeaking.notify(synth=synth) + # remove already triggered bookmarks + while synth._bookmarks: + if synth._bookmarks.popleft() == dwMarkNum: + break def IUnknown_Release(self, this, *args, **kwargs): if not self._allowDelete and self._refcnt.value == 1: @@ -78,6 +83,42 @@ def IUnknown_Release(self, this, *args, **kwargs): return super(SynthDriverBufSink, self).IUnknown_Release(this, *args, **kwargs) +class SynthDriverSink(COMObject): + _com_interfaces_ = [ITTSNotifySinkW] + + def __init__(self, synthRef: weakref.ReferenceType): + self.synthRef = synthRef + super(SynthDriverSink, self).__init__() + + def ITTSNotifySinkW_AudioStart(self, this, qTimeStamp): + synth = self.synthRef() + if synth is None: + log.debugWarning( + "Called ITTSNotifySinkW_AudioStart method on ITTSNotifySinkW while driver is 
dead", + ) + return + if synth._bookmarkLists: + # take the first bookmark list + synth._bookmarks = synth._bookmarkLists.popleft() + + def ITTSNotifySinkW_AudioStop(self, this, qTimeStamp): + synth = self.synthRef() + if synth is None: + log.debugWarning( + "Called ITTSNotifySinkW_AudioStop method on ITTSNotifySinkW while driver is dead", + ) + return + # trigger all untriggered bookmarks + if synth._bookmarks: + while synth._bookmarks: + synthIndexReached.notify(synth=synth, index=synth._bookmarks.popleft()) + # if there are untriggered bookmarks, synthDoneSpeaking hasn't been triggered yet. + # Trigger synthDoneSpeaking after triggering all bookmarks + synth._finalIndex = None + synthDoneSpeaking.notify(synth=synth) + synth._bookmarks = None + + class SynthDriver(SynthDriver): name = "sapi4" description = "Microsoft Speech API version 4" @@ -115,6 +156,12 @@ def _fetchEnginesList(self): def __init__(self): self._finalIndex: Optional[int] = None + self._ttsCentral = None + self._sinkRegKey = DWORD() + self._bookmarks = None + self._bookmarkLists = deque() + self._sink = SynthDriverSink(weakref.ref(self)) + self._sinkPtr = self._sink.QueryInterface(ITTSNotifySinkW) self._bufSink = SynthDriverBufSink(weakref.ref(self)) self._bufSinkPtr = self._bufSink.QueryInterface(ITTSBufNotifySink) # HACK: Some buggy engines call Release() too many times on our buf sink. @@ -133,6 +180,7 @@ def speak(self, speechSequence: SpeechSequence): textList = [] charMode = False unprocessedSequence = speechSequence + bookmarks = deque() # #15500: Some SAPI4 voices reset all prosody when they receive any prosody command, # whereas other voices never undo prosody changes when a sequence is interrupted. 
# Add all default values to the start and end of the sequence, @@ -153,6 +201,7 @@ def speak(self, speechSequence: SpeechSequence): textList.append(item.replace("\\", "\\\\")) elif isinstance(item, IndexCommand): textList.append("\\mrk=%d\\" % item.index) + bookmarks.append(item.index) lastHandledIndexInSequence = item.index elif isinstance(item, CharacterModeCommand): textList.append("\\RmS=1\\" if item.state else "\\RmS=0\\") @@ -187,6 +236,7 @@ def speak(self, speechSequence: SpeechSequence): # Therefore we add the pause of 1ms at the end textList.append("\\PAU=1\\") text = "".join(textList) + self._bookmarkLists.append(bookmarks) flags = TTSDATAFLAG_TAGGED self._ttsCentral.TextData( VOICECHARSET.CHARSET_TEXT, @@ -198,6 +248,9 @@ def speak(self, speechSequence: SpeechSequence): def cancel(self): try: + # cancel all pending bookmarks + self._bookmarkLists.clear() + self._bookmarks = None self._ttsCentral.AudioReset() except COMError: log.error("Error cancelling speech", exc_info=True) @@ -234,8 +287,11 @@ def _set_voice(self, val): self._currentMode = mode self._ttsAudio = CoCreateInstance(CLSID_MMAudioDest, IAudioMultiMediaDevice) self._ttsAudio.DeviceNumSet(nvwave.outputDeviceNameToID(config.conf["speech"]["outputDevice"], True)) + if self._ttsCentral: + self._ttsCentral.UnRegister(self._sinkRegKey) self._ttsCentral = POINTER(ITTSCentralW)() self._ttsEngines.Select(self._currentMode.gModeID, byref(self._ttsCentral), self._ttsAudio) + self._ttsCentral.Register(self._sinkPtr, ITTSNotifySinkW._iid_, byref(self._sinkRegKey)) self._ttsAttrs = self._ttsCentral.QueryInterface(ITTSAttributes) # Find out rate limits hasRate = bool(mode.dwFeatures & TTSFEATURE_SPEED) From 65387a861460b8ae375924a56b4fd1b3817c9027 Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Thu, 9 Jan 2025 17:10:29 +0800 Subject: [PATCH 5/8] Add type hints --- source/synthDrivers/sapi4.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git 
a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py index a314fa1f5cf..05dc538d657 100755 --- a/source/synthDrivers/sapi4.py +++ b/source/synthDrivers/sapi4.py @@ -60,7 +60,7 @@ def __init__(self, synthRef: weakref.ReferenceType): self._allowDelete = True super(SynthDriverBufSink, self).__init__() - def ITTSBufNotifySink_BookMark(self, this, qTimeStamp, dwMarkNum): + def ITTSBufNotifySink_BookMark(self, this, qTimeStamp: int, dwMarkNum: int): synth = self.synthRef() if synth is None: log.debugWarning( @@ -90,7 +90,7 @@ def __init__(self, synthRef: weakref.ReferenceType): self.synthRef = synthRef super(SynthDriverSink, self).__init__() - def ITTSNotifySinkW_AudioStart(self, this, qTimeStamp): + def ITTSNotifySinkW_AudioStart(self, this, qTimeStamp: int): synth = self.synthRef() if synth is None: log.debugWarning( @@ -101,7 +101,7 @@ def ITTSNotifySinkW_AudioStart(self, this, qTimeStamp): # take the first bookmark list synth._bookmarks = synth._bookmarkLists.popleft() - def ITTSNotifySinkW_AudioStop(self, this, qTimeStamp): + def ITTSNotifySinkW_AudioStop(self, this, qTimeStamp: int): synth = self.synthRef() if synth is None: log.debugWarning( @@ -289,7 +289,6 @@ def _set_voice(self, val): self._ttsAudio.DeviceNumSet(nvwave.outputDeviceNameToID(config.conf["audio"]["outputDevice"], True)) if self._ttsCentral: self._ttsCentral.UnRegister(self._sinkRegKey) - self._ttsCentral = POINTER(ITTSCentralW)() self._ttsEngines.Select(self._currentMode.gModeID, byref(self._ttsCentral), self._ttsAudio) self._ttsCentral.Register(self._sinkPtr, ITTSNotifySinkW._iid_, byref(self._sinkRegKey)) From ee5d5053491ea4c963a1be1bd8653d8767ab7536 Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Fri, 10 Jan 2025 12:09:34 +0800 Subject: [PATCH 6/8] Apply suggestion --- source/synthDrivers/sapi4.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py index 
05dc538d657..3617530ca63 100755 --- a/source/synthDrivers/sapi4.py +++ b/source/synthDrivers/sapi4.py @@ -58,7 +58,7 @@ class SynthDriverBufSink(COMObject): def __init__(self, synthRef: weakref.ReferenceType): self.synthRef = synthRef self._allowDelete = True - super(SynthDriverBufSink, self).__init__() + super().__init__() def ITTSBufNotifySink_BookMark(self, this, qTimeStamp: int, dwMarkNum: int): synth = self.synthRef() @@ -88,7 +88,7 @@ class SynthDriverSink(COMObject): def __init__(self, synthRef: weakref.ReferenceType): self.synthRef = synthRef - super(SynthDriverSink, self).__init__() + super().__init__() def ITTSNotifySinkW_AudioStart(self, this, qTimeStamp: int): synth = self.synthRef() From 33fa2d03cb7004a6c2a39bbc93fcca63d7f33595 Mon Sep 17 00:00:00 2001 From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com> Date: Fri, 10 Jan 2025 12:16:11 +0800 Subject: [PATCH 7/8] Add API breaking change entry --- user_docs/en/changes.md | 1 + 1 file changed, 1 insertion(+) diff --git a/user_docs/en/changes.md b/user_docs/en/changes.md index 790ee2e80ab..6dee8a7a7b3 100644 --- a/user_docs/en/changes.md +++ b/user_docs/en/changes.md @@ -174,6 +174,7 @@ Instead, a `callback` property has been added, which returns a function that per * Because SAPI5 voices now use `nvwave.WavePlayer` to output audio: (#17592, @gexgd0419) * `synthDrivers.sapi5.SPAudioState` has been removed. * `synthDrivers.sapi5.SynthDriver.ttsAudioStream` has been removed. +* `synthDrivers.sapi5.SapiSink.onIndexReached` now expects two arguments: stream number and index. 
(#17523, @gexgd0419) #### Deprecations From bf006898617b6a03f69413ef9182391b013b9301 Mon Sep 17 00:00:00 2001 From: Sean Budd Date: Fri, 10 Jan 2025 15:56:04 +1100 Subject: [PATCH 8/8] Update user_docs/en/changes.md --- user_docs/en/changes.md | 1 - 1 file changed, 1 deletion(-) diff --git a/user_docs/en/changes.md b/user_docs/en/changes.md index 6dee8a7a7b3..790ee2e80ab 100644 --- a/user_docs/en/changes.md +++ b/user_docs/en/changes.md @@ -174,7 +174,6 @@ Instead, a `callback` property has been added, which returns a function that per * Because SAPI5 voices now use `nvwave.WavePlayer` to output audio: (#17592, @gexgd0419) * `synthDrivers.sapi5.SPAudioState` has been removed. * `synthDrivers.sapi5.SynthDriver.ttsAudioStream` has been removed. -* `synthDrivers.sapi5.SapiSink.onIndexReached` now expects two arguments: stream number and index. (#17523, @gexgd0419) #### Deprecations