From 8c2aedfbd29bf72160439e216dace5e833ffec44 Mon Sep 17 00:00:00 2001
From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com>
Date: Sat, 14 Dec 2024 16:28:30 +0800
Subject: [PATCH 1/8] Make continuous reading work when using SAPI5 voices
without bookmark support
---
source/synthDrivers/sapi5.py | 19 +++++++++++++++++--
1 file changed, 17 insertions(+), 2 deletions(-)
diff --git a/source/synthDrivers/sapi5.py b/source/synthDrivers/sapi5.py
index a61e5ad9ee2..3dc21ff9d6c 100644
--- a/source/synthDrivers/sapi5.py
+++ b/source/synthDrivers/sapi5.py
@@ -7,7 +7,7 @@
from typing import Optional
from enum import IntEnum
import locale
-from collections import OrderedDict
+from collections import OrderedDict, deque
import comtypes.client
from comtypes import COMError
import winreg
@@ -78,12 +78,23 @@ def Bookmark(self, streamNum, pos, bookmark, bookmarkId):
log.debugWarning("Called Bookmark method on SapiSink while driver is dead")
return
synthIndexReached.notify(synth=synth, index=bookmarkId)
+ # remove already triggered bookmarks
+ if streamNum in synth._streamBookmarks:
+ bookmarks = synth._streamBookmarks[streamNum]
+ while bookmarks:
+ if bookmarks.popleft() == bookmarkId:
+ break
def EndStream(self, streamNum, pos):
synth = self.synthRef()
if synth is None:
log.debugWarning("Called Bookmark method on EndStream while driver is dead")
return
+ # trigger all untriggered bookmarks
+ if streamNum in synth._streamBookmarks:
+ for bookmark in synth._streamBookmarks[streamNum]:
+ synthIndexReached.notify(synth=synth, index=bookmark)
+ del synth._streamBookmarks[streamNum]
synthDoneSpeaking.notify(synth=synth)
if synth._audioDucker:
if audioDucking._isDebug():
@@ -138,6 +149,7 @@ def __init__(self, _defaultVoiceToken=None):
self._audioDucker = audioDucking.AudioDucker()
self._pitch = 50
self._initTts(_defaultVoiceToken)
+ self._streamBookmarks = dict() # key = stream num, value = deque of bookmarks
def terminate(self):
self._eventsConnection = None
@@ -263,6 +275,7 @@ def _convertPhoneme(self, ipa):
def speak(self, speechSequence):
textList = []
+ bookmarks = deque()
# NVDA SpeechCommands are linear, but XML is hierarchical.
# Therefore, we track values for non-empty tags.
@@ -298,6 +311,7 @@ def outputTags():
textList.append(item.replace("<", "&lt;"))
elif isinstance(item, IndexCommand):
textList.append('<Bookmark Mark="%d" />' % item.index)
+ bookmarks.append(item.index)
elif isinstance(item, CharacterModeCommand):
if item.state:
tags["spell"] = {}
@@ -397,7 +411,8 @@ def outputTags():
log.debug("Enabling audio ducking due to speak call")
tempAudioDucker.enable()
try:
- self.tts.Speak(text, flags)
+ streamNum = self.tts.Speak(text, flags)
+ self._streamBookmarks[streamNum] = bookmarks
finally:
if tempAudioDucker:
if audioDucking._isDebug():
From dbd087c576532204abcdb3fdde9fd9d6dc412d86 Mon Sep 17 00:00:00 2001
From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com>
Date: Mon, 16 Dec 2024 10:41:11 +0800
Subject: [PATCH 2/8] Add changelog entry
---
user_docs/en/changes.md | 1 +
1 file changed, 1 insertion(+)
diff --git a/user_docs/en/changes.md b/user_docs/en/changes.md
index 37ab4b9e3b8..e42f42239a2 100644
--- a/user_docs/en/changes.md
+++ b/user_docs/en/changes.md
@@ -81,6 +81,7 @@ Specifically, MathML inside of span and other elements that have the attribute `
In any document, if the cursor is on the last line, it will be moved to the end when using this command.
(#17251, #17430, @nvdaes)
* In web browsers, changes to text selection no longer sometimes fail to be reported in editable text controls. (#17501, @jcsteh)
+* Fixed an issue where continuous reading (say all) stopped at the end of the first sentence when using some SAPI5 synthesizers. (#16691, @gexgd0419)
### Changes for Developers
From d963096ff5236439368314f7c45a903fb8408f72 Mon Sep 17 00:00:00 2001
From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com>
Date: Mon, 16 Dec 2024 16:39:58 +0800
Subject: [PATCH 3/8] Fix: keep SAPI5 bookmark lists pending until their stream starts
---
source/synthDrivers/sapi5.py | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)
diff --git a/source/synthDrivers/sapi5.py b/source/synthDrivers/sapi5.py
index 6464ae38a03..c6bd0725b5b 100644
--- a/source/synthDrivers/sapi5.py
+++ b/source/synthDrivers/sapi5.py
@@ -66,6 +66,10 @@ def StartStream(self, streamNum, pos):
if synth is None:
log.debugWarning("Called StartStream method on SapiSink while driver is dead")
return
+ # The stream has been started. Move the bookmark list to _streamBookmarks.
+ if streamNum in synth._streamBookmarksNew:
+ synth._streamBookmarks[streamNum] = synth._streamBookmarksNew[streamNum]
+ del synth._streamBookmarksNew[streamNum]
if synth._audioDucker:
if audioDucking._isDebug():
log.debug("Enabling audio ducking due to starting speech stream")
@@ -148,7 +152,9 @@ def __init__(self, _defaultVoiceToken=None):
self._audioDucker = audioDucking.AudioDucker()
self._pitch = 50
self._initTts(_defaultVoiceToken)
- self._streamBookmarks = dict() # key = stream num, value = deque of bookmarks
+ # key = stream num, value = deque of bookmarks
+ self._streamBookmarks = dict() # bookmarks in currently speaking streams
+ self._streamBookmarksNew = dict() # bookmarks for streams that haven't been started
def terminate(self):
self._eventsConnection = None
@@ -412,7 +418,9 @@ def outputTags():
tempAudioDucker.enable()
try:
streamNum = self.tts.Speak(text, flags)
- self._streamBookmarks[streamNum] = bookmarks
+ # When Speak returns, the previous stream may not have been ended.
+ # So the bookmark list is stored in another dict until this stream starts.
+ self._streamBookmarksNew[streamNum] = bookmarks
finally:
if tempAudioDucker:
if audioDucking._isDebug():
From d38197b302aeb8eb107db4663cd9a91023ea8978 Mon Sep 17 00:00:00 2001
From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com>
Date: Tue, 17 Dec 2024 17:33:22 +0800
Subject: [PATCH 4/8] Fix SAPI4 continuous reading
---
source/synthDrivers/sapi4.py | 58 +++++++++++++++++++++++++++++++++++-
1 file changed, 57 insertions(+), 1 deletion(-)
diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py
index b970dd11ba2..1f7bd9a743d 100755
--- a/source/synthDrivers/sapi4.py
+++ b/source/synthDrivers/sapi4.py
@@ -4,7 +4,7 @@
# See the file COPYING for more details.
import locale
-from collections import OrderedDict
+from collections import OrderedDict, deque
import winreg
from comtypes import CoCreateInstance, COMObject, COMError, GUID
from ctypes import byref, c_ulong, POINTER
@@ -20,6 +20,7 @@
ITTSBufNotifySink,
ITTSCentralW,
ITTSEnumW,
+ ITTSNotifySinkW,
TextSDATA,
TTSATTR_MAXPITCH,
TTSATTR_MAXSPEED,
@@ -70,6 +71,10 @@ def ITTSBufNotifySink_BookMark(self, this, qTimeStamp, dwMarkNum):
if synth._finalIndex == dwMarkNum:
synth._finalIndex = None
synthDoneSpeaking.notify(synth=synth)
+ # remove already triggered bookmarks
+ while synth._bookmarks:
+ if synth._bookmarks.popleft() == dwMarkNum:
+ break
def IUnknown_Release(self, this, *args, **kwargs):
if not self._allowDelete and self._refcnt.value == 1:
@@ -78,6 +83,42 @@ def IUnknown_Release(self, this, *args, **kwargs):
return super(SynthDriverBufSink, self).IUnknown_Release(this, *args, **kwargs)
+class SynthDriverSink(COMObject):
+ _com_interfaces_ = [ITTSNotifySinkW]
+
+ def __init__(self, synthRef: weakref.ReferenceType):
+ self.synthRef = synthRef
+ super(SynthDriverSink, self).__init__()
+
+ def ITTSNotifySinkW_AudioStart(self, this, qTimeStamp):
+ synth = self.synthRef()
+ if synth is None:
+ log.debugWarning(
+ "Called ITTSNotifySinkW_AudioStart method on ITTSNotifySinkW while driver is dead",
+ )
+ return
+ if synth._bookmarkLists:
+ # take the first bookmark list
+ synth._bookmarks = synth._bookmarkLists.popleft()
+
+ def ITTSNotifySinkW_AudioStop(self, this, qTimeStamp):
+ synth = self.synthRef()
+ if synth is None:
+ log.debugWarning(
+ "Called ITTSNotifySinkW_AudioStop method on ITTSNotifySinkW while driver is dead",
+ )
+ return
+ # trigger all untriggered bookmarks
+ if synth._bookmarks:
+ while synth._bookmarks:
+ synthIndexReached.notify(synth=synth, index=synth._bookmarks.popleft())
+ # if there are untriggered bookmarks, synthDoneSpeaking hasn't been triggered yet.
+ # Trigger synthDoneSpeaking after triggering all bookmarks
+ synth._finalIndex = None
+ synthDoneSpeaking.notify(synth=synth)
+ synth._bookmarks = None
+
+
class SynthDriver(SynthDriver):
name = "sapi4"
description = "Microsoft Speech API version 4"
@@ -115,6 +156,12 @@ def _fetchEnginesList(self):
def __init__(self):
self._finalIndex: Optional[int] = None
+ self._ttsCentral = None
+ self._sinkRegKey = DWORD()
+ self._bookmarks = None
+ self._bookmarkLists = deque()
+ self._sink = SynthDriverSink(weakref.ref(self))
+ self._sinkPtr = self._sink.QueryInterface(ITTSNotifySinkW)
self._bufSink = SynthDriverBufSink(weakref.ref(self))
self._bufSinkPtr = self._bufSink.QueryInterface(ITTSBufNotifySink)
# HACK: Some buggy engines call Release() too many times on our buf sink.
@@ -133,6 +180,7 @@ def speak(self, speechSequence: SpeechSequence):
textList = []
charMode = False
unprocessedSequence = speechSequence
+ bookmarks = deque()
# #15500: Some SAPI4 voices reset all prosody when they receive any prosody command,
# whereas other voices never undo prosody changes when a sequence is interrupted.
# Add all default values to the start and end of the sequence,
@@ -153,6 +201,7 @@ def speak(self, speechSequence: SpeechSequence):
textList.append(item.replace("\\", "\\\\"))
elif isinstance(item, IndexCommand):
textList.append("\\mrk=%d\\" % item.index)
+ bookmarks.append(item.index)
lastHandledIndexInSequence = item.index
elif isinstance(item, CharacterModeCommand):
textList.append("\\RmS=1\\" if item.state else "\\RmS=0\\")
@@ -187,6 +236,7 @@ def speak(self, speechSequence: SpeechSequence):
# Therefore we add the pause of 1ms at the end
textList.append("\\PAU=1\\")
text = "".join(textList)
+ self._bookmarkLists.append(bookmarks)
flags = TTSDATAFLAG_TAGGED
self._ttsCentral.TextData(
VOICECHARSET.CHARSET_TEXT,
@@ -198,6 +248,9 @@ def speak(self, speechSequence: SpeechSequence):
def cancel(self):
try:
+ # cancel all pending bookmarks
+ self._bookmarkLists.clear()
+ self._bookmarks = None
self._ttsCentral.AudioReset()
except COMError:
log.error("Error cancelling speech", exc_info=True)
@@ -234,8 +287,11 @@ def _set_voice(self, val):
self._currentMode = mode
self._ttsAudio = CoCreateInstance(CLSID_MMAudioDest, IAudioMultiMediaDevice)
self._ttsAudio.DeviceNumSet(nvwave.outputDeviceNameToID(config.conf["speech"]["outputDevice"], True))
+ if self._ttsCentral:
+ self._ttsCentral.UnRegister(self._sinkRegKey)
self._ttsCentral = POINTER(ITTSCentralW)()
self._ttsEngines.Select(self._currentMode.gModeID, byref(self._ttsCentral), self._ttsAudio)
+ self._ttsCentral.Register(self._sinkPtr, ITTSNotifySinkW._iid_, byref(self._sinkRegKey))
self._ttsAttrs = self._ttsCentral.QueryInterface(ITTSAttributes)
# Find out rate limits
hasRate = bool(mode.dwFeatures & TTSFEATURE_SPEED)
From 65387a861460b8ae375924a56b4fd1b3817c9027 Mon Sep 17 00:00:00 2001
From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com>
Date: Thu, 9 Jan 2025 17:10:29 +0800
Subject: [PATCH 5/8] Add type hints
---
source/synthDrivers/sapi4.py | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py
index a314fa1f5cf..05dc538d657 100755
--- a/source/synthDrivers/sapi4.py
+++ b/source/synthDrivers/sapi4.py
@@ -60,7 +60,7 @@ def __init__(self, synthRef: weakref.ReferenceType):
self._allowDelete = True
super(SynthDriverBufSink, self).__init__()
- def ITTSBufNotifySink_BookMark(self, this, qTimeStamp, dwMarkNum):
+ def ITTSBufNotifySink_BookMark(self, this, qTimeStamp: int, dwMarkNum: int):
synth = self.synthRef()
if synth is None:
log.debugWarning(
@@ -90,7 +90,7 @@ def __init__(self, synthRef: weakref.ReferenceType):
self.synthRef = synthRef
super(SynthDriverSink, self).__init__()
- def ITTSNotifySinkW_AudioStart(self, this, qTimeStamp):
+ def ITTSNotifySinkW_AudioStart(self, this, qTimeStamp: int):
synth = self.synthRef()
if synth is None:
log.debugWarning(
@@ -101,7 +101,7 @@ def ITTSNotifySinkW_AudioStart(self, this, qTimeStamp):
# take the first bookmark list
synth._bookmarks = synth._bookmarkLists.popleft()
- def ITTSNotifySinkW_AudioStop(self, this, qTimeStamp):
+ def ITTSNotifySinkW_AudioStop(self, this, qTimeStamp: int):
synth = self.synthRef()
if synth is None:
log.debugWarning(
@@ -289,7 +289,6 @@ def _set_voice(self, val):
self._ttsAudio.DeviceNumSet(nvwave.outputDeviceNameToID(config.conf["audio"]["outputDevice"], True))
if self._ttsCentral:
self._ttsCentral.UnRegister(self._sinkRegKey)
-
self._ttsCentral = POINTER(ITTSCentralW)()
self._ttsEngines.Select(self._currentMode.gModeID, byref(self._ttsCentral), self._ttsAudio)
self._ttsCentral.Register(self._sinkPtr, ITTSNotifySinkW._iid_, byref(self._sinkRegKey))
From ee5d5053491ea4c963a1be1bd8653d8767ab7536 Mon Sep 17 00:00:00 2001
From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com>
Date: Fri, 10 Jan 2025 12:09:34 +0800
Subject: [PATCH 6/8] Apply suggestion: use zero-argument super() in SAPI4 sinks
---
source/synthDrivers/sapi4.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/source/synthDrivers/sapi4.py b/source/synthDrivers/sapi4.py
index 05dc538d657..3617530ca63 100755
--- a/source/synthDrivers/sapi4.py
+++ b/source/synthDrivers/sapi4.py
@@ -58,7 +58,7 @@ class SynthDriverBufSink(COMObject):
def __init__(self, synthRef: weakref.ReferenceType):
self.synthRef = synthRef
self._allowDelete = True
- super(SynthDriverBufSink, self).__init__()
+ super().__init__()
def ITTSBufNotifySink_BookMark(self, this, qTimeStamp: int, dwMarkNum: int):
synth = self.synthRef()
@@ -88,7 +88,7 @@ class SynthDriverSink(COMObject):
def __init__(self, synthRef: weakref.ReferenceType):
self.synthRef = synthRef
- super(SynthDriverSink, self).__init__()
+ super().__init__()
def ITTSNotifySinkW_AudioStart(self, this, qTimeStamp: int):
synth = self.synthRef()
From 33fa2d03cb7004a6c2a39bbc93fcca63d7f33595 Mon Sep 17 00:00:00 2001
From: gexgd0419 <55008943+gexgd0419@users.noreply.github.com>
Date: Fri, 10 Jan 2025 12:16:11 +0800
Subject: [PATCH 7/8] Add API breaking change entry
---
user_docs/en/changes.md | 1 +
1 file changed, 1 insertion(+)
diff --git a/user_docs/en/changes.md b/user_docs/en/changes.md
index 790ee2e80ab..6dee8a7a7b3 100644
--- a/user_docs/en/changes.md
+++ b/user_docs/en/changes.md
@@ -174,6 +174,7 @@ Instead, a `callback` property has been added, which returns a function that per
* Because SAPI5 voices now use `nvwave.WavePlayer` to output audio: (#17592, @gexgd0419)
* `synthDrivers.sapi5.SPAudioState` has been removed.
* `synthDrivers.sapi5.SynthDriver.ttsAudioStream` has been removed.
+* `synthDrivers.sapi5.SapiSink.onIndexReached` now expects two arguments: stream number and index. (#17523, @gexgd0419)
#### Deprecations
From bf006898617b6a03f69413ef9182391b013b9301 Mon Sep 17 00:00:00 2001
From: Sean Budd
Date: Fri, 10 Jan 2025 15:56:04 +1100
Subject: [PATCH 8/8] Update user_docs/en/changes.md: remove SapiSink.onIndexReached API change entry
---
user_docs/en/changes.md | 1 -
1 file changed, 1 deletion(-)
diff --git a/user_docs/en/changes.md b/user_docs/en/changes.md
index 6dee8a7a7b3..790ee2e80ab 100644
--- a/user_docs/en/changes.md
+++ b/user_docs/en/changes.md
@@ -174,7 +174,6 @@ Instead, a `callback` property has been added, which returns a function that per
* Because SAPI5 voices now use `nvwave.WavePlayer` to output audio: (#17592, @gexgd0419)
* `synthDrivers.sapi5.SPAudioState` has been removed.
* `synthDrivers.sapi5.SynthDriver.ttsAudioStream` has been removed.
-* `synthDrivers.sapi5.SapiSink.onIndexReached` now expects two arguments: stream number and index. (#17523, @gexgd0419)
#### Deprecations