Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make continuous reading work when using SAPI5 voices without bookmark support #17523

Merged
merged 12 commits into from
Jan 12, 2025
62 changes: 59 additions & 3 deletions source/synthDrivers/sapi4.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# See the file COPYING for more details.

import locale
from collections import OrderedDict
from collections import OrderedDict, deque
import winreg
from comtypes import CoCreateInstance, COMObject, COMError, GUID
from ctypes import byref, c_ulong, POINTER
Expand All @@ -20,6 +20,7 @@
ITTSBufNotifySink,
ITTSCentralW,
ITTSEnumW,
ITTSNotifySinkW,
TextSDATA,
TTSATTR_MAXPITCH,
TTSATTR_MAXSPEED,
Expand Down Expand Up @@ -57,9 +58,9 @@ class SynthDriverBufSink(COMObject):
def __init__(self, synthRef: weakref.ReferenceType):
self.synthRef = synthRef
self._allowDelete = True
super(SynthDriverBufSink, self).__init__()
super().__init__()

def ITTSBufNotifySink_BookMark(self, this, qTimeStamp, dwMarkNum):
def ITTSBufNotifySink_BookMark(self, this, qTimeStamp: int, dwMarkNum: int):
synth = self.synthRef()
if synth is None:
log.debugWarning(
Expand All @@ -70,6 +71,10 @@ def ITTSBufNotifySink_BookMark(self, this, qTimeStamp, dwMarkNum):
if synth._finalIndex == dwMarkNum:
synth._finalIndex = None
synthDoneSpeaking.notify(synth=synth)
# remove already triggered bookmarks
while synth._bookmarks:
if synth._bookmarks.popleft() == dwMarkNum:
break

def IUnknown_Release(self, this, *args, **kwargs):
if not self._allowDelete and self._refcnt.value == 1:
Expand All @@ -78,6 +83,42 @@ def IUnknown_Release(self, this, *args, **kwargs):
return super(SynthDriverBufSink, self).IUnknown_Release(this, *args, **kwargs)


class SynthDriverSink(COMObject):
    """Notification sink that tracks which bookmarks belong to the audio currently playing.

    The driver queues one deque of bookmark indexes per ``speak`` call in
    ``_bookmarkLists``. When audio starts, the oldest deque becomes the
    driver's active ``_bookmarks``; when audio stops, any bookmark still left
    in the active deque is reported so that no index is lost.
    """

    _com_interfaces_ = [ITTSNotifySinkW]

    def __init__(self, synthRef: weakref.ReferenceType):
        """
        :param synthRef: Weak reference to the owning synth driver.
        """
        self.synthRef = synthRef
        super().__init__()

    def ITTSNotifySinkW_AudioStart(self, this, qTimeStamp: int):
        """Make the oldest queued bookmark deque the active one, if any is queued."""
        driver = self.synthRef()
        if driver is None:
            log.debugWarning(
                "Called ITTSNotifySinkW_AudioStart method on ITTSNotifySinkW while driver is dead",
            )
            return
        if not driver._bookmarkLists:
            # Nothing queued: leave the active bookmarks untouched.
            return
        driver._bookmarks = driver._bookmarkLists.popleft()

    def ITTSNotifySinkW_AudioStop(self, this, qTimeStamp: int):
        """Report every bookmark that is still pending, then clear the active deque."""
        driver = self.synthRef()
        if driver is None:
            log.debugWarning(
                "Called ITTSNotifySinkW_AudioStop method on ITTSNotifySinkW while driver is dead",
            )
            return
        if not driver._bookmarks:
            # No leftovers (or no active deque at all): just reset the slot.
            driver._bookmarks = None
            return
        # The attribute is re-read on every pass on purpose, so the loop stops
        # if the active deque is replaced or cleared while notifications run.
        while driver._bookmarks:
            synthIndexReached.notify(synth=driver, index=driver._bookmarks.popleft())
        # Leftover bookmarks mean synthDoneSpeaking has not been sent yet;
        # send it now that all indexes have been reported.
        driver._finalIndex = None
        synthDoneSpeaking.notify(synth=driver)
        driver._bookmarks = None


class SynthDriver(SynthDriver):
name = "sapi4"
description = "Microsoft Speech API version 4"
Expand Down Expand Up @@ -115,6 +156,12 @@ def _fetchEnginesList(self):

def __init__(self):
self._finalIndex: Optional[int] = None
self._ttsCentral = None
self._sinkRegKey = DWORD()
self._bookmarks = None
self._bookmarkLists = deque()
self._sink = SynthDriverSink(weakref.ref(self))
self._sinkPtr = self._sink.QueryInterface(ITTSNotifySinkW)
self._bufSink = SynthDriverBufSink(weakref.ref(self))
self._bufSinkPtr = self._bufSink.QueryInterface(ITTSBufNotifySink)
# HACK: Some buggy engines call Release() too many times on our buf sink.
Expand All @@ -133,6 +180,7 @@ def speak(self, speechSequence: SpeechSequence):
textList = []
charMode = False
unprocessedSequence = speechSequence
bookmarks = deque()
# #15500: Some SAPI4 voices reset all prosody when they receive any prosody command,
# whereas other voices never undo prosody changes when a sequence is interrupted.
# Add all default values to the start and end of the sequence,
Expand All @@ -153,6 +201,7 @@ def speak(self, speechSequence: SpeechSequence):
textList.append(item.replace("\\", "\\\\"))
elif isinstance(item, IndexCommand):
textList.append("\\mrk=%d\\" % item.index)
bookmarks.append(item.index)
lastHandledIndexInSequence = item.index
elif isinstance(item, CharacterModeCommand):
textList.append("\\RmS=1\\" if item.state else "\\RmS=0\\")
Expand Down Expand Up @@ -187,6 +236,7 @@ def speak(self, speechSequence: SpeechSequence):
# Therefore we add the pause of 1ms at the end
textList.append("\\PAU=1\\")
text = "".join(textList)
self._bookmarkLists.append(bookmarks)
flags = TTSDATAFLAG_TAGGED
self._ttsCentral.TextData(
VOICECHARSET.CHARSET_TEXT,
Expand All @@ -198,6 +248,9 @@ def speak(self, speechSequence: SpeechSequence):

def cancel(self):
try:
# cancel all pending bookmarks
self._bookmarkLists.clear()
self._bookmarks = None
self._ttsCentral.AudioReset()
except COMError:
log.error("Error cancelling speech", exc_info=True)
Expand Down Expand Up @@ -234,8 +287,11 @@ def _set_voice(self, val):
self._currentMode = mode
self._ttsAudio = CoCreateInstance(CLSID_MMAudioDest, IAudioMultiMediaDevice)
self._ttsAudio.DeviceNumSet(nvwave.outputDeviceNameToID(config.conf["audio"]["outputDevice"], True))
if self._ttsCentral:
self._ttsCentral.UnRegister(self._sinkRegKey)
self._ttsCentral = POINTER(ITTSCentralW)()
self._ttsEngines.Select(self._currentMode.gModeID, byref(self._ttsCentral), self._ttsAudio)
self._ttsCentral.Register(self._sinkPtr, ITTSNotifySinkW._iid_, byref(self._sinkRegKey))
self._ttsAttrs = self._ttsCentral.QueryInterface(ITTSAttributes)
# Find out rate limits
hasRate = bool(mode.dwFeatures & TTSFEATURE_SPEED)
Expand Down
31 changes: 27 additions & 4 deletions source/synthDrivers/sapi5.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from ctypes import POINTER, c_ubyte, c_wchar_p, cast, windll, _Pointer
from enum import IntEnum
import locale
from collections import OrderedDict
from collections import OrderedDict, deque
from typing import TYPE_CHECKING
from comInterfaces.SpeechLib import ISpEventSource, ISpNotifySource, ISpNotifySink
import comtypes.client
Expand Down Expand Up @@ -153,6 +153,10 @@ def ISpNotifySink_Notify(self):

def StartStream(self, streamNum: int, pos: int):
    """Mark stream ``streamNum`` as started and activate its bookmark list.

    ``speak`` stores each stream's bookmark deque in the driver's
    ``_streamBookmarksNew``; once the stream starts, that deque is moved to
    ``_streamBookmarks`` and the driver is flagged as speaking.

    :param streamNum: Number of the stream that has started.
    :param pos: Position reported with the event; not used here.
    """
    synth = self.synthRef()
    if synth is None:
        # Same guard as Bookmark/onIndexReached: without it a dead driver
        # would raise AttributeError on the attribute accesses below.
        log.debugWarning("Called StartStream method on SapiSink while driver is dead")
        return
    # Move the pending bookmark list (if any) over to the active streams.
    bookmarks = synth._streamBookmarksNew.pop(streamNum, None)
    if bookmarks is not None:
        synth._streamBookmarks[streamNum] = bookmarks
    synth.isSpeaking = True

def Bookmark(self, streamNum: int, pos: int, bookmark: str, bookmarkId: int):
Expand All @@ -161,20 +165,31 @@ def Bookmark(self, streamNum: int, pos: int, bookmark: str, bookmarkId: int):
return
# Bookmark event is raised before the audio after that point.
# Queue an IndexReached event at this point.
synth.player.feed(None, 0, lambda: self.onIndexReached(bookmarkId))
synth.player.feed(None, 0, lambda: self.onIndexReached(streamNum, bookmarkId))

def EndStream(self, streamNum: int, pos: int):
    """Finish stream ``streamNum``: flush its pending bookmarks and signal completion.

    Every bookmark still recorded for the stream is reported through
    ``synthIndexReached``; afterwards the driver is flagged as not speaking,
    the player is idled and ``synthDoneSpeaking`` is sent.

    :param streamNum: Number of the stream that has ended.
    :param pos: Position reported with the event; not used here.
    """
    synth = self.synthRef()
    if synth is None:
        # Same guard as Bookmark/onIndexReached: without it a dead driver
        # would raise AttributeError on the attribute accesses below.
        log.debugWarning("Called EndStream method on SapiSink while driver is dead")
        return
    # Remove the stream's entry before notifying, so it cannot be left behind
    # if a notification handler raises. Streams without an entry yield nothing.
    for bookmark in synth._streamBookmarks.pop(streamNum, ()):
        synthIndexReached.notify(synth=synth, index=bookmark)
    synth.isSpeaking = False
    synth.player.idle()
    synthDoneSpeaking.notify(synth=synth)

def onIndexReached(self, index: int):
def onIndexReached(self, streamNum: int, index: int):
seanbudd marked this conversation as resolved.
Show resolved Hide resolved
synth = self.synthRef()
if synth is None:
log.debugWarning("Called onIndexReached method on SapiSink while driver is dead")
return
synthIndexReached.notify(synth=synth, index=index)
# remove already triggered bookmarks
if streamNum in synth._streamBookmarks:
bookmarks = synth._streamBookmarks[streamNum]
while bookmarks:
if bookmarks.popleft() == index:
break


class SynthDriver(SynthDriver):
Expand Down Expand Up @@ -220,6 +235,9 @@ def __init__(self, _defaultVoiceToken=None):
self.player = None
self.isSpeaking = False
self._initTts(_defaultVoiceToken)
# key = stream num, value = deque of bookmarks
self._streamBookmarks = dict() # bookmarks in currently speaking streams
self._streamBookmarksNew = dict() # bookmarks for streams that haven't been started

def terminate(self):
self.tts = None
Expand Down Expand Up @@ -358,6 +376,7 @@ def _convertPhoneme(self, ipa):

def speak(self, speechSequence):
textList = []
bookmarks = deque()

# NVDA SpeechCommands are linear, but XML is hierarchical.
# Therefore, we track values for non-empty tags.
Expand Down Expand Up @@ -393,6 +412,7 @@ def outputTags():
textList.append(item.replace("<", "&lt;"))
elif isinstance(item, IndexCommand):
textList.append('<Bookmark Mark="%d" />' % item.index)
bookmarks.append(item.index)
elif isinstance(item, CharacterModeCommand):
if item.state:
tags["spell"] = {}
Expand Down Expand Up @@ -459,7 +479,10 @@ def outputTags():

text = "".join(textList)
flags = SpeechVoiceSpeakFlags.IsXML | SpeechVoiceSpeakFlags.Async
self.tts.Speak(text, flags)
streamNum = self.tts.Speak(text, flags)
# When Speak returns, the previous stream may not have been ended.
# So the bookmark list is stored in another dict until this stream starts.
self._streamBookmarksNew[streamNum] = bookmarks

def cancel(self):
# SAPI5's default means of stopping speech can sometimes lag at end of speech, especially with Win8 / Win 10 Microsoft Voices.
Expand Down
1 change: 1 addition & 0 deletions user_docs/en/changes.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ In any document, if the cursor is on the last line, it will be moved to the end
* The Humanware Brailliant driver is now more reliable in selecting the right connection endpoint, resulting in better connection stability and fewer errors. (#17537, @LeonarddeR)
* Custom braille tables in the developer scratchpad are now properly ignored when running with add-ons disabled. (#17565, @LeonarddeR)
* Fix issue with certain section elements not being recognized as editable controls in Visual Studio Code. (#17573, @Cary-rowen)
* Fixed an issue where continuous reading (say all) stopped at the end of the first sentence when using some SAPI5 synthesizers. (#16691, @gexgd0419)

### Changes for Developers

Expand Down
Loading