nvaccess · seanbudd · Jan 12, 2025 · Dec 14, 2024 · Dec 16, 2024 · Dec 16, 2024
@@ -4,7 +4,7 @@
 # See the file COPYING for more details.
 
 import locale
-from collections import OrderedDict
+from collections import OrderedDict, deque
 import winreg
 from comtypes import CoCreateInstance, COMObject, COMError, GUID
 from ctypes import byref, c_ulong, POINTER
@@ -20,6 +20,7 @@
 	ITTSBufNotifySink,
 	ITTSCentralW,
 	ITTSEnumW,
+	ITTSNotifySinkW,
 	TextSDATA,
 	TTSATTR_MAXPITCH,
 	TTSATTR_MAXSPEED,
@@ -57,9 +58,9 @@ class SynthDriverBufSink(COMObject):
 	def __init__(self, synthRef: weakref.ReferenceType):
 		self.synthRef = synthRef
 		self._allowDelete = True
-		super(SynthDriverBufSink, self).__init__()
+		super().__init__()
 
-	def ITTSBufNotifySink_BookMark(self, this, qTimeStamp, dwMarkNum):
+	def ITTSBufNotifySink_BookMark(self, this, qTimeStamp: int, dwMarkNum: int):
 		synth = self.synthRef()
 		if synth is None:
 			log.debugWarning(
@@ -70,6 +71,10 @@ def ITTSBufNotifySink_BookMark(self, this, qTimeStamp, dwMarkNum):
 		if synth._finalIndex == dwMarkNum:
 			synth._finalIndex = None
 			synthDoneSpeaking.notify(synth=synth)
+		# remove already triggered bookmarks
+		while synth._bookmarks:
+			if synth._bookmarks.popleft() == dwMarkNum:
+				break
 
 	def IUnknown_Release(self, this, *args, **kwargs):
 		if not self._allowDelete and self._refcnt.value == 1:
@@ -78,6 +83,42 @@ def IUnknown_Release(self, this, *args, **kwargs):
 		return super(SynthDriverBufSink, self).IUnknown_Release(this, *args, **kwargs)
 
 
+class SynthDriverSink(COMObject):  # COM sink receiving the ITTSNotifySinkW audio start/stop callbacks
+	_com_interfaces_ = [ITTSNotifySinkW]
+
+	def __init__(self, synthRef: weakref.ReferenceType):
+		self.synthRef = synthRef  # weak reference to the driver; calling it yields None once the driver is dead
+		super().__init__()
+
+	def ITTSNotifySinkW_AudioStart(self, this, qTimeStamp: int):
+		synth = self.synthRef()
+		if synth is None:
+			log.debugWarning(
+				"Called ITTSNotifySinkW_AudioStart method on ITTSNotifySinkW while driver is dead",
+			)
+			return
+		if synth._bookmarkLists:
+			# speak() queues one bookmark list per call; the oldest queued list becomes the current one.
+			synth._bookmarks = synth._bookmarkLists.popleft()
+
+	def ITTSNotifySinkW_AudioStop(self, this, qTimeStamp: int):
+		synth = self.synthRef()
+		if synth is None:
+			log.debugWarning(
+				"Called ITTSNotifySinkW_AudioStop method on ITTSNotifySinkW while driver is dead",
+			)
+			return
+		# Fire synthIndexReached for every bookmark still left in the current list (the buf sink pops reported ones).
+		if synth._bookmarks:
+			while synth._bookmarks:
+				synthIndexReached.notify(synth=synth, index=synth._bookmarks.popleft())
+			# Bookmarks were still pending, so synthDoneSpeaking hasn't been triggered yet.
+			# Clear the final index and trigger synthDoneSpeaking after triggering all bookmarks.
+			synth._finalIndex = None
+			synthDoneSpeaking.notify(synth=synth)
+		synth._bookmarks = None  # no current bookmark list until the next AudioStart installs one
+
+
 class SynthDriver(SynthDriver):
 	name = "sapi4"
 	description = "Microsoft Speech API version 4"
@@ -115,6 +156,12 @@ def _fetchEnginesList(self):
 
 	def __init__(self):
 		self._finalIndex: Optional[int] = None
+		self._ttsCentral = None
+		self._sinkRegKey = DWORD()
+		self._bookmarks = None
+		self._bookmarkLists = deque()
+		self._sink = SynthDriverSink(weakref.ref(self))
+		self._sinkPtr = self._sink.QueryInterface(ITTSNotifySinkW)
 		self._bufSink = SynthDriverBufSink(weakref.ref(self))
 		self._bufSinkPtr = self._bufSink.QueryInterface(ITTSBufNotifySink)
 		# HACK: Some buggy engines call Release() too many times on our buf sink.
@@ -133,6 +180,7 @@ def speak(self, speechSequence: SpeechSequence):
 		textList = []
 		charMode = False
 		unprocessedSequence = speechSequence
+		bookmarks = deque()
 		# #15500: Some SAPI4 voices reset all prosody when they receive any prosody command,
 		# whereas other voices never undo prosody changes when a sequence is interrupted.
 		# Add all default values to the start and end of the sequence,
@@ -153,6 +201,7 @@ def speak(self, speechSequence: SpeechSequence):
 				textList.append(item.replace("\\", "\\\\"))
 			elif isinstance(item, IndexCommand):
 				textList.append("\\mrk=%d\\" % item.index)
+				bookmarks.append(item.index)
 				lastHandledIndexInSequence = item.index
 			elif isinstance(item, CharacterModeCommand):
 				textList.append("\\RmS=1\\" if item.state else "\\RmS=0\\")
@@ -187,6 +236,7 @@ def speak(self, speechSequence: SpeechSequence):
 		# Therefore we add the pause of 1ms at the end
 		textList.append("\\PAU=1\\")
 		text = "".join(textList)
+		self._bookmarkLists.append(bookmarks)
 		flags = TTSDATAFLAG_TAGGED
 		self._ttsCentral.TextData(
 			VOICECHARSET.CHARSET_TEXT,
@@ -198,6 +248,9 @@ def speak(self, speechSequence: SpeechSequence):
 
 	def cancel(self):
 		try:
+			# cancel all pending bookmarks
+			self._bookmarkLists.clear()
+			self._bookmarks = None
 			self._ttsCentral.AudioReset()
 		except COMError:
 			log.error("Error cancelling speech", exc_info=True)
@@ -234,8 +287,11 @@ def _set_voice(self, val):
 		self._currentMode = mode
 		self._ttsAudio = CoCreateInstance(CLSID_MMAudioDest, IAudioMultiMediaDevice)
 		self._ttsAudio.DeviceNumSet(nvwave.outputDeviceNameToID(config.conf["audio"]["outputDevice"], True))
+		if self._ttsCentral:
+			self._ttsCentral.UnRegister(self._sinkRegKey)
 		self._ttsCentral = POINTER(ITTSCentralW)()
 		self._ttsEngines.Select(self._currentMode.gModeID, byref(self._ttsCentral), self._ttsAudio)
+		self._ttsCentral.Register(self._sinkPtr, ITTSNotifySinkW._iid_, byref(self._sinkRegKey))
 		self._ttsAttrs = self._ttsCentral.QueryInterface(ITTSAttributes)
 		# Find out rate limits
 		hasRate = bool(mode.dwFeatures & TTSFEATURE_SPEED)

@@ -7,7 +7,7 @@
 from ctypes import POINTER, c_ubyte, c_wchar_p, cast, windll, _Pointer
 from enum import IntEnum
 import locale
-from collections import OrderedDict
+from collections import OrderedDict, deque
 from typing import TYPE_CHECKING
 from comInterfaces.SpeechLib import ISpEventSource, ISpNotifySource, ISpNotifySink
 import comtypes.client
@@ -153,6 +153,15 @@ def ISpNotifySink_Notify(self):
 
 	def StartStream(self, streamNum: int, pos: int):
+		"""Handle the start of stream ``streamNum``: activate its queued bookmark list and mark the synth as speaking."""
 		synth = self.synthRef()
+		if synth is None:
+			# The weak reference is dead, so there is no driver state left to update.
+			log.debugWarning("Called StartStream method on SapiSink while driver is dead")
+			return
+		# The stream has been started. Move the bookmark list to _streamBookmarks.
+		if streamNum in synth._streamBookmarksNew:
+			synth._streamBookmarks[streamNum] = synth._streamBookmarksNew[streamNum]
+			del synth._streamBookmarksNew[streamNum]
 		synth.isSpeaking = True
 
 	def Bookmark(self, streamNum: int, pos: int, bookmark: str, bookmarkId: int):
@@ -161,20 +165,36 @@ def Bookmark(self, streamNum: int, pos: int, bookmark: str, bookmarkId: int):
 			return
 		# Bookmark event is raised before the audio after that point.
 		# Queue an IndexReached event at this point.
-		synth.player.feed(None, 0, lambda: self.onIndexReached(bookmarkId))
+		synth.player.feed(None, 0, lambda: self.onIndexReached(streamNum, bookmarkId))
 
 	def EndStream(self, streamNum: int, pos: int):
+		"""Handle the end of stream ``streamNum``: flush its pending bookmarks, then report that speech is done."""
 		synth = self.synthRef()
+		if synth is None:
+			log.debugWarning("Called EndStream method on SapiSink while driver is dead")
+			return
+		# Notify every bookmark of this stream that onIndexReached has not removed yet, then forget the stream.
+		if streamNum in synth._streamBookmarks:
+			for bookmark in synth._streamBookmarks[streamNum]:
+				synthIndexReached.notify(synth=synth, index=bookmark)
+			del synth._streamBookmarks[streamNum]
 		synth.isSpeaking = False
 		synth.player.idle()
 		synthDoneSpeaking.notify(synth=synth)
 
-	def onIndexReached(self, index: int):
+	def onIndexReached(self, streamNum: int, index: int):
+		"""Notify that ``index`` was reached and drop it (and earlier entries) from the stream's pending bookmarks."""
 		synth = self.synthRef()
 		if synth is None:
 			log.debugWarning("Called onIndexReached method on SapiSink while driver is dead")
 			return
 		synthIndexReached.notify(synth=synth, index=index)
+		# Pop pending bookmarks up to and including this one so EndStream does not notify them again.
+		if streamNum in synth._streamBookmarks:
+			bookmarks = synth._streamBookmarks[streamNum]
+			while bookmarks:
+				if bookmarks.popleft() == index:
+					break
 
 
 class SynthDriver(SynthDriver):
@@ -220,6 +235,9 @@ def __init__(self, _defaultVoiceToken=None):
 		self.player = None
 		self.isSpeaking = False
 		self._initTts(_defaultVoiceToken)
+		# key = stream num, value = deque of bookmarks
+		self._streamBookmarks = dict()  # bookmarks in currently speaking streams
+		self._streamBookmarksNew = dict()  # bookmarks for streams that haven't been started
 
 	def terminate(self):
 		self.tts = None
@@ -358,6 +376,7 @@ def _convertPhoneme(self, ipa):
 
 	def speak(self, speechSequence):
 		textList = []
+		bookmarks = deque()
 
 		# NVDA SpeechCommands are linear, but XML is hierarchical.
 		# Therefore, we track values for non-empty tags.
@@ -393,6 +412,7 @@ def outputTags():
 				textList.append(item.replace("<", "&lt;"))
 			elif isinstance(item, IndexCommand):
 				textList.append('<Bookmark Mark="%d" />' % item.index)
+				bookmarks.append(item.index)
 			elif isinstance(item, CharacterModeCommand):
 				if item.state:
 					tags["spell"] = {}
@@ -459,7 +479,10 @@ def outputTags():
 
 		text = "".join(textList)
 		flags = SpeechVoiceSpeakFlags.IsXML | SpeechVoiceSpeakFlags.Async
-		self.tts.Speak(text, flags)
+		streamNum = self.tts.Speak(text, flags)
+		# When Speak returns, the previous stream may not have been ended.
+		# So the bookmark list is stored in another dict until this stream starts.
+		self._streamBookmarksNew[streamNum] = bookmarks
 
 	def cancel(self):
 		# SAPI5's default means of stopping speech can sometimes lag at end of speech, especially with Win8 / Win 10 Microsoft Voices.

@@ -86,6 +86,7 @@ In any document, if the cursor is on the last line, it will be moved to the end
 * The Humanware Brailliant driver is now more reliable in selecting the right connection endpoint, resulting in better connection stability and less errors.  (#17537, @LeonarddeR)
 * Custom braille tables in the developer scratchpad are now properly ignored when running with add-ons disabled. (#17565, @LeonarddeR)
 * Fix issue with certain section elements not being recognized as editable controls in Visual Studio Code. (#17573, @Cary-rowen)
+* Fixed an issue where continuous reading (say all) stopped at the end of the first sentence when using some SAPI5 synthesizers. (#16691, @gexgd0419)
 
 ### Changes for Developers