diff --git a/source/braille.py b/source/braille.py index d8eed16d0ce..6b881da0ef6 100644 --- a/source/braille.py +++ b/source/braille.py @@ -64,6 +64,7 @@ import brailleViewer from autoSettingsUtils.driverSetting import BooleanDriverSetting, NumericDriverSetting from utils.security import objectBelowLockScreenAndWindowsIsLocked +from textUtils import isUnicodeNormalized, UnicodeNormalizationOffsetConverter import hwIo from editableText import EditableText @@ -496,13 +497,40 @@ def update(self): mode = louis.dotsIO if config.conf["braille"]["expandAtCursor"] and self.cursorPos is not None: mode |= louis.compbrlAtCursor - self.brailleCells, self.brailleToRawPos, self.rawToBraillePos, self.brailleCursorPos = louisHelper.translate( + + converter: UnicodeNormalizationOffsetConverter | None = None + if config.conf["braille"]["unicodeNormalization"] and not isUnicodeNormalized(self.rawText): + converter = UnicodeNormalizationOffsetConverter(self.rawText) + textToTranslate = converter.encoded + # Typeforms must be adapted to represent normalized characters. + textToTranslateTypeforms = [ + self.rawTextTypeforms[strOffset] for strOffset in converter.computedEncodedToStrOffsets + ] + # Convert the cursor position to a normalized offset. + cursorPos = converter.strToEncodedOffsets(self.cursorPos) + else: + textToTranslate = self.rawText + textToTranslateTypeforms = self.rawTextTypeforms + cursorPos = self.cursorPos + + self.brailleCells, brailleToRawPos, rawToBraillePos, self.brailleCursorPos = louisHelper.translate( [handler.table.fileName, "braille-patterns.cti"], - self.rawText, - typeform=self.rawTextTypeforms, + textToTranslate, + typeform=textToTranslateTypeforms, mode=mode, - cursorPos=self.cursorPos + cursorPos=cursorPos ) + + if converter: + # The received brailleToRawPos contains braille to normalized positions. + # Process them to represent real raw positions by converting them from normalized ones. 
+ brailleToRawPos = [converter.encodedToStrOffsets(i) for i in brailleToRawPos] + # The received rawToBraillePos contains normalized to braille positions. + # Create a new list based on real raw positions. + rawToBraillePos = [rawToBraillePos[i] for i in converter.computedStrToEncodedOffsets] + self.brailleToRawPos = brailleToRawPos + self.rawToBraillePos = rawToBraillePos + if ( self.selectionStart is not None and self.selectionEnd is not None diff --git a/source/config/configSpec.py b/source/config/configSpec.py index bfb9cf7288a..684d113f94b 100644 --- a/source/config/configSpec.py +++ b/source/config/configSpec.py @@ -35,6 +35,7 @@ # symbolLevel: One of the characterProcessing.SymbolLevel values. symbolLevel = integer(default=100) trustVoiceLanguage = boolean(default=true) + unicodeNormalization = featureFlag(optionsEnum="BoolFlag", behaviorOfDefault="disabled") includeCLDR = boolean(default=True) beepSpeechModePitch = integer(default=10000,min=50,max=11025) outputDevice = string(default=default) @@ -82,6 +83,7 @@ optionsEnum="ReviewRoutingMovesSystemCaretFlag", behaviorOfDefault="NEVER") readByParagraph = boolean(default=false) wordWrap = boolean(default=true) + unicodeNormalization = featureFlag(optionsEnum="BoolFlag", behaviorOfDefault="disabled") focusContextPresentation = option("changedContext", "fill", "scroll", default="changedContext") interruptSpeechWhileScrolling = featureFlag(optionsEnum="BoolFlag", behaviorOfDefault="enabled") showSelection = featureFlag(optionsEnum="BoolFlag", behaviorOfDefault="enabled") diff --git a/source/gui/settingsDialogs.py b/source/gui/settingsDialogs.py index 644fd0d2984..999876eb0c2 100644 --- a/source/gui/settingsDialogs.py +++ b/source/gui/settingsDialogs.py @@ -1589,6 +1589,17 @@ def makeSettings(self, settingsSizer): self.bindHelpEvent("SpeechSettingsTrust", self.trustVoiceLanguageCheckbox) self.trustVoiceLanguageCheckbox.SetValue(config.conf["speech"]["trustVoiceLanguage"]) + self.unicodeNormalizationCombo: 
nvdaControls.FeatureFlagCombo = settingsSizerHelper.addLabeledControl( + labelText=_( + # Translators: This is a label for a combo-box in the Speech settings panel. + "Unicode normali&zation" + ), + wxCtrlClass=nvdaControls.FeatureFlagCombo, + keyPath=["speech", "unicodeNormalization"], + conf=config.conf, + ) + self.bindHelpEvent("SpeechUnicodeNormalization", self.unicodeNormalizationCombo) + includeCLDRText = _( # Translators: This is the label for a checkbox in the # voice settings panel (if checked, data from the unicode CLDR will be used @@ -1701,6 +1712,7 @@ def onSave(self): self.symbolLevelList.GetSelection() ].value config.conf["speech"]["trustVoiceLanguage"] = self.trustVoiceLanguageCheckbox.IsChecked() + self.unicodeNormalizationCombo.saveCurrentValueToConf() currentIncludeCLDR = config.conf["speech"]["includeCLDR"] config.conf["speech"]["includeCLDR"] = newIncludeCldr = self.includeCLDRCheckbox.IsChecked() if currentIncludeCLDR is not newIncludeCldr: @@ -4145,6 +4157,17 @@ def makeSettings(self, settingsSizer): self.bindHelpEvent("BrailleSettingsWordWrap", self.wordWrapCheckBox) self.wordWrapCheckBox.Value = config.conf["braille"]["wordWrap"] + self.unicodeNormalizationCombo: nvdaControls.FeatureFlagCombo = sHelper.addLabeledControl( + labelText=_( + # Translators: This is a label for a combo-box in the Braille settings panel. + "Unicode normali&zation" + ), + wxCtrlClass=nvdaControls.FeatureFlagCombo, + keyPath=["braille", "unicodeNormalization"], + conf=config.conf, + ) + self.bindHelpEvent("BrailleUnicodeNormalization", self.unicodeNormalizationCombo) + self.brailleInterruptSpeechCombo: nvdaControls.FeatureFlagCombo = sHelper.addLabeledControl( labelText=_( # Translators: This is a label for a combo-box in the Braille settings panel. 
@@ -4184,6 +4207,7 @@ def onSave(self): self.brailleReviewRoutingMovesSystemCaretCombo.saveCurrentValueToConf() config.conf["braille"]["readByParagraph"] = self.readByParagraphCheckBox.Value config.conf["braille"]["wordWrap"] = self.wordWrapCheckBox.Value + self.unicodeNormalizationCombo.saveCurrentValueToConf() config.conf["braille"]["focusContextPresentation"] = self.focusContextPresentationValues[self.focusContextPresentationList.GetSelection()] self.brailleInterruptSpeechCombo.saveCurrentValueToConf() self.brailleShowSelectionCombo.saveCurrentValueToConf() diff --git a/source/speech/speech.py b/source/speech/speech.py index f2a8b9a0b06..bcf01d55b99 100644 --- a/source/speech/speech.py +++ b/source/speech/speech.py @@ -25,6 +25,7 @@ import speechDictHandler import characterProcessing import languageHandler +from textUtils import unicodeNormalize from . import manager from .extensions import speechCanceled, pre_speechCanceled, pre_speech from .extensions import filter_speechSequence, speechCanceled @@ -1568,6 +1569,8 @@ def getTextInfoSpeech( # noqa: C901 # There was content after the indentation, so there is no more indentation. 
indentationDone=True if command: + if config.conf["speech"]["unicodeNormalization"]: + command = unicodeNormalize(command) if inTextChunk: relativeSpeechSequence[-1]+=command else: @@ -1775,7 +1778,7 @@ def getPropertiesSpeech( # noqa: C901 reason: OutputReason = OutputReason.QUERY, **propertyValues ) -> SpeechSequence: - textList: List[str] = [] + textList: SpeechSequence = [] name: Optional[str] = propertyValues.get('name') if name: textList.append(name) @@ -1968,7 +1971,11 @@ def getPropertiesSpeech( # noqa: C901 errorMessage: str | None = propertyValues.get("errorMessage", None) if errorMessage: textList.append(errorMessage) - + if config.conf["speech"]["unicodeNormalization"]: + textList = [ + unicodeNormalize(t) if isinstance(t, str) else t + for t in textList + ] types.logBadSequenceTypes(textList) return textList diff --git a/source/textUtils.py b/source/textUtils.py index 3d13a70377b..cc9d6a72e68 100644 --- a/source/textUtils.py +++ b/source/textUtils.py @@ -1,30 +1,32 @@ -# -*- coding: UTF-8 -*- # A part of NonVisual Desktop Access (NVDA) # This file is covered by the GNU General Public License. # See the file COPYING for more details. -# Copyright (C) 2018-2021 NV Access Limited, Babbage B.V., Łukasz Golonka +# Copyright (C) 2018-2024 NV Access Limited, Babbage B.V., Łukasz Golonka """ Classes and utilities to deal with offsets variable width encodings, particularly utf_16. 
""" -import encodings -import sys import ctypes -from collections.abc import ByteString -from typing import Tuple, Optional, Type +import encodings import locale +import unicodedata +from abc import ABCMeta, abstractmethod, abstractproperty +from collections import defaultdict +from difflib import ndiff +from functools import cached_property +from typing import Optional, Tuple, Type + from logHandler import log -from abc import abstractmethod WCHAR_ENCODING = "utf_16_le" UTF8_ENCODING = "utf-8" USER_ANSI_CODE_PAGE = locale.getpreferredencoding() -class OffsetConverter: +class OffsetConverter(metaclass=ABCMeta): decoded: str - + def __init__(self, text: str): if not isinstance(text, str): raise TypeError("Value must be of type str") @@ -33,7 +35,7 @@ def __init__(self, text: str): def __repr__(self): return f"{self.__class__.__name__}({repr(self.decoded)})" - @property + @abstractproperty def encodedStringLength(self) -> int: """Returns the length of the string in itssubclass-specific encoded representation.""" raise NotImplementedError @@ -385,8 +387,6 @@ class IdentityOffsetConverter(OffsetConverter): This is a dummy converter that assumes 1:1 correspondence between encoded and decoded characters. """ - _encoding: str = UTF8_ENCODING - def __init__(self, text: str): super().__init__(text) @@ -417,6 +417,182 @@ def encodedToStrOffsets( return (encodedStart, encodedEnd) +DEFAULT_UNICODE_NORMALIZATION_ALGORITHM = "NFKC" + + +class UnicodeNormalizationOffsetConverter(OffsetConverter): + """ + Object that holds a string in both its decoded and its unicode normalized form. + The object allows for easy conversion between offsets in strings which may or may not be normalized, + + For example, when using the NFKC algorithm, the "ij" ligature normalizes to "ij", + which takes two characters instead of one. 
+ """ + normalizationForm: str + computedStrToEncodedOffsets: tuple[int] + computedEncodedToStrOffsets: tuple[int] + + def __init__(self, text: str, normalizationForm: str = DEFAULT_UNICODE_NORMALIZATION_ALGORITHM): + super().__init__(text) + self.normalizationForm = normalizationForm + self.encoded: str = unicodedata.normalize(normalizationForm, text) + self.computedStrToEncodedOffsets, self.computedEncodedToStrOffsets = self._calculateOffsets() + + def _calculateOffsets(self) -> tuple[tuple[int], tuple[int]]: + # Initialize a diff list between the decoded original and the normalized string. + diff = list(ndiff(self.decoded, self.encoded)) + diff.append("!") # Closing the diff + # Initialize indices and buffers for tracking positions and changes. + iOrigin = iNormalized = 0 + originBuffer = "" + normalizedBuffer = "" + originToNormalizedDict = defaultdict(list) + normalizedToOriginDict = defaultdict(list) + originPending = normalizedPending = False + # Iterate over each character in the diff list. + for char in diff: + if char[0] == "?": + raise RuntimeError("Unexpected entry in diff") + elif char[0] == "-": + # Accumulate characters in the origin buffer that aren't in the normalized string. + originBuffer += char[2:] + originPending = True + elif char[0] == "+": + # Accumulate characters in the normalized buffer that aren't in the original string. + normalizedBuffer += char[2:] + normalizedPending = True + elif char[0] == " " and ( + (not originPending and normalizedPending) or (originPending and not normalizedPending) + ): + # Accumulate unchanged characters in both buffers. + originBuffer += char[2:] + normalizedBuffer += char[2:] + else: + # Process accumulated characters in the buffers. + while originBuffer and normalizedBuffer: + originPart = "" + originPartLen = 0 + normalizedPart = "" + normalizedPartLen = 0 + # Find the smallest part that can be normalized + # and still matches the beginning of the normalized buffer. 
+ for i in range(len(originBuffer)):
+ originPart = originBuffer[: (i + 1)]
+ normalizedPart = unicodedata.normalize(self.normalizationForm, originPart)
+ if (
+ originPart == normalizedPart
+ or not normalizedBuffer.startswith(normalizedPart)
+ ):
+ continue
+ originPartLen = len(originPart)
+ originBuffer = originBuffer[originPartLen:]
+ normalizedPartLen = len(normalizedPart)
+ normalizedBuffer = normalizedBuffer[normalizedPartLen:]
+ break
+ # Map the original indices to the normalized indices.
+ # originMultiplier is used to multiply indices in origin
+ # when a character takes more space in origin than in normalized.
+ # This is applicable when normalizing letter+modifier compositions to one character.
+ originMultiplier = min(originPartLen / normalizedPartLen, 1)
+ # normalizedMultiplier is used to multiply indices in normalized
+ # when a character takes more space in normalized than in origin.
+ # This is applicable when normalizing one character ligatures
+ # into their two corresponding letters.
+ normalizedMultiplier = min(normalizedPartLen / originPartLen, 1)
+ for i in range(max(originPartLen, normalizedPartLen)):
+ tempOrigin = iOrigin + int(i * originMultiplier)
+ tempNormalized = iNormalized + int(i * normalizedMultiplier)
+ originC = originPart[i] if i < originPartLen else None
+ if originC:
+ # If normalization results in the same characters
+ # but they have moved in the string, for example when normalizing the order of modifiers
+ # on ancient hebrew consonants, the normalized index should be based on
+ # the position of the origin character in normalized.
+ normalizedIndex = normalizedPart.find(originC)
+ if normalizedIndex != -1:
+ tempNormalized = iNormalized + normalizedIndex
+ normalizedC = normalizedPart[i] if i < normalizedPartLen else None
+ if normalizedC:
+ # The origin index should be based on the position
+ # of the normalized character in origin. 
+ originIndex = originPart.find(normalizedC) + if originIndex != -1: + tempOrigin = iOrigin + originIndex + originToNormalizedDict[tempOrigin].append(tempNormalized) + normalizedToOriginDict[tempNormalized].append(tempOrigin) + iOrigin += originPartLen + iNormalized += normalizedPartLen + originPending = normalizedPending = False + if char[0] == " ": + # Map indices directly for unchanged characters. + originToNormalizedDict[iOrigin].append(iNormalized) + normalizedToOriginDict[iNormalized].append(iOrigin) + iOrigin += 1 + iNormalized += 1 + # Finalize the mapping by selecting the minimum index for each original position. + originResult = tuple(map(min, originToNormalizedDict.values())) + assert len(originResult) == len(self.decoded) + normalizedResult = tuple(map(min, normalizedToOriginDict.values())) + assert len(normalizedResult) == len(self.encoded) + return tuple(( + originResult, + normalizedResult + )) + + @cached_property + def encodedStringLength(self) -> int: + """Returns the length of the string in its normalized representation.""" + return len(self.encoded) + + def strToEncodedOffsets( + self, + strStart: int, + strEnd: int | None = None, + raiseOnError: bool = False, + ) -> int | Tuple[int]: + super().strToEncodedOffsets(strStart, strEnd, raiseOnError) + if strStart == 0: + resultStart = 0 + else: + resultStart = self.computedStrToEncodedOffsets[strStart] + if strEnd is None: + return resultStart + elif strStart == strEnd: + return (resultStart, resultStart) + else: + resultEnd = self.computedStrToEncodedOffsets[strEnd] + return (resultStart, resultEnd) + + def encodedToStrOffsets( + self, + encodedStart: int, + encodedEnd: int | None = None, + raiseOnError: bool = False + ) -> int | Tuple[int]: + super().encodedToStrOffsets(encodedStart, encodedEnd, raiseOnError) + if encodedStart == 0: + resultStart = 0 + else: + resultStart = self.computedEncodedToStrOffsets[encodedStart] + if encodedEnd is None: + return resultStart + elif encodedStart == 
encodedEnd: + return (resultStart, resultStart) + else: + resultEnd = self.computedEncodedToStrOffsets[encodedEnd] + return (resultStart, resultEnd) + + +def isUnicodeNormalized(text: str, normalizationForm: str = DEFAULT_UNICODE_NORMALIZATION_ALGORITHM) -> bool: + """Convenience function to wrap unicodedata.is_normalized with a default normalization form.""" + return unicodedata.is_normalized(normalizationForm, text) + + +def unicodeNormalize(text: str, normalizationForm: str = DEFAULT_UNICODE_NORMALIZATION_ALGORITHM) -> str: + """Convenience function to wrap unicodedata.normalize with a default normalization form.""" + return unicodedata.normalize(normalizationForm, text) + + ENCODINGS_TO_CONVERTERS: dict[str, Type[OffsetConverter]] = { WCHAR_ENCODING: WideStringOffsetConverter, UTF8_ENCODING: UTF8OffsetConverter, diff --git a/tests/unit/test_textUtils.py b/tests/unit/test_textUtils.py index f916e38cd31..abf4516c900 100644 --- a/tests/unit/test_textUtils.py +++ b/tests/unit/test_textUtils.py @@ -1,18 +1,18 @@ -# -*- coding: UTF-8 -*- -#tests/unit/test_textUtils.py -#A part of NonVisual Desktop Access (NVDA) -#This file is covered by the GNU General Public License. -#See the file COPYING for more details. -#Copyright (C) 2019 NV Access Limited, Babbage B.V., Leonard de Ruijter +# A part of NonVisual Desktop Access (NVDA) +# This file is covered by the GNU General Public License. +# See the file COPYING for more details. 
+# Copyright (C) 2019-2024 NV Access Limited, Babbage B.V., Leonard de Ruijter """Unit tests for the textUtils module.""" import unittest -from textUtils import WideStringOffsetConverter -FACE_PALM = u"\U0001f926" # 🤦 -SMILE = u"\U0001f60a" # 😊 -THUMBS_UP = u"\U0001f44d" # 👍 +from textUtils import UnicodeNormalizationOffsetConverter, WideStringOffsetConverter + +FACE_PALM = "\U0001f926" # 🤦 +SMILE = "\U0001f60a" # 😊 +THUMBS_UP = "\U0001f44d" # 👍 + class TestStrToWideOffsets(unittest.TestCase): """ @@ -198,6 +198,7 @@ def test_mixedSurrogatePairsNonSurrogatesAndSingleSurrogates(self): self.assertEqual(converter.wideToStrOffsets(5, 6), (4, 5)) self.assertEqual(converter.wideToStrOffsets(6, 6), (5, 5)) + class TestEdgeCases(unittest.TestCase): """ Tests for edge cases, such as offsets out of range of a string, @@ -229,3 +230,41 @@ def test_strToWideOffsets(self): self.assertRaises(IndexError, converter.strToWideOffsets, -1, 0, raiseOnError=True) self.assertRaises(IndexError, converter.strToWideOffsets, 0, 4, raiseOnError=True) self.assertRaises(ValueError, converter.strToWideOffsets, 1, 0) + + +class TestUnicodeNormalizationOffsetConverter(unittest.TestCase): + """Tests for unicode normalization using the UnicodeNormalizationOffsetConverter""" + + def test_normalizedOffsetsSentence(self): + text = "Één eigenwijze geïnteresseerde ijsbeer" + converter = UnicodeNormalizationOffsetConverter(text, "NFKC") + expectedStrToEncoded = ( + 0, 0, 1, 1, 2, 3, # Één + 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, # eigenwijze + 15, 16, 17, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, # geïnteresseerde + 31, 33, 34, 35, 36, 37, # ijsbeer + ) + self.assertSequenceEqual(converter.computedStrToEncodedOffsets, expectedStrToEncoded) + expectedEncodedToStr = ( + 0, 2, 4, 5, # Één + 6, 7, 8, 9, 10, 11, 12, 12, 13, 14, 15, # eigenwijze + 16, 17, 18, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, # geïnteresseerde + 33, 33, 34, 35, 36, 37, 38, # ijsbeer + ) + 
self.assertSequenceEqual(converter.computedEncodedToStrOffsets, expectedEncodedToStr) + + def test_normalizedOffsetsMixed(self): + text = "Ééijo" + converter = UnicodeNormalizationOffsetConverter(text, "NFKC") + expectedStrToEncoded = (0, 0, 1, 1, 2, 4) + self.assertSequenceEqual(converter.computedStrToEncodedOffsets, expectedStrToEncoded) + expectedEncodedToStr = (0, 2, 4, 4, 5) + self.assertSequenceEqual(converter.computedEncodedToStrOffsets, expectedEncodedToStr) + + def test_normalizedOffsetsDifferentOrder(self): + text = "בְּרֵאשִׁית" + converter = UnicodeNormalizationOffsetConverter(text, "NFKC") + expectedStrToEncoded = (0, 2, 1, 3, 4, 5, 6, 8, 7, 9, 10) + self.assertSequenceEqual(converter.computedStrToEncodedOffsets, expectedStrToEncoded) + expectedEncodedToStr = (0, 2, 1, 3, 4, 5, 6, 8, 7, 9, 10) + self.assertSequenceEqual(converter.computedEncodedToStrOffsets, expectedEncodedToStr) diff --git a/user_docs/en/changes.md b/user_docs/en/changes.md index 71f084281c4..1c09d2a9629 100644 --- a/user_docs/en/changes.md +++ b/user_docs/en/changes.md @@ -8,6 +8,9 @@ * Error messages referenced with `aria-errormessage` are now reported in Google Chrome and Mozilla Firefox. (#8318) * In LibreOffice Writer (version 24.8 and newer), when toggling text formatting (bold, italic, underline, subscript/superscript, alignment) using the corresponding keyboard shortcut, NVDA announces the new formatting attribute (e.g. "Bold on", "Bold off"). (#4248, @michaelweghorn) +* Added support for Unicode Normalization to speech and braille output. (#16466 @LeonarddeR). + * This can be of help when reading characters that are unknown to a particular speech synthesizer or braille table and which have a compatible alternative, like the bold and italic characters commonly used on social media. + * You can enable this functionality for both speech and braille in their respective settings categories in the NVDA Settings dialog. 
### Changes diff --git a/user_docs/en/userGuide.md b/user_docs/en/userGuide.md index 492ae7d360b..dd9b6f021b9 100644 --- a/user_docs/en/userGuide.md +++ b/user_docs/en/userGuide.md @@ -1806,6 +1806,29 @@ This option applies to all synthesizers, not just the currently active synthesiz On by default, this option tells NVDA if the current voice's language can be trusted when processing symbols and characters. If you find that NVDA is reading punctuation in the wrong language for a particular synthesizer or voice, you may wish to turn this off to force NVDA to use its global language setting instead. +##### Unicode normalization {#SpeechUnicodeNormalization} +| . {.hideHeaderRow} |.| +|---|---| +|Options |Default (Disabled), Enabled, Disabled| +|Default |Disabled| + +When this option is enabled, unicode normalization is performed on the text that is spoken by NVDA. +This is beneficial when speaking characters that can be represented in several forms. +NVDA uses the NFKC (Normalization Form Compatibility Composition) algorithm, which provides the following benefits, among others: + +1. The bold and italic versions of characters that are part of the unicode standard and are commonly used on social media are normalized to their most common compatible equivalent. +For example, the latin letter "h" can also be presented as "𝐡" (bold), "ℎ" (italic), etc. but will always be spoken as "h" when normalization is enabled. + +1. Normalization to composed characters. +For example, the character "ü" (u with umlaut/diaeresis), a common character in languages like German and Turkish can be represented in two forms. + 1. One stand alone unicode character (ü) + 1. A decomposition into two characters (ü), namely the normal latin letter u and a diaeresis modifier + Unicode normalization ensures that only one form will be used throughout all speech output, which is the one character variant. + +1. 
Decomposition of some ligatures, including "ij" (ligature ij) to their two letter form ("ij"). + +1. Stable ordering of modifiers in composite characters, for example in ancient Hebrew. + ##### Include Unicode Consortium data (including emoji) when processing characters and symbols {#SpeechSettingsCLDR} When this checkbox is checked, NVDA will include additional symbol pronunciation dictionaries when pronouncing characters and symbols. @@ -2045,6 +2068,16 @@ When you scroll the display, you will then be able to read the rest of the word. Enabling this may allow for more fluent reading, but generally requires you to scroll the display more. +##### Unicode normalization {#BrailleUnicodeNormalization} +| . {.hideHeaderRow} |.| +|---|---| +|Options |Default (Disabled), Enabled, Disabled| +|Default |Disabled| + +When this option is enabled, unicode normalization is performed on the text that is brailled on the braille display. +This is beneficial when coming across characters in braille that are unknown in a particular braille table and which have a compatible alternative, like the bold and italic characters commonly used on social media. +Other benefits of unicode normalization are explained in greater detail in the [section for the equivalent speech setting](#SpeechUnicodeNormalization). + ##### Focus context presentation {#BrailleSettingsFocusContextPresentation} This option allows you to choose what context information NVDA will show on the braille display when an object gets focus.