diff --git a/TransliterationAPI.UnitTests/Service/Transliterators/HebrewTransliteratorTests.cs b/TransliterationAPI.UnitTests/Service/Transliterators/HebrewTransliteratorTests.cs index 5e7391c..b5643c9 100644 --- a/TransliterationAPI.UnitTests/Service/Transliterators/HebrewTransliteratorTests.cs +++ b/TransliterationAPI.UnitTests/Service/Transliterators/HebrewTransliteratorTests.cs @@ -22,16 +22,19 @@ public void SetUp() [TestCase("גִּבְעָתַיִים", "Givatayim")] [TestCase("דַּמֶּשֶׂק", "Dammeśeq")] [TestCase("הֶרְצְלִיָּה", "Herzliya")] + [TestCase("חֶבְרוֹן", "Ḥevrōn")] [TestCase("חֵיפָה", "Ḥēyfā")] [TestCase("ירושלים", "Yerushaláyim")] [TestCase("יריחו", "Yərīḥō")] + [TestCase("יִשְׂרָאֵל", "Yīsrāʾēl")] [TestCase("מִצְרַיִם", "Miṣráyim")] [TestCase("נַהֲרִיָּה", "Nahariya")] [TestCase("נציבין", "Netzivin")] + [TestCase("נָצְרַת", "Nāṣəraṯ")] [TestCase("נְתַנְיָה", "Netanya")] [TestCase("עֵילָם", "ʿĒlām")] + [TestCase("פלשתינה", "Palestīna")] [TestCase("רבת בני עמון", "Rabat Bnei ʿAmmon")] - [TestCase("רַהַט", "Rahat")] [TestCase("רְחוֹבוֹת", "Reḥōvōt")] [TestCase("רַמְלָה", "Ramlā")] [TestCase("תל-אביב", "Tel-Aviv")] diff --git a/TransliterationAPI/Service/Transliterators/HebrewTransliterator.cs b/TransliterationAPI/Service/Transliterators/HebrewTransliterator.cs index f14f873..3b1599e 100644 --- a/TransliterationAPI/Service/Transliterators/HebrewTransliterator.cs +++ b/TransliterationAPI/Service/Transliterators/HebrewTransliterator.cs @@ -88,24 +88,31 @@ string ApplyFixes(string text) fixedText = Regex.Replace(fixedText, "Ber", "Bəʾēr"); fixedText = Regex.Replace(fixedText, "Ch", "Ḥ"); fixedText = Regex.Replace(fixedText, "Ey", "ʿĒ"); - fixedText = Regex.Replace(fixedText, "Ḥe", "Ḥē"); - fixedText = Regex.Replace(fixedText, "Ḥvo", "Ho"); + fixedText = Regex.Replace(fixedText, "Ḥe([^b])", "Ḥē$1"); fixedText = Regex.Replace(fixedText, "Hvo", "Ho"); + fixedText = Regex.Replace(fixedText, "Ḥvo", "Ho"); fixedText = Regex.Replace(fixedText, "Mq", "Maq"); fixedText = Regex.Replace(fixedText, "Mv", "ʿAmmv"); + fixedText = Regex.Replace(fixedText, "Na(ṣ|ts)", "Nā$1"); fixedText = Regex.Replace(fixedText, "Nt", "Net"); - fixedText = Regex.Replace(fixedText, "Rb", "Rab"); + fixedText = Regex.Replace(fixedText, "Pl", "Pal"); fixedText = Regex.Replace(fixedText, "R(ch|ḥ)", "Re$1"); + fixedText = Regex.Replace(fixedText, "Rb", "Rab"); fixedText = Regex.Replace(fixedText, "Sh", "Š"); fixedText = Regex.Replace(fixedText, "Tl", "Tel"); + fixedText = Regex.Replace(fixedText, "Yi", "Yī"); fixedText = Regex.Replace(fixedText, "Yr([^iīy])", "Yer$1"); fixedText = Regex.Replace(fixedText, "Yr([iīy])", "Yər$1"); fixedText = Regex.Replace(fixedText, "([hḥ])v ", "$1ō "); fixedText = Regex.Replace(fixedText, "([hḥ])v$", "$1ō"); + fixedText = Regex.Replace(fixedText, "ā(ṣ|ts)r", "ā$1ər"); + fixedText = Regex.Replace(fixedText, "ae", "āʾē"); fixedText = Regex.Replace(fixedText, "am ", "ām "); fixedText = Regex.Replace(fixedText, "am$", "ām"); fixedText = Regex.Replace(fixedText, "ame", "amme"); + fixedText = Regex.Replace(fixedText, "at ", "aṯ "); + fixedText = Regex.Replace(fixedText, "at$", "aṯ"); fixedText = Regex.Replace(fixedText, "ayim", "áyim"); fixedText = Regex.Replace(fixedText, "bt", "bat"); fixedText = Regex.Replace(fixedText, "bvo", "vō"); @@ -114,26 +121,34 @@ string ApplyFixes(string text) fixedText = Regex.Replace(fixedText, "dvn", "dun"); fixedText = Regex.Replace(fixedText, "dvo", "dō"); fixedText = Regex.Replace(fixedText, "eׁba", "evaʿ"); + fixedText = Regex.Replace(fixedText, "ebr", "evr"); fixedText = Regex.Replace(fixedText, "eׂq", "eq"); fixedText = Regex.Replace(fixedText, "ḥvo", "ḥō"); fixedText = Regex.Replace(fixedText, "iba", "iva"); fixedText = Regex.Replace(fixedText, "lah ", "lā "); fixedText = Regex.Replace(fixedText, "lah$", "lā"); + fixedText = Regex.Replace(fixedText, "ls", "les"); fixedText = Regex.Replace(fixedText, "lvo", "lō"); fixedText = Regex.Replace(fixedText, "ly", "láy"); fixedText = Regex.Replace(fixedText, "mvn", "mon"); + fixedText = Regex.Replace(fixedText, "nh ", "na "); + fixedText = Regex.Replace(fixedText, "nh$", "na"); fixedText = Regex.Replace(fixedText, "ny", "nei"); fixedText = Regex.Replace(fixedText, "ōd ", "ōḏ "); fixedText = Regex.Replace(fixedText, "ōd$", "ōḏ"); fixedText = Regex.Replace(fixedText, "pah", "fā"); fixedText = Regex.Replace(fixedText, "ql", "qəl"); fixedText = Regex.Replace(fixedText, "rts", "rz"); + fixedText = Regex.Replace(fixedText, "rvo", "rō"); fixedText = Regex.Replace(fixedText, "rvs", "rus"); fixedText = Regex.Replace(fixedText, "shׁ", "š"); fixedText = Regex.Replace(fixedText, "sheq", "śeq"); fixedText = Regex.Replace(fixedText, "shl", "shal"); + fixedText = Regex.Replace(fixedText, "shׂr", "sr"); + fixedText = Regex.Replace(fixedText, "sht", "st"); fixedText = Regex.Replace(fixedText, "sy", "zi"); fixedText = Regex.Replace(fixedText, "ts", "ṣ"); + fixedText = Regex.Replace(fixedText, "ty", "tī"); fixedText = Regex.Replace(fixedText, "y(ch|ḥ)", "īḥ"); fixedText = Regex.Replace(fixedText, "yah ", "ya "); fixedText = Regex.Replace(fixedText, "yah$", "ya");