Skip to content

Commit

Permalink
Merge pull request #41 from hmlendea/hebrew
Browse files — browse the repository at this point in the history
Improved the transliteration for `Hebrew`
  • Loading branch information
hmlendea authored May 26, 2023
2 parents db95738 + aa08efd commit 6708563
Show file tree
Hide file tree
Showing 2 changed files with 130 additions and 131 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,12 @@ public void SetUp()
[TestCase("בָּבֶל", "Bāvel")]
[TestCase("גִּבְעָתַיִים", "Givatayim")]
[TestCase("דַּמֶּשֶׂק", "Dammeśeq")]
[TestCase("הַיָּם הָאָדְוֹם", "Hayyām Hāʾāḏōm")]
[TestCase("הֶרְצְלִיָּה", "Herzliya")]
[TestCase("חֶבְרוֹן", "Ḥevrōn")]
[TestCase("חֵיפָה", "Ḥēyfā")]
[TestCase("יהודה", "Yəhūda")]
[TestCase("שֹׁמְרוֹן", "Šōmrōn")]
[TestCase("יַם-סוּף", "Yam-sūf")]
[TestCase("ירושלים", "Yerushaláyim")]
[TestCase("יריחו", "Yərīḥō")]
[TestCase("יִשְׂרָאֵל", "Yīsrāʾēl")]
Expand All @@ -36,10 +37,12 @@ public void SetUp()
[TestCase("נְתַנְיָה", "Netanya")]
[TestCase("עֵילָם", "ʿĒlām")]
[TestCase("פלשתינה", "Palestīna")]
[TestCase("פַּרְעֹה", "Parʿō")]
[TestCase("פְּרָת", "Pǝrāṯ")]
[TestCase("רבת בני עמון", "Rabat Bnei ʿAmmon")]
[TestCase("רְחוֹבוֹת", "Reḥōvōt")]
[TestCase("רַמְלָה", "Ramlā")]
[TestCase("שֹׁמְרוֹן", "Šōmrōn")]
[TestCase("תל-אביב", "Tel-Aviv")]
public void GivenATextInHebrewScript_WhenTransliteratingIntoLatin_ThenTheCorrectTextIsReturned(
string hebrewText,
Expand Down
256 changes: 126 additions & 130 deletions TransliterationAPI/Service/Transliterators/HebrewTransliterator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,70 +6,69 @@ namespace TransliterationAPI.Service.Transliterators
{
public class HebrewTransliterator : IHebrewTransliterator
{
Dictionary<char, string> transliterationMap;
Dictionary<string, string> transliterationTable;

public HebrewTransliterator()
{
transliterationMap = new Dictionary<char, string>
transliterationTable = new Dictionary<string, string>
{
{ 'א', "" },
{ 'ב', "b" },
{ 'ג', "g" },
{ 'ד', "d" },
{ 'ה', "h" },
{ 'ו', "v" },
{ 'ז', "z" },
{ 'ח', "ch" },
{ 'ט', "t" },
{ 'י', "y" },
{ 'כ', "k" },
{ 'ך', "k" },
{ 'ל', "l" },
{ 'מ', "m" },
{ 'ם', "m" },
{ 'נ', "n" },
{ 'ן', "n" },
{ 'ס', "s" },
{ 'ע', "" },
{ 'פ', "p" },
{ 'ף', "p" },
{ 'צ', "ts" },
{ 'ץ', "ts" },
{ 'ק', "q" },
{ 'ר', "r" },
{ 'ש', "sh" },
{ 'ת', "t" },
{ "רְעֹ", "rʿo" },

{ "פַּ", "pa" },
{ "רְ", "r" },
{ "עְֹ", "o" },

{ "א", "" },
{ "ב", "b" },
{ "ג", "g" },
{ "ד", "d" },
{ "ה", "h" },
{ "ו", "v" },
{ "ז", "z" },
{ "ח", "ch" },
{ "ט", "t" },
{ "י", "y" },
{ "כ", "k" },
{ "ך", "k" },
{ "ל", "l" },
{ "מ", "m" },
{ "ם", "m" },
{ "נ", "n" },
{ "ן", "n" },
{ "ס", "s" },
{ "ע", "" },
{ "פ", "p" },
{ "ף", "p" },
{ "צ", "ts" },
{ "ץ", "ts" },
{ "ק", "q" },
{ "ר", "r" },
{ "ש", "sh" },
{ "ת", "t" },

// Niqqud
{ 'ְ', "" }, // Sheva
{ 'ֱ', "e" }, // Hataf Segol
{ 'ֲ', "a" }, // Hataf Patah
{ 'ֳ', "o" }, // Hataf Qamats
{ 'ִ', "i" }, // Hiriq
{ 'ֵ', "e" }, // Tsere
{ 'ֶ', "e" }, // Segol
{ 'ַ', "a" }, // Patah
{ 'ָ', "a" }, // Qamats
{ 'ֹ', "o" }, // Holam
{ 'ֻ', "u" }, // Qubuts
{ 'ּ', "" }, // Dagesh, Mapiq, Shuruq: used to modify the pronunciation of the consonant, included for completeness but has no direct transliteration
{ "ְ", "" }, // Sheva
{ "ֱ", "e" }, // Hataf Segol
{ "ֲ", "a" }, // Hataf Patah
{ "ֳ", "o" }, // Hataf Qamats
{ "ִ", "i" }, // Hiriq
{ "ֵ", "e" }, // Tsere
{ "ֶ", "e" }, // Segol
{ "ַ", "a" }, // Patah
{ "ָ", "a" }, // Qamats
{ "ֹ", "o" }, // Holam
{ "ֻ", "u" }, // Qubuts
{ "ּ", "" }, // Dagesh, Mapiq, Shuruq: used to modify the pronunciation of the consonant, included for completeness but has no direct transliteration
};
}

public string Transliterate(string text)
{
string transliteratedText = string.Empty;
string transliteratedText = text;

foreach (char character in text)
foreach (string character in transliterationTable.Keys)
{
if (transliterationMap.ContainsKey(character))
{
transliteratedText += transliterationMap[character];
}
else
{
transliteratedText += character;
}
transliteratedText = Regex.Replace(transliteratedText, character, transliterationTable[character]);
}

transliteratedText = ApplyFixes(transliteratedText);
Expand All @@ -83,87 +82,84 @@ string ApplyFixes(string text)

fixedText = Regex.Replace(fixedText, "([\\ \\-])byb", "$1Aviv");

fixedText = Regex.Replace(fixedText, "Ash", "ʾAsh");
fixedText = Regex.Replace(fixedText, "Bab", "Bāv");
fixedText = Regex.Replace(fixedText, "Ber", "Bəʾēr");
fixedText = Regex.Replace(fixedText, "Ch", "Ḥ");
fixedText = Regex.Replace(fixedText, "Ey", "ʿĒ");
fixedText = Regex.Replace(fixedText, "Ḥe([^b])", "Ḥē$1");
fixedText = Regex.Replace(fixedText, "Hvo", "Ho");
fixedText = Regex.Replace(fixedText, "Ḥvo", "Ho");
fixedText = Regex.Replace(fixedText, "Mq", "Maq");
fixedText = Regex.Replace(fixedText, "Mv", "ʿAmmv");
fixedText = Regex.Replace(fixedText, "Na(ṣ|ts)", "Nā$1");
fixedText = Regex.Replace(fixedText, "Nt", "Net");
fixedText = Regex.Replace(fixedText, "Pl", "Pal");
fixedText = Regex.Replace(fixedText, "R(ch|ḥ)", "Re$1");
fixedText = Regex.Replace(fixedText, "Rb", "Rab");
fixedText = Regex.Replace(fixedText, "Sh", "Š");
fixedText = Regex.Replace(fixedText, "Tl", "Tel");
fixedText = Regex.Replace(fixedText, "Yh", "Yəh");
fixedText = Regex.Replace(fixedText, "Yi", "Yī");
fixedText = Regex.Replace(fixedText, "Yr([^iīy])", "Yer$1");
fixedText = Regex.Replace(fixedText, "Yr([iīy])", "Yər$1");
fixedText = Regex.Replace(fixedText, @"Ash", "ʾAsh");
fixedText = Regex.Replace(fixedText, @"Bab", "Bāv");
fixedText = Regex.Replace(fixedText, @"Ber", "Bəʾēr");
fixedText = Regex.Replace(fixedText, @"Ch", "Ḥ");
fixedText = Regex.Replace(fixedText, @"Ey", "ʿĒ");
fixedText = Regex.Replace(fixedText, @"Haa", "Hāʾā");
fixedText = Regex.Replace(fixedText, @"Ḥe([^b])", "Ḥē$1");
fixedText = Regex.Replace(fixedText, @"Hvo", "Ho");
fixedText = Regex.Replace(fixedText, @"Ḥvo", "Ho");
fixedText = Regex.Replace(fixedText, @"Mq", "Maq");
fixedText = Regex.Replace(fixedText, @"Mv", "ʿAmmv");
fixedText = Regex.Replace(fixedText, @"Na(ṣ|ts)", "Nā$1");
fixedText = Regex.Replace(fixedText, @"Nt", "Net");
fixedText = Regex.Replace(fixedText, @"Pl", "Pal");
fixedText = Regex.Replace(fixedText, @"R(ch|ḥ)", "Re$1");
fixedText = Regex.Replace(fixedText, @"Rb", "Rab");
fixedText = Regex.Replace(fixedText, @"Sh", "Š");
fixedText = Regex.Replace(fixedText, @"Tl", "Tel");
fixedText = Regex.Replace(fixedText, @"Yh", "Yəh");
fixedText = Regex.Replace(fixedText, @"Yi", "Yī");
fixedText = Regex.Replace(fixedText, @"Yr([^iīy])", "Yer$1");
fixedText = Regex.Replace(fixedText, @"Yr([iīy])", "Yər$1");

fixedText = Regex.Replace(fixedText, "([hḥ])v ", "$1ō ");
fixedText = Regex.Replace(fixedText, "([hḥ])v$", "$1ō");
fixedText = Regex.Replace(fixedText, "([Pp])r", "$1ǝr");
fixedText = Regex.Replace(fixedText, "ā(ṣ|ts)r", "ā$1ər");
fixedText = Regex.Replace(fixedText, "ae", "āʾē");
fixedText = Regex.Replace(fixedText, "am ", "ām ");
fixedText = Regex.Replace(fixedText, "am$", "ām");
fixedText = Regex.Replace(fixedText, "ame", "amme");
fixedText = Regex.Replace(fixedText, "at ", "aṯ ");
fixedText = Regex.Replace(fixedText, "at$", "aṯ");
fixedText = Regex.Replace(fixedText, "ayim", "áyim");
fixedText = Regex.Replace(fixedText, "bt", "bat");
fixedText = Regex.Replace(fixedText, "bvo", "vō");
fixedText = Regex.Replace(fixedText, "byn", "vin");
fixedText = Regex.Replace(fixedText, "ch", "ḥ");
fixedText = Regex.Replace(fixedText, "dh ", "da ");
fixedText = Regex.Replace(fixedText, "dh$", "da");
fixedText = Regex.Replace(fixedText, "dvn", "dun");
fixedText = Regex.Replace(fixedText, "dvo", "dō");
fixedText = Regex.Replace(fixedText, "eׁba", "evaʿ");
fixedText = Regex.Replace(fixedText, "ebr", "evr");
fixedText = Regex.Replace(fixedText, "eׂq", "eq");
fixedText = Regex.Replace(fixedText, "hvd", "hūd");
fixedText = Regex.Replace(fixedText, "ḥvo", "ḥō");
fixedText = Regex.Replace(fixedText, "iba", "iva");
fixedText = Regex.Replace(fixedText, "lah ", "lā ");
fixedText = Regex.Replace(fixedText, "lah$", "lā");
fixedText = Regex.Replace(fixedText, "ls", "les");
fixedText = Regex.Replace(fixedText, "lvo", "lō");
fixedText = Regex.Replace(fixedText, "ly", "láy");
fixedText = Regex.Replace(fixedText, "mvn", "mon");
fixedText = Regex.Replace(fixedText, "nh ", "na ");
fixedText = Regex.Replace(fixedText, "nh$", "na");
fixedText = Regex.Replace(fixedText, "ny", "nei");
fixedText = Regex.Replace(fixedText, "ōd ", "ōḏ ");
fixedText = Regex.Replace(fixedText, "ōd$", "ōḏ");
fixedText = Regex.Replace(fixedText, "oׁm", "ōm");
fixedText = Regex.Replace(fixedText, "pah", "fā");
fixedText = Regex.Replace(fixedText, "ql", "qəl");
fixedText = Regex.Replace(fixedText, "rts", "rz");
fixedText = Regex.Replace(fixedText, "rvo", "rō");
fixedText = Regex.Replace(fixedText, "rvs", "rus");
fixedText = Regex.Replace(fixedText, "shׁ", "š");
fixedText = Regex.Replace(fixedText, "sheq", "śeq");
fixedText = Regex.Replace(fixedText, "shl", "shal");
fixedText = Regex.Replace(fixedText, "shׂr", "sr");
fixedText = Regex.Replace(fixedText, "sht", "st");
fixedText = Regex.Replace(fixedText, "sy", "zi");
fixedText = Regex.Replace(fixedText, "ts", "ṣ");
fixedText = Regex.Replace(fixedText, "ty", "tī");
fixedText = Regex.Replace(fixedText, "y(ch|ḥ)", "īḥ");
fixedText = Regex.Replace(fixedText, "yah ", "ya ");
fixedText = Regex.Replace(fixedText, "yah$", "ya");
fixedText = Regex.Replace(fixedText, "yh", "yah");
fixedText = Regex.Replace(fixedText, "ym", "yim");
fixedText = Regex.Replace(fixedText, @"([^Yy])am\b", "$1ām");
fixedText = Regex.Replace(fixedText, @"([hḥ])v\b", "$1ō");
fixedText = Regex.Replace(fixedText, @"([Pp])r", "$1ǝr");
fixedText = Regex.Replace(fixedText, @"ā(ṣ|ts)r", "ā$1ər");
fixedText = Regex.Replace(fixedText, @"ādvo", "āḏō");
fixedText = Regex.Replace(fixedText, @"ae", "āʾē");
fixedText = Regex.Replace(fixedText, @"ame", "amme");
fixedText = Regex.Replace(fixedText, @"at\b", "aṯ");
fixedText = Regex.Replace(fixedText, @"ayim", "áyim");
fixedText = Regex.Replace(fixedText, @"bt", "bat");
fixedText = Regex.Replace(fixedText, @"bvo", "vō");
fixedText = Regex.Replace(fixedText, @"byn", "vin");
fixedText = Regex.Replace(fixedText, @"ch", "ḥ");
fixedText = Regex.Replace(fixedText, @"dh\b", "da");
fixedText = Regex.Replace(fixedText, @"dvn", "dun");
fixedText = Regex.Replace(fixedText, @"dvo", "dō");
fixedText = Regex.Replace(fixedText, @"eׁba", "evaʿ");
fixedText = Regex.Replace(fixedText, @"ebr", "evr");
fixedText = Regex.Replace(fixedText, @"eׂq", "eq");
fixedText = Regex.Replace(fixedText, @"hvd", "hūd");
fixedText = Regex.Replace(fixedText, @"ḥvo", "ḥō");
fixedText = Regex.Replace(fixedText, @"iba", "iva");
fixedText = Regex.Replace(fixedText, @"lah\b", "lā");
fixedText = Regex.Replace(fixedText, @"ls", "les");
fixedText = Regex.Replace(fixedText, @"lvo", "lō");
fixedText = Regex.Replace(fixedText, @"ly", "láy");
fixedText = Regex.Replace(fixedText, @"mvn", "mon");
fixedText = Regex.Replace(fixedText, @"nh\b", "na");
fixedText = Regex.Replace(fixedText, @"ny", "nei");
fixedText = Regex.Replace(fixedText, @"ōd\b", "ōḏ");
fixedText = Regex.Replace(fixedText, @"oׁm", "ōm");
fixedText = Regex.Replace(fixedText, @"pah", "fā");
fixedText = Regex.Replace(fixedText, @"ql", "qəl");
fixedText = Regex.Replace(fixedText, @"rts", "rz");
fixedText = Regex.Replace(fixedText, @"rvo", "rō");
fixedText = Regex.Replace(fixedText, @"rvs", "rus");
fixedText = Regex.Replace(fixedText, @"shׁ", "š");
fixedText = Regex.Replace(fixedText, @"sheq", "śeq");
fixedText = Regex.Replace(fixedText, @"shl", "shal");
fixedText = Regex.Replace(fixedText, @"shׂr", "sr");
fixedText = Regex.Replace(fixedText, @"sht", "st");
fixedText = Regex.Replace(fixedText, @"svp", "sūf");
fixedText = Regex.Replace(fixedText, @"sy", "zi");
fixedText = Regex.Replace(fixedText, @"ts", "ṣ");
fixedText = Regex.Replace(fixedText, @"ty", "tī");
fixedText = Regex.Replace(fixedText, @"y(ch|ḥ)", "īḥ");
fixedText = Regex.Replace(fixedText, @"yah\b", "ya");
fixedText = Regex.Replace(fixedText, @"ayam", "ayyām");
fixedText = Regex.Replace(fixedText, @"yh", "yah");
fixedText = Regex.Replace(fixedText, @"ym", "yim");
fixedText = Regex.Replace(fixedText, @"ʿoh", "ʿō");

fixedText = Regex.Replace(fixedText, "eiah", "ya");
fixedText = Regex.Replace(fixedText, "ǝraṯ", "ǝrāṯ");
fixedText = Regex.Replace(fixedText, "yiyi", "yi");
fixedText = Regex.Replace(fixedText, @"eiah", "ya");
fixedText = Regex.Replace(fixedText, @"ǝraṯ", "ǝrāṯ");
fixedText = Regex.Replace(fixedText, @"yiyi", "yi");

return fixedText;
}
Expand Down

0 comments on commit 6708563

Please sign in to comment.