Skip to content

Commit

Permalink
transliteration improvement for v0.11.3 (#38)
Browse files: browse the repository at this point in the history.
* add more test cases

* adjust the transliteration

* handle "kitabi la roiba"

* compress contextual_map patterns

* remove some test cases

* add test cases

* bump version
  • Loading branch information
alpancs authored Mar 20, 2024
1 parent aea77ce commit ecc4b3d
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 22 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "quranize"
version = "0.11.2"
version = "0.11.3"
authors = ["Alfan Nur Fauzan <[email protected]>"]
edition = "2021"
description = "Encoding transliterations into Quran forms."
Expand Down
42 changes: 33 additions & 9 deletions src/quranize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,8 @@ mod tests {
assert_eq!(q.e("yukhodiun"), vec!["يُخٰدِعونَ"]);
assert_eq!(q.e("indallah"), vec!["عِندَ اللَّهِ", "عِندِ اللَّهِ"]);
assert_eq!(q.e("alimul ghoibi"), vec!["عٰلِمُ الغَيبِ"]);
assert_eq!(q.e("kaana dhoifa"), vec!["كانَ ضَعيفًا"]);
assert_eq!(q.e("kitabi la roiba"), vec!["الكِتٰبِ لا رَيبَ"]);
}

#[test]
Expand Down Expand Up @@ -290,10 +292,7 @@ mod tests {
#[test]
fn test_quranize_misc() {
let q = Quranize::new(70);
assert_eq!(q.encode("bismillah")[0].1.len(), 13);
assert_eq!(q.encode("bismillah")[0].2, 3);
assert_eq!(q.encode("arrohman").len(), 3);
assert_eq!(q.encode("arrohman")[0].1.len(), 10);
assert_eq!(
q.encode("alhamdu")[0].1,
vec!["a", "l", "h", "a", "m", "d", "u"]
Expand All @@ -302,12 +301,37 @@ mod tests {
q.encode("arrohman")[0].1,
vec!["a", "", "r", "r", "o", "h", "m", "a", "n", ""]
);
let result = &q.encode("masyaallah")[0];
assert_eq!(result.0.chars().count(), result.1.len());
assert_eq!(
result.1,
vec!["m", "a", "", "sy", "a", "a", "", "", "", "", "l", "l", "a", "h", ""]
);
{
let r = &q.encode("masyaallah")[0];
assert_eq!(r.0.chars().count(), r.1.len());
assert_eq!(
r.1,
vec!["m", "a", "", "sy", "a", "a", "", "", "", "", "l", "l", "a", "h", ""]
);
}
{
let r = &q.encode("birobbinnas")[0];
assert_eq!(
r.1.iter().zip(r.0.chars()).collect::<Vec<_>>(),
vec![
(&"b", 'ب',),
(&"i", '\u{650}',),
(&"r", 'ر',),
(&"o", '\u{64e}',),
(&"b", 'ب',),
(&"b", '\u{651}',),
(&"i", '\u{650}',),
(&"", ' ',),
(&"", 'ا',),
(&"", 'ل',),
(&"n", 'ن',),
(&"n", '\u{651}',),
(&"a", 'ا',),
(&"s", 'س',),
(&"", '\u{650}',),
]
);
}
}

#[test]
Expand Down
17 changes: 5 additions & 12 deletions src/quranize/transliterations.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,18 +64,11 @@ pub(super) fn map(c: char) -> &'static [&'static str] {

pub(super) fn contextual_map(c0: char, c1: char) -> &'static [&'static str] {
match (c0, c1) {
(SPACE, LETTER_ALEF)
| (LETTER_HAMZA, LETTER_ALEF)
| (LETTER_ALEF, LETTER_LAM)
| (LETTER_AIN, LETTER_WAW)
| (LETTER_AIN, LETTER_SUPERSCRIPT_ALEF)
| (LETTER_WAW, LETTER_ALEF)
| (FATHATAN, LETTER_ALEF)
| (DAMMA, LETTER_WAW)
| (KASRA, LETTER_ALEF)
| (KASRA, LETTER_LAM)
| (HAMZA_ABOVE, LETTER_ALEF) => &[""],
('\0', LETTER_ALEF) => &["u", "i"],
(SPACE | LETTER_HAMZA | LETTER_WAW | FATHATAN | KASRA | HAMZA_ABOVE, LETTER_ALEF)
| (LETTER_ALEF | KASRA, LETTER_LAM)
| (LETTER_AIN, LETTER_WAW | LETTER_YEH | LETTER_SUPERSCRIPT_ALEF)
| (DAMMA, LETTER_WAW) => &[""],
('\0', LETTER_ALEF) => &["u", "i", ""],
(_, SHADDA) => map(c0),
_ => &[],
}
Expand Down

0 comments on commit ecc4b3d

Please sign in to comment.