Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Korean Revised Romanization to hangeul IME #716

Merged
merged 11 commits into from
Oct 3, 2024
187 changes: 187 additions & 0 deletions rules/kor/kor-rr.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
( function ( $ ) {
'use strict';

// Ordered transliteration rules for Korean Revised Romanization input.
//
// Each rule is [ regexSource, replacement ]. The consumer appends '$' to the
// regex source and tests it against the end of the text typed so far; the
// first rule that matches wins, so multi-character rules must be listed
// before the single-key rules they overlap with.
//
// Three different Unicode jamo ranges are used on purpose:
//   initials (choseong)  ᄀ-ᄒ  U+1100..U+1112
//   vowels   (jungseong) ᅡ-ᅵ  U+1161..U+1175
//   finals   (jongseong) ᆨ-ᇂ  U+11A8..U+11C2
var patternList = [
	// Syllable finals: a consonant typed right after a vowel jamo becomes a
	// final; a following key may upgrade it to a double or cluster final.
	[ '([ᅡ-ᅵ])k', '$1ᆨ' ],
	[ 'ᆨk', 'ᆩ' ],
	[ 'ᆨs', 'ᆪ' ],
	[ '([ᅡ-ᅵ])n', '$1ᆫ' ],
	[ 'ᆫj', 'ᆬ' ],
	[ 'ᆫh', 'ᆭ' ],
	[ '([ᅡ-ᅵ])t', '$1ᆮ' ],
	[ '([ᅡ-ᅵ])l', '$1ᆯ' ],
	[ '([ᅡ-ᅵ])r', '$1ᆯ' ],
	[ 'ᆯk', 'ᆰ' ],
	[ 'ᆯm', 'ᆱ' ],
	[ 'ᆯb', 'ᆲ' ],
	[ 'ᆯs', 'ᆳ' ],
	[ 'ᆯt', 'ᆴ' ],
	[ 'ᆯp', 'ᆵ' ],
	[ 'ᆯh', 'ᆶ' ],
	[ '([ᅡ-ᅵ])m', '$1ᆷ' ],
	[ '([ᅡ-ᅵ])b', '$1ᆸ' ],
	[ 'ᆸs', 'ᆹ' ],
	[ '([ᅡ-ᅵ])s', '$1ᆺ' ],
	[ 'ᆺs', 'ᆻ' ],
	[ 'ᆫg', 'ᆼ' ],
	[ '([ᅡ-ᅵ])j', '$1ᆽ' ],
	[ '([ᅡ-ᅵ])ch', '$1ᆾ' ],
	[ '([ᅡ-ᅵ])K', '$1ᆿ' ],
	[ '([ᅡ-ᅵ])T', '$1ᇀ' ],
	[ '([ᅡ-ᅵ])p', '$1ᇁ' ],
	[ '([ᅡ-ᅵ])h', '$1ᇂ' ],

	// Use space, hyphen, and apostrophe to disambiguate.
	// The rule itself changes nothing; it only has to match so that the
	// consumer gets a chance to run combineJamo on the pending syllable.
	[ '([- \'])', '$1' ],

	// Syllable initials
	[ 'g', 'ᄀ' ],
	[ 'ᄀk', 'ᄁ' ],
	// RR spells the tense consonant ᄁ as "kk"; the first k has already
	// become ᄏ, so the pair must be caught before the plain 'k' rule.
	[ 'ᄏk', 'ᄁ' ],
	[ 'n', 'ᄂ' ],
	[ 'ᄃt', 'ᄄ' ],
	[ 'ᄐt', 'ᄄ' ],
	[ 'ᄃd', 'ᄄ' ],
	[ 'ᄐd', 'ᄄ' ],
	[ 'd', 'ᄃ' ],
	[ 'r', 'ᄅ' ],
	[ 'l', 'ᄅ' ],
	[ 'm', 'ᄆ' ],
	[ 'b', 'ᄇ' ],
	[ 'ᄇp', 'ᄈ' ],
	// RR spells the tense consonant ᄈ as "pp"; same reasoning as "kk".
	[ 'ᄑp', 'ᄈ' ],
	[ 'ᄉs', 'ᄊ' ],
	[ 's', 'ᄉ' ],
	[ 'ᄌj', 'ᄍ' ],
	[ 'j', 'ᄌ' ],
	[ 'ch', 'ᄎ' ],
	[ 'k', 'ᄏ' ],
	[ 'K', 'ᄏ' ], // There is some ambiguity for final ᆿ and ᇀ, so they get capital K and T
	[ 't', 'ᄐ' ],
	[ 'T', 'ᄐ' ],
	[ 'p', 'ᄑ' ],
	[ 'h', 'ᄒ' ],

	// Vowels
	// Vowels without a consonant initial must have the silent initial ᄋ prepended.
	// [^ᄀ-ᄒ]|^ matches the start of the text or anything but an initial consonant.
	//
	// The replacements below are written as escapes because they MUST stay
	// two separate conjoining jamo (U+110B followed by a vowel jamo). Written
	// literally they look identical to a precomposed syllable, and if they
	// were ever stored precomposed the final-consonant rules above
	// ('([ᅡ-ᅵ])n' etc.) and the syllable combiner would stop matching.
	[ '([^ᄀ-ᄒ]|^)wa', '$1\u110B\u116A' ], // ᄋ + ᅪ
	[ '([^ᄀ-ᄒ]|^)wo', '$1\u110B\u116F' ], // ᄋ + ᅯ
	[ '([^ᄀ-ᄒ]|^)we', '$1\u110B\u1170' ], // ᄋ + ᅰ
	[ '([^ᄀ-ᄒ]|^)wi', '$1\u110B\u1171' ], // ᄋ + ᅱ
	[ '([^ᄀ-ᄒ]|^)ya', '$1\u110B\u1163' ], // ᄋ + ᅣ
	[ '([^ᄀ-ᄒ]|^)ye', '$1\u110B\u1168' ], // ᄋ + ᅨ
	[ '([^ᄀ-ᄒ]|^)yo', '$1\u110B\u116D' ], // ᄋ + ᅭ
	[ '([^ᄀ-ᄒ]|^)yu', '$1\u110B\u1172' ], // ᄋ + ᅲ
	// 'y' diphthongs
	[ 'ya', 'ᅣ' ],
	[ 'ᅣe', 'ᅤ' ],
	[ 'ᅨo', 'ᅧ' ],
	[ 'ye', 'ᅨ' ],
	[ 'yo', 'ᅭ' ],
	[ 'yu', 'ᅲ' ],
	// 'w' diphthongs
	[ 'wa', 'ᅪ' ],
	[ 'ᅪe', 'ᅫ' ],
	[ 'wo', 'ᅯ' ],
	[ 'we', 'ᅰ' ],
	[ 'wi', 'ᅱ' ],
	// Other diphthongs
	[ 'ᅩe', 'ᅬ' ],
	[ 'ᅦu', 'ᅳ' ],
	[ 'ᅮi', 'ᅴ' ],
	[ 'ᅦo', 'ᅥ' ],
	[ 'ᅡe', 'ᅢ' ],
	// Plain vowels without an initial (see the note on escapes above)
	[ '([^ᄀ-ᄒ]|^)i', '$1\u110B\u1175' ], // ᄋ + ᅵ
	[ '([^ᄀ-ᄒ]|^)a', '$1\u110B\u1161' ], // ᄋ + ᅡ
	[ '([^ᄀ-ᄒ]|^)u', '$1\u110B\u116E' ], // ᄋ + ᅮ
	[ '([^ᄀ-ᄒ]|^)o', '$1\u110B\u1169' ], // ᄋ + ᅩ
	[ '([^ᄀ-ᄒ]|^)e', '$1\u110B\u1166' ], // ᄋ + ᅦ
	// Plain vowels following an initial consonant
	[ 'i', 'ᅵ' ],
	[ 'a', 'ᅡ' ],
	[ 'u', 'ᅮ' ],
	[ 'o', 'ᅩ' ],
	[ 'e', 'ᅦ' ]
];

// Input method definition handed to $.ime.register.
var koreanRR = {
	// NOTE(review): a reviewer asked for identifiers based on the shortest
	// language code ("ko" rather than "kor"). The id is left unchanged here
	// because the input method registry and the test fixtures refer to
	// 'kor-rr' as well and would have to change together with it.
	id: 'kor-rr',
	name: 'Korean Revised Romanization',
	description: 'Transliteration using Korean revised romanization',
	date: '2023-02-04',
	URL: 'https://github.com/wikimedia/jquery.ime',
	author: 'Anne Drew Hu',
	license: 'GPLv3',
	version: '1.0',
	maxKeyLength: 4,
	contextLength: 1,

	/**
	 * Transliterate the tail of the typed text.
	 *
	 * Per the original author this mirrors the normal rule handling in
	 * jquery.ime.js, except that it also combines jamo into a syllable
	 * block once a new syllable starts. Context rules are not supported;
	 * the `context` argument is accepted but never read.
	 *
	 * Rules from patternList are tried in order and the first one whose
	 * pattern matches the end of `input` is applied.
	 *
	 * @param {string} input Text to transliterate
	 * @param {string} context Unused
	 * @return {Object} `{ noop: true, output: input }` when no rule matched,
	 *  otherwise `{ noop: false, output: <transliterated text> }`
	 */
	patterns: function ( input, context ) { // eslint-disable-line no-unused-vars
		var i, rule, regex, replacement, result,
			// Matches jamo that form a complete syllable (initial, vowel,
			// optional final) followed by either the next syllable's initial
			// or a disambiguation character, so they can be combined.
			jamoRegex = /([ᄀ-ᄒ])([ᅡ-ᅵ])([ᆨ-ᇂ])?([ᄀ-ᄒ]|[\- '])(.*)$/;

		for ( i = 0; i < patternList.length; i++ ) {
			rule = patternList[ i ];
			regex = new RegExp( rule[ 0 ] + '$' );

			// Last item in the rule.
			// It can also be a function, because the replace
			// method can have a function as the second argument.
			replacement = rule.slice( -1 )[ 0 ];

			// Input string match test
			if ( !regex.test( input ) ) {
				continue;
			}

			result = input.replace( regex, replacement );
			if ( jamoRegex.test( result ) ) {
				result = result.replace( jamoRegex, combineJamo );
			}
			return { noop: false, output: result };
		}

		// No matches, return the input
		return { noop: true, output: input };
	}
};

/**
 * Combine conjoining jamo into a single precomposed Hangul syllable.
 *
 * Conjoining jamo behavior is defined by the Unicode standard:
 * https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf#G24646
 *
 * The parameter list matches what String.prototype.replace passes to a
 * replacer function for a regex with five capture groups.
 *
 * @param {string} substring The whole matched text (unused)
 * @param {string} initial Initial consonant jamo (U+1100..U+1112)
 * @param {string} vowel Vowel jamo (U+1161..U+1175)
 * @param {string} [final] Final consonant jamo (U+11A8..U+11C2), optional
 * @param {string} nextSyllableInitial Character right after the syllable:
 *  either the next initial consonant or a separator (hyphen, space, apostrophe)
 * @param {string} otherChars Everything after nextSyllableInitial
 * @return {string} The composed syllable. nextSyllableInitial is dropped when
 *  it is a separator; otherChars is dropped when it contains a separator;
 *  otherwise both are appended unchanged.
 */
function combineJamo( substring, initial, vowel, final, nextSyllableInitial, otherChars ) {
	var SYLLABLE_BASE = 0xAC00, // 44032, code point of the first syllable block
		INITIAL_BASE = 0x1100, // ᄀ
		VOWEL_BASE = 0x1161, // ᅡ
		FINAL_BASE = 0x11A8, // ᆨ
		FINAL_COUNT = 28, // 27 finals plus the "no final" slot
		VOWEL_COUNT = 21,
		disambig = /[\- ']/,
		initialDiff = initial.charCodeAt( 0 ) - INITIAL_BASE,
		vowelDiff = vowel.charCodeAt( 0 ) - VOWEL_BASE,
		finalDiff = 0,
		syllable;

	if ( final ) {
		// Add one to account for the "no final" option, where finalDiff is 0
		finalDiff = final.charCodeAt( 0 ) - FINAL_BASE + 1;
	}

	// 588 code points per initial ( 21 vowels * 28 finals ), 28 per vowel
	syllable = String.fromCharCode(
		SYLLABLE_BASE +
		initialDiff * VOWEL_COUNT * FINAL_COUNT +
		vowelDiff * FINAL_COUNT +
		finalDiff
	);

	if ( disambig.test( nextSyllableInitial ) ) {
		// The separator only marked the syllable boundary; swallow it
		return syllable;
	}
	if ( disambig.test( otherChars ) ) {
		return syllable + nextSyllableInitial;
	}
	return syllable + nextSyllableInitial + otherChars;
}
$.ime.register( koreanRR );
}( jQuery ) );
8 changes: 8 additions & 0 deletions src/jquery.ime.inputmethods.js
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,10 @@
name: 'ಲಿಪ್ಯಂತರಣ',
source: 'rules/kn/kn-transliteration.js'
},
'kor-rr': {
name: 'Korean Revised Romanization',
source: 'rules/kor/kor-rr.js'
},
'kr-tilde': {
name: 'Kanuri tilde',
source: 'rules/kr/kr-tilde.js'
Expand Down Expand Up @@ -1458,6 +1462,10 @@
autonym: 'ಕನ್ನಡ',
inputmethods: [ 'kn-transliteration', 'kn-inscript', 'kn-kgp', 'kn-inscript2' ]
},
kor: {
autonym: '한국어',
inputmethods: [ 'kor-rr' ]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We usually give them an identifier that is based on the shortest language code, so it should be "ko" and not "kor".

},
kr: {
autonym: 'kanuri',
inputmethods: [ 'kr-tilde' ]
Expand Down
73 changes: 73 additions & 0 deletions test/jquery.ime.test.fixtures.js
Original file line number Diff line number Diff line change
Expand Up @@ -4184,6 +4184,79 @@ var palochkaVariants = {
{ input: 'd~ha', output: 'ದ್ಹ', description: 'd~ha for ದ್ಹ in Kannada transliteration' }
]
},
{
description: 'Korean RR test',
inputmethod: 'kor-rr',
tests: [
// Note that RR is meant to romanize from Hangul to Latin script, but not
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Capitalize "Hangul" and "Latin".

// the other way around, so there are some instances where the keystrokes
// required are different from RR
{ input: 'namsan ', output: '남산', description: 'Namsan -> 남산'},
{ input: 'dokdo ', output: '독도', description: 'Dokdo -> 독도'},
{ input: 'yeon-hwagyo ', output: '연화교', description: 'yeon-hwagyo -> 연화교'},
{ input: 'namhansanseong ', output: '남한산성', description: 'Namhansanseong -> 남한산성'},
{ input: 'hyeonchungsa ', output: '현충사', description: 'Hyeonchungsa -> 현충사'},
{ input: 'chok-seoklu ', output: '촉석루', description: 'Chokseongnu (chok-seoklu) -> 촉석루'},
{ input: 'geumgang ', output: '금강', description: 'Geumgang -> 금강'},
{ input: 'sokri-san ', output: '속리산', description: 'Songnisan (sokri-san) -> 속리산'},
{ input: 'mu-ryangsu-jeon ', output: '무량수전', description: 'mu-ryangsu-jeon -> 무량수전'},
{ input: 'gyeongbokgung ', output: '경복궁', description: 'Gyeongbokgung -> 경복궁'},
{ input: 'anabji ', output: '안압지', description: 'anabji -> 안압지'},
{ input: 'geukrakjeon ', output: '극락전', description: 'geukrakjeon -> 극락전'},
{ input: 'bulguk-sa ', output: '불국사', description: 'Bulguksa (bulguk-sa) -> 불국사'},
{ input: 'hwa-rangdae ', output: '화랑대', description: 'Hwarangdae (hwa-rangdae) -> 화랑대'},
{ input: 'o-jukheon ', output: '오죽헌', description: 'Ojukheon (o-jukheon) -> 오죽헌'},
{ input: 'dokribmun ', output: '독립문', description: 'Dongnimmun (dokribmun) -> 독립문'},
{ input: 'da-bo-tab ', output: '다보탑', description: 'Dabotap (da-bo-tab) -> 다보탑'},
{ input: 'jongmyo ', output: '종묘', description: 'Jongmyo -> 종묘'},
// Hyphens can be used to disambiguate
{ input: 'jung-ang ', output: '중앙', description: 'Jung-ang -> 중앙'},
{ input: 'jun-gang ', output: '준강', description: 'Jun-gang -> 준강'},
{ input: 'jungang ', output: '중앙', description: 'Jungang -> 중앙'},
{ input: 'se-un ', output: '세운', description: 'Se-un -> 세운'},
{ input: 'seun ', output: '슨', description: 'Seun -> 슨'},
{ input: 'ban-gudae ', output: '반구대', description: 'Ban-gudae -> 반구대'},
{ input: 'bang-udae ', output: '방우대', description: 'Bang-udae -> 방우대'},
{ input: 'bangudae ', output: '방우대', description: 'Bangudae -> 방우대'},
{ input: 'hae-undae ', output: '해운대', description: 'Hae-undae -> 해운대'},
{ input: 'ha-eundae ', output: '하은대', description: 'Ha-eundae -> 하은대'},
{ input: 'haeundae ', output: '해운대', description: 'Haeundae -> 해운대'},
// Hyphens can also be used even when disambiguation is not necessary
{ input: 'han boknam ', output: '한복남', description: 'Han Boknam -> 한복남'},
{ input: 'han bok-nam ', output: '한복남', description: 'Han Bok-nam -> 한복남'},
{ input: 'hong bichna ', output: '홍빛나', description: 'Hong Bitna (hong bichna) -> 홍빛나'},
{ input: 'hong bich-na ', output: '홍빛나', description: 'Hong Bit-na (hong bich-na) -> 홍빛나'},
// Tense (or glottalized) sounds are not transcribed in cases where morphemes are compounded
{ input: 'abgu-jeong ', output: '압구정', description: 'Apgujeong (abgu-jeong) -> 압구정'},
{ input: 'habjeong ', output: '합정', description: 'Hapjeong (habjeong) -> 합정'},
{ input: 'jukbyeon ', output: '죽변', description: 'Jukbyeon -> 죽변'},
{ input: 'nakdonggang ', output: '낙동강', description: 'Nakdonggang -> 낙동강'},
{ input: 'paldang ', output: '팔당', description: 'Paldang -> 팔당'},
{ input: 'nak-seongdae ', output: '낙성대', description: 'Nakseongdae (nak-seongdae) -> 낙성대'},
{ input: 'ul-san ', output: '울산', description: 'Ulsan (ul-san) -> 울산'},
// Hangul -> RR -> hangul may sometimes result in different hangul, like these
{ input: 'baengma ', output: '뱅마', description: 'Baengma -> 뱅마 (not 백마)'},
{ input: 'wangsimni ', output: '왕심니', description: 'Wangsimni -> 왕심니 (not 왕십리)'},
{ input: 'sinmunno ', output: '신문노', description: 'Sinmunno -> 신문노 (not 신문로)'},
{ input: 'byeollae ', output: '별래', description: 'Byeollae -> 별래 (not 별내)'},
{ input: 'jongno ', output: '종노', description: 'Jongno -> 종노 (not 종로)'},
{ input: 'silla ', output: '실라', description: 'Silla -> 실라 (not 신라)'},
// Syllable-final ㅅ should always be 's'
{ input: 'saesbyeol ', output: '샛별', description: 'saesbyeol (not saetbyol) -> 샛별'},
// The holiday Seollal would be typed Seolnal, even though both RR and MR transcribe it as Seollal
{ input: 'seolnal ', output: '설날', description: 'Seolnal -> 설날'},
{ input: 'seollal ', output: '설랄', description: 'Seollal -> 설랄'},
// Stressed final syllables should be allowed
{ input: 'tieuT ', output: '티읕', description: 'tieuT -> 티읕'},
{ input: 'TieuT ', output: '티읕', description: 'TieuT -> 티읕'},
{ input: 'kieuK ', output: '키읔', description: 'kieuK -> 키읔'},
{ input: 'KieuK ', output: '키읔', description: 'KieuK -> 키읔'},
{ input: 'tteokbokki ', output: '떡볶이', descsription: 'tteokbokki -> 떡볶이'},
{ input: 'ddeokbokki ', output: '떡볶이', descsription: 'ddeokbokki -> 떡볶이'},
{ input: 'go-chu-jangjjigae ', output: '고추장찌개', descsription: 'go-chu-jangjjigae -> 고추장찌개'},
{ input: 'sundu-bu jjigae ', output: '순두부찌개', descsription: 'sundu-bu jjigae -> 순두부찌개'},
]
},
{
description: 'Kanuri tilde test',
inputmethod: 'kr-tilde',
Expand Down
Loading