-
Notifications
You must be signed in to change notification settings - Fork 169
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add Korean Revised Romanization to hangeul IME #716
Changes from all commits
3af46c7
cf65587
d5b3741
8454adc
0df0fd5
e4f9ca4
7f08d37
cb32895
2b2c06a
26114fc
cf5cf10
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,187 @@ | ||
( function ( $ ) { | ||
'use strict'; | ||
|
||
// Ordered transliteration rules, each written as [ pattern, replacement ].
// `pattern` is regular-expression source that koreanRR.patterns anchors to the
// end of the typed text by appending '$'; the first rule that matches wins, so
// more specific rules must come before the general ones they overlap with.
// All jamo in this table are conjoining jamo (U+1100 block), not the
// compatibility jamo or precomposed syllables that look the same on screen.
var patternList = [
	// Syllable finals
	[ '([ᅡ-ᅵ])k', '$1ᆨ' ],
	[ 'ᆨk', 'ᆩ' ],
	[ 'ᆨs', 'ᆪ' ],
	[ '([ᅡ-ᅵ])n', '$1ᆫ' ],
	[ 'ᆫj', 'ᆬ' ],
	[ 'ᆫh', 'ᆭ' ],
	[ '([ᅡ-ᅵ])t', '$1ᆮ' ],
	[ '([ᅡ-ᅵ])l', '$1ᆯ' ],
	[ '([ᅡ-ᅵ])r', '$1ᆯ' ],
	[ 'ᆯk', 'ᆰ' ],
	[ 'ᆯm', 'ᆱ' ],
	[ 'ᆯb', 'ᆲ' ],
	[ 'ᆯs', 'ᆳ' ],
	[ 'ᆯt', 'ᆴ' ],
	[ 'ᆯp', 'ᆵ' ],
	[ 'ᆯh', 'ᆶ' ],
	[ '([ᅡ-ᅵ])m', '$1ᆷ' ],
	[ '([ᅡ-ᅵ])b', '$1ᆸ' ],
	[ 'ᆸs', 'ᆹ' ],
	[ '([ᅡ-ᅵ])s', '$1ᆺ' ],
	[ 'ᆺs', 'ᆻ' ],
	[ 'ᆫg', 'ᆼ' ],
	[ '([ᅡ-ᅵ])j', '$1ᆽ' ],
	[ '([ᅡ-ᅵ])ch', '$1ᆾ' ],
	[ '([ᅡ-ᅵ])K', '$1ᆿ' ],
	[ '([ᅡ-ᅵ])T', '$1ᇀ' ],
	[ '([ᅡ-ᅵ])p', '$1ᇁ' ],
	[ '([ᅡ-ᅵ])h', '$1ᇂ' ],

	// Use space, hyphen, and apostrophe to disambiguate.
	// The rule replaces the separator with itself; it exists so that typing a
	// separator counts as a match and combineJamo gets to close the syllable.
	[ '([- \'])', '$1' ],

	// Syllable initials
	[ 'g', 'ᄀ' ],
	[ 'ᄀk', 'ᄁ' ],
	// 'kk' is the Revised Romanization spelling of the tense initial; the first
	// 'k' has already become ᄏ by the time the second one is typed.
	[ 'ᄏk', 'ᄁ' ],
	[ 'n', 'ᄂ' ],
	[ 'ᄃt', 'ᄄ' ],
	[ 'ᄐt', 'ᄄ' ],
	[ 'ᄃd', 'ᄄ' ],
	[ 'ᄐd', 'ᄄ' ],
	[ 'd', 'ᄃ' ],
	[ 'r', 'ᄅ' ],
	[ 'l', 'ᄅ' ],
	[ 'm', 'ᄆ' ],
	[ 'b', 'ᄇ' ],
	[ 'ᄇp', 'ᄈ' ],
	// 'pp' likewise: the first 'p' has already become ᄑ.
	[ 'ᄑp', 'ᄈ' ],
	[ 'ᄉs', 'ᄊ' ],
	// Not enabled: an apostrophe could be written to represent silent ᄋ,
	// i.e. [ '\'', 'ᄋ' ]; the apostrophe is used as a separator above instead.
	[ 's', 'ᄉ' ],
	[ 'ᄌj', 'ᄍ' ],
	[ 'j', 'ᄌ' ],
	[ 'ch', 'ᄎ' ],
	[ 'k', 'ᄏ' ],
	// There is some ambiguity for final ᆿ and ᇀ, so they get capital K and T
	[ 'K', 'ᄏ' ],
	[ 't', 'ᄐ' ],
	[ 'T', 'ᄐ' ],
	[ 'p', 'ᄑ' ],
	[ 'h', 'ᄒ' ],

	// Vowels
	// Vowels without a consonant initial must have ᄋ prepended.
	// ([^ᄀ-ᄒ]|^) matches the start of the text or anything but an initial consonant.
	// The replacements are written as escapes because ᄋ (U+110B) followed by a
	// vowel jamo is indistinguishable on screen from a precomposed syllable,
	// and a precomposed syllable would not be matched by the later rules.
	[ '([^ᄀ-ᄒ]|^)wa', '$1\u110B\u116A' ], // ᄋ + wa
	[ '([^ᄀ-ᄒ]|^)wo', '$1\u110B\u116F' ], // ᄋ + wo
	[ '([^ᄀ-ᄒ]|^)we', '$1\u110B\u1170' ], // ᄋ + we
	[ '([^ᄀ-ᄒ]|^)wi', '$1\u110B\u1171' ], // ᄋ + wi
	[ '([^ᄀ-ᄒ]|^)ya', '$1\u110B\u1163' ], // ᄋ + ya
	[ '([^ᄀ-ᄒ]|^)ye', '$1\u110B\u1168' ], // ᄋ + ye
	[ '([^ᄀ-ᄒ]|^)yo', '$1\u110B\u116D' ], // ᄋ + yo
	[ '([^ᄀ-ᄒ]|^)yu', '$1\u110B\u1172' ], // ᄋ + yu
	// 'y' diphthongs
	[ 'ya', 'ᅣ' ],
	[ 'ᅣe', 'ᅤ' ],
	[ 'ᅨo', 'ᅧ' ],
	[ 'ye', 'ᅨ' ],
	[ 'yo', 'ᅭ' ],
	[ 'yu', 'ᅲ' ],
	// 'w' diphthongs
	[ 'wa', 'ᅪ' ],
	[ 'ᅪe', 'ᅫ' ],
	[ 'wo', 'ᅯ' ],
	[ 'we', 'ᅰ' ],
	[ 'wi', 'ᅱ' ],
	// Other diphthongs
	[ 'ᅩe', 'ᅬ' ],
	[ 'ᅦu', 'ᅳ' ],
	[ 'ᅮi', 'ᅴ' ],
	[ 'ᅦo', 'ᅥ' ],
	[ 'ᅡe', 'ᅢ' ],
	[ '([^ᄀ-ᄒ]|^)i', '$1\u110B\u1175' ], // ᄋ + i
	[ '([^ᄀ-ᄒ]|^)a', '$1\u110B\u1161' ], // ᄋ + a
	[ '([^ᄀ-ᄒ]|^)u', '$1\u110B\u116E' ], // ᄋ + u
	[ '([^ᄀ-ᄒ]|^)o', '$1\u110B\u1169' ], // ᄋ + o
	[ '([^ᄀ-ᄒ]|^)e', '$1\u110B\u1166' ], // ᄋ + e
	[ 'i', 'ᅵ' ],
	[ 'a', 'ᅡ' ],
	[ 'u', 'ᅮ' ],
	[ 'o', 'ᅩ' ],
	[ 'e', 'ᅦ' ]
];
|
||
// Input method definition for Korean Revised Romanization.
var koreanRR = {
	// Must stay in sync with the key under which this method is registered.
	id: 'kor-rr',
	name: 'Korean Revised Romanization',
	description: 'Transliteration using Korean revised romanization',
	date: '2023-02-04',
	URL: 'https://github.com/wikimedia/jquery.ime',
	author: 'Anne Drew Hu',
	license: 'GPLv3',
	version: '1.0',
	maxKeyLength: 4,
	contextLength: 1,

	/**
	 * Transliterate the tail of the typed text.
	 *
	 * This function mirrors the normal behavior in jquery.ime.js, except that
	 * it combines jamo into a syllable block when a new syllable starts.
	 * The first rule in patternList whose pattern matches the end of `input`
	 * is applied; context rules are not supported, and are not needed here.
	 *
	 * @param {string} input Text typed so far
	 * @param {string} context Unused
	 * @return {Object} `{ noop: false, output: <transliterated text> }` when a
	 *  rule matched, `{ noop: true, output: input }` otherwise
	 */
	patterns: function ( input, context ) {
		// Matches jamo that form a complete syllable (initial, vowel, optional
		// final) followed by the start of the next syllable or a separator.
		var jamoRegex = /([ᄀ-ᄒ])([ᅡ-ᅵ])([ᆨ-ᇂ])?([ᄀ-ᄒ]|[\- '])(.*)$/,
			i, rule, regex, replacement, result;

		for ( i = 0; i < patternList.length; i++ ) {
			rule = patternList[ i ];
			regex = new RegExp( rule[ 0 ] + '$' );

			if ( !regex.test( input ) ) {
				continue;
			}

			// Last item in the rule. It can also be a function, because the
			// replace method can have a function as the second argument.
			replacement = rule[ rule.length - 1 ];
			result = input.replace( regex, replacement );

			// replace() returns the text unchanged when no complete syllable
			// is present, so no separate test is needed.
			return { noop: false, output: result.replace( jamoRegex, combineJamo ) };
		}

		// No matches, return the input
		return { noop: true, output: input };
	}
};
|
||
// Conjoining jamo behavior is defined by this Unicode standard | ||
// https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf#G24646 | ||
// parameter `final` is optional | ||
/**
 * Compose conjoining jamo into a single precomposed Hangul syllable.
 *
 * Written to be used as the replacement callback for a regular expression
 * with five capture groups (initial, vowel, optional final, next character,
 * remaining text). Conjoining jamo behavior is defined by the Unicode
 * standard: https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf#G24646
 *
 * @param {string} substring The whole match (unused)
 * @param {string} initial Initial consonant jamo, U+1100..U+1112
 * @param {string} vowel Vowel jamo, U+1161..U+1175
 * @param {string} [final] Final consonant jamo, U+11A8..U+11C2
 * @param {string} [nextSyllableInitial] Character that follows the syllable
 * @param {string} [otherChars] Text after nextSyllableInitial
 * @return {string} The composed syllable followed by:
 *  nothing, if nextSyllableInitial is a separator (space, hyphen, apostrophe);
 *  nextSyllableInitial only, if otherChars contains a separator;
 *  nextSyllableInitial and otherChars otherwise
 */
function combineJamo( substring, initial, vowel, final, nextSyllableInitial, otherChars ) {
	// Constants of the Unicode Hangul syllable composition algorithm.
	var SYLLABLE_BASE = 0xAC00, // 44032, first precomposed syllable
		INITIAL_BASE = 0x1100,
		VOWEL_BASE = 0x1161,
		// One below the first final, so that "no final" is index 0.
		FINAL_BASE = 0x11A7,
		PER_INITIAL = 588, // 21 vowels * 28 final slots
		PER_VOWEL = 28,
		separator = /[\- ']/,
		// Tolerate direct calls that omit the trailing arguments.
		next = typeof nextSyllableInitial === 'string' ? nextSyllableInitial : '',
		rest = typeof otherChars === 'string' ? otherChars : '',
		initialIndex = initial.charCodeAt( 0 ) - INITIAL_BASE,
		vowelIndex = vowel.charCodeAt( 0 ) - VOWEL_BASE,
		finalIndex = final ? final.charCodeAt( 0 ) - FINAL_BASE : 0,
		syllable = String.fromCharCode(
			SYLLABLE_BASE + initialIndex * PER_INITIAL + vowelIndex * PER_VOWEL + finalIndex
		);

	if ( separator.test( next ) ) {
		// The separator only marked the syllable boundary; drop it.
		return syllable;
	}
	if ( separator.test( rest ) ) {
		return syllable + next;
	}
	return syllable + next + rest;
}
$.ime.register( koreanRR ); | ||
}( jQuery ) ); |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -548,6 +548,10 @@ | |
name: 'ಲಿಪ್ಯಂತರಣ', | ||
source: 'rules/kn/kn-transliteration.js' | ||
}, | ||
'kor-rr': { | ||
name: 'Korean Revised Romanization', | ||
source: 'rules/kor/kor-rr.js' | ||
}, | ||
'kr-tilde': { | ||
name: 'Kanuri tilde', | ||
source: 'rules/kr/kr-tilde.js' | ||
|
@@ -1458,6 +1462,10 @@ | |
autonym: 'ಕನ್ನಡ', | ||
inputmethods: [ 'kn-transliteration', 'kn-inscript', 'kn-kgp', 'kn-inscript2' ] | ||
}, | ||
kor: { | ||
autonym: '한국어', | ||
inputmethods: [ 'kor-rr' ] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We usually give them an identifier that is based on the shortest language code, so it should be "ko" and not "kor". |
||
}, | ||
kr: { | ||
autonym: 'kanuri', | ||
inputmethods: [ 'kr-tilde' ] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4184,6 +4184,79 @@ var palochkaVariants = { | |
{ input: 'd~ha', output: 'ದ್ಹ', description: 'd~ha for ದ್ಹ in Kannada transliteration' } | ||
] | ||
}, | ||
{ | ||
description: 'Korean RR test', | ||
inputmethod: 'kor-rr', | ||
tests: [ | ||
// Note that RR is meant to romanize from hangul to latin script, but not | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Capitalize "Hangul" and "Latin". |
||
// the other way around, so there are some instances where the keystrokes | ||
// required are different from RR | ||
{ input: 'namsan ', output: '남산', description: 'Namsan -> 남산'}, | ||
{ input: 'dokdo ', output: '독도', description: 'Dokdo -> 독도'}, | ||
{ input: 'yeon-hwagyo ', output: '연화교', description: 'yeon-hwagyo -> 연화교'}, | ||
{ input: 'namhansanseong ', output: '남한산성', description: 'Namhansanseong -> 남한산성'}, | ||
{ input: 'hyeonchungsa ', output: '현충사', description: 'Hyeonchungsa -> 현충사'}, | ||
{ input: 'chok-seoklu ', output: '촉석루', description: 'Chokseongnu (chok-seoklu) -> 촉석루'}, | ||
{ input: 'geumgang ', output: '금강', description: 'Geumgang -> 금강'}, | ||
{ input: 'sokri-san ', output: '속리산', description: 'Songnisan (sokri-san) -> 속리산'}, | ||
{ input: 'mu-ryangsu-jeon ', output: '무량수전', description: 'mu-ryangsu-jeon -> 무량수전'}, | ||
{ input: 'gyeongbokgung ', output: '경복궁', description: 'Gyeongbokgung -> 경복궁'}, | ||
{ input: 'anabji ', output: '안압지', description: 'anabji -> 안압지'}, | ||
{ input: 'geukrakjeon ', output: '극락전', description: 'geukrakjeon -> 극락전'}, | ||
{ input: 'bulguk-sa ', output: '불국사', description: 'Bulguksa (bulguk-sa) -> 불국사'}, | ||
{ input: 'hwa-rangdae ', output: '화랑대', description: 'Hwarangdae (hwa-rangdae) -> 화랑대'}, | ||
{ input: 'o-jukheon ', output: '오죽헌', description: 'Ojukheon (o-jukheon) -> 오죽헌'}, | ||
{ input: 'dokribmun ', output: '독립문', description: 'Dongnimmun (dokribmun) -> 독립문'}, | ||
{ input: 'da-bo-tab ', output: '다보탑', description: 'Dabotap (da-bo-tab) -> 다보탑'}, | ||
{ input: 'jongmyo ', output: '종묘', description: 'Jongmyo -> 종묘'}, | ||
// Hyphens can be used to disambiguate | ||
{ input: 'jung-ang ', output: '중앙', description: 'Jung-ang -> 중앙'}, | ||
{ input: 'jun-gang ', output: '준강', description: 'Jun-gang -> 준강'}, | ||
{ input: 'jungang ', output: '중앙', description: 'Jungang -> 중앙'}, | ||
{ input: 'se-un ', output: '세운', description: 'Se-un -> 세운'}, | ||
{ input: 'seun ', output: '슨', description: 'Seun -> 슨'}, | ||
{ input: 'ban-gudae ', output: '반구대', description: 'Ban-gudae -> 반구대'}, | ||
{ input: 'bang-udae ', output: '방우대', description: 'Bang-udae -> 방우대'}, | ||
{ input: 'bangudae ', output: '방우대', description: 'Bangudae -> 방우대'}, | ||
{ input: 'hae-undae ', output: '해운대', description: 'Hae-undae -> 해운대'}, | ||
{ input: 'ha-eundae ', output: '하은대', description: 'Ha-eundae -> 하은대'}, | ||
{ input: 'haeundae ', output: '해운대', description: 'Haeundae -> 해운대'}, | ||
// Hyphens can also be used even when disambiguation is not necessary | ||
{ input: 'han boknam ', output: '한복남', description: 'Han Boknam -> 한복남'}, | ||
{ input: 'han bok-nam ', output: '한복남', description: 'Han Bok-nam -> 한복남'}, | ||
{ input: 'hong bichna ', output: '홍빛나', description: 'Hong Bitna (hong bichna) -> 홍빛나'}, | ||
{ input: 'hong bich-na ', output: '홍빛나', description: 'Hong Bit-na (hong bich-na) -> 홍빛나'}, | ||
// Tense (or glottalized) sounds are not transcribed in cases where morphemes are compounded | ||
{ input: 'abgu-jeong ', output: '압구정', description: 'Apgujeong (abgu-jeong) -> 압구정'}, | ||
{ input: 'habjeong ', output: '합정', description: 'Hapjeong (habjeong) -> 합정'}, | ||
{ input: 'jukbyeon ', output: '죽변', description: 'Jukbyeon -> 죽변'}, | ||
{ input: 'nakdonggang ', output: '낙동강', description: 'Nakdonggang -> 낙동강'}, | ||
{ input: 'paldang ', output: '팔당', description: 'Paldang -> 팔당'}, | ||
{ input: 'nak-seongdae ', output: '낙성대', description: 'Nakseongdae (nak-seongdae) -> 낙성대'}, | ||
{ input: 'ul-san ', output: '울산', description: 'Ulsan (ul-san) -> 울산'}, | ||
// Hangul -> RR -> hangul may sometimes result in different hangul, like these | ||
{ input: 'baengma ', output: '뱅마', description: 'Baengma -> 뱅마 (not 백마)'}, | ||
{ input: 'wangsimni ', output: '왕심니', description: 'Wangsimni -> 왕심니 (not 왕십리)'}, | ||
{ input: 'sinmunno ', output: '신문노', description: 'Sinmunno -> 신문노 (not 신문로)'}, | ||
{ input: 'byeollae ', output: '별래', description: 'Byeollae -> 별래 (not 별내)'}, | ||
{ input: 'jongno ', output: '종노', description: 'Jongno -> 종노 (not 종로)'}, | ||
{ input: 'silla ', output: '실라', description: 'Silla -> 실라 (not 신라)'}, | ||
// Syllable-final ㅅ should always be 's' | ||
{ input: 'saesbyeol ', output: '샛별', description: 'saesbyeol (not saetbyol) -> 샛별'}, | ||
// The holiday Seollal would be typed Seolnal, even though both RR and MR transcribe it as Seollal | ||
{ input: 'seolnal ', output: '설날', description: 'Seolnal -> 설날'}, | ||
{ input: 'seollal ', output: '설랄', description: 'Seollal -> 설랄'}, | ||
// Stressed final syllables should be allowed | ||
{ input: 'tieuT ', output: '티읕', description: 'tieuT -> 티읕'}, | ||
{ input: 'TieuT ', output: '티읕', description: 'TieuT -> 티읕'}, | ||
{ input: 'kieuK ', output: '키읔', description: 'kieuK -> 키읔'}, | ||
{ input: 'KieuK ', output: '키읔', description: 'KieuK -> 키읔'}, | ||
{ input: 'tteokbokki ', output: '떡볶이', descsription: 'tteokbokki -> 떡볶이'}, | ||
{ input: 'ddeokbokki ', output: '떡볶이', descsription: 'ddeokbokki -> 떡볶이'}, | ||
{ input: 'go-chu-jangjjigae ', output: '고추장찌개', descsription: 'go-chu-jangjjigae -> 고추장찌개'}, | ||
{ input: 'sundu-bu jjigae ', output: '순두부찌개', descsription: 'sundu-bu jjigae -> 순두부찌개'}, | ||
] | ||
}, | ||
{ | ||
description: 'Kanuri tilde test', | ||
inputmethod: 'kr-tilde', | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks unnecessary.