wikimedia · srish · Oct 3, 2024 · Feb 4, 2023 · Feb 6, 2023 · Feb 6, 2023
diff --git a/rules/kor/kor-rr.js b/rules/kor/kor-rr.js
@@ -0,0 +1,187 @@
+( function ( $ ) {
+	'use strict';
+
+	var patternList = [
+		// Syllable finals
+		[ '([ᅡ-ᅵ])k', '$1ᆨ' ],
+		[ 'ᆨk', 'ᆩ' ],
+		[ 'ᆨs', 'ᆪ' ],
+		[ '([ᅡ-ᅵ])n', '$1ᆫ' ],
+		[ 'ᆫj', 'ᆬ' ],
+		[ 'ᆫh', 'ᆭ' ],
+		[ '([ᅡ-ᅵ])t', '$1ᆮ' ],
+		[ '([ᅡ-ᅵ])l', '$1ᆯ' ],
+		[ '([ᅡ-ᅵ])r', '$1ᆯ' ],
+		[ 'ᆯk', 'ᆰ' ],
+		[ 'ᆯm', 'ᆱ' ],
+		[ 'ᆯb', 'ᆲ' ],
+		[ 'ᆯs', 'ᆳ' ],
+		[ 'ᆯt', 'ᆴ' ],
+		[ 'ᆯp', 'ᆵ' ],
+		[ 'ᆯh', 'ᆶ' ],
+		[ '([ᅡ-ᅵ])m', '$1ᆷ' ],
+		[ '([ᅡ-ᅵ])b', '$1ᆸ' ],
+		[ 'ᆸs', 'ᆹ' ],
+		[ '([ᅡ-ᅵ])s', '$1ᆺ' ],
+		[ 'ᆺs', 'ᆻ' ],
+		[ 'ᆫg', 'ᆼ' ],
+		[ '([ᅡ-ᅵ])j', '$1ᆽ' ],
+		[ '([ᅡ-ᅵ])ch', '$1ᆾ' ],
+		[ '([ᅡ-ᅵ])K', '$1ᆿ' ],
+		[ '([ᅡ-ᅵ])T', '$1ᇀ' ],
+		[ '([ᅡ-ᅵ])p', '$1ᇁ' ],
+		[ '([ᅡ-ᅵ])h', '$1ᇂ' ],
+
+		// Use space, hyphen, and apostrophe to disambiguate
+		// The separator is left as typed here; combineJamo will do the work
+		[ '([- \'])', '$1' ],
+
+		// Syllable initials
+		[ 'g', 'ᄀ' ],
+		[ 'ᄀk', 'ᄁ' ],
+		[ 'n', 'ᄂ' ],
+		[ 'ᄃt', 'ᄄ' ],
+		[ 'ᄐt', 'ᄄ' ],
+		[ 'ᄃd', 'ᄄ' ],
+		[ 'ᄐd', 'ᄄ' ],
+		[ 'd', 'ᄃ' ],
+		[ 'r', 'ᄅ' ],
+		[ 'l', 'ᄅ' ],
+		[ 'm', 'ᄆ' ],
+		[ 'b', 'ᄇ' ],
+		[ 'ᄇp', 'ᄈ' ],
+		[ 'ᄉs', 'ᄊ' ],
+		// Disabled rule: [ '\'', 'ᄋ' ] (an apostrophe can be written to represent silent ᄋ)
+		[ 's', 'ᄉ' ],
+		[ 'ᄌj', 'ᄍ' ],
+		[ 'j', 'ᄌ' ],
+		[ 'ch', 'ᄎ' ],
+		[ 'k', 'ᄏ' ],
+		[ 'K', 'ᄏ' ], // There is some ambiguity for final ᆿ and ᇀ, so they get capital K and T
+		[ 't', 'ᄐ' ],
+		[ 'T', 'ᄐ' ],
+		[ 'p', 'ᄑ' ],
+		[ 'h', 'ᄒ' ],
+
+		// Vowels
+		// Without a consonant initial, ᄋ is prepended; [^ᄀ-ᄒ]|^ matches the start or any non-initial
+		// \u escapes keep the output as two jamo (U+110B + vowel), never a precomposed syllable
+		[ '([^ᄀ-ᄒ]|^)wa', '$1\u110B\u116A' ], // ᄋ + ᅪ
+		[ '([^ᄀ-ᄒ]|^)wo', '$1\u110B\u116F' ], // ᄋ + ᅯ
+		[ '([^ᄀ-ᄒ]|^)we', '$1\u110B\u1170' ], // ᄋ + ᅰ
+		[ '([^ᄀ-ᄒ]|^)wi', '$1\u110B\u1171' ], // ᄋ + ᅱ
+		[ '([^ᄀ-ᄒ]|^)ya', '$1\u110B\u1163' ], // ᄋ + ᅣ
+		[ '([^ᄀ-ᄒ]|^)ye', '$1\u110B\u1168' ], // ᄋ + ᅨ
+		[ '([^ᄀ-ᄒ]|^)yo', '$1\u110B\u116D' ], // ᄋ + ᅭ
+		[ '([^ᄀ-ᄒ]|^)yu', '$1\u110B\u1172' ], // ᄋ + ᅲ
+		// 'y' diphthongs
+		[ 'ya', 'ᅣ' ],
+		[ 'ᅣe', 'ᅤ' ],
+		[ 'ᅨo', 'ᅧ' ],
+		[ 'ye', 'ᅨ' ],
+		[ 'yo', 'ᅭ' ],
+		[ 'yu', 'ᅲ' ],
+		// 'w' diphthongs
+		[ 'wa', 'ᅪ' ],
+		[ 'ᅪe', 'ᅫ' ],
+		[ 'wo', 'ᅯ' ],
+		[ 'we', 'ᅰ' ],
+		[ 'wi', 'ᅱ' ],
+		// Other diphthongs
+		[ 'ᅩe', 'ᅬ' ],
+		[ 'ᅦu', 'ᅳ' ],
+		[ 'ᅮi', 'ᅴ' ],
+		[ 'ᅦo', 'ᅥ' ],
+		[ 'ᅡe', 'ᅢ' ],
+		[ '([^ᄀ-ᄒ]|^)i', '$1\u110B\u1175' ], // ᄋ + ᅵ
+		[ '([^ᄀ-ᄒ]|^)a', '$1\u110B\u1161' ], // ᄋ + ᅡ
+		[ '([^ᄀ-ᄒ]|^)u', '$1\u110B\u116E' ], // ᄋ + ᅮ
+		[ '([^ᄀ-ᄒ]|^)o', '$1\u110B\u1169' ], // ᄋ + ᅩ
+		[ '([^ᄀ-ᄒ]|^)e', '$1\u110B\u1166' ], // ᄋ + ᅦ
+		[ 'i', 'ᅵ' ],
+		[ 'a', 'ᅡ' ],
+		[ 'u', 'ᅮ' ],
+		[ 'o', 'ᅩ' ],
+		[ 'e', 'ᅦ' ]
+	];
+
+	var koreanRR = {
+		id: 'kor-rr',
+		name: 'Korean Revised Romanization',
+		description: 'Transliteration using Korean revised romanization',
+		date: '2023-02-04',
+		URL: 'https://github.com/wikimedia/jquery.ime',
+		author: 'Anne Drew Hu',
+		license: 'GPLv3',
+		version: '1.0',
+		maxKeyLength: 4,
+		contextLength: 1,
+
+		/**
+		 * Transliterate with the first matching rule of patternList, then combine the
+		 * jamo of a syllable once the next syllable or a separator starts. Mirrors the
+		 * rule loop in jquery.ime.js, without context rules (not needed here).
+		 *
+		 * @param {string} input Text to transliterate; rules are matched at its end
+		 * @param {string} context Unused
+		 * @return {Object} `{ noop: boolean, output: string }`; noop is true when no rule matched
+		 */
+		patterns: function ( input, context ) {
+			var regex, rule, replacement, i, result,
+				// A complete syllable (initial, vowel, optional final) followed by the
+				// initial of the next syllable or by a separator (hyphen, space, apostrophe)
+				jamoRegex = /([ᄀ-ᄒ])([ᅡ-ᅵ])([ᆨ-ᇂ])?([ᄀ-ᄒ]|[\- '])(.*)$/;
+
+			for ( i = 0; i < patternList.length; i++ ) {
+				rule = patternList[ i ];
+				regex = new RegExp( rule[ 0 ] + '$' );
+
+				// The replacement is the last item of the rule. It can also be a
+				// function, because replace() accepts one as its second argument.
+				replacement = rule.slice( -1 )[ 0 ];
+
+				// Input string match test
+				if ( regex.test( input ) ) {
+					result = input.replace( regex, replacement );
+					// replace() returns result unchanged when jamoRegex does not match
+					return { noop: false, output: result.replace( jamoRegex, combineJamo ) };
+				}
+			}
+
+			// No matches, return the input
+			return { noop: true, output: input };
+		}
+	};
+
+	/**
+	 * Replacement callback for String.prototype.replace: composes conjoining jamo into
+	 * one precomposed Hangul syllable, following the Unicode standard:
+	 * https://www.unicode.org/versions/Unicode13.0.0/ch03.pdf#G24646
+	 *
+	 * @param {string} substring Whole match (unused)
+	 * @param {string} initial Initial consonant jamo
+	 * @param {string} vowel Vowel jamo
+	 * @param {string} [final] Final consonant jamo, if any
+	 * @param {string} nextSyllableInitial Next initial jamo, or a separator (hyphen, space, apostrophe)
+	 * @param {string} otherChars Text following nextSyllableInitial
+	 * @return {string} The syllable; nextSyllableInitial is appended unless it is a separator, and then otherChars unless it contains one
+	 */
+	function combineJamo( substring, initial, vowel, final, nextSyllableInitial, otherChars ) {
+		var SYLLABLE_BASE = 0xAC00, // 44032, code of the first precomposed syllable
+			VOWEL_COUNT = 21,
+			FINAL_COUNT = 28, // counts the "no final" option
+			disambig = /[\- ']/,
+			initialIndex = initial.charCodeAt( 0 ) - 'ᄀ'.charCodeAt( 0 ),
+			vowelIndex = vowel.charCodeAt( 0 ) - 'ᅡ'.charCodeAt( 0 ),
+			// Index 0 means "no final", so real finals are shifted by one
+			finalIndex = final ? final.charCodeAt( 0 ) - 'ᆨ'.charCodeAt( 0 ) + 1 : 0,
+			syllable = String.fromCharCode( SYLLABLE_BASE +
+				( initialIndex * VOWEL_COUNT + vowelIndex ) * FINAL_COUNT + finalIndex );
+
+		if ( disambig.test( nextSyllableInitial ) ) {
+			return syllable;
+		}
+		return syllable + nextSyllableInitial + ( disambig.test( otherChars ) ? '' : otherChars );
+	}
+	$.ime.register( koreanRR );
+}( jQuery ) );
diff --git a/src/jquery.ime.inputmethods.js b/src/jquery.ime.inputmethods.js
@@ -548,6 +548,10 @@
 			name: 'ಲಿಪ್ಯಂತರಣ',
 			source: 'rules/kn/kn-transliteration.js'
 		},
+		'kor-rr': {
+			name: 'Korean Revised Romanization',
+			source: 'rules/kor/kor-rr.js'
+		},
 		'kr-tilde': {
 			name: 'Kanuri tilde',
 			source: 'rules/kr/kr-tilde.js'
@@ -1458,6 +1462,10 @@
 			autonym: 'ಕನ್ನಡ',
 			inputmethods: [ 'kn-transliteration', 'kn-inscript', 'kn-kgp', 'kn-inscript2' ]
 		},
+		kor: {
+			autonym: '한국어',
+			inputmethods: [ 'kor-rr' ]
+		},
 		kr: {
 			autonym: 'kanuri',
 			inputmethods: [ 'kr-tilde' ]

diff --git a/test/jquery.ime.test.fixtures.js b/test/jquery.ime.test.fixtures.js
@@ -4184,6 +4184,79 @@ var palochkaVariants = {
 			{ input: 'd~ha', output: 'ದ್ಹ', description: 'd~ha for ದ್ಹ in Kannada transliteration' }
 		]
 	},
+	{
+		description: 'Korean RR test',
+		inputmethod: 'kor-rr',
+		tests: [
+			// Note that RR is meant to romanize from hangul to latin script, but not
+			// the other way around, so there are some instances where the keystrokes
+			// required are different from RR
+			{ input: 'namsan ', output: '남산', description: 'Namsan -> 남산'},
+			{ input: 'dokdo ', output: '독도', description: 'Dokdo -> 독도'},
+			{ input: 'yeon-hwagyo ', output: '연화교', description: 'yeon-hwagyo -> 연화교'},
+			{ input: 'namhansanseong ', output: '남한산성', description: 'Namhansanseong -> 남한산성'},
+			{ input: 'hyeonchungsa ', output: '현충사', description: 'Hyeonchungsa -> 현충사'},
+			{ input: 'chok-seoklu ', output: '촉석루', description: 'Chokseongnu (chok-seoklu) -> 촉석루'},
+			{ input: 'geumgang ', output: '금강', description: 'Geumgang -> 금강'},
+			{ input: 'sokri-san ', output: '속리산', description: 'Songnisan (sokri-san) -> 속리산'},
+			{ input: 'mu-ryangsu-jeon ', output: '무량수전', description: 'mu-ryangsu-jeon -> 무량수전'},
+			{ input: 'gyeongbokgung ', output: '경복궁', description: 'Gyeongbokgung -> 경복궁'},
+			{ input: 'anabji ', output: '안압지', description: 'anabji -> 안압지'},
+			{ input: 'geukrakjeon ', output: '극락전', description: 'geukrakjeon -> 극락전'},
+			{ input: 'bulguk-sa ', output: '불국사', description: 'Bulguksa (bulguk-sa) -> 불국사'},
+			{ input: 'hwa-rangdae ', output: '화랑대', description: 'Hwarangdae (hwa-rangdae) -> 화랑대'},
+			{ input: 'o-jukheon ', output: '오죽헌', description: 'Ojukheon (o-jukheon) -> 오죽헌'},
+			{ input: 'dokribmun ', output: '독립문', description: 'Dongnimmun (dokribmun) -> 독립문'},
+			{ input: 'da-bo-tab ', output: '다보탑', description: 'Dabotap (da-bo-tab) -> 다보탑'},
+			{ input: 'jongmyo ', output: '종묘', description: 'Jongmyo -> 종묘'},
+			// Hyphens can be used to disambiguate
+			{ input: 'jung-ang ', output: '중앙', description: 'Jung-ang -> 중앙'},
+			{ input: 'jun-gang ', output: '준강', description: 'Jun-gang -> 준강'},
+			{ input: 'jungang ', output: '중앙', description: 'Jungang -> 중앙'},
+			{ input: 'se-un ', output: '세운', description: 'Se-un -> 세운'},
+			{ input: 'seun ',  output: '슨', description: 'Seun -> 슨'},
+			{ input: 'ban-gudae ', output: '반구대', description: 'Ban-gudae -> 반구대'},
+			{ input: 'bang-udae ', output: '방우대', description: 'Bang-udae -> 방우대'},
+			{ input: 'bangudae ', output: '방우대', description: 'Bangudae -> 방우대'},
+			{ input: 'hae-undae ', output: '해운대', description: 'Hae-undae -> 해운대'},
+			{ input: 'ha-eundae ', output: '하은대', description: 'Ha-eundae -> 하은대'},
+			{ input: 'haeundae ', output: '해운대', description: 'Haeundae -> 해운대'},
+			// Hyphens can also be used even when disambiguation is not necessary
+			{ input: 'han boknam ', output: '한복남', description: 'Han Boknam -> 한복남'},
+			{ input: 'han bok-nam ', output: '한복남', description: 'Han Bok-nam -> 한복남'},
+			{ input: 'hong bichna ', output: '홍빛나', description: 'Hong Bitna (hong bichna) -> 홍빛나'},
+			{ input: 'hong bich-na ', output: '홍빛나', description: 'Hong Bit-na (hong bich-na) -> 홍빛나'},
+			// Tense (or glottalized) sounds are not transcribed in cases where morphemes are compounded
+			{ input: 'abgu-jeong ', output: '압구정', description: 'Apgujeong (abgu-jeong) -> 압구정'},
+			{ input: 'habjeong ', output: '합정', description: 'Hapjeong (habjeong) -> 합정'},
+			{ input: 'jukbyeon ', output: '죽변', description: 'Jukbyeon -> 죽변'},
+			{ input: 'nakdonggang ', output: '낙동강', description: 'Nakdonggang -> 낙동강'},
+			{ input: 'paldang ', output: '팔당', description: 'Paldang -> 팔당'},
+			{ input: 'nak-seongdae ', output: '낙성대', description: 'Nakseongdae (nak-seongdae) -> 낙성대'},
+			{ input: 'ul-san ', output: '울산', description: 'Ulsan (ul-san) -> 울산'},
+			// Hangul -> RR -> hangul may sometimes result in different hangul, like these
+			{ input: 'baengma ', output: '뱅마', description: 'Baengma -> 뱅마 (not 백마)'},
+			{ input: 'wangsimni ', output: '왕심니', description: 'Wangsimni -> 왕심니 (not 왕십리)'},
+			{ input: 'sinmunno ', output: '신문노', description: 'Sinmunno -> 신문노 (not 신문로)'},
+			{ input: 'byeollae ', output: '별래', description: 'Byeollae -> 별래 (not 별내)'},
+			{ input: 'jongno ', output: '종노', description: 'Jongno -> 종노 (not 종로)'},
+			{ input: 'silla ', output: '실라', description: 'Silla -> 실라 (not 신라)'},
+			// Syllable-final ㅅ should always be 's'
+			{ input: 'saesbyeol ', output: '샛별', description: 'saesbyeol (not saetbyol) -> 샛별'},
+			// The holiday Seollal would be typed Seolnal, even though both RR and MR transcribe it as Seollal
+			{ input: 'seolnal ', output: '설날', description: 'Seolnal -> 설날'},
+			{ input: 'seollal ', output: '설랄', description: 'Seollal -> 설랄'},
+			// Stressed final syllables should be allowed
+			{ input: 'tieuT ', output: '티읕', description: 'tieuT -> 티읕'},
+			{ input: 'TieuT ', output: '티읕', description: 'TieuT -> 티읕'},
+			{ input: 'kieuK ', output: '키읔', description: 'kieuK -> 키읔'},
+			{ input: 'KieuK ', output: '키읔', description: 'KieuK -> 키읔'},
+			{ input: 'tteokbokki ', output: '떡볶이', description: 'tteokbokki -> 떡볶이'},
+			{ input: 'ddeokbokki ', output: '떡볶이', description: 'ddeokbokki -> 떡볶이'},
+			{ input: 'go-chu-jangjjigae ', output: '고추장찌개', description: 'go-chu-jangjjigae -> 고추장찌개'},
+			{ input: 'sundu-bu jjigae ', output: '순두부찌개', description: 'sundu-bu jjigae -> 순두부찌개'},
+		]
+	},
 	{
 		description: 'Kanuri tilde test',
 		inputmethod: 'kr-tilde',