Last modified on 15 October 2014, at 22:17

Module:ko-translit

The following documentation is located at Module:ko-translit/documentation. [edit]
See also: subpages of this module.

This module will transliterate Korean language text.

The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:ko-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by sc and in the language specified by lang. Returns nil when the transliteration fails.

-- Module table: the transliteration functions are attached to it below and it
-- is returned at the end of the file.
local export = {}
 
--- Romanise Korean text.
--
-- Processing steps:
--   1. Parenthesised runs of Han characters are removed and em dashes are
--      turned into hyphens.
--   2. Text made up only of compatibility jamo (and hyphens) is spelled out
--      letter by letter and returned immediately.
--   3. Otherwise every precomposed Hangul syllable is split arithmetically
--      into initial / vowel / final conjoining jamo, and each syllable is
--      romanised while looking at the following syllable so that the
--      sound-change rules encoded in the tables below can be applied across
--      the boundary.
--   4. Helper markers ('…', '—', '^') are resolved and sentence-initial
--      letters are capitalised.
--
-- @param word  string to transliterate.
-- @param sc    not read by this function.
-- @param lang  not read by this function.
-- @param nn    optional; when equal to 'yes', final ㄴ followed by initial ㄹ
--              is written "nn", otherwise (default 'no') it is written "ll".
-- @return the romanised string, or nil when the text contains no precomposed
--         Hangul syllable (and is not a pure jamo/hyphen string).
function export.tr_revised(word, sc, lang, nn)
	nn = nn or 'no'

	local ustring = mw.ustring
	local gsub, match, char = ustring.gsub, ustring.match, ustring.char

	-- Step 1: strip "(漢字)" glosses and normalise dashes.
	word = gsub(word, '%([一丁-龯㐀-䶵]+%)', '')
	word = gsub(word, '—', '-')

	-- Step 2: strings consisting solely of compatibility jamo and hyphens.
	local compat_jamo = 'ㅂㅈㄷㄱㅅㅁㄴㅇㄹㅎㅋㅌㅊㅍㄸㅃㅉㄲㅆㅛㅕㅑㅐㅔㅗㅓㅏㅣㅠㅜㅡㅖㅒ'
	if gsub(word, '[' .. compat_jamo .. '%-]', '') == '' then
		local spelled = gsub(word, '[' .. compat_jamo .. ']', {['ㅂ']='b',['ㅈ']='j',['ㄷ']='d',['ㄱ']='g',['ㅅ']='s',['ㅁ']='m',['ㄴ']='n',['ㅇ']='/',['ㄹ']='l/r',['ㅎ']='h',['ㅋ']='k',['ㅌ']='t',['ㅊ']='ch',['ㅍ']='p',['ㄸ']='tt',['ㅃ']='pp',['ㅉ']='jj',['ㄲ']='kk',['ㅆ']='ss',['ㅛ']='yo',['ㅕ']='yeo',['ㅑ']='ya',['ㅐ']='ae',['ㅔ']='e',['ㅗ']='o',['ㅓ']='eo',['ㅏ']='a',['ㅣ']='i',['ㅠ']='yu',['ㅜ']='u',['ㅡ']='eu',['ㅖ']='ye',['ㅒ']='yae'})
		return spelled
	end

	-- Nothing to romanise without at least one precomposed syllable.
	if not match(word, '[가-힣]') then
		return nil
	end

	-- Step 3a: decompose. Entry k of each table describes the k-th character;
	-- non-Hangul characters get empty jamo strings.
	local syllable, initial, vowel, final = {}, {}, {}, {}
	local wordlen = ustring.len(word)
	local count = 0
	for codep in ustring.gcodepoint(word) do
		count = count + 1
		syllable[count] = char(codep)
		-- U+AC00 (가) .. U+D7A3 (힣) is the precomposed Hangul syllable block.
		if codep >= 0xAC00 and codep <= 0xD7A3 then
			local syllableindex = codep - 0xAC00
			local tail = syllableindex % 28
			initial[count] = char(0x1100 + math.floor(syllableindex / 588))
			vowel[count] = char(0x1161 + math.floor((syllableindex % 588) / 28))
			if tail == 0 then
				final[count] = ''
			else
				final[count] = char(0x11A7 + tail)
			end
		else
			initial[count], vowel[count], final[count] = '', '', ''
		end
	end
	-- Sentinel so that the look-ahead index never reads nil.
	syllable[wordlen+1], initial[wordlen+1], vowel[wordlen+1], final[wordlen+1] = '', '', '', ''

	-- Lookup tables, built once per call instead of once per syllable.
	-- Final consonant carried over onto a following ᄋ-initial syllable.
	local linked_final = {['ᆨ']='g',['ᆩ']='kk',['ᆪ']='ks',['ᆬ']='nj',['ᆭ']='n',['ᆮ']='d',['ᆯ']='r',['ᆰ']='lg',['ᆱ']='lm',['ᆲ']='lb',['ᆳ']='ls',['ᆴ']='lt',['ᆵ']='lp',['ᆶ']='r',['ᆸ']='b',['ᆹ']='ps',['ᆺ']='s',['ᆻ']='ss',['ᆼ']='ng-',['ᆽ']='j',['ᆾ']='ch',['ᇂ']=''}
	-- Rewrites of the next initial after a final containing ㅎ.
	local initial_after_nh = {['ᄀ']='k',['ᄁ']='k',['ᄃ']='t',['ᄄ']='t',['ᄅ']='n',['ᄇ']='p',['ᄈ']='p',['ᄌ']='ch',['ᄍ']='ch'}
	local initial_after_lh = {['ᄀ']='k',['ᄁ']='k',['ᄂ']='l',['ᄃ']='t',['ᄄ']='t',['ᄅ']='l',['ᄇ']='p',['ᄈ']='p',['ᄌ']='ch',['ᄍ']='ch'}
	local initial_after_h = {['ᄀ']='k',['ᄁ']='k',['ᄂ']='nn',['ᄃ']='t',['ᄄ']='t',['ᄅ']='nn',['ᄆ']='nm',['ᄇ']='p',['ᄈ']='p',['ᄌ']='ch',['ᄍ']='ch'}
	-- Rewrites of the final before an initial ㄴ / ㄹ / ㅁ respectively.
	local final_before_n = {['ᆨ']='ng',['ᆩ']='ng',['ᆪ']='ng',['ᆫ']='n',['ᆬ']='n',['ᆭ']='n',['ᆮ']='n',['ᆰ']='ng',['ᆱ']='m',['ᆵ']='m',['ᆷ']='m',['ᆸ']='m',['ᆹ']='m',['ᆺ']='n',['ᆻ']='n',['ᆼ']='ng',['ᆽ']='n',['ᆾ']='n',['ᆿ']='ng',['ᇀ']='n',['ᇁ']='m',['ᇂ']='n'}
	local final_before_r = {['ᆨ']='ng',['ᆩ']='ng',['ᆪ']='ng',['ᆬ']='n',['ᆮ']='n',['ᆰ']='ng',['ᆱ']='m',['ᆵ']='m',['ᆷ']='m',['ᆸ']='m',['ᆹ']='m',['ᆺ']='n',['ᆻ']='n',['ᆼ']='ng',['ᆽ']='n',['ᆾ']='n',['ᆿ']='ng',['ᇀ']='n',['ᇁ']='m'}
	local final_before_m = {['ᆨ']='ng',['ᆩ']='ng',['ᆪ']='ng',['ᆮ']='n',['ᆰ']='ng',['ᆵ']='m',['ᆸ']='m',['ᆹ']='m',['ᆺ']='n',['ᆻ']='n',['ᆽ']='n',['ᆾ']='n',['ᆿ']='ng',['ᇀ']='n',['ᇁ']='m'}
	-- Default romanisation of whatever jamo are still untouched.
	local final_default = {['ᆨ']='k',['ᆩ']='k',['ᆪ']='k',['ᆫ']='n',['ᆬ']='n',['ᆭ']='n',['ᆮ']='t',['ᆯ']='l',['ᆰ']='k',['ᆱ']='m',['ᆲ']='l',['ᆳ']='l',['ᆴ']='l',['ᆵ']='p',['ᆶ']='l',['ᆷ']='m',['ᆸ']='p',['ᆹ']='p',['ᆺ']='t',['ᆻ']='t',['ᆼ']='ng',['ᆽ']='t',['ᆾ']='t',['ᆿ']='k',['ᇀ']='t',['ᇁ']='p',['ᇂ']=''}
	local initial_default = {['ᄀ']='g',['ᄁ']='kk',['ᄂ']='n',['ᄃ']='d',['ᄄ']='tt',['ᄅ']='r',['ᄆ']='m',['ᄇ']='b',['ᄈ']='pp',['ᄉ']='s',['ᄊ']='ss',['ᄋ']='',['ᄌ']='j',['ᄍ']='jj',['ᄎ']='ch',['ᄏ']='k',['ᄐ']='t',['ᄑ']='p',['ᄒ']='h'}
	local vowel_default = {['ᅡ']='a',['ᅢ']='ae',['ᅣ']='ya',['ᅤ']='yae',['ᅥ']='eo',['ᅦ']='e',['ᅧ']='yeo',['ᅨ']='ye',['ᅩ']='o',['ᅪ']='wa',['ᅫ']='wae',['ᅬ']='oe',['ᅭ']='yo',['ᅮ']='u',['ᅯ']='wo',['ᅰ']='we',['ᅱ']='wi',['ᅲ']='yu',['ᅳ']='eu',['ᅴ']='ui',['ᅵ']='i'}

	-- Step 3b: romanise syllable i while inspecting the next relevant
	-- character j. The order of the rules matters: later rules see the jamo as
	-- rewritten by earlier ones (already-romanised values no longer match the
	-- jamo keys and therefore pass through unchanged).
	local parts = {}
	for i = 1, wordlen, 1 do
		-- `j` must be local: the original assigned an undeclared (global) variable.
		local j = i + 1
		-- Skip over hyphens, carets and apostrophes; hyphens passed on the way
		-- are temporarily stored as em dashes (restored after the loop).
		while match(syllable[j], "[%-%^%']") do
			syllable[j] = gsub(syllable[j], '%-', '—')
			j = j + 1
		end
		if vowel[j] ~= '' then
			-- ㅌ / ㄾ before 이 or 히 -> ㅊ
			if gsub((final[i] .. syllable[j]), '[ᇀᆴ][이히]', '') == '' then
				final[i] = 'ᆾ'
			end
			-- ㄷ before 이 -> ㅈ, before 히 -> ㅊ
			if gsub((final[i] .. syllable[j]), 'ᆮ[이히]', '') == '' then
				final[i] = gsub(syllable[j], '[이히]', {['이']='ᆽ',['히']='ᆾ'})
			end
			-- ㅅ before a ᄋ-initial syllable that is not one of the listed
			-- syllables is treated as ㄷ
			if gsub((final[i] .. initial[j]), 'ᆺᄋ', '') == '' then
				if gsub(syllable[j], '[이아어은으음읍을었았에]', '') ~= '' then
					final[i] = 'ᆮ'
				end
			end
		end
		if initial[j] == 'ᄋ' then
			final[i] = linked_final[final[i]] or final[i]
		end
		if syllable[i] == '밟' then
			final[i] = 'ᆸ'
		elseif final[i] == 'ᆭ' then
			initial[j] = initial_after_nh[initial[j]] or initial[j]
		elseif final[i] == 'ᆶ' then
			initial[j] = initial_after_lh[initial[j]] or initial[j]
		elseif final[i] == 'ᇂ' then
			initial[j] = initial_after_h[initial[j]] or initial[j]
		end
		if initial[j] == 'ᄂ' then
			if match(final[i], '[ᆯᆲᆴᆶ]') then
				final[i] = 'l'
				initial[j] = 'l'
			else
				final[i] = final_before_n[final[i]] or final[i]
			end
		end
		if initial[j] == 'ᄅ' then
			if final[i] == 'ᆫ' then
				if nn ~= 'yes' then
					final[i] = 'l'
					initial[j] = 'l'
				else
					initial[j] = 'n'
				end
			elseif match(final[i], '[ᆯᆲᆴ]') then
				final[i] = 'l'
				initial[j] = 'l'
			else
				final[i] = final_before_r[final[i]] or final[i]
				if final[i] ~= '' then
					initial[j] = 'n'
				end
			end
		end
		if initial[j] == 'ᄆ' then
			final[i] = final_before_m[final[i]] or final[i]
		end
		if final[i] == 'ᆰ' then
			if match(initial[j], '[ᄀᄁᄏ]') then
				final[i] = 'l'
			elseif initial[j] == 'ᄒ' then
				final[i] = 'l'
				initial[j] = 'k'
			end
		end
		if (final[i] .. initial[j]) == 'ᆫᄀ' then
			-- written "n-g" so that it cannot be read as "ng"
			final[i] = 'n-'
		end
		if (final[i] .. initial[j]) == 'ᆽᄒ' then
			final[i] = 'c'
		end
		if (final[i] .. initial[j]) == 'ᆬᄒ' then
			final[i] = 'nc'
		end
		if vowel[i] ~= '' then
			-- Open syllable followed by a ᄋ-initial syllable: leave a '…'
			-- marker that is resolved after the loop.
			if (final[i] .. initial[j]) == 'ᄋ' then
				final[i] = '…'
			end
		end
		final[i] = final_default[final[i]] or final[i]
		initial[i] = initial_default[initial[i]] or initial[i]
		vowel[i] = vowel_default[vowel[i]] or vowel[i]
		parts[i] = initial[i] .. vowel[i] .. final[i]
		-- Non-Hangul characters have no jamo; copy them through unchanged.
		if parts[i] == '' then
			if syllable[i] ~= '' then
				parts[i] = syllable[i]
			end
		end
	end

	-- Step 4: resolve markers.
	local romanised = table.concat(parts, "")
	-- '…' becomes a hyphen only between these vowel letters; otherwise it is dropped.
	romanised = gsub(romanised, 'o…e', 'o-e')
	romanised = gsub(romanised, 'e…([ou])', 'e-%1')
	romanised = gsub(romanised, 'a…e', 'a-e')
	romanised = gsub(romanised, 'u…i', 'u-i')
	romanised = gsub(romanised, '…', '')
	romanised = gsub(romanised, '—', '-')
	-- Text containing sentence punctuation gets sentence-style capitalisation:
	-- the first character is upper-cased and a '^' is planted after each
	-- "<punctuation><space>".
	if match(romanised, '[%.%?%!]') then
		romanised = ustring.upper(ustring.sub(romanised, 1, 1)) .. ustring.sub(romanised, 2, -1)
		romanised = gsub(romanised, "([%.%?%!]) ([a-z%'])", '%1 ^%2')
	end
	-- Move a caret past ''' so it lands on the letter, upper-case every
	-- caret-marked lowercase letter, then discard the remaining carets.
	romanised = gsub(romanised, "%^%'%'%'", "'''^")
	romanised = gsub(romanised, "%^%l", ustring.upper)
	romanised = gsub(romanised, '%^', '')

	return romanised
end
 
-- Alias: `tr` refers to the same function as `tr_revised`.
export.tr = export.tr_revised
 
-- Return the module table to whatever loaded this chunk.
return export