မော်ဂျူး:hak-pron

ဝစ်ရှင်နရီ မှ

Documentation for this module may be created at မော်ဂျူး:hak-pron/doc

-- Table that collects the functions this module exposes; returned at the end of the file.
local export = {}
-- Short local aliases for the mw.ustring string functions used throughout the module.
local gsub = mw.ustring.gsub
local sub = mw.ustring.sub
local match = mw.ustring.match
local find = mw.ustring.find
local len = mw.ustring.len

--- Build the wikitext that lists Hakka romanisations (and derived forms) for an entry.
--
-- The input is a ';'-separated list of segments. A segment containing "pfs"
-- is treated as Pha̍k-fa-sṳ readings, a segment containing "gd" as Guangdong
-- romanisation. Alternative readings inside a segment are separated by '/'.
--
-- @param text      Either the readings string, or a table with an `args` field
--                  (args[1] = readings, args[2] = conversion type, '' if absent).
-- @param convtype  '' produces the full bulleted listing (PFS plus the HRS,
--                  Hagfa Pinyim and IPA derived from it, and the Guangdong
--                  line with its IPA). Any other value produces the brief form
--                  that shows only the romanisations themselves.
-- @return          The assembled wikitext string.
function export.rom_display(text,convtype)
	-- Called with a table carrying `args`: unpack the two positional arguments.
	if type(text) == 'table' then text,convtype = text.args[1],(text.args[2] or '') end
	local display = ''
	-- Romanisations collected for the brief form (only filled when convtype ~= '').
	local show = { ['pfs'] = '', ['gd'] = '' }
	-- Pad every '/' with spaces so readings can later be split on ' / ', then split segments on a literal ';'.
	local decomp = mw.text.split(gsub(text,'/',' / '),';',true)
	local TableTools = require('Module:table')
	for i = 1,#decomp,1 do
		if match(decomp[i],'pfs') then
			decomp[i] = gsub(decomp[i],'pfs=','')
			-- Parallel per-dialect lists: 'n' and 's' are the keys used for the
			-- "Northern" and "Southern" labels below.
			local pfs_readings = { ['n'] = {}, ['s'] = {} }
			local hrs_readings = { ['n'] = {}, ['s'] = {} }
			local hpy_readings = { ['n'] = {}, ['s'] = {} }
			local ipa_readings = { ['n'] = {}, ['s'] = {} }
			-- Render one Sixian block. `style` is 'n', 's', or 'ns' (combined).
			local function display_format(style)
				local label = { ['n'] = 'Northern ', ['s'] = 'Southern ', ['ns'] = '' }
				local city = {
					['n']	= '[[w:Miaoli City|Miaoli]]',
					['s']	= '[[w:Meinong District|Meinong]]',
					['ns']	= '[[w:Miaoli City|Miaoli]] and [[w:Meinong District|Meinong]]'
					}
				local text = string.format("\n** <small>(''[[w:Sixian dialect|%sSixian]], incl. %s'')</small>", label[style], city[style])
				-- style:sub(1,1) turns 'ns' into 'n', so the combined block reads the northern lists.
				text = text .. "\n*** <small>''[[w:Pha̍k-fa-sṳ|Pha̍k-fa-sṳ]]''</small>: <span style=\"font-family: Consolas, monospace;\">" .. table.concat(pfs_readings[style:sub(1,1)], ' / ') .. '</span>'
				text = text .. "\n*** <small>''[[w:zh:客家語拼音方案|Hakka Romanization System]]''</small>: <span style=\"font-family: Consolas, monospace;\">" .. table.concat(hrs_readings[style:sub(1,1)], ' / ') .. '</span>'
				text = text .. "\n*** <small>''[[w:Hagfa Pinyim|Hagfa Pinyim]]''</small>: <span style=\"font-family: Consolas, monospace;\">" .. table.concat(hpy_readings[style:sub(1,1)], ' / ') .. '</span>'
				local ipa = '\n*** <small>[[Wiktionary:International Phonetic Alphabet|IPA]]'
				local span = '</small>: <span class="IPA">/'
				text = text .. ipa
				if style == 'ns' then
					-- Combined block: a single IPA line when both dialects agree,
					-- otherwise one labelled IPA line per dialect.
					local north = table.concat(ipa_readings['n'], '/, /')
					local south = table.concat(ipa_readings['s'], '/, /')
					if north == south then
						text = text .. span .. north .. '/</span>'
					else
						text = text .. " (''Northern, incl. " .. city['n'] .. "'')" .. span .. north .. "/</span>"
						text = text .. ipa .. " (''Southern, incl. " .. city['s'] .. "'')" .. span .. south .. "/</span>"
					end
				else
					text = text .. span .. table.concat(ipa_readings[style], '/, /') .. '/</span>'
				end
				return text
			end
			-- Derive the southern spelling of a reading: an "e" (with or without
			-- tone mark) followed by n/t becomes the matching "a" when it comes
			-- after y, ngi, k(h)i, or hi (the latter only at the start of the
			-- string or after a hyphen/whitespace).
			local function southern(text)
				local function convert(a, b, c)
					local e_a = { ['e'] = 'a', ['ê'] = 'â', ['é'] = 'á', ['è'] = 'à' }
					return a .. e_a[b] .. c
				end
				text = gsub(text, '([yY])([eéèê])(̍?[nt])', convert)
				text = gsub(text, '([nN]gi)([eéèê])(̍?[nt])', convert)
				text = gsub(text, '([kK]h?i)([eéèê])(̍?[nt])', convert)
				text = gsub(text, '^([hH]i)([eéèê])(̍?[nt])', convert)
				text = gsub(text, '([%-%s][hH]i)([eéèê])(̍?[nt])', convert)
				return text
			end
			-- Append a reading to the given dialect's list together with its
			-- HRS, Hagfa Pinyim and IPA conversions.
			local function add(style, reading)
				pfs_readings[style][#pfs_readings[style] + 1] = reading
				hrs_readings[style][#hrs_readings[style] + 1] = export.hrs(reading, style)
				hpy_readings[style][#hpy_readings[style] + 1] = export.pfs_to_hpy(reading)
				ipa_readings[style][#ipa_readings[style] + 1] = export.ipa(reading, style)
			end
			
			-- Stays true only while every reading is shared unchanged by both dialects.
			local ns = true
			for _, reading in ipairs(mw.text.split(decomp[i], ' / ')) do
				if match(reading,':') then
					-- Explicit "prefix:reading" form; prefix 'ns' means both dialects.
					local pair = mw.text.split(reading, ':')
					if pair[1] == 'ns' then
						add('n', pair[2])
						add('s', pair[2])
					else
						ns = false
						add(pair[1], pair[2])
					end
				elseif match(reading,'[yY]') or reading ~= southern(reading) then
					-- Readings containing "y", or altered by southern(), are listed per dialect.
					ns = false
					add('n', reading)
					add('s', southern(reading))
				else
					add('n', reading)
					add('s', reading)
				end
			end
		
			if convtype == '' then
				if ns then 
					display = display .. display_format('ns')
				else
					if #pfs_readings['n'] ~= 0 then display = display .. display_format('n') end
					if #pfs_readings['s'] ~= 0 then display = display .. display_format('s') end
				end
			else
				-- Brief form: merge the southern readings into the northern list and drop duplicates.
				for i, reading in ipairs(pfs_readings['s']) do
					pfs_readings['n'][#pfs_readings['n'] + 1] = pfs_readings['s'][i]
				end
				show['pfs'] = table.concat(TableTools.removeDuplicates(pfs_readings['n']), ' / ')
			end
		end
		if match(decomp[i],'gd') then
			if convtype == '' then
				-- Full form: show the romanisation with tone digits 1-6 superscripted, then its IPA.
				display = display .. "\n** <small>(''[[w:Meixian dialect|Meixian]]'')</small>"
				display = display .. "\n*** <small>''[[w:Guangdong_Romanization#Hakka|Guangdong]]''</small>: <span style=\"font-family: Consolas, monospace;\">" .. string.gsub(string.gsub(decomp[i],'gd=',''),'([1-6])','<sup>%1</sup>') .. '</span>'
				display = display .. '\n*** <small>[[Wiktionary:International Phonetic Alphabet|IPA]]</small>: <span class="IPA">/' .. export.gd_to_ipa(decomp[i]) .. "/</span>"
			else
				show['gd'] = gsub(gsub(decomp[i], 'gd=', ''), '([1-6])', '<sup>%1</sup>')
			end
		end
	end
	if convtype ~= '' then
		-- Brief form: two indented list lines when both systems are present,
		-- otherwise the single available one appended inline.
		local pfs = " <small>(''[[w:Sixian dialect|Sixian]], [[w:Pha̍k-fa-sṳ|PFS]]'')</small>: <span style=\"font-family: Consolas, monospace;\">" .. show['pfs'] .. '</span>'
		local gd = " <small>(''[[w:Meixian dialect|Meixian]], [[w:Guangdong_Romanization#Hakka|Guangdong]]'')</small>: <span style=\"font-family: Consolas, monospace;\">" .. show['gd'] .. '</span>'
		if show['pfs'] ~= '' and show['gd'] ~= '' then
			display = display .. '\n*:' .. pfs .. '\n*:' .. gd
		elseif show['pfs'] ~= '' then
			display = display .. pfs
		elseif show['gd'] ~= '' then
			display = display .. gd
		end
	end
	return display
end

--- Work out the tone number (1-6) of one romanised syllable.
--
-- The syllable is decomposed (NFD) so tone diacritics appear as separate
-- combining characters. A diacritic decides the tone directly; an unmarked
-- syllable is tone 5 when it ends in a stop letter (p/t/k/b/d/g) that is not
-- preceded by "n", and tone 4 otherwise.
--
-- @param text  A single syllable (or its final).
-- @return      Integer tone number between 1 and 6.
local function find_tone(text)
	local decomposed = mw.ustring.toNFD(text)

	-- Combining mark → tone number, tested in this fixed order.
	local mark_tones = {
		{ '̂', 1 },
		{ '̀', 2 },
		{ '́', 3 },
		{ '̍', 6 },
	}
	for _, entry in ipairs(mark_tones) do
		if find(decomposed, entry[1]) then
			return entry[2]
		end
	end

	-- Unmarked: the [^n] guard keeps the "g" of a final "ng" from counting as a stop.
	if find(decomposed, '[^n][ptkbdg]$') then
		return 5
	end
	return 4
end

--- Convert a Pha̍k-fa-sṳ (PFS) reading into an IPA transcription.
--
-- The reading is lower-cased and split into syllables on hyphens and spaces.
-- Each syllable is divided into an initial and a final, its tone is read from
-- the final with find_tone(), and both parts are mapped to IPA. Tone contours
-- are appended in a second pass, with a "⁻¹¹" sandhi marker where applicable.
--
-- @param text     PFS string, or a table with an `args` field whose first
--                 entry is that string.
-- @param dialect  'n' or 's' as passed by rom_display (may be nil). 's' renders
--                 the "y" initial as "(j)"; 'n' enables the "é" suffix sandhi rule.
-- @return         A single IPA string, syllables separated by spaces.
function export.ipa(text, dialect)
	if type(text) == 'table' then text = text.args[1] end

	-- Lookup tables are loop-invariant, so they are built once per call.

	-- Initials whose lookup key gets an extra "i" when the final starts with i.
	local palatal_initial = {
		['ch'] = true,
		['chh'] = true,
		['s'] = true,
		['ng'] = true
	}
	local initial_ipa = {
		['ngi'] = 'ɲ',
		['ng'] = 'ŋ',
		['ph'] = 'pʰ',
		['th'] = 'tʰ',
		['kh'] = 'kʰ',
		['ch'] = 't͡s',
		['chi'] = 't͡ɕ',
		['chh'] = 't͡sʰ',
		['chhi'] = 't͡ɕʰ',
		['si'] = 'ɕ',
		['y'] = 'i'
	}
	-- Strips tone diacritics from the final and maps ṳ to ɨ.
	local final_conv = {
		['á'] = 'a', ['é'] = 'e', ['í'] = 'i', ['ó'] = 'o', ['ú'] = 'u', ['́'] = '',
		['à'] = 'a', ['è'] = 'e', ['ì'] = 'i', ['ò'] = 'o', ['ù'] = 'u', ['̀'] = '',
		['â'] = 'a', ['ê'] = 'e', ['î'] = 'i', ['ô'] = 'o', ['û'] = 'u', ['̂'] = '',
		['ń'] = 'n', ['ǹ'] = 'n',
		['̍'] = '',
		['ṳ'] = 'ɨ',
	}
	-- Tone number (as returned by find_tone) → superscript contour.
	local tone_ipa = {
		[1] = '²⁴',
		[2] = '¹¹',
		[3] = '³¹',
		[4] = '⁵⁵',
		[5] = '²',
		[6] = '⁵',
	}

	local syllables = mw.text.split(gsub(mw.ustring.lower(text), ' ', '-'), "-")
	local initial, final, tone, before_comma = {}, {}, {}, {}

	for i, syllable in ipairs(syllables) do
		-- A comma attached to the syllable only matters as a sandhi barrier.
		-- Remove it before analysis: left in place it would end up inside the
		-- final, hiding a stop coda from find_tone() and from every
		-- "$"-anchored rewrite below.
		before_comma[i] = find(syllable, ',') ~= nil
		syllable = gsub(syllable, ',', '')

		-- Respell oa/oe (with any tone mark on the o) as ua/ue, moving the mark onto the second vowel.
		syllable = gsub(syllable,'o̍[ae]',{['o̍a']='ua̍',['o̍e']='ue̍'})
		syllable = gsub(syllable,'[oóòôō][ae]',{['oa']='ua',['óa']='uá',['òa']='uà',['ôa']='uâ',['ōa']='uā',['oe']='ue',['óe']='ué',['òe']='uè',['ôe']='uê',['ōe']='uē'})

		-- The pattern can match the empty string, so initial[i] is never nil.
		initial[i] = match(syllable, '^[mnptkcfvshyl]?[gh]?h?')
		final[i] = sub(syllable, len(initial[i]) + 1, -1)

		local key = initial[i]
		if find(final[i], '^[iíìî]') and palatal_initial[key] then
			key = key .. 'i'
		end
		initial[i] = initial_ipa[key] or initial[i]

		-- Tone must be read before the diacritics are stripped from the final.
		tone[i] = find_tone(final[i])
		final[i] = gsub(final[i], '[âêîôû̂àèìòù̀áéíóú́ńǹ̍ṳ]', final_conv)

		-- "y" initial (mapped to 'i' above): fold it into the final as a
		-- leading i unless the final is already i, optionally plus m/n/p/t.
		if initial[i] == 'i' then
			final[i] = (find(final[i], '^i[mnpt]?$') and '' or 'i') .. final[i]
			initial[i] = dialect == 's' and '(j)' or ''
		end

		final[i] = gsub(final[i], '([ptk])$', '%1̚')
		final[i] = gsub(final[i], 'ng$', 'ŋ')
		final[i] = final[i] == 'ŋ' and 'ŋ̍' or final[i]
		final[i] = gsub(final[i], 'er$', 'ə')
		-- Mark the non-syllabic members of vowel clusters: both outer vowels of
		-- a three-vowel cluster, otherwise a leading i/u, otherwise a trailing i/u.
		final[i] = gsub(final[i], '([aeiouɨ])([aeiouɨ])([aeiouɨ]?)', function(first, second, third)
			if third ~= '' then
				first = first .. '̯'
				third = third .. '̯'
			elseif first == 'i' or first == 'u' then
				first = first .. '̯'
			elseif second == 'i' or second == 'u' then
				second = second .. '̯'
			end
			return first .. second .. third
		end)
	end

	local ipa = {}
	for i, syllable in ipairs(syllables) do
		local contour = tone_ipa[tone[i]]
		-- Sandhi marker is added when either
		--  (a) a tone-1 syllable is followed by tone 1, 4 or 6 with no comma in between, or
		--  (b) the syllable is exactly "é", the dialect is 'n', the page title
		--      contains 仔, the input contains "-é", and the previous tone is 3 or 5.
		-- tostring() turns a missing neighbour (nil) into "nil", which never matches.
		if (tone[i] == 1 and find(tostring(tone[i+1]), '[146]') and not before_comma[i]) or (syllable == 'é' and dialect == 'n' and find(mw.title.getCurrentTitle().text, '仔') and find(text, '-é') and find(tostring(tone[i-1]), '[35]')) then
			contour = contour .. '⁻¹¹'
		end

		ipa[i] = initial[i] .. final[i] .. contour
	end

	-- Commas were already removed per syllable. Returning the concatenation
	-- directly (not `return gsub(...)`) yields exactly one value, so no
	-- substitution count leaks out as a second return value.
	return table.concat(ipa, " ")
end

--- Convert a Pha̍k-fa-sṳ (PFS) reading to the spelling shown under
-- "Hakka Romanization System" by rom_display.
--
-- @param text     PFS string, or a table with an `args` field whose first
--                 entry is that string.
-- @param dialect  When 's', the PFS letter "y" is rendered "(r)i"; otherwise "i".
-- @return         Converted syllables joined by single spaces.
function export.hrs(text, dialect)
	if type(text) == 'table' then
		text = text.args[1]
	end

	local y_spelling = 'i'
	if dialect == 's' then
		y_spelling = '(r)i'
	end

	-- Replacement tables for the consonant respelling steps.
	local plain = { ['p'] = 'b', ['t'] = 'd', ['k'] = 'g', ['y'] = y_spelling }
	local aspirated = { ['bh'] = 'p', ['dh'] = 't', ['gh'] = 'k', ['ch'] = 'z' }
	local before_i = { ['z'] = 'j', ['c'] = 'q', ['s'] = 'x', ['i'] = '' }
	-- Tone number → trailing tone mark; tones without an entry get none.
	local tone_marks = { [1] = '´', [2] = 'ˇ', [3] = '`', [5] = '`' }

	local pieces = mw.text.split(gsub(mw.ustring.lower(text), ' ', '-'), "-")
	local converted = {}
	for idx = 1, #pieces do
		local syl = pieces[idx]

		-- Set aside a comma (its last character is dropped) and re-attach it at the end.
		local trailing = ''
		if find(syl, ',') then
			trailing = ','
			syl = sub(syl, 1, -2)
		end

		-- Consonants: unaspirated first, then the resulting "bh/dh/gh/ch", then "zh" (from "chh").
		syl = gsub(syl, '[ptky]', plain)
		syl = gsub(syl, '[bdgc]h', aspirated)
		syl = gsub(syl, 'zh', 'c')
		-- z/c/s/i directly before an i-vowel are respelled (an "i" before i is dropped).
		syl = gsub(syl, '([zcsi])([iíìî])', function(consonant, vowel)
			return before_i[consonant] .. vowel
		end)

		-- Read the tone while the diacritics are still present.
		local mark = tone_marks[find_tone(syl)] or ''

		-- Vowels: ṳ → ii, strip the tone diacritics, then oa/oe → ua/ue.
		syl = gsub(syl, 'ṳ', 'ii')
		syl = gsub(mw.ustring.toNFD(syl), '[́̀̂̍]', '')
		syl = gsub(syl, 'o([ae])', 'u%1')

		converted[idx] = syl .. mark .. trailing
	end

	return table.concat(converted, " ")
end

--- Convert a Pha̍k-fa-sṳ (PFS) reading to the spelling shown under
-- "Hagfa Pinyim" by rom_display, with the tone number in a <sup> tag.
--
-- @param text  PFS string, or a table with an `args` field whose first entry
--              is that string.
-- @return      Converted syllables joined by single spaces.
function export.pfs_to_hpy(text)
	if type(text) == 'table' then
		text = text.args[1]
	end

	-- Replacement tables for the consonant respelling steps.
	local plain = { ['p'] = 'b', ['t'] = 'd', ['k'] = 'g' }
	local aspirated = { ['bh'] = 'p', ['dh'] = 't', ['gh'] = 'k', ['ch'] = 'z' }
	local before_i = { ['z'] = 'j', ['c'] = 'q', ['s'] = 'x' }

	local pieces = mw.text.split(gsub(mw.ustring.lower(text), ' ', '-'), "-")
	local converted = {}
	for idx = 1, #pieces do
		local syl = pieces[idx]

		-- Set aside a comma (its last character is dropped) and re-attach it at the end.
		local trailing = ''
		if find(syl, ',') then
			trailing = ','
			syl = sub(syl, 1, -2)
		end

		-- Consonants: unaspirated first, then the resulting "bh/dh/gh/ch", then "zh" (from "chh").
		syl = gsub(syl, '[ptk]', plain)
		syl = gsub(syl, '[bdgc]h', aspirated)
		syl = gsub(syl, 'zh', 'c')
		-- z/c/s directly before an i-vowel become j/q/x.
		syl = gsub(syl, '([zcs])([iíìî])', function(consonant, vowel)
			return before_i[consonant] .. vowel
		end)

		-- Read the tone while the diacritics are still present.
		local tone_number = find_tone(syl)

		-- Vowels: ṳ → i, strip the tone diacritics, then oa/oe → ua/ue.
		syl = gsub(syl, 'ṳ', 'i')
		syl = gsub(mw.ustring.toNFD(syl), '[́̀̂̍]', '')
		syl = gsub(syl, 'o([ae])', 'u%1')
		-- Special spellings: bare "yu" → "yiu"; e → a between i/y and n/d.
		if syl == 'yu' then
			syl = 'yiu'
		end
		syl = gsub(syl, '([iy])e([nd])', '%1a%2')

		converted[idx] = syl .. '<sup>' .. tone_number .. '</sup>' .. trailing
	end

	return table.concat(converted, " ")
end

--- Convert Guangdong-romanisation Hakka readings to IPA.
--
-- Input format: alternative readings separated by " / "; inside a reading,
-- syllables separated by single spaces; each syllable ends in a tone digit
-- 1-6. A "gd=" prefix is removed if present.
--
-- @param text  Reading string, or a table with an `args` field whose first
--              entry is that string.
-- @return      IPA for each reading, joined with "/, /" (the caller supplies
--              the outermost slashes).
-- Raises an error when a syllable has an unrecognised initial or final, or
-- lacks a trailing tone digit.
function export.gd_to_ipa(text)
	local initial_conv = {
		["b"] = "p", ["p"] = "pʰ", ["m"] = "m", ["f"] = "f", ["v"] = "ʋ",
		["d"] = "t", ["t"] = "tʰ", ["n"] = "n", ["l"] = "l", 
		["g"] = "k", ["k"] = "kʰ", ["ng"] = "ŋ", ["h"] = "h", [""] = "",
		["z"] = "t͡s", ["c"] = "t͡sʰ", ["s"] = "s",
		["j"] = "t͡ɕ", ["q"] = "t͡ɕʰ", ["x"] = "ɕ"
	}
	local final_conv = {
		["ii"] = "z̩", ["i"] = "i", ["u"] = "u",
		["a"] = "a", ["ia"] = "ia", ["ua"] = "ua",
		["ê"] = "e", ["iê"] = "ie", ["uê"] = "ue",
		["o"] = "o", ["io"] = "io", ["uo"] = "uo",
		["m"] = "m̩", ["n"] = "n̩",
		["ai"] = "aɪ", ["iai"] = "iaɪ", ["uai"] = "uaɪ",
		["oi"] = "oɪ",
		["ui"] = "uɪ", ["iui"] = "iuɪ",
		["au"] = "au", ["iau"] = "iau",
		["êu"] = "eu",
		["iu"] = "iu",
		["em"] = "əm", ["im"] = "im",
		["am"] = "am", ["iam"] = "iam",
		["êm"] = "ɛm",
		["en"] = "ən", ["in"] = "in",
		["an"] = "an", ["ian"] = "ian", ["uan"] = "uan",
		["ên"] = "ɛn", ["iên"] = "iɛn", ["uên"] = "uɛn",
		["on"] = "ɔn", ["ion"] = "iɔn", ["uon"] = "uɔn",
		["un"] = "un", ["iun"] = "iun",
		["ang"] = "aŋ", ["iang"] = "iaŋ", ["uang"] = "uaŋ",
		["ong"] = "ɔŋ", ["iong"] = "iɔŋ", ["uong"] = "uɔŋ",
		["ung"] = "ʊŋ", ["iung"] = "iʊŋ",
		["eb"] = "əp̚", ["ib"] = "ip̚",
		["ab"] = "ap̚", ["iab"] = "iap̚",
		["êb"] = "ɛp̚",
		["ed"] = "ət̚", ["id"] = "it̚",
		["ad"] = "at̚", ["iad"] = "iat̚", ["uad"] = "uat̚",
		["êd"] = "ɛt̚", ["iêd"] = "iɛt̚", ["uêd"] = "uɛt̚",
		["od"] = "ɔt̚",
		["ud"] = "ut̚", ["iud"] = "iut̚",
		["ag"] = "ak̚", ["iag"] = "iak̚", ["uag"] = "uak̚",
		["og"] = "ɔk̚", ["iog"] = "iɔk̚", ["uog"] = "uɔk̚",
		["ug"] = "ʊk̚", ["iug"] = "iʊk̚"
	}
	-- Keys ending in "*" are the sandhi variants selected in the second pass.
	local tone_conv = {
		["1"] = "⁴⁴", ["2"] = "¹¹",
		["3"] = "³¹",
		["4"] = "⁵³",
		["5"] = "¹", ["6"] = "⁵",
		["1*"] = "⁴⁴⁻³⁵",
		["4*"] = "⁵³⁻⁵⁵"
	}
	-- Initials that get a different IPA value when the final starts with "i".
	local palatal = {
		['g'] = 'c',
		['k'] = 'cʰ',
		['ng'] = 'ɲ',
		['h'] = 'ç'
	}
	
	if type(text) == 'table' then text = text.args[1] end
	local words = mw.text.split(text, " / ")
	local result = {}
	for _, word in ipairs(words) do
		local syllables = mw.text.split(gsub(word, 'gd=', ''), ' ')
		local initial, final, tone, ipa = {}, {}, {}, {}

		-- Pass 1: cut every syllable into raw initial / final / tone digit.
		for i, syllable in ipairs(syllables) do
			-- The pattern can match the empty string, so initial[i] is never nil.
			initial[i] = match(syllable, "^[bpmfvdtnlgkhzcsjqx]?g?")
			final[i] = match(sub(syllable, len(initial[i]) + 1, -1), "^[^1-6]*")
			-- A leading "y" is only orthographic: "yi" → "i", other "y" → "i".
			final[i] = gsub(gsub(final[i], "^yi", "i"), "^y", "i")
			if find(initial[i], "[zcs]") and final[i] == "i" then
				final[i] = "ii"
			end
			-- No final at all: the "initial" is really a syllabic consonant.
			if final[i] == "" then
				final[i] = initial[i]
				initial[i] = ""
			end
			tone[i] = match(syllable, "[1-6]$")
		end

		-- Pass 2: map to IPA. Runs in order, so tone[i+1] is still the raw
		-- digit while final[i-1] is already IPA.
		for i, syllable in ipairs(syllables) do
			initial[i] = (find(final[i], "^i") and palatal[initial[i]] or initial_conv[initial[i]]) or error(("Unrecognised initial: \"%s\""):format(initial[i]))
			final[i] = final_conv[final[i]] or error(("Unrecognised final: \"%s\""):format(final[i]))
			-- Without this check a missing tone digit would surface as an
			-- opaque "bad argument" error from match() on a nil value.
			if not tone[i] then
				error(("Missing tone number (1-6) in syllable: \"%s\""):format(syllable))
			end
			-- Tone 1 or 4 followed by tone 2, 3, 4 or 5 takes its sandhi contour.
			if match(tone[i], "[14]") and match(tone[i+1] or "", "[2345]") then
				tone[i] = tone[i] .. "*"
			end
			-- Bare "e" in tone 3 on a page whose title contains 仔: give it a
			-- linking onset derived from the end of the previous final.
			-- Later assignments override earlier ones.
			if initial[i] == "" and final[i] == "e" and tone[i] == "3" and find(mw.title.getCurrentTitle().text, '仔') then
				initial[i] = match(final[i-1] or '', '([mnŋpti])̚?$') or initial[i]
				initial[i] = find(final[i-1] or '', 'u$') and 'ʋ' or initial[i]
				initial[i] = find(final[i-1] or '', '[ao]$') and '(ʋ)' or initial[i]
				initial[i] = find(final[i-1] or '', 'e$') and '(i)' or initial[i]
			end
			tone[i] = tone_conv[tone[i]]
			ipa[i] = initial[i] .. final[i] .. tone[i]
		end
		table.insert(result, table.concat(ipa, " "))
	end
	return table.concat(result, "/, /")
end

-- Hand the table of exported functions back to whoever loads this module.
return export