မော်ဂျူး:mnw-pron

ဝစ်ရှင်နရီ မှ

Documentation for this module may be created at မော်ဂျူး:mnw-pron/doc

-- Table of functions exposed by this module; it is returned at the end of the file.
local export = {}
-- Short local aliases for the mw.ustring functions used throughout the file.
local gsub = mw.ustring.gsub
-- NOTE(review): `sub` is not referenced anywhere in the visible part of this file.
local sub = mw.ustring.sub
local match = mw.ustring.match

-- Output systems produced by this module.
-- Each entry holds:
--   [1]     numeric key; export.make and export.generate_tests sort the list by it
--   "type"  which input spelling is romanised for this system: the callers index
--           their { orthographic = ..., phonetic = ... } table with this value
--   "name"  label printed by export.make for systems 2 to 5
-- The position of an entry (1..5) is the `system_index` used to pick a column
-- out of the lookup tables below.
local system_list = {
	{ 1, ["type"] = "phonetic",		["name"] = "IPA" },
	{ 2, ["type"] = "orthographic",	["name"] = "MLCTS" },
	{ 3, ["type"] = "orthographic",	["name"] = "ALA-LC" },
	{ 4, ["type"] = "phonetic",		["name"] = "BGN/PCGN" },
	{ 5, ["type"] = "phonetic",		["name"] = "Okell" },
}

-- Lookup used by process() and initial_by_char() for the initial part of a syllable.
-- Key: a Mon-script string; value: five renderings, one per system_index (1..5).
-- NOTE(review): most keys here are a consonant plus a rime rather than a bare
-- consonant, and only two consonants are covered; any other initial makes
-- process() raise "Initial data not found." - confirm the table is complete.
-- NOTE(review): the "ခက်" row repeats the values of the bare "ခ" row, and the
-- "ခါက်" row writes "kh" where the "ခ" row uses "kʰ"/"hk" - possibly placeholders;
-- verify against the intended transcription.
local initial_table = {
	["က"]	=	{ "k", "k", "k", "k", "k" },
	["ကက်"]	=	{ "kɛk", "kɛk", "kɛk", "kɛk", "kɛk" },
	["ကာက်"]	= { "kac", "kac", "kac", "kac", "kac" },
	["ကိက်"]	=	{ "koc", "koc", "koc", "koc", "koc" },
	["ကုက်"]	=	{ "kɤk", "kɤk", "kɤk", "kɤk", "kɤk" },
	["ကေက်"]	=	{ "kɔc", "kɔc", "kɔc", "kɔc", "kɔc" },
	["ကောက်"]	=	{ "kòk", "kòk", "kòk", "kòk", "kòk" },
	["ကံက်"]	=	{ "kɔk", "kɔk", "kɛ̀k", "kɛ̀k", "kɛ̀k" },
	["ကအ်"]	=	{ "kɔˀ", "kɔˀ", "kɔˀ", "kɔˀ", "kɔˀ" },
	["ကေအ်"]	=	{ "keˀ", "keˀ", "keˀ", "keˀ", "keˀ" },
	["ကောအ်"]	=	{ "kɒˀ", "kɒˀ", "kɒˀ", "kɒˀ", "kɒˀ" },
	["ကိုအ်"]	=	{ "kɜˀ", "kɜˀ", "kɜˀ", "kɜˀ", "kɜˀ" },
	["ခ"]	=	{ "kʰ", "hk", "kh", "hk", "hk" },
	["ခက်"]	=	{ "kʰ", "hk", "kh", "hk", "hk" },
	["ခါက်"]	=	{ "khac", "khac", "khac", "khac", "khac" },
}

-- Replacement table passed to gsub: a marker ("+" or "-") followed by a
-- consonant is replaced by the consonant on the right-hand side.
-- process() applies it with the pattern "%+." (so only the "+" keys can match
-- there); respelling_format() applies it with "[%+%-]." (so "-ဘ" can match too).
-- Marker/consonant pairs that are not listed are left unchanged by gsub.
local initial_voicing = {
	["+က"]	=	"ဂ", 
	["+ခ"]	=	"ဂ", 
	["+စ"]	=	"ဇ", 
	["+ဆ"]	=	"ဇ", 
	["+ဋ"]	=	"ဍ", 
	["+ဌ"]	=	"ဍ", 
	["+တ"]	=	"ဒ", 
	["+ထ"]	=	"ဒ", 
	["+ပ"]	=	"ဗ", 
	["+ဖ"]	=	"ဗ", 
	["-ဘ"]	=	"ဖ",
}

-- Lookup used by process() for the final (rime) part of a syllable.
-- Key: the Mon-script final string ("" is the entry used when the final is
-- empty); value: five renderings, one per system_index (1..5).
local final_table = {
	[""]	=	{ "a̰", "a.", "a", "a.", "á" },
	["က်"]	=	{ "ɛk", "ɛk", "ɛk‘", "ɛk", "ɛk" },

}

-- Vowel lookup consulted first by the fallback branch of process(), which
-- splits a final that has no direct entry into a leading part and a last letter.
-- Key: the leading part of the final; value: five renderings, one per
-- system_index (1..5).
local nucleus_table = {
	[""]	=	{ "à", "a", "a", "a", "a" }, 
	["ိ"]	=	{ "ò", "ò", "ò", "ò", "ò" },
}

-- Lookup for spellings that process() treats as a whole unit.
-- When the (voicing-resolved) initial of a syllable is a key of this table,
-- process() folds the initial into the final and looks the combined string up
-- again; the table is also a fallback for finals missing from final_table.
-- Key: Mon-script string; value: five renderings, one per system_index (1..5).
local indep_letter_table = {
	["အာ"]	=	{ "ɛ̀ə", "ɛ̀ə.", "ɛ̀ə", "ɛ̀ə.", "ɛ̀ə" },
	["ဣ"]	=	{ "ḭ", "i.", "i", "i.", "í" },
	["ဣဳ"]	=	{ "ɒə", "ɒə", "ɒə", "ɒə", "ɒə" },
	["ဥ"]	=	{ "ṵ", "u.", "u", "u.", "aoˀ" },
	["ဥူ"]	=	{ "ù", "u", "ū", "u", "u" },
	["ဨ"]	=	{ "ey", "ɛ", "ɛ", "ɛ", "èy" },
	["အဲ"]	=	{ "ɔə", "uə", "ɔ̀ə", "ɔə.", "ùə" },
	["ဩ"]	=	{ "ò", "èə:", "o", "èə:", "ò" },
	["အဴ"]	=	{ "ɤ̀", "ao", "o‘", "ao", "ò" },
	["အံ"]	=	{ "ɔm", "ɔ̀m.", "ɔˀ", "ɔ̀m", "ɔ̀m" },
	["အး"]	=	{ "ɛ̀h", "ah", "ɛ̀h", "ɛ̀h.", "ɛ̀h" },
}

-- Tone mark lookup used by process().
-- Key: the tone sign captured at the end of a syllable; value: the symbol added
-- to the romanised final, one per system_index (1..5). Several values are bare
-- combining diacritics, which process() attaches to a vowel or appends.
local tone_table = {
	["း"] = { "́", ":", "″", ":", "̀" },
	["့"] = { "̰", ".", "′", ".", "́" },
}

-- Pattern used by syllabify() to find a syllable boundary. It has two captures:
--   1. a consonant from the listed set, then any number of the medial signs in
--      the second class and of the vowel/tone signs in the third class;
--   2. another consonant from the set, an optional "့", and one further
--      character that is not "့", "်" or "္".
-- syllabify() rewrites every match as "%1 %2" until the pattern no longer matches.
local repl_string = "([ကခဂဃၚစဆဇၛဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠၜအၝဿ][ျြွှ]*[ံါဲါါဴေး]*)([ကခဂဃၚစဆဇၛဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠၜအၝဿ][့]?[^့်္])"
-- Split `text` into syllables by inserting separator spaces.
-- Returns the text with single spaces between the detected syllables and no
-- leading or trailing space.
-- NOTE(review): the bracket class in the second substitution lists multi-letter
-- spellings, but a character class matches one code point at a time, so it
-- matches any single letter or sign contained in those spellings - confirm
-- this is intended.
function syllabify(text)
	-- Put a space between an apostrophe and the marker run that follows it;
	-- positions where both captures are empty are left untouched.
	local function separate_markers(apostrophe, markers)
		if apostrophe .. markers ~= "" then
			return apostrophe .. " " .. markers
		end
	end

	-- Decide where the break goes after a matched letter, depending on the
	-- second character that follows it.
	local function separate_letter(letter, following, after)
		if after == "္" then
			return " " .. letter .. following .. " " .. after
		elseif after == "်" then
			return " " .. letter .. following .. after
		end
		return " " .. letter .. " " .. following .. after
	end

	text = gsub(text, "('?)([%+%-%*]*)", separate_markers)
	text = gsub(text, "([အာဣဣဳဥဥူဨအဲသြအဴအံအး][့း်]?)(.?)(.?)", separate_letter) .. " "

	-- A syllable ends after "်" (plus an optional "း" and apostrophe).
	text = gsub(text, "(်း?'?)", "%1 ")
	-- Move a tone sign behind the consonant + "်" pair that follows it.
	text = gsub(text, "([း့])([ကခဂဃၚစဆဇၛဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠၜအၝ]်)", "%2%1")

	-- Keep splitting at repl_string boundaries until a pass changes nothing.
	local replaced
	repeat
		text, replaced = gsub(text, repl_string, "%1 %2")
	until replaced == 0

	-- Tidy up: "္" becomes a temporary " , " marker, space runs collapse, the
	-- ends are trimmed, the marker is turned back into one space, and a digit
	-- 2 or 3 is glued to what precedes it.
	text = gsub(text, "္", " , ")
	text = gsub(text, " +", " ")
	text = gsub(text, "^ ?(.*[^ ]) ?$", "%1")
	text = gsub(text, " , ", " ")
	text = gsub(text, " ([23])", "%1")
	return text
end

-- Convert `initial_string` one character at a time using `ref_table`.
-- For every character, the entry's `system_index` element is used when it
-- exists, otherwise the entry itself. The pieces are joined without separator.
-- Raises "Initial data not found." when a character has no entry in `ref_table`.
function initial_by_char(initial_string, system_index, ref_table)
	local pieces = {}
	for letter in mw.text.gsplit(initial_string, "") do
		local entry = ref_table[letter]
		if not entry then
			error("Initial data not found.")
		end
		pieces[#pieces + 1] = entry[system_index] or entry
	end
	return table.concat(pieces)
end

-- Normalise a respelling string.
-- Spaces are replaced by the separator literal below and "ါ" by "ာ". Text that
-- contains a character from the script ranges checked below is then returned
-- as is; any other text is treated as a Latin respelling and converted group
-- by group through `reverse_table`.
-- NOTE(review): `reverse_table` is not defined anywhere in the visible part of
-- this file, so the Latin branch fails as soon as a group is matched - confirm
-- where that table is supposed to come from.
function generate_respelling(text)
	-- Rebuild one Latin group: optional "+" marker, initial, optional "/", final.
	local function to_script(voicing_mark, latin_initial, opt_sep, latin_final)
		local initial_part = reverse_table[latin_initial]
			or initial_by_char(latin_initial, nil, reverse_table)
		return voicing_mark .. initial_part .. opt_sep .. reverse_table[latin_final]
	end

	text = gsub(text, " ", "   ")
	text = gsub(text, "ါ", "ာ")
	if not match(text, "[က-႟အဴ-ꩻ]") then
		text = gsub(text, "(%+?)([^%?%+'/\\~aeiou ]*)(/?)([%?'/\\~aeiou]+)", to_script)
	end
	return text
end

-- Romanise one syllable.
--   initial, final, tone, schwa  the four parts captured by the syllable
--                                pattern in export.get_romanisation
--   system        an entry of system_list (only its "type" field is read)
--   system_index  column (1..5) to take from the lookup tables
-- Returns the NFC-normalised concatenation of the romanised initial and final.
-- Raises "Initial data not found." / "Tone data not found." when a lookup fails.
-- All working variables are local, so nothing leaks into the global
-- environment between calls.
function process(initial, final, tone, schwa, system, system_index)
	-- For phonetic systems, a "ွ" (optionally followed by "ှ") standing before
	-- one of the listed letters is moved from the initial to the final.
	-- The "/" separator is dropped from the initial in every case.
	if match(initial .. final, "ွှ?[တနပမံ]") and system["type"] == "phonetic" then
		initial = gsub(initial, "[ွ/]", "")
		final = "ွ" .. final
	else
		initial = gsub(initial, "/", "")
	end
	
	-- Phonetic systems resolve "+consonant" through initial_voicing.
	local initial_new = system["type"] == "phonetic" and gsub(initial, "%+.", initial_voicing) or initial
	
	-- An initial listed in indep_letter_table is folded into the final; what is
	-- left as the initial is either "-" or the empty string.
	if indep_letter_table[initial_new] then
		initial_new = match(initial_new, "[ဨအဴ]") and "-" or ""
		final = initial .. final
	end

	-- Look the initial up as is, then without markers; orthographic systems
	-- finally fall back to a character-by-character conversion.
	local initial_data = 
		initial_table[initial_new]
		or initial_table[gsub(initial_new, "[%+%-%*]", "")]
		or (system["type"] == "orthographic" 
			and initial_by_char(initial_new, system_index, initial_table)
			or error("Initial data not found."))
		
	local initial_value = initial_data[system_index] or initial_data

	-- System 5 shows a "+"-marked initial in its unmarked form, with the part
	-- before any r/w/y underlined.
	if match(initial, "^%+") and system_index == 5 then
		initial_value = initial_table[gsub(initial, "%+", "")][system_index]
		initial_value = gsub(initial_value, "^([^rwy]+)", "<u>%1</u>")
	end

	-- Final lookup chain: direct entry (keyed by the schwa mark instead of the
	-- final for a phonetic system with schwa), then the alternative tables, and
	-- as a last resort a split into "everything before" + "last letter" (+ "်").
	local final_data =
		final_table[system["type"] .. schwa == "phonetic'" and schwa or final]
		or (system["type"] == "phonetic" 
			and (final_table[final .. "်"] or indep_letter_table[final]) 
			or indep_letter_table[final])
		or gsub(final, "^([^်]*)([^်])(်?)$", function(first, second, third) 
			local first_data = nucleus_table[first] or final_table[first] or indep_letter_table[first] or first
			local second_data = initial_table[second] or second
			first = first_data ~= first and first_data[system_index] or first
			second = second_data ~= second
				and second_data[system_index] .. ((system_index == 3 and third ~= "") and "‘" or "")
				or second
			-- Drop the first "." or ":" of the combined string.
			return (gsub(first .. second, "([%.:])(.*)", "%2"))
			end)
		
	local final_value = type(final_data) == "table" and final_data[system_index] or final_data
	-- Decompose so that combining tone diacritics can be removed and re-added.
	final_value = mw.ustring.toNFD(final_value)
	local tone_value
	if tone == "" then
		tone_value = ""
	else
		-- An explicit tone mark replaces the tone marking carried by the final.
		if system_index ~= 4 then final_value = gsub(final_value, "̀", "") end
		final_value = gsub(final_value, "[́:%.]", "")
		if system["type"] .. schwa == "phonetic'" then
			tone_value = ""
		else
			local tone_data = tone_table[tone] or error("Tone data not found.")
			tone_value = tone_data[system_index]
		end
	end

	-- Placement of the tone symbol depends on the system: after a leading vowel
	-- (1), after the last vowel (5), or at the very end (all others).
	if system_index == 1 then
		final_value = gsub(final_value, "^([aeəɛiɪoɔuʊ])", "%1" .. tone_value)
	elseif system_index == 5 then
		final_value = gsub(final_value, "([aeiou])([^aeiou]*)$", "%1" .. tone_value .. "%2")
	else
		final_value = final_value .. tone_value
	end
	
	return mw.ustring.toNFC(initial_value .. final_value)
end

-- Return `text` with every occurrence of the separator character in the
-- pattern below removed. Only the resulting string is returned, not the
-- substitution count.
function remove_wide_space(text)
	local stripped = gsub(text, " ", "")
	return stripped
end

-- Join the romanised syllables in `set` into one word.
--   set           sequence of romanised syllables
--   system_index  column (1..5) identifying the output system
-- System 1 simply concatenates the syllables. The other systems join them and
-- insert "-" at a syllable boundary when the pair of characters around it is
-- listed in ambig_intersyl[system_index], or (systems 2 and 4 only) when the
-- boundary would otherwise be read differently according to the letter
-- patterns below.
function concatenate(set, system_index)
	if system_index == 1 then return remove_wide_space(table.concat(set)) end
	local result_text = remove_wide_space(table.concat(set, " "))

	-- `ambig_intersyl` is not defined in the visible part of this file. Reading
	-- it through a guard keeps the original behaviour when it exists and avoids
	-- an "attempt to index a nil value" error when it (or the entry for this
	-- system) is missing.
	local ambiguous_pairs = ambig_intersyl and ambig_intersyl[system_index]
	local hyphen_by_pattern = system_index == 2 or system_index == 4

	-- One gsub pass cannot handle two boundaries that share a character, so
	-- the pass is repeated a fixed number of times.
	for _ = 1, 3 do
		result_text = gsub(result_text, "(.) (.)([^ ]?)",
			function(previous, following, after_following)
				local needs_hyphen = ambiguous_pairs and ambiguous_pairs[previous .. following]
				if not needs_hyphen and hyphen_by_pattern then
					needs_hyphen = match(previous .. " " .. following, "[ptkgmngy] [aeiou]")
						or (match(previous .. following .. after_following, "[aeiou][ptkmn][rwyg]")
							and not match(after_following, "[aeiou]"))
				end
				if needs_hyphen then
					return previous .. "-" .. following .. after_following
				end
				return previous .. following .. after_following
			end)
	end

	return result_text
end

-- Romanise `word` for one output system.
--   word            text to convert; it is syllabified here
--   pronunciations  table of result lists indexed by system_index (not read
--                   when mode is "translit_module")
--   system          entry of system_list
--   system_index    column (1..5) identifying the output system
--   mode            "translit_module" to get the plain string back
-- Returns the romanised string when mode is "translit_module"; otherwise the
-- string is appended to pronunciations[system_index] and that list is returned.
function export.get_romanisation(word, pronunciations, system, system_index, mode)
	-- Splits a syllable into initial / final / tone sign / schwa mark.
	local syllable_pattern = "^([%+%-%*]*[ကခဂဃၚစဆဇၛဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠၜအၝဿအာဣဣဳဥဥူဨအဲသြအဴအံအး][ျြ]?ွ?ှ?/?)([^း့']*)([း့]?)('?)$"
	local function romanise_parts(initial, final, tone, schwa)
		return process(initial, final, tone, schwa, system, system_index)
	end

	-- Phrase breaks are marked with "|" before the text is cut into syllables.
	word = gsub(word, " ", "|")
	word = syllabify(word)
	word = gsub(word, "ါ", "ာ")
	if system["type"] == "phonetic" then
		word = gsub(word, "ဝ([တနပမံ])", "ဝွ%1")
	end

	local sentences = {}
	for phrase in mw.text.gsplit(word, "|", true) do
		local romanised = {}
		for position, syllable in ipairs(mw.text.split(phrase, " ", true)) do
			-- Put "်" in front of a tone sign that precedes it.
			local reordered = gsub(syllable, "([း့])(်)", "%2%1")
			local converted = gsub(reordered, syllable_pattern, romanise_parts)
			romanised[position] = converted
		end
		sentences[#sentences + 1] = concatenate(romanised, system_index)
	end

	local joined = table.concat(sentences, " ")
	if mode == "translit_module" then
		return joined
	end
	table.insert(pronunciations[system_index], joined)
	return pronunciations[system_index]
end

-- Build the display string for the phonetic respellings.
--   phonetic    sequence of respelling strings
--   page_title  title of the current page
-- When a respelling, stripped of its markers, spells the same text as the page
-- title, every syllable that differs from the title's syllable at the same
-- position is wrapped in a highlighting <span>. The respellings are joined with
-- ", ", marker + consonant pairs are resolved through initial_voicing, and "ာ"
-- is written as "ါ" after the listed consonants.
-- Returns the formatted string.
function respelling_format(phonetic, page_title)
	local page_title_set = mw.text.split(syllabify(page_title), " ")
	-- The normalised title does not change inside the loop, so compute it once.
	local normalised_title = (gsub(table.concat(page_title_set), "ါ", "ာ"))
	local new_respellings = {}
	for _, respelling in ipairs(phonetic) do
		local respelling_set = mw.text.split(syllabify(respelling), " ")
		if gsub(table.concat(respelling_set), "[%+%-%*']", "") == normalised_title then
			for index, element in ipairs(respelling_set) do
				if element ~= page_title_set[index] then
					respelling_set[index] = '<span style="font-size:110%; color:#A32214; font-weight: bold">' .. element .. '</span>'
				end
			end
		end
		table.insert(new_respellings, table.concat(respelling_set))
	end
	-- `text` is local so the result does not leak into the global environment.
	local text = table.concat(new_respellings, ", ")
	text = remove_wide_space(text)
	text = gsub(text, "[%+%-].", initial_voicing)
	text = gsub(text, "([ခဂၚဒပဝ]ေ?)ာ", "%1ါ")
	return text
end

-- Produce all five romanisations of `word` as one string, for test pages.
--   word        the spelling to romanise
--   respelling  optional phonetic respelling; when it is nil, `word` is used
--               for the phonetic systems as well
-- Returns the five results joined with " | ", with <u>...</u> shown as (...).
function export.generate_tests(word, respelling)
	word = generate_respelling(word)
	-- Only normalise the respelling when one was given: passing nil on would
	-- raise an error before the `respelling or word` fallback below is reached.
	if respelling ~= nil then
		respelling = generate_respelling(respelling)
	end
	local pronunciations = {
		[1] = {},
		[2] = {},
		[3] = {},
		[4] = {},
		[5] = {},
	}
	local p, result = { ["orthographic"] = word, ["phonetic"] = respelling or word }, {}
	
	-- Make sure the systems are visited in the order of their numeric key.
	table.sort(system_list, function(first, second) return first[1] < second[1] end)
	for system_index, system in ipairs(system_list) do
		pronunciations[system_index] = export.get_romanisation(p[system["type"]], pronunciations, system, system_index)
	end
	for system_index = 1, 5 do
		table.insert(result, table.concat(pronunciations[system_index]))
	end
	return (gsub(gsub(table.concat(result, " | "), "<u>", "("), "</u>", ")"))
end

-- Template entry point: build the pronunciation section for the current page.
--   frame  the Scribunto frame; arguments are read from the parent frame.
--          Positional arguments are phonetic respellings, "word" overrides the
--          page title as the spelling to romanise.
-- Returns wikitext with an optional respelling line, the IPA line and the
-- romanisations of systems 2 to 5.
function export.make(frame)
	local args = frame:getParent().args
	local page_title = mw.title.getCurrentTitle().text
	local title = generate_respelling(args["word"] or page_title)
	
	local p, result = { ["orthographic"] = { title }, ["phonetic"] = {} }, {}
	local pronunciations = {
		[1] = {},
		[2] = {},
		[3] = {},
		[4] = {},
		[5] = {},
	}

	-- Without a respelling, the title itself is used for the phonetic systems.
	if not args[1] then args = { title } end
	-- The frame's argument table is backed by a metatable, so table library
	-- functions such as table.concat do not see its values. Copy the positional
	-- arguments into a plain list so the comparison with the title below works.
	local raw_respellings = {}
	for _, item in ipairs(args) do
		raw_respellings[#raw_respellings + 1] = item
		-- Empty arguments are skipped.
		if item ~= "" then
			table.insert(p["phonetic"], (generate_respelling(item)))
		end
	end
	
	-- Make sure the systems are visited in the order of their numeric key.
	table.sort(system_list, function(first, second) return first[1] < second[1] end)
	for system_index, system in ipairs(system_list) do
		for _, word in ipairs(p[system["type"]]) do
		 	pronunciations[system_index] = export.get_romanisation(word, pronunciations, system, system_index)
		end
	end
	
	-- Show the respelling line only when the respellings differ from the title.
	if title ~= table.concat(raw_respellings) then
		table.insert(result, 
			"* ဗွဟ်ရမ္သာင်" .. (#p["phonetic"] > 1 and "s" or "") .. ": " ..
			tostring( mw.html.create( "span" )
				:attr( "lang", "mnw" )
				:attr( "class", "mnw" )
				:wikitext( respelling_format( p["phonetic"], page_title ))) .. "\n" )
	end

	table.insert(result,
		'* [[ရီု:IPA-Mon|IPA]]' ..
		'<sup>([[ရမ္သာင်|key]])</sup>: ' ..
		
		(tostring( mw.html.create( "span" )
			:attr( "class", "IPA" )
			:wikitext( "/" .. gsub(table.concat(pronunciations[1], "/, /"), "ʔʔ", "ʔ.ʔ") .. "/" ))) ..
		
		'\n* [[Wiktionary:ပြံင်သၠာဲအက္ခရ်မန်|ပြံင်လှာဲအက္ခရ်မန်]] ')
		
	-- Systems 2 to 5 are listed on one line, separated by bullets.
	for system_index = 2, 5 do
		table.insert(result, 
			(system_index ~= 2 and " • " or "") ..
			"''" .. system_list[system_index]["name"] .. ":'' " .. 
			table.concat(pronunciations[system_index], "/"))
	end
	
	return table.concat(result)
end

-- Hand the table of public functions (get_romanisation, generate_tests, make)
-- to whoever loads this module.
return export