မော်ဂျူး:mnw-pron
ပုံပန်းသွင်ပြင်
Documentation for this module may be created at မော်ဂျူး:mnw-pron/doc
-- Module table; public entry points are attached to it and it is returned at
-- the end of the file.
local export = {}
-- Local shorthands for the Unicode-aware string functions in mw.ustring.
-- (`sub` is not referenced by any code visible in this file.)
local gsub = mw.ustring.gsub
local sub = mw.ustring.sub
local match = mw.ustring.match
-- Romanisation systems produced by this module.
--   [1]   numeric rank; the list is sorted on this field before it is walked.
--   type  "phonetic" or "orthographic"; selects which respelling is fed to the
--         system and enables the phonetic-only rules in process().
--   name  label printed in front of the system's output by export.make().
-- The position of a system in this list is the column index used with the
-- five-element value arrays in the lookup tables of this module.
local system_list = {
    { 1, type = "phonetic", name = "IPA" },
    { 2, type = "orthographic", name = "MLCTS" },
    { 3, type = "orthographic", name = "ALA-LC" },
    { 4, type = "phonetic", name = "BGN/PCGN" },
    { 5, type = "phonetic", name = "Okell" },
}
-- Transcriptions of syllable onsets, keyed by the Mon spelling.
-- Each value is a five-element array indexed by system_index, i.e. one entry
-- per system in system_list (IPA, MLCTS, ALA-LC, BGN/PCGN, Okell).
-- process() reads this table for the onset and for the closing consonant of a
-- final; initial_by_char() reads it one character at a time.
-- NOTE(review): the "ခက်" row repeats the values of the bare "ခ" row, unlike
-- the "ကက်" row which carries a vowel and coda -- confirm this is intended.
local initial_table = {
["က"] = { "k", "k", "k", "k", "k" },
["ကက်"] = { "kɛk", "kɛk", "kɛk", "kɛk", "kɛk" },
["ကာက်"] = { "kac", "kac", "kac", "kac", "kac" },
["ကိက်"] = { "koc", "koc", "koc", "koc", "koc" },
["ကုက်"] = { "kɤk", "kɤk", "kɤk", "kɤk", "kɤk" },
["ကေက်"] = { "kɔc", "kɔc", "kɔc", "kɔc", "kɔc" },
["ကောက်"] = { "kòk", "kòk", "kòk", "kòk", "kòk" },
["ကံက်"] = { "kɔk", "kɔk", "kɛ̀k", "kɛ̀k", "kɛ̀k" },
["ကအ်"] = { "kɔˀ", "kɔˀ", "kɔˀ", "kɔˀ", "kɔˀ" },
["ကေအ်"] = { "keˀ", "keˀ", "keˀ", "keˀ", "keˀ" },
["ကောအ်"] = { "kɒˀ", "kɒˀ", "kɒˀ", "kɒˀ", "kɒˀ" },
["ကိုအ်"] = { "kɜˀ", "kɜˀ", "kɜˀ", "kɜˀ", "kɜˀ" },
["ခ"] = { "kʰ", "hk", "kh", "hk", "hk" },
["ခက်"] = { "kʰ", "hk", "kh", "hk", "hk" },
["ခါက်"] = { "khac", "khac", "khac", "khac", "khac" },
}
-- Replacement table for marked onsets: the key is a marker ("+" or "-")
-- followed by one consonant, the value is the consonant substituted for the
-- pair. It is passed to gsub as the replacement table in process() (pattern
-- "%+.") and in respelling_format() (pattern "[%+%-]."), so the "-" entry can
-- only be reached from respelling_format().
local initial_voicing = {
["+က"] = "ဂ",
["+ခ"] = "ဂ",
["+စ"] = "ဇ",
["+ဆ"] = "ဇ",
["+ဋ"] = "ဍ",
["+ဌ"] = "ဍ",
["+တ"] = "ဒ",
["+ထ"] = "ဒ",
["+ပ"] = "ဗ",
["+ဖ"] = "ဗ",
["-ဘ"] = "ဖ",
}
-- Transcriptions of syllable finals (rimes), keyed by the Mon spelling of the
-- final; the empty key is the entry used when a syllable has no written final.
-- Values are five-element arrays indexed by system_index.
local final_table = {
[""] = { "a̰", "a.", "a", "a.", "á" },
["က်"] = { "ɛk", "ɛk", "ɛk‘", "ɛk", "ɛk" },
}
-- Transcriptions of the vowel part of a final that is not listed as a whole in
-- final_table; process() consults it first when it splits such a final into
-- a leading part and a closing consonant.
-- Values are five-element arrays indexed by system_index.
local nucleus_table = {
[""] = { "à", "a", "a", "a", "a" },
["ိ"] = { "ò", "ò", "ò", "ò", "ò" },
}
-- Transcriptions keyed by the spellings listed below. process() checks the
-- onset against this table (a hit moves the onset into the final) and also
-- falls back to it when looking up a final.
-- Values are five-element arrays indexed by system_index.
local indep_letter_table = {
["အာ"] = { "ɛ̀ə", "ɛ̀ə.", "ɛ̀ə", "ɛ̀ə.", "ɛ̀ə" },
["ဣ"] = { "ḭ", "i.", "i", "i.", "í" },
["ဣဳ"] = { "ɒə", "ɒə", "ɒə", "ɒə", "ɒə" },
["ဥ"] = { "ṵ", "u.", "u", "u.", "aoˀ" },
["ဥူ"] = { "ù", "u", "ū", "u", "u" },
["ဨ"] = { "ey", "ɛ", "ɛ", "ɛ", "èy" },
["အဲ"] = { "ɔə", "uə", "ɔ̀ə", "ɔə.", "ùə" },
["ဩ"] = { "ò", "èə:", "o", "èə:", "ò" },
["အဴ"] = { "ɤ̀", "ao", "o‘", "ao", "ò" },
["အံ"] = { "ɔm", "ɔ̀m.", "ɔˀ", "ɔ̀m", "ɔ̀m" },
["အး"] = { "ɛ̀h", "ah", "ɛ̀h", "ɛ̀h.", "ɛ̀h" },
}
-- Tone mark output, keyed by the written tone sign captured from a syllable.
-- Values are five-element arrays indexed by system_index; process() inserts
-- the selected value into (systems 1 and 5) or appends it to (other systems)
-- the transcribed final.
local tone_table = {
["း"] = { "́", ":", "″", ":", "̀" },
["့"] = { "̰", ".", "′", ".", "́" },
}
-- Pattern used by syllabify() to find a syllable boundary. Capture 1 is a
-- consonant with optional medials and vowel/tone signs; capture 2 is the next
-- consonant, an optional "့", and one character that is none of "့", "်", "္".
-- syllabify() keeps inserting a space between the two captures until the
-- pattern no longer matches.
-- NOTE(review): "ါ" appears three times in the second character class; the
-- repeats are redundant inside a class and may stand in for other signs.
local repl_string = "([ကခဂဃၚစဆဇၛဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠၜအၝဿ][ျြွှ]*[ံါဲါါဴေး]*)([ကခဂဃၚစဆဇၛဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠၜအၝဿ][့]?[^့်္])"
-- Split a Mon string into space-separated syllables.
-- @param text  string to split; may contain the markers ' + - *.
-- @return      the same text with single spaces inserted at the detected
--              syllable boundaries and surrounding spaces trimmed.
function syllabify(text)
    -- Put a space between an apostrophe and the marker run that follows it;
    -- empty matches are left alone by returning nothing.
    local function split_markers(apostrophe, markers)
        if apostrophe == "" and markers == "" then
            return
        end
        return apostrophe .. " " .. markers
    end

    -- Decide where the boundary after a matched letter goes, based on the
    -- second character after it (virama, asat, or anything else).
    -- NOTE(review): the first bracket expression in the pattern below is a
    -- character class, so it matches any single listed character rather than
    -- the multi-character sequences it appears to spell out -- confirm intent.
    local function isolate_letter(letter, following, after)
        if after == "္" then
            return " " .. letter .. following .. " " .. after
        elseif after == "်" then
            return " " .. letter .. following .. after
        end
        return " " .. letter .. " " .. following .. after
    end

    text = gsub(text, "('?)([%+%-%*]*)", split_markers)
    text = gsub(text, "([အာဣဣဳဥဥူဨအဲသြအဴအံအး][့း်]?)(.?)(.?)", isolate_letter)
    text = text .. " "

    -- A closed syllable ends after its asat (plus optional tone sign and
    -- apostrophe).
    text = gsub(text, "(်း?'?)", "%1 ")
    -- Move a tone sign written before a final consonant to after it.
    text = gsub(text, "([း့])([ကခဂဃၚစဆဇၛဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠၜအၝ]်)", "%2%1")

    -- One pass can miss overlapping matches, so repeat until nothing is left.
    while match(text, repl_string) do
        text = gsub(text, repl_string, "%1 %2")
    end

    -- Final clean-up, applied strictly in this order.
    local cleanup_rules = {
        { "္", " , " },
        { " +", " " },
        { "^ ?(.*[^ ]) ?$", "%1" },
        { " , ", " " },
        { " ([23])", "%1" },
    }
    for _, rule in ipairs(cleanup_rules) do
        text = gsub(text, rule[1], rule[2])
    end
    return text
end
-- Transcribe a string one character at a time and join the results.
-- @param initial_string  string to split into single characters.
-- @param system_index    column to read from each table entry; when the entry
--                        has no such field the entry itself is used.
-- @param ref_table       lookup table keyed by single characters.
-- @return                concatenation of the per-character values.
-- Raises "Initial data not found." when a character has no entry.
function initial_by_char(initial_string, system_index, ref_table)
    local pieces = {}
    for char in mw.text.gsplit(initial_string, "") do
        local entry = ref_table[char]
        if not entry then
            error("Initial data not found.")
        end
        pieces[#pieces + 1] = entry[system_index] or entry
    end
    return table.concat(pieces)
end
-- Normalise a respelling and, when it contains no character matched by the
-- script check below, convert it through reverse_table.
-- NOTE(review): reverse_table is not defined in the part of the file visible
-- here; the conversion branch depends on it being in scope -- confirm.
-- @param text  respelling or headword.
-- @return      the normalised (and possibly converted) string.
function generate_respelling(text)
    -- NOTE(review): as written this replaces a space with a space; one of the
    -- two literals was presumably meant to be a different whitespace
    -- character -- confirm.
    text = gsub(text, " ", " ")
    text = gsub(text, "ါ", "ာ")

    if not match(text, "[က-႟အဴ-ꩻ]") then
        text = gsub(
            text,
            "(%+?)([^%?%+'/\\~aeiou ]*)(/?)([%?'/\\~aeiou]+)",
            function(plus, onset, slash, rime)
                -- Whole-onset lookup first, then character by character.
                local onset_value = reverse_table[onset] or initial_by_char(onset, nil, reverse_table)
                return plus .. onset_value .. slash .. reverse_table[rime]
            end
        )
    end
    return text
end
-- Transcribe one syllable into one romanisation system.
-- All working variables are local so that nothing leaks into the global
-- environment between calls.
-- @param initial       onset captured from the syllable (markers, letter,
--                      medials and an optional "/").
-- @param final         remainder of the syllable before the tone sign.
-- @param tone          captured tone sign, or "" when there is none.
-- @param schwa         "'" when the syllable carries the apostrophe marker,
--                      otherwise "".
-- @param system        entry of system_list being produced.
-- @param system_index  position of that entry; column index into the tables.
-- @return              NFC-normalised transcription of the syllable.
-- Raises "Initial data not found." or "Tone data not found." when a lookup
-- has no entry.
function process(initial, final, tone, schwa, system, system_index)
    -- In phonetic systems a medial "ွ" (optionally followed by "ှ") standing
    -- directly before one of တနပမံ is moved from the onset into the final.
    if match(initial .. final, "ွှ?[တနပမံ]") and system["type"] == "phonetic" then
        initial = gsub(initial, "[ွ/]", "")
        final = "ွ" .. final
    else
        initial = gsub(initial, "/", "")
    end

    -- Apply the "+" replacements only for phonetic systems.
    local initial_new = system["type"] == "phonetic" and gsub(initial, "%+.", initial_voicing) or initial
    -- An onset listed in indep_letter_table is transcribed as part of the
    -- final; the onset itself becomes "-" or "".
    if indep_letter_table[initial_new] then
        initial_new = match(initial_new, "[ဨအဴ]") and "-" or ""
        final = initial .. final
    end

    -- Onset lookup: exact key, then key without markers, then (orthographic
    -- systems only) character by character.
    local initial_data =
        initial_table[initial_new]
        or initial_table[(gsub(initial_new, "[%+%-%*]", ""))]
        or (system["type"] == "orthographic"
            and initial_by_char(initial_new, system_index, initial_table)
            or error("Initial data not found."))
    local initial_value = initial_data[system_index] or initial_data

    -- System 5 keeps the unmarked onset and underlines it when the onset
    -- carried a "+" marker.
    if match(initial, "^%+") and system_index == 5 then
        initial_value = initial_table[(gsub(initial, "%+", ""))][system_index]
        initial_value = gsub(initial_value, "^([^rwy]+)", "<u>%1</u>")
    end

    -- Final lookup: whole final (or the schwa marker in phonetic systems),
    -- then the fallbacks, then a split into leading part + closing consonant.
    local final_data =
        final_table[system["type"] .. schwa == "phonetic'" and schwa or final]
        or (system["type"] == "phonetic"
            and (final_table[final .. "်"] or indep_letter_table[final])
            or indep_letter_table[final])
        or gsub(final, "^([^်]*)([^်])(်?)$", function(first, second, third)
            local first_data = nucleus_table[first] or final_table[first] or indep_letter_table[first] or first
            local second_data = initial_table[second] or second
            first = first_data ~= first and first_data[system_index] or first
            second = second_data ~= second
                and second_data[system_index] .. ((system_index == 3 and third ~= "") and "‘" or "")
                or second
            -- Drop a "." or ":" that ended up inside the combined final.
            return (gsub(first .. second, "([%.:])(.*)", "%2"))
        end)
    local final_value = type(final_data) == "table" and final_data[system_index] or final_data
    -- Decompose so that combining tone marks can be matched on their own.
    final_value = mw.ustring.toNFD(final_value)

    local tone_value
    if tone == "" then
        tone_value = ""
    else
        -- An explicit tone sign replaces whatever tone marking the final
        -- carried by default.
        if system_index ~= 4 then final_value = gsub(final_value, "̀", "") end
        final_value = gsub(final_value, "[́:%.]", "")
        if system["type"] .. schwa == "phonetic'" then
            tone_value = ""
        else
            local tone_data = tone_table[tone] or error("Tone data not found.")
            tone_value = tone_data[system_index]
        end
    end

    -- Placement of the tone mark differs per system.
    if system_index == 1 then
        final_value = gsub(final_value, "^([aeəɛiɪoɔuʊ])", "%1" .. tone_value)
    elseif system_index == 5 then
        final_value = gsub(final_value, "([aeiou])([^aeiou]*)$", "%1" .. tone_value .. "%2")
    else
        final_value = final_value .. tone_value
    end
    return mw.ustring.toNFC(initial_value .. final_value)
end
-- Delete every occurrence of the space character written in the pattern below.
-- NOTE(review): the function name suggests a wide (full-width) space, but the
-- literal appears as an ordinary space here -- confirm which is intended.
-- @param text  input string.
-- @return      the string with those characters removed (single value).
function remove_wide_space(text)
    local stripped = gsub(text, " ", "")
    return stripped
end
-- Join the transcribed syllables of one phrase.
-- System 1 is joined without a separator. The other systems are joined with
-- spaces and then every "x y" junction is rewritten to "xy", or to "x-y" when
-- the junction is considered ambiguous.
-- Changes from the previous version: the working string is local, the
-- callback no longer shadows the builtin `next`, and a missing ambiguity
-- table no longer raises an error.
-- NOTE(review): ambig_intersyl is not defined in the part of the file visible
-- here; when it is absent only the pattern-based checks are applied.
-- NOTE(review): remove_wide_space() runs on the space-joined string before the
-- junction pass; if it strips the same character that is used as the
-- separator, the pass below never matches -- confirm the intended characters.
-- @param set           array of transcribed syllables.
-- @param system_index  position of the system in system_list.
-- @return              the joined string.
function concatenate(set, system_index)
    if system_index == 1 then
        return remove_wide_space(table.concat(set))
    end

    local result_text = remove_wide_space(table.concat(set, " "))
    local ambiguous = ambig_intersyl and ambig_intersyl[system_index]
    local uses_pattern_checks = system_index == 2 or system_index == 4

    -- Several passes, because one pass cannot rewrite overlapping junctions.
    for _ = 1, 3 do
        result_text = gsub(result_text, "(.) (.)([^ ]?)", function(previous, following, after_next)
            local needs_hyphen = (ambiguous and ambiguous[previous .. following])
                or (uses_pattern_checks
                    and (match(previous .. " " .. following, "[ptkgmngy] [aeiou]")
                        or (match(previous .. following .. after_next, "[aeiou][ptkmn][rwyg]")
                            and not match(after_next, "[aeiou]"))))
            if needs_hyphen then
                return previous .. "-" .. following .. after_next
            end
            return previous .. following .. after_next
        end)
    end
    return result_text
end
-- Romanise a word or phrase in one system.
-- @param word            Mon text; spaces separate phrases.
-- @param pronunciations  table of result lists indexed by system_index
--                        (not used when mode is "translit_module").
-- @param system          entry of system_list to produce.
-- @param system_index    position of that entry.
-- @param mode            when "translit_module", the romanised string is
--                        returned directly.
-- @return                the romanised string (mode "translit_module"), or
--                        pronunciations[system_index] after the new
--                        romanisation has been appended to it.
function export.get_romanisation(word, pronunciations, system, system_index, mode)
    -- Splits a syllable into onset, final, tone sign and schwa marker.
    local syllable_pattern =
        "^([%+%-%*]*[ကခဂဃၚစဆဇၛဉညဋဌဍဎဏတထဒဓနပဖဗဘမယရလဝသဟဠၜအၝဿအာဣဣဳဥဥူဨအဲသြအဴအံအး][ျြ]?ွ?ှ?/?)([^း့']*)([း့]?)('?)$"
    local function romanise(initial, final, tone, schwa)
        return process(initial, final, tone, schwa, system, system_index)
    end

    -- Protect phrase breaks as "|" so that syllabify's spaces can be told
    -- apart from them afterwards.
    word = gsub(word, " ", "|")
    word = syllabify(word)
    word = gsub(word, "ါ", "ာ")
    if system["type"] == "phonetic" then
        word = gsub(word, "ဝ([တနပမံ])", "ဝွ%1")
    end

    local sentences = {}
    for phrase in mw.text.gsplit(word, "|", true) do
        local romanised = {}
        for position, syl in ipairs(mw.text.split(phrase, " ", true)) do
            -- Put the asat before the tone sign so the pattern can capture it.
            local reordered = gsub(syl, "([း့])(်)", "%2%1")
            romanised[position] = gsub(reordered, syllable_pattern, romanise)
        end
        sentences[#sentences + 1] = concatenate(romanised, system_index)
    end

    local joined = table.concat(sentences, " ")
    if mode == "translit_module" then
        return joined
    end
    table.insert(pronunciations[system_index], joined)
    return pronunciations[system_index]
end
-- Format the phonetic respellings for display, highlighting the syllables
-- that differ from the page title.
-- The working string is local so that it does not leak into the global
-- environment.
-- @param phonetic    array of respelling strings.
-- @param page_title  title of the current page.
-- @return            comma-separated respellings; syllables that differ from
--                    the title are wrapped in a styled <span>.
function respelling_format(phonetic, page_title)
    local page_title_set = mw.text.split(syllabify(page_title), " ")
    local new_respellings = {}
    for _, respelling in ipairs(phonetic) do
        local respelling_set = mw.text.split(syllabify(respelling), " ")
        -- Highlight only when the respelling has the same letters as the title
        -- once markers are ignored.
        if gsub(table.concat(respelling_set), "[%+%-%*']", "") == (gsub(table.concat(page_title_set), "ါ", "ာ")) then
            for index, element in ipairs(respelling_set) do
                if element ~= page_title_set[index] then
                    respelling_set[index] = '<span style="font-size:110%; color:#A32214; font-weight: bold">' .. element .. '</span>'
                end
            end
        end
        table.insert(new_respellings, table.concat(respelling_set))
    end

    local text = table.concat(new_respellings, ", ")
    -- NOTE(review): remove_wide_space() strips the space character written in
    -- its own pattern; if that is an ordinary space it also removes the spaces
    -- in the ", " separator and inside the <span> tag above -- confirm.
    text = remove_wide_space(text)
    -- Resolve "+x" / "-x" marker pairs to the replacement letter.
    text = gsub(text, "[%+%-].", initial_voicing)
    -- Use the tall form of the vowel sign after these consonants.
    text = gsub(text, "([ခဂၚဒပဝ]ေ?)ာ", "%1ါ")
    return text
end
-- Produce the romanisations of a word in all five systems as one string,
-- intended for test cases.
-- @param word        headword; used for the orthographic systems.
-- @param respelling  optional phonetic respelling; when omitted (nil) the
--                    headword is used for the phonetic systems as well.
-- @return            the five results joined with " | ", with <u>...</u>
--                    rendered as parentheses.
function export.generate_tests(word, respelling)
    word = generate_respelling(word)
    -- Only convert the respelling when one was supplied; passing nil on would
    -- raise inside generate_respelling and the fallback below could never
    -- take effect.
    if respelling ~= nil then
        respelling = generate_respelling(respelling)
    end

    local pronunciations = {
        [1] = {},
        [2] = {},
        [3] = {},
        [4] = {},
        [5] = {},
    }
    local p, result = { ["orthographic"] = word, ["phonetic"] = respelling or word }, {}

    table.sort(system_list, function(first, second) return first[1] < second[1] end)
    for system_index, system in ipairs(system_list) do
        pronunciations[system_index] = export.get_romanisation(p[system["type"]], pronunciations, system, system_index)
    end
    for system_index = 1, 5 do
        table.insert(result, table.concat(pronunciations[system_index]))
    end
    return (gsub(gsub(table.concat(result, " | "), "<u>", "("), "</u>", ")"))
end
-- Template entry point: build the pronunciation section for the current page.
-- Reads the parent frame's arguments: "word" overrides the page title, and the
-- positional arguments are phonetic respellings (the title is used when none
-- is given).
-- @param frame  Scribunto frame object.
-- @return       wikitext with an optional respelling line, the IPA line and
--               the romanisations of systems 2 to 5.
function export.make(frame)
    local args = frame:getParent().args
    local page_title = mw.title.getCurrentTitle().text
    local title = generate_respelling(args["word"] or page_title)

    local respellings = { orthographic = { title }, phonetic = {} }
    local pronunciations = { {}, {}, {}, {}, {} }
    local output = {}

    if not args[1] then
        args = { title }
    end
    -- Empty positional arguments contribute nothing.
    for _, item in ipairs(args) do
        if item ~= "" then
            table.insert(respellings.phonetic, (generate_respelling(item)))
        end
    end

    table.sort(system_list, function(a, b) return a[1] < b[1] end)
    for system_index, system in ipairs(system_list) do
        for _, word in ipairs(respellings[system["type"]]) do
            pronunciations[system_index] = export.get_romanisation(word, pronunciations, system, system_index)
        end
    end

    -- Respelling line, shown when the arguments differ from the title.
    if title ~= table.concat(args) then
        local plural = #respellings.phonetic > 1 and "s" or ""
        local respelling_span = mw.html.create("span")
            :attr("lang", "mnw")
            :attr("class", "mnw")
            :wikitext(respelling_format(respellings.phonetic, page_title))
        output[#output + 1] = "* ဗွဟ်ရမ္သာင်" .. plural .. ": " .. tostring(respelling_span) .. "\n"
    end

    -- IPA line; a dot is inserted between two adjacent glottal stops.
    local ipa_text = gsub(table.concat(pronunciations[1], "/, /"), "ʔʔ", "ʔ.ʔ")
    local ipa_span = mw.html.create("span")
        :attr("class", "IPA")
        :wikitext("/" .. ipa_text .. "/")
    output[#output + 1] = '* [[ရီု:IPA-Mon|IPA]]'
        .. '<sup>([[ရမ္သာင်|key]])</sup>: '
        .. tostring(ipa_span)
        .. '\n* [[Wiktionary:ပြံင်သၠာဲအက္ခရ်မန်|ပြံင်လှာဲအက္ခရ်မန်]] '

    -- Remaining systems, separated by bullets.
    for system_index = 2, 5 do
        local separator = system_index ~= 2 and " • " or ""
        output[#output + 1] = separator
            .. "''" .. system_list[system_index]["name"] .. ":'' "
            .. table.concat(pronunciations[system_index], "/")
    end

    return table.concat(output)
end
return export