မော်ဂျူး:cdo-pron
ပုံပန်းသွင်ပြင်
Documentation for this module may be created at မော်ဂျူး:cdo-pron/doc
-- Module table; the functions attached to it below are returned at the end of the file.
local export = {}
-- Local aliases for the mw.ustring functions used throughout this module.
local gsub = mw.ustring.gsub
local sub = mw.ustring.sub
local len = mw.ustring.len
local match = mw.ustring.match
local lower = mw.ustring.lower
-- Maps a vowel letter carrying a tone diacritic to its base vowel followed by the
-- diacritic as a separate combining character, so that the patterns used further
-- down can see the tone mark on its own. Each value is built by concatenating the
-- base letter with the bare combining mark.
-- NOTE(review): the keys are presumably precomposed (NFC) letters; keep the
-- literals byte-for-byte when editing, since normalisation would change them.
local split_tone = {
-- breve
["ă"] = "a".."̆", ["ĕ"] = "e".."̆", ["ĭ"] = "i".."̆", ["ŏ"] = "o".."̆", ["ŭ"] = "u".."̆",
-- macron
["ā"] = "a".."̄", ["ē"] = "e".."̄", ["ī"] = "i".."̄", ["ō"] = "o".."̄", ["ū"] = "u".."̄",
-- acute
["á"] = "a".."́", ["é"] = "e".."́", ["í"] = "i".."́", ["ó"] = "o".."́", ["ú"] = "u".."́",
-- grave
["à"] = "a".."̀", ["è"] = "e".."̀", ["ì"] = "i".."̀", ["ò"] = "o".."̀", ["ù"] = "u".."̀",
-- circumflex
["â"] = "a".."̂", ["ê"] = "e".."̂", ["î"] = "i".."̂", ["ô"] = "o".."̂", ["û"] = "u".."̂",
}
-- Tone label for a rime, keyed by "<combining tone mark><final h or k>" (either
-- part may be empty). determ_tone builds the key from its two pattern captures.
-- Values are the numbers 1-7, except the two strings "4A" (acute + k) and
-- "4B" (acute + h).
local tone_from_mark = {
-- no mark, or a breve without a final stop -> 1; breve with final k/h -> 7
[""] = 1, ["̆"] = 1, ["̆k"] = 7, ["̆h"] = 7,
-- macron
["̄"] = 2,
-- acute without a final stop
["́"] = 3,
-- acute with final k / final h
["́k"] = "4A", ["́h"] = "4B",
-- grave
["̀"] = 5,
-- circumflex
["̂"] = 6
}
-- IPA for every romanised initial (the empty string stands for "no initial").
-- Each entry carries three variants; export.ipa picks one per syllable:
--   unchanged : first syllable of a word, syllable after one ending in "k",
--               or syllable marked with "*"
--   nasal     : the preceding syllable's IPA final ends in a nasal
--   lenited   : every other non-initial syllable
-- The <sup>(x-)</sup> prefix shows the underlying initial before the change.
local initial_ipa = {
    b = { unchanged = "p", lenited = "<sup>(p-)</sup>β", nasal = "<sup>(p-)</sup>m" },
    p = { unchanged = "pʰ", lenited = "<sup>(pʰ-)</sup>β", nasal = "<sup>(pʰ-)</sup>m" },
    m = { unchanged = "m", lenited = "m", nasal = "m" },
    d = { unchanged = "t", lenited = "<sup>(t-)</sup>l", nasal = "<sup>(t-)</sup>n" },
    t = { unchanged = "tʰ", lenited = "<sup>(tʰ-)</sup>l", nasal = "<sup>(tʰ-)</sup>n" },
    n = { unchanged = "nˡ", lenited = "nˡ", nasal = "nˡ" },
    l = { unchanged = "l̃", lenited = "l̃", nasal = "<sup>(l-)</sup>nˡ" },
    g = { unchanged = "k", lenited = "<sup>(k-)</sup>", nasal = "<sup>(k-)</sup>ŋ" },
    k = { unchanged = "kʰ", lenited = "<sup>(kʰ-)</sup>", nasal = "<sup>(kʰ-)</sup>ŋ" },
    ng = { unchanged = "ŋ", lenited = "ŋ", nasal = "ŋ" },
    h = { unchanged = "h", lenited = "<sup>(h-)</sup>", nasal = "<sup>(h-)</sup>ŋ" },
    c = { unchanged = "t͡s", lenited = "<sup>(t͡s-)</sup>ʒ", nasal = "<sup>(t͡s-)</sup>ʒ" },
    ch = { unchanged = "t͡sʰ", lenited = "<sup>(t͡sʰ-)</sup>ʒ", nasal = "<sup>(t͡sʰ-)</sup>ʒ" },
    s = { unchanged = "s", lenited = "<sup>(s-)</sup>l", nasal = "<sup>(s-)</sup>n" },
    [""] = { unchanged = "", lenited = "", nasal = "<sup>(Ø-)</sup>ŋ" },
}
-- IPA for every romanised rime (tone marks already stripped).
-- export.ipa uses the "open" variant for the last syllable of a word when its
-- tone label ends in 3, 4(A/B) or 6, and the "closed" variant everywhere else.
-- Keys ending in "2" are the alternative readings selected for the rimes listed
-- in dual_rimes when the syllable's tone contains 3, 4 or 6.
local final_ipa = {
    ["a"] = { closed = "a", open = "ɑ" },
    ["a̤"] = { closed = "ɛ", open = "ɑ" },
    ["ae̤"] = { closed = "œ", open = "ɔ" },
    ["ae̤h"] = { closed = "øyʔ", open = "ɔyʔ" }, -- see the entry [[茉莉]]
    ["ae̤k"] = { closed = "øyʔ", open = "ɔyʔ" },
    ["ae̤ng"] = { closed = "øyŋ", open = "ɔyŋ" },
    ["ah"] = { closed = "aʔ", open = "ɑʔ" },
    ["a̤h"] = { closed = "ɛʔ", open = "ɑʔ" },
    ["ai"] = { closed = "ai", open = "ɑi" },
    ["aik"] = { closed = "ɛiʔ", open = "aiʔ" },
    ["aing"] = { closed = "ɛiŋ", open = "aiŋ" },
    ["aiu"] = { closed = "ɛu", open = "ɑu" },
    ["ak"] = { closed = "aʔ", open = "ɑʔ" },
    ["ang"] = { closed = "aŋ", open = "ɑŋ" },
    ["au"] = { closed = "au", open = "ɑu" },
    ["auk"] = { closed = "ouʔ", open = "ɑuʔ" },
    ["aung"] = { closed = "ouŋ", open = "ɑuŋ" },
    ["e"] = { closed = "i", open = "ɛi" },
    ["e̤"] = { closed = "œ", open = "ɔ" },
    ["eh"] = { closed = "ɛiʔ", open = "ɛiʔ" }, -- see the entry [[茉莉]]
    ["e̤h"] = { closed = "œʔ", open = "œʔ" },
    ["ek"] = { closed = "ɛiʔ", open = "aiʔ" },
    ["ek2"] = { closed = "iʔ", open = "ɛiʔ" },
    ["e̤k"] = { closed = "øyʔ", open = "ɔyʔ" },
    ["eng"] = { closed = "ɛiŋ", open = "aiŋ" },
    ["eng2"] = { closed = "iŋ", open = "ɛiŋ" },
    ["e̤ng"] = { closed = "øyŋ", open = "ɔyŋ" },
    ["eu"] = { closed = "ɛu", open = "ɑu" },
    ["eu2"] = { closed = "ieu", open = "iɛu" },
    ["e̤ṳ"] = { closed = "y", open = "øy" },
    ["e̤ṳk"] = { closed = "yʔ", open = "øyʔ" },
    ["e̤ṳng"] = { closed = "yŋ", open = "øyŋ" },
    ["i"] = { closed = "i", open = "ɛi" },
    ["ia"] = { closed = "ia", open = "iɑ" },
    ["iah"] = { closed = "iaʔ", open = "iɑʔ" },
    ["iak"] = { closed = "iaʔ", open = "iɑʔ" },
    ["iang"] = { closed = "iaŋ", open = "iɑŋ" },
    ["iau"] = { closed = "iau", open = "iau" },
    ["ie"] = { closed = "ie", open = "iɛ" },
    ["ieh"] = { closed = "ieʔ", open = "iɛʔ" },
    ["iek"] = { closed = "ieʔ", open = "iɛʔ" },
    ["ieng"] = { closed = "ieŋ", open = "iɛŋ" },
    ["ieu"] = { closed = "ieu", open = "iɛu" },
    ["ih"] = { closed = "iʔ", open = "ɛiʔ" },
    ["ik"] = { closed = "iʔ", open = "ɛiʔ" },
    ["ing"] = { closed = "iŋ", open = "ɛiŋ" },
    ["io"] = { closed = "yo", open = "yɔ" },
    ["ioh"] = { closed = "yoʔ", open = "yɔʔ" },
    ["iok"] = { closed = "yoʔ", open = "yɔʔ" },
    ["iong"] = { closed = "yoŋ", open = "yɔŋ" },
    ["iu"] = { closed = "ieu", open = "iɛu" },
    ["ng"] = { closed = "ŋ̍", open = "ŋ̍" },
    ["o"] = { closed = "u", open = "ou" },
    ["o̤"] = { closed = "o", open = "ɔ" },
    ["o̤h"] = { closed = "oʔ", open = "ɔʔ" },
    ["oi"] = { closed = "øy", open = "ɔy" },
    ["oi2"] = { closed = "uoi", open = "uɔi" },
    ["o̤i"] = { closed = "øy", open = "ɔy" },
    ["ok"] = { closed = "ouʔ", open = "ɔuʔ" },
    ["ok2"] = { closed = "uʔ", open = "ouʔ" },
    ["o̤k"] = { closed = "oʔ", open = "ɔʔ" }, -- see the entry [[汝各儂]]
    ["ong"] = { closed = "ouŋ", open = "ɔuŋ" },
    ["ong2"] = { closed = "uŋ", open = "ouŋ" },
    ["u"] = { closed = "u", open = "ou" },
    ["ṳ"] = { closed = "y", open = "øy" },
    ["ua"] = { closed = "ua", open = "uɑ" },
    ["uah"] = { closed = "uaʔ", open = "uɑʔ" },
    ["uai"] = { closed = "uai", open = "uɑi" },
    ["uak"] = { closed = "uaʔ", open = "uɑʔ" },
    ["uang"] = { closed = "uaŋ", open = "uɑŋ" },
    ["ui"] = { closed = "uoi", open = "uɔi" },
    ["uk"] = { closed = "uʔ", open = "ouʔ" },
    ["ṳk"] = { closed = "yʔ", open = "øyʔ" },
    ["ung"] = { closed = "uŋ", open = "ouŋ" },
    ["ṳng"] = { closed = "yŋ", open = "øyŋ" },
    ["uo"] = { closed = "uo", open = "uɔ" },
    ["uoh"] = { closed = "uoʔ", open = "uɔʔ" },
    ["uoi"] = { closed = "uoi", open = "uɔi" },
    ["uok"] = { closed = "uoʔ", open = "uɔʔ" },
    ["uong"] = { closed = "uoŋ", open = "uɔŋ" },
}
-- Superscript tone letters for each tone label, plus the superscript forms of
-- the punctuation ("-", "(", ")") that appears in sandhi notations such as
-- "2-8" or "2(-8)". export.ipa applies this table as a gsub replacement table.
-- NOTE(review): labels 1-9 are keyed by number here, while the gsub captures
-- that look them up are strings; presumably this relies on how mw.ustring.gsub
-- handles table keys -- confirm before changing the key types.
local tone_ipa = {
[1] = "⁵⁵", -- dark level (yin ping)
[2] = "³³", -- rising (shang)
[3] = "²¹³", -- dark departing (yin qu)
["4A"] = "²⁴", -- dark entering, type A (-k)
["4B"] = "²⁴", -- dark entering, type B (-h)
[5] = "⁵³", -- light level (yang ping)
[6] = "²⁴²", -- light departing (yang qu)
[7] = "⁵", -- light entering (yang ru)
[8] = "²¹", -- half dark departing (sandhi-only tone)
[9] = "³⁵", -- half light departing (sandhi-only tone)
["-"] = "⁻",
["("] = "⁽",
[")"] = "⁾",
}
-- Tone sandhi lookup used by export.ipa.
-- Keys are built from the sylcat categories of the syllables in a word:
--   two-part keys   "<A|B|C>-<I..IV>"          : non-final syllable + following syllable
--   three-part keys "<A|B|C>-<A|B|C>-<I..IV>"  : first + second + final syllable
-- Values are the sandhi tone label of the non-final syllable; for three-part keys
-- the value holds the labels of the first and second syllable joined by "-".
-- Four-syllable words reuse the two-part keys for the pairs (1, 2) and (3, 4).
local tone_sandhi = {
-- two-syllable words
["A-I"] = "1", ["A-II"] = "1", ["A-III"] = "5", ["A-IV"] = "5",
["B-I"] = "8", ["B-II"] = "8", ["B-III"] = "9", ["B-IV"] = "1",
["C-I"] = "1", ["C-II"] = "2", ["C-III"] = "2", ["C-IV"] = "8",
-- three-syllable words
["A-A-I"] = "8-1", ["A-A-II"] = "8-1", ["A-A-III"] = "8-5", ["A-A-IV"] = "8-5",
["A-B-I"] = "8-8", ["A-B-II"] = "8-8", ["A-B-III"] = "8-9", ["A-B-IV"] = "8-1",
["B-A-I"] = "8-1", ["B-A-II"] = "8-1", ["B-A-III"] = "8-5", ["B-A-IV"] = "8-5",
["B-B-I"] = "8-8", ["B-B-II"] = "8-8", ["B-B-III"] = "8-9", ["B-B-IV"] = "8-1",
["C-A-I"] = "8-1", ["C-A-II"] = "8-1", ["C-A-III"] = "8-5", ["C-A-IV"] = "8-5",
["C-B-I"] = "8-8", ["C-B-II"] = "8-8", ["C-B-III"] = "8-9", ["C-B-IV"] = "8-1",
["A-C-I"] = "1-1", ["A-C-II"] = "1-1", ["A-C-III"] = "5-2", ["A-C-IV"] = "5-8",
["B-C-I"] = "9-2", ["B-C-II"] = "9-2", ["B-C-III"] = "9-2", ["B-C-IV"] = "1-8",
["C-C-I"] = "2-2", ["C-C-II"] = "2-2", ["C-C-III"] = "2-2", ["C-C-IV"] = "8-8",
}
-- Tone categories used to build tone_sandhi keys.
-- sylcat[1] classifies the tone label of a syllable that undergoes sandhi (A/B/C);
-- sylcat[2] classifies the tone label of the syllable that follows it (I-IV).
-- Tone labels are strings here ("1", "4A", ...).
local sylcat = {
    -- [1]: category of the syllable being changed
    {
        ["1"] = "A",
        ["3"] = "A",
        ["4B"] = "A",
        ["6"] = "A",
        ["2"] = "B",
        ["4A"] = "B",
        ["5"] = "C",
        ["7"] = "C",
    },
    -- [2]: category of the following syllable
    {
        ["1"] = "I",
        ["5"] = "II",
        ["7"] = "II",
        ["2"] = "III",
        ["3"] = "IV",
        ["6"] = "IV",
        ["4A"] = "IV",
        ["4B"] = "IV",
    },
}
-- Rimes with a second reading: when the syllable's tone label contains 3, 4 or 6,
-- export.ipa appends "2" to the rime so that the "...2" entry of final_ipa is used.
local dual_rimes = {
    ong = true,
    ok = true,
    eng = true,
    ek = true,
    eu = true,
    oi = true,
}
-- Replacement IPA for a syllable whose final is the syllabic nasal "ŋ̍".
-- export.ipa picks the entry through neg_type, from the first letter of the
-- next syllable; "alone" is used when there is no next syllable.
local neg_assim = {
    labial = "<sup>(ŋ̍-)</sup>m̩",
    dental = "<sup>(ŋ̍-)</sup>n̩",
    velar = "<sup>(ŋ̍-)</sup>ŋ̍",
    alone = "<sup>(ŋ̍-)</sup>ŋ̍/m̩/n̩",
}
-- Place of articulation, keyed by the first letter of the syllable that follows
-- a syllabic nasal; the result indexes neg_assim. Letters missing from this
-- table make export.ipa fall back to "velar". "✘" is the placeholder export.ipa
-- passes when no syllable follows.
local neg_type = {
    b = "labial",
    p = "labial",
    m = "labial",
    d = "dental",
    t = "dental",
    n = "dental",
    l = "dental",
    s = "dental",
    c = "dental",
    ["✘"] = "alone",
}
-- Pattern that captures the romanised initial at the start of a syllable: an
-- optional consonant letter followed by an optional "g" or "h" (which covers the
-- digraphs "ng" and "ch"). Both parts are optional, so the capture is the empty
-- string when the syllable starts with a vowel.
local initial_string = "^([bpmdtnlgkhcs]?[gh]?)"
--- Formats a romanisation string for display.
-- Every "/" gets a space on both sides, and every ">xyz" segment (the text after
-- ">" up to the next ">", space or "-") is rewritten as a superscript arrow
-- annotation, "<sup>→xyz</sup>".
-- @param text string: the romanisation to format.
-- @return string: the formatted romanisation.
function export.rom(text)
    text = gsub(text, "/", " / ")
    -- ">" is not a magic pattern character and needs no escape; "-" is escaped
    -- with "%", the pattern escape. The previous "\>" and "\-" were invalid
    -- string escapes: Lua 5.1 silently drops the backslash, later versions
    -- refuse to compile the file.
    text = gsub(text, ">([^> %-]+)", "<sup>→%1</sup>")
    return text
end
--- Transcribes a whole sentence into IPA.
-- Commas, full stops, question marks and exclamation marks are removed, the text
-- is lower-cased and split on single spaces, and each piece is passed to
-- export.ipa on its own.
-- @param text string: the romanised sentence.
-- @return string: the per-word transcriptions joined by single spaces.
function export.sentence(text)
    local stripped = gsub(text, "[,%.%?!]", "")
    local transcribed = {}
    for word in mw.text.gsplit(lower(stripped), " ", true) do
        transcribed[#transcribed + 1] = export.ipa(word)
    end
    return table.concat(transcribed, " ")
end
-- Works out the tone label of a rime (a syllable without its initial).
-- The text is first decomposed with split_tone; the pattern then captures the
-- optional combining tone mark and the optional final "h"/"k", and the pair is
-- looked up in tone_from_mark.
-- Returns the result of the substitution as a single value. When the pattern
-- does not match, or the lookup yields nil, gsub leaves the decomposed text
-- unchanged and that text is what comes back.
local function determ_tone(text)
    local decomposed = gsub(text, ".", split_tone)
    local function lookup(tone_symbol, coda)
        return tone_from_mark[tone_symbol .. coda]
    end
    local tone = gsub(decomposed, "^[^̆̄́̀̂hk]*([̆̄́̀̂]?)[^̆̄́̀̂hk]*([hk]?)$", lookup)
    return tone
end
--- Converts romanised text into an IPA transcription with tone letters.
--
-- Input structure, as parsed here:
--   "/"  separates alternative readings,
--   " "  separates the words of a reading,
--   "-"  separates the syllables of a word (tone sandhi and initial assimilation
--        are applied within such a word),
--   "*"  anywhere in a syllable keeps that syllable's initial unchanged,
--   "a>b" inside a syllable: the tone shown before the sandhi arrow is taken
--        from "a", while the initial, the rime and the tone category used for
--        the sandhi lookup are taken from "b".
--
-- @param text string|table: the romanisation, or a table whose `args[1]` holds it
--   (presumably a Scribunto frame object -- confirm against callers).
-- @param feature string|nil: pass "no_sandhi" to keep the citation tones.
-- @return string: the transcription; alternative readings are joined by "/, /".
-- Raises an error for words of more than four syllables (unless "no_sandhi" is
-- given) and for rimes or initials missing from final_ipa / initial_ipa.
function export.ipa(text, feature)
    if type(text) == "table" then
        text = text.args[1]
    end
    text = lower(text)

    local phrase_result = {}
    for _, word in ipairs(mw.text.split(text, "/")) do
        local word_result = {}
        for _, part in ipairs(mw.text.split(word, " ")) do
            local initial, final, tone, tone_conv, ipa, exc = {}, {}, {}, {}, {}, {}
            local lenition_blocked = {}
            local syllables = mw.text.split(part, "-")
            local count = #syllables

            -- Pass 1: split every syllable into initial, rime and tone label.
            for index, syllable in ipairs(syllables) do
                -- "*" is magic in patterns, so it is escaped with "%".
                local stars
                syllable, stars = gsub(syllable, "%*", "")
                if stars > 0 then
                    lenition_blocked[index] = true
                end

                if match(syllable, ">") then
                    -- Displayed tone: from the text before ">", initial removed.
                    local before = gsub(syllable, ">[^>]+$", "")
                    before = gsub(before, initial_string, "")
                    before = gsub(before, ".", split_tone)
                    tone[index] = determ_tone(before)
                    -- Everything else: from the text after the last ">".
                    syllable = gsub(syllable, "[^>]+>", "")
                    exc[index] = determ_tone((gsub(syllable, initial_string, "")))
                end

                local ini = match(syllable, initial_string)
                local fin = sub(syllable, len(ini) + 1, -1)
                fin = gsub(fin, ".", split_tone)
                if not exc[index] then
                    tone[index] = determ_tone(fin)
                end
                -- Strip the tone marks, leaving the bare rime.
                fin = gsub(fin, "[̆̄́̀̂]", "")

                -- Rimes with two readings switch to their "...2" entry in tones 3/4/6.
                if dual_rimes[fin] and match(tostring(tone[index]), "[346]") then
                    fin = fin .. "2"
                end
                -- After d/t/n/l/c/s (optionally + h), "io" is read as "uo".
                if match(ini .. fin, "[dtnlcs]h?io") then
                    fin = gsub(fin, "io", "uo")
                end
                -- A bare "ng" is a syllabic nasal: no initial, rime "ng".
                if ini .. fin == "ng" then
                    ini, fin = "", "ng"
                end
                initial[index], final[index] = ini, fin
            end

            -- Pass 2: tone sandhi. Entries have the form "<citation>-<sandhi>".
            if count == 1 or feature == "no_sandhi" then
                tone_conv = tone
            elseif count == 2 then
                tone_conv = {
                    tone[1] .. "-" .. tone_sandhi[sylcat[1][exc[1] or tone[1]] .. "-" .. sylcat[2][tone[2]]],
                    tone[2],
                }
            elseif count == 3 then
                -- Declared local: this used to leak into the global environment.
                local sandhi = mw.text.split(
                    tone_sandhi[sylcat[1][exc[1] or tone[1]] .. "-"
                        .. sylcat[1][exc[2] or tone[2]] .. "-" .. sylcat[2][tone[3]]],
                    "-"
                )
                tone_conv = {
                    tone[1] .. "-" .. sandhi[1],
                    tone[2] .. "-" .. sandhi[2],
                    tone[3],
                }
            elseif count == 4 then
                -- Treated as two pairs; the second syllable optionally becomes tone 8.
                tone_conv = {
                    tone[1] .. "-" .. tone_sandhi[sylcat[1][exc[1] or tone[1]] .. "-" .. sylcat[2][tone[2]]],
                    tone[2] .. "(-8)",
                    tone[3] .. "-" .. tone_sandhi[sylcat[1][exc[3] or tone[3]] .. "-" .. sylcat[2][tone[4]]],
                    tone[4],
                }
            else
                -- No sandhi rule exists; previously this failed later with an
                -- opaque nil-argument error.
                error("cdo-pron: tone sandhi is only defined for words of 1 to 4 syllables (got "
                    .. count .. " in \"" .. part .. "\")")
            end

            -- Pass 3: convert initial, rime and tone of each syllable to IPA.
            for index = 1, count do
                local rime = final_ipa[final[index]]
                if not rime then
                    error("cdo-pron: unrecognised rime \"" .. final[index] .. "\" in \"" .. part .. "\"")
                end
                -- Only the last syllable can take the "open" variant.
                if index == count and match(tostring(tone_conv[index]), "[346][AB]?$") then
                    final[index] = rime["open"]
                else
                    final[index] = rime["closed"]
                end

                -- final[index - 1] already holds IPA at this point.
                local initial_state
                if index == 1 or match(syllables[index - 1], "k$") or lenition_blocked[index] then
                    initial_state = "unchanged"
                elseif match(final[index - 1], "[ŋ̩̍]$") then
                    initial_state = "nasal"
                else
                    initial_state = "lenited"
                end
                local forms = initial_ipa[initial[index]]
                if not forms then
                    error("cdo-pron: unrecognised initial \"" .. initial[index] .. "\" in \"" .. part .. "\"")
                end
                initial[index] = forms[initial_state]

                -- A syllabic nasal assimilates to the first letter of the next syllable.
                if final[index] == "ŋ̍" then
                    final[index] = neg_assim[neg_type[sub(syllables[index + 1] or "✘", 1, 1)] or "velar"]
                end

                -- Collapse "x-x" (sandhi tone equals citation tone) to plain "x";
                -- returning nil keeps the matched text as it is.
                tone_conv[index] = gsub(tone_conv[index], "([1-9AB]+)%-([1-9AB]+)", function(original, changed)
                    if original == changed then
                        return original
                    end
                end)
                -- Turn tone labels and punctuation into superscripts.
                tone_conv[index] = gsub(tone_conv[index], "(.[AB]?)", tone_ipa)
                ipa[index] = initial[index] .. final[index] .. tone_conv[index]
            end
            table.insert(word_result, table.concat(ipa, " "))
        end
        table.insert(phrase_result, table.concat(word_result, " "))
    end
    return table.concat(phrase_result, "/, /")
end
-- Hand the module table (rom, sentence, ipa) to whatever loads this module.
return export