မော်ဂျူး:Tibt-translit

Documentation for this module may be created at မော်ဂျူး:Tibt-translit/doc
local export = {}
local Tibt = require("Module:Tibt-common")
local gsub = mw.ustring.gsub
local gmatch = mw.ustring.gmatch
local match = mw.ustring.match
local sub = mw.ustring.sub
local toNFC = mw.ustring.toNFC
local upper = mw.ustring.upper

-- Sequences of two code points that are transliterated as a single unit.
-- export.tr substitutes these before the per-character `oneChar` lookup, so
-- that the combined sequence takes precedence over the transliteration of its
-- individual characters.
local twoChars = {
	["རྀ"] = "ṛ", ["ྲྀ"] = "ṛ", -- Primarily used in Sanskrit(-derived) borrowings.
	["ལྀ"] = "ḷ", ["ླྀ"] = "ḷ",
	
	["ཕ༹"] = "f", ["བ༹"] = "v", -- Used to transliterate Chinese.
	["ཁ༹"] = "x", ["ག༹"] = "ġ", -- Used in Balti.
	
	-- The same four sequences, keyed on the subjoined forms of the letters.
	["ྥ༹"] = "f", ["ྦ༹"] = "v",
	["ྑ༹"] = "x", ["ྒ༹"] = "ġ",
}

-- Single-character transliterations, applied by export.tr to every character
-- of a syllable (characters without an entry are left unchanged).
--
-- Some values are intermediate forms that export.tr cleans up afterwards:
--   * ".w", ".y" and ".r" carry a leading "." that is removed again in the
--     contexts handled by the patterns following the lookup in export.tr;
--   * "\1" is a placeholder that is deleted at the start of a syllable and
--     turned into "." elsewhere.
local oneChar = {
	-- Base letters.
	["ཀ"] = "k", ["ཁ"] = "kh", ["ག"] = "g", ["ང"] = "ng",
	["ཅ"] = "c", ["ཆ"] = "ch", ["ཇ"] = "j", ["ཉ"] = "ny",
	["ཏ"] = "t", ["ཐ"] = "th", ["ད"] = "d", ["ན"] = "n",
	["པ"] = "p", ["ཕ"] = "ph", ["བ"] = "b", ["མ"] = "m",
	["ཙ"] = "ts", ["ཚ"] = "tsh", ["ཛ"] = "dz", ["ཝ"] = ".w",
	["ཞ"] = "zh", ["ཟ"] = "z", ["འ"] = "'", ["ཡ"] = ".y",
	["ར"] = ".r", ["ཪ"] = ".r", ["ལ"] = "l", ["ཤ"] = "sh", ["ས"] = "s",
	["ཧ"] = "h", ["ཨ"] = "\1",
	["ཊ"] = "ṭ", ["ཋ"] = "ṭh", ["ཌ"] = "ḍ", ["ཎ"] = "ṇ", ["ཥ"] = "ṣ",
	["ཫ"] = "q", ["ཬ"] = "ṛ", -- Used in Balti.
	["྅"] = "ʼ", ["ྈ"] = "x", ["ྉ"] = "f", ["ྌ"] = "f", -- Used in Sanskrit.
	
	-- Vowel signs. The first entry maps to a combining macron, which attaches
	-- to whatever character precedes it in the output.
	["ཱ"] = "̄", ["ི"] = "i", ["ྀ"] = "ị", ["ུ"] = "u", ["ེ"] = "e", ["ཻ"] = "ai", ["ོ"] = "o", ["ཽ"] = "au",
	
	-- Signs rendered as ṃ, m̐ and ḥ.
	["ཾ"] = "ṃ", ["ྂ"] = "ṃ", ["ྃ"] = "m̐", ["ཿ"] = "ḥ",
	
	-- Subjoined letters; unlike the base forms, w, y and r get no leading ".".
	["ྐ"] = "k", ["ྑ"] = "kh", ["ྒ"] = "g", ["ྔ"] = "ng",
	["ྕ"] = "c", ["ྖ"] = "ch", ["ྗ"] = "j", ["ྙ"] = "ny",
	["ྟ"] = "t", ["ྠ"] = "th", ["ྡ"] = "d", ["ྣ"] = "n",
	["ྤ"] = "p", ["ྥ"] = "ph", ["ྦ"] = "b", ["ྨ"] = "m",
	["ྩ"] = "ts", ["ྪ"] = "tsh", ["ྫ"] = "dz", ["ྭ"] = "w", ["ྺ"] = "w",
	["ྮ"] = "zh", ["ྯ"] = "z", ["ྰ"] = "'", ["ྱ"] = "y", ["ྻ"] = "y",
	["ྲ"] = "r", ["ྼ"] = "r", ["ླ"] = "l", ["ྴ"] = "sh", ["ྶ"] = "s",
	["ྷ"] = "h", ["ྸ"] = "+a",
	["ྚ"] = "ṭ", ["ྛ"] = "ṭh", ["ྜ"] = "ḍ", ["ྞ"] = "ṇ", ["ྵ"] = "ṣ",
	["ྍ"] = "x", ["ྎ"] = "f", ["ྏ"] = "f",
}

-- Digits and punctuation. export.tr applies this table to the whole text, one
-- character at a time, after all syllables have been transliterated;
-- characters without an entry are left unchanged.
local symbol = {
	-- Digits.
	["༠"] = "0", ["༡"] = "1", ["༢"] = "2", ["༣"] = "3", ["༤"] = "4",
	["༥"] = "5", ["༦"] = "6", ["༧"] = "7", ["༨"] = "8", ["༩"] = "9",
	-- Characters rendered as a digit plus one half.
	["༪"] = "0.5", ["༫"] = "1.5", ["༬"] = "2.5", ["༭"] = "3.5", ["༮"] = "4.5",
	["༯"] = "5.5", ["༰"] = "6.5", ["༱"] = "7.5", ["༲"] = "8.5", ["༳"] = "9.5",
	-- Punctuation and brackets. "་" becomes a plain space; "༎" becomes a full
	-- stop followed by a blank line.
	["་"] = " ", ["༌"] = "*", ["།"] = ".", ["༎"] = ".\n\n", ["༏"] = ";",
	["༑"] = "|", ["༈"] = "!", ["༔"] = ":", ["༼"] = "(", ["༽"] = ")",
	["༺"] = "<", ["༻"] = ">"
}

-- Escapes every Lua pattern magic character in `s`, so that the result matches
-- `s` literally when used as a gsub/match pattern.
local function escapePattern(s)
	return (gsub(s, "[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0"))
end

-- Escapes "%" in `s`, so that the result is inserted literally when used as a
-- gsub replacement string.
local function escapeReplacement(s)
	return (gsub(s, "%%", "%%%%"))
end

-- Transliterates Tibetan-script text into Latin script.
--
-- Parameters:
--   text  string to transliterate.
--   lang  language code; required. It is used to build the entry name of
--         `text` and is passed on to Tibt.findMainStack.
--   sc    accepted for interface compatibility but not used by this function;
--         the "Tibt" script object is always used.
--
-- Returns the NFC-normalised transliteration followed by a single space.
-- Raises an error if `lang` is missing.
function export.tr(text, lang, sc)
	if not lang then
		error("Language code required.")
	end
	local langObj = require("Module:languages").getByCode(lang)
	local scObj = require("Module:scripts").getByCode("Tibt")
	
	-- Normalise the input before splitting it up.
	text = (langObj:makeEntryName(text))
	text = scObj:fixDiscouragedSequences(text)
	text = scObj:toFixedNFD(text)
	text = gsub(text, "༒", "།")
	-- Drop trailing syllable separators.
	text = gsub(text, "[་༌]+$", "")
	
	for word in Tibt.getWords(text) do
		for syllable in Tibt.getSyllables(word) do
			local tr = syllable
			
			-- Move ཱ behind a directly following run of vowel signs (or subjoined
			-- r/l), so that the combining macron it becomes ends up after the
			-- transliterated vowel.
			tr = gsub(tr, "(ཱ)([ིེུ-ཽྀྲླ]+)", "%2%1")
			
			-- If the main stack does not end in one of the vowel signs listed in
			-- the pattern, append the inherent vowel "a" to it (before a final ཱ).
			local mainStack = gsub(Tibt.findMainStack(syllable, lang), "[ཾཿྂྃ]", "")
			if match(mainStack, "([^ༀི-ཽྀ]ཱ?)$") then
				local newMainStack = mainStack .. "a"
				newMainStack = gsub(newMainStack, "ཱa$", "aཱ")
				-- mainStack is input text, not a pattern: escape it (and the
				-- replacement) so that magic characters are taken literally.
				tr = gsub(tr, escapePattern(mainStack), escapeReplacement(newMainStack), 1)
			end
			
			-- A syllable containing ༷ or ༵ loses that mark and is wrapped in
			-- underline markup instead.
			tr = gsub(tr, "^(.*)༷(.*)$", "<u>%1%2</u>")
			tr = gsub(tr, "^(.*)༵(.*)$", "<span style=\"text-decoration-style:double;\">%1%2</span>")
			
			-- Two-character sequences first, then everything else per character.
			for letter, replacement in pairs(twoChars) do
				tr = gsub(tr, letter, replacement)
			end
			tr = gsub(tr, ".", oneChar)
			
			-- Insert "a" after any character + "'" that is followed by something
			-- other than a vowel, "-" or "<".
			tr = gsub(tr, "(.')([^aāeiīoḷḹṛṝuū%-<])", "%1a%2")
			-- Remove a "." that stands at the start of the syllable or directly
			-- after a vowel, "-" or ">".
			tr = gsub(tr, "%f[^%zaāeiīoḷḹṛṝuū%->]%.", "")
			-- Remove the "." before r/w/y when no vowel follows.
			tr = gsub(tr, "%.([rwy][^aāeiīoḷḹṛṝuū])", "%1")
			-- Resolve the "\1" placeholder from oneChar: nothing at the start of
			-- the syllable, "." anywhere else.
			tr = gsub(tr, "^\1", "")
			tr = gsub(tr, "\1", ".")
			
			-- Replace the first remaining occurrence of the syllable in the text.
			-- Both arguments are escaped so they are treated as literal strings
			-- even if they contain "-", ".", "%" and similar characters.
			text = gsub(text, escapePattern(syllable), escapeReplacement(tr), 1)
		end
	end
	
	-- Remove ྄ together with an "a" inserted after it.
	text = gsub(text, "྄a?", "")
	-- Digits and punctuation.
	text = gsub(text, ".", symbol)
	-- Whitespace and punctuation clean-up.
	text = gsub(text, " ' ", "")
	text = gsub(text, " *· *·? *", " · ")
	text = gsub(text, " *%.", ".")
	text = gsub(text, "\n+", "\n\n")
	text = gsub(text, "\n\n$", "")
	-- If the text contains sentence punctuation, capitalise the first character
	-- (skipping a leading "'") of the text, of each paragraph and of each
	-- sentence.
	if match(text, "%. ") or match(text, "%.\n.") or match(text, "%.$") then
		text = gsub(text, "^'?.", upper)
		text = gsub(text, "\n\n'?.", upper)
		text = gsub(text, "%. '?.", upper)
	end
	
	-- End with a space so that concurrent parts of running text that need to be transliterated separately (e.g. due to links) are still properly separated.
	return toNFC(text) .. " "
end

return export