မော်ဂျူး:encodings

ဝစ်ရှင်နရီ မှ

Documentation for this module may be created at မော်ဂျူး:encodings/doc

-- Table of functions this module exposes; it is returned at the end of the file.
local export = {}

-- Registry of encoder functions keyed by encoding name (e.g. "ISO 8859-1",
-- "cp1251"). Each encoder receives a text string and returns the encoded
-- byte string; export.encode looks encoders up here by name.
local encoders = {}

-- Encoder for ISO 8859-1 (Latin-1).
-- Walks the text one code point at a time and emits the single byte with the
-- same numeric value. Any code point that does not fit in one byte (256 and
-- above) aborts with an error naming the offending character.
encoders["ISO 8859-1"] = function(text)
	local LIMIT = 255	-- highest code point representable in one byte
	local bytes = {}
	local count = 0

	for codepoint in mw.ustring.gcodepoint(text) do
		if codepoint > LIMIT then
			error("Invalid ISO 8859-1 character \"" .. mw.ustring.char(codepoint) .. "\".")
		end

		count = count + 1
		bytes[count] = string.char(codepoint)
	end

	return table.concat(bytes)
end

-- Encoder for Windows-1251.
-- Takes text and returns a byte string holding exactly one byte per input code
-- point. ASCII passes through unchanged; code points that have no cp1251 byte
-- are replaced by "?" (byte 63) rather than raising an error.
encoders["cp1251"] = function(text)	-- [[d:Q1748665|cp1251]]
	local ret = {}
	-- Number of the boundaries in the loop that the code point exceeds; it
	-- selects which conversion applies:
	--   0: U+0000-U+00BF   1: U+00C0-U+03FF   2: U+0400-U+040F
	--   3: U+0410-U+044F   4: U+0450-U+0491   5: U+0492-U+2012
	--   6: U+2013-U+2122   7: above U+2122
	local range
	-- Code points in 128-191 whose cp1251 byte equals the code point itself
	-- (NBSP, currency sign, broken bar, section sign, copyright, guillemets,
	-- not sign, soft hyphen, registered, degree, plus-minus, micro, pilcrow,
	-- middle dot). Every other value in 128-191 is a different character in
	-- cp1251 and therefore must not be passed through.
	local same = {[160]=true, [164]=true, [166]=true, [167]=true, [169]=true, [171]=true, [172]=true, [173]=true, [174]=true, [176]=true, [177]=true, [181]=true, [182]=true, [183]=true, [187]=true}
	-- Irregular mappings. Each table is keyed by the code point minus a base
	-- (1000, 1100, 8200) and stores the extra amount to subtract so that
	-- cp - (800 | 900 | 8000) - diff yields the cp1251 byte.
	-- diff2: U+0401-U+040F (uppercase non-Russian Cyrillic letters).
	local diff2 = {[25]=57, [26]=98, [27]=98, [28]=58, [29]=40, [30]=52, [31]=56, [32]=69, [33]=95, [34]=94, [35]=93, [36]=95, [38]=77, [39]=96}
	-- diff4: U+0451-U+045F (lowercase counterparts) plus U+0490/U+0491.
	local diff4 = {[5]=21, [6]=62, [7]=76, [8]=22, [9]=19, [10]=31, [11]=20, [12]=24, [13]=59, [14]=58, [15]=57, [16]=59, [18]=56, [19]=60, [68]=103, [69]=89}
	-- diff6: punctuation and symbols between U+2013 and U+2122 (dashes,
	-- quotes, daggers, bullet, ellipsis, per mille, angle quotes, euro,
	-- numero sign, trade mark).
	local diff6 = {[11]=61, [12]=61, [16]=71, [17]=71, [18]=88, [20]=73, [21]=73, [22]=90, [24]=90, [25]=90, [26]=77, [30]=97, [40]=103, [49]=110, [50]=95, [164]=228, [270]=285, [282]=329}
	
	for cp in mw.ustring.gcodepoint(text) do
		-- The fourth boundary is 1103 (U+044F), the last letter of the
		-- contiguous block handled by range 3. With 1104 here, U+0450 was
		-- converted to 256 and string.char raised an error.
		range = (191<cp and 1 or 0) + (1023<cp and 1 or 0) + (1039<cp and 1 or 0) + (1103<cp and 1 or 0) + (1169<cp and 1 or 0) + (8210<cp and 1 or 0) + (8482<cp and 1 or 0)
		if range==0 then
			-- ASCII is identical in cp1251; above it only the listed code
			-- points keep their value.
			if cp>127 and not same[cp] then
				cp = 63 -- '?'
			end
		elseif range==2 and diff2[cp-1000] then
			cp = cp-800-diff2[cp-1000]
		elseif range==3 then
			-- U+0410-U+044F map linearly onto bytes 192-255.
			cp = cp-848
		elseif range==4 and diff4[cp-1100] then
			cp = cp - 900 - diff4[cp-1100]
		elseif range==6 and diff6[cp-8200] then
			cp = cp -8000 - diff6[cp-8200]
		else
			cp = 63 -- '?'
		end
		table.insert(ret, string.char(cp))
	end
	return table.concat(ret)
end

-- Converts text to the named encoding and returns the result passed through
-- mw.uri.encode.
--
-- Two calling forms are accepted:
--   * export.encode(text, encoding) with plain values;
--   * export.encode(t) where t is a table whose `args` field carries the text
--     as parameter 1 and the encoding name as parameter 2 (presumably the
--     frame from {{#invoke:}} -- confirm against callers). The arguments are
--     validated by Module:parameters.
--
-- text:     string to encode, or the table described above.
-- encoding: key into the `encoders` registry, e.g. "ISO 8859-1" or "cp1251".
-- Raises an error if no encoder is registered under `encoding`; encoders may
-- raise their own errors for characters they cannot represent.
function export.encode(text, encoding)
	if type(text) == "table" then
		local params = {
			[1] = {required = true, allow_empty = true},
			[2] = {required = true},
		}
		
		local args = require("Module:parameters").process(text.args, params)
		text = args[1]
		encoding = args[2]
	end
	
	local encoder = encoders[encoding]
	
	if not encoder then
		-- tostring() keeps this message intact when `encoding` is nil or
		-- another value that cannot be concatenated; without it the
		-- concatenation itself would fail with an unrelated error.
		error("No encoder exists for the encoding \"" .. tostring(encoding) .. "\".")
	end
	
	return mw.uri.encode(encoder(text))
end

-- Hand the table of public functions back to whoever loads this module.
return export