-- Codificació de caràcters amb normes diferents a UTF-8

-- Table that collects the module's functions; it is returned at the end of the file.
local p = {}

--[[
Codificació de caràcters en latin1 o ISO-8859-1, usada per exemple per {{DCVB}} i {{Optimot}}.
Original de wikt:en:Module:encodings.
    Sintaxi: {{#invoke:encode|latin1|<text>}}
]]

-- Re-encodes the first argument as ISO 8859-1 (Latin-1) and URL-encodes it.
-- Each code point of the text becomes a single byte with the same numeric
-- value; the resulting byte string is then passed to mw.uri.encode.
--   frame: Scribunto frame; frame.args[1] is the text (a missing argument is
--          treated as the empty string).
--   Returns whatever mw.uri.encode returns for the single-byte string.
--   Raises an error on the first code point of 256 or above, since such a
--   value does not fit in one byte.
function p.latin1(frame)
	local input = frame.args[1] or ""
	local bytes = {}

	for codepoint in mw.ustring.gcodepoint(input) do
		if codepoint > 255 then
			error("Caràcter ISO 8859-1 no vàlid: \"" .. mw.ustring.char(codepoint) .. "\".")
		end

		-- One byte per code point: the byte value equals the code point.
		bytes[#bytes + 1] = string.char(codepoint)
	end

	local encoded = table.concat(bytes)
	return mw.uri.encode(encoded)
end

-- Rewrites a title character by character using the replacement table below,
-- trims it, and upper-cases its first character.
--   frame: Scribunto frame; frame.args[1] is the title (a missing argument is
--          treated as the empty string).
--   Returns the transformed title as a string.
function p.pagename(frame)
	-- Characters as encoded by PAGENAME, plus two clean-up rules.
	local replacements = {
		["_"] = " ",
		['"'] = "&#34;",
		["&"] = "&#38;",
		["'"] = "&#39;",
		["‎"] = "", -- invisible control character that shows up at the end of copy-pasted text
	}

	local raw = frame.args[1] or ""
	-- The extra parentheses keep only the string result of gsub, so its
	-- replacement count is not passed on to mw.text.trim as a second argument.
	local title = mw.text.trim((mw.ustring.gsub(raw, ".", replacements)))

	local initial = mw.ustring.sub(title, 1, 1)
	local rest = mw.ustring.sub(title, 2)
	return mw.ustring.upper(initial) .. rest
end

-- Replaces Unicode subscript and superscript digits with <sub>/<sup> markup.
-- Consecutive digits of the same kind end up inside a single element, e.g.
-- "₁₂" becomes "<sub>12</sub>".
--   frame: Scribunto frame; frame.args[1] is the text (a missing argument is
--          treated as the empty string).
--   Returns the text with the digits converted; other characters are kept.
function p.subSupMarkup(frame)
	local sub = {["₀"] = "0", ["₁"] = "1", ["₂"] = "2", ["₃"] = "3", ["₄"] = "4", ["₅"] = "5", ["₆"] = "6", ["₇"] = "7", ["₈"] = "8", ["₉"] = "9"}
	local sup = {["⁰"] = "0", ["¹"] = "1", ["²"] = "2", ["³"] = "3", ["⁴"] = "4", ["⁵"] = "5", ["⁶"] = "6", ["⁷"] = "7", ["⁸"] = "8", ["⁹"] = "9"}

	-- Wraps a single sub/superscript digit in its element; any other
	-- character is returned unchanged.
	local function Unicode2markup(n)
		if sub[n] then
			return "<sub>" .. sub[n] .. "</sub>"
		elseif sup[n] then
			return "<sup>" .. sup[n] .. "</sup>"
		end
		return n
	end

	local ret = mw.ustring.gsub(frame.args[1] or "", ".", Unicode2markup)

	-- Join adjacent elements of the SAME kind only. Removing a mixed pair such
	-- as "</sub><sup>" would leave unbalanced markup ("<sub>2</sub><sup>3</sup>"
	-- would turn into "<sub>23</sup>").
	ret = string.gsub(ret, "</sub><sub>", "")
	ret = string.gsub(ret, "</sup><sup>", "")
	return ret
end


-- Converts a title to text with hyphens, lowercase and no diacritics, as
-- recommended for URLs, and URL-encodes the result with mw.uri.encode.
--   frame: Scribunto frame; frame.args[1] is the title (a missing argument is
--          treated as the empty string).
--   Returns the encoded string.
function p.hyphens(frame)
	-- Default to "" like the other functions of this module; otherwise a
	-- missing argument would reach mw.ustring.toNFD as nil.
	local text = frame.args[1] or ""

	-- Decompose to NFD, then drop the combining marks U+0300..U+036F
	-- (UTF-8 bytes \204\128 .. \205\175), which removes the diacritics.
	text = mw.ustring.gsub(mw.ustring.toNFD(text), "[\204\128-\205\175]", "")
	text = string.gsub(text, " ", "-") -- spaces become hyphens
	text = mw.ustring.lower(text)
	text = mw.ustring.gsub(text, "['’]", "") -- remove straight and curly apostrophes

	return mw.uri.encode(text)
end

-- Return the table holding the functions defined above.
return p