-- Module:Typing-aids

local export = {}

local m_data = mw.loadData("Module:typing-aids/data") local reorderDiacritics = require("Module:grc-utilities").reorderDiacritics local formatLink = require("Module:template link").format_link local listToSet = require("Module:table").listToSet

--[=[	Other data modules: -- Module:typing-aids/data/ar -- Module:typing-aids/data/fa -- Module:typing-aids/data/gmy -- Module:typing-aids/data/grc -- Module:typing-aids/data/hit -- Module:typing-aids/data/hy -- Module:typing-aids/data/sa -- Module:typing-aids/data/sux -- Module:typing-aids/data/got -- Module:typing-aids/data/psu --]=]

-- Local aliases for the Unicode-aware string helpers used throughout this
-- module.
local U = mw.ustring.char
local gsub = mw.ustring.gsub
local find = mw.ustring.find

-- Combining diacritics that are searched for in transliterations.
local acute = U(0x0301) -- U+0301 COMBINING ACUTE ACCENT
local macron = U(0x0304) -- U+0304 COMBINING MACRON

-- Load a data module through mw.loadData inside a protected call.
-- Returns the loaded data on success; returns nothing when the load raises
-- an error, so callers can test the result for nil.
local function load_or_nil(module_name)
	local ok, data = pcall(mw.loadData, module_name)
	if not ok then
		return
	end
	return data
end

-- Try to load a list of modules. Return the first successfully loaded module
-- and its name.
--
-- Arguments are module names; nil or false entries are skipped, so callers
-- may pass optional candidates. Returns nothing when no candidate loads.
local function get_module_and_title(...)
	for i = 1, select("#", ...) do
		-- select(i, ...) yields every argument from position i onwards; the
		-- single-target assignment keeps only the first of them.
		local module_name = select(i, ...)
		if module_name then
			local module = load_or_nil(module_name)
			if module then
				return module, module_name
			end
		end
	end
end

-- Copy the arguments into a fresh, writable table, dropping every argument
-- whose value is the empty string.
--
-- If `frame` has a getParent method, the parent frame's arguments are used;
-- otherwise `frame` itself is treated as the argument table.
local function clone_args(frame)
	-- getParent is a method and must be called before .args can be read.
	local args = frame.getParent and frame:getParent().args or frame
	local newargs = {}
	for k, v in pairs(args) do
		if v ~= "" then
			newargs[k] = v
		end
	end
	return newargs
end

-- Wrap `text` for display when `lang` is given and is not a transliteration
-- code (one ending in "-tr"); otherwise return `text` unchanged.
-- NOTE(review): the wrapper strings are single spaces here; they look like
-- the remains of stripped markup -- confirm against the live module.
local function tag(text, lang)
	if lang and not find(lang, "%-tr$") then
		return ' ' .. text .. ' '
	else
		return text
	end
end

-- Cached reference to m_data.acute_decomposer, fetched on first use.
local acute_decomposer

-- Compose Latin text, then decompose into sequences of letter and combining
-- accent, either partly or completely depending on the language.
--
-- For "sa", "hy", "xcl" and "psu" the text is normalized to NFC and every
-- character is then passed through m_data.acute_decomposer. For every other
-- language code the text is fully decomposed (NFD).
local function compose_decompose(text, lang)
	if lang == "sa" or lang == "hy" or lang == "xcl" or lang == "psu" then
		acute_decomposer = acute_decomposer or m_data.acute_decomposer
		text = mw.ustring.toNFC(text)
		text = gsub(text, ".", acute_decomposer)
	else
		text = mw.ustring.toNFD(text)
	end
	return text
end

-- Apply a series of replacement tables to `text`, one table after another.
--
-- `seriesOfReplacements` is a sequence of tables mapping a Lua pattern to its
-- replacement. Stages run in sequence order; the entries inside one stage are
-- visited with pairs, so their relative order is unspecified. Uses the
-- byte-based string library rather than mw.ustring.
local function doSequentialSimpleReplacements(text, seriesOfReplacements)
	local result = text
	for _, stage in ipairs(seriesOfReplacements) do
		for pattern, replacement in pairs(stage) do
			-- Single-target assignment discards gsub's match count.
			result = result:gsub(pattern, replacement)
		end
	end
	return result
end

-- Apply one table of shortcut replacements to `text` and trim the result.
--
-- Each key of `replacements` is a pattern. Its value is either
--   * a string: the plain replacement for the pattern, or
--   * a table: symbol[1] is the replacement, and the optional fields
--     symbol.before / symbol.after are patterns that must surround the
--     shortcut. The surrounding text is captured and written back unchanged.
-- Values of any other type are ignored. Entries are visited with pairs, so
-- their order is unspecified.
local function doComplexReplacements(text, replacements)
	for shortcut, symbol in pairs(replacements) do
		local kind = type(symbol)
		if kind == "table" then
			local prefix_pattern, prefix_capture = "", ""
			if symbol.before then
				prefix_pattern = "(" .. symbol.before .. ")"
				prefix_capture = "%1"
			end

			local suffix_pattern, suffix_capture = "", ""
			if symbol.after then
				suffix_pattern = "(" .. symbol.after .. ")"
				-- The suffix is the second capture only when a prefix
				-- capture precedes it.
				suffix_capture = symbol.before and "%2" or "%1"
			end

			text = gsub(
				text,
				prefix_pattern .. shortcut .. suffix_pattern,
				prefix_capture .. symbol[1] .. suffix_capture
			)
		elseif kind == "string" then
			text = gsub(text, shortcut, symbol)
		end
	end

	text = mw.text.trim(text)
	return text
end

-- Run doComplexReplacements once per table in `seriesOfReplacements`, in
-- sequence order, feeding the output of each stage into the next.
local function doSequentialComplexReplacements(text, seriesOfReplacements)
	local result = text
	for _, stage in ipairs(seriesOfReplacements) do
		result = doComplexReplacements(result, stage)
	end
	return result
end

-- Look up the replacement table for a language code.
--
-- If `lang` has no entry in m_data.modules, the replacements come from the
-- main data module: m_data[lang], or m_data.all when `lang` is nil.
-- Otherwise the entry names a submodule of Module:typing-aids/data (entry[1],
-- else entry[script], else entry.default). When entry[2] is set it names the
-- field of that submodule to use; otherwise the whole submodule is returned.
--
-- Raises an error when the entry names no submodule or the named submodule
-- cannot be loaded. May return nil when the main data module has no field
-- for `lang`.
local function getReplacements(lang, script)
	local module_data = m_data.modules[lang]

	if not module_data then
		if lang then
			return m_data[lang]
		end
		return m_data.all
	end

	local submodule = module_data[1] or module_data[script] or module_data.default
	if not submodule then
		-- Without this check the concatenation below fails with an opaque
		-- "attempt to concatenate a nil value" error.
		error("The entry for language code \"" .. tostring(lang)
			.. "\" in the 'modules' table of Module:typing-aids/data does not name a data module.")
	end

	local resolved_name = "Module:typing-aids/data/" .. submodule
	local replacements_module = load_or_nil(resolved_name)
	if not replacements_module then
		error("Data module " .. resolved_name
			.. " specified in 'modules' table of Module:typing-aids/data does not exist.")
	end

	if module_data[2] then
		return replacements_module[module_data[2]]
	end
	return replacements_module
end

-- Convert typing shortcuts in `text` into the characters they stand for.
--
-- Parameters:
--   text               string to convert; nil is returned for any non-string
--   origlang           language code selecting the replacement set ("xcl" is
--                      handled as "hy")
--   script             optional script code, used to pick a data module
--   untouchedDiacritics  when true, Greek ("grc") diacritics are not reordered
--   moduleName         optional submodule of Module:typing-aids/data whose
--                      contents are tried first as the replacement set
--
-- Raises an error when no replacement set can be found, or when a language
-- that needs a transliteration table has none.
local function interpretShortcuts(text, origlang, script, untouchedDiacritics, moduleName)
	mw.log(text, origlang, script, untouchedDiacritics, moduleName)
	if not text or type(text) ~= "string" then
		return nil
	end

	local lang = origlang
	if lang == "xcl" then
		lang = "hy"
	end

	local replacements = moduleName and load_or_nil("Module:typing-aids/data/" .. moduleName)
		or getReplacements(lang, script)
		or error("The language code \"" .. tostring(origlang) ..
			"\" does not have a set of replacements in Module:typing-aids/data or its submodules.")

	-- Hittite transliteration must operate on composed letters, because it adds
	-- diacritics to Basic Latin letters: s -> š, for instance.
	if lang ~= "hit-tr" then
		text = compose_decompose(text, lang)
	end

	-- "xcl" needs no test here: it has already been mapped to "hy" above.
	if lang == "ae" or lang == "sa" or lang == "got" or lang == "hy" or lang == "psu" then
		local replacements_module, replacements_module_name = get_module_and_title(
			"Module:typing-aids/data/" .. lang,
			script and "Module:typing-aids/data/" .. script or nil)
		if not replacements_module then
			-- Fail with a readable message instead of indexing nil below.
			error("No transliteration data module could be loaded for language code \""
				.. tostring(lang) .. "\".")
		end

		local transliterationTable = replacements_module[lang .. "-tr"]
			or script and replacements_module[script .. "-tr"]
			or error("Field " .. lang .. "-tr"
				.. (script and " or " .. script .. "-tr" or "")
				.. " not found in " .. replacements_module_name .. ".")

		-- Canonicalize the transliteration first, renormalize, then convert.
		text = doSequentialSimpleReplacements(text, transliterationTable)
		text = compose_decompose(text, lang)
		text = doSequentialComplexReplacements(text, replacements)
	else
		-- A replacement set with an array part is a series of stages.
		if replacements[1] then
			text = doSequentialComplexReplacements(text, replacements)
		else
			text = doComplexReplacements(text, replacements)
		end

		if lang == "grc" and not untouchedDiacritics then
			text = reorderDiacritics(text)
		end
	end

	return text
end

-- Convert text made of hyphen- or whitespace-separated symbol names by
-- looking each name up in the data module for `lang`.
--
-- The data comes from Module:typing-aids/data/<lang>: the field named after
-- the language if present, otherwise the module itself. An optional `pre`
-- table of pattern -> replacement pairs is applied before the text is split.
-- Names without an entry are kept as they are; the pieces are joined without
-- any separator.
local function hyphenSeparatedReplacements(text, lang)
	local module = mw.loadData("Module:typing-aids/data/" .. lang)
	local replacements = module[lang] or module
	if not replacements then
		error("??")
	end

	-- NOTE(review): as written, this pattern matches at every position and so
	-- puts a hyphen around every single character, which makes the lookup
	-- below work character by character. The lazy capture suggests that
	-- delimiters around "(.-)" were lost from this literal -- confirm against
	-- the live module before relying on it.
	text = text:gsub("(.-)%-?", "%1-")

	if replacements.pre then
		for k, v in pairs(replacements.pre) do
			text = gsub(text, k, v)
		end
	end

	local output = {}
	-- Find groups of characters that aren't hyphens or whitespace.
	for symbol in text:gmatch("([^%-%s]+)") do
		table.insert(output, replacements[symbol] or symbol)
	end

	return table.concat(output)
end

-- Append one template parameter to `list` and remove it from `args`.
--
-- The value is `content` when given, otherwise args[key]. args[key] is
-- cleared in either case. Returns false when there is no value to add and
-- true otherwise.
--
-- A parameter is written as "key=value" when the key is a string or the value
-- contains "=". Otherwise it is written positionally: empty strings are
-- appended until the list reaches position key - 1, and list.maxarg records
-- the highest positional key written so far.
local function addParameter(list, args, key, content)
	local value = content or args[key]
	args[key] = nil

	if not value then
		return false
	end

	if find(value, "=") or type(key) == "string" then
		table.insert(list, key .. "=" .. value)
		return true
	end

	-- Pad skipped positions so the value lands at the right index.
	while list.maxarg < key - 1 do
		table.insert(list, "")
		list.maxarg = list.maxarg + 1
	end
	table.insert(list, value)
	list.maxarg = key

	return true
end

-- Convert the shortcuts in one term parameter and append it to `list`.
--
-- Parameters:
--   list     output parameter list (see addParameter)
--   args     working copy of the template arguments; consumed entries are
--            removed
--   key      parameter holding the term
--   altkey1, altkey2
--            optional alternative-display parameters; when one of them is
--            set, `key` is converted first and the alternative is then
--            processed in its place
--   trkey    optional manual-transliteration parameter; its value is used as
--            the content when `key` is empty
--   lang     language code selecting the conversion
--
-- Returns false when there is nothing to convert, true otherwise. Raises an
-- error when both the term and a manual transliteration other than "-" are
-- given.
local function addAndConvertParameter(list, args, key, altkey1, altkey2, trkey, lang)
	if altkey1 and args[altkey1] then
		addAndConvertParameter(list, args, key, nil, nil, nil, lang)
		key = altkey1
	elseif altkey2 and args[altkey2] then
		addAndConvertParameter(list, args, key, nil, nil, nil, lang)
		key = altkey2
	end

	local content = args[key]
	if trkey and args[trkey] then
		if not content then
			content = args[trkey]
			args[trkey] = nil
		else
			if args[trkey] ~= "-" then
				error("Can't specify manual translit " .. trkey .. "=" ..
					args[trkey] .. " along with parameter " .. key .. "=" .. content)
			end
		end
	end
	if not content then
		return false
	end

	local trcontent = nil

	-- If Sanskrit or Sauraseni Prakrit and there's an acute accent specified
	-- somehow or other in the source content, preserve the translit, which
	-- includes the accent when the Devanagari doesn't.
	if lang == "sa" or lang == "psu" then
		local proposed_trcontent = interpretShortcuts(content, lang .. "-tr")
		if find(proposed_trcontent, acute) then
			trcontent = proposed_trcontent
		end
	end

	-- If Gothic and there's a macron specified somehow or other in the source
	-- content that remains after canonicalization, preserve the translit,
	-- which includes the accent when the Gothic doesn't.
	if lang == "got" then
		local proposed_trcontent = interpretShortcuts(content, "got-tr")
		if find(proposed_trcontent, macron) then
			trcontent = proposed_trcontent
		end
	end

	-- Disabled:
	--	if lang == "gmy" then
	--		local proposed_trcontent = interpretShortcuts(content, "gmy-tr")
	--		if find(proposed_trcontent, macron) then
	--			trcontent = proposed_trcontent
	--		end
	--	end

	if lang == "hit" or lang == "akk" then
		trcontent = interpretShortcuts(content, lang .. "-tr")
		content = hyphenSeparatedReplacements(content, lang)
	elseif lang == "sux" or lang == "gmy" then
		content = hyphenSeparatedReplacements(content, lang)
	else
		content = interpretShortcuts(content, lang, args.sc, nil, args.module)
	end

	addParameter(list, args, key, content)
	-- The recursive calls above pass no trkey; without this check
	-- addParameter would be asked to clear args[nil], which raises
	-- "table index is nil".
	if trcontent and trkey then
		addParameter(list, args, trkey, trcontent)
	end

	return true
end

-- Sets of template names, grouped by how printTemplate lays out their
-- parameters.

-- Templates handled by the compound-word branch of printTemplate.
local isCompound = listToSet{
	"affix", "af", "suffix", "suf", "prefix", "pre",
}

-- Technically lang, ux, and uxi aren't link templates, but they have many of
-- the same parameters.
local isLinkTemplate = listToSet{
	"m", "m+", "langname-mention", "l", "ll",
	"cog", "noncog", "cognate", "ncog",
	"m-self", "l-self",
	"desc", "lang", "usex", "ux", "uxi"
}

-- Templates handled by the branch that reads a second language code from
-- parameter 3.
local isTwoLangLinkTemplate = listToSet{
	"der", "inh", "bor", "calque", "cal", "translit"
}

-- Templates handled by the translation-template branch.
local isTransTemplate = listToSet{
	"t", "t+", "t-check", "t+check"
}

-- Build the wikitext of a template call from `args`, converting typing
-- shortcuts in the term parameters.
--
-- args[1] is the template name and args[2] a language code. The remaining
-- parameters are laid out according to the template family (link, two-language
-- link, translation or compound). Parameters not consumed by the family
-- layout are copied through unchanged. `args` itself is not modified.
--
-- Raises an error for a template name that belongs to none of the families.
local function printTemplate(args)
	-- Work on a copy: addParameter removes every entry it consumes.
	local parameters = {}
	for key, value in pairs(args) do
		parameters[key] = value
	end

	local template = parameters[1]
	local result = { maxarg = 0 }

	addParameter(result, parameters, 1)
	local lang = parameters[2]
	addParameter(result, parameters, 2)

	if isLinkTemplate[template] then
		addAndConvertParameter(result, parameters, 3, "alt", 4, "tr", lang)
		for _, param in ipairs({ 5, "gloss", "t" }) do
			addParameter(result, parameters, param)
		end
	elseif isTwoLangLinkTemplate[template] then
		lang = parameters[3]
		addParameter(result, parameters, 3)
		addAndConvertParameter(result, parameters, 4, "alt", 5, "tr", lang)
		for _, param in ipairs({ 6, "gloss", "t" }) do
			addParameter(result, parameters, param)
		end
	elseif isTransTemplate[template] then
		addAndConvertParameter(result, parameters, 3, "alt", nil, "tr", lang)
		-- Copy the consecutive positional parameters from 4 onwards in order.
		-- The index has to advance: addParameter clears the entry it reads,
		-- so without the increment the loop stopped after parameter 4.
		local i = 4
		while parameters[i] do
			addParameter(result, parameters, i)
			i = i + 1
		end
	elseif isCompound[template] then
		local i = 1
		while true do
			local sawparam = addAndConvertParameter(result, parameters, i + 2, "alt" .. i, nil, "tr" .. i, lang)
			if not sawparam then
				break
			end
			for _, param in ipairs({ "id", "lang", "sc", "t", "pos", "lit" }) do
				addParameter(result, parameters, param .. i)
			end
			i = i + 1
		end
	else
		error("Unrecognized template name '" .. tostring(template) .. "'")
	end

	-- Copy any remaining parameters. The keys are collected and sorted first
	-- (numbers ascending, then names) because pairs gives no ordering
	-- guarantee and positional parameters must be appended in increasing
	-- order for addParameter's padding to be correct.
	local remaining = {}
	for k in pairs(parameters) do
		remaining[#remaining + 1] = k
	end
	table.sort(remaining, function(a, b)
		local a_is_number, b_is_number = type(a) == "number", type(b) == "number"
		if a_is_number ~= b_is_number then
			return a_is_number
		end
		return a < b
	end)
	for _, k in ipairs(remaining) do
		addParameter(result, parameters, k)
	end

	-- Emit the collected parameters as a template call. Returning an empty
	-- string here would throw away everything built above.
	-- NOTE(review): the braces were reconstructed from the "|"-separated
	-- parameter layout built by addParameter -- confirm against the live
	-- module.
	return "{{" .. table.concat(result, "|") .. "}}"
end

-- Entry point: format a template call from the frame's own arguments, or from
-- `frame` itself when it has no `args` field.
function export.link(frame)
	return printTemplate(frame.args or frame)
end

-- Entry point: convert typing shortcuts.
--
-- When a third or fourth positional argument or `tr` is present, the
-- arguments are treated as a template call and handed to printTemplate.
-- Otherwise the arguments are either (language code, text) or just (text),
-- in which case the language code "all" is used. Returns the converted text,
-- or "" when the general conversion yields nothing.
function export.replace(frame)
	local args = clone_args(frame)

	if args[4] or args[3] or args.tr then
		return printTemplate(args)
	end

	local lang, text
	if args[2] then
		lang = args[1]
		text = args[2]
	else
		lang = "all"
		text = args[1]
	end

	-- Languages whose input is a list of hyphen-separated symbol names.
	local usesHyphenSeparatedInput = { akk = true, gmy = true, hit = true, sux = true }
	if usesHyphenSeparatedInput[lang] then
		return hyphenSeparatedReplacements(text, lang)
	end

	return interpretShortcuts(text, lang, args.sc, args.noreorder, args.module) or ""
end

-- Entry point: show one usage example, i.e. the template code for a shortcut
-- followed by the text it converts to.
--
-- Arguments are either (language code, text) or just (text), in which case
-- the language code "all" is used.
function export.example(frame)
	local args = clone_args(frame)

	local text, lang
	if args[2] then
		lang, text = args[1], args[2]
	else
		lang, text = "all", args[1]
	end

	local textparam
	if find(text, "=") then
		-- Currently, "=" is only used in the shortcuts for Greek, and Greek is
		-- always found in the second parameter, since the first parameter
		-- specifies the language, "grc".
		textparam = "2="..text
	else
		textparam = text
	end

	local template = {
		[1] = "subst:chars",
		[2] = lang ~= "all" and lang or textparam,
		[3] = lang ~= "all" and textparam or nil,
	}

	local output = { formatLink(template) }
	table.insert(output, "\n| ")
	-- NOTE(review): both alternatives of the next expression are empty
	-- strings, and the closing piece below is a single space; these look like
	-- the remains of stripped markup -- confirm against the live module.
	table.insert(output, lang ~= "all" and "" or "")
	table.insert(output, export.replace({lang, text}))
	table.insert(output, lang ~= "all" and " " or "")

	return table.concat(output)
end

-- Entry point: build a wikitable of usage examples.
--
-- The first argument is a list of shortcuts separated by a semicolon plus
-- whitespace; the optional `lang` argument is the language code passed to the
-- conversion. Each row shows the template code for one shortcut next to its
-- converted result.
--
-- Arguments are taken from the parent frame when `frame` has a getParent
-- method, else from frame.args when it has a first positional argument, else
-- from `frame` itself. Raises an error when the first argument is missing.
function export.examples(frame)
	local args
	if frame.getParent then
		-- getParent is a method and must be called before .args can be read.
		args = frame:getParent().args
	elseif frame.args and frame.args[1] then
		args = frame.args
	else
		-- Also covers a plain table without an `args` field.
		args = frame
	end

	local examples = args[1] and mw.text.split(args[1], ";%s+") or error('No content in the first parameter.')
	local lang = args["lang"]

	-- NOTE(review): the table header and row template were reconstructed
	-- from damaged string literals (the fragments 'class="wikitable"',
	-- '! shortcut !! result', 'templateCode || result' and the closing '|}')
	-- -- confirm the exact text against the live module.
	local output = {
		'{| class="wikitable"\n! shortcut !! result\n',
	}
	local row_template = "|-\n| templateCode || result\n"

	-- ipairs keeps the rows in the order the examples were written.
	for _, example in ipairs(examples) do
		local textparam
		if find(example, "=") then
			-- Currently, "=" is only used in the shortcuts for Greek, and
			-- Greek is always found in the second parameter, since the first
			-- parameter specifies the language, "grc".
			textparam = "2=" .. example
		else
			textparam = example
		end

		local template = {
			[1] = "subst:chars",
			[2] = lang or textparam,
			[3] = lang and textparam,
		}

		local result = export.replace{lang, example}

		local content = {
			templateCode = formatLink(template),
			result = tag(result, lang),
		}

		-- Replacement callback: every word in the row template names a field
		-- of `content`.
		local function addContent(item)
			if content[item] then
				return content[item]
			else
				return 'No content for "' .. item .. '".'
			end
		end

		local row = gsub(row_template, "%a+", addContent)
		table.insert(output, row)
	end

	return table.concat(output) .. "|}"
end

return export