-- Module:Languages

-- Table of public functions; it is the value returned by this module.
local export = {}

--[=[
Throw an error for an invalid language code or script code. This function
always raises and never returns normally.

`lang_code` (required) is the bad code and can be nil or a non-string.

`param` (required) is the name of the parameter in which the code was contained.
It can be a string, a number (for a numeric param, in which case the param will
show up in the error message as an ordinal such as "first" or "second"), or
`true` if no parameter can be clearly identified.

`code_desc` (optional) is text describing what the code is; by default,
"language code".

`template_tag` (optional) is a string specifying the template that generated the
error, or a function to generate this string. If given, it will be displayed in
the error message.

`not_real_lang` (optional), if given, indicates that the code is not in the form
of a language code (e.g. it's a script code). Normally, this function checks for
things that could plausibly be a language code: two or three lowercase letters,
or two or three groups of three lowercase letters with hyphens between them. If
such a pattern is found, a different error message is displayed (indicating an
invalid code) than otherwise (indicating a missing code). If `not_real_lang` is
given, this check is suppressed.
]=]
function export.err(lang_code, param, code_desc, template_tag, not_real_lang)
	local ordinals = {
		"first", "second", "third", "fourth", "fifth", "sixth",
		"seventh", "eighth", "ninth", "tenth", "eleventh", "twelfth",
		"thirteenth", "fourteenth", "fifteenth", "sixteenth", "seventeenth",
		"eighteenth", "nineteenth", "twentieth"
	}

	code_desc = code_desc or "language code"

	if not template_tag then
		template_tag = ""
	else
		if type(template_tag) ~= "string" then
			-- A non-string value is a generator function: call it to get the text.
			template_tag = template_tag()
		end
		template_tag = " (Original template: " .. template_tag .. ")"
	end

	-- Level 3 attributes the error to the caller of export.err rather than to
	-- this helper or to export.err itself.
	local function err(msg)
		error(msg .. template_tag, 3)
	end

	local param_type = type(param)
	local in_the_param
	if param == true then
		-- handled specially below
		in_the_param = ""
	else
		if param_type == "number" then
			local ordinal = ordinals[param]
			if ordinal then
				param = ordinal .. " parameter"
			else
				-- No ordinal word is available (e.g. 0 or anything above 20);
				-- fall back to the bare number instead of failing on a nil
				-- concatenation.
				param = "parameter " .. param
			end
		elseif param_type == "string" then
			param = 'parameter "' .. param .. '"'
		else
			err("The parameter name is "
				.. (param_type == "table" and "a table" or tostring(param))
				.. ", but it should be a number or a string.")
		end
		in_the_param = " in the " .. param
	end

	if not lang_code or lang_code == "" then
		if param == true then
			err("The " .. code_desc .. " is missing.")
		else
			err("The " .. param .. " (" .. code_desc .. ") is missing.")
		end
	elseif type(lang_code) ~= "string" then
		err("The " .. code_desc .. in_the_param .. " is supposed to be a string but is a " .. type(lang_code) .. ".")
	-- Can use string.find because language codes only contain ASCII.
	elseif not_real_lang or lang_code:find("^%l%l%l?$") or lang_code:find("^%l%l%l%-%l%l%l$") or lang_code:find("^%l%l%l%-%l%l%l%-%l%l%l$") then
		err("The " .. code_desc .. " \"" .. lang_code .. "\"" .. in_the_param .. " is not valid. See List of languages.")
	else
		err("Please specify a " .. code_desc .. in_the_param .. ". The value \"" .. lang_code .. "\" is not valid. See List of languages.")
	end
end

-- Apply a replacement specification to `text` and return the result.
-- `replacements.from` (optional) is a list of patterns; each is replaced, in
-- order, by the entry at the same index of `replacements.to` (or by the empty
-- string when that entry is missing).
-- `replacements.remove_diacritics` (optional) is the body of a character set;
-- those characters are deleted from the NFD form of the text, which is then
-- recomposed to NFC.
local function do_entry_name_or_sort_key_replacements(text, replacements)
	local patterns = replacements.from
	if patterns then
		for index, pattern in ipairs(patterns) do
			local substitute = replacements.to[index] or ""
			-- The extra parentheses keep only the substituted string and drop
			-- the match count that gsub also returns.
			text = (mw.ustring.gsub(text, pattern, substitute))
		end
	end

	local diacritics = replacements.remove_diacritics
	if diacritics then
		local decomposed = mw.ustring.toNFD(text)
		local stripped = mw.ustring.gsub(decomposed, "[" .. diacritics .. "]", "")
		text = mw.ustring.toNFC(stripped)
	end

	return text
end

-- Table holding the methods of language objects; export.makeObject installs it
-- as the metatable of every object it creates.
local Language = {}

-- Return the code stored on this language object (`_code`).
function Language:getCode()
	return self._code
end

-- Return the canonical name: positional entry 1 of the raw data, falling back
-- to its `canonicalName` field.
function Language:getCanonicalName()
	return self._rawData[1] or self._rawData.canonicalName
end

-- Return the display form of the language, which is its canonical name.
function Language:getDisplayForm()
	return self:getCanonicalName()
end

-- Load the extra data for this language, then delegate to
-- `getOtherNames(self, onlyOtherNames)` in Module:language-like and return its
-- result.
function Language:getOtherNames(onlyOtherNames)
	self:loadInExtraData()
	return require("Module:language-like").getOtherNames(self, onlyOtherNames)
end

-- Return the `aliases` entry of the extra data (loading the extra data first),
-- or a new empty table when there is none.
function Language:getAliases()
	self:loadInExtraData()
	return self._extraData.aliases or {}
end

-- Load the extra data for this language, then delegate to
-- `getVarieties(self, flatten)` in Module:language-like and return its result.
function Language:getVarieties(flatten)
	self:loadInExtraData()
	return require("Module:language-like").getVarieties(self, flatten)
end

-- Return the `type` field of the raw data, or "regular" when it is not set.
function Language:getType()
	return self._rawData.type or "regular"
end

-- Return the list of Wikimedia language objects for this language.
-- The list is built on first use and cached on the object. The codes come from
-- the raw data's `wikimedia_codes`, defaulting to this language's own code, and
-- each one is resolved through Module:wikimedia languages.
function Language:getWikimediaLanguages()
	if not self._wikimediaLanguageObjects then
		local m_wikimedia_languages = require("Module:wikimedia languages")
		self._wikimediaLanguageObjects = {}
		local wikimedia_codes = self._rawData.wikimedia_codes or { self._code }

		for _, wlangcode in ipairs(wikimedia_codes) do
			table.insert(self._wikimediaLanguageObjects, m_wikimedia_languages.getByCode(wlangcode))
		end
	end

	return self._wikimediaLanguageObjects
end

-- Return the title of the Wikipedia article for this language.
-- Lookup order:
--   1. `wikipedia_article` in the raw data (returned directly, not cached);
--   2. the value cached on the object by an earlier call;
--   3. the 'enwiki' sitelink of the language's Wikidata item, when the item is
--      known and `mw.wikibase` is available;
--   4. the category name with "Creole language" replaced by "Creole".
-- Results of steps 3 and 4 are cached in `self._wikipedia_article`.
function Language:getWikipediaArticle()
	if self._rawData.wikipedia_article then
		return self._rawData.wikipedia_article
	elseif self._wikipedia_article then
		return self._wikipedia_article
	elseif self:getWikidataItem() and mw.wikibase then
		self._wikipedia_article = mw.wikibase.sitelink(self:getWikidataItem(), 'enwiki')
	end

	if not self._wikipedia_article then
		-- gsub also returns a match count; the single-target assignment keeps
		-- only the resulting string.
		self._wikipedia_article = mw.ustring.gsub(self:getCategoryName(), "Creole language", "Creole")
	end

	return self._wikipedia_article
end

-- Return wikitext linking to the Wikipedia article for this language, with the
-- canonical name as the link label.
-- NOTE(review): the wikilink markup around the label had been stripped from
-- this string (only `"" .. name .. ""` was left). The `[[w:<article>|<name>]]`
-- form is a reconstruction; confirm the interwiki prefix against the
-- authoritative copy of the module.
function Language:makeWikipediaLink()
	return "[[w:" .. self:getWikipediaArticle() .. "|" .. self:getCanonicalName() .. "]]"
end

-- Return the Wikidata item stored as positional entry 2 of the raw data.
-- A numeric value is converted to a string with a "Q" prefix; any other value
-- (including nil) is returned unchanged.
function Language:getWikidataItem()
	local item = self._rawData[2]

	if type(item) == "number" then
		return "Q" .. item
	else
		return item
	end
end

-- Return the list of script objects for this language.
-- The list is built on first use from getScriptCodes(), resolving each code
-- through Module:scripts, and is cached on the object.
function Language:getScripts()
	if not self._scriptObjects then
		local m_scripts = require("Module:scripts")
		self._scriptObjects = {}

		for _, sc in ipairs(self:getScriptCodes()) do
			table.insert(self._scriptObjects, m_scripts.getByCode(sc))
		end
	end

	return self._scriptObjects
end

-- Return the list of script codes from the raw data: the `scripts` field, else
-- positional entry 4, else the default list { "None" }.
function Language:getScriptCodes()
	return self._rawData.scripts or self._rawData[4] or { "None" }
end

-- Return the family object of this language, or nil when none is found.
-- The family code is positional entry 3 of the raw data, falling back to its
-- `family` field. The object is resolved through Module:families and cached on
-- this object once found.
function Language:getFamily()
	if self._familyObject then
		return self._familyObject
	end

	local family = self._rawData[3] or self._rawData.family
	if family then
		self._familyObject = require("Module:families").getByCode(family)
	end

	return self._familyObject
end

-- Return the list of ancestor language objects, computed once and cached.
-- When the raw data lists `ancestors`, each code is resolved as a regular
-- language or, failing that, as an etymology language. Otherwise the ancestor
-- is the proto-language of the nearest enclosing family that has one; the list
-- stays empty when no such proto-language is found.
function Language:getAncestors()
	if not self._ancestorObjects then
		self._ancestorObjects = {}

		if self._rawData.ancestors then
			for _, ancestor in ipairs(self._rawData.ancestors) do
				table.insert(
					self._ancestorObjects,
					export.getByCode(ancestor) or require("Module:etymology languages").getByCode(ancestor)
				)
			end
		else
			local fam = self:getFamily()
			local protoLang = fam and fam:getProtoLanguage() or nil

			-- For the case where the current language is the proto-language
			-- of its family, we need to step up a level higher right from the start.
			if protoLang and protoLang:getCode() == self:getCode() then
				fam = fam:getFamily()
				protoLang = fam and fam:getProtoLanguage() or nil
			end

			-- Climb the family tree until a proto-language is found, the tree
			-- runs out, or the family with code "qfa-not" is reached.
			while not protoLang and not (not fam or fam:getCode() == "qfa-not") do
				fam = fam:getFamily()
				protoLang = fam and fam:getProtoLanguage() or nil
			end

			table.insert(self._ancestorObjects, protoLang)
		end
	end

	return self._ancestorObjects
end

-- Depth-first walk over the ancestors of `node`.
-- `func` is called on each ancestor; if it returns a falsy value, that
-- ancestor's own ancestors are searched before moving on to the next sibling.
-- Returns the first truthy value produced, or nothing when the whole tree has
-- been visited without one.
local function iterateOverAncestorTree(node, func)
	for _, ancestor in ipairs(node:getAncestors()) do
		if ancestor then
			local ret = func(ancestor) or iterateOverAncestorTree(ancestor, func)
			if ret then
				return ret
			end
		end
	end
end

-- Return the chain of single ancestors of this language, computed once and
-- cached. Starting from this language, the chain follows the ancestor for as
-- long as the current language has exactly one. Each ancestor found is
-- inserted at the front, so the most distant ancestor comes first.
function Language:getAncestorChain()
	if not self._ancestorChain then
		self._ancestorChain = {}
		local step = #self:getAncestors() == 1 and self:getAncestors()[1] or nil

		while step do
			table.insert(self._ancestorChain, 1, step)
			step = #step:getAncestors() == 1 and step:getAncestors()[1] or nil
		end
	end

	return self._ancestorChain
end

-- Return true if any language in this language's ancestor tree has the same
-- code as `otherlang`, false otherwise.
function Language:hasAncestor(otherlang)
	local function compare(ancestor)
		return ancestor:getCode() == otherlang:getCode()
	end

	return iterateOverAncestorTree(self, compare) or false
end

-- Return the category name for this language: the canonical name followed by
-- " language", unless the name already ends in "language" or "Language".
-- Unless `nocap` is truthy, the result is passed through the content
-- language's `ucfirst`.
function Language:getCategoryName(nocap)
	local name = self:getCanonicalName()

	-- If the name already has "language" in it, don't add it.
	if not name:find("[Ll]anguage$") then
		name = name .. " language"
	end
	if not nocap then
		name = mw.getContentLanguage():ucfirst(name)
	end
	return name
end

-- Return wikitext linking to the category of this language, with the display
-- form as the link label.
-- NOTE(review): the wikilink markup around the label had been stripped from
-- this string (only `"" .. name .. ""` was left). The
-- `[[:Category:<category name>|<display form>]]` form is a reconstruction;
-- confirm the link target against the authoritative copy of the module.
function Language:makeCategoryLink()
	return "[[:Category:" .. self:getCategoryName() .. "|" .. self:getDisplayForm() .. "]]"
end

-- Return the `standardChars` field of the raw data (nil when not set).
function Language:getStandardCharacters()
	return self._rawData.standardChars
end

-- Convert display text into an entry (page) name.
-- First, an optional leading "¿"/"¡", trailing whitespace and at most one
-- trailing punctuation mark from a fixed set are stripped, provided that
-- something other than whitespace and punctuation remains; otherwise the text
-- is left as is. Then, if the raw data's `entry_name` is a table, its
-- replacements are applied.
function Language:makeEntryName(text)
	text = mw.ustring.match(text, "^[¿¡]?(.-[^%s%p].-)%s*[؟?!;՛՜ ՞ ՟？！︖︕।॥။၊་།]?$") or text

	if self:getCode() == "ar" then
		local U = mw.ustring.char
		local taTwiil = U(0x640)
		local waSla = U(0x671)
		-- diacritics ordinarily removed by entry_name replacements
		local Arabic_diacritics = U(0x64B, 0x64C, 0x64D, 0x64E, 0x64F, 0x650, 0x651, 0x652, 0x670)

		-- Text that is only U+0671, or a single diacritic optionally preceded
		-- by U+0640, is returned before the entry_name replacements run.
		if text == waSla or mw.ustring.find(text, "^" .. taTwiil .. "?[" .. Arabic_diacritics .. "]" .. "$") then
			return text
		end
	end

	if type(self._rawData.entry_name) == "table" then
		text = do_entry_name_or_sort_key_replacements(text, self._rawData.entry_name)
	end

	return text
end

-- Return true if the language has display processing enabled, i.e.
-- lang:makeDisplayText() does non-trivial processing. The result is a real
-- boolean reflecting whether the raw data has a truthy `display` entry.
function Language:hasDisplayProcessing()
	return not not self._rawData.display
end

-- Apply display-text replacements to `text`, if any, and return the result.
-- Replacements are applied only when the raw data's `display` entry is a
-- table; otherwise `text` is returned unchanged.
function Language:makeDisplayText(text)
	if type(self._rawData.display) == "table" then
		text = do_entry_name_or_sort_key_replacements(text, self._rawData.display)
	end
	return text
end

-- Codes of languages for which makeSortKey treats "I"/"i" specially (mapping
-- "I" to "ı" before lowercasing and "i" to "İ" before uppercasing).
-- Add to data tables?
local has_dotted_undotted_i = {
	["az"] = true,
	["crh"] = true,
	["gag"] = true,
	["kaa"] = true,
	["tt"] = true,
	["tr"] = true,
	["zza"] = true,
}

-- Build the sort key for `name`.
-- `sc` (optional) is a script object; only its code is used, and only when the
-- sort key is produced by a separate module.
-- Steps: lowercase the name, strip initial hyphens and "*", apply the
-- language-specific rules (a replacement table, or the `makeSortKey` function
-- of the module named by the raw data's `sort_key` string), remove
-- parentheses, and uppercase the result. For languages listed in
-- `has_dotted_undotted_i`, "I" is mapped to "ı" before lowercasing and "i" to
-- "İ" before uppercasing.
function Language:makeSortKey(name, sc)
	if has_dotted_undotted_i[self:getCode()] then
		name = name:gsub("I", "ı")
	end

	name = mw.ustring.lower(name)

	-- Remove initial hyphens and *
	local hyphens_regex = "^[-־ـ*]+(.)"
	name = mw.ustring.gsub(name, hyphens_regex, "%1")

	-- If there are language-specific rules to generate the key, use those
	if type(self._rawData.sort_key) == "table" then
		name = do_entry_name_or_sort_key_replacements(name, self._rawData.sort_key)
	elseif type(self._rawData.sort_key) == "string" then
		name = require("Module:" .. self._rawData.sort_key).makeSortKey(name, self:getCode(), sc and sc:getCode())
	end

	-- Remove parentheses, as long as they are either preceded or followed by something
	-- (The "()" inside these character sets had been stripped, which left the
	-- malformed pattern "[]+".)
	name = mw.ustring.gsub(name, "(.)[()]+", "%1")
	name = mw.ustring.gsub(name, "[()]+(.)", "%1")

	if has_dotted_undotted_i[self:getCode()] then
		name = name:gsub("i", "İ")
	end

	return mw.ustring.upper(name)
end

-- Return true if the raw data has a truthy `override_translit` entry, false
-- otherwise.
function Language:overrideManualTranslit()
	if self._rawData.override_translit then
		return true
	else
		return false
	end
end

-- Transliterate `text` with this language's transliteration module.
-- `sc` (optional) is a script object whose code is passed to the module.
-- `module_override` (optional) names a module to use instead of the raw data's
-- `translit_module`; its use is tracked through Module:debug.
-- Returns nil when no module is available or `text` is falsy; otherwise
-- returns whatever the module's `tr(text, langCode, scriptCode)` returns.
function Language:transliterate(text, sc, module_override)
	if not ((module_override or self._rawData.translit_module) and text) then
		return nil
	end

	if module_override then
		require("Module:debug").track("module_override")
	end

	return require("Module:" .. (module_override or self._rawData.translit_module)).tr(text, self:getCode(), sc and sc:getCode() or nil)
end

-- Return true if the raw data names a `translit_module`, false otherwise.
function Language:hasTranslit()
	return self._rawData.translit_module and true or false
end

-- Return true if the raw data has a truthy `link_tr` entry, false otherwise.
function Language:link_tr()
	return self._rawData.link_tr and true or false
end

-- Serialize this language through Module:JSON.
-- The serialized table holds the code, names, category name, family, scripts,
-- type, ancestors, Wikimedia codes, Wikidata item, other names, aliases and
-- varieties. The entry-name rules are flattened into `entryNamePatterns` (a
-- list of { from, to } pairs) and `entryNameRemoveDiacritics`.
function Language:toJSON()
	local entryNamePatterns = nil
	local entryNameRemoveDiacritics = nil

	if self._rawData.entry_name then
		entryNameRemoveDiacritics = self._rawData.entry_name.remove_diacritics
		if self._rawData.entry_name.from then
			entryNamePatterns = {}
			for i, from in ipairs(self._rawData.entry_name.from) do
				-- A missing replacement means "replace with nothing".
				local to = self._rawData.entry_name.to[i] or ""
				table.insert(entryNamePatterns, { from = from, to = to })
			end
		end
	end

	local ret = {
		ancestors = self._rawData.ancestors,
		canonicalName = self:getCanonicalName(),
		categoryName = self:getCategoryName("nocap"),
		code = self._code,
		entryNamePatterns = entryNamePatterns,
		entryNameRemoveDiacritics = entryNameRemoveDiacritics,
		family = self._rawData[3] or self._rawData.family,
		otherNames = self:getOtherNames(true),
		aliases = self:getAliases(),
		varieties = self:getVarieties(),
		scripts = self._rawData.scripts or self._rawData[4],
		type = self:getType(),
		wikimediaLanguages = self._rawData.wikimedia_codes,
		wikidataItem = self:getWikidataItem(),
	}

	return require("Module:JSON").toJSON(ret)
end

-- Do NOT use these methods!
-- All uses should be pre-approved on the talk page!

-- Return the raw data table this language object was created from.
function Language:getRawData()
	return self._rawData
end

-- Return the extra data table for this language, loading it first if needed.
function Language:getRawExtraData()
	self:loadInExtraData()
	return self._extraData
end

-- Field lookups that miss on a language object fall through to the Language
-- table, which makes the methods defined on it available to every object that
-- has Language as its metatable.
Language.__index = Language

-- Map a language code to the name (without the "Module:" prefix) of the data
-- module that holds it:
--   two lowercase letters               -> "languages/data2"
--   three lowercase letters             -> "languages/data3/" .. first letter
--   other lowercase-and-hyphen strings  -> "languages/datax"
-- Any other code yields nil.
function export.getDataModuleName(code)
	if code:find("^%l%l$") then
		return "languages/data2"
	end

	if code:find("^%l%l%l$") then
		return "languages/data3/" .. code:sub(1, 1)
	end

	if code:find("^[%l-]+$") then
		return "languages/datax"
	end

	return nil
end

-- Map a language code to the name (without the "Module:" prefix) of the
-- extra-data module that holds it:
--   two lowercase letters               -> "languages/extradata2"
--   three lowercase letters             -> "languages/extradata3/" .. first letter
--   other lowercase-and-hyphen strings  -> "languages/extradatax"
-- Any other code yields nil.
function export.getExtraDataModuleName(code)
	if code:find("^%l%l$") then
		return "languages/extradata2"
	end

	if code:find("^%l%l%l$") then
		return "languages/extradata3/" .. code:sub(1, 1)
	end

	if code:find("^[%l-]+$") then
		return "languages/extradatax"
	end

	return nil
end

-- Return the raw data entry for `code` from its data module, or nil when the
-- code maps to no data module or the module has no entry for it.
local function getRawLanguageData(code)
	local data_module = export.getDataModuleName(code)
	if not data_module then
		return nil
	end

	return mw.loadData("Module:" .. data_module)[code] or nil
end

-- Return the extra data entry for `code` from its extra-data module, or nil
-- when the code maps to no such module or the module has no entry for it.
local function getRawExtraLanguageData(code)
	local extra_module = export.getExtraDataModuleName(code)
	if not extra_module then
		return nil
	end

	return mw.loadData("Module:" .. extra_module)[code] or nil
end

-- Ensure `self._extraData` is populated, loading it from the extra-data module
-- on first use. An empty table is used as a fallback if the extra data is nil.
--
-- The data is stored on the object itself. It was previously written to
-- `getmetatable(self)`; for objects built by export.makeObject that metatable
-- is the shared Language table, so the first language to load its extra data
-- made that data visible to (and reused by) every other language object.
function Language:loadInExtraData()
	if not self._extraData then
		self._extraData = getRawExtraLanguageData(self._code) or {}
	end
end

-- Wrap raw language `data` for `code` in a language object (a table with
-- `_rawData` and `_code` whose metatable is Language). Returns nil when `data`
-- is nil or false. Data flagged `deprecated` is reported to Module:debug's
-- tracker first.
function export.makeObject(code, data)
	if not data then
		return nil
	end

	if data.deprecated then
		require("Module:debug").track({
			"languages/deprecated",
			"languages/deprecated/" .. code,
		})
	end

	return setmetatable({ _rawData = data, _code = code }, Language)
end

-- Look up a language object by code.
-- `code` must be a string; anything else raises an error.
-- `allowEtymLang` / `allowFamily` (optional) additionally try
-- Module:etymology languages and Module:families, in that order, when no
-- regular language matches.
-- `paramForError` (optional): when nothing matches and this is truthy,
-- export.err is called with it as the parameter name; otherwise the function
-- returns nil for an unknown code.
function export.getByCode(code, paramForError, allowEtymLang, allowFamily)
	if type(code) ~= "string" then
		local received
		if code == nil then
			received = "nil"
		else
			received = "a " .. type(code)
		end
		error("The function getByCode expects a string as its first argument, but received " .. received .. ".")
	end

	local lang = export.makeObject(code, getRawLanguageData(code))

	if not lang and allowEtymLang then
		lang = require("Module:etymology languages").getByCode(code)
	end

	if not lang and allowFamily then
		lang = require("Module:families").getByCode(code)
	end

	if lang or not paramForError then
		return lang
	end

	-- Nothing matched and the caller asked for an error: describe which kinds
	-- of code would have been accepted.
	local codetext
	if allowEtymLang then
		codetext = allowFamily and "language, etymology language or family code"
			or "language or etymology language code"
	else
		codetext = allowFamily and "language or family code" or "language code"
	end

	export.err(code, paramForError, codetext)

	return lang
end

-- Look up a language object by name using Module:languages/by name.
-- The name is searched in the module's `all` sub-table when there is one, and
-- in the module's top-level table otherwise or when `all` has no match.
-- Returns nil for an unknown name, or raises an error instead when
-- `errorIfInvalid` is truthy.
function export.getByName(name, errorIfInvalid)
	local names_to_codes = mw.loadData("Module:languages/by name")

	local code = names_to_codes.all and names_to_codes.all[name]
	if not code then
		code = names_to_codes[name]
	end

	if code then
		return export.makeObject(code, getRawLanguageData(code))
	end

	if errorIfInvalid then
		error("The language name \"" .. name .. "\" is not valid. See List of languages.")
	end

	return nil
end

-- Look up a language object by canonical name using
-- Module:languages/canonical names.
-- `allowEtymLang` / `allowFamily` (optional) additionally try
-- Module:etymology languages and Module:families, in that order, when no
-- regular language matches. For families, a trailing " languages" is removed
-- from the name before the lookup.
-- Returns nil for an unknown name, or raises an error instead when
-- `errorIfInvalid` is truthy.
function export.getByCanonicalName(name, errorIfInvalid, allowEtymLang, allowFamily)
	local names_to_codes = mw.loadData("Module:languages/canonical names")
	local code = names_to_codes and names_to_codes[name]

	local lang = nil
	if code then
		lang = export.makeObject(code, getRawLanguageData(code)) or nil
	end

	if not lang and allowEtymLang then
		lang = require("Module:etymology languages").getByCanonicalName(name)
	end

	if not lang and allowFamily then
		local famname = name:match("^(.*) languages$") or name
		lang = require("Module:families").getByCanonicalName(famname)
	end

	if lang or not errorIfInvalid then
		return lang
	end

	-- Nothing matched and the caller asked for an error: describe which kinds
	-- of name would have been accepted.
	local text
	if allowEtymLang then
		text = allowFamily and "language, etymology language or family name"
			or "language or etymology language name"
	else
		text = allowFamily and "language or family name" or "language name"
	end

	error("The " .. text .. " \"" .. name .. "\" is not valid.")

	return lang
end

-- Return an iterator function over every language in
-- Module:languages/alldata. Each call yields the next language object, and nil
-- once the data is exhausted. Calling iterateAll increments the expensive
-- function count.
function export.iterateAll()
	mw.incrementExpensiveFunctionCount()
	local m_data = mw.loadData("Module:languages/alldata")
	local func, t, var = pairs(m_data)

	return function()
		local code, data = func(t, var)
		-- Advance the control variable. Without this, every call restarts from
		-- the initial key and yields the same first entry forever.
		var = code
		return export.makeObject(code, data)
	end
end

-- If `lang` is an etymology language, iterate through its parent languages
-- until a non-etymology language is found, and return that. Each parent code
-- is resolved as a regular language, then as an etymology language, then as a
-- family. A `lang` that is not an etymology language is returned unchanged.
function export.getNonEtymological(lang)
	while lang:getType() == "etymology language" do
		local parentCode = lang:getParentCode()
		lang = export.getByCode(parentCode)
			or require("Module:etymology languages").getByCode(parentCode)
			or require("Module:families").getByCode(parentCode)
	end

	return lang
end

-- Hand the table of public functions to whoever loads this module.
return export