-- Module:Nan-pron

local export = {}

local find = mw.ustring.find local gsub = mw.ustring.gsub local sub = mw.ustring.sub local match = mw.ustring.match local len = mw.ustring.len local split = mw.text.split

-- Text emitted for each POJ initial by the Phofsit Daibuun conversion
-- (export.poj_to_psdb_conv). The keys ending in "i" ("chi", "chhi", "si") are
-- the variants that conversion selects for ch/chh/s when the final begins
-- with "i"; the empty key is used for syllables that have no initial.
local psdb_initial = { ["p"] = "'p", ["ph"] = "ph", ["b"] = "'b", ["t"] = "'d", ["th"] = "'t", ["k"] = "'k", ["kh"] = "'q", ["g"] = "'g", ["chi"] = "c", ["ch"] = "z", ["chhi"] = "ch", ["chh"] = "zh", ["si"] = "s", ["s"] = "s", ["j"] = "j", ["l"] = "l", ["h"] = "'h", ["m"] = "m", ["n"] = "n", ["ng"] = "ng", [""] = "'" }

-- Phofsit Daibuun spelling for each listed POJ final, keyed by
-- "<final><tone digit>". Built once instead of on every psdb_final call.
local psdb_basic_final = {
	--single vowel tone 12357
	["a1"] = "af", ["a2"] = "ar", ["a3"] = "ax", ["a5"] = "aa", ["a7"] = "a",
	["i1"] = "y", ["i2"] = "ie", ["i3"] = "ix", ["i5"] = "ii", ["i7"] = "i",
	["u1"] = "w", ["u2"] = "uo", ["u3"] = "ux", ["u5"] = "uu", ["u7"] = "u",
	["e1"] = "ef", ["e2"] = "ea", ["e3"] = "ex", ["e5"] = "ee", ["e7"] = "e",
	["oo1"] = "of", ["oo2"] = "or", ["oo3"] = "ox", ["oo5"] = "oo", ["oo7"] = "o",
	["o1"] = "oy", ["o2"] = "oir", ["o3"] = "oix", ["o5"] = "ooi", ["o7"] = "oi",
	["ng1"] = "'ngf", ["ng2"] = "'ngr", ["ng3"] = "'ngx", ["ng5"] = "'ngg", ["ng7"] = "'ng",
	["m1"] = "'mf", ["m2"] = "'mr", ["m3"] = "'mx", ["m5"] = "'mm", ["m7"] = "'m",
	--double vowel tone 12357
	["ai1"] = "ay", ["ai2"] = "ae", ["ai3"] = "aix", ["ai5"] = "aai", ["ai7"] = "ai",
	["au1"] = "aw", ["au2"] = "ao", ["au3"] = "aux", ["au5"] = "aau", ["au7"] = "au",
	["ia1"] = "iaf", ["ia2"] = "iar", ["ia3"] = "iax", ["ia5"] = "iaa", ["ia7"] = "ia",
	["iau1"] = "iaw", ["iau2"] = "iao", ["iau3"] = "iaux", ["iau5"] = "iaau", ["iau7"] = "iau",
	["io1"] = "ioy", ["io2"] = "ioir", ["io3"] = "ioix", ["io5"] = "iooi", ["io7"] = "ioi",
	["iu1"] = "iw", ["iu2"] = "iuo", ["iu3"] = "iux", ["iu5"] = "iuu", ["iu7"] = "iu",
	["oa1"] = "oaf", ["oa2"] = "oar", ["oa3"] = "oax", ["oa5"] = "oaa", ["oa7"] = "oa",
	["oai1"] = "oay", ["oai2"] = "oae", ["oai3"] = "oaix", ["oai5"] = "oaai", ["oai7"] = "oai",
	["oe1"] = "oef", ["oe2"] = "oea", ["oe3"] = "oex", ["oe5"] = "oee", ["oe7"] = "oe",
	["ui1"] = "uy", ["ui2"] = "uie", ["ui3"] = "uix", ["ui5"] = "uii", ["ui7"] = "ui",
	--nasal vowel tone 12357
	--nasal ending tone 12357
	["ian1"] = "iefn", ["ian2"] = "iern", ["ian3"] = "iexn", ["ian5"] = "ieen", ["ian7"] = "ien",
	["iong1"] = "iofng", ["iong2"] = "iorng", ["iong3"] = "ioxng", ["iong5"] = "ioong", ["iong7"] = "iong",
	--stopped single vowel tone 48
	["op4"] = "ob", ["op8"] = "op",
	["ot4"] = "od", ["ot8"] = "ot",
	["ok4"] = "og", ["ok8"] = "ok",
	--stopped double vowel tone 48
	["iop4"] = "iob", ["iop8"] = "iop",
	["iot4"] = "iod", ["iot8"] = "iot",
	["iok4"] = "iog", ["iok8"] = "iok",
}

-- Chinese-numeral tone names accepted by psdb_final, mapped to tone digits.
local psdb_tone_digit = {
	["一"] = "1", ["二"] = "2", ["三"] = "3", ["四"] = "4",
	["五"] = "5", ["六"] = "6", ["七"] = "7", ["八"] = "8",
}

-- Letter written for each stop coda in tone 4 (tone 8 keeps the POJ letter).
local psdb_tone4_coda = { p = "b", t = "d", k = "g", h = "q" }

-- Spells a stopped final (tone "4" or "8"): the coda is removed, the remaining
-- vowel part is looked up with tone 7, and the coda letter is appended again.
-- Returns nothing when the vowel part is not in psdb_basic_final.
local function psdb_stopped_final(text, tone)
	local basic = gsub(text, "[ptkh](ⁿ?)" .. tone .. "$", "%1") .. "7"
	local ending = match(text, "([ptkh])ⁿ?" .. tone .. "$")
	if tone == "4" then
		ending = psdb_tone4_coda[ending]
	end
	local prefix = ""
	if find(basic, "ⁿ") then
		-- Nasalised finals are looked up without ⁿ (plain "o" as "oo") and
		-- get a "v" prefix.
		basic = gsub(basic, "ⁿ", "")
		basic = gsub(basic, "^o([12357])$", "oo%1")
		prefix = "v"
	end
	local spelling = psdb_basic_final[basic]
	if spelling then
		return prefix .. spelling .. ending
	end
end

-- Converts one POJ final plus tone into its Phofsit Daibuun spelling.
-- @param text  POJ final (tone marks already removed) followed by the tone,
--              given as a digit or as one of the numerals 一 … 八.
-- @return the spelling, or nil when the final is not covered by the tables.
local function psdb_final(text)
	text = gsub(text, "[一二三四五六七八]", psdb_tone_digit)

	-- Nasalised open final: look up the plain vowel and prefix "v".
	if find(text, "ⁿ[12357]$") then
		local basic = gsub(text, "ⁿ", "")
		basic = gsub(basic, "^o([12357])$", "oo%1")
		local spelling = psdb_basic_final[basic]
		if spelling then
			return "v" .. spelling
		end
		return nil
	end

	-- Final ending in m/n/ng (ian and iong have their own table entries):
	-- look up the vowel part and append the nasal coda unchanged.
	if find(text, ".[mn]g?[12357]$") and not find(text, "^ian[12357]$") and not find(text, "^iong[12357]$") then
		local basic = gsub(text, "[mn]g?([12357])$", "%1")
		local ending = match(text, "([mn]g?)[12357]$")
		basic = gsub(basic, "^o([12357])$", "oo%1")
		local spelling = psdb_basic_final[basic]
		if spelling then
			return spelling .. ending
		end
		return nil
	end

	-- Stopped finals; (i)op/(i)ot/(i)ok are listed directly in the table.
	if find(text, "[ptkh]ⁿ?4$") and not find(text, "^i?o[ptk]4$") then
		return psdb_stopped_final(text, "4")
	end
	if find(text, "[ptkh]ⁿ?8$") and not find(text, "^i?o[ptk]8$") then
		return psdb_stopped_final(text, "8")
	end

	return psdb_basic_final[text]
end

-- Validates the placement of tone marks in a POJ string.
-- @param text  POJ input, or nil.
-- @return nil when text is nil, otherwise text unchanged.
-- Raises an error naming the corrected spelling when any rule below would
-- change the input.
function export.poj_check_invalid(text)
	if not text then
		return nil
	end

	-- Work on decomposed text so tone marks are separate code points. The
	-- temporary trailing "-" gives the last syllable the same right-hand
	-- context as the others; it is removed again before comparing.
	local correct = mw.ustring.toNFD(text) .. "-"
	local accent = "[́̀̂̌̄̍̋]"
	local accent_without_tone8 = gsub(accent, "̍", "")
	-- Every rule swaps its 2nd and 3rd captures, i.e. moves the tone mark to
	-- the neighbouring vowel letter.
	local switch = "%1%3%2%4"
	-- getCurrentTitle is a function and must be called before reading .text.
	local title = mw.title.getCurrentTitle().text

	correct = gsub(correct, "([oO])([ae])(" .. accent_without_tone8 .. ")([ⁿ%-/ ])", switch)
	correct = gsub(correct, "([oO])(" .. accent .. ")([ae])([imnptkh][gh]?ⁿ?)", switch)
	correct = gsub(correct, "([oO]a)(i)(" .. accent .. ")(h?ⁿ?)", switch)
	correct = gsub(correct, "([aA])([iu])(" .. accent .. ")(h?ⁿ?)", switch)
	correct = gsub(correct, "([iI])(" .. accent .. ")([aou])(u?[mnptkh]?g?ⁿ?)", switch)
	correct = gsub(correct, "([iI]a)(u)(" .. accent .. ")(h?ⁿ?)", switch)
	correct = gsub(correct, "([uU])(i)(" .. accent .. ")([hⁿ]?)", switch)
	correct = gsub(correct, "([eE])(e)(" .. accent .. ")(h?ⁿ?)", switch)
	correct = gsub(correct, "([eE])(re)(" .. accent_without_tone8 .. ")([%-/ ])", switch)
	-- correct = gsub(correct, "([oO]" .. accent .. ")[ou·]", "%1͘")

	-- On pages whose title contains 子 or 仔 (except 明仔早), a non-initial
	-- syllable "á" has to be entered as 仔.
	if find(title, "[子仔]") and title ~= "明仔早" then
		correct = gsub(correct, "%-" .. mw.ustring.toNFD("á") .. "([%- /])", "-仔%1")
	end

	correct = mw.ustring.toNFC(gsub(correct, "-$", ""))
	if text ~= correct then
		error("Invalid POJ input \"" .. text .. "\": please change it to \"" .. correct .. "\"")
	end
	return text
end

-- Rewrites a POJ string in Tâi-lô spelling.
-- @param text  POJ string, or a frame-like table whose args[1] holds it.
-- @return the converted string in NFC form.
function export.poj_to_tl_conv(text)
	if type(text) == "table" then
		text = text.args[1]
	end

	local tone_opt = "[́̀̂̌̄̍̋]?"
	local raised = { ["e"] = "i", ["E"] = "I", ["o"] = "u", ["O"] = "U" }
	-- Replaces the captured vowel by its Tâi-lô counterpart and keeps the
	-- rest of the match.
	local function raise(vowel, rest)
		return raised[vowel] .. rest
	end
	local tone_to_second_vowel = "%1%3%2"

	-- Applied strictly in this order; each entry is { pattern, replacement }.
	local rules = {
		{ "仔", "á" },
		{ "%(([^%)]+)%)", "%1-%1-%1" },
		{ "([eE])(̍?k)", raise },
		{ "([eE])(" .. tone_opt .. "ng)", raise },
		{ "([oO])(" .. tone_opt .. "[ae])", raise },
		{ "([uU])(" .. tone_opt .. ")([aei])", tone_to_second_vowel },
		{ "([eE])(" .. tone_opt .. ")(re)", tone_to_second_vowel },
		{ "([oO]" .. tone_opt .. ")͘", "%1o" },
		{ "(h?)ⁿ", "nn%1" },
		{ "[cC]h", { ["ch"] = "ts", ["Ch"] = "Ts" } },
		{ '/([^ ])', ' / %1' },
	}

	-- Boundary markers are dropped first, then everything runs on NFD text so
	-- the tone marks are separate code points.
	local result = mw.ustring.toNFD((gsub(text, "#", "")))
	for _, rule in ipairs(rules) do
		result = gsub(result, rule[1], rule[2])
	end
	return mw.ustring.toNFC(result)
end

do
	-- Turns a list of strings into a lookup set.
	local function set(keys)
		local result = {}
		for _, key in ipairs(keys) do
			result[key] = true
		end
		return result
	end

	-- Stand-in for locations that have no extra initials/finals registered.
	local no_extras = {}

	-- Initials accepted for every location.
	local valid_initials = set {
		"p", "ph", "m", "b", "t", "th", "n", "l", "ch", "chh", "s",
		"k", "kh", "ng", "g", "h", "",
	}

	-- Additional initials accepted per location.
	local more_valid_initials = {
		["Xiamen"] = set {},
		["Xiamen-d"] = set { "j" },
		["Tong'an"] = set {},
		["Quanzhou"] = set {},
		["Jinjiang"] = set {},
		["Zhangzhou"] = set { "j" },
		["Taipei"] = set {},
		["Kaohsiung"] = set { "j" },
		["Kinmen"] = set {},
		["Singapore"] = set { "j" },
		["Penang"] = set { "f", "d", "j", "sh", "r", "w", "y" },
		["Philippines"] = set {},
	}

	-- Finals accepted for every location.
	local valid_finals = set {
		"a", "ah", "ai", "aiⁿ", "ak", "am", "an", "aⁿ", "ang", "ap", "at", "au", "auh",
		"e", "eh", "eng",
		"i", "ia", "iah", "iak", "iam", "ian", "iaⁿ", "iang", "iap", "iat", "iau", "iauⁿ",
		"ih", "im", "in", "iⁿ", "io", "ioh", "iok", "iong", "ip", "it", "iu",
		"m", "ng",
		"o", "o͘", "oa", "oah", "oai", "oan", "oaⁿ", "oat", "oe", "oeh", "oh", "ok", "oⁿ", "ong",
		"u", "uh", "ui", "uiⁿ", "un", "ut",
	}

	-- Additional finals accepted per location.
	local more_valid_finals = {}
	more_valid_finals["Xiamen"] = set {
		"ahⁿ", "aih", "auhⁿ", "auⁿ", "ehⁿ", "ek", "eⁿ", "iahⁿ", "iauh", "iauhⁿ", "ihⁿ",
		"iuh", "iuⁿ", "mh", "ngh", "oaih", "oaihⁿ", "oaiⁿ", "oehⁿ", "o͘h", "ohⁿ", "uih", "uihⁿ",
	}
	-- Same list as Xiamen; the sets are never modified, so sharing is safe.
	more_valid_finals["Xiamen-d"] = more_valid_finals["Xiamen"]
	more_valid_finals["Tong'an"] = set {
		"ahⁿ", "aih", "auhⁿ", "auⁿ", "ek", "er", "erh", "iahⁿ", "iauh", "iauhⁿ", "ihⁿ", "ir",
		"iuh", "iuⁿ", "mh", "ngh", "oaih", "oaihⁿ", "oaiⁿ", "oang", "oehⁿ", "o͘h", "ohⁿ", "uih",
	}
	more_valid_finals["Quanzhou"] = set {
		"ahⁿ", "aih", "auhⁿ", "auⁿ", "ehⁿ", "er", "erh", "erm", "iahⁿ", "iauh", "iauhⁿ", "ihⁿ",
		"ir", "irh", "iuh", "iuⁿ", "mh", "ngh", "oaihⁿ", "oaiⁿ", "oang", "o͘h", "ohⁿ", "uih", "uihⁿ",
	}
	more_valid_finals["Jinjiang"] = set {
		"ahⁿ", "aih", "auhⁿ", "auⁿ", "iahⁿ", "iauh", "iauhⁿ", "ihⁿ", "iuh", "iuⁿ", "mh", "ngh",
		"oaihⁿ", "oang", "o͘h", "ohⁿ", "uih", "uihⁿ",
	}
	more_valid_finals["Zhangzhou"] = set {
		"ahⁿ", "aih", "auhⁿ", "auⁿ", "ee", "eeh", "ehⁿ", "ek", "eⁿ", "iahⁿ", "iauh", "iauhⁿ",
		"ihⁿ", "ioⁿ", "iuh", "mh", "ngh", "oaih", "oaihⁿ", "oaiⁿ", "o͘h", "ohⁿ", "om", "op",
	}
	more_valid_finals["Taipei"] = set {
		"ahⁿ", "aih", "auhⁿ", "auⁿ", "ehⁿ", "ek", "iahⁿ", "iauh", "iauhⁿ", "ihⁿ", "iuh", "iuⁿ",
		"mh", "ngh", "oaih", "oaihⁿ", "oaiⁿ", "o͘h", "ohⁿ", "om", "op", "uih",
	}
	more_valid_finals["Kaohsiung"] = set {
		"ahⁿ", "aih", "auhⁿ", "auⁿ", "ehⁿ", "ek", "eⁿ", "iahⁿ", "iauh", "iauhⁿ", "ihⁿ", "iuh",
		"iuⁿ", "mh", "ngh", "oaih", "oaiⁿ", "oaihⁿ", "o͘h", "ohⁿ", "om", "op",
	}
	more_valid_finals["Kinmen"] = set {
		"ahⁿ", "aih", "auhⁿ", "auⁿ", "ek", "er", "erh", "iahⁿ", "iauh", "iauhⁿ", "ihⁿ", "ir",
		"iuh", "iuⁿ", "mh", "ngh", "oaih", "oaihⁿ", "oaiⁿ", "oang", "oehⁿ", "o͘h", "ohⁿ", "uih",
	}
	more_valid_finals["Singapore"] = set {
		"ahⁿ", "auhⁿ", "auⁿ", "ee", "ehⁿ", "ek", "eⁿ", "er", "erh", "ern", "iahⁿ", "iauh",
		"iauhⁿ", "ihⁿ", "ioⁿ", "ir", "iuh", "iuⁿ", "mh", "ngh", "oaiⁿ", "oang", "oehⁿ", "oeⁿ",
		"ohⁿ", "ou", "ouⁿ", "uih",
	}
	more_valid_finals["Penang"] = set {
		"aih", "ee", "eeh", "eek", "eeng", "ei", "ek", "em", "en", "eⁿ", "eoi", "er", "erh",
		"ern", "ert", "et", "ik", "ing", "ioⁿ", "oaiⁿ", "oang", "o͘h", "oi", "oiⁿ", "om", "on",
		"ot", "ou", "uk", "um", "ung", "y", "yn",
	}
	more_valid_finals["Philippines"] = more_valid_finals["Jinjiang"]

	-- Checks whether initial + final is a listed POJ syllable for a location.
	-- @param initial  POJ initial ("" when the syllable has none).
	-- @param final    POJ final without tone marks.
	-- @param loc      location name such as "Xiamen" or "Penang"; a location
	--                 without registered extras is checked against the shared
	--                 tables only.
	-- @return nil when the syllable is listed, "" otherwise.
	function export.poj_check_syllable(initial, final, loc)
		local extra_initials = more_valid_initials[loc] or no_extras
		local extra_finals = more_valid_finals[loc] or no_extras
		local initial_ok = valid_initials[initial] or extra_initials[initial]
		local final_ok = valid_finals[final] or extra_finals[final]
		if not (initial_ok and final_ok) then
			--error("The syllable " .. initial .. "+" .. final .. " does not appear to be a valid " .. loc .. " POJ syllable.")
			return ""
		end
		return nil
	end
end

-- Builds the wikitext list of Hokkien romanisations and IPA for a POJ input.
-- @param text  POJ string, or a frame-like table with args[1] (the POJ
--              string) and args[2] (when empty or absent, every list line
--              gets one extra leading "*").
--              Readings are separated by "/"; a reading may be prefixed with
--              comma-separated location codes and ":" (e.g. "xm,zz:…").
-- @return the concatenated wikitext.
-- Raises an error for invalid POJ (see export.poj_check_invalid) and for
-- unknown or aliased location codes.
function export.generate_all(text)
	local nan_pronunc
	if type(text) == "table" then
		text, nan_pronunc = text.args[1], text.args[2]
	end

	-- Location code -> location name.
	local location_list = {
		["ax"] = "Anxi", ["ct"] = "Changtai", ["hc"] = "Hsinchu", ["jj"] = "Jinjiang",
		["kh"] = "Kaohsiung", ["km"] = "Kinmen", ["lk"] = "Lukang", ["md"] = "Medan",
		["mg"] = "Magong", ["ml"] = "Mainland", ["ph"] = "Philippines", ["pn"] = "Penang",
		["qz"] = "Quanzhou", ["qzd"] = "Quanzhou-d", ["sg"] = "Singapore", ["sx"] = "Sanxia",
		["ta"] = "Tong'an", ["tc"] = "Taichung", ["tn"] = "Tainan", ["tp"] = "Taipei",
		["tt"] = "Taitung", ["wh"] = "Wanhua", ["wq"] = "Wuqi", ["xm"] = "Xiamen",
		["xmd"] = "Xiamen-d", ["yl"] = "Yilan", ["zp"] = "Zhangpu", ["zz"] = "Zhangzhou",
		["tw"] = "Taiwan", ["twt"] = "Taiwan-t", ["twk"] = "Taiwan-k",
		["twv"] = "Taiwan-v", ["twvt"] = "Taiwan-vt", ["twvk"] = "Taiwan-vk",
		["twd"] = "Taiwan-d", ["twdt"] = "Taiwan-dt", ["twdk"] = "Taiwan-dk",
		["twr"] = "Taiwan-r", ["twrt"] = "Taiwan-rt", ["twrk"] = "Taiwan-rk",
		["twq"] = "Taiwan-Q", ["twz"] = "Taiwan-Z",
	}

	-- Rejected codes -> the code suggested in the error message.
	local location_alias = {
		["xz"] = "hc", ["sj"] = "hc", ["st"] = "hc",
		["kx"] = "kh",
		["gm"] = "km", ["jm"] = "km", ["qm"] = "km",
		["lg"] = "lk",
		["mk"] = "mg",
		["cj"] = "jj", -- was "qj", which is not a code in location_list
		["ss"] = "sx", ["sk"] = "sx",
		["tz"] = "tc", ["tj"] = "tc",
		["tl"] = "tn",
		["em"] = "xm", ["am"] = "xm", ["hm"] = "xm",
		["il"] = "yl",
		["lc"] = "zz",
		["cc"] = "zz (Zhangzhou) or qz (Quanzhou)",
		["cz"] = "zz (Zhangzhou) or qz (Quanzhou)"
	}

	-- Location name -> label shown in the output.
	local location_link = {
		["Anxi"] = "Anxi", ["Changtai"] = "Changtai", ["Hsinchu"] = "Hsinchu",
		["Jinjiang"] = "Jinjiang", ["Kaohsiung"] = "Kaohsiung", ["Kinmen"] = "Kinmen",
		["Lukang"] = "Lukang", ["Magong"] = "Magong", ["Medan"] = "Medan",
		["Mainland"] = "Xiamen, Quanzhou, Zhangzhou",
		["Penang"] = "Penang", ["Philippines"] = "Philippines",
		["Quanzhou"] = "Quanzhou", ["Quanzhou-d"] = "dated in Quanzhou",
		["Sanxia"] = "Sanxia", ["Singapore"] = "Singapore", ["Taichung"] = "Taichung",
		["Tainan"] = "Tainan", ["Taipei"] = "Taipei", ["Taitung"] = "Taitung",
		["Tong'an"] = "Tong'an", ["Wanhua"] = "Wanhua", ["Wuqi"] = "Wuqi",
		["Xiamen"] = "Xiamen", ["Xiamen-d"] = "dated in Xiamen",
		["Yilan"] = "Yilan", ["Zhangpu"] = "Zhangpu", ["Zhangzhou"] = "Zhangzhou",
		["Taiwan"] = "General Taiwanese", ["Taiwan-t"] = "General Taiwanese", ["Taiwan-k"] = "General Taiwanese",
		["Taiwan-v"] = "variant in Taiwan", ["Taiwan-vt"] = "variant in Taiwan", ["Taiwan-vk"] = "variant in Taiwan",
		["Taiwan-d"] = "dated in Taiwan", ["Taiwan-dt"] = "dated in Taiwan", ["Taiwan-dk"] = "dated in Taiwan",
		["Taiwan-r"] = "rare in Taiwan", ["Taiwan-rt"] = "rare in Taiwan", ["Taiwan-rk"] = "rare in Taiwan",
		["Taiwan-Q"] = "Quanzhou-like accent in Taiwan",
		["Taiwan-Z"] = "Zhangzhou-like accent in Taiwan",
	}

	-- Locations for which export.generate_IPA is called.
	local IPA_available = {
		["Xiamen"] = true, ["Tong'an"] = true, ["Quanzhou"] = true, ["Jinjiang"] = true,
		["Zhangzhou"] = true, ["Taipei"] = true, ["Kaohsiung"] = true, ["Kinmen"] = true,
		["Singapore"] = true, ["Penang"] = true, ["Philippines"] = true,
	}

	local ast = (not nan_pronunc or nan_pronunc == "") and "*" or ""
	local formatting = {
		LV_two = {
			leading = "\n" .. ast .. "* (\'\'Hokkien\'\'",
			trailing = ") ",
		},
		POJ = {
			leading = "\n" .. ast .. "** Pe̍h-ōe-jī : ",
			trailing = " ",
		},
		TL = {
			leading = "\n" .. ast .. "** Tâi-lô : ",
			trailing = " ",
		},
		PSDB = {
			leading = "\n" .. ast .. "** Phofsit Daibuun : ",
			trailing = " ",
		},
		IPA = {
			leading = "\n" .. ast .. "** IPA (",
			trailing = ") : ",
		}
	}

	-- Locations used for readings that carry no location prefix.
	local IPA_available_list = { "Xiamen", "Quanzhou", "Zhangzhou", "Taiwan" }
	-- Location names that stand for several IPA varieties (or, for Xiamen-d,
	-- that must keep their "-d" suffix when passed to generate_IPA).
	local location_groups = {
		['Taiwan'] = { 'Taipei', 'Kaohsiung' },
		['Xiamen-d'] = { 'Xiamen-d' },
		['Mainland'] = { 'Xiamen', 'Quanzhou', 'Zhangzhou' }
	}
	-- Replacement for the capture of '^Taiwan%-[vdr]?([tk]?)$'.
	local taiwan_variety = { ['t'] = 'Taipei', ['k'] = 'Kaohsiung', [''] = 'Taiwan' }

	export.poj_check_invalid(text)

	local all_readings, locations, output_text = {}, {}, {}

	-- Appends the POJ and Tâi-lô lines for one reading (or the whole input).
	local function add_poj_and_tl(poj)
		table.insert(output_text,
			formatting.POJ.leading .. export.poj_display(poj) .. formatting.POJ.trailing ..
			formatting.TL.leading .. export.poj_to_tl_conv(poj) .. formatting.TL.trailing)
	end

	-- Appends the Phofsit Daibuun line unless the conversion reported "error".
	local function add_psdb(poj)
		local psdb_hash = export.poj_to_psdb_conv(poj)
		if not find(psdb_hash, "error") then
			table.insert(output_text, formatting.PSDB.leading .. psdb_hash .. formatting.PSDB.trailing)
		end
	end

	-- Split the input into readings and resolve their location prefixes.
	for i, reading in ipairs(split(text, "/")) do
		if find(reading, ":") then
			local reading_part = split(reading, ":")
			locations[i] = {}
			all_readings[i] = reading_part[2]
			for location_abbrev in mw.text.gsplit(reading_part[1], ",") do
				if location_alias[location_abbrev] then
					error("Invalid Min Nan location code: " .. location_abbrev .. ", maybe you meant: " .. location_alias[location_abbrev])
				end
				if not location_list[location_abbrev] then
					error("The region label '" .. location_abbrev .. "' cannot be found. Please see Template:zh-pron.")
				end
				table.insert(locations[i], location_list[location_abbrev])
			end
		else
			locations[i] = IPA_available_list
			all_readings[i] = reading
		end
	end

	if not find(text, ":") then
		-- No location prefixes: one shared header, then IPA per default variety.
		table.insert(output_text, formatting.LV_two.leading .. formatting.LV_two.trailing)
		add_poj_and_tl(text)
		-- Input containing "--" gets neither Phofsit Daibuun nor IPA.
		if not find(text, "%-%-") then
			add_psdb(text)
			for _, listed_location in ipairs(IPA_available_list) do
				local varieties = listed_location == "Taiwan" and { "Taipei", "Kaohsiung" } or { listed_location }
				for _, location in ipairs(varieties) do
					table.insert(output_text, formatting.IPA.leading .. location_link[location] .. formatting.IPA.trailing)
					local reading_IPA_hash = {}
					for poj_reading in mw.text.gsplit(text, "/") do
						table.insert(reading_IPA_hash, export.generate_IPA(poj_reading, location))
					end
					table.insert(output_text, table.concat(reading_IPA_hash, ", "))
				end
			end
		end
	else
		-- Location-specific readings: one header per reading.
		for i, poj_reading in ipairs(all_readings) do
			table.insert(output_text, formatting.LV_two.leading)
			local location_hash = {}
			for _, location_name in ipairs(locations[i]) do
				table.insert(location_hash, location_link[location_name])
			end
			table.insert(output_text, ": " .. table.concat(location_hash, ", ") .. formatting.LV_two.trailing)
			add_poj_and_tl(poj_reading)
			if not find(poj_reading, "%-%-") then
				add_psdb(poj_reading)

				-- Varieties that yield the same IPA share one line. IPA_order
				-- records each distinct IPA string when it first appears, so
				-- the lines follow the order in which locations were given
				-- rather than the unspecified order of pairs().
				local IPA_links, IPA_order = {}, {}
				for _, location_name in ipairs(locations[i]) do
					local resolved = gsub(location_name, '^Taiwan%-[vdr]?([tk]?)$', taiwan_variety)
					-- The parentheses drop gsub's second result (the match
					-- count), which would otherwise become a list element.
					local varieties = location_groups[resolved] or { (gsub(resolved, '%-d$', '')) }
					for _, location in ipairs(varieties) do
						local base_location = gsub(location, '%-d$', '')
						if IPA_available[base_location] then
							local poj_to_ipa = export.generate_IPA(poj_reading, location)
							if IPA_links[poj_to_ipa] then
								table.insert(IPA_links[poj_to_ipa], location_link[base_location])
							else
								IPA_links[poj_to_ipa] = { location_link[base_location] }
								table.insert(IPA_order, poj_to_ipa)
							end
						end
					end
				end
				for _, reading in ipairs(IPA_order) do
					table.insert(output_text, formatting.IPA.leading .. table.concat(IPA_links[reading], ", ") ..
						formatting.IPA.trailing .. reading)
				end
			end
		end
	end

	return table.concat(output_text)
end

-- Produces the IPA transcription, with tone values and tone sandhi, of a POJ
-- string for one location.
-- @param text      POJ string, or a frame-like table with args[1] (the POJ
--                  string) and args.loc (the location).
-- @param location  location name, e.g. "Xiamen", "Taipei" or "Xiamen-d"; a
--                  trailing "-d" is only used for syllable validation.
-- @return "/…/ " followed by whatever export.poj_check_syllable returned for
--         the individual syllables.
-- Raises an error for inconsistent nasality and for unrecognised finals.
function export.generate_IPA(text, location)
	-- (Wyang) I can't seem to find an example where 'triple' is used, so
	-- parenthesised (triplicated) syllables are not implemented here. The
	-- retired code printed the syllable three times with these tone labels
	-- (the third copy kept its plain tone when it was the last syllable):
	--   一 : 一至七, 一至七, 一至七
	--   二 : 二至一, 二至一, 二至一
	--   三 : 三至二, 三至二, 三至二
	--   四A: 四至八, 四至八, 四至八
	--   四B: 四至二, 四至二, 四至二 (final ʔ written as (ʔ))
	--   五 : 五, 五至三, 五至三 in Quanzhou and Taipei; 五, 五至七, 五至七 elsewhere
	--   七 : 七至一, 七至三, 七至三
	--   八A: 八至四, 八至四, 八至四
	--   八B: 八至五, 八至三, 八至三 (final ʔ written as (ʔ))

	if type(text) == "table" then
		text, location = text.args[1], text.args["loc"]
	end

	-- Syllables are validated against the location as given (e.g.
	-- "Xiamen-d"); every table below is keyed by the name without "-d".
	local check_location = location
	location = gsub(location, '%-d$', '')

	-- Tone class, keyed by tone mark plus stop coda as captured by get_tone.
	local tone_from_mark = {
		[""] = "1",
		["́"] = "2",
		["̀"] = "3",
		["p"] = "4A", ["t"] = "4A", ["k"] = "4A",
		["h"] = "4B",
		["̂"] = "5",
		["̌"] = "6",
		["̄"] = "7",
		["̍p"] = "8A", ["̍t"] = "8A", ["̍k"] = "8A",
		["̍h"] = "8B",
		["̋"] = "9",
	}

	-- "<location>-<initial>" entries take precedence over the plain ones.
	local initial_ipa = {
		["p"] = "p", ["ph"] = "pʰ", ["m"] = "m", ["b"] = "b", ["f"] = "f",
		["t"] = "t", ["th"] = "tʰ", ["n"] = "n", ["l"] = "l", ["d"] = "d",
		["ch"] = "t͡s", ["chh"] = "t͡sʰ", ["j"] = "d͡z", ["s"] = "s", ["sh"] = "ʃ",
		["k"] = "k", ["kh"] = "kʰ", ["ng"] = "ŋ", ["g"] = "ɡ",
		["h"] = "h", ["r"] = "ɹ", ["w"] = "w", ["y"] = "j",
		[""] = "",
		["Kaohsiung-j"] = "z",
	}

	-- Replacements applied to s/z in an initial when the final starts with
	-- i, ĩ or y.
	local palatal = { ["s"] = "ɕ", ["z"] = "ʑ" }

	-- "<location>-<final>" entries take precedence over the plain ones.
	local final_ipa = {
		["a"] = "a", ["ah"] = "aʔ", ["ahⁿ"] = "ãʔ", ["ai"] = "aɪ", ["aih"] = "aiʔ", ["aiⁿ"] = "ãɪ",
		["ak"] = "ak̚", ["am"] = "am", ["an"] = "an", ["aⁿ"] = "ã", ["ang"] = "aŋ", ["ap"] = "ap̚",
		["at"] = "at̚", ["au"] = "aʊ", ["auh"] = "aʊʔ", ["auhⁿ"] = "ãʊʔ", ["auⁿ"] = "ãʊ",
		["e"] = "e", ["ee"] = "ɛ", ["eeh"] = "ɛʔ", ["eek"] = "ɛk̚", ["eeng"] = "ɛŋ", ["eh"] = "eʔ",
		["ehⁿ"] = "ẽʔ", ["ei"] = "ei", ["ek"] = "iɪk̚", ["em"] = "ɛm", ["en"] = "ɛn", ["eⁿ"] = "ẽ",
		["eng"] = "iɪŋ", ["eoi"] = "ɵy", ["er"] = "ə", ["erh"] = "əʔ", ["erm"] = "əm", ["ern"] = "ən",
		["ert"] = "ət", ["et"] = "ɛt",
		["i"] = "i", ["ia"] = "ia", ["iah"] = "iaʔ", ["iahⁿ"] = "iãʔ", ["iak"] = "iak̚", ["iam"] = "iam",
		["ian"] = "iɛn", ["iaⁿ"] = "iã", ["iang"] = "iaŋ", ["iap"] = "iap̚", ["iat"] = "iɛt̚",
		["iau"] = "iaʊ", ["iauh"] = "iaʊʔ", ["iauhⁿ"] = "iãʊʔ", ["iauⁿ"] = "iãʊ",
		["ih"] = "iʔ", ["ihⁿ"] = "ĩʔ", ["im"] = "im", ["in"] = "in", ["iⁿ"] = "ĩ", ["ing"] = "iŋ",
		["io"] = "io", ["ioh"] = "ioʔ", ["io͘h"] = "iɔʔ", ["io͘"] = "iɔ", ["iok"] = "iɔk̚",
		["ioⁿ"] = "iɔ̃", ["iong"] = "iɔŋ", ["ip"] = "ip̚", ["ir"] = "ɯ", ["irh"] = "ɯʔ", ["it"] = "it̚",
		["iu"] = "iu", ["iuh"] = "iuʔ", ["iuⁿ"] = "iũ",
		["m"] = "m̩", ["mh"] = "m̩ʔ", ["ng"] = "ŋ̍", ["ngh"] = "ŋ̍ʔ",
		["o"] = "o", ["o͘"] = "ɔ", ["oa"] = "ua", ["oah"] = "uaʔ", ["oai"] = "uai", ["oaih"] = "uaiʔ",
		["oaihⁿ"] = "uãiʔ", ["oaiⁿ"] = "uãi", ["oan"] = "uan", ["oaⁿ"] = "uã", ["oang"] = "uaŋ",
		["oat"] = "uat̚", ["oe"] = "ue", ["oeh"] = "ueʔ", ["oehⁿ"] = "uẽʔ", ["oeⁿ"] = "uẽ",
		["oh"] = "oʔ", ["o͘h"] = "ɔʔ", ["ohⁿ"] = "ɔ̃ʔ", ["oi"] = "ɔi", ["oiⁿ"] = "ɔ̃i", ["ok"] = "ɔk̚",
		["om"] = "ɔm", ["oⁿ"] = "ɔ̃", ["ong"] = "ɔŋ", ["op"] = "ɔp̚", ["ot"] = "ɔt̚", ["ouⁿ"] = "ɔ̃u",
		["u"] = "u", ["uh"] = "uʔ", ["ui"] = "ui", ["uih"] = "uiʔ", ["uihⁿ"] = "uĩʔ", ["uiⁿ"] = "uĩ",
		["uk"] = "ok̚", ["um"] = "om",
		["uⁿ"] = "ũ", -- nasal vowel, written with a tilde like the other ⁿ finals
		["un"] = "un", ["ung"] = "oŋ", ["ut"] = "ut̚",
		["y"] = "y", ["yn"] = "yn",
		["Zhangzhou-eⁿ"] = "ɛ̃", ["Penang-eⁿ"] = "ɛ̃", ["Zhangzhou-ehⁿ"] = "ɛ̃ʔ",
		["Kaohsiung-o"] = "ɤ", ["Kaohsiung-io"] = "iɤ", ["Kaohsiung-oh"] = "ɤʔ", ["Kaohsiung-ioh"] = "iɤʔ",
		["Singapore-eng"] = "eŋ", ["Singapore-ek"] = "ek̚",
		["Penang-eng"] = "eŋ", ["Penang-ek"] = "ek̚", ["Penang-ik"] = "ik̚",
		["Singapore-ou"] = "ɔu", ["Penang-ou"] = "ou",
	}

	-- Tone a syllable takes when another syllable follows it in the word.
	local tone_sandhi = { }
	-- (Wyang) I'm not sure about the 'Xd' ones, when tone X is followed by the diminutive 仔.
	tone_sandhi["Xiamen"] = {
		["1"] = "7", ["2"] = "1", ["3"] = "2", ["4A"] = "8A", ["4B"] = "2",
		["5"] = "7", ["7"] = "3", ["8A"] = "4A", ["8B"] = "3",
	}
	tone_sandhi["Tong'an"] = {
		-- 2 and 4 are special cases
		["1"] = "7", ["3"] = "10",
		["5"] = "9", ["7"] = "9", ["8A"] = "11", ["8B"] = "11",
	}
	tone_sandhi["Quanzhou"] = {
		["1"] = "1", ["2"] = "5", ["3"] = "2", ["4A"] = "8A", ["4B"] = "4B",
		["5"] = "6", ["6"] = "6", ["7"] = "6", ["8A"] = "6", ["8B"] = "6",
	}
	tone_sandhi["Jinjiang"] = {
		["1"] = "1", ["2"] = "5", ["3"] = "2", ["4A"] = "8A", ["4B"] = "4B",
		["5"] = "S", ["6"] = "S", ["7"] = "S", ["8A"] = "S", ["8B"] = "S",
	}
	tone_sandhi["Zhangzhou"] = {
		["1"] = "7", ["2"] = "1", ["3"] = "2", ["4A"] = "S", ["4B"] = "2",
		["5"] = "7", ["7"] = "3", ["8A"] = "3", ["8B"] = "3",
		["4Bd"] = "1", ["8Bd"] = "7",
	}
	tone_sandhi["Taipei"] = {
		["1"] = "7", ["2"] = "1", ["3"] = "2", ["4A"] = "8A", ["4B"] = "2",
		["5"] = "3", ["7"] = "3", ["8A"] = "4A", ["8B"] = "3", ["9"] = "9",
		["3d"] = "1", ["4Bd"] = "1", ["5d"] = "7", ["7d"] = "7", ["8Bd"] = "7",
	}
	tone_sandhi["Kaohsiung"] = {
		["1"] = "7", ["2"] = "1", ["3"] = "2", ["4A"] = "8A", ["4B"] = "2",
		["5"] = "7", ["7"] = "3", ["8A"] = "4A", ["8B"] = "3", ["9"] = "9",
		["3d"] = "1", ["4Bd"] = "1", ["5d"] = "7", ["7d"] = "7", ["8Bd"] = "7",
	}
	tone_sandhi["Kinmen"] = {
		-- 3 and 4B are special cases
		["1"] = "7", ["2"] = "5", ["4A"] = "8A",
		["5"] = "3", ["7"] = "3", ["8A"] = "4A", ["8B"] = "3"
	}
	tone_sandhi["Singapore"] = {
		--Xiamen/Zhangzhou-like
		["1"] = "7", ["2"] = "1", ["3"] = "2", ["4A"] = "8As", ["4B"] = "2",
		["5"] = "3", ["7"] = "3", ["8A"] = "3", ["8B"] = "3"
	}
	tone_sandhi["Penang"] = {
		["1"] = "7", ["2"] = "1", ["3"] = "1", ["4A"] = "8A", ["4B"] = "8B",
		["5"] = "7", ["6"] = "6", ["7"] = "3", ["8A"] = "4A", ["8B"] = "4B", ["9"] = "9"
	}
	tone_sandhi["Philippines"] = tone_sandhi["Jinjiang"]

	-- Tone value (digit string) of each tone class per location.
	local tone_value = { }
	tone_value["Xiamen"] = {
		["1"] = "44", ["2"] = "53", ["3"] = "21", ["4A"] = "32", ["4B"] = "32",
		["5"] = "24", ["7"] = "22", ["8A"] = "4", ["8B"] = "4",
	}
	tone_value["Tong'an"] = {
		["1"] = "44", ["2"] = "31", ["3"] = "112", ["4A"] = "32", ["4B"] = "32",
		["5"] = "24", ["7"] = "22", ["8A"] = "53", ["8B"] = "53",
		["9"] = "11", ["10"] = "42", ["11"] = "1", ["12"] = "4" --sandhi-only tones
	}
	tone_value["Quanzhou"] = {
		["1"] = "33", ["2"] = "554", ["3"] = "41", ["4A"] = "5", ["4B"] = "5",
		["5"] = "24", ["6"] = "22", ["7"] = "41", ["8A"] = "24", ["8B"] = "24",
	}
	tone_value["Jinjiang"] = {
		["1"] = "33", ["2"] = "554", ["3"] = "41", ["4A"] = "5", ["4B"] = "5",
		["5"] = "24", ["6"] = "33", ["7"] = "41", ["8A"] = "24", ["8B"] = "24",
		["S"] = "22", --sandhi-only
	}
	tone_value["Zhangzhou"] = {
		["1"] = "44", ["2"] = "53", ["3"] = "21", ["4A"] = "32", ["4B"] = "32",
		["5"] = "13", ["7"] = "22", ["8A"] = "121", ["8B"] = "121",
		["S"] = "5", --sandhi-only
	}
	tone_value["Taipei"] = {
		["1"] = "44", ["2"] = "53", ["3"] = "11", ["4A"] = "32", ["4B"] = "32",
		["5"] = "24", ["7"] = "33", ["8A"] = "4", ["8B"] = "4", ["9"] = "35"
	}
	tone_value["Kaohsiung"] = {
		["1"] = "44", ["2"] = "41", ["3"] = "21", ["4A"] = "32", ["4B"] = "32",
		["5"] = "23", ["7"] = "33", ["8A"] = "4", ["8B"] = "4", ["9"] = "35"
	}
	tone_value["Kinmen"] = {
		["1"] = "44", ["2"] = "53", ["3"] = "12", ["4A"] = "32", ["4B"] = "32",
		["5"] = "24", ["7"] = "22", ["8A"] = "54", ["8B"] = "54"
	}
	tone_value["Singapore"] = {
		--Xiamen/Zhangzhou-like
		["1"] = "44", ["2"] = "42", ["3"] = "21", ["4A"] = "32", ["4B"] = "32",
		["5"] = "24", ["7"] = "22", ["8A"] = "43", ["8B"] = "43", ["8As"] = "4"
	}
	tone_value["Penang"] = {
		["1"] = "33", ["2"] = "445", ["3"] = "21", ["4A"] = "3", ["4B"] = "3",
		["5"] = "23", ["6"] = "55", ["7"] = "21", ["8A"] = "4", ["8B"] = "4", ["9"] = "5"
	}
	tone_value["Philippines"] = tone_value["Jinjiang"]

	-- Sandhi tones that depend on the tone of the following syllable (the
	-- "special cases" missing from tone_sandhi). Returns nil when no such
	-- rule applies or when there is no following syllable.
	local function get_sandhi_from_post(location, current, post)
		if post then
			if location == "Tong'an" then
				if current == "2" then
					if find(post, "^[15]$") or find(post, "^8[AB]$") then
						return "7"
					else
						return "5"
					end
				elseif find(current, "^4[AB]$") then
					if post == "2" then
						return "10"
					else
						return "12"
					end
				end
			elseif location == "Kinmen" then
				if current == "3" or current == "4B" then
					if find(post, "^[12]$") or find(post, "^4[AB]$") then
						return "1"
					else
						return "2"
					end
				end
			end
		end
	end

	-- Derives the tone class from the part of a syllable after its initial,
	-- using the tone mark and the stop coda.
	local function get_tone(text)
		local tone = gsub(text, "^[^́̀̂̌̄̍̋ptkh]+([́̀̂̌̄̍̋]?)[^́̀̂̌̄̍̋ptkh]*([ptkh]?)ⁿ?", function(tone_symbol, coda)
			return tone_from_mark[tone_symbol .. coda] end)
		return tone
	end

	-- Returns the nasalised counterpart of a final that follows a nasal
	-- initial; syllabic m/ng finals are returned unchanged.
	local function nasalize(final)
		if find(final, "^mh?$") or find(final, "^ngh?$") then
			return final
		end
		if find(final, "o͘h?$") then
			final = gsub(final, "͘", "")
		elseif find(final, "oh?$") then
			error("Invalid POJ: nasal initial cannot go with -" .. final)
		elseif find(final, "eeh?$") then
			final = gsub(final, "ee", "e")
		end
		return final .. "ⁿ"
	end

	local formatting = { leading = "/", trailing = "/ " }
	-- Keys must be strings: gsub looks the replacement up by the matched text.
	local tone_superscript = {
		["1"] = "¹", ["2"] = "²", ["3"] = "³", ["4"] = "⁴", ["5"] = "⁵", ["-"] = "⁻"
	}
	local word_result = {}
	local attention = {}

	-- Normalise separators: spaces join syllables like hyphens, while "," and
	-- "..." (stored as "#") become word boundaries, i.e. single spaces.
	text = gsub(text, " ", "-")
	text = gsub(text, ",", "#")
	text = gsub(text, "%-?%.%.%.%-?", "#")
	text = gsub(text, "#$", "")
	text = gsub(text, "#%-?", " ")
	text = mw.ustring.toNFD(mw.ustring.lower(text))

	for word in mw.text.gsplit(text, " ") do
		local initial, final, tone, diminutive, sandhi, result = {}, {}, {}, {}, {}, {}
		local syllables = split(word, "-")
		syllables.length = #syllables

		-- First pass: split each syllable into initial, final and tone and
		-- convert initial and final to IPA.
		for index, syllable in ipairs(syllables) do
			if syllable == "仔" then
				syllable = "a".."́"
				diminutive[index] = true
			end
			local original_syllable = syllable
			syllable = gsub(syllable, "[́̀̂̌̄̍̋]", "")
			if not find(syllable, "[aeiouy]") then
				-- No vowel letter: the final is a syllabic ng(h) or m(h).
				final[index] = match(syllable, "^[ckmnpst]?h?h?(ngh?)$") or match(syllable, "^h?(mh?)$")
				initial[index] = syllable ~= final[index] and sub(syllable, 1, len(syllable) - len(final[index])) or "" --original code: "ʔ"
			else
				initial[index] = match(syllable, "^[bcdfgjklmnprstwy]?[gh]?h?")
				final[index] = sub(syllable, len(initial[index]) + 1, -1)
			end
			tone[index] = get_tone(sub(original_syllable, len(initial[index]) + 1, -1))
			table.insert(attention, export.poj_check_syllable(initial[index], final[index], check_location))

			local nasal_initial = match(initial[index], "^[mn]g?$")
			if nasal_initial then
				if find(final[index], "ⁿ") then
					error("Too much nasality in POJ. " .. original_syllable .. " should be " .. gsub(original_syllable, "ⁿ", ""))
				end
				if location ~= "Penang" and location ~= "Philippines" and location ~= "Singapore" then --exception for Penang, Philippines and Singapore
					final[index] = nasalize(final[index])
				end
			end
			local nasal_final = match(final[index], "^[mn]") or match(final[index], "ⁿ")
			local not_nasal_initial = match(initial[index], "^[blg]$")
			if ((nasal_initial and not nasal_final) or (not_nasal_initial and nasal_final))
				and (location ~= "Penang" and location ~= "Philippines" and location ~= "Singapore") then --exception for Penang, Philippines and Singapore
				error("POJ error: nasality of initial and final not synchronized.")
			end

			initial[index] = initial_ipa[location .. "-" .. initial[index]] or initial_ipa[initial[index]]
			final[index] = final_ipa[location .. "-" .. final[index]] or final_ipa[final[index]] or error("Cannot recognise " .. final[index] .. ".")
			if find(initial[index], "[sz]ʰ?") and find(final[index], "^[iĩy]") then
				initial[index] = gsub(initial[index], "[sz]", palatal)
			end
			-- The glottal stop is put in parentheses in non-final syllables.
			if index < syllables.length then
				final[index] = gsub(final[index], "ʔ", "(ʔ)")
			end
		end

		-- Second pass: attach the tone value and, for non-final syllables
		-- whose value changes, "-" plus the sandhi value.
		for index = 1, syllables.length do
			sandhi[index] = tone_value[location][tone[index]]
			local sandhi_hash = get_sandhi_from_post(location, tone[index], tone[index+1])
				or tone_sandhi[location][tone[index]..(diminutive[index+1] and "d" or "")]
				or tone_sandhi[location][tone[index]]
			if index < syllables.length and tone_value[location][sandhi_hash] ~= tone_value[location][tone[index]] then
				sandhi[index] = sandhi[index] .. "-" .. tone_value[location][sandhi_hash]
			end
			table.insert(result, initial[index] .. final[index] .. sandhi[index])
		end
		table.insert(word_result, table.concat(result, " "))
	end

	-- Tone digits and the sandhi hyphen are shown as superscripts.
	return (gsub(formatting.leading .. table.concat(word_result, " ") .. formatting.trailing, "[12345%-]", tone_superscript)) .. table.concat(attention)
end

--- Convert a POJ reading string into the PSDB-style spelling built from
-- `psdb_initial` and `psdb_final`.
--
-- `text` is either the reading string itself or a table whose `args[1]`
-- holds it. The input is lower-cased; "/" separates readings, a space
-- separates words and "-" separates syllables. Returns the converted
-- readings joined with ", ".
function export.poj_to_psdb_conv(text)
	if type(text) == "table" then text = text.args[1] end
	local readings = split(mw.ustring.lower(text), "/", true)
	for i = 1, #readings do
		-- will ignore # boundary marker
		local parts = split(gsub(readings[i], "#", ""), " ", true)
		for j = 1, #parts do
			-- Per-word scratch tables, all indexed by syllable position.
			local initial = {}
			local final = {}
			local psdb = {}
			local tone = {}
			local tonesandhi = {}
			local neutral = {}
			-- A double hyphen becomes "-0" so that the following syllable
			-- starts with "0" and is flagged as neutral below.
			parts[j] = gsub(parts[j], "%-%-", "-0")
			local p = split(parts[j], "-",true)
			local ar = {}
			local triple = {}
			-- Pass 1: normalise each syllable, detect its tone and split it
			-- into initial and final. Note that this `i` shadows the outer
			-- reading index for the duration of the loop.
			for i, item in ipairs(p) do
				-- "仔" is rewritten to "á" and remembered in `ar`, which
				-- affects the sandhi of the preceding syllable.
				if find(item, "仔") then item = gsub(item, "仔", "á") ar[i] = true end
				-- A parenthesised syllable is flagged for triplication.
				if find(item, "%(") then
					item = gsub(item, "[%(%)]", "")
					triple[i] = true
				end
				if find(item, "^0") then
					item = gsub(item, "0", "")
					neutral[i] = true
				end
				-- Rewrite the various spellings of the "o͘" vowel as "oo",
				-- keeping any tone mark on the first "o".
				item = gsub(item, "ớ", "óo")
				item = gsub(item, "ờ", "òo")
				item = gsub(item, "ơ̂", "ôo")
				item = gsub(item, "ơ̄", "ōo")
				item = gsub(item, "ơ̍", "o̍o")
				item = gsub(item, "ơ", "oo")
				item = gsub(item, "͘", "o")
				-- Replace combining tone marks with placeholder characters so
				-- that the tone checks below can see them as ordinary letters.
				item = gsub(item, "[̍̂̄̀]",{["̍"] = "捌", ["̂"] = "伍", ["̄"] = "柒", ["̀"] = "叁"})
				-- Tones are recorded as the numerals 一 二 三 四 五 七 八.
				-- A syllable ending in a stop (p/t/k/h) is tone 八 when it
				-- carries the 捌 placeholder and tone 四 otherwise.
				-- NOTE(review): "貳" is accepted as a tone-2 placeholder but
				-- nothing in this function produces it — confirm whether a
				-- combining acute should map to it.
				if find(item, "[aeiou][捌]?[ptkh]") or find(item, "[^aeiou][mn][捌]?g?[ptkh]") then
					if find(item, "捌") then
						tone[i] = "八"
					else
						tone[i] = "四"
					end
				elseif find(item, "[áíúéóḿń貳]") then
					tone[i] = "二"
				elseif find(item, "[àìùèòǹ叁]") then
					tone[i] = "三"
				elseif find(item, "[âîûêô伍]") then
					tone[i] = "五"
				elseif find(item, "[āīūēō柒]") then
					tone[i] = "七"
				else
					tone[i] = "一"
				end
				-- Strip the tone diacritics and placeholders, leaving plain letters.
				item = gsub(item, "[áíúéóḿńàìùèòǹâîûêôāīūēō貳叁伍柒捌]",{["á"] = "a", ["í"] = "i", ["ú"] = "u", ["é"] = "e", ["ó"] = "o", ["ḿ"] = "m", ["ń"] = "n", ["貳"] = "", ["à"] = "a", ["ì"] = "i", ["ù"] = "u", ["è"] = "e", ["ò"] = "o", ["ǹ"] = "n", ["叁"] = "", ["â"] = "a", ["î"] = "i", ["û"] = "u", ["ê"] = "e", ["ô"] = "o", ["伍"] = "", ["ā"] = "a", ["ī"] = "i", ["ū"] = "u", ["ē"] = "e", ["ō"] = "o", ["柒"] = "", ["捌"] = ""})
				-- Split into initial and final. A bare "m"/"ng" (optionally
				-- followed by "h") is a syllabic nasal: empty initial, and
				-- the nasal itself is the final.
				if sub(item,1,3) == "chh" then initial[i] = "chh" final[i] = sub(item,4,-1)
				elseif sub(item,1,1) == "m" then
					if sub(item,2,2) == "h" then initial[i] = "" final[i] = "mh"
					elseif sub(item,2,2) == "" then initial[i] = "" final[i] = "m"
					else initial[i] = "m" final[i] = sub(item,2,-1) end
				elseif sub(item,1,2) == "ng" then
					if sub(item,3,3) == "h" then initial[i] = "" final[i] = "ngh"
					elseif sub(item,3,3) == "" then initial[i] = "" final[i] = "ng"
					else initial[i] = "ng" final[i] = sub(item,3,-1) end
				elseif find(item, "^[ptkc]h") then initial[i] = sub(item,1,2) final[i] = sub(item,3,-1)
				elseif find(item, "^[npbtkgjshl]") then initial[i] = sub(item,1,1) final[i] = sub(item,2,-1)
				else initial[i] = "" final[i] = item end
				-- `psdb_initial` has separate "chi"/"chhi"/"si" keys, so append
				-- "i" to ch/chh/s when the final begins with "i".
				if find(initial[i], "^chh?$") or initial[i] == "s" then
					if find(final[i], "^i") then initial[i] = initial[i] .. "i" end
				end
				p[i] = item
			end
			-- Pass 2: work out the sandhi tone of each syllable and assemble
			-- the PSDB spelling.
			for i = 1, #p do
				-- Sandhi tone per citation tone; tones 三 and 七 pick a
				-- different value when the next syllable was written "仔".
				if tone[i] == "一" then tonesandhi[i] = "七"
				elseif tone[i] == "二" then tonesandhi[i] = "一"
				elseif tone[i] == "三" then tonesandhi[i] = ar[i+1] and "一" or "二"
				elseif tone[i] == "四" then tonesandhi[i] = "八"
				elseif tone[i] == "五" then tonesandhi[i] = "七"
				elseif tone[i] == "七" then tonesandhi[i] = ar[i+1] and "七" or "三"
				elseif tone[i] == "八" then tonesandhi[i] = "四" end
				if triple[i] then
					-- Triplicated syllable: emit three copies. The first copy
					-- uses a special tone for citation tones 五 and 七, the
					-- second the sandhi tone, and the third the citation tone
					-- if this is the last syllable of the word (else sandhi).
					-- NOTE(review): the second and third copies have no
					-- `or "error"` fallback, so a nil lookup there would raise
					-- on concatenation.
					local tonesandhi1 = nil
					if tone[i] == "五" then tonesandhi1 = "五"
					elseif tone[i] == "七" then tonesandhi1 = "一" end
					psdb[i] = (psdb_initial[initial[i]] or "error") ..(psdb_final(final[i]..(tonesandhi1 or tonesandhi[i])) or "error") ..psdb_initial[initial[i]] ..psdb_final(final[i]..tonesandhi[i]) ..psdb_initial[initial[i]] ..psdb_final(final[i]..(i == #tone and tone[i] or tonesandhi[i]))
				else
					-- The last syllable keeps its citation tone; every other
					-- syllable takes its sandhi tone.
					psdb[i] = (psdb_initial[initial[i]] or "error") ..(psdb_final(final[i]..(i == #tone and tone[i] or tonesandhi[i])) or "error")
				end
				-- Neutral syllables override the above: "~" prefix, tone 七.
				if neutral[i] then psdb[i] = "~" .. (psdb_initial[initial[i]] or "error") ..(psdb_final(final[i].."七") or "error") end
				--psdb[i] = p[i]
			end
			parts[j] = table.concat(psdb, "")
		end
		readings[i] = table.concat(parts, " ")
		-- Tidy up the apostrophes emitted by the lookup tables: collapse
		-- runs, then drop them at the start of the reading, after anything
		-- that is not a lowercase letter, and in the contexts matched by the
		-- remaining patterns.
		readings[i] = gsub(readings[i], "'+", "'")
		readings[i] = gsub(readings[i], "^'", "")
		readings[i] = gsub(readings[i], "([^a-z])'", "%1")
		readings[i] = gsub(readings[i], "([^pbdtkqgczsjlmnhaeiou])'([aeiouwy])", "%1%2")
		readings[i] = gsub(readings[i], "([^aeiouwy])'([ptkbdqmn])", "%1%2")
		readings[i] = gsub(readings[i], "([^aeiouwyn])'g", "%1g")
		readings[i] = gsub(readings[i], "([^aeiouwypcz])'h", "%1h")
	end
	-- The outer parentheses drop gsub's second return value (the match count).
	return (gsub(table.concat(readings, ", "),'/([^ ])',' / %1'))
end

--- Prepare a POJ reading string for display.
--
-- `text` is the reading string, or a table whose `args[1]` holds it.
-- "仔" is shown as "á", location prefixes of the form "letters,letters:"
-- and "#" markers are removed, and a parenthesised syllable "(x)" is
-- expanded to "x-x-x". Readings separated by "/" are re-joined with " / ".
function export.poj_display(text)
	if type(text) == "table" then
		text = text.args[1]
	end

	local shown = {}
	for idx, reading in ipairs(split(gsub(text, "仔", "á"), "/")) do
		reading = gsub(reading, "[%a,]+:", "")
		reading = gsub(reading, "#", "")
		reading = gsub(reading, "%(([^%)]+)%)", "%1-%1-%1")
		if not find(reading, " ") then
			-- Single-word readings get a wrapper; both wrapper strings are
			-- empty here, so the reading is left unchanged.
			reading = "" .. reading .. ""
		end
		shown[idx] = reading
	end

	return table.concat(shown, " / ")
end

--- Check whether a Peng'im syllable is made of a known initial and final.
--
-- @param initial  the initial consonant ("" for a zero initial)
-- @param final    the final (rime) without tone number
-- @param loc      location name selecting the extra, location-specific
--                 finals (e.g. "Chaozhou", "Shantou"); may be nil or unknown
-- @return nil when the syllable is valid, "" when it is not
--
-- A missing or unrecognised `loc` is treated as having no extra finals, so
-- the function reports an invalid syllable instead of raising
-- "attempt to index a nil value".
function export.pengim_check_syllable(initial, final, loc)
	-- Turn a list of strings into a lookup set.
	local function set_of(list)
		local set = {}
		for _, item in ipairs(list) do
			set[item] = true
		end
		return set
	end

	local validInitials = set_of {
		"b", "p", "m", "bh",
		"d", "t", "n", "l",
		"g", "k", "ng", "gh", "h",
		"z", "c", "s", "r",
		"",
	}

	-- Finals accepted for every location.
	local validFinals = set_of {
		"a", "i", "u", "ê", "o",
		"ah", "ih", "uh", "êh", "oh",
		"oi", "ai", "ou", "au", "ia", "iu", "uê", "ua", "ui", "uai",
		"oih", "auh", "uêh", "uah",
		"ang", "êng", "ong", "iang", "iong", "ung", "uang",
		"ag", "êg", "og", "iag", "iog", "ug", "uag",
		"an", "in", "ên", "oun", "ain", "aun", "iun", "ian",
		"uên", "uin", "uan", "uain",
		"ng", "m",
	}

	-- Finals accepted only for particular locations.
	local moreValidFinals = {
		["Chaozhou"] = set_of {
			"e", "iê", "iou", "iêh",
			"eng", "am", "ing", "iêng", "im", "iêm", "uêng", "uam",
			"eg", "ab", "ig", "iêg", "ib", "iêb", "uêg", "uab",
			"oin", "iên",
		},
		["Shantou"] = set_of {
			"e", "io", "iau", "ioh",
			"eng", "am", "ing", "im", "iam", "uam",
			"eg", "ab", "ig", "ib", "iab", "uab",
			"oin", "ion",
		},
		["Chenghai"] = set_of {
			"e", "iê", "iou", "iêh",
			"eng", "ing",
			"eg", "ig",
			"oin", "iên",
		},
		["Raoping"] = set_of {
			"e", "io", "iau", "ioh",
			"eng", "am", "ing", "im", "iam", "uam",
			"eg", "ab", "ig", "ib", "iab", "uab",
			"oin", "ion",
		},
		["Jieyang"] = set_of {
			"e", "io", "iau", "ioh",
			"am", "im", "iam", "uêng", "uam",
			"ab", "ib", "iab", "uêg", "uab",
			"ion",
		},
		["Chaoyang"] = set_of {
			"io", "iau", "ioh",
			"am", "ing", "im", "iam", "uêng", "uam",
			"ab", "ig", "ib", "iab", "uêg", "uab",
			"ion",
		},
	}

	-- Guard against a nil/unknown location: fall back to an empty set.
	local localFinals = moreValidFinals[loc] or {}

	if validInitials[initial] and (validFinals[final] or localFinals[final]) then
		return nil
	end
	return ""
end

-- Multi-letter Peng'im sequences that pengim_to_ipa_conv replaces before the
-- per-character conversion. "bh" becomes the stand-in "β", which
-- pengim_to_ipa_fix later turns into "b"; this keeps it apart from plain
-- "b", which the one-letter table maps to "p".
local pengim_to_ipa_two_letters_above = {
	gh = "ɡ",
	bh = "β",
	ng = "ŋ",
	ao = "au",
}

-- Per-character Peng'im -> IPA lookup used by pengim_to_ipa_conv.
-- Characters without an entry are passed through unchanged by the caller.
local pengim_to_ipa_one_letter = {
	-- initials
	["m"] = "m",
	["n"] = "n",
	["b"] = "p",
	["d"] = "t",
	["g"] = "k",
	["p"] = "pʰ",
	["t"] = "tʰ",
	["k"] = "kʰ",
	["s"] = "s",
	["h"] = "h",
	["r"] = "d͡z",
	["z"] = "t͡s",
	["c"] = "t͡sʰ",
	["l"] = "l",

	-- vowels
	["a"] = "a",
	["ê"] = "e",
	["e"] = "ɯ",
	["i"] = "i",
	["o"] = "o",
	["u"] = "u",

	-- tones, written as "citation⁻sandhi" (tone 7 has a single value).
	-- The full-width "２" is an alternative tone 2 that pengim_to_ipa_conv
	-- substitutes for a reading-final "2" preceded by tone 2, 3 or 4.
	["1"] = "³³⁻²³",
	["2"] = "⁵²⁻³⁵",
	["２"] = "⁵²⁻²¹",
	["3"] = "²¹³⁻⁵⁵",
	["4"] = "²⁻⁴",
	["5"] = "⁵⁵⁻¹¹",
	["6"] = "³⁵⁻¹¹",
	["7"] = "¹¹",
	["8"] = "⁴⁻²",
}

-- Citation tone values keyed by location, then by tone number (as a string).
-- Chaozhou, Shantou, Chenghai and Jieyang all refer to one shared table;
-- Chaoyang has its own.
local pengim_tone_value = {}
do
	local shared_values = {
		["1"] = "33",
		["2"] = "53",
		["3"] = "213",
		["4"] = "2",
		["5"] = "55",
		["6"] = "35",
		["7"] = "11",
		["8"] = "5",
	}
	for _, location in ipairs({ "Chaozhou", "Shantou", "Chenghai", "Jieyang" }) do
		pengim_tone_value[location] = shared_values
	end
end
pengim_tone_value["Chaoyang"] = {
	["1"] = "33",
	["2"] = "53",
	["3"] = "31",
	["4"] = "2",
	["5"] = "55",
	["6"] = "313",
	["7"] = "11",
	["8"] = "5",
}

-- Sandhi tone values keyed by location, then by tone label. Tones 2, 3 and 4
-- have "A" and "B" variants.
-- NOTE(review): Chenghai and Jieyang point at the Chaozhou *citation* table
-- (pengim_tone_value), whose keys are "2", "3", "4" rather than "2A"/"2B"
-- etc. — confirm whether the Chaozhou sandhi table was intended.
local pengim_tone_sandhi = {
	Chaozhou = {
		["1"] = "23",
		["2A"] = "23", ["2B"] = "35",
		["3A"] = "31", ["3B"] = "53",
		["4A"] = "3", ["4B"] = "5",
		["5"] = "11",
		["6"] = "21",
		["7"] = "",
		["8"] = "2",
	},
	Shantou = {
		["1"] = "23",
		["2A"] = "35", ["2B"] = "35",
		["3A"] = "55", ["3B"] = "55",
		["4A"] = "5", ["4B"] = "5",
		["5"] = "11",
		["6"] = "21",
		["7"] = "",
		["8"] = "2",
	},
	Chenghai = pengim_tone_value["Chaozhou"],
	Jieyang = pengim_tone_value["Chaozhou"],
	Chaoyang = {
		["1"] = "23",
		["2A"] = "31", ["2B"] = "31",
		["3A"] = "55", ["3B"] = "55",
		["4A"] = "5", ["4B"] = "5",
		["5"] = "11",
		["6"] = "33",
		["7"] = "33",
		["8"] = "2",
	},
}

-- Pattern -> replacement fix-ups that pengim_to_ipa_conv applies after the
-- per-character conversion. Keys are patterns, not plain strings.
local pengim_to_ipa_fix = {
	-- turn the "bh" stand-in into "b"
	["β"] = "b",

	-- p / k / h directly followed by a tone made only of ², ⁴ and ⁻, then a
	-- character in the range " " to "/": unreleased stops and glottal stop
	["p([²⁴⁻]+[ -/])"] = "p̚%1",
	["k([²⁴⁻]+[ -/])"] = "k̚%1",
	["h([²⁴⁻]+[ -/])"] = "ʔ%1",
}

-- Replacement table for nasalised rimes in pengim_to_ipa_conv: each vowel
-- gets a tilde and "n" maps to the empty string, which removes it.
-- The keys are already-converted IPA letters; the trailing comments give the
-- Peng'im letter each one came from.
local pengim_to_ipa_nasal = {
	a = "ã",
	e = "ẽ", -- ê
	["ɯ"] = "ɯ̃", -- e
	i = "ĩ",
	o = "õ",
	u = "ũ",
	n = "",
}

--- Convert a Peng'im reading string (with tone numbers) to IPA.
--
-- Returns the transcription wrapped in slashes; a "/" inside the input
-- becomes "/, /" so that each reading gets its own pair of slashes.
function export.pengim_to_ipa_conv(text)
	-- Multi-letter sequences first, so the per-character pass cannot split them.
	for sequence, ipa_value in pairs(pengim_to_ipa_two_letters_above) do
		text = gsub(text, sequence, ipa_value)
	end

	-- A "2" at the very end, whose preceding syllable has tone 2, 3 or 4,
	-- is swapped for the full-width "２", which has its own tone entry.
	text = gsub(text, "([234]) ([^12345678 ]+)2$", "%1 %2２")

	-- Per-character conversion. Passing pengim_to_ipa_one_letter directly as
	-- the replacement table should work, but it doesn't convert the tone
	-- number in "diên1", hence the explicit lookup function.
	local ipa = gsub(text, ".", function(character)
		return pengim_to_ipa_one_letter[character] or character
	end) .. "/"

	for pattern, replacement in pairs(pengim_to_ipa_fix) do
		ipa = gsub(ipa, pattern, replacement)
	end

	-- Nasalise vowels that are followed by "n" (optionally "ʔ") and a tone.
	local function nasalize_rime(rime)
		return gsub(rime, ".", pengim_to_ipa_nasal)
	end
	ipa = gsub(ipa, "[aeiouɯ]+nʔ?[¹²³⁴⁵⁻]+[ -/]", nasalize_rime)

	-- Directly before a "/", drop the sandhi half of the tone, except for
	-- "⁻²¹", which is kept.
	ipa = gsub(ipa, "(⁻[¹²³⁴⁵]+)/", function(sandhi_part)
		if sandhi_part ~= "⁻²¹" then
			return "/"
		end
		return sandhi_part .. "/"
	end)

	-- Remove the "/" appended above, then expand the remaining ones.
	ipa = gsub(ipa, "/$", "")
	ipa = gsub(ipa, "/", "/, /")
	return "/" .. ipa .. "/"
end

--- Prepare a Peng'im reading string for display.
--
-- Puts spaces around a "/" that follows a tone digit, then rewrites every
-- tone digit 1-8 through the "%1" template (which here reproduces the digit
-- as-is).
function export.pengim_display(text)
	local spaced = gsub(text, "([1-8])/", "%1 / ")
	-- note: originally ([1-8-]+) but it seems like websites have the final
	-- tone within parentheses, if at all
	return (gsub(spaced, "([1-8])", "%1"))
end

--- Convert a Peng'im reading string into a POJ-like romanisation with tone
-- diacritics.
--
-- kind of based on MTR (http://www.ispeakmin.com/bbs/viewthread.php?tid=2784)
--
-- @param text  Peng'im readings separated by "/", syllables separated by
--              spaces, each syllable ending in a tone digit
-- @return the converted readings joined with " / ", normalised to NFC
function export.pengim_to_pojlike_conv(text)
	local words = split(text, '/')

	-- Combining diacritic for each tone. Tones 1 and 4 carry no mark; they
	-- must still be strings because get_tone_mark concatenates the entry.
	local tone_marks = {
		['1'] = '',
		['2'] = '́',
		['3'] = '̀',
		['4'] = '',
		['5'] = '̂',
		['6'] = '̆', -- this is a breve; MTR: breve; current hokkien dialect convention: hacek; missionary: tilde or breve??
		['7'] = '̄',
		['8'] = '̍'
	}

	-- Replacement for "符<rest><digit>": the marker becomes the tone
	-- diacritic and the digit is dropped.
	local function get_tone_mark(a, num)
		return tone_marks[num] .. a
	end

	local function convert_final(x, c, t)
		-- convert final -g and -b (but not -ng)
		if c == 'b' then
			c = 'p'
		elseif c == 'g' then
			c = 'k'
		end
		return x .. c .. t
	end

	local cons_correspondences = {
		['bh'] = 'b', ['gh'] = 'g',
		['b'] = 'p', ['d'] = 't', ['g'] = 'k',
		['p'] = 'ph', ['t'] = 'th', ['k'] = 'kh',
		['z'] = 'ts', ['c'] = 'tsh', ['r'] = 'j'
	}

	-- A final "n" (optionally followed by "h") before the tone digit marks
	-- nasalisation: drop the "n" and insert "ⁿ" after the "h".
	local function nasalization(_, h, t)
		return h .. 'ⁿ' .. t
	end

	for i, word in ipairs(words) do
		local syllables = split(word, ' ')
		for k, syllable in ipairs(syllables) do
			syllable = gsub(syllable, '^[bdgptkzcr]h?', cons_correspondences)
			syllable = gsub(syllable, '([^n])([bg])(%d)', convert_final)

			syllable = gsub(syllable, '[êe]', { ['ê'] = 'e', ['e'] = 'ṳ' })
			syllable = gsub(syllable, 'ao', 'au')

			syllable = gsub(syllable, '(n)(h?)(%d)', nasalization)

			-- Insert the marker "符" right after the letter that should
			-- carry the tone diacritic.
			if find(syllable, 'uai') then
				syllable = gsub(syllable, 'uai', 'ua符i')
			elseif find(syllable, '[aiueoṳ][aiueoṳ]') then
				syllable = gsub(syllable, '([aouṳ])i', '%1符i') -- ?i
				syllable = gsub(syllable, 'i([aoeuṳ])', 'i%1符') -- i?
				syllable = gsub(syllable, '([ao])([uṳ])', '%1符%2') -- ?u
				syllable = gsub(syllable, '([uṳ])([ae])', '%1%2符') -- u?
			elseif find(syllable, '[^aiueoṳ][aiueoṳ]') or find(syllable, '^[aiueoṳ]') then
				syllable = gsub(syllable, '([aiueoṳ])', '%1符')
			elseif find(syllable, 'ngh?%d') then
				syllable = gsub(syllable, 'ng(h?)(%d)', 'n符g%1%2')
			elseif find(syllable, '[^aiueoṳ]h?%d') then
				syllable = gsub(syllable, '([^aiueoṳ])(h?)(%d)', '%1符%2%3')
			end
			syllable = gsub(syllable, '符(.*)(%d)', get_tone_mark)

			syllables[k] = syllable
		end
		words[i] = table.concat(syllables, ' ')
	end

	return mw.ustring.toNFC(table.concat(words, ' / '))
end

-- Hand the table of exported functions back to whoever loads this module.
return export