Module:cpx-pron
Appearance
- This module lacks a documentation subpage. Please create it.
- Useful links: subpage list • links • transclusions • testcases • sandbox
local export = {}
local m_string_utils = require("Module:string utilities")
local sub = m_string_utils.sub
local find = m_string_utils.find
local gsub = m_string_utils.gsub
local match = m_string_utils.match
local toNFD = mw.ustring.toNFD
local toNFC = mw.ustring.toNFC
-- Wrap `text` in a <span> that requests the Consolas font, falling back to any
-- monospace font. Used for displaying romanisations.
local function font_consolas(text)
	local opening = '<span style="font-family: Consolas, monospace;">'
	local closing = '</span>'
	return opening .. text .. closing
end
-- Wrap `text` in slashes inside a <span class="IPA"> element.
local function font_ipa(text)
	local opening = '<span class="IPA">/'
	local closing = '/</span>'
	return opening .. text .. closing
end
-- Supported dialect codes mapped to the wikitext link used to label each
-- dialect in the formatted output. split_dialect_codes() raises an error for
-- any code that is not a key of this table.
local dialects = {
pt = "[[w:Putian dialect|Putian]]",
xy = "[[w:Xianyou dialect|Xianyou]]",
}
------------------ BUC ------------------
-- BUC initial spellings mapped to IPA. In the code visible in this module only
-- the keys are consulted: validate_buc() rejects any parsed initial that is not
-- a key here. The empty string stands for a syllable with no initial.
local buc_initials = {
["b"] = "p", ["p"] = "pʰ", ["m"] = "m",
["d"] = "t", ["t"] = "tʰ", ["n"] = "n", ["l"] = "l",
["c"] = "ts", ["ch"] = "tsʰ", ["s"] = "ɬ",
["g"] = "k", ["k"] = "kʰ", ["ng"] = "ŋ", ["h"] = "h",
[""] = ""
}
-- BUC final spellings (tone marks removed) mapped to IPA. In the code visible in
-- this module only the keys are consulted: validate_buc() rejects any parsed
-- final that is not a key here. Finals ending in "ⁿ" are nasalized, finals
-- ending in "ng" have a nasal coda and finals ending in "h" a glottal stop,
-- as the IPA values show.
-- NOTE(review): "e̤ⁿ" and "o̤ⁿ" map to un-nasalized IPA, unlike the other "ⁿ"
-- finals, and "e̤" maps to "ø" while "e̤ng"/"e̤h" use "œ" — confirm these are
-- intended.
local buc_finals = {
["i"] = "i", ["u"] = "u", ["ṳ"] = "y", ["a"] = "a", ["ia"] = "ia", ["ua"] = "ua",
["e"] = "ɛ", ["a̤"] = "e", ["e̤"] = "ø", ["o̤"] = "ɒ", ["io̤"] = "yɒ", ["eo"] = "o",
["ai"] = "ai", ["uai"] = "uai", ["oi"] = "oi", ["ui"] = "ui", ["au"] = "au",
["a̤u"] = "eu", ["o"] = "ou", ["iu"] = "iu",
["aⁿ"] = "ã", ["iaⁿ"] = "ĩã", ["uaⁿ"] = "ũã", ["a̤ⁿ"] = "ẽ", ["e̤ⁿ"] = "ø", ["o̤ⁿ"] = "ɒ",
["io̤ⁿ"] = "ỹɒ", ["oiⁿ"] = "õĩ", ["auⁿ"] = "ãũ", ["a̤uⁿ"] = "ẽũ",
["ang"] = "aŋ", ["iang"] = "iaŋ", ["uang"] = "uaŋ", ["eng"] = "ɛŋ", ["e̤ng"] = "œŋ",
["o̤ng"] = "ɒŋ", ["io̤ng"] = "iɒŋ", ["eong"] = "oŋ", ["ing"] = "iŋ", ["ṳng"] = "yŋ",
["ah"] = "aʔ", ["iah"] = "iaʔ", ["uah"] = "uaʔ", ["a̤h"] = "eʔ", ["o̤h"] = "ɒʔ",
["ih"] = "iʔ", ["uh"] = "uʔ", ["ṳh"] = "yʔ", ["io̤h"] = "yɒʔ", ["eoh"] = "oʔ",
["aih"] = "aiʔ", ["oih"] = "oiʔ", ["a̤uh"] = "euʔ", ["eh"] = "ɛʔ", ["e̤h"] = "œʔ",
["ng"] = "ŋ"
}
-- Which 陽入 (tone 7) classes each glottal final can carry.
-- Class A: longer, and now lost in most dialects. Class B: shorter, and mostly preserved.
-- "AB" marks a final listed under both classes. validate_buc() looks a final up
-- here when a tone-7 syllable carries no explicit ᴬ/ᴮ class marker.
local buc_yangru_type = {
["ah"] = "AB", ["iah"] = "AB", ["uah"] = "AB", ["a̤h"] = "A", ["o̤h"] = "AB",
["ih"] = "AB", ["uh"] = "B", ["ṳh"] = "B", ["io̤h"] = "AB", ["eoh"] = "AB",
["aih"] = "A", ["oih"] = "A", ["a̤uh"] = "A", ["eh"] = "B", ["e̤h"] = "B",
}
-- Tone identifiers produced by buc_seperate_final_and_tone() mapped to a tone
-- letter. In the code visible in this module only the keys are consulted:
-- validate_buc() rejects any tone (other than the unclassified "7") that is not
-- a key here. Both 陽入 classes share the same symbol.
local buc_tones = {
["1"] = "꜀", -- 陰平 (yin level)
["2"] = "꜁", -- 陽平 (yang level)
["3"] = "꜂", -- 陰上 (yin rising)
["4"] = "꜄", -- 陰去 (yin departing)
["5"] = "꜅", -- 陽去 (yang departing)
["6"] = "꜆", -- 陰入 (yin entering)
["7A"] = "꜇", -- 陽入甲 (yang entering, class A)
["7B"] = "꜇" -- 陽入乙 (yang entering, class B)
}
-- The four combining tone diacritics that buc_seperate_final_and_tone() maps to
-- tones 2-5, concatenated so they can be spliced into a pattern character class.
local buc_tone_marks = "́" .. "̂" .. "̍" .. "̄"
--- Separate the tone from a decomposed (NFD) BUC final.
-- The tone is read from the combining tone mark and from whether the text ends
-- in "h" (optionally followed by a class marker ᴬ/ᴮ):
--   * no tone mark: "6" if the text ends in h/hᴬ/hᴮ, otherwise "1";
--   * the tone-4 mark on a text whose last character is h, ᴬ or ᴮ: "7", "7A"
--     or "7B" respectively;
--   * otherwise the tone number associated with the mark ("2".."5").
-- @param text final part of a syllable, already NFD-decomposed by the caller
-- @return the text with all tone marks removed
-- @return the tone identifier as a string
local function buc_seperate_final_and_tone(text)
	local ends_with_h = match(text, "h[ᴬᴮ]?$") ~= nil
	local last_char = sub(text, -1)

	-- An ordered list rather than a hash: iteration order of pairs() is
	-- unspecified, so with a hash a malformed syllable carrying more than one
	-- tone mark could yield a different tone from one run to the next.
	local tone_markers = {
		{ "́", "2" },
		{ "̂", "3" },
		{ "̍", "4" },
		{ "̄", "5" },
	}

	local tone_num
	for _, entry in ipairs(tone_markers) do
		local marker, num = entry[1], entry[2]
		if find(text, marker) then
			if marker == "̍" and match(last_char, "[hᴬᴮ]") then
				-- Tone-4 mark on a checked syllable means 陽入; the trailing
				-- class marker, if any, selects the class.
				tone_num = (last_char == "ᴬ" and "7A") or (last_char == "ᴮ" and "7B") or "7"
			else
				tone_num = num
			end
			break
		end
	end

	if not tone_num then
		-- Unmarked syllables: checked (-h) is tone 6, everything else tone 1.
		tone_num = ends_with_h and "6" or "1"
	end

	-- Keep only the first return value of gsub (the substituted string).
	local text_without_tone = gsub(text, '[' .. buc_tone_marks .. ']', '')
	return text_without_tone, tone_num
end
--- Split one BUC syllable into initial, final and tone.
-- The syllable is NFD-decomposed so that tone marks become separate combining
-- characters, matched against an "initial + final" pattern, and then the
-- boundary between initial and final is repaired for the cases the greedy
-- pattern gets wrong.
-- @param syllable a single lower-case BUC syllable
-- @return initial (possibly the empty string)
-- @return final, NFC-recomposed, without tone marks
-- @return tone identifier as produced by buc_seperate_final_and_tone()
-- Raises "Invalid syllable: ..." when the syllable does not fit the pattern.
local function split_buc_syllable(syllable)
	local decomposed = toNFD(syllable)
	mw.log(decomposed)

	local initial, final_tone = match(
		decomposed,
		"^([bpmdtnlzcsgk]?h?n?g?)([aeiouynghⁿ" .. buc_tone_marks .. "̤" .. "ᴬᴮ]+)$"
	)
	-- Fail here with a meaningful message; previously a non-matching syllable
	-- passed nil into the tone helper and crashed there instead.
	if not initial or not final_tone then
		error("Invalid syllable: " .. syllable)
	end

	-- Declared local: these used to leak into the global environment.
	local final, tone = buc_seperate_final_and_tone(final_tone)

	-- need better solution
	-- A two-character capture other than "ng" has swallowed a character that
	-- belongs to the final; hand the second character back.
	if initial:len() == 2 and (initial ~= 'ng') then
		final, initial = initial:sub(2, 2) .. final, initial:sub(1, 1)
	end
	-- "...n" + "g": the trailing n is really the start of the syllabic "ng" final.
	if initial:sub(-1) == 'n' and final == 'g' then
		initial, final = sub(initial, 1, -2), 'ng'
	end
	-- A final cannot start with "h"; it belongs to the initial (e.g. "ch").
	if sub(final, 1, 1) == 'h' then
		initial, final = initial .. sub(final, 1, 1), sub(final, 2)
	end

	if not final or not tone then
		error("Invalid syllable: " .. syllable)
	end
	return initial, toNFC(final), tone
end
--- Validate a BUC word: every syllable must have a known initial, final and tone.
-- The word is lower-cased and hyphens are treated as syllable separators.
-- @param word one or more BUC syllables separated by hyphens or whitespace
-- @return true when every syllable is valid; otherwise an error is raised
local function validate_buc(word)
	local normalized = gsub(word:lower(), "-", " ")

	for syllable in normalized:gmatch("%S+") do
		local initial, final, tone = split_buc_syllable(syllable)

		if not buc_initials[initial] then
			error("Invalid BUC initial: " .. initial)
		end

		-- The class marker is not part of the final's spelling in buc_finals.
		local bare_final = gsub(final, "[ᴬᴮ]", "")
		if not buc_finals[bare_final] then
			error("Invalid BUC final: " .. final)
		end

		if tone == "7" then
			-- 陽入 without an explicit class marker: the final must be listed
			-- in buc_yangru_type.
			-- NOTE(review): every "h" final in buc_finals is listed there, so
			-- this error cannot currently fire, even for finals typed "AB" —
			-- confirm whether "AB" finals were meant to require a marker.
			if not buc_yangru_type[final] then
				error("Please specify the tone class of the syllable " .. syllable .. " by adding ᴬ or ᴮ.")
			end
		elseif not buc_tones[tone] then
			error("Invalid BUC tone: " .. tone)
		end
	end

	return true
end
------------------ Pouseng Ping'ing ------------------
-- Pouseng Ping'ing initial spellings mapped to IPA, per dialect code.
-- get_ipa("initials", dialect, spelling) reads from this table and raises an
-- error for a spelling that is missing. The empty string stands for a syllable
-- with no initial. Only the "xy" table has "bh", which the xy assimilation
-- rules can produce.
local initials = {
pt = {
["b"] = "p", ["p"] = "pʰ", ["m"] = "m",
["d"] = "t", ["t"] = "tʰ", ["n"] = "n", ["l"] = "l",
["z"] = "t͡s", ["c"] = "t͡sʰ", ["s"] = "ɬ",
["g"] = "k", ["k"] = "kʰ", ["ng"] = "ŋ", ["h"] = "h",
[""] = ""
},
xy = {
["b"] = "p", ["p"] = "pʰ", ["m"] = "m",
["d"] = "t", ["t"] = "tʰ", ["n"] = "n", ["l"] = "l",
["z"] = "t͡s", ["c"] = "t͡sʰ", ["s"] = "ɬ",
["g"] = "k", ["k"] = "kʰ", ["ng"] = "ŋ", ["h"] = "h",
[""] = "",
["bh"] = "β",
},
}
-- Pouseng Ping'ing final spellings mapped to IPA, per dialect code.
-- get_ipa("finals", dialect, spelling) reads from this table and raises an
-- error for a spelling that is missing. The two dialects have different
-- inventories: only the "xy" table contains nasalized finals (ending in "ⁿ").
local finals = {
pt = {
["a"] = "a", ["ae"] = "ɛ", ["e"] = "e", ["i"] = "i", ["o"] = "o",
["oe"] = "ø", ["or"] = "ɒ", ["u"] = "u", ["y"] = "y",
["ai"] = "ai", ["ao"] = "au", ["ia"] = "ia", ["ieo"] = "ieu", ["iu"] = "iu",
["ou"] = "ɔu", ["ua"] = "ua", ["ue"] = "uei", ["ui"] = "ui", ["yo"] = "yɒ",
["ang"] = "aŋ", ["orng"] = "ɒŋ", ["eng"] = "ɛŋ", ["oeng"] = "œŋ", ["ong"] = "ɔŋ",
["ing"] = "iŋ", ["ieng"] = "iɛŋ", ["ung"] = "uŋ", ["uang"] = "uaŋ", ["yng"] = "yŋ",
["yong"] = "yɒŋ", ["ng"] = "ŋ̍",
["ah"] = "aʔ", ["orh"] = "ɒʔ", ["eh"] = "ɛʔ", ["oeh"] = "œʔ", ["oh"] = "ɔʔ",
["ih"] = "iʔ", ["iah"] = "iaʔ", ["ieh"] = "iɛʔ", ["uh"] = "uʔ", ["uah"] = "uaʔ",
["uoh"] = "uoʔ", ["yh"] = "yʔ", ["yoh"] = "yɒʔ"
},
xy = {
["a"] = "a", ["ae"] = "ɛ", ["e"] = "e", ["i"] = "i", ["o"] = "ɵ",
["oe"] = "ø", ["or"] = "ɒ", ["u"] = "u", ["y"] = "y",
["ai"] = "ai", ["ao"] = "au", ["ia"] = "ia", ["ieo"] = "ieu", ["iu"] = "iu",
["ou"] = "ɔu", ["ua"] = "ua", ["ue"] = "uei", ["ui"] = "ui", ["ya"] = "ya",
["ang"] = "aŋ", ["orng"] = "ɒŋ", ["eng"] = "ɛŋ",
["ing"] = "iŋ", ["ieng"] = "iɛŋ", ["yng"] = "yŋ",
["yeng"] = "yøŋ", ["uong"] = "uoŋ", ["ng"] = "ŋ̍",
["ah"] = "aʔ", ["orh"] = "ɒʔ", ["eh"] = "ɛʔ",
["ih"] = "iʔ", ["ieh"] = "iɛʔ", ["uh"] = "uʔ",
["uoh"] = "uoʔ", ["yh"] = "yʔ", ["yeh"] = "yøʔ",
["iah"] = "iaʔ", ["uah"] = "uaʔ", -- iah, uah only for 代詞促調 (the checked pronoun tone, S3)
["aⁿ"] = "ã", ["iⁿ"] = "ĩ", ["yⁿ"] = "ỹ", ["orⁿ"] = "ɒ̃", ["aiⁿ"] = "ãĩ",
["aoⁿ"] = "ãũ", ["iaⁿ"] = "ĩã", ["iuⁿ"] = "ĩũ", ["uaⁿ"] = "ũã", ["uiⁿ"] = "ũĩ",
["yaⁿ"] = "ỹã"
},
}
-- Tone identifiers mapped to superscript tone-value strings, per dialect code.
-- 1-7 correspond to 陰平, 陽平, 陰上, 陰去, 陽去, 陰入, 陽入; identifiers starting with S are "special tones".
-- S1, S4, S7: these tones sound a bit like 1, 4, 7 after tone sandhi (according to 莆仙方言大詞典).
-- S3: 代詞促調; acts like 陰上 in both Putian and Xianyou after tone sandhi.
-- S5: 古陰入; labelled as 陽去 in dictionaries but has its own tone sandhi rule.
-- The "xy" table has no S4 or S7 entry.
local tones = {
pt = {
["1"] = "⁵³³", ["2"] = "¹³", ["3"] = "⁴⁵³", ["4"] = "⁴²",
["5"] = "²¹", ["6"] = "¹", ["7"] = "⁴",
["S1"] = "⁵⁵", ["S3"] = "³²", ["S4"] = "⁴²", ["S5"] = "²¹", ["S7"] = "⁴⁵"
},
xy = {
["1"] = "⁵³³", ["2"] = "¹³", ["3"] = "³³²", ["4"] = "⁴²",
["5"] = "²¹", ["6"] = "²", ["7"] = "²⁴",
["S1"] = "⁵⁵", ["S3"] = "³²", ["S5"] = "²¹"
},
}
-- Tone sandhi lookup used by apply_sandhi():
--   sandhi_rules[dialect][tone of this syllable][tone of the next syllable]
-- gives the tone this syllable changes to. Rows exist for tones 1-7, S3 and S5
-- only; columns exist for tones 1-7 only (apply_sandhi() maps a following S5
-- to 5 before the lookup).
local sandhi_rules = {
pt = {
["1"] = {["1"]="5", ["2"]="5", ["3"]="5", ["4"]="5", ["5"]="2", ["6"]="2", ["7"]="5"},
["2"] = {["1"]="5", ["2"]="5", ["3"]="5", ["4"]="S1", ["5"]="4", ["6"]="4", ["7"]="5"},
["3"] = {["1"]="5", ["2"]="2", ["3"]="5", ["4"]="5", ["5"]="2", ["6"]="2", ["7"]="5"},
["4"] = {["1"]="S1", ["2"]="4", ["3"]="S1", ["4"]="S1", ["5"]="4", ["6"]="4", ["7"]="S1"},
["5"] = {["1"]="5", ["2"]="5", ["3"]="5", ["4"]="S1", ["5"]="4", ["6"]="4", ["7"]="5"},
["6"] = {["1"]="S7", ["2"]="S7", ["3"]="S7", ["4"]="S7", ["5"]="S4", ["6"]="S4", ["7"]="S7"},
["7"] = {["1"]="6", ["2"]="6", ["3"]="6", ["4"]="7", ["5"]="S4", ["6"]="S4", ["7"]="6"},
["S3"] = {["1"]="7", ["2"]="7", ["3"]="7", ["4"]="7", ["5"]="7", ["6"]="7", ["7"]="7"},
["S5"] = {["1"]="S1", ["2"]="S1", ["3"]="S1", ["4"]="S1", ["5"]="4", ["6"]="4", ["7"]="S1"}
},
xy = {
["1"] = {["1"]="5", ["2"]="5", ["3"]="5", ["4"]="5", ["5"]="2", ["6"]="2", ["7"]="5"},
["2"] = {["1"]="5", ["2"]="5", ["3"]="5", ["4"]="S1", ["5"]="4", ["6"]="4", ["7"]="5"},
["3"] = {["1"]="5", ["2"]="S1", ["3"]="5", ["4"]="5", ["5"]="2", ["6"]="2", ["7"]="5"},
["4"] = {["1"]="S1", ["2"]="S1", ["3"]="S1", ["4"]="S1", ["5"]="4", ["6"]="4", ["7"]="S1"},
["5"] = {["1"]="5", ["2"]="5", ["3"]="5", ["4"]="S1", ["5"]="4", ["6"]="4", ["7"]="5"},
["6"] = {["1"]="7", ["2"]="7", ["3"]="7", ["4"]="7", ["5"]="7", ["6"]="7", ["7"]="7"},
["7"] = {["1"]="6", ["2"]="6", ["3"]="6", ["4"]="7", ["5"]="4", ["6"]="4", ["7"]="6"},
["S3"] = {["1"]="7", ["2"]="7", ["3"]="7", ["4"]="7", ["5"]="7", ["6"]="7", ["7"]="7"},
["S5"] = {["1"]="S1", ["2"]="S1", ["3"]="S1", ["4"]="S1", ["5"]="4", ["6"]="4", ["7"]="S1"}
}
}
-- Initial assimilation lookup used by apply_initial_assimilation():
--   initial_assimilation_rules[dialect][final type of the previous syllable][original initial]
-- gives the initial a non-first syllable changes to. The final types are the
-- strings returned by get_final_type(). An initial with no entry is left
-- unchanged, which is why the glottal_final tables are empty. Only "xy" has a
-- nasalized_final table.
local initial_assimilation_rules = {
pt = {
nasal_final = {
["b"] = "m", ["p"] = "m", ["m"] = "m",
["d"] = "n", ["t"] = "n", ["n"] = "n", ["l"] = "n", ["z"] = "n", ["c"] = "n", ["s"] = "n",
["g"] = "ng", ["k"] = "ng", ["h"] = "ng", ["ng"] = "ng", [""] = "ng"
},
glottal_final = {}, -- remain unchanged
other_final = {
["b"] = "", ["p"] = "",
["m"] = "m", ["n"] = "n", ["l"] = "l", ["ng"] = "ng",
["d"] = "l", ["t"] = "l", ["z"] = "l", ["c"] = "l", ["s"] = "l",
["g"] = "", ["k"] = "", ["h"] = "", [""] = ""
}
},
xy = {
nasal_final = {
["b"] = "m", ["p"] = "m", ["m"] = "m",
["d"] = "n", ["t"] = "n", ["n"] = "n", ["l"] = "n", ["z"] = "n", ["c"] = "n", ["s"] = "n",
["g"] = "ng", ["k"] = "ng", ["h"] = "ng", ["ng"] = "ng", [""] = "ng"
},
nasalized_final = {
["b"] = "m", ["m"] = "m", ["p"] = "m",
["d"] = "n", ["t"] = "n", ["n"] = "n", ["l"] = "n", ["z"] = "n", ["c"] = "n", ["s"] = "n",
["g"] = "", ["k"] = "", ["h"] = "",
["ng"] = "ng",
[""] = ""
},
glottal_final = {}, -- remain unchanged
other_final = {
["b"] = "bh", ["p"] = "bh",
["m"] = "m", ["n"] = "n", ["l"] = "l", ["ng"] = "ng",
["d"] = "l", ["t"] = "l", ["z"] = "l", ["c"] = "l", ["s"] = "l",
["g"] = "", ["k"] = "", ["h"] = "", [""] = ""
}
}
}
--- Classify a syllable's final for the initial-assimilation rules.
-- @param initial the syllable's initial (only inspected for a leading nasal)
-- @param final the syllable's final
-- @param dialect dialect code; may be omitted
-- @return "nasal_final" (ends in "ng"), "glottal_final" (ends in "h"),
--   "nasalized_final" (ends in "ⁿ", or — Xianyou only — a nasal initial
--   followed by an otherwise plain final) or "other_final"
-- Raises an error when the final ends in a plain "n".
local function get_final_type(initial, final, dialect)
	mw.log("gettype: " .. final)

	if sub(final, -2) == "ng" then
		return "nasal_final"
	end

	local last_char = sub(final, -1)
	if last_char == "h" then
		return "glottal_final"
	end
	if last_char == "ⁿ" then
		return "nasalized_final"
	end

	-- harcoding xy: a nasal initial nasalizes an otherwise plain final.
	-- The recursive call classifies the final on its own, ignoring the initial.
	if match(initial, "[mn]g?") and get_final_type("", final) == "other_final" and dialect == "xy" then
		return "nasalized_final"
	end

	if last_char == "n" then
		error('Please replace the syllable-final "n" with "ⁿ"')
	end
	return "other_final"
end
--- Parse one Pouseng Ping'ing syllable token.
-- Token shape: [*]form[>changed_form]tone[-sandhi_tone][#]
--   leading "*"    : do not apply initial assimilation to this syllable
--   trailing "#"   : do not apply tone sandhi to this syllable
--   form>changed   : original spelling and a manually given changed spelling
--   tone-sandhi    : original tone and a manually given sandhi tone
-- Tone "3" on a final ending in "h" is rewritten to the special tone "S3".
-- @param syllable one whitespace-delimited token of the input word
-- @return orig_initial, orig_final, changed_initial, changed_final, tone,
--   sandhi_tone (nil unless given), no_sandhi (boolean), no_assimilation (boolean)
-- Raises "Invalid syllable: ..." when the token cannot be parsed.
local function split_syllable(syllable)
	local no_assimilation = syllable:sub(1, 1) == "*"
	if no_assimilation then
		syllable = syllable:sub(2)
	end
	local no_sandhi = syllable:sub(-1) == "#"
	if no_sandhi then
		syllable = syllable:sub(1, -2)
	end

	local original_form, changed_form, tone_part
	if syllable:find(">") then
		original_form, changed_form, tone_part = syllable:match("(.-)>(.-)([1-7S]+.*)$")
	else
		original_form, tone_part = syllable:match("(.-)([1-7S]+.*)$")
		changed_form = original_form
	end
	-- Validate before use: a token without a tone used to crash with a
	-- nil-index error further down instead of reporting the bad syllable.
	if not original_form or not changed_form or not tone_part then
		error("Invalid syllable: " .. syllable)
	end

	-- Split a spelling into initial and final. "bh" and "ng" are the only
	-- two-letter initials; a bare "ng" is a syllabic final with no initial.
	local function split_initial_final(form)
		local initial, final
		if form:sub(1, 2) == "bh" then
			initial, final = "bh", form:sub(3)
		elseif form == "ng" then
			initial, final = "", form
		elseif form:sub(1, 2) == "ng" and #form > 2 then
			initial, final = "ng", form:sub(3)
		else
			initial = form:match("^[bpmnltdzcsghk]h?") or ""
			final = form:sub(#initial + 1)
		end
		return initial, final
	end

	local orig_initial, orig_final = split_initial_final(original_form)
	local changed_initial, changed_final = split_initial_final(changed_form)

	local tone, sandhi_tone
	if tone_part:find("-", 1, true) then
		tone, sandhi_tone = tone_part:match("^([1-7S]+)%-([1-7S]+)$")
	else
		tone = tone_part
	end
	if not tone then
		error("Invalid syllable: " .. syllable)
	end

	if tone == '3' and get_final_type(changed_initial, changed_final) == 'glottal_final' then
		tone = 'S3'
	end

	-- temporary: track use of the finals listed here
	local tracked_finals = { ['yeh'] = true, ['uoh'] = true, ['yeng'] = true, ['uong'] = true }
	if tracked_finals[orig_final] then
		require("Module:debug/track")("cpx-pron/Xianyou merged finals")
	end

	return orig_initial, orig_final, changed_initial, changed_final, tone, sandhi_tone, no_sandhi, no_assimilation
end
--- Build the mutable record describing one syllable.
-- Fields: original_initial, original_final, original_tone, changed_initial,
-- changed_final, changed_tone, no_sandhi, no_assimilation, is_first_syllable,
-- manual_sandhi_tone. The changed tone starts out equal to the original tone
-- and the syllable is assumed not to be the first one.
-- @param syllable one token, as accepted by split_syllable()
-- @return the new record
local function create_syllable_info(syllable)
	local info = {}

	info.original_initial, info.original_final,
		info.changed_initial, info.changed_final,
		info.original_tone, info.manual_sandhi_tone,
		info.no_sandhi, info.no_assimilation = split_syllable(syllable)

	info.changed_tone = info.original_tone -- default: original tone
	info.is_first_syllable = false -- default: not first syllable
	return info
end
--- Apply initial assimilation to every non-first syllable of a word.
-- For each syllable after the first, the final type of the preceding syllable
-- selects a rule table from initial_assimilation_rules and the syllable's
-- original initial is looked up in it. Syllables flagged no_assimilation, or
-- whose changed initial already differs from the original (a manual change),
-- are left alone. The syllable records are modified in place.
-- @param dialect dialect code
-- @param syllable_infos list of records from create_syllable_info()
-- @return a new list holding the same (modified) records
local function apply_initial_assimilation(dialect, syllable_infos)
	local result = {}
	-- Nothing to do for an empty word (previously a nil-index crash).
	if #syllable_infos == 0 then
		return result
	end

	-- handle first syllable
	result[1] = syllable_infos[1]
	result[1].is_first_syllable = true

	local dialect_rules = initial_assimilation_rules[dialect]

	for i = 2, #syllable_infos do
		local prev_syllable = result[i - 1]
		local curr_syllable = syllable_infos[i]
		local final_type = get_final_type(prev_syllable.changed_initial, prev_syllable.changed_final, dialect)

		-- Special rule: 陰聲韻 + b, p, d, t, z, c, s with 鼻化韻, initial becomes m or n
		if final_type == "other_final" and
			curr_syllable.original_initial:match("^[bpdtzcs]") and
			get_final_type(curr_syllable.original_initial, curr_syllable.original_final, dialect) == "nasalized_final" then
			final_type = "nasal_final"
		end

		if not curr_syllable.no_assimilation and curr_syllable.changed_initial == curr_syllable.original_initial then
			-- A dialect may lack a table for this final type (there is no
			-- nasalized_final table for "pt"); treat that as "no change"
			-- instead of indexing nil, so that a later stage can report the
			-- unsupported final with a readable message.
			local rules = dialect_rules[final_type] or {}
			curr_syllable.changed_initial = rules[curr_syllable.original_initial] or curr_syllable.original_initial
		end

		-- remove duplicate nasalization like "norⁿ1"
		if curr_syllable.changed_initial:match("^[mn]g?") and curr_syllable.changed_final:match("ⁿ$") then
			curr_syllable.changed_final = curr_syllable.changed_final:gsub("ⁿ$", "")
		end

		-- remove syllable like "ngng1"
		if curr_syllable.changed_initial == "ng" and curr_syllable.changed_final == "ng" then
			curr_syllable.changed_initial = ""
		end

		result[i] = curr_syllable
	end

	return result
end
--- Compute the sandhi (changed) tone of every syllable, in place.
-- Priority for each syllable: the no_sandhi flag keeps the original tone; a
-- manually given sandhi tone is used as is; otherwise, if there is a following
-- syllable, sandhi_rules is consulted with this syllable's tone and the next
-- syllable's tone; the last syllable keeps its tone. A resulting tone "3" on
-- a final ending in "h" becomes "S3".
-- @param dialect dialect code
-- @param syllable_infos list of syllable records; changed_tone is overwritten
local function apply_sandhi(dialect, syllable_infos)
	local dialect_rules = sandhi_rules[dialect]

	for i = 1, #syllable_infos do
		local curr_syllable = syllable_infos[i]
		local next_syllable = syllable_infos[i + 1]

		if curr_syllable.no_sandhi then
			curr_syllable.changed_tone = curr_syllable.original_tone
		elseif curr_syllable.manual_sandhi_tone then
			curr_syllable.changed_tone = curr_syllable.manual_sandhi_tone
		elseif next_syllable then
			local current_tone = curr_syllable.original_tone
			local next_tone = next_syllable.original_tone
			next_tone = gsub(next_tone, "S5", "5") -- S5 is considered 5 in non-sandhi position
			-- Not every tone has a rule row (S1, S4, S7 and invalid tones do
			-- not); fall back to the unchanged tone instead of indexing nil.
			local row = dialect_rules and dialect_rules[current_tone]
			local new_tone = row and row[next_tone]
			curr_syllable.changed_tone = new_tone or current_tone
		else
			curr_syllable.changed_tone = curr_syllable.original_tone
		end

		if curr_syllable.changed_tone == '3' and curr_syllable.changed_final:sub(-1) == 'h' then
			curr_syllable.changed_tone = 'S3'
		end
	end
end
--- Look up the IPA for an initial, final or tone in a given dialect.
-- @param typ "initials", "finals" or "tones"
-- @param dial dialect code
-- @param inp the spelling or tone identifier to convert
-- @return the IPA string
-- Raises "Invalid type" for an unknown typ, and "Invalid <kind>: <inp>." when
-- the value is not in the dialect's table; for some commonly mistyped finals
-- the message also suggests the expected spelling.
local function get_ipa(typ, dial, inp)
	local lookup = { initials = initials, finals = finals, tones = tones }
	local by_dialect = lookup[typ]
	if not by_dialect then
		error("Invalid type")
	end

	local dialect_table = by_dialect[dial]
	local result = dialect_table and dialect_table[inp]
	if result then
		return result
	end

	-- Spellings that are not accepted, mapped to the spelling to use instead.
	local final_variant = {
		["au"] = "ao", ["iang"] = "ieng", ["ieu"] = "ieo", ["iau"] = "ieo", ["iao"] = "ieo",
		["uai"] = "ue", ["uei"] = "ue",
		["yoeh"] = "yeh", ["yoeng"] = "yeng",
		["yor"] = "yo", ["yorh"] = "yoh", ["yorng"] = "yong",
	}
	local kind = typ:sub(1, -2) -- drop the plural "s"
	local suggestion = typ == "finals" and final_variant[inp]
	if suggestion then
		error("Invalid " .. kind .. ": " .. inp .. ". Please use \"" .. suggestion .. "\" instead.")
	end
	error("Invalid " .. kind .. ": " .. inp .. ".")
end
--- Render one processed syllable as IPA.
-- The result is: an optional superscript showing the original initial (only
-- for non-first syllables whose initial changed), the changed initial, the
-- changed final, and the tone. When the changed tone differs from the
-- original, the tone is written as "original⁻changed".
-- @param syllable_info a syllable record after assimilation and sandhi
-- @param dialect dialect code
-- @return the IPA string for the syllable
-- Raises an error when any component is not defined for the dialect.
local function syllable_to_ipa(syllable_info, dialect)
	local ipa_initial = get_ipa("initials", dialect, syllable_info.changed_initial)
	local ipa_final = get_ipa("finals", dialect, syllable_info.changed_final)
	local ipa_tone = get_ipa("tones", dialect, syllable_info.original_tone)

	if syllable_info.changed_tone ~= syllable_info.original_tone then
		local sandhi_ipa_tone = tones[dialect][syllable_info.changed_tone]
		if not sandhi_ipa_tone then
			error("Invalid sandhi tone: " .. syllable_info.changed_tone .. " for dialect: " .. dialect)
		end
		ipa_tone = ipa_tone .. "⁻" .. sandhi_ipa_tone
	end

	-- Show the pre-assimilation initial for a non-first syllable whose initial
	-- changed. (The condition used to repeat the same comparison twice.)
	local original_initial_display = ""
	if not syllable_info.is_first_syllable and
		syllable_info.original_initial ~= syllable_info.changed_initial then
		if syllable_info.original_initial == "" then
			original_initial_display = "<sup>(Ø-)</sup>"
		else
			original_initial_display = "<sup>(" .. get_ipa("initials", dialect, syllable_info.original_initial) .. "-)</sup>"
		end
	end

	-- Nasal initial + nasalized final: strip any "ⁿ" from the IPA final.
	-- NOTE(review): none of the IPA values in the finals tables contain "ⁿ",
	-- so this looks like a no-op at present — confirm the intent.
	if syllable_info.changed_initial:match("[mn]g?") and syllable_info.changed_final:match("(.+)ⁿ") then
		ipa_final = ipa_final:gsub("ⁿ", "")
	end

	return original_initial_display .. ipa_initial .. ipa_final .. ipa_tone
end
--- Split a comma-separated list of dialect codes.
-- @param code e.g. "pt" or "pt,xy"
-- @return list of codes in input order
-- Raises "Unsupported dialect: ..." for a code that is not in `dialects`.
local function split_dialect_codes(code)
	local codes = {}
	for c in code:gmatch("[^,]+") do
		if dialects[c] then
			codes[#codes + 1] = c
		else
			error("Unsupported dialect: " .. c)
		end
	end
	return codes
end
--- Process one "codes:word" pronunciation.
-- For "buc" the word is only validated. Otherwise, for every dialect in the
-- comma-separated code list, the word is parsed, initial assimilation and tone
-- sandhi are applied, and an entry {dialect, original, actual, ipa, index} is
-- appended to result.processed.
-- @param dialect_codes "buc" or a comma-separated list of dialect codes
-- @param word whitespace-separated syllables
-- @param index position of this pronunciation in the input
-- @param buc_index optional; stored on the result unchanged
-- @return table with fields dialect_codes, word, processed, index, buc_index
local function process_pronunciation(dialect_codes, word, index, buc_index)
	local result = {
		dialect_codes = dialect_codes,
		word = word,
		processed = {},
		index = index,
		buc_index = buc_index
	}

	if dialect_codes == "buc" then
		validate_buc(word)
		return result
	end

	-- Parse the word into a fresh list of syllable records.
	local function parse_word()
		local infos = {}
		for syllable in word:gmatch("%S+") do
			infos[#infos + 1] = create_syllable_info(syllable)
		end
		if #infos == 0 then
			-- Previously a nil-index crash on a whitespace-only word.
			error("Invalid input: no syllables found in \"" .. word .. "\"")
		end
		infos[1].is_first_syllable = true
		return infos
	end

	for _, dialect in ipairs(split_dialect_codes(dialect_codes)) do
		-- The assimilation and sandhi steps modify the records in place, so
		-- each dialect must start from its own freshly parsed copy. Sharing
		-- one list made the second dialect of e.g. "pt,xy" inherit the first
		-- dialect's changed initials and finals.
		local syllables = apply_initial_assimilation(dialect, parse_word())
		apply_sandhi(dialect, syllables)

		local ipa = {}
		local actual_pronunciation = {}
		for _, syllable_info in ipairs(syllables) do
			actual_pronunciation[#actual_pronunciation + 1] =
				syllable_info.changed_initial .. syllable_info.changed_final .. syllable_info.changed_tone
			ipa[#ipa + 1] = syllable_to_ipa(syllable_info, dialect)
		end

		table.insert(result.processed, {
			dialect = dialect,
			original = word,
			actual = table.concat(actual_pronunciation, " "),
			ipa = table.concat(ipa, " "),
			index = index
		})
	end

	return result
end
--- Entry point: convert a pronunciation specification into formatted output.
-- The specification is a "/"-separated list of "codes:word" items, where codes
-- is "buc" or a comma-separated list of dialect codes. A "buc" word may end in
-- "(N)" to attach it to the N-th non-BUC result; without it, the item's own
-- position in the input is used as N.
-- @param text the specification, or a frame-like table whose args[1] holds it
-- @param mode "debug" (default), "brief" or "complete"
-- @return the formatted string
function export.rom_display(text, mode)
	if type(text) == "table" then
		text = text.args[1]
	end
	mode = mode or "debug" -- default "debug"

	if not text or text == "" then
		error("Invalid input: text must be a non-empty string")
	end

	local results, buc_results = {}, {}
	local index = 0
	for pronunciation in text:gmatch("[^/]+") do
		index = index + 1

		local dialect_codes, word = pronunciation:match("^(.+):(.+)$")
		if not dialect_codes or not word then
			error("Invalid input format: " .. pronunciation)
		end

		if dialect_codes ~= "buc" then
			table.insert(results, process_pronunciation(dialect_codes, word, index))
		else
			local buc_index = index
			local explicit_index = word:match("%((%d+)%)$")
			if explicit_index then
				word = (word:gsub("%(%d+%)$", ""))
				buc_index = tonumber(explicit_index)
			end
			validate_buc(word)
			table.insert(buc_results, { word = word, buc_index = buc_index })
		end
	end

	-- Attach each BUC word to the result it points at; words pointing at a
	-- non-existent result are dropped.
	for _, buc_result in ipairs(buc_results) do
		local target = results[buc_result.buc_index]
		if target then
			target.buc = target.buc or {}
			table.insert(target.buc, buc_result)
		end
	end

	-- Format the output according to different modes
	if mode == "debug" then
		return format_debug_output(results)
	end
	if mode == "brief" then
		return format_brief_output(results)
	end
	if mode == "complete" then
		return format_complete_output(results)
	end
	error("Unsupported mode: " .. mode)
end
--- Format results for debugging: one "dialect: original [→ actual] /ipa/"
-- item per dialect, joined with ", ".
-- Kept as a global function because export.rom_display, defined earlier in the
-- file, refers to it by its global name.
-- @param results list of tables as returned by process_pronunciation()
-- @return the formatted string ("" for an empty list)
function format_debug_output(results)
	local items = {}

	for _, result in ipairs(results) do
		if result.dialect_codes == "buc" then
			items[#items + 1] = result.dialect_codes .. ":" .. result.word .. " /" .. result.ipa .. "/"
		else
			for _, processed in ipairs(result.processed) do
				local shown = processed.original
				-- Only show the surface form when it differs from the input.
				if processed.original ~= processed.actual then
					shown = shown .. " → " .. processed.actual
				end
				items[#items + 1] = processed.dialect .. ": " .. shown .. " /" .. processed.ipa .. "/"
			end
		end
	end

	return table.concat(items, ", ")
end
--- Turn an input romanisation into its display form: strip the markup that
-- only steers processing and superscript the tone numbers.
-- @param text a Pouseng Ping'ing word as typed by the editor
-- @return the cleaned string
local function clear_pinging_format(text)
	-- Applied in this order; each entry is { pattern, replacement }.
	local substitutions = {
		{ "%-S?%d", "" },             -- remove tone sandhi
		{ ">[a-zⁿ]+", "" },           -- remove irregular sound change
		{ "[#*]+", "" },              -- remove special symbols
		{ "(%d)", "<sup>%1</sup>" },  -- superscript tone numbers
		{ "S", "" },                  -- remove "S" in special tones
	}
	for _, substitution in ipairs(substitutions) do
		text = gsub(text, substitution[1], substitution[2])
	end
	return text
end
--- Format results as a single brief line: the distinct input romanisations
-- joined with " / ", labelled "Pouseng Ping'ing" and, when exactly one
-- dialect is involved, prefixed with that dialect's name.
-- Kept as a global function because export.rom_display, defined earlier in the
-- file, refers to it by its global name.
-- @param results list of tables as returned by process_pronunciation()
-- @return the formatted string ("" when there is nothing to show)
function format_brief_output(results)
	local buc_results, pinging_results, pinging_codes = {}, {}, {}

	-- Append item unless an entry with the same display form is already there.
	local function add_unique(list, item)
		local cleaned = clear_pinging_format(item)
		for _, existing in ipairs(list) do
			if clear_pinging_format(existing) == cleaned then
				return
			end
		end
		list[#list + 1] = item
	end

	for _, result in ipairs(results) do
		if result.dialect_codes == "buc" then
			buc_results[#buc_results + 1] = result.word
		else
			add_unique(pinging_results, result.processed[1].original)
			for code in result.dialect_codes:gmatch("[^,]+") do
				add_unique(pinging_codes, code)
			end
		end
	end

	if #pinging_results == 0 then
		return ""
	end

	local lead = " "
	if #buc_results > 0 then
		lead = "\n*: "
	end

	-- The dialect is only named when the whole line belongs to one dialect.
	local dialects_str = ""
	if #pinging_codes == 1 then
		local only_code = pinging_codes[1]
		dialects_str = (dialects[only_code] or only_code) .. ", "
	end

	local romanisations = clear_pinging_format(table.concat(pinging_results, " / "))
	return lead
		.. "<small>(<i>" .. dialects_str .. "[[Wiktionary:About Chinese/Puxian Min|Pouseng Ping'ing]]</i>): </small>"
		.. font_consolas(romanisations)
end
--- Format results as a full wikitext list.
-- Every (result, dialect) pair contributes one entry; entries with identical
-- original form, surface form and IPA are merged into one group whose header
-- lists all their dialects. Each group shows the romanisation, the surface
-- ("Phonetic") form when it differs, the attached BUC words (only when the
-- group's first entry is for "pt"), and the IPA.
-- Kept as a global function because export.rom_display, defined earlier in the
-- file, refers to it by its global name.
-- Side effect: sets index, input_order and buc on the processed entries.
-- @param results list of tables as returned by process_pronunciation()
-- @return the formatted string ("" when there is nothing to show)
function format_complete_output(results)
	-- 1. Flatten to one entry per (result, dialect), in input order.
	local pinging_results = {}
	for _, result in ipairs(results) do
		if result.dialect_codes ~= "buc" then
			-- Named dialect_list so the module-level `dialects` is not shadowed.
			local dialect_list = split_dialect_codes(result.dialect_codes)
			for _, dialect in ipairs(dialect_list) do
				for _, processed in ipairs(result.processed) do
					if processed.dialect == dialect then
						processed.index = result.index
						processed.input_order = #pinging_results + 1
						processed.buc = result.buc or {}
						pinging_results[#pinging_results + 1] = processed
						break
					end
				end
			end
		end
	end

	-- 2. Group entries that render identically.
	local groups = {}
	local group_by_key = {}
	for _, entry in ipairs(pinging_results) do
		local key = entry.original .. entry.actual .. entry.ipa
		local group = group_by_key[key]
		if not group then
			group = {}
			group_by_key[key] = group
			groups[#groups + 1] = group
		end
		group[#group + 1] = entry
	end

	-- 3. Index each group by the input position of its first entry, so groups
	-- can be emitted in input order with a direct lookup instead of scanning
	-- every group (in unspecified pairs() order) for every position.
	local group_at = {}
	for _, group in ipairs(groups) do
		group_at[group[1].input_order] = group
	end

	-- 4. Emit.
	local out = {}
	for position = 1, #pinging_results do
		local group = group_at[position]
		if group then
			local first = group[1]

			local dialect_names = {}
			for _, r in ipairs(group) do
				dialect_names[#dialect_names + 1] = dialects[r.dialect] or r.dialect
			end
			out[#out + 1] = "\n** <small>(''" .. table.concat(dialect_names, ", ") .. "'')</small>"

			local shown_original = clear_pinging_format(first.original)
			local shown_actual = clear_pinging_format(first.actual)
			out[#out + 1] = "\n*** <small>''[[Wiktionary:About Chinese/Puxian Min|Pouseng Ping'ing]]'': </small>" .. font_consolas(shown_original)
			if shown_original ~= shown_actual then
				out[#out + 1] = " [<small>Phonetic</small>: " .. font_consolas(shown_actual) .. "]"
			end

			if first.dialect == "pt" and #first.buc > 0 then
				local buc_words = {}
				for _, buc in ipairs(first.buc) do
					buc_words[#buc_words + 1] = buc.word
				end
				-- The ᴬ/ᴮ class markers are input-only and are not displayed.
				local shown_buc = gsub(table.concat(buc_words, " / "), "[ᴬᴮ]", "")
				out[#out + 1] = "\n*** <small>''[[w:Hinghwa Romanized|Báⁿ-uā-ci̍]]'': </small>" .. font_consolas(shown_buc)
			end

			out[#out + 1] = '\n*** <small>Sinological [[Wiktionary:International Phonetic Alphabet|IPA]] <sup>([[w:Pu–Xian Min|key]])</sup>: </small>' .. font_ipa(first.ipa)
		end
	end

	return table.concat(out)
end
return export