Module:och-pron

The following documentation is located at Module:och-pron/documentation. [edit]
Useful links: subpage list • links • transclusions • testcases • sandbox
Old Chinese pronunciation module. See {{zh-pron}}. Data stored at Module:zh/data/och-pron-BS, Module:zh/data/och-pron-ZS and their subpages.
local export = {}
local m_string_utils = require("Module:string utilities")

local codepoint = m_string_utils.codepoint
local gsub = m_string_utils.gsub
local len = m_string_utils.len
local safe_require = require("Module:utilities").safe_require
local u = m_string_utils.char

-- Background colours for the reconstruction tables built in export.ipa.
-- Each is a CSS custom property with a hex fallback:
--   colour_1: the system title row (fmt.lv1)
--   colour_2: the field-label header cells (fmt.lv2)
--   colour_3: the data cells (fmt.lv3)
local colour_1 = "var(--wikt-palette-lightblue, #d9ebff)"
local colour_2 = "var(--wikt-palette-cyan, #eaffff)"
local colour_3 = "var(--wikt-palette-paleblue, #f8f9fa)"

--- Wrap text in a span tagged as Han script / Chinese.
-- @param text string to wrap
-- @return the wrapped markup, or "" when `text` is the empty string
local function zh_fmt(text)
	if text == "" then
		return ""
	end
	return '<span class="Hani" lang="zh">' .. text .. '</span>'
end

--- Format one reading for display, adding the "/*" opener and "/" closer
-- at the edges of the whole word.
-- @param reading_temp raw reading; the first space-separated token is the
--        form, any remaining tokens are treated as a trailing note
-- @param text the full word being rendered (only its length is used)
-- @param system unused here
-- @param i position of the current character within `text`
-- @param return_note when truthy, the trailing note tokens are appended
-- @param index ordinal of this reading for the character (defaults to 1)
-- @return the formatted string
local function insert_pron(reading_temp, text, system, i, return_note, index)
	-- Glue a ring diacritic to whatever follows it so the split below
	-- does not break the form apart at that space.
	local cleaned = gsub(reading_temp, "([̥̊]) ", "%1")
	local pieces = mw.text.split(cleaned, " ")

	-- Take the form off the front and drop its leading asterisk;
	-- whatever is left in `pieces` is the note.
	local ipa = gsub(table.remove(pieces, 1), "^%*", "")

	local char_count = len(text)

	-- Opener: only on the first character, and (for multi-character
	-- words) only on that character's first reading.
	local opening = ""
	if i == 1 and (char_count == 1 or (index or 1) == 1) then
		opening = "/*"
	end

	-- Closer: only on the last character of the word.
	local closing = ""
	if char_count == i then
		closing = "/"
	end

	local note = ""
	if return_note and #pieces > 0 then
		note = " " .. table.concat(pieces, " ")
	end

	return opening .. ipa .. closing .. note
end

--- Build the cells of one reading for the reconstruction table.
-- Slot 1 is always nil; slots 2.. line up with the per-system field
-- labels used by export.ipa.
-- @param titlechar the character this reading belongs to
-- @param reading one entry of the character's data module
-- @param system "BS" for Baxter–Sagart; anything else gets the Zhengzhang layout
-- @param reading_index number (or numeric string) of this reading
-- @param count total number of readings for the character
-- @param i unused here
-- @return a positional table of cell strings
local function pron_table(titlechar, reading, system, reading_index, count, i)
	local glyph_cell = '<b>' .. zh_fmt(titlechar) .. '</b>'
	local ordinal_cell = reading_index .. "/" .. count

	if system == "BS" then
		local middle_chinese = '‹ <i>' .. gsub(reading[2], '([XH])', '<sup>%1</sup>') .. '</i> ›'
		local old_chinese = '<span class="IPAchar">' ..
			gsub(insert_pron(reading[3], titlechar, "BS", 1, true), "ˤ", "ˁ") ..
			'</span>'
		return {
			nil,
			glyph_cell,
			ordinal_cell,
			reading[1],
			middle_chinese,
			old_chinese,
			reading[4]
		}
	end

	-- Patterns used to mark up the free-text notes column.
	local han_run = "([一-鿿㐀-䶿﨎﨏﨑﨓﨔﨟﨡﨣﨤﨧-﨩𠀀-𪛟𪜀-𮯯𰀀-𱍏，]+)"
	local long_clause = "([^>，][^>，][^>，][^>，][^>，][^>，]，)"
	local han_char = "([一-鿿㐀-䶿﨎﨏﨑﨓﨔﨟﨡﨣﨤﨧-﨩𠀀-𪛟𪜀-𮯯𰀀-𱍏])"

	return {
		nil,
		glyph_cell,
		ordinal_cell,
		reading[1],
		zh_fmt("[[" .. reading[2] .. "]]"),
		zh_fmt("[[" .. reading[3] .. "]]"),
		reading[4],
		zh_fmt("[[" .. reading[5] .. "]]"),
		'<span class="IPAchar">/*' .. reading[6] .. '/</span>',
		-- Deliberately left as the final constructor item and not wrapped in
		-- parentheses: a call in last position contributes every value it
		-- returns, so any extra return value of gsub (presumably a match
		-- count — confirm against Module:string utilities) also lands in
		-- the table, exactly as before.
		gsub(gsub(gsub(reading[7], han_run, zh_fmt("%1")), long_clause, "%1<br>"), han_char, "[[%1]]")
	}
end

--- Load the data subpage for one character under the given system.
-- @param system system code used in the page name (e.g. "BS" or "ZS")
-- @param ch the character
-- @return whatever safe_require yields for that page, or nil if it is falsy
local function get_data(system, ch)
	local page = string.format("Module:zh/data/och-pron-%s/%s", system, ch)
	local loaded = safe_require(page)
	if not loaded then
		return nil
	end
	return loaded
end

--- Render the collapsible Old Chinese tables (Baxter–Sagart, then Zhengzhang)
-- for the characters of the current page title.
--
-- @param index_text reading selectors: ';' separates characters; within one
--        character's entry ',' separates the BS selector from the ZS selector
--        (a single value applies to both systems). A selector is "y" (all
--        readings), "n" (stop processing characters for that system), or a
--        '+'-separated list of reading numbers. A missing entry means "y".
-- @param preview markup concatenated into the toggle line
-- @return the toggle line followed by the tables, or "" if no table was built
function export.ipa(index_text, preview)
	local titlechar = mw.title.getCurrentTitle().text
	local reading_index = mw.text.split(index_text, ";")
	local output_text = {}
	local systems = { "BS", "ZS" }
	-- Per-system list of cell tables produced by pron_table.
	local indiv_pronunciation = { ["BS"] = {}, ["ZS"] = {} }
	-- Suffix shared by the table ids and the toggle class; set once below.
	local rand = ""

	-- Per-system labels. Entry 1 is the table title; entries 2.. are the
	-- row labels (recognised below by containing "small") and line up with
	-- slots 2.. of the tables returned by pron_table.
	local fields = {
		
		["BS"] = {
			"[[w:William H. Baxter|Baxter]]–[[w:Laurent Sagart|Sagart]] system 1.1 " ..
				"([http://ocbaxtersagart.lsait.lsa.umich.edu/ 2014])",
			"<small>Character</small>",
			"<small>Reading #</small>",
			"<small>Modern<br>Beijing<br/>(Pinyin)</small>",
			"<small>Middle<br>Chinese</small>",
			"<small>Old<br>Chinese</small>",
			"<small>English</small>"
		},
	
		["ZS"] = {
			"[[w:Zhengzhang Shangfang|Zhengzhang]] system (2003)",
			"<small>Character</small>",
			"<small>Reading #</small>",
			"<small>No.</small>",
			"<small>Phonetic<br>component</small>",
			"<small>Rime<br>group</small>",
			"<small>Rime<br>subdivision</small>",
			"<small>Corresponding<br>MC rime</small>",
			"<small>Old<br>Chinese</small>",
			"<small>Notes</small>"
		}
	}
	
	for system_seq, system in ipairs(systems) do
		-- Collect the selected readings of every character in the title.
		for i, cp in ipairs { codepoint(titlechar, 1, -1) } do
			local ch = u(cp)
			local data_module = get_data(system, ch)
			if data_module then
				-- Number of readings in the sequence part of the data.
				local count = 0
				for index, value in ipairs(data_module) do
					count = count + 1
				end
				-- Choose this system's selector for character i; fall back to
				-- the whole entry when it has no ',' part for this system,
				-- and to "y" when there is no entry at all.
				local reading_number = reading_index[i] and (mw.text.split(reading_index[i], ',')[system_seq] or reading_index[i]) or "y"
				if reading_number == "y" then
					-- NOTE: this loop variable shadows the outer `reading_index`
					-- table for the duration of the loop.
					for reading_index, reading in ipairs(data_module) do
						table.insert(indiv_pronunciation[system], pron_table(ch, reading, system, reading_index, count, i))
					end
				elseif reading_number == "n" then
					-- Leaves the per-character loop, so later characters are
					-- skipped for this system as well.
					break
				else
					for indiv_number in mw.text.gsplit(reading_number, '%+') do
						table.insert(indiv_pronunciation[system], pron_table(ch, data_module[tonumber(indiv_number)], system, indiv_number, count, i))
					end
				end
			end
		end
		if indiv_pronunciation[system][1] then
			-- Drop duplicate readings. Each entry has a nil placeholder in
			-- slot 1: it is removed so the remaining cells can be concatenated
			-- into a dedupe key, then put back (for kept entries only) so the
			-- cells stay aligned with the indices of `fields[system]`.
			local hash, results = {}, {}
			local value_eff
			for _, value in ipairs(indiv_pronunciation[system]) do
				-- `valeur` is the same table as `value`, not a copy.
				local valeur = value
				table.remove(valeur, 1)
				value_eff = table.concat(valeur)
				if not hash[value_eff] then
					hash[value_eff] = true
					table.insert(value, 1, nil)
					results[#results + 1] = value
				end
			end
			-- Derive the id suffix once, from the last key computed above:
			-- every character outside A-Za-z0-9 is replaced by whatever
			-- `codepoint` returns for it (presumably its code point number).
			-- Once set it is reused, so both systems' tables share one id.
			rand = rand ~= "" and rand or gsub("oc-" .. value_eff, "[^A-Za-z0-9]", codepoint)
			-- Wikitext fragments; colspan is one label column plus one column
			-- per kept reading.
			local fmt = {
				header = '\n{| class="wikitable mw-collapsible mw-collapsed" id="mw-customcollapsible-oc' .. rand ..
					'" style="width:100%; margin:0; text-align:center; border-collapse: collapse; border-style: hidden; display: table;"',
				lv1 = '\n|-\n! style="background-color:' .. colour_1 .. '" colspan=' .. #results+1 .. '|',
				lv2 = '\n|-\n! style="background-color:' .. colour_2 .. '; width:8em"|',
				lv3 = '\n| style="background-color:' .. colour_3 .. '"|',
				closing = '\n|}',
				
				BS_note = '\n|-\n|colspan=' .. #results+1 ..
					[=[ style="text-align:left; font-size:90%"|<div class="toccolours mw-collapsible mw-collapsed">
					'''''Notes''' for Old Chinese notations in the Baxter–Sagart system:''
					<div class="mw-collapsible-content">
					* Parentheses "()" indicate uncertain presence;<br>
					* Square brackets "[]" indicate uncertain identity, e.g. *[t] as coda may in fact be *-t or *-p;<br>
					* Angle brackets "&lt;>" indicate infix;<br>
					* Hyphen "-" indicates morpheme boundary;<br>
					* Period "." indicates syllable boundary.</div>
					</div>]=]
			}
		
			table.insert(output_text, fmt.header)
			for field_index, field in ipairs(fields[system]) do
				if field:find("small") then
					-- A labelled row: one cell per kept reading. The row is
					-- omitted when every cell is empty.
					local field_set = {}
					for _, result in ipairs(results) do
						table.insert(field_set, result[field_index])
					end
					if table.concat(field_set) ~= "" then
						table.insert(output_text, fmt.lv2 .. field .. fmt.lv3 .. table.concat(field_set, fmt.lv3))
					end
				else
					-- The title row spanning the whole table.
					table.insert(output_text, fmt.lv1 .. field)
				end
			end
			table.insert(output_text, (system == "BS" and fmt.BS_note or "") .. fmt.closing)
		end
	end
	-- Toggle line whose class matches the id given to the tables above.
	local fold = '\n* <div title="expand" class="mw-customtoggle-oc' .. rand .. '"> ' ..
		'[[w:Old Chinese|Old Chinese]]<span style="float:right; border:1px solid #ccc; border-radius:1px;' ..
		' padding:0 0; font-size:90%">▼</span>' .. preview .. '</div>'
	-- Empty links ("[[]]") left by blank data fields are stripped.
	return output_text[1] and fold .. gsub(table.concat(output_text), "%[%[%]%]", "") or ""
end

--- Produce a short Old Chinese transcription for a run of characters,
-- using the Zhengzhang ("ZS") data, or a caller-supplied reconstruction.
--
-- @param text the characters to transcribe; if a table is given, its
--        `args[1]` is used instead. Links are stripped before processing.
-- @param reconstruction if truthy, returned (after the intro) in place of
--        any looked-up transcription
-- @param no_intro if truthy, the leading "OC" abbreviation span is omitted
-- @param index optional per-character reading selectors separated by ',';
--        each selector is "y" (all readings) or a '+'-separated list of
--        reading numbers. nil/false or "y" selects all readings everywhere.
-- @return the intro followed by the transcription, or nil when a lookup is
--         needed and some character has no ZS data
-- Raises an error when a selector names a reading that does not exist.
function export.retrieve_pron(text, reconstruction, no_intro, index)
	if type(text) == "table" then text = text.args[1] end
	text = require("Module:links").remove_links(text)
	local retrieve_result = {}
	local intro = no_intro and "" or "<span style=\"border-bottom: 1px dotted #000; cursor:help\" title=\"Old Chinese\">OC</span> "
	if not reconstruction then
		local index_set
		if index and index ~= "y" then
			index_set = mw.text.split(index, ",")
		end
		for char_index, cp in ipairs { codepoint(text, 1, -1) } do
			local char_pronunciation = {}
			local ch = u(cp)
			local data_module = get_data("ZS", ch)
			if not data_module then
				-- Without data for every character no transcription is given.
				return nil
			end
			local reading_no = index_set and index_set[char_index] or "y"
			if reading_no ~= "y" then
				-- '+' is a pattern quantifier, so it is escaped here, as in the
				-- other places in this module that split on it. The bare "+"
				-- used previously depended on the matcher treating a leading
				-- quantifier as a literal character.
				for number in mw.text.gsplit(reading_no, "%+") do
					local reading = data_module[tonumber(number)]
					if not reading then
						-- Fail with a message naming the bad selector instead
						-- of an opaque "attempt to index a nil value".
						error(("Old Chinese (Zhengzhang) reading \"%s\" does not exist for \"%s\""):format(tostring(number), ch))
					end
					table.insert(char_pronunciation, reading[6])
				end
			else
				for _, reading in ipairs(data_module) do
					table.insert(char_pronunciation, reading[6])
				end
			end
			-- Alternative readings: ", *" for a one-character text, "/" otherwise.
			table.insert(retrieve_result, table.concat(char_pronunciation, len(text) == 1 and ", *" or "/"))
		end
	end
	return intro .. (reconstruction or "*" .. table.concat(retrieve_result, " "))
end

--- Build the one-line-per-system summary of Old Chinese readings for `text`.
-- @param text the characters to show readings for
-- @param index reading selectors: ';' between characters, ',' between the
--        BS and ZS selector of one character; each selector is "y" (all
--        readings), "n" (stop handling characters for that system) or a
--        '+'-separated list of reading numbers
-- @return the wikitext lines, or nil when neither system produced anything
function export.generate_show(text, index)
	local selectors = mw.text.split(index, ";")
	-- Which slot of a data entry holds the Old Chinese form.
	local column = { ["BS"] = 3, ["ZS"] = 6 }
	local label = {
		["BS"] = "\n*: <small>(''[[w:William H. Baxter|Baxter]]–[[w:Laurent Sagart|Sagart]]'')</small>: " ..
			'<span style="font-size:95%">',
		["ZS"] = "\n*: <small>(''[[w:Zhengzhang Shangfang|Zhengzhang]]'')</small>: " ..
			'<span style="font-size:95%">',
	}
	local lines = {}

	for system_seq, system in ipairs({ "BS", "ZS" }) do
		local per_char = {}

		for i, cp in ipairs { codepoint(text, 1, -1) } do
			local ch = u(cp)
			local data_module = get_data(system, ch)
			if not data_module then
				-- One character without data discards the whole line.
				per_char = {}
				break
			end

			local selector = "y"
			if selectors[i] then
				selector = mw.text.split(selectors[i], ',')[system_seq] or selectors[i]
			end
			if selector == "n" then
				break
			end

			local forms, seen, ordinal = {}, {}, 0
			-- Count every reading visited, but format each distinct form once.
			local function add(form)
				ordinal = ordinal + 1
				if not seen[form] then
					forms[#forms + 1] = insert_pron(form, text, system, i, false, ordinal)
					seen[form] = true
				end
			end

			if selector == "y" then
				for _, reading in ipairs(data_module) do
					add(reading[column[system]])
				end
			else
				for indiv_number in mw.text.gsplit(selector, '%+') do
					add(data_module[tonumber(indiv_number)][column[system]])
				end
			end

			per_char[#per_char + 1] = table.concat(forms, len(text) == 1 and ", " or "｜")
		end

		if per_char[1] then
			lines[#lines + 1] = label[system] ..
				'<span class="IPAchar">' ..
				table.concat(per_char, "&nbsp; ") ..
				'</span>' ..
				"</span>"
		end
	end

	if not lines[1] then
		return nil
	end

	-- Tidy the alternative separators, then render them as padded bars.
	local shown = gsub(table.concat(lines), "｜%*", "｜")
	shown = gsub(shown, "/｜", "｜")
	shown = gsub(shown, "｜", '<span style="padding-left:2px; padding-right:2px">|</span>')
	return shown
end

--- Link an Old Chinese term through Module:zh/link, with its transcription.
-- @param frame the invoking frame; its parent's args are used when `arg` is absent
-- @param arg optional argument table used in place of the parent frame's args
-- @return whatever Module:zh/link's `link` returns
function export.link(frame, arg)
	local args = arg or frame:getParent().args
	local text = args[1]
	local meaning = args[2] or args['gloss'] or nil
	local lit = args['lit'] or nil

	local zh_link = require("Module:zh/link")
	local link_args = {
		"*" .. text,
		tr = export.retrieve_pron(text, args["tr"] or false, args["no_intro"] or false, args["id"] or false),
		gloss = meaning,
		lit = lit
	}
	return zh_link.link(frame, nil, link_args, mw.title.getCurrentTitle().subpageText)
end

return export