Jump to content

မေႃႇၵျူး:pcc-pron

လုၵ်ႉတီႈ ဝိၵ်ႇသျိၼ်ႇၼရီႇ မႃး
This မေႃႇၵျူး page is experimental.
The details of its operation have not yet been fully decided upon. Do not deploy widely until the မေႃႇၵျူး page is finished.

Pronunciation module for Bouyei. See {{pcc-pron}}.


-- Table of public functions; it is returned at the end of the module.
local export = {}
-- Short local aliases for the mw.ustring / mw.text library functions, so the
-- code below can call them without the full library prefix.
local find = mw.ustring.find
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local gmatch = mw.ustring.gmatch
local gsplit = mw.text.gsplit
local lower = mw.ustring.lower
local upper = mw.ustring.upper
local len = mw.ustring.len

-- https://en.wikipedia.org/wiki/Bouyei_language

-- Orthographic syllable initial -> IPA string.
-- export.ipa looks up the initial it captured from each syllable in this
-- table and raises an error when the initial has no entry here.
-- The empty-string key covers syllables written without any initial.
local initialConv = {
	['b']   = 'p',
	['p']	= 'pʰ',
	['mb']  = 'ɓ',
	['m']   = 'm',
	['f']   = 'f',
	['v']   = 'v',
	['w']   = 'w', -- to support /v/ ~ [w] in some cases

	['d']   = 't',
	['t']	= 'tʰ',
	['nd']  = 'ɗ',
	['n']   = 'n',
	['sl']	= 'ɬ',
	['l']   = 'l',

	['g']   = 'k',
	['k']   = 'kʰ',
	['ng']  = 'ŋ',
	['h']   = 'x',
	['hr']	= 'ɣ',

	['j']	= 't͡ɕ',
	['q']	= 't͡ɕʰ',
	['ny']  = 'ɲ',
	['x']   = 'ɕ',
	['y']   = 'j',
	
	['z']	= 't͡s',
	['c']	= 't͡sʰ',
	['s']   = 's',
	['r']   = 'z',
	
	-- Initials written with a following y.
	['by']  = 'pʲ',
	['my']  = 'mʲ',
	['qy']	= 'ˀj',
	
	-- Initials written with a following v.
	['gv']  = 'kʷ',
	['ngv'] = 'ŋʷ',
	['qv']  = 'ˀv',

	-- No written initial.
	['']    = 'ʔ',
}

-- Orthographic rime (vowel nucleus plus optional coda) -> IPA string.
-- export.ipa looks up the rime it captured from each syllable in this table
-- and raises an error when the rime has no entry here.
-- NOTE(review): the keys 'ô', 'ê' and 'î' cannot be captured by the rime
-- pattern in export.ipa, which only accepts a/e/i/o/u as vowels; they look
-- intended to be substituted for o/e/i in syllables carrying the tone letters
-- y/f/j/q -- confirm against export.ipa.
local rimeConv = {
	-- Single vowels.
	['a']		= 'a',
	['o']		= 'o',
	['ô']		= 'ɔ',
	['ee']		= 'e',
	['ê']		= 'ɛ', -- e in Chinese loanwords
	['i']		= 'i',
	['î']		= 'z̩',
	['u']		= 'u',
	['e']		= 'ɯ',
	
	-- Rimes ending in -i.
	['aai']		= 'aːi',
	['ai']		= 'ɐi',
	['oi']		= 'oːi',
	['ei']		= 'ɯi',
	
	-- Rimes ending in -u.
	['aau']		= 'aːu',
	['au']		= 'ɐu',
	['eeu']		= 'eːu',
	['iu']		= 'iu',
	
	-- Rimes ending in -e / -a.
	['ae']		= 'ɐɯ',
	['ie']		= 'iə',
	['ue']		= 'uə',
	['ea']		= 'ɯə',
	
	-- Rimes ending in -m.
	['aam']		= 'aːm',
	['am']		= 'ɐm',
	['oom']		= 'oːm',
	['om']		= 'ɔm',
	['eem']		= 'eːm',
	['iam']		= 'iəm',
	['im']		= 'im',
	['uam']		= 'uəm',
	['um']		= 'um',
	['eam']		= 'ɯəm',
	
	-- Rimes ending in -n.
	['aan']		= 'aːn',
	['an']		= 'ɐn',
	['oon']		= 'oːn',
	['on']		= 'ɔn',
	['een']		= 'eːn',
	['ian']		= 'iən',
	['in']		= 'in',
	['uan']		= 'uən',
	['un']		= 'un',
	['ean']		= 'ɯən',
	['en']		= 'ɯn',
	
	-- Rimes ending in -ng.
	['aang']	= 'aːŋ',
	['ang']		= 'ɐŋ',
	['oong']	= 'oːŋ',
	['ong']		= 'ɔŋ',
	['eeng']	= 'eːŋ',
	['iang']	= 'iəŋ',
	['ing']		= 'iŋ',
	['uang']	= 'uəŋ',
	['ung']		= 'uŋ',
	['eang']	= 'ɯəŋ',
	['eng']		= 'ɯŋ',
	
	-- Rimes ending in -b (mapped to an unreleased p).
	['aab']		= 'aːp̚',
	['ab']		= 'ɐp̚',
	['oob']		= 'oːp̚',
	['ob']		= 'ɔp̚',
	['eeb']		= 'eːp̚',
	['iab']		= 'iəp̚',
	['ib']		= 'ip̚',
	['uab']		= 'uəp̚',
	['ub']		= 'up̚',
	['eab']		= 'ɯəp̚',
	['eb']		= 'ɯp̚',
	
	-- Rimes ending in -d (mapped to an unreleased t).
	['aad']		= 'aːt̚',
	['ad']		= 'ɐt̚',
	['ood']		= 'oːt̚',
	['od']		= 'ɔt̚',
	['eed']		= 'eːt̚',
	['iad']		= 'iət̚',
	['id']		= 'it̚',
	['uad']		= 'uət̚',
	['ud']		= 'ut̚',
	['ead']		= 'ɯət̚',
	['ed']		= 'ɯt̚',
	
	-- Rimes ending in -g (mapped to an unreleased k).
	['ag']		= 'ɐk̚',
	['og']		= 'ɔk̚',
	['eeg']		= 'ek̚',
	['ig']		= 'ik̚',
	['ug']		= 'uk̚',
	['eg']		= 'ɯk̚',
	
	-- Additional rimes; presumably used in loanwords -- TODO confirm.
	['ia']		= 'ia',
	['io']		= 'io',
	['iao']		= 'iɐu',
	['ua']		= 'ua',
	['ui']		= 'ui',
	['uai']		= 'uɐi',
	['ao']		= 'aːu',
	['ou']		= 'əu',
	['er']		= 'ɚ',
}

-- Tone letter (the last letter of a written syllable) -> tone-letter string.
-- The keys are exactly the letters get_tone strips from the end of a syllable;
-- the empty-string key is used when a syllable carries no tone letter.
local toneConv = {
	['l']	= '˨˦',
	['z']	= '˩',
	['c']	= '˥˧',
	['x']	= '˧˩',
	['s']	= '˧˥',
	['h']	= '˧',
	['t']	= '˧˥',
	['']	= '˧',
	
	-- export.ipa treats these four tone letters as a separate group;
	-- presumably they mark loanword tones -- TODO confirm.
	['y']	= '˧',
	['f']	= '˧˩',
	['j']	= '˥˧',
	['q']	= '˨˦',
}

--- Split a trailing tone letter off a syllable.
-- The recognised tone letters are the non-empty keys of toneConv
-- (l z c x s h t y f j q).
-- @param syllable string: a single written syllable.
-- @return string: the syllable without its tone letter; the unchanged
--   syllable when it has no tone letter or consists of nothing else.
-- @return string: the tone letter, or "" when none was split off.
local function get_tone(syllable)
	-- Anchored at both ends so the whole syllable is accounted for, and "(.+)"
	-- rather than "[a-z]+" so an unexpected character before the tone letter
	-- can no longer make the match fail and leak nil values to the caller.
	-- At least one character must remain in front of the tone letter.
	local toneless, tone = match(syllable, "^(.+)([lzcxshtyfjq])$")
	if not toneless then
		return syllable, ""
	end
	return toneless, tone
end

--- Break a word into written syllables.
-- Apostrophes, spaces and hyphens are explicit syllable separators. In
-- addition, a boundary is inserted after a consonant that stands between two
-- vowels (vowel + consonant | vowel).
-- @param text string: the lowercase word or phrase to split.
-- @return an iterator (from mw.text.gsplit) yielding one syllable per step.
local function syllabify(text)
	text = gsub(text, "'", " ")
	-- The middle character must not itself be a separator, otherwise input such
	-- as "ma a" or "ma-a" would gain a second separator and produce an empty
	-- syllable. gsub matches never overlap, so a run like V-C-V-C-V needs more
	-- than one pass; repeat until nothing is left to split.
	local count
	repeat
		text, count = gsub(text, "([aeiou][^aeiou %-])([aeiou])", "%1 %2")
	until count == 0
	--text = gsub(text, "([lzcxshtyfjqbdg])([^aeiou])", "%1 %2")
	return gsplit(text, "[- ]")
end

--- Convert a word or phrase in Bouyei orthography to a phonemic IPA string.
-- Each syllable is split into initial, rime and tone letter, and each part is
-- looked up in initialConv, rimeConv and toneConv.
-- @param text string: the spelling; syllables may be separated by spaces,
--   hyphens or apostrophes.
-- @return string: the syllable transcriptions joined by "." and wrapped in
--   slashes.
-- Raises an error when a syllable cannot be parsed, when its initial or rime
-- has no table entry, or when the input contains no syllable at all.
function export.ipa(text)
	text = string.lower(text)
	local syllables = {}
	for syllable in syllabify(text) do
		-- Doubled or leading/trailing separators yield empty pieces; skip them.
		if syllable ~= "" then
			local toneless, tone = get_tone(syllable)
			local initial, rime
			if toneless then
				initial, rime = match(toneless, "^([mnshbq]?[bpfwdtlgkrjyxzc]?v?)([aeiou][aeiou]?[ioubdgmnr]?g?)$")
			end
			if not initial or not rime then
				-- Fall back to the full syllable so building the message
				-- cannot itself fail on a nil value.
				error((toneless or syllable) .. " cannot be recognized")
			end
			-- With the tone letters y/f/j/q the plain vowels e, o and (after
			-- z/c/s/r) i select the special rime entries ê, ô and î. The
			-- substitution applies to the rime, and the condition for î looks
			-- at the initial.
			if find(tone, "^[yfjq]$") then
				if rime == "e" then
					rime = "ê"
				elseif rime == "o" then
					rime = "ô"
				elseif rime == "i" and find(initial, "^[zcsr]$") then
					rime = "î"
				end
			end
			local initial_ipa, rime_ipa, tone_value = initialConv[initial], rimeConv[rime], toneConv[tone]
			if not initial_ipa then
				error(initial .. " is not a valid initial")
			elseif not rime_ipa then
				error(rime .. " is not a valid rime")
			end
			table.insert(syllables, initial_ipa .. rime_ipa .. tone_value)
		end
	end
	if #syllables == 0 then
		error("no syllables found in input")
	end
	return "/" .. table.concat(syllables, ".") .. "/"
end

--- Template entry point: render the pronunciation as one bulleted line.
-- Reads the first positional argument from the parent frame; when it is
-- absent, the text of the current page title is transcribed instead.
-- @param frame the frame object passed in by the invoking template.
-- @return string: "* " followed by the output of Module:IPA's format_IPA_full.
function export.show(frame)
	local spec = {
		[1] = { },
	}
	local parsed = require("Module:parameters").process(frame:getParent().args, spec)
	local word = parsed[1] or mw.title.getCurrentTitle().text

	-- Resolved in the same order as a single nested call would evaluate them:
	-- formatter first, then the language object, then the transcription.
	local format_ipa = require('Module:IPA').format_IPA_full
	local lang = require('Module:languages').getByCode('pcc')
	local formatted = format_ipa(lang, { { pron = export.ipa(word) } })

	return string.format('* %s', formatted)
end

-- Return the module table carrying export.ipa and export.show.
return export