-- Module:Sandbox/Erutuon/X-SAMPA

-- From Chalo Chatu, Zambia online encyclopedia

-- Table of functions exported by this module.
local p = {}

-- Local aliases for the Unicode-aware string functions of mw.ustring.
local U = mw.ustring.char
local gsub = mw.ustring.gsub
local sub = mw.ustring.sub
local find = mw.ustring.find
local length = mw.ustring.len

-- Backslashes \, apostrophes ', and double quotes " are escaped with \.
-- \\ = \, \' = ', \" = "

--[[
	Lookup table from X-SAMPA symbol to IPA.

	Each value is a table whose first element is the IPA replacement.
	Optional fields:
		has_descender	flag read by the converter when deciding which form of
						a following mark to use
		with_descender	alternative form of this mark, used after a symbol
						flagged has_descender
		is_diacritic	flag for symbols that the converter skips when looking
						backwards for the symbol a mark attaches to

	Keys are at most four characters long, which is the longest prefix the
	converter tries.
]]
local data = {
	["a"] = { "a" },
	["b"] = { "b" },
	-- not in official X-SAMPA; from http://www.kneequickie.com/kq/Z-SAMPA and used by Wiktionary
	["b\\"] = { "ⱱ" },
	["b_<"] = { "ɓ" },
	["c"] = { "c" },
	["d"] = { "d" },
	["d`"] = { "ɖ", has_descender = true },
	["d_<"] = { "ɗ" },
	-- not in official X-SAMPA; Wikipedia-specific
	["d`_<"] = { "ᶑ", has_descender = true },
	["e"] = { "e" },
	["f"] = { "f" },
	["g"] = { "ɡ", has_descender = true },
	["g_<"] = { "ɠ", has_descender = true },
	["h"] = { "h" },
	["h\\"] = { "ɦ" },
	["i"] = { "i" },
	["j"] = { "j", has_descender = true },
	["j\\"] = { "ʝ", has_descender = true },
	["k"] = { "k" },
	["l"] = { "l" },
	["l`"] = { "ɭ", has_descender = true },
	["l\\"] = { "ɺ" },
	["m"] = { "m" },
	["n"] = { "n" },
	["n`"] = { "ɳ", has_descender = true },
	["o"] = { "o" },
	["p"] = { "p", has_descender = true },
	["p\\"] = { "ɸ", has_descender = true },
	["q"] = { "q", has_descender = true },
	["r"] = { "r" },
	["r`"] = { "ɽ", has_descender = true },
	["r\\"] = { "ɹ" },
	["r\\`"] = { "ɻ", has_descender = true },
	["s"] = { "s" },
	["s`"] = { "ʂ", has_descender = true },
	["s\\"] = { "ɕ" },
	["t"] = { "t" },
	["t`"] = { "ʈ" },
	["u"] = { "u" },
	["v"] = { "v" },
	["v\\"] = { "ʋ" },
	["w"] = { "w" },
	["x"] = { "x" },
	["x\\"] = { "ɧ", has_descender = true },
	["y"] = { "y", has_descender = true },
	["z"] = { "z" },
	["z`"] = { "ʐ", has_descender = true },
	["z\\"] = { "ʑ" },
	["A"] = { "ɑ" },
	["B"] = { "β", has_descender = true },
	["B\\"] = { "ʙ" },
	["C"] = { "ç", has_descender = true },
	["D"] = { "ð" },
	["E"] = { "ɛ" },
	["F"] = { "ɱ", has_descender = true },
	["G"] = { "ɣ", has_descender = true },
	["G\\"] = { "ɢ" },
	["G\\_<"] = { "ʛ" },
	["H"] = { "ɥ", has_descender = true },
	["H\\"] = { "ʜ" },
	["I"] = { "ɪ" },
	["I\\"] = { "ɪ̈" },
	["J"] = { "ɲ", has_descender = true },
	["J\\"] = { "ɟ" },
	["J\\_<"] = { "ʄ", has_descender = true },
	["K"] = { "ɬ" },
	["K\\"] = { "ɮ", has_descender = true },
	["L"] = { "ʎ" },
	["L\\"] = { "ʟ" },
	["M"] = { "ɯ" },
	["M\\"] = { "ɰ", has_descender = true },
	["N"] = { "ŋ", has_descender = true },
	["N\\"] = { "ɴ" },
	["O"] = { "ɔ" },
	["O\\"] = { "ʘ" },
	["P"] = { "ʋ" },
	["Q"] = { "ɒ" },
	["R"] = { "ʁ" },
	["R\\"] = { "ʀ" },
	["S"] = { "ʃ", has_descender = true },
	["T"] = { "θ" },
	["U"] = { "ʊ" },
	["U\\"] = { "ʊ̈" },
	["V"] = { "ʌ" },
	["W"] = { "ʍ" },
	["X"] = { "χ", has_descender = true },
	["X\\"] = { "ħ" },
	["Y"] = { "ʏ" },
	["Z"] = { "ʒ", has_descender = true },
	["."] = { "." },
	["\""] = { "ˈ" },
	["%"] = { "ˌ" },
	-- not in official X-SAMPA; from http://www.kneequickie.com/kq/Z-SAMPA and used by Wiktionary
	["%\\"] = { "ᴙ" },
	["'"] = { "ʲ", is_diacritic = true },
	[":"] = { "ː", is_diacritic = true },
	[":\\"] = { "ˑ", is_diacritic = true },
	["@"] = { "ə" },
	["@`"] = { "ɚ" },
	["@\\"] = { "ɘ" },
	["{"] = { "æ" },
	["}"] = { "ʉ" },
	["1"] = { "ɨ" },
	["2"] = { "ø" },
	["3"] = { "ɜ" },
	["3`"] = { "ɝ" },
	["3\\"] = { "ɞ" },
	["4"] = { "ɾ" },
	["5"] = { "ɫ" },
	["6"] = { "ɐ" },
	["7"] = { "ɤ" },
	["8"] = { "ɵ" },
	["9"] = { "œ" },
	["&"] = { "ɶ" },
	["?"] = { "ʔ" },
	["?\\"] = { "ʕ" },
	["<\\"] = { "ʢ" },
	[">\\"] = { "ʡ" },
	["^"] = { "ꜛ" },
	["!"] = { "ꜜ" },
	-- not in official X-SAMPA
	["!!"] = { "‼" },
	["!\\"] = { "ǃ" },
	["|"] = { "|", has_descender = true },
	["|\\"] = { "ǀ", has_descender = true },
	["||"] = { "‖", has_descender = true },
	["|\\|\\"] = { "ǁ", has_descender = true },
	["=\\"] = { "ǂ", has_descender = true },
	-- linking mark, liaison
	["-\\"] = { "‿", is_diacritic = true },
	-- coarticulated; not in official X-SAMPA; used by Wiktionary
	["__"] = { U(0x361) },
	-- fortis, strong articulation; not in official X-SAMPA; used by Wiktionary
	["_:"] = { U(0x348) },
	["_\""] = { U(0x308), is_diacritic = true },
	-- advanced
	["_+"] = { U(0x31F), with_descender = "˖", is_diacritic = true },
	-- retracted
	["_-"] = { U(0x320), with_descender = "˗", is_diacritic = true },
	-- rising tone
	["_/"] = { U(0x30C), is_diacritic = true },
	-- voiceless
	["_0"] = { U(0x325), with_descender = U(0x30A), is_diacritic = true },
	-- syllabic
	["="] = { U(0x329), with_descender = U(0x30D), is_diacritic = true },
	-- syllabic
	["_="] = { U(0x329), with_descender = U(0x30D), is_diacritic = true },
	-- strident: not in official X-SAMPA; from http://www.kneequickie.com/kq/Z-SAMPA and used by Wiktionary
	["_%\\"] = { U(0x1DFD) },
	-- ejective
	["_>"] = { "ʼ", is_diacritic = true },
	-- pharyngealized
	["_?\\"] = { "ˤ", is_diacritic = true },
	-- falling tone
	["_\\"] = { U(0x302), is_diacritic = true },
	-- non-syllabic
	["_^"] = { U(0x32F), with_descender = U(0x311), is_diacritic = true },
	-- no audible release
	["_}"] = { U(0x31A), is_diacritic = true },
	-- r-coloring (colouring), rhotacization
	["`"] = { U(0x2DE), is_diacritic = true },
	-- nasalization
	["~"] = { U(0x303), is_diacritic = true },
	-- advanced tongue root
	["_A"] = { U(0x318), is_diacritic = true },
	-- apical
	["_a"] = { U(0x33A), is_diacritic = true },
	-- extra-low tone
	["_B"] = { U(0x30F), is_diacritic = true },
	-- low rising tone
	["_B_L"] = { U(0x1DC5), is_diacritic = true },
	-- less rounded
	["_c"] = { U(0x31C), is_diacritic = true },
	-- dental
	["_d"] = { U(0x32A), is_diacritic = true },
	-- velarized or pharyngealized (dark)
	["_e"] = { U(0x334), is_diacritic = true },
	-- global fall
	["<F>"] = { "↘" },
	-- falling tone
	["_F"] = { U(0x302), is_diacritic = true },
	-- velarized
	["_G"] = { "ˠ", is_diacritic = true },
	-- high tone
	["_H"] = { U(0x301), is_diacritic = true },
	-- high rising tone
	["_H_T"] = { U(0x1DC4), is_diacritic = true },
	-- aspiration
	["_h"] = { "ʰ", is_diacritic = true },
	-- palatalization
	["_j"] = { "ʲ", is_diacritic = true },
	-- creaky voice, laryngealization, vocal fry
	["_k"] = { U(0x330), is_diacritic = true },
	-- low tone
	["_L"] = { U(0x300), is_diacritic = true },
	-- lateral release
	["_l"] = { "ˡ", is_diacritic = true },
	-- mid tone
	["_M"] = { U(0x304), is_diacritic = true },
	-- laminal
	["_m"] = { U(0x33B), is_diacritic = true },
	-- linguolabial
	["_N"] = { U(0x33C), is_diacritic = true },
	-- nasal release
	["_n"] = { "ⁿ", is_diacritic = true },
	-- more rounded
	["_O"] = { U(0x339), is_diacritic = true },
	-- lowered
	["_o"] = { U(0x31E), with_descender = "˕", is_diacritic = true },
	-- retracted tongue root
	["_q"] = { U(0x319), is_diacritic = true },
	-- global rise
	["<R>"] = { "↗" },
	-- rising tone
	["_R"] = { U(0x30C), is_diacritic = true },
	-- rising falling tone
	["_R_F"] = { U(0x1DC8), is_diacritic = true },
	-- raised
	["_r"] = { U(0x31D), is_diacritic = true },
	-- extra-high tone
	["_T"] = { U(0x30B), is_diacritic = true },
	-- breathy voice, murmured voice, murmur, whispery voice
	["_t"] = { U(0x324), is_diacritic = true },
	-- voiced
	["_v"] = { U(0x32C), is_diacritic = true },
	-- labialized
	["_w"] = { "ʷ", is_diacritic = true },
	-- extra-short
	["_X"] = { U(0x306), is_diacritic = true },
	-- mid-centralized
	["_x"] = { U(0x33D), is_diacritic = true },
	["__T"] = { "˥" },
	["__H"] = { "˦" },
	["__M"] = { "˧" },
	["__L"] = { "˨" },
	["__B"] = { "˩" },
	["0"] = { "◌" }, -- dotted circle
}

--[[
	Replace every match of `pattern` in `text` with a run of "$" characters
	and store the matched text in `list`.

	The first match is stored at list[i] and replaced by i dollar signs; each
	later match uses the next index and one more dollar sign. `i` is a plain
	number argument, so the caller's own counter is not advanced.

	Returns the rewritten text only (the match count from gsub is dropped).
]]
local function escape(text, pattern, list, i)
	local function stash(match)
		local slot = i
		list[slot] = match
		i = slot + 1
		return string.rep("$", slot)
	end

	local rewritten = mw.ustring.gsub(text, pattern, stash)
	return rewritten
end

-- Report whether the nearest preceding non-diacritic symbol is flagged
-- has_descender. Returns false when there is no such symbol (for example when
-- a mark is the first thing in the transcription).
local function preceding_base_has_descender(characteristics)
	for index = #characteristics, 1, -1 do
		local entry = characteristics[index]
		if not entry.is_diacritic then
			return entry.has_descender and true or false
		end
	end
	return false
end

--[[
	Convert an X-SAMPA string to IPA using the `data` table.

	Steps:
	1.	Text matching the patterns in `toBeEscaped` is replaced by a
		placeholder of n "$" characters followed by "*", where n is the index
		under which the protected text is stored. With the pattern "*(.)" the
		stored text is the character that follows the "*".
	2.	The remaining text is consumed from the left. At each position the
		longest prefix (4, 3, 2, then 1 characters) that is a key of `data` is
		replaced by its IPA value; a single character with no entry is copied
		unchanged.
	3.	A symbol that has a `with_descender` form uses that form when the
		nearest preceding non-diacritic symbol is flagged `has_descender`.
	4.	Placeholders are replaced by the text stored in step 1.

	@param text string X-SAMPA input
	@return string IPA output
]]
local function _XSAMPAtoIPA(text)
	local output = {}
	local characteristics = {}

	local escaped = {}
	-- Next free index in `escaped`; shared by all patterns so that a later
	-- pattern cannot overwrite what an earlier one stored.
	local next_slot = 1
	local toBeEscaped = { "*(.)", "" }

	for _, pattern in ipairs(toBeEscaped) do
		-- An empty pattern matches at every position and would put a
		-- placeholder between all characters, splitting every multi-character
		-- symbol, so it is skipped.
		-- NOTE(review): the empty entry looks like a pattern whose content
		-- was lost; confirm what it was meant to protect.
		if pattern ~= "" then
			text = mw.ustring.gsub(
				text,
				pattern,
				function(match)
					escaped[next_slot] = match
					local replacement = string.rep("$", next_slot) .. "*"
					next_slot = next_slot + 1
					return replacement
				end
			)
		end
	end
	mw.log(next_slot, #escaped, text)
	mw.logObject(escaped)

	while #text > 0 do
		-- Try the longest candidate first.
		for len = 4, 1, -1 do
			local substring = sub(text, 1, len)
			local result = data[substring]
			local IPA, has_descender, is_diacritic

			if result then
				IPA = result[1]
				has_descender = result.has_descender
				is_diacritic = result.is_diacritic

				local with_descender = result.with_descender
				if with_descender and preceding_base_has_descender(characteristics) then
					IPA = with_descender
				end
			elseif len == 1 then
				-- No entry of any length: copy the character unchanged.
				IPA = substring
			end

			if IPA then
				text = sub(text, len + 1)
				table.insert(output, IPA)
				table.insert(
					characteristics,
					{ has_descender = has_descender, is_diacritic = is_diacritic }
				)
				break
			end
		end
	end

	output = table.concat(output)

	-- The number of "$" characters in a placeholder is the index of the
	-- protected text. A run with no stored text is left as it is, because
	-- gsub keeps the original match when the callback returns nil.
	output = mw.ustring.gsub(
		output,
		"($+)%*",
		function(match)
			return escaped[string.len(match)]
		end
	)

	return output
end

--[[
	Convert X-SAMPA to IPA.

	@param frame Either a table that has `args` and optionally `getParent`
		(the text is taken from the first positional argument of the parent,
		falling back to the frame's own first argument), or a non-table value
		that is used directly as the text.
	@return string IPA text produced by _XSAMPAtoIPA

	Raises an error if any argument other than the first positional one is
	present, or if no text was supplied.
]]
function p.X2IPA(frame)
	local text

	if type(frame) == "table" then
		-- getParent may return nil when there is no parent frame.
		local parent = frame.getParent and frame:getParent()
		local parent_args = parent and parent.args
		local own_args = frame.args

		text = parent_args and parent_args[1] or own_args and own_args[1]

		-- Kept local so that names from one call cannot leak into another.
		local invalidParameters = {}

		for _, args in ipairs({ parent_args or {}, own_args or {} }) do
			for key in pairs(args) do
				if key ~= 1 then
					table.insert(invalidParameters, key)
				end
			end
		end

		if #invalidParameters > 1 then
			error('The parameters "' .. table.concat(invalidParameters, '", "') .. '" are not used by this template')
		elseif #invalidParameters == 1 then
			error('The parameter "' .. invalidParameters[1] .. '" is not used by this template')
		end
	else
		text = frame
	end

	if text == nil then
		error("No text provided")
	end

	return _XSAMPAtoIPA(text)
end

-- Concatenate `text` between an opening and a closing string and return the
-- result. Both strings are empty here, so the result is `text` converted to a
-- string by the concatenation.
-- NOTE(review): the function name suggests the strings once held markup;
-- confirm against the upstream module.
local function _IPAspan(text)
	local opening = ""
	local closing = ""
	return opening .. text .. closing
end

--[[
	Build a line that shows a `{{subst:x2i|...}}` call for an X-SAMPA string,
	followed by "\n| " and the IPA conversion of the same string.

	@param frame Either a table with `args` and optionally `getParent` (the
		text is the parent's first positional argument, falling back to the
		frame's own), or the text itself as a string.
	@return string the assembled text

	Raises "No text provided" when no text can be found.
]]
function p.example(frame)
	local text

	if type(frame) == "table" then
		-- getParent may return nil when there is no parent frame.
		local parent = frame.getParent and frame:getParent()
		local parentargs = parent and parent.args
		local args = frame.args
		text = parentargs and parentargs[1] or args and args[1]
	elseif type(frame) == "string" then
		text = frame
	end

	if not text then
		error("No text provided")
	end

	local output = { " {{subst:x2i|" }

	-- Text containing "=" would be read as a named parameter, so it is
	-- passed explicitly as parameter 1.
	if find(text, "=") then
		table.insert(output, "1=")
	end
	table.insert(output, text)
	table.insert(output, "}}")

	table.insert(output, "\n| ")
	table.insert(output, _IPAspan(p.X2IPA(text)))

	return table.concat(output)
end

-- Return the table of exported functions as the module's value.
return p