local export = {}
local m_IPA = require("Module:IPA")
local m_str_utils = require("Module:string utilities")
local m_table = require("Module:table")
local put_module = "Module:parse utilities"
local set_utilities_module = "Module:set utilities"
local headword_data_module = "Module:headword/data"
local accent_qualifier_module = "Module:accent qualifier"
local accent_qualifier_data_module = "Module:accent qualifier/data"
local rhymes_module = "Module:rhymes"
local hyphenation_module = "Module:hyphenation"
local lang = require("Module:languages").getByCode("id")
-- Local aliases for library functions used throughout the module. The r* names are the
-- find/gsub/split functions taken from [[Module:string utilities]]; toNFC/toNFD come from mw.ustring.
local maxn = table.maxn
local rfind = m_str_utils.find
local rsubn = m_str_utils.gsub
local rsplit = m_str_utils.split
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
local trim = mw.text.trim
local u = m_str_utils.char
local ulen = m_str_utils.len
local ulower = m_str_utils.lower
-- Combining diacritics, built from their code points.
local AC = u(0x0301) -- combining acute accent
local GR = u(0x0300) -- combining grave accent
local CFLEX = u(0x0302) -- combining circumflex accent
local MAC = u(0x0304) -- combining macron; mapped to the IPA stress mark in export.IPA
local BR = u(0x0306) -- combining breve; marks a word/syllable as unstressed in export.IPA
-- Vowel letters of the respelling plus the IPA vowels the conversion produces.
local vowel = "aeéèioòuəɛɔ" -- vowel
local V = "[" .. vowel .. "]" -- pattern class matching one vowel
local NV = "[^" .. vowel .. "]" -- pattern class matching one non-vowel
-- All combining marks treated as accent markers (note: CFLEX is not part of this set).
local accent = AC .. GR .. MAC .. BR
local accent_c = "[" .. accent .. "]"
-- Only the marks that carry stress information (macron and breve).
local stress_c = "[" .. MAC .. BR .. "]"
local ipa_stress = "ˈ"
local ipa_stress_c = "[" .. ipa_stress .. "]"
-- Word boundary (#), space and syllable boundary (.) as used internally by export.IPA.
local separator = "# ."
local separator_c = "[" .. separator .. "]"
local C = "[^" .. vowel .. separator .. "]" -- consonant
-- Set of words that export.IPA marks with BR (i.e. leaves unstressed) when they appear as whole words.
local unstressed_words = require("Module:table").listToSet({ --feel free to add more unstressed words
"di", "ké", -- prepositions
"dan", -- conjunctions
"ku", "mu", "nya", -- pronouns
})
-- Wrapper around rsubn() that yields only the substituted string; the substitution count is dropped.
local function rsub(term, foo, bar)
	-- The extra parentheses truncate rsubn()'s results to the first value.
	return (rsubn(term, foo, bar))
end
-- Wrapper around rsubn() whose second return value is a boolean telling whether at least one
-- substitution happened (instead of the raw substitution count).
local function rsubb(term, foo, bar)
	local result, count = rsubn(term, foo, bar)
	local changed = count > 0
	return result, changed
end
-- Keep applying rsub() with the same pattern and replacement until the string stops changing,
-- then return the stable result.
local function rsub_repeatedly(term, foo, bar)
	local previous
	repeat
		previous = term
		term = rsub(previous, foo, bar)
	until term == previous
	return term
end
-- ĵ, ɟ and ć are used internally to represent [d͡ʒ], [j] and [t͡ʃ]
--
-- Convert Indonesian text (normally a respelling) to IPA.
--
-- Parameters:
--   text: the text to transcribe; if nil, the current page title is used.
--   phonetic: if truthy, syllables are re-joined with "." after stress assignment; otherwise the
--     syllable boundaries inside a word are dropped at that point.
-- Returns a table with the keys "phonemic" and "phonetic", each holding an NFC-normalized string
-- (without the surrounding slashes/brackets).
function export.IPA(text, phonetic)
	-- Snapshots of the intermediate stages. They are only collected, never read; the table is
	-- not called `debug` so that it does not shadow Lua's standard `debug` library.
	local debug_steps = {}
	text = ulower(text or mw.title.getCurrentTitle().text)
	-- decompose everything but é, è, ò
	text = toNFD(text)
	text = rsub(text, "." .. "[" .. AC .. CFLEX .. GR .. "]", {
		["e" .. AC] = "é",
		["e" .. GR] = "è",
		["o" .. GR] = "ò", -- O as in the Javanese place names "Solo", "Purwokerto", "Probolinggo"
	})
	-- convert commas and en/em dashes to IPA foot boundaries
	text = rsub(text, "%s*[,–—]%s*", " | ")
	-- question mark or exclamation point in the middle of a sentence -> IPA foot boundary
	text = rsub(text, "([^%s])%s*[!?]%s*([^%s])", "%1 | %2")
	-- canonicalize multiple spaces and remove leading and trailing spaces
	local function canon_spaces(text)
		text = rsub(text, "%s+", " ")
		text = rsub(text, "^ ", "")
		text = rsub(text, " $", "")
		return text
	end
	text = canon_spaces(text)
	-- Make prefixes unstressed unless they have an explicit stress marker; also make the words
	-- listed in `unstressed_words` (e.g. [[di]], [[dan]]) unstressed.
	local words = rsplit(text, " ")
	for i, word in ipairs(words) do
		if rfind(word, "%-$") and not rfind(word, accent_c) or unstressed_words[word] then
			-- The greedy ".*" makes the match end at the LAST vowel of the word, so BR is
			-- appended after the last vowel, not the first one.
			words[i] = rsub(word, "^(.*" .. V .. ")", "%1" .. BR)
		end
	end
	text = table.concat(words, " ")
	-- Convert hyphens to spaces
	text = rsub(text, "%-", " ")
	-- canonicalize multiple spaces again, which may have been introduced by hyphens
	text = canon_spaces(text)
	-- now eliminate punctuation
	text = rsub(text, "[!?']", "")
	-- put # at word beginning and end and double ## at text/foot boundary beginning/end
	text = rsub(text, " | ", "# | #")
	text = "##" .. rsub(text, " ", "# #") .. "##"
	table.insert(debug_steps, text)
	--"i" or "u" to glide (as part of a diphthong)
	text = rsub(text, "(" .. V .. ")i([#.])", "%1ɟ%2")
	text = rsub(text, "(" ..V.. ")u([#.])", "%1w%2")
	-- syllable-initial X (e.g. in [[xenofobia]], [[xenon]], [[xilofon]])
	text = rsub(text, "x("..V..")", "s%1")
	-- handle certain combinations; kh, ng, ny and sy handling needs to go first
	text = rsub(text, "kh", "x")
	text = rsub(text, "ng", "ŋ")
	text = rsub(text, "ny", "ɲ")
	text = rsub(text, "sy", "ʃ")
	table.insert(debug_steps, text)
	--alphabet-to-phoneme (letters without an entry in the table, such as "v", are left unchanged)
	text = rsub(text, "[ceéègjòqvy]",
	--["g"]="ɡ": U+0067 LATIN SMALL LETTER G → U+0261 LATIN SMALL LETTER SCRIPT G
		{ ["c"] = "ć", ["e"] = "ə", ["é"] = "e", ["è"] = "ɛ", ["g"] = "ɡ", ["j"] = "ĵ", ["ò"] = "ɔ", ["q"] = "k", ["y"] = "j" })
	-- glottal stop. use also to replace "k" when this corresponds to it
	text = rsub(text, "7", "ʔ")
	table.insert(debug_steps, text)
	--syllable division
	local vowel_to_glide = { ["i"] = "j", ["u"] = "w" }
	-- i and u between vowels -> j and w (e.g. [[rangkaian]])
	text = rsub_repeatedly(text, "(" .. V .. ")([iu])(" .. V .. ")",
		function(v1, iu, v2)
			return v1 .. vowel_to_glide[iu] .. v2
		end
	)
	text = rsub_repeatedly(text, "(" .. V .. accent_c .."*)(" .. C .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. V .. accent_c .."*" .. C .. ")(" .. C .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. V .. accent_c .."*" .. C .. "+)(" .. C .. C .. V .. ")", "%1.%2")
	text = rsub_repeatedly(text, "(" .. C .. ")%.s(" .. C .. ")", "%1s.%2")
	text = rsub_repeatedly(text, "([aeiouɛɔ]" .. accent_c .. "*)([aeiouɛɔ])", "%1.%2")
	table.insert(debug_steps, text)
	local accent_to_stress_mark = { [MAC] = "ˈ", [BR] = "" }
	local function accent_word(word, syllables)
		-- Now stress the word. If any accent exists in the word (including breves indicating an unaccented word),
		-- put the stress mark(s) at the beginning of the indicated syllable(s). Otherwise, apply the default
		-- stress rule.
		if rfind(word, accent_c) then
			for i = 1, #syllables do
				syllables[i] = rsub(syllables[i], "^(.*)(" .. accent_c .. ")(.*)$",
					function(pre, mark, post)
						return accent_to_stress_mark[mark] .. pre .. post
					end
				)
			end
		else
			-- Default stress rule. Words without vowels (e.g. IPA foot boundaries) don't get stress.
			if #syllables > 1 and (rfind(word, "[^aəeéèioòuɛɔʔbcdfgɡhjɟĵklmnŋɲpqrstvwxz#]#")) or #syllables == 1 and rfind(word, V) then
				syllables[#syllables] = "ˈ" .. syllables[#syllables]
			elseif #syllables <= 2 and rfind(word, "[ə]") then
				syllables[#syllables] = "ˈ" .. syllables[#syllables]
			elseif #syllables >= 3 and rfind(word, "[ə]") then
				syllables[#syllables - 1] = "ˈ" .. syllables[#syllables - 1]
			elseif #syllables > 1 then
				syllables[#syllables - 1] = "ˈ" .. syllables[#syllables - 1]
			end
		end
	end
	local words = rsplit(text, " ")
	for j, word in ipairs(words) do
		local syllables = rsplit(word, "%.")
		accent_word(word, syllables)
		-- Reconstruct the word.
		words[j] = table.concat(syllables, phonetic and "." or "")
	end
	text = table.concat(words, " ")
	-- suppress syllable mark before IPA stress indicator
	text = rsub(text, "%.(" .. ipa_stress_c .. ")", "%1")
	table.insert(debug_steps, text)
	-- Both outputs start from the same intermediate string; only the "phonetic" entry receives
	-- the allophonic rules below. Assigning to existing keys while traversing with pairs() is
	-- allowed in Lua.
	local id_IPA_table = {
		["phonetic"] = text,
		["phonemic"] = text
	}
	for key, _ in pairs(id_IPA_table) do
		text = id_IPA_table[key]
		--phonetic transcription
		if key == "phonetic" then
			table.insert(debug_steps, text)
			--diphthongs
			text = rsub(text, "([aeou])([ɟj])([#.ˈ])", "%1i̯%3")
			text = rsub(text, "([a])w([#.ˈ])", "%1u̯%2")
			table.insert(debug_steps, text)
			--change e, i, u in closed final syllables
			text = rsub(text, "([bćdfhjĵɟklmnɲŋprsʃtwz])e([bćdfhjĵɟklmnɲŋprstwz])([#])","%1ɛ%2%3")
			text = rsub(text, "([bćdfhjĵɟklmnɲŋprsʃtwz])i([bćdfhjĵɟklmnɲŋprstwz])([#])","%1ɪ%2%3")
			text = rsub(text, "([bćdfhjĵɟklmnɲŋprsʃtwz])u([bćdfhjĵɟklmnɲŋprstwz])([#])","%1ʊ%2%3")
			table.insert(debug_steps, text)
			--i, u in closed stressed syllables with nasal coda
			text = rsub(text, "([ˈ])([bćdfhjĵɟklmnɲŋprsʃtwz])ɪ([mnŋ])([.#])","%1%2i%3%4")
			text = rsub(text, "([ˈ])([bćdfhjĵɟklmnɲŋprsʃtwz])ʊ([mnŋ])([.#])","%1%2u%3%4")
			table.insert(debug_steps, text)
			--devoice final B, D and G
			text = rsub(text, "b([#.ˈ])","p̚%1")
			text = rsub(text, "d([#.ˈ])","t̚%1")
			text = rsub(text, "ɡ([#.ˈ])","k̚%1")
			--/n/ and /ŋ/ sandhi
			text = rsub(text,"([nŋ])([# .]*[bpm])", "m%2")
			text = rsub(text,"([ŋ])([ˈˌ# .]*[dlstz])","n%2")
			text = rsub(text,"([n])([ˈˌ# .]*[ćĵʃ])","ɲ%2")
			--final K to glottal stop
			text = rsub(text, "k([#.ˈ])","ʔ%1")
			--dental T
			text = rsub(text, "t","t̪")
			--V to F
			text = rsub(text, "v","f")
			mw.log(text)
		end
		table.insert(debug_steps, text)
		-- convert fake symbols to real ones
		local final_conversions = {
			["ć"] = "t͡ʃ", -- fake "c" to real "c"
			["ɟ"] = "j", -- fake "i" to real "i"
			["ĵ"] = "d͡ʒ" -- fake "j" to real "j"
		}
		-- The character class must list exactly the keys of final_conversions. It previously
		-- contained ĉ (c with circumflex) instead of ć (c with acute), so the internal
		-- affricate symbol was never replaced by t͡ʃ and leaked into the output.
		text = rsub(text, "[ćɟĵ]", final_conversions)
		-- Do not have multiple syllable break consecutively
		text = rsub_repeatedly(text, "([.]+)", ".")
		text = rsub_repeatedly(text, "([.]?)(‿)([.]?)", "%2")
		-- remove # symbols at word and text boundaries
		text = rsub_repeatedly(text, "([.]?)#([.]?)", "")
		-- resuppress syllable mark before IPA stress indicator
		text = rsub(text, "%.(" .. ipa_stress_c .. ")", "%1")
		text = rsub_repeatedly(text, "([.]?)(" .. ipa_stress_c .. ")([.]?)", "%2")
		id_IPA_table[key] = toNFC(text)
	end
	return id_IPA_table
end
-- Template entry point. Reads the parent frame's arguments (1 = respelling, defaulting to the page
-- title; pre = text placed before the pronunciation; bullets = 0 to suppress the leading "* "),
-- generates the phonemic and phonetic transcriptions and formats them with
-- m_IPA.format_IPA_full.
function export.show(frame)
	local parent_args = frame:getParent().args
	local args = require("Module:parameters").process(parent_args, {
		[1] = {},
		["pre"] = {},
		["bullets"] = {type = "number", default = 1},
	})
	local respelling = args[1] or mw.title.getCurrentTitle().text
	local prons = export.IPA(respelling)
	local items = {
		{ pron = "/" .. prons["phonemic"] .. "/" },
		{ pron = "[" .. prons["phonetic"] .. "]" },
	}
	local prefix = ""
	if args.pre then
		prefix = args.pre .. " "
	end
	local bullet = ""
	if args.bullets ~= 0 then
		bullet = "* "
	end
	return bullet .. prefix .. m_IPA.format_IPA_full { lang = lang, items = items }
end
-- Parse a gloss spec of the form "POS1,POS2^GLOSS" or just "GLOSS".
--
-- If `arg` contains a caret, everything before the first caret is a comma-separated list of
-- parts of speech (aliases are canonicalized through the `pos_aliases` table of
-- [[Module:headword/data]]) and everything after it is the gloss; an empty gloss becomes nil.
-- Without a caret the whole argument is the gloss and no parts of speech are returned.
--
-- Returns a table {poses = list or nil, gloss = string or nil}.
local function parse_gloss(arg)
	local poses, gloss
	if arg:find("%^") then
		poses, gloss = arg:match("^(.-)%^(.*)$")
		if gloss == "" then
			gloss = nil
		end
	else
		gloss = arg
	end
	if poses then
		-- No split_on_comma() helper is defined in this module, so the former bare call
		-- failed at runtime; use the function of that name in [[Module:parse utilities]].
		poses = require(put_module).split_on_comma(poses)
		local m_headword_data = mw.loadData(headword_data_module)
		for i, pos in ipairs(poses) do
			poses[i] = m_headword_data.pos_aliases[pos] or pos
		end
	end
	return {
		poses = poses,
		gloss = gloss,
	}
end
-- Turn a raw accent spec (one or more accents separated by commas, with optional whitespace around
-- the commas) into a list of accents. Any element that is a key of the `aliases` table in
-- [[Module:accent qualifier/data]] is replaced by the value stored there; other elements are kept.
-- FIXME: The separate accent qualifier data will be going away and merged into label data, at which
-- point we'll have to rewrite this.
local function parse_accents(arg)
	local aliases = mw.loadData(accent_qualifier_data_module).aliases
	local accents = rsplit(arg, "%s*,%s*")
	for idx, name in ipairs(accents) do
		local canonical = aliases[name]
		if canonical then
			accents[idx] = canonical
		end
	end
	return accents
end
-- Count the syllables of a phonemic or phonetic representation. The input should contain syllable
-- dividers but no hyphens. The count is the number of "." boundaries plus one, after stress marks
-- between two characters have been turned into boundaries and words have been joined with ".".
local function get_num_syl_from_ipa(pron)
	-- Maybe we should just count vowels instead of the below code.
	-- IPA foot boundaries (|) become spaces so that they act as plain word separators.
	local pieces = rsplit(rsub(pron, "|", " "), " +")
	for idx, piece in ipairs(pieces) do
		-- A stress mark between two characters is a syllable division; any remaining one
		-- (e.g. word-initial) is simply deleted.
		local with_dots = rsub(piece, "(.)[ˌˈ](.)", "%1.%2")
		pieces[idx] = rsub(with_dots, "[ˌˈ]", "")
	end
	-- Joining with "." supplies the syllable boundary that exists between words.
	local dots_only = rsub(table.concat(pieces, "."), "[^.]", "")
	return ulen(dots_only) + 1
end
-- Get the rhyme of the last word of `phonemic` by truncating everything up through the last stress
-- mark plus any following non-vowels, and removing syllable boundary markers.
-- Returns exactly one value, the rhyme string.
local function convert_phonemic_to_rhyme(phonemic)
	-- NOTE: "Non-vowel" here means anything not matched by `vowel` (the NV class), so every vowel
	-- symbol that can start a rhyme must be listed in `vowel`.
	local words = rsplit(phonemic, " ")
	local last_word = words[#words]
	local rhyme = rsub(rsub(last_word, ".*[ˌˈ]", ""), "^" .. NV .. "*", "")
	-- string.gsub returns the substitution count as a second value; the parentheses drop it so
	-- that it cannot leak into the caller's argument list (e.g. table.insert(t, f(x))).
	return (rhyme:gsub("%.", ""))
end
-- Break a syllabified spelling such as "a.ma.in" into its syllables at the "." markers.
local function split_syllabified_spelling(spelling)
	local syllables = rsplit(spelling, "%.")
	return syllables
end
-- "Align" syllabified respelling `syllab` to original spelling `spelling` by matching character-by-character, allowing
-- for extra syllable and accent markers in the syllabification and certain mismatches in the consonants. The goal is to
-- produce the appropriately syllabified version of the original spelling (the pagename) by matching characters in the
-- syllabified respelling to the original spelling, putting the syllable boundaries in the appropriate places in the
-- original spelling. As an example, given syllabified respelling 'a.ma.7ín' and original spelling 'amain', we would
-- like to produce 'a.ma.in'.
--
-- If we encounter an extra syllable marker (.), we allow and keep it. If we encounter an extra accent marker in the
-- syllabification, we drop it. We allow for mismatches in capitalization and for certain other mismatches, e.g. extra
-- glottal stops (written 7), h in respelling vs. g or j in the original, etc. If we can't match, we return nil
-- indicating the alignment failed (the mismatch is reported through mw.log).
local function align_syllabification_to_spelling(syllab, spelling)
	local result = {}
	local function concat_result()
		-- Postprocess to remove dots (syllable boundaries) next to hyphens.
		return (toNFC(table.concat(result)):gsub("%.%-", "-"):gsub("%-%.", "-"))
	end
	-- Remove glottal stop (7) from respelling to simplify the code below, because it's never found in the original
	-- spelling. Both strings are decomposed (NFD) so that accent marks are separate characters that can be matched
	-- or skipped individually; concat_result() recomposes the output. (No decompose() helper is defined in this
	-- module, so toNFD is used directly.)
	syllab = toNFD(syllab):gsub("7", "")
	spelling = toNFD(spelling)
	local syll_chars = rsplit(ulower(syllab), "")
	local spelling_chars = rsplit(spelling, "")
	local i = 1
	local j = 1
	local function matches(uci, ucj)
		-- Return true if a syllabified respelling character (uci) matches the corresponding spelling char (ucj).
		-- Both uci and ucj should be lowercase.
		return uci == ucj or
			uci == "h" and (ucj == "g" or ucj == "j" or ucj == "x") or
			uci == "j" and ucj == "g" or
			uci == "y" and ucj == "i" or
			uci == "w" and ucj == "u"
	end
	local function silent_spelling_letter(ucj)
		return ucj == "h" or ucj == "'" or ucj == "-"
	end
	-- The accessors below return "" when reading past either end, so look-ahead/look-behind is always safe.
	local function syll_at(pos)
		return syll_chars[pos] or ""
	end
	local function spell_at(pos)
		return spelling_chars[pos] or ""
	end
	local function uspell_at(pos)
		local c = spelling_chars[pos]
		return c and ulower(c) or ""
	end
	while i <= #syll_chars or j <= #spelling_chars do
		local uci = syll_at(i)
		local cj = spell_at(j)
		local ucj = uspell_at(j)
		if uci == "g" and syll_at(i - 1) == "n" and syll_at(i + 1) == "." and matches(syll_at(i + 2), ucj) and
			not matches(syll_at(i + 2), uspell_at(j + 1)) then
			-- As a special case, before checking whether the corresponding characters match, we have to skip an extra
			-- g in an -ng- sequence in the syllabified respelling if the corresponding spelling character matches the
			-- next respelling character (taking into account the syllable boundary). This is so that e.g.
			-- syll='ba.rang.gay' matches spelling='barangay'. Otherwise we will match the first respelling g against
			-- the spelling g and the second respelling g won't match. A similar case occurs with
			-- syll='E.vang.he.lis.ta' and spelling='Evangelista'. But we need an extra condition to not do this hack
			-- when syll='ba.rang.gay' matches spelling='baranggay'.
			i = i + 1
		elseif matches(uci, ucj) then
			table.insert(result, cj)
			i = i + 1
			j = j + 1
		elseif ucj == uspell_at(j - 1) and uci == "." and ucj ~= syll_at(i + 1) then
			-- See below. We want to allow for a doubled letter in spelling that is pronounced single, and preserve the
			-- doubled letter. But it's tricky in the presence of syllable boundaries on both sides of the doubled
			-- letter as well as doubled letters pronounced double. Specifically, there are three possibilities,
			-- exemplified by:
			-- (1) syll='Mal.lig', spelling='Mallig' -> 'Mal.lig';
			-- (2) syll='Ma.lig', spelling='Mallig' -> 'Ma.llig';
			-- (3) syll='Wil.iam', spelling='William' -> 'Will.iam'.
			-- If we copy the dot first, we get (1) and (2) right but not (3).
			-- If we copy the double letter first, we get (2) and (3) right but not (1).
			-- We choose to copy the dot first except in the situation exemplified by (3), where we copy the doubled
			-- letter first. The condition above handles (3) (the doubled letter matches against a dot) while not
			-- interfering with (1) (where the doubled letter also matches against a dot but the next letter in the
			-- syllabification is the same as the doubled letter, because the doubled letter is pronounced double).
			table.insert(result, cj)
			j = j + 1
		elseif silent_spelling_letter(ucj) and uci == "." and ucj ~= syll_at(i + 1) and
			not rfind(uspell_at(j + 1), V) then
			-- See below for apostrophe in spelling. This condition is parallel to the one directly above
			-- for silent doubled letters in spelling and handles the case of a silent letter directly before a
			-- syllable boundary, e.g. syllab='Ab.du.ra.man', spelling='Abdurahman', which should be syllabified
			-- 'Ab.du.rah.man'. But we need a check to see that the next spelling character
			-- isn't a vowel, because in that case we want the silent letter to go after the period, e.g.
			-- syllab='Ju.mu.7a', spelling='Jumu'ah' -> 'Ju.mu.'ah' (the 7 is removed above).
			table.insert(result, cj)
			j = j + 1
		elseif uci == "." then
			table.insert(result, uci)
			i = i + 1
		elseif ucj == uspell_at(j - 1) then
			-- A doubled letter in spelling that is pronounced single; the extra letter is copied after the first
			-- one. Examples:
			-- * syllab='Ab.du.rah.man', spelling='Abdurrahman' -> 'Ab.du.rrah.man' (with r)
			-- * syllab='a.sa.la.mu a.lai.kum', spelling='assalamu alaikum' -> 'a.ssa.la.mu a.lai.kum' (with s)
			-- * syllab='Ta.lo', spelling='Tallo' -> 'Ta.llo' (with l)
			-- * syllab='Ha.sa.nu.din', spelling='Hasanuddin' -> 'Ha.sa.nu.ddin' (with d)
			-- (syllab='Ka.ba', spelling='Kaaba' -> 'Kaa.ba' is handled by the doubled-letter-before-dot branch above.)
			table.insert(result, cj)
			j = j + 1
		elseif silent_spelling_letter(ucj) then
			-- A silent h, apostrophe or hyphen in spelling. Examples:
			-- * syllab='Ra.ma.dan', spelling='Ramadhan' -> 'Ra.ma.dhan'
			table.insert(result, cj)
			j = j + 1
		elseif uci == AC or uci == GR or uci == CFLEX or uci == MAC or uci == BR or
			uci == "y" or uci == "w" then
			-- Skip an accent/stress marker (or an inserted glide) that only exists in the respelling. The marks
			-- listed are the combining diacritics this module defines; the previous code compared against the
			-- undefined names DIA, TILDE and MACRON, so the macron and breve markers were never skipped.
			i = i + 1
		else
			-- non-matching character
			mw.log(("Syllabification alignment mismatch for pagename '%s' (position %s, character %s), syllabified respelling '%s' (position %s, character %s), aligned result so far '%s'"
				):format(spelling, j, ucj, syllab, i, uci, concat_result()))
			return nil
		end
	end
	if i <= #syll_chars or j <= #spelling_chars then
		-- left-over characters on one side or the other
		mw.log(("Syllabification alignment mismatch for pagename '%s' (%s), syllabified respelling '%s' (%s), aligned result so far '%s'"
			):format(
				spelling, j > #spelling_chars and "end of string" or ("position %s, character %s"):format(j, uspell_at(j)),
				syllab, i > #syll_chars and "end of string" or ("position %s, character %s"):format(i, syll_at(i)),
				concat_result()))
		return nil
	end
	return concat_result()
end
-- Build the object describing one syllabification: the syllabified string itself plus the list of
-- its syllables (stored under `hyph`).
local function generate_syll_obj(term)
	local obj = {}
	obj.syllabification = term
	obj.hyph = split_syllabified_spelling(term)
	return obj
end
-- Truthy if `word` (which should already be decomposed) contains a vowel from `vowel` or the
-- letter "y", compared case-insensitively; falsy otherwise.
local function word_has_vowels(word)
	local lowered = ulower(word)
	local vowel_hit = rfind(lowered, V)
	if vowel_hit then
		return vowel_hit
	end
	-- Parentheses keep this to a single return value, like the original `or` expression.
	return (lowered:find("y"))
end
-- Return true if at least one space- or hyphen-separated word of `term` contains a vowel
-- (as judged by word_has_vowels()), false otherwise.
local function any_words_have_vowels(term)
	-- word_has_vowels() expects decomposed input. No decompose() helper is defined in this
	-- module, so normalize with toNFD directly.
	local words = rsplit(toNFD(term), "[ %-]")
	for _, word in ipairs(words) do
		-- Empty words (which occur with prefixes and suffixes, next to the hyphen) are allowed;
		-- they simply contain no vowel and are skipped.
		if word_has_vowels(word) then
			return true
		end
	end
	return false
end
-- Decide from a respelling whether a rhyme should be generated and whether rhyme categories
-- should be generated.
-- Returns two values:
--   should_generate_rhyme: truthy unless the last word is a prefix (ends in a hyphen), an
--     unstressed suffix (starts with a hyphen and carries the breve marker), or has no vowels;
--   should_generate_cat: true only for single-word terms.
local function should_generate_rhyme_from_respelling(term)
	-- No decompose() helper is defined in this module; toNFD makes the combining marks separate
	-- characters so that the breve check below can find them.
	local words = rsplit(toNFD(term), " +")
	local last_word = words[#words]
	local should_generate_cat = #words == 1
	-- BR (breve) is the marker export.IPA uses for "unstressed". The previous code passed the
	-- undefined name MACRON (i.e. nil) to string.find, which raises an error. A plain find is
	-- used because BR is a literal, not a pattern.
	local should_generate_rhyme =
		not last_word:find("%-$") and -- no if word is a prefix
		not (last_word:find("^%-") and last_word:find(BR, 1, true)) and -- no if word is an unstressed suffix
		word_has_vowels(last_word) -- no if word has no vowels (e.g. a single letter)
	return should_generate_rhyme, should_generate_cat
end
-- Decide from a raw IPA string whether a rhyme should be generated and whether rhyme categories
-- should be generated.
-- Returns two values:
--   should_generate_rhyme: truthy if the IPA contains a vowel (per word_has_vowels());
--   should_generate_cat: true if the IPA contains no whitespace, i.e. is a single word.
local function should_generate_rhyme_from_ipa(ipa)
	local should_generate_cat = not ipa:find("%s")
	-- word_has_vowels() expects decomposed input; no decompose() helper is defined in this
	-- module, so toNFD is used directly.
	local should_generate_rhyme = word_has_vowels(toNFD(ipa))
	return should_generate_rhyme, should_generate_cat
end
-- Dispatch to the IPA-based or respelling-based rhyme check depending on whether the term object
-- holds raw IPA (`raw` set; the phonemic form is preferred over the phonetic one) or a respelling.
local function should_generate_rhyme_from_termobj(termobj)
	if not termobj.raw then
		return should_generate_rhyme_from_respelling(termobj.term)
	end
	local raw_ipa = termobj.raw_phonemic or termobj.raw_phonetic
	return should_generate_rhyme_from_ipa(raw_ipa)
end
-- Fill in the number of syllables for user-specified rhymes.
--
-- Parameters:
--   rhymes: list of rhyme objects; `num_syl` (a list of numbers) may or may not be set on each.
--   sylls: list of syllabification objects (with a `syllabification` string).
--   parsed_respellings: list of objects whose `pronuns` list holds pronunciation objects
--     (fields read here: `phonemic`, `no_rhyme`).
-- Returns a new list of shallow copies of the rhyme objects with `num_syl` set to the list of
-- distinct syllable counts found, or nil if none could be determined.
local function process_specified_rhymes(rhymes, sylls, parsed_respellings)
	local rhyme_ret = {}
	for _, rhyme in ipairs(rhymes) do
		local num_syl = rhyme.num_syl
		local no_num_syl = false
		-- If user explicitly gave the rhyme but didn't explicitly specify the number of syllables, try to take it from
		-- the syllabification.
		if not num_syl then
			num_syl = {}
			for _, syll in ipairs(sylls) do
				if should_generate_rhyme_from_respelling(syll.syllabification) then
					local this_num_syl = 1 + ulen(rsub(syll.syllabification, "[^.]", ""))
					m_table.insertIfNot(num_syl, this_num_syl)
				else
					no_num_syl = true
					break
				end
			end
			if no_num_syl or #num_syl == 0 then
				num_syl = nil
			end
		end
		-- If that fails and term is single-word, try to take it from the phonemic.
		if not no_num_syl and not num_syl then
			-- num_syl is nil on entry to this branch, so it needs a fresh list before anything is
			-- inserted into it or its length is taken (both previously raised an error).
			num_syl = {}
			for _, parsed in ipairs(parsed_respellings) do
				for _, pronun in ipairs(parsed.pronuns) do
					-- Check that pronun.phonemic exists (it may not if raw phonetic-only pronun is given), and rhyme
					-- isn't suppressed (which may happen if the term has a qualifier "colloquial", "obsolete" or the
					-- like or is an auto-generated "glottal stop elision" pronunciation).
					if pronun.phonemic and not pronun.no_rhyme then
						if not should_generate_rhyme_from_ipa(pronun.phonemic) then
							no_num_syl = true
							break
						end
						-- Count number of syllables by looking at syllable boundaries (including stress marks).
						local this_num_syl = get_num_syl_from_ipa(pronun.phonemic)
						m_table.insertIfNot(num_syl, this_num_syl)
					end
				end
				-- The break above only leaves the inner loop; propagate it to the outer one.
				if no_num_syl then
					break
				end
			end
			if no_num_syl or #num_syl == 0 then
				num_syl = nil
			end
		end
		local rhymeobj = m_table.shallowcopy(rhyme)
		rhymeobj.num_syl = num_syl
		table.insert(rhyme_ret, rhymeobj)
	end
	-- The accumulated list was previously built but never returned.
	return rhyme_ret
end
-- Parse a pronunciation modifier in `arg`, the argument portion in an inline modifier (after the prefix), which
-- specifies a pronunciation property such as rhyme, syllabification, homophones or audio. The argument can itself have
-- inline modifiers, e.g. <audio:Foo.ogg<a:Jakarta>>. The allowed inline modifiers are specified by `param_mods` (of
-- the format expected by `parse_inline_modifiers()`); in addition to any modifiers specified there, the modifiers
-- <q:...>, <qq:...>, <a:...> and <aa:...> are always accepted (and can be repeated). `generate_obj` and `parse_err` are
-- like in `parse_inline_modifiers()` and specify respectively a function to generate the object into which modifier
-- properties are stored given the non-modifier part of the argument, and a function to generate an error message (given
-- the message). Normally, a comma-separated list of pronunciation properties is accepted and parsed, where each element
-- in the list can have its own inline modifiers and where no spaces are allowed next to the commas in order for them to
-- be recognized as separators. If `no_split_on_comma` is given, only a single pronunciation property is accepted. If
-- `has_outer_container` is given, the list of pronunciation properties is embedded in the `terms` property of an outer
-- container, into which other list-level modifiers can also be stored (by setting `overall = true` in the respective
-- spec in `param_mods`). The return value is a list if neither `no_split_on_comma` nor `has_outer_container` are given,
-- otherwise a container object (which, in the case of `has_outer_container`, will contain a list inside of it, in the
-- `terms` property).
local function parse_pron_modifier(arg, parse_err, generate_obj, param_mods, no_split_on_comma, has_outer_container)
	if arg:find("<") then
		-- Work on a shallow copy so that adding the always-accepted qualifier modifiers does not
		-- modify the table owned by the caller.
		local all_mods = {}
		for mod, spec in pairs(param_mods) do
			all_mods[mod] = spec
		end
		local insert = { store = "insert" }
		all_mods.q = insert
		all_mods.qq = insert
		all_mods.a = insert
		all_mods.aa = insert
		return require(put_module).parse_inline_modifiers(arg, {
			param_mods = all_mods,
			generate_obj = generate_obj,
			parse_err = parse_err,
			splitchar = not no_split_on_comma and "," or nil,
			outer_container = has_outer_container and {} or nil,
		})
	elseif no_split_on_comma then
		return generate_obj(arg)
	else
		local retval = {}
		-- No split_on_comma() helper is defined in this module, so the former bare call failed
		-- at runtime; use the function of that name in [[Module:parse utilities]].
		for _, term in ipairs(require(put_module).split_on_comma(arg)) do
			table.insert(retval, generate_obj(term))
		end
		if has_outer_container then
			retval = {
				terms = retval,
			}
		end
		return retval
	end
end
-- Parse a rhyme spec. Each rhyme becomes {rhyme = TERM}; the inline modifier <s:...> holds a
-- comma-separated list of syllable counts, which is validated, converted to numbers and stored
-- under `num_syl`.
local function parse_rhyme(arg, parse_err)
	-- Convert the value of <s:...> into a list of numbers, reporting non-numeric entries.
	local function convert_num_syl(spec, report_err)
		local counts = rsplit(spec, ",")
		for idx, raw in ipairs(counts) do
			if not raw:find("^[0-9]+$") then
				report_err("Number of syllables '" .. raw .. "' should be numeric")
			end
			counts[idx] = tonumber(raw)
		end
		return counts
	end
	local function make_rhyme(term)
		return {rhyme = term}
	end
	local mods = {
		s = { item_dest = "num_syl", convert = convert_num_syl },
	}
	return parse_pron_modifier(arg, parse_err, make_rhyme, mods)
end
-- Parse a syllabification spec into an outer container whose `terms` are syllabification objects.
local function parse_syll(arg, parse_err)
	-- <cap:...> (the caption, defaulting to "Syllabification") applies to the whole set of
	-- syllabifications rather than to one of them, which is why an outer container is requested.
	local caption_mod = { overall = true}
	local mods = { cap = caption_mod }
	return parse_pron_modifier(arg, parse_err, generate_syll_obj, mods, nil, "has outer container")
end
-- Parse a homophone spec. Each homophone becomes {term = TERM} and may carry the inline modifiers
-- t, gloss, pos, alt, lit, id and g.
local function parse_homophone(arg, parse_err)
	local function make_term(term)
		return {term = term}
	end
	local function split_genders(spec)
		return rsplit(spec, ",")
	end
	local mods = {}
	-- Modifiers stored as-is under their own name.
	for _, name in ipairs({"gloss", "pos", "alt", "lit", "id"}) do
		mods[name] = {}
	end
	-- <t:...> and <g:...> go into the "gloss" and "genders" keys respectively, because that is
	-- what [[Module:links]] (called from [[Module:homophones]]) expects.
	mods.t = { item_dest = "gloss" }
	mods.g = { item_dest = "genders", convert = split_genders }
	return parse_pron_modifier(arg, parse_err, make_term, mods)
end
-- Build an audio object from "FILE" or "FILE#GLOSS" (whitespace around the # is ignored).
-- When no # is present the gloss defaults to "Audio".
local function generate_audio_obj(arg)
	local file, gloss = arg:match("^(.-)%s*#%s*(.*)$")
	if file then
		return {file = file, gloss = gloss}
	end
	return {file = arg, gloss = "Audio"}
end
-- Parse an audio spec into a single audio object; only the qualifier modifiers are accepted.
local function parse_audio(arg, parse_err)
	-- The spec is deliberately not split on commas: some filenames have embedded commas not
	-- followed by a space (typically followed by an underscore).
	local no_extra_mods = {}
	return parse_pron_modifier(arg, parse_err, generate_audio_obj, no_extra_mods, "no split on comma")
end
-- Hand the table of public functions (export.IPA and export.show) back to whoever loads the module.
return export