-- Primary module authorship: Chernorizets (original Bulgarian syllabification code)
-- Port to Lua: Kiril Kovachev
-- Adaptation to Toki Pona: Kiril Kovachev
-- 17 April 2024.
local export = {}
local substring = mw.ustring.sub
local rsubn = mw.ustring.gsub
local rsplit = mw.text.split
local U = mw.ustring.char
local lang = require("Module:languages").getByCode("tok")
local script = require("Module:scripts").getByCode("Latn")
-- Character-class pattern matching a single lower-case vowel letter.
local hvowels_c = "[aioeu]"
-- U+2027 (HYPHENATION POINT): the separator placed between syllables in the
-- strings built by this module.
local HYPH = U(0x2027)
-- NOTE(review): not referenced anywhere in the visible part of this module.
local BREAK_MARKER = "."
-- Same as rsubn(), but yields only the resulting string; the substitution
-- count that rsubn() also returns is dropped.
-- term: the string to operate on
-- foo: the pattern to look for
-- bar: the replacement
local function rsub(term, foo, bar)
	-- The extra parentheses truncate the call to its first return value.
	return (rsubn(term, foo, bar))
end
-- Returns the single character found at position `index` of `str`
-- (positions are counted by mw.ustring.sub, i.e. per character, 1-based).
local function char_at(str, index)
	local ch = substring(str, index, index)
	return ch
end
-- Counts the vowel letters in `word`, ignoring letter case.
-- word: the word to inspect
-- Returns the number of vowels as an integer.
local function count_vowels(word)
	-- hvowels_c only lists lower-case letters, whereas syllabify_poly
	-- lower-cases each character before testing it. Lower-case here as well,
	-- otherwise a capitalised word such as "Ana" would be counted as having
	-- a single vowel and never be split.
	local lowered = mw.ustring.lower(word)
	-- gsub's second return value is the number of matches replaced.
	local _, vowel_count = mw.ustring.gsub(lowered, hvowels_c, "")
	return vowel_count
end
-- Tells whether `ch` is exactly one of the lower-case vowel letters
-- a, e, i, o, u. Anything else (including upper-case letters and longer
-- strings) yields false.
local function is_vowel(ch)
	return ch == "a"
		or ch == "e"
		or ch == "i"
		or ch == "o"
		or ch == "u"
end
---- Main syllabification code
-- Finds where the syllable containing the right-hand vowel begins.
-- word: the word being scanned (not consulted by the computation itself)
-- left_vowel/right_vowel: integer positions of two consecutive vowels
-- Returns the integer position of the first character of the next syllable.
local function find_next_syllable_onset(word, left_vowel, right_vowel)
	local between = right_vowel - left_vowel - 1

	if between == 1 then
		-- A lone consonant between the vowels opens the new syllable.
		return left_vowel + 1
	elseif between == 0 then
		-- The vowels are adjacent: the new syllable starts on the right one.
		return right_vowel
	end

	-- Otherwise there is a consonant cluster. Toki Pona phonotactics only
	-- permit a syllable-final nasal followed by the next syllable's
	-- consonant, so the break falls after the first consonant, i.e. two
	-- positions past the left vowel.
	return left_vowel + 2
end
-- Splits `word` into syllables by walking over its characters and cutting
-- between each pair of consecutive vowels.
-- word: the word to split
-- Returns a table of strings (list), one entry per syllable, in order.
local function syllabify_poly(word)
	local pieces = {}
	local last_vowel = nil -- position of the most recent vowel, if any
	local onset = 1        -- position where the current syllable starts
	local length = mw.ustring.len(word)

	for pos = 1, length do
		local ch = mw.ustring.lower(char_at(word, pos))
		if is_vowel(ch) then
			if last_vowel == nil then
				-- First vowel of the word: nothing to cut yet, just remember it.
				last_vowel = pos
			else
				-- A syllable boundary lies somewhere between the previous
				-- vowel and this one; everything before it closes a syllable.
				local next_onset = find_next_syllable_onset(word, last_vowel, pos)
				pieces[#pieces + 1] = substring(word, onset, next_onset - 1)
				last_vowel = pos
				onset = next_onset
			end
		end
	end

	-- Whatever remains from the last onset to the end is the final syllable.
	pieces[#pieces + 1] = substring(word, onset)
	return pieces
end
-- Syllabifies a single word.
-- word: the word to split (no spaces expected)
-- Returns a string in which the syllables are joined by HYPH. An empty word
-- yields the empty string.
function export.syllabify_word(word)
	-- Always return a string: export.syllabify() feeds the result straight
	-- into table.concat(), which raises an error on a table element. Empty
	-- words do occur there, e.g. for doubled, leading or trailing spaces.
	if mw.ustring.len(word) == 0 then
		return ""
	end

	local n_vowels = count_vowels(word)
	local syllables
	if n_vowels <= 1 then
		-- Zero or one vowel: the whole word is a single syllable.
		syllables = { word }
	else
		syllables = syllabify_poly(word)
	end
	return table.concat(syllables, HYPH)
end
-- Syllabifies every space-separated word of a term.
-- term: one or more words separated by single spaces
-- Returns the term with each word syllabified (see export.syllabify_word)
-- and the words joined by single spaces again, in their original order.
function export.syllabify(term)
	local words = rsplit(term, " ")
	local out = {}
	-- ipairs, not pairs: the words form an ordered list and the output must
	-- keep that order, which pairs() does not promise.
	for _, word in ipairs(words) do
		table.insert(out, export.syllabify_word(word))
	end
	return table.concat(out, " ")
end
-- Template entry point: syllabifies a term and hands the result to
-- Module:hyphenation for formatting.
-- frame: the Scribunto frame; the parent frame's first positional argument,
--        when given, is the term to syllabify.
-- Returns whatever format_hyphenations() of Module:hyphenation returns.
function export.show_syllabification(frame)
	local params = {
		[1] = {},
	}
	local title = mw.title.getCurrentTitle()
	local args = require("Module:parameters").process(frame:getParent().args, params)

	-- Pick the term: explicit argument first; otherwise a fixed sample word
	-- on pages in the Template namespace, else the page's subpage text.
	local term = args[1]
	if not term then
		if title.nsText == "Template" then
			term = "sitelen"
		else
			term = title.subpageText
		end
	end

	local syllables = rsplit(export.syllabify(term), HYPH)

	local data = {
		lang = lang,
		hyphs = { { hyph = syllables } },
		sc = script,
		caption = "Hyphenation",
	}
	return require("Module:hyphenation").format_hyphenations(data)
end
return export