Module:User:Chernorizets/bg-orthography
- The following documentation is located at Module:User:Chernorizets/bg-orthography/documentation. [edit]
- Useful links: root page • root page’s subpages • links • transclusions • testcases • user page • user talk page • userspace
export.is_valid
function export.is_valid(word, validation_opts)
Checks whether the provided word is valid according to Bulgarian orthographic rules.
Parameters
word: a string representing a Bulgarian word.
- The empty string is considered valid. nil is invalid by default, but that can be overridden via an option.
validation_opts: a table of Boolean validation options
- |nil_is_valid=: treats nil as valid input
Errors
If word is not a string, the function raises the error "Input must be a string!".
Return values
The function returns two values: result and message:
- result: true if the word is orthographically valid, false otherwise
- message: nil if result == true, otherwise identifies the first failing orthographic rule
local export = {}

-- Unicode-aware string helpers from Scribunto's mw.ustring library.
local umatch = mw.ustring.match
local ufind = mw.ustring.find
local ulower = mw.ustring.lower

-- Vowel letters. "ѝ" / "Ѝ" (и with grave accent) appears in BOTH strings so
-- that the upper-case classes mirror the lower-case ones; without "Ѝ" an
-- upper-case "Ѝ" would be classified as a non-Bulgarian character.
local vowels_lower = "аъоуеиюяѝ"
local vowels_upper = "АЪОУЕИЮЯЍ"
export.vowels_lower_c = "[" .. vowels_lower .. "]"
export.vowels_upper_c = "[" .. vowels_upper .. "]"
export.vowels_c = "[" .. vowels_lower .. vowels_upper .. "]"

-- Consonant letters. Note that "й" and the soft sign "ь" are grouped here.
local consonants_lower = "бвгджзйклмнпрстфхцчшщь"
local consonants_upper = "БВГДЖЗЙКЛМНПРСТФХЦЧШЩЬ"
export.cons_lower_c = "[" .. consonants_lower .. "]"
export.cons_upper_c = "[" .. consonants_upper .. "]"
export.cons_c = "[" .. consonants_lower .. consonants_upper .. "]"

-- Whole alphabet, per case and combined, plus the complement class that
-- matches any single character which is not one of the letters above.
local alpha_lower = vowels_lower .. consonants_lower
local alpha_upper = vowels_upper .. consonants_upper
export.alpha_lower_c = "[" .. alpha_lower .. "]"
export.alpha_upper_c = "[" .. alpha_upper .. "]"
export.alphabet_c = "[" .. alpha_lower .. alpha_upper .. "]"
export.non_bulgarian_c = "[^" .. alpha_lower .. alpha_upper .. "]"
-- Looks up `key` in `validation_opts`.
-- Returns nil whenever `validation_opts` is not a table (nil, false, a string, ...),
-- otherwise whatever is stored under `key` (which may itself be nil).
local function get_opt(validation_opts, key)
	if type(validation_opts) ~= "table" then
		return nil
	end
	return validation_opts[key]
end
-- Registry of orthographic checks, keyed by the rule's human-readable name.
-- Each entry is a function(word, opts) that returns a truthy value when the
-- word satisfies the rule and a falsy value otherwise.
local orthographic_rules = {}

orthographic_rules["valid letter case"] = function(word, opts)
	-- Accepted shapes, tried in this order: ALL CAPS, all lowercase, Capitalized.
	local accepted_shapes = { "^%u+$", "^%l+$", "^%u%l*$" }
	for _, shape in ipairs(accepted_shapes) do
		local hit = umatch(word, shape)
		if hit then
			return hit
		end
	end
	return nil
end
orthographic_rules["correct use of ьЬ"] = function(word, opts)
	-- Rule: EVERY soft sign must sit between a consonant and "о" (e.g. "синьо").
	-- Checking for merely one good "consonant + ьо" sequence is not enough,
	-- because a second, misplaced "ь" elsewhere in the word would slip through.
	-- Letter case is irrelevant to this rule, so work on the lowercased word.
	local lowered = ulower(word)
	if not ufind(lowered, "ь") then
		return true
	end
	-- A word-initial "ь" has nothing in front of it.
	if ufind(lowered, "^ь") then
		return false
	end
	-- Every "ь" must be followed by "о". This also rejects a doubled "ьь",
	-- which the consonant check below cannot catch on its own because "ь" is
	-- itself listed among the consonant letters.
	if ufind(lowered, "ь$") or ufind(lowered, "ь[^о]") then
		return false
	end
	-- Every "ь" must be preceded by a consonant letter.
	if ufind(lowered, "[^" .. consonants_lower .. "]ь") then
		return false
	end
	return true
end
orthographic_rules["no alphabet mixing"] = function(word, opts)
	-- Passes when the word consists solely of letters from the Bulgarian
	-- alphabet class, or solely of characters outside it; a blend of the two fails.
	local bulgarian_only = umatch(word, "^" .. export.alphabet_c .. "+$")
	if bulgarian_only then
		return bulgarian_only
	end
	-- Parentheses keep the result to a single value, as an `or` expression would.
	return (umatch(word, "^" .. export.non_bulgarian_c .. "+$"))
end
--[==[
Checks whether the provided word is valid according to Bulgarian orthographic rules.
===Parameters===
* word: a {string} representing a Bulgarian word.
*: The empty string is considered valid. {nil} is invalid by default, but that can be overridden via an option.
* validation_opts: a {table} of Boolean validation options
*: |nil_is_valid=: treats {nil} as valid input
===Errors===
If `word` is neither {nil} nor a {string}, the function raises the error {"Input must be a string!"}.
===Return values===
The function returns two values: `result` and `message`:
* `result`: {true} if the word is orthographically valid, {false} otherwise
* `message`: {nil} if {result == true}; {"no input"} for a rejected {nil} word; otherwise identifies the first failing orthographic rule
]==]
function export.is_valid(word, validation_opts)
	-- Only a genuinely absent word takes the nil path. Comparing against nil
	-- (rather than testing falsiness) lets a Boolean `false` reach the type
	-- check below and raise the documented error like any other non-string.
	if word == nil then
		if get_opt(validation_opts, "nil_is_valid") then
			return true, nil
		end
		return false, "no input"
	end
	if type(word) ~= "string" then error("Input must be a string!") end
	if word == "" then return true, nil end
	-- NOTE: pairs() visits the rules in an unspecified order, so for a word
	-- that breaks several rules the reported name is whichever rule happens
	-- to be visited first.
	for rule_name, rule in pairs(orthographic_rules) do
		if not rule(word, validation_opts) then
			return false, rule_name
		end
	end
	return true, nil
end
return export