Module:User:Babr/Sandbox
- This module lacks a documentation subpage. You may create it.
- Useful links: root page • root page’s subpages • links • transclusions • testcases • user page • user talk page • userspace
This is a private module sandbox of Babr, for their own experimentation. Items in this module may be added and removed at Babr's discretion; do not rely on this module's stability.
-- List of { pattern, replacement } pairs, kept in their original sequence
-- (presumably applied one after another by a caller not visible here, so the
-- order matters -- confirm before reordering). Every character-set variable
-- used below (consonants, consonants2, semivowel, ZZP, ...) and the U()
-- helper are defined elsewhere in the module.
-- NOTE(review): the code-point ranges in the last rule only make sense with a
-- Unicode-aware gsub (e.g. mw.ustring.gsub) -- verify against the caller.
local has_diacritics_subs
do
	-- Bracketed character classes shared by several rules.
	local base_or_alif = "[" .. consonants .. alif .. "]"
	local cons = "[" .. consonants2 .. "]"
	local cons_or_alif = "[" .. consonants2 .. alif .. "]"
	local semi = "[" .. semivowel .. "]"
	local zzp = "[" .. ZZP .. "]"

	-- Capturing variants of the classes above.
	local zzp_kept = "(" .. zzp .. ")"
	local zer_or_pesh = "([" .. zer .. pesh .. "])"

	-- Builds the "first-last" range notation used inside a character class.
	local function span(first, last)
		return U(first) .. "-" .. U(last)
	end

	has_diacritics_subs = {
		-- Strip punctuation, tashdid, high hamza, numbers and fatHataan.
		{ "[" .. punctuation .. tashdid .. highhmz .. numbers .. fatHataan .. "]", "" },

		-- A base letter plus an optional semivowel at the end of the string,
		-- before a space-like sequence (which is kept), or before a hyphen
		-- (which is kept).
		{ base_or_alif .. semi .. "?$", "" },
		{ base_or_alif .. semi .. "?(" .. space_like_class .. ")", "%1" },
		{ base_or_alif .. semi .. "?%-", "-" },

		-- Needed so that Arabic al- works: consonant + zer/pesh followed by
		-- alif-laam (optionally across a hyphen) collapses to a bare laam.
		{ cons .. zer_or_pesh .. alif .. laam, laam },
		{ cons .. zer_or_pesh .. "%-" .. alif .. laam, laam },

		-- CVV pairs.
		{ cons .. jazm .. semi .. semi, "" },
		{ cons .. jazm .. semi .. zzp_kept, "%1" },
		{ cons_or_alif .. semi .. semi .. "[" .. semivowel .. jazm .. "]", "" },
		{ cons_or_alif .. semi .. semi .. zzp_kept, "%1" },
		{ "[" .. consonants2 .. alif .. ZZP .. "]" .. semi .. semi, "" },

		-- CV pairs.
		{ alif .. semi, "" },
		{ cons .. jazm, "" },
		{ cons_or_alif .. zzp_kept, "%1" },
		{ malif, "" }, -- counts as a CV pair
		{ jazm .. alif .. zzp, "" },
		{ cons_or_alif .. "[" .. ZZP .. semivowel .. "]", "" },

		-- Consonants paired to alif, and short vowel + matching long letter.
		{ cons .. jazm .. malif, "" },
		{ cons .. zabar .. alif, "" },
		{ zer .. ye, "" },
		{ pesh .. vao, "" },
		{ zabar .. alif, "" },

		-- Numbers, hamzatu l-waṣl (ٱ) and alif madda (آ).
		-- NOTE(review): the earlier comment here also listed ZWNJ, but no ZWNJ
		-- literal appears in this pattern -- confirm where it is handled.
		{ "[" .. numbers .. "ٱ" .. "آ" .. "]", "" },

		-- Whitespace, hyphens and any leftover ZZP marks.
		{ "%s", "" },
		{ "%-", "" },
		{ zzp, "" },

		-- Anything outside the Arabic-script ranges U+0600-06FF, U+0750-077F,
		-- U+08A0-08FF, U+FB50-FDFF and U+FE70-FEFF.
		{
			"[^" .. span(0x0600, 0x06FF) .. span(0x0750, 0x077F)
				.. span(0x08A0, 0x08FF) .. span(0xFB50, 0xFDFF)
				.. span(0xFE70, 0xFEFF) .. "]",
			"",
		},
	}
end