Module:pnb-convert/sandbox
- The following documentation is located at Module:pnb-convert/sandbox/documentation. [edit]
- Useful links: root page • root page’s subpages • links • transclusions • testcases • sandbox of
Test cases edit
5 of 13 tests failed. (refresh)
Text | Expected | Actual | Differs at | |
---|---|---|---|---|
جُھوٹھ (jhūṭh) | ਝੂਠ | ਝੁੋਠ | 2 | |
گَھر (ghar) | ਘਰ | ਘਰ | ||
میز (mez) | ਮੇਜ਼ | ਮੇਜ਼ | ||
مَوسَم (mausam) | ਮੌਸਮ | ਮੌਸਮ | ||
پَینْدا (paindā) | ਪੈਂਦਾ | ਪੈਨ੍ਦਾ | 3 | |
تُہاڈا (tuhāḍā) | ਤੁਹਾਡਾ | ਤੁਹਾਡਾ | ||
لان٘گھا (lāṅghā) | ਲਾਂਘਾ | ਲਾਨ٘ਘਾ | 3 | |
جَھلّا (jhallā) | ਝੱਲਾ | ਝੱਲਾ | ||
نِچّھ (nicch) | ਨਿੱਛ | ਨਿੱਛ | ||
لِتَّر (littar) | ਲਿੱਤਰ | ਲਿਤّਰ | 3 | |
چِھتَّر (chittar) | ਛਿੱਤਰ | ਛਿਤّਰ | 3 | |
رَوࣇا (rauḷā) | ਰੌਲ਼ਾ | ਰੌਲ਼ਾ | ||
تْراہ (trāh) | ਤ੍ਰਾਹ | ਤ੍ਰਾਹ |
Progress edit
History edit
- 19/08/23: A boilerplate module has been created (taken from Module:ur-convert/sandbox). First step will be to ensure that basic characters and diacritics are parsed correctly.
- Done 19/08/23 –
Diff:75689721, Diff:75689796
- Done 19/08/23 –
- 19/08/23:
Ensure aspirates with diacritics are parsed correctly. Sort out prolonged vowels.
Kept in Mind edit
- Parsing of aspirates + diacritics.
- Shadda is placed on the letter being geminated; adhak is placed before the letter being geminated (i.e. directly after the preceding letter and its vowel sign).
- Parsing of 'ain
-- Shorthand for mw.ustring.char: builds a string from Unicode code points.
local U = mw.ustring.char
-- Shorthand for mw.ustring.gsub, the Scribunto gsub that works on code points
-- rather than bytes (needed because every pattern below is non-ASCII).
local gsub = mw.ustring.gsub
-- Table of public functions; it is returned at the end of the module.
local export = {}
-- Shahmukhi diacritics --
-- Marks of the source (Perso-Arabic) script. They are concatenated into the
-- patterns used by export.tr and are also keys of the `mapping` table.
local zabar = U(0x64E) -- U+064E ARABIC FATHA (short a)
local zer = U(0x650) -- U+0650 ARABIC KASRA (short i)
local pesh = U(0x64F) -- U+064F ARABIC DAMMA (short u)
local tashdid = U(0x651) -- also called shadda
local jazm = "ْ" -- vowel-less marker; mapped to `dutt` in `mapping` (presumably U+0652 SUKUN -- confirm)
local he = "ہ" -- NOTE(review): this is a letter, not a diacritic, despite the section heading
-- Gurmukhi diacritics --
-- Dependent vowel signs and other marks of the target script. The trailing
-- comment on each line names the Shahmukhi spelling it corresponds to.
local sihari = "ਿ" -- zer
local bihari = "ੀ" -- zer + ye
local aunkar = "ੁ" -- pesh
local dulainkar = "ੂ" -- pesh + vao
local lavan = "ੇ" -- e
local dulavan = "ੈ" -- ai
local hora = "ੋ" -- o
local kanaura = "ੌ" -- au
local dutt = "੍" -- jazm
-- The literal must contain only the combining mark: a leading space would be
-- copied into any output built from this constant.
local adhak = "ੱ" -- tashdeed
-- Character sets. Each string is meant to be spliced between "[" and "]" in a
-- ustring pattern, so the order of the characters inside it is irrelevant.
-- NOTE(review): `consonants` and `consonantS` differ only in letter case of
-- the identifier; going by the literals, `consonantS` is `consonants` without
-- vao.
local consonants = "ببپتثجچحخدذرزژسشصضطظعغفقکگلࣇمنݨوہھٹڈںڑشؕ"
local consonantS = "ببپتثجچحخدذرزژسشصضطظعغفقکگلࣇمنݨہھٹڈںڑشؕ"
-- Consonant set including ye and vao; this is the set export.tr uses when
-- looking for a letter that carries a tashdid.
local consonantS2 = "یببپتثجچحخدذرزژسشصضطظعغفقکگلࣇمنݨںوہھٹڈڑشؕ"
local sun = "تثصشسزرذدنلطظض" -- presumably the Arabic "sun letters" -- TODO confirm
local vowels = "ایئےۓوؤ"
local hes = "ہح"
local diacritics = "َُِّْٰ"
-- Short-vowel marks (name presumably stands for zabar, zer, pesh); used by
-- export.tr to match an optional vowel mark sitting on a consonant.
local ZZP = "َُِ"
-- Single-character fallback table: after the context-sensitive rewrite rules
-- have run, every remaining code point is looked up here (gsub with a table
-- replacement). Characters without an entry are left unchanged; an entry of
-- "" deletes the character.
local mapping = {
	["آ"] = 'ਆ', ["ب"] = 'ਬ', ["پ"] = 'ਪ',
	["ت"] = 'ਤ', ["ٹ"] = 'ਟ', ["ث"] = 'ਸ',
	["ج"] = 'ਜ', ["چ"] = 'ਚ', ["ح"] = 'ਹ',
	["خ"] = 'ਖ਼', ["د"] = 'ਦ', ["ڈ"] = 'ਡ',
	["ذ"] = 'ਜ਼', ["ر"] = 'ਰ', ["ڑ"] = "ੜ",
	["ز"] = 'ਜ਼', ["ژ"] = 'ਸ਼਼', ["س"] = 'ਸ',
	["ش"] = 'ਸ਼', ["ص"] = 'ਸ', ["ض"] = 'ਜ਼',
	["ط"] = 'ਤ', ["ظ"] = 'ਜ਼', ["غ"] = 'ਗ਼',
	["ف"] = 'ਫ਼', ["ق"] = 'ਕ਼', ["ک"] = 'ਕ',
	["گ"] = 'ਗ', ["ل"] = 'ਲ', ["م"] = 'ਮ',
	["ن"] = 'ਨ', ["و"] = 'ਵ', ["ہ"] = 'ਹ',
	["ی"] = 'ਯ',
	["ں"] = 'ਂ',
	["ݨ"] = 'ਣ', ["ࣇ"] = 'ਲ਼', ["ك"] = 'ਕ',
	["ع"] = 'ਅ',
	["ء"] = '',
	["ئ"] = '',
	["ؤ"] = 'ਓ',
	["أ"] = '',
	-- diacritics
	[zabar] = "", -- inherent vowel: nothing is written in Gurmukhi
	[zer] = sihari,
	[pesh] = aunkar,
	[jazm] = dutt,
	[U(0x200C)] = "-", -- ZWNJ (zero-width non-joiner)
	-- ligatures (emitted in Gurmukhi, the target script of this module)
	["ﻻ"] = "ਲਾ",
	["ﷲ"] = "ਅੱਲਾਹ",
	-- kashida
	["ـ"] = "-", -- kashida, no sound
	-- numerals
	["١"] = "੧", ["٢"] = "੨", ["٣"] = "੩", ["٤"] ="੪", ["٥"] = "੫",
	["٦"] = "੬", ["٧"] = "੭", ["٨"] = "੮", ["٩"] = "੯", ["٠"] = "੦",
	["۱"] = "੧", ["۲"] = "੨", ["۳"] = "੩", ["۴"] = "੪", ["۵"] = "੫",
	["۶"] = "੬", ["۷"] = "੭", ["۸"] = "੮", ["۹"] = "੯", ["۰"] = "੦",
	-- punctuation (leave on separate lines)
	["۔"] = "।",
	["؟"] = "?", -- question mark
	["،"] = ",", -- comma
	["؛"] = ";", -- semicolon
	["«"] = '“', -- quotation mark
	["»"] = '”', -- quotation mark
	["٪"] = "%", -- percent
	["؉"] = "‰", -- per mille
	["٫"] = ".", -- decimals
	["٬"] = ",", -- thousand
	["ۓ"] = "-ਏ",
	["ۂ"] = "-ਏ" -- he ye (in ezâfe)
}
-- Individual Shahmukhi letters and marks, named so that the rewrite rules can
-- refer to them by identifier instead of by (right-to-left) literal.
local ain = 'ع'
local kzabar = 'ٰ' -- presumably "khari zabar" (superscript alef) -- TODO confirm
local alif = 'ا'
local madda = 'آ' -- alif with madda
local ye = 'ی'
local ye2 = 'ئ' -- ye with hamza above
local ye3 = "ے" -- bari ye
local vao = "و"
local ye4 = "ۓ" -- bari ye with hamza above
local he2 = "ۂ" -- he with hamza above
local aspirate = 'ھ' -- do-chashmi he; follows a consonant to mark aspiration
local lam = 'ل'
local noon = 'ن'
local gunDia = '٘' -- presumably U+0658 ARABIC MARK NOON GHUNNA (nasalisation) -- TODO confirm
-- Aspirated consonants: { Shahmukhi base letter, Gurmukhi output } for the
-- sequence base + do-chashmi he. The last four have no dedicated Gurmukhi
-- letter and are written as base + subjoined ha.
local aspirated_pairs = {
	{ "ک", "ਖ" },
	{ "گ", "ਘ" },
	{ "چ", "ਛ" },
	{ "ج", "ਝ" },
	{ "ٹ", "ਠ" },
	{ "ڈ", "ਢ" },
	{ "ت", "ਥ" },
	{ "د", "ਧ" },
	{ "پ", "ਫ" },
	{ "ب", "ਭ" },
	{ "ڑ", "ੜ੍ਹ" },
	{ "م", "ਮ੍ਹ" },
	{ "ن", "ਨ੍ਹ" },
	{ "ل", "ਲ੍ਹ" },
}

--- Transliterate vocalised Shahmukhi text into Gurmukhi.
-- The rules run in a fixed order because each stage relies on the previous
-- one: gemination, nasalisation, aspirates, long vowels, then the
-- per-character `mapping` fallback.
-- @param text string: Shahmukhi text to convert.
-- @param lang unused; kept so the signature matches what callers pass.
-- @param sc unused; kept so the signature matches what callers pass.
-- @return string: the converted text.
function export.tr(text, lang, sc)
	local short_vowel = "([" .. ZZP .. "]?)"

	-- Gemination. The adhak goes in front of the doubled consonant. A short
	-- vowel mark may sit between the consonant and the tashdid (canonical
	-- ordering puts zabar/zer/pesh before shadda), so it is matched optionally
	-- and kept after the consonant.
	text = gsub(text, "([" .. consonantS2 .. "])" .. short_vowel .. tashdid, "ੱ%1%2")

	-- Nasalisation. Noon carrying the ghunna mark, or a vowel-less noon
	-- directly before another consonant, becomes bindi (the same output
	-- `mapping` uses for noon ghunna) instead of a full ਨ.
	-- NOTE(review): Gurmukhi spelling uses tippi rather than bindi after some
	-- vowels; that distinction is not made here.
	text = gsub(text, noon .. gunDia, "ਂ")
	text = gsub(text, noon .. jazm .. "([" .. consonantS2 .. "])", "ਂ%1")

	-- Aspirates. Handled before the vowel rules so that a vowel mark written
	-- between the base letter and do-chashmi he ends up directly in front of
	-- a following ye/vao and can combine with it below.
	for _, pair in ipairs(aspirated_pairs) do
		text = gsub(text, pair[1] .. short_vowel .. aspirate, pair[2] .. "%1")
	end

	-- Long vowels and diphthongs. The marked combinations must be tried
	-- before the bare letter, which would otherwise consume ye/vao first.
	text = gsub(text, zabar .. "?" .. alif, "ਾ") -- (zabar +) alif as /a:/
	text = gsub(text, zabar .. ye, dulavan) -- zabar + ye as /ai/
	text = gsub(text, zer .. ye, bihari) -- zer + ye as /i:/
	text = gsub(text, ye, lavan) -- bare ye as /e:/
	text = gsub(text, zabar .. vao, kanaura) -- zabar + vao as /au/
	text = gsub(text, pesh .. vao, dulainkar) -- pesh + vao as /u:/
	text = gsub(text, vao, hora) -- bare vao as /o:/

	-- Everything still in Shahmukhi is converted one code point at a time;
	-- characters without a `mapping` entry pass through unchanged.
	text = gsub(text, ".", mapping)
	return text
end
-- Hand the table of public functions (export.tr) to whoever loads this module.
return export