Module:pnb-convert/sandbox

Test cases edit

5 of 13 tests failed. (refresh)

TextExpectedActualDiffers at
test_convert_urdu:
Failedجُھوٹھ (jhūṭh)ਝੂਠਝੁੋਠ2
Passedگَھر (ghar)ਘਰਘਰ
Passedمیز (mez)ਮੇਜ਼ਮੇਜ਼
Passedمَوسَم (mausam)ਮੌਸਮਮੌਸਮ
Failedپَینْدا (paindā)ਪੈਂਦਾਪੈਨ੍ਦਾ3
Passedتُہاڈا (tuhāḍā)ਤੁਹਾਡਾਤੁਹਾਡਾ
Failedلان٘گھا (lāṅghā)ਲਾਂਘਾਲਾਨ٘ਘਾ3
Passedجَھلّا (jhallā)ਝੱਲਾਝੱਲਾ
Passedنِچّھ (nicch)ਨਿੱਛਨਿੱਛ
Failedلِتَّر (littar)ਲਿੱਤਰਲਿਤّਰ3
Failedچِھتَّر (chittar)ਛਿੱਤਰਛਿਤّਰ3
Passedرَوࣇا (rauḷā)ਰੌਲ਼ਾਰੌਲ਼ਾ
Passedتْراہ (trāh)ਤ੍ਰਾਹਤ੍ਰਾਹ

Progress edit

History edit

  • 19/08/23: A boilerplate module has been created (taken from Module:ur-convert/sandbox). First step will be to ensure that basic characters and diacritics are parsed correctly.
 Done 19/08/23 – Diff:75689721 Diff:75689796
  • 19/08/23: Ensure aspirates with diacritics are parsed correctly. Sort out prolonged vowels.

Kept in Mind edit

  • Parsing of aspirates + diacritics.
  • Shadda is placed on the letter being geminated; adhak is placed before the letter being geminated (i.e. after the preceding letter).
  • Parsing of 'ain

-- Short aliases for the Unicode-aware string helpers of the mw.ustring library.
local U = mw.ustring.char    -- builds a string from code points
local gsub = mw.ustring.gsub -- pattern substitution
-- Table of public functions; returned at the end of the file.
local export = {}

-- Shahmukhi diacritics --
-- zabar, zer and pesh are the three short-vowel marks; together they form
-- the ZZP character class defined further down in this file.
local zabar = U(0x64E)
local zer = U(0x650)
local pesh = U(0x64F)
local tashdid = U(0x651) -- also called shadda
-- jazm is mapped to the Gurmukhi dutt (see the `mapping` table).
local jazm = "ْ"

-- The letter he; not referenced by name anywhere else in the visible code.
local he = "ہ"

-- Gurmukhi diacritics --
-- Each constant holds exactly one combining sign; the trailing comment names
-- the Shahmukhi spelling it corresponds to.
local sihari = "ਿ" -- zer
local bihari = "ੀ" -- zer + ye
local aunkar = "ੁ" -- pesh
local dulainkar = "ੂ" -- pesh + vao
local lavan = "ੇ" -- e
local dulavan = "ੈ" -- ai
local hora = "ੋ" -- o
local kanaura = "ੌ" -- au
local dutt = "੍" -- jazm
-- No leading space here: the sign must attach to the preceding letter.
local adhak = "ੱ" -- tashdeed

-- Character-class strings, intended to be spliced into "[...]" pattern sets.
-- Only consonantS2 and ZZP are referenced by the visible code (in export.tr);
-- the others are currently unused.
local consonants = "ببپتثجچحخدذرزژسشصضطظعغفقکگلࣇمنݨوہھٹڈںڑشؕ"
-- Same set as `consonants` but without vao.
local consonantS = "ببپتثجچحخدذرزژسشصضطظعغفقکگلࣇمنݨہھٹڈںڑشؕ"
-- Consonant set that additionally contains ye and vao; used for gemination.
local consonantS2 = "یببپتثجچحخدذرزژسشصضطظعغفقکگلࣇمنݨںوہھٹڈڑشؕ" 
-- NOTE(review): presumably the Arabic "sun letters" -- confirm before use.
local sun = "تثصشسزرذدنلطظض"
local vowels = "ایئےۓوؤ"
local hes = "ہح"
local diacritics = "َُِّْٰ"
-- Zabar, zer, pesh: the three short-vowel marks.
local ZZP = "َُِ"

-- Per-character Shahmukhi -> Gurmukhi lookup table.
-- export.tr applies it as the final step, one character at a time; characters
-- that have no entry here are left unchanged by that substitution.
local mapping = {
	-- letters
	["آ"] = 'ਆ', ["ب"] = 'ਬ', ["پ"] = 'ਪ',
	["ت"] = 'ਤ', ["ٹ"] = 'ਟ', ["ث"] = 'ਸ',
	["ج"] = 'ਜ', ["چ"] = 'ਚ', ["ح"] = 'ਹ',
	["خ"] = 'ਖ਼', ["د"] = 'ਦ', ["ڈ"] = 'ਡ',
	["ذ"] = 'ਜ਼', ["ر"] = 'ਰ', ["ڑ"] = "ੜ",
	-- NOTE(review): the value for ژ appears to carry two nuktas -- confirm the intended letter.
	["ز"] = 'ਜ਼', ["ژ"] = 'ਸ਼਼', ["س"] = 'ਸ',
	["ش"] = 'ਸ਼', ["ص"] = 'ਸ', ["ض"] = 'ਜ਼',
	["ط"] = 'ਤ', ["ظ"] = 'ਜ਼', ["غ"] = 'ਗ਼',
	["ف"] = 'ਫ਼', ["ق"] = 'ਕ਼', ["ک"] = 'ਕ',
	["گ"] = 'ਗ', ["ل"] = 'ਲ', ["م"] = 'ਮ',
	["ن"] = 'ਨ', ["و"] = 'ਵ', ["ہ"] = 'ਹ',
	["ی"] = 'ਯ',
	["ں"] = 'ਂ',

	["ݨ"] = 'ਣ', ["ࣇ"] = 'ਲ਼', ["ك"] = 'ਕ',

	["ع"] = 'ਅ',
	["ء"] = '',
	["ئ"] = '',
	["ؤ"] = 'ਓ',
	["أ"] = '',

	-- diacritics
	[zabar] = "", -- inherent vowel, not written in Gurmukhi
	[zer] = sihari,
	[pesh] = aunkar,
	[jazm] = dutt,
	[U(0x200C)] = "-", -- ZWNJ (zero-width non-joiner)

	-- ligatures (Gurmukhi output; these were previously emitted in Devanagari)
	["ﻻ"] = "ਲਾ",
	["ﷲ"] = "ਅੱਲਾਹ",

	-- kashida
	["ـ"] = "-", -- kashida, no sound

	-- numerals
	["١"] = "੧", ["٢"] = "੨", ["٣"] = "੩", ["٤"] = "੪", ["٥"] = "੫",
	["٦"] = "੬", ["٧"] = "੭", ["٨"] = "੮", ["٩"] = "੯", ["٠"] = "੦",
	["۱"] = "੧", ["۲"] = "੨", ["۳"] = "੩", ["۴"] = "੪", ["۵"] = "੫",
	["۶"] = "੬", ["۷"] = "੭", ["۸"] = "੮", ["۹"] = "੯", ["۰"] = "੦",

	-- punctuation (leave on separate lines)
	["۔"] = "।",
	["؟"] = "?", -- question mark
	["،"] = ",", -- comma
	["؛"] = ";", -- semicolon
	["«"] = '“', -- quotation mark
	["»"] = '”', -- quotation mark
	["٪"] = "%", -- percent
	["؉"] = "‰", -- per mille
	["٫"] = ".", -- decimals
	["٬"] = ",", -- thousand
	["ۓ"] = "-ਏ",
	["ۂ"] = "-ਏ" -- he ye (in ezâfe)
}

-- Named single characters used when building substitution patterns.
-- Of these, export.tr currently references alif, ye, vao and aspirate.
local ain = 'ع'
local kzabar = 'ٰ' -- NOTE(review): presumably "khari zabar" (superscript alif) -- confirm
local alif = 'ا'
local madda = 'آ' -- alif with madda
local ye = 'ی'
local ye2 = 'ئ' -- ye with hamza
local ye3 = "ے" -- bari ye
local vao = "و"
local ye4 = "ۓ" -- bari ye with hamza
local he2 = "ۂ" -- he with hamza
local aspirate = 'ھ' -- do-chashmi he, marks an aspirated consonant
local lam = 'ل'
local noon = 'ن'
local gunDia = '٘' -- NOTE(review): looks like the noon-ghunna (nasalisation) mark -- confirm

-- Base consonants and the Gurmukhi letter(s) that replace "base + do-chashmi he".
-- The first ten have a dedicated aspirated letter; the last four are written
-- as the base letter with a subjoined ha.
local aspirated_pairs = {
	{ "ک", "ਖ" }, { "گ", "ਘ" }, { "چ", "ਛ" }, { "ج", "ਝ" },
	{ "ٹ", "ਠ" }, { "ڈ", "ਢ" }, { "ت", "ਥ" }, { "د", "ਧ" },
	{ "پ", "ਫ" }, { "ب", "ਭ" },
	{ "ڑ", "ੜ੍ਹ" }, { "م", "ਮ੍ਹ" }, { "ن", "ਨ੍ਹ" }, { "ل", "ਲ੍ਹ" },
}

-- Transliterates Shahmukhi (Perso-Arabic script) Punjabi into Gurmukhi.
--
-- Multi-character sequences are rewritten first, in this order, and the
-- remaining single characters are then converted through `mapping`:
--   1. gemination (tashdeed -> adhak before the doubled letter)
--   2. aspirates (consonant [+ short vowel] + do-chashmi he)
--   3. long vowels and diphthongs
--   4. nasalisation (noon + ghunna mark / jazm -> bindi or tippi)
--
-- @param text  string to transliterate
-- @param lang  unused; kept so the signature stays compatible with callers
-- @param sc    unused; kept so the signature stays compatible with callers
-- @return the transliterated string
function export.tr(text, lang, sc)
	local adhak_sign = U(0x0A71)
	local tippi = U(0x0A70)
	local bindi = U(0x0A02)
	local short_vowel = "[" .. ZZP .. "]"

	-- 1. Tashdeed. The shadda may be typed before or after the short vowel
	-- (Unicode normalisation puts it after), so both orders are accepted.
	-- Either way the adhak ends up in front of the geminated consonant.
	text = gsub(text, "([" .. consonantS2 .. "])(" .. short_vowel .. ")" .. tashdid, adhak_sign .. "%1%2")
	text = gsub(text, "([" .. consonantS2 .. "])" .. tashdid, adhak_sign .. "%1")

	-- 2. Aspirates. This runs before the long-vowel rules so that a short vowel
	-- written between the consonant and the do-chashmi he is moved next to
	-- any following ye/vao and can combine with it.
	for _, pair in ipairs(aspirated_pairs) do
		text = gsub(text, pair[1] .. "(" .. short_vowel .. "?)" .. aspirate, pair[2] .. "%1")
	end

	-- 3. Long vowels and diphthongs. The two-character rules must come before
	-- the bare-letter fallbacks, which would otherwise consume the letter.
	text = gsub(text, zabar .. alif, "ਾ") -- prolonged alif written with zabar
	text = gsub(text, alif, "ਾ") -- prolonged alif

	text = gsub(text, zer .. ye, bihari) -- zer + ye as /i:/
	text = gsub(text, zabar .. ye, dulavan) -- zabar + ye as ai
	text = gsub(text, ye, lavan) -- ye as /e:/

	text = gsub(text, pesh .. vao, dulainkar) -- pesh + vao as /u:/
	text = gsub(text, zabar .. vao, kanaura) -- zabar + vao as au
	text = gsub(text, vao, hora) -- vao as /o:/

	-- 4. Nasalisation. Noon carrying the ghunna mark or a jazm is a nasal sign:
	-- bindi after the long-vowel signs listed below, tippi everywhere else
	-- (short vowels, the inherent vowel and dulainkar).
	local nasal_noon = noon .. "[" .. gunDia .. jazm .. "]"
	local takes_bindi = "([ਾ" .. bihari .. lavan .. dulavan .. hora .. kanaura .. "])"
	text = gsub(text, takes_bindi .. nasal_noon, "%1" .. bindi)
	text = gsub(text, nasal_noon, tippi)

	-- Everything still in Shahmukhi is converted character by character.
	text = gsub(text, ".", mapping)
	return text
end
return export