Module:pa-Arab-translit

This module page is in beta stage.
The module has been updated and should work as expected for the majority of words. It may still have problems with diphthongs and with a minority of Arabic loanwords. The module needs verification from other native Punjabi speakers who understand Shahmukhi lemmas with diacritics, because this is quite difficult to get your head around. Check the documentation for further listed problems.
This module will transliterate text in the Shahmukhi script. It is also used to transliterate Old Punjabi, Pahari-Potwari, and Saraiki.

The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:pa-Arab-translit/testcases.

Functions

tr(text, lang, sc)
Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the transliteration fails, returns nil.

Introduction

....

Exceptions

The following words do not need any diacritics:

To do list

  • Sort out any problems with diphthongs
    • make sure all diphthongs are represented correctly
    • change īū -> iyū
  • Arabic loanwords
    • al and non al words
    • archaic ha murtaba tah
  • Diacritics detector
  • Distinguish between ـہ as a schwa and ـہ in loanwords: diacritics are not needed when it is a schwa ('..a'), but they are needed when it is a loanword ('..ah')

Working Template Examples

Headword-line templates

With a replacement header:
{{pa-noun|gur=ਮਸੀਤ|g=f|head=مَسِیت}}

Check: مسیت (example can't be shown on module documentation)


Without needing to replace the header:
{{pa-noun|g=f|gur=ਅਲਮਾਰੀ}}

Check: الماری (example can't be shown on module documentation)

Other templates

{{ux|pa|مَیں کَلّھ لَندَن جا رِہا واں۔|I am going to London tomorrow.}}:

مَیں کَلّھ لَندَن جا رِہا واں۔
maiṉ kallh landan jā rihā vāṉ.
I am going to London tomorrow.

Status

Last updated: 23/08/2021


local m_str_utils = require("Module:string utilities")

-- Local aliases for two helpers from Module:string utilities.
-- U is called below with a numeric code point to obtain a diacritic string;
-- gsub performs every substitution in export.tr.
local U = m_str_utils.char
local gsub = m_str_utils.gsub

-- Table returned at the end of the file; export.tr is attached to it.
local export = {}

-- Vowel diacritics and related marks.
local zabar = U(0x64E)
local zer = U(0x650)
local pesh = U(0x64F)
local tashdid = U(0x651) -- also called shadda
local jazm = "ْ" -- mapped to "" in `mapping` (no vowel)
local he = "ہ"

-- Individual letters that the rules in export.tr refer to by name.
local ain = 'ع'
local alif = 'ا'
local ye = 'ی'
local ye2 = 'ئ'
local ye3 = "ے"
local vao = "و"
local aspirate = 'ھ'
local nasal = 'ں'

-- Character sets that export.tr splices into "[...]" pattern classes.
-- NOTE(review): the three consonant sets are not identical (they appear to
-- differ in whether ی, و, ں and ڃ are included, and some letters are repeated)
-- -- confirm that each difference is intentional.
local consonants = "بٻبپتثجڄڄچحخدݙذرزژسشصضطظعغفقکڳگلࣇمنںݨوہھٹڈڑ"
local consonantS = "بٻبپتثجڃڄچحخدݙذرزژسشصضطظعغفقکگڳلࣇمنݨہھٹڈڑ"
local consonantS2 = "یٻببپتثجڃڄچحخدݙذرزژسشصضطظعغفقکڳگلࣇمنݨوہھٹڈڑ" 
local vowels = "اآیئےۓوؤ"
-- NOTE(review): `hes` and `diacritics` are not referenced by export.tr in the
-- code visible here.
local hes = "ہح"
local diacritics = "َُِّْٰ"
-- The three short-vowel diacritics, used as a pattern class.
local ZZP = "َُِ"

-- Per-character fallback table. export.tr passes it to gsub with the pattern
-- '.', after all of its multi-character rules have run, so each matched
-- character is looked up here by key.
-- NOTE(review): presumably a character with no entry is left unchanged, as
-- with standard Lua gsub table replacements -- confirm for m_str_utils.gsub.
local mapping = {
	-- letters
	["آ"] = 'ā', ["ب"] = 'b', ["ٻ"] = 'ḇ', ["پ"] = 'p', ["ت"] = 't', ["ٹ"] = 'ṭ', ["ث"] = 's̱',
	["ج"] = 'j', ["ڄ"] = 'ǰ', ["چ"] = 'c', ["ح"] = 'ḥ', ["خ"] = 'x', 
	["د"] = 'd', ["ڈ"] = 'ḍ', ["ݙ"] = 'ḏ', ["ذ"] = 'ẕ', ["ر"] = 'r', ['ڑ'] = "ṛ", ["ز"] = 'z', ["ژ"] = 'ž',
	["س"] = 's', ["ش"] = 'ś', ["ص"] = 'ṣ', ["ض"] = 'ẓ', 
	["ط"] = 't̤', ["ظ"] = 'z̤', ["ع"] = 'ʻ', ["غ"] = 'ġ', ["ف"] = 'f', ["ق"] = 'q',
	["ک"] = 'k', ["گ"] = 'g', ["ڳ"] = 'g̈', ["ݨ"] = 'ṇ', ["ࣇ"] = 'ḷ',
	-- this row also carries the full stop (۔)
	["ل"] = 'l', ["م"] = 'm', ["ن"] = 'n', ["و"] = 'v', ["ہ"] = 'h', ["ی"] = 'y', ["۔"] = ".", ["ں"] = 'ṉ',

	["ھ"] = "h", 
	["ؤ"] = "'o",
	
	-- diacritics
	[zabar] = "a",
	[zer] = "i",
	[pesh] = "u",
	[jazm] = "", -- also sukun - no vowel
	[U(0x200C)] = "-", -- ZWNJ (zero-width non-joiner)
	
	-- ligatures
	["ﻻ"] = "lā",
	["ﷲ"] = "allāh",
	
	-- kashida
	["ـ"] = "-", -- kashida, no sound
	
	-- numerals
	["۱"] = "1", ["۲"] = "2", ["۳"] = "3", ["۴"] = "4", ["۵"] = "5",
	["۶"] = "6", ["۷"] = "7", ["۸"] = "8", ["۹"] = "9", ["۰"] = "0",
	
	-- punctuation (leave on separate lines)
	["؟"] = "?", -- question mark
	["،"] = ",", -- comma
	["؛"] = ";", -- semicolon
	["«"] = '“', -- quotation mark
	["»"] = '”', -- quotation mark
	["٪"] = "%", -- percent
	["؉"] = "‰", -- per mille
	["٫"] = ".", -- decimals
	["٬"] = ",", -- thousand
	["ۓ"] = "-ye", 
	["ۀ"] = "h-e" -- he ye (in izafat)
}

-- Transliterates Shahmukhi `text` into Latin script by running a fixed,
-- ordered series of substitutions. Each rule operates on the output of all
-- earlier rules, so the order of the statements below is significant.
--
-- text: the string to transliterate.
-- lang, sc: accepted for the caller's benefit but not referenced in this body.
-- Returns the string left after the last substitution.
--
-- All ‘ain rules emit the same "ʻ" character that `mapping` uses for ع, so
-- the output never mixes two different apostrophe-like characters for it.
function export.tr(text, lang, sc)

	-- EXCEPTIONS - leave as they are, unless they have been sorted out elsewhere

	text = gsub(text, '([' .. consonants .. '])' .. ye .. vao .. nasal, "%1eyoṉ") -- needs to be fixed
	text = gsub(text, '([' .. consonants .. '])' .. ye .. vao, "%1eyo") -- needs to be fixed
	text = gsub(text, '([' .. consonants .. '])' .. ye .. '([' .. consonants .. '])' , "%1e%2") -- needs to be fixed
	text = gsub(text, '([' .. consonants .. '])' .. ye .. alif, "%1eyā") -- needs to be fixed
	text = gsub(text, zabar .. aspirate .. "(ی)" , "hai") -- needs to be fixed
	text = gsub(text, zabar .. aspirate .. "(و)" , "hau") -- needs to be fixed
	text = gsub(text, "ئے", "'e")
	text = gsub(text, "َے", "ai")
	text = gsub(text, "ے", "e")
	text = gsub(text, "ہہ", "h")
	-- NOTE(review): the next two rules still look for ے, but the rules above
	-- have already rewritten every ے, so they appear unreachable -- confirm
	-- whether they were meant to run earlier.
	text = gsub(text, "اے", "e")
	text = gsub(text, "اَے", "ai")
	text = gsub(text, "ن٘", "ṉ")
	text = gsub(text, "اللہ", "allāh")
	text = gsub(text, "ؤ" .. pesh, "ū") -- needs to be fixed

	--text = gsub(text, "ُھوِیں", "vīṉ")
	text = gsub(text, "([" .. pesh .. aspirate  .. "])" .. "وِیں", "%1vīṉ")

	-- diacritics
	text = gsub(text, "([" .. consonants  .. "])" .. zer .. ye .. alif, "%1īyā")
	text = gsub(text, pesh .. vao .. jazm .. "", "ū")
	text = gsub(text, "([" .. consonants  .. "])" .. zabar .. vao, "%1au")
	text = gsub(text, "([" .. consonants  .. "])" .. zabar .. ye, "%1ai")
	text = gsub(text, "([" .. alif .. consonants  .. "])" .. zabar .. ye3, "%1ai")
	text = gsub(text, "([" .. consonants  .. "])" .. zer .. ye, "%1ī")
	text = gsub(text, jazm .. alif, "ā")

	text = gsub(text, ye2 .. zer .. ye, "'ī")
	text = gsub(text, "" .. alif .. ye2 .. "([" .. consonants  .. "])", "ā'i%1") -- needs to be fixed

	-- Initial alif
	text = gsub(text, alif .. zer, "ī")
	text = gsub(text, alif .. zabar .. '([' .. consonantS .. '])', "a%1")

	text = gsub(text, alif .. zabar .. vao .. jazm .. "", "au")
	text = gsub(text, alif .. vao .. jazm .. "", "o")
	text = gsub(text, alif .. zabar .. ye .. jazm .. "", "ai")
	text = gsub(text, alif .. ye .. jazm .. "", "e")

	text = gsub(text, alif .. pesh .. vao, "ū")
	text = gsub(text, alif .. pesh .. vao .. jazm .. "", "ū")
	text = gsub(text, alif .. pesh, "u")

	-- do-chashme-he zabar, zer, pesh / no need to mess about
	--- works for short vowels
	-- Moves the "h" in front of the short vowel that precedes ھ.
	text = gsub(text, "([" .. ZZP  .. "])" .. aspirate, "h%1")
	-- NOTE(review): the rule above has already consumed every short vowel
	-- directly followed by ھ, so the next two rules appear unreachable for
	-- ordinary input -- confirm intended order and the "ai" replacement.
	text = gsub(text, pesh .. aspirate .. vao .. jazm .. "", "ū")
	text = gsub(text, zabar .. aspirate .. vao .. jazm .. "", "ai")
	text = gsub(text, '([' .. consonants .. '])' .. aspirate .. ye .. jazm .. "", "%1he")


	-- Tashdeed
	-- Doubles the letter carrying the tashdid. The second rule looks subsumed
	-- by the first (same doubling, vowel left in place either way).
	text = gsub(text, '([' .. consonantS2 .. '])' .. tashdid, "%1%1")
	text = gsub(text, '([' .. consonantS2 .. '])' .. tashdid .. '([' .. ZZP .. '])', "%1%1%2")
	text = gsub(text, '([' .. ZZP .. '])' .. ye .. '([' .. ZZP .. '])' .. tashdid, "%1yy%2")
	text = gsub(text, '([' .. ZZP .. '])' .. vao .. '([' .. ZZP .. '])' .. tashdid, "%1vv%2")
	-- For some reason the tashdeed gets pushed after the other diacritics, so this line is necessary for tashdeed to work with other diacritics
	text = gsub(text, '([' .. consonants .. '])' .. '([' .. ZZP .. '])' .. tashdid, "%1%1%2")

	-- tanween diacritic / no need to mess about
	text = gsub(text, '([' .. consonants .. '])' .. 'ً' .. alif, "%1an")
	text = gsub(text, alif .. 'ً', "an")
	text = gsub(text, '([' .. consonants .. '])' .. 'ً', "%1an")

	-- khari zabar -- / no need to mess about
	text = gsub(text, '([' .. vowels .. '])' .. 'ٰ', "á")
	text = gsub(text, '([' .. consonants .. '])' .. 'ٰ' .. '([' .. vowels .. '])', "%1á")

	-- ‘ain
	-- Every replacement here uses "ʻ", matching the entry for ع in `mapping`.
	text = gsub(text, alif .. ain , "āʻ")
	text = gsub(text, ain .. alif  .. '([' .. consonants .. '])', "ʻā%1")
	text = gsub(text, '([' .. consonants .. '])' .. ain .. he, "%1ʻa")
	text = gsub(text, '([' .. consonants .. '])' .. '([' .. zer .. pesh .. ']?)' .. ain, "%1%2ʻ")
	text = gsub(text, ain .. zabar .. vao .. '([' .. consonants .. '])', "ʻau%1")
	text = gsub(text, ain .. zabar .. ye .. '([' .. consonants .. '])', "ʻai%1")
	text = gsub(text, ain .. zer  .. '([' .. consonants .. '])', "ʻi%1")
	text = gsub(text, ain .. pesh  .. '([' .. consonants .. '])', "ʻu%1")
	text = gsub(text, ain .. zer .. ye .. '([' .. consonants .. '])', "ʻī%1")
	text = gsub(text, ain .. pesh .. vao .. '([' .. consonantS .. '])', "ʻū%1")

	---  alif
	text = gsub(text, '([' .. consonants .. '])' .. zabar .. alif, "%1ā")
	text = gsub(text, '([' .. consonants .. '])' .. alif, "%1ā")
	text = gsub(text, '([' .. consonants .. '])' .. tashdid .. alif, "%1%1ā")

	-- Vao
	text = gsub(text, vao .. '([' .. ZZP .. '])', "v%1")
	text = gsub(text, 'ُو', "ū")
	text = gsub(text, '([' .. consonants .. '])' .. zabar .. vao .. alif, "%1avā")


	-- medial/final consonants

	--- (e) -- works
	text = gsub(text, '([' .. consonants .. '])' .. ye .. jazm .. '([' .. consonants .. '])', "%1e%2")
	text = gsub(text, '([' .. consonants .. '])' .. ye3, "%1e")
	text = gsub(text, '([' .. consonants .. '])' .. zabar .. ye3, "%1ai")
	--- izafat
	text = gsub(text, '([' .. consonants .. '])' .. zer .. " ", "%1-e ")

	--- he
	text = gsub(text, zabar .. he .. zer .. ye, "ahī")
	text = gsub(text, zabar .. he .. alif, "ahā")
	text = gsub(text, zabar .. he .. '([' .. consonants .. vowels .. '])', "ah%1")

	--- vao

	text = gsub(text, '([' .. consonants .. '])' .. vao, "%1o")
	text = gsub(text, '([' .. consonants .. '])' .. tashdid .. vao, "%1%1o")

	text = gsub(text, '([' .. consonants .. '])' .. tashdid .. zer .. ye .. jazm .. alif, "%1%1īyā")
	text = gsub(text, zer .. ye .. jazm .. alif, "īyā")
	text = gsub(text, zer .. ye .. alif, "iyā")

	--- ye
	text = gsub(text, ye .. zabar .. alif, "yā")
	text = gsub(text, '([' .. consonants .. zer .. '])' .. ye, "%1ī")

	text = gsub(text, "ۂ", "a-e")

	-- Per-character fallback: each character is looked up in `mapping`.
	text = gsub(text, '.', mapping)


	-- Changed these to 'iy(*)', because they will be used for with ی, which are normally written as 'iy'
	text = gsub(text, 'īā', "iyā")
	text = gsub(text, 'īa', "iya")

	text = gsub(text, 'aa', "ā")
	--

	-- Clean-up of sequences left over after the per-character pass.
	-- NOTE(review): the two rules below that contain ے appear unreachable,
	-- because every ے was rewritten near the top of this function -- confirm.
	text = gsub(text, 'ئy', "'ī")
	text = gsub(text, "" .. 'ئے', "'ye")
	text = gsub(text, "īے", "iye")
	text = gsub(text, "iīe", "iye")
	text = gsub(text, "īe", "iye")
	text = gsub(text, "iīv", "iyo")
	text = gsub(text, "ئiy", "'ī")

	return text
end
return export