Module:grc-translit
- The following documentation is located at Module:grc-translit/documentation. [edit] Categories were auto-generated by Module:module categorization. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox (diff)
This module will transliterate Ancient Greek language text per WT:GRC TR. It is also used to transliterate Demotic, Greek, Paeonian, Old Ossetic, Oscan, Dacian, Alanic, Ancient Macedonian, and Phrygian.
The module should preferably not be called directly from templates or other modules.
To use it from a template, use {{xlit}}.
Within a module, use Module:languages#Language:transliterate.
For testcases, see Module:grc-translit/testcases.
Functions
tr(text, lang, sc)
- Transliterates a given piece of text written in the script specified by the code sc, and language specified by the code lang.
- When the transliteration fails, returns nil.
2 of 36 tests failed. (refresh)
Text | Expected | Actual | |
---|---|---|---|
λόγος | lógos | lógos | |
σφίγξ | sphínx | sphínx | |
ϝάναξ | wánax | wánax | |
οἷαι | hoîai | hoîai | |
current problems | |||
ΙΧΘΥΣ | IKHTHUS | IKhThUS | |
Υἱός | '''Hu'''iós | '''U'''hiós | |
u/y | |||
ταῦρος | taûros | taûros | |
νηῦς | nēûs | nēûs | |
σῦς | sûs | sûs | |
ὗς | hûs | hûs | |
γυῖον | guîon | guîon | |
ἀναῡ̈τέω | anaṻtéō | anaṻtéō | |
δαΐφρων | daḯphrōn | daḯphrōn | |
vowel length | |||
τῶν | tôn | tôn | |
τοὶ | toì | toì | |
τῷ | tôi | tôi | |
τούτῳ | toútōi | toútōi | |
σοφίᾳ | sophíāi | sophíāi | |
μᾱ̆νός | mānós | mānós | |
h (rough breathing) | |||
ὁ | ho | ho | |
οἱ | hoi | hoi | |
εὕρισκε | heúriske | heúriske | |
ὑϊκός | huïkós | huïkós | |
πυρρός | purrhós | purrhós | |
ῥέω | rhéō | rhéō | |
σάἁμον | sáhamon | sáhamon | |
capitals | |||
Ὀδυσσεύς | Odusseús | Odusseús | |
Εἵλως | Heílōs | Heílōs | |
ᾍδης | Hā́idēs | Hā́idēs | |
ἡ Ἑλήνη | hē Helḗnē | hē Helḗnē | |
punctuation | |||
ἔχεις μοι εἰπεῖν, ὦ Σώκρατες, ἆρα διδακτὸν ἡ ἀρετή; | ékheis moi eipeîn, ô Sṓkrates, âra didaktòn hē aretḗ? | ékheis moi eipeîn, ô Sṓkrates, âra didaktòn hē aretḗ? | |
τί τηνικάδε ἀφῖξαι, ὦ Κρίτων; ἢ οὐ πρῲ ἔτι ἐστίν; | tí tēnikáde aphîxai, ô Krítōn? ḕ ou prṑi éti estín? | tí tēnikáde aphîxai, ô Krítōn? ḕ ou prṑi éti estín? | |
τούτων φωνήεντα μέν ἐστιν ἑπτά· α ε η ι ο υ ω. | toútōn phōnḗenta mén estin heptá; a e ē i o u ō. | toútōn phōnḗenta mén estin heptá; a e ē i o u ō. | |
πήγ(νῡμῐ) | pḗg(nūmi) | pḗg(nūmi) | |
HTML entities | |||
καλός καὶ ἀγαθός | kalós kaì agathós | kalós kaì agathós | |
καλός καὶ ἀγαθός | kalós kaì agathós | kalós kaì agathós |
local export = {}
local m_data = require("Module:grc-utilities/data")
local m_str_utils = require("Module:string utilities")
local tokenize = require('Module:grc-utilities').tokenize
local ufind = m_str_utils.find
local ugsub = m_str_utils.gsub
local U = m_str_utils.char
local ulower = m_str_utils.lower
local uupper = m_str_utils.upper
-- Diacritic strings, keyed by name, from Module:grc-utilities/data.
local diacritics = m_data.named

-- Marks that appear on the Greek side.
local acute, grave = diacritics.acute, diacritics.grave
local circumflex = diacritics.circum
local diaeresis = diacritics.diaeresis
local smooth, rough = diacritics.smooth, diacritics.rough
local macron, breve = diacritics.macron, diacritics.breve
local subscript = diacritics.subscript

-- Mark that appears on the Latin side (replacement for the Greek circumflex).
local hat = diacritics.Latin_circum

-- Pattern: macron, then the diaeresis followed by "?", then the Latin
-- circumflex. Used to detect a macron that must be dropped.
local macron_diaeresis = macron .. diaeresis .. "?" .. hat

-- Pattern: token beginning with alpha (either case) and ending in the
-- subscript diacritic.
local a_subscript = '^[αΑ].*' .. subscript .. '$'

-- Letters before which γ is written <n>.
local velar = 'κγχξ'
-- Lookup table used by gsub: each key is one (lowercase) Greek character or
-- diacritic, each value is the Latin text that replaces it. Characters with
-- no entry are left as they are.
local tt = {
	-- Diacritics. Macron, diaeresis, grave and acute have no entry on
	-- purpose, so they are carried over unchanged.
	[breve] = "",
	[smooth] = "",
	[rough] = "",
	[circumflex] = hat,
	[subscript] = "i",

	-- Vowels (eta and omega are written with a macron).
	["α"] = "a", ["ε"] = "e", ["η"] = "e" .. macron, ["ι"] = "i",
	["ο"] = "o", ["υ"] = "u", ["ω"] = "o" .. macron,

	-- Consonants.
	["β"] = "b", ["γ"] = "g", ["δ"] = "d", ["ζ"] = "z",
	["θ"] = "th", ["κ"] = "k", ["λ"] = "l", ["μ"] = "m",
	["ν"] = "n", ["ξ"] = "x", ["π"] = "p", ["ρ"] = "r",
	["σ"] = "s", ["ς"] = "s", ["τ"] = "t", ["φ"] = "ph",
	["χ"] = "kh", ["ψ"] = "ps",

	-- Archaic letters.
	["ϛ"] = "st", ["ϝ"] = "w", ["ϻ"] = "ś",
	["ϙ"] = "q", ["ϡ"] = "š", ["ͷ"] = "v",

	-- Incorrect characters: see [[Wiktionary:About Ancient Greek#Miscellaneous]].
	-- These are tracked by [[Module:script utilities]].
	["ϐ"] = "b", ["ϑ"] = "th", ["ϰ"] = "k",
	["ϱ"] = "r", ["ϲ"] = "s", ["ϕ"] = "ph",
}
--- Transliterates Greek-script text into Latin script.
-- The text is split into tokens by the grc-utilities tokenizer; each token
-- is lowercased, mapped character by character through `tt`, adjusted for
-- the context-dependent rules below, and re-capitalized if the source token
-- contained an uppercase letter.
-- @param text  string to transliterate
-- @param lang  language code (not used by this function)
-- @param sc    script code (not used by this function)
-- @return      the transliterated string
function export.tr(text, lang, sc)
	-- A lone rough-breathing sign stands for <h>.
	if text == '῾' then
		return 'h'
	end

	--[[
		Replace semicolon or Greek question mark with regular question mark,
		except after an ASCII alphanumeric character (to avoid converting
		semicolons in HTML entities).
	]]
	text = ugsub(text, "([^A-Za-z0-9])[;" .. U(0x37E) .. "]", "%1?")

	-- Handle the middle dot. It is equivalent to semicolon or colon, but
	-- semicolon is probably more common.
	text = text:gsub("·", ";")

	local tokens = tokenize(text)

	-- True when the token exists and contains an uppercase letter.
	local function has_upper(tok)
		return tok ~= nil and tok ~= ulower(tok)
	end

	-- True when the token exists and contains a lowercase letter.
	local function has_lower(tok)
		return tok ~= nil and tok ~= uupper(tok)
	end

	local output = {}
	-- ipairs guarantees left-to-right order; the neighbour lookups below
	-- (i - 1, i + 1) and the final concatenation both depend on it.
	for i, token in ipairs(tokens) do
		local lower_token = ulower(token)
		local prev_token, next_token = tokens[i - 1], tokens[i + 1]

		-- Substitute each character of the lowercased token for its
		-- transliteration (only the first gsub result is kept).
		local translit = lower_token:gsub(".[\128-\191]*", tt)

		-- The context rules compare lowercased tokens so that they also
		-- apply to capital letters (e.g. in all-caps words).
		if lower_token == 'γ' and next_token and next_token ~= ''
			and velar:find(ulower(next_token), 1, true) then
			-- γ before a velar should be <n>
			translit = 'n'
		elseif lower_token == 'ρ' and prev_token and ulower(prev_token) == 'ρ' then
			-- ρ after ρ should be <rh>
			translit = 'rh'
		elseif ufind(token, a_subscript) then
			-- add macron to ᾳ
			translit = ugsub(translit, '([aA])', '%1' .. macron)
		end

		if token:find(rough) then
			if ufind(token, '^[Ρρ]') then
				-- rho with rough breathing: <h> follows the letter
				translit = translit .. 'h'
			else -- vowel: <h> precedes it
				translit = 'h' .. translit
			end
		end

		-- Remove macron from a vowel that has a circumflex.
		if ufind(translit, macron_diaeresis) then
			translit = translit:gsub(macron, '')
		end

		-- Restore capitalization.
		if token ~= lower_token then
			-- Decide whether the token sits inside an all-caps word: the
			-- following token decides if it is a cased letter; otherwise
			-- (end of word or text) fall back on the preceding token.
			local in_caps_word
			if has_upper(next_token) then
				in_caps_word = true
			elseif has_lower(next_token) then
				in_caps_word = false
			else
				in_caps_word = has_upper(prev_token)
			end

			if in_caps_word then
				-- Uppercase the whole transliteration so that digraphs
				-- come out as e.g. <KH>, <TH> rather than <Kh>, <Th>.
				translit = uupper(translit)
			else
				-- Capitalize only the first character.
				translit = translit:gsub("^" .. ".[\128-\191]*", uupper)
			end
		end

		output[#output + 1] = translit
	end

	return table.concat(output)
end
-- Hand the table of exported functions back to whoever loads this module.
return export