Modul:fa-ira-translit

Notes

THIS MODULE SHOULD NOT BE USED FOR CLASSICAL PERSIAN OR DARI. Due to pronunciation differences between modern Iranian Persian and other varieties of Persian, as well as differences in vowel notation, Iranian Persian cannot be transliterated the same way as other varieties of Persian. If you need to transliterate a variety of Persian other than modern Iranian Persian, use Module:fa-cls-translit.
This module uses Module:fa-cls-translit as a backend.
Test cases

Lua error Xetay pele package.lua dı rêza 80 de ya: module 'Module:string/nowiki' not found.
-- Unicode-aware helpers from the Scribunto `mw.ustring` library:
-- `U` builds a string from codepoints, `gsub` is the UTF-8 aware gsub.
local U = mw.ustring.char
local gsub = mw.ustring.gsub
-- Module table; returned at the end of the file.
local export = {}

-- Arabic-script marks and letters used to build patterns and as keys of `mapping`.
local fatHatan = U(0x64B) -- U+064B ARABIC FATHATAN
local zabar = U(0x64E) -- U+064E ARABIC FATHA
local zer = U(0x650) -- U+0650 ARABIC KASRA
local pesh = U(0x64F) -- U+064F ARABIC DAMMA
local tashdid = U(0x651) -- also called shadda
local jazm = "ْ" -- also called sukun: marks "no vowel"
local he = "ه" -- the letter he (written as a literal)
local zwnj = U(0x200C) -- U+200C ZERO WIDTH NON-JOINER
local highhmz = U(0x654) -- U+0654 ARABIC HAMZA ABOVE (combining)
local alef_wasla = "ٱ" -- alef wasla (written as a literal)

-- This module is also used for Balti; some characters are included only for Balti.

-- The strings below are interpolated into "[...]" character classes by export.tr,
-- so each codepoint in them is an independent class member.
-- `consonants2` is `consonants` plus و ؤ ی ئ (the semivowel letters and their hamza forms).
-- NOTE(review): both strings appear to contain the two-codepoint sequence "کٔ"
-- (kaf + combining hamza above); inside a class that makes the combining hamza a
-- member on its own — confirm this is intended.
-- NOTE(review): neither string contains ع or ء — confirm this is intentional.
local consonants = "بپتٹثجڃچڇحخدڈذرڑزڗژسشݜصضطظغفقککٔگلمݨنݩھه"
local consonants2 = "بپتٹثجڃچڇحخدڈذرڑزڗژسشݜصضطظغفقککٔگلمݨنݩوؤهھیئ"
local vowels = "âایئوؤ" --â is counted because of alif conversions
local hes = "هح" -- not referenced anywhere in the visible code
local diacritics = "َُِّْٰ" -- combining marks treated as "a diacritic" by the rules in export.tr
local ZZP = "َُِ" -- presumably Zabar/Zer/Pesh, i.e. the short-vowel marks only — confirm

-- Per-character lookup table applied as the very last step of export.tr
-- (via gsub with the "." pattern). Characters without an entry are left as-is.
local mapping = {
	-- letters
	["آ"] = "â",
	["ب"] = "b",
	["پ"] = "p",
	["ت"] = "t",
	["ث"] = "s",
	["ج"] = "j",
	["چ"] = "č",
	["ح"] = "h",
	["خ"] = "x",
	["د"] = "d",
	["ذ"] = "z",
	["ر"] = "r",
	["ز"] = "z",
	["ژ"] = "ž",
	["س"] = "s",
	["ش"] = "š",
	["ص"] = "s",
	["ض"] = "z",
	["ط"] = "t",
	["ظ"] = "z",
	["غ"] = "ğ",
	["ف"] = "f",
	["ق"] = "q",
	["ک"] = "k",
	["گ"] = "g",
	["ل"] = "l",
	["م"] = "m",
	["ن"] = "n",
	["و"] = "u", -- default reading; consonantal readings are rewritten to "v" before the lookup
	["ہ"] = "h", -- export.tr rewrites this letter to ه before the lookup
	["ی"] = "i", -- default reading; consonantal readings are rewritten to "y" before the lookup
	["۔"] = ".",

	["ه"] = "h",

	-- ain and the hamza carriers all become an apostrophe
	["ع"] = "'",
	["ء"] = "'",
	["ئ"] = "'",
	["ؤ"] = "'",
	["أ"] = "'",

	-- diacritics
	[zabar] = "a",
	[zer] = "e",
	[pesh] = "o",
	[jazm] = "", -- sukun: no vowel, so nothing is emitted
	[zwnj] = "-", -- zero-width non-joiner becomes a visible hyphen
	[highhmz] = "-ye",

	-- ligatures
	["ﻻ"] = "lâ",
	["ﷲ"] = "allâh",

	-- kashida (tatweel): emitted as a hyphen
	["ـ"] = "-",

	-- alef wasla: emits nothing
	[alef_wasla] = "",

	-- numerals
	["۱"] = "1",
	["۲"] = "2",
	["۳"] = "3",
	["۴"] = "4",
	["۵"] = "5",
	["۶"] = "6",
	["۷"] = "7",
	["۸"] = "8",
	["۹"] = "9",
	["۰"] = "0",

	-- punctuation
	["؟"] = "?", -- question mark
	["،"] = ",", -- comma
	["؛"] = ";", -- semicolon
	["«"] = "“", -- opening quotation mark
	["»"] = "”", -- closing quotation mark
	["٪"] = "%", -- percent
	["؉"] = "‰", -- per mille
	["٫"] = ".", -- decimal separator
	["٬"] = ",", -- thousands separator

	-- Balti-only characters (not used by Persian).
	-- ژ cannot get a Balti value because it conflicts with the Persian one above.
	["ڃ"] = "ž",
	["ٹ"] = "ṭ",
	["ڇ"] = "č̣",
	["ڈ"] = "ḍ",
	["ڑ"] = "ṛ",
	["ڗ"] = "dz",
	["ݜ"] = "ṣ",
	-- NOTE(review): this key looks like two codepoints, while the lookup in
	-- export.tr is per character, so the entry appears unreachable — confirm.
	["کٔ"] = "ǩ",
	["ݨ"] = "ng",
	["ݩ"] = "ny",
	["ھ"] = "h",
	["ے"] = "e",
}
 
-- Individual letters referenced by name when export.tr builds its patterns.
local ain = 'ع' -- not referenced anywhere in the visible code
local alif = 'ا'
local ye = 'ی'
local ye2 = 'ئ' -- not referenced anywhere in the visible code
local ye3 = 'ے' -- for balti
local vao = "و"

-- Capture-group fragments shared by the rewrite rules below.
local cap_cons = "([" .. consonants .. "])"
local cap_cons2 = "([" .. consonants2 .. "])"
local cap_mark = "([" .. diacritics .. "])"
local cap_short = "([" .. ZZP .. "])"
local cap_mark_or_short = "([" .. diacritics .. ZZP .. "])"
local cap_vowel = "([" .. vowels .. "])"
local cap_vowels_then_jazm = "([" .. vowels .. "]+" .. jazm .. ")"

-- Rules applied before the text is wrapped in "##": they protect literal "#"
-- characters and mark the boundaries around " | " and newlines with "#".
local boundary_rules = {
	{ "#", "HASHTAG" },
	{ " | ", "# | #" },
	{ "\n", "#\n#" },
}

-- Ordered { pattern, replacement } pairs. Order matters: later rules rely on
-- the output of earlier ones, and "#" stands for a word boundary throughout.
local rewrite_rules = {
	-- ZWNJ and hamza-above are treated as word boundaries of their own
	{ zwnj, "#" .. zwnj .. "#" },
	{ highhmz, "#" .. highhmz .. "#" },

	-- exceptions
	{ "ہ", "ه" }, -- normalise the Balti he so Balti text can be transliterated
	{ "یٰ", "â" }, -- U+06CC (Farsi ye) followed by the superscript alef
	{ "ٰ", "â" },
	{ "ا" .. fatHatan, "an" },
	{ "الله", "ﷲ" },
	{ "لا", "ﻻ" },

	-- silent vav after khe
	{ "خوا", "خا" },
	{ "خوی", "خی" },
	{ "خوَ", "خُ" },

	-- a vav standing alone as a word
	{ "#" .. vao .. "#", "#o#" },
	-- Balti ye after a diacritic is handled like the ordinary ye
	{ cap_mark_or_short .. ye3, "%1" .. ye },
	-- zabar followed by jazm acts as an invisible ZWNJ
	{ zabar .. jazm, "-" },

	-- tashdid: double the letter that carries it
	{ cap_cons2 .. tashdid, "%1%1" },
	{ cap_cons2 .. tashdid .. cap_short, "%1%1%2" },
	{ cap_short .. ye .. cap_short .. tashdid, "%1yy%2" },
	{ cap_short .. vao .. cap_short .. tashdid, "%1vv%2" },
	-- the tashdid can end up after the short-vowel mark, so handle that order too
	{ cap_cons .. cap_short .. tashdid, "%1%1%2" },

	-- alif directly after a consonant letter is the long vowel
	{ cap_cons2 .. alif, "%1â" },
	-- jazm before alif / alif madda acts as an invisible ZWNJ
	{ jazm .. alif, "-" },
	{ jazm .. "آ", "-â" },
	{ cap_cons2 .. "آ", "%1'â" },
	-- alif after a diacritic is dropped
	{ cap_mark .. alif, "%1" },
	{ cap_short .. alif, "%1" },
	-- alif carrying a diacritic before a consonant: keep only the diacritic
	-- (the glottal stop is not shown currently)
	{ alif .. cap_mark .. cap_cons2, "%1%2" },
	{ alif .. ye, "i" },
	{ alif .. vao, "u" },
	{ alif .. cap_cons2, "a%1" },

	-- semivowel vav
	{ pesh .. vao, "ow" },
	{ vao .. cap_mark_or_short, "v%1" },
	{ cap_mark_or_short .. vao, "%1v" },
	{ cap_vowel .. vao, "%1v" },
	{ vao .. cap_vowels_then_jazm, "u%1" },
	{ vao .. cap_vowel, "v%1" },

	-- semivowel ye
	{ ye .. cap_mark_or_short, "y%1" },
	{ cap_mark_or_short .. ye, "%1y" },
	{ cap_vowel .. ye, "%1y" },
	{ ye .. cap_vowels_then_jazm, "i%1" },
	{ ye .. cap_vowel, "y%1" },

	-- word-final he and izafa/ezafe
	{ "y" .. zer .. "#", "i-ye#" },
	{ zer .. "#", "-e#" },
	{ cap_cons2 .. he .. "#", "%1e#" },
	{ zer .. he .. "#" .. zwnj, "e-" },
	{ zer .. he .. "#", "e#" },

	-- drop the boundary markers, then restore literal "#" characters
	{ "#", "" },
	{ "HASHTAG", "#" },
}

-- Runs every { pattern, replacement } pair of `rules` over `text`, in order.
local function apply_rules(text, rules)
	for index = 1, #rules do
		local rule = rules[index]
		-- the assignment keeps only the string, discarding gsub's match count
		text = gsub(text, rule[1], rule[2])
	end
	return text
end

--- Transliterates Arabic-script text into Latin script.
-- The text is first annotated with "#" word-boundary markers, then rewritten
-- by the ordered rules above, and finally every remaining character is looked
-- up in `mapping`.
-- @param text string to transliterate
-- @param lang not used by this function
-- @param sc not used by this function
-- @return the transliterated string
function export.tr(text, lang, sc)
	text = apply_rules(text, boundary_rules)

	-- spaces separate words; the whole text also starts and ends at a boundary
	local spaced = gsub(text, " ", "# #")
	text = "##" .. spaced .. "##"

	text = apply_rules(text, rewrite_rules)

	-- convert all remaining characters one by one
	local result = gsub(text, ".", mapping)
	return result
end
 
return export