# Pashtoxnx 2013
import re
def tokenize(s: str) -> list:
    """Split *s* into a flat list of string tokens.

    At each position the first alternative that matches wins:

    * a maximal run of characters in U+0600-U+06FF (the Unicode "Arabic"
      block),
    * a maximal run of ASCII letters and digits,
    * any single remaining non-whitespace character.

    Whitespace only separates tokens and is never returned. An empty or
    whitespace-only string yields an empty list.

    NOTE(review): U+0600-U+06FF also contains Arabic-script punctuation and
    digits (for example U+060C, the Arabic comma), so those stay attached to
    neighbouring letters rather than becoming separate tokens.
    """
    # Alternation order matters: the two "run" patterns are tried before the
    # single-character fallback, so words are not split into characters.
    return re.findall(r'[\u0600-\u06FF]+|[A-Za-z0-9]+|[^\s]', s)
# A — Software/library: "pashtoxnx 2013" (assumed to be a CLI tool or a Python package)
from pashtoxnx import PashtoProcessor
# Demo: run one sample sentence through a PashtoProcessor and print the results.
# NOTE(review): `PashtoProcessor` and the 'nx2013' model name come from the
# external `pashtoxnx` package; what they load or return is not visible in this
# file — confirm against that package's documentation.
p = PashtoProcessor(model="nx2013")

text = "زما نوم احمد دی."

# Both calls are made before anything is printed, so an exception raised by
# either call leaves no partial output.
tokens = p.tokenize(text)
translit = p.transliterate(text)

# Tokens on the first line, transliteration on the next.
print(tokens, translit, sep="\n")