-- Module:headword/data/sandbox (မော်ဂျူး:headword/data/sandbox), from Wiktionary.
--
-- Documentation for this module may be created at မော်ဂျူး:headword/data/sandbox/doc

local headword_page_module = "Module:headword/page/sandbox"

local list_to_set = require("Module:table").listToSet

local data = {}

------ 1. Lists which are converted into sets. ------

-- Zero-plurals (i.e. invariable plurals). The callback handed to list_to_set
-- returns the item itself.
-- NOTE(review): presumably this makes every name map to itself rather than to
-- `true` -- confirm against Module:table.
local irregular_plurals = list_to_set({
	"cmavo", "cmene", "fu'ivla", "gismu", "Han tu", "hanja", "hanzi",
	"jyutping", "kana", "kanji", "lujvo", "phrasebook", "pinyin", "rafsi",
}, function(_, name)
	return name
end)

-- Irregular non-zero plurals, plus any regular plurals whose singular ends in
-- "s" (the module assumes that inputs ending in "s" are plurals). Both the
-- singular and the plural are registered, because the module generates a default
-- plural whenever the input does not match a key in this table.
local non_zero_plurals = {
	mora = "morae",
}
for singular, plural in pairs(non_zero_plurals) do
	irregular_plurals[singular] = plural
	irregular_plurals[plural] = plural
end

data.irregular_plurals = irregular_plurals

-- Part-of-speech names collected into the set data.lemmas. Most translated
-- entries give the Burmese plural (ending in များ) followed by the singular;
-- entries that have not been translated are kept in English. The trailing
-- comment on each translated line gives the English name.
-- NOTE(review): presumably the headword module consults this set to decide
-- whether a part of speech counts as a lemma -- confirm against the caller.
data.lemmas = list_to_set{
	"အကျဉ်းချုံးများ", "အကျဉ်းချုံး", -- abbreviations
	"အတိုကောက်များ", "အတိုကောက်",  -- acronyms
	"နာမဝိသေသနများ", "နာမဝိသေသန", -- adjectives
	"adnominals",
	"adpositions", -- adpositions (not yet translated)
	"ကြိယာဝိသေသနများ", "ကြိယာဝိသေသန", -- adverbs
	"အဆက်များ", "အဆက်", -- affixes
	"ambipositions",
	"အညွှန်းစကားလုံးများ", "အညွှန်းစကားလုံး", -- articles
	"circumfixes",
	"circumpositions",
	"ရေတွက်ပုံများ", "ရေတွက်ပုံ", -- classifiers
	"cmavo",
	"cmavo clusters",
	"cmene",
	"ပေါင်းစပ်ပုဒ်များ", "ပေါင်းစပ်ပုဒ်", -- combining forms
	"သမ္ဗန္ဓများ", "သမ္ဗန္ဓ", -- conjunctions
	"counters",
	"ဝါစင်္ဂများ", "ဝါစင်္ဂ", -- determiners
	"အသံပြောင်းအမှတ်များ", "အသံပြောင်းအမှတ်", -- diacritical marks
	"ဗျည်းတွဲများ", "ဗျည်းတွဲ", -- digraphs
	"တူညီနှိုင်းယှဉ်နာမဝိသေသနများ", "တူညီနှိုင်းယှဉ်နာမဝိသေသန", -- equative adjectives
	"fu'ivla",
	"gismu",
	"ဟန် အက္ခရာများ", "ဟန် အက္ခရာ", -- Han characters
	"ဟန်တု", -- Han tu
	"ဟန်ဂျာ", -- hanja
	"ဟန်ဇီ", -- hanzi
	"သံပြိုင်ပုဒ်များ", "သံပြိုင်ပုဒ်", -- ideophones
	"အီဒီယမ်များ", "အီဒီယမ်", -- idioms
	"infixes",
	"အက္ခရာအတိုကောက်များ", "အက္ခရာအတိုကောက်", -- initialisms
	"ထပ်ဆင့်အမှတ်အသားများ", "ထပ်ဆင့်အမှတ်အသား", -- iteration marks
	"ပဒ်ဆက်ပစ္စည်းများ", "ပဒ်ဆက်ပစ္စည်း", -- interfixes
	"အာမေဍိတ်များ", "အာမေဍိတ်", -- interjections
	"ခန", -- kana
	"ခန်းဂျီး", -- kanji
	"စာလုံးများ", "စာလုံး", -- letters
	"ligatures",
	"လိုလိုဂရမ်များ", "လိုလိုဂရမ်", -- logograms
	"lujvo",
	"morae",
	"ရုပ်ရင်းများ", "ရုပ်ရင်း", -- morphemes
	"non-constituents",
	"နာမ်များ", "နာမ်", -- nouns
	"နံပါတ်များ", "နံပါတ်", -- numbers
	"ကိန်းဂဏန်း သင်္ကေတများ", "ကိန်းဂဏန်း သင်္ကေတ", -- numeral symbols
	"ဂဏန်းခြေများ", "ဂဏန်းခြေ", -- numerals
	"ပစ္စည်းများ", "ပစ္စည်း", -- particles
	"ပုဒ်စုများ", "ပုဒ်စု", -- phrases
	"နာမ်ဝိဘတ်များ", "နာမ်ဝိဘတ်", -- postpositions
	"နာမ်ဝိဘတ်ပါသော ပုဒ်စုများ", "နာမ်ဝိဘတ်ပါသော ပုဒ်စု", -- postpositional phrases
	"ကြိယာနောက်လိုက် နာမဝိသေသနများ", "ကြိယာနောက်လိုက် နာမဝိသေသန", -- predicatives
	"ရှေ့ဆက်များ", "ရှေ့ဆက်", -- prefixes
	"ဝိဘတ် စကားစုများ", "ဝိဘတ် စကားစု", -- prepositional phrases
	"ဝိဘတ်များ", "ဝိဘတ်", -- prepositions
	"ကြိယာပုဒ်ရှေ့ဆက်ပစ္စည်းများ", "ကြိယာပုဒ်ရှေ့ဆက်ပစ္စည်း", -- preverbs
	"နာမ်စားရင်းခံနာမဝိသေသနများ", "နာမ်စားရင်းခံနာမဝိသေသန", -- pronominal adverbs
	"နာမ်စားများ", "နာမ်စား", -- pronouns
	"တစ်ဦးဆိုင်နာမ်များ", "တစ်ဦးဆိုင်နာမ်", -- proper nouns
	"စကားပုံများ", "စကားပုံ", -- proverbs
	"ပုဒ်ဖြတ်ပုဒ်ရပ်သင်္ကေတများ", "ပုဒ်ဖြတ်ပုဒ်ရပ်သင်္ကေတ", -- punctuation marks
	"အညွှန်းနာမ်စားများ", "အညွှန်းနာမ်စား", -- relatives
	"roots",
	"stems",
	"နောက်ဆက်များ", "နောက်ဆက်", -- suffixes
	"ဝဏ္ဏများ", "ဝဏ္ဏ",  -- syllables
	"သင်္ကေတများ", "သင်္ကေတ", -- symbols
	"ကြိယာများ", "ကြိယာ", -- verbs
}

-- Part-of-speech names collected into the set data.nonlemmas. Translated
-- entries give the Burmese plural (ending in များ) followed by the singular;
-- entries that have not been translated are kept in English. The trailing
-- comment on each translated line gives the English name.
-- NOTE(review): presumably the headword module consults this set to decide
-- whether a part of speech counts as a non-lemma form -- confirm against the
-- caller.
data.nonlemmas = list_to_set{
	"active participle forms",
	"active participles",
	"adjectival participles",
    "adjective case forms",
	"နာမဝိသေသနပုဒ်များ", "နာမဝိသေသနပုဒ်", -- adjective forms
	"နာမဝိသေသန ဣတ္ထိလိင်ပုဒ်များ", "နာမဝိသေသန ဣတ္ထိလိင်ပုဒ်", -- adjective feminine forms
	"နာမဝိသေသန ဗဟုဝုစ်ပုဒ်များ", "နာမဝိသေသန ဗဟုဝုစ်ပုဒ်", -- adjective plural forms
	"ကြိယာဝိသေသနပုဒ်များ", "ကြိယာဝိသေသနပုဒ်", -- adverb forms
	"adverbial participles",
	"agent participles",
	"အညွှန်းပုဒ်များ", "အညွှန်းပုဒ်", -- article forms
	"circumfix forms",
	"ပေါင်းစပ်ထားသောပုဒ်များ", "ပေါင်းစပ်ထားသောပုဒ်", -- combined forms
	"နှိုင်းယှဉ်နိုင်သော နာမဝိသေသနပုဒ်များ", "နှိုင်းယှဉ်နိုင်သော နာမဝိသေသနပုဒ်", -- comparative adjective forms
	"နှိုင်းယှဉ်နိုင်သော နာမဝိသေသနများ", "နှိုင်းယှဉ်နိုင်သော နာမဝိသေသန", -- comparative adjectives
	"နှိုင်းယှဉ်နိုင်သော ကြိယာဝိသေသနပုဒ်များ", "နှိုင်းယှဉ်နိုင်သော ကြိယာဝိသေသနပုဒ်", -- comparative adverb forms
	"နှိုင်းယှဉ်နိုင်သော ကြိယာဝိသေသနများ", "နှိုင်းယှဉ်နိုင်သော ကြိယာဝိသေသန", -- comparative adverbs
	"သမ္ဗန္ဓပုဒ်များ", "သမ္ဗန္ဓပုဒ်", -- conjunction forms
	"အတိုချုံ့စကားလုံးများ", "အတိုချုံ့စကားလုံး", -- contractions
	"converbs",
	"determiner comparative forms",
	"ဝါစင်္ဂပုဒ်များ", "ဝါစင်္ဂပုဒ်", -- determiner forms
	"determiner superlative forms",
	"diminutive nouns",
	"elative adjectives",
	"equative adjective forms",
	"equative adjectives",
	"future participles",
	"gerunds",
	"infinitive forms",
	"infinitives",
	"interjection forms",
	"jyutping",
	"စာလုံးပေါင်းအမှားများ", "စာလုံးပေါင်းအမှား", -- misspellings
	"negative participles",
	"nominal participles",
	"noun case forms",
	"noun construct forms",
	"noun dual forms",
	"နာမ်ပုဒ်များ", "နာမ်ပုဒ်", -- noun forms
	"noun paucal forms",
	"နာမ် ဗဟုဝုစ်ပုဒ်များ", "နာမ် ဗဟုဝုစ်ပုဒ်", -- noun plural forms
	"noun possessive forms",
	"noun singulative forms",
	"ဂဏန်းခြေပုဒ်များ", "ဂဏန်းခြေပုဒ်", -- numeral forms
	"ကြိယာသဏ္ဌာန်များ", "ကြိယာသဏ္ဌာန်", -- participles
	"ကြိယာသဏ္ဌာန်ပုဒ်များ", "ကြိယာသဏ္ဌာန်ပုဒ်", -- participle forms
	"ပစ္စည်းပုဒ်များ", "ပစ္စည်းပုဒ်", -- particle forms
	"passive participles",
	"past active participles",
	"past adverbial participles",
	"အတိတ်ကာလပြ ကြိယာသဏ္ဌာန်များ", "အတိတ်ကာလပြ ကြိယာသဏ္ဌာန်", -- past participles
	"အတိတ်ကာလပြ ကြိယာသဏ္ဌာန်ပုဒ်များ", "အတိတ်ကာလပြ ကြိယာသဏ္ဌာန်ပုဒ်", -- past participle forms
	"past passive participles",
	"perfect active participles",
	"perfect participles",
	"perfect passive participles",
	"pinyin",
	"ဗဟုဝုစ်များ", "ဗဟုဝုစ်", -- plurals
	"postposition forms",
	"ရှေ့ဆက်ပုဒ်များ", "ရှေ့ဆက်ပုဒ်", -- prefix forms
	"preposition contractions",
	"ဝိဘတ်ပုဒ်များ", "ဝိဘတ်ပုဒ်", -- preposition forms
	"ဝိဘတ်နာမ်စားများ", "ဝိဘတ်နာမ်စား", -- prepositional pronouns
	"present active participles",
	"present adverbial participles",
	"ဖြစ်ဆဲပြ ကြိယာသဏ္ဌာန်များ", "ဖြစ်ဆဲပြ ကြိယာသဏ္ဌာန်", -- present participles
	"present passive participles",
	"preverb forms",
	"နာမ်စားပုဒ်များ", "နာမ်စားပုဒ်", -- pronoun forms
	"နာမ်စားပိုင်ဆိုင်မှုပြပုဒ်များ", "နာမ်စားပိုင်ဆိုင်မှုပြပုဒ်", -- pronoun possessive forms
	"တစ်ဦးဆိုင်နာမ်ပုဒ်များ", "တစ်ဦးဆိုင်နာမ်ပုဒ်", -- proper noun forms
	"တစ်ဦးဆိုင်နာမ် ဗဟုဝုစ်ပုဒ်များ", "တစ်ဦးဆိုင်နာမ် ဗဟုဝုစ်ပုဒ်", -- proper noun plural forms
	"rafsi",
	"ရောမအက္ခရာဖလှယ်ခြင်းများ", "ရောမအက္ခရာဖလှယ်ခြင်း", -- romanizations
	"root forms",
	"singulatives",
	"နောက်ဆက်ပုဒ်များ", "နောက်ဆက်ပုဒ်", -- suffix forms
	"အသာလွန်ဆုံး နာမဝိသေသနပုဒ်များ", "အသာလွန်ဆုံး နာမဝိသေသနပုဒ်", -- superlative adjective forms
	"အသာလွန်ဆုံး နာမဝိသေသနများ", "အသာလွန်ဆုံး နာမဝိသေသန", -- superlative adjectives
	"အသာလွန်ဆုံး ကြိယာဝိသေသနပုဒ်များ", "အသာလွန်ဆုံး ကြိယာဝိသေသနပုဒ်", -- superlative adverb forms
	"အသာလွန်ဆုံး ကြိယာဝိသေသနများ", "အသာလွန်ဆုံး ကြိယာဝိသေသန", -- superlative adverbs
	"ကြိယာပုဒ်များ", "ကြိယာပုဒ်", -- verb forms
	"ကြိယာနာမ်များ", "ကြိယာနာမ်", -- verbal nouns
}

-- These languages will not have links to separate parts of the headword.
data.no_multiword_links = list_to_set({ "zh" })

-- Languages for which no "LANG multiword terms" category is added. The codes
-- are grouped by the reason the category would be misleading.
data.no_multiword_cat = list_to_set({
	-------- Languages without spaces between words (sometimes spaces between phrases) --------
	"blt", -- Tai Dam
	"ja", -- Japanese
	"khb", -- Lü
	"km", -- Khmer
	"lo", -- Lao
	"mnw", -- Mon
	"my", -- Burmese
	"nan", -- Min Nan (some words in Latin script; hyphens between syllables)
	"nan-hbl", -- Hokkien (some words in Latin script; hyphens between syllables)
	"nod", -- Northern Thai
	"ojp", -- Old Japanese
	"shn", -- Shan
	"sou", -- Southern Thai
	"tdd", -- Tai Nüa
	"th", -- Thai
	"tts", -- Isan
	"twh", -- Tai Dón
	"txg", -- Tangut
	"zh", -- Chinese (all varieties with Chinese characters)
	"zkt", -- Khitan

	-------- Languages with spaces between syllables --------
	"ahk", -- Akha
	"aou", -- A'ou
	"atb", -- Zaiwa
	"byk", -- Biao
	"cdy", -- Chadong
	--"duu", -- Drung; not sure
	--"hmx-pro", -- Proto-Hmong-Mien
	--"hnj", -- Green Hmong; not sure
	"huq", -- Tsat
	"ium", -- Iu Mien
	--"lis", -- Lisu; not sure
	"mtq", -- Muong
	--"mww", -- White Hmong; not sure
	"onb", -- Lingao
	--"sit-gkh", -- Gokhy; not sure
	--"swi", -- Sui; not sure
	"tbq-lol-pro", -- Proto-Loloish
	"tdh", -- Thulung
	"ukk", -- Muak Sa-aak
	"vi", -- Vietnamese
	"yig", -- Wusa Nasu
	"zng", -- Mang

	-------- Languages with ~ with surrounding spaces used to separate variants --------
	"mkh-ban-pro", -- Proto-Bahnaric
	"sit-pro", -- Proto-Sino-Tibetan

	-------- Other weirdnesses --------
	"mul", -- Translingual; gestures, Morse code, etc.
	"aot", -- Atong (India); bullet is a letter

	-------- All sign languages (one row per initial letter) --------
	"ads", "aed", "aen", "afg", "ase", "asf", "asp", "asq", "asw",
	"bfi", "bfk", "bog", "bqn", "bqy", "bvl", "bzs",
	"cds", "csc", "csd", "cse", "csf", "csg", "csl", "csn", "csq", "csr",
	"doq", "dse", "dsl",
	"ecs", "esl", "esn", "eso", "eth",
	"fcs", "fse", "fsl", "fss",
	"gds", "gse", "gsg", "gsm", "gss", "gus",
	"hab", "haf", "hds", "hks", "hos", "hps", "hsh", "hsl",
	"icl", "iks", "ils", "inl", "ins", "ise", "isg", "isr",
	"jcs", "jhs", "jls", "jos", "jsl", "jus",
	"kgi", "kvk",
	"lbs", "lls", "lsl", "lso", "lsp", "lst", "lsy", "lws",
	"mdl", "mfs", "mre", "msd", "msr", "mzc", "mzg", "mzy",
	"nbs", "ncs", "nsi", "nsl", "nsp", "nsr", "nzs",
	"okl",
	"pgz", "pks", "prl", "prz", "psc", "psd", "psg", "psl", "pso", "psp", "psr", "pys",
	"rms", "rsl", "rsm",
	"sdl", "sfb", "sfs", "sgg", "sgx", "slf", "sls", "sqk", "sqs", "ssp", "ssr", "svk", "swl", "syy",
	"tse", "tsm", "tsq", "tss", "tsy", "tza",
	"ugn", "ugy", "ukl", "uks",
	"vgt", "vsi", "vsl", "vsv",
	"xki", "xml", "xms",
	"ygs", "ysl",
	"zib", "zsl",
})

-- Languages in which a hyphen does not count as a word separator when deciding
-- on the "multiword terms" category. Each entry notes why hyphens occur inside
-- single words in that language.
data.hyphen_not_multiword_sep = list_to_set({
	"akk", -- Akkadian; hyphens between syllables
	"akl", -- Aklanon; hyphens for mid-word glottal stops
	"ber-pro", -- Proto-Berber; morphemes separated by hyphens
	"ceb", -- Cebuano; hyphens for mid-word glottal stops
	"cnk", -- Khumi Chin; hyphens used in single words
	"cpi", -- Chinese Pidgin English; Chinese-derived words with hyphens between syllables
	"de", -- too many false positives
	"esx-esk-pro", -- hyphen used to separate morphemes
	"fi", -- Finnish; hyphen separates the components of a compound when the final and initial vowels match
	"hil", -- Hiligaynon; hyphens for mid-word glottal stops
	"hnn", -- Hanunoo; too many false positives
	"ilo", -- Ilocano; hyphens for mid-word glottal stops
	"kne", -- Kankanaey; hyphens for mid-word glottal stops
	"lcp", -- Western Lawa; dash as syllable joiner
	"lwl", -- Eastern Lawa; dash as syllable joiner
	"mfa", -- Pattani Malay in Thai script; dash as syllable joiner
	"mkh-vie-pro", -- Proto-Vietic; morphemes separated by hyphens
	"msb", -- Masbatenyo; too many false positives
	"tl", -- Tagalog; too many false positives
	"war", -- Waray-Waray; too many false positives
	"yo", -- Yoruba; hyphens used to show lengthened nasal vowels
})

-- Languages for which "LANG masculine nouns" and similar categories are not
-- added: they have no gender but use the gender field for other purposes.
data.no_gender_cat = list_to_set({ "ja", "th" })

-- Language codes collected into the set data.notranslit.
-- NOTE(review): presumably these are languages whose headwords should not
-- receive an automatic transliteration -- confirm against the headword module.
data.notranslit = list_to_set({
	"ams", "az",
	"bbc", "bug",
	"cdo", "cia", "cjm", "cjy", "cmn", "cnp", "cpi", "cpx", "csp", "czh", "czo",
	"gan",
	"hak", "hnm", "hsn",
	"ja",
	"kzg",
	"lad", "ltc", "luh", "lzh",
	"mnp", "ms", "mul", "mvi",
	"nan", "nan-dat", "nan-hbl", "nan-hlh", "nan-lnx", "nan-tws", "nan-zhe", "nan-zsh",
	"och", "oj", "okn",
	"ryn", "rys", "ryu",
	"sh", "sjc",
	"tgt", "th", "tkn", "tly", "txg",
	"und",
	"vi",
	"wuu",
	"xug",
	"yoi", "yox", "yue",
	"za", "zh", "zhx-sic", "zhx-tai",
})

-- Script codes for which a script-tagged display title will be added. Codes
-- indented beneath a script are the variant codes listed with that script.
data.toBeTagged = list_to_set({
	"Ahom",
	"Arab",
		"fa-Arab", "glk-Arab", "kk-Arab", "ks-Arab", "ku-Arab", "mzn-Arab", "ms-Arab",
		"ota-Arab", "pa-Arab", "ps-Arab", "sd-Arab", "tt-Arab", "ug-Arab", "ur-Arab",
	"Armi", "Armn", "Avst",
	"Bali", "Bamu", "Batk",
	"Beng",
		"as-Beng",
	"Bopo", "Brah", "Brai", "Bugi", "Buhd",
	"Cakm", "Cans", "Cari", "Cham", "Cher", "Copt", "Cprt", "Cyrl", "Cyrs",
	"Deva", "Dsrt",
	"Egyd", "Egyp", "Ethi",
	"Geok", "Geor", "Glag", "Goth",
	"Grek",
		"Polyt", "polytonic",
	"Gujr", "Guru",
	"Hang", "Hani", "Hano", "Hebr", "Hira", "Hluw",
	"Ital",
	"Java",
	"Kali", "Kana", "Khar", "Khmr", "Knda", "Kthi",
	"Lana", "Laoo",
	"Latn",
		"Latf", "Latg", "Latnx", "Latinx", "pjt-Latn",
	"Lepc", "Limb", "Linb", "Lisu", "Lyci", "Lydi",
	"Mand", "Mani", "Marc", "Merc", "Mero", "Mlym",
	"Mong",
		"mnc-Mong", "sjo-Mong", "xwo-Mong",
	"Mtei", "Mymr",
	"Narb", "Nkoo", "Nshu",
	"Ogam", "Olck", "Orkh", "Orya", "Osma", "Ougr",
	"Palm", "Phag", "Phli", "Phlv", "Phnx", "Plrd", "Prti",
	"Rjng", "Runr",
	"Samr", "Sarb", "Saur", "Sgnw", "Shaw", "Shrd", "Sinh", "Sora", "Sund", "Sylo", "Syrc",
	"Tagb", "Tale", "Talu", "Taml", "Tang", "Tavt", "Telu", "Tfng", "Tglg", "Thaa", "Thai", "Tibt",
	"Ugar",
	"Vaii",
	"Xpeo", "Xsux",
	"Yiii",
	"Zmth", "Zsym",

	"Ipach", "Music", "Rumin",
})

-- Parts of speech which will not be categorised in categories like "English terms spelled with É" if
-- the term is the character in question (e.g. the letter entry for English [[é]]). This contrasts with
-- entries like the French adjective [[m̂]], which is a one-letter word spelled with the letter.
--
-- Every part of speech is listed under its English name and, where data.lemmas provides one, under
-- its Burmese plural and singular names too. The other part-of-speech tables in this module
-- (data.lemmas, data.pos_for_gender_number_cat) use the Burmese names, so an English-only set
-- would never match a Burmese-named part of speech.
-- NOTE(review): confirm which of the plural/singular spellings the headword module actually passes;
-- the unused spelling can then be dropped.
data.pos_not_spelled_with_self = list_to_set{
	"diacritical marks", "အသံပြောင်းအမှတ်များ", "အသံပြောင်းအမှတ်",
	"Han characters", "ဟန် အက္ခရာများ", "ဟန် အက္ခရာ",
	"Han tu", "ဟန်တု",
	"hanja", "ဟန်ဂျာ",
	"hanzi", "ဟန်ဇီ",
	"iteration marks", "ထပ်ဆင့်အမှတ်အသားများ", "ထပ်ဆင့်အမှတ်အသား",
	"kana", "ခန",
	"kanji", "ခန်းဂျီး",
	"letters", "စာလုံးများ", "စာလုံး",
	"ligatures",
	"logograms", "လိုလိုဂရမ်များ", "လိုလိုဂရမ်",
	"morae",
	"numeral symbols", "ကိန်းဂဏန်း သင်္ကေတများ", "ကိန်းဂဏန်း သင်္ကေတ",
	"numerals", "ဂဏန်းခြေများ", "ဂဏန်းခြေ",
	"punctuation marks", "ပုဒ်ဖြတ်ပုဒ်ရပ်သင်္ကေတများ", "ပုဒ်ဖြတ်ပုဒ်ရပ်သင်္ကေတ",
	"syllables", "ဝဏ္ဏများ", "ဝဏ္ဏ",
	"symbols", "သင်္ကေတများ", "သင်္ကေတ",
}

------ 2. Lists not converted into sets. ------

-- Recognized aliases for parts of speech (param 2=). Key is the short form and value is the canonical singular (not
-- pluralized) form. It is singular so that the same table can be used in [[Module:form of]] for the p=/POS= param
-- and [[Module:links]] for the pos= param.
--
-- Wherever data.lemmas / data.nonlemmas name a part of speech in Burmese, the alias resolves to that
-- same Burmese singular, so that the expanded alias is a member of those sets. Aliases whose part of
-- speech has no Burmese name in this module (converb, the Algonquian noun and verb classes) stay in English.
data.pos_aliases = {
	a = "နာမဝိသေသန",
	adj = "နာမဝိသေသန",
	adv = "ကြိယာဝိသေသန",
	art = "အညွှန်းစကားလုံး",
	det = "ဝါစင်္ဂ",
	compadj = "နှိုင်းယှဉ်နိုင်သော နာမဝိသေသန",
	compadv = "နှိုင်းယှဉ်နိုင်သော ကြိယာဝိသေသန",
	conj = "သမ္ဗန္ဓ",
	conv = "converb",
	int = "အာမေဍိတ်",
	interj = "အာမေဍိတ်",
	intj = "အာမေဍိတ်",
	n = "နာမ်",
	-- the next two support Algonquian languages; see also vii/vai/vti/vta below
	na = "animate noun",
	ni = "inanimate noun",
	num = "ဂဏန်းခြေ",
	part = "ကြိယာသဏ္ဌာန်",
	pcl = "ပစ္စည်း",
	-- data.lemmas names phrases "ပုဒ်စု", so the alias must expand to that name.
	phr = "ပုဒ်စု",
	pn = "တစ်ဦးဆိုင်နာမ်",
	postp = "နာမ်ဝိဘတ်",
	pref = "ရှေ့ဆက်",
	prep = "ဝိဘတ်",
	pron = "နာမ်စား",
	prop = "တစ်ဦးဆိုင်နာမ်",
	proper = "တစ်ဦးဆိုင်နာမ်",
	propn = "တစ်ဦးဆိုင်နာမ်",
	rom = "ရောမအက္ခရာဖလှယ်ခြင်း",
	suf = "နောက်ဆက်",
	supadj = "အသာလွန်ဆုံး နာမဝိသေသန",
	supadv = "အသာလွန်ဆုံး ကြိယာဝိသေသန",
	v = "ကြိယာ",
	vb = "ကြိယာ",
	vi = "ကံမလို ကြိယာ",
	-- Singular, like every other value in this table (the plural marker များ must not appear here).
	vt = "ကံလို ကြိယာ",
	-- the next four support Algonquian languages
	vii = "inanimate intransitive verb",
	vai = "animate intransitive verb",
	-- "vti" is the Algonquian transitive inanimate class, not "transitive and intransitive verbs".
	vti = "transitive inanimate verb",
	vta = "transitive animate verb",
}

-- Parts of speech for which categories like "German masculine nouns" or "Russian imperfective verbs"
-- will be generated if the headword is of the appropriate gender/number. Each key is a part of
-- speech and its value is the part of speech stored for it (proper nouns are stored as nouns).
do
	local gender_number_pos = {}
	gender_number_pos["နာမ်"] = "နာမ်"
	gender_number_pos["တစ်ဦးဆိုင်နာမ်"] = "နာမ်"
	gender_number_pos["နောက်ဆက်"] = "နောက်ဆက်"
	-- Verbs are included because impf and pf are valid "genders".
	gender_number_pos["ကြိယာ"] = "ကြိယာ"
	data.pos_for_gender_number_cat = gender_number_pos
end

------ 3. Page-wide processing (so that it only needs to be done once per page). ------
local page = require(headword_page_module).process_page()
data.page = page
-- FIXME: references to data.pagename and data.encoded_pagename are scattered throughout the
-- codebase, so both fields are also exposed at the top level of the data table.
data.pagename = page.pagename
data.encoded_pagename = page.encoded_pagename

return data