python-ox/oxutils/lang.py

244 lines
7.6 KiB
Python
Raw Normal View History

2008-06-01 13:11:39 +00:00
# -*- coding: utf-8 -*-
2008-06-19 09:21:21 +00:00
# vi:si:et:sw=4:sts=4:ts=4
2008-06-01 13:11:39 +00:00
# ISO 639 language table used by every lookup helper in this module.
#
# Each entry is a 4-tuple:
#   [0] English name
#   [1] native name, ASCII-folded (empty string when not filled in)
#   [2] two-letter code (empty string when the entry has none, e.g. "Unknown")
#   [3] three-letter code
_iso639_languages = [
    ("Unknown", "", "", "und"),
    ("Afar", "", "aa", "aar"),
    ("Abkhazian", "", "ab", "abk"),
    ("Afrikaans", "", "af", "afr"),
    ("Akan", "", "ak", "aka"),
    ("Albanian", "", "sq", "sqi"),
    ("Amharic", "", "am", "amh"),
    ("Arabic", "", "ar", "ara"),
    ("Aragonese", "", "an", "arg"),
    ("Armenian", "", "hy", "hye"),
    ("Assamese", "", "as", "asm"),
    ("Avaric", "", "av", "ava"),
    ("Avestan", "", "ae", "ave"),
    ("Aymara", "", "ay", "aym"),
    ("Azerbaijani", "", "az", "aze"),
    ("Bashkir", "", "ba", "bak"),
    ("Bambara", "", "bm", "bam"),
    ("Basque", "", "eu", "eus"),
    ("Belarusian", "", "be", "bel"),
    ("Bengali", "", "bn", "ben"),
    ("Bihari", "", "bh", "bih"),
    ("Bislama", "", "bi", "bis"),
    ("Bosnian", "", "bs", "bos"),
    ("Breton", "", "br", "bre"),
    ("Bulgarian", "", "bg", "bul"),
    ("Burmese", "", "my", "mya"),
    ("Catalan", "", "ca", "cat"),
    ("Chamorro", "", "ch", "cha"),
    ("Chechen", "", "ce", "che"),
    ("Chinese", "", "zh", "zho"),
    ("Church Slavic", "", "cu", "chu"),
    ("Chuvash", "", "cv", "chv"),
    ("Cornish", "", "kw", "cor"),
    ("Corsican", "", "co", "cos"),
    ("Cree", "", "cr", "cre"),
    ("Czech", "", "cs", "ces"),
    ("Danish", "Dansk", "da", "dan"),
    ("Divehi", "", "dv", "div"),
    ("Dutch", "Nederlands", "nl", "nld"),
    ("Dzongkha", "", "dz", "dzo"),
    ("English", "English", "en", "eng"),
    ("Esperanto", "", "eo", "epo"),
    ("Estonian", "", "et", "est"),
    ("Ewe", "", "ee", "ewe"),
    ("Faroese", "", "fo", "fao"),
    ("Fijian", "", "fj", "fij"),
    ("Finnish", "Suomi", "fi", "fin"),
    ("French", "Francais", "fr", "fra"),
    ("Western Frisian", "", "fy", "fry"),
    ("Fulah", "", "ff", "ful"),
    ("Georgian", "", "ka", "kat"),
    ("German", "Deutsch", "de", "deu"),
    ("Gaelic (Scots)", "", "gd", "gla"),
    ("Irish", "", "ga", "gle"),
    ("Galician", "", "gl", "glg"),
    ("Manx", "", "gv", "glv"),
    ("Greek, Modern", "", "el", "ell"),
    ("Guarani", "", "gn", "grn"),
    ("Gujarati", "", "gu", "guj"),
    ("Haitian", "", "ht", "hat"),
    ("Hausa", "", "ha", "hau"),
    ("Hebrew", "", "he", "heb"),
    ("Herero", "", "hz", "her"),
    ("Hindi", "", "hi", "hin"),
    ("Hiri Motu", "", "ho", "hmo"),
    ("Hungarian", "Magyar", "hu", "hun"),
    ("Igbo", "", "ig", "ibo"),
    ("Icelandic", "Islenska", "is", "isl"),
    ("Ido", "", "io", "ido"),
    ("Sichuan Yi", "", "ii", "iii"),
    ("Inuktitut", "", "iu", "iku"),
    ("Interlingue", "", "ie", "ile"),
    ("Interlingua", "", "ia", "ina"),
    ("Indonesian", "", "id", "ind"),
    ("Inupiaq", "", "ik", "ipk"),
    ("Italian", "Italiano", "it", "ita"),
    ("Javanese", "", "jv", "jav"),
    ("Japanese", "", "ja", "jpn"),
    ("Kalaallisut (Greenlandic)", "", "kl", "kal"),
    ("Kannada", "", "kn", "kan"),
    ("Kashmiri", "", "ks", "kas"),
    ("Kanuri", "", "kr", "kau"),
    ("Kazakh", "", "kk", "kaz"),
    ("Central Khmer", "", "km", "khm"),
    ("Kikuyu", "", "ki", "kik"),
    ("Kinyarwanda", "", "rw", "kin"),
    ("Kirghiz", "", "ky", "kir"),
    ("Komi", "", "kv", "kom"),
    ("Kongo", "", "kg", "kon"),
    ("Korean", "", "ko", "kor"),
    ("Kuanyama", "", "kj", "kua"),
    ("Kurdish", "", "ku", "kur"),
    ("Lao", "", "lo", "lao"),
    ("Latin", "", "la", "lat"),
    ("Latvian", "", "lv", "lav"),
    ("Limburgan", "", "li", "lim"),
    ("Lingala", "", "ln", "lin"),
    ("Lithuanian", "", "lt", "lit"),
    ("Luxembourgish", "", "lb", "ltz"),
    ("Luba-Katanga", "", "lu", "lub"),
    ("Ganda", "", "lg", "lug"),
    ("Macedonian", "", "mk", "mkd"),
    ("Marshallese", "", "mh", "mah"),
    ("Malayalam", "", "ml", "mal"),
    ("Maori", "", "mi", "mri"),
    ("Marathi", "", "mr", "mar"),
    ("Malay", "", "ms", "msa"),
    ("Malagasy", "", "mg", "mlg"),
    ("Maltese", "", "mt", "mlt"),
    ("Moldavian", "", "mo", "mol"),
    ("Mongolian", "", "mn", "mon"),
    ("Nauru", "", "na", "nau"),
    ("Navajo", "", "nv", "nav"),
    ("Ndebele, South", "", "nr", "nbl"),
    ("Ndebele, North", "", "nd", "nde"),
    ("Ndonga", "", "ng", "ndo"),
    ("Nepali", "", "ne", "nep"),
    ("Norwegian Nynorsk", "", "nn", "nno"),
    ("Norwegian Bokmål", "", "nb", "nob"),
    ("Norwegian", "Norsk", "no", "nor"),
    ("Chichewa; Nyanja", "", "ny", "nya"),
    ("Occitan (post 1500); Provençal", "", "oc", "oci"),
    ("Ojibwa", "", "oj", "oji"),
    ("Oriya", "", "or", "ori"),
    ("Oromo", "", "om", "orm"),
    ("Ossetian; Ossetic", "", "os", "oss"),
    ("Panjabi", "", "pa", "pan"),
    ("Persian", "", "fa", "fas"),
    ("Pali", "", "pi", "pli"),
    ("Polish", "", "pl", "pol"),
    ("Portuguese", "Portugues", "pt", "por"),
    ("Pushto", "", "ps", "pus"),
    ("Quechua", "", "qu", "que"),
    ("Romansh", "", "rm", "roh"),
    ("Romanian", "", "ro", "ron"),
    ("Rundi", "", "rn", "run"),
    ("Russian", "", "ru", "rus"),
    ("Sango", "", "sg", "sag"),
    ("Sanskrit", "", "sa", "san"),
    ("Serbian", "", "sr", "srp"),
    ("Croatian", "Hrvatski", "hr", "hrv"),
    ("Sinhala", "", "si", "sin"),
    ("Slovak", "", "sk", "slk"),
    ("Slovenian", "", "sl", "slv"),
    ("Northern Sami", "", "se", "sme"),
    ("Samoan", "", "sm", "smo"),
    ("Shona", "", "sn", "sna"),
    ("Sindhi", "", "sd", "snd"),
    ("Somali", "", "so", "som"),
    ("Sotho, Southern", "", "st", "sot"),
    ("Spanish", "Espanol", "es", "spa"),
    ("Sardinian", "", "sc", "srd"),
    ("Swati", "", "ss", "ssw"),
    ("Sundanese", "", "su", "sun"),
    ("Swahili", "", "sw", "swa"),
    ("Swedish", "Svenska", "sv", "swe"),
    ("Tahitian", "", "ty", "tah"),
    ("Tamil", "", "ta", "tam"),
    ("Tatar", "", "tt", "tat"),
    ("Telugu", "", "te", "tel"),
    ("Tajik", "", "tg", "tgk"),
    ("Tagalog", "", "tl", "tgl"),
    ("Thai", "", "th", "tha"),
    ("Tibetan", "", "bo", "bod"),
    ("Tigrinya", "", "ti", "tir"),
    ("Tonga (Tonga Islands)", "", "to", "ton"),
    ("Tswana", "", "tn", "tsn"),
    ("Tsonga", "", "ts", "tso"),
    ("Turkmen", "", "tk", "tuk"),
    ("Turkish", "", "tr", "tur"),
    ("Twi", "", "tw", "twi"),
    ("Uighur", "", "ug", "uig"),
    ("Ukrainian", "", "uk", "ukr"),
    ("Urdu", "", "ur", "urd"),
    ("Uzbek", "", "uz", "uzb"),
    ("Venda", "", "ve", "ven"),
    ("Vietnamese", "", "vi", "vie"),
    ("Volapük", "", "vo", "vol"),
    ("Welsh", "", "cy", "cym"),
    ("Walloon", "", "wa", "wln"),
    ("Wolof", "", "wo", "wol"),
    ("Xhosa", "", "xh", "xho"),
    ("Yiddish", "", "yi", "yid"),
    ("Yoruba", "", "yo", "yor"),
    ("Zhuang", "", "za", "zha"),
    ("Zulu", "", "zu", "zul"),
]
2008-06-02 13:04:56 +00:00
def codeToLang(code):
    """Return the English language name for a two- or three-letter code.

    The code is lower-cased before comparison. Returns None when the code
    is neither two nor three characters long or is not in the table.
    """
    needle = code.lower()
    column = len(needle)
    # Two-letter codes sit in tuple slot 2 and three-letter codes in slot 3,
    # so the code length doubles as the column index.
    if column not in (2, 3):
        return None
    matches = (row[0] for row in _iso639_languages if row[column] == needle)
    return next(matches, None)
2008-06-01 13:11:39 +00:00
def langTo3Code(lang):
    """Return the three-letter code for a language name, or None.

    ``lang`` may be an English name ("German") or a native name that
    ``englishName`` can translate ("Deutsch"); matching is case-insensitive.
    Returns None for None or for a name that is not in the table.
    """
    if lang is None:
        return None
    # englishName() translates native names to English ones. Fall back to
    # the name as given so that English names are still looked up when
    # englishName() has no translation for them.
    name = (englishName(lang) or lang).lower()
    for entry in _iso639_languages:
        if entry[0].lower() == name:
            return entry[3]
    return None
2008-06-01 13:11:39 +00:00
def langTo2Code(lang):
    """Return the two-letter code for a language name, or None.

    ``lang`` may be an English name ("German") or a native name that
    ``englishName`` can translate ("Deutsch"); matching is case-insensitive.
    Returns None for None or for a name that is not in the table. An entry
    without a two-letter code yields the empty string stored in the table.
    """
    if lang is None:
        return None
    # englishName() translates native names to English ones. Fall back to
    # the name as given so that English names are still looked up when
    # englishName() has no translation for them.
    name = (englishName(lang) or lang).lower()
    for entry in _iso639_languages:
        if entry[0].lower() == name:
            return entry[2]
    return None
2008-06-01 13:11:39 +00:00
def langCode2To3(code):
    """Convert a two-letter language code to its three-letter code.

    Matching is case-insensitive. Returns None when ``code`` is empty,
    is not two characters long, or is not present in the table.
    """
    if not code:
        return None
    code = code.lower()
    if len(code) != 2:
        return None
    # Look the code up directly instead of going through the language
    # name, so an unknown code simply yields None.
    for entry in _iso639_languages:
        if entry[2] == code:
            return entry[3]
    return None
2008-06-01 13:11:39 +00:00
def langCode3To2(code):
    """Convert a three-letter language code to its two-letter code.

    Matching is case-insensitive. Returns None when ``code`` is empty,
    is not three characters long, is not present in the table, or belongs
    to an entry that has no two-letter code.
    """
    if not code:
        return None
    code = code.lower()
    if len(code) != 3:
        return None
    # Look the code up directly instead of going through the language
    # name, so an unknown code simply yields None.
    for entry in _iso639_languages:
        if entry[3] == code:
            # Entries without a two-letter code store "", reported as None.
            return entry[2] or None
    return None
2008-06-01 13:11:39 +00:00
def englishName(lang):
    """Return the canonical English name for a language name, or None.

    ``lang`` is compared case-insensitively against both the native name
    and the English name of every table entry, so a name that is already
    English is returned in its canonical spelling. Returns None for None
    or for a name that is not in the table.

    An empty string matches the first entry whose native name is empty,
    which is "Unknown".
    """
    if lang is None:
        return None
    lang = lang.lower()
    for entry in _iso639_languages:
        if lang in (entry[1].lower(), entry[0].lower()):
            return entry[0]
    return None
2008-06-01 13:11:39 +00:00
def languages2Letter():
    """Return every non-empty two-letter code in the table, in table order."""
    return [entry[2] for entry in _iso639_languages if entry[2]]