mirror of
https://github.com/yacy/yacy_search_server.git
synced 2025-07-19 08:44:42 -04:00
Updated internal ISO 639-1 language codes with latest standards.
Includes 54 language code additions, some name modifications, and deprecation notes on a few codes.
This commit is contained in:
@ -1,4 +1,4 @@
|
||||
// iso639.java
|
||||
// ISO639.java
|
||||
// (C) 2008 by Michael Peter Christen; mc@yacy.net, Frankfurt a. M., Germany
|
||||
// first published 19.09.2008 on http://yacy.net
|
||||
//
|
||||
@ -30,110 +30,172 @@ import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
/**
|
||||
* Support for ISO 639 language codes.
|
||||
* @see <a href="https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes">Wikipedia list of ISO 639-1 codes</a>
|
||||
* @see <a href="http://www.loc.gov/standards/iso639-2/php/code_list.php">Language Code List from the ISO 639-2 Registration Authority (Library of Congress)</a>
|
||||
* @see <a href="http://www-01.sil.org/iso639-3/">Home page of the ISO 639-3 Registration Authority (SIL International)</a>
|
||||
* @see <a href="https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry">IANA language subtag registry</a>
|
||||
* @see <a href="http://www.loc.gov/standards/iso639-2/php/code_changes.php">Code Changes history from the ISO 639-2 Registration Authority</a>
|
||||
*/
|
||||
public class ISO639 {
|
||||
|
||||
/*
|
||||
* Note: using icu4j package classes such as com.ibm.icu.impl.LocaleIDs could be
|
||||
* considered to keep the support of ISO 639 codes more up to date, notably to
|
||||
* support three-letter ISO 639 language codes.
|
||||
*/
|
||||
|
||||
/** ISO 639-1 language codes table: [two-letter code]-[ISO reference name] */
|
||||
private static final String[] codes = {
|
||||
"aa-Afar",
|
||||
"ab-Abkhazian",
|
||||
"ae-Avestan",
|
||||
"af-Afrikaans",
|
||||
"ak-Akan",
|
||||
"am-Amharic",
|
||||
"an-Aragonese",
|
||||
"ar-Arabic",
|
||||
"as-Assamese",
|
||||
"av-Avaric",
|
||||
"ay-Aymara",
|
||||
"az-Azerbaijani",
|
||||
"ba-Bashkir",
|
||||
"be-Byelorussian",
|
||||
"be-Belarusian",
|
||||
"bg-Bulgarian",
|
||||
"bh-Bihari",
|
||||
"bh-Bihari", // collective language code for bho-Bhojpuri, mag-Magahi, and mai-Maithili
|
||||
"bi-Bislama",
|
||||
"bn-Bengali;-Bangla",
|
||||
"bm-Bambara",
|
||||
"bn-Bengali",
|
||||
"bo-Tibetan",
|
||||
"br-Breton",
|
||||
"bs-Bosnian",
|
||||
"ca-Catalan",
|
||||
"ce-Chechen",
|
||||
"ch-Chamorro",
|
||||
"co-Corsican",
|
||||
"cr-Cree",
|
||||
"cs-Czech",
|
||||
"cu-Church Slavic",
|
||||
"cv-Chuvash",
|
||||
"cy-Welsh",
|
||||
"da-Danish",
|
||||
"de-German",
|
||||
"dz-Bhutani",
|
||||
"el-Greek",
|
||||
"dv-Dhivehi",
|
||||
"dz-Dzongkha",
|
||||
"ee-Ewe",
|
||||
"el-Modern Greek (1453-)",
|
||||
"en-English",
|
||||
"eo-Esperanto",
|
||||
"es-Spanish",
|
||||
"et-Estonian",
|
||||
"eu-Basque",
|
||||
"fa-Persian",
|
||||
"ff-Fulah",
|
||||
"fi-Finnish",
|
||||
"fj-Fiji",
|
||||
"fo-Faeroese",
|
||||
"fj-Fijian",
|
||||
"fo-Faroese",
|
||||
"fr-French",
|
||||
"fy-Frisian",
|
||||
"fy-Western Frisian",
|
||||
"ga-Irish",
|
||||
"gd-Scots-Gaelic",
|
||||
"gd-Scottish Gaelic",
|
||||
"gl-Galician",
|
||||
"gn-Guarani",
|
||||
"gu-Gujarati",
|
||||
"gv-Manx",
|
||||
"ha-Hausa",
|
||||
"he-Hebrew",
|
||||
"hi-Hindi",
|
||||
"ho-Hiri Motu",
|
||||
"hr-Croatian",
|
||||
"ht-Haitian",
|
||||
"hu-Hungarian",
|
||||
"hy-Armenian",
|
||||
"hz-Herero",
|
||||
"ia-Interlingua",
|
||||
"id-Indonesian",
|
||||
"ie-Interlingue",
|
||||
"ik-Inupiak",
|
||||
"in-Indonesian",
|
||||
"ig-Igbo",
|
||||
"ii-Sichuan Yi",
|
||||
"ik-Inupiaq",
|
||||
"in-Indonesian", // deprecated on 1989-03-11 in favor of id-Indonesian
|
||||
"io-Ido",
|
||||
"is-Icelandic",
|
||||
"it-Italian",
|
||||
"iw-Hebrew",
|
||||
"iu-Inuktitut",
|
||||
"iw-Hebrew", // deprecated on 1989-03-11 in favor of he-Hebrew
|
||||
"ja-Japanese",
|
||||
"ji-Yiddish",
|
||||
"jw-Javanese",
|
||||
"ji-Yiddish", // deprecated on 1989-03-11 in favor of yi-Yiddish
|
||||
"jv-Javanese",
|
||||
"ka-Georgian",
|
||||
"kg-Kongo",
|
||||
"ki-Kikuyu",
|
||||
"kj-Kuanyama",
|
||||
"kk-Kazakh",
|
||||
"kl-Greenlandic",
|
||||
"km-Cambodian",
|
||||
"kl-Kalaallisut; Greenlandic",
|
||||
"km-Central Khmer",
|
||||
"kn-Kannada",
|
||||
"ko-Korean",
|
||||
"kr-Kanuri",
|
||||
"ks-Kashmiri",
|
||||
"ku-Kurdish",
|
||||
"kv-Komi",
|
||||
"kw-Cornish",
|
||||
"ky-Kirghiz",
|
||||
"la-Latin",
|
||||
"lb-Luxembourgish",
|
||||
"lg-Ganda",
|
||||
"li-Limburgan",
|
||||
"ln-Lingala",
|
||||
"lo-Laothian",
|
||||
"lo-Lao",
|
||||
"lt-Lithuanian",
|
||||
"lv-Latvian,-Lettish",
|
||||
"lu-Luba-Katanga",
|
||||
"lv-Latvian",
|
||||
"mg-Malagasy",
|
||||
"mh-Marshallese",
|
||||
"mi-Maori",
|
||||
"mk-Macedonian",
|
||||
"ml-Malayalam",
|
||||
"mn-Mongolian",
|
||||
//"mo-Moldavian", // this maps on 'mozilla' :(
|
||||
//"mo-Moldavian", // this maps on 'mozilla' :( // deprecated on 2008-11-03 in favor of ro-Romanian to be used for the variant of the Romanian language also known as Moldavian
|
||||
"mr-Marathi",
|
||||
"ms-Malay",
|
||||
"mt-Maltese",
|
||||
"my-Burmese",
|
||||
"na-Nauru",
|
||||
"nb-Norwegian Bokmål",
|
||||
"nd-North Ndebele",
|
||||
"ne-Nepali",
|
||||
"ng-Ndonga",
|
||||
"nl-Dutch",
|
||||
"nn-Norwegian Nynorsk",
|
||||
"no-Norwegian",
|
||||
"oc-Occitan",
|
||||
"om-(Afan)-Oromo",
|
||||
"nr-South Ndebele",
|
||||
"nv-Navajo",
|
||||
"ny-Nyanja",
|
||||
"oc-Occitan (post 1500)",
|
||||
"oj-Ojibwa",
|
||||
"om-Oromo",
|
||||
"or-Oriya",
|
||||
"pa-Punjabi",
|
||||
"os-Ossetian",
|
||||
"pa-Panjabi; Punjabi",
|
||||
"pi-Pali",
|
||||
"pl-Polish",
|
||||
"ps-Pashto,-Pushto",
|
||||
"ps-Pushto; Pashto",
|
||||
"pt-Portuguese",
|
||||
"qu-Quechua",
|
||||
"rm-Rhaeto-Romance",
|
||||
"rn-Kirundi",
|
||||
"rm-Romansh",
|
||||
"rn-Rundi",
|
||||
"ro-Romanian",
|
||||
"ru-Russian",
|
||||
"rw-Kinyarwanda",
|
||||
"sa-Sanskrit",
|
||||
"sc-Sardinian",
|
||||
"sd-Sindhi",
|
||||
"sg-Sangro",
|
||||
"se-Northern Sami",
|
||||
"sg-Sango",
|
||||
"sh-Serbo-Croatian",
|
||||
"si-Singhalese",
|
||||
"si-Sinhala; Sinhalese",
|
||||
"sk-Slovak",
|
||||
"sl-Slovenian",
|
||||
"sm-Samoan",
|
||||
@ -141,35 +203,42 @@ public class ISO639 {
|
||||
"so-Somali",
|
||||
"sq-Albanian",
|
||||
"sr-Serbian",
|
||||
"ss-Siswati",
|
||||
"st-Sesotho",
|
||||
"ss-Swati",
|
||||
"st-Southern Sotho",
|
||||
"su-Sundanese",
|
||||
"sv-Swedish",
|
||||
"sw-Swahili",
|
||||
"ta-Tamil",
|
||||
"te-Tegulu",
|
||||
"te-Telugu",
|
||||
"tg-Tajik",
|
||||
"th-Thai",
|
||||
"ti-Tigrinya",
|
||||
"tk-Turkmen",
|
||||
"tl-Tagalog",
|
||||
"tn-Setswana",
|
||||
"to-Tonga",
|
||||
"tn-Tswana",
|
||||
"to-Tonga (Tonga Islands)",
|
||||
"tr-Turkish",
|
||||
"ts-Tsonga",
|
||||
"tt-Tatar",
|
||||
"tw-Twi",
|
||||
"ty-Tahitian",
|
||||
"ug-Uighur",
|
||||
"uk-Ukrainian",
|
||||
"ur-Urdu",
|
||||
"uz-Uzbek",
|
||||
"ve-Venda",
|
||||
"vi-Vietnamese",
|
||||
"vo-Volapuk",
|
||||
"vo-Volapük",
|
||||
"wa-Walloon",
|
||||
"wo-Wolof",
|
||||
"xh-Xhosa",
|
||||
"yi-Yiddish",
|
||||
"yo-Yoruba",
|
||||
"za-Zhuang",
|
||||
"zh-Chinese",
|
||||
"zu-Zulu"};
|
||||
|
||||
/** Mapping from two-letter ISO 639-1 codes to ISO language reference names in English. */
|
||||
private static Map<String, String> mapping = new ConcurrentHashMap<String, String>(codes.length);
|
||||
|
||||
static {
|
||||
|
Reference in New Issue
Block a user