// Matt Wells, copyright Jul 2001

// . language detector
// . TODO: use stopwords in doc to determine the language

#ifndef GB_LANG_H
#define GB_LANG_H

#include <inttypes.h>

// Upper bound on language ids: the lang_t enum uses ids 0 through 63.
#define MAX_LANGUAGES 64
// Bit mask over language ids. The active value has its low 63 bits set
// (only the top bit of a 64-bit word is clear).
// NOTE(review): presumably language id N maps to the Nth bit (english,
// id 1, is the first bit; translingual, id 31, is the 31st bit), which
// would make the active mask cover langs 1-63 -- confirm against callers.
// The disabled value below is a narrower mask with only the low 55 bits
// set (64 - 8 is 56, then minus 1 is 55 bits), i.e. langs 1-55.
// NOTE(review): the earlier note here said translingual is excluded, but
// neither value clears the 31st bit -- confirm where that exclusion happens.
//#define LANG_BIT_MASK 0x007fffffffffffffLL
#define LANG_BIT_MASK 0x7fffffffffffffffLL

// Numeric language ids.
//  - 0 is the "unknown" id.
//  - 1..63 each name one language (or a special bucket such as
//    langTranslingual / langUnwanted).
//  - langLast is not a language; it is one past the highest id (64).
// Every id is written out explicitly so a value can be read off directly.
// NOTE(review): the ids are presumably stored or used as bit positions
// elsewhere, so renumbering is likely unsafe -- confirm before changing.
enum lang_t {
	langUnknown        = 0,

	// original set of languages
	langEnglish        = 1,
	langFrench         = 2,
	langSpanish        = 3,
	langRussian        = 4,
	langTurkish        = 5,
	langJapanese       = 6,
	langChineseTrad    = 7,
	langChineseSimp    = 8,
	langKorean         = 9,
	langGerman         = 10,
	langDutch          = 11,
	langItalian        = 12,
	langFinnish        = 13,
	langSwedish        = 14,
	langNorwegian      = 15,
	langPortuguese     = 16,
	langVietnamese     = 17,
	langArabic         = 18,
	langHebrew         = 19,
	langIndonesian     = 20,
	langGreek          = 21,
	langThai           = 22,
	langHindi          = 23,
	langBengala        = 24, // spelling is part of the public name; keep
	langPolish         = 25,
	langTagalog        = 26,

	// added for wiktionary
	langLatin          = 27,
	langEsperanto      = 28,
	langCatalan        = 29,
	langBulgarian      = 30,
	langTranslingual   = 31, // used by multiple langs in wiktionary
	langSerboCroatian  = 32,
	langHungarian      = 33,
	langDanish         = 34,
	langLithuanian     = 35,
	langCzech          = 36,
	langGalician       = 37,
	langGeorgian       = 38,
	langScottishGaelic = 39,
	langGothic         = 40,
	langRomanian       = 41,
	langIrish          = 42,
	langLatvian        = 43,
	langArmenian       = 44,
	langIcelandic      = 45,
	langAncientGreek   = 46,
	langManx           = 47,
	langIdo            = 48,
	langPersian        = 49,
	langTelugu         = 50,
	langVenetian       = 51,
	langMalgasy        = 52, // spelling is part of the public name; keep
	langKurdish        = 53,
	langLuxembourgish  = 54,
	langEstonian       = 55,
	langMaltese        = 56,
	langSlovak         = 57,
	langSlovenian      = 58,
	langBasque         = 59,
	langWelsh          = 60,
	langGreenlandic    = 61,
	langFaroese        = 62,
	langUnwanted       = 63,

	// sentinel: takes the next value after langUnwanted, i.e. 64
	langLast
};

// Conversions between language ids and their textual forms.
// NOTE(review): the definitions are not in this header; the descriptions
// below are inferred from the names and signatures only -- confirm them
// against the implementation.

// Presumably maps a language abbreviation string to its lang_t id.
lang_t getLangIdFromAbbr(const char *abbr);

// Presumably maps a numeric charset identifier to a lang_t id.
lang_t getLangIdFromCharset(uint16_t charset);

// Presumably writes the name of language 'lang' into 'buf'.
// NOTE(review): no buffer length is passed, so the required size of
// 'buf' cannot be told from here -- check the definition before calling.
void languageToString(unsigned char lang, char *buf);

// Presumably returns the name of language 'lang' as a C string.
const char *getLanguageString(unsigned char lang);

// Presumably returns the abbreviation for language id 'langId'.
const char *getLanguageAbbr(unsigned char langId);

#endif // GB_LANG_H