privacore-open-source-searc.../Lang.h
2017-11-21 16:18:45 +01:00

95 lines
2.3 KiB
C

// Matt Wells, copyright Jul 2001
// . language detector
// . TODO: use stopwords in doc to determine the language
#ifndef GB_LANG_H
#define GB_LANG_H
#include <inttypes.h>
// Number of language id slots. This matches the implicit value of langLast
// (64) in lang_t, whose last explicit enumerator is langUnwanted = 63.
#define MAX_LANGUAGES 64
// Mask over the per-language bits: english is the first bit and
// translingual is the 31st bit (so presumably language id N uses bit N-1
// -- TODO confirm against the code that builds these bit vectors).
//
// History: the original mask (kept commented out below) was sized for
// langs 1-55: 64 - 8 is 56, then minus 1 is 55 bits. The active mask sets
// the low 63 bits instead, which lines up with language ids 1-63.
//
// NOTE(review): the old comment said translingual is excluded, but neither
// the old nor the active mask clears the 31st bit -- confirm with callers.
//#define LANG_BIT_MASK 0x007fffffffffffffLL
#define LANG_BIT_MASK 0x7fffffffffffffffLL
// Numeric language identifiers.
//
// Every id is written out explicitly; only langLast relies on implicit
// numbering (one past langUnwanted, i.e. 64).
// NOTE(review): these ids appear to double as bit positions for
// LANG_BIT_MASK, so treat them as stable -- confirm before renumbering.
enum lang_t {
	langUnknown        =  0,

	// original language set
	langEnglish        =  1,
	langFrench         =  2,
	langSpanish        =  3,
	langRussian        =  4,
	langTurkish        =  5,
	langJapanese       =  6,
	langChineseTrad    =  7,
	langChineseSimp    =  8,
	langKorean         =  9,
	langGerman         = 10,
	langDutch          = 11,
	langItalian        = 12,
	langFinnish        = 13,
	langSwedish        = 14,
	langNorwegian      = 15,
	langPortuguese     = 16,
	langVietnamese     = 17,
	langArabic         = 18,
	langHebrew         = 19,
	langIndonesian     = 20,
	langGreek          = 21,
	langThai           = 22,
	langHindi          = 23,
	langBengala        = 24, // sic (presumably Bengali); name kept for callers
	langPolish         = 25,
	langTagalog        = 26,

	// added for wiktionary
	langLatin          = 27,
	langEsperanto      = 28,
	langCatalan        = 29,
	langBulgarian      = 30,
	langTranslingual   = 31, // used by multiple langs in wiktionary
	langSerboCroatian  = 32,
	langHungarian      = 33,
	langDanish         = 34,
	langLithuanian     = 35,
	langCzech          = 36,
	langGalician       = 37,
	langGeorgian       = 38,
	langScottishGaelic = 39,
	langGothic         = 40,
	langRomanian       = 41,
	langIrish          = 42,
	langLatvian        = 43,
	langArmenian       = 44,
	langIcelandic      = 45,
	langAncientGreek   = 46,
	langManx           = 47,
	langIdo            = 48,
	langPersian        = 49,
	langTelugu         = 50,
	langVenetian       = 51,
	langMalgasy        = 52, // sic (presumably Malagasy); name kept for callers
	langKurdish        = 53,
	langLuxembourgish  = 54,
	langEstonian       = 55,
	langMaltese        = 56,
	langSlovak         = 57,
	langSlovenian      = 58,
	langBasque         = 59,
	langWelsh          = 60,
	langGreenlandic    = 61,
	langFaroese        = 62,
	langUnwanted       = 63,

	// end marker: implicitly langUnwanted + 1
	langLast
};
// Language lookup/formatting helpers. Only the declarations are visible in
// this header; the notes below are limited to what the signatures show.

// Returns the lang_t for a language abbreviation string.
// NOTE(review): the accepted abbreviation format and the result for an
// unrecognized or NULL abbr are not visible here -- presumably langUnknown;
// confirm in the implementation.
lang_t getLangIdFromAbbr ( const char *abbr ) ;
// Returns the lang_t associated with a 16-bit charset id.
// NOTE(review): the charset id numbering is defined elsewhere -- confirm.
lang_t getLangIdFromCharset(uint16_t charset);
// Writes a string form of `lang` into `buf`. No buffer length is passed, so
// the caller must supply a buffer large enough for the result.
// NOTE(review): the required size is not visible here -- confirm.
void languageToString ( unsigned char lang , char *buf );
// Returns a string for the language id `lang`.
// NOTE(review): presumably the full language name, pointing at static
// storage the caller must not free -- confirm in the implementation.
const char* getLanguageString ( unsigned char lang);
// Returns the abbreviation string for the language id `langId`.
// NOTE(review): presumably the inverse of getLangIdFromAbbr -- confirm.
const char* getLanguageAbbr ( unsigned char langId);
#endif // GB_LANG_H