// File listing metadata: C++, 220 lines, 5.0 KiB.
#ifndef UCENUMS_H_
|
|
#define UCENUMS_H_
|
|
#include <inttypes.h>
|
|
|
|
namespace Unicode {
|
|
|
|
enum class script_t : uint8_t {
|
|
Adlam = 1,
|
|
Ahom = 2,
|
|
Anatolian_Hieroglyphs = 3,
|
|
Arabic = 4,
|
|
Armenian = 5,
|
|
Avestan = 6,
|
|
Balinese = 7,
|
|
Bamum = 8,
|
|
Bassa_Vah = 9,
|
|
Batak = 10,
|
|
Bengali = 11,
|
|
Bhaiksuki = 12,
|
|
Bopomofo = 13,
|
|
Brahmi = 14,
|
|
Braille = 15,
|
|
Buginese = 16,
|
|
Buhid = 17,
|
|
Canadian_Aboriginal = 18,
|
|
Carian = 19,
|
|
Caucasian_Albanian = 20,
|
|
Chakma = 21,
|
|
Cham = 22,
|
|
Cherokee = 23,
|
|
Common = 24,
|
|
Coptic = 25,
|
|
Cuneiform = 26,
|
|
Cypriot = 27,
|
|
Cyrillic = 28,
|
|
Deseret = 29,
|
|
Devanagari = 30,
|
|
Duployan = 31,
|
|
Egyptian_Hieroglyphs = 32,
|
|
Elbasan = 33,
|
|
Ethiopic = 34,
|
|
Georgian = 35,
|
|
Glagolitic = 36,
|
|
Gothic = 37,
|
|
Grantha = 38,
|
|
Greek = 39,
|
|
Gujarati = 40,
|
|
Gurmukhi = 41,
|
|
Han = 42,
|
|
Hangul = 43,
|
|
Hanunoo = 44,
|
|
Hatran = 45,
|
|
Hebrew = 46,
|
|
Hiragana = 47,
|
|
Imperial_Aramaic = 48,
|
|
Inherited = 49,
|
|
Inscriptional_Pahlavi = 50,
|
|
Inscriptional_Parthian = 51,
|
|
Javanese = 52,
|
|
Kaithi = 53,
|
|
Kannada = 54,
|
|
Katakana = 55,
|
|
Kayah_Li = 56,
|
|
Kharoshthi = 57,
|
|
Khmer = 58,
|
|
Khojki = 59,
|
|
Khudawadi = 60,
|
|
Lao = 61,
|
|
Latin = 62,
|
|
Lepcha = 63,
|
|
Limbu = 64,
|
|
Linear_A = 65,
|
|
Linear_B = 66,
|
|
Lisu = 67,
|
|
Lycian = 68,
|
|
Lydian = 69,
|
|
Mahajani = 70,
|
|
Malayalam = 71,
|
|
Mandaic = 72,
|
|
Manichaean = 73,
|
|
Marchen = 74,
|
|
Masaram_Gondi = 75,
|
|
Meetei_Mayek = 76,
|
|
Mende_Kikakui = 77,
|
|
Meroitic_Cursive = 78,
|
|
Meroitic_Hieroglyphs = 79,
|
|
Miao = 80,
|
|
Modi = 81,
|
|
Mongolian = 82,
|
|
Mro = 83,
|
|
Multani = 84,
|
|
Myanmar = 85,
|
|
Nabataean = 86,
|
|
New_Tai_Lue = 87,
|
|
Newa = 88,
|
|
Nko = 89,
|
|
Nushu = 90,
|
|
Ogham = 91,
|
|
Ol_Chiki = 92,
|
|
Old_Hungarian = 93,
|
|
Old_Italic = 94,
|
|
Old_North_Arabian = 95,
|
|
Old_Permic = 96,
|
|
Old_Persian = 97,
|
|
Old_South_Arabian = 98,
|
|
Old_Turkic = 99,
|
|
Oriya = 100,
|
|
Osage = 101,
|
|
Osmanya = 102,
|
|
Pahawh_Hmong = 103,
|
|
Palmyrene = 104,
|
|
Pau_Cin_Hau = 105,
|
|
Phags_Pa = 106,
|
|
Phoenician = 107,
|
|
Psalter_Pahlavi = 108,
|
|
Rejang = 109,
|
|
Runic = 110,
|
|
Samaritan = 111,
|
|
Saurashtra = 112,
|
|
Sharada = 113,
|
|
Shavian = 114,
|
|
Siddham = 115,
|
|
SignWriting = 116,
|
|
Sinhala = 117,
|
|
Sora_Sompeng = 118,
|
|
Soyombo = 119,
|
|
Sundanese = 120,
|
|
Syloti_Nagri = 121,
|
|
Syriac = 122,
|
|
Tagalog = 123,
|
|
Tagbanwa = 124,
|
|
Tai_Le = 125,
|
|
Tai_Tham = 126,
|
|
Tai_Viet = 127,
|
|
Takri = 128,
|
|
Tamil = 129,
|
|
Tangut = 130,
|
|
Telugu = 131,
|
|
Thaana = 132,
|
|
Thai = 133,
|
|
Tibetan = 134,
|
|
Tifinagh = 135,
|
|
Tirhuta = 136,
|
|
Ugaritic = 137,
|
|
Unknown = 138,
|
|
Vai = 139,
|
|
Warang_Citi = 140,
|
|
Yi = 141,
|
|
Zanabazar_Square = 142,
|
|
};
|
|
|
|
enum class general_category_t : uint8_t {
|
|
Cc = 1, //control
|
|
Cf = 2, //format control
|
|
Co = 3, //private-use
|
|
Cs = 4, //surrogate
|
|
Ll = 5, //lowercase
|
|
Lm = 6, //modifier letter
|
|
Lo = 7, //other letter
|
|
Lt = 8, //titlecase
|
|
Lu = 9, //uppercase
|
|
Mc = 10, //spacing mark
|
|
Me = 11, //enclosing mark
|
|
Mn = 12, //nonspacing mark
|
|
Nd = 13, //decimal digit
|
|
Nl = 14, //letterlike numeric character
|
|
No = 15, //numeric character of other type
|
|
Pc = 16, //connecting punctuation mark
|
|
Pd = 17, //dash/hyphen
|
|
Pe = 18, //closing punctuation mark
|
|
Pf = 19, //final quotation mark
|
|
Pi = 20, //initial quotation mark
|
|
Po = 21, //punctuation mark of other type
|
|
Ps = 22, //opening punctuation (of pair)
|
|
Sc = 23, //closing punctuation (of pair)
|
|
Sk = 24, //non-letterlike modifier symbol
|
|
Sm = 25, //symbol of mathematical use
|
|
So = 26, //symbol of other type
|
|
Zl = 27, //U+2028 LINE SEPARATOR only
|
|
Zp = 28, //U+2029 PARAGRAPH SEPARATOR only
|
|
Zs = 29, //space character (of various non-zero widths)
|
|
};
|
|
|
|
static const uint32_t White_Space = 1<<0;
|
|
static const uint32_t ASCII_Hex_Digit = 1<<1;
|
|
static const uint32_t Bidi_Control = 1<<2;
|
|
static const uint32_t Dash = 1<<3;
|
|
static const uint32_t Diacritic = 1<<4;
|
|
static const uint32_t Extender = 1<<5;
|
|
static const uint32_t Hex_Digit = 1<<6;
|
|
static const uint32_t Hyphen = 1<<7;
|
|
static const uint32_t IDS_Binary_Operator = 1<<8;
|
|
static const uint32_t IDS_Trinary_Operator = 1<<9;
|
|
static const uint32_t Ideographic = 1<<10;
|
|
static const uint32_t Join_Control = 1<<11;
|
|
static const uint32_t Logical_Order_Exception = 1<<12;
|
|
static const uint32_t Noncharacter_Code_Point = 1<<13;
|
|
static const uint32_t Other_Alphabetic = 1<<14;
|
|
static const uint32_t Other_Default_Ignorable_Code_Point = 1<<15;
|
|
static const uint32_t Other_Grapheme_Extend = 1<<16;
|
|
static const uint32_t Other_ID_Continue = 1<<17;
|
|
static const uint32_t Other_ID_Start = 1<<18;
|
|
static const uint32_t Other_Lowercase = 1<<19;
|
|
static const uint32_t Other_Math = 1<<20;
|
|
static const uint32_t Other_Uppercase = 1<<21;
|
|
static const uint32_t Pattern_White_Space = 1<<22;
|
|
static const uint32_t Prepended_Concatenation_Mark = 1<<23;
|
|
static const uint32_t Quotation_Mark = 1<<24;
|
|
static const uint32_t Radical = 1<<25;
|
|
static const uint32_t Regional_Indicator = 1<<26;
|
|
static const uint32_t Sentence_Terminal = 1<<27;
|
|
static const uint32_t Soft_Dotted = 1<<28;
|
|
static const uint32_t Terminal_Punctuation = 1<<29;
|
|
static const uint32_t Unified_Ideograph = 1<<30;
|
|
static const uint32_t Variation_Selector = 1U<<31;
|
|
|
|
}
|
|
|
|
#endif
|