2622 lines
68 KiB
C++
2622 lines
68 KiB
C++
#include "gb-include.h"
|
|
|
|
#include "Loop.h"
|
|
#include "Entities.h"
|
|
#include "UCWordIterator.h"
|
|
#include "SafeBuf.h"
|
|
#include "Xml.h"
|
|
#include "XmlNode.h"
|
|
#include "iana_charset.h"
|
|
|
|
static bool g_clockInSync = false;
|
|
|
|
bool g_clockNeedsUpdate = true;
|
|
|
|
bool isClockInSync() {
|
|
if ( g_hostdb.m_initialized && g_hostdb.m_hostId == 0 ) return true;
|
|
return g_clockInSync;
|
|
}
|
|
|
|
|
|
bool print96 ( char *k ) {
|
|
key_t *kp = (key_t *)k;
|
|
printf("n1=0x%" XINT32 " n0=0x%" XINT64 "\n",(int32_t)kp->n1,(int64_t)kp->n0);
|
|
return true;
|
|
}
|
|
|
|
bool print96 ( key_t *kp ) {
|
|
printf("n1=0x%" XINT32 " n0=0x%" XINT64 "\n",(int32_t)kp->n1,(int64_t)kp->n0);
|
|
return true;
|
|
}
|
|
|
|
bool print128 ( char *k ) {
|
|
key128_t *kp = (key128_t *)k;
|
|
printf("n1=0x%" XINT64 " n0=0x%" XINT64 "\n",(int64_t)kp->n1,(int64_t)kp->n0);
|
|
return true;
|
|
}
|
|
|
|
bool print128 ( key128_t *kp ) {
|
|
printf("n1=0x%" XINT64 " n0=0x%" XINT64 "\n",(int64_t)kp->n1,(int64_t)kp->n0);
|
|
return true;
|
|
}
|
|
|
|
// . all the 256-entry byte lookup tables live here now
// . g_map_to_lower maps a byte to its lower case version:
//   'A'-'Z' (65-90) map to 'a'-'z' and 192-222 map to 224-254
// . 215 maps to itself: the is_upper, is_alpha and is_punct tables in
//   this file all treat 215 as punctuation rather than a letter, so it
//   must not be folded onto 247
// . every other byte maps to itself
const unsigned char g_map_to_lower[] = {
	  0,  1,  2,  3,  4,  5,  6,  7,   8,  9, 10, 11, 12, 13, 14, 15, // 0
	 16, 17, 18, 19, 20, 21, 22, 23,  24, 25, 26, 27, 28, 29, 30, 31, // 16
	 32, 33, 34, 35, 36, 37, 38, 39,  40, 41, 42, 43, 44, 45, 46, 47, // 32
	 48, 49, 50, 51, 52, 53, 54, 55,  56, 57, 58, 59, 60, 61, 62, 63, // 48
	 64,'a','b','c','d','e','f','g', 'h','i','j','k','l','m','n','o', // 64
	'p','q','r','s','t','u','v','w', 'x','y','z', 91, 92, 93, 94, 95, // 80
	 96,'a','b','c','d','e','f','g', 'h','i','j','k','l','m','n','o', // 96
	'p','q','r','s','t','u','v','w', 'x','y','z',123,124,125,126,127, // 112
	128,129,130,131,132,133,134,135, 136,137,138,139,140,141,142,143, // 128
	144,145,146,147,148,149,150,151, 152,153,154,155,156,157,158,159, // 144
	160,161,162,163,164,165,166,167, 168,169,170,171,172,173,174,175, // 160
	176,177,178,179,180,181,182,183, 184,185,186,187,188,189,190,191, // 176
	224,225,226,227,228,229,230,231, 232,233,234,235,236,237,238,239, // 192
	240,241,242,243,244,245,246,215, 248,249,250,251,252,253,254,223, // 208
	224,225,226,227,228,229,230,231, 232,233,234,235,236,237,238,239, // 224
	240,241,242,243,244,245,246,247, 248,249,250,251,252,253,254,255  // 240
};
|
|
|
|
|
|
// . maps a byte to its upper case version:
//   'a'-'z' (97-122) map to 'A'-'Z' and 224-254 map to 192-222
// . 247 maps to itself: the is_lower, is_alpha and is_punct tables in
//   this file all treat 247 as punctuation rather than a letter, so it
//   must not be folded onto 215
// . every other byte (including 223 and 255) maps to itself
const unsigned char g_map_to_upper[] = {
	  0,  1,  2,  3,  4,  5,  6,  7,   8,  9, 10, 11, 12, 13, 14, 15, // 0
	 16, 17, 18, 19, 20, 21, 22, 23,  24, 25, 26, 27, 28, 29, 30, 31, // 16
	 32, 33, 34, 35, 36, 37, 38, 39,  40, 41, 42, 43, 44, 45, 46, 47, // 32
	 48, 49, 50, 51, 52, 53, 54, 55,  56, 57, 58, 59, 60, 61, 62, 63, // 48
	 64,'A','B','C','D','E','F','G', 'H','I','J','K','L','M','N','O', // 64
	'P','Q','R','S','T','U','V','W', 'X','Y','Z', 91, 92, 93, 94, 95, // 80
	 96,'A','B','C','D','E','F','G', 'H','I','J','K','L','M','N','O', // 96
	'P','Q','R','S','T','U','V','W', 'X','Y','Z',123,124,125,126,127, // 112
	128,129,130,131,132,133,134,135, 136,137,138,139,140,141,142,143, // 128
	144,145,146,147,148,149,150,151, 152,153,154,155,156,157,158,159, // 144
	160,161,162,163,164,165,166,167, 168,169,170,171,172,173,174,175, // 160
	176,177,178,179,180,181,182,183, 184,185,186,187,188,189,190,191, // 176
	192,193,194,195,196,197,198,199, 200,201,202,203,204,205,206,207, // 192
	208,209,210,211,212,213,214,215, 216,217,218,219,220,221,222,223, // 208
	192,193,194,195,196,197,198,199, 200,201,202,203,204,205,206,207, // 224
	208,209,210,211,212,213,214,247, 216,217,218,219,220,221,222,255  // 240
};
|
|
|
|
// . maps a byte to a plain ascii stand-in where this table supplies one
// . 0-162 map to themselves
// . accented letters in 192-255 map to their unaccented base letter
// . 222 and 254 (TH/th) are left alone, 223 maps to 's'
// . a handful of symbols in 163-191 map to look-alikes ('#','o','Y',...)
const unsigned char g_map_to_ascii[] = {
	  0,  1,  2,  3,  4,  5,  6,  7,   8,  9, 10, 11, 12, 13, 14, 15, // 0
	 16, 17, 18, 19, 20, 21, 22, 23,  24, 25, 26, 27, 28, 29, 30, 31, // 16
	 32, 33, 34, 35, 36, 37, 38, 39,  40, 41, 42, 43, 44, 45, 46, 47, // 32
	 48, 49, 50, 51, 52, 53, 54, 55,  56, 57, 58, 59, 60, 61, 62, 63, // 48
	 64,'A','B','C','D','E','F','G', 'H','I','J','K','L','M','N','O', // 64
	'P','Q','R','S','T','U','V','W', 'X','Y','Z', 91, 92, 93, 94, 95, // 80
	 96,'a','b','c','d','e','f','g', 'h','i','j','k','l','m','n','o', // 96
	'p','q','r','s','t','u','v','w', 'x','y','z',123,124,125,126,127, // 112
	128,129,130,131,132,133,134,135, 136,137,138,139,140,141,142,143, // 128
	144,145,146,147,148,149,150,151, 152,153,154,155,156,157,158,159, // 144
	160,161,162,'#','o','Y','|','S', 168,169,'a',171,172,173,174,175, // 160
	176,177,'2','3',180,'u',182,183, ' ','1','o',187,188,189,190,'?', // 176
	'A','A','A','A','A','A','A','C', 'E','E','E','E','I','I','I','I', // 192 (198=AE)
	'D','N','O','O','O','O','O','x', 'O','U','U','U','U','Y',222,'s', // 208 (222=TH)
	'a','a','a','a','a','a','a','c', 'e','e','e','e','i','i','i','i', // 224 (230=ae)
	'd','n','o','o','o','o','o','/', 'o','u','u','u','u','y',254,'y'  // 240 (254=th)
};
|
|
|
|
|
|
// . 1 for bytes this file considers upper case: 65-90 and 192-223,
//   excluding 215
// . note 223 is flagged here even though g_map_to_lower leaves it alone
const char g_map_is_upper[] = {
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 0
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 16
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 32
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 48
	0,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 64  (A=65)
	1,1,1,1,1,1,1,1, 1,1,1,0,0,0,0,0, // 80  (Z=90)
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 96
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 112
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 128
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 144
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 160
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 176
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 192
	1,1,1,1,1,1,1,0, 1,1,1,1,1,1,1,1, // 208 (215 off)
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 224
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0  // 240
};
|
|
|
|
|
|
// . 1 if the byte may appear in an html (or xml) tag name
// . on for '-' (45), 'A'-'Z' (65-90) and 'a'-'z' (97-122) only
const char g_map_canBeInTagName[] = {
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 0
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 16
	0,0,0,0,0,0,0,0, 0,0,0,0,0,1,0,0, // 32  (hyphen=45)
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 48
	0,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 64
	1,1,1,1,1,1,1,1, 1,1,1,0,0,0,0,0, // 80
	0,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 96
	1,1,1,1,1,1,1,1, 1,1,1,0,0,0,0,0, // 112
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 128
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 144
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 160
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 176
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 192
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 208
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 224
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0  // 240
};
|
|
|
|
|
|
// . 1 for bytes treated as control characters: 0-31, 127 (DEL) and
//   128-160
// . the original annotation calls 160 "backspace"
const char g_map_is_control [] = {
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 0
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 16
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 32
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 48
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 64
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 80
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 96
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,1, // 112 (127=DEL)
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 128
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 144
	1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 160
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 176
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 192
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 208
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 224
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0  // 240
};
|
|
|
|
// . 1 for bytes that mark content as binary
// . people mix windows 1252 into latin-1, so this is less restrictive
//   than g_map_is_control in the 128-159 range
// . off for \t (9), \n (10), \r (13), 128, 145-150, 152, 153, 156, 157
//   and everything from 160 up
const char g_map_is_binary[] = {
	1,1,1,1,1,1,1,1, 1,0,0,1,1,0,1,1, // 0   (\t=9 \n=10 \r=13)
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 16
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 32
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 48
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 64
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 80
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 96
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,1, // 112 (127=DEL)
	0,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 128 (128 allowed)
	1,0,0,0,0,0,0,1, 0,0,1,1,0,0,1,1, // 144 (quotes, dashes, bullet allowed)
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 160 (some urls have 160)
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 176
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 192
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 208
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 224
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0  // 240
};
|
|
|
|
// . 1 for whitespace bytes: '\t' (9), '\n' (10), '\r' (13) and ' ' (32)
// . 160 is deliberately off because it might be a utf8 byte
const char g_map_is_wspace[] = {
	0,0,0,0,0,0,0,0, 0,1,1,0,0,1,0,0, // 0   (\t=9 \n=10 \r=13)
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 16
	1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 32  (space=32)
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 48
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 64
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 80
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 96
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 112
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 128
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 144
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 160 (off, might be utf8 byte)
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 176
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 192
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 208
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 224
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0  // 240
};
|
|
|
|
|
|
// . 1 for vertical whitespace, which is only '\n' (10)
// . entries 192-222 used to be set as well; that block matched the
//   upper case table bit for bit and flagged accented capitals as
//   vertical space, so it is cleared here
const char g_map_is_vspace[] = {
	0,0,0,0,0,0,0,0, 0,0,1,0,0,0,0,0, // 0   (\n=10)
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 16
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 32
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 48
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 64
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 80
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 96
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 112
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 128
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 144
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 160
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 176
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 192
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 208
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 224
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0  // 240
};
|
|
|
|
// . 1 for horizontal whitespace, which is only '\t' (9) and ' ' (32)
// . entries 192-222 used to be set as well; that block matched the
//   upper case table bit for bit and flagged accented capitals as
//   horizontal space, so it is cleared here
const char g_map_is_hspace[] = {
	0,0,0,0,0,0,0,0, 0,1,0,0,0,0,0,0, // 0   (\t=9)
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 16
	1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 32  (space=32)
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 48
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 64
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 80
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 96
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 112
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 128
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 144
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 160
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 176
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 192
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 208
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 224
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0  // 240
};
|
|
|
|
|
|
// . 1 for the ascii vowels A E I O U and a e i o u
// . no bytes at or above 128 are flagged
const char g_map_is_vowel[] = {
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 0
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 16
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 32
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 48
	0,1,0,0,0,1,0,0, 0,1,0,0,0,0,0,1, // 64  (A=65 E=69 I=73 O=79)
	0,0,0,0,0,1,0,0, 0,0,0,0,0,0,0,0, // 80  (U=85)
	0,1,0,0,0,1,0,0, 0,1,0,0,0,0,0,1, // 96  (a=97 e=101 i=105 o=111)
	0,0,0,0,0,1,0,0, 0,0,0,0,0,0,0,0, // 112 (u=117)
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 128
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 144
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 160
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 176
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 192
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 208
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 224
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0  // 240
};
|
|
|
|
|
|
|
|
// . 1 for bytes this file considers lower case: 97-122 and 224-255,
//   excluding 247
const char g_map_is_lower[] = {
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 0
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 16
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 32
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 48
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 64
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 80
	0,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 96  (a=97)
	1,1,1,1,1,1,1,1, 1,1,1,0,0,0,0,0, // 112 (z=122)
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 128
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 144
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 160
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 176
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 192
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 208
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 224
	1,1,1,1,1,1,1,0, 1,1,1,1,1,1,1,1  // 240 (247 off)
};
|
|
|
|
// . 1 for the printable ascii bytes, 32 through 126
const char g_map_is_ascii[] = {
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 0
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 16
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 32
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 48
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 64
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 80
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 96
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,0, // 112 (127 off)
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 128
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 144
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 160
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 176
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 192
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 208
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 224
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0  // 240
};
|
|
|
|
// . 1 for every byte from 0 through 127, 0 for 128 and up
// . used by the inlined *_utf8() functions in fctypes.h
const char g_map_is_ascii3[] = {
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 0
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 16
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 32
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 48
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 64
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 80
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 96
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 112
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 128
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 144
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 160
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 176
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 192
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 208
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 224
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0  // 240
};
|
|
|
|
|
|
|
|
// . 1 for the bytes 161 through 255, 0 for everything else
// . 160 itself is off
const char g_map_is_iso[] = {
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 0
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 16
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 32
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 48
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 64
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 80
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 96
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 112
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 128
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 144
	0,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 160
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 176
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 192
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 208
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 224
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1  // 240
};
|
|
|
|
// . 1 for punctuation bytes:
//   33-47, 58-64, 91-96, 123-126, 161-191, 215 and 247
const char g_map_is_punct[] = {
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 0
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 16
	0,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 32  (space is off)
	0,0,0,0,0,0,0,0, 0,0,1,1,1,1,1,1, // 48  (58-63)
	1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 64  (@)
	0,0,0,0,0,0,0,0, 0,0,0,1,1,1,1,1, // 80  (91-95)
	1,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 96  (`)
	0,0,0,0,0,0,0,0, 0,0,0,1,1,1,1,0, // 112 (123-126)
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 128
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 144
	0,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 160
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 176
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 192
	0,0,0,0,0,0,0,1, 0,0,0,0,0,0,0,0, // 208 (215)
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 224
	0,0,0,0,0,0,0,1, 0,0,0,0,0,0,0,0  // 240 (247)
};
|
|
|
|
// . 1 for alphanumeric bytes:
//   48-57, 65-90, 97-122 and 192-255 (excluding 215 and 247)
const char g_map_is_alnum[] = {
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 0
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 16
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 32
	1,1,1,1,1,1,1,1, 1,1,0,0,0,0,0,0, // 48  (digits)
	0,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 64
	1,1,1,1,1,1,1,1, 1,1,1,0,0,0,0,0, // 80
	0,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 96
	1,1,1,1,1,1,1,1, 1,1,1,0,0,0,0,0, // 112
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 128
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 144
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 160
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 176
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 192
	1,1,1,1,1,1,1,0, 1,1,1,1,1,1,1,1, // 208 (215 off)
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 224
	1,1,1,1,1,1,1,0, 1,1,1,1,1,1,1,1  // 240 (247 off)
};
|
|
|
|
// . 1 for alphabetic bytes:
//   65-90, 97-122 and 192-255 (excluding 215 and 247)
const char g_map_is_alpha[] = {
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 0
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 16
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 32
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 48
	0,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 64
	1,1,1,1,1,1,1,1, 1,1,1,0,0,0,0,0, // 80
	0,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 96
	1,1,1,1,1,1,1,1, 1,1,1,0,0,0,0,0, // 112
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 128
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 144
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 160
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 176
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 192
	1,1,1,1,1,1,1,0, 1,1,1,1,1,1,1,1, // 208 (215 off)
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 224
	1,1,1,1,1,1,1,0, 1,1,1,1,1,1,1,1  // 240 (247 off)
};
|
|
|
|
// . 1 for the decimal digits '0'-'9' (48-57), 0 for everything else
const char g_map_is_digit[] = {
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 0
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 16
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 32
	1,1,1,1,1,1,1,1, 1,1,0,0,0,0,0,0, // 48  ('0'=48 '9'=57)
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 64
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 80
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 96
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 112
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 128
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 144
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 160
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 176
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 192
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 208
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 224
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0  // 240
};
|
|
|
|
|
|
// . 1 for hexadecimal digits:
//   '0'-'9' (48-57), 'A'-'F' (65-70) and 'a'-'f' (97-102)
const char g_map_is_hex[] = {
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 0
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 16
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 32
	1,1,1,1,1,1,1,1, 1,1,0,0,0,0,0,0, // 48  ('0'=48)
	0,1,1,1,1,1,1,0, 0,0,0,0,0,0,0,0, // 64  ('A'=65)
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 80
	0,1,1,1,1,1,1,0, 0,0,0,0,0,0,0,0, // 96  ('a'=97)
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 112
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 128
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 144
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 160
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 176
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 192
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 208
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 224
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0  // 240
};
|
|
|
|
// . derived from g_map_is_alnum, with '-' (45), ':' (58) and '_' (95)
//   turned on as well
// . ':' is included for names like feedburner:origlink
// . nothing at or above 128 is flagged since we are no longer
//   necessarily latin-1
const char g_map_is_tagname_char [] = {
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 0
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 16
	0,0,0,0,0,0,0,0, 0,0,0,0,0,1,0,0, // 32  ('-'=45)
	1,1,1,1,1,1,1,1, 1,1,1,0,0,0,0,0, // 48  (digits and ':'=58)
	0,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 64
	1,1,1,1,1,1,1,1, 1,1,1,0,0,0,0,1, // 80  ('_'=95)
	0,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, // 96
	1,1,1,1,1,1,1,1, 1,1,1,0,0,0,0,0, // 112
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 128
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 144
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 160
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 176
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 192
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 208
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 224
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0  // 240
};
|
|
|
|
// . 1 for the four bytes '"' (34), '\'' (39), '<' (60) and '>' (62)
const char g_map_is_tag_control_char[] = {
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 0
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 16
	0,0,1,0,0,0,0,1, 0,0,0,0,0,0,0,0, // 32  (" and ')
	0,0,0,0,0,0,0,0, 0,0,0,0,1,0,1,0, // 48  (< and >)
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 64
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 80
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 96
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 112
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 128
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 144
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 160
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 176
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 192
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 208
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, // 224
	0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0  // 240
};
|
|
|
|
// when matching query terms to words/phrases in doc skip over spaces
|
|
// or other punct so that "flypaper" in the query matches "fly paper" in the
|
|
// doc
|
|
/*
|
|
const char g_map_is_match_skip[] = { // 48-57
|
|
0,0,0,0,0,0,0,0, // 0
|
|
0,1,1,0,0,0,0,0, // \t and \n
|
|
0,0,0,0,0,0,0,0, // 16
|
|
0,0,0,0,0,0,0,0,
|
|
1,0,0,0,0,0,0,1, // 32 space and '
|
|
0,0,0,0,0,1,0,0, // 40 -
|
|
0,0,0,0,0,0,0,0, // 48
|
|
0,0,0,0,0,0,0,0, // 56
|
|
0,0,0,0,0,0,0,0, // 64
|
|
0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0,
|
|
0,0,0,0,0,0,0,0};
|
|
*/
|
|
|
|
// . length of "s", but never look at more than "maxLen" bytes
// . returns the index of the first \0 in s[0..maxLen-1], or maxLen if
//   there is none
// . returns 0 if maxLen is zero or negative
int32_t strnlen ( const char *s , int32_t maxLen ) {
	int32_t n = 0;
	while ( n < maxLen && s[n] != '\0' ) n++;
	return n;
}
|
|
|
|
char *strncasestr( char *haystack, int32_t haylen, char *needle){
|
|
int32_t matchLen = 0;
|
|
int32_t needleLen = gbstrlen(needle);
|
|
for (int32_t i = 0; i < haylen;i++){
|
|
char c1 = to_lower_a(haystack[i]);
|
|
char c2 = to_lower_a(needle[matchLen]);
|
|
if ( c1 != c2 ){
|
|
// no match
|
|
matchLen = 0;
|
|
continue;
|
|
}
|
|
// we matched another character
|
|
matchLen++;
|
|
if (matchLen < needleLen) continue;
|
|
|
|
// we've matched the whole string
|
|
return haystack + i - matchLen + 1;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
// . case-sensitive search for "needle" in the first "haylen" bytes of
//   "haystack"
// . haystack need not be \0 terminated, needle must be
// . returns a ptr to the first occurrence, or NULL if there is none
// . an empty needle yields NULL
// . every start position is tried in turn; the previous single-pass
//   scan dropped the partial match on a mismatch without backing up,
//   so it could not find "ab" in "aab"
char *strnstr2( char *haystack, int32_t haylen, char *needle){
	// nothing to look for
	if ( needle[0] == '\0' ) return NULL;
	for ( int32_t start = 0 ; start < haylen ; start++ ) {
		int32_t j = 0;
		// advance while the needle and the bounded haystack agree
		while ( needle[j] != '\0' &&
			start + j < haylen &&
			haystack[start + j] == needle[j] )
			j++;
		// we've matched the whole string
		if ( needle[j] == '\0' ) return haystack + start;
	}
	return NULL;
}
|
|
|
|
// . get the # of words in this string
|
|
int32_t getNumWords ( char *s , int32_t len, int32_t titleVersion ) {
|
|
|
|
int32_t wordCount = 0;
|
|
bool inWord = false;
|
|
for ( int32_t i = 0 ; i < len ; i++ ) {
|
|
if ( ! is_alnum_a ( s[i] ) && s[i]!='\'' ) {
|
|
inWord = false;
|
|
continue;
|
|
}
|
|
if ( ! inWord ) {
|
|
inWord = true;
|
|
wordCount++;
|
|
}
|
|
}
|
|
return wordCount;
|
|
}
|
|
|
|
// . this stores a "n" into "s" and returns the # of bytes written into "s"
|
|
// . it also puts commas into the number
|
|
// . it now also NULL terminates bytes written into "s"
|
|
int32_t ulltoa ( char *s , uint64_t n ) {
|
|
// if n is zero, it's easy
|
|
if ( n == 0LL ) { *s++='0'; *s='\0'; return 1; }
|
|
// a hunk is a number in [0,999]
|
|
int32_t hunks[10];
|
|
int32_t lastHunk = -1;
|
|
// . get the hunks
|
|
// . the first hunk we get is called the "lowest hunk"
|
|
// . "lastHunk" is called the "highest hunk"
|
|
for ( int32_t i = 0 ; i < 10 ; i++ ) {
|
|
hunks[i] = n % 1000;
|
|
n /= 1000;
|
|
if ( hunks[i] != 0 ) lastHunk = i;
|
|
}
|
|
// remember start of buf for calculating # bytes written
|
|
char *start = s;
|
|
// print the hunks separated by comma
|
|
for ( int32_t i = lastHunk ; i >= 0 ; i-- ) {
|
|
// pad all hunks except highest hunk with zeroes
|
|
if ( i != lastHunk ) sprintf ( s , "%03" INT32 "" , hunks[i] );
|
|
else sprintf ( s , "%" INT32 "" , hunks[i] );
|
|
s += gbstrlen(s);
|
|
// comma after all hunks but lowest hunk
|
|
if ( i != 0 ) *s++ = ',';
|
|
}
|
|
// null terminate it
|
|
*s = '\0';
|
|
// return # of bytes stored into "s"
|
|
return s - start;
|
|
}
|
|
|
|
/*
|
|
int32_t atol2 ( const char *s, int32_t len ) {
|
|
char tmp[32];
|
|
if ( len > 30 ) len = 30;
|
|
gbmemcpy ( tmp , s , len );
|
|
tmp [ len ] = '\0';
|
|
return atol ( s );
|
|
}
|
|
*/
|
|
|
|
int32_t atol2 ( const char *s, int32_t len ) {
|
|
// skip over spaces
|
|
const char *end = s + len;
|
|
while ( s < end && is_wspace_a ( *s ) ) s++;
|
|
// return 0 if all spaces
|
|
if ( s == end ) return 0;
|
|
int32_t i = 0;
|
|
int32_t val = 0;
|
|
bool negative = false;
|
|
if ( s[0] == '-' ) { negative = true; i++; }
|
|
while ( i < len && is_digit(s[i]) ) val = val * 10 + ( s[i++] - '0' );
|
|
if ( negative ) return -val;
|
|
return val;
|
|
}
|
|
|
|
// . thin wrapper that hands the \0 terminated string "s" to atoll()
//   and returns the result as an int64_t
int64_t atoll1 ( const char *s ) {
	int64_t parsed = atoll ( s );
	return parsed;
}
|
|
|
|
int64_t atoll2 ( const char *s, int32_t len ) {
|
|
// skip over spaces
|
|
const char *end = s + len;
|
|
while ( s < end && is_wspace_a ( *s ) ) s++;
|
|
// return 0 if all spaces
|
|
if ( s == end ) return 0;
|
|
int32_t i = 0;
|
|
int64_t val = 0LL;
|
|
bool negative = false;
|
|
if ( s[0] == '-' ) { negative = true; i++; }
|
|
while ( i < len && is_digit(s[i]) ) val = val * 10LL + ( s[i++] - '0');
|
|
if ( negative ) return -val;
|
|
return val;
|
|
}
|
|
|
|
double atof2 ( const char *s, int32_t len ) {
|
|
// skip over spaces
|
|
const char *end = s + len;
|
|
while ( s < end && is_wspace_a ( *s ) ) { s++; len--; }
|
|
// return 0 if all spaces
|
|
if ( s == end ) return 0;
|
|
char tmpBuf[128];
|
|
if ( len >= 128 ) len = 127;
|
|
//strncpy ( dst , s , len );
|
|
|
|
const char *p = s;
|
|
const char *srcEnd = s + len;
|
|
char *dst = tmpBuf;
|
|
// remove commas
|
|
for ( ; p < srcEnd ; p++ ) {
|
|
// skip commas
|
|
if ( *p == ',' ) continue;
|
|
// otherwise store it
|
|
*dst++ = *p;
|
|
}
|
|
// null term
|
|
*dst = '\0';
|
|
//buf[len] = '\0';
|
|
return atof ( tmpBuf );
|
|
}
|
|
|
|
// . parse a double from the first "len" bytes of "s" using strtod()
// . temporarily overwrites s[len] with a \0 and restores it afterwards,
//   so s[len] must be writable
double atod2 ( char *s, int32_t len ) {
	// remember the byte we are about to clobber
	const char saved = s[len];
	s[len] = '\0';
	const double value = strtod ( s , NULL );
	// put it back
	s[len] = saved;
	return value;
}
|
|
|
|
|
|
bool atob ( const char *s, int32_t len ) {
|
|
// skip over spaces
|
|
const char *end = s + len;
|
|
while ( s < end && is_wspace_a ( *s ) ) s++;
|
|
// return false if all spaces
|
|
if ( s == end ) return false;
|
|
// parse the ascii bool value
|
|
if ( s[0] == 't' || s[0] == 'T' ) return true;
|
|
if ( s[0] == 'y' || s[0] == 'Y' ) return true;
|
|
if ( ! is_digit ( *s ) || *s == '0' ) return false;
|
|
return true;
|
|
}
|
|
|
|
// hexadecimal ascii to key_t
|
|
int64_t htoint32_tint32_t ( const char *s, int32_t len ) {
|
|
// skip over spaces
|
|
const char *end = s + len;
|
|
while ( s < end && is_wspace_a ( *s ) ) s++;
|
|
// return 0 if all spaces
|
|
if ( s == end ) return 0;
|
|
int32_t i = 0;
|
|
int64_t val = 0;
|
|
while ( i < len && is_hex(s[i]) )
|
|
val = val * 16 + htob ( s[i++] );
|
|
return val;
|
|
}
|
|
|
|
// convert hex ascii string into binary at "dst"
|
|
void hexToBin ( char *src , int32_t srcLen , char *dst ) {
|
|
char *srcEnd = src + srcLen;
|
|
for ( ; src && src < srcEnd ; ) {
|
|
*dst = htob(*src++);
|
|
*dst <<= 4;
|
|
*dst |= htob(*src++);
|
|
dst++;
|
|
}
|
|
// sanity check
|
|
if ( src != srcEnd ) { char *xx=NULL;*xx=0; }
|
|
}
|
|
|
|
void binToHex ( unsigned char *src , int32_t srcLen , char *dst ) {
|
|
unsigned char *srcEnd = src + srcLen;
|
|
for ( ; src && src < srcEnd ; ) {
|
|
*dst++ = btoh(*src>>4);
|
|
*dst++ = btoh(*src&15);
|
|
src++;
|
|
}
|
|
// always null term!
|
|
*dst = '\0';
|
|
// sanity check
|
|
if ( src != srcEnd ) { char *xx=NULL;*xx=0; }
|
|
}
|
|
|
|
|
|
// . like strstr but haystack may not be NULL terminated
|
|
// . needle, however, IS null terminated
|
|
char *strncasestr ( char *haystack , char *needle , int32_t haystackSize ) {
|
|
int32_t needleSize = gbstrlen(needle);
|
|
int32_t n = haystackSize - needleSize ;
|
|
for ( int32_t i = 0 ; i <= n ; i++ ) {
|
|
// keep looping if first chars do not match
|
|
if ( to_lower_a(haystack[i]) != to_lower_a(needle[0]) )
|
|
continue;
|
|
// if needle was only 1 char it's a match
|
|
if ( ! needle[1] ) return &haystack[i];
|
|
// compare the whole strings now
|
|
if ( strncasecmp ( &haystack[i] , needle , needleSize ) == 0 )
|
|
return &haystack[i];
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
// . like strstr but haystack may not be NULL terminated
|
|
// . needle, however, IS null terminated
|
|
char *strncasestr ( char *haystack , char *needle ,
|
|
int32_t haystackSize, int32_t needleSize ) {
|
|
int32_t n = haystackSize - needleSize ;
|
|
for ( int32_t i = 0 ; i <= n ; i++ ) {
|
|
// keep looping if first chars do not match
|
|
if ( to_lower_a(haystack[i]) != to_lower_a(needle[0]) )
|
|
continue;
|
|
// if needle was only 1 char it's a match
|
|
if ( ! needle[1] ) return &haystack[i];
|
|
// compare the whole strings now
|
|
if ( strncasecmp ( &haystack[i] , needle , needleSize ) == 0 )
|
|
return &haystack[i];
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
char *strnstr ( char *haystack , char *needle , int32_t haystackSize ) {
|
|
int32_t needleSize = gbstrlen(needle);
|
|
int32_t n = haystackSize - needleSize ;
|
|
for ( int32_t i = 0 ; i <= n ; i++ ) {
|
|
// keep looping if first chars do not match
|
|
if ( haystack[i] != needle[0] ) continue;
|
|
// if needle was only 1 char it's a match
|
|
if ( ! needle[1] ) return &haystack[i];
|
|
// compare the whole strings now
|
|
if ( strncmp ( &haystack[i] , needle , needleSize ) == 0 )
|
|
return &haystack[i];
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
// independent of case
|
|
char *gb_strcasestr ( char *haystack , char *needle ) {
|
|
int32_t needleSize = gbstrlen(needle);
|
|
int32_t haystackSize = gbstrlen(haystack);
|
|
int32_t n = haystackSize - needleSize ;
|
|
for ( int32_t i = 0 ; i <= n ; i++ ) {
|
|
// keep looping if first chars do not match
|
|
if ( to_lower_a(haystack[i]) != to_lower_a(needle[0]) )
|
|
continue;
|
|
// if needle was only 1 char it's a match
|
|
if ( ! needle[1] ) return &haystack[i];
|
|
// compare the whole strings now
|
|
if ( strncasecmp ( &haystack[i] , needle , needleSize ) == 0 )
|
|
return &haystack[i];
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
|
|
char *gb_strncasestr ( char *haystack , int32_t haystackSize , char *needle ) {
|
|
// temp term
|
|
char c = haystack[haystackSize];
|
|
haystack[haystackSize] = '\0';
|
|
char *res = gb_strcasestr ( haystack , needle );
|
|
haystack[haystackSize] = c;
|
|
return res;
|
|
}
|
|
|
|
// . convert '<' to "&lt;" and '>' to "&gt;"
// . read "t" (tlen bytes) and store the converted text into "s"
// . "slen" is the size of the "s" buffer, including room for the \0
// . returns bytes stored into "s", excluding the \0
// . NUL terminates "s" if slen > 0
// . stops early (truncating) once fewer than 5 free bytes remain, even
//   if the next char would only need one
int32_t saftenTags ( char *s , int32_t slen , char *t , int32_t tlen ) {
	char *start = s;
	// bail if there is no room for even the \0
	if ( slen <= 0 ) return 0;
	// leave a char for the \0
	char *send = s + slen - 1;
	char *tend = t + tlen;
	for ( ; t < tend && s + 4 < send ; t++ ) {
		// pick the entity for this char, if it needs one
		const char *ent = NULL;
		if      ( *t == '<' ) ent = "&lt;";
		else if ( *t == '>' ) ent = "&gt;";
		// plain char, copy through
		if ( ! ent ) { *s++ = *t; continue; }
		// store the 4-byte entity
		while ( *ent ) *s++ = *ent++;
	}
	// NUL terminate "s"
	*s = '\0';
	return s - start;
}
|
|
|
|
// . if "doSpecial" is true, then we change <, > and & to
|
|
// the following:
|
|
// UnicodeData.txt:22E6;LESS-THAN BUT NOT EQUIVALENT TO;Sm;0;ON;;;;;Y;
|
|
// UnicodeData.txt:22E7;GREATER-THAN BUT NOT EQUIVALENT TO;Sm;0;ON;;;;;Y;
|
|
// UnicodeData.txt:E0026;TAG AMPERSAND;Cf;0;BN;;;;;N;;;;;
|
|
// UnicodeData.txt:235E;APL FUNCTIONAL SYMBOL QUOTE QUAD;So;0;L;;;;;N;;;;;
|
|
int32_t htmlDecode ( char *dst , char *src , int32_t srcLen , bool doSpecial ,
|
|
int32_t niceness ) {
|
|
if ( srcLen == 0 ) return 0;
|
|
char *start = dst;
|
|
char *srcEnd = src + srcLen;
|
|
for ( ; src < srcEnd ; ) {
|
|
// breathe
|
|
QUICKPOLL(niceness);
|
|
// utf8 support?
|
|
char size = getUtf8CharSize(src);
|
|
// all entities must start with '&'
|
|
if ( *src != '&' ) {
|
|
if ( size == 1 ) { *dst++ = *src++; continue; }
|
|
gbmemcpy ( dst , src , size );
|
|
src += size;
|
|
dst += size;
|
|
continue;
|
|
//*dst++ = *src++; continue; }
|
|
}
|
|
// TODO: avoid doSpecial by not decoding crap in tags...
|
|
//if ( src[0] == '<' ) {
|
|
// // skip to tag end then!
|
|
//
|
|
// store decoded entity char into dst[j]
|
|
uint32_t c;
|
|
// "skip" is how many bytes the entities was in "src"
|
|
int32_t skip = getEntity_a (src, srcEnd-src, &c );
|
|
// ignore the "entity" if it was invalid
|
|
if ( skip == 0 ) { *dst++ = *src++ ; continue; }
|
|
// force this now always since some tags contain "
|
|
// and it was causing the tags to be terminated too early
|
|
// for richmondspca.org
|
|
//if ( c == '\"' ) c = '\'';
|
|
//if ( c == '<' ) c = '[';
|
|
//if ( c == '>' ) c = ']';
|
|
// . special mapping
|
|
// . make < and > special so Xml::set() still works
|
|
// . and make & special so we do not screw up summaries
|
|
if ( doSpecial ) {
|
|
// no longer use this!
|
|
//char *xx=NULL;*xx=0;
|
|
if ( c == '<' ) {
|
|
// using [ and ] looks bad in event titles...
|
|
*dst = '|';
|
|
dst++;
|
|
src += skip;
|
|
continue;
|
|
gbmemcpy(dst,"+!-",3);
|
|
//gbmemcpy(dst,"<gb",3);
|
|
dst += 3;
|
|
src += skip;
|
|
continue;
|
|
// paragraph sign:
|
|
//c = 0xc2b6;
|
|
}
|
|
if ( c == '>' ) {
|
|
// using [ and ] looks bad in event titles...
|
|
*dst = '|';
|
|
dst++;
|
|
src += skip;
|
|
continue;
|
|
//gbmemcpy(dst,"gb>",3);
|
|
gbmemcpy(dst,"-!+",3);
|
|
dst += 3;
|
|
src += skip;
|
|
continue;
|
|
// high-rise hyphen:
|
|
//c = 0xc2af;
|
|
}
|
|
// some tags have " in their value strings
|
|
// so we have to preserve that!
|
|
// use curling quote:
|
|
//http://www.dwheeler.com/essays/quotes-test-utf-8.html
|
|
// curling double and single quotes resp:
|
|
// “ ” ‘ ”
|
|
if ( c == '\"' ) {
|
|
//c = 0x201c; // 0x235e;
|
|
*dst = '\'';
|
|
dst++;
|
|
src += skip;
|
|
continue;
|
|
}
|
|
//if ( c == '<' ) c = 0x22d6; // e6;
|
|
//if ( c == '>' ) c = 0x22d7; // e7;
|
|
// this was working ok, but just code it to an
|
|
// ampersand. when displaying a page we can code all
|
|
// ampersands back into & i guess! that way
|
|
// the check for a " & " in the place name in
|
|
// Address.cpp works out...
|
|
//if ( c == '&' ) c = 0xff06; // full width ampersand
|
|
}
|
|
// . otherwise it was a legit entity
|
|
// . store it into "dst" in utf8 format
|
|
// . "numBytes" is how many bytes it stored into 'dst"
|
|
int32_t numBytes = utf8Encode ( c , dst );
|
|
// sanity check. do not eat our tail if dst == src
|
|
if ( numBytes > skip ) { char *xx=NULL;*xx=0; }
|
|
// advance dst ptr
|
|
dst += numBytes;
|
|
// skip over the encoded entity in the source string
|
|
src += skip;
|
|
}
|
|
// NULL term
|
|
*dst = '\0';
|
|
return dst - start;
|
|
}
|
|
|
|
// cdata
|
|
int32_t cdataDecode ( char *dst , char *src , int32_t niceness ) {
|
|
if ( ! src ) return 0;
|
|
char *start = dst;
|
|
for ( ; *src ; ) {
|
|
// breathe
|
|
QUICKPOLL(niceness);
|
|
// utf8 support?
|
|
char size = getUtf8CharSize(src);
|
|
// see SafeBuf::cdataEncode() we do the opposite here
|
|
if ( src[0] != ']' ||
|
|
src[1] != ']' ||
|
|
src[2] != '&' ||
|
|
src[3] != 'g' ||
|
|
src[4] != 't' ) {
|
|
if ( size == 1 ) { *dst++ = *src++; continue; }
|
|
gbmemcpy ( dst , src , size );
|
|
src += size;
|
|
dst += size;
|
|
continue;
|
|
//*dst++ = *src++; continue; }
|
|
}
|
|
// make it ]]>
|
|
gbmemcpy ( dst , "]]>" , 3 );
|
|
src += 5;
|
|
dst += 3;
|
|
}
|
|
// NULL term
|
|
*dst = '\0';
|
|
return dst - start;
|
|
}
|
|
|
|
// . make something safe as a form input value by translating each
//   double quote into "&#34;"
// . store "t" (tlen bytes) into "s"; "send" is the end of the "s" buffer
// . this DOES bounds check and is all or nothing: returns the # of bytes
//   stored (excluding the \0) on success, or 0 if the result did not fit
// . on failure "s" is set to the empty string (when it has any room) so
//   the caller is never left with unterminated partial output
int32_t dequote ( char *s , char *send , char *t , int32_t tlen ) {
	char *start = s;
	char *tend  = t + tlen;
	bool  fits  = true;
	for ( ; t < tend && s < send ; t++ ) {
		// ordinary char
		if ( *t != '"' ) { *s++ = *t; continue; }
		// need room for the 5 byte entity
		if ( s + 5 >= send ) { fits = false; break; }
		*s++ = '&';
		*s++ = '#';
		*s++ = '3';
		*s++ = '4';
		*s++ = ';';
	}
	// all or nothing
	if ( ! fits || s + 1 >= send ) {
		if ( start < send ) *start = '\0';
		return 0;
	}
	*s = '\0';
	return s - start;
}
|
|
|
|
// . SafeBuf version of dequote(): append "t" (tlen bytes) to "sb" with
//   every double quote translated into "&#34;"
// . appends a \0 to "sb" when done
// . always returns true
bool dequote ( SafeBuf* sb , char *t , int32_t tlen ) {
	char *tend = t + tlen;
	for ( ; t < tend ; t++ ) {
		// ordinary char
		if ( *t != '"' ) { *sb += *t; continue; }
		// the literal must really be 5 bytes to match the length
		sb->safeMemcpy("&#34;", 5);
	}
	*sb += '\0';
	return true;
}
|
|
|
|
//int32_t dequote ( char *s , char *t ) {
|
|
// return dequote ( s , t , gbstrlen ( t ) );
|
|
//}
|
|
|
|
// . entity-ize a string so it's safe for html output
|
|
// . store "t" into "s" and return bytes stored
|
|
// . does bounds checking
|
|
char *htmlEncode ( char *s , char *send , char *t , char *tend , bool pound ,
|
|
int32_t niceness ) {
|
|
for ( ; t < tend ; t++ ) {
|
|
QUICKPOLL(niceness);
|
|
if ( s + 7 >= send ) { *s = '\0'; return s; }
|
|
if ( *t == '"' ) {
|
|
*s++ = '&';
|
|
*s++ = '#';
|
|
*s++ = '3';
|
|
*s++ = '4';
|
|
*s++ = ';';
|
|
continue;
|
|
}
|
|
if ( *t == '<' ) {
|
|
*s++ = '&';
|
|
*s++ = 'l';
|
|
*s++ = 't';
|
|
*s++ = ';';
|
|
continue;
|
|
}
|
|
if ( *t == '>' ) {
|
|
*s++ = '&';
|
|
*s++ = 'g';
|
|
*s++ = 't';
|
|
*s++ = ';';
|
|
continue;
|
|
}
|
|
if ( *t == '&' ) {
|
|
*s++ = '&';
|
|
*s++ = 'a';
|
|
*s++ = 'm';
|
|
*s++ = 'p';
|
|
*s++ = ';';
|
|
continue;
|
|
}
|
|
if ( *t == '#' && pound ) {
|
|
*s++ = '&';
|
|
*s++ = '#';
|
|
*s++ = '0';
|
|
*s++ = '3';
|
|
*s++ = '5';
|
|
*s++ = ';';
|
|
continue;
|
|
}
|
|
*s++ = *t;
|
|
}
|
|
*s = '\0';
|
|
return s;
|
|
}
|
|
|
|
|
|
// . entity-ize a string so it's safe for html output
|
|
// . store "t" into "s" and return true on success
|
|
bool htmlEncode ( SafeBuf* s , char *t , char *tend , bool pound ,
|
|
int32_t niceness ) {
|
|
for ( ; t < tend ; t++ ) {
|
|
QUICKPOLL(niceness);
|
|
if ( *t == '"' ) {
|
|
s->safeMemcpy(""", 5);
|
|
continue;
|
|
}
|
|
if ( *t == '<' ) {
|
|
s->safeMemcpy("<", 4);
|
|
continue;
|
|
}
|
|
if ( *t == '>' ) {
|
|
s->safeMemcpy(">", 4);
|
|
continue;
|
|
}
|
|
if ( *t == '&' ) {
|
|
s->safeMemcpy("&", 5);
|
|
continue;
|
|
}
|
|
if ( *t == '#' && pound ) {
|
|
s->safeMemcpy("#", 6);
|
|
continue;
|
|
}
|
|
// our own specially decoded entities!
|
|
if ( *t == '+' && t[1]=='!' && t[2]=='-' ) {
|
|
s->safeMemcpy("<",4);
|
|
continue;
|
|
}
|
|
// our own specially decoded entities!
|
|
if ( *t == '-' && t[1]=='!' && t[2]=='+' ) {
|
|
s->safeMemcpy(">",4);
|
|
continue;
|
|
}
|
|
*s += *t;
|
|
}
|
|
*s += '\0';
|
|
return true;
|
|
}
|
|
|
|
// . convert "-->%22 , &-->%26, +-->%2b, space-->+, ?-->%3f is that it?
|
|
// . convert so we can display as a cgi PARAMETER within a url
|
|
// . used by HttPage2 (cached web page) to encode the query into a url
|
|
// . used by PageRoot to do likewise
|
|
// . returns bytes written into "d" not including terminating \0
|
|
int32_t urlEncode ( char *d , int32_t dlen , char *s , int32_t slen, bool requestPath ) {
|
|
char *dstart = d;
|
|
// subtract 1 to make room for a terminating \0
|
|
char *dend = d + dlen - 1;
|
|
char *send = s + slen;
|
|
for ( ; s < send && d < dend ; s++ ) {
|
|
if ( *s == '\0' && requestPath ) {
|
|
*d++ = *s;
|
|
continue;
|
|
}
|
|
// encode if not fit for display
|
|
if ( ! is_ascii ( *s ) ) goto encode;
|
|
switch ( *s ) {
|
|
case ' ': goto encode;
|
|
case '&': goto encode;
|
|
case '"': goto encode;
|
|
case '+': goto encode;
|
|
case '%': goto encode;
|
|
case '#': goto encode;
|
|
// encoding < and > are more for displaying on an
|
|
// html page than sending to an http server
|
|
case '>': goto encode;
|
|
case '<': goto encode;
|
|
case '?': if ( requestPath ) break;
|
|
goto encode;
|
|
}
|
|
// otherwise, no need to encode
|
|
*d++ = *s;
|
|
continue;
|
|
encode:
|
|
// space to +
|
|
if ( *s == ' ' && d + 1 < dend ) { *d++ = '+'; continue; }
|
|
// break out if no room to encode
|
|
if ( d + 2 >= dend ) break;
|
|
*d++ = '%';
|
|
// store first hex digit
|
|
unsigned char v = ((unsigned char)*s)/16 ;
|
|
if ( v < 10 ) v += '0';
|
|
else v += 'A' - 10;
|
|
*d++ = v;
|
|
// store second hex digit
|
|
v = ((unsigned char)*s) & 0x0f ;
|
|
if ( v < 10 ) v += '0';
|
|
else v += 'A' - 10;
|
|
*d++ = v;
|
|
}
|
|
// NULL terminate it
|
|
*d = '\0';
|
|
// and return the length
|
|
return d - dstart;
|
|
}
|
|
|
|
// determine the length of the encoded url, does NOT include NULL
|
|
int32_t urlEncodeLen ( char *s , int32_t slen , bool requestPath ) {
|
|
int32_t dLen = 0;
|
|
char *send = s + slen;
|
|
for ( ; s < send ; s++ ) {
|
|
if ( *s == '\0' && requestPath ) {
|
|
dLen++;
|
|
continue;
|
|
}
|
|
// encode if not fit for display
|
|
if ( ! is_ascii ( *s ) ) goto encode;
|
|
switch ( *s ) {
|
|
case ' ': goto encode;
|
|
case '&': goto encode;
|
|
case '"': goto encode;
|
|
case '+': goto encode;
|
|
case '%': goto encode;
|
|
case '#': goto encode;
|
|
// encoding < and > are more for displaying on an
|
|
// html page than sending to an http server
|
|
case '>': goto encode;
|
|
case '<': goto encode;
|
|
case '?': if ( requestPath ) break;
|
|
goto encode;
|
|
}
|
|
// otherwise, no need to encode
|
|
dLen++;
|
|
continue;
|
|
encode:
|
|
// space to +
|
|
if ( *s == ' ' ) { dLen++; continue; }
|
|
// hex code
|
|
dLen += 3; // %XX
|
|
}
|
|
//dLen++; // NULL TERM
|
|
// and return the length
|
|
return dLen;
|
|
}
|
|
|
|
// . decodes "s/slen" and stores into "dest"
|
|
// . returns the number of bytes stored into "dest"
|
|
int32_t urlDecode ( char *dest , char *s , int32_t slen ) {
|
|
int32_t j = 0;
|
|
for ( int32_t i = 0 ; i < slen ; i++ ) {
|
|
if ( s[i] == '+' ) { dest[j++]=' '; continue; }
|
|
dest[j++] = s[i];
|
|
if ( s[i] != '%' ) continue;
|
|
if ( i + 2 >= slen ) continue;
|
|
// if two chars after are not hex chars, it's not an encoding
|
|
if ( ! is_hex ( s[i+1] ) ) continue;
|
|
if ( ! is_hex ( s[i+2] ) ) continue;
|
|
// convert hex chars to values
|
|
unsigned char a = htob ( s[i+1] ) * 16;
|
|
unsigned char b = htob ( s[i+2] ) ;
|
|
dest[j-1] = (char) (a + b);
|
|
i += 2;
|
|
}
|
|
return j;
|
|
}
|
|
|
|
|
|
int32_t urlDecodeNoZeroes ( char *dest , char *s , int32_t slen ) {
|
|
int32_t j = 0;
|
|
for ( int32_t i = 0 ; i < slen ; i++ ) {
|
|
if ( s[i] == '+' ) { dest[j++]=' '; continue; }
|
|
dest[j++] = s[i];
|
|
if ( s[i] != '%' ) continue;
|
|
if ( i + 2 >= slen ) continue;
|
|
// if two chars after are not hex chars, it's not an encoding
|
|
if ( ! is_hex ( s[i+1] ) ) continue;
|
|
if ( ! is_hex ( s[i+2] ) ) continue;
|
|
// convert hex chars to values
|
|
unsigned char a = htob ( s[i+1] ) * 16;
|
|
unsigned char b = htob ( s[i+2] ) ;
|
|
// NO ZEROES! fixes &content= having decoded \0's in it
|
|
// and setting our parms
|
|
if ( a + b == 0 ) {
|
|
log("fctypes: urlDecodeNoZeros encountered url "
|
|
"encoded zero. truncating http request.");
|
|
return j;
|
|
}
|
|
dest[j-1] = (char) (a + b);
|
|
i += 2;
|
|
}
|
|
return j;
|
|
}
|
|
|
|
// . like above, but only decodes chars that should not have been encoded
|
|
// . will also encode binary chars
|
|
int32_t urlNormCode ( char *d , int32_t dlen , char *s , int32_t slen ) {
|
|
// save start of destination buffer for returning the length
|
|
char *dstart = d;
|
|
// subtract 1 for NULL termination
|
|
char *dend = d + dlen - 1;
|
|
char *send = s + slen;
|
|
for ( ; s < send && d < dend ; s++ ) {
|
|
// if its non-ascii, encode it so it displays correctly
|
|
if ( ! is_ascii ( *s ) ) {
|
|
// break if no room to encode it
|
|
if ( d + 2 >= dend ) break;
|
|
// store it encoded
|
|
*d++ = '%';
|
|
// store first hex digit
|
|
unsigned char v = ((unsigned char)*s)/16 ;
|
|
if ( v < 10 ) v += '0';
|
|
else v += 'A' - 10;
|
|
*d++ = v;
|
|
// store second hex digit
|
|
v = ((unsigned char)*s) & 0x0f ;
|
|
if ( v < 10 ) v += '0';
|
|
else v += 'A' - 10;
|
|
*d++ = v;
|
|
continue;
|
|
}
|
|
// store it
|
|
*d++ = *s;
|
|
// but it might be something encoded that should not have been
|
|
if ( *s != '%' ) continue;
|
|
// it requires to following chars to decode
|
|
if ( s + 2 >= send ) continue;
|
|
// if two chars after are not hex chars, it's not an encoding
|
|
if ( ! is_hex ( s[1] ) ) continue;
|
|
if ( ! is_hex ( s[2] ) ) continue;
|
|
// convert hex chars to values
|
|
unsigned char a = htob ( s[1] ) * 16;
|
|
unsigned char b = htob ( s[2] ) ;
|
|
unsigned char v = a + b;
|
|
// don't decode if it decodes in these chars
|
|
switch ( v ) {
|
|
case ' ': continue;
|
|
case '&': continue;
|
|
case '"': continue;
|
|
case '+': continue;
|
|
case '%': continue;
|
|
case '>': continue;
|
|
case '<': continue;
|
|
case '?': continue;
|
|
case '=': continue;
|
|
}
|
|
// otherwise, it's fine to decode it
|
|
d[-1] = (char) (a + b);
|
|
// skip over those 2 chars as well as leading '%'
|
|
s += 2;
|
|
}
|
|
// NULL terminate
|
|
*d = '\0';
|
|
// return length
|
|
return d - dstart ;
|
|
}
|
|
|
|
// approximate # of non-punct words
|
|
int32_t getNumWords ( char *s ) {
|
|
int32_t count = 0;
|
|
loop:
|
|
// skip punct
|
|
while ( ! is_alnum_a(*s) ) s++;
|
|
// bail if done
|
|
if ( !*s ) return count;
|
|
// count a word
|
|
count++;
|
|
// skip word
|
|
while ( is_alnum_a(*s) ) s++;
|
|
// watch for ' letter punct
|
|
if ( *s=='\'' && is_alnum_a(*(s+1)) && !is_alnum_a(*(s+2)) ) {
|
|
// skip apostrophe
|
|
s++;
|
|
// skip rest of word
|
|
while ( is_alnum_a(*s) ) s++;
|
|
}
|
|
goto loop;
|
|
}
|
|
|
|
// . milliseconds to add to this machine's local clock to get the global
//   clock, i.e. global = local + s_adjustment
// . set by settimeofdayInMillisecondsGlobal() and loadTimeAdjustment()
static int64_t s_adjustment = 0;
|
|
|
|
int64_t globalToLocalTimeMilliseconds ( int64_t global ) {
|
|
// sanity check
|
|
//if ( ! g_clockInSync )
|
|
// log("gb: Converting global time but clock not in sync.");
|
|
return global - s_adjustment;
|
|
}
|
|
|
|
int64_t localToGlobalTimeMilliseconds ( int64_t local ) {
|
|
// sanity check
|
|
//if ( ! g_clockInSync )
|
|
// log("gb: Converting global time but clock not in sync.");
|
|
return local + s_adjustment;
|
|
}
|
|
|
|
int32_t globalToLocalTimeSeconds ( int32_t global ) {
|
|
// sanity check
|
|
//if ( ! g_clockInSync )
|
|
// log("gb: Converting global time but clock not in sync.");
|
|
return global - (s_adjustment/1000);
|
|
}
|
|
|
|
int32_t localToGlobalTimeSeconds ( int32_t local ) {
|
|
// sanity check
|
|
//if ( ! g_clockInSync )
|
|
// log("gb: Converting global time but clock not in sync.");
|
|
return local + (s_adjustment/1000);
|
|
}
|
|
|
|
#include "Timedb.h"
|
|
|
|
|
|
// full path of the file the clock adjustment is saved to / loaded from
static char s_tafile[1024];
// true once setTimeAdjustmentFilename() has been called
static bool s_hasFileName = false;
|
|
|
|
// returns false and sets g_errno on error
|
|
bool setTimeAdjustmentFilename ( char *dir, char *filename ) {
|
|
s_hasFileName = true;
|
|
int32_t len1 = gbstrlen(dir);
|
|
int32_t len2 = gbstrlen(filename);
|
|
if ( len1 + len2 > 1000 ) { char *xx=NULL;*xx=0; }
|
|
sprintf(s_tafile,"%s/%s",dir,filename);
|
|
return true;
|
|
}
|
|
|
|
// returns false and sets g_errno on error
|
|
bool loadTimeAdjustment ( ) {
|
|
// bail if no filename to read
|
|
if ( ! s_hasFileName ) return true;
|
|
// read it in
|
|
// one line in text
|
|
int fd = open ( s_tafile , O_RDONLY );
|
|
if ( fd < 0 ) {
|
|
log("util: could not open %s for reading",s_tafile);
|
|
g_errno = errno;
|
|
return false;
|
|
}
|
|
char rbuf[1024];
|
|
// read in max bytes
|
|
int nr = read ( fd , rbuf , 1000 );
|
|
if ( nr <= 10 || nr > 1000 ) {
|
|
log("util: reading %s had error: %s",s_tafile,
|
|
mstrerror(errno));
|
|
close(fd);
|
|
g_errno = errno;
|
|
return false;
|
|
}
|
|
close(fd);
|
|
// parse the text line
|
|
int64_t stampTime = 0LL;
|
|
int64_t clockAdj = 0LL;
|
|
sscanf ( rbuf , "%" UINT64 " %" INT64 "", &stampTime, &clockAdj );
|
|
// get stamp age
|
|
int64_t local = gettimeofdayInMillisecondsLocal();
|
|
int64_t stampAge = local - stampTime;
|
|
// if too old forget about it
|
|
if ( stampAge > 2*86400 ) return true;
|
|
// update adjustment
|
|
s_adjustment = clockAdj;
|
|
// if stamp in file is within 2 days old, assume its still good
|
|
// this will prevent having to rebuild a sortbydatetable
|
|
// and really slow down loadups
|
|
g_clockInSync = true;
|
|
// note it
|
|
log("util: loaded %s and put clock in sync. age=%" UINT64 " adj=%" INT64 "",
|
|
s_tafile,stampAge,clockAdj);
|
|
return true;
|
|
}
|
|
|
|
// . returns false and sets g_errno on error
|
|
// . saved by Process::saveBlockingFiles1()
|
|
bool saveTimeAdjustment ( ) {
|
|
// fortget it if setTimeAdjustmentFilename never called
|
|
if ( ! s_hasFileName ) return true;
|
|
// must be in sync!
|
|
if ( ! g_clockInSync ) return true;
|
|
// store it
|
|
int64_t local = gettimeofdayInMillisecondsLocal();
|
|
char wbuf[1024];
|
|
sprintf (wbuf,"%" UINT64 " %" INT64 "\n",local,s_adjustment);
|
|
// write it out
|
|
int fd = open ( s_tafile , O_CREAT|O_RDWR|O_TRUNC , 00666 );
|
|
if ( fd < 0 ) {
|
|
log("util: could not open %s for writing",s_tafile);
|
|
g_errno = errno;
|
|
return false;
|
|
}
|
|
// how many bytes to write?
|
|
int32_t len = gbstrlen(wbuf);
|
|
// read in max bytes
|
|
int nw = write ( fd , wbuf , len );
|
|
if ( nw != len ) {
|
|
log("util: writing %s had error: %s",s_tafile,
|
|
mstrerror(errno));
|
|
close(fd);
|
|
g_errno = errno;
|
|
return false;
|
|
}
|
|
close(fd);
|
|
// note it
|
|
log("util: saved %s",s_tafile);
|
|
// it was written ok
|
|
return true;
|
|
}
|
|
|
|
// a "fake" settimeofdayInMilliseconds()
|
|
void settimeofdayInMillisecondsGlobal ( int64_t newTime ) {
|
|
// can't do this in sig handler
|
|
if ( g_inSigHandler ) return;
|
|
// this isn't async signal safe...
|
|
struct timeval tv;
|
|
gettimeofday ( &tv , NULL );
|
|
int64_t now=(int64_t)(tv.tv_usec/1000)+((int64_t)tv.tv_sec)*1000;
|
|
// bail if no change... UNLESS we need to sync clock!!
|
|
if ( s_adjustment == newTime - now && g_clockInSync ) return;
|
|
// log it, that way we know if there is another issue
|
|
// with flip-flopping (before we synced with host #0 and also
|
|
// with proxy #0)
|
|
int64_t delta = s_adjustment - (newTime - now) ;
|
|
if ( delta > 100 || delta < -100 )
|
|
logf(LOG_INFO,"gb: Updating clock adjustment from "
|
|
"%" INT64 " ms to %" INT64 " ms", s_adjustment , newTime - now );
|
|
// set adjustment
|
|
s_adjustment = newTime - now;
|
|
// return?
|
|
if ( g_clockInSync ) return;
|
|
// we are now in sync
|
|
g_clockInSync = true;
|
|
// log it
|
|
if ( s_hasFileName )
|
|
logf(LOG_INFO,"gb: clock is now synced with host #0. "
|
|
"saving to %s",s_tafile);
|
|
else
|
|
logf(LOG_INFO,"gb: clock is now synced with host #0.");
|
|
// save
|
|
saveTimeAdjustment();
|
|
// force timedb to load now!
|
|
//initAllSortByDateTables ( );
|
|
}
|
|
|
|
time_t getTimeGlobal() {
|
|
return gettimeofdayInMillisecondsSynced() / 1000;
|
|
}
|
|
|
|
time_t getTimeGlobalNoCore() {
|
|
return gettimeofdayInMillisecondsGlobalNoCore() / 1000;
|
|
}
|
|
|
|
time_t getTimeSynced() {
|
|
return gettimeofdayInMillisecondsSynced() / 1000;
|
|
}
|
|
|
|
int64_t gettimeofdayInMillisecondsGlobal() {
|
|
return gettimeofdayInMillisecondsSynced();
|
|
}
|
|
|
|
#include "Threads.h"
|
|
|
|
int64_t gettimeofdayInMillisecondsSynced() {
|
|
// if in a sig handler then return g_now
|
|
//if ( g_inSigHandler ) return g_nowGlobal;
|
|
// i find that a pthread can call this function even though
|
|
// a signal handler is underway in the main thread!
|
|
if ( g_inSigHandler && ! g_threads.amThread() ) {
|
|
char *xx = NULL; *xx = 0; }
|
|
// sanity check
|
|
if ( ! isClockInSync() ) {
|
|
static int s_printed = 0;
|
|
if ( (s_printed % 100) == 0 ) {
|
|
s_printed++;
|
|
log("xml: clock not in sync with host #0 yet!!!!!!");
|
|
}
|
|
//char *xx = NULL; *xx = 0; }
|
|
}
|
|
|
|
int64_t now;
|
|
|
|
// the real tiem sigalrm interrupt in Loop.cpp sets this to
|
|
// true once per millisecond
|
|
if ( ! g_clockNeedsUpdate ) {
|
|
now = g_now;
|
|
}
|
|
else {
|
|
//if ( ! g_clockInSync )
|
|
// log("gb: Getting global time but clock not in sync.");
|
|
// this isn't async signal safe...
|
|
struct timeval tv;
|
|
gettimeofday ( &tv , NULL );
|
|
now = (int64_t)(tv.tv_usec/1000)+((int64_t)tv.tv_sec)*1000;
|
|
}
|
|
|
|
// update g_nowLocal
|
|
if ( now > g_now ) g_now = now;
|
|
|
|
g_clockNeedsUpdate = false;
|
|
|
|
// adjust from Msg0x11 time adjustments
|
|
now += s_adjustment;
|
|
// update g_now if it is more accurate
|
|
//if ( now > g_nowGlobal ) g_nowGlobal = now;
|
|
return now;
|
|
}
|
|
|
|
int64_t gettimeofdayInMillisecondsGlobalNoCore() {
|
|
// if in a sig handler then return g_now
|
|
//if ( g_inSigHandler ) return g_nowGlobal;
|
|
// i find that a pthread can call this function even though
|
|
// a signal handler is underway in the main thread!
|
|
if ( g_inSigHandler && ! g_threads.amThread() ) {
|
|
char *xx = NULL; *xx = 0; }
|
|
// sanity check
|
|
//if ( ! g_clockInSync ) { char *xx = NULL; *xx = 0; }
|
|
//if ( ! g_clockInSync )
|
|
// log("gb: Getting global time but clock not in sync.");
|
|
// this isn't async signal safe...
|
|
struct timeval tv;
|
|
gettimeofday ( &tv , NULL );
|
|
int64_t now=(int64_t)(tv.tv_usec/1000)+((int64_t)tv.tv_sec)*1000;
|
|
// update g_nowLocal
|
|
if ( now > g_now ) g_now = now;
|
|
// adjust from Msg0x11 time adjustments
|
|
now += s_adjustment;
|
|
// update g_now if it is more accurate
|
|
//if ( now > g_nowGlobal ) g_nowGlobal = now;
|
|
return now;
|
|
}
|
|
|
|
int64_t gettimeofdayInMillisecondsLocal() {
|
|
return gettimeofdayInMilliseconds();
|
|
}
|
|
|
|
// . system clock in microseconds since the epoch
// . always reads the clock, no caching and no adjustment applied
uint64_t gettimeofdayInMicroseconds(void) {
	struct timeval tv;
	gettimeofday(&tv, NULL);
	uint64_t secs  = (uint64_t)tv.tv_sec;
	uint64_t usecs = (uint64_t)tv.tv_usec;
	return secs * 1000000ULL + usecs;
}
|
|
|
|
// "local" means the time on this machine itself, NOT a timezone thing.
|
|
int64_t gettimeofdayInMilliseconds() {
|
|
// if in a sig handler then return g_now
|
|
//if ( g_inSigHandler ) return g_now;
|
|
// i find that a pthread can call this function even though
|
|
// a signal handler is underway in the main thread!
|
|
if ( g_inSigHandler && ! g_threads.amThread() ) {
|
|
char *xx = NULL; *xx = 0; }
|
|
|
|
// the real tiem sigalrm interrupt in Loop.cpp sets this to
|
|
// true once per millisecond
|
|
if ( ! g_clockNeedsUpdate )
|
|
return g_now;
|
|
|
|
g_clockNeedsUpdate = false;
|
|
|
|
// this isn't async signal safe...
|
|
struct timeval tv;
|
|
//g_loop.disableTimer();
|
|
gettimeofday ( &tv , NULL );
|
|
//g_loop.enableTimer();
|
|
int64_t now=(int64_t)(tv.tv_usec/1000)+((int64_t)tv.tv_sec)*1000;
|
|
// update g_nowLocal
|
|
if ( now > g_now ) g_now = now;
|
|
// adjust from Msg0x11 time adjustments
|
|
//now += s_adjustment;
|
|
// update g_now if it is more accurate
|
|
// . or don't, bad to update it here because it could be very different
|
|
// from what it should be
|
|
//if ( now > g_now ) g_now = now;
|
|
return now;
|
|
}
|
|
|
|
|
|
int64_t gettimeofdayInMilliseconds_force ( ) {
|
|
g_clockNeedsUpdate = true;
|
|
return gettimeofdayInMilliseconds();
|
|
}
|
|
|
|
time_t getTime () {
|
|
return getTimeLocal();
|
|
}
|
|
|
|
// . get time in seconds
|
|
// . use this instead of call to time(NULL) cuz it uses adjustment
|
|
time_t getTimeLocal () {
|
|
// if in a sig handler then return g_now/1000
|
|
//if ( g_inSigHandler ) return (time_t)(g_now / 1000);
|
|
// i find that a pthread can call this function even though
|
|
// a signal handler is underway in the main thread!
|
|
if ( g_inSigHandler && ! g_threads.amThread() ) {
|
|
char *xx = NULL; *xx = 0; }
|
|
// get time now
|
|
uint32_t now = gettimeofdayInMilliseconds() / 1000;
|
|
// and adjust it
|
|
//now += s_adjustment / 1000;
|
|
return (time_t)now;
|
|
}
|
|
|
|
// . make it so we can display the ascii string on an html browser
// . like saftenTags() but '&' is converted too:
//   '<' -> "&lt;", '>' -> "&gt;", '&' -> "&amp;"
// . read "t" (tlen bytes) and store the converted text into "s"
// . "slen" is the size of the "s" buffer, including room for the \0
// . stops early (truncating) once fewer than 7 free bytes remain
// . returns bytes stored into "s", excluding the \0 it appends
int32_t saftenTags2 ( char *s , int32_t slen , char *t , int32_t tlen ) {
	char *start = s;
	// bail if there is no room for even the \0
	if ( slen <= 0 ) return 0;
	// leave a char for the \0
	char *send = s + slen - 1;
	char *tend = t + tlen;
	for ( ; t < tend && s + 6 < send ; t++ ) {
		// pick the entity for this char, if it needs one
		const char *ent = NULL;
		switch ( *t ) {
		case '<': ent = "&lt;" ; break;
		case '>': ent = "&gt;" ; break;
		case '&': ent = "&amp;"; break;
		}
		// plain char, copy through
		if ( ! ent ) { *s++ = *t; continue; }
		// store the entity
		while ( *ent ) *s++ = *ent++;
	}
	// NUL terminate "s"
	*s = '\0';
	return s - start;
}
|
|
|
|
// . break a duration of "ms" milliseconds down into whole days, hours,
//   minutes, seconds and leftover milliseconds
// . every output ptr must be non-NULL
void getCalendarFromMs(int64_t ms,
		       int32_t* days,
		       int32_t* hours,
		       int32_t* minutes,
		       int32_t* secs,
		       int32_t* msecs) {
	// unit sizes in milliseconds
	const int32_t msPerSec  = 1000;
	const int32_t msPerMin  = msPerSec  * 60;
	const int32_t msPerHour = msPerMin  * 60;
	const int32_t msPerDay  = msPerHour * 24;
	// peel off the biggest unit first, carrying the remainder down
	int64_t rem = ms;
	*days    = rem / msPerDay;   rem %= msPerDay;
	*hours   = rem / msPerHour;  rem %= msPerHour;
	*minutes = rem / msPerMin;   rem %= msPerMin;
	*secs    = rem / msPerSec;
	*msecs   = rem % msPerSec;
}
|
|
|
|
// . sum "buf" as a sequence of native-endian 32-bit words, wrapping on
//   overflow
// . only whole words are summed: the trailing (bufLen % 4) bytes are
//   ignored
// . "buf" need not be 4-byte aligned; each word is copied out rather
//   than read through a casted pointer
uint32_t calculateChecksum(char *buf, int32_t bufLen){
	uint32_t sum = 0;
	int32_t numWords = bufLen >> 2;
	for ( int32_t i = 0 ; i < numWords ; i++ ) {
		uint32_t word;
		memcpy ( &word , buf + 4 * (size_t)i , sizeof(word) );
		sum += word;
	}
	return sum;
}
|
|
|
|
bool anchorIsLink( char *tag, int32_t tagLen){
|
|
if (strncasestr(tag, tagLen, "href")) return true;
|
|
if (strncasestr(tag, tagLen, "onclick")) return true;
|
|
return false;
|
|
}
|
|
|
|
bool has_alpha_a ( char *s , char *send ) {
|
|
for ( ; s < send ; s++ )
|
|
if (is_alpha_a(*s)) return true;
|
|
return false;
|
|
}
|
|
|
|
bool has_alpha_utf8 ( char *s , char *send ) {
|
|
char cs = 0;
|
|
for ( ; s < send ; s += cs ) {
|
|
cs = getUtf8CharSize ( s );
|
|
if ( cs == 1 ) {
|
|
if (is_alpha_a(*s)) return true;
|
|
continue;
|
|
}
|
|
if ( is_alpha_utf8(s) ) return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// . takes NUL-terminated "input" and skips its leading whitespace
// . stores a ptr to the next non-space char into *numPtr
// . returns a ptr to the next whitespace char after that, or to the
//   terminating \0 if there is none
// . chars go through an unsigned char cast because the <ctype.h>
//   functions are undefined for negative values
char* getNextNum(char* input, char** numPtr) {
	char *tokStart = input;
	// skip leading spaces
	while ( *tokStart && isspace((unsigned char)*tokStart) ) tokStart++;
	*numPtr = tokStart;
	// skip the token itself
	char *tokEnd = tokStart;
	while ( *tokEnd && ! isspace((unsigned char)*tokEnd) ) tokEnd++;
	return tokEnd;
}
|
|
|
|
#include "HttpMime.h" // CT_HTML
|
|
|
|
// returns length of stripped content, but will set g_errno and return -1
|
|
// on error
|
|
int32_t stripHtml( char *content, int32_t contentLen, int32_t version, int32_t strip ) {
|
|
if ( !strip ) {
|
|
log( LOG_WARN, "query: html stripping not required!" );
|
|
return contentLen;
|
|
}
|
|
if ( ! content )
|
|
return 0;
|
|
if ( contentLen == 0 )
|
|
return 0;
|
|
|
|
// filter content if we should
|
|
// keep this on the big stack so "content" still references something
|
|
Xml tmpXml;
|
|
// . get the content as xhtml (should be NULL terminated)
|
|
// . parse as utf8 since all we are doing is messing with
|
|
// the tags...content manipulation comes later
|
|
if ( ! tmpXml.set ( content , contentLen,
|
|
false, 0, false, version , true , 0 , CT_HTML ) )
|
|
return -1;
|
|
|
|
//if( strip == 4 )
|
|
// return tmpXml.getText( content, contentLen );
|
|
|
|
// go tag by tag
|
|
int32_t n = tmpXml.getNumNodes();
|
|
XmlNode *nodes = tmpXml.getNodes();
|
|
// Xml class may have converted to utf16
|
|
content = tmpXml.getContent();
|
|
contentLen = tmpXml.getContentLen();
|
|
char *x = content;
|
|
char *xend = content + contentLen;
|
|
int32_t stackid = -1;
|
|
int32_t stackc = 0;
|
|
char skipIt = 0;
|
|
// . hack COL tag to NOT require a back tag
|
|
// . do not leave it that way as it could mess up our parsing
|
|
//g_nodes[25].m_hasBackTag = 0;
|
|
for ( int32_t i = 0 ; i < n ; i++ ) {
|
|
// get id of this node
|
|
int32_t id = nodes[i].m_nodeId;
|
|
|
|
// if strip is 4, just remove the script tag
|
|
if( strip == 4 ){
|
|
if ( id ){
|
|
if ( id == 83 ){
|
|
skipIt ^= 1;
|
|
continue;
|
|
}
|
|
}
|
|
else if ( skipIt ) continue;
|
|
goto keepit;
|
|
}
|
|
|
|
// if strip is 3, ALL tags will be removed!
|
|
if( strip == 3 ) {
|
|
if( id ) {
|
|
// . we dont want anything in between:
|
|
// - script tags (83)
|
|
// - style tags (111)
|
|
if ((id == 83) || (id == 111)) skipIt ^= 1;
|
|
// save img to have alt text kept.
|
|
if ( id == 54 ) goto keepit;
|
|
continue;
|
|
}
|
|
else {
|
|
if( skipIt ) continue;
|
|
goto keepit;
|
|
}
|
|
}
|
|
// get it
|
|
int32_t fk;
|
|
if ( strip == 1 ) fk = g_nodes[id].m_filterKeep1;
|
|
else fk = g_nodes[id].m_filterKeep2;
|
|
// if tag is <link ...> only keep it if it has
|
|
// rel="stylesheet" or rel=stylesheet
|
|
if ( strip == 2 && id == 62 ) { // <link> tag id
|
|
int32_t fflen;
|
|
char *ff = nodes[i].getFieldValue ( "rel" , &fflen );
|
|
if ( ff && fflen == 10 &&
|
|
strncmp(ff,"stylesheet",10) == 0 )
|
|
goto keepit;
|
|
}
|
|
// just remove just the tag if this is 2
|
|
if ( fk == 2 ) continue;
|
|
// keep it if not in a stack
|
|
if ( ! stackc && fk ) goto keepit;
|
|
// if no front/back for tag, just skip it
|
|
if ( ! nodes[i].m_hasBackTag ) continue;
|
|
// start stack if none
|
|
if ( stackc == 0 ) {
|
|
// but not if this is a back tag
|
|
if ( nodes[i].m_node[1] == '/' ) continue;
|
|
// now start the stack
|
|
stackid = id;
|
|
stackc = 1;
|
|
continue;
|
|
}
|
|
// skip if this tag does not match what is on stack
|
|
if ( id != stackid ) continue;
|
|
// if ANOTHER front tag, inc stack
|
|
if ( nodes[i].m_node[1] != '/' ) stackc++;
|
|
// otherwise, dec the stack count
|
|
else stackc--;
|
|
// . ensure not negative from excess back tags
|
|
// . reset stackid to -1 to indicate no stack
|
|
if ( stackc <= 0 ) { stackid= -1; stackc = 0; }
|
|
// skip it
|
|
continue;
|
|
keepit:
|
|
// replace images with their alt text
|
|
int32_t vlen;
|
|
char *v;
|
|
if ( id == 54 ) {
|
|
v = nodes[i].getFieldValue("alt", &vlen );
|
|
// try title if no alt text
|
|
if ( ! v )
|
|
v = nodes[i].getFieldValue("title", &vlen );
|
|
if ( v ) { gbmemcpy ( x, v, vlen ); x += vlen; }
|
|
continue;
|
|
}
|
|
// remove background image from body,table,td tags
|
|
if ( id == 19 || id == 93 || id == 95 ) {
|
|
v = nodes[i].getFieldValue("background", &vlen);
|
|
// remove background, just sabotage it
|
|
if ( v ) v[-4] = 'x';
|
|
}
|
|
// store it
|
|
gbmemcpy ( x , nodes[i].m_node , nodes[i].m_nodeLen );
|
|
x += nodes[i].m_nodeLen;
|
|
// sanity check
|
|
if ( x > xend ) { char *xx=NULL;*xx=0;}
|
|
}
|
|
contentLen = x - content;
|
|
content [ contentLen ] = '\0';
|
|
// unhack COL tag
|
|
//g_nodes[25].m_hasBackTag = 1;
|
|
return contentLen;
|
|
}
|
|
|
|
|
|
// . return true if "s" is a character we accept inside a url
// . that is anything isalnum() accepts, plus one of:
//   / : _ - . ? $ , ~ = # & % + @
bool is_urlchar(char s) {
	// the <ctype.h> functions take an int that must be representable as
	// an unsigned char (or be EOF). plain char may be signed, so a byte
	// with the high bit set would be passed as a negative value, which
	// is undefined behavior. convert it first.
	if ( isalnum ( (unsigned char)s ) ) return true;
	switch ( s ) {
	case '/':
	case ':':
	case '_':
	case '-':
	case '.':
	case '?':
	case '$':
	case ',':
	case '~':
	case '=':
	case '#':
	case '&':
	case '%':
	case '+':
	case '@':
		return true;
	default:
		return false;
	}
}
|
|
// . don't allow "> in our input boxes
// . copy "inbuf" (inbufLen bytes) into "outbuf", storing at most
//   outbufSize-1 bytes and then a terminating \0
// . if we hit a '>' while an odd number of double quotes has been seen
//   it could be an xss attempt, so we stop there and cut the output off
//   right before the most recent double quote
// . returns the number of bytes stored, not counting the \0
// . returns 0 without touching "outbuf" if there is no room for the \0
int32_t cleanInput(char *outbuf, int32_t outbufSize, char *inbuf, int32_t inbufLen){
	// the original always wrote the \0, even into a zero-sized buffer
	if ( ! outbuf || outbufSize <= 0 ) return 0;
	// treat a NULL input as an empty one
	if ( ! inbuf ) inbufLen = 0;
	char *p    = outbuf;
	// leave room for the \0
	char *pend = outbuf + outbufSize - 1;
	// where the most recent double quote landed in outbuf
	char *lastQuote = outbuf;
	int32_t numQuotes = 0;
	for ( int32_t i = 0 ; i < inbufLen && p < pend ; i++ ) {
		char c = inbuf[i];
		if ( c == '"' ) {
			numQuotes++;
			lastQuote = p;
		}
		// if we have an odd number of quotes and a close angle bracket
		// it could be an xss attempt
		if ( c == '>' && (numQuotes & 1) ) {
			p = lastQuote;
			break;
		}
		*p++ = c;
	}
	*p = '\0';
	return p - outbuf;
}
|
|
|
|
|
|
//
|
|
// get rid of the virtual Msg class because it screws up how we
|
|
// serialize/deserialize every time we compile gb it seems
|
|
//
|
|
|
|
// . how many bytes a serialized msg takes up
// . that is "baseSize" plus every int32_t size from "firstSizeParm"
//   through "lastSizeParm", inclusive
int32_t getMsgStoredSize ( int32_t baseSize,
			   int32_t *firstSizeParm,
			   int32_t *lastSizeParm ) {
	// start with the fixed-size part
	int32_t total = baseSize;
	// then add in the size of each string buffer
	int32_t *cur = firstSizeParm;
	while ( cur <= lastSizeParm ) {
		total += *cur;
		cur++;
	}
	return total;
}
|
|
|
|
// . return ptr to the buffer we serialize into
|
|
// . return NULL and set g_errno on error
|
|
char *serializeMsg ( int32_t baseSize ,
|
|
int32_t *firstSizeParm ,
|
|
int32_t *lastSizeParm ,
|
|
char **firstStrPtr ,
|
|
void *thisPtr ,
|
|
int32_t *retSize ,
|
|
char *userBuf ,
|
|
int32_t userBufSize ,
|
|
bool makePtrsRefNewBuf ) {
|
|
// make a buffer to serialize into
|
|
char *buf = NULL;
|
|
//int32_t need = getStoredSize();
|
|
int32_t need = getMsgStoredSize(baseSize,firstSizeParm,lastSizeParm);
|
|
// big enough?
|
|
if ( need <= userBufSize ) buf = userBuf;
|
|
// alloc if we should
|
|
if ( ! buf ) buf = (char *)mmalloc ( need , "Ra" );
|
|
// bail on error, g_errno should be set
|
|
if ( ! buf ) return NULL;
|
|
// set how many bytes we will serialize into
|
|
*retSize = need;
|
|
// copy the easy stuff
|
|
char *p = buf;
|
|
gbmemcpy ( p , (char *)thisPtr , baseSize );//getBaseSize() );
|
|
p += baseSize; // getBaseSize();
|
|
// then store the strings!
|
|
int32_t *sizePtr = firstSizeParm;//getFirstSizeParm(); // &size_qbuf;
|
|
int32_t *sizeEnd = lastSizeParm;//getLastSizeParm (); // &size_displayMet
|
|
char **strPtr = firstStrPtr;//getFirstStrPtr (); // &ptr_qbuf;
|
|
for ( ; sizePtr <= sizeEnd ; ) {
|
|
// if we are NULL, we are a "bookmark", so
|
|
// we alloc'd space for it, but don't copy into
|
|
// the space until after this call toe serialize()
|
|
if ( ! *strPtr ) goto skip;
|
|
// sanity check -- cannot copy onto ourselves
|
|
if ( p > *strPtr && p < *strPtr + *sizePtr ) {
|
|
char *xx = NULL; *xx = 0; }
|
|
// copy the string into the buffer
|
|
gbmemcpy ( p , *strPtr , *sizePtr );
|
|
skip:
|
|
// . make it point into the buffer now
|
|
// . MDW: why? that is causing problems for the re-call in
|
|
// Msg3a, it calls this twice with the same "m_r"
|
|
if ( makePtrsRefNewBuf ) *strPtr = p;
|
|
// advance our destination ptr
|
|
p += *sizePtr;
|
|
// advance both ptrs to next string
|
|
sizePtr++;
|
|
strPtr++;
|
|
}
|
|
return buf;
|
|
}
|
|
|
|
char *serializeMsg2 ( void *thisPtr ,
|
|
int32_t objSize ,
|
|
char **firstStrPtr ,
|
|
int32_t *firstSizeParm ,
|
|
int32_t *retSize ) {
|
|
|
|
// make a buffer to serialize into
|
|
char *buf = NULL;
|
|
int32_t baseSize = (char *)firstStrPtr - (char *)thisPtr;
|
|
int nptrs=((char *)firstSizeParm-(char *)firstStrPtr)/sizeof(char *);
|
|
int32_t need = baseSize;
|
|
need += nptrs * sizeof(char *);
|
|
need += nptrs * sizeof(int32_t);
|
|
// tally up the string sizes
|
|
int32_t *srcSizePtr = (int32_t *)firstSizeParm;
|
|
char **srcStrPtr = (char **)firstStrPtr;
|
|
int32_t totalStringSizes = 0;
|
|
for ( int i = 0 ; i < nptrs ; i++ ) {
|
|
if ( srcStrPtr[i] == NULL ) continue;
|
|
totalStringSizes += srcSizePtr[i];
|
|
|
|
}
|
|
int32_t stringBufferOffset = need;
|
|
need += totalStringSizes;
|
|
// alloc if we should
|
|
if ( ! buf ) buf = (char *)mmalloc ( need , "sm2" );
|
|
// bail on error, g_errno should be set
|
|
if ( ! buf ) return NULL;
|
|
// set how many bytes we will serialize into
|
|
*retSize = need;
|
|
// copy everything over except strings themselves
|
|
char *p = buf;
|
|
gbmemcpy ( p , (char *)thisPtr , stringBufferOffset );//need );
|
|
// point to the string buffer
|
|
p += stringBufferOffset;
|
|
// then store the strings!
|
|
char **dstStrPtr = (char **)(buf + baseSize );
|
|
int32_t *dstSizePtr = (int32_t *)(buf + baseSize+sizeof(char *)*nptrs);
|
|
for ( int count = 0 ; count < nptrs ; count++ ) {
|
|
// copy ptrs
|
|
//*dstStrPtr = *srcStrPtr;
|
|
//*dstSizePtr = *srcSizePtr;
|
|
// if we are NULL, we are a "bookmark", so
|
|
// we alloc'd space for it, but don't copy into
|
|
// the space until after this call toe serialize()
|
|
if ( ! *srcStrPtr )
|
|
goto skip;
|
|
// if this is valid then size can't be 0! fix upstream.
|
|
if ( ! *srcSizePtr ) { char *xx=NULL;*xx=0; }
|
|
// if size is 0 use gbstrlen. helps with InjectionRequest
|
|
// where we set ptr_url or ptr_content but not size_url, etc.
|
|
//if ( ! *srcSizePtr )
|
|
// *srcSizePtr = gbstrlen(*strPtr);
|
|
// sanity check -- cannot copy onto ourselves
|
|
if ( p > *srcStrPtr && p < *srcStrPtr + *srcSizePtr ) {
|
|
char *xx = NULL; *xx = 0; }
|
|
// copy the string into the buffer
|
|
gbmemcpy ( p , *srcStrPtr , *srcSizePtr );
|
|
skip:
|
|
// point it now into the string buffer
|
|
*dstStrPtr = p;
|
|
// if it is 0 length, make ptr NULL in destination
|
|
if ( *srcSizePtr == 0 || *srcStrPtr == NULL ) {
|
|
*dstStrPtr = NULL;
|
|
*dstSizePtr = 0;
|
|
}
|
|
// advance our destination ptr
|
|
p += *dstSizePtr;
|
|
// advance both ptrs to next string
|
|
srcSizePtr++;
|
|
srcStrPtr++;
|
|
dstSizePtr++;
|
|
dstStrPtr++;
|
|
}
|
|
return buf;
|
|
}
|
|
|
|
|
|
// convert offsets back into ptrs
|
|
int32_t deserializeMsg ( int32_t baseSize ,
|
|
int32_t *firstSizeParm ,
|
|
int32_t *lastSizeParm ,
|
|
char **firstStrPtr ,
|
|
char *stringBuf ) {
|
|
// point to our string buffer
|
|
char *p = stringBuf;//getStringBuf(); // m_buf;
|
|
// then store the strings!
|
|
int32_t *sizePtr = firstSizeParm;//getFirstSizeParm(); // &size_qbuf;
|
|
int32_t *sizeEnd = lastSizeParm;//getLastSizeParm (); // &size_displayMet
|
|
char **strPtr = firstStrPtr;//getFirstStrPtr (); // &ptr_qbuf;
|
|
for ( ; sizePtr <= sizeEnd ; ) {
|
|
// convert the offset to a ptr
|
|
*strPtr = p;
|
|
// make it NULL if size is 0 though
|
|
if ( *sizePtr == 0 ) *strPtr = NULL;
|
|
// sanity check
|
|
if ( *sizePtr < 0 ) { g_errno = ECORRUPTDATA; return -1;}
|
|
// advance our destination ptr
|
|
p += *sizePtr;
|
|
// advance both ptrs to next string
|
|
sizePtr++;
|
|
strPtr++;
|
|
}
|
|
// return how many bytes we processed
|
|
return baseSize + (p - stringBuf);//getStringBuf());
|
|
}
|
|
|
|
// . fix up the string ptrs of a buffer laid out as: array of char ptrs
//   starting at "firstStrPtr" (ptr_url), array of int32_t sizes starting
//   at "firstSizeParm" (size_url), then the strings, back to back
// . the number of ptrs is derived from the distance between the two
// . each ptr is pointed at its string, or set to NULL if its size is 0
// . returns false as soon as we hit a negative size, true otherwise
bool deserializeMsg2 ( char    **firstStrPtr ,     // ptr_url
		       int32_t  *firstSizeParm ) { // size_url
	int nptrs=((char *)firstSizeParm-(char *)firstStrPtr)/sizeof(char *);
	// the strings start right after the last size
	char *cur = ((char *)firstSizeParm + sizeof(int32_t)*nptrs);
	for ( int i = 0 ; i < nptrs ; i++ ) {
		int32_t len = firstSizeParm[i];
		// point to the string, but make it NULL if size is 0
		firstStrPtr[i] = len ? cur : NULL;
		// sanity check
		if ( len < 0 ) return false;
		// skip over this string
		cur += len;
	}
	return true;
}
|
|
|
|
// . print "ttt" to stderr as a calendar time in UTC followed by " UTC"
// . for debugging Dates.cpp
// . prints "(invalid time) UTC" if "ttt" can not be converted
// . always returns 0
int32_t printTime ( time_t ttt ) {
	// print in UTC! gmtime() returns NULL if it can not represent the
	// time, and we must not hand that to asctime()
	struct tm *tmv = gmtime ( &ttt );
	char *s = tmv ? asctime ( tmv ) : NULL;
	if ( ! s ) {
		fprintf(stderr,"(invalid time) UTC\n");
		return 0;
	}
	// strip the \n that asctime() puts on the end
	for ( char *q = s ; *q ; q++ ) {
		if ( *q != '\n' ) continue;
		*q = '\0';
		break;
	}
	fprintf(stderr,"%s UTC\n",s);
	return 0;
}
|
|
|
|
// . like mktime() but the fields of "ttt" are taken to be in UTC
// . mktime() interprets the fields in our local timezone, so we
//   subtract the "timezone" global (seconds west of UTC for local
//   standard time) from what it returns
// . like mktime() this may normalize the fields of "ttt", and it
//   always overwrites ttt->tm_isdst
// . a negative result from mktime() is returned as is, uncorrected
// . NOTE(review): "timezone" is the current standard offset, so this
//   is presumably off for dates when the local zone used a different
//   standard offset -- confirm if that matters to callers
time_t mktime_utc ( struct tm *ttt ) {
	// "timezone" never includes daylight saving, so make mktime()
	// interpret the fields as standard time as well. otherwise a
	// tm_isdst that is positive, -1 or uninitialized can make the
	// result an hour off.
	ttt->tm_isdst = 0;
	time_t local = mktime ( ttt );
	// bad?
	if ( local < 0 ) return local;
	// mod that to get it into utc
	return local - timezone;
}
|
|
|
|
bool verifyUtf8 ( char *txt , int32_t tlen ) {
|
|
if ( ! txt || tlen <= 0 ) return true;
|
|
char size;
|
|
char *p = txt;
|
|
char *pend = txt + tlen;
|
|
for ( ; p < pend ; p += size ) {
|
|
size = getUtf8CharSize(p);
|
|
// skip if ascii
|
|
if ( ! (p[0] & 0x80) ) continue;
|
|
// ok, it's a utf8 char, it must have both hi bits set
|
|
if ( (p[0] & 0xc0) != 0xc0 ) return false;
|
|
// if only one byte, we are done.. how can that be?
|
|
if ( size == 1 ) return false;
|
|
//if ( ! utf8IsSane ( p[0] ) ) return false;
|
|
// successive utf8 chars must have & 0xc0 be equal to 0x80
|
|
// but the first char it must equal 0xc0, both set
|
|
if ( (p[1] & 0xc0) != 0x80 ) return false;
|
|
if ( size == 2 ) continue;
|
|
if ( (p[2] & 0xc0) != 0x80 ) return false;
|
|
if ( size == 3 ) continue;
|
|
if ( (p[3] & 0xc0) != 0x80 ) return false;
|
|
}
|
|
if ( p != pend ) return false;
|
|
return true;
|
|
}
|
|
|
|
bool verifyUtf8 ( char *txt ) {
|
|
int32_t tlen = gbstrlen(txt);
|
|
return verifyUtf8(txt,tlen);
|
|
}
|