forked from Mirrors/privacore-open-source-search-engine
fctypes: const + remove unused
Added const-qualifier to pointer parameters in fctypes.h and removed unused functions too. const propagated down to hash.h and Entities.cpp but no further.
This commit is contained in:
14
Entities.cpp
14
Entities.cpp
@ -7,15 +7,15 @@
|
||||
// JAB: const-ness for optimizer...
|
||||
// don't call these, they're used internally
|
||||
static bool initEntityTable();
|
||||
static uint32_t getTextEntity ( char *s , int32_t len );
|
||||
static uint32_t getDecimalEntity ( char *s , int32_t len );
|
||||
static uint32_t getHexadecimalEntity ( char *s , int32_t len );
|
||||
static uint32_t getTextEntity ( const char *s , int32_t len );
|
||||
static uint32_t getDecimalEntity ( const char *s , int32_t len );
|
||||
static uint32_t getHexadecimalEntity ( const char *s , int32_t len );
|
||||
|
||||
// . s[maxLen] should be the NULL
|
||||
// . returns full length of entity @ "s" if there is a valid one, 0 otherwise
|
||||
// . sets *c to the iso character the entity represents (if there is one)
|
||||
// JAB: const-ness for optimizer...
|
||||
int32_t getEntity_a ( char *s , int32_t maxLen , uint32_t *c ) {
|
||||
int32_t getEntity_a ( const char *s , int32_t maxLen , uint32_t *c ) {
|
||||
// ensure there's an & as first char
|
||||
if ( s[0] != '&' ) return 0;
|
||||
// compute maximum length of entity, if it's indeed an entity
|
||||
@ -424,7 +424,7 @@ static bool initEntityTable(){
|
||||
// . return the 32-bit unicode char it represents
|
||||
// . returns 0 if none
|
||||
// . JAB: const-ness for optimizer...
|
||||
uint32_t getTextEntity ( char *s , int32_t len ) {
|
||||
uint32_t getTextEntity ( const char *s , int32_t len ) {
|
||||
if ( !initEntityTable()) return 0;
|
||||
// take the ; off, if any
|
||||
if ( s[len-1] == ';' ) len--;
|
||||
@ -451,7 +451,7 @@ uint32_t getTextEntity ( char *s , int32_t len ) {
|
||||
// . get a decimal encoded entity
|
||||
// . s/len is the whol thing
|
||||
// . JAB: const-ness for optimizer...
|
||||
uint32_t getDecimalEntity ( char *s , int32_t len ) {
|
||||
uint32_t getDecimalEntity ( const char *s , int32_t len ) {
|
||||
// take the ; off, if any
|
||||
if ( s[len-1] == ';' ) len--;
|
||||
// .  is smallest it can be
|
||||
@ -504,7 +504,7 @@ uint32_t getDecimalEntity ( char *s , int32_t len ) {
|
||||
// . get a hexadecimal encoded entity
|
||||
// . JAB: const-ness for optimizer...
|
||||
// . returns a UChar32
|
||||
uint32_t getHexadecimalEntity ( char *s , int32_t len ) {
|
||||
uint32_t getHexadecimalEntity ( const char *s , int32_t len ) {
|
||||
// take the ; off, if any
|
||||
if ( s[len-1] == ';' ) len--;
|
||||
// .  is smallest it can be
|
||||
|
@ -12,7 +12,7 @@
|
||||
|
||||
// call these two
|
||||
// JAB: const-ness for the optimizer
|
||||
int32_t getEntity_a ( char *s , int32_t maxLen , uint32_t *c );
|
||||
int32_t getEntity_a ( const char *s , int32_t maxLen , uint32_t *c );
|
||||
|
||||
//int32_t getEntity_utf8 (char *s , int32_t maxLen , int32_t *d , int32_t *ds ) ;
|
||||
|
||||
|
@ -39,7 +39,7 @@ static int bytes_in_utf8_code[] = {
|
||||
};
|
||||
|
||||
// how many bytes is char pointed to by p?
|
||||
inline char getUtf8CharSize ( uint8_t *p ) {
|
||||
inline char getUtf8CharSize ( const uint8_t *p ) {
|
||||
uint8_t c = *p;
|
||||
if(c<128)
|
||||
return 1;
|
||||
@ -62,7 +62,7 @@ inline char getUtf8CharSize ( uint8_t c ) {
|
||||
return bytes_in_utf8_code[c];
|
||||
}
|
||||
|
||||
inline char getUtf8CharSize2 ( uint8_t *p ) {
|
||||
inline char getUtf8CharSize2 ( const uint8_t *p ) {
|
||||
if ( ! (p[0] & 0x80) ) return 1;
|
||||
if ( ! (p[0] & 0x20) ) return 2;
|
||||
if ( ! (p[0] & 0x10) ) return 3;
|
||||
@ -273,7 +273,7 @@ inline int32_t utf8Encode(UChar32 c, char* buf) {
|
||||
}
|
||||
|
||||
// return the utf8 character at "p" as a 32-bit unicode character
|
||||
inline UChar32 utf8Decode(char *p){//, char **next){
|
||||
inline UChar32 utf8Decode(const char *p){
|
||||
// single byte character
|
||||
if (!(*p & 0x80)){
|
||||
//*next = (char*) p + 1;
|
||||
|
438
fctypes.cpp
438
fctypes.cpp
@ -15,28 +15,6 @@ bool isClockInSync() {
|
||||
}
|
||||
|
||||
|
||||
bool print96 ( char *k ) {
|
||||
key_t *kp = (key_t *)k;
|
||||
printf("n1=0x%"XINT32" n0=0x%"XINT64"\n",(int32_t)kp->n1,(int64_t)kp->n0);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool print96 ( key_t *kp ) {
|
||||
printf("n1=0x%"XINT32" n0=0x%"XINT64"\n",(int32_t)kp->n1,(int64_t)kp->n0);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool print128 ( char *k ) {
|
||||
key128_t *kp = (key128_t *)k;
|
||||
printf("n1=0x%"XINT64" n0=0x%"XINT64"\n",(int64_t)kp->n1,(int64_t)kp->n0);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool print128 ( key128_t *kp ) {
|
||||
printf("n1=0x%"XINT64" n0=0x%"XINT64"\n",(int64_t)kp->n1,(int64_t)kp->n0);
|
||||
return true;
|
||||
}
|
||||
|
||||
// . put all the maps here now
|
||||
// . convert "c" to lower case
|
||||
const unsigned char g_map_to_lower[] = {
|
||||
@ -530,7 +508,7 @@ const char g_map_is_vowel[] = {
|
||||
0,0,0,0,0,0,0,0, // 240
|
||||
0,0,0,0,0,0,0,0};
|
||||
|
||||
char *strncasestr( char *haystack, int32_t haylen, char *needle){
|
||||
char *strncasestr( char *haystack, int32_t haylen, const char *needle){
|
||||
int32_t matchLen = 0;
|
||||
int32_t needleLen = gbstrlen(needle);
|
||||
for (int32_t i = 0; i < haylen;i++){
|
||||
@ -551,7 +529,7 @@ char *strncasestr( char *haystack, int32_t haylen, char *needle){
|
||||
return NULL;
|
||||
}
|
||||
|
||||
char *strnstr2( char *haystack, int32_t haylen, char *needle) {
|
||||
char *strnstr2( char *haystack, int32_t haylen, const char *needle) {
|
||||
int32_t matchLen = 0;
|
||||
int32_t needleLen = gbstrlen(needle);
|
||||
for (int32_t i = 0; i < haylen; ++i) {
|
||||
@ -755,8 +733,8 @@ void hexToBin ( const char *src , int32_t srcLen , char *dst ) {
|
||||
if ( src != srcEnd ) { char *xx=NULL;*xx=0; }
|
||||
}
|
||||
|
||||
void binToHex ( unsigned char *src , int32_t srcLen , char *dst ) {
|
||||
unsigned char *srcEnd = src + srcLen;
|
||||
void binToHex ( const unsigned char *src , int32_t srcLen , char *dst ) {
|
||||
const unsigned char *srcEnd = src + srcLen;
|
||||
for ( ; src && src < srcEnd ; ) {
|
||||
*dst++ = btoh(*src>>4);
|
||||
*dst++ = btoh(*src&15);
|
||||
@ -771,7 +749,7 @@ void binToHex ( unsigned char *src , int32_t srcLen , char *dst ) {
|
||||
|
||||
// . like strstr but haystack may not be NULL terminated
|
||||
// . needle, however, IS null terminated
|
||||
char *strncasestr ( char *haystack , char *needle , int32_t haystackSize ) {
|
||||
char *strncasestr ( char *haystack , const char *needle , int32_t haystackSize ) {
|
||||
int32_t needleSize = gbstrlen(needle);
|
||||
int32_t n = haystackSize - needleSize ;
|
||||
for ( int32_t i = 0 ; i <= n ; i++ ) {
|
||||
@ -789,7 +767,7 @@ char *strncasestr ( char *haystack , char *needle , int32_t haystackSize ) {
|
||||
|
||||
// . like strstr but haystack may not be NULL terminated
|
||||
// . needle, however, IS null terminated
|
||||
char *strncasestr ( char *haystack , char *needle ,
|
||||
char *strncasestr ( char *haystack , const char *needle ,
|
||||
int32_t haystackSize, int32_t needleSize ) {
|
||||
int32_t n = haystackSize - needleSize ;
|
||||
for ( int32_t i = 0 ; i <= n ; i++ ) {
|
||||
@ -821,7 +799,7 @@ char *strnstr ( char *haystack , char *needle , int32_t haystackSize ) {
|
||||
}
|
||||
|
||||
// independent of case
|
||||
char *gb_strcasestr ( char *haystack , char *needle ) {
|
||||
char *gb_strcasestr ( char *haystack , const char *needle ) {
|
||||
int32_t needleSize = gbstrlen(needle);
|
||||
int32_t haystackSize = gbstrlen(haystack);
|
||||
int32_t n = haystackSize - needleSize ;
|
||||
@ -839,7 +817,7 @@ char *gb_strcasestr ( char *haystack , char *needle ) {
|
||||
}
|
||||
|
||||
|
||||
char *gb_strncasestr ( char *haystack , int32_t haystackSize , char *needle ) {
|
||||
char *gb_strncasestr ( char *haystack , int32_t haystackSize , const char *needle ) {
|
||||
// temp term
|
||||
char c = haystack[haystackSize];
|
||||
haystack[haystackSize] = '\0';
|
||||
@ -852,34 +830,34 @@ char *gb_strncasestr ( char *haystack , int32_t haystackSize , char *needle ) {
|
||||
// . store "t" into "s"
|
||||
// . returns bytes stored into "s"
|
||||
// . NULL terminates "s" if slen > 0
|
||||
int32_t saftenTags ( char *s , int32_t slen , char *t , int32_t tlen ) {
|
||||
char *start = s ;
|
||||
int32_t saftenTags ( char *dst , int32_t dstlen , const char *src , int32_t srclen ) {
|
||||
char *start = dst ;
|
||||
// bail if slen is 0
|
||||
if ( slen <= 0 ) return 0;
|
||||
if ( dst <= 0 ) return 0;
|
||||
// leave a char for the \0
|
||||
char *send = s + slen - 1;
|
||||
char *tend = t + tlen;
|
||||
for ( ; t < tend && s + 4 < send ; t++ ) {
|
||||
if ( *t == '<' ) {
|
||||
*s++ = '&';
|
||||
*s++ = 'l';
|
||||
*s++ = 't';
|
||||
*s++ = ';';
|
||||
char *dstend = dst + dstlen - 1;
|
||||
const char *srcend = src + srclen;
|
||||
for ( ; src < srcend && dst + 4 < dstend ; src++ ) {
|
||||
if ( *src == '<' ) {
|
||||
*dst++ = '&';
|
||||
*dst++ = 'l';
|
||||
*dst++ = 't';
|
||||
*dst++ = ';';
|
||||
continue;
|
||||
}
|
||||
if ( *t == '>' ) {
|
||||
*s++ = '&';
|
||||
*s++ = 'g';
|
||||
*s++ = 't';
|
||||
*s++ = ';';
|
||||
if ( *src == '>' ) {
|
||||
*dst++ = '&';
|
||||
*dst++ = 'g';
|
||||
*dst++ = 't';
|
||||
*dst++ = ';';
|
||||
continue;
|
||||
}
|
||||
*s++ = *t;
|
||||
*dst++ = *src;
|
||||
}
|
||||
// NULL terminate "s"
|
||||
*s = '\0';
|
||||
// NULL terminate "dst"
|
||||
*dst = '\0';
|
||||
// return # of bytes, excluding \0, stored into s
|
||||
return s - start;
|
||||
return dst - start;
|
||||
}
|
||||
|
||||
// . if "doSpecial" is true, then we change <, > and & to
|
||||
@ -888,11 +866,11 @@ int32_t saftenTags ( char *s , int32_t slen , char *t , int32_t tlen ) {
|
||||
// UnicodeData.txt:22E7;GREATER-THAN BUT NOT EQUIVALENT TO;Sm;0;ON;;;;;Y;
|
||||
// UnicodeData.txt:E0026;TAG AMPERSAND;Cf;0;BN;;;;;N;;;;;
|
||||
// UnicodeData.txt:235E;APL FUNCTIONAL SYMBOL QUOTE QUAD;So;0;L;;;;;N;;;;;
|
||||
int32_t htmlDecode ( char *dst , char *src , int32_t srcLen , bool doSpecial ,
|
||||
int32_t htmlDecode ( char *dst , const char *src , int32_t srcLen , bool doSpecial ,
|
||||
int32_t niceness ) {
|
||||
if ( srcLen == 0 ) return 0;
|
||||
char *start = dst;
|
||||
char *srcEnd = src + srcLen;
|
||||
const char *srcEnd = src + srcLen;
|
||||
for ( ; src < srcEnd ; ) {
|
||||
// breathe
|
||||
QUICKPOLL(niceness);
|
||||
@ -995,44 +973,12 @@ int32_t htmlDecode ( char *dst , char *src , int32_t srcLen , bool doSpecial ,
|
||||
return dst - start;
|
||||
}
|
||||
|
||||
// cdata
|
||||
int32_t cdataDecode ( char *dst , char *src , int32_t niceness ) {
|
||||
if ( ! src ) return 0;
|
||||
char *start = dst;
|
||||
for ( ; *src ; ) {
|
||||
// breathe
|
||||
QUICKPOLL(niceness);
|
||||
// utf8 support?
|
||||
char size = getUtf8CharSize(src);
|
||||
// see SafeBuf::cdataEncode() we do the opposite here
|
||||
if ( src[0] != ']' ||
|
||||
src[1] != ']' ||
|
||||
src[2] != '&' ||
|
||||
src[3] != 'g' ||
|
||||
src[4] != 't' ) {
|
||||
if ( size == 1 ) { *dst++ = *src++; continue; }
|
||||
gbmemcpy ( dst , src , size );
|
||||
src += size;
|
||||
dst += size;
|
||||
continue;
|
||||
//*dst++ = *src++; continue; }
|
||||
}
|
||||
// make it ]]>
|
||||
gbmemcpy ( dst , "]]>" , 3 );
|
||||
src += 5;
|
||||
dst += 3;
|
||||
}
|
||||
// NULL term
|
||||
*dst = '\0';
|
||||
return dst - start;
|
||||
}
|
||||
|
||||
// . make something safe as an form input value by translating the quotes
|
||||
// . store "t" into "s" and return bytes stored
|
||||
// . does not do bounds checking
|
||||
int32_t dequote ( char *s , char *send , char *t , int32_t tlen ) {
|
||||
int32_t dequote ( char *s , char *send , const char *t , int32_t tlen ) {
|
||||
char *start = s;
|
||||
char *tend = t + tlen;
|
||||
const char *tend = t + tlen;
|
||||
for ( ; t < tend && s < send ; t++ ) {
|
||||
if ( *t == '"' ) {
|
||||
if ( s + 5 >= send ) return 0;
|
||||
@ -1051,129 +997,71 @@ int32_t dequote ( char *s , char *send , char *t , int32_t tlen ) {
|
||||
return s - start;
|
||||
}
|
||||
|
||||
bool dequote ( SafeBuf* sb , char *t , int32_t tlen ) {
|
||||
char *tend = t + tlen;
|
||||
for ( ; t < tend; t++ ) {
|
||||
if ( *t == '"' ) {
|
||||
sb->safeMemcpy(""", 5);
|
||||
continue;
|
||||
}
|
||||
*sb += *t;
|
||||
}
|
||||
*sb += '\0';
|
||||
return true;
|
||||
}
|
||||
|
||||
//int32_t dequote ( char *s , char *t ) {
|
||||
// return dequote ( s , t , gbstrlen ( t ) );
|
||||
//}
|
||||
|
||||
// . entity-ize a string so it's safe for html output
|
||||
// . store "t" into "s" and return bytes stored
|
||||
// . does bounds checking
|
||||
char *htmlEncode ( char *s , char *send , char *t , char *tend , bool pound ,
|
||||
int32_t niceness ) {
|
||||
for ( ; t < tend ; t++ ) {
|
||||
char *htmlEncode ( char *dst, char *dstend, const char *src, const char *srcend,
|
||||
bool pound, int32_t niceness ) {
|
||||
for ( ; src < srcend ; src++ ) {
|
||||
QUICKPOLL(niceness);
|
||||
if ( s + 7 >= send ) { *s = '\0'; return s; }
|
||||
if ( *t == '"' ) {
|
||||
*s++ = '&';
|
||||
*s++ = '#';
|
||||
*s++ = '3';
|
||||
*s++ = '4';
|
||||
*s++ = ';';
|
||||
if ( dst + 7 >= dstend ) { *dst = '\0'; return dst; }
|
||||
if ( *src == '"' ) {
|
||||
*dst++ = '&';
|
||||
*dst++ = '#';
|
||||
*dst++ = '3';
|
||||
*dst++ = '4';
|
||||
*dst++ = ';';
|
||||
continue;
|
||||
}
|
||||
if ( *t == '<' ) {
|
||||
*s++ = '&';
|
||||
*s++ = 'l';
|
||||
*s++ = 't';
|
||||
*s++ = ';';
|
||||
if ( *src == '<' ) {
|
||||
*dst++ = '&';
|
||||
*dst++ = 'l';
|
||||
*dst++ = 't';
|
||||
*dst++ = ';';
|
||||
continue;
|
||||
}
|
||||
if ( *t == '>' ) {
|
||||
*s++ = '&';
|
||||
*s++ = 'g';
|
||||
*s++ = 't';
|
||||
*s++ = ';';
|
||||
if ( *src == '>' ) {
|
||||
*dst++ = '&';
|
||||
*dst++ = 'g';
|
||||
*dst++ = 't';
|
||||
*dst++ = ';';
|
||||
continue;
|
||||
}
|
||||
if ( *t == '&' ) {
|
||||
*s++ = '&';
|
||||
*s++ = 'a';
|
||||
*s++ = 'm';
|
||||
*s++ = 'p';
|
||||
*s++ = ';';
|
||||
if ( *src == '&' ) {
|
||||
*dst++ = '&';
|
||||
*dst++ = 'a';
|
||||
*dst++ = 'm';
|
||||
*dst++ = 'p';
|
||||
*dst++ = ';';
|
||||
continue;
|
||||
}
|
||||
if ( *t == '#' && pound ) {
|
||||
*s++ = '&';
|
||||
*s++ = '#';
|
||||
*s++ = '0';
|
||||
*s++ = '3';
|
||||
*s++ = '5';
|
||||
*s++ = ';';
|
||||
if ( *src == '#' && pound ) {
|
||||
*dst++ = '&';
|
||||
*dst++ = '#';
|
||||
*dst++ = '0';
|
||||
*dst++ = '3';
|
||||
*dst++ = '5';
|
||||
*dst++ = ';';
|
||||
continue;
|
||||
}
|
||||
*s++ = *t;
|
||||
*dst++ = *src;
|
||||
}
|
||||
*s = '\0';
|
||||
return s;
|
||||
*dst = '\0';
|
||||
return dst;
|
||||
}
|
||||
|
||||
|
||||
// . entity-ize a string so it's safe for html output
|
||||
// . store "t" into "s" and return true on success
|
||||
bool htmlEncode ( SafeBuf* s , char *t , char *tend , bool pound ,
|
||||
int32_t niceness ) {
|
||||
for ( ; t < tend ; t++ ) {
|
||||
QUICKPOLL(niceness);
|
||||
if ( *t == '"' ) {
|
||||
s->safeMemcpy(""", 5);
|
||||
continue;
|
||||
}
|
||||
if ( *t == '<' ) {
|
||||
s->safeMemcpy("<", 4);
|
||||
continue;
|
||||
}
|
||||
if ( *t == '>' ) {
|
||||
s->safeMemcpy(">", 4);
|
||||
continue;
|
||||
}
|
||||
if ( *t == '&' ) {
|
||||
s->safeMemcpy("&", 5);
|
||||
continue;
|
||||
}
|
||||
if ( *t == '#' && pound ) {
|
||||
s->safeMemcpy("#", 6);
|
||||
continue;
|
||||
}
|
||||
// our own specially decoded entites!
|
||||
if ( *t == '+' && t[1]=='!' && t[2]=='-' ) {
|
||||
s->safeMemcpy("<",4);
|
||||
continue;
|
||||
}
|
||||
// our own specially decoded entites!
|
||||
if ( *t == '-' && t[1]=='!' && t[2]=='+' ) {
|
||||
s->safeMemcpy(">",4);
|
||||
continue;
|
||||
}
|
||||
*s += *t;
|
||||
}
|
||||
*s += '\0';
|
||||
return true;
|
||||
}
|
||||
|
||||
// . convert "-->%22 , &-->%26, +-->%2b, space-->+, ?-->%3f is that it?
|
||||
// . convert so we can display as a cgi PARAMETER within a url
|
||||
// . used by HttPage2 (cached web page) to encode the query into a url
|
||||
// . used by PageRoot to do likewise
|
||||
// . returns bytes written into "d" not including terminating \0
|
||||
int32_t urlEncode ( char *d , int32_t dlen , char *s , int32_t slen, bool requestPath ) {
|
||||
int32_t urlEncode ( char *d , int32_t dlen , const char *s , int32_t slen, bool requestPath ) {
|
||||
char *dstart = d;
|
||||
// subtract 1 to make room for a terminating \0
|
||||
char *dend = d + dlen - 1;
|
||||
char *send = s + slen;
|
||||
const char *send = s + slen;
|
||||
for ( ; s < send && d < dend ; s++ ) {
|
||||
if ( *s == '\0' && requestPath ) {
|
||||
*d++ = *s;
|
||||
@ -1221,48 +1109,9 @@ int32_t urlEncode ( char *d , int32_t dlen , char *s , int32_t slen, bool reques
|
||||
return d - dstart;
|
||||
}
|
||||
|
||||
// determine the length of the encoded url, does NOT include NULL
|
||||
int32_t urlEncodeLen ( char *s , int32_t slen , bool requestPath ) {
|
||||
int32_t dLen = 0;
|
||||
char *send = s + slen;
|
||||
for ( ; s < send ; s++ ) {
|
||||
if ( *s == '\0' && requestPath ) {
|
||||
dLen++;
|
||||
continue;
|
||||
}
|
||||
// encode if not fit for display
|
||||
if ( ! is_ascii ( *s ) ) goto encode;
|
||||
switch ( *s ) {
|
||||
case ' ': goto encode;
|
||||
case '&': goto encode;
|
||||
case '"': goto encode;
|
||||
case '+': goto encode;
|
||||
case '%': goto encode;
|
||||
case '#': goto encode;
|
||||
// encoding < and > are more for displaying on an
|
||||
// html page than sending to an http server
|
||||
case '>': goto encode;
|
||||
case '<': goto encode;
|
||||
case '?': if ( requestPath ) break;
|
||||
goto encode;
|
||||
}
|
||||
// otherwise, no need to encode
|
||||
dLen++;
|
||||
continue;
|
||||
encode:
|
||||
// space to +
|
||||
if ( *s == ' ' ) { dLen++; continue; }
|
||||
// hex code
|
||||
dLen += 3; // %XX
|
||||
}
|
||||
//dLen++; // NULL TERM
|
||||
// and return the length
|
||||
return dLen;
|
||||
}
|
||||
|
||||
// . decodes "s/slen" and stores into "dest"
|
||||
// . returns the number of bytes stored into "dest"
|
||||
int32_t urlDecode ( char *dest , char *s , int32_t slen ) {
|
||||
int32_t urlDecode ( char *dest , const char *s , int32_t slen ) {
|
||||
int32_t j = 0;
|
||||
for ( int32_t i = 0 ; i < slen ; i++ ) {
|
||||
if ( s[i] == '+' ) { dest[j++]=' '; continue; }
|
||||
@ -1282,7 +1131,7 @@ int32_t urlDecode ( char *dest , char *s , int32_t slen ) {
|
||||
}
|
||||
|
||||
|
||||
int32_t urlDecodeNoZeroes ( char *dest , char *s , int32_t slen ) {
|
||||
int32_t urlDecodeNoZeroes ( char *dest , const char *s , int32_t slen ) {
|
||||
int32_t j = 0;
|
||||
for ( int32_t i = 0 ; i < slen ; i++ ) {
|
||||
if ( s[i] == '+' ) { dest[j++]=' '; continue; }
|
||||
@ -1308,91 +1157,6 @@ int32_t urlDecodeNoZeroes ( char *dest , char *s , int32_t slen ) {
|
||||
return j;
|
||||
}
|
||||
|
||||
// . like above, but only decodes chars that should not have been encoded
|
||||
// . will also encode binary chars
|
||||
int32_t urlNormCode ( char *d , int32_t dlen , char *s , int32_t slen ) {
|
||||
// save start of detination buffer for returning the length
|
||||
char *dstart = d;
|
||||
// subtract 1 for NULL termination
|
||||
char *dend = d + dlen - 1;
|
||||
char *send = s + slen;
|
||||
for ( ; s < send && d < dend ; s++ ) {
|
||||
// if its non-ascii, encode it so it displays correctly
|
||||
if ( ! is_ascii ( *s ) ) {
|
||||
// break if no room to encode it
|
||||
if ( d + 2 >= dend ) break;
|
||||
// store it encoded
|
||||
*d++ = '%';
|
||||
// store first hex digit
|
||||
unsigned char v = ((unsigned char)*s)/16 ;
|
||||
if ( v < 10 ) v += '0';
|
||||
else v += 'A' - 10;
|
||||
*d++ = v;
|
||||
// store second hex digit
|
||||
v = ((unsigned char)*s) & 0x0f ;
|
||||
if ( v < 10 ) v += '0';
|
||||
else v += 'A' - 10;
|
||||
*d++ = v;
|
||||
continue;
|
||||
}
|
||||
// store it
|
||||
*d++ = *s;
|
||||
// but it might be something encoded that should not have been
|
||||
if ( *s != '%' ) continue;
|
||||
// it requires to following chars to decode
|
||||
if ( s + 2 >= send ) continue;
|
||||
// if two chars after are not hex chars, it's not an encoding
|
||||
if ( ! is_hex ( s[1] ) ) continue;
|
||||
if ( ! is_hex ( s[2] ) ) continue;
|
||||
// convert hex chars to values
|
||||
unsigned char a = htob ( s[1] ) * 16;
|
||||
unsigned char b = htob ( s[2] ) ;
|
||||
unsigned char v = a + b;
|
||||
// don't decode if it decodes in these chars
|
||||
switch ( v ) {
|
||||
case ' ': continue;
|
||||
case '&': continue;
|
||||
case '"': continue;
|
||||
case '+': continue;
|
||||
case '%': continue;
|
||||
case '>': continue;
|
||||
case '<': continue;
|
||||
case '?': continue;
|
||||
case '=': continue;
|
||||
}
|
||||
// otherwise, it's fine to decode it
|
||||
d[-1] = (char) (a + b);
|
||||
// skip over those 2 chars as well as leading '%'
|
||||
s += 2;
|
||||
}
|
||||
// NULL terminate
|
||||
*d = '\0';
|
||||
// return length
|
||||
return d - dstart ;
|
||||
}
|
||||
|
||||
// approximate # of non-punct words
|
||||
int32_t getNumWords ( char *s ) {
|
||||
int32_t count = 0;
|
||||
loop:
|
||||
// skip punct
|
||||
while ( ! is_alnum_a(*s) ) s++;
|
||||
// bail if done
|
||||
if ( !*s ) return count;
|
||||
// count a word
|
||||
count++;
|
||||
// skip word
|
||||
while ( is_alnum_a(*s) ) s++;
|
||||
// watch for ' letter punct
|
||||
if ( *s=='\'' && is_alnum_a(*(s+1)) && !is_alnum_a(*(s+2)) ) {
|
||||
// skip apostrophe
|
||||
s++;
|
||||
// skip rest of word
|
||||
while ( is_alnum_a(*s) ) s++;
|
||||
}
|
||||
goto loop;
|
||||
}
|
||||
|
||||
static int64_t s_adjustment = 0;
|
||||
|
||||
int64_t globalToLocalTimeMilliseconds ( int64_t global ) {
|
||||
@ -1428,7 +1192,7 @@ static char s_tafile[1024];
|
||||
static bool s_hasFileName = false;
|
||||
|
||||
// returns false and sets g_errno on error
|
||||
bool setTimeAdjustmentFilename ( char *dir, char *filename ) {
|
||||
bool setTimeAdjustmentFilename ( const char *dir, const char *filename ) {
|
||||
s_hasFileName = true;
|
||||
int32_t len1 = gbstrlen(dir);
|
||||
int32_t len2 = gbstrlen(filename);
|
||||
@ -1935,7 +1699,7 @@ bool is_urlchar(char s) {
|
||||
return false;
|
||||
}
|
||||
// don't allow "> in our input boxes
|
||||
int32_t cleanInput(char *outbuf, int32_t outbufSize, char *inbuf, int32_t inbufLen){
|
||||
int32_t cleanInput(char *outbuf, int32_t outbufSize, const char *inbuf, int32_t inbufLen){
|
||||
char *p = outbuf;
|
||||
int32_t numQuotes=0;
|
||||
int32_t lastQuote = 0;
|
||||
@ -2168,57 +1932,11 @@ bool deserializeMsg2 ( char **firstStrPtr , // ptr_url
|
||||
return true;
|
||||
}
|
||||
|
||||
// print it to stdout for debugging Dates.cpp
|
||||
int32_t printTime ( time_t ttt ) {
|
||||
//char *s = ctime(&ttt);
|
||||
// print in UTC!
|
||||
char *s = asctime ( gmtime(&ttt) );
|
||||
// strip \n
|
||||
s[gbstrlen(s)-1] = '\0';
|
||||
fprintf(stderr,"%s UTC\n",s);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// this uses our local timezone which is MST, so we need to tell
|
||||
// it to use UTC somehow...
|
||||
time_t mktime_utc ( struct tm *ttt ) {
|
||||
time_t local = mktime ( ttt );
|
||||
// bad?
|
||||
if ( local < 0 ) return local;
|
||||
/*
|
||||
// sanity check
|
||||
static char s_mm = 1;
|
||||
static int32_t s_localOff;
|
||||
if ( s_mm ) {
|
||||
s_mm = 0;
|
||||
struct tm ff;
|
||||
ff.tm_mon = 0;
|
||||
ff.tm_year = 70;
|
||||
ff.tm_mday = 1;
|
||||
ff.tm_hour = 0;
|
||||
ff.tm_min = 0;
|
||||
ff.tm_sec = 0;
|
||||
int32_t qq = mktime ( &ff );
|
||||
//fprintf(stderr,"qq=%"INT32"\n",qq);
|
||||
// . set this then
|
||||
// . we subtract s_localOff to further mktime() returns to
|
||||
// get it into utc
|
||||
s_localOff = qq;
|
||||
// sanity
|
||||
if ( s_localOff != timezone ) { char *xx=NULL;*xx=0; }
|
||||
}
|
||||
*/
|
||||
// see what our timezone is!
|
||||
//fprintf(stderr,"%"INT32"=tz\n",timezone);
|
||||
// mod that
|
||||
return local - timezone;
|
||||
}
|
||||
|
||||
bool verifyUtf8 ( char *txt , int32_t tlen ) {
|
||||
bool verifyUtf8 ( const char *txt , int32_t tlen ) {
|
||||
if ( ! txt || tlen <= 0 ) return true;
|
||||
char size;
|
||||
char *p = txt;
|
||||
char *pend = txt + tlen;
|
||||
const char *p = txt;
|
||||
const char *pend = txt + tlen;
|
||||
for ( ; p < pend ; p += size ) {
|
||||
size = getUtf8CharSize(p);
|
||||
// skip if ascii
|
||||
@ -2240,7 +1958,7 @@ bool verifyUtf8 ( char *txt , int32_t tlen ) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool verifyUtf8 ( char *txt ) {
|
||||
bool verifyUtf8 ( const char *txt ) {
|
||||
int32_t tlen = gbstrlen(txt);
|
||||
return verifyUtf8(txt,tlen);
|
||||
}
|
||||
|
199
fctypes.h
199
fctypes.h
@ -12,22 +12,13 @@
|
||||
// the hash value as 32 bits in posdb
|
||||
typedef uint32_t FacetValHash_t;
|
||||
|
||||
bool verifyUtf8 ( char *txt ) ;
|
||||
bool verifyUtf8 ( char *txt , int32_t tlen ) ;
|
||||
bool verifyUtf8 ( const char *txt ) ;
|
||||
bool verifyUtf8 ( const char *txt , int32_t tlen ) ;
|
||||
|
||||
bool print96 ( char *k ) ;
|
||||
bool print96 ( key_t *kp ) ;
|
||||
bool print128 ( char *k ) ;
|
||||
bool print128 ( key128_t *kp ) ;
|
||||
|
||||
|
||||
// print it to stdout for debugging Dates.cpp
|
||||
int32_t printTime ( int32_t ttt );
|
||||
time_t mktime_utc ( struct tm *ttt ) ;
|
||||
|
||||
class SafeBuf;
|
||||
// this too
|
||||
char *strncasestr( char *haystack, int32_t haylen, char *needle);
|
||||
char *strncasestr( char *haystack, int32_t haylen, const char *needle);
|
||||
|
||||
// just like sprintf(s,"%"UINT64"",n), but we insert commas
|
||||
int32_t ulltoa ( char *s , uint64_t n ) ;
|
||||
@ -36,47 +27,37 @@ int32_t ulltoa ( char *s , uint64_t n ) ;
|
||||
// . store "t" into "s"
|
||||
// . returns bytes stored into "s"
|
||||
// . NULL terminates "s"
|
||||
int32_t saftenTags ( char *s , int32_t slen , char *t , int32_t tlen ) ;
|
||||
int32_t saftenTags ( char *dst , int32_t dstlen , const char *src , int32_t srclen ) ;
|
||||
|
||||
// . basically just converts "'s to &#34;'s
|
||||
// . store "src" into "dest" and return bytes stored
|
||||
// . does not do bounds checking in "dest"
|
||||
// . used to encode things as form input variables, like query in HttpPage0.cpp
|
||||
int32_t dequote ( char *dest , char *dend , char *src , int32_t srcLen ) ;
|
||||
int32_t dequote ( char *dest , char *dend , const char *src , int32_t srcLen ) ;
|
||||
|
||||
// . entity-ize a string so it's safe for html output
|
||||
// . converts "'s to &#34;'s, &'s to &amp;'s; <'s to &lt; and >'s to &gt;
|
||||
// . store "src" into "dest" and return bytes stored
|
||||
// . does not do bounds checking on "dest"
|
||||
// . encode t into s
|
||||
char *htmlEncode ( char *s , char *send , char *t , char *tend ,
|
||||
char *htmlEncode ( char *dst, char *dstend, const char *src, const char *srcend,
|
||||
bool pound = false , int32_t niceness = 0) ;
|
||||
bool htmlEncode ( SafeBuf* s , char *t , char *tend ,
|
||||
bool pound = false , int32_t niceness = 0 );
|
||||
|
||||
// . like above but src is NULL terminated
|
||||
// . returns length of string stored into "dest"
|
||||
// . decode html entities like &amp; and &gt;
|
||||
int32_t htmlDecode ( char *dst, char *src, int32_t srcLen,
|
||||
int32_t htmlDecode ( char *dst, const char *src, int32_t srcLen,
|
||||
bool doSpecial ,//=false);
|
||||
int32_t niceness);
|
||||
|
||||
int32_t cdataDecode ( char *dst , char *src , int32_t niceness ) ;
|
||||
|
||||
// . convert " to %22 , & to %26, is that it?
|
||||
// . urlEncode() stores the encoded, NULL-terminated URL in "dest"
|
||||
// . requestPath leaves \0 and ? characters intact, for encoding requests
|
||||
int32_t urlEncode ( char *dest , int32_t destLen , char *src , int32_t srcLen ,
|
||||
int32_t urlEncode ( char *dest , int32_t destLen , const char *src , int32_t srcLen ,
|
||||
bool requestPath = false ) ;
|
||||
// determine the length of the encoded url, does NOT include NULL
|
||||
int32_t urlEncodeLen ( char *s , int32_t slen , bool requestPath = false ) ;
|
||||
// decode a url -- decode ALL %XX's
|
||||
int32_t urlDecode ( char *dest , char *t , int32_t tlen ) ;
|
||||
int32_t urlDecodeNoZeroes ( char *dest , char *t , int32_t tlen ) ;
|
||||
// . normalize the encoding
|
||||
// . like urlDecode() but only decodes chars that should not have been encoded
|
||||
// . also, will encode characters that should have been encoded
|
||||
int32_t urlNormCode ( char *dest , int32_t destLen , char *src , int32_t srcLen ) ;
|
||||
int32_t urlDecode ( char *dest , const char *src , int32_t tlen ) ;
|
||||
int32_t urlDecodeNoZeroes ( char *dest , const char *src , int32_t tlen ) ;
|
||||
|
||||
bool is_digit(unsigned char c) ;
|
||||
|
||||
@ -90,36 +71,21 @@ char btoh ( char s ) ;
|
||||
// convert hex ascii string into binary
|
||||
void hexToBin ( const char *src , int32_t srcLen , char *dst );
|
||||
// convert binary number of size srcLen bytes into hex string in "dst"
|
||||
void binToHex ( unsigned char *src , int32_t srcLen , char *dst );
|
||||
void binToHex ( const unsigned char *src , int32_t srcLen , char *dst );
|
||||
|
||||
// the _a suffix denotes an ascii string
|
||||
bool has_alpha_utf8(char *s, char *send ) ;
|
||||
bool is_cap_utf8 (char *s,int32_t len) ;
|
||||
bool is_cap_utf8 (const char *s,int32_t len) ;
|
||||
|
||||
// does it have at least one upper case character in it?
|
||||
bool has_upper_a (char *s,int32_t len) ;
|
||||
bool has_binary_a (char *s,int32_t len) ;
|
||||
void to_lower3_a (char *s,int32_t len, char *buf) ;
|
||||
void to_lower3_a (const char *s,int32_t len, char *buf) ;
|
||||
|
||||
void to_lower1 (char *s) ;
|
||||
int32_t to_lower_alnum (char *s,int32_t len, char *buf) ;
|
||||
int32_t to_lower_utf8 (char *dst , char *src ) ;
|
||||
int32_t to_lower_utf8 (char *dst , char *dstEnd, char *src ) ;
|
||||
int32_t to_lower_utf8 (char *dst , char *dstEnd, char *src, char *srcEnd) ;
|
||||
int32_t to_lower_utf8_32 (char *src ) ;
|
||||
int32_t to_cap_alnum (char *s,int32_t len, char *buf) ;
|
||||
int32_t to_alnum (char *s,int32_t len, char *buf) ;
|
||||
void to_upper3_a (char *s,int32_t len, char *buf) ;
|
||||
void to_cap (char *s,int32_t len, char *buf) ;
|
||||
|
||||
// . approximate # of non-punct words
|
||||
// . s must be NULL terminated
|
||||
// . used by LinkInfo.cpp to weight link text based on # of words
|
||||
int32_t getNumWords ( char *s );
|
||||
|
||||
// true if character should be stripped from the end/beginning of the title
|
||||
// error! make a map of this
|
||||
bool is_title_junk(char c) ;
|
||||
int32_t to_lower_utf8 (char *dst , const char *src ) ;
|
||||
int32_t to_lower_utf8 (char *dst , char *dstEnd, const char *src ) ;
|
||||
int32_t to_lower_utf8 (char *dst , char *dstEnd, const char *src, const char *srcEnd) ;
|
||||
void to_upper3_a (const char *s,int32_t len, char *buf) ;
|
||||
|
||||
// . get the # of words in this string
|
||||
int32_t getNumWords ( char *s , int32_t len, int32_t titleVersion ) ;
|
||||
@ -131,15 +97,15 @@ double atod2 ( char *s, int32_t len ) ;
|
||||
bool atob ( const char *s, int32_t len ) ;
|
||||
|
||||
// like strstr but haystack need not be NULL terminated
|
||||
char *strncasestr ( char *haystack , char *needle , int32_t haystackSize ) ;
|
||||
char *strncasestr ( char *haystack , char *needle ,
|
||||
char *strncasestr ( char *haystack , const char *needle , int32_t haystackSize ) ;
|
||||
char *strncasestr ( char *haystack , const char *needle ,
|
||||
int32_t haystackSize, int32_t needleSize ) ;
|
||||
|
||||
// independent of case
|
||||
char *gb_strcasestr ( char *haystack , char *needle );
|
||||
char *gb_strncasestr ( char *haystack , int32_t haystackSize , char *needle ) ;
|
||||
char *gb_strcasestr ( char *haystack , const char *needle );
|
||||
char *gb_strncasestr ( char *haystack , int32_t haystackSize , const char *needle ) ;
|
||||
|
||||
char *strnstr2( char *haystack, int32_t haylen, char *needle);
|
||||
char *strnstr2( char *haystack, int32_t haylen, const char *needle);
|
||||
|
||||
// updates our static var, s_adjustment to keep our clock in sync to hostId #0
|
||||
void settimeofdayInMillisecondsGlobal ( int64_t newTime ) ;
|
||||
@ -193,7 +159,7 @@ extern int64_t g_adjustment;
|
||||
|
||||
bool isClockInSync();
|
||||
|
||||
bool setTimeAdjustmentFilename ( char *dir, char *filename ) ;
|
||||
bool setTimeAdjustmentFilename ( const char *dir, const char *filename ) ;
|
||||
bool loadTimeAdjustment ( ) ;
|
||||
bool saveTimeAdjustment ( ) ;
|
||||
|
||||
@ -215,7 +181,7 @@ bool saveTimeAdjustment ( ) ;
|
||||
#define is_hex(c) g_map_is_hex[(unsigned char)c]
|
||||
#define is_tagname_char(c) g_map_is_tagname_char[(unsigned char)c]
|
||||
|
||||
inline bool is_upper_utf8 ( char *s );
|
||||
inline bool is_upper_utf8 ( const char *s );
|
||||
|
||||
/*
|
||||
// is character, "s", used in textual hexadecimal representation?
|
||||
@ -241,90 +207,38 @@ inline char btoh ( char s ) {
|
||||
return (s - 10) + 'a';
|
||||
}
|
||||
|
||||
inline bool is_ascii2_a(char *s,int32_t len) {
|
||||
inline bool is_ascii2_a(const char *s, int32_t len) {
|
||||
for (int32_t i=0;i<len;i++)
|
||||
if (!is_ascii(s[i]))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
inline bool is_cap_utf8 (char *s,int32_t len) {
|
||||
inline bool is_cap_utf8 (const char *s, int32_t len) {
|
||||
if ( ! is_upper_utf8 ( s ) ) return false;
|
||||
char *send = s + len;
|
||||
const char *send = s + len;
|
||||
for ( ; s < send ; s += getUtf8CharSize ( s ) )
|
||||
if ( is_upper_utf8 ( s ) ) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
// does it have at least one upper case character in it?
|
||||
inline bool has_upper_a (char *s,int32_t len) {
|
||||
for (int32_t i=0;i<len;i++)
|
||||
if ( is_upper_a(s[i]))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
// does it have at least one binary character in it?
|
||||
inline bool has_binary_a (char *s,int32_t len) {
|
||||
for (int32_t i=0;i<len;i++)
|
||||
if ( is_binary_a(s[i]))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
inline void to_lower3_a(char *s,int32_t len, char *buf) {
|
||||
inline void to_lower3_a(const char *s, int32_t len, char *buf) {
|
||||
for (int32_t i=0;i<len ;i++)
|
||||
buf[i]=to_lower_a((unsigned char)s[i]);
|
||||
}
|
||||
|
||||
inline void to_lower1_a(char *s) {
|
||||
for (int32_t i=0;s[i] ;i++)
|
||||
s[i]=to_lower_a((unsigned char)s[i]);
|
||||
}
|
||||
|
||||
inline int32_t to_lower_alnum_a(char *s,int32_t len, char *buf) {
|
||||
int32_t j=0;
|
||||
for (int32_t i=0;i<len ;i++)
|
||||
if (is_alnum_a(s[i]))
|
||||
buf[j++]=to_lower_a((unsigned char)s[i]);
|
||||
return j;
|
||||
}
|
||||
|
||||
inline int32_t to_cap_alnum_a(char *s,int32_t len, char *buf) {
|
||||
buf[0] = to_upper_a(s[0]);
|
||||
int32_t j=1;
|
||||
for (int32_t i=1;i<len ;i++)
|
||||
if (is_alnum_a(s[i]))
|
||||
buf[j++]=to_lower_a((unsigned char)s[i]);
|
||||
return j;
|
||||
}
|
||||
|
||||
inline int32_t to_alnum_a(char *s,int32_t len, char *buf) {
|
||||
int32_t j=0;
|
||||
for (int32_t i=0;i<len ;i++)
|
||||
if (is_alnum_a(s[i]))
|
||||
buf[j++]=s[i];
|
||||
return j;
|
||||
}
|
||||
|
||||
inline void to_upper3_a(char *s,int32_t len, char *buf) {
|
||||
inline void to_upper3_a(const char *s, int32_t len, char *buf) {
|
||||
for (int32_t i=0;i<len;i++)
|
||||
buf[i]=to_upper_a(s[i]);
|
||||
}
|
||||
|
||||
inline void to_cap_a(char *s,int32_t len, char *buf) {
|
||||
buf[0]=to_upper_a(s[0]);
|
||||
for (int32_t i=1;i<len;i++)
|
||||
buf[i]=to_lower_a(s[i]);
|
||||
}
|
||||
|
||||
inline bool is_binary_utf8 ( char *p ) {
|
||||
inline bool is_binary_utf8 ( const char *p ) {
|
||||
if ( getUtf8CharSize((uint8_t *)p) != 1 ) return false;
|
||||
// it is ascii, use that table now
|
||||
return is_binary_a ( *p );
|
||||
}
|
||||
|
||||
inline bool is_lower_utf8 ( char *src ) {
|
||||
inline bool is_lower_utf8 ( const char *src ) {
|
||||
// if in ascii do it quickly
|
||||
if ( is_ascii3(*src) ) return is_lower_a ( *src );
|
||||
// convert to a code point
|
||||
@ -333,7 +247,7 @@ inline bool is_lower_utf8 ( char *src ) {
|
||||
return ucIsLower ( x );
|
||||
}
|
||||
|
||||
inline bool is_upper_utf8 ( char *src ) {
|
||||
inline bool is_upper_utf8 ( const char *src ) {
|
||||
// if in ascii do it quickly
|
||||
if ( is_ascii3(*src) ) return is_upper_a ( *src );
|
||||
// convert to a code point
|
||||
@ -342,7 +256,7 @@ inline bool is_upper_utf8 ( char *src ) {
|
||||
return ucIsUpper ( x );
|
||||
}
|
||||
|
||||
inline bool is_alnum_utf8 ( char *src ) {
|
||||
inline bool is_alnum_utf8 ( const char *src ) {
|
||||
// if in ascii do it quickly
|
||||
if ( is_ascii3(*src) ) return is_alnum_a ( *src );
|
||||
// convert to a code point
|
||||
@ -351,7 +265,7 @@ inline bool is_alnum_utf8 ( char *src ) {
|
||||
return ucIsAlnum ( x );
|
||||
}
|
||||
|
||||
inline bool is_alnum_utf8 ( unsigned char *src ) {
|
||||
inline bool is_alnum_utf8 ( const unsigned char *src ) {
|
||||
// if in ascii do it quickly
|
||||
if ( is_ascii3(*src) ) return is_alnum_a ( *src );
|
||||
// convert to a code point
|
||||
@ -360,7 +274,7 @@ inline bool is_alnum_utf8 ( unsigned char *src ) {
|
||||
return ucIsAlnum ( x );
|
||||
}
|
||||
|
||||
inline bool is_alpha_utf8 ( char *src ) {
|
||||
inline bool is_alpha_utf8 ( const char *src ) {
|
||||
// if in ascii do it quickly
|
||||
if ( is_ascii3(*src) ) return is_alpha_a ( *src );
|
||||
// convert to a code point
|
||||
@ -369,7 +283,7 @@ inline bool is_alpha_utf8 ( char *src ) {
|
||||
return ucIsAlpha ( x );
|
||||
}
|
||||
|
||||
inline bool is_punct_utf8 ( char *src ) {
|
||||
inline bool is_punct_utf8 ( const char *src ) {
|
||||
// if in ascii do it quickly
|
||||
if ( is_ascii3(*src) ) return is_punct_a ( *src );
|
||||
// convert to a code point
|
||||
@ -379,7 +293,7 @@ inline bool is_punct_utf8 ( char *src ) {
|
||||
else return true;
|
||||
}
|
||||
|
||||
inline bool is_wspace_utf8 ( uint8_t *src ) {
|
||||
inline bool is_wspace_utf8 ( const uint8_t *src ) {
|
||||
// if in ascii do it quickly
|
||||
if ( is_ascii3(*src) ) return is_wspace_a ( *src );
|
||||
// convert to a code point
|
||||
@ -388,7 +302,7 @@ inline bool is_wspace_utf8 ( uint8_t *src ) {
|
||||
return is_wspace_uc ( x );
|
||||
}
|
||||
|
||||
inline bool is_wspace_utf8 ( char *src ) {
|
||||
inline bool is_wspace_utf8 ( const char *src ) {
|
||||
// if in ascii do it quickly
|
||||
if ( is_ascii3((uint8_t)*src) ) return is_wspace_a ( (uint8_t)*src );
|
||||
// convert to a code point
|
||||
@ -400,7 +314,7 @@ inline bool is_wspace_utf8 ( char *src ) {
|
||||
// . returns bytes stored into "dst" from "src"
|
||||
// . just do one character, which may be from 1 to 4 bytes
|
||||
// . TODO: make a native utf8 to_lower to avoid converting to a code point
|
||||
inline int32_t to_lower_utf8 ( char *dst , char *src ) {
|
||||
inline int32_t to_lower_utf8 ( char *dst , const char *src ) {
|
||||
// if in ascii do it quickly
|
||||
if ( is_ascii3(*src) ) { *dst = to_lower_a ( *src ); return 1; }
|
||||
// convert to a code point
|
||||
@ -411,18 +325,6 @@ inline int32_t to_lower_utf8 ( char *dst , char *src ) {
|
||||
return utf8Encode ( y , dst );
|
||||
}
|
||||
|
||||
// store answer in the int32_t and return that!
|
||||
inline int32_t to_lower_utf8_32 ( char *src ) {
|
||||
// if in ascii do it quickly
|
||||
if ( is_ascii3(*src) ) return (int32_t) to_lower_a ( *src );
|
||||
// convert to a code point
|
||||
UChar32 x = utf8Decode(src);
|
||||
// convert to lower
|
||||
UChar32 y = ucToLower ( x );
|
||||
// give that back
|
||||
return y;
|
||||
}
|
||||
|
||||
inline int32_t to_upper_utf8 ( char *dst , char *src ) {
|
||||
// if in ascii do it quickly
|
||||
if ( is_ascii3(*src) ) { *dst = to_upper_a ( *src ); return 1; }
|
||||
@ -434,7 +336,7 @@ inline int32_t to_upper_utf8 ( char *dst , char *src ) {
|
||||
return utf8Encode ( y , dst );
|
||||
}
|
||||
|
||||
inline int32_t to_lower_utf8 (char *dst, char *dstEnd, char *src, char *srcEnd ){
|
||||
inline int32_t to_lower_utf8 (char *dst, char *dstEnd, const char *src, const char *srcEnd ){
|
||||
char *dstart = dst;
|
||||
for ( ; src < srcEnd ; src += getUtf8CharSize((uint8_t *)src) )
|
||||
dst += to_lower_utf8 ( dst , src );
|
||||
@ -442,7 +344,7 @@ inline int32_t to_lower_utf8 (char *dst, char *dstEnd, char *src, char *srcEnd )
|
||||
return dst - dstart;
|
||||
}
|
||||
|
||||
inline int32_t to_lower_utf8 (char *dst, char *dstEnd, char *src ){
|
||||
inline int32_t to_lower_utf8 (char *dst, char *dstEnd, const char *src ){
|
||||
char *dstart = dst;
|
||||
for ( ; *src ; src += getUtf8CharSize((uint8_t *)src) )
|
||||
dst += to_lower_utf8 ( dst , src );
|
||||
@ -478,24 +380,7 @@ inline bool ucIsWordChar(UChar32 c) {
|
||||
}
|
||||
|
||||
// don't allow "> in our input boxes
|
||||
int32_t cleanInput(char *outbuf, int32_t outbufSize, char *inbuf, int32_t inbufLen);
|
||||
|
||||
// like strcpy but returns the length and always NUL-terminates
|
||||
// dst should be of size maxDstLen + 1
|
||||
inline int32_t setstr ( char *dst,
|
||||
int32_t maxDstLen,
|
||||
char *src,
|
||||
int32_t srcLen ) {
|
||||
// get the proper length
|
||||
int32_t dstLen = srcLen;
|
||||
if ( srcLen > maxDstLen ) dstLen = maxDstLen;
|
||||
// copy the string
|
||||
gbmemcpy ( dst, src, dstLen );
|
||||
// NULL terminate
|
||||
dst[dstLen] = '\0';
|
||||
// return the proper length
|
||||
return dstLen;
|
||||
}
|
||||
int32_t cleanInput(char *outbuf, int32_t outbufSize, const char *inbuf, int32_t inbufLen);
|
||||
|
||||
//
|
||||
// these three functions replace the Msg.cpp/.h class
|
||||
|
4
hash.h
4
hash.h
@ -35,7 +35,7 @@ uint32_t hash32_cont ( char *s, char *slen,
|
||||
uint32_t startHash , int32_t *conti );
|
||||
uint64_t hash64n ( char *s, uint64_t startHash =0LL);
|
||||
uint64_t hash64 ( uint64_t h1,uint64_t h2);
|
||||
uint64_t hash64 ( char *s,int32_t len,uint64_t startHash=0);
|
||||
uint64_t hash64 ( const char *s,int32_t len,uint64_t startHash=0);
|
||||
uint64_t hash64_cont ( char *s,int32_t len,
|
||||
uint64_t startHash,int32_t *conti);
|
||||
uint64_t hash64b ( char *s, uint64_t startHash=0);
|
||||
@ -74,7 +74,7 @@ inline uint64_t hash64b ( char *s , uint64_t startHash ) {
|
||||
return h;
|
||||
}
|
||||
|
||||
inline uint64_t hash64 ( char *s, int32_t len,
|
||||
inline uint64_t hash64 ( const char *s, int32_t len,
|
||||
uint64_t startHash ) {
|
||||
uint64_t h = startHash;
|
||||
int32_t i = 0;
|
||||
|
Reference in New Issue
Block a user