fctypes: const + remove unused

Added const-qualifier to pointer parameters in fctypes.h and removed unused functions
too. const propagated down to hash.h and Entities.cpp but no further.
This commit is contained in:
Ivan Skytte Jørgensen
2015-12-07 19:50:31 +01:00
parent 08986fd341
commit d44cabfdb2
6 changed files with 133 additions and 530 deletions

@ -7,15 +7,15 @@
// JAB: const-ness for optimizer...
// don't call these, they're used internally
static bool initEntityTable();
static uint32_t getTextEntity ( char *s , int32_t len );
static uint32_t getDecimalEntity ( char *s , int32_t len );
static uint32_t getHexadecimalEntity ( char *s , int32_t len );
static uint32_t getTextEntity ( const char *s , int32_t len );
static uint32_t getDecimalEntity ( const char *s , int32_t len );
static uint32_t getHexadecimalEntity ( const char *s , int32_t len );
// . s[maxLen] should be the NULL
// . returns full length of entity @ "s" if there is a valid one, 0 otherwise
// . sets *c to the iso character the entity represents (if there is one)
// JAB: const-ness for optimizer...
int32_t getEntity_a ( char *s , int32_t maxLen , uint32_t *c ) {
int32_t getEntity_a ( const char *s , int32_t maxLen , uint32_t *c ) {
// ensure there's an & as first char
if ( s[0] != '&' ) return 0;
// compute maximum length of entity, if it's indeed an entity
@ -424,7 +424,7 @@ static bool initEntityTable(){
// . return the 32-bit unicode char it represents
// . returns 0 if none
// . JAB: const-ness for optimizer...
uint32_t getTextEntity ( char *s , int32_t len ) {
uint32_t getTextEntity ( const char *s , int32_t len ) {
if ( !initEntityTable()) return 0;
// take the ; off, if any
if ( s[len-1] == ';' ) len--;
@ -451,7 +451,7 @@ uint32_t getTextEntity ( char *s , int32_t len ) {
// . get a decimal encoded entity
// . s/len is the whole thing
// . JAB: const-ness for optimizer...
uint32_t getDecimalEntity ( char *s , int32_t len ) {
uint32_t getDecimalEntity ( const char *s , int32_t len ) {
// take the ; off, if any
if ( s[len-1] == ';' ) len--;
// . &#1 is smallest it can be
@ -504,7 +504,7 @@ uint32_t getDecimalEntity ( char *s , int32_t len ) {
// . get a hexadecimal encoded entity
// . JAB: const-ness for optimizer...
// . returns a UChar32
uint32_t getHexadecimalEntity ( char *s , int32_t len ) {
uint32_t getHexadecimalEntity ( const char *s , int32_t len ) {
// take the ; off, if any
if ( s[len-1] == ';' ) len--;
// . &#x1 is smallest it can be

@ -12,7 +12,7 @@
// call these two
// JAB: const-ness for the optimizer
int32_t getEntity_a ( char *s , int32_t maxLen , uint32_t *c );
int32_t getEntity_a ( const char *s , int32_t maxLen , uint32_t *c );
//int32_t getEntity_utf8 (char *s , int32_t maxLen , int32_t *d , int32_t *ds ) ;

@ -39,7 +39,7 @@ static int bytes_in_utf8_code[] = {
};
// how many bytes is char pointed to by p?
inline char getUtf8CharSize ( uint8_t *p ) {
inline char getUtf8CharSize ( const uint8_t *p ) {
uint8_t c = *p;
if(c<128)
return 1;
@ -62,7 +62,7 @@ inline char getUtf8CharSize ( uint8_t c ) {
return bytes_in_utf8_code[c];
}
inline char getUtf8CharSize2 ( uint8_t *p ) {
inline char getUtf8CharSize2 ( const uint8_t *p ) {
if ( ! (p[0] & 0x80) ) return 1;
if ( ! (p[0] & 0x20) ) return 2;
if ( ! (p[0] & 0x10) ) return 3;
@ -273,7 +273,7 @@ inline int32_t utf8Encode(UChar32 c, char* buf) {
}
// return the utf8 character at "p" as a 32-bit unicode character
inline UChar32 utf8Decode(char *p){//, char **next){
inline UChar32 utf8Decode(const char *p){
// single byte character
if (!(*p & 0x80)){
//*next = (char*) p + 1;

@ -15,28 +15,6 @@ bool isClockInSync() {
}
bool print96 ( char *k ) {
key_t *kp = (key_t *)k;
printf("n1=0x%"XINT32" n0=0x%"XINT64"\n",(int32_t)kp->n1,(int64_t)kp->n0);
return true;
}
bool print96 ( key_t *kp ) {
printf("n1=0x%"XINT32" n0=0x%"XINT64"\n",(int32_t)kp->n1,(int64_t)kp->n0);
return true;
}
bool print128 ( char *k ) {
key128_t *kp = (key128_t *)k;
printf("n1=0x%"XINT64" n0=0x%"XINT64"\n",(int64_t)kp->n1,(int64_t)kp->n0);
return true;
}
bool print128 ( key128_t *kp ) {
printf("n1=0x%"XINT64" n0=0x%"XINT64"\n",(int64_t)kp->n1,(int64_t)kp->n0);
return true;
}
// . put all the maps here now
// . convert "c" to lower case
const unsigned char g_map_to_lower[] = {
@ -530,7 +508,7 @@ const char g_map_is_vowel[] = {
0,0,0,0,0,0,0,0, // 240
0,0,0,0,0,0,0,0};
char *strncasestr( char *haystack, int32_t haylen, char *needle){
char *strncasestr( char *haystack, int32_t haylen, const char *needle){
int32_t matchLen = 0;
int32_t needleLen = gbstrlen(needle);
for (int32_t i = 0; i < haylen;i++){
@ -551,7 +529,7 @@ char *strncasestr( char *haystack, int32_t haylen, char *needle){
return NULL;
}
char *strnstr2( char *haystack, int32_t haylen, char *needle) {
char *strnstr2( char *haystack, int32_t haylen, const char *needle) {
int32_t matchLen = 0;
int32_t needleLen = gbstrlen(needle);
for (int32_t i = 0; i < haylen; ++i) {
@ -755,8 +733,8 @@ void hexToBin ( const char *src , int32_t srcLen , char *dst ) {
if ( src != srcEnd ) { char *xx=NULL;*xx=0; }
}
void binToHex ( unsigned char *src , int32_t srcLen , char *dst ) {
unsigned char *srcEnd = src + srcLen;
void binToHex ( const unsigned char *src , int32_t srcLen , char *dst ) {
const unsigned char *srcEnd = src + srcLen;
for ( ; src && src < srcEnd ; ) {
*dst++ = btoh(*src>>4);
*dst++ = btoh(*src&15);
@ -771,7 +749,7 @@ void binToHex ( unsigned char *src , int32_t srcLen , char *dst ) {
// . like strstr but haystack may not be NULL terminated
// . needle, however, IS null terminated
char *strncasestr ( char *haystack , char *needle , int32_t haystackSize ) {
char *strncasestr ( char *haystack , const char *needle , int32_t haystackSize ) {
int32_t needleSize = gbstrlen(needle);
int32_t n = haystackSize - needleSize ;
for ( int32_t i = 0 ; i <= n ; i++ ) {
@ -789,7 +767,7 @@ char *strncasestr ( char *haystack , char *needle , int32_t haystackSize ) {
// . like strstr but haystack may not be NULL terminated
// . needle, however, IS null terminated
char *strncasestr ( char *haystack , char *needle ,
char *strncasestr ( char *haystack , const char *needle ,
int32_t haystackSize, int32_t needleSize ) {
int32_t n = haystackSize - needleSize ;
for ( int32_t i = 0 ; i <= n ; i++ ) {
@ -821,7 +799,7 @@ char *strnstr ( char *haystack , char *needle , int32_t haystackSize ) {
}
// independent of case
char *gb_strcasestr ( char *haystack , char *needle ) {
char *gb_strcasestr ( char *haystack , const char *needle ) {
int32_t needleSize = gbstrlen(needle);
int32_t haystackSize = gbstrlen(haystack);
int32_t n = haystackSize - needleSize ;
@ -839,7 +817,7 @@ char *gb_strcasestr ( char *haystack , char *needle ) {
}
char *gb_strncasestr ( char *haystack , int32_t haystackSize , char *needle ) {
char *gb_strncasestr ( char *haystack , int32_t haystackSize , const char *needle ) {
// temp term
char c = haystack[haystackSize];
haystack[haystackSize] = '\0';
@ -852,34 +830,34 @@ char *gb_strncasestr ( char *haystack , int32_t haystackSize , char *needle ) {
// . copy "src" into "dst", replacing each '<' with "&lt;" and each '>'
//   with "&gt;"; every other byte is copied unchanged
// . "dstlen" is the total size of "dst" in bytes; one byte of it is
//   reserved for the terminating \0
// . copying stops (the output is silently truncated) once fewer than 5
//   bytes remain in front of the byte reserved for the \0, so a 4-byte
//   entity can never overrun the buffer
// . returns the number of bytes stored into "dst", excluding the \0
// . returns 0 and leaves "dst" untouched if "dstlen" <= 0
int32_t saftenTags ( char *dst , int32_t dstlen , const char *src , int32_t srclen ) {
	// bail if there is no room at all, not even for the \0
	// (test the LENGTH here, not the pointer)
	if ( dstlen <= 0 ) return 0;
	char *start = dst;
	// leave a char for the \0
	char *dstend = dst + dstlen - 1;
	const char *srcend = src + srclen;
	for ( ; src < srcend && dst + 4 < dstend ; src++ ) {
		if ( *src == '<' ) {
			*dst++ = '&';
			*dst++ = 'l';
			*dst++ = 't';
			*dst++ = ';';
			continue;
		}
		if ( *src == '>' ) {
			*dst++ = '&';
			*dst++ = 'g';
			*dst++ = 't';
			*dst++ = ';';
			continue;
		}
		*dst++ = *src;
	}
	// NULL terminate "dst"
	*dst = '\0';
	// return # of bytes, excluding \0, stored into "dst"
	return (int32_t)(dst - start);
}
// . if "doSpecial" is true, then we change &lt;, &gt; and &amp; to
@ -888,11 +866,11 @@ int32_t saftenTags ( char *s , int32_t slen , char *t , int32_t tlen ) {
// UnicodeData.txt:22E7;GREATER-THAN BUT NOT EQUIVALENT TO;Sm;0;ON;;;;;Y;
// UnicodeData.txt:E0026;TAG AMPERSAND;Cf;0;BN;;;;;N;;;;;
// UnicodeData.txt:235E;APL FUNCTIONAL SYMBOL QUOTE QUAD;So;0;L;;;;;N;;;;;
int32_t htmlDecode ( char *dst , char *src , int32_t srcLen , bool doSpecial ,
int32_t htmlDecode ( char *dst , const char *src , int32_t srcLen , bool doSpecial ,
int32_t niceness ) {
if ( srcLen == 0 ) return 0;
char *start = dst;
char *srcEnd = src + srcLen;
const char *srcEnd = src + srcLen;
for ( ; src < srcEnd ; ) {
// breathe
QUICKPOLL(niceness);
@ -995,44 +973,12 @@ int32_t htmlDecode ( char *dst , char *src , int32_t srcLen , bool doSpecial ,
return dst - start;
}
// cdata
int32_t cdataDecode ( char *dst , char *src , int32_t niceness ) {
if ( ! src ) return 0;
char *start = dst;
for ( ; *src ; ) {
// breathe
QUICKPOLL(niceness);
// utf8 support?
char size = getUtf8CharSize(src);
// see SafeBuf::cdataEncode() we do the opposite here
if ( src[0] != ']' ||
src[1] != ']' ||
src[2] != '&' ||
src[3] != 'g' ||
src[4] != 't' ) {
if ( size == 1 ) { *dst++ = *src++; continue; }
gbmemcpy ( dst , src , size );
src += size;
dst += size;
continue;
//*dst++ = *src++; continue; }
}
// make it ]]>
gbmemcpy ( dst , "]]>" , 3 );
src += 5;
dst += 3;
}
// NULL term
*dst = '\0';
return dst - start;
}
// . make something safe as an form input value by translating the quotes
// . store "t" into "s" and return bytes stored
// . does not do bounds checking
int32_t dequote ( char *s , char *send , char *t , int32_t tlen ) {
int32_t dequote ( char *s , char *send , const char *t , int32_t tlen ) {
char *start = s;
char *tend = t + tlen;
const char *tend = t + tlen;
for ( ; t < tend && s < send ; t++ ) {
if ( *t == '"' ) {
if ( s + 5 >= send ) return 0;
@ -1051,129 +997,71 @@ int32_t dequote ( char *s , char *send , char *t , int32_t tlen ) {
return s - start;
}
bool dequote ( SafeBuf* sb , char *t , int32_t tlen ) {
char *tend = t + tlen;
for ( ; t < tend; t++ ) {
if ( *t == '"' ) {
sb->safeMemcpy("&#34;", 5);
continue;
}
*sb += *t;
}
*sb += '\0';
return true;
}
//int32_t dequote ( char *s , char *t ) {
// return dequote ( s , t , gbstrlen ( t ) );
//}
// . entity-ize a string so it's safe for html output
// . store "t" into "s" and return bytes stored
// . does bounds checking
char *htmlEncode ( char *s , char *send , char *t , char *tend , bool pound ,
int32_t niceness ) {
for ( ; t < tend ; t++ ) {
char *htmlEncode ( char *dst, char *dstend, const char *src, const char *srcend,
bool pound, int32_t niceness ) {
for ( ; src < srcend ; src++ ) {
QUICKPOLL(niceness);
if ( s + 7 >= send ) { *s = '\0'; return s; }
if ( *t == '"' ) {
*s++ = '&';
*s++ = '#';
*s++ = '3';
*s++ = '4';
*s++ = ';';
if ( dst + 7 >= dstend ) { *dst = '\0'; return dst; }
if ( *src == '"' ) {
*dst++ = '&';
*dst++ = '#';
*dst++ = '3';
*dst++ = '4';
*dst++ = ';';
continue;
}
if ( *t == '<' ) {
*s++ = '&';
*s++ = 'l';
*s++ = 't';
*s++ = ';';
if ( *src == '<' ) {
*dst++ = '&';
*dst++ = 'l';
*dst++ = 't';
*dst++ = ';';
continue;
}
if ( *t == '>' ) {
*s++ = '&';
*s++ = 'g';
*s++ = 't';
*s++ = ';';
if ( *src == '>' ) {
*dst++ = '&';
*dst++ = 'g';
*dst++ = 't';
*dst++ = ';';
continue;
}
if ( *t == '&' ) {
*s++ = '&';
*s++ = 'a';
*s++ = 'm';
*s++ = 'p';
*s++ = ';';
if ( *src == '&' ) {
*dst++ = '&';
*dst++ = 'a';
*dst++ = 'm';
*dst++ = 'p';
*dst++ = ';';
continue;
}
if ( *t == '#' && pound ) {
*s++ = '&';
*s++ = '#';
*s++ = '0';
*s++ = '3';
*s++ = '5';
*s++ = ';';
if ( *src == '#' && pound ) {
*dst++ = '&';
*dst++ = '#';
*dst++ = '0';
*dst++ = '3';
*dst++ = '5';
*dst++ = ';';
continue;
}
*s++ = *t;
*dst++ = *src;
}
*s = '\0';
return s;
*dst = '\0';
return dst;
}
// . entity-ize a string so it's safe for html output
// . store "t" into "s" and return true on success
bool htmlEncode ( SafeBuf* s , char *t , char *tend , bool pound ,
int32_t niceness ) {
for ( ; t < tend ; t++ ) {
QUICKPOLL(niceness);
if ( *t == '"' ) {
s->safeMemcpy("&#34;", 5);
continue;
}
if ( *t == '<' ) {
s->safeMemcpy("&lt;", 4);
continue;
}
if ( *t == '>' ) {
s->safeMemcpy("&gt;", 4);
continue;
}
if ( *t == '&' ) {
s->safeMemcpy("&amp;", 5);
continue;
}
if ( *t == '#' && pound ) {
s->safeMemcpy("&#035;", 6);
continue;
}
// our own specially decoded entites!
if ( *t == '+' && t[1]=='!' && t[2]=='-' ) {
s->safeMemcpy("&lt;",4);
continue;
}
// our own specially decoded entites!
if ( *t == '-' && t[1]=='!' && t[2]=='+' ) {
s->safeMemcpy("&gt;",4);
continue;
}
*s += *t;
}
*s += '\0';
return true;
}
// . convert "-->%22 , &-->%26, +-->%2b, space-->+, ?-->%3f is that it?
// . convert so we can display as a cgi PARAMETER within a url
// . used by HttPage2 (cached web page) to encode the query into a url
// . used by PageRoot to do likewise
// . returns bytes written into "d" not including terminating \0
int32_t urlEncode ( char *d , int32_t dlen , char *s , int32_t slen, bool requestPath ) {
int32_t urlEncode ( char *d , int32_t dlen , const char *s , int32_t slen, bool requestPath ) {
char *dstart = d;
// subtract 1 to make room for a terminating \0
char *dend = d + dlen - 1;
char *send = s + slen;
const char *send = s + slen;
for ( ; s < send && d < dend ; s++ ) {
if ( *s == '\0' && requestPath ) {
*d++ = *s;
@ -1221,48 +1109,9 @@ int32_t urlEncode ( char *d , int32_t dlen , char *s , int32_t slen, bool reques
return d - dstart;
}
// determine the length of the encoded url, does NOT include NULL
int32_t urlEncodeLen ( char *s , int32_t slen , bool requestPath ) {
int32_t dLen = 0;
char *send = s + slen;
for ( ; s < send ; s++ ) {
if ( *s == '\0' && requestPath ) {
dLen++;
continue;
}
// encode if not fit for display
if ( ! is_ascii ( *s ) ) goto encode;
switch ( *s ) {
case ' ': goto encode;
case '&': goto encode;
case '"': goto encode;
case '+': goto encode;
case '%': goto encode;
case '#': goto encode;
// encoding < and > are more for displaying on an
// html page than sending to an http server
case '>': goto encode;
case '<': goto encode;
case '?': if ( requestPath ) break;
goto encode;
}
// otherwise, no need to encode
dLen++;
continue;
encode:
// space to +
if ( *s == ' ' ) { dLen++; continue; }
// hex code
dLen += 3; // %XX
}
//dLen++; // NULL TERM
// and return the length
return dLen;
}
// . decodes "s/slen" and stores into "dest"
// . returns the number of bytes stored into "dest"
int32_t urlDecode ( char *dest , char *s , int32_t slen ) {
int32_t urlDecode ( char *dest , const char *s , int32_t slen ) {
int32_t j = 0;
for ( int32_t i = 0 ; i < slen ; i++ ) {
if ( s[i] == '+' ) { dest[j++]=' '; continue; }
@ -1282,7 +1131,7 @@ int32_t urlDecode ( char *dest , char *s , int32_t slen ) {
}
int32_t urlDecodeNoZeroes ( char *dest , char *s , int32_t slen ) {
int32_t urlDecodeNoZeroes ( char *dest , const char *s , int32_t slen ) {
int32_t j = 0;
for ( int32_t i = 0 ; i < slen ; i++ ) {
if ( s[i] == '+' ) { dest[j++]=' '; continue; }
@ -1308,91 +1157,6 @@ int32_t urlDecodeNoZeroes ( char *dest , char *s , int32_t slen ) {
return j;
}
// . like above, but only decodes chars that should not have been encoded
// . will also encode binary chars
int32_t urlNormCode ( char *d , int32_t dlen , char *s , int32_t slen ) {
// save start of detination buffer for returning the length
char *dstart = d;
// subtract 1 for NULL termination
char *dend = d + dlen - 1;
char *send = s + slen;
for ( ; s < send && d < dend ; s++ ) {
// if its non-ascii, encode it so it displays correctly
if ( ! is_ascii ( *s ) ) {
// break if no room to encode it
if ( d + 2 >= dend ) break;
// store it encoded
*d++ = '%';
// store first hex digit
unsigned char v = ((unsigned char)*s)/16 ;
if ( v < 10 ) v += '0';
else v += 'A' - 10;
*d++ = v;
// store second hex digit
v = ((unsigned char)*s) & 0x0f ;
if ( v < 10 ) v += '0';
else v += 'A' - 10;
*d++ = v;
continue;
}
// store it
*d++ = *s;
// but it might be something encoded that should not have been
if ( *s != '%' ) continue;
// it requires to following chars to decode
if ( s + 2 >= send ) continue;
// if two chars after are not hex chars, it's not an encoding
if ( ! is_hex ( s[1] ) ) continue;
if ( ! is_hex ( s[2] ) ) continue;
// convert hex chars to values
unsigned char a = htob ( s[1] ) * 16;
unsigned char b = htob ( s[2] ) ;
unsigned char v = a + b;
// don't decode if it decodes in these chars
switch ( v ) {
case ' ': continue;
case '&': continue;
case '"': continue;
case '+': continue;
case '%': continue;
case '>': continue;
case '<': continue;
case '?': continue;
case '=': continue;
}
// otherwise, it's fine to decode it
d[-1] = (char) (a + b);
// skip over those 2 chars as well as leading '%'
s += 2;
}
// NULL terminate
*d = '\0';
// return length
return d - dstart ;
}
// approximate # of non-punct words
int32_t getNumWords ( char *s ) {
int32_t count = 0;
loop:
// skip punct
while ( ! is_alnum_a(*s) ) s++;
// bail if done
if ( !*s ) return count;
// count a word
count++;
// skip word
while ( is_alnum_a(*s) ) s++;
// watch for ' letter punct
if ( *s=='\'' && is_alnum_a(*(s+1)) && !is_alnum_a(*(s+2)) ) {
// skip apostrophe
s++;
// skip rest of word
while ( is_alnum_a(*s) ) s++;
}
goto loop;
}
static int64_t s_adjustment = 0;
int64_t globalToLocalTimeMilliseconds ( int64_t global ) {
@ -1428,7 +1192,7 @@ static char s_tafile[1024];
static bool s_hasFileName = false;
// returns false and sets g_errno on error
bool setTimeAdjustmentFilename ( char *dir, char *filename ) {
bool setTimeAdjustmentFilename ( const char *dir, const char *filename ) {
s_hasFileName = true;
int32_t len1 = gbstrlen(dir);
int32_t len2 = gbstrlen(filename);
@ -1935,7 +1699,7 @@ bool is_urlchar(char s) {
return false;
}
// don't allow "> in our input boxes
int32_t cleanInput(char *outbuf, int32_t outbufSize, char *inbuf, int32_t inbufLen){
int32_t cleanInput(char *outbuf, int32_t outbufSize, const char *inbuf, int32_t inbufLen){
char *p = outbuf;
int32_t numQuotes=0;
int32_t lastQuote = 0;
@ -2168,57 +1932,11 @@ bool deserializeMsg2 ( char **firstStrPtr , // ptr_url
return true;
}
// print it to stdout for debugging Dates.cpp
int32_t printTime ( time_t ttt ) {
//char *s = ctime(&ttt);
// print in UTC!
char *s = asctime ( gmtime(&ttt) );
// strip \n
s[gbstrlen(s)-1] = '\0';
fprintf(stderr,"%s UTC\n",s);
return 0;
}
// this uses our local timezone which is MST, so we need to tell
// it to use UTC somehow...
time_t mktime_utc ( struct tm *ttt ) {
time_t local = mktime ( ttt );
// bad?
if ( local < 0 ) return local;
/*
// sanity check
static char s_mm = 1;
static int32_t s_localOff;
if ( s_mm ) {
s_mm = 0;
struct tm ff;
ff.tm_mon = 0;
ff.tm_year = 70;
ff.tm_mday = 1;
ff.tm_hour = 0;
ff.tm_min = 0;
ff.tm_sec = 0;
int32_t qq = mktime ( &ff );
//fprintf(stderr,"qq=%"INT32"\n",qq);
// . set this then
// . we subtract s_localOff to further mktime() returns to
// get it into utc
s_localOff = qq;
// sanity
if ( s_localOff != timezone ) { char *xx=NULL;*xx=0; }
}
*/
// see what our timezone is!
//fprintf(stderr,"%"INT32"=tz\n",timezone);
// mod that
return local - timezone;
}
bool verifyUtf8 ( char *txt , int32_t tlen ) {
bool verifyUtf8 ( const char *txt , int32_t tlen ) {
if ( ! txt || tlen <= 0 ) return true;
char size;
char *p = txt;
char *pend = txt + tlen;
const char *p = txt;
const char *pend = txt + tlen;
for ( ; p < pend ; p += size ) {
size = getUtf8CharSize(p);
// skip if ascii
@ -2240,7 +1958,7 @@ bool verifyUtf8 ( char *txt , int32_t tlen ) {
return true;
}
bool verifyUtf8 ( char *txt ) {
bool verifyUtf8 ( const char *txt ) {
int32_t tlen = gbstrlen(txt);
return verifyUtf8(txt,tlen);
}

199
fctypes.h

@ -12,22 +12,13 @@
// the hash value as 32 bits in posdb
typedef uint32_t FacetValHash_t;
bool verifyUtf8 ( char *txt ) ;
bool verifyUtf8 ( char *txt , int32_t tlen ) ;
bool verifyUtf8 ( const char *txt ) ;
bool verifyUtf8 ( const char *txt , int32_t tlen ) ;
bool print96 ( char *k ) ;
bool print96 ( key_t *kp ) ;
bool print128 ( char *k ) ;
bool print128 ( key128_t *kp ) ;
// print it to stdout for debugging Dates.cpp
int32_t printTime ( int32_t ttt );
time_t mktime_utc ( struct tm *ttt ) ;
class SafeBuf;
// this too
char *strncasestr( char *haystack, int32_t haylen, char *needle);
char *strncasestr( char *haystack, int32_t haylen, const char *needle);
// just like sprintf(s,"%"UINT64"",n), but we insert commas
int32_t ulltoa ( char *s , uint64_t n ) ;
@ -36,47 +27,37 @@ int32_t ulltoa ( char *s , uint64_t n ) ;
// . store "t" into "s"
// . returns bytes stored into "s"
// . NULL terminates "s"
int32_t saftenTags ( char *s , int32_t slen , char *t , int32_t tlen ) ;
int32_t saftenTags ( char *dst , int32_t dstlen , const char *src , int32_t srclen ) ;
// . basically just converts "'s to &#34;'s
// . store "src" into "dest" and return bytes stored
// . does not do bounds checking in "dest"
// . used to encode things as form input variables, like query in HttpPage0.cpp
int32_t dequote ( char *dest , char *dend , char *src , int32_t srcLen ) ;
int32_t dequote ( char *dest , char *dend , const char *src , int32_t srcLen ) ;
// . entity-ize a string so it's safe for html output
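// . converts "'s to &#34;'s, &'s to &amp;, <'s to &lt; and >'s to &gt;
// . if "pound" is true, #'s are converted to &#035; as well
// . stores the encoded "src" into "dst" and returns a pointer to the
//   terminating \0 written into "dst"
// . stops early (output is truncated) when "dst" is nearly full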
char *htmlEncode ( char *s , char *send , char *t , char *tend ,
char *htmlEncode ( char *dst, char *dstend, const char *src, const char *srcend,
bool pound = false , int32_t niceness = 0) ;
bool htmlEncode ( SafeBuf* s , char *t , char *tend ,
bool pound = false , int32_t niceness = 0 );
// . like above but src is NULL terminated
// . returns length of string stored into "dest"
// . decode html entities like &amp; and &gt;
int32_t htmlDecode ( char *dst, char *src, int32_t srcLen,
int32_t htmlDecode ( char *dst, const char *src, int32_t srcLen,
bool doSpecial ,//=false);
int32_t niceness);
int32_t cdataDecode ( char *dst , char *src , int32_t niceness ) ;
// . convert " to %22 , & to %26, is that it?
// . urlEncode() stores the encoded, NULL-terminated URL in "dest"
// . requestPath leaves \0 and ? characters intact, for encoding requests
int32_t urlEncode ( char *dest , int32_t destLen , char *src , int32_t srcLen ,
int32_t urlEncode ( char *dest , int32_t destLen , const char *src , int32_t srcLen ,
bool requestPath = false ) ;
// determine the length of the encoded url, does NOT include NULL
int32_t urlEncodeLen ( char *s , int32_t slen , bool requestPath = false ) ;
// decode a url -- decode ALL %XX's
int32_t urlDecode ( char *dest , char *t , int32_t tlen ) ;
int32_t urlDecodeNoZeroes ( char *dest , char *t , int32_t tlen ) ;
// . normalize the encoding
// . like urlDecode() but only decodes chars that should not have been encoded
// . also, will encode characters that should have been encoded
int32_t urlNormCode ( char *dest , int32_t destLen , char *src , int32_t srcLen ) ;
int32_t urlDecode ( char *dest , const char *src , int32_t tlen ) ;
int32_t urlDecodeNoZeroes ( char *dest , const char *src , int32_t tlen ) ;
bool is_digit(unsigned char c) ;
@ -90,36 +71,21 @@ char btoh ( char s ) ;
// convert hex ascii string into binary
void hexToBin ( const char *src , int32_t srcLen , char *dst );
// convert binary number of size srcLen bytes into hex string in "dst"
void binToHex ( unsigned char *src , int32_t srcLen , char *dst );
void binToHex ( const unsigned char *src , int32_t srcLen , char *dst );
// the _a suffix denotes an ascii string
bool has_alpha_utf8(char *s, char *send ) ;
bool is_cap_utf8 (char *s,int32_t len) ;
bool is_cap_utf8 (const char *s,int32_t len) ;
// does it have at least one upper case character in it?
bool has_upper_a (char *s,int32_t len) ;
bool has_binary_a (char *s,int32_t len) ;
void to_lower3_a (char *s,int32_t len, char *buf) ;
void to_lower3_a (const char *s,int32_t len, char *buf) ;
void to_lower1 (char *s) ;
int32_t to_lower_alnum (char *s,int32_t len, char *buf) ;
int32_t to_lower_utf8 (char *dst , char *src ) ;
int32_t to_lower_utf8 (char *dst , char *dstEnd, char *src ) ;
int32_t to_lower_utf8 (char *dst , char *dstEnd, char *src, char *srcEnd) ;
int32_t to_lower_utf8_32 (char *src ) ;
int32_t to_cap_alnum (char *s,int32_t len, char *buf) ;
int32_t to_alnum (char *s,int32_t len, char *buf) ;
void to_upper3_a (char *s,int32_t len, char *buf) ;
void to_cap (char *s,int32_t len, char *buf) ;
// . approximate # of non-punct words
// . s must be NULL terminated
// . used by LinkInfo.cpp to weight link text based on # of words
int32_t getNumWords ( char *s );
// true if character should be stripped from the end/beginning of the title
// error! make a map of this
bool is_title_junk(char c) ;
int32_t to_lower_utf8 (char *dst , const char *src ) ;
int32_t to_lower_utf8 (char *dst , char *dstEnd, const char *src ) ;
int32_t to_lower_utf8 (char *dst , char *dstEnd, const char *src, const char *srcEnd) ;
void to_upper3_a (const char *s,int32_t len, char *buf) ;
// . get the # of words in this string
int32_t getNumWords ( char *s , int32_t len, int32_t titleVersion ) ;
@ -131,15 +97,15 @@ double atod2 ( char *s, int32_t len ) ;
bool atob ( const char *s, int32_t len ) ;
// like strstr but haystack need not be NULL terminated
char *strncasestr ( char *haystack , char *needle , int32_t haystackSize ) ;
char *strncasestr ( char *haystack , char *needle ,
char *strncasestr ( char *haystack , const char *needle , int32_t haystackSize ) ;
char *strncasestr ( char *haystack , const char *needle ,
int32_t haystackSize, int32_t needleSize ) ;
// independent of case
char *gb_strcasestr ( char *haystack , char *needle );
char *gb_strncasestr ( char *haystack , int32_t haystackSize , char *needle ) ;
char *gb_strcasestr ( char *haystack , const char *needle );
char *gb_strncasestr ( char *haystack , int32_t haystackSize , const char *needle ) ;
char *strnstr2( char *haystack, int32_t haylen, char *needle);
char *strnstr2( char *haystack, int32_t haylen, const char *needle);
// updates our static var, s_adjustment to keep our clock in sync to hostId #0
void settimeofdayInMillisecondsGlobal ( int64_t newTime ) ;
@ -193,7 +159,7 @@ extern int64_t g_adjustment;
bool isClockInSync();
bool setTimeAdjustmentFilename ( char *dir, char *filename ) ;
bool setTimeAdjustmentFilename ( const char *dir, const char *filename ) ;
bool loadTimeAdjustment ( ) ;
bool saveTimeAdjustment ( ) ;
@ -215,7 +181,7 @@ bool saveTimeAdjustment ( ) ;
#define is_hex(c) g_map_is_hex[(unsigned char)c]
#define is_tagname_char(c) g_map_is_tagname_char[(unsigned char)c]
inline bool is_upper_utf8 ( char *s );
inline bool is_upper_utf8 ( const char *s );
/*
// is character, "s", used in textual hexadecimal representation?
@ -241,90 +207,38 @@ inline char btoh ( char s ) {
return (s - 10) + 'a';
}
inline bool is_ascii2_a(char *s,int32_t len) {
inline bool is_ascii2_a(const char *s, int32_t len) {
for (int32_t i=0;i<len;i++)
if (!is_ascii(s[i]))
return false;
return true;
}
inline bool is_cap_utf8 (char *s,int32_t len) {
inline bool is_cap_utf8 (const char *s, int32_t len) {
if ( ! is_upper_utf8 ( s ) ) return false;
char *send = s + len;
const char *send = s + len;
for ( ; s < send ; s += getUtf8CharSize ( s ) )
if ( is_upper_utf8 ( s ) ) return false;
return true;
}
// does it have at least one upper case character in it?
inline bool has_upper_a (char *s,int32_t len) {
for (int32_t i=0;i<len;i++)
if ( is_upper_a(s[i]))
return true;
return false;
}
// does it have at least one binary character in it?
inline bool has_binary_a (char *s,int32_t len) {
for (int32_t i=0;i<len;i++)
if ( is_binary_a(s[i]))
return true;
return false;
}
inline void to_lower3_a(char *s,int32_t len, char *buf) {
inline void to_lower3_a(const char *s, int32_t len, char *buf) {
for (int32_t i=0;i<len ;i++)
buf[i]=to_lower_a((unsigned char)s[i]);
}
inline void to_lower1_a(char *s) {
for (int32_t i=0;s[i] ;i++)
s[i]=to_lower_a((unsigned char)s[i]);
}
inline int32_t to_lower_alnum_a(char *s,int32_t len, char *buf) {
int32_t j=0;
for (int32_t i=0;i<len ;i++)
if (is_alnum_a(s[i]))
buf[j++]=to_lower_a((unsigned char)s[i]);
return j;
}
inline int32_t to_cap_alnum_a(char *s,int32_t len, char *buf) {
buf[0] = to_upper_a(s[0]);
int32_t j=1;
for (int32_t i=1;i<len ;i++)
if (is_alnum_a(s[i]))
buf[j++]=to_lower_a((unsigned char)s[i]);
return j;
}
inline int32_t to_alnum_a(char *s,int32_t len, char *buf) {
int32_t j=0;
for (int32_t i=0;i<len ;i++)
if (is_alnum_a(s[i]))
buf[j++]=s[i];
return j;
}
inline void to_upper3_a(char *s,int32_t len, char *buf) {
inline void to_upper3_a(const char *s, int32_t len, char *buf) {
for (int32_t i=0;i<len;i++)
buf[i]=to_upper_a(s[i]);
}
inline void to_cap_a(char *s,int32_t len, char *buf) {
buf[0]=to_upper_a(s[0]);
for (int32_t i=1;i<len;i++)
buf[i]=to_lower_a(s[i]);
}
inline bool is_binary_utf8 ( char *p ) {
inline bool is_binary_utf8 ( const char *p ) {
if ( getUtf8CharSize((uint8_t *)p) != 1 ) return false;
// it is ascii, use that table now
return is_binary_a ( *p );
}
inline bool is_lower_utf8 ( char *src ) {
inline bool is_lower_utf8 ( const char *src ) {
// if in ascii do it quickly
if ( is_ascii3(*src) ) return is_lower_a ( *src );
// convert to a code point
@ -333,7 +247,7 @@ inline bool is_lower_utf8 ( char *src ) {
return ucIsLower ( x );
}
inline bool is_upper_utf8 ( char *src ) {
inline bool is_upper_utf8 ( const char *src ) {
// if in ascii do it quickly
if ( is_ascii3(*src) ) return is_upper_a ( *src );
// convert to a code point
@ -342,7 +256,7 @@ inline bool is_upper_utf8 ( char *src ) {
return ucIsUpper ( x );
}
inline bool is_alnum_utf8 ( char *src ) {
inline bool is_alnum_utf8 ( const char *src ) {
// if in ascii do it quickly
if ( is_ascii3(*src) ) return is_alnum_a ( *src );
// convert to a code point
@ -351,7 +265,7 @@ inline bool is_alnum_utf8 ( char *src ) {
return ucIsAlnum ( x );
}
inline bool is_alnum_utf8 ( unsigned char *src ) {
inline bool is_alnum_utf8 ( const unsigned char *src ) {
// if in ascii do it quickly
if ( is_ascii3(*src) ) return is_alnum_a ( *src );
// convert to a code point
@ -360,7 +274,7 @@ inline bool is_alnum_utf8 ( unsigned char *src ) {
return ucIsAlnum ( x );
}
inline bool is_alpha_utf8 ( char *src ) {
inline bool is_alpha_utf8 ( const char *src ) {
// if in ascii do it quickly
if ( is_ascii3(*src) ) return is_alpha_a ( *src );
// convert to a code point
@ -369,7 +283,7 @@ inline bool is_alpha_utf8 ( char *src ) {
return ucIsAlpha ( x );
}
inline bool is_punct_utf8 ( char *src ) {
inline bool is_punct_utf8 ( const char *src ) {
// if in ascii do it quickly
if ( is_ascii3(*src) ) return is_punct_a ( *src );
// convert to a code point
@ -379,7 +293,7 @@ inline bool is_punct_utf8 ( char *src ) {
else return true;
}
inline bool is_wspace_utf8 ( uint8_t *src ) {
inline bool is_wspace_utf8 ( const uint8_t *src ) {
// if in ascii do it quickly
if ( is_ascii3(*src) ) return is_wspace_a ( *src );
// convert to a code point
@ -388,7 +302,7 @@ inline bool is_wspace_utf8 ( uint8_t *src ) {
return is_wspace_uc ( x );
}
inline bool is_wspace_utf8 ( char *src ) {
inline bool is_wspace_utf8 ( const char *src ) {
// if in ascii do it quickly
if ( is_ascii3((uint8_t)*src) ) return is_wspace_a ( (uint8_t)*src );
// convert to a code point
@ -400,7 +314,7 @@ inline bool is_wspace_utf8 ( char *src ) {
// . returns bytes stored into "dst" from "src"
// . just do one character, which may be from 1 to 4 bytes
// . TODO: make a native utf8 to_lower to avoid converting to a code point
inline int32_t to_lower_utf8 ( char *dst , char *src ) {
inline int32_t to_lower_utf8 ( char *dst , const char *src ) {
// if in ascii do it quickly
if ( is_ascii3(*src) ) { *dst = to_lower_a ( *src ); return 1; }
// convert to a code point
@ -411,18 +325,6 @@ inline int32_t to_lower_utf8 ( char *dst , char *src ) {
return utf8Encode ( y , dst );
}
// store answer in the int32_t and return that!
inline int32_t to_lower_utf8_32 ( char *src ) {
// if in ascii do it quickly
if ( is_ascii3(*src) ) return (int32_t) to_lower_a ( *src );
// convert to a code point
UChar32 x = utf8Decode(src);
// covert to lower
UChar32 y = ucToLower ( x );
// give that back
return y;
}
inline int32_t to_upper_utf8 ( char *dst , char *src ) {
// if in ascii do it quickly
if ( is_ascii3(*src) ) { *dst = to_upper_a ( *src ); return 1; }
@ -434,7 +336,7 @@ inline int32_t to_upper_utf8 ( char *dst , char *src ) {
return utf8Encode ( y , dst );
}
inline int32_t to_lower_utf8 (char *dst, char *dstEnd, char *src, char *srcEnd ){
inline int32_t to_lower_utf8 (char *dst, char *dstEnd, const char *src, const char *srcEnd ){
char *dstart = dst;
for ( ; src < srcEnd ; src += getUtf8CharSize((uint8_t *)src) )
dst += to_lower_utf8 ( dst , src );
@ -442,7 +344,7 @@ inline int32_t to_lower_utf8 (char *dst, char *dstEnd, char *src, char *srcEnd )
return dst - dstart;
}
inline int32_t to_lower_utf8 (char *dst, char *dstEnd, char *src ){
inline int32_t to_lower_utf8 (char *dst, char *dstEnd, const char *src ){
char *dstart = dst;
for ( ; *src ; src += getUtf8CharSize((uint8_t *)src) )
dst += to_lower_utf8 ( dst , src );
@ -478,24 +380,7 @@ inline bool ucIsWordChar(UChar32 c) {
}
// don't allow "> in our input boxes
int32_t cleanInput(char *outbuf, int32_t outbufSize, char *inbuf, int32_t inbufLen);
// like strcpy but return the length and always null terminates
// dst should be of size maxDstLen + 1
inline int32_t setstr ( char *dst,
int32_t maxDstLen,
char *src,
int32_t srcLen ) {
// get the proper length
int32_t dstLen = srcLen;
if ( srcLen > maxDstLen ) dstLen = maxDstLen;
// copy the string
gbmemcpy ( dst, src, dstLen );
// NULL terminate
dst[dstLen] = '\0';
// return the proper length
return dstLen;
}
int32_t cleanInput(char *outbuf, int32_t outbufSize, const char *inbuf, int32_t inbufLen);
//
// these three functions replace the Msg.cpp/.h class

4
hash.h

@ -35,7 +35,7 @@ uint32_t hash32_cont ( char *s, char *slen,
uint32_t startHash , int32_t *conti );
uint64_t hash64n ( char *s, uint64_t startHash =0LL);
uint64_t hash64 ( uint64_t h1,uint64_t h2);
uint64_t hash64 ( char *s,int32_t len,uint64_t startHash=0);
uint64_t hash64 ( const char *s,int32_t len,uint64_t startHash=0);
uint64_t hash64_cont ( char *s,int32_t len,
uint64_t startHash,int32_t *conti);
uint64_t hash64b ( char *s, uint64_t startHash=0);
@ -74,7 +74,7 @@ inline uint64_t hash64b ( char *s , uint64_t startHash ) {
return h;
}
inline uint64_t hash64 ( char *s, int32_t len,
inline uint64_t hash64 ( const char *s, int32_t len,
uint64_t startHash ) {
uint64_t h = startHash;
int32_t i = 0;