Merge branch 'diffbot-testing' of github.com:gigablast/open-source-search-engine into diffbot-testing

Conflicts:
	Pages.cpp
	Parms.cpp
This commit is contained in:
Matt Wells 2015-01-29 09:48:38 -07:00
commit 2cb37042d2
201 changed files with 6802 additions and 1983 deletions

11
.gitignore vendored Normal file

@ -0,0 +1,11 @@
*.o
/.project
/coll.main.0/
/catdb/
/cachedb/
*.dat
*.txt
*.conf.saving
*.conf
/diskusage
*.cache

@ -1025,7 +1025,7 @@ bool Addresses::set ( Sections *sections ,
Place *name1 = &a->m_name1;
Place *street = &a->m_street;
// street name was name1
memcpy ( name1 , street , sizeof(Place) );
gbmemcpy ( name1 , street , sizeof(Place) );
// and set the street to what it should be
street->m_str = ma->m_street.m_str;
street->m_strlen = ma->m_street.m_strlen;
@ -1284,7 +1284,7 @@ bool Addresses::updateAddresses ( ) {
if ( ! street ) return false;
a->m_street = street;
// street name was name1
//memcpy ( name1 , street , sizeof(Place) );
//gbmemcpy ( name1 , street , sizeof(Place) );
// and set the street to what it should be
street->m_str = sp;
street->m_strlen = spend - sp;
@ -3418,27 +3418,29 @@ bool Addresses::setGeocoderLatLons ( void *state,
//char *start = p;
// request needs street,state,city (and zip if there)
p += sprintf(p,"addr%"INT32"=",num++);
memcpy(p,aa->m_street->m_str,aa->m_street->m_strlen);
gbmemcpy(p,aa->m_street->m_str,aa->m_street->m_strlen);
p += aa->m_street->m_strlen;
*p++ = ',';
*p++ = ' ';
if ( aa->m_city ) {
memcpy(p,aa->m_city->m_str,aa->m_city->m_strlen);
gbmemcpy(p,aa->m_city->m_str,aa->m_city->m_strlen);
p += aa->m_city->m_strlen;
}
else if ( aa->m_zip ) {
int32_t clen = strlen(aa->m_zip->m_cityStr);
memcpy(p,aa->m_zip->m_cityStr,clen);
gbmemcpy(p,aa->m_zip->m_cityStr,clen);
p += clen;
}
else if ( aa->m_flags3 & AF2_LATLON );
else { char *xx=NULL; *xx=0; }
*p++ = ' ';
// get state abbr
if ( aa->m_adm1 )
memcpy(p,aa->m_adm1->m_adm1,2);
else if ( aa->m_zip )
memcpy(p,aa->m_zip->m_adm1,2);
if ( aa->m_adm1 ) {
gbmemcpy(p,aa->m_adm1->m_adm1,2);
}
else if ( aa->m_zip ) {
gbmemcpy(p,aa->m_zip->m_adm1,2);
}
else if ( aa->m_flags3 & AF2_LATLON );
else { char *xx=NULL;*xx=0; }
p += 2;
@ -3446,7 +3448,7 @@ bool Addresses::setGeocoderLatLons ( void *state,
if ( aa->m_zip ) {
*p++ = ' ';
int32_t zlen = aa->m_zip->m_strlen;
memcpy(p,aa->m_zip->m_str,zlen);
gbmemcpy(p,aa->m_zip->m_str,zlen);
p += zlen;
}
*p++ = '&';
@ -5713,7 +5715,7 @@ bool Addresses::set2 ( ) {
Place *p = (Place *)m_pm.getMem(sizeof(Place));
if ( ! p ) return false;
// ok, good to add
memcpy ( p , pc , sizeof(Place) );
gbmemcpy ( p , pc , sizeof(Place) );
// set PLF_FROMTITLE bit
if ( inTitle ) p->m_bits |= PLF_FROMTITLE;
// if last word was in,set this
@ -5725,7 +5727,7 @@ bool Addresses::set2 ( ) {
Place *p = (Place *)m_pm.getMem(sizeof(Place));
if ( ! p ) return false;
// ok, good to add
memcpy ( p , ps , sizeof(Place) );
gbmemcpy ( p , ps , sizeof(Place) );
// set PLF_FROMTITLE bit
if ( inTitle ) p->m_bits |= PLF_FROMTITLE;
// if last word was in,set this
@ -11507,7 +11509,7 @@ void setFromStr2 ( char *addr ,
}
// copy into our static buffer
memcpy ( s_addr , addr , len+1 );
gbmemcpy ( s_addr , addr , len+1 );
// parse it in our static buffer so we do not destroy it
char *p = s_addr;
@ -12288,7 +12290,7 @@ int32_t memcpy2 ( char *dst , char *src , int32_t bytes , bool filterCommas ,
// everything else
if( cs == 1 ) { *dst++ = *src; continue; }
// otherwise the character is > 1 byte
memcpy ( dst , src , cs );
gbmemcpy ( dst , src , cs );
dst += cs;
}
// return bytes written
@ -12441,7 +12443,7 @@ int32_t Address::serialize ( char *buf , int32_t bufSize , char *origUrl ,
// append the adm1 code
//if ( d->m_adm1[0] ) {
// *p++ = '(';
// memcpy(p,d->m_adm1,2);
// gbmemcpy(p,d->m_adm1,2);
// p += 2;
// *p++ = ')';
//}
@ -12458,7 +12460,7 @@ int32_t Address::serialize ( char *buf , int32_t bufSize , char *origUrl ,
int32_t slen = gbstrlen(str);
// limit to 64 since that is getStoredSize() number
if ( slen > 64 ) slen = 64;
memcpy ( p , str ,slen );
gbmemcpy ( p , str ,slen );
p += slen;
}
}
@ -12479,7 +12481,7 @@ int32_t Address::serialize ( char *buf , int32_t bufSize , char *origUrl ,
// append the adm1 code
//if ( d->m_adm1[0] ) {
// *p++ = '(';
// memcpy(p,d->m_adm1,2);
// gbmemcpy(p,d->m_adm1,2);
// p += 2;
// *p++ = ')';
//}
@ -12495,7 +12497,7 @@ int32_t Address::serialize ( char *buf , int32_t bufSize , char *origUrl ,
else if ( m_flags3 & AF2_LATLON ) {
// this is the nearest city's state based on our lat/lon
if ( pd && pd->m_adm1[0] && pd->m_adm1[1] ) {
memcpy ( p , pd->m_adm1 ,2 );
gbmemcpy ( p , pd->m_adm1 ,2 );
p += 2;
}
}
@ -12513,7 +12515,7 @@ int32_t Address::serialize ( char *buf , int32_t bufSize , char *origUrl ,
// append the adm1 code
//if ( d->m_adm1[0] ) {
// *p++ = '(';
// memcpy(p,d->m_adm1,2);
// gbmemcpy(p,d->m_adm1,2);
// p += 2;
// *p++ = ')';
//}
@ -12523,13 +12525,13 @@ int32_t Address::serialize ( char *buf , int32_t bufSize , char *origUrl ,
// use country code from "crid"
//char *cn = (char *)g_countryCode.getAbbr(m_adm1->m_crid-1);
//if ( cn ) {
// memcpy(p,cn,gbstrlen(cn));
// gbmemcpy(p,cn,gbstrlen(cn));
// p += gbstrlen(cn);
//}
if ( m_flags3 & AF2_LATLON ) {
if ( pd && pd->m_crid ) {
char *cc = getCountryCode(pd->m_crid);
memcpy ( p , cc , 2 );
gbmemcpy ( p , cc , 2 );
p += 2;
}
}
@ -15783,7 +15785,7 @@ bool addIndicator ( int64_t h , char bit , float indScore ) {
// set bit, should only be one
id.m_bit = bit;
id.m_indScore = indScore;
// add it. should memcpy "pd"
// add it. should gbmemcpy "pd"
return g_indicators.addKey ( &h , &id ) ;
}
@ -16258,7 +16260,7 @@ bool Msg2c::launchRequests ( ) {
*(char *)p = isName ; p += 1;
// collection
//int32_t collSize = gbstrlen(m_coll) + 1;
//memcpy ( p , m_coll , collSize );
//gbmemcpy ( p , m_coll , collSize );
//p += collSize;
*(collnum_t *)p = m_collnum;
p += sizeof(collnum_t);
@ -16853,7 +16855,7 @@ void gotList2c ( void *state , RdbList *xxx , Msg5 *yyy ) {
// get length
int32_t len = gbstrlen(str);
// store in reply buf, include \0
memcpy ( rptr , str , len + 1 );
gbmemcpy ( rptr , str , len + 1 );
// skip over
rptr += len + 1;
// sanity check
@ -17009,7 +17011,7 @@ void sendBackAddress ( State2c *st ) {
// how much to copy, include \0
int32_t bytes = wlen + 1;
// copy over all but lat and lon if there, includes last ';'
memcpy ( p , winner , bytes ); p += bytes;
gbmemcpy ( p , winner , bytes ); p += bytes;
// how big is reply?
int32_t replySize = p - reply;
// sanity check
@ -18267,7 +18269,7 @@ bool getIPLocation ( int32_t ip ,
if ( ctry ) *ctry = p;
//len = gbstrlen(gir->country_code);
//memcpy ( p , gir->country_code , len + 1 );
//gbmemcpy ( p , gir->country_code , len + 1 );
p[0] = gir->country_code[0];
p[1] = gir->country_code[1];
p += 2;
@ -18278,7 +18280,7 @@ bool getIPLocation ( int32_t ip ,
if ( gir->region ) len = gbstrlen(gir->region);
// bogus?
if ( len == 0 ) return false;
//memcpy ( p , gir->region , len + 1 );
//gbmemcpy ( p , gir->region , len + 1 );
// make it all lowercase so we don't core anywhere
int32_t written = to_lower_alnum_a(gir->region,len,p);
// sanity
@ -18296,7 +18298,7 @@ bool getIPLocation ( int32_t ip ,
if ( gir->city ) len = gbstrlen(gir->city);
// bogus?
if ( len == 0 ) return false;
memcpy ( p , gir->city , len );
gbmemcpy ( p , gir->city , len );
p += len;
*p++ = '\0';
@ -19323,19 +19325,19 @@ bool getLatLonFromUserInput ( float *radius,
// mark it
gotStuff = true;
// use gbstate:
memcpy ( p , "gbeventstatecode:", 17 );
gbmemcpy ( p , "gbeventstatecode:", 17 );
p += 17;
// special treatment. a state abbr is always 2 chars
memcpy ( p , finalStateDesc->m_adm1 , 2 );
gbmemcpy ( p , finalStateDesc->m_adm1 , 2 );
p += 2;
// store the country as well for that state whether
// it was entered or not! because some states are
// reduced to their numeric code like "08" and
// many countries have that same code!
char *cc = getCountryCode(finalStateDesc->m_crid);
memcpy ( p , " gbeventcountrycode:", 20 );
gbmemcpy ( p , " gbeventcountrycode:", 20 );
p += 20;
memcpy ( p , cc , 2 );
gbmemcpy ( p , cc , 2 );
p += 2;
// also set the timezone
*timeZone2 = finalStateDesc->m_timeZoneOffset;
@ -19371,9 +19373,9 @@ bool getLatLonFromUserInput ( float *radius,
gotStuff = true;
// special treatment. a country abbr is always 2 chars
char *cc = getCountryCode(finalCountryDesc->m_crid);
memcpy ( p , "gbeventcountrycode:", 19 );
gbmemcpy ( p , "gbeventcountrycode:", 19 );
p += 19;
memcpy ( p , cc , 2 );
gbmemcpy ( p , cc , 2 );
p += 2;
ignoreUntil = finalCountryB;
continue;
@ -19392,11 +19394,11 @@ bool getLatLonFromUserInput ( float *radius,
// mark it
gotStuff = true;
// field header
memcpy ( p , "gbwhere:", 8 );
gbmemcpy ( p , "gbwhere:", 8 );
// advance
p += 8;
// otherwise store into buffer as is
memcpy ( p , wptrs[i] , wlens[i] );
gbmemcpy ( p , wptrs[i] , wlens[i] );
// advance ptr cursor
p += wlens[i];
}
@ -19555,7 +19557,7 @@ void *PlaceMem::getMem ( int32_t need ) {
// to be safe to avoid bad mem writes
m_placePtrs[i] = NULL;
}
//memcpy ( newPtrs, m_placePtrs , m_numPlacePtrs*4);
//gbmemcpy ( newPtrs, m_placePtrs , m_numPlacePtrs*4);
mfree ( m_placePtrs , oldSize , "pptbl");
m_placePtrs = newPtrs;
m_numPlacePtrsAllocated = newAlloc;
@ -19610,7 +19612,7 @@ void *PlaceMem::getMem ( int32_t need ) {
newAlloc = m_initNumPoolPtrs;
char **newPtrs = (char **)mmalloc(newAlloc*4,"pptbl2");
if ( ! newPtrs ) return NULL;
memcpy ( newPtrs , m_poolPtrs , m_numPoolsAllocated*4 );
gbmemcpy ( newPtrs , m_poolPtrs , m_numPoolsAllocated*4 );
mfree ( m_poolPtrs , oldSize , "pptbl2");
m_poolPtrs = newPtrs;
m_numPoolPtrsAllocated = newAlloc;

@ -927,7 +927,7 @@ bool AutoBan::printTable( TcpSocket *s , HttpRequest *r ) {
removeIp(ip);
char *beginning;
char ipbuf[64];//gotta NULL terminate for strstr
memcpy(ipbuf, clear, clearLen);
gbmemcpy(ipbuf, clear, clearLen);
ipbuf[clearLen] = '\0';
beginning = findToken(g_conf.m_banIps, ipbuf,
clearLen);
@ -955,7 +955,7 @@ bool AutoBan::printTable( TcpSocket *s , HttpRequest *r ) {
if(ip) {
char *beginning;
char ipbuf[64];//gotta NULL terminate for strstr
memcpy(ipbuf, allow, allowLen);
gbmemcpy(ipbuf, allow, allowLen);
ipbuf[allowLen] = '\0';
beginning = findToken(g_conf.m_allowIps, ipbuf,
allowLen);
@ -966,7 +966,7 @@ bool AutoBan::printTable( TcpSocket *s , HttpRequest *r ) {
if(p - g_conf.m_allowIps + allowLen + 2
< AUTOBAN_TEXT_SIZE) {
*p++ = '\n';
memcpy(p, ipbuf,allowLen);
gbmemcpy(p, ipbuf,allowLen);
*(p + allowLen) = '\0';
}
else {
@ -1005,7 +1005,7 @@ bool AutoBan::printTable( TcpSocket *s , HttpRequest *r ) {
if(ip) {
char *beginning;
char ipbuf[64];//gotta NULL terminate for strstr
memcpy(ipbuf, deny, denyLen);
gbmemcpy(ipbuf, deny, denyLen);
ipbuf[denyLen] = '\0';
beginning = findToken(g_conf.m_banIps, ipbuf, denyLen);
if(!beginning) {
@ -1015,7 +1015,7 @@ bool AutoBan::printTable( TcpSocket *s , HttpRequest *r ) {
if(p - g_conf.m_banIps + denyLen + 2 <
AUTOBAN_TEXT_SIZE) {
*p++ = '\n';
memcpy(p, ipbuf,denyLen);
gbmemcpy(p, ipbuf,denyLen);
*(p + denyLen) = '\0';
}
else {
@ -1063,7 +1063,7 @@ bool AutoBan::printTable( TcpSocket *s , HttpRequest *r ) {
validCodesLen = 0;
}
else {
memcpy(g_conf.m_validCodes, validCodes, validCodesLen);
gbmemcpy(g_conf.m_validCodes, validCodes, validCodesLen);
g_conf.m_validCodes[validCodesLen] = '\0';
trimWhite(g_conf.m_validCodes);
setCodesFromConf();
@ -1097,7 +1097,7 @@ bool AutoBan::printTable( TcpSocket *s , HttpRequest *r ) {
removeIp(m_detectKeys[i]);
}
}
memcpy(g_conf.m_banIps, banIps, banIpsLen);
gbmemcpy(g_conf.m_banIps, banIps, banIpsLen);
g_conf.m_banIps[banIpsLen] = '\0';
changed = true;
}
@ -1121,7 +1121,7 @@ bool AutoBan::printTable( TcpSocket *s , HttpRequest *r ) {
removeIp(m_detectKeys[i]);
}
}
memcpy(g_conf.m_allowIps, allowIps, allowIpsLen);
gbmemcpy(g_conf.m_allowIps, allowIps, allowIpsLen);
g_conf.m_allowIps[allowIpsLen] = '\0';
changed = true;
}

@ -966,7 +966,7 @@ void *readwriteWrapper_r ( void *state , ThreadEntry *t ) {
// save a copy on the stack in case fstate gets pulled out from under us
FileState tmp;
memcpy ( &tmp , orig , sizeof(FileState ));
gbmemcpy ( &tmp , orig , sizeof(FileState ));
FileState *fstate = &tmp;
// let Threads::bailOnReads() know we can be bailed on now since

@ -523,7 +523,7 @@ void Blaster::gotDoc1( void *state, TcpSocket *s){
// null character there. So as a precaution, just allocating the
// max buf size.
st->m_buf1=(char*) mcalloc(s->m_readBufSize,"Blaster5");
memcpy(st->m_buf1,s->m_readBuf,s->m_readOffset);
gbmemcpy(st->m_buf1,s->m_readBuf,s->m_readOffset);
//st->m_buf1=(char*) mdup(s->m_readBuf,s->m_readOffset,"Blaster5");
st->m_buf1Len=s->m_readOffset;
st->m_buf1MaxLen=s->m_readBufSize;

@ -55,7 +55,7 @@ bool CatRec::set ( Url *url , char *data , int32_t dataSize , bool gotByIp ) {
// assume url does not have a rec in tagdb
m_hadRec = false;
// set our collection
//if ( coll ) memcpy ( m_coll , coll , collLen );
//if ( coll ) gbmemcpy ( m_coll , coll , collLen );
//m_collLen = collLen;
// . if "data" is i guess the rec did not exist... so make a dummy rec
// . MDW: why?
@ -80,7 +80,7 @@ bool CatRec::set ( Url *url , char *data , int32_t dataSize , bool gotByIp ) {
return false;
}
// copy the raw data
memcpy(m_data, data, dataSize);
gbmemcpy(m_data, data, dataSize);
m_dataSize = dataSize;
// set up a parsing ptr into "data"
//char *p = data;
@ -304,11 +304,11 @@ bool CatRec::set ( Url *site ,
//if (catids) {
//if (rdbId == RDB_CATDB) {
// add the count
memcpy(p, &m_numCatids, 1);
gbmemcpy(p, &m_numCatids, 1);
p++;
// add the ids
m_catids = (int32_t*)p;
memcpy(p, catids, 4*m_numCatids);
gbmemcpy(p, catids, 4*m_numCatids);
// skip over "numCatids" NOT m_numCatids which is TRUNCATED
// to MAX_CATIDS
p += 4*numCatids;
@ -318,13 +318,13 @@ bool CatRec::set ( Url *site ,
// store the filenum (3 bytes)
//*(int32_t *) p = filenum ; p += 4;
//int32_t filenum = 0; // make this 0 for catdb rec: MDW
memcpy(p, &filenum, 3); p += 3;
gbmemcpy(p, &filenum, 3); p += 3;
// store the version (1 byte)
*p = m_version; p++;
// the site
m_url = p;
m_urlLen = site->getUrlLen();
memcpy ( p , site->getUrl() , site->getUrlLen() );
gbmemcpy ( p , site->getUrl() , site->getUrlLen() );
p += site->getUrlLen();
// NULL terminate the site
if ( m_version >= 1 ) {
@ -335,7 +335,7 @@ bool CatRec::set ( Url *site ,
if ( m_version >= 2 && rdbId != RDB_CATDB ) {
// time stamp
m_timeStamp = timeStamp;
memcpy(p, &timeStamp, 4);
gbmemcpy(p, &timeStamp, 4);
p += 4;
// comment
m_comment = p;
@ -424,7 +424,7 @@ bool CatRec::set ( Url *site ,
// steal ip from "site"
m_site.setIp ( site->getIp() );
// save the collection into m_coll
//memcpy ( m_coll , coll , collLen );
//gbmemcpy ( m_coll , coll , collLen );
//m_collLen = collLen;
// save the fileNum as well
//m_filenum = filenum;
@ -509,7 +509,7 @@ void CatRec::setIndirectCatids ( int32_t *indCatids, int32_t numIndCatids ) {
if ( m_numIndCatids > MAX_IND_CATIDS )
m_numIndCatids = MAX_IND_CATIDS;
// store the ids
memcpy ( m_indCatids, indCatids, m_numIndCatids*4 );
gbmemcpy ( m_indCatids, indCatids, m_numIndCatids*4 );
}
/*
@ -792,7 +792,7 @@ uint32_t CatRec::getScoreForType(uint8_t type) {
void CatRec::setFilenum ( int32_t filenum ) {
m_filenum = filenum;
// gotta update the m_data[] buffer too!
memcpy(m_filenumPtr, &filenum, 3);
gbmemcpy(m_filenumPtr, &filenum, 3);
}
void CatRec::addSiteType ( uint8_t type, uint32_t score ) {
@ -817,11 +817,11 @@ void CatRec::addSiteType ( uint8_t type, uint32_t score ) {
// how much to shift down
int32_t toShift = m_data + m_dataSize - p;
// shift it
memcpy ( p + totalSize , p , toShift );
gbmemcpy ( p + totalSize , p , toShift );
// store new type
*(uint8_t *)p = type; p++;
// store new score
memcpy ( p , &score , scoreSize );
gbmemcpy ( p , &score , scoreSize );
// inc data size
m_dataSize += totalSize;
// inc this guy

@ -200,7 +200,7 @@ bool Catdb::verify ( char *coll ) {
void Catdb::normalizeUrl ( Url *srcUrl, Url *dstUrl ) {
char urlStr[MAX_URL_LEN];
int32_t urlStrLen = srcUrl->getUrlLen();
memcpy(urlStr, srcUrl->getUrl(), urlStrLen);
gbmemcpy(urlStr, srcUrl->getUrl(), urlStrLen);
// fix the url
urlStrLen = g_categories->fixUrl(urlStr, urlStrLen);
// create the normalized url
@ -477,7 +477,7 @@ int32_t Catdb::getIndirectMatches ( RdbList *list ,
Url partialUrl;
key_t partialUrlKey;
// start with the whole url...include real catid in indirect
memcpy(path, url->getUrl(), url->getUrlLen());
gbmemcpy(path, url->getUrl(), url->getUrlLen());
pathLen = url->getUrlLen();
// loop looking for partial matches
char *data = NULL;
@ -516,10 +516,10 @@ int32_t Catdb::getIndirectMatches ( RdbList *list ,
//char msg[4096];
//char *mp = msg;
//mp += sprintf(mp, "For ");
//memcpy(mp, url->getUrl(), url->getUrlLen());
//gbmemcpy(mp, url->getUrl(), url->getUrlLen());
//mp += url->getUrlLen();
//mp += sprintf(mp, " , got Indirect: ");
//memcpy(mp, x, xlen);
//gbmemcpy(mp, x, xlen);
//mp += xlen;
//*mp = '\0';
//log ( LOG_INFO, "tagdb: %s", msg );

@ -124,25 +124,25 @@ int32_t Categories::loadCategories ( char *filename ) {
return 1;
}
char *p = tempBuffer;
memcpy ( m_nameBuffer, p, m_nameBufferSize );
gbmemcpy ( m_nameBuffer, p, m_nameBufferSize );
p += m_nameBufferSize;
// read and fill the cats
for (int32_t i = 0; i < m_numCats; i++) {
memcpy(&m_cats[i].m_catid, p, sizeof(int32_t));
gbmemcpy(&m_cats[i].m_catid, p, sizeof(int32_t));
p += sizeof(int32_t);
memcpy(&m_cats[i].m_parentid, p, sizeof(int32_t));
gbmemcpy(&m_cats[i].m_parentid, p, sizeof(int32_t));
p += sizeof(int32_t);
memcpy(&m_cats[i].m_nameOffset, p, sizeof(int32_t));
gbmemcpy(&m_cats[i].m_nameOffset, p, sizeof(int32_t));
p += sizeof(int32_t);
memcpy(&m_cats[i].m_nameLen, p, sizeof(int16_t));
gbmemcpy(&m_cats[i].m_nameLen, p, sizeof(int16_t));
p += sizeof(int16_t);
memcpy(&m_cats[i].m_structureOffset, p, sizeof(int32_t));
gbmemcpy(&m_cats[i].m_structureOffset, p, sizeof(int32_t));
p += sizeof(int32_t);
memcpy(&m_cats[i].m_contentOffset, p, sizeof(int32_t));
gbmemcpy(&m_cats[i].m_contentOffset, p, sizeof(int32_t));
p += sizeof(int32_t);
memcpy(&m_cats[i].m_numUrls, p, sizeof(int32_t));
gbmemcpy(&m_cats[i].m_numUrls, p, sizeof(int32_t));
p += sizeof(int32_t);
/*
@ -205,7 +205,7 @@ int32_t Categories::loadCategories ( char *filename ) {
}
*/
memcpy(&m_catHash[i].m_hash, p, sizeof(int32_t));
gbmemcpy(&m_catHash[i].m_hash, p, sizeof(int32_t));
p += sizeof(int32_t);
// assign the index
@ -951,13 +951,13 @@ nextTag:
/*
urlStrLen = htmlDecode(decodedUrl, urlStr, urlStrLen,false,
niceness);
memcpy(urlStr, decodedUrl, urlStrLen);
gbmemcpy(urlStr, decodedUrl, urlStrLen);
normUrl.set(urlStr, urlStrLen, true);
g_catdb.normalizeUrl(&normUrl, &normUrl);
// copy it back
urlStrLen = normUrl.getUrlLen();
memcpy(urlStr, normUrl.getUrl(), urlStrLen);
gbmemcpy(urlStr, normUrl.getUrl(), urlStrLen);
// make sure there's a trailing / on root urls
// and no www.
//urlStrLen = fixUrl(urlStr, urlStrLen);
@ -1011,7 +1011,7 @@ nextTag:
if (urlAnchor) {
if (urlAnchorLen > maxAnchorLen)
urlAnchorLen = maxAnchorLen;
memcpy(anchor, urlAnchor, urlAnchorLen);
gbmemcpy(anchor, urlAnchor, urlAnchorLen);
*anchorLen = urlAnchorLen;
}
else
@ -1062,11 +1062,11 @@ bool Categories::getTitleAndSummary ( char *urlOrig,
char* p;
uint32_t readSize;
// fix the original url
//memcpy(url, urlOrig, urlOrigLen);
//gbmemcpy(url, urlOrig, urlOrigLen);
//urlLen = fixUrl(url, urlOrigLen);
normUrl.set(urlOrig, urlOrigLen, true);
g_catdb.normalizeUrl(&normUrl, &normUrl);
memcpy(url, normUrl.getUrl(), normUrl.getUrlLen());
gbmemcpy(url, normUrl.getUrl(), normUrl.getUrlLen());
urlLen = normUrl.getUrlLen();
// lookup the index for this catid
catIndex = getIndexFromId(catid);
@ -1146,7 +1146,7 @@ nextTag:
// html decode the url
urlStrLen = htmlDecode(decodedUrl, urlStr, urlStrLen,false,
niceness);
memcpy(urlStr, decodedUrl, urlStrLen);
gbmemcpy(urlStr, decodedUrl, urlStrLen);
// normalize with Url
//normUrl.set(urlStr, urlStrLen, false, false, false, true);
normUrl.set(urlStr, urlStrLen, true);
@ -1163,7 +1163,7 @@ nextTag:
}
// copy it back
urlStrLen = normUrl.getUrlLen();
memcpy(urlStr, normUrl.getUrl(), urlStrLen);
gbmemcpy(urlStr, normUrl.getUrl(), urlStrLen);
// make sure there's a trailing / on root urls
// and no www.
//urlStrLen = fixUrl(urlStr, urlStrLen);
@ -1208,7 +1208,7 @@ foundTag:
if (urlAnchor) {
if (urlAnchorLen > maxAnchorLen)
urlAnchorLen = maxAnchorLen;
memcpy(anchor, urlAnchor, urlAnchorLen);
gbmemcpy(anchor, urlAnchor, urlAnchorLen);
*anchorLen = urlAnchorLen;
}
else
@ -1357,7 +1357,7 @@ nextTag:
catStrLen ,
false,
0);
memcpy(catStr, htmlDecoded, catStrLen);
gbmemcpy(catStr, htmlDecoded, catStrLen);
// reset this offset
nameStart = 0;
nameLen = catStrLen;
@ -1432,10 +1432,10 @@ nextTag:
cat->m_nameLen = nameLen;
cat->m_type = currType;
p = cat->m_buf;
memcpy ( p , catStr + prefixStart , prefixLen );
gbmemcpy ( p , catStr + prefixStart , prefixLen );
p += prefixLen;
*p++ = '\0';
memcpy ( p , catStr + nameStart , nameLen );
gbmemcpy ( p , catStr + nameStart , nameLen );
p += nameLen;
*p++ = '\0';
@ -1446,13 +1446,13 @@ nextTag:
subCats[numSubCats].m_prefixOffset = catp;
subCats[numSubCats].m_prefixLen = prefixLen;
if (prefixLen > 0) {
memcpy(&((*catBuffer)[catp]), &catStr[prefixStart], prefixLen);
gbmemcpy(&((*catBuffer)[catp]), &catStr[prefixStart], prefixLen);
catp += prefixLen;
}
subCats[numSubCats].m_nameOffset = catBuf->length();//catp;
subCats[numSubCats].m_nameLen = nameLen;
if (nameLen > 0) {
memcpy(&((*catBuffer)[catp]), &catStr[nameStart], nameLen);
gbmemcpy(&((*catBuffer)[catp]), &catStr[nameStart], nameLen);
catp += nameLen;
}
subCats[numSubCats].m_type = currType;
@ -1520,7 +1520,7 @@ int32_t Categories::createDirSearchRequest ( char *requestBuf,
, catid
, catid);
// coll
memcpy(p, coll, collLen);
gbmemcpy(p, coll, collLen);
p += collLen;
// add extra cgi if we have it and have room
if ( cgi && cgiLen > 0 && p + cgiLen + 76 < pend ) {
@ -1543,13 +1543,13 @@ int32_t Categories::createDirSearchRequest ( char *requestBuf,
}
}
else {
memcpy(p, cgi, cgiLen);
gbmemcpy(p, cgi, cgiLen);
p += cgiLen;
}
}
// hostname
p += sprintf(p, " HTTP/1.0\r\nHost: http://");
memcpy(p, hostname, hostnameLen);
gbmemcpy(p, hostname, hostnameLen);
p += hostnameLen;
// rest of the request
p += sprintf(p, "\r\n"

@ -68,7 +68,7 @@ static void clusterGetPages ( DiskPageCache *pc,
if ( bufPtr + size >= bufEnd )
size = bufEnd - bufPtr;
// copy the list into the buffer
memcpy ( bufPtr, cacheList.m_list, size );
gbmemcpy ( bufPtr, cacheList.m_list, size );
// advance to the next page
bufPtr += size;
*newOffset += size;
@ -457,7 +457,7 @@ void Clusterdb::getSampleVector ( char *vec ,
// so get it
char *p = doc->getSampleVector ( );
// and store it. int16_t vectors are padded with 0's.
memcpy ( vec , p , SAMPLE_VECTOR_SIZE );
gbmemcpy ( vec , p , SAMPLE_VECTOR_SIZE );
}
*/
@ -604,7 +604,7 @@ void Clusterdb::getSampleVector ( char *vec , TermTable *table ) {
if ( nd > SAMPLE_VECTOR_LEN - 1 ) nd = SAMPLE_VECTOR_LEN - 1;
// make sure last component is a 0
d [ nd ] = 0;
memcpy ( vec , (char *)d , (nd+1) * 4 );
gbmemcpy ( vec , (char *)d , (nd+1) * 4 );
}
*/
@ -842,7 +842,7 @@ void Clusterdb::makeRecFromTitleRec ( char *rec,
lang,
siteHash,
false );
memcpy(rec, &key, sizeof(key_t));
gbmemcpy(rec, &key, sizeof(key_t));
}
void Clusterdb::makeRecFromTitleRecKey ( char *rec,
@ -862,6 +862,6 @@ void Clusterdb::makeRecFromTitleRecKey ( char *rec,
0,
siteHash,
false );
memcpy(rec, &ckey, sizeof(key_t));
gbmemcpy(rec, &ckey, sizeof(key_t));
}
*/

@ -424,9 +424,9 @@ bool Collectiondb::addNewColl ( char *coll ,
// if ( cpcrec ) {
// // copy it, but not the timedb hashtable, etc.
// int32_t size = (char *)&(cpcrec->m_END_COPY) - (char *)cpcrec;
// // JAB: bad memcpy - no donut!
// // JAB: bad gbmemcpy - no donut!
// // this is not how objects are supposed to be copied!!!
// memcpy ( cr , cpcrec , size);
// gbmemcpy ( cr , cpcrec , size);
// }
// set coll id and coll name for coll id #i
@ -1869,7 +1869,7 @@ bool CollectionRec::load ( char *coll , int32_t i ) {
if ( sb.fillFromFile ( tmp1 ) > 0 )
//m_localCrawlInfo.setFromSafeBuf(&sb);
// it is binary now
memcpy ( &m_localCrawlInfo , sb.getBufStart(),sb.length() );
gbmemcpy ( &m_localCrawlInfo , sb.getBufStart(),sb.length() );
if ( ! g_conf.m_doingCommandLine )
@ -1917,7 +1917,7 @@ bool CollectionRec::load ( char *coll , int32_t i ) {
if ( sb.fillFromFile ( tmp1 ) > 0 )
//m_globalCrawlInfo.setFromSafeBuf(&sb);
// it is binary now
memcpy ( &m_globalCrawlInfo , sb.getBufStart(),sb.length() );
gbmemcpy ( &m_globalCrawlInfo , sb.getBufStart(),sb.length() );
if ( ! g_conf.m_doingCommandLine )
log("coll: Loaded %s (%"INT32") global hasurlsready=%"INT32"",
@ -3424,12 +3424,14 @@ bool CollectionRec::rebuildUrlFiltersDiffbot() {
//
// if they did not EXPLICITLY provide a url crawl pattern or
// url crawl regex then restrict to seeds to prevent from spidering
// the entire internet
if ( ! ucp && ! m_hasucr ) { // m_restrictDomain ) {
m_regExs[i].set("!isonsamedomain && !ismanualadd");
m_spiderPriorities [i] = SPIDER_PRIORITY_FILTERED;
i++;
}
// the entire internet.
//if ( ! ucp && ! m_hasucr ) { // m_restrictDomain ) {
// MDW: even if they supplied a crawl pattern let's restrict to seed
// domains 12/15/14
m_regExs[i].set("!isonsamedomain && !ismanualadd");
m_spiderPriorities [i] = SPIDER_PRIORITY_FILTERED;
i++;
//}
bool ucpHasPositive = false;
// . scan them to see if all patterns start with '!' or not

@ -552,7 +552,7 @@ class CollectionRec {
float m_languageUnknownWeight;
float m_languageWeightFactor;
char m_enableLanguageSorting;
char m_defaultSortLanguage[6];
char m_defaultSortLanguage2[6];
char m_languageMethodWeights[10];
int32_t m_languageBailout;
int32_t m_languageThreshold;
@ -585,8 +585,8 @@ class CollectionRec {
//int32_t m_checksumdbMinFilesToMerge ;
//int32_t m_clusterdbMinFilesToMerge ;
//int32_t m_datedbMinFilesToMerge ;
//int32_t m_linkdbMinFilesToMerge ;
//int32_t m_tagdbMinFilesToMerge ;
int32_t m_linkdbMinFilesToMerge ;
int32_t m_tagdbMinFilesToMerge ;
//char m_spiderdbRootUrlPriority ; // 0-(MAX_SPIDER_PRIORITIES-1)
//char m_spiderdbAddUrlPriority ;

4
Conf.h

@ -103,6 +103,8 @@ class Conf {
char m_save;
bool m_runAsDaemon;
bool m_logToFile;
bool m_isLocal;
@ -827,7 +829,7 @@ class Conf {
char m_rebuildRoots ;
char m_rebuildNonRoots ;
char m_rebuildSkipSitedbLookup ;
//char m_rebuildSkipSitedbLookup ;
// for caching the qualities of urls (see Msg20.cpp)
int32_t m_maxQualityCacheAge ;

@ -1219,7 +1219,7 @@ uint8_t s_getLangIdxFromDMOZ(char *topic, int len) {
int limit = len;
if(limit > 2047) limit = 2047;
memset(buf, 0, 2048);
memcpy(buf, topic, limit);
gbmemcpy(buf, topic, limit);
if(gbstrlen(buf) < 1) return(0);
for(int x = 2; x < langTagalog; x++) {
if(x == 5) continue;
@ -1323,7 +1323,7 @@ int CountryCode::lookupCountryFromDMOZTopic(const char *catname, int len) {
if(!s_countryRegex) return(0);
char buf[2049];
if(len > 2047) len = 2047;
memcpy(buf, catname, len);
gbmemcpy(buf, catname, len);
buf[len+1] = 0;
if(gbstrlen(buf) < 1) return(0);
for(int x = 1; x < s_numCountryCodes; x++)

@ -72,9 +72,9 @@ void DailyMerge::dailyMergeLoop ( ) {
// must have got a ping reply from him
if ( ! h->m_gotPingReply ) return;
// hostid #0 must NOT be in mode 0
if ( h->m_flags & PFLAG_MERGEMODE0 ) return;
if ( h->m_pingInfo.m_flags & PFLAG_MERGEMODE0 ) return;
// get the collnum that host #0 is currently daily merging
i = g_hostdb.m_hosts[0].m_dailyMergeCollnum;
i = g_hostdb.m_hosts[0].m_pingInfo.m_dailyMergeCollnum;
// this means host #0 is not daily merging a collnum now
if ( i < 0 ) return;
// if it is valid, the CollectionRec MUST be there
@ -152,7 +152,8 @@ void DailyMerge::dailyMergeLoop ( ) {
if ( &g_hostdb.m_hosts[i] == g_hostdb.m_myHost )
continue;
// that's good if he is in mode 0
if ( g_hostdb.m_hosts[i].m_flags & PFLAG_MERGEMODE0 )
if ( g_hostdb.m_hosts[i].m_pingInfo.m_flags &
PFLAG_MERGEMODE0 )
continue;
// oops, someone is not mode 0
return;
@ -197,7 +198,7 @@ void DailyMerge::dailyMergeLoop ( ) {
if ( g_hostdb.isDead(h) )
continue;
// return if a host still in merge mode 0. wait for it.
if ( h->m_flags & PFLAG_MERGEMODE0 )
if ( h->m_pingInfo.m_flags & PFLAG_MERGEMODE0 )
return;
}
// ok, everyone is out of mode 0 now
@ -218,7 +219,8 @@ void DailyMerge::dailyMergeLoop ( ) {
if ( &g_hostdb.m_hosts[i] == g_hostdb.m_myHost )
continue;
// if host still has spiders out, we can't go to mode 4
if ( g_hostdb.m_hosts[i].m_flags & PFLAG_HASSPIDERS )
if ( g_hostdb.m_hosts[i].m_pingInfo.m_flags &
PFLAG_HASSPIDERS )
return;
}
// ok, nobody has spiders now
@ -314,7 +316,8 @@ void DailyMerge::dailyMergeLoop ( ) {
if ( &g_hostdb.m_hosts[i] == g_hostdb.m_myHost )
continue;
// if host in mode 6 or 0, that's good
if ( g_hostdb.m_hosts[i].m_flags & PFLAG_MERGEMODE0OR6)
if ( g_hostdb.m_hosts[i].m_pingInfo.m_flags &
PFLAG_MERGEMODE0OR6)
continue;
// otherwise, wait for it to be in 6 or 0
return;

@ -89,10 +89,10 @@ public:
m_numTiers = pt->m_numTiers;
m_numResultLevels = pt->m_numResultLevels;
m_monthlyFee = pt->m_monthlyFee;
memcpy(m_tierMax, pt->m_tierMax, sizeof(int32_t)*m_numTiers);
memcpy(m_resultLevels, pt->m_resultLevels, sizeof(int32_t)*m_numResultLevels);
gbmemcpy(m_tierMax, pt->m_tierMax, sizeof(int32_t)*m_numTiers);
gbmemcpy(m_resultLevels, pt->m_resultLevels, sizeof(int32_t)*m_numResultLevels);
int32_t numCosts = m_numTiers*m_numResultLevels*2;
memcpy(m_levelCosts, pt->m_levelCosts, sizeof(int32_t)*numCosts);
gbmemcpy(m_levelCosts, pt->m_levelCosts, sizeof(int32_t)*numCosts);
}
*/
// locals

@ -144,7 +144,7 @@ class Datedb {
// extract the termId from a key
int64_t getTermId ( key128_t *k ) {
int64_t termId = 0LL;
memcpy ( &termId , ((char *)k) + 10 , 6 );
gbmemcpy ( &termId , ((char *)k) + 10 , 6 );
return termId ;
};

@ -23287,7 +23287,7 @@ int32_t Dates::addIntervals ( Date *di ,
// copy into this buffer
Interval buf[MAX_INTERVALS];
if ( ni > MAX_INTERVALS ) ni = MAX_INTERVALS;
memcpy(buf,retInt,ni*sizeof(Interval));
gbmemcpy(buf,retInt,ni*sizeof(Interval));
// store here
Interval *dst = retInt;
@ -23636,7 +23636,7 @@ int32_t Dates::addIntervalsB ( Date *di ,
// we are already using that as the accumulator, return now
if ( int1 == retInt ) return ni1;
// ok, do the copy
memcpy ( retInt , int1 , ni1 * sizeof(Interval) );
gbmemcpy ( retInt , int1 , ni1 * sizeof(Interval) );
// return how many intervals are in "retInt"
return ni1;
}
@ -24674,11 +24674,11 @@ int32_t Dates::intersect3 ( Interval *int1 ,
//return p3-int3;
}
if ( p1 >= p1end ) {
//memcpy ( p3 , p2 , (p2end - p2) * sizeof(Interval) );
//gbmemcpy ( p3 , p2 , (p2end - p2) * sizeof(Interval) );
//p3 += p2end - p2;
if ( unionOp ) {
if ( p3 + (p2end - p2) > p3max ) goto overflow;
memcpy ( p3 , p2 , (p2end - p2) * sizeof(Interval) );
gbmemcpy ( p3 , p2 , (p2end - p2) * sizeof(Interval) );
p3 += p2end - p2;
}
return p3-int3;
@ -24689,12 +24689,12 @@ int32_t Dates::intersect3 ( Interval *int1 ,
// then the rest of p1 survives
if ( subtractint2 ) {
if ( p3 + (p1end - p1) > p3max ) goto overflow;
memcpy ( p3 , p1 , (p1end - p1) * sizeof(Interval) );
gbmemcpy ( p3 , p1 , (p1end - p1) * sizeof(Interval) );
p3 += p1end - p1;
}
else if ( unionOp ) {
if ( p3 + (p1end - p1) > p3max ) goto overflow;
memcpy ( p3 , p1 , (p1end - p1) * sizeof(Interval) );
gbmemcpy ( p3 , p1 , (p1end - p1) * sizeof(Interval) );
p3 += p1end - p1;
}
return p3-int3;
@ -26632,9 +26632,9 @@ bool Dates::printNormalizedTime ( Date *dx ,
struct tm1;
struct tm2;
ts = gmtime ( &ii->m_a );
memcpy ( &tm1 , ts , sizeof(tm) );
gbmemcpy ( &tm1 , ts , sizeof(tm) );
ts = gmtime ( &ii->m_b );
memcpy ( &tm2 , ts , sizeof(tm) );
gbmemcpy ( &tm2 , ts , sizeof(tm) );
// just copy it
cd[i]->m_year1 = tm1->tm_year + 1900;
cd[i]->m_year2 = tm2->tm_year + 1900;

@ -264,7 +264,7 @@ bool DiskPageCache::init ( const char *dbname ,
// a malloc tag, must be LESS THAN 16 bytes including the NULL
char *p = m_memTag;
memcpy ( p , "pgcache-" , 8 ); p += 8;
gbmemcpy ( p , "pgcache-" , 8 ); p += 8;
if ( dbname ) strncpy ( p , dbname , 8 );
// so we know what db we are caching for
m_dbname = p;
@ -489,7 +489,7 @@ void DiskPageCache::getPages ( int32_t vfd ,
// . start reading at an offset of "HEADERSIZE+start1" into
// the memory page
readFromCache(bufPtr, poff, HEADERSIZE + start1 , size);
//memcpy ( bufPtr , s + HEADERSIZE + start1 , size );
//gbmemcpy ( bufPtr , s + HEADERSIZE + start1 , size );
bufPtr += size;
*newOffset += size;
*newNumBytes -= size;
@ -529,7 +529,7 @@ void DiskPageCache::getPages ( int32_t vfd ,
promotePage ( s , poff , false );
// don't store more than asked for
if ( bufEnd - size < bufPtr ) size = bufEnd - bufPtr;
memcpy ( bufEnd - size , s + HEADERSIZE + start2 , size );
gbmemcpy ( bufEnd - size , s + HEADERSIZE + start2 , size );
bufEnd -= size;
*newNumBytes -= size;
// return if we got it all
@ -572,7 +572,7 @@ void DiskPageCache::addPages ( int32_t vfd,
// sometimes the file got unlinked on us
if ( ! m_memOffFromDiskPage[vfd] ) return;
// for some reason profiler cores all the time in here
if ( g_profiler.m_realTimeProfilerRunning ) return;
//if ( g_profiler.m_realTimeProfilerRunning ) return;
// . "diskPageNum" is the first DISK page #
// . "offset" is the offset on disk the data was read from
@ -627,7 +627,7 @@ char *DiskPageCache::getMemPtrFromMemOff ( int32_t off ) {
// for some reason profiler cores all the time in here
// and m_numPageSets is 0 like we got reset
if ( g_profiler.m_realTimeProfilerRunning ) return NULL;
//if ( g_profiler.m_realTimeProfilerRunning ) return NULL;
// get set number
int32_t sn = off / m_maxPageSetSize ;
@ -826,7 +826,7 @@ void DiskPageCache::addPage(int32_t vfd,
// disk data because it is not aligned perfectly with the mem page.
writeToCache( poff, HEADERSIZE + skip, pageData, size);
//memcpy ( p + HEADERSIZE + skip , page , size );
//gbmemcpy ( p + HEADERSIZE + skip , page , size );
// transform mem ptr to memory offset
//if ( !m_useRAMDisk && ! m_useSHM ) {
@ -904,7 +904,7 @@ void DiskPageCache::enhancePage (int32_t poff, char *page, int32_t size,
return;
}
writeToCache(poff, HEADERSIZE + skip , page , diff);
//memcpy ( p + HEADERSIZE + skip , page , diff );
//gbmemcpy ( p + HEADERSIZE + skip , page , diff );
psize += diff;
pskip -= diff;
writeToCache(poff, OFF_SIZE, &psize, sizeof(int32_t));
@ -921,7 +921,7 @@ void DiskPageCache::enhancePage (int32_t poff, char *page, int32_t size,
// we don't want any holes...
if ( diff > size ) return;
writeToCache(poff, HEADERSIZE + pend, page + size - diff, diff);
//memcpy ( p + HEADERSIZE + pend , page + size - diff , diff );
//gbmemcpy ( p + HEADERSIZE + pend , page + size - diff , diff );
int32_t tmp = psize+diff;
writeToCache(poff, OFF_SIZE, &tmp, sizeof(int32_t));
//*(int32_t *)(p+OFF_SIZE) = (int32_t)psize + diff;
@ -1437,7 +1437,7 @@ void DiskPageCache::writeToCache( int32_t memOff,
// (int32_t)shmid);
}
// store it into the cache
memcpy ( mem + poff , inBuf , size );
gbmemcpy ( mem + poff , inBuf , size );
return;
}
#endif
@ -1453,7 +1453,7 @@ void DiskPageCache::writeToCache( int32_t memOff,
*/
char *p = getMemPtrFromMemOff ( memOff );
memcpy(p + memPageOff, inBuf, size);
gbmemcpy(p + memPageOff, inBuf, size);
}
// . store cached disk info into "outBuf". up to "size" bytes of it.
@ -1507,7 +1507,7 @@ void DiskPageCache::readFromCache( void *outBuf,
// (int32_t)shmid);
}
// store it in outBuf
memcpy ( outBuf , mem + poff , size );
gbmemcpy ( outBuf , mem + poff , size );
return;
}
#endif
@ -1524,7 +1524,7 @@ void DiskPageCache::readFromCache( void *outBuf,
*/
// the old fashioned way
char *p = getMemPtrFromMemOff ( memOff );
memcpy(outBuf, p + pageOffset, bytesToCopy );
gbmemcpy(outBuf, p + pageOffset, bytesToCopy );
}
// lastly, we need some way to "force" a merge at around midnight when traffic

37
Dns.cpp

@ -169,7 +169,7 @@ inline bool parseTLD(DnsState* ds, char* buf, int32_t* len) {
if (cend - cbeg > *len - 1)
return false;
*len = cend - cbeg;
memcpy(buf, cbeg, *len);
gbmemcpy(buf, cbeg, *len);
buf[*len] = '\0';
for (curs = buf; *curs; curs++)
*curs = to_lower_a(*curs);
@ -187,7 +187,7 @@ inline uint32_t TLDIPKey(char* buf, int32_t len) {
key = 0;
if (len > 4)
len = 4;
memcpy(&key, buf, len);
gbmemcpy(&key, buf, len);
return key;
}
@ -253,7 +253,7 @@ static void setTLDIP( DnsState* ds,
return;
}
char tld[64];
memcpy(tld, buf, len);
gbmemcpy(tld, buf, len);
tld[len] = '\0';
if (cached == NULL) {
uint32_t key = TLDIPKey(buf, len);
@ -267,7 +267,7 @@ static void setTLDIP( DnsState* ds,
}
else if (cached->expiry <= now) {
// JAB: non-const cast...
memcpy((TLDIPEntry*) cached, tldip, sizeof(TLDIPEntry));
gbmemcpy((TLDIPEntry*) cached, tldip, sizeof(TLDIPEntry));
log(LOG_DEBUG, "dns: TLD .%s NS cache update", buf);
dumpTLDIP(tld, tldip);
}
@ -315,7 +315,7 @@ bool Dns::getIp ( char *hostname ,
}
// debug msg
char tmp[256];
memcpy ( tmp , hostname , hostnameLen );
gbmemcpy ( tmp , hostname , hostnameLen );
tmp [ hostnameLen ] = '\0';
log(LOG_DEBUG, "dns: hostname '%s'", tmp);
@ -610,7 +610,7 @@ bool Dns::getIp ( char *hostname ,
ds->m_callback = callback;
int32_t newlen = hostnameLen;
if ( newlen > 127 ) newlen = 127;
memcpy ( ds->m_hostname , hostname , newlen );
gbmemcpy ( ds->m_hostname , hostname , newlen );
ds->m_hostname [ newlen ] = '\0';
// copy the sendBuf cuz we need it in gotIp() to ensure hostnames match
@ -629,7 +629,7 @@ bool Dns::getIp ( char *hostname ,
// this will fill in depth 1 in the query,
// if we have the nameservers cached...
log(LOG_DEBUG,"dns: hostname %s", ds->m_hostname);
memcpy(ds->m_dnsIps[0],g_conf.m_rnsIps, g_conf.m_numRns * 4);
gbmemcpy(ds->m_dnsIps[0],g_conf.m_rnsIps, g_conf.m_numRns * 4);
ds->m_numDnsIps[0] = g_conf.m_numRns;
ds->m_numDnsNames[0] = 0;
ds->m_rootTLD[0] = true;
@ -637,7 +637,7 @@ bool Dns::getIp ( char *hostname ,
// if a TLD is cached, copy it to depth 1
const TLDIPEntry* tldip = getTLDIP(ds);
if (tldip) {
memcpy( ds->m_dnsIps[1],
gbmemcpy( ds->m_dnsIps[1],
tldip->TLDIP,
tldip->numTLDIPs * sizeof(uint32_t));
ds->m_numDnsIps[1] = tldip->numTLDIPs;
@ -649,7 +649,7 @@ bool Dns::getIp ( char *hostname ,
}
// otherwise, use the local bind9 servers
else {
//memcpy(ds->m_dnsIps[0],g_conf.m_dnsIps,g_conf.m_numDns * 4);
//gbmemcpy(ds->m_dnsIps[0],g_conf.m_dnsIps,g_conf.m_numDns * 4);
int32_t numDns = 0;
for ( int32_t i = 0; i < MAX_DNSIPS; i++ ) {
if ( g_conf.m_dnsIps[i] == 0 ) continue;
@ -1555,7 +1555,7 @@ char *getRRName ( char *rr , char *dgram, char *end ) {
return NULL;
}
// copy the hostname
memcpy ( dst , p+1 , *p );
gbmemcpy ( dst , p+1 , *p );
dst += *p;
*dst++ = '.';
p += ((u_char)*p) + 1;
@ -1961,7 +1961,7 @@ int32_t Dns::gotIp ( UdpSlot *slot , DnsState *ds ) {
return -1;
}
// copy the hostname
memcpy ( dst , p+1 , *p );
gbmemcpy ( dst , p+1 , *p );
dst += *p;
*dst++ = '.';
p += ((u_char)*p) + 1;
@ -2021,7 +2021,7 @@ int32_t Dns::gotIp ( UdpSlot *slot , DnsState *ds ) {
const TLDIPEntry* cached;
if ( g_conf.m_askRootNameservers &&
(cached = getTLDIP(ds))) {
memcpy( ds->m_dnsIps[d],
gbmemcpy( ds->m_dnsIps[d],
cached->TLDIP,
cached->numTLDIPs *
sizeof(uint32_t));
@ -2033,7 +2033,7 @@ int32_t Dns::gotIp ( UdpSlot *slot , DnsState *ds ) {
}
else if ( g_conf.m_askRootNameservers ) {
memcpy ( ds->m_dnsIps[d],
gbmemcpy ( ds->m_dnsIps[d],
g_conf.m_rnsIps,
g_conf.m_numRns * 4);
ds->m_numDnsIps[d] = g_conf.m_numRns;
@ -2044,7 +2044,7 @@ int32_t Dns::gotIp ( UdpSlot *slot , DnsState *ds ) {
}
// otherwise, use the local bind9 servers
else {
memcpy ( ds->m_dnsIps[d],
gbmemcpy ( ds->m_dnsIps[d],
g_conf.m_dnsIps,
g_conf.m_numDns * 4);
ds->m_numDnsIps[d] = g_conf.m_numDns;
@ -2079,7 +2079,7 @@ int32_t Dns::gotIp ( UdpSlot *slot , DnsState *ds ) {
// . now "s" should pt to the resource data, hopefully the ip
// . add another ip to our array and inc numIps
// . ips should be in network order
uint32_t ip ; memcpy ( (char *)&ip , rr , 4 );
uint32_t ip ; gbmemcpy ( (char *)&ip , rr , 4 );
// verisign's ip is a does not exist
if ( (int32_t)ip == 0x0b6e5e40) { //atoip ( "64.94.110.11",12)) {
@ -2091,6 +2091,10 @@ int32_t Dns::gotIp ( UdpSlot *slot , DnsState *ds ) {
return 0;
}
// this is no longer needed since ppl use gb to spider
// internal intranets now, not just the web. however, be
// careful we don't spider sensitive gb info as a proxy!!!
/*
unsigned char *ipstr = (unsigned char *)&ip;
if ( (int32_t)ip == 0x0100007f || // aotip("127.0.0.1")
(ipstr[0]==192 && ipstr[1]==168) ||
@ -2103,6 +2107,7 @@ int32_t Dns::gotIp ( UdpSlot *slot , DnsState *ds ) {
addToCache ( ds->m_hostnameKey , 0 );
return 0;
}
*/
// debug msg
//fprintf(stderr,".... got ip=%s for %s\n",iptoa(ip),hostname);
@ -2302,7 +2307,7 @@ bool Dns::extractHostname ( char *dgram ,
if ( i + len >= 255 ) { g_errno = EBADREPLY; return false; }
if ( src + len >= end ) { g_errno = EBADREPLY; return false; }
// copy to hostname
memcpy ( &hostname[i] , record + 1 , len );
gbmemcpy ( &hostname[i] , record + 1 , len );
// if we had a ptr then just add 2
if ( times > 0 ) record += 2;
else record += 1 + len;

@ -150,7 +150,7 @@ class DnsProtocol : public UdpProtocol {
bool hadError ,
int32_t niceness ) {
//if ( msgSize > maxDgramSize ) msgSize = maxDgramSize;
//memcpy ( dgram , msg , msgSize );
//gbmemcpy ( dgram , msg , msgSize );
//return msgSize;
return;
}

@ -188,6 +188,7 @@ case EDMOZNOTREADY: return "Dmoz is not setup, follow instructions in "
case EPROXYSSLCONNECTFAILED: return "SSL tunnel through HTTP proxy failed";
case EINLINESECTIONS: return "Error generating section votes";
case EREADONLYMODE: return "In read only mode. Failed.";
case ENOTITLEREC: return "No title rec found when recycling content";
}
// if the remote error bit is clear it must be a regular errno
//if ( ! ( errnum & REMOTE_ERROR_BIT ) ) return strerror ( errnum );

@ -192,6 +192,7 @@ enum {
EDMOZNOTREADY,
EPROXYSSLCONNECTFAILED,
EINLINESECTIONS,
EREADONLYMODE
EREADONLYMODE,
ENOTITLEREC
};
#endif

@ -526,7 +526,7 @@ bool Msgfb::gotFBAccessToken ( TcpSocket *s ) {
for ( ; *p && *p != '&' ;p++ );
int32_t len = p - start;
if ( len > MAX_TOKEN_LEN ) { char *xx=NULL;*xx=0; }
memcpy ( m_accessToken , start , len );
gbmemcpy ( m_accessToken , start , len );
m_accessToken [ len ] = '\0';
}
@ -4120,7 +4120,7 @@ bool Msgfc::addLikedbTag ( int64_t userId ,
eventHash64 ,
value );
// add to list otherwise
memcpy ( p , recs , size );
gbmemcpy ( p , recs , size );
p += size;
count += 2;
@ -4488,7 +4488,7 @@ bool Msgfb::gotAppAccessToken ( TcpSocket *s ) {
for ( ; *p && *p != '&' ;p++ );
int32_t len = p - start;
if ( len > MAX_TOKEN_LEN ) { char *xx=NULL;*xx=0; }
memcpy ( s_appAccessToken , start , len );
gbmemcpy ( s_appAccessToken , start , len );
s_appAccessToken [ len ] = '\0';
}
@ -4799,9 +4799,9 @@ bool Emailer::getMailServerIP ( EmailState *es ) {
// get the ip. use kinda a fake hostname to pass into MsgC
// so that it understands its a special MX record lookup
char *dst = es->m_emailSubdomain;
memcpy ( dst , "gbmxrec-" , 8 );
gbmemcpy ( dst , "gbmxrec-" , 8 );
dst += 8;
memcpy ( dst , dom , domLen );
gbmemcpy ( dst , dom , domLen );
dst += domLen;
*dst = '\0';
@ -5751,8 +5751,8 @@ bool loadQueryLoopState ( ) {
s_ptr1 = *(int32_t *)p; p += 4;
s_ptr2 = *(int32_t *)p; p += 4;
g_n1 = *(int32_t *)p; p += 4;
memcpy ( g_fbq1 , p , g_n1 * 4 ); p += g_n1 * 4;
memcpy ( g_colls1 , p , g_n1 * 4 ); p += g_n1 * 4;
gbmemcpy ( g_fbq1 , p , g_n1 * 4 ); p += g_n1 * 4;
gbmemcpy ( g_colls1 , p , g_n1 * 4 ); p += g_n1 * 4;
if ( p >= pend ) goto done;
s_ptr3 = *(int32_t *)p; p += 4;
if ( p >= pend ) goto done;

@ -110,7 +110,7 @@ void File::set ( char *filename ) {
// if we already had another file open then we must close it first.
if ( m_vfd >= 0 ) close();
// copy into m_filename and NULL terminate
memcpy ( m_filename , filename , len );
gbmemcpy ( m_filename , filename , len );
m_filename [ len ] = '\0';
// TODO: make this a bool returning function if ( ! m_filename ) g_log
}
@ -612,10 +612,27 @@ bool File::closeLeastUsed () {
return true;
}
// . returns the size in bytes of the file named "filename"
// . returns 0 if the file could not be opened (e.g. it does not exist)
// . returns -1 if the file opened but its size could not be determined
// . uses fseeko()/ftello() rather than fseek()/ftell() because the latter
//   work in "long", which can not represent sizes >= 2GB where long is
//   32 bits. NOTE(review): off_t is only 64 bits on such builds when large
//   file support is enabled (e.g. -D_FILE_OFFSET_BITS=64) -- confirm the
//   build flags.
int64_t getFileSize ( char *filename ) {
	FILE *fd = fopen ( filename , "r" );
	if ( ! fd ) {
		// logging of the open failure is disabled here
		//log("disk: error getFileSize(%s) : %s",
		//    filename , strerror(g_errno));
		return 0;//-1;
	}
	// assume failure until the end-of-file offset has been read
	int64_t fileSize = -1;
	// only trust the reported offset if the seek to the end worked,
	// otherwise we would report the current position (0) as the size
	if ( fseeko ( fd , 0 , SEEK_END ) == 0 )
		fileSize = (int64_t)ftello ( fd );
	// always release the handle, even on the error path
	fclose ( fd );
	return fileSize;
}
// . returns -2 on error
// . returns -1 if does not exist
// . otherwise returns file size in bytes
int32_t File::getFileSize ( ) {
int64_t File::getFileSize ( ) {
// allow the substitution of another filename
//struct stat stats;
@ -624,19 +641,7 @@ int32_t File::getFileSize ( ) {
//int status = stat ( m_filename , &stats );
FILE *fd = fopen ( m_filename , "r" );
if ( ! fd ) {
log("disk: error getFileSize(%s) : %s",
m_filename , strerror(g_errno));
return -1;
}
fseek(fd,0,SEEK_END);
int32_t fileSize = ftell ( fd );
fclose ( fd );
return fileSize;
return ::getFileSize ( m_filename );
// return the size if the status was ok
//if ( status == 0 ) return stats.st_size;

4
File.h

@ -31,6 +31,8 @@
#include "Mem.h" // for g_mem
#include "Loop.h" // for g_loop.setNonBlocking(int fd)
int64_t getFileSize ( char *filename ) ;
// for avoiding unlink/opens that mess up our threaded read
int32_t getCloseCount_r ( int fd );
@ -111,7 +113,7 @@ class File {
// . returns -1 on error
// . otherwise returns file size in bytes
// . returns 0 if does not exist
int32_t getFileSize ( );
int64_t getFileSize ( );
// . when was it last touched?
time_t getLastModifiedTime ( );

@ -44,7 +44,7 @@ bool Flags::resize( int32_t size ) {
// copy as many of old flags over as possible
int32_t min = m_numFlags;
if ( min > size ) min = size;
memcpy( newFlags, m_flags, min*sizeof(char) );
gbmemcpy( newFlags, m_flags, min*sizeof(char) );
mfree( m_flags, m_numFlags*sizeof(char), "Flags" );
m_flags = NULL;
// find new values for member variables

@ -242,7 +242,7 @@ GEOIP_API const char * GeoIP_region_name_by_code(const char *country_code, const
GEOIP_API const char * GeoIP_time_zone_by_country_and_region(const char *country_code, const char *region_code);
#ifdef BSD
#define memcpy(dest, src, n) bcopy(src, dest, n)
#define gbmemcpy(dest, src, n) bcopy(src, dest, n)
#endif
#ifdef __cplusplus

@ -97,8 +97,8 @@ bool HashTableT<Key_t, Val_t>::copy(HashTableT<Key_t, Val_t>* src) {
// maybe this should be a member copy, but that's a LOT slower and
// bitwise should work with everything we're using the HashTableT
// for so far
memcpy(newKeys, src->m_keys, keySize);
memcpy(newVals, src->m_vals, valSize);
gbmemcpy(newKeys, src->m_keys, keySize);
gbmemcpy(newVals, src->m_vals, valSize);
reset();
m_keys = newKeys;
m_vals = newVals;
@ -397,9 +397,9 @@ int32_t HashTableT<Key_t, Val_t>::deserialize(char* s) {
return p - s;
}
memcpy((char*)m_keys, p, sizeof(Key_t) * numSlots);
gbmemcpy((char*)m_keys, p, sizeof(Key_t) * numSlots);
p += sizeof(Key_t) * numSlots;
memcpy((char*)m_vals, p, sizeof(Val_t) * numSlots);
gbmemcpy((char*)m_vals, p, sizeof(Val_t) * numSlots);
p += sizeof(Val_t) * numSlots;
m_numSlotsUsed = numSlotsUsed;
return p - s;

@ -257,7 +257,7 @@ bool HashTableX::addKey ( void *key , void *val , int32_t *slot ) {
// and store the key
if ( m_ks == 4 ) ((int32_t *)m_keys)[n] = *(int32_t *)key;
else if ( m_ks == 8 ) ((int64_t *)m_keys)[n] = *(int64_t *)key;
else memcpy ( m_keys + m_ks * n , key , m_ks );
else gbmemcpy ( m_keys + m_ks * n , key , m_ks );
}
// insert the value for this key
if ( val ) setValue ( n , val );
@ -743,7 +743,7 @@ int32_t HashTableX::serialize ( char *buf , int32_t bufSize ) {
// sanity check count
used++;
// store key
memcpy ( p , m_keys + i * m_ks , m_ks );
gbmemcpy ( p , m_keys + i * m_ks , m_ks );
// advance
p += m_ks;
}
@ -754,7 +754,7 @@ int32_t HashTableX::serialize ( char *buf , int32_t bufSize ) {
// skip if empty
if ( m_flags[i] == 0 ) continue;
// store key
memcpy ( p , m_vals + i * m_ds , m_ds );
gbmemcpy ( p , m_vals + i * m_ds , m_ds );
// advance
p += m_ds;
}

@ -143,6 +143,14 @@ class HashTableX {
bool addTerm144 ( key144_t *kp , int32_t score = 1 ) {
// debug XmlDoc.cpp's hash table
//int64_t termId = ((key144_t *)kp)->n2 >> 16;
//if ( termId == 59194288760543LL ) {
// log("got it");
// char *xx=NULL;*xx=0;
//}
// grow it!
if ( (m_numSlots < 20 || 4 * m_numSlotsUsed >= m_numSlots) &&
m_numSlots < m_maxSlots ) {
@ -160,7 +168,7 @@ class HashTableX {
while ( count++ < m_numSlots ) {
// this is set to 0x01 if non-empty
if ( m_flags [ n ] == 0 ) {
memcpy( &((key144_t *)m_keys)[n] ,kp,18);
gbmemcpy( &((key144_t *)m_keys)[n] ,kp,18);
m_vals[n*m_ds] = score;
m_flags[n] = 1;
m_numSlotsUsed++;
@ -384,7 +392,7 @@ class HashTableX {
void setValue ( int32_t n , void *val ) {
if (m_ds == 4) ((int32_t *)m_vals)[n] = *(int32_t *)val;
else if (m_ds == 8) ((int64_t *)m_vals)[n] = *(int64_t *)val;
else memcpy(m_vals+n*m_ds,val,m_ds);
else gbmemcpy(m_vals+n*m_ds,val,m_ds);
};
void *getValueFromSlot ( int32_t n ) { return m_vals + n * m_ds; };

@ -278,7 +278,7 @@ bool Highlight::highlightWords ( Words *words , Matches *m, Query *q ) {
if ( i + mat->m_numWords > backTagi )
backTagi = i + mat->m_numWords;
//memcpy ( m_bufPtr , backTag , backTagLen );
//gbmemcpy ( m_bufPtr , backTag , backTagLen );
//m_bufPtr += backTagLen ;
//backTagi = -1;
}
@ -305,7 +305,7 @@ bool Highlight::highlightWords ( Words *words , Matches *m, Query *q ) {
else if ( endHead ) {
// include the tags style sheet immediately before
// the closing </TITLE> tag
//memcpy( m_bufPtr, s_styleSheet, s_styleSheetLen );
//gbmemcpy( m_bufPtr, s_styleSheet, s_styleSheetLen );
m_sb->safeMemcpy( s_styleSheet , s_styleSheetLen );
//m_bufPtr += s_styleSheetLen;
}
@ -336,14 +336,14 @@ bool Highlight::highlightWords ( Words *words , Matches *m, Query *q ) {
// write the alnum word
//m_bufPtr +=latin1ToUtf8(m_bufPtr, m_bufEnd-m_bufPtr,w, wlen);
// everything is utf8 now
//memcpy ( m_bufPtr, w , wlen );
//gbmemcpy ( m_bufPtr, w , wlen );
//m_bufPtr += wlen;
m_sb->safeMemcpy ( w , wlen );
// back tag
if ( i == backTagi-1 ) {
// store the back tag
//memcpy ( m_bufPtr , backTag , backTagLen );
//gbmemcpy ( m_bufPtr , backTag , backTagLen );
//m_bufPtr += backTagLen ;
m_sb->safeMemcpy ( (char *)backTag , backTagLen );
//log(LOG_DEBUG,

@ -501,7 +501,7 @@ bool Hostdb::init ( int32_t hostIdArg , char *netName ,
return false;
}
// copy it
memcpy ( h->m_hostname , host , hlen );
gbmemcpy ( h->m_hostname , host , hlen );
// null term it
h->m_hostname[hlen] = '\0';
// need this for hashing
@ -575,7 +575,7 @@ bool Hostdb::init ( int32_t hostIdArg , char *netName ,
}
// a direct ip address?
if ( hostname2 ) {
memcpy ( h->m_hostname2,hostname2,hlen2);
gbmemcpy ( h->m_hostname2,hostname2,hlen2);
h->m_hostname2[hlen2] = '\0';
ip2 = atoip ( h->m_hostname2 );
}
@ -696,7 +696,7 @@ bool Hostdb::init ( int32_t hostIdArg , char *netName ,
while ( *n && *n != '\n' && n < pend ) n++;
int32_t noteSize = n - p;
if ( noteSize > 127 ) noteSize = 127;
memcpy(h->m_note, p, noteSize);
gbmemcpy(h->m_note, p, noteSize);
*p++ = '\0'; // NULL terminate for atoip
}
else
@ -803,10 +803,11 @@ bool Hostdb::init ( int32_t hostIdArg , char *netName ,
// copy it over
//strcpy ( m_hosts[i].m_dir , wdir );
memcpy(m_hosts[i].m_dir, wdir, wdirlen);
gbmemcpy(m_hosts[i].m_dir, wdir, wdirlen);
m_hosts[i].m_dir[wdirlen] = '\0';
// reset this
//m_hosts[i].m_pingInfo.m_lastPing = 0LL;
m_hosts[i].m_lastPing = 0LL;
// and don't send emails on him until we got a good ping
m_hosts[i].m_emailCode = -2;
@ -817,8 +818,8 @@ bool Hostdb::init ( int32_t hostIdArg , char *netName ,
// so UdpServer.cpp knows if we are in g_hostdb or g_hostdb2
m_hosts[i].m_hostdb = this;
// reset these
m_hosts[i].m_flags = 0;
m_hosts[i].m_cpuUsage = 0.0;
m_hosts[i].m_pingInfo.m_flags = 0;
m_hosts[i].m_pingInfo.m_cpuUsage = 0.0;
m_hosts[i].m_loadAvg = 0.0;
// point to next one
i++;
@ -1060,6 +1061,12 @@ bool Hostdb::init ( int32_t hostIdArg , char *netName ,
if ( ! localIps )
return log("conf: Failed to get local IP address. Exiting.");
// if no cwd, then probably calling 'gb inject foo.warc <hosts.conf>'
if ( ! cwd ) {
log("hosts: missing cwd");
return true;
}
// now get host based on cwd and ip
Host *host = getHost2 ( cwd , localIps );
@ -1189,7 +1196,8 @@ bool Hostdb::init ( int32_t hostIdArg , char *netName ,
sprintf ( m_httpRootDir , "%shtml/" , m_dir );
sprintf ( m_logFilename , "%slog%03"INT32"", m_dir , m_hostId );
if ( ! g_conf.m_runAsDaemon )
if ( ! g_conf.m_runAsDaemon &&
! g_conf.m_logToFile )
sprintf(m_logFilename,"/dev/stderr");
@ -1703,7 +1711,7 @@ int32_t Hostdb::getAliveIp ( Host *h ) {
int64_t Hostdb::getNumGlobalRecs ( ) {
int64_t n = 0;
for ( int32_t i = 0 ; i < m_numHosts ; i++ )
n += getHost ( i )->m_docsIndexed;
n += getHost ( i )->m_pingInfo.m_totalDocsIndexed;
return n / m_numHostsPerShard;
}
@ -1721,7 +1729,7 @@ bool Hostdb::setNote ( int32_t hostId, char *note, int32_t noteLen ) {
if ( !h ) return true;
//h->m_note[0] = ' ';
//h->m_note[1] = '#';
memcpy(h->m_note, note, noteLen);
gbmemcpy(h->m_note, note, noteLen);
h->m_note[noteLen] = '\0';
// write this hosts conf out
return saveHostsConf();
@ -1734,7 +1742,7 @@ bool Hostdb::setSpareNote ( int32_t spareId, char *note, int32_t noteLen ) {
if ( !h ) return true;
//h->m_note[0] = ' ';
//h->m_note[1] = '#';
memcpy(h->m_note, note, noteLen);
gbmemcpy(h->m_note, note, noteLen);
h->m_note[noteLen] = '\0';
// write this hosts conf out
return saveHostsConf();
@ -1751,9 +1759,9 @@ bool Hostdb::replaceHost ( int32_t origHostId, int32_t spareHostId ) {
Host tmp;
memcpy ( &tmp , oldHost , sizeof(Host) );
memcpy ( oldHost , spareHost , sizeof(Host) );
memcpy ( spareHost , &tmp , sizeof(Host) );
gbmemcpy ( &tmp , oldHost , sizeof(Host) );
gbmemcpy ( oldHost , spareHost , sizeof(Host) );
gbmemcpy ( spareHost , &tmp , sizeof(Host) );
// however, these values need to change
oldHost->m_hostId = origHostId;
@ -1767,41 +1775,47 @@ bool Hostdb::replaceHost ( int32_t origHostId, int32_t spareHostId ) {
oldHost->m_hostdb = spareHost->m_hostdb;
oldHost->m_inProgress1 = spareHost->m_inProgress1;
oldHost->m_inProgress2 = spareHost->m_inProgress2;
oldHost->m_lastPing = spareHost->m_lastPing; // last ping timestamp
// last ping timestamp
//oldHost->m_pingInfo.m_lastPing = spareHost->m_pingInfo.m_lastPing;
oldHost->m_lastPing = spareHost->m_lastPing;
// and the new spare gets a new hostid too
spareHost->m_hostId = spareHostId;
memset ( &oldHost->m_pingInfo , 0 , sizeof(PingInfo) );
// reset these stats
oldHost->m_pingMax = 0;
oldHost->m_gotPingReply = false;
oldHost->m_loadAvg = 0;
oldHost->m_percentMemUsed = 0;
//oldHost->m_percentMemUsed = 0;
oldHost->m_firstOOMTime = 0;
oldHost->m_cpuUsage = 0;
oldHost->m_docsIndexed = 0;
//oldHost->m_cpuUsage = 0;
oldHost->m_pingInfo.m_totalDocsIndexed = 0;
oldHost->m_eventsIndexed = 0;
oldHost->m_slowDiskReads = 0;
oldHost->m_kernelErrors = 0;
//oldHost->m_slowDiskReads = 0;
//oldHost->m_kernelErrors = 0;
oldHost->m_kernelErrorReported = false;
oldHost->m_flags = 0;
oldHost->m_dailyMergeCollnum = 0;
//oldHost->m_flags = 0;
//oldHost->m_dailyMergeCollnum = 0;
oldHost->m_ping = g_conf.m_deadHostTimeout;
oldHost->m_pingShotgun = g_conf.m_deadHostTimeout;
oldHost->m_emailCode = 0;
oldHost->m_wasAlive = false;
oldHost->m_etryagains = 0;
oldHost->m_totalResends = 0;
oldHost->m_pingInfo.m_etryagains = 0;
oldHost->m_pingInfo.m_udpSlotsInUse = 0;
oldHost->m_pingInfo.m_totalResends = 0;
oldHost->m_errorReplies = 0;
oldHost->m_dgramsTo = 0;
oldHost->m_dgramsFrom = 0;
oldHost->m_repairMode = 0;
oldHost->m_splitsDone = 0;
oldHost->m_splitTimes = 0;
oldHost->m_hdtemps[0] = 0;
oldHost->m_hdtemps[1] = 0;
oldHost->m_hdtemps[2] = 0;
oldHost->m_hdtemps[3] = 0;
// oldHost->m_hdtemps[0] = 0;
// oldHost->m_hdtemps[1] = 0;
// oldHost->m_hdtemps[2] = 0;
// oldHost->m_hdtemps[3] = 0;
// . just swap ips and ports and directories
// . first store all the old info so we can put it away
@ -1819,9 +1833,9 @@ bool Hostdb::replaceHost ( int32_t origHostId, int32_t spareHostId ) {
char oldSwitchId = oldHost->m_switchId;
uint16_t oldDnsPort = oldHost->m_dnsClientPort;
char oldDir[128];
memcpy(oldDir, oldHost->m_dir, 128);
gbmemcpy(oldDir, oldHost->m_dir, 128);
char oldNote[128];
memcpy(oldNote, oldHost->m_note, 128);
gbmemcpy(oldNote, oldHost->m_note, 128);
// . now copy in the spare's info
oldHost->m_ip = spareHost->m_ip;
oldHost->m_ipShotgun = spareHost->m_ipShotgun;
@ -1835,8 +1849,8 @@ bool Hostdb::replaceHost ( int32_t origHostId, int32_t spareHostId ) {
oldHost->m_ideChannel = spareHost->m_ideChannel;
oldHost->m_switchId = spareHost->m_switchId;
oldHost->m_dnsClientPort = spareHost->m_dnsClientPort;
memcpy(oldHost->m_dir, spareHost->m_dir, 128);
memcpy(oldHost->m_note, spareHost->m_note, 128);
gbmemcpy(oldHost->m_dir, spareHost->m_dir, 128);
gbmemcpy(oldHost->m_note, spareHost->m_note, 128);
// . now store the old info off
spareHost->m_ip = oldIp;
spareHost->m_ipShotgun = oldIp2;
@ -1850,8 +1864,8 @@ bool Hostdb::replaceHost ( int32_t origHostId, int32_t spareHostId ) {
spareHost->m_ideChannel = oldIdeChannel;
spareHost->m_switchId = oldSwitchId;
spareHost->m_dnsClientPort = oldDnsPort;
memcpy(spareHost->m_dir, oldDir, 128);
memcpy(spareHost->m_note, oldNote, 128);
gbmemcpy(spareHost->m_dir, oldDir, 128);
gbmemcpy(spareHost->m_note, oldNote, 128);
*/
// write this hosts conf out
saveHostsConf();

@ -51,10 +51,11 @@ enum {
#define PFLAG_REBALANCING 0x20
#define PFLAG_FOREIGNRECS 0x40
#define PFLAG_RECOVERYMODE 0x80
#define PFLAG_OUTOFSYNC 0x100
// added slow disk reads to it, 4 bytes (was 52)
// 21 bytes for the gbversion (see getVersionSize())
#define MAX_PING_SIZE (44+4+4+21)
//#define MAX_PING_SIZE (44+4+4+21)
#define HT_GRUNT 0x01
#define HT_SPARE 0x02
@ -80,6 +81,42 @@ public:
void clear ( ) { memset ( this , 0 , sizeof(EventStats) ); };
};
// . per-host status fields, grouped into one object; each Host holds one
//   as Host::m_pingInfo
// . several of these members used to live directly in Host (see the
//   commented-out declarations there); the notes below are carried over
//   from those old declarations
// . NOTE(review): the comment on m_localHostTimeMS says the first 8 bytes
//   are treated as an old-style ping in PingServer.cpp, so this looks like
//   it is copied as raw bytes between hosts -- confirm before reordering,
//   resizing or inserting members
class PingInfo {
public:
// m_lastPing MUST be on top for now...
//int64_t m_lastPing;
// this timestamp MUST be on top because we set requestSize to 8
// and treat it like an old 8-byte ping in PingServer.cpp
int64_t m_localHostTimeMS;
int32_t m_hostId;
// NOTE(review): Host also keeps its own "double m_loadAvg"; this one is
// an int32_t, so the two presumably use different units -- confirm
int32_t m_loadAvg;
float m_percentMemUsed;
// cpu usage
float m_cpuUsage;
// doc count; summed over all hosts by Hostdb::getNumGlobalRecs()
int32_t m_totalDocsIndexed;
int32_t m_slowDiskReads;
// checksum of the host's hosts.conf so we can ensure we have the
// same hosts.conf file. 0 means not legit.
int32_t m_hostsConfCRC;
float m_diskUsage;
// presumably a bitmask of the PFLAG_* values defined in this header --
// TODO confirm
int32_t m_flags;
// some new stuff
int32_t m_numCorruptDiskReads;
int32_t m_numOutOfMems;
int32_t m_socketsClosedFromHittingLimit;
// how many resends total we had to do to this host
int32_t m_totalResends;
// how many ETRYAGAINs we received as replies from this host
int32_t m_etryagains;
int32_t m_udpSlotsInUse;
int16_t m_currentSpiders;
// used by DailyMerge.cpp exclusively
collnum_t m_dailyMergeCollnum;
// . temps in celsius of the hard drives
// . set in Process.cpp
int16_t m_hdtemps[4];
// gb version string; the old MAX_PING_SIZE note budgeted 21 bytes for it
// (see getVersionSize())
char m_gbVersionStr[21];
char m_repairMode;
// did gb log system errors that were given in g_conf.m_errstr ?
// read through Hostdb::kernelErrors()
char m_kernelErrors;
};
class Host {
public:
@ -117,7 +154,7 @@ class Host {
// his checksum of his hosts.conf so we can ensure we have the
// same hosts.conf file! 0 means not legit.
int32_t m_hostsConfCRC;
//int32_t m_hostsConfCRC;
// used by Process.cpp to do midnight stat dumps and emails
EventStats m_eventStats;
@ -141,36 +178,39 @@ class Host {
// have we ever got a ping reply from him?
char m_gotPingReply;
double m_loadAvg;
float m_percentMemUsed;
//float m_percentMemUsed;
// the first time we went OOM (out of mem, i.e. >= 99% mem used)
int64_t m_firstOOMTime;
// cpu usage
float m_cpuUsage;
//float m_cpuUsage;
float m_diskUsage;
//float m_diskUsage;
int32_t m_slowDiskReads;
//int32_t m_slowDiskReads;
// doc count
int32_t m_docsIndexed;
//int32_t m_docsIndexed;
int32_t m_urlsIndexed;
int32_t m_eventsIndexed;
// did gb log system errors that were given in g_conf.m_errstr ?
char m_kernelErrors;
//char m_kernelErrors;
bool m_kernelErrorReported;
int32_t m_flags;
//int32_t m_flags;
// used by SEO pipeline in xmldoc.cpp
int32_t m_numOutstandingRequests;
// used by DailyMerge.cpp exclusively
collnum_t m_dailyMergeCollnum;
//collnum_t m_dailyMergeCollnum;
// last time g_hostdb.ping(i) was called for this host in milliseconds.
int64_t m_lastPing;
char m_tmpBuf[4];
// . first time we sent an unanswered ping request to this host
// . used so we can determine when to send an email alert
int64_t m_startTime;
@ -229,9 +269,9 @@ class Host {
// eth0 and eth1 of this host
char m_shotgunBit;
// how many ETRYAGAINs we received as replies from this host
int32_t m_etryagains;
//int32_t m_etryagains;
// how many resends total we had to do to this host
int32_t m_totalResends;
//int32_t m_totalResends;
// how many total error replies we got from this host
int32_t m_errorReplies;
@ -273,10 +313,10 @@ class Host {
// . temps in celsius of the hard drives
// . set in Process.cpp
int16_t m_hdtemps[4];
//int16_t m_hdtemps[4];
// 24 bytes including ending \0
char m_gbVersionStrBuf[24];
//char m_gbVersionStrBuf[24];
// Syncdb.cpp uses these
char m_inSync ;
@ -296,7 +336,8 @@ class Host {
int32_t m_lastTryError;
int32_t m_lastTryTime;
char m_requestBuf[MAX_PING_SIZE];
//char m_requestBuf[MAX_PING_SIZE];
PingInfo m_pingInfo;//RequestBuf;
};
#define MAX_HOSTS 512
@ -393,7 +434,7 @@ class Hostdb {
bool hasDeadHost ( );
bool kernelErrors (Host *h) { return h->m_kernelErrors; };
bool kernelErrors (Host *h) { return h->m_pingInfo.m_kernelErrors; };
int64_t getNumGlobalRecs ( );

@ -664,10 +664,10 @@ const char *HttpMime::getContentEncodingFromExtension ( char *ext ) {
// make a redirect mime
void HttpMime::makeRedirMime ( char *redir , int32_t redirLen ) {
char *p = m_buf;
memcpy ( p , "HTTP/1.0 302 RD\r\nLocation: " , 27 );
gbmemcpy ( p , "HTTP/1.0 302 RD\r\nLocation: " , 27 );
p += 27;
if ( redirLen > 600 ) redirLen = 600;
memcpy ( p , redir , redirLen );
gbmemcpy ( p , redir , redirLen );
p += redirLen;
*p++ = '\r';
*p++ = '\n';

@ -92,7 +92,7 @@ void HttpRequest::reset() {
// returns false with g_errno set on error
bool HttpRequest::copy ( class HttpRequest *r , bool stealBuf ) {
memcpy ( this , r , sizeof(HttpRequest) );
gbmemcpy ( this , r , sizeof(HttpRequest) );
// do not copy this over though in that way
m_reqBuf.m_capacity = 0;
m_reqBuf.m_length = 0;
@ -104,7 +104,7 @@ bool HttpRequest::copy ( class HttpRequest *r , bool stealBuf ) {
// if he's on the stack, that's a problem!
if ( r->m_reqBuf.m_usingStack ) { char *xx=NULL;*xx=0; }
// copy the safebuf member var directly
memcpy ( &m_reqBuf , &r->m_reqBuf , sizeof(SafeBuf) );
gbmemcpy ( &m_reqBuf , &r->m_reqBuf , sizeof(SafeBuf) );
// do not let it free anything
r->m_reqBuf.m_usingStack = true;
// that's it!
@ -607,7 +607,8 @@ bool HttpRequest::set ( char *origReq , int32_t origReqLen , TcpSocket *sock ) {
bool multipart = false;
if ( m_requestType == 2 ) { // is POST?
char *cd =strcasestr(req,"Content-Type: multipart/form-data");
char *cd ;
cd = gb_strcasestr(req,"Content-Type: multipart/form-data");
if ( cd ) multipart = true;
}
@ -727,7 +728,7 @@ bool HttpRequest::set ( char *origReq , int32_t origReqLen , TcpSocket *sock ) {
// truncate if too big
if ( m_hostLen >= 255 ) m_hostLen = 254;
// copy into hostname
memcpy ( m_host , s , m_hostLen );
gbmemcpy ( m_host , s , m_hostLen );
}
// NULL terminate it
m_host [ m_hostLen ] = '\0';
@ -755,7 +756,7 @@ bool HttpRequest::set ( char *origReq , int32_t origReqLen , TcpSocket *sock ) {
// truncate if too big
if ( m_refLen >= 255 ) m_refLen = 254;
// copy into m_ref
memcpy ( m_ref , s , m_refLen );
gbmemcpy ( m_ref , s , m_refLen );
}
// NULL terminate it
m_ref [ m_refLen ] = '\0';
@ -783,7 +784,7 @@ bool HttpRequest::set ( char *origReq , int32_t origReqLen , TcpSocket *sock ) {
// truncate if too big
if ( len > 127 ) len = 127;
// copy into m_userAgent
memcpy ( m_userAgent , s , len );
gbmemcpy ( m_userAgent , s , len );
}
// NULL terminate it
m_userAgent [ len ] = '\0';
@ -817,7 +818,7 @@ bool HttpRequest::set ( char *origReq , int32_t origReqLen , TcpSocket *sock ) {
// trunc if too big
//if (m_cookieBufLen > 1023) m_cookieBufLen = 1023;
// copy into m_cookieBuf
//memcpy(m_cookieBuf, s, m_cookieBufLen);
//gbmemcpy(m_cookieBuf, s, m_cookieBufLen);
}
// NULL terminate it
if ( m_cookiePtr ) m_cookiePtr[m_cookieLen] = '\0';
@ -1002,13 +1003,13 @@ bool HttpRequest::set ( char *origReq , int32_t origReqLen , TcpSocket *sock ) {
}
char *p = newBuf;
if (m_cgiBuf2Size) {
memcpy(newBuf, m_cgiBuf2, m_cgiBuf2Size);
gbmemcpy(newBuf, m_cgiBuf2, m_cgiBuf2Size);
p += m_cgiBuf2Size-1;
mfree(m_cgiBuf2, m_cgiBuf2Size, "extraParms");
m_cgiBuf2 = NULL;
m_cgiBuf2Size = 0;
}
memcpy(p, buf, bufLen);
gbmemcpy(p, buf, bufLen);
m_cgiBuf2 = newBuf;
m_cgiBuf2Size = newSize;
p += bufLen;
@ -1095,7 +1096,7 @@ bool HttpRequest::set ( char *origReq , int32_t origReqLen , TcpSocket *sock ) {
// ensure no overflow (add 1 cuz we NULL terminate it)
//if ( m_cgiBufLen>=1023) { g_errno = EBUFTOOSMALL;return false;}
// copy cgi string into m_cgiBuf
//memcpy ( m_cgiBuf , s , slen );
//gbmemcpy ( m_cgiBuf , s , slen );
// NULL terminate and include it in the length
//m_cgiBuf [ m_cgiBufLen++ ] = '\0';
m_cgiBuf [ slen ] = '\0';

@ -84,7 +84,7 @@ class HttpRequest {
// this is NULL terminated too
char *getUserAgent () { return m_userAgent; };
// just does a simple memcpy() operation, since it should be pointing
// just does a simple gbmemcpy() operation, since it should be pointing
// into the TcpSocket's buffer which is safe until after reply is sent
// . returns false and sets g_errno on error, true otherwise
bool copy ( class HttpRequest *r , bool steal = false ) ;

@ -228,15 +228,15 @@ bool HttpServer::getDoc ( char *url ,
req = (char *) mmalloc( need ,"HttpServer");
char *p = req;
if ( req && sb.length() ) {
memcpy ( p , sb.getBufStart() , sb.length() );
gbmemcpy ( p , sb.getBufStart() , sb.length() );
p += sb.length();
}
if ( req ) {
memcpy ( p , r.getRequest() , reqSize );
gbmemcpy ( p , r.getRequest() , reqSize );
p += reqSize;
}
if ( req && pcLen ) {
memcpy ( p , postContent , pcLen );
gbmemcpy ( p , postContent , pcLen );
p += pcLen;
}
reqSize = p - req;
@ -810,7 +810,7 @@ void HttpServer::requestHandler ( TcpSocket *s ) {
char stackMem[1024];
int32_t maxLen = s->m_readOffset;
if ( maxLen > 1020 ) maxLen = 1020;
memcpy(stackMem,s->m_readBuf,maxLen);
gbmemcpy(stackMem,s->m_readBuf,maxLen);
stackMem[maxLen] = '\0';
// . sendReply returns false if blocked, true otherwise
@ -1442,7 +1442,7 @@ bool HttpServer::sendReply ( TcpSocket *s , HttpRequest *r , bool isAdmin) {
delete (f);
return sendErrorReply(s,500,mstrerror(g_errno));
}
memcpy ( sendBuf , m.getMime() , mimeLen );
gbmemcpy ( sendBuf , m.getMime() , mimeLen );
// save sd
int sd = s->m_sd;
// . send it away
@ -1594,7 +1594,7 @@ bool HttpServer::sendReply2 ( char *mime,
// if we are a proxy, and not a compression proxy, then just forward
// the blob as-is if it is a "ZET" (GET-compressed=ZET)
else if ( (ht & HT_PROXY) && (*rb == 'Z') ) {
memcpy ( sendBuf , content, contentLen );
gbmemcpy ( sendBuf , content, contentLen );
// sanity check
if ( sendBufSize != contentLen ) { char *xx=NULL;*xx=0; }
// note it
@ -1602,10 +1602,10 @@ bool HttpServer::sendReply2 ( char *mime,
}
else {
// copy mime into sendBuf first
memcpy ( p , mime , mimeLen );
gbmemcpy ( p , mime , mimeLen );
p += mimeLen;
// then the page
memcpy ( p , content, contentLen );
gbmemcpy ( p , content, contentLen );
p += contentLen;
// sanity check
if ( sendBufSize != contentLen+mimeLen) { char *xx=NULL;*xx=0;}
@ -2051,7 +2051,7 @@ bool HttpServer::sendQueryErrorReply( TcpSocket *s , int32_t error ,
tcp->destroySocket ( s );
return true;
}
memcpy ( sendBuf , msg , msgSize );
gbmemcpy ( sendBuf , msg , msgSize );
// erase g_errno for sending
g_errno = 0;
// . this returns false if blocked, true otherwise
@ -2713,7 +2713,7 @@ bool HttpServer::sendDynamicPage ( TcpSocket *s ,
// if we are a proxy, and not a compression proxy, then just forward
// the blob as-is if it is a "ZET" (GET-compressed=ZET)
else if ( (ht & HT_PROXY) && *rb == 'Z' ) {
memcpy ( sendBuf , page , pageLen );
gbmemcpy ( sendBuf , page , pageLen );
// sanity check
if ( sendBufSize != pageLen ) { char *xx=NULL;*xx=0; }
// note it
@ -2721,10 +2721,10 @@ bool HttpServer::sendDynamicPage ( TcpSocket *s ,
}
else {
// copy mime into sendBuf first
memcpy ( p , m.getMime() , mimeLen );
gbmemcpy ( p , m.getMime() , mimeLen );
p += mimeLen;
// then the page
memcpy ( p , page , pageLen );
gbmemcpy ( p , page , pageLen );
p += pageLen;
// sanity check
if ( sendBufSize != pageLen+mimeLen ) { char *xx=NULL;*xx=0;}
@ -2850,7 +2850,7 @@ TcpSocket *HttpServer::unzipReply(TcpSocket* s) {
// sometimes they are missing Content-Length:
if ( ptr1 ) {
// copy ptr1 to src
memcpy ( pnew, src, ptr1 - src );
gbmemcpy ( pnew, src, ptr1 - src );
pnew += ptr1 - src;
src += ptr1 - src;
// store either the new content encoding or new length
@ -2864,7 +2864,7 @@ TcpSocket *HttpServer::unzipReply(TcpSocket* s) {
if ( ptr2 ) {
// copy ptr2 to src
memcpy ( pnew , src , ptr2 - src );
gbmemcpy ( pnew , src , ptr2 - src );
pnew += ptr2 - src;
src += ptr2 - src;
// now insert the new shit
@ -2877,7 +2877,7 @@ TcpSocket *HttpServer::unzipReply(TcpSocket* s) {
}
// copy the rest
memcpy ( pnew , src , mimeEnd - src );
gbmemcpy ( pnew , src , mimeEnd - src );
pnew += mimeEnd - src;
src += mimeEnd - src;
@ -2893,7 +2893,7 @@ TcpSocket *HttpServer::unzipReply(TcpSocket* s) {
// return s;
// }
// memcpy(pnew, pold, restLen);
// gbmemcpy(pnew, pold, restLen);
// pold += restLen;
// pnew += restLen;
@ -3348,16 +3348,16 @@ bool sendPageApi ( TcpSocket *s , HttpRequest *r ) {
// < as &lt; and > as &gt;
for ( ; *src ; src++ ) {
if ( *src == '#' ) {
memcpy ( dst,"<font color=gray>",17);
gbmemcpy ( dst,"<font color=gray>",17);
dst += 17;
inFont = true;
}
if ( *src == '<' ) {
memcpy ( dst , "&lt;",4);
gbmemcpy ( dst , "&lt;",4);
dst += 4;
// boldify start tags
//if ( src[1] != '/' && src[1] !='!' ) {
// memcpy(dst,"<b>",3);
// gbmemcpy(dst,"<b>",3);
// dst += 3;
// inBold = true;
//}
@ -3366,21 +3366,21 @@ bool sendPageApi ( TcpSocket *s , HttpRequest *r ) {
else if ( *src == '>' ) {
// end bold tags
if ( inBold ) {
memcpy(dst,"</b>",4);
gbmemcpy(dst,"</b>",4);
dst += 4;
inBold = false;
}
memcpy ( dst , "&gt;",4);
gbmemcpy ( dst , "&gt;",4);
dst += 4;
continue;
}
else if ( *src == '\n' ) {
if ( inFont ) {
memcpy(dst,"</font>",7);
gbmemcpy(dst,"</font>",7);
dst += 7;
inFont = false;
}
memcpy ( dst , "<br>",4);
gbmemcpy ( dst , "<br>",4);
dst += 4;
continue;
}
@ -3559,7 +3559,7 @@ void gotSquidProxiedUrlIp ( void *state , int32_t ip ) {
// char *proxiedReqBuf = r->ptr_url;
// // store into there
// memcpy ( proxiedReqBuf,
// gbmemcpy ( proxiedReqBuf,
// sqs->m_sock->m_readBuf,
// // include +1 for the terminating \0
// sqs->m_sock->m_readOffset + 1);

@ -686,10 +686,10 @@ bool Images::downloadImages () {
// now copy the data over sequentially
char *p = ti->m_buf;
// the image url
memcpy(p,m_imageUrl.getUrl(),urlSize);
gbmemcpy(p,m_imageUrl.getUrl(),urlSize);
p += urlSize;
// the image thumbnail data
memcpy(p,m_imgData,m_thumbnailSize);
gbmemcpy(p,m_imgData,m_thumbnailSize);
p += m_thumbnailSize;
// update buf length of course
m_imageBuf.setLength ( p - m_imageBuf.getBufStart() );
@ -979,14 +979,16 @@ void Images::thumbStart_r ( bool amThread ) {
makeTrashDir();
// get thread id
int32_t id = getpidtid();
// get thread id. pthread_t is 64 bit and pid_t is 32 bit on
// 64 bit oses
pthread_t id = getpidtid();
// pass the input to the program through this file
// rather than a pipe, since popen() seems broken.
// m_dir ends in / so this should work.
char in[364];
snprintf ( in , 363,"%strash/in.%"INT32"", g_hostdb.m_dir, id );
snprintf ( in , 363,"%strash/in.%"INT64""
, g_hostdb.m_dir, (int64_t)id );
unlink ( in );
log( LOG_DEBUG, "image: thumbStart_r create in file." );
@ -994,7 +996,8 @@ void Images::thumbStart_r ( bool amThread ) {
// collect the output from the filter from this file
// m_dir ends in / so this should work.
char out[364];
snprintf ( out , 363,"%strash/out.%"INT32"", g_hostdb.m_dir, id );
snprintf ( out , 363,"%strash/out.%"INT64""
, g_hostdb.m_dir, (int64_t)id );
unlink ( out );
log( LOG_DEBUG, "image: thumbStart_r create out file." );

@ -214,8 +214,8 @@ void IndexReadInfo::update ( IndexList *lists, int32_t numLists,
char *startKey = &m_startKeys[i*m_ks];
// . load lastPart into lower 6 bytes of "startKey"
// . little endian
//memcpy ( &startKey , lastPart , 6 );
memcpy ( startKey , lastPart , m_hks );
//gbmemcpy ( &startKey , lastPart , 6 );
gbmemcpy ( startKey , lastPart , m_hks );
// debug msg
//log("pre-startKey for list #%"INT32" is n1=%"XINT32",n0=%"XINT64"",
// i,startKey.n1,startKey.n0);

@ -636,13 +636,13 @@ void IndexTable::addLists_r (IndexList lists[MAX_TIERS][MAX_QUERY_TERMS],
char *p = lists[j][i].getList();
// remember to swap back when done!!
//char ttt[6];
//memcpy ( ttt , p , 6 );
//memcpy ( p , p + 6 , 6 );
//memcpy ( p + 6 , ttt , 6 );
//gbmemcpy ( ttt , p , 6 );
//gbmemcpy ( p , p + 6 , 6 );
//gbmemcpy ( p + 6 , ttt , 6 );
char ttt[10];
memcpy ( ttt , p , hks );
memcpy ( p , p + hks , 6 );
memcpy ( p + 6 , ttt , hks );
gbmemcpy ( ttt , p , hks );
gbmemcpy ( p , p + hks , 6 );
gbmemcpy ( p + 6 , ttt , hks );
// point to the low "hks" bytes now
p += 6;
// turn half bit on
@ -806,9 +806,9 @@ swapBack:
//char *p = lists[j][i].getList();
// remember to swap back when done!!
//char ttt[6];
//memcpy ( ttt , p , 6 );
//memcpy ( p , p + 6 , 6 );
//memcpy ( p + 6 , ttt , 6 );
//gbmemcpy ( ttt , p , 6 );
//gbmemcpy ( p , p + 6 , 6 );
//gbmemcpy ( p + 6 , ttt , 6 );
// turn half bit off again
//*p &= 0xfd;
}
@ -1110,9 +1110,9 @@ void IndexTable::addLists2_r ( IndexList lists[MAX_TIERS][MAX_QUERY_TERMS] ,
// this is now done in addLists_r()
// remember to swap back when done!!
//char ttt[6];
//memcpy ( ttt , p , 6 );
//memcpy ( p , p + 6 , 6 );
//memcpy ( p + 6 , ttt , 6 );
//gbmemcpy ( ttt , p , 6 );
//gbmemcpy ( p , p + 6 , 6 );
//gbmemcpy ( p + 6 , ttt , 6 );
// point to the low 6 bytes now
p += 6;
// turn half bit on
@ -1470,7 +1470,7 @@ void IndexTable::addLists2_r ( IndexList lists[MAX_TIERS][MAX_QUERY_TERMS] ,
nnstart = nn;
// debug point
//int64_t ddd ;
//memcpy ( &ddd , ptrs[i] , 6 );
//gbmemcpy ( &ddd , ptrs[i] , 6 );
//ddd >>= 2;
//ddd &= DOCID_MASK;
//if ( ddd == 261380478983LL )
@ -1586,7 +1586,7 @@ void IndexTable::addLists2_r ( IndexList lists[MAX_TIERS][MAX_QUERY_TERMS] ,
if ( /*! rat &&*/ explicitBits[nn] & ebits ) {
// no point in logging since in thread!
//int64_t dd ;
//memcpy ( &dd , ptrs[i] , 6 );
//gbmemcpy ( &dd , ptrs[i] , 6 );
//dd >>= 2;
//dd &= DOCID_MASK;
//fprintf(stderr,"got dup score for docid=%"INT64"\n",dd);
@ -2203,7 +2203,7 @@ void IndexTable::addLists2_r ( IndexList lists[MAX_TIERS][MAX_QUERY_TERMS] ,
if ( (*(unsigned char *)(tdp2[i] ) & 0xfc) >
(*(unsigned char *)(minp) & 0xfc) ) continue;
gotIt:
memcpy ( &topd[mini] , tdp2[i] , 6 );
gbmemcpy ( &topd[mini] , tdp2[i] , 6 );
topd[mini] >>= 2;
topd[mini] &= DOCID_MASK;
topp[mini] = tdp2 [i];
@ -2253,7 +2253,7 @@ void IndexTable::addLists2_r ( IndexList lists[MAX_TIERS][MAX_QUERY_TERMS] ,
// . the m_topDocIdPtrs array is score/docid 6 byte combos
else if ( ! rat ) {
for ( int32_t i = 0 ; i < numTopDocIds ; i++ ) {
memcpy ( &topd[i] , topp[i] , 6 );
gbmemcpy ( &topd[i] , topp[i] , 6 );
topd[i] >>= 2;
topd[i] &= DOCID_MASK;
}
@ -2571,7 +2571,7 @@ skip:
char explicits [ MAX_RESULTS * MAX_TIERS ];
int32_t scores [ MAX_RESULTS * MAX_TIERS ];
char tiers [ MAX_RESULTS * MAX_TIERS ];
// use memcpy for speed reasons
// use gbmemcpy for speed reasons
for ( int32_t i = 0 ; i < m_numTiers ; i++ ) {
// how many top docIds in this one?
int32_t nt = m_numTopDocIds[i];
@ -2587,11 +2587,11 @@ skip:
//if ( m_numExactExplicitMatches[i] < 10 && i != m_numTiers-1)
// continue;
// store all top docIds from all tiers into "docIds"
memcpy ( & docIds [ nn ] , &m_topDocIds [i] , nt * 8 );
gbmemcpy ( & docIds [ nn ] , &m_topDocIds [i] , nt * 8 );
// also save the bit scores, for sorting
memcpy ( & scores [ nn ] , &m_topScores [i] , nt * 4 );
memcpy ( & bitScores [ nn ] , &m_topBitScores[i] , nt );
memcpy ( & explicits [ nn ] , &m_topExplicits[i] , nt );
gbmemcpy ( & scores [ nn ] , &m_topScores [i] , nt * 4 );
gbmemcpy ( & bitScores [ nn ] , &m_topBitScores[i] , nt );
gbmemcpy ( & explicits [ nn ] , &m_topExplicits[i] , nt );
memset ( & tiers [ nn ] , i , nt );
// inc the count
nn += nt;

@ -448,8 +448,8 @@ void IndexTable2::setAffWeights ( Msg39Request *r ) {
m_freqWeights[i] = tfWeightsQS [m_imap[i]];
m_affWeights [i] = affWeightsQS[m_imap[i]];
}
//memcpy ( m_freqWeights , tfWeights , nqt * sizeof(float) );
//memcpy ( m_affWeights , affWeights , nqt * sizeof(float) );
//gbmemcpy ( m_freqWeights , tfWeights , nqt * sizeof(float) );
//gbmemcpy ( m_affWeights , affWeights , nqt * sizeof(float) );
// do not compute them ourselves again
m_computedAffWeights = true;
@ -561,8 +561,8 @@ bool IndexTable2::recompute ( Msg39Request *r ) {
// retrieve the imap
m_nb = s_recs[i].m_nb;
memcpy ( m_blocksize , s_recs[i].m_blocksize , m_nb * 4 );
memcpy ( m_imap , s_recs[i].m_imap , m_ni * 4 );
gbmemcpy ( m_blocksize , s_recs[i].m_blocksize , m_nb * 4 );
gbmemcpy ( m_imap , s_recs[i].m_imap , m_ni * 4 );
// fill in the related stuff
setStuffFromImap();
@ -731,7 +731,7 @@ bool IndexTable2::cacheIntersectionForRecompute ( Msg39Request *r ) {
//if ( ! data ) return false;
// hopefully this is super fast
//memcpy ( data , keepStart , keepSize );
//gbmemcpy ( data , keepStart , keepSize );
// save the termlists since m_tmpDocIdPtrs2[] references into them
for ( int32_t j = 0 ; j < m_numLists ; j++ ) {
@ -762,8 +762,8 @@ bool IndexTable2::cacheIntersectionForRecompute ( Msg39Request *r ) {
if ( ! m_imapIsValid ) { char *xx=NULL;*xx=0;}
// store the imap in case it changes
s_recs[i].m_nb = m_nb;
memcpy ( s_recs[i].m_blocksize , m_blocksize , m_nb * 4 );
memcpy ( s_recs[i].m_imap , m_imap , m_ni * 4 );
gbmemcpy ( s_recs[i].m_blocksize , m_blocksize , m_nb * 4 );
gbmemcpy ( s_recs[i].m_imap , m_imap , m_ni * 4 );
//m_ni = m_q->getImap ( m_sizes , m_imap , m_blocksize , &m_nb );
@ -1606,9 +1606,9 @@ void IndexTable2::addLists_r ( int32_t *totalListSizes , float sortByDateWeight
char *p = m_lists[i].getList();
// remember to swap back when done!!
char ttt[10];
memcpy ( ttt , p , hks );
memcpy ( p , p + hks , 6 );
memcpy ( p + 6 , ttt , hks );
gbmemcpy ( ttt , p , hks );
gbmemcpy ( p , p + hks , 6 );
gbmemcpy ( p + 6 , ttt , hks );
// point to the low "hks" bytes now
p += 6;
// turn half bit on
@ -1784,9 +1784,9 @@ swapBack:
//char *p = m_lists[i].getList();
// swap back
//char ttt[10];
//memcpy ( ttt , p , hks );
//memcpy ( p , p + hks , 6 );
//memcpy ( p + 6 , ttt , hks );
//gbmemcpy ( ttt , p , hks );
//gbmemcpy ( p , p + hks , 6 );
//gbmemcpy ( p + 6 , ttt , hks );
// turn half bit off
//*p &= ~0x02;
}
@ -2623,7 +2623,7 @@ void IndexTable2::addLists2_r ( int32_t numListsToDo ,
// debug point
/*
int64_t ddd ;
memcpy ( &ddd , ptrs[i] , 6 );
gbmemcpy ( &ddd , ptrs[i] , 6 );
ddd >>= 2;
ddd &= DOCID_MASK;
if ( ddd == 7590103015LL )
@ -2736,7 +2736,7 @@ void IndexTable2::addLists2_r ( int32_t numListsToDo ,
if ( explicitBits[nn] & ebits ) {
// no point in logging since in thread!
//int64_t dd ;
//memcpy ( &dd , ptrs[i] , 6 );
//gbmemcpy ( &dd , ptrs[i] , 6 );
//dd >>= 2;
//dd &= DOCID_MASK;
//fprintf(stderr,"got dup score for docid=%"INT64"\n",dd);
@ -2968,7 +2968,7 @@ void IndexTable2::addLists2_r ( int32_t numListsToDo ,
// some of the phrase term vector components may
// be non-zero when they should be zero! fix this
// below when computing the final winners in done:.
memcpy ( &m_tmpScoresVec2[nqt*newTmpDocIds2] ,
gbmemcpy ( &m_tmpScoresVec2[nqt*newTmpDocIds2] ,
&scoresVec [nqt*i ] ,
nqt );
// like we have a score vector, one score per query
@ -3165,7 +3165,7 @@ void IndexTable2::addLists2_r ( int32_t numListsToDo ,
// store the ptr to the docid
m_tmpDocIdPtrs2[j] = docIdPtrs[i];
// store the score vector
memcpy ( &m_tmpScoresVec2[j * nqt], &scoresVec[nqt*i], nqt);
gbmemcpy ( &m_tmpScoresVec2[j * nqt], &scoresVec[nqt*i], nqt);
// store this too
if ( m_searchingEvents ) {
m_tmpEventIds2[j] = eventIds[i];
@ -3318,7 +3318,7 @@ void IndexTable2::addLists2_r ( int32_t numListsToDo ,
// loop above instead of two
m_tmpDocIdPtrs2 [ lastGuy ] =
m_tmpDocIdPtrs2 [ newTmpDocIds2 ];
memcpy(&m_tmpScoresVec2[lastGuy*nqt],
gbmemcpy(&m_tmpScoresVec2[lastGuy*nqt],
&m_tmpScoresVec2[newTmpDocIds2*nqt],
nqt);
if ( m_searchingEvents ) {
@ -4047,7 +4047,7 @@ void IndexTable2::hashTmpDocIds2 ( uint32_t *maxDocId ,
// hold ptr to our stuff
docIdPtrs [ nn ] = m_tmpDocIdPtrs2[i];
// store score
memcpy ( &scoresVec [nn * nqt] ,
gbmemcpy ( &scoresVec [nn * nqt] ,
&m_tmpScoresVec2[i * nqt],
nqt );
// and this vector
@ -5054,7 +5054,7 @@ void IndexTable2::computeWeightedScores ( int32_t numDocIds ,
for ( int32_t i = 0; i < numDocIds; i++ ) {
//if ( flags[i] == 0 ) continue;
int64_t d = 0;
memcpy(&d, docIdPtrs[i], 6);
gbmemcpy(&d, docIdPtrs[i], 6);
d >>= 2;
d &= DOCID_MASK;
// log the score vec and the final score

@ -41,7 +41,7 @@ typedef uint32_t score_t;
// . works for both docid ptrs from m_topTree and m_topDocIdPtrs[]
inline int64_t getDocIdFromPtr ( char *docIdPtr ) {
int64_t d;
memcpy ( &d , docIdPtr , 6 );
gbmemcpy ( &d , docIdPtr , 6 );
d >>= 2;
d &= DOCID_MASK;
return d;

@ -33,7 +33,7 @@
//#define INDEXDB_SPLIT 8
//#define DOCID_OFFSET_MASK (INDEXDB_SPLIT-1)
#define DOCID_OFFSET_MASK (g_conf.m_indexdbSplit-1)
#define MAX_SHARDS 128
#define MAX_SHARDS 1024
class Indexdb {
@ -82,7 +82,7 @@ class Indexdb {
// . extract the termId from a key
// . the termId is read from the 6 bytes starting at byte offset 6 of "k"
// . termId is zeroed first because only 6 of its 8 bytes get overwritten
int64_t getTermId ( key_t *k ) {
int64_t termId = 0LL;
// NOTE(review): copying into the first 6 bytes of an int64_t only yields
// the low-order 48 bits on a little-endian host -- confirm target arch
memcpy ( &termId , ((char *)k) + 6 , 6 );
gbmemcpy ( &termId , ((char *)k) + 6 , 6 );
return termId ;
};
int64_t getTermId ( key_t k ) { return getTermId ( &k ); };

@ -1484,10 +1484,10 @@ bool Language::getRecommendation( char *origWord, int32_t origWordLen,
char *p = possiblePhonet;
// first put in all the chars that come before the char
// to be added
memcpy ( p, origPhonet, i ); p += i;
gbmemcpy ( p, origPhonet, i ); p += i;
// the index of m_ruleChars[] is the char to be added
*p++ = j;
memcpy ( p, origPhonet + i, origLen - i );
gbmemcpy ( p, origPhonet + i, origLen - i );
p += origLen - i;
*p++ = '\0';
numRecos = tryPhonet( possiblePhonet, origPhonet,
@ -1500,9 +1500,9 @@ bool Language::getRecommendation( char *origWord, int32_t origWordLen,
for ( int32_t i = 0; i < origLen; i++ ){
char *p = possiblePhonet;
// put the chars that come before the deleted char
memcpy ( p, origPhonet, i ); p += i;
gbmemcpy ( p, origPhonet, i ); p += i;
// put the chars that come after the deleted char
memcpy ( p, origPhonet + i + 1, origLen - i - 1 );
gbmemcpy ( p, origPhonet + i + 1, origLen - i - 1 );
p += origLen - i - 1;
*p++ = '\0';
numRecos = tryPhonet( possiblePhonet, origPhonet,
@ -1518,11 +1518,11 @@ bool Language::getRecommendation( char *origWord, int32_t origWordLen,
// cannot substitute if both chars are the same
if ( j == *( origPhonet + i ) ) continue;
// put the chars that come before the substituted char
memcpy ( p, origPhonet, i ); p += i;
gbmemcpy ( p, origPhonet, i ); p += i;
// substitute the char
*p++ = j;
// put the chars that come after the deleted char
memcpy ( p, origPhonet + i + 1, origLen - i - 1);
gbmemcpy ( p, origPhonet + i + 1, origLen - i - 1);
p += origLen - i - 1;
*p++ = '\0';
numRecos = tryPhonet( possiblePhonet, origPhonet,
@ -1537,12 +1537,12 @@ bool Language::getRecommendation( char *origWord, int32_t origWordLen,
// cannot swap if both chars are the same
if ( *( origPhonet + i ) == *( origPhonet + i + 1 ) ) continue;
// put the chars that come before the swapped char
memcpy ( p, origPhonet, i ); p += i;
gbmemcpy ( p, origPhonet, i ); p += i;
//swap the chars
*p++ = *( origPhonet + i + 1);
*p++ = *( origPhonet + i );
// put the chars that come after the deleted char
memcpy ( p, origPhonet + i + 2, origLen - i - 2);
gbmemcpy ( p, origPhonet + i + 2, origLen - i - 2);
p += origLen - i - 2;
*p++ = '\0';
numRecos = tryPhonet( possiblePhonet, origPhonet,
@ -2577,7 +2577,7 @@ bool Language::hasMispelling(char *phrase, int32_t phraseLen){
while ( *pend != ' ' && pend < phrase + phraseLen )
pend++;
char word[1024];
memcpy(word, p, pend - p);
gbmemcpy(word, p, pend - p);
word[pend - p] = '\0';
uint32_t key = hash32d(p, pend - p);
int32_t slot = m_misp.getSlot(key);
@ -3711,7 +3711,7 @@ bool Language::makeQueryFiles ( ) {
char frag[1024];
int32_t flen;
char *query = r2.getString( "uip",&flen );
memcpy ( frag, query, flen );
gbmemcpy ( frag, query, flen );
frag[flen++] = '\t';
int32_t queryLen;
query = r2.getString( "q",&queryLen );
@ -3817,7 +3817,7 @@ bool Language::makeQueryFiles ( ) {
continue;
}
// otherwise, more than 1 byte char
memcpy(frag+fragLen,p,cs);
gbmemcpy(frag+fragLen,p,cs);
fragLen += cs;
}

@ -962,7 +962,7 @@ static bool s_isLangTag(char *str) {
static uint8_t s_getCountryFromSpec(char *str) {
char code[6];
memset(code, 6, 0);
memcpy(code, str, s_wordLen(str));
gbmemcpy(code, str, s_wordLen(str));
for(int x = 0; x < 6; x++)
if(code[x] > 'A' && code[x] < 'Z') code[x] -= ('A' - 'a');
if(code[2] == '_' || code[2] == '-')

@ -57,7 +57,7 @@ bool LanguagePages::setLanguagePage(uint8_t lang,
(uint8_t *)mmalloc(uint8strlen(pageText) + 1, "langPage");
if(!m_languagePages[lang]) return(false);
memset(m_languagePages[lang], 0, uint8strlen(pageText) + 1);
memcpy(m_languagePages[lang], pageText, uint8strlen(pageText));
gbmemcpy(m_languagePages[lang], pageText, uint8strlen(pageText));
} else {
m_languagePages[lang] = pageText;
}
@ -80,7 +80,7 @@ bool LanguagePages::setLanguageHeader(uint8_t lang,
(uint8_t *)mmalloc(uint8strlen(pageText) + 1, "langHeader");
if(!m_languageHeaders[lang]) return(false);
memset(m_languageHeaders[lang], 0, uint8strlen(pageText) + 1);
memcpy(m_languageHeaders[lang], pageText, uint8strlen(pageText));
gbmemcpy(m_languageHeaders[lang], pageText, uint8strlen(pageText));
} else {
m_languageHeaders[lang] = pageText;
}
@ -103,7 +103,7 @@ bool LanguagePages::setLanguageFooter(uint8_t lang,
(uint8_t *)mmalloc(uint8strlen(pageText) + 1, "langFooter");
if(!m_languageFooters[lang]) return(false);
memset(m_languageFooters[lang], 0, uint8strlen(pageText) + 1);
memcpy(m_languageFooters[lang], pageText, uint8strlen(pageText));
gbmemcpy(m_languageFooters[lang], pageText, uint8strlen(pageText));
} else {
m_languageFooters[lang] = pageText;
}

@ -131,7 +131,7 @@ bool Linkdb::init ( ) {
0 , // fixeddatasize is 0 since no data
// keep it high since we are mostly ssds now and
// the reads are small...
6,//g_conf.m_linkdbMinFilesToMerge ,
-1,//g_conf.m_linkdbMinFilesToMerge ,
// fix this to 15 and rely on the page cache of
// just the satellite files and the daily merge to
// keep things fast.
@ -813,15 +813,15 @@ void Msg25Request::serialize ( ) {
char *p = m_buf;
memcpy ( p , ptr_url , size_url );
gbmemcpy ( p , ptr_url , size_url );
ptr_url = (char *)(p - m_buf);
p += size_url;
memcpy ( p , ptr_site , size_site );
gbmemcpy ( p , ptr_site , size_site );
ptr_site = (char *)(p - m_buf);
p += size_site;
memcpy ( p , ptr_oldLinkInfo , size_oldLinkInfo );
gbmemcpy ( p , ptr_oldLinkInfo , size_oldLinkInfo );
ptr_oldLinkInfo = (char *)(p - m_buf);
p += size_oldLinkInfo;
}
@ -1064,7 +1064,7 @@ bool Msg25::doReadLoop ( ) {
// resume from where we left off?
if ( m_round > 0 )
//startKey = m_nextKey;
memcpy ( &startKey , &m_nextKey , LDBKS );
gbmemcpy ( &startKey , &m_nextKey , LDBKS );
// but new links: algo does not need internal links with no link test
// see Links.cpp::hash() for score table
@ -1478,7 +1478,7 @@ bool Msg25::sendRequests ( ) {
// is it expired?
lostDate = g_linkdb.getLostDate_uk(&key);
// update this
memcpy ( &m_nextKey , &key , LDBKS );
gbmemcpy ( &m_nextKey , &key , LDBKS );
//if ( ip32+1 < ip32 ) { char *xx=NULL;*xx=0; }
// skip to next ip!
//g_linkdb.setIp32_uk ( &m_nextKey , ip32+1 );
@ -1504,7 +1504,7 @@ bool Msg25::sendRequests ( ) {
// is it expired?
lostDate = g_linkdb.getLostDate_uk(&key);
// update this
memcpy ( &m_nextKey , &key , LDBKS );
gbmemcpy ( &m_nextKey , &key , LDBKS );
//if ( ip32+1 < ip32 ) { char *xx=NULL;*xx=0; }
// skip to next ip!
//g_linkdb.setIp32_uk ( &m_nextKey , ip32+1 );
@ -3302,7 +3302,7 @@ bool Msg25::addNote ( char *note , int32_t noteLen , int64_t docId ) {
e->m_docIds[0] = docId;
e->m_docIds[1] = -1LL;
// store note into the buffer, NULL terminated
memcpy ( p , note , noteLen ); p += noteLen;
gbmemcpy ( p , note , noteLen ); p += noteLen;
*p++ = '\0';
// add to the table
int32_t slot = -1;
@ -3392,13 +3392,13 @@ bool Msg25::getPageLinkInfo2 ( Url *url ,
// make a Msg25 request for fresh link info
char *p = m_request;
// store url
memcpy ( p , url->getUrl() , url->getUrlLen() );
gbmemcpy ( p , url->getUrl() , url->getUrlLen() );
// skip over url
p += url->getUrlLen();
// store \0
*p++ = '\0';
// store remote coll
memcpy ( p , remoteColl , remoteCollLen );
gbmemcpy ( p , remoteColl , remoteCollLen );
// skip over it
p += remoteCollLen;
// store \0
@ -4144,7 +4144,7 @@ bool Inlink::setXmlFromLinkText ( Xml *xml ) {
// sanity check
if ( len > 900 ) { char *xx=NULL;*xx=0; }
// copy
memcpy ( buf , ptr_linkText , size_linkText );
gbmemcpy ( buf , ptr_linkText , size_linkText );
// ensure null termination
buf [ size_linkText ] = '\0';
buf [ size_linkText + 1 ] = '\0';
@ -4390,40 +4390,108 @@ void Inlink::set ( Msg20Reply *r ) {
int32_t poff = 0;
char *p = m_buf;
int32_t need =
r->size_ubuf +
r->size_linkText +
r->size_surroundingText +
r->size_rssItem +
r->size_categories +
r->size_gigabitQuery +
r->size_templateVector;
char *pend = p + need;
// -10 to add \0's for remaining guys in case of breach
pend -= 10;
size_urlBuf = r->size_ubuf;
size_linkText = r->size_linkText;
size_surroundingText = r->size_surroundingText;
size_rssItem = r->size_rssItem;
size_categories = r->size_categories;
size_gigabitQuery = r->size_gigabitQuery;
size_templateVector = r->size_templateVector;
/////////////
off_urlBuf = poff;
memcpy ( p , r->ptr_ubuf , r->size_ubuf );
poff += r->size_ubuf;
p += r->size_ubuf;
gbmemcpy ( p , r->ptr_ubuf , size_urlBuf );
poff += size_urlBuf;
p += size_urlBuf;
/////////////
off_linkText = poff;
memcpy ( p , r->ptr_linkText , r->size_linkText );
poff += r->size_linkText;
p += r->size_linkText;
gbmemcpy ( p , r->ptr_linkText , size_linkText );
poff += size_linkText;
p += size_linkText;
/////////////
off_surroundingText = poff;
memcpy ( p , r->ptr_surroundingText , r->size_surroundingText );
poff += r->size_surroundingText;
p += r->size_surroundingText;
if ( p + r->size_surroundingText < pend ) {
gbmemcpy (p,r->ptr_surroundingText , size_surroundingText );
}
else {
size_surroundingText = 1;
*p = '\0';
}
poff += size_surroundingText;
p += size_surroundingText;
/////////////
off_rssItem = poff;
memcpy ( p , r->ptr_rssItem , r->size_rssItem );
poff += r->size_rssItem;
p += r->size_rssItem;
if ( p + r->size_rssItem < pend ) {
gbmemcpy ( p , r->ptr_rssItem , size_rssItem );
}
else {
size_rssItem = 1;
*p = '\0';
}
poff += size_rssItem;
p += size_rssItem;
/////////////
off_categories = poff;
memcpy ( p , r->ptr_categories , r->size_categories );
poff += r->size_categories;
p += r->size_categories;
if ( p + r->size_categories < pend ) {
gbmemcpy ( p , r->ptr_categories , size_categories );
}
else {
size_categories = 1;
*p = '\0';
}
poff += size_categories;
p += size_categories;
/////////////
off_gigabitQuery = poff;
memcpy ( p , r->ptr_gigabitQuery , r->size_gigabitQuery );
poff += r->size_gigabitQuery;
p += r->size_gigabitQuery;
if ( p + r->size_gigabitQuery < pend ) {
gbmemcpy ( p , r->ptr_gigabitQuery , size_gigabitQuery );
}
else {
size_gigabitQuery = 1;
*p = '\0';
}
poff += size_gigabitQuery;
p += size_gigabitQuery;
/////////////
off_templateVector = poff;
memcpy ( p , r->ptr_templateVector , r->size_templateVector );
poff += r->size_templateVector;
p += r->size_templateVector;
if ( p + r->size_templateVector < pend ) {
gbmemcpy (p , r->ptr_templateVector , size_templateVector );
}
else {
size_templateVector = 1;
*p = '\0';
}
poff += size_templateVector;
p += size_templateVector;
/*
MDW: take this out for 64 bit offset-only conversion
@ -4436,13 +4504,7 @@ void Inlink::set ( Msg20Reply *r ) {
ptr_templateVector = r->ptr_templateVector;
*/
size_urlBuf = r->size_ubuf;
size_linkText = r->size_linkText;
size_surroundingText = r->size_surroundingText;
size_rssItem = r->size_rssItem;
size_categories = r->size_categories;
size_gigabitQuery = r->size_gigabitQuery;
size_templateVector = r->size_templateVector;
}
// Msg25 calls this to make a "fake" msg20 reply for recycling Inlinks
@ -4549,13 +4611,13 @@ void Inlink::set2 ( Inlink *old ) {
int fullSize = old->getStoredSize();
// return how many bytes we processed
memcpy ( (char *)this , (char *)old , fullSize );
gbmemcpy ( (char *)this , (char *)old , fullSize );
return;
// this old way is pre-64bit
/*
memcpy ( (char *)this , (char *)old , old->m_firstStrPtrOffset );
gbmemcpy ( (char *)this , (char *)old , old->m_firstStrPtrOffset );
// set our offset to the string ptrs
m_firstStrPtrOffset = (char *)&ptr_urlBuf - (char *)this;
// and our base
@ -4563,11 +4625,11 @@ void Inlink::set2 ( Inlink *old ) {
// now copy over string ptrs
char *dst = (char *)this + m_firstStrPtrOffset;
char *src = (char *)old + old->m_firstStrPtrOffset;
memcpy ( dst , src , old->m_numStrings * 4 );
gbmemcpy ( dst , src , old->m_numStrings * 4 );
// and the sizes
dst += 4 * m_numStrings ;
src += 4 * old->m_numStrings ;
memcpy ( dst , src , old->m_numStrings * 4 );
gbmemcpy ( dst , src , old->m_numStrings * 4 );
// sanity tests. make sure they match up
//if ( old->ptr_urlBuf != ptr_urlBuf ) { char *xx=NULL;*xx=0; }
//if ( old->ptr_rssItem != ptr_rssItem ) { char *xx=NULL;*xx=0; }
@ -4630,7 +4692,7 @@ char *Inlink::serialize ( int32_t *retSize ,
// copy the easy stuff
char *p = buf;
char *pend = buf + need;
memcpy ( p , (char *)this , need );
gbmemcpy ( p , (char *)this , need );
p += need;
if ( p != pend ) { char *xx=NULL;*xx=0; }
@ -4651,7 +4713,7 @@ char *Inlink::serialize ( int32_t *retSize ,
// if ( p > m_buf+*offPtr && p < m_buf+*offPtr + *sizePtr ) {
// char *xx = NULL; *xx = 0; }
// // copy the string into the buffer
// memcpy ( p , m_buf + *offPtr , *sizePtr );
// gbmemcpy ( p , m_buf + *offPtr , *sizePtr );
// //skip:
// // . make it point into the buffer now
// // . MDW: why? that is causing problems for the re-call in
@ -5452,28 +5514,28 @@ bool Links::addLink ( char *link , int32_t linkLen , int32_t nodeNum ,
if ( p > newBuf + newAllocSize ) { char *xx = NULL; *xx = 0; }
if (m_linkBuf){
memcpy(newLinkPtrs, m_linkPtrs,
gbmemcpy(newLinkPtrs, m_linkPtrs,
m_numLinks * sizeof(char*));
QUICKPOLL(niceness);
memcpy(newLinkLens, m_linkLens,
gbmemcpy(newLinkLens, m_linkLens,
m_numLinks * sizeof(int32_t));
QUICKPOLL(niceness);
memcpy(newLinkNodes, m_linkNodes,
gbmemcpy(newLinkNodes, m_linkNodes,
m_numLinks * sizeof(int32_t));
QUICKPOLL(niceness);
memcpy(newLinkHashes, m_linkHashes,
gbmemcpy(newLinkHashes, m_linkHashes,
m_numLinks * sizeof(uint64_t));
QUICKPOLL(niceness);
memcpy(newHostHashes, m_hostHashes,
gbmemcpy(newHostHashes, m_hostHashes,
m_numLinks * sizeof(uint64_t));
QUICKPOLL(niceness);
memcpy(newDomHashes, m_domHashes,
gbmemcpy(newDomHashes, m_domHashes,
m_numLinks * sizeof(int32_t));
QUICKPOLL(niceness);
memcpy(newLinkFlags, m_linkFlags,
gbmemcpy(newLinkFlags, m_linkFlags,
m_numLinks * sizeof(linkflags_t));
QUICKPOLL(niceness);
memcpy(newSpamNotes,m_spamNotes,
gbmemcpy(newSpamNotes,m_spamNotes,
m_numLinks * sizeof(char *));
int32_t oldSize = getLinkBufferSize(m_allocLinks);
mfree(m_linkBuf, oldSize, "Links");
@ -5655,7 +5717,7 @@ bool Links::addLink ( char *link , int32_t linkLen , int32_t nodeNum ,
QUICKPOLL(niceness);
if ( m_allocBuf ) {
QUICKPOLL(niceness);
memcpy ( newBuf , m_allocBuf , m_allocSize );
gbmemcpy ( newBuf , m_allocBuf , m_allocSize );
QUICKPOLL(niceness);
// update pointers to previous buffer
int64_t offset = newBuf - m_allocBuf;
@ -5690,7 +5752,7 @@ bool Links::addLink ( char *link , int32_t linkLen , int32_t nodeNum ,
m_linkLens [ m_numLinks ] = url.getUrlLen();
m_linkNodes [ m_numLinks ] = nodeNum;
// serialize the normalized link into the buffer
memcpy ( m_bufPtr , url.getUrl(), url.getUrlLen() );
gbmemcpy ( m_bufPtr , url.getUrl(), url.getUrlLen() );
m_bufPtr += url.getUrlLen();
QUICKPOLL(niceness);
@ -6060,12 +6122,12 @@ int32_t Links::getLinkText2 ( int32_t i ,
int32_t rss = m_xml->isRSSFeed();
if ( rss == 1 ) {
//del = "item";
memcpy(del, "item\0", 5);
gbmemcpy(del, "item\0", 5);
dlen = 4;
}
else if ( rss == 2 ) {
//del = "entry";
memcpy(del, "entry\0", 6);
gbmemcpy(del, "entry\0", 6);
dlen = 5;
}
// if rss or atom page, return the whole xml <item> or <entry>
@ -6399,7 +6461,7 @@ void Links::removeExternalLinks ( ) {
// skip if not internal (by hostname)
if ( ! isInternalHost(i) ) continue;
// copy it over
memcpy ( p , m_linkPtrs[i] , m_linkLens[i] );
gbmemcpy ( p , m_linkPtrs[i] , m_linkLens[i] );
// add it back
m_linkPtrs [j] = p;
m_linkLens [j] = m_linkLens [i];

106
Log.cpp

@ -28,8 +28,8 @@ static pthread_mutex_t s_lock = PTHREAD_MUTEX_INITIALIZER;
char *g_dbuf = NULL;
int32_t g_dbufSize = 0;
// main process id
static pid_t s_pid = -1;
// main process id. pthread_t is 64 bit and pid_t is 32 bit on 64 bit oses
static pthread_t s_pid = (pthread_t)-1;
void Log::setPid ( ) {
s_pid = getpidtid();
@ -61,6 +61,34 @@ void Log::reset ( ) {
#endif
}
// for example, RENAME log000 to log000-2013_11_04-18:19:32
// . rename the current log file out of the way so a fresh one can be started
// . for example, RENAME log000 to log000-2013_11_04-18:19:32
// . the suffix is the value of getTimeLocal() broken down with gmtime_r()
// . returns false if the timestamp or the new filename could not be built
// . if the current log file does not exist nothing is renamed and we
//   still return true
bool renameCurrentLogFile ( ) {
	File f;
	char tmp[16];
	// bounded print so an unexpected host id can never overrun tmp[]
	snprintf(tmp,sizeof(tmp),"log%03"INT32"",g_hostdb.m_hostId);
	f.set ( g_hostdb.m_dir , tmp );
	// make new filename like log000-2013_11_04-18:19:32
	time_t now = getTimeLocal();
	// . use the reentrant gmtime_r() with our own tm buffer
	// . gmtime() hands back a pointer to shared static storage which
	//   another thread could overwrite while we are formatting it
	struct tm tmBuf;
	if ( ! gmtime_r ( &now , &tmBuf ) ) {
		fprintf(stderr,"log rename failed\n");
		return false;
	}
	char tmp2[64];
	// strftime() returns 0 if the result did not fit, in which case
	// the contents of tmp2[] are unspecified
	if ( strftime(tmp2,sizeof(tmp2),"%Y_%m_%d-%T",&tmBuf) == 0 ) {
		fprintf(stderr,"log rename failed\n");
		return false;
	}
	SafeBuf newName;
	if ( ! newName.safePrintf ( "%slog%03"INT32"-%s",
				    g_hostdb.m_dir,
				    g_hostdb.m_hostId,
				    tmp2 ) ) {
		fprintf(stderr,"log rename failed\n");
		return false;
	}
	// rename log000 to log000-2013_11_04-18:19:32
	// NOTE(review): the result of f.rename() is not checked here, so a
	// failed rename is still reported as success -- confirm that is
	// the intended best-effort behavior
	if ( f.doesExist() ) {
		//fprintf(stdout,"renaming file\n");
		f.rename ( newName.getBufStart() );
	}
	return true;
}
bool Log::init ( char *filename ) {
// set the main process id
//s_pid = getpidtid();
@ -89,30 +117,12 @@ bool Log::init ( char *filename ) {
// RENAME log000 to log000-2013_11_04-18:19:32
//
if ( g_conf.m_runAsDaemon ) {
File f;
char tmp[16];
sprintf(tmp,"log%03"INT32"",g_hostdb.m_hostId);
f.set ( g_hostdb.m_dir , tmp );
// make new filename like log000-2013_11_04-18:19:32
time_t now = getTimeLocal();
tm *tm1 = gmtime((const time_t *)&now);
char tmp2[64];
strftime(tmp2,64,"%Y_%m_%d-%T",tm1);
SafeBuf newName;
if ( ! newName.safePrintf ( "%slog%03"INT32"-%s",
g_hostdb.m_dir,
g_hostdb.m_hostId,
tmp2 ) ) {
fprintf(stderr,"log rename failed\n");
return false;
}
// rename log000 to log000-2013_11_04-18:19:32
if ( f.doesExist() ) {
//fprintf(stdout,"renaming file\n");
f.rename ( newName.getBufStart() );
}
// returns false on error
if ( ! renameCurrentLogFile() ) return false;
}
// get size of current file. getFileSize() is defined in File.h.
m_logFileSize = getFileSize ( m_filename );
// open it for appending.
// create with -rw-rw-r-- permissions if it's not there.
@ -207,6 +217,10 @@ bool Log::shouldLog ( int32_t type , char *msg ) {
return true;
}
// 1GB max log file size
#define MAXLOGFILESIZE 1000000000
// for testing:
//#define MAXLOGFILESIZE 3000
bool Log::logR ( int64_t now , int32_t type , char *msg , bool asterisk ,
bool forced ) {
@ -246,7 +260,7 @@ bool Log::logR ( int64_t now , int32_t type , char *msg , bool asterisk ,
// chop off any spaces at the end of the msg.
while ( is_wspace_a ( msg [ msgLen - 1 ] ) && msgLen > 0 ) msgLen--;
// get this pid
pid_t pid = getpidtid();
pthread_t pid = getpidtid();
// a tmp buffer
char tt [ MAX_LINE_LEN ];
char *p = tt;
@ -291,9 +305,9 @@ bool Log::logR ( int64_t now , int32_t type , char *msg , bool asterisk ,
// MDW... no i like it
//while ( p < pend && *x && is_alnum_a(*x) ) { x++; cc--; }
// thread id if in "thread"
if ( pid != s_pid && s_pid != -1 ) {
if ( pid != s_pid && s_pid != (pthread_t)-1 ) {
//sprintf ( p , "[%"INT32"] " , (int32_t)getpid() );
sprintf ( p , "[%"UINT32"] " , (uint32_t)pid );
sprintf ( p , "[%"UINT64"] " , (uint64_t)pid );
p += gbstrlen ( p );
}
// then message itself
@ -346,15 +360,23 @@ bool Log::logR ( int64_t now , int32_t type , char *msg , bool asterisk ,
if ( *ttp == '\t' ) *ttp = ' ';
}
// . if filesize would be too big then make a new log file
// . should make a new m_fd
if ( m_logFileSize + tlen+1 > MAXLOGFILESIZE )
makeNewLogFile();
if ( m_fd >= 0 ) {
write ( m_fd , tt , tlen );
write ( m_fd , "\n", 1 );
m_logFileSize += tlen + 1;
}
else {
// print it out for now
fprintf ( stderr, "%s\n", tt );
}
// set the stuff in the array
m_errorMsg [m_numErrors] = msg;
m_errorMsgLen [m_numErrors] = msgLen;
@ -370,6 +392,28 @@ bool Log::logR ( int64_t now , int32_t type , char *msg , bool asterisk ,
return false;
}
// . rotate the log: move the current log file aside and start a new one
// . returns false if the rename step fails (the old m_fd is left alone
//   in that case) or if the new file could not be opened
// . on success m_fd refers to the freshly opened file and
//   m_logFileSize is back at zero
bool Log::makeNewLogFile ( ) {
	// log000 becomes something like log000-2013_11_04-18:19:32
	if ( ! renameCurrentLogFile() ) return false;
	// done with the old descriptor, if we had one
	if ( m_fd >= 0 ) ::close ( m_fd );
	// nothing written to the new file yet
	m_logFileSize = 0;
	// mark as closed until the open below succeeds
	m_fd = -1;
	// . append mode, creating the file if it is not there
	// . permissions are -rw-rw-r--
	const int flags = O_APPEND | O_CREAT | O_RDWR;
	const int perms = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH;
	m_fd = open ( m_filename , flags , perms );
	if ( m_fd < 0 ) {
		// complain on stderr since the log itself is unavailable
		fprintf(stderr,"could not open new log file %s for appending\n",
			m_filename);
		return false;
	}
	return true;
}
// keep a special buf
static char s_buf[1024*64];
static char *s_bufEnd = s_buf + 1024*64;
@ -527,11 +571,11 @@ void Log::printBuf ( ) {
}
// first 4 bytes are the size of the string arguments
int32_t stringSizes;
memcpy ( (char *)&stringSizes , p , 4 );
gbmemcpy ( (char *)&stringSizes , p , 4 );
p += 4;
// then the type of the msg
int32_t type;
memcpy ( (char *)&type , p , 4 );
gbmemcpy ( (char *)&type , p , 4 );
p += 4;
// then the format string
char *format = p;
@ -557,11 +601,11 @@ void Log::printBuf ( ) {
}
// get time
int64_t now ;
memcpy ( (char *)&now , p , 8 );
gbmemcpy ( (char *)&now , p , 8 );
p += 8;
// get size of args
int32_t apsize ;
memcpy ( (char *)&apsize , p , 4 );
gbmemcpy ( (char *)&apsize , p , 4 );
p += 4;
// dword align
int32_t rem = ((PTRTYPE)p) % 4;

3
Log.h

@ -154,6 +154,9 @@ class Log {
char *m_hostname;
int m_port;
int64_t m_logFileSize;
bool makeNewLogFile ( );
char *m_errorMsg [ MAX_LOG_MSGS ];
int16_t m_errorMsgLen [ MAX_LOG_MSGS ];
int64_t m_errorTime [ MAX_LOG_MSGS ];

@ -817,7 +817,7 @@ bool Loop::init ( ) {
// handling a SIGIO signal, so don't worry about that
// . what sigs should be blocked when in our handler? the same
// sigs we are handling i guess
memcpy ( &sa2.sa_mask , &sigs , sizeof(sigs) );
gbmemcpy ( &sa2.sa_mask , &sigs , sizeof(sigs) );
sa2.sa_flags = SA_SIGINFO ; //| SA_ONESHOT;
// call this function
sa2.sa_sigaction = sigHandlerQueue_r;
@ -929,9 +929,10 @@ bool Loop::init ( ) {
m_realInterrupt.it_value.tv_sec = 0;
m_realInterrupt.it_value.tv_usec = 10 * 1000;
// 1000 microseconds in a millisecond
m_realInterrupt.it_value.tv_usec = 1 * 1000;
m_realInterrupt.it_interval.tv_sec = 0;
m_realInterrupt.it_interval.tv_usec = 10 * 1000;
m_realInterrupt.it_interval.tv_usec = 1 * 1000;
m_noInterrupt.it_value.tv_sec = 0;
@ -969,6 +970,35 @@ void sigpwrHandler ( int x , siginfo_t *info , void *y ) {
g_loop.m_shutdown = 3;
}
#include <execinfo.h>
// . log the current call stack as raw hex return addresses, one per line
// . called from sigbadHandler() before it tries to save and shut down
// . signum, info and ptr are the signal handler arguments; they are
//   accepted for signature compatibility but not used here
// . decode the logged addresses with 'addr2line -e gb'
void printStackTrace ( int signum , siginfo_t *info , void *ptr ) {

	logf(LOG_DEBUG,"gb: seg fault. printing stack trace. use "
	     "'addr2line -e gb' to decode the hex below.");

	// at most 200 frames are captured
	static void *s_bt[200];
	int sz = backtrace(s_bt, 200);
	for ( int i = 0 ; i < sz ; ++i ) {
		// . go through uintptr_t before widening to 64 bits
		// . casting a 32-bit pointer directly to uint64_t
		//   sign-extends under gcc, which would print addresses
		//   at or above 0x80000000 with a bogus ffffffff prefix
		//   on -m32 builds
		logf(LOG_DEBUG,"[0x%"XINT64"]"
		     ,(uint64_t)(uintptr_t)s_bt[i]
		     );
	}
}
// TODO: if we get a segfault while saving, what then?
void sigbadHandler ( int x , siginfo_t *info , void *y ) {
@ -997,6 +1027,11 @@ void sigbadHandler ( int x , siginfo_t *info , void *y ) {
log("loop: sigbadhandler. shutdown already called.");
return;
}
// unwind
printStackTrace( x , info , y );
// if we're a thread, let main process know to shutdown
g_loop.m_shutdown = 2;
log("loop: sigbadhandler. trying to save now. mode=%"INT32"",
@ -1117,19 +1152,29 @@ void sigvtalrmHandler ( int x , siginfo_t *info , void *y ) {
void sigalrmHandler ( int x , siginfo_t *info , void *y ) {
// so we don't call gettimeofday() thousands of times a second...
g_clockNeedsUpdate = true;
// stats
g_numAlarms++;
// . see where we are in the code
// . for computing cpu usage
// . if idling we will be in sigtimedwait() at the lowest level
Host *h = g_hostdb.m_myHost;
// if doing injects...
if ( ! h ) return;
// . i guess this means we were doing something... (otherwise idle)
// . this is KINDA like a 100 point sample, but it has crazy decay
// logic built into it
if ( ! g_inWaitState )
h->m_cpuUsage = .99 * h->m_cpuUsage + .01 * 100;
h->m_pingInfo.m_cpuUsage =
.99 * h->m_pingInfo.m_cpuUsage + .01 * 100;
else
h->m_cpuUsage = .99 * h->m_cpuUsage + .01 * 000;
h->m_pingInfo.m_cpuUsage =
.99 * h->m_pingInfo.m_cpuUsage + .01 * 000;
if ( g_profiler.m_realTimeProfilerRunning )
g_profiler.getStackFrame(0);
}
static sigset_t s_rtmin;
@ -1823,9 +1868,9 @@ void Loop::doPoll ( ) {
fd_set readfds;
fd_set writefds;
fd_set exceptfds;
memcpy ( &readfds, &s_selectMaskRead , sizeof(fd_set) );
memcpy ( &writefds, &s_selectMaskWrite , sizeof(fd_set) );
//memcpy ( &exceptfds, &s_selectMaskExcept , sizeof(fd_set) );
gbmemcpy ( &readfds, &s_selectMaskRead , sizeof(fd_set) );
gbmemcpy ( &writefds, &s_selectMaskWrite , sizeof(fd_set) );
//gbmemcpy ( &exceptfds, &s_selectMaskExcept , sizeof(fd_set) );
// what is the point of fds for writing... its for when we
// get a new socket via accept() it is read for writing...

@ -89,12 +89,10 @@ LIBS = ./libz.a ./libssl.a ./libcrypto.a ./libiconv.a ./libm.a
# are we a 32-bit architecture? use different libraries then
else ifeq ($(ARCH), i686)
CPPFLAGS= -m32 -g -Wall -pipe -fno-stack-protector -Wno-write-strings -Wstrict-aliasing=0 -Wno-uninitialized -DPTHREADS -Wno-unused-but-set-variable -static
LIBS= -L. ./libz.a ./libssl.a ./libcrypto.a ./libiconv.a ./libm.a ./libstdc++.a -lpthread
else ifeq ($(ARCH), i386)
CPPFLAGS= -m32 -g -Wall -pipe -fno-stack-protector -Wno-write-strings -Wstrict-aliasing=0 -Wno-uninitialized -DPTHREADS -Wno-unused-but-set-variable -static
LIBS= -L. ./libz.a ./libssl.a ./libcrypto.a ./libiconv.a ./libm.a ./libstdc++.a -lpthread
@ -104,10 +102,6 @@ else
#
CPPFLAGS = -g -Wall -pipe -fno-stack-protector -Wno-write-strings -Wstrict-aliasing=0 -Wno-uninitialized -DPTHREADS -Wno-unused-but-set-variable -static
#LIBS= -L. ./libz.a ./libssl.a ./libcrypto.a ./libiconv.a ./libm.a ./libstdc++.a -lpthread
# use this for compiling on CYGWIN: (only for 32bit cygwin right now and
# you have to install the packages that have these libs.
#LIBS= -lz -lm -lpthread -lssl -lcrypto -liconv
# apt-get install libssl-dev (to provide libssl and libcrypto)
# to build static libiconv.a do a './configure --enable-static' then 'make'
# in the iconv directory
@ -156,18 +150,42 @@ vclean:
@echo ""
@echo "*****"
@echo ""
@echo "If make fails then first run:"
@echo "If make fails on Ubuntu then first run:"
@echo ""
@echo "sudo apt-get update ; sudo apt-get install make g++ libssl-dev"
@echo ""
@echo ""
@echo "If make fails on RedHat then first run:"
@echo ""
@echo "sudo yum install gcc-c++"
@echo ""
@echo "*****"
@echo ""
gb: vclean $(OBJS) main.o $(LIBFILES)
$(CC) $(DEFS) $(CPPFLAGS) -o $@ main.o $(OBJS) $(LIBS)
# use this for compiling on CYGWIN:
# only for 32bit cygwin right now and
# you have to install the packages that have these libs.
# you have to get these packages from cygwin:
# 1. LIBS > zlib-devel: Gzip de/compression library (development)
# 2. LIBS > libiconv: GNU character set conversion library and utilities
# 3. DEVEL > openssl: cygwin32-openssl: OpenSSL for Cygwin 32bit toolchain
# 3. NET > openssl: A general purpose cryptography toolkit with TLS impl...
# 4. DEVEL > mingw-pthreads: Libpthread for MinGW.org Win32 toolchain
# 5. DEVEL > gcc-g++: GNU Compiler Collection (C++)
# 6. DEVEL > make: The GNU version of the 'make' utility
# 7. DEVEL > git: Distributed version control system
# 8. EDITORS > emacs
cygwin:
make DEFS="-DCYGWIN -D_REENTRANT_ $(CHECKFORMATSTRING) -I." gb
make DEFS="-DCYGWIN -D_REENTRANT_ $(CHECKFORMATSTRING) -I." LIBS=" -lz -lm -lpthread -lssl -lcrypto -liconv" gb
gb32:
make CPPFLAGS="-m32 -g -Wall -pipe -fno-stack-protector -Wno-write-strings -Wstrict-aliasing=0 -Wno-uninitialized -DPTHREADS -Wno-unused-but-set-variable -static" LIBS=" -L. ./libz.a ./libssl.a ./libcrypto.a ./libiconv.a ./libm.a ./libstdc++.a -lpthread " gb
@ -595,14 +613,14 @@ master-rpm:
# DEBIAN PACKAGE SECTION BEGIN
# need to do 'apt-get intall dh-make'
# need to do 'apt-get install dh-make'
# deb-master
master-deb:
master-deb32:
# need to change in changelog too!! don't forget!!!
git archive --format=tar --prefix=gb-1.16/ master > ../gb_1.16.orig.tar
git archive --format=tar --prefix=gb-1.17/ master > ../gb_1.17.orig.tar
rm -rf debian
# change "-p gb_1.0" to "-p gb_1.1" to update version for example
dh_make -e gigablast@mail.com -p gb_1.16 -f ../gb_1.16.orig.tar
dh_make -s -e gigablast@mail.com -p gb_1.17 -f ../gb_1.17.orig.tar
# zero this out, it is just filled with the .txt files erroneously and it'll
# try to automatically install in /usr/docs/
rm debian/docs
@ -621,16 +639,52 @@ master-deb:
# YOU HAVE TO RUN THIS before you run 'make'
# export LD_LIBRARY_PATH=./debian/gb/var/gigablast/data0
# build the package now
dpkg-buildpackage -nc -ai386 -ti386 -b -uc -rfakeroot
dpkg-buildpackage -j6 -nc -ai386 -ti386 -b -uc -rfakeroot
# move to current dir
mv ../gb_*.deb .
# upload den
scp gb*.deb gk268:/w/html/
# alien it
sudo alien --to-rpm gb_1.16-1_i386.deb
sudo alien --to-rpm gb_1.17-1_i386.deb
# upload rpm
scp gb*.rpm gk268:/w/html/
master-deb64:
# need to change in changelog too!! don't forget!!!
git archive --format=tar --prefix=gb-1.17/ master > ../gb_1.17.orig.tar
rm -rf debian
# change "-p gb_1.0" to "-p gb_1.1" to update version for example
dh_make -s -e gigablast@mail.com -p gb_1.17 -f ../gb_1.17.orig.tar
# zero this out, it is just filled with the .txt files erroneously and it'll
# try to automatically install in /usr/docs/
rm debian/docs
touch debian/docs
# make the debian/copyright file contain the license
cp copyright.head debian/copyright
# cat LICENSE | awk -Fxvcty '{print " "$1}' >> debian/copyright
cat LICENSE >> debian/copyright
cat copyright.tail >> debian/copyright
# the control file describes the package
cp control.deb debian/control
# try to use our own rules so we can override dh_shlibdeps and others
cp gb.deb.rules debian/rules
cp changelog debian/changelog
# fix dh_shlibdeps from bitching about dependencies on shared libs
# YOU HAVE TO RUN THIS before you run 'make'
# export LD_LIBRARY_PATH=./debian/gb/var/gigablast/data0
# build the package now
dpkg-buildpackage -nc -aamd64 -tamd64 -b -uc -rfakeroot
# move to current dir
mv ../gb_*.deb .
# upload den
scp gb*.deb gk268:/w/html/
# alien it
sudo alien --to-rpm gb_1.17-1_amd64.deb
# upload rpm
scp gb*.rpm gk268:/w/html/
#deb-testing
testing-deb:
git archive --format=tar --prefix=gb-1.5/ testing > ../gb_1.5.orig.tar

@ -510,7 +510,7 @@ bool Matches::set ( XmlDoc *xd ,
//tmpLen = utf16ToUtf8( tmp, 512, qw, qwLen );
int32_t tmpLen = qwLen;
if ( tmpLen > 500 ) tmpLen = 500;
memcpy ( tmp , qw , tmpLen );
gbmemcpy ( tmp , qw , tmpLen );
tmp[tmpLen] = '\0';
log(LOG_DEBUG,"query: term#=%"INT32" fieldLen=%"INT32":%s",i,tmpLen,tmp);
@ -1801,10 +1801,10 @@ int32_t MatchOffsets::serialize(char *buf, int32_t bufsize){
*(int32_t*) p = need; p += 4;
*(int32_t*) p = m_numMatches; p += 4;
*(int32_t*) p = m_numAlnums; p += 4;
memcpy(p, m_queryWords, m_numMatches); p += m_numMatches;
memcpy(p, m_matchOffsets, m_numMatches*4); p += m_numMatches*4;
gbmemcpy(p, m_queryWords, m_numMatches); p += m_numMatches;
gbmemcpy(p, m_matchOffsets, m_numMatches*4); p += m_numMatches*4;
*(int32_t*) p = m_numBreaks; p += 4;
memcpy(p, m_breakOffsets, m_numBreaks*4); p += m_numBreaks*4;
gbmemcpy(p, m_breakOffsets, m_numBreaks*4); p += m_numBreaks*4;
return p - buf;
}
@ -1821,10 +1821,10 @@ int32_t MatchOffsets::deserialize(char *buf, int32_t bufsize){
}
m_numMatches = *(int32_t*) p ; p += 4;
m_numAlnums = *(int32_t*) p ; p += 4;
memcpy(m_queryWords, p, m_numMatches); p += m_numMatches;
memcpy(m_matchOffsets, p, m_numMatches*4); p += m_numMatches*4;
gbmemcpy(m_queryWords, p, m_numMatches); p += m_numMatches;
gbmemcpy(m_matchOffsets, p, m_numMatches*4); p += m_numMatches*4;
m_numBreaks = *(int32_t*) p ; p += 4;
memcpy(m_breakOffsets, p, m_numBreaks*4); p += m_numBreaks*4;
gbmemcpy(m_breakOffsets, p, m_numBreaks*4); p += m_numBreaks*4;
return p - buf;

29
Mem.cpp

@ -270,6 +270,7 @@ void * operator new (size_t size) throw (std::bad_alloc) {
newmemloop:
//void *mem = s_pool.malloc ( size );
if ( ! mem && size > 0 ) {
g_mem.m_outOfMems++;
g_errno = errno;
log("mem: new(%"INT32"): %s",(int32_t)size,mstrerror(g_errno));
//if ( unlock ) mutexUnlock();
@ -358,6 +359,7 @@ newmemloop:
//void *mem = s_pool.malloc ( size );
if ( ! mem && size > 0 ) {
g_errno = errno;
g_mem.m_outOfMems++;
log("mem: new(%"UINT32"): %s",
(uint32_t)size, mstrerror(g_errno));
//if ( unlock ) mutexUnlock();
@ -414,6 +416,8 @@ Mem::Mem() {
m_stackStart = NULL;
// shared mem used
m_sharedUsed = 0LL;
// count how many allocs/news failed
m_outOfMems = 0;
}
Mem::~Mem() {
@ -706,7 +710,7 @@ void Mem::addMem ( void *mem , int32_t size , const char *note , char isnew ) {
int32_t len = gbstrlen(note);
if ( len > 15 ) len = 15;
char *here = &s_labels [ h * 16 ];
memcpy ( here , note , len );
gbmemcpy ( here , note , len );
// make sure NULL terminated
here[len] = '\0';
// unlock for threads
@ -897,7 +901,7 @@ bool Mem::lblMem( void *mem, int32_t size, const char *note ) {
int32_t len = gbstrlen(note);
if ( len > 15 ) len = 15;
char *here = &s_labels [ h * 16 ];
memcpy ( here , note , len );
gbmemcpy ( here , note , len );
// make sure NULL terminated
here[len] = '\0';
val = true;
@ -1058,7 +1062,7 @@ bool Mem::rmMem ( void *mem , int32_t size , const char *note ) {
s_mptrs[k] = (void *)mem;
s_sizes[k] = s_sizes[h];
s_isnew[k] = s_isnew[h];
memcpy(&s_labels[k*16],&s_labels[h*16],16);
gbmemcpy(&s_labels[k*16],&s_labels[h*16],16);
// try next bucket now
h++;
// wrap if we need to
@ -1373,6 +1377,7 @@ void *Mem::gbmalloc ( int size , const char *note ) {
int32_t memLoop = 0;
mallocmemloop:
if ( ! mem && size > 0 ) {
g_mem.m_outOfMems++;
// try to free temp mem. returns true if it freed some.
if ( freeCacheMem() ) goto retry;
g_errno = errno;
@ -1502,7 +1507,7 @@ void *Mem::gbrealloc ( void *ptr , int oldSize , int newSize ,
mem = (char *)mmalloc ( newSize , note );
if ( ! mem ) return NULL;
// copy over to it
memcpy ( mem , ptr , oldSize );
gbmemcpy ( mem , ptr , oldSize );
// free the old
mfree ( ptr , oldSize , note );
// done
@ -1535,16 +1540,17 @@ void *Mem::gbrealloc ( void *ptr , int oldSize , int newSize ,
mem = (char *)mmalloc ( newSize , note );
// bail on error
if ( ! mem ) {
g_mem.m_outOfMems++;
// restore the original buf we tried to grow
addMem ( ptr , oldSize , note , 0 );
errno = g_errno = ENOMEM;
return NULL;
}
// log a note
log(LOG_INFO,"mem: had to use malloc/memcpy instead of "
log(LOG_INFO,"mem: had to use malloc/gbmemcpy instead of "
"realloc.");
// copy over to it
memcpy ( mem , ptr , oldSize );
gbmemcpy ( mem , ptr , oldSize );
// we already called rmMem() so don't double call
sysfree ( (char *)ptr - UNDERPAD );
// free the old. this was coring because it was double calling rmMem()
@ -1556,7 +1562,7 @@ void *Mem::gbrealloc ( void *ptr , int oldSize , int newSize ,
char *Mem::dup ( const void *data , int32_t dataSize , const char *note ) {
// keep it simple
char *mem = (char *)mmalloc ( dataSize , note );
if ( mem ) memcpy ( mem , data , dataSize );
if ( mem ) gbmemcpy ( mem , data , dataSize );
return mem;
}
@ -1814,7 +1820,9 @@ void memset_nice( register void *dest , register const char c , int32_t len ,
// . TODO: avoid byteCopy by copying remnant bytes
// . ass = async signal safe, dumb ass
// . NOTE: src/dest should not overlap in this version of memcpy
// . NOTE: src/dest should not overlap in this version of gbmemcpy
// . MDW: i replaced this with a #define bcopy in gb-include.h
/*
void memcpy_ass ( register void *dest2, register const void *src2, int32_t len ) {
// for now keep it simple!!
len--;
@ -1822,9 +1830,10 @@ void memcpy_ass ( register void *dest2, register const void *src2, int32_t len )
((char *)dest2)[len] = ((char *)src2)[len];
len--;
}
*/
/*
// debug test
//memcpy ( dest2 , src2 , len );
//gbmemcpy ( dest2 , src2 , len );
//return;
// the end for the fast copy by word with partially unrolled loop
register int32_t *dest = (int32_t *)dest2;
@ -1857,7 +1866,7 @@ void memcpy_ass ( register void *dest2, register const void *src2, int32_t len )
len--;
while ( len >= 0 ) { dest2[len] = src2[len]; len--; }
*/
}
//}
// Check the current stack usage
int32_t Mem::checkStackSize() {

5
Mem.h

@ -52,7 +52,7 @@ uint64_t getHighestLitBitValueLL ( uint64_t bits ) ;
uint32_t reverseBits ( uint32_t x ) ;
// async signal safe functions
void memcpy_ass ( register void *dest , register const void *src , int32_t len ) ;
//void memcpy_ass ( register void *dest , register const void *src , int32_t len ) ;
void memset_ass ( register void *dst , register const char c , int32_t len ) ;
void memset_nice ( register void *dst , register const char c , int32_t len ,
int32_t niceness ) ;
@ -169,6 +169,9 @@ class Mem {
// currently used mem (estimate)
int64_t m_used;
// count how many allocs/news failed
int32_t m_outOfMems;
int32_t m_numAllocated;
int64_t m_numTotalAllocated;
uint32_t m_memtablesize;

@ -1411,7 +1411,7 @@ void gotListWrapper ( void *state , RdbList *listb , Msg5 *msg5xx ) {
continue;
}
// store it
memcpy (dst , rec , LDBKS );
gbmemcpy (dst , rec , LDBKS );
dst += LDBKS;
// update it
lastIp32 = ip32;

@ -143,13 +143,13 @@ bool Msg1::addList ( RdbList *list ,
goto skip;
}
// steal the list, we don't want caller to free it
memcpy ( &Y->m_ourList , list , sizeof(RdbList) );
gbmemcpy ( &Y->m_ourList , list , sizeof(RdbList) );
QUICKPOLL(niceness);
// if list is small enough use our buf
if ( ! list->m_ownData && list->m_listSize <= MSG1_BUF_SIZE ) {
memcpy ( Y->m_buf , list->m_list , list->m_listSize );
gbmemcpy ( Y->m_buf , list->m_list , list->m_listSize );
Y->m_ourList.m_list = Y->m_buf;
Y->m_ourList.m_listEnd = Y->m_buf + list->m_listSize;
Y->m_ourList.m_alloc = NULL;
@ -525,7 +525,7 @@ skip:
if ( m_injecting ) *p |= 0x80;
p++;
// then collection name
//memcpy ( p , m_coll , collLen );
//gbmemcpy ( p , m_coll , collLen );
//p += collLen;
//*p++ = '\0';
*(collnum_t *)p = m_collnum;
@ -540,7 +540,7 @@ skip:
//if ( m_deleteRecs ) request[1] |= 0x80;
//if ( m_overwriteRecs ) request[1] |= 0x40;
// store the list after coll
memcpy ( p , listData , listSize );
gbmemcpy ( p , listData , listSize );
QUICKPOLL(m_niceness);
// debug msg
//if ( ! m_waitForReply ) // (m_rdbId == RDB_SPIDERDB ||

@ -534,7 +534,8 @@ void handleRequest13 ( UdpSlot *slot , int32_t niceness ) {
url.set ( r->ptr_url );
// . never download /master urls from ips of hosts in cluster
// . TODO: FIX! the pages might be in another cluster!
if ( ( strncasecmp ( url.getPath() , "/master/" , 8 ) == 0 ||
// . pages are now /admin/* not any /master/* any more.
if ( ( //strncasecmp ( url.getPath() , "/master/" , 8 ) == 0 ||
strncasecmp ( url.getPath() , "/admin/" , 7 ) == 0 )) {
log("spider: Got request to download possible "
"gigablast control page %s. Sending back "
@ -989,7 +990,8 @@ void downloadTheDocForReals3b ( Msg13Request *r ) {
if ( r->m_proxyIp ) {
char tmpIp[64];
sprintf(tmpIp,"%s",iptoa(r->m_urlIp));
log("sproxy: got proxy %s:%"INT32" and agent=\"%s\" to spider "
log(LOG_INFO,
"sproxy: got proxy %s:%"INT32" and agent=\"%s\" to spider "
"%s %s (numBannedProxies=%"INT32")",
iptoa(r->m_proxyIp),
(int32_t)r->m_proxyPort,
@ -1465,9 +1467,9 @@ void gotHttpReply2 ( void *state ,
// if too big, forget it! otherwise we breach tmpBuf
if ( loc && locLen > 0 && locLen < 1024 ) {
p += sprintf ( p , "Location: " );
memcpy ( p , loc , locLen );
gbmemcpy ( p , loc , locLen );
p += locLen;
memcpy ( p , "\r\n", 2 );
gbmemcpy ( p , "\r\n", 2 );
p += 2;
}
// close it up
@ -1481,7 +1483,7 @@ void gotHttpReply2 ( void *state ,
bytesOutPtr = &g_stats.m_compressMimeErrorBytesOut;
// only replace orig reply if we are smaller
if ( newSize < replySize ) {
memcpy ( reply , tmpBuf , newSize );
gbmemcpy ( reply , tmpBuf , newSize );
replySize = newSize;
}
// reset content hash
@ -2221,7 +2223,7 @@ int32_t convertIntoLinks ( char *reply ,
// use this to ensure we do not breach
char *dstEnd = reply + replySize;
// . store into the new buffer
// . use memcpy() because it deal with potential overlap issues
// . use gbmemcpy() because it deal with potential overlap issues
char *dst = reply;
// store the thing first
if ( dst + 100 >= dstEnd )
@ -2240,7 +2242,7 @@ int32_t convertIntoLinks ( char *reply ,
char *content = dst;
// this tells xmldoc.cpp what's up
//memcpy ( dst , "<!--links-->\n", 13 );
//gbmemcpy ( dst , "<!--links-->\n", 13 );
//dst += 13;
// iterate over the links
for ( int32_t i = 0 ; i < links.m_numLinks ; i++ ) {
@ -2253,13 +2255,13 @@ int32_t convertIntoLinks ( char *reply ,
// ensure no breach. if so, return now
if ( dst + len + 2 > dstEnd ) return dst - reply;
// lead it
memcpy ( dst, "<a href=", 8 );
gbmemcpy ( dst, "<a href=", 8 );
dst += 8;
// copy over, should be ok with overlaps
memcpy ( dst , str , len );
gbmemcpy ( dst , str , len );
dst += len;
// end tag and line
memcpy ( dst , "></a>\n", 6 );
gbmemcpy ( dst , "></a>\n", 6 );
dst += 6;
}
// null term it!
@ -2489,14 +2491,14 @@ int32_t filterRobotsTxt ( char *reply ,
if ( ! *s || agent ) {
// this is a problem... if somehow its got a smaller
// mime than us, we can't let our new mime overwrite
// the user-agent line we were going to memcpy()
// the user-agent line we were going to gbmemcpy()
if ( reply + 16 > agent ) return replySize;
if ( dst == reply ) {
memcpy ( dst , "HTTP/1.0 200\r\n\r\n", 16 );
gbmemcpy ( dst , "HTTP/1.0 200\r\n\r\n", 16 );
dst += 16;
}
// store the user-agent and following allows/disallows
memcpy ( dst, agent , start - agent );
gbmemcpy ( dst, agent , start - agent );
dst += ( start - agent );
// restart
agent = NULL;
@ -2846,7 +2848,8 @@ bool addToHammerQueue ( Msg13Request *r ) {
// which will store maybe a -1 if currently downloading...
if ( queueIt ) {
// debug
log("spider: adding %s to crawldelayqueue cd=%"INT32"ms",
log(LOG_INFO,
"spider: adding %s to crawldelayqueue cd=%"INT32"ms",
r->ptr_url,crawlDelayMS);
// save this
//r->m_udpSlot = slot; // this is already saved!
@ -3010,7 +3013,7 @@ void stripProxyAuthorization ( char *squidProxiedReqBuf ) {
loop:
// include space so it won't match anything in url
char *needle = "Proxy-Authorization: ";
char *s = strcasestr ( squidProxiedReqBuf , needle );
char *s = gb_strcasestr ( squidProxiedReqBuf , needle );
if ( ! s ) return;
// find next \r\n
char *end = strstr ( s , "\r\n");
@ -3021,7 +3024,7 @@ void stripProxyAuthorization ( char *squidProxiedReqBuf ) {
int32_t reqLen = gbstrlen(squidProxiedReqBuf);
char *reqEnd = squidProxiedReqBuf + reqLen;
// include \0, so add +1
memcpy ( s ,end , reqEnd-end + 1);
gbmemcpy ( s ,end , reqEnd-end + 1);
// bury more of them
goto loop;
}
@ -3055,7 +3058,7 @@ void fixGETorPOST ( char *squidProxiedReqBuf ) {
// bury the http://xyz.com part now
char *reqEnd = squidProxiedReqBuf + gbstrlen(squidProxiedReqBuf);
// include the terminating \0, so add +1
memcpy ( httpStart , s , reqEnd - s + 1 );
gbmemcpy ( httpStart , s , reqEnd - s + 1 );
// now make HTTP/1.1 into HTTP/1.0
char *hs = strstr ( httpStart , "HTTP/1.1" );
if ( ! hs ) return;

@ -149,7 +149,7 @@ bool Msg17::getFromCache ( char cacheId,
*p++ = m_cacheId;
// the flag (0 means read request, 1 means store request)
*p++ = 0;
memcpy ( p , &collnum, sizeof(collnum_t)); p += sizeof(collnum_t);
gbmemcpy ( p , &collnum, sizeof(collnum_t)); p += sizeof(collnum_t);
//strcpy ( p , coll ); p += gbstrlen ( coll ) + 1;
// . send the request to the key host
// . this returns false and sets g_errno on error
@ -366,7 +366,7 @@ void handleRequest17 ( UdpSlot *slot , int32_t niceness ) {
char *x = buf;
*(int32_t *)x = cachedTimeDelta; x += 4;
memcpy ( x , rec , recSize );
gbmemcpy ( x , rec , recSize );
// . set the msg40 from the cached record
// . UdpServer should free "rec" when he's done sending it
@ -450,7 +450,7 @@ bool Msg17::storeInCache ( char cacheId ,
*p++ = 1;
//char *coll = si->m_coll;
//strcpy ( p , coll ); p += gbstrlen(coll) + 1; // includes '\0'
memcpy ( p ,&collnum ,sizeof(collnum_t)); p += sizeof(collnum_t);
gbmemcpy ( p ,&collnum ,sizeof(collnum_t)); p += sizeof(collnum_t);
QUICKPOLL(niceness);
@ -494,7 +494,7 @@ bool Msg17::storeInCache ( char cacheId ,
// bail if not enough room!
if ( recSize > pend - p ) return true;
// otheriwse, store it
memcpy ( p, recPtr, recSize );
gbmemcpy ( p, recPtr, recSize );
// advance p by how many bytes we stored into "p"
p += recSize;
}

@ -94,7 +94,7 @@ bool Msg20::registerHandler ( ) {
// copy "src" to ourselves
void Msg20::copyFrom ( Msg20 *src ) {
memcpy ( this , src , sizeof(Msg20) );
gbmemcpy ( this , src , sizeof(Msg20) );
// if the Msg20Reply was actually in src->m_replyBuf[] we have to
// re-serialize into our this->m_replyBuf[] in order for the ptrs
// to be correct
@ -639,7 +639,7 @@ char *Msg20Request::serialize ( int32_t *retSize ,
*retSize = need;
// copy the easy stuff
char *p = buf;
memcpy ( p , (char *)this , sizeof(Msg20Request) );
gbmemcpy ( p , (char *)this , sizeof(Msg20Request) );
p += (int32_t)sizeof(Msg20Request);
// then store the strings!
int32_t *sizePtr = &size_qbuf;
@ -650,7 +650,7 @@ char *Msg20Request::serialize ( int32_t *retSize ,
if ( p > *strPtr && p < *strPtr + *sizePtr ) {
char *xx = NULL; *xx = 0; }
// copy the string into the buffer
memcpy ( p , *strPtr , *sizePtr );
gbmemcpy ( p , *strPtr , *sizePtr );
// advance our destination ptr
p += *sizePtr;
// advance both ptrs to next string
@ -700,7 +700,7 @@ int32_t Msg20Reply::getStoredSize ( ) {
int32_t Msg20Reply::serialize ( char *buf , int32_t bufSize ) {
// copy the easy stuff
char *p = buf;
memcpy ( p , (char *)this , sizeof(Msg20Reply) );
gbmemcpy ( p , (char *)this , sizeof(Msg20Reply) );
p += (int32_t)sizeof(Msg20Reply);
// then store the strings!
int32_t *sizePtr = &size_tbuf;
@ -711,7 +711,7 @@ int32_t Msg20Reply::serialize ( char *buf , int32_t bufSize ) {
// sometimes the ptr is NULL but size is positive
// so watch out for that
if ( *strPtr ) {
memcpy ( p , *strPtr , *sizePtr );
gbmemcpy ( p , *strPtr , *sizePtr );
// advance our destination ptr
p += *sizePtr;
}

11
Msg20.h

@ -654,10 +654,13 @@ class Msg20 {
// link we can highlight the relevant event sections.
//EventIdBits m_eventIdBits;
int32_t getStoredSize ( ) { return m_r->getStoredSize(); };
int32_t getStoredSize ( ) {
if ( ! m_r ) return 0;
return m_r->getStoredSize(); };
// . return how many bytes we serialize into "buf"
// . sets g_errno and returns -1 on error
int32_t serialize ( char *buf , int32_t bufSize ) {
if ( ! m_r ) return 0;
return m_r->serialize ( buf , bufSize ); };
// . this is destructive on the "buf". it converts offs to ptrs
// . sets m_r to the modified "buf" when done
@ -667,9 +670,9 @@ class Msg20 {
// to keep the size of the cached Msg40 down, we do not cache certain
// things. so we have to "clear" these guys out before caching.
//void clearBigSample () { m_r->clearBigSample(); };
void clearOutlinks () { m_r->clearOutlinks (); };
void clearLinks () { m_r->clearOutlinks (); };
void clearVectors () { m_r->clearVectors (); };
void clearOutlinks () { if ( m_r ) m_r->clearOutlinks (); };
void clearLinks () { if ( m_r ) m_r->clearOutlinks (); };
void clearVectors () { if ( m_r ) m_r->clearVectors (); };
// copy "src" to ourselves
void copyFrom ( class Msg20 *src ) ;

@ -589,7 +589,7 @@ void handleRequest22 ( UdpSlot *slot , int32_t netnice ) {
// then dataSize
*(int32_t *)p = dataSize; p += 4;
// then the data
memcpy ( p , data , dataSize ); p += dataSize;
gbmemcpy ( p , data , dataSize ); p += dataSize;
// send off the record
us->sendReply_ass (buf, need,buf, need,slot);
// don't forget to free the state
@ -976,7 +976,7 @@ void gotTitleList ( void *state , RdbList *list , Msg5 *msg5 ) {
// otherwise, alloc space for the reply
reply = (char *)mmalloc (recSize, "Msg22");
if ( ! reply ) goto hadError;
memcpy ( reply , rec , recSize );
gbmemcpy ( reply , rec , recSize );
}
// otherwise we send back the whole list!
else {

@ -225,17 +225,17 @@ bool Msg24::generateTopics ( char *coll ,
//*(int32_t *)p = maxWordsPerPhrase ; p += 4;
// store topic group information
*(int32_t *)p = numTopicGroups; p += 4;
memcpy ( p , topicGroups , size ); p += size;
gbmemcpy ( p , topicGroups , size ); p += size;
// then coll
memcpy ( p , coll , collLen ); p += collLen ;
gbmemcpy ( p , coll , collLen ); p += collLen ;
*p++ = '\0';
// then query
memcpy ( p , query , queryLen ); p += queryLen;
gbmemcpy ( p , query , queryLen ); p += queryLen;
*p++ = '\0';
// then docids
memcpy ( p , docIds , numDocIds * 8 ); p += numDocIds * 8;
gbmemcpy ( p , docIds , numDocIds * 8 ); p += numDocIds * 8;
// then cluster levels
memcpy ( p , clusterLevels , numDocIds ); p += numDocIds ;
gbmemcpy ( p , clusterLevels , numDocIds ); p += numDocIds ;
// how big is it?
//m_requestSize = p - m_request;
// sanity check
@ -393,14 +393,14 @@ void handleRequest24 ( UdpSlot *slot , int32_t netnice ) {
// get topic group information
st->m_numTopicGroups = *(int32_t *)p ; p += 4;
int32_t size = sizeof(TopicGroup) * st->m_numTopicGroups ;
memcpy ( st->m_topicGroups , p , size ); p += size;
gbmemcpy ( st->m_topicGroups , p , size ); p += size;
// then coll
st->m_coll = p; p += strlen(p) + 1;
// . then the query, a NULL terminated string
// . store it in state
int32_t qlen = strlen ( p );
if ( qlen > MAX_QUERY_LEN ) qlen = MAX_QUERY_LEN;
memcpy ( st->m_query , p , qlen );
gbmemcpy ( st->m_query , p , qlen );
st->m_query [ qlen ] = '\0';
st->m_queryLen = qlen;
p += qlen + 1;
@ -459,7 +459,7 @@ void handleRequest24 ( UdpSlot *slot , int32_t netnice ) {
int32_t tlen = strlen ( t->m_meta );
if ( p + tlen + 1 >= pend ) break;
if ( i > 0 ) *p++ = ' ';
memcpy ( p , t->m_meta , tlen );
gbmemcpy ( p , t->m_meta , tlen );
p += tlen;
}
//int32_t dbufLen = p - dbuf;
@ -1040,7 +1040,7 @@ bool getTopics ( State24 *st ,
int32_t len = master->getTermLen(i);
char ff[1024];
if ( len > 1020 ) len = 1020;
memcpy ( ff , ptr , len );
gbmemcpy ( ff , ptr , len );
ff[len] = '\0';
// we can have html entities in here now
//if ( ! is_alnum(ff[0]) ) { char *xx = NULL; *xx = 0; }
@ -1795,7 +1795,7 @@ bool getTopics ( State24 *st ,
else ppops [j] = 0;
ndocids [j] = 0;
dptrs [j] = NULL; // dummy placeholder
memcpy ( ptext , ptrs[i] , lens[i] ); ptext += lens[i];
gbmemcpy ( ptext , ptrs[i] , lens[i] ); ptext += lens[i];
//if ( hashes && j < GIGABITS_IN_VECTOR )
// hashes[j] = hash32Lower (ptrs[i],lens[i]);
*ptext++ = '\0';
@ -1857,7 +1857,7 @@ bool getTopics ( State24 *st ,
*(int32_t *)p = scores[i]; p += 4;
*(int32_t *)p = lens [i]; p += 4;
*(char *)p = gid ; p += 1;
memcpy ( p , ptrs[i] , lens[i] ); p += lens[i];
gbmemcpy ( p , ptrs[i] , lens[i] ); p += lens[i];
*p++ = '\0';
}
*/
@ -3129,25 +3129,25 @@ int32_t Msg24::serialize ( char *buf , int32_t bufLen ) {
for ( int32_t i = 0 ; i < m_numTopics ; i++ ) {
*(int32_t *)p = m_topicPtrs[i] - base; p += 4; }
// then the scores
memcpy ( p , m_topicScores , m_numTopics * 4 ); p += m_numTopics * 4;
memcpy ( p , m_topicLens , m_numTopics * 4 ); p += m_numTopics * 4;
memcpy ( p , m_topicNumDocIds, m_numTopics * 4 ); p += m_numTopics * 4;
gbmemcpy ( p , m_topicScores , m_numTopics * 4 ); p += m_numTopics * 4;
gbmemcpy ( p , m_topicLens , m_numTopics * 4 ); p += m_numTopics * 4;
gbmemcpy ( p , m_topicNumDocIds, m_numTopics * 4 ); p += m_numTopics * 4;
// these m_topicDocIds are essentially just placeholders for ptrs
// to the docids, just like the topic ptrs above, but these can all
// be NULL if we didn't get back the list of docids for each gigabit
p += m_numTopics * 4;
// then the popularity rating of each topic
memcpy ( p , m_topicPops , m_numTopics * 4 ); p += m_numTopics * 4;
memcpy ( p , m_topicGids , m_numTopics ); p += m_numTopics;
gbmemcpy ( p , m_topicPops , m_numTopics * 4 ); p += m_numTopics * 4;
gbmemcpy ( p , m_topicGids , m_numTopics ); p += m_numTopics;
// then the text
for ( int32_t i = 0 ; i < m_numTopics ; i++ ) {
memcpy ( p , m_topicPtrs[i] , m_topicLens[i] ) ;
gbmemcpy ( p , m_topicPtrs[i] , m_topicLens[i] ) ;
p += m_topicLens[i];
*p++ = '\0';
}
// and one array of docids per topic
for ( int32_t i = 0 ; i < m_numTopics ; i++ ) {
memcpy ( p , m_topicDocIds[i] , m_topicNumDocIds[i] * 8 );
gbmemcpy ( p , m_topicDocIds[i] , m_topicNumDocIds[i] * 8 );
p += m_topicNumDocIds[i] * 8;
// sanity check
//for ( int32_t k = 0 ; k < m_topicNumDocIds[i] ; k++ )
@ -3284,13 +3284,13 @@ bool Msg24::generateTopicsLocal ( char *coll ,
st.m_numRequests = numMsg20s;
st.m_numReplies = numMsg20s;
memcpy ( st.m_query , query , queryLen );
gbmemcpy ( st.m_query , query , queryLen );
st.m_query [ queryLen ] = '\0';
st.m_queryLen = queryLen;
st.m_qq.set ( st.m_query , st.m_queryLen , NULL , 0, 2 , true );
st.m_numTopicGroups = m_numTopicGroups;
memcpy(st.m_topicGroups, m_topicGroups,
gbmemcpy(st.m_topicGroups, m_topicGroups,
sizeof(TopicGroup) * m_numTopicGroups);
st.m_maxCacheAge = 0;
st.m_addToCache = false;

@ -112,10 +112,10 @@ int32_t Msg2b::serialize ( char *buf, int32_t bufLen ) {
*(int32_t *)p = m_numSubCats; p += sizeof(int32_t);
*(int32_t *)p = m_catBufferLen; p += sizeof(int32_t);
// sub cats
memcpy(p, m_subCats, sizeof(SubCategory)*m_numSubCats);
gbmemcpy(p, m_subCats, sizeof(SubCategory)*m_numSubCats);
p += sizeof(SubCategory)*m_numSubCats;
// cat buffer
memcpy(p, m_catBuffer, m_catBufferLen);
gbmemcpy(p, m_catBuffer, m_catBufferLen);
p += m_catBufferLen;
// sanity check
if (p - buf != storedSize) {

@ -578,7 +578,7 @@ bool Msg3::readList ( char rdbId ,
maps[fn]->getKey ( pn , tmpKey );
if ( KEYCMP(tmpKey,lastTmpKey,m_ks) == 0 )
ccount++;
memcpy(lastTmpKey,tmpKey,m_ks);
gbmemcpy(lastTmpKey,tmpKey,m_ks);
}
}
if ( ccount > 10 ) {

@ -35,12 +35,12 @@ bool Msg30::update ( CollectionRec *rec ,
if ( deleteIt ) {
// include the terminating \0
m_sendBufSize = gbstrlen ( rec->m_coll ) + 1;
memcpy ( m_sendBuf , rec->m_coll , m_sendBufSize );
gbmemcpy ( m_sendBuf , rec->m_coll , m_sendBufSize );
}
else {
// serialize the rec into m_sendBuf
m_sendBufSize = sizeof(CollectionRec);
memcpy ( m_sendBuf , rec , sizeof(CollectionRec) );
gbmemcpy ( m_sendBuf , rec , sizeof(CollectionRec) );
}
// reset some parms
m_requests = 0;
@ -129,7 +129,7 @@ void handleRequest30 ( UdpSlot *slot , int32_t niceness ) {
return;
}
// set to what it should be
memcpy ( nr , cr , sizeof(CollectionRec) );
gbmemcpy ( nr , cr , sizeof(CollectionRec) );
// always return a reply immediately, even though list not loaded yet
g_udpServer.sendReply_ass ( NULL , 0 , NULL , 0 , slot );
}

@ -275,6 +275,16 @@ void Msg39::getDocIds2 ( Msg39Request *req ) {
m_phase = 0;
// if ( m_r->m_docsToGet <= 0 ) {
// estimateHitsAndSendReply ( );
// return;
// }
// if ( m_tmpq.m_numTerms <= 0 ) {
// estimateHitsAndSendReply ( );
// return;
// }
// . otherwise, to prevent oom, split up docids into ranges
// and get winners of each range.
//if ( ! doDocIdSplitLoop() ) return;
@ -494,7 +504,7 @@ bool Msg39::doDocIdSplitLoop ( ) {
// BUT only do this if we are in a "full split" config, because that
// way we can guarantee all clusterdb recs are local (on this host)
// and should be in the page cache. the page cache should do ultra
// quick lookups and no memcpy()'s for this operation. it should
// quick lookups and no gbmemcpy()'s for this operation. it should
// be <<1ms to lookup thousands of docids.
// . when doing innerLoopSiteClustering we always use top tree now
// because our number of "top docids" can be somewhat unpredictably
@ -645,7 +655,7 @@ bool Msg39::getLists () {
if ( ttlen > 254 ) ttlen = 254;
if ( ttlen < 0 ) ttlen = 0;
// old:painful: convert each term from unicode to ascii
memcpy ( tt , m_tmpq.getTerm(i) , ttlen );
gbmemcpy ( tt , m_tmpq.getTerm(i) , ttlen );
*/
int32_t isSynonym = 0;
QueryTerm *st = qt->m_synonymOf;
@ -1132,7 +1142,7 @@ bool Msg39::addedLists ( ) {
// BUT only do this if we are in a "full split" config, because that
// way we can guarantee all clusterdb recs are local (on this host)
// and should be in the page cache. the page cache should do ultra
// quick lookups and no memcpy()'s for this operation. it should
// quick lookups and no gbmemcpy()'s for this operation. it should
// be <<1ms to lookup thousands of docids.
// . when doing innerLoopSiteClustering we always use top tree now
// because our number of "top docids" can be somewhat unpredictably
@ -1468,7 +1478,8 @@ void Msg39::estimateHitsAndSendReply ( ) {
*(int64_t *)p = qt->m_termId;
p += 8;
int32_t used = ft->getNumSlotsUsed();
if ( used > (int32_t)MAX_FACETS ) used = (int32_t)MAX_FACETS;
if ( used > (int32_t)MAX_FACETS )
used = (int32_t)MAX_FACETS;
// store count
*(int32_t *)p = used;
p += 4;
@ -1487,7 +1498,13 @@ void Msg39::estimateHitsAndSendReply ( ) {
// lookup the text of the facet in Msg40.cpp
FacetEntry *fe;
fe = (FacetEntry *)ft->getValFromSlot(k);
memcpy ( p , fe , sizeof(FacetEntry) );
// sanity
// no, count can be zero if it's a range facet
// that was never added to. we add those
// empty FacetEntries only for range facets
// in Posdb.cpp
//if(fe->m_count == 0 ) { char *xx=NULL;*xx=0;}
gbmemcpy ( p , fe , sizeof(FacetEntry) );
p += sizeof(FacetEntry);
// do not breach
if ( ++count >= (int32_t)MAX_FACETS ) break;
@ -1577,7 +1594,7 @@ void Msg39::estimateHitsAndSendReply ( ) {
if ( docCount <= 50 ) m_topScore50 = t->m_score;
if ( m_debug ) {
log(LOG_DEBUG,"query: msg39: [%"PTRFMT"] "
logf(LOG_DEBUG,"query: msg39: [%"PTRFMT"] "
"%03"INT32") docId=%012"UINT64" sum=%.02f",
(PTRTYPE)this, docCount,
t->m_docId,t->m_score);

@ -915,6 +915,8 @@ bool Msg3a::mergeLists ( ) {
key_t *ksPtr [MAX_SHARDS];
int64_t *diEnd [MAX_SHARDS];
for ( int32_t j = 0; j < m_numHosts ; j++ ) {
// how does this happen?
if ( j >= MAX_SHARDS ) { char *xx=NULL;*xx=0; }
Msg39Reply *mr =m_reply[j];
// if we have gbdocid:| in query this could be NULL
if ( ! mr ) {
@ -1012,6 +1014,12 @@ bool Msg3a::mergeLists ( ) {
"termid %"UINT64" for facet",termId);
break;
}
bool isFloat = false;
bool isInt = false;
if ( qt->m_fieldCode == FIELD_GBFACETFLOAT ) isFloat = true;
if ( qt->m_fieldCode == FIELD_GBFACETINT ) isInt = true;
// the end point
char *pend = p + ((4+sizeof(FacetEntry)) * nh);
// shortcut
@ -1035,16 +1043,43 @@ bool Msg3a::mergeLists ( ) {
fe2 = (FacetEntry *)ft->getValue ( &facetValue );
if ( ! fe2 ) {
ft->addKey ( &facetValue,fe );
continue;
}
else {
fe2->m_count += fe->m_count;
// prefer docid kinda randomly to balance
// lookupFacets() load in Msg40.cpp
if ( rand() % 2 )
fe2->m_docId = fe->m_docId;
fe2->m_count += fe->m_count;
// prefer docid kinda randomly to balance
// lookupFacets() load in Msg40.cpp
if ( rand() % 2 )
fe2->m_docId = fe->m_docId;
if ( isFloat ) {
// accumulate sum as double
double sum1 = *((double *)&fe ->m_sum);
double sum2 = *((double *)&fe2->m_sum);
sum2 += sum1;
*((double *)&fe2->m_sum) = sum2;
// and min/max as floats
float min1 = *((float *)&fe ->m_min);
float min2 = *((float *)&fe2->m_min);
if ( min1 < min2 ) min2 = min1;
*((float *)&fe2->m_min) = min2;
float max1 = *((float *)&fe ->m_max);
float max2 = *((float *)&fe2->m_max);
if ( max1 > max2 ) max2 = max1;
*((float *)&fe2->m_max) = max2;
}
if ( isInt ) {
fe2->m_sum += fe->m_sum;
if ( fe->m_min < fe2->m_min )
fe2->m_min = fe->m_min;
if ( fe->m_max > fe2->m_max )
fe2->m_max = fe->m_max;
}
}
// now get the next gbfacet: term if there was one
if ( p < last ) goto ploop;
}
@ -1316,12 +1351,12 @@ int32_t Msg3a::serialize ( char *buf , char *bufEnd ) {
// estimated # of total hits
*(int32_t *)p = m_numTotalEstimatedHits; p += 8;
// store each docid, 8 bytes each
memcpy ( p , m_docIds , m_numDocIds * 8 ); p += m_numDocIds * 8;
gbmemcpy ( p , m_docIds , m_numDocIds * 8 ); p += m_numDocIds * 8;
// store scores
memcpy ( p , m_scores , m_numDocIds * sizeof(double) );
gbmemcpy ( p , m_scores , m_numDocIds * sizeof(double) );
p += m_numDocIds * sizeof(double) ;
// store cluster levels
memcpy ( p , m_clusterLevels , m_numDocIds ); p += m_numDocIds;
gbmemcpy ( p , m_clusterLevels , m_numDocIds ); p += m_numDocIds;
// sanity check
if ( p > pend ) { char *xx = NULL ; *xx = 0; }
// return how much we did

@ -699,6 +699,12 @@ bool Msg4::addMetaList2 ( ) {
// . point to next record
// . will point past records if no more left!
m_currentPtr = p; // += recSize;
// debug log
// int off = (int)(m_currentPtr-m_metaList);
// log("msg4: cpoff=%i",off);
// if ( off == 5271931 )
// log("msg4: hey");
// debug
// get next rec
continue;
}
@ -768,7 +774,7 @@ bool storeRec ( collnum_t collnum ,
if(s_hostBufs[hostId]) {
//if the old buf was too small, resize
memcpy( buf, s_hostBufs[hostId],
gbmemcpy( buf, s_hostBufs[hostId],
*(int32_t*)(s_hostBufs[hostId]));
mfree ( s_hostBufs[hostId],
s_hostBufSizes[hostId] , "Msg4b" );
@ -812,7 +818,7 @@ bool storeRec ( collnum_t collnum ,
*(collnum_t *)p = collnum; p += sizeof(collnum_t);
*(char *)p = rdbId ; p += 1;
*(int32_t *)p = recSize; p += 4;
memcpy ( p , rec , recSize ); p += recSize;
gbmemcpy ( p , rec , recSize ); p += recSize;
// update buffer used
*(int32_t *)buf = used + (p - start);
// all done, did not "block"
@ -996,7 +1002,7 @@ void gotReplyWrapper4 ( void *state , void *state2 ) {
returnMulticast ( mcast );
storeLineWaiters ( );
storeLineWaiters ( ); // try to launch more msg4 requests in waiting
//
// now if all buffers are empty, let any flush request know that
@ -1080,7 +1086,7 @@ void storeLineWaiters ( ) {
// now were we waiting on a multicast to return in order to send
// another request? return if not.
if ( ! msg4 ) return;
// grab the first Msg4 in line
// grab the first Msg4 in line. returns false if blocked adding more
// of the list.
if ( ! msg4->addMetaList2 ( ) ) return;
// hey, we were able to store that Msg4's list, remove him
s_msg4Head = msg4->m_next;
@ -1131,13 +1137,14 @@ void handleRequest4 ( UdpSlot *slot , int32_t netnice ) {
if ( ! g_pingServer.m_hostsConfInAgreement ) {
// . if we do not know the sender's hosts.conf crc, wait 4 it
// . this is 0 if not received yet
if ( ! slot->m_host->m_hostsConfCRC ) {
if ( ! slot->m_host->m_pingInfo.m_hostsConfCRC ) {
g_errno = EWAITINGTOSYNCHOSTSCONF;
us->sendErrorReply ( slot , g_errno );
return;
}
// compare our hosts.conf to sender's otherwise
if ( slot->m_host->m_hostsConfCRC != g_hostdb.getCRC() ) {
if ( slot->m_host->m_pingInfo.m_hostsConfCRC !=
g_hostdb.getCRC() ) {
g_errno = EBADHOSTSCONF;
us->sendErrorReply ( slot , g_errno );
return;
@ -1280,21 +1287,37 @@ bool addMetaList ( char *p , UdpSlot *slot ) {
// return true;
//}
// an uninitialized secondary rdb? it will have a keysize
// if 0 if its never been intialized from the repair page
// of 0 if it's never been initialized from the repair page.
// don't core any more, we probably restarted this shard
// and it needs to wait for host #0 to sync its
// g_conf.m_repairingEnabled to '1' so it can start its
// Repair.cpp repairWrapper() loop and init the secondary
// rdbs so "rdb" here won't be NULL any more.
if ( rdb && rdb->m_ks <= 0 ) {
log("msg4: oops. got an rdbId key for a secondary "
"rdb and not in repair mode! fix xmldoc!");
char *xx=NULL;*xx=0;
time_t currentTime = getTime();
static time_t s_lastTime = 0;
if ( currentTime > s_lastTime + 10 ) {
s_lastTime = currentTime;
log("msg4: oops. got an rdbId key for a "
"secondary "
"rdb and not in repair mode. waiting to "
"be in repair mode.");
g_errno = ETRYAGAIN;
return false;
//char *xx=NULL;*xx=0;
}
}
if ( ! rdb ) {
if ( slot )
log("msg4: rdbId of %"INT32" unrecognized from "
"hostip=%s. "
log("msg4: rdbId of %"INT32" unrecognized "
"from hostip=%s. "
"dropping WHOLE request", (int32_t)rdbId,
iptoa(slot->m_ip));
else
log("msg4: rdbId of %"INT32" unrecognized. "
"dropping WHOLE request", (int32_t)rdbId);
g_errno = ETRYAGAIN;
return false;
// drop it for now!!
//if ( p < pend ) goto loop;
// all done
@ -1327,7 +1350,11 @@ bool addMetaList ( char *p , UdpSlot *slot ) {
// sanity check
if ( rdb->getKeySize() == 0 ) {
log("seems like a stray /e/repair-addsinprogress.dat file "
"rdbId=%"INT32". not in repair mode. dropping.",(int32_t)rdbId);
"rdbId=%"INT32". waiting to be in repair mode."
,(int32_t)rdbId);
//not in repair mode. dropping.",(int32_t)rdbId);
g_errno = ETRYAGAIN;
return false;
char *xx=NULL;*xx=0;
// drop it for now!!
p += recSize;

319
Msg40.cpp

@ -106,6 +106,7 @@ Msg40::Msg40() {
m_lastChunk = false;
m_didSummarySkip = false;
m_omitCount = 0;
m_printCount = 0;
//m_numGigabitInfos = 0;
}
@ -241,7 +242,19 @@ bool Msg40::getResults ( SearchInput *si ,
m_cachedResults = false;
// bail now if 0 requested!
if ( m_si->m_docsWanted == 0 ) return true;
// crap then we don't stream anything if in streaming mode.
if ( m_si->m_docsWanted == 0 ) {
log("msg40: setting streamresults to false. n=0.");
m_si->m_streamResults = false;
return true;
}
// or if no query terms
if ( m_si->m_q.m_numTerms <= 0 ) {
log("msg40: setting streamresults to false. numTerms=0.");
m_si->m_streamResults = false;
return true;
}
// . do this now in case results were cached.
// . set SearchInput class instance, m_si
@ -429,11 +442,27 @@ bool Msg40::getResults ( SearchInput *si ,
return false;
// reset g_errno, we're just a cache
g_errno = 0;
return gotCacheReply();
bool status = gotCacheReply();
if ( status && m_si->m_streamResults ) {
log("msg40: setting streamresults to false. "
"was in cache.");
m_si->m_streamResults = false;
}
return status;
}
// keep going
return prepareToGetDocIds ( );
bool status = prepareToGetDocIds ( );
if ( status && m_si->m_streamResults ) {
log("msg40: setting streamresults to false. "
"prepare did not block.");
m_si->m_streamResults = false;
}
return status;
}
/*
@ -704,7 +733,7 @@ bool Msg40::federatedLoop ( ) {
// assign it
m_msg3aPtrs[i] = mp;
// assign the request for it
memcpy ( &mp->m_rrr , &mr , sizeof(Msg39Request) );
gbmemcpy ( &mp->m_rrr , &mr , sizeof(Msg39Request) );
// then customize it to just search this collnum
mp->m_rrr.m_collnum = cp[i];
@ -1508,8 +1537,8 @@ bool Msg40::launchMsg20s ( bool recalled ) {
req.size_hqbuf = gbstrlen(req.ptr_hqbuf)+1;
}
int32_t q3size = m_si->m_sbuf3.length()+1;
if ( q3size == 1 ) q3size = 0;
//int32_t q3size = m_si->m_sbuf3.length()+1;
//if ( q3size == 1 ) q3size = 0;
//req.ptr_q2buf = m_si->m_sbuf3.getBufStart();
//req.size_q2buf = q3size;
@ -1694,7 +1723,8 @@ bool gotSummaryWrapper ( void *state ) {
THIS->m_numReplies++;
// log every 1000 i guess
if ( (THIS->m_numReplies % 1000) == 0 )
log("msg40: got %"INT32" summaries out of %"INT32"",THIS->m_numReplies,
log("msg40: got %"INT32" summaries out of %"INT32"",
THIS->m_numReplies,
THIS->m_msg3a.m_numDocIds);
// it returns false if we're still awaiting replies
if ( ! THIS->gotSummary ( ) ) return false;
@ -1712,6 +1742,11 @@ void doneSendingWrapper9 ( void *state , TcpSocket *sock ) {
Msg40 *THIS = (Msg40 *)state;
// the send completed, count it
THIS->m_sendsIn++;
// error?
if ( THIS->m_sendsIn > THIS->m_sendsOut )
log("msg40: sendsin > sendsout");
// debug
//g_errno = ETCPTIMEDOUT;
// socket error? if client closes the socket midstream we get one.
if ( g_errno ) {
THIS->m_socketHadError = g_errno;
@ -1889,7 +1924,8 @@ bool Msg40::gotSummary ( ) {
mr->m_contentType != CT_STATUS &&
m_dedupTable.isInTable ( &mr->m_contentHash32 ) ) {
//if ( g_conf.m_logDebugQuery )
log("msg40: dup sum #%"INT32" (%"UINT32")(d=%"INT64")",m_printi,
log("msg40: dup sum #%"INT32" (%"UINT32")"
"(d=%"INT64")",m_printi,
mr->m_contentHash32,mr->m_docId);
// make it available to be reused
m20->reset();
@ -1926,8 +1962,12 @@ bool Msg40::gotSummary ( ) {
// do not print it if before the &s=X start position though
if ( m_si && m_numDisplayed <= m_si->m_firstResultNum ){
log("msg40: hiding #%"INT32" (%"UINT32")(d=%"INT64")",
m_printi,mr->m_contentHash32,mr->m_docId);
if ( m_printCount == 0 )
log("msg40: hiding #%"INT32" (%"UINT32")"
"(d=%"INT64")",
m_printi,mr->m_contentHash32,mr->m_docId);
m_printCount++;
if ( m_printCount == 100 ) m_printCount = 0;
m20->reset();
continue;
}
@ -2028,6 +2068,8 @@ bool Msg40::gotSummary ( ) {
m_printedTail = true;
printSearchResultsTail ( st );
if ( m_sendsIn < m_sendsOut ) { char *xx=NULL;*xx=0; }
if ( g_conf.m_logDebugTcp )
log("tcp: disabling streamingMode now");
// this will be our final send
st->m_socket->m_streamingMode = false;
}
@ -2247,11 +2289,13 @@ bool Msg40::gotSummary ( ) {
logf( LOG_DEBUG, "query: result %"INT32" (docid=%"INT64") had "
"an error (%s) and will not be shown.", i,
m_msg3a.m_docIds[i], mstrerror(m->m_errno));
*level = CR_ERROR_SUMMARY;
//m_visibleContiguous--;
// update our m_errno while here
if ( ! m_errno ) m_errno = m->m_errno;
continue;
if ( ! m_si->m_showErrors ) {
*level = CR_ERROR_SUMMARY;
//m_visibleContiguous--;
continue;
}
}
// a special case
if ( mr && mr->m_errno == CR_RULESET_FILTERED ) {
@ -2265,9 +2309,10 @@ bool Msg40::gotSummary ( ) {
//m_visibleContiguous--;
continue;
}
if ( ! m_si->m_showBanned && mr->m_isBanned ) {
if ( ! m_si->m_showBanned && mr && mr->m_isBanned ) {
if ( m_si->m_debug || g_conf.m_logDebugQuery )
logf ( LOG_DEBUG, "query: result %"INT32" (docid=%"INT64") is "
logf ( LOG_DEBUG, "query: result %"INT32" "
"(docid=%"INT64") is "
"banned and will not be shown.", i,
m_msg3a.m_docIds[i] );
*level = CR_BANNED_URL;
@ -2275,20 +2320,21 @@ bool Msg40::gotSummary ( ) {
continue;
}
// filter out urls with <![CDATA in them
if ( strstr(mr->ptr_ubuf, "<![CDATA[") ) {
if ( mr && strstr(mr->ptr_ubuf, "<![CDATA[") ) {
*level = CR_BAD_URL;
//m_visibleContiguous--;
continue;
}
// also filter urls with ]]> in them
if ( strstr(mr->ptr_ubuf, "]]>") ) {
if ( mr && strstr(mr->ptr_ubuf, "]]>") ) {
*level = CR_BAD_URL;
//m_visibleContiguous--;
continue;
}
if( ! mr->m_hasAllQueryTerms ) {
if( mr && ! mr->m_hasAllQueryTerms ) {
if ( m_si->m_debug || g_conf.m_logDebugQuery )
logf( LOG_DEBUG, "query: result %"INT32" (docid=%"INT64") is "
logf( LOG_DEBUG, "query: result %"INT32" "
"(docid=%"INT64") is "
"missing query terms and will not be"
" shown.", i, m_msg3a.m_docIds[i] );
*level = CR_MISSING_TERMS;
@ -3296,7 +3342,7 @@ bool isSubDom(char *s , int32_t len) {
//////////////////////////////////
bool hashSample ( Query *q,
bool hashGigabitSample ( Query *q,
HashTableX *master,
TopicGroup *tg ,
SafeBuf *vecBuf,
@ -3376,6 +3422,9 @@ bool Msg40::computeGigabits( TopicGroup *tg ) {
// . the sample is a bunch of text snippets surrounding the
// query terms in the doc in the search results
Msg20Reply *reply = thisMsg20->getReply();
// if m_si->m_showErrors then reply can be NULL if the
// titleRec was not found
if ( ! reply ) continue;
char *sample = reply->ptr_gigabitSample;
int32_t slen = reply->size_gigabitSample;
// but if doing metas, get the display content as the sample
@ -3500,9 +3549,12 @@ bool Msg40::computeGigabits( TopicGroup *tg ) {
numDocsProcessed++;
// . hash it into the master table
// . this may alloc st->m_mem, so be sure to free below
hashSample ( q,
hashGigabitSample ( q,
&master,
tg ,
// vecbuf is an ongoing accumulation
// of wordid vectors from the samples
// we let into the master hash table.
&vecBuf,
thisMsg20,
&repeatTable,
@ -3520,7 +3572,7 @@ bool Msg40::computeGigabits( TopicGroup *tg ) {
int32_t len = master->getTermLen(i);
char ff[1024];
if ( len > 1020 ) len = 1020;
memcpy ( ff , ptr , len );
gbmemcpy ( ff , ptr , len );
ff[len] = '\0';
// we can have html entities in here now
//if ( ! is_alnum(ff[0]) ) { char *xx = NULL; *xx = 0; }
@ -3863,7 +3915,10 @@ void hashExcerpt ( Query *q ,
// . returns false and sets g_errno on error
// . here's the tricky part
bool hashSample ( Query *q,
// . this compares thisMsg20->getReply()->ptr_gigabitSample excerpts
// to all those from other docids and this docid that we have accumulated
// because they are distinct enough.
bool hashGigabitSample ( Query *q,
HashTableX *master,
TopicGroup *tg ,
SafeBuf *vecBuf,
@ -3877,6 +3932,9 @@ bool hashSample ( Query *q,
// "topic generation.");
Msg20Reply *reply = thisMsg20->getReply();
// if m_si->m_showErrors is true then reply can be NULL
// if titleRec was not found
if ( ! reply ) return true;
// get the ith big sample
char *bigSampleBuf = reply->ptr_gigabitSample;
int32_t bigSampleLen = reply->size_gigabitSample;
@ -3929,10 +3987,13 @@ bool hashSample ( Query *q,
//
// hash each excerpt
// hash each \0 separated excerpt in bigSampleBuf
char *p = bigSampleBuf;
// most samples are under 5k, i've seen a 32k sample take 11ms!
char *pend = p + bigSampleLen;
// compile all \0 terminated excerpts into a single vector for this
// docid
while ( p < pend ) {
// debug
//log("docId=%"INT64" EXCERPT=%s",docId,p);
@ -3940,9 +4001,14 @@ bool hashSample ( Query *q,
// parse into words
Words ww;
ww.setx ( p, plen, 0);// niceness
// save it
//log("gbits: getting sim for %s",p);
// advance to next excerpt
p += plen + 1;
// p is only non-NULL if we are doing it the old way
// 'tg' indicates where the gigabits came from, like the
// body, or a particular meta tag.
// 'repeatTable' is for counting the same word
hashExcerpt ( q,
&localGigabitTable,
ww,
@ -3950,13 +4016,19 @@ bool hashSample ( Query *q,
repeatTable ,
thisMsg20 ,
debugGigabits );
// skip if not deduping
// . skip if not deduping
// . if a sample is too similar to another sample then we
// do not allow its gigabits to vote. its considered too
// spammy.
if ( tg->m_dedupSamplePercent <= 0 ) continue;
// make a vector out of words
int64_t *wids = ww.getWordIds();
int32_t nw = ww.getNumWords();
// put all the words from this sample into simTable hash table
// and just make vbuf a list of the unique wordIds from all
// gigabit samples this docid provides.
for ( int32_t i = 0 ; i < nw ; i++ ) {
// make it this
// convert word to a number
uint32_t widu = (uint64_t)(wids[i]);
// do not allow this! zero is a vector terminator
if ( widu == 0 ) widu = 1;
@ -3975,6 +4047,11 @@ bool hashSample ( Query *q,
vbuf.truncLen(((int32_t)SAMPLE_VECTOR_SIZE) - 4);
// make last int32_t a 0
vbuf.pushLong(0);
// now vbuf is a fairly decent vector of words that represent
// the gigabit sample for this docid. see if it is already
// too similar to ones we've stored in "vecBuf" which has all the
// samples from all the other docids that were considered
// mutually distinct enough.
// . compute the fingerprint/similarityVector from this table
// the same way we do for documents for deduping them at query time
@ -3987,7 +4064,7 @@ bool hashSample ( Query *q,
// point to it
char *v1 = vbuf.getBufStart();
// get # stored so far
int32_t numVecs = vecBuf->length() / (int32_t)SAMPLE_VECTOR_SIZE;
int32_t numVecs = vecBuf->length()/(int32_t)SAMPLE_VECTOR_SIZE;
char *v2 = vecBuf->getBufStart();
// see if our vector is too similar
for ( int32_t i = 0 ; i < numVecs ; i++ ) {
@ -3998,12 +4075,22 @@ bool hashSample ( Query *q,
// return true if too similar to another sample we did
if ( ss >= tg->m_dedupSamplePercent ) { // 80 ) {
localGigabitTable.reset();
log(LOG_DEBUG,"gbits: removed dup sample.");
// log(LOG_DEBUG,"gbits: removed dup sample "
// "\"%s\" too similar to sample #%i"
// , bigSampleBuf
// , i
// );
return true;
}
}
// add our vector to the array
// this docid sample was considered unique enough with respect
// to the other samples from other docids, so add the
// wordids to our list to dedup the next excerpts
vecBuf->safeMemcpy(v1,(int32_t)SAMPLE_VECTOR_SIZE);
// log(LOG_DEBUG,"gbits: adding unique sample #%i %s "
// ,numVecs,bigSampleBuf);
}
//log("TOOK %"INT64" ms plen=%"INT32"",gettimeofdayInMilliseconds()-start,
@ -4069,7 +4156,7 @@ bool hashSample ( Query *q,
// sanity
if ( gc->m_numWords > MAX_GIGABIT_WORDS ) {
char*xx=NULL;*xx=0;}
memcpy((char *)gbit.m_wordIds,
gbmemcpy((char *)gbit.m_wordIds,
(char *)gc->m_wordIds,
gc->m_numWords * 8 );
if ( ! master->addKey ( &termId64, &gbit ) )
@ -5325,6 +5412,9 @@ bool Msg40::computeFastFacts ( ) {
Msg20* thisMsg20 = m_msg20[i];
// must be there! wtf?
Msg20Reply *reply = thisMsg20->getReply();
// if m_si->m_showErrors is true then reply can be NULL
// if titleRec was not found
if ( ! reply ) return true;
// get sample. sample uses \0 as delimiters between excerpts
char *p = reply-> ptr_gigabitSample;
char *pend = p + reply->size_gigabitSample; // includes \0
@ -5542,7 +5632,7 @@ bool Msg40::addFacts ( HashTableX *queryTable,
// make last int32_t a 0 so Clusterdb::getSimilarity() likes it
vbuf.pushLong(0);
// now store it in the Fact struct
memcpy ( fact.m_dedupVector , vbuf.getBufStart(), vbuf.length() );
gbmemcpy ( fact.m_dedupVector , vbuf.getBufStart(), vbuf.length() );
// otherwise, add it
@ -5574,8 +5664,12 @@ bool Msg40::printSearchResult9 ( int32_t ix , int32_t *numPrintedSoFar ,
// i guess we can print "Next 10" link
m_moreToCome = true;
// hide if above limit
log("msg40: hiding above docsWanted #%"INT32" (%"UINT32")(d=%"INT64")",
m_printi,mr->m_contentHash32,mr->m_docId);
if ( m_printCount == 0 )
log(LOG_INFO,"msg40: hiding above docsWanted "
"#%"INT32" (%"UINT32")(d=%"INT64")",
m_printi,mr->m_contentHash32,mr->m_docId);
m_printCount++;
if ( m_printCount == 100 ) m_printCount = 0;
// do not exceed what the user asked for
return true;
}
@ -5593,9 +5687,9 @@ bool Msg40::printSearchResult9 ( int32_t ix , int32_t *numPrintedSoFar ,
m_hadPrintError = true;
}
log("msg40: printing #%"INT32" (%"UINT32")(d=%"INT64")",
m_printi,mr->m_contentHash32,mr->m_docId);
// log(LOG_INFO,"msg40: printing #%"INT32" (%"UINT32")(d=%"INT64")",
// m_printi,mr->m_contentHash32,mr->m_docId);
// count it
m_numPrinted++;
@ -5709,7 +5803,12 @@ bool Msg40::printCSVHeaderRow ( SafeBuf *sb ) {
Msg20 *m20 = getCompletedSummary(i);
if ( ! m20 ) break;
if ( m20->m_errno ) continue;
// unless they specified &showerrors=1 do not show
// doc not found errors from a bad title rec lookup
if ( m20->m_errno && ! m_si->m_showErrors )
continue;
if ( ! m20->m_r ) { char *xx=NULL;*xx=0; }
Msg20Reply *mr = m20->m_r;
@ -6107,7 +6206,8 @@ void Msg40::lookupFacets2 ( ) {
// skip empty slots
if ( ! fht->m_flags[m_j] ) continue;
// get hash of the facet value
FacetValHash_t fvh = *(int32_t *)fht->getKeyFromSlot(m_j);
FacetValHash_t fvh ;
fvh = *(int32_t *)fht->getKeyFromSlot(m_j);
//int32_t count = *(int32_t *)fht->getValFromSlot(j);
// get the docid as well
FacetEntry *fe =(FacetEntry *)fht->getValFromSlot(m_j);
@ -6259,7 +6359,11 @@ bool Msg40::printFacetsForTable ( SafeBuf *sb , QueryTerm *qt ) {
HttpRequest *hr = &m_si->m_hr;
bool firstTime = true;
bool isString = false;
bool isFloat = false;
bool isInt = false;
if ( qt->m_fieldCode == FIELD_GBFACETSTR ) isString = true;
if ( qt->m_fieldCode == FIELD_GBFACETFLOAT ) isFloat = true;
if ( qt->m_fieldCode == FIELD_GBFACETINT ) isInt = true;
char format = m_si->m_format;
// a new table for each facet query term
bool needTable = true;
@ -6297,23 +6401,27 @@ bool Msg40::printFacetsForTable ( SafeBuf *sb , QueryTerm *qt ) {
termBuf.nullTerm();
char *term = termBuf.getBufStart();
char tmp[64];
char tmp9[128];
SafeBuf sb9(tmp9,128);
QueryWord *qw= qt->m_qword;
if ( qt->m_fieldCode == FIELD_GBFACETINT ) {
sprintf(tmp,"%"INT32"",(int32_t)*fvh);
text = tmp;
if ( qt->m_fieldCode == FIELD_GBFACETINT &&
qw->m_numFacetRanges == 0 ) {
sb9.safePrintf("%"INT32"",(int32_t)*fvh);
text = sb9.getBufStart();
}
if ( qt->m_fieldCode == FIELD_GBFACETFLOAT ) {
sprintf(tmp,"%f",*(float *)fvh);
text = tmp;
if ( qt->m_fieldCode == FIELD_GBFACETFLOAT
&& qw->m_numFacetRanges == 0 ) {
sb9.printFloatPretty ( *(float *)fvh );
text = sb9.getBufStart();
}
int32_t k2 = -1;
// get the facet range that this FacetEntry represents (int)
for ( int32_t k = 0 ; k < qw->m_numFacetRanges; k++ ) {
if ( qt->m_fieldCode != FIELD_GBFACETINT )
break;
@ -6321,14 +6429,15 @@ bool Msg40::printFacetsForTable ( SafeBuf *sb , QueryTerm *qt ) {
continue;
if ( *(int32_t *)fvh >= qw->m_facetRangeIntB[k])
continue;
sprintf(tmp,"[%"INT32"-%"INT32")"
,qw->m_facetRangeIntA[k]
,qw->m_facetRangeIntB[k]
);
text = tmp;
sb9.safePrintf("[%"INT32"-%"INT32")"
,qw->m_facetRangeIntA[k]
,qw->m_facetRangeIntB[k]
);
text = sb9.getBufStart();
k2 = k;
}
// get the facet range that this FacetEntry represents (float)
for ( int32_t k = 0 ; k < qw->m_numFacetRanges; k++ ) {
if ( qt->m_fieldCode != FIELD_GBFACETFLOAT )
break;
@ -6336,11 +6445,13 @@ bool Msg40::printFacetsForTable ( SafeBuf *sb , QueryTerm *qt ) {
continue;
if ( *(float *)fvh >= qw->m_facetRangeFloatB[k])
continue;
sprintf(tmp,"[%f-%f)"
,qw->m_facetRangeFloatA[k]
,qw->m_facetRangeFloatB[k]
);
text = tmp;
sb9.pushChar('[');
sb9.printFloatPretty(qw->m_facetRangeFloatA[k]);
sb9.pushChar('-');
sb9.printFloatPretty(qw->m_facetRangeFloatB[k]);
sb9.pushChar(')');
sb9.nullTerm();
text = sb9.getBufStart();
k2 = k;
}
@ -6374,7 +6485,38 @@ bool Msg40::printFacetsForTable ( SafeBuf *sb , QueryTerm *qt ) {
sb->safePrintf("</value>\n"
"\t\t<docCount>%"INT32""
"</docCount>\n"
"\t</facet>\n",count);
,count);
// some stats now for floats
if ( isFloat && fe->m_count ) {
sb->safePrintf("\t\t<average>");
double sum = *(double *)&fe->m_sum;
double avg = sum/(double)fe->m_count;
sb->printFloatPretty ( (float)avg );
sb->safePrintf("\t\t</average>\n");
sb->safePrintf("\t\t<min>");
float min = *(float *)&fe->m_min;
sb->printFloatPretty ( min );
sb->safePrintf("</min>\n");
sb->safePrintf("\t\t<max>");
float max = *(float *)&fe->m_max;
sb->printFloatPretty ( max );
sb->safePrintf("</max>\n");
}
// some stats now for ints
if ( isInt && fe->m_count ) {
sb->safePrintf("\t\t<average>");
int64_t sum = fe->m_sum;
double avg = (double)sum/(double)fe->m_count;
sb->printFloatPretty ( (float)avg );
sb->safePrintf("\t\t</average>\n");
sb->safePrintf("\t\t<min>");
int32_t min = fe->m_min;
sb->safePrintf("%"INT32"</min>\n",min);
sb->safePrintf("\t\t<max>");
int32_t max = fe->m_max;
sb->safePrintf("%"INT32"</max>\n",max);
}
sb->safePrintf("\t</facet>\n");
continue;
}
@ -6444,8 +6586,49 @@ bool Msg40::printFacetsForTable ( SafeBuf *sb , QueryTerm *qt ) {
// just use quotes for ranges like "[1-3)" now
sb->safePrintf("\"");
sb->safePrintf(",\n"
"\t\"docCount\":%"INT32"\n"
"}\n,\n", count);
"\t\"docCount\":%"INT32""
, count );
// if it's a # then we print stats after
if ( isString || fe->m_count == 0 )
sb->safePrintf("\n");
else
sb->safePrintf(",\n");
// some stats now for floats
if ( isFloat && fe->m_count ) {
sb->safePrintf("\t\"average\":");
double sum = *(double *)&fe->m_sum;
double avg = sum/(double)fe->m_count;
sb->printFloatPretty ( (float)avg );
sb->safePrintf(",\n");
sb->safePrintf("\t\"min\":");
float min = *(float *)&fe->m_min;
sb->printFloatPretty ( min );
sb->safePrintf(",\n");
sb->safePrintf("\t\"max\":");
float max = *(float *)&fe->m_max;
sb->printFloatPretty ( max );
sb->safePrintf("\n");
}
// some stats now for ints
if ( isInt && fe->m_count ) {
sb->safePrintf("\t\"average\":");
int64_t sum = fe->m_sum;
double avg = (double)sum/(double)fe->m_count;
sb->printFloatPretty ( (float)avg );
sb->safePrintf(",\n");
sb->safePrintf("\t\"min\":");
int32_t min = fe->m_min;
sb->safePrintf("%"INT32",\n",min);
sb->safePrintf("\t\"max\":");
int32_t max = fe->m_max;
sb->safePrintf("%"INT32"\n",max);
}
sb->safePrintf("}\n,\n" );
continue;
}
@ -6479,22 +6662,22 @@ bool Msg40::printFacetsForTable ( SafeBuf *sb , QueryTerm *qt ) {
}
else if ( qt->m_fieldCode == FIELD_GBFACETFLOAT &&
qw->m_numFacetRanges > 0 ) {
float min = qw->m_facetRangeIntA[k2];
float max = qw->m_facetRangeIntB[k2];
float min = qw->m_facetRangeFloatA[k2];
float max = qw->m_facetRangeFloatB[k2];
if ( min == max )
newStuff.safePrintf("prepend="
"gbequalfloat%%3A%s%%3A%f+"
,term
,*(float *)fvh);
else
newStuff.safePrintf("prepend="
"gbminfloat%%3A%s%%3A%f+"
"gbmaxfloat%%3A%s%%3A%f+"
,term
,min
,term
,max
);
newStuff.safePrintf("prepend="
"gbminfloat%%3A%s%%3A%f+"
"gbmaxfloat%%3A%s%%3A%f+"
,term
,min
,term
,max
);
}
else if ( qt->m_fieldCode == FIELD_GBFACETFLOAT )
newStuff.safePrintf("prepend="

@ -335,6 +335,7 @@ class Msg40 {
int32_t m_tasksRemaining;
int32_t m_printCount;
// buffer we deserialize from, allocated by Msg17, but we free it
char *m_buf;

@ -897,7 +897,7 @@ bool Msg5::needsRecall ( ) {
// so common for doledb because of key annihilations
if ( m_rdbId == RDB_DOLEDB && m_round < 10 ) logIt = false;
if ( logIt )
logf(LOG_DEBUG,"db: Reading %"INT32" again from %s (need %"INT32" total "
log("db: Reading %"INT32" again from %s (need %"INT32" total "
"got %"INT32" totalListSizes=%"INT32" sk=%s) "
"cn=%"INT32" this=0x%"PTRFMT" round=%"INT32".",
m_newMinRecSizes , base->m_dbname , m_minRecSizes,

@ -98,12 +98,12 @@ bool Msg8b::getCatRec ( Url *url ,
*p = (char)niceness ; p++;
*p = (char)useCanonicalName; p++;
// coll
//memcpy(p, m_coll, m_collLen);
//gbmemcpy(p, m_coll, m_collLen);
//p += m_collLen;
//*p = '\0';
//p++;
// url
memcpy(p, m_url->getUrl(), m_url->getUrlLen());
gbmemcpy(p, m_url->getUrl(), m_url->getUrlLen());
p += m_url->getUrlLen();
*p = '\0';
p++;
@ -476,17 +476,17 @@ void gotCatRecWrapper ( void *state ) { // , CatRec *catrec ) {
}
}
p = data;
memcpy(p, &catrec->m_dataSize, 4);
gbmemcpy(p, &catrec->m_dataSize, 4);
p += 4;
memcpy(p, catrec->m_data, catrec->m_dataSize);
gbmemcpy(p, catrec->m_data, catrec->m_dataSize);
p += catrec->m_dataSize;
memcpy(p, &catrec->m_gotByIp, 1);
gbmemcpy(p, &catrec->m_gotByIp, 1);
p++;
memcpy(p, &catrec->m_hadRec, 1);
gbmemcpy(p, &catrec->m_hadRec, 1);
p++;
memcpy(p, &catrec->m_numIndCatids, 4);
gbmemcpy(p, &catrec->m_numIndCatids, 4);
p += 4;
memcpy(p, catrec->m_indCatids, catrec->m_numIndCatids*4);
gbmemcpy(p, catrec->m_indCatids, catrec->m_numIndCatids*4);
p += catrec->m_numIndCatids*4;
// sanity check

@ -78,7 +78,7 @@ bool MsgC::getIp(char *hostname , int32_t hostnameLen ,
if ( g_conf.m_useEtcHosts && g_dns.isInFile ( key , ip ))return 1;
// debug msg
//char tmp[2048];
//memcpy ( tmp , hostname , hostnameLen );
//gbmemcpy ( tmp , hostname , hostnameLen );
//tmp [ hostnameLen ] = '\0';
// . try getting from the cache first
// . this returns true if was in the cache and sets *ip to the ip

@ -127,7 +127,7 @@ rename msgaa to Site.
char *p = m_qbuf;
strcpy ( p , "site:" );
p += 5;
memcpy ( p , m_url->getHost() , m_url->getHostLen() );
gbmemcpy ( p , m_url->getHost() , m_url->getHostLen() );
p += m_url->getHostLen();
// sort them by the random score term, gbrandscore (see XmlDoc.cpp)
p += sprintf (p ,

@ -701,7 +701,7 @@ bool addTestIp ( char *host , int32_t hostLen , int32_t ip ) {
s_last = s_testBufPtr;
s_lastLen = hostLen;
// print it
memcpy ( s_testBufPtr , host , hostLen );
gbmemcpy ( s_testBufPtr , host , hostLen );
// skip it
s_testBufPtr += hostLen;
// then space and ip

@ -445,6 +445,7 @@ void Multicast::gotReply2 ( UdpSlot *slot ) {
}
// don't log ETRYAGAIN, may come across as bad when it is normal
if ( m_errnos[i] == ETRYAGAIN ) logIt = false;
//logIt = true;
// log a failure msg
if ( logIt ) { // m_errnos[i] != ETRYAGAIN ) {
Host *h = m_hostdb->getHost ( slot->m_ip ,slot->m_port );

@ -86,7 +86,7 @@ bool sendPageCatdb ( TcpSocket *s , HttpRequest *r ) {
st->m_r.copy(r);
// copy collection
if (collLen > MAX_COLL_LEN) collLen = MAX_COLL_LEN - 1;
memcpy(st->m_coll, coll, collLen);
gbmemcpy(st->m_coll, coll, collLen);
st->m_coll[collLen] = '\0';
st->m_collLen = collLen;
// defaults

@ -1338,7 +1338,7 @@ char *getNewCollName ( ) { // char *token , int32_t tokenLen ) {
//if ( tokenLen + 16 + 5>= MAX_COLL_LEN ) { char *xx=NULL;*xx=0;}
// ensure the crawlid is the full 16 characters long so we
// can quickly extricate the crawlid from the collection name
//memcpy ( s_collBuf, token, tokenLen );
//gbmemcpy ( s_collBuf, token, tokenLen );
//sprintf(s_collBuf + tokenLen ,"-%016"XINT64"",crawlId64);
sprintf(s_collBuf ,"%016"XINT64"",crawlId64);
return s_collBuf;

@ -140,9 +140,9 @@ bool sendPageDirectory ( TcpSocket *s , HttpRequest *r ) {
//s->m_readBufSize = r->m_bufLen+1;
s->m_readBufSize = requestBufLen+1;
}
//memcpy(s->m_readBuf, r->m_buf, r->m_bufLen);
//gbmemcpy(s->m_readBuf, r->m_buf, r->m_bufLen);
//s->m_readBuf[r->m_bufLen] = '\0';
memcpy(s->m_readBuf, requestBuf, requestBufLen);
gbmemcpy(s->m_readBuf, requestBuf, requestBufLen);
s->m_readBuf[requestBufLen] = '\0';
// create the new search request
//if (!r->set(s->m_readBuf, r->m_bufLen, s))

@ -7744,7 +7744,7 @@ bool brformat ( SafeBuf *src ,
if ( size == 1 )
*d++ = *p;
else {
memcpy ( d , p , size );
gbmemcpy ( d , p , size );
d += size;
}
// do not exceed maxchars
@ -7774,7 +7774,7 @@ bool brformat ( SafeBuf *src ,
// if "word" bigger than "col" do not rewrind because
// we get into an infinite loop
p - lastSpaceSrc >= width ) {
memcpy(d,"<br>\n",5);
gbmemcpy(d,"<br>\n",5);
d += 5;
continue;
}
@ -7786,7 +7786,7 @@ bool brformat ( SafeBuf *src ,
// skip d after the space
d++;
// then insert the br
memcpy(d,"<br>\n",5);
gbmemcpy(d,"<br>\n",5);
d += 5;
lastWasBr = true;
}
@ -7822,12 +7822,12 @@ void printAdminEventOptions ( SafeBuf* sb,
uu.set ( url , urlLen );
char dbuf [ MAX_URL_LEN ];
int32_t dlen = uu.getDomainLen();
memcpy ( dbuf , uu.getDomain() , dlen );
gbmemcpy ( dbuf , uu.getDomain() , dlen );
dbuf [ dlen ] = '\0';
// newspaperarchive urls have no domain
if ( dlen == 0 ) {
dlen = uu.getHostLen();
memcpy ( dbuf , uu.getHost() , dlen );
gbmemcpy ( dbuf , uu.getHost() , dlen );
dbuf [ dlen ] = '\0';
}
@ -7870,7 +7870,7 @@ void printAdminEventOptions ( SafeBuf* sb,
dlen = uu.getDomainLen();
memcpy ( dbuf , uu.getDomain() , dlen );
gbmemcpy ( dbuf , uu.getDomain() , dlen );
dbuf [ dlen ] = '\0';
sb->safePrintf("Ban By Domain: ");
@ -8614,12 +8614,12 @@ static bool printResult ( CollectionRec *cr,
if ( si->m_isAssassin && !si->m_isFriend ) {
char dbuf [ MAX_URL_LEN ];
int32_t dlen = uu.getDomainLen();
memcpy ( dbuf , uu.getDomain() , dlen );
gbmemcpy ( dbuf , uu.getDomain() , dlen );
dbuf [ dlen ] = '\0';
// newspaperarchive urls have no domain
if ( dlen == 0 ) {
dlen = uu.getHostLen();
memcpy ( dbuf , uu.getHost() , dlen );
gbmemcpy ( dbuf , uu.getHost() , dlen );
dbuf [ dlen ] = '\0';
}
sb.safePrintf (" - "
@ -8638,7 +8638,7 @@ static bool printResult ( CollectionRec *cr,
dbuf , coll , dbuf );
banSites->safePrintf("%s+", dbuf);
dlen = uu.getHostLen();
memcpy ( dbuf , uu.getHost() , dlen );
gbmemcpy ( dbuf , uu.getHost() , dlen );
dbuf [ dlen ] = '\0';
sb.safePrintf(" - "
" <a href=\"/admin/tagdb?"
@ -18700,16 +18700,16 @@ bool sendPageAbout2 ( State3 *st3 ) {
// < as &lt; and > as &gt;
for ( ; *src ; src++ ) {
if ( *src == '#' ) {
memcpy ( dst,"<font color=gray>",17);
gbmemcpy ( dst,"<font color=gray>",17);
dst += 17;
inFont = true;
}
if ( *src == '<' ) {
memcpy ( dst , "&lt;",4);
gbmemcpy ( dst , "&lt;",4);
dst += 4;
// boldify start tags
//if ( src[1] != '/' && src[1] !='!' ) {
// memcpy(dst,"<b>",3);
// gbmemcpy(dst,"<b>",3);
// dst += 3;
// inBold = true;
//}
@ -18718,21 +18718,21 @@ bool sendPageAbout2 ( State3 *st3 ) {
else if ( *src == '>' ) {
// end bold tags
if ( inBold ) {
memcpy(dst,"</b>",4);
gbmemcpy(dst,"</b>",4);
dst += 4;
inBold = false;
}
memcpy ( dst , "&gt;",4);
gbmemcpy ( dst , "&gt;",4);
dst += 4;
continue;
}
else if ( *src == '\n' ) {
if ( inFont ) {
memcpy(dst,"</font>",7);
gbmemcpy(dst,"</font>",7);
dst += 7;
inFont = false;
}
memcpy ( dst , "<br>",4);
gbmemcpy ( dst , "<br>",4);
dst += 4;
continue;
}

@ -612,7 +612,7 @@ bool processLoop ( void *state ) {
(char *)thisUrl );// base url for ClcknScrll
//p += hilen;
// now an hr
//memcpy ( p , "</span></table></table>\n" , 24 ); p += 24;
//gbmemcpy ( p , "</span></table></table>\n" , 24 ); p += 24;
sb->safeStrcpy("</span></table></table>\n");
}

@ -155,7 +155,7 @@ skipReplaceHost:
"<tr><td colspan=%s><center>"
//"<font size=+1>"
"<b>Hosts "
"(<a href=\"/admin/hosts?c=%s&sort=%"INT32"&reset=1\">"
"(<a href=\"/admin/hosts?c=%s&sort=%"INT32"&resetstats=1\">"
"reset)</b>"
//"</font>"
"</td></tr>"
@ -200,9 +200,15 @@ skipReplaceHost:
//"<td><b>resends sent</td>"
//"<td><b>errors recvd</td>"
//"<td><b>ETRYAGAINS recvd</td>"
"<td><b>try agains recvd</td>"
"<td><a href=\"/admin/hosts?c=%s&sort=3\">"
"<b>dgrams resent</a></td>"
/*
MDW: take out for adding new stuff
"<td><a href=\"/admin/hosts?c=%s&sort=4\">"
"<b>errors recvd</a></td>"
"<td><a href=\"/admin/hosts?c=%s&sort=5\">"
@ -212,6 +218,15 @@ skipReplaceHost:
"<b>dgrams to</a></td>"
"<td><a href=\"/admin/hosts?c=%s&sort=7\">"
"<b>dgrams from</a></td>"
*/
// "<td><a href=\"/admin/hosts?c=%s&sort=18\">"
// "<b>corrupts</a></td>"
// "<td><a href=\"/admin/hosts?c=%s&sort=19\">"
// "<b># ooms</a></td>"
// "<td><a href=\"/admin/hosts?c=%s&sort=20\">"
// "<b>socks closed</a></td>"
//"<td><a href=\"/admin/hosts?c=%s&sort=8\">"
//"<b>loadavg</a></td>"
@ -271,24 +286,24 @@ skipReplaceHost:
cs,
cs,
cs,
cs,
cs,
cs,
cs,
shotcol );
// loop through each host we know and print it's stats
int32_t nh = g_hostdb.getNumHosts();
// should we reset resends, errorsRecvd and ETRYAGAINS recvd?
if ( r->getLong("reset",0) ) {
if ( r->getLong("resetstats",0) ) {
for ( int32_t i = 0 ; i < nh ; i++ ) {
// get the ith host (hostId)
Host *h = g_hostdb.getHost ( i );
h->m_totalResends = 0;
h->m_pingInfo.m_totalResends = 0;
h->m_errorReplies = 0;
h->m_etryagains = 0;
h->m_pingInfo.m_etryagains = 0;
h->m_dgramsTo = 0;
h->m_dgramsFrom = 0;
h->m_splitTimes = 0;
h->m_splitsDone = 0;
h->m_pingInfo.m_slowDiskReads =0;
}
}
@ -314,6 +329,7 @@ skipReplaceHost:
case 15:gbsort ( hostSort, nh, sizeof(int32_t), slowDiskSort ); break;
case 16:gbsort ( hostSort, nh, sizeof(int32_t), defaultSort ); break;
case 17:gbsort ( hostSort, nh, sizeof(int32_t), diskUsageSort ); break;
}
// we are the only one that uses these flags, so set them now
@ -348,7 +364,7 @@ skipReplaceHost:
int32_t i = hostSort[si];
// get the ith host (hostId)
Host *h = g_hostdb.getHost ( i );
char *vbuf = h->m_gbVersionStrBuf;
char *vbuf = h->m_pingInfo.m_gbVersionStr;//gbVersionStrBuf;
int32_t vhash32 = hash32n ( vbuf );
if ( vhash32 == majorityHash32 ) lastCount++;
else lastCount--;
@ -399,7 +415,7 @@ skipReplaceHost:
*hp = '\0';
}
*/
char *vbuf = h->m_gbVersionStrBuf;
char *vbuf = h->m_pingInfo.m_gbVersionStr;//m_gbVersionStrBuf;
// get hash
int32_t vhash32 = hash32n ( vbuf );
char *vbuf1 = "";
@ -429,22 +445,23 @@ skipReplaceHost:
char *fontTagFront = "";
char *fontTagBack = "";
if ( h->m_percentMemUsed >= 98.0 && format == FORMAT_HTML ) {
if ( h->m_pingInfo.m_percentMemUsed >= 98.0 &&
format == FORMAT_HTML ) {
fontTagFront = "<font color=red>";
fontTagBack = "</font>";
}
float cpu = h->m_cpuUsage;
float cpu = h->m_pingInfo.m_cpuUsage;
if ( cpu > 100.0 ) cpu = 100.0;
if ( cpu < 0.0 ) cpu = -1.0;
char diskUsageMsg[64];
sprintf(diskUsageMsg,"%.1f%%",h->m_diskUsage);
if ( h->m_diskUsage < 0.0 )
sprintf(diskUsageMsg,"%.1f%%",h->m_pingInfo.m_diskUsage);
if ( h->m_pingInfo.m_diskUsage < 0.0 )
sprintf(diskUsageMsg,"???");
if ( h->m_diskUsage >= 98.0 && format == FORMAT_HTML )
if ( h->m_pingInfo.m_diskUsage>=98.0 && format == FORMAT_HTML )
sprintf(diskUsageMsg,"<font color=red><b>%.1f%%"
"</b></font>",h->m_diskUsage);
"</b></font>",h->m_pingInfo.m_diskUsage);
// split time, don't divide by zero!
@ -459,63 +476,124 @@ skipReplaceHost:
//*fs = '\0';
// does its hosts.conf file disagree with ours?
if ( h->m_hostsConfCRC &&
if ( h->m_pingInfo.m_hostsConfCRC &&
format == FORMAT_HTML &&
h->m_hostsConfCRC != g_hostdb.getCRC() )
h->m_pingInfo.m_hostsConfCRC != g_hostdb.getCRC() )
fb.safePrintf("<font color=red><b title=\"Hosts.conf "
"in disagreement with ours.\">H"
"</b></font>");
if ( h->m_hostsConfCRC &&
if ( h->m_pingInfo.m_hostsConfCRC &&
format != FORMAT_HTML &&
h->m_hostsConfCRC != g_hostdb.getCRC() )
h->m_pingInfo.m_hostsConfCRC != g_hostdb.getCRC() )
fb.safePrintf("Hosts.conf in disagreement with ours");
int32_t flags = h->m_pingInfo.m_flags;
if ( format == FORMAT_HTML ) {
// use these new ones for now
int n = h->m_pingInfo.m_numCorruptDiskReads;
if ( n )
fb.safePrintf("<font color=red><b>"
"C"
"<sup>%"INT32"</sup>"
"</b></font>"
, n );
n = h->m_pingInfo.m_numOutOfMems;
if ( n )
fb.safePrintf("<font color=red><b>"
"O"
"<sup>%"INT32"</sup>"
"</b></font>"
, n );
n = h->m_pingInfo.m_socketsClosedFromHittingLimit;
if ( n )
fb.safePrintf("<font color=red><b>"
"K"
"<sup>%"INT32"</sup>"
"</b></font>"
, n );
if ( flags & PFLAG_OUTOFSYNC )
fb.safePrintf("<font color=red><b>"
"N"
"</b></font>"
);
}
// recovery mode? reocvered from coring?
if ((h->m_flags & PFLAG_RECOVERYMODE)&& format == FORMAT_HTML )
if ((flags & PFLAG_RECOVERYMODE)&& format == FORMAT_HTML )
fb.safePrintf("<b title=\"Recovered from core"
"\">x</b>");
if ((h->m_flags & PFLAG_RECOVERYMODE)&& format != FORMAT_HTML )
if ((flags & PFLAG_RECOVERYMODE)&& format != FORMAT_HTML )
fb.safePrintf("Recovered from core");
// rebalancing?
if ( (h->m_flags & PFLAG_REBALANCING)&& format == FORMAT_HTML )
if ( (flags & PFLAG_REBALANCING)&& format == FORMAT_HTML )
fb.safePrintf("<b title=\"Currently "
"rebalancing\">R</b>");
if ( (h->m_flags & PFLAG_REBALANCING)&& format != FORMAT_HTML )
if ( (flags & PFLAG_REBALANCING)&& format != FORMAT_HTML )
fb.safePrintf("Currently rebalancing");
// has recs that should be in another shard? indicates
// we need to rebalance or there is a bad hosts.conf
if ((h->m_flags & PFLAG_FOREIGNRECS) && format == FORMAT_HTML )
if ((flags & PFLAG_FOREIGNRECS) && format == FORMAT_HTML )
fb.safePrintf("<font color=red><b title=\"Foreign "
"data "
"detected. Needs rebalance.\">F"
"</b></font>");
if ((h->m_flags & PFLAG_FOREIGNRECS) && format != FORMAT_HTML )
if ((flags & PFLAG_FOREIGNRECS) && format != FORMAT_HTML )
fb.safePrintf("Foreign data detected. "
"Needs rebalance.");
// if it has spiders going on say "S"
if ((h->m_flags & PFLAG_HASSPIDERS) && format == FORMAT_HTML )
fb.safePrintf ( "<span title=\"Spidering\">S</span>");
if ((h->m_flags & PFLAG_HASSPIDERS) && format != FORMAT_HTML )
// if it has spiders going on say "S" with # as the superscript
if ((flags & PFLAG_HASSPIDERS) && format == FORMAT_HTML )
fb.safePrintf ( "<span title=\"Spidering\">S"
"<sup>%"INT32"</sup>"
"</span>"
,h->m_pingInfo.m_currentSpiders
);
if ( format == FORMAT_HTML ) {
char *f1 = "";
char *f2 = "";
if ( h->m_pingInfo.m_udpSlotsInUse >= 200 ) {
f1 = "<b>";
f2 = "</b>";
}
if ( h->m_pingInfo.m_udpSlotsInUse >= 400 ) {
f1 = "<b><font color=red>";
f2 = "</font></b>";
}
fb.safePrintf("<span title=\"udpSlotsInUse\">"
"%s"
"U"
"<sup>%"INT32"</sup>"
"%s"
"</span>"
,f1
,h->m_pingInfo.m_udpSlotsInUse
,f2
);
}
if ((flags & PFLAG_HASSPIDERS) && format != FORMAT_HTML )
fb.safePrintf ( "Spidering");
// say "M" if merging
if ( (h->m_flags & PFLAG_MERGING) && format == FORMAT_HTML )
if ( (flags & PFLAG_MERGING) && format == FORMAT_HTML )
fb.safePrintf ( "<span title=\"Merging\">M</span>");
if ( (h->m_flags & PFLAG_MERGING) && format != FORMAT_HTML )
if ( (flags & PFLAG_MERGING) && format != FORMAT_HTML )
fb.safePrintf ( "Merging");
// say "D" if dumping
if ( (h->m_flags & PFLAG_DUMPING) && format == FORMAT_HTML )
if ( (flags & PFLAG_DUMPING) && format == FORMAT_HTML )
fb.safePrintf ( "<span title=\"Dumping\">D</span>");
if ( (h->m_flags & PFLAG_DUMPING) && format != FORMAT_HTML )
if ( (flags & PFLAG_DUMPING) && format != FORMAT_HTML )
fb.safePrintf ( "Dumping");
// say "y" if doing the daily merge
if ( !(h->m_flags & PFLAG_MERGEMODE0) )
if ( !(flags & PFLAG_MERGEMODE0) )
fb.safePrintf ( "y");
// clear it if it is us, this is invalid
@ -564,19 +642,43 @@ skipReplaceHost:
sb.safePrintf("\t\t<gbVersion>%s</gbVersion>\n",vbuf);
sb.safePrintf("\t\t<resends>%"INT32"</resends>\n",
h->m_totalResends);
h->m_pingInfo.m_totalResends);
/*
MDW: take out for new stuff
sb.safePrintf("\t\t<errorReplies>%"INT32"</errorReplies>\n",
h->m_errorReplies);
*/
sb.safePrintf("\t\t<errorTryAgains>%"INT32""
"</errorTryAgains>\n",
h->m_etryagains);
h->m_pingInfo.m_etryagains);
sb.safePrintf("\t\t<udpSlotsInUse>%"INT32""
"</udpSlotsInUse>\n",
h->m_pingInfo.m_udpSlotsInUse);
/*
sb.safePrintf("\t\t<dgramsTo>%"INT64"</dgramsTo>\n",
h->m_dgramsTo);
sb.safePrintf("\t\t<dgramsFrom>%"INT64"</dgramsFrom>\n",
h->m_dgramsFrom);
*/
sb.safePrintf("\t\t<numCorruptDiskReads>%"INT32""
"</numCorruptDiskReads>\n"
,h->m_pingInfo.m_numCorruptDiskReads);
sb.safePrintf("\t\t<numOutOfMems>%"INT32""
"</numOutOfMems>\n"
,h->m_pingInfo.m_numOutOfMems);
sb.safePrintf("\t\t<numClosedSockets>%"INT32""
"</numClosedSockets>\n"
,h->m_pingInfo.
m_socketsClosedFromHittingLimit);
sb.safePrintf("\t\t<numOutstandingSpiders>%"INT32""
"</numOutstandingSpiders>\n"
,h->m_pingInfo.m_currentSpiders );
sb.safePrintf("\t\t<splitTime>%"INT32"</splitTime>\n",
splitTime);
@ -588,15 +690,15 @@ skipReplaceHost:
sb.safePrintf("\t\t<slowDiskReads>%"INT32""
"</slowDiskReads>\n",
h->m_slowDiskReads);
h->m_pingInfo.m_slowDiskReads);
sb.safePrintf("\t\t<docsIndexed>%"INT32""
"</docsIndexed>\n",
h->m_docsIndexed);
h->m_pingInfo.m_totalDocsIndexed);
sb.safePrintf("\t\t<percentMemUsed>%.1f%%"
"</percentMemUsed>",
h->m_percentMemUsed); // float
h->m_pingInfo.m_percentMemUsed); // float
sb.safePrintf("\t\t<cpuUsage>%.1f%%"
"</cpuUsage>",
@ -653,18 +755,36 @@ skipReplaceHost:
sb.safePrintf("\t\t\"gbVersion\":\"%s\",\n",vbuf);
sb.safePrintf("\t\t\"resends\":%"INT32",\n",
h->m_totalResends);
h->m_pingInfo.m_totalResends);
/*
sb.safePrintf("\t\t\"errorReplies\":%"INT32",\n",
h->m_errorReplies);
*/
sb.safePrintf("\t\t\"errorTryAgains\":%"INT32",\n",
h->m_etryagains);
h->m_pingInfo.m_etryagains);
sb.safePrintf("\t\t\"udpSlotsInUse\":%"INT32",\n",
h->m_pingInfo.m_udpSlotsInUse);
/*
sb.safePrintf("\t\t\"dgramsTo\":%"INT64",\n",
h->m_dgramsTo);
sb.safePrintf("\t\t\"dgramsFrom\":%"INT64",\n",
h->m_dgramsFrom);
*/
sb.safePrintf("\t\t\"numCorruptDiskReads\":%"INT32",\n"
,h->m_pingInfo.m_numCorruptDiskReads);
sb.safePrintf("\t\t\"numOutOfMems\":%"INT32",\n"
,h->m_pingInfo.m_numOutOfMems);
sb.safePrintf("\t\t\"numClosedSockets\":%"INT32",\n"
,h->m_pingInfo.
m_socketsClosedFromHittingLimit);
sb.safePrintf("\t\t\"numOutstandingSpiders\":%"INT32""
",\n"
,h->m_pingInfo.m_currentSpiders );
sb.safePrintf("\t\t\"splitTime\":%"INT32",\n",
splitTime);
@ -675,13 +795,13 @@ skipReplaceHost:
fb.getBufStart());
sb.safePrintf("\t\t\"slowDiskReads\":%"INT32",\n",
h->m_slowDiskReads);
h->m_pingInfo.m_slowDiskReads);
sb.safePrintf("\t\t\"docsIndexed\":%"INT32",\n",
h->m_docsIndexed);
h->m_pingInfo.m_totalDocsIndexed);
sb.safePrintf("\t\t\"percentMemUsed\":\"%.1f%%\",\n",
h->m_percentMemUsed); // float
h->m_pingInfo.m_percentMemUsed); // float
sb.safePrintf("\t\t\"cpuUsage\":\"%.1f%%\",\n",cpu);
@ -737,15 +857,17 @@ skipReplaceHost:
// resends
"<td>%"INT32"</td>"
// error replies
"<td>%"INT32"</td>"
//"<td>%"INT32"</td>"
// etryagains
"<td>%"INT32"</td>"
// # dgrams sent to
"<td>%"INT64"</td>"
//"<td>%"INT64"</td>"
// # dgrams recvd from
"<td>%"INT64"</td>"
//"<td>%"INT64"</td>"
// loadavg
//"<td>%.2f</td>"
@ -805,12 +927,14 @@ skipReplaceHost:
vbuf1,
vbuf,//hdbuf,
vbuf2,
h->m_totalResends,
h->m_errorReplies,
h->m_etryagains,
h->m_dgramsTo,
h->m_dgramsFrom,
h->m_pingInfo.m_totalResends,
// h->m_errorReplies,
h->m_pingInfo.m_etryagains,
// h->m_dgramsTo,
// h->m_dgramsFrom,
//h->m_loadAvg, // double
splitTime,
@ -818,11 +942,11 @@ skipReplaceHost:
fb.getBufStart(),//flagString,
h->m_slowDiskReads,
h->m_docsIndexed,
h->m_pingInfo.m_slowDiskReads,
h->m_pingInfo.m_totalDocsIndexed,
fontTagFront,
h->m_percentMemUsed, // float
h->m_pingInfo.m_percentMemUsed, // float
fontTagBack,
cpu, // float
diskUsageMsg,
@ -1136,17 +1260,19 @@ skipReplaceHost:
"</td>"
"</tr>\n"
/*
"<tr class=poo>"
"<td>errors recvd</td>"
"<td>How many errors were received from a host in response "
"to a request to retrieve or insert data."
"</td>"
"</tr>\n"
*/
"<tr class=poo>"
"<td>ETRYAGAINS recvd</td>"
"<td>How many ETRYAGAIN were received in response to a "
"<td>try agains recvd</td>"
"<td>How many ETRYAGAIN errors "
"were received in response to a "
"request to add data. Usually because the host's memory "
"is full and it is dumping its data to disk. This number "
"can be high if the host if failing to dump the data "
@ -1155,6 +1281,7 @@ skipReplaceHost:
"</td>"
"</tr>\n"
/*
"<tr class=poo>"
"<td>dgrams to</td>"
"<td>How many datagrams were sent to the host from the "
@ -1172,6 +1299,7 @@ skipReplaceHost:
"selected host since startup. Includes ACK datagrams."
"</td>"
"</tr>\n"
*/
"<tr class=poo>"
"<td>avg split time</td>"
@ -1244,7 +1372,7 @@ skipReplaceHost:
"<tr class=poo>"
"<td>ping1 age</td>"
"<td>How int32_t ago the last ping request was sent to "
"<td>How long ago the last ping request was sent to "
"this host. Let's us know how fresh the ping time is."
"</td>"
"</tr>\n"
@ -1310,6 +1438,40 @@ skipReplaceHost:
"</td>"
"</tr>\n"
"<tr class=poo>"
"<td>C (status flag)</td>"
"<td>Indicates # of corrupted disk reads."
"</td>"
"</tr>\n"
"<tr class=poo>"
"<td>K (status flag)</td>"
"<td>Indicates # of sockets closed from hitting limit."
"</td>"
"</tr>\n"
"<tr class=poo>"
"<td><nobr>O (status flag)</nobr></td>"
"<td>Indicates # of times we ran out of memory."
"</td>"
"</tr>\n"
"<tr class=poo>"
"<td><nobr>N (status flag)</nobr></td>"
"<td>Indicates host's clock is NOT in sync with host #0. "
"Gigablast should automatically sync on startup, "
"so this would be a problem "
"if it does not go away. Hosts need to have their clocks "
"in sync before they can add data to their index."
"</td>"
"</tr>\n"
"<tr class=poo>"
"<td><nobr>U (status flag)</nobr></td>"
"<td>Indicates the number of active UDP transactions "
"which are either intiating or receiving."
"</td>"
"</tr>\n"
,
TABLE_STYLE
@ -1349,11 +1511,11 @@ int32_t generatePingMsg( Host *h, int64_t nowms, char *buf ) {
sprintf(buf, "<font color=#ff0000><b>DEAD</b></font>");
}
// for kernel errors
else if ( h->m_kernelErrors > 0 ){
if ( h->m_kernelErrors == ME_IOERR )
else if ( h->m_pingInfo.m_kernelErrors > 0 ){
if ( h->m_pingInfo.m_kernelErrors == ME_IOERR )
sprintf(buf, "<font color=#ff0080><b>IOERR"
"</b></font>");
else if ( h->m_kernelErrors == ME_100MBPS )
else if ( h->m_pingInfo.m_kernelErrors == ME_100MBPS )
sprintf(buf, "<font color=#ff0080><b>100MBPS"
"</b></font>");
else
@ -1385,11 +1547,13 @@ int32_t generatePingMsg( Host *h, int64_t nowms, char *buf ) {
int defaultSort ( const void *i1, const void *i2 ) {
Host *h1 = g_hostdb.getHost ( *(int32_t*)i1 );
Host *h2 = g_hostdb.getHost ( *(int32_t*)i2 );
PingInfo *p1 = &h1->m_pingInfo;
PingInfo *p2 = &h2->m_pingInfo;
// float up to the top if the host is reporting kernel errors
// even if the ping is normal
if ( h1->m_kernelErrors > 0 && h2->m_kernelErrors <= 0 ) return -1;
if ( h2->m_kernelErrors > 0 && h1->m_kernelErrors <= 0 ) return 1;
if ( h2->m_kernelErrors > 0 && h1->m_kernelErrors > 0 ) {
if ( p1->m_kernelErrors > 0 && p2->m_kernelErrors <= 0 ) return -1;
if ( p2->m_kernelErrors > 0 && p1->m_kernelErrors <= 0 ) return 1;
if ( p2->m_kernelErrors > 0 && p1->m_kernelErrors > 0 ) {
if ( h1->m_hostId < h2->m_hostId ) return -1;
return 1;
}
@ -1403,10 +1567,12 @@ int defaultSort ( const void *i1, const void *i2 ) {
int pingSort1 ( const void *i1, const void *i2 ) {
Host *h1 = g_hostdb.getHost ( *(int32_t*)i1 );
Host *h2 = g_hostdb.getHost ( *(int32_t*)i2 );
PingInfo *p1 = &h1->m_pingInfo;
PingInfo *p2 = &h2->m_pingInfo;
// float up to the top if the host is reporting kernel errors
// even if the ping is normal
if ( h1->m_kernelErrors > 0 ) return -1;
if ( h2->m_kernelErrors > 0 ) return 1;
if ( p1->m_kernelErrors > 0 ) return -1;
if ( p2->m_kernelErrors > 0 ) return 1;
if ( h1->m_ping > h2->m_ping ) return -1;
if ( h1->m_ping < h2->m_ping ) return 1;
return 0;
@ -1415,10 +1581,12 @@ int pingSort1 ( const void *i1, const void *i2 ) {
int pingSort2 ( const void *i1, const void *i2 ) {
Host *h1 = g_hostdb.getHost ( *(int32_t*)i1 );
Host *h2 = g_hostdb.getHost ( *(int32_t*)i2 );
PingInfo *p1 = &h1->m_pingInfo;
PingInfo *p2 = &h2->m_pingInfo;
// float up to the top if the host is reporting kernel errors
// even if the ping is normal
if ( h1->m_kernelErrors > 0 ) return -1;
if ( h2->m_kernelErrors > 0 ) return 1;
if ( p1->m_kernelErrors > 0 ) return -1;
if ( p2->m_kernelErrors > 0 ) return 1;
if ( h1->m_pingShotgun > h2->m_pingShotgun ) return -1;
if ( h1->m_pingShotgun < h2->m_pingShotgun ) return 1;
return 0;
@ -1435,14 +1603,18 @@ int pingMaxSort ( const void *i1, const void *i2 ) {
// Comparison callback handed to gbsort() for the hosts table: i1 and i2
// point at int32_t host ids, which are resolved to Host records through
// g_hostdb. Returns -1 when the first host has MORE slow disk reads, 1 when
// it has fewer and 0 on a tie, i.e. a descending order by slow disk reads.
int slowDiskSort ( const void *i1, const void *i2 ) {
Host *h1 = g_hostdb.getHost ( *(int32_t*)i1 );
Host *h2 = g_hostdb.getHost ( *(int32_t*)i2 );
// NOTE(review): two comparison pairs are visible here, one reading the
// counter directly off Host and one reading it from the embedded PingInfo.
// This looks like the before/after lines of one change shown together;
// confirm which pair the real file keeps.
if ( h1->m_slowDiskReads > h2->m_slowDiskReads ) return -1;
if ( h1->m_slowDiskReads < h2->m_slowDiskReads ) return 1;
PingInfo *p1 = &h1->m_pingInfo;
PingInfo *p2 = &h2->m_pingInfo;
if ( p1->m_slowDiskReads > p2->m_slowDiskReads ) return -1;
if ( p1->m_slowDiskReads < p2->m_slowDiskReads ) return 1;
return 0;
}
int pingAgeSort ( const void *i1, const void *i2 ) {
Host *h1 = g_hostdb.getHost ( *(int32_t*)i1 );
Host *h2 = g_hostdb.getHost ( *(int32_t*)i2 );
//PingInfo *p1 = &h1->m_pingInfo;
//PingInfo *p2 = &h2->m_pingInfo;
if ( h1->m_lastPing > h2->m_lastPing ) return -1;
if ( h1->m_lastPing < h2->m_lastPing ) return 1;
return 0;
@ -1463,16 +1635,20 @@ int splitTimeSort ( const void *i1, const void *i2 ) {
// Comparison callback over two host ids (i1 and i2 point at int32_t values
// that are resolved through g_hostdb). Returns -1 when the first host's
// m_flags value is numerically larger, 1 when it is smaller and 0 when
// equal, so larger flag values sort first.
// presumably passed to gbsort() by the hosts page; verify against the caller
int flagSort ( const void *i1, const void *i2 ) {
Host *h1 = g_hostdb.getHost ( *(int32_t*)i1 );
Host *h2 = g_hostdb.getHost ( *(int32_t*)i2 );
// NOTE(review): the comparison appears twice, once on Host::m_flags and
// once on PingInfo::m_flags. This looks like the old and new lines of one
// change shown together; confirm which pair the real file keeps.
if ( h1->m_flags > h2->m_flags ) return -1;
if ( h1->m_flags < h2->m_flags ) return 1;
PingInfo *p1 = &h1->m_pingInfo;
PingInfo *p2 = &h2->m_pingInfo;
if ( p1->m_flags > p2->m_flags ) return -1;
if ( p1->m_flags < p2->m_flags ) return 1;
return 0;
}
// Comparison callback over two host ids (i1 and i2 point at int32_t values
// that are resolved through g_hostdb). Returns -1 when the first host has
// the larger total resend count, 1 when it has the smaller one and 0 on a
// tie, so hosts with the most resends sort first.
// presumably passed to gbsort() by the hosts page; verify against the caller
int resendsSort ( const void *i1, const void *i2 ) {
Host *h1 = g_hostdb.getHost ( *(int32_t*)i1 );
Host *h2 = g_hostdb.getHost ( *(int32_t*)i2 );
// NOTE(review): the comparison appears twice, once on Host::m_totalResends
// and once on m_pingInfo.m_totalResends. This looks like the old and new
// lines of one change shown together; confirm which pair the real file keeps.
if ( h1->m_totalResends > h2->m_totalResends ) return -1;
if ( h1->m_totalResends < h2->m_totalResends ) return 1;
if ( h1->m_pingInfo.m_totalResends > h2->m_pingInfo.m_totalResends )
return -1;
if ( h1->m_pingInfo.m_totalResends < h2->m_pingInfo.m_totalResends )
return 1;
return 0;
}
@ -1487,8 +1663,8 @@ int errorsSort ( const void *i1, const void *i2 ) {
// Comparison callback over two host ids (i1 and i2 point at int32_t values
// that are resolved through g_hostdb). Returns -1 when the first host has
// the larger m_etryagains count, 1 when it has the smaller one and 0 on a
// tie, so hosts with the most ETRYAGAIN replies sort first.
// presumably passed to gbsort() by the hosts page; verify against the caller
int tryagainSort ( const void *i1, const void *i2 ) {
Host *h1 = g_hostdb.getHost ( *(int32_t*)i1 );
Host *h2 = g_hostdb.getHost ( *(int32_t*)i2 );
// NOTE(review): the comparison appears twice, once on Host::m_etryagains
// and once on m_pingInfo.m_etryagains. This looks like the old and new
// lines of one change shown together; confirm which pair the real file keeps.
if ( h1->m_etryagains > h2->m_etryagains ) return -1;
if ( h1->m_etryagains < h2->m_etryagains ) return 1;
if ( h1->m_pingInfo.m_etryagains>h2->m_pingInfo.m_etryagains)return -1;
if ( h1->m_pingInfo.m_etryagains<h2->m_pingInfo.m_etryagains)return 1;
return 0;
}
@ -1522,24 +1698,30 @@ int loadAvgSort ( const void *i1, const void *i2 ) {
// Comparison callback over two host ids (i1 and i2 point at int32_t values
// that are resolved through g_hostdb). Returns -1 when the first host
// reports the higher m_percentMemUsed, 1 when it reports the lower one and
// 0 on a tie, so the most memory-loaded hosts sort first.
// presumably passed to gbsort() by the hosts page; verify against the caller
int memUsedSort ( const void *i1, const void *i2 ) {
Host *h1 = g_hostdb.getHost ( *(int32_t*)i1 );
Host *h2 = g_hostdb.getHost ( *(int32_t*)i2 );
// NOTE(review): the comparison appears twice, once on Host::m_percentMemUsed
// and once on PingInfo::m_percentMemUsed. This looks like the old and new
// lines of one change shown together; confirm which pair the real file keeps.
if ( h1->m_percentMemUsed > h2->m_percentMemUsed ) return -1;
if ( h1->m_percentMemUsed < h2->m_percentMemUsed ) return 1;
PingInfo *p1 = &h1->m_pingInfo;
PingInfo *p2 = &h2->m_pingInfo;
if ( p1->m_percentMemUsed > p2->m_percentMemUsed ) return -1;
if ( p1->m_percentMemUsed < p2->m_percentMemUsed ) return 1;
return 0;
}
// Comparison callback over two host ids (i1 and i2 point at int32_t values
// that are resolved through g_hostdb). Returns -1 when the first host
// reports the higher m_cpuUsage, 1 when it reports the lower one and 0 on a
// tie, so the busiest hosts sort first.
// presumably passed to gbsort() by the hosts page; verify against the caller
int cpuUsageSort ( const void *i1, const void *i2 ) {
Host *h1 = g_hostdb.getHost ( *(int32_t*)i1 );
Host *h2 = g_hostdb.getHost ( *(int32_t*)i2 );
// NOTE(review): the comparison appears twice, once on Host::m_cpuUsage and
// once on PingInfo::m_cpuUsage. This looks like the old and new lines of
// one change shown together; confirm which pair the real file keeps.
if ( h1->m_cpuUsage > h2->m_cpuUsage ) return -1;
if ( h1->m_cpuUsage < h2->m_cpuUsage ) return 1;
PingInfo *p1 = &h1->m_pingInfo;
PingInfo *p2 = &h2->m_pingInfo;
if ( p1->m_cpuUsage > p2->m_cpuUsage ) return -1;
if ( p1->m_cpuUsage < p2->m_cpuUsage ) return 1;
return 0;
}
// Comparison callback handed to gbsort() for the hosts table: i1 and i2
// point at int32_t host ids, which are resolved to Host records through
// g_hostdb. Returns -1 when the first host reports the higher m_diskUsage,
// 1 when it reports the lower one and 0 on a tie, so the fullest disks
// sort first.
int diskUsageSort ( const void *i1, const void *i2 ) {
Host *h1 = g_hostdb.getHost ( *(int32_t*)i1 );
Host *h2 = g_hostdb.getHost ( *(int32_t*)i2 );
// NOTE(review): the comparison appears twice, once on Host::m_diskUsage and
// once on PingInfo::m_diskUsage. This looks like the old and new lines of
// one change shown together; confirm which pair the real file keeps.
if ( h1->m_diskUsage > h2->m_diskUsage ) return -1;
if ( h1->m_diskUsage < h2->m_diskUsage ) return 1;
PingInfo *p1 = &h1->m_pingInfo;
PingInfo *p2 = &h2->m_pingInfo;
if ( p1->m_diskUsage > p2->m_diskUsage ) return -1;
if ( p1->m_diskUsage < p2->m_diskUsage ) return 1;
return 0;
}

@ -119,10 +119,10 @@ bool sendPageIndexdb ( TcpSocket *s , HttpRequest *r ) {
st->m_docId = r->getLongLong ("d", 0LL );
st->m_score = r->getLong ("score", 0 );
// copy query/collection
memcpy ( st->m_query , query , queryLen );
gbmemcpy ( st->m_query , query , queryLen );
st->m_queryLen = queryLen;
st->m_query [ queryLen ] ='\0';
//memcpy ( st->m_coll , coll , collLen );
//gbmemcpy ( st->m_coll , coll , collLen );
//st->m_collLen = collLen;
//st->m_coll [ collLen ] ='\0';
st->m_coll = coll;
@ -237,14 +237,14 @@ loop:
int32_t ks;
if ( st->m_useDatedb ) {
memcpy ( startKey , &s16 , 16 );
memcpy ( endKey , &e16 , 16 );
gbmemcpy ( startKey , &s16 , 16 );
gbmemcpy ( endKey , &e16 , 16 );
rdbId = RDB_DATEDB;
ks = 16;
}
else {
memcpy ( startKey , &s12 , 12 );
memcpy ( endKey , &e12 , 12 );
gbmemcpy ( startKey , &s12 , 12 );
gbmemcpy ( endKey , &e12 , 12 );
rdbId = RDB_INDEXDB;
ks = 12;
}

@ -622,6 +622,29 @@ bool Msg7::inject ( void *state ,
m_injectUrlBuf.reset();
// by default append a -<ch64> to the provided url
m_injectUrlBuf.safePrintf("%s-%"UINT64"",u.getUrl(),ch64);
// HOWEVER, if hasmime is true and an http:// or https:// url follows
// the delimiter then use that as the url...
// this way we can specify our own urls.
char *du = start;
// step over the delimiter itself
du += gbstrlen(delim);
// skip up to three whitespace chars that follow the delimiter.
// NOTE(review): "du &&" tests the pointer, not the char it points to
if ( du && is_wspace_a ( *du ) ) du++;
if ( du && is_wspace_a ( *du ) ) du++;
if ( du && is_wspace_a ( *du ) ) du++;
if ( gr->m_hasMime &&
(strncasecmp( du,"http://",7) == 0 ||
strncasecmp( du,"https://",8) == 0 ) ) {
// find end of it. both accepted prefixes are at least 7 chars long,
// so starting the scan at du+7 stays inside the url for either scheme
char *uend = du + 7;
// the url ends at the first whitespace char or at the terminating \0
for ( ; *uend && ! is_wspace_a(*uend) ; uend++ );
// inject that then, replacing the default url built above
m_injectUrlBuf.reset();
m_injectUrlBuf.safeMemcpy ( du , uend - du );
m_injectUrlBuf.nullTerm();
// and point to the actual http mime then
start = uend;
}
}
// count them

@ -163,7 +163,7 @@ bool sendPageLogView ( TcpSocket *s , HttpRequest *r ) {
sprintf(filtbuf, "fs%"INT32"", i);
st->m_filterStr[i] = r->getString(filtbuf, &len, "");
if(len != 0) {
memcpy(st->m_lastPtr, st->m_filterStr[i], len);
gbmemcpy(st->m_lastPtr, st->m_filterStr[i], len);
st->m_filterStr[i] = st->m_lastPtr;
st->m_lastPtr += len;
*(st->m_lastPtr) = '\0';
@ -257,7 +257,7 @@ void gotRemoteLogWrapper(void *state, UdpSlot *slot) {
st->m_readBufSize + st->m_readBuf)
goto noRoom;
memcpy(st->m_lastPtr,
gbmemcpy(st->m_lastPtr,
nextLine + 1,
segSize);
st->m_readBufPtrs[st->m_numSlots] = st->m_lastPtr;

@ -583,7 +583,7 @@ bool PageNetTest::controls( TcpSocket *s, HttpRequest *r ) {
int32_t len = 0;
char *coll = r->getString( "c", &len );
memcpy( m_coll, coll, len );
gbmemcpy( m_coll, coll, len );
//int32_t ntnd = r->getLong( "ntnd", 0 );
//int32_t rcv = r->getLong( "ntrs", 0 );

@ -424,8 +424,8 @@ bool sendPageParser2 ( TcpSocket *s ,
//"<input type=checkbox name=xml value=1> "
"<select name=ctype>\n"
"<option value=%"INT32" selected>HTML</option>\n"
"<option value=%"INT32" selected>XML</option>\n"
"<option value=%"INT32" selected>JSON</option>\n"
"<option value=%"INT32">XML</option>\n"
"<option value=%"INT32">JSON</option>\n"
"</select>\n"
"</td>"

@ -87,10 +87,12 @@ bool sendPagePerf ( TcpSocket *s , HttpRequest *r ) {
//get the 'path' part of the request.
char rbuf[1024];
if(r->getRequestLen() > 1023)
memcpy( rbuf, r->getRequest(), 1023);
else
memcpy( rbuf, r->getRequest(), r->getRequestLen());
if(r->getRequestLen() > 1023) {
gbmemcpy( rbuf, r->getRequest(), 1023);
}
else {
gbmemcpy( rbuf, r->getRequest(), r->getRequestLen());
}
char* rbufEnd = rbuf;
//skip GET
while (!isspace(*rbufEnd)) rbufEnd++;

@ -408,6 +408,9 @@ bool Msg1c::gotList ( ) {
m_sb.setLabel("reiadd");
State13 *st = (State13 *)m_state;
GigablastRequest *gr = &st->m_gr;
m_numDocIdsAdded = 0;
//int32_t count = 0;
// list consists of docIds, loop through each one
@ -432,7 +435,11 @@ bool Msg1c::gotList ( ) {
// url is a docid!
sprintf ( sr.m_url , "%"UINT64"" , docId );
// make a fake first ip
int32_t firstIp = (docId & 0xffffffff);
// use only 64k values so we don't stress doledb/waittrees/etc.
// for large #'s of docids
int32_t firstIp = (docId & 0x0000ffff);
// 0 is not a legit val. it'll core below.
if ( firstIp == 0 ) firstIp = 1;
// use a fake ip
sr.m_firstIp = firstIp;//nowGlobal;
// we are not really injecting...
@ -445,6 +452,9 @@ bool Msg1c::gotList ( ) {
sr.m_probDocId = docId;
// use test-parser not test-spider
sr.m_useTestSpiderDir = 0;
// now you can recycle content instead of re-downloading it
// for every docid
sr.m_recycleContent = gr->m_recycleContent;
// if this is zero we end up getting deduped in
// dedupSpiderList() if there was a SpiderReply whose
// spider time was > 0

@ -194,6 +194,15 @@ bool sendReply ( State0 *st , char *reply ) {
}
*/
// if we had a broken pipe from the browser while sending
// them the search results, then we end up closing the socket fd
// in TcpServer::sendChunk() > sendMsg() > destroySocket()
if ( s->m_numDestroys ) {
log("results: not sending back error on destroyed socket "
"sd=%"INT32"",s->m_sd);
return true;
}
int32_t status = 500;
if (savedErr == ETOOMANYOPERANDS ||
savedErr == EBADREQUEST ||
@ -1137,7 +1146,20 @@ bool gotResults ( void *state ) {
log("res: socket still in streaming mode. wtf?");
st->m_socket->m_streamingMode = false;
}
log("msg40: done streaming. nuking state.");
log("msg40: done streaming. nuking state=%"PTRFMT" q=%s. "
"msg20sin=%i msg20sout=%i sendsin=%i sendsout=%i "
"numrequests=%i numreplies=%i "
,(PTRTYPE)st
,si->m_q.m_orig
, msg40->m_numMsg20sIn
, msg40->m_numMsg20sOut
, msg40->m_sendsIn
, msg40->m_sendsOut
, msg40->m_numRequests
, msg40->m_numReplies
);
mdelete(st, sizeof(State0), "PageResults2");
delete st;
return true;
@ -2407,7 +2429,9 @@ bool printSearchResultsHeader ( State0 *st ) {
char c = term[qt->m_termLen];
term[qt->m_termLen] = '\0';
sb->safePrintf("\t\t\t<termStr><![CDATA[");
sb->cdataEncode(qt->m_term);
char *printTerm = qt->m_term;
if ( is_wspace_a(term[0])) printTerm++;
sb->cdataEncode(printTerm);
sb->safePrintf("]]>"
"</termStr>\n");
term[qt->m_termLen] = c;
@ -2419,7 +2443,7 @@ bool printSearchResultsHeader ( State0 *st ) {
sb->safePrintf("\t\t\t<termLang>"
"<![CDATA[");
bool first = true;
for ( int i = 0 ; i <= MAXLANGID ; i++ ) {
for ( int i = 0 ; i < langLast ; i++ ) {
uint64_t bit = (uint64_t)1 << i;
if ( ! (qt->m_langIdBits&bit))continue;
char *str = getLangAbbr(i);
@ -2434,15 +2458,24 @@ bool printSearchResultsHeader ( State0 *st ) {
char *term = sq->m_term;
char c = term[sq->m_termLen];
term[sq->m_termLen] = '\0';
char *printTerm = term;
if ( is_wspace_a(term[0])) printTerm++;
sb->safePrintf("\t\t\t<synonymOf>"
"<![CDATA[%s]]>"
"</synonymOf>\n"
,sq->m_term);
,printTerm);
term[sq->m_termLen] = c;
}
int64_t tf = msg40->m_msg3a.m_termFreqs[i];
sb->safePrintf("\t\t\t<termFreq>%"INT64"</termFreq>\n"
,tf);
sb->safePrintf("\t\t\t<termHash48>%"INT64"</termHash48>\n"
,qt->m_termId);
sb->safePrintf("\t\t\t<termHash64>%"UINT64"</termHash64>\n"
,qt->m_rawTermId);
QueryWord *qw = qt->m_qword;
sb->safePrintf("\t\t\t<prefixHash64>%"UINT64"</prefixHash64>\n"
,qw->m_prefixHash);
sb->safePrintf("\t\t</term>\n");
}
sb->safePrintf("\t</queryInfo>\n");
@ -2480,7 +2513,7 @@ bool printSearchResultsHeader ( State0 *st ) {
// language map from wiktionary
sb->safePrintf("\t\t\"termLang\":\"");
bool first = true;
for ( int i = 0 ; i <= MAXLANGID ; i++ ) {
for ( int i = 0 ; i < langLast ; i++ ) {
uint64_t bit = (uint64_t)1 << i;
if ( ! (qt->m_langIdBits&bit))continue;
char *str = getLangAbbr(i);
@ -2501,8 +2534,19 @@ bool printSearchResultsHeader ( State0 *st ) {
term[sq->m_termLen] = c;
}
int64_t tf = msg40->m_msg3a.m_termFreqs[i];
sb->safePrintf("\t\t\"termFreq\":%"INT64"\n"
sb->safePrintf("\t\t\"termFreq\":%"INT64",\n"
,tf);
sb->safePrintf("\t\t\"termHash48\":%"INT64",\n"
,qt->m_termId);
sb->safePrintf("\t\t\"termHash64\":%"UINT64",\n"
,qt->m_rawTermId);
// don't end the last query term attribute with a comma
QueryWord *qw = qt->m_qword;
sb->safePrintf("\t\t\"prefixHash64\":%"UINT64"\n"
,qw->m_prefixHash);
sb->safePrintf("\t}");
if ( i + 1 < q->m_numTerms )
sb->pushChar(',');
@ -3102,6 +3146,12 @@ bool printSearchResultsTail ( State0 *st ) {
msg40->printFacetTables ( sb );
if ( st->m_header ) sb->safePrintf("}\n");
//////////////////////
// for some reason if we take too long to write out this
// tail we get a SIGPIPE on a firefox browser.
//////////////////////
// all done for json
return true;
}
@ -3188,11 +3238,19 @@ bool printSearchResultsTail ( State0 *st ) {
SafeBuf newUrl4;
replaceParm2 ( "s=0", &newUrl4 , newUrl3.getBufStart(),
newUrl3.length());
// show errors
SafeBuf newUrl5;
replaceParm2 ( "showerrors=1",
&newUrl5 ,
newUrl4.getBufStart(),
newUrl4.length());
sb->safePrintf("<center>"
"<i>"
"%"INT32" results were omitted because they "
"were considered duplicates, banned, <br>"
"were considered duplicates, banned, errors "
"<br>"
"or "
"from the same site as other results. "
"<a href=%s>Click here to show all results</a>."
@ -3200,7 +3258,7 @@ bool printSearchResultsTail ( State0 *st ) {
"</center>"
"<br><br>"
, msg40->m_omitCount
, newUrl4.getBufStart() );
, newUrl5.getBufStart() );
}
@ -3795,7 +3853,12 @@ bool printResult ( State0 *st, int32_t ix , int32_t *numPrintedSoFar ) {
// . sometimes the msg20reply is NULL so prevent it coring
// . i think this happens if all hosts in a shard are down or timeout
// or something
if ( ! mr ) return false;
if ( ! mr ) {
sb->safePrintf("<i>getting summary for docid %"INT64" had "
"error: %s</i><br><br>"
,d,mstrerror(m20->m_errno));
return true;
}
// . if section voting info was request, display now, it's in json
// . so if in csv it will mess things up!!!
@ -5174,12 +5237,12 @@ bool printResult ( State0 *st, int32_t ix , int32_t *numPrintedSoFar ) {
char dbuf [ MAX_URL_LEN ];
int32_t dlen = uu.getDomainLen();
if ( si->m_format == FORMAT_HTML ) {
memcpy ( dbuf , uu.getDomain() , dlen );
gbmemcpy ( dbuf , uu.getDomain() , dlen );
dbuf [ dlen ] = '\0';
// newspaperarchive urls have no domain
if ( dlen == 0 ) {
dlen = uu.getHostLen();
memcpy ( dbuf , uu.getHost() , dlen );
gbmemcpy ( dbuf , uu.getHost() , dlen );
dbuf [ dlen ] = '\0';
}
}
@ -5203,7 +5266,9 @@ bool printResult ( State0 *st, int32_t ix , int32_t *numPrintedSoFar ) {
un = "UN";
banVal = 0;
}
sb->safePrintf("<br>"
// don't put on a separate line because then it is too
// easy to mis-click on it
sb->safePrintf(//"<br>"
" <a style=color:green; href=\"/admin/tagdb?"
"user=admin&"
"tagtype0=manualban&"
@ -5218,7 +5283,7 @@ bool printResult ( State0 *st, int32_t ix , int32_t *numPrintedSoFar ) {
, dbuf );
//banSites->safePrintf("%s+", dbuf);
dlen = uu.getHostLen();
memcpy ( dbuf , uu.getHost() , dlen );
gbmemcpy ( dbuf , uu.getHost() , dlen );
dbuf [ dlen ] = '\0';
sb->safePrintf(" - "
" <a style=color:green; href=\"/admin/tagdb?"
@ -5755,7 +5820,7 @@ bool printResult ( State0 *st, int32_t ix , int32_t *numPrintedSoFar ) {
else if ( si->m_format == FORMAT_HTML && si->m_doSiteClustering ) {
char hbuf [ MAX_URL_LEN ];
int32_t hlen = uu.getHostLen();
memcpy ( hbuf , uu.getHost() , hlen );
gbmemcpy ( hbuf , uu.getHost() , hlen );
hbuf [ hlen ] = '\0';
sb->safePrintf (" - <nobr><a href=\"/search?"
"q=%%2Bsite%%3A%s+%s&sc=0&c=%s\">"
@ -6326,9 +6391,9 @@ bool printPairScore ( SafeBuf *sb , SearchInput *si , PairScore *ps ,
sb->safePrintf("<td id=tf>%"INT64" <font color=magenta>"
"%.02f</font></td>",
tf1,tfw1);
// insamewikiphrase?
sb->safePrintf("<td>%s %"INT32"/%.01f</td>",
wp,ps->m_qdist,wiw);
// inSamePhraseId distInQuery phraseWeight
sb->safePrintf("<td>%s</td><td>%"INT32"</td><td>%.01f</td>"
,wp,ps->m_qdist,wiw);
// end the row
sb->safePrintf("</tr>");
//
@ -6407,9 +6472,9 @@ bool printPairScore ( SafeBuf *sb , SearchInput *si , PairScore *ps ,
sb->safePrintf("<td id=tf>%"INT64" <font color=magenta>"
"%.02f</font></td>",
tf2,tfw2);
// insamewikiphrase?
sb->safePrintf("<td>%s/%"INT32" %.01f</td>",
wp,ps->m_qdist,wiw);
// inSamePhraseId distInQuery phraseWeight
sb->safePrintf("<td>%s</td><td>%"INT32"</td><td>%.01f</td>"
,wp,ps->m_qdist,wiw);
// end the row
sb->safePrintf("</tr>");
sb->safePrintf("<tr><td ");
@ -6548,8 +6613,11 @@ bool printScoresHeader ( SafeBuf *sb ) {
//"<td>diversityRank</td>"
"<td>density</td>"
"<td>spam</td>"
"<td>inlnkPR</td>" // nlinkSiteRank</td>"
"<td>inlinkPR</td>" // nlinkSiteRank</td>"
"<td>termFreq</td>"
"<td>inSamePhrase</td>"
"<td>distInQuery</td>"
"<td>phraseWeight</td>"
"</tr>\n"
);
return true;
@ -9258,6 +9326,13 @@ bool replaceParm2 ( char *cgi , SafeBuf *newUrl ,
goto tryagain;
}
// fix &s= replacing &sb=
if ( found && found[cgiLen] != '=' ) {
// try again
p = found + 1;
goto tryagain;
}
// if no collision, just append it
if ( ! found ) {

@ -565,7 +565,7 @@ bool expandHtml ( SafeBuf& sb,
//p += gbstrlen ( p );
sb.safePrintf("<input type=hidden name=c "
"value=\"");
//memcpy ( p , coll , collLen );
//gbmemcpy ( p , coll , collLen );
//p += collLen;
sb.safeMemcpy(coll, collLen);
//sprintf ( p , "\">\n");
@ -2155,7 +2155,7 @@ bool printTopDirectory ( SafeBuf& sb , char format ) {
//if (pend - p <= topListLen+1)
// return p;
// copy it in
//memcpy(p, topList, topListLen);
//gbmemcpy(p, topList, topListLen);
//p += topListLen;
//*p = '\0';
//return p;
@ -2394,7 +2394,7 @@ bool sendPageAddUrl ( TcpSocket *sock , HttpRequest *hr ) {
// save the "ufu" (url of file of urls)
st1->m_ufu[0] = '\0';
st1->m_ufuLen = ufuLen;
memcpy ( st1->m_ufu , ufu , ufuLen );
gbmemcpy ( st1->m_ufu , ufu , ufuLen );
st1->m_ufu[ufuLen] = '\0';
st1->m_doTuringTest = cr->m_doTuringTest;
@ -2942,6 +2942,11 @@ bool sendPageHelp ( TcpSocket *sock , HttpRequest *hr ) {
// cr->m_coll);
char *qc = "demo";
char *host = "http://www.gigablast.com";
// for debug make it local on laptop
host = "";
sb.safePrintf(
"<br>"
"<table width=650px cellpadding=5 cellspacing=0 border=0>"
@ -2962,30 +2967,30 @@ bool sendPageHelp ( TcpSocket *sock , HttpRequest *hr ) {
"<th><font color=33dcff>Description</font></th>"
"</tr>"
"<tr> "
"<td><a href=/search?q=cat+dog>cat dog</a></td>"
"<td><a href=%s/search?c=%s&q=cat+dog>cat dog</a></td>"
" <td>Search results have the word <em>cat</em> and the word <em>dog</em> "
" in them. They could also have <i>cats</i> and <i>dogs</i>.</td>"
" </tr>"
""
""
" <tr bgcolor=#E1FFFF> "
" <td><a href=/search?q=%%2Bcat>+cat</a></td>"
" <td><a href=%s/search?c=%s&q=%%2Bcat>+cat</a></td>"
" <td>Search results have the word <em>cat</em> in them. If the search results has the word <i>cats</i> then it will not be included. The plus sign indicates an exact match and not to use synonyms, hypernyms or hyponyms or any other form of the word.</td>"
" </tr>"
""
""
" <tr> "
" <td height=10><a href=/search?q=mp3+%%22take+five%%22>mp3&nbsp;\"take&nbsp;five\"</a></td>"
" <td height=10><a href=%s/search?c=%s&q=mp3+%%22take+five%%22>mp3&nbsp;\"take&nbsp;five\"</a></td>"
" <td>Search results have the word <em>mp3</em> and the exact phrase <em>take "
" five</em> in them.</td>"
" </tr>"
" <tr bgcolor=#E1FFFF> "
" <td><a href=/search?q=%%22john+smith%%22+-%%22bob+dole%%22>\"john&nbsp;smith\"&nbsp;-\"bob&nbsp;dole\"</a></td>"
" <td><a href=%s/search?c=%s&q=%%22john+smith%%22+-%%22bob+dole%%22>\"john&nbsp;smith\"&nbsp;-\"bob&nbsp;dole\"</a></td>"
" <td>Search results have the phrase <em>john smith</em> but NOT the "
" phrase <em>bob dole</em> in them.</td>"
" </tr>"
" <tr> "
" <td><a href=/search?q=bmx+-game>bmx&nbsp;-game</a></td>"
" <td><a href=%s/search?c=%s&q=bmx+-game>bmx&nbsp;-game</a></td>"
" <td>Search results have the word <em>bmx</em> but not <em>game</em>.</td>"
" </tr>"
// " <tr> "
@ -3063,6 +3068,17 @@ bool sendPageHelp ( TcpSocket *sock , HttpRequest *hr ) {
// ""
// ""
, GOLD
, host
, qc
, host
, qc
, host
, qc
, host
, qc
, host
, qc
);
@ -3076,19 +3092,25 @@ bool sendPageHelp ( TcpSocket *sock , HttpRequest *hr ) {
//"<td><font color=33dcff><b>Description</b></font></td>"
// "</tr>"
"<tr bgcolor=#E1FFFF>"
"<td>cat | dog</td><td>"
"<td><a href=%s/search?c=%s&q=cat|dog>cat | dog</a>"
"</td><td>"
"Match documents that have cat and dog in them, but "
"do not allow cat to affect the ranking score, only "
"dog. This is called a <i>query refinement</i>."
"</td></tr>\n"
"<tr bgcolor=#ffFFFF>"
"<td>document.title:paper</td><td>"
"<td><a href=%s/search?c=%s&q=document.title:paper>"
"document.title:paper</a></td><td>"
"That query will match a JSON document like "
"<i>"
"{ \"document\":{\"title\":\"This is a good paper.\" "
"}}</i> or, alternatively, an XML document like <i>"
, host
, qc
, host
, qc
);
sb.htmlEncode("<document><title>This is a good paper"
@ -3141,8 +3163,11 @@ bool sendPageHelp ( TcpSocket *sock , HttpRequest *hr ) {
// fix table internal cell bordering
if ( ! d || d[0] == '\0' ) d = "&nbsp;";
sb.safePrintf("<tr bgcolor=%s>"
"<td><nobr><a href=\"/search?q="
, bgcolor );
"<td><nobr><a href=\"%s/search?c=%s&q="
, bgcolor
, host
, qc
);
sb.urlEncode ( f->example );
sb.safePrintf("\">");
sb.safePrintf("%s</a></nobr></td>"
@ -3194,17 +3219,17 @@ bool sendPageHelp ( TcpSocket *sock , HttpRequest *hr ) {
" </td>"
" </tr>"
" <tr> "
" <td><a href=/search?q=cat+AND+dog>cat&nbsp;AND&nbsp;dog</a></td>"
" <td><a href=%s/search?c=%s&q=cat+AND+dog>cat&nbsp;AND&nbsp;dog</a></td>"
" <td>Search results have the word <em>cat</em> AND the word <em>dog</em> "
" in them.</td>"
" </tr>"
" <tr bgcolor=#E1FFFF> "
" <td><a href=/search?q=cat+OR+dog>cat&nbsp;OR&nbsp;dog</a></td>"
" <td><a href=%s/search?c=%s&q=cat+OR+dog>cat&nbsp;OR&nbsp;dog</a></td>"
" <td>Search results have the word <em>cat</em> OR the word <em>dog</em> "
" in them, but preference is given to results that have both words.</td>"
" </tr>"
" <tr> "
" <td><a href=/search?q=cat+dog+OR+pig>cat&nbsp;dog&nbsp;OR&nbsp;pig</a></td>"
" <td><a href=%s/search?c=%s&q=cat+dog+OR+pig>cat&nbsp;dog&nbsp;OR&nbsp;pig</a></td>"
" <td>Search results have the two words <em>cat</em> and <em>dog</em> "
" OR search results have the word <em>pig</em>, but preference is "
" given to results that have all three words. This illustrates how "
@ -3212,26 +3237,26 @@ bool sendPageHelp ( TcpSocket *sock , HttpRequest *hr ) {
" to be true.</td>"
" </tr>"
" <tr bgcolor=#E1FFFF> "
" <td><a href=/search?q=%%22cat+dog%%22+OR+pig>\"cat&nbsp;dog\"&nbsp;OR&nbsp;pig</a></td>"
" <td><a href=%s/search?c=%s&q=%%22cat+dog%%22+OR+pig>\"cat&nbsp;dog\"&nbsp;OR&nbsp;pig</a></td>"
" <td>Search results have the phrase <em>\"cat dog\"</em> in them OR they "
" have the word <em>pig</em>, but preference is given to results that "
" have both.</td>"
" </tr>"
" <tr> "
" <td><a href=/search?q=title%%3A%%22cat+dog%%22+OR+pig>title</a><a href=/search?q=title%%3A%%22cat+dog%%22+OR+pig>:\"cat "
" <td><a href=%s/search?c=%s&q=title%%3A%%22cat+dog%%22+OR+pig>title</a><a href=%s/search?c=%s&q=title%%3A%%22cat+dog%%22+OR+pig>:\"cat "
" dog\" OR pig</a></td>"
" <td>Search results have the phrase <em>\"cat dog\"</em> in their title "
" OR they have the word <em>pig</em>, but preference is given to results "
" that have both.</td>"
" </tr>"
" <tr bgcolor=#E1FFFF> "
" <td><a href=/search?q=cat+OR+dog+OR+pig>cat&nbsp;OR&nbsp;dog&nbsp;OR&nbsp;pig</a></td>"
" <td><a href=%s/search?c=%s&q=cat+OR+dog+OR+pig>cat&nbsp;OR&nbsp;dog&nbsp;OR&nbsp;pig</a></td>"
" <td>Search results need only have one word, <em>cat</em> or <em>dog</em> "
" or <em>pig</em>, but preference is given to results that have the "
" most of the words.</td>"
" </tr>"
" <tr> "
" <td><a href=/search?q=cat+OR+dog+AND+pig>cat&nbsp;OR&nbsp;dog&nbsp;AND&nbsp;pig</a></td>"
" <td><a href=%s/search?c=%s&q=cat+OR+dog+AND+pig>cat&nbsp;OR&nbsp;dog&nbsp;AND&nbsp;pig</a></td>"
" <td>Search results have <em>dog</em> and <em>pig</em>, but they may "
" or may not have <em>cat</em>. Preference is given to results that "
" have all three. To evaluate expressions with more than two operands, "
@ -3245,11 +3270,11 @@ bool sendPageHelp ( TcpSocket *sock , HttpRequest *hr ) {
" expressions with more than one boolean operator.</td>"
" </tr>"
" <tr bgcolor=#E1FFFF> "
" <td><a href=/search?q=cat+AND+NOT+dog>cat&nbsp;AND&nbsp;NOT&nbsp;dog</a></td>"
" <td><a href=%s/search?c=%s&q=cat+AND+NOT+dog>cat&nbsp;AND&nbsp;NOT&nbsp;dog</a></td>"
" <td>Search results have <em>cat</em> but do not have <em>dog</em>.</td>"
" </tr>"
" <tr> "
" <td><a href=/search?q=cat+AND+NOT+%%28dog+OR+pig%%29>cat&nbsp;AND&nbsp;NOT&nbsp;(dog&nbsp;OR&nbsp;pig)</a></td>"
" <td><a href=%s/search?c=%s&q=cat+AND+NOT+%%28dog+OR+pig%%29>cat&nbsp;AND&nbsp;NOT&nbsp;(dog&nbsp;OR&nbsp;pig)</a></td>"
" <td>Search results have <em>cat</em> but do not have <em>dog</em> "
" and do not have <em>pig</em>. When evaluating a boolean expression "
" that contains ()'s you can evaluate the sub-expression in the ()'s "
@ -3260,7 +3285,7 @@ bool sendPageHelp ( TcpSocket *sock , HttpRequest *hr ) {
" AND false = false</em>. Does anyone actually read this far?</td>"
" </tr>"
" <tr bgcolor=#E1FFFF> "
" <td><a href=/search?q=%%28cat+OR+dog%%29+AND+NOT+%%28cat+AND+dog%%29>(cat&nbsp;OR&nbsp;dog)&nbsp;AND&nbsp;NOT&nbsp;(cat&nbsp;AND&nbsp;dog)</a></td>"
" <td><a href=%s/search?c=%s&q=%%28cat+OR+dog%%29+AND+NOT+%%28cat+AND+dog%%29>(cat&nbsp;OR&nbsp;dog)&nbsp;AND&nbsp;NOT&nbsp;(cat&nbsp;AND&nbsp;dog)</a></td>"
" <td>Search results have <em>cat</em> or <em>dog</em> but not both.</td>"
" </tr>"
" <tr> "
@ -3279,6 +3304,28 @@ bool sendPageHelp ( TcpSocket *sock , HttpRequest *hr ) {
//"</table>"
//"<br>"
, GOLD
, host
, qc
, host
, qc
, host
, qc
, host
, qc
, host
, qc
, host
, qc
, host
, qc
, host
, qc
, host
, qc
, host
, qc
, host
, qc
);

@ -144,22 +144,32 @@ bool sendPageStats ( TcpSocket *s , HttpRequest *r ) {
"<td>%"INT64"</td></tr>\n"
"<tr class=poo><td>max single alloc by</td>"
"<td>%s</td></tr>\n"
"<tr class=poo><td>shared mem used</td>"
"<td>%"INT64"</td></tr>\n"
// "<tr class=poo><td>shared mem used</td>"
// "<td>%"INT64"</td></tr>\n"
"<tr class=poo><td># out of memory errors</td>"
"<td>%"INT32"</td></tr>\n"
"<tr class=poo><td>swaps</td>"
"<td>%"INT64"</td></tr>\n"
"<tr class=poo><td>"
"collections swapped out"
"</td>"
"<td>%"INT32"</td></tr>\n"
// "<tr class=poo><td>"
// "collections swapped out"
// "</td>"
// "<td>%"INT32"</td></tr>\n"
,
//"<tr class=poo><td>num alloc chunks</td>
//<td>%"INT32"</td></tr>\n",
g_mem.getMaxAlloc(),
g_mem.getMaxAllocBy() ,
g_mem.m_sharedUsed,
(int64_t)ru.ru_nswap,// idrss,
g_collectiondb.m_numCollsSwappedOut
//g_mem.m_sharedUsed,
g_mem.m_outOfMems,
(int64_t)ru.ru_nswap// idrss,
//g_collectiondb.m_numCollsSwappedOut
);
p.safePrintf (
"<tr class=poo><td><b>current allocations</b>"

@ -695,7 +695,7 @@ time_t genDate( char *date, int32_t dateLen ) {
char tmp[18];
char *p = tmp;
memcpy( p, date, dateLen );
gbmemcpy( p, date, dateLen );
p[2] = '\0';
p[5] = '\0';
@ -724,7 +724,7 @@ time_t genDate( char *date, int32_t dateLen ) {
else if ( !nowDST.tm_isdst && resultDST.tm_isdst )
tmBuild.tm_hour--;
memcpy( p, date, dateLen );
gbmemcpy( p, date, dateLen );
p[16] = '\0';
log ( LOG_DEBUG, "stats: user string [%s]", p );
log ( LOG_DEBUG, "stats: user provided time [%s]", ctime( &result ) );

@ -1460,5 +1460,5 @@ void adjustContentLength ( SafeBuf *rb ) {
char ttt[32];
int32_t toPrint = sprintf(ttt,format,clen);
// just copy it over, padded with zeroes
memcpy ( f , ttt , toPrint );
gbmemcpy ( f , ttt , toPrint );
}

@ -158,7 +158,7 @@ bool sendPageTurkStats ( TcpSocket *s , HttpRequest *r ) {
char *coll = cr->m_coll;
int32_t collLen = gbstrlen ( coll );
memcpy ( st->m_coll , coll , collLen );
gbmemcpy ( st->m_coll , coll , collLen );
st->m_coll [ collLen ] = '\0';
st->m_collLen=collLen;
@ -1880,7 +1880,7 @@ bool sendPageTurk ( TcpSocket *s , HttpRequest *r ) {
// get collection name and its length
char *coll = cr->m_coll;
int32_t collLen = gbstrlen ( coll );
memcpy ( st->m_coll , coll , collLen );
gbmemcpy ( st->m_coll , coll , collLen );
st->m_coll [ collLen ] = '\0';
st->m_collLen=collLen;

@ -201,6 +201,7 @@ static WebPage s_pages[] = {
sendPageCloneColl , 0 ,NULL,NULL,
PG_MASTERADMIN|PG_ACTIVE},
// let's replace this with query reindex for the most part
{ PAGE_REPAIR , "admin/rebuild" , 0 , "rebuild" , 1 , 0 ,
"rebuild data",
//USER_MASTER ,
@ -272,6 +273,7 @@ static WebPage s_pages[] = {
sendPageAutoban , 0 ,NULL,NULL,
PG_NOAPI|PG_MASTERADMIN},
// deactivate until works on 64-bit... mdw 12/14/14
{ PAGE_PROFILER , "admin/profiler" , 0 , "profiler" , 0 ,M_POST,
//USER_MASTER ,
"profiler",
@ -2373,6 +2375,10 @@ bool Pages::printAdminLinks ( SafeBuf *sb,
// visible window... so just try 1000px max
sb->safePrintf("<div style=max-width:800px;>");
// int arch = 32;
// if ( __WORDSIZE == 64 ) arch = 64;
// if ( __WORDSIZE == 128 ) arch = 128;
//int32_t matt1 = atoip ( MATTIP1 , gbstrlen(MATTIP1) );
//int32_t matt2 = atoip ( MATTIP2 , gbstrlen(MATTIP2) );
for ( int32_t i = PAGE_BASIC_SETTINGS ; i < s_numPages ; i++ ) {
@ -2393,6 +2399,10 @@ bool Pages::printAdminLinks ( SafeBuf *sb,
if ( ! g_conf.m_isMattWells && i == PAGE_AUTOBAN )
continue;
// profiler.cpp only works for 32-bit elf headers right now
//if ( i == PAGE_PROFILER && arch != 32 )
// continue;
// is this page basic?
bool pageBasic = false;
if ( i >= PAGE_BASIC_SETTINGS &&
@ -2403,6 +2413,7 @@ bool Pages::printAdminLinks ( SafeBuf *sb,
// under the advanced menu...
if ( isBasic != pageBasic ) continue;
// ignore these for now
//if ( i == PAGE_SECURITY ) continue;
if ( i == PAGE_ACCESS ) continue;
@ -3901,6 +3912,7 @@ bool printRedBox ( SafeBuf *mb , TcpSocket *sock , HttpRequest *hr ) {
return (bool)adds;
}
bool printedMaster = false;
if ( g_conf.m_masterPwds.length() == 0 &&
g_conf.m_connectIps.length() == 0 ) {
if ( adds ) mb->safePrintf("<br>");
@ -3913,11 +3925,16 @@ bool printRedBox ( SafeBuf *mb , TcpSocket *sock , HttpRequest *hr ) {
"table. Right now anybody might be able "
"to access the Gigablast admin controls.");
mb->safePrintf("%s",boxEnd);
printedMaster = true;
}
CollectionRec *cr = g_collectiondb.getRec ( hr );
char *coll = "";
if ( cr ) coll = cr->m_coll;
if ( cr &&
! printedMaster &&
g_conf.m_useCollectionPasswords &&
cr->m_collectionPasswords.length() == 0 &&
cr->m_collectionIps.length() == 0 ) {
@ -3926,10 +3943,14 @@ bool printRedBox ( SafeBuf *mb , TcpSocket *sock , HttpRequest *hr ) {
mb->safePrintf("%s",box);
mb->safePrintf("URGENT. Please specify a COLLECTION password "
"or IP address in the "
"<a href=/admin/collectionpasswords>"
"<a href=/admin/collectionpasswords?c=%s>"
"password</a> "
"table. Right now anybody might be able "
"to access the Gigablast admin controls.");
"to access the Gigablast admin controls "
"for the <b>%s</b> collection."
, cr->m_coll
, cr->m_coll
);
mb->safePrintf("%s",boxEnd);
}
@ -3937,7 +3958,7 @@ bool printRedBox ( SafeBuf *mb , TcpSocket *sock , HttpRequest *hr ) {
int32_t out = 0;
for ( int32_t i = 0 ; i < g_hostdb.m_numHosts ; i++ ) {
Host *h = &g_hostdb.m_hosts[i];
if ( h->m_diskUsage < 98.0 ) continue;
if ( h->m_pingInfo.m_diskUsage < 98.0 ) continue;
out++;
}
if ( out > 0 ) {
@ -3947,8 +3968,8 @@ bool printRedBox ( SafeBuf *mb , TcpSocket *sock , HttpRequest *hr ) {
if ( out == 1 ) s = " is";
mb->safePrintf("%s",box);
mb->safePrintf("%"INT32" host%s over 98%% disk usage. "
"See the <a href=/admin/hosts>"
"hosts</a> table.",out,s);
"See the <a href=/admin/hosts?c=%s>"
"hosts</a> table.",out,s,coll);
mb->safePrintf("%s",boxEnd);
}
@ -3958,7 +3979,8 @@ bool printRedBox ( SafeBuf *mb , TcpSocket *sock , HttpRequest *hr ) {
// count if not dead
Host *h1 = &g_hostdb.m_hosts[i-1];
Host *h2 = &g_hostdb.m_hosts[i];
if (!strcmp(h1->m_gbVersionStrBuf,h2->m_gbVersionStrBuf))
if (!strcmp(h1->m_pingInfo.m_gbVersionStr,
h2->m_pingInfo.m_gbVersionStr))
continue;
sameVersions = false;
break;
@ -3968,11 +3990,34 @@ bool printRedBox ( SafeBuf *mb , TcpSocket *sock , HttpRequest *hr ) {
adds++;
mb->safePrintf("%s",box);
mb->safePrintf("One or more hosts have different gb versions. "
"See the <a href=/admin/hosts>hosts</a> "
"table.");
"See the <a href=/admin/hosts?c=%s>hosts</a> "
"table.",coll);
mb->safePrintf("%s",boxEnd);
}
int jammedHosts = 0;
for ( int32_t i = 1 ; i < g_hostdb.getNumHosts() ; i++ ) {
Host *h = &g_hostdb.m_hosts[i];
if ( g_hostdb.isDead( h ) ) continue;
if ( h->m_pingInfo.m_udpSlotsInUse >= 400 ) jammedHosts++;
}
if ( jammedHosts > 0 ) {
if ( adds ) mb->safePrintf("<br>");
adds++;
char *s = "s are";
if ( out == 1 ) s = " is";
mb->safePrintf("%s",box);
mb->safePrintf("%"INT32" host%s jammed with "
"over %"INT32" outstanding "
"udp transactions. "
"See <a href=/admin/sockets?c=%s>sockets</a>"
" table.",jammedHosts,s,400,coll);
mb->safePrintf("%s",boxEnd);
}
if ( g_pingServer.m_hostsConfInDisagreement ) {
@ -4050,8 +4095,8 @@ bool printRedBox ( SafeBuf *mb , TcpSocket *sock , HttpRequest *hr ) {
mb->safePrintf("%s",box);
mb->safePrintf("%"INT32" %s dead and not responding to "
"pings. See the "
"<a href=/admin/host>hosts table</a>.",
ps->m_numHostsDead ,s );
"<a href=/admin/hosts?c=%s>hosts table</a>.",
ps->m_numHostsDead ,s ,coll);
mb->safePrintf("%s",boxEnd);
}
@ -4062,7 +4107,8 @@ bool printRedBox ( SafeBuf *mb , TcpSocket *sock , HttpRequest *hr ) {
mb->safePrintf("All Threads are disabled. "
"Might hurt performance for doing system "
"calls which call 3rd party executables and "
"can take a int32_t time to run, like pdf2html.");
"can take a long time to run, "
"like pdf2html.");
mb->safePrintf("%s",boxEnd);
}
@ -4084,7 +4130,8 @@ bool printRedBox ( SafeBuf *mb , TcpSocket *sock , HttpRequest *hr ) {
"Might hurt performance because these "
"are calls to "
"3rd party executables and "
"can take a int32_t time to run, like pdf2html.");
"can take a long time to run, "
"like pdf2html.");
mb->safePrintf("%s",boxEnd);
}

209
Parms.cpp

@ -171,6 +171,9 @@ bool CommandUpdateSiteList ( char *rec ) {
// . we'll show it in a special msg box on all admin pages if required
// . command handler: records the user's approval of a rebalance on the
//   global g_rebalance object
// . "rec" is not read by this function
// . always returns true
bool CommandRebalance ( char *rec ) {
g_rebalance.m_userApproved = true;
// force this to on so it goes through
g_rebalance.m_numForeignRecs = 1;
// clear the "valid" flag.
// NOTE(review): presumably so the need for a rebalance is recomputed
// rather than taken from a previously stored answer -- confirm
g_rebalance.m_needsRebalanceValid = false;
return true;
}
@ -752,7 +755,7 @@ bool CommandReloadLanguagePages ( char *rec ) {
}
// . command handler: zeroes the kernel error count stored for this host
//   (g_hostdb.m_myHost)
// . "rec" is not read by this function
// . always returns true
bool CommandClearKernelError ( char *rec ) {
// NOTE(review): this listing is a rendered diff hunk. the next line writes
// the field directly on Host and looks like the pre-change side
g_hostdb.m_myHost->m_kernelErrors = 0;
// same reset, but through the host's m_pingInfo
g_hostdb.m_myHost->m_pingInfo.m_kernelErrors = 0;
return true;
}
@ -1179,10 +1182,8 @@ bool Parms::sendPageGeneric ( TcpSocket *s , HttpRequest *r ) {
if ( ! g_conf.m_allowCloudUsers &&
! isMasterAdmin &&
! isCollAdmin ) {
return g_httpServer.sendDynamicPage (s,
"",
0);
char *msg = "NO PERMISSION";
return g_httpServer.sendDynamicPage (s, msg,gbstrlen(msg));
}
//
@ -2628,9 +2629,12 @@ bool Parms::printParm ( SafeBuf* sb,
// }
// }
if ( m->m_flags & PF_TEXTAREA ) {
int rows = 10;
if ( m->m_flags & PF_SMALLTEXTAREA )
rows = 4;
sb->safePrintf ("<textarea id=tabox "
"name=%s rows=10 cols=80>",
cgi);
"name=%s rows=%i cols=80>",
cgi,rows);
//sb->dequote ( s , gbstrlen(s) );
// note it
//log("hack: %s",sx->getBufStart());
@ -2700,8 +2704,8 @@ bool Parms::printParm ( SafeBuf* sb,
strncpy ( s, "00:00", 5 );
char hr[3];
char min[3];
memcpy ( hr, s, 2 );
memcpy ( min, s + 3, 2 );
gbmemcpy ( hr, s, 2 );
gbmemcpy ( min, s + 3, 2 );
hr[2] = '\0';
min[2] = '\0';
// print the time in the input forms
@ -3009,7 +3013,7 @@ bool Parms::removeParm ( int32_t i , int32_t an , char *THIS ) {
// how much to bury it with
int32_t size = (num - an - 1 ) * m->m_size ;
// bury it
memcpy ( dst , src , size );
gbmemcpy ( dst , src , size );
// and detach the buf on the tail so it doesn't core in Mem.cpp
// when it tries to free...
@ -3236,7 +3240,7 @@ void Parms::setParm ( char *THIS , Parm *m , int32_t mm , int32_t j , char *s ,
memcmp ( dst , s , len ) == 0 )
return;
// this means that we can not use string POINTERS as parms!!
if ( ! isHtmlEncoded ) memcpy ( dst , s , len );
if ( ! isHtmlEncoded ) {gbmemcpy ( dst , s , len ); }
else len = htmlDecode (dst , s,len,false,0);
dst[len] = '\0';
// . might have to set length
@ -3376,7 +3380,7 @@ void Parms::setToDefault ( char *THIS , char objType , CollectionRec *argcr ) {
if ( ! argcr ) { char *xx=NULL;*xx=0; }
char *def = m->m_defOff+(char *)argcr;
char *dst = (char *)THIS + m->m_off;
memcpy ( dst , def , m->m_size );
gbmemcpy ( dst , def , m->m_size );
continue;
}
// leave arrays empty, set everything else to default
@ -3896,10 +3900,10 @@ skip2:
d++;
}
if ( ! *d ) last = d;
//memcpy ( p , "# " , 2 );
//gbmemcpy ( p , "# " , 2 );
//p += 2;
sb.safeMemcpy("# ",2);
//memcpy ( p , start , last - start );
//gbmemcpy ( p , start , last - start );
//p += last - start;
sb.safeMemcpy(start,last-start);
//*p++='\n';
@ -4349,7 +4353,7 @@ bool Parms::serializeConfParm( Parm *m, int32_t i, char **p, char *end,
// copy the parm's whole value
if ( sp->val + size > end )
return true; // overflow
memcpy( sp->val,
gbmemcpy( sp->val,
(char *)&g_conf + m->m_off, size );
// inc by tot size if array
*p += sizeof( *sp ) + size;
@ -4463,7 +4467,7 @@ bool Parms::serializeCollParm( CollectionRec *cr,
// copy whole value
if ( sp->val + size > end )
return true;
memcpy( sp->val,
gbmemcpy( sp->val,
(char *)cr + m->m_off,
size );
// inc by whole size of value
@ -4613,7 +4617,7 @@ void Parms::deserializeConfParm( Parm *m, SerParm *sp, char **p,
sp->size ) );
if ( ! goodParm ) {
// copy the new parm to m's loc
memcpy( (char *)&g_conf + m->m_off, sp->val,
gbmemcpy( (char *)&g_conf + m->m_off, sp->val,
sp->size );
// set num of member
@ -4741,7 +4745,7 @@ void Parms::deserializeCollParm( CollectionRec *cr,
if ( 0 != memcmp( sp->val, (char *)cr + m->m_off, sp->size) ) {
// copy the new value
memcpy( (char *)cr + m->m_off,
gbmemcpy( (char *)cr + m->m_off,
sp->val,
sp->size );
@ -6530,6 +6534,18 @@ void Parms::init ( ) {
m->m_flags = PF_REDBOX;
m++;
m->m_title = "show errors";
m->m_desc = "Show errors from generating search result summaries "
"rather than just hide the docid. Useful for debugging.";
m->m_cgi = "showerrors";
m->m_off = (char *)&si.m_showErrors - y;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_flags = PF_API;
m->m_page = PAGE_RESULTS;
m->m_obj = OBJ_SI;
m++;
m->m_title = "site cluster";
m->m_desc = "Should search results be site clustered? This "
"limits each site to appearing at most twice in the "
@ -6564,6 +6580,7 @@ void Parms::init ( ) {
"So documents must be exactly the same for the most part.";
m->m_cgi = "dr"; // dedupResultsByDefault";
m->m_off = (char *)&si.m_doDupContentRemoval - y;
m->m_defOff= (char *)&cr.m_dedupResultsByDefault - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 1;
@ -6911,7 +6928,7 @@ void Parms::init ( ) {
m->m_off = (char *)&si.m_defaultSortLang - y;
m->m_type = TYPE_CHARPTR;
//m->m_size = 6; // up to 5 chars + NULL, e.g. "en_US"
m->m_def = "xx";//_US";
m->m_def = "";//"xx";//_US";
m->m_group = 0;
m->m_flags = PF_API;
m->m_page = PAGE_RESULTS;
@ -9890,7 +9907,9 @@ void Parms::init ( ) {
m->m_cgi = "ms";
m->m_off = (char *)&g_conf.m_httpMaxSockets - g;
m->m_type = TYPE_LONG;
m->m_def = "100";
// up this some, am seeing sockets closed because of using gb
// as a cache...
m->m_def = "300";
m->m_page = PAGE_MASTER;
m->m_obj = OBJ_CONF;
m++;
@ -10284,11 +10303,18 @@ void Parms::init ( ) {
m->m_title = "ask for gzipped docs when downloading";
m->m_desc = "If this is true, gb will send Accept-Encoding: gzip "
"to web servers when doing http downloads.";
"to web servers when doing http downloads. It does have "
"a tendency to cause out-of-memory errors when you enable "
"this, so until that is fixed better, it's probably a good "
"idea to leave this disabled.";
m->m_cgi = "afgdwd";
m->m_off = (char *)&g_conf.m_gzipDownloads - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
// keep this default off because it seems some pages are huge
// uncompressed causing OOM errors and possibly corrupting stuff?
// not sure exactly, but i don't like going OOM. so maybe until
// that is fixed leave this off.
m->m_def = "0";
m->m_page = PAGE_MASTER;
m->m_obj = OBJ_CONF;
m++;
@ -12181,7 +12207,8 @@ void Parms::init ( ) {
m->m_title = "use threads for intersects and merges";
m->m_desc = "If enabled, Gigablast will use threads for these ops. "
"Default is now on in the event you have simultaneous queries "
"so one query does not hold back the other.";
"so one query does not hold back the other. There seems "
"to be a bug so leave this ON for now.";
//"Until pthreads is any good leave this off.";
m->m_cgi = "utfio";
m->m_off = (char *)&g_conf.m_useThreadsForIndexOps - g;
@ -12263,7 +12290,7 @@ void Parms::init ( ) {
m->m_cgi = "fw";
m->m_off = (char *)&g_conf.m_flushWrites - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_def = "0";
m->m_group = 0;
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_MASTER;
@ -14941,7 +14968,11 @@ void Parms::init ( ) {
"it consist of multiple documents separated by this "
"delimeter. Each such item will be injected as an "
"independent document. Some possible delimeters: "
"<i>========</i> or <i>&lt;doc&gt;</i>";
"<i>========</i> or <i>&lt;doc&gt;</i>. If you set "
"<i>hasmime</i> above to true then Gigablast will check "
"for a url after the delimeter and use that url as the "
"injected url. Otherwise it will append numbers to the "
"url you provide above.";
m->m_cgi = "delim";
m->m_obj = OBJ_GBREQUEST;
m->m_type = TYPE_CHARPTR;
@ -15107,6 +15138,23 @@ void Parms::init ( ) {
m->m_flags = PF_API ;
m++;
m->m_title = "recycle content";
m->m_desc = "If you check this box then Gigablast will not "
"re-download the content, but use the content that was "
"stored in the cache from last time. Useful for rebuilding "
"the index to pick up new inlink text or fresher "
"sitenuminlinks counts which influence ranking.";
m->m_cgi = "qrecycle";
m->m_obj = OBJ_GBREQUEST;
m->m_type = TYPE_CHECKBOX;
m->m_def = "0";
m->m_flags = PF_API;
m->m_page = PAGE_REINDEX;
m->m_off = (char *)&gr.m_recycleContent - (char *)&gr;
m++;
m->m_title = "FORCE DELETE";
m->m_desc = "Check this checkbox to delete the results, not just "
"reindex them.";
@ -15326,7 +15374,7 @@ void Parms::init ( ) {
"abbreviations at the bottom of the "
"<a href=/admin/filters>url filters</a> page.";
m->m_cgi = "qlang";
m->m_off = (char *)&cr.m_defaultSortLanguage - x;
m->m_off = (char *)&cr.m_defaultSortLanguage2 - x;
m->m_type = TYPE_STRING;
m->m_size = 6; // up to 5 chars + NULL, e.g. "en_US"
m->m_def = "xx";//_US";
@ -16916,27 +16964,34 @@ void Parms::init ( ) {
m->m_type = TYPE_LONG;
m->m_group = 0;
m++;
*/
m->m_title = "linkdb min files needed to trigger to merge";
m->m_desc = "Merge is triggered when this many linkdb data files "
"are on disk.";
"are on disk. Raise this when initially growing an index "
"in order to keep merging down.";
m->m_cgi = "mlkftm";
m->m_off = (char *)&cr.m_linkdbMinFilesToMerge - x;
m->m_def = "4";
m->m_def = "6";
m->m_type = TYPE_LONG;
m->m_group = 0;
m->m_flags = PF_CLONE;//PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_SPIDER;
m->m_obj = OBJ_COLL;
m++;
*/
//m->m_title = "tagdb min files to merge";
//m->m_desc = "Merge is triggered when this many linkdb data files "
// "are on disk.";
//m->m_cgi = "mtftm";
//m->m_off = (char *)&cr.m_tagdbMinFilesToMerge - x;
//m->m_def = "2";
//m->m_type = TYPE_LONG;
//m->m_group = 0;
//m++;
m->m_title = "tagdb min files to merge";
m->m_desc = "Merge is triggered when this many linkdb data files "
"are on disk.";
m->m_cgi = "mtftgm";
m->m_off = (char *)&cr.m_tagdbMinFilesToMerge - x;
m->m_def = "2";
m->m_type = TYPE_LONG;
m->m_group = 0;
m->m_flags = PF_CLONE;//PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_SPIDER;
m->m_obj = OBJ_COLL;
m++;
// this is overridden by collection
m->m_title = "titledb min files needed to trigger to merge";
@ -16964,13 +17019,15 @@ void Parms::init ( ) {
m->m_title = "posdb min files needed to trigger to merge";
m->m_desc = "Merge is triggered when this many posdb data files "
"are on disk.";
"are on disk. Raise this while doing massive injections "
"and not doing much querying. Then when done injecting "
"keep this low to make queries fast.";
m->m_cgi = "mpftm";
m->m_off = (char *)&cr.m_posdbMinFilesToMerge - x;
m->m_def = "6";
m->m_type = TYPE_LONG;
m->m_group = 0;
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_flags = PF_CLONE;//PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_SPIDER;
m->m_obj = OBJ_COLL;
m++;
@ -16997,12 +17054,17 @@ void Parms::init ( ) {
m->m_title = "enable link voting";
m->m_desc = "If this is true Gigablast will "
"index hyper-link text and use hyper-link "
"structures to boost the quality of indexed documents.";
"structures to boost the quality of indexed documents. "
"You can disable this when doing a ton of injections to "
"keep things fast. Then do a posdb (index) rebuild "
"after re-enabling this when you are done injecting. Or "
"if you simply do not want link voting this will speed up"
"your injections and spidering a bit.";
m->m_cgi = "glt";
m->m_off = (char *)&cr.m_getLinkInfo - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_flags = PF_CLONE|PF_API;//PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_SPIDER;
m->m_obj = OBJ_COLL;
m++;
@ -17602,14 +17664,18 @@ void Parms::init ( ) {
m->m_flags = PF_CLONE;
m++;
m->m_title = "index spider replies";
m->m_desc = "Index the spider replies of every url the spider "
m->m_title = "index spider status documents";
m->m_desc = "Index a spider status \"document\" "
"for every url the spider "
"attempts to spider. Search for them using special "
"query operators like type:status or gberrorstr:success or "
"stats:gberrornum to get a histogram. They will not otherwise "
"show up in the search results. This will not work for "
"diffbot crawlbot collections yet until it has proven "
"more stable.";
"stats:gberrornum to get a histogram. "
"See <a href=/syntax.html>syntax</a> page for more examples. "
"They will not otherwise "
"show up in the search results.";
// "This will not work for "
// "diffbot crawlbot collections yet until it has proven "
// "more stable.";
m->m_cgi = "isr";
m->m_off = (char *)&cr.m_indexSpiderReplies - x;
m->m_type = TYPE_BOOL;
@ -17618,7 +17684,7 @@ void Parms::init ( ) {
// and we add gbdocspidertime and gbdocindextime terms so you
// can use those to sort regular docs and not have spider reply
// status docs in the serps.
m->m_def = "1";
m->m_def = "0";
m->m_page = PAGE_SPIDER;
m->m_obj = OBJ_COLL;
m->m_flags = PF_CLONE;
@ -18339,9 +18405,15 @@ void Parms::init ( ) {
m->m_group = 0;
m++;
m->m_title = "recycle link info";
m->m_desc = "If enabled, gigablast will recycle the link info "
"when rebuilding titledb.";
m->m_title = "recycle link text";
m->m_desc = "If enabled, gigablast will recycle the link text "
"when rebuilding titledb. "
"The siterank, which is determined by the "
"number of inlinks to a site, is stored/cached in tagdb "
"so that is a separate item. If you want to pick up new "
"link text you will want to set this to <i>NO</i> and "
"make sure to rebuild titledb, since that stores the "
"link text.";
m->m_cgi = "rrli"; // repair full rebuild
m->m_off = (char *)&g_conf.m_rebuildRecycleLinkInfo - g;
m->m_type = TYPE_BOOL;
@ -18572,10 +18644,13 @@ void Parms::init ( ) {
m->m_group = 0;
m++;
/*
m->m_title = "skip tagdb lookup";
m->m_desc = "When rebuilding spiderdb and scanning it for new spiderdb "
"records, should a tagdb lookup be performed? Runs much much "
"faster without it. Will also keep the original doc quality and "
m->m_desc = "When rebuilding spiderdb and scanning it for new "
"spiderdb records, should a tagdb lookup be performed? "
"Runs much much "
"faster without it. Will also keep the original doc quality "
"and "
"spider priority in tact.";
m->m_cgi = "rssl";
m->m_off = (char *)&g_conf.m_rebuildSkipSitedbLookup - g;
@ -18585,6 +18660,7 @@ void Parms::init ( ) {
m->m_def = "0";
m->m_group = 0;
m++;
*/
///////////////////////////////////////////
// END PAGE REPAIR //
@ -18702,7 +18778,7 @@ void Parms::init ( ) {
//m->m_max = MAX_MASTER_PASSWORDS;
//m->m_size = PASSWORD_MAX_LEN+1;
//m->m_addin = 1; // "insert" follows?
m->m_flags = PF_PRIVATE | PF_TEXTAREA;
m->m_flags = PF_PRIVATE | PF_TEXTAREA | PF_SMALLTEXTAREA;
m++;
@ -18730,7 +18806,7 @@ void Parms::init ( ) {
//m->m_addin = 1; // "insert" follows?
//m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_obj = OBJ_CONF;
m->m_flags = PF_PRIVATE | PF_TEXTAREA;
m->m_flags = PF_PRIVATE | PF_TEXTAREA | PF_SMALLTEXTAREA;
m++;
// m->m_title = "remove connect ip";
@ -18826,7 +18902,11 @@ void Parms::init ( ) {
m->m_title = "Collection Passwords";
m->m_desc = "Whitespace separated list of passwords. "
"Any matching password will have administrative access "
"to the controls for just this collection.";
"to the controls for just this collection. The master "
"password and IPs are controled through the "
"<i>master passwords</i> link under the ADVANCED controls "
"tab. The master passwords or IPs have administrative "
"access to all collections.";
m->m_cgi = "collpwd";
m->m_xml = "collectionPasswords";
m->m_obj = OBJ_COLL;
@ -18834,7 +18914,7 @@ void Parms::init ( ) {
m->m_def = "";
m->m_type = TYPE_SAFEBUF; // STRINGNONEMPTY;
m->m_page = PAGE_COLLPASSWORDS;
m->m_flags = PF_PRIVATE | PF_TEXTAREA;
m->m_flags = PF_PRIVATE | PF_TEXTAREA | PF_SMALLTEXTAREA;
m++;
m->m_title = "Collection IPs";
@ -18848,7 +18928,7 @@ void Parms::init ( ) {
m->m_def = "";
m->m_type = TYPE_SAFEBUF; // STRINGNONEMPTY;
m->m_page = PAGE_COLLPASSWORDS;
m->m_flags = PF_PRIVATE | PF_TEXTAREA;
m->m_flags = PF_PRIVATE | PF_TEXTAREA | PF_SMALLTEXTAREA;
m++;
@ -21043,7 +21123,8 @@ bool Parms::doParmSendingLoop ( ) {
}
// debug log
log("parms: sending parm request to hostid %"INT32"",h->m_hostId);
log(LOG_INFO,"parms: sending parm request "
"to hostid %"INT32"",h->m_hostId);
// count it
pn->m_numRequests++;
@ -21770,7 +21851,7 @@ bool Parms::updateParm ( char *rec , WaitEntry *we ) {
}
else {
// and copy the data into collrec or g_conf
memcpy ( dst , data , dataSize );
gbmemcpy ( dst , data , dataSize );
}
SafeBuf val2;
@ -22591,7 +22672,7 @@ bool Parms::cloneCollRec ( char *dstCR , char *srcCR ) {
}
else {
// this should work for most types
memcpy ( dst , src , m->m_size );
gbmemcpy ( dst , src , m->m_size );
}
continue;
}
@ -22616,7 +22697,7 @@ bool Parms::cloneCollRec ( char *dstCR , char *srcCR ) {
}
else {
// this should work for most types
memcpy ( dst , src , m->m_size );
gbmemcpy ( dst , src , m->m_size );
}
src += m->m_size;

@ -188,7 +188,7 @@ class GigablastRequest {
int32_t m_ern;
char *m_qlang;
bool m_forceDel;
char m_recycleContent;
// useful bufs to copy data over
SafeBuf m_tmpBuf1;
SafeBuf m_tmpBuf2;
@ -233,6 +233,7 @@ class GigablastRequest {
#define PF_CLONE 0x20000
#define PF_PRIVATE 0x40000 // for password to not show in api
#define PF_SMALLTEXTAREA 0x80000
class Parm {
public:

@ -177,7 +177,7 @@ void PingServer::initKernelErrorCheck(){
// clear the kernel Errors
for ( int32_t i = 0; i < g_hostdb.m_numHosts; i++ ){
g_hostdb.m_hosts[i].m_kernelErrors = 0;
g_hostdb.m_hosts[i].m_pingInfo.m_kernelErrors = 0;
g_hostdb.m_hosts[i].m_kernelErrorReported = false;
}
@ -271,15 +271,15 @@ void PingServer::sendPingsToAll ( ) {
}
class HostStatus {
public:
int64_t m_lastPing;
char m_repairMode;
char m_kernelError;
char m_loadAvg;
char m_percentMemUsed;
// class HostStatus {
// public:
// int64_t m_lastPing;
// char m_repairMode;
// char m_kernelError;
// char m_loadAvg;
// char m_percentMemUsed;
};
// };
// ping host #i
void PingServer::pingHost ( Host *h , uint32_t ip , uint16_t port ) {
@ -352,6 +352,7 @@ void PingServer::pingHost ( Host *h , uint32_t ip , uint16_t port ) {
// only ping a host once every 5 seconds tops
//if ( now - h->m_lastPing < 5000 ) return;
// stamp it
//h->m_pingInfo.m_lastPing = nowmsLocal;
h->m_lastPing = nowmsLocal;
// count it
s_outstandingPings++;
@ -397,32 +398,58 @@ void PingServer::pingHost ( Host *h , uint32_t ip , uint16_t port ) {
// use the tmp buf
//char request[14+4+4+1];
//char *p = m_request;
// we can have multiple outstanding pings, so keep request bufs
// independent...
char *request = h->m_requestBuf;
char *p = h->m_requestBuf;
//char *request = h->m_requestBuf;
//char *p = h->m_requestBuf;
// we send our stats to host "h"
PingInfo *pi = &me->m_pingInfo;//RequestBuf;
pi->m_numCorruptDiskReads = g_numCorrupt;
pi->m_numOutOfMems = g_mem.m_outOfMems;
pi->m_socketsClosedFromHittingLimit = g_stats.m_closedSockets;
pi->m_currentSpiders = g_spiderLoop.m_numSpidersOut;
// store the last ping we got from it first
*(int64_t *)p = h->m_lastPing; p += sizeof(int64_t);
//*(int64_t *)p = h->m_lastPing; p += sizeof(int64_t);
// i don't think this should be in pinginfo
//pi->m_lastPing = h->m_pingInfo.m_lastPing;
// let the receiver know our repair mode
*p = g_repairMode; p++;
//*p = g_repairMode; p++;
pi->m_repairMode = g_repairMode;
// problem is that we know when the error occurs, but don't know when
// the error has been fixed. So just consider this host as dead unless
// gb is restarted and the problem is fixed
*p = me->m_kernelErrors; p++;
//*p = me->m_kernelErrors; p++;
//pi->m_kernelErrors = me->m_pingInfo.m_kernelErrors;
//if ( me->m_kernelErrors ){
//char *xx = NULL; *xx = 0;
//}
int32_t l_loadavg = (int32_t) (g_process.getLoadAvg() * 100.0);
memcpy(p, &l_loadavg, sizeof(int32_t)); p += sizeof(int32_t);
//gbmemcpy(p, &l_loadavg, sizeof(int32_t)); p += sizeof(int32_t);
pi->m_loadAvg = l_loadavg ;
// then our percent mem used
float mem = ((float)g_mem.getUsedMem()*100.0)/(float)g_mem.getMaxMem();
*(float *)p = mem ; p += sizeof(float); // 4 bytes
//*(float *)p = mem ; p += sizeof(float); // 4 bytes
pi->m_percentMemUsed = mem;
// our cpu usage
*(float *)p = me->m_cpuUsage ; p += sizeof(float); // 4 bytes
//*(float *)p = me->m_cpuUsage ; p += sizeof(float); // 4 bytes
//pi->m_cpuUsage = me->m_pingInfo.m_cpuUsage;
// our num recs, docsIndexed
//*(int32_t*)p = (int32_t)g_clusterdb.getRdb()->getNumTotalRecs();
*(int32_t*)p = (int32_t)g_process.getTotalDocsIndexed();
p += sizeof(int32_t);
// *(int32_t*)p = (int32_t)g_process.getTotalDocsIndexed();
// p += sizeof(int32_t);
pi->m_totalDocsIndexed = (int32_t)g_process.getTotalDocsIndexed();
// urls indexed since startup
//*(int32_t*)p = (int32_t)g_test.m_urlsIndexed;
//p += sizeof(int32_t);
@ -431,16 +458,21 @@ void PingServer::pingHost ( Host *h , uint32_t ip , uint16_t port ) {
//*(int32_t *)p = 0;
//p += sizeof(int32_t);
// slow disk reads
*(int32_t*)p = g_stats.m_slowDiskReads;
p += sizeof(int32_t);
// *(int32_t*)p = g_stats.m_slowDiskReads;
// p += sizeof(int32_t);
pi->m_slowDiskReads = g_stats.m_slowDiskReads;
// and hosts.conf crc
*(int32_t *)p = g_hostdb.getCRC(); p += 4;
//*(int32_t *)p = g_hostdb.getCRC(); p += 4;
pi->m_hostsConfCRC = g_hostdb.getCRC();
// ensure crc is legit
if ( g_hostdb.getCRC() == 0 ) { char *xx=NULL;*xx=0; }
// disk usage (df -ka)
*(float *)p = g_process.m_diskUsage; p += 4;
//*(float *)p = g_process.m_diskUsage; p += 4;
pi->m_diskUsage = g_process.m_diskUsage;
// flags indicating our state
int32_t flags = 0;
@ -457,34 +489,48 @@ void PingServer::pingHost ( Host *h , uint32_t ip , uint16_t port ) {
if ( g_dailyMerge.m_mergeMode == 0 ) flags |= PFLAG_MERGEMODE0;
if ( g_dailyMerge.m_mergeMode ==0 || g_dailyMerge.m_mergeMode == 6 )
flags |= PFLAG_MERGEMODE0OR6;
if ( ! isClockInSync() ) flags |= PFLAG_OUTOFSYNC;
*(int32_t *)p = flags; p += 4; // 4 bytes
//*(int32_t *)p = flags; p += 4; // 4 bytes
pi->m_flags = flags;
// the collection number we are daily merging (currently 2 bytes)
collnum_t cn = -1;
if ( g_dailyMerge.m_cr ) cn = g_dailyMerge.m_cr->m_collnum;
*(collnum_t *)p = cn ; p += sizeof(collnum_t);
//*(collnum_t *)p = cn ; p += sizeof(collnum_t);
pi->m_dailyMergeCollnum = cn;
pi->m_hostId = me->m_hostId;
pi->m_localHostTimeMS = gettimeofdayInMillisecondsLocal();
pi->m_udpSlotsInUse = g_udpServer.getNumUsedSlots();
// store hd temps
memcpy ( p , me->m_hdtemps , 4 * 2 );
p += 4 * 2;
// gbmemcpy ( p , me->m_hdtemps , 4 * 2 );
// p += 4 * 2;
//gbmemcpy ( &pi->m_hdtemps , me->m_pingInfo.m_hdtemps , 4 * 2 );
// store the gbVersionStrBuf now, just a date with a \0 included
char *v = getVersion();
int32_t vsize = getVersionSize(); // 21 bytes
memcpy ( p , v , vsize );
p += vsize;
// gbmemcpy ( p , v , vsize );
// p += vsize;
if ( vsize != 21 ) { char *xx=NULL;*xx=0; }
gbmemcpy ( pi->m_gbVersionStr , v , vsize );
// int32_t requestSize = sizeof(PingRequest);//p - request;
// // sanity check
// if ( requestSize != sizeof(PingRequest) ) {
// // (44+4+4+21) ) { // MAX_PING_SIZE ) {
// log("ping: "
// "YOU ARE MIXING MULTIPLE GB VERSIONS IN YOUR CLUSTER. "
// "MAKE SURE THEY ARE ALL THE SAME GB BINARY");
// char *xx = NULL; *xx = 0; }
int32_t requestSize = p - request;
// sanity check
if ( requestSize != MAX_PING_SIZE ) {
log("ping: "
"YOU ARE MIXING MULTIPLE GB VERSIONS IN YOUR CLUSTER. "
"MAKE SURE THEY ARE ALL THE SAME GB BINARY");
char *xx = NULL; *xx = 0; }
//char *request = (char *)pi;
// debug msg
//logf(LOG_DEBUG,"net: Sending ping request to hid=%"INT32" ip=%s.",
@ -509,8 +555,8 @@ void PingServer::pingHost ( Host *h , uint32_t ip , uint16_t port ) {
port++;
if ( h->m_isProxy ) hostId = -1;
if ( g_udpServer.sendRequest ( request ,
requestSize ,
if ( g_udpServer.sendRequest ( (char *)pi , //request ,
sizeof(PingInfo),//requestSize ,
0x11 ,
ip ,//h->m_ip ,
port ,//h->m_port ,
@ -683,16 +729,16 @@ void gotReplyWrapperP ( void *state , UdpSlot *slot ) {
h->m_wasEverAlive = true;
}
if ( isAlive && h->m_percentMemUsed >= 99.0 &&
if ( isAlive && h->m_pingInfo.m_percentMemUsed >= 99.0 &&
h->m_firstOOMTime == 0 )
h->m_firstOOMTime = nowms;
if ( isAlive && h->m_percentMemUsed < 99.0 )
if ( isAlive && h->m_pingInfo.m_percentMemUsed < 99.0 )
h->m_firstOOMTime = 0LL;
// if this host is alive and has been at 99% or more mem usage
// for the last X minutes, and we have got at least 10 ping replies
// from him, then send an email alert.
if ( isAlive &&
h->m_percentMemUsed >= 99.0 &&
h->m_pingInfo.m_percentMemUsed >= 99.0 &&
nowms - h->m_firstOOMTime >= g_conf.m_sendEmailTimeout )
g_pingServer.sendEmail ( h , NULL , true , true );
@ -703,9 +749,12 @@ void gotReplyWrapperP ( void *state , UdpSlot *slot ) {
if ( ! isAlive && nowms - h->m_startTime >= g_conf.m_sendEmailTimeout)
g_pingServer.sendEmail ( h ) ;
PingInfo *pi = &h->m_pingInfo;
// if this host is alive but has some kernel error, then send an
// email alert.
if ( h->m_kernelErrors && !h->m_kernelErrorReported ){
if ( pi->m_kernelErrors && !h->m_kernelErrorReported ){
log("net: Host #%"INT32" is reporting kernel errors.",
h->m_hostId);
// MDW: disable for now, so does not wake us up at 3am
@ -714,7 +763,7 @@ void gotReplyWrapperP ( void *state , UdpSlot *slot ) {
}
// reset if the machine has come back up
if ( !h->m_kernelErrors && h->m_kernelErrorReported )
if ( !pi->m_kernelErrors && h->m_kernelErrorReported )
h->m_kernelErrorReported = false;
// reset in case sendEmail() set it
@ -767,9 +816,13 @@ void gotReplyWrapperP ( void *state , UdpSlot *slot ) {
if ( h->m_isProxy ) hid = -1;
// send back what his ping was so he knows
*(int32_t *)h->m_requestBuf = *pingPtr;
//*(int32_t *)h->m_requestBuf = *pingPtr;
//h->m_pingInfo.m_lastPing = *pingPtr;
*(int32_t *)h->m_tmpBuf = *pingPtr;
if ( g_udpServer.sendRequest ( h->m_requestBuf ,
if ( g_udpServer.sendRequest (h->m_tmpBuf,//RequestBuf,
//h->m_requestBuf ,
4 , // 4 byte request
0x11 ,
slot->m_ip , // h->m_ip ,
@ -885,10 +938,20 @@ void handleRequest11 ( UdpSlot *slot , int32_t niceness ) {
// . a request size of 10 means to set g_repairMode to 1
// . it can only be advanced to 2 when we receive ping replies from
// everyone that they are not spidering or merging titledb...
if ( requestSize == MAX_PING_SIZE){//14+4+4+4+4+sizeof(collnum_t)+1 ) {
if ( requestSize == sizeof(PingInfo)){
//MAX_PING_SIZE
//14+4+4+4+4+sizeof(collnum_t)+1 ) {
// sanity
PingInfo *pi2 = (PingInfo *)request;
if ( pi2->m_hostId != h->m_hostId ) {
char *xx=NULL;*xx=0; }
// now we just copy the class
gbmemcpy ( &h->m_pingInfo , request , requestSize );
/*
char* p = request + 10;
// fetch load avg...
h->m_loadAvg = ((double)(*((int32_t*)(p)))) / 100.0;
p += sizeof(int32_t);
@ -902,6 +965,7 @@ void handleRequest11 ( UdpSlot *slot , int32_t niceness ) {
// the host's global doc count.
h->m_docsIndexed = *(int32_t*)(p);
p += sizeof(int32_t);
*/
// how many we indexed since startup
//h->m_urlsIndexed = *(int32_t*)(p);
@ -911,6 +975,7 @@ void handleRequest11 ( UdpSlot *slot , int32_t niceness ) {
//h->m_eventsIndexed = *(int32_t*)(p);
//p += sizeof(int32_t);
/*
// slow disk reads is important
h->m_slowDiskReads = *(int32_t*)(p);
p += sizeof(int32_t);
@ -933,18 +998,20 @@ void handleRequest11 ( UdpSlot *slot , int32_t niceness ) {
p += sizeof(collnum_t);
// the 4 hd temps
memcpy ( h->m_hdtemps , p , 4 * 2 );
gbmemcpy ( h->m_hdtemps , p , 4 * 2 );
p += 4 * 2;
// at the end the gbVersionStrBuf
int32_t vsize = getVersionSize(); // 21
memcpy ( h->m_gbVersionStrBuf , p , vsize );
gbmemcpy ( h->m_gbVersionStrBuf , p , vsize );
p += vsize;
*/
// if any one of them is overheating, then turn off
// spiders on ourselves (and thus the full cluster)
for ( int32_t k = 0 ; k < 4 ; k++ )
if ( h->m_hdtemps[k] > g_conf.m_maxHardDriveTemp )
if ( h->m_pingInfo.m_hdtemps[k] >
g_conf.m_maxHardDriveTemp )
g_conf.m_spideringEnabled = 0;
@ -970,27 +1037,31 @@ void handleRequest11 ( UdpSlot *slot , int32_t niceness ) {
// . 4 means done repairing and ready to exit repair mode,
// but waiting for other hosts to be mode 4 or 0 before
// it can exit, too, and go back to mode 0
/*
char mode = request[8];
h->m_kernelErrors = request[9];
*/
char mode = h->m_pingInfo.m_repairMode;
//if ( h->m_kernelErrors ){
//char *xx = NULL; *xx = 0;
//}
// set his repair mode in the hosts table
if ( h ) {
// if his mode is 2 he is ready to start repairing
// because he has stopped all spiders and titledb
// merges from happening. if he just entered mode
// 2 now, check to see if all hosts are in mode 2 now.
char oldMode = h->m_repairMode;
// update his repair mode
h->m_repairMode = mode;
// . get the MIN repair mode of all hosts
// . expensive, so only do if a host changes modes
if ( oldMode != mode ||
g_pingServer.m_minRepairMode == -1 )
g_pingServer.setMinRepairMode ( h );
}
//if ( h ) {
// if his mode is 2 he is ready to start repairing
// because he has stopped all spiders and titledb
// merges from happening. if he just entered mode
// 2 now, check to see if all hosts are in mode 2 now.
char oldMode = h->m_repairMode;
// update his repair mode
h->m_repairMode = mode;
// . get the MIN repair mode of all hosts
// . expensive, so only do if a host changes modes
if ( oldMode != mode ||
g_pingServer.m_minRepairMode == -1 )
g_pingServer.setMinRepairMode ( h );
//}
// make it a normal ping now
requestSize = 8;
}
@ -1013,16 +1084,16 @@ void handleRequest11 ( UdpSlot *slot , int32_t niceness ) {
int32_t i; for ( i = 0 ; i < g_hostdb.getNumGrunts() ; i++ ) {
Host *h = &g_hostdb.m_hosts[i];
if ( h->m_flags & PFLAG_FOREIGNRECS )
if ( h->m_pingInfo.m_flags & PFLAG_FOREIGNRECS )
ps->m_numHostsWithForeignRecs++;
if ( g_hostdb.isDead ( h ) )
ps->m_numHostsDead++;
// skip if not received yet
if ( ! h->m_hostsConfCRC ) continue;
if ( ! h->m_pingInfo.m_hostsConfCRC ) continue;
// badness?
if ( h->m_hostsConfCRC != g_hostdb.m_crc ) {
if ( h->m_pingInfo.m_hostsConfCRC != g_hostdb.m_crc ) {
ps->m_hostsConfInDisagreement = true;
break;
}
@ -1495,13 +1566,13 @@ bool PingServer::sendEmail ( Host *h ,
char hr[3],min[3];
hr[2] = '\0';
min[2] = '\0';
memcpy ( hr, g_conf.m_delayEmailsAfter, 2 );
gbmemcpy ( hr, g_conf.m_delayEmailsAfter, 2 );
deaHr = atoi(hr);
memcpy ( min, g_conf.m_delayEmailsAfter + 3, 2 );
gbmemcpy ( min, g_conf.m_delayEmailsAfter + 3, 2 );
deaMin = atoi(min);
memcpy ( hr, g_conf.m_delayEmailsBefore, 2 );
gbmemcpy ( hr, g_conf.m_delayEmailsBefore, 2 );
debHr = atoi(hr);
memcpy ( min, g_conf.m_delayEmailsBefore + 3, 2 );
gbmemcpy ( min, g_conf.m_delayEmailsBefore + 3, 2 );
debMin = atoi(min);
//get the current time. use getTime() because
//then it is sync'ed with host 0's time.
@ -2175,8 +2246,8 @@ bool pageSprintPCS2 ( void *state , TcpSocket *s) {
if ( ! ss ) { char *xx = NULL; *xx = 0; }
ss += 8; // points to right after the space
// insert the cookie
memcpy ( ss+cookieLen , ss , pend - ss );
memcpy ( ss , cookie , cookieLen );
gbmemcpy ( ss+cookieLen , ss , pend - ss );
gbmemcpy ( ss , cookie , cookieLen );
pend += cookieLen;
// replace xxx
@ -2403,7 +2474,7 @@ bool PingServer::broadcastShutdownNotes ( bool sendEmailAlert ,
// request will be freed by UdpServer
//char *r = (char *) mmalloc ( 4 , "PingServer" );
//if ( ! r ) return true;
//memcpy ( r , (char *)(&h->m_hostId) , 4 );
//gbmemcpy ( r , (char *)(&h->m_hostId) , 4 );
// send it right now
if ( g_udpServer.sendRequest ( s_buf ,
5 , // rqstSz
@ -2774,7 +2845,7 @@ void checkKernelErrors( int fd, void *state ){
// at the kernel ring buffer, keep returning until someone clicks
// 'clear kernel error message' control in Master Controls.
// but don't return before copying over the new buffer
if ( me->m_kernelErrors > 0 ) return;
if ( me->m_pingInfo.m_kernelErrors > 0 ) return;
// check if we match any error strings in master controls
char *p = NULL;
@ -2800,14 +2871,14 @@ void checkKernelErrors( int fd, void *state ){
if ( strncasestr ( p, gbstrlen(p) , "scsi" ) &&
g_numIOErrors > s_lastCount ) {
me->m_kernelErrors = ME_IOERR;
me->m_pingInfo.m_kernelErrors = ME_IOERR;
s_lastCount = g_numIOErrors;
}
else if ( strncasestr ( p, gbstrlen(p), "100 mbps" ) )
me->m_kernelErrors = ME_100MBPS;
me->m_pingInfo.m_kernelErrors = ME_100MBPS;
// assume an I/O IO error here otherwise
else if ( g_numIOErrors > s_lastCount ) {
me->m_kernelErrors = ME_UNKNWN;
me->m_pingInfo.m_kernelErrors = ME_UNKNWN;
s_lastCount = g_numIOErrors;
}
log ( LOG_DEBUG,"PingServer: error message in "
@ -3226,6 +3297,7 @@ bool sendNotification ( EmailInfo *ei ) {
, uu.getPath()
);
fullReq.safeMemcpy ( uu.getHost() , uu.getHostLen() );
fullReq.safePrintf("\r\n");
// make custom headers
fullReq.safePrintf ("X-Crawl-Name: %s\r\n"
// last \r\n is added in HttpRequest.cpp

@ -138,7 +138,7 @@ bool Pops::makeLocalPopFile ( char *coll ) {
goto loop;
}
// new top?
if ( size == ks ) { memcpy ( top , p + (ks-6) , 6 ); haveTop = true; }
if ( size == ks ) { gbmemcpy ( top , p + (ks-6) , 6 ); haveTop = true; }
// warning msg
if ( ! haveTop && ! warned ) {
warned = true;
@ -158,7 +158,7 @@ bool Pops::makeLocalPopFile ( char *coll ) {
if ( count >= minDocs ) {
// if so, store the upper 4 bytes of the termid
int32_t h;
memcpy ( &h , tmp+8 , 4 );
gbmemcpy ( &h , tmp+8 , 4 );
// write it out
out.write ( &h , 4 );
// and the count
@ -174,8 +174,8 @@ bool Pops::makeLocalPopFile ( char *coll ) {
// make the key
memcpy ( tmp , p , ks-6 );
memcpy ( tmp + ks-6 , top , 6 );
gbmemcpy ( tmp , p , ks-6 );
gbmemcpy ( tmp + ks-6 , top , 6 );
// print the key
//if ( ks == 12 )
// fprintf(stdout,"%08lli) %08"XINT32" %016"XINT64"\n",

Some files were not shown because too many files have changed in this diff Show More