Merge branch 'master' into diffbot

This commit is contained in:
Matt Wells
2013-08-30 16:33:00 -07:00
11 changed files with 147 additions and 66 deletions

@ -220,7 +220,7 @@ bool Log::logR ( long long now , long type , char *msg , bool asterisk ,
// thread id if in "thread"
if ( pid != s_pid && s_pid != -1 ) {
//sprintf ( p , "[%li] " , (long)getpid() );
sprintf ( p , "[%li] " , (long)pid );
sprintf ( p , "[%lu] " , (unsigned long)pid );
p += gbstrlen ( p );
}
// then message itself

@ -833,7 +833,9 @@ void sigalrmHandler ( int x , siginfo_t *info , void *y ) {
// if we missed too many, then dump core
if ( g_niceness == 1 && g_missedQuickPolls >= 4 ) {
g_inSigHandler = true;
log("loop: missed quickpoll");
g_inSigHandler = false;
// seems to core a lot in gbcompress(). we need to
// put a quickpoll into zlib's deflate() or its
// deflate_slow() or longest_match() function

@ -670,7 +670,7 @@ Log.o: Log.cpp gb-include.h types.h fctypes.h Unicode.h \
UnicodeProperties.h UCPropTable.h iconv.h UCNormalizer.h hash.h Errno.h \
Log.h Mem.h Conf.h Xml.h XmlNode.h Lang.h Iso8859.h iana_charset.h \
File.h Loop.h ip.h Hostdb.h HttpRequest.h SafeBuf.h Url.h TcpSocket.h \
Collectiondb.h Process.h Msg28.h
Collectiondb.h Process.h Msg28.h Threads.h
Loop.o: Loop.cpp gb-include.h types.h fctypes.h Unicode.h \
UnicodeProperties.h UCPropTable.h iconv.h UCNormalizer.h hash.h Errno.h \
Log.h Loop.h Mem.h Conf.h Xml.h XmlNode.h Lang.h Iso8859.h \

@ -820,7 +820,7 @@ bool Msg5::needsRecall ( ) {
// the disk... we should really keep stats on this...
logIt = true;
// seems to be very common for doledb, so don't log unless extreme
if ( m_rdbId == RDB_DOLEDB && m_round < 15 ) logIt = false;
//if ( m_rdbId == RDB_DOLEDB && m_round < 15 ) logIt = false;
if ( logIt )
logf(LOG_DEBUG,"db: Reading %li again from %s (need %li total "
"got %li) this=0x%lx round=%li.",

@ -3545,7 +3545,7 @@ bool Proxy::hitCreditCard ( StateUser *su ) {
//
// INSERT YOUR secret transaction/api key for authorize.net
//
#ifdef PRIVATESTUFF
#ifdef _PRIVATESTUFF_
url.safePrintf("&x_tran_key=%s",g_secret_tran_key);
url.safePrintf("&x_login=%s",g_secret_api_key);
#else

24
Rdb.cpp

@ -2163,7 +2163,8 @@ bool Rdb::addRecord ( collnum_t collnum,
}
else if ( (tn=m_tree.addNode ( collnum, key , data , dataSize ))>=0) {
// if adding to spiderdb, add to cache, too
if ( m_rdbId != RDB_SPIDERDB ) return true;
// bail for every rdb except spiderdb and doledb, which both need
// the extra handling below. this must be "&&": with "||" the test
// is true for every rdbId, so we would always return here and the
// RDB_DOLEDB cursor update further down could never run.
if ( m_rdbId != RDB_SPIDERDB && m_rdbId != RDB_DOLEDB )
return true;
// or if negative key
if ( KEYNEG(key) ) return true;
// . this will create it if spiders are on and its NULL
@ -2172,6 +2173,27 @@ bool Rdb::addRecord ( collnum_t collnum,
SpiderColl *sc = g_spiderCache.getSpiderColl(collnum);
// skip if not there
if ( ! sc ) return true;
// if doing doledb...
if ( m_rdbId == RDB_DOLEDB ) {
long pri = g_doledb.getPriority((key_t *)key);
// skip over corruption
if ( pri < 0 || pri >= MAX_SPIDER_PRIORITIES )
return true;
// if added positive key is before cursor, update cursor
if ( KEYCMP((char *)key,
(char *)&sc->m_nextKeys[pri],
sizeof(key_t)) < 0 ) {
KEYSET((char *)&sc->m_nextKeys[pri],
(char *)key,
sizeof(key_t) );
// debug log
if ( g_conf.m_logDebugSpider )
log("rdb: cursor reset pri=%li to %s",
pri,KEYSTR(key,12));
}
// that's it for doledb mods
return true;
}
// . ok, now add that reply to the cache
// . g_now is in milliseconds!
//long nowGlobal = localToGlobalTimeSeconds ( g_now/1000 );

@ -1251,6 +1251,15 @@ void SpiderColl::reset ( ) {
m_waitingTable.reset();
m_waitingTree .reset();
m_waitingMem .reset();
// each spider priority in the collection has essentially a cursor
// that references the next spider rec in doledb to spider. it is
// used as a performance hack to avoid the massive positive/negative
// key annihilations related to starting at the top of the priority
// queue every time we scan it, which causes us to do upwards of
// 300 re-reads!
for ( long i = 0 ; i < MAX_SPIDER_PRIORITIES ; i++ )
m_nextKeys[i] = g_doledb.makeFirstKey2 ( i );
}
bool SpiderColl::updateSiteNumInlinksTable ( long siteHash32,
@ -3144,6 +3153,9 @@ void SpiderLoop::spiderDoledUrls ( ) {
// can't get spidered until the one that is doled does.
if ( g_conf.m_testSpiderEnabled ) maxSpiders = 6;
}
// debug log
if ( g_conf.m_logDebugSpider )
log("spider: has %li spiders out",m_sc->m_spidersOut);
// obey max spiders per collection too
if ( m_sc->m_spidersOut >= maxSpiders ) continue;
// ok, we are good to launch a spider for coll m_cri
@ -3203,8 +3215,13 @@ void SpiderLoop::spiderDoledUrls ( ) {
m_sc->m_didRound = true;
// reset for next coll
m_sc->m_pri = MAX_SPIDER_PRIORITIES - 1;
// reset key now too since this coll was exhausted WE HIT HERE!!!
m_sc->m_nextDoledbKey = g_doledb.makeFirstKey2 ( m_sc->m_pri );
// reset key now too since this coll was exhausted
//m_sc->m_nextDoledbKey=g_doledb.makeFirstKey2 ( m_sc->m_pri );
// we can't keep starting over because there are often tons
// of annihilations between positive and negative keys
// and causes massive disk slow down because we have to do
// like 300 re-reads or more of about 2k each on coeus
m_sc->m_nextDoledbKey = m_sc->m_nextKeys [ m_sc->m_pri ];
// and go up top
goto collLoop;
}
@ -3229,8 +3246,9 @@ void SpiderLoop::spiderDoledUrls ( ) {
m_sc->m_pri--;
// set the new key for this priority if valid
if ( m_sc->m_pri >= 0 )
m_sc->m_nextDoledbKey =
g_doledb.makeFirstKey2(m_sc->m_pri);
//m_sc->m_nextDoledbKey =
// g_doledb.makeFirstKey2(m_sc->m_pri);
m_sc->m_nextDoledbKey = m_sc->m_nextKeys [m_sc->m_pri];
// and try again
goto loop;
}
@ -3239,8 +3257,15 @@ void SpiderLoop::spiderDoledUrls ( ) {
m_gettingDoledbList = true;
// log this now
//if ( g_conf.m_logDebugSpider )
// logf(LOG_DEBUG,"spider: loading list from doledb");
if ( g_conf.m_logDebugSpider ) {
m_doleStart = gettimeofdayInMillisecondsLocal();
// 12 byte doledb keys
long pri = g_doledb.getPriority(&m_sc->m_nextDoledbKey);
logf(LOG_DEBUG,"spider: loading list from doledb startkey=%s "
"pri=%li",
KEYSTR(&m_sc->m_nextDoledbKey,12),
pri);
}
// get a spider rec for us to spider from doledb
if ( ! m_msg5.getList ( RDB_DOLEDB ,
@ -3254,6 +3279,7 @@ void SpiderLoop::spiderDoledUrls ( ) {
// we need to read in a lot because we call
// "goto listLoop" below if the url we want
// to dole is locked.
// seems like a ton of negative recs
2000 , // minRecSizes
true , // includeTree
false , // addToCache
@ -3309,6 +3335,16 @@ bool SpiderLoop::gotDoledbList2 ( ) {
// unlock
m_gettingDoledbList = false;
// log this now
if ( g_conf.m_logDebugSpider ) {
long long now = gettimeofdayInMillisecondsLocal();
long long took = now - m_doleStart;
logf(LOG_DEBUG,"spider: GOT list from doledb in %llims "
"size=%li bytes",
took,m_list.getListSize());
}
// bail instantly if in read-only mode (no RdbTrees!)
if ( g_conf.m_readOnlyMode ) return false;
// or if doing a daily merge
@ -3322,6 +3358,9 @@ bool SpiderLoop::gotDoledbList2 ( ) {
// bail if list is empty
if ( m_list.getListSize() <= 0 ) {
if ( g_conf.m_logDebugSpider )
log("spider: resetting doledb priority pri=%li",
m_sc->m_pri);
// trigger a reset
m_sc->m_pri = -1;
// . let the sleep timer init the loop again!
@ -3384,6 +3423,14 @@ bool SpiderLoop::gotDoledbList2 ( ) {
// get priority from doledb key
long pri = g_doledb.getPriority ( doledbKey );
if ( g_conf.m_logDebugSpider )
log("spider: setting pri=%li nextkey to %s",
m_sc->m_pri,KEYSTR(&m_sc->m_nextDoledbKey,12));
// update next doledbkey for this priority
m_sc->m_nextKeys [ m_sc->m_pri ] = m_sc->m_nextDoledbKey;
// sanity
if ( pri < 0 || pri >= MAX_SPIDER_PRIORITIES ) { char *xx=NULL;*xx=0; }
// skip the priority if we already have enough spiders on it
@ -3411,7 +3458,8 @@ bool SpiderLoop::gotDoledbList2 ( ) {
// all done if priority is negative
if ( m_sc->m_pri < 0 ) return true;
// set to next priority otherwise
m_sc->m_nextDoledbKey = g_doledb.makeFirstKey2 ( m_sc->m_pri );
//m_sc->m_nextDoledbKey=g_doledb.makeFirstKey2 ( m_sc->m_pri );
m_sc->m_nextDoledbKey = m_sc->m_nextKeys [m_sc->m_pri];
// and load that list
return true;
}

@ -869,6 +869,9 @@ class SpiderColl {
SpiderReply *srep,
uint64_t nowGlobalMS);
// doledb cursor keys for each priority to speed up performance
key_t m_nextKeys[MAX_SPIDER_PRIORITIES];
// maps priority to first ufn that uses that
// priority. map to -1 if no ufn uses it. that way when we scan
// priorities for spiderrequests to dole out we can start with
@ -1125,6 +1128,8 @@ class SpiderLoop {
// for round robining in SpiderLoop::doleUrls(), etc.
long m_cri;
long long m_doleStart;
long m_processed;
};

@ -702,8 +702,9 @@ ThreadEntry *ThreadQueue::addEntry ( long niceness ,
if ( i == m_top ) m_top++;
// debug msg
if ( g_conf.m_logDebugThread )
log(LOG_DEBUG,"thread: [%lu] queued %s thread. "
"niceness=%lu. ", (long)t,getThreadType(), niceness );
log(LOG_DEBUG,"thread: [t=0x%lx] queued %s thread for launch. "
"niceness=%lu. ", (unsigned long)t,
getThreadType(), niceness );
// success
return t;
}
@ -889,6 +890,12 @@ bool ThreadQueue::timedCleanUp ( long maxNiceness ) {
log("threads: pthread_join %li = %s (%li)",
(long)t->m_joinTid,mstrerror(status),status);
}
// debug msg
if ( g_conf.m_logDebugThread )
log(LOG_DEBUG,"thread: joined1 with t=0x%lx "
"jointid=0x%lx.",
(long)t,(long)t->m_joinTid);
#else
again:
@ -897,8 +904,8 @@ bool ThreadQueue::timedCleanUp ( long maxNiceness ) {
int err = errno;
// debug the waitpid
if ( g_conf.m_logDebugThread || g_process.m_exiting )
log(LOG_DEBUG,"thread: Waiting for t=%lu pid=%li.",
(long)t,(long)t->m_pid);
log(LOG_DEBUG,"thread: Waiting for t=0x%lx pid=%li.",
(unsigned long)t,(long)t->m_pid);
// bitch and continue if join failed
if ( pid != t->m_pid ) {
// waitpid() gets interrupted by various signals so
@ -924,14 +931,14 @@ bool ThreadQueue::timedCleanUp ( long maxNiceness ) {
// re-protect this stack
mprotect ( t->m_stack + GUARDSIZE , STACK_SIZE - GUARDSIZE,
PROT_NONE );
#endif
// debug msg
if ( g_conf.m_logDebugThread )
log(LOG_DEBUG,"thread: joined with pid=%li pid=%li.",
(long)t->m_pid,(long)t->m_pid);
#endif
// we may get cleaned up and re-used and our niceness reassigned
// right after we set m_isDone to true, so save niceness
long niceness = t->m_niceness;
@ -1025,18 +1032,8 @@ bool ThreadQueue::timedCleanUp ( long maxNiceness ) {
//only allow a quickpoll if we are nice.
//g_loop.canQuickPoll(t->m_niceness);
// log it now
if ( g_conf.m_logDebugLoop )
log(LOG_DEBUG,"loop: enter thread callback type=%s "
"nice=%li",getThreadType(),(long)t->m_niceness);
makeCallback ( t );
// log it now
if ( g_conf.m_logDebugLoop )
log(LOG_DEBUG,"loop: exit thread callback type=%s "
"nice=%li", getThreadType(),(long)t->m_niceness);
//long long took = gettimeofdayInMilliseconds()-startTime;
//if(took > 8 && maxNiceness > 0) {
// if(g_conf.m_sequentialProfiling)
@ -1053,13 +1050,13 @@ bool ThreadQueue::timedCleanUp ( long maxNiceness ) {
if ( g_conf.m_logDebugThread ) {
long long now = gettimeofdayInMilliseconds();
log(LOG_DEBUG,"thread: [%lu] %s done. "
log(LOG_DEBUG,"thread: [t=0x%lx] %s done1. "
"active=%li "
"time since queued = %llu ms "
"time since launch = %llu ms "
"time since pre-exit = %llu ms "
"time since exit = %llu ms",
(long)t,
(unsigned long)t,
getThreadType() ,
(long)(m_launched - m_returned) ,
now - t->m_queuedTime,
@ -1087,6 +1084,17 @@ void makeCallback ( ThreadEntry *t ) {
// save it
long saved = g_niceness;
// log it now
if ( g_conf.m_logDebugLoop || g_conf.m_logDebugThread )
log(LOG_DEBUG,"thread: enter thread callback t=0x%lx "
//"type=%s "
"state=0x%lx "
"nice=%li",
(long)t,
//getThreadType(),
(long)t->m_state,
(long)t->m_niceness);
// time it?
long long start;
if ( g_conf.m_maxCallbackDelay >= 0 )
@ -1109,6 +1117,16 @@ void makeCallback ( ThreadEntry *t ) {
}
// log it now
if ( g_conf.m_logDebugLoop || g_conf.m_logDebugThread )
log(LOG_DEBUG,"loop: exit thread callback t=0x%lx "
//"type=%s "
"nice=%li",
(long)t,
//getThreadType(),
(long)t->m_niceness);
// restore global niceness
g_niceness = saved;
@ -1199,6 +1217,12 @@ bool ThreadQueue::cleanUp ( ThreadEntry *tt , long maxNiceness ) {
log("threads: pthread_join2 %li = %s (%li)",
(long)t->m_joinTid,mstrerror(status),status);
}
// debug msg
if ( g_conf.m_logDebugThread )
log(LOG_DEBUG,"thread: joined2 with t=0x%lx "
"jointid=0x%lx.",
(long)t,(long)t->m_joinTid);
#else
again:
@ -1207,7 +1231,7 @@ bool ThreadQueue::cleanUp ( ThreadEntry *tt , long maxNiceness ) {
int err = errno;
// debug the waitpid
if ( g_conf.m_logDebugThread )
log(LOG_DEBUG,"thread: Waiting for t=%lu pid=%li.",
log(LOG_DEBUG,"thread: Waiting for t=0x%lx pid=%li.",
(long)t,(long)t->m_pid);
// bitch and continue if join failed
if ( pid != t->m_pid ) {
@ -1362,21 +1386,10 @@ bool ThreadQueue::cleanUp ( ThreadEntry *tt , long maxNiceness ) {
//g_threads.launchThreads();
// log it now
if ( g_conf.m_logDebugLoop )
log(LOG_DEBUG,"loop: enter thread callback type=%s",
getThreadType());
g_errno = 0;
makeCallback ( t );
// log it now
if ( g_conf.m_logDebugLoop )
log(LOG_DEBUG,"loop: exit thread callback type=%s",
getThreadType());
// long long took = gettimeofdayInMilliseconds()-startTime;
// if(took > 8 && maxNiceness > 0) {
// if(g_conf.m_sequentialProfiling)
@ -1393,13 +1406,13 @@ bool ThreadQueue::cleanUp ( ThreadEntry *tt , long maxNiceness ) {
if ( g_conf.m_logDebugThread ) {
long long now = gettimeofdayInMilliseconds();
log(LOG_DEBUG,"thread: [%lu] %s done. "
log(LOG_DEBUG,"thread: [t=0x%lx] %s done2. "
"active=%li "
"time since queued = %llu ms "
"time since launch = %llu ms "
"time since pre-exit = %llu ms "
"time since exit = %llu ms",
(long)t,
(unsigned long)t,
getThreadType() ,
(long)(m_launched - m_returned) ,
now - t->m_queuedTime,
@ -1438,13 +1451,13 @@ bool ThreadQueue::cleanUp ( ThreadEntry *tt , long maxNiceness ) {
if ( g_conf.m_logDebugThread ) {
long long now = gettimeofdayInMilliseconds();
for ( long i = 0 ; i < numCallbacks ; i++ )
log(LOG_DEBUG,"thread: [%lu] %s done. "
log(LOG_DEBUG,"thread: [tid=%lu] %s done3. "
"active=%li "
"time since queued = %llu ms "
"time since launch = %llu ms "
"time since pre-exit = %llu ms "
"time since exit = %llu ms",
(long)tids[i],
(unsigned long)tids[i],
getThreadType() ,
(long)(m_launched - m_returned) ,
now - times [i],
@ -1923,9 +1936,10 @@ bool ThreadQueue::launchThread ( ThreadEntry *te ) {
if ( g_conf.m_logDebugThread ) {
active = m_launched - m_returned ;
long long now = gettimeofdayInMilliseconds();
log(LOG_DEBUG,"thread: [%lu] launched %s thread. active=%lli "
log(LOG_DEBUG,"thread: [t=0x%lx] launched %s thread. "
"active=%lli "
"niceness=%lu. waited %llu ms in queue.",
(long)t, getThreadType(), active, realNiceness,
(unsigned long)t, getThreadType(), active, realNiceness,
now - t->m_queuedTime);
}
// be lazy with this since it uses a significant amount of cpu
@ -1998,7 +2012,7 @@ bool ThreadQueue::launchThread ( ThreadEntry *te ) {
// we're back from pthread_create
if ( g_conf.m_logDebugThread )
log(LOG_DEBUG,"thread: Back from clone t=%lu pid=%li.",
log(LOG_DEBUG,"thread: Back from clone t=0x%lx pid=%li.",
(long)t,(long)pid);
@ -2162,8 +2176,8 @@ int startUp ( void *state ) {
//t->m_tid = pthread_self();
// debug
if ( g_conf.m_logDebugThread )
log(LOG_DEBUG,"thread: [%lu] in startup pid=%li pppid=%li",
(long)t,(long)getpidtid(),(long)getppid());
log(LOG_DEBUG,"thread: [t=0x%lx] in startup pid=%li pppid=%li",
(unsigned long)t,(long)getpidtid(),(long)getppid());
// debug msg
//fprintf(stderr,"new thread tid=%li pid=%li\n",
// (long)t->m_tid,(long)t->m_pid);
@ -2219,8 +2233,8 @@ int startUp ( void *state ) {
t->m_exitTime = now;
if ( g_conf.m_logDebugThread ) {
log(LOG_DEBUG,"thread: [%lu] done with startup pid=%li",
(long)t,(long)getpidtid());
log(LOG_DEBUG,"thread: [t=0x%lx] done with startup pid=%li",
(unsigned long)t,(long)getpidtid());
}
// . now mark thread as ready for removal
@ -2299,7 +2313,7 @@ void ThreadQueue::print ( ) {
// print it
log(LOG_INIT,"thread: address=%lu pid=%u state=%lu "
"occ=%i done=%i lnch=%i",
(long)t , t->m_pid ,
(unsigned long)t , t->m_pid ,
(unsigned long)t->m_state , t->m_isOccupied , t->m_isDone ,
t->m_isLaunched );
}
@ -2411,18 +2425,8 @@ void ThreadQueue::removeThreads ( BigFile *bf ) {
// keep track
maxi = i;
// log it now
if ( g_conf.m_logDebugLoop )
log(LOG_DEBUG,"loop: enter thread callback2 type=%s",
getThreadType());
makeCallback ( t );
// log it now
if ( g_conf.m_logDebugLoop )
log(LOG_DEBUG,"loop: exit thread callback2 type=%s",
getThreadType());
}
// do we have to decrement top
if ( m_top == maxi + 1 )

@ -1,5 +1,5 @@
// iana_charset.h
// Generated automatically by parse_iana_charsets.pl Wed Jul 31 00:10:12 2013
// Generated automatically by parse_iana_charsets.pl Fri Aug 30 03:13:59 2013
// DO NOT EDIT!!!
#include "gb-include.h"

@ -1,5 +1,5 @@
// iana_charset.h
// Generated automatically by parse_iana_charsets.pl Wed Jul 31 00:10:12 2013
// Generated automatically by parse_iana_charsets.pl Fri Aug 30 03:13:59 2013
// DO NOT EDIT!!!
#ifndef IANA_CHARSET_H__