Merge branch 'testing'

This commit is contained in:
Matt 2015-03-06 21:34:21 -08:00
commit 213e430c31
2 changed files with 15 additions and 6 deletions

@ -846,7 +846,10 @@ bool sendPageStats ( TcpSocket *s , HttpRequest *r ) {
//"<tr class=poo><td><b>Gigablast Version</b></td><td>%s %s</td></tr>\n"
"<tr class=poo><td><b>Parsing Inconsistencies</b></td><td>%"INT32"</td>\n"
"<tr class=poo><td><b>Spiderdb Overflows</b></td><td>%"INT32"</td>\n"
// overflows. when we have too many unindexed
// spiderrequests for a particular firstip, we
// start dropping so we don't spam spiderdb
"<tr class=poo><td><b>Dropped Outlinks</b></td><td>%"INT32"</td>\n"
"<tr class=poo><td><b>Index Shards</b></td><td>%"INT32"</td>\n"
"<tr class=poo><td><b>Hosts per Shard</b></td><td>%"INT32"</td>\n"

@ -3959,7 +3959,9 @@ bool SpiderColl::scanListForWinners ( ) {
}
// if this is a reply-less spider request for a new url, count it
if ( ! srep && m_lastSreqUh48 != uh48 )
if ( ! srep && m_lastSreqUh48 != uh48 &&
// avoid counting query reindex requests
! sreq->m_fakeFirstIp )
m_totalNewSpiderRequests++;
m_lastSreqUh48 = uh48;
@ -4747,6 +4749,8 @@ bool SpiderColl::scanListForWinners ( ) {
bool overflow = false;
// don't add any more outlinks to this firstip after we
// have 10M spider requests for it.
// lower for testing
//if ( m_totalNewSpiderRequests > 100 )
if ( m_totalNewSpiderRequests > 10000000 )
overflow = true;
@ -4765,9 +4769,9 @@ bool SpiderColl::scanListForWinners ( ) {
if ( m_lastOverflowFirstIp == firstIp )
return true;
m_lastOverflowFirstIp = firstIp;
if ( g_conf.m_logDebugSpider && overflow )
log("spider: got overflow for firstip %s",
iptoa(firstIp));
if ( overflow && g_conf.m_logDebugSpider )
log("spider: firstip %s overflowing with %"INT32" new reqs",
iptoa(firstIp),(int32_t)m_totalNewSpiderRequests);
for ( oi = 0 ; ; oi++ ) {
// sanity
if ( ! m_overflowList ) break;
@ -4783,6 +4787,7 @@ bool SpiderColl::scanListForWinners ( ) {
}
// if we need to add it...
if ( overflow && ! found && m_overflowList ) {
log("spider: adding %s to overflow list",iptoa(firstIp));
// take the empty slot if there is one
if ( emptySlot >= 0 )
m_overflowList[emptySlot] = firstIp;
@ -4805,6 +4810,7 @@ bool SpiderColl::scanListForWinners ( ) {
if ( m_overflowList[oi2] != firstIp ) continue;
// take it out of list
m_overflowList[oi2] = -1;
log("spider: removing %s from overflow list",iptoa(firstIp));
break;
}
/////
@ -13874,5 +13880,5 @@ bool SpiderColl::isFirstIpInOverflowList ( int32_t firstIp ) {
// an ip of zero is end of the list
if ( m_overflowList[oi] == firstIp ) return true;
}
return true;
return false;
}