Merge branch 'diffbot' into diffbot-testing
Conflicts: iana_charset.cpp iana_charset.h
This commit is contained in:
commit
7b768d4b86
@ -1572,6 +1572,7 @@ void RdbBase::gotTokenForMerge ( ) {
|
||||
bool minOld ;
|
||||
long id2 = -1;
|
||||
long minToMerge;
|
||||
bool overide = false;
|
||||
//long smini = - 1;
|
||||
//long sn ;
|
||||
//long long tfndbSize = 0;
|
||||
@ -1884,7 +1885,7 @@ void RdbBase::gotTokenForMerge ( ) {
|
||||
|
||||
startMerge:
|
||||
// sanity check
|
||||
if ( n <= 1 ) {
|
||||
if ( n <= 1 && ! overide ) {
|
||||
log(LOG_LOGIC,"merge: gotTokenForMerge: Not merging %li files.",
|
||||
n);
|
||||
g_msg35.releaseToken();
|
||||
|
21
Spider.cpp
21
Spider.cpp
@ -249,6 +249,7 @@ long SpiderRequest::printToTable ( SafeBuf *sb , char *status ,
|
||||
long long now = gettimeofdayInMilliseconds();
|
||||
long long elapsed = now - xd->m_startTime;
|
||||
sb->safePrintf(" <td>%llims</td>\n",elapsed);
|
||||
sb->safePrintf(" <td>%li</td>\n",(long)xd->m_collnum);
|
||||
}
|
||||
|
||||
sb->safePrintf(" <td><nobr>%s</nobr></td>\n",m_url);
|
||||
@ -346,8 +347,10 @@ long SpiderRequest::printTableHeaderSimple ( SafeBuf *sb ,
|
||||
sb->safePrintf("<tr>\n");
|
||||
|
||||
// how long it's been being spidered
|
||||
if ( currentlySpidering )
|
||||
if ( currentlySpidering ) {
|
||||
sb->safePrintf(" <td><b>elapsed</b></td>\n");
|
||||
sb->safePrintf(" <td><b>coll</b></td>\n");
|
||||
}
|
||||
|
||||
sb->safePrintf(" <td><b>url</b></td>\n");
|
||||
sb->safePrintf(" <td><b>status</b></td>\n");
|
||||
@ -448,8 +451,10 @@ long SpiderRequest::printTableHeader ( SafeBuf *sb , bool currentlySpidering) {
|
||||
sb->safePrintf("<tr>\n");
|
||||
|
||||
// how long it's been being spidered
|
||||
if ( currentlySpidering )
|
||||
if ( currentlySpidering ) {
|
||||
sb->safePrintf(" <td><b>elapsed</b></td>\n");
|
||||
sb->safePrintf(" <td><b>coll</b></td>\n");
|
||||
}
|
||||
|
||||
sb->safePrintf(" <td><b>url</b></td>\n");
|
||||
sb->safePrintf(" <td><b>status</b></td>\n");
|
||||
@ -4466,7 +4471,7 @@ void SpiderLoop::spiderDoledUrls ( ) {
|
||||
if ( g_dailyMerge.m_mergeMode ) return;
|
||||
// skip if too many udp slots being used
|
||||
if ( g_udpServer.getNumUsedSlots() >= 1300 ) return;
|
||||
// stop if too many out
|
||||
// stop if too many out. this is now 50 down from 500.
|
||||
if ( m_numSpidersOut >= MAX_SPIDERS ) return;
|
||||
// bail if no collections
|
||||
if ( g_collectiondb.m_numRecs <= 0 ) return;
|
||||
@ -6248,8 +6253,14 @@ bool Msg12::gotLockReply ( UdpSlot *slot ) {
|
||||
60*60*24*365 ) )
|
||||
return false;
|
||||
// error?
|
||||
log("spider: error re-sending confirm request: %s",
|
||||
mstrerror(g_errno));
|
||||
// don't spam the log!
|
||||
static long s_last = 0;
|
||||
long now = getTimeLocal();
|
||||
if ( now - s_last >= 1 ) {
|
||||
s_last = now;
|
||||
log("spider: error re-sending confirm "
|
||||
"request: %s", mstrerror(g_errno));
|
||||
}
|
||||
}
|
||||
// only log every 10 seconds for ETRYAGAIN
|
||||
if ( g_errno == ETRYAGAIN ) {
|
||||
|
5
Spider.h
5
Spider.h
@ -1274,8 +1274,9 @@ void handleRequestc1 ( UdpSlot *slot , long niceness ) ;
|
||||
// . supports <META NAME="ROBOTS" CONTENT="NOFOLLOW"> (no links)
|
||||
// . supports limiting spiders per domain
|
||||
|
||||
// max spiders we can have going at once for this process
|
||||
#define MAX_SPIDERS 500
|
||||
// . max spiders we can have going at once for this process
|
||||
// . limit to 50 to prevent OOM conditions
|
||||
#define MAX_SPIDERS 50
|
||||
|
||||
class SpiderLoop {
|
||||
|
||||
|
11
XmlDoc.cpp
11
XmlDoc.cpp
@ -7681,9 +7681,20 @@ long long *XmlDoc::getExactContentHash64 ( ) {
|
||||
unsigned char *pend = (unsigned char *)p + plen;
|
||||
unsigned long long h64 = 0LL;
|
||||
unsigned char pos = 0;
|
||||
bool lastWasSpace = true;
|
||||
for ( ; p < pend ; p++ ) {
|
||||
// breathe
|
||||
QUICKPOLL ( m_niceness );
|
||||
// treat sequences of white space as a single ' ' (space)
|
||||
if ( is_wspace_a(*p) ) {
|
||||
if ( lastWasSpace ) continue;
|
||||
lastWasSpace = true;
|
||||
// treat all white space as a space
|
||||
h64 ^= g_hashtab[pos][' '];
|
||||
pos++;
|
||||
continue;
|
||||
}
|
||||
lastWasSpace = false;
|
||||
// xor this in right
|
||||
h64 ^= g_hashtab[pos][p[0]];
|
||||
pos++;
|
||||
|
@ -1,5 +1,5 @@
|
||||
// iana_charset.h
|
||||
// Generated automatically by parse_iana_charsets.pl Tue Dec 10 22:42:31 2013
|
||||
// Generated automatically by parse_iana_charsets.pl Thu Dec 12 20:57:59 2013
|
||||
// DO NOT EDIT!!!
|
||||
|
||||
#include "gb-include.h"
|
||||
|
@ -1,5 +1,5 @@
|
||||
// iana_charset.h
|
||||
// Generated automatically by parse_iana_charsets.pl Tue Dec 10 22:42:31 2013
|
||||
// Generated automatically by parse_iana_charsets.pl Thu Dec 12 20:57:59 2013
|
||||
// DO NOT EDIT!!!
|
||||
|
||||
#ifndef IANA_CHARSET_H__
|
||||
|
Loading…
x
Reference in New Issue
Block a user