Merge branch 'diffbot-testing' of github.com:gigablast/open-source-search-engine into diffbot-testing
This commit is contained in:
37
Linkdb.cpp
37
Linkdb.cpp
@ -4955,7 +4955,9 @@ bool Links::set ( bool useRelNoFollow ,
|
||||
//char *coll ,
|
||||
bool parentIsPermalink ,
|
||||
Links *oldLinks ,
|
||||
bool doQuickSet ) {
|
||||
bool doQuickSet ,
|
||||
// some json from diffbot:
|
||||
SafeBuf *diffbotReply ) {
|
||||
|
||||
reset();
|
||||
|
||||
@ -5026,6 +5028,39 @@ bool Links::set ( bool useRelNoFollow ,
|
||||
// break;
|
||||
//}
|
||||
|
||||
|
||||
// get list of links from diffbot json reply
|
||||
char *p = NULL;
|
||||
if ( diffbotReply && diffbotReply->length() > 10 )
|
||||
p = strstr ( diffbotReply->getBufStart() , "\"links\":[\"" );
|
||||
// skip over the heading stuff
|
||||
if ( p ) p += 10;
|
||||
// parse out the links from diffbot reply
|
||||
for ( ; p ; ) {
|
||||
// must not be json mark up
|
||||
if ( ! *p || *p == ']' || *p == '\"' ) break;
|
||||
// save p
|
||||
char *start = p;
|
||||
// get length of the link
|
||||
for ( ; *p && *p != '\"' ; p++ );
|
||||
// set end of link
|
||||
char *end = p;
|
||||
// add the link
|
||||
if ( ! addLink ( start , // linkStr
|
||||
end - start , // linkStrLen
|
||||
-1, // i
|
||||
setLinkHash ,
|
||||
TITLEREC_CURRENT_VERSION ,
|
||||
niceness ,
|
||||
false , // isRSS?
|
||||
TAG_LINK , // node id -> LF_LINKTAG flag
|
||||
0 )) // flags
|
||||
return false;
|
||||
// now advance to next link if any.
|
||||
for ( ; *p == '\"' || *p == ',' || is_wspace_a(*p) ; p++ );
|
||||
}
|
||||
|
||||
|
||||
// visit each node in the xml tree. a node can be a tag or a non-tag.
|
||||
char *urlattr = NULL;
|
||||
for ( int32_t i=0; i < m_numNodes ; i++ ) {
|
||||
|
3
Linkdb.h
3
Linkdb.h
@ -1177,7 +1177,8 @@ public:
|
||||
Links *oldLinks , // for LF_OLDLINKS flag
|
||||
// this is used by Msg13.cpp to quickly get ptrs
|
||||
// to the links in the document, no normalization!
|
||||
bool doQuickSet = false );
|
||||
bool doQuickSet = false ,
|
||||
class SafeBuf *diffbotReply = NULL );
|
||||
|
||||
// set from a simple text buffer
|
||||
bool set ( char *buf , int32_t niceness ) ;
|
||||
|
@ -849,7 +849,7 @@ bool sendPageStats ( TcpSocket *s , HttpRequest *r ) {
|
||||
// overflows. when we have too many unindexed
|
||||
// spiderrequests for a particular firstip, we
|
||||
// start dropping so we don't spam spiderdb
|
||||
"<tr class=poo><td><b>Dropped Outlinks</b></td><td>%"INT32"</td>\n"
|
||||
"<tr class=poo><td><b>Dropped Spider Requests</b></td><td>%"INT32"</td>\n"
|
||||
|
||||
"<tr class=poo><td><b>Index Shards</b></td><td>%"INT32"</td>\n"
|
||||
"<tr class=poo><td><b>Hosts per Shard</b></td><td>%"INT32"</td>\n"
|
||||
|
@ -2672,9 +2672,17 @@ bool Pages::printCollectionNavBar ( SafeBuf *sb ,
|
||||
int32_t numPrinted = 0;
|
||||
bool printMsg = false;
|
||||
|
||||
// if doing qa test don't print out collection names because
|
||||
// they are somewhat random and throw off the diff in qa.cpp
|
||||
int32_t qa = hr->getLong("qa",0);
|
||||
//if ( ! strcmp(coll,"qatest123") ) qa = 1;
|
||||
|
||||
//for ( int32_t i = a ; i < b ; i++ ) {
|
||||
for ( int32_t i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) {
|
||||
|
||||
if ( qa )
|
||||
break;
|
||||
|
||||
CollectionRec *cc = g_collectiondb.m_recs[i];
|
||||
if ( ! cc ) continue;
|
||||
|
||||
|
13
Rdb.cpp
13
Rdb.cpp
@ -2571,6 +2571,19 @@ bool Rdb::addRecord ( collnum_t collnum,
|
||||
"skipping.",sreq->m_url);
|
||||
return true;
|
||||
}
|
||||
// if we are overflowing...
|
||||
if ( isReq &&
|
||||
! sreq->m_isAddUrl &&
|
||||
! sreq->m_isPageReindex &&
|
||||
! sreq->m_urlIsDocId &&
|
||||
! sreq->m_forceDelete &&
|
||||
sc->isFirstIpInOverflowList ( sreq->m_firstIp ) ) {
|
||||
if ( g_conf.m_logDebugSpider )
|
||||
log("spider: skipping for overflow url %s ",
|
||||
sreq->m_url);
|
||||
g_stats.m_totalOverflows++;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
if ( m_useTree && (tn=m_tree.addNode (collnum,key,data,dataSize))>=0) {
|
||||
|
39
Spider.cpp
39
Spider.cpp
@ -3791,6 +3791,27 @@ bool SpiderColl::readListFromSpiderdb ( ) {
|
||||
//if ( m_isReadDone ) return true;
|
||||
}
|
||||
|
||||
static int32_t s_lastIn = 0;
|
||||
static int32_t s_lastOut = 0;
|
||||
|
||||
bool SpiderColl::isFirstIpInOverflowList ( int32_t firstIp ) {
|
||||
if ( ! m_overflowList ) return false;
|
||||
if ( firstIp == 0 || firstIp == -1 ) return false;
|
||||
if ( firstIp == s_lastIn ) return true;
|
||||
if ( firstIp == s_lastOut ) return false;
|
||||
for ( int32_t oi = 0 ; ; oi++ ) {
|
||||
// stop at end
|
||||
if ( ! m_overflowList[oi] ) break;
|
||||
// an ip of zero is end of the list
|
||||
if ( m_overflowList[oi] == firstIp ) {
|
||||
s_lastIn = firstIp;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
s_lastOut = firstIp;
|
||||
return false;
|
||||
}
|
||||
|
||||
// . ADDS top X winners to m_winnerTree
|
||||
// . this is ONLY CALLED from evalIpLoop() above
|
||||
// . scan m_list that we read from spiderdb for m_scanningIp IP
|
||||
@ -4787,7 +4808,7 @@ bool SpiderColl::scanListForWinners ( ) {
|
||||
// don't add any more outlinks to this firstip after we
|
||||
// have 10M spider requests for it.
|
||||
// lower for testing
|
||||
//if ( m_totalNewSpiderRequests > 100 )
|
||||
//if ( m_totalNewSpiderRequests > 1 )
|
||||
if ( m_totalNewSpiderRequests > 10000000 )
|
||||
overflow = true;
|
||||
|
||||
@ -4825,6 +4846,8 @@ bool SpiderColl::scanListForWinners ( ) {
|
||||
// if we need to add it...
|
||||
if ( overflow && ! found && m_overflowList ) {
|
||||
log("spider: adding %s to overflow list",iptoa(firstIp));
|
||||
// reset this little cache thingy
|
||||
s_lastOut = 0;
|
||||
// take the empty slot if there is one
|
||||
if ( emptySlot >= 0 )
|
||||
m_overflowList[emptySlot] = firstIp;
|
||||
@ -4848,6 +4871,8 @@ bool SpiderColl::scanListForWinners ( ) {
|
||||
// take it out of list
|
||||
m_overflowList[oi2] = -1;
|
||||
log("spider: removing %s from overflow list",iptoa(firstIp));
|
||||
// reset this little cache thingy
|
||||
s_lastIn = 0;
|
||||
break;
|
||||
}
|
||||
/////
|
||||
@ -13954,15 +13979,3 @@ void SpiderLoop::buildActiveList ( ) {
|
||||
tail = cr;
|
||||
}
|
||||
}
|
||||
|
||||
bool SpiderColl::isFirstIpInOverflowList ( int32_t firstIp ) {
|
||||
if ( ! m_overflowList ) return false;
|
||||
if ( firstIp == 0 || firstIp == -1 ) return false;
|
||||
for ( int32_t oi = 0 ; ; oi++ ) {
|
||||
// stop at end
|
||||
if ( ! m_overflowList[oi] ) break;
|
||||
// an ip of zero is end of the list
|
||||
if ( m_overflowList[oi] == firstIp ) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -2834,7 +2834,7 @@ int TcpServer::sslHandshake ( TcpSocket *s ) {
|
||||
}
|
||||
// if the connection happened return r, should be 1
|
||||
if ( r > 0 ) {
|
||||
//if ( g_conf.m_logDebugTcp )
|
||||
if ( g_conf.m_logDebugTcp )
|
||||
log("tcp: ssl handshake done. entering writing mode "
|
||||
"sd=%i",s->m_sd);
|
||||
// ok, it completed, go into writing mode
|
||||
@ -2882,7 +2882,9 @@ int TcpServer::sslHandshake ( TcpSocket *s ) {
|
||||
// read callbacks are always registered and if we need a read
|
||||
// hopefully it will be called. TODO: verify this...
|
||||
if ( sslError == SSL_ERROR_WANT_READ ) {
|
||||
log("tcp: ssl handshake is not want write sd=%i",s->m_sd);
|
||||
if ( g_conf.m_logDebugTcp )
|
||||
log("tcp: ssl handshake is not want write sd=%i",
|
||||
s->m_sd);
|
||||
//logSSLError(s->m_ssl, r);
|
||||
return 0;
|
||||
}
|
||||
|
8
Xml.cpp
8
Xml.cpp
@ -369,10 +369,16 @@ bool Xml::set ( char *s ,
|
||||
// set his parent xml node if is xml
|
||||
xi->m_parent = parent;
|
||||
|
||||
bool endsInSlash = false;
|
||||
if ( xi->m_node[xi->m_nodeLen-2] == '/' ) endsInSlash = true;
|
||||
if ( xi->m_node[xi->m_nodeLen-2] == '?' ) endsInSlash = true;
|
||||
|
||||
// if not text node then he's the new parent
|
||||
if ( pureXml &&
|
||||
xi->m_nodeId &&
|
||||
xi->m_nodeId != TAG_COMMENT ) {
|
||||
xi->m_nodeId != TAG_COMMENT &&
|
||||
xi->m_nodeId != TAG_CDATA &&
|
||||
! endsInSlash ) {
|
||||
|
||||
// if we are a back tag pop the stack
|
||||
if ( ! xi->isFrontTag() ) {
|
||||
|
90
XmlDoc.cpp
90
XmlDoc.cpp
@ -189,8 +189,6 @@ static int64_t s_lastTimeStart = 0LL;
|
||||
|
||||
void XmlDoc::reset ( ) {
|
||||
|
||||
m_linkOverflows = 0;
|
||||
|
||||
m_isImporting = false;
|
||||
|
||||
m_printedMenu = false;
|
||||
@ -7676,6 +7674,16 @@ Links *XmlDoc::getLinks ( bool doQuickSet ) {
|
||||
if ( m_linksValid ) return &m_links;
|
||||
// set status
|
||||
setStatus ( "getting outlinks");
|
||||
|
||||
// . add links from diffbot reply
|
||||
// . get the reply of json objects from diffbot
|
||||
// . this will be empty if we are a json object!
|
||||
// . will also be empty if not meant to be sent to diffbot
|
||||
// . the TOKENIZED reply consists of \0 separated json objects that
|
||||
// we create from the original diffbot reply
|
||||
SafeBuf *dbr = getDiffbotReply();
|
||||
if ( ! dbr || dbr == (void *)-1 ) return (Links *)dbr;
|
||||
|
||||
// this will set it if necessary
|
||||
Xml *xml = getXml();
|
||||
// bail on error
|
||||
@ -7739,7 +7747,8 @@ Links *XmlDoc::getLinks ( bool doQuickSet ) {
|
||||
m_niceness ,
|
||||
*pp , // parent url in permalink format?
|
||||
oldLinks ,// oldLinks, might be NULL!
|
||||
doQuickSet ))
|
||||
doQuickSet ,
|
||||
dbr ) )
|
||||
return NULL;
|
||||
|
||||
m_linksValid = true;
|
||||
@ -14431,18 +14440,40 @@ void gotDiffbotReplyWrapper ( void *state , TcpSocket *s ) {
|
||||
if ( THIS->m_diffbotReplyError ) countIt = false;
|
||||
|
||||
/*
|
||||
|
||||
// solution for bug #2092 but probably not really needed so
|
||||
// commented out.
|
||||
|
||||
// if doing /vxxx/analzye?mode=xxxx then ensure matches
|
||||
bool isAnalyze = false;
|
||||
if ( countIt &&
|
||||
m_diffbotApiUrlValid &&
|
||||
strstr ( m_diffbotApiUrl.getBufStart(), "/analyze?") )
|
||||
THIS->m_diffbotApiUrlValid &&
|
||||
strstr ( THIS->m_diffbotApiUrl.getBufStart(), "/analyze?") )
|
||||
isAnalyze = true;
|
||||
|
||||
char *mode = NULL;
|
||||
if ( isAnalyze ) {
|
||||
mode = strstr (m_diffbotApiUrl.getBufStart(), "mode=");
|
||||
mode = strstr (THIS->m_diffbotApiUrl.getBufStart(), "mode=");
|
||||
if ( mode ) mode += 5;
|
||||
// find end of it
|
||||
}
|
||||
|
||||
char *pageType = NULL;
|
||||
int32_t pageTypeLen;
|
||||
if ( mode &&
|
||||
THIS->m_diffbotReplyValid &&
|
||||
THIS->m_diffbotReply.length() > 5 ) {
|
||||
char *reply = THIS->m_diffbotReply.getBufStart();
|
||||
pageType = strstr ( reply , "\"type\":\"" );
|
||||
if ( pageType ) pageType += 8;
|
||||
char *e = pageType;
|
||||
for ( ; *e && *e != '\"' ; e++ );
|
||||
pageTypeLen = e - pageType;
|
||||
}
|
||||
|
||||
// if it does not match, do not count it
|
||||
if ( mode && pageType && strncmp ( mode , pageType , pageTypeLen ) )
|
||||
countIt = false;
|
||||
*/
|
||||
|
||||
// increment this counter on a successful reply from diffbot
|
||||
@ -20096,10 +20127,6 @@ bool XmlDoc::logIt ( SafeBuf *bb ) {
|
||||
sb->safePrintf("outlinksadded=%04"INT32" ",
|
||||
(int32_t)m_numOutlinksAdded);
|
||||
|
||||
if ( m_linkOverflows )
|
||||
sb->safePrintf("linkoverflows=%04"INT32" ",
|
||||
(int32_t)m_linkOverflows);
|
||||
|
||||
if ( m_metaListValid )
|
||||
sb->safePrintf("addlistsize=%05"INT32" ",
|
||||
(int32_t)m_metaListSize);
|
||||
@ -25287,7 +25314,7 @@ char *XmlDoc::addOutlinkSpiderRecsToMetaList ( ) {
|
||||
bool ignore = false;
|
||||
if ( mbuf[0] == '1' ) ignore = true;
|
||||
|
||||
SpiderColl *sc = g_spiderCache.getSpiderCollIffNonNull ( m_collnum );
|
||||
//SpiderColl *sc = g_spiderCache.getSpiderCollIffNonNull ( m_collnum );
|
||||
|
||||
//
|
||||
// serialize each link into the metalist now
|
||||
@ -25306,11 +25333,12 @@ char *XmlDoc::addOutlinkSpiderRecsToMetaList ( ) {
|
||||
// if firstIp is in the SpiderColl::m_overflowFirstIps list
|
||||
// then do not add any more links to it. it already has
|
||||
// more than 500MB worth.
|
||||
if ( sc && sc->isFirstIpInOverflowList ( firstIp ) ) {
|
||||
m_linkOverflows++;
|
||||
g_stats.m_totalOverflows++;
|
||||
continue;
|
||||
}
|
||||
// this was moved to Rdb.cpp's addRecord()
|
||||
// if ( sc && sc->isFirstIpInOverflowList ( firstIp ) ) {
|
||||
// m_linkOverflows++;
|
||||
// g_stats.m_totalOverflows++;
|
||||
// continue;
|
||||
// }
|
||||
|
||||
// sanity check
|
||||
//if ( firstIp == 0x03 ) {char *xx=NULL;*xx=0; }
|
||||
@ -29326,7 +29354,8 @@ bool XmlDoc::hashAds ( HashTableX *tt ) {
|
||||
char *descr;
|
||||
//buflen = snprintf(buf,128,"%s-%s",
|
||||
// m_adProvider[i],m_adClient[i]);
|
||||
int32_t buflen = snprintf(buf,128,"%"UINT64"",ptr_adVector[i] );
|
||||
snprintf(buf,128,"%"UINT64"",ptr_adVector[i] );
|
||||
int32_t bufLen = gbstrlen(buf);
|
||||
field = "gbad";
|
||||
descr = "ad provider and id";
|
||||
// update hash parms
|
||||
@ -29338,7 +29367,7 @@ bool XmlDoc::hashAds ( HashTableX *tt ) {
|
||||
//log(LOG_WARN, "build: url %s indexing ad termid %s:%s",
|
||||
// getFirstUrl()->getUrl(), field, buf);
|
||||
//this returns false on failure
|
||||
if ( ! hashString ( buf,buflen,&hi ) ) return false;
|
||||
if ( ! hashString ( buf,bufLen,&hi ) ) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -33758,20 +33787,20 @@ bool XmlDoc::hashFacet2 ( char *prefix,
|
||||
if ( strcmp(prefix,"gbfacetfloat")==0 ) isFloat = true;
|
||||
|
||||
// store in buffer for display on pageparser.cpp output
|
||||
char buf[128];
|
||||
int32_t bufLen;
|
||||
char buf[130];
|
||||
if ( isFloat )
|
||||
bufLen=sprintf(buf,"facetField=%s facetVal32=%f",term,
|
||||
*(float *)&val32);
|
||||
snprintf(buf,128,"facetField=%s facetVal32=%f",term,
|
||||
*(float *)&val32);
|
||||
else
|
||||
bufLen=sprintf(buf,"facetField=%s facetVal32=%"UINT32"",
|
||||
term,(uint32_t)val32);
|
||||
snprintf(buf,128,"facetField=%s facetVal32=%"UINT32"",
|
||||
term,(uint32_t)val32);
|
||||
int32_t bufLen = gbstrlen(buf);
|
||||
|
||||
// make a special hashinfo for this facet
|
||||
HashInfo hi;
|
||||
hi.m_tt = tt;
|
||||
// the full prefix
|
||||
char fullPrefix[64];
|
||||
char fullPrefix[66];
|
||||
snprintf(fullPrefix,64,"%s:%s",prefix,term);
|
||||
hi.m_prefix = fullPrefix;//"gbfacet";
|
||||
|
||||
@ -33865,7 +33894,7 @@ bool XmlDoc::hashFieldMatchTerm ( char *val , int32_t vlen , HashInfo *hi ) {
|
||||
hi2.m_tt = tt;
|
||||
// the full prefix
|
||||
char fullPrefix[64];
|
||||
snprintf(fullPrefix,64,"%s:%s",prefix,hi->m_prefix);
|
||||
snprintf(fullPrefix,62,"%s:%s",prefix,hi->m_prefix);
|
||||
hi2.m_prefix = fullPrefix;//"gbfacet";
|
||||
|
||||
// add to wts for PageParser.cpp display
|
||||
@ -34143,7 +34172,8 @@ bool XmlDoc::hashNumber2 ( float f , HashInfo *hi , char *sortByStr ) {
|
||||
|
||||
// store in buffer
|
||||
char buf[128];
|
||||
int32_t bufLen = sprintf(buf,"%s:%s float32=%f",sortByStr,hi->m_prefix,f);
|
||||
snprintf(buf,126,"%s:%s float32=%f",sortByStr,hi->m_prefix,f);
|
||||
int32_t bufLen = gbstrlen(buf);
|
||||
|
||||
// add to wts for PageParser.cpp display
|
||||
// store it
|
||||
@ -34251,7 +34281,8 @@ bool XmlDoc::hashNumber3 ( int32_t n , HashInfo *hi , char *sortByStr ) {
|
||||
|
||||
// store in buffer
|
||||
char buf[128];
|
||||
int32_t bufLen = sprintf(buf,"%s:%s int32=%"INT32"",sortByStr,hi->m_prefix,n);
|
||||
snprintf(buf,126,"%s:%s int32=%"INT32"",sortByStr, hi->m_prefix,n);
|
||||
int32_t bufLen = gbstrlen(buf);
|
||||
|
||||
// add to wts for PageParser.cpp display
|
||||
// store it
|
||||
@ -49843,6 +49874,9 @@ char *XmlDoc::hashXMLFields ( HashTableX *table ) {
|
||||
// . we just want the "text" nodes
|
||||
if ( nodes[i].isTag() ) continue;
|
||||
|
||||
//if(!strncmp(nodes[i].m_node,"Congress%20Presses%20Uber",20))
|
||||
// log("hey:hy");
|
||||
|
||||
// assemble the full parent name
|
||||
// like "tag1.tag2.tag3"
|
||||
nameBuf.reset();
|
||||
|
1
XmlDoc.h
1
XmlDoc.h
@ -2032,7 +2032,6 @@ class XmlDoc {
|
||||
char m_isFiltered;
|
||||
int32_t m_urlFilterNum;
|
||||
int32_t m_numOutlinksAdded;
|
||||
int32_t m_linkOverflows;
|
||||
int32_t m_numOutlinksAddedFromSameDomain;
|
||||
int32_t m_numOutlinksFiltered;
|
||||
int32_t m_numOutlinksBanned;
|
||||
|
23
qa.cpp
23
qa.cpp
@ -182,6 +182,7 @@ void processReply ( char *reply , int32_t replyLen ) {
|
||||
|
||||
markOut ( content , "spider is done (");
|
||||
markOut ( content , "spider is paused (");
|
||||
markOut ( content , "spider queue empty (");
|
||||
|
||||
markOut ( content , "<totalShards>");
|
||||
|
||||
@ -545,7 +546,7 @@ bool qainject1 ( ) {
|
||||
// turn off images thumbnails
|
||||
if ( ! s_flags[17] ) {
|
||||
s_flags[17] = true;
|
||||
if ( ! getUrl ( "/admin/spider?c=qatest123&mit=0&mns=1"
|
||||
if ( ! getUrl ( "/admin/spider?c=qatest123&qa=1&mit=0&mns=1"
|
||||
// no spider replies because it messes
|
||||
// up our last test to make sure posdb
|
||||
// is 100% empty.
|
||||
@ -851,7 +852,7 @@ bool qainject2 ( ) {
|
||||
if ( ! s_flags[17] ) {
|
||||
s_flags[17] = true;
|
||||
// can't turn off spiders because we need for query reindex
|
||||
if ( ! getUrl ( "/admin/spider?c=qatest123&mit=0&mns=1"
|
||||
if ( ! getUrl ( "/admin/spider?c=qatest123&qa=1&mit=0&mns=1"
|
||||
// turn off use robots to avoid that
|
||||
// xyz.com/robots.txt redir to seekseek.com
|
||||
"&obeyRobots=0"
|
||||
@ -1041,7 +1042,7 @@ bool qaSyntax ( ) {
|
||||
if ( ! s_flags[2] ) {
|
||||
s_flags[2] = true;
|
||||
// can't turn off spiders because we need for query reindex
|
||||
if ( ! getUrl ( "/admin/spider?c=qatest123&mit=0&mns=1"
|
||||
if ( ! getUrl ( "/admin/spider?c=qatest123&qa=1&mit=0&mns=1"
|
||||
// index spider reply status docs
|
||||
"&isr=1"
|
||||
// turn off use robots to avoid that
|
||||
@ -1249,7 +1250,7 @@ bool qaimport () {
|
||||
// turn spiders off so it doesn't spider while we are importing
|
||||
if ( ! s_flags[18] ) {
|
||||
s_flags[18] = true;
|
||||
if ( ! getUrl ( "/admin/spider?cse=0&c=qatest123",
|
||||
if ( ! getUrl ( "/admin/spider?cse=0&qa=1&c=qatest123",
|
||||
// checksum of reply expected
|
||||
238170006 ) )
|
||||
return false;
|
||||
@ -1353,7 +1354,7 @@ bool qainlinks() {
|
||||
// turn spiders off so it doesn't spider while we are importing
|
||||
if ( ! s_flags[18] ) {
|
||||
s_flags[18] = true;
|
||||
if ( ! getUrl ( "/admin/spider?cse=0&c=qatest123",
|
||||
if ( ! getUrl ( "/admin/spider?cse=0&qa=1&c=qatest123",
|
||||
// checksum of reply expected
|
||||
238170006 ) )
|
||||
return false;
|
||||
@ -1485,7 +1486,7 @@ bool qareindex() {
|
||||
// turn off images thumbnails
|
||||
if ( ! s_flags[17] ) {
|
||||
s_flags[17] = true;
|
||||
if ( ! getUrl ( "/admin/spider?c=qatest123&mit=0&mns=1",
|
||||
if ( ! getUrl ( "/admin/spider?c=qatest123&qa=1&mit=0&mns=1",
|
||||
// checksum of reply expected
|
||||
238170006 ) )
|
||||
return false;
|
||||
@ -1755,7 +1756,7 @@ bool qaspider1 ( ) {
|
||||
// set max spiders to 1 for consistency!
|
||||
if ( ! s_flags[24] ) {
|
||||
s_flags[24] = true;
|
||||
if ( ! getUrl ( "/admin/spider?c=qatest123&mit=0&mns=1"
|
||||
if ( ! getUrl ( "/admin/spider?c=qatest123&qa=1&mit=0&mns=1"
|
||||
// so site2:www.walmart.com works
|
||||
"&isr=1"
|
||||
,
|
||||
@ -2022,7 +2023,7 @@ bool qaspider2 ( ) {
|
||||
// turn off images thumbnails
|
||||
if ( ! s_flags[24] ) {
|
||||
s_flags[24] = true;
|
||||
if ( ! getUrl ( "/admin/spider?c=qatest123&mit=0&mns=1",
|
||||
if ( ! getUrl ( "/admin/spider?c=qatest123&qa=1&mit=0&mns=1",
|
||||
// checksum of reply expected
|
||||
238170006 ) )
|
||||
return false;
|
||||
@ -2227,7 +2228,7 @@ bool qascrape ( ) {
|
||||
// turn off images thumbnails
|
||||
if ( ! s_flags[24] ) {
|
||||
s_flags[24] = true;
|
||||
if ( ! getUrl ( "/admin/spider?c=qatest123&mit=0&mns=1",
|
||||
if ( ! getUrl ( "/admin/spider?c=qatest123&qa=1&mit=0&mns=1",
|
||||
// checksum of reply expected
|
||||
238170006 ) )
|
||||
return false;
|
||||
@ -2346,7 +2347,7 @@ bool qajson ( ) {
|
||||
// turn off images thumbnails
|
||||
if ( ! s_flags[24] ) {
|
||||
s_flags[24] = true;
|
||||
if ( ! getUrl ( "/admin/spider?c=qatest123&mit=0&mns=1"
|
||||
if ( ! getUrl ( "/admin/spider?c=qatest123&qa=1&mit=0&mns=1"
|
||||
// index spider replies status docs
|
||||
"&isr=1"
|
||||
,
|
||||
@ -2570,7 +2571,7 @@ bool qaxml ( ) {
|
||||
// turn off images thumbnails
|
||||
if ( ! s_flags[24] ) {
|
||||
s_flags[24] = true;
|
||||
if ( ! getUrl ( "/admin/spider?c=qatest123&mit=0&mns=1",
|
||||
if ( ! getUrl ( "/admin/spider?c=qatest123&qa=1&mit=0&mns=1",
|
||||
// checksum of reply expected
|
||||
238170006 ) )
|
||||
return false;
|
||||
|
Reference in New Issue
Block a user