forked from Mirrors/privacore-open-source-search-engine
Removed defunct member SpiderRequest::m_parentPrevSpiderTime
This commit is contained in:
8
Spider.h
8
Spider.h
@ -449,13 +449,7 @@ public:
|
||||
// date in the request... UNFORTUNATELY we lose m_addedTime then!!!
|
||||
uint32_t m_addedTime; // time_t
|
||||
|
||||
// if m_isNewOutlink is true, then this SpiderRequest is being added
|
||||
// for a link that did not exist on this page the last time it was
|
||||
// spidered. XmlDoc.cpp needs to set XmlDoc::m_min/maxPubDate for
|
||||
// m_url. if m_url's content does not contain a pub date explicitly
|
||||
// then we can estimate it based on when m_url's parent was last
|
||||
// spidered (when m_url was not an outlink on its parent page)
|
||||
uint32_t m_parentPrevSpiderTime; // time_t
|
||||
uint32_t m_reserved4b; //m_parentPrevSpiderTime
|
||||
|
||||
// # of spider requests from different c-blocks. capped at 255.
|
||||
// taken from the # of SpiderRequests.
|
||||
|
28
XmlDoc.cpp
28
XmlDoc.cpp
@ -706,22 +706,6 @@ bool XmlDoc::set4 ( SpiderRequest *sreq ,
|
||||
m_version = TITLEREC_CURRENT_VERSION;
|
||||
m_versionValid = true;
|
||||
|
||||
/*
|
||||
// set min/max pub dates right away
|
||||
m_minPubDate = -1;
|
||||
m_maxPubDate = -1;
|
||||
// parentPrevSpiderTime is 0 if that was the first time that the
|
||||
// parent was spidered, in which case isNewOutlink will always be set
|
||||
// for every outlink it had!
|
||||
if ( sreq->m_isNewOutlink && sreq->m_parentPrevSpiderTime ) {
|
||||
// sanity check
|
||||
if ( ! sreq->m_parentPrevSpiderTime ) {g_process.shutdownAbort(true);}
|
||||
// pub date is somewhere between these two times
|
||||
m_minPubDate = sreq->m_parentPrevSpiderTime;
|
||||
m_maxPubDate = sreq->m_addedTime;
|
||||
}
|
||||
*/
|
||||
|
||||
// this is used to remove the rec from doledb after we spider it
|
||||
m_doledbKey.setMin();
|
||||
if ( doledbKey ) m_doledbKey = *doledbKey;
|
||||
@ -14790,18 +14774,6 @@ char *XmlDoc::addOutlinkSpiderRecsToMetaList ( ) {
|
||||
// the urls in dmoz, not their outlinks.
|
||||
if ( avoid ) ksr.m_avoidSpiderLinks = 1;
|
||||
|
||||
// . if this is the 2nd+ time we were spidered and this outlink
|
||||
// wasn't there last time, then set this!
|
||||
// . if this is the first time spidering this doc then set it
|
||||
// to zero so that m_minPubDate is set to -1 when the outlink
|
||||
// defined by "ksr" is spidered.
|
||||
if ( m_oldDocValid && m_oldDoc ) {
|
||||
int32_t oldSpideredTime = m_oldDoc->getSpideredTime();
|
||||
ksr.m_parentPrevSpiderTime = oldSpideredTime;
|
||||
} else {
|
||||
ksr.m_parentPrevSpiderTime = 0;
|
||||
}
|
||||
|
||||
//
|
||||
// . inherit manual add bit if redirecting to simplified url
|
||||
// . so we always spider seed url even if prohibited by
|
||||
|
1
main.cpp
1
main.cpp
@ -3268,7 +3268,6 @@ static int32_t dumpSpiderdbCsv(const char *coll) {
|
||||
printf("%u,",spiderRequest->m_siteHash32);
|
||||
printf("%d,",spiderRequest->m_siteNumInlinks);
|
||||
printf("%d,",spiderRequest->m_addedTime);
|
||||
printf("%d,",spiderRequest->m_parentPrevSpiderTime);
|
||||
printf("%d,",spiderRequest->m_pageNumInlinks);
|
||||
printf("%d,",spiderRequest->m_sameErrCount);
|
||||
printf("%d,",spiderRequest->m_version);
|
||||
|
Reference in New Issue
Block a user