mirror of
https://github.com/privacore/open-source-search-engine.git
synced 2025-07-11 02:16:07 -04:00
Merge branch 'master' into nomerge2
This commit is contained in:
78
Msge1.cpp
78
Msge1.cpp
@ -6,30 +6,30 @@
|
||||
|
||||
#include "Msge1.h"
|
||||
|
||||
Msge1::Msge1() {
|
||||
m_buf = NULL;
|
||||
m_numReplies = 0;
|
||||
|
||||
// Coverity
|
||||
m_coll = NULL;
|
||||
m_niceness = 0;
|
||||
m_urlPtrs = NULL;
|
||||
m_urlFlags = NULL;
|
||||
m_numUrls = 0;
|
||||
m_addTags = false;
|
||||
m_skipOldLinks = 0;
|
||||
m_bufSize = 0;
|
||||
m_ipErrors = NULL;
|
||||
m_numRequests = 0;
|
||||
m_n = 0;
|
||||
m_grv = NULL;
|
||||
m_state = NULL;
|
||||
m_callback = NULL;
|
||||
m_nowGlobal = 0;
|
||||
memset(m_ns, 0, sizeof(m_ns));
|
||||
memset(m_used, 0, sizeof(m_used));
|
||||
|
||||
reset();
|
||||
Msge1::Msge1()
|
||||
: m_niceness(0),
|
||||
m_urlPtrs(NULL),
|
||||
m_urlFlags(NULL),
|
||||
m_numUrls(0),
|
||||
m_addTags(false),
|
||||
m_buf(NULL),
|
||||
m_bufSize(0),
|
||||
m_ipBuf(NULL),
|
||||
m_ipErrors(NULL),
|
||||
m_numRequests(0),
|
||||
m_numReplies(0),
|
||||
m_n(0),
|
||||
m_msgCs(),
|
||||
m_grv(NULL),
|
||||
m_state(NULL),
|
||||
m_callback(NULL),
|
||||
m_nowGlobal(0),
|
||||
m_errno(0)
|
||||
{
|
||||
for(int i=0; i<MAX_OUTSTANDING_MSGE1; i++)
|
||||
m_ns[i] = 0;
|
||||
for(int i=0; i<MAX_OUTSTANDING_MSGE1; i++)
|
||||
m_used[i] = false;
|
||||
}
|
||||
|
||||
Msge1::~Msge1() {
|
||||
@ -38,12 +38,20 @@ Msge1::~Msge1() {
|
||||
|
||||
void Msge1::reset() {
|
||||
m_errno = 0;
|
||||
m_ipBuf = NULL;
|
||||
if ( m_buf ) {
|
||||
mfree(m_buf, m_bufSize, "Msge1buf");
|
||||
m_buf = NULL;
|
||||
}
|
||||
m_ipBuf = NULL;
|
||||
m_ipErrors = NULL;
|
||||
m_numRequests = 0;
|
||||
m_numReplies = 0;
|
||||
m_n = 0;
|
||||
|
||||
for(int i=0; i<MAX_OUTSTANDING_MSGE1; i++)
|
||||
m_ns[i] = 0;
|
||||
for(int i=0; i<MAX_OUTSTANDING_MSGE1; i++)
|
||||
m_used[i] = false;
|
||||
}
|
||||
|
||||
// . get various information for each url in a list of urls
|
||||
@ -51,12 +59,9 @@ void Msge1::reset() {
|
||||
// . used to be called getSiteRecs()
|
||||
// . you can pass in a list of docIds rather than urlPtrs
|
||||
bool Msge1::getFirstIps ( TagRec **grv ,
|
||||
char **urlPtrs ,
|
||||
linkflags_t *urlFlags ,//Links::m_linkFlags
|
||||
const char **urlPtrs,
|
||||
const linkflags_t *urlFlags,
|
||||
int32_t numUrls ,
|
||||
// if skipOldLinks && urlFlags[i]&LF_OLDLINK, skip it
|
||||
bool skipOldLinks ,
|
||||
char *coll ,
|
||||
int32_t niceness ,
|
||||
void *state ,
|
||||
void (*callback)(void *state) ,
|
||||
@ -72,8 +77,6 @@ bool Msge1::getFirstIps ( TagRec **grv ,
|
||||
m_urlPtrs = urlPtrs;
|
||||
m_urlFlags = urlFlags;
|
||||
m_numUrls = numUrls;
|
||||
m_skipOldLinks = skipOldLinks;
|
||||
m_coll = coll;
|
||||
m_niceness = niceness;
|
||||
m_state = state;
|
||||
m_callback = callback;
|
||||
@ -131,15 +134,6 @@ bool Msge1::launchRequests ( int32_t starti ) {
|
||||
if ( m_n >= m_numUrls ) return (m_numRequests == m_numReplies);
|
||||
// if we are maxed out, we basically blocked!
|
||||
if (m_numRequests - m_numReplies >= MAX_OUTSTANDING_MSGE1)return false;
|
||||
// . skip if "old"
|
||||
// . we are not planning on adding this to spiderdb, so Msg16
|
||||
// want to skip the ip lookup, etc.
|
||||
if ( m_urlFlags && (m_urlFlags[m_n] & LF_OLDLINK) && m_skipOldLinks ) {
|
||||
m_numRequests++;
|
||||
m_numReplies++;
|
||||
m_n++;
|
||||
goto loop;
|
||||
}
|
||||
|
||||
// grab the "firstip" from the tagRec if we can
|
||||
TagRec *gr = m_grv[m_n];
|
||||
@ -186,7 +180,7 @@ bool Msge1::launchRequests ( int32_t starti ) {
|
||||
|
||||
// . get the next url
|
||||
// . if m_xd is set, create the url from the ad id
|
||||
char *p = m_urlPtrs[m_n];
|
||||
const char *p = m_urlPtrs[m_n];
|
||||
|
||||
// if it is ip based that makes things easy
|
||||
int32_t hlen = 0;
|
||||
|
14
Msge1.h
14
Msge1.h
@ -28,12 +28,9 @@ public:
|
||||
// doling/throttling these urls/requests out to other hosts to
|
||||
// spider them. see Spider.h/.cpp for more info
|
||||
bool getFirstIps ( class TagRec **grv ,
|
||||
char **urlPtrs ,
|
||||
linkflags_t *urlFlags ,
|
||||
const char **urlPtrs,
|
||||
const linkflags_t *urlFlags,
|
||||
int32_t numUrls ,
|
||||
// if urlFlags[i]&LF_OLDLINK is true, skip it
|
||||
bool skipOldLinks ,
|
||||
char *coll ,
|
||||
int32_t niceness ,
|
||||
void *state ,
|
||||
void (*callback)(void *state) ,
|
||||
@ -52,16 +49,13 @@ private:
|
||||
bool addTag ( int32_t i );
|
||||
bool doneAddingTag ( int32_t i );
|
||||
|
||||
char *m_coll ;
|
||||
int32_t m_niceness ;
|
||||
|
||||
char **m_urlPtrs;
|
||||
linkflags_t *m_urlFlags;
|
||||
const char **m_urlPtrs;
|
||||
const linkflags_t *m_urlFlags;
|
||||
int32_t m_numUrls;
|
||||
bool m_addTags;
|
||||
|
||||
bool m_skipOldLinks;
|
||||
|
||||
// buffer to hold all the data we accumulate for all the urls in urlBuf
|
||||
char *m_buf;
|
||||
int32_t m_bufSize;
|
||||
|
@ -743,7 +743,8 @@ static bool CommandDiskDump(const char *rec) {
|
||||
g_spiderdb.getRdb()->dumpTree();
|
||||
g_posdb.getRdb()->dumpTree();
|
||||
g_titledb.getRdb()->dumpTree();
|
||||
g_statsdb.getRdb()->dumpTree();
|
||||
// disable statsdb from dump to disk command to prevent 0 byte file from being created
|
||||
//g_statsdb.getRdb()->dumpTree();
|
||||
g_linkdb.getRdb()->dumpTree();
|
||||
g_errno = 0;
|
||||
return true;
|
||||
|
@ -10859,11 +10859,9 @@ int32_t **XmlDoc::getOutlinkFirstIpVector () {
|
||||
// . this just dns looks up the DOMAINS of each outlink because these
|
||||
// are *first* ips and ONLY used by Spider.cpp for throttling!!!
|
||||
if ( ! m_msge1.getFirstIps ( *grv ,
|
||||
links->m_linkPtrs ,
|
||||
const_cast<const char**>(links->m_linkPtrs),
|
||||
links->m_linkFlags ,
|
||||
links->m_numLinks ,
|
||||
false , // skip old?
|
||||
cr->m_coll ,
|
||||
m_niceness ,
|
||||
m_masterState ,
|
||||
m_masterLoop ,
|
||||
|
Reference in New Issue
Block a user