update crawl info once per sec again

now that we only send if localCrawlInfo
has changed.
This commit is contained in:
Matt Wells
2014-06-25 12:55:10 -07:00
parent 3cf31ed230
commit 39fbb5b5b6
2 changed files with 14 additions and 2 deletions

@ -5178,8 +5178,10 @@ void SpiderLoop::startLoop ( ) {
// then try not to send crawlinfo the host should already have.
// each collrec can have a checksum for each host of the last
// info we sent it. but we should resend all every 100 secs anyway
// in case host went dead
if ( !g_loop.registerSleepCallback(4000,
// in case host went dead.
// now that we only send the info on startup and if changed,
// let's move back down to 1 second
if ( !g_loop.registerSleepCallback(1000,
this,
updateAllCrawlInfosSleepWrapper))
log("build: failed to register updatecrawlinfowrapper");

@ -14410,6 +14410,9 @@ char **XmlDoc::getHttpReply ( ) {
// otherwise, assume reply is valid
return &m_httpReply;
}
setStatus("getting http reply");
// come back up here if a redirect invalidates it
loop:
// sanity test -- only if not the test collection (NO, might be EBADIP)
@ -14497,6 +14500,8 @@ void gotHttpReplyWrapper ( void *state ) {
char **XmlDoc::getHttpReply2 ( ) {
if ( m_httpReplyValid ) return &m_httpReply;
setStatus("getting http reply2");
// get ip
long *ip = getIp();
if ( ! ip || ip == (long *)-1 ) return (char **)ip;
@ -15069,6 +15074,9 @@ int16_t *XmlDoc::getHttpStatus ( ) {
HttpMime *XmlDoc::getMime () {
if ( m_mimeValid ) return &m_mime;
// log debug
setStatus("getting http mime");
Url *cu = getCurrentUrl();
if ( ! cu || cu == (void *)-1) return (HttpMime *)cu;
@ -15267,6 +15275,8 @@ char getContentTypeFromContent ( char *p , long niceness ) {
uint8_t *XmlDoc::getContentType ( ) {
if ( m_contentTypeValid ) return &m_contentType;
// log debug
setStatus("getting content type");
// get the mime first
HttpMime *mime = getMime();
if ( ! mime || mime == (HttpMime *)-1 ) return (uint8_t *)mime;