mirror of
https://github.com/privacore/open-source-search-engine.git
synced 2025-07-16 02:46:08 -04:00
A bit more trace log added
This commit is contained in:
28
Dns.cpp
28
Dns.cpp
@ -2458,7 +2458,7 @@ key_t Dns::getKey ( char *hostname , int32_t hostnameLen ) {
|
||||
// . returns -1 if not host available to send request to
|
||||
Host *Dns::getResponsibleHost ( key_t key )
|
||||
{
|
||||
if( g_conf.m_logDebugDetailed ) log("%s:%s: BEGIN", __FILE__,__FUNCTION__);
|
||||
if( g_conf.m_logDebugDetailed ) log("%s:%s:%d: BEGIN", __FILE__,__func__, __LINE__);
|
||||
|
||||
// just keep this on this cluster now
|
||||
Hostdb *hostdb = &g_hostdb;
|
||||
@ -2468,9 +2468,9 @@ Host *Dns::getResponsibleHost ( key_t key )
|
||||
|
||||
if( g_conf.m_logDebugDetailed )
|
||||
{
|
||||
log("%s:%s: numHosts: %"UINT32"", __FILE__,__FUNCTION__,hostdb->getNumHosts());
|
||||
log("%s:%s: key.n1: %"UINT32"", __FILE__,__FUNCTION__,key.n1);
|
||||
log("%s:%s: hostId: %"UINT32"", __FILE__,__FUNCTION__,hostId);
|
||||
log("%s:%s:%d: numHosts: %"UINT32"", __FILE__,__func__, __LINE__, hostdb->getNumHosts());
|
||||
log("%s:%s:%d: key.n1: %"UINT32"", __FILE__,__func__, __LINE__, key.n1);
|
||||
log("%s:%s:%d: hostId: %"UINT32"", __FILE__,__func__, __LINE__, hostId);
|
||||
}
|
||||
|
||||
|
||||
@ -2479,7 +2479,7 @@ Host *Dns::getResponsibleHost ( key_t key )
|
||||
|
||||
if ( h->m_spiderEnabled && ! hostdb->isDead ( hostId ) )
|
||||
{
|
||||
if( g_conf.m_logDebugDetailed ) log("%s:%s: END. Spidering enabled and not dead. Returning.", __FILE__,__FUNCTION__);
|
||||
if( g_conf.m_logDebugDetailed ) log("%s:%s:%d: END. Spidering enabled and not dead. Returning.", __FILE__,__func__, __LINE__);
|
||||
return h;
|
||||
}
|
||||
|
||||
@ -2488,13 +2488,13 @@ Host *Dns::getResponsibleHost ( key_t key )
|
||||
// how many are up?
|
||||
int32_t numAlive = hostdb->getNumHostsAlive();
|
||||
|
||||
if( g_conf.m_logDebugDetailed ) log("%s:%s: Above is dead. numAlive: %"UINT32"", __FILE__,__FUNCTION__, numAlive);
|
||||
if( g_conf.m_logDebugDetailed ) log("%s:%s:%d: Above is dead. numAlive: %"UINT32"", __FILE__,__func__, __LINE__, numAlive);
|
||||
|
||||
|
||||
// NULL if none
|
||||
if ( numAlive == 0 )
|
||||
{
|
||||
if( g_conf.m_logDebugDetailed ) log("%s:%s: None alive. return NULL", __FILE__,__FUNCTION__);
|
||||
if( g_conf.m_logDebugDetailed ) log("%s:%s:%d: None alive. return NULL", __FILE__,__func__, __LINE__);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -2503,8 +2503,8 @@ Host *Dns::getResponsibleHost ( key_t key )
|
||||
|
||||
if( g_conf.m_logDebugDetailed )
|
||||
{
|
||||
log("%s:%s: hostNum: %"INT32"", __FILE__,__FUNCTION__, hostNum);
|
||||
log("%s:%s: m_numHosts: %"INT32"", __FILE__,__FUNCTION__, hostdb->m_numHosts);
|
||||
log("%s:%s:%d: hostNum: %"INT32"", __FILE__,__func__, __LINE__, hostNum);
|
||||
log("%s:%s:%d: m_numHosts: %"INT32"", __FILE__,__func__, __LINE__, hostdb->m_numHosts);
|
||||
}
|
||||
|
||||
|
||||
@ -2517,34 +2517,34 @@ Host *Dns::getResponsibleHost ( key_t key )
|
||||
Host *host = &hostdb->m_hosts[i];
|
||||
if ( !host->m_spiderEnabled )
|
||||
{
|
||||
if( g_conf.m_logDebugDetailed ) log("%s:%s: i: %"INT32" - spidering disabled", __FILE__,__FUNCTION__, i);
|
||||
if( g_conf.m_logDebugDetailed ) log("%s:%s:%d: i: %"INT32" - spidering disabled", __FILE__,__func__, __LINE__, i);
|
||||
continue;
|
||||
}
|
||||
|
||||
// skip him if he is dead
|
||||
if ( hostdb->isDead ( host ) )
|
||||
{
|
||||
if( g_conf.m_logDebugDetailed ) log("%s:%s: i: %"INT32" - dead", __FILE__,__FUNCTION__, i);
|
||||
if( g_conf.m_logDebugDetailed ) log("%s:%s:%d: i: %"INT32" - dead", __FILE__,__func__, __LINE__, i);
|
||||
continue;
|
||||
}
|
||||
|
||||
// count it if alive, continue if not our number
|
||||
if ( count++ != hostNum )
|
||||
{
|
||||
if( g_conf.m_logDebugDetailed ) log("%s:%s: i: %"INT32" - not our host (%"INT32")", __FILE__,__FUNCTION__, i, hostNum);
|
||||
if( g_conf.m_logDebugDetailed ) log("%s:%s:%d: i: %"INT32" - not our host (%"INT32")", __FILE__,__func__, __LINE__, i, hostNum);
|
||||
|
||||
continue;
|
||||
}
|
||||
// we got a match, we cannot use hostNum as the hostId now
|
||||
// because the host with that hostId might be dead
|
||||
|
||||
if( g_conf.m_logDebugDetailed ) log("%s:%s: END. i: %"INT32" - Match!", __FILE__,__FUNCTION__, i);
|
||||
if( g_conf.m_logDebugDetailed ) log("%s:%s:%d: END. i: %"INT32" - Match!", __FILE__,__func__, __LINE__, i);
|
||||
|
||||
return host;
|
||||
}
|
||||
|
||||
|
||||
if( g_conf.m_logDebugDetailed ) log("%s:%s: END. Return EHOSTDEAD. None found", __FILE__,__FUNCTION__);
|
||||
if( g_conf.m_logDebugDetailed ) log("%s:%s:%d: END. Return EHOSTDEAD. None found", __FILE__,__func__, __LINE__ );
|
||||
|
||||
g_errno = EHOSTDEAD;
|
||||
return NULL;
|
||||
|
@ -980,6 +980,9 @@ void handleRequest7Import ( UdpSlot *slot , int32_t netnice ) {
|
||||
// hack this
|
||||
xd->m_slot = slot;
|
||||
// then index it
|
||||
|
||||
if( g_conf.m_logDebugDetailed ) log(LOG_TRACE,"%s:%s:%d: Calling XmlDoc->indexDoc", __FILE__, __func__, __LINE__);
|
||||
|
||||
if ( ! xd->indexDoc() )
|
||||
// return if would block
|
||||
return;
|
||||
|
@ -2037,6 +2037,8 @@ bool SpiderLoop::spiderUrl9 ( SpiderRequest *sreq ,
|
||||
|
||||
bool SpiderLoop::spiderUrl2 ( ) {
|
||||
|
||||
if( g_conf.m_logTraceSpider ) log(LOG_TRACE,"%s:%s:%d: BEGIN", __FILE__, __func__, __LINE__);
|
||||
|
||||
// sanity check
|
||||
//if ( ! m_sreq->m_doled ) { char *xx=NULL;*xx=0; }
|
||||
|
||||
@ -2067,6 +2069,8 @@ bool SpiderLoop::spiderUrl2 ( ) {
|
||||
log("build: Could not allocate %"INT32" bytes to spider "
|
||||
"the url %s. Will retry later.",
|
||||
(int32_t)sizeof(XmlDoc), m_sreq->m_url );
|
||||
|
||||
if( g_conf.m_logTraceSpider ) log(LOG_TRACE,"%s:%s:%d: END, new XmlDoc failed", __FILE__, __func__, __LINE__);
|
||||
return true;
|
||||
}
|
||||
// register it's mem usage with Mem.cpp class
|
||||
@ -2128,6 +2132,7 @@ bool SpiderLoop::spiderUrl2 ( ) {
|
||||
delete (m_docs[i]);
|
||||
m_docs[i] = NULL;
|
||||
// error, g_errno should be set!
|
||||
if( g_conf.m_logTraceSpider ) log(LOG_TRACE,"%s:%s:%d: END, xd->set4 returned false", __FILE__, __func__, __LINE__);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -2198,7 +2203,9 @@ bool SpiderLoop::spiderUrl2 ( ) {
|
||||
|
||||
// . return if this blocked
|
||||
// . no, launch another spider!
|
||||
if( g_conf.m_logTraceSpider ) log(LOG_TRACE,"%s:%s:%d: calling xd->indexDoc", __FILE__, __func__, __LINE__);
|
||||
bool status = xd->indexDoc();
|
||||
if( g_conf.m_logTraceSpider ) log(LOG_TRACE,"%s:%s:%d: indexDoc status [%s]", __FILE__, __func__, __LINE__, status?"true":"false");
|
||||
|
||||
// . reset the next doledbkey to start over!
|
||||
// . when spiderDoledUrls() see this negative priority it will
|
||||
@ -2210,12 +2217,17 @@ bool SpiderLoop::spiderUrl2 ( ) {
|
||||
//m_sc->setPriority ( MAX_SPIDER_PRIORITIES - 1 );
|
||||
|
||||
// if we were injecting and it blocked... return false
|
||||
if ( ! status ) return false;
|
||||
if ( ! status )
|
||||
{
|
||||
if( g_conf.m_logTraceSpider ) log(LOG_TRACE,"%s:%s:%d: END, indexDoc blocked", __FILE__, __func__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
|
||||
// deal with this error
|
||||
indexedDoc ( xd );
|
||||
|
||||
// "callback" will not be called cuz it should be NULL
|
||||
if( g_conf.m_logTraceSpider ) log(LOG_TRACE,"%s:%s:%d: END, return true", __FILE__, __func__, __LINE__);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -2251,7 +2263,8 @@ void indexedDocWrapper ( void *state ) {
|
||||
// . returns false if blocked, true otherwise
|
||||
// . sets g_errno on error
|
||||
bool SpiderLoop::indexedDoc ( XmlDoc *xd ) {
|
||||
|
||||
if( g_conf.m_logTraceSpider ) log(LOG_TRACE,"%s:%s:%d: BEGIN", __FILE__, __func__, __LINE__);
|
||||
|
||||
// save the error in case a call changes it below
|
||||
//int32_t saved = g_errno;
|
||||
|
||||
@ -2463,6 +2476,7 @@ bool SpiderLoop::indexedDoc ( XmlDoc *xd ) {
|
||||
//if ( ! removeAllLocks () ) return false;
|
||||
|
||||
// we did not block, so return true
|
||||
if( g_conf.m_logTraceSpider ) log(LOG_TRACE,"%s:%s:%d: END", __FILE__, __func__, __LINE__);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
55
XmlDoc.cpp
55
XmlDoc.cpp
@ -1674,14 +1674,25 @@ void XmlDoc::setCallback ( void *state, bool (*callback) (void *state) ) {
|
||||
m_callback2 = callback;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void indexDocWrapper ( void *state ) {
|
||||
if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: BEGIN", __FILE__, __func__, __LINE__);
|
||||
|
||||
XmlDoc *THIS = (XmlDoc *)state;
|
||||
// make sure has not been freed from under us!
|
||||
if ( THIS->m_freed ) { char *xx=NULL;*xx=0;}
|
||||
// note it
|
||||
THIS->setStatus ( "in index doc wrapper" );
|
||||
|
||||
if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: Calling XmlDoc::indexDoc", __FILE__, __func__, __LINE__);
|
||||
// return if it blocked
|
||||
if ( ! THIS->indexDoc( ) ) return;
|
||||
if ( ! THIS->indexDoc( ) )
|
||||
{
|
||||
if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, indexDoc blocked", __FILE__, __func__, __LINE__);
|
||||
return;
|
||||
}
|
||||
|
||||
// otherwise, all done, call the caller callback
|
||||
|
||||
// g_statsdb.addStat ( MAX_NICENESS,
|
||||
@ -1691,15 +1702,35 @@ void indexDocWrapper ( void *state ) {
|
||||
// );
|
||||
|
||||
|
||||
if ( THIS->m_callback1 ) THIS->m_callback1 ( THIS->m_state );
|
||||
else THIS->m_callback2 ( THIS->m_state );
|
||||
if ( THIS->m_callback1 )
|
||||
{
|
||||
if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: Calling callback1", __FILE__, __func__, __LINE__);
|
||||
|
||||
THIS->m_callback1 ( THIS->m_state );
|
||||
}
|
||||
else
|
||||
{
|
||||
if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: Calling callback2", __FILE__, __func__, __LINE__);
|
||||
|
||||
THIS->m_callback2 ( THIS->m_state );
|
||||
}
|
||||
|
||||
if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END", __FILE__, __func__, __LINE__);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// for registerSleepCallback
|
||||
void indexDocWrapper2 ( int fd , void *state ) {
|
||||
if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: BEGIN", __FILE__, __func__, __LINE__);
|
||||
|
||||
indexDocWrapper ( state );
|
||||
|
||||
if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END", __FILE__, __func__, __LINE__);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// . inject from http request
|
||||
// . replace more of Msg7.cpp logic with this?
|
||||
//bool XmlDoc::injectDoc ( HttpRequest *hr ) {
|
||||
@ -1999,6 +2030,9 @@ void XmlDoc::getRebuiltSpiderRequest ( SpiderRequest *sreq ) {
|
||||
}
|
||||
|
||||
|
||||
|
||||
#include <execinfo.h>
|
||||
|
||||
////////////////////////////////////////////////////////////////////
|
||||
// THIS IS THE HEART OF HOW THE PARSER ADDS TO THE RDBS
|
||||
////////////////////////////////////////////////////////////////////
|
||||
@ -2573,6 +2607,7 @@ bool XmlDoc::indexDoc2 ( ) {
|
||||
// HACK: flush it if we are injecting it in case the next thing we
|
||||
// spider is dependent on this one
|
||||
if ( flush ) {
|
||||
if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: Flushing msg4 buffers", __FILE__, __func__, __LINE__);
|
||||
// note it
|
||||
setStatus ( "flushing msg4" );
|
||||
// only do it once
|
||||
@ -15920,6 +15955,8 @@ int32_t *XmlDoc::getSpiderPriority ( ) {
|
||||
return &m_priority;
|
||||
}
|
||||
|
||||
|
||||
|
||||
bool XmlDoc::logIt (SafeBuf *bb ) {
|
||||
|
||||
// set errCode
|
||||
@ -17968,6 +18005,16 @@ char *XmlDoc::getMetaList ( bool forDelete ) {
|
||||
if ( m_indexCode == EDOCNONCANONICAL )
|
||||
spideringLinks = true;
|
||||
|
||||
|
||||
|
||||
//@@@ BR TEST. Awaiting Matt's sanity check
|
||||
//if( !m_setFromTitleRec )
|
||||
//{
|
||||
|
||||
//if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: m_setFromTitleRec: %s", __FILE__, __func__, __LINE__, m_setFromTitleRec?"true":"false");
|
||||
//if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: spideringLinks...: %s", __FILE__, __func__, __LINE__, spideringLinks?"true":"false");
|
||||
|
||||
|
||||
//
|
||||
// . prepare the outlink info if we are adding links to spiderdb!
|
||||
// . do this before we start hashing so we do not block and re-hash!!
|
||||
@ -17999,6 +18046,8 @@ char *XmlDoc::getMetaList ( bool forDelete ) {
|
||||
//if ( ! ipi || ipi == (char *)-1 ) return (char *)ipi;
|
||||
}
|
||||
|
||||
//}
|
||||
|
||||
// get the tag buf to add to tagdb
|
||||
SafeBuf *ntb = NULL;
|
||||
if ( m_useTagdb && ! m_deleteFromIndex ) {
|
||||
|
Reference in New Issue
Block a user