Clean up logging so I can see what's going on
commit 76bb3d05e1
parent db74af766b
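
Every hunk below makes the same kind of change: a bare log("...") call, which gb emits at its default level, becomes the leveled overload log(LOG_INIT|LOG_INFO|LOG_DEBUG, "..."), and message prefixes are consolidated (mostly under "db:") so related output can be grepped and filtered by level. A minimal sketch of the convention, assuming only the log() overloads and LOG_* constants that appear in the hunks themselves; the wrapper function and header name are illustrative, not from the commit:

    // Sketch, not from the commit: the leveled-logging convention this
    // change adopts. Assumes gb's Log.h declares an unleveled
    // log(fmt,...) and a leveled log(type,fmt,...) overload, plus the
    // LOG_INIT / LOG_INFO / LOG_DEBUG level constants seen below.
    #include "Log.h"

    void logConventionSketch ( ) {
        //log("db: resetting collectiondb.");         // before: unfilterable
        log(LOG_INFO ,"db: resetting collectiondb.");           // normal event
        log(LOG_INIT ,"db: Loading collection config files.");  // startup only
        log(LOG_DEBUG,"spider: making waiting tree done.");     // noisy detail
    }
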
Collectiondb.cpp

@@ -37,7 +37,7 @@ Collectiondb::Collectiondb ( ) {
 
 // reset rdb
 void Collectiondb::reset() {
-	log("db: resetting collectiondb.");
+	log(LOG_INFO,"db: resetting collectiondb.");
 	for ( long i = 0 ; i < m_numRecs ; i++ ) {
 		if ( ! m_recs[i] ) continue;
 		mdelete ( m_recs[i], sizeof(CollectionRec), "CollectionRec" );
@@ -96,7 +96,7 @@ bool Collectiondb::load ( bool isDump ) {
 	if ( ! d.open ()) return log("admin: Could not load collection config "
 				     "files.");
 	// note it
-	log(LOG_INIT,"admin: Loading collection config files.");
+	log(LOG_INFO,"db: Loading collection config files.");
 	// . scan through all subdirs in the collections dir
 	// . they should be like, "coll.main/" and "coll.mycollection/"
 	char *f;
@@ -118,7 +118,7 @@ bool Collectiondb::load ( bool isDump ) {
 		return false;
 	}
 	// note it
-	log(LOG_INIT,"admin: Loaded data for %li collections. Ranging from "
+	log(LOG_INFO,"db: Loaded data for %li collections. Ranging from "
 	    "collection #0 to #%li.",m_numRecsUsed,m_numRecs-1);
 	// update the time
 	updateTime();
@@ -570,7 +570,7 @@ bool Collectiondb::registerCollRec ( CollectionRec *cr ,
 	if ( ! g_doledb.addColl ( coll, verify ) ) goto hadError;
 
 	// debug message
-	log ( LOG_INFO, "admin: verified collection \"%s\" (%li).",
+	log ( LOG_INFO, "db: verified collection \"%s\" (%li).",
 	      coll,(long)i);
 
 	// tell SpiderCache about this collection, it will create a
@@ -703,7 +703,7 @@ bool Collectiondb::deleteRec2 ( collnum_t collnum , WaitEntry *we ) {
 	char *coll = cr->m_coll;
 
 	// note it
-	log("coll: deleting coll \"%s\"",coll);
+	log(LOG_INFO,"db: deleting coll \"%s\"",coll);
 	// we need a save
 	m_needsSave = true;
 
@@ -1437,7 +1437,7 @@ bool CollectionRec::load ( char *coll , long i ) {
 	// LOAD LOCAL
 	sprintf ( tmp1 , "%scoll.%s.%li/localcrawlinfo.dat",
 		  g_hostdb.m_dir , m_coll , (long)m_collnum );
-	log("coll: loading %s",tmp1);
+	log(LOG_INFO,"db: loading %s",tmp1);
 	m_localCrawlInfo.reset();
 	SafeBuf sb;
 	// fillfromfile returns 0 if does not exist, -1 on read error
@@ -1448,7 +1448,7 @@ bool CollectionRec::load ( char *coll , long i ) {
 	// LOAD GLOBAL
 	sprintf ( tmp1 , "%scoll.%s.%li/globalcrawlinfo.dat",
 		  g_hostdb.m_dir , m_coll , (long)m_collnum );
-	log("coll: loading %s",tmp1);
+	log(LOG_INFO,"db: loading %s",tmp1);
 	m_globalCrawlInfo.reset();
 	sb.reset();
 	if ( sb.fillFromFile ( tmp1 ) > 0 )
@@ -1694,7 +1694,7 @@ bool CollectionRec::save ( ) {
 	// binary now
 	sb.safeMemcpy ( &m_localCrawlInfo , sizeof(CrawlInfo) );
 	if ( sb.dumpToFile ( tmp ) == -1 ) {
-		log("coll: failed to save file %s : %s",
+		log("db: failed to save file %s : %s",
 		    tmp,mstrerror(g_errno));
 		g_errno = 0;
 	}
@@ -1707,7 +1707,7 @@ bool CollectionRec::save ( ) {
 	// binary now
 	sb.safeMemcpy ( &m_globalCrawlInfo , sizeof(CrawlInfo) );
 	if ( sb.dumpToFile ( tmp ) == -1 ) {
-		log("coll: failed to save file %s : %s",
+		log("db: failed to save file %s : %s",
 		    tmp,mstrerror(g_errno));
 		g_errno = 0;
 	}

Conf.cpp (2 changed lines)

@@ -328,7 +328,7 @@ void Conf::setRootIps ( ) {
 	for ( long i = 0 ; i < n ; i++ ) {
 		m_rnsIps [i] = atoip(rootIps[i],gbstrlen(rootIps[i]));
 		m_rnsPorts[i] = 53;
-		log("dns: Using root nameserver #%li %s.",
+		log(LOG_INIT,"dns: Using root nameserver #%li %s.",
 		    i,iptoa(m_rnsIps[i]));
 	}
 }

File.cpp (2 changed lines)

@@ -699,7 +699,7 @@ bool File::unlink ( ) {
 	// return false and set g_errno on error
 	if ( status < 0 ) return false;
 	// log it so we can see what happened to timedb!
-	log("disk: unlinking %s", m_filename );
+	log(LOG_INFO,"disk: unlinking %s", m_filename );
 	// remove ourselves from the disk
 	if ( ::unlink ( m_filename ) == 0 ) return true;
 	// sync it to disk in case power goes out

Json.cpp (5 changed lines)

@@ -346,10 +346,13 @@ void Json::test ( ) {
 
 
 	long niceness = 0;
+
 	JsonItem *ji = parseJsonStringIntoJsonItems ( json , niceness );
 
 	// print them out?
-	log("json: type0=%li",(long)ji->m_type);
+	//log("json: type0=%li",(long)ji->m_type);
+	// sanity test
+	if ( ji->m_type != 6 ) { char *xx=NULL;*xx=0; }
 
 	return;
 }
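
The sanity test added in Json.cpp uses an idiom that appears throughout these files: char *xx=NULL;*xx=0; writes through a NULL pointer on purpose so the process faults and dumps core exactly at the failed check, like an assert() that is never compiled out. A standalone sketch of what it amounts to; the function name and message are invented for illustration:

    #include <cassert>
    #include <cstddef>

    // Illustrative only: the gb crash-on-purpose idiom next to a
    // standard-library equivalent.
    static void checkRootType ( long type ) {
        // gb style: deliberate NULL write; undefined behavior used as an
        // always-on assert so the core dump points at this exact line.
        if ( type != 6 ) { char *xx = NULL; *xx = 0; }

        // standard alternative; disappears when NDEBUG is defined.
        assert ( type == 6 && "unexpected JSON root item type" );
    }
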

Make.depend (1563 changed lines): diff suppressed because it is too large.

@@ -17156,7 +17156,7 @@ bool sendPageSiteMap ( TcpSocket *s , HttpRequest *r ) {
 #include "HashTable.h"
 #include "Msg4.h"
 #include "AutoBan.h"
-#include "CollectionRec.h"
+//#include "CollectionRec.h"
 //#include "Links.h"
 #include "Users.h"
 #include "HashTableT.h"

Parms.cpp (10 changed lines)

@@ -3700,13 +3700,14 @@ bool Parms::setFromFile ( void *THIS ,
 	// . all the collectionRecs have the same default file in
 	//   the workingDir/collections/default.conf
 	// . so use our built in buffer for that
+	/*
 	if ( THIS != &g_conf && ! m_isDefaultLoaded ) {
 		m_isDefaultLoaded = true;
 		File f;
 		f.set ( filenameDef );
 		if ( ! f.doesExist() ) {
 			log(LOG_INIT,
-			    "admin: Default collection configuration file "
+			    "db: Default collection configuration file "
 			    "%s was not found. Newly created collections "
 			    "will use hard coded defaults.",f.getFilename());
 			goto skip;
@@ -3718,6 +3719,7 @@ bool Parms::setFromFile ( void *THIS ,
 	}
 
 skip:
+	*/
 	long vlen;
 	char *v ;
 	//char c ;
@@ -3928,9 +3930,9 @@ bool Parms::setFromFile ( void *THIS ,
 
 	// always make sure we got some admin security
 	if ( g_conf.m_numMasterIps <= 0 && g_conf.m_numMasterPwds <= 0 ) {
-		log(LOG_INFO,
-		    "conf: No master IP or password provided. Using default "
-		    "password 'footbar23'." );
+		//log(LOG_INFO,
+		//    "conf: No master IP or password provided. Using default "
+		//    "password 'footbar23'." );
 		//g_conf.m_masterIps[0] = atoip ( "64.139.94.202", 13 );
 		//g_conf.m_numMasterIps = 1;
 		strcpy ( g_conf.m_masterPwds[0] , "footbar23" );

Rdb.cpp (2 changed lines)

@@ -100,7 +100,7 @@ void Rdb::addBase ( collnum_t collnum , RdbBase *base ) {
 	if ( ! cr ) return;
 	if ( cr->m_bases[(unsigned char)m_rdbId] ) { char *xx=NULL;*xx=0; }
 	cr->m_bases[(unsigned char)m_rdbId] = base;
-	log("rdb: added base to collrec "
+	log ( LOG_INFO,"db: added base to collrec "
 	    "for rdb=%s rdbid=%li coll=%s collnum=%li base=0x%lx",
 	    m_dbname,(long)m_rdbId,cr->m_coll,(long)collnum,(long)base);
 }

RdbBase.cpp

@@ -132,8 +132,9 @@ bool RdbBase::init ( char *dir ,
 	char tmp[1024];
 	sprintf ( tmp , "%scoll.%s.%li" , dir , coll , (long)collnum );
 
-	// debug
-	log("base: adding new base for dir=%s coll=%s collnum=%li db=%s",
+	// logDebugAdmin
+	log(LOG_INIT,"db: "
+	    "adding new base for dir=%s coll=%s collnum=%li db=%s",
 	    dir,coll,(long)collnum,dbname);
 
 	// catdb is collection independent
@@ -502,7 +503,7 @@ bool RdbBase::setFiles ( ) {
 		// we are getting this from a bogus m_dir
 		return log("db: Had error opening directory %s", getDir());
 	// note it
-	logf(LOG_INFO,"db: Loading files for %s coll=%s (%li).",
+	log(LOG_DEBUG,"db: Loading files for %s coll=%s (%li).",
 	     m_dbname,m_coll,(long)m_collnum );
 	// . set our m_files array
 	// . addFile() will return -1 and set g_errno on error

Speller.cpp

@@ -1021,7 +1021,7 @@ bool Speller::loadUnifiedDict() {
 	char *end = start + m_unifiedBuf.length();
 	for ( char *p = start ; p < end ; p++ )
 		if ( *p == '\n' ) *p = '\0';
-	log("speller: done loading successfully");
+	log(LOG_DEBUG,"speller: done loading successfully");
 
 	// a quick little checksum
 	if ( ! g_conf.m_isLive ) return true;

Spider.cpp (25 changed lines)

@@ -915,7 +915,7 @@ bool SpiderCache::needsSave ( ) {
 }
 
 void SpiderCache::reset ( ) {
-	log("spider: resetting spidercache");
+	log(LOG_DEBUG,"spider: resetting spidercache");
 	// loop over all SpiderColls and get the best
 	for ( long i = 0 ; i < g_collectiondb.getNumRecs() ; i++ ) {
 		SpiderColl *sc = getSpiderCollIffNonNull(i);
@@ -970,7 +970,7 @@ SpiderColl *SpiderCache::getSpiderColl ( collnum_t collnum ) {
 	//m_spiderColls [ collnum ] = sc;
 	cr->m_spiderColl = sc;
 	// note it
-	log("spider: made spidercoll=%lx for cr=%lx",
+	log(LOG_DEBUG,"spider: made spidercoll=%lx for cr=%lx",
 	    (long)sc,(long)cr);
 	// update this
 	//if ( m_numSpiderColls < collnum + 1 )
@@ -992,7 +992,8 @@ SpiderColl *SpiderCache::getSpiderColl ( collnum_t collnum ) {
 	// sanity check
 	if ( ! cr ) { char *xx=NULL;*xx=0; }
 	// note it!
-	log("spider: adding new spider collection for %s",cr->m_coll);
+	log(LOG_DEBUG,"spider: adding new spider collection for %s",
+	    cr->m_coll);
 	// that was it
 	return sc;
 }
@@ -1130,7 +1131,7 @@ bool SpiderColl::load ( ) {
 // this should block since we are at startup...
 bool SpiderColl::makeDoleIPTable ( ) {
 
-	log("spider: making dole ip table for %s",m_coll);
+	log(LOG_DEBUG,"spider: making dole ip table for %s",m_coll);
 
 	key_t startKey ; startKey.setMin();
 	key_t endKey   ; endKey.setMax();
@@ -1203,7 +1204,7 @@ bool SpiderColl::makeDoleIPTable ( ) {
 	// watch out for wrap around
 	if ( startKey >= *(key_t *)list.getLastKey() ) goto loop;
 done:
-	log("spider: making dole ip table done.");
+	log(LOG_DEBUG,"spider: making dole ip table done.");
 	// re-enable threads
 	if ( enabled ) g_threads.enableThreads();
 	// we wrapped, all done
@@ -1317,7 +1318,8 @@ void SpiderColl::urlFiltersChanged ( ) {
 
 // this one has to scan all of spiderdb
 bool SpiderColl::makeWaitingTree ( ) {
-	log("spider: making waiting tree for %s",m_coll);
+
+	log(LOG_DEBUG,"spider: making waiting tree for %s",m_coll);
 
 	key128_t startKey ; startKey.setMin();
 	key128_t endKey   ; endKey.setMax();
@@ -1408,7 +1410,7 @@ bool SpiderColl::makeWaitingTree ( ) {
 	// watch out for wrap around
 	if ( startKey >= *(key128_t *)list.getLastKey() ) goto loop;
 done:
-	log("spider: making waiting tree done.");
+	log(LOG_DEBUG,"spider: making waiting tree done.");
 	// re-enable threads
 	if ( enabled ) g_threads.enableThreads();
 	// we wrapped, all done
@@ -1444,7 +1446,7 @@ long long SpiderColl::getEarliestSpiderTimeFromWaitingTree ( long firstIp ) {
 
 
 bool SpiderColl::makeWaitingTable ( ) {
-	logf(LOG_INFO,"spider: making waiting table for %s.",m_coll);
+	log(LOG_DEBUG,"spider: making waiting table for %s.",m_coll);
 	long node = m_waitingTree.getFirstNode();
 	for ( ; node >= 0 ; node = m_waitingTree.getNextNode(node) ) {
 		// breathe
@@ -1460,7 +1462,7 @@ bool SpiderColl::makeWaitingTable ( ) {
 		// store in waiting table
 		if ( ! m_waitingTable.addKey(&ip,&spiderTimeMS) ) return false;
 	}
-	logf(LOG_INFO,"spider: making waiting table done.");
+	log(LOG_DEBUG,"spider: making waiting table done.");
 	return true;
 }
 
@@ -1536,7 +1538,7 @@ void SpiderColl::reset ( ) {
 
 	char *coll = "unknown";
 	if ( m_coll[0] ) coll = m_coll;
-	logf(LOG_DEBUG,"spider: resetting spider cache coll=%s",coll);
+	log(LOG_DEBUG,"spider: resetting spider cache coll=%s",coll);
 
 	m_ufnMapValid = false;
 
@@ -4221,7 +4223,8 @@ void doneSleepingWrapperSL ( int fd , void *state ) {
 			// if a scan is ongoing, this will re-set it
 			sc->m_nextKey2.setMin();
 			sc->m_waitingTreeNeedsRebuild = true;
-			log("spider: hit rebuild timeout for %s",
+			log(LOG_INFO,
+			    "spider: hit rebuild timeout for %s",
 			    cr->m_coll);
 			// flush the ufn table
 			clearUfnTable();

Tagdb.cpp

@@ -1873,7 +1873,7 @@ bool Tagdb::verify ( char *coll ) {
 	char *rdbName = NULL;
 	rdbName = "Tagdb";
 
-	log ( LOG_INFO, "tagdb: Verifying %s for coll %s...", rdbName, coll );
+	log ( LOG_INFO, "db: Verifying %s for coll %s...", rdbName, coll );
 
 	g_threads.disableThreads();
 
@@ -1945,7 +1945,7 @@ bool Tagdb::verify ( char *coll ) {
 		g_threads.enableThreads();
 		return g_conf.m_bypassValidation;
 	}
-	log ( LOG_INFO, "tagdb: %s passed verification successfully for %li "
+	log ( LOG_INFO, "db: %s passed verification successfully for %li "
 	      "recs.",rdbName, count );
 
 	// turn threads back on

Threads.cpp

@@ -183,13 +183,15 @@ bool Threads::init ( ) {
 	// set s_pid to the main process id
 #ifdef PTHREADS
 	s_pid = pthread_self();
-	log("threads: main process THREAD id = %lu",(long unsigned)s_pid);
+	log(LOG_INFO,
+	    "threads: main process THREAD id = %lu",(long unsigned)s_pid);
 	pthread_t tid = pthread_self();
 	sched_param param;
 	int policy;
 	// scheduling parameters of target thread
 	pthread_getschedparam ( tid, &policy, &param);
-	log("threads: min/max thread priority settings = %li/%li (policy=%li)",
+	log(LOG_INFO,
+	    "threads: min/max thread priority settings = %li/%li (policy=%li)",
 	    (long)sched_get_priority_min(policy),
 	    (long)sched_get_priority_max(policy),
 	    (long)policy);

[collection configuration file]

@@ -84,9 +84,6 @@
 # The spider round number.
 <spiderRoundNum>0</>
 
-# The spider status number.
-<spiderStatus>0</>
-
 # Do searches for queries in this hosts part of the query log.
 <scrapingEnabledProcog>0</>
 
@@ -354,34 +351,6 @@
 # <i>undefined</i> to indicate no change in the priority of the url.
 <priorityOfUrlsBeingRetried>-1</>
 
-# Weight title this much more or less. This units are percentage. A 100 means
-# to not give the title any special weight. Generally, though, you want to
-# give it significantly more weight than that, so 2400 is the default.
-<titleWeight>4600</>
-
-# Weight terms in header tags by this much more or less. This units are
-# percentage. A 100 means to not give the header any special weight.
-# Generally, though, you want to give it significantly more weight than that,
-# so 600 is the default.
-<headerWeight>600</>
-
-# Weight text in url path this much more. The units are percentage. A 100
-# means to not give any special weight. Generally, though, you want to give it
-# significantly more weight than that, so 600 is the default.
-<urlPathWordWeight>1600</>
-
-# Weight text in the incoming external link text this much more. The units are
-# percentage. It already receives a decent amount of weight naturally.
-<externalLinkTextWeight>600</>
-
-# Weight text in the incoming internal link text this much more. The units are
-# percentage. It already receives a decent amount of weight naturally.
-<internalLinkTextWeight>200</>
-
-# Weight concepts this much more. The units are percentage. It already
-# receives a decent amount of weight naturally. AKA: surrounding text boost.
-<conceptWeight>50</>
-
 # If this is true Gigablast will only search the root index file for docIds.
 # Saves on disk seeks, but may use older versions of indexed web pages.
 <restrictIndexdbForQueries>0</>

hosts.conf

@@ -55,7 +55,7 @@ num-mirrors: 0
 # The working directory is the last string on each line. That is where the
 # 'gb' binary resides.
 #
-0 5998 7000 8000 9000 127.0.0.1 127.0.0.1 /home/mwells/github/
+0 5998 7000 8000 9000 127.0.0.1 127.0.0.1 /home/mwells/parmdb/
 
 
 