#include "Doledb.h"
|
|
#include "SpiderCache.h"
|
|
#include "SpiderLoop.h"
|
|
#include "SpiderColl.h"
|
|
#include "ScopedLock.h"
|
|
#include "Collectiondb.h"
|
|
#include "Conf.h"
|
|
#include "Loop.h"
|
|
|
|
|
|
Doledb g_doledb;
|
|
|
|
|
|
static void nukeAllDoledbsPeriodically(int, void *);

/////////////////////////
///////////////////////// DOLEDB
/////////////////////////

// reset rdb
void Doledb::reset() { m_rdb.reset(); }

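// Initialize doledb's internal Rdb and, if g_conf.m_doledbNukeInterval is
// set, schedule the periodic full nuke (see nukeAllDoledbsPeriodically() below).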
bool Doledb::init ( ) {
	if(g_conf.m_doledbNukeInterval>0) {
		log(LOG_INFO,"spider: nuking Doledb periodically is enabled, interval = %d seconds", g_conf.m_doledbNukeInterval);
		g_loop.registerSleepCallback(g_conf.m_doledbNukeInterval*1000, NULL, nukeAllDoledbsPeriodically, "nukeAllDoledbsPeriodically");
	} else
		log(LOG_INFO,"spider: nuking Doledb periodically is disabled");

	// . what's max # of tree nodes?
	// . assume avg spider rec size (url) is about 45
	// . 45 + 33 bytes overhead in tree is 78
	// . use 150MB for the tree
	int32_t maxTreeMem = 150000000; // 150MB
	int32_t maxTreeNodes = maxTreeMem / 78;

	// initialize our own internal rdb
	return m_rdb.init ( "doledb" ,
	                    -1 ,      // fixedDataSize
	                    2 ,       // minFilesToMerge
	                    maxTreeMem ,
	                    maxTreeNodes ,
	                    false ,   // half keys?
	                    12 ,      // key size
	                    false );  // useIndexFile
}

//
// remove all recs from doledb for the given collection
//
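// Retry callback: nukeDoledb() registers this when the Rdb tree is being
// saved. It unregisters itself, frees the heap-allocated collnum, and
// retries the nuke.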
static void nukeDoledbWrapper ( int fd , void *state ) {
	g_loop.unregisterSleepCallback ( state , nukeDoledbWrapper );
	collnum_t collnum = *(collnum_t *)state;
	delete (collnum_t *)state;
	nukeDoledb ( collnum );
}
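
// Drop all doledb records for one collection and trigger a rebuild of its
// waiting tree from spiderdb. If the Rdb tree is currently being saved,
// defer via nukeDoledbWrapper() and retry every 100ms until the save is done.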
void nukeDoledb ( collnum_t collnum ) {
	// in case we changed url filters for this collection #
	g_spiderLoop.nukeWinnerListCache(collnum);

	// . nuke doledb for this collnum
	// . it will unlink the files and maps for doledb for this collnum
	// . it will remove all recs of this collnum from its tree too
	if (g_doledb.getRdb()->isSavingTree()) {
		// pass a heap-allocated copy of collnum: the callback fires
		// after this function has returned, so a pointer to the stack
		// parameter would dangle. nukeDoledbWrapper() frees the copy.
		collnum_t *collnumCopy = new collnum_t(collnum);
		g_loop.registerSleepCallback(100, collnumCopy, nukeDoledbWrapper, "Doledb::nukeDoledbWrapper");
		return;
	}

	// . ok, tree is not saving, it should complete entirely from this call
	g_doledb.getRdb()->deleteAllRecs(collnum);

	SpiderColl *sc = g_spiderCache.getSpiderCollIffNonNull(collnum);
	if ( sc ) {
		// . make sure to nuke m_doledbIpTable as well
		sc->clearDoledbIpTable();
		// need to recompute this!
		//sc->m_ufnMapValid = false;

		{
			ScopedLock sl(sc->m_waitingTree.getLock());

			// log it
			log("spider: rebuilding %s from doledb nuke", sc->getCollName());
			// activate a scan if not already activated
			sc->m_waitingTreeNeedsRebuild = true;
			// if a scan is ongoing, this will re-set it
			sc->resetWaitingTreeNextKey();
			// clear the waiting tree and waiting table
			sc->m_waitingTree.clear_unlocked();
			sc->clearWaitingTable();
		}

		// kick off the spiderdb scan to repopulate the waiting tree and doledb
		sc->populateWaitingTreeFromSpiderdb(false);
	}

	// note it
	log("spider: finished nuking doledb for coll (%" PRId32")",
	    (int32_t)collnum);
}
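
// Retry callback: nukeAllDoledbs() registers this when the Rdb tree is being
// saved. It unregisters itself and retries.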
static void nukeAllDoledbsWrapper(int /*fd*/, void *state) {
	g_loop.unregisterSleepCallback(state, nukeAllDoledbsWrapper);
	nukeAllDoledbs();
}
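
// Nuke doledb for every collection in collectiondb. Called by the periodic
// sleep callback registered in Doledb::init().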
void nukeAllDoledbs() {
	log(LOG_INFO,"spider: beginning nuking all doledbs");
	if (g_doledb.getRdb()->isSavingTree()) {
		g_loop.registerSleepCallback(100, NULL, nukeAllDoledbsWrapper, "nukeAllDoledbsWrapper");
		return;
	}

	for(collnum_t collnum=g_collectiondb.getFirstCollnum();
	    collnum<g_collectiondb.getNumRecs();
	    collnum++)
	{
		if(g_collectiondb.getRec(collnum)) {
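			// the per-collection logic below mirrors nukeDoledb(),
			// minus the isSavingTree() retry already handled above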
			// in case we changed url filters for this collection #
			g_spiderLoop.nukeWinnerListCache(collnum);

			// . nuke doledb for this collnum
			// . it will unlink the files and maps for doledb for this collnum
			// . it will remove all recs of this collnum from its tree too
			g_doledb.getRdb()->deleteAllRecs(collnum);

			SpiderColl *sc = g_spiderCache.getSpiderCollIffNonNull(collnum);
			if ( sc ) {
				// . make sure to nuke m_doledbIpTable as well
				sc->clearDoledbIpTable();
				// need to recompute this!
				//sc->m_ufnMapValid = false;

				{
					ScopedLock sl(sc->m_waitingTree.getLock());

					// log it
					log("spider: rebuilding %s from doledb nuke", sc->getCollName());
					// activate a scan if not already activated
					sc->m_waitingTreeNeedsRebuild = true;
					// if a scan is ongoing, this will re-set it
					sc->resetWaitingTreeNextKey();
					// clear the waiting tree and waiting table
					sc->m_waitingTree.clear_unlocked();
					sc->clearWaitingTable();
				}

				// kick off the spiderdb scan to repopulate the waiting tree and doledb
				sc->populateWaitingTreeFromSpiderdb(false);
			}
		}
	}

	// note it
	log(LOG_INFO,"spider: finished nuking all doledbs");
}


// Nuking doledb+waitingtree shouldn't really be necessary, but the code
// handling spiderdb+doledb+doledbiptable+waitingtree+waitingtable isn't
// 100% error-free, so we sometimes end up with lost records and priority
// inversions. The easiest solution for this right now is to periodically
// nuke all doledbs.
static void nukeAllDoledbsPeriodically(int, void *) {
	nukeAllDoledbs();
}