2137 lines
62 KiB
C++
2137 lines
62 KiB
C++
#include "Rdb.h"
|
|
#include "Clusterdb.h"
|
|
#include "Hostdb.h"
|
|
#include "Tagdb.h"
|
|
#include "Posdb.h"
|
|
#include "Titledb.h"
|
|
#include "Repair.h"
|
|
#include "RdbMerge.h"
|
|
#include "Process.h"
|
|
#include "Sections.h"
|
|
#include "SpiderCache.h"
|
|
#include "SpiderColl.h"
|
|
#include "Doledb.h"
|
|
#include "Linkdb.h"
|
|
#include "Collectiondb.h"
|
|
#include "hash.h"
|
|
#include "Stats.h"
|
|
#include "GbMoveFile.h"
|
|
#include "ip.h"
|
|
#include "max_niceness.h"
|
|
#include "Conf.h"
|
|
#include "Mem.h"
|
|
#include "ScopedLock.h"
|
|
#include "Errno.h"
|
|
#include <sys/stat.h> //mdir()
|
|
#include <unistd.h>
|
|
|
|
|
|
Rdb::Rdb ( ) {
|
|
|
|
m_lastReclaim = -1;
|
|
|
|
m_cacheLastTime = 0;
|
|
m_cacheLastTotal = 0LL;
|
|
|
|
//m_numBases = 0;
|
|
m_initialized = false;
|
|
m_numMergesOut = 0;
|
|
|
|
// Coverity
|
|
m_fixedDataSize = 0;
|
|
m_dbnameLen = 0;
|
|
m_useIndexFile = false;
|
|
m_useTree = false;
|
|
m_minToMerge = 0;
|
|
m_dumpErrno = 0;
|
|
m_useHalfKeys = false;
|
|
m_niceness = false;
|
|
m_isDumping = false;
|
|
m_rdbId = RDB_NONE;
|
|
m_ks = 0;
|
|
m_pageSize = 0;
|
|
// PVS-Studio
|
|
memset(m_dbname, 0, sizeof(m_dbname));
|
|
memset(m_treeAllocName, 0, sizeof(m_treeAllocName));
|
|
memset(m_memAllocName, 0, sizeof(m_memAllocName));
|
|
|
|
reset();
|
|
}
|
|
|
|
void Rdb::reset ( ) {
|
|
// reset tree and cache
|
|
m_tree.reset();
|
|
m_buckets.reset();
|
|
m_mem.reset();
|
|
}
|
|
|
|
Rdb::~Rdb ( ) {
|
|
reset();
|
|
}
|
|
|
|
|
|
int32_t Rdb::getNumBases() const {
|
|
return g_collectiondb.getNumRecs();
|
|
}
|
|
|
|
RdbBase *Rdb::getBase ( collnum_t collnum ) {
|
|
CollectionRec *cr = g_collectiondb.getRec(collnum);
|
|
if ( ! cr ) return NULL;
|
|
// this might load the rdbbase on demand now
|
|
return cr->getBase ( m_rdbId ); // m_bases[(unsigned char)m_rdbId];
|
|
}
|
|
|
|
// used by Rdb::addBase1()
|
|
void Rdb::addBase ( collnum_t collnum , RdbBase *base ) {
|
|
CollectionRec *cr = g_collectiondb.getRec(collnum);
|
|
if ( ! cr ) return;
|
|
//if ( cr->m_bases[(unsigned char)m_rdbId] ) { g_process.shutdownAbort(true); }
|
|
RdbBase *oldBase = cr->getBase( m_rdbId );
|
|
if ( oldBase ) { g_process.shutdownAbort(true); }
|
|
cr->setBasePtr ( m_rdbId , base );
|
|
log ( LOG_DEBUG,"db: added base to collrec "
|
|
"for rdb=%s rdbid=%" PRId32" coll=%s collnum=%" PRId32" "
|
|
"base=%p",
|
|
m_dbname,(int32_t)m_rdbId,cr->m_coll,(int32_t)collnum,
|
|
base);
|
|
}
|
|
|
|
bool Rdb::init(const char *dbname,
|
|
int32_t fixedDataSize ,
|
|
int32_t minToMerge ,
|
|
int32_t maxTreeMem ,
|
|
int32_t maxTreeNodes ,
|
|
bool useHalfKeys ,
|
|
char keySize ,
|
|
bool useIndexFile ) {
|
|
// reset all
|
|
reset();
|
|
|
|
// save the dbname NULL terminated into m_dbname/m_dbnameLen
|
|
m_dbnameLen = strlen ( dbname );
|
|
memcpy ( m_dbname , dbname , m_dbnameLen );
|
|
m_dbname [ m_dbnameLen ] = '\0';
|
|
|
|
// store the other parameters for initializing each Rdb
|
|
m_fixedDataSize = fixedDataSize;
|
|
m_useHalfKeys = useHalfKeys;
|
|
m_ks = keySize;
|
|
m_useIndexFile = useIndexFile;
|
|
m_isDumping = false;
|
|
|
|
// set our id
|
|
m_rdbId = getIdFromRdb(this);
|
|
if (m_rdbId <= 0) {
|
|
log( LOG_LOGIC, "db: dbname of %s is invalid.", dbname );
|
|
return false;
|
|
}
|
|
|
|
// sanity check
|
|
if (m_ks != getKeySizeFromRdbId(m_rdbId)) {
|
|
g_process.shutdownAbort(true);
|
|
}
|
|
|
|
// get page size
|
|
switch(m_rdbId) {
|
|
case RDB_POSDB:
|
|
case RDB2_POSDB2:
|
|
case RDB_TITLEDB:
|
|
case RDB2_TITLEDB2:
|
|
case RDB_SPIDERDB_DEPRECATED:
|
|
case RDB_DOLEDB:
|
|
case RDB2_SPIDERDB2_DEPRECATED:
|
|
case RDB_LINKDB:
|
|
case RDB2_LINKDB2:
|
|
m_pageSize = GB_INDEXDB_PAGE_SIZE;
|
|
break;
|
|
// Not a real rdb: case RDB_SPIDERDB_SQLITE:
|
|
// Not a real rdb: case RDB2_SPIDERDB2_SQLITE:
|
|
default:
|
|
m_pageSize = GB_TFNDB_PAGE_SIZE;
|
|
}
|
|
|
|
// we can't merge more than MAX_RDB_FILES files at a time
|
|
if ( minToMerge > MAX_RDB_FILES ) minToMerge = MAX_RDB_FILES;
|
|
m_minToMerge = minToMerge;
|
|
|
|
m_useTree = true;
|
|
if ( m_rdbId == RDB_POSDB || m_rdbId == RDB2_POSDB2 ) {
|
|
m_useTree = false;
|
|
}
|
|
|
|
if(m_useTree) {
|
|
sprintf(m_treeAllocName,"tree-%s",m_dbname);
|
|
if (!m_tree.set(fixedDataSize, maxTreeNodes, maxTreeMem, false, m_treeAllocName, m_dbname, m_ks, m_rdbId)) {
|
|
log( LOG_ERROR, "db: Failed to set tree." );
|
|
return false;
|
|
}
|
|
} else {
|
|
sprintf(m_treeAllocName,"buckets-%s",m_dbname);
|
|
if (!m_buckets.set(fixedDataSize, maxTreeMem, m_treeAllocName, m_rdbId, m_dbname, m_ks)) {
|
|
log( LOG_ERROR, "db: Failed to set buckets." );
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// now get how much mem the tree is using (not including stored recs)
|
|
int32_t dataMem;
|
|
if (m_useTree) dataMem = maxTreeMem - m_tree.getTreeOverhead();
|
|
else dataMem = maxTreeMem - m_buckets.getMemOccupied();
|
|
|
|
sprintf(m_memAllocName,"mem-%s",m_dbname);
|
|
|
|
if ( fixedDataSize != 0 && ! m_mem.init ( dataMem, m_memAllocName ) ) {
|
|
log( LOG_ERROR, "db: Failed to initialize memory: %s.", mstrerror( g_errno ) );
|
|
return false;
|
|
}
|
|
|
|
// load any saved tree
|
|
if ( ! loadTree ( ) ) {
|
|
log( LOG_ERROR, "db: Failed to load tree." );
|
|
return false;
|
|
}
|
|
|
|
m_initialized = true;
|
|
|
|
// success
|
|
return true;
|
|
}
|
|
|
|
// . when the PageRepair.cpp rebuilds our rdb for a particular collection
|
|
// we clear out the old data just for that collection and point to the newly
|
|
// rebuilt data
|
|
// . rdb2 is the rebuilt/secondary rdb we want to set this primary rdb to
|
|
// . rename, for safe keeping purposes, current old files to :
|
|
// trash/coll.mycoll.timestamp.indexdb0001.dat.part30 and
|
|
// trash/timestamp.indexdb-saved.dat
|
|
// . rename newly rebuilt files from indexdbRebuild0001.dat.part30 to
|
|
// indexdb0001.dat.part30 (just remove the "Rebuild" from the filename)
|
|
// . remove all recs for that coll from the tree AND cache because the rebuilt
|
|
// rdb is replacing the primary rdb for this collection
|
|
// . the rebuilt secondary tree should be empty! (force dumped)
|
|
// . reload the maps/files in the primary rdb after we remove "Rebuild" from
|
|
// their filenames
|
|
// . returns false and sets g_errno on error
|
|
bool Rdb::updateToRebuildFiles ( Rdb *rdb2 , char *coll ) {
|
|
// how come not in repair mode?
|
|
if ( g_repairMode==REPAIR_MODE_NONE ) { g_process.shutdownAbort(true); }
|
|
// make a dir in the trash subfolder to hold them
|
|
uint32_t t = (uint32_t)getTime();
|
|
char dstDir[sizeof(g_hostdb.m_dir)+128];
|
|
// make the trash dir if not there
|
|
snprintf(dstDir, sizeof(dstDir), "%s/trash/" , g_hostdb.m_dir );
|
|
dstDir[ sizeof(dstDir)-1 ] = '\0';
|
|
|
|
int32_t status = ::mkdir ( dstDir , getDirCreationFlags() );
|
|
if ( status && errno != EEXIST ) {
|
|
g_errno = errno;
|
|
log(LOG_WARN, "repair: Could not mkdir(%s): %s",dstDir, mstrerror(errno));
|
|
return false;
|
|
}
|
|
|
|
// we have to create it
|
|
snprintf(dstDir, sizeof(dstDir), "%s/trash/rebuilt%" PRIu32"/" , g_hostdb.m_dir , t );
|
|
dstDir[ sizeof(dstDir)-1 ] = '\0';
|
|
|
|
status = ::mkdir ( dstDir , getDirCreationFlags() );
|
|
if ( status && errno != EEXIST ) {
|
|
g_errno = errno;
|
|
log(LOG_WARN, "repair: Could not mkdir(%s): %s",dstDir, mstrerror(errno));
|
|
return false;
|
|
}
|
|
|
|
// clear it in case it existed
|
|
g_errno = 0;
|
|
|
|
// delete old collection recs
|
|
CollectionRec *cr = g_collectiondb.getRec ( coll );
|
|
if ( ! cr ) {
|
|
log(LOG_WARN, "db: Exchange could not find coll, %s.",coll);
|
|
return false;
|
|
}
|
|
collnum_t collnum = cr->m_collnum;
|
|
|
|
RdbBase *base = getBase ( collnum );
|
|
if ( ! base ) {
|
|
log(LOG_WARN, "repair: Could not find old base for %s.", coll);
|
|
return false;
|
|
}
|
|
|
|
RdbBase *base2 = rdb2->getBase ( collnum );
|
|
if ( ! base2 ) {
|
|
log(LOG_WARN, "repair: Could not find new base for %s.", coll);
|
|
return false;
|
|
}
|
|
|
|
if ( rdb2->getNumUsedNodes() != 0 ) {
|
|
log(LOG_WARN, "repair: Recs present in rebuilt tree for db %s and collection %s.", m_dbname, coll);
|
|
return false;
|
|
}
|
|
|
|
logf(LOG_INFO,"repair: Updating rdb %s for collection %s.",
|
|
m_dbname,coll);
|
|
|
|
// now MOVE the tree file on disk
|
|
char src[1024];
|
|
char dst[1024];
|
|
char rebuildFilePath[1024];
|
|
if(m_useTree) {
|
|
sprintf ( src , "%s/%s-saved.dat" , g_hostdb.m_dir , m_dbname );
|
|
sprintf ( dst , "%s/%s-saved.dat" , dstDir , m_dbname );
|
|
sprintf(rebuildFilePath, "%s/%s-saved.dat", g_hostdb.m_dir, rdb2->m_dbname);
|
|
}
|
|
else {
|
|
sprintf ( src , "%s/%s-buckets-saved.dat", g_hostdb.m_dir , m_dbname );
|
|
sprintf ( dst , "%s/%s-buckets-saved.dat", dstDir , m_dbname );
|
|
sprintf(rebuildFilePath, "%s/%s-buckets-saved.dat", g_hostdb.m_dir, rdb2->m_dbname);
|
|
}
|
|
|
|
const char *structName = m_useTree ? "tree" : "buckets";
|
|
|
|
logf(LOG_INFO,"repair: Moving *-saved.dat %s from %s to %s", structName, src, dst);
|
|
|
|
errno = 0;
|
|
|
|
// ignore missing file error
|
|
if (moveFile(src, dst) != 0 && errno != ENOENT) {
|
|
log( LOG_ERROR, "repair: Moving saved %s had error: %s.", structName, mstrerror( errno ) );
|
|
return false;
|
|
}
|
|
|
|
log("repair: Moving saved %s: %s",structName, mstrerror(errno));
|
|
|
|
// now move our map and data files to the "trash" subdir, "dstDir"
|
|
logf(LOG_INFO,"repair: Moving old data and map files to trash.");
|
|
if ( ! base->moveToTrash(dstDir) ) {
|
|
log(LOG_WARN, "repair: Trashing new rdb for %s failed.", coll);
|
|
return false;
|
|
}
|
|
|
|
// . now rename the newly rebuilt files to our filenames
|
|
// . just removes the "Rebuild" from their filenames
|
|
logf(LOG_INFO,"repair: Renaming new data and map files.");
|
|
if ( ! base2->removeRebuildFromFilenames() ) {
|
|
log(LOG_WARN, "repair: Renaming old rdb for %s failed.", coll);
|
|
return false;
|
|
}
|
|
|
|
// delete unneeded rebuild files (everything is already dumped to Rdb files)
|
|
if(base2->getTreeIndex())
|
|
base2->getTreeIndex()->unlink();
|
|
::unlink(rebuildFilePath);
|
|
|
|
// reset the rdb bases (clears out files and maps from mem)
|
|
base->reset ();
|
|
base2->reset();
|
|
|
|
// reload the newly rebuilt files into the primary rdb
|
|
logf(LOG_INFO,"repair: Loading new data and map files.");
|
|
if ( ! base->setFiles() ) {
|
|
log(LOG_WARN, "repair: Failed to set new files for %s.", coll);
|
|
return false;
|
|
}
|
|
|
|
// . make rdb2, the secondary rdb used for rebuilding, give up its mem
|
|
// . if we do another rebuild its ::init() will be called by PageRepair
|
|
rdb2->reset();
|
|
|
|
// clean out tree, newly rebuilt rdb does not have any data in tree
|
|
if ( m_useTree ) m_tree.delColl ( collnum );
|
|
else m_buckets.delColl(collnum);
|
|
// reset our cache
|
|
//m_cache.clear ( collnum );
|
|
|
|
// Success
|
|
return true;
|
|
}
|
|
|
|
// . returns false and sets g_errno on error, returns true on success
|
|
bool Rdb::addRdbBase1 ( const char *coll ) {
|
|
collnum_t collnum = g_collectiondb.getCollnum ( coll );
|
|
return addRdbBase2 ( collnum );
|
|
}
|
|
|
|
bool Rdb::addRdbBase2 ( collnum_t collnum ) { // addColl2()
|
|
|
|
if ( ! m_initialized ) {
|
|
g_errno = EBADENGINEER;
|
|
log(LOG_WARN, "db: adding coll to uninitialized rdb!");
|
|
return false;
|
|
}
|
|
|
|
// ensure no max breech
|
|
if ( collnum < (collnum_t) 0 ) {
|
|
g_errno = ENOBUFS;
|
|
int64_t maxColls = 1LL << (sizeof(collnum_t)*8);
|
|
log(LOG_WARN, "db: %s: Failed to add collection #%i. Would breech maximum number of collections, %" PRId64".",
|
|
m_dbname,collnum,maxColls);
|
|
return false;
|
|
}
|
|
|
|
|
|
CollectionRec *cr = g_collectiondb.getRec(collnum);
|
|
const char *coll = NULL;
|
|
if ( cr ) coll = cr->m_coll;
|
|
|
|
// . ensure no previous one exists
|
|
// . well it will be there but will be uninitialized, m_rdb will b NULL
|
|
RdbBase *base = NULL;
|
|
if ( cr ) base = cr->getBase( m_rdbId );
|
|
if ( base ) { // m_bases [ collnum ] ) {
|
|
g_errno = EBADENGINEER;
|
|
log(LOG_WARN, "db: Rdb for db \"%s\" and collection \"%s\" (collnum %" PRId32") exists.",
|
|
m_dbname,coll,(int32_t)collnum);
|
|
return false;
|
|
}
|
|
// make a new one
|
|
RdbBase *newColl = NULL;
|
|
try {newColl= new(RdbBase);}
|
|
catch(std::bad_alloc&){
|
|
g_errno = ENOMEM;
|
|
log(LOG_WARN, "db: %s: Failed to allocate %" PRId32" bytes for collection \"%s\".",
|
|
m_dbname,(int32_t)sizeof(Rdb),coll);
|
|
return false;
|
|
}
|
|
mnew(newColl, sizeof(RdbBase), "Rdb Coll");
|
|
//m_bases [ collnum ] = newColl;
|
|
|
|
base = newColl;
|
|
// add it to CollectionRec::m_bases[] base ptrs array
|
|
addBase ( collnum , newColl );
|
|
|
|
// . init it
|
|
// . g_hostdb.m_dir should end in /
|
|
if ( ! base->init ( g_hostdb.m_dir,
|
|
m_dbname ,
|
|
m_fixedDataSize ,
|
|
m_minToMerge ,
|
|
m_useHalfKeys ,
|
|
m_ks ,
|
|
m_pageSize ,
|
|
coll ,
|
|
collnum ,
|
|
getTree() ,
|
|
getBuckets() ,
|
|
this ,
|
|
m_useIndexFile ) ) {
|
|
logf(LOG_INFO,"db: %s: Failed to initialize db for "
|
|
"collection \"%s\".", m_dbname,coll);
|
|
//exit(-1);
|
|
return false;
|
|
}
|
|
|
|
//if ( (int32_t)collnum >= m_numBases ) m_numBases = (int32_t)collnum + 1;
|
|
// Success
|
|
return true;
|
|
}
|
|
|
|
bool Rdb::resetBase ( collnum_t collnum ) {
|
|
CollectionRec *cr = g_collectiondb.getRec(collnum);
|
|
if ( ! cr ) return true;
|
|
// get the ptr, don't use CollectionRec::getBase() so we do not swapin
|
|
RdbBase *base = cr->getBase(m_rdbId);
|
|
if ( ! base ) return true;
|
|
base->reset();
|
|
return true;
|
|
}
|
|
|
|
bool Rdb::deleteAllRecs ( collnum_t collnum ) {
|
|
|
|
// remove from tree
|
|
if(m_useTree) m_tree.delColl ( collnum );
|
|
else m_buckets.delColl(collnum);
|
|
|
|
// only for doledb now, because we unlink we do not move the files
|
|
// into the trash subdir and doledb is easily regenerated. i don't
|
|
// want to take the risk with other files.
|
|
if ( m_rdbId != RDB_DOLEDB ) { g_process.shutdownAbort(true); }
|
|
|
|
CollectionRec *cr = g_collectiondb.getRec ( collnum );
|
|
|
|
// deleted from under us?
|
|
if ( ! cr ) {
|
|
log("rdb: deleteallrecs: cr is NULL");
|
|
return true;
|
|
}
|
|
|
|
//Rdbbase *base = cr->m_bases[(unsigned char)m_rdbId];
|
|
RdbBase *base = cr->getBase(m_rdbId);
|
|
if ( ! base ) return true;
|
|
|
|
// scan files in there
|
|
for ( int32_t i = 0 ; i < base->getNumFiles() ; i++ ) {
|
|
BigFile *f = base->getFile(i);
|
|
// move to trash
|
|
char newdir[1024];
|
|
sprintf(newdir, "%strash/",g_hostdb.m_dir);
|
|
f->move ( newdir );
|
|
}
|
|
|
|
// nuke all the files
|
|
base->reset();
|
|
|
|
// reset rec counts
|
|
cr->m_numNegKeysInTree[RDB_DOLEDB] = 0;
|
|
cr->m_numPosKeysInTree[RDB_DOLEDB] = 0;
|
|
|
|
return true;
|
|
}
|
|
|
|
bool makeTrashDir() {
|
|
char trash[1024];
|
|
sprintf(trash, "%strash/",g_hostdb.m_dir);
|
|
if ( ::mkdir ( trash , getDirCreationFlags() ) ) {
|
|
if ( errno != EEXIST ) {
|
|
log("dir: mkdir %s had error: %s",
|
|
trash,mstrerror(errno));
|
|
return false;
|
|
}
|
|
// clear it
|
|
errno = 0;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
bool Rdb::deleteColl( collnum_t collnum, collnum_t newCollnum) {
|
|
// remove these collnums from tree
|
|
if(m_useTree) {
|
|
m_tree.delColl(collnum);
|
|
}
|
|
else {
|
|
m_buckets.delColl(collnum);
|
|
}
|
|
|
|
// . close all files, set m_numFiles to 0 in RdbBase
|
|
// . TODO: what about outstanding merge or dump operations?
|
|
// . it seems like we can't really recycle this too easily
|
|
// because reset it not resetting filenames or directory name?
|
|
// just nuke it and rebuild using addRdbBase2()...
|
|
RdbBase *oldBase = getBase ( collnum );
|
|
mdelete (oldBase, sizeof(RdbBase), "Rdb Coll");
|
|
delete (oldBase);
|
|
|
|
// NULL it out...
|
|
CollectionRec *oldcr = g_collectiondb.getRec(collnum);
|
|
if( !oldcr ) {
|
|
logError("could not get record for collection %d", (int)collnum);
|
|
return false;
|
|
}
|
|
oldcr->setBasePtr ( m_rdbId , NULL );
|
|
char *coll = oldcr->m_coll;
|
|
|
|
const char *msg = "deleted";
|
|
|
|
// if just resetting recycle base
|
|
if (collnum != newCollnum) {
|
|
addRdbBase2(newCollnum);
|
|
msg = "moved";
|
|
}
|
|
|
|
|
|
log(LOG_DEBUG,"db: %s base from collrec "
|
|
"rdb=%s rdbid=%" PRId32" coll=%s collnum=%" PRId32" newcollnum=%" PRId32,
|
|
msg,m_dbname,(int32_t)m_rdbId,coll,(int32_t)collnum,
|
|
(int32_t)newCollnum);
|
|
|
|
// move the files into trash
|
|
// nuke it on disk
|
|
char oldname[1024];
|
|
sprintf(oldname, "%scoll.%s.%" PRId32"/",g_hostdb.m_dir,coll,
|
|
(int32_t)collnum);
|
|
char newname[1024];
|
|
sprintf(newname, "%strash/coll.%s.%" PRId32".%" PRId64"/",g_hostdb.m_dir,coll,
|
|
(int32_t)collnum,gettimeofdayInMilliseconds());
|
|
// ensure ./trash dir is there
|
|
makeTrashDir();
|
|
// move into that dir
|
|
if( ::rename ( oldname , newname ) == -1 ) {
|
|
logError("Failed renaming [%s] to [%s]. errno %d: %s.", oldname, newname, errno, mstrerror(errno) );
|
|
}
|
|
|
|
log ( LOG_DEBUG, "db: cleared data for coll \"%s\" (%" PRId32") rdb=%s.",
|
|
coll,(int32_t)collnum ,getDbnameFromId(m_rdbId));
|
|
|
|
return true;
|
|
}
|
|
|
|
// returns false and sets g_errno on error, returns true on success
|
|
bool Rdb::delColl(const char *coll) {
|
|
collnum_t collnum = g_collectiondb.getCollnum ( coll );
|
|
|
|
if( collnum < (collnum_t)0 ) {
|
|
log(LOG_WARN, "Failed to delete collection. Could not look up collection [%s]", coll);
|
|
return false;
|
|
}
|
|
RdbBase *base = getBase ( collnum );
|
|
|
|
// ensure its there
|
|
if(!base) {
|
|
g_errno = EBADENGINEER;
|
|
log(LOG_WARN, "db: %s: Failed to delete collection #%i. Does not exist.", m_dbname,collnum);
|
|
return false;
|
|
}
|
|
|
|
// move all files to trash and clear the tree/buckets
|
|
deleteColl(collnum, collnum);
|
|
return true;
|
|
}
|
|
|
|
bool Rdb::isSavingTree() const {
|
|
if ( m_useTree ) return m_tree.isSaving();
|
|
return m_buckets.isSaving();
|
|
}
|
|
|
|
bool Rdb::saveTree(bool useThread, void *state, void (*callback)(void *state)) {
|
|
bool result;
|
|
|
|
// . if RdbTree::m_needsSave is false this will return true
|
|
// . if RdbTree::m_isSaving is true this will return false
|
|
// . returns false if blocked, true otherwise
|
|
// . sets g_errno on error
|
|
if (m_useTree) {
|
|
result = m_tree.fastSave(getDir(), useThread, state, callback);
|
|
} else {
|
|
result = m_buckets.fastSave(getDir(), useThread, state, callback);
|
|
}
|
|
|
|
if (m_useIndexFile) {
|
|
// now loop over bases
|
|
for (int32_t i = 0; i < getNumBases(); i++) {
|
|
CollectionRec *cr = g_collectiondb.getRec(i);
|
|
if (!cr) {
|
|
continue;
|
|
}
|
|
|
|
// if swapped out, this will be NULL, so skip it
|
|
RdbBase *base = cr->getBase(m_rdbId);
|
|
if (base) {
|
|
base->saveTreeIndex();
|
|
}
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
bool Rdb::saveIndexes() {
|
|
// now loop over bases
|
|
for (int32_t i = 0; i < getNumBases(); i++) {
|
|
CollectionRec *cr = g_collectiondb.getRec(i);
|
|
if (!cr) {
|
|
continue;
|
|
}
|
|
|
|
// if swapped out, this will be NULL, so skip it
|
|
RdbBase *base = cr->getBase(m_rdbId);
|
|
if (base) {
|
|
base->saveIndexes();
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
bool Rdb::saveMaps () {
|
|
// now loop over bases
|
|
for ( int32_t i = 0 ; i < getNumBases() ; i++ ) {
|
|
CollectionRec *cr = g_collectiondb.getRec(i);
|
|
if ( ! cr ) {
|
|
continue;
|
|
}
|
|
|
|
// if swapped out, this will be NULL, so skip it
|
|
RdbBase *base = cr->getBase(m_rdbId);
|
|
if ( base ) {
|
|
base->saveMaps();
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
// returns false and sets g_errno on error
|
|
bool Rdb::loadTree ( ) {
|
|
// get the filename of the saved tree
|
|
char filename[256];
|
|
sprintf(filename,"%s-saved.dat",m_dbname);
|
|
|
|
//log (0,"Rdb::loadTree: loading %s",filename);
|
|
|
|
// set a BigFile to this filename
|
|
BigFile file;
|
|
file.set ( getDir(), filename);
|
|
bool treeExists = file.doesExist();
|
|
bool status = false ;
|
|
if ( treeExists ) {
|
|
// load the table with file named "THISDIR/saved"
|
|
status = m_tree.fastLoad(&file, &m_mem) ;
|
|
// we close it now instead of him
|
|
}
|
|
|
|
if ( m_useTree ) {
|
|
file.close();
|
|
if ( !status && treeExists ) {
|
|
log( LOG_ERROR, "db: Could not load saved tree." );
|
|
return false;
|
|
}
|
|
} else {
|
|
if ( !m_buckets.loadBuckets(m_dbname) ) {
|
|
log( LOG_ERROR, "db: Could not load saved buckets." );
|
|
return false;
|
|
}
|
|
|
|
int32_t numKeys = m_buckets.getNumKeys();
|
|
|
|
if(!m_buckets.testAndRepair()) {
|
|
log( LOG_ERROR, "db: unrepairable buckets, remove and restart." );
|
|
g_process.shutdownAbort(true);
|
|
}
|
|
|
|
if(treeExists) {
|
|
m_buckets.addTree(&m_tree);
|
|
if (m_buckets.getNumKeys() - numKeys > 0 ) {
|
|
log( LOG_ERROR, "db: Imported %" PRId32" recs from %s's tree to buckets.",
|
|
m_buckets.getNumKeys()-numKeys, m_dbname);
|
|
}
|
|
|
|
if ( g_conf.m_readOnlyMode ) {
|
|
m_buckets.setNeedsSave(false);
|
|
} else {
|
|
char newFilename[256];
|
|
sprintf(newFilename,"%s-%" PRId32".old", filename, (int32_t)getTime());
|
|
file.rename(newFilename,NULL);
|
|
m_tree.reset();
|
|
}
|
|
file.close();
|
|
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/// @todo ALC consider if we need one per rdb
|
|
static GbThreadQueue s_rdbDumpThreadQueue;
|
|
static time_t s_lastTryTime = 0;
|
|
|
|
void Rdb::submitRdbDumpJob(bool forceDump) {
|
|
logTrace(g_conf.m_logTraceRdb, "BEGIN %s", m_dbname);
|
|
|
|
if (getNumUsedNodes() <= 0) {
|
|
logTrace(g_conf.m_logTraceRdb, "END. %s: No used nodes/keys. Returning", m_dbname);
|
|
return;
|
|
}
|
|
|
|
// never dump doledb any more. it's rdbtree only.
|
|
if (m_rdbId == RDB_DOLEDB) {
|
|
logTrace(g_conf.m_logTraceRdb, "END. %s: Rdb is doledb. Returning", m_dbname);
|
|
return;
|
|
}
|
|
|
|
// if it has been less than 3 seconds since our last failed attempt
|
|
// do not try again to avoid flooding our log
|
|
if (getTime() - s_lastTryTime < 3) {
|
|
logTrace(g_conf.m_logTraceRdb, "END. %s: Less than 3 seconds since last attempt. Returning", m_dbname);
|
|
return;
|
|
}
|
|
|
|
// don't dump if not 90% full
|
|
if (!forceDump && !needsDump()) {
|
|
logTrace(g_conf.m_logTraceRdb, "END. %s: Tree not 90 percent full and not force dump. Returning", m_dbname);
|
|
return;
|
|
}
|
|
|
|
// bail if already dumping
|
|
bool isDumping =m_isDumping.exchange(true);
|
|
if (isDumping) {
|
|
logTrace(g_conf.m_logTraceRdb, "END. %s: Already dumping. Returning", m_dbname);
|
|
return;
|
|
}
|
|
|
|
s_rdbDumpThreadQueue.addItem(this);
|
|
|
|
log(LOG_INFO, "db: Submitted job %p to dump tree for %s", this, getDbname());
|
|
}
|
|
|
|
void Rdb::dumpRdb(void *item) {
|
|
Rdb *rdb = static_cast<Rdb*>(item);
|
|
|
|
log(LOG_INFO, "db: Processing job %p to dump tree", item);
|
|
rdb->dumpTree();
|
|
log(LOG_INFO, "db: Processed job %p to dump tree", item);
|
|
}
|
|
|
|
bool Rdb::initializeRdbDumpThread() {
|
|
return s_rdbDumpThreadQueue.initialize(dumpRdb, "dump-rdb");
|
|
}
|
|
|
|
void Rdb::finalizeRdbDumpThread() {
|
|
s_rdbDumpThreadQueue.finalize();
|
|
}
|
|
|
|
bool Rdb::hasPendingRdbDumpJob() {
|
|
return !s_rdbDumpThreadQueue.isEmpty();
|
|
}
|
|
|
|
// . start dumping the tree
|
|
// . returns false and sets g_errno on error
|
|
bool Rdb::dumpTree() {
|
|
logTrace( g_conf.m_logTraceRdb, "BEGIN %s", m_dbname );
|
|
|
|
if (!needsDump()) {
|
|
log(LOG_INFO, "db: %s tree not 90 percent full but dumping.",m_dbname);
|
|
}
|
|
|
|
// reset g_errno -- don't forget!
|
|
g_errno = 0;
|
|
|
|
// remember niceness for calling setDump()
|
|
m_niceness = 1;
|
|
|
|
// debug msg
|
|
log(LOG_INFO,"db: Dumping %s to disk. nice=%" PRId32,m_dbname,m_niceness);
|
|
|
|
// only try to fix once per dump session
|
|
int64_t start = gettimeofdayInMilliseconds();
|
|
|
|
// do not do chain testing because that is too slow
|
|
if (m_useTree) {
|
|
ScopedLock sl(m_tree.getLock());
|
|
if (!m_tree.checkTree_unlocked(false, false)) {
|
|
log(LOG_ERROR, "db: %s tree was corrupted in memory. Trying to fix. Your memory is probably bad. "
|
|
"Please replace it.", m_dbname);
|
|
|
|
// if fix failed why even try to dump?
|
|
if (!m_tree.fixTree_unlocked()) {
|
|
// only try to dump every 3 seconds
|
|
s_lastTryTime = getTime();
|
|
log(LOG_ERROR, "db: Could not fix in memory data for %s. Abandoning dump.", m_dbname);
|
|
logTrace(g_conf.m_logTraceRdb, "END. %s: Unable to fix tree. Returning false", m_dbname);
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
log( LOG_INFO, "db: Checking validity of in memory data of %s before dumping, "
|
|
"took %" PRId64" ms.",m_dbname,gettimeofdayInMilliseconds()-start );
|
|
|
|
////
|
|
//
|
|
// see what collnums are in the tree and just try those
|
|
//
|
|
////
|
|
|
|
// loop through collections, dump each one
|
|
|
|
// clear this for dumpCollLoop()
|
|
g_errno = 0;
|
|
m_dumpErrno = 0;
|
|
|
|
for (int collnum = 0; collnum < getNumBases(); ++collnum) {
|
|
RdbBase *base = getBase(collnum);
|
|
if (base) {
|
|
base->setDumpingFileId(-1);
|
|
}
|
|
}
|
|
|
|
for (int collnum = 0; collnum < getNumBases(); ++collnum) {
|
|
RdbBase *base = getBase(collnum);
|
|
if (base && !dumpColl(base)) {
|
|
break;
|
|
}
|
|
}
|
|
|
|
doneDumping();
|
|
|
|
logTrace( g_conf.m_logTraceRdb, "END. %s: Done dumping. Returning true", m_dbname );
|
|
return true;
|
|
}
|
|
|
|
bool Rdb::dumpColl(RdbBase *base) {
|
|
// before we create the file, see if tree has anything for this coll
|
|
if (!getTreeCollExist(base->getCollnum())) {
|
|
return true;
|
|
}
|
|
|
|
// if we add to many files then we can not merge, because merge op needs to add a file too
|
|
if (base->getNumFiles() + 2 >= MAX_RDB_FILES) {
|
|
log(LOG_ERROR, "db: could not dump tree to disk for cn=%i %s because it has %" PRId32" files on disk. "
|
|
"Need to wait for merge operation.", (int)base->getCollnum(), m_dbname, base->getNumFiles());
|
|
m_dumpErrno = ETOOMANYFILES;
|
|
return false;
|
|
}
|
|
|
|
// this file must not exist already, we are dumping the tree into it
|
|
int32_t fileId = 0;
|
|
int fn = base->addNewFile(&fileId);
|
|
if (fn < 0) {
|
|
log(LOG_ERROR, "db: rdb: Failed to add new file to dump %s: %s.", m_dbname, mstrerror(g_errno));
|
|
m_dumpErrno = g_errno;
|
|
return false;
|
|
}
|
|
|
|
base->setDumpingFileId(fileId);
|
|
|
|
log(LOG_INFO, "build: Dumping to %s/%s for coll \"%s\".",
|
|
base->getFile(fn)->getDir(),
|
|
base->getFile(fn)->getFilename(),
|
|
g_collectiondb.getCollName(base->getCollnum()));
|
|
|
|
// what is the avg rec size?
|
|
int32_t numRecs = getNumUsedNodes();
|
|
int32_t avgSize;
|
|
|
|
if(m_useTree) {
|
|
if ( numRecs <= 0 ) numRecs = 1;
|
|
avgSize = m_tree.getMemOccupiedForList() / numRecs;
|
|
} else {
|
|
avgSize = m_buckets.getRecSize();
|
|
}
|
|
|
|
/// @todo ALC test speed of getting more than 3000 records
|
|
// . don't get more than 3000 recs from the tree because it gets slow
|
|
// . we'd like to write as much out as possible to reduce possible
|
|
// file interlacing when synchronous writes are enabled. RdbTree::
|
|
// getList() should really be sped up by doing the neighbor node
|
|
// thing. would help for adding lists, too, maybe.
|
|
int32_t bufSize = 300 * 1024;
|
|
int32_t bufSize2 = 3000 * avgSize;
|
|
if (bufSize2 < bufSize) {
|
|
bufSize = bufSize2;
|
|
}
|
|
|
|
if (!m_useTree) {
|
|
//buckets are much faster at getting lists
|
|
bufSize *= 4;
|
|
}
|
|
|
|
// . RdbDump will set the filename of the map we pass to this
|
|
// . RdbMap should dump itself out CLOSE!
|
|
// . it returns false if blocked, true otherwise & sets g_errno on err
|
|
// . but we only return false on error here
|
|
if (!m_dump.set(base->getCollnum(),
|
|
base->getFile(fn),
|
|
getBuckets(),
|
|
getTree(),
|
|
base->getMap(fn),
|
|
base->getIndex(fn),
|
|
bufSize, // write buf size
|
|
m_niceness, // niceness of 1 will NOT block
|
|
NULL,
|
|
NULL,
|
|
m_useHalfKeys,
|
|
0LL, // dst start offset
|
|
KEYMIN(), // prev last key
|
|
m_ks, // keySize
|
|
m_rdbId)) {
|
|
log(LOG_ERROR, "db: RdbDump blocked for %s", m_dbname);
|
|
|
|
// we must never block
|
|
gbshutdownLogicError();
|
|
}
|
|
|
|
// error?
|
|
if (g_errno) {
|
|
log(LOG_WARN, "rdb: error dumping = %s", mstrerror(g_errno));
|
|
// shit, what to do here? this is causing our RdbMem
|
|
// to get corrupted!
|
|
// because if we end up continuing it calls doneDumping()
|
|
// and updates RdbMem! maybe set a permanent error then!
|
|
// and if that is there do not clear RdbMem!
|
|
m_dumpErrno = g_errno;
|
|
|
|
s_lastTryTime = getTime();
|
|
|
|
/// @todo ALC do we want to delete the current dumping file?
|
|
// if(!base->getFile(fn)->doesExist() || base->getFile(fn)->getFileSize() <= 0 ) {
|
|
// log("build: File %s is zero bytes, removing from memory.",base->getFile(fn)->getFilename());
|
|
// base->buryFiles ( fn , fn+1 );
|
|
// }
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
// Moved a lot of the logic originally here in Rdb::doneDumping into
|
|
// RdbDump.cpp::dumpTree()
|
|
void Rdb::doneDumping ( ) {
|
|
log(LOG_INFO,"db: Done dumping %s: %s.",m_dbname,
|
|
mstrerror(m_dumpErrno));
|
|
|
|
// free mem in the primary buffer
|
|
if ( ! m_dumpErrno ) {
|
|
if(m_useTree)
|
|
m_tree.clear();
|
|
else
|
|
m_buckets.clear();
|
|
m_mem.clear();
|
|
|
|
for(int collnum=0; collnum<getNumBases(); collnum++) {
|
|
RdbBase *base = getBase(collnum);
|
|
if (base) {
|
|
if (isUseIndexFile()) {
|
|
base->clearTreeIndex();
|
|
base->submitGlobalIndexJob(true, base->getDumpingFileId());
|
|
} else {
|
|
base->markNewFileReadable();
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
if(g_collectiondb.getNumRecsUsed()>1)
|
|
log(LOG_ERROR,"db: Error encountered while dumping %s tree to file: %s. "
|
|
"You have multiple collections and this may lead to duplicated data until the error conditions has been cleared and GB restarted.",
|
|
m_dbname, mstrerror(m_dumpErrno));
|
|
else
|
|
log(LOG_ERROR,"db: Error encountered while dumping %s tree to file: %s",
|
|
m_dbname, mstrerror(m_dumpErrno));
|
|
}
|
|
|
|
// . tell RdbDump it is done
|
|
// . we have to set this here otherwise RdbMem's memory ring buffer
|
|
// will think the dumping is no longer going on and use the primary
|
|
// memory for allocating new titleRecs and such and that is not good!
|
|
m_isDumping = false;
|
|
}
|
|
|
|
void forceMergeAll(rdbid_t rdbId) {
|
|
// set flag on all RdbBases
|
|
for ( int32_t i = 0 ; i < g_collectiondb.getNumRecs(); i++ ) {
|
|
CollectionRec *cr = g_collectiondb.getRec(i);
|
|
if ( ! cr )
|
|
{
|
|
log(LOG_INFO,"%s:%s:%d: coll %" PRId32" - could not get CollectionRec", __FILE__,__func__,__LINE__,i);
|
|
continue;
|
|
}
|
|
RdbBase *base = cr->getBase ( rdbId );
|
|
if ( ! base )
|
|
{
|
|
log(LOG_INFO,"%s:%s:%d: coll %" PRId32" - could not get RdbBase", __FILE__,__func__,__LINE__,i);
|
|
continue;
|
|
}
|
|
|
|
log(LOG_INFO,"%s:%s:%d: coll %" PRId32" - Set next merge to Forced", __FILE__,__func__,__LINE__,i);
|
|
base->forceNextMerge();
|
|
}
|
|
|
|
// and try to merge now
|
|
attemptMergeAll();
|
|
}
|
|
|
|
// this should be called every few seconds by the sleep callback, too
|
|
void attemptMergeAllCallback ( int fd , void *state ) {
|
|
attemptMergeAll();
|
|
}
|
|
|
|
|
|
static int compareBaseNumFilesReverse(const void *pv1, const void *pv2) {
|
|
const RdbBase *base_1 = *((const RdbBase**)pv1);
|
|
const RdbBase *base_2 = *((const RdbBase**)pv2);
|
|
return base_2->getNumFiles() - base_1->getNumFiles();
|
|
}
|
|
|
|
// . TODO: if rdbbase::attemptMerge() needs to launch a merge but can't
|
|
// then do NOT remove from linked list. maybe set a flag like 'needsMerge'
|
|
void attemptMergeAll() {
|
|
|
|
// wait for any current merge to stop!
|
|
if ( g_merge.isMerging() ) {
|
|
log(LOG_INFO,"Attempted merge, but merge already running");
|
|
return;
|
|
}
|
|
|
|
const int32_t niceness = MAX_NICENESS;
|
|
const bool forceMergeAll = false;
|
|
static collnum_t s_lastCollnum = 0;
|
|
|
|
// limit to 1000 checks to save the cpu since we call this once every 2 seconds.
|
|
for(int loop_count=0; loop_count<1000 && loop_count<g_collectiondb.getNumRecs(); loop_count++) {
|
|
if(s_lastCollnum >= g_collectiondb.getNumRecs())
|
|
s_lastCollnum = 0;
|
|
|
|
CollectionRec *cr = g_collectiondb.getRec(s_lastCollnum);
|
|
s_lastCollnum++;
|
|
if(!cr)
|
|
continue;
|
|
|
|
// args = niceness, forceMergeAll, doLog, minToMergeOverride
|
|
// if RdbBase::attemptMerge() returns true that means it
|
|
// launched a merge and it will call attemptMergeAll2() when
|
|
// the merge completes.
|
|
static const rdbid_t rdbid[] = {
|
|
RDB_POSDB,
|
|
RDB_TITLEDB,
|
|
RDB_TAGDB,
|
|
RDB_LINKDB,
|
|
RDB_SPIDERDB_DEPRECATED,
|
|
RDB_CLUSTERDB,
|
|
// also try to merge on rdbs being rebuilt
|
|
RDB2_POSDB2,
|
|
RDB2_TITLEDB2,
|
|
RDB2_TAGDB2,
|
|
RDB2_LINKDB2,
|
|
RDB2_SPIDERDB2_DEPRECATED,
|
|
RDB2_CLUSTERDB2
|
|
};
|
|
static const unsigned numRdbs = sizeof(rdbid)/sizeof(rdbid[0]);
|
|
|
|
//Try to merge the rdbbases with the most files
|
|
|
|
//collect the bases into an array
|
|
RdbBase *base[numRdbs];
|
|
unsigned numRdbs2=0;
|
|
for(unsigned i=0; i<numRdbs; i++) {
|
|
base[numRdbs2] = cr->getBase(rdbid[i]);
|
|
if(base[numRdbs2])
|
|
numRdbs2++;
|
|
}
|
|
//sort them
|
|
qsort(base,numRdbs2,sizeof(base[0]),compareBaseNumFilesReverse);
|
|
|
|
//then try merging them
|
|
for(unsigned i=0; i<numRdbs2; i++) {
|
|
if(base[i]->attemptMerge(niceness,forceMergeAll))
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
// . return false and set g_errno on error
|
|
// . TODO: speedup with m_tree.addSortedKeys() already partially written
|
|
bool Rdb::addList(collnum_t collnum, RdbList *list, bool checkForRoom) {
|
|
// pick it
|
|
if ( collnum < 0 || collnum > getNumBases() || ! getBase(collnum) ) {
|
|
g_errno = ENOCOLLREC;
|
|
log(LOG_WARN, "db: %s bad collnum of %i.",m_dbname,collnum);
|
|
return false;
|
|
}
|
|
// make sure list is reset
|
|
list->resetListPtr();
|
|
// if nothing then just return true
|
|
if ( list->isExhausted() ) {
|
|
return true;
|
|
}
|
|
// sanity check
|
|
if ( list->getKeySize() != m_ks ) { g_process.shutdownAbort(true); }
|
|
|
|
// if we are well into repair mode, level 2, do not add anything
|
|
// to spiderdb or titledb... that can mess up our titledb scan.
|
|
// we always rebuild tfndb, clusterdb and spiderdb
|
|
// but we often just repair titledb, indexdb and datedb because
|
|
// they are bigger. it may add to indexdb/datedb
|
|
if ( g_repair.isRepairActive() &&
|
|
// but only check for collection we are repairing/rebuilding
|
|
g_repair.isRepairingColl(collnum) &&
|
|
// exception, spider status docs can be deleted from titledb
|
|
// if user turns off 'index spider replies' before doing
|
|
// the rebuild, when not rebuilding titledb.
|
|
((m_rdbId == RDB_TITLEDB && list->getListSize() != 12 ) ||
|
|
m_rdbId == RDB_POSDB ||
|
|
m_rdbId == RDB_CLUSTERDB ||
|
|
m_rdbId == RDB_LINKDB ||
|
|
m_rdbId == RDB_DOLEDB ||
|
|
m_rdbId == RDB_SPIDERDB_DEPRECATED ) ) {
|
|
|
|
// allow banning of sites still
|
|
log(LOG_WARN, "db: How did an add come in while in repair mode? rdbName=%s", getDbnameFromId(m_rdbId));
|
|
g_errno = EREPAIRING;
|
|
return false;
|
|
}
|
|
|
|
// . if we don't have enough room to store list, initiate a dump and
|
|
// return g_errno of ETRYAGAIN
|
|
// . otherwise, we're guaranteed to have room for this list
|
|
if( checkForRoom && ! hasRoom(list) ) {
|
|
// if tree is empty, list will never fit!!!
|
|
if ( m_useTree && m_tree.getNumUsedNodes() <= 0 ) {
|
|
g_errno = ELISTTOOBIG;
|
|
log( LOG_WARN, "db: Tried to add a record that is simply too big (%" PRId32" bytes) to ever fit in "
|
|
"the memory space for %s. Please increase the max memory for %s in gb.conf.",
|
|
list->getListSize(), m_dbname, m_dbname );
|
|
return false;
|
|
}
|
|
|
|
logTrace( g_conf.m_logTraceRdb, "%s: Not enough room. Calling dumpTree", m_dbname );
|
|
submitRdbDumpJob(true);
|
|
|
|
// set g_errno after intiating the dump!
|
|
g_errno = ETRYAGAIN;
|
|
|
|
// return false since we didn't add the list
|
|
return false;
|
|
}
|
|
|
|
do {
|
|
char key[MAX_KEY_BYTES];
|
|
list->getCurrentKey(key);
|
|
int32_t dataSize;
|
|
const char *data;
|
|
|
|
// negative keys have no data
|
|
if ( ! KEYNEG(key) ) {
|
|
dataSize = list->getCurrentDataSize();
|
|
data = list->getCurrentData();
|
|
}
|
|
else {
|
|
dataSize = 0;
|
|
data = NULL;
|
|
}
|
|
|
|
if ( ! addRecord ( collnum , key , data , dataSize ) ) {
|
|
// bitch
|
|
static int32_t s_last = 0;
|
|
int32_t now = time(NULL);
|
|
|
|
// . do not log this more than once per second to stop log spam
|
|
// . i think this can really lockup the cpu, too
|
|
if ( now - s_last != 0 ) {
|
|
log( LOG_INFO, "db: Had error adding data to %s: %s.", m_dbname, mstrerror( g_errno ));
|
|
}
|
|
|
|
s_last = now;
|
|
|
|
// force initiate the dump now if addRecord failed for no mem
|
|
if ( g_errno == ENOMEM ) {
|
|
// start dumping the tree to disk so we have room 4 add
|
|
logTrace( g_conf.m_logTraceRdb, "%s: Not enough memory. Calling dumpTree", m_dbname );
|
|
submitRdbDumpJob(true);
|
|
// tell caller to try again later (1 second or so)
|
|
g_errno = ETRYAGAIN;
|
|
}
|
|
|
|
// discontinue adding any more of the list
|
|
return false;
|
|
}
|
|
} while ( list->skipCurrentRecord() ); // skip to next record, returns false on end of list
|
|
|
|
//Do not try initiating a dump here as it will make Msg4 unhappy being interrupted in the middle of multiple lists
|
|
|
|
return true;
|
|
}
|
|
|
|
|
|
//delete node and data in tree. Currently only called by SpiderLoop
|
|
bool Rdb::deleteTreeNode(collnum_t collnum, const char *key) {
|
|
if(!m_useTree)
|
|
gbshutdownLogicError();
|
|
|
|
return m_tree.deleteNode(collnum, key, true);
|
|
}
|
|
|
|
|
|
void Rdb::verifyTreeIntegrity() {
|
|
if(m_useTree)
|
|
m_tree.verifyIntegrity();
|
|
else
|
|
m_buckets.verifyIntegrity();
|
|
}
|
|
|
|
|
|
bool Rdb::needsDump() const {
|
|
if (m_mem.is90PercentFull()) {
|
|
return true;
|
|
}
|
|
|
|
if (m_useTree) {
|
|
if (m_tree.is90PercentFull()) {
|
|
return true;
|
|
}
|
|
} else {
|
|
if (m_buckets.needsDump()) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool Rdb::hasRoom(int32_t totalRecs, int32_t totalDataSize) const {
|
|
logTrace(g_conf.m_logTraceRdb, "BEGIN %s: numRecs=%" PRId32" dataSize=%" PRId32" availMem=%" PRId32,
|
|
m_dbname, totalRecs, totalDataSize, m_mem.getAvailMem());
|
|
|
|
// nodes
|
|
if (m_useTree) {
|
|
if (m_tree.getNumAvailNodes() < totalRecs) {
|
|
logTrace(g_conf.m_logTraceRdb, "END %s: Insufficient tree nodes. Returning false", m_dbname);
|
|
return false;
|
|
}
|
|
} else {
|
|
if (!m_buckets.hasRoom(totalRecs)) {
|
|
logTrace(g_conf.m_logTraceRdb, "END %s: Insufficient buckets. Returning false", m_dbname);
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// memory (only use for data)
|
|
bool result = (m_mem.getAvailMem() >= totalDataSize);
|
|
logTrace(g_conf.m_logTraceRdb, "END %s: Memory check. Returning %s", m_dbname, result ? "true" : "false");
|
|
return result;
|
|
}
|
|
|
|
|
|
bool Rdb::hasRoom(RdbList *list) {
|
|
// how many nodes will tree need?
|
|
int32_t numNodes = list->getNumRecs( );
|
|
|
|
// does tree have room for these nodes?
|
|
if (m_useTree) {
|
|
if (m_tree.getNumAvailNodes() < numNodes) {
|
|
return false;
|
|
}
|
|
} else {
|
|
if (!m_buckets.hasRoom(numNodes)) {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// how many nodes will tree need?
|
|
// how much space will RdbMem, m_mem, need?
|
|
int64_t overhead = m_ks;
|
|
if ( list->getFixedDataSize() == -1 ) {
|
|
overhead += 4;
|
|
}
|
|
|
|
// how much mem will the data use?
|
|
int64_t dataSpace = (int64_t)list->getListSize() - ((int64_t)numNodes * overhead);
|
|
|
|
// if we are doledb, we are a tree-only rdb, so try to reclaim
|
|
// memory from deleted nodes. works by condesing the used memory.
|
|
if ( m_rdbId == RDB_DOLEDB &&
|
|
// if there is no room left in m_mem (RdbMem class)...
|
|
( m_mem.getAvailMem() < dataSpace || g_conf.m_forceIt) &&
|
|
// and last time we tried this, if any, it reclaimed 1MB+
|
|
(m_lastReclaim>1024*1024||m_lastReclaim==-1||g_conf.m_forceIt)){
|
|
// reclaim the memory now. returns -1 and sets g_errno on error
|
|
int32_t reclaimed = reclaimMemFromDeletedTreeNodes();
|
|
// reset force flag
|
|
g_conf.m_forceIt = false;
|
|
// ignore errors for now
|
|
g_errno = 0;
|
|
// how much did we free up?
|
|
if ( reclaimed >= 0 )
|
|
m_lastReclaim = reclaimed;
|
|
}
|
|
|
|
// does m_mem have room for "dataSpace"?
|
|
if ( (int64_t)m_mem.getAvailMem() < dataSpace ) return false;
|
|
// otherwise, we do have room
|
|
return true;
|
|
}
|
|
|
|
// . NOTE: low bit should be set , only antiKeys (deletes) have low bit clear
|
|
// . returns false and sets g_errno on error, true otherwise
|
|
// . if RdbMem, m_mem, has no mem, sets g_errno to ETRYAGAIN and returns false
|
|
// because dump should complete soon and free up some mem
|
|
// . this overwrites dups
|
|
bool Rdb::addRecord(collnum_t collnum, const char *key, const char *data, int32_t dataSize) {
|
|
if (g_conf.m_logTraceRdb) {
|
|
char keyStrBuf[MAX_KEYSTR_BYTES];
|
|
KEYSTR(key, m_ks, keyStrBuf);
|
|
logTrace(g_conf.m_logTraceRdb, "BEGIN %s: collnum=%" PRId32" key=%s dataSize=%" PRId32,
|
|
m_dbname, collnum, keyStrBuf, dataSize);
|
|
}
|
|
|
|
if (!getBase(collnum)) {
|
|
g_errno = EBADENGINEER;
|
|
log(LOG_LOGIC,"db: addRecord: collection #%i is gone.", collnum);
|
|
logTrace(g_conf.m_logTraceRdb, "END. %s: collection gone. Returning false", m_dbname);
|
|
return false;
|
|
}
|
|
|
|
// we must not get into this state (we must not insert while dumping; and vice versa)
|
|
if (isDumping()) {
|
|
gbshutdownLogicError();
|
|
}
|
|
|
|
// sanity check
|
|
if (KEYNEG(key)) {
|
|
if ((dataSize > 0 && data)) {
|
|
log(LOG_LOGIC, "db: Got data for a negative key.");
|
|
gbshutdownLogicError();
|
|
}
|
|
} else if ( m_fixedDataSize >= 0 && dataSize != m_fixedDataSize ) {
|
|
log(LOG_LOGIC, "db: addRecord: DataSize is %" PRId32" should be %" PRId32, dataSize, m_fixedDataSize);
|
|
gbshutdownLogicError();
|
|
}
|
|
|
|
// copy the data before adding if we don't already own it
|
|
char *dataCopy = NULL;
|
|
if (data) {
|
|
// sanity check
|
|
if ( m_fixedDataSize == 0 && dataSize > 0 ) {
|
|
log(LOG_LOGIC, "db: addRecord: Data is present. Should not be");
|
|
gbshutdownLogicError();
|
|
}
|
|
|
|
dataCopy = (char *) m_mem.dupData(data, dataSize);
|
|
if ( ! dataCopy ) {
|
|
g_errno = ETRYAGAIN;
|
|
log(LOG_WARN, "db: Could not allocate %" PRId32" bytes to add data to %s. Retrying.",dataSize,m_dbname);
|
|
logTrace(g_conf.m_logTraceRdb, "END. %s: Unable to allocate data. Returning false", m_dbname);
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if (m_rdbId == RDB_DOLEDB && g_conf.m_logDebugSpider) {
|
|
// must be 96 bits
|
|
if (m_ks != 12) {
|
|
g_process.shutdownAbort(true);
|
|
}
|
|
|
|
// set this
|
|
key96_t doleKey = *(key96_t *)key;
|
|
|
|
// remove from g_spiderLoop.m_lockTable too!
|
|
if (KEYNEG(key)) {
|
|
// log debug
|
|
logf(LOG_DEBUG,"spider: removed doledb key for pri=%" PRId32" time=%" PRIu32" uh48=%" PRIu64,
|
|
(int32_t)Doledb::getPriority(&doleKey),
|
|
(uint32_t)Doledb::getSpiderTime(&doleKey),
|
|
Doledb::getUrlHash48(&doleKey));
|
|
} else {
|
|
// do not overflow!
|
|
// log debug
|
|
const SpiderRequest *sreq = reinterpret_cast<const SpiderRequest *>(dataCopy);
|
|
logf(LOG_DEBUG, "spider: added doledb key for pri=%" PRId32" time=%" PRIu32" uh48=%" PRIu64" u=%s",
|
|
(int32_t)Doledb::getPriority(&doleKey),
|
|
(uint32_t)Doledb::getSpiderTime(&doleKey),
|
|
Doledb::getUrlHash48(&doleKey),
|
|
sreq->m_url);
|
|
}
|
|
}
|
|
|
|
// make the opposite key of "key"
|
|
char oppKey[MAX_KEY_BYTES];
|
|
KEYSET(oppKey, key, m_ks);
|
|
KEYXOR(oppKey, 0x01);
|
|
|
|
char newKey[MAX_KEY_BYTES];
|
|
|
|
if (m_useIndexFile) {
|
|
char specialOppKey[MAX_KEY_BYTES];
|
|
|
|
bool isSpecialKey;
|
|
bool isShardedByTermId;
|
|
bool isShardedByTermIdSameHost = false;
|
|
|
|
if (m_rdbId == RDB_POSDB || m_rdbId == RDB2_POSDB2) {
|
|
isSpecialKey = (Posdb::getTermId(key) == POSDB_DELETEDOC_TERMID);
|
|
isShardedByTermId = Posdb::isShardedByTermId(key);
|
|
|
|
if (isShardedByTermId) {
|
|
isShardedByTermIdSameHost = (g_hostdb.getShard(g_hostdb.getShardNum(m_rdbId, key)) == g_hostdb.getShard(g_hostdb.getShardNumFromDocId(Posdb::getDocId(key))));
|
|
|
|
// if it's a positive key, we need to delete the existing delete doc key that could be present in tree/bucket
|
|
if (!isShardedByTermIdSameHost && !KEYNEG(key)) {
|
|
Posdb::makeDeleteDocKey(specialOppKey, Posdb::getDocId(key), false);
|
|
(void)(m_useTree ? m_tree.deleteNode(collnum, oppKey, true) : m_buckets.deleteNode(collnum, oppKey));
|
|
}
|
|
}
|
|
} else {
|
|
/// @todo ALC cater for other rdb types here
|
|
gbshutdownLogicError();
|
|
}
|
|
|
|
// there are no negative keys when we're using index (except special keys eg: posdb with termId 0)
|
|
// if we're adding key that have a corresponding opposite key, it means we want to remove the key from the tree
|
|
// even if it's a positive key (how else would we remove the special negative key?)
|
|
|
|
// we only need to delete opposing key when it's a negative key, or it's a special key (even if it's positive)
|
|
if (KEYNEG(key) || isSpecialKey) {
|
|
bool deleted = m_useTree ? m_tree.deleteNode(collnum, oppKey, true) : m_buckets.deleteNode(collnum, oppKey);
|
|
|
|
// only return if we don't need to add special deleteDoc key for shardByTermId
|
|
if (deleted && (!isShardedByTermId || (isShardedByTermId && isShardedByTermIdSameHost))) {
|
|
// assume that we don't need to delete from index even when we get positive special key
|
|
// since positive special key will only be inserted when a new document is added
|
|
// this means that other keys should overwrite the existing deleted docId
|
|
logTrace(g_conf.m_logTraceRdb,
|
|
"END. %s: Key with corresponding opposite key deleted in tree. Returning true", m_dbname);
|
|
return true;
|
|
}
|
|
}
|
|
|
|
// if we have no files on disk for this db, don't bother preserving a a negative rec, it just wastes tree space
|
|
if (KEYNEG(key)) {
|
|
// return if all data is in the tree
|
|
if (getBase(collnum)->getNumFiles() == 0) {
|
|
logTrace(g_conf.m_logTraceRdb, "END. %s: Negative key with all data in tree. Returning true", m_dbname);
|
|
return true;
|
|
}
|
|
|
|
// we need to change shard by termId delete key to a doc delete key
|
|
// this is to avoid dangling positive termId when docId is deleted
|
|
if (isShardedByTermId && !isShardedByTermIdSameHost) {
|
|
// we only make special key if termId do not belong to the same shard as docId
|
|
logTrace(g_conf.m_logTraceRdb, "%s: Shard by termId key found. Making special key.", m_dbname);
|
|
Posdb::makeDeleteDocKey(newKey, Posdb::getDocId(key), true);
|
|
key = newKey;
|
|
isSpecialKey = true;
|
|
}
|
|
|
|
// we should only store special delete keys (eg: posdb with termId 0)
|
|
// we will have non-special keys here to simplify logic in XmlDoc::getMetaList (and we can't really be sure
|
|
// if the key we're adding is in RdbTree/RdbBuckets at that point of time. It could potentially be dumped
|
|
// after the check.
|
|
if (!isSpecialKey) {
|
|
logTrace(g_conf.m_logTraceRdb, "END. %s: Negative key with non-zero termId found. Returning true", m_dbname);
|
|
return true;
|
|
}
|
|
} else {
|
|
// make sure that positive special key is not persisted (reasons as delete key above; the XmlDoc::getMetaList part)
|
|
if (isSpecialKey) {
|
|
logTrace(g_conf.m_logTraceRdb, "END. %s: Positive key with zero termId found. Returning true", m_dbname);
|
|
return true;
|
|
}
|
|
}
|
|
} else {
|
|
if (m_useTree) {
|
|
// . TODO: save this tree-walking state for adding the node!!!
|
|
// . TODO: use something like getNode_unlocked(key,&lastNode) then addNode (lastNode,key,dataCopy,dataSize)
|
|
// . #1) if we're adding a positive key, replace negative counterpart
|
|
// in the tree, because we'll override the positive rec it was
|
|
// deleting
|
|
// . #2) if we're adding a negative key, replace positive counterpart
|
|
// in the tree, but we must keep negative rec in tree in case
|
|
// the positive counterpart was overriding one on disk (as in #1)
|
|
|
|
// . freeData should be true, the tree doesn't own the data
|
|
// so it shouldn't free it really
|
|
m_tree.deleteNode(collnum, oppKey, true);
|
|
|
|
/// @todo ALC is this necessary? we remove delete keys when we dump to Rdb anyway for the first file
|
|
// if we have no files on disk for this db, don't bother preserving a a negative rec, it just wastes tree space
|
|
if (KEYNEG(key)) {
|
|
// return if all data is in the tree
|
|
if (getBase(collnum)->getNumFiles() == 0) {
|
|
logTrace(g_conf.m_logTraceRdb, "END. %s: Negative key with all data in tree. Returning true", m_dbname);
|
|
return true;
|
|
}
|
|
// . otherwise, assume we match a positive...
|
|
}
|
|
}
|
|
}
|
|
|
|
if (m_useTree) {
|
|
if (!m_tree.addNode(collnum, key, dataCopy, dataSize)) {
|
|
log(LOG_INFO, "db: Had error adding data to %s: %s", m_dbname, mstrerror(g_errno));
|
|
return false;
|
|
}
|
|
} else {
|
|
// . TODO: add using "lastNode" as a start node for the insertion point
|
|
// . should set g_errno if failed
|
|
// . caller should retry on g_errno of ETRYAGAIN or ENOMEM
|
|
if (!m_buckets.addNode(collnum, key, dataCopy, dataSize)) {
|
|
log(LOG_INFO, "db: Had error adding data to %s: %s", m_dbname, mstrerror(g_errno));
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// Add data record to the current index file for the -saved.dat file.
|
|
// This index is stored in the Rdb record- the individual part file
|
|
// indexes are in RdbBase and are read-only except when merging).
|
|
// we only add to index after adding to tree/buckets
|
|
RdbIndex *index = getBase(collnum)->getTreeIndex();
|
|
if (index) {
|
|
index->addRecord(key);
|
|
}
|
|
|
|
// if adding to spiderdb, add to cache, too (except negative key)
|
|
if (m_rdbId == RDB_DOLEDB && !KEYNEG(key)) {
|
|
// . this will create it if spiders are on and its NULL
|
|
// . even if spiders are off we need to create it so
|
|
// that the request can adds its ip to the waitingTree
|
|
SpiderColl *sc = g_spiderCache.getSpiderColl(collnum);
|
|
// skip if not there
|
|
if (!sc) {
|
|
logTrace(g_conf.m_logTraceRdb, "END. %s: Done. No spider coll. Returning true", m_dbname);
|
|
return true;
|
|
}
|
|
|
|
int32_t pri = Doledb::getPriority((key96_t *)key);
|
|
// skip over corruption
|
|
if (pri < 0 || pri >= MAX_SPIDER_PRIORITIES) {
|
|
logTrace(g_conf.m_logTraceRdb, "END. %s: Done. Skip over corruption", m_dbname);
|
|
return true;
|
|
}
|
|
// if added positive key is before cursor, update curso
|
|
if (KEYCMP(key, (char *)&sc->m_nextKeys[pri], sizeof(key96_t)) < 0) {
|
|
KEYSET((char *)&sc->m_nextKeys[pri], key, sizeof(key96_t));
|
|
|
|
if (g_conf.m_logDebugSpider) {
|
|
char keyStrBuf[MAX_KEYSTR_BYTES];
|
|
KEYSTR(key, 12, keyStrBuf);
|
|
logDebug(g_conf.m_logDebugSpider, "spider: cursor reset pri=%" PRId32" to %s", pri, keyStrBuf);
|
|
}
|
|
}
|
|
|
|
logTrace(g_conf.m_logTraceRdb, "END. %s: Done. For doledb. Returning true", m_dbname);
|
|
|
|
// that's it for doledb mods
|
|
return true;
|
|
}
|
|
|
|
logTrace(g_conf.m_logTraceRdb, "END. %s: Done. Returning true", m_dbname);
|
|
return true;
|
|
}
|
|
|
|
// . use the maps and tree to estimate the size of this list w/o hitting disk
|
|
// . used by Indexdb.cpp to get the size of a list for IDF weighting purposes
|
|
int64_t Rdb::estimateListSize(collnum_t collnum, const char *startKey, const char *endKey, char *max, int64_t oldTruncationLimit) const {
|
|
// pick it
|
|
if ( collnum < 0 || collnum > getNumBases() || ! getBase(collnum) ) {
|
|
log(LOG_WARN, "db: %s bad collnum of %i", m_dbname, collnum);
|
|
return 0;
|
|
}
|
|
return getBase(collnum)->estimateListSize(startKey, endKey, max, oldTruncationLimit);
|
|
}
|
|
|
|
|
|
bool Rdb::getTreeList(RdbList *result,
|
|
collnum_t collnum,
|
|
const void *startKey, const void *endKey,
|
|
int32_t minRecSizes,
|
|
int32_t *numPositiveRecs, int32_t *numNegativeRecs,
|
|
int32_t *memUsedByTree, int32_t *numUsedNodes)
|
|
{
|
|
int64_t start = gettimeofdayInMilliseconds();
|
|
|
|
// . returns false on error and sets g_errno
|
|
// . endkey of *result may be less than endKey
|
|
const char *structName;
|
|
|
|
if(m_useTree) {
|
|
// get the mem tree for this rdb
|
|
if(!m_tree.getList(collnum,
|
|
static_cast<const char*>(startKey),
|
|
static_cast<const char*>(endKey),
|
|
minRecSizes,
|
|
result,
|
|
numPositiveRecs,
|
|
numNegativeRecs,
|
|
useHalfKeys() ) )
|
|
return true;
|
|
structName = "tree";
|
|
*memUsedByTree = m_tree.getMemOccupiedForList();
|
|
*numUsedNodes = m_tree.getNumUsedNodes();
|
|
} else {
|
|
if(!m_buckets.getList(collnum,
|
|
static_cast<const char*>(startKey),
|
|
static_cast<const char*>(endKey),
|
|
minRecSizes,
|
|
result,
|
|
numPositiveRecs,
|
|
numNegativeRecs,
|
|
useHalfKeys()))
|
|
return true;
|
|
structName = "buckets";
|
|
*memUsedByTree = m_buckets.getMemOccupied();
|
|
*numUsedNodes = m_buckets.getNumKeys();
|
|
}
|
|
|
|
int64_t now = gettimeofdayInMilliseconds();
|
|
int64_t took = now - start;
|
|
if(took > 9)
|
|
logf(LOG_INFO,"net: Got list from %s in %" PRIu64" ms. size=%" PRId32" db=%s.",
|
|
structName, took, result->getListSize(),
|
|
m_dbname);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
// . return number of positive records - negative records
|
|
int64_t Rdb::getNumTotalRecs(bool useCache) const {
|
|
// this gets slammed w/ too many collections so use a cache...
|
|
int32_t now = 0;
|
|
if ( useCache ) {
|
|
now = getTime();
|
|
if ( now - m_cacheLastTime == 0 )
|
|
return m_cacheLastTotal;
|
|
}
|
|
|
|
// same as num recs
|
|
int32_t nb = getNumBases();
|
|
|
|
int64_t total = 0LL;
|
|
|
|
//return 0; // too many collections!!
|
|
for ( int32_t i = 0 ; i < nb ; i++ ) {
|
|
CollectionRec *cr = g_collectiondb.getRec(i);
|
|
if ( ! cr ) continue;
|
|
// if swapped out, this will be NULL, so skip it
|
|
RdbBase *base = cr->getBase(m_rdbId);
|
|
if ( ! base ) continue;
|
|
total += base->getNumTotalRecs();
|
|
}
|
|
// . add in the btree
|
|
// . TODO: count negative and positive recs in the b-tree
|
|
//total += m_tree.getNumPositiveKeys();
|
|
//total -= m_tree.getNumNegativeKeys();
|
|
if ( now ) {
|
|
m_cacheLastTime = now;
|
|
m_cacheLastTotal = total;
|
|
}
|
|
|
|
return total;
|
|
}
|
|
|
|
|
|
int64_t Rdb::getCollNumTotalRecs(collnum_t collnum) const {
|
|
|
|
if ( collnum < 0 ) return 0;
|
|
|
|
CollectionRec *cr = g_collectiondb.getRec(collnum);
|
|
if ( ! cr ) return 0;
|
|
// if swapped out, this will be NULL, so skip it
|
|
RdbBase *base = cr->getBase(m_rdbId);
|
|
if ( ! base ) {
|
|
log("rdb: getcollnumtotalrecs: base swapped out");
|
|
return 0;
|
|
}
|
|
return base->getNumTotalRecs();
|
|
}
|
|
|
|
|
|
|
|
// . how much mem is allocated for all of our maps?
|
|
// . we have one map per file
|
|
int64_t Rdb::getMapMemAllocated() const {
|
|
int64_t total = 0;
|
|
for ( int32_t i = 0 ; i < getNumBases() ; i++ ) {
|
|
// skip null base if swapped out
|
|
CollectionRec *cr = g_collectiondb.getRec(i);
|
|
if ( ! cr ) continue;
|
|
RdbBase *base = cr->getBase(m_rdbId);
|
|
if ( ! base ) continue;
|
|
total += base->getMapMemAllocated();
|
|
}
|
|
return total;
|
|
}
|
|
|
|
// sum of all parts of all big files
|
|
int32_t Rdb::getNumSmallFiles() const {
|
|
int32_t total = 0;
|
|
for ( int32_t i = 0 ; i < getNumBases() ; i++ ) {
|
|
// skip null base if swapped out
|
|
CollectionRec *cr = g_collectiondb.getRec(i);
|
|
if ( ! cr ) continue;
|
|
RdbBase *base = cr->getBase(m_rdbId);
|
|
if ( ! base ) continue;
|
|
total += base->getNumSmallFiles();
|
|
}
|
|
return total;
|
|
}
|
|
|
|
// sum of all parts of all big files
|
|
int32_t Rdb::getNumFiles() const {
|
|
int32_t total = 0;
|
|
for ( int32_t i = 0 ; i < getNumBases() ; i++ ) {
|
|
CollectionRec *cr = g_collectiondb.getRec(i);
|
|
if ( ! cr ) continue;
|
|
// if swapped out, this will be NULL, so skip it
|
|
RdbBase *base = cr->getBase(m_rdbId);
|
|
if ( ! base ) continue;
|
|
total += base->getNumFiles();
|
|
}
|
|
return total;
|
|
}
|
|
|
|
int64_t Rdb::getDiskSpaceUsed() const {
|
|
int64_t total = 0;
|
|
for ( int32_t i = 0 ; i < getNumBases() ; i++ ) {
|
|
CollectionRec *cr = g_collectiondb.getRec(i);
|
|
if ( ! cr ) continue;
|
|
// if swapped out, this will be NULL, so skip it
|
|
RdbBase *base = cr->getBase(m_rdbId);
|
|
if ( ! base ) continue;
|
|
total += base->getDiskSpaceUsed();
|
|
}
|
|
return total;
|
|
}
|
|
|
|
bool Rdb::isMerging() const {
|
|
// use this for speed
|
|
return m_numMergesOut!=0;
|
|
}
|
|
|
|
|
|
// maps an rdbId to an Rdb
|
|
Rdb *getRdbFromId ( rdbid_t rdbId ) {
|
|
switch(rdbId) {
|
|
case RDB_TAGDB: return g_tagdb.getRdb();
|
|
case RDB_POSDB: return g_posdb.getRdb();
|
|
case RDB_TITLEDB: return g_titledb.getRdb();
|
|
case RDB_SPIDERDB_DEPRECATED: return g_spiderdb.getRdb_deprecated();
|
|
case RDB_DOLEDB: return g_doledb.getRdb();
|
|
case RDB_CLUSTERDB: return g_clusterdb.getRdb();
|
|
case RDB_LINKDB: return g_linkdb.getRdb();
|
|
|
|
case RDB2_POSDB2: return g_posdb2.getRdb();
|
|
case RDB2_TITLEDB2: return g_titledb2.getRdb();
|
|
case RDB2_SPIDERDB2_DEPRECATED: return g_spiderdb2.getRdb_deprecated();
|
|
case RDB2_CLUSTERDB2: return g_clusterdb2.getRdb();
|
|
case RDB2_LINKDB2: return g_linkdb2.getRdb();
|
|
case RDB2_TAGDB2: return g_tagdb2.getRdb();
|
|
default:
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
// the opposite of the above
|
|
rdbid_t getIdFromRdb ( Rdb *rdb ) {
|
|
if ( rdb == g_tagdb.getRdb () ) return RDB_TAGDB;
|
|
if ( rdb == g_posdb.getRdb () ) return RDB_POSDB;
|
|
if ( rdb == g_titledb.getRdb () ) return RDB_TITLEDB;
|
|
if ( rdb == g_spiderdb.getRdb_deprecated() ) return RDB_SPIDERDB_DEPRECATED;
|
|
if ( rdb == g_doledb.getRdb () ) return RDB_DOLEDB;
|
|
if ( rdb == g_clusterdb.getRdb () ) return RDB_CLUSTERDB;
|
|
if ( rdb == g_linkdb.getRdb () ) return RDB_LINKDB;
|
|
if ( rdb == g_posdb2.getRdb () ) return RDB2_POSDB2;
|
|
if ( rdb == g_tagdb2.getRdb () ) return RDB2_TAGDB2;
|
|
if ( rdb == g_titledb2.getRdb () ) return RDB2_TITLEDB2;
|
|
if ( rdb == g_spiderdb2.getRdb_deprecated() ) return RDB2_SPIDERDB2_DEPRECATED;
|
|
if ( rdb == g_clusterdb2.getRdb () ) return RDB2_CLUSTERDB2;
|
|
if ( rdb == g_linkdb2.getRdb () ) return RDB2_LINKDB2;
|
|
|
|
log(LOG_LOGIC,"db: getIdFromRdb: no rdbId for %s.",rdb->getDbname());
|
|
return RDB_NONE;
|
|
}
|
|
|
|
bool isSecondaryRdb ( rdbid_t rdbId ) {
|
|
switch ( rdbId ) {
|
|
case RDB2_POSDB2 : return true;
|
|
case RDB2_TAGDB2 : return true;
|
|
case RDB2_TITLEDB2 : return true;
|
|
case RDB2_SPIDERDB2_DEPRECATED : return true;
|
|
case RDB2_CLUSTERDB2 : return true;
|
|
case RDB2_LINKDB2 : return true;
|
|
case RDB2_SPIDERDB2_SQLITE : return true;
|
|
//(todo?) rdb2_spiderdb2_sqlite
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// use a quick table now...
|
|
char getKeySizeFromRdbId(rdbid_t rdbId) {
|
|
switch(rdbId) {
|
|
case RDB_SPIDERDB_DEPRECATED:
|
|
case RDB2_SPIDERDB2_DEPRECATED:
|
|
case RDB_TAGDB:
|
|
case RDB2_TAGDB2:
|
|
return sizeof(key128_t); // 16
|
|
case RDB_POSDB:
|
|
case RDB2_POSDB2:
|
|
return sizeof(key144_t); // 18
|
|
case RDB_LINKDB:
|
|
case RDB2_LINKDB2:
|
|
return sizeof(key224_t); // 28
|
|
case RDB_TITLEDB:
|
|
case RDB2_TITLEDB2:
|
|
case RDB_CLUSTERDB:
|
|
case RDB2_CLUSTERDB2:
|
|
case RDB_DOLEDB:
|
|
return sizeof(key96_t); // 12
|
|
case RDB_SITEDEFAULTPAGETEMPERATURE:
|
|
return 8; //fake
|
|
case RDB_NONE:
|
|
case RDB_END:
|
|
default:
|
|
log(LOG_ERROR, "rdb: bad lookup rdbid of %i", (int)rdbId);
|
|
g_process.shutdownAbort(true);
|
|
}
|
|
}
|
|
|
|
// returns -1 if dataSize is variable
|
|
int32_t getDataSizeFromRdbId ( rdbid_t rdbId ) {
|
|
static bool s_flag = true;
|
|
static int32_t s_table2[RDB_END];
|
|
if ( rdbId >= RDB_END )
|
|
g_process.shutdownAbort(true);
|
|
if ( s_flag ) {
|
|
// sanity check
|
|
// loop over all possible rdbIds
|
|
for ( int32_t i = 1 ; i < RDB_END ; i++ ) {
|
|
// assume none
|
|
int32_t ds = 0;
|
|
// only these are 16 as of now
|
|
if ( i == RDB_POSDB ||
|
|
i == RDB_CLUSTERDB ||
|
|
i == RDB_LINKDB )
|
|
ds = 0;
|
|
else if ( i == RDB_TITLEDB ||
|
|
i == RDB_TAGDB ||
|
|
i == RDB_SPIDERDB_DEPRECATED ||
|
|
i == RDB_SPIDERDB_SQLITE ||
|
|
i == RDB_DOLEDB )
|
|
ds = -1;
|
|
else if ( i == RDB2_POSDB2 ||
|
|
i == RDB2_CLUSTERDB2 ||
|
|
i == RDB2_LINKDB2 )
|
|
ds = 0;
|
|
else if ( i == RDB2_TITLEDB2 ||
|
|
i == RDB2_TAGDB2 ||
|
|
i == RDB2_SPIDERDB2_DEPRECATED ||
|
|
i == RDB2_SPIDERDB2_SQLITE )
|
|
ds = -1;
|
|
else if ( i == RDB_SITEDEFAULTPAGETEMPERATURE )
|
|
ds = 4+4; //fake
|
|
else {
|
|
continue;
|
|
}
|
|
|
|
// set the table
|
|
s_table2[i] = ds;
|
|
}
|
|
// only stock the table once
|
|
s_flag = false;
|
|
}
|
|
return s_table2[rdbId];
|
|
}
|
|
|
|
// get the dbname
|
|
const char *getDbnameFromId(rdbid_t rdbId) {
|
|
const Rdb *rdb = getRdbFromId(rdbId);
|
|
if ( rdb )
|
|
return rdb->getDbname();
|
|
else {
|
|
log(LOG_LOGIC,"db: rdbId of %" PRId32" is invalid.",(int32_t)rdbId);
|
|
return "INVALID";
|
|
}
|
|
}
|
|
|
|
|
|
bool initialiseAllPrimaryRdbs() {
|
|
if(!g_posdb.init()) {
|
|
log( LOG_ERROR, "db: Posdb init failed." );
|
|
return false;
|
|
}
|
|
if(!g_titledb.init()) {
|
|
log( LOG_ERROR, "db: Titledb init failed." );
|
|
return false;
|
|
}
|
|
if(!g_tagdb.init()) {
|
|
log( LOG_ERROR, "db: Tagdb init failed." );
|
|
return false;
|
|
}
|
|
if(!g_spiderdb.init()) {
|
|
log( LOG_ERROR, "db: Spiderdb init failed." );
|
|
return false;
|
|
}
|
|
if(!g_doledb.init()) {
|
|
log( LOG_ERROR, "db: Doledb init failed." );
|
|
return false;
|
|
}
|
|
if(!g_clusterdb.init() ) {
|
|
log( LOG_ERROR, "db: Clusterdb init failed." );
|
|
return false;
|
|
}
|
|
if(!g_linkdb.init()) {
|
|
log( LOG_ERROR, "db: Linkdb init failed." );
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
// get the RdbBase class for an rdbId and collection name
|
|
RdbBase *getRdbBase(rdbid_t rdbId, collnum_t collnum) {
|
|
Rdb *rdb = getRdbFromId ( rdbId );
|
|
if ( ! rdb ) {
|
|
log("db: Collection #%" PRId32" does not exist.",(int32_t)collnum);
|
|
return NULL;
|
|
}
|
|
return rdb->getBase(collnum);
|
|
}
|
|
|
|
int32_t Rdb::getNumUsedNodes ( ) const {
|
|
if(m_useTree) return m_tree.getNumUsedNodes();
|
|
return m_buckets.getNumKeys();
|
|
}
|
|
|
|
int32_t Rdb::getMaxTreeMem() const {
|
|
if(m_useTree) return m_tree.getMaxMem();
|
|
return m_buckets.getMaxMem();
|
|
}
|
|
|
|
int32_t Rdb::getNumNegativeKeys() const {
|
|
if(m_useTree) return m_tree.getNumNegativeKeys();
|
|
return m_buckets.getNumNegativeKeys();
|
|
}
|
|
|
|
|
|
int32_t Rdb::getTreeMemOccupied() const {
|
|
if(m_useTree) return m_tree.getMemOccupied();
|
|
return m_buckets.getMemOccupied();
|
|
}
|
|
|
|
int32_t Rdb::getTreeMemAllocated () const {
|
|
if(m_useTree) return m_tree.getMemAllocated();
|
|
return m_buckets.getMemAllocated();
|
|
}
|
|
|
|
bool Rdb::needsSave() const {
|
|
if(m_useTree) return m_tree.needsSave();
|
|
else return m_buckets.needsSave();
|
|
}
|
|
|
|
void Rdb::cleanTree() {
|
|
if(m_useTree) return m_tree.cleanTree();
|
|
else return m_buckets.cleanBuckets();
|
|
}
|
|
|
|
bool Rdb::getTreeCollExist(collnum_t collnum) const {
|
|
return (m_useTree ? m_tree.collExists(collnum) : m_buckets.collExists(collnum));
|
|
}
|
|
|
|
// if we are doledb, we are a tree-only rdb, so try to reclaim
|
|
// memory from deleted nodes. works by condensing the used memory.
|
|
// returns how much we reclaimed.
|
|
int32_t Rdb::reclaimMemFromDeletedTreeNodes() {
|
|
ScopedLock sl(m_tree.getLock());
|
|
|
|
log("rdb: reclaiming tree mem for doledb");
|
|
|
|
// this only works for non-dumped RdbMem right now, i.e. doledb only
|
|
if ( m_rdbId != RDB_DOLEDB ) { g_process.shutdownAbort(true); }
|
|
|
|
ScopedLock sl2(m_mem.getLock());
|
|
|
|
// start scanning the mem pool
|
|
char *p = m_mem.m_mem;
|
|
char *pend = m_mem.m_ptr1;
|
|
|
|
char *memEnd = m_mem.m_mem + m_mem.m_memSize;
|
|
|
|
char *dst = p;
|
|
|
|
int32_t inUseOld = pend - p;
|
|
|
|
char *pstart = p;
|
|
|
|
int32_t marked = 0;
|
|
int32_t occupied = 0;
|
|
|
|
HashTableX ht;
|
|
if (!ht.set(4, 4, m_tree.getNumUsedNodes_unlocked() * 2, NULL, 0, false, "trectbl", true)) {// useMagic? yes..
|
|
return -1;
|
|
}
|
|
|
|
int32_t dups = 0;
|
|
|
|
// mark the data of unoccupied nodes somehow
|
|
int32_t nn = m_tree.getMinUnusedNode_unlocked();
|
|
for ( int32_t i = 0 ; i < nn ; i++ ) {
|
|
// skip empty nodes in tree
|
|
if (m_tree.isEmpty_unlocked(i) ) {marked++; continue; }
|
|
|
|
// get data ptr
|
|
const char *data = m_tree.getData_unlocked(i);
|
|
|
|
// sanity, ensure legit
|
|
if ( data < pstart ) { g_process.shutdownAbort(true); }
|
|
|
|
// offset
|
|
int32_t doff = (int32_t)(data - pstart);
|
|
|
|
// a dup? sanity check
|
|
if ( ht.isInTable ( &doff ) ) {
|
|
int32_t *vp = (int32_t *) ht.getValue ( &doff );
|
|
log("rdb: reclaim got dup oldi=%p "
|
|
"newi=%" PRId32" dataoff=%" PRId32"."
|
|
,vp,i,doff);
|
|
dups++;
|
|
continue;
|
|
}
|
|
|
|
// indicate it is legit
|
|
int32_t val = i;
|
|
ht.addKey ( &doff , &val );
|
|
occupied++;
|
|
}
|
|
|
|
if ( occupied + dups != m_tree.getNumUsedNodes_unlocked() )
|
|
log("rdb: reclaim mismatch1");
|
|
|
|
if ( ht.getNumUsedSlots() + dups != m_tree.getNumUsedNodes_unlocked() )
|
|
log("rdb: reclaim mismatch2");
|
|
|
|
int32_t skipped = 0;
|
|
|
|
// the spider requests should be linear in there. so we can scan
|
|
// them. then put their offset into a map that maps it to the new
|
|
// offset after doing the memmove().
|
|
for ( ; p < pend ; ) {
|
|
SpiderRequest *sreq = (SpiderRequest *)p;
|
|
int32_t oldOffset = p - pstart;
|
|
int32_t recSize = sreq->getRecSize();
|
|
// negative key? this shouldn't happen
|
|
if ( (sreq->m_key.n0 & 0x01) == 0x00 ) {
|
|
log("rdb: reclaim got negative doldb key in scan");
|
|
p += sizeof(key96_t);
|
|
skipped++;
|
|
continue;
|
|
}
|
|
// if not in hash table it was deleted from tree i guess
|
|
if ( ! ht.isInTable ( &oldOffset ) ) {
|
|
p += recSize;
|
|
skipped++;
|
|
continue;
|
|
}
|
|
|
|
// corrupted? or breach of mem buf?
|
|
if ( sreq->isCorrupt() || dst + recSize > memEnd ) {
|
|
log( LOG_WARN, "rdb: not readding corrupted doledb1 in scan. deleting from tree.");
|
|
g_process.shutdownAbort(true);
|
|
}
|
|
|
|
//// re -add with the proper value now
|
|
//
|
|
// otherwise, copy it over if still in tree
|
|
memmove ( dst , p , recSize );
|
|
int32_t newOffset = dst - pstart;
|
|
// store in map, overwrite old value of 1
|
|
ht.addKey ( &oldOffset , &newOffset );
|
|
dst += recSize;
|
|
p += recSize;
|
|
}
|
|
|
|
int32_t inUseNew = dst - pstart;
|
|
|
|
// update mem class as well
|
|
m_mem.m_ptr1 = dst;
|
|
|
|
// how much did we reclaim
|
|
int32_t reclaimed = inUseOld - inUseNew;
|
|
|
|
if ( reclaimed < 0 ) { g_process.shutdownAbort(true); }
|
|
if ( inUseNew < 0 ) { g_process.shutdownAbort(true); }
|
|
if ( inUseNew > m_mem.m_memSize ) { g_process.shutdownAbort(true); }
|
|
|
|
// now update data ptrs in the tree, m_data[]
|
|
for ( int i = 0 ; i < nn ; i++ ) {
|
|
// skip empty nodes in tree
|
|
if (m_tree.isEmpty_unlocked(i)) continue;
|
|
// update the data otherwise
|
|
const char *data = m_tree.getData_unlocked(i);
|
|
// sanity, ensure legit
|
|
if ( data < pstart ) { g_process.shutdownAbort(true); }
|
|
int32_t offset = data - pstart;
|
|
int32_t *newOffsetPtr = (int32_t *)ht.getValue ( &offset );
|
|
if ( ! newOffsetPtr ) { g_process.shutdownAbort(true); }
|
|
char *newData = pstart + *newOffsetPtr;
|
|
m_tree.setData_unlocked(i, newData);
|
|
}
|
|
|
|
log("rdb: reclaimed %" PRId32" bytes after scanning %" PRId32" "
|
|
"undeleted nodes and %" PRId32" deleted nodes for doledb"
|
|
,reclaimed,nn,marked);
|
|
|
|
// return # of bytes of mem we reclaimed
|
|
return reclaimed;
|
|
}
|