lots of fixes for collection swapping.

This commit is contained in:
Matt Wells 2014-09-29 20:16:39 -07:00
parent cfb2ab7e82
commit 8c6d216a14
4 changed files with 84 additions and 17 deletions

@ -606,7 +606,8 @@ RdbBase *CollectionRec::getBasePtr ( char rdbId ) {
static bool s_inside = false;
// returns NULL w/ g_errno set on error.
// . returns NULL w/ g_errno set on error.
// . TODO: ensure this is not called from within a thread; it is not thread safe
RdbBase *CollectionRec::getBase ( char rdbId ) {
if ( s_inside ) { char *xx=NULL;*xx=0; }
@ -615,14 +616,25 @@ RdbBase *CollectionRec::getBase ( char rdbId ) {
log("cdb: swapin collnum=%li",(long)m_collnum);
// sanity!
if ( g_threads.amThread() ) { char *xx=NULL;*xx=0; }
s_inside = true;
// turn off quickpoll to avoid getbase() being re-called and
// coring from s_inside being true
long saved = g_conf.m_useQuickpoll;
g_conf.m_useQuickpoll = false;
// load them back in. return NULL w/ g_errno set on error.
if ( ! g_collectiondb.addRdbBasesForCollRec ( this ) ) {
log("coll: error swapin: %s",mstrerror(g_errno));
g_conf.m_useQuickpoll = saved;
s_inside = false;
return NULL;
}
g_conf.m_useQuickpoll = saved;
s_inside = false;
g_collectiondb.m_numCollsSwappedOut--;

@ -412,6 +412,7 @@ class CollectionRec {
char m_dailyMergeDOWList[48];
long m_treeCount;
bool swapOut();
bool m_swappedOut;

@ -1578,6 +1578,8 @@ void Msg5::repairLists_r ( ) {
// . logging the key ranges gives us an idea of how long
// it will take to patch the bad data
long nn = m_msg3.m_numFileNums;
// TODO: fix this. can't call Collectiondb::getBase from
// within a thread!
RdbBase *base = getRdbBase ( m_rdbId , m_collnum );
if ( i < nn && base ) {
long fn = m_msg3.m_fileNums[i];
@ -1631,8 +1633,8 @@ void Msg5::mergeLists_r ( ) {
if ( KEYCMP(m_prevKey,m_fileStartKey,m_ks)>=0 ) m_prevCount = 0;
// get base, returns NULL and sets g_errno to ENOCOLLREC on error
RdbBase *base; if (!(base=getRdbBase(m_rdbId,m_collnum))) {
log("No collection found."); return; }
//RdbBase *base; if (!(base=getRdbBase(m_rdbId,m_collnum))) {
// log("No collection found."); return; }
/*
if ( m_rdbId == RDB_POSDB ) {
@ -1765,7 +1767,8 @@ void Msg5::mergeLists_r ( ) {
m_minEndKey ,
m_minRecSizes ,
m_removeNegRecs ,
getIdFromRdb ( base->m_rdb ) ,
//getIdFromRdb ( base->m_rdb ) ,
m_rdbId ,
&m_filtered ,
NULL,//m_tfns , // used for titledb
NULL,//&m_tfndbList , // used for titledb

77
Rdb.cpp

@ -1384,6 +1384,40 @@ bool Rdb::gotTokenForDump ( ) {
"db: Checking validity of in memory data of %s before dumping, "
"took %lli ms.",m_dbname,gettimeofdayInMilliseconds()-start);
////
//
// see what collnums are in the tree and just try those
//
////
CollectionRec *cr = NULL;
for ( long i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) {
cr = g_collectiondb.m_recs[i];
if ( ! cr ) continue;
// reset this collection's tree count
cr->m_treeCount = 0;
}
if ( m_useTree ) {
// now scan the rdbtree and inc treecount where appropriate
for ( long i = 0 ; i < m_tree.m_minUnusedNode ; i++ ) {
// skip node if its parent is -2 (unoccupied)
if ( m_tree.m_parents[i] == -2 ) continue;
// get rec from tree collnum
cr = g_collectiondb.m_recs[m_tree.m_collnums[i]];
if ( cr ) cr->m_treeCount++;
}
}
else {
for(long i = 0; i < m_buckets.m_numBuckets; i++) {
RdbBucket *b = m_buckets.m_buckets[i];
collnum_t cn = b->getCollnum();
long nk = b->getNumKeys();
for ( long j = 0 ; j < nk; j++ ) {
cr = g_collectiondb.m_recs[cn];
if ( cr ) cr->m_treeCount++;
}
}
}
// loop through collections, dump each one
m_dumpCollnum = (collnum_t)-1;
// clear this for dumpCollLoop()
@ -1403,13 +1437,20 @@ bool Rdb::gotTokenForDump ( ) {
bool Rdb::dumpCollLoop ( ) {
loop:
CollectionRec *cr = g_collectiondb.m_recs[m_dumpCollnum];
if ( ! cr ) return true;
// if no more, we're done...
if ( m_dumpCollnum >= getNumBases() ) return true;
// the only way g_errno can be set here is from a previous dump
// error?
if ( g_errno ) {
hadError:
// if swapped out, this will be NULL, so skip it
RdbBase *base = cr->getBasePtr(m_rdbId);
RdbBase *base = NULL;
CollectionRec *cr = NULL;
if ( m_dumpCollnum>=0 )
cr = g_collectiondb.m_recs[m_dumpCollnum];
if ( cr )
base = cr->getBasePtr(m_rdbId);
//RdbBase *base = getBase(m_dumpCollnum);
log("build: Error dumping collection: %s.",mstrerror(g_errno));
// . if we wrote nothing, remove the file
@ -1427,16 +1468,26 @@ bool Rdb::dumpCollLoop ( ) {
s_lastTryTime = getTime();
return true;
}
// advance
// advance for next round
m_dumpCollnum++;
// advance m_dumpCollnum until we have a non-null RdbBase
while ( m_dumpCollnum < getNumBases() &&
! cr->getBasePtr (m_rdbId) )
m_dumpCollnum++;
// if no more, we're done...
if ( m_dumpCollnum >= getNumBases() ) return true;
RdbBase *base = cr->getBasePtr(m_rdbId);//m_dumpCollnum);
CollectionRec *cr = g_collectiondb.m_recs[m_dumpCollnum];
// collrec is valid?
if ( ! cr ) goto loop;
// base is null if swapped out. skip it then. is that correct?
// probably not!
//RdbBase *base = cr->getBasePtr(m_rdbId);//m_dumpCollnum);
// swap it in for dumping purposes if we have to
RdbBase *base = cr->getBase(m_rdbId);//m_dumpCollnum);
// how can this happen?
if ( ! base ) {
log("rdb: dumpcollloop base was null for cn=%li",
(long)m_dumpCollnum-1);
goto hadError;
}
// before we create the file, see if tree has anything for this coll
//key_t k; k.setMin();
@ -2684,7 +2735,7 @@ long long Rdb::getMapMemAlloced () {
long long total = 0;
for ( long i = 0 ; i < getNumBases() ; i++ ) {
// skip null base if swapped out
CollectionRec *cr = g_collectiondb.m_recs[m_dumpCollnum];
CollectionRec *cr = g_collectiondb.m_recs[i];
if ( ! cr ) return true;
RdbBase *base = cr->getBasePtr(m_rdbId);
//RdbBase *base = getBase(i);
@ -2699,7 +2750,7 @@ long Rdb::getNumSmallFiles ( ) {
long total = 0;
for ( long i = 0 ; i < getNumBases() ; i++ ) {
// skip null base if swapped out
CollectionRec *cr = g_collectiondb.m_recs[m_dumpCollnum];
CollectionRec *cr = g_collectiondb.m_recs[i];
if ( ! cr ) return true;
RdbBase *base = cr->getBasePtr(m_rdbId);
//RdbBase *base = getBase(i);