reduce mem usage in rdbmap. useful for when there are thousands of tiny collections.
Matt Wells 2014-11-07 08:49:08 -08:00
parent 4a91b5da9a
commit 444ed14cde
3 changed files with 39 additions and 3 deletions
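
In short: RdbMap allocates its key and offset arrays in fixed-size segments of PAGES_PER_SEGMENT slots, so a collection whose files span only a handful of pages still pays for a full segment per map. This commit lets such maps shrink to their actual page count. A rough sketch of the per-map saving, with illustrative values (the real PAGES_PER_SEGMENT and key size live in the headers and are assumed here, not quoted):

#include <cstdio>

int main ( ) {
	// assumed, illustrative values -- not quoted from RdbMap.h
	const long PAGES_PER_SEGMENT = 2048; // slots in a full segment
	const long ks                = 12;   // bytes per key
	const long numPages          = 10;   // pages a tiny file maps
	// before: a full segment of keys plus a 2-byte offset per slot
	long before = PAGES_PER_SEGMENT * (ks + 2);
	// after reduceMemFootPrint(): only numPages slots remain
	long after  = numPages * (ks + 2);
	printf ( "per-map: %ld -> %ld bytes\n" , before , after );
	return 0;
}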

RdbBase.cpp
@@ -754,6 +754,9 @@ long RdbBase::addFile ( long id , bool isNew , long mergeNum , long id2 ,
g_mem.m_maxMem = mm;
// sanity check
if ( id2 < 0 && m_isTitledb ) { char *xx = NULL; *xx = 0; }
CollectionRec *cr = NULL;
// set the data file's filename
char name[256];
// if we're converting, just add to m_filesIds and m_fileIds2
@@ -856,6 +859,7 @@ long RdbBase::addFile ( long id , bool isNew , long mergeNum , long id2 ,
}
if ( ! isNew ) log(LOG_DEBUG,"db: Added %s for collnum=%li pages=%li",
name ,(long)m_collnum,m->getNumPages());
// open this big data file for reading only
if ( ! isNew ) {
if ( mergeNum < 0 )
@@ -885,6 +889,11 @@ long RdbBase::addFile ( long id , bool isNew , long mergeNum , long id2 ,
m_files [i] = f;
m_maps [i] = m;
// to free up mem for diffbot's many collections...
cr = g_collectiondb.getRec ( m_collnum );
if ( ! isNew && cr && cr->m_isCustomCrawl )
m->reduceMemFootPrint();
// are we resuming a killed merge?
if ( g_conf.m_readOnlyMode && ((id & 0x01)==0) ) {
log("db: Cannot start in read only mode with an incomplete "

RdbMap.cpp
@@ -8,6 +8,7 @@ RdbMap::RdbMap() {
m_numSegments = 0;
m_numSegmentPtrs = 0;
m_numSegmentOffs = 0;
m_newPagesPerSegment = 0;
reset ( );
}
@@ -55,10 +56,12 @@ bool RdbMap::close ( bool urgent ) {
void RdbMap::reset ( ) {
m_generatingMap = false;
int pps = PAGES_PER_SEGMENT;
if ( m_newPagesPerSegment > 0 ) pps = m_newPagesPerSegment;
for ( long i = 0 ; i < m_numSegments; i++ ) {
//mfree(m_keys[i],sizeof(key_t)*PAGES_PER_SEGMENT,"RdbMap");
mfree(m_keys[i],m_ks*PAGES_PER_SEGMENT,"RdbMap");
mfree(m_offsets[i], 2*PAGES_PER_SEGMENT,"RdbMap");
mfree(m_keys[i],m_ks *pps,"RdbMap");
mfree(m_offsets[i], 2*pps,"RdbMap");
// set to NULL so we know if accessed illegally
m_keys [i] = NULL;
m_offsets[i] = NULL;
@@ -71,6 +74,8 @@ void RdbMap::reset ( ) {
m_numSegmentPtrs = 0;
m_numSegmentOffs = 0;
m_newPagesPerSegment = 0;
m_needToWrite = false;
m_fileStartOffset = 0LL;
m_numSegments = 0;
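
(reset() must now free each segment with pps rather than the compile-time PAGES_PER_SEGMENT, because mfree() is passed the allocation size for its accounting; m_newPagesPerSegment records the smaller size once a map has been shrunk, and is cleared again before reset() returns.)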
@@ -1237,8 +1242,26 @@ bool RdbMap::addSegmentPtr ( long n ) {
}
return true;
}
// try to save memory when there are many collections with tiny files on disk
void RdbMap::reduceMemFootPrint () {
if ( m_numSegments != 1 ) return;
if ( m_numPages >= 100 ) return;
//return;
char *oldKeys = m_keys[0];
short *oldOffsets = m_offsets[0];
int pps = m_numPages;
m_keys [0] = (char *)mmalloc ( m_ks * pps , "RdbMap" );
m_offsets[0] = (short *)mmalloc ( 2 * pps , "RdbMap" );
// copy over
memcpy ( m_keys [0] , oldKeys , m_ks * pps );
memcpy ( m_offsets[0] , oldOffsets , 2 * pps );
int oldPPS = PAGES_PER_SEGMENT;
mfree ( oldKeys , m_ks * oldPPS , "RdbMap" );
mfree ( oldOffsets , 2 * oldPPS , "RdbMap" );
m_newPagesPerSegment = m_numPages;
}
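
reduceMemFootPrint() is a shrink-to-fit pass: for a map with a single, lightly used segment (under 100 pages) it reallocates the key and offset arrays down to exactly m_numPages slots and records that in m_newPagesPerSegment so reset() later frees the right amount. Note it frees the old arrays assuming the full PAGES_PER_SEGMENT size, so it is only safe to call once per freshly loaded map. A standalone sketch of the same idea, using plain malloc/free in place of the codebase's mmalloc/mfree and adding the out-of-memory check the diff omits:

#include <cstdlib>
#include <cstring>

// shrink a buffer of 'cap' fixed-size slots down to 'used' slots;
// returns the old buffer unchanged if the smaller allocation fails
static void *shrinkToFit ( void *old , size_t slotSize ,
			   long cap , long used ) {
	if ( used >= cap ) return old;        // nothing to gain
	void *p = malloc ( slotSize * used );
	if ( ! p ) return old;                // keep the big buffer on OOM
	memcpy ( p , old , slotSize * used ); // copy only the live slots
	free ( old );
	return p;
}

In the diff this corresponds to shrinking m_keys[0] (slotSize = m_ks) and m_offsets[0] (slotSize = 2, the size of a short).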
// . add "n" segments
// . returns false and sets g_errno on error
bool RdbMap::addSegment ( ) {

RdbMap.h
@@ -101,6 +101,8 @@ class RdbMap {
// . if it's -1 then each record's data is of variable size
long getFixedDataSize() { return m_fixedDataSize; };
void reduceMemFootPrint();
// . this is called automatically when close() is called
// . however, we may wish to call it externally to ensure no data loss
// . return false if any write fails
@@ -361,6 +363,8 @@ class RdbMap {
// . NOTE: also used as the file size of the file we're mapping
long long m_offset;
long m_newPagesPerSegment;
// we keep global tallies on the number of non-deleted records
// and deleted records
long long m_numPositiveRecs;