// Cachedb.cpp
// Simple record cache built on Rdb. The Cachedb class backs two global
// instances: g_cachedb (seo-related objects) and g_serpdb (seo serps).
#include "Cachedb.h"
#include "Threads.h"
// Global singletons. Both use the same Cachedb class; Cachedb::init()
// tells them apart by address (this == &g_serpdb) and assigns each a
// distinct name and rdb id.

// for seo-related objects:
Cachedb g_cachedb;

// for seo serps:
Cachedb g_serpdb;
// Release everything held by this cache's underlying Rdb.
void Cachedb::reset() { m_rdb.reset(); }
// . initialize the underlying Rdb for this cache instance
// . the same code path serves g_cachedb and g_serpdb; which one we are
//   is detected below by comparing "this" against &g_serpdb
// . returns false on failure, true on success
bool Cachedb::init ( ) {
	// we use the same disk page size as indexdb (for rdbmap.cpp)
	//int32_t pageSize = GB_INDEXDB_PAGE_SIZE;
	// set this for debugging
	//int64_t maxTreeMem = 1000000;
	// i've seen some debug entries like 33MB because of
	// m_debugDocIdScoreBuf and m_origDocIdScoreBuf take up so much space!
	// so don't cache those any more!!
	int64_t maxTreeMem = 40000000; // 40MB g_serpdb, 40MB g_cachedb
	// . what's max # of tree nodes?
	// . key+4+left+right+parents+dataPtr = sizeof(key96_t)+4 +4+4+4+4
	// . 32 bytes per record when in the tree
	// . >1000 bytes of data per rec
	// NOTE(review): the divisor below uses sizeof(key96_t)+16 = 28 bytes
	// of per-node overhead while the comment above totals 32; either way
	// the estimate is dominated by the +1000 bytes of data per rec.
	int32_t maxTreeNodes = maxTreeMem /(sizeof(key96_t)+16+1000);
	// disk page cache mem, 100MB on gk0 now
	//int32_t pcmem = 0; // g_conf.m_cachedbMaxDiskPageCacheMem;
	// keep this low if we are the tmp cluster
	//if ( g_hostdb.m_useTmpCluster ) pcmem = 0;
	// TODO: would be nice to just do page caching on the satellite files;
	// look into "minimizeDiskSeeks" at some point...

	// default identity is cachedb...
	m_name = "cachedb";
	m_rdbId = RDB_CACHEDB;
	// ...but if we are the g_serpdb global instance, rebrand ourselves
	if ( this == &g_serpdb ) {
		m_name = "serpdb";
		m_rdbId = RDB_SERPDB;
	}

	// disk page cache init, currently disabled (NULL passed below)
	// if ( ! m_pc.init ( m_name ,
	//		m_rdbId, // RDB_CACHEDB,
	//		pcmem ,
	//		pageSize ))
	//	return log("db: %s init failed.",m_name);

	// init the rdb
	if ( ! m_rdb.init ( g_hostdb.m_dir ,
			    m_name ,
			    true , // dedup
			    // NOTE(review): comment said "0 since no data"
			    // but -1 is passed — presumably -1 means
			    // variable-size data recs; confirm vs Rdb::init
			    -1 , // fixeddatasize is 0 since no data
			    4,//g_conf.m_cachedbMinFilesToMerge ,
			    // fix this to 15 and rely on the page cache of
			    // just the satellite files and the daily merge to
			    // keep things fast.
			    //15 ,
			    maxTreeMem ,
			    maxTreeNodes ,
			    true , //isTreeBalanced
			    0 , // cache mem
			    0 , // cache nodes
			    false, // true , // use half keys
			    false , // load cache from disk
			    NULL,//&m_pc ,
			    false , // false
			    false , // preload page cache
			    sizeof(key96_t) ,
			    true , // bias page cache? (true!)
			    true )) // is collectionless???? !!!!
		return false;

	// add the base since it is a collectionless rdb
	return m_rdb.addRdbBase1 ( NULL );
}
/*
// Dead code: per-collection add/verify, unused since this rdb became
// collectionless (see addRdbBase1(NULL) in init()).
bool Cachedb::addColl ( char *coll, bool doVerify ) {
	if ( ! m_rdb.addColl ( coll ) ) return false;
	if ( ! doVerify ) return true;
	// verify
	if ( verify(coll) ) return true;
	// if not allowing scale, return false
	if ( ! g_conf.m_allowScale ) return false;
	// otherwise let it go
	log ( "db: Verify failed, but scaling is allowed, passing." );
	return true;
}
*/
bool Cachedb::verify ( char *coll ) {
|
|
// coll is NULL here methinks
|
|
log ( LOG_DEBUG, "db: Verifying %s...",m_name );
|
|
g_threads.disableThreads();
|
|
|
|
Msg5 msg5;
|
|
Msg5 msg5b;
|
|
RdbList list;
|
|
key224_t startKey;
|
|
key224_t endKey;
|
|
startKey.setMin();
|
|
endKey.setMax();
|
|
int32_t minRecSizes = 64000;
|
|
CollectionRec *cr = g_collectiondb.getRec(coll);
|
|
|
|
if ( ! msg5.getList ( m_rdbId,//RDB_CACHEDB ,
|
|
cr->m_collnum ,
|
|
&list ,
|
|
(char*)&startKey ,
|
|
(char*)&endKey ,
|
|
minRecSizes ,
|
|
true , // includeTree ,
|
|
false , // add to cache?
|
|
0 , // max cache age
|
|
0 , // startFileNum ,
|
|
-1 , // numFiles ,
|
|
NULL , // state
|
|
NULL , // callback
|
|
0 , // niceness
|
|
false , // err correction?
|
|
NULL ,
|
|
0 ,
|
|
-1 ,
|
|
true ,
|
|
-1LL ,
|
|
&msg5b ,
|
|
true )) {
|
|
g_threads.enableThreads();
|
|
return log("db: HEY! it did not block");
|
|
}
|
|
|
|
int32_t count = 0;
|
|
int32_t got = 0;
|
|
for ( list.resetListPtr() ; ! list.isExhausted() ;
|
|
list.skipCurrentRecord() ) {
|
|
key224_t k;
|
|
list.getCurrentKey((char*)&k);
|
|
count++;
|
|
uint32_t shardNum = getShardNum ( m_rdbId , &k );//RDB_CACHEDB
|
|
if ( shardNum == getMyShardNum() ) got++;
|
|
}
|
|
if ( got != count ) {
|
|
log ("db: Out of first %" INT32 " records in %s , "
|
|
"only %" INT32 " belong to our group.",count,m_name,got);
|
|
|
|
/*
|
|
// repeat with log
|
|
for ( list.resetListPtr() ; ! list.isExhausted() ;
|
|
list.skipCurrentRecord() ) {
|
|
|
|
key224_t k;
|
|
list.getCurrentKey((char*)&k);
|
|
uint32_t shardNum = getShardNum ( RDB_CACHEDB , &k );
|
|
int32_t groupNum = g_hostdb.getGroupNum(groupId);
|
|
uint32_t sh32 ;
|
|
sh32 = g_cachedb.getLinkeeSiteHash32_uk(&k);
|
|
uint16_t sh16 = sh32 >> 19;
|
|
log("db: sh16=0x%"XINT32" group=%"INT32"",
|
|
(int32_t)sh16,groupNum);
|
|
}
|
|
*/
|
|
|
|
|
|
// exit if NONE, we probably got the wrong data
|
|
if ( got == 0 ) log("db: Are you sure you have the "
|
|
"right "
|
|
"data in the right directory? "
|
|
"Exiting.");
|
|
log ( "db: Exiting due to inconsistency.");
|
|
g_threads.enableThreads();
|
|
return g_conf.m_bypassValidation;
|
|
}
|
|
log ( LOG_DEBUG, "db: %s passed verification successfully for "
|
|
"%" INT32 " recs.", m_name,count );
|
|
// DONE
|
|
g_threads.enableThreads();
|
|
return true;
|
|
}
|
|
|