// Rdb.h
// Matt Wells, copyright Sep 2000
// contains one RdbBase for each collection
2016-03-08 16:14:30 -05:00
#ifndef GB_RDB_H
#define GB_RDB_H
2013-08-02 16:12:24 -04:00
#include "RdbBase.h"
#include "RdbTree.h"
#include "RdbMem.h"
#include "RdbDump.h"
#include "RdbBuckets.h"
2016-08-05 09:42:20 -04:00
#include "RdbIndex.h"
#include "Hostdb.h"
#include "rdbid_t.h"
#include <atomic>
2013-08-02 16:12:24 -04:00
bool makeTrashDir() ;
2013-08-02 16:12:24 -04:00
// get the RdbBase class for an rdbId and collection name
2016-08-24 18:34:36 -04:00
class RdbBase *getRdbBase(rdbid_t rdbId, collnum_t collnum);
2013-08-02 16:12:24 -04:00
// maps an rdbId to an Rdb
class Rdb *getRdbFromId ( rdbid_t rdbId ) ;
2013-08-02 16:12:24 -04:00
// the reverse of the above
rdbid_t getIdFromRdb ( class Rdb *rdb ) ;
bool isSecondaryRdb ( rdbid_t rdbId ) ;
2013-08-02 16:12:24 -04:00
// get the dbname
2016-10-20 12:30:14 -04:00
const char *getDbnameFromId(rdbid_t rdbId);
//initialize all primary Rdbs
bool initialiseAllPrimaryRdbs();
2013-08-02 16:12:24 -04:00
// size of keys
2016-08-24 18:37:27 -04:00
char getKeySizeFromRdbId(rdbid_t rdbId);
2013-08-02 16:12:24 -04:00
// and this is -1 if dataSize is variable
2017-03-22 06:35:45 -04:00
int32_t getDataSizeFromRdbId ( rdbid_t rdbId );
2016-08-24 09:09:16 -04:00
void forceMergeAll(rdbid_t rdbId);
2013-08-02 16:12:24 -04:00
// main.cpp calls this
void attemptMergeAllCallback ( int fd , void *state ) ;
void attemptMergeAll ( );
2013-08-02 16:12:24 -04:00
class Rdb {
public:
2013-08-02 16:12:24 -04:00
Rdb ( );
~Rdb ( );
2016-03-15 09:04:30 -04:00
bool addRdbBase1 ( const char *coll );
2017-03-22 06:51:47 -04:00
2016-03-15 09:04:30 -04:00
bool delColl ( const char *coll );
2013-08-02 16:12:24 -04:00
2013-12-08 00:12:48 -05:00
bool resetBase ( collnum_t collnum );
bool deleteAllRecs ( collnum_t collnum ) ;
bool deleteColl ( collnum_t collnum , collnum_t newCollnum ) ;
2013-10-18 18:21:00 -04:00
bool init ( const char *dbname , // "indexdb","tagdb",...
2014-11-10 17:45:11 -05:00
int32_t fixedDataSize , //= -1 ,
int32_t minToMerge , //, //= 2 ,
int32_t maxTreeMem , //= 1024*1024*32 ,
int32_t maxTreeNodes ,
2013-08-02 16:12:24 -04:00
bool useHalfKeys ,
char keySize,
bool useIndexFile);
2017-04-03 06:21:02 -04:00
2016-08-01 11:21:13 -04:00
bool needsSave() const;
2013-08-02 16:12:24 -04:00
// . returns false and sets g_errno on error
// . caller should retry later on g_errno of ENOMEM or ETRYAGAIN
// . returns the node # in the tree it added the record to
// . key low bit must be set (otherwise it indicates a delete)
/**
* @note special behaviour when index is enabled. if corresponding opposite key is found in tree/bucket,
* it is assumed that we want the opposite key deleted.
* eg:
* positive key in tree, negative key passed to addRecord (positive key deleted)
* negative key in tree, positive key passed to addRecord (negative key deleted)
*
* the reason for this is so that we can eliminate negative key in tree
* (negative key should never be there when index is used, unless for special reasons. eg: posdb for deleted document)
*
* the scenario where we want to eliminate a negative key is when we deleted a document, and then we respider successfully
*/
bool addRecord(collnum_t collnum, const char *key, const char *data, int32_t dataSize);
2013-08-02 16:12:24 -04:00
// returns false if no room in tree or m_mem for a list to add
2017-03-22 06:51:33 -04:00
bool hasRoom(int32_t numRecs, int32_t dataSize) const;
2013-08-02 16:12:24 -04:00
// . returns false on error and sets errno
// . return true on success
// . if we can't handle all records in list we don't add any and
// set errno to ETRYAGAIN or ENOMEM
// . we copy all data so you can free your list when we're done
bool addList(collnum_t collnum, RdbList *list) {
return addList(collnum,list,true);
}
bool addListNoSpaceCheck(collnum_t collnum, RdbList *list) {
return addList(collnum,list,false);
}
2013-08-02 16:12:24 -04:00
bool deleteTreeNode(collnum_t collnum, const char *key);
void verifyTreeIntegrity();
2016-08-01 11:21:13 -04:00
bool isSecondaryRdb() const {
2017-03-22 06:35:45 -04:00
return ::isSecondaryRdb(m_rdbId);
}
2014-01-15 18:42:59 -05:00
2016-08-01 11:21:13 -04:00
bool isInitialized() const { return m_initialized; }
2016-08-01 11:21:13 -04:00
int32_t getFixedDataSize() const { return m_fixedDataSize; }
2013-08-02 16:12:24 -04:00
2016-08-01 11:21:13 -04:00
bool useHalfKeys() const { return m_useHalfKeys; }
char getKeySize() const { return m_ks; }
2016-08-05 06:12:22 -04:00
int32_t getPageSize() const { return m_pageSize; }
2013-08-02 16:12:24 -04:00
bool isTitledb() const { return m_rdbId==RDB_TITLEDB || m_rdbId==RDB2_TITLEDB2; }
RdbTree* getTree() { return (m_useTree ? &m_tree : NULL); }
RdbBuckets *getBuckets() { return (m_useTree ? NULL : &m_buckets); }
2016-10-17 08:12:25 -04:00
int32_t getAvailMem() const { return m_mem.getAvailMem(); }
int32_t getUsedMem() const { return m_mem.getUsedMem(); }
2016-08-01 11:21:13 -04:00
bool useTree() const { return m_useTree;}
2013-08-02 16:12:24 -04:00
2016-06-14 08:06:37 -04:00
int32_t getNumUsedNodes() const;
2016-08-01 11:21:13 -04:00
int32_t getMaxTreeMem() const;
int32_t getTreeMemOccupied() const;
int32_t getTreeMemAllocated() const;
2016-08-01 11:21:13 -04:00
int32_t getNumNegativeKeys() const;
2016-08-24 12:04:06 -04:00
void cleanTree();
2013-08-02 16:12:24 -04:00
2016-10-17 06:29:51 -04:00
RdbBase *getBase(collnum_t collnum );
const RdbBase *getBase(collnum_t collnum ) const { return const_cast<Rdb*>(this)->getBase(collnum); }
int32_t getNumBases() const;
2013-08-02 16:12:24 -04:00
// how much mem is allocated for our maps?
2016-10-17 06:29:51 -04:00
int64_t getMapMemAllocated() const;
2013-08-02 16:12:24 -04:00
2016-10-17 06:29:51 -04:00
int32_t getNumFiles() const;
2013-08-02 16:12:24 -04:00
// sum of all parts of all big files
2016-10-17 06:29:51 -04:00
int32_t getNumSmallFiles() const;
int64_t getDiskSpaceUsed() const;
2013-08-02 16:12:24 -04:00
// use the maps and tree to estimate the size of this list
int64_t estimateListSize(collnum_t collnum,
const char *startKey, const char *endKey, char *maxKey,
int64_t oldTruncationLimit) const;
2013-08-02 16:12:24 -04:00
//Get list from tree or buckets. Returns true on success
bool getTreeList(RdbList *result,
collnum_t collnum,
const void *startKey, const void *endKey,
int32_t minRecSizes,
int32_t *numPositiveRecs, int32_t *numNegativeRecs,
int32_t *memUsedByTree, int32_t *numUsedNodes);
2013-08-02 16:12:24 -04:00
// positive minus negative
2016-10-17 06:29:51 -04:00
int64_t getNumTotalRecs(bool useCache = false) const;
2013-08-02 16:12:24 -04:00
2016-10-17 06:29:51 -04:00
int64_t getCollNumTotalRecs(collnum_t collnum) const; //could technically be static
2013-08-02 16:12:24 -04:00
// used for keeping track of stats
2016-08-05 06:41:40 -04:00
void didSeek() { m_numSeeks++; }
void didRead(int64_t bytes) { m_numRead += bytes; }
void didReSeek() { m_numReSeeks++; }
int64_t getNumSeeks() const { return m_numSeeks; }
int64_t getNumReSeeks() const { return m_numReSeeks; }
int64_t getNumRead() const { return m_numRead ; }
2013-08-02 16:12:24 -04:00
// net stats for "get" requests
2016-08-05 06:41:40 -04:00
void readRequestGet(int32_t bytes) { m_numReqsGet++; m_numNetReadGet += bytes; }
void sentReplyGet(int32_t bytes) { m_numRepliesGet++; m_numNetSentGet += bytes; }
int64_t getNumRequestsGet() const { return m_numReqsGet; }
int64_t getNetReadGet() const { return m_numNetReadGet; }
int64_t getNumRepliesGet() const { return m_numRepliesGet; }
int64_t getNetSentGet() const { return m_numNetSentGet; }
2013-08-02 16:12:24 -04:00
// net stats for "add" requests
2016-08-05 06:41:40 -04:00
void readRequestAdd(int32_t bytes) { m_numReqsAdd++; m_numNetReadAdd += bytes; }
void sentReplyAdd(int32_t bytes) { m_numRepliesAdd++ ; m_numNetSentAdd += bytes; }
int64_t getNumRequestsAdd() const { return m_numReqsAdd; }
int64_t getNetReadAdd() const { return m_numNetReadAdd; }
int64_t getNumRepliesAdd() const { return m_numRepliesAdd; }
int64_t getNetSentAdd() const { return m_numNetSentAdd; }
2013-08-02 16:12:24 -04:00
2016-08-24 12:04:06 -04:00
rdbid_t getRdbId() const { return m_rdbId; }
const char* getDbname() const { return m_dbname; }
2017-04-03 15:03:51 -04:00
bool isDumping() const { return m_isDumping; }
2016-08-24 12:04:06 -04:00
bool isUseIndexFile() const { return m_useIndexFile; }
2013-08-02 16:12:24 -04:00
// . you'll lose your data in this class if you call this
void reset();
2016-10-20 12:24:32 -04:00
bool isSavingTree() const;
bool saveTree(bool useThread, void *state, void (*callback)(void *state));
bool saveIndexes();
bool saveMaps();
2016-08-05 09:42:20 -04:00
2013-08-02 16:12:24 -04:00
// . load the tree named "saved.dat", keys must be out of order because
// tree is not balanced
bool loadTree ( ) ;
static bool initializeRdbDumpThread();
static void finalizeRdbDumpThread();
2017-05-08 08:10:10 -04:00
static bool hasPendingRdbDumpJob();
void submitRdbDumpJob(bool forceDump);
2013-08-02 16:12:24 -04:00
2016-08-05 06:41:40 -04:00
bool needsDump() const;
2013-08-02 16:12:24 -04:00
// these are used for computing load on a machine
2016-08-05 06:41:40 -04:00
bool isMerging() const;
2017-03-22 06:35:45 -04:00
void incrementNumMerges() { ++m_numMergesOut; }
void decrementNumMerges() { --m_numMergesOut; }
2013-08-02 16:12:24 -04:00
// PageRepair.cpp calls this when it is done rebuilding an rdb
// and wants to tell the primary rdb to reload itself using the newly
// rebuilt files, pointed to by rdb2.
bool updateToRebuildFiles ( Rdb *rdb2 , char *coll ) ;
2016-08-24 12:04:06 -04:00
private:
2017-03-22 06:51:47 -04:00
bool addRdbBase2 ( collnum_t collnum );
void addBase(collnum_t collnum, RdbBase *base);
2017-03-22 06:35:45 -04:00
// returns false if no room in tree or m_mem for a list to add
bool hasRoom(RdbList *list);
static void dumpRdb(void *item);
2017-04-26 09:56:44 -04:00
bool getTreeCollExist(collnum_t collnum) const;
// . write out tree to a file with keys in order
bool dumpTree();
bool addList(collnum_t collnum, RdbList *list, bool checkForRoom);
2016-10-17 07:00:09 -04:00
// get the directory name where this rdb stores its files
const char *getDir() const { return g_hostdb.m_dir; }
bool dumpColl(RdbBase *base);
2017-03-22 06:35:45 -04:00
// . called when we've dumped the tree to disk w/ keys ordered
void doneDumping ( );
int32_t reclaimMemFromDeletedTreeNodes();
int32_t m_lastReclaim;
2014-11-10 17:45:11 -05:00
int32_t m_fixedDataSize;
2013-08-02 16:12:24 -04:00
char m_dbname [32];
2014-11-10 17:45:11 -05:00
int32_t m_dbnameLen;
2013-08-02 16:12:24 -04:00
2016-08-05 09:42:20 -04:00
bool m_useIndexFile;
2013-08-02 16:12:24 -04:00
// for storing records in memory
RdbTree m_tree;
RdbBuckets m_buckets;
2013-08-02 16:12:24 -04:00
bool m_useTree;
2013-08-02 16:12:24 -04:00
// for dumping a table to an rdb file
RdbDump m_dump;
2013-08-02 16:12:24 -04:00
// memory for us to use to avoid calling malloc()/mdup()/...
RdbMem m_mem;
2016-10-17 06:29:51 -04:00
mutable int32_t m_cacheLastTime;
mutable int64_t m_cacheLastTotal;
2014-08-28 10:45:43 -04:00
std::atomic<int32_t> m_numMergesOut;
2014-11-10 17:45:11 -05:00
int32_t m_minToMerge; // need at least this many files b4 merging
2013-08-02 16:12:24 -04:00
2014-11-10 17:45:11 -05:00
int32_t m_dumpErrno;
2013-08-02 16:12:24 -04:00
// a dummy data string for deleting records when m_fixedDataSize > 0
// for keeping stats
std::atomic<int64_t> m_numSeeks;
std::atomic<int64_t> m_numReSeeks;
std::atomic<int64_t> m_numRead;
2013-08-02 16:12:24 -04:00
// network request/reply info for get requests
std::atomic<int64_t> m_numReqsGet;
std::atomic<int64_t> m_numNetReadGet;
std::atomic<int64_t> m_numRepliesGet;
std::atomic<int64_t> m_numNetSentGet;
2013-08-02 16:12:24 -04:00
// network request/reply info for add requests
std::atomic<int64_t> m_numReqsAdd;
std::atomic<int64_t> m_numNetReadAdd;
std::atomic<int64_t> m_numRepliesAdd;
std::atomic<int64_t> m_numNetSentAdd;
2013-08-02 16:12:24 -04:00
// . when we dump list to an rdb file, can we use short keys?
2013-08-02 16:12:24 -04:00
// . currently exclusively used by indexdb
bool m_useHalfKeys;
bool m_niceness;
char m_treeAllocName[64]; //for memory used m_tree/m_buckets
char m_memAllocName[64]; //for memory used by m_mem
2013-08-02 16:12:24 -04:00
// set to true when dumping tree so RdbMem does not use the memory
// being dumped to hold newly added records
2017-05-03 04:14:24 -04:00
std::atomic<bool> m_isDumping;
2013-08-02 16:12:24 -04:00
rdbid_t m_rdbId;
2016-08-05 06:12:22 -04:00
2013-08-02 16:12:24 -04:00
char m_ks; // key size
2014-11-10 17:45:11 -05:00
int32_t m_pageSize;
2013-08-02 16:12:24 -04:00
2014-01-15 18:42:59 -05:00
bool m_initialized;
2013-08-02 16:12:24 -04:00
};
2016-03-08 16:14:30 -05:00
#endif // GB_RDB_H