178 lines
5.4 KiB
C++
178 lines
5.4 KiB
C++
// Zak Betz, Copyright, Jun 2008
|
|
|
|
// A sorted list of sorted lists for storing rdb keys. Faster and more
|
|
// cache friendly than RdbTree. Optimized for batched adds, amortized O(1)
|
|
// add operation, O(log n) for retrival, ranged getList for k keys is
|
|
// O(log(n) + k) where as RdbTree is O(k * log(n)).
|
|
// Memory is allocated and used on a on demand basis rather than all up
|
|
// front as with RdbTree, so memory usage is much lower most of the time.
|
|
|
|
// Collections are handled as a linked list, each RdbBuckets has a nextColl
|
|
// pointer. The front bucket acts as the gatekeeper for all of the
|
|
// other buckets, Only it's values for needsSave and isWritable are
|
|
// significant
|
|
|
|
//when selecting bucketnum and also when deduping, use KEYCMPNEGEQ which
|
|
//will mask off the delbit, that way pos and neg keys will be in the same
|
|
//bucket and only the newest key will survive. When getting or deleting
|
|
//a list, use KEYCMP within a bucket and use KEYCMPNEGEQ to select the
|
|
//bucket nums. This is because iterators in rdb dump get a list, then
|
|
//add 1 to a key and get the next list and adding 1 to a pos key will get
|
|
//the negative one.
|
|
|
|
#ifndef GB_RDBBUCKETS_H
|
|
#define GB_RDBBUCKETS_H
|
|
|
|
#include <cstdint>
|
|
#include <functional>
|
|
#include <atomic>
|
|
#include "rdbid_t.h"
|
|
#include "collnum_t.h"
|
|
#include "types.h"
|
|
#include "GbMutex.h"
|
|
#include "JobScheduler.h"
|
|
|
|
class BigFile;
|
|
class RdbList;
|
|
class RdbTree;
|
|
class RdbBucket;
|
|
|
|
class RdbBuckets {
|
|
friend class RdbBucket;
|
|
|
|
public:
|
|
RdbBuckets( );
|
|
~RdbBuckets( );
|
|
void clear();
|
|
void reset();
|
|
|
|
bool set(int32_t fixedDataSize, int32_t maxMem, const char *allocName, rdbid_t rdbId, const char *dbname,
|
|
char keySize);
|
|
|
|
bool addNode(collnum_t collnum, const char *key, const char *data, int32_t dataSize);
|
|
|
|
bool getList(collnum_t collnum, const char *startKey, const char *endKey, int32_t minRecSizes, RdbList *list,
|
|
int32_t *numPosRecs, int32_t *numNegRecs, bool useHalfKeys) const;
|
|
|
|
bool deleteNode(collnum_t collnum, const char *key);
|
|
|
|
int64_t estimateListSize(collnum_t collnum, const char *startKey, const char *endKey, char *minKey, char *maxKey) const;
|
|
|
|
bool collExists(collnum_t coll) const;
|
|
|
|
// don't need to lock. only set in RdbBuckets::set
|
|
int32_t getRecSize() const { return m_recSize; }
|
|
|
|
/// @todo ALC verify saving/writable logic is okay with multithread
|
|
bool isSaving() const;
|
|
bool needsSave() const;
|
|
void setNeedsSave(bool s);
|
|
|
|
// don't need to lock. only set in RdbBuckets::set
|
|
int32_t getMaxMem() const { return m_maxMem; }
|
|
|
|
int32_t getMemAllocated() const;
|
|
bool needsDump() const;
|
|
bool hasRoom(int32_t numRecs) const;
|
|
|
|
int32_t getNumKeys() const;
|
|
int32_t getMemOccupied() const;
|
|
|
|
int32_t getNumNegativeKeys() const;
|
|
int32_t getNumPositiveKeys() const;
|
|
|
|
void cleanBuckets();
|
|
bool delColl(collnum_t collnum);
|
|
|
|
//just for this collection
|
|
int32_t getNumKeys(collnum_t collnum) const;
|
|
|
|
//DEBUG
|
|
void verifyIntegrity();
|
|
int32_t addTree(RdbTree *rt);
|
|
void printBuckets(std::function<void(const char *, int32_t)> print_fn = nullptr);
|
|
void printBucketsStartEnd();
|
|
|
|
bool testAndRepair();
|
|
|
|
//Save/Load/Dump
|
|
bool fastSave(const char *dir, bool useThread, void *state, void (*callback)(void *state));
|
|
bool loadBuckets(const char *dbname);
|
|
|
|
private:
|
|
GbMutex& getLock() { return m_mtx; }
|
|
|
|
static void saveWrapper(void *state);
|
|
static void saveDoneWrapper(void *state, job_exit_t exit_type);
|
|
|
|
void reset_unlocked();
|
|
|
|
bool resizeTable_unlocked(int32_t numNeeded);
|
|
bool selfTest_unlocked(bool thorough, bool core);
|
|
bool repair_unlocked();
|
|
|
|
bool addNode_unlocked(collnum_t collnum, const char *key, const char *data, int32_t dataSize);
|
|
|
|
bool getList_unlocked(collnum_t collnum, const char *startKey, const char *endKey, int32_t minRecSizes, RdbList *list,
|
|
int32_t *numPosRecs, int32_t *numNegRecs, bool useHalfKeys) const;
|
|
|
|
bool deleteList_unlocked(collnum_t collnum, RdbList *list);
|
|
|
|
bool delColl_unlocked(collnum_t collnum);
|
|
|
|
void addBucket_unlocked(RdbBucket *newBucket, int32_t i);
|
|
int32_t getBucketNum_unlocked(collnum_t collnum, const char *key) const;
|
|
char bucketCmp_unlocked(collnum_t acoll, const char *akey, const RdbBucket *b) const;
|
|
|
|
//syntactic sugar
|
|
RdbBucket* bucketFactory_unlocked();
|
|
void updateNumRecs_unlocked(int32_t n, int32_t bytes, int32_t numNeg);
|
|
int32_t getNumKeys_unlocked() const;
|
|
|
|
bool fastSave_unlocked();
|
|
int64_t fastSaveColl_unlocked(int fd);
|
|
bool fastLoad_unlocked(BigFile *f, const char *dbname);
|
|
int64_t fastLoadColl_unlocked(BigFile *f, const char *dbname);
|
|
|
|
mutable GbMutex m_mtx;
|
|
RdbBucket **m_buckets;
|
|
RdbBucket *m_bucketsSpace;
|
|
char *m_masterPtr;
|
|
int32_t m_masterSize;
|
|
int32_t m_firstOpenSlot; //first slot in m_bucketSpace that is available (never-used or empty)
|
|
int32_t m_numBuckets; //number of used buckets
|
|
int32_t m_maxBuckets; //current number of (pre-)allocated buckets
|
|
uint8_t m_ks;
|
|
int32_t m_fixedDataSize;
|
|
int32_t m_recSize;
|
|
int32_t m_numKeysApprox;//includes dups
|
|
int32_t m_numNegKeys;
|
|
int32_t m_maxMem;
|
|
int32_t m_maxBucketsCapacity; //max number of buckets given the memory limit
|
|
int32_t m_dataMemOccupied;
|
|
|
|
rdbid_t m_rdbId;
|
|
const char *m_dbname;
|
|
char *m_swapBuf;
|
|
char *m_sortBuf;
|
|
int32_t m_sortBufSize;
|
|
|
|
bool m_repairMode;
|
|
|
|
std::atomic<bool> m_isSaving;
|
|
// true if buckets was modified and needs to be saved
|
|
bool m_needsSave;
|
|
|
|
const char *m_dir;
|
|
void *m_state;
|
|
|
|
void (*m_callback)(void *state);
|
|
|
|
int64_t m_bytesWritten;
|
|
|
|
int32_t m_errno;
|
|
const char *m_allocName;
|
|
};
|
|
|
|
#endif // GB_RDBBUCKETS_H
|