482 lines
18 KiB
C++
482 lines
18 KiB
C++
// Matt Wells, Copyright, Apr 2001
|
|
|
|
// . an UNbalanced b-tree for storing keys in memory
|
|
// . UNbalanced because it's slow to balance and input should be quite random
|
|
// . if the tree becomes highly unbalanced we could just create a new tree
|
|
// and insert things at random into there, but this should we a rarity
|
|
// . we store a -1 in the parent field for nodes that were deleted so if you
|
|
// were to dump the nodes out unordered you'd know where the deleted nodes
|
|
// were
|
|
// . we store "m_emptyNode" in the left kid field of deleted nodes and then
|
|
// assign m_emptyNode to that deleted node's node # so you can re-use them
|
|
// in a linked-list type fashion
|
|
// . "m_minUsedNode" is the max node # ever occupied. it's used so we know the
|
|
// limits of a dump, if we were to dump the nodes out unordered
|
|
// . NOTE: i changed m_maxNode to m_minUnusedNode for clarity
|
|
// . "m_numNodes" is the total (used/unused) # of nodes. this can be grown
|
|
// if we run outta room
|
|
|
|
// . RdbTree(btree) vs. a hash table
|
|
// . 1. does not need to rehash
|
|
// . 2. does not need to sort before dump (uses getNextNode())
|
|
// . 3. takes log(N) to add/get/delete plus lotsa balancing overhead
|
|
// . 4. has 3 int32_ts overhead per node as opposed to 1 for hash table
|
|
// . 4. has 3 int32_ts overhead per node as opposed to 1 for hash table
|
|
// . 5. can do key-range lookups very quickly (hash table can't do this at all)
|
|
|
|
// TODO: use an RdbNode class so we don't have to perform as many
|
|
// random memory accesses which are somewhat slow
|
|
|
|
// What good is just storing keys in this db? What about the data?
|
|
// You can store your data with the keys if it fits in the key size.
|
|
// However, large amounts of data per key are better stored separately because
|
|
// the key files are continually merged and sorted. Carrying around extra
|
|
// weight during these merging processes just slows things down and takes up
|
|
// more disk space. Use the "cdb" database to couple your data with your
|
|
// keys for you. It is much better suited to handling data of variable length.
|
|
|
|
#ifndef _RDBTREE_H_
|
|
#define _RDBTREE_H_
|
|
|
|
#include "Mem.h" // for g_mem.calloc and g_mem.malloc
|
|
#include "BigFile.h" // for saving and loading the tree
|
|
#include "RdbList.h"
|
|
#include "BigFile.h"
|
|
#include "RdbMem.h"
|
|
|
|
class RdbTree {
|
|
|
|
// . this RdbCache class caches scans from a startKey to an endKey
|
|
// . it adds an m_endKeys,m_next,m_prev,m_time to each node
|
|
friend class RdbCache;
|
|
|
|
public:
|
|
|
|
RdbTree ( );
|
|
~RdbTree ( );
|
|
|
|
// . Rdb uses this to determine when to dump this tree to disk
|
|
// . look at % of memory occupied/allocated of max, as well as % of
|
|
// nodes used
|
|
bool is90PercentFull ( ) {
|
|
// . m_memOccupied is amount of alloc'd mem that data occupies
|
|
// . now we /90 and /100 since multiplying overflowed
|
|
//if ( m_memOccupied /90 >= m_maxMem /100 ) return true;
|
|
if ( m_numUsedNodes/90 >= m_numNodes/100 ) return true;
|
|
return false;
|
|
// if we have data-less records (just keys) then we're not
|
|
// full so return false
|
|
//if ( m_fixedDataSize == 0 ) return false;
|
|
// how much space does avg rec take up?
|
|
//int32_t avgDataSize ;
|
|
//avgDataSize = m_memAlloced - ( m_numUsedNodes * m_overhead );
|
|
//avgDataSize / m_numUsedNodes;
|
|
// how much space is left
|
|
// are we at 90% capacity?
|
|
// otherwise we should check mem alloc'd as well
|
|
//if ( m_memAlloced *100 >= m_maxMem *90 ) return true;
|
|
};
|
|
|
|
bool isFull ( ) { return (m_numUsedNodes >= m_numNodes); };
|
|
|
|
bool hasRoomForKeys ( int32_t nk ) {
|
|
return (m_numUsedNodes + nk <= m_numNodes); };
|
|
|
|
// . a fixedDataSize of -1 means each node has data of a variable size
|
|
// . set maxMem to -1 for no max
|
|
// . returns false & sets errno if fails to alloc "maxNumNodes" nodes
|
|
bool set ( int32_t fixedDataSize , int32_t maxNumNodes ,
|
|
bool doBalancing , int32_t maxMem , bool ownData ,
|
|
char *allocName ,
|
|
bool dataInPtrs = false ,
|
|
char *dbname = NULL , char keySize = 12 ,
|
|
bool useProtection = false ,
|
|
bool allowDups = false ,
|
|
char rdbId = -1 );
|
|
|
|
// . frees the used memory, etc.
|
|
// . override so derivatives can free up extra header arrays
|
|
void reset ( );
|
|
|
|
// . this just makes all the nodes available for occupation (liberates)
|
|
// . it does not free this tree's control structures
|
|
// . returns # of occupied nodes we liberated
|
|
int32_t clear ( );
|
|
|
|
// remove recs from tree that have invalid collnums. this is done
|
|
// at load time. i dunno why it happens. it should never!
|
|
void cleanTree ( ); // char **bases );
|
|
|
|
void delColl ( collnum_t collnum ) ;
|
|
|
|
// . this will overwrite nodes with the same key
|
|
// . returns -1 if it couldn't grab the memory or grow the table
|
|
// . returns the node # we added it to on success
|
|
// . don't free your data because we don't copy it!
|
|
// . sets errno if it returns -1
|
|
//int32_t addNode ( collnum_t collnum , key_t key, char *data,
|
|
int32_t addKey ( void *key ) {
|
|
return addNode ( 0,(char *)key,NULL,0);};
|
|
int32_t addNode ( collnum_t collnum , char *key, char *data,
|
|
int32_t dataSize );
|
|
int32_t addNode ( collnum_t collnum , key_t key, char *data,
|
|
int32_t dataSize ) {
|
|
return addNode(collnum,(char *)&key,data,dataSize);};
|
|
int32_t addNode ( collnum_t collnum , char *key ) { //key_t &key ) {
|
|
return addNode ( collnum , key , NULL , 0 ); };
|
|
|
|
// . returns -1 if not found
|
|
// . otherwise return the node #
|
|
int32_t getNode ( collnum_t collnum , char *key ); //key_t &key );
|
|
int32_t getNode ( collnum_t collnum , key_t &key ) {
|
|
return getNode(collnum,(char *)&key);};
|
|
|
|
// . get the node's data directly
|
|
char *getData ( collnum_t collnum , char *key ); // key_t &key ) ;
|
|
|
|
// . get the node whose key is >= key
|
|
// . much much slower than getNextNode() below
|
|
int32_t getNextNode ( collnum_t collnum , char *key ); // key_t &key );
|
|
int32_t getNextNode ( collnum_t collnum , key_t &key ) {
|
|
return getNextNode ( collnum, (char *)&key); };
|
|
|
|
// . get the next node # AFTER "node" by key
|
|
// . used for dumping out the nodes ordered by their keys
|
|
// . returns -1 on end
|
|
int32_t getNextNode ( int32_t node );
|
|
|
|
int32_t getFirstNode ( );
|
|
int32_t getLastNode ( );
|
|
|
|
int32_t getFirstNode2 ( collnum_t collnum );
|
|
|
|
// . get the node whose key is <= "key"
|
|
int32_t getPrevNode ( collnum_t collnum , char *key ); // key_t &key );
|
|
int32_t getPrevNode ( collnum_t collnum , key_t &key ) {
|
|
return getPrevNode(collnum,(char *)&key);};
|
|
|
|
// . get the prev node # whose key is <= to key of node #i
|
|
int32_t getPrevNode ( int32_t i ) ;
|
|
|
|
// returns the node # with the lowest key, -1 if no nodes in tree
|
|
int32_t getLowestNode ( ) ;
|
|
|
|
// . returns true iff was found and deleted
|
|
// . returns false iff not found
|
|
// . frees m_data[node] if freeIt is true
|
|
void deleteNode3 ( int32_t node , bool freeData );
|
|
//int32_t deleteNode ( collnum_t collnum , key_t &key,bool freeData) ;
|
|
int32_t deleteNode ( collnum_t collnum , char *key , bool freeData ) ;
|
|
int32_t deleteNode ( collnum_t collnum , key_t &key , bool freeData) {
|
|
return deleteNode ( collnum , (char *)&key , freeData ); };
|
|
|
|
// delete all nodes with keys in [startKey,endKey]
|
|
void deleteNodes ( collnum_t collnum ,
|
|
key_t startKey , key_t endKey , bool freeData ) {
|
|
deleteNodes(collnum,(char *)&startKey,(char *)&endKey,
|
|
freeData); };
|
|
|
|
void deleteNodes ( collnum_t collnum ,
|
|
//key_t startKey , key_t endKey , bool freeData );
|
|
char *startKey , char *endKey , bool freeData );
|
|
|
|
// . delete all records in this list from the tree
|
|
// . call deleteNode()
|
|
// . when deleting lists from spiderdb this seems to take a LONG time
|
|
// so to make it faster set doBalancing to false. at least nodes
|
|
// being added after this will still be balanced
|
|
// . returns false if a key in list was not found
|
|
// . this happens if memory is corrupted!
|
|
bool deleteList ( collnum_t collnum ,
|
|
RdbList *list , bool doBalancing ); //= true );
|
|
|
|
bool deleteKeys ( collnum_t collnum , char *keys , int32_t numKeys );
|
|
|
|
// . if the list's keys are ordered from smallest to largest
|
|
// this acts just like deleteList() above, but saves time by
|
|
// using getNextNode() rather than lookup each key from root of tree
|
|
void deleteOrderedList ( collnum_t collnum ,
|
|
RdbList *list , bool doBalancing ); //= true);
|
|
|
|
// since our arrays aren't public
|
|
char *getData ( int32_t node ) { return m_data [node]; };
|
|
int32_t getDataSize ( int32_t node ) { return m_sizes [node]; };
|
|
//key_t getKey ( int32_t node ) { return m_keys [node]; };
|
|
char *getKey ( int32_t node ) { return &m_keys [node*m_ks]; };
|
|
int32_t getParentNum ( int32_t node ) { return m_parents [node]; };
|
|
|
|
collnum_t getCollnum ( int32_t node ) { return m_collnums [node];};
|
|
|
|
bool isEmpty ( int32_t node ) { return (m_parents [ node ] == -2);};
|
|
|
|
// an upper bound on the # of used nodes
|
|
int32_t getNumNodes ( ) { return m_minUnusedNode; };
|
|
|
|
int32_t getNumUsedNodes ( ) { return m_numUsedNodes; };
|
|
|
|
bool isEmpty ( ) { return (m_numUsedNodes == 0); };
|
|
|
|
int32_t getNumAvailNodes ( ) { return m_numNodes - m_numUsedNodes; };
|
|
|
|
int32_t getNumTotalNodes ( ) { return m_numNodes; };
|
|
|
|
// negative and positive counts
|
|
int32_t getNumNegativeKeys ( ) { return m_numNegativeKeys; };
|
|
int32_t getNumPositiveKeys ( ) { return m_numPositiveKeys; };
|
|
|
|
int32_t getNumNegativeKeys ( collnum_t collnum ) ;
|
|
int32_t getNumPositiveKeys ( collnum_t collnum ) ;
|
|
|
|
void setNumKeys ( class CollectionRec *cr ) ;
|
|
|
|
// how much mem, including data, is used by this class?
|
|
int32_t getMemAlloced ( ) { return m_memAlloced; };
|
|
// . how much of the alloc'd mem is actually in use holding data
|
|
// . includes the tree infrastructure as well as the data itself
|
|
int32_t getMemOccupied ( ) { return m_memOccupied; };
|
|
int32_t getMaxMem ( ) { return m_maxMem; };
|
|
|
|
// . like getMemOccupied() above but does not include left/right/parent
|
|
// . only includes occupied keys/sizes and the dataSizes themself
|
|
int32_t getMemOccupiedForList2 ( collnum_t collnum ,
|
|
//key_t startKey ,
|
|
//key_t endKey ,
|
|
char *startKey ,
|
|
char *endKey ,
|
|
int32_t minRecSizes ,
|
|
int32_t niceness ) ;
|
|
|
|
// how much mem the tree would take if it were made into a list
|
|
int32_t getMemOccupiedForList ( );
|
|
|
|
// . how much mem does this tree use, not including stored data
|
|
// . this will be the same as getMemAlloced() if fixedDataSize is 0
|
|
int32_t getTreeOverhead() { return m_overhead * m_numNodes; };
|
|
|
|
// . throw all the records in this range into this list
|
|
// . used for dumping to an rdb file permanently
|
|
// . sets list->m_lastKey to last key inserted into the list
|
|
// . list->m_lastKey will not be valid if list is empty
|
|
// . returns false if outta memory
|
|
// . "antiNumRecs" is set to # of keys w/ low bit cleared (antiKeys)
|
|
// that were added to "list"
|
|
bool getList ( collnum_t collnum ,
|
|
//key_t startKey ,
|
|
//key_t endKey ,
|
|
char *startKey ,
|
|
char *endKey ,
|
|
int32_t minRecSizes ,
|
|
RdbList *list ,
|
|
int32_t *numPosRecs ,
|
|
int32_t *numNegRecs , // = NULL
|
|
bool useHalfKeys , // = false
|
|
// RdbDump calls with niceness 1 since
|
|
// getMemOccupiedForList2() takes some time!
|
|
int32_t niceness = 0 );
|
|
|
|
bool getList ( collnum_t collnum ,
|
|
key_t startKey ,
|
|
key_t endKey ,
|
|
int32_t minRecSizes ,
|
|
RdbList *list ,
|
|
int32_t *numPosRecs ,
|
|
int32_t *numNegRecs , // = NULL
|
|
bool useHalfKeys ) { // = false
|
|
return getList(collnum,(char *)&startKey,(char *)&endKey,
|
|
minRecSizes,list,numPosRecs,numNegRecs,
|
|
useHalfKeys);};
|
|
|
|
// . don't order by keys, order by node #
|
|
// . used for saving a tree to disk temporarily so it can be re-loaded
|
|
// w/o totally unbalancing the tree
|
|
// . sets "*lastNode" to last node # inserted into the list
|
|
//bool getListUnordered ( int32_t startNode ,
|
|
// int32_t minRecSizes ,
|
|
// RdbList *list , int32_t *lastNode ) ;
|
|
|
|
// estimate the size of the list defined by these keys
|
|
int32_t getListSize ( collnum_t collnum ,
|
|
//key_t startKey ,key_t endKey ,
|
|
//key_t *minKey , key_t *maxKey );
|
|
char *startKey , char *endKey ,
|
|
char *minKey , char *maxKey );
|
|
|
|
// how balanced is this tree? = #nodes w/ right kids / # node w/ left
|
|
// the multiplied by 100. invereted to make smaller than 100.
|
|
int32_t getBalancePercent();
|
|
|
|
// . load & save the tree quickly
|
|
// . returns false on error, true otherwise
|
|
// . sometimes sets g_errno when it returns false
|
|
bool fastLoad ( BigFile *f , RdbMem *memStack ) ;
|
|
// . we now optionally save with a thread
|
|
// . when saving m_isSaving is set to true and nothing can be added
|
|
// to the tree, g_errno will be set to ETRYAGAIN when addNode()
|
|
// is called
|
|
// . returns false if blocked, true otherwise
|
|
// . sets g_errno on error
|
|
bool fastSave ( char *dir ,
|
|
char *dbname ,
|
|
bool useThread ,
|
|
void *state ,
|
|
void (* callback)(void *state ) );
|
|
// this is called by a thread
|
|
bool fastSave_r() ;
|
|
|
|
// to fix lar's machine
|
|
bool oldLoad ( BigFile *f , RdbMem *memStack ) ;
|
|
int32_t oldLoadBlock ( BigFile *f, int32_t remainingNodes , RdbMem *stack,
|
|
int64_t offset );
|
|
|
|
int32_t getMinUnusedNode () { return m_minUnusedNode; };
|
|
|
|
bool checkTree ( bool printMsgs , bool doChainTest );
|
|
bool checkTree2 ( bool printMsgs , bool doChainTest );
|
|
bool fixTree ( );
|
|
|
|
// all except the data: the keys,dataPtr,size,left,right,parents,depth
|
|
int32_t getRecOverhead () { return m_overhead; };
|
|
|
|
// Rdb which contains this class calls this to prevent swap-out once
|
|
// per minute or so
|
|
int32_t scanMem ( ) ;
|
|
|
|
void disableWrites () { m_isWritable = false; };
|
|
void enableWrites () { m_isWritable = true ; };
|
|
|
|
// can we write to the tree?
|
|
bool m_isWritable;
|
|
// . this stuff is accessed by thread an must be public
|
|
// . cannot add to tree when saving
|
|
bool m_isSaving;
|
|
|
|
int32_t m_gettingList;
|
|
|
|
// loading?
|
|
bool m_isLoading;
|
|
|
|
// true if tree was modified and needs to be saved
|
|
bool m_needsSave;
|
|
// need to pass this file to the fastSave() thread
|
|
//BigFile *m_saveFile;
|
|
char m_rdbId;
|
|
//char m_isRealTree;
|
|
char m_dir[128];
|
|
char m_dbname[32];
|
|
char m_memTag[16];
|
|
|
|
// this callback called when fastSave is complete
|
|
void *m_state;
|
|
void (* m_callback) (void *state );
|
|
|
|
int64_t getBytesWritten ( ) { return m_bytesWritten; };
|
|
int64_t getBytesRead ( ) { return m_bytesRead ; };
|
|
|
|
// private:
|
|
|
|
// used by fastSave() and fastLoad()
|
|
int32_t fastSaveBlock_r ( int fd ,
|
|
int32_t start ,
|
|
int64_t offset ) ;
|
|
int32_t fastLoadBlock ( BigFile *f ,
|
|
int32_t start ,
|
|
int32_t totalNodes ,
|
|
RdbMem *stack ,
|
|
int64_t offset );
|
|
|
|
void setDepths ( int32_t bottomNode );
|
|
int32_t rotateRight ( int32_t pivotNode );
|
|
int32_t rotateLeft ( int32_t pivotNode );
|
|
int32_t rotate ( int32_t pivotNode , int32_t *lefts , int32_t *rights );
|
|
int32_t computeDepth ( int32_t headNode );
|
|
// is this tree a balanced binary tree?
|
|
bool m_doBalancing;
|
|
|
|
// used by getListSize() to estiamte a list size
|
|
//int32_t getOrderOfKey ( collnum_t collnum , key_t key , key_t *retKey );
|
|
int32_t getOrderOfKey ( collnum_t collnum , char *key , char *retKey );
|
|
// used by getrderOfKey() (have to estimate if tree not balanced)
|
|
int32_t getTreeDepth ();
|
|
|
|
// . returns true if tree doesn't need to grow/shrink
|
|
// . re-allocs the m_keys,m_data,m_sizes,m_leftNodes,m_rightNodes
|
|
// . used for growing AND shrinking the table
|
|
bool growTree ( int32_t newNumNodes , int32_t niceness );
|
|
|
|
// are we responsible for freeing nodes' data
|
|
bool m_ownData;
|
|
|
|
// true if the m_data[i] ptrs are not really ptrs
|
|
bool m_dataInPtrs;
|
|
|
|
// each node/node in the tree has these datum:
|
|
collnum_t *m_collnums; // each key now has a collection number
|
|
//key_t *m_keys; // 96bits each (3 int32_ts)
|
|
char *m_keys; // X bytes each
|
|
char **m_data; // NULL iff m_dataSize is 0
|
|
int32_t *m_sizes; // NULL iff m_dataSize is 0
|
|
int32_t *m_left; // left kid of this node in the tree
|
|
int32_t *m_right; // right kid of this node in the tree
|
|
int32_t *m_parents; // parent of this node - for getNextNode()
|
|
char *m_depth; // depth of this node (used iff m_doBalancing)
|
|
int32_t m_numNodes; // how many we have, empty or full
|
|
int32_t m_numUsedNodes; // how many of those are used? (full)
|
|
// negative and positive key counts
|
|
int32_t m_numNegativeKeys;
|
|
int32_t m_numPositiveKeys;
|
|
// memory overhead per node (excluding data)
|
|
int32_t m_overhead;
|
|
// switch between picking left and right kids to replace deleted nodes
|
|
// in order to keep the tree more balanced
|
|
char m_pickRight;
|
|
// the node at the top of the tree
|
|
int32_t m_headNode;
|
|
// total mem this tree is using (including data that nodes point to)
|
|
int32_t m_memAlloced;
|
|
// total amount of m_memAlloced that is occupied
|
|
int32_t m_memOccupied;
|
|
// max limit of m_memAlloced
|
|
int32_t m_maxMem;
|
|
// mem allocated for overhead of tree structure
|
|
int32_t m_baseMem;
|
|
// -1 means any dataSize, otherwise, it's fixed to this
|
|
int32_t m_fixedDataSize;
|
|
// node of the next available/empty node
|
|
int32_t m_nextNode;
|
|
// maximum node # that was ever used at some point in time
|
|
int32_t m_minUnusedNode;
|
|
|
|
char *m_allocName;
|
|
|
|
// so we can save the tree within a file that has other stuff
|
|
int64_t m_bytesWritten;
|
|
int64_t m_bytesRead;
|
|
|
|
int32_t m_saveErrno;
|
|
char m_ks;
|
|
|
|
bool m_useProtection;
|
|
bool m_isProtected;
|
|
void protect () {
|
|
if ( m_isProtected ) return;
|
|
m_isProtected = true;
|
|
protect ( PROT_READ ); };
|
|
void unprotect () {
|
|
if ( ! m_isProtected ) return;
|
|
m_isProtected = false;
|
|
protect ( PROT_READ | PROT_WRITE ); };
|
|
void protect ( int prot );
|
|
void gbmprotect ( void *p , int32_t size , int prot );
|
|
|
|
bool m_allowDups;
|
|
|
|
//char m_countsInitialized;
|
|
|
|
int32_t m_corrupt;
|
|
|
|
//int32_t m_numPosKeysPerColl[MAX_COLL_RECS];
|
|
//int32_t m_numNegKeysPerColl[MAX_COLL_RECS];
|
|
};
|
|
|
|
#endif
|