Files
privacore-open-source-searc…/RdbMerge.h
2016-08-31 11:13:31 +02:00

130 lines
3.7 KiB
C++

// Matt Wells, copyright Sep 2000
// . TODO: fix it, include firstPass stuff
// . the merge file reads slots from 1 or more files and dumps to another
// . the merge file dumps those slots in order of keys to the destination file
// . slots with a zero slotSize will be removed
// . does not use any memory or disk space constraints (TODO)
// . the amount of memory it needs is mostly just from map file (is small)
// . disk space is relatively small to memory
// . TODO: create a static var so only one merge can happen at a time
// . on an index of 40 gigs of just keys the map file can take 56 megs
// . as we're merging X files into one file we should free up the maps
// we're merging so we don't take too much memory
// . RdbMap should have a base page number, the page # of the first page in its
// m_keys/m_offsets/m_dataSizes array
// . shifting 50 megs down will take probably half a second or so but
// we do it to save memory and it should only be done every 10 megs, say
// . and we can also start benefiting from the merged files immediately in
// seek time
// . RdbScan/RdbGet are different now, we have to figure out a way to
// read in 1 meg or less (as close as we can get to 1 meg) from each
// rdb file... TODO
#ifndef GB_RDBMERGE_H
#define GB_RDBMERGE_H
#include "RdbDump.h"
#include "Msg5.h"
class RdbIndex;
// . RdbMerge drives the merge of one or more on-disk Rdb files into a single
//   target file
// . lists are fetched through m_msg5 and written out through m_dump
// . only declarations live in this header; the method bodies (and therefore the
//   exact control flow) are defined elsewhere
class RdbMerge {
public:
RdbMerge();
~RdbMerge();
// NOTE(review): presumably returns the object to its idle, not-merging state;
// the body is not visible in this header -- confirm against the .cpp
void reset();
// . selects the files to merge
// . uses keyMasks and files from the passed Rdb class
// . filter out keys where key & m_keyMask != m_maskValue
// . merge to a new file
// . new file name is stored in m_filename so Rdb can look at it
// . calls rdb->incorporateMerge() when done with merge or had error
// . "maxBufSize" is size of list to get then write (read/write buf)
// . NOTE(review): the description above looks stale -- this class declares no
//   m_keyMask/m_maskValue/m_filename members and the signature below has no
//   "maxBufSize" argument; confirm against the implementation and prune
// . rdbId/collnum identify the RdbBase doing the merge (see m_rdbId below)
// . target/targetMap/targetIndex are the destination file and, presumably,
//   the map and index built alongside it -- TODO confirm
// . startFileNum/numFiles presumably give the first file number and the count
//   of consecutive files to merge -- TODO confirm
// . keySize is presumably the key width in bytes (kept in m_ks) and expected
//   to be <= MAX_KEY_BYTES -- TODO confirm
// . NOTE(review): the meaning of the bool return value is not documented in
//   this header; check the implementation before relying on it
bool merge(rdbid_t rdbId,
collnum_t collnum,
BigFile *target,
RdbMap *targetMap,
RdbIndex *targetIndex,
int32_t startFileNum,
int32_t numFiles,
int32_t niceness,
char keySize);
// simple read accessors for the state flags/counters declared below
int32_t getNumThreads() const { return m_numThreads; }
bool isMerging() const { return m_isMerging; }
// lets a caller set the m_isMerging flag directly
void setMerging(bool merging) { m_isMerging = merging; }
bool isSuspended() const { return m_isSuspended; }
// forwards to the embedded RdbDump: true while m_dump reports it is dumping
bool isDumping() const { return m_dump.isDumping(); }
rdbid_t getRdbId() const { return m_rdbId; }
// suspend the merging until resumeMerge() is called
void suspendMerge();
// . static callback trampolines
// . NOTE(review): "state" is presumably the RdbMerge instance the callback
//   belongs to, and tryAgainWrapper's (fd, state) shape looks like a
//   sleep/timer callback -- confirm against the registration sites
static void unlinkPartWrapper(void *state);
static void dumpListWrapper(void *state);
static void gotListWrapper(void *state, RdbList *list, Msg5 *msg5);
static void tryAgainWrapper(int fd, void *state);
private:
// . internal steps of the merge
// . NOTE(review): judging by the names, lists are read (getNextList /
//   getAnotherList), written (dumpList) and the merge finalized (doneMerging);
//   the bool return conventions are not documented here -- confirm in the .cpp
bool dumpList();
bool getNextList();
bool getAnotherList();
void doneMerging();
// . return false and sets errno on error merging
// . returns true if blocked, or completed successfully
bool resumeMerge();
// . called to continue merge initialization after lock is secure
// . lock is g_isMergingLock
bool gotLock();
// NOTE(review): presumably schedules a retry via tryAgainWrapper -- confirm
void doSleep();
// set to true when m_startKey wraps back to 0
bool m_doneMerging;
// value reported by getNumThreads()
int32_t m_numThreads;
// . we get the units from the master and the mergees from the units
int32_t m_startFileNum;
int32_t m_numFiles;
// NOTE(review): presumably the fixed per-record data size of the rdb being
// merged -- confirm where it is assigned
int32_t m_fixedDataSize;
// destination file plus its map and index, as passed to merge()
BigFile *m_target;
RdbMap *m_targetMap;
RdbIndex *m_targetIndex;
// key range bounds, each held in a MAX_KEY_BYTES-sized buffer
char m_startKey[MAX_KEY_BYTES];
char m_endKey[MAX_KEY_BYTES];
// state flags; the first two are exposed via isMerging()/isSuspended()
bool m_isMerging;
bool m_isSuspended;
bool m_isReadyToSave;
// for writing to target file
RdbDump m_dump;
// a Msg5 for getting RdbLists from disk/cache
Msg5 m_msg5;
// NOTE(review): presumably the list most recently read via m_msg5 and handed
// to m_dump -- confirm
RdbList m_list;
int32_t m_niceness;
// for getting the RdbBase class doing the merge
rdbid_t m_rdbId;
collnum_t m_collnum;
// NOTE(review): presumably the key size in bytes taken from merge()'s
// "keySize" argument -- confirm
char m_ks;
};
#endif // GB_RDBMERGE_H