144 lines
4.3 KiB
C++
144 lines
4.3 KiB
C++
// Matt Wells, copyright Oct 2001
|
|
|
|
// . used for looking up IndexLists for queries
|
|
// . call init() to get initial read info per IndexList (1 per termId in query)
|
|
// . call update() to update read info for next read of lists
|
|
// . use getStartKeys(), getEndKeys(), getReadSize() to extract read info
|
|
// . tries to keep the amount of reading to a minimum
|
|
// . if # of results is not achieved then call update() to get read info for
|
|
// another read to hopefully get the # of requested docIds
|
|
|
|
#ifndef _INDEXREADINFO_H_
|
|
#define _INDEXREADINFO_H_
|
|
|
|
#include "Query.h" // MAX_QUERY_TERMS
|
|
#include "IndexList.h"
|
|
#include "Titledb.h"
|
|
#include "Indexdb.h"
|
|
|
|
// . how many tiers might we break an IndexList read into?
// . also the length of IndexReadInfo::m_stage[], one entry per tier
|
|
#define MAX_TIERS 3
|
|
|
|
// . define read sizes of each stage, written as (# of docids * 6 bytes)
|
|
// . each docid is 6 bytes, but first is 12
// . (the extra 6 bytes of that first key are not added in below)
|
|
// . stage0 was 5000, then 8000 for trek; it is 10000 docids now
|
|
// . let's see how the powers of ten perform: 10k, 100k, 1M docids
|
|
#define STAGE0 (10000 *6)
|
|
#define STAGE1 (100000 *6)
|
|
#define STAGE2 (1000000 *6)
|
|
#define STAGESUM (STAGE0 + STAGE1 + STAGE2) // + STAGE3) <- leftover; no STAGE3 is defined in this header
|
|
|
|
class IndexReadInfo {
|
|
|
|
public:
|
|
|
|
// just sets m_numLists to 0
|
|
IndexReadInfo();
|
|
|
|
// . this will calculate minStartKey and maxEndKey for each termId
|
|
// . does not copy these, so don't trash this stack
|
|
// . "stage0" is the first # of docIds to read from each IndexList
|
|
// -- dynamic truncation
|
|
void init ( Query *q ,
|
|
int64_t *termFreqs ,
|
|
int32_t docsWanted , char callNum , int32_t stage0 ,
|
|
int32_t *tierStage ,
|
|
bool useDateLists ,
|
|
bool sortByDate ,
|
|
uint32_t date1 ,
|
|
uint32_t date2 ,
|
|
bool isDebug );
|
|
|
|
// . this updates the start keys and docsToRead for each list
|
|
// in preparation for another read
|
|
// . call this after you've done a read and called
|
|
// IndexTable::addLists() so it can hash them and calculate the #
|
|
// of results it got
|
|
// . it advances m_startKey[i] to lastKey + 1 in lists[i]
|
|
void update ( IndexList *lists , int32_t numLists , char callNum );
|
|
|
|
void update2 ( int32_t tier ) ;
|
|
|
|
/* void updateForMsg3b ( char *lastParts,
|
|
int64_t *termFreqs,
|
|
int32_t numLists );*/
|
|
|
|
void update ( int64_t *termFreqs,
|
|
int32_t numLists,
|
|
char callNum );
|
|
|
|
// update without the full lists, just the last part and size
|
|
void update ( char *lastParts,
|
|
int32_t *listSizes,
|
|
int32_t numLists );
|
|
|
|
// call this after calling update to determine read info per list
|
|
char *getStartKeys ( ) { return (char *)m_startKeys ; };
|
|
char *getEndKeys ( ) { return (char *)m_endKeys ; };
|
|
char getIgnored ( int32_t i ) { return m_ignore[i] ; };
|
|
|
|
char getHalfKeySize( ) { return m_hks ; };
|
|
|
|
// getting info directly, like above
|
|
int32_t getReadSize ( int32_t i ) { return m_readSizes[i]; };
|
|
|
|
int32_t *getReadSizes( ) { return m_readSizes; };
|
|
|
|
// . did we get the # of required results
|
|
// . or are all our lists exhausted?
|
|
// . call only AFTER calling update() above
|
|
bool isDone ( ) { return m_isDone ; };
|
|
|
|
// call only after calling init() to estimate # of results
|
|
int64_t getEstimatedTotalHits();
|
|
|
|
int32_t getNumLists () { return m_numLists; };
|
|
|
|
int32_t getStage0Default ( ) ;
|
|
|
|
private:
|
|
|
|
// . reading positions to read next portion of each list
|
|
// . set initially by init()
|
|
// . updated by addLists
|
|
// . might read one list multiple tims if we don't get enough hits
|
|
//key_t m_startKeys [ MAX_QUERY_TERMS ];
|
|
//key_t m_endKeys [ MAX_QUERY_TERMS ];
|
|
//key128_t m_startKeys2 [ MAX_QUERY_TERMS ];
|
|
//key128_t m_endKeys2 [ MAX_QUERY_TERMS ];
|
|
char m_startKeys [ MAX_QUERY_TERMS * MAX_KEY_BYTES ];
|
|
char m_endKeys [ MAX_QUERY_TERMS * MAX_KEY_BYTES ];
|
|
// how many docIds/recs/keys should we read?
|
|
int32_t m_readSizes [ MAX_QUERY_TERMS ];
|
|
char m_ignore [ MAX_QUERY_TERMS ];
|
|
|
|
// . the query we're doing
|
|
// . the above arrays are 1-1 with the arrays in m_q, 1 for each termId
|
|
Query *m_q;
|
|
|
|
// how many index lists we're reading
|
|
int32_t m_numLists;
|
|
|
|
// may be set to true after update() is called
|
|
bool m_isDone;
|
|
|
|
// . for dynamic truncation, first # of docs to read from each list
|
|
// . stages can now be set dynamically on a per query basis
|
|
int32_t m_stage[MAX_TIERS];
|
|
//int32_t m_stageSum;
|
|
|
|
char m_ks;
|
|
char m_hks;
|
|
char m_useDateLists;
|
|
char m_sortByDate;
|
|
uint32_t m_date1;
|
|
uint32_t m_date2;
|
|
|
|
bool m_isDebug;
|
|
};
|
|
|
|
#endif
|
|
|
|
|
|
|