192 lines
5.2 KiB
C++
192 lines
5.2 KiB
C++
// Matt Wells, copyright Jul 2001
|
|
|
|
// . gets the title/summary/docLen/url results from a query
|
|
|
|
#ifndef GB_MSG40_H
|
|
#define GB_MSG40_H
|
|
|
|
// Evaluates to 128 (32*4). Not referenced anywhere else in this header.
// NOTE(review): presumably the byte size of a per-document "sample vector"
// used by other translation units -- confirm against its users.
#define SAMPLE_VECTOR_SIZE (32*4)
|
|
|
|
#include "SearchInput.h"
|
|
#include "Multicast.h" // multicast send
|
|
#include "Query.h" // Query::set()
|
|
#include "Msg39.h" // getTermFreqs()
|
|
#include "Msg20.h" // for getting summary from docId
|
|
#include "Msg3a.h"
|
|
#include "HashTableT.h"
|
|
#include "GbMutex.h"
|
|
|
|
// Make it 2B now; there is no reason to limit it to a lower value.
|
|
#define MAXDOCIDSTOCOMPUTE 2000000000
|
|
|
|
class DocIdScore;
|
|
|
|
class Msg40 {
|
|
public:
|
|
|
|
Msg40();
|
|
~Msg40();
|
|
void resetBuf2 ( ) ;
|
|
|
|
// . returns false if blocked, true otherwise
|
|
// . sets errno on error
|
|
// . uses Query class to parse query
|
|
// . uses Indexdb class to intersect the lists to get results
|
|
// . fills local buffer, m_docIds, with resulting docIds
|
|
// . set m_numDocIds to number of docIds in m_docIds
|
|
// . a useCache of -1 means default, 1 means use the cache,0 means dont
|
|
// . "displayMetas" is a space separated list of meta tag names
|
|
// that you want the content for along with the summary
|
|
bool getResults ( class SearchInput *si ,
|
|
bool forward ,
|
|
void *state ,
|
|
void (* callback)(void *state));
|
|
|
|
// a continuation function of getResults() above
|
|
bool prepareToGetDocIds ( );
|
|
bool getDocIds ( bool recall );
|
|
|
|
// keep these public since called by wrapper functions
|
|
bool federatedLoop ( ) ;
|
|
bool gotDocIds ( ) ;
|
|
bool launchMsg20s ( bool recalled ) ;
|
|
Msg20 *getAvailMsg20();
|
|
Msg20 *getCompletedSummary ( int32_t ix );
|
|
bool gotSummary ( ) ;
|
|
bool gotSummaries();
|
|
bool gotEnoughSummaries();
|
|
bool reallocMsg20Buf ( ) ;
|
|
|
|
// . estimated # of total hits
|
|
// . this is now an EXACT count... since we read all posdb termlists
|
|
int64_t getNumTotalHits() const { return m_msg3a.getNumTotalEstimatedHits(); }
|
|
|
|
// . we copy query and coll to our own local buffer
|
|
// . these routines give us back our inputted parameters we saved
|
|
const char *getQuery() const { return m_si->m_q.getQuery(); }
|
|
int32_t getQueryLen() const { return m_si->m_q.getQueryLen(); }
|
|
|
|
int32_t getDocsWanted() const { return m_si->m_docsWanted; }
|
|
int32_t getFirstResultNum() const { return m_si->m_firstResultNum; }
|
|
|
|
int32_t getNumResults() const { return m_msg3a.getNumDocIds(); }
|
|
|
|
char getClusterLevel(int32_t i) const { return m_msg3a.getClusterLevels()[i]; }
|
|
|
|
int64_t getDocId(int32_t i) const { return m_msg3a.getDocIds()[i]; }
|
|
double getScore(int32_t i) const { return m_msg3a.getScores()[i]; }
|
|
|
|
unsigned getFlags(int32_t i) const { return m_msg3a.getFlags()[i]; }
|
|
|
|
DocIdScore *getScoreInfo(int32_t i) {
|
|
if ( ! m_msg3a.getScoreInfos() ) return NULL;
|
|
return m_msg3a.getScoreInfos()[i];
|
|
}
|
|
const DocIdScore *getScoreInfo(int32_t i) const {
|
|
if ( ! m_msg3a.getScoreInfos() ) return NULL;
|
|
return m_msg3a.getScoreInfos()[i];
|
|
}
|
|
|
|
bool moreResultsFollow() const { return m_moreToCome; }
|
|
time_t getCachedTime() const { return m_cachedTime; }
|
|
|
|
int32_t m_numMsg20sOut ;
|
|
int32_t m_numMsg20sIn ;
|
|
|
|
int32_t m_omitCount;
|
|
|
|
HashTableX m_dedupTable;
|
|
|
|
int32_t m_msg3aRecallCnt;
|
|
|
|
int32_t m_docsToGet;
|
|
int32_t m_docsToGetVisible;
|
|
|
|
// incoming parameters
|
|
void *m_state;
|
|
void (* m_callback ) ( void *state );
|
|
|
|
// a bunch of msg20's for getting summaries/titles/...
|
|
Msg20 **m_msg20;
|
|
int32_t m_numMsg20s;
|
|
|
|
char *m_msg20StartBuf;
|
|
int32_t m_numToFree;
|
|
|
|
int32_t m_numPrinted ;
|
|
bool m_printedHeader ;
|
|
bool m_printedTail ;
|
|
int32_t m_sendsOut ;
|
|
int32_t m_sendsIn ;
|
|
int32_t m_printi ;
|
|
int32_t m_numDisplayed ;
|
|
int32_t m_numPrintedSoFar;
|
|
int32_t m_socketHadError;
|
|
|
|
|
|
// use msg3a to get docIds
|
|
Msg3a m_msg3a;
|
|
|
|
// count summary replies (msg20 replies) we get
|
|
int32_t m_numRequests;
|
|
int32_t m_numReplies;
|
|
|
|
// true if more results follow these
|
|
bool m_moreToCome;
|
|
|
|
int32_t m_lastProcessedi;
|
|
|
|
bool m_didSummarySkip;
|
|
|
|
// for timing how long to get all summaries
|
|
int64_t m_startTime;
|
|
|
|
// was Msg40 cached? if so, at what time?
|
|
time_t m_cachedTime;
|
|
|
|
int32_t m_tasksRemaining;
|
|
|
|
int32_t m_printCount;
|
|
|
|
// buffer we deserialize from, allocated by Msg17, but we free it
|
|
char *m_buf;
|
|
int32_t m_bufMaxSize;
|
|
|
|
// for holding the msg20s
|
|
char *m_buf2;
|
|
int32_t m_bufMaxSize2;
|
|
|
|
int32_t m_errno;
|
|
|
|
SearchInput *m_si;
|
|
|
|
bool mergeDocIdsIntoBaseMsg3a();
|
|
void adjustRankingBasedOnFlags();
|
|
int32_t m_numCollsToSearch;
|
|
class Msg3a **m_msg3aPtrs;
|
|
SafeBuf m_msg3aPtrBuf;
|
|
int32_t m_num3aRequests;
|
|
int32_t m_num3aReplies;
|
|
collnum_t m_firstCollnum;
|
|
|
|
HashTableT<uint64_t, uint64_t> m_urlTable;
|
|
|
|
private:
|
|
int64_t m_deadline; //deadline for providing a result, even if empty. (not completely enforced yet)
|
|
|
|
int32_t m_numRealtimeClassificationsStarted;
|
|
int32_t m_numRealtimeClassificationsCompleted;
|
|
GbMutex m_mtxRealtimeClassificationsCounters;
|
|
bool m_realtimeClassificationsSubmitted;
|
|
void incrementRealtimeClassificationsStarted();
|
|
bool incrementRealtimeClassificationsCompleted();
|
|
bool areAllRealtimeClassificationsCompleted() const;
|
|
|
|
bool submitUrlRealtimeClassification();
|
|
|
|
static void urlClassificationCallback0(void *context, uint32_t classification);
|
|
void urlClassificationCallback1(int i, uint32_t classification);
|
|
};
|
|
|
|
#endif // GB_MSG40_H
|