privacore-open-source-searc.../Msg40.h
2018-01-29 16:27:53 +01:00

192 lines
5.2 KiB
C++

// Matt Wells, copyright Jul 2001
// . gets the title/summary/docLen/url results from a query
#ifndef GB_MSG40_H
#define GB_MSG40_H
#define SAMPLE_VECTOR_SIZE (32*4)
#include "SearchInput.h"
#include "Multicast.h" // multicast send
#include "Query.h" // Query::set()
#include "Msg39.h" // getTermFreqs()
#include "Msg20.h" // for getting summary from docId
#include "Msg3a.h"
#include "HashTableT.h"
#include "GbMutex.h"
// Make it 2B now; there is no reason to limit it to such a low value.
#define MAXDOCIDSTOCOMPUTE 2000000000
class DocIdScore;
// Msg40 gets the title/summary/docLen/url results for a query.
// . docIds, scores, flags and cluster levels are read from the embedded
//   Msg3a (m_msg3a)
// . summaries/titles/... are fetched through an array of Msg20s (m_msg20)
// . the query and the request parameters are read through the
//   SearchInput pointer (m_si)
class Msg40 {

public:

	Msg40();
	~Msg40();

	void resetBuf2();

	// Main entry point.
	// . returns false if blocked, true otherwise
	// . sets errno on error
	// . "si" supplies the query and the search parameters that the
	//   accessors below read back (m_q, m_docsWanted, m_firstResultNum)
	// . "state" and "callback" are a completion callback and its argument
	//   (presumably invoked once a blocked request completes -- confirm
	//   in the implementation)
	// . NOTE(review): the meaning of "forward" is not visible in this header
	bool getResults(class SearchInput *si,
	                bool forward,
	                void *state,
	                void (*callback)(void *state));

	// continuation steps of getResults()
	bool prepareToGetDocIds();
	bool getDocIds(bool recall);

	// these stay public because wrapper functions call them
	bool federatedLoop();
	bool gotDocIds();
	bool launchMsg20s(bool recalled);
	Msg20 *getAvailMsg20();
	Msg20 *getCompletedSummary(int32_t ix);
	bool gotSummary();
	bool gotSummaries();
	bool gotEnoughSummaries();
	bool reallocMsg20Buf();

	// . total number of hits, as reported by the Msg3a
	// . the Msg3a accessor is named "estimated", but per the original
	//   note this is now an EXACT count since all posdb termlists are read
	int64_t getNumTotalHits() const {
		return m_msg3a.getNumTotalEstimatedHits();
	}

	// . accessors for the request parameters we were given
	// . all of these dereference m_si
	const char *getQuery() const {
		return m_si->m_q.getQuery();
	}
	int32_t getQueryLen() const {
		return m_si->m_q.getQueryLen();
	}
	int32_t getDocsWanted() const {
		return m_si->m_docsWanted;
	}
	int32_t getFirstResultNum() const {
		return m_si->m_firstResultNum;
	}

	// . per-result accessors, all forwarded to the Msg3a
	// . "i" indexes straight into the Msg3a arrays with no range check
	//   here; valid indexes are presumably [0, getNumResults())
	int32_t getNumResults() const {
		return m_msg3a.getNumDocIds();
	}
	char getClusterLevel(int32_t i) const {
		return m_msg3a.getClusterLevels()[i];
	}
	int64_t getDocId(int32_t i) const {
		return m_msg3a.getDocIds()[i];
	}
	double getScore(int32_t i) const {
		return m_msg3a.getScores()[i];
	}
	unsigned getFlags(int32_t i) const {
		return m_msg3a.getFlags()[i];
	}

	// . scoring info for result #i
	// . NULL when the Msg3a has no score info array at all
	DocIdScore *getScoreInfo(int32_t i) {
		if ( m_msg3a.getScoreInfos() ) {
			return m_msg3a.getScoreInfos()[i];
		}
		return NULL;
	}
	const DocIdScore *getScoreInfo(int32_t i) const {
		if ( m_msg3a.getScoreInfos() ) {
			return m_msg3a.getScoreInfos()[i];
		}
		return NULL;
	}

	// true if more results follow these
	bool moreResultsFollow() const {
		return m_moreToCome;
	}

	// value of m_cachedTime (see the member below)
	time_t getCachedTime() const {
		return m_cachedTime;
	}

	// Msg20 counters ("out"/"in" presumably mean launched/returned --
	// confirm in the implementation)
	int32_t m_numMsg20sOut;
	int32_t m_numMsg20sIn;

	int32_t m_omitCount;

	HashTableX m_dedupTable;

	int32_t m_msg3aRecallCnt;

	int32_t m_docsToGet;
	int32_t m_docsToGetVisible;

	// incoming parameters
	void *m_state;
	void (*m_callback)(void *state);

	// a bunch of Msg20s for getting summaries/titles/...
	Msg20 **m_msg20;
	int32_t m_numMsg20s;

	char *m_msg20StartBuf;
	int32_t m_numToFree;

	// output/printing progress
	// NOTE(review): exact semantics are defined by the implementation
	int32_t m_numPrinted;
	bool m_printedHeader;
	bool m_printedTail;
	int32_t m_sendsOut;
	int32_t m_sendsIn;
	int32_t m_printi;
	int32_t m_numDisplayed;
	int32_t m_numPrintedSoFar;
	int32_t m_socketHadError;

	// the Msg3a we use to get docIds
	Msg3a m_msg3a;

	// counts of summary (Msg20) requests and replies
	int32_t m_numRequests;
	int32_t m_numReplies;

	// true if more results follow these
	bool m_moreToCome;

	int32_t m_lastProcessedi;

	bool m_didSummarySkip;

	// for timing how long it takes to get all summaries
	int64_t m_startTime;

	// was this Msg40 cached? if so, at what time?
	time_t m_cachedTime;

	int32_t m_tasksRemaining;

	int32_t m_printCount;

	// buffer we deserialize from; allocated by Msg17, but freed by us
	char *m_buf;
	int32_t m_bufMaxSize;

	// buffer holding the Msg20s
	char *m_buf2;
	int32_t m_bufMaxSize2;

	int32_t m_errno;

	// the request we are serving; read by the accessors above
	SearchInput *m_si;

	bool mergeDocIdsIntoBaseMsg3a();
	void adjustRankingBasedOnFlags();

	// multi-collection ("federated") search bookkeeping
	// NOTE(review): grouping inferred from member names -- confirm
	int32_t m_numCollsToSearch;
	class Msg3a **m_msg3aPtrs;
	SafeBuf m_msg3aPtrBuf;
	int32_t m_num3aRequests;
	int32_t m_num3aReplies;
	collnum_t m_firstCollnum;

	HashTableT<uint64_t, uint64_t> m_urlTable;

private:

	// deadline for providing a result, even if empty
	// (not completely enforced yet)
	int64_t m_deadline;

	// realtime url classification counters and state
	// NOTE(review): the mutex presumably guards the two counters, going
	// by its name -- confirm in the implementation
	int32_t m_numRealtimeClassificationsStarted;
	int32_t m_numRealtimeClassificationsCompleted;
	GbMutex m_mtxRealtimeClassificationsCounters;
	bool m_realtimeClassificationsSubmitted;

	void incrementRealtimeClassificationsStarted();
	bool incrementRealtimeClassificationsCompleted();
	bool areAllRealtimeClassificationsCompleted() const;

	bool submitUrlRealtimeClassification();

	// classification callbacks: a static one taking an opaque context and
	// a member one taking an index "i"
	static void urlClassificationCallback0(void *context, uint32_t classification);
	void urlClassificationCallback1(int i, uint32_t classification);
};
#endif // GB_MSG40_H