Files
privacore-open-source-searc…/Msg3a.h

150 lines
4.5 KiB
C
Raw Normal View History

2016-03-08 22:14:30 +01:00
#ifndef GB_MSG3A_H_
#define GB_MSG3A_H_
2013-08-02 13:12:24 -07:00
#include "Msg39.h"
#include "Multicast.h"
2013-08-02 13:12:24 -07:00
class SearchInput;
class Query;

// Declaration only; the implementation is not in this header.
// NOTE(review): presumably derives per-term weights for "q" from term
// frequencies, using the given frequency range [termFreqWeightFreqMin,
// termFreqWeightFreqMax] and weight range [termFreqWeightMin,
// termFreqWeightMax] -- confirm against the definition.
void setTermFreqWeights(collnum_t collnum,
                        Query    *q,
                        float     termFreqWeightFreqMin,
                        float     termFreqWeightFreqMax,
                        float     termFreqWeightMin,
                        float     termFreqWeightMax);
2013-08-02 13:12:24 -07:00
2016-03-03 21:57:28 +01:00
// capacity of the per-shard arrays in Msg3a (m_mcast, m_reply,
// m_replyMaxSize)
#define MAX_SHARDS 1024
2013-08-02 13:12:24 -07:00
// ALWAYS get at least 20 docids so we can do better ranking
#define MIN_DOCS_TO_GET 20
// size of the fixed request buffer Msg3a::m_rbuf (a char array, so the
// unit is bytes)
#define RBUF_SIZE 2048
2017-01-09 14:11:15 +01:00
class DocIdScore;
2013-08-02 13:12:24 -07:00
class Msg3a {
public:
Msg3a();
~Msg3a();
void constructor();
void reset ( );
// . returns false if blocked, true otherwise
// . sets errno on error
// . "query/coll/docIds" should NOT be on the stack in case we block
// . uses Query class to parse query
// . uses Indexdb class to intersect the lists to get results
// . fills docIds buf with the resulting docIds
// . sets *numDocIds to the # of resulting docIds
// . if restrictindexdbForQuery is true we only read docIds from
// indexdb root file
// . this might ADJUST m_si->m_q.m_termFreqs[] to be more accurate
// . NOTE: Msg39Request MUST NOT BE ON THE STACK! keep it persistent!
bool getDocIds ( const SearchInput *si,
2013-08-02 13:12:24 -07:00
Query *q ,
void *state ,
void (* callback) ( void *state ));
2013-08-02 13:12:24 -07:00
// Msg40 calls this to get Query m_q to pass to Summary class
Query *getQuery() { return m_q ; }
const Query *getQuery() const { return m_q ; }
2013-08-02 13:12:24 -07:00
// Msg40 calls these to get the data pointing into the reply
int64_t *getDocIds() { return m_docIds; }
const int64_t *getDocIds() const { return m_docIds; }
char *getClusterLevels() { return m_clusterLevels; }
const char *getClusterLevels() const { return m_clusterLevels; }
2013-08-02 13:12:24 -07:00
// we basically turn the scores we get from each msg39 split into
// floats (rscore_t) and store them as floats so that PostQueryRerank
// has an easier time
double *getScores() { return m_scores; }
const double *getScores() const { return m_scores; }
int32_t getNumDocIds() const { return m_numDocIds; }
const unsigned *getFlags() const { return m_flags; }
2017-01-09 14:11:15 +01:00
DocIdScore * const * getScoreInfos() { return (DocIdScore * const *)m_scoreInfos; }
const DocIdScore * const * getScoreInfos() const { return (DocIdScore * const *)m_scoreInfos; }
2013-08-02 13:12:24 -07:00
void printTerms ( ) ;
// . estimates based on m_termFreqs, m_termSigns and m_numTerms
// . received in reply
int64_t getNumTotalEstimatedHits() const {
2016-02-25 16:11:45 +01:00
return m_numTotalEstimatedHits; }
2013-08-02 13:12:24 -07:00
// called when we got a reply of docIds
bool gotAllShardReplies ( );
2013-08-02 13:12:24 -07:00
bool mergeLists ( );
// incoming parameters passed to Msg39::getDocIds() function
Query *m_q;
2014-11-10 14:45:11 -08:00
int32_t m_docsToGet;
2013-08-02 13:12:24 -07:00
void *m_state;
void (*m_callback ) ( void *state );
// set by Msg3a initially
2014-11-10 14:45:11 -08:00
//int32_t m_indexdbSplit;
// int32_t m_numHosts;
int32_t m_numQueriedHosts;
2013-08-02 13:12:24 -07:00
bool m_moreDocIdsAvail;
// this is set if IndexTable::addLists() had an error
2014-11-10 14:45:11 -08:00
int32_t m_errno;
2013-08-02 13:12:24 -07:00
// this is now in here so Msg40 can send out one Msg3a per
// collection if it wants to search an entire token
2017-01-09 14:20:46 +01:00
Msg39Request m_msg39req;
2013-08-02 13:12:24 -07:00
// a multicast class to send the request, one for each split
Multicast m_mcast[MAX_SHARDS];
2013-08-02 13:12:24 -07:00
// for timing how long things take
2014-10-30 13:36:39 -06:00
int64_t m_startTime;
2013-08-02 13:12:24 -07:00
// this buffer should be big enough to hold all requests
//char m_request [MAX_MSG39_REQUEST_SIZE * MAX_SHARDS];
2014-11-10 14:45:11 -08:00
int32_t m_numReplies;
2013-08-02 13:12:24 -07:00
int32_t m_skippedShards;
2013-08-02 13:12:24 -07:00
// . # estimated total hits
2014-10-30 13:36:39 -06:00
int64_t m_numTotalEstimatedHits;
2013-08-02 13:12:24 -07:00
// estimated percentage of index searched of the desired scope
// unresponsive shards count as 0.0 toward the global estimate
double m_pctSearched;
2013-08-02 13:12:24 -07:00
// we have one request that we send to each split
char *m_rbufPtr;
2014-11-10 14:45:11 -08:00
int32_t m_rbufSize;
2013-08-02 13:12:24 -07:00
char m_rbuf [ RBUF_SIZE ];
// now we send to the twin as well
SafeBuf m_rbuf2;
// each split gives us a reply
class Msg39Reply *m_reply [MAX_SHARDS];
2014-11-10 14:45:11 -08:00
int32_t m_replyMaxSize[MAX_SHARDS];
2013-08-02 13:12:24 -07:00
2016-02-16 14:39:59 +01:00
bool m_debug;
2013-08-02 13:12:24 -07:00
// final merged lists go here
2014-10-30 13:36:39 -06:00
int64_t *m_docIds ;
double *m_scores ;
unsigned *m_flags;
2013-08-02 13:12:24 -07:00
class DocIdScore **m_scoreInfos ;
key96_t *m_clusterRecs ;
2013-08-02 13:12:24 -07:00
char *m_clusterLevels ;
// this is new
collnum_t *m_collnums;
2014-11-10 14:45:11 -08:00
int32_t m_numDocIds ;
2013-08-02 13:12:24 -07:00
// the above ptrs point into this buffer
char *m_finalBuf;
2014-11-10 14:45:11 -08:00
int32_t m_finalBufSize;
2013-08-02 13:12:24 -07:00
// when merging this list of docids into a final list keep
// track of the cursor into m_docIds[]
2014-11-10 14:45:11 -08:00
int32_t m_cursor;
2013-08-02 13:12:24 -07:00
};
2016-03-08 22:14:30 +01:00
#endif // GB_MSG3A_H_