2013-08-02 13:12:24 -07:00
|
|
|
// Matt Wells, copyright Jul 2001
|
|
|
|
|
|
|
|
// . gets the resulting docIds from a query
|
|
|
|
// . TODO: use our own facility to replace Msg2? hash a list as it comes.
|
|
|
|
|
2016-03-08 22:14:30 +01:00
|
|
|
#ifndef GB_MSG39_H
|
|
|
|
#define GB_MSG39_H
|
2013-08-02 13:12:24 -07:00
|
|
|
|
|
|
|
#include "Query.h" // Query::set()
|
|
|
|
#include "Msg2.h" // getLists()
|
2016-07-04 15:06:15 +02:00
|
|
|
#include "PosdbTable.h"
|
2013-08-02 13:12:24 -07:00
|
|
|
#include "TopTree.h"
|
|
|
|
#include "Msg51.h"
|
2017-12-11 14:44:58 +01:00
|
|
|
#include "BaseScoringParameters.h"
|
2017-11-24 16:43:19 +01:00
|
|
|
#include "WordVariationsConfig.h"
|
2016-07-11 14:48:20 +02:00
|
|
|
#include "JobScheduler.h"
|
2013-08-02 13:12:24 -07:00
|
|
|
|
|
|
|
|
2016-07-11 14:57:14 +02:00
|
|
|
class UdpSlot;
|
2016-11-08 12:14:32 +01:00
|
|
|
class DocumentIndexChecker;
|
2016-11-07 15:48:14 +01:00
|
|
|
|
2016-07-11 14:57:14 +02:00
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
class Msg39Request {
|
|
|
|
|
|
|
|
public:
|
|
|
|
|
2016-02-25 16:11:45 +01:00
|
|
|
Msg39Request () { reset(); }
|
2013-08-02 13:12:24 -07:00
|
|
|
|
2016-07-11 14:26:43 +02:00
|
|
|
void reset();
|
2013-08-02 13:12:24 -07:00
|
|
|
|
|
|
|
// we are requesting that this many docids be returned. Msg40 requests
|
|
|
|
// of Msg3a a little more docids than it needs because it assumes
|
|
|
|
// some will be de-duped at summary gen time.
|
2017-11-13 12:30:53 +01:00
|
|
|
//note: Multicast.cpp decodes first 8 bytes directly
|
2014-11-10 14:45:11 -08:00
|
|
|
int32_t m_docsToGet;
|
2016-02-25 16:11:45 +01:00
|
|
|
|
2014-11-10 14:45:11 -08:00
|
|
|
int32_t m_nqt; // # of query terms
|
2013-08-02 13:12:24 -07:00
|
|
|
char m_niceness;
|
2014-11-10 14:45:11 -08:00
|
|
|
int32_t m_maxAge;
|
|
|
|
int32_t m_maxQueryTerms;
|
|
|
|
int32_t m_numDocIdSplits;
|
2014-09-21 18:47:30 -07:00
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
uint8_t m_language;
|
|
|
|
|
|
|
|
// flags
|
2017-11-24 16:43:19 +01:00
|
|
|
WordVariationsConfig m_word_variations_config;
|
2016-10-19 13:28:04 +02:00
|
|
|
bool m_debug;
|
|
|
|
bool m_doSiteClustering;
|
|
|
|
bool m_hideAllClustered;
|
|
|
|
bool m_doDupContentRemoval;
|
2016-10-19 16:23:11 +02:00
|
|
|
bool m_addToCache;
|
2016-10-19 13:28:04 +02:00
|
|
|
bool m_familyFilter;
|
|
|
|
bool m_getDocIdScoringInfo;
|
2013-08-02 13:12:24 -07:00
|
|
|
char m_realMaxTop;
|
|
|
|
char m_stripe;
|
2016-10-19 16:23:11 +02:00
|
|
|
bool m_useQueryStopWords;
|
2016-10-19 13:28:04 +02:00
|
|
|
bool m_allowHighFrequencyTermCache;
|
|
|
|
bool m_doMaxScoreAlgo;
|
2013-08-02 13:12:24 -07:00
|
|
|
|
2017-07-04 15:14:53 +02:00
|
|
|
bool m_modifyQuery;
|
2017-12-11 14:44:58 +01:00
|
|
|
BaseScoringParameters m_baseScoringParameters;
|
2017-01-20 14:20:34 +01:00
|
|
|
|
2014-03-06 10:45:13 -08:00
|
|
|
collnum_t m_collnum;
|
|
|
|
|
2014-10-30 13:36:39 -06:00
|
|
|
int64_t m_minDocId;
|
|
|
|
int64_t m_maxDocId;
|
2013-08-02 13:12:24 -07:00
|
|
|
|
2014-05-06 10:47:57 -07:00
|
|
|
// for widget, to only get results to append to last docid
|
|
|
|
double m_maxSerpScore;
|
2014-10-30 13:36:39 -06:00
|
|
|
int64_t m_minSerpDocId;
|
2014-05-06 10:47:57 -07:00
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
// msg3a stuff
|
2016-02-15 12:12:06 +01:00
|
|
|
int64_t m_timeout; // in milliseconds
|
2013-08-02 13:12:24 -07:00
|
|
|
|
2016-06-17 14:03:31 +02:00
|
|
|
char m_queryId[32];
|
|
|
|
|
2014-07-07 12:32:27 -07:00
|
|
|
// do not add new string parms before ptr_readSizes or
|
|
|
|
// after ptr_whiteList so serializeMsg() calls still work
|
2013-08-02 13:12:24 -07:00
|
|
|
char *ptr_termFreqWeights;
|
|
|
|
char *ptr_query; // in utf8?
|
2013-09-15 15:15:56 -06:00
|
|
|
char *ptr_whiteList;
|
2013-08-02 13:12:24 -07:00
|
|
|
|
2014-07-07 12:32:27 -07:00
|
|
|
// do not add new string parms before size_readSizes or
|
|
|
|
// after size_whiteList so serializeMsg() calls still work
|
2014-11-10 14:45:11 -08:00
|
|
|
int32_t size_termFreqWeights;
|
|
|
|
int32_t size_query;
|
|
|
|
int32_t size_whiteList;
|
2013-08-02 13:12:24 -07:00
|
|
|
|
2016-04-04 12:36:32 +02:00
|
|
|
// variable data comes here
|
2013-08-02 13:12:24 -07:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// Reply to a Msg39Request: a fixed header followed by variable-length
// buffers that the ptr_* / size_* pairs describe.
//
// NOTE: reset() memset()s the whole object, so this class must only ever
// hold plain data members (no virtual functions, no members with
// constructors). The ptr_* / size_* groups are walked by serializeMsg(),
// so their first and last members must stay where they are.
class Msg39Reply {

public:

    // zero ourselves out
    void reset() { memset(this,0,sizeof(*this)); }

    int32_t m_numDocIds;
    // # of "unignored" query terms
    int32_t m_nqt;
    // # of estimated hits we had
    int32_t m_estimatedHits;
    // estimated percentage of index searched of the desired scope
    double  m_pctSearched;
    // error code
    int32_t m_errno;

    // do not add new string parms before ptr_docIds or
    // after ptr_clusterRecs so serializeMsg() calls still work
    char   *ptr_docIds         ; // the results, int64_t
    char   *ptr_scores         ; // now doubles! so we can have intScores
    char   *ptr_flags          ; // from Docid2FlagsAndSiteMap
    char   *ptr_scoreInfo      ; // transparency info
    char   *ptr_pairScoreBuf   ; // transparency info
    char   *ptr_singleScoreBuf ; // transparency info
    char   *ptr_clusterRecs    ; // key96_t (might be empty)

    // do not add new string parms before size_docIds or
    // after size_clusterRecs so serializeMsg() calls still work
    int32_t size_docIds;
    int32_t size_scores;
    int32_t size_flags;
    int32_t size_scoreInfo;
    int32_t size_pairScoreBuf  ;
    int32_t size_singleScoreBuf;
    int32_t size_clusterRecs;

    // variable data comes here
};
|
|
|
|
|
|
|
|
|
|
|
|
class Msg39 {
|
2016-02-04 17:44:32 +01:00
|
|
|
public:
|
2013-08-02 13:12:24 -07:00
|
|
|
|
|
|
|
Msg39();
|
2015-07-13 23:17:53 -06:00
|
|
|
~Msg39();
|
2016-02-04 17:44:32 +01:00
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
// register our request handler for Msg39's
|
2016-07-11 15:33:40 +02:00
|
|
|
static bool registerHandler();
|
2016-07-11 15:57:23 +02:00
|
|
|
|
|
|
|
private:
|
|
|
|
static void handleRequest39(UdpSlot *slot, int32_t netnice);
|
2013-08-02 13:12:24 -07:00
|
|
|
// called by handler when a request for docids arrives
|
|
|
|
void getDocIds ( UdpSlot *slot ) ;
|
2016-07-11 14:48:20 +02:00
|
|
|
|
2016-07-11 15:48:02 +02:00
|
|
|
void reset();
|
|
|
|
void reset2();
|
2016-08-01 14:53:50 +02:00
|
|
|
static void coordinatorThreadFunc(void *state);
|
2016-07-11 16:40:04 +02:00
|
|
|
void getDocIds2();
|
2013-08-02 13:12:24 -07:00
|
|
|
// retrieves the lists needed as specified by termIds and PosdbTable
|
2016-08-02 15:23:56 +02:00
|
|
|
void getLists(int fileNum, int64_t docIdStart, int64_t docIdEnd);
|
2013-08-02 13:12:24 -07:00
|
|
|
// called when lists have been retrieved, uses PosdbTable to hash lists
|
2016-11-08 12:14:32 +01:00
|
|
|
void intersectLists(const DocumentIndexChecker &documentIndexChecker);
|
2013-08-02 13:12:24 -07:00
|
|
|
|
|
|
|
// . this is used by handler to reconstruct the incoming Query class
|
|
|
|
// . TODO: have a serialize/deserialize for Query class
|
2016-07-11 12:31:03 +02:00
|
|
|
Query m_query;
|
2013-08-02 13:12:24 -07:00
|
|
|
|
|
|
|
// used to get IndexLists all at once
|
|
|
|
Msg2 m_msg2;
|
|
|
|
|
|
|
|
// holds slot after we create this Msg39 to handle a request for docIds
|
|
|
|
UdpSlot *m_slot;
|
|
|
|
|
|
|
|
// . used for getting IndexList startKey/endKey/minNumRecs for each
|
|
|
|
// termId we got from the query
|
|
|
|
// . used for hashing our retrieved IndexLists
|
|
|
|
PosdbTable m_posdbTable;
|
|
|
|
|
|
|
|
// keep a ptr to the request
|
2016-07-11 17:15:53 +02:00
|
|
|
Msg39Request *m_msg39req;
|
2013-08-02 13:12:24 -07:00
|
|
|
|
2016-07-11 14:48:20 +02:00
|
|
|
// always use top tree now
|
2016-07-11 17:10:53 +02:00
|
|
|
TopTree m_toptree;
|
2016-07-11 14:48:20 +02:00
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
// . we hold our IndexLists here for passing to PosdbTable
|
|
|
|
// . one array for each of the tiers
|
2016-04-12 16:25:10 +02:00
|
|
|
RdbList *m_lists;
|
2013-08-02 13:12:24 -07:00
|
|
|
|
|
|
|
// used for timing
|
2014-10-30 13:36:39 -06:00
|
|
|
int64_t m_startTime;
|
2016-02-22 17:45:06 +01:00
|
|
|
int64_t m_startTimeQuery; //when the getDocIds2() was first called
|
2013-08-02 13:12:24 -07:00
|
|
|
|
|
|
|
// this is set if PosdbTable::addLists() had an error
|
2014-11-10 14:45:11 -08:00
|
|
|
int32_t m_errno;
|
2013-08-02 13:12:24 -07:00
|
|
|
|
2014-10-30 13:36:39 -06:00
|
|
|
int64_t m_numTotalHits;
|
2013-08-02 13:12:24 -07:00
|
|
|
|
2016-09-01 15:30:40 +02:00
|
|
|
int32_t m_clusterBufSize;
|
|
|
|
char *m_clusterBuf;
|
2014-10-30 13:36:39 -06:00
|
|
|
int64_t *m_clusterDocIds;
|
2013-08-02 13:12:24 -07:00
|
|
|
char *m_clusterLevels;
|
2016-09-02 12:10:29 +02:00
|
|
|
key96_t *m_clusterRecs;
|
2014-11-10 14:45:11 -08:00
|
|
|
int32_t m_numClusterDocIds;
|
|
|
|
int32_t m_numVisible;
|
2013-08-02 13:12:24 -07:00
|
|
|
Msg51 m_msg51;
|
|
|
|
bool m_gotClusterRecs;
|
2016-07-11 14:48:20 +02:00
|
|
|
|
2016-08-01 14:53:50 +02:00
|
|
|
void controlLoop();
|
2016-07-11 14:48:20 +02:00
|
|
|
static void intersectListsThreadFunction(void *state);
|
|
|
|
|
2016-02-22 17:45:06 +01:00
|
|
|
int32_t m_docIdSplitNumber; //next split range to do
|
2016-07-11 14:48:20 +02:00
|
|
|
|
2016-08-02 15:23:56 +02:00
|
|
|
void estimateHitsAndSendReply(double pctSearched);
|
2016-08-01 14:53:50 +02:00
|
|
|
void getClusterRecs();
|
2014-05-13 20:50:11 -07:00
|
|
|
bool gotClusterRecs ();
|
2013-08-02 13:12:24 -07:00
|
|
|
|
2016-02-04 17:44:32 +01:00
|
|
|
public:
|
|
|
|
//debugging aid
|
|
|
|
bool m_inUse;
|
|
|
|
bool m_debug;
|
2013-08-02 13:12:24 -07:00
|
|
|
};
|
|
|
|
|
2016-03-08 22:14:30 +01:00
|
|
|
#endif // GB_MSG39_H
|