#ifndef MSG25_H_
#define MSG25_H_

#include "types.h"
#include "SafeBuf.h"
#include "Msg20.h"     // for getting this url's LinkInfo from another cluster
#include "HashTableX.h"
#include "Msg22.h"
#include "Msg5.h"
#include "max_coll_len.h"
#include "max_url_len.h"

class LinkInfo;
class Inlink;
class Multicast;
class UdpSlot;


void  handleRequest25(UdpSlot *slot, int32_t netnice);


// . get the inlinkers to this SITE (any page on this site)
// . use that to compute a site quality
// . also get the inlinkers sorted by date and see how many good inlinkers
//   we had since X days ago. (each inlinker needs a pub/birth date)
class Msg25Request {
public:
	// either MODE_PAGELINKINFO or MODE_SITELINKINFO
	char          m_mode;
	int32_t       m_ip;
	int64_t       m_docId;
	collnum_t     m_collnum;
	bool          m_isInjecting;
	bool          m_printInXml;

	// when we get a reply we call this
	void         *m_state;
	void        (*m_callback)(void *state);

	int32_t       m_siteNumInlinks;
	LinkInfo     *m_oldLinkInfo;
	int32_t       m_niceness;
	bool          m_doLinkSpamCheck;
	bool          m_oneVotePerIpDom;
	bool          m_canBeCancelled;
	int32_t       m_lastUpdateTime;
	bool          m_onlyNeedGoodInlinks;
	bool          m_getLinkerTitles;
	int32_t       m_ourHostHash32;
	int32_t       m_ourDomHash32;

	// new stuff
	int32_t       m_siteHash32;
	int64_t       m_siteHash64;
	int64_t       m_linkHash64;
	// for a linked list of these in g_lineTable in Linkdb.cpp;
	// only used on the server end, not the client end
	Msg25Request *m_next;
	// the multicast we use
	Multicast    *m_mcast;
	UdpSlot      *m_udpSlot;
	bool          m_printDebugMsgs;
	// store final LinkInfo reply in here
	SafeBuf      *m_linkInfoBuf;

	char         *ptr_site;
	char         *ptr_url;
	const char   *ptr_oldLinkInfo;

	int32_t       size_site;
	int32_t       size_url;
	int32_t       size_oldLinkInfo;

	// variable data begins here

	int32_t getStoredSize();
	void serialize();
	void deserialize();
};
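
// a minimal usage sketch for Msg25Request (hypothetical caller-side names;
// the exact serialize()/deserialize() wire format lives in the .cpp):
//
//   Msg25Request req;
//   req.m_mode        = 2;              // MODE_SITELINKINFO (see Msg25::mode_t)
//   req.m_collnum     = collnum;
//   req.m_docId       = docId;
//   req.ptr_site      = site; req.size_site = strlen(site) + 1;
//   req.ptr_url       = url;  req.size_url  = strlen(url)  + 1;
//   req.m_linkInfoBuf = &linkInfoBuf;   // final LinkInfo reply lands here
//   int32_t need = req.getStoredSize(); // fixed fields + variable data
//   req.serialize();                    // flatten ptr_* for the network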


// . gets ALL the linkText classes for a url and merges 'em into a LinkInfo class
// . also gets the link-adjusted quality of our site's url (root url)
// . first gets all docIds of docs that link to that url via a link: search
// . gets the LinkText, customized for our url, from each docId in that list
// . merges them into a final LinkInfo class for your url
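// . a rough sketch of that pipeline using the Msg25 stage callbacks declared
//   below (the real state machine is driven from Linkdb.cpp; the order shown
//   is an approximation):
//
//     getLinkingDocIds();  // link: search -> linker docIds into m_list
//     gotList();           // dedup docIds/IPs from the linkdb list
//     sendRequests();      // fan out up to MAX_MSG20_OUTSTANDING Msg20s
//     gotLinkText(req);    // per reply: dup/link-spam checks, keep or toss
//     // ...surviving Msg20Replies are merged into m_linkInfoBuf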


#define MAX_LINKERS 3000

// if a linker is a "title rec not found" or is link spam, then we get
// another linker's titleRec. churn through up to this many titleRecs in an
// attempt to get MAX_LINKERS good titleRecs before giving up.
//#define MAX_DOCIDS_TO_SAMPLE 25000
// on news.google.com, 22393 of the 25000 are link spam, and we only end
// up getting 508 good inlinks, so raise from 25000 to 50000
//#define MAX_DOCIDS_TO_SAMPLE 50000
// try a ton of lookups so we can ditch xfactor and keep the posdb key as
// simple as possible. just make sure we recycle link info a lot!
#define MAX_DOCIDS_TO_SAMPLE 1000000

// we went down from 300 to 100 so XmlDoc::getRecommendLinksBuf() could
// launch ~5 Msg25s with no fear of having >500 Msg20 requests outstanding,
// which clogs things up. but on gk144 we now have 128 hosts, so it is back
// to 300. if we have fewer hosts, limit this proportionately in Linkdb.cpp
// (see the sketch below).
#define	MAX_MSG20_OUTSTANDING 300
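
// a sketch of the proportional limit mentioned above (hypothetical; the real
// clamp would live in Linkdb.cpp, and g_hostdb.getNumHosts() is assumed):
//
//   int32_t maxOut = MAX_MSG20_OUTSTANDING;            // 300 at ~128 hosts
//   int32_t nh     = g_hostdb.getNumHosts();
//   if ( nh < 128 ) maxOut = (MAX_MSG20_OUTSTANDING * nh) / 128;
//   if ( maxOut < 1 ) maxOut = 1;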

#define MAX_NOTE_BUF_LEN 20000

#define MSG25_MAX_REQUEST_SIZE (MAX_URL_LEN+MAX_COLL_LEN+64)


class Msg25 {

 public:

	// . returns false if blocked, true otherwise
	// . sets errno on error
	// . this sets Msg25::m_siteRootQuality and Msg25::m_linkInfo
	// . "url/coll" should NOT be on the stack in case we block
	// . if "reallyGetLinkInfo" is false we don't actually try to fetch
	//   any link text and return true right away, which saves a bunch
	//   of disk seeks when spidering small collections that don't need
	//   link text/info indexing/analysis
	bool getLinkInfo2 (char      *site,
			   char      *url,
			   bool       isSiteLinkInfo,
			   int32_t    ip,
			   int64_t    docId,
			   collnum_t  collnum,
			   char      *qbuf,
			   int32_t    qbufSize,
			   void      *state,
			   void     (*callback)(void *state),
			   bool       isInjecting,
			   bool       printDebugMsgs, // into "Msg25::m_pbuf"
			   bool       printInXml,
			   int32_t    siteNumInlinks,
			   LinkInfo  *oldLinkInfo,
			   int32_t    niceness,
			   bool       doLinkSpamCheck,
			   bool       oneVotePerIpDom,
			   bool       canBeCancelled,
			   int32_t    lastUpdateTime,
			   bool       onlyNeedGoodInlinks,
			   bool       getLinkerTitles, //= false,
			   // if an inlinking document has an outlink
			   // matching one of these hashes then we set
			   // Msg20Reply::m_hadLinkToOurDomOrHost.
			   // it is used to remove an inlinker to a related
			   // docid that also links to our main seo url
			   // being processed, so we do not recommend
			   // such links since they already link to a page
			   // on your domain or hostname. set BOTH to zero
			   // to skip this algo in handleRequest20()'s
			   // call to XmlDoc::getMsg20Reply().
			   int32_t     ourHostHash32,
			   int32_t     ourDomHash32,
			   SafeBuf    *myLinkInfoBuf);
	Msg25();
	~Msg25();
	void reset();

	// a new parm referencing the request we got over the network
	Msg25Request * m_req25;

	Msg20Reply *getLoser(Msg20Reply *r, Msg20Reply *p);
	const char *isDup   (Msg20Reply *r, Msg20Reply *p);

	bool addNote ( const char *note, int32_t noteLen, int64_t docId );

	// m_linkInfo ptr references into here. provided by caller.
	SafeBuf *m_linkInfoBuf;

	SafeBuf m_realBuf;

	// private:
	// these need to be public for wrappers to call:
	bool gotTermFreq(bool msg42Called);
	bool getRootTitleRec();
	bool gotRootTitleRec();
	bool gotDocId();
	bool gotRootLinkText();
	bool gotRootLinkText2();
	bool getLinkingDocIds();
	bool gotList();
	bool gotClusterRecs();
	bool sendRequests();
	bool gotLinkText(class Msg20Request *req);
	bool gotMsg25Reply();
	bool doReadLoop();

	// input vars
	char *m_url;
	char *m_site;

	int32_t m_ourHostHash32;
	int32_t m_ourDomHash32;

	int32_t m_round;
	uint64_t m_linkHash64;
	key224_t m_nextKey;

	bool       m_retried;
	bool       m_prependWWW;
	bool       m_onlyNeedGoodInlinks;
	bool       m_getLinkerTitles;
	int64_t  m_docId;
	collnum_t m_collnum;
	void      *m_state;
	void     (* m_callback) ( void *state );

	int32_t m_siteNumInlinks;
	enum mode_t {
		MODE_UNSET = 0,
		MODE_PAGELINKINFO = 1,
		MODE_SITELINKINFO = 2
	} m_mode;
	bool m_printInXml;


	// url info
	int32_t m_ip;
	int32_t m_top;
	int32_t m_midDomHash;

	bool m_gettingList;

	// . we now use Msg5 since it has "restrictIndexdb" support to limit
	//   indexdb searches to just the root file to decrease disk seeks
	Msg5 m_msg5;
	RdbList m_list;

	Inlink *m_k;

	// for getting the root title rec so we can share its pwids
	Msg22 m_msg22;

	int32_t      m_maxNumLinkers;

	// should we free the m_replyPtrs on destruction? default=true
	bool         m_ownReplies;

	// now we just save the replies we get back from Msg20::getSummary().
	// we point to them with a LinkTextReply, which is just a pointer
	// and some access functions.
	Msg20Reply  *m_replyPtrs[MAX_LINKERS];
	int32_t      m_replySizes[MAX_LINKERS];
	int32_t      m_numReplyPtrs;

	Msg20        m_msg20s        [MAX_MSG20_OUTSTANDING];
	Msg20Request m_msg20Requests [MAX_MSG20_OUTSTANDING];
	char         m_inUse         [MAX_MSG20_OUTSTANDING];
	// for "fake" replies
	Msg20Reply   m_msg20Replies  [MAX_MSG20_OUTSTANDING];

	int32_t      m_numDocIds;
	int32_t      m_cblocks;
	int32_t      m_uniqueIps;

	int32_t      m_minRecSizes;

	// Msg20 is for getting the LinkInfo class from this same url's
	// titleRec from another (usually much larger) gigablast cluster/network
	Msg20        m_msg20;

	// how many msg20s have we sent/recvd?
	int32_t      m_numRequests;
	int32_t      m_numReplies;

	int32_t      m_linkSpamOut;

	// have we had an error for any transaction?
	int32_t      m_errno;

	SafeBuf      m_tmp;
	SafeBuf     *m_pbuf; // will point to m_tmp if m_printDebugMsgs

	// copied from CollectionRec
	bool         m_oneVotePerIpDom;
	bool         m_doLinkSpamCheck;
	bool         m_isInjecting;
	char         m_canBeCancelled;
	int32_t      m_lastUpdateTime;

	Multicast m_mcast;

	int32_t      m_good;
	int32_t      m_errors;
	int32_t      m_noText;
	int32_t      m_reciprocal;

	bool         m_spideringEnabled;

	int32_t      m_dupCount;
	int32_t      m_vectorDups;
	int32_t      m_spamLinks;
	int32_t      m_niceness;
	int32_t      m_numFromSameIp;
	int32_t      m_sameMidDomain;

	// stats for allowing some link-spam inlinks to vote
	int32_t      m_spamCount;
	int32_t      m_spamWeight;
	int32_t      m_maxSpam;

	char m_siteQuality;
	int32_t m_siteNumFreshInlinks;

	// this is used for the linkdb list
	HashTableX   m_ipTable;
	HashTableX   m_fullIpTable;
	HashTableX   m_firstIpTable;

	// this is for deduping docids because we now combine the linkdb
	// list of docids with the old inlinks in the old link info
	//HashTableT <int64_t, char> m_docIdTable;
	HashTableX m_docIdTable;

	// special counts
	int32_t      m_ipDupsLinkdb;
	int32_t      m_docIdDupsLinkdb;
	int32_t      m_linkSpamLinkdb;
	int32_t      m_ipDups;

	uint32_t     m_groupId;
	int64_t      m_probDocId;

	LinkInfo    *m_oldLinkInfo;

	char         m_buf[MAX_NOTE_BUF_LEN];
	char        *m_bufPtr;
	char        *m_bufEnd;
	HashTableX   m_table;

	char         m_request[MSG25_MAX_REQUEST_SIZE];
	int32_t      m_requestSize;

	HashTableX   m_adBanTable;

	// for setting <absScore2> or determining if a search result's
	// inlinkers also have the query terms. buzz.
	char        *m_qbuf;
	int32_t      m_qbufSize;
};
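
// a minimal caller sketch for Msg25::getLinkInfo2() (hypothetical wrapper and
// state; see Linkdb.cpp for the real call sites and the full argument list):
//
//   static void gotLinkInfoWrapper ( void *state ) {
//       Msg25 *m25 = (Msg25 *)state;
//       if ( m25->m_errno ) { /* handle error */ return; }
//       // m25->m_linkInfoBuf now holds the final serialized LinkInfo
//   }
//   ...
//   // returns false if it blocked; the callback fires when done
//   if ( ! msg25->getLinkInfo2 ( site, url,
//                                true,       // isSiteLinkInfo
//                                ip, docId, collnum,
//                                NULL, 0,    // qbuf, qbufSize
//                                msg25,      // state
//                                gotLinkInfoWrapper,
//                                ...,        // remaining flags/parms
//                                &linkInfoBuf ) )
//       return false; // blocked; wrapper will be called later
//   // did not block; linkInfoBuf is valid right now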


#endif // MSG25_H_