// Matt Wells, copyright Nov 2007

// get various information from a query and a docId, like summary, title, etc.

#ifndef GB_MSG20_H
#define GB_MSG20_H

#include "Multicast.h"
#include "collnum_t.h"
#include "WordVariationsConfig.h"


// Parameters for a single Msg20 lookup: which docId to fetch and which pieces
// (summary, title, link text, ...) should be produced for it.
// NOTE(review): serialize()/deserialize() are defined elsewhere; they
// presumably depend on the member order below (fixed-size members, then the
// ptr_* block, then the size_* block) -- confirm before reordering anything.
class Msg20Request {
public:

	Msg20Request() {
		reset();
	}

	// Wipe every member to zero, then apply the few defaults that are
	// not zero.
	// NOTE(review): the memset also covers m_word_variations_config;
	// presumably that type is safe to zero-fill -- confirm.
	void reset() {
		memset(this, 0, sizeof(Msg20Request));
		m_docId           = -1LL; // -1 marks the docid as "invalid"
		m_numSummaryLines = 1;
		m_titleMaxLen     = 80;
		m_summaryMaxLen   = 180;
	}

	int32_t getStoredSize() const;
	char *serialize(int32_t *sizePtr) const;
	int32_t deserialize();
	int64_t makeCacheKey() const;

	char m_numSummaryLines; // reset() sets this to 1
	bool m_getHeaderTag;
	void *m_state;
	void *m_state2; // used by Msg25.cpp
	int32_t m_j; // used by Msg25.cpp
	bool (*m_callback)(void *m_state);
	int64_t m_docId; // reset() sets this to -1 ("invalid")
	int32_t m_niceness;
	int32_t m_titleMaxLen; // reset() sets this to 80
	int32_t m_summaryMaxLen; // reset() sets this to 180
	int32_t m_summaryMaxNumCharsPerLine;
	int64_t m_maxCacheAge;
	int32_t m_discoveryDate;

	// Lets us remove an inlinker to a related docid if it also links to
	// the main url we are processing seo for.
	// Set both of these to 0 to disregard.
	int32_t m_ourHostHash32;
	int32_t m_ourDomHash32;

	// language the query (ptr_qbuf) is in
	uint8_t m_langId;
	uint8_t m_prefferedResultLangId;
	// the numeric collection # is used now instead of a ptr_coll
	collnum_t m_collnum;

	WordVariationsConfig m_word_variations_config;

	// one-bit flags
	unsigned char m_useQueryStopWords : 1;
	unsigned char m_highlightQueryTerms : 1;
	unsigned char m_getSummaryVector : 1;
	unsigned char m_showBanned : 1;
	unsigned char m_includeCachedCopy : 1;
	unsigned char m_doLinkSpamCheck : 1;
	unsigned char m_isLinkSpam : 1; // Msg25 uses for storage
	unsigned char m_isSiteLinkInfo : 1; // site link info?
	unsigned char m_isDebug : 1;
	// when set, just calls TitleRec::getLinkInfo() to set ptr_linkInfo
	unsigned char m_getLinkInfo : 1;
	// when set, the title, etc. of BAD inlinks (those deemed link spam)
	// is not computed
	unsigned char m_onlyNeedGoodInlinks : 1;
	// when set, ptr_linkText, etc. get set
	unsigned char m_getLinkText : 1;
	unsigned char m_allowHighFrequencyTermCache : 1;

	// variable-length section: the pointers ...
	char *ptr_qbuf;
	char *ptr_ubuf; // url buffer
	const char *ptr_linkee; // used by Msg25 for getting link text
	char *ptr_displayMetas;

	// ... and the byte size that goes with each pointer
	int32_t size_qbuf;
	int32_t size_ubuf; // url buffer
	int32_t size_linkee; // size includes terminating \0
	int32_t size_displayMetas; // size includes terminating \0

	// variable data comes here
};


// only needed as a pointer by Msg20Reply::sendReply()
struct Msg20State;

// Everything a Msg20 lookup hands back for one document: fixed-size members
// first, then one ptr_*/size_* pair per variable-length buffer.
// NOTE(review): serialize()/deserialize() are defined elsewhere; they
// presumably depend on the member order below -- confirm before reordering.
class Msg20Reply {
public:

	Msg20Reply();
	// frees the merge buf from Msg40.cpp merging event summaries
	~Msg20Reply();
	void destructor();

	// Wipe every member to zero.
	void reset() {
		memset(this, 0, sizeof(Msg20Reply));
	}

	// number of bytes needed if we had to serialize this reply
	int32_t getStoredSize() const;

	int32_t deserialize();
	int32_t serialize(char *buf, int32_t bufSize) const;

	bool sendReply(Msg20State *state);

	// The clear*() calls below zero the sizes of certain strings so that
	// the next serialize() excludes them. Msg40 uses this to reduce the
	// memory needed for caching the Msg40, which includes an array of
	// Msg20s.
	void clearOutlinks() {
		size_outlinks        = 0;
		size_linkText        = 0;
		size_surroundingText = 0;
	}

	void clearVectors() {
		size_vbuf = 0;
	}

	int32_t m_ip;
	int32_t m_firstIp;
	int32_t m_wordPosStart;
	int64_t m_docId;
	int32_t m_firstSpidered;
	int32_t m_lastSpidered;
	int32_t m_lastModified;
	int32_t m_datedbDate;
	int32_t m_firstIndexedDate; // for the url/document as a whole
	int32_t m_discoveryDate; // for the inlink in question...
	int32_t m_errno; // LinkInfo uses it for LinkTextRepl
	collnum_t m_collnum; // collection # we came from
	char m_noArchive;
	char m_contentType;
	char m_siteRank;
	bool m_isBanned;
	char m_hopcount;
	char m_recycled;
	uint8_t m_language;
	uint16_t m_country;
	bool m_isAdult;

	int16_t m_httpStatus;
	int32_t m_indexCode;
	int32_t m_contentLen; // was m_docLen
	int32_t m_contentHash32; // for deduping diffbot json objects streaming
	int32_t m_pageNumInlinks;
	int32_t m_pageNumGoodInlinks;
	int32_t m_pageNumUniqueIps; // includes our own inlinks
	int32_t m_pageNumUniqueCBlocks; // includes our own inlinks
	int32_t m_pageInlinksLastUpdated;

	int32_t m_siteNumInlinks; // GOOD inlinks!

	int32_t m_numOutlinks; // replaced m_linkCount

	// just storage for LinkInfo::set() to use
	int32_t m_linkTextNumWords;

	int32_t m_midDomHash; // set for m_getLinkText

	char m_isLinkSpam; // set for m_getLinkText
	char m_outlinkInContent; // set for m_getLinkText
	char m_outlinkInComment; // set for m_getLinkText
	char m_isPermalink; // set for m_getLinkText (buzz)

	bool m_isDisplaySumSetFromTags;

	// variable-length section: the pointers ...
	char *ptr_tbuf; // title buffer
	char *ptr_htag; // h1 tag buf
	char *ptr_ubuf; // url buffer
	char *ptr_rubuf; // redirect url buffer
	char *ptr_displaySum; // summary for displaying
	char *ptr_dbuf; // display metas \0 separated
	char *ptr_vbuf; // summary vector
	char *ptr_imgData; // for encoded images
	char *ptr_site;

	// A serialized LinkInfo class (inlinks):
	// . computed using Msg25 (fresh) if m_computeLinkInfo is true
	// . just set from the titleRec if m_setLinkInfo is true
	char *ptr_linkInfo;
	// A serialized LinkInfo class, made using
	// LinkInfo::set ( Msg20Reply **ptrs )
	char *ptr_outlinks;

	// The following are used only by Msg25 to compute LinkInfo. Msg25
	// calls Msg20 on the docid of a potentially good inlinker instead of
	// calling the now obsolete Msg23::getLinkText().
	int32_t *ptr_vector1; // set for m_getLinkText
	int32_t *ptr_vector2; // set for m_getLinkText
	int32_t *ptr_vector3; // set for m_getLinkText
	char *ptr_linkText; // set for m_getLinkText
	char *ptr_surroundingText; // set for m_getLinkText
	char *ptr_linkUrl; // what we link to
	char *ptr_rssItem; // set for m_getLinkText
	const char *ptr_categories;
	char *ptr_content; // page content in utf8
	char *ptr_templateVector;
	char *ptr_metadataBuf;

	const char *ptr_note; // reason why it cannot vote

	// ... and the byte size that goes with each pointer, in the same order
	int32_t size_tbuf;
	int32_t size_htag;
	int32_t size_ubuf;
	int32_t size_rubuf;
	int32_t size_displaySum;
	int32_t size_dbuf;
	int32_t size_vbuf;
	int32_t size_imgData;
	int32_t size_site;
	int32_t size_linkInfo;
	int32_t size_outlinks;
	int32_t size_vector1;
	int32_t size_vector2;
	int32_t size_vector3;
	int32_t size_linkText;
	int32_t size_surroundingText;
	int32_t size_linkUrl;
	int32_t size_rssItem;
	int32_t size_categories;
	int32_t size_content; // page content in utf8
	int32_t size_templateVector;
	int32_t size_metadataBuf;
	int32_t size_note;

	// variable data comes here
};

// Handle for one Msg20 lookup: keeps the request buffer, the Multicast used
// for sending the request, and a pointer to the Msg20Reply (m_r).
class Msg20 {
public:

	// . this should only be called once
	// . should also register our get record handlers with the udpServer
	static bool registerHandler();

	// see definition of Msg20Request above
	bool getSummary ( class Msg20Request *r );

	// The reply, or NULL when there is none; the inline helpers below
	// all check for NULL before using it.
	// (original note: "this is cast to m_replyPtr")
	Msg20Reply *m_r ;
	int32_t   m_replySize;
	int32_t   m_replyMaxSize;

	// i guess Msg40.cpp looks at this flag
	bool m_gotReply;

	// set if we had an error
	int32_t   m_errno;

	// docid stored in m_requestDocId
	int64_t getRequestDocId () const { return m_requestDocId; }

	// . bytes needed to serialize the reply
	// . returns 0 if we have no reply
	int32_t getStoredSize() const {
		if ( ! m_r ) return 0; 
		return m_r->getStoredSize();
	}

	// . return how many bytes we serialize into "buf"
	// . returns 0 if we have no reply
	// . sets g_errno and returns -1 on error
	// . const because it only reads m_r and forwards to the const
	//   Msg20Reply::serialize(); this also matches getStoredSize()
	int32_t serialize ( char *buf , int32_t bufSize ) const {
		if ( ! m_r ) return 0;
		return m_r->serialize ( buf , bufSize );
	}

	// . this is destructive on the "buf". it converts offs to ptrs
	// . sets m_r to the modified "buf" when done
	// . sets g_errno and returns -1 on error, otherwise # of bytes deseril
	int32_t deserialize ( char *buf , int32_t bufSize ) ;

	// Msg40 caches each Msg20Reply when it caches the page of results, so,
	// to keep the size of the cached Msg40 down, we do not cache certain
	// things. so we have to "clear" these guys out before caching.
	// both are no-ops when there is no reply.
	void clearLinks     () { if ( m_r ) m_r->clearOutlinks (); }
	void clearVectors   () { if ( m_r ) m_r->clearVectors  (); }
	// copy "src" to ourselves
	void moveFrom(Msg20 *src);

	void gotReply ( class UdpSlot *slot );

	// general purpose routines
	Msg20();
	~Msg20();
	// so we can alloc arrays of these using mmalloc()
	void constructor ();
	void destructor  ();
	void freeReply   ();
	void reset       ();

	int32_t m_ii;

	// is the reply in progress? if msg20 has not launched a request
	// this is false. if msg20 received its reply, this is false. 
	// otherwise this is true.
	bool m_inProgress;
	bool m_launched;

private:
	// request buffer and its size in bytes
	char  *m_request;
	int32_t   m_requestSize;

	// returned by getRequestDocId()
	int64_t m_requestDocId;

	// for sending the request
	Multicast m_mcast;
	
	// NOTE(review): presumably true when we are responsible for freeing
	// m_r -- confirm against freeReply() in the .cpp
	bool	m_ownReply;

	// callbacks and the state pointer handed to them
	bool     (*m_callback ) ( void *state );
	void     (*m_callback2) ( void *state );
	void      *m_state;

	static void gotReplyWrapper20(void *state, void *state20);
};

#endif // GB_MSG20_H