2013-08-02 13:12:24 -07:00
|
|
|
#include "gb-include.h"
|
|
|
|
#include "XmlDoc.h"
|
2016-09-08 15:25:46 +02:00
|
|
|
#include "Hostdb.h"
|
2016-11-12 19:43:59 +01:00
|
|
|
#include "UdpSlot.h"
|
2017-05-16 12:20:06 +02:00
|
|
|
#include "UdpServer.h"
|
2016-11-12 20:01:44 +01:00
|
|
|
#include "ip.h"
|
2016-09-08 15:25:46 +02:00
|
|
|
#include "Process.h"
|
2016-12-08 17:04:38 +01:00
|
|
|
#include "Mem.h"
|
2016-02-12 15:08:11 +01:00
|
|
|
#ifdef _VALGRIND_
|
|
|
|
#include <valgrind/memcheck.h>
|
|
|
|
#endif
|
2016-03-03 16:52:21 +01:00
|
|
|
#include "SummaryCache.h"
|
2017-03-23 15:02:00 +01:00
|
|
|
#include "Conf.h"
|
2016-09-08 15:50:34 +02:00
|
|
|
#include "Stats.h"
|
|
|
|
|
|
|
|
|
|
|
|
struct Msg20State {
|
|
|
|
UdpSlot *m_slot;
|
|
|
|
Msg20Request *m_req;
|
|
|
|
XmlDoc m_xmldoc;
|
|
|
|
Msg20State(UdpSlot *slot, Msg20Request *req) : m_slot(slot), m_req(req), m_xmldoc() {}
|
|
|
|
};
|
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
|
2016-09-08 15:20:46 +02:00
|
|
|
static void handleRequest20(UdpSlot *slot, int32_t netnice);
|
|
|
|
static bool gotReplyWrapperxd(void *state);
|
2013-08-02 13:12:24 -07:00
|
|
|
|
2016-03-03 16:52:21 +01:00
|
|
|
|
|
|
|
static bool sendCachedReply ( Msg20Request *req, const void *cached_summary, size_t cached_summary_len, UdpSlot *slot );
|
|
|
|
|
|
|
|
|
2016-09-26 12:30:04 +02:00
|
|
|
// All member initialization is delegated to constructor().
Msg20::Msg20() {
	this->constructor();
}
|
|
|
|
|
|
|
|
// Frees the request and reply buffers by way of reset().
Msg20::~Msg20() {
	this->reset();
}
|
2013-08-02 13:12:24 -07:00
|
|
|
|
|
|
|
void Msg20::constructor () {
|
|
|
|
m_request = NULL;
|
|
|
|
m_r = NULL;
|
|
|
|
m_inProgress = false;
|
|
|
|
m_launched = false;
|
2014-03-04 12:07:46 -08:00
|
|
|
m_ii = -1;
|
2013-08-02 13:12:24 -07:00
|
|
|
reset();
|
|
|
|
m_mcast.constructor();
|
|
|
|
}
|
|
|
|
|
2016-09-26 12:30:04 +02:00
|
|
|
void Msg20::destructor() {
|
|
|
|
reset();
|
|
|
|
m_mcast.destructor();
|
|
|
|
}
|
2013-08-02 13:12:24 -07:00
|
|
|
|
2014-01-30 10:04:09 -08:00
|
|
|
|
2014-02-04 17:34:43 -08:00
|
|
|
void Msg20::freeReply() {
|
2016-03-03 12:33:41 +01:00
|
|
|
if (!m_r) {
|
|
|
|
return;
|
|
|
|
}
|
2014-11-25 11:00:27 -07:00
|
|
|
|
2014-02-04 17:34:43 -08:00
|
|
|
// sometimes the msg20 reply carries an merged bffer from
|
|
|
|
// msg40 that is a constructed ptr_eventSummaryLines from a
|
|
|
|
// merge operation in msg40. this fixes the "merge20buf1" memory
|
|
|
|
// leak from Msg40.cpp
|
|
|
|
m_r->destructor();
|
|
|
|
|
2016-03-03 12:33:41 +01:00
|
|
|
if ( m_ownReply ) {
|
|
|
|
mfree(m_r, m_replyMaxSize, "Msg20b");
|
|
|
|
}
|
2014-11-25 11:00:27 -07:00
|
|
|
|
2016-03-03 12:33:41 +01:00
|
|
|
m_r = NULL;
|
|
|
|
}
|
2014-11-25 11:00:27 -07:00
|
|
|
|
2016-03-03 12:33:41 +01:00
|
|
|
void Msg20::reset() {
|
2013-08-02 13:12:24 -07:00
|
|
|
// not allowed to reset one in progress
|
2014-01-30 10:04:09 -08:00
|
|
|
if ( m_inProgress ) {
|
|
|
|
// do not core on abrupt exits!
|
2017-04-20 14:00:45 +02:00
|
|
|
if (g_process.isShuttingDown()) {
|
2014-11-25 11:00:27 -07:00
|
|
|
log("msg20: msg20 not being freed because exiting.");
|
|
|
|
return;
|
|
|
|
}
|
2014-01-30 10:04:09 -08:00
|
|
|
// otherwise core
|
2016-06-20 12:30:26 +02:00
|
|
|
g_process.shutdownAbort(true);
|
2014-01-30 10:04:09 -08:00
|
|
|
}
|
2016-03-03 12:33:41 +01:00
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
m_launched = false;
|
2016-03-03 12:33:41 +01:00
|
|
|
if ( m_request ) {
|
|
|
|
mfree( m_request, m_requestSize, "Msg20rb1" );
|
|
|
|
}
|
|
|
|
|
2014-02-04 17:34:43 -08:00
|
|
|
freeReply();
|
2016-03-03 12:33:41 +01:00
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
m_request = NULL; // the request buf ptr
|
|
|
|
m_gotReply = false;
|
|
|
|
m_errno = 0;
|
|
|
|
m_requestDocId = -1LL;
|
|
|
|
m_callback = NULL;
|
|
|
|
m_state = NULL;
|
|
|
|
m_ownReply = true;
|
2016-09-26 12:30:04 +02:00
|
|
|
m_requestSize = 0;
|
|
|
|
m_replySize = 0;
|
|
|
|
m_replyMaxSize = 0;
|
|
|
|
m_callback2 = NULL;
|
2013-08-02 13:12:24 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
bool Msg20::registerHandler ( ) {
|
2016-03-03 12:33:41 +01:00
|
|
|
// . register ourselves with the udp server
|
|
|
|
// . it calls our callback when it receives a msg of type 0x20
|
2016-07-25 16:00:20 +02:00
|
|
|
if ( ! g_udpServer.registerHandler ( msg_type_20, handleRequest20 ))
|
2013-08-02 13:12:24 -07:00
|
|
|
return false;
|
2016-03-03 12:33:41 +01:00
|
|
|
|
|
|
|
return true;
|
2013-08-02 13:12:24 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// copy "src" to ourselves
|
2016-09-08 15:00:03 +02:00
|
|
|
void Msg20::moveFrom(Msg20 *src) {
|
|
|
|
memcpy(this, src, sizeof(Msg20));
|
2013-08-02 13:12:24 -07:00
|
|
|
|
|
|
|
// make sure it does not free it!
|
|
|
|
src->m_r = NULL;
|
2015-12-04 11:09:31 +01:00
|
|
|
m_request = NULL;
|
2016-03-03 12:33:41 +01:00
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
// make sure destructor does not free this
|
|
|
|
src->m_request = NULL;
|
|
|
|
src->destructor();
|
|
|
|
}
|
|
|
|
|
|
|
|
// returns true and sets g_errno on error, otherwise, blocks and returns false
|
|
|
|
bool Msg20::getSummary ( Msg20Request *req ) {
|
|
|
|
// reset ourselves in case recycled
|
|
|
|
reset();
|
|
|
|
|
|
|
|
// consider it "launched"
|
|
|
|
m_launched = true;
|
|
|
|
|
|
|
|
// save it
|
|
|
|
m_requestDocId = req->m_docId;
|
|
|
|
m_state = req->m_state;
|
|
|
|
m_callback = req->m_callback;
|
2016-09-08 13:20:21 +02:00
|
|
|
m_callback2 = NULL;
|
2013-08-02 13:12:24 -07:00
|
|
|
|
|
|
|
// does this ever happen?
|
2015-12-14 12:27:56 +01:00
|
|
|
if ( g_hostdb.getNumHosts() <= 0 ) {
|
2013-08-02 13:12:24 -07:00
|
|
|
log("build: hosts2.conf is not in working directory, or "
|
|
|
|
"contains no valid hosts.");
|
|
|
|
g_errno = EBADENGINEER;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2015-10-02 12:34:08 -07:00
|
|
|
if ( req->m_docId < 0 && ! req->ptr_ubuf ) {
|
|
|
|
log("msg20: docid<0 and no url for msg20::getsummary");
|
|
|
|
g_errno = EBADREQUEST;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
// get groupId from docId, if positive
|
2014-11-10 14:45:11 -08:00
|
|
|
uint32_t shardNum;
|
2013-08-02 13:12:24 -07:00
|
|
|
if ( req->m_docId >= 0 )
|
2015-12-14 12:27:56 +01:00
|
|
|
shardNum = g_hostdb.getShardNumFromDocId(req->m_docId);
|
2013-08-02 13:12:24 -07:00
|
|
|
else {
|
2016-09-13 11:06:46 +02:00
|
|
|
int64_t pdocId = Titledb::getProbableDocId(req->ptr_ubuf);
|
2013-10-04 16:18:56 -07:00
|
|
|
shardNum = getShardNumFromDocId(pdocId);
|
2013-08-02 13:12:24 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// we might be getting inlinks for a spider request
|
|
|
|
// so make sure timeout is inifinite for that...
|
2016-02-04 13:57:17 +01:00
|
|
|
const int32_t timeout = (req->m_niceness==0)
|
|
|
|
? multicast_msg20_summary_timeout
|
|
|
|
: multicast_infinite_send_timeout;
|
2013-08-02 13:12:24 -07:00
|
|
|
|
|
|
|
// get our group
|
2015-12-14 12:27:56 +01:00
|
|
|
int32_t allNumHosts = g_hostdb.getNumHostsPerShard();
|
2016-02-22 11:48:21 +01:00
|
|
|
Host *allHosts = g_hostdb.getShard ( shardNum );
|
2013-08-02 13:12:24 -07:00
|
|
|
|
|
|
|
// put all alive hosts in this array
|
|
|
|
Host *cand[32];
|
2014-10-30 13:36:39 -06:00
|
|
|
int64_t nc = 0;
|
2014-11-10 14:45:11 -08:00
|
|
|
for ( int32_t i = 0 ; i < allNumHosts ; i++ ) {
|
2013-08-02 13:12:24 -07:00
|
|
|
// get that host
|
|
|
|
Host *hh = &allHosts[i];
|
|
|
|
// skip if dead
|
|
|
|
if ( g_hostdb.isDead(hh) ) continue;
|
2015-11-13 15:03:02 -07:00
|
|
|
|
|
|
|
// Respect no-spider, no-query directives from hosts.conf
|
|
|
|
if ( !req->m_getLinkInfo && ! hh->m_queryEnabled ) continue;
|
|
|
|
if ( req->m_getLinkInfo && ! hh->m_spiderEnabled ) continue;
|
2013-08-02 13:12:24 -07:00
|
|
|
// add it if alive
|
|
|
|
cand[nc++] = hh;
|
|
|
|
}
|
2017-03-23 15:02:00 +01:00
|
|
|
if(nc==0) {
|
|
|
|
log(LOG_DEBUG, "msg20: no live candidate hosts for shard %d", shardNum);
|
|
|
|
if(g_conf.m_msg20FallbackToAllHosts) {
|
2017-06-06 14:20:09 +02:00
|
|
|
log(LOG_DEBUG,"msg20: No alive desired hosts in shard %d - falling back to all hosts in the shard", shardNum);
|
2017-03-23 15:02:00 +01:00
|
|
|
for(int32_t i = 0; i < allNumHosts; i++) {
|
|
|
|
cand[nc++] = &allHosts[i];
|
|
|
|
}
|
|
|
|
}
|
2015-10-22 11:46:13 -06:00
|
|
|
}
|
|
|
|
if ( nc == 0 ) {
|
2017-03-23 15:02:00 +01:00
|
|
|
log(LOG_ERROR, "msg20: error sending mcast: no queryable hosts available to handle summary/linkinfo generation in shard %d", shardNum);
|
2015-10-22 11:46:13 -06:00
|
|
|
g_errno = EBADENGINEER;
|
|
|
|
m_gotReply = true;
|
|
|
|
return true;
|
|
|
|
}
|
2013-08-02 13:12:24 -07:00
|
|
|
|
|
|
|
// route based on docid region, not parity, because we want to hit
|
|
|
|
// the urldb page cache as much as possible
|
2016-02-22 11:48:21 +01:00
|
|
|
int64_t sectionWidth =((128LL*1024*1024)/nc)+1;
|
2014-10-30 13:36:39 -06:00
|
|
|
int64_t probDocId = req->m_docId;
|
2013-08-02 13:12:24 -07:00
|
|
|
// i think reference pages just pass in a url to get the summary
|
|
|
|
if ( probDocId < 0 && req->size_ubuf )
|
2016-09-13 11:06:46 +02:00
|
|
|
probDocId = Titledb::getProbableDocId ( req->ptr_ubuf );
|
2013-08-02 13:12:24 -07:00
|
|
|
if ( probDocId < 0 ) {
|
|
|
|
log("query: Got bad docid/url combo.");
|
|
|
|
probDocId = 0;
|
|
|
|
}
|
|
|
|
// we mod by 1MB since tied scores resort to sorting by docid
|
|
|
|
// so we don't want to overload the host responsible for the lowest
|
|
|
|
// range of docids. CAUTION: do this for msg22 too!
|
|
|
|
// in this way we should still ensure a pretty good biased urldb
|
|
|
|
// cache...
|
|
|
|
// . TODO: fix the urldb cache preload logic
|
2014-11-10 14:45:11 -08:00
|
|
|
int32_t hostNum = (probDocId % (128LL*1024*1024)) / sectionWidth;
|
2013-08-02 13:12:24 -07:00
|
|
|
if ( hostNum < 0 ) hostNum = 0; // watch out for negative docids
|
2016-06-20 12:30:26 +02:00
|
|
|
if ( hostNum >= nc ) { g_process.shutdownAbort(true); }
|
2014-11-10 14:45:11 -08:00
|
|
|
int32_t firstHostId = cand [ hostNum ]->m_hostId ;
|
2013-08-02 13:12:24 -07:00
|
|
|
|
|
|
|
m_requestSize = 0;
|
2015-12-04 11:09:31 +01:00
|
|
|
m_request = req->serialize ( &m_requestSize );
|
2013-08-02 13:12:24 -07:00
|
|
|
// . it sets g_errno on error and returns NULL
|
|
|
|
// . we MUST call gotReply() here to set m_gotReply
|
|
|
|
// otherwise Msg40.cpp can end up looping forever
|
|
|
|
// calling Msg40::launchMsg20s()
|
|
|
|
if ( ! m_request ) { gotReply(NULL); return true; }
|
|
|
|
|
2016-03-03 12:33:41 +01:00
|
|
|
// . otherwise, multicast to a host in group "groupId"
|
2013-08-02 13:12:24 -07:00
|
|
|
// . returns false and sets g_errno on error
|
|
|
|
// . use a pre-allocated buffer to hold the reply
|
|
|
|
// . TMPBUFSIZE is how much a UdpSlot can hold w/o allocating
|
2016-11-18 16:14:18 +01:00
|
|
|
if (!m_mcast.send(m_request, m_requestSize, msg_type_20, false, shardNum, false, probDocId, this, NULL, gotReplyWrapper20, timeout, req->m_niceness, firstHostId, false)) {
|
2013-08-02 13:12:24 -07:00
|
|
|
// sendto() sometimes returns "Network is down" so i guess
|
|
|
|
// we just had an "error reply".
|
2013-11-20 10:14:02 -07:00
|
|
|
log("msg20: error sending mcast %s",mstrerror(g_errno));
|
2013-08-02 13:12:24 -07:00
|
|
|
m_gotReply = true;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// we are officially "in progress"
|
|
|
|
m_inProgress = true;
|
|
|
|
|
|
|
|
// we blocked
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2016-09-08 13:26:45 +02:00
|
|
|
void Msg20::gotReplyWrapper20 ( void *state , void */*state2*/ ) {
|
2013-08-02 13:12:24 -07:00
|
|
|
Msg20 *THIS = (Msg20 *)state;
|
|
|
|
// gotReply() does not block, and does NOT call our callback
|
|
|
|
THIS->gotReply ( NULL ) ;
|
2016-09-26 12:30:04 +02:00
|
|
|
|
|
|
|
if ( THIS->m_callback ) {
|
|
|
|
THIS->m_callback ( THIS->m_state );
|
|
|
|
}
|
|
|
|
else
|
|
|
|
if( THIS->m_callback2 ) {
|
|
|
|
THIS->m_callback2 ( THIS->m_state );
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
log(LOG_LOGIC,"%s:%s: No callback!", __FILE__, __func__);
|
|
|
|
g_process.shutdownAbort(true);
|
|
|
|
}
|
2013-08-02 13:12:24 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// . set m_reply/m_replySize to the reply
|
|
|
|
void Msg20::gotReply ( UdpSlot *slot ) {
|
|
|
|
// we got the reply
|
|
|
|
m_gotReply = true;
|
2014-11-17 18:13:36 -08:00
|
|
|
// no longer in progress, we got a reply
|
2013-08-02 13:12:24 -07:00
|
|
|
m_inProgress = false;
|
|
|
|
// sanity check
|
2016-06-20 12:30:26 +02:00
|
|
|
if ( m_r ) { g_process.shutdownAbort(true); }
|
2014-02-13 11:21:39 -08:00
|
|
|
|
|
|
|
// free our serialized request buffer to save mem
|
2015-12-04 11:09:31 +01:00
|
|
|
if ( m_request ) {
|
2014-11-25 11:00:27 -07:00
|
|
|
mfree ( m_request , m_requestSize , "Msg20rb2" );
|
2014-02-13 11:21:39 -08:00
|
|
|
m_request = NULL;
|
|
|
|
}
|
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
// save error so Msg40 can look at it
|
|
|
|
if ( g_errno ) {
|
2016-06-29 16:32:04 +02:00
|
|
|
m_errno = g_errno;
|
|
|
|
log( LOG_WARN, "query: msg20: got reply for docid %" PRId64" : %s", m_requestDocId,mstrerror(g_errno));
|
2013-08-02 13:12:24 -07:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
// . get the best reply we got
|
|
|
|
// . we are responsible for freeing this reply
|
|
|
|
bool freeit;
|
|
|
|
// . freeit is true if mcast will free it
|
|
|
|
// . we should always own it since we call deserialize and has ptrs
|
|
|
|
// into it
|
|
|
|
char *rp = NULL;
|
|
|
|
if ( slot ) {
|
|
|
|
rp = slot->m_readBuf;
|
|
|
|
m_replySize = slot->m_readBufSize;
|
|
|
|
m_replyMaxSize = slot->m_readBufMaxSize;
|
|
|
|
freeit = false;
|
|
|
|
}
|
2016-02-28 10:44:45 +01:00
|
|
|
else {
|
2013-08-02 13:12:24 -07:00
|
|
|
rp =m_mcast.getBestReply(&m_replySize,&m_replyMaxSize,&freeit);
|
2016-02-28 10:44:45 +01:00
|
|
|
}
|
2013-08-02 13:12:24 -07:00
|
|
|
|
|
|
|
relabel( rp , m_replyMaxSize, "Msg20-mcastGBR" );
|
|
|
|
|
2014-11-25 11:00:27 -07:00
|
|
|
// sanity check. make sure multicast is not going to free the
|
|
|
|
// slot's m_readBuf... we need to own it.
|
2013-08-02 13:12:24 -07:00
|
|
|
if ( freeit ) {
|
|
|
|
log(LOG_LOGIC,"query: msg20: gotReply: Bad engineer.");
|
2016-06-20 12:30:26 +02:00
|
|
|
g_process.shutdownAbort(true);
|
2013-08-02 13:12:24 -07:00
|
|
|
}
|
2016-02-28 10:44:45 +01:00
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
// see if too small for a getSummary request
|
2014-11-10 14:45:11 -08:00
|
|
|
if ( m_replySize < (int32_t)sizeof(Msg20Reply) ) {
|
2013-08-02 13:12:24 -07:00
|
|
|
log("query: Summary reply is too small.");
|
2016-06-20 12:30:26 +02:00
|
|
|
//g_process.shutdownAbort(true);
|
2013-08-02 13:12:24 -07:00
|
|
|
m_errno = g_errno = EREPLYTOOSMALL; return; }
|
|
|
|
|
|
|
|
// cast it
|
|
|
|
m_r = (Msg20Reply *)rp;
|
2014-11-25 11:00:27 -07:00
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
// we own it now
|
|
|
|
m_ownReply = true;
|
2016-02-28 10:44:45 +01:00
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
// deserialize it, sets g_errno on error??? not yet TODO!
|
|
|
|
m_r->deserialize();
|
|
|
|
}
|
|
|
|
|
2016-09-08 15:50:34 +02:00
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
// . this is called
|
|
|
|
// . destroys the UdpSlot if false is returned
|
2016-09-08 15:20:46 +02:00
|
|
|
static void handleRequest20(UdpSlot *slot, int32_t netnice) {
|
2013-08-02 13:12:24 -07:00
|
|
|
// . check g_errno
|
|
|
|
// . before, we were not sending a reply back here and we continued
|
|
|
|
// to process the request, even though it was empty. the slot
|
|
|
|
// had a NULL m_readBuf because it could not alloc mem for the read
|
|
|
|
// buf i'm assuming. and the slot was saved in a line below here...
|
|
|
|
// state20->m_msg22.m_parent = slot;
|
|
|
|
if ( g_errno ) {
|
2016-07-18 13:34:13 +02:00
|
|
|
log(LOG_WARN, "net: Msg20 handler got error: %s.",mstrerror(g_errno));
|
2016-03-18 23:44:28 +01:00
|
|
|
log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
|
2013-08-02 13:12:24 -07:00
|
|
|
g_udpServer.sendErrorReply ( slot , g_errno );
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// ensure request is big enough
|
2014-11-10 14:45:11 -08:00
|
|
|
if ( slot->m_readBufSize < (int32_t)sizeof(Msg20Request) ) {
|
2016-07-18 13:34:13 +02:00
|
|
|
log(LOG_ERROR,"%s:%s:%d: call sendErrorReply. Bad request size", __FILE__, __func__, __LINE__);
|
2013-08-02 13:12:24 -07:00
|
|
|
g_udpServer.sendErrorReply ( slot , EBADREQUESTSIZE );
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// parse the request
|
|
|
|
Msg20Request *req = (Msg20Request *)slot->m_readBuf;
|
|
|
|
|
|
|
|
// . turn the string offsets into ptrs in the request
|
|
|
|
// . this is "destructive" on "request"
|
2014-11-10 14:45:11 -08:00
|
|
|
int32_t nb = req->deserialize();
|
2013-08-02 13:12:24 -07:00
|
|
|
// sanity check
|
2016-06-20 12:30:26 +02:00
|
|
|
if ( nb != slot->m_readBufSize ) { g_process.shutdownAbort(true); }
|
2013-08-02 13:12:24 -07:00
|
|
|
|
|
|
|
// sanity check, the size include the \0
|
2014-03-06 10:45:13 -08:00
|
|
|
if ( req->m_collnum < 0 ) {
|
2017-05-10 17:54:00 +02:00
|
|
|
char ipbuf[16];
|
2016-07-18 13:34:13 +02:00
|
|
|
log(LOG_WARN, "query: Got empty collection in msg20 handler. FIX! "
|
2017-05-10 17:54:00 +02:00
|
|
|
"from ip=%s port=%i",iptoa(slot->getIp(),ipbuf),(int)slot->getPort());
|
2016-03-18 23:44:28 +01:00
|
|
|
|
|
|
|
log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
|
|
|
|
g_udpServer.sendErrorReply ( slot , ENOTFOUND );
|
2015-09-23 14:39:13 -07:00
|
|
|
return;
|
2013-08-02 13:12:24 -07:00
|
|
|
}
|
2016-02-28 10:44:45 +01:00
|
|
|
|
2016-03-03 16:52:21 +01:00
|
|
|
int64_t cache_key = req->makeCacheKey();
|
|
|
|
const void *cached_summary;
|
|
|
|
size_t cached_summary_len;
|
|
|
|
if(g_stable_summary_cache.lookup(cache_key, &cached_summary, &cached_summary_len) ||
|
|
|
|
g_unstable_summary_cache.lookup(cache_key, &cached_summary, &cached_summary_len))
|
|
|
|
{
|
2016-03-18 15:19:48 +01:00
|
|
|
log(LOG_DEBUG, "query: Summary cache hit");
|
2016-03-03 16:52:21 +01:00
|
|
|
sendCachedReply(req,cached_summary,cached_summary_len,slot);
|
|
|
|
return;
|
|
|
|
} else
|
2016-03-18 15:19:48 +01:00
|
|
|
log(LOG_DEBUG, "query: Summary cache miss");
|
2016-03-03 16:52:21 +01:00
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
// if it's not stored locally that's an error
|
2016-09-13 11:06:46 +02:00
|
|
|
if ( req->m_docId >= 0 && ! Titledb::isLocal ( req->m_docId ) ) {
|
2016-07-18 13:34:13 +02:00
|
|
|
log(LOG_WARN, "query: Got msg20 request for non-local docId %" PRId64, req->m_docId);
|
2016-03-18 23:44:28 +01:00
|
|
|
log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
|
|
|
|
g_udpServer.sendErrorReply ( slot , ENOTLOCAL );
|
2013-08-02 13:12:24 -07:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
// sanity
|
2016-06-20 12:30:26 +02:00
|
|
|
if ( req->m_docId == 0 && ! req->ptr_ubuf ) { //g_process.shutdownAbort(true); }
|
2016-07-18 13:34:13 +02:00
|
|
|
log( LOG_WARN, "query: Got msg20 request for docid of 0 and no url for "
|
2016-05-20 09:18:32 +02:00
|
|
|
"collnum=%" PRId32" query %s",(int32_t)req->m_collnum,req->ptr_qbuf);
|
2016-07-18 13:34:13 +02:00
|
|
|
|
2016-03-18 23:44:28 +01:00
|
|
|
log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
|
|
|
|
g_udpServer.sendErrorReply ( slot , ENOTFOUND );
|
2014-05-16 07:59:04 -07:00
|
|
|
return;
|
|
|
|
}
|
2013-08-02 13:12:24 -07:00
|
|
|
|
2014-10-30 13:36:39 -06:00
|
|
|
int64_t startTime = gettimeofdayInMilliseconds();
|
2013-08-02 13:12:24 -07:00
|
|
|
|
|
|
|
// alloc a new state to get the titlerec
|
2016-09-08 15:50:34 +02:00
|
|
|
Msg20State *state;
|
|
|
|
try {
|
|
|
|
state = new Msg20State(slot,req);
|
2017-05-07 20:51:33 +02:00
|
|
|
} catch(std::bad_alloc&) {
|
2013-08-02 13:12:24 -07:00
|
|
|
g_errno = ENOMEM;
|
2016-05-20 09:18:32 +02:00
|
|
|
log("query: msg20 new(%" PRId32"): %s", (int32_t)sizeof(XmlDoc),
|
2013-08-02 13:12:24 -07:00
|
|
|
mstrerror(g_errno));
|
2016-07-18 13:34:13 +02:00
|
|
|
log(LOG_ERROR,"%s:%s:%d: call sendErrorReply. error=%s", __FILE__, __func__, __LINE__, mstrerror( g_errno ));
|
2013-08-02 13:12:24 -07:00
|
|
|
g_udpServer.sendErrorReply ( slot, g_errno );
|
|
|
|
return;
|
|
|
|
}
|
2016-09-08 15:50:34 +02:00
|
|
|
mnew(state, sizeof(*state), "xd20");
|
2013-08-02 13:12:24 -07:00
|
|
|
|
|
|
|
// ok, let's use the new XmlDoc.cpp class now!
|
2016-09-09 14:45:31 +02:00
|
|
|
state->m_xmldoc.setMsg20Request(req);
|
2016-02-28 10:44:45 +01:00
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
// set the callback
|
2016-09-08 15:50:34 +02:00
|
|
|
state->m_xmldoc.setCallback(state, gotReplyWrapperxd);
|
2016-02-28 10:44:45 +01:00
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
// set set time
|
2016-09-08 15:50:34 +02:00
|
|
|
state->m_xmldoc.m_setTime = startTime;
|
|
|
|
state->m_xmldoc.m_cpuSummaryStartTime = 0;
|
2016-02-28 10:44:45 +01:00
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
// . now as for the msg20 reply!
|
|
|
|
// . TODO: move the parse state cache into just a cache of the
|
|
|
|
// XmlDoc itself, and put that cache logic into XmlDoc.cpp so
|
|
|
|
// it can be used more generally.
|
2016-09-08 15:50:34 +02:00
|
|
|
Msg20Reply *reply = state->m_xmldoc.getMsg20Reply ( );
|
2016-02-28 10:44:45 +01:00
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
// this is just blocked
|
|
|
|
if ( reply == (void *)-1 ) return;
|
2016-02-28 10:44:45 +01:00
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
// got it?
|
2016-09-08 15:50:34 +02:00
|
|
|
gotReplyWrapperxd (state);
|
2013-08-02 13:12:24 -07:00
|
|
|
}
|
|
|
|
|
2016-09-08 15:50:34 +02:00
|
|
|
bool gotReplyWrapperxd(void *state_) {
|
|
|
|
Msg20State *state = static_cast<Msg20State*>(state_);
|
2013-08-02 13:12:24 -07:00
|
|
|
// print time
|
2014-10-30 13:36:39 -06:00
|
|
|
int64_t now = gettimeofdayInMilliseconds();
|
2016-09-08 15:50:34 +02:00
|
|
|
int64_t took = now - state->m_xmldoc.m_setTime;
|
2016-03-01 11:01:11 +01:00
|
|
|
int64_t took2 = 0;
|
2016-09-08 15:50:34 +02:00
|
|
|
if ( state->m_xmldoc.m_cpuSummaryStartTime) {
|
|
|
|
took2 = now - state->m_xmldoc.m_cpuSummaryStartTime;
|
2016-03-01 11:01:11 +01:00
|
|
|
}
|
2013-12-01 11:53:41 -07:00
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
// if there is a baclkog of msg20 summary generation requests this
|
2014-11-17 18:24:38 -08:00
|
|
|
// is really not the cpu it took to make the smmary, but how long it
|
2013-08-02 13:12:24 -07:00
|
|
|
// took to get the reply. this request might have had to wait for the
|
|
|
|
// other summaries to finish computing before it got its turn,
|
|
|
|
// meanwhile its clock was ticking. TODO: make this better?
|
|
|
|
// only do for niceness 0 otherwise it gets interrupted by quickpoll
|
2014-11-10 14:45:11 -08:00
|
|
|
// and can take a int32_t time.
|
2016-09-08 15:50:34 +02:00
|
|
|
if ( state->m_req->m_niceness == 0 && (state->m_req->m_isDebug || took > 100 || took2 > 100 ) ) {
|
2016-05-20 09:18:32 +02:00
|
|
|
log(LOG_TIMING, "query: Took %" PRId64" ms (total=%" PRId64" ms) to compute summary for d=%" PRId64" "
|
2016-03-01 11:01:11 +01:00
|
|
|
"u=%s status=%s q=%s",
|
|
|
|
took2,
|
|
|
|
took,
|
2016-09-08 15:50:34 +02:00
|
|
|
state->m_xmldoc.m_docId, state->m_xmldoc.m_firstUrl.getUrl(),
|
2016-03-01 11:01:11 +01:00
|
|
|
mstrerror(g_errno),
|
2016-09-08 15:50:34 +02:00
|
|
|
state->m_req->ptr_qbuf);
|
2016-03-01 11:01:11 +01:00
|
|
|
}
|
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
// error?
|
2016-09-08 15:50:34 +02:00
|
|
|
if ( g_errno ) {
|
|
|
|
state->m_xmldoc.m_reply.sendReply(state);
|
|
|
|
return true;
|
|
|
|
}
|
2013-08-02 13:12:24 -07:00
|
|
|
// this should not block now
|
2016-09-08 15:50:34 +02:00
|
|
|
Msg20Reply *reply = state->m_xmldoc.getMsg20Reply();
|
2013-08-02 13:12:24 -07:00
|
|
|
// sanity check, should not block here now
|
2016-06-20 12:30:26 +02:00
|
|
|
if ( reply == (void *)-1 ) { g_process.shutdownAbort(true); }
|
2013-08-02 13:12:24 -07:00
|
|
|
// NULL means error, -1 means blocked. on error g_errno should be set
|
2016-06-20 12:30:26 +02:00
|
|
|
if ( ! reply && ! g_errno ) { g_process.shutdownAbort(true);}
|
2013-08-02 13:12:24 -07:00
|
|
|
// send it off. will send an error reply if g_errno is set
|
2016-09-08 15:50:34 +02:00
|
|
|
return reply->sendReply(state);
|
2013-08-02 13:12:24 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// Zero-initializes every scalar member and clears all ptr_*/size_* pairs.
Msg20Reply::Msg20Reply ( ) {
	// addresses and identity
	m_ip                      = 0;
	m_firstIp                 = 0;
	m_docId                   = 0;
	m_collnum                 = 0;
	m_errno                   = 0;
	m_midDomHash              = 0;

	// dates
	m_firstSpidered           = 0;
	m_lastSpidered            = 0;
	m_lastModified            = 0;
	m_datedbDate              = 0;
	m_firstIndexedDate        = 0;
	m_discoveryDate           = 0;
	m_pageInlinksLastUpdated  = 0;

	// document properties
	m_wordPosStart            = 0;
	m_noArchive               = 0;
	m_contentType             = 0;
	m_siteRank                = 0;
	m_isBanned                = false;
	m_hopcount                = 0;
	m_recycled                = 0;
	m_language                = langUnknown;
	m_country                 = 0;
	m_isAdult                 = false;
	m_httpStatus              = 0;
	m_contentLen              = 0;
	m_contentHash32           = 0;
	m_isPermalink             = 0;
	m_isDisplaySumSetFromTags = 0;

	// link statistics
	m_pageNumInlinks          = 0;
	m_pageNumGoodInlinks      = 0;
	m_pageNumUniqueIps        = 0;
	m_pageNumUniqueCBlocks    = 0;
	m_siteNumInlinks          = 0;
	m_numOutlinks             = 0;
	m_linkTextNumWords        = 0;
	m_isLinkSpam              = 0;
	m_outlinkInContent        = 0;
	m_outlinkInComment        = 0;

	// variable-length buffers: pointer and size side by side
	ptr_tbuf            = NULL;  size_tbuf            = 0;
	ptr_htag            = NULL;  size_htag            = 0;
	ptr_ubuf            = NULL;  size_ubuf            = 0;
	ptr_rubuf           = NULL;  size_rubuf           = 0;
	ptr_displaySum      = NULL;  size_displaySum      = 0;
	ptr_dbuf            = NULL;  size_dbuf            = 0;
	ptr_vbuf            = NULL;  size_vbuf            = 0;
	ptr_imgData         = NULL;  size_imgData         = 0;
	ptr_site            = NULL;  size_site            = 0;
	ptr_linkInfo        = NULL;  size_linkInfo        = 0;
	ptr_outlinks        = NULL;  size_outlinks        = 0;
	ptr_vector1         = NULL;  size_vector1         = 0;
	ptr_vector2         = NULL;  size_vector2         = 0;
	ptr_vector3         = NULL;  size_vector3         = 0;
	ptr_linkText        = NULL;  size_linkText        = 0;
	ptr_surroundingText = NULL;  size_surroundingText = 0;
	ptr_linkUrl         = NULL;  size_linkUrl         = 0;
	ptr_rssItem         = NULL;  size_rssItem         = 0;
	ptr_categories      = NULL;  size_categories      = 0;
	ptr_content         = NULL;  size_content         = 0; // page content in utf8
	ptr_templateVector  = NULL;  size_templateVector  = 0;
	ptr_metadataBuf     = NULL;  size_metadataBuf     = 0;
	ptr_note            = NULL;  size_note            = 0;
}
|
|
|
|
|
|
|
|
|
|
|
|
// we need to free the ptr_summaryLines if it is pointing into a new buffer
|
|
|
|
// which is what Msg40 sometimes does to it when it merges Msg20Reply's
|
|
|
|
// summaries for events together.
|
|
|
|
Msg20Reply::~Msg20Reply ( ) {
|
|
|
|
destructor();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Explicit cleanup hook; there is currently nothing to release.
void Msg20Reply::destructor ( ) {
	return;
}
|
|
|
|
|
2014-06-20 12:28:50 -07:00
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
// . return ptr to the buffer we serialize into
|
|
|
|
// . return NULL and set g_errno on error
|
2016-09-08 15:50:34 +02:00
|
|
|
bool Msg20Reply::sendReply(Msg20State *state) {
|
2013-08-02 13:12:24 -07:00
|
|
|
if ( g_errno ) {
|
|
|
|
// extract titleRec ptr
|
2016-09-08 15:50:34 +02:00
|
|
|
log(LOG_ERROR, "query: Had error generating msg20 reply for d=%" PRId64": %s",state->m_xmldoc.m_docId, mstrerror(g_errno));
|
2013-08-02 13:12:24 -07:00
|
|
|
// don't forget to delete this list
|
|
|
|
haderror:
|
2016-09-08 16:14:13 +02:00
|
|
|
UdpSlot *slot = state->m_slot;
|
2016-09-08 15:50:34 +02:00
|
|
|
mdelete(state, sizeof(*state), "Msg20");
|
|
|
|
delete state;
|
2016-07-18 13:34:13 +02:00
|
|
|
log(LOG_ERROR,"%s:%s:%d: call sendErrorReply. error=%s", __FILE__, __func__, __LINE__, mstrerror( g_errno ));
|
2016-09-08 16:14:13 +02:00
|
|
|
g_udpServer.sendErrorReply(slot, g_errno);
|
2013-08-02 13:12:24 -07:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// now create a buffer to store title/summary/url/docLen and send back
|
2014-11-10 14:45:11 -08:00
|
|
|
int32_t need = getStoredSize();
|
2013-08-02 13:12:24 -07:00
|
|
|
char *buf = (char *)mmalloc ( need , "Msg20Reply" );
|
|
|
|
if ( ! buf ) goto haderror;
|
|
|
|
|
|
|
|
// should never have an error!
|
2014-11-10 14:45:11 -08:00
|
|
|
int32_t used = serialize ( buf , need );
|
2013-08-02 13:12:24 -07:00
|
|
|
|
2017-05-18 12:39:53 +02:00
|
|
|
// sanity
|
|
|
|
if (ptr_linkInfo && ((LinkInfo *)ptr_linkInfo)->m_lisize != size_linkInfo) {
|
2017-05-21 22:49:45 +02:00
|
|
|
log(LOG_ERROR,"!!! CORRUPTED LINKINFO detected for docId %" PRId64 " - resetting linkinfo", state->m_xmldoc.m_docId);
|
|
|
|
size_linkInfo = 0;
|
|
|
|
ptr_linkInfo = NULL;
|
|
|
|
// gbshutdownAbort(true);
|
2017-05-18 12:39:53 +02:00
|
|
|
}
|
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
// sanity
|
2016-06-20 12:30:26 +02:00
|
|
|
if ( used != need ) { g_process.shutdownAbort(true); }
|
2013-08-02 13:12:24 -07:00
|
|
|
|
|
|
|
// use blue for our color
|
2014-11-10 14:45:11 -08:00
|
|
|
int32_t color = 0x0000ff;
|
2016-03-03 12:33:41 +01:00
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
// but use dark blue for niceness > 0
|
2016-09-08 15:50:34 +02:00
|
|
|
if ( state->m_xmldoc.m_niceness > 0 ) color = 0x0000b0;
|
2013-08-02 13:12:24 -07:00
|
|
|
|
|
|
|
// sanity check
|
2016-09-08 15:50:34 +02:00
|
|
|
if ( ! state->m_xmldoc.m_utf8ContentValid ) { g_process.shutdownAbort(true); }
|
2016-03-03 12:33:41 +01:00
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
// for records
|
2014-11-10 14:45:11 -08:00
|
|
|
int32_t clen = 0;
|
2016-03-03 12:33:41 +01:00
|
|
|
|
2016-09-08 15:50:34 +02:00
|
|
|
if ( state->m_xmldoc.m_utf8ContentValid ) clen = state->m_xmldoc.size_utf8Content - 1;
|
2016-03-03 12:33:41 +01:00
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
// show it in performance graph
|
2016-09-08 15:50:34 +02:00
|
|
|
if ( state->m_xmldoc.m_startTimeValid ) {
|
|
|
|
g_stats.addStat_r( clen, state->m_xmldoc.m_startTime, gettimeofdayInMilliseconds(), color );
|
2016-06-29 11:28:58 +02:00
|
|
|
}
|
2013-08-02 13:12:24 -07:00
|
|
|
|
2015-06-30 14:09:57 -06:00
|
|
|
|
2016-03-03 16:52:21 +01:00
|
|
|
//put the reply into the summary cache
|
2016-09-08 15:50:34 +02:00
|
|
|
if(m_isDisplaySumSetFromTags && !state->m_req->m_highlightQueryTerms)
|
|
|
|
g_stable_summary_cache.insert(state->m_req->makeCacheKey(), buf, need);
|
2016-03-03 16:52:21 +01:00
|
|
|
else
|
2016-09-08 15:50:34 +02:00
|
|
|
g_unstable_summary_cache.insert(state->m_req->makeCacheKey(), buf, need);
|
2016-03-03 16:52:21 +01:00
|
|
|
|
2016-09-08 15:50:34 +02:00
|
|
|
UdpSlot *slot = state->m_slot;
|
2013-08-02 13:12:24 -07:00
|
|
|
// . del the list at this point, we've copied all the data into reply
|
|
|
|
// . this will free a non-null State20::m_ps (ParseState) for us
|
2016-09-08 15:50:34 +02:00
|
|
|
mdelete(state, sizeof(*state), "Msg20");
|
|
|
|
delete state;
|
2013-08-02 13:12:24 -07:00
|
|
|
|
2016-08-11 17:22:00 +02:00
|
|
|
g_udpServer.sendReply(buf, need, buf, need, slot);
|
2013-08-02 13:12:24 -07:00
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2016-03-03 16:52:21 +01:00
|
|
|
|
|
|
|
// . sends the cached summary "cached_summary" as the reply on "slot"
// . sends an error reply if the copy of the summary can not be allocated
// . "req" is not used; always returns true
static bool sendCachedReply ( Msg20Request *req, const void *cached_summary, size_t cached_summary_len, UdpSlot *slot )
{
	// the reply is sent from a private copy of the cached summary, so that
	// UDPSlot/Server can free it when possible
	char *copy = (char *)mmalloc ( cached_summary_len , "Msg20Reply" );

	if ( copy != NULL ) {
		memcpy(copy,cached_summary,cached_summary_len);
		g_udpServer.sendReply(copy, cached_summary_len, copy, cached_summary_len, slot);
		return true;
	}

	// out of memory
	log(LOG_ERROR,"%s:%s:%d: call sendErrorReply. error=%s", __FILE__, __func__, __LINE__, mstrerror( g_errno ));
	g_udpServer.sendErrorReply ( slot , g_errno ) ;
	return true;
}
|
|
|
|
|
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
// . this is destructive on the "buf". it converts offs to ptrs
|
|
|
|
// . sets m_r to the modified "buf" when done
|
|
|
|
// . sets g_errno and returns -1 on error, otherwise # of bytes deseril
|
2014-11-10 14:45:11 -08:00
|
|
|
int32_t Msg20::deserialize ( char *buf , int32_t bufSize ) {
|
|
|
|
if ( bufSize < (int32_t)sizeof(Msg20Reply) ) {
|
2013-08-02 13:12:24 -07:00
|
|
|
g_errno = ECORRUPTDATA; return -1; }
|
|
|
|
m_r = (Msg20Reply *)buf;
|
|
|
|
// do not free "buf"/"m_r"
|
|
|
|
m_ownReply = false;
|
|
|
|
return m_r->deserialize ( );
|
|
|
|
}
|
|
|
|
|
2016-09-08 14:17:18 +02:00
|
|
|
int32_t Msg20Request::getStoredSize() const {
|
2016-04-04 12:07:03 +02:00
|
|
|
return getMsgStoredSize(sizeof(*this), &size_qbuf, &size_displayMetas);
|
2013-08-02 13:12:24 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// . return ptr to the buffer we serialize into
|
|
|
|
// . return NULL and set g_errno on error
|
2016-09-08 14:17:18 +02:00
|
|
|
char *Msg20Request::serialize(int32_t *retSize) const {
|
2013-08-02 13:12:24 -07:00
|
|
|
// make a buffer to serialize into
|
2014-11-10 14:45:11 -08:00
|
|
|
int32_t need = getStoredSize();
|
2013-08-02 13:12:24 -07:00
|
|
|
// alloc if we should
|
2015-12-04 11:09:31 +01:00
|
|
|
char *buf = (char *)mmalloc ( need , "Msg20Ra" );
|
2013-08-02 13:12:24 -07:00
|
|
|
// bail on error, g_errno should be set
|
|
|
|
if ( ! buf ) return NULL;
|
2016-04-04 12:07:03 +02:00
|
|
|
|
|
|
|
return serializeMsg(sizeof(*this),
|
|
|
|
&size_qbuf, &size_displayMetas,
|
|
|
|
&ptr_qbuf,
|
|
|
|
this,
|
|
|
|
retSize,
|
2016-09-08 14:13:42 +02:00
|
|
|
buf, need);
|
2013-08-02 13:12:24 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// convert offsets back into ptrs
|
2014-11-10 14:45:11 -08:00
|
|
|
int32_t Msg20Request::deserialize ( ) {
|
2016-04-04 12:07:03 +02:00
|
|
|
return deserializeMsg(sizeof(*this),
|
|
|
|
&size_qbuf, &size_displayMetas,
|
|
|
|
&ptr_qbuf,
|
|
|
|
((char*)this) + sizeof(*this));
|
2013-08-02 13:12:24 -07:00
|
|
|
}
|
|
|
|
|
2016-03-03 16:52:21 +01:00
|
|
|
|
|
|
|
//make a cache key for a request
|
|
|
|
int64_t Msg20Request::makeCacheKey() const
|
|
|
|
{
|
|
|
|
SafeBuf hash_buffer;
|
|
|
|
hash_buffer.pushLong(m_numSummaryLines);
|
|
|
|
hash_buffer.pushLong(m_getHeaderTag);
|
|
|
|
hash_buffer.pushLongLong(m_docId);
|
|
|
|
hash_buffer.pushLong(m_titleMaxLen);
|
|
|
|
hash_buffer.pushLong(m_summaryMaxLen);
|
|
|
|
hash_buffer.pushLong(m_summaryMaxNumCharsPerLine);
|
|
|
|
hash_buffer.pushLong(m_collnum);
|
|
|
|
hash_buffer.pushLong(m_highlightQueryTerms);
|
|
|
|
hash_buffer.pushLong(m_getSummaryVector);
|
|
|
|
hash_buffer.pushLong(m_showBanned);
|
|
|
|
hash_buffer.pushLong(m_includeCachedCopy);
|
|
|
|
hash_buffer.pushLong(m_doLinkSpamCheck);
|
|
|
|
hash_buffer.pushLong(m_isLinkSpam);
|
|
|
|
hash_buffer.pushLong(m_isSiteLinkInfo);
|
|
|
|
hash_buffer.pushLong(m_getLinkInfo);
|
|
|
|
hash_buffer.pushLong(m_onlyNeedGoodInlinks);
|
|
|
|
hash_buffer.pushLong(m_getLinkText);
|
2016-08-05 12:14:42 +02:00
|
|
|
hash_buffer.safeMemcpy(ptr_qbuf,size_qbuf);
|
2016-03-03 16:52:21 +01:00
|
|
|
hash_buffer.safeMemcpy(ptr_ubuf,size_ubuf);
|
|
|
|
hash_buffer.safeMemcpy(ptr_linkee,size_linkee);
|
|
|
|
hash_buffer.safeMemcpy(ptr_displayMetas,size_displayMetas);
|
|
|
|
int64_t h = hash64(hash_buffer.getBufStart(), hash_buffer.length());
|
|
|
|
return h;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2016-09-08 14:17:18 +02:00
|
|
|
int32_t Msg20Reply::getStoredSize() const {
|
2016-04-04 11:57:31 +02:00
|
|
|
return getMsgStoredSize(sizeof(*this), &size_tbuf, &size_note);
|
2013-08-02 13:12:24 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// returns NULL and set g_errno on error
|
2016-09-08 14:17:18 +02:00
|
|
|
int32_t Msg20Reply::serialize(char *buf, int32_t bufSize) const {
|
2016-02-12 15:08:11 +01:00
|
|
|
#ifdef _VALGRIND_
|
|
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(this,sizeof(*this));
|
2016-02-22 17:25:22 +01:00
|
|
|
if(ptr_htag)
|
|
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_htag,size_htag);
|
|
|
|
if(ptr_ubuf)
|
|
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_ubuf,size_ubuf);
|
|
|
|
if(ptr_rubuf)
|
|
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_rubuf,size_rubuf);
|
|
|
|
if(ptr_displaySum)
|
|
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_displaySum,size_displaySum);
|
|
|
|
if(ptr_dbuf)
|
|
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_dbuf,size_dbuf);
|
|
|
|
if(ptr_vbuf)
|
|
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_vbuf,size_vbuf);
|
|
|
|
if(ptr_imgData)
|
|
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_imgData,size_imgData);
|
|
|
|
if(ptr_site)
|
|
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_site,size_site);
|
|
|
|
if(ptr_linkInfo)
|
|
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_linkInfo,size_linkInfo);
|
|
|
|
if(ptr_outlinks)
|
|
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_outlinks,size_outlinks);
|
|
|
|
if(ptr_vector1)
|
|
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_vector1,size_vector1);
|
|
|
|
if(ptr_vector2)
|
|
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_vector2,size_vector2);
|
|
|
|
if(ptr_vector3)
|
|
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_vector3,size_vector3);
|
|
|
|
if(ptr_linkText)
|
|
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_linkText,size_linkText);
|
|
|
|
if(ptr_surroundingText)
|
|
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_surroundingText,size_surroundingText);
|
|
|
|
if(ptr_linkUrl)
|
|
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_linkUrl,size_linkUrl);
|
|
|
|
if(ptr_rssItem)
|
|
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_rssItem,size_rssItem);
|
|
|
|
if(ptr_categories)
|
|
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_categories,size_categories);
|
|
|
|
if(ptr_content)
|
|
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_content,size_content);
|
|
|
|
if(ptr_templateVector)
|
|
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_templateVector,size_templateVector);
|
|
|
|
if(ptr_metadataBuf)
|
|
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_metadataBuf,size_metadataBuf);
|
|
|
|
if(ptr_note)
|
|
|
|
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_note,size_note);
|
|
|
|
#endif
|
2016-04-04 11:57:31 +02:00
|
|
|
int32_t retSize;
|
|
|
|
serializeMsg(sizeof(*this),
|
|
|
|
&size_tbuf, &size_note,
|
|
|
|
&ptr_tbuf,
|
|
|
|
this,
|
|
|
|
&retSize,
|
2016-09-08 14:13:42 +02:00
|
|
|
buf, bufSize);
|
2016-06-20 12:30:26 +02:00
|
|
|
if ( retSize > bufSize ) { g_process.shutdownAbort(true); }
|
2013-08-02 13:12:24 -07:00
|
|
|
// return it
|
2016-04-04 11:57:31 +02:00
|
|
|
return retSize;
|
2013-08-02 13:12:24 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// convert offsets back into ptrs
|
2014-11-10 14:45:11 -08:00
|
|
|
int32_t Msg20Reply::deserialize ( ) {
|
2016-04-04 11:57:31 +02:00
|
|
|
int32_t bytesParsed = deserializeMsg(sizeof(*this),
|
|
|
|
&size_tbuf, &size_note,
|
|
|
|
&ptr_tbuf,
|
|
|
|
((char*)this) + sizeof(*this));
|
|
|
|
if(bytesParsed<0)
|
|
|
|
return bytesParsed;
|
|
|
|
|
2013-08-02 13:12:24 -07:00
|
|
|
// sanity
|
2017-05-18 12:42:23 +02:00
|
|
|
if (ptr_linkInfo && ((LinkInfo *)ptr_linkInfo)->m_lisize != size_linkInfo) {
|
2017-05-21 22:49:45 +02:00
|
|
|
log(LOG_ERROR,"!!! CORRUPTED LINKINFO detected for docId %" PRId64 " - resetting linkinfo", m_docId);
|
|
|
|
size_linkInfo = 0;
|
|
|
|
ptr_linkInfo = NULL;
|
|
|
|
// gbshutdownAbort(true);
|
2013-08-02 13:12:24 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// return how many bytes we used
|
2016-04-04 11:57:31 +02:00
|
|
|
return bytesParsed;
|
2013-08-02 13:12:24 -07:00
|
|
|
}
|