Merge branch 'master' into nomerge2

2025-07-17 02:56:07 -04:00 · 2017-02-20 16:09:13 +01:00
parent 2fa935b0e3 4a29cca774
commit eab379f3b5
26 changed files with 103 additions and 215 deletions
--- a/.gitignore
+++ b/.gitignore
@ -44,3 +44,4 @@ CMakeLists.txt
 *.gcno
 coverage*.html
 vgcore.*
+Make.depend
--- a/HttpServer.cpp
+++ b/HttpServer.cpp
@ -1150,7 +1150,8 @@ bool HttpServer::sendReply2 ( const char *mime,
 	// if we are a proxy, and not a compression proxy, then just forward
 	// the blob as-is if it is a "ZET" (GET-compressed=ZET)
 	else if ( (myHostType & HT_PROXY) && (*rb == 'Z') ) {
-		gbmemcpy ( sendBuf , content, contentLen );
+		if(content)
+			gbmemcpy ( sendBuf , content, contentLen );
 		// sanity check
 		if ( sendBufSize != contentLen ) { g_process.shutdownAbort(true); }
 		// note it
@ -1161,7 +1162,8 @@ bool HttpServer::sendReply2 ( const char *mime,
 		gbmemcpy ( p , mime , mimeLen );
 		p += mimeLen;
 		// then the page
-		gbmemcpy ( p , content, contentLen );
+		if(content)
+			gbmemcpy ( p , content, contentLen );
 		p += contentLen;
 		// sanity check
 		if ( sendBufSize != contentLen+mimeLen) { g_process.shutdownAbort(true);}
--- a/Images.cpp
+++ b/Images.cpp
@ -352,8 +352,6 @@ bool Images::getThumbnail ( const char *pageSite,

 	// just use msg0 and limit to like 1k or something
 	if ( ! m_msg0.getList ( -1    , // hostid
-				-1    , // ip
-				-1    , // port
 				0     , // maxAge
 				false , // addToCache?
 				RDB_POSDB ,
@ -367,7 +365,6 @@ bool Images::getThumbnail ( const char *pageSite,
 				MAX_NICENESS       ,
 				false , // err correction?
 				true  , // inc tree?
-				true  , // domergeobsolete
 				-1    , // firstHostId
 				0     , // start filenum
 				-1    , // numFiles
@ -440,8 +437,6 @@ bool Images::launchRequests ( ) {

 		// get the termlist
 		if ( ! m_msg0.getList ( -1    , // hostid
-					-1    , // ip
-					-1    , // port
 					0     , // maxAge
 					false , // addToCache?
 					RDB_POSDB,
@ -455,7 +450,6 @@ bool Images::launchRequests ( ) {
 					MAX_NICENESS       ,
 					false , // err correction?
 					true  , // inc tree?
-					true  , // domergeobsolete
 					-1    , // firstHostId
 					0     , // start filenum
 					-1    , // numFiles
--- a/Matches.cpp
+++ b/Matches.cpp
@ -62,7 +62,7 @@ void Matches::reset2() {
 }

 bool Matches::isMatchableTerm(const QueryTerm *qt) const {
-	QueryWord *qw = qt->m_qword;
+	const QueryWord *qw = qt->m_qword;
 	// not derived from  a query word? how?
 	if ( ! qw ) return false;
 	if ( qw->m_ignoreWord == IGNORE_DEFAULT        ) return false;
@ -160,7 +160,7 @@ void Matches::setQuery ( Query *q ) {
 		}

 		// get the word it is from
-		QueryWord *qw = qt->m_qword;
+		const QueryWord *qw = qt->m_qword;

 		// get word #
 		int32_t qwn = qw - q->m_qwords;
--- a/Mem.cpp
+++ b/Mem.cpp
@ -35,7 +35,6 @@ static const char MAGICCHAR = (char)0xda;

 class Mem g_mem;

-static bool freeCacheMem();



@ -59,7 +58,7 @@ static bool   s_initialized = 0;

 //note: the ScopedMemoryLimitBypass is not thread-safe. The "bypass" flag should really
 //be per-thread. Or RdbBase should be reworked to use another technique than artificially
-//raising the memory limit while adding a file. Eg. make freeCacheMem() work again?
+//raising the memory limit while adding a file.
 ScopedMemoryLimitBypass::ScopedMemoryLimitBypass()
  : oldMaxMem(g_conf.m_maxMem)
 {
@ -125,8 +124,9 @@ void Mem::delnew ( void *ptr , size_t size , const char *note ) {
 void * operator new (size_t size) throw (std::bad_alloc) {
 	logTrace( g_conf.m_logTraceMem, "size=%zu", size );

-	// don't let electric fence zap us
-	if ( size == 0 ) return (void *)0x7fffffff;
+	//new operator is required to return a unique pointer even for zero-byte allocations
+	if(size==0)
+		size = 1;

 	if ( allocationShouldFailRandomly() ) {
 		g_errno = ENOMEM; 
@ -163,8 +163,9 @@ void * operator new (size_t size) throw (std::bad_alloc) {
 void * operator new [] (size_t size) throw (std::bad_alloc) {
 	logTrace( g_conf.m_logTraceMem, "size=%zu", size );

-	// don't let electric fence zap us
-	if ( size == 0 ) return (void *)0x7fffffff;
+	//new operator is required to return a unique pointer even for zero-byte allocations
+	if(size==0)
+		size = 1;
 	
 	size_t max = g_conf.m_maxMem;

@ -899,8 +900,9 @@ int Mem::printMem ( ) {
 void *Mem::gbmalloc ( size_t size , const char *note ) {
 	logTrace( g_conf.m_logTraceMem, "size=%zu note='%s'", size, note );

-	// don't let electric fence zap us
-	if ( size == 0 ) return (void *)0x7fffffff;
+	//malloc() can return a NULL pointer if the size is zero
+	if(size==0)
+		return NULL;
 	
 	if ( allocationShouldFailRandomly() ) {
 		g_errno = ENOMEM; 
@ -908,13 +910,10 @@ void *Mem::gbmalloc ( size_t size , const char *note ) {
 		return NULL;
 	} 

-retry:
 	size_t max = g_conf.m_maxMem;

 	// don't go over max
 	if ( g_mem.getUsedMem() + size + UNDERPAD + OVERPAD >= max ) {
-		// try to free temp mem. returns true if it freed some.
-		if ( freeCacheMem() ) goto retry;
 		g_errno = ENOMEM;
 		log( LOG_WARN, "mem: malloc(%zu): Out of memory", size );
 		return NULL;
@ -924,12 +923,8 @@ retry:

 	mem = (void *)sysmalloc ( size + UNDERPAD + OVERPAD );

-	int32_t memLoop = 0;
-mallocmemloop:
 	if ( ! mem && size > 0 ) {
 		g_mem.m_outOfMems++;
-		// try to free temp mem. returns true if it freed some.
-		if ( freeCacheMem() ) goto retry;
 		g_errno = errno;
 		static int64_t s_lastTime;
 		static int32_t s_missed = 0;
@ -952,24 +947,6 @@ mallocmemloop:

 		return NULL;
 	}
-	if ( (PTRTYPE)mem < 0x00010000 ) {
-		void *remem = sysmalloc(size);
-		log( LOG_WARN, "mem: Caught low memory allocation "
-		      "at %08" PTRFMT", "
-		      "reallocated to %08" PTRFMT"",
-		      (PTRTYPE)mem, (PTRTYPE)remem );
-		sysfree(mem);
-		mem = remem;
-		memLoop++;
-		if ( memLoop > 100 ) {
-			log( LOG_WARN, "mem: Attempted to reallocate low "
-					"memory allocation 100 times, "
-					"aborting and returning NOMEM." );
-			g_errno = ENOMEM;
-			return NULL;
-		}
-		goto mallocmemloop;
-	}

 	logTrace( g_conf.m_logTraceMem, "mem=%p size=%zu note='%s'", mem, size, note );

@ -991,27 +968,21 @@ void *Mem::gbcalloc ( size_t size , const char *note ) {
 void *Mem::gbrealloc ( void *ptr , size_t oldSize , size_t newSize , const char *note ) {
 	logTrace( g_conf.m_logTraceMem, "ptr=%p oldSize=%zu newSize=%zu note='%s'", ptr, oldSize, newSize, note );

-	// return dummy values since realloc() returns NULL if failed
-	if ( oldSize == 0 && newSize == 0 ) return (void *)0x7fffffff;
 	// do nothing if size is same
 	if ( oldSize == newSize ) return ptr;

 	// if newSize is 0...
 	if ( newSize == 0 ) {
 		gbfree(ptr, note, oldSize, true);
-		return (void *)0x7fffffff;
+		return NULL;
 	}

-retry:
-
 	// hack so hostid #0 can use more mem
 	size_t max = g_conf.m_maxMem;
 	//if ( g_hostdb.m_hostId == 0 )  max += 2000000000;

 	// don't go over max
 	if ( g_mem.getUsedMem() + newSize - oldSize >= max ) {
-		// try to free temp mem. returns true if it freed some.
-		if ( freeCacheMem() ) goto retry;
 		g_errno = ENOMEM;
 		log( LOG_WARN, "mem: realloc(%zu,%zu): Out of memory.",oldSize,newSize);
 		return NULL;
@ -1026,8 +997,6 @@ retry:
 	rmMem(ptr, oldSize, note, true);

 	// . do the actual realloc
-	// . CAUTION: don't pass in 0x7fffffff in as "ptr" 
-	// . this was causing problems
 	char *mem = (char *)sysrealloc ( (char *)ptr - UNDERPAD , newSize + UNDERPAD + OVERPAD );

 	// remove old guy on success
@ -1063,7 +1032,7 @@ retry:
 	return mem;
 }

-char *Mem::dup ( const void *data , size_t dataSize , const char *note ) {
+void *Mem::dup ( const void *data , size_t dataSize , const char *note ) {
 	logTrace( g_conf.m_logTraceMem, "data=%p dataSize=%zu note='%s'", data, dataSize, note );

 	// keep it simple
@ -1074,10 +1043,6 @@ char *Mem::dup ( const void *data , size_t dataSize , const char *note ) {
 	return mem;
 }

-char *Mem::strdup( const char *string, const char *note ) {
-	return dup(string, strlen(string) + 1, note);
-}
-
 void Mem::gbfree ( void *ptr , const char *note, size_t size , bool checksize ) {
 	if(!s_lock.working) return;

@ -1108,15 +1073,3 @@ void Mem::gbfree ( void *ptr , const char *note, size_t size , bool checksize )
 	if ( isnew ) sysfree ( (char *)ptr );
 	else         sysfree ( (char *)ptr - UNDERPAD );
 }
-
-
-//#include "Msg20.h"
-
-static bool freeCacheMem() {
-	// returns true if it did free some stuff
-	//if ( resetMsg20Cache() ) {
-	//	log("mem: freed cache mem.");
-	//	return true;
-	//}
-	return false;
-}
--- a/Mem.h
+++ b/Mem.h
@ -30,10 +30,7 @@ class Mem {
 	void *gbcalloc  ( size_t size , const char *note);
 	void *gbrealloc ( void *oldPtr, size_t oldSize, size_t newSize, const char *note);
 	void gbfree(void *ptr, const char *note, size_t size, bool checksize);
-	char *dup     ( const void *data , size_t dataSize , const char *note);
-	char *strdup  ( const char *string, const char *note );
-
-	int32_t validate();
+	void *dup     ( const void *data , size_t dataSize , const char *note);

 	// this one does not include new/delete mem, only *alloc()/free() mem
 	size_t getUsedMem() const;
@ -74,6 +71,8 @@ class Mem {
 	const char *m_maxAllocBy; // the biggest single alloc ever done

 private:
+	int32_t validate();
+
 	int32_t getMemSlot(void *mem);

 	// currently used mem (estimate)
@ -108,7 +107,7 @@ static inline void mfree(void *ptr, size_t size, const char *note) {
 	return g_mem.gbfree(ptr, note, size, true);
 }

-static inline char *mdup(const void *data, size_t dataSize, const char *note) {
+static inline void *mdup(const void *data, size_t dataSize, const char *note) {
 	return g_mem.dup(data, dataSize, note);
 }

--- a/Msg0.cpp
+++ b/Msg0.cpp
@ -99,8 +99,6 @@ bool Msg0::registerHandler ( ) {
 //   the list updates it on disk it can't flush our cache... so use a small
 //   maxCacheAge of like , 30 seconds or so...
 bool Msg0::getList ( int64_t hostId      , // host to ask (-1 if none)
-		     int32_t      ip          , // info on hostId
-		     int16_t     port        ,
 		     int32_t      maxCacheAge , // max cached age in seconds
 		     bool      addToCache  , // add net recv'd list to cache?
 		     rdbid_t   rdbId       , // specifies the rdb
@ -114,7 +112,6 @@ bool Msg0::getList ( int64_t hostId      , // host to ask (-1 if none)
 		     int32_t      niceness    ,
 		     bool      doErrorCorrection ,
 		     bool      includeTree ,
-		     bool      doMerge     ,
 		     int32_t      firstHostId   ,
 		     int32_t      startFileNum  ,
 		     int32_t      numFiles      ,
@ -228,32 +225,6 @@ bool Msg0::getList ( int64_t hostId      , // host to ask (-1 if none)
 	// is it stored locally?
 	bool isLocal = ( m_hostId == -1 && m_shardNum == getMyShardNum() );

-	/*
-	int64_t singleDocIdQuery = 0LL;
-	if ( rdbId == RDB_POSDB ) {
-		int64_t d1 = g_posdb.getDocId(m_startKey);
-		int64_t d2 = g_posdb.getDocId(m_endKey);
-		if ( d1+1 == d2 ) singleDocIdQuery = d1;
-	}
-
-	// . try the LOCAL termlist cache
-	// . so when msg2 is evaluating a gbdocid:| query and it has to
-	//   use msg0 to go across the network to get the same damn termlist
-	//   over and over again for the same docid, this will help alot.
-	// . ideally it'd be nice if the seo pipe in xmldoc.cpp can try to
-	//   send the same gbdocid:xxxx docids to the same hosts. maybe hash
-	//   based on docid into the list of hosts and if that host is busy
-	//   just chain until we find someone not busy.
-	if ( singleDocIdQuery &&
-	     getListFromTermListCache ( coll,
-					m_startKey,
-					m_endKey,
-					maxCacheAge,
-					list ) )
-		// found!
-		return true;
-	*/
-
 	// but always local if only one host
 	if ( g_hostdb.getNumHosts() == 1 ) isLocal = true;

@ -272,7 +243,7 @@ bool Msg0::getList ( int64_t hostId      , // host to ask (-1 if none)
 			try { m_msg5 = new ( Msg5 ); } 
 			catch ( ... ) {
 				g_errno = ENOMEM;
-				log("net: Local alloc for disk read failed "
+				log(LOG_WARN, "net: Local alloc for disk read failed "
 				    "while tring to read data for %s. "
 				    "Trying remote request.",
 				    getDbnameFromId(m_rdbId));
@ -282,7 +253,7 @@ bool Msg0::getList ( int64_t hostId      , // host to ask (-1 if none)
 			m_deleteMsg5 = true;
 		}

-		if ( ! m_msg5->getList ( (rdbid_t)rdbId,
+		if ( ! m_msg5->getList ( rdbId,
 					 m_collnum ,
 					 m_list ,
 					 m_startKey ,
@ -317,13 +288,10 @@ skip:
 		log(LOG_DEBUG,"net: msg0: Sending request for data to "
 		    "shard=%" PRIu32" "
 		    "listPtr=%" PTRFMT" minRecSizes=%" PRId32" termId=%" PRIu64" "
-		    //"startKey.n1=%" PRIx32",n0=%" PRIx64" (niceness=%" PRId32")",
 		    "startKey.n1=%" PRIx64",n0=%" PRIx64" (niceness=%" PRId32")",
-		    //g_hostdb.makeHostId ( m_groupId ) ,
 		    m_shardNum,
 		    (PTRTYPE)m_list,
 		    m_minRecSizes, Posdb::getTermId(m_startKey) ,
-		    //m_startKey.n1,m_startKey.n0 , (int32_t)m_niceness);
 		    KEY1(m_startKey,m_ks),KEY0(m_startKey),
 		    (int32_t)m_niceness);

@ -346,8 +314,6 @@ skip:
 	*p               = (char)m_allowPageCache; p++;
 	KEYSET(p,m_startKey,m_ks);          ; p+=m_ks;
 	KEYSET(p,m_endKey,m_ks);            ; p+=m_ks;
-	// NULL terminated collection name
-	//strcpy ( p , coll ); p += strlen ( coll ); *p++ = '\0';
 	*(collnum_t *)p = m_collnum; p += sizeof(collnum_t);
 	m_requestSize    = p - m_request;
 	// ask an individual host for this list if hostId is NOT -1
@ -392,7 +358,6 @@ skip:
 	// . need to send out to all the indexdb split hosts
 	m_numRequests = 0;
 	m_numReplies  = 0;
-	//for ( int32_t i = 0; i < m_numSplit; i++ ) {

 	// get the multicast
 	Multicast *m = &m_mcast;
--- a/Msg0.h
+++ b/Msg0.h
@ -43,8 +43,6 @@ class Msg0 {
 	//   out of sync with the data
 	// . a maxCacheAge of 0 (or negative) means not to check the cache
 	bool getList ( int64_t hostId      , // -1 if unspecified
-		       int32_t      ip          , // info on hostId
-		       int16_t     port        ,
 		       int32_t      maxCacheAge , // max cached age in seconds
 		       bool      addToCache  , // add net recv'd list to cache?
 		       rdbid_t   rdbId       , // specifies the rdb
@ -58,7 +56,6 @@ class Msg0 {
 		       int32_t      niceness    ,
 		       bool      doErrorCorrection = true ,
 		       bool      includeTree       = true ,
-		       bool      doMerge           = true ,
 		       int32_t      firstHostId       = -1   ,
 		       int32_t      startFileNum      =  0   ,
 		       int32_t      numFiles          = -1   ,
--- a/Msg13.cpp
+++ b/Msg13.cpp
@ -226,6 +226,13 @@ bool Msg13::forwardRequest ( ) {
 		if ( ++hostId >= nh ) hostId = 0;
 	}

+	if(!h) {
+		//all spider hosts dead (or misconfiguration)
+		if(!g_errno)
+			g_errno = ENOHOSTS;
+		log("spider: msg13 request: %s",mstrerror(g_errno));
+		return true;
+	}

 	hostId = 0; // HACK!!

--- a/Msg39.cpp
+++ b/Msg39.cpp
@ -610,7 +610,7 @@ void Msg39::getLists(int fileNum, int64_t docIdStart, int64_t docIdEnd) {
 			//char *tpc = qt->m_term + qt->m_termLen;
 			char sign = qt->m_termSign;
 			if ( sign == 0 ) sign = '0';
-			QueryWord *qw = qt->m_qword;
+			const QueryWord *qw = qt->m_qword;
 			int32_t wikiPhrId = qw->m_wikiPhraseId;
 			if ( m_query.isPhrase(i) ) wikiPhrId = 0;
 			char leftwikibigram = 0;
--- a/Msg5.cpp
+++ b/Msg5.cpp
@ -1529,8 +1529,6 @@ bool Msg5::getRemoteList ( ) {
 	//   data
 	verify_signature();
 	if ( ! m_msg0->getList ( h->m_hostId          ,
-				 h->m_ip              ,
-				 h->m_port            ,
 				 0                    , // max cached age
 				 false                , // add to cache?
 				 m_rdbId              , // rdbId
@ -1544,7 +1542,6 @@ bool Msg5::getRemoteList ( ) {
 				 m_niceness           ,
 				 false                , // do error correction?
 				 true                 , // include tree?
-				 true                 , // do merge? (obsolete)
 				 -1                   , // first hostid
 				 0                    , // startFileNum
 				 -1                   , // numFiles (-1=all)
--- a/Msg51.cpp
+++ b/Msg51.cpp
@ -314,8 +314,6 @@ bool Msg51::sendRequest ( int32_t    i ) {
 	// . returns false and sets g_errno on error
 	// . otherwise, it blocks and returns true
 	bool s = m_slot[i].m_msg0.getList( -1            , // hostid
-				     -1            , // ip
-				     -1            , // port 
 				     m_maxCacheAge ,
 				     m_addToCache  ,
 				     RDB_CLUSTERDB ,
@ -329,7 +327,6 @@ bool Msg51::sendRequest ( int32_t    i ) {
 				     m_niceness    ,
 				     true        , // doErrorCorrection
 				     true        , // includeTree
-				     true        , // doMerge?
 				     firstHostId ,
 				     0           , // startFileNum
 				     -1          , // numFiles
--- a/Multicast.cpp
+++ b/Multicast.cpp
@ -58,7 +58,6 @@ Multicast::Multicast()
    m_lastLaunch(0),
    m_freeReadBuf(false),
    m_key(0),
-    m_sendToSelf(false),
    m_sentToTwin(false)
 {
 	constructor();
@ -102,16 +101,13 @@ void Multicast::reset ( ) {
 bool Multicast::send(char *msg, int32_t msgSize, msg_type_t msgType, bool ownMsg, uint32_t shardNum, bool sendToWholeGroup_,
                     int32_t key, void *state, void *state2, void (*callback)(void *state, void *state2),
                     int64_t totalTimeout, int32_t niceness, int32_t firstHostId, bool freeReplyBuf) {
-	bool sendToSelf = true;
-
 	// make sure not being re-used!
 	if ( m_inUse ) {
 		log( LOG_ERROR, "net: Attempt to re-use active multicast");
 		g_process.shutdownAbort(true);
 	}
-	// reset to free "m_msg" in case we are being re-used (like by Msg14)
-	//log(LOG_DEBUG, "Multicast: send() 0x%02x",msgType);
 	reset();
+
 	// it is now in use
 	m_inUse = true;
 	// set the parameters in this class
@ -121,7 +117,6 @@ bool Multicast::send(char *msg, int32_t msgSize, msg_type_t msgType, bool ownMsg
 	m_freeReadBuf      = freeReplyBuf;
 	m_msgSize          = msgSize;
 	m_msgType          = msgType;
-	//m_groupId          = groupId;
 	m_state            = state;
 	m_state2           = state2;
 	m_callback         = callback;
@ -135,7 +130,6 @@ bool Multicast::send(char *msg, int32_t msgSize, msg_type_t msgType, bool ownMsg
 	m_readBufSize      = 0;
 	m_readBufMaxSize   = 0;
 	m_registeredSleep  = false;
-	m_sendToSelf       = sendToSelf;
 	m_sentToTwin       = false;
 	m_key              = key;

@ -198,16 +192,7 @@ void Multicast::sendToWholeGroup() {
 		// if we got a nice reply from him skip him
 		//slots[i] && m_host[i].m_slot->doneReading() ) continue;
 		if ( m_host[i].m_retired ) continue;
-		// sometimes msg1.cpp is able to add the data to the tree
-		// without problems and will save us a network trans here
-		if ( ! m_sendToSelf && 
-		     h->m_hostId == g_hostdb.m_hostId &&
-		     ! g_conf.m_interfaceMachine ) {
-			m_host[i].m_retired = true;
-			m_host[i].m_errno = 0;
-			m_numReplies++;
-			continue;
-		}
+
 		// . timeout is in seconds
 		// . timeout is just the time remaining for the whole groupcast
 		// int32_t timeout = m_startTime + m_totalTimeout - getTime();
@ -580,7 +565,6 @@ bool Multicast::sendToHost ( int32_t i ) {
 	int16_t destPort = h->m_port;

 	// if from hosts2.conf pick the best ip!
-	//int32_t  bestIp   = h->m_ip;
 	int32_t bestIp = g_hostdb.getBestHosts2IP ( h );

 	// sanity check
--- a/Multicast.h
+++ b/Multicast.h
@ -194,9 +194,6 @@ private:

 	int32_t        m_key;

-	// Msg1 might be able to add data to our tree to save a net trans.
-	bool        m_sendToSelf;
-
 	bool        m_sentToTwin;

 	void destroySlotsInProgress ( UdpSlot *slot );
--- a/PageResults.cpp
+++ b/PageResults.cpp
@ -651,7 +651,7 @@ static bool printIgnoredWords ( SafeBuf *sb , const SearchInput *si ) {
 	bool firstIgnored = true;
 	for ( int32_t i = 0 ; i < qq2->m_numWords ; i++ ) {
 		//if ( si->m_xml ) break;
-		QueryWord *qw = &qq2->m_qwords[i];
+		const QueryWord *qw = &qq2->m_qwords[i];
 		// only print out words ignored cuz they were stop words
 		if ( qw->m_ignoreWord != IGNORE_QSTOP ) continue;
 		// print header -- we got one
@ -1096,7 +1096,7 @@ bool printSearchResultsHeader ( State0 *st ) {
 				       ,qt->m_termId);
 			sb->safePrintf("\t\t\t<termHash64>%" PRIu64"</termHash64>\n"
 				       ,qt->m_rawTermId);
-			QueryWord *qw = qt->m_qword;
+			const QueryWord *qw = qt->m_qword;
 			sb->safePrintf("\t\t\t<prefixHash64>%" PRIu64"</prefixHash64>\n"
 				       ,qw->m_prefixHash);
 			sb->safePrintf("\t\t</term>\n");
@ -1174,7 +1174,7 @@ bool printSearchResultsHeader ( State0 *st ) {
 				       ,qt->m_rawTermId);

 			// don't end last query term attr on a comma
-			QueryWord *qw = qt->m_qword;
+			const QueryWord *qw = qt->m_qword;
 			sb->safePrintf("\t\t\"prefixHash64\":%" PRIu64"\n"
 				       ,qw->m_prefixHash);

--- a/Parms.cpp
+++ b/Parms.cpp
@ -4881,7 +4881,7 @@ void Parms::init ( ) {
 		"an error message if a shard was down and did not return "
 		"results for a query. The XML and JSON feed let's you know "
 		"when a shard is down and will give you the results back "
-		"any way, but if you would rather have just and error message "
+		"any way, but if you would rather have just an error message "
 		"and no results, then set then set this to 'NO'.";
 	m->m_cgi   = "rra";
 	simple_m_set(Conf,m_returnResultsAnyway);
--- a/PosdbTable.cpp
+++ b/PosdbTable.cpp
@ -1521,8 +1521,6 @@ bool PosdbTable::setQueryTermInfo ( ) {
 	// point to those
 	QueryTermInfo *qtibuf = (QueryTermInfo *)m_qiBuf.getBufStart();

-	RdbList *list = NULL;
-
 	int32_t nrg = 0;

 	// assume not sorting by a numeric termlist
@ -1553,7 +1551,7 @@ bool PosdbTable::setQueryTermInfo ( ) {
 		}
 		
 		// set this stuff
-		QueryWord     *qw =   qt->m_qword;
+		const QueryWord     *qw =   qt->m_qword;
 		//int32_t wordNum = qw - &m_q->m_qwords[0];
 		// get one
 		QueryTermInfo *qti = &qtibuf[nrg];
@ -1630,7 +1628,7 @@ bool PosdbTable::setQueryTermInfo ( ) {
 			leftAlreadyAdded = true;
 			// get list
 			//list = m_msg2->getList(left);
-			list = m_q->m_qterms[left].m_posdbListPtr;
+			RdbList *list = m_q->m_qterms[left].m_posdbListPtr;
 			// add list ptr into our required group
 			qti->m_subLists[nn] = list;
 			// left bigram is #2
@ -1680,7 +1678,7 @@ bool PosdbTable::setQueryTermInfo ( ) {
 			rightAlreadyAdded = true;
 			// get list
 			//list = m_msg2->getList(right);
-			list = m_q->m_qterms[right].m_posdbListPtr;
+			RdbList *list = m_q->m_qterms[right].m_posdbListPtr;
 			// add list ptr into our required group
 			qti->m_subLists[nn] = list;
 			// right bigram is #3
@ -1730,7 +1728,7 @@ bool PosdbTable::setQueryTermInfo ( ) {
 		// add to it. add backwards since we give precedence to
 		// the first list and we want that to be the NEWEST list!
 		//list = m_msg2->getList(i);
-		list = m_q->m_qterms[i].m_posdbListPtr;
+		RdbList *list = m_q->m_qterms[i].m_posdbListPtr;
 		// add list ptr into our required group
 		qti->m_subLists[nn] = list;
 		// how many in there?
--- a/Query.cpp
+++ b/Query.cpp
@ -133,9 +133,8 @@ bool Query::set2 ( const char *query        ,

 	// truncate query if too big
 	if ( queryLen >= ABS_MAX_QUERY_LEN ) {
-		log("query: Query length of %" PRId32" must be "
-		    "less than %" PRId32". "
-		    "Truncating.",queryLen,(int32_t)ABS_MAX_QUERY_LEN);
+		log("query: Query length of %" PRId32" must be less than %" PRId32". Truncating.",
+		    queryLen,(int32_t)ABS_MAX_QUERY_LEN);
 		queryLen = ABS_MAX_QUERY_LEN - 1;
 		m_truncated = true;
 	}
@ -256,7 +255,7 @@ bool Query::set2 ( const char *query        ,

 	// disable stuff for site:, ip: and url: queries
 	for ( int32_t i = 0 ; i < m_numWords ; i++ ) {
-		QueryWord *qw = &m_qwords[i];
+		const QueryWord *qw = &m_qwords[i];
 		if ( qw->m_ignoreWord  ) continue;
 		if      ( qw->m_fieldCode == FIELD_SITE &&
 			  qw->m_wordSign != '-' ) 
@ -379,7 +378,7 @@ bool Query::setQTerms ( const Words &words ) {
 	// count phrases first for allocating
 	int32_t nqt = 0;
 	for ( int32_t i = 0 ; i < m_numWords ; i++ ) {
-		QueryWord *qw  = &m_qwords[i];
+		const QueryWord *qw  = &m_qwords[i];
 		// skip if ignored... mdw...
 		if ( ! qw->m_phraseId ) continue;
 		if (   qw->m_ignorePhrase ) continue; // could be a repeat
@ -390,7 +389,7 @@ bool Query::setQTerms ( const Words &words ) {
 	}
 	// count single terms
 	for ( int32_t i = 0 ; i < m_numWords; i++ ) {
-		QueryWord *qw  = &m_qwords[i];
+		const QueryWord *qw  = &m_qwords[i];
 		if ( qw->m_ignoreWord && 
 		     qw->m_ignoreWord != IGNORE_QSTOP) continue;
 		// ignore if in quotes and part of phrase, watch out
@ -413,7 +412,7 @@ bool Query::setQTerms ( const Words &words ) {
 		int64_t to = hash64n("to");
 		for ( int32_t i = 0 ; i < m_numWords ; i++ ) {
 			// get query word
-			QueryWord *qw  = &m_qwords[i];
+			const QueryWord *qw  = &m_qwords[i];
 			// skip if in quotes, we will not get synonyms for it
 			if ( qw->m_inQuotes ) continue;
 			// skip if has plus sign in front
@ -484,13 +483,13 @@ bool Query::setQTerms ( const Words &words ) {

 		// stop breach
 		if ( n >= ABS_MAX_QUERY_TERMS ) {
-			log("query: lost query phrase terms to max term "
-			    "limit of %" PRId32,(int32_t)ABS_MAX_QUERY_TERMS );
+			log("query: lost query phrase terms to max term limit of %" PRId32,
+			    (int32_t)ABS_MAX_QUERY_TERMS);
 			break;
 		}
 		if ( n >= m_maxQueryTerms ) {
-			log("query: lost query phrase terms to max term cr "
-			    "limit of %" PRId32,(int32_t)m_maxQueryTerms);
+			log("query: lost query phrase terms to max term cr limit of %" PRId32,
+			    (int32_t)m_maxQueryTerms);
 			break;
 		}

@ -579,13 +578,13 @@ bool Query::setQTerms ( const Words &words ) {

 		// stop breach
 		if ( n >= ABS_MAX_QUERY_TERMS ) {
-			log("query: lost query terms to max term "
-			    "limit of %" PRId32,(int32_t)ABS_MAX_QUERY_TERMS );
+			log("query: lost query terms to max term limit of %" PRId32,
+			    (int32_t)ABS_MAX_QUERY_TERMS);
 			break;
 		}
 		if ( n >= m_maxQueryTerms ) {
-			log("query: lost query terms to max term cr "
-			    "limit of %" PRId32,(int32_t)m_maxQueryTerms);
+			log("query: lost query terms to max term cr limit of %" PRId32,
+			    (int32_t)m_maxQueryTerms);
 			break;
 		}

@ -722,7 +721,7 @@ bool Query::setQTerms ( const Words &words ) {
 	// . set implicit bits, m_implicitBits
 	// . set m_inPhrase
 	for (int32_t i = 0; i < m_numWords ; i++ ){
-		QueryWord *qw = &m_qwords[i];
+		const QueryWord *qw = &m_qwords[i];
 		QueryTerm *qt = qw->m_queryWordTerm;
 		if (!qt) continue;
 		if ( qw->m_queryPhraseTerm )
@ -752,7 +751,7 @@ bool Query::setQTerms ( const Words &words ) {
 		//   was working.
 		for ( int32_t j = 0 ; j < m_numWords ; j++ ) {
 			// must be our same wordId (same word, different occ.)
-			QueryWord *qw2 = &m_qwords[j];
+			const QueryWord *qw2 = &m_qwords[j];
 			if ( qw2->m_wordId != qw->m_wordId ) continue;
 			// get first word in the phrase that jth word is in
 			int32_t pn2 = qw2->m_leftPhraseStart;
@ -827,18 +826,16 @@ bool Query::setQTerms ( const Words &words ) {
 			for ( int32_t j = 0 ; j < naids ; j++ ) {
 				// stop breach
 				if ( n >= ABS_MAX_QUERY_TERMS ) {
-					log("query: lost synonyms due to max term "
-					"limit of %" PRId32,
-					(int32_t)ABS_MAX_QUERY_TERMS );
+					log("query: lost synonyms due to max term limit of %" PRId32,
+					    (int32_t)ABS_MAX_QUERY_TERMS);
 					break;
 				}
 				// this happens for 'da da da'
 				if ( ! origTerm ) continue;
 				
 				if ( n >= m_maxQueryTerms ) {
-					log("query: lost synonyms due to max cr term "
-					"limit of %" PRId32,
-					(int32_t)m_maxQueryTerms);
+					log("query: lost synonyms due to max cr term limit of %" PRId32,
+					    (int32_t)m_maxQueryTerms);
 					break;
 				}
 				
@ -1014,8 +1011,7 @@ bool Query::setQTerms ( const Words &words ) {
 	m_forcedBits   = 0; // terms with + signs
 	m_synonymBits  = 0;
 	for ( int32_t i = 0 ; i < m_numTerms ; i++ ) {
-		// QueryTerms are derived from QueryWords
-		QueryTerm *qt = &m_qterms[i];
+		const QueryTerm *qt = &m_qterms[i];
 		// don't require if negative
 		if ( qt->m_termSign == '-' ) {
 			m_negativeBits |= qt->m_explicitBit; // (1 << i );
@ -1040,8 +1036,7 @@ bool Query::setQTerms ( const Words &words ) {
 	// set m_matchRequiredBits which we use for Matches.cpp
 	m_matchRequiredBits = 0;
 	for ( int32_t i = 0 ; i < m_numTerms ; i++ ) {
-		// QueryTerms are derived from QueryWords
-		QueryTerm *qt = &m_qterms[i];
+		const QueryTerm *qt = &m_qterms[i];
 		// don't require if negative
 		if ( qt->m_termSign == '-' ) continue;
 		// skip all phrase terms
@ -1070,7 +1065,6 @@ bool Query::setQTerms ( const Words &words ) {

 	m_numRequired = 0;
 	for ( int32_t i = 0 ; i < m_numTerms ; i++ ) {
-		// QueryTerms are derived from QueryWords
 		QueryTerm *qt = &m_qterms[i];
 		// assume not required
 		qt->m_isRequired = false;
@ -1087,9 +1081,22 @@ bool Query::setQTerms ( const Words &words ) {
 	}


+	//workaround/hack for double-highfreqterm searches, such as "of a" or "the the" or "the who"
+	if(m_numWords==3 &&
+	   m_qwords[0].m_ignoreWord==IGNORE_HIGHFREMTERM &&
+	   m_qwords[2].m_ignoreWord==IGNORE_HIGHFREMTERM &&
+	   m_numTerms==1 &&
+	   !m_qterms[0].m_isRequired)
+	{
+		log(LOG_DEBUG, "query: Looks like a highfreqterm-highfreqterm query type. Requiring one-and-only QueryTerm/bigram");
+		m_qterms[0].m_isRequired = true;
+		//todo: we should investigate if QueryTerm::m_isRequired actually has any effect. It is used
+		//in a single place in PosdbTable for not generating a QueryTermInfo, but it appears it works
+		//fine even with the QTI.
+	}
+	
 	// required quoted phrase terms
 	for ( int32_t i = 0 ; i < m_numTerms ; i++ ) {
-		// QueryTerms are derived from QueryWords
 		QueryTerm *qt = &m_qterms[i];
 		// quoted phrase?
 		if ( ! qt->m_isPhrase ) continue;
@ -1111,21 +1118,20 @@ bool Query::setQTerms ( const Words &words ) {
 	// . for 'in the nick' , a wiki phrase, make "in the" required
 	//   and give a big bonus for "the nick" below.
 	for ( int32_t i = 0 ; i < m_numTerms ; i++ ) {
-		// QueryTerms are derived from QueryWords
 		QueryTerm *qt = &m_qterms[i];
 		// don't require if negative
 		if ( qt->m_termSign == '-' ) continue;
 		// only check bigrams here
 		if ( ! qt->m_isPhrase ) continue;
 		// get the query word that starts this phrase
-		QueryWord *qw1 = qt->m_qword;
+		const QueryWord *qw1 = qt->m_qword;
 		// must be in a wikiphrase
 		if ( qw1->m_wikiPhraseId <= 0 ) continue;
 		// what query word # is that?
 		int32_t qwn = qw1 - m_qwords;
 		// get the next alnum word after that
 		// assume its the last word in our bigram phrase
-		QueryWord *qw2 = &m_qwords[qwn+2];
+		const QueryWord *qw2 = &m_qwords[qwn+2];
 		// must be in same wikiphrase
 		if ( qw2->m_wikiPhraseId != qw1->m_wikiPhraseId ) continue;
 		// must be two stop words
@ -1191,7 +1197,6 @@ bool Query::setQTerms ( const Words &words ) {
 	//   is a synonym term of the single word term "enough" and is treated
 	//   as such in the Posdb.cpp logic.
 	for ( int32_t i = 0 ; i < m_numTerms ; i++ ) {
-		// QueryTerms are derived from QueryWords
 		QueryTerm *qt = &m_qterms[i];
 		// assume not!
 		qt->m_isWikiHalfStopBigram = 0;
@ -1200,14 +1205,14 @@ bool Query::setQTerms ( const Words &words ) {
 		// only check bigrams here
 		if ( ! qt->m_isPhrase ) continue;
 		// get the query word that starts this phrase
-		QueryWord *qw1 = qt->m_qword;
+		const QueryWord *qw1 = qt->m_qword;
 		// must be in a wikiphrase
 		if ( qw1->m_wikiPhraseId <= 0 ) continue;
 		// what query word # is that?
 		int32_t qwn = qw1 - m_qwords;
 		// get the next alnum word after that
 		// assume its the last word in our bigram phrase
-		QueryWord *qw2 = &m_qwords[qwn+2];
+		const QueryWord *qw2 = &m_qwords[qwn+2];
 		// must be in same wikiphrase
 		if ( qw2->m_wikiPhraseId != qw1->m_wikiPhraseId ) continue;
 		// if both query stop words, should have been handled above
@ -2201,7 +2206,7 @@ bool Query::setQWords ( char boolFlag ,
 	if ( !phrases.set( &words, &bits ) )
 		return false;

-	int64_t *wids = words.getWordIds();
+	const int64_t *wids = words.getWordIds();

 	// do phrases stuff
 	for ( int32_t i = 0 ; i < numWords ; i++ ) {
@ -2474,13 +2479,13 @@ bool Query::setQWords ( char boolFlag ,
 	// . how many non-negative, non-ignored words/phrases do we have?
 	count = 0;
 	for ( int32_t i = 0 ; i < m_numWords ; i++ ) {
-		QueryWord *qw = &m_qwords[i];
+		const QueryWord *qw = &m_qwords[i];
 		if ( qw->m_ignoreWord      ) continue;
 		if ( qw->m_wordSign == '-' ) continue;
 		count++;
 	}
 	for ( int32_t i = 0 ; i < m_numWords ; i++ ) {
-		QueryWord *qw = &m_qwords[i];
+		const QueryWord *qw = &m_qwords[i];
 		if ( qw->m_ignorePhrase      ) continue;
 		if ( qw->m_phraseSign == '-' ) continue;
 		if ( qw->m_phraseId == 0LL   ) continue;
@ -2592,7 +2597,7 @@ int32_t Query::getWordNum(int64_t wordId) const {
 	// skip if punct or whatever
 	if ( wordId == 0LL || wordId == -1LL ) return -1;
 	for ( int32_t i = 0 ; i < m_numWords ; i++ ) {
-		QueryWord *qw = &m_qwords[i];
+		const QueryWord *qw = &m_qwords[i];
 		// the non-raw word id includes a hash with "0", which
 		// signifies an empty field term
 		if ( qw->m_rawWordId == wordId ) return i;
@ -3343,7 +3348,7 @@ bool Expression::isTruth(const unsigned char *bitVec, int32_t vecSize) const {


 		// so operands are expressions as well
-		Expression *e = (Expression *)qw->m_expressionPtr;
+		const Expression *e = (const Expression *)qw->m_expressionPtr;
 		if ( e ) {
 			// save prev one. -1 means no prev.
 			prevResult = opResult;
@ -3375,7 +3380,7 @@ bool Expression::isTruth(const unsigned char *bitVec, int32_t vecSize) const {
 			// save old one
 			prevResult = opResult;
 			// convert word to term #
-			QueryTerm *qt = qw->m_queryWordTerm;
+			const QueryTerm *qt = qw->m_queryWordTerm;
 			// fix title:"notre dame" AND NOT irish
 			if ( ! qt ) qt = qw->m_queryPhraseTerm;
 			if ( ! qt ) continue;
--- a/Query.h
+++ b/Query.h
@ -265,7 +265,7 @@ class QueryTerm {
 	void constructor ( ) ;

 	// the query word we were derived from
-	QueryWord *m_qword;
+	const QueryWord *m_qword;

 	// . are we a phrase termid or single word termid from that QueryWord?
 	// . the QueryWord instance represents both, so we must choose
--- a/RdbCache.cpp
+++ b/RdbCache.cpp
@ -514,7 +514,7 @@ bool RdbCache::getRecord ( collnum_t collnum   ,
 	*rec = p;
 	// copy the data and set "list" with it iff "doCopy" is true
 	if ( doCopy && *recSize > 0 ) {
-		*rec = mdup ( p , *recSize , "RdbCache3" );
+		*rec = (char*)mdup ( p , *recSize , "RdbCache3" );
 		if ( ! *rec ) {
 			log(LOG_WARN, "db: Could not allocate space for cached record for %s of %" PRId32" bytes.",
 			    m_dbname,*recSize);
--- a/SiteGetter.cpp
+++ b/SiteGetter.cpp
@ -298,8 +298,6 @@ bool SiteGetter::getSiteList ( ) {

 		// get the list. returns false if blocked.
 		if (!m_msg0.getList( -1, // hostId
-		                 0, // ip
-		                 0, // port
 		                 0, // maxCacheAge
 		                 false, // addToCache
 		                 RDB_POSDB,
@ -314,7 +312,6 @@ bool SiteGetter::getSiteList ( ) {
 		                 // default parms follow
 				         true,  // doErrorCorrection?
 				         true,  // includeTree?
-				         true,  // doMerge?
 				         -1,  // firstHostId
 				         0,  // startFileNum
 				         -1,  // numFiles
--- a/Tagdb.cpp
+++ b/Tagdb.cpp
@ -1466,8 +1466,6 @@ bool Msg8a::launchGetRequests ( ) {
 			// . launch this request, even if to ourselves
 			// . TODO: just use msg0!!
 			bool status = m->getList ( firstHostId     , // hostId
-						   0          , // ip
-						   0          , // port
 						   0          , // maxCacheAge
 						   false      , // addToCache
 						   RDB_TAGDB  ,
@ -1481,7 +1479,6 @@ bool Msg8a::launchGetRequests ( ) {
 						   m_niceness          ,
 						   true                , // error correction?
 						   true                , // include tree?
-						   true                , // doMerge?
 						   firstHostId         , // firstHostId
 						   0                   , // startFileNum
 						   -1                  , // numFiles
--- a/UdpSlot.cpp
+++ b/UdpSlot.cpp
@ -1524,13 +1524,15 @@ bool UdpSlot::makeReadBuf ( int32_t msgSize , int32_t numDgrams ) {
 	// if msgSize is -1 then it is under 1 dgram, but assume the worst
 	if ( msgSize == -1 ) msgSize = m_maxDgramSize;

-	// . create a msg buf to hold msg, zero out everything...
-	// . label it "umsg" so we can grep the *.cpp files for it
-	m_readBuf = (char *) mmalloc ( msgSize, umsg_label[(uint8_t)m_msgType] );
-	if ( ! m_readBuf ) {
-		m_readBufSize = 0;
-		log(LOG_WARN, "udp: Failed to allocate %" PRId32" bytes to read request or reply on udp socket.", msgSize);
-		return false;
+	if(msgSize!=0) {
+		// . create a msg buf to hold msg, zero out everything...
+		// . label it "umsg" so we can grep the *.cpp files for it
+		m_readBuf = (char *) mmalloc ( msgSize, umsg_label[(uint8_t)m_msgType] );
+		if ( ! m_readBuf ) {
+			m_readBufSize = 0;
+			log(LOG_WARN, "udp: Failed to allocate %" PRId32" bytes to read request or reply on udp socket.", msgSize);
+			return false;
+		}
 	}
 	m_readBufMaxSize = msgSize;
 	// let the caller know we're good
--- a/XmlDoc.cpp
+++ b/XmlDoc.cpp
@ -4855,8 +4855,6 @@ RdbList *XmlDoc::getDupList ( ) {
 	m_dupListValid = true;
 	// this is a no-split lookup by default now
 	if ( ! m_msg0.getList ( -1    , // hostId
-				0     , // ip
-				0     , // port
 				0     , // maxCacheAge
 				false , // add to cache?
 				RDB_POSDB, // INDEXDB ,
@ -4870,7 +4868,6 @@ RdbList *XmlDoc::getDupList ( ) {
 				m_niceness    ,
 				true , // error correction?
 				true , // include tree?
-				true , // domerge?
 				-1 , // firsthosti
 				0 , // startfilenum
 				-1, // # files
--- a/tools/Makefile
+++ b/tools/Makefile
@ -26,7 +26,7 @@ CPPFLAGS += -std=c++11
 # exported in parent make
 CPPFLAGS += $(CONFIG_CPPFLAGS)

-LIBS += $(BASE_DIR)/libgb.a -lz -lpthread -lssl -lcrypto
+LIBS += $(BASE_DIR)/libgb.a -lz -lpthread -lssl -lcrypto -lpcre
 LIBS += -L$(BASE_DIR) -lcld2_full

 %: libgb.a $(BASE_DIR)/libcld2_full.so %.cpp
--- a/tools/dump_rdbbuckets.cpp
+++ b/tools/dump_rdbbuckets.cpp
@ -58,13 +58,12 @@ int main(int argc, char **argv) {

 	g_conf.init(NULL);

-	BigFile bigFile;
-	bigFile.set(dir, filename);
+	strcpy(g_hostdb.m_dir, dir);

 	RdbBuckets buckets;
 	if (starts_with(filename, "posdb")) {
 		buckets.set(Posdb::getFixedDataSize(), g_conf.m_posdbMaxTreeMem, "buckets-posdb", RDB_POSDB, "posdb", Posdb::getKeySize());
-		if (!buckets.fastLoad(&bigFile, "posdb")) {
+		if (!buckets.loadBuckets("posdb")) {
 			fprintf(stdout, "Unable to load bucket\n");
 			return 1;
 		}