Merge branch 'master' into nomerge2

Conflicts: Msg40.cpp Tagdb.cpp
2025-07-13 02:36:06 -04:00 · 2016-09-15 13:37:50 +02:00
parent 8876b62add 22463d3170
commit 3d53d01c40
51 changed files with 1679 additions and 1737 deletions
--- a/Clusterdb.h
+++ b/Clusterdb.h
@ -28,9 +28,6 @@
 #define GB_CLUSTERDB_H

 #include "Rdb.h"
-#include "Url.h"
-#include "Conf.h"
-#include "Titledb.h"

 // these are now just TitleRec keys
 #define CLUSTER_REC_SIZE (sizeof(key96_t))
@ -69,23 +66,19 @@ public:
 					   false, true ); }

 	// NOTE: THESE NOW USE THE REAL CLUSTERDB REC
-	// // docId occupies the most significant bytes of the key
+	// docId occupies the most significant bytes of the key
 	// now docId occupies the bits after the first 23
 	static int64_t getDocId ( const void *k ) {
-		//int64_t docId = (k.n0) >> (32+24);
-		//docId |= ( ((uint64_t)(k.n1)) << 8 );
 		int64_t docId = (((const key96_t *)k)->n0) >> 35;
 		docId |= ( ((uint64_t)(((const key96_t *)k)->n1)) << 29 );
 		return docId;
 	}

 	static uint32_t getSiteHash26 ( const char *r ) {
-		//return g_titledb.getSiteHash ( (key_t *)r ); }
 		return ((uint32_t)(((const key96_t*)r)->n0 >> 2) & 0x03FFFFFF);
 	}

 	static uint32_t hasAdultContent ( const char *r ) {
-		//return g_titledb.hasAdultContent ( *(key_t *)r ); }
 		return ((uint32_t)(((const key96_t*)r)->n0 >> 34) & 0x00000001);
 	}

--- a/DailyMerge.cpp
+++ b/DailyMerge.cpp
@ -268,7 +268,7 @@ void DailyMerge::dailyMergeLoop ( ) {
 		// ok, all trees are clear and dumped
 		m_mergeMode = 5;
 		// log it
-		log("daily: Merging indexdb and datedb files.");
+		log("daily: Merging indexdb files.");
 	}

 	// start the merge
--- a/DailyMerge.h
+++ b/DailyMerge.h
@ -1,6 +1,6 @@
 // Copyright Gigablast, Inc. Apr 2008

-// tight merge indexdb and datedb at the given time every day
+// tight merge indexdb at the given time every day

 #ifndef GB_DAILYMERGE_H
 #define GB_DAILYMERGE_H
--- a/HashTableX.cpp
+++ b/HashTableX.cpp
@ -81,7 +81,6 @@ void HashTableX::reset ( ) {
 	m_flags = NULL;
 	m_numSlots     = 0;
 	m_numSlotsUsed = 0;
-	m_addIffNotUnique = false;
 	m_maskKeyOffset = 0;
 	//m_useKeyMagic = false;
 	// we should free it in reset()
@ -621,3 +620,17 @@ int32_t HashTableX::getKeyChecksum32 () const {
 	}
 	return checksum;
 }
+
+// debugging aid: log the key of every occupied slot at LOG_WARN level
+void HashTableX::print() {
+	for (int32_t i = 0; i < m_numSlots; i++) {
+		// skip empty bucket
+		if (!m_flags[i]) {
+			continue;
+		}
+
+		// get the key
+		char *kp = (char *)getKeyFromSlot(i);
+		logf(LOG_WARN, "key=%s", KEYSTR(kp, m_ks));
+	}
+}
--- a/HashTableX.h
+++ b/HashTableX.h
@ -298,6 +298,9 @@ class HashTableX {

 	bool setTableSize ( int32_t numSlots , char *buf , int32_t bufSize );

+	// for debugging
+	void print();
+
 	void disableWrites () { m_isWritable = false; }
 	void enableWrites  () { m_isWritable = true ; }
 	bool m_isWritable;
@ -318,18 +321,15 @@ class HashTableX {
 	int32_t     m_numSlotsUsed;
 	uint32_t m_mask;

-	char  m_doFree;
+	bool  m_doFree;
 	char *m_buf;
 	int32_t  m_bufSize;

-	char m_useKeyMagic;
+	bool m_useKeyMagic;

 	int32_t m_ks;
 	int32_t m_ds;
-	char m_allowDups;
-
-	// a flag used by XmlDoc.cpp
-	bool m_addIffNotUnique;
+	bool m_allowDups;

 	bool m_isSaving;
 	bool m_needsSave;
--- a/Hostdb.cpp
+++ b/Hostdb.cpp
@ -1571,7 +1571,7 @@ uint32_t Hostdb::getShardNum(rdbid_t rdbId, const void *k) {
 		return m_map [(*(uint16_t *)((char *)k + 26))>>3];	
 	}
 	else if ( rdbId == RDB_TITLEDB || rdbId == RDB2_TITLEDB2 ) {
-		uint64_t d = g_titledb.getDocId ( (key96_t *)k );
+		uint64_t d = Titledb::getDocId ( (key96_t *)k );
 		return m_map [ ((d>>14)^(d>>7)) & (MAX_KSLOTS-1) ];
 	}
 	else if ( rdbId == RDB_SPIDERDB || rdbId == RDB2_SPIDERDB2 ) {
--- a/Linkdb.cpp
+++ b/Linkdb.cpp
@ -536,9 +536,9 @@ bool getLinkInfo ( SafeBuf   *reqBuf              ,
 	//int32_t siteHash32 = hash32n ( req->ptr_site );
 	// access different parts of linkdb depending on the "mode"
 	if ( req->m_mode == MODE_SITELINKINFO )
-		startKey = g_linkdb.makeStartKey_uk ( req->m_siteHash32 );
+		startKey = Linkdb::makeStartKey_uk ( req->m_siteHash32 );
 	else
-		startKey = g_linkdb.makeStartKey_uk (req->m_siteHash32,
+		startKey = Linkdb::makeStartKey_uk (req->m_siteHash32,
 						     req->m_linkHash64 );
 	// what group has this linkdb list?
 	uint32_t shardNum = getShardNum ( RDB_LINKDB, &startKey );
@ -999,14 +999,14 @@ bool Msg25::doReadLoop ( ) {

 	// access different parts of linkdb depending on the "mode"
 	if ( m_mode == MODE_SITELINKINFO ) {
-		startKey = g_linkdb.makeStartKey_uk ( siteHash32 );
-		endKey   = g_linkdb.makeEndKey_uk   ( siteHash32 );
+		startKey = Linkdb::makeStartKey_uk ( siteHash32 );
+		endKey   = Linkdb::makeEndKey_uk   ( siteHash32 );
 		//log("linkdb: getlinkinfo: "
 		//    "site=%s sitehash32=%" PRIu32,site,siteHash32);
 	}
 	else {
-		startKey = g_linkdb.makeStartKey_uk (siteHash32,m_linkHash64 );
-		endKey   = g_linkdb.makeEndKey_uk   (siteHash32,m_linkHash64 );
+		startKey = Linkdb::makeStartKey_uk (siteHash32,m_linkHash64 );
+		endKey   = Linkdb::makeEndKey_uk   (siteHash32,m_linkHash64 );
 	}

 	// resume from where we left off?
@ -1329,13 +1329,13 @@ bool Msg25::sendRequests ( ) {
 			// get the current key if list has more left
 			key224_t key; m_list.getCurrentKey( &key );

-			itop       = g_linkdb.getLinkerIp24_uk     ( &key );
-			ip32       = g_linkdb.getLinkerIp_uk     ( &key );
-			isLinkSpam = g_linkdb.isLinkSpam_uk  ( &key );
-			docId      = g_linkdb.getLinkerDocId_uk    ( &key );
-			discovered = g_linkdb.getDiscoveryDate_uk(&key);
+			itop       = Linkdb::getLinkerIp24_uk     ( &key );
+			ip32       = Linkdb::getLinkerIp_uk     ( &key );
+			isLinkSpam = Linkdb::isLinkSpam_uk  ( &key );
+			docId      = Linkdb::getLinkerDocId_uk    ( &key );
+			discovered = Linkdb::getDiscoveryDate_uk(&key);
 			// is it expired?
-			lostDate = g_linkdb.getLostDate_uk(&key);
+			lostDate = Linkdb::getLostDate_uk(&key);
 			// update this
 			gbmemcpy ( &m_nextKey  , &key , LDBKS );

@ -1347,15 +1347,15 @@ bool Msg25::sendRequests ( ) {
 			// get the current key if list has more left
 			key224_t key; m_list.getCurrentKey( &key );

-			itop       = g_linkdb.getLinkerIp24_uk     ( &key );
-			ip32       = g_linkdb.getLinkerIp_uk     ( &key );
+			itop       = Linkdb::getLinkerIp24_uk     ( &key );
+			ip32       = Linkdb::getLinkerIp_uk     ( &key );

 			isLinkSpam = false;
-			docId      = g_linkdb.getLinkerDocId_uk    ( &key );
+			docId      = Linkdb::getLinkerDocId_uk    ( &key );

-			discovered = g_linkdb.getDiscoveryDate_uk(&key);
+			discovered = Linkdb::getDiscoveryDate_uk(&key);
 			// is it expired?
-			lostDate = g_linkdb.getLostDate_uk(&key);
+			lostDate = Linkdb::getLostDate_uk(&key);
 			// update this
 			gbmemcpy ( &m_nextKey  , &key , LDBKS );

--- a/Linkdb.h
+++ b/Linkdb.h
@ -159,16 +159,18 @@ bool getLinkInfo ( SafeBuf *reqBuf , // store msg25 request in here
 int32_t getSiteRank ( int32_t sni ) ;

 class Linkdb {
- public:
+public:
 	void reset();

-	bool init    ( );
-	bool init2 ( int32_t treeMem );
-	bool verify  ( char *coll );
-	bool addColl ( char *coll, bool doVerify = true );
+	bool init();
+	bool init2(int32_t treeMem);
+
+	bool verify(char *coll);
+
+	Rdb *getRdb() { return &m_rdb; }

 	// this makes a "url" key
-	key224_t makeKey_uk ( uint32_t  linkeeSiteHash32 ,
+	static key224_t makeKey_uk ( uint32_t  linkeeSiteHash32 ,
 			      uint64_t  linkeeUrlHash64  ,
 			      bool      isLinkSpam     ,
 			      unsigned char linkerSiteRank , // 0-15 i guess
@ -182,7 +184,7 @@ class Linkdb {
 			      bool      isDelete       );
 	

-	key224_t makeStartKey_uk ( uint32_t linkeeSiteHash32 ,
+	static key224_t makeStartKey_uk ( uint32_t linkeeSiteHash32 ,
 				   uint64_t linkeeUrlHash64  = 0LL ) {
 		return makeKey_uk ( linkeeSiteHash32,
 				    linkeeUrlHash64,
@ -198,7 +200,7 @@ class Linkdb {
 				    true); // is delete?
 	}

-	key224_t makeEndKey_uk ( uint32_t linkeeSiteHash32 ,
+	static key224_t makeEndKey_uk ( uint32_t linkeeSiteHash32 ,
 				 uint64_t linkeeUrlHash64  = 
 				 0xffffffffffffffffLL ) {
 		return makeKey_uk ( linkeeSiteHash32,
@ -219,10 +221,11 @@ class Linkdb {
 	// accessors for "url" keys in linkdb
 	//

-	uint32_t getLinkeeSiteHash32_uk ( key224_t *key ) {
-		return (key->n3) >> 32; }
+	static uint32_t getLinkeeSiteHash32_uk ( key224_t *key ) {
+		return (key->n3) >> 32;
+	}

-	uint64_t getLinkeeUrlHash64_uk ( key224_t *key ) {
+	static uint64_t getLinkeeUrlHash64_uk ( key224_t *key ) {
 		uint64_t h = key->n3;
 		h &= 0x00000000ffffffffLL;
 		h <<= 15;
@ -230,19 +233,19 @@ class Linkdb {
 		return h;
 	}

-	char isLinkSpam_uk (key224_t *key ) {
+	static char isLinkSpam_uk (key224_t *key ) {
 		if ((key->n2) & 0x1000000000000LL) return true; 
 		return false;
 	}

-	unsigned char getLinkerSiteRank_uk ( key224_t *k ) {
+	static unsigned char getLinkerSiteRank_uk ( key224_t *k ) {
 		unsigned char rank = (k->n2 >> 40) & 0xff;
 		// complement it back
 		rank = (unsigned char)~rank;//LDB_MAXSITERANK - rank;
 		return rank;
 	}
-	
-	int32_t getLinkerIp_uk ( key224_t *k ) {
+
+	static int32_t getLinkerIp_uk ( key224_t *k ) {
 		uint32_t ip ;
 		// the most significant part of the ip is the lower byte!!!
 		ip = (uint32_t)((k->n2>>8)&0x00ffffff);
@ -250,7 +253,7 @@ class Linkdb {
 		return ip;
 	}

-	void setIp32_uk ( void *k , uint32_t ip ) {
+	static void setIp32_uk ( void *k , uint32_t ip ) {
 		char *ips = (char *)&ip;
 		char *ks = (char *)k;
 		ks[16] = ips[3];
@ -261,11 +264,11 @@ class Linkdb {


 	// we are missing the lower byte, it will be zero
-	int32_t getLinkerIp24_uk ( key224_t *k ) {
+	static int32_t getLinkerIp24_uk ( key224_t *k ) {
 		return (int32_t)((k->n2>>8)&0x00ffffff); 
 	}

-	int64_t getLinkerDocId_uk( key224_t *k ) {
+	static int64_t getLinkerDocId_uk( key224_t *k ) {
 		uint64_t d = k->n2 & 0xff;
 		d <<= 30;
 		d |= k->n1 >>34;
@ -274,7 +277,7 @@ class Linkdb {

 	// . in days since jan 1, 2012 utc
 	// . timestamp of jan 1, 2012 utc is 1325376000
-	int32_t getDiscoveryDate_uk ( void *k ) {
+	static int32_t getDiscoveryDate_uk ( void *k ) {
 		uint32_t date = ((key224_t *)k)->n1 >> 18;
 		date &= 0x00003fff;
 		// if 0 return that
@ -289,7 +292,7 @@ class Linkdb {

 	// . in days since jan 1, 2012 utc
 	// . timestamp of jan 1, 2012 utc is 1325376000
-	void setDiscoveryDate_uk ( void *k , int32_t date ) {
+	static void setDiscoveryDate_uk ( void *k , int32_t date ) {
 		// subtract jan 1 2012
 		date -= LINKDBEPOCH;
 		// convert into days
@ -302,7 +305,7 @@ class Linkdb {
 		((key224_t *)k)->n1 |= ((uint64_t)date) << 18;
 	}

-	int32_t getLostDate_uk ( void *k ) {
+	static int32_t getLostDate_uk ( void *k ) {
 		uint32_t date = ((key224_t *)k)->n1 >> 2;
 		date &= 0x00003fff;
 		// if 0 return that
@ -317,7 +320,7 @@ class Linkdb {

 	// . in days since jan 1, 2012 utc
 	// . timestamp of jan 1, 2012 utc is 1325376000
-	void setLostDate_uk ( void *k , int32_t date ) {
+	static void setLostDate_uk ( void *k , int32_t date ) {
 		// subtract jan 1 2012
 		date -= LINKDBEPOCH;
 		// convert into days
@ -330,18 +333,15 @@ class Linkdb {
 		((key224_t *)k)->n1 |= ((uint64_t)date) << 2;
 	}

-	uint32_t getLinkerSiteHash32_uk( void *k ) {
+	static uint32_t getLinkerSiteHash32_uk( void *k ) {
 		uint32_t sh32 = ((key224_t *)k)->n1 & 0x00000003;
 		sh32 <<= 30;
 		sh32 |= ((key224_t *)k)->n0 >> 2;
 		return sh32;
 	}

-	Rdb           *getRdb()           { return &m_rdb; }
-
- private:
-	Rdb           m_rdb;
-
+private:
+	Rdb m_rdb;
 };

 extern class Linkdb g_linkdb;
--- a/Loop.cpp
+++ b/Loop.cpp
@ -208,6 +208,7 @@ bool Loop::registerSleepCallback ( int32_t tick, void *state, void (* callback)(
 		return false;
 	}

+	ScopedLock sl(m_slotMutex);
 	if ( tick < m_minTick ) {
 		m_minTick = tick;
 	}
--- a/Msg0.cpp
+++ b/Msg0.cpp
@ -775,7 +775,7 @@ void gotListWrapper ( void *state , RdbList *listb , Msg5 *msg5xx ) {
 			totalOrigLinks++;
 			// get rec
 			char *rec = list->getCurrentRec();
-			int32_t ip32 = g_linkdb.getLinkerIp_uk((key224_t *)rec );
+			int32_t ip32 = Linkdb::getLinkerIp_uk((key224_t *)rec );
 			// same as one before?
 			if ( ip32 == lastIp32 && 
 			     // are we the last rec? include that for
--- a/Msg20.cpp
+++ b/Msg20.cpp
@ -141,7 +141,7 @@ bool Msg20::getSummary ( Msg20Request *req ) {
 	if ( req->m_docId >= 0 ) 
 		shardNum = g_hostdb.getShardNumFromDocId(req->m_docId);
 	else {
-		int64_t pdocId = g_titledb.getProbableDocId(req->ptr_ubuf);
+		int64_t pdocId = Titledb::getProbableDocId(req->ptr_ubuf);
 		shardNum = getShardNumFromDocId(pdocId);
 	}

@ -193,7 +193,7 @@ bool Msg20::getSummary ( Msg20Request *req ) {
 	int64_t probDocId    = req->m_docId;
 	// i think reference pages just pass in a url to get the summary
 	if ( probDocId < 0 && req->size_ubuf ) 
-		probDocId = g_titledb.getProbableDocId ( req->ptr_ubuf );
+		probDocId = Titledb::getProbableDocId ( req->ptr_ubuf );
 	if ( probDocId < 0        ) {
 		log("query: Got bad docid/url combo.");
 		probDocId = 0;
@ -363,7 +363,7 @@ static void handleRequest20(UdpSlot *slot, int32_t netnice) {
 		log(LOG_DEBUG, "query: Summary cache miss");

 	// if it's not stored locally that's an error
-	if ( req->m_docId >= 0 && ! g_titledb.isLocal ( req->m_docId ) ) {
+	if ( req->m_docId >= 0 && ! Titledb::isLocal ( req->m_docId ) ) {
 		log(LOG_WARN, "query: Got msg20 request for non-local docId %" PRId64, req->m_docId);
 		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
 		g_udpServer.sendErrorReply ( slot , ENOTLOCAL ); 
--- a/Msg22.cpp
+++ b/Msg22.cpp
@ -134,7 +134,7 @@ bool Msg22::getTitleRec ( Msg22Request  *r              ,

 	// if no docid provided, use probable docid
 	if ( ! docId ) 
-		docId = g_titledb.getProbableDocId ( url );
+		docId = Titledb::getProbableDocId ( url );

 	// get groupId from docId
 	uint32_t shardNum = getShardNumFromDocId ( docId );
@ -359,8 +359,8 @@ void handleRequest22 ( UdpSlot *slot , int32_t netnice ) {
 	// so try the range
 	if ( r->m_getAvailDocIdOnly ) {
 	   int64_t pd = r->m_docId;
-	   int64_t d1 = g_titledb.getFirstProbableDocId ( pd );
-	   int64_t d2 = g_titledb.getLastProbableDocId  ( pd );
+	   int64_t d1 = Titledb::getFirstProbableDocId ( pd );
+	   int64_t d2 = Titledb::getLastProbableDocId  ( pd );
 	   // sanity - bad url with bad subdomain?
 	   if ( pd < d1 || pd > d2 ) { g_process.shutdownAbort(true); }
 	   // make sure we get a decent sample in titledb then in
@ -388,9 +388,9 @@ void handleRequest22 ( UdpSlot *slot , int32_t netnice ) {
 	       delete ( st );
 	       return;
 	   }
-	   int64_t pd = g_titledb.getProbableDocId (r->m_url,dom,dlen);
-	   int64_t d1 = g_titledb.getFirstProbableDocId ( pd );
-	   int64_t d2 = g_titledb.getLastProbableDocId  ( pd );
+	   int64_t pd = Titledb::getProbableDocId (r->m_url,dom,dlen);
+	   int64_t d1 = Titledb::getFirstProbableDocId ( pd );
+	   int64_t d2 = Titledb::getLastProbableDocId  ( pd );
 	   // sanity - bad url with bad subdomain?
 	   if ( pd < d1 || pd > d2 ) { g_process.shutdownAbort(true); }
 	   // store these
@ -406,8 +406,8 @@ void handleRequest22 ( UdpSlot *slot , int32_t netnice ) {
 	// since it would base it on startFileNum and numFiles
 	key96_t cacheKey ; cacheKey.n1 = 0; cacheKey.n0 = r->m_docId;
 	// make titledb keys
-	key96_t startKey = g_titledb.makeFirstKey ( st->m_docId1 );
-	key96_t endKey   = g_titledb.makeLastKey  ( st->m_docId2 );
+	key96_t startKey = Titledb::makeFirstKey ( st->m_docId1 );
+	key96_t endKey   = Titledb::makeLastKey  ( st->m_docId2 );

 	// . load the list of title recs from disk now
 	// . our file range should be solid
@ -468,7 +468,7 @@ void gotTitleList ( void *state , RdbList *list , Msg5 *msg5 ) {
 	// set probable docid
 	int64_t pd = 0LL;
 	if ( r->m_url[0] ) {
-		pd = g_titledb.getProbableDocId(r->m_url);
+		pd = Titledb::getProbableDocId(r->m_url);
 		if ( pd != st->m_pd ) { 
 			log("db: crap probable docids do not match! u=%s",
 			    r->m_url);
@ -500,7 +500,7 @@ void gotTitleList ( void *state , RdbList *list , Msg5 *msg5 ) {
 		if ( ( k->n0 & 0x01 ) == 0x00 ) continue;

 		// get docid of that titlerec
-		int64_t dd = g_titledb.getDocId(k);
+		int64_t dd = Titledb::getDocId(k);

 		if ( r->m_getAvailDocIdOnly ) {
 			// make sure our available docids are availble!
@ -511,7 +511,7 @@ void gotTitleList ( void *state , RdbList *list , Msg5 *msg5 ) {
 		// if we had a url make sure uh48 matches
 		else if ( r->m_url[0] ) {
 			// get it
-			int64_t uh48 = g_titledb.getUrlHash48(k);
+			int64_t uh48 = Titledb::getUrlHash48(k);

 			// make sure our available docids are availble!
 			if ( dd == ad1 ) ad1++;
--- a/Msg3a.cpp
+++ b/Msg3a.cpp
@ -639,7 +639,7 @@ bool Msg3a::gotAllShardReplies ( ) {
 			     j                                        ,
 			     i                                        ,
 			     docIds [j] ,
-			     (int32_t)g_titledb.getDomHash8FromDocId(docIds[j]),
+			     (int32_t)Titledb::getDomHash8FromDocId(docIds[j]),
 			      scores[j] );
 		}
 	}
--- a/Msg4.cpp
+++ b/Msg4.cpp
@ -65,22 +65,15 @@ static Msg4 *s_msg4Tail = NULL;
 // . also, need to update spiderdb rec for the url in Msg14 using Msg4 too!
 // . need to add support for passing in array of lists for Msg14

-static bool       addMetaList ( const char *p , class UdpSlot *slot = NULL );
-static void       gotReplyWrapper4 ( void    *state   , void *state2   ) ;
-static void       storeLineWaiters ( ) ;
-static void       handleRequest4   ( UdpSlot *slot    , int32_t  niceness ) ;
-static void       sleepCallback4   ( int bogusfd      , void *state    ) ;
-static bool       sendBuffer       ( int32_t hostId , int32_t niceness ) ;
-static Multicast *getMulticast     ( ) ;
-static void       returnMulticast  ( Multicast *mcast ) ;
-
-static bool storeRec   ( collnum_t      collnum , 
-			 char           rdbId   ,
-			 uint32_t  gid     ,
-			 int32_t           hostId  ,
-			 const char          *rec     ,
-			 int32_t           recSize ,
-			 int32_t           niceness ) ;
+static bool addMetaList(const char *p, class UdpSlot *slot = NULL);
+static void gotReplyWrapper4(void *state, void *state2);
+static void handleRequest4(UdpSlot *slot, int32_t niceness);
+static void sleepCallback4(int bogusfd, void *state);
+static void flushLocal();
+static bool sendBuffer(int32_t hostId);
+static Multicast *getMulticast();
+static void returnMulticast(Multicast *mcast);
+static bool storeRec(collnum_t collnum, char rdbId, uint32_t gid, int32_t hostId, const char *rec, int32_t recSize);

 // all these parameters should be preset
 bool Msg4::registerHandler() {
@ -128,9 +121,6 @@ bool Msg4::registerHandler() {
 	return rc;
 }

-
-static void flushLocal ( ) ;
-
 // scan all host bufs and try to send on them
 void sleepCallback4 ( int bogusfd , void    *state ) {
 	// wait for clock to be in sync
@ -145,7 +135,7 @@ void flushLocal ( ) {
 	//storeLineWaiters();
 	// now try to send the buffers
 	for ( int32_t i = 0 ; i < s_numHostBufs ; i++ ) 
-		sendBuffer ( i , MAX_NICENESS );
+		sendBuffer ( i );
 	g_errno = 0;
 }

@ -183,12 +173,12 @@ bool hasAddsInQueue   ( ) {
 }

 bool Msg4::addMetaList ( SafeBuf *sb, collnum_t collnum, void *state, void (* callback)(void *state),
-                         int32_t niceness, char rdbId, int32_t shardOverride ) {
-	return addMetaList ( sb->getBufStart(), sb->length(), collnum, state, callback, niceness, rdbId, shardOverride );
+                         rdbid_t rdbId, int32_t shardOverride ) {
+	return addMetaList ( sb->getBufStart(), sb->length(), collnum, state, callback, rdbId, shardOverride );
 }

 bool Msg4::addMetaList ( const char *metaList, int32_t metaListSize, collnum_t collnum, void *state,
-                         void (* callback)(void *state), int32_t niceness, char rdbId,
+                         void (* callback)(void *state), rdbid_t rdbId,
                         // Rebalance.cpp needs to add negative keys to
                         // remove foreign records from where they no
                         // longer belong because of a new hosts.conf file.
@ -212,7 +202,6 @@ bool Msg4::addMetaList ( const char *metaList, int32_t metaListSize, collnum_t c
 	m_state        = state;
 	m_callback     = callback;
 	m_rdbId        = rdbId;
-	m_niceness     = niceness;
 	m_next         = NULL;
 	m_shardOverride = shardOverride;

@ -279,7 +268,7 @@ bool Msg4::addMetaList ( const char *metaList, int32_t metaListSize, collnum_t c
 	return false;
 }

-bool isInMsg4LinkedList ( Msg4 *msg4 ) {
+bool Msg4::isInLinkedList ( Msg4 *msg4 ) {
 	Msg4 *m = s_msg4Head;
 	for ( ; m ; m = m->m_next ) 
 		if ( m == msg4 ) return true;
@ -300,12 +289,10 @@ bool Msg4::addMetaList2 ( ) {
 	// store each record in the list into the send buffers
 	for ( ; p < pend ; ) {
 		// first is rdbId
-		char rdbId = m_rdbId;
-		if ( rdbId < 0 ) rdbId = *p++;
-		// mask off rdbId
-		rdbId &= 0x7f;
-
-		logTrace( g_conf.m_logTraceMsg4, "  rdbId: %02x", rdbId);
+		rdbid_t rdbId = m_rdbId;
+		if ( rdbId == RDB_NONE ) {
+			rdbId = (rdbid_t)(*p++ & 0x7f);
+		}

 		// get the key of the current record
 		const char *key = p;
@ -313,12 +300,8 @@ bool Msg4::addMetaList2 ( ) {
 		// get the key size. a table lookup in Rdb.cpp.
 		int32_t ks = getKeySizeFromRdbId ( rdbId );

-		logTrace( g_conf.m_logTraceMsg4, "  Key: %s", KEYSTR(key, ks) );
-		logTrace( g_conf.m_logTraceMsg4, "  Key size: %" PRId32, ks);
-
 		// negative key?
 		bool del = !( *key & 0x01 );
-		logTrace( g_conf.m_logTraceMsg4, "  Negative key: %s", del?"true":"false");

 		// skip key
 		p += ks;
@ -330,16 +313,12 @@ bool Msg4::addMetaList2 ( ) {
 		if ( m_shardOverride >= 0 ) {
 			shardNum = m_shardOverride;
 		}
-			
-		logTrace( g_conf.m_logTraceMsg4, "  shardNum: %" PRId32, shardNum);

 		// get the record, is -1 if variable. a table lookup.
 		// . negative keys have no data
 		// . this unfortunately is not true according to RdbList.cpp
 		int32_t dataSize = del ? 0 : getDataSizeFromRdbId ( rdbId );

-		logTrace( g_conf.m_logTraceMsg4, "  dataSize: %" PRId32, dataSize);
-
 		// if variable read that in
 		if ( dataSize == -1 ) {
 			// -1 means to read it in
@ -349,8 +328,6 @@ bool Msg4::addMetaList2 ( ) {

 			// skip dataSize
 			p += 4;
-
-			logTrace( g_conf.m_logTraceMsg4, "  dataSize: %" PRId32" (variable size read)", dataSize);
 		}

 		// skip over the data, if any
@ -358,18 +335,15 @@ bool Msg4::addMetaList2 ( ) {
 		
 		// breach us?
 		if ( p > pend ) { g_process.shutdownAbort(true); }
-			
-		// i fixed UdpServer.cpp to NOT call msg4 handlers when in
-		// a quickpoll, in case we receive a niceness 0 msg4 request
-		QUICKPOLL(m_niceness);
 		
 		// convert the gid to the hostid of the first host in this
 		// group. uses a quick hash table.
 		Host *hosts = g_hostdb.getShard ( shardNum );
 		int32_t hostId = hosts[0].m_hostId;
-		logTrace( g_conf.m_logTraceMsg4, "  hostId: %" PRId32, hostId);
-		
-		
+
+		logTrace(g_conf.m_logTraceMsg4, "  rdb=%s key=%s keySize=%" PRId32" isDel=%d dataSize=%" PRId32" shardNum=%" PRId32" hostId=%" PRId32,
+		         getDbnameFromId(rdbId), KEYSTR(key, ks), ks, del, dataSize, shardNum, hostId); // arg order must match the format labels
+
 		// . add that rec to this groupId, gid, includes the key
 		// . these are NOT allowed to be compressed (half bit set)
 		//   and this point
@ -377,7 +351,7 @@ bool Msg4::addMetaList2 ( ) {
 #ifdef _VALGRIND_
 	VALGRIND_CHECK_MEM_IS_DEFINED(key,p-key);
 #endif
-		if ( storeRec ( m_collnum, rdbId, shardNum, hostId, key, p - key, m_niceness )) {
+		if ( storeRec ( m_collnum, rdbId, shardNum, hostId, key, p - key )) {
 			// . point to next record
 			// . will point past records if no more left!
 			m_currentPtr = p;
@ -416,8 +390,7 @@ bool storeRec ( collnum_t      collnum ,
 		uint32_t  shardNum,
 		int32_t           hostId  ,
 		const char          *rec     ,
-		int32_t           recSize ,
-		int32_t           niceness ) {
+		int32_t           recSize ) {
 #ifdef _VALGRIND_
 	VALGRIND_CHECK_MEM_IS_DEFINED(&collnum,sizeof(collnum));
 	VALGRIND_CHECK_MEM_IS_DEFINED(&rdbId,sizeof(rdbId));
@ -491,7 +464,7 @@ bool storeRec ( collnum_t      collnum ,
 		//   will he be able to proceed. we will call his callback
 		//   as soon as we can copy... use this->m_msg1 to add the
 		//   list that was passed in...
-		if ( ! sendBuffer ( hostId , niceness ) ) return false;
+		if ( ! sendBuffer ( hostId ) ) return false;
 		// now the buffer should be empty, try again
 		goto retry;
 	}
@ -515,7 +488,7 @@ bool storeRec ( collnum_t      collnum ,
 // . returns false if we were UNable to get a multicast to launch the buffer, 
 //   true otherwise
 // . returns false and sets g_errno on error
-bool sendBuffer ( int32_t hostId , int32_t niceness ) {
+bool sendBuffer ( int32_t hostId ) {
 	//logf(LOG_DEBUG,"build: sending buf");
 	// how many bytes of the buffer are occupied or "in use"?
 	char *buf       = s_hostBufs    [hostId];
@ -673,10 +646,10 @@ void gotReplyWrapper4 ( void *state , void *state2 ) {

 	returnMulticast(mcast);

-	storeLineWaiters(); // try to launch more msg4 requests in waiting
+	Msg4::storeLineWaiters(); // try to launch more msg4 requests in waiting
 }

-void storeLineWaiters ( ) {
+void Msg4::storeLineWaiters ( ) {
 	// try to store all the msg4's lists that are waiting in line
 	for (;;) {
 		Msg4 *msg4 = s_msg4Head;
@ -731,17 +704,14 @@ void storeLineWaiters ( ) {
 void handleRequest4 ( UdpSlot *slot , int32_t netnice ) {
 	logTrace( g_conf.m_logTraceMsg4, "BEGIN" );

-	// easy var
-	UdpServer *us = &g_udpServer;
-
 	// if we just came up we need to make sure our hosts.conf is in
 	// sync with everyone else before accepting this! it might have
 	// been the case that the sender thinks our hosts.conf is the same
 	// since last time we were up, so it is up to us to check this
 	if ( g_pingServer.m_hostsConfInDisagreement ) {
 		g_errno = EBADHOSTSCONF;
-		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
-		us->sendErrorReply ( slot , g_errno );
+		logError("call sendErrorReply");
+		g_udpServer.sendErrorReply ( slot , g_errno );
 		
 		log(LOG_WARN,"%s:%s: END - hostsConfInDisagreement", __FILE__, __func__ );
 		return;
@ -753,8 +723,8 @@ void handleRequest4 ( UdpSlot *slot , int32_t netnice ) {
 		// . this is 0 if not received yet
 		if (!slot->m_host->m_pingInfo.m_hostsConfCRC) {
 			g_errno = EWAITINGTOSYNCHOSTSCONF;
-			log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
-			us->sendErrorReply ( slot , g_errno );
+			logError("call sendErrorReply");
+			g_udpServer.sendErrorReply ( slot , g_errno );
 			
 			log(LOG_WARN,"%s:%s: END - EWAITINGTOSYNCHOSTCONF", __FILE__, __func__ );
 			return;
@ -763,8 +733,8 @@ void handleRequest4 ( UdpSlot *slot , int32_t netnice ) {
 		// compare our hosts.conf to sender's otherwise
 		if (slot->m_host->m_pingInfo.m_hostsConfCRC != g_hostdb.getCRC()) {
 			g_errno = EBADHOSTSCONF;
-			log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
-			us->sendErrorReply ( slot , g_errno );
+			logError("call sendErrorReply");
+			g_udpServer.sendErrorReply ( slot , g_errno );
 			
 			log(LOG_WARN,"%s:%s: END - EBADHOSTSCONF", __FILE__, __func__ );
 			return;
@ -778,8 +748,8 @@ void handleRequest4 ( UdpSlot *slot , int32_t netnice ) {
 	// must at least have an rdbId
 	if (readBufSize < 7) {
 		g_errno = EREQUESTTOOSHORT;
-		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
-		us->sendErrorReply ( slot , g_errno );
+		logError("call sendErrorReply");
+		g_udpServer.sendErrorReply ( slot , g_errno );
 		
 		log(LOG_ERROR,"%s:%s: END - EREQUESTTOOSHORT", __FILE__, __func__ );
 		return;
@ -793,20 +763,15 @@ void handleRequest4 ( UdpSlot *slot , int32_t netnice ) {
 	if ( used != readBufSize ) {
 		// if we send back a g_errno then multicast retries forever
 		// so just absorb it!
-		log(LOG_ERROR,"%s:%s: msg4: got corrupted request from hostid %" PRId32" "
-		    "used [%" PRId32"] != readBufSize [%" PRId32"]",
-		    __FILE__, 
-		    __func__,
-		    slot->m_host->m_hostId,
-		    used,
-		    readBufSize);
+		logError("msg4: got corrupted request from hostid %" PRId32" used [%" PRId32"] != readBufSize [%" PRId32"]",
+		    slot->m_host->m_hostId, used, readBufSize);

 		loghex(LOG_ERROR, readBuf, (readBufSize < 160 ? readBufSize : 160), "readBuf (first max. 160 bytes)");
-		    
-		us->sendReply(NULL, 0, NULL, 0, slot);
-		//us->sendErrorReply(slot,ECORRUPTDATA);return;}
-		
-		log(LOG_ERROR,"%s:%s: END", __FILE__, __func__ );
+
+		g_udpServer.sendReply(NULL, 0, NULL, 0, slot);
+		//g_udpServer.sendErrorReply(slot,ECORRUPTDATA);return;}
+
+		logError("END");
 		return;
 	}

@ -821,8 +786,8 @@ void handleRequest4 ( UdpSlot *slot , int32_t netnice ) {
 		}
 		// tell send to try again shortly
 		g_errno = ETRYAGAIN;
-		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
-		us->sendErrorReply(slot,g_errno);
+		logError("call sendErrorReply");
+		g_udpServer.sendErrorReply(slot,g_errno);
 		
 		logTrace( g_conf.m_logTraceMsg4, "END - ETRYAGAIN. Waiting to sync with host #0" );
 		return; 
@ -830,15 +795,15 @@ void handleRequest4 ( UdpSlot *slot , int32_t netnice ) {

 	// this returns false with g_errno set on error
 	if (!addMetaList(readBuf, slot)) {
-		log(LOG_ERROR, "%s:%s:%d: call sendErrorReply. error='%s'", __FILE__, __func__, __LINE__, mstrerror(g_errno));
-		us->sendErrorReply(slot,g_errno);
+		logError("call sendErrorReply error='%s'", mstrerror(g_errno));
+		g_udpServer.sendErrorReply(slot,g_errno);

 		logTrace(g_conf.m_logTraceMsg4, "END - addMetaList returned false. g_errno=%d", g_errno);
 		return;
 	}

 	// good to go
-	us->sendReply(NULL, 0, NULL, 0, slot);
+	g_udpServer.sendReply(NULL, 0, NULL, 0, slot);

 	logTrace(g_conf.m_logTraceMsg4, "END - OK");
 }
@ -934,23 +899,20 @@ bool addMetaList ( const char *p , UdpSlot *slot ) {
 		log(LOG_WARN, "seems like a stray /e/repair-addsinprogress.dat file "
 		    "rdbId=%" PRId32". waiting to be in repair mode."
 		    ,(int32_t)rdbId);
-		    //not in repair mode. dropping.",(int32_t)rdbId);
 		g_errno = ETRYAGAIN;
 		return false;
 	}
+
 	// set the list
-	list.set ( (char*)p                , //todo: dodgy cast. RdbList should be fixed
-		   recSize                 ,
-		   (char*)p                , //todo: dodgy cast. RdbList should be fixed
-		   recSize                 ,
-		   rdb->getFixedDataSize() ,
-		   false                   ,  // ownData?
-		   rdb->useHalfKeys()      ,
-		   rdb->getKeySize ()      ); 
+	// todo: dodgy cast to char*. RdbList should be fixed
+	list.set((char *)p, recSize, (char *)p, recSize, rdb->getFixedDataSize(), false, rdb->useHalfKeys(), rdb->getKeySize());
+
 	// advance over the rec data to point to next entry
 	p += recSize;
+
 	// keep track of stats
 	rdb->readRequestAdd ( recSize );
+
 	// this returns false and sets g_errno on error
 	bool status =rdb->addList(collnum, &list, MAX_NICENESS );

@ -966,16 +928,12 @@ bool addMetaList ( const char *p , UdpSlot *slot ) {
 	// no memory means to try again
 	if ( g_errno == ENOMEM ) g_errno = ETRYAGAIN;
 	// doing a full rebuid will add collections
-	if ( g_errno == ENOCOLLREC  &&
-	     g_repairMode > 0       )
-	     //g_repair.m_fullRebuild   )
+	if ( g_errno == ENOCOLLREC  && g_repairMode > 0       )
 		g_errno = ETRYAGAIN;
-	// ignore enocollrec errors since collection can be reset while
-	// spiders are on now.
-	//if ( g_errno == ENOCOLLREC )
-	//	g_errno = 0;
+
 	// are we done
 	if ( g_errno ) return false;
+
 	// success
 	return true;
 }
--- a/Msg4.h
+++ b/Msg4.h
@ -10,58 +10,56 @@ bool loadAddsInProgress ( const char *filenamePrefix );
 // used by Repair.cpp to make sure we are not adding any more data ("writing")
 bool hasAddsInQueue     ( ) ;

-bool isInMsg4LinkedList ( class Msg4 *msg4 ) ;
-
 #include "SafeBuf.h"
 #include "rdbid_t.h"

 class Msg4 {
+public:
+	Msg4() // a freshly constructed Msg4 is not in use
+		: m_inUse(false) {
+	}

- public:
-	static bool registerHandler();
-	// meta list format =
-	// (rdbId | 0x08) then rdb record [if nosplit]
-	// (rdbId | 0x00) then rdb record [if split  ]
-	bool addMetaList( class SafeBuf *sb, collnum_t collnum, void *state,
-	                  void (* callback)(void *state), int32_t niceness, char rdbId = -1, int32_t shardOverride = -1 );
-	bool addMetaList( class SafeBuf *sb, collnum_t collnum, void *state,
-	                  void (* callback)(void *state), int32_t niceness, rdbid_t rdbId, int32_t shardOverride = -1 )
-	{ return addMetaList(sb,collnum,state,callback,niceness,(char)rdbId,shardOverride); }
+	// destroying a Msg4 that is still in use is logged as an error. why wasn't this saved in addsinprogress.dat file?
+	~Msg4() {
+		if (m_inUse) {
+			log(LOG_ERROR, "BAD: MSG4 in use!!!!!! this=%p", this);
+		}
+	}
+
+	bool addMetaList(SafeBuf *sb, collnum_t collnum, void *state, // SafeBuf flavor; callers treat a false return as "blocked"
+	                 void (*callback)(void *state), rdbid_t rdbId = RDB_NONE, int32_t shardOverride = -1);

 	// this one is faster...
 	// returns false if blocked
-	bool addMetaList( const char *metaList, int32_t metaListSize, collnum_t collnum, void *state,
-	                  void (* callback)(void *state), int32_t niceness, char rdbId = -1, int32_t shardOverride = -1 );
-	bool addMetaList( const char *metaList, int32_t metaListSize, collnum_t collnum, void *state,
-	                  void (* callback)(void *state), int32_t niceness, rdbid_t rdbId, int32_t shardOverride = -1 )
-	{ return addMetaList(metaList,metaListSize,collnum,state,callback,niceness,(char)rdbId,shardOverride); }
+	bool addMetaList(const char *metaList, int32_t metaListSize, collnum_t collnum, void *state,
+	                 void (*callback)(void *state), rdbid_t rdbId = RDB_NONE, int32_t shardOverride = -1);

-	bool addMetaList2();
+	bool isInUse() const { return m_inUse; } // read-only accessor; m_inUse itself is private

-	Msg4() { m_inUse = false; }
-	// why wasn't this saved in addsinprogress.dat file?
-	~Msg4() { if ( m_inUse ) log("BAD: MSG4 in use!!!!!!"); }
-
-	// private:
-
-	void         (*m_callback ) ( void *state );
-	void          *m_state;
+	static bool registerHandler();
+	static bool isInLinkedList(Msg4 *msg4);
+	static void storeLineWaiters();

 	SafeBuf m_tmpBuf;

-	char      m_rdbId;
-	char      m_inUse;
+private:
+	bool addMetaList2();
+
+	void (*m_callback )(void *state); // caller-supplied callback, takes m_state
+	void *m_state;
+
+	rdbid_t m_rdbId;
+	bool m_inUse; // false after construction; the destructor logs an error if it is still set
 	collnum_t m_collnum;
-	int32_t      m_niceness;

 	int32_t m_shardOverride;

-	const char *m_metaList     ;
-	int32_t  m_metaListSize ;
-	const char *m_currentPtr   ; // into m_metaList
+	const char *m_metaList;
+	int32_t m_metaListSize;
+	const char *m_currentPtr; // into m_metaList

 	// the linked list for waiting in line
-	class Msg4 *m_next;
+	Msg4 *m_next;
 };

 #endif // GB_MSG4_H
--- a/Msg40.cpp
+++ b/Msg40.cpp
@ -16,6 +16,7 @@
 #include "Process.h"
 #include "GbMutex.h"
 #include "ScopedLock.h"
+#include <new>


 // increasing this doesn't seem to improve performance any on a single
@ -62,7 +63,7 @@ void Msg40::resetBuf2 ( ) {
 		// cast it
 		Msg20 *m = (Msg20 *)p;
 		// free its stuff
-		m->destructor();
+		m->~Msg20();
 		// advance
 		p += sizeof(Msg20);
 	}
@ -629,7 +630,7 @@ bool Msg40::reallocMsg20Buf ( ) {
 			// point to the next Msg20
 			p += sizeof(Msg20);
 			// init it
-			tmp[i]->constructor();
+			new (tmp[i]) Msg20();
 			// count it
 			pcount++;
 			// skip it if it is a new docid, we do not have a Msg20
@ -740,7 +741,7 @@ bool Msg40::reallocMsg20Buf ( ) {
 		// point it to its memory
 		m_msg20[i] = (Msg20 *)p;
 		// call its constructor
-		m_msg20[i]->constructor();
+		new (m_msg20[i]) Msg20();
 		// point to the next Msg20
 		p += sizeof(Msg20);
 		// remember num to free in reset() function
--- a/Msg51.cpp
+++ b/Msg51.cpp
@ -12,7 +12,7 @@
 #include "RdbCache.h"
 #include "ScopedLock.h"
 #include "Sanity.h"
-
+#include "Titledb.h"

 // how many Msg0 requests can we launch at the same time?
 #define MSG51_MAX_REQUESTS 60
@ -511,7 +511,7 @@ bool setClusterLevels ( const key96_t   *clusterRecs,
 		// . get the site hash
 		// . these are only 32 bits!
 		if(fakeIt)
-			h = g_titledb.getDomHash8FromDocId(docIds[i]);
+			h = Titledb::getDomHash8FromDocId(docIds[i]);
 		else
 			h = g_clusterdb.getSiteHash26 ( crec );

--- a/Multicast.cpp
+++ b/Multicast.cpp
@ -1,15 +1,10 @@
-#include "gb-include.h"
-
-
-//		i guess both msg0 send requests failed with no route to host, 
-//and they got retired... why didnt they switch to eth1????
-
-
 #include "Multicast.h"
-#include "Rdb.h"       // RDB_TITLEDB
-#include "Msg20.h"
-#include "Profiler.h"
+#include "UdpServer.h"
+#include "Hostdb.h"
 #include "Stats.h"
+#include "Conf.h"
+#include "Loop.h"         // registerSleepCallback()
+#include "ScopedLock.h"
 #include "Process.h"

 // up to 10 twins in a group
@ -19,20 +14,66 @@
 //       to send we should send as much as we can and save the remaining
 //       slots to disk for sending later??

-static void sleepWrapper1       ( int bogusfd , void    *state ) ;
-static void sleepWrapper2       ( int bogusfd , void    *state ) ;
-static void gotReplyWrapperM1    ( void *state , UdpSlot *slot  ) ;
-static void gotReplyWrapperM2    ( void *state , UdpSlot *slot  ) ;

-void Multicast::constructor ( ) {
+
+void Multicast::constructor() { // explicit initializer; the Multicast() constructor body calls it too
 	m_msg      = NULL; // no request buffer yet
 	m_readBuf  = NULL; // no reply buffer yet
 	m_inUse    = false; // idle
 }
-void Multicast::destructor  ( ) { reset(); }

-Multicast::Multicast ( ) { constructor(); }
-Multicast::~Multicast ( ) { reset(); }
+void Multicast::destructor() { // explicit teardown; does the same work as ~Multicast()
+	reset();
+}
+
+Multicast::Multicast() // gives every scalar/pointer member a defined value; the per-host arrays are skipped (see below)
+  : m_msg(NULL),
+    m_msgSize(0),
+    m_msgType((msg_type_t)-1), // NOTE(review): -1 presumably means "no message type yet" - confirm
+    m_ownMsg(false),
+    m_slot(NULL),
+    m_inUse(false),
+    m_next(NULL),
+    m_replyingHost(NULL),
+    m_replyLaunchTime(0),
+    m_hackFileId(0),
+    m_hackFileOff(0),
+    m_importState(NULL),
+    m_mtx(),
+    m_state(NULL), m_state2(NULL),
+    m_callback(NULL),
+    m_totalTimeout(0),
+    m_startTime(0),
+    m_numReplies(0),
+    // m_hostPtrs: array, not initialized in this list
+    m_numHosts(0),
+    // m_retired: array, not initialized here; send() zeroes it with memset
+    // m_slots: array, not initialized here; send() zeroes it with memset
+    // m_errnos: array, not initialized here; send() zeroes it with memset
+    // m_inProgress: array, not initialized here; send() zeroes it with memset
+    // m_launchTime: array, not initialized in this list
+    m_readBuf(NULL),
+    m_readBufSize(0),
+    m_readBufMaxSize(0),
+    m_ownReadBuf(false),
+    m_registeredSleep(false),
+    m_niceness(0),
+    m_lastLaunch(0),
+    m_lastLaunchHost(NULL),
+    m_freeReadBuf(false),
+    m_key(0),
+    m_sendToSelf(false),
+    m_retryCount(0),
+    m_sentToTwin(false)
+{
+	constructor(); // re-assigns m_msg, m_readBuf and m_inUse, which the list above already set to the same values
+
+}
+
+Multicast::~Multicast() { // all cleanup is delegated to reset()
+	reset();
+}
+

 // free the send/read (request/reply) bufs we pirated from a UdpSlot or
 // got from the caller
@ -103,10 +144,10 @@ bool Multicast::send(char *msg, int32_t msgSize, msg_type_t msgType, bool ownMsg
 	m_key              = key;

 	// clear m_retired, m_errnos, m_slots
-	memset ( m_retired    , 0 , sizeof(bool     ) * MAX_HOSTS_PER_GROUP );
-	memset ( m_errnos     , 0 , sizeof(int32_t     ) * MAX_HOSTS_PER_GROUP );
-	memset ( m_slots      , 0 , sizeof(UdpSlot *) * MAX_HOSTS_PER_GROUP );
-	memset ( m_inProgress , 0 , sizeof(char     ) * MAX_HOSTS_PER_GROUP );
+	memset(m_retired, 0, sizeof(m_retired));
+	memset(m_errnos, 0, sizeof(m_errnos));
+	memset(m_slots, 0, sizeof(m_slots));
+	memset(m_inProgress, 0, sizeof(m_inProgress));

 	// . get the list of hosts in this group
 	// . returns false if blocked, true otherwise
@ -160,6 +201,7 @@ bool Multicast::send(char *msg, int32_t msgSize, msg_type_t msgType, bool ownMsg
 // . TODO: deal with errors from g_udpServer::sendRequest() better
 // . returns false and sets g_errno on error
 void Multicast::sendToGroup() {
+	ScopedLock sl(m_mtx);
 	// see if anyone gets an error
 	bool hadError = false;
 	// . cast the msg to ALL hosts in the m_hosts group of hosts
@ -203,7 +245,7 @@ void Multicast::sendToGroup() {
 		// . send to a single host
 		// . this creates a transaction control slot, "udpSlot"
 		// . returns false and sets g_errno on error
-		if (us->sendRequest(m_msg, m_msgSize, m_msgType, bestIp, destPort, hid, &m_slots[i], this, gotReplyWrapperM2, m_totalTimeout, m_niceness)) {
+		if (us->sendRequest(m_msg, m_msgSize, m_msgType, bestIp, destPort, hid, &m_slots[i], this, gotReply2, m_totalTimeout, m_niceness)) {
 			continue;
 		}
 		// g_errno must have been set, remember it
@ -237,22 +279,23 @@ void Multicast::sendToGroup() {
 	}
 }

-void sleepWrapper2 ( int bogusfd , void *state ) {
-	Multicast *THIS = (Multicast *)state;
+void Multicast::sleepWrapper2(int bogusfd, void *state) { // static callback; bogusfd is not used
+	Multicast *THIS = static_cast<Multicast*>(state); // state is expected to be the Multicast instance
 	// try another round of sending to see if hosts had errors or not
-	THIS->sendToGroup ( );
+	THIS->sendToGroup();
 }

-// C wrapper for the C++ callback
-void gotReplyWrapperM2 ( void *state , UdpSlot *slot ) {
-	Multicast *THIS = (Multicast *)state;
-        THIS->gotReply2 ( slot );
+
+void Multicast::gotReply2(void *state, UdpSlot *slot) { // static trampoline handed to sendRequest() by sendToGroup()
+	Multicast *THIS = static_cast<Multicast*>(state); // sendToGroup() passes "this" as the state
+        THIS->gotReply2(slot); // forward to the member overload
 }

 // . otherwise, we were sending to a whole group so ALL HOSTS must produce a 
 //   successful reply
 // . we keep re-trying forever until they do
 void Multicast::gotReply2 ( UdpSlot *slot ) {
+	ScopedLock sl(m_mtx);
 	// don't ever let UdpServer free this send buf (it is m_msg)
 	slot->m_sendBufAlloc = NULL;
 	// save this for msg4 logic that calls injection callback
@ -290,6 +333,7 @@ void Multicast::gotReply2 ( UdpSlot *slot ) {
 		// allow us to be re-used now, callback might relaunch
 		m_inUse = false;
 		if ( m_callback ) {
+			sl.unlock();
 			m_callback ( m_state , m_state2 );
 		}
 		return;
@ -626,7 +670,8 @@ bool Multicast::sendToHost ( int32_t i ) {
 	// . this creates a transaction control slot, "udpSlot"
 	// . return false and sets g_errno on error
 	// . returns true on successful launch and calls callback on completion
-	if (!us->sendRequest(m_msg, m_msgSize, m_msgType, bestIp, destPort, hid, &m_slots[i], this, gotReplyWrapperM1, timeRemaining, m_niceness, NULL, -1, -1, maxResends)) {
+	ScopedLock sl(m_mtx);
+	if (!us->sendRequest(m_msg, m_msgSize, m_msgType, bestIp, destPort, hid, &m_slots[i], this, gotReply1, timeRemaining, m_niceness, NULL, -1, -1, maxResends)) {
 		log(LOG_WARN, "net: Had error sending msgtype 0x%02x to host #%" PRId32": %s. Not retrying.",
 		    m_msgType,h->m_hostId,mstrerror(g_errno));
 		// i've seen ENOUDPSLOTS available msg here along with oom
@ -635,7 +680,7 @@ bool Multicast::sendToHost ( int32_t i ) {
 		return false;
 	}
 	// mark it as outstanding
-	m_inProgress[i] = 1;
+	m_inProgress[i] = true;
 	// set our last launch date
 	m_lastLaunch = nowms ; // gettimeofdayInMilliseconds();
 	// save the host, too
@ -657,7 +702,7 @@ bool Multicast::sendToHost ( int32_t i ) {

 // this is called every 50 ms so we have the chance to launch our request
 // to a more responsive host
-void sleepWrapper1 ( int bogusfd , void    *state ) {
+void Multicast::sleepWrapper1 ( int bogusfd , void    *state ) {
 	Multicast *THIS = (Multicast *) state;
 	// . if our last launch was less than X seconds ago, wait another tick
 	// . we often send out 2+ requests and end up getting one reply before
@ -851,14 +896,16 @@ void sleepWrapper1 ( int bogusfd , void    *state ) {
 	//    THIS->m_msgType);
 }

-// C wrapper for the C++ callback
-void gotReplyWrapperM1 ( void *state , UdpSlot *slot ) {
-	Multicast *THIS = (Multicast *)state;
-    THIS->gotReply1 ( slot );
+
+void Multicast::gotReply1(void *state, UdpSlot *slot) { // static trampoline handed to sendRequest() by sendToHost()
+	Multicast *THIS = static_cast<Multicast*>(state); // sendToHost() passes "this" as the state
+	THIS->gotReply1(slot); // forward to the member overload
 }

 // come here if we've got a reply from a host that's not part of a group send
 void Multicast::gotReply1 ( UdpSlot *slot ) {		
+	ScopedLock sl(m_mtx);
+
 	// don't ever let UdpServer free this send buf (it is m_msg)
 	slot->m_sendBufAlloc = NULL;

@ -887,7 +934,7 @@ void Multicast::gotReply1 ( UdpSlot *slot ) {
 	}

 	// mark it as no longer in progress
-	m_inProgress[i] = 0;
+	m_inProgress[i] = false;

 	Host *h = m_hostPtrs[i];

@ -900,6 +947,8 @@ void Multicast::gotReply1 ( UdpSlot *slot ) {
 		    (int32_t) m_msgType, (PTRTYPE) this, mstrerror(g_errno));
 	}

+	sl.unlock();
+
 	// on error try sending the request to another host
 	// return if we kicked another request off ok
 	if ( g_errno ) {
@ -1069,7 +1118,7 @@ void Multicast::destroySlotsInProgress ( UdpSlot *slot ) {
 		// destroy this slot that's in progress
 		g_udpServer.destroySlot ( m_slots[i] );
 		// do not re-destroy. consider no longer in progress.
-		m_inProgress[i] = 0;
+		m_inProgress[i] = false;
 	}
 }

--- a/Multicast.h
+++ b/Multicast.h
@ -17,9 +17,11 @@
 #ifndef GB_MULTICAST_H
 #define GB_MULTICAST_H

-#include "Hostdb.h"  // getGroup(), getTimes(), stampHost()
-#include "UdpServer.h"        // sendRequest()
-#include "Loop.h"         // registerSleepCallback()
+#include "MsgType.h"
+#include "GbMutex.h"
+#include <inttypes.h>
+#include <stddef.h>
+

 #define MAX_HOSTS_PER_GROUP 10

@ -31,6 +33,9 @@ static const int64_t multicast_msg3a_default_timeout       =      10000;
 static const int64_t multicast_msg3a_maximum_timeout       =      60000;
 static const int64_t multicast_msg1c_getip_default_timeout =      60000;

+class UdpSlot;
+class Host;
+

 class Multicast {

@ -103,31 +108,37 @@ class Multicast {

 	// private:

-	void destroySlotsInProgress ( UdpSlot *slot );
-
-	// keep these public so C wrapper can call them
-	bool sendToHostLoop(int32_t key, int32_t hostNumToTry, int32_t firstHostId);
-	bool sendToHost    ( int32_t i ); 
-	int32_t pickBestHost  ( uint32_t key , int32_t hostNumToTry );
-	void gotReply1     ( UdpSlot *slot ) ;
-	void closeUpShop   ( UdpSlot *slot ) ;
-
-	void sendToGroup();
-	void gotReply2     ( UdpSlot *slot ) ;
-
 	// . stuff set directly by send() parameters
 	char       *m_msg;
 	int32_t        m_msgSize;
 	msg_type_t     m_msgType;
 	bool        m_ownMsg;
-	//uint32_t m_groupId;
+
+	class UdpSlot *m_slot;
+
+	bool        m_inUse;
+
+	// for linked list of available Multicasts in Msg4.cpp
+	class Multicast *m_next;
+
+	// host we got reply from. used by Msg3a for timing.
+	Host      *m_replyingHost;
+	// when the request was launched to the m_replyingHost
+	int64_t  m_replyLaunchTime;
+
+	// more hack stuff used by PageInject.cpp
+	int32_t m_hackFileId;
+	int64_t m_hackFileOff;
+	class ImportState *m_importState;
+
+private:
+	GbMutex m_mtx;
+
 	void       *m_state;
 	void       *m_state2;
 	void       (* m_callback)( void *state , void *state2 );
 	int64_t       m_totalTimeout;   // in milliseconds

-	class UdpSlot *m_slot;
-
 	// . m_slots[] is our list of concurrent transactions
 	// . we delete all the slots only after cast is done
 	int64_t        m_startTime;   // milliseconds since the epoch
@ -150,7 +161,7 @@ class Multicast {
 	// did we have an errno with this slot?
 	int32_t        m_errnos     [MAX_HOSTS_PER_GROUP]; 
 	// transaction in progress?
-	char        m_inProgress [MAX_HOSTS_PER_GROUP]; 
+	bool        m_inProgress [MAX_HOSTS_PER_GROUP];
 	int64_t   m_launchTime [MAX_HOSTS_PER_GROUP];

 	// steal this from the slot(s) we get
@ -168,6 +179,7 @@ class Multicast {
 	// . last sending of the request to ONE host in a group (pick & send)
 	// . in milliseconds
 	int64_t   m_lastLaunch;
+
 	Host       *m_lastLaunchHost;

 	// only free m_reply if this is true
@ -180,22 +192,23 @@ class Multicast {

 	int32_t        m_retryCount;

-	char        m_sentToTwin;
+	bool        m_sentToTwin;

-	char        m_inUse;
+	void destroySlotsInProgress ( UdpSlot *slot );

-	// for linked list of available Multicasts in Msg4.cpp
-	class Multicast *m_next;
+	void sendToGroup();

-	// host we got reply from. used by Msg3a for timing.
-	Host      *m_replyingHost;
-	// when the request was launched to the m_replyingHost
-	int64_t  m_replyLaunchTime;
+	static void sleepWrapper1(int bogusfd, void *state);
+	static void sleepWrapper2(int bogusfd, void *state);
+	static void gotReply1(void *state, UdpSlot *slot);
+	void gotReply1(UdpSlot *slot);
+	static void gotReply2(void *state, UdpSlot *slot);
+	void gotReply2(UdpSlot *slot);

-	// more hack stuff used by PageInject.cpp
-	int32_t m_hackFileId;
-	int64_t m_hackFileOff;
-	class ImportState *m_importState;
+	bool sendToHostLoop(int32_t key, int32_t hostNumToTry, int32_t firstHostId);
+	bool sendToHost    ( int32_t i ); 
+	int32_t pickBestHost  ( uint32_t key , int32_t hostNumToTry );
+	void closeUpShop   ( UdpSlot *slot ) ;
 };

 #endif // GB_MULTICAST_H
--- a/PageAddUrl.cpp
+++ b/PageAddUrl.cpp
@ -95,7 +95,7 @@ bool sendPageAddUrl2 ( TcpSocket *sock , HttpRequest *hr ) {
 	}

 	// add to spiderdb
-	if ( ! gr->m_msg4.addMetaList( &(gr->m_listBuf), cr->m_collnum, gr, addedUrlsToSpiderdbWrapper, 0 ) ) {
+	if (!gr->m_msg4.addMetaList(&(gr->m_listBuf), cr->m_collnum, gr, addedUrlsToSpiderdbWrapper)) {
 		// blocked!
 		return false;
 	}
--- a/PageCrawlBot.cpp
+++ b/PageCrawlBot.cpp
@ -188,7 +188,7 @@ bool getSpiderRequestMetaList ( const char *doc, SafeBuf *listBuf, bool spiderLi
 		if ( url.getUrlLen() <= 0 ) continue;

 		// need this
-		int64_t probDocId = g_titledb.getProbableDocId(&url);
+		int64_t probDocId = Titledb::getProbableDocId(&url);

 		// make it
 		SpiderRequest sreq;
--- a/PageInject.cpp
+++ b/PageInject.cpp
@ -107,7 +107,7 @@ Host *getHostToHandleInjection ( char *url ) {
 	Url norm;
 	norm.set(url);

-	int64_t docId = g_titledb.getProbableDocId ( &norm );
+	int64_t docId = Titledb::getProbableDocId ( &norm );
 	uint32_t shardNum = getShardNumFromDocId(docId);
 	Host *host = g_hostdb.getHostWithSpideringEnabled(shardNum);

@ -1057,7 +1057,7 @@ bool ImportState::importLoop ( ) {
 	mcast->m_hackFileId  = m_bfFileId;

 	// get docid from key
-	docId = g_titledb.getDocIdFromKey ( &tkey );
+	docId = Titledb::getDocIdFromKey ( &tkey );

 	// get shard that holds the titlerec for it
 	shardNum = g_hostdb.getShardNumFromDocId ( docId );
--- a/PageParser.cpp
+++ b/PageParser.cpp
@ -594,7 +594,7 @@ static bool sendPageParser2 ( TcpSocket   *s ,
 	// if facebook, load xml content from title rec...
 	bool isFacebook = (bool)strstr(st->m_u,"http://www.facebook.com/");
 	if ( isFacebook && ! content ) {
-		int64_t docId = g_titledb.getProbableDocId((char*)st->m_u);
+		int64_t docId = Titledb::getProbableDocId((char*)st->m_u);
 		sprintf(sreq.m_url ,"%" PRIu64, docId );
 		sreq.m_isPageReindex = true;
 	}
--- a/PageReindex.cpp
+++ b/PageReindex.cpp
@ -419,7 +419,7 @@ bool Msg1c::gotList ( ) {

 	log("reindex: adding docid list to spiderdb");

-	return m_msg4.addMetaList ( &m_sb, m_collnum, this, addedListWrapper, 0, RDB_SPIDERDB );
+	return m_msg4.addMetaList(&m_sb, m_collnum, this, addedListWrapper, RDB_SPIDERDB);
 }

 void addedListWrapper ( void *state ) {
--- a/Parms.cpp
+++ b/Parms.cpp
@ -11867,8 +11867,8 @@ bool Parms::syncParmsWithHost0 ( ) {
 void handleRequest3e ( UdpSlot *slot , int32_t niceness ) {
 	// right now we must be host #0
 	if ( g_hostdb.m_hostId != 0 ) {
+hadError:
 		g_errno = EBADENGINEER;
-	hadError:
 		g_udpServer.sendErrorReply( slot, g_errno );
 		return;
 	}
--- a/Posdb.cpp
+++ b/Posdb.cpp
@ -143,7 +143,7 @@ bool Posdb::init ( ) {
 			            false , // istitledb?
 			            getKeySize(),
 			            false,
-						true);
+			            g_conf.m_noInMemoryPosdbMerge);
 }

 // init the rebuild/secondary rdb, used by PageRepair.cpp
@ -160,15 +160,17 @@ bool Posdb::init2 ( int32_t treeMem ) {
 	//   must be able to fit all bins in memory
 	// . we do not want posdb's bin tree to ever hit disk since we
 	//   dump it to rdb files when it is 90% full (90% of bins in use)
-	return m_rdb.init ( g_hostdb.m_dir              ,
-			    "posdbRebuild"            ,
-			    getFixedDataSize(),
-			    1000                        , // min files to merge
-			    treeMem                     ,
-			    maxTreeNodes                ,
-			    getUseHalfKeys(),
-			    false ,
-			    getKeySize());
+	return m_rdb.init(g_hostdb.m_dir,
+	                  "posdbRebuild",
+	                  getFixedDataSize(),
+	                  1000, // min files to merge
+	                  treeMem,
+	                  maxTreeNodes,
+	                  getUseHalfKeys(),
+	                  false,
+	                  getKeySize(),
+	                  false,
+	                  g_conf.m_noInMemoryPosdbMerge);
 }


@ -546,7 +548,7 @@ int Posdb::printList ( RdbList &list ) {
 		const char *dd = "";
 		if ( (k.n0 & 0x01) == 0x00 ) dd = " (delete)";
 		int64_t d = g_posdb.getDocId(&k);
-		uint8_t dh = g_titledb.getDomHash8FromDocId(d);
+		uint8_t dh = Titledb::getDomHash8FromDocId(d);
 		char *rec = list.getCurrentRec();
 		int32_t recSize = 18;
 		if ( rec[0] & 0x04 ) recSize = 6;
--- a/PosdbTable.cpp
+++ b/PosdbTable.cpp
@ -2605,6 +2605,309 @@ VALGRIND_CHECK_MEM_IS_DEFINED(&dcs,sizeof(dcs));
 }


+// Pre-advance each termlist's cursor so the current docid can be skipped quickly.
+// For every non-negative query term's sublist whose cursor sits on the docid at
+// docIdPtr: m_matchingSubListSavedCursor[j] is set to the CURRENT docid's first key
+// and m_matchingSubListCursor[j] to the NEXT docid's 12 byte key (or the list end).
+// Sublists that are exhausted or on another docid get a NULL saved cursor.
+// Returns false if an 18 byte key shows up while skipping (corrupt list), else true.
+// TODO: use just a single array of termlist ptrs perhaps,
+// then we can remove them when they go NULL.  and we'd save a little
+// time not having a nested loop.
+bool PosdbTable::advanceTermListCursors(const char *docIdPtr, QueryTermInfo *qtibuf) {
+	logTrace(g_conf.m_logTracePosdb, "BEGIN");
+
+	for ( int32_t i = 0 ; i < m_numQueryTermInfos ; i++ ) {
+		// get it
+		QueryTermInfo *qti = &qtibuf[i];
+		// do not advance negative termlist cursor
+		if ( qti->m_bigramFlags[0] & BF_NEGATIVE ) {
+			continue;
+		}
+
+		//
+		// In first pass, sublists data is initialized by delNonMatchingDocIdsFromSubLists.
+		// In second pass (to get detailed scoring info for UI output), they are initialized above
+		//
+		for ( int32_t j = 0 ; j < qti->m_numMatchingSubLists ; j++ ) {
+			// shortcuts
+			char *xc    = qti->m_matchingSubListCursor[j];
+			char *xcEnd = qti->m_matchingSubListEnd[j];
+
+			// exhausted? (we can't make cursor NULL because
+			// getMaxPossibleScore() needs the last ptr)
+			// must match docid
+			if ( xc >= xcEnd ||
+			     *(int32_t *)(xc+8) != *(int32_t *)(docIdPtr+1) || // 4 bytes at key offset 8 vs. docIdPtr+1
+			     (*(char *)(xc+7)&0xfc) != (*(char *)(docIdPtr)&0xfc) ) { // upper 6 bits of key byte 7 vs. docIdPtr[0]
+				// flag it as not having the docid
+				qti->m_matchingSubListSavedCursor[j] = NULL;
+				// skip this sublist if does not have our docid
+				continue;
+			}
+
+			// save it
+			qti->m_matchingSubListSavedCursor[j] = xc;
+			// get new docid
+			//log("new docid %" PRId64,Posdb::getDocId(xc) );
+			// advance the cursors. skip our 12
+			xc += 12;
+			// then skip any following 6 byte keys because they
+			// share the same docid
+			for ( ;  ; xc += 6 ) {
+				// end of whole termlist?
+				if ( xc >= xcEnd ) {
+					break;
+				}
+				
+				// sanity. no 18 byte keys allowed
+				if ( (*xc & 0x06) == 0x00 ) {
+					// i've seen this triggered on gk28.
+					// a dump of posdb for the termlist
+					// for 'post' had corruption in it,
+					// yet its twin, gk92 did not. the
+					// corruption could have occurred
+					// anywhere from nov 2012 to may 2013,
+					// and the posdb file was never
+					// re-merged! must have been blatant
+					// disk malfunction?
+					log("posdb: encountered corrupt posdb list. bailing.");
+					logTrace(g_conf.m_logTracePosdb, "END.");
+					return false; // cursor [j] is not updated on this path
+					//gbshutdownAbort(true);
+				}
+				// the next docid? it will be a 12 byte key.
+				if ( ! (*xc & 0x04) ) {
+					break;
+				}
+			}
+			// assign to next docid word position list
+			qti->m_matchingSubListCursor[j] = xc;
+		}
+	}
+
+	logTrace(g_conf.m_logTracePosdb, "END");
+	return true;
+}
+
+
+
+#define RINGBUFSIZE 4096
+
+// Distance-based pre-filter for the docid selected by the saved sublist cursors.
+// TODO: consider skipping this pre-filter if it sucks, as it does
+// for 'search engine'. it might save time!
+// qtibuf: query term infos; qpos: positions indexed like qtibuf (used for the query distance).
+// Returns:
+//	false - docid does not meet minimum score requirement
+//	true - docid can potentially be a top scoring docid
+// (a max possible score equal to minWinningScore also returns false)
+bool PosdbTable::prefilterMaxPossibleScoreByDistance(QueryTermInfo *qtibuf, const int32_t *qpos, float minWinningScore) {
+//#define RINGBUFSIZE 1024
+	unsigned char ringBuf[RINGBUFSIZE+10]; // one byte per slot: a query term index, or 0xff for "empty"
+	// guard bytes: the 4-byte empty check in the scan loop below can read past slot RINGBUFSIZE-1
+	ringBuf[RINGBUFSIZE+0] = 0xff;
+	ringBuf[RINGBUFSIZE+1] = 0xff;
+	ringBuf[RINGBUFSIZE+2] = 0xff;
+	ringBuf[RINGBUFSIZE+3] = 0xff;
+	unsigned char qt;
+	QueryTermInfo *qtx;
+	uint32_t wx;
+	int32_t ourFirstPos = -1; // ring slot of a first key of term m_minTermListIdx (the last sublist scanned wins)
+	int32_t qdist;
+	
+	logTrace(g_conf.m_logTracePosdb, "BEGIN");
+
+
+	// reset ring buf. make all slots 0xff. should be 1000 cycles or so.
+	memset ( ringBuf, 0xff, RINGBUFSIZE );
+
+	// now to speed up 'time enough for love' query which does not
+	// have many super high scoring guys on top we need a more restrictive
+	// filter than getMaxPossibleScore() so let's pick one query term,
+	// the one with the shortest termlist, and see how close it gets to
+	// each of the other query terms. then score each of those pairs.
+	// so quickly record the word positions of each query term into
+	// a ring buffer of 4096 slots where each slot contains the
+	// query term # (0xff marks an empty slot).
+
+	logTrace(g_conf.m_logTracePosdb, "Ring buffer generation");
+	qtx = &qtibuf[m_minTermListIdx];
+	// populate ring buf just for this query term
+	for ( int32_t k = 0 ; k < qtx->m_numMatchingSubLists ; k++ ) {
+		// scan that sublist and add word positions
+		char *sub = qtx->m_matchingSubListSavedCursor[k];
+		// skip sublist if its saved cursor is NULL
+		if ( ! sub ) {
+			continue;
+		}
+
+		char *end = qtx->m_matchingSubListCursor[k]; // keys for this docid run from sub up to end
+		// add first key
+		//int32_t wx = Posdb::getWordPos(sub);
+		wx = (*((uint32_t *)(sub+3))) >> 6;
+		// mod with 4096
+		wx &= (RINGBUFSIZE-1);
+		// store it. 0 is legit.
+		ringBuf[wx] = m_minTermListIdx;
+		// set this
+		ourFirstPos = wx;
+		// skip first key
+		sub += 12;
+		// then 6 byte keys
+		for ( ; sub < end ; sub += 6 ) {
+			// get word position
+			//wx = Posdb::getWordPos(sub);
+			wx = (*((uint32_t *)(sub+3))) >> 6;
+			// mod with 4096
+			wx &= (RINGBUFSIZE-1);
+			// store it. 0 is legit.
+			ringBuf[wx] = m_minTermListIdx;
+		}
+	}
+	
+	// now get query term closest to query term # m_minTermListIdx which
+	// is the query term # with the shortest termlist
+	// get closest term to m_minTermListIdx and the distance
+	logTrace(g_conf.m_logTracePosdb, "Ring buffer generation 2");
+	for ( int32_t i = 0 ; i < m_numQueryTermInfos ; i++ ) {
+		if ( i == m_minTermListIdx ) {
+			continue;
+		}
+		
+		// get the query term info
+		QueryTermInfo *qti = &qtibuf[i];
+
+		// if we have a negative term, skip it
+		if ( qti->m_bigramFlags[0] & (BF_NEGATIVE) ) {
+			continue;
+		}
+
+		// store all his word positions into ring buffer AS WELL
+		for ( int32_t k = 0 ; k < qti->m_numMatchingSubLists ; k++ ) {
+			// scan that sublist and add word positions
+			char *sub = qti->m_matchingSubListSavedCursor[k];
+			// skip sublist if its saved cursor is NULL
+			if ( ! sub ) {
+				continue;
+			}
+			
+			char *end = qti->m_matchingSubListCursor[k];
+			// add first key
+			//int32_t wx = Posdb::getWordPos(sub);
+			wx = (*((uint32_t *)(sub+3))) >> 6;
+			// mod with 4096
+			wx &= (RINGBUFSIZE-1);
+			// store it. 0 is legit.
+			ringBuf[wx] = i; // overwrites whatever term was recorded in this slot before
+			// skip first key
+			sub += 12;
+			// then 6 byte keys
+			for ( ; sub < end ; sub += 6 ) {
+				// get word position
+				//wx = Posdb::getWordPos(sub);
+				wx = (*((uint32_t *)(sub+3))) >> 6;
+				// mod with 4096
+				wx &= (RINGBUFSIZE-1);
+				// store it. 0 is legit.
+				ringBuf[wx] = i;
+			}
+		}
+
+		// reset
+		int32_t ourLastPos = -1; // last slot seen so far holding term i
+		int32_t hisLastPos = -1; // last slot seen so far holding term m_minTermListIdx
+		int32_t bestDist = 0x7fffffff;
+		// how far is this guy from the man?
+		for ( int32_t x = 0 ; x < (int32_t)RINGBUFSIZE ; ) {
+			// skip next 4 slots if all empty. fast?
+			if (*(uint32_t *)(ringBuf+x) == 0xffffffff) {
+				x+=4;
+				continue;
+			}
+
+			// skip if nobody
+			if ( ringBuf[x] == 0xff ) { 
+				x++; 
+				continue; 
+			}
+
+			// get query term #
+			qt = ringBuf[x];
+
+			// if it's the man (the term with the shortest termlist)
+			if ( qt == m_minTermListIdx ) {
+				// record
+				hisLastPos = x;
+				// skip if we are not there yet
+				if ( ourLastPos == -1 ) { 
+					x++; 
+					continue; 
+				}
+
+				// try distance fix
+				if ( x - ourLastPos < bestDist ) {
+					bestDist = x - ourLastPos;
+				}
+			}
+			// if us (term i)
+			else 
+			if ( qt == i ) {
+				// record
+				ourLastPos = x;
+				// skip if he's not recorded yet
+				if ( hisLastPos == -1 ) { 
+					x++; 
+					continue; 
+				}
+
+				// update
+				ourLastPos = x;
+//@@@ ^^ dupe of the "record" assignment a few lines up
+				// check dist
+				if ( x - hisLastPos < bestDist ) {
+					bestDist = x - hisLastPos;
+				}
+			}
+			x++;
+			continue;	//@@@ doh... (redundant: this is the last statement of the loop body)
+		}
+
+		// compare last occurrence of query term #x with our first occ.
+		// since this is a RING buffer
+		int32_t wrapDist = ourFirstPos + ((int32_t)RINGBUFSIZE-hisLastPos); // NOTE(review): ourFirstPos and hisLastPos are both recorded for term m_minTermListIdx - confirm this pair is intended
+		if ( wrapDist < bestDist ) {
+			bestDist = wrapDist;
+		}
+
+		// query distance
+		qdist = qpos[m_minTermListIdx] - qpos[i];
+		// compute it
+		float maxScore2 = getMaxPossibleScore(&qtibuf[i],
+						      bestDist,
+						      qdist,
+						      &qtibuf[m_minTermListIdx]);
+		// -1 means it has inlink text so do not apply this constraint
+		// to this docid because it is too difficult because we
+		// sum up the inlink text
+		if ( maxScore2 < 0.0 ) {
+			continue;
+		}
+
+		// if any one of these terms have a max score below the
+		// worst score of the 10th result, then it can not win.
+		// @todo: BR. Really? ANY of them?
+		if ( maxScore2 <= minWinningScore ) {
+			logTrace(g_conf.m_logTracePosdb, "END - docid score too low");
+			return false;
+		}
+	}
+
+	logTrace(g_conf.m_logTracePosdb, "END - docid score high enough");
+	return true;
+}
+
+

 // . compare the output of this to intersectLists9_r()
 // . hopefully this will be easier to understand and faster
@ -2720,8 +3023,8 @@ void PosdbTable::intersectLists10_r ( ) {
 	float pss;
 	// scan the posdb keys in the smallest list
 	// raised from 200 to 300,000 for 'da da da' query
-	char mbuf[300000];
-	char *mptrEnd = mbuf + 299000;
+	char miniMergeBuf[300000];
+	char *mptrEnd = miniMergeBuf + 299000;
 	char *mptr;
 	char *docIdPtr;
 	char *docIdEnd = m_docIdVoteBuf.getBufStart()+m_docIdVoteBuf.length();
@ -2732,22 +3035,11 @@ void PosdbTable::intersectLists10_r ( ) {
 	char *lastMptr = NULL;
 	int32_t topCursor = -9;
 	int32_t numProcessed = 0;
-#define RINGBUFSIZE 4096
-//#define RINGBUFSIZE 1024
-	unsigned char ringBuf[RINGBUFSIZE+10];
-	// for overflow conditions in loops below
-	ringBuf[RINGBUFSIZE+0] = 0xff;
-	ringBuf[RINGBUFSIZE+1] = 0xff;
-	ringBuf[RINGBUFSIZE+2] = 0xff;
-	ringBuf[RINGBUFSIZE+3] = 0xff;
-	unsigned char qt;
-	QueryTermInfo *qtx;
-	uint32_t wx;
-	int32_t fail0 = 0;
-	int32_t pass0 = 0;
-	int32_t fail = 0;
-	int32_t pass = 0;
-	int32_t ourFirstPos = -1;
+	
+	int32_t prefiltMaxPossScoreFail 		= 0;
+	int32_t prefiltMaxPossScorePass 		= 0;
+	int32_t prefiltBestDistMaxPossScoreFail = 0;
+	int32_t prefiltBestDistMaxPossScorePass	= 0;


 	// populate the cursors for each sublist
@ -2819,6 +3111,8 @@ void PosdbTable::intersectLists10_r ( ) {
 		bool allDone = false;

 		while( !allDone && docIdPtr < docIdEnd ) {
+			logTrace(g_conf.m_logTracePosdb, "Handling next docID");
+
 			bool skipToNextDocId = false;

 			// second pass? for printing out transparency info.
@ -2830,101 +3124,34 @@ void PosdbTable::intersectLists10_r ( ) {
 				}
 			}

+
 			if( currPassNum == INTERSECT_SCORING ) {
+				//
 				// Pre-advance each termlist's cursor to skip to next docid.
 				//
-				// Set QueryTermInfo::m_matchingSubListCursor to NEXT docid
-				// Set QueryTermInfo::m_matchingSubListSavedCursor to CURRENT docid
-				// of each termlist so we are ready for a quick skip over this docid.
-				//
-				// TODO: use just a single array of termlist ptrs perhaps,
-				// then we can remove them when they go NULL.  and we'd save a little
-				// time not having a nested loop.
-				for ( int32_t i = 0 ; i < m_numQueryTermInfos ; i++ ) {
-					// get it
-					QueryTermInfo *qti = &qtibuf[i];
-					// do not advance negative termlist cursor
-					if ( qti->m_bigramFlags[0] & BF_NEGATIVE ) {
-						continue;
-					}
-
-					//
-					// In first pass, sublists data is initialized by delNonMatchingDocIdsFromSubLists.
-					// In second pass (to get detailed scoring info for UI output), they are initialized above
-					//
-					for ( int32_t j = 0 ; j < qti->m_numMatchingSubLists ; j++ ) {
-						// shortcuts
-						char *xc    = qti->m_matchingSubListCursor[j];
-						char *xcEnd = qti->m_matchingSubListEnd[j];
-
-						// exhausted? (we can't make cursor NULL because
-						// getMaxPossibleScore() needs the last ptr)
-						// must match docid
-						if ( xc >= xcEnd ||
-						     *(int32_t *)(xc+8) != *(int32_t *)(docIdPtr+1) ||
-						     (*(char *)(xc+7)&0xfc) != (*(char *)(docIdPtr)&0xfc) ) {
-							// flag it as not having the docid
-							qti->m_matchingSubListSavedCursor[j] = NULL;
-							// skip this sublist if does not have our docid
-							continue;
-						}
-
-						// save it
-						qti->m_matchingSubListSavedCursor[j] = xc;
-						// get new docid
-						//log("new docid %" PRId64,Posdb::getDocId(xc) );
-						// advance the cursors. skip our 12
-						xc += 12;
-						// then skip any following 6 byte keys because they
-						// share the same docid
-						for ( ;  ; xc += 6 ) {
-							// end of whole termlist?
-							if ( xc >= xcEnd ) {
-								break;
-							}
-							
-							// sanity. no 18 byte keys allowed
-							if ( (*xc & 0x06) == 0x00 ) {
-								// i've seen this triggered on gk28.
-								// a dump of posdb for the termlist
-								// for 'post' had corruption in it,
-								// yet its twin, gk92 did not. the
-								// corruption could have occurred
-								// anywhere from nov 2012 to may 2013,
-								// and the posdb file was never
-								// re-merged! must have been blatant
-								// disk malfunction?
-								log("posdb: encountered corrupt posdb list. bailing.");
-								logTrace(g_conf.m_logTracePosdb, "END.");
-								return;
-								//gbshutdownAbort(true);
-							}
-							// the next docid? it will be a 12 byte key.
-							if ( ! (*xc & 0x04) ) {
-								break;
-							}
-						}
-						// assign to next docid word position list
-						qti->m_matchingSubListCursor[j] = xc;
-					}
+				if( !advanceTermListCursors(docIdPtr, qtibuf) ) {
+					logTrace(g_conf.m_logTracePosdb, "END. advanceTermListCursors failed");
+					return;
 				}

-
-
 				if( !m_q->m_isBoolean ) {

+					//##
+					//## PRE-FILTERS. Discard DocIDs that cannot meet the minimum required
+					//## score, before entering the main scoring loop below
+					//##
+
+
 					// TODO: consider skipping this pre-filter if it sucks, as it does
 					// for 'time enough for love'. it might save time!
-
 					//
 					// Calculate maximum possible score for a document. If the max score
 					// is lower than the current minimum winning score, give up already
 					// now and skip to the next docid.
 					//
-
 					// Only go through this if we actually have a minimum score to compare with ...
 					// No need if minWinningScore is still -1
-					if ( minWinningScore >= 0 ) {
+					if ( minWinningScore >= 0.0 ) {
 						logTrace(g_conf.m_logTracePosdb, "Compute 'upper bound' for each query term");

 						// If there's no way we can break into the winner's circle, give up!
@ -2949,7 +3176,7 @@ void PosdbTable::intersectLists10_r ( ) {
 							// worst score of the 10th result, then it can not win.
 							if ( maxScore <= minWinningScore ) {
 								docIdPtr += 6;
-								fail0++;
+								prefiltMaxPossScoreFail++;
 								skipToNextDocId = true;
 								break;	// break out of numQueryTermsToHandle loop
 							}
@ -2961,197 +3188,14 @@ void PosdbTable::intersectLists10_r ( ) {
 						continue;
 					}

-					pass0++;
+					prefiltMaxPossScorePass++;

+					if ( minWinningScore >= 0.0 && m_sortByTermNum < 0 && m_sortByTermNumInt < 0 ) {

-					if ( m_sortByTermNum < 0 && m_sortByTermNumInt < 0 ) {
-						// TODO: consider skipping this pre-filter if it sucks, as it does
-						// for 'search engine'. it might save time!
-
-						// reset ring buf. make all slots 0xff. should be 1000 cycles or so.
-						memset ( ringBuf, 0xff, RINGBUFSIZE );
-
-						// now to speed up 'time enough for love' query which does not
-						// have many super high scoring guys on top we need a more restrictive
-						// filter than getMaxPossibleScore() so let's pick one query term,
-						// the one with the shortest termlist, and see how close it gets to
-						// each of the other query terms. then score each of those pairs.
-						// so quickly record the word positions of each query term into
-						// a ring buffer of 4096 slots where each slot contains the
-						// query term # plus 1.
-
-						logTrace(g_conf.m_logTracePosdb, "Ring buffer generation");
-						qtx = &qtibuf[m_minTermListIdx];
-						// populate ring buf just for this query term
-						for ( int32_t k = 0 ; k < qtx->m_numMatchingSubLists ; k++ ) {
-							// scan that sublist and add word positions
-							char *sub = qtx->m_matchingSubListSavedCursor[k];
-							// skip sublist if it's cursor is exhausted
-							if ( ! sub ) {
-								continue;
-							}
-
-							char *end = qtx->m_matchingSubListCursor[k];
-							// add first key
-							//int32_t wx = Posdb::getWordPos(sub);
-							wx = (*((uint32_t *)(sub+3))) >> 6;
-							// mod with 4096
-							wx &= (RINGBUFSIZE-1);
-							// store it. 0 is legit.
-							ringBuf[wx] = m_minTermListIdx;
-							// set this
-							ourFirstPos = wx;
-							// skip first key
-							sub += 12;
-							// then 6 byte keys
-							for ( ; sub < end ; sub += 6 ) {
-								// get word position
-								//wx = Posdb::getWordPos(sub);
-								wx = (*((uint32_t *)(sub+3))) >> 6;
-								// mod with 4096
-								wx &= (RINGBUFSIZE-1);
-								// store it. 0 is legit.
-								ringBuf[wx] = m_minTermListIdx;
-							}
-						}
-						
-						// now get query term closest to query term # m_minTermListIdx which
-						// is the query term # with the shortest termlist
-						// get closest term to m_minTermListIdx and the distance
-						logTrace(g_conf.m_logTracePosdb, "Ring buffer generation 2");
-						for ( int32_t i = 0 ; i < m_numQueryTermInfos ; i++ ) {
-							// skip the man
-							if ( i == m_minTermListIdx ) {
-								continue;
-							}
-							
-							// get the query term info
-							QueryTermInfo *qti = &qtibuf[i];
-							// if we have a negative term, skip it
-							if ( qti->m_bigramFlags[0] & (BF_NEGATIVE) ) {
-								// if its empty, that's good!
-								continue;
-							}
-							
-							// store all his word positions into ring buffer AS WELL
-							for ( int32_t k = 0 ; k < qti->m_numMatchingSubLists ; k++ ) {
-								// scan that sublist and add word positions
-								char *sub = qti->m_matchingSubListSavedCursor[k];
-								// skip sublist if it's cursor is exhausted
-								if ( ! sub ) {
-									continue;
-								}
-								
-								char *end = qti->m_matchingSubListCursor[k];
-								// add first key
-								//int32_t wx = Posdb::getWordPos(sub);
-								wx = (*((uint32_t *)(sub+3))) >> 6;
-								// mod with 4096
-								wx &= (RINGBUFSIZE-1);
-								// store it. 0 is legit.
-								ringBuf[wx] = i;
-								// skip first key
-								sub += 12;
-								// then 6 byte keys
-								for ( ; sub < end ; sub += 6 ) {
-									// get word position
-									//wx = Posdb::getWordPos(sub);
-									wx = (*((uint32_t *)(sub+3))) >> 6;
-									// mod with 4096
-									wx &= (RINGBUFSIZE-1);
-									// store it. 0 is legit.
-									ringBuf[wx] = i;
-								}
-							}
-
-							// reset
-							int32_t ourLastPos = -1;
-							int32_t hisLastPos = -1;
-							int32_t bestDist = 0x7fffffff;
-							// how far is this guy from the man?
-							for ( int32_t x = 0 ; x < (int32_t)RINGBUFSIZE ; ) {
-								// skip next 4 slots if all empty. fast?
-								if (*(uint32_t *)(ringBuf+x) == 0xffffffff) {
-									x+=4;
-									continue;
-								}
-
-								// skip if nobody
-								if ( ringBuf[x] == 0xff ) { 
-									x++; 
-									continue; 
-								}
-
-								// get query term #
-								qt = ringBuf[x];
-
-								// if it's the man
-								if ( qt == m_minTermListIdx ) {
-									// record
-									hisLastPos = x;
-									// skip if we are not there yet
-									if ( ourLastPos == -1 ) { 
-										x++; 
-										continue; 
-									}
-
-									// try distance fix
-									if ( x - ourLastPos < bestDist ) {
-										bestDist = x - ourLastPos;
-									}
-								}
-								// if us
-								else 
-								if ( qt == i ) {
-									// record
-									ourLastPos = x;
-									// skip if he's not recorded yet
-									if ( hisLastPos == -1 ) { 
-										x++; 
-										continue; 
-									}
-
-									// update
-									ourLastPos = x;
-
-									// check dist
-									if ( x - hisLastPos < bestDist ) {
-										bestDist = x - hisLastPos;
-									}
-								}
-								x++;
-								continue;
-							}
-
-							// compare last occurence of query term #x with our first occ.
-							// since this is a RING buffer
-							int32_t wrapDist = ourFirstPos + ((int32_t)RINGBUFSIZE-hisLastPos);
-							if ( wrapDist < bestDist ) {
-								bestDist = wrapDist;
-							}
-
-							// query distance
-							qdist = qpos[m_minTermListIdx] - qpos[i];
-							// compute it
-							float maxScore2 = getMaxPossibleScore(&qtibuf[i],
-											      bestDist,
-											      qdist,
-											      &qtibuf[m_minTermListIdx]);
-							// -1 means it has inlink text so do not apply this constraint
-							// to this docid because it is too difficult because we
-							// sum up the inlink text
-							if ( maxScore2 < 0.0 ) {
-								continue;
-							}
-
-							// if any one of these terms have a max score below the
-							// worst score of the 10th result, then it can not win.
-							if ( maxScore2 <= minWinningScore ) {
-								docIdPtr += 6;
-								fail++;
-								skipToNextDocId = true;
-								break;	// break out of numQueryTermsToHandle loop
-							}
+						if( !prefilterMaxPossibleScoreByDistance(qtibuf, qpos, minWinningScore) ) {
+							docIdPtr += 6;
+							prefiltBestDistMaxPossScoreFail++;
+							skipToNextDocId = true;
 						}
 					} // not m_sortByTermNum or m_sortByTermNumInt

@ -3159,7 +3203,7 @@ void PosdbTable::intersectLists10_r ( ) {
 						// Continue docIdPtr < docIdEnd loop
 						continue;	
 					}
-					pass++;
+					prefiltBestDistMaxPossScorePass++;
 				} // !m_q->m_isBoolean
 			}	// currPassNum == INTERSECT_SCORING

@ -3186,6 +3230,8 @@ void PosdbTable::intersectLists10_r ( ) {
 				}
 			}

+
+
 			//
 			// PERFORMANCE HACK:
 			//
@ -3196,7 +3242,7 @@ void PosdbTable::intersectLists10_r ( ) {

 			// all posdb keys for this docid should fit in here, the 
 			// mini merge buf:
-			mptr = mbuf;
+			mptr = miniMergeBuf;

 			// . merge each set of sublists
 			// . like we merge a term's list with its two associated bigram
@ -3204,10 +3250,11 @@ void PosdbTable::intersectLists10_r ( ) {
 			// . and merge all the synonym lists for that term together as well.
 			//   so if the term is 'run' we merge it with the lists for
 			//   'running' 'ran' etc.
-			logTrace(g_conf.m_logTracePosdb, "Merge sublists");
+			logTrace(g_conf.m_logTracePosdb, "Merge sublists into a single list per query term");
 			for ( int32_t j = 0 ; j < m_numQueryTermInfos ; j++ ) {
 				// get the query term info
 				QueryTermInfo *qti = &qtibuf[j];
+
 				// just use the flags from first term i guess
 				// NO! this loses the wikihalfstopbigram bit! so we gotta
 				// add that in for the key i guess the same way we add in
@ -3220,9 +3267,11 @@ void PosdbTable::intersectLists10_r ( ) {
 					// if its empty, that's good!
 					continue;
 				}
+
 				// the merged list for term #j is here:
-				miniMergedList [j] = mptr;
+				miniMergedList[j] = mptr;
 				bool isFirstKey = true;
+
 				// populate the nwp[] arrays for merging
 				int32_t nsub = 0;
 				for ( int32_t k = 0 ; k < qti->m_numMatchingSubLists ; k++ ) {
@ -3261,7 +3310,7 @@ void PosdbTable::intersectLists10_r ( ) {
 					bflags         [j] = nwpFlags[0];
 					continue;
 				}
-				// . ok, merge the lists into a list in mbuf
+				// . ok, merge the lists into a list in miniMergeBuf
 				// . get the min of each list

 				bool currTermDone = false;
@ -3396,7 +3445,7 @@ void PosdbTable::intersectLists10_r ( ) {
 			}

 			// breach?
-			if ( mptr > mbuf + 300000 ) {
+			if ( mptr > miniMergeBuf + 300000 ) {
 				gbshutdownAbort(true);
 			}

@ -4074,9 +4123,7 @@ void PosdbTable::intersectLists10_r ( ) {

 			// advance to next docid
 			docIdPtr += 6;
-
-			logTrace(g_conf.m_logTracePosdb, "^ Now repeat for next docID");
-		}
+		} // docIdPtr < docIdEnd loop


 		if ( m_debug ) {
@ -4091,10 +4138,10 @@ void PosdbTable::intersectLists10_r ( ) {


 	if ( m_debug ) {
-		log(LOG_INFO, "posdb: # fail0 = %" PRId32" ", fail0 );
-		log(LOG_INFO, "posdb: # pass0 = %" PRId32" ", pass0 );
-		log(LOG_INFO, "posdb: # fail = %" PRId32" ", fail );
-		log(LOG_INFO, "posdb: # pass = %" PRId32" ", pass );
+		log(LOG_INFO, "posdb: # prefiltMaxPossScoreFail........: %" PRId32" ", prefiltMaxPossScoreFail );
+		log(LOG_INFO, "posdb: # prefiltMaxPossScorePass........: %" PRId32" ", prefiltMaxPossScorePass );
+		log(LOG_INFO, "posdb: # prefiltBestDistMaxPossScoreFail: %" PRId32" ", prefiltBestDistMaxPossScoreFail );
+		log(LOG_INFO, "posdb: # prefiltBestDistMaxPossScorePass: %" PRId32" ", prefiltBestDistMaxPossScorePass );
 	}

 	// get time now
--- a/PosdbTable.h
+++ b/PosdbTable.h
@ -126,8 +126,11 @@ class PosdbTable {
 		return m_initialized;
 	}

+	// helper functions used by intersectLists10_r()
 	bool genDebugScoreInfo1(int32_t &numProcessed, int32_t &topCursor, QueryTermInfo *qtibuf);
 	bool genDebugScoreInfo2(DocIdScore &dcs, int32_t &lastLen, uint64_t &lastDocId, char siteRank, float score, int32_t intScore, char docLang);
+	bool advanceTermListCursors(const char *docIdPtr, QueryTermInfo *qtibuf);
+	bool prefilterMaxPossibleScoreByDistance(QueryTermInfo *qtibuf, const int32_t *qpos, float minWinningScore);

 	uint64_t m_docId;

--- a/Rdb.cpp
+++ b/Rdb.cpp
@ -123,6 +123,7 @@ bool Rdb::init ( const char     *dir                  ,
 	m_useHalfKeys      = useHalfKeys;
 	m_isTitledb        = isTitledb;
 	m_ks               = keySize;
+	m_useIndexFile     = useIndexFile;
 	m_inDumpLoop       = false;

 	// set our id
@ -137,12 +138,6 @@ bool Rdb::init ( const char     *dir                  ,
 		g_process.shutdownAbort(true);
 	}

-	if (m_rdbId == RDB_POSDB || m_rdbId == RDB2_POSDB2) {
-		m_useIndexFile = g_conf.m_noInMemoryPosdbMerge ? useIndexFile : false;
-	} else {
-		m_useIndexFile = useIndexFile;
-	}
-
 	// get page size
 	m_pageSize = GB_TFNDB_PAGE_SIZE;
 	if ( m_rdbId == RDB_POSDB    ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
--- a/Rdb.h
+++ b/Rdb.h
@ -239,6 +239,8 @@ public:
 		m_inDumpLoop = inDumpLoop;
 	}

+	bool isUseIndexFile() const { return m_useIndexFile; } // returns m_useIndexFile, which Rdb::init() sets from its useIndexFile argument
+
 	bool inAddList() const { return m_inAddList; }

 	// . you'll lose your data in this class if you call this
--- a/RdbBase.cpp
+++ b/RdbBase.cpp
@ -1408,7 +1408,7 @@ bool RdbBase::attemptMerge( int32_t niceness, bool forceMergeAll, bool doLog , i
 	// then do not do the merge, we do not want to overwrite tfndb via
 	// RdbDump::updateTfndbLoop() 
 	rdbid_t rdbId = getIdFromRdb ( m_rdb );
-	if ( rdbId == RDB_TITLEDB && g_titledb.m_rdb.isDumping() ) {
+	if ( rdbId == RDB_TITLEDB && g_titledb.getRdb()->isDumping() ) {
 		if ( doLog ) {
 			log( LOG_INFO, "db: Can not merge titledb while it is dumping." );
 		}
--- a/RdbList.cpp
+++ b/RdbList.cpp
@ -133,26 +133,32 @@ void RdbList::set(char *list, int32_t listSize, char *alloc, int32_t allocSize,
 	verify_signature();
 	logTrace(g_conf.m_logTraceRdbList, "BEGIN. list=%p listSize=%" PRId32" alloc=%p allocSize=%" PRId32,
 	         list, listSize, alloc, allocSize);
+	logTrace(g_conf.m_logTraceRdbList, "startKey=%s endKey=%s keySize=%hhu fixedDataSize=%" PRId32,
+	         KEYSTR(startKey, keySize), KEYSTR(endKey, keySize), keySize, fixedDataSize);

 	// free and NULLify any old m_list we had to make room for our new list
 	freeList();
+
 	// set this first since others depend on it
 	m_ks = keySize;
+
 	// sanity check (happens when IndexReadInfo exhausts a list to Msg2)
-	if ( KEYCMP(startKey,endKey,m_ks) > 0 )
-		log(LOG_REMIND,"db: rdblist: set: startKey > endKey.");
+	if (KEYCMP(startKey, endKey, m_ks) > 0) {
+		log(LOG_WARN, "db: rdblist: set: startKey > endKey.");
+	}
+
 	// safety check
-	if ( fixedDataSize != 0 && useHalfKeys ) {
-		log(LOG_LOGIC,"db: rdblist: set: useHalfKeys 1 when "
-		    "fixedDataSize not 0.");
+	if (fixedDataSize != 0 && useHalfKeys) {
+		log(LOG_LOGIC, "db: rdblist: set: useHalfKeys 1 when fixedDataSize not 0.");
 		useHalfKeys = false;
 	}
+
 	// got an extremely ugly corrupt stack core without this check
-	if ( m_list && m_listSize == 0 ){
-		log ( LOG_WARN, "rdblist: listSize of 0 but list pointer not "
-		      "NULL!" );
+	if (m_list && m_listSize == 0) {
+		log(LOG_WARN, "rdblist: listSize of 0 but list pointer not NULL!");
 		m_list = NULL;
 	}
+
 	// set our list parms
 	m_list          = list;
 	m_listSize      = listSize;
@ -164,8 +170,11 @@ void RdbList::set(char *list, int32_t listSize, char *alloc, int32_t allocSize,
 	m_fixedDataSize = fixedDataSize;
 	m_ownData       = ownData;
 	m_useHalfKeys   = useHalfKeys;
+
 	// use this call now to set m_listPtr and m_listPtrHi based on m_list
 	resetListPtr();
+
+	logTrace(g_conf.m_logTraceRdbList, "END");
 }

 // like above but uses 0/maxKey for startKey/endKey
@ -976,9 +985,6 @@ bool RdbList::removeBadData_r ( ) {


 int RdbList::printPosdbList() {
-
-	logf(LOG_DEBUG, "%s:%s: BEGIN",__FILE__,__func__);
-
 	// save
 	char *oldp   = m_listPtr;
 	const char *oldphi = m_listPtrHi;
@ -1074,7 +1080,6 @@ int RdbList::printPosdbList() {
 	m_listPtr   = oldp;
 	m_listPtrHi = oldphi;

-	logf(LOG_DEBUG, "%s:%s: END",__FILE__,__func__);
 	return 0;
 }

@ -1084,9 +1089,6 @@ int RdbList::printList() {
 		return printPosdbList();
 	}

-	logf(LOG_DEBUG, "%s:%s: BEGIN",__FILE__,__func__);
-
-	//log("m_list=%" PRId32,(int32_t)m_list);
 	// save
 	char *oldp   = m_listPtr;
 	const char *oldphi = m_listPtrHi;
@ -1120,7 +1122,6 @@ int RdbList::printList() {
 	m_listPtr   = oldp;
 	m_listPtrHi = oldphi;

-	logf(LOG_DEBUG, "%s:%s: END",__FILE__,__func__);
 	return 0;
 }

@ -1570,11 +1571,6 @@ bool RdbList::posdbConstrain(const char *startKey, char *endKey, int32_t minRecS
 		}

 		// write the full key back into "p"
-		KEYSET(p, k, 18);
-	} else if (p[0] & 0x02) {
-		// write the key back 6 bytes
-		p -= 6;
-
 		KEYSET(p, k, 18);
 	}

@ -1755,8 +1751,7 @@ void RdbList::merge_r(RdbList **lists, int32_t numLists, const char *startKey, c

 	// did they call prepareForMerge()?
 	if ( m_mergeMinListSize == -1 ) {
-		log(LOG_LOGIC,"db: rdblist: merge_r: prepareForMerge() not "
-			"called. ignoring error and returning emtpy list.");
+		log(LOG_LOGIC,"db: rdblist: merge_r: prepareForMerge() not called. ignoring error and returning emtpy list.");
 		// this happens if we nuke doledb during a merge of it. it is just bad timing
 		return;
 		// save state and dump core, sigBadHandler will catch this
@ -1770,8 +1765,8 @@ void RdbList::merge_r(RdbList **lists, int32_t numLists, const char *startKey, c

 	// warning msg
 	if ( m_listPtr != m_listEnd ) {
-		log( LOG_LOGIC, "db: rdblist: merge_r: warning. merge not storing at end of list for %s.",
-		     getDbnameFromId( ( uint8_t ) rdbId ) );
+		log(LOG_LOGIC, "db: rdblist: merge_r: warning. merge not storing at end of list for %s.",
+		    getDbnameFromId((uint8_t)rdbId));
 	}

 	// set our key range
@ -1782,8 +1777,6 @@ void RdbList::merge_r(RdbList **lists, int32_t numLists, const char *startKey, c
 	//   deletes all the urls then does a dump of just negative keys.
 	//   so let's comment it out for now
 	if ( KEYCMP(m_startKey,m_endKey,m_ks)!=0 && KEYNEG(m_endKey) ) {
-		// log(LOG_LOGIC,"db: rdblist: merge_r: Illegal endKey for "
-		//     "merging rdb=%s. fixing.",getDbnameFromId(rdbId));
 		// make it legal so it will be read first NEXT time
 		KEYDEC(m_endKey,m_ks);
 	}
@ -1814,6 +1807,13 @@ void RdbList::merge_r(RdbList **lists, int32_t numLists, const char *startKey, c
 		return;
 	}

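+	// check that no rdb other than posdb is using an index file
+	// NOTE(review): rdb is dereferenced below without a NULL check -- confirm getRdbFromId() cannot return NULL for this rdbId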
+	Rdb* rdb = getRdbFromId(rdbId);
+	if (rdb->isUseIndexFile()) {
+		/// @todo ALC logic to use index file is not implemented for any rdb other than posdb. add it below if required
+		gbshutdownLogicError();
+	}
+
 	int32_t required = -1;
 	// . if merge not necessary, print a warning message.
 	// . caller should have just called constrain() then
@ -2133,6 +2133,7 @@ skip:
 ///////

 bool RdbList::posdbMerge_r(RdbList **lists, int32_t numLists, const char *startKey, const char *endKey, int32_t minRecSizes, bool removeNegKeys) {
+	logTrace(g_conf.m_logTraceRdbList, "BEGIN");
 	// sanity
 	if (m_ks != sizeof(key144_t)) {
 		gbshutdownAbort(true);
@ -2264,6 +2265,7 @@ bool RdbList::posdbMerge_r(RdbList **lists, int32_t numLists, const char *startK
 			// . continue if tie, so we get the oldest first
 			// . treat negative and positive keys as identical for this
 			if (ss < 0) {
+				logTrace(g_conf.m_logTraceRdbList, "ss < 0. continue");
 				continue;
 			}

@ -2271,9 +2273,12 @@ bool RdbList::posdbMerge_r(RdbList **lists, int32_t numLists, const char *startK
 			// and minPtrBase/Lo/Hi was a negative key! so this is
 			// the annihilation. skip the positive key.
 			if (ss == 0) {
+				logTrace(g_conf.m_logTraceRdbList, "ss == 0. skip");
 				goto skip;
 			}

+			logTrace(g_conf.m_logTraceRdbList, "new min i=%" PRId32, i);
+
 			// we got a new min
 			minPtrBase = ptrs  [i];
 			minPtrLo   = loKeys[i];
@ -2283,6 +2288,7 @@ bool RdbList::posdbMerge_r(RdbList **lists, int32_t numLists, const char *startK

 		// ignore if negative i guess, just skip it
 		if (removeNegKeys && (minPtrBase[0] & 0x01) == 0x00) {
+			logTrace(g_conf.m_logTraceRdbList, "removeNegKeys. skip");
 			goto skip;
 		}

@ -2293,11 +2299,13 @@ bool RdbList::posdbMerge_r(RdbList **lists, int32_t numLists, const char *startK
 		if (m_listPtrHi && cmp_6bytes_equal(minPtrHi, m_listPtrHi)) {
 			if (m_listPtrLo && cmp_6bytes_equal(minPtrLo, m_listPtrLo)) {
 				// 6-byte entry
+				logTrace(g_conf.m_logTraceRdbList, "store 6-byte key");
 				memcpy(new_listPtr, minPtrBase, 6);
 				new_listPtr += 6;
 				*pp |= 0x06; //turn on both compression bits
 			} else {
 				// 12-byte entry
+				logTrace(g_conf.m_logTraceRdbList, "store 12-byte key");
 				memcpy(new_listPtr, minPtrBase, 6);
 				new_listPtr += 6;
 				memcpy(new_listPtr, minPtrLo, 6);
@ -2307,6 +2315,7 @@ bool RdbList::posdbMerge_r(RdbList **lists, int32_t numLists, const char *startK
 			}
 		} else {
 			// 18-byte entry
+			logTrace(g_conf.m_logTraceRdbList, "store 18-byte key");
 			memcpy(new_listPtr, minPtrBase, 6);
 			new_listPtr += 6;
 			memcpy(new_listPtr, minPtrLo, 6);
@ -2336,11 +2345,14 @@ skip:
 			// is new key 6 bytes? then do not touch hi/lo ptrs
 			if ( ptrs[mini][0] & 0x04 ) {
 				// no-op
+				logTrace(g_conf.m_logTraceRdbList, "new 6-byte key");
 			} else if ( ptrs[mini][0] & 0x02 ) {
 				// is new key 12 bytes?
+				logTrace(g_conf.m_logTraceRdbList, "new 12-byte key");
 				memcpy(loKeys[mini], ptrs[mini] +  6, 6);
 			} else {
 				// is new key 18 bytes? full key.
+				logTrace(g_conf.m_logTraceRdbList, "new 18-byte key");
 				memcpy(hiKeys[mini], ptrs[mini] + 12, 6);
 				memcpy(loKeys[mini], ptrs[mini] +  6, 6);
 			}
@ -2348,6 +2360,7 @@ skip:
 			//
 			// REMOVE THE LIST at mini
 			//
+			logTrace(g_conf.m_logTraceRdbList, "remove list at mini=%" PRId32, mini);

 			// otherwise, remove him from array
 			for (int32_t i = mini; i < numLists - 1; i++) {
@ -2375,6 +2388,7 @@ skip:

 	// return now if we're empty... all our recs annihilated?
 	if (m_listSize <= 0) {
+		logTrace(g_conf.m_logTraceRdbList, "END. no more list");
 		return true;
 	}

@ -2410,6 +2424,7 @@ skip:
 		if (g_conf.m_logTraceRdbList) {
 			printList();
 		}
+		logTrace(g_conf.m_logTraceRdbList, "END. Less than requested");
 		return true;
 	}

@ -2419,6 +2434,7 @@ skip:
 		if (g_conf.m_logTraceRdbList) {
 			printList();
 		}
+		logTrace(g_conf.m_logTraceRdbList, "END. No more list");
 		return true;
 	}

@ -2449,6 +2465,7 @@ skip:
 		printList();
 	}

+	logTrace(g_conf.m_logTraceRdbList, "END. Done");
 	return true;
 }

--- a/RdbList.h
+++ b/RdbList.h
@ -115,7 +115,7 @@ public:
 	int32_t getAllocSize() const { return m_allocSize; }
 	void setAllocSize(int32_t allocSize) { m_allocSize = allocSize; }

-	int32_t getFixedDataSize() { return m_fixedDataSize; }
+	int32_t getFixedDataSize() const { return m_fixedDataSize; } // read-only accessor for m_fixedDataSize; const-qualified so it can be called on a const RdbList
 	void setFixedDataSize(int32_t fixedDataSize) { m_fixedDataSize = fixedDataSize; }

 	// . merge_r() sets m_lastKey for the list it merges the others into
@ -135,7 +135,7 @@ public:
 	bool isLastKeyValid() const { return m_lastKeyIsValid; }
 	void setLastKeyIsValid(bool lastKeyIsValid) { m_lastKeyIsValid = lastKeyIsValid; }

-	bool getOwnData() { return m_ownData; }
+	bool getOwnData() const { return m_ownData; } // whether this list owns its data; const-qualified so it can be called on a const RdbList
 	// if you don't want data to be freed on destruction then don't own it
 	void setOwnData(bool ownData) { m_ownData = ownData; }

--- a/RdbTree.cpp
+++ b/RdbTree.cpp
@ -1196,13 +1196,10 @@ bool RdbTree::checkTree2 ( bool printMsgs , bool doChainTest ) {

 	// these guys always use a collnum of 0
 	bool doCollRecCheck = true;
-	if ( !strcmp(m_dbname,"catdb") ) doCollRecCheck = false;
 	if ( !strcmp(m_dbname,"statsdb") ) doCollRecCheck = false;


 	if ( !strcmp(m_dbname,"indexdb") ) useHalfKeys = true;
-	if ( !strcmp(m_dbname,"datedb" ) ) useHalfKeys = true;
-	if ( !strcmp(m_dbname,"tfndb"  ) ) useHalfKeys = true;
 	if ( !strcmp(m_dbname,"linkdb" ) ) useHalfKeys = true;

 	bool isTitledb = false;
--- a/Rebalance.cpp
+++ b/Rebalance.cpp
@ -525,11 +525,11 @@ bool Rebalance::gotList ( ) {
 			KEYINC ( m_nextKey , ks );
 	}

-	if ( ! m_msg4a.addMetaList( &m_posMetaList, m_collnum, this, doneAddingMetaWrapper, MAX_NICENESS, rdb->getRdbId(), -1 ) ) { // shard override, not!
+	if (!m_msg4a.addMetaList(&m_posMetaList, m_collnum, this, doneAddingMetaWrapper, rdb->getRdbId(), -1)) { // shard override, not!
 		++m_blocked;
 	}

-	if ( ! m_msg4b.addMetaList( &m_negMetaList, m_collnum, this, doneAddingMetaWrapper, MAX_NICENESS, rdb->getRdbId(), myShard ) ) { // shard override, not!
+	if (!m_msg4b.addMetaList(&m_negMetaList, m_collnum, this, doneAddingMetaWrapper, rdb->getRdbId(), myShard)) { // shard override, not!
 		++m_blocked;
 	}

--- a/Repair.cpp
+++ b/Repair.cpp
@ -1157,11 +1157,11 @@ bool Repair::gotScanRecList ( ) {
 		m_nextTitledbKey = next;
 		*/
 		// get the docid
-		//int64_t dd = g_titledb.getDocIdFromKey(&m_nextTitledbKey);
+		//int64_t dd = Titledb::getDocIdFromKey(&m_nextTitledbKey);
 		// inc it
 		//dd++;
 		// re-make key
-		//m_nextTitledbKey = g_titledb.makeFirstTitleRecKey ( dd );
+		//m_nextTitledbKey = Titledb::makeFirstTitleRecKey ( dd );
 		// advance one if positive, must always start on a neg
 		if ( (m_nextTitledbKey.n0 & 0x01) == 0x01 ) 
 			m_nextTitledbKey += (uint32_t)1;
@ -1209,7 +1209,7 @@ bool Repair::gotScanRecList ( ) {

 	// nextRec2:
 	key96_t tkey = m_titleRecList.getCurrentKey();
-	int64_t docId = g_titledb.getDocId ( &tkey );
+	int64_t docId = Titledb::getDocId ( &tkey );
 	// save it
 	//m_currentTitleRecKey = tkey;

@ -1372,7 +1372,7 @@ bool Repair::injectTitleRec ( ) {
 		// skip negative recs, first one should not be negative however
 		if ( ( k->n0 & 0x01 ) == 0x00 ) continue;
 		// get docid of that guy
-		int64_t dd = g_titledb.getDocId(k);
+		int64_t dd = Titledb::getDocId(k);
 		// compare that
 		if ( m_docId != dd ) continue;
 		// we got it!
--- a/Sections.h
+++ b/Sections.h
@ -13,10 +13,6 @@
 // hhhhhhhh hhhhhhhh tttttttt dddddddd  t = tag type
 // dddddddd dddddddd dddddddd ddddddHD  d = docid

-// DATA:
-// SSSSSSSS SSSSSSSS SSSSSSSS SSSSSSSS  S = SectionVote::m_score
-// NNNNNNNN NNNNNNNN NNNNNNNN NNNNNNNN  N = SectionVote::m_numSampled
-
 // h: hash value. typically the lower 32 bits of the 
 //    Section::m_contentHash64 vars. we
 //    do not need the full 64 bits because we have the 48 bit site hash included
@ -277,22 +273,4 @@ public:
 	class Section *m_firstSent;
 };

-// . the key in sectiondb is basically the Section::m_tagHash 
-//   (with a docId) and the data portion of the Rdb record is this SectionVote
-// . the Sections::m_nsvt and m_osvt hash tables contain SectionVotes
-//   as their data value and use an tagHash key as well
-class SectionVote {
-public:
-	// . seems like addVote*() always uses a score of 1.0
-	// . seems to be a weight used when setting Section::m_votesFor[Not]Dup
-	// . not sure if we really use this now
-	float m_score;
-	// . how many times does this tagHash occur in this doc?
-	// . this eliminates the need for the SV_UNIQUE section type
-	// . this is not used for tags of type contenthash or taghash
-	// . seems like pastdate and futuredate and eurdatefmt 
-	//   are the only vote types that actually really use this...
-	float m_numSampled;
-} __attribute__((packed, aligned(4)));
-
 #endif // GB_SECTIONS_H
--- a/Spider.cpp
+++ b/Spider.cpp
@ -1674,7 +1674,7 @@ bool updateSiteListBuf ( collnum_t collnum ,
 	SpiderColl *sc = g_spiderCache.getSpiderColl ( cr->m_collnum );

 	// sanity. if in use we should not even be here
-	if ( sc->m_msg4x.m_inUse ) {
+	if ( sc->m_msg4x.isInUse() ) {
 		log( LOG_WARN, "basic: trying to update site list while previous update still outstanding.");
 		g_errno = EBADENGINEER;
 		return true;
@ -2005,7 +2005,7 @@ bool updateSiteListBuf ( collnum_t collnum ,

 	// use spidercoll to contain this msg4 but if in use it
 	// won't be able to be deleted until it comes back..
-	return sc->m_msg4x.addMetaList ( spiderReqBuf, sc->m_collnum, sc, doneAddingSeedsWrapper, MAX_NICENESS, RDB_SPIDERDB );
+	return sc->m_msg4x.addMetaList(spiderReqBuf, sc->m_collnum, sc, doneAddingSeedsWrapper, RDB_SPIDERDB);
 }

 // . Spider.cpp calls this to see if a url it wants to spider is
@ -4139,7 +4139,7 @@ bool getSpiderStatusMsg ( CollectionRec *cx , SafeBuf *msg , int32_t *status ) {

 static int32_t getFakeIpForUrl2(Url *url2) {
 	// make the probable docid
-	int64_t probDocId = g_titledb.getProbableDocId ( url2 );
+	int64_t probDocId = Titledb::getProbableDocId ( url2 );
 	// make one up, like we do in PageReindex.cpp
 	int32_t firstIp = (probDocId & 0xffffffff);
 	return firstIp;
@ -4154,7 +4154,7 @@ bool SpiderRequest::setFromAddUrl(const char *url) {
 	// reset it
 	reset();
 	// make the probable docid
-	int64_t probDocId = g_titledb.getProbableDocId ( url );
+	int64_t probDocId = Titledb::getProbableDocId ( url );

 	// make one up, like we do in PageReindex.cpp
 	int32_t firstIp = (probDocId & 0xffffffff);
--- a/SpiderColl.cpp
+++ b/SpiderColl.cpp
@ -3174,9 +3174,8 @@ bool SpiderColl::scanListForWinners ( ) {
 		// mdw: for testing take this out!
 		if ( m_totalBytesScanned < 25000 ) maxWinners = 1;

-		// sanity. make sure read is somewhat hefty for our 
-		// maxWinners=1 thing
-		if ( (int32_t)SR_READ_SIZE < 500000 ) { g_process.shutdownAbort(true); }
+		// sanity. make sure read is somewhat hefty for our maxWinners=1 thing
+		static_assert(SR_READ_SIZE >= 500000, "ensure read size is big enough");

 		// only compare to min winner in tree if tree is full
 		if ( m_winnerTree.getNumUsedNodes() >= maxWinners ) {
--- a/SpiderLoop.cpp
+++ b/SpiderLoop.cpp
@ -1327,10 +1327,6 @@ bool SpiderLoop::spiderUrl9 ( SpiderRequest *sreq ,
 	// shortcut
 	int64_t lockKeyUh48 = makeLockTableKey ( sreq );

-	//uint64_t lockKey ;
-	//lockKey = g_titledb.getFirstProbableDocId(sreq->m_probDocId);
-	//lockKey = g_titledb.getFirstProbableDocId(sreq->m_probDocId);
-
 	// . now that we have to use msg12 to see if the thing is locked
 	//   to avoid spidering it.. (see comment in above function)
 	//   we often try to spider something we are already spidering. that
--- a/Tagdb.cpp
+++ b/Tagdb.cpp
@ -16,7 +16,6 @@
 #include "GbMutex.h"
 #include "ScopedLock.h"

-static void gotMsg0ReplyWrapper ( void *state );

 static HashTableX s_ht;
 static bool s_initialized = false;
@ -1204,9 +1203,27 @@ static bool s_cacheInitialized = false;
 static RdbCache s_cache;
 static GbMutex s_cacheInitializedMutex;

-Msg8a::Msg8a() {
-	m_replies  = 0;
-	m_requests = 0;
+
+Msg8a::Msg8a()
+  : m_url(NULL),
+    m_collnum(-1),
+    m_callback(NULL),
+    m_state(NULL),
+    //m_msg0s
+    //m_siteStartKey
+    //m_siteEndKey
+    m_niceness(0),
+    m_dom(NULL),
+    m_hostEnd(NULL),
+    m_p(NULL),
+    m_requests(0), m_replies(0),
+    m_doneLaunching(false),
+    m_mtx(),
+    m_errno(0),
+    m_tagRec(NULL),
+    m_state2(NULL),
+    m_state3(NULL)
+{
 }

 Msg8a::~Msg8a ( ) {
@ -1378,43 +1395,9 @@ struct Msg8aState {
 bool Msg8a::launchGetRequests ( ) {
 	// clear it
 	g_errno = 0;
-	bool tryDomain = false;
-
- loop:
-	// return true if nothing to launch
-	if ( m_doneLaunching )
-		return (m_requests == m_replies);
-
-	// don't bother if already got an error
-	if ( m_errno )
-		return (m_requests == m_replies);
-
-	// limit max to 5ish
-	if (m_requests >= MAX_TAGDB_REQUESTS)
-		return (m_requests == m_replies);
-
-	// take a breath
-	QUICKPOLL(m_niceness);
-
-	key128_t startKey ;
-	key128_t endKey   ;
-
-	if ( tryDomain ) {
-		startKey = g_tagdb.makeDomainStartKey ( m_url );
-		endKey   = g_tagdb.makeDomainEndKey   ( m_url );
-		log( LOG_DEBUG, "tagdb: looking up domain tags for %.*s", m_url->getDomainLen(), m_url->getDomain() );
-	}
-	else {
-		// usually the site is the hostname but sometimes it is like
-		// "www.last.fm/user/breendaxx/"
-		startKey = m_siteStartKey;
-		endKey   = m_siteEndKey;
-
-		log( LOG_DEBUG, "tagdb: looking up site tags for %s", m_url->getUrl() );
-	}

 	// initialize cache
-	ScopedLock sl(s_cacheInitializedMutex);
+	ScopedLock sl_cache(s_cacheInitializedMutex);
 	if ( !s_cacheInitialized ) {
 		int64_t maxCacheSize = g_conf.m_tagRecCacheSize;
 		int64_t maxCacheNodes = ( maxCacheSize / 200 );
@ -1422,104 +1405,120 @@ bool Msg8a::launchGetRequests ( ) {
 		s_cacheInitialized = true;
 		s_cache.init( maxCacheSize, -1, true, maxCacheNodes, false, "tagreccache", false, 16, 16, -1 );
 	}
-	sl.unlock();
+	sl_cache.unlock();

-	// get the next mcast
-	Msg0 *m = &m_msg0s[m_requests];
+	//get tag for url and then domain
+	for(int getLoop = 0; getLoop<1; getLoop++) {

-	// and the list
-	RdbList *listPtr = &m_tagRec->m_lists[m_requests];
+		key128_t startKey;
+		key128_t endKey;

-	// try to get from cache
-	RdbCacheLock rcl(s_cache);
-	if ( s_cache.getList( m_collnum, (char*)&startKey, (char*)&startKey, listPtr, true,
-	                      g_conf.m_tagRecCacheMaxAge, true) ) {
-		// got from cache
-		log( LOG_DEBUG, "tagdb: got key=%s from cache", KEYSTR(&startKey, sizeof(startKey)) );
+		if(getLoop==1) {
+			startKey = g_tagdb.makeDomainStartKey ( m_url );
+			endKey   = g_tagdb.makeDomainEndKey   ( m_url );
+			log( LOG_DEBUG, "tagdb: looking up domain tags for %.*s", m_url->getDomainLen(), m_url->getDomain() );
+		} else {
+			// usually the site is the hostname but sometimes it is like
+			// "www.last.fm/user/breendaxx/"
+			startKey = m_siteStartKey;
+			endKey   = m_siteEndKey;

-		rcl.unlock();
-		m_requests++;
-		m_replies++;
-	} else {
-		rcl.unlock();
-		// bias based on the top 64 bits which is the hash of the "site" now
-		int32_t shardNum = getShardNum ( RDB_TAGDB , &startKey );
-		Host *firstHost ;
-
-		// if niceness 0 can't pick noquery host.
-		// if niceness 1 can't pick nospider host.
-		firstHost = g_hostdb.getLeastLoadedInShard ( shardNum , m_niceness );
-		int32_t firstHostId = firstHost->m_hostId;
-
-		Msg8aState *state = NULL;
-		try {
-			state = new Msg8aState(this, startKey, endKey, m_requests);
-		} catch (...) {
-			g_errno = ENOMEM;
-			log(LOG_WARN, "tagdb: unable to allocate memory for Msg8aState");
-			return false;
-		}
-		mnew(state, sizeof(*state), "msg8astate");
-
-		// . launch this request, even if to ourselves
-		// . TODO: just use msg0!!
-		bool status = m->getList ( firstHostId     , // hostId
-		                           0          , // ip
-		                           0          , // port
-		                           0          , // maxCacheAge
-		                           false      , // addToCache
-		                           RDB_TAGDB  ,
-		                           m_collnum     ,
-		                           listPtr    ,
-		                           (char *) &startKey  ,
-		                           (char *) &endKey    ,
-		                           10000000            , // minRecSizes
-		                           state                , // state
-		                           gotMsg0ReplyWrapper ,
-		                           m_niceness          ,
-		                           true                , // error correction?
-		                           true                , // include tree?
-		                           true                , // doMerge?
-		                           firstHostId         , // firstHostId
-		                           0                   , // startFileNum
-		                           -1                  , // numFiles
-		                           msg0_getlist_infinite_timeout );// timeout
-
-		// error?
-		if ( status && g_errno ) {
-			// g_errno should be set, we had an error
-			m_errno = g_errno;
-			return (m_requests == m_replies);
+			log( LOG_DEBUG, "tagdb: looking up site tags for %s", m_url->getUrl() );
 		}

-		// successfully launched
-		m_requests++;
+		// get the next mcast
+		Msg0 *m = &m_msg0s[m_requests];

-		// if we got a reply instantly
-		if ( status ) {
+		// and the list
+		RdbList *listPtr = &m_tagRec->m_lists[m_requests];
+
+		// try to get from cache
+		RdbCacheLock rcl(s_cache);
+		if ( s_cache.getList( m_collnum, (char*)&startKey, (char*)&startKey, listPtr, true,
+				      g_conf.m_tagRecCacheMaxAge, true) ) {
+			// got from cache
+			log( LOG_DEBUG, "tagdb: got key=%s from cache", KEYSTR(&startKey, sizeof(startKey)) );
+
+			rcl.unlock();
+			ScopedLock sl(m_mtx);
+			m_requests++;
 			m_replies++;
+		} else {
+			rcl.unlock();
+			// bias based on the top 64 bits which is the hash of the "site" now
+			int32_t shardNum = getShardNum ( RDB_TAGDB , &startKey );
+			Host *firstHost ;
+
+			// if niceness 0 can't pick noquery host.
+			// if niceness 1 can't pick nospider host.
+			firstHost = g_hostdb.getLeastLoadedInShard ( shardNum , m_niceness );
+			int32_t firstHostId = firstHost->m_hostId;
+
+			Msg8aState *state = NULL;
+			try {
+				state = new Msg8aState(this, startKey, endKey, m_requests);
+			} catch (...) {
+				g_errno = m_errno = ENOMEM;
+				log(LOG_WARN, "tagdb: unable to allocate memory for Msg8aState");
+				break;
+			}
+			mnew(state, sizeof(*state), "msg8astate");
+
+			// . launch this request, even if to ourselves
+			// . TODO: just use msg0!!
+			bool status = m->getList ( firstHostId     , // hostId
+						   0          , // ip
+						   0          , // port
+						   0          , // maxCacheAge
+						   false      , // addToCache
+						   RDB_TAGDB  ,
+						   m_collnum     ,
+						   listPtr    ,
+						   (char *) &startKey  ,
+						   (char *) &endKey    ,
+						   10000000            , // minRecSizes
+						   state                , // state
+						   gotMsg0ReplyWrapper ,
+						   m_niceness          ,
+						   true                , // error correction?
+						   true                , // include tree?
+						   true                , // doMerge?
+						   firstHostId         , // firstHostId
+						   0                   , // startFileNum
+						   -1                  , // numFiles
+						   msg0_getlist_infinite_timeout );// timeout
+
+			// error?
+			if ( status && g_errno ) {
+				// g_errno should be set, we had an error
+				m_errno = g_errno;
+				break;
+			}
+
+			ScopedLock sl(m_mtx);
+
+			// successfully launched
+			m_requests++;
+
+			// if we got a reply instantly
+			if ( status ) {
+				m_replies++;
+			}
 		}
+
 	}

-	if ( ! tryDomain ) {
-		tryDomain = true;
-		goto loop;
-	}
+	ScopedLock sl(m_mtx);

-	//
-	// no more looping!
-	//
-	// i don't think we need to loop any more because we got all the
-	// tags for this hostname. then the lower bits of the Tag key
-	// corresponds to the actual SITE hash. so we gotta filter those
-	// out i guess after we read the whole list.
-	//
 	m_doneLaunching = true;
-
-	return (m_requests == m_replies);
+	
+	if(m_requests == m_replies)
+		return true; // all requests done
+	else
+		return false; // some requests weren't immediate
 }
 	
-static void gotMsg0ReplyWrapper ( void *state ) {
+void Msg8a::gotMsg0ReplyWrapper ( void *state ) {
 	Msg8aState *msg8aState = (Msg8aState*)state;

 	Msg8a *msg8a = msg8aState->m_msg8a;
@ -1529,9 +1528,6 @@ static void gotMsg0ReplyWrapper ( void *state ) {
 	mdelete( msg8aState, sizeof(*msg8aState), "msg8astate" );
 	delete msg8aState;

-	// we got one
-	msg8a->m_replies++;
-
 	// error?
 	if ( g_errno ) {
 		msg8a->m_errno = g_errno;
@ -1548,21 +1544,23 @@ static void gotMsg0ReplyWrapper ( void *state ) {
 		s_cache.addList( msg8a->m_collnum, (char*)&startKey, list);
 	}

-	// launchGetRequests() returns false if still waiting for replies...
-	if ( ! msg8a->launchGetRequests() ) {
-		return;
+	ScopedLock sl(msg8a->m_mtx);
+
+	msg8a->m_replies++;
+	
+	if(msg8a->m_doneLaunching && msg8a->m_requests==msg8a->m_replies) {
+		sl.unlock();
+		// got all the replies
+		msg8a->gotAllReplies();
+
+		// set g_errno for the callback
+		if ( msg8a->m_errno ) {
+			g_errno = msg8a->m_errno;
+		}
+
+		// call callback
+		msg8a->m_callback ( msg8a->m_state );
 	}
-
-	// get all the replies
-	msg8a->gotAllReplies();
-
-	// set g_errno for the callback
-	if ( msg8a->m_errno ) {
-		g_errno = msg8a->m_errno;
-	}
-
-	// otherwise, call callback
-	msg8a->m_callback ( msg8a->m_state );
 }

 // get the TagRec from the reply
--- a/Tagdb.h
+++ b/Tagdb.h
@ -10,6 +10,7 @@
 #include "Loop.h"
 #include "SafeBuf.h"
 #include "Msg0.h"
+#include "GbMutex.h"

 // . Tag::m_type is this if its a dup in the TagRec
 // . so if www.xyz.com has one tag and xyz.com has another, then
@ -214,9 +215,12 @@ class Msg8a {
 	bool getTagRec( Url *url, collnum_t collnum, int32_t niceness, void *state, void (*callback)( void * ),
 	                TagRec *tagRec );
 	
+private:
 	bool launchGetRequests();
 	void gotAllReplies ( ) ;

+	static void gotMsg0ReplyWrapper(void *);
+
 	// some specified input
 	Url   *m_url;

@ -238,13 +242,15 @@ class Msg8a {

 	int32_t  m_requests;
 	int32_t  m_replies;
-	char  m_doneLaunching;
+	bool  m_doneLaunching;
+	GbMutex m_mtx;

 	int32_t  m_errno;

 	// we set this for the caller
 	TagRec *m_tagRec;

+public:
 	// hack for MsgE
 	void *m_state2;
 	void *m_state3;
--- a/Titledb.h
+++ b/Titledb.h
@ -13,8 +13,6 @@
 #include "TitleRecVersion.h"
 #include "Rdb.h"
 #include "Url.h"
-#include "Conf.h"
-#include "Xml.h"

 // new key format:
 // . <docId>     - 38 bits
@ -22,15 +20,11 @@
 // . <delBit>    -  1 bit

 class Titledb {
-
- public:
-
+public:
 	// reset rdb
 	void reset();

-	bool verify ( char *coll );
-
-	//bool addColl ( char *coll, bool doVerify = true );
+	bool verify(char *coll);

 	// init m_rdb
 	bool init ();
@ -38,12 +32,20 @@ class Titledb {
 	// init secondary/rebuild titledb
 	bool init2 ( int32_t treeMem ) ;

+	Rdb* getRdb() { return &m_rdb; }
+
+	// . this is an estimate of the number of docs in the WHOLE db network
+	// . we assume each group/cluster has about the same # of docs as us
+	int64_t getGlobalNumDocs() {
+		return m_rdb.getNumTotalRecs() * (int64_t)g_hostdb.m_numShards;
+	}
+
 	// . get the probable docId from a url/coll
 	// . it's "probable" because it may not be the actual docId because
 	//   in the case of a collision we pick a nearby docId that is 
 	//   different but guaranteed to be in the same group/cluster, so you 
 	//   can be assured the top 32 bits of the docId will be unchanged
-	uint64_t getProbableDocId ( Url *url , bool mask = true ) {
+	static uint64_t getProbableDocId(const Url *url, bool mask = true) {
 		uint64_t probableDocId = hash64b(url->getUrl(),0);
 		// Linkdb::getUrlHash() does not mask it
 		if ( mask ) probableDocId = probableDocId & DOCID_MASK;
@ -59,14 +61,14 @@ class Titledb {
 	}

 	// a different way to do it
-	uint64_t getProbableDocId ( const char *url  ) {
+	static uint64_t getProbableDocId(const char *url) {
 		Url u;
 		u.set( url );
-		return getProbableDocId ( &u ); 
+		return getProbableDocId(&u);
 	}

 	// a different way to do it
-	uint64_t getProbableDocId(const char *url,const char *dom,int32_t domLen) {
+	static uint64_t getProbableDocId(const char *url, const char *dom, int32_t domLen) {
 		uint64_t probableDocId = hash64b(url,0) & 
 			DOCID_MASK;
 		// clear bits 6-13 because we want to put the domain hash there
@ -80,73 +82,56 @@ class Titledb {
 	}

 	// turn off the last 6 bits
-	uint64_t getFirstProbableDocId ( int64_t d ) {
-		return d & 0xffffffffffffffc0LL; }
+	static uint64_t getFirstProbableDocId(int64_t d) {
+		return d & 0xffffffffffffffc0ULL;
+	}

 	// turn on the last 6 bits for the end docId
-	uint64_t getLastProbableDocId  ( int64_t d ) {
-		return d | 0x000000000000003fLL; }
+	static uint64_t getLastProbableDocId(int64_t d) {
+		return d | 0x000000000000003fULL;
+	}

 	// . the top NUMDOCIDBITs of "key" are the docId
 	// . we use the top X bits of the keys to partition the records
 	// . using the top bits to partition allows us to keep keys that
 	//   are near each other (euclidean metric) in the same partition
-	int64_t getDocIdFromKey ( key96_t *key ) {
-		uint64_t docId;
-		docId = ((uint64_t)key->n1)<<(NUMDOCIDBITS - 32);
-		docId|=                      key->n0 >>(64-(NUMDOCIDBITS-32));
+	static int64_t getDocIdFromKey(const key96_t *key) {
+		uint64_t docId = ((uint64_t)key->n1) << (NUMDOCIDBITS - 32);
+		docId |= key->n0 >> (64 - (NUMDOCIDBITS - 32));
 		return docId;
 	}
-	int64_t getDocId ( key96_t *key ) { return getDocIdFromKey(key); }
-	int64_t getDocIdFromKey ( key96_t  key ) {
-		return getDocIdFromKey(&key);}

-	uint8_t getDomHash8FromDocId (int64_t d) {
-		return (d & ~0xffffffffffffc03fULL) >> 6; }
+	static int64_t getDocId(const key96_t *key) { return getDocIdFromKey(key); }

-	int64_t getUrlHash48 ( key96_t *k ) {
-		return ((k->n0 >> 10) & 0x0000ffffffffffffLL); }
+	static uint8_t getDomHash8FromDocId (int64_t d) {
+		return (d & ~0xffffffffffffc03fULL) >> 6;
+	}

-	// . dptr is a char ptr to the docid
-	// . used by IndexTable2.cpp
-	// . "dptr" is pointing into a 6-byte indexdb key
-	// . see IndexTable2.cpp, grep for gbmemcpy() to see
-	//   how the docid is parsed out of this key (or see
-	//   Indexdb.h)
-	// . return  ((*((uint16_t *)dptr)) >> 8) & 0xff; }
-	uint8_t getDomHash8 ( uint8_t *dptr ) { return dptr[1]; }
+	static int64_t getUrlHash48 ( key96_t *k ) {
+		return ((k->n0 >> 10) & 0x0000ffffffffffffLL);
+	}

 	// does this key/docId/url have it's titleRec stored locally?
-	bool isLocal ( int64_t docId );
-	bool isLocal ( Url *url ) {
-		return isLocal ( getProbableDocId(url) ); }
-	bool isLocal ( key96_t key ) {
-		return isLocal (getDocIdFromKey(&key));}
+	static bool isLocal(int64_t docId);

-
-	Rdb *getRdb() { return &m_rdb; }
+	static bool isLocal(Url *url) {
+		return isLocal(getProbableDocId(url));
+	}

 	// . make the key of a TitleRec from a docId
 	// . remember to set the low bit so it's not a delete
 	// . hi bits are set in the key
-	key96_t makeKey ( int64_t docId, int64_t uh48, bool isDel );
+	static key96_t makeKey(int64_t docId, int64_t uh48, bool isDel);

-	key96_t makeFirstKey ( int64_t docId ) {
-		return makeKey ( docId , 0, true ); }
+	static key96_t makeFirstKey(int64_t docId) {
+		return makeKey(docId, 0, true);
+	}

-	key96_t makeLastKey  ( int64_t docId ) {
-		return makeKey ( docId , 0xffffffffffffLL, false ); }
-
-	// . this is an estimate of the number of docs in the WHOLE db network
-	// . we assume each group/cluster has about the same # of docs as us
-	int64_t getGlobalNumDocs ( ) { 
-		return m_rdb.getNumTotalRecs()*
-			(int64_t)g_hostdb.m_numShards;}
-
-	int32_t getLocalNumDocs () { return m_rdb.getNumTotalRecs(); }
-	int32_t getNumDocsInMem () { return m_rdb.getNumUsedNodes(); }
-	int32_t getMemUsed      () { return m_rdb.getTreeMemOccupied(); }
+	static key96_t makeLastKey(int64_t docId) {
+		return makeKey(docId, 0xffffffffffffLL, false);
+	}

+private:
 	// holds binary format title entries
 	Rdb m_rdb;
 };
--- a/TopTree.cpp
+++ b/TopTree.cpp
@ -203,8 +203,7 @@ int32_t TopTree::getHighNode ( ) {
 bool TopTree::addNode ( TopNode *t , int32_t tnn ) {

 	// respect the dom hashes
-	//uint8_t domHash = g_titledb.getDomHash8((uint8_t*)t->m_docIdPtr);
-	uint8_t domHash = g_titledb.getDomHash8FromDocId(t->m_docId);
+	uint8_t domHash = Titledb::getDomHash8FromDocId(t->m_docId);

 	// if vcount is satisfied, only add if better score than tail
 	if ( m_vcount >= m_docsWanted ) {
@ -449,9 +448,7 @@ bool TopTree::addNode ( TopNode *t , int32_t tnn ) {
 		//if ( getNext(tn) == -1 ) gbshutdownLogicError();
 		// get the min node
 		TopNode *t = &m_nodes[tn];
-		// get its docid ptr
-		//uint8_t domHash2 = g_titledb.getDomHash8((ui)t->m_docIdPtr);
-		uint8_t domHash2 = g_titledb.getDomHash8FromDocId(t->m_docId);
+		uint8_t domHash2 = Titledb::getDomHash8FromDocId(t->m_docId);
 		// . also must delete from m_t2
 		// . make the key
 		key96_t k;
--- a/XmlDoc.cpp
+++ b/XmlDoc.cpp
--- a/XmlDoc.h
+++ b/XmlDoc.h
@ -361,7 +361,7 @@ public:
 	char *getIsPermalink ( ) ;
 	char *getIsUrlPermalinkFormat ( ) ;
 	char *getIsRSS ( ) ;
-	char *getIsSiteMap ( ) ;
+	bool *getIsSiteMap ( ) ;
 	class Xml *getXml ( ) ;
 	uint8_t *getLangVector ( ) ;	
 	uint8_t *getLangId ( ) ;
@ -693,62 +693,59 @@ public:
 	// validity flags. on reset() all these are set to false.
 	char     m_VALIDSTART;
 	// DO NOT add validity flags above this line!
-	char     m_metaListValid;
-	char     m_addedSpiderRequestSizeValid;
-	char     m_addedSpiderReplySizeValid;
-	char     m_addedStatusDocSizeValid;
-	char     m_downloadStartTimeValid;
-	char     m_siteValid;
-	char     m_startTimeValid;
-	char     m_currentUrlValid;
-	char     m_useTimeAxisValid;
-	char     m_timeAxisUrlValid;
-	char     m_firstUrlValid;
-	char     m_firstUrlHash48Valid;
-	char     m_firstUrlHash64Valid;
-	char     m_lastUrlValid;
-	char     m_docIdValid;
-	char     m_availDocIdValid;
-	char     m_tagRecValid;
-	char     m_robotsTxtLenValid;
-	char     m_tagRecDataValid;
-	char     m_newTagBufValid;
-	char     m_rootTitleBufValid;
-	char     m_filteredRootTitleBufValid;
-	char     m_titleBufValid;
-	char     m_fragBufValid;
-	char	 m_isRobotsTxtUrlValid;
-	char     m_wordSpamBufValid;
-	char     m_finalSummaryBufValid;
+	bool m_metaListValid;
+	bool m_addedSpiderRequestSizeValid;
+	bool m_addedSpiderReplySizeValid;
+	bool m_addedStatusDocSizeValid;
+	bool m_downloadStartTimeValid;
+	bool m_siteValid;
+	bool m_startTimeValid;
+	bool m_currentUrlValid;
+	bool m_useTimeAxisValid;
+	bool m_timeAxisUrlValid;
+	bool m_firstUrlValid;
+	bool m_firstUrlHash48Valid;
+	bool m_firstUrlHash64Valid;
+	bool m_lastUrlValid;
+	bool m_docIdValid;
+	bool m_availDocIdValid;
+	bool m_tagRecValid;
+	bool m_robotsTxtLenValid;
+	bool m_tagRecDataValid;
+	bool m_newTagBufValid;
+	bool m_rootTitleBufValid;
+	bool m_filteredRootTitleBufValid;
+	bool m_titleBufValid;
+	bool m_fragBufValid;
+	bool m_isRobotsTxtUrlValid;
+	bool m_wordSpamBufValid;
+	bool m_finalSummaryBufValid;

-	char     m_hopCountValid;
-	char     m_isInjectingValid;
-	char     m_isImportingValid;
-	char     m_metaListCheckSum8Valid;
-	char     m_contentValid;
-	char     m_filteredContentValid;
-	char     m_charsetValid;
-	char     m_langVectorValid;
-	char     m_langIdValid;
-	char     m_datedbDateValid;
-	char     m_isRSSValid;
-	char     m_isSiteMapValid;
-	char     m_isContentTruncatedValid;
-	char     m_xmlValid;
-	char     m_linksValid;
-	char     m_wordsValid;
-	char     m_bitsValid;
-	char     m_bits2Valid;
-	char     m_posValid;
-	char     m_phrasesValid;
-	char     m_sectionsValid;
+	bool m_hopCountValid;
+	bool m_isInjectingValid;
+	bool m_isImportingValid;
+	bool m_metaListCheckSum8Valid;
+	bool m_contentValid;
+	bool m_filteredContentValid;
+	bool m_charsetValid;
+	bool m_langVectorValid;
+	bool m_langIdValid;
+	bool m_isRSSValid;
+	bool m_isSiteMapValid;
+	bool m_isContentTruncatedValid;
+	bool m_xmlValid;
+	bool m_linksValid;
+	bool m_wordsValid;
+	bool m_bitsValid;
+	bool m_bits2Valid;
+	bool m_posValid;
+	bool m_phrasesValid;
+	bool m_sectionsValid;

-	char     m_imageDataValid;
-	char     m_imagesValid;
-	char     m_msge0Valid;
-	char     m_msge1Valid;
-	char     m_sreqValid;
-	char     m_srepValid;
+	bool m_imageDataValid;
+	bool m_imagesValid;
+	bool m_sreqValid;
+	bool m_srepValid;

 	bool m_ipValid;
 	bool m_firstIpValid;
@ -851,7 +848,7 @@ public:
 	bool m_exactContentHash64Valid;
 	bool m_jpValid;

-	char m_isSiteMap;
+	bool m_isSiteMap;

 	// shadows
 	char m_isRSS2;
--- a/XmlDoc_Indexing.cpp
+++ b/XmlDoc_Indexing.cpp
@ -142,7 +142,7 @@ static bool storeTerm ( const char	*s        ,
 // . hash terms that are sharded by TERMID not DOCID!!
 //
 // . returns false and sets g_errno on error
-// . these terms are stored in indexdb/datedb, but all terms with the same
+// . these terms are stored in indexdb, but all terms with the same
 //   termId reside in one and only one group. whereas normally the records
 //   are split based on docid and every group gets 1/nth of the termlist.
 // . we do this "no splitting" so that only one disk seek is required, and
@ -289,155 +289,143 @@ bool XmlDoc::hashNoSplit ( HashTableX *tt ) {
 // . returns -1 if blocked, returns NULL and sets g_errno on error
 // . "sr" is the tagdb Record
 // . "ws" store the terms for PageParser.cpp display
-char *XmlDoc::hashAll ( HashTableX *table ) {
+char *XmlDoc::hashAll(HashTableX *table) {
+	logTrace(g_conf.m_logTraceXmlDoc, "BEGIN");

-	if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: BEGIN", __FILE__,__func__, __LINE__);
-		
-	setStatus ( "hashing document" );
+	setStatus("hashing document");

-	if ( m_allHashed ) return (char *)1;
+	if (m_allHashed) {
+		return (char *)1;
+	}

 	// sanity checks
-	if ( table->m_ks != 18 ) { g_process.shutdownAbort(true); }
-	if ( table->m_ds != 4  ) { g_process.shutdownAbort(true); }
+	if (table->m_ks != 18 || table->m_ds != 4) {
+		g_process.shutdownAbort(true);
+	}

-	if ( m_wts && m_wts->m_ks != 12  ) { g_process.shutdownAbort(true); }
 	// ptr to term = 4 + score = 4 + ptr to sec = 4
-	if ( m_wts && m_wts->m_ds!=sizeof(TermDebugInfo)){g_process.shutdownAbort(true);}
+	if (m_wts && (m_wts->m_ks != 12 || m_wts->m_ds != sizeof(TermDebugInfo))) {
+		g_process.shutdownAbort(true);
+	}

 	uint8_t *ct = getContentType();
-	if ( ! ct )
-	{
-		if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, getContentType failed", __FILE__,__func__, __LINE__);
+	if (!ct) {
+		logTrace(g_conf.m_logTraceXmlDoc, "END, getContentType failed");
 		return NULL;
 	}
 	
 	// BR 20160127: Never index JSON and XML content
-	if ( *ct == CT_JSON || *ct == CT_XML )
-	{
+	if (*ct == CT_JSON || *ct == CT_XML) {
 		// For XML (JSON should not get here as it should be filtered out during spidering)
 		// store the URL as the only thing in posdb so we are able to find it, and
 		// eventually ban it.
-		if ( !hashUrl( table, true ) )  // urlOnly (skip IP and term generation)
-		{
-			if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, hashUrl failed", __FILE__,__func__, __LINE__);
+		if (!hashUrl(table, true)) {  // urlOnly (skip IP and term generation)
+			logTrace(g_conf.m_logTraceXmlDoc, "END, hashUrl failed");
 			return NULL;
 		}
 		m_allHashed = true;
 		return (char *)1;
 	}

-
-
 	unsigned char *hc = (unsigned char *)getHopCount();
-	if ( ! hc || hc == (void *)-1 ) 
-	{
-		if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, getHopCount returned -1", __FILE__,__func__, __LINE__);
+	if (!hc || hc == (void *)-1) {
+		logTrace(g_conf.m_logTraceXmlDoc, "END, getHopCount returned -1");
 		return (char *)hc;
 	}

 	// need this for hashing
 	HashTableX *cnt = getCountTable();
-	if ( ! cnt ) 
-	{
-		if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, getCountTable failed", __FILE__,__func__, __LINE__);
+	if (!cnt) {
+		logTrace(g_conf.m_logTraceXmlDoc, "END, getCountTable failed");
 		return (char *)cnt;
 	}
-	if ( cnt == (void *)-1 ) { g_process.shutdownAbort(true); }
+	if (cnt == (void *)-1) {
+		g_process.shutdownAbort(true);
+	}

 	// and this
 	Links *links = getLinks();
-	if ( ! links ) 
-	{
-		if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, getLinks failed", __FILE__,__func__, __LINE__);
+	if (!links) {
+		logTrace(g_conf.m_logTraceXmlDoc, "END, getLinks failed");
 		return (char *)links;
 	}
-	if ( links == (Links *)-1 ) { g_process.shutdownAbort(true); }
+	if (links == (Links *)-1) {
+		g_process.shutdownAbort(true);
+	}

 	char *wordSpamVec = getWordSpamVec();
-	if (!wordSpamVec) 
-	{
-		if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, getWordSpamVec failed", __FILE__,__func__, __LINE__);
-		return (char *)wordSpamVec;
+	if (!wordSpamVec) {
+		logTrace(g_conf.m_logTraceXmlDoc, "END, getWordSpamVec failed");
+		return wordSpamVec;
+	}
+	if (wordSpamVec == (void *)-1) {
+		g_process.shutdownAbort(true);
 	}
-	if (wordSpamVec==(void *)-1) {g_process.shutdownAbort(true);}

-	char *fragVec = getFragVec();//m_fragBuf.getBufStart();
-	if ( ! fragVec ) 
-	{
-		if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, getFragVec failed", __FILE__,__func__, __LINE__);
-		return (char *)fragVec;
+	char *fragVec = getFragVec();
+	if (!fragVec) {
+		logTrace(g_conf.m_logTraceXmlDoc, "END, getFragVec failed");
+		return fragVec;
+	}
+	if (fragVec == (void *)-1) {
+		g_process.shutdownAbort(true);
 	}
-	if ( fragVec == (void *)-1 ) { g_process.shutdownAbort(true); }

 	// why do we need this?
 	if ( m_wts ) {
 		uint8_t *lv = getLangVector();
-		if ( ! lv ) 
-		{
-			if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, getLangVector failed", __FILE__,__func__, __LINE__);
+		if (!lv) {
+			logTrace(g_conf.m_logTraceXmlDoc, "END, getLangVector failed");
 			return (char *)lv;
 		}
-		if ( lv == (void *)-1 ) { g_process.shutdownAbort(true); }
+		if (lv == (void *)-1) {
+			g_process.shutdownAbort(true);
+		}
 	}

 	CollectionRec *cr = getCollRec();
-	if ( ! cr ) 
-	{
-		if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, getCollRec failed", __FILE__,__func__, __LINE__);
+	if ( ! cr ) {
+		logTrace(g_conf.m_logTraceXmlDoc, "END, getCollRec failed");
 		return NULL;
 	}

-
 	// do not repeat this if the cachedb storage call blocks
 	m_allHashed = true;

 	// reset distance cursor
 	m_dist = 0;

-
-	if ( ! hashContentType   ( table ) ) 
-	{
-		if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, hashContentType failed", __FILE__,__func__, __LINE__);
-		return NULL;
-	}
-	
-	if ( ! hashUrl           ( table, false ) ) 
-	{
-		if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, hashUrl failed", __FILE__,__func__, __LINE__);
-		return NULL;
-	}
-	
-	if ( ! hashLanguage      ( table ) ) 
-	{
-		if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, hashLanguage failed", __FILE__,__func__, __LINE__);
-		return NULL;
-	}
-	
-	if ( ! hashCountry       ( table ) ) 
-	{
-		if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, hashCountry failed", __FILE__,__func__, __LINE__);
+	if (!hashContentType(table)) {
+		logTrace(g_conf.m_logTraceXmlDoc, "END, hashContentType failed");
 		return NULL;
 	}

-// BR 20160106 removed:	if ( ! hashAds           ( table ) ) return NULL;
-// BR 20160106 removed:	if ( ! hashSubmitUrls    ( table ) ) return NULL;
-	if ( ! hashIsAdult       ( table ) ) 
-	{
-		if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, hashIsAdult failed", __FILE__,__func__, __LINE__);
+	if (!hashUrl(table, false)) {
+		logTrace(g_conf.m_logTraceXmlDoc, "END, hashUrl failed");
 		return NULL;
 	}

-	// has gbhasthumbnail:1 or 0
-// BR 20160106 removed:	if ( ! hashImageStuff    ( table ) ) return NULL;
+	if (!hashLanguage(table)) {
+		logTrace(g_conf.m_logTraceXmlDoc, "END, hashLanguage failed");
+		return NULL;
+	}
+
+	if (!hashCountry(table)) {
+		logTrace(g_conf.m_logTraceXmlDoc, "END, hashCountry failed");
+		return NULL;
+	}
+
+	if (!hashIsAdult(table)) {
+		logTrace(g_conf.m_logTraceXmlDoc, "END, hashIsAdult failed");
+		return NULL;
+	}

 	// now hash the terms sharded by termid and not docid here since they
 	// just set a special bit in posdb key so Rebalance.cpp can work.
 	// this will hash the content checksum which we need for deduping
 	// which we use for diffbot custom crawls as well.
-	if ( ! hashNoSplit ( table ) ) 
-	{
-		if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, hashNoSplit failed", __FILE__,__func__, __LINE__);
+	if (!hashNoSplit(table)) {
+		logTrace(g_conf.m_logTraceXmlDoc, "END, hashNoSplit failed");
 		return NULL;
 	}

@ -445,16 +433,13 @@ char *XmlDoc::hashAll ( HashTableX *table ) {
 	// global index now, so don't need this... 9/28/2014

 	// stop indexing xml docs
-	bool indexDoc = true;
-	if ( ! cr->m_indexBody   ) indexDoc = false;
-
+	bool indexDoc = cr->m_indexBody;

 	// global index unless this is a json object in which case it is
 	// hashed above in the call to hashJSON(). this will decrease disk
 	// usage by about half, posdb* files are pretty big.
-	if ( ! indexDoc ) 
-	{
-		if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, !indexDoc", __FILE__,__func__, __LINE__);
+	if (!indexDoc) {
+		logTrace(g_conf.m_logTraceXmlDoc, "END, !indexDoc");
 		return (char *)1;
 	}

@ -464,9 +449,8 @@ char *XmlDoc::hashAll ( HashTableX *table ) {

 	// hash the body of the doc first so m_dist is 0 to match
 	// the rainbow display of sections
-	if ( ! hashBody2 (table ) ) 
-	{
-		if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, hashBody2 failed", __FILE__,__func__, __LINE__);
+	if (!hashBody2(table)) {
+		logTrace(g_conf.m_logTraceXmlDoc, "END, hashBody2 failed");
 		return NULL;
 	}

@ -476,18 +460,16 @@ char *XmlDoc::hashAll ( HashTableX *table ) {
 	// repeated title terms because we do not do spam detection
 	// on them. thus, we need to hash these first before anything
 	// else. give them triple the body score
-	if ( ! hashTitle ( table )) 
-	{
-		if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, hashTitle failed", __FILE__,__func__, __LINE__);
+	if (!hashTitle(table)) {
+		logTrace(g_conf.m_logTraceXmlDoc, "END, hashTitle failed");
 		return NULL;
 	}

 	// . hash the keywords tag, limited to first 2k of them so far
 	// . hash above the neighborhoods so the neighborhoods only index
 	//   what is already in the hash table
-	if ( ! hashMetaKeywords(table ) ) 
-	{
-		if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, hashMetaKeywords failed", __FILE__,__func__, __LINE__);
+	if (!hashMetaKeywords(table)) {
+		logTrace(g_conf.m_logTraceXmlDoc, "END, hashMetaKeywords failed");
 		return NULL;
 	}

@ -495,18 +477,16 @@ char *XmlDoc::hashAll ( HashTableX *table ) {
 	// we index the single words in the neighborhoods next, and
 	// we had songfacts.com coming up for the 'street light facts'
 	// query because it had a bunch of anomalous inlink text.
-	if ( ! hashIncomingLinkText(table,false,true)) 
-	{
-		if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, hashIncomingLinkText failed", __FILE__,__func__, __LINE__);
+	if (!hashIncomingLinkText(table, false, true)) {
+		logTrace(g_conf.m_logTraceXmlDoc, "END, hashIncomingLinkText failed");
 		return NULL;
 	}

 	// then the meta summary and description tags with half the score of
 	// the body, and only hash a term if was not already hashed above
 	// somewhere.
-	if ( ! hashMetaSummary(table) ) 
-	{
-		if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, hashMetaSummary failed", __FILE__,__func__, __LINE__);
+	if (!hashMetaSummary(table)) {
+		logTrace(g_conf.m_logTraceXmlDoc, "END, hashMetaSummary failed");
 		return NULL;
 	}

@ -514,68 +494,48 @@ char *XmlDoc::hashAll ( HashTableX *table ) {
 	// BR 20160220
 	// Store value of meta tag "geo.placename" to help aid searches for
 	// location specific sites, e.g. 'Restaurant in London'
-	if ( ! hashMetaGeoPlacename(table) ) 
-	{
-		if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, hashMetaGeoPlacename failed", __FILE__,__func__, __LINE__);
+	if (!hashMetaGeoPlacename(table)) {
+		logTrace(g_conf.m_logTraceXmlDoc, "END, hashMetaGeoPlacename failed");
 		return NULL;
 	}

-
-
- skip:
+skip:

 	// this will only increment the scores of terms already in the table
 	// because we neighborhoods are not techincally in the document
 	// necessarily and we do not want to ruin our precision
-	if ( ! hashNeighborhoods ( table ) ) 
-	{
-		if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, hashNeighborhoods failed", __FILE__,__func__, __LINE__);
+	if (!hashNeighborhoods(table)) {
+		logTrace(g_conf.m_logTraceXmlDoc, "END, hashNeighborhoods failed");
 		return NULL;
 	}

-	if ( ! hashLinks         ( table ) ) 
-	{
-		if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, hashLinks failed", __FILE__,__func__, __LINE__);
-		return NULL;
-	}
-	
-	if ( ! hashDateNumbers   ( table ) ) 
-	{
-		if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, hashDateNumbers failed", __FILE__,__func__, __LINE__);
-		return NULL;
-	}
-	
-	if ( ! hashMetaTags      ( table ) ) 
-	{
-		if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, hashMetaTags failed", __FILE__,__func__, __LINE__);
+	if (!hashLinks(table)) {
+		logTrace(g_conf.m_logTraceXmlDoc, "END, hashLinks failed");
 		return NULL;
 	}

-	if ( ! hashPermalink     ( table ) )
-	{
-		if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, hashPermaLink failed", __FILE__,__func__, __LINE__);
+	if (!hashDateNumbers(table)) {
+		logTrace(g_conf.m_logTraceXmlDoc, "END, hashDateNumbers failed");
+		return NULL;
+	}
+
+	if (!hashMetaTags(table)) {
+		logTrace(g_conf.m_logTraceXmlDoc, "END, hashMetaTags failed");
+		return NULL;
+	}
+
+	if (!hashPermalink(table)) {
+		logTrace(g_conf.m_logTraceXmlDoc, "END, hashPermaLink failed");
 		return NULL;
 	}

 	// hash gblang:de last for parsing consistency
-	if ( ! hashLanguageString ( table ) ) 
-	{
-		if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, hashLanguageString failed", __FILE__,__func__, __LINE__);
+	if (!hashLanguageString(table)) {
+		logTrace(g_conf.m_logTraceXmlDoc, "END, hashLanguageString failed");
 		return NULL;
 	}

-	// . hash gbkeyword:gbmininlinks where the score is the inlink count
-	// . the inlink count can go from 1 to 255
-	// . an ip neighborhood can vote no more than once
-	// . this is in LinkInfo::hash
-	//if ( ! hashMinInlinks ( table , linkInfo ) ) return NULL;
-
-
-	// return true if we don't need to print parser info
-	//if ( ! m_pbuf ) return true;
-	// print out the table into g_bufPtr now if we need to
-	//table->print ( );
-	if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: END, OK", __FILE__,__func__, __LINE__);
+	logTrace(g_conf.m_logTraceXmlDoc, "END, OK");
 	return (char *)1;
 }

@ -640,7 +600,6 @@ bool XmlDoc::hashMetaTags ( HashTableX *tt ) {
 		// only get content for <meta name=..> not <meta http-equiv=..>
 		int32_t tagLen;
 		char *tag = m_xml.getString ( i , "name" , &tagLen );
-		char *tptr = tag;
 		char tagLower[128];
 		int32_t j ;
 		int32_t code;
@ -697,13 +656,6 @@ bool XmlDoc::hashMetaTags ( HashTableX *tt ) {
 			continue;
 		}

-
-		// . don't allow reserved names: site, url, suburl, link and ip
-		// . actually, the colon is included as part of those
-		//   field names, so we really lucked out...!
-		// . index this converted tag name
-		tptr = tagLower;
-
 		// get the content
 		int32_t len;
 		char *s = m_xml.getString ( i , "content" , &len );
@ -742,22 +694,13 @@ bool XmlDoc::hashMetaTags ( HashTableX *tt ) {
 		// NULL terminate the buffer
 		buf[len] = '\0';

-		// temp null term
-		char c = tptr[tagLen];
-		tptr[tagLen] = 0;
-		
-		
-		// BR 20160220
 		// Now index the wanted meta tags as normal text without prefix so they
 		// are used in user searches automatically.
-		// custom
-		//hi.m_prefix = tptr;
 		hi.m_prefix = NULL;

 		// desc is NULL, prefix will be used as desc
 		bool status = hashString ( buf,len,&hi );
-		// put it back
-		tptr[tagLen] = c;
+
 		// bail on error, g_errno should be set
 		if ( ! status ) return false;

@ -1088,7 +1031,7 @@ bool XmlDoc::hashLinksForLinkdb ( HashTableX *dt ) {
 #endif
 		// set this key, it is the entire record
 		key224_t k;
-		k = g_linkdb.makeKey_uk ( linkeeSiteHash32 ,
+		k = Linkdb::makeKey_uk ( linkeeSiteHash32 ,
 					  m_links.getLinkHash64(i)   ,
 					  spam               , // link spam?
 					  siteRank     , // was quality
@ -1509,8 +1452,7 @@ bool XmlDoc::hashIncomingLinkText ( HashTableX *tt               ,

 	// sanity check
 	if ( hashAnomalies == hashNonAnomalies ) { g_process.shutdownAbort(true); }
-	// display this note in page parser
-	const char *note = "hashing incoming link text";
+
 	// sanity
 	if ( ! m_linkInfo1Valid ) { g_process.shutdownAbort(true); }

@ -1531,8 +1473,6 @@ bool XmlDoc::hashIncomingLinkText ( HashTableX *tt               ,
 	// brought the following code in from LinkInfo.cpp
 	//

-	int32_t noteLen = 0;
-	if ( note ) noteLen = strlen ( note );
 	// count "external" inlinkers
 	int32_t ecount = 0;

@ -1631,11 +1571,6 @@ bool XmlDoc::hashNeighborhoods ( HashTableX *tt ) {

 	//int32_t inlinks = *getSiteNumInlinks();

-	// HACK: to avoid having to pass a flag to TermTable, then to
-	// Words::hash(), Phrases::hash(), etc. just flip a bit in the
-	// table to make it not add anything unless it is already in there.
-	tt->m_addIffNotUnique = true;
-
 	// update hash parms
 	HashInfo hi;
 	hi.m_tt        = tt;
@ -1647,9 +1582,6 @@ bool XmlDoc::hashNeighborhoods ( HashTableX *tt ) {
 	int32_t len = k->size_surroundingText - 1;
 	if ( ! hashString ( s, len, &hi ) ) return false;

-	// now turn it back off
-	tt->m_addIffNotUnique = false;
-
 	// get the next Inlink
 	goto loop;
 }
@ -1992,7 +1924,7 @@ bool XmlDoc::hashSingleTerm( const char *s, int32_t slen, HashInfo *hi ) {


 	key144_t k;
-	g_posdb.makeKey ( &k ,
+	Posdb::makeKey ( &k ,
 			  final,
 			  0LL, // docid
 			  0, // dist
@ -2355,7 +2287,7 @@ bool XmlDoc::hashWords3( HashInfo *hi, const Words *words, Phrases *phrases, Sec
 		// if using posdb
 		key144_t k;

-		g_posdb.makeKey ( &k ,
+		Posdb::makeKey ( &k ,
 				  h ,
 				  0LL,//docid
 				  wposvec[i], // dist,
@ -2405,7 +2337,7 @@ bool XmlDoc::hashWords3( HashInfo *hi, const Words *words, Phrases *phrases, Sec
 			int64_t nah ;
 			nah = hash64Lower_utf8 ( wptrs[i], wlens[i]-2 );
 			if ( plen>0 ) nah = hash64 ( nah , prefixHash );
-			g_posdb.makeKey ( &k ,
+			Posdb::makeKey ( &k ,
 					  nah,
 					  0LL,//docid
 					  wposvec[i], // dist,
@ -2462,7 +2394,7 @@ skipsingleword:
 			// hash with prefix
 			if ( plen > 0 ) ph2 = hash64 ( npid , prefixHash );
 			else            ph2 = npid;
-			g_posdb.makeKey ( &k ,
+			Posdb::makeKey ( &k ,
 					  ph2 ,
 					  0LL,//docid
 					  wposvec[i],//dist,
@ -2565,7 +2497,7 @@ bool XmlDoc::hashFieldMatchTerm ( char *val , int32_t vlen , HashInfo *hi ) {
 	//   a prefix hash
 	// . use mostly fake value otherwise
 	key144_t k;
-	g_posdb.makeKey ( &k ,
+	Posdb::makeKey ( &k ,
 			  ph2 ,
 			  0,//docid
 			  0,// word pos #
@ -2696,7 +2628,7 @@ bool XmlDoc::hashNumberForSortingAsInt32 ( int32_t n , HashInfo *hi , const char
 	//   a prefix hash
 	// . use mostly fake value otherwise
 	key144_t k;
-	g_posdb.makeKey ( &k ,
+	Posdb::makeKey ( &k ,
 			  ph2 ,
 			  0,//docid
 			  0,// word pos #
@ -2719,14 +2651,7 @@ bool XmlDoc::hashNumberForSortingAsInt32 ( int32_t n , HashInfo *hi , const char
 			  false , // delkey?
 			  hi->m_shardByTermId );

-	//int64_t final = hash64n("products.offerprice",0);
-	//int64_t prefix = hash64n("gbsortby",0);
-	//int64_t h64 = hash64 ( final , prefix);
-	//if ( ph2 == h64 )
-	//	log("hey: got offer price");
-	// now set the float in that key
-	//g_posdb.setFloat ( &k , f );
-	g_posdb.setInt ( &k , n );
+	Posdb::setInt ( &k , n );

 	// HACK: this bit is ALWAYS set by Posdb::makeKey() to 1
 	// so that we can b-step into a posdb list and make sure
@ -2736,11 +2661,11 @@ bool XmlDoc::hashNumberForSortingAsInt32 ( int32_t n , HashInfo *hi , const char
 	// key that has a float stored in it. then it will NOT
 	// set the siterank and langid bits which throw our sorting
 	// off!!
-	g_posdb.setAlignmentBit ( &k , 0 );
+	Posdb::setAlignmentBit ( &k , 0 );

 	// sanity
-	//float t = g_posdb.getFloat ( &k );
-	int32_t x = g_posdb.getInt ( &k );
+	//float t = Posdb::getFloat ( &k );
+	int32_t x = Posdb::getInt ( &k );
 	if ( x != n ) { g_process.shutdownAbort(true); }

 	HashTableX *dt = hi->m_tt;
--- a/main.cpp
+++ b/main.cpp
@ -2844,7 +2844,7 @@ void dumpTitledb (const char *coll, int32_t startFileNum, int32_t numFiles, bool
 	startKey.setMin();
 	endKey.setMax();
 	lastKey.setMin();
-	startKey = g_titledb.makeFirstKey ( docid );
+	startKey = Titledb::makeFirstKey ( docid );
 	// turn off threads
 	g_jobScheduler.disallow_new_jobs();
 	// get a meg at a time
@ -2909,7 +2909,7 @@ void dumpTitledb (const char *coll, int32_t startFileNum, int32_t numFiles, bool
 		key96_t k       = list.getCurrentKey();
 		char *rec     = list.getCurrentRec();
 		int32_t  recSize = list.getCurrentRecSize();
-		int64_t docId       = g_titledb.getDocIdFromKey ( k );
+		int64_t docId       = Titledb::getDocIdFromKey ( &k );
 		if ( k <= lastKey )
 			log("key out of order. "
 			    "lastKey.n1=%" PRIx32" n0=%" PRIx64" "
@ -4292,8 +4292,8 @@ bool parseTest ( const char *coll, int64_t docId, const char *query ) {
 	// get a title rec
 	g_jobScheduler.disallow_new_jobs();
 	RdbList tlist;
-	key96_t startKey = g_titledb.makeFirstKey ( docId );
-	key96_t endKey   = g_titledb.makeLastKey  ( docId );
+	key96_t startKey = Titledb::makeFirstKey ( docId );
+	key96_t endKey   = Titledb::makeLastKey  ( docId );
 	// a niceness of 0 tells it to block until it gets results!!
 	Msg5 msg5;

@ -4722,7 +4722,7 @@ void dumpPosdb (const char *coll, int32_t startFileNum, int32_t numFiles, bool i
 		const char *dd = "";
 		if ( (k.n0 & 0x01) == 0x00 ) dd = " (delete)";
 		int64_t d = g_posdb.getDocId(&k);
-		uint8_t dh = g_titledb.getDomHash8FromDocId(d);
+		uint8_t dh = Titledb::getDomHash8FromDocId(d);
 		char *rec = list.getCurrentRec();
 		int32_t recSize = 18;
 		if ( rec[0] & 0x04 ) recSize = 6;
@ -4947,10 +4947,10 @@ void dumpLinkdb ( const char *coll,
 	if ( url ) {
 		Url u;
 		u.set( url, strlen( url ), true, false );
-		uint32_t h32 = u.getHostHash32();//g_linkdb.getUrlHash(&u)
+		uint32_t h32 = u.getHostHash32();
 		int64_t uh64 = hash64n(url,0);
-		startKey = g_linkdb.makeStartKey_uk ( h32 , uh64 );
-		endKey   = g_linkdb.makeEndKey_uk   ( h32 , uh64 );
+		startKey = Linkdb::makeStartKey_uk ( h32 , uh64 );
+		endKey   = Linkdb::makeEndKey_uk   ( h32 , uh64 );
 	}
 	// turn off threads
 	g_jobScheduler.disallow_new_jobs();
@ -5006,7 +5006,7 @@ void dumpLinkdb ( const char *coll,
 		// is it a delete?
 		const char *dd = "";
 		if ( (k.n0 & 0x01) == 0x00 ) dd = " (delete)";
-		int64_t docId = (int64_t)g_linkdb.getLinkerDocId_uk(&k);
+		int64_t docId = (int64_t)Linkdb::getLinkerDocId_uk(&k);
 		int32_t shardNum = getShardNum(RDB_LINKDB,&k);
 		printf("k=%s "
 		       "linkeesitehash32=0x%08" PRIx32" "
@ -5022,16 +5022,16 @@ void dumpLinkdb ( const char *coll,
 		       "shardNum=%" PRIu32" "
 		       "%s\n",
 		       KEYSTR(&k,sizeof(key224_t)),
-		       (int32_t)g_linkdb.getLinkeeSiteHash32_uk(&k),
-		       (int64_t)g_linkdb.getLinkeeUrlHash64_uk(&k),
-		       (int32_t)g_linkdb.isLinkSpam_uk(&k),
-		       (int32_t)g_linkdb.getLinkerSiteRank_uk(&k),
-		       //hc,//g_linkdb.getLinkerHopCount_uk(&k),
-		       iptoa((int32_t)g_linkdb.getLinkerIp_uk(&k)),
+		       (int32_t)Linkdb::getLinkeeSiteHash32_uk(&k),
+		       (int64_t)Linkdb::getLinkeeUrlHash64_uk(&k),
+		       (int32_t)Linkdb::isLinkSpam_uk(&k),
+		       (int32_t)Linkdb::getLinkerSiteRank_uk(&k),
+		       //hc,//Linkdb::getLinkerHopCount_uk(&k),
+		       iptoa((int32_t)Linkdb::getLinkerIp_uk(&k)),
 		       docId,
-		       (int32_t)g_linkdb.getDiscoveryDate_uk(&k),
-		       (int32_t)g_linkdb.getLostDate_uk(&k),
-		       (int32_t)g_linkdb.getLinkerSiteHash32_uk(&k),
+		       (int32_t)Linkdb::getDiscoveryDate_uk(&k),
+		       (int32_t)Linkdb::getLostDate_uk(&k),
+		       (int32_t)Linkdb::getLinkerSiteHash32_uk(&k),
 		       shardNum,
 		       dd );
 	}
@ -5441,7 +5441,7 @@ int injectFile ( const char *filename , char *ips , const char *coll ) {
 		}

 		if ( startDocId != 0LL )
-			s_titledbKey = g_titledb.makeFirstKey(startDocId);
+			s_titledbKey = Titledb::makeFirstKey(startDocId);

 		s_endDocId = endDocId;

@ -5569,7 +5569,7 @@ void doInject ( int fd , void *state ) {
 		// turn off threads so this happens right away
 		g_jobScheduler.disallow_new_jobs();
 		key96_t endKey; //endKey.setMax();
-		endKey = g_titledb.makeFirstKey(s_endDocId);
+		endKey = Titledb::makeFirstKey(s_endDocId);
 		RdbList list;
 		Msg5 msg5;
 		const char *coll = "main";
@ -7160,7 +7160,7 @@ void countdomains( const char* coll, int32_t numRecs, int32_t verbosity, int32_t
 		key96_t k       = list.getCurrentKey();
 		char *rec     = list.getCurrentRec();
 		int32_t  recSize = list.getCurrentRecSize();
-		int64_t docId       = g_titledb.getDocId        ( &k );
+		int64_t docId       = Titledb::getDocId        ( &k );
 		attempts++;

 		if ( k <= lastKey ) 
--- a/misc/Test.cpp
+++ b/misc/Test.cpp
@ -928,7 +928,7 @@ bool Test::injectLoop ( ) {
 	m_sreq.m_domHash32  = fakeIp;
 	m_sreq.m_hostHash32 = fakeIp;
 	m_sreq.m_siteHash32 = fakeIp;
-	//m_sreq.m_probDocId = g_titledb.getProbableDocId( m_sreq.m_url );
+	//m_sreq.m_probDocId = Titledb::getProbableDocId( m_sreq.m_url );
 	// this crap is fake
 	m_sreq.m_isInjecting = 1;
 	// use test-spider subdir for storing pages and spider times?
@ -973,7 +973,6 @@ bool Test::injectLoop ( ) {
 				    m_coll              ,
 				    NULL                ,
 				    injectedWrapper     ,
-				    MAX_NICENESS        ,
 				    RDB_SPIDERDB        ) )
 		// return false if blocked
 		return false;
--- a/misc/urlinfo.cpp
+++ b/misc/urlinfo.cpp
@ -165,9 +165,9 @@ int main ( int argc , char *argv[] ) {
 	printf("encoded: %s\n",dst);

 	// the probable docid
-	int64_t pd = g_titledb.getProbableDocId(&u);
+	int64_t pd = Titledb::getProbableDocId(&u);
 	printf("pdocid: %"UINT64"\n", pd );
-	printf("dom8: 0x%"XINT32"\n", (int32_t)g_titledb.getDomHash8FromDocId(pd) );
+	printf("dom8: 0x%"XINT32"\n", (int32_t)Titledb::getDomHash8FromDocId(pd) );
 	if ( u.isLinkLoop() ) printf("islinkloop: yes\n");
 	else                  printf("islinkloop: no\n");
 	int64_t hh64 = u.getHostHash64();
--- a/test/unit/RdbListTest.cpp
+++ b/test/unit/RdbListTest.cpp
@ -10,6 +10,7 @@ static const char* makePosdbKey(char *key, int64_t termId, uint64_t docId, int32
 }

 TEST(RdbListTest, MergeTestPosdbEmptyAll) {
+	g_conf.m_logTraceRdbList = true;
 	// setup test
 	RdbList list1;
 	list1.set(NULL, 0, NULL, 0, 0, true, Posdb::getUseHalfKeys(), Posdb::getKeySize());
@ -32,6 +33,7 @@ TEST(RdbListTest, MergeTestPosdbEmptyAll) {
 }

 TEST(RdbListTest, MergeTestPosdbEmptyOne) {
+	g_conf.m_logTraceRdbList = true;
 	char key[MAX_KEY_BYTES];

 	// setup test
@ -83,6 +85,7 @@ TEST(RdbListTest, MergeTestPosdbEmptyOne) {

 // verify that list order is from oldest to newest (last list will override first list)
 TEST(RdbListTest, MergeTestPosdbVerifyListOrder) {
+	g_conf.m_logTraceRdbList = true;
 	char key[MAX_KEY_BYTES];

 	// setup test