Merge branch 'master' of github.com:privacore/open-source-search-engine

Ivan Skytte Jørgensen committed 2016-02-25 11:14:35 +01:00
77 changed files with 789 additions and 22071 deletions

Bits.h (19 changed lines)

@ -47,24 +47,21 @@
// set by Sections.cpp::setMenu() function
#define D_IN_LINK 0x0400
// in the place name part of an address?
//#define D_UNUSED_2 0x0800
//#define D_UNUSED 0x0800
// allow for DOWs (days of week) for texasdrums.org, so TUESDAYS is set with this and
// we can keep it as part of the sentence and not split on the colon
//#define D_IS_IN_DATE_2 0x1000
// this is so we can still set EV_HASTITLEBYVOTES if a tod (time-of-day) date is in the
// title, all other dates are no-no!
#define D_IS_DAYNUM 0x1000
// for setting event titles in Events.cpp
#define D_GENERIC_WORD 0x2000
#define D_CRUFTY 0x4000
#define D_IS_NUM 0x00008000
//#define D_UNUSED_3 0x00010000
#define D_IS_IN_URL 0x00020000
//#define D_UNUSED 0x4000
#define D_IS_NUM 0x00008000
//#define D_UNUSED 0x00010000
#define D_IS_IN_URL 0x00020000
// like D_IS_TOD above
#define D_IS_MONTH 0x00040000
#define D_IS_HEX_NUM 0x00080000
#define D_IS_MONTH 0x00040000
#define D_IS_HEX_NUM 0x00080000
//
// the bits below here are used for Summary.cpp when calling
// Bits::setForSummary()
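Since this hunk renumbers several of the D_* flags, the one invariant worth checking is that every flag keeps exactly one bit and no two flags collide. A minimal standalone sketch of that check (flag values copied from the hunk above; the test harness itself is hypothetical):

#include <cstdint>
#include <cassert>

int main() {
	// flag values as they stand after this change
	const uint32_t flags[] = {
		0x0400,     // D_IN_LINK
		0x1000,     // D_IS_DAYNUM
		0x2000,     // D_GENERIC_WORD
		0x00008000, // D_IS_NUM
		0x00020000, // D_IS_IN_URL
		0x00040000, // D_IS_MONTH
		0x00080000  // D_IS_HEX_NUM
	};
	uint32_t seen = 0;
	for ( uint32_t f : flags ) {
		assert ( (f & (f - 1)) == 0 ); // exactly one bit set
		assert ( (seen & f) == 0 );    // no overlap with an earlier flag
		seen |= f;
	}
	return 0;
}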

@ -36,12 +36,9 @@
// these are now just TitleRec keys
#define CLUSTER_REC_SIZE (sizeof(key_t))
// this now includes the gigabit vector
#define VECTOR_REC_SIZE (sizeof(key_t)+SAMPLE_VECTOR_SIZE+GIGABIT_VECTOR_SIZE)
class Clusterdb {
public:
public:
// reset rdb
void reset();
@ -85,16 +82,6 @@ class Clusterdb {
// convert a titlerec key into a clusterec key
key_t convertTitleRecKey ( key_t titleKey );
/*
uint32_t getGroupId ( int64_t docId ) {
return g_titledb.getGroupId ( docId ); };
// cluster rec should be stored on same host as titleRec with the
// same docId that this key contains
uint32_t getGroupIdFromKey ( key_t *key ) {
return g_titledb.getGroupId ( getDocId ( *key ) ); };
*/
// NOTE: THESE NOW USE THE REAL CLUSTERDB REC
// // docId occupies the most significant bytes of the key
// now docId occupies the bits after the first 23
@ -106,10 +93,6 @@ class Clusterdb {
return docId;
};
//int64_t getDocId ( char *r ) {
// return getDocId(*(key_t*)r);
//}
uint32_t getSiteHash26 ( const char *r ) {
//return g_titledb.getSiteHash ( (key_t *)r ); };
return ((uint32_t)(((const key_t*)r)->n0 >> 2) & 0x03FFFFFF);
@ -124,52 +107,16 @@ class Clusterdb {
return ((unsigned char)(((const key_t*)r)->n0 >> 28) & 0x0000003F);
}
// NOTE: THESE USE THE OLD "CLUSTERDB" REC GENERATED BY MSG22 (VECTOR)
//uint32_t getContentHash ( char *r ) {
// return g_titledb.getContentHash ( *(key_t *)r ); };
char getFamilyFilter ( const char *r ) {
if ( (*(const int64_t *)r) & 0x0000000400000000LL ) return 1;
return 0;
};
//uint32_t hasAdultWords ( char *r ) {
// return g_titledb.hasAdultWords ( *(key_t *)r ); };
//uint32_t hasAdultCategory ( char *r ) {
// return g_titledb.hasAdultCategory ( *(key_t *)r ); };
//unsigned char getLanguageFromVector ( char *r ) {
// return 0;
//}
// the random sample vector
/*
void getSampleVector ( char *vec ,
class Doc *doc,
char *coll ,
int32_t collLen ,
int32_t niceness = 0 );
*/
//void getSampleVector ( char *vec , class TermTable *table );
char getSampleSimilarity ( char *vec0 , char *vec1 , int32_t size );
// get the content vector from a cluster rec (used by Msg38.cpp)
//char *getSampleVector ( char *rec ) { return rec + sizeof(key_t); };
//char *getGigabitVector ( char *rec ) {
// return rec + sizeof(key_t) + SAMPLE_VECTOR_SIZE ; };
//char getGigabitSimilarity ( char *vec0 , char *vec1 ,
// int32_t *qtable , int32_t numSlots ) ;
//DiskPageCache *getDiskPageCache() { return &m_pc; };
private:
private:
// this rdb holds urls waiting to be spidered or being spidered
Rdb m_rdb;
//DiskPageCache m_pc;
};
extern class Clusterdb g_clusterdb;
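The accessors kept above all unpack fixed-width fields straight out of the low 64 bits (n0) of the clusterdb key. A minimal sketch of the same bit-slicing on a plain uint64_t, with the field positions taken from getSiteHash26() and getFamilyFilter() above (everything else is assumed):

#include <cstdint>

// n0 is the low 64 bits of a clusterdb key (key_t::n0 above).
// bits [27..2] hold the 26-bit site hash; the >> 2 skips the
// low housekeeping bits at the bottom of the key.
static uint32_t siteHash26 ( uint64_t n0 ) {
	return (uint32_t)(n0 >> 2) & 0x03FFFFFF;
}

// the family-filter flag is the single bit 0x0000000400000000 (bit 34)
static bool familyFilter ( uint64_t n0 ) {
	return ( n0 & 0x0000000400000000ULL ) != 0;
}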

@ -456,8 +456,6 @@ bool Collectiondb::addNewColl ( char *coll ,
cr->m_collectiveRespiderFrequency = 0.0;
//cr->m_restrictDomain = true;
// reset the crawl stats
// always turn off gigabits so &s=1000 can do summary skipping
cr->m_docsToScanForTopics = 0;
// turn off link voting, etc. to speed up
cr->m_getLinkInfo = false;
cr->m_computeSiteNumInlinks = false;
@ -1283,12 +1281,12 @@ char *Collectiondb::getDefaultColl ( HttpRequest *r ) {
// . get collectionRec from name
// . returns NULL if not available
CollectionRec *Collectiondb::getRec ( char *coll ) {
CollectionRec *Collectiondb::getRec ( const char *coll ) {
if ( ! coll ) coll = "";
return getRec ( coll , gbstrlen(coll) );
}
CollectionRec *Collectiondb::getRec ( char *coll , int32_t collLen ) {
CollectionRec *Collectiondb::getRec ( const char *coll , int32_t collLen ) {
if ( ! coll ) coll = "";
collnum_t collnum = getCollnum ( coll , collLen );
if ( collnum < 0 ) return NULL;
@ -1333,14 +1331,14 @@ char *Collectiondb::getCollName ( collnum_t collnum ) {
return m_recs[collnum]->m_coll;
}
collnum_t Collectiondb::getCollnum ( char *coll ) {
collnum_t Collectiondb::getCollnum ( const char *coll ) {
int32_t clen = 0;
if ( coll ) clen = gbstrlen(coll );
return getCollnum ( coll , clen );
}
collnum_t Collectiondb::getCollnum ( char *coll , int32_t clen ) {
collnum_t Collectiondb::getCollnum ( const char *coll , int32_t clen ) {
// default empty collection names
if ( coll && ! coll[0] ) coll = NULL;
@ -1674,9 +1672,6 @@ bool CollectionRec::load ( char *coll , int32_t i ) {
// fix for diffbot, spider time deduping
if ( m_isCustomCrawl ) m_dedupingEnabled = true;
// always turn off gigabits so &s=1000 can do summary skipping
if ( m_isCustomCrawl ) m_docsToScanForTopics = 0;
// make min to merge smaller than normal since most collections are
// small and we want to reduce the # of vfds (files) we have
if ( m_isCustomCrawl ) {

@ -61,8 +61,8 @@ class Collectiondb {
bool m_needsSave;
// returns i so that m_recs[i].m_coll = coll
collnum_t getCollnum ( char *coll , int32_t collLen );
collnum_t getCollnum ( char *coll ); // coll is NULL terminated here
collnum_t getCollnum ( const char *coll , int32_t collLen );
collnum_t getCollnum ( const char *coll ); // coll is NULL terminated here
char *getCollName ( collnum_t collnum );
char *getColl ( collnum_t collnum ) {return getCollName(collnum);};
@ -79,9 +79,9 @@ class Collectiondb {
// . get collectionRec from name
// returns NULL if not available
class CollectionRec *getRec ( char *coll );
class CollectionRec *getRec ( const char *coll );
class CollectionRec *getRec ( char *coll , int32_t collLen );
class CollectionRec *getRec ( const char *coll , int32_t collLen );
class CollectionRec *getRec ( collnum_t collnum);
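The signature changes above are a straight const-correctness migration: lookups that never modify the collection name now take const char*, and the NUL-terminated overload just measures the string and forwards. A minimal sketch of the pattern (strlen stands in for gbstrlen; the class name is hypothetical):

#include <cstring>
#include <cstdint>

class CollectionRec;

class CollectiondbSketch {
public:
	// NUL-terminated convenience overload forwards to the
	// length-taking worker, as getRec() does above
	CollectionRec *getRec ( const char *coll ) {
		if ( ! coll ) coll = "";
		return getRec ( coll , (int32_t)strlen(coll) );
	}
	CollectionRec *getRec ( const char *coll , int32_t collLen );
};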
@ -501,8 +501,7 @@ class CollectionRec {
float m_updateVotesFreq ; // in days. replaced m_recycleVotes
float m_sortByDateWeight ;
char m_dedupURLDefault ;
int32_t m_topicSimilarCutoffDefault ;
char m_dedupURLDefault ;
char m_useNewDeduping ;
char m_doTierJumping ;
float m_numDocsMultiplier ;
@ -716,20 +715,6 @@ class CollectionRec {
int32_t m_compoundListMaxSize;
// . related topics control
// . this can all be overridden by passing in your own cgi parms
// for the query request
int32_t m_numTopics; // how many do they want by default?
int32_t m_minTopicScore;
int32_t m_docsToScanForTopics; // how many to scan by default?
int32_t m_maxWordsPerTopic;
int32_t m_minDocCount; // min docs that must contain topic
char m_ipRestrict;
int32_t m_dedupSamplePercent;
char m_topicRemoveOverlaps; // this is generally a good thing
int32_t m_topicSampleSize; // sample about 5k per document
int32_t m_topicMaxPunctLen; // keep it set to 1 for speed
// SPELL CHECK
char m_spellCheck;
@ -887,26 +872,15 @@ class CollectionRec {
// post query reranking
int32_t m_pqr_docsToScan; // also for # docs for language
float m_pqr_demFactCountry; // demotion for foreign countries
float m_pqr_demFactQTTopicsInUrl; // demotion factor fewer for query terms or gigabits in the url
int32_t m_pqr_maxValQTTopicsInUrl; // max value for fewer query terms or gigabits in the url
float m_pqr_demFactPaths; // demotion factor for more paths
int32_t m_pqr_maxValPaths; // max value for more paths
float m_pqr_demFactCatidHasSupers; // demotion factor for catids with many super topics
int32_t m_pqr_maxValCatidHasSupers; // max value for catids with many super topics
float m_pqr_demFactPageSize; // demotion factor for higher page sizes
int32_t m_pqr_maxValPageSize; // max value for higher page sizes
float m_pqr_demFactLocTitle; // demotion factor for non-location specific queries with location specific results
float m_pqr_demFactLocSummary; // demotion factor for non-location specific queries with location specific results
bool m_pqr_demInTopics; // true to demote if location is in the gigabits, otherwise these locs won't be demoted
int32_t m_pqr_maxValLoc; // max value for non-location specific queries with location specific results
float m_pqr_demFactNonHtml; // demotion factor for non-html content type
float m_pqr_demFactXml; // demotion factor for xml content type
float m_pqr_demFactOthFromHost; // demotion factor for no other pages from same host
int32_t m_pqr_maxValOthFromHost; // max value for no other pages from same host
float m_pqr_demFactDmozCatNmNoQT; // demotion factor for dmoz category names that don't contain a query term
int32_t m_pqr_maxValDmozCatNmNoQT; // max value for dmoz category names that don't contain a query term
float m_pqr_demFactDmozCatNmNoGigabits; // demotion factor for dmoz category names that don't contain a gigabit
int32_t m_pqr_maxValDmozCatNmNoGigabits; // max value for dmoz category names that don't contain a gigabit
float m_pqr_demFactDatedbDate; // demotion for datedb date
int32_t m_pqr_minValDatedbDate; // dates earlier than this will be demoted to the max
int32_t m_pqr_maxValDatedbDate; // dates later than this will not be demoted

Conf.h (47 changed lines)

@ -147,7 +147,6 @@ class Conf {
// tagdb parameters
int32_t m_tagdbMaxTreeMem;
int32_t m_revdbMaxTreeMem;
int32_t m_timedbMaxTreeMem;
// clusterdb for site clustering, each rec is 16 bytes
@ -173,6 +172,9 @@ class Conf {
int32_t m_sendEmailTimeout;
int32_t m_pingSpacer;
int64_t m_msg40_msg39_timeout; //timeout for entire get-docid-list phase, in milliseconds.
int64_t m_msg3a_msg39_network_overhead; //additional latency/overhead of sending request+response over network.
// the spiderdb holds url records for spidering, when to spider, etc..
int32_t m_maxWriteThreads ;
int32_t m_spiderMaxDiskThreads ;
@ -184,7 +186,6 @@ class Conf {
bool m_useStatsdb;
bool m_spideringEnabled ;
bool m_turkingEnabled ;
bool m_injectionsEnabled ;
bool m_queryingEnabled ;
bool m_returnResultsAnyway;
@ -385,8 +386,6 @@ class Conf {
bool m_detectMemLeaks;
// . if false we will not keep spelling information in memory
// . we will keep the popularity info from dict though, since related
// topics requires that
bool m_doSpellChecking;
// are we running in Matt Wells's private data center? if so we
@ -395,23 +394,6 @@ class Conf {
bool m_forceIt;
// maximum number of synonyms/stems to expand a word into
//int32_t m_maxSynonyms;
// default affinity for spelling suggestions/numbers
//float m_defaultAffinity;
// threshold for synonym usage
//float m_frequencyThreshold;
// thesaurus configuration
//int32_t m_maxAffinityRequests;
//int32_t m_maxAffinityErrors;
//int32_t m_maxAffinityAge;
//int32_t m_affinityTimeout;
//char m_affinityServer[MAX_URL_LEN];
//char m_affinityParms[MAX_URL_LEN];
// new syncing information
bool m_syncEnabled;
bool m_syncIndexdb;
@ -561,7 +543,6 @@ class Conf {
bool m_logDebugThread ;
bool m_logDebugTimedb ;
bool m_logDebugTitle ;
bool m_logDebugTopics ;
bool m_logDebugTopDocs ;
bool m_logDebugUdp ;
bool m_logDebugUnicode ;
@ -586,7 +567,6 @@ class Conf {
bool m_logTimingNet;
bool m_logTimingQuery;
bool m_logTimingSpcache;
bool m_logTimingTopics;
// programmer reminders.
bool m_logReminders;
@ -653,17 +633,6 @@ class Conf {
int32_t m_maxHeartbeatDelay;
int32_t m_maxCallbackDelay;
// balance value for Msg6, each host can have this many ready domains
// per global host
//int32_t m_distributedSpiderBalance;
//int32_t m_distributedIpWait;
// parameters for indexdb spitting and tfndb extension bits
//int32_t m_indexdbSplit;
//char m_fullSplit;
//char m_legacyIndexdbSplit;
//int32_t m_tfndbExtBits;
// used by Repair.cpp
char m_repairingEnabled ;
int32_t m_maxRepairSpiders ;
@ -673,23 +642,13 @@ class Conf {
char m_fullRebuild ;
char m_rebuildAddOutlinks;
char m_rebuildRecycleLinkInfo ;
//char m_rebuildRecycleLinkInfo2 ;
//char m_removeBadPages ;
char m_rebuildTitledb ;
//char m_rebuildTfndb ;
//char m_rebuildIndexdb ;
char m_rebuildPosdb ;
//char m_rebuildNoSplits ;
//char m_rebuildDatedb ;
char m_rebuildClusterdb ;
char m_rebuildSpiderdb ;
//char m_rebuildSitedb ;
char m_rebuildLinkdb ;
//char m_rebuildTagdb ;
//char m_rebuildPlacedb ;
char m_rebuildTimedb ;
char m_rebuildSectiondb ;
//char m_rebuildRevdb ;
char m_rebuildRoots ;
char m_rebuildNonRoots ;

@ -71,7 +71,7 @@ int32_t Highlight::set( SafeBuf *sb, char *content, int32_t contentLen, Query *q
}
Phrases phrases;
if ( !phrases.set( &words, &bits, true, false, version, niceness ) ) {
if ( !phrases.set( &words, &bits, version, niceness ) ) {
return -1;
}

@ -16,7 +16,6 @@
#include "Clusterdb.h"
#include "Datedb.h"
#include "Dns.h"
#include "Revdb.h"
// a global class extern'd in .h file
Hostdb g_hostdb;
@ -1760,7 +1759,7 @@ int64_t Hostdb::getNumGlobalEvents ( ) {
return n / m_numHostsPerShard;
}
bool Hostdb::setNote ( int32_t hostId, char *note, int32_t noteLen ) {
bool Hostdb::setNote ( int32_t hostId, const char *note, int32_t noteLen ) {
// replace the note on the host
if ( noteLen > 125 ) noteLen = 125;
Host *h = getHost ( hostId );
@ -1773,7 +1772,7 @@ bool Hostdb::setNote ( int32_t hostId, char *note, int32_t noteLen ) {
return saveHostsConf();
}
bool Hostdb::setSpareNote ( int32_t spareId, char *note, int32_t noteLen ) {
bool Hostdb::setSpareNote ( int32_t spareId, const char *note, int32_t noteLen ) {
// replace the note on the host
if ( noteLen > 125 ) noteLen = 125;
Host *h = getSpare ( spareId );
@ -2354,8 +2353,7 @@ int32_t getShardNumFromTermId ( int64_t termId ) {
// . this allows us to have any # of groups in a stripe, not just power of 2
// . now we can use 3 stripes of 96 hosts each so spiders will almost never
// go down
//uint32_t Hostdb::getGroupId ( char rdbId,void *k,bool split ) {
uint32_t Hostdb::getShardNum ( char rdbId, const void *k ) { // ,bool split ) {
uint32_t Hostdb::getShardNum ( char rdbId, const void *k ) {
if ( (rdbId == RDB_POSDB || rdbId == RDB2_POSDB2) &&
// split by termid and not docid?
@ -2372,10 +2370,6 @@ uint32_t Hostdb::getShardNum ( char rdbId, const void *k ) { // ,bool split ) {
uint64_t d = g_posdb.getDocId ( k );
return m_map [ ((d>>14)^(d>>7)) & (MAX_KSLOTS-1) ];
}
//if ( rdbId == RDB_INDEXDB || rdbId == RDB2_INDEXDB2 ) {
// uint64_t d = g_indexdb.getDocId ( (key_t *)k );
// return m_map [ ((d>>14)^(d>>7)) & (MAX_KSLOTS-1) ];
//}
else if ( rdbId == RDB_DATEDB || rdbId == RDB2_DATEDB2 ) {
uint64_t d = g_datedb.getDocId ( k );
return m_map [ ((d>>14)^(d>>7)) & (MAX_KSLOTS-1) ];
@ -2383,10 +2377,6 @@ uint32_t Hostdb::getShardNum ( char rdbId, const void *k ) { // ,bool split ) {
else if ( rdbId == RDB_LINKDB || rdbId == RDB2_LINKDB2 ) {
return m_map [(*(uint16_t *)((char *)k + 26))>>3];
}
//else if ( rdbId == RDB_TFNDB || rdbId == RDB2_TFNDB2 ) {
// uint64_t d = g_tfndb.getDocId ( (key_t *)k );
// return m_map [ ((d>>14)^(d>>7)) & (MAX_KSLOTS-1) ];
//}
else if ( rdbId == RDB_TITLEDB || rdbId == RDB2_TITLEDB2 ) {
uint64_t d = g_titledb.getDocId ( (key_t *)k );
return m_map [ ((d>>14)^(d>>7)) & (MAX_KSLOTS-1) ];
@ -2416,23 +2406,11 @@ uint32_t Hostdb::getShardNum ( char rdbId, const void *k ) { // ,bool split ) {
rdbId == RDB2_TAGDB2 ) {
return m_map [(*(uint16_t *)((char *)k + 10))>>3];
}
else if ( rdbId == RDB_DOLEDB ) { // || rdbId == RDB2_DOLEDB2 ) {
else if ( rdbId == RDB_DOLEDB ) {
// HACK:!!!!!! this is a trick!!! it is us!!!
//return g_hostdb.m_myHost->m_groupId;
return g_hostdb.m_myHost->m_shardNum;
}
else if ( rdbId == RDB_SECTIONDB || rdbId == RDB2_SECTIONDB2 ) {
// use top 13 bits of key
return m_map [(*(uint16_t *)((char *)k + 14))>>3];
//uint64_t d = g_datedb.getDocId ( k );
//return m_map [ ((d>>14)^(d>>7)) & (MAX_KSLOTS-1) ];
}
else if ( rdbId == RDB_REVDB || rdbId == RDB2_REVDB2 ) {
// key is formed like title key is
//int64_t d = g_titledb.getDocId ( (key_t *)k );
uint64_t d = g_revdb.getDocId( (key_t *)k );
return m_map [ ((d>>14)^(d>>7)) & (MAX_KSLOTS-1) ];
}
// core -- must be provided
char *xx = NULL; *xx = 0;
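Every docid-keyed case in getShardNum() above reduces to the same mapping: fold the docid into a slot index, then look the owning shard up in m_map. A minimal sketch of that scheme (the fold and the mask are as in the hunk; the MAX_KSLOTS value and table setup are assumed):

#include <cstdint>

#define MAX_KSLOTS 8192 // assumed; must be a power of two for the mask below

// filled in at startup so the slots are spread evenly across shards
static uint16_t s_map [ MAX_KSLOTS ];

static uint32_t shardForDocId ( uint64_t d ) {
	// xor two shifted copies of the docid to mix its bits, then
	// mask down to a slot index -- the same fold used above
	return s_map [ ((d >> 14) ^ (d >> 7)) & (MAX_KSLOTS - 1) ];
}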

@ -593,8 +593,8 @@ class Hostdb {
// sets the note for a host
bool setNote ( int32_t hostId, char *note, int32_t noteLen );
bool setSpareNote ( int32_t spareId, char *note, int32_t noteLen );
bool setNote ( int32_t hostId, const char *note, int32_t noteLen );
bool setSpareNote ( int32_t spareId, const char *note, int32_t noteLen );
// replace a host with a spare
bool replaceHost ( int32_t origHostId, int32_t spareHostId );

@ -10,6 +10,7 @@
#include "Proxy.h"
#include "PageCrawlBot.h"
#include "Parms.h"
#include "PageRoot.h"
#ifdef _VALGRIND_
#include <valgrind/memcheck.h>
#endif
@ -471,76 +472,6 @@ void HttpServer::requestHandler ( TcpSocket *s ) {
// parse the http request
HttpRequest r;
// debug
/*
unsigned char foo[1024];
unsigned char *pp = foo;
pp += sprintf ( (char *)pp,"GET /search?qcs=iso-8859-1&k0c=107207&code=1M9VNT6&spell=1&ns=2&nrt=0&rat=0&sc=1&DR=1&qh=0&bq2&q=");
//pp += sprintf ( (char *)pp,"GET /search?k0c=107207&code=1M9VNT6&spell=1&ns=2&nrt=0&rat=0&sc=1&DR=1&qh=0&bq2&q=");
static char ddd[] = {
0xc3, 0x83, 0xc6, 0x92, 0xc3, 0xa2, 0xe2, 0x80, 0x9e, 0xc2,
0xa2, 0xc3, 0x83, 0xc2, 0xa2, 0xc3, 0xa2, 0xe2, 0x80, 0x9a,
0xc2, 0xac, 0xc3, 0x82, 0xc2, 0xa6, 0xc3, 0x83, 0xc6, 0x92,
0xc3, 0xa2, 0xe2, 0x80, 0x9e, 0xc2, 0xa2, 0xc3, 0x83, 0xe2,
0x80, 0x9a, 0xc3, 0x82, 0xc2, 0x81, 0xc3, 0x83, 0xc6, 0x92,
0xc3, 0xa2, 0xe2, 0x80, 0x9e, 0xc2, 0xa2, 0xc3, 0x83, 0xc2,
0xa2, 0xc3, 0xa2, 0xe2, 0x80, 0x9a, 0xc2, 0xac, 0xc3, 0x82,
0xc2, 0xa1, 0xc3, 0x83, 0xc6, 0x92, 0xc3, 0xa2, 0xe2, 0x80,
0x9e, 0xc2, 0xa2, 0xc3, 0x83, 0xe2, 0x80, 0xb9, 0xc3, 0xa2,
0xe2, 0x82, 0xac, 0xc2, 0xa0, 0xc3, 0x83, 0xc6, 0x92, 0xc3,
0xa2, 0xe2, 0x80, 0x9e, 0xc2, 0xa2, 0xc3, 0x83, 0xc2, 0xa2,
0xc3, 0xa2, 0xe2, 0x80, 0x9a, 0xc2, 0xac, 0xc3, 0x82, 0xc2,
0xa6, 0x20, 0xc3, 0x83, 0xc6, 0x92, 0xc3, 0x8b, 0xc5, 0x93,
0xc3, 0x83, 0xe2, 0x80, 0x9a, 0xc3, 0x82, 0xc2, 0xa7, 0xc3,
0x83, 0xc6, 0x92, 0xc3, 0xa2, 0xe2, 0x80, 0x9e, 0xc2, 0xa2,
0xc3, 0x83, 0xc2, 0xa2, 0xc3, 0xa2, 0xe2, 0x80, 0x9a, 0xc2,
0xac, 0xc3, 0x85, 0xc2, 0xbe, 0xc3, 0x83, 0xc6, 0x92, 0xc3,
0xa2, 0xe2, 0x80, 0x9e, 0xc2, 0xa2, 0xc3, 0x83, 0xc2, 0xa2,
0xc3, 0xa2, 0xe2, 0x80, 0x9a, 0xc2, 0xac, 0xc3, 0x82, 0xc2,
0xa6, 0xc3, 0x83, 0xc6, 0x92, 0xc3, 0xa2, 0xe2, 0x80, 0x9e,
0xc2, 0xa2, 0xc3, 0x83, 0xc2, 0xa2, 0xc3, 0xa2, 0xe2, 0x80,
0x9a, 0xc2, 0xac, 0xc3, 0x82, 0xc2, 0xa0, 0xc3, 0x83, 0xc6,
0x92, 0xc3, 0x8b, 0xc5, 0x93, 0xc3, 0x83, 0xe2, 0x80, 0x9a,
0xc3, 0x82, 0xc2, 0xb8, 0xc3, 0x83, 0xc6, 0x92, 0xc3, 0xa2,
0xe2, 0x80, 0x9e, 0xc2, 0xa2, 0xc3, 0x83, 0xe2, 0x80, 0xb9,
0xc3, 0xa2, 0xe2, 0x82, 0xac, 0xc2, 0xa0, 0xc3, 0x83, 0xc6,
0x92, 0xc3, 0xa2, 0xe2, 0x80, 0x9e, 0xc2, 0xa2, 0xc3, 0x83,
0xc2, 0xa2, 0xc3, 0xa2, 0xe2, 0x80, 0x9a, 0xc2, 0xac, 0xc3,
0x82, 0xc2, 0xa6, 0xc3, 0x83, 0xc6, 0x92, 0xc3, 0x8b, 0xc5,
0x93, 0xc3, 0x83, 0xe2, 0x80, 0x9a, 0xc3, 0x82, 0xc2, 0xa9,
0x20, 0xc3, 0x83, 0xc6, 0x92, 0xc3, 0x8b, 0xc5, 0x93, 0xc3,
0x83, 0xe2, 0x80, 0x9a, 0xc3, 0x82, 0xc2, 0xa7, 0xc3, 0x83,
0xc6, 0x92, 0xc3, 0xa2, 0xe2, 0x80, 0x9e, 0xc2, 0xa2, 0xc3,
0x83, 0xc2, 0xa2, 0xc3, 0xa2, 0xe2, 0x80, 0x9a, 0xc2, 0xac,
0xc3, 0x85, 0xc2, 0xbe, 0xc3, 0x83, 0xc6, 0x92, 0xc3, 0x8b,
0xc5, 0x93, 0xc3, 0x83, 0xe2, 0x80, 0x9a, 0xc3, 0x82, 0xc2,
0xa8, 0xc3, 0x83, 0xc6, 0x92, 0xc3, 0xa2, 0xe2, 0x80, 0x9e,
0xc2, 0xa2, 0xc3, 0x83, 0xe2, 0x80, 0xa6, 0xc3, 0x82, 0xc2,
0xa0, 0xc3, 0x83, 0xc6, 0x92, 0xc3, 0x8b, 0xc5, 0x93, 0xc3,
0x83, 0xe2, 0x80, 0x9a, 0xc3, 0x82, 0xc2, 0xa6, 0xc3, 0x83,
0xc6, 0x92, 0xc3, 0xa2, 0xe2, 0x80, 0x9e, 0xc2, 0xa2, 0xc3,
0x83, 0xe2, 0x80, 0xa6, 0xc3, 0x82, 0xc2, 0xa0, 0xc3, 0x83,
0xc6, 0x92, 0xc3, 0x8b, 0xc5, 0x93, 0xc3, 0x83, 0xe2, 0x80,
0x9a, 0xc3, 0x82, 0xc2, 0xa9, 0x00, 0x00, 0xda, 0xda, 0xda,
0xda, 0xda, 0xda, 0xda, 0xda, 0xda, 0xda, 0xda, 0xda, 0xda,
0xda, 0xda, 0xda, 0xda, 0xda, 0xda, 0xda, 0xda, 0xda, 0xda,
0xda, 0xda, 0xda, 0xda, 0xda, 0xda, 0xda, 0xda, 0xda, 0x74,
0x65, 0x73, 0x2c, 0x20, 0x68, 0x59, 0x00, 0x00, 0x00, 0xac,
0xed, 0x3b, 0x09, 0xac, 0xed, 0x3b, 0x09, 0x78, 0x51, 0xa7,
0x24, 0xf8, 0xd0, 0xa7, 0x24, 0x00, 0x00, 0x00, 0x00, 0x0a,
0x00};
for ( int32_t i = 0 ; i < 435 ; i++ ) {
// again:
*pp = ddd[i]; // rand() % 256;
//if ( *pp < 0x80 ) goto again;
pp++;
}
*pp = 0;
*/
// . since we own the data, we'll free readBuf on r's destruction
// . this returns false and sets g_errno on error
// . but it should still set m_request to the readBuf to delete it
@ -2592,9 +2523,6 @@ TcpSocket *HttpServer::unzipReply(TcpSocket* s) {
}
bool printFrontPageShell ( SafeBuf *sb , char *tabName , CollectionRec *cr ,
bool printGigablast );
bool sendPagePretty ( TcpSocket *s ,
HttpRequest *r ,
char *filename ,

@ -108,7 +108,7 @@ void Images::setCandidates ( Url *pageUrl , Words *words , Xml *xml ,
// the positive scored window
int32_t firstPosScore = -1;
int32_t lastPosScore = -1;
int32_t badFlags = SEC_SCRIPT|SEC_STYLE|SEC_SELECT|SEC_MARQUEE;
int32_t badFlags = SEC_SCRIPT|SEC_STYLE|SEC_SELECT;
// find positive scoring window
for ( int32_t i = 0 ; i < nw ; i++ ) {
// skip if in bad section

@ -356,7 +356,6 @@ key224_t Linkdb::makeKey_uk ( uint32_t linkeeSiteHash32 ,
/////////
#include "Collectiondb.h"
//#include "CollectionRec.h"
#include "matches2.h"
// 1MB read size for now
@ -364,12 +363,8 @@ key224_t Linkdb::makeKey_uk ( uint32_t linkeeSiteHash32 ,
#define MAX_INTERNAL_INLINKS 10
//static void gotRootTitleRecWrapper25 ( void *state ) ;
//static void gotTermFreqWrapper ( void *state ) ;
static void gotListWrapper ( void *state ,RdbList *list,Msg5 *msg5);
static bool gotLinkTextWrapper ( void *state );
//static void sendLinkInfoReplyWrapper ( void *state );//, LinkInfo *info ) ;
//static void gotReplyWrapper25 ( void *state , void *state2 ) ;
Msg25::Msg25() {
m_numRequests = 0;
@ -391,12 +386,6 @@ void Msg25::reset() {
mfree ( m_replyPtrs[i], m_replySizes[i], "msg25r");
// reset array count to 0
m_numReplyPtrs = 0;
// . free the linkinfo if we are responsible for it
// . if someone "steals" it from us, they should set this to NULL
//if ( m_linkInfo )
// mfree ( m_linkInfo , m_linkInfo->getStoredSize(),"msg25s");
// this now points into m_linkInfoBuf safebuf, just NULL it
//m_linkInfo = NULL;
m_table.reset();
m_ipTable.reset();
@ -3359,7 +3348,6 @@ void Inlink::set ( Msg20Reply *r ) {
r->size_surroundingText +
r->size_rssItem +
r->size_categories +
r->size_gigabitQuery +
r->size_templateVector;
char *pend = p + need;
@ -3372,7 +3360,7 @@ void Inlink::set ( Msg20Reply *r ) {
size_surroundingText = r->size_surroundingText;
size_rssItem = r->size_rssItem;
size_categories = r->size_categories;
size_gigabitQuery = r->size_gigabitQuery;
size_gigabitQuery = 0;
size_templateVector = r->size_templateVector;
@ -3432,13 +3420,8 @@ void Inlink::set ( Msg20Reply *r ) {
/////////////
off_gigabitQuery = poff;
if ( p + r->size_gigabitQuery < pend ) {
gbmemcpy ( p , r->ptr_gigabitQuery , size_gigabitQuery );
}
else {
size_gigabitQuery = 1;
*p = '\0';
}
size_gigabitQuery = 1;
*p = '\0';
poff += size_gigabitQuery;
p += size_gigabitQuery;
@ -3468,37 +3451,27 @@ void Inlink::setMsg20Reply ( Msg20Reply *r ) {
r->m_firstSpidered = m_firstSpidered;
r->m_lastSpidered = m_lastSpidered;
//r->m_nextSpiderTime = m_nextSpiderDate;
r->m_datedbDate = m_datedbDate;
r->m_firstIndexedDate = m_firstIndexedDate;
r->m_numOutlinks = m_numOutlinks;
//r->m_linkTextBaseScore = m_baseScore;
//r->m_pagePop = m_pagePop;
//r->m_sitePop = m_sitePop;
//r->m_siteNumInlinks = m_siteNumInlinks;
r->m_isPermalink = m_isPermalink;
r->m_outlinkInContent = m_outlinkInContent;
r->m_outlinkInComment = m_outlinkInComment;
r->m_isLinkSpam = m_isLinkSpam;
//r->m_isAnomaly = m_isAnomaly;
r->m_hasAllQueryTerms = m_hasAllQueryTerms;
r->m_country = m_country;
r->m_language = m_language;
//r->m_docQuality = m_docQuality;
r->m_siteRank = m_siteRank;
//r->m_ruleset = m_ruleset;
r->m_hopcount = m_hopcount;
//r->m_linkTextScoreWeight = m_linkTextScoreWeight;
r->ptr_ubuf = getUrl();//ptr_urlBuf;
r->ptr_linkText = getLinkText();//ptr_linkText;
r->ptr_surroundingText = getSurroundingText();//ptr_surroundingText;
r->ptr_rssItem = getRSSItem();//ptr_rssItem;
r->ptr_categories = getCategories();//ptr_categories;
r->ptr_gigabitQuery = getGigabitQuery();//ptr_gigabitQuery;
r->ptr_templateVector = getTemplateVector();//ptr_templateVector;
r->size_ubuf = size_urlBuf;
@ -3506,7 +3479,6 @@ void Inlink::setMsg20Reply ( Msg20Reply *r ) {
r->size_surroundingText = size_surroundingText;
r->size_rssItem = size_rssItem;
r->size_categories = size_categories;
r->size_gigabitQuery = size_gigabitQuery;
r->size_templateVector = size_templateVector;
}
@ -3583,7 +3555,7 @@ bool LinkInfo::print ( SafeBuf *sb , char *coll ) {
int32_t dlen = k->size_surroundingText - 1;
char *r = k->getRSSItem();//ptr_rssItem;
int32_t rlen = k->size_rssItem - 1;
char *g = k->getGigabitQuery();//ptr_gigabitQuery;
char *g = k->getGigabitQuery();
int32_t glen = k->size_gigabitQuery - 1;
char *c = k->getCategories();//ptr_categories;
int32_t clen = k->size_categories - 1;
@ -4068,12 +4040,6 @@ bool Links::addLink ( char *link , int32_t linkLen , int32_t nodeNum ,
// don't add 0 length links
if ( linkLen <= 0 ) return true;
// ensure buf has enough room
// if (titleRecVersion < 72){
// if ( m_bufPtr-m_buf + linkLen + 1 > LINK_BUF_SIZE ){
// return true;
// }
// }
// do we need to alloc more link space?
if (m_numLinks >= m_allocLinks) {
@ -4250,8 +4216,6 @@ bool Links::addLink ( char *link , int32_t linkLen , int32_t nodeNum ,
else bufSpace = 0;
// allocate dynamic buffer for lotsa links
if ( url.getUrlLen() + 1 > bufSpace ) {
//if (titleRecVersion < 72 && m_allocSize >= LINK_BUF_SIZE)
// return true;
// grow by 100K
int32_t newAllocSize;// = m_allocSize+LINK_BUF_SIZE;
if ( ! m_allocSize ) newAllocSize = LINK_BUF_SIZE;

@ -165,7 +165,6 @@ bool Log::shouldLog ( int32_t type , const char *msg ) {
if ( msg[0] == 'n' ) return g_conf.m_logTimingNet;
if ( msg[0] == 'q' ) return g_conf.m_logTimingQuery;
if ( msg[0] == 's' ) return g_conf.m_logTimingSpcache;
if ( msg[0] == 't' ) return g_conf.m_logTimingTopics;
return false;
}
if ( type != LOG_DEBUG ) return true;
@ -205,8 +204,6 @@ bool Log::shouldLog ( int32_t type , const char *msg ) {
if (msg[0]=='u'&&msg[1]=='n' ) return g_conf.m_logDebugUnicode;
if (msg[0]=='t'&&msg[1]=='o'&&msg[3]=='D' )
return g_conf.m_logDebugTopDocs;
if (msg[0]=='t'&&msg[1]=='o'&&msg[3]!='D' )
return g_conf.m_logDebugTopics;
if (msg[0]=='d'&&msg[1]=='a' ) return g_conf.m_logDebugDate;
if (msg[0]=='d'&&msg[1]=='d' ) return g_conf.m_logDebugDetailed;

Log.h (6 changed lines)

@ -69,7 +69,6 @@
// spcache related to determining what urls to spider next
// speller query spell checking
// thread calling threads
// topics related topics
// udp udp networking
// example log:
@ -87,11 +86,6 @@
#define MAX_LOG_MSGS 1024 // in memory
// this is for printing out how a page is parsed by PageParser.cpp
/* extern char *g_pbuf ; */
/* extern char *g_pbufPtr ; */
/* extern char *g_pterms ; */
/* extern char *g_ptermPtr ; */
/* extern char *g_pend; */
extern char *g_dbuf;
extern int32_t g_dbufSize;

@ -35,7 +35,7 @@ OBJS = UdpSlot.o Rebalance.o \
Msg1.o \
Msg0.o Mem.o Matches.o Loop.o \
Log.o Lang.o \
Indexdb.o Posdb.o Clusterdb.o IndexList.o Revdb.o \
Indexdb.o Posdb.o Clusterdb.o IndexList.o \
HttpServer.o HttpRequest.o \
HttpMime.o Hostdb.o \
Highlight.o File.o Errno.o Entities.o \
@ -75,6 +75,7 @@ CPPFLAGS = -g -Wall -fno-stack-protector -DPTHREADS -Wstrict-aliasing=0
ifeq ($(CXX), g++)
CPPFLAGS += -Wno-write-strings -Wno-uninitialized -Wno-unused-but-set-variable
CPPFLAGS += -Wno-invalid-offsetof
else ifeq ($(CXX), clang++)
CPPFLAGS += -Weverything -Wno-cast-align -Wno-reserved-id-macro -Wno-padded -Wno-c++11-long-long -Wno-tautological-undefined-compare -Wno-c++11-compat-reserved-user-defined-literal -Wno-zero-length-array -Wno-float-equal -Wno-c99-extensions -Wno-weak-vtables -Wno-global-constructors -Wno-exit-time-destructors
CPPFLAGS += -Wno-shadow -Wno-conversion -Wno-extra-semi -Wno-sign-conversion -Wno-old-style-cast -Wno-shorten-64-to-32 -Wno-unused-parameter -Wno-missing-prototypes -Wno-c++11-compat-deprecated-writable-strings
@ -114,6 +115,9 @@ GIT_VERSION=$(shell git rev-parse HEAD)$(DIRTY)
all: gb
debug: DEFS += -D_VALGRIND_
debug: all
utils: blaster2 hashtest monitor seektest urlinfo treetest dnstest gbtitletest
# third party libraries

@ -508,7 +508,6 @@ bool Matches::addMatches(Words *words, Phrases *phrases, Sections *sections, Bit
if ( getNumXmlNodes() > 512 ) { char *xx=NULL;*xx=0; }
// google seems to index SEC_MARQUEE so i took that out of here
int32_t badFlags =SEC_SCRIPT|SEC_STYLE|SEC_SELECT|SEC_IN_TITLE;
int32_t qwn;
@ -627,10 +626,7 @@ bool Matches::addMatches(Words *words, Phrases *phrases, Sections *sections, Bit
// this is 0 if we were an unmatched quote
if ( numWords <= 0 ) continue;
// we matched a bigram in the document
//numWords = 3;
// i guess we matched the query phrase bigram
//numQWords = 3;
// got a match
goto gotMatch2;
}
@ -641,7 +637,6 @@ bool Matches::addMatches(Words *words, Phrases *phrases, Sections *sections, Bit
numWords = 1;
numQWords = 1;
goto gotMatch2;
//char *xx=NULL;*xx=0;
}
//

@ -1164,105 +1164,6 @@ void gotListWrapper ( void *state , RdbList *listb , Msg5 *msg5xx )
log(LOG_LOGIC,"net: msg0: Sending more data than what was "
"requested. Ineffcient. Bad engineer. dataSize=%"INT32" "
"minRecSizes=%"INT32".",dataSize,oldSize);
/*
// always compress these lists
if ( st0->m_rdbId == RDB_SECTIONDB ) { // && 1 == 3) {
// get sh48, the sitehash
key128_t *startKey = (key128_t *)msg5->m_startKey ;
int64_t sh48 = g_datedb.getTermId(startKey);
// debug
//log("msg0: got sectiondblist from disk listsize=%"INT32"",
// list->getListSize());
if ( dataSize > 50000 )
log("msg0: sending back list rdb=%"INT32" "
"listsize=%"INT32" sh48=0x%"XINT64"",
(int32_t)st0->m_rdbId,
dataSize,
sh48);
// save it
int32_t origDataSize = dataSize;
// store compressed list on itself
char *dst = list->m_list;
// warn if niceness is 0!
if ( st0->m_niceness == 0 )
log("msg0: compressing sectiondb list at niceness 0!");
// compress the list
uint32_t lastVoteHash32 = 0LL;
SectionVote *lastVote = NULL;
for ( ; ! list->isExhausted() ; list->skipCurrentRecord() ) {
// breathe
QUICKPOLL ( st0->m_niceness );
// get rec
char *rec = list->getCurrentRec();
// for here
key128_t *key = (key128_t *)rec;
// the score is the bit which was set in
// Section::m_flags for that docid
int32_t secType = g_indexdb.getScore ( (char *)key );
// 0 means it probably used to count # of voters
// from this site, so i don't think xmldoc uses
// that any more
if ( secType == SV_SITE_VOTER ) continue;
// treat key like a datedb key and get the taghash
uint32_t h32 = g_datedb.getDate ( key );
// get data/vote from the current record in the
// sectiondb list
SectionVote *sv=(SectionVote *)list->getCurrentData ();
// get the average score for this doc
float avg = sv->m_score ;
if ( sv->m_numSampled > 0.0 ) avg /= sv->m_numSampled;
// if same as last guy, add to it
if ( lastVoteHash32 == h32 && lastVote ) {
// turn a possible multi-vote from a single docid
// into a single vote, with the score averaged.
lastVote->m_score += avg;
lastVote->m_numSampled++;
continue;
}
// otherwise, add in a new guy!
*(key128_t *)dst = *key;
dst += sizeof(key128_t);
// the new vote
SectionVote *dsv = (SectionVote *)dst;
dsv->m_score = avg;
dsv->m_numSampled = 1;
// set this
lastVote = dsv;
lastVoteHash32 = h32;
// skip over
dst += sizeof(SectionVote);
}
// update the list size now for sending back
dataSize = dst - data;
// if the list was over the requested minrecsizes we need
// to set a flag so that the caller will do a re-call.
// so making the entire list size odd will be the flag.
if ( origDataSize > msg5->m_minRecSizes &&
dataSize < origDataSize ) {
*dst++ = '\0';
dataSize++;
}
// debug
//log("msg0: compressed sectiondblist from disk "
// "newlistsize=%"INT32"", dataSize);
// use this timestamp
int32_t now = getTimeLocal();//Global();
// finally, cache this sucker
s_sectiondbCache.addRecord ( msg5->m_coll,
(char *)startKey,//(char *)&sh48
data,
dataSize ,
now );
// ignore errors
g_errno = 0;
}
*/
//
// for linkdb lists, remove all the keys that have the same IP32

@ -446,10 +446,7 @@ bool Msg2::gotList ( RdbList *list ) {
if ( m_lists[i].m_listSize < m_minRecSizes[i] ) continue;
if ( m_minRecSizes[i] == 0 ) continue;
if ( m_minRecSizes[i] == -1 ) continue;
// do not print this if compiling section xpathsitehash stats
// because we only need like 10k of list to get a decent
// reading
if ( m_req->m_forSectionStats ) break;
log("msg2: read termlist #%"INT32" size=%"INT32" "
"maxSize=%"INT32". losing docIds!",
i,m_lists[i].m_listSize,m_minRecSizes[i]);

@ -691,8 +691,6 @@ int32_t Msg20Reply::serialize ( char *buf , int32_t bufSize ) {
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_displaySum,size_displaySum);
if(ptr_dbuf)
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_dbuf,size_dbuf);
if(ptr_gigabitSample)
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_gigabitSample,size_gigabitSample);
if(ptr_mbuf)
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_mbuf,size_mbuf);
if(ptr_vbuf)
@ -723,12 +721,8 @@ int32_t Msg20Reply::serialize ( char *buf , int32_t bufSize ) {
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_rssItem,size_rssItem);
if(ptr_categories)
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_categories,size_categories);
if(ptr_gigabitQuery)
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_gigabitQuery,size_gigabitQuery);
if(ptr_content)
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_content,size_content);
if(ptr_sectionVotingInfo)
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_sectionVotingInfo,size_sectionVotingInfo);
if(ptr_tr)
VALGRIND_CHECK_MEM_IS_DEFINED(ptr_tr,size_tr);
if(ptr_tlistBuf)

Msg20.h (10 changed lines)

@ -58,8 +58,6 @@ class Msg20Request {
int32_t m_summaryMaxLen ;
int32_t m_summaryMaxNumCharsPerLine ;
int32_t m_maxNumCharsPerLine ;
int32_t m_bigSampleRadius ;
int32_t m_bigSampleMaxLen ;
int32_t m_maxCacheAge ;
int32_t m_discoveryDate ;
@ -83,12 +81,10 @@ class Msg20Request {
unsigned char m_getSummaryVector :1;
unsigned char m_showBanned :1;
unsigned char m_includeCachedCopy :1;
unsigned char m_getSectionVotingInfo :1; // in JSON for now
unsigned char m_getMatches :1;
unsigned char m_getTermListBuf :1;
unsigned char m_getOutlinks :1;
unsigned char m_getTitleRec :1; // sets ptr_tr in reply
unsigned char m_getGigabitVector :1;
unsigned char m_doLinkSpamCheck :1;
unsigned char m_isLinkSpam :1; // Msg25 uses for storage
unsigned char m_isSiteLinkInfo :1; // site link info?
@ -237,7 +233,6 @@ public:
char *ptr_rubuf ; // redirect url buffer
char *ptr_displaySum ; // summary for displaying
char *ptr_dbuf ; // display metas \0 separated
char *ptr_gigabitSample ;
char *ptr_mbuf ; // match offsets
char *ptr_vbuf ; // summary vector
char *ptr_imgData ; // for encoded images
@ -263,9 +258,7 @@ public:
char *ptr_linkUrl ; // what we link to
char *ptr_rssItem ; // set for m_getLinkText
char *ptr_categories ;
char *ptr_gigabitQuery ; // , separated list of gigabits
char *ptr_content ; // page content in utf8
char *ptr_sectionVotingInfo ; // in JSON
char *ptr_tr ; // like just using msg22
char *ptr_tlistBuf ;
char *ptr_tiBuf ; // terminfobuf
@ -285,7 +278,6 @@ public:
int32_t size_rubuf ;
int32_t size_displaySum ;
int32_t size_dbuf ;
int32_t size_gigabitSample ; // includes \0
int32_t size_mbuf ;
int32_t size_vbuf ;
int32_t size_imgData ;
@ -304,9 +296,7 @@ public:
int32_t size_linkUrl ;
int32_t size_rssItem ;
int32_t size_categories ;
int32_t size_gigabitQuery ;
int32_t size_content ; // page content in utf8
int32_t size_sectionVotingInfo ; // in json, includes \0
int32_t size_tr ;
int32_t size_tlistBuf ;
int32_t size_tiBuf ;
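Each ptr_xxx buffer in the reply is paired with a size_xxx field, which is why this commit can drop the gigabit pairs wholesale: serialization just walks the pairs in order. A minimal two-field sketch of that wire layout (struct and function names are hypothetical):

#include <cstdint>
#include <cstring>

struct ReplySketch {
	char   *ptr_linkText;
	int32_t size_linkText; // includes the trailing NUL
};

// write the size, then the bytes; the receiver re-points
// ptr_linkText at its spot inside the received blob
static int32_t serializeSketch ( const ReplySketch &r , char *buf ) {
	char *p = buf;
	memcpy ( p , &r.size_linkText , sizeof(int32_t) );
	p += sizeof(int32_t);
	memcpy ( p , r.ptr_linkText , r.size_linkText );
	p += r.size_linkText;
	return (int32_t)(p - buf);
}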

@ -327,17 +327,15 @@ bool Msg39::controlLoop ( ) {
// fix it
m_r->m_minDocId = d0;
m_r->m_maxDocId = d1; // -1; // exclude d1
// allow posdbtable re-initialization each time to set
// the msg2 termlist ptrs anew, otherwise we core in
// call to PosdbTable::init() below
//m_posdbTable.m_initialized = false;
// reset ourselves, partially, anyway, not tmpq etc.
reset2();
// debug log
if ( ! m_r->m_forSectionStats && m_debug )
if ( m_debug ) {
log("msg39: docid split %d/%d range %"INT64"-%"INT64"", m_docIdSplitNumber-1, m_r->m_numDocIdSplits, d0,d1);
// wtf?
//if ( d0 >= d1 ) break;
}
// load termlists for these docid ranges using msg2 from posdb
if ( ! getLists() ) return false;
}
@ -1068,7 +1066,7 @@ void Msg39::estimateHitsAndSendReply ( ) {
mr.size_pairScoreBuf = 0;
mr.size_singleScoreBuf = 0;
}
//mr.m_sectionStats = pt->m_sectionStats;
// reserve space for these guys, we fill them in below
mr.ptr_docIds = NULL;
mr.ptr_scores = NULL;

@ -53,14 +53,11 @@ class Msg39Request {
ptr_readSizes = NULL;
ptr_query = NULL; // in utf8?
ptr_whiteList = NULL;
//ptr_coll = NULL;
m_forSectionStats = false;
size_readSizes = 0;
size_query = 0;
size_whiteList = 0;
m_sameLangWeight = 20.0;
m_maxFacets = -1;
//size_coll = 0;
m_getDocIdScoringInfo = 1;
@ -115,11 +112,6 @@ class Msg39Request {
char m_useQueryStopWords;
char m_doMaxScoreAlgo;
char m_forSectionStats;
// Msg3a still uses this
//int32_t m_myFacetVal32; // for gbfacet:xpathsite really sectionstats
collnum_t m_collnum;
int64_t m_minDocId;

@ -131,13 +131,6 @@ bool Msg3a::getDocIds ( Msg39Request *r ,
log(LOG_LOGIC,"net: bad collection. msg3a. %"INT32"",
(int32_t)m_r->m_collnum);
//m_indexdbSplit = g_hostdb.m_indexSplits;
// certain query term, like, gbdom:xyz.com, are NOT split
// at all in order to keep performance high because such
// terms are looked up by the spider. if a query contains
// multiple "no split" terms, then it becomes split unfortunately...
//if ( ! m_q->isSplit() ) m_indexdbSplit = 1;
// for a sanity check in Msg39.cpp
r->m_nqt = m_q->getNumTerms();
@ -154,10 +147,7 @@ bool Msg3a::getDocIds ( Msg39Request *r ,
// . return now if query empty, no docids, or none wanted...
// . if query terms = 0, might have been "x AND NOT x"
if ( m_q->getNumTerms() <= 0 ) return true;
// sometimes we want to get section stats from the hacked
// sectionhash: posdb termlists
//if ( m_docsToGet <= 0 && ! m_r->m_getSectionStats )
// return true;
// . set g_errno if not found and return true
// . coll is null terminated
CollectionRec *cr = g_collectiondb.getRec(r->m_collnum);
@ -234,24 +224,17 @@ bool Msg3a::getDocIds ( Msg39Request *r ,
// update our read info
for ( int32_t j = 0; j < n ; j++ ) {
// the read size for THIS query term
int32_t rs = 300000000; // toRead; 300MB i guess...
// limit to 50MB man! this was 30MB but the
// 'time enough for love' query was hitting 30MB termlists.
//rs = 50000000;
rs = DEFAULT_POSDB_READSIZE;//90000000; // 90MB!
// it is better to go oom than leave users scratching their
// heads as to why some results are not being returned.
// no, because we are going out of mem for queries like
// 'www.disney.nl' etc.
//rs = -1;
// if section stats, limit to 1MB
//if ( m_r->m_getSectionStats ) rs = 1000000;
int32_t rs = DEFAULT_POSDB_READSIZE;//90000000; // 90MB!
// get the jth query term
QueryTerm *qt = &m_q->m_qterms[j];
// if query term is ignored, skip it
if ( qt->m_ignored ) rs = 0;
// set it
readSizes[j] = rs;
// serialize these too
tfw[j] = qt->m_termFreqWeight;
}
@ -265,8 +248,7 @@ bool Msg3a::getDocIds ( Msg39Request *r ,
// Query::expandQuery() above
m_r->ptr_query = m_q->m_orig;
m_r->size_query = m_q->m_origLen+1;
// the white list now too...
//m_r->ptr_whiteList = si->m_whiteListBuf.getBufStart();
// free us?
if ( m_rbufPtr && m_rbufPtr != m_rbuf ) {
mfree ( m_rbufPtr , m_rbufSize, "Msg3a" );
@ -314,7 +296,7 @@ bool Msg3a::getDocIds ( Msg39Request *r ,
// high because it is a spider time thing.
if ( m_r->m_timeout > 0 ) {
timeout = m_r->m_timeout;
timeout += 250; //add 250ms for general overhead
timeout += g_conf.m_msg3a_msg39_network_overhead;
}
if ( timeout > multicast_msg3a_maximum_timeout )
timeout = multicast_msg3a_maximum_timeout;
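The hardcoded 250ms fudge becomes the configurable g_conf.m_msg3a_msg39_network_overhead added in the Conf.h hunk above, still clamped to the multicast maximum. A minimal sketch of the resulting computation (timeout constants from the Multicast hunk below; the standalone function is hypothetical):

#include <cstdint>

static const int64_t multicast_msg3a_default_timeout = 10000; // ms
static const int64_t multicast_msg3a_maximum_timeout = 60000; // ms

// requestTimeout <= 0 means "not set"; overheadMs is
// g_conf.m_msg3a_msg39_network_overhead
static int64_t msg3aTimeout ( int64_t requestTimeout , int64_t overheadMs ) {
	int64_t timeout = multicast_msg3a_default_timeout;
	if ( requestTimeout > 0 )
		timeout = requestTimeout + overheadMs;
	if ( timeout > multicast_msg3a_maximum_timeout )
		timeout = multicast_msg3a_maximum_timeout;
	return timeout;
}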
@ -774,64 +756,6 @@ bool Msg3a::mergeLists ( ) {
//m_totalDocCount = 0; // int32_t docCount = 0;
m_moreDocIdsAvail = true;
/*
this version is too simple. now each query term can be a
gbfacet:price or gbfacet:type term and each has a
list in the Msg39Reply::ptr_facetHashList for its termid
//
// compile facet stats
//
for ( int32_t j = 0; j < m_numHosts ; j++ ) {
Msg39Reply *mr =m_reply[j];
// one table for each query term
char *p = mr->ptr_facetHashList;
// loop over all query terms
int32_t n = m_q->getNumTerms();
// use this
HashTableX tmp;
// do the loop
for ( int32_t i = 0 ; i < n ; i++ ) {
// size of it
int32_t psize = *(int32_t *)p;
p += 4;
tmp.deserialize ( p , psize );
p += psize;
// now compile the stats into a master table
for ( int32_t k = 0 ; k < tmp.m_numSlots ; k++ ) {
if ( ! tmp.m_flags[k] ) continue;
// get the value
int32_t v32 = *(int32_t *)tmp.getKeyFromSlot(k);
// and how many of them there where
int32_t count = *(int32_t *)tmp.getValueFromSlot(k);
// add to master
master.addScore32 ( v32 , count );
}
}
}
////////
//
// now set m_facetStats
//
////////
// add up all counts
int64_t count = 0LL;
for ( int32_t i = 0 ; i < master.getNumSlots() ; i++ ) {
if ( ! master.m_flags[i] ) continue;
int64_t slotCount = *(int32_t *)master.getValueFromSlot(i);
int32_t h32 = *(int32_t *)master.getKeyFromSlot(i);
if ( h32 == m_r->m_myFacetVal32 )
m_facetStats.m_myValCount = slotCount;
count += slotCount;
}
m_facetStats.m_totalUniqueValues = master.getNumUsedSlots();
m_facetStats.m_totalValues = count;
*/
// shortcut
//int32_t numSplits = m_numHosts;//indexdbSplit;
// . point to the various docids, etc. in each shard reply
// . tcPtr = term count. how many required query terms does the doc
@ -920,11 +844,6 @@ bool Msg3a::mergeLists ( ) {
for ( int32_t j = 0; j < m_numQueriedHosts ; j++ ) {
Msg39Reply *mr =m_reply[j];
if ( ! mr ) continue;
//SectionStats *src = &mr->m_sectionStats;
//dst->m_onSiteDocIds += src->m_onSiteDocIds;
//dst->m_offSiteDocIds += src->m_offSiteDocIds;
//dst->m_totalMatches += src->m_totalMatches;
//dst->m_totalEntries += src->m_totalEntries;
// now the list should be the unique site hashes that
// had the section hash. we need to uniquify them again
// here.
@ -1036,7 +955,6 @@ bool Msg3a::mergeLists ( ) {
if ( ! sortFacetEntries() )
return true;
//if ( m_r->m_getSectionStats ) return true;
//
// HACK: END section stats merge
//

Msg3a.h (11 changed lines)

@ -145,17 +145,6 @@ public:
// when merging this list of docids into a final list keep
// track of the cursor into m_docIds[]
int32_t m_cursor;
// what collection # are these docids from if m_collnums[] is NULL
//collnum_t m_collnum;
// we don't have FacetStats because we have the actual
// Msg39Reply::ptr_facetHashList from each shard which contains
// all the facet hash lists for each gbfacet: query term we had
// and the query "Msg3a::m_q.m_qterms[].m_dt" is the hash table
// where each key is a facethash for that gbfacet:xxxx term and
// the value if the # of occurences.
//SectionStats m_sectionStats;
};
#endif

Msg40.cpp (2164 changed lines)

File diff suppressed because it is too large.

Msg40.h (109 changed lines)

@ -14,78 +14,12 @@
#include "Msg39.h" // getTermFreqs()
#include "Msg20.h" // for getting summary from docId
#include "Msg17.h" // a distributed cache of serialized/compressed Msg40s
//#include "Msg2b.h" // for generating directories
//#include "IndexReadInfo.h" // STAGE0,...
#include "Msg3a.h"
#include "PostQueryRerank.h"
// replace CollectionRec::m_maxDocIdsToCompute with this
//#define MAXDOCIDSTOCOMPUTE 500000
// make it 2B now. no reason to limit it so low.
#define MAXDOCIDSTOCOMPUTE 2000000000
#define MAX_GIGABIT_WORDS 10
class Gigabit {
public:
char *m_term;
int32_t m_termLen;
int64_t m_termId64;
float m_gbscore;
int32_t m_minPop;
int32_t m_numWords;
int32_t m_numPages;
int64_t m_lastDocId;
// the wordids of the words in the gigabit (m_numWords of them)
int64_t m_wordIds[MAX_GIGABIT_WORDS];
};
//
// TODO: add Gigabit::m_firstFastFactOffset..
//
#define MAX_GIGABIT_PTRS 10
class Fact {
public:
// offset of the gigabit in m_gigabitBuf we belong to
int32_t m_gigabitOffset;
// . the sentence contaning the gigabit and a lot of the query terms
// . ptr refrences into Msg20Reply::ptr_gigabitSample buffers
char *m_fact;
int32_t m_factLen;
float m_gigabitModScore;
float m_queryScore;
float m_maxGigabitModScore; // gigabitscore * #pagesItIsOn
int32_t m_numGigabits;
char m_printed;
class Gigabit *m_gigabitPtrs[MAX_GIGABIT_PTRS];
int32_t m_numQTerms;
int64_t m_docId; // from where it came
Msg20Reply *m_reply; // reply from where it came
// for deduping sentences
char m_dedupVector[SAMPLE_VECTOR_SIZE]; // 128
};
class GigabitInfo {
public:
int32_t m_pts;
uint32_t m_hash;
int32_t m_pop;
int32_t m_count;
int32_t m_numDocs;
int64_t m_lastDocId;
int32_t m_currentDocCount;
char *m_ptr;
int32_t m_len;
};
static const int64_t msg40_msg39_timeout = 5000; //timeout for entire get-docid-list phase, in milliseconds.
class Msg40 {
public:
@ -116,21 +50,6 @@ class Msg40 {
bool prepareToGetDocIds ( );
bool getDocIds ( bool recall );
bool computeGigabits( class TopicGroup *tg );
SafeBuf m_gigabitBuf;
// nuggabits...
bool computeFastFacts ( );
bool addFacts ( HashTableX *queryTable,
HashTableX *gbitTable ,
char *pstart,
char *pend,
bool debugGigabits ,
class Msg20Reply *reply,
SafeBuf *factBuf ) ;
SafeBuf m_factBuf;
// keep these public since called by wrapper functions
bool federatedLoop ( ) ;
bool gotDocIds ( ) ;
@ -181,14 +100,7 @@ class Msg40 {
bool moreResultsFollow ( ) {return m_moreToCome; };
time_t getCachedTime ( ) {return m_cachedTime; };
int32_t getNumGigabits (){return m_gigabitBuf.length()/sizeof(Gigabit);};
Gigabit *getGigabit ( int32_t i ) {
Gigabit *gbs = (Gigabit *)m_gigabitBuf.getBufStart();
return &gbs[i];
};
int64_t *getDocIdPtr() { return m_msg3a.m_docIds; }
int64_t *getDocIdPtr() { return m_msg3a.m_docIds; }
bool printSearchResult9 ( int32_t ix , int32_t *numPrintedSoFar ,
class Msg20Reply *mr ) ;
@ -277,15 +189,10 @@ class Msg40 {
char *m_cachePtr;
int32_t m_cacheSize;
//int32_t m_maxDocIdsToCompute;
// count summary replies (msg20 replies) we get
int32_t m_numRequests;
int32_t m_numReplies;
// we launched all docids from 0 to m_maxiLaunched
//int32_t m_maxiLaunched;
// true if more results follow these
bool m_moreToCome;
@ -303,12 +210,6 @@ class Msg40 {
bool m_cachedResults;
time_t m_cachedTime;
// gigabits
//Msg24 m_msg24;
// references
//Msg1a m_msg1a;
int32_t m_tasksRemaining;
int32_t m_printCount;
@ -334,14 +235,6 @@ class Msg40 {
SearchInput *m_si;
// for topic clustering, saved from CollectionRec
int32_t m_topicSimilarCutoff;
int32_t m_docsToScanForTopics;
// Msg2b for generating a directory
//Msg2b m_msg2b;
bool mergeDocIdsIntoBaseMsg3a();
int32_t m_numCollsToSearch;
class Msg3a **m_msg3aPtrs;

@ -1178,29 +1178,6 @@ bool Msg5::gotList2 ( ) {
// . why???
if ( m_totalSize < 32*1024 ) goto skipThread;
// if we are an interruptible niceness 1, do not use a thread,
// we can be interrupted by the alarm callback and serve niceness
// 0 requests, that is probably better! although the resolution is
// on like 10ms on those alarms... BUT if you use a smaller
// mergeBufSize of like 100k, that might make it responsive enough!
// allow it to do a thread again so we can take advantage of
// multiple cores, or hyperthreads i guess because i am seeing
// some missed quickpoll log msgs, i suppose because we did not
// insert QUICKPOLL() statements in the RdbList::merge_r() code
//if ( m_niceness >= 1 ) goto skipThread;
// super duper hack!
//if ( m_rdbId == RDB_REVDB ) goto skipThread;
// i'm not sure why we core in Msg5's call to RdbList::merge_r().
// the list appears to be corrupt...
//if ( m_rdbId == RDB_FACEBOOKDB ) goto skipThread;
// skip it for now
//goto skipThread;
//m_waitingForMerge = true;
// . if size is big, make a thread
// . let's always make niceness 0 since it wasn't being very
// aggressive before

@ -854,8 +854,6 @@ void sleepWrapper1 ( int bogusfd , void *state ) {
case 0x20: if ( elapsed < 5000 ) return; break;
// msg 0x20 calls this to get the title rec
case 0x22: if ( elapsed < 1000 ) return; break;
// Msg23 niceness 0 is only for doing &rerank=X queries
//case 0x23: if ( elapsed < 100000 ) return; break;
// a request to get the score of a docid, can be *very* intensive
case 0x3b: if ( elapsed < 500000 ) return; break;
// related topics request, calls many Msg22 to get titlerecs...
@ -868,21 +866,6 @@ void sleepWrapper1 ( int bogusfd , void *state ) {
// performance reasons, cuz we do pretty good load balancing
// and when things get saturated, rerouting exacerbates it
if ( elapsed < 8000 ) return; break;
// how many bytes were requested?
/*
if ( THIS->m_msg ) nb=*(int32_t *)(THIS->m_msg + sizeof(key_t)*2);
else nb=2000000;
// . givem 300ms + 1ms per 5000 bytes
// . a 6M read would be allowed 1500ms before re-routing
// . a 1M read would be allowed 500ms
// . a 100k read would be allowed 320ms
ta = 300 + nb / 5000;
// limit it
if ( ta < 100 ) ta = 100;
if ( ta > 9000 ) ta = 9000; // could this hurt us?
if ( elapsed < ta ) return;
break;
*/
// msg to get a clusterdb rec
case 0x38: if ( elapsed < 2000 ) return; break;
// msg to get docIds from a query, may take a while

@ -30,7 +30,6 @@ static const int64_t multicast_msg20_summary_timeout = 1500;
static const int64_t multicast_msg1_senddata_timeout = 60000;
static const int64_t multicast_msg3a_default_timeout = 10000;
static const int64_t multicast_msg3a_maximum_timeout = 60000;
static const int64_t multicast_xmldoc_sectionstats_timeout = 30000;
static const int64_t multicast_msg1c_getip_default_timeout = 60000;

@ -8,9 +8,8 @@
#include "SpiderLoop.h"
#include "PageResults.h" // for RESULT_HEIGHT
#include "Stats.h"
#include "PageRoot.h"
bool printFrontPageShell ( SafeBuf *sb , char *tabName , CollectionRec *cr ,
bool printGigablast ) ;
// 5 seconds
#define DEFAULT_WIDGET_RELOAD 1000

@ -242,8 +242,6 @@ bool sendBackDump ( TcpSocket *sock, HttpRequest *hr ) {
// no summary similarity dedup, only exact
// doc content hash. otherwise too slow!!
"pss=0&"
// no gigabits
"dsrt=0&"
// do not compute summary. 0 lines.
"ns=0&"
"q=gbsortby%%3Agbspiderdate&"
@ -282,8 +280,6 @@ bool sendBackDump ( TcpSocket *sock, HttpRequest *hr ) {
// no summary similarity dedup, only exact
// doc content hash. otherwise too slow!!
"pss=0&"
// no gigabits
"dsrt=0&"
// do not compute summary. 0 lines.
"ns=0&"
//"q=gbsortby%%3Agbspiderdate&"
@ -321,8 +317,6 @@ bool sendBackDump ( TcpSocket *sock, HttpRequest *hr ) {
// no summary similarity dedup, only exact
// doc content hash. otherwise too slow!!
"pss=0&"
// no gigabits
"dsrt=0&"
// do not compute summary. 0 lines.
"ns=0&"
"q=gbsortby%%3Agbspiderdate&"
@ -372,8 +366,6 @@ bool sendBackDump ( TcpSocket *sock, HttpRequest *hr ) {
// no summary similarity dedup, only exact
// doc content hash. otherwise too slow!!
"pss=0&"
// no gigabits
"dsrt=0&"
// do not compute summary. 0 lines.
//"ns=0&"
"q=gbrevsortbyint%%3AgbssSpiderTime+"

@ -5,6 +5,86 @@
//#include "IndexTable2.h"
//#include "XmlDoc.h" // addCheckboxSpan()
class State8 {
public:
TopTree m_topTree;
//Msg16 m_msg16;
//Msg14 m_msg14;
//Msg15 m_msg15;
Msg22 m_msg22;
SafeBuf m_dbuf;
//XmlDoc m_doc;
//Url m_url;
//Url m_rootUrl;
char *m_u;
int32_t m_ulen;
bool m_applyRulesetToRoot;
char m_rootQuality;
int32_t m_reparseRootRetries;
char m_coll[MAX_COLL_LEN];
int32_t m_collLen;
//int32_t m_sfn;
//int32_t m_urlLen;
TcpSocket *m_s;
bool m_isLocal;
char m_pwd[32];
HttpRequest m_r;
int32_t m_old;
// recycle the link info from the title rec?
int32_t m_recycle;
// recycle the link info that was imported from another coll?
int32_t m_recycle2;
int32_t m_render;
char m_recompute;
int32_t m_oips;
char m_linkInfoColl[11];
// char m_buf[16384 * 1024];
//int32_t m_page;
// m_pbuf now points to m_sbuf if we are showing the parsing junk
SafeBuf m_xbuf;
SafeBuf m_wbuf;
bool m_donePrinting;
//SafeBuf m_sbuf;
// this is a buffer which cats m_sbuf into it
//SafeBuf m_sbuf2;
// new state vars for Msg3b.cpp
int64_t m_docId;
void *m_state ;
void (* m_callback) (void *state);
Query m_tq;
Query *m_q;
int64_t *m_termFreqs;
float *m_termFreqWeights;
float *m_affWeights;
//score_t m_total;
bool m_freeIt;
bool m_blocked;
// these are from rearranging the code
int32_t m_indexCode;
//uint64_t m_chksum1;
int64_t m_took1;
int64_t m_took1b;
int64_t m_took2;
int64_t m_took3;
char m_didRootDom;
char m_didRootWWW;
char m_wasRootDom;
// call Msg16 with a version of title rec to do
int32_t m_titleRecVersion;
char m_hopCount;
//TitleRec m_tr;
//XmlDoc m_oldDoc;
XmlDoc m_xd;
};
bool g_inPageParser = false;
bool g_inPageInject = false;
@ -12,6 +92,17 @@ bool g_inPageInject = false;
static bool processLoop ( void *state ) ;
static bool gotXmlDoc ( void *state ) ;
static bool sendErrorReply ( void *state , int32_t err ) ;
static bool sendPageParser2 ( TcpSocket *s ,
HttpRequest *r ,
class State8 *st ,
int64_t docId ,
Query *q ,
int64_t *termFreqs ,
float *termFreqWeights ,
float *affWeights ,
void *state ,
void (* callback)(void *state) ) ;
// . returns false if blocked, true otherwise
// . sets g_errno on error
@ -26,19 +117,19 @@ bool sendPageParser ( TcpSocket *s , HttpRequest *r ) {
// . a new interface so Msg3b can call this with "s" set to NULL
// . returns false if blocked, true otherwise
// . sets g_errno on error
bool sendPageParser2 ( TcpSocket *s ,
HttpRequest *r ,
State8 *st ,
int64_t docId ,
Query *q ,
// in query term space, not imap space
int64_t *termFreqs ,
// in imap space
float *termFreqWeights ,
// in imap space
float *affWeights ,
void *state ,
void (* callback)(void *state) ) {
static bool sendPageParser2 ( TcpSocket *s ,
HttpRequest *r ,
State8 *st ,
int64_t docId ,
Query *q ,
// in query term space, not imap space
int64_t *termFreqs ,
// in imap space
float *termFreqWeights ,
// in imap space
float *affWeights ,
void *state ,
void (* callback)(void *state) ) {
//log("parser: read sock=%"INT32"",s->m_sd);

@ -16,95 +16,4 @@ extern bool g_inPageInject ;
bool sendPageAnalyze ( TcpSocket *s , HttpRequest *r ) ;
bool sendPageParser2 ( TcpSocket *s ,
HttpRequest *r ,
class State8 *st ,
int64_t docId ,
Query *q ,
int64_t *termFreqs ,
float *termFreqWeights ,
float *affWeights ,
void *state ,
void (* callback)(void *state) ) ;
class State8 {
public:
TopTree m_topTree;
//Msg16 m_msg16;
//Msg14 m_msg14;
//Msg15 m_msg15;
Msg22 m_msg22;
SafeBuf m_dbuf;
//XmlDoc m_doc;
//Url m_url;
//Url m_rootUrl;
char *m_u;
int32_t m_ulen;
bool m_applyRulesetToRoot;
char m_rootQuality;
int32_t m_reparseRootRetries;
char m_coll[MAX_COLL_LEN];
int32_t m_collLen;
//int32_t m_sfn;
//int32_t m_urlLen;
TcpSocket *m_s;
bool m_isLocal;
char m_pwd[32];
HttpRequest m_r;
int32_t m_old;
// recycle the link info from the title rec?
int32_t m_recycle;
// recycle the link info that was imported from another coll?
int32_t m_recycle2;
int32_t m_render;
char m_recompute;
int32_t m_oips;
char m_linkInfoColl[11];
// char m_buf[16384 * 1024];
//int32_t m_page;
// m_pbuf now points to m_sbuf if we are showing the parsing junk
SafeBuf m_xbuf;
SafeBuf m_wbuf;
bool m_donePrinting;
//SafeBuf m_sbuf;
// this is a buffer which cats m_sbuf into it
//SafeBuf m_sbuf2;
// new state vars for Msg3b.cpp
int64_t m_docId;
void *m_state ;
void (* m_callback) (void *state);
Query m_tq;
Query *m_q;
int64_t *m_termFreqs;
float *m_termFreqWeights;
float *m_affWeights;
//score_t m_total;
bool m_freeIt;
bool m_blocked;
// these are from rearranging the code
int32_t m_indexCode;
//uint64_t m_chksum1;
int64_t m_took1;
int64_t m_took1b;
int64_t m_took2;
int64_t m_took3;
char m_didRootDom;
char m_didRootWWW;
char m_wasRootDom;
// call Msg16 with a version of title rec to do
int32_t m_titleRecVersion;
char m_hopCount;
//TitleRec m_tr;
//XmlDoc m_oldDoc;
XmlDoc m_xd;
};
#endif
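Net effect of this hunk plus the PageParser.cpp hunk above: State8 and sendPageParser2() leave the header and become private to PageParser.cpp, with the function gaining internal linkage via static. The before/after shape, as a sketch with types elided:

// PageParser.h -- public surface only
class TcpSocket; class HttpRequest; class Query;
bool sendPageParser ( TcpSocket *s , HttpRequest *r );
bool sendPageAnalyze ( TcpSocket *s , HttpRequest *r );

// PageParser.cpp -- implementation details, no longer visible
// to other translation units
#include <cstdint>
class State8 { /* ...fields moved here from the header... */ };
static bool sendPageParser2 ( TcpSocket *s , HttpRequest *r , State8 *st ,
                              int64_t docId , Query *q ,
                              int64_t *termFreqs , float *termFreqWeights ,
                              float *affWeights , void *state ,
                              void (* callback)(void *state) );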

@ -15,7 +15,6 @@
#include "Msg40.h"
#include "sort.h"
#include "Spider.h"
#include "Revdb.h"
#include "XmlDoc.h"
#include "PageInject.h" // Msg7
#include "PageReindex.h"

@ -42,64 +42,4 @@ public:
Query m_qq;
};
/*
// . for indexing tags for events after you add to tagdb
// . created so zak can very quickly tag eventids that are already indexed
// . will just add the tag terms directly to datedb for the eventid
class Msg1d {
public:
bool updateQuery ( char *query ,
HttpRequest *r,
TcpSocket *sock,
char *coll ,
int32_t startNum ,
int32_t endNum ,
void *state ,
void (* callback) (void *state ) ) ;
bool updateTagTerms ( ) ;
bool getMetaList ( int64_t docId ,
int32_t eventId ,
TagRec *egr ,
RdbList *oldList ,
int32_t niceness ,
SafeBuf *addBuf ) ;
void *m_state;
void (* m_callback) (void *state);
Msg40 m_msg40;
SearchInput m_si;
int32_t m_startNum;
int32_t m_endNum;
int32_t m_numDocIds;
int32_t m_i;
Msg12 m_msg12;
Msg8a m_msg8a;
Msg0 m_msg0;
char *m_coll;
int32_t m_niceness;
TagRec m_tagRec;
RdbList m_revdbList;
SafeBuf m_addBuf;
SafeBuf m_rr;
char *m_metaList;
int32_t m_metaListSize;
Msg4 m_msg4;
Query m_qq;
Url m_fakeUrl;
int32_t m_gotLock;
int32_t m_gotTagRec;
int32_t m_gotRevdbRec;
int32_t m_madeList;
int32_t m_addedList;
int32_t m_removeLock;
int32_t m_flushedList;
};
*/
#endif

@ -19,17 +19,15 @@
#include "LanguageIdentifier.h"
#include "CountryCode.h"
#include "Unicode.h"
#include "XmlDoc.h" // GigabitInfo class
#include "Posdb.h" // MAX_TOP definition
#include "PageResults.h"
#include "PageRoot.h"
#include "Proxy.h"
static bool printSearchFiltersBar ( SafeBuf *sb , HttpRequest *hr ) ;
static bool printMenu ( SafeBuf *sb , int32_t menuNum , HttpRequest *hr ) ;
//static void gotSpellingWrapper ( void *state ) ;
static void gotResultsWrapper ( void *state ) ;
//static void gotAdsWrapper ( void *state ) ;
static void gotState ( void *state ) ;
static bool gotResults ( void *state ) ;
@ -163,34 +161,6 @@ bool sendReply ( State0 *st , char *reply ) {
mdelete(st, sizeof(State0), "PageResults2");
delete st;
/*
if ( format == FORMAT_XML ) {
SafeBuf sb;
sb.safePrintf("<?xml version=\"1.0\" "
"encoding=\"UTF-8\" ?>\n"
"<response>\n"
"\t<errno>%"INT32"</errno>\n"
"\t<errmsg>%s</errmsg>\n"
"</response>\n"
,(int32_t)savedErr
,mstrerror(savedErr)
);
// clear it for sending back
g_errno = 0;
// send back as normal reply
g_httpServer.sendDynamicPage(s,
sb.getBufStart(),
sb.length(),
0, // cachetime in secs
false, // POSTReply?
ct,
-1, // httpstatus -1 -> 200
NULL, // cookieptr
charset );
return true;
}
*/
// if we had a broken pipe from the browser while sending
// them the search results, then we end up closing the socket fd
// in TcpServer::sendChunk() > sendMsg() > destroySocket()
@ -332,15 +302,6 @@ bool sendPageResults ( TcpSocket *s , HttpRequest *hr ) {
// propagate "n"
int32_t n = hr->getLong("n",-1);
if ( n >= 0 ) sb.safePrintf("&n=%"INT32"",n);
// Docs to Scan for Related Topics
int32_t dsrt = hr->getLong("dsrt",-1);
if ( dsrt >= 0 ) sb.safePrintf("&dsrt=%"INT32"",dsrt);
// debug gigabits?
int32_t dg = hr->getLong("dg",-1);
if ( dg >= 0 ) sb.safePrintf("&dg=%"INT32"",dg);
// show gigabits?
//int32_t gb = hr->getLong("gigabits",1);
//if ( gb >= 1 ) sb.safePrintf("&gigabits=%"INT32"",gb);
// show banned results?
int32_t showBanned = hr->getLong("sb",0);
if ( showBanned ) sb.safePrintf("&sb=1");
@ -367,12 +328,6 @@ bool sendPageResults ( TcpSocket *s , HttpRequest *hr ) {
, h32
, rand64
);
//
// . login bar
// . proxy will replace it byte by byte with a login/logout
// link etc.
//
//g_proxy.insertLoginBarDirective(&sb);
//
// logo header
@ -392,23 +347,6 @@ bool sendPageResults ( TcpSocket *s , HttpRequest *hr ) {
"}}\n"
// gigabit unhide function
"function ccc ( gn ) {\n"
"var e = document.getElementById('fd'+gn);\n"
"var f = document.getElementById('sd'+gn);\n"
"if ( e.style.display == 'none' ){\n"
"e.style.display = '';\n"
"f.style.display = 'none';\n"
"}\n"
"else {\n"
"e.style.display = 'none';\n"
"f.style.display = '';\n"
"}\n"
"}\n"
"</script>\n"
// put search results into this div
"<div id=results>"
"<img height=50 width=50 "
@ -623,259 +561,6 @@ void gotState ( void *state ){
gotResults ( state );
}
// print all sentences containing this gigabit (fast facts) (nuggabits)
static bool printGigabitContainingSentences ( State0 *st,
SafeBuf *sb ,
Msg40 *msg40 ,
Gigabit *gi ,
SearchInput *si ,
Query *gigabitQuery ,
int32_t gigabitId ) {
char format = si->m_format;
HttpRequest *hr = &st->m_hr;
CollectionRec *cr = si->m_cr;//g_collectiondb.getRec(collnum );
int32_t numOff;
int32_t revert;
int32_t spaceOutOff;
if ( format == FORMAT_HTML ) {
sb->safePrintf("<nobr><b>");
// make a new query
sb->safePrintf("<a href=\"/search?c=%s&q=",cr->m_coll);
sb->urlEncode(gi->m_term,gi->m_termLen);
sb->safeMemcpy("+|+",3);
char *q = hr->getString("q",NULL,"");
sb->urlEncode(q);
sb->safePrintf("\">");
sb->safeMemcpy(gi->m_term,gi->m_termLen);
sb->safePrintf("</a></b>");
sb->safePrintf(" <font color=gray size=-1>");
numOff = sb->m_length;
sb->safePrintf(" ");//,gi->m_numPages);
sb->safePrintf("</font>");
sb->safePrintf("</b>");
revert = sb->length();
sb->safePrintf("<font color=blue style=align:right;>"
"<a style=cursor:hand;cursor:pointer; "
"onclick=ccc(%"INT32");>"
, gigabitId // s_gigabitCount
);
spaceOutOff = sb->length();
sb->safePrintf( "%c%c%c",
0xe2,
0x87,
0x93);
sb->safePrintf(//"[more]"
"</a></font>");
sb->safePrintf("</nobr>"); // <br>
}
if ( format == FORMAT_XML ) {
sb->safePrintf("\t\t<gigabit>\n");
sb->safePrintf("\t\t\t<term><![CDATA[");
sb->cdataEncode(gi->m_term,gi->m_termLen);
sb->safePrintf("]]></term>\n");
sb->safePrintf("\t\t\t<score>%f</score>\n",gi->m_gbscore);
sb->safePrintf("\t\t\t<minPop>%"INT32"</minPop>\n",gi->m_minPop);
}
if ( format == FORMAT_JSON ) {
sb->safePrintf("\t{\n");
//sb->safePrintf("\t\"gigabit\":{\n");
sb->safePrintf("\t\t\"term\":\"");
sb->jsonEncode(gi->m_term,gi->m_termLen);
sb->safePrintf("\",\n");
sb->safePrintf("\t\t\"score\":%f,\n",gi->m_gbscore);
sb->safePrintf("\t\t\"minPop\":%"INT32",\n",gi->m_minPop);
}
// get facts
int32_t numNuggets = 0;
int32_t numFacts = msg40->m_factBuf.length() / sizeof(Fact);
Fact *facts = (Fact *)msg40->m_factBuf.getBufStart();
bool first = true;
bool second = false;
bool printedSecond = false;
//int64_t lastDocId = -1LL;
int32_t saveOffset = 0;
for ( int32_t i = 0 ; i < numFacts ; i++ ) {
Fact *fi = &facts[i];
// if printed for a higher scoring gigabit, skip
if ( fi->m_printed ) continue;
// check gigabit match
int32_t k; for ( k = 0 ; k < fi->m_numGigabits ; k++ )
if ( fi->m_gigabitPtrs[k] == gi ) break;
// skip this fact/sentence if it does not contain the gigabit
if ( k >= fi->m_numGigabits ) continue;
// do not print unless it ends with the '*' sentence marker
char *s = fi->m_fact;
char *e = s + fi->m_factLen;
if ( e[-1] != '*' ) continue;
e--;
again:
// first time, print in the single fact div
if ( first && format == FORMAT_HTML ) {
sb->safePrintf("<div "
//"style=\"border:1px lightgray solid;\"
"id=fd%"INT32">",gigabitId);//s_gigabitCount);
}
if ( second && format == FORMAT_HTML ) {
sb->safePrintf("<div style=\"max-height:300px;"
"display:none;"
"overflow-x:hidden;"
"overflow-y:auto;"//scroll;"
//"border:1px lightgray solid; "
"\" "
"id=sd%"INT32">",gigabitId);//s_gigabitCount);
printedSecond = true;
}
Msg20Reply *reply = fi->m_reply;
// ok, print it out
if ( ! first && ! second && format == FORMAT_HTML ) {
sb->safePrintf("<br><br>\n");
}
numNuggets++;
// let's highlight with gigabits and query terms
SafeBuf tmpBuf;
Highlight h;
h.set ( &tmpBuf , s , e - s , gigabitQuery, "<u>", "</u>", 0 );
// now highlight the original query as well but in black bold
SafeBuf tmpBuf2;
h.set ( &tmpBuf2, tmpBuf.getBufStart(), tmpBuf.length(), &si->m_q, "<b>", "</b>", 0 );
int32_t dlen; char *dom = getDomFast(reply->ptr_ubuf,&dlen);
// print the sentence
if ( format == FORMAT_HTML )
sb->safeStrcpy(tmpBuf2.getBufStart());
if ( format == FORMAT_XML ) {
sb->safePrintf("\t\t\t<instance>\n"
"\t\t\t\t<sentence><![CDATA[");
sb->cdataEncode(tmpBuf2.getBufStart());
sb->safePrintf("]]></sentence>\n");
sb->safePrintf("\t\t\t\t<url><![CDATA[");
sb->cdataEncode(reply->ptr_ubuf);
sb->safePrintf("]]></url>\n");
sb->safePrintf("\t\t\t\t<domain><![CDATA[");
sb->cdataEncode(dom,dlen);
sb->safePrintf("]]></domain>\n");
sb->safePrintf("\t\t\t</instance>\n");
}
if ( format == FORMAT_JSON ) {
sb->safePrintf("\t\t\"instance\":{\n"
"\t\t\t\"sentence\":\"");
sb->jsonEncode(tmpBuf2.getBufStart());
sb->safePrintf("\",\n");
sb->safePrintf("\t\t\t\"url\":\"");
sb->jsonEncode(reply->ptr_ubuf);
sb->safePrintf("\",\n");
sb->safePrintf("\t\t\t\"domain\":\"");
sb->jsonEncode(dom,dlen);
sb->safePrintf("\"\n");
sb->safePrintf("\t\t},\n");
}
fi->m_printed = 1;
saveOffset = sb->length();
if ( format == FORMAT_HTML ) {
sb->safePrintf(" <a href=/get?c=%s&cnsp=0&"
"strip=0&d=%"INT64">",
cr->m_coll,reply->m_docId);
sb->safeMemcpy(dom,dlen);
sb->safePrintf("</a>\n");
sb->safePrintf("</div>");
}
if ( second ) {
second = false;
}
if ( first ) {
first = false;
second = true;
// print first gigabit all over again but in 2nd div
goto again;
}
}
if ( format == FORMAT_XML )
sb->safePrintf("\t\t</gigabit>\n");
if ( format == FORMAT_JSON ) {
// remove last ,\n
sb->m_length -= 2;
// replace with just \n
// end the gigabit
sb->safePrintf("\n\t},\n");
}
// all done if not html
if ( format != FORMAT_HTML )
return true;
// we counted the first one twice since we had to throw it into
// the hidden div too!
if ( numNuggets > 1 ) numNuggets--;
// do not print the double down arrow if no nuggets printed
if ( numNuggets <= 0 ) {
sb->m_length = revert;
sb->safePrintf("</nobr>");
}
// just remove down arrow if only 1...
else if ( numNuggets == 1 ) {
char *dst = sb->getBufStart()+spaceOutOff;
dst[0] = ' ';
dst[1] = ' ';
dst[2] = ' ';
}
// store the # of nuggets in ()'s like (10 )
else {
char tmp[10];
sprintf(tmp,"(%"INT32")",numNuggets);
char *src = tmp;
// start storing the digits where the placeholder spaces were
char *dst = sb->getBufStart()+numOff;
int32_t srcLen = gbstrlen(tmp);
if ( srcLen > 5 ) srcLen = 5;
for ( int32_t k = 0 ; k < srcLen ; k++ )
dst[k] = src[k];
}
if ( printedSecond ) {
sb->safePrintf("</div>");
}
return true;
}
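// [Editor's sketch, not part of the patch] the reveal pattern the loop
// above emits: the first matching sentence is printed twice, once in the
// always-visible div (id=fd<N>) and again at the top of the hidden
// scrollable div (id=sd<N>), so the ccc(N) toggle can swap a one-sentence
// teaser for the full list. SafeBuf calls are the same ones used above.
static void sketchRevealDivs ( SafeBuf *sb , int32_t gigabitId ,
			       char *firstSentence ) {
	// visible teaser, shown until ccc(gigabitId) is clicked
	sb->safePrintf("<div id=fd%"INT32">",gigabitId);
	sb->safeStrcpy(firstSentence);
	sb->safePrintf("</div>");
	// hidden div repeats the first sentence; later ones append here
	sb->safePrintf("<div style=\"display:none;\" id=sd%"INT32">",
		       gigabitId);
	sb->safeStrcpy(firstSentence);
	sb->safePrintf("</div>");
}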
// . make a web page from results stored in msg40
// . send it on TcpSocket "s" when done
// . returns false if blocked, true otherwise
@ -1045,22 +730,6 @@ bool gotResults ( void *state ) {
return true;
}
// defined in PageRoot.cpp
bool expandHtml ( SafeBuf& sb,
char *head ,
int32_t hlen ,
char *q ,
int32_t qlen ,
HttpRequest *r ,
SearchInput *si,
char *method ,
CollectionRec *cr ) ;
bool printLeftColumnRocketAndTabs ( SafeBuf *sb,
bool isSearchResultsPage ,
CollectionRec *cr ,
char *tabName );
bool printLeftNavColumn ( SafeBuf &sb, State0 *st ) {
@ -1135,271 +804,26 @@ bool printLeftNavColumn ( SafeBuf &sb, State0 *st ) {
// . tabName = "search"
printLeftColumnRocketAndTabs ( &sb , true , cr , "search" );
}
//
// BEGIN FACET PRINTING
//
//
// . print out one table for each gbfacet: term in the query
// . LATER: show the text string corresponding to the hash
// by looking it up in the titleRec
//
msg40->printFacetTables ( &sb );
//
// END FACET PRINTING
//
//
// BEGIN FACET PRINTING
//
//
// . print out one table for each gbfacet: term in the query
// . LATER: show the text string corresponding to the hash
// by looking it up in the titleRec
//
if ( format == FORMAT_HTML ) msg40->printFacetTables ( &sb );
//
// END FACET PRINTING
//
//
// BEGIN PRINT GIGABITS
//
SafeBuf *gbuf = &msg40->m_gigabitBuf;
int32_t numGigabits = gbuf->length()/sizeof(Gigabit);
if ( ! st->m_header )
numGigabits = 0;
// print gigabits
Gigabit *gigabits = (Gigabit *)gbuf->getBufStart();
if ( numGigabits && format == FORMAT_XML )
sb.safePrintf("\t<gigabits>\n");
if ( numGigabits && format == FORMAT_JSON )
sb.safePrintf("\"gigabits\":[\n");
if ( numGigabits && format == FORMAT_HTML )
// gigabit unhide function
sb.safePrintf (
"<script>"
"function ccc ( gn ) {\n"
"var e = document.getElementById('fd'+gn);\n"
"var f = document.getElementById('sd'+gn);\n"
"if ( e.style.display == 'none' ){\n"
"e.style.display = '';\n"
"f.style.display = 'none';\n"
"}\n"
"else {\n"
"e.style.display = 'none';\n"
"f.style.display = '';\n"
"}\n"
"}\n"
"</script>\n"
);
if ( numGigabits && format == FORMAT_HTML )
sb.safePrintf("<div id=gigabits "
"style="
"padding:5px;"
"position:relative;"
"border-width:3px;"
"border-right-width:0px;"
"border-style:solid;"
"margin-left:10px;"
"border-top-left-radius:10px;"
"border-bottom-left-radius:10px;"
"border-color:blue;"
"background-color:white;"
"border-right-color:white;"
"margin-right:-3px;"
">"
"<table cellspacing=7>"
"<tr><td width=200px; valign=top>"
"<center><img src=/gigabits40.jpg></center>"
"<br>"
"<br>"
);
Query gigabitQuery;
char tmp[1024];
SafeBuf ttt(tmp, 1024);
// limit it to 40 gigabits for now
for ( int32_t i = 0 ; i < numGigabits && i < 40 ; i++ ) {
Gigabit *gi = &gigabits[i];
ttt.pushChar('\"');
ttt.safeMemcpy(gi->m_term,gi->m_termLen);
ttt.pushChar('\"');
ttt.pushChar(' ');
}
// null terminate it
ttt.nullTerm();
if ( numGigabits > 0 )
gigabitQuery.set2 ( ttt.getBufStart() ,
si->m_queryLangId ,
true , // queryexpansion?
true ); // usestopwords?
for ( int32_t i = 0 ; i < numGigabits ; i++ ) {
//if ( i > 0 && format == FORMAT_HTML )
// sb.safePrintf("<hr>");
//if ( perRow && (i % perRow == 0) )
// sb.safePrintf("</td><td valign=top>");
// print all sentences containing this gigabit
Gigabit *gi = &gigabits[i];
// after the first one, hide the rest behind a "Show more" link
if ( i == 1 && format == FORMAT_HTML ) {
sb.safePrintf("</span><a onclick="
"\""
"var e = "
"document.getElementById('hidegbits');"
"if ( e.style.display == 'none' ){\n"
"e.style.display = '';\n"
"this.innerHtml='Show less';"
"}"
"else {\n"
"e.style.display = 'none';\n"
"this.innerHtml='Show more';\n"
"}\n"
"\" style=cursor:hand;cursor:pointer;>"
"Show more</a>");
sb.safePrintf("<span id=hidegbits "
"style=display:none;>"
"<br><br>");
}
printGigabitContainingSentences( st, &sb, msg40, gi, si, &gigabitQuery, i );
if ( format == FORMAT_HTML )
sb.safePrintf("<br><br>");
}
//if ( numGigabits >= 1 && format == FORMAT_HTML )
if ( numGigabits && format == FORMAT_HTML )
sb.safePrintf("</td></tr></table></div><br>");
if ( numGigabits && format == FORMAT_XML )
sb.safePrintf("\t</gigabits>\n");
if ( numGigabits && format == FORMAT_JSON ) {
// remove ,\n
sb.m_length -=2;
// add back just \n
// end the gigabits array
sb.safePrintf("\n],\n");
}
//
// now print various knobs
//
//
// print date constraint functions now
//
if ( format == FORMAT_HTML && 1 == 2)
sb.safePrintf(
"<div id=best "
"style="
"font-size:14px;"
"padding:5px;"
"position:relative;"
"border-width:3px;"
"border-right-width:0px;"
"border-style:solid;"
"margin-left:10px;"
"border-top-left-radius:10px;"
"border-bottom-left-radius:10px;"
"border-color:blue;"
"background-color:white;"
"border-right-color:white;"
"margin-right:-3px;"
"text-align:right;"
">"
"<b>"
"ANYTIME &nbsp; &nbsp;"
"</b>"
"</div>"
"<br>"
"<div id=newsest "
"style="
"font-size:14px;"
"padding:5px;"
"position:relative;"
"border-width:3px;"
"border-right-width:0px;"
"border-style:solid;"
"margin-left:10px;"
"border-top-left-radius:10px;"
"border-bottom-left-radius:10px;"
"border-color:white;"
"background-color:blue;"
"border-right-color:blue;"
"margin-right:0px;"
"text-align:right;"
"color:white;"
">"
"<b>"
"LAST 24 HOURS &nbsp; &nbsp;"
"</b>"
"</div>"
"<br>"
"<div id=newsest "
"style="
"font-size:14px;"
"padding:5px;"
"position:relative;"
"border-width:3px;"
"border-right-width:0px;"
"border-style:solid;"
"margin-left:10px;"
"border-top-left-radius:10px;"
"border-bottom-left-radius:10px;"
"border-color:white;"
"background-color:blue;"
"border-right-color:blue;"
"margin-right:0px;"
"text-align:right;"
"color:white;"
">"
"<b>"
"LAST 7 DAYS &nbsp; &nbsp;"
"</b>"
"</div>"
"<br>"
"<div id=newsest "
"style="
"font-size:14px;"
"padding:5px;"
"position:relative;"
"border-width:3px;"
"border-right-width:0px;"
"border-style:solid;"
"margin-left:10px;"
"border-top-left-radius:10px;"
"border-bottom-left-radius:10px;"
"border-color:white;"
"background-color:blue;"
"border-right-color:blue;"
"margin-right:0px;"
"text-align:right;"
"color:white;"
">"
"<b>"
"LAST 30 DAYS &nbsp; &nbsp;"
"</b>"
"</div>"
"<br>"
);
//
// now the MAIN column
//
if ( format == FORMAT_HTML )
//
// now the MAIN column
//
sb.safePrintf("\n</TD>"
"<TD valign=top style=padding-left:30px;>\n");
}
return true;
}
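// [Editor's sketch, not part of the patch] equivalent of the "ttt" loop
// above for three placeholder gigabits: quote each term, space-separate,
// then parse the whole thing as one query so that
// printGigabitContainingSentences() can underline any gigabit term.
static void sketchGigabitQuery ( SearchInput *si , Query *gigabitQuery ) {
	SafeBuf ttt;
	// quoted, space-separated gigabit terms, as built above
	ttt.safePrintf("\"solar power\" \"grid storage\" \"subsidies\" ");
	ttt.nullTerm();
	gigabitQuery->set2 ( ttt.getBufStart() ,
			     si->m_queryLangId ,
			     true ,    // query expansion?
			     true );   // use stop words?
}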
@ -1525,7 +949,6 @@ bool printSearchResultsHeader ( State0 *st ) {
if ( header ) sb->safeStrcpy ( header );
}
// this also prints gigabits and nuggabits
// if we are xml/json we call this below otherwise we lose
// the header of <?xml...> or whatever
if ( ! g_conf.m_isMattWells && si->m_format == FORMAT_HTML ) {
@ -1962,12 +1385,6 @@ bool printSearchResultsHeader ( State0 *st ) {
st->m_header )
msg40->printFacetTables ( sb );
// now print gigabits if we are xml/json
if ( si->m_format != FORMAT_HTML ) {
// this will print gigabits
printLeftNavColumn ( *sb,st );
}
// global-index is not a custom crawl but we should use "objects"
bool isDiffbot = cr->m_isCustomCrawl;
if ( strcmp(cr->m_coll,"GLOBAL-INDEX") == 0 ) isDiffbot = true;
@ -2822,12 +2239,6 @@ bool printResult ( State0 *st, int32_t ix , int32_t *numPrintedSoFar ) {
return true;
}
// . if section voting info was requested, display now, it's in json
// . so if in csv it will mess things up!!!
if ( mr->ptr_sectionVotingInfo )
// it is possible this is just "\0"
sb->safeStrcpy ( mr->ptr_sectionVotingInfo );
// each "result" is the actual cached page, in this case, a json
// object, because we were called with &icc=1. in that situation
// ptr_content is set in the msg20reply.
@ -5202,7 +4613,6 @@ bool printSingleScore ( SafeBuf *sb, SearchInput *si, SingleScore *ss, Msg20Repl
wbw = WIKI_BIGRAM_WEIGHT;
}
float hgw = getHashGroupWeight(ss->m_hashGroup);
//float dvw = getDiversityWeight(ss->m_diversityRank);
float dnw = getDensityWeight(ss->m_densityRank);
float wsw = getWordSpamWeight(ss->m_wordSpamRank);
// HACK for inlink text!
@ -5508,13 +4918,6 @@ bool printFrontPageShell ( SafeBuf *sb , char *tabName , CollectionRec *cr,
// if catId >= 1 then print the dmoz radio button
bool printLogoAndSearchBox ( SafeBuf *sb, HttpRequest *hr, SearchInput *si ) {
char *root = "";
if ( g_conf.m_isMattWells )
root = "http://www.gigablast.com";
// now make a TABLE, left PANE contains gigabits and stuff
char *coll = hr->getString("c");
if ( ! coll ) coll = "";
@ -6323,53 +5726,6 @@ bool printSearchFiltersBar ( SafeBuf *sb , HttpRequest *hr ) {
s_mi[n].m_icon = NULL;
n++;
#ifdef SUPPORT_FACETS
// BR 20160801: Disabled by default
s_mi[n].m_menuNum = 4;
s_mi[n].m_title = "Language facet";
s_mi[n].m_cgi = "facet=gbfacetint:gblang";
s_mi[n].m_icon = NULL;
n++;
s_mi[n].m_menuNum = 4;
s_mi[n].m_title = "Content type facet";
s_mi[n].m_cgi = "facet=gbfacetstr:type";
s_mi[n].m_icon = NULL;
n++;
s_mi[n].m_menuNum = 4;
s_mi[n].m_title = "Url path depth";
s_mi[n].m_cgi = "facet=gbfacetint:gbpathdepth";
s_mi[n].m_icon = NULL;
n++;
s_mi[n].m_menuNum = 4;
s_mi[n].m_title = "Spider date facet";
s_mi[n].m_cgi = "facet=gbfacetint:gbspiderdate";
s_mi[n].m_icon = NULL;
n++;
// everything in tagdb is hashed
s_mi[n].m_menuNum = 4;
s_mi[n].m_title = "Site num inlinks facet";
s_mi[n].m_cgi = "facet=gbfacetint:gbtagsitenuminlinks";
s_mi[n].m_icon = NULL;
n++;
// s_mi[n].m_menuNum = 4;
// s_mi[n].m_title = "Domains facet";
// s_mi[n].m_cgi = "facet=gbfacetint:gbdomhash";
// n++;
s_mi[n].m_menuNum = 4;
s_mi[n].m_title = "Hopcount facet";
s_mi[n].m_cgi = "facet=gbfacetint:gbhopcount";
s_mi[n].m_icon = NULL;
n++;
#endif
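// [Editor's note, not part of the patch] each facet entry above simply
// appends its m_cgi string to the search URL, e.g. the language facet
// turns into (placeholder collection and query):
//   /search?c=main&q=test&facet=gbfacetint:gblang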
// output
s_mi[n].m_menuNum = 5;
s_mi[n].m_title = "Output HTML";
@ -6600,10 +5956,8 @@ bool printSearchFiltersBar ( SafeBuf *sb , HttpRequest *hr ) {
// after 4 make a new line
if ( i == 5 ) sb->safePrintf("<br><br>");
if ( i == 9 ) sb->safePrintf("<br><br>");
#ifndef SUPPORT_FACETS
if( i == 4 ) continue;
#endif
printMenu ( sb , i , hr );
}

@ -1,5 +1,6 @@
#include "gb-include.h"
#include "PageRoot.h"
#include "Indexdb.h" // makeKey(int64_t docId)
#include "Titledb.h"
#include "Spider.h"
@ -21,7 +22,7 @@ bool sendPageRoot ( TcpSocket *s, HttpRequest *r ){
return sendPageRoot ( s, r, NULL );
}
bool printNav ( SafeBuf &sb , HttpRequest *r ) {
static bool printNav ( SafeBuf &sb , HttpRequest *r ) {
sb.safePrintf("</TD></TR></TABLE>"
"</body></html>");
return true;
@ -33,7 +34,7 @@ bool printNav ( SafeBuf &sb , HttpRequest *r ) {
//
//////////////
bool printFamilyFilter ( SafeBuf& sb , bool familyFilterOn ) {
static bool printFamilyFilter ( SafeBuf& sb , bool familyFilterOn ) {
char *s1 = "";
char *s2 = "";
if ( familyFilterOn ) s1 = " checked";
@ -49,7 +50,7 @@ bool printFamilyFilter ( SafeBuf& sb , bool familyFilterOn ) {
#include "SearchInput.h"
bool printRadioButtons ( SafeBuf& sb , SearchInput *si ) {
static bool printRadioButtons ( SafeBuf& sb , SearchInput *si ) {
// don't display this for directory search
// look it up. returns catId <= 0 if dmoz not setup yet.
// From PageDirectory.cpp
@ -127,7 +128,7 @@ bool printRadioButtons ( SafeBuf& sb , SearchInput *si ) {
return true;
}
bool printLogo ( SafeBuf& sb , SearchInput *si ) {
static bool printLogo ( SafeBuf& sb , SearchInput *si ) {
// if an image was provided...
if ( ! si->m_imgUrl || ! si->m_imgUrl[0] ) {
// no, now we default to our logo
@ -172,7 +173,7 @@ bool printLogo ( SafeBuf& sb , SearchInput *si ) {
bool expandHtml ( SafeBuf& sb,
char *head ,
const char *head ,
int32_t hlen ,
char *q ,
int32_t qlen ,
@ -433,7 +434,7 @@ bool expandHtml ( SafeBuf& sb,
bool printLeftColumnRocketAndTabs ( SafeBuf *sb ,
bool isSearchResultsPage ,
CollectionRec *cr ,
char *tabName ) {
const char *tabName ) {
class MenuItem {
public:
@ -661,7 +662,7 @@ bool printLeftColumnRocketAndTabs ( SafeBuf *sb ,
return true;
}
bool printFrontPageShell ( SafeBuf *sb , char *tabName , CollectionRec *cr ,
bool printFrontPageShell ( SafeBuf *sb , const char *tabName , CollectionRec *cr ,
bool printGigablast ) {
sb->safePrintf("<html>\n");
@ -670,7 +671,7 @@ bool printFrontPageShell ( SafeBuf *sb , char *tabName , CollectionRec *cr ,
sb->safePrintf("<meta name=\"description\" content=\"A powerful, new search engine that does real-time indexing!\">\n");
sb->safePrintf("<meta name=\"keywords\" content=\"search, search engine, search engines, search the web, fresh index, green search engine, green search, clean search engine, clean search\">\n");
char *title = "An Alternative Open Source Search Engine";
const char *title = "An Alternative Open Source Search Engine";
if ( strcasecmp(tabName,"search") ) {
title = tabName;
}
@ -736,7 +737,7 @@ bool printFrontPageShell ( SafeBuf *sb , char *tabName , CollectionRec *cr ,
return true;
}
bool printWebHomePage ( SafeBuf &sb , HttpRequest *r , TcpSocket *sock ) {
static bool printWebHomePage ( SafeBuf &sb , HttpRequest *r , TcpSocket *sock ) {
SearchInput si;
si.set ( sock , r );
@ -870,7 +871,7 @@ bool printWebHomePage ( SafeBuf &sb , HttpRequest *r , TcpSocket *sock ) {
return true;
}
bool printAddUrlHomePage ( SafeBuf &sb , char *url , HttpRequest *r ) {
static bool printAddUrlHomePage ( SafeBuf &sb , char *url , HttpRequest *r ) {
CollectionRec *cr = g_collectiondb.getRec ( r );
@ -1167,7 +1168,7 @@ public:
// only allow up to 1 Msg10's to be in progress at a time
static bool s_inprogress = false;
void doneInjectingWrapper3 ( void *st ) ;
static void doneInjectingWrapper3 ( void *st ) ;
// . returns false if blocked, true otherwise
// . sets g_errno on error
@ -1438,7 +1439,7 @@ bool sendPageAddUrl ( TcpSocket *sock , HttpRequest *hr ) {
}
void doneInjectingWrapper3 ( void *st ) {
static void doneInjectingWrapper3 ( void *st ) {
State1i *st1 = (State1i *)st;
// allow others to add now
s_inprogress = false;
@ -1645,7 +1646,7 @@ void doneInjectingWrapper3 ( void *st ) {
static HashTable s_htable;
static bool s_init = false;
static int32_t s_lastTime = 0;
bool canSubmit ( uint32_t h , int32_t now , int32_t maxAddUrlsPerIpDomPerDay ) {
static bool canSubmit ( uint32_t h , int32_t now , int32_t maxAddUrlsPerIpDomPerDay ) {
// . sometimes no limit
// . 0 means no limit because if they don't want any submission they
// can just turn off add url and we want to avoid excess
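// [Editor's sketch, not part of the patch] the daily quota idea in
// standalone form, using std::unordered_map instead of the project's
// HashTable; all names here are illustrative.
#include <unordered_map>
static bool canSubmitSketch ( uint32_t h , int32_t now , int32_t maxPerDay ) {
	static std::unordered_map<uint32_t,int32_t> s_counts;
	static int32_t s_lastReset = 0;
	// 0 means no limit; turning add url off entirely blocks submissions
	if ( maxPerDay <= 0 ) return true;
	// wipe the per-IP-domain counts once a day
	if ( now - s_lastReset >= 24*60*60 ) {
		s_counts.clear();
		s_lastReset = now;
	}
	// count this submission and enforce the per-day ceiling
	return ++s_counts[h] <= maxPerDay;
}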

29
PageRoot.h Normal file

@ -0,0 +1,29 @@
#ifndef PAGEROOT_H_
#define PAGEROOT_H_
#include "SafeBuf.h"
#include "Collectiondb.h"
class SearchInput;
bool printFrontPageShell ( SafeBuf *sb,
const char *tabName,
CollectionRec *cr,
bool printGigablast );
bool expandHtml ( SafeBuf& sb,
const char *head ,
int32_t hlen ,
char *q ,
int32_t qlen ,
HttpRequest *r ,
SearchInput *si,
char *method ,
CollectionRec *cr );
bool printLeftColumnRocketAndTabs ( SafeBuf *sb ,
bool isSearchResultsPage ,
CollectionRec *cr ,
const char *tabName );
#endif
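// [Editor's usage sketch, not part of the patch] the new header in use;
// the two calls match the declarations above, and the collection record
// is assumed to come from g_collectiondb.
static void sketchFrontPage ( SafeBuf *sb , CollectionRec *cr ) {
	// front-page shell with the "search" tab active and the logo shown
	printFrontPageShell ( sb , "search" , cr , true );
	// left-hand rocket and tabs as drawn on a search results page
	printLeftColumnRocketAndTabs ( sb , true , cr , "search" );
}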

@ -21,27 +21,12 @@ bool sendPageSockets ( TcpSocket *s , HttpRequest *r ) {
// don't allow pages bigger than 128k in cache
char buf [ 128*1024 ];
SafeBuf p(buf, 128*1024);
//char *bufEnd = buf + 256*1024;
// a ptr into "buf"
// password, too
//int32_t pwdLen = 0;
//char *pwd = r->getString ( "pwd" , &pwdLen );
//if ( pwdLen > 31 ) pwdLen = 31;
//if ( pwd ) pwd[pwdLen]='\0';
int32_t collLen = 0;
char *coll = r->getString( "c", &collLen );
if ( collLen > MAX_COLL_LEN ) collLen = MAX_COLL_LEN;
if ( coll ) coll[collLen] = '\0';
//char pbuf [32];
//if ( pwdLen > 0 ) strncpy ( pbuf , pwd , pwdLen );
//pbuf[pwdLen]='\0';
// print standard header
// char *ss = p.getBuf();
// char *ssend = p.getBufEnd();
g_pages.printAdminTop ( &p, s , r );
//p.incrementLength(sss - ss);
// now print out the sockets table for each tcp server we have
printTcpTable(&p,"HTTP Server" ,g_httpServer.getTcp());
@ -62,66 +47,6 @@ bool sendPageSockets ( TcpSocket *s , HttpRequest *r ) {
if ( m == 0 ) count++;
}
/*
sprintf ( p , "<table width=100%% bgcolor=#d0d0f0 border=1>"
"<tr><td bgcolor=#c0c0f0 colspan=%"INT32">"
"<center><font size=+1><b>Wait Times</b></font>"
"</td></tr>\n" , 3 + count );
p += gbstrlen ( p );
// print columns
sprintf ( p ,
"<tr>"
"<td><b>machine #</b></td>"
"<td><b>send wait</b></td>"
"<td><b>read wait</b></td>" );
p += gbstrlen ( p );
// print disk columns
for ( int32_t i = 0 ; i < count ; i++ ) {
sprintf ( p , "<td><b>disk %"INT32" wait</b></td>",i);
p += gbstrlen ( p );
}
// end the top row
sprintf ( p , "</tr>\n" );
p += gbstrlen ( p );
// print rows
for ( int32_t i = 0 ; i < g_hostdb.getNumMachines() ; i++ ) {
// print machine #
sprintf ( p , "<tr><td><b>%"INT32"</b></td>",i);
p += gbstrlen ( p );
// then net send
float x = (float)g_queryRouter.m_sendWaits[i] / 1000;
sprintf ( p , "<td>%.1fms</td>", x );
p += gbstrlen ( p );
// then net read
x = (float)g_queryRouter.m_readWaits[i] / 1000;
sprintf ( p , "<td>%.1fms</td>", x );
p += gbstrlen ( p );
// print disk wait in milliseconds (it's in microseconds)
// find any host that matches this machine
for ( int32_t j = 0 ; j < g_hostdb.getNumHosts() ; j++ ) {
// use in order of ip
int32_t hid = g_hostdb.m_hostPtrs[j]->m_hostId;
// get machine #
int32_t m = g_hostdb.getMachineNum(hid);
// skip if no match
if ( m != i ) continue;
// otherwise print
x = (float)g_queryRouter.m_diskWaits[hid] / 1000;
sprintf ( p , "<td>%.1fms</td>", x );
p += gbstrlen ( p );
}
// end row
sprintf ( p , "</tr>\n");
p += gbstrlen ( p );
}
// end table
sprintf ( p , "</table>");
p += gbstrlen ( p );
*/
// print the final tail
//p += g_httpServer.printTail ( p , pend - p );
// calculate buffer length
int32_t bufLen = p.length();
// . send this page

@ -8,6 +8,7 @@
#include "PageParser.h" // g_inPageParser
#include "Rebalance.h"
#include "Profiler.h"
#include "PageRoot.h"
// a global class extern'd in Pages.h
Pages g_pages;
@ -1518,8 +1519,6 @@ bool sendPageReportSpam ( TcpSocket *s , HttpRequest *r ) {
return retval;
}
bool printFrontPageShell ( SafeBuf *sb , char *tabName , CollectionRec *cr ,
bool printGigablast ) ;
// let's use a separate section for each "page"
// then have 3 tables, the input parms,
@ -2200,81 +2199,6 @@ bool printApiForPage ( SafeBuf *sb , int32_t PAGENUM , CollectionRec *cr ) {
sb->safePrintf("<b>\t},\n</b>\n");
// gigabits
sb->brify2 (
"\t# The start of the gigabits array. Each gigabit "
"is mined from the content of the search results. "
"The top "
"N results are mined, and you can control N with the "
"&dsrt input parameter described above.\n"
, cols , "\n\t# " , false );
sb->safePrintf("<b>\t\"gigabits\":[\n\n</b>");
// print gigabit #0
sb->brify2 ( "\t\t# The first gigabit in the array.\n"
, cols , "\n\t\t# " , false );
sb->safePrintf("<b>\t\t{\n\n</b>");
sb->brify2 ( "\t\t# The gigabit as a string in utf8.\n"
, cols , "\n\t\t# " , false );
sb->safePrintf("<b>\t\t\"term\":\"Membership\",\n\n</b>");
sb->brify2 ( "\t\t# The numeric score of the gigabit.\n"
, cols , "\n\t\t# " , false );
sb->safePrintf("<b>\t\t\"score\":240,\n\n</b>");
sb->brify2 ( "\t\t# The popularity ranking of the gigabit. "
"Out of 10000 random documents, how many "
"documents contain it?\n"
, cols , "\n\t\t# " , false );
sb->safePrintf("<b>\t\t\"minPop\":480,\n\n</b>");
sb->brify2 ( "\t\t# The gigabit in the context of a "
"document.\n"
, cols , "\n\t\t# " , false );
sb->safePrintf("<b>\t\t\"instance\":{\n\n</b>");
sb->brify2 ( "\t\t\t"
"# A sentence, if it exists, "
"from one of the search results "
"which also contains the gigabit and as many "
"significant query terms as possible. In UTF-8.\n"
, cols , "\n\t\t\t# " , false );
sb->brify2("<b>\t\t\t\"sentence\":"
"\"Get a free "
"<b>Tested</b> Premium Membership here!\","
"\n\n</b>"
, 80 , "\n\t\t\t " , false );
sb->brify2 ( "\t\t\t"
"# The url that contained that sentence. Always "
"starts with http.\n"
, cols , "\n\t\t\t# " , false );
sb->safePrintf("<b>\t\t\t\"url\":"
"\"http://www.tested.com/\","
"\n\n</b>");
sb->brify2 ( "\t\t\t"
"# The domain of that url.\n"
, cols , "\n\t\t\t# " , false );
sb->safePrintf("<b>\t\t\t\"domain\":"
"\"tested.com\""
"\n</b>");
// end instance
sb->safePrintf("<b>\t\t}\n\n</b>");
// end gigabit
sb->safePrintf("\t\t# End of the first gigabit\n"
"<b>\t\t},\n\n</b>");
sb->safePrintf("\t\t...\n\n");
sb->brify2 (
"\t# End of the JSON gigabits array.\n"
, cols , "\n\t# " , false );
sb->safePrintf("<b>\t],\n\n</b>");
// BEGIN FACETS
sb->safePrintf( "\t# Start of the facets array, if any.\n");
sb->safePrintf("<b>\t\"facets\":[\n</b>\n");

463
Parms.cpp

@ -2345,13 +2345,6 @@ bool Parms::setFromRequest ( HttpRequest *r ,
char *xx=NULL;*xx=0;
}
// need this for searchInput which takes default from "cr"
//CollectionRec *cr = g_collectiondb.getRec ( r , true );
// no SearchInput.cpp does this and then overrides if xml feed
// to set m_docsToScanForTopics
//setToDefault ( THIS , objType , cr );
// loop through cgi parms
for ( int32_t i = 0 ; i < r->getNumFields() ; i++ ) {
// get cgi parm name
@ -4655,59 +4648,6 @@ void Parms::init ( ) {
m->m_obj = OBJ_COLL;
m++;
m->m_title = "demotion for query terms or gigabits in url";
m->m_desc = "Demotion factor for query terms or gigabits "
"in a result's url. "
"Score will be penalized by this factor times the number "
"of query terms or gigabits in the url divided by "
"the max value below such that fewer "
"query terms or gigabits in the url causes the result "
"to be demoted more heavily, depending on the factor. "
"Higher factors demote more per query term or gigabit "
"in the page's url. "
"Generally, a page may not be demoted more than this "
"factor as a percent. Also, how it is demoted is "
"dependant on the max value. For example, "
"a factor of 0.2 will demote the page 20% if it has no "
"query terms or gigabits in its url. And if the max value is "
"10, then a page with 5 query terms or gigabits in its "
"url will be demoted 10%; and 10 or more query terms or "
"gigabits in the url will not be demoted at all. "
"0 means no demotion. "
"A safe range is from 0 to 0.35. ";
m->m_cgi = "pqrqttiu";
m->m_off = (char *)&cr.m_pqr_demFactQTTopicsInUrl - x;
m->m_type = TYPE_FLOAT;
m->m_def = "0";
m->m_group = 0;
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;
m->m_title = "max value for pages with query terms or gigabits "
"in url";
m->m_desc = "Max number of query terms or gigabits in a url. "
"Pages with a number of query terms or gigabits in their "
"urls greater than or equal to this value will not be "
"demoted. "
"This controls the range of values expected to represent "
"the number of query terms or gigabits in a url. It should "
"be set to or near the estimated max number of query terms "
"or topics that can be in a url. Setting to a lower value "
"increases the penalty per query term or gigabit that is "
"not in a url, but decreases the range of values that "
"will be demoted.";
m->m_cgi = "pqrqttium";
m->m_off = (char *)&cr.m_pqr_maxValQTTopicsInUrl - x;
m->m_type = TYPE_LONG;
m->m_def = "10";
m->m_group = 0;
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;
m->m_title = "demotion for pages that are not "
"root or have many paths in the url";
m->m_desc = "Demotion factor each path in the url. "
@ -4775,60 +4715,6 @@ void Parms::init ( ) {
m->m_obj = OBJ_COLL;
m++;
m->m_title = "demotion for non-location specific queries "
"with a location specific title";
m->m_desc = "Demotion factor for non-location specific queries "
"with a location specific title. "
"Pages which contain a location in their title which is "
"not in the query or the gigabits will be demoted by their "
"population multiplied by this factor divided by the max "
"place population specified below. "
"Generally, a page will not be demoted more than this "
"value as a percent. "
"0 means no demotion. ";
m->m_cgi = "pqrloct";
m->m_off = (char *)&cr.m_pqr_demFactLocTitle - x;
m->m_type = TYPE_FLOAT;
m->m_def = "0.99";
m->m_group = 0;
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;
m->m_title = "demotion for non-location specific queries "
"with a location specific summary";
m->m_desc = "Demotion factor for non-location specific queries "
"with a location specific summary. "
"Pages which contain a location in their summary which is "
"not in the query or the gigabits will be demoted by their "
"population multiplied by this factor divided by the max "
"place population specified below. "
"Generally, a page will not be demoted more than this "
"value as a percent. "
"0 means no demotion. ";
m->m_cgi = "pqrlocs";
m->m_off = (char *)&cr.m_pqr_demFactLocSummary - x;
m->m_type = TYPE_FLOAT;
m->m_def = "0.95";
m->m_group = 0;
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;
m->m_title = "demote locations that appear in gigabits";
m->m_desc = "Demote locations that appear in gigabits.";
m->m_cgi = "pqrlocg";
m->m_off = (char *)&cr.m_pqr_demInTopics - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;
m->m_title = "max value for non-location specific queries "
"with location specific results";
m->m_desc = "Max place population. "
@ -5093,19 +4979,6 @@ void Parms::init ( ) {
m->m_obj = OBJ_COLL;
m++;
m->m_title = "percent topic similar default";
m->m_desc = "Like above, but used for deciding when to cluster "
"results by topic for the news collection.";
m->m_cgi = "ptcd";
m->m_off = (char *)&cr.m_topicSimilarCutoffDefault - x;
m->m_type = TYPE_LONG;
m->m_def = "50";
m->m_group = 0;
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;
m->m_title = "max query terms";
m->m_desc = "Do not allow more than this many query terms. Helps "
"prevent big queries from resource hogging.";
@ -5844,97 +5717,6 @@ void Parms::init ( ) {
m->m_obj = OBJ_SI;
m++;
m->m_title = "results to scan for gigabits generation";
m->m_desc = "How many search results should we "
"scan for gigabit (related topics) generation. Set this to "
"zero to disable gigabits!";
m->m_cgi = "dsrt";
m->m_off = (char *)&si.m_docsToScanForTopics - y;
m->m_type = TYPE_LONG;
m->m_defOff= (char *)&cr.m_docsToScanForTopics - x;
m->m_flags = PF_API;
m->m_page = PAGE_RESULTS;
m->m_obj = OBJ_SI;
m++;
m->m_title = "ip restriction for gigabits";
m->m_desc = "Should Gigablast only get one document per IP domain "
"and per domain for gigabits (related topics) generation?";
m->m_cgi = "ipr";
m->m_off = (char *)&si.m_ipRestrictForTopics - y;
m->m_defOff= (char *)&cr.m_ipRestrict - x;
m->m_type = TYPE_BOOL;
m->m_group = 0;
m->m_flags = PF_API;
m->m_page = PAGE_RESULTS;
m->m_obj = OBJ_SI;
m++;
m->m_title = "number of gigabits to show";
m->m_desc = "What is the number of gigabits (related topics) "
"displayed per query? Set to 0 to save a little CPU time.";
m->m_cgi = "nrt";
m->m_defOff= (char *)&cr.m_numTopics - x;
m->m_off = (char *)&si.m_numTopicsToDisplay - y;
m->m_type = TYPE_LONG;
m->m_def = "11";
m->m_group = 0;
m->m_sprpg = 0; // do not propagate
m->m_sprpp = 0; // do not propagate
m->m_flags = PF_API;
m->m_page = PAGE_RESULTS;
m->m_obj = OBJ_SI;
m++;
m->m_title = "min topics score";
m->m_desc = "Gigabits (related topics) with scores below this "
"will be excluded. Scores range from 0% to over 100%.";
m->m_cgi = "mts";
m->m_defOff= (char *)&cr.m_minTopicScore - x;
m->m_off = (char *)&si.m_minTopicScore - y;
m->m_type = TYPE_LONG;
m->m_group = 0;
m->m_flags = PF_API;
m->m_page = PAGE_RESULTS;
m->m_obj = OBJ_SI;
m++;
m->m_title = "min gigabit doc count by default";
m->m_desc = "How many documents must contain the gigabit "
"(related topic) in order for it to be displayed.";
m->m_cgi = "mdc";
m->m_defOff= (char *)&cr.m_minDocCount - x;
m->m_off = (char *)&si.m_minDocCount - y;
m->m_type = TYPE_LONG;
m->m_def = "2";
m->m_group = 0;
m->m_flags = PF_API;
m->m_page = PAGE_RESULTS;
m->m_obj = OBJ_SI;
m++;
m->m_title = "dedup doc percent for gigabits (related topics)";
m->m_desc = "If a document is this percent similar to another "
"document with a higher score, then it will not contribute "
"to the gigabit generation.";
m->m_cgi = "dsp";
m->m_defOff= (char *)&cr.m_dedupSamplePercent - x;
m->m_off = (char *)&si.m_dedupSamplePercent - y;
m->m_type = TYPE_LONG;
m->m_def = "80";
m->m_group = 0;
m->m_flags = PF_API;
m->m_page = PAGE_RESULTS;
m->m_obj = OBJ_SI;
m++;
///////////////////////////////////////////
// SPIDER PROXY CONTROLS
@ -6050,19 +5832,6 @@ void Parms::init ( ) {
m->m_obj = OBJ_CONF;
m++;
m->m_title = "max words per gigabit (related topic) by default";
m->m_desc = "Maximum number of words a gigabit (related topic) "
"can have. Affects xml feeds, too.";
m->m_cgi = "mwpt";
m->m_defOff= (char *)&cr.m_maxWordsPerTopic - x;
m->m_off = (char *)&si.m_maxWordsPerTopic - y;
m->m_type = TYPE_LONG;
m->m_def = "6";
m->m_group = 0;
m->m_flags = PF_API;
m->m_page = PAGE_RESULTS;
m->m_obj = OBJ_SI;
m++;
m->m_title = "show images";
m->m_desc = "Should we return or show the thumbnail images in the "
@ -6364,52 +6133,6 @@ void Parms::init ( ) {
m->m_obj = OBJ_SI;
m++;
m->m_title = "return number of docs per topic";
m->m_desc = "Use 1 if you want Gigablast to return the number of "
"documents in the search results that contained each topic "
"(gigabit).";
m->m_def = "1";
m->m_off = (char *)&si.m_returnDocIdCount - y;
m->m_type = TYPE_BOOL;
m->m_cgi = "rdc";
m->m_page = PAGE_RESULTS;
m->m_obj = OBJ_SI;
m++;
m->m_title = "return docids per topic";
m->m_desc = "Use 1 if you want Gigablast to return the list of "
"docIds from the search results that contained each topic "
"(gigabit).";
m->m_def = "0";
m->m_off = (char *)&si.m_returnDocIds - y;
m->m_type = TYPE_BOOL;
m->m_cgi = "rd";
m->m_page = PAGE_RESULTS;
m->m_obj = OBJ_SI;
m++;
m->m_title = "return popularity per topic";
m->m_desc = "Use 1 if you want Gigablast to return the popularity "
"of each topic (gigabit).";
m->m_def = "0";
m->m_off = (char *)&si.m_returnPops - y;
m->m_type = TYPE_BOOL;
m->m_cgi = "rp";
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_RESULTS;
m->m_obj = OBJ_SI;
m++;
m->m_title = "debug gigabits flag";
m->m_desc = "Is 1 to log gigabits debug information, 0 otherwise.";
m->m_def = "0";
m->m_off = (char *)&si.m_debugGigabits - y;
m->m_type = TYPE_BOOL;
m->m_cgi = "debuggigabits";
m->m_page = PAGE_RESULTS;
m->m_obj = OBJ_SI;
m++;
m->m_title = "return docids only";
m->m_desc = "Is 1 to return only docids as query results.";
m->m_def = "0";
@ -9864,147 +9587,6 @@ void Parms::init ( ) {
m->m_obj = OBJ_COLL;
m++;
m->m_title = "results to scan for gigabits generation by default";
m->m_desc = "How many search results should we "
"scan for gigabit (related topics) generation. Set this to "
"zero to disable gigabits generation by default.";
m->m_cgi = "dsrt";
m->m_off = (char *)&cr.m_docsToScanForTopics - x;
m->m_type = TYPE_LONG;
m->m_def = "0";
m->m_flags = PF_API | PF_CLONE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;
m->m_title = "ip restriction for gigabits by default";
m->m_desc = "Should Gigablast only get one document per IP domain "
"and per domain for gigabits (related topics) generation?";
m->m_cgi = "ipr";
m->m_off = (char *)&cr.m_ipRestrict - x;
m->m_type = TYPE_BOOL;
// default to 0 since newspaperarchive only has docs from same IP dom
m->m_def = "0";
m->m_group = 0;
m->m_flags = PF_API | PF_CLONE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;
m->m_title = "remove overlapping topics";
m->m_desc = "Should Gigablast remove overlapping topics (gigabits)?";
m->m_cgi = "rot";
m->m_off = (char *)&cr.m_topicRemoveOverlaps - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m->m_flags = PF_API | PF_CLONE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;
m->m_title = "number of gigabits to show by default";
m->m_desc = "What is the number of "
"related topics (gigabits) "
"displayed per query? Set to 0 to save "
"CPU time.";
m->m_cgi = "nrt";
m->m_off = (char *)&cr.m_numTopics - x;
m->m_type = TYPE_LONG;
m->m_def = "0";
m->m_group = 0;
m->m_sprpg = 0; // do not propagate
m->m_sprpp = 0; // do not propagate
m->m_flags = PF_API | PF_CLONE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;
m->m_title = "min gigabit score by default";
m->m_desc = "Gigabits (related topics) with scores below this "
"will be excluded. Scores range from 0% to over 100%.";
m->m_cgi = "mts";
m->m_off = (char *)&cr.m_minTopicScore - x;
m->m_type = TYPE_LONG;
m->m_def = "5";
m->m_group = 0;
m->m_flags = PF_API | PF_CLONE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;
m->m_title = "min gigabit doc count by default";
m->m_desc = "How many documents must contain the gigabit "
"(related topic) in order for it to be displayed.";
m->m_cgi = "mdc";
m->m_off = (char *)&cr.m_minDocCount - x;
m->m_type = TYPE_LONG;
m->m_def = "2";
m->m_group = 0;
m->m_flags = PF_API | PF_CLONE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;
m->m_title = "dedup doc percent for gigabits (related topics)";
m->m_desc = "If a document is this percent similar to another "
"document with a higher score, then it will not contribute "
"to the gigabit generation.";
m->m_cgi = "dsp";
m->m_off = (char *)&cr.m_dedupSamplePercent - x;
m->m_type = TYPE_LONG;
m->m_def = "80";
m->m_group = 0;
m->m_flags = PF_API | PF_CLONE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;
m->m_title = "max words per gigabit (related topic) by default";
m->m_desc = "Maximum number of words a gigabit (related topic) "
"can have. Affects xml feeds, too.";
m->m_cgi = "mwpt";
m->m_off = (char *)&cr.m_maxWordsPerTopic - x;
m->m_type = TYPE_LONG;
m->m_def = "6";
m->m_group = 0;
m->m_flags = PF_API | PF_CLONE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;
m->m_title = "gigabit max sample size";
m->m_desc = "Max chars to sample from each doc for gigabits "
"(related topics).";
m->m_cgi = "tmss";
m->m_off = (char *)&cr.m_topicSampleSize - x;
m->m_type = TYPE_LONG;
m->m_def = "4096";
m->m_group = 0;
m->m_flags = PF_API | PF_CLONE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;
m->m_title = "gigabit max punct len";
m->m_desc = "Max sequential punct chars allowed in a gigabit "
"(related topic). "
" Set to 1 for speed, 5 or more for best topics but twice as "
"slow.";
m->m_cgi = "tmpl";
m->m_off = (char *)&cr.m_topicMaxPunctLen - x;
m->m_type = TYPE_LONG;
m->m_def = "1";
m->m_group = 0;
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;
m->m_title = "display indexed date";
m->m_desc = "Display the indexed date along with results.";
m->m_cgi = "didt";
@ -10331,6 +9913,31 @@ void Parms::init ( ) {
m++;
m->m_title = "msg40->39 timeout";
m->m_desc = "Timeout for Msg40/Msg3a to collect candidate docids with Msg39. In milliseconds";
m->m_cgi = "msgfourty_msgthirtynine_timeout";
m->m_off = offsetof(Conf,m_msg40_msg39_timeout);
m->m_xml = "msg40_msg39_timeout";
m->m_type = TYPE_LONG_LONG;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_CONF;
m->m_def = "5000";
m->m_flags = 0;
m++;
m->m_title = "msg3a->39 network overhead";
m->m_desc = "Additional overhead/latecny for msg39 request+response over the network";
m->m_cgi = "msgthreea_msgthirtynine_network_overhead";
m->m_off = offsetof(Conf,m_msg3a_msg39_network_overhead);
m->m_xml = "msg3a_msg39_network_overhead";
m->m_type = TYPE_LONG_LONG;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_CONF;
m->m_def = "250";
m->m_flags = 0;
m++;
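// [Editor's sketch, not part of the patch] one way the two settings above
// could combine into a Msg3a-side budget; whether they are summed like
// this is an assumption, only the defaults (5000ms and 250ms) are from
// the patch.
static int64_t sketchMsg39BudgetMs ( ) {
	return g_conf.m_msg40_msg39_timeout
	     + g_conf.m_msg3a_msg39_network_overhead;
}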
///////////////////////////////////////////
// PAGE SPIDER CONTROLS
///////////////////////////////////////////
@ -12108,16 +11715,6 @@ void Parms::init ( ) {
m->m_obj = OBJ_CONF;
m++;
m->m_title = "log debug topic messages";
m->m_cgi = "ldto";
m->m_off = (char *)&g_conf.m_logDebugTopics - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m->m_page = PAGE_LOG;
m->m_obj = OBJ_CONF;
m++;
m->m_title = "log debug topDoc messages";
m->m_cgi = "ldtopd";
m->m_off = (char *)&g_conf.m_logDebugTopDocs - g;
@ -12334,16 +11931,6 @@ void Parms::init ( ) {
m->m_obj = OBJ_CONF;
m++;
m->m_title = "log timing messages for related topics";
m->m_cgi = "ltt";
m->m_off = (char *)&g_conf.m_logTimingTopics - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m->m_page = PAGE_LOG;
m->m_obj = OBJ_CONF;
m++;
m->m_title = "log reminder messages";
m->m_desc = "Log reminders to the programmer. You do not need this.";
m->m_cgi = "lr";

@ -5,7 +5,6 @@
Phrases::Phrases ( ) {
m_buf = NULL;
m_phraseSpam = NULL;
}
Phrases::~Phrases ( ) {
@ -18,29 +17,13 @@ void Phrases::reset() {
}
m_buf = NULL;
m_phraseSpam = NULL;
}
// initialize the phrases from the "words" token array
bool Phrases::set( Words *words,
Bits *bits ,
bool useStopWords ,
bool useStems ,
int32_t titleRecVersion,
int32_t niceness) {
bool Phrases::set( Words *words, Bits *bits, int32_t titleRecVersion, int32_t niceness ) {
// reset in case being re-used
reset();
// now we never use stop words and we just index two-word phrases
// so that a search for "get a" in quotes will match a doc that has
// the phrase "get a clue". it might impact performance, but it should
// be insignificant... but we need to have this level of precision.
// ok -- but what about 'kick a ball'. we might not have that phrase
// in the results for "kick a" AND "a ball"!! so we really need to
// index "kick a ball" as well as "kick a" and "a ball". i don't think
// that will cause too much bloat.
//useStopWords = false;
// ensure we have words
if ( ! words ) return true;
@ -49,7 +32,7 @@ bool Phrases::set( Words *words,
m_numPhrases = words->getNumWords();
// how much mem do we need?
int32_t need = m_numPhrases * (8+8+1+1+1);
int32_t need = m_numPhrases * (8+1);
// alloc if we need to
if ( need > PHRASE_BUF_SIZE )
@ -65,26 +48,17 @@ bool Phrases::set( Words *words,
// phrase not using stop words
m_phraseIds2 = (int64_t *)p ; p += m_numPhrases * 8;
m_phraseIds3 = (int64_t *)p ; p += m_numPhrases * 8;
m_phraseSpam = (unsigned char *)p ; p += m_numPhrases * 1;
m_numWordsTotal2= (unsigned char *)p ; p += m_numPhrases * 1;
m_numWordsTotal3= (unsigned char *)p ; p += m_numPhrases * 1;
// sanity
if ( p != m_buf + need ) { char *xx=NULL;*xx=0; }
// clear this
memset ( m_numWordsTotal2 , 0 , m_numPhrases );
memset ( m_numWordsTotal3 , 0 , m_numPhrases );
// point to this info while we parse
m_words = words;
m_wptrs = words->getWords();
m_wlens = words->getWordLens();
m_wids = words->getWordIds();
m_bits = bits;
m_useStopWords = useStopWords;
m_useStems = useStems;
// we now are dependent on this
m_titleRecVersion = titleRecVersion;
@ -93,7 +67,10 @@ bool Phrases::set( Words *words,
// . sets m_phraseIds [i]
// . sets m_phraseSpam[i] to PSKIP if NO phrase exists
for ( int32_t i = 0 ; i < words->getNumWords() ; i++ ) {
if ( ! m_wids[i] ) continue;
if ( ! m_wids[i] ) {
continue;
}
setPhrase ( i , niceness);
}
// success
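// [Editor's sketch, not part of the patch] what the trimmed set() now
// produces: only two-word phrase ids. For the words "get a clue",
// m_phraseIds2 holds hash("get a") at word 0 and hash("a clue") at
// word 1, so the quoted query "get a" matches directly. The Words/Bits
// setup calls are elided because their signatures are not shown here.
//	Words words;   // words.set(...) over the document text
//	Bits bits;     // bits.set(...) over those words
//	Phrases phrases;
//	phrases.set2 ( &words , &bits , 0 /*niceness*/ );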
@ -109,16 +86,15 @@ void Phrases::setPhrase ( int32_t i, int32_t niceness ) {
// hash of the phrase
int64_t h = 0LL;
// the hash of the two-word phrase (now we do 3,4 and 5 word phrases)
// the hash of the two-word phrase
int64_t h2 = 0LL;
int64_t h3 = 0LL;
// reset
unsigned char pos = 0;
// now look for other tokens that should follow the ith token
int32_t nw = m_words->getNumWords();
int32_t numWordsInPhrase = 1;
int32_t nw = m_words->getNumWords();
int32_t numWordsInPhrase = 1;
// use the min spam from all words in the phrase as the spam for phrase
char minSpam = -1;
@ -142,9 +118,10 @@ void Phrases::setPhrase ( int32_t i, int32_t niceness ) {
// a phrase but not be in it, then the phrase id ends up just
// being the following word's id, causing the synonyms code to
// give a synonym which it should not in Synonyms::set()
if ( ! m_bits->canBeInPhrase(i) )
if ( ! m_bits->canBeInPhrase(i) ) {
// so indeed, skip it then
goto nophrase;
}
h = m_wids[i];
@ -160,14 +137,21 @@ void Phrases::setPhrase ( int32_t i, int32_t niceness ) {
// . do not allow more than 32 alnum/punct "words" in a phrase
// . this prevents phrases with 100,000 words from slowing
// us down. would put us in a huge double-nested for loop
if ( j > i + 32 ) goto nophrase;
if ( j > i + 32 ) {
goto nophrase;
}
// deal with punct words
if ( ! m_wids[j] ) {
// if we cannot pair across word j then break
if ( ! m_bits->canPairAcross (j) ) break;
if ( !m_bits->canPairAcross( j ) ) {
break;
}
// does it have a hyphen?
if (j==i+1 && m_words->hasChar(j,'-')) hasHyphen=true;
if ( j == i + 1 && m_words->hasChar( j, '-' ) ) {
hasHyphen = true;
}
continue;
}
@ -180,51 +164,35 @@ void Phrases::setPhrase ( int32_t i, int32_t niceness ) {
int32_t conti = pos;
// hash the jth word into the hash
h = hash64Lower_utf8_cont(m_wptrs[j],
m_wlens[j],
h,
&conti );
h = hash64Lower_utf8_cont( m_wptrs[j], m_wlens[j], h, &conti );
pos = conti;
numWordsInPhrase++;
++numWordsInPhrase;
// N-word phrases?
if ( numWordsInPhrase == 2 ) {
h2 = h;
m_numWordsTotal2[i] = j-i+1;
if ( m_bits->isStopWord(j) )
hasStopWord2 = true;
continue;
}
if ( numWordsInPhrase == 3 ) {
h3 = h;
m_numWordsTotal3[i] = j-i+1;
//continue;
m_numWordsTotal2[i] = j - i + 1;
hasStopWord2 = m_bits->isStopWord(j);
break;
}
}
// if we cannot pair across word j then break
if ( ! m_bits->canPairAcross (j) ) break;
// keep chugging?
if ( numWordsInPhrase >= 5 ) {
// if we're not using stop words then break
if ( ! m_useStopWords ) break;
// if it's not a stop word then break
if ( ! m_bits->isStopWord (j) ) break;
if ( ! m_bits->canPairAcross (j) ) {
break;
}
// otherwise, get the next word
}
// if we had no phrase then use 0 as id (need 2+ words to be a phrase)
if ( numWordsInPhrase <= 1 ) {
nophrase:
m_phraseSpam[i] = PSKIP;
m_phraseIds2[i] = 0LL;
m_phraseIds3[i] = 0LL;
m_numWordsTotal2[i] = 0;
m_numWordsTotal3[i] = 0;
return;
}
@ -236,7 +204,6 @@ void Phrases::setPhrase ( int32_t i, int32_t niceness ) {
// set the phrase spam
if ( minSpam == -1 ) minSpam = 0;
m_phraseSpam[i] = minSpam;
// hyphen between numbers does not count (so 1-2 != 12)
if ( isNum ) hasHyphen = false;
@ -247,25 +214,23 @@ void Phrases::setPhrase ( int32_t i, int32_t niceness ) {
// . "i-phone" -> iphone
// . "e-mail" -> email
if ( hasHyphen || ! hasStopWord2 ) {
//m_phraseIds [i] = h;
m_phraseIds2[i] = h2;
}
// . "st. and" !-> stand
// . "the rapist" !-> therapist
else {
//m_phraseIds [i] = h ^ 0x768867;
m_phraseIds2[i] = h2 ^ 0x768867;
}
// forget hyphen logic for these
m_phraseIds3[i] = h3;
}
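// [Editor's sketch, not part of the patch] the bigram-id rule above in
// isolation: hyphenated or stop-word-free pairs keep the raw hash, so
// "i-phone" matches "iphone", while pairs glued together by a stop word
// get perturbed so "the rapist" cannot collide with "therapist". Note
// isNum clears hasHyphen first, so "1-2" never becomes "12".
static int64_t sketchPhraseId2 ( int64_t h2 , bool hasHyphen ,
				 bool hasStopWord2 ) {
	if ( hasHyphen || ! hasStopWord2 ) return h2;
	return h2 ^ 0x768867;
}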
// . return a pointer to the phrase that starts with word #i
// . the phrase length is returned through "phrLen"
char *Phrases::getPhrase ( int32_t i , int32_t *phrLen , int32_t npw ) {
// return NULL if no phrase
if ( m_phraseSpam[i] == PSKIP ) return NULL;
if ( m_phraseIds2[i] == 0LL ) {
return NULL;
}
// store the phrase in here
static char buf[256];
// . how many words, including punct words, are in phrase?
@ -273,7 +238,6 @@ char *Phrases::getPhrase ( int32_t i , int32_t *phrLen , int32_t npw ) {
//int32_t n = m_numWordsTotal[i] ;
int32_t n ;
if ( npw == 2 ) n = m_numWordsTotal2[i] ;
else if ( npw == 3 ) n = m_numWordsTotal3[i] ;
else { char *xx=NULL; *xx=0; }
char *s = buf;
@ -303,42 +267,6 @@ char *Phrases::getPhrase ( int32_t i , int32_t *phrLen , int32_t npw ) {
return buf;
}
// . word #n is in a phrase if he has [word][punct] or [punct][word]
// before/after him and you can pair across the punct and include both
// in a phrase
// . used by SimpleQuery class to see if a word is in a phrase or not
// . if it is then the query may choose not to represent the word by itself
bool Phrases::isInPhrase ( int32_t n ) {
// returns true if we started a phrase (our phraseSpam is not PSKIP)
if ( m_phraseSpam[n] != PSKIP ) return true;
// . see if we were in a phrase started by a word before us
// . this only works since stop words - whose previous word cannot be
// paired across - are able to start phrases
if ( n < 2 ) return false;
if ( ! m_bits->canPairAcross(n-1) ) return false;
if ( ! m_bits->canBeInPhrase(n-2) ) return false;
return true;
}
int32_t Phrases::getMaxWordsInPhrase ( int32_t i , int64_t *pid ) {
*pid = 0LL;
if ( m_numWordsTotal3[i] ) {
*pid = m_phraseIds3[i];
return m_numWordsTotal3[i];
}
if ( m_numWordsTotal2[i] ) {
*pid = m_phraseIds2[i];
return m_numWordsTotal2[i];
}
return 0;
}
int32_t Phrases::getMinWordsInPhrase ( int32_t i , int64_t *pid ) {
*pid = 0LL;

@ -8,17 +8,11 @@
#ifndef _PHRASES_H_
#define _PHRASES_H_
//#include "TermTable.h"
#include "Bits.h"
//#include "Spam.h"
//#include "Scores.h"
#include "Words.h"
//#include "Weights.h"
#define PHRASE_BUF_SIZE (MAX_WORDS * 14)
#define PSKIP 201
class Phrases {
public:
@ -27,82 +21,32 @@ class Phrases {
~Phrases();
void reset() ;
bool set2 ( Words *words, Bits *bits , int32_t niceness ) {
return set ( words,bits,true,false,TITLEREC_CURRENT_VERSION,
niceness); };
bool set2( Words *words, Bits *bits, int32_t niceness ) {
return set( words, bits, TITLEREC_CURRENT_VERSION, niceness );
}
// . set the hashes (m_phraseIds) of the phrases for these words
// . a phraseSpam of PSKIP means word is not in a phrase
// . "bits" describes the words in a phrasing context
// . "spam" is % spam of each word (spam may be NULL)
bool set ( Words *words,
Bits *bits ,
//Spam *spam ,
//Scores *scores ,
bool useStopWords ,
bool useStems ,
int32_t titleRecVersion,
int32_t niceness);
bool set( Words *words, Bits *bits, int32_t titleRecVersion, int32_t niceness );
//int64_t getPhraseId ( int32_t n ) { return m_phraseIds [n]; };
int64_t getPhraseId2 ( int32_t n ) { return m_phraseIds2[n]; };
//int64_t *getPhraseIds ( ) { return m_phraseIds ; };
int64_t *getPhraseIds2( ) { return m_phraseIds2; };
int64_t *getPhraseIds3( ) { return m_phraseIds3; };
//int64_t *getPhraseIds4( ) { return m_phraseIds4; };
//int64_t *getPhraseIds5( ) { return m_phraseIds5; };
//int64_t *getStripPhraseIds ( ) { return m_stripPhraseIds ; };
//int64_t getStripPhraseId ( int32_t n )
//{ return m_stripPhraseIds [n]; };
int32_t getPhraseSpam ( int32_t n ) { return m_phraseSpam[n]; };
bool hasPhraseId ( int32_t n ) { return (m_phraseSpam[n]!=PSKIP);};
bool startsAPhrase ( int32_t n ) { return (m_phraseSpam[n]!=PSKIP);};
bool isInPhrase ( int32_t n ) ;
// . often word #i is involved in 2 phrases
// . m_phraseIds[i] only holds the one he starts
// . this gets the one he's in the middle of or on the right of
// . used by Query.cpp for phrase-forcing
//int64_t getLeftPhraseId ( int32_t i ) ;
//int64_t getLeftStripPhraseId ( int32_t i ) ;
//int32_t getLeftPhraseIndex ( int32_t i ) ;
// . each non-spammy occurrence of phrase adds "baseScore" to its score
/*
bool hash ( TermTable *table ,
Weights *weightsPtr ,
uint32_t baseScore ,
uint32_t maxScore ,
int64_t startHash ,
char *prefix1 ,
int32_t prefixLen1 ,
char *prefix2 ,
int32_t prefixLen2 ,
bool hashUniqueOnly ,
int32_t titleRecVersion,
int32_t niceness = 0);
*/
int64_t *getPhraseIds2( ) { return m_phraseIds2; }
// . return a pointer to the NUL-terminated phrase that starts with word #i
// . its length is returned through "phrLen"
char *getPhrase ( int32_t i , int32_t *phrLen , int32_t npw );
//char *getNWordPhrase ( int32_t i , int32_t *phrLen , int32_t npw ) ;
//char *getStripPhrase ( int32_t i , int32_t *phrLen );
//int32_t getNumWords ( int32_t i ) { return m_numWordsTotal[i]; };
//int32_t getNumWordsInPhrase ( int32_t i ) { return m_numWordsTotal [i]; };
int32_t getNumWordsInPhrase2( int32_t i ) { return m_numWordsTotal2[i]; };
int32_t getNumWordsInPhrase2( int32_t i ) { return m_numWordsTotal2[i]; }
int32_t getMaxWordsInPhrase( int32_t i , int64_t *pid ) ;
int32_t getMinWordsInPhrase( int32_t i , int64_t *pid ) ;
// . leave this public so SimpleQuery.cpp can mess with it
// . called by Phrases::set() above for each i
// . we set phraseSpam to 0 to 100% typically
// . we set phraseSpam to PSKIP if word #i cannot start a phrase
void setPhrase ( int32_t i ,
int32_t niceness);
void setPhrase( int32_t i, int32_t niceness );
// private:
@ -111,26 +55,10 @@ class Phrases {
char *m_buf;
int32_t m_bufSize;
// . these are 1-1 with the words in the Words class
// . phraseSpam is PSKIP if the phraseId is invalid
//int64_t *m_phraseIds ;
// the two word hash
int64_t *m_phraseIds2 ;
int64_t *m_phraseIds3 ;
//int64_t *m_phraseIds4 ;
//int64_t *m_phraseIds5 ;
//int64_t *m_stripPhraseIds ;
unsigned char *m_phraseSpam ;
// . # words in phrase TOTAL (including punct words)
// . used for printing
// . used by SimpleQuery::getTermIds() for setting word ranges
// for phrases
//unsigned char *m_numWordsTotal ;
// for the two word phrases:
unsigned char *m_numWordsTotal2 ;
unsigned char *m_numWordsTotal3 ;
//unsigned char *m_numWordsTotal4 ;
//unsigned char *m_numWordsTotal5 ;
int32_t m_numPhrases; // should equal the # of words
// placeholders to avoid passing to subroutine
@ -140,19 +68,7 @@ class Phrases {
int32_t *m_wlens;
Bits *m_bits;
bool m_useStems;
bool m_useStopWords;
int32_t m_titleRecVersion;
// replaces Scores
//class Sections *m_sections;
//class Section *m_sectionPtrs;
// word scores, set in Scores.cpp
//int32_t *m_wordScores;
// the score of the phrase is the min of the scores of the words that
// make up the phrase
//int32_t *m_phraseScores ;
};
#endif
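For orientation, the whole phrase-hashing pipeline after this cleanup is three calls. A minimal sketch, assuming Words::set and Bits::set keep the shapes they have elsewhere in the tree (only the Phrases::set signature is confirmed by the Query.cpp hunk further down):

// sketch: compute bigram/trigram phrase ids for a text buffer
bool sketchHashPhrases ( char *content ) {
	Words words;
	if ( ! words.set ( content , true , 0 ) ) return false;    // assumed sig
	Bits bits;
	if ( ! bits.set ( &words , TITLEREC_CURRENT_VERSION , 0 ) ) // assumed sig
		return false;
	Phrases phrases;
	// useStopWords/useStems are no longer parms; the old true/false
	// defaults are presumably baked into set() now
	if ( ! phrases.set ( &words , &bits , TITLEREC_CURRENT_VERSION , 0 ) )
		return false;
	int64_t *bigramIds  = phrases.getPhraseIds2(); // two-word hashes
	int64_t *trigramIds = phrases.getPhraseIds3(); // three-word hashes
	return bigramIds && trigramIds;
}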

2
Pos.h

@ -4,7 +4,7 @@
#define _POS_H_
#include <stdint.h>
#include <Titledb.h>
#include "Titledb.h"
// this class is used to measure the number of characters between two "words"
// (as defined in the Words.cpp class) in units of "characters". A utf8

@ -3839,95 +3839,6 @@ void PosdbTable::intersectLists10_r ( ) {
if( g_conf.m_logTracePosdb ) log(LOG_TRACE,"%s:%s:%d: seoHack: %s, numTerms: %"INT32"", __FILE__,__func__, __LINE__, seoHack?"true":"false", m_q->m_numTerms);
// if we are just a sitehash:xxxxx list and m_getSectionStats is
// true then assume the list is one of hacked posdb keys where
// the wordposition bits and others are really a 32-bit site hash
// and we have to see how many different docids and sites have
// this term. and we compare to our site hash,
// m_r->m_sectionSiteHash32 to determine if the posdb key is
// onsite or offsite. then XmlDoc::printRainbowSections()
// can print out how many pages/sites duplicate your section's content.
// MDW: TODO: for the facet terms just compile the stats and do not
// send to intersecting. they are ignored for those purposes. send
// the hashtable back so msg3a can integrate the stats. keep in mind
// we have multiple docid ranges sometimes for one query!!!!
/*
MDW: take this out. now treat as a normal termlist but
do not use for scoring. so it is kinda like gbmin: gbmax:
query operators but it will just add the facet values to
QueryTerm::m_facetHashList for transmission back to the aggregator
node. however, it is only for docids in the final result set!
if ( m_r->m_getFacetStats ) {
// reset
m_facetStats.m_totalMatches = 0;
m_facetStats.m_totalEntries = 0;
m_dt.clear();
// scan the posdb keys
//for ( int32_t i = 0 ; i < m_msg2->getNumListsInGroup(0); i++) {
// get the sublist
RdbList *list = m_msg2->getList(0);//Group(0)[i];
char *p = list->getList ();
char *pend = p + list->getListSize();
// test
//int64_t final = 5663137686803656554LL;
//final &= TERMID_MASK;
//if ( p<pend && g_posdb.getTermId(p) == final )
// log("boo");
// scan it
for ( ; p < pend ; ) {
// . first key is the full size
// . uses the w,G,s,v and F bits to hold this
// . this is no longer necessarily sitehash, but
// can be any val, like now FacetStats is using
// it for the innerHtml sentence content hash32
int32_t sh32 = g_posdb.getFacetVal32 ( p );
//int64_t d = g_posdb.getDocId(p);
//int32_t rs = list->getRecSize(p);
// this will not update listptrlo, watch out!
p += list->getRecSize ( p );
// does this xpath from another docid have the
// same inner html as us?
if ( sh32 == m_r->m_myFacetVal32 ) // m_siteHash32 )
m_facetStats.m_totalMatches++;
// always this
m_facetStats.m_totalEntries++;
// unique site count
if ( m_dt.isInTable ( &sh32 ) ) continue;
// count it
m_facetStats.m_numUniqueVals++;
// only once
m_dt.addKey ( &sh32 );
// log it
//log("usite: %08"XINT32" %"INT64" rs=%"INT32"",sh32,d,rs);
// stop if too much so we do not try to
// re-alloc in a thread!
if ( m_dt.m_numSlotsUsed >= 1000000 ) break;
}
// and return the list for merging
int32_t *s = (int32_t *)m_facetHashList.getBufStart();
int32_t *send = (int32_t *)m_facetHashList.getBufEnd();
//if ( m_facetStats.m_numUniqueSites == 17 ) {
// log("q=%s",m_r->ptr_query);
// log("hey");
// //char *xx = NULL;*xx=0;
//}
//if(!strcmp(m_r->ptr_query,"gbsectionhash:3335323672699668766"
// log("boo");
int32_t *orig = s;
for ( int32_t i = 0 ; i < m_dt.m_numSlots ; i++ ) {
if ( ! m_dt.m_flags[i] ) continue;
*s++ = *(int32_t *)m_dt.getKeyFromSlot(i);
if ( s >= send ) break;
}
m_facetHashList.setLength((char *)s-(char *)orig);
return;
}
*/
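// Distilled, the removed pass above is one linear scan plus one
// hash-set dedup. A standalone sketch with hypothetical names
// (std::unordered_set standing in for the HashTableX m_dt):
/*
#include <unordered_set>
struct FacetTally { int32_t total; int32_t matches; int32_t unique; };
static FacetTally tallyFacets ( const int32_t *vals , int32_t n ,
                                int32_t myVal ) {
	FacetTally t = { 0, 0, 0 };
	std::unordered_set<int32_t> seen;
	for ( int32_t i = 0 ; i < n ; i++ ) {
		t.total++;                            // every posdb key counts
		if ( vals[i] == myVal ) t.matches++;  // same facet val as ours
		if ( seen.insert ( vals[i] ).second ) // first sighting only
			t.unique++;
	}
	return t;
}
*/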
//
// hash the docids in the whitelist termlists into a hashtable.
// every docid in the search results must be in there. the
@ -5826,9 +5737,7 @@ void PosdbTable::intersectLists10_r ( ) {
// . first key is the full size
// . uses the w,G,s,v and F bits to hold this
// . this is no longer necessarily sitehash,but
// can be any val, like now SectionStats is
// using it for the innerHtml sentence
// content hash32
// can be any val
int32_t val32 = g_posdb.getFacetVal32 ( p2 );
// PREADVANCE "p"
@ -5967,12 +5876,6 @@ void PosdbTable::intersectLists10_r ( ) {
skipFacetCheck:
// if only one term like gbfacetstr:gbxpathsitehash123456
// then do not bother adding to top tree
if ( m_r->m_forSectionStats ) goto advance;
// . seoDebug hack so we can set "dcs"
// . we only come here if we actually made it into m_topTree
if ( secondPass ) {

115
Posdb.h

@ -132,15 +132,6 @@ class Posdb {
bool addColl ( char *coll, bool doVerify = true );
// . xmldoc.cpp should call this
// . store all posdb keys from revdbList into one hashtable
// and only add to new list if not in there
//bool makeList ( class RdbList *revdbList ,
// int64_t docId ,
// class Words *words );
// . make a 16-byte key from all these components
// . since it is 16 bytes, the big bit will be set
void makeKey ( void *kp ,
@ -440,80 +431,8 @@ public:
int32_t m_quotedStartId;
};
/*
#include "RdbList.h"
class PosdbList : public RdbList {
public:
// why do i have to repeat this for LinkInfo::set() calling our set()??
void set ( char *list , int32_t listSize , bool ownData ) {
RdbList::set ( list ,
listSize ,
list , // alloc
listSize , // alloc size
0 , // fixed data size
ownData ,
true , // use half keys?
sizeof(key_t));// 12 bytes per key
};
// clear the low bits on the keys so terms are DELETED
void clearDelBits ( );
void print();
// . these are made for special IndexLists, too
// . getTermId() assumes as 12 byte key
int64_t getCurrentTermId12 ( ) {
return getTermId12 ( m_listPtr ); };
int64_t getTermId12 ( char *rec ) {
return (*(uint64_t *)(&rec[4])) >> 16 ;
};
int64_t getTermId16 ( char *rec ) {
return (*(uint64_t *)(&rec[8])) >> 16 ;
};
// these 2 assume 12 and 6 byte keys respectively
int64_t getCurrentDocId () {
if ( isHalfBitOn ( m_listPtr ) ) return getDocId6 (m_listPtr);
else return getDocId12(m_listPtr);
};
int64_t getDocId ( char *rec ) {
if ( isHalfBitOn ( rec ) ) return getDocId6 (rec);
else return getDocId12(rec);
};
int64_t getCurrentDocId12 ( ) {
return getDocId12 ( m_listPtr ); };
int64_t getDocId12 ( char *rec ) {
return ((*(uint64_t *)(rec)) >> 2) & DOCID_MASK; };
int64_t getDocId6 ( char *rec ) {
int64_t docid;
*(int32_t *)(&docid) = *(int32_t *)rec;
((char *)&docid)[4] = rec[4];
docid >>= 2;
return docid & DOCID_MASK;
};
// this works with either 12 or 6 byte keys
unsigned char getCurrentScore ( ) {
return getScore(m_listPtr); };
unsigned char getScore ( char *rec ) { return ~rec[5]; };
// uncomplemented...
void setScore ( char *rec , char score ) { rec[5] = score; };
// for date lists only...
int32_t getCurrentDate ( ) { return ~*(int32_t *)(m_listPtr+6); };
};
*/
#include "Query.h" // MAX_QUERY_TERMS, qvec_t
// max # search results that can be viewed without using TopTree
//#define MAX_RESULTS 1000
class PosdbTable {
public:
@ -525,10 +444,7 @@ class PosdbTable {
char debug ,
void *logstate ,
class TopTree *topTree ,
//char *coll ,
collnum_t collnum ,
//IndexList *lists ,
//int32_t numLists ,
class Msg2 *msg2,
class Msg39Request *r );
@ -538,12 +454,6 @@ class PosdbTable {
// pre-allocate memory since intersection runs in a thread
bool allocTopTree ( );
// . returns false on error and sets errno
// . we assume there are "m_numTerms" lists passed in (see set() above)
//void intersectLists_r ( );
//void intersectLists9_r ( );
void getTermPairScoreForNonBody ( int32_t i, int32_t j,
char *wpi, char *wpj,
char *endi, char *endj,
@ -580,7 +490,9 @@ class PosdbTable {
void freeMem ( ) ;
// has init already been called?
bool isInitialized ( ) { return m_initialized; };
bool isInitialized() {
return m_initialized;
}
uint64_t m_docId;
@ -609,56 +521,37 @@ class PosdbTable {
int32_t m_maxScores;
//char *m_coll;
collnum_t m_collnum;
int32_t *m_qpos;
int32_t *m_wikiPhraseIds;
int32_t *m_quotedStartIds;
//class DocIdScore *m_ds;
int32_t m_qdist;
float *m_freqWeights;
//int64_t *m_freqs;
char *m_bflags;
int32_t *m_qtermNums;
float m_bestWindowScore;
//char **m_finalWinners1;
//char **m_finalWinners2;
//float *m_finalScores;
char **m_windowTermPtrs;
// how many docs in the collection?
int64_t m_docsInColl;
//SectionStats m_sectionStats;
//SafeBuf m_facetHashList;
//HashTableX m_dt;
class Msg2 *m_msg2;
// if getting more than MAX_RESULTS results, use this top tree to hold
// them rather than the m_top*[] arrays above
class TopTree *m_topTree;
//HashTableX m_docIdTable;
SafeBuf m_scoreInfoBuf;
SafeBuf m_pairScoreBuf;
SafeBuf m_singleScoreBuf;
SafeBuf m_stackBuf;
//SafeBuf m_mergeBuf;
// a reference to the query
Query *m_q;
int32_t m_nqt;
// these are NOT in imap space, but in query term space, 1-1 with
// Query::m_qterms[]
//IndexList *m_lists;
//int32_t m_numLists;
// has init() been called?
bool m_initialized;
@ -668,8 +561,6 @@ class PosdbTable {
// for debug msgs
void *m_logstate;
//int64_t m_numDocsInColl;
class Msg39Request *m_r;
// for gbsortby:item.price ...

@ -341,7 +341,7 @@ bool PostQueryRerank::preRerank ( ) {
return false;
// . calculate maximum url length in pages for reranking
// by query terms or topics in a url
// by query terms in a url
int32_t urlLen = mr->size_ubuf - 1;//msg20->getUrlLen();
if ( urlLen > m_maxUrlLen )
m_maxUrlLen = urlLen;
@ -379,7 +379,7 @@ bool PostQueryRerank::preRerank ( ) {
}
// . setup reranking for query terms or topics in url (pqrqttiu)
// . setup reranking for query terms in url (pqrqttiu)
// . add space to max url length for terminating NULL and allocate
// room for max length
m_maxUrlLen++;

@ -266,7 +266,6 @@ bool Process::init ( ) {
// . let's try to save tfndb first, that is the most important,
// followed by titledb perhaps...
m_rdbs[m_numRdbs++] = g_titledb.getRdb ();
m_rdbs[m_numRdbs++] = g_sectiondb.getRdb ();
m_rdbs[m_numRdbs++] = g_posdb.getRdb ();
m_rdbs[m_numRdbs++] = g_spiderdb.getRdb ();
m_rdbs[m_numRdbs++] = g_clusterdb.getRdb ();
@ -277,7 +276,6 @@ bool Process::init ( ) {
// save what urls we have been doled
m_rdbs[m_numRdbs++] = g_doledb.getRdb ();
m_rdbs[m_numRdbs++] = g_titledb2.getRdb ();
m_rdbs[m_numRdbs++] = g_sectiondb2.getRdb ();
m_rdbs[m_numRdbs++] = g_posdb2.getRdb ();
m_rdbs[m_numRdbs++] = g_spiderdb2.getRdb ();
m_rdbs[m_numRdbs++] = g_clusterdb2.getRdb ();

@ -1,6 +1,4 @@
#define MAX_TOPICS_PER_TERM 28
#define MAX_ALLOWED_TOPICS 100
#define EI_NIDENT 16
#ifndef _PROFILER_H_
#define _PROFILER_H_

740
Query.cpp

@ -2211,7 +2211,7 @@ bool Query::setQWords ( char boolFlag ,
else if ( wp[0]=='-' && wplen==1 )
posNum += 0;
// 'mr. x'
else if ( wp[0]=='.' && words.isSpaces2(i,1))
else if ( wp[0]=='.' && words.isSpaces(i,1))
posNum += 0;
// animal (dog)
else
@ -3242,14 +3242,7 @@ bool Query::setQWords ( char boolFlag ,
// make the phrases from the words and the tweaked Bits class
//Phrases phrases;
if ( ! phrases.set ( &words ,
&bits ,
//NULL ,
true , // use stop words?
false , // use stems?
TITLEREC_CURRENT_VERSION,
0 /*niceness*/))//disallows HUGE phrases
if ( !phrases.set( &words, &bits, TITLEREC_CURRENT_VERSION, 0 ) )
return false;
int64_t *wids = words.getWordIds();
@ -3258,17 +3251,7 @@ bool Query::setQWords ( char boolFlag ,
for ( int32_t i = 0 ; i < numWords ; i++ ) {
// get the ith QueryWord
QueryWord *qw = &m_qwords[i];
// if word is ignored because it is opcode, or whatever,
// it cannot start a phrase
// THIS IS BROKEN
//if ( qw->m_queryOp && qw->m_opcode == OP_PIPE){
// for (int32_t j = i-1;j>=0;j--){
// if (!m_qwords[j].m_phraseId) continue;
// m_qwords[j].m_ignorePhrase = IGNORE_BOOLOP;
// break;
// }
//
//}
if ( qw->m_ignoreWord ) continue;
if ( qw->m_fieldCode && qw->m_quoteStart < 0) continue;
// get the first word # to our left that starts a phrase
@ -3280,8 +3263,7 @@ bool Query::setQWords ( char boolFlag ,
if ( ! bits.canPairAcross(j+1) ) break;
//if ( ! bits.canStartPhrase(j) ) continue;
if ( ! wids[j] ) continue;
// phrases.getNumWordsInPhrase()
//if( j + phrases.getMaxWordsInPhrase(j,&tmp)<i) break;
qw->m_leftPhraseStart = j;
// we can't pair across alnum words now, we just want bigrams
if ( wids[j] ) break;
@ -3335,8 +3317,7 @@ bool Query::setQWords ( char boolFlag ,
else qw->m_phraseId = pid;
// how many regular words long is the bigram?
int32_t plen2; phrases.getPhrase ( i , &plen2 ,2);
// the trigram?
int32_t plen3; phrases.getPhrase ( i , &plen3 ,3);
// get just the bigram for now
qw->m_phraseLen = plen2;
// do not ignore the phrase, it's valid
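// The left-scan above, isolated: skip punctuation "words"
// (wids[j] == 0) while pairing is still legal, and stop at the
// first alnum word, which is the bigram's left edge. Sketch with a
// hypothetical helper, same control flow:
/*
static int32_t findLeftPhraseStart ( const int64_t *wids ,
                                     Bits *bits , int32_t i ) {
	for ( int32_t j = i - 1 ; j >= 0 ; j-- ) {
		// stop if phrasing cannot continue across this boundary
		if ( ! bits->canPairAcross ( j + 1 ) ) return -1;
		// skip punct/space words
		if ( ! wids[j] ) continue;
		// first alnum word to our left starts the bigram
		return j;
	}
	return -1;
}
*/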
@ -3736,22 +3717,6 @@ static bool s_isInitialized = false;
// 3rd field = m_hasColon
struct QueryField g_fields[] = {
/*
BR 20160117: No longer hashed
{"gbfieldmatch",
FIELD_GBFIELDMATCH,
true,
"gbfieldmatch:strings.vendor:\"My Vendor Inc.\"",
"Matches all the meta tag or JSON or XML fields that have "
"the name \"strings.vendor\" and contain the exactly provided "
"value, in this case, <i>My Vendor Inc.</i>. This is CASE "
"SENSITIVE and includes punctuation, so it's exact match. In "
"general, it should be a very short termlist, so it should be fast.",
"Advanced Query Operators",
QTF_BEGINNEWTABLE },
*/
{"url",
FIELD_URL,
true,
@ -3779,10 +3744,6 @@ struct QueryField g_fields[] = {
NULL,
0 },
//{"links", FIELD_LINKS, true,"Same as link:."},
//{"ilink", FIELD_ILINK, true,"Similar to above."},
{"sitelink",
FIELD_SITELINK,
true,
@ -3809,8 +3770,6 @@ struct QueryField g_fields[] = {
NULL,
QTF_DUP },
//{"coll", FIELD_COLL, true,"Not sure if this works."},
{"ip",
FIELD_IP,
true,
@ -3877,22 +3836,6 @@ struct QueryField g_fields[] = {
NULL,
0},
//{"isclean", FIELD_ISCLEAN, true,"Matches all pages that are deemed non-offensive and safe for children."},
/*
BR 20160108: No longer stored in our posdb as we don't plan to use it
{"gbinrss",
FIELD_GBRSS,
true,
"gbinrss:1",
"Matches all documents that are in RSS feeds. Likewise, use "
"<i>gbinrss:0</i> to match all documents that are NOT in RSS feeds.",
NULL,
0},
*/
{"type",
FIELD_TYPE,
false,
@ -3925,44 +3868,6 @@ struct QueryField g_fields[] = {
NULL,
0},
/*
BR 20160117: No longer hash image info
{"gbimage",
FIELD_URL,
false,
"gbimage:site.com/image.jpg",
"Matches all documents that contain the specified image.",
NULL,
0},
{"gbhasthumbnail",
FIELD_GENERIC,
false,
"gbhasthumbnail:1",
"Matches all documents for which Gigablast detected a thumbnail. "
"Likewise use <i>gbhasthumbnail:0</i> to match all documents that "
"do not have thumbnails.",
NULL,
0},
*/
/*
BR 20160117: No longer hash tags
{"gbtag*",
FIELD_TAG,
false,
"gbtag*",
"Matches all documents whose tag named * have the specified value "
"in the tagdb entry for the url. Example: gbtagsitenuminlinks:2 "
"matches all documents that have 2 qualified "
"inlinks pointing to their site "
"based on the tagdb record. You can also provide your own "
"tags in addition to the tags already present. See the <i>tagdb</i> "
"menu for more information.",
NULL,
0},
*/
{"gbzipcode",
FIELD_ZIP,
false,
@ -3972,25 +3877,6 @@ struct QueryField g_fields[] = {
NULL,
0},
/*
BR 20160108: No longer stored in our posdb as we don't plan to use it
{"gbcharset",
FIELD_CHARSET,
false,
"gbcharset:windows-1252",
"Matches all documents originally in the Windows-1252 charset. "
"Available character sets are listed in the <i>iana_charset.cpp</i> "
"file in the open source distribution. There are a lot. Some "
"more popular ones are: <i>us, latin1, iso-8859-1, csascii, ascii, "
"latin2, latin3, latin4, greek, utf-8, shift_jis.",
NULL,
0},
*/
// this just complicates things for now, so comment out
//{"urlhash",FIELD_URLHASH, false,""},
{"gblang",
FIELD_GBLANG,
false,
@ -4005,91 +3891,6 @@ struct QueryField g_fields[] = {
NULL,
0},
//{"gbquality",FIELD_GBQUALITY,true,""},
//{"gblinktextin",FIELD_LINKTEXTIN,true,""},
//{"gblinktextout",FIELD_LINKTEXTOUT,true,""},
//{"gbkeyword",FIELD_KEYWORD,true,""},
//{"gbcharset", FIELD_CHARSET, false,""},
/*
// BR 20160106: No longer stored in our posdb as we don't use it
{"gbpathdepth",
FIELD_GBOTHER,
false,
"gbpathdepth:3",
"Matches all documents whose url has 3 path components to it like "
"http://somedomain.com/dir1/dir2/dir3/foo.html",
NULL,
0},
*/
/*
// BR 20160108: No longer stored in our posdb as we don't use it
{"gbhopcount",
FIELD_GBOTHER,
false,
"gbhopcount:2",
"Matches all documents that are a minimum of two link hops away "
"from a root url.",
NULL,
0},
*/
/*
// BR 20160108: No longer stored in our posdb as we don't use it
{"gbhasfilename",
FIELD_GBOTHER,
false,
"gbhasfilename:1",
"Matches all documents whose url ends in a filename like "
"<i>http://somedomain.com/dir1/myfile</i> and not "
"<i>http://somedomain.com/dir1/dir2/</i>. Likewise, use "
"<i>gbhasfilename:0</i> to match all the documents that do not "
"have a filename in their url.",
NULL,
0},
*/
/*
BR 20160108: No longer stored in our posdb as we don't plan to use it
{"gbiscgi",
FIELD_GBOTHER,
false,
"gbiscgi:1",
"Matches all documents that have a question mark in their url. "
"Likewise gbiscgi:0 matches all documents that do not.",
NULL,
0},
*/
/*
BR 20160108: No longer stored in our posdb as we don't use it
{"gbhasext",
FIELD_GBOTHER,
false,
"gbhasext:1",
"Matches all documents that have a file extension in their url. "
"Likewise, <i>gbhasext:0</i> matches all documents that do not have "
"a file extension in their url.",
NULL,
0},
*/
/*
BR 20160106 removed
{"gbsubmiturl",
FIELD_GBOTHER,
false,
"gbsubmiturl:domain.com/process.php",
"Matches all documents that have a form that submits to the "
"specified url.",
NULL,
0},
*/
// diffbot only
{"gbparenturl",
FIELD_GBPARENTURL,
@ -4131,92 +3932,10 @@ struct QueryField g_fields[] = {
NULL,
0},
//
// for content type CT_STATUS documents (Spider status docs)
//
//{"qdom", FIELD_QUOTA, false,""},
//{"qhost", FIELD_QUOTA, false,""},
/*
// BR 20160117: No longer supported
{"gbsortbyfloat",
FIELD_GBSORTBYFLOAT,
false,
"cameras gbsortbyfloat:price",
"Sort all documents that "
"contain 'camera' by price. <i>price</i> can be a root JSON field or "
"in a meta tag, or in an xml &lt;price&gt; tag.",
"Numeric Field Query Operators",
QTF_BEGINNEWTABLE },
{"gbsortbyfloat",
FIELD_GBSORTBYFLOAT,
false,
"cameras gbsortbyfloat:product.price",
"Sort all documents that "
"contain 'camera' by price. <i>price</i> can be in a JSON document "
"like "
"<i>{ \"product\":{\"price\":1500.00}} "
"</i> or, alternatively, an XML document like <i>"
"&lt;product&gt;&lt;price&gt;1500.00&lt;/price&gt;&lt;/product&gt;"
"</i>",
NULL,
0 },
{"gbrevsortbyfloat",
FIELD_GBREVSORTBYFLOAT,
false,
"cameras gbrevsortbyfloat:product.price",
"Like above example but sorted with highest prices on top.",
NULL,
0 },
{"gbsortby",
FIELD_GBSORTBYFLOAT,
false,
"dog gbsortbyint:gbdocspiderdate",
"Sort the documents that contain 'dog' by "
"the date they were last spidered, with the newest "
"on top.",
NULL,
QTF_HIDE},
{"gbrevsortby",
FIELD_GBREVSORTBYFLOAT,
false,
"dog gbrevsortbyint:gbdocspiderdate",
"Sort the documents that contain 'dog' by "
"the date they were last spidered, but with the "
"oldest on top.",
NULL,
QTF_HIDE},
*/
/*
// BR 20160117: No longer supported
{"gbsortbyint",
FIELD_GBSORTBYINT,
false,
"pilots gbsortbyint:employees",
"Sort all documents that "
"contain 'pilots' by employees. "
"<i>employees</i> can be a root JSON field or "
"in a meta tag, or in an xml &lt;price&gt; tag. The value it "
"contains is interpreted as a 32-bit integer.",
NULL,
0 },
*/
{"gbsortbyint",
FIELD_GBSORTBYINT,
false,
@ -4225,33 +3944,6 @@ struct QueryField g_fields[] = {
NULL,
0},
/*
// BR 20160117: No longer supported
{"gbsortbyint",
FIELD_GBSORTBYINT,
false,
"gbsortbyint:company.employees",
"Sort all documents by employees. Documents can contain "
"<i>employees</i> in a JSON document "
"like "
"<i>{ \"product\":{\"price\":1500.00}} "
"</i> or, alternatively, an XML document like <i>"
"&lt;product&gt;&lt;price&gt;1500.00&lt;/price&gt;&lt;/product&gt;"
"</i>",
NULL,
0 },
{"gbsortbyint",
FIELD_GBSORTBYINT,
false,
"gbsortbyint:gbsitenuminlinks",
"Sort all documents by the number of distinct inlinks the "
"document's site has.",
NULL,
0 },
*/
{"gbrevsortbyint",
FIELD_GBREVSORTBYINT,
false,
@ -4261,114 +3953,6 @@ struct QueryField g_fields[] = {
NULL,
0},
/*
// BR 20160117: No longer supported
// gbmin:price:1.23
{"gbminfloat",
FIELD_GBNUMBERMIN,
false,
"cameras gbminfloat:price:109.99",
"Matches all documents that "
"contain 'camera' or 'cameras' and have a price of at least 109.99. "
"<i>price</i> can be a root JSON field or "
"in a meta tag name <i>price</i>, or in an xml &lt;price&gt; tag.",
NULL,
0 },
{"gbminfloat",
FIELD_GBNUMBERMIN,
false,
"cameras gbminfloat:product.price:109.99",
"Matches all documents that "
"contain 'camera' or 'cameras' and have a price of at least 109.99 "
"in a JSON document like "
"<i>{ \"product\":{\"price\":1500.00}} "
"</i> or, alternatively, an XML document like <i>"
"&lt;product&gt;&lt;price&gt;1500.00&lt;/price&gt;&lt;/product&gt;"
"</i>",
NULL,
0 },
// alias we need to bury
{"gbmin",
FIELD_GBNUMBERMIN,
false,
"",
"",
NULL,
QTF_HIDE},
{"gbmaxfloat",
FIELD_GBNUMBERMAX,
false,
"cameras gbmaxfloat:price:109.99",
"Like the gbminfloat examples above, but is an upper bound.",
NULL,
0 },
{"gbequalfloat",
FIELD_GBNUMBEREQUALFLOAT,
false,
"gbequalfloat:product.price:1.23",
"Similar to gbminfloat and gbmaxfloat but is an equality constraint.",
NULL,
0 },
{"gbmax",
FIELD_GBNUMBERMAX,
false,
"",
"",
NULL,
QTF_HIDE},
{"gbminint",
FIELD_GBNUMBERMININT,
false,
"gbminint:gbspiderdate:1391749680",
"Matches all documents with a spider timestamp of at least "
"1391749680. Use this as opposed th gbminfloat when you need "
"32 bits of integer precision.",
NULL,
0},
{"gbmaxint",
FIELD_GBNUMBERMAXINT,
false,
"gbmaxint:company.employees:20",
"Matches all companies with 20 or less employees "
"in a JSON document like "
"<i>{ \"company\":{\"employees\":13}} "
"</i> or, alternatively, an XML document like <i>"
"&lt;company&gt;&lt;employees&gt;13&lt;/employees&gt;"
"&lt;/company&gt;"
"</i>",
NULL,
0},
{"gbequalint",
FIELD_GBNUMBEREQUALINT,
false,
"gbequalint:company.employees:13",
"Similar to gbminint and gbmaxint but is an equality constraint.",
NULL,
0},
*/
{"gbdocspiderdate",
FIELD_GENERIC,
false,
@ -4413,114 +3997,6 @@ struct QueryField g_fields[] = {
NULL,
0},
// {"gbreplyspiderdate",FIELD_GENERIC,false,
// "Example: gbspiderdate:1400081479 will return spider log "
// "results that have "
// "that spider date timestamp (UTC)"},
/* BR 20160108: All facets disabled as test. Don't think we will need any of them */
#ifdef SUPPORT_FACETS
{"gbfacetstr",
FIELD_GBFACETSTR,
false,
"gbfacetstr:color",
"Returns facets in "
"the search results "
"by their color field. <i>color</i> is case INsensitive.",
"Facet Related Query Operators",
QTF_BEGINNEWTABLE},
{"gbfacetstr",
FIELD_GBFACETSTR,
false,
"gbfacetstr:product.color",
"Returns facets in "
"the color field in a JSON document like "
"<i>{ \"product\":{\"color\":\"red\"}} "
"</i> or, alternatively, an XML document like <i>"
"&lt;product&gt;&lt;color&gt;red&lt;/price&gt;&lt;/product&gt;"
"</i>. <i>product.color</i> is case INsensitive.",
NULL,
0},
{"gbfacetstr",
FIELD_GBFACETSTR,
false,
"gbfacetstr:gbtagsite cat",
"Returns facets from the site names of all pages "
"that contain the word 'cat' or 'cats', etc. <i>gbtagsite</i> is case insensitive."
,
NULL,
0},
{"gbfacetint", FIELD_GBFACETINT, false,
"gbfacetint:product.cores",
"Returns facets in "
"of the <i>cores</i> field in a JSON document like "
"<i>{ \"product\":{\"cores\":10}} "
"</i> or, alternatively, an XML document like <i>"
"&lt;product&gt;&lt;cores&gt;10&lt;/price&gt;&lt;/product&gt;"
"</i>. <i>product.cores</i> is case INsensitive.",
NULL,
0},
{"gbfacetint", FIELD_GBFACETINT, false,
"gbfacetint:gbhopcount",
"Returns facets in "
"of the <i>gbhopcount</i> field over the documents so you can "
"search the distribution of hopcounts over the index. <i>gbhopcount</i> is "
"case INsensitive.",
NULL,
0},
{"gbfacetint", FIELD_GBFACETINT, false,
"gbfacetint:gbtagsitenuminlinks",
"Returns facets in "
"of the <i>sitenuminlinks</i> field for the tag <i>sitenuminlinks</i>"
"in the tag for each site. Any numeric tag in tagdb can be "
"facetizeed "
"in this manner so you can add your own facets this way on a per "
"site or per url basis by making tagdb entries. Case Insensitive.",
NULL,
0},
{"gbfacetint", FIELD_GBFACETINT, false,
"gbfacetint:size,0-10,10-20,30-100,100-200,200-1000,1000-10000",
"Returns facets in "
"of the <i>size</i> field (either in json, field or a meta tag) "
"and cluster the results into the specified ranges. <i>size</i> is "
"case INsensitive.",
NULL,
0},
{"gbfacetint", FIELD_GBFACETINT, false,
"gbfacetint:gbsitenuminlinks",
"Returns facets based on # of site inlinks the site of each "
"result has. <i>gbsitenuminlinks</i> is case INsensitive.",
NULL,
0},
{"gbfacetfloat", FIELD_GBFACETFLOAT, false,
"gbfacetfloat:product.weight",
"Returns facets "
"of the <i>weight</i> field in a JSON document like "
"<i>{ \"product\":{\"weight\":1.45}} "
"</i> or, alternatively, an XML document like <i>"
"&lt;product&gt;&lt;weight&gt;1.45&lt;/price&gt;&lt;/product&gt;"
"</i>. <i>product.weight</i> is case INsensitive.",
NULL,
0},
{"gbfacetfloat", FIELD_GBFACETFLOAT, false,
"gbfacetfloat:product.price,0-1.5,1.5-5,5.0-20,20-100.0",
"Similar to above but cluster the pricess into the specified ranges. "
"<i>product.price</i> is case insensitive.",
NULL,
0},
#endif
//
// spider status docs queries
//
@ -4610,17 +4086,6 @@ struct QueryField g_fields[] = {
NULL,
0},
#ifdef SUPPORT_FACETS
{"gbssNumRedirects",
FIELD_GENERIC,
false,
"gbfacetint:gbssNumRedirects",
"Query on the number of times the url redirect when attempting to "
"index it.",
NULL,
0},
#endif
{"gbssDocId",
FIELD_GENERIC,
false,
@ -4629,26 +4094,6 @@ struct QueryField g_fields[] = {
NULL,
0},
#ifdef SUPPORT_FACETS
{"gbssHopCount",
FIELD_GENERIC,
false,
"gbfacetint:gbssHopCount",
"Query on the hop count of the document.",
NULL,
0},
{"gbssCrawlRound",
FIELD_GENERIC,
false,
"gbfacetint:gbssCrawlRound",
"Query on the crawl round number.",
NULL,
0},
#endif
{"gbssDupOfDocId",
FIELD_GENERIC,
false,
@ -4689,17 +4134,6 @@ struct QueryField g_fields[] = {
NULL,
0},
#ifdef SUPPORT_FACETS
{"gbssContentHash32",
FIELD_GENERIC,
false,
"gbfacetint:gbssContentHash32",
"The hash of the document content, excluding dates and times. Used "
"internally for deduping.",
NULL,
0},
#endif
{"gbssDownloadDurationMS",
FIELD_GENERIC,
false,
@ -4724,25 +4158,6 @@ struct QueryField g_fields[] = {
NULL,
0},
#ifdef SUPPORT_FACETS
{"gbssUsedRobotsTxt",
FIELD_GENERIC,
false,
"gbfacetint:gbssUsedRobotsTxt",
"This is 0 or 1 depending on if robots.txt was not obeyed or obeyed, "
"respectively.",
NULL,
0},
{"gbssConsecutiveErrors",
FIELD_GENERIC,
false,
"gbfacetint:gbssConsecutiveErrors",
"For the last set of indexing attempts how many were errors?",
NULL,
0},
#endif
{"gbssIp",
FIELD_GENERIC,
false,
@ -4778,65 +4193,6 @@ struct QueryField g_fields[] = {
NULL,
0},
#ifdef SUPPORT_FACETS
{"gbssContentInjected",
FIELD_GENERIC,
false,
"gbfacetint:gbssContentInjected",
"This is 0 or 1 if the content was not injected or injected, "
"respectively.",
NULL,
0},
{"gbssPercentContentChanged",
FIELD_GENERIC,
false,
"gbfacetfloat:gbssPercentContentChanged",
"A float between 0 and 100, inclusive. Represents how much "
"the document has changed since the last time we indexed it. This is "
"only valid if the document was successfully indexed this time."
"respectively.",
NULL,
0},
{"gbssSpiderPriority",
FIELD_GENERIC,
false,
"gbfacetint:gbssSpiderPriority",
"The spider priority, from 0 to 127, inclusive, of the document "
"according to the url filters table.",
NULL,
0},
{"gbssMatchingUrlFilter",
FIELD_GENERIC,
false,
"gbfacetstr:gbssMatchingUrlFilter",
"The url filter expression the document matched.",
NULL,
0},
{"gbssLanguage",
FIELD_GENERIC,
false,
"gbfacetstr:gbssLanguage",
"The language of the document. If document was empty or not "
"downloaded then this will not be present. Uses xx to mean "
"unknown language. Uses the language abbreviations found at the "
"bottom of the url filters page.",
NULL,
0},
{"gbssContentType",
FIELD_GENERIC,
false,
"gbfacetstr:gbssContentType",
"The content type of the document. Like html, xml, json, pdf, etc. "
"This field is not present if unknown.",
NULL,
0},
#endif
{"gbssContentLen",
FIELD_GENERIC,
false,
@ -4845,93 +4201,8 @@ struct QueryField g_fields[] = {
NULL,
0},
#ifdef SUPPORT_FACETS
{"gbssCrawlDelayMS",
FIELD_GENERIC,
false,
"gbfacetint:gbssCrawlDelay",
"The crawl delay according to the robots.txt of the document. "
"This is -1 if not specified in the robots.txt or not found.",
NULL,
0},
#endif
/*
{"gbssSentToDiffbotThisTime",
FIELD_GENERIC,
false,
"gbssSentToDiffbotThisTime:1",
"Was the document's url sent to diffbot for processing this time "
"of spidering the url?",
NULL,
0},
{"gbssSentToDiffbotAtSomeTime",
FIELD_GENERIC,
false,
"gbssSentToDiffbotAtSomeTime:1",
"Was the document's url sent to diffbot for processing, either this "
"time or some time before?",
NULL,
0},
{"gbssDiffbotReplyCode",
FIELD_GENERIC,
false,
"gbssDiffbotReplyCode:0",
"The reply received from diffbot. 0 means success, otherwise, it "
"indicates an error code.",
NULL,
0},
{"gbssDiffbotReplyMsg",
FIELD_GENERIC,
false,
"gbfacetstr:gbssDiffbotReplyMsg:0",
"The reply received from diffbot represented in text.",
NULL,
0},
{"gbssDiffbotReplyLen",
FIELD_GENERIC,
false,
"gbsortbyint:gbssDiffbotReplyLen",
"The length of the reply received from diffbot.",
NULL,
0},
{"gbssDiffbotReplyResponseTimeMS",
FIELD_GENERIC,
false,
"gbsortbyint:gbssDiffbotReplyResponseTimeMS",
"The time in milliseconds it took to get a reply from diffbot.",
NULL,
0},
{"gbssDiffbotReplyRetries",
FIELD_GENERIC,
false,
"gbfacetint:gbssDiffbotReplyRetries",
"The number of times we had to resend the request to diffbot "
"because diffbot returned a 504 gateway timed out error.",
NULL,
0},
{"gbssDiffbotReplyNumObjects",
FIELD_GENERIC,
false,
"gbfacetint:gbssDiffbotReplyNumObjects",
"The number of JSON objects diffbot excavated from the provided url.",
NULL,
0},
*/
// they don't need to know about this
{"gbad",FIELD_GBAD,false,"","",NULL,QTF_HIDE},
//BR 20160117 removed: {"gbtagvector", FIELD_GBTAGVECTOR, false,"","",NULL,QTF_HIDE},
{"gbsamplevector", FIELD_GBSAMPLEVECTOR, false,"","",NULL,QTF_HIDE},
{"gbcontenthash", FIELD_GBCONTENTHASH, false,"","",NULL,QTF_HIDE},
{"gbduphash" ,FIELD_GBOTHER,false,"","",NULL,QTF_HIDE},
@ -5606,7 +4877,6 @@ bool QueryTerm::isSplit() {
if(!m_fieldCode) return true;
if(m_fieldCode == FIELD_QUOTA) return false;
//BR 20160117 removed: if(m_fieldCode == FIELD_GBTAGVECTOR) return false;
//BR 20160106 removed: if(m_fieldCode == FIELD_GBGIGABITVECTOR) return false;
if(m_fieldCode == FIELD_GBSAMPLEVECTOR) return false;
if(m_fieldCode == FIELD_GBSECTIONHASH) return false;
if(m_fieldCode == FIELD_GBCONTENTHASH) return false;

25
Query.h

@ -569,15 +569,9 @@ class QueryTerm {
char m_endKey [MAX_KEY_BYTES];
char m_ks;
// used by Msg40.cpp for gigabits generation
int64_t m_hash64d;
int32_t m_popWeight;
uint64_t m_numDocsThatHaveFacet;
};
//#define MAX_OPSLOTS 256
#define MAX_EXPRESSIONS 100
// operand1 AND operand2 OR ...
@ -646,26 +640,14 @@ class Query {
int32_t serialize(char *buf, int32_t bufLen);
int32_t deserialize(char *buf, int32_t bufLen);
// . if a term is truncated in indexdb, change its '+' sign to a '*'
// . will recompute m_bitScores to fix bit #7
//void softenTruncatedTerms ( );
bool setQueryTermScores ( int64_t *termFreqsArg ) ;
// about how many hits for this query?
//int64_t getEstimatedTotalHits ( );
char *getQuery ( ) { return m_orig ; };
int32_t getQueryLen ( ) { return m_origLen; };
//int32_t getNumIgnored ( ) { return m_numIgnored; };
//int32_t getNumNotIgnored ( ) { return m_numTerms ; };
int32_t getNumTerms ( ) { return m_numTerms; };
char getTermSign ( int32_t i ) { return m_qterms[i].m_termSign; };
bool isPhrase ( int32_t i ) { return m_qterms[i].m_isPhrase; };
bool isInPhrase ( int32_t i ) { return m_qterms[i].m_inPhrase; };
bool isInQuotes ( int32_t i ) { return m_qterms[i].m_inQuotes; };
int64_t getTermId ( int32_t i ) { return m_qterms[i].m_termId; };
char getFieldCode2( int32_t i ) { return m_qterms[i].m_fieldCode; };
int64_t getRawTermId ( int32_t i ) { return m_qterms[i].m_rawTermId; };
@ -687,13 +669,6 @@ class Query {
bool isSplit(int32_t i) { return m_qterms[i].isSplit(); };
// . Msg39 calls this to get our vector so it can pass it to Msg37
// . the signs and ids are dupped in the QueryTerm classes, too
//int64_t *getTermFreqs ( ) { return m_termFreqs ; };
//int64_t getTermFreq ( int32_t i ) { return m_termFreqs[i]; };
//int64_t *getTermIds ( ) { return m_termIds ; };
//char *getTermSigns ( ) { return m_termSigns ; };
//int32_t *getComponentCodes ( ) { return m_componentCodes; };
int64_t getRawWordId ( int32_t i ) { return m_qwords[i].m_rawWordId;};
int32_t getNumComponentTerms ( ) { return m_numComponents; };

85
Rdb.cpp

@ -16,7 +16,6 @@
#include "Spider.h"
#include "SpiderColl.h"
#include "Doledb.h"
#include "Revdb.h"
#include "hash.h"
void attemptMergeAll ( int fd , void *state ) ;
@ -168,10 +167,6 @@ bool Rdb::init ( char *dir ,
if ( m_rdbId == RDB2_INDEXDB2 ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB_POSDB ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB2_POSDB2 ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
//if ( m_rdbId == RDB_DATEDB ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
//if ( m_rdbId == RDB2_DATEDB2 ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB_SECTIONDB ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB2_SECTIONDB2) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB_TITLEDB ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB2_TITLEDB2 ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB_SPIDERDB ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
@ -180,30 +175,7 @@ bool Rdb::init ( char *dir ,
if ( m_rdbId == RDB_SERPDB ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB_LINKDB ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB2_LINKDB2 ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB_REVDB ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
if ( m_rdbId == RDB2_REVDB2 ) m_pageSize = GB_INDEXDB_PAGE_SIZE;
// let's obsolete this rec/list cache because using the
// disk page cache cleverly is usually better than this,
// because this ignores newly added data (it is not realtime),
// and it really only saves us from having to intersect a
// bunch of indexdb/datedb lists.
/*
loadCacheFromDisk = false;
maxCacheMem = 0;
maxCacheNodes = 0;
// . set up our cache
// . we could be adding lists so keep fixedDataSize -1 for cache
if ( ! m_cache.init ( maxCacheMem ,
fixedDataSize ,
true , // support lists
maxCacheNodes ,
m_useHalfKeys ,
m_dbname ,
loadCacheFromDisk ,
m_ks , // cache key size
m_ks ) ) // data key size
return false;
*/
// we can't merge more than MAX_RDB_FILES files at a time
if ( minToMerge > MAX_RDB_FILES ) minToMerge = MAX_RDB_FILES;
m_minToMerge = minToMerge;
@ -1736,17 +1708,14 @@ bool Rdb::addList ( collnum_t collnum , RdbList *list,
//! g_conf.m_rebuildNoSplits &&
//! g_conf.m_removeBadPages &&
( m_rdbId == RDB_TITLEDB ||
//m_rdbId == RDB_SECTIONDB ||
m_rdbId == RDB_PLACEDB ||
m_rdbId == RDB_TFNDB ||
m_rdbId == RDB_INDEXDB ||
m_rdbId == RDB_POSDB ||
//m_rdbId == RDB_DATEDB ||
m_rdbId == RDB_POSDB ||
m_rdbId == RDB_CLUSTERDB ||
m_rdbId == RDB_LINKDB ||
m_rdbId == RDB_DOLEDB ||
m_rdbId == RDB_SPIDERDB ||
m_rdbId == RDB_REVDB ) ) {
m_rdbId == RDB_SPIDERDB ) ) {
// exception, spider status docs can be deleted from titledb
// if user turns off 'index spider replies' before doing
@ -1765,20 +1734,6 @@ bool Rdb::addList ( collnum_t collnum , RdbList *list,
exception:
/*
if ( g_repair.isRepairActive() &&
g_repair.m_fullRebuild &&
collnum != g_repair.m_newCollnum &&
m_rdbId != RDB_TAGDB ) {
log("db: How did an add come in while in full repair mode?"
" addCollnum=%"INT32" repairCollnum=%"INT32" db=%s",
(int32_t)collnum , (int32_t)g_repair.m_newCollnum ,
m_dbname );
g_errno = EREPAIRING;
return false;
}
*/
// if we are currently in a quickpoll, make sure we are not in
// RdbTree::getList(), because we could mess that loop up by adding
// or deleting a record into/from the tree now
@ -2811,23 +2766,19 @@ Rdb *getRdbFromId ( uint8_t rdbId ) {
s_table9 [ RDB_INDEXDB ] = g_indexdb.getRdb();
s_table9 [ RDB_POSDB ] = g_posdb.getRdb();
s_table9 [ RDB_TITLEDB ] = g_titledb.getRdb();
s_table9 [ RDB_SECTIONDB ] = g_sectiondb.getRdb();
s_table9 [ RDB_SPIDERDB ] = g_spiderdb.getRdb();
s_table9 [ RDB_DOLEDB ] = g_doledb.getRdb();
s_table9 [ RDB_CLUSTERDB ] = g_clusterdb.getRdb();
s_table9 [ RDB_LINKDB ] = g_linkdb.getRdb();
s_table9 [ RDB_STATSDB ] = g_statsdb.getRdb();
s_table9 [ RDB_REVDB ] = g_revdb.getRdb();
s_table9 [ RDB_PARMDB ] = NULL;
s_table9 [ RDB2_INDEXDB2 ] = g_indexdb2.getRdb();
s_table9 [ RDB2_POSDB2 ] = g_posdb2.getRdb();
s_table9 [ RDB2_TITLEDB2 ] = g_titledb2.getRdb();
s_table9 [ RDB2_SECTIONDB2 ] = g_sectiondb2.getRdb();
s_table9 [ RDB2_SPIDERDB2 ] = g_spiderdb2.getRdb();
s_table9 [ RDB2_CLUSTERDB2 ] = g_clusterdb2.getRdb();
s_table9 [ RDB2_LINKDB2 ] = g_linkdb2.getRdb();
s_table9 [ RDB2_REVDB2 ] = g_revdb2.getRdb();
s_table9 [ RDB2_TAGDB2 ] = g_tagdb2.getRdb();
}
if ( rdbId >= RDB_END ) return NULL;
@ -2840,22 +2791,18 @@ char getIdFromRdb ( Rdb *rdb ) {
if ( rdb == g_indexdb.getRdb () ) return RDB_INDEXDB;
if ( rdb == g_posdb.getRdb () ) return RDB_POSDB;
if ( rdb == g_titledb.getRdb () ) return RDB_TITLEDB;
if ( rdb == g_sectiondb.getRdb () ) return RDB_SECTIONDB;
if ( rdb == g_spiderdb.getRdb () ) return RDB_SPIDERDB;
if ( rdb == g_doledb.getRdb () ) return RDB_DOLEDB;
if ( rdb == g_clusterdb.getRdb () ) return RDB_CLUSTERDB;
if ( rdb == g_statsdb.getRdb () ) return RDB_STATSDB;
if ( rdb == g_linkdb.getRdb () ) return RDB_LINKDB;
if ( rdb == g_revdb.getRdb () ) return RDB_REVDB;
if ( rdb == g_indexdb2.getRdb () ) return RDB2_INDEXDB2;
if ( rdb == g_posdb2.getRdb () ) return RDB2_POSDB2;
if ( rdb == g_tagdb2.getRdb () ) return RDB2_TAGDB2;
if ( rdb == g_titledb2.getRdb () ) return RDB2_TITLEDB2;
if ( rdb == g_sectiondb2.getRdb () ) return RDB2_SECTIONDB2;
if ( rdb == g_spiderdb2.getRdb () ) return RDB2_SPIDERDB2;
if ( rdb == g_clusterdb2.getRdb () ) return RDB2_CLUSTERDB2;
if ( rdb == g_linkdb2.getRdb () ) return RDB2_LINKDB2;
if ( rdb == g_revdb2.getRdb () ) return RDB2_REVDB2;
log(LOG_LOGIC,"db: getIdFromRdb: no rdbId for %s.",rdb->m_dbname);
return 0;
@ -2868,12 +2815,10 @@ char isSecondaryRdb ( uint8_t rdbId ) {
case RDB2_POSDB2 : return true;
case RDB2_TAGDB2 : return true;
case RDB2_TITLEDB2 : return true;
case RDB2_SECTIONDB2 : return true;
case RDB2_PLACEDB2 : return true;
case RDB2_SPIDERDB2 : return true;
case RDB2_TFNDB2 : return true;
case RDB2_CLUSTERDB2 : return true;
case RDB2_REVDB2 : return true;
case RDB2_LINKDB2 : return true;
}
return false;
@ -2898,13 +2843,9 @@ char getKeySizeFromRdbId ( uint8_t rdbId ) {
i == RDB_SPIDERDB ||
i == RDB_TAGDB ||
i == RDB_SYNCDB ||
i == RDB_SECTIONDB ||
i == RDB_PLACEDB ||
//i == RDB2_DATEDB2 ||
i == RDB2_SPIDERDB2 ||
i == RDB2_TAGDB2 ||
i == RDB2_SECTIONDB2 ||
i == RDB2_PLACEDB2 )
ks = 16;
if ( i == RDB_POSDB || i == RDB2_POSDB2 )
@ -2942,11 +2883,9 @@ int32_t getDataSizeFromRdbId ( uint8_t rdbId ) {
i == RDB_TFNDB ||
i == RDB_CLUSTERDB ||
i == RDB_DATEDB ||
//i == RDB_FAKEDB ||
i == RDB_LINKDB )
ds = 0;
else if ( i == RDB_TITLEDB ||
i == RDB_REVDB ||
i == RDB_SYNCDB ||
i == RDB_CACHEDB ||
i == RDB_SERPDB ||
@ -2960,8 +2899,6 @@ int32_t getDataSizeFromRdbId ( uint8_t rdbId ) {
ds = -1;
else if ( i == RDB_STATSDB )
ds = sizeof(StatData);
else if ( i == RDB_SECTIONDB )
ds = sizeof(SectionVote);
else if ( i == RDB2_POSDB2 ||
i == RDB2_INDEXDB2 ||
i == RDB2_TFNDB2 ||
@ -2970,23 +2907,17 @@ int32_t getDataSizeFromRdbId ( uint8_t rdbId ) {
i == RDB2_DATEDB2 )
ds = 0;
else if ( i == RDB2_TITLEDB2 ||
i == RDB2_REVDB2 ||
i == RDB2_TAGDB2 ||
i == RDB2_CATDB2 ||
i == RDB2_SPIDERDB2 ||
i == RDB2_PLACEDB2 )
ds = -1;
else if ( i == RDB2_SECTIONDB2 )
ds = sizeof(SectionVote);
else { char *xx=NULL;*xx=0; }
// get the rdb for this rdbId
//Rdb *rdb = getRdbFromId ( i );
// sanity check
//if ( ! rdb ) continue;//{ char *xx=NULL;*xx=0; }
// sanity!
//if ( rdb->m_ks == 0 ) { char *xx=NULL;*xx=0; }
else {
continue;
}
// set the table
s_table2[i] = ds;//rdb->m_fixedDataSize;
s_table2[i] = ds;
}
}
return s_table2[rdbId];

File diff suppressed because it is too large

@ -80,40 +80,27 @@ public:
Msg5 m_msg5b;
Msg4 m_msg4;
bool m_needsCallback;
//Msg50 m_msg50;
char m_docQuality;
//Msg14 m_msg14;
//RdbList m_scanList;
RdbList m_titleRecList;
int64_t m_docId;
char m_isDelete;
RdbList m_ulist;
RdbList m_addlist;
//int32_t m_ruleset;
//LinkTextReply m_rootLinkText;
int64_t m_totalMem;
int32_t m_stage ;
int32_t m_tfn;
int32_t m_count;
bool m_updated;
//key_t m_currentTitleRecKey; // for tfndb
// titledb scan vars
//key_t m_nextRevdbKey;
key_t m_nextTitledbKey;
key_t m_nextSpiderdbKey;
//key_t m_nextIndexdbKey;
key_t m_nextPosdbKey;
//key_t m_nextDatedbKey;
key128_t m_nextLinkdbKey;
//key128_t m_nextPlacedbKey;
key_t m_endKey;
int64_t m_uh48;
//TitleRec m_tr;
//Msg8a m_msg8a;
int32_t m_priority;
uint64_t m_contentHash;
//key_t m_tfndbKey;
key_t m_clusterdbKey ;
key_t m_spiderdbKey;
char m_srBuf[SR_BUFSIZE];
@ -127,8 +114,6 @@ public:
// spiderdb scan vars
bool m_isNew;
//SpiderRec m_sr;
//SiteRec m_siteRec;
TagRec m_tagRec;
@ -139,8 +124,6 @@ public:
int64_t m_prevDocId;
bool m_completedFirstScan ;
bool m_completedSpiderdbScan ;
//bool m_completedIndexdbScan ;
//key_t m_lastRevdbKey;
key_t m_lastTitledbKey;
key_t m_lastSpiderdbKey;
@ -158,7 +141,6 @@ public:
int64_t m_recsRoot;
int64_t m_recsNonRoot;
int64_t m_recsInjected;
//int32_t m_fn;
// spiderdb scan stats
int32_t m_spiderRecsScanned ;
@ -168,21 +150,13 @@ public:
// generic scan parms
char m_rebuildTitledb ;
//char m_rebuildIndexdb ;
char m_rebuildPosdb ;
//char m_rebuildNoSplits ;
//char m_rebuildDatedb ;
//char m_rebuildTfndb ;
char m_rebuildClusterdb ;
char m_rebuildSpiderdb ;
char m_rebuildSitedb ;
char m_rebuildLinkdb ;
char m_rebuildTagdb ;
//char m_rebuildPlacedb ;
//char m_rebuildSectiondb ;
//char m_rebuildRevdb ;
char m_fullRebuild ;
//char m_removeBadPages ;
char m_rebuildRoots ;
char m_rebuildNonRoots ;
@ -208,7 +182,6 @@ public:
char m_SAVE_END;
// i'd like to save these but they are ptrs
//char *m_coll;
CollectionRec *m_cr;
//for timing a repair process

169
Revdb.cpp

@ -1,169 +0,0 @@
#include "gb-include.h"
#include "Revdb.h"
#include "Threads.h"
Revdb g_revdb;
Revdb g_revdb2;
// reset rdb
void Revdb::reset() { m_rdb.reset(); }
// init our rdb
bool Revdb::init ( ) {
int64_t maxTreeMem = 200000000;
// . what's max # of tree nodes?
// . assume avg RevRec size (compressed html doc) is about 1k, we get:
// . NOTE: overhead is about 32 bytes per node
int32_t maxTreeNodes = maxTreeMem / (1*1024);
// each entry in the cache is usually just a single record, no lists
int32_t maxCacheNodes = 0;//g_conf.m_revdbMaxCacheMem / (10*1024);
// initialize our own internal rdb
if ( ! m_rdb.init ( g_hostdb.m_dir ,
"revdb" ,
true , // dedup same keys?
-1 , // fixed record size
// this should not really be changed...
2 , // min files to merge
maxTreeMem,//g_conf.m_revdbMaxTreeMem ,
maxTreeNodes ,
// now we balance so Sync.cpp can order huge lists
true , // balance tree?
0 , // cache mem
maxCacheNodes ,
false ,// half keys?
false ,// g_conf.m_revdbSav
NULL , // page cache ptr
false ) )// is titledb?
return false;
return true;
}
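// worked out: 200000000 bytes of tree mem at ~1KB per record gives
// maxTreeNodes = 200000000 / 1024 = 195312; the ~32 bytes/node of
// overhead is only ~6MB total, noise next to the record data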
// init the rebuild/secondary rdb, used by PageRepair.cpp
bool Revdb::init2 ( int32_t treeMem ) {
// . what's max # of tree nodes?
// . assume avg RevRec size (compressed html doc) is about 1k, we get:
// . NOTE: overhead is about 32 bytes per node
int32_t maxTreeNodes = treeMem / (1*1024);
// initialize our own internal rdb
if ( ! m_rdb.init ( g_hostdb.m_dir ,
"revdbRebuild" ,
true , // dedup same keys?
-1 , // fixed record size
240 , // MinFilesToMerge
treeMem ,
maxTreeNodes ,
// now we balance so Sync.cpp can order huge lists
true , // balance tree?
0 , // MaxCacheMem ,
0 , // maxCacheNodes
false , // half keys?
false , // revdbSaveCache
NULL , // page cache ptr
false ) )// is titledb?
return false;
return true;
}
/*
bool Revdb::addColl ( char *coll, bool doVerify ) {
if ( ! m_rdb.addColl ( coll ) ) return false;
if ( ! doVerify ) return true;
// verify
if ( verify(coll) ) return true;
// if not allowing scale, return false
if ( ! g_conf.m_allowScale ) return false;
// otherwise let it go
log ( "db: Verify failed, but scaling is allowed, passing." );
return true;
}
*/
bool Revdb::verify ( char *coll ) {
log ( LOG_INFO, "db: Verifying Revdb for coll %s...", coll );
g_threads.disableThreads();
Msg5 msg5;
Msg5 msg5b;
RdbList list;
key_t startKey;
key_t endKey;
startKey.setMin();
endKey.setMax();
//int32_t minRecSizes = 64000;
CollectionRec *cr = g_collectiondb.getRec(coll);
if ( ! msg5.getList ( RDB_REVDB ,
cr->m_collnum ,
&list ,
startKey ,
endKey ,
1024*1024 , // minRecSizes ,
true , // includeTree ,
false , // add to cache?
0 , // max cache age
0 , // startFileNum ,
-1 , // numFiles ,
NULL , // state
NULL , // callback
0 , // niceness
false , // err correction?
NULL , // cache key ptr
0 , // retry num
-1 , // maxRetries
true , // compensate for merge
-1LL , // sync point
&msg5b ,
false )) {
g_threads.enableThreads();
return log("db: HEY! it did not block");
}
int32_t count = 0;
int32_t got = 0;
for ( list.resetListPtr() ; ! list.isExhausted() ;
list.skipCurrentRecord() ) {
key_t k = list.getCurrentKey();
count++;
//uint32_t groupId = getGroupId ( RDB_REVDB , &k );
//if ( groupId == g_hostdb.m_groupId ) got++;
uint32_t shardNum = getShardNum( RDB_REVDB , &k );
if ( shardNum == getMyShardNum() ) got++;
}
if ( got != count ) {
log ("db: Out of first %"INT32" records in revdb, "
"only %"INT32" belong to our group.",count,got);
// exit if NONE, we probably got the wrong data
if ( count > 10 && got == 0 )
log("db: Are you sure you have the right "
"data in the right directory? "
"Exiting.");
log ( "db: Exiting due to Revdb inconsistency." );
g_threads.enableThreads();
return g_conf.m_bypassValidation;
}
log ( LOG_INFO, "db: Revdb passed verification successfully for %"INT32""
" recs.", count );
// DONE
g_threads.enableThreads();
return true;
}
// . make the key of a RevRec from a docId
// . remember to set the low bit so it's not a delete
// . hi bits are set in the key
key_t Revdb::makeKey ( int64_t docId, bool isDel ){
key_t key ;
key.n1 = 0;
// shift up for delbit
key.n0 = ((uint64_t)docId) << 1;
// final del bit
if ( ! isDel ) key.n0 |= 0x01;
return key;
};
int64_t Revdb::getDocId ( key_t *k ) {
return (k->n0 >> 1);
}
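// round trip, worked: makeKey(5,false) packs n0 = (5<<1)|1 = 11, and
// getDocId recovers 11>>1 = 5; makeKey(5,true) leaves the low bit
// clear (n0 = 10), marking the record as a delete, and getDocId
// still recovers 10>>1 = 5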

52
Revdb.h

@ -1,52 +0,0 @@
// Matt Wells, copyright Jun 2001
// . db of metalists used to delete a doc now
#ifndef _REVDB_H_
#define _REVDB_H_
#include "Rdb.h"
#include "Url.h"
#include "Conf.h"
#include "Xml.h"
#include "Titledb.h"
// new key format:
// . <docId> - 38 bits
// . <delBit> - 1 bit
// data format:
// . a metalist that is passed in to Msg4
class Revdb {
public:
// reset rdb
void reset();
bool verify ( char *coll );
bool addColl ( char *coll, bool doVerify = true );
// init m_rdb
bool init ();
// init secondary/rebuild revdb
bool init2 ( int32_t treeMem ) ;
// like titledb basically
key_t makeKey ( int64_t docId , bool del ) ;
int64_t getDocId ( key_t *k );
Rdb *getRdb() { return &m_rdb; };
// holds binary format rev entries
Rdb m_rdb;
};
extern class Revdb g_revdb;
extern class Revdb g_revdb2;
#endif

@ -7,7 +7,7 @@
#include "Words.h"
#include "Sections.h"
SafeBuf::SafeBuf(int32_t initSize, char *label ) {
SafeBuf::SafeBuf(int32_t initSize, const char *label ) {
if(initSize <= 0) initSize = 1;
m_capacity = initSize;
m_length = 0;
@ -36,11 +36,11 @@ SafeBuf::SafeBuf() {
m_label = NULL;
}
void SafeBuf::setLabel ( char *label ) {
void SafeBuf::setLabel ( const char *label ) {
m_label = label;
}
SafeBuf::SafeBuf(char* stackBuf, int32_t cap, char* label) {
SafeBuf::SafeBuf(char* stackBuf, int32_t cap, const char* label) {
m_usingStack = true;
m_capacity = cap;
m_buf = stackBuf;
@ -133,7 +133,7 @@ bool SafeBuf::safeMemcpy(const char *s, int32_t len) {
return true;
}
bool SafeBuf::safeMemcpy_nospaces(char *s, int32_t len) {
bool SafeBuf::safeMemcpy_nospaces(const char *s, int32_t len) {
// put a silent \0 at the end
int32_t tmp = len + m_length+1;
if(tmp >= m_capacity ) {
@ -158,7 +158,7 @@ bool SafeBuf::safeMemcpy ( Words *w , int32_t a , int32_t b ) {
return safeMemcpy ( p , pend - p );
}
char* SafeBuf::pushStr (char* str, uint32_t len) {
char* SafeBuf::pushStr (const char* str, uint32_t len) {
int32_t initLen = m_length;
bool status = safeMemcpy ( str , len );
status &= nullTerm();
@ -273,7 +273,7 @@ bool SafeBuf::cat(SafeBuf& c) {
return safeMemcpy(c.getBufStart(), c.length());
}
bool SafeBuf::reserve(int32_t i , char *label, bool clearIt ) {
bool SafeBuf::reserve(int32_t i , const char *label, bool clearIt ) {
// if we don't already have a label and they provided one, use it
if ( ! m_label ) {
@ -333,7 +333,7 @@ bool SafeBuf::reserve(int32_t i , char *label, bool clearIt ) {
//reserve this many bytes, if we need to alloc, we double the
//buffer size.
bool SafeBuf::reserve2x(int32_t i, char *label) {
bool SafeBuf::reserve2x(int32_t i, const char *label) {
//watch out for overflow!
if((m_capacity << 1) + i < m_capacity) return false;
if(i + m_length >= m_capacity)
@ -433,7 +433,7 @@ int32_t SafeBuf::safeSave (char *filename ) {
}
int32_t SafeBuf::fillFromFile(char *dir,char *filename,char *label) {
int32_t SafeBuf::fillFromFile(const char *dir, const char *filename, const char *label) {
m_label = label;
char buf[1024];
if ( dir ) snprintf(buf,1024,"%s/%s",dir,filename);
@ -451,7 +451,7 @@ char *SafeBuf::getNextLine ( char *p ) {
}
// returns -1 on error
int32_t SafeBuf::catFile(char *filename) {
int32_t SafeBuf::catFile(const char *filename) {
SafeBuf sb2;
if ( sb2.fillFromFile(filename) < 0 ) return -1;
// add 1 for a null
@ -462,7 +462,7 @@ int32_t SafeBuf::catFile(char *filename) {
// returns -1 on error
int32_t SafeBuf::fillFromFile(char *filename) {
int32_t SafeBuf::fillFromFile(const char *filename) {
struct stat results;
if (stat(filename, &results) != 0) {
// An error occurred
@ -1135,7 +1135,7 @@ bool SafeBuf::addTag ( Tag *tag ) {
}
// this puts a \0 at the end but does not update m_length for the \0
bool SafeBuf::safeStrcpy ( char *s ) {
bool SafeBuf::safeStrcpy ( const char *s ) {
if ( ! s ) return true;
int32_t slen = gbstrlen(s);
if ( ! reserve ( slen+1 ) ) return false;
@ -1565,7 +1565,7 @@ void SafeBuf::replaceChar ( char src , char dst ) {
// encode a double quote char to two double quote chars
bool SafeBuf::csvEncode ( char *s , int32_t len , int32_t niceness ) {
bool SafeBuf::csvEncode ( const char *s , int32_t len , int32_t niceness ) {
if ( ! s ) return true;
@ -1578,7 +1578,7 @@ bool SafeBuf::csvEncode ( char *s , int32_t len , int32_t niceness ) {
//char *dstEnd = m_buf + m_capacity;
// scan through all
char *send = s + len;
const char *send = s + len;
for ( ; s < send ; s++ ) {
// breathe
QUICKPOLL ( niceness );
@ -1603,9 +1603,9 @@ bool SafeBuf::csvEncode ( char *s , int32_t len , int32_t niceness ) {
return true;
}
bool SafeBuf::base64Encode ( char *sx , int32_t len , int32_t niceness ) {
bool SafeBuf::base64Encode ( const char *sx , int32_t len , int32_t niceness ) {
unsigned char *s = (unsigned char *)sx;
const unsigned char *s = (const unsigned char *)sx;
if ( ! s ) return true;
@ -1630,7 +1630,7 @@ bool SafeBuf::base64Encode ( char *sx , int32_t len , int32_t niceness ) {
unsigned char val;
// scan through all
unsigned char *send = s + len;
const unsigned char *send = s + len;
for ( ; s < send ; ) {
// breathe
QUICKPOLL ( niceness );
@ -1696,7 +1696,7 @@ bool SafeBuf::base64Encode( char *s ) {
return base64Encode(s,gbstrlen(s));
}
bool SafeBuf::base64Decode ( char *src , int32_t srcLen , int32_t niceness ) {
bool SafeBuf::base64Decode ( const char *src , int32_t srcLen , int32_t niceness ) {
// make the map
static unsigned char s_bmap[256];

@ -17,17 +17,17 @@ class SafeBuf {
public:
//*TRUCTORS
SafeBuf();
SafeBuf(int32_t initSize, char *label);
SafeBuf(int32_t initSize, const char *label);
void constructor();
//be careful with passing in a stackBuf! it could go out
//of scope independently of the safebuf.
SafeBuf(char* stackBuf, int32_t cap, char* label = NULL);
SafeBuf(char* stackBuf, int32_t cap, const char* label = NULL);
SafeBuf(char *heapBuf, int32_t bufMax, int32_t bytesInUse, bool ownData);
~SafeBuf();
void setLabel ( char *label );
void setLabel ( const char *label );
// CAUTION: BE CAREFUL WHEN USING THE FOLLOWING TWO FUNCTIONS!!
// setBuf() allows you to reset the contents of the SafeBuf to either
@ -68,11 +68,11 @@ public:
// saves to tmp file and if that succeeds then renames to orig filename
int32_t safeSave (char *filename );
int32_t fillFromFile(char *filename);
int32_t fillFromFile(char *dir,char *filename, char *label=NULL);
int32_t load(char *dir,char *fname,char *label = NULL) {
int32_t fillFromFile(const char *filename);
int32_t fillFromFile(const char *dir, const char *filename, const char *label=NULL);
int32_t load(const char *dir, const char *fname, const char *label = NULL) {
return fillFromFile(dir,fname,label);};
int32_t load(char *fname) { return fillFromFile(fname);};
int32_t load(const char *fname) { return fillFromFile(fname);};
bool safeTruncateEllipsis ( char *src , int32_t maxLen );
bool safeTruncateEllipsis ( char *src , int32_t srcLen, int32_t maxLen );
@ -103,21 +103,21 @@ public:
#else
bool safePrintf(const char *formatString, ...);
#endif
bool safeMemcpy(void *s, int32_t len){return safeMemcpy((char *)s,len);}
bool safeMemcpy(const void *s, int32_t len){return safeMemcpy((const char*)s,len);}
bool safeMemcpy(const char *s, int32_t len);
bool safeMemcpy_nospaces(char *s, int32_t len);
bool safeMemcpy_nospaces(const char *s, int32_t len);
bool safeMemcpy(SafeBuf *c){return safeMemcpy(c->m_buf,c->m_length);}
bool safeMemcpy ( class Words *w , int32_t a , int32_t b ) ;
bool safeStrcpy ( char *s ) ;
bool safeStrcpy ( const char *s ) ;
//bool safeStrcpyPrettyJSON ( char *decodedJson ) ;
bool safeUtf8ToJSON ( const char *utf8 ) ;
bool jsonEncode ( const char *utf8 ) { return safeUtf8ToJSON(utf8); }
bool jsonEncode ( char *utf8 , int32_t utf8Len );
bool csvEncode ( char *s , int32_t len , int32_t niceness = 0 );
bool csvEncode ( const char *s , int32_t len , int32_t niceness = 0 );
bool base64Encode ( char *s , int32_t len , int32_t niceness = 0 );
bool base64Decode ( char *src , int32_t srcLen , int32_t niceness = 0 ) ;
bool base64Encode ( const char *s , int32_t len , int32_t niceness = 0 );
bool base64Decode ( const char *src , int32_t srcLen , int32_t niceness = 0 ) ;
bool base64Encode( char *s ) ;
@ -132,8 +132,8 @@ public:
// . if clearIt is true we init the new buffer space to zeroes
// . used by Collectiondb.cpp
bool reserve(int32_t i, char *label=NULL , bool clearIt = false );
bool reserve2x(int32_t i, char *label = NULL );
bool reserve(int32_t i, const char *label=NULL , bool clearIt = false );
bool reserve2x(int32_t i, const char *label = NULL );
char *makeSpace ( int32_t size ) {
if ( ! reserve ( size ) ) return NULL;
@ -147,7 +147,7 @@ public:
};
void setLength(int32_t i) { m_length = i; }
char *getNextLine ( char *p ) ;
int32_t catFile(char *filename) ;
int32_t catFile(const char *filename) ;
void detachBuf();
bool insert ( class SafeBuf *c , int32_t insertPos ) ;
@ -266,7 +266,7 @@ public:
// hack off trailing 0's
bool printFloatPretty ( float f ) ;
char* pushStr (char* str, uint32_t len);
char* pushStr (const char* str, uint32_t len);
bool pushPtr ( void *ptr );
bool pushLong (int32_t i);
bool pushLongLong (int64_t i);
@ -307,7 +307,7 @@ public:
protected:
char *m_buf;
public:
char *m_label;
const char *m_label;
bool m_usingStack;
int16_t m_encoding; // output charset
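The net effect of the const sweep through SafeBuf: string literals now pass cleanly as labels and payloads, with no casts and no -Wwrite-strings noise. A minimal usage sketch against the API as declared above:

#include "SafeBuf.h"

bool sketchBuildGreeting ( SafeBuf *out ) {
	SafeBuf sb ( 256 , "greetbuf" );       // literal label, now const-clean
	if ( ! sb.safeStrcpy ( "hello, " ) ) return false;
	if ( ! sb.safeMemcpy ( "world" , 5 ) ) return false;
	if ( ! sb.nullTerm ( ) ) return false; // nullTerm as used in pushStr
	return out->safeMemcpy ( &sb );        // SafeBuf* overload above
}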

@ -61,59 +61,13 @@ key_t SearchInput::makeKey ( ) {
// space separated, NULL terminated, list of meta tag names to display
if ( m_displayMetas )
k.n0 = hash64b ( m_displayMetas , k.n0 );
// name of collection in external cluster to get titleRecs for
// related pages from
//if ( m_rp_getExternalPages && m_rp_externalColl )
// k.n0 = hash64b ( m_rp_externalColl , k.n0 );
// collection we import from
//if ( m_importColl )
// k.n0 = hash64b ( m_importColl , k.n0 );
// the special query parm
//if ( m_sq && m_sqLen > 0 )
// k.n0 = hash64 ( m_sq , m_sqLen , k.n0 );
//if ( m_noDocIds && m_noDocIdsLen )
// k.n0 = hash64 ( m_noDocIds , m_noDocIdsLen , k.n0 );
//if ( m_noSiteIds && m_noSiteIdsLen )
// k.n0 = hash64 ( m_noSiteIds , m_noSiteIdsLen , k.n0 );
// no need to hash these again separately, they are in between
// m_START and m_END_HASH
// language
//if ( m_language )
// k.n0 = hash64 ( m_language , k.n0 );
//if ( m_gblang )
// k.n0 = hash64 ( m_gblang , k.n0 );
// . now include the hash of the search parameters
// . not including m_docsToScanForTopics since we got TopicGroups
char *a = ((char *)&m_START) + 4 ; // msg40->m_dpf;
char *b = (char *)&m_END_HASH ; // msg40->m_topicGroups;
int32_t size = b - a;
// push and flush some parms that should not contribute
//int32_t save1 = m_refs_numToDisplay;
//int32_t save2 = m_rp_numToDisplay;
//int32_t save3 = m_numTopicsToDisplay;
//m_refs_numToDisplay = 0;
//m_rp_numToDisplay = 0;
//m_numTopicsToDisplay = 0;
// and hash it all up
k.n0 = hash64 ( a , size , k.n0 );
// and pop out the parms that did not contribute
//m_refs_numToDisplay = save1;
//m_rp_numToDisplay = save2;
//m_numTopicsToDisplay = save3;
// hash each topic group
for ( int32_t i = 0 ; i < 1 ; i++ ) {
TopicGroup *t = &m_topicGroups[i];
//k.n0 = hash64 ( t->m_numTopics , k.n0 );
k.n0 = hash64 ( t->m_maxTopics , k.n0 );
k.n0 = hash64 ( t->m_docsToScanForTopics , k.n0 );
k.n0 = hash64 ( t->m_minTopicScore , k.n0 );
k.n0 = hash64 ( t->m_maxWordsPerTopic , k.n0 );
k.n0 = hash64b( t->m_meta , k.n0 );
k.n0 = hash64 ( t->m_delimeter , k.n0 );
k.n0 = hash64 ( t->m_useIdfForTopics , k.n0 );
k.n0 = hash64 ( t->m_dedup , k.n0 );
}
// . boolean queries have operators (AND OR NOT ( ) ) that we need
// to consider in this hash as well
// . so just hash the whole damn query
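To make the m_START/m_END_HASH marker trick above concrete, here is a self-contained sketch; the parm names and the FNV-style stand-in for hash64() are illustrative assumptions, not the real SearchInput layout:

#include <cstdint>

// stand-in for the codebase's hash64(buf,len,seed), just so this compiles
static uint64_t hash64 ( const char *buf , int32_t len , uint64_t h ) {
	for ( int32_t i = 0 ; i < len ; i++ ) {
		h ^= (unsigned char)buf[i];
		h *= 0x100000001b3ULL;
	}
	return h;
}

struct KeyedParms {
	int32_t m_START;        // marker: hashing starts just past this
	int32_t m_familyFilter; // illustrative parms in between...
	int32_t m_docsToScan;
	int32_t m_END_HASH;     // marker: hashing stops here

	uint64_t makeKey ( uint64_t seed ) const {
		const char *a = ((const char *)&m_START) + sizeof(m_START);
		const char *b =  (const char *)&m_END_HASH;
		// one call covers every parm between the markers; this
		// relies on there being no uninitialized padding bytes
		return hash64 ( a , (int32_t)(b - a) , seed );
	}
};

The design choice is that adding a parm between the two markers automatically changes the key; only parms outside the bracketed range need hashing by hand, as with m_displayMetas above.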
@ -313,18 +267,13 @@ bool SearchInput::set ( TcpSocket *sock , HttpRequest *r ) { //, Query *q ) {
// now override automatic defaults for special cases
if ( tmpFormat != FORMAT_HTML ) {
m_familyFilter = 0;
m_numTopicsToDisplay = 0;
m_doQueryHighlighting = 0;
//m_spellCheck = 0;
m_getDocIdScoringInfo = false;
// turn gigabits off by default if not html
//m_docsToScanForTopics = 0;
}
// if they have a list of sites...
if ( m_sites && m_sites[0] ) {
m_doSiteClustering = false;
m_ipRestrictForTopics = false;
}
@ -576,18 +525,10 @@ bool SearchInput::set ( TcpSocket *sock , HttpRequest *r ) { //, Query *q ) {
m_doSiteClustering = true;
// turn off some parms
if ( m_q.m_hasUrlField )
m_ipRestrictForTopics = false;
if ( m_q.m_hasIpField )
m_ipRestrictForTopics = false;
if ( m_q.m_hasPositiveSiteField ) {
m_ipRestrictForTopics = false;
m_doSiteClustering = false;
}
if ( cr && ! cr->m_ipRestrict )
m_ipRestrictForTopics = false;
if ( m_q.m_hasQuotaField ) {
m_doSiteClustering = false;
m_doDupContentRemoval = false;
@ -629,36 +570,6 @@ bool SearchInput::set ( TcpSocket *sock , HttpRequest *r ) { //, Query *q ) {
// save it
m_rcache = readFromCache;
//
// TODO: use Parms.cpp defaults
//
TopicGroup *tg = &m_topicGroups[0];
//
//
// gigabits
//
//
tg->m_numTopics = 50;
tg->m_maxTopics = 50;
tg->m_docsToScanForTopics = m_docsToScanForTopics;
tg->m_minTopicScore = 0;
tg->m_maxWordsPerTopic = 6;
tg->m_meta[0] = '\0';
tg->m_delimeter = '\0';
tg->m_useIdfForTopics = false;
tg->m_dedup = true;
// need to be on at least 2 pages!
tg->m_minDocCount = 2;
tg->m_ipRestrict = m_ipRestrictForTopics;
tg->m_dedupSamplePercent = 80;
tg->m_topicRemoveOverlaps = true;
tg->m_topicSampleSize = 4096;
// max sequential punct chars allowed in a topic
tg->m_topicMaxPunctLen = 1;
return true;
}

@ -22,27 +22,6 @@
#define MAX_TOPIC_GROUPS 1
// . parameters used to generate a set of related topics (gigabits)
// . you can have Msg24 generate multiple sets of related topics in one call
class TopicGroup {
public:
int32_t m_numTopics;
int32_t m_maxTopics;
int32_t m_docsToScanForTopics;
int32_t m_minTopicScore;
int32_t m_maxWordsPerTopic;
char m_meta[32];
char m_delimeter;
bool m_useIdfForTopics;
bool m_dedup;
int32_t m_minDocCount ;
bool m_ipRestrict ;
char m_dedupSamplePercent; // -1 means no deduping
bool m_topicRemoveOverlaps;
int32_t m_topicSampleSize;
int32_t m_topicMaxPunctLen;
};
class SearchInput {
public:
@ -53,9 +32,6 @@ class SearchInput {
void test ( );
key_t makeKey ( ) ;
// private
void setTopicGroups ( class HttpRequest *r ,
class CollectionRec *cr ) ;
bool setQueryBuffers ( class HttpRequest *hr ) ;
//void setToDefaults ( class CollectionRec *cr , int32_t niceness ) ;
@ -110,7 +86,6 @@ class SearchInput {
char m_isCollAdmin;
// these are set from things above
TopicGroup m_topicGroups [ MAX_TOPIC_GROUPS ];// msg40
SafeBuf m_sbuf1;
SafeBuf m_sbuf2;
@ -146,7 +121,6 @@ class SearchInput {
char m_wcache; // msg40
char m_debug; // msg40
char m_debugGigabits;
char m_spiderResults;
char m_spiderResultRoots;
@ -157,7 +131,6 @@ class SearchInput {
// do not include these in makeKey()
int32_t m_numTopicsToDisplay;
int32_t m_refs_numToDisplay;
int32_t m_rp_numToDisplay;
@ -204,7 +177,6 @@ class SearchInput {
char m_excludeMetaText;
char m_doBotDetection;
int32_t m_includeCachedCopy;
char m_getSectionVotingInfo;
char m_familyFilter; // msg40
char m_showErrors;
char m_doSiteClustering; // msg40
@ -228,18 +200,6 @@ class SearchInput {
char *m_filetype;
// . related topic (gigabits) parameters
// . TODO: prepend m_top_ to these var names
int32_t m_docsToScanForTopics; // msg40
int32_t m_minTopicScore; // msg40
int32_t m_minDocCount; // msg40
int32_t m_dedupSamplePercent; // msg40
int32_t m_maxWordsPerTopic; // msg40
int32_t m_ipRestrictForTopics; // msg40
char m_returnDocIdCount; // msg40
char m_returnDocIds; // msg40
char m_returnPops; // msg40
// . reference page parameters
// . copied from CollectionRec.h
int32_t m_refs_numToGenerate; // msg40
@ -306,12 +266,9 @@ class SearchInput {
int32_t m_docsToScanForReranking;
float m_pqr_demFactSubPhrase;
float m_pqr_demFactCommonInlinks;
float m_pqr_demFactLocTitle;
float m_pqr_demFactLocSummary;
float m_pqr_demFactProximity;
float m_pqr_demFactInSection;
float m_pqr_demFactOrigScore;
bool m_pqr_demInTopics;
// . buzz stuff (buzz)
// . these controls the set of results, so should be in the makeKey()
// as it is, in between the start and end hash vars
@ -348,15 +305,9 @@ class SearchInput {
////////
// . end the section we hash in SearchInput::makeKey()
// . we also hash displayMetas, TopicGroups and Query into the key
// . we also hash displayMetas and Query into the key
int32_t m_END_HASH;
//////
//
// STUFF NOT REALLY USED NOW
//
//////
// a marker for SearchInput::test()
int32_t m_END_TEST;

File diff suppressed because it is too large

@ -7,8 +7,6 @@
#include "Bits.h"
#include "Words.h"
#include "Rdb.h"
//#include "DiskPageCache.h"
// KEY:
// ssssssss ssssssss ssssssss ssssssss s = 48 bit site hash
@ -21,7 +19,7 @@
// NNNNNNNN NNNNNNNN NNNNNNNN NNNNNNNN N = SectionVote::m_numSampled
// h: hash value. typically the lower 32 bits of the
// Section::m_sentenceContentHash64 or the Section::m_contentHash64 vars. we
// Section::m_contentHash64 vars. we
// do not need the full 64 bits because we have the 48 bit site hash included
// to reduce collisions substantially.
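A one-liner makes the truncation argument above concrete; this is illustrative only, the real key packing lives in the sectiondb key code:

#include <cstdint>

// keep only the low 32 bits of the 64-bit content hash; the 48-bit
// site hash already in the key makes collisions on these 32 bits
// rare enough in practice
static uint32_t truncatedSectionHash ( uint64_t contentHash64 ) {
	return (uint32_t)(contentHash64 & 0xffffffffULL);
}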
@ -33,80 +31,43 @@
// . these are descriptive flags, they are computed when Sections is set
// . SEC_NOTEXT sections do not vote, i.e. they are not stored in Sectiondb
#define SEC_NOTEXT 0x0001 // implies section has no alnum words
// . Weights.cpp zeroes out the weights for these types of sections
// . is section delimited by the <script> tag, <marquee> tag, etc.
//#define SEC_UNUSED 0x0002
//#define SEC_UNUSED 0x0004
#define SEC_SCRIPT 0x0008
#define SEC_STYLE 0x0010
#define SEC_SELECT 0x0020
#define SEC_MARQUEE 0x0040
#define SEC_CONTAINER 0x0080
// . in title/header. for gigabits in XmlDoc.cpp
// . is section delimited by <title> or <hN> tags?
#define SEC_IN_TITLE 0x0100
#define SEC_IN_HEADER 0x0200
// used by Events.cpp to indicate if section contains a TimeOfDay ("7 p.m.")
#define SEC_HAS_TOD 0x0400
//#define SEC_UNUSED 0x0040
//#define SEC_UNUSED 0x0080
#define SEC_IN_TITLE 0x0100 // in title
#define SEC_IN_HEADER 0x0200 // in <hN> tags
//#define SEC_UNUSED 0x0400
#define SEC_HIDDEN 0x0800 // <div style="display: none">
#define SEC_IN_TABLE 0x1000
//#define SEC_UNUSED 0x1000
#define SEC_FAKE 0x2000 // <hr>/<br>/sentence based faux section
#define SEC_NOSCRIPT 0x4000
//#define SEC_UNUSED 0x8000
#define SEC_HEADING_CONTAINER 0x8000
#define SEC_MENU 0x010000
#define SEC_LINK_TEXT 0x020000
#define SEC_MENU_HEADER 0x040000
#define SEC_INPUT_HEADER 0x080000
#define SEC_INPUT_FOOTER 0x100000
#define SEC_HEADING 0x200000
// reasons why a section is not an event
#define SEC_UNBALANCED 0x00400000 // interlaced section/tags
#define SEC_OPEN_ENDED 0x00800000 // no closing tag found
#define SEC_MENU 0x00010000
#define SEC_LINK_TEXT 0x00020000
#define SEC_MENU_HEADER 0x00040000
#define SEC_INPUT_HEADER 0x00080000
#define SEC_INPUT_FOOTER 0x00100000
#define SEC_HEADING 0x00200000
//#define SEC_UNUSED 0x00400000
//#define SEC_UNUSED 0x00800000
#define SEC_SENTENCE 0x01000000 // made by a sentence?
#define SEC_PLAIN_TEXT 0x02000000
//#define SEC_UNUSED_1 0x04000000
//#define SEC_UNUSED 0x04000000
//#define SEC_UNUSED 0x00008000000LL
//#define SEC_UNUSED 0x00010000000LL
//#define SEC_UNUSED 0x00020000000LL
//#define SEC_UNUSED 0x00040000000LL
//#define SEC_UNUSED 0x00080000000LL
// . this is set in Dates.cpp and used by Dates.cpp and Events.cpp
// . we identify max tod sections and make it so brothers in a list of two
// or more such sections cannot telescope to each other's dates, and so we
// do not share each other's event descriptions. fixes abqtango.com
// and salsapower.com from grabbing event description text from "failed"
// event sections that are brothers to successful event sections.
#define SEC_TOD_EVENT 0x00008000000LL
#define SEC_NIXED_HEADING_CONTAINER 0x00010000000LL
#define SEC_SECOND_TITLE 0x00020000000LL
#define SEC_SPLIT_SENT 0x00040000000LL
#define SEC_HAS_REGISTRATION 0x00080000000LL
#define SEC_HAS_PARKING 0x00100000000LL
//#define SEC_UNUSED 0x00100000000LL
#define SEC_MENU_SENTENCE 0x00200000000LL
// fix for folkmads.org:
#define SEC_HR_CONTAINER 0x00400000000LL
#define SEC_HAS_DOM 0x00800000000LL
#define SEC_HAS_DOW 0x01000000000LL
#define SEC_EVENT_BROTHER 0x02000000000LL
#define SEC_DATE_LIST_CONTAINER 0x04000000000LL
#define SEC_TAIL_CRAP 0x08000000000LL
#define SEC_CONTROL 0x0000010000000000LL
#define SEC_STRIKE 0x0000020000000000LL
#define SEC_STRIKE2 0x0000040000000000LL
#define SEC_HAS_MONTH 0x0000080000000000LL
#define SEC_IGNOREEVENTBROTHER 0x0000100000000000LL
#define SEC_HASEVENTDOMDOW 0x0000200000000000LL
#define SEC_STOREHOURSCONTAINER 0x0000400000000000LL
#define SEC_PUBDATECONTAINER 0x0000800000000000LL
#define SEC_TABLE_HEADER 0x0001000000000000LL
#define SEC_HASDATEHEADERROW 0x0002000000000000LL
#define SEC_HASDATEHEADERCOL 0x0004000000000000LL
#define SEC_MULTIDIMS 0x0008000000000000LL
#define SEC_HASHXPATH 0x0010000000000000LL
//#define SEC_UNUSED 0x00400000000LL
//#define SEC_UNUSED 0x00800000000LL
// . some random-y numbers for Section::m_baseHash
// . used by splitSection() function
@ -114,174 +75,10 @@
#define BH_SENTENCE 4590649
#define BH_IMPLIED 95468323
// values for Section::m_sentFlags (sentence flags)
#define SENT_HAS_COLON 0x00000001
//#define SENT_UNUSED_1 0x00000002
#define SENT_BAD_FIRST_WORD 0x00000004
#define SENT_MIXED_CASE 0x00000008
#define SENT_POWERED_BY 0x00000010
#define SENT_MULT_EVENTS 0x00000020
#define SENT_PAGE_REPEAT 0x00000040
#define SENT_NUMBERS_ONLY 0x00000080
//#define SENT_UNUSED_6 0x00000100
#define SENT_SECOND_TITLE 0x00000200
#define SENT_IS_DATE 0x00000400
#define SENT_LAST_STOP 0x00000800
#define SENT_NUMBER_START 0x00001000
#define SENT_TAG_INDICATOR 0x00002000
#define SENT_PRETTY 0x00004000
#define SENT_IN_HEADER 0x00008000
#define SENT_MIXED_CASE_STRICT 0x00010000
#define SENT_IN_LIST 0x00020000
#define SENT_COLON_ENDS 0x00040000
//#define SENT_UNUSED_7 0x00080000
#define SENT_IN_TITLEY_TAG 0x00100000
#define SENT_CITY_STATE 0x00200000
#define SENT_PRICEY 0x00400000
#define SENT_PERIOD_ENDS 0x00800000
#define SENT_HAS_PHONE 0x01000000
#define SENT_IN_MENU 0x02000000
#define SENT_MIXED_TEXT 0x04000000
#define SENT_TAGS 0x08000000
#define SENT_INTITLEFIELD 0x10000000
#define SENT_STRANGE_PUNCT 0x20000000
#define SENT_INPLACEFIELD 0x40000000
#define SENT_INNONTITLEFIELD 0x80000000
//#define SENT_UNUSED_2 0x0000000100000000LL
#define SENT_HASNOSPACE 0x0000000200000000LL
#define SENT_IS_BYLINE 0x0000000400000000LL
#define SENT_NON_TITLE_FIELD 0x0000000800000000LL
#define SENT_TITLE_FIELD 0x0000001000000000LL
#define SENT_UNIQUE_TAG_HASH 0x0000002000000000LL
#define SENT_AFTER_SENTENCE 0x0000004000000000LL
#define SENT_WORD_SANDWICH 0x0000008000000000LL
//#define SENT_UNUSED_3 0x0000010000000000LL
#define SENT_NUKE_FIRST_WORD 0x0000020000000000LL
#define SENT_FIELD_NAME 0x0000040000000000LL
#define SENT_PERIOD_ENDS_HARD 0x0000080000000000LL
#define SENT_PARENS_START 0x0000100000000000LL
#define SENT_IN_MENU_HEADER 0x0000200000000000LL
#define SENT_IN_TRUMBA_TITLE 0x0000400000000000LL
//#define SENT_UNUSED_8 0x0000800000000000LL
#define SENT_FORMTABLE_FIELD 0x0001000000000000LL
#define SENT_FORMTABLE_VALUE 0x0002000000000000LL
#define SENT_IN_TAG 0x0004000000000000LL
#define SENT_AFTER_SPACER 0x0008000000000000LL
#define SENT_BEFORE_SPACER 0x0010000000000000LL
#define SENT_OBVIOUS_PLACE 0x0020000000000000LL
//#define SENT_UNUSED_4 0x0040000000000000LL
#define SENT_HASSOMEEVENTSDATE 0x0080000000000000LL
#define SENT_AFTER_COLON 0x0100000000000000LL
#define SENT_HASTITLEWORDS 0x0200000000000000LL
//#define SENT_UNUSED_5 0x0400000000000000LL
//#define SENT_UNUSED_9 0x0800000000000000LL
#define SENT_IN_BIG_LIST 0x1000000000000000LL
#define SENT_BADEVENTSTART 0x2000000000000000LL
#define SENT_MENU_SENTENCE 0x4000000000000000LL
#define SENT_HAS_PRICE 0x8000000000000000ULL
#define NOINDEXFLAGS (SEC_SCRIPT|SEC_STYLE|SEC_SELECT)
// the section type (bit flag vector for SEC_*) is currently 32 bits
typedef int64_t sec_t;
//typedef int64_t titleflags_t;
typedef int64_t sentflags_t;
typedef uint32_t turkbits_t;
bool isPlaceIndicator ( int64_t *widp ) ;
char *getSentBitLabel ( sentflags_t sf ) ;
sentflags_t getMixedCaseFlags ( class Words *words ,
wbit_t *bits ,
int32_t senta ,
int32_t sentb ,
int32_t niceness ) ;
int32_t hasTitleWords ( sentflags_t sflags ,
int32_t senta,
int32_t sentb,
int32_t alnumCount,
class Bits *bits ,
class Words *words ,
bool useAsterisk ,
int32_t niceness );
class Sectiondb {
public:
// reset rdb
void reset();
bool verify ( char *coll );
bool addColl ( char *coll, bool doVerify = true );
// init m_rdb
bool init ();
// init secondary/rebuild sectiondb
bool init2 ( int32_t treeMem ) ;
Rdb *getRdb() { return &m_rdb; }
uint64_t getSiteHash ( void *k ) {
return ((*(uint64_t *)(((char *)k)+8))) >> 16;};
uint32_t getSectionHash ( void *k ) {
return (*(uint32_t *)(((char *)k)+6)); }
int64_t getDocId ( void *k ) {
return ((*(uint64_t *)k) >> 2) & DOCID_MASK; }
uint8_t getSectionType ( void *k ) {
return ((unsigned char *)k)[5]; };
// holds binary format title entries
Rdb m_rdb;
//DiskPageCache *getDiskPageCache ( ) { return &m_pc; };
//DiskPageCache m_pc;
};
extern class Sectiondb g_sectiondb;
extern class Sectiondb g_sectiondb2;
// this is only needed for sections, not facets in general, I don't think.
// facets has the whole QueryTerm::m_facetHashTable array with more info
//
// . for gbfacet:gbxpathsite1234567 posdb query stats compilation to
// show how many pages duplicate your section's content on your site
// at the same xpath. the hash of the innerHTML for that xpath is
// embedded into the posdb key like a number in a number key, so the
// wordpos bits etc are sacrificed to hold that 32-bit number.
// . used by XmlDoc::getSectionsWithDupStats() for display in
// XmlDoc::printRainbowSections()
// . these are in QueryTerm::m_facetStats and computed from
// QueryTerm::m_facetHashTable
class SectionStats {
public:
SectionStats() { reset(); }
void reset ( ) {
m_totalMatches = 0; // posdb key "val" matches ours
m_totalEntries = 0; // total posdb keys
m_numUniqueVals = 0; // # of unique "vals"
m_totalDocIds = 0;
};
// # of times xpath innerhtml matched ours. 1 count per docid max.
int64_t m_totalMatches;
// # of times this xpath occurred. doc can have multiple times.
int64_t m_totalEntries;
// # of unique vals this xpath had. doc can have multiple counts.
int64_t m_numUniqueVals;
int64_t m_totalDocIds;
};
class Section {
public:
@ -295,9 +92,6 @@ public:
class Section *m_next;
class Section *m_prev;
// used by Events.cpp to count # of timeofdays in section
//class Event *m_event;
// . if we are an element in a list, what is the list container section
// . a containing section is a section containing MULTIPLE
// smaller sections
@ -314,24 +108,6 @@ public:
// are a sentence section then this points to itself.
class Section *m_sentenceSection;
// . set in XmlDoc::getSectionsWithDupStats()
// . voting info for this section over all indexed pages from this site
SectionStats m_stats;
int32_t m_votesForDup;
int32_t m_votesForNotDup;
float getSectiondbVoteFactor ( ) {
// now punish if repeated on many page on the site
float a = (float)m_votesForNotDup;
float b = (float)m_votesForDup;
if ( a == 0 && b == 0 ) return 1.0;
// use that as a modifier
float factor = a / ( a + b);
// minimum so we do not completely nuke the title, I guess
if ( factor < .10 ) factor = .10;
return factor;
};
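// Worked example (added for illustration): 3 not-dup votes and 7 dup
// votes give 3/(3+7) = 0.30; 0 and 20 would give 0.0 but is floored
// to 0.10; no votes at all returns the neutral factor 1.0.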
// position of the first and last alnum word contained directly OR
// indirectly in this section. use -1 if no text contained...
int32_t m_firstWordPos;
@ -348,32 +124,11 @@ public:
int32_t m_senta;
int32_t m_sentb;
// each sentence is numbered
//int32_t m_sentNum;
class Section *m_prevSent;
class Section *m_nextSent;
// . if we are in a table, what position are we
// . starts at 1 and goes upwards
// . we start it at 1 so that way we know that 0 is invalid!
int32_t m_rowNum;
int32_t m_colNum;
class Section *m_tableSec;
class Section *m_headColSection;
class Section *m_headRowSection;
class Section *m_leftCell;
class Section *m_aboveCell;
// hash of this tag's baseHash and all its parents baseHashes combined
uint32_t m_tagHash;
// like above but for turk voting. includes hash of the class tag attr
// from m_turkBaseHash, whereas m_tagHash uses m_baseHash of parent.
uint32_t m_turkTagHash32;
// for debug output display of color coded nested sections
uint32_t m_colorHash;
@ -384,35 +139,13 @@ public:
// div and span tags, etc. to make them unique
uint32_t m_baseHash;
// just hash the "class=" value along with the tagid
uint32_t m_turkBaseHash;
// kinda like m_baseHash but for xml tags and only hashes the
// tag name and none of the fields
uint32_t m_xmlNameHash;
// these deal with enumerated tags and are used by Events.cpp
int32_t m_occNum;
int32_t m_numOccurences;
// used by XmlDoc.cpp to set a topological distance
int32_t m_topDist;
// hash of all the alnum words DIRECTLY in this section
uint64_t m_contentHash64;
uint64_t m_sentenceContentHash64;
// . used by the SEC_EVENTBROTHER algo in Dates.cpp to detect
// [more] or [details] links that indicate distinct items
// . sometimes the "(more)" link is combined into the last sentence
// so we have to treat the last link kinda like its own sentence too!
uint32_t m_lastLinkContentHash32;
// hash of all sentences contained indirectly or directly.
// uses m_sentenceContentHash64 (for sentences)
uint64_t m_indirectSentHash64;
// . range of words in Words class we encompass
// . m_wordStart and m_wordEnd are the tag word #'s
// . ACTUALLY it is a half-closed interval [a,b) like all else
@ -422,45 +155,27 @@ public:
int32_t m_a;//wordStart;
int32_t m_b;//wordEnd;
// for event titles and descriptions
sentflags_t m_sentFlags;
// . # alnum words only in this and only this section
// . if we have none, we are SEC_NOTEXT
int32_t m_exclusive;
// our depth. # of tags in the hash
int32_t m_depth;
// container for the #define'd SEC_* values above
sec_t m_flags;
// used to mark it in Dates.cpp like a breadcrumb trail
int32_t m_mark;
// Events.cpp assigns a date to each section
int32_t m_firstDate;
char m_used;
// used in Sections::splitSections() function
int32_t m_processedHash;
int32_t m_gbFrameNum;
// do we contain section "arg"?
bool contains ( class Section *arg ) {
return ( m_a <= arg->m_a && m_b >= arg->m_b ); };
bool contains( class Section *arg ) {
return ( m_a <= arg->m_a && m_b >= arg->m_b );
}
// do we contain section "arg"?
bool strictlyContains ( class Section *arg ) {
if ( m_a < arg->m_a && m_b >= arg->m_b ) return true;
if ( m_a <= arg->m_a && m_b > arg->m_b ) return true;
return false;
};
// does this section contain the word #a?
bool contains2 ( int32_t a ) { return ( m_a <= a && m_b > a ); };
}
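// Note (added): these use the half-closed convention [m_a,m_b), so
// contains2() tests m_b > a and a section never claims the word at
// its own end bound.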
bool isVirtualSection ( ) ;
};
@ -474,84 +189,48 @@ public:
#define FMT_JSON 3
class Sections {
public:
Sections();
~Sections();
public:
Sections ( ) ;
void reset() ;
~Sections ( ) ;
void reset();
// . returns false if blocked, true otherwise
// . returns true and sets g_errno on error
// . sets m_sections[] array, 1-1 with words array "w"
bool set(class Words *w, class Phrases *phrases, class Bits *bits, class Url *url,
bool set(class Words *w, class Bits *bits, class Url *url,
int64_t siteHash64, char *coll, int32_t niceness, uint8_t contentType );
bool addVotes(class SectionVotingTable *nsvt, uint32_t tagPairHash );
bool verifySections ( ) ;
int32_t getStoredSize ( ) ;
static int32_t getStoredSize ( char *p ) ;
int32_t serialize ( char *p ) ;
bool growSections ( );
bool getSectiondbList ( );
bool gotSectiondbList ( bool *needsRecall ) ;
void setNextBrotherPtrs ( bool setContainer ) ;
// this is used by Events.cpp Section::m_nextSent
void setNextSentPtrs();
bool print ( SafeBuf *sbuf ,
class HashTableX *pt ,
class HashTableX *et ,
class HashTableX *st ,
class HashTableX *at ,
class HashTableX *tt ,
//class HashTableX *rt ,
class HashTableX *priceTable ) ;
bool print( SafeBuf *sbuf, class HashTableX *pt, class HashTableX *et, class HashTableX *st,
class HashTableX *at, class HashTableX *tt, class HashTableX *priceTable );
void printFlags ( class SafeBuf *sbuf , class Section *sn ) ;
bool printVotingInfoInJSON ( SafeBuf *sb ) ;
bool print2(SafeBuf *sbuf, int32_t hiPos, int32_t *wposVec, char *densityVec,
char *wordSpamVec, char *fragVec, char format = FMT_HTML );
bool print2 ( SafeBuf *sbuf ,
int32_t hiPos,
int32_t *wposVec,
char *densityVec,
char *diversityVec,
char *wordSpamVec,
char *fragVec,
char format = FMT_HTML );
bool printSectionDiv ( class Section *sk , char format = FMT_HTML );
bool printSectionDiv ( Section *sk , char format = FMT_HTML );
class SafeBuf *m_sbuf;
char *getSectionsReply ( int32_t *size );
char *getSectionsVotes ( int32_t *size );
bool isHardSection ( class Section *sn );
bool isHardSection ( Section *sn );
bool setMenus ( );
bool setFormTableBits ( ) ;
bool setTableRowsAndCols ( class Section *tableSec ) ;
bool setTableHeaderBits ( class Section *table );
bool setTableScanPtrs ( class Section *ts ) ;
void setHeader ( int32_t r , class Section *first , sec_t flag ) ;
bool setHeadingBit ( ) ;
void setTagHashes ( ) ;
bool setRegistrationBits ( ) ;
bool m_setRegBits ;
bool m_alnumPosValid;
// save it
class Words *m_words ;
class Bits *m_bits ;
@ -564,39 +243,15 @@ class Sections {
int32_t *m_wposVec;
char *m_densityVec;
char *m_diversityVec;
char *m_wordSpamVec;
char *m_fragVec;
// url ends in .rss or .xml ?
bool m_isRSSExt;
bool m_isFacebook ;
bool m_isEventBrite ;
bool m_isStubHub ;
Msg0 m_msg0;
key128_t m_startKey;
int32_t m_recall;
IndexList m_list;
int64_t m_termId;
int32_t m_numLineWaiters;
bool m_waitInLine;
int32_t m_articleStartWord;
int32_t m_articleEndWord;
bool m_hadArticle;
int32_t m_numInvalids;
int32_t m_totalSiteVoters;
int32_t m_numAlnumWordsInArticle;
// word #'s (-1 means invalid)
int32_t m_titleStart;
int32_t m_titleEnd;
int32_t m_titleStartAlnumPos;
int32_t m_numVotes;
// these are 1-1 with the Words::m_words[] array
class Section **m_sectionPtrs;
@ -604,25 +259,8 @@ class Sections {
// save this too
int32_t m_nw ;
// new stuff
HashTableX m_ot;
HashTableX m_vt;
// for caching partition scores
HashTableX m_ct;
// buf for serializing m_osvt into
char *m_buf;
int32_t m_bufSize;
// buf for serializing m_nsvt into
char *m_buf2;
int32_t m_bufSize2;
// allocate m_sections[] buffer
class Section *m_sections;
//int32_t m_sectionsBufSize;
int32_t m_numSections;
int32_t m_maxNumSections;
@ -633,71 +271,25 @@ class Sections {
// see what section a word is in.
SafeBuf m_sectionPtrBuf;
int32_t m_numSentenceSections;
bool m_isTestColl;
// assume no malloc
bool m_needsFree;
char m_localBuf [ SECTIONS_LOCALBUFSIZE ];
// set a flag
bool m_badHtml;
int64_t *m_wids;
int64_t *m_pids;
int32_t *m_wlens;
char **m_wptrs;
nodeid_t *m_tids;
// the new way
bool addImpliedSections ( );
bool setSentFlagsPart1 ( );
bool setSentFlagsPart2 ( );
sentflags_t getSentEventEndingOrBeginningFlags ( sentflags_t sflags ,
int32_t senta ,
int32_t sentb ,
int32_t alnumCount) ;
void setSentPrettyFlag ( class Section *si ) ;
int32_t m_hiPos;
bool m_sentFlagsAreSet;
bool m_addedImpliedSections;
int32_t addImpliedSections3 ();
int32_t getDelimScore ( class Section *bro,
char method,
class Section *delim ,
class Partition *part );
int32_t getDelimHash ( char method , class Section *bro ) ;
bool addImpliedLists ( ) ;
int32_t getDelimScore2 ( class Section *bro,
char method,
class Section *delim ,
int32_t *a ,
int32_t *b );
bool hashSentBits ( class Section *sx ,
class HashTableX *vht ,
class Section *container ,
uint32_t mod ,
class HashTableX *labelTable,
char *modLabel );
bool hashSentPairs ( Section *sx ,
Section *sb ,
HashTableX *vht ,
Section *container ,
HashTableX *labelTable );
bool addSentenceSections ( ) ;
class Section *insertSubSection ( int32_t a, int32_t b, int32_t newBaseHash ) ;
int32_t splitSectionsByTag ( nodeid_t tagid ) ;
bool splitSections ( char *delimeter , int32_t dh );
class Section *m_rootSection; // the first section, aka m_firstSection
class Section *m_lastSection;
@ -706,72 +298,8 @@ class Sections {
// kinda like m_rootSection, the first sentence section that occurs
// in the document, is NULL iff no sentences in document
class Section *m_firstSent;
class Section *m_lastSent;
bool containsTagId ( class Section *si, nodeid_t tagId ) ;
bool isTagDelimeter ( class Section *si , nodeid_t tagId ) ;
bool isDelimeter ( int32_t i , char *delimeter , int32_t *delimEnd ) {
// . HACK: special case when delimeter is 0x01
// . that means we are back-to-back br tags
if ( delimeter == (char *)0x01 ) {
// must be a br tag
if ( m_tids[i] != TAG_BR ) return false;
// assume that
int32_t k = i + 1;
// bad if end
if ( k >= m_nw ) return false;
// bad if a wid
if ( m_wids[k] ) return false;
// inc if punct
if ( ! m_tids[k] ) k++;
// bad if end
if ( k >= m_nw ) return false;
// must be another br tag
if ( m_tids[k] != TAG_BR ) return false;
// mark as end i guess
*delimEnd = k + 1;
return true;
}
// no word is a delimeter
if ( m_wids[i] ) return false;
// tags "<hr" and "<br"
if ( m_wptrs[i][0] == delimeter[0] &&
m_wptrs[i][1] == delimeter[1] &&
m_wptrs[i][2] == delimeter[2] )
return true;
// if no match above, forget it
if ( m_tids[i] ) return false;
// otherwise, we are a punctuation "word"
// the bullet is 3 bytes long
if ( m_wlens[i] < 3 ) return false;
// if not a bullet, skip it (&bull)
char *p = m_wptrs[i];
char *pend = p + m_wlens[i];
for ( ; p < pend ; p++ ) {
if ( p[0] != delimeter[0] ) continue;
if ( p[1] != delimeter[1] ) continue;
if ( p[2] != delimeter[2] ) continue;
return true;
}
return false;
};
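// Worked example (added) for the 0x01 hack above: with
// m_wptrs[i]="<br>", m_wptrs[i+1]=" \n " (punct) and
// m_wptrs[i+2]="<br>", the call accepts i as a delimiter and sets
// *delimEnd = i+3, so back-to-back br tags separated only by
// whitespace count as one split point.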
};
// convert sectionType to a string
char *getSectionTypeAsStr ( int32_t sectionType );
// hash of the last 3 parent tagids
//uint32_t getSectionContentTagHash3 ( class Section *sn ) ;
// only allow this many urls per site to add sectiondb info
#define MAX_SITE_VOTERS 32
// . the key in sectiondb is basically the Section::m_tagHash
// (with a docId) and the data portion of the Rdb record is this SectionVote
// . the Sections::m_nsvt and m_osvt hash tables contain SectionVotes
@ -790,99 +318,4 @@ public:
float m_numSampled;
};
class SectionVotingTable {
public:
SectionVotingTable ( ) ;
//bool set ( Sections *sections , class RdbList *sectiondbList );
void reset () { m_svt.reset(); }
bool print ( SafeBuf *sbuf , char *title ) ;
// stock table from a sectiondb rdblist
bool addListOfVotes ( RdbList *list,
key128_t **lastKey ,
int64_t docId ,
int32_t niceness ) ;
// index our sections as flag|tagHash pairs using a termId which
// is basically our sitehash. this allows us to "vote" on what
// sections are static, dynamic, "texty" by indexing our votes into
// datedb.
bool hash ( int64_t docId ,
class HashTableX *dt ,
uint64_t siteHash64 ,
int32_t niceness ) ;
bool addVote2 ( int32_t tagHash, int32_t sectionType , float score ) {
return addVote3 ( tagHash,sectionType,score,1);};
bool addVote3 ( //class HashTableX *ttt ,
int32_t tagHash ,
int32_t sectionType ,
float score ,
float numSampled ,
bool hackFix = false ) ;
// return -1.0 if no voters!
float getScore ( Section *sn , int32_t sectionType ) {
if ( ! sn ) return -1.0;
return getScore ( sn->m_tagHash , sectionType ); };
float getScore ( int32_t tagHash , int32_t sectionType ) ;
float getNumSampled ( Section *sn , int32_t sectionType ) {
if ( ! sn ) return 0.0;
return getNumSampled ( sn->m_tagHash , sectionType ); };
float getNumSampled ( int32_t tagHash , int32_t sectionType ) ;
int32_t getNumVotes ( ) { return m_svt.getNumSlotsUsed(); };
bool init ( int32_t numSlots , char *name , int32_t niceness ) {
return m_svt.set(8,sizeof(SectionVote),numSlots,
NULL,0,false,niceness,name); };
HashTableX m_svt;
int32_t m_totalSiteVoters;
//int32_t m_totalSimilarLayouts;
};
//
// BEGIN SECTION TYPES
//
// . these are the core section types
// . these are not to be confused with the section bit flags below
// . we put these into sectiondb in the form of a SectionVote
// . the SectionVote is the data portion of the rdb record, and the key
// of the rdb record contains the url site hash and the section m_tagHash
// . in this way, a page can vote on what type of section a tag hash describes
//#define SV_TEXTY 1 // section has mostly non-hypertext words
#define SV_CLOCK 2 // DateParse2.cpp. section contains a clock
#define SV_EURDATEFMT 3 // DateParse2.cpp. contains european date fmt
#define SV_EVENT 4 // used in Events.cpp to indicate event container
#define SV_ADDRESS 5 // used in Events.cpp to indicate address container
// . HACK: the "date" is not the enum tag hash, but is the tagPairHash for this
// . every doc has just one of these describing the entire layout of the page
// . basically looking for these is same as doing a gbtaghash: query
#define SV_TAGPAIRHASH 20
// . HACK: the "date" is not the enum tag hash, but is the contentHash!
// . this allows us to detect a duplicate section even though the layout
// of the web page is not quite the same, but is from the same site
#define SV_TAGCONTENTHASH 21
// now Dates.cpp sets these too
#define SV_FUTURE_DATE 24
#define SV_PAST_DATE 25
#define SV_CURRENT_DATE 26
#define SV_SITE_VOTER 29
#define SV_TURKTAGHASH 30
#endif

@ -697,8 +697,6 @@ class SpiderRequest {
unsigned m_reserved3n :1;
unsigned m_reserved3k :1;
unsigned m_reserved3e :1;
//unsigned m_matchesUrlCrawlPattern :1;
//unsigned m_matchesUrlProcessPattern:1;
unsigned m_reserved3f :1;
unsigned m_reserved3g :1;
unsigned m_siteNumInlinksValid :1;
@ -711,30 +709,16 @@ class SpiderRequest {
// want the url's to have their links spidered. default is to make
// this 0 and to not avoid spidering the links.
unsigned m_avoidSpiderLinks:1;
// for identifying address heavy sites...
//unsigned m_tagYellowPages:1;
// when indexing urls for dmoz, i.e. the urls outputted from
// 'dmozparse urldump -s' we need to index them even if there
// was an ETCPTIMEDOUT because we have to have indexed the same
// urls that dmoz has in it in order to be identical to dmoz.
unsigned m_ignoreExternalErrors:1;
// called XmlDoc::set4() from PageSubmit.cpp?
//unsigned m_isPageSubmit:1;
//
// INTERNAL USE ONLY
//
// are we in the m_orderTree/m_doleTables/m_ipTree
//unsigned m_inOrderTree:1;
// are we doled out?
//unsigned m_doled:1;
// are we a re-add of a spiderrequest already in spiderdb added
// from xmldoc.cpp when done spidering so that the spider request
// gets back in the cache quickly?
//unsigned m_readd:1;
// . what url filter num do we match in the url filters table?
// . determines our spider priority and wait time
int16_t m_ufn;
@ -772,14 +756,6 @@ class SpiderRequest {
int32_t getRecSize () { return m_dataSize + 4 + sizeof(key128_t); }
// how much buf will we need to serialize ourselves?
//int32_t getRecSize () {
// //return m_dataSize + 4 + sizeof(key128_t); }
// return (m_url - (char *)this) + gbstrlen(m_url) + 1
// // subtract m_key and m_dataSize
// - sizeof(key_t) - 4 ;
//};
int32_t getUrlLen() { return m_dataSize -
// subtract the \0
((char *)m_url-(char *)&m_firstIp) - 1;};
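// Worked example (added, with made-up sizes): if m_dataSize is 120,
// the fixed fields from m_firstIp up to m_url span 60 bytes, and the
// url is NUL-terminated, getUrlLen() = 120 - 60 - 1 = 59.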

@ -88,23 +88,6 @@ static Label s_labels[] = {
// eventually
{GRAPH_QUANTITY,-1,"docs_indexed", .1,"%.0f docs" , -1, 0x00cc0099,"docs indexed" }
//{ "termlist_intersect",0x0000ff00},
//{ "termlist_intersect_soft",0x00008000}, // rat=0
//{ "transmit_data_nice",0x00aa00aa },
//{ "transmit_data", 0x00ff00ff },
//{ "zak_ref_1a", 0x00ccffcc },
//{ "zak_ref_1b",0x00fffacd },
//{ "get_summary", 0x0000ff},
//{ "get_summary_nice", 0x0000b0},
//{ "get_gigabits",0x00d1e1ff },
//{ "get_termlists_nice", 0x00aaaa00},
//{ "get_termlists",0x00ffff00 },
//{ "get_all_summaries", 0x008220ff},
//{ "rdb_list_merge",0x0000ffff },
//{ "titlerec_compress",0x00ffffff },
//{ "titlerec_uncompress", 0x00ffffff} ,
//{ "parm_change",0xffc0c0} // pink?
};
void drawLine3 ( SafeBuf &sb ,

@ -2014,23 +2014,6 @@ bool isQueryStopWord ( char *s , int32_t len , int64_t h , int32_t langId ) {
s_queryStopWords2[langEnglish] = s_queryStopWordsEnglish;
s_queryStopWords2[langGerman ] = s_queryStopWordsGerman;
// set up the hash table
// if ( ! s_queryStopWordTable.set ( sizeof(s_queryStopWords) * 2 ) )
// return log(LOG_INIT,"query: Could not init query "
// "stop words table.");
// // now add in all the stop words
// int32_t n = (int32_t)sizeof(s_queryStopWords)/ sizeof(char *);
// for ( int32_t i = 0 ; i < n ; i++ ) {
// char *sw = s_queryStopWords[i];
// int32_t swlen = gbstrlen ( sw );
// int64_t swh = hash64Lower ( sw , swlen );
// s_queryStopWordTable.addTerm (swh,i+1,i+1,true);
// // . add w/o accent marks too!
// // . skip "für" though because fur is an eng. word
// //if ( *sw=='f' && *(sw+1)=='ü' &&
// // *(sw+2)=='r' && swlen == 3 ) continue;
// //swh = hash64AsciiLower ( sw , swlen );
// //s_queryStopWordTable.addTerm (swh,i+1,i+1,true);
// }
for ( int32_t i = 0 ; i <= MAXLANGID ; i++ ) {
HashTableX *ht = &s_queryStopWordTables[i];
char **words = s_queryStopWords2[i];
@ -3844,9 +3827,6 @@ static char *s_commonWords[] = {
static HashTableX s_commonWordTable;
static bool s_commonWordsInitialized = false;
static HashTableX s_commonQueryWordTable;
static bool s_commonQueryWordsInitialized = false;
// for Process.cpp::resetAll() to call when exiting to free all mem
void resetStopWordTables() {
@ -3854,7 +3834,6 @@ void resetStopWordTables() {
for ( int i = 0 ; i <= MAXLANGID ; i++ )
s_queryStopWordTables[i].reset();
s_commonWordTable.reset();
s_commonQueryWordTable.reset();
}
// used by Msg24.cpp for gigabits generation
@ -3896,467 +3875,10 @@ int32_t isCommonWord ( int64_t h ) {
return s_commonWordTable.getScore ( &h );
}
static char *s_verbs[] = {
"runs",
"run",
"go",
"goes",
"going"
};
static HashTableX s_verbTable;
static bool s_verbsInitialized = false;
// used by Msg24.cpp for gigabits generation
bool isVerb ( int64_t *hp ) {
// include a bunch of foreign prepositions so they don't get required
// by the bitScores in IndexTable.cpp
if ( ! s_verbsInitialized ) {
// set up the hash table
if ( ! s_verbTable.set (8,0,sizeof(s_verbs)*2,
NULL,0,false,0,"verbs") )
return log(LOG_INIT,
"query: Could not init verbs table.");
// now add in all the stop words
int32_t n = (int32_t)sizeof(s_verbs)/ sizeof(char *);
for ( int32_t i = 0 ; i < n ; i++ ) {
char *sw = s_verbs[i];
int32_t swlen = gbstrlen ( sw );
// use the same algo that Words.cpp computeWordIds does
int64_t swh = hash64Lower_utf8 ( sw , swlen );
if ( ! s_verbTable.addKey ( &swh ) ) {
char *xx=NULL;*xx=0; }
}
s_verbsInitialized = true;
}
// get from table
return (bool)s_verbTable.isInTable ( hp );
}
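// Usage sketch (added): callers are expected to pass the same word id
// Words.cpp computes, e.g.
//   int64_t h = hash64Lower_utf8 ( "goes" , 4 );
//   bool isV = isVerb ( &h ); // true for any entry in s_verbs[]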
void resetStopWords ( ) {
s_stopWordTable.reset();
for ( int i = 0 ; i <= MAXLANGID ; i++ )
s_queryStopWordTables[i].reset();
s_commonWordTable.reset();
s_verbTable.reset();
s_commonQueryWordTable.reset();
}
static char *s_commonQueryWords[] = {
"to",
"and",
"ands",
"anding",
"anded",
"be", // "be fine" for fatboyshouseofbbq.com matching queries
"thereof",
"of",
"the",
"this",
"between",
"onto",
"too",
"every",
"always",
"more", // fix "more more" bringing up whitehouse.gov
"of",
"the",
"this",
"one",
"two",
"three",
"four",
"01",
"02",
"03",
"04",
"05",
"06",
"07",
"08",
"09",
"1",
"2",
"3",
"4",
"5",
"6",
"7",
"8",
"9",
"10",
"11",
"12",
"13",
"14",
"15",
"16",
"17",
"18",
"19",
"20",
"21",
"22",
"23",
"24",
"25",
"26",
"27",
"28",
"29",
"30",
"31",
"i","ii","iii","iv","vi","vii","viii","ix","x","xi",
"xii","xiii","xiv","xv","xvi","xvii","xviii","xix",
"xx","xxi","xxii","xxiii","xxiv","xxv","xxvi","xxvii",
"xxviii","xxix","xxx","xxxi",
"january",
"february",
"march",
"april",
"may",
"june",
"july",
"august",
"september",
"october",
"november",
"december",
"jan",
"feb",
"mar",
"apr",
"may",
"jun",
"jul",
"aug",
"sep",
"oct",
"nov",
"dec",
"2010",
"2011",
"2012",
"2013",
"2014",
"2015",
"a",
"over", // fix 'over site' for www.espn.com comeptitor pages
"am", // 'am so' for voyageofattraction.com
"be",
"being",
"been",
"so",
"soh",
"moar",
"more",
"most",
"than",
"much",
"los", // fix 'los dos y com' for www.espn.com comeptitor pages
"dos",
"view", // fix for jezebelgallery.com 'view homepage'
"viewed",
"views",
"viewing",
"homepage",
"homepages",
"webpage",
"webpages",
"home",
"homed",
"homing",
"wit", // wtf?
"homes",
"house",
"houses",
"housed",
"housing",
"page",
// fix getting 'green web' and 'green pages' for gigablast.com
// as two independent queries for a competitor
"pages",
// damn, paged is a synonym of pages
"paged",
"paging",
"info",
"infos",
"informative",
"information", // 'the information' for wcnews.com
"site",
"sites",
"sited",
"siting",
"is", // fix 'is website'
"welcome", // whitehouse.gov fix
"online",
"am", // 'am web' query
"y", // spanish for "and"
"at",
"be",
"by",
"on",
"or",
"do",
"doesn't",
"in",
"into",
"i",
"an",
"or",
"as",
"at",
"by",
"for",
"with",
"about",
"from",
"any", // stop 'any web' for diffbot.com
// german is messing us up so that two queries that should
// be basically the same "dos code" and "codes" are not! they
// should have the same synbasehash64! fix for cheatcc.com
// competitor pages from getting legal sites.
// because it matches "dos codes"
"dos",
"de",
"die",
"del",
"via",
"e",
// spanish. messing up ibm.com competitor pages.
// because it matches "es international"
"es",
// fix newser.com 'more of you' 'know you' 'know more'
"you", // "where do you" "you but" "but you"
"your",
"what",
"wat",
"where", // "and where you"
"who",
"when",
"what's",
"where's",
"who's", // 'who's who' for www.fudwatch.co.uk
"when's",
"which",
"wich",
"but", // "and but"
"ver", // fix ver ver related query everyone matches for some reason
"click", // click here is so popular!
"clicked",
"clicks",
"clicking",
"klick",
"klicked"
"klicks",
"klicking",
"here",
"per",
"a",
"b",
"c",
"d",
"e",
"f",
"g",
"h",
"i",
"j",
"k",
"l",
"m",
"n",
"o",
"p",
"q",
"r",
"s",
"t",
"u",
"v",
"w",
"x",
"y",
"z",
"innen", // wtf is this?
// fix matching queries for yahoo.com:
"inc",
"go",
"goes",
"going",
"gone",
"went",
"link",
"links",
"linked",
"hyperlinking",
"hyperlink",
"hyperlinks",
"hyperlinked",
"hyperlinking",
"exit",
"ing", // wtf?
"ed", // wtf?
"om",
"por",
"their",
"theirs",
"doh", // syn of do!
"do",
"don't",
"doesn't",
"did",
"does",
"done",
"do's",
"doing",
"hame", // wtf?
"were",
"was",
"can",
"cans",
"canning",
"canned",
"are",
"if",
"his",
"hers",
"him",
"her",
"fand", // wtf?
"s's",
"a's",
"he",
"she",
"that",
"en", // spanish?
"le", // french?
"will",
"willy",
"www",
"w3", // synonym for www
"com",
"coms", // synonym for com
"org",
"orgs",
"net", // .net
"nets",
"edu",
"gov",
"no", // fix 'no no' missing term for army-list.com
"my", // fix 'my' missing term for army-list.com
//"no", // 'no http' seems common. because we were ignoring "no"
// because it was a query stop word in portuguese!!
"it", // this hurts I.T. i guess...
"http",
"https",
"web",
"webs",
"below",
"site",
"website",
"sites",
"websites",
// until we fix it right! this shows up so much
"lincoln",
"lincolns"
};
// . used by Msg24.cpp for gigabits generation
// . h is the full wordid, not 48-bit termid
// . you can now pass in a 32-bit word hash instead of 64 and it should
// still work!!!
int32_t isCommonQueryWordInEnglish ( int64_t h64 ) {
// include a bunch of foreign prepositions so they don't get required
// by the bitScores in IndexTable.cpp
if ( ! s_commonQueryWordsInitialized ) {
// set up the hash table
int32_t ss = sizeof(s_commonQueryWords);
if ( ! s_commonQueryWordTable.set (8,4,ss*2,
NULL,0,false,0,
"commonwrds") )
return log(LOG_INIT,
"query: Could not init common words "
"table.");
// now add in all the stop words
int32_t n = (int32_t)sizeof(s_commonQueryWords)/ sizeof(char *);
for ( int32_t i = 0 ; i < n ; i++ ) {
char *sw = s_commonQueryWords[i];
int32_t swlen = gbstrlen ( sw );
// use the same algo that Words.cpp computeWordIds does
int64_t swh64 = hash64Lower_utf8 ( sw , swlen );
if ( ! s_commonQueryWordTable.addTerm ( &swh64,i+1 ) )
return false;
// if you pass in a 32-bit "h64" from hash32n()
// you must make sure it is UNSIGNED so the top
// 32 bits of the h64 are not set to 0xffffffff
// two's complement
swh64 &= 0x00000000ffffffffLL;
if ( ! s_commonQueryWordTable.addTerm ( &swh64,i+1 ) )
return false;
swh64 |= 0xffffffff00000000LL;
if ( ! s_commonQueryWordTable.addTerm ( &swh64,i+1 ) )
return false;
// . add w/o accent marks too!
// . skip "für" though because fur is an eng. word
//if ( *sw=='f' && *(sw+1)=='ü' &&
// *(sw+2)=='r' && swlen == 3 ) continue;
//swh = hash64AsciiLower ( sw , swlen );
//s_commonQueryWordTable.addTerm (swh,i+1,i+1,true);
}
s_commonQueryWordsInitialized = true;
// sanity test
int32_t tid32 = hash32n("on");
if ( !isCommonQueryWordInEnglish(tid32)){char *xx=NULL;*xx=0;}
tid32 = hash32n("web");
if ( !isCommonQueryWordInEnglish(tid32)){char *xx=NULL;*xx=0;}
}
// . all 1 char letter words are stop words
// . good for initials and some contractions
//if ( len == 1 && is_alpha_a(*s) ) return true;
// get from table
return (int32_t)s_commonQueryWordTable.getScore ( &h64 );
}
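The masking above is the whole trick: each word is stored under three keys so a lookup succeeds whether the caller holds the full 64-bit word id or a 32-bit hash that was zero- or sign-extended to 64 bits. A self-contained sketch of just that computation (the helper name is illustrative):

#include <cstdint>

static void variantsOf ( int64_t h64 , int64_t out[3] ) {
	out[0] = h64;                            // full 64-bit word id
	out[1] = h64 & 0x00000000ffffffffLL;     // zero-extended low 32
	out[2] = out[1] | 0xffffffff00000000LL;  // sign-extended low 32
}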

@ -13,27 +13,18 @@ bool isStopWord ( char *s , int32_t len , int64_t h ) ;
// used by Synonyms.cpp
bool isStopWord2 ( int64_t *h ) ;
//just a stub for now
//bool isStopWord ( UChar *s , int32_t len , int64_t h );
// . damn i forgot to include these above
// . i need these so m_bitScores in IndexTable.cpp doesn't have to require
// them! Otherwise, it's like all queries have quotes around them again...
bool isQueryStopWord ( char *s , int32_t len , int64_t h , int32_t langId ) ;
//bool isQueryStopWord ( UChar *s , int32_t len , int64_t h ) ;
// is it a COMMON word?
int32_t isCommonWord ( int64_t h ) ;
int32_t isCommonQueryWordInEnglish ( int64_t h ) ;
bool initWordTable(class HashTableX *table, char* words[],
//int32_t size ,
char *label);
bool isVerb ( int64_t *hp ) ;
// for Process.cpp::resetAll() to call when exiting to free all mem
void resetStopWordTables();

@ -661,7 +661,6 @@ int64_t Summary::getBestWindow ( Matches *matches, int32_t mm, int32_t *lasta,
}
// . we NULLify the section ptrs if we already used the word in another summary.
// . google seems to index SEC_MARQUEE, so i took that out of here
int32_t badFlags = SEC_SCRIPT|SEC_STYLE|SEC_SELECT|SEC_IN_TITLE;
if ( (bb[matchWordNum] & D_USED) || ( sp && (sp[matchWordNum]->m_flags & badFlags) ) ) {
// assume no best window
@ -1059,7 +1058,6 @@ bool Summary::getDefaultSummary ( Xml *xml, Words *words, Sections *sections, Po
int32_t bestEnd = -1;
int32_t longestConsecutive = 0;
int32_t lastAlnum = -1;
// google seems to index SEC_MARQUEE, so i took that out of here
int32_t badFlags = SEC_SCRIPT|SEC_STYLE|SEC_SELECT|SEC_IN_TITLE;
// shortcut
nodeid_t *tids = words->m_tagIds;

@ -26,12 +26,10 @@ public:
// . set m_title to the title of the document represented by "xd"
// . if getHardTitle is true will always use the title in the <title>
// tag, but if that is not present, will try dmoz titles before
// resorting to trying to guess a title from the document content
// or incoming link text.
// tag, but if that is not present, will resort to trying to guess
// a title from the document content or incoming link text.
// . uses the following:
// . title tag
// . dmoz title
// . meta title tag
// . incoming link text
// . <hX> tags at the top of the scored content

@ -1,7 +1,6 @@
#include "gb-include.h"
#include "Words.h"
#include "Phrases.h" // for isInPhrase() for hashWordIffNotInPhrase
#include "Unicode.h" // getUtf8CharSize()
#include "StopWords.h"
#include "Speller.h"
@ -108,7 +107,9 @@ int32_t countWords ( char *p ) {
bool Words::set( Xml *xml, bool computeWordIds, int32_t niceness, int32_t node1, int32_t node2 ) {
// prevent setting with the same string
if ( m_xml == xml ) { char *xx=NULL;*xx=0; }
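// Note (added): "char *xx=NULL;*xx=0;" is this codebase's deliberate
// crash-assert idiom; it segfaults (and cores) on a programming error
// instead of continuing with bad state.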
reset();
m_xml = xml;
// if xml is empty, bail
@ -171,12 +172,6 @@ bool Words::set( Xml *xml, bool computeWordIds, int32_t niceness, int32_t node1,
m_tagIds[m_numWords] |= BACKBIT;
}
//log(LOG_DEBUG, "Words: Word %"INT32": got tag %s%s (%d)",
// m_numWords,
// isBackTag(m_numWords)?"/":"",
// g_nodes[getTagId(m_numWords)].m_nodeName,
// getTagId(m_numWords));
m_numWords++;
// used by XmlDoc.cpp
@ -188,41 +183,6 @@ bool Words::set( Xml *xml, bool computeWordIds, int32_t niceness, int32_t node1,
return true;
}
bool Words::set11 ( char *s , char *send , int32_t niceness ) {
reset();
// this will make addWords() scan for tags
m_hasTags = true;
// save it
char saved = *send;
// null term
*send = '\0';
// determine rough upper bound on number of words by counting
// punct/alnum boundaries
m_preCount = countWords ( s );
// true = tagIds
bool status = allocateWordBuffers(m_preCount,true);
// deal with error now
if ( !status ) {
*send = saved;
return false;
}
// and set the words
status = addWords(s,0x7fffffff, true, niceness );
// bring it back
*send = saved;
// return error?
return status;
}
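// Note (added): because of the *send save/restore above, "send" must
// point into a writable buffer; passing the end of a string literal
// here would fault on the temporary NUL write.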
// . set words from a string
// . assume no HTML entities in the string "s"
// . s must be NULL terminated
@ -249,10 +209,7 @@ bool Words::set( char *s, bool computeWordIds, int32_t niceness ) {
bool Words::addWords( char *s, int32_t nodeLen, bool computeWordIds, int32_t niceness ) {
int32_t i = 0;
int32_t j;
//int32_t k = 0;
int32_t wlen;
//uint32_t e;
//int32_t skip;
int32_t badCount = 0;
bool hadApostrophe = false;
@ -453,21 +410,11 @@ bool Words::addWords( char *s, int32_t nodeLen, bool computeWordIds, int32_t nic
m_words [ m_numWords ] = &s[j];
m_wordLens[ m_numWords ] = wlen;
// . Lars says it's better to leave the accented chars intact
// . google agrees
// . but what about "re'sume"?
if ( computeWordIds ) {
int64_t h = hash64Lower_utf8(&s[j],wlen);
m_wordIds [m_numWords] = h;
// until we get an accent removal algo, comment this
// out and possibly use the query synonym pipeline
// to search without accents. MDW
//int64_t h2 = hash64AsciiLowerE(&s[j],wlen);
//if ( h2 != h ) m_stripWordIds [m_numWords] = h2;
//else m_stripWordIds [m_numWords] = 0LL;
//m_stripWordIds[m_numWords] = 0;
}
m_nodes[m_numWords] = 0;
if (m_tagIds) m_tagIds[m_numWords] = 0;
m_numWords++;
@ -658,7 +605,6 @@ int32_t Words::getLanguage( Sections *sections ,
return -1;
// . avoid words in these bad sections
// . google seems to index SEC_MARQUEE so i took that out of badFlags
int32_t badFlags = SEC_SCRIPT|SEC_STYLE|SEC_SELECT;
// shortcuts
int64_t *wids = m_wordIds;
@ -798,34 +744,6 @@ int32_t Words::getLanguage( Sections *sections ,
return l;
}
// get the word index at the given character position
int32_t Words::getWordAt ( char *p ) { // int32_t charPos ) {
if ( ! p ) { char *xx=NULL;*xx=0; }
if ( p < m_words[0] ) { char *xx=NULL;*xx=0; }
if ( p >= getContentEnd() ) { char *xx=NULL;*xx=0; }
int32_t step = m_numWords / 2;
int32_t i = m_numWords / 2 ;
for (;;) {
// divide it by 2 each time
step >>= 1;
// always at least one
if ( step <= 0 )
step = 1;
// is it a hit?
if ( p >= m_words[i] && p < m_words[i] + m_wordLens[i] )
return i;
// compare
if ( m_words[i] < p )
i += step;
else
i -= step;
}
return -1;
}
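// Note (added): the loop always terminates because Words covers the
// content contiguously (alnum, punct and tag "words"), so any p that
// passes the checks above lies in exactly one m_words[i] span; once
// step decays to 1 the search walks linearly onto it, which makes the
// trailing return -1 unreachable.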
// . return the value of the specified "field" within this html tag, "s"
// . the case of "field" does not matter
char *getFieldValue ( char *s ,

21
Words.h

@ -56,8 +56,6 @@ class Words {
// . html tags are NOT parsed out
bool set( char *s, bool computeIds, int32_t niceness );
bool set11 ( char *s , char *send , int32_t niceness ) ;
// . similar to above
// . but we temporarily stick a \0 @ s[slen] for parsing purposes
bool set( char *s, int32_t slen, bool computeIds, int32_t niceness = 0 );
@ -112,8 +110,6 @@ class Words {
return size;
}
int32_t getWordAt ( char *charPos );
// . CAUTION: don't call this for punct "words"... it's bogus for them
// . this is only for alnum "words"
int64_t getWordId( int32_t n ) const {
@ -121,16 +117,11 @@ class Words {
}
bool isStopWord ( int32_t n ) {
return ::isStopWord(m_words [n],
m_wordLens[n],
m_wordIds [n]);
return ::isStopWord( m_words[n], m_wordLens[n], m_wordIds[n] );
}
bool isQueryStopWord ( int32_t n , int32_t langId ) {
return ::isQueryStopWord(m_words [n],
m_wordLens[n],
m_wordIds [n],
langId);
return ::isQueryStopWord( m_words[n], m_wordLens[n], m_wordIds[n], langId );
}
@ -180,13 +171,7 @@ class Words {
return false;
}
bool isSpaces ( int32_t n ) {
for ( int32_t i = 0 ; i < m_wordLens[n] ; i++ )
if ( ! is_wspace_utf8(&m_words[n][i]) ) return false;
return true;
}
bool isSpaces2 ( int32_t n , int32_t starti ) {
bool isSpaces ( int32_t n , int32_t starti = 0 ) {
for ( int32_t i = starti ; i < m_wordLens[n] ; i++ )
if ( ! is_wspace_utf8(&m_words[n][i]) ) return false;
return true;

4034
XmlDoc.cpp

File diff suppressed because it is too large

355
XmlDoc.h

@ -18,38 +18,30 @@
#ifndef _XMLDOC_H_
#define _XMLDOC_H_
//#include "HashTableX.h"
#include "Lang.h"
#include "Words.h"
#include "Bits.h"
#include "Pos.h"
#include "Phrases.h"
//#include "Synonyms.h"
//#include "Weights.h"
#include "Xml.h"
#include "SafeBuf.h"
#include "Images.h"
#include "Sections.h"
#include "Msge0.h"
#include "Msge1.h"
//#include "Msge2.h"
#include "Msg4.h"
#include "SearchInput.h"
#include "Msg40.h"
//#include "IndexList.h"
#include "Msg0.h"
#include "Msg22.h"
#include "Tagdb.h"
#include "Url.h"
#include "Linkdb.h"
//#include "LinkInfo.h"
//#include "Msg25.h"
#include "MsgC.h"
#include "Msg13.h"
#include "RdbList.h"
#include "SiteGetter.h"
//#include "CollectionRec.h"
#include "Msg20.h"
#include "Matches.h"
#include "Query.h"
@ -62,24 +54,15 @@
#include "PingServer.h"
#include "Json.h"
//#define XMLDOC_MAX_AD_IDS 4
//#define XMLDOC_ADLEN 64
#define MAXFRAGWORDS 80000
#define MAX_WIKI_DOCIDS 20
#define MAX_TAG_PAIR_HASHES 100
#include "Msg40.h"
//#define SAMPLE_VECTOR_SIZE (32*4)
#define POST_VECTOR_SIZE (32*4)
#define XD_GQ_MAX_SIZE 1000
#define XD_MAX_GIGABIT_HASHES 48
#define XD_MAX_AD_IDS 5
#define MAX_LINK_TEXT_LEN 512
#define MAX_SURROUNDING_TEXT_WIDTH 600
@ -280,11 +263,11 @@ public:
char *ptr_firstUrl;
char *ptr_redirUrl;
char *ptr_rootTitleBuf;
int32_t *ptr_gigabitHashes;
int32_t *ptr_gigabitScores;
int32_t *ptr_unused12;
int32_t *ptr_unused13;
void *ptr_unused8;
int64_t *ptr_wikiDocIds;
rscore_t *ptr_wikiScores;
int64_t *ptr_unused10;
rscore_t *ptr_unused11;
char *ptr_imageData;
int32_t *ptr_unused6;
int32_t *ptr_unused7;
@ -305,11 +288,11 @@ public:
int32_t size_firstUrl;
int32_t size_redirUrl;
int32_t size_rootTitleBuf;
int32_t size_gigabitHashes;
int32_t size_gigabitScores;
int32_t size_unused12;
int32_t size_unused13;
int32_t size_unused8;
int32_t size_wikiDocIds;
int32_t size_wikiScores;
int32_t size_unused10;
int32_t size_unused11;
int32_t size_imageData;
int32_t size_unused6;
int32_t size_unused7;
@ -404,9 +387,6 @@ public:
SafeBuf m_spiderStatusDocMetaList;
char *getIsAdult ( ) ;
int64_t **getWikiDocIds ( ) ;
void gotWikiResults ( class UdpSlot *slot );
//class HashTableX *getClockCandidatesTable();
int32_t getOutlinkAge ( int32_t outlinkNum ) ;
char *getIsPermalink ( ) ;
char *getIsUrlPermalinkFormat ( ) ;
@ -421,19 +401,7 @@ public:
class Bits *getBitsForSummary ( ) ;
class Pos *getPos ( );
class Phrases *getPhrases ( ) ;
//class Synonyms *getSynonyms ( );
class Sections *getExplicitSections ( ) ;
class Sections *getImpliedSections ( ) ;
class Sections *getSections ( ) ;
class Sections *getSectionsWithDupStats ( );
//BR 20160106 removed: class SafeBuf *getInlineSectionVotingBuf();
bool gotSectionFacets( class Multicast *mcast );
class SectionStats *getSectionStats ( uint32_t secHash32, uint32_t sentHash32, bool cacheOnly );
class SectionVotingTable *getOldSectionVotingTable();
class SectionVotingTable *getNewSectionVotingTable();
char **getSectionsReply ( ) ;
char **getSectionsVotes ( ) ;
HashTableX *getSectionVotingTable();
int32_t *getLinkSiteHashes ( );
class Links *getLinks ( bool doQuickSet = false ) ;
class HashTableX *getCountTable ( ) ;
@ -442,36 +410,21 @@ public:
int32_t *getSummaryVector ( ) ;
int32_t *getPageSampleVector ( ) ;
int32_t *getPostLinkTextVector ( int32_t linkNode ) ;
int32_t computeVector ( class Sections *sections, class Words *words,
uint32_t *vec , int32_t start = 0 , int32_t end = -1 );
int32_t computeVector ( class Words *words, uint32_t *vec , int32_t start = 0 , int32_t end = -1 );
float *getTagSimilarity ( class XmlDoc *xd2 ) ;
float *getGigabitSimilarity ( class XmlDoc *xd2 ) ;
float *getPageSimilarity ( class XmlDoc *xd2 ) ;
float *getPercentChanged ( );
uint64_t *getFuzzyDupHash ( );
int64_t *getExactContentHash64();
int64_t *getLooseContentHash64();
class RdbList *getDupList ( ) ;
class RdbList *getLikedbListForReq ( );
class RdbList *getLikedbListForIndexing ( );
char *getIsDup ( ) ;
char *isDupOfUs ( int64_t d ) ;
uint32_t *getGigabitVectorScorelessHash ( ) ;
int32_t **getGigabitHashes ( );
char *getGigabitQuery ( ) ;
char *getMetaDescription( int32_t *mdlen ) ;
char *getMetaSummary ( int32_t *mslen ) ;
char *getMetaKeywords( int32_t *mklen ) ;
char *getMetaGeoPlacename( int32_t *mgplen );
bool addGigabits ( char *s , int64_t docId , uint8_t langId ) ;
bool addGigabits2 ( char *s,int32_t slen,int64_t docId,uint8_t langId);
bool addGigabits ( class Words *ww ,
int64_t docId,
class Sections *sections,
//class Weights *we ,
uint8_t langId );
int32_t *getSiteSpiderQuota ( ) ;
class Url *getCurrentUrl ( ) ;
class Url *getFirstUrl() ;
@ -626,10 +579,6 @@ public:
char *addOutlinkSpiderRecsToMetaList ( );
//bool addTable96 ( class HashTableX *tt1 ,
// int32_t date1 ,
// bool nosplit ) ;
int32_t getSiteRank ();
bool addTable144 ( class HashTableX *tt1 ,
int64_t docId ,
@ -637,11 +586,6 @@ public:
bool addTable224 ( HashTableX *tt1 ) ;
//bool addTableDate ( class HashTableX *tt1 , //T<key128_t,char> *tt1
// uint64_t docId ,
// uint8_t rdbId ,
// bool nosplit ) ;
bool addTable128 ( class HashTableX *tt1 , // T <key128_t,char>*tt1
uint8_t rdbId ,
bool forDelete ) ;
@ -662,10 +606,7 @@ public:
bool hashUrl ( class HashTableX *table, bool urlOnly );
bool hashDateNumbers ( class HashTableX *tt );
bool hashSections ( class HashTableX *table ) ;
bool hashIncomingLinkText ( class HashTableX *table ,
bool hashAnomalies ,
bool hashNonAnomalies ) ;
bool hashIncomingLinkText( class HashTableX *table, bool hashAnomalies, bool hashNonAnomalies );
bool hashLinksForLinkdb ( class HashTableX *table ) ;
bool hashNeighborhoods ( class HashTableX *table ) ;
bool hashRSSInfo ( class HashTableX *table ) ;
@ -683,11 +624,8 @@ public:
bool hashTagRec ( class HashTableX *table ) ;
bool hashPermalink ( class HashTableX *table ) ;
bool hashVectors(class HashTableX *table ) ;
// BR 20160106 removed: bool hashAds(class HashTableX *table ) ;
class Url *getBaseUrl ( ) ;
// BR 20160106 removed: bool hashSubmitUrls ( class HashTableX *table ) ;
// BR 20160106 removed: bool hashImageStuff ( class HashTableX *table ) ;
bool hashIsAdult ( class HashTableX *table ) ;
void set20 ( Msg20Request *req ) ;
@ -700,8 +638,6 @@ public:
class Title *getTitle ();
class Summary *getSummary () ;
char *getHighlightedSummary ();
SafeBuf *getSampleForGigabits ( ) ;
SafeBuf *getSampleForGigabitsJSON ( ) ;
char *getIsNoArchive ( ) ;
int32_t *getUrlFilterNum();
char *getIsLinkSpam ( ) ;
@ -709,64 +645,21 @@ public:
char *getIsErrorPage ( ) ;
char* matchErrorMsg(char* p, char* pend );
bool hashWords ( //int32_t wordStart ,
//int32_t wordEnd ,
class HashInfo *hi ) ;
bool hashSingleTerm ( int64_t termId ,
class HashInfo *hi ) ;
bool hashSingleTerm ( char *s ,
int32_t slen ,
class HashInfo *hi );
bool hashString ( class HashTableX *ht ,
//class Weights *we ,
class Bits *bits ,
char *s ,
int32_t slen ) ;
bool hashString ( char *s ,
int32_t slen ,
class HashInfo *hi ) ;
bool hashString ( char *s ,
class HashInfo *hi ) ;
bool hashWords( class HashInfo *hi );
bool hashSingleTerm( int64_t termId, class HashInfo *hi );
bool hashSingleTerm( char *s, int32_t slen, class HashInfo *hi );
bool hashString( class HashTableX *ht, class Bits *bits, char *s, int32_t slen );
bool hashString( char *s, int32_t slen, class HashInfo *hi );
bool hashString( char *s, class HashInfo *hi );
bool hashWords3( class HashInfo *hi, class Words *words, class Phrases *phrases, class Synonyms *synonyms,
class Sections *sections, class HashTableX *countTable, char *fragVec, char *wordSpamVec,
char *langVec, char docLangId, class SafeBuf *pbuf, class HashTableX *wts,
class SafeBuf *wbuf, int32_t niceness );
bool hashWords3 ( //int32_t wordStart ,
//int32_t wordEnd ,
class HashInfo *hi ,
class Words *words ,
class Phrases *phrases ,
class Synonyms *synonyms ,
class Sections *sections ,
class HashTableX *countTable ,
char *fragVec ,
char *wordSpamVec ,
char *langVec ,
char docLangId , // default lang id
class SafeBuf *pbuf ,
class HashTableX *wts ,
class SafeBuf *wbuf ,
int32_t niceness );
bool hashString3 ( char *s ,
int32_t slen ,
class HashInfo *hi ,
class HashTableX *countTable ,
class SafeBuf *pbuf ,
class HashTableX *wts ,
class SafeBuf *wbuf ,
int32_t version ,
int32_t siteNumInlinks ,
int32_t niceness );
//bool hashSectionTerm ( char *term ,
// class HashInfo *hi ,
// int32_t sentHash32 ) ;
bool hashFacet1 ( char *term, class Words *words , HashTableX *dt) ;
bool hashFacet2 ( char *prefix,char *term,int32_t val32, HashTableX *dt,
bool shardByTermId = false ) ;
bool hashString3( char *s, int32_t slen, class HashInfo *hi, class HashTableX *countTable,
class SafeBuf *pbuf, class HashTableX *wts, class SafeBuf *wbuf, int32_t version,
int32_t siteNumInlinks, int32_t niceness );
// gbfieldmatch:
bool hashFieldMatchTerm ( char *val, int32_t vlen, class HashInfo *hi);
@ -788,8 +681,6 @@ public:
FacetValHash_t fvh ) ;
bool storeFacetValuesSite ( char *qs , SafeBuf *sb ,
FacetValHash_t fvh );
bool storeFacetValuesSections ( char *qs , class SafeBuf *sb ,
FacetValHash_t fvh ) ;
bool storeFacetValuesHtml ( char *qs , class SafeBuf *sb ,
FacetValHash_t fvh ) ;
bool storeFacetValuesXml ( char *qs , class SafeBuf *sb ,
@ -819,16 +710,12 @@ public:
public:
// stuff set from the key of the titleRec, above the compression area
//key_t m_key;
int64_t m_docId;
char *m_ubuf;
int32_t m_ubufSize;
int32_t m_ubufAlloc;
// does this page link to gigablast, or have a search form to it?
//bool searchboxToGigablast();
// private:
// when we started spidering it, in milliseconds since the epoch
@ -843,16 +730,6 @@ public:
int64_t m_setTime;
int64_t m_cpuSummaryStartTime;
// timers
int64_t m_beginSEOTime;
int64_t m_beginTimeAllMatch;
int64_t m_beginTimeMatchUrl;
int64_t m_beginTimeFullQueries;
int64_t m_beginTimeLinks;
//int64_t m_beginMsg98s;
int64_t m_beginRelatedQueries;
int64_t m_beginMsg95s;
// . these should all be set using set*() function calls so their
// individual validity flags can be set to true, and successive
// calls to their corresponding get*() functions will not core
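// a minimal sketch of that convention (member names illustrative, not
// actual members of this class):
//
// void XmlDoc::setFoo ( int32_t foo ) {
//	m_foo = foo;
//	m_fooValid = true;
// }
// int32_t *XmlDoc::getFoo ( ) {
//	if ( ! m_fooValid ) { char *xx=NULL;*xx=0; } // deliberate core
//	return &m_foo;
// }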
@ -873,8 +750,6 @@ public:
int64_t m_firstUrlHash64;
Url m_currentUrl;
//char *m_coll;
//char m_collBuf[MAX_COLL_LEN+1]; // include \0
CollectionRec *m_lastcr;
collnum_t m_collnum;
int32_t m_lastCollRecResetCount;
@ -908,91 +783,24 @@ public:
Bits m_bits2;
Pos m_pos;
Phrases m_phrases;
//Synonyms m_synonyms;
SafeBuf m_synBuf;
//Weights m_weights;
Sections m_sections;
// a hack storage thing used by Msg13.cpp
class Msg13Request *m_hsr;
Section *m_si;
//Section *m_nextSection;
//Section *m_lastSection;
int32_t m_mcastRequestsOut;
int32_t m_mcastRequestsIn;
int32_t m_secStatsErrno;
char *m_queryBuf;
Msg39Request *m_msg39RequestArray;
SafeBuf m_mcastBuf;
Multicast *m_mcastArray;
//char *m_inUse;
//Query *m_queryArray;
//Query *m_sharedQuery;
bool m_gotDupStats;
//Query m_q4;
//Msg3a m_msg3a;
//Msg39Request m_r39;
Msg39Request m_mr2;
SectionStats m_sectionStats;
HashTableX m_sectionStatsTable;
//char m_sectionHashQueryBuf[128];
// also set in getSections()
int32_t m_maxVotesForDup;
// . for rebuild logging of what's changed
// . Repair.cpp sets these based on titlerec
char m_logLangId;
int32_t m_logSiteNumInlinks;
SectionVotingTable m_nsvt;
SectionVotingTable m_osvt;
int32_t m_numSectiondbReads;
int32_t m_numSectiondbNeeds;
key128_t m_sectiondbStartKey;
RdbList m_secdbList;
int32_t m_sectiondbRecall;
bool m_gotFacets;
SafeBuf m_tmpBuf2;
SafeBuf m_inlineSectionVotingBuf;
//HashTableX m_rvt;
//Msg17 m_msg17;
//char *m_cachedRootVoteRec;
//int32_t m_cachedRootVoteRecSize;
//bool m_triedVoteCache;
//bool m_storedVoteCache;
//SafeBuf m_cacheRecBuf;
SafeBuf m_timeAxisUrl;
HashTableX m_turkVotingTable;
HashTableX m_turkBitsTable;
uint32_t m_confirmedTitleContentHash ;
uint32_t m_confirmedTitleTagHash ;
// turk voting tag rec
TagRec m_vtr;
// tagrec of banned turks
TagRec m_bannedTurkRec;
// and the table of the hashed banned turk users
HashTableX m_turkBanTable;
// used for displaying turk votes...
HashTableX m_vctab;
HashTableX m_vcduptab;
Images m_images;
HashTableX m_countTable;
HttpMime m_mime;
TagRec m_tagRec;
SafeBuf m_tagRecBuf;
// copy of m_oldTagRec but with our modifications, if any
//TagRec m_newTagRec;
SafeBuf m_newTagBuf;
SafeBuf m_fragBuf;
SafeBuf m_wordSpamBuf;
@ -1002,9 +810,6 @@ public:
class SafeBuf *m_savedSb;
class HttpRequest *m_savedHr;
char m_savedChar;
// validity flags. on reset() all these are set to false.
char m_VALIDSTART;
// DO NOT add validity flags above this line!
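// (bracketing every flag between m_VALIDSTART and m_VALIDEND presumably
// lets reset() wipe them all with a single call, something like
// memset ( &m_VALIDSTART , 0 , &m_VALIDEND - &m_VALIDSTART + 1 );
// which is why nothing may be added outside the two markers)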
@ -1013,7 +818,6 @@ public:
char m_addedSpiderReplySizeValid;
char m_addedStatusDocSizeValid;
char m_downloadStartTimeValid;
//char m_docQualityValid;
char m_siteValid;
char m_startTimeValid;
char m_currentUrlValid;
@ -1025,7 +829,6 @@ public:
char m_lastUrlValid;
char m_docIdValid;
char m_availDocIdValid;
//char m_collValid;
char m_tagRecValid;
char m_robotsTxtLenValid;
char m_tagRecDataValid;
@ -1034,7 +837,6 @@ public:
char m_filteredRootTitleBufValid;
char m_titleBufValid;
char m_fragBufValid;
char m_inlineSectionVotingBufValid;
char m_wordSpamBufValid;
char m_finalSummaryBufValid;
char m_matchingQueryBufValid;
@ -1042,32 +844,24 @@ public:
char m_relatedQueryBufValid;
char m_queryLinkBufValid;
char m_redirSpiderRequestValid;
//char m_queryPtrsValid;
char m_queryOffsetsValid;
//char m_queryPtrsSortedValid;
char m_queryPtrsWholeValid;
char m_relatedDocIdBufValid;
char m_topMatchingQueryBufValid;
char m_relatedDocIdsScoredBufValid;
char m_relatedDocIdsWithTitlesValid;
char m_relatedTitleBufValid;
//char m_queryLinkBufValid;
char m_missingTermBufValid;
char m_matchingTermBufValid;
//char m_relPtrsValid;
char m_sortedPosdbListBufValid;
char m_wpSortedPosdbListBufValid;
char m_termListBufValid;
char m_insertableTermsBufValid;
char m_scoredInsertableTermsBufValid;
//char m_iwfiBufValid; // for holding WordFreqInfo instances
char m_wordPosInfoBufValid;
char m_recommendedLinksBufValid;
//char m_queryHashTableValid;
char m_queryOffsetTableValid;
//char m_socketWriteBufValid;
//char m_numBannedOutlinksValid;
char m_hopCountValid;
char m_isInjectingValid;
char m_isImportingValid;
@ -1091,35 +885,19 @@ public:
char m_posValid;
char m_isUrlBadYearValid;
char m_phrasesValid;
//char m_synonymsValid;
//char m_weightsValid;
char m_sectionsValid;
char m_subSentsValid;
char m_osvtValid;
char m_nsvtValid;
//char m_rvtValid;
char m_turkVotingTableValid;
char m_turkBitsTableValid;
char m_turkBanTableValid;
char m_vctabValid;
char m_explicitSectionsValid;
char m_impliedSectionsValid;
char m_sectionVotingTableValid;
char m_imageDataValid;
char m_imagesValid;
char m_msge0Valid;
char m_msge1Valid;
//char m_msge2Valid;
//char m_sampleVectorValid;
char m_gigabitHashesValid;
//char m_oldsrValid;
char m_sreqValid;
char m_srepValid;
bool m_ipValid;
bool m_firstIpValid;
bool m_spideredTimeValid;
//bool m_nextSpiderTimeValid;
bool m_indexedTimeValid;
bool m_firstIndexedValid;
bool m_isInIndexValid;
@ -1127,26 +905,16 @@ public:
bool m_outlinksAddedDateValid;
bool m_countryIdValid;
bool m_bodyStartPosValid;
/*
bool m_titleWeightValid;
bool m_headerWeightValid;
bool m_urlPathWeightValid;
bool m_externalLinkTextWeightValid;
bool m_internalLinkTextWeightValid;
bool m_conceptWeightValid;
*/
bool m_httpStatusValid;
bool m_crawlDelayValid;
bool m_finalCrawlDelayValid;
bool m_titleRecKeyValid;
bool m_wikiDocIdsValid;
bool m_versionValid;
bool m_rawUtf8ContentValid;
bool m_expandedUtf8ContentValid;
bool m_utf8ContentValid;
bool m_isAllowedValid;
//bool m_tryAgainTimeDeltaValid;
//bool m_eliminateMenusValid;
bool m_redirUrlValid;
bool m_redirCookieBufValid;
bool m_metaRedirUrlValid;
@ -1163,11 +931,9 @@ public:
bool m_redirErrorValid;
bool m_domHash32Valid;
bool m_contentHash32Valid;
//bool m_tagHash32Valid;
bool m_tagPairHash32Valid;
bool m_spiderLinksValid;
//bool m_nextSpiderPriorityValid;
bool m_firstIndexedDateValid;
bool m_isPermalinkValid;
@ -1186,8 +952,6 @@ public:
bool m_dupListValid;
bool m_likedbListValid;
bool m_isDupValid;
bool m_gigabitVectorHashValid;
bool m_gigabitQueryValid;
bool m_metaDescValid;
bool m_metaSummaryValid;
bool m_metaKeywordsValid;
@ -1196,23 +960,16 @@ public:
bool m_oldDocValid;
bool m_extraDocValid;
bool m_rootDocValid;
//bool m_gatewayDocValid;
bool m_oldMetaListValid;
bool m_oldTitleRecValid;
bool m_rootTitleRecValid;
bool m_isIndexedValid;
bool m_siteNumInlinksValid;
//bool m_siteNumInlinksUniqueIpValid;//FreshValid;
//bool m_siteNumInlinksUniqueCBlockValid;//sitePopValid
//bool m_siteNumInlinksTotalValid;
bool m_siteNumInlinks8Valid;
bool m_siteLinkInfoValid;
bool m_isWWWDupValid;
bool m_linkInfo1Valid;
bool m_linkSiteHashesValid;
bool m_sectionsReplyValid;
bool m_sectionsVotesValid;
bool m_sectiondbDataValid;
bool m_placedbDataValid;
bool m_siteHash64Valid;
bool m_siteHash32Valid;
@ -1228,7 +985,6 @@ public:
bool m_isSiteRootValid;
bool m_wasContentInjectedValid;
bool m_outlinkHopCountVectorValid;
//bool m_isSpamValid;
bool m_isFilteredValid;
bool m_urlFilterNumValid;
bool m_numOutlinksAddedValid;
@ -1245,7 +1001,6 @@ public:
bool m_titleValid;
bool m_htbValid;
bool m_collnumValid;
//bool m_twidsValid;
bool m_termId32BufValid;
bool m_termInfoBufValid;
bool m_newTermInfoBufValid;
@ -1254,9 +1009,6 @@ public:
bool m_spiderStatusDocMetaListValid;
bool m_isCompromisedValid;
bool m_isNoArchiveValid;
//bool m_isVisibleValid;
//bool m_clockCandidatesTableValid;
//bool m_clockCandidatesDataValid;
bool m_titleRecBufValid;
bool m_isLinkSpamValid;
bool m_isErrorPageValid;
@ -1280,19 +1032,9 @@ public:
// DO NOT add validity flags below this line!
char m_VALIDEND;
// more stuff
//char *m_utf8Content;
//int32_t m_utf8ContentLen;
// use this stuff for getting wiki docids that match our doc's gigabits
//Query m_wq;
//SearchInput m_si;
//Msg40 m_msg40;
bool m_printedMenu;
//HashTableX m_clockCandidatesTable;
//SafeBuf m_cctbuf;
int32_t m_urlPubDate;
//int32_t m_urlAge;
char m_isUrlPermalinkFormat;
uint8_t m_summaryLangId;
int32_t m_tagPairHashVec[MAX_TAG_PAIR_HASHES];
@ -1306,7 +1048,6 @@ public:
int32_t m_postVec[POST_VECTOR_SIZE/4];
int32_t m_postVecSize;
float m_tagSimilarity;
float m_gigabitSimilarity;
float m_pageSimilarity;
float m_percentChanged;
bool m_unchanged;
@ -1330,17 +1071,6 @@ public:
Msg22 m_msg22d;
Msg22 m_msg22e;
Msg22 m_msg22f;
//int32_t m_collLen;
uint32_t m_gigabitVectorHash;
char m_gigabitQuery [XD_GQ_MAX_SIZE];
int32_t m_gigabitHashes [XD_MAX_GIGABIT_HASHES];
int32_t m_gigabitScores [XD_MAX_GIGABIT_HASHES];
char *m_gigabitPtrs [XD_MAX_GIGABIT_HASHES];
// for debug printing really
class GigabitInfo *m_top[100];
int32_t m_numTop;
//char m_metaDesc[1025];
//char m_metaKeywords[1025];
// these now reference directly into the html src so our
// WordPosInfo::m_wordPtr algo works in seo.cpp
char *m_metaDesc;
@ -1355,11 +1085,9 @@ public:
int32_t m_siteSpiderQuota;
//int32_t m_numBannedOutlinks;
class XmlDoc *m_oldDoc;
class XmlDoc *m_extraDoc;
class XmlDoc *m_rootDoc;
//class XmlDoc *m_gatewayDoc;
RdbList m_oldMetaList;
char *m_oldTitleRec;
int32_t m_oldTitleRecSize;
@ -1377,10 +1105,7 @@ public:
int32_t m_tagdbCollLen;
Url m_extraUrl;
//int32_t m_siteNumInlinksFresh;
//int32_t m_sitePop;
uint8_t m_siteNumInlinks8;
//int32_t m_siteNumInlinks;
LinkInfo m_siteLinkInfo;
SafeBuf m_mySiteLinkInfoBuf;
SafeBuf m_myPageLinkInfoBuf;
@ -1391,7 +1116,6 @@ public:
char m_useSiteLinkBuf;
char m_usePageLinkBuf;
char m_printInXml;
//Msg25 m_msg25;
SafeBuf m_tmpBuf11;
SafeBuf m_tmpBuf12;
Multicast m_mcast11;
@ -1399,7 +1123,6 @@ public:
// lists from cachedb for msg25's msg20 replies serialized
RdbList m_siteReplyList;
RdbList m_pageReplyList;
//void (* m_masterLoopWrapper) (void *state);
MsgC m_msgc;
bool m_isAllowed;
bool m_forwardDownloadRequest;
@ -1410,10 +1133,6 @@ public:
// for limiting # of iframe tag expansions
int32_t m_numExpansions;
char m_newOnly;
//int32_t m_tryAgainTimeDelta;
//int32_t m_sameIpWait;
//int32_t m_sameDomainWait;
//int32_t m_maxSpidersPerDomain;
char m_isWWWDup;
char m_calledMsg0b;
@ -1424,24 +1143,14 @@ public:
class RdbList *m_ulist;
void *m_hack;
class XmlDoc *m_hackxd;
//class LinkInfo *m_linkInfo1Ptr;
char *m_linkInfoColl;
//char m_injectedReply;
//int32_t m_minInlinkerHopCount;
//class LinkInfo *m_linkInfo2Ptr;
SiteGetter m_siteGetter;
int64_t m_siteHash64;
//char *m_site;
//int32_t m_siteLen;
//Url m_siteUrl;
int32_t m_siteHash32;
char *m_httpReply;
//char m_downloadAttempted;
char m_incrementedAttemptsCount;
char m_incrementedDownloadCount;
char m_redirectFlag;
//char m_isScraping;
//char m_throttleDownload;
char m_spamCheckDisabled;
char m_useRobotsTxt;
int32_t m_robotsTxtLen;
@ -1455,15 +1164,12 @@ public:
int32_t m_filteredContentMaxSize;
char m_calledThread;
int32_t m_errno;
//class CollectionRec *m_cr;
//int32_t m_utf8ContentAllocSize;
int32_t m_hostHash32a;
int32_t m_hostHash32b;
int32_t m_domHash32;
int32_t m_priorityQueueNum;
// this points into m_msge0 i guess
//class TagRec **m_outlinkTagRecVector;
Msge0 m_msge0;
// this points into m_msge1 i guess
@ -1729,8 +1435,6 @@ public:
char *m_wikiqbuf;
int32_t m_wikiqbufSize;
int64_t m_wikiDocIds [ MAX_WIKI_DOCIDS ];
rscore_t m_wikiScores [ MAX_WIKI_DOCIDS ];
bool m_registeredSleepCallback;
bool m_addedNegativeDoledbRec;
@ -1741,16 +1445,12 @@ public:
int32_t m_niceness;
bool m_usePosdb ;
//bool m_useDatedb ;
bool m_useClusterdb ;
bool m_useLinkdb ;
bool m_useSpiderdb ;
bool m_useTitledb ;
bool m_useTagdb ;
bool m_usePlacedb ;
//bool m_useTimedb ;
bool m_useSectiondb ;
//bool m_useRevdb ;
bool m_useSecondaryRdbs ;
int32_t m_linkeeQualityBoost;
@ -1762,10 +1462,7 @@ public:
bool m_storeTermListInfo;
char m_sortTermListBy;
SafeBuf m_sectiondbData;
//char *m_sectiondbData;
char *m_placedbData;
//int32_t m_sectiondbDataSize;
int32_t m_placedbDataSize;
// we now have HashInfo to replace this
@ -1861,6 +1558,8 @@ public:
void *finalState ,
void (* finalCallback)(void *));
void logQueryTiming(const char* function, int64_t startTime);
bool doInjectLoop ( );
void doneInjecting ( class XmlDoc *xd );
int32_t m_i;

@ -184,24 +184,13 @@ static bool storeTerm ( char *s ,
// we know the termlist is small, or the termlist is being used for spidering
// or parsing purposes and is usually not sent across the network.
bool XmlDoc::hashNoSplit ( HashTableX *tt ) {
//if ( m_pbuf )
// m_pbuf->safePrintf("<h3>Terms which are immune to indexdb "
// "splitting:</h3>");
//if ( m_skipIndexing ) return true;
// this should be ready to go and not block!
int64_t *pch64 = getExactContentHash64();
//int64_t *pch64 = getLooseContentHash64();
if ( ! pch64 || pch64 == (void *)-1 ) { char *xx=NULL;*xx=0; }
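// ( "char *xx=NULL;*xx=0;" is this codebase's hard assert: a deliberate
// NULL write that segfaults on an impossible state so the core dump
// points straight at the violated invariant )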
// shortcut
Url *fu = getFirstUrl();
//BR 20160117: removed: if ( ! hashVectors ( tt ) ) return false;
// constructor should set to defaults automatically
HashInfo hi;
hi.m_hashGroup = HASHGROUP_INTAG;
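// a hedged sketch of the step this hunk leads into (the exact prefix is
// an assumption, it is not shown in this diff): the content hash gets
// indexed as a single no-split term, sharded by termid so the whole
// termlist stays on one host:
//
// char buf[32];
// int32_t blen = sprintf ( buf , "%"UINT64"" , (uint64_t)*pch64 );
// hi.m_tt = tt;
// hi.m_prefix = "gbcontenthash"; // assumed prefix, for illustration
// hi.m_shardByTermId = true; // assumed, per the no-split comment above
// if ( ! hashSingleTerm ( buf , blen , &hi ) ) return false;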
@ -1869,18 +1858,7 @@ bool XmlDoc::hashUrl ( HashTableX *tt, bool urlOnly ) { // , bool isStatusDoc )
}
/////////////
//
// CHROME DETECTION
//
// we search for these terms we hash here in getSectionsWithDupStats()
// so we can remove chrome.
//
/////////////
// . returns false and sets g_errno on error
// . copied Url2.cpp into here basically, so we can now dump Url2.cpp
bool XmlDoc::hashSections ( HashTableX *tt ) {
// BR 20160106: No longer store xpath-hashes in posdb as we do not use them.
return true;
@ -2706,134 +2684,14 @@ bool XmlDoc::hashPermalink ( HashTableX *tt ) {
}
//hash the vectors; the tag pair, gigabit and sample vector hashing has
//been removed, so only the wikipedia docid terms below remain
bool XmlDoc::hashVectors ( HashTableX *tt ) {
setStatus ( "hashing vectors" );
int32_t blen;
char buf[32];
HashInfo hi;
hi.m_tt = tt;
hi.m_shardByTermId = true;
/*
BR 20160117 removed
int32_t score = *getSiteNumInlinks8() * 256;
if ( score <= 0 ) score = 1;
//char *field;
//char *descr;
//h = m_tagVector.getVectorHash();
uint32_t tph = *getTagPairHash32();
blen = sprintf(buf,"%"UINT32"", tph);
//field = "gbtagvector";
//descr = "tag vector hash";
// update hash parms
HashInfo hi;
hi.m_tt = tt;
hi.m_hashGroup = HASHGROUP_INTAG;
hi.m_prefix = "gbtagvector";
hi.m_desc = "tag vector hash";
hi.m_shardByTermId = true;
// this returns false on failure
if ( ! hashString ( buf,blen, &hi ) ) return false;
*/
/*
BR 20160106 removed
uint32_t h = *getGigabitVectorScorelessHash();
blen = sprintf(buf,"%"UINT32"",(uint32_t)h);
// udpate hash parms
hi.m_prefix = "gbgigabitvector";
hi.m_desc = "gigabit vector hash";
// this returns false on failure
if ( ! hashString ( buf,blen,&hi) ) return false;
*/
// . dup checking uses the two hashes above, not this hash!!! MDW
// . i think this vector is just used to see if the page changed
// significantly since last spidering
// . it is used by getPercentChanged() and by Dates.cpp
// . sanity check
//if ( ! m_pageSampleVecValid ) { char *xx=NULL;*xx=0; }
//int32_t *pc = m_pageSampleVec;
//h = hash32((char *)m_pageSampleVec, SAMPLE_VECTOR_SIZE);
//blen = sprintf(buf,"%"UINT32"",(int32_t unsigned int)h);
//field = "gbsamplevector";
//descr = "sample vector hash";
// this returns false on failure
//if ( ! hashString ( tt,buf,blen,score,field,descr) )
// return false;
// . hash combined for Dup Detection
// . must match XmlDoc::getDupList ( );
//uint64_t h1 = m_tagVector.getVectorHash();
//uint64_t h2 = getGigabitVectorScorelessHash(gigabitVec);
//uint64_t h64 = hash64 ( h1 , h2 );
// take this out for now
/*
uint64_t *dh = getDupHash ( );
blen = sprintf(buf,"%"UINT64"", *dh );//h64);
//field = "gbduphash";
//descr = "dup vector hash";
// update hash parms
hi.m_prefix = "gbduphash";
hi.m_desc = "dup vector hash";
// this returns false on failure
if ( ! hashString ( buf,blen,&hi ) ) return false;
*/
// hash the wikipedia docids we match
if ( ! m_wikiDocIdsValid ) { char *xx=NULL;*xx=0; }
for ( int32_t i = 0 ; i < size_wikiDocIds/8 ; i++ ) {
blen = sprintf(buf,"%"UINT64"",ptr_wikiDocIds[i]);
// convert to int32_t
//int32_t convScore = (int32_t)ptr_wikiScores[i];
// get score
//uint32_t ws = score8to32 ( convScore );
// update hash parms
hi.m_prefix = "gbwikidocid";
hi.m_desc = "wiki docid";
hi.m_hashGroup = HASHGROUP_INTAG;
// this returns false on failure
if ( ! hashString ( buf,blen,&hi ) ) return false;
}
return true;
}
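// usage sketch for the terms indexed above (query form assumed from the
// "gbwikidocid" prefix): docs matching wikipedia page 1234567 come back
// from an ordinary fielded term query, e.g.
//
// Query q;
// q.set2 ( "gbwikidocid:1234567" , langUnknown , true );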
/*
BR 20160106 removed.
// hash gbhasthumbnail:0|1
bool XmlDoc::hashImageStuff ( HashTableX *tt ) {
setStatus ("hashing image stuff");
char *val = "0";
char **td = getThumbnailData();
if ( *td ) val = "1";
// update hash parms
HashInfo hi;
hi.m_tt = tt;
hi.m_hashGroup = HASHGROUP_INTAG;
hi.m_prefix = "gbhasthumbnail";
hi.m_desc = "has a thumbnail";
// this returns false on failure
if ( ! hashString ( val,1,&hi ) ) return false;
return true;
}
*/
// returns false and sets g_errno on error
bool XmlDoc::hashIsAdult ( HashTableX *tt ) {
@ -3080,7 +2938,7 @@ bool XmlDoc::hashString3( char *s ,
return false;
if ( ! bits.set ( &words , version , niceness ) )
return false;
if ( ! phrases.set(&words,&bits,true,false,version,niceness ) )
if ( !phrases.set( &words, &bits, version, niceness ) )
return false;
// use primary langid of doc
@ -3348,15 +3206,15 @@ bool XmlDoc::hashWords3 ( //int32_t wordStart ,
// hashTitle we count all the words in the title
// towards the density rank even if they are
// in different sentences
if ( sx->m_flags & SEC_IN_TITLE )
//hashGroup = HASHGROUP_TITLE;
if ( sx->m_flags & SEC_IN_TITLE ) {
continue;
if ( sx->m_flags & SEC_IN_HEADER )
}
if ( sx->m_flags & SEC_IN_HEADER ) {
hashGroup = HASHGROUP_HEADING;
if ( sx->m_flags & ( SEC_MENU |
SEC_MENU_SENTENCE |
SEC_MENU_HEADER ) )
}
if ( sx->m_flags & ( SEC_MENU | SEC_MENU_SENTENCE | SEC_MENU_HEADER ) ) {
hashGroup = HASHGROUP_INMENU;
}
}
// this is for link text and meta tags mostly
@ -3381,10 +3239,6 @@ bool XmlDoc::hashWords3 ( //int32_t wordStart ,
// otherwise it will be the document's primary language.
char langId = langUnknown;
if ( m_wts && langVec ) langId = langVec[i];
// keep it as the original vector. i'm not sure we use
// this for anything but for display, so show the user
// how we made our calculation of the document's primary lang
//if ( langId == langUnknown ) langId = docLangId;
char wd;
if ( hi->m_useCountTable ) wd = wdv[i];
@ -3458,8 +3312,7 @@ bool XmlDoc::hashWords3 ( //int32_t wordStart ,
// if using posdb
key144_t k;
// if ( i == 11429 )
// log("foo");
g_posdb.makeKey ( &k ,
h ,
0LL,//docid
@ -3476,16 +3329,10 @@ bool XmlDoc::hashWords3 ( //int32_t wordStart ,
false , // delkey?
hi->m_shardByTermId );
// get the one we lost
// char *kstr = KEYSTR ( &k , sizeof(POSDBKEY) );
// if (!strcmp(kstr,"0x0ca3417544e400000000000032b96bf8aa01"))
// log("got lost key");
// key should NEVER collide since we are always incrementing
// the distance cursor, m_dist
dt->addTerm144 ( &k );
// add to wts for PageParser.cpp display
if ( wts ) {
if ( ! storeTerm ( wptrs[i],wlens[i],h,hi,i,
@ -3494,7 +3341,6 @@ bool XmlDoc::hashWords3 ( //int32_t wordStart ,
wd,//v[i],
ws,
hashGroup,
//false, // is phrase?
wbuf,
wts,
SOURCE_NONE, // synsrc
@ -3567,7 +3413,6 @@ skipsingleword:
////////
int64_t npid = pids2[i];
int32_t npw = 2;
uint64_t ph2 = 0;
// repeat for the two word hash if different!
@ -3599,7 +3444,7 @@ skipsingleword:
if ( wts && npid ) {
// get phrase as a string
int32_t plen;
char *phr=phrases->getPhrase(i,&plen,npw);
char *phr=phrases->getPhrase(i,&plen,2);
// store it
if ( ! storeTerm ( phr,plen,ph2,hi,i,
wposvec[i], // wordPos
@ -3647,190 +3492,12 @@ skipsingleword:
return false;
}
#ifdef SUPPORT_FACETS
//BR 20160108 - facets DISABLED AS TEST. Don't think we will use them.
//https://gigablast.com/syntax.html?c=main
#ifdef PRIVACORE_SAFE_VERSION
#error Oops? Do not enable SUPPORT_FACETS with PRIVACORE_SAFE_VERSION. Stores too much unused data in posdb.
#endif
// hash a single term so they can do gbfacet:ext or
// gbfacet:siterank or gbfacet:price. a field on a field.
if ( prefixHash && words->m_numWords )
{
// hash gbfacet:price with and store the price in the key
hashFacet1 ( hi->m_prefix, words ,hi->m_tt);//, hi );
}
#endif
// between calls? i.e. hashTitle() and hashBody()
//if ( wc > 0 ) m_dist = wposvec[wc-1] + 100;
if ( i > 0 ) m_dist = wposvec[i-1] + 100;
return true;
}
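// (note on the "+ 100" above: m_dist carries the word-position cursor
// across successive hashing passes, e.g. hashTitle() then hashBody(), and
// the gap presumably keeps terms from different passes from scoring as
// adjacent in proximity ranking)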
// just like hashNumber*() functions but we use "gbfacet" as the
// primary prefix, NOT gbminint, gbmin, gbmax, gbmaxint, gbsortby,
// gbsortbyint, gbrevsortby, gbrevsortbyint
bool XmlDoc::hashFacet1 ( char *term ,
Words *words ,
HashTableX *tt ) {
// need a prefix
//if ( ! hi->m_prefix ) return true;
// hash the ENTIRE content, all words as one blob
int32_t nw = words->getNumWords();
char *a = words->m_words[0];
char *b = words->m_words[nw-1]+words->m_wordLens[nw-1];
// hash the whole string as one value, the value of the facet
int32_t val32 = hash32 ( a , b - a );
if ( ! hashFacet2 ( "gbfacetstr",term, val32 , tt ) ) return false;
return true;
}
bool XmlDoc::hashFacet2 ( char *prefix,
char *term ,
int32_t val32 ,
HashTableX *tt ,
// we only use this for gbxpathsitehash terms:
bool shardByTermId ) {
// need a prefix
//if ( ! hi->m_prefix ) return true;
//int32_t plen = gbstrlen ( hi->m_prefix );
//if ( plen <= 0 ) return true;
// we gotta make this case insensitive, and skip spaces
// because if it is 'focal length' we can't search
// 'focal length:10' because that comes across as TWO terms.
//int64_t prefixHash =hash64Lower_utf8_nospaces ( hi->m_prefix,plen);
// now any field has to support gbfacet:thatfield
// and store the 32-bit termid into where we normally put
// the word position bits, etc.
//static int64_t s_facetPrefixHash = 0LL;
//if ( ! s_facetPrefixHash )
// s_facetPrefixHash = hash64n ( "gbfacet" );
// this is case-sensitive
int64_t prefixHash = hash64n ( prefix );
// term is like something like "object.price" or whatever.
// it is the json field itself, or the meta tag name, etc.
int64_t termId64 = hash64n ( term );
// combine with the "gbfacet" prefix. old prefix hash on right.
// like "price" on right and "gbfacetfloat" on left... see Query.cpp.
int64_t ph2 = hash64 ( termId64, prefixHash );
// . now store it
// . use field hash as the termid. normally this would just be
// a prefix hash
// . use mostly fake value otherwise
key144_t k;
g_posdb.makeKey ( &k ,
ph2 ,
0,//docid
0,// word pos #
0,// densityRank , // 0-15
0 , // MAXDIVERSITYRANK
0 , // wordSpamRank ,
0 , //siterank
0 , // hashGroup,
// we set to docLang final hash loop
//langUnknown, // langid
// unless already set. so set to english here
// so it will not be set to something else
// otherwise our floats would be ordered by langid!
// somehow we have to indicate that this is a float
// termlist so it will not be mangled any more.
//langEnglish,
langUnknown,
0 , // multiplier
false, // syn?
false , // delkey?
shardByTermId );
//int64_t final = hash64n("products.offerprice",0);
//int64_t prefix = hash64n("gbsortby",0);
//int64_t h64 = hash64 ( final , prefix);
//if ( ph2 == h64 )
// log("hey: got offer price");
// now set the float in that key
g_posdb.setInt ( &k , val32 );
// HACK: this bit is ALWAYS set by Posdb::makeKey() to 1
// so that we can b-step into a posdb list and make sure
// we are aligned on a 6 byte or 12 byte key, since they come
// in both sizes. but for this, hack it off to tell
// addTable144() that we are a special posdb key, a "numeric"
// key that has a float stored in it. then it will NOT
// set the siterank and langid bits which throw our sorting
// off!!
g_posdb.setAlignmentBit ( &k , 0 );
HashTableX *dt = tt;//hi->m_tt;
// the key may indeed collide, but that's ok for this application
if ( ! dt->addTerm144 ( &k ) )
return false;
if ( ! m_wts )
return true;
bool isFloat = false;
if ( strcmp(prefix,"gbfacetfloat")==0 ) isFloat = true;
// store in buffer for display on pageparser.cpp output
char buf[130];
if ( isFloat )
snprintf(buf,128,"facetField=%s facetVal32=%f",term,
*(float *)&val32);
else
snprintf(buf,128,"facetField=%s facetVal32=%"UINT32"",
term,(uint32_t)val32);
int32_t bufLen = gbstrlen(buf);
// make a special hashinfo for this facet
HashInfo hi;
hi.m_tt = tt;
// the full prefix
char fullPrefix[66];
snprintf(fullPrefix,64,"%s:%s",prefix,term);
hi.m_prefix = fullPrefix;//"gbfacet";
// add to wts for PageParser.cpp display
// store it
if ( ! storeTerm ( buf,
bufLen,
ph2, // prefixHash, // s_facetPrefixHash,
&hi,
0, // word#, i,
0, // wordPos
0,// densityRank , // 0-15
0, // MAXDIVERSITYRANK,//phrase
0, // ws,
0, // hashGroup,
//true,
&m_wbuf,
m_wts,
// a hack for display in wts:
SOURCE_NUMBER, // SOURCE_BIGRAM, // synsrc
langUnknown ,
k) )
return false;
return true;
}
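// worked example of the composition above (the field name "ext" is just
// for illustration): indexing gbfacetstr:ext whose whole value hashed to
// val32:
//
// int64_t prefixHash = hash64n ( "gbfacetstr" );
// int64_t termId64 = hash64n ( "ext" );
// int64_t ph2 = hash64 ( termId64 , prefixHash ); // must match Query.cpp
// key144_t k;
// g_posdb.makeKey ( &k , ph2 , 0,0,0,0,0,0,0 , langUnknown , 0 ,
// false , false , false );
// g_posdb.setInt ( &k , val32 ); // facet value rides in the key
// g_posdb.setAlignmentBit ( &k , 0 ); // mark it as a "numeric" posdb key
// tt->addTerm144 ( &k );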
bool XmlDoc::hashFieldMatchTerm ( char *val , int32_t vlen , HashInfo *hi ) {
HashTableX *tt = hi->m_tt;
@ -4346,27 +4013,6 @@ char *XmlDoc::hashJSONFields2 ( HashTableX *table ,
}
}
//
// for deduping search results we set m_contentHash32 here for
// diffbot json objects.
// we can't do this here anymore, we have to set the
// contenthash in ::getContentHash32() because we need it to
// set EDOCUNCHANGED in ::getIndexCode() above.
//
/*
if ( hi->m_hashGroup != HASHGROUP_INURL ) {
// make the content hash so we can set m_contentHash32
// for deduping
int32_t nh32 = hash32n ( name );
// do an exact hash for now...
int32_t vh32 = hash32 ( val , vlen , m_niceness );
// accumulate, order independently
totalHash32 ^= nh32;
totalHash32 ^= vh32;
}
*/
// index like "title:whatever"
hi->m_prefix = name;
hashString ( val , vlen , hi );
@ -4384,24 +4030,8 @@ char *XmlDoc::hashJSONFields2 ( HashTableX *table ,
hi->m_prefix = NULL;
hashString ( val , vlen , hi );
/*
// a number? hash special then as well
if ( ji->m_type != JT_NUMBER ) continue;
// use prefix for this though
hi->m_prefix = name;
// hash as a number so we can sort search results by
// this number and do range constraints
float f = ji->m_valueDouble;
if ( ! hashNumberForSortingAsFloat ( f , hi ) )
return NULL;
*/
}
//m_contentHash32 = totalHash32;
//m_contentHash32Valid = true;
return (char *)0x01;
}

@ -8,10 +8,6 @@
//
#define PRIVACORE_SAFE_VERSION
// Facet support disabled by default to save space in posdb
#undef SUPPORT_FACETS
// fix on 64-bit architectures so sizeof(uint96_t) is 12, not 16!
//#pragma pack(0)
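// (how the two defines above interact, mirroring the SUPPORT_FACETS block
// in XmlDoc.cpp: facet hashing only compiles when SUPPORT_FACETS is
// defined, and defining it together with PRIVACORE_SAFE_VERSION trips the
// #error there, so the safe build can never bloat posdb with facet keys)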

396
main.cpp

@ -19,7 +19,6 @@
#include "Posdb.h"
#include "Datedb.h"
#include "Titledb.h"
#include "Revdb.h"
#include "Tagdb.h"
#include "Spider.h"
#include "SpiderColl.h"
@ -95,8 +94,6 @@ static void dumpTitledb (char *coll, int32_t sfn, int32_t numFiles, bool includ
int64_t docId , bool justPrintDups );
static int32_t dumpSpiderdb ( char *coll,int32_t sfn,int32_t numFiles,bool includeTree,
char printStats , int32_t firstIp );
static void dumpSectiondb( char *coll,int32_t sfn,int32_t numFiles,bool includeTree);
static void dumpRevdb ( char *coll,int32_t sfn,int32_t numFiles,bool includeTree);
static void dumpTagdb( char *coll, int32_t sfn, int32_t numFiles, bool includeTree, char rec = 0,
int32_t rdbId = RDB_TAGDB, char *site = NULL );
@ -653,16 +650,6 @@ int main2 ( int argc , char *argv[] ) {
"all events as if the time is UTCtimestamp.\n\n"
*/
/*
#ifdef _CLIENT_
//there was <hostId> in this command but it
// wasn't used in the program, so deleting it from
// here
"dump <V> [C [X [Y [Z]]]]\n\tdump a db in "
#else
*/
//"dump <db> <collection> [T]\n\tDump a db from disk. "
"dump <db> <collection>\n\tDump a db from disk. "
"Example: gb dump t main\n"
"\t<collection> is the name of the collection.\n"
@ -687,7 +674,6 @@ int main2 ( int argc , char *argv[] ) {
"\t<db> is W to dump tagdb for wget.\n"
"\t<db> is x to dump doledb.\n"
"\t<db> is w to dump waiting tree.\n"
"\t<db> is B to dump sectiondb.\n"
"\t<db> is C to dump catdb.\n"
"\t<db> is l to dump clusterdb.\n"
"\t<db> is z to dump statsdb all keys.\n"
@ -2239,10 +2225,6 @@ int main2 ( int argc , char *argv[] ) {
fprintf(stdout,"error dumping spiderdb\n");
}
}
else if ( argv[cmdarg+1][0] == 'B' )
dumpSectiondb(coll,startFileNum,numFiles,includeTree);
else if ( argv[cmdarg+1][0] == 'V' )
dumpRevdb(coll,startFileNum,numFiles,includeTree);
else if ( argv[cmdarg+1][0] == 'S' ) {
char *site = NULL;
if ( cmdarg+6 < argc ) {
@ -2638,61 +2620,16 @@ int main2 ( int argc , char *argv[] ) {
if ( ! g_linkdb.init() ) {
log("db: Linkdb init failed." ); return 1; }
// use sectiondb again for its immense voting power for detecting and
// removing web page chrome, categories, etc. only use if
// CollectionRec::m_isCustomCrawl perhaps to save space.
if ( ! g_sectiondb.init() ) {
log("db: Sectiondb init failed." ); return 1; }
// now clean the trees since all rdbs have loaded their rdb trees
// from disk, we need to remove bogus collection data from the trees
// like if a collection was deleted but the tree never saved right it'll
// still have the collection's data in it
if ( ! g_collectiondb.addRdbBaseToAllRdbsForEachCollRec ( ) ) {
log("db: Collectiondb init failed." ); return 1; }
// . now read in a little bit of each db and make sure the contained
// records belong in our group
// . only do this if we have more than one group
// . we may have records from other groups if we are scaling, but
// if we cannot find *any* records in our group we probably have
// the wrong data files.
//if ( ! checkDataParity() ) return 1;
//Load the high-frequency term shortcuts (if they exist)
g_hfts.load();
// init the vector cache
/*
if ( ! g_vectorCache.init ( g_conf.m_maxVectorCacheMem,
VECTOR_REC_SIZE-sizeof(key_t),
true,
g_conf.m_maxVectorCacheMem /
( sizeof(collnum_t) + 20 +
VECTOR_REC_SIZE ) ,
true,
"vector",
false,
12,
12 ) ) {
log("db: Vector Cache init failed." ); return 1; }
*/
// . gb gendbs
// . hostId should have already been picked up above, so it could be
// used to initialize all the rdbs
//if ( strcmp ( cmd , "gendbs" ) == 0 ) {
// char *coll = argv[cmdarg+1];
// // generate the dbs
// genDbs ( coll ); // coll
// g_log.m_disabled = true;
// return 0;
//}
//if ( strcmp ( cmd, "genclusterdb" ) == 0 ) {
// char *coll = argv[cmdarg+1];
// makeClusterdb ( coll );
// g_log.m_disabled = true;
// return 0;
//}
// test all collection dirs for write permission -- metalincs' request
int32_t pcount = 0;
for ( int32_t i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) {
@ -2709,16 +2646,6 @@ int main2 ( int argc , char *argv[] ) {
checkDirPerms ( tt ) ;
}
// and now that all rdbs have loaded lets count the gbeventcount
// keys we have in datedb. those represent the # of events we
// have indexed.
//g_collectiondb.countEvents();
//if (!ucInit(g_hostdb.m_dir, true)) {
// log("Unicode initialization failed!");
// return 1;
//}
//
// NOTE: ANYTHING THAT USES THE PARSER SHOULD GO BELOW HERE, UCINIT!
//
@ -2728,20 +2655,6 @@ int main2 ( int argc , char *argv[] ) {
return 1;
}
// have to test after the unified dict is loaded because if a word is
// of unknown langid we try to get syns for it anyway if it has
// only one possible lang according to the unified dict
//if ( ! g_wiktionary.test2() ) return 1;
/*
if ( strcmp ( cmd, "gendaterange" ) == 0 ) {
char *coll = argv[cmdarg+1];
genDateRange ( coll );
g_log.m_disabled = true;
return 0;
}
*/
// Load the country code table
g_countryCode.loadHashTable();
int32_t nce = g_countryCode.getNumEntries();
@ -2765,64 +2678,6 @@ int main2 ( int argc , char *argv[] ) {
log("db: ResultsCache: %s",mstrerror(g_errno));
return 1;
}
/*
maxMem = 40000000;
int32_t maxNodes2 = maxMem/(8+8+50*(8+4+4));
if ( ! g_genericCache[SEORESULTS_CACHEID].init (
maxMem , // max cache mem
-1 , // fixedDataSize
false , // support lists of recs?
maxNodes2 , // max cache nodes
false , // use half keys?
"seoresults" , // filename
true)){ // save to disk?
log("db: ResultsCache: %s",mstrerror(g_errno));
return 1;
}
*/
/*
int32_t maxMem1 = g_conf.m_siteLinkInfoMaxCacheMem;
if ( ! g_genericCache[SITELINKINFO_CACHEID].init (
maxMem1 , // max cache mem
4 , // fixedDataSize
false , // support lists of recs?
maxMem1/36 , // max cache nodes
false , // use half keys?
"sitelinkinfo" , // filename
//g_conf.m_siteLinkInfoSaveCache ) ) {
true)){
log("db: SiteLinkInfoCache: %s",mstrerror(g_errno));
return 1;
}
int32_t maxMem2a = g_conf.m_siteQualityMaxCacheMem;
if ( ! g_genericCache[SITEQUALITY_CACHEID].init (
maxMem2a , // max cache mem
1 , // fixedDataSize
false , // support lists of recs?
maxMem2a/36 , // max cache nodes
false , // use half keys?
"sitequality" , // filename
//g_conf.m_siteQualitySaveCache ) ) {
true)) {
log("db: SiteQualityCache: %s",mstrerror(g_errno));
return 1;
}
*/
/*
int32_t maxMem2b = g_conf.m_siteQualityMaxCacheMem * .10 ;
if ( ! g_genericCacheSmallLocal[SITEQUALITY_CACHEID].init (
maxMem2b , // max cache mem
1 , // fixedDataSize
false , // support lists of recs?
maxMem2b/36 , // max cache nodes
false , // use half keys?
"sitequality" , // filename
//g_conf.m_siteQualitySaveCache ) ) {
false)) {
log("db: SiteQualityCacheSmallLocal: %s",mstrerror(g_errno));
return 1;
}
*/
// init minsitenuminlinks buffer
if ( ! g_tagdb.loadMinSiteInlinksBuffer() ) {
@ -7836,223 +7691,6 @@ void *startUp ( void *state , ThreadEntry *t ) {
return 0; //NULL;
}
void dumpSectiondb(char *coll,int32_t startFileNum,int32_t numFiles,
bool includeTree) {
//g_conf.m_spiderdbMaxTreeMem = 1024*1024*30;
g_sectiondb.init ();
//g_collectiondb.init(true);
g_sectiondb.getRdb()->addRdbBase1(coll );
key128_t startKey ;
key128_t endKey ;
startKey.setMin();
endKey.setMax();
// turn off threads
g_threads.disableThreads();
// get a meg at a time
int32_t minRecSizes = 1024*1024;
Msg5 msg5;
RdbList list;
char tmpBuf[1024];
SafeBuf sb(tmpBuf, 1024);
bool firstKey = true;
CollectionRec *cr = g_collectiondb.getRec(coll);
loop:
// use msg5 to get the list, should ALWAYS block since no threads
if ( ! msg5.getList ( RDB_SECTIONDB ,
cr->m_collnum ,
&list ,
(char *)&startKey ,
(char *)&endKey ,
minRecSizes ,
includeTree ,
false , // add to cache?
0 , // max cache age
startFileNum ,
numFiles ,
NULL , // state
NULL , // callback
0 , // niceness
false )){// err correction?
log(LOG_LOGIC,"db: getList did not block.");
return;
}
// all done if empty
if ( list.isEmpty() ) return;
key128_t lastk;
// loop over entries in list
for(list.resetListPtr();!list.isExhausted(); list.skipCurrentRecord()){
char *rec = list.getCurrentRec();
key128_t *k = (key128_t *)rec;
char *data = list.getCurrentData();
int32_t size = list.getCurrentDataSize();
// is it a delete?
if ( (k->n0 & 0x01) == 0 ) {
printf("k.n1=%016"XINT64" k.n0=%016"XINT64" (delete)\n",
k->n1 , k->n0 | 0x01 ); // fix it!
continue;
}
if ( size != sizeof(SectionVote) ) { char *xx=NULL;*xx=0; }
// sanity check
if ( ! firstKey ) {
if ( k->n1 < lastk.n1 ) { char *xx=NULL;*xx=0; }
if ( k->n1 == lastk.n1 && k->n0 < lastk.n0 ) {
char *xx=NULL;*xx=0; }
}
// no longer a first key
firstKey = false;
// copy it
gbmemcpy ( &lastk , k , sizeof(key128_t) );
int32_t shardNum = getShardNum (RDB_SECTIONDB,k);
//int32_t groupNum = g_hostdb.getGroupNum ( gid );
// point to the data
char *p = data;
char *pend = data + size;
// breach check
if ( p >= pend ) {
printf("corrupt sectiondb rec k.n0=%"UINT64"",k->n0);
continue;
}
// parse it up
SectionVote *sv = (SectionVote *)data;
int64_t termId = g_datedb.getTermId ( k );
// score is the section type
unsigned char score2 = g_datedb.getScore(k);
char *stype = "unknown";
if ( score2 == SV_CLOCK ) stype = "clock ";
if ( score2 == SV_EURDATEFMT ) stype = "eurdatefmt ";
if ( score2 == SV_EVENT ) stype = "event ";
if ( score2 == SV_ADDRESS ) stype = "address ";
if ( score2 == SV_TAGPAIRHASH ) stype = "tagpairhash ";
if ( score2 == SV_TAGCONTENTHASH ) stype = "tagcontenthash";
if ( score2 == SV_FUTURE_DATE ) stype = "futuredate ";
if ( score2 == SV_PAST_DATE ) stype = "pastdate ";
if ( score2 == SV_CURRENT_DATE ) stype = "currentdate ";
if ( score2 == SV_SITE_VOTER ) stype = "sitevoter ";
if ( score2 == SV_TURKTAGHASH ) stype = "turktaghash ";
int64_t d = g_datedb.getDocId(k);
int32_t date = g_datedb.getDate(k);
// dump it
printf("k=%s "
"sh48=%"XINT64" " // sitehash is the termid
"date=%010"UINT32" "
"%s (%"UINT32") "
"d=%012"UINT64" "
"score=%f samples=%f "
"shardnum=%"INT32""
"\n",
//k->n1,
//k->n0,
KEYSTR(k,sizeof(key128_t)),
termId,
date,
stype,(uint32_t)score2,
d,
sv->m_score,
sv->m_numSampled,
shardNum);
}
startKey = *(key128_t *)list.getLastKey();
startKey += (uint32_t) 1;
// watch out for wrap around
if ( startKey < *(key128_t *)list.getLastKey() ){ printf("\n"); return;}
goto loop;
}
void dumpRevdb(char *coll,int32_t startFileNum,int32_t numFiles, bool includeTree) {
//g_conf.m_spiderdbMaxTreeMem = 1024*1024*30;
g_revdb.init ();
//g_collectiondb.init(true);
g_revdb.getRdb()->addRdbBase1(coll );
key_t startKey ;
key_t endKey ;
startKey.setMin();
endKey.setMax();
// turn off threads
g_threads.disableThreads();
// get a meg at a time
int32_t minRecSizes = 1024*1024;
Msg5 msg5;
RdbList list;
char tmpBuf[1024];
SafeBuf sb(tmpBuf, 1024);
bool firstKey = true;
CollectionRec *cr = g_collectiondb.getRec(coll);
loop:
// use msg5 to get the list, should ALWAYS block since no threads
if ( ! msg5.getList ( RDB_REVDB ,
cr->m_collnum ,
&list ,
(char *)&startKey ,
(char *)&endKey ,
minRecSizes ,
includeTree ,
false , // add to cache?
0 , // max cache age
startFileNum ,
numFiles ,
NULL , // state
NULL , // callback
0 , // niceness
false )){// err correction?
log(LOG_LOGIC,"db: getList did not block.");
return;
}
// all done if empty
if ( list.isEmpty() ) return;
key_t lastk;
// loop over entries in list
for(list.resetListPtr();!list.isExhausted(); list.skipCurrentRecord()){
char *rec = list.getCurrentRec();
key_t *k = (key_t *)rec;
char *data = list.getCurrentData();
int32_t size = list.getCurrentDataSize();
// get docid from key
int64_t d = g_revdb.getDocId(k);
// is it a delete?
if ( (k->n0 & 0x01) == 0 ) {
printf("k.n1=%08"XINT32" k.n0=%016"XINT64" d=%"UINT64" (delete)\n",
k->n1 , k->n0 | 0x01 , d ); // fix it!
continue;
}
//if ( size != sizeof(SectionVote) ) { char *xx=NULL;*xx=0; }
// sanity check
if ( ! firstKey ) {
if ( k->n1 < lastk.n1 ) { char *xx=NULL;*xx=0; }
if ( k->n1 == lastk.n1 && k->n0 < lastk.n0 ) {
char *xx=NULL;*xx=0; }
}
// no longer a first key
firstKey = false;
// copy it
gbmemcpy ( &lastk , k , sizeof(key_t) );
// point to the data
char *p = data;
char *pend = data + size;
// breach check
if ( p > pend ) {
printf("corrupt revdb rec k.n1=0x%08"XINT32" d=%"UINT64"\n",
k->n1,d);
continue;
}
// parse it up
//SectionVote *sv = (SectionVote *)data;
// dump it
printf("k.n1=%08"XINT32" k.n0=%016"XINT64" ds=%06"INT32" d=%"UINT64"\n",
k->n1,k->n0,size,d);
}
startKey = *(key_t *)list.getLastKey();
startKey += (uint32_t) 1;
// watch out for wrap around
if ( startKey < *(key_t *)list.getLastKey() ){ printf("\n"); return;}
goto loop;
}
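// (both dump loops above page through their rdb with the same idiom: pull
// up to minRecSizes bytes via Msg5, print the list, set startKey to
// lastKey + 1 and loop; the "startKey < lastKey" re-check catches key
// wrap-around at the very end of the keyspace and bails out instead of
// looping forever)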
void dumpTagdb( char *coll, int32_t startFileNum, int32_t numFiles, bool includeTree, char req, int32_t rdbId,
char *siteArg ) {
//g_conf.m_spiderdbMaxTreeMem = 1024*1024*30;
@ -8473,13 +8111,11 @@ bool parseTest ( char *coll , int64_t docId , char *query ) {
// computeWordIds from xml
words.set ( &xml , true , true ) ;
bits.set ( &words ,TITLEREC_CURRENT_VERSION, 0);
Phrases phrases;
phrases.set ( &words,&bits,true,true,TITLEREC_CURRENT_VERSION,0);
t = gettimeofdayInMilliseconds_force();
for ( int32_t i = 0 ; i < 100 ; i++ )
//if ( ! words.set ( &xml , true , true ) )
// do not supply xd so it will be set from scratch
if ( !sections.set( &words, &phrases, &bits, NULL, 0, NULL, 0, 0 ) )
if ( !sections.set( &words, &bits, NULL, 0, NULL, 0, 0 ) )
return log("build: speedtestxml: sections set: %s",
mstrerror(g_errno));
@ -8493,14 +8129,10 @@ bool parseTest ( char *coll , int64_t docId , char *query ) {
//Phrases phrases;
Phrases phrases;
t = gettimeofdayInMilliseconds_force();
for ( int32_t i = 0 ; i < 100 ; i++ )
if ( ! phrases.set ( &words ,
&bits ,
true , // use stop words
false , // use stems
TITLEREC_CURRENT_VERSION ,
0 ) ) // niceness
for ( int32_t i = 0 ; i < 100 ; i++ )
if ( !phrases.set( &words, &bits, TITLEREC_CURRENT_VERSION, 0 ) )
return log("build: speedtestxml: Phrases set: %s",
mstrerror(g_errno));
// print time it took
@ -8597,22 +8229,6 @@ bool summaryTest1 ( char *rec , int32_t listSize, char *coll , int64_t docId ,
xml.set( content, contentLen, xd.m_version, 0, CT_HTML );
xd.getSummary();
//Summary s;
// bool status;
/*
status = s.set ( &xml ,
&q ,
NULL , // termFreqs
false , // doStemming?
summaryMaxLen ,
numSummaryLines ,
summaryMaxNumCharsPerLine ,
bigSampleRadius ,
bigSampleMaxLen ,
ratInSummary ,
&tr );
*/
}
// print time it took
@ -8641,8 +8257,6 @@ bool summaryTest2 ( char *rec , int32_t listSize, char *coll , int64_t docId ,
int32_t numSummaryLines = cr->m_summaryMaxNumLines;
int32_t summaryMaxNumCharsPerLine = cr->m_summaryMaxNumCharsPerLine;
// these are arbitrary (taken from Msg24.cpp)
int32_t bigSampleRadius = 100;
int32_t bigSampleMaxLen = 4000;
bool ratInSummary = false;
Query q;
@ -8731,8 +8345,6 @@ bool summaryTest2 ( char *rec , int32_t listSize, char *coll , int64_t docId ,
summaryMaxLen ,
numSummaryLines ,
summaryMaxNumCharsPerLine ,
bigSampleRadius ,
bigSampleMaxLen ,
ratInSummary ,
&tr );
// time it

22
qa.cpp

@ -745,8 +745,7 @@ bool qainject1 ( ) {
if ( ! s_flags[16] ) {
s_flags[16] = true;
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&q=%2Bthe"
"&dsrt=500",
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&q=%2Bthe",
702467314 ) )
return false;
}
@ -1573,8 +1572,7 @@ bool qaWarcFiles ( ) {
}
if ( s_flags[EXAMINE_RESULTS1] == 0) {
s_flags[EXAMINE_RESULTS1]++;
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&q=%2Bthe"
"&dsrt=500",
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&q=%2Bthe",
702467314 ) )
return false;
}
@ -1596,8 +1594,7 @@ bool qaWarcFiles ( ) {
if ( s_flags[EXAMINE_RESULTS2] == 0) {
s_flags[EXAMINE_RESULTS2]++;
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&q=%2Bthe"
"&dsrt=500",
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&q=%2Bthe",
702467314 ) )
return false;
}
@ -1790,14 +1787,6 @@ bool qaMetadataFacetSearch ( ) {
return false;
}
// if ( ! s_flags[EXAMINE_RESULTS] ) {
// s_flags[16] = true;
// if ( ! getUrl ( "/search?c=qatest123&qa=1&q=%2Bthe"
// "&dsrt=500",
// 702467314 ) )
// return false;
// }
return true;
}
@ -1876,8 +1865,7 @@ bool qaimport () {
// test query
if ( ! s_flags[16] ) {
s_flags[16] = true;
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&q=%2Bthe"
"&dsrt=500",
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&q=%2Bthe",
702467314 ) )
return false;
}
@ -1887,7 +1875,7 @@ bool qaimport () {
if ( ! s_flags[29] ) {
s_flags[29] = true;
if ( ! getUrl ( "/search?c=qatest123&qa=1&format=xml&"
"q=mediapost&dsrt=0&sc=1",
"q=mediapost&sc=1",
702467314 ) )
return false;
}

@ -16,7 +16,10 @@ def verify_file(gb_api, httpserver, filename, custom_filename, content_type, exp
# add url
assert gb_api.add_url(file_url) == True
result = gb_api.search('url:' + file_url)
payload = {}
payload.update({'showerrors': '1'})
result = gb_api.search('url:' + file_url, payload)
assert len(result['results']) == 1
assert result['results'][0]['contentType'] == expected_content_type

@ -28,14 +28,11 @@ static void generateSummary(Summary &summary, char *htmlInput, char *queryStr, c
Bits bits;
ASSERT_TRUE(bits.set(&words, TITLEREC_CURRENT_VERSION, 0));
Phrases phrases;
ASSERT_TRUE(phrases.set(&words, &bits, true, false, TITLEREC_CURRENT_VERSION, 0));
Url url;
url.set(urlStr);
Sections sections;
ASSERT_TRUE(sections.set(&words, &phrases, &bits, &url, 0, "", 0, CT_HTML));
ASSERT_TRUE(sections.set(&words, &bits, &url, 0, "", 0, CT_HTML));
Query query;
ASSERT_TRUE(query.set2(queryStr, langEnglish, true));
@ -53,6 +50,9 @@ static void generateSummary(Summary &summary, char *htmlInput, char *queryStr, c
Bits bitsForSummary;
ASSERT_TRUE(bitsForSummary.setForSummary(&words));
Phrases phrases;
ASSERT_TRUE(phrases.set(&words, &bits, TITLEREC_CURRENT_VERSION, 0));
Matches matches;
matches.setQuery(&query);
ASSERT_TRUE(matches.set(&words, &phrases, &sections, &bitsForSummary, &pos, &xml, &title, &url, &linkInfo, 0));