Remove more commented-out code & unused sections code

Ai Lin Chia
2016-02-24 11:56:54 +01:00
parent df2d952e1f
commit 32147c302c
10 changed files with 30 additions and 1237 deletions

@ -1068,7 +1068,7 @@ void Msg39::estimateHitsAndSendReply ( ) {
mr.size_pairScoreBuf = 0;
mr.size_singleScoreBuf = 0;
}
//mr.m_sectionStats = pt->m_sectionStats;
// reserve space for these guys, we fill them in below
mr.ptr_docIds = NULL;
mr.ptr_scores = NULL;

@ -131,13 +131,6 @@ bool Msg3a::getDocIds ( Msg39Request *r ,
log(LOG_LOGIC,"net: bad collection. msg3a. %"INT32"",
(int32_t)m_r->m_collnum);
//m_indexdbSplit = g_hostdb.m_indexSplits;
// certain query term, like, gbdom:xyz.com, are NOT split
// at all in order to keep performance high because such
// terms are looked up by the spider. if a query contains
// multiple "no split" terms, then it becomes split unfortunately...
//if ( ! m_q->isSplit() ) m_indexdbSplit = 1;
// for a sanity check in Msg39.cpp
r->m_nqt = m_q->getNumTerms();
@ -154,10 +147,7 @@ bool Msg3a::getDocIds ( Msg39Request *r ,
// . return now if query empty, no docids, or none wanted...
// . if query terms = 0, might have been "x AND NOT x"
if ( m_q->getNumTerms() <= 0 ) return true;
// sometimes we want to get section stats from the hacked
// sectionhash: posdb termlists
//if ( m_docsToGet <= 0 && ! m_r->m_getSectionStats )
// return true;
// . set g_errno if not found and return true
// . coll is null terminated
CollectionRec *cr = g_collectiondb.getRec(r->m_collnum);
@ -234,24 +224,17 @@ bool Msg3a::getDocIds ( Msg39Request *r ,
// update our read info
for ( int32_t j = 0; j < n ; j++ ) {
// the read size for THIS query term
int32_t rs = 300000000; // toRead; 300MB i guess...
// limit to 50MB man! this was 30MB but the
// 'time enough for love' query was hitting 30MB termlists.
//rs = 50000000;
rs = DEFAULT_POSDB_READSIZE;//90000000; // 90MB!
// it is better to go oom then leave users scratching their
// heads as to why some results are not being returned.
// no, because we are going out of mem for queries like
// 'www.disney.nl' etc.
//rs = -1;
// if section stats, limit to 1MB
//if ( m_r->m_getSectionStats ) rs = 1000000;
int32_t rs = DEFAULT_POSDB_READSIZE;//90000000; // 90MB!
// get the jth query term
QueryTerm *qt = &m_q->m_qterms[j];
// if query term is ignored, skip it
if ( qt->m_ignored ) rs = 0;
// set it
readSizes[j] = rs;
// serialize these too
tfw[j] = qt->m_termFreqWeight;
}
@ -265,8 +248,7 @@ bool Msg3a::getDocIds ( Msg39Request *r ,
// Query::expandQuery() above
m_r->ptr_query = m_q->m_orig;
m_r->size_query = m_q->m_origLen+1;
// the white list now too...
//m_r->ptr_whiteList = si->m_whiteListBuf.getBufStart();
// free us?
if ( m_rbufPtr && m_rbufPtr != m_rbuf ) {
mfree ( m_rbufPtr , m_rbufSize, "Msg3a" );
@ -774,64 +756,6 @@ bool Msg3a::mergeLists ( ) {
//m_totalDocCount = 0; // int32_t docCount = 0;
m_moreDocIdsAvail = true;
/*
this version is too simple. now each query term can be a
gbfacet:price or gbfacet:type term and each has a
list in the Msg39Reply::ptr_facetHashList for its termid
//
// compile facet stats
//
for ( int32_t j = 0; j < m_numHosts ; j++ ) {
Msg39Reply *mr =m_reply[j];
// one table for each query term
char *p = mr->ptr_facetHashList;
// loop over all query terms
int32_t n = m_q->getNumTerms();
// use this
HashTableX tmp;
// do the loop
for ( int32_t i = 0 ; i < n ; i++ ) {
// size of it
int32_t psize = *(int32_t *)p;
p += 4;
tmp.deserialize ( p , psize );
p += psize;
// now compile the stats into a master table
for ( int32_t k = 0 ; k < tmp.m_numSlots ; k++ ) {
if ( ! tmp.m_flags[k] ) continue;
// get the vlaue
int32_t v32 = *(int32_t *)tmp.getKeyFromSlot(k);
// and how many of them there where
int32_t count = *(int32_t *)tmp.getValueFromSlot(k);
// add to master
master.addScore32 ( v32 , count );
}
}
}
////////
//
// now set m_facetStats
//
////////
// add up all counts
int64_t count = 0LL;
for ( int32_t i = 0 ; i < master.getNumSlots() ; i++ ) {
if ( ! master.m_flags[i] ) continue;
int64_t slotCount = *(int32_t *)master.getValueFromSlot(i);
int32_t h32 = *(int32_t *)master.getKeyFromSlot(i);
if ( h32 == m_r->m_myFacetVal32 )
m_facetStats.m_myValCount = slotCount;
count += slotCount;
}
m_facetStats.m_totalUniqueValues = master.getNumUsedSlots();
m_facetStats.m_totalValues = count;
*/
// shortcut
//int32_t numSplits = m_numHosts;//indexdbSplit;
// . point to the various docids, etc. in each shard reply
// . tcPtr = term count. how many required query terms does the doc
@ -920,11 +844,6 @@ bool Msg3a::mergeLists ( ) {
for ( int32_t j = 0; j < m_numQueriedHosts ; j++ ) {
Msg39Reply *mr =m_reply[j];
if ( ! mr ) continue;
//SectionStats *src = &mr->m_sectionStats;
//dst->m_onSiteDocIds += src->m_onSiteDocIds;
//dst->m_offSiteDocIds += src->m_offSiteDocIds;
//dst->m_totalMatches += src->m_totalMatches;
//dst->m_totalEntries += src->m_totalEntries;
// now the list should be the unique site hashes that
// had the section hash. we need to uniquify them again
// here.
@ -1036,7 +955,6 @@ bool Msg3a::mergeLists ( ) {
if ( ! sortFacetEntries() )
return true;
//if ( m_r->m_getSectionStats ) return true;
//
// HACK: END section stats merge
//

Msg3a.h (11 lines changed)

@ -145,17 +145,6 @@ public:
// when merging this list of docids into a final list keep
// track of the cursor into m_docIds[]
int32_t m_cursor;
// what collection # are these docids from if m_collnums[] is NULL
//collnum_t m_collnum;
// we don't have FacetStats because we have the actual
// Msg39Reply::ptr_facetHashList from each shard which contains
// all the facet hash lists for each gbfacet: query term we had
// and the query "Msg3a::m_q.m_qterms[].m_dt" is the hash table
// where each key is a facethash for that gbfacet:xxxx term and
// the value if the # of occurences.
//SectionStats m_sectionStats;
};
#endif
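The removed comment above describes what replaced SectionStats: each shard sends back a (facet hash -> occurrence count) list in Msg39Reply::ptr_facetHashList, and the aggregator folds them into the per-term table QueryTerm::m_dt. A minimal stand-alone sketch of that fold, using std::unordered_map as a stand-in for the real HashTableX (all names here are illustrative, not the Gigablast API):

#include <cstdint>
#include <unordered_map>
#include <vector>

// Illustrative only: merge each shard's (32-bit facet value hash -> count)
// histogram into one master histogram, the role QueryTerm::m_dt plays for a
// single gbfacet: query term.
typedef std::unordered_map<uint32_t, int64_t> FacetHistogram;

static FacetHistogram mergeFacetHistograms(const std::vector<FacetHistogram> &perShard) {
    FacetHistogram master;
    for (const FacetHistogram &shard : perShard)
        for (const auto &kv : shard)
            master[kv.first] += kv.second; // key = facet hash, value = # of occurrences
    return master;
}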

@ -3839,95 +3839,6 @@ void PosdbTable::intersectLists10_r ( ) {
if( g_conf.m_logTracePosdb ) log(LOG_TRACE,"%s:%s:%d: seoHack: %s, numTerms: %"INT32"", __FILE__,__func__, __LINE__, seoHack?"true":"false", m_q->m_numTerms);
// if we are just a sitehash:xxxxx list and m_getSectionStats is
// true then assume the list is one of hacked posdb keys where
// the wordposition bits and others are really a 32-bit site hash
// and we have to see how many different docids and sites have
// this term. and we compare to our site hash,
// m_r->m_sectionSiteHash32 to determine if the posdb key is
// onsite or offsite. then XmlDoc::printRainbowSections()
// can print out how many page/sites duplicate your section's content.
// MDW: TODO: for the facet terms just compile the stats and do not
// send to intersecting. they are ignored for those purposes. send
// the hashtable back so msg3a can integrate the stats. keep in mind
// we have multiple docid ranges sometimes for one query!!!!
/*
MDW: take this out. now treat as a normal termlist but
do not use for scoring. so it is kinda like gbmin: gbmax:
query operators but it will just add the facet values to
QueryTerm::m_facetHashList for transmission back to the aggregator
node. however, it is only for docids in the final result set!
if ( m_r->m_getFacetStats ) {
// reset
m_facetStats.m_totalMatches = 0;
m_facetStats.m_totalEntries = 0;
m_dt.clear();
// scan the posdb keys
//for ( int32_t i = 0 ; i < m_msg2->getNumListsInGroup(0); i++) {
// get the sublist
RdbList *list = m_msg2->getList(0);//Group(0)[i];
char *p = list->getList ();
char *pend = p + list->getListSize();
// test
//int64_t final = 5663137686803656554LL;
//final &= TERMID_MASK;
//if ( p<pend && g_posdb.getTermId(p) == final )
// log("boo");
// scan it
for ( ; p < pend ; ) {
// . first key is the full size
// . uses the w,G,s,v and F bits to hold this
// . this is no longer necessarily sitehash, but
// can be any val, like now FacetStats is using
// it for the innerHtml sentence content hash32
int32_t sh32 = g_posdb.getFacetVal32 ( p );
//int64_t d = g_posdb.getDocId(p);
//int32_t rs = list->getRecSize(p);
// this will not update listptrlo, watch out!
p += list->getRecSize ( p );
// does this xpath from another docid have the
// same inner html as us?
if ( sh32 == m_r->m_myFacetVal32 ) // m_siteHash32 )
m_facetStats.m_totalMatches++;
// always this
m_facetStats.m_totalEntries++;
// unique site count
if ( m_dt.isInTable ( &sh32 ) ) continue;
// count it
m_facetStats.m_numUniqueVals++;
// only once
m_dt.addKey ( &sh32 );
// log it
//log("usite: %08"XINT32" %"INT64" rs=%"INT32"",sh32,d,rs);
// stop if too much so we do not try to
// re-alloc in a thread!
if ( m_dt.m_numSlotsUsed >= 1000000 ) break;
}
// and return the list of merging
int32_t *s = (int32_t *)m_facetHashList.getBufStart();
int32_t *send = (int32_t *)m_facetHashList.getBufEnd();
//if ( m_facetStats.m_numUniqueSites == 17 ) {
// log("q=%s",m_r->ptr_query);
// log("hey");
// //char *xx = NULL;*xx=0;
//}
//if(!strcmp(m_r->ptr_query,"gbsectionhash:3335323672699668766"
// log("boo");
int32_t *orig = s;
for ( int32_t i = 0 ; i < m_dt.m_numSlots ; i++ ) {
if ( ! m_dt.m_flags[i] ) continue;
*s++ = *(int32_t *)m_dt.getKeyFromSlot(i);
if ( s >= send ) break;
}
m_facetHashList.setLength((char *)s-(char *)orig);
return;
}
*/
//
// hash the docids in the whitelist termlists into a hashtable.
// every docid in the search results must be in there. the

Posdb.h (106 lines changed)

@ -440,80 +440,8 @@ public:
int32_t m_quotedStartId;
};
/*
#include "RdbList.h"
class PosdbList : public RdbList {
public:
// why do i have to repeat this for LinkInfo::set() calling our set()??
void set ( char *list , int32_t listSize , bool ownData ) {
RdbList::set ( list ,
listSize ,
list , // alloc
listSize , // alloc size
0 , // fixed data size
ownData ,
true , // use half keys?
sizeof(key_t));// 12 bytes per key
};
// clear the low bits on the keys so terms are DELETED
void clearDelBits ( );
void print();
// . these are made for special IndexLists, too
// . getTermId() assumes as 12 byte key
int64_t getCurrentTermId12 ( ) {
return getTermId12 ( m_listPtr ); };
int64_t getTermId12 ( char *rec ) {
return (*(uint64_t *)(&rec[4])) >> 16 ;
};
int64_t getTermId16 ( char *rec ) {
return (*(uint64_t *)(&rec[8])) >> 16 ;
};
// these 2 assume 12 and 6 byte keys respectively
int64_t getCurrentDocId () {
if ( isHalfBitOn ( m_listPtr ) ) return getDocId6 (m_listPtr);
else return getDocId12(m_listPtr);
};
int64_t getDocId ( char *rec ) {
if ( isHalfBitOn ( rec ) ) return getDocId6 (rec);
else return getDocId12(rec);
};
int64_t getCurrentDocId12 ( ) {
return getDocId12 ( m_listPtr ); };
int64_t getDocId12 ( char *rec ) {
return ((*(uint64_t *)(rec)) >> 2) & DOCID_MASK; };
int64_t getDocId6 ( char *rec ) {
int64_t docid;
*(int32_t *)(&docid) = *(int32_t *)rec;
((char *)&docid)[4] = rec[4];
docid >>= 2;
return docid & DOCID_MASK;
};
// this works with either 12 or 6 byte keys
unsigned char getCurrentScore ( ) {
return getScore(m_listPtr); };
unsigned char getScore ( char *rec ) { return ~rec[5]; };
// uncomplemented...
void setScore ( char *rec , char score ) { rec[5] = score; };
// for date lists only...
int32_t getCurrentDate ( ) { return ~*(int32_t *)(m_listPtr+6); };
};
*/
#include "Query.h" // MAX_QUERY_TERMS, qvec_t
// max # search results that can be viewed without using TopTree
//#define MAX_RESULTS 1000
class PosdbTable {
public:
@ -525,10 +453,7 @@ class PosdbTable {
char debug ,
void *logstate ,
class TopTree *topTree ,
//char *coll ,
collnum_t collnum ,
//IndexList *lists ,
//int32_t numLists ,
class Msg2 *msg2,
class Msg39Request *r );
@ -538,12 +463,6 @@ class PosdbTable {
// pre-allocate memory since intersection runs in a thread
bool allocTopTree ( );
// . returns false on error and sets errno
// . we assume there are "m_numTerms" lists passed in (see set() above)
//void intersectLists_r ( );
//void intersectLists9_r ( );
void getTermPairScoreForNonBody ( int32_t i, int32_t j,
char *wpi, char *wpj,
char *endi, char *endj,
@ -580,7 +499,9 @@ class PosdbTable {
void freeMem ( ) ;
// has init already been called?
bool isInitialized ( ) { return m_initialized; };
bool isInitialized() {
return m_initialized;
}
uint64_t m_docId;
@ -609,56 +530,37 @@ class PosdbTable {
int32_t m_maxScores;
//char *m_coll;
collnum_t m_collnum;
int32_t *m_qpos;
int32_t *m_wikiPhraseIds;
int32_t *m_quotedStartIds;
//class DocIdScore *m_ds;
int32_t m_qdist;
float *m_freqWeights;
//int64_t *m_freqs;
char *m_bflags;
int32_t *m_qtermNums;
float m_bestWindowScore;
//char **m_finalWinners1;
//char **m_finalWinners2;
//float *m_finalScores;
char **m_windowTermPtrs;
// how many docs in the collection?
int64_t m_docsInColl;
//SectionStats m_sectionStats;
//SafeBuf m_facetHashList;
//HashTableX m_dt;
class Msg2 *m_msg2;
// if getting more than MAX_RESULTS results, use this top tree to hold
// them rather than the m_top*[] arrays above
class TopTree *m_topTree;
//HashTableX m_docIdTable;
SafeBuf m_scoreInfoBuf;
SafeBuf m_pairScoreBuf;
SafeBuf m_singleScoreBuf;
SafeBuf m_stackBuf;
//SafeBuf m_mergeBuf;
// a reference to the query
Query *m_q;
int32_t m_nqt;
// these are NOT in imap space, but in query term space, 1-1 with
// Query::m_qterms[]
//IndexList *m_lists;
//int32_t m_numLists;
// has init() been called?
bool m_initialized;
@ -668,8 +570,6 @@ class PosdbTable {
// for debug msgs
void *m_logstate;
//int64_t m_numDocsInColl;
class Msg39Request *m_r;
// for gbsortby:item.price ...

@ -3798,26 +3798,6 @@ bool Sections::printSectionDiv ( Section *sk , char format ) {
,mod);
}
SectionStats *ss = &sk->m_stats;
// also the value of the inner html hashed
if ( sk->m_flags & SEC_HASHXPATH ) {
uint32_t val ;
val = (uint32_t) sk->m_indirectSentHash64 ;
m_sbuf->safePrintf("xpathsitehashval=%"UINT32" ", val );
}
// some voting stats
if ( sk->m_flags & SEC_HASHXPATH ) {
m_sbuf->safePrintf("_s=M%"INT32"D%"INT32"n%"INT32"u%"INT32"h%"UINT32" "
,(int32_t)ss->m_totalMatches
,(int32_t)ss->m_totalDocIds
,(int32_t)ss->m_totalEntries
,(int32_t)ss->m_numUniqueVals
,(uint32_t)mod
);
}
printFlags ( m_sbuf , sk );
if ( isHardSection(sk) )

@ -167,10 +167,6 @@ public:
// are a sentence section then this points to itself.
class Section *m_sentenceSection;
// . set in XmlDoc::getSectionsWithDupStats()
// . voting info for this section over all indexed pages from this site
SectionStats m_stats;
// position of the first and last alnum word contained directly OR
// indirectly in this section. use -1 if no text contained...
int32_t m_firstWordPos;
@ -324,9 +320,6 @@ class Sections {
bool printSectionDiv ( class Section *sk , char format = FMT_HTML );
class SafeBuf *m_sbuf;
char *getSectionsReply ( int32_t *size );
char *getSectionsVotes ( int32_t *size );
bool isHardSection ( class Section *sn );
bool setMenus ( );

@ -89,8 +89,6 @@ XmlDoc::XmlDoc() {
m_msg22Request.m_inUse = 0;
m_msg4Waiting = false;
m_msg4Launched = false;
//m_sectiondbData = NULL;
//m_placedbData = NULL;
m_dupTrPtr = NULL;
m_oldTitleRec = NULL;
m_filteredContent = NULL;
@ -98,40 +96,27 @@ XmlDoc::XmlDoc() {
m_metaList = NULL;
m_metaListSize = 0;
m_metaListAllocSize = 0;
//m_titleRec = NULL;
//m_freeTitleRec = true;
m_rootTitleRec = NULL;
m_isIndexed = false;
m_isInIndex = false;
m_wasInIndex = false;
m_outlinkHopCountVector = NULL;
//m_gsbuf = NULL;
m_extraDoc = NULL;
m_wikiqbuf = NULL;
//m_cr = NULL;
//m_notifyBlocked = 0;
//m_mcasts = NULL;
//for ( int32_t i = 0 ; i < g_hostdb.m_numHosts ; i++ )
// m_currentBinPtrs[i] = NULL;
reset();
};
}
XmlDoc::~XmlDoc() {
setStatus("freeing this xmldoc");
reset();
m_freed = true;
};
}
static int64_t s_lastTimeStart = 0LL;
void XmlDoc::reset ( ) {
m_savedChar = '\0';
m_redirUrl.reset();
m_updatedMetaData = false;
@ -148,8 +133,6 @@ void XmlDoc::reset ( ) {
m_bodyStartPos = 0;
m_mcastArray = NULL;
m_skipIframeExpansion = false;
m_indexedTime = 0;
@ -187,19 +170,9 @@ void XmlDoc::reset ( ) {
m_allHashed = false;
// reset this crap
m_beginTimeAllMatch = 0LL;
m_beginTimeMatchUrl = 0LL;
m_beginTimeFullQueries = 0LL;
m_beginTimeLinks = 0LL;
//m_beginMsg98s = 0LL;
m_beginRelatedQueries = 0LL;
m_doledbKey.n0 = 0LL;
m_doledbKey.n1 = 0;
m_sortedPosdbListBuf.purge();
m_termListBuf.purge();
@ -219,9 +192,6 @@ void XmlDoc::reset ( ) {
m_domDedupTablePtr = NULL;
m_storeTermListInfo = false;
m_gotDupStats = false;
//m_nextSection = (Section *)-1;
m_si = (Section *)-1;
// for limiting # of iframe tag expansions
m_numExpansions = 0;
@ -273,16 +243,6 @@ void XmlDoc::reset ( ) {
// if this is true, then only index if new
m_newOnly = 0;
//if ( m_sectiondbData ) {
// mfree ( m_sectiondbData , m_sectiondbDataSize ,"sdbdata" );
// m_sectiondbData = NULL;
//}
//if ( m_placedbData ) {
// mfree ( m_placedbData , m_placedbDataSize ,"pdbdata" );
// m_placedbData = NULL;
//}
if ( m_httpReplyValid && m_httpReply ) {
mfree(m_httpReply,m_httpReplyAllocSize,"httprep");
m_httpReply = NULL;
@ -295,10 +255,6 @@ void XmlDoc::reset ( ) {
m_filteredContentAllocSize = 0;
}
//if ( m_utf8ContentValid && ! m_setFromTitleRec && ptr_utf8Content )
// mfree ( ptr_utf8Content , m_utf8ContentAllocSize,"Xml3");
if ( m_metaList ) { // m_metaListValid && m_metaList ) {
mfree ( m_metaList , m_metaListAllocSize , "metalist");
m_metaList = NULL;
@ -307,18 +263,10 @@ void XmlDoc::reset ( ) {
}
if ( m_ubuf ) {
// log("xmldoc: delete m_ubuf=%"PTRFMT" this=%"PTRFMT
// , (PTRTYPE) m_ubuf
// , (PTRTYPE) this
// );
mfree ( m_ubuf , m_ubufAlloc , "ubuf");
m_ubuf = NULL;
}
//if ( m_freeTitleRec && m_titleRec ) { // && m_titleRecValid ) {
// mfree ( m_titleRec , m_titleRecAllocSize , "trec" );
//}
//m_titleRec = NULL;
m_titleRecBuf.purge();
if ( m_dupTrPtr ) {
@ -345,10 +293,6 @@ void XmlDoc::reset ( ) {
}
m_outlinkHopCountVector = NULL;
//if ( m_gsbufValid && m_gsbuf ) {
// mfree ( m_gsbuf , m_gsbufAllocSize , "gsbuf" );
//}
//m_gsbuf = NULL;
m_gsbuf.reset();
@ -359,7 +303,6 @@ void XmlDoc::reset ( ) {
m_hashedMetas = false;
m_mcastBuf.purge();
m_serpBuf.purge();
// Doc.cpp:
@ -376,11 +319,6 @@ void XmlDoc::reset ( ) {
m_bits2.reset();
m_pos.reset();
m_synBuf.reset();
m_turkVotingTable.reset();
m_turkBitsTable.reset();
m_vtr.reset();
m_vctab.reset();
m_vcduptab.reset();
m_images.reset();
m_countTable.reset();
m_mime.reset();
@ -466,10 +404,6 @@ void XmlDoc::reset ( ) {
m_launchedSpecialMsg8a = false;
m_launchedMsg8a2 = false;
m_numSectiondbReads = 0;
m_numSectiondbNeeds = 0;
m_sectiondbRecall = 0;
m_setTr = false;
m_triedTagRec = false;
m_didGatewayPage = false;
@ -4843,606 +4777,6 @@ Sections *XmlDoc::getSections ( ) {
return &m_sections;
}
// . scan every section and look up its tag and content hashes in
// sectiondb to find out how many pages and sites have the same hash
// . use the secondary sectiondb key, key2
// . then store the stats in the Sections::m_stats class
Sections *XmlDoc::getSectionsWithDupStats ( ) {
Sections *ss = getSections();
if ( !ss ||ss==(Sections *)-1) return(Sections *)ss;
if ( m_gotDupStats ) return ss;
int32_t *sh32 = getSiteHash32();
if ( ! sh32 || sh32 == (int32_t *)-1 ) return (Sections *)sh32;
uint32_t siteHash32 = (uint32_t)*sh32;
// if this is -1, we are called for the first time
if ( m_si == (void *)-1 ) {
m_si = ss->m_rootSection;
m_mcastRequestsIn = 0;
m_mcastRequestsOut = 0;
m_secStatsErrno = 0;
}
for ( ; m_si ; m_si = m_si->m_next ) {
// breathe
QUICKPOLL(m_niceness);
// don't bother with the section if it doesn't have this set
// because this eliminates parent dupage to reduce amount
// of gbxpathsitehash123456 terms we index.
if ( ! ( m_si->m_flags & SEC_HASHXPATH ) )
continue;
// get hash of sentences this tag contains indirectly
uint32_t val32 = (uint32_t)m_si->m_indirectSentHash64;
if ( ! val32 )
continue;
// get section xpath hash combined with sitehash
uint32_t secHash32 = m_si->m_turkTagHash32 ^ siteHash32;
// convert this to 32 bits
uint32_t innerHash32 ;
//sentHash32 = (uint32_t)m_si->m_sentenceContentHash64;
innerHash32 = (uint32_t)m_si->m_indirectSentHash64;
// save in case we need to read more than 5MB
//m_lastSection = si;
// . does a gbfacets:gbxpathsitehashxxxxxx query on secHash32
// . we hack the "sentContentHash32" into each posdb key
// as the "value" so we can do a facet-like histogram
// over all the possible values this xpath has for this site
SectionStats *stats = getSectionStats ( secHash32,
innerHash32,
false ); // cache only?
// it returns -1 if would block
if ( stats == (void *)-1 ) {
// count it as outstanding
//m_mcastRequestsOut++;
// launch more if we have room
// UdpServer.cpp has a limit of 10 on 0x39 requests
if ( m_mcastRequestsOut - m_mcastRequestsIn < 10)
continue;
// advance m_si so we do not repeat
m_si = m_si->m_next;
// otherwise, return -1 to indicate blocked
return (Sections *)-1;
}
// NULL means g_errno
if ( ! stats ) {
// ensure g_errno is set
if ( ! g_errno ) { char *xx=NULL;*xx=0; }
// save it
m_secStatsErrno = g_errno;
// clear it
g_errno = 0;
// if still waiting though return -1
if ( m_mcastRequestsOut > m_mcastRequestsIn )
return (Sections *)-1;
// otherwise, all done i guess
return NULL;
}
// if already in the table, skip it!
}
// waiting for more replies to come back?
if ( m_mcastRequestsOut > m_mcastRequestsIn )
return (Sections *) -1;
// now scan the sections and copy the stats from the table
// into Section::m_stats of each sentence section.
// use the key hash as the the hash of the tag/xpath and the innerhtml
// and the val instead of being site hash will be hash of the
// content. then we can get the histogram of our content hash
// for this xpath on our site.
Section *si = ss->m_rootSection;
for ( ; si ; si = si->m_next ) {
// breathe
QUICKPOLL(m_niceness);
// skip if no content to hash
//if ( ! si->m_sentenceContentHash64 ) continue;
// don't bother with the section if it doesn't have this set
// because this eliminates parent dupage to reduce amount
// of gbxpathsitehash123456 terms we index
if ( ! ( si->m_flags & SEC_HASHXPATH ) )
continue;
// skip if sentence, only hash tags now i guess for diffbot
//if ( si->m_sentenceContentHash64 )
// continue;
// get hash of sentences this tag contains indirectly
uint32_t val32 = (uint32_t)si->m_indirectSentHash64;
if ( ! val32 )
continue;
// skip if menu!
//if ( si->m_flags & menuFlags ) continue;
// get section xpath hash combined with sitehash
uint32_t secHash32 = si->m_turkTagHash32 ^ siteHash32;
// convert this to 32 bits
uint32_t innerHash32 ;
innerHash32 = (uint32_t)si->m_indirectSentHash64;
// the "stats" class should be in the table from
// the lookups above!!
SectionStats *stats = getSectionStats ( secHash32,
innerHash32,
true ); // cache only?
// sanity
//if ( ! stats || stats == (void *)-1 ) { char *xx=NULL;*xx=0;}
// must have had a network error or something
if ( ! stats ) continue;
// copy
gbmemcpy ( &si->m_stats , stats, sizeof(SectionStats) );
}
//
// now if a section has no stats but has the same
// m_indirectSentHash64 as a kid, take his stats
//
Section *sx = ss->m_rootSection;
for ( ; sx ; sx = sx->m_next ) {
// breathe
QUICKPOLL(m_niceness);
// don't bother with the section if it doesn't have this set
// because this eliminates parent dupage to reduce amount
// of gbxpathsitehash123456 terms we index
if ( ! ( sx->m_flags & SEC_HASHXPATH ) )
continue;
// scan up parents and set their stats to ours as int32_t as
// they have the same indirect sent hash64
Section *p = sx->m_parent;
for ( ; p ; p = p->m_parent ) {
// if parent is like an img tag, skip it
if ( p->m_tagId == TAG_IMG )
continue;
if ( p ->m_indirectSentHash64 !=
sx->m_indirectSentHash64 )
break;
// copy it to parent with the same inner html hash
gbmemcpy (&p->m_stats,&sx->m_stats,sizeof(SectionStats));
}
}
// now free the table's mem
m_sectionStatsTable.reset();
m_gotDupStats = true;
return ss;
}
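getSectionsWithDupStats() above is re-entered by m_masterLoop until it finishes: m_si is the cursor it resumes from, m_mcastRequestsOut/In count requests in flight, and it returns (Sections *)-1 while up to 10 multicasts are outstanding. A generic sketch of that bounded fan-out / resume pattern, with purely hypothetical names (this is not the XmlDoc API):

#include <cstddef>

// Illustrative only: launch at most maxOut async requests at a time and
// report "blocked" until every reply has drained, mirroring the flow above.
// st.in is expected to be incremented by the reply handler, the way
// gotSectionFacets() increments m_mcastRequestsIn.
struct FanOutState {
    size_t cursor;   // like m_si: where to resume on the next call
    int    out;      // like m_mcastRequestsOut
    int    in;       // like m_mcastRequestsIn
    FanOutState() : cursor(0), out(0), in(0) {}
};

enum PumpResult { PUMP_BLOCKED, PUMP_DONE };

// launch() returns true only if it actually started an async request for item i.
PumpResult pump(FanOutState &st, size_t numItems, bool (*launch)(size_t i), int maxOut = 10) {
    for (; st.cursor < numItems; st.cursor++) {
        if (!launch(st.cursor)) continue;   // answered from cache, nothing in flight
        st.out++;
        if (st.out - st.in >= maxOut) {     // hit the in-flight limit:
            st.cursor++;                    // advance so we do not repeat this item,
            return PUMP_BLOCKED;            // then wait for replies to drain
        }
    }
    // everything launched; still blocked until all replies are back
    return (st.out > st.in) ? PUMP_BLOCKED : PUMP_DONE;
}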
static void gotReplyWrapper39 ( void *state1 , void *state2 ) {
//XmlDoc *THIS = (XmlDoc *)state;
XmlDoc *THIS = (XmlDoc *)state1;
Multicast *mcast = (Multicast *)state2;
THIS->gotSectionFacets ( mcast );
// this will end up calling getSectionsWithDupStats() again
// which will call getSectionStats() some more on new sections
// until m_gotDupStats is set to true.
THIS->m_masterLoop ( THIS->m_masterState );
}
// . launch a single msg3a::getDocIds() for a section hash, secHash32
SectionStats *XmlDoc::getSectionStats ( uint32_t secHash32 ,
uint32_t innerHash32 ,
bool cacheOnly ) {
// init cache?
if ( m_sectionStatsTable.m_numSlots == 0 &&
! m_sectionStatsTable.set(4,
sizeof(SectionStats),
32,
NULL,
0,
false,
m_niceness,
"secstatsch"))
return NULL;
// check in cache...
SectionStats *stats ;
stats = (SectionStats *)m_sectionStatsTable.getValue ( &secHash32 );
// if there, return it
if ( stats ) return stats;
// if cache only do not launch
if ( cacheOnly ) return NULL;
//
// TODO: shard gbxpathsitehashxxxxx by termid
// and make sure msg3a only sends to that single shard and sends
// the stats back. should make us much faster to sectionize
// a web page. but for now try without it...
//
//int32_t *sh32 = getSiteHash32();
//if ( ! sh32 || sh32 == (int32_t *)-1 ) return (SectionStats *)sh32;
int32_t maxOut = 32;
// . need to make new msg39Request and a new Multicast arrays
// . only need multicast since these gbfacetstr:gbxpathsitehash123456
// terms are sharded by termid, otherwise we'd have to use msg3a
if ( ! m_mcastArray ) {
// how much mem to alloc?
int32_t need = 0;
need += sizeof(Multicast);
need += sizeof(Msg39Request);
// query buf str
need += 100;
need *= maxOut;
// a single query now to be shared
//need += sizeof(Query);
// just in case we are being re-used
m_mcastBuf.reset();
// alloc space
if ( ! m_mcastBuf.reserve(need) ) return NULL;
// point to buf
char *p = m_mcastBuf.getBufStart();
// set them up
m_mcastArray = (Multicast *)p;
p += sizeof(Multicast) * maxOut;
m_msg39RequestArray = (Msg39Request *)p;
p += sizeof(Msg39Request) * maxOut;
//m_queryArray = (Query *)p;
//p += sizeof(Query) * maxOut;
//m_sharedQuery = (Query *)p;
//p += sizeof(Query);
// for holding the query string
// assume query will not exceed 100 bytes incuding \0
m_queryBuf = p;
p += 100 * maxOut;
// initialize all!
for ( int32_t i = 0 ; i < maxOut ; i++ ) {
m_mcastArray [i].constructor();
m_msg39RequestArray[i].reset();//constructor();
//m_queryArray [i].constructor();
m_queryBuf[100*i] = '\0';
//m_inUse[i] = 0;
}
}
// get first available
int32_t i;
for ( i = 0 ; i < maxOut ; i++ )
if ( ! m_mcastArray[i].m_inUse ) break;
// wtf?
if ( i >= maxOut ) { char *xx=NULL;*xx=0; }
// and our vehicle
Multicast *mcast = &m_mcastArray[i];
// mark as in use up here in case we quickpoll into this same code?!
// yeah, i guess set2() calls quickpoll?
//mcast->m_inUse = 1;
// save this for reply
//mcast->m_hack = this;
char *qbuf = m_queryBuf + 100 * i;
// . hash this special term (was gbsectionhash)
// . the wordbits etc will be a number though, the hash of the content
// of the xpath, the inner html hash
// . preceeding this term with gbfacet: will make gigablast return
// the statistics for all the values in the posdb keys of this
// termlist, which happen to be innerHTML hashes for all pages
// with this same xpath and on this same site.
sprintf(qbuf,"gbfacetstr:gbxpathsitehash%"UINT32"",
(uint32_t)secHash32);
CollectionRec *cr = getCollRec();
if ( ! cr ) return NULL;
// set the msg39 request
Msg39Request *r = &m_msg39RequestArray[i];
// reset all to defaults
r->reset();
//r-> ptr_coll = cr->m_coll;
//r->size_coll = gbstrlen(cr->m_coll)+1;
r->m_collnum = cr->m_collnum;
r->m_maxAge = 60; // cache timeout?
r->m_addToCache = true;
r->m_docsToGet = 0; // just calc stats
r->m_niceness = m_niceness;
r->m_debug = 0;
r->m_doSiteClustering = false;
//r->m_doIpClustering = false;
r->m_doDupContentRemoval = false;
r->m_boolFlag = 2;
r->m_familyFilter = 0;
r->m_language = 0;
r->ptr_query = qbuf;//m_sectionHashQueryBuf;
r->size_query = gbstrlen(r->ptr_query)+1;
r->m_timeout = 3600*1000; //todo: do we really want to wait an hour for this?
r->m_maxQueryTerms = 10;
// how much of each termlist to read in bytes
int32_t readList = 10000;
r-> ptr_readSizes = (char *)&readList;
r->size_readSizes = 4;
// term freqs
float tfw = 1.0;
r-> ptr_termFreqWeights = (char *)&tfw;
r->size_termFreqWeights = 4;
// speed it up some with this flag
r->m_forSectionStats = true;
// only do a single read of docids... do not split up
r->m_numDocIdSplits = 1;
// 1 query term
r->m_nqt = 1;
///////////////////////
//
// this tells msg3a/msg39/posdbtable its a hack! no need to do this
// because it's implied by the query.
// BUT REALLY let's eliminate this and just make our queries like
// gbfacet:gbxpathsitehash1234567 where 1234567 is the hash of
// the section's xpath with the site. the values of that term in
// the posdb key will be 32-bit hashes of the innerHtml for such
// sections from all pages with the same xpath on the same site.
// so no need for this now, comment out.
//
//r->m_getFacetStats = true;
//
/////////////////////////
// we need to know what site is the base site so the section stats
// can set m_onSiteDocIds and m_offSiteDocIds correctly
//r->m_siteHash32 = *sh32;
// . now we use the hash of the innerHtml of the xpath
// . this is our value for the facet field of gbxpathsitehash12345678
// which is the hash of the innerHTML for that xpath on this site.
// 12345678 is the hash of the xpath and the site.
//r->m_myFacetVal32 = sentHash32;
//Query *qq = &m_queryArray[i];
// set query for msg3a. queryExpansion=false
//qq->set2 ( r->ptr_query , langUnknown , false );
Query qq;
qq.set2 ( r->ptr_query , langUnknown , false );
// TODO: ensure this just hits the one host since it is sharded
// by termid...
// what shard owns this termlist. we shard these
// gbfacetstr:gbxpathsitehash123456 terms by termid.
int64_t termId = qq.getTermId(0);
int32_t shardNum = getShardNumFromTermId ( termId );
// hack in our inner html content hash for this xpath
mcast->m_hack32 = innerHash32;
mcast->m_hack64 = secHash32;
// malloc and store the request. mcast will free it when done.
int32_t reqSize;
char *req = serializeMsg ( sizeof(Msg39Request),
&r->size_readSizes,
&r->size_whiteList,
&r->ptr_readSizes,
r,
&reqSize,
NULL,
0,
false);
// . send out a msg39 request to each shard
// . multicasts to a host in group "groupId"
// . we always block waiting for the reply with a multicast
// . returns false and sets g_errno on error
// . sends the request to fastest host in group "groupId"
// . if that host takes more than about 5 secs then sends to
// next host
// . key should be largest termId in group we're sending to
bool status;
status = mcast->send ( req , // m_rbufPtr ,
reqSize , // request size
0x39 , // msgType 0x39
true , // mcast owns m_request?
shardNum , // group to send to
false , // send to whole group?
0,//(int32_t)qh , // 0 // startKey.n1
this , // state1 data
mcast , // state2 data
gotReplyWrapper39 ,
multicast_xmldoc_sectionstats_timeout, //timeout
m_niceness,//m_r->m_niceness ,
-1, // firstHostId, // -1// bestHandlingHostId ,
NULL , // m_replyBuf ,
0 , // MSG39REPLYSIZE,
// this is true if multicast should free the
// reply, otherwise caller is responsible
// for freeing it after calling
// getBestReply().
// actually, this should always be false,
// there is a bug in Multicast.cpp.
// no, if we error out and never steal
// the buffers then they will go unfreed
// so they are freed by multicast by default
// then we steal control explicitly
true );
m_mcastRequestsOut++;
// if successfully launch, wait...
if ( status ) return (SectionStats *) -1;
// error?
if ( g_errno ) return NULL;//{ mcast->m_inUse = 0; return NULL; }
// sets &m_sectionStats and adds to the table
gotSectionFacets ( mcast );
// i guess did not block...
//return &msg3a->m_sectionStats;
return &m_sectionStats;
}
// . come here when msg39 got the ptr_faceHashList for our single
// gbfacet:gbxpathsitehash
// . returns false and sets g_errno on error
bool XmlDoc::gotSectionFacets ( Multicast *mcast ) {
//SectionStats *stats = &msg39->m_sectionStats;
if ( mcast->m_inUse ) { char *xx=NULL;*xx=0;}
// count it as returned
m_mcastRequestsIn++;
// mark it as available now
int32_t num = mcast - m_mcastArray;
// sanity
//if ( ! msg39->m_inUse ) { char *xx=NULL;*xx=0; }
// grab the xpath/site hash
uint32_t secHash32 = mcast->m_hack64;
// and our innher html for that xpath
int32_t myFacetVal32 = mcast->m_hack32;
// sanity. should only be a gbfacet:gbxpathsitehash12345567 term.
//if ( mcast->m_q->m_numTerms != 1 ) { char *xx=NULL;*xx=0; }
// reset all counts to 0
m_sectionStats.reset();
//////
//
// compile m_sectionStats
//
///////
// set m_sectionStats from the list of facet values for this
// gbfacet:xpathsitehash term...
// Query::m_queryTerm.m_facetHashTable has the facets merged
// from all the shards. so now compute the stats from them.
// set the section stats.
//QueryTerm *qt = &msg3a->m_q->m_qterms[0];
//HashTableX *ft = &qt->m_facetHashTable;
// . get the list of facet field/value pairs.
// . see how Msg3a.cpp merges these to see how they are stored
Msg39Reply *mr = (Msg39Reply *)mcast->m_readBuf;//getBestReply();
// this is NULL with g_errno set on error
if ( ! mr ) {
log("xmldoc: got error from sec stats mcast: %s",
mstrerror(g_errno));
return false;
}
deserializeMsg ( sizeof(Msg39Reply) ,
&mr->size_docIds,
&mr->size_clusterRecs,
&mr->ptr_docIds,
mr->m_buf );
char *p = (char *)(mr->ptr_facetHashList);
//char *pfinal = p + mr->size_facetHashList;
//
// should only be one termid of facets in here, so no need to re-loop
//
int32_t nh = 0;
// "matches" is how many docids with this facet field had our facet val
int32_t matches = 0;
// "totalDocIds" is how many docids had this facet field
int32_t totalFields = 0;
if ( p ) {
// first is the termid
//int64_t termId = *(int64_t *)p;
// skip that
p += 8;
// the # of unique 32-bit facet values
nh = *(int32_t *)p;
p += 4;
// the end point
char *pend = p + (8 * nh);
// now compile the facet hash list into there
for ( ; p < pend ; ) {
// does this facet value match ours?
// (i.e. same inner html?)
if ( *(int32_t *)p == myFacetVal32 )
matches += *(int32_t *)(p+4);
p += 4;
// now how many docids had this facet value?
totalFields += *(int32_t *)p;
p += 4;
}
}
// how many unique inner html content hashes for this xpath/site
// hash were there?
m_sectionStats.m_numUniqueVals = nh;//ft->m_numSlotsUsed;
// how many xpaths existsed over all docs. doc can have multiple.
m_sectionStats.m_totalEntries = totalFields;
// total # unique docids that had this facet
m_sectionStats.m_totalDocIds = mr->m_estimatedHits;//totalHits;
// how many had the same inner html content hash for
// this xpath/site as we did?
m_sectionStats.m_totalMatches = matches;
////////
//
// store m_sectionStats in cache
//
////////
// cache them. this does a copy of m_sectionStats
if ( ! m_sectionStatsTable.addKey ( &secHash32 , &m_sectionStats ) )
log("xmldoc: failed to add sections stats: %s",
mstrerror(g_errno));
// reset that msg39 to free its data
//msg39->reset();
if ( mcast != &m_mcastArray[num] ) { char *xx=NULL;*xx=0; }
// . make it available again
// . do this after all in case we were in quickpoll interruptting
// the getSectionStats() function below
//mcast->m_inUse = 0;
// free query Query::m_qwords array etc. to stop mem leaks
m_mcastArray [num].reset();
m_msg39RequestArray[num].reset();
//m_queryArray [num].reset();
// now when the master loop calls getSectionsWithDupStats() it
// should find the stats class in the cache!
return true;
}
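For reference, the ptr_facetHashList that gotSectionFacets() walks above is laid out as an 8-byte termId, a 4-byte count of unique facet values, then that many (facetVal32, count32) pairs. A hedged sketch of the matching writer side under that assumption; buildFacetHashList() is a hypothetical helper, not Gigablast code:

#include <cstdint>
#include <cstring>
#include <map>
#include <vector>

// Serialize one term's facet histogram as:
//   int64 termId | int32 numUniqueVals | numUniqueVals x (int32 facetVal32, int32 count)
// which is the layout the parse loop above steps through.
static std::vector<char> buildFacetHashList(int64_t termId,
                                            const std::map<uint32_t, int32_t> &hist) {
    std::vector<char> buf(8 + 4 + hist.size() * 8);
    char *p = &buf[0];
    memcpy(p, &termId, 8);            p += 8;
    int32_t nh = (int32_t)hist.size();
    memcpy(p, &nh, 4);                p += 4;
    for (const auto &kv : hist) {
        uint32_t val32 = kv.first;    // e.g. the innerHTML hash for this xpath/site
        int32_t  cnt   = kv.second;   // how many docids had that value
        memcpy(p, &val32, 4);         p += 4;
        memcpy(p, &cnt,   4);         p += 4;
    }
    return buf;
}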
int32_t *XmlDoc::getLinkSiteHashes ( ) {
if( g_conf.m_logTraceXmlDoc ) log(LOG_TRACE,"%s:%s:%d: BEGIN", __FILE__, __func__, __LINE__);
@ -20338,15 +19672,6 @@ Msg20Reply *XmlDoc::getMsg20Reply ( ) {
CollectionRec *cr = g_collectiondb.getRec ( m_collnum );
if ( ! cr ) { g_errno = ENOCOLLREC; return NULL; }
//CollectionRec *cr = getCollRec();
//if ( ! cr ) return NULL;
// set this important member var
//if (!cr ) cr=g_collectiondb.getRec(cr->m_coll,gbstrlen(cr->m_coll));
// return NULL with g_errno set on error
//if ( ! cr ) return NULL;
// . cache it for one hour
// . this will set our ptr_ and size_ member vars
char **otr = getOldTitleRec ( );
@ -22760,9 +22085,6 @@ bool XmlDoc::printDoc ( SafeBuf *sb ) {
printRainbowSections ( sb , NULL );
// cut it short for debugging
logf(LOG_DEBUG,"xmldoc: FIX ME remove return");
//
// PRINT LINKINFO
//
@ -23645,11 +22967,7 @@ bool XmlDoc::printRainbowSections ( SafeBuf *sb , HttpRequest *hr ) {
//
// PRINT SECTIONS
//
Sections *sections ;
// hr is NULL if being called from page parser which does not have the
// dup stats! and we core if we block here!
if ( hr ) sections = getSectionsWithDupStats();
else sections = getSections();
Sections *sections = getSections();
if ( ! sections) return true;if (sections==(Sections *)-1)return false;
Words *words = getWords();
@ -23661,18 +22979,14 @@ bool XmlDoc::printRainbowSections ( SafeBuf *sb , HttpRequest *hr ) {
int32_t nw = words->getNumWords();
//int32_t wordStart = 0;
//int32_t wordEnd = nw;
int64_t *wids = words->getWordIds();
int32_t isXml = false;
if ( hr ) isXml = (bool)hr->getLong("xml",0);
//if ( ! isXml ) printMenu ( sb );
// now complement, cuz bigger is better in the ranking world
//int32_t densityRank = getDensityRank ( wids , 0 , nw , HASHGROUP_BODY );
SafeBuf densBuf;
// returns false and sets g_errno on error
if ( ! getDensityRanks((int64_t *)wids,
nw,
@ -23683,20 +22997,6 @@ bool XmlDoc::printRainbowSections ( SafeBuf *sb , HttpRequest *hr ) {
return true;
// a handy ptr
char *densityVec = (char *)densBuf.getBufStart();
/*
if ( ! isXml )
sb->safePrintf("<br><b>density rank of body = %"INT32"</b> "
"(out of %"INT32")"
"<br>"
"<br>"
, densityRank
, (int32_t)MAXDENSITYRANK
);
*/
char *wordSpamVec = getWordSpamVec();
char *fragVec = m_fragBuf.getBufStart();
@ -23704,18 +23004,13 @@ bool XmlDoc::printRainbowSections ( SafeBuf *sb , HttpRequest *hr ) {
if(!getDiversityVec(words,phrases,cnt,&dwbuf,m_niceness))return true;
char *diversityVec = dwbuf.getBufStart();
// hack fack debug
//m_bodyStartPos =2136;
SafeBuf wpos;
if ( ! getWordPosVec ( words ,
sections,
//wordStart,
//wordEnd,
// we save this in the titlerec, when we
// start hashing the body. we have the url
// terms before the body, so this is necessary.
m_bodyStartPos,//0, // hi->m_startDist,
m_bodyStartPos,
fragVec,
m_niceness,
&wpos) ) return true;

XmlDoc.h (209 lines changed)

@ -401,16 +401,9 @@ public:
class Bits *getBitsForSummary ( ) ;
class Pos *getPos ( );
class Phrases *getPhrases ( ) ;
//class Synonyms *getSynonyms ( );
class Sections *getExplicitSections ( ) ;
class Sections *getImpliedSections ( ) ;
class Sections *getSections ( ) ;
class Sections *getSectionsWithDupStats ( );
//BR 20160106 removed: class SafeBuf *getInlineSectionVotingBuf();
bool gotSectionFacets( class Multicast *mcast );
class SectionStats *getSectionStats ( uint32_t secHash32, uint32_t sentHash32, bool cacheOnly );
char **getSectionsReply ( ) ;
char **getSectionsVotes ( ) ;
int32_t *getLinkSiteHashes ( );
class Links *getLinks ( bool doQuickSet = false ) ;
class HashTableX *getCountTable ( ) ;
@ -591,10 +584,6 @@ public:
char *addOutlinkSpiderRecsToMetaList ( );
//bool addTable96 ( class HashTableX *tt1 ,
// int32_t date1 ,
// bool nosplit ) ;
int32_t getSiteRank ();
bool addTable144 ( class HashTableX *tt1 ,
int64_t docId ,
@ -602,11 +591,6 @@ public:
bool addTable224 ( HashTableX *tt1 ) ;
//bool addTableDate ( class HashTableX *tt1 , //T<key128_t,char> *tt1
// uint64_t docId ,
// uint8_t rdbId ,
// bool nosplit ) ;
bool addTable128 ( class HashTableX *tt1 , // T <key128_t,char>*tt1
uint8_t rdbId ,
bool forDelete ) ;
@ -627,10 +611,7 @@ public:
bool hashUrl ( class HashTableX *table, bool urlOnly );
bool hashDateNumbers ( class HashTableX *tt );
bool hashSections ( class HashTableX *table ) ;
bool hashIncomingLinkText ( class HashTableX *table ,
bool hashAnomalies ,
bool hashNonAnomalies ) ;
bool hashIncomingLinkText( class HashTableX *table, bool hashAnomalies, bool hashNonAnomalies );
bool hashLinksForLinkdb ( class HashTableX *table ) ;
bool hashNeighborhoods ( class HashTableX *table ) ;
bool hashRSSInfo ( class HashTableX *table ) ;
@ -648,11 +629,8 @@ public:
bool hashTagRec ( class HashTableX *table ) ;
bool hashPermalink ( class HashTableX *table ) ;
bool hashVectors(class HashTableX *table ) ;
// BR 20160106 removed: bool hashAds(class HashTableX *table ) ;
class Url *getBaseUrl ( ) ;
// BR 20160106 removed: bool hashSubmitUrls ( class HashTableX *table ) ;
// BR 20160106 removed: bool hashImageStuff ( class HashTableX *table ) ;
bool hashIsAdult ( class HashTableX *table ) ;
void set20 ( Msg20Request *req ) ;
@ -672,59 +650,21 @@ public:
char *getIsErrorPage ( ) ;
char* matchErrorMsg(char* p, char* pend );
bool hashWords ( //int32_t wordStart ,
//int32_t wordEnd ,
class HashInfo *hi ) ;
bool hashSingleTerm ( int64_t termId ,
class HashInfo *hi ) ;
bool hashSingleTerm ( char *s ,
int32_t slen ,
class HashInfo *hi );
bool hashString ( class HashTableX *ht ,
//class Weights *we ,
class Bits *bits ,
char *s ,
int32_t slen ) ;
bool hashString ( char *s ,
int32_t slen ,
class HashInfo *hi ) ;
bool hashString ( char *s ,
class HashInfo *hi ) ;
bool hashWords( class HashInfo *hi );
bool hashSingleTerm( int64_t termId, class HashInfo *hi );
bool hashSingleTerm( char *s, int32_t slen, class HashInfo *hi );
bool hashString( class HashTableX *ht, class Bits *bits, char *s, int32_t slen );
bool hashString( char *s, int32_t slen, class HashInfo *hi );
bool hashString( char *s, class HashInfo *hi );
bool hashWords3( class HashInfo *hi, class Words *words, class Phrases *phrases, class Synonyms *synonyms,
class Sections *sections, class HashTableX *countTable, char *fragVec, char *wordSpamVec,
char *langVec, char docLangId, class SafeBuf *pbuf, class HashTableX *wts,
class SafeBuf *wbuf, int32_t niceness );
bool hashWords3 ( //int32_t wordStart ,
//int32_t wordEnd ,
class HashInfo *hi ,
class Words *words ,
class Phrases *phrases ,
class Synonyms *synonyms ,
class Sections *sections ,
class HashTableX *countTable ,
char *fragVec ,
char *wordSpamVec ,
char *langVec ,
char docLangId , // default lang id
class SafeBuf *pbuf ,
class HashTableX *wts ,
class SafeBuf *wbuf ,
int32_t niceness );
bool hashString3 ( char *s ,
int32_t slen ,
class HashInfo *hi ,
class HashTableX *countTable ,
class SafeBuf *pbuf ,
class HashTableX *wts ,
class SafeBuf *wbuf ,
int32_t version ,
int32_t siteNumInlinks ,
int32_t niceness );
//bool hashSectionTerm ( char *term ,
// class HashInfo *hi ,
// int32_t sentHash32 ) ;
bool hashString3( char *s, int32_t slen, class HashInfo *hi, class HashTableX *countTable,
class SafeBuf *pbuf, class HashTableX *wts, class SafeBuf *wbuf, int32_t version,
int32_t siteNumInlinks, int32_t niceness );
bool hashFacet1 ( char *term, class Words *words , HashTableX *dt) ;
@ -782,16 +722,12 @@ public:
public:
// stuff set from the key of the titleRec, above the compression area
//key_t m_key;
int64_t m_docId;
char *m_ubuf;
int32_t m_ubufSize;
int32_t m_ubufAlloc;
// does this page link to gigablast, or has a search form to it?
//bool searchboxToGigablast();
// private:
// we we started spidering it, in milliseconds since the epoch
@ -806,16 +742,6 @@ public:
int64_t m_setTime;
int64_t m_cpuSummaryStartTime;
// timers
int64_t m_beginSEOTime;
int64_t m_beginTimeAllMatch;
int64_t m_beginTimeMatchUrl;
int64_t m_beginTimeFullQueries;
int64_t m_beginTimeLinks;
//int64_t m_beginMsg98s;
int64_t m_beginRelatedQueries;
int64_t m_beginMsg95s;
// . these should all be set using set*() function calls so their
// individual validity flags can bet set to true, and successive
// calls to their corresponding get*() functions will not core
@ -836,8 +762,6 @@ public:
int64_t m_firstUrlHash64;
Url m_currentUrl;
//char *m_coll;
//char m_collBuf[MAX_COLL_LEN+1]; // include \0
CollectionRec *m_lastcr;
collnum_t m_collnum;
int32_t m_lastCollRecResetCount;
@ -871,88 +795,24 @@ public:
Bits m_bits2;
Pos m_pos;
Phrases m_phrases;
//Synonyms m_synonyms;
SafeBuf m_synBuf;
//Weights m_weights;
Sections m_sections;
// a hack storage thing used by Msg13.cpp
class Msg13Request *m_hsr;
Section *m_si;
//Section *m_nextSection;
//Section *m_lastSection;
int32_t m_mcastRequestsOut;
int32_t m_mcastRequestsIn;
int32_t m_secStatsErrno;
char *m_queryBuf;
Msg39Request *m_msg39RequestArray;
SafeBuf m_mcastBuf;
Multicast *m_mcastArray;
//char *m_inUse;
//Query *m_queryArray;
//Query *m_sharedQuery;
bool m_gotDupStats;
//Query m_q4;
//Msg3a m_msg3a;
//Msg39Request m_r39;
Msg39Request m_mr2;
SectionStats m_sectionStats;
HashTableX m_sectionStatsTable;
//char m_sectionHashQueryBuf[128];
// also set in getSections()
int32_t m_maxVotesForDup;
// . for rebuild logging of what's changed
// . Repair.cpp sets these based on titlerec
char m_logLangId;
int32_t m_logSiteNumInlinks;
int32_t m_numSectiondbReads;
int32_t m_numSectiondbNeeds;
key128_t m_sectiondbStartKey;
RdbList m_secdbList;
int32_t m_sectiondbRecall;
bool m_gotFacets;
SafeBuf m_tmpBuf2;
SafeBuf m_inlineSectionVotingBuf;
//HashTableX m_rvt;
//Msg17 m_msg17;
//char *m_cachedRootVoteRec;
//int32_t m_cachedRootVoteRecSize;
//bool m_triedVoteCache;
//bool m_storedVoteCache;
//SafeBuf m_cacheRecBuf;
SafeBuf m_timeAxisUrl;
HashTableX m_turkVotingTable;
HashTableX m_turkBitsTable;
uint32_t m_confirmedTitleContentHash ;
uint32_t m_confirmedTitleTagHash ;
// turk voting tag rec
TagRec m_vtr;
// tagrec of banned turks
TagRec m_bannedTurkRec;
// and the table of the hashed banned turk users
HashTableX m_turkBanTable;
// used for displaying turk votes...
HashTableX m_vctab;
HashTableX m_vcduptab;
Images m_images;
HashTableX m_countTable;
HttpMime m_mime;
TagRec m_tagRec;
SafeBuf m_tagRecBuf;
// copy of m_oldTagRec but with our modifications, if any
//TagRec m_newTagRec;
SafeBuf m_newTagBuf;
SafeBuf m_fragBuf;
SafeBuf m_wordSpamBuf;
@ -962,9 +822,6 @@ public:
class SafeBuf *m_savedSb;
class HttpRequest *m_savedHr;
char m_savedChar;
// validity flags. on reset() all these are set to false.
char m_VALIDSTART;
// DO NOT add validity flags above this line!
@ -992,7 +849,6 @@ public:
char m_filteredRootTitleBufValid;
char m_titleBufValid;
char m_fragBufValid;
char m_inlineSectionVotingBufValid;
char m_wordSpamBufValid;
char m_finalSummaryBufValid;
char m_matchingQueryBufValid;
@ -1044,10 +900,6 @@ public:
char m_sectionsValid;
char m_subSentsValid;
char m_turkVotingTableValid;
char m_turkBitsTableValid;
char m_turkBanTableValid;
char m_vctabValid;
char m_explicitSectionsValid;
char m_impliedSectionsValid;
char m_imageDataValid;
@ -1132,9 +984,6 @@ public:
bool m_isWWWDupValid;
bool m_linkInfo1Valid;
bool m_linkSiteHashesValid;
bool m_sectionsReplyValid;
bool m_sectionsVotesValid;
bool m_sectiondbDataValid;
bool m_placedbDataValid;
bool m_siteHash64Valid;
bool m_siteHash32Valid;
@ -1197,9 +1046,6 @@ public:
// DO NOT add validity flags below this line!
char m_VALIDEND;
// more stuff
//char *m_utf8Content;
//int32_t m_utf8ContentLen;
bool m_printedMenu;
int32_t m_urlPubDate;
@ -1253,11 +1099,9 @@ public:
int32_t m_siteSpiderQuota;
//int32_t m_numBannedOutlinks;
class XmlDoc *m_oldDoc;
class XmlDoc *m_extraDoc;
class XmlDoc *m_rootDoc;
//class XmlDoc *m_gatewayDoc;
RdbList m_oldMetaList;
char *m_oldTitleRec;
int32_t m_oldTitleRecSize;
@ -1275,10 +1119,7 @@ public:
int32_t m_tagdbCollLen;
Url m_extraUrl;
//int32_t m_siteNumInlinksFresh;
//int32_t m_sitePop;
uint8_t m_siteNumInlinks8;
//int32_t m_siteNumInlinks;
LinkInfo m_siteLinkInfo;
SafeBuf m_mySiteLinkInfoBuf;
SafeBuf m_myPageLinkInfoBuf;
@ -1289,7 +1130,6 @@ public:
char m_useSiteLinkBuf;
char m_usePageLinkBuf;
char m_printInXml;
//Msg25 m_msg25;
SafeBuf m_tmpBuf11;
SafeBuf m_tmpBuf12;
Multicast m_mcast11;
@ -1297,7 +1137,6 @@ public:
// lists from cachedb for msg25's msg20 replies serialized
RdbList m_siteReplyList;
RdbList m_pageReplyList;
//void (* m_masterLoopWrapper) (void *state);
MsgC m_msgc;
bool m_isAllowed;
bool m_forwardDownloadRequest;
@ -1308,10 +1147,6 @@ public:
// for limiting # of iframe tag expansions
int32_t m_numExpansions;
char m_newOnly;
//int32_t m_tryAgainTimeDelta;
//int32_t m_sameIpWait;
//int32_t m_sameDomainWait;
//int32_t m_maxSpidersPerDomain;
char m_isWWWDup;
char m_calledMsg0b;
@ -1322,24 +1157,14 @@ public:
class RdbList *m_ulist;
void *m_hack;
class XmlDoc *m_hackxd;
//class LinkInfo *m_linkInfo1Ptr;
char *m_linkInfoColl;
//char m_injectedReply;
//int32_t m_minInlinkerHopCount;
//class LinkInfo *m_linkInfo2Ptr;
SiteGetter m_siteGetter;
int64_t m_siteHash64;
//char *m_site;
//int32_t m_siteLen;
//Url m_siteUrl;
int32_t m_siteHash32;
char *m_httpReply;
//char m_downloadAttempted;
char m_incrementedAttemptsCount;
char m_incrementedDownloadCount;
char m_redirectFlag;
//char m_isScraping;
//char m_throttleDownload;
char m_spamCheckDisabled;
char m_useRobotsTxt;
int32_t m_robotsTxtLen;
@ -1353,15 +1178,12 @@ public:
int32_t m_filteredContentMaxSize;
char m_calledThread;
int32_t m_errno;
//class CollectionRec *m_cr;
//int32_t m_utf8ContentAllocSize;
int32_t m_hostHash32a;
int32_t m_hostHash32b;
int32_t m_domHash32;
int32_t m_priorityQueueNum;
// this points into m_msge0 i guess
//class TagRec **m_outlinkTagRecVector;
Msge0 m_msge0;
// this points into m_msge1 i guess
@ -1657,10 +1479,7 @@ public:
bool m_storeTermListInfo;
char m_sortTermListBy;
SafeBuf m_sectiondbData;
//char *m_sectiondbData;
char *m_placedbData;
//int32_t m_sectiondbDataSize;
int32_t m_placedbDataSize;
// we now have HashInfo to replace this

@ -1869,18 +1869,7 @@ bool XmlDoc::hashUrl ( HashTableX *tt, bool urlOnly ) { // , bool isStatusDoc )
}
/////////////
//
// CHROME DETECTION
//
// we search for these terms we hash here in getSectionsWithDupStats()
// so we can remove chrome.
//
/////////////
// . returns false and sets g_errno on error
// . copied Url2.cpp into here basically, so we can now dump Url2.cpp
bool XmlDoc::hashSections ( HashTableX *tt ) {
// BR 20160106: No longer store xpath-hashes in posdb as we do not use them.
return true;
@ -3539,7 +3528,6 @@ skipsingleword:
// between calls? i.e. hashTitle() and hashBody()
//if ( wc > 0 ) m_dist = wposvec[wc-1] + 100;
if ( i > 0 ) m_dist = wposvec[i-1] + 100;
return true;