Merge branch 'master' of https://github.com/privacore/open-source-search-engine

2025-07-14 02:36:06 -04:00 · 2016-01-07 14:41:24 +01:00
parent afe57ab085 7d0fa2385d
commit 14e952076c
7 changed files with 140 additions and 674 deletions
--- a/Matches.cpp
+++ b/Matches.cpp
@ -555,37 +555,7 @@ bool Matches::addMatches( Words *words, Phrases *phrases, Sections *sections, Bi
 	int32_t nextMatchWordPos = 0;
 	int32_t lasti = -3;

-	int32_t dist = 0;
-
-	// . every tag increments "dist" by a value
-	// . rather than use a switch/case statement, which does a binary
-	//   lookup thing which is really slow, let's use a 256 bucket table
-	//   for constant lookup, rather than log(N).
-	static char   s_tableInit = false;
-	static int8_t s_tab[512];
 	if ( getNumXmlNodes() > 512 ) { char *xx=NULL;*xx=0; }
-	for ( int32_t i = 0 ; ! s_tableInit && i < 128 ; i++ ) {
-		char step = 0;
-		if ( i == TAG_TR    ) step = 2;
-		if ( i == TAG_P     ) step = 10;
-		if ( i == TAG_HR    ) step = 10;
-		if ( i == TAG_H1    ) step = 10;
-		if ( i == TAG_H2    ) step = 10;
-		if ( i == TAG_H3    ) step = 10;
-		if ( i == TAG_H4    ) step = 10;
-		if ( i == TAG_H5    ) step = 10;
-		if ( i == TAG_H6    ) step = 10;
-		if ( i == TAG_TABLE ) step = 30;
-		if ( i == TAG_BLOCKQUOTE ) step = 10;
-		// default
-		if ( step == 0 ) {
-			if ( g_nodes[i].m_isBreaking ) step = 10;
-			else                           step = 1;
-		}
-		// account for both the back and the front tags
-		s_tab[i     ] = step;
-	}
-	s_tableInit = true;

 	// google seems to index SEC_MARQUEE so i took that out of here
 	int32_t badFlags =SEC_SCRIPT|SEC_STYLE|SEC_SELECT|SEC_IN_TITLE;
@ -604,29 +574,13 @@ bool Matches::addMatches( Words *words, Phrases *phrases, Sections *sections, Bi
 		//else if (tids && (tids[i]&BACKBITCOMP) == TAG_A) 
 		//	inAnchTag = false;

-		// for each word increment distance
-		dist++;
-
-		//if ( addToMatches && tids && tids[i] ){
 		if ( tids && tids[i] ){
-			int32_t tid = tids[i] & BACKBITCOMP;
-			// accumulate distance
-			dist += s_tab[tid];
-			// monitor boundaries so that the proximity algo
-			// knows when two matches are separated by such tags
-			// MDW: isn't the "dist" good enough for this?????
-			// let's try just using "dist" then.
-			// "crossedSection" is hereby replaced by "dist".
-			//if ( s_tab[tid]
 			// tagIds don't have wids and are skipped
 			continue;
 		}

 		// skip if wid is 0, it is not an alnum word then
 		if ( ! wids[i] ) {
-			// and extra unit if it starts with \n i guess
-			if ( words->m_words[i][0] == '\n' ) dist++;
-			//	dist += words->m_wordLens[i] / 3;
 			continue;
 		}

@ -859,7 +813,6 @@ bool Matches::addMatches( Words *words, Phrases *phrases, Sections *sections, Bi
 		m->m_sections = sections;
 		m->m_bits     = bits;
 		m->m_pos      = pos;
-		m->m_dist     = dist;
 		m->m_flags    = flags | eflag ;

 		// add to our vector. we want to know where each QueryWord
--- a/Matches.h
+++ b/Matches.h
@ -62,10 +62,6 @@ class Match {
 	// "match group" or type of match. i.e. MF_TITLETAG, MF_METASUMM, ...
 	mf_t m_flags;

-	// improve summary generation parms
-	int32_t m_dist;
-	//bool m_crossedSection;
-
 	// . for convenience, these four class ptrs are used by Summary.cpp
 	// . m_wordNum is relative to this "words" class (and scores,bits,pos)
 	class Words    *m_words;
--- a/Pos.cpp
+++ b/Pos.cpp
@ -82,6 +82,8 @@ bool Pos::set (Words *words, char *f, char *fend, int32_t *len , int32_t a , int
 	// flag for stopping back-to-back spaces. only count those as one char.
 	bool lastSpace = false;
 	int32_t maxCharSize = 4; // we are utf8
+	int in_bad_tags = 0;
+
 	for ( int32_t i = a ; i < b ; i++ ) {
 		if (trunc) {
 			break;
@ -94,8 +96,26 @@ bool Pos::set (Words *words, char *f, char *fend, int32_t *len , int32_t a , int

 		// is tag?
 		if ( tids && tids[i] ) {
+			if ( f ) {
+				// let's not get from bad tags when filtering into buffer (used for generating summaries)
+				if ( ( tids[i] == TAG_STYLE ) || ( tids[i] == TAG_SCRIPT ) ) {
+					++in_bad_tags;
+					continue;
+				}
+
+				if ( in_bad_tags ) {
+					if ( ( ( tids[i] & BACKBITCOMP ) == TAG_STYLE ) ||
+					     ( ( tids[i] & BACKBITCOMP ) == TAG_SCRIPT ) ) {
+						--in_bad_tags;
+					}
+				}
+			}
+
 			// if not breaking, does nothing
-			if ( ! g_nodes[tids[i]&0x7f].m_isBreaking ) continue;
+			if ( ! g_nodes[tids[i]&0x7f].m_isBreaking ) {
+				continue;
+			}
+
 			// list tag? <li>
 			if ( tids[i] == TAG_LI ) { 
 				if ( f ){
@ -153,6 +173,11 @@ bool Pos::set (Words *words, char *f, char *fend, int32_t *len , int32_t a , int
 			continue;
 		}
 		
+		// skip words if we're in 'bad' tags
+		if ( in_bad_tags ) {
+			continue;
+		}
+
 		// scan through all chars discounting back-to-back spaces
 		
 		// assume filters out to the same # of chars
--- a/Sections.cpp
+++ b/Sections.cpp
@ -124,14 +124,7 @@ bool Sections::set ( Words     *w                       ,
 		     int32_t       niceness                ,
 		     void      *state                   ,
 		     void     (*callback)(void *state)  ,
-		     uint8_t    contentType             ,
-		     // from XmlDoc::ptr_sectionsData in a title rec
-		     char      *sectionsData            ,
-		     bool       sectionsDataValid       ,
-		     char      *sectionsVotes           ,
-		     //uint64_t   tagPairHash             ,
-		     char      *buf                     ,
-		     int32_t       bufSize                 ) {
+		     uint8_t    contentType             ) {

 	reset();

@ -1821,23 +1814,6 @@ bool Sections::set ( Words     *w                       ,
 	///////////////////////////////////////
 	setMenus();

-	///////////////////////////////////////
-	//
-	// now set SENT_LIST flags on m_sentFlags
-	//
-	// try to capture sentences that are not menus but are a list of
-	// things. if the sentence itself has a list of short items, or a bunch
-	// of commas, then also set the SEC_LIST flag on it. or if sentence
-	// is part of a sequence of sentences that are a list of sentences then
-	// set it for them as well. typically such sentences will be separated
-	// by a vertical space, have no periods, maybe have an <li> tag or only
-	// have a few words per sentence. this will help us demote search results
-	// that have the query terms in such a list because it is usually not
-	// very useful information.
-	//
-	///////////////////////////////////////
-	setListFlags();
-
 	//verifySections();

 	// don't use nsvt/osvt now
@ -1856,16 +1832,6 @@ bool Sections::addImpliedSections ( Addresses *aa ) {
 	// no point in going any further if we have nothing
 	if ( m_numSections == 0 ) return true;

-	// set this
-	//m_osvt = osvt;
-
-
-	// as part of a replacement for table swoggling which is confusing
-	// and didn't really work right, especially when we had both 
-	// table header row and column, we set these on each table cell:
-	// SEC_HASDATEHEADERROW and SEC_HASDATEHEADERCOL
-	if ( ! setTableStuff ( ) ) return false;
-
 	m_aa = aa;


@ -4060,7 +4026,6 @@ bool Sections::setSentFlagsPart2 ( ) {
 		inParens = false;
 		int32_t dollarCount = 0;
 		int32_t priceWordCount = 0;
-		bool hadAt = false;

 		// watchout if in a table. the last table column header
 		// should not be applied to the first table cell in the
@ -4435,8 +4400,7 @@ bool Sections::setSentFlagsPart2 ( ) {
 				isStopWord = true;
 			// count them
 			if ( isStopWord ) stops++;
-			// set this
-			if ( m_wids[i] == h_at ) hadAt = true;
+
 			// if we end on a stop word that is usually indicative
 			// of something like
 			// "Search Results for <h1>Doughnuts</h1>" as for
@ -5270,8 +5234,6 @@ int32_t hasTitleWords ( sentflags_t sflags ,
 	static int64_t h_tickets;
 	static int64_t h_events;
 	static int64_t h_jobs;
-	static int64_t h_this;
-	static int64_t h_series;
 	static int64_t h_total;
 	static int64_t h_times;
 	static int64_t h_purchase;
@ -5321,8 +5283,6 @@ int32_t hasTitleWords ( sentflags_t sflags ,
 		h_tickets = hash64n("tickets");
 		h_events = hash64n("events");
 		h_jobs = hash64n("jobs");
-		h_this = hash64n("this");
-		h_series = hash64n("series");
 		h_total = hash64n("total");
 		h_times = hash64n("times");
 		h_purchase = hash64n("purchase");
@ -6537,10 +6497,6 @@ int32_t hasTitleWords ( sentflags_t sflags ,
 		     to_lower_a(wptrs[i][wlens[i]-1]) == 't' )
 			hadAthon = true;

-		//if ( wids[i] == h_this &&
-		//     i+2<nw && wids[i+2] == h_series )
-		//	log("hey");
-
 		// save it
 		int64_t savedWid = lastWid;
 		// assign
@ -7088,7 +7044,6 @@ int32_t Sections::addImpliedSections3 ( ) {
 		//bro = sk;
 		// assume no winner
 		int32_t       bestScore  = 0;
-		Section   *bestBro    = NULL;
 		char       bestMethod = -1;
 		Partition *bestPart   = NULL;
 		// loop over all enumerated methods
@ -7147,7 +7102,6 @@ int32_t Sections::addImpliedSections3 ( ) {
 				if ( score <= bestScore ) continue;
 				// is best of all methods so far?
 				bestScore  = score;
-				bestBro    = bro;
 				bestMethod = m;
 				bestPart   = &parts[m];
 			}
@ -7186,11 +7140,9 @@ int32_t Sections::addImpliedSections3 ( ) {
 			Section *cc = m_sectionPtrs[a];
 			if ( cc && cc->m_a == a && cc->m_b == b ) continue;
 			// this returns false and sets g_errno on error
-			if ( ! insertSubSection( sk->m_parent ,
-						 winnerPart->m_a[i],
-						 winnerPart->m_b[i],
-						 BH_IMPLIED ) )
+			if ( ! insertSubSection( winnerPart->m_a[i], winnerPart->m_b[i], BH_IMPLIED ) ) {
 				return -1;
+			}
 		}

 		// ok, flag it
@ -7214,8 +7166,7 @@ float computeSimilarity2 ( int32_t   *vec0 ,
 			  int32_t    niceness ,
 			  SafeBuf *pbuf ,
 			  HashTableX *labelTable ,
-			  int32_t nv0 ,
-			  int32_t nv1 ) {
+			  int32_t nv0 ) {
 	// if both empty, assume not similar at all
 	if ( *vec0 == 0 && *vec1 == 0 ) return 0;
 	// if either is empty, return 0 to be on the safe side
@ -7379,7 +7330,7 @@ int32_t Sections::getDelimScore ( Section *bro ,
 	// save it
 	Section *start = bro;

-	int32_t dh = getDelimHash ( method , delim , start );
+	int32_t dh = getDelimHash ( method , delim );

 	// bro must be certain type for some methods
 	if ( dh == -1 ) return -2;
@ -7395,8 +7346,6 @@ int32_t Sections::getDelimScore ( Section *bro ,
 	// sanity check... should all be brothers (same parent)
 	if ( delim->m_parent != container ) { char *xx=NULL;*xx=0; }

-	// the head section of a particular partition's section
-	Section *currDelim = bro;
 	// scores
 	int32_t brosWithWords = 0;
 	int32_t maxBrosWithWords = 0;
@ -7453,7 +7402,6 @@ int32_t Sections::getDelimScore ( Section *bro ,

 	// reset prev sentence
 	Section *prevSent = NULL;
-	Section *lastBro  = NULL;

 	// scan the brothers
 	for ( ; ; bro = bro->m_nextBrother ) {
@ -7467,7 +7415,7 @@ int32_t Sections::getDelimScore ( Section *bro ,

 		// get its hash
 		int32_t h = 0LL ;
-		if ( bro ) h = getDelimHash ( method , bro , start );
+		if ( bro ) h = getDelimHash ( method , bro );

 		// . check this out
 		// . don't return 0 because we make a vector of these hashes
@ -7480,12 +7428,6 @@ int32_t Sections::getDelimScore ( Section *bro ,
 		// if first time, ignore crap above the first delimeter occurnc
 		if ( ignoreAbove ) continue;

-		// update this for insertSubSection()
-		lastBro = bro;
-
-		if ( h == dh ) 
-			currDelim = bro;
-
 		// count non delimeter sections. at least one section
 		// must have text and not be a delimeter section
 		if ( h != dh && bro && bro->m_firstWordPos >= 0 )
@ -7592,8 +7534,7 @@ int32_t Sections::getDelimScore ( Section *bro ,
 							m_niceness ,
 							pbuf ,
 							dbt ,
-							nva ,
-							nvb );
+							nva );
 			// add up all sims
 			if ( cellCount >= 2 ) { // ! firstTime ) {
 				simTotal += sim;
@ -7610,8 +7551,6 @@ int32_t Sections::getDelimScore ( Section *bro ,
 				if ( pbuf ) 
 					minBuf.safeMemcpy ( pbuf );
 			}
-			// a new head
-			//currDelim = bro;
 			// reset vht for next partition cell to call
 			// hashSentenceBits() into
 			vht.clear();
@ -8171,7 +8110,7 @@ bool Sections::hashSentPairs (Section    *sx ,

 // . don't return 0 because we make a vector of these hashes
 //   and computeSimilarity() assumes vectors are NULL term'd. return -1 instead
-int32_t Sections::getDelimHash ( char method , Section *bro , Section *head ) {
+int32_t Sections::getDelimHash ( char method , Section *bro ) {

 	// now all must have text!
 	//if ( bro->m_firstWordPos < 0 ) return -1;
@ -8370,31 +8309,6 @@ int32_t Sections::getDelimHash ( char method , Section *bro , Section *head ) {
 		// was not getting an implied section set, so let's do away
 		// with the pure dow algo and see what happens.
 		return -1;
-		// must be a sort of heading like "Jul 24"
-		//if ( !(bro->m_flags & SEC_HEADING_CONTAINER) &&
-		//     !(bro->m_flags & SEC_HEADING          ) )
-		//	return -1;
-		if ( ! (bro->m_flags & SEC_HAS_DOW) ) 
-			return -1;
-		// this is causing core
-		if ( bro->m_tagId == TAG_TC ) return -1;
-		// now it must be all date words
-		int32_t a = bro->m_firstWordPos;
-		int32_t b = bro->m_lastWordPos;
-		// sanity check
-		if ( a < 0 ) { char *xx=NULL;*xx=0; }
-		// scan
-		for ( int32_t i = a ; i <= b ; i++ ) {
-			// breathe
-			QUICKPOLL(m_niceness);
-			// skip if not wid
-			if ( ! m_wids[i] ) continue;
-			// must be in date
-			if ( ! ( m_bits->m_bits[i] & D_IS_IN_DATE ) ) 
-				return -1;
-		}
-		// do not collide with tagids
-		return 66666;
 	}
 	if ( method == METHOD_ABOVE_DOW ) {
 		// must be a sort of heading like "Jul 24"
@ -8503,10 +8417,6 @@ bool Sections::addSentenceSections ( ) {
 	static int64_t h_the;
 	static int64_t h_and;
 	static int64_t h_a;
-	static int64_t h_p;
-	static int64_t h_m;
-	static int64_t h_am;
-	static int64_t h_pm;
 	static int64_t h_http;
 	static int64_t h_https;
 	static int64_t h_room;
@ -8516,8 +8426,6 @@ bool Sections::addSentenceSections ( ) {
 	static int64_t h_suite;
 	static int64_t h_ste;
 	static int64_t h_tags;
-	//static int64_t h_noon;
-	//static int64_t h_midnight;
 	if ( ! s_init ) {
 		s_init = true;
 		h_tags = hash64n("tags");
@ -8525,10 +8433,6 @@ bool Sections::addSentenceSections ( ) {
 		h_the = hash64n("the");
 		h_and = hash64n("and");
 		h_a = hash64n("a");
-		h_p = hash64n("p");
-		h_m = hash64n("m");
-		h_am = hash64n("am");
-		h_pm = hash64n("pm");
 		h_a = hash64n("a");
 		h_at = hash64n("at");
 		h_for = hash64n("for");
@ -8549,8 +8453,6 @@ bool Sections::addSentenceSections ( ) {
 		h_building = hash64n("building");
 		h_suite = hash64n("suite");
 		h_ste = hash64n("ste");
-		//h_noon  = hash64n("noon");
-		//h_midnight = hash64n("midnight");
 	}

 	// need D_IS_IN_URL bits to be valid
@ -8578,7 +8480,6 @@ bool Sections::addSentenceSections ( ) {
 		bool lastWasComma = false;
 		nodeid_t includedTag = -2;
 		int32_t lastbr = -1;
-		bool hasColon = false;
 		bool endOnBr = false;
 		bool endOnBold = false;
 		bool capped = true;
@ -8842,11 +8743,6 @@ bool Sections::addSentenceSections ( ) {
 					     j == i )
 						break;

-					// flag it, but only if not in
-					// a time format like "8:30"
-					if ( j>0 && !is_digit(m_wptrs[j][-1]))
-						hasColon = true;
-
 					// a "::" is used in breadcrumbs,
 					// so break on that.
 					// fixes "Dining :: Visit :: 
@ -8881,39 +8777,9 @@ bool Sections::addSentenceSections ( ) {
 					     
 					if ( tagBefore ) break;
 					if ( tagAfter  ) break;
+
 					// for now allow it!
 					continue;
-					// do not break http://... though
-					if ( p[1] == '/' ) continue;
-					// or 10:30 etc.
-					if ( is_digit(p[1]) ) continue;
-					if ( j>0 && is_digit(p[-1]) ) continue;
-					// allow trumba titles to have colons
-					// so they can get the TSF_TITLEY
-					// event title boost in Events.cpp
-					if ( m_isTrumba &&
-					     sp[j]->m_tagId == TAG_TITLE )
-						continue;
-					// fix guysndollsllc.com which has
-					// "Battle of the Bands with: The Cincy
-					// Rockers, Second Wind, ..."
-					// if last word was a lowercase
-					// and one of these, let it in the
-					// sentence
-					//if ( lastWidPos < 0 ) 
-					//	break;
-					// must have been lowercase
-					if(!is_lower_utf8(m_wptrs[lastWidPos]))
-						break;
-					// and must be one of these words:
-					if ( prevWid == h_with ||
-					     // "Send info to: Booking"
-					     // from guysndollsllc.com/page4.ht
-					     prevWid == h_to ||
-					     prevWid == h_and  )
-						continue;
-					// otherwise, break it
-					break;
 				}
 				// . special hyphen
 				// . breaks up title for peachpundit.com
@ -8961,28 +8827,12 @@ bool Sections::addSentenceSections ( ) {

 				// set "next" to next alnum word after us
 				int32_t next = j+1;
-				int64_t nwid = 0LL;
 				int32_t max  = next + 10;
 				if ( max > m_nw ) max = m_nw;
 				for ( ; next < max ; next++ ) {
 					if ( ! m_wids[next] ) continue;
-					nwid = m_wids[next];
 					break;
 				}
-				// am. pm.
-				// if prev word was like 'm' as in am or pm
-				// then assume a cap word following ends sent.
-				// although if we got 
-				// "At 1 p.m. Bob Jones plays"
-				// then we'd be wrong.
-				bool isAmPm = false;
-				if ( prevWid == h_m &&
-				     (prevPrevWid == h_a ||
-				      prevPrevWid == h_p ) )
-					isAmPm = true;
-				if ( (prevWid == h_am ||
-				      prevWid == h_pm ) )
-					isAmPm = true;

 				// was previous word/abbr capitalized?
 				// if so, assume period does not end sentence.
@ -9206,7 +9056,7 @@ bool Sections::addSentenceSections ( ) {
 			if ( addb >= m_nw ) { char *xx=NULL;*xx=0; }

 			// ok, now add the split sentence
-			Section *is =insertSubSection(parent,adda,addb+1,bh);
+			Section *is =insertSubSection(adda,addb+1,bh);
 			// panic?
 			if ( ! is ) return false;
 			// set sentence flag on it
@ -9270,8 +9120,7 @@ bool Sections::addSentenceSections ( ) {
 	return true;
 }

-Section *Sections::insertSubSection ( Section *parentArg , int32_t a , int32_t b ,
-				      int32_t newBaseHash ) {
+Section *Sections::insertSubSection ( int32_t a, int32_t b, int32_t newBaseHash ) {
 	// debug
 	//log("sect: inserting subsection [%"INT32",%"INT32")",a,b);

@ -9373,11 +9222,11 @@ Section *Sections::insertSubSection ( Section *parentArg , int32_t a , int32_t b
 		m_numSections--;
 		char *xx=NULL;*xx=0;
 		return NULL;
-		sk->m_next = m_rootSection;//m_rootSection;
-		sk->m_prev = NULL;
-		//m_sections[0].m_prev = sk;
-		m_rootSection->m_prev = sk;
-		m_rootSection = sk;
+//		sk->m_next = m_rootSection;//m_rootSection;
+//		sk->m_prev = NULL;
+//		//m_sections[0].m_prev = sk;
+//		m_rootSection->m_prev = sk;
+//		m_rootSection = sk;
 	} else {
 		// insert us into the linked list of sections
 		if ( si->m_next ) si->m_next->m_prev = sk;
@ -9541,96 +9390,6 @@ Section *Sections::insertSubSection ( Section *parentArg , int32_t a , int32_t b
 	}

 	return sk;
-
-	// start scanning here
-	Section *start = parent->m_next;
-
-	int32_t lastb = -1;
-	// try just scanning sections in parent
-	for ( Section *sx = start ; sx ; sx = sx->m_next ) {
-		// breathe
-		QUICKPOLL ( m_niceness );
-		// get it
-		//Section *sx = &m_sections[xx];
- 		// skip if section ends before our sentence begins
-		if ( sx->m_b <= a ) continue;
-		// stop if beyond sk
-		if ( sx->m_a >= b ) break;
-		// skip if sn not parent
-		if ( sx->m_parent != parent ) continue;
-		// when splitting a section do not reparent if
-		// not in our split...
-		//if ( sx->m_a >= b ) continue;
-		// do not reparent if it contains us
-		if ( sx->m_a <= a && sx->m_b >= b ) continue;
-		// reset his parent to the newly added section
-		sx->m_parent = sk;
-		// and or his flags into us. SEC_HAS_DOM, etc.
-		sk->m_flags |= sx->m_flags & mask;
-		// sanity check
-		if ( sx->m_b > sk->m_b ) { char *xy=NULL;*xy=0; }
-		if ( sx->m_a < sk->m_a ) { char *xy=NULL;*xy=0; }
-		// skip if already got the xor for this section
-		if ( sx->m_a < lastb ) continue;
-		// set this
-		lastb = sx->m_b;
-		// add all the entries from this child section from the
-		// phone/email/etc. tables
-		sk->m_phoneXor ^= sx->m_phoneXor;
-		sk->m_emailXor ^= sx->m_emailXor;
-		sk->m_priceXor ^= sx->m_priceXor;
-		sk->m_todXor   ^= sx->m_todXor;
-		sk->m_dayXor   ^= sx->m_dayXor;
-		sk->m_addrXor  ^= sx->m_addrXor;
-		// make sure did not make it zero
-		if ( sx->m_phoneXor   && sk->m_phoneXor == 0 )
-			sk->m_phoneXor    = sx->m_phoneXor;
-		if ( sx->m_emailXor   && sk->m_emailXor == 0 )
-			sk->m_emailXor    = sx->m_emailXor;
-		if ( sx->m_priceXor   && sk->m_priceXor == 0 )
-			sk->m_priceXor    = sx->m_priceXor;
-		if ( sx->m_todXor     && sk->m_todXor == 0 )
-			sk->m_todXor      = sx->m_todXor;
-		if ( sx->m_dayXor     && sk->m_dayXor == 0 )
-			sk->m_dayXor      = sx->m_dayXor;
-		if ( sx->m_addrXor && sk->m_addrXor == 0 )
-			sk->m_addrXor = sx->m_addrXor;
-		// set this perhaps
-		if ( sk->m_firstPlaceNum < 0 ) 
-			sk->m_firstPlaceNum = sx->m_firstPlaceNum;
-		// update this?
-		if ( sx->m_alnumPosA < 0 ) continue;
-		// take the first one we get
-		if ( sk->m_alnumPosA == -1 ) 
-			sk->m_alnumPosA = sx->m_alnumPosA;
-		// update to the last one always
-		sk->m_alnumPosB  = sx->m_alnumPosB;
-	}
-
-	// a flag
-	bool needsFirst = true;
-	// . set the words ptrs to it
-	// . TODO: can later speed up with ptr to ptr logic
-	for ( int32_t yy = a ; yy < b ; yy++ ) {
-		// breathe
-		QUICKPOLL ( m_niceness );
-		// and first/last word pos
-		if ( m_wids[yy] ) {
-			// mark this
-			if ( needsFirst ) {
-				sk->m_firstWordPos = yy;
-				needsFirst = false;
-			}
-			// remember last
-			sk->m_lastWordPos = yy;
-		}
-		// must have had sn as parent
-		if ( m_sectionPtrs[yy] != parent ) continue;
-		// "sk" becomes the new parent
-		m_sectionPtrs[yy] = sk;
-	}
-
-	return sk;
 }

 // for brbr and hr splitting delimeters
@ -9666,8 +9425,6 @@ int32_t Sections::splitSectionsByTag ( nodeid_t tagid ) {
 		for ( ; first->m_prevBrother ; first = first->m_prevBrother )
 			// breathe
 			QUICKPOLL(m_niceness);
-		// save parent
-		Section *parent = first->m_parent;

 	subloop:
 		// mark it
@ -9715,7 +9472,7 @@ int32_t Sections::splitSectionsByTag ( nodeid_t tagid ) {
 		     // and must group together something meaningful
 		     numTextSections >= 2 ) {
 			// do the insertion
-			Section *sk = insertSubSection (parent,a,b,BH_IMPLIED);
+			Section *sk = insertSubSection (a,b,BH_IMPLIED);
 			// error?
 			if ( ! sk ) return -1;
 			// fix it
@ -9792,7 +9549,7 @@ bool Sections::splitSections ( char *delimeter , int32_t dh ) {
 		//
 		// try this now
 		//
-		Section *sk = insertSubSection ( sn , start , i , dh );
+		Section *sk = insertSubSection ( start , i , dh );

 		// do not resplit this split section with same delimeter!!
 		if ( sk ) sk->m_processedHash = dh;
@ -9900,7 +9657,6 @@ SectionVotingTable::SectionVotingTable ( ) {
 //bool Sections::gotSectiondbList ( bool *needsRecall ) {
 bool SectionVotingTable::addListOfVotes ( RdbList *list, 
 					  key128_t **lastKey ,
-					  uint32_t tagPairHash ,
 					  int64_t myDocId ,
 					  int32_t niceness ) {

@ -11296,27 +11052,6 @@ bool Sections::containsTagId ( Section *si, nodeid_t tagId ) {
 	return false;
 }

-bool Sections::setTableStuff ( ) {
-	return true;
-}
-
-// . does table have a date header row or column?
-// . we need this for our weekly schedule detection algorithm
-// . many sites have a row header this is the days of the week
-// . sometimes they have tods in the first column, and sometimes they
-//   just put the tods and tod ranges in the table cells directly.
-// . sets Section::m_flags SEC_HASDATEHEADERCOL/ROW for JUST the table
-//   section if it indeed has such date headers
-// . Dates::isCompatible() looks at that table flag to see if it should
-//   apply special processing when deciding if two dates should be paired
-// . then we set DF_TABLEDATEHEADERROW/COL for the dates in those
-//   header rows/cols so that we can set SF_RECURRING_DOW if the dow date
-//   was in the header row/col
-bool Sections::setTableDateHeaders ( Section *ts ) {
-
-	return true;
-}
-
 // . just the voting info for passing into diffbot in json
 // . along w/ the title/summary/etc. we can return this json blob for each search result
 bool Sections::printVotingInfoInJSON ( SafeBuf *sb ) {
@ -11354,10 +11089,8 @@ bool Sections::print2 ( SafeBuf *sbuf ,
 			char *diversityVec,
 			char *wordSpamVec,
 			char *fragVec,
-			HashTableX *st2 ,
-			HashTableX *tt  ,
 			Addresses *aa ,
-			char format ) { // bool forProCog ){
+			char format ) {
 	//FORMAT_PROCOG FORMAT_JSON HTML

 	//sbuf->safePrintf("<b>Sections in Document</b>\n");
@ -11367,10 +11100,6 @@ bool Sections::print2 ( SafeBuf *sbuf ,

 	m_sbuf->setLabel ("sectprnt");

-	//m_pt = pt;
-	//m_et = et;
-	//m_at = at;
-	//m_priceTable = priceTable;
 	m_aa = aa;
 	m_hiPos = hiPos;

@ -11991,7 +11720,6 @@ bool Sections::setRegistrationBits ( ) {
 	static int64_t h_request;
 	static int64_t h_requesting;
 	static int64_t h_get;
-	static int64_t h_enroll;
 	static int64_t h_buy;
 	static int64_t h_presale ;
 	static int64_t h_pre ;
@ -12004,7 +11732,6 @@ bool Sections::setRegistrationBits ( ) {
 	static int64_t h_box; // box office for newmexicojazzfestival.org
 	static int64_t h_office;
 	static int64_t h_ticket;//ticket window for newmexicojazzfestival.org
-	static int64_t h_online;
 	static int64_t h_window;
 	static int64_t h_patron;
 	static int64_t h_service;
@ -12043,7 +11770,6 @@ bool Sections::setRegistrationBits ( ) {
 		h_requesting   = hash64n("requesting");
 		h_request      = hash64n("request");
 		h_get          = hash64n("get");
-		h_enroll       = hash64n("enroll");
 		h_buy          = hash64n("buy");
 		h_presale      = hash64n("presale");
 		h_pre          = hash64n("pre");
@ -12069,7 +11795,6 @@ bool Sections::setRegistrationBits ( ) {
 		h_box          = hash64n("box");
 		h_office       = hash64n("office");
 		h_ticket       = hash64n("ticket");
-		h_online       = hash64n("online");
 		h_window       = hash64n("window");
 		h_patron       = hash64n("patron");
 		h_service      = hash64n("service");
@ -12186,7 +11911,6 @@ bool Sections::setRegistrationBits ( ) {
 		if ( wid == h_sign && nextWid == h_up      ) gotIt = 1;
 		if ( wid == h_signup                       ) gotIt = 1;
 		if ( wid == h_buy && nextWid == h_ticket  ) gotIt = 1;
-		//if ( wid == h_buy && nextWid == h_online  ) gotIt = 1;
 		if ( wid == h_purchase&&nextWid==h_ticket ) gotIt = 1;
 		if ( wid == h_get && nextWid==h_ticket    ) gotIt = 1;
 		// for that jimmy kimmel live url "requesting tickets online"
@ -13102,100 +12826,95 @@ bool Sectiondb::verify ( char *coll ) {
 	return true;
 }

-
-bool Sections::setListFlags ( ) {
-	return true;
-}
-
 bool Sections::growSections ( ) {
 	// make a log note b/c this should not happen a lot because it's slow
 	log("build: growing sections!");
 	g_errno = EDOCBADSECTIONS;
 	return true;
-	// record old buf start
-	char *oldBuf = m_sectionBuf.getBufStart();
-	// grow by 20MB at a time
-	if ( ! m_sectionBuf.reserve ( 20000000 ) ) return false;
-	// for fixing ptrs:
-	char *newBuf = m_sectionBuf.getBufStart();
-	// set the new max
-	m_maxNumSections = m_sectionBuf.getCapacity() / sizeof(Section);
-	// update ptrs in the old sections
-	for ( int32_t i = 0 ; i < m_numSections ; i++ ) {
-		// breathe
-		QUICKPOLL(m_niceness);
-		Section *si = &m_sections[i];
-		if ( si->m_parent ) {
-			char *np = (char *)si->m_parent;
-			np = np - oldBuf + newBuf;
-			si->m_parent = (Section *)np;
-		}
-		if ( si->m_next ) {
-			char *np = (char *)si->m_next;
-			np = np - oldBuf + newBuf;
-			si->m_next = (Section *)np;
-		}
-		if ( si->m_prev ) {
-			char *np = (char *)si->m_prev;
-			np = np - oldBuf + newBuf;
-			si->m_prev = (Section *)np;
-		}
-		if ( si->m_listContainer ) {
-			char *np = (char *)si->m_listContainer;
-			np = np - oldBuf + newBuf;
-			si->m_listContainer = (Section *)np;
-		}
-		if ( si->m_prevBrother ) {
-			char *np = (char *)si->m_prevBrother;
-			np = np - oldBuf + newBuf;
-			si->m_prevBrother = (Section *)np;
-		}
-		if ( si->m_nextBrother ) {
-			char *np = (char *)si->m_nextBrother;
-			np = np - oldBuf + newBuf;
-			si->m_nextBrother = (Section *)np;
-		}
-		if ( si->m_sentenceSection ) {
-			char *np = (char *)si->m_sentenceSection;
-			np = np - oldBuf + newBuf;
-			si->m_sentenceSection = (Section *)np;
-		}
-		if ( si->m_prevSent ) {
-			char *np = (char *)si->m_prevSent;
-			np = np - oldBuf + newBuf;
-			si->m_prevSent = (Section *)np;
-		}
-		if ( si->m_nextSent ) {
-			char *np = (char *)si->m_nextSent;
-			np = np - oldBuf + newBuf;
-			si->m_nextSent = (Section *)np;
-		}
-		if ( si->m_tableSec ) {
-			char *np = (char *)si->m_tableSec;
-			np = np - oldBuf + newBuf;
-			si->m_tableSec = (Section *)np;
-		}
-		if ( si->m_headColSection ) {
-			char *np = (char *)si->m_headColSection;
-			np = np - oldBuf + newBuf;
-			si->m_headColSection = (Section *)np;
-		}
-		if ( si->m_headRowSection ) {
-			char *np = (char *)si->m_headRowSection;
-			np = np - oldBuf + newBuf;
-			si->m_headRowSection = (Section *)np;
-		}
-		if ( si->m_leftCell ) {
-			char *np = (char *)si->m_leftCell;
-			np = np - oldBuf + newBuf;
-			si->m_leftCell = (Section *)np;
-		}
-		if ( si->m_aboveCell ) {
-			char *np = (char *)si->m_aboveCell;
-			np = np - oldBuf + newBuf;
-			si->m_aboveCell = (Section *)np;
-		}
-	}
-	return true;
+//	// record old buf start
+//	char *oldBuf = m_sectionBuf.getBufStart();
+//	// grow by 20MB at a time
+//	if ( ! m_sectionBuf.reserve ( 20000000 ) ) return false;
+//	// for fixing ptrs:
+//	char *newBuf = m_sectionBuf.getBufStart();
+//	// set the new max
+//	m_maxNumSections = m_sectionBuf.getCapacity() / sizeof(Section);
+//	// update ptrs in the old sections
+//	for ( int32_t i = 0 ; i < m_numSections ; i++ ) {
+//		// breathe
+//		QUICKPOLL(m_niceness);
+//		Section *si = &m_sections[i];
+//		if ( si->m_parent ) {
+//			char *np = (char *)si->m_parent;
+//			np = np - oldBuf + newBuf;
+//			si->m_parent = (Section *)np;
+//		}
+//		if ( si->m_next ) {
+//			char *np = (char *)si->m_next;
+//			np = np - oldBuf + newBuf;
+//			si->m_next = (Section *)np;
+//		}
+//		if ( si->m_prev ) {
+//			char *np = (char *)si->m_prev;
+//			np = np - oldBuf + newBuf;
+//			si->m_prev = (Section *)np;
+//		}
+//		if ( si->m_listContainer ) {
+//			char *np = (char *)si->m_listContainer;
+//			np = np - oldBuf + newBuf;
+//			si->m_listContainer = (Section *)np;
+//		}
+//		if ( si->m_prevBrother ) {
+//			char *np = (char *)si->m_prevBrother;
+//			np = np - oldBuf + newBuf;
+//			si->m_prevBrother = (Section *)np;
+//		}
+//		if ( si->m_nextBrother ) {
+//			char *np = (char *)si->m_nextBrother;
+//			np = np - oldBuf + newBuf;
+//			si->m_nextBrother = (Section *)np;
+//		}
+//		if ( si->m_sentenceSection ) {
+//			char *np = (char *)si->m_sentenceSection;
+//			np = np - oldBuf + newBuf;
+//			si->m_sentenceSection = (Section *)np;
+//		}
+//		if ( si->m_prevSent ) {
+//			char *np = (char *)si->m_prevSent;
+//			np = np - oldBuf + newBuf;
+//			si->m_prevSent = (Section *)np;
+//		}
+//		if ( si->m_nextSent ) {
+//			char *np = (char *)si->m_nextSent;
+//			np = np - oldBuf + newBuf;
+//			si->m_nextSent = (Section *)np;
+//		}
+//		if ( si->m_tableSec ) {
+//			char *np = (char *)si->m_tableSec;
+//			np = np - oldBuf + newBuf;
+//			si->m_tableSec = (Section *)np;
+//		}
+//		if ( si->m_headColSection ) {
+//			char *np = (char *)si->m_headColSection;
+//			np = np - oldBuf + newBuf;
+//			si->m_headColSection = (Section *)np;
+//		}
+//		if ( si->m_headRowSection ) {
+//			char *np = (char *)si->m_headRowSection;
+//			np = np - oldBuf + newBuf;
+//			si->m_headRowSection = (Section *)np;
+//		}
+//		if ( si->m_leftCell ) {
+//			char *np = (char *)si->m_leftCell;
+//			np = np - oldBuf + newBuf;
+//			si->m_leftCell = (Section *)np;
+//		}
+//		if ( si->m_aboveCell ) {
+//			char *np = (char *)si->m_aboveCell;
+//			np = np - oldBuf + newBuf;
+//			si->m_aboveCell = (Section *)np;
+//		}
+//	}
+//	return true;
 }

--- a/Sections.h
+++ b/Sections.h
@ -33,8 +33,6 @@
 // . these are descriptive flags, they are computed when Sections is set
 // . SEC_NOTEXT sections do not vote, i.e. they are not stored in Sectiondb
 #define SEC_NOTEXT       0x0001 // implies section has no alnum words
-//#define SEC_ARTICLE    0x0002 // section is SV_UNIQUE and SV_TEXTY
-//#define SEC_DUP        0x0004 // content hash repeated on same site

 // . Weights.cpp zeroes out the weights for these types of sections
 // . is section delimeted by the <script> tag, <marquee> tag, etc.
@ -43,9 +41,6 @@
 #define SEC_SELECT       0x0020
 #define SEC_MARQUEE      0x0040
 #define SEC_CONTAINER    0x0080
-// . is section in anchor text
-// . is section delimeted by the <a href...> tag
-//#define SEC_A            0x0080

 // . in title/header. for gigabits in XmlDoc.cpp
 // . is section delemited by <title> or <hN> tags?
@ -69,8 +64,6 @@
 #define SEC_HEADING      0x200000

 // reasons why a section is not an event
-//#define SEC_MULT_PLACES    0x008000 
-//#define SEC_IS_MENUITEM        0x00040000 // in a list of menu items?
 #define SEC_UNBALANCED         0x00400000 // interlaced section/tags
 #define SEC_OPEN_ENDED         0x00800000 // no closing tag found
 #define SEC_SENTENCE           0x01000000 // made by a sentence?
@ -115,26 +108,11 @@
 #define SEC_MULTIDIMS               0x0008000000000000LL
 #define SEC_HASHXPATH               0x0010000000000000LL

-//#define SEC_HAS_ADDRESS        0x08000000
-//#define SEC_ADDRESS_CONTAINER  0x40000000
-//#define SEC_HAS_STOREHOURS     0x01000000 // event is really just store hours
-//#define SEC_HAS_NONSTOREHOURS  0x02000000
-//#define SEC_HAS_NON_EVENT_DATE 0x04000000
-
-
 // . some random-y numbers for Section::m_baseHash
 // . used by splitSection() function
-//#define BH_BR      -1113348753
-//#define BH_BRBR    3947503
-//#define BH_HR      1378153634
-//#define BH_H1     -1788814047
-//#define BH_H2     -1170023066
-//#define BH_H3     -132582659
-//#define BH_H4      2095609929
 #define BH_BULLET  7845934
 #define BH_SENTENCE 4590649
 #define BH_IMPLIED  95468323
-//#define BH_IMPLIED_LIST 9434499

 // values for Section::m_sentFlags (sentence flags)
 #define SENT_HAS_COLON       0x00000001
@ -342,12 +320,7 @@ public:
 	// used by Events.cpp to count # of timeofdays in section
 	//class Event *m_event;

-	// for Events class
-	//uint8_t m_numAddresses;
-	//class Address *m_address;
 	// for Events class, usually streets!
-	//uint8_t m_numPlaces;
-	//class Place *m_place;
 	class Addresses *m_aa;

 	// . if we are an element in a list, what is the list container section
@ -357,10 +330,6 @@ public:
 	// . used to set SEC_HAS_MENUBROTHER flag
 	class Section *m_listContainer;

-	// if we are a header, of what list are we a header of?
-	//class Section *m_headerOfList;
-	
-
 	// the sibling section before/after us. can be NULL.
 	class Section *m_prevBrother;
 	class Section *m_nextBrother;
@ -453,30 +422,9 @@ public:
 	// for debug output display of color coded nested sections
 	uint32_t m_colorHash;

-	// like tag hash but only the tag ids, no hashed attributes or 
-	// virtual section base hashes
-	//int32_t  m_formatHash;
-
 	// tagid of this section, 0 means none (like sentence section, etc.)
 	nodeid_t m_tagId;

-	/*
-	// used by addImpliedSections()
-	int32_t getBaseHash2 ( ) { 
-		// fix for funkefiredarts.com since one of the header tags
-		// has a different tag attribute, but it says "Monday". so
-		// treat all these special headers the same since it is
-		// critical we get these type of implied sections right, lest
-		// we hurt our date telscoping.
-		if ( m_flags & SEC_HAS_DOM_DOW ) return 22222;
-		if ( m_flags&SEC_HEADING_CONTAINER) return m_baseHash^0x789123;
-		else                                return m_baseHash;
-	};
-	*/
-
-	//int32_t getBaseHash3 ();
-
-
 	// usually just the m_tagId, but hashes in the class attributes of
 	// div and span tags, etc. to make them unique
 	uint32_t  m_baseHash;
@ -491,25 +439,12 @@ public:
 	// these deal with enumertated tags and are used by Events.cpp
 	int32_t  m_occNum;
 	int32_t  m_numOccurences;
-	// section with same m_tagHash and before you
-	//class Section *m_prevSibling;

 	// used by XmlDoc.cpp to set a topological distance
 	int32_t m_topDist;
-	//int32_t m_sortedIndex;
-
-	// all the parent tags are enumerated, but the kid (youngest tag)
-	// is not enumerated
-	//int32_t  m_enumTagHash;
-
-	// . tag hash which disregards non-breaking or tags with no back tags
-	// . used by Events.cpp
-	//int32_t  m_hardTagHash;

 	// hash of all the alnum words DIRECTLY in this section
 	uint64_t  m_contentHash64;
-	// if section contains words indirectly, then store xor'ed wids in here
-	//int32_t  m_contentHash2;

 	uint64_t  m_sentenceContentHash64;

@ -523,12 +458,6 @@ public:
 	// uses m_sentenceContentHash64 (for sentences)
 	uint64_t m_indirectSentHash64;

-	// for voting! we basically ignore numbers and dates, months, etc.
-	// for doing this hash so that if the date changes from page to page
-	// it will still be recognized as a "dup section" and m_votesForDup
-	// should be high
-	//uint32_t m_voteHash32;
-
 	// . range of words in Words class we encompass
 	// . m_wordStart and m_wordEnd are the tag word #'s
 	// . ACTUALLY it is a half-closed interval [a,b) like all else
@ -539,31 +468,12 @@ public:
 	int32_t  m_b;//wordEnd;

 	// for event titles and descriptions
-	//float m_titleScore;
-	//float m_descScore;
-	//titleflags_t  m_titleFlags;
 	sentflags_t m_sentFlags;

-	// bits set based on turk votes. see the TB_* bits in XmlDoc.h
-	//turkbits_t m_turkBits;
-
-	// alnum count for us and all sections we contain
-	//int32_t  m_alnumCount;
-
 	// . # alnum words only in this and only this section
 	// . if we have none, we are SEC_NOTEXT
 	int32_t  m_exclusive;

-	// like above, but word must also NOT be in a hyperlink
-	//int32_t  m_plain;
-
-	// Address.cpp uses this
-	//char     m_numBackToBackSubsections;
-	//nodeid_t m_lastTid;
-
-	// # of times this section appears in this doc
-	//int32_t  m_totalOccurences; 
-
 	// our depth. # of tags in the hash
 	int32_t  m_depth;

@ -574,61 +484,15 @@ public:
 	int32_t m_mark;

 	// Events.cpp assigns a date to each section
-	//int32_t m_fullDate;
-	//class Date *m_datePtr;
 	int32_t m_firstDate;

 	char m_used;

-	//int32_t m_numTods;
-
-	// the event section we contain. used by Events.cpp
-	//class Section *m_eventSec;
-
-	// used by Events.cpp for determining what range of events a section
-	// contains. we store that range in Events::hash() when we index each
-	// word into datedb for events.
-	//int32_t m_minEventId;
-	//int32_t m_maxEventId;
-
 	// used in Sections::splitSections() function
 	int32_t m_processedHash;

 	int32_t m_gbFrameNum;

-	// . support event ids from 0 to 255
-	// . this increases the sizeof this class from 160 to 192 bytes
-	//char m_evIdBits[32];
-	// how many bits in the above array are set?
-	//int16_t m_numEventIdBits;
-
-	/*
-	bool hasEventId ( int32_t evId ) {
-		// this is an overflow condition...
-		if ( evId > 255 ) return false;
-		// -1 or 0 means not associated with any event id since
-		// all eventIds are >= 1
-		if ( m_minEventId <= 0   ) return false;
-		if ( evId < m_minEventId ) return false;
-		if ( evId > m_maxEventId ) return false;
-		unsigned char bitMask = 1 << (evId % 8);
-		return m_evIdBits[evId/8] & bitMask;
-	};
-
-	void addEventId ( int32_t eid ) {
-		if ( eid >= 256 ) return;
-		unsigned char bitMask = 1 << (eid % 8);
-		unsigned char byteOff = eid / 8;
-		if ( m_evIdBits[byteOff] & bitMask ) return;
-		m_evIdBits[byteOff] |= bitMask;
-		m_numEventIdBits++;
-		if ( m_minEventId <= 0 || m_minEventId > eid )
-			m_minEventId = eid;
-		if ( m_maxEventId <= 0 || m_maxEventId < eid )
-			m_maxEventId = eid;
-	};
-	*/
-
 	// do we contain section "arg"?
 	bool contains ( class Section *arg ) {
 		return ( m_a <= arg->m_a && m_b >= arg->m_b ); };
@ -675,12 +539,7 @@ class Sections {
 		   int32_t            niceness    ,
 		   void           *state       ,
 		   void          (*callback)(void *state) ,
-		   uint8_t         contentType ,
-		   char           *sectionsData,
-		   bool            sectionsDataValid ,
-		   char           *sectionsData2,
-		   char           *buf         ,
-		   int32_t            bufSize     ) ;
+		   uint8_t         contentType ) ;


 	bool addVotes(class SectionVotingTable *nsvt, uint32_t tagPairHash );
@ -721,17 +580,10 @@ class Sections {
 		      char *diversityVec,
 		      char *wordSpamVec,
 		      char *fragVec,
-		      class HashTableX *st2 ,
-		      class HashTableX *tt  ,
 		      class Addresses  *aa  ,
-		      char format = FMT_HTML ); // bool forProCog );
+		      char format = FMT_HTML );
 	bool printSectionDiv ( class Section *sk , char format = FMT_HTML );
-	//bool forProCog = false ) ;
 	class SafeBuf *m_sbuf;
-	//class HashTableX *m_pt;
-	//class HashTableX *m_et;
-	//class HashTableX *m_at;
-	//class HashTableX *m_priceTable;

 	char *getSectionsReply ( int32_t *size );
 	char *getSectionsVotes ( int32_t *size );
@ -739,13 +591,10 @@ class Sections {
 	bool isHardSection ( class Section *sn );

 	bool setMenus ( );
-	bool setListFlags ( );

 	bool setFormTableBits ( ) ;
 	bool setTableRowsAndCols ( class Section *tableSec ) ;
 	bool setTableHeaderBits ( class Section *table );
-	bool setTableStuff  ( ) ;
-	bool setTableDateHeaders ( class Section *ts ) ;
 	bool setTableScanPtrs ( class Section *ts ) ;

 	void setHeader ( int32_t r , class Section *first , sec_t flag ) ;
@ -765,7 +614,6 @@ class Sections {
 	class Url   *m_url      ;
 	int64_t    m_docId    ;
 	int64_t    m_siteHash64 ;
-	//int64_t    m_tagPairHash;
 	char        *m_coll     ;
 	void        *m_state    ;
 	void       (*m_callback) ( void *state );
@ -797,7 +645,6 @@ class Sections {
 	bool m_waitInLine;
 	int32_t m_articleStartWord;
 	int32_t m_articleEndWord;
-	//int32_t m_totalSimilarLayouts;
 	bool m_hadArticle;
 	int32_t m_numInvalids;
 	int32_t m_totalSiteVoters;
@ -848,13 +695,6 @@ class Sections {

 	int32_t m_numSentenceSections;

-	// . the section ptrs sorted by Section::m_a
-	// . since we set SEC_FAKE from splitSections() those new sections
-	//   are appended on m_sections[] array and are out of order, so
-	//   we merge sort the two sublists of m_sections[] and put the
-	//   pointers into here...
-	//class Section **m_sorted;
-
 	bool m_isTestColl;

 	// assume no malloc
@ -870,15 +710,8 @@ class Sections {
 	char      **m_wptrs;
 	nodeid_t   *m_tids;

-	//int32_t addImpliedSections  ( bool needHR );
-	//int32_t addHeaderImpliedSections ( );
-
-	//int32_t addImpliedSectionsOld ( );
-	//int32_t getHeadingScore ( class Section *sk , int32_t baseHash );
-
 	// the new way
 	bool addImpliedSections ( class Addresses *aa );//, HashTableX *svt );
-	//HashTableX *m_svt;

 	bool setSentFlagsPart1 ( );
 	bool setSentFlagsPart2 ( );
@ -899,10 +732,7 @@ class Sections {
 			     char method,
 			     class Section *delim ,
 			     class Partition *part );
-	int32_t getDelimHash ( char method , class Section *bro ,
-			    class Section *head ) ;
-	//int32_t m_totalHdrCount;
-	//bool m_called;
+	int32_t getDelimHash ( char method , class Section *bro ) ;

 	bool addImpliedLists ( ) ;
 	int32_t getDelimScore2 ( class Section *bro,
@ -926,10 +756,7 @@ class Sections {

 	bool addSentenceSections ( ) ;

-	class Section *insertSubSection ( class Section *parent , 
-					  int32_t a , 
-					  int32_t b ,
-					  int32_t newBaseHash ) ;
+	class Section *insertSubSection ( int32_t a, int32_t b, int32_t newBaseHash ) ;

 	int32_t splitSectionsByTag ( nodeid_t tagid ) ;
 	bool splitSections ( char *delimeter , int32_t dh );
@ -1040,7 +867,6 @@ class SectionVotingTable {
 	// stock table from a sectiondb rdblist
 	bool addListOfVotes ( RdbList *list, 
 			      key128_t **lastKey ,
-			      uint32_t tagPairHash ,
 			      int64_t docId ,
 			      int32_t niceness ) ;

@ -1105,26 +931,7 @@ class SectionVotingTable {
 #define SV_EURDATEFMT     3 // DateParse2.cpp. contains european date fmt
 #define SV_EVENT          4 // used in Events.cpp to indicate event container
 #define SV_ADDRESS        5 // used in Events.cpp to indicate address container
-// . place types here
-// . these #define's are used for values of Place::m_type in Events.cpp too!
-// . score is from 0 to 1.0 which is probability section is a place container
-//   for the specified place type
-// . used by Events.cpp for address extraction
-/*
-#define SV_PLACE_NAME_1   7 // places now have two names
-#define SV_PLACE_NAME_2   8 // places now have two names
-#define SV_PLACE_STREET   9
-#define SV_PLACE_CITY    10
-#define SV_PLACE_ZIP     11
-#define SV_PLACE_SUITE   12
-#define SV_PLACE_ADM1    13
-#define SV_PLACE_ADM2    14
-#define SV_PLACE_ADM3    15
-#define SV_PLACE_ADM4    16
-#define SV_PLACE_CTRY    17
-#define SV_PLACE_SCH     18
-#define SV_PLACE_PRK     19
-*/
+
 // . HACK: the "date" is not the enum tag hash, but is the tagPairHash for this
 // . every doc has just one of these describing the entire layout of the page
 // . basically looking for these is same as doing a gbtaghash: query
@ -1133,25 +940,11 @@ class SectionVotingTable {
 // . this allows us to detect a duplicate section even though the layout
 //   of the web page is not quite the same, but is from the same site
 #define SV_TAGCONTENTHASH   21 
-// . HACK: a statistic
-// . the voter that had the max SectionVote::m_numSampled
-// . the m_numSampled for this statistic is his m_numSampled
-// . if we find that a section is not unique (i.e. repeated) on just one
-//   voting document, then we think it is probably a comment and we do not
-//   set the SEC_ARTICLE flag for that section
-//#define SV_TEXTY_MAX_SAMPLED  22
-// . HACK: the "date" is not the enum tag hash, but is the tagPairHash!
-// . indicates this doc is waiting in line for enough docs from its site
-//   with the same page layout (tagpairhash) to become indexed so that it can
-//   make an informed decision in regards to eliminating comment sections
-//   and determining article sections
-//#define SV_WAITINLINE    23
+
 // now Dates.cpp sets these too
 #define SV_FUTURE_DATE   24
 #define SV_PAST_DATE     25
 #define SV_CURRENT_DATE  26
-//#define SV_DUP           27
-//#define SV_NOT_DUP       28
 #define SV_SITE_VOTER    29
 #define SV_TURKTAGHASH   30

--- a/XmlDoc.cpp
+++ b/XmlDoc.cpp
@ -5424,13 +5424,7 @@ Sections *XmlDoc::getExplicitSections ( ) {
 				m_niceness    ,
 				m_masterState ,    // state
 				m_masterLoop  ,    // callback
-				*ct           ,
-				NULL          ,    // sd // sections data
-				true          ,    // sections data valid?
-				NULL          ,    // sv // for m_nsvt
-				//*tph          ,
-				NULL          ,    // buf
-				0             )) { // bufSize
+				*ct           )) {
 		m_calledSections = true;
 		// sanity check, this should not block, we are setting
 		// exclusively from the titleRec
@ -6438,7 +6432,6 @@ SectionVotingTable *XmlDoc::getOldSectionVotingTable ( ) {
 	//   occurs in the document.
 	if ( ! m_osvt.addListOfVotes(&m_secdbList,
 				     &lastKey,
-				     *tph,
 				     *d , // docid
 				     m_niceness))
 		return NULL;
@ -30703,12 +30696,7 @@ SafeBuf *XmlDoc::getSampleForGigabitsJSON ( ) {
 			m_niceness    ,
 			NULL,//m_masterState ,    // state
 			NULL,//m_masterLoop  ,    // callback
-			CT_JSON, // *ct           ,
-			NULL          ,    // sd // sections data
-			true          ,    // sections data valid?
-			NULL          ,    // sv // for m_nsvt
-			NULL          ,    // buf
-			0             )) { // bufSize
+			CT_JSON )) {
 		return NULL;
 	}

@ -34963,8 +34951,6 @@ bool XmlDoc::printRainbowSections ( SafeBuf *sb , HttpRequest *hr ) {
 				   diversityVec,
 				   wordSpamVec,
 				   fragVec,
-				   NULL,
-				   NULL ,
 				   &m_addresses ,
 				   true );
 		return true;
--- a/main.cpp
+++ b/main.cpp
@ -8806,13 +8806,7 @@ bool parseTest ( char *coll , int64_t docId , char *query ) {
 		// do not supply xd so it will be set from scratch
 		if ( ! sections.set (&words,&phrases,&bits,NULL,0,0,
 				     NULL,0,NULL,NULL,
-				     0, // contenttype
-				     NULL, // sectionsdata
-				     false, // sectionsdatavalid
-				     NULL, // sectionsdata2
-				     //0, // tagpairhash
-				     NULL, // buf
-				     0)) // bufSize
+				     0))
 			return log("build: speedtestxml: sections set: %s",
 				   mstrerror(g_errno));