Merge branch 'diffbot-testing' into diffbot-sam

This commit is contained in:
Matt 2015-07-23 09:27:00 -06:00
commit da41d53575
50 changed files with 1046 additions and 307 deletions

@ -3579,7 +3579,7 @@ bool CollectionRec::rebuildUrlFiltersDiffbot() {
// lower from 7 to 1 since we have so many collections now
// ok, now we have many fewer colls so raise back to 7
int32_t diffbotipms = 7;// 1; // 7
int32_t diffbotipms = 7;//1; // 7
// make the gigablast regex table just "default" so it does no
// filtering, but accepts all urls. we will add code to pass the urls
@ -3599,8 +3599,8 @@ bool CollectionRec::rebuildUrlFiltersDiffbot() {
m_spiderIpWaits [i] = wait;
m_spiderIpMaxSpiders[i] = diffbotipms; // keep it respectful
// ethan wants some speed
if ( isEthan )
m_spiderIpMaxSpiders[i] = 30;
// if ( isEthan )
// m_spiderIpMaxSpiders[i] = 30;
//m_spidersEnabled [i] = 1;
m_spiderFreqs [i] = respiderFreq;
//m_spiderDiffbotApiUrl[i].purge();
@ -3623,6 +3623,18 @@ bool CollectionRec::rebuildUrlFiltersDiffbot() {
m_forceDelete [i] = 1;
i++;
// de-prioritize fakefirstip urls so we don't give the impression our
// spiders are slow. like if someone adds a bulk job with 100,000 urls
// then we sit there and process to lookup their ips and add a real
// spider request (if it falls onto the same shard) before we actually
// do any real spidering. so keep the priority here low.
m_regExs[i].set("isfakeip");
m_maxSpidersPerRule [i] = 7;
m_spiderIpMaxSpiders [i] = 7;
m_spiderPriorities [i] = 20;
m_spiderIpWaits [i] = 0;
i++;
// hopcount filter if asked for
if( m_diffbotMaxHops >= 0 ) {

@ -18,6 +18,8 @@ void HashTableX::constructor() {
m_useKeyMagic = false;
m_ks = 0;
m_allowGrowth = true;
m_numSlots = 0;
m_numSlotsUsed = 0;
}
void HashTableX::destructor() {

@ -160,8 +160,8 @@ int32_t Highlight::set ( SafeBuf *sb ,
// . set the anchor counts to 1000*i+1 for each possible query term num
// . yes, i know, why +1? because we're assuming the query terms
// have been highlighted before us
for ( int32_t i = 0 ; i < MAX_QUERY_TERMS ; i++ )
m_anchorCounts[i] = 1000*i + 1;
//for ( int32_t i = 0 ; i < MAX_QUERY_TERMS ; i++ )
// m_anchorCounts[i] = 1000*i + 1;
// set lengths of provided front/back highlight tags
if ( m_frontTag ) m_frontTagLen = gbstrlen ( frontTag );
if ( m_backTag ) m_backTagLen = gbstrlen ( backTag );
@ -170,6 +170,10 @@ int32_t Highlight::set ( SafeBuf *sb ,
//m_bufLen = bufLen;
//m_bufPtr = buf;
m_sb = sb;
// label it
m_sb->setLabel ("highw");
// save room for terminating \0
//m_bufEnd = m_buf + m_bufLen - 1;

@ -70,7 +70,7 @@ class Highlight {
bool m_doStemming;
bool m_useAnchors; // click and scroll technology for cached pages
int32_t m_anchorCounts [ MAX_QUERY_TERMS ];
//int32_t m_anchorCounts [ MAX_QUERY_TERMS ];
const char *m_baseUrl;
int32_t m_numMatches;

@ -1514,6 +1514,10 @@ bool HttpServer::sendReply ( TcpSocket *s , HttpRequest *r , bool isAdmin) {
// is recycled/destroyed
// . this will call getMsgPiece() to fill up sendBuf from file
int32_t totalToSend = mimeLen + bytesToSend;
//s->m_state = NULL; // do we need this? yes, cuz s is NULL for cleanUp
if ( s && s->m_state == f ) s->m_state = NULL;
//if ( ! m_tcp.sendMsg ( s ,
if ( ! tcp->sendMsg ( s ,
sendBuf ,
@ -1542,7 +1546,6 @@ bool HttpServer::sendReply ( TcpSocket *s , HttpRequest *r , bool isAdmin) {
if ( ! f->isOpen() ) f->open( O_RDONLY );
int fd = f->getfd();
cleanUp ( f , NULL/*TcpSocket */ );
s->m_state = NULL; // do we need this? yes, cuz s is NULL for cleanUp
// . AND we need to do this ourselves here
// . do it SILENTLY so no message is logged if fd not registered
if (tcp->m_useSSL)

@ -10,7 +10,7 @@ CC=g++
# remove dlstubs.o for CYGWIN
OBJS = UdpSlot.o Rebalance.o \
Msg13.o Mime.o IndexReadInfo.o \
Msg13.o Mime.o \
PageGet.o PageHosts.o \
PageParser.o PageInject.o PagePerf.o PageReindex.o PageResults.o \
PageAddUrl.o PageRoot.o PageSockets.o PageStats.o \

@ -24,10 +24,24 @@
Matches::Matches ( ) {
m_detectSubPhrases = false;
m_numMatchGroups = 0;
m_qwordFlags = NULL;
m_qwordAllocSize = 0;
reset();
}
Matches::~Matches( ) { reset(); }
void Matches::reset ( ) {
reset2();
if ( m_qwordFlags && m_qwordFlags != (mf_t *)m_tmpBuf ) {
mfree ( m_qwordFlags , m_qwordAllocSize , "mmqw" );
m_qwordFlags = NULL;
}
//m_explicitsMatched = 0;
//m_matchableRequiredBits = 0;
//m_hasAllQueryTerms = false;
//m_matchesQuery = false;
}
void Matches::reset2() {
m_numMatches = 0;
//m_maxNQT = -1;
m_numAlnums = 0;
@ -39,10 +53,6 @@ void Matches::reset ( ) {
m_bitsArray [i].reset();
}
m_numMatchGroups = 0;
//m_explicitsMatched = 0;
//m_matchableRequiredBits = 0;
//m_hasAllQueryTerms = false;
//m_matchesQuery = false;
}
bool Matches::isMatchableTerm ( QueryTerm *qt ) { // , int32_t i ) {
@ -103,6 +113,20 @@ void Matches::setQuery ( Query *q ) {
//memset ( m_foundNegTermVector, 0, m_q->getNumTerms() );
if ( m_qwordFlags ) { char *xx=NULL;*xx=0; }
int32_t need = m_q->m_numWords * sizeof(mf_t) ;
m_qwordAllocSize = need;
if ( need < 128 )
m_qwordFlags = (mf_t *)m_tmpBuf;
else
m_qwordFlags = (mf_t *)mmalloc ( need , "mmqf" );
if ( ! m_qwordFlags ) {
log("matches: alloc failed for query %s",q->m_orig);
return;
}
// this is word based. these are each 1 byte
memset ( m_qwordFlags , 0 , m_q->m_numWords * sizeof(mf_t));
@ -278,7 +302,7 @@ bool Matches::set ( XmlDoc *xd ,
int32_t niceness ) {
// don't reset query info!
reset();
reset2();
// sanity check
if ( ! xd->m_docIdValid ) { char *xx=NULL;*xx=0; }

@ -142,6 +142,7 @@ class Matches {
Matches ( ) ;
~Matches( ) ;
void reset ( ) ;
void reset2 ( ) ;
// BIG HACK support
//int32_t getTermsFound ( bool *hadPhrases , bool *hadWords );
@ -183,7 +184,10 @@ class Matches {
// . 1-1 with Query::m_qwords[] array of QWords
// . shows the match flags for that query word
mf_t m_qwordFlags[MAX_QUERY_WORDS];
//mf_t m_qwordFlags[MAX_QUERY_WORDS];
mf_t *m_qwordFlags;
int32_t m_qwordAllocSize;
char m_tmpBuf[128];
//stuff for detecting whether a match is part of a larger phrase
void setSubPhraseDetection();

@ -530,6 +530,11 @@ void Mem::addMem ( void *mem , int32_t size , const char *note , char isnew ) {
//validate();
// if ( note && note[0] == 'S' && note[1] == 'a' &&
// note[2] == 'f' && size == 13371521 )
// log("mem: got mystery safebuf");
//m_memtablesize = 0;//DMEMTABLESIZE;
// 4G/x = 600*1024 -> x = 4000000000.0/(600*1024) = 6510
// crap, g_hostdb.init() is called in main.cpp before

14
Mem.h

@ -280,6 +280,20 @@ inline int32_t getNumBitsOn64 ( uint64_t bits ) {
g_a [ *((unsigned char *)(&bits) + 7) ] ;
}
inline int32_t getNumBitsOnX ( unsigned char *s , int32_t slen ) {
if ( slen == 1 ) return getNumBitsOn8 ( *s );
if ( slen == 2 ) return getNumBitsOn16 ( *(uint16_t *)s );
if ( slen == 4 ) return getNumBitsOn32 ( *(uint32_t *)s );
if ( slen == 3 )
return getNumBitsOn8 ( s[0] ) +
getNumBitsOn8 ( s[1] ) +
getNumBitsOn8 ( s[2] ) ;
int32_t total = 0;
for ( int32_t i = 0 ; i < slen ; i++ )
total += getNumBitsOn8 ( s[i] );
return total;
}
// assume only one bit is set for this (used by Address.cpp)
inline int32_t getBitPosLL ( uint8_t *bit ) {
// which int32_t is it in?

@ -98,7 +98,7 @@ bool Msg2::getLists ( int32_t rdbId ,
// set this
m_numLists = m_query->m_numTerms;
// make sure not too many lists being requested
if ( m_numLists > MAX_NUM_LISTS ) {g_errno=ETOOMANYLISTS; return true;}
//if(m_numLists > MAX_NUM_LISTS ) {g_errno=ETOOMANYLISTS; return true;}
// clear them all
//for ( int32_t i = 0 ; i < m_numLists ; i++ ) {
// m_inProgress[i] = true;
@ -133,7 +133,7 @@ bool Msg2::getLists ( ) {
// . make slots for all
for ( ; m_i < m_numLists ; m_i++ ) {
// sanity for Msg39's sake. do not breach m_lists[].
if ( m_i >= MAX_QUERY_TERMS ) { char *xx=NULL;*xx=0; }
if ( m_i >= ABS_MAX_QUERY_TERMS ) { char *xx=NULL;*xx=0; }
// if any had error, forget the rest. do not launch any more
if ( m_errno ) break;
// skip if already did it
@ -413,6 +413,8 @@ bool Msg2::getLists ( ) {
// mem. we should also report the size of each termlist
// in bytes in the query info header.
//int32_t minRecSizes = DEFAULT_POSDB_READSIZE;
// MDW TODO fix this later we go oom too easily for queries
// like 'www.disney.nl'
int32_t minRecSizes = -1;
// start up the read. thread will wait in thread queue to

5
Msg2.h

@ -7,9 +7,10 @@
#include "Msg0.h"
/** define the max # of lists you can get as the max # of query terms for now */
#define MAX_NUM_LISTS MAX_QUERY_TERMS
//#define MAX_NUM_LISTS MAX_QUERY_TERMS
/** how many outstanding msg5 requests at one time? */
#define MSG2_MAX_REQUESTS MAX_QUERY_TERMS
//#define MSG2_MAX_REQUESTS MAX_QUERY_TERMS
#define MSG2_MAX_REQUESTS 32
/** support the &sites=xyz.com+abc.com+... to restrict search results to provided sites.*/
#define MAX_WHITELISTS 500

@ -34,6 +34,10 @@ Msg39::Msg39 () {
reset();
}
Msg39::~Msg39 () {
reset();
}
void Msg39::reset() {
if ( m_inUse ) { char *xx=NULL;*xx=0; }
m_allocedTree = false;
@ -46,8 +50,16 @@ void Msg39::reset() {
void Msg39::reset2() {
// reset lists
for ( int32_t j = 0 ; j < m_msg2.m_numLists ; j++ )
m_lists[j].freeList();
int32_t nqt = m_stackBuf.getLength() / sizeof(RdbList);
//for ( int32_t j = 0 ; j < m_msg2.m_numLists && m_lists ; j++ ) {
for ( int32_t j = 0 ; j < nqt && m_lists ; j++ ) {
//m_lists[j].freeList();
//log("msg39: destroy list @ 0x%"PTRFMT,(PTRTYPE)&m_lists[j]);
// same thing but more generic
m_lists[j].destructor();
}
m_stackBuf.purge();
m_lists = NULL;
m_msg2.reset();
m_posdbTable.reset();
m_callback = NULL;
@ -205,7 +217,8 @@ void Msg39::getDocIds2 ( Msg39Request *req ) {
if ( ! m_tmpq.set2 ( m_r->ptr_query ,
m_r->m_language ,
m_r->m_queryExpansion ,
m_r->m_useQueryStopWords ) ) {
m_r->m_useQueryStopWords ,
m_r->m_maxQueryTerms ) ) {
log("query: msg39: setQuery: %s." ,
mstrerror(g_errno) );
sendReply ( m_slot , this , NULL , 0 , 0 , true );
@ -223,11 +236,14 @@ void Msg39::getDocIds2 ( Msg39Request *req ) {
if ( m_tmpq.getNumTerms() != m_r->m_nqt ) {
g_errno = EBADENGINEER;
log("query: Query parsing inconsistency for q=%s. "
"%i != %i. "
"langid=%"INT32". Check langids and m_queryExpansion parms "
"which are the only parms that could be different in "
"Query::set2(). You probably have different mysynoyms.txt "
"files on two different hosts! check that!!"
,m_tmpq.m_orig
,(int)m_tmpq.getNumTerms()
,(int)m_r->m_nqt
,(int32_t)m_r->m_language
);
sendReply ( m_slot , this , NULL , 0 , 0 , true );
@ -713,7 +729,7 @@ bool Msg39::getLists () {
//(int64_t)m_tmpq.m_qterms[i].m_explicitBit ,
//(int64_t)m_tmpq.m_qterms[i].m_implicitBits ,
(int32_t)m_tmpq.m_qterms[i].m_hardCount ,
(int32_t)m_tmpq.m_componentCodes[i],
(int32_t)m_tmpq.m_qterms[i].m_componentCode,
(int32_t)m_tmpq.getTermLen(i) ,
isSynonym,
(int32_t)m_tmpq.m_langId ); // ,tt
@ -762,6 +778,19 @@ bool Msg39::getLists () {
// split is us????
//int32_t split = g_hostdb.m_myHost->m_group;
int32_t split = g_hostdb.m_myHost->m_shardNum;
int32_t nqt = m_tmpq.getNumTerms();
int32_t need = sizeof(RdbList) * nqt ;
m_stackBuf.setLabel("stkbuf2");
if ( ! m_stackBuf.reserve ( need ) ) return true;
m_lists = (IndexList *)m_stackBuf.getBufStart();
m_stackBuf.setLength ( need );
for ( int32_t i = 0 ; i < nqt ; i++ ) {
m_lists[i].constructor();
//log("msg39: constructlist @ 0x%"PTRFMT,(PTRTYPE)&m_lists[i]);
}
// call msg2
if ( ! m_msg2.getLists ( rdbId ,
m_r->m_collnum,//m_r->ptr_coll ,

@ -216,6 +216,7 @@ class Msg39 {
public:
Msg39();
~Msg39();
void reset();
void reset2();
// register our request handler for Msg39's
@ -266,7 +267,9 @@ class Msg39 {
// . we hold our IndexLists here for passing to PosdbTable
// . one array for each of the tiers
IndexList m_lists [ MAX_QUERY_TERMS ];
//IndexList m_lists [ MAX_QUERY_TERMS ];
IndexList *m_lists;
SafeBuf m_stackBuf;
// used for timing
int64_t m_startTime;

@ -317,8 +317,8 @@ bool Msg3a::gotCacheReply ( ) {
//CollectionRec *cr;
//cr = g_collectiondb.getRec(m_r->ptr_coll,m_r->size_coll-1);
setTermFreqWeights ( m_r->m_collnum,m_q,m_termFreqs,m_termFreqWeights);
//setTermFreqWeights(m_r->m_collnum,m_q,m_termFreqs,m_termFreqWeights);
setTermFreqWeights ( m_r->m_collnum,m_q );
if ( m_debug ) {
//int64_t *termIds = m_q->getTermIds();
@ -338,8 +338,8 @@ bool Msg3a::gotCacheReply ( ) {
i,
qt->m_term,
qt->m_termId,
m_termFreqs[i],
m_termFreqWeights[i]);
qt->m_termFreq,//m_termFreqs[i],
qt->m_termFreqWeight);//m_termFreqWeights[i]);
// put it back
*tpc = c;
}
@ -368,7 +368,8 @@ bool Msg3a::gotCacheReply ( ) {
}
// a tmp buf
int32_t readSizes[MAX_QUERY_TERMS];
int32_t readSizes[ABS_MAX_QUERY_TERMS];
float tfw [ABS_MAX_QUERY_TERMS];
// update our read info
for ( int32_t j = 0; j < n ; j++ ) {
// the read size for THIS query term
@ -379,7 +380,9 @@ bool Msg3a::gotCacheReply ( ) {
rs = DEFAULT_POSDB_READSIZE;//90000000; // 90MB!
// it is better to go oom than leave users scratching their
// heads as to why some results are not being returned.
rs = -1;
// no, because we are going out of mem for queries like
// 'www.disney.nl' etc.
//rs = -1;
// if section stats, limit to 1MB
//if ( m_r->m_getSectionStats ) rs = 1000000;
// get the jth query term
@ -388,13 +391,14 @@ bool Msg3a::gotCacheReply ( ) {
if ( qt->m_ignored ) rs = 0;
// set it
readSizes[j] = rs;
// serialize these too
tfw[j] = qt->m_termFreqWeight;
}
// serialize this
m_r->ptr_readSizes = (char *)readSizes;
m_r->size_readSizes = 4 * n;
// and this
m_r->ptr_termFreqWeights = (char *)m_termFreqWeights;
m_r->ptr_termFreqWeights = (char *)tfw;//m_termFreqWeights;
m_r->size_termFreqWeights = 4 * n;
// store query into request, might have changed since we called
// Query::expandQuery() above
@ -1095,7 +1099,10 @@ bool Msg3a::mergeLists ( ) {
// log("results: alloc fhtqt of %"PTRFMT" for st0=%"PTRFMT,
// (PTRTYPE)ht->m_buf,(PTRTYPE)m_q->m_st0Ptr);
// sanity
if ( ! ht->m_isWritable ) {char *xx=NULL;*xx=0;}
if ( ! ht->m_isWritable ) {
log("msg3a: queryterm::constructor not called?");
char *xx=NULL;*xx=0;
}
}
// now scan each facethashlist from each shard and compile into
@ -1548,9 +1555,9 @@ void Msg3a::printTerms ( ) {
}
void setTermFreqWeights ( collnum_t collnum , // char *coll,
Query *q ,
int64_t *termFreqs,
float *termFreqWeights ) {
Query *q ) {
// int64_t *termFreqs,
// float *termFreqWeights ) {
int64_t numDocsInColl = 0;
RdbBase *base = getRdbBase ( RDB_CLUSTERDB , collnum );
@ -1562,13 +1569,16 @@ void setTermFreqWeights ( collnum_t collnum , // char *coll,
numDocsInColl = 1;
}
// now get term freqs again, like the good old days
int64_t *termIds = q->getTermIds();
//int64_t *termIds = q->getTermIds();
// just use rdbmap to estimate!
for ( int32_t i = 0 ; i < q->getNumTerms(); i++ ) {
QueryTerm *qt = &q->m_qterms[i];
// GET THE TERMFREQ for setting weights
int64_t tf = g_posdb.getTermFreq ( collnum ,termIds[i]);
if ( termFreqs ) termFreqs[i] = tf;
int64_t tf = g_posdb.getTermFreq ( collnum ,qt->m_termId);
//if ( termFreqs ) termFreqs[i] = tf;
qt->m_termFreq = tf;
float tfw = getTermFreqWeight(tf,numDocsInColl);
termFreqWeights[i] = tfw;
//termFreqWeights[i] = tfw;
qt->m_termFreqWeight = tfw;
}
}

10
Msg3a.h

@ -12,9 +12,9 @@
#define DEFAULT_POSDB_READSIZE 90000000
void setTermFreqWeights ( collnum_t collnum, // char *coll,
class Query *q ,
int64_t *termFreqs,
float *termFreqWeights ) ;
class Query *q );
//int64_t *termFreqs,
//float *termFreqWeights ) ;
//#define MSG3A_TMP_BUFSIZE (MAX_RESULTS*18)
@ -131,8 +131,8 @@ public:
// use msg37 to get TermFreqs
//Msg37 m_msg37;
int64_t m_termFreqs [MAX_QUERY_TERMS];
float m_termFreqWeights[MAX_QUERY_TERMS];
//int64_t m_termFreqs [MAX_QUERY_TERMS];
//float m_termFreqWeights[MAX_QUERY_TERMS];
// a multicast class to send the request, one for each split
Multicast m_mcast[MAX_SHARDS];

@ -666,7 +666,7 @@ bool Msg40::federatedLoop ( ) {
mr.size_whiteList = slen;
mr.m_timeout = -1; // auto-determine based on #terms
// make sure query term counts match in msg39
mr.m_maxQueryTerms = m_si->m_maxQueryTerms;
//mr.m_maxQueryTerms = m_si->m_maxQueryTerms;
mr.m_realMaxTop = m_si->m_realMaxTop;
mr.m_minSerpDocId = m_si->m_minSerpDocId;
@ -699,6 +699,9 @@ bool Msg40::federatedLoop ( ) {
//if ( numDocIdSplits < 5 ) numDocIdSplits = 5;
//}
if ( cr ) mr.m_maxQueryTerms = cr->m_maxQueryTerms;
else mr.m_maxQueryTerms = 100;
// special oom hack fix
if ( cr && cr->m_isCustomCrawl && numDocIdSplits < 4 )
numDocIdSplits = 4;
@ -3496,7 +3499,10 @@ bool Msg40::computeGigabits( TopicGroup *tg ) {
log("gbits: too many words in samples. "
"Discarding the remaining samples "
"(maxWords=%"INT32")", maxWords);
char *xx=NULL;*xx=0;
// return -1 with g_errno set on error
g_errno = EBUFTOOSMALL;
return -1;
//char *xx=NULL;*xx=0;
}
// the thing we are counting!!!!
maxWords += sampleWords;
@ -4330,7 +4336,8 @@ void hashExcerpt ( Query *q ,
int32_t m_posPtr;
};
SafeBuf posBuf;
int32_t need2 = MAX_QUERY_TERMS * sizeof(PosInfo);
//int32_t need2 = MAX_QUERY_TERMS * sizeof(PosInfo);
int32_t need2 = q->m_numTerms * sizeof(PosInfo);
posBuf.setLabel("m40posbuf");
if ( ! posBuf.reserve ( need2 ) ) {
log("gigabits: could not allocate 2 local buffer "

@ -15,7 +15,7 @@
#include "Msg20.h" // for getting summary from docId
#include "Msg17.h" // a distributed cache of serialized/compressed Msg40s
//#include "Msg2b.h" // for generating directories
#include "IndexReadInfo.h" // STAGE0,...
//#include "IndexReadInfo.h" // STAGE0,...
#include "Msg3a.h"
#include "PostQueryRerank.h"

@ -40,7 +40,9 @@ public:
bool m_isLocal;
//bool m_seq;
bool m_rtq;
char m_q[MAX_QUERY_LEN+1];
//char m_q[MAX_QUERY_LEN+1];
SafeBuf m_qsb;
char m_qtmpBuf[128];
int32_t m_qlen;
char m_boolFlag;
bool m_printed;
@ -98,7 +100,7 @@ bool sendPageGet ( TcpSocket *s , HttpRequest *r ) {
int32_t qlen = 0;
char *q = r->getString ( "q" , &qlen , NULL /*default*/);
// ensure query not too big
if ( qlen >= MAX_QUERY_LEN-1 ) {
if ( qlen >= ABS_MAX_QUERY_LEN-1 ) {
g_errno=EQUERYTOOBIG;
return g_httpServer.sendErrorReply (s,500 ,mstrerror(g_errno));
}
@ -156,8 +158,16 @@ bool sendPageGet ( TcpSocket *s , HttpRequest *r ) {
// delete ( st );
// return sendPageNetResult( s );
//}
if ( q && qlen > 0 ) strcpy ( st->m_q , q );
else st->m_q[0] = '\0';
//if ( q && qlen > 0 ) strcpy ( st->m_q , q );
//else st->m_q[0] = '\0';
st->m_qsb.setBuf ( st->m_qtmpBuf,128,0,false );
st->m_qsb.setLabel ( "qsbpg" );
// save the query
if ( q && qlen > 0 )
st->m_qsb.safeStrcpy ( q );
st->m_qlen = qlen;
//st->m_seq = seq;
st->m_rtq = rtq;
@ -415,8 +425,8 @@ bool processLoop ( void *state ) {
int32_t startLen2 = sb->length();//p;
// query should be NULL terminated
char *q = st->m_q;
int32_t qlen = st->m_qlen;
char *q = st->m_qsb.getBufStart();
int32_t qlen = st->m_qsb.getLength(); // m_qlen;
char styleTitle[128] = "font-size:14px;font-weight:600;"
"color:#000000;";

@ -2529,6 +2529,22 @@ bool printSearchResultsHeader ( State0 *st ) {
, getLanguageString(si->m_queryLangId) );
// print query words we ignored, like stop words
printIgnoredWords ( sb , si );
sb->safePrintf("\t\t<queryNumTermsTotal>"
"%"INT32
"</queryNumTermsTotal>\n"
, q->m_numTermsUntruncated );
sb->safePrintf("\t\t<queryNumTermsUsed>"
"%"INT32
"</queryNumTermsUsed>\n"
, q->m_numTerms );
int32_t tval = 0;
if ( q->m_numTerms < q->m_numTermsUntruncated ) tval = 1;
sb->safePrintf("\t\t<queryWasTruncated>"
"%"INT32
"</queryWasTruncated>\n"
, tval );
for ( int i = 0 ; i < q->m_numTerms ; i++ ) {
sb->safePrintf("\t\t<term>\n");
QueryTerm *qt = &q->m_qterms[i];
@ -2574,7 +2590,8 @@ bool printSearchResultsHeader ( State0 *st ) {
,printTerm);
term[sq->m_termLen] = c;
}
int64_t tf = msg40->m_msg3a.m_termFreqs[i];
//int64_t tf = msg40->m_msg3a.m_termFreqs[i];
int64_t tf = qt->m_termFreq;
sb->safePrintf("\t\t\t<termFreq>%"INT64"</termFreq>\n"
,tf);
sb->safePrintf("\t\t\t<termHash48>%"INT64"</termHash48>\n"
@ -2604,6 +2621,19 @@ bool printSearchResultsHeader ( State0 *st ) {
sb->safePrintf("\",\n");
// print query words we ignored, like stop words
printIgnoredWords ( sb , si );
sb->safePrintf("\t\"queryNumTermsTotal\":"
"%"INT32",\n"
, q->m_numTermsUntruncated );
sb->safePrintf("\t\"queryNumTermsUsed\":"
"%"INT32",\n"
, q->m_numTerms );
int32_t tval = 0;
if ( q->m_numTerms < q->m_numTermsUntruncated ) tval = 1;
sb->safePrintf("\t\"queryWasTruncated\":"
"%"INT32",\n"
, tval );
sb->safePrintf("\t\"terms\":[\n");
for ( int i = 0 ; i < q->m_numTerms ; i++ ) {
sb->safePrintf("\t\t{\n");
@ -2643,7 +2673,8 @@ bool printSearchResultsHeader ( State0 *st ) {
sb->safePrintf("\",\n");
term[sq->m_termLen] = c;
}
int64_t tf = msg40->m_msg3a.m_termFreqs[i];
//int64_t tf = msg40->m_msg3a.m_termFreqs[i];
int64_t tf = qt->m_termFreq;
sb->safePrintf("\t\t\"termFreq\":%"INT64",\n"
,tf);
@ -2793,13 +2824,14 @@ bool printSearchResultsHeader ( State0 *st ) {
//Highlight h;
st->m_qe[0] = '\0';
//st->m_qe[0] = '\0';
st->m_qesb.nullTerm();
// encode query buf
//char qe[MAX_QUERY_LEN+1];
char *dq = si->m_displayQuery;
//int32_t dqlen = si->m_displayQueryLen;
if ( dq ) urlEncode(st->m_qe,MAX_QUERY_LEN*2,dq,gbstrlen(dq));
if ( dq ) st->m_qesb.urlEncode(dq);
// how many results were requested?
//int32_t docsWanted = msg40->getDocsWanted();
@ -5185,7 +5217,7 @@ bool printResult ( State0 *st, int32_t ix , int32_t *numPrintedSoFar ) {
"get?"
"q=%s&c=%s&d=%"INT64">"
"cached</a>\n",
st->m_qe , coll ,
st->m_qesb.getBufStart() , coll ,
mr->m_docId );
else if ( printCached )
sb->safePrintf ( "<a href=\""
@ -5194,7 +5226,7 @@ bool printResult ( State0 *st, int32_t ix , int32_t *numPrintedSoFar ) {
"qlang=%s&"
"c=%s&d=%"INT64"&cnsp=0\">"
"cached</a>\n",
st->m_qe ,
st->m_qesb.getBufStart() ,
// "qlang" parm
si->m_defaultSortLang,
coll ,
@ -5334,7 +5366,7 @@ bool printResult ( State0 *st, int32_t ix , int32_t *numPrintedSoFar ) {
"d=%"INT64"&"
"cnsp=0\">"
"sections</a>\n",
st->m_qe ,
st->m_qesb.getBufStart() ,
// "qlang" parm
si->m_defaultSortLang,
coll ,
@ -5447,7 +5479,7 @@ bool printResult ( State0 *st, int32_t ix , int32_t *numPrintedSoFar ) {
qq.urlEncode("site:");
qq.urlEncode (hbuf);
qq.urlEncode(" | ");
qq.safeStrcpy(st->m_qe);
qq.safeStrcpy(st->m_qesb.getBufStart());
qq.nullTerm();
// get the original url and add/replace in query
char tmp2[512];
@ -6176,8 +6208,14 @@ bool printPairScore ( SafeBuf *sb , SearchInput *si , PairScore *ps ,
//int64_t sz2 = ps->m_listSize2;
//int64_t tf1 = ps->m_termFreq1;//sz1 / 10;
//int64_t tf2 = ps->m_termFreq2;//sz2 / 10;
int64_t tf1 = msg40->m_msg3a.m_termFreqs[qtn1];
int64_t tf2 = msg40->m_msg3a.m_termFreqs[qtn2];
QueryTerm *qt1 = &msg40->m_msg3a.m_q->m_qterms[qtn1];
QueryTerm *qt2 = &msg40->m_msg3a.m_q->m_qterms[qtn2];
//int64_t tf1 = msg40->m_msg3a.m_termFreqs[qtn1];
//int64_t tf2 = msg40->m_msg3a.m_termFreqs[qtn2];
int64_t tf1 = qt1->m_termFreq;
int64_t tf2 = qt2->m_termFreq;
float tfw1 = ps->m_tfWeight1;
float tfw2 = ps->m_tfWeight2;
@ -6893,7 +6931,9 @@ bool printSingleScore ( SafeBuf *sb ,
//int64_t tf = ss->m_termFreq;//ss->m_listSize;
int32_t qtn = ss->m_qtermNum;
int64_t tf = msg40->m_msg3a.m_termFreqs[qtn];
//int64_t tf = msg40->m_msg3a.m_termFreqs[qtn];
QueryTerm *qt = &msg40->m_msg3a.m_q->m_qterms[qtn];
int64_t tf = qt->m_termFreq;
float tfw = ss->m_tfWeight;
if ( si->m_format == FORMAT_XML ) {
@ -8252,8 +8292,11 @@ bool printCSVHeaderRow2 ( SafeBuf *sb ,
hdr = "Hop Count";
if ( ! strcmp(hdr,"gbssIp") )
hdr = "IP";
if ( ! strcmp(hdr,"gbssDiffbotUri" ) )
hdr = "Diffbot URI";
// csv report is regular urls not diffbot object urls so
// regular urls do not have just a single diffboturi,
// they could have 0 or multiple diffboturis
//if ( ! strcmp(hdr,"gbssDiffbotUri" ) )
// hdr = "Diffbot URI";
if ( ! strcmp(hdr,"gbssSentToDiffbotThisTime") )
hdr = "Process Attempted";
if ( ! strcmp(hdr,"gbssDiffbotReplyMsg") )

@ -52,7 +52,8 @@ public:
int64_t m_took; // how long it took to get the results
HttpRequest m_hr;
bool m_printedHeaderRow;
char m_qe[MAX_QUERY_LEN+1];
//char m_qe[MAX_QUERY_LEN+1];
SafeBuf m_qesb;
// for printing our search result json items in csv:
HashTableX m_columnTable;

@ -1858,11 +1858,11 @@ bool printDirHomePage ( SafeBuf &sb , HttpRequest *r ) {
// . call g_httpServer.sendDynamicPage() to send it
bool sendPageRoot ( TcpSocket *s , HttpRequest *r, char *cookie ) {
// don't allow pages bigger than 128k in cache
char buf [ 10*1024 + MAX_QUERY_LEN ];
char buf [ 10*1024 ];//+ MAX_QUERY_LEN ];
// a ptr into "buf"
//char *p = buf;
//char *pend = buf + 10*1024 + MAX_QUERY_LEN - 100 ;
SafeBuf sb(buf, 10*1024 + MAX_QUERY_LEN);
SafeBuf sb(buf, 10*1024 );//+ MAX_QUERY_LEN);
// print bgcolors, set focus, set font style
//p = g_httpServer.printFocus ( p , pend );
//p = g_httpServer.printColors ( p , pend );

@ -252,18 +252,18 @@ bool sendPageStats ( TcpSocket *s , HttpRequest *r ) {
&secs,
&msecs);
int64_t avgTier0Time = 0;
int64_t avgTier1Time = 0;
int64_t avgTier2Time = 0;
if ( g_stats.m_tierHits[0] > 0 )
avgTier0Time = g_stats.m_tierTimes[0] /
(int64_t)g_stats.m_tierHits[0];
if ( g_stats.m_tierHits[1] > 0 )
avgTier1Time = g_stats.m_tierTimes[1] /
(int64_t)g_stats.m_tierHits[1];
if ( g_stats.m_tierHits[2] > 0 )
avgTier2Time = g_stats.m_tierTimes[2] /
(int64_t)g_stats.m_tierHits[2];
// int64_t avgTier0Time = 0;
// int64_t avgTier1Time = 0;
// int64_t avgTier2Time = 0;
// if ( g_stats.m_tierHits[0] > 0 )
// avgTier0Time = g_stats.m_tierTimes[0] /
// (int64_t)g_stats.m_tierHits[0];
// if ( g_stats.m_tierHits[1] > 0 )
// avgTier1Time = g_stats.m_tierTimes[1] /
// (int64_t)g_stats.m_tierHits[1];
// if ( g_stats.m_tierHits[2] > 0 )
// avgTier2Time = g_stats.m_tierTimes[2] /
// (int64_t)g_stats.m_tierHits[2];
if ( format == FORMAT_HTML )
p.safePrintf (

@ -3519,6 +3519,7 @@ bool printApiForPage ( SafeBuf *sb , int32_t PAGENUM , CollectionRec *cr ) {
if ( pageNum != PAGENUM ) continue;
SafeBuf tmp;
tmp.setLabel("apisb");
char diff = 0;
bool printVal = false;
if ( parm->m_type != TYPE_CMD &&
@ -3856,6 +3857,25 @@ bool printApiForPage ( SafeBuf *sb , int32_t PAGENUM , CollectionRec *cr ) {
"</b>");
sb->brify2 (
"\t\t# List of space separated words in the "
"query that were ignored for the most part. "
"Because they were common words for the "
"query language they are in.\n"
, cols , "\n\t\t# " , false );
sb->safePrintf("<b>\t\t\"ignoredWords\":\"to the\",\n\n"
"</b>");
sb->brify2 (
"\t\t# There is a maximum limit placed on the "
"number of query terms we search on to keep things "
"fast. This can "
"be changed in the search controls.\n"
, cols , "\n\t\t# " , false );
sb->safePrintf("<b>\t\t\"queryNumTermsTotal\":52,\n</b>");
sb->safePrintf("<b>\t\t\"queryNumTermsUsed\":20,\n</b>");
sb->safePrintf("<b>\t\t\"queryWasTruncated\":1,\n\n</b>");
sb->brify2 (
"\t\t# The start of the terms array. Each query "
"is broken down into a list of terms. Each "
@ -4037,7 +4057,8 @@ bool printApiForPage ( SafeBuf *sb , int32_t PAGENUM , CollectionRec *cr ) {
// end instance
sb->safePrintf("<b>\t\t}\n\n</b>");
// end gigabit
sb->safePrintf("<b>\t\t},\n\n</b>");
sb->safePrintf("\t\t# End of the first gigabit\n"
"<b>\t\t},\n\n</b>");
sb->safePrintf("\t\t...\n\n");
@ -4047,6 +4068,59 @@ bool printApiForPage ( SafeBuf *sb , int32_t PAGENUM , CollectionRec *cr ) {
sb->safePrintf("<b>\t],\n\n</b>");
// BEGIN FACETS
sb->safePrintf( "\t# Start of the facets array, if any.\n");
sb->safePrintf("<b>\t\"facets\":[\n</b>\n");
sb->safePrintf("\t\t# The first facet in the array.\n");
sb->safePrintf("<b>\t\t{\n</b>");
sb->brify2 ( "\t\t\t"
"# The field you are faceting over\n"
, cols , "\n\t\t\t# " , false );
sb->safePrintf ( "<b>\t\t\t\"field\":\"Company\",\n\n</b>");
sb->brify2 ( "\t\t\t"
"# How many documents in the collection had "
"this particular field? 64-bit integer.\n"
, cols , "\n\t\t\t# " , false );
sb->safePrintf ( "<b>\t\t\t\"totalDocsWithField\":148553,"
"\n\n</b>");
sb->brify2 ( "\t\t\t"
"# How many documents in the collection had "
"this particular field with the same value "
"as the value line directly below? This should "
"always be less than or equal to the "
"totalDocsWithField count. 64-bit integer.\n"
, cols , "\n\t\t\t# " , false );
sb->safePrintf ( "<b>\t\t\t\"totalDocsWithFieldAndValue\":"
"44184,\n\n</b>");
sb->brify2 ( "\t\t\t"
"# The value of the field in the case of "
"this facet. Can be a string or an integer or "
"a float, depending on the type described in "
"the gbfacet query term. i.e. gbfacetstr, "
"gbfacetint or gbfacetfloat.\n"
, cols , "\n\t\t\t# " , false );
sb->safePrintf ( "<b>\t\t\t\"value\":"
"\"Widgets, Inc.\",\n\n</b>");
sb->brify2 ( "\t\t\t"
"# Should be the same as totalDocsWith"
"FieldAndValue, "
"above. 64-bit integer.\n"
, cols , "\n\t\t\t# " , false );
sb->safePrintf ( "<b>\t\t\t\"docCount\":"
"44184\n\n</b>");
sb->safePrintf("\t\t# End of the first facet in the array.\n");
sb->safePrintf("<b>\t\t}\n\n</b>");
sb->safePrintf( "\t# End of the facets array.\n");
sb->safePrintf("<b>\t],\n\n</b>");
// END FACETS
@ -4670,7 +4744,7 @@ bool printRedBox ( SafeBuf *mb , TcpSocket *sock , HttpRequest *hr ) {
mb->safePrintf("%s",box);
mb->safePrintf("%"INT32" %s dead and not responding to "
"pings. See the "
"<a href=/admin/host?c=%s>hosts table</a>.",
"<a href=/admin/hosts?c=%s>hosts table</a>.",
ps->m_numHostsDead ,s ,coll);
mb->safePrintf("%s",boxEnd);
}

@ -7879,17 +7879,19 @@ void Parms::init ( ) {
m->m_obj = OBJ_COLL;
m++;
//m->m_title = "max query terms";
//m->m_desc = "Do not allow more than this many query terms. Will "
// "return error in XML feed error tag if breeched.";
//m->m_cgi = "mqt";
//m->m_off = (char *)&cr.m_maxQueryTerms - x;
m->m_title = "max query terms";
m->m_desc = "Do not allow more than this many query terms. Helps "
"prevent big queries from resource hogging.";
m->m_cgi = "mqt";
m->m_off = (char *)&cr.m_maxQueryTerms - x;
//m->m_soff = (char *)&si.m_maxQueryTerms - y;
//m->m_type = TYPE_LONG;
//m->m_def = "20"; // 20 for testing, normally 16
//m->m_sparm = 1;
//m->m_spriv = 1;
//m++;
m->m_type = TYPE_LONG;
m->m_def = "999999"; // now we got synonyms... etc
m->m_group = 0;
m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;
/*
m->m_title = "dictionary site";
@ -15283,7 +15285,7 @@ void Parms::init ( ) {
m->m_type = TYPE_CHARPTR;
m->m_page = PAGE_REINDEX;
m->m_obj = OBJ_GBREQUEST;
m->m_def = "xx";
m->m_def = "en";
m->m_flags = PF_API ;
m++;

149
Posdb.cpp

@ -759,19 +759,22 @@ void PosdbTable::init ( Query *q ,
// set this now
//m_collnum = cr->m_collnum;
// save it
m_topTree = topTree;
// a ptr for debugging i guess
g_topTree = topTree;
// remember the query class, it has all the info about the termIds
m_q = q;
m_nqt = q->getNumTerms();
// for debug msgs
m_logstate = logstate;
m_realMaxTop = r->m_realMaxTop;
if ( m_realMaxTop > MAX_TOP ) m_realMaxTop = MAX_TOP;
m_siteRankMultiplier = SITERANKMULTIPLIER;
if ( m_q->m_isBoolean ) m_siteRankMultiplier = 0.0;
// seo.cpp supplies a NULL msg2 because it already sets
// QueryTerm::m_posdbListPtrs
if ( ! msg2 ) return;
@ -1060,6 +1063,26 @@ bool PosdbTable::allocTopTree ( ) {
// make it nongrowable because we'll be in a thread
qt->m_facetHashTable.setNonGrow();
}
// m_stackBuf
int32_t nqt = m_q->m_numTerms;
int32_t need = 0;
need += 4 * nqt;
need += 4 * nqt;
need += 4 * nqt;
need += 4 * nqt;
need += sizeof(float ) * nqt;
need += sizeof(char *) * nqt;
need += sizeof(char *) * nqt;
need += sizeof(char *) * nqt;
need += sizeof(char *) * nqt;
need += sizeof(char *) * nqt;
need += sizeof(char ) * nqt;
need += sizeof(float ) * nqt * nqt; // square matrix
m_stackBuf.setLabel("stkbuf1");
if ( ! m_stackBuf.reserve( need ) )
return false;
return true;
}
@ -1378,8 +1401,8 @@ void PosdbTable::evalSlidingWindow ( char **ptrs ,
max *= m_freqWeights[i] * m_freqWeights[j];
// use score from scoreMatrix if bigger
if ( scoreMatrix[MAX_QUERY_TERMS*i+j] > max ) {
max = scoreMatrix[MAX_QUERY_TERMS*i+j];
if ( scoreMatrix[m_nqt*i+j] > max ) {
max = scoreMatrix[m_nqt*i+j];
//if ( m_ds ) {
// winners1[i*MAX_QUERY_TERMS+j] = NULL;
// winners2[i*MAX_QUERY_TERMS+j] = NULL;
@ -4815,6 +4838,8 @@ bool PosdbTable::setQueryTermInfo ( ) {
// below when trying to grow it. they could all be OR'd together
// so alloc the most!
int32_t maxSlots = (grand/12) * 2;
// try to speed up. this doesn't *seem* to matter, so i took out:
//maxSlots *= 2;
// get total operands we used
//int32_t numOperands = m_q->m_numWords;//Operands;
// a quoted phrase counts as a single operand
@ -4826,15 +4851,15 @@ bool PosdbTable::setQueryTermInfo ( ) {
// allow an extra byte for remainders
if ( m_numQueryTermInfos % 8 ) m_vecSize++;
// now preallocate the hashtable. 0 niceness.
if ( m_q->m_isBoolean &&
! m_bt.set (8,m_vecSize,maxSlots,NULL,0,false,0,"booltbl"))
if ( m_q->m_isBoolean && // true = useKeyMagic
! m_bt.set (8,m_vecSize,maxSlots,NULL,0,false,0,"booltbl",true))
return false;
// . m_ct maps a boolean "bit vector" to a true/false value
// . each "bit" in the "bit vector" indicates if docid has that
// particular query term
if ( m_q->m_isBoolean &&
if ( m_q->m_isBoolean && // true = useKeyMagic
! m_ct.set (8,1,maxSlots,NULL,0,false,0,
"booltbl"))
"booltbl",true))
return false;
return true;
@ -4999,13 +5024,13 @@ int64_t PosdbTable::countUniqueDocids( QueryTermInfo *qti ) {
// inc the TOTAL val count
if ( fe ) fe->m_outsideSearchResultsCount++;
// skip that docid record in our termlist. it MUST have been
// 12 bytes, a docid heading record.
recPtr += 12;
count++;
// skip any following keys that are 6 bytes, that means they
// share the same docid
for ( ; recPtr < subListEnd && ((*recPtr)&0x04); recPtr += 6 );
// Increment ptr to the next record
int32_t recSize = qti->m_subLists[0]->getRecSize(recPtr);
recPtr += recSize;
// Records that are 6 bytes share the same doc id, so only increment
// 'count' if it refers to a record with a new (unique) docId
if (recSize > 6) count++;
goto loop;
}
@ -5882,6 +5907,8 @@ void PosdbTable::intersectLists10_r ( ) {
if ( qti->m_bigramFlags[0] & BF_NEGATIVE ) continue;
// inc this
listGroupNum++;
// if it hits 256 then wrap back down to 1
if ( listGroupNum >= 256 ) listGroupNum = 1;
// add it
addDocIdVotes ( qti , listGroupNum );
}
@ -5966,11 +5993,28 @@ void PosdbTable::intersectLists10_r ( ) {
//
// TRANSFORM QueryTermInfo::m_* vars into old style arrays
//
int32_t wikiPhraseIds [MAX_QUERY_TERMS];
int32_t quotedStartIds[MAX_QUERY_TERMS];
int32_t qpos [MAX_QUERY_TERMS];
int32_t qtermNums [MAX_QUERY_TERMS];
float freqWeights [MAX_QUERY_TERMS];
// int32_t wikiPhraseIds [MAX_QUERY_TERMS];
// int32_t quotedStartIds[MAX_QUERY_TERMS];
// int32_t qpos [MAX_QUERY_TERMS];
// int32_t qtermNums [MAX_QUERY_TERMS];
// float freqWeights [MAX_QUERY_TERMS];
// now dynamically allocate to avoid stack smashing
char *pp = m_stackBuf.getBufStart();
int32_t nqt = m_q->m_numTerms;
int32_t *wikiPhraseIds = (int32_t *)pp; pp += 4 * nqt;
int32_t *quotedStartIds = (int32_t *)pp; pp += 4 * nqt;
int32_t *qpos = (int32_t *)pp; pp += 4 * nqt;
int32_t *qtermNums = (int32_t *)pp; pp += 4 * nqt;
float *freqWeights = (float *)pp; pp += sizeof(float) * nqt;
char **miniMergedList = (char **)pp; pp += sizeof(char *) * nqt;
char **miniMergedEnd = (char **)pp; pp += sizeof(char *) * nqt;
char **bestPos = (char **)pp; pp += sizeof(char *) * nqt;
char **winnerStack = (char **)pp; pp += sizeof(char *) * nqt;
char **xpos = (char **)pp; pp += sizeof(char *) * nqt;
char *bflags = (char *)pp; pp += sizeof(char) * nqt;
float *scoreMatrix = (float *)pp; pp += sizeof(float) *nqt*nqt;
if ( pp > m_stackBuf.getBufEnd() ) {char *xx=NULL;*xx=0; }
for ( int32_t i = 0 ; i < m_numQueryTermInfos ; i++ ) {
// get it
QueryTermInfo *qti = &qip[i];
@ -6012,17 +6056,11 @@ void PosdbTable::intersectLists10_r ( ) {
float minPairScore;
float minSingleScore;
//int64_t docId;
char *miniMergedList [MAX_QUERY_TERMS];
char *miniMergedEnd [MAX_QUERY_TERMS];
char bflags [MAX_QUERY_TERMS];
m_bflags = bflags;
int32_t qdist;
float wts;
float pss;
float scoreMatrix[MAX_QUERY_TERMS*MAX_QUERY_TERMS];
char *bestPos[MAX_QUERY_TERMS];
float maxNonBodyScore;
char *winnerStack[MAX_QUERY_TERMS];
// new vars for removing supplanted docid score infos and
// corresponding pair and single score infos
char *sx;
@ -6340,12 +6378,7 @@ void PosdbTable::intersectLists10_r ( ) {
}
if ( m_q->m_isBoolean ) {
minScore = 1.0;
// since we are jumping, we need to set m_docId here
//m_docId = *(uint32_t *)(docIdPtr+1);
//m_docId <<= 8;
//m_docId |= (unsigned char)docIdPtr[0];
//m_docId >>= 2;
//minScore = 1.0;
// we can't jump over setting of miniMergeList. do that.
goto boolJump1;
}
@ -6557,6 +6590,30 @@ void PosdbTable::intersectLists10_r ( ) {
boolJump1:
if ( m_q->m_isBoolean ) {
//minScore = 1.0;
// this is somewhat wasteful since it is set below again
m_docId = *(uint32_t *)(docIdPtr+1);
m_docId <<= 8;
m_docId |= (unsigned char)docIdPtr[0];
m_docId >>= 2;
// add one point for each term matched in the bool query
// this is really just for when the terms are from different
// fields. if we have unfielded boolean terms we should
// do proximity matching.
int32_t slot = m_bt.getSlot ( &m_docId );
if ( slot >= 0 ) {
uint8_t *bv = (uint8_t *)m_bt.getValueFromSlot(slot);
// then a score based on the # of terms that matched
int16_t bitsOn = getNumBitsOnX ( bv , m_vecSize );
// but store in hashtable now
minScore = (float)bitsOn;
}
else {
minScore = 1.0;
}
}
// we need to do this for seo hacks to merge the synonyms together
// into one list
seoHackSkip2:
@ -6922,7 +6979,7 @@ void PosdbTable::intersectLists10_r ( ) {
&pss);
// it's -1 if one term is in the body/header/menu/etc.
if ( pss < 0 ) {
scoreMatrix[i*MAX_QUERY_TERMS+j] = -1.00;
scoreMatrix[i*nqt+j] = -1.00;
wts = -1.0;
}
else {
@ -6931,7 +6988,7 @@ void PosdbTable::intersectLists10_r ( ) {
wts *= m_freqWeights[j];//sfw[j];
// store in matrix for "sub out" algo below
// when doing sliding window
scoreMatrix[i*MAX_QUERY_TERMS+j] = wts;
scoreMatrix[i*nqt+j] = wts;
// if terms is a special wiki half stop bigram
//if ( bflags[i] == 1 ) wts *= WIKI_BIGRAM_WEIGHT;
//if ( bflags[j] == 1 ) wts *= WIKI_BIGRAM_WEIGHT;
@ -7053,7 +7110,7 @@ void PosdbTable::intersectLists10_r ( ) {
// use special ptrs for the windows so we do not mangle
// miniMergedList[] array because we use that below!
char *xpos[MAX_QUERY_TERMS];
//char *xpos[MAX_QUERY_TERMS];
for ( int32_t i = 0 ; i < m_numQueryTermInfos ; i++ )
xpos[i] = miniMergedList[i];
@ -7262,7 +7319,7 @@ void PosdbTable::intersectLists10_r ( ) {
boolJump2:
// try dividing it by 3! (or multiply by .33333 faster)
score = minScore * (((float)siteRank)*SITERANKMULTIPLIER+1.0);
score = minScore * (((float)siteRank)*m_siteRankMultiplier+1.0);
// . not foreign language? give a huge boost
// . use "qlang" parm to set the language. i.e. "&qlang=fr"
@ -7932,7 +7989,7 @@ float PosdbTable::getMaxPossibleScore ( QueryTermInfo *qti ,
score *= WIKI_BIGRAM_WEIGHT;
}
//score *= perfectWordSpamWeight * perfectWordSpamWeight;
score *= (((float)siteRank)*SITERANKMULTIPLIER+1.0);
score *= (((float)siteRank)*m_siteRankMultiplier+1.0);
// language boost if same language (or no lang specified)
if ( m_r->m_language == docLang ||
@ -8165,6 +8222,10 @@ bool PosdbTable::makeDocIdVoteBufForBoolQuery_r ( ) {
}
// debug info
// int32_t nc = m_bt.getLongestString();
// log("posdb: string of %"INT32" filled slots!",nc);
char *dst = m_docIdVoteBuf.getBufStart();
// . now our hash table is filled with all the docids
@ -8223,13 +8284,15 @@ bool PosdbTable::makeDocIdVoteBufForBoolQuery_r ( ) {
// a 6 byte key means you pass
gbmemcpy ( dst , &docId , 6 );
// test it
int64_t d2;
d2 = *(uint32_t *)(dst+1);
d2 <<= 8;
d2 |= (unsigned char)dst[0];
d2 >>= 2;
docId >>= 2;
if ( d2 != docId ) { char *xx=NULL;*xx=0; }
if ( m_debug ) {
int64_t d2;
d2 = *(uint32_t *)(dst+1);
d2 <<= 8;
d2 |= (unsigned char)dst[0];
d2 >>= 2;
docId >>= 2;
if ( d2 != docId ) { char *xx=NULL;*xx=0; }
}
// end test
dst += 6;
}

@ -604,6 +604,8 @@ class PosdbTable {
float m_finalScore;
float m_preFinalScore;
float m_siteRankMultiplier;
// how long to add the last batch of lists
int64_t m_addListsTime;
int64_t m_t1 ;
@ -654,10 +656,13 @@ class PosdbTable {
SafeBuf m_pairScoreBuf;
SafeBuf m_singleScoreBuf;
SafeBuf m_stackBuf;
//SafeBuf m_mergeBuf;
// a reference to the query
Query *m_q;
int32_t m_nqt;
// these are NOT in imap space, but in query term space, 1-1 with
// Query::m_qterms[]

@ -29,7 +29,9 @@ typedef float rscore_t;
#define MINSCORE 1
#define MIN_SAVE_SIZE 100
#define PQR_BUF_SIZE MAX_QUERY_LEN
// we don't use this any more so make it compile
//#define PQR_BUF_SIZE MAX_QUERY_LEN
#define PQR_BUF_SIZE 64
class PostQueryRerank {
public:

355
Query.cpp

@ -28,6 +28,7 @@ void Query::constructor ( ) {
//m_bmap = NULL;
m_bitScores = NULL;
m_qwords = NULL;
m_numWords = 0;
//m_expressions = NULL;
m_qwordsAllocSize = 0;
//m_expressionsAllocSize = 0;
@ -37,8 +38,8 @@ void Query::constructor ( ) {
m_st0Ptr = NULL;
// we have to manually call this because Query::constructor()
// might have been called explicitly
for ( int32_t i = 0 ; i < MAX_QUERY_TERMS ; i++ )
m_qterms[i].constructor();
//for ( int32_t i = 0 ; i < MAX_QUERY_TERMS ; i++ )
// m_qterms[i].constructor();
//m_expressions = NULL;
reset ( );
}
@ -68,9 +69,19 @@ void Query::reset ( ) {
qt->m_facetIndexBuf.purge();
}
for ( int32_t i = 0 ; i < m_numWords ; i++ ) {
QueryWord *qw = &m_qwords[i];
qw->destructor();
}
m_stackBuf.purge();
m_qterms = NULL;
m_sb.purge();
m_osb.purge();
m_docIdRestriction = 0LL;
m_groupThatHasDocId = NULL;
m_bufLen = 0;
//m_bufLen = 0;
m_origLen = 0;
m_numWords = 0;
//m_numOperands = 0;
@ -84,6 +95,7 @@ void Query::reset ( ) {
//if ( m_bitScores && m_bitScoresSize ) // != m_bsbuf )
// mfree ( m_bitScores , m_bitScoresSize , "Query2" );
//m_bmap = NULL;
m_bitScores = NULL;
//m_bmapSize = 0;
m_bitScoresSize = 0;
@ -131,14 +143,16 @@ bool Query::set2 ( char *query ,
// need language for doing synonyms
uint8_t langId ,
char queryExpansion ,
bool useQueryStopWords ) {
//int32_t maxQueryTerms ) {
bool useQueryStopWords ,
int32_t maxQueryTerms ) {
m_langId = langId;
m_useQueryStopWords = useQueryStopWords;
// fix summary rerank and highlighting.
bool keepAllSingles = true;
m_maxQueryTerms = maxQueryTerms;
// assume boolean auto-detect.
char boolFlag = 2;
@ -150,7 +164,7 @@ bool Query::set2 ( char *query ,
if ( ! query ) return true;
// set to 256 for synonyms?
m_maxQueryTerms = 256;
//m_maxQueryTerms = 256;
m_queryExpansion = queryExpansion;
int32_t queryLen = gbstrlen(query);
@ -160,17 +174,26 @@ bool Query::set2 ( char *query ,
//m_coll = coll;
//m_collLen = collLen;
// truncate query if too big
if ( queryLen >= MAX_QUERY_LEN ) {
log("query: Query length of %"INT32" must be less than %"INT32". "
"Truncating.",queryLen,(int32_t)MAX_QUERY_LEN);
queryLen = MAX_QUERY_LEN - 1;
if ( queryLen >= ABS_MAX_QUERY_LEN ) {
log("query: Query length of %"INT32" must be "
"less than %"INT32". "
"Truncating.",queryLen,(int32_t)ABS_MAX_QUERY_LEN);
queryLen = ABS_MAX_QUERY_LEN - 1;
m_truncated = true;
}
// save original query
m_osb.setBuf ( m_otmpBuf , 128 , 0 , false );
m_osb.setLabel ("oqbuf" );
m_osb.reserve ( queryLen + 1 );
m_osb.safeMemcpy ( query , queryLen );
m_osb.nullTerm ();
m_origLen = queryLen;
gbmemcpy ( m_orig , query , queryLen );
m_orig [ m_origLen ] = '\0';
//m_origLen = queryLen;
//gbmemcpy ( m_orig , query , queryLen );
//m_orig [ m_origLen ] = '\0';
m_orig = m_osb.getBufStart();
m_origLen = m_osb.getLength();
log(LOG_DEBUG, "query: set called = %s", m_orig);
@ -204,9 +227,16 @@ bool Query::set2 ( char *query ,
// that were set somewhere above!!! i moved top: label above!
//reset();
// reserve some space, guessing how much we'd need
m_sb.setBuf(m_tmpBuf3,128,0,false);
m_sb.setLabel("qrystk");
int32_t need = queryLen * 2 + 32;
if ( ! m_sb.reserve ( need ) )
return false;
// convenience ptr
char *p = m_buf;
char *pend = m_buf + MAX_QUERY_LEN;
//char *p = m_buf;
//char *pend = m_buf + MAX_QUERY_LEN;
bool inQuotesFlag = false;
// . copy query into m_buf
// . translate ( and ) to special query operators so Words class
@ -219,27 +249,31 @@ bool Query::set2 ( char *query ,
if ( query[i] == '\"' ) inQuotesFlag = !inQuotesFlag;
if ( inQuotesFlag ) {
*p = query [i];
p++;
//*p = query [i];
//p++;
m_sb.pushChar(query[i]);
continue;
}
// dst buf must be big enough
if ( p + 8 >= pend ) {
g_errno = EBUFTOOSMALL;
return log(LOG_LOGIC,"query: query: query too big.");
}
// if ( p + 8 >= pend ) {
// g_errno = EBUFTOOSMALL;
// return log(LOG_LOGIC,"query: query: query too big.");
// }
// translate ( and )
if ( boolFlag == 1 && query[i] == '(' ) {
gbmemcpy ( p , " LeFtP " , 7 ); p += 7;
//gbmemcpy ( p , " LeFtP " , 7 ); p += 7;
m_sb.safeMemcpy ( " LeFtP " , 7 );
continue;
}
if ( boolFlag == 1 && query[i] == ')' ) {
gbmemcpy ( p , " RiGhP " , 7 ); p += 7;
//gbmemcpy ( p , " RiGhP " , 7 ); p += 7;
m_sb.safeMemcpy ( " RiGhP " , 7 );
continue;
}
if ( query[i] == '|' ) {
gbmemcpy ( p , " PiiPE " , 7 ); p += 7;
//gbmemcpy ( p , " PiiPE " , 7 ); p += 7;
m_sb.safeMemcpy ( " PiiPE " , 7 );
continue;
}
// translate [#a] [#r] [#ap] [#rp] [] [p] to operators
@ -249,28 +283,34 @@ bool Query::set2 ( char *query ,
while ( is_digit(query[j]) ) j++;
char c = query[j];
if ( (c == 'a' || c == 'r') && query[j+1]==']' ) {
sprintf ( p , " LeFtB %"INT32" %c RiGhB ",val,c);
p += gbstrlen(p);
//sprintf ( p , " LeFtB %"INT32" %c RiGhB ",
m_sb.safePrintf(" LeFtB %"INT32" %c RiGhB ",
val,c);
//p += gbstrlen(p);
i = j + 1;
continue;
}
else if ( (c == 'a' || c == 'r') &&
query[j+1]=='p' && query[j+2]==']') {
sprintf ( p , " LeFtB %"INT32" %cp RiGhB ",val,c);
p += gbstrlen(p);
//sprintf ( p , " LeFtB %"INT32" %cp RiGhB ",
m_sb.safePrintf(" LeFtB %"INT32" %cp RiGhB ",
val,c);
//p += gbstrlen(p);
i = j + 2;
continue;
}
}
if ( query[i] == '[' && query[i+1] == ']' ) {
sprintf ( p , " LeFtB RiGhB ");
p += gbstrlen(p);
//sprintf ( p , " LeFtB RiGhB ");
//p += gbstrlen(p);
m_sb.safePrintf ( " LeFtB RiGhB ");
i = i + 1;
continue;
}
if ( query[i] == '[' && query[i+1] == 'p' && query[i+2]==']') {
sprintf ( p , " LeFtB RiGhB ");
p += gbstrlen(p);
//sprintf ( p , " LeFtB RiGhB ");
//p += gbstrlen(p);
m_sb.safePrintf ( " LeFtB RiGhB ");
i = i + 2;
continue;
}
@ -306,17 +346,22 @@ bool Query::set2 ( char *query ,
// TODO: copy altavista's operators here? & | !
// otherwise, just a plain copy
*p = query [i];
p++;
// *p = query [i];
// p++;
m_sb.pushChar ( query[i] );
}
// NULL terminate
*p = '\0';
//*p = '\0';
m_sb.nullTerm();
// debug statement
//log(LOG_DEBUG,"Query: Got new query=%s",tempBuf);
//printf("query: query: Got new query=%s\n",tempBuf);
// set length
m_bufLen = p - m_buf;
//m_bufLen = p - m_buf;
//m_buf = m_sb.getBufStart();
//m_bufLen = m_sb.length();
Words words;
Phrases phrases;
@ -560,8 +605,108 @@ bool Query::setQTerms ( Words &words , Phrases &phrases ) {
// what is the max value for "shift"?
int32_t max = (int32_t)MAX_EXPLICIT_BITS;
if ( max > m_maxQueryTerms ) max = m_maxQueryTerms;
// count phrases first for allocating
int32_t nqt = 0;
for ( int32_t i = 0 ; i < m_numWords ; i++ ) {
QueryWord *qw = &m_qwords[i];
// skip if ignored... mdw...
if ( ! qw->m_phraseId ) continue;
if ( qw->m_ignorePhrase ) continue; // could be a repeat
// none if weight is absolute zero
if ( qw->m_userWeightPhrase == 0 &&
qw->m_userTypePhrase == 'a' ) continue;
nqt++;
}
// count phrase terms too!!!
for ( int32_t i = 0 ; i < m_numWords; i++ ) {
QueryWord *qw = &m_qwords[i];
if ( qw->m_ignoreWord &&
qw->m_ignoreWord != IGNORE_QSTOP) continue;
// ignore if in quotes and part of phrase, watch out
// for things like "word", a single word in quotes.
if ( qw->m_quoteStart >= 0 && qw->m_phraseId ) continue;
// if we are not start of quote and NOT in a phrase we
// must be the tailing word i guess.
// fixes '"john smith" -"bob dole"' from having
// smith and dole as query terms.
if ( qw->m_quoteStart >= 0 && qw->m_quoteStart != i )
continue;
// ignore if weight is absolute zero
if ( qw->m_userWeight == 0 &&
qw->m_userType == 'a' ) continue;
nqt++;
}
// thirdly, count synonyms
Synonyms syn;
int32_t sn = 0;
if ( m_queryExpansion ) sn = m_numWords;
int64_t to = hash64n("to",0LL);
for ( int32_t i = 0 ; i < sn ; i++ ) {
// get query word
QueryWord *qw = &m_qwords[i];
// skip if in quotes, we will not get synonyms for it
if ( qw->m_inQuotes ) continue;
// skip if has plus sign in front
if ( qw->m_wordSign == '+' ) continue;
// not '-' either i guess
if ( qw->m_wordSign == '-' ) continue;
// no url: stuff, maybe only title
if ( qw->m_fieldCode &&
qw->m_fieldCode != FIELD_TITLE &&
qw->m_fieldCode != FIELD_GENERIC )
continue;
// skip if ignored like a stopword (stop to->too)
//if ( qw->m_ignoreWord ) continue;
// ignore title: etc. words, they are field names
if ( qw->m_ignoreWord == IGNORE_FIELDNAME ) continue;
// ignore boolean operators
if ( qw->m_ignoreWord ) continue;// IGNORE_BOOLOP
// no, hurts 'Greencastle IN economic development'
if ( qw->m_wordId == to ) continue;
// single letters...
if ( qw->m_wordLen == 1 ) continue;
// set the synonyms for this word
char tmpBuf [ TMPSYNBUFSIZE ];
int32_t naids = syn.getSynonyms ( &words ,
i ,
// language of the query.
// 0 means unknown. if this
// is 0 we sample synonyms
// from all languages.
m_langId ,
tmpBuf ,
0 ); // m_niceness );
// if no synonyms, all done
if ( naids <= 0 ) continue;
nqt += naids;
}
m_numTermsUntruncated = nqt;
if ( nqt > m_maxQueryTerms ) nqt = m_maxQueryTerms;
// allocate the stack buf
if ( nqt ) {
int32_t need = nqt * sizeof(QueryTerm) ;
if ( ! m_stackBuf.reserve ( need ) )
return false;
m_stackBuf.setLabel("stkbuf3");
char *pp = m_stackBuf.getBufStart();
m_qterms = (QueryTerm *)pp;
pp += sizeof(QueryTerm);
if ( pp > m_stackBuf.getBufEnd() ) { char *xx=NULL;*xx=0; }
}
// call constructor on each one here
for ( int32_t i = 0 ; i < nqt ; i++ ) {
QueryTerm *qt = &m_qterms[i];
qt->constructor();
}
//char u8Buf[256];
for ( int32_t i = 0 ; i < m_numWords && n < MAX_QUERY_TERMS ; i++ ) {
for ( int32_t i = 0 ; i < m_numWords ; i++ ) {
// break out if no more explicit bits!
/*
if ( shift >= max ) {
@ -580,9 +725,14 @@ bool Query::setQTerms ( Words &words , Phrases &phrases ) {
qw->m_userTypePhrase == 'a' ) continue;
// stop breach
if ( n >= MAX_QUERY_TERMS ) {
if ( n >= ABS_MAX_QUERY_TERMS ) {
log("query: lost query phrase terms to max term "
"limit of %"INT32"",(int32_t)MAX_QUERY_TERMS );
"limit of %"INT32"",(int32_t)ABS_MAX_QUERY_TERMS );
break;
}
if ( n >= m_maxQueryTerms ) {
log("query: lost query phrase terms to max term cr "
"limit of %"INT32"",(int32_t)m_maxQueryTerms);
break;
}
@ -604,7 +754,7 @@ bool Query::setQTerms ( Words &words , Phrases &phrases ) {
qt->m_isQueryStopWord = false;
// change in both places
qt->m_termId = qw->m_phraseId & TERMID_MASK;
m_termIds[n] = qw->m_phraseId & TERMID_MASK;
//m_termIds[n] = qw->m_phraseId & TERMID_MASK;
//log(LOG_DEBUG, "Setting query phrase term id %d: %lld", n, m_termIds[n]);
qt->m_rawTermId = qw->m_rawPhraseId;
// assume explicit bit is 0
@ -615,12 +765,12 @@ bool Query::setQTerms ( Words &words , Phrases &phrases ) {
// phrases like: "cat dog" AND pig
if ( m_isBoolean && qw->m_phraseSign != '*' ) {
qt->m_termSign = '\0';
m_termSigns[n] = '\0';
//m_termSigns[n] = '\0';
}
// if not boolean, ensure to change signs in both places
else {
qt->m_termSign = qw->m_phraseSign;
m_termSigns[n] = qw->m_phraseSign;
//m_termSigns[n] = qw->m_phraseSign;
}
//
// INSERT UOR LOGIC HERE
@ -703,7 +853,7 @@ bool Query::setQTerms ( Words &words , Phrases &phrases ) {
}
// now if we have enough room, do the singles
for ( int32_t i = 0 ; i < m_numWords && n < MAX_QUERY_TERMS ; i++ ) {
for ( int32_t i = 0 ; i < m_numWords ; i++ ) {
// break out if no more explicit bits!
/*
if ( shift >= max ) {
@ -738,9 +888,14 @@ bool Query::setQTerms ( Words &words , Phrases &phrases ) {
qw->m_userType == 'a' ) continue;
// stop breach
if ( n >= MAX_QUERY_TERMS ) {
if ( n >= ABS_MAX_QUERY_TERMS ) {
log("query: lost query terms to max term "
"limit of %"INT32"",(int32_t)MAX_QUERY_TERMS );
"limit of %"INT32"",(int32_t)ABS_MAX_QUERY_TERMS );
break;
}
if ( n >= m_maxQueryTerms ) {
log("query: lost query terms to max term cr "
"limit of %"INT32"",(int32_t)m_maxQueryTerms);
break;
}
@ -760,7 +915,7 @@ bool Query::setQTerms ( Words &words , Phrases &phrases ) {
qt->m_isQueryStopWord = qw->m_isQueryStopWord;
// change in both places
qt->m_termId = qw->m_wordId & TERMID_MASK;
m_termIds[n] = qw->m_wordId & TERMID_MASK;
//m_termIds[n] = qw->m_wordId & TERMID_MASK;
qt->m_rawTermId = qw->m_rawWordId;
// assume explicit bit is 0
qt->m_explicitBit = 0;
@ -769,18 +924,18 @@ bool Query::setQTerms ( Words &words , Phrases &phrases ) {
// boolean queries are not allowed term signs
if ( m_isBoolean ) {
qt->m_termSign = '\0';
m_termSigns[n] = '\0';
//m_termSigns[n] = '\0';
// boolean fix for "health OR +sports" because
// the + there means exact word match, no synonyms.
if ( qw->m_wordSign == '+' ) {
qt->m_termSign = qw->m_wordSign;
m_termSigns[n] = qw->m_wordSign;
//m_termSigns[n] = qw->m_wordSign;
}
}
// if not boolean, ensure to change signs in both places
else {
qt->m_termSign = qw->m_wordSign;
m_termSigns[n] = qw->m_wordSign;
//m_termSigns[n] = qw->m_wordSign;
}
// get previous text word
//int32_t pw = i - 2;
@ -1230,16 +1385,14 @@ bool Query::setQTerms ( Words &words , Phrases &phrases ) {
// . skip this part if language is unknown i guess
//
////////////
int32_t sn = 0;
Synonyms syn;
// loop over all words in query and process its synonyms list
//if ( m_langId != langUnknown && m_queryExpansion )
// if lang is "xx" unknown we still do synonyms it just does
// a loop over all languages starting with english
if ( m_queryExpansion )
sn = m_numWords;
// if ( m_queryExpansion )
// sn = m_numWords;
int64_t to = hash64n("to",0LL);
//int64_t to = hash64n("to",0LL);
for ( int32_t i = 0 ; i < sn ; i++ ) {
// get query word
@ -1257,6 +1410,10 @@ bool Query::setQTerms ( Words &words , Phrases &phrases ) {
continue;
// skip if ignored like a stopword (stop to->too)
//if ( qw->m_ignoreWord ) continue;
// ignore title: etc. words, they are field names
if ( qw->m_ignoreWord == IGNORE_FIELDNAME ) continue;
// ignore boolean operators
if ( qw->m_ignoreWord ) continue;// IGNORE_BOOLOP
// no, hurts 'Greencastle IN economic development'
if ( qw->m_wordId == to ) continue;
// single letters...
@ -1277,19 +1434,29 @@ bool Query::setQTerms ( Words &words , Phrases &phrases ) {
// sanity
if ( naids > MAX_SYNS ) { char *xx=NULL;*xx=0; }
// now make the buffer to hold them for us
qw->m_synWordBuf.setLabel("qswbuf");
qw->m_synWordBuf.safeMemcpy ( &syn.m_synWordBuf );
// get the term for this word
QueryTerm *origTerm = qw->m_queryWordTerm;
// loop over synonyms for word #i now
for ( int32_t j = 0 ; j < naids ; j++ ) {
// stop breach
if ( n >= MAX_QUERY_TERMS ) {
if ( n >= ABS_MAX_QUERY_TERMS ) {
log("query: lost synonyms due to max term "
"limit of %"INT32"",(int32_t)MAX_QUERY_TERMS );
"limit of %"INT32"",
(int32_t)ABS_MAX_QUERY_TERMS );
break;
}
// this happens for 'da da da'
if ( ! origTerm ) continue;
if ( n >= m_maxQueryTerms ) {
log("query: lost synonyms due to max cr term "
"limit of %"INT32"",
(int32_t)m_maxQueryTerms);
break;
}
// add that query term
QueryTerm *qt = &m_qterms[n];
qt->m_qword = qw; // NULL;
@ -1346,7 +1513,7 @@ bool Query::setQTerms ( Words &words , Phrases &phrases ) {
wid= hash64h(wid,ph);
}
qt->m_termId = wid & TERMID_MASK;
m_termIds[n] = wid & TERMID_MASK;
//m_termIds[n] = wid & TERMID_MASK;
qt->m_rawTermId = syn.m_aids[j];
// assume explicit bit is 0
qt->m_explicitBit = 0;
@ -1354,18 +1521,18 @@ bool Query::setQTerms ( Words &words , Phrases &phrases ) {
// boolean queries are not allowed term signs
if ( m_isBoolean ) {
qt->m_termSign = '\0';
m_termSigns[n] = '\0';
//m_termSigns[n] = '\0';
// boolean fix for "health OR +sports" because
// the + there means exact word match, no syns
if ( qw->m_wordSign == '+' ) {
qt->m_termSign = qw->m_wordSign;
m_termSigns[n] = qw->m_wordSign;
//m_termSigns[n] = qw->m_wordSign;
}
}
// if not bool, ensure to change signs in both places
else {
qt->m_termSign = qw->m_wordSign;
m_termSigns[n] = qw->m_wordSign;
//m_termSigns[n] = qw->m_wordSign;
}
// do not use an explicit bit up if we got a hard count
qt->m_hardCount = qw->m_hardCount;
@ -1413,7 +1580,7 @@ bool Query::setQTerms ( Words &words , Phrases &phrases ) {
m_numTerms = n;
if ( n > MAX_QUERY_TERMS ) { char *xx=NULL;*xx=0; }
if ( n > ABS_MAX_QUERY_TERMS ) { char *xx=NULL;*xx=0; }
// count them for doing number of combos
@ -1493,7 +1660,7 @@ bool Query::setQTerms ( Words &words , Phrases &phrases ) {
// . don't forget to set m_termSigns too!
if ( n == 1 && m_qterms[0].m_isPhrase && ! m_qterms[0].m_termSign ) {
m_qterms[0].m_termSign = '*';
m_termSigns[0] = '*';
//m_termSigns[0] = '*';
}
// . or bits into the m_implicitBits member of phrase QueryTerms that
@ -1524,7 +1691,11 @@ bool Query::setQTerms ( Words &words , Phrases &phrases ) {
// . see Msg2.cpp for more info on componentCodes
// . -2 means unset, neither a compound term nor a component term at
// this time
for ( int32_t i = 0 ; i < m_numTerms ; i++ ) m_componentCodes[i] = -2;
//for( int32_t i = 0 ; i < m_numTerms ; i++ ) m_componentCodes[i] = -2;
for ( int32_t i = 0 ; i < m_numTerms ; i++ ) {
QueryTerm *qt = &m_qterms[i];
qt->m_componentCode = -2;
}
m_numComponents = 0;
// . now set m_phrasePart for Summary.cpp's hackfix filter
@ -1879,7 +2050,10 @@ void Query::addCompoundTerms ( ) {
// -1 means compound, -2 means unset, >= 0 means component
bool Query::isCompoundTerm ( int32_t i ) {
return ( m_componentCodes[i] == -1 );
//return ( m_componentCodes[i] == -1 );
if ( i >= m_numTerms ) return false;
QueryTerm *qt = &m_qterms[i];
return ( qt->m_componentCode == -1 );
}
bool Query::setQWords ( char boolFlag ,
@ -1891,16 +2065,17 @@ bool Query::setQWords ( char boolFlag ,
// . because we now deal with boolean queries, we make parentheses
// their own separate Word, so tell "words" we're setting a query
//Words words;
if ( ! words.set ( m_buf , m_bufLen,
if ( ! words.set ( m_sb.getBufStart() , m_sb.length() ,
//buf , m_bufLen,
TITLEREC_CURRENT_VERSION, true, true ) )
return log("query: Had error parsing query: %s.",
mstrerror(g_errno));
int32_t numWords = words.getNumWords();
// truncate it
if ( numWords > MAX_QUERY_WORDS ) {
if ( numWords > ABS_MAX_QUERY_WORDS ) {
log("query: Had %"INT32" words. Max is %"INT32". Truncating.",
numWords,(int32_t)MAX_QUERY_WORDS);
numWords = MAX_QUERY_WORDS;
numWords,(int32_t)ABS_MAX_QUERY_WORDS);
numWords = ABS_MAX_QUERY_WORDS;
m_truncated = true;
}
m_numWords = numWords;
@ -1923,11 +2098,14 @@ bool Query::setQWords ( char boolFlag ,
return log("query: Could not allocate mem for query.");
m_qwordsAllocSize = need;
}
// reset safebuf in there
for ( int32_t i = 0 ; i < m_numWords ; i++ )
m_qwords[i].constructor();
// is all alpha chars in query in upper case? caps lock on?
bool allUpper = true;
char *p = m_buf;
char *pend = m_buf + m_bufLen;
char *p = m_sb.getBufStart();//m_buf;
char *pend = m_sb.getBuf(); // m_buf + m_bufLen;
for ( ; p < pend ; p += getUtf8CharSize(p) )
if ( is_alpha_utf8 ( p ) && ! is_upper_utf8 ( p ) ) {
allUpper = false; break; }
@ -2027,7 +2205,7 @@ bool Query::setQWords ( char boolFlag ,
char *ignoreTill = NULL;
// loop over all words, these QueryWords are 1-1 with "words"
for ( int32_t i = 0 ; i < numWords && i < MAX_QUERY_WORDS ; i++ ) {
for ( int32_t i = 0 ; i < numWords && i < ABS_MAX_QUERY_WORDS ; i++ ) {
// convenience var, these are 1-1 with "words"
QueryWord *qw = &m_qwords[i];
// set to defaults?
@ -2338,12 +2516,14 @@ bool Query::setQWords ( char boolFlag ,
// in quotes which is silly, so undo it. But we should
// still inherit any quoteSign, however. Be sure to also
// set m_inQuotes to false so Matches.cpp::matchWord() works.
if ( i == quoteStart ) { // + 1 ) {
if ( i + 1 >= numWords || words.getNumQuotes(i+1)>0 ) {
qw->m_quoteStart = -1;
qw->m_inQuotes = false;
}
}
// MDW: don't undo it because we do not want to get synonyms
// of terms in quotes. 7/15/2015
// if ( i == quoteStart ) { // + 1 ) {
// if ( i + 1 >= numWords || words.getNumQuotes(i+1)>0 ) {
// qw->m_quoteStart = -1;
// qw->m_inQuotes = false;
// }
// }
// . get prefix hash of collection name and field
// . but first convert field to lower case
uint64_t ph;
@ -3228,7 +3408,8 @@ bool Query::setQWords ( char boolFlag ,
// search up to this far
int32_t maxj = i + nw;
// but not past our truncated limit
if ( maxj > MAX_QUERY_WORDS ) maxj = MAX_QUERY_WORDS;
if ( maxj > ABS_MAX_QUERY_WORDS )
maxj = ABS_MAX_QUERY_WORDS;
for ( j = i ; j < maxj ; j++ ) {
// skip punct
@ -3385,7 +3566,7 @@ bool Query::setQWords ( char boolFlag ,
// count non-ignored words
if ( qw->m_ignoreWord ) continue;
// if under limit, continue
if ( count++ < MAX_QUERY_TERMS ) continue;
if ( count++ < ABS_MAX_QUERY_TERMS ) continue;
// . otherwise, ignore
// . if we set this for our UOR'ed terms from SearchInput.cpp's
// UOR'ed facebook interests then it causes us to get no results!
@ -4968,7 +5149,7 @@ void Query::printQueryTerms(){
(int64_t)m_qterms[i].m_explicitBit ,
(int64_t)m_qterms[i].m_implicitBits ,
(int32_t) m_qterms[i].m_hardCount ,
m_componentCodes[i],
m_qterms[i].m_componentCode,
getTermLen(i),
tt );
}
@ -5514,7 +5695,17 @@ bool QueryTerm::isSplit() {
// hash of all the query terms
int64_t Query::getQueryHash() {
int64_t qh = 0LL;
for ( int32_t i = 0 ; i < m_numTerms ; i++ )
qh = hash64 ( m_termIds[i] , qh );
for ( int32_t i = 0 ; i < m_numTerms ; i++ ) {
QueryTerm *qt = &m_qterms[i];
qh = hash64 ( qt->m_termId , qh );
}
return qh;
}
void QueryWord::constructor () {
m_synWordBuf.constructor();
}
void QueryWord::destructor () {
m_synWordBuf.purge();
}

71
Query.h

@ -10,7 +10,9 @@
// keep these down to save memory
//#define MAX_QUERY_LEN 8000 // url:XXX can be quite long! (MAX_URL_LEN)
#define MAX_QUERY_LEN 3200
//#define MAX_QUERY_LEN 3200
// support big OR queries for image shingles
#define ABS_MAX_QUERY_LEN 62000
// . words need to deal with int32_t list of sites!
// . remember, words can be string of punctuation, too
//#define MAX_QUERY_WORDS 5000
@ -21,7 +23,8 @@
// seems like we alloc just enough to hold our words now so that this
// is really a performance capper but it is used in Summary.cpp
// and Matches.h so don't go too big just yet
#define MAX_QUERY_WORDS 800
//#define MAX_QUERY_WORDS 800
#define ABS_MAX_QUERY_WORDS 99000
// . how many IndexLists might we get/intersect
// . we now use a int64_t to hold the query term bits for non-boolean queries
@ -36,7 +39,8 @@
//#define MAX_QUERY_TERMS 40
// how to make a lock pick set loses synonyms from 40!
//#define MAX_QUERY_TERMS 80
#define MAX_QUERY_TERMS 160
//#define MAX_QUERY_TERMS 160
#define ABS_MAX_QUERY_TERMS 9000
// only allow up to 200 interests from facebook plus manually entered
// because we are limited by the query terms above so we can only
@ -270,6 +274,9 @@ class QueryWord {
if ( is_wspace_utf8 ( p ) ) return true;
return false;
};
void constructor ();
void destructor ();
//UCScript wordScript() {
// UChar*foo;
// return ucGetScript(utf16Decode((UChar*)(m_word),&foo));
@ -463,6 +470,10 @@ class QueryTerm {
char *m_parenList;
int32_t m_parenListLen;
int32_t m_componentCode;
int64_t m_termFreq;
float m_termFreqWeight;
// . our representative bits
// . the bits in this bit vector is 1-1 with the QueryTerms
// . if a doc has query term #i then bit #i will be set
@ -624,10 +635,10 @@ class Query {
//int32_t collLen ,
uint8_t langId ,
char queryExpansion ,
bool useQueryStopWords = true );
//char boolFlag = 2 , // auto-detect if boolean query
//bool keepAllSingles = false ,
//int32_t maxQueryTerms = 0x7fffffff );
bool useQueryStopWords = true ,
//char boolFlag = 2 , // auto-detect if boolean query
//bool keepAllSingles = false ,
int32_t maxQueryTerms = 0x7fffffff );
// serialize/deserialize ourselves so we don't have to pass the
// unmodified string around and reparse it every time
@ -680,9 +691,9 @@ class Query {
// . the signs and ids are dupped in the QueryTerm classes, too
//int64_t *getTermFreqs ( ) { return m_termFreqs ; };
//int64_t getTermFreq ( int32_t i ) { return m_termFreqs[i]; };
int64_t *getTermIds ( ) { return m_termIds ; };
char *getTermSigns ( ) { return m_termSigns ; };
int32_t *getComponentCodes ( ) { return m_componentCodes; };
//int64_t *getTermIds ( ) { return m_termIds ; };
//char *getTermSigns ( ) { return m_termSigns ; };
//int32_t *getComponentCodes ( ) { return m_componentCodes; };
int64_t getRawWordId ( int32_t i ) { return m_qwords[i].m_rawWordId;};
int32_t getNumComponentTerms ( ) { return m_numComponents; };
@ -926,17 +937,26 @@ class Query {
int32_t m_qwordsAllocSize;
// QueryWords are converted to QueryTerms
QueryTerm m_qterms [ MAX_QUERY_TERMS ];
//QueryTerm m_qterms [ MAX_QUERY_TERMS ];
int32_t m_numTerms;
int32_t m_numTermsSpecial;
int32_t m_numTermsUntruncated;
// separate vectors for easier interfacing, 1-1 with m_qterms
//int64_t m_termFreqs [ MAX_QUERY_TERMS ];
int64_t m_termIds [ MAX_QUERY_TERMS ];
char m_termSigns [ MAX_QUERY_TERMS ];
int32_t m_componentCodes [ MAX_QUERY_TERMS ];
char m_ignore [ MAX_QUERY_TERMS ]; // is term ignored?
int32_t m_numComponents;
//int64_t m_termIds [ MAX_QUERY_TERMS ];
//char m_termSigns [ MAX_QUERY_TERMS ];
//int32_t m_componentCodes [ MAX_QUERY_TERMS ];
//char m_ignore [ MAX_QUERY_TERMS ]; // is term ignored?
SafeBuf m_stackBuf;
QueryTerm *m_qterms ;
//int64_t *m_termIds ;
//char *m_termSigns ;
//int32_t *m_componentCodes ;
//char *m_ignore ; // is term ignored?
int32_t m_numComponents;
// how many bits in the full vector?
//int32_t m_numExplicitBits;
@ -974,18 +994,27 @@ class Query {
class Host *m_groupThatHasDocId;
// for holding the filtered query, in utf8
char m_buf [ MAX_QUERY_LEN ];
int32_t m_bufLen;
//char m_buf [ MAX_QUERY_LEN ];
//int32_t m_bufLen;
// for holding the filtered query, in utf8
SafeBuf m_sb;
char m_tmpBuf3[128];
// for holding the filtered/NULL-terminated query for doing
// matching. basically store phrases in here without punct
// so we can point a needle to them for matching in XmlDoc.cpp.
char m_needleBuf [ MAX_QUERY_LEN + 1 ];
int32_t m_needleBufLen;
//char m_needleBuf [ MAX_QUERY_LEN + 1 ];
//int32_t m_needleBufLen;
// the original query
char m_orig [ MAX_QUERY_LEN ];
//char m_orig [ MAX_QUERY_LEN ];
//int32_t m_origLen;
char *m_orig;
int32_t m_origLen;
SafeBuf m_osb;
char m_otmpBuf[128];
// we just have a ptr to this so don't pull the rug out
//char *m_coll;

@ -373,12 +373,12 @@ bool RdbDump::dumpTree ( bool recall ) {
//if ( removeNegRecs )
// m_list.removeNegRecs();
// if(!m_list->checkList_r ( false , // removeNegRecs?
// false , // sleep on problem?
// m_rdb->m_rdbId )) {
// log("db: list to dump is not sane!");
// char *xx=NULL;*xx=0;
// }
// if(!m_list->checkList_r ( false , // removeNegRecs?
// false , // sleep on problem?
// m_rdb->m_rdbId )) {
// log("db: list to dump is not sane!");
// char *xx=NULL;*xx=0;
// }
skip:
@ -781,6 +781,10 @@ bool RdbDump::doneReadingForVerify ( ) {
if ( m_addToMap ) t = gettimeofdayInMilliseconds();
// sanity check
if ( m_list->m_ks != m_ks ) { char *xx = NULL; *xx = 0; }
bool triedToFix = false;
tryAgain:
// . register this with the map now
// . only register AFTER it's ALL on disk so we don't get partial
// record reads and we don't read stuff on disk that's also in tree
@ -788,6 +792,16 @@ bool RdbDump::doneReadingForVerify ( ) {
// . we don't have maps when we do unordered dumps
// . careful, map is NULL if we're doing unordered dump
if ( m_addToMap && m_map && ! m_map->addList ( m_list ) ) {
// keys out of order in list from tree?
if ( g_errno == ECORRUPTDATA ) {
log("db: trying to fix tree or buckets");
if ( m_tree ) m_tree->fixTree();
//if ( m_buckets ) m_buckets->fixBuckets();
if ( m_buckets ) { char *xx=NULL;*xx=0; }
if ( triedToFix ) { char *xx=NULL;*xx=0; }
triedToFix = true;
goto tryAgain;
}
g_errno = ENOMEM;
log("db: Failed to add data to map.");
// undo the offset update, the write failed, the parent

@ -624,7 +624,8 @@ bool RdbList::growList ( int32_t newSize ) {
// don't shrink list
if ( newSize <= m_allocSize ) return true;
// debug msg
//log("RdbList::growList from %"INT32" to %"INT32"",m_allocSize , newSize );
// log("RdbList::growList 0x%"PTRFMT "from %"INT32" to %"INT32"",
// (PTRTYPE)this,m_allocSize , newSize );
// make a new buffer
char *tmp =(char *) mrealloc ( m_alloc,m_allocSize,newSize,"RdbList");
//if ( (int32_t)tmp == 0x904dbd0 )

@ -323,8 +323,9 @@ bool RdbMap::verifyMap2 ( ) {
KEYSET(lastKey,k,m_ks); continue; }
// just bitch for now
log(
"db: Key out of order in map file %s%s. "
"page = %"INT32". key offset = %"INT64". Map or data file is "
"db: Key out of order in map file %s/%s. "
"page = %"INT32". key offset = %"INT64". "
"Map or data file is "
"corrupt, but it is probably the data file. Please "
"delete the map file and restart.",
m_file.m_dir,m_file.getFilename() ,
@ -337,6 +338,15 @@ bool RdbMap::verifyMap2 ( ) {
KEY1(lastKey,m_ks),KEY0(lastKey));
log("db: k.n1=%016"XINT64" n0=%016"XINT64"",KEY1(k,m_ks),KEY0(k));
log("db: m_numPages = %"INT32"",m_numPages);
SafeBuf cmd;
cmd.safePrintf("mv %s/%s %s/trash/",
m_file.m_dir,
m_file.getFilename(),
g_hostdb.m_dir);
log("db: %s",cmd.getBufStart() );
gbsystem ( cmd.getBufStart() );
exit(0);
//char *xx=NULL;*xx=0;
// was k too small?
@ -543,7 +553,8 @@ bool RdbMap::addRecord ( char *key, char *rec , int32_t recSize ) {
m_lastLogTime = getTime();
//pageNum > 0 && getKey(pageNum-1) > getKey(pageNum) ) {
log(LOG_LOGIC,"build: RdbMap: added key out of order. "
"count=%"INT64".",m_badKeys);
"count=%"INT64" file=%s/%s.",m_badKeys,
m_file.m_dir,m_file.getFilename());
//log(LOG_LOGIC,"build: k.n1=%"XINT32" %"XINT64" lastKey.n1=%"XINT32" %"XINT64"",
// key.n1,key.n0,m_lastKey.n1,m_lastKey.n0 );
log(LOG_LOGIC,"build: offset=%"INT64"",
@ -556,7 +567,10 @@ bool RdbMap::addRecord ( char *key, char *rec , int32_t recSize ) {
g_errno = ECORRUPTDATA;
return false;
}
char *xx=NULL;*xx=0;
// if being called from RdbDump.cpp...
g_errno = ECORRUPTDATA;
return false;
//char *xx=NULL;*xx=0;
// . during a merge, corruption can happen, so let's core
// here until we figure out how to fix it.
// . any why wasn't the corruption discovered and patched
@ -719,7 +733,10 @@ bool RdbMap::addList ( RdbList *list ) {
if ( ! addRecord ( key , rec , recSize ) ) {
log("db: Failed to add record to map: %s.",
mstrerror(g_errno));
char *xx = NULL; *xx = 0;
// allow caller to try to fix the tree in the case of dumping
// a tree to a file on disk
return false;
//char *xx = NULL; *xx = 0;
}
if ( list->skipCurrentRecord() ) goto top2;

@ -1283,19 +1283,26 @@ bool RdbTree::checkTree2 ( bool printMsgs , bool doChainTest ) {
if ( m_right[i] >= 0 && m_parents[m_right[i]] != i )
return log(
"db: Tree right kid and parent disagree.");
/*
// MDW: why did i comment out the order checking?
// check order
if ( m_left[i] >= 0 ) {
if ( m_left[i] >= 0 &&
m_collnums[i] == m_collnums[m_left[i]] ) {
char *key = &m_keys[i*m_ks];
char *left = &m_keys[m_left[i]*m_ks];
if ( KEYCMP(key,left,m_ks)<0) {char *xx=NULL;*xx=0;}
if ( KEYCMP(key,left,m_ks)<0)
return log("db: Tree left kid > parent %i",i);
}
if ( m_right[i] >= 0 ) {
if ( m_right[i] >= 0 &&
m_collnums[i] == m_collnums[m_right[i]] ) {
char *key = &m_keys[i*m_ks];
char *right = &m_keys[m_right[i]*m_ks];
if ( KEYCMP(key,right,m_ks)>0) {char *xx=NULL;*xx=0;}
if ( KEYCMP(key,right,m_ks)>0)
return log("db: Tree right kid < parent %i "
"%s < %s",i,
KEYSTR(right,m_ks),
KEYSTR(key,m_ks) );
}
*/
//g_loop.quickPoll(1, __PRETTY_FUNCTION__, __LINE__);
}
if ( hkp > 0 )

@ -522,7 +522,8 @@ int32_t SafeBuf::safeSave (char *filename ) {
}
int32_t SafeBuf::fillFromFile(char *dir,char *filename) {
int32_t SafeBuf::fillFromFile(char *dir,char *filename,char *label) {
m_label = label;
char buf[1024];
if ( dir ) snprintf(buf,1024,"%s/%s",dir,filename);
else snprintf(buf,1024,"%s",filename);

@ -10,6 +10,9 @@
* (for java programmers, very similar to the StringBuffer class, with all the speed that c++ allows).
* Most of strings in Gigablast are handled by those.
*/
#include "iana_charset.h"
class SafeBuf {
public:
//*TRUCTORS
@ -33,8 +36,11 @@ public:
// want SafeBuf to free the data for you. Keep in mind, all
// previous content in SafeBuf will be cleared when you pass it
// a new buffer.
bool setBuf(char *newBuf, int32_t bufMax, int32_t bytesInUse, bool ownData,
int16_t encoding );
bool setBuf(char *newBuf,
int32_t bufMax,
int32_t bytesInUse,
bool ownData,
int16_t encoding = csUTF8 );
// yieldBuf() allows you to take over the buffer in SafeBuf.
// You may only free the data if it was originally owned by
// the SafeBuf.
@ -67,8 +73,9 @@ public:
int32_t safeSave (char *filename );
int32_t fillFromFile(char *filename);
int32_t fillFromFile(char *dir,char *filename);
int32_t load(char *dir,char *fname) { return fillFromFile(dir,fname);};
int32_t fillFromFile(char *dir,char *filename, char *label=NULL);
int32_t load(char *dir,char *fname,char *label = NULL) {
return fillFromFile(dir,fname,label);};
int32_t load(char *fname) { return fillFromFile(fname);};
void filterTags();

@ -50,14 +50,16 @@ void SearchInput::clear ( int32_t niceness ) {
key_t SearchInput::makeKey ( ) {
// hash the query
int32_t n = m_q.getNumTerms ();
int64_t *termIds = m_q.getTermIds ();
char *signs = m_q.getTermSigns ();
//int64_t *termIds = m_q.getTermIds ();
//char *signs = m_q.getTermSigns ();
key_t k;
k.n1 = 0;
k.n0 = hash64 ( (char *)termIds , n * sizeof(int64_t) );
k.n0 = hash64 ( (char *)signs , n , k.n0 );
//k.n0 = hash64 ( (char *)termIds , n * sizeof(int64_t) );
//k.n0 = hash64 ( (char *)signs , n , k.n0 );
// user defined weights, for weighting each query term separately
for ( int32_t i = 0 ; i < n ; i++ ) {
k.n0 = hash64 ((char *)&m_q.m_qterms[i].m_termId ,4, k.n0);
k.n0 = hash64 ((char *)&m_q.m_qterms[i].m_termSign ,1, k.n0);
k.n0 = hash64 ((char *)&m_q.m_qterms[i].m_userWeight,4, k.n0);
k.n0 = hash64 ((char *)&m_q.m_qterms[i].m_userType ,1, k.n0);
}
@ -468,14 +470,16 @@ bool SearchInput::set ( TcpSocket *sock , HttpRequest *r ) { //, Query *q ) {
log("query: qlang of \"%s\" is NOT SUPPORTED. using "
"langUnknown, \"xx\".",langAbbr);
int32_t maxQueryTerms = cr->m_maxQueryTerms;
// . the query to use for highlighting... can be overriden with "hq"
// . we need the language id for doing synonyms
if ( m_prepend && m_prepend[0] )
m_hqq.set2 ( m_prepend , m_queryLangId , true );
m_hqq.set2 ( m_prepend , m_queryLangId , true ,maxQueryTerms);
else if ( m_highlightQuery && m_highlightQuery[0] )
m_hqq.set2 ( m_highlightQuery , m_queryLangId , true );
m_hqq.set2 (m_highlightQuery,m_queryLangId,true,maxQueryTerms);
else if ( m_query && m_query[0] )
m_hqq.set2 ( m_query , m_queryLangId , true );
m_hqq.set2 ( m_query , m_queryLangId , true,maxQueryTerms);
// log it here
log(LOG_INFO,
@ -487,7 +491,9 @@ bool SearchInput::set ( TcpSocket *sock , HttpRequest *r ) { //, Query *q ) {
// . returns false and sets g_errno on error (ETOOMANYOPERANDS)
if ( ! m_q.set2 ( m_sbuf1.getBufStart(),
m_queryLangId ,
m_queryExpansion ) ) {
m_queryExpansion ,
true , // use QUERY stopwords?
maxQueryTerms ) ) {
g_msg = " (error: query has too many operands)";
return false;
}
@ -823,6 +829,9 @@ bool SearchInput::setQueryBuffers ( HttpRequest *hr ) {
m_sbuf2.safeStrcpy(" AND ");
}
}
m_sbuf1.setLabel("sisbuf1");
m_sbuf2.setLabel("sisbuf2");
m_sbuf3.setLabel("sisbuf3");
// append the natural query
if ( m_query && m_query[0] ) {
//if ( p > pstart ) *p++ = ' ';

@ -1164,7 +1164,9 @@ bool Sections::set ( Words *w ,
xh ^= g_hashtab[cnt++][(unsigned char )*p];
}
// sanity check
if ( ! xh ) { char *xx=NULL;*xx=0; }
//if ( ! xh ) { char *xx=NULL;*xx=0; }
// if it is a string of the same chars it can be 0
if ( ! xh ) xh = 1;
// store that
sn->m_xmlNameHash = (int32_t)xh;
}

@ -11641,6 +11641,18 @@ int32_t getUrlFilterNum2 ( SpiderRequest *sreq ,
goto checkNextRule;
}
if ( strncmp ( p , "isfakeip",8 ) == 0 ) {
// skip for msg20
if ( isForMsg20 ) continue;
// if no match continue
if ( (bool)sreq->m_fakeFirstIp == val ) continue;
p += 8;
p = strstr(p, "&&");
if ( ! p ) return i;
p += 2;
goto checkNextRule;
}
if ( strncmp ( p , "isonsamedomain",14 ) == 0 ) {
// skip for msg20
if ( isForMsg20 ) continue;
@ -13993,6 +14005,17 @@ bool getSpiderStatusMsg ( CollectionRec *cx , SafeBuf *msg , int32_t *status ) {
return msg->safePrintf("Job is initializing.");
}
// if we had seeds and none were successfully crawled, do not just
// print that the crawl completed.
if ( cx->m_collectiveRespiderFrequency <= 0.0 &&
cx->m_isCustomCrawl &&
! cx->m_globalCrawlInfo.m_hasUrlsReadyToSpider &&
cx->m_globalCrawlInfo.m_pageDownloadAttempts > 0 &&
cx->m_globalCrawlInfo.m_pageDownloadSuccesses == 0 ) {
*status = SP_SEEDSERROR;
return msg->safePrintf("Failed to crawl any seed.");
}
// if we sent an email simply because no urls
// were left and we are not recrawling!
if ( cx->m_collectiveRespiderFrequency <= 0.0 &&

@ -39,6 +39,7 @@
#define SP_INPROGRESS 7 // it is going on!
#define SP_ADMIN_PAUSED 8 // g_conf.m_spideringEnabled = false
#define SP_COMPLETED 9 // crawl is done, and no repeatCrawl is scheduled
#define SP_SEEDSERROR 10 // all seeds had an error preventing crawling
bool tryToDeleteSpiderColl ( SpiderColl *sc , char *msg ) ;
void spiderRoundIncremented ( class CollectionRec *cr ) ;

@ -44,13 +44,13 @@ Stats::Stats ( ) {
m_totalSpiderSuccessOld = 0;
m_totalSpiderErrorsOld = 0;
m_msg3aRecallCnt = 0;
m_tierHits[0] = 0;
m_tierHits[1] = 0;
m_tierHits[2] = 0;
m_tier2Misses = 0;
m_tierTimes[0] = 0;
m_tierTimes[1] = 0;
m_tierTimes[2] = 0;
// m_tierHits[0] = 0;
// m_tierHits[1] = 0;
// m_tierHits[2] = 0;
// m_tier2Misses = 0;
// m_tierTimes[0] = 0;
// m_tierTimes[1] = 0;
// m_tierTimes[2] = 0;
//m_totalDedupCand = 0;
//m_dedupedCand = 0;
//m_bannedDups = 0;

10
Stats.h

@ -11,7 +11,7 @@
#include "SafeBuf.h"
#include "UdpProtocol.h" // MAX_MSG_TYPES
#include "IndexReadInfo.h"
//#include "IndexReadInfo.h"
class StatPoint {
public:
@ -143,8 +143,8 @@ class Stats {
// when we just request more docids from the same tier
int32_t m_msg3aFastRecalls;
// how many resolutions did we get on each tier
int32_t m_tierHits [MAX_TIERS];
int64_t m_tierTimes[MAX_TIERS];
//int32_t m_tierHits [MAX_TIERS];
//int64_t m_tierTimes[MAX_TIERS];
// how many searches did not get enough results?
int32_t m_tier2Misses;
// one count for each CR_* defined in Msg51.h
@ -160,8 +160,8 @@ class Stats {
//int32_t m_errored;
int32_t m_msg3aRecalls[6];
SafeBuf m_keyCols;
int32_t m_numTermsVsTier[14][MAX_TIERS];
int32_t m_termsVsTierExp[14][MAX_TIERS][7];
//int32_t m_numTermsVsTier[14][MAX_TIERS];
//int32_t m_termsVsTierExp[14][MAX_TIERS][7];
// use m_start so we know what msg stats to clear with memset
char m_start;

@ -12,6 +12,8 @@ Summary::Summary()
//m_buf = NULL;
m_bitScoresBuf = NULL;
m_bitScoresBufSize = 0;
m_wordWeights = NULL;
m_buf4 = NULL;
reset();
}
@ -36,6 +38,15 @@ void Summary::reset() {
m_numExcerpts = 0;
m_summaryLocs.reset();
m_summaryLocsPops.reset();
if ( m_wordWeights && m_wordWeights != (float *)m_tmpBuf ) {
mfree ( m_wordWeights , m_wordWeightSize , "sumww");
m_wordWeights = NULL;
}
m_wordWeights = NULL;
if ( m_buf4 && m_buf4 != m_tmpBuf4 ) {
mfree ( m_buf4 , m_buf4Size , "ssstkb" );
m_buf4 = NULL;
}
}
@ -151,6 +162,15 @@ bool Summary::set2 ( Xml *xml ,
end - start );
start = gettimeofdayInMilliseconds();*/
//
int32_t need1 = q->m_numWords * sizeof(float);
m_wordWeightSize = need1;
if ( need1 < 128 )
m_wordWeights = (float *)m_tmpBuf;
else
m_wordWeights = (float *)mmalloc ( need1 , "wwsum" );
if ( ! m_wordWeights ) return false;
// zero out all word weights
for ( int32_t i = 0 ; i < q->m_numWords; i++ )
@ -229,11 +249,25 @@ bool Summary::set2 ( Xml *xml ,
pend = m_summary + maxSummaryLen;
m_numExcerpts = 0;
int32_t need2 = (1+1+1) * m_q->m_numWords;
m_buf4Size = need2;
if ( need2 < 128 )
m_buf4 = m_tmpBuf4;
else
m_buf4 = (char *)mmalloc ( need2 , "stkbuf" );
if ( ! m_buf4 ) return false;
char *x = m_buf4;
char *retired = x;
x += m_q->m_numWords;
char *maxGotIt = x;
x += m_q->m_numWords;
char *gotIt = x;
// . the "maxGotIt" count vector accumulates into "retired"
// . that is how we keep track of what query words we used for previous
// summary excerpts so we try to get diversified excerpts with
// different query terms/words in them
char retired [ MAX_QUERY_WORDS ];
//char retired [ MAX_QUERY_WORDS ];
memset ( retired, 0, m_q->m_numWords * sizeof(char) );
// some query words are already matched in the title
@ -260,7 +294,7 @@ bool Summary::set2 ( Xml *xml ,
int32_t maxb = 0;
int32_t maxi = -1;
int32_t lasta = -1;
char maxGotIt [ MAX_QUERY_WORDS ];
//char maxGotIt [ MAX_QUERY_WORDS ];
if(lastNumFinal == numFinal) {
if(maxLoops-- <= 0) {
@ -296,7 +330,7 @@ bool Summary::set2 ( Xml *xml ,
if ( skip ) continue;
// ask him for the query words he matched
char gotIt [ MAX_QUERY_WORDS ];
//char gotIt [ MAX_QUERY_WORDS ];
// clear it for him
memset ( gotIt, 0, m_q->m_numWords * sizeof(char) );
@ -558,6 +592,12 @@ bool Summary::set2 ( Xml *xml ,
m_displayLen = p - m_summary;
}
// free the mem we used if we allocated it
if ( m_buf4 && m_buf4 != m_tmpBuf4 ) {
mfree ( m_buf4 , m_buf4Size , "ssstkb" );
m_buf4 = NULL;
}
// If we still didn't find a summary, get the default summary
if ( p == m_summary ) {
@ -570,6 +610,7 @@ bool Summary::set2 ( Xml *xml ,
maxSummaryLen );
if ( m_numDisplayLines > 0 )
m_displayLen = m_summaryLen;
return status;
}
@ -1211,7 +1252,7 @@ bool Summary::set1 ( char *doc ,
int32_t numTerms = q->getNumTerms();
// . now assign scores based on term frequencies
// . highest score is 10000, then 9900, 9800, 9700, ...
int32_t ptrs [ MAX_QUERY_TERMS ];
int32_t ptrs [ ABS_MAX_QUERY_TERMS ];
for ( int32_t i = 0 ; i < numTerms ; i++ ) ptrs[i] = i;
// convenience var
int64_t *freqs = termFreqs; // q->getTermFreqs();
@ -1232,7 +1273,7 @@ bool Summary::set1 ( char *doc ,
}
}
// assign scores, give rarest terms highest score
int32_t scores [ MAX_QUERY_TERMS ];
int32_t scores [ ABS_MAX_QUERY_TERMS ];
for ( int32_t i = 0 ; i < numTerms ; i++ )
scores[ptrs[i]] = 10000000 - (i*100);
// force QUERY stop words to have much lower scores at most 10000
@ -1441,7 +1482,7 @@ bool Summary::set1 ( char *doc ,
int32_t maxi = -1;
int32_t maxa = 0;
int32_t maxb = 0;
char gotIt [ MAX_QUERY_TERMS ];
char gotIt [ ABS_MAX_QUERY_TERMS ];
char *maxleft = NULL;
char *maxright = NULL;
for ( int32_t i = 0 ; i < numMatches ; i++ ) {

@ -266,7 +266,14 @@ class Summary {
char *m_bitScoresBuf;
int32_t m_bitScoresBufSize;
float m_wordWeights[MAX_QUERY_WORDS];
//float m_wordWeights[MAX_QUERY_WORDS];
float *m_wordWeights;
int32_t m_wordWeightSize;
char m_tmpBuf[128];
char *m_buf4;
int32_t m_buf4Size;
char m_tmpBuf4[128];
char m_summaryLocBuf[MAX_SUMMARY_LOCS*sizeof(uint64_t)];
SafeBuf m_summaryLocs;

@ -12,6 +12,7 @@
#include "Wiktionary.h"
Synonyms::Synonyms() {
m_synWordBuf.setLabel("syswbuf");
}
Synonyms::~Synonyms() {

@ -5049,8 +5049,8 @@ bool Tagdb::loadMinSiteInlinksBuffer2 ( ) {
// use 4 bytes for the first 130,000 entries or so to hold
// # of site inlinks. then we only need 1 byte since the remaining
// 25M are <256 sitenuminlinksunqiecblocks
m_siteBuf1.load(g_hostdb.m_dir,"sitelinks1.dat");
m_siteBuf2.load(g_hostdb.m_dir,"sitelinks2.dat");
m_siteBuf1.load(g_hostdb.m_dir,"sitelinks1.dat","stelnks1");
m_siteBuf2.load(g_hostdb.m_dir,"sitelinks2.dat","stelnks2");
m_siteBuf1.setLabel("sitelnks");
m_siteBuf2.setLabel("sitelnks");

@ -2565,11 +2565,10 @@ bool XmlDoc::indexDoc ( ) {
SafeBuf *ssDocMetaList = NULL;
// save this
int32_t saved = m_indexCode;
// and make it the real reason for the spider status doc
// make it the real reason for the spider status doc
m_indexCode = EDNSERROR;
// get the spiderreply ready to be added
ssDocMetaList = getSpiderStatusDocMetaList(NULL ,false);//del
// get the spiderreply ready to be added. false=del
ssDocMetaList =getSpiderStatusDocMetaList(NULL ,false);
// revert
m_indexCode = saved;
// error?
@ -2586,8 +2585,11 @@ bool XmlDoc::indexDoc ( ) {
char *url = "unknown";
if ( m_sreqValid ) url = m_sreq.m_url;
log("build: error2 getting real firstip of %"INT32" for "
"%s. Not adding new spider req", (int32_t)*fip,url);
log("build: error2 getting real firstip of "
"%"INT32" for "
"%s. Not adding new spider req. "
"spiderstatusdocsize=%"INT32, (int32_t)*fip,url,
m_addedStatusDocSize);
// also count it as a crawl attempt
cr->m_localCrawlInfo.m_pageDownloadAttempts++;
cr->m_globalCrawlInfo.m_pageDownloadAttempts++;
@ -3130,8 +3132,9 @@ bool isRobotsTxtFile ( char *u , int32_t ulen ) {
bool XmlDoc::isContainerDoc ( ) {
if ( m_firstUrlValid && m_firstUrl.isWarc() ) return true;
if ( m_firstUrlValid && m_firstUrl.isArc () ) return true;
if ( ! m_contentDelimValid ) { char *xx=NULL;*xx=0; }
if ( m_contentDelim ) return true;
//if ( ! m_contentDelimValid ) { char *xx=NULL;*xx=0; }
//if ( m_contentDelim ) return true;
if ( m_contentDelimValid && m_contentDelim ) return true;
return false;
}
@ -9617,11 +9620,15 @@ float computeSimilarity ( int32_t *vec0 ,
// . stock the query term hash table
// . use the lower 32 bits of the termids to make compatible
// with the other vectors we use
int64_t *qtids = q->getTermIds ();
//int64_t *qtids = q->getTermIds ();
int32_t nt = q->getNumTerms();
for ( int32_t i = 0 ; i < nt ; i++ ) {
// get query term
QueryTerm *QT = &q->m_qterms[i];
// get the termid
int64_t termId = QT->m_termId;
// get it
uint32_t h = (uint32_t)(qtids[i] & 0xffffffff);
uint32_t h = (uint32_t)(termId & 0xffffffff);
// hash it
if ( ! qt.addKey ( &h ) ) return -1;
}
@ -28672,6 +28679,11 @@ SafeBuf *XmlDoc::getSpiderStatusDocMetaList2 ( SpiderReply *reply1 ) {
jd.safePrintf("\"gbssHttpStatus\":%"INT32",\n",
(int32_t)m_httpStatus);
// do not index gbssIsSeedUrl:0 because there will be too many usually
bool isSeed = ( m_sreqValid && m_sreq.m_isAddUrl );
if ( isSeed )
jd.safePrintf("\"gbssIsSeedUrl\":1,\n");
if ( od )
jd.safePrintf("\"gbssWasIndexed\":1,\n");
else
@ -28696,6 +28708,18 @@ SafeBuf *XmlDoc::getSpiderStatusDocMetaList2 ( SpiderReply *reply1 ) {
else
jd.safePrintf("\"gbssDiffbotUri\":"
"\"none\",\n");
// show the type as gbssDiffbotType:"article" etc.
JsonItem *dti = NULL;
if ( jp1 )
dti = jp1->getItem("type");
if ( dti ) {
jd.safePrintf("\"gbssDiffbotType\":\"");
int32_t vlen;
char *val = dti->getValueAsString( &vlen );
if ( val ) jd.jsonEncode ( val , vlen );
jd.safePrintf("\",\n");
}
}
else { // if ( cr->m_isCustomCrawl ) {
jd.safePrintf("\"gbssIsDiffbotObject\":0,\n");
@ -45262,7 +45286,7 @@ SafeBuf *XmlDoc::getMatchingQueriesScoredForFullQuery ( ) {
// prepend to the query?
int32_t ulen = m_firstUrl.m_ulen;
// go to next guy if this query is too big already
if ( ulen + qlen + 10 > MAX_QUERY_LEN ) {
if ( ulen + qlen + 10 > ABS_MAX_QUERY_LEN ) {
m_queryNum++;
goto loop;
}

@ -232,6 +232,17 @@ uint64_t hash64d ( char *p, int32_t plen ) {
char ncs = utf8Encode ( x , (char *)tmp );
// sanity check
if ( ncs > 4 ) { char *xx=NULL;*xx=0; }
// i've seen this happen for 4 byte char =
// -16,-112,-51,-125 which has x=66371 and y=66371
// but utf8Encode() returned 0!
if ( ncs == 0 ) {
// let's just hash it as-is then
tmp[0] = p[0];
if ( cs >= 1 ) tmp[1] = p[1];
if ( cs >= 2 ) tmp[2] = p[2];
if ( cs >= 3 ) tmp[3] = p[3];
ncs = cs;
}
// hash it up
h ^= g_hashtab [i++][tmp[0]];
if ( ncs == 1 ) continue;

33
hash.h

@ -250,6 +250,17 @@ inline uint64_t hash64Lower_utf8_nospaces ( char *p, int32_t len ) {
char ncs = utf8Encode ( y , tmp );
// sanity check
if ( ncs > 4 ) { char *xx=NULL;*xx=0; }
// i've seen this happen for 4 byte char =
// -16,-112,-51,-125 which has x=66371 and y=66371
// but utf8Encode() returned 0!
if ( ncs == 0 ) {
// let's just hash it as-is then
tmp[0] = p[0];
if ( cs >= 1 ) tmp[1] = p[1];
if ( cs >= 2 ) tmp[2] = p[2];
if ( cs >= 3 ) tmp[3] = p[3];
ncs = cs;
}
// hash it up
h ^= g_hashtab [i++][(uint8_t)tmp[0]];
if ( ncs == 1 ) continue;
@ -301,6 +312,17 @@ inline uint64_t hash64Lower_utf8_cont ( char *p,
char ncs = utf8Encode ( y , tmp );
// sanity check
if ( ncs > 4 ) { char *xx=NULL;*xx=0; }
// i've seen this happen for 4 byte char =
// -16,-112,-51,-125 which has x=66371 and y=66371
// but utf8Encode() returned 0!
if ( ncs == 0 ) {
// let's just hash it as-is then
tmp[0] = p[0];
if ( cs >= 1 ) tmp[1] = p[1];
if ( cs >= 2 ) tmp[2] = p[2];
if ( cs >= 3 ) tmp[3] = p[3];
ncs = cs;
}
// hash it up
h ^= g_hashtab [i++][(uint8_t)tmp[0]];
if ( ncs == 1 ) continue;
@ -376,6 +398,17 @@ inline uint64_t hash64Lower_utf8 ( char *p ) {
char ncs = utf8Encode ( y , (char *)tmp );
// sanity check
if ( ncs > 4 ) { char *xx=NULL;*xx=0; }
// i've seen this happen for 4 byte char =
// -16,-112,-51,-125 which has x=66371 and y=66371
// but utf8Encode() returned 0!
if ( ncs == 0 ) {
// let's just hash it as-is then
tmp[0] = p[0];
if ( cs >= 1 ) tmp[1] = p[1];
if ( cs >= 2 ) tmp[2] = p[2];
if ( cs >= 3 ) tmp[3] = p[3];
ncs = cs;
}
// hash it up
h ^= g_hashtab [i++][(uint8_t)tmp[0]];
if ( ncs == 1 ) continue;

@ -4998,7 +4998,7 @@ int install ( install_flag_konst_t installFlag , int32_t hostId , char *dir ,
if ( ! f.doesExist() ) target = "gb";
sprintf(tmp,
"scp -c blowfish " // blowfish is faster
"scp -c arcfour " // blowfish is faster
"%s%s "
"%s:%s/gb.installed%s",
dir,