Merge branch 'testing' of github.com:gigablast/open-source-search-engine into testing
Collectiondb.cpp
@@ -240,7 +240,7 @@ bool Collectiondb::addExistingColl ( char *coll, collnum_t collnum ) {
 	//cr->m_collnum = oldCollnum;
 
 	// get the default.conf from working dir if there
-	g_parms.setToDefault( (char *)cr , OBJ_COLL );
+	g_parms.setToDefault( (char *)cr , OBJ_COLL , cr );
 
 	strcpy ( cr->m_coll , coll );
 	cr->m_collLen = gbstrlen ( coll );
@@ -383,7 +383,7 @@ bool Collectiondb::addNewColl ( char *coll ,
 	// . get the default.conf from working dir if there
 	// . i think this calls CollectionRec::reset() which resets all of its
 	//   rdbbase classes for its collnum so m_collnum needs to be right
-	g_parms.setToDefault( (char *)cr , OBJ_COLL );
+	g_parms.setToDefault( (char *)cr , OBJ_COLL , cr );
 
 	/*
 	// the default conf file
@@ -1657,7 +1657,7 @@ bool CollectionRec::load ( char *coll , long i ) {
 	// also reset some counts not included in parms list
 	reset();
 	// before we load, set to defaults in case some are not in xml file
-	g_parms.setToDefault ( (char *)this , OBJ_COLL );
+	g_parms.setToDefault ( (char *)this , OBJ_COLL , this );
 	// get the filename with that id
 	File f;
 	char tmp2[1024];
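Note: the pattern above recurs throughout this commit: Parms::setToDefault() gains a third argument naming the CollectionRec that can supply per-collection defaults. A sketch of the resulting calling convention, with the class shape abbreviated from the diff:

// Sketch only: every setToDefault() call now states which CollectionRec,
// if any, supplies per-collection defaults.
class CollectionRec;
class Parms {
public:
	void setToDefault ( char *THIS , char objType , CollectionRec *argcr );
};
// Call sites after this commit, as seen in the diff:
//   g_parms.setToDefault ( (char *)cr   , OBJ_COLL      , cr   ); // Collectiondb.cpp
//   g_parms.setToDefault ( (char *)this , OBJ_CONF      , NULL ); // Conf.cpp: g_conf is global
//   g_parms.setToDefault ( (char *)&gr  , OBJ_GBREQUEST , NULL ); // Parms.cpp
//   g_parms.setToDefault ( (char *)this , OBJ_SI        , cr   ); // SearchInput.cpp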
Conf.cpp (4 changed lines)
@@ -165,12 +165,12 @@ bool Conf::isConnectIp ( unsigned long ip ) {
 
 // . set all member vars to their default values
 void Conf::reset ( ) {
-	g_parms.setToDefault ( (char *)this , OBJ_CONF );
+	g_parms.setToDefault ( (char *)this , OBJ_CONF ,NULL);
 	m_save = true;
 }
 
 bool Conf::init ( char *dir ) { // , long hostId ) {
-	g_parms.setToDefault ( (char *)this , OBJ_CONF );
+	g_parms.setToDefault ( (char *)this , OBJ_CONF ,NULL);
 	m_save = true;
 	char fname[1024];
 	if ( dir ) sprintf ( fname , "%slocalgb.conf", dir );
DiskPageCache.cpp
@@ -28,7 +28,10 @@ DiskPageCache::DiskPageCache () {
 	// sometimes db may pass an unitialized DiskPageCache to a BigFile
 	// so make sure when BigFile::close calls DiskPageCache::rmVfd() our
 	// m_memOff vector is all NULLed out, otherwise it will core
-	memset ( m_memOff , 0 , 4 * MAX_NUM_VFDS2 );
+	//memset ( m_memOff , 0 , sizeof(long *) * MAX_NUM_VFDS2 );
+	for ( long i = 0 ; i < MAX_NUM_VFDS2 ; i++ )
+		m_memOff[i] = NULL;
+
 	m_availMemOff = NULL;
 	//m_isOverriden = false;
 	reset();
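Note: the constructor fix above matters on 64-bit builds. m_memOff holds pointers, so memset(m_memOff, 0, 4 * MAX_NUM_VFDS2) assumed 4-byte slots and cleared only the first half of the array when pointers are 8 bytes, leaving garbage for DiskPageCache::rmVfd() to trip over. A standalone illustration (the constant is shrunk for the example):

#include <cstdio>
#include <cstring>

static const long MAX_NUM_VFDS2 = 8; // tiny stand-in for the real constant

int main ( ) {
	long *memOff[MAX_NUM_VFDS2];
	// old, broken on 64-bit: clears 4*8 = 32 of the 64 bytes
	memset ( memOff , 0 , 4 * MAX_NUM_VFDS2 );
	// fixed: either size the memset correctly...
	memset ( memOff , 0 , sizeof(long *) * MAX_NUM_VFDS2 );
	// ...or NULL each slot explicitly, as the committed code now does
	for ( long i = 0 ; i < MAX_NUM_VFDS2 ; i++ )
		memOff[i] = NULL;
	printf ( "slots cleared: %ld\n" , MAX_NUM_VFDS2 );
	return 0;
}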
Makefile (4 changed lines)
@@ -546,8 +546,8 @@ master-rpm:
 # DEBIAN PACKAGE SECTION BEGIN
 
 # need to do 'apt-get intall dh-make'
-testing-deb:
-	git archive --format=tar --prefix=gb-1.0/ testing > ../gb_1.0.orig.tar
+master-deb:
+	git archive --format=tar --prefix=gb-1.0/ master > ../gb_1.0.orig.tar
 	rm -rf debian
 # change "-p gb_1.0" to "-p gb_1.1" to update version for example
 	dh_make -e gigablast@mail.com -p gb_1.0 -f ../gb_1.0.orig.tar
Msg39.cpp (27 changed lines)
@@ -37,7 +37,7 @@ Msg39::Msg39 () {
 void Msg39::reset() {
 	if ( m_inUse ) { char *xx=NULL;*xx=0; }
 	m_allocedTree = false;
-	m_numDocIdSplits = 1;
+	//m_numDocIdSplits = 1;
 	m_tmpq.reset();
 	m_numTotalHits = 0;
 	m_gotClusterRecs = 0;
@@ -241,29 +241,6 @@ void Msg39::getDocIds2 ( Msg39Request *req ) {
 
 	QUICKPOLL ( m_r->m_niceness );
 
-	// assume not doing special docid splitting
-	m_numDocIdSplits = 1;
-
-	// . do not do splits if caller is already specifying a docid range
-	//   like for gbdocid: queries i guess.
-	// . make sure m_msg2 is non-NULL, because if it is NULL we are
-	//   evaluating a query for a single docid for seo tools
-	if ( m_r->m_minDocId == -1 ) { // && m_msg2 ) {
-		long nt = m_tmpq.getNumTerms();
-		m_numDocIdSplits = nt / 2;
-		if ( m_numDocIdSplits == 0 ) m_numDocIdSplits = 1;
-	}
-
-	// for testing
-	//m_numDocIdSplits = 3;
-
-	//if ( ! g_conf.m_doDocIdRangeSplitting )
-	//	m_numDocIdSplits = 1;
-
-	// limit to 10
-	if ( m_numDocIdSplits > 10 )
-		m_numDocIdSplits = 10;
-
 	// . if caller already specified a docid range, then be loyal to that!
 	// . or if we do not have enough query terms to warrant splitting
 	//if ( m_numDocIdSplits == 1 ) {
@@ -351,7 +328,7 @@ bool Msg39::controlLoop ( ) {
 		// the starting docid...
 		long long d0 = m_ddd;
 		// shortcut
-		long long delta = MAX_DOCID / (long long)m_numDocIdSplits;
+		long long delta = MAX_DOCID / (long long)m_r->m_numDocIdSplits;
 		// advance to point to the exclusive endpoint
 		m_ddd += delta;
 		// ensure this is exclusive of ddd since it will be
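Note: with this change the split count travels in the request (m_r->m_numDocIdSplits) instead of being computed inside each Msg39. controlLoop() still walks the docid space in fixed-size chunks. A minimal sketch of that range arithmetic; the MAX_DOCID value below is an illustrative placeholder, not the engine's real constant:

#include <cstdio>

static const long long MAX_DOCID = (1LL << 38) - 1; // placeholder value

int main ( ) {
	long numDocIdSplits = 4; // would arrive in the Msg39Request
	long long ddd = 0;       // m_ddd: start of the current range
	long long delta = MAX_DOCID / (long long)numDocIdSplits;
	for ( long pass = 0 ; pass < numDocIdSplits ; pass++ ) {
		long long d0 = ddd; // inclusive start
		ddd += delta;       // advance to the exclusive endpoint
		printf ( "pass %ld: docids [%lld, %lld)\n" , pass , d0 , ddd );
	}
	// integer division can leave a small remainder above the last
	// endpoint; the real loop is responsible for covering it
	return 0;
}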
Msg39.h (9 changed lines)
@@ -69,8 +69,10 @@ class Msg39Request {
 		m_getDocIdScoringInfo = 1;
 
 		// -1 means to not to docid range restriction
-		m_minDocId = -1;
-		m_maxDocId = -1;
+		m_minDocId = -1LL;
+		m_maxDocId = -1LL;
+
+		m_numDocIdSplits = 1;
 
 		// for widget, to only get results to append to last docid
 		m_maxSerpScore = 0.0;
@@ -92,6 +94,7 @@ class Msg39Request {
 	char      m_niceness;
 	long      m_maxAge;
 	long      m_maxQueryTerms;
+	long      m_numDocIdSplits;
 	//long      m_compoundListMaxSize;
 	char      m_boolFlag;
 	uint8_t   m_language;
@@ -238,7 +241,7 @@ class Msg39 {
 
 	char      m_debug;
 
-	long      m_numDocIdSplits;
+	//long      m_numDocIdSplits;
 	bool      m_allocedTree;
 	long long m_ddd;
 	long long m_dddEnd;
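Note: m_numDocIdSplits now lives in Msg39Request, so the requester (Msg40) chooses the split count and every Msg39 obeys it. The -1 to -1LL edit is cosmetic: assigning the int literal -1 to a long long already sign-extends to -1LL. A quick self-contained check; the struct shape is illustrative, not the real class:

#include <cstdio>

struct Msg39RequestSketch {
	long long m_minDocId;
	long long m_maxDocId;
	long      m_numDocIdSplits;
	Msg39RequestSketch ( ) {
		m_minDocId = -1;   // int -1 sign-extends: same stored value as -1LL
		m_maxDocId = -1LL; // explicit about the width
		m_numDocIdSplits = 1; // requester (Msg40) overrides before sending
	}
};

int main ( ) {
	Msg39RequestSketch r;
	printf ( "%d\n" , (int)(r.m_minDocId == -1LL && r.m_maxDocId == -1LL) ); // prints 1
	return 0;
}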
Msg40.cpp (66 changed lines)
@@ -245,10 +245,10 @@ bool Msg40::getResults ( SearchInput *si ,
 	if ( /*m_si->m_firstResultNum == 0 && */get < m_docsToScanForTopics )
 		get = m_docsToScanForTopics;
 	// for alden's reranking. often this is 50!
-	if ( get < m_si->m_docsToScanForReranking )
-		get = m_si->m_docsToScanForReranking;
+	//if ( get < m_si->m_docsToScanForReranking )
+	//	get = m_si->m_docsToScanForReranking;
 	// for zak's reference pages
-	if ( get < m_si->m_refs_numToGenerate ) get=m_si->m_refs_numToGenerate;
+	// if(get<m_si->m_refs_numToGenerate ) get=m_si->m_refs_numToGenerate;
 	// limit to this ceiling though for peformance reasons
 	//if ( get > m_maxDocIdsToCompute ) get = m_maxDocIdsToCompute;
 	// ok, need some sane limit though to prevent malloc from
@@ -562,6 +562,43 @@ bool Msg40::getDocIds ( bool recall ) {
 	mr.m_minSerpDocId = m_si->m_minSerpDocId;
 	mr.m_maxSerpScore = m_si->m_maxSerpScore;
 
+	//
+	// how many docid splits should we do to avoid going OOM?
+	//
+	CollectionRec *cr = g_collectiondb.getRec(m_firstCollnum);
+	RdbBase *base = NULL;
+	if ( cr ) g_titledb.getRdb()->getBase(cr->m_collnum);
+	long long numDocs = 0;
+	if ( base ) numDocs = base->getNumTotalRecs();
+	// for every 5M docids per host, lets split up the docid range
+	// to avoid going OOM
+	long mult = numDocs / 5000000;
+	if ( mult <= 0 ) mult = 1;
+	// . do not do splits if caller is already specifying a docid range
+	//   like for gbdocid: queries i guess.
+	// . make sure m_msg2 is non-NULL, because if it is NULL we are
+	//   evaluating a query for a single docid for seo tools
+	//if ( m_r->m_minDocId == -1 ) { // && m_msg2 ) {
+	long nt = m_si->m_q.getNumTerms();
+	long numDocIdSplits = nt / 2; // ;/// 2;
+	if ( numDocIdSplits <= 0 ) numDocIdSplits = 1;
+	// and mult based on index size
+	numDocIdSplits *= mult;
+	//if ( numDocIdSplits < 5 ) numDocIdSplits = 5;
+	//}
+	// for testing
+	//m_numDocIdSplits = 3;
+	//if ( ! g_conf.m_doDocIdRangeSplitting )
+	//	m_numDocIdSplits = 1;
+	// limit to 10
+	if ( numDocIdSplits > 15 )
+		numDocIdSplits = 15;
+	// store it in the reuquest now
+	mr.m_numDocIdSplits = numDocIdSplits;
+
+
+
+
 	// . get the docIds
 	// . this sets m_msg3a.m_clusterLevels[] for us
 	//if(! m_msg3a.getDocIds ( &m_r, m_si->m_q, this , gotDocIdsWrapper))
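Note: two things about the block above. First, as committed, the result of getBase() is never assigned to base, so numDocs stays 0 and mult stays 1; presumably `base = g_titledb.getRdb()->getBase(cr->m_collnum);` was intended. Second, the heuristic itself: roughly one split per two query terms, multiplied by one for every 5M docids on the host, capped at 15. A hedged sketch of that computation, assuming the intended assignment; the parameter names stand in for the diff's locals:

// numDocsInIndex stands in for base->getNumTotalRecs(), numQueryTerms for
// m_si->m_q.getNumTerms(); the constants come from the diff.
long computeNumDocIdSplits ( long long numDocsInIndex , long numQueryTerms ) {
	// one extra multiple for every 5M docids per host, to avoid OOM
	long mult = (long)(numDocsInIndex / 5000000);
	if ( mult <= 0 ) mult = 1;
	// more terms means more termlist data per docid range, so split more
	long numDocIdSplits = numQueryTerms / 2;
	if ( numDocIdSplits <= 0 ) numDocIdSplits = 1;
	numDocIdSplits *= mult;
	// cap it; the Msg39-side code this replaces capped at 10
	if ( numDocIdSplits > 15 ) numDocIdSplits = 15;
	return numDocIdSplits;
}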
@@ -1240,6 +1277,18 @@ bool Msg40::launchMsg20s ( bool recalled ) {
 		if ( m_si->m_streamResults &&
 		     i >= m_printi + MAX_OUTSTANDING_MSG20S - 1 )
 			break;
+
+		// if we have printed enough summaries then do not launch
+		// any more, wait for them to come back in
+		if ( m_printi >= m_docsToGetVisible ) {
+			logf(LOG_DEBUG,"query: got %li summaries. done. "
+			     "waiting on remaining "
+			     "%li to return."
+			     , m_printi
+			     , m_numRequests-m_numReplies);
+			break;
+		}
+
 		// do not double count!
 		//if ( i <= m_lastProcessedi ) continue;
 		// do not repeat for this i
@@ -1552,6 +1601,9 @@ bool Msg40::gotSummary ( ) {
 	State0 *st = (State0 *)m_state;
 
 	// keep socket alive if not streaming. like downloading csv...
+	// this fucks up HTTP replies by inserting a space before the "HTTP"
+	// it does not render properly on the browser...
+	/*
 	long now2 = getTimeLocal();
 	if ( now2 - m_lastHeartbeat >= 10 && ! m_si->m_streamResults &&
 	     // incase socket is closed and recycled for another connection
@@ -1561,7 +1613,7 @@ bool Msg40::gotSummary ( ) {
 		log("msg40: sent heartbeat of %li bytes on sd=%li",
 		    (long)n,(long)st->m_socket->m_sd);
 	}
-
+	*/
 
 
 	/*
@@ -4979,12 +5031,14 @@ static int factCmp ( const void *a, const void *b ) {
 	return 0;
 }
 
 // . aka NUGGABITS
 // . now make the fast facts from the gigabits and the samples.
 // . these are sentences containing the query and a gigabit.
 // . sets m_factBuf
 bool Msg40::computeFastFacts ( ) {
 
+	// skip for now
+	return true;
 	//return true;
 
 	bool debugGigabits = m_si->m_debugGigabits;
@@ -5037,7 +5091,7 @@ bool Msg40::computeFastFacts ( ) {
 
 
 	//
-	// store Facts (sentences) into this safebuf (nuggets)
+	// store Facts (sentences) into this safebuf (nuggets)(nuggabits)
 	//
 	char ftmp[100000];
 	SafeBuf factBuf(ftmp,100000);
PageResults.cpp (261 changed lines)
@@ -718,6 +718,219 @@ void gotState ( void *state ){
 }
 
 
+// print all sentences containing this gigabit (fast facts) (nuggabits)
+static bool printGigabitContainingSentences ( State0 *st,
+					      SafeBuf *sb ,
+					      Msg40 *msg40 ,
+					      Gigabit *gi ,
+					      SearchInput *si ,
+					      Query *gigabitQuery ) {
+
+	static long s_gigabitCount = 0;
+
+	sb->safePrintf("<nobr><b>");
+	//"<img src=http://search.yippy.com/"
+	//"images/new/button-closed.gif><b>");
+
+	HttpRequest *hr = &st->m_hr;
+
+	// make a new query
+	sb->safePrintf("<a href=\"/search?gigabits=1&q=");
+	sb->urlEncode(gi->m_term,gi->m_termLen);
+	sb->safeMemcpy("+|+",3);
+	char *q = hr->getString("q",NULL,"");
+	sb->urlEncode(q);
+	sb->safePrintf("\">");
+	sb->safeMemcpy(gi->m_term,gi->m_termLen);
+	sb->safePrintf("</a></b>");
+	sb->safePrintf(" <font color=gray size=-1>");
+	long numOff = sb->m_length;
+	sb->safePrintf("     ");//,gi->m_numPages);
+	sb->safePrintf("</font>");
+	sb->safePrintf("</b>");
+	if ( si->m_isAdmin )
+		sb->safePrintf("[%.0f]{%li}",
+			       gi->m_gbscore,
+			       gi->m_minPop);
+
+	long revert = sb->length();
+
+	sb->safePrintf("<font color=blue style=align:right;>"
+		       "<a onclick=ccc(%li);>"
+		       , s_gigabitCount
+		       );
+	long spaceOutOff = sb->length();
+	sb->safePrintf( "%c%c%c",
+			0xe2,
+			0x87,
+			0x93);
+	sb->safePrintf(//"[more]"
+		       "</a></font>");
+
+
+	sb->safePrintf("</nobr>"); // <br>
+
+	// get facts
+	long numNuggets = 0;
+	long numFacts = msg40->m_factBuf.length() / sizeof(Fact);
+	Fact *facts = (Fact *)msg40->m_factBuf.getBufStart();
+	bool first = true;
+	bool second = false;
+	bool printedSecond = false;
+	//long long lastDocId = -1LL;
+	long saveOffset = 0;
+	for ( long i = 0 ; i < numFacts ; i++ ) {
+		Fact *fi = &facts[i];
+
+		// if printed for a higher scoring gigabit, skip
+		if ( fi->m_printed ) continue;
+
+		// check gigabit match
+		long k; for ( k = 0 ; k < fi->m_numGigabits ; k++ )
+			if ( fi->m_gigabitPtrs[k] == gi ) break;
+		// skip this fact/sentence if does not contain gigabit
+		if ( k >= fi->m_numGigabits ) continue;
+
+		// do not print if no period at end
+		char *s = fi->m_fact;
+		char *e = s + fi->m_factLen;
+		if ( e[-1] != '*' ) continue;
+		e--;
+
+	again:
+
+		// first time, print in the single fact div
+		if ( first ) {
+			sb->safePrintf("<div "
+				       "style=\"border:1px lightgray solid;\" "
+				       "id=fd%li>",s_gigabitCount);
+		}
+
+		if ( second ) {
+			sb->safePrintf("<div style=\"max-height:300px;"
+				       "display:none;"
+				       "overflow-x:hidden;"
+				       "overflow-y:auto;"//scroll;"
+				       "border:1px lightgray solid;\" "
+				       "id=sd%li>",s_gigabitCount);
+			printedSecond = true;
+		}
+
+		Msg20Reply *reply = fi->m_reply;
+
+		// ok, print it out
+		if ( ! first && ! second ) {
+			//if ( reply->m_docId != lastDocId )
+			sb->safePrintf("<br><br>\n");
+			//else {
+			//	sb->setLength ( saveOffset );
+			//	sb->safePrintf("<br><br>\n");
+			//}
+		}
+		else {
+			sb->safePrintf("<br>");
+		}
+
+
+		numNuggets++;
+
+		// print the fast fact (sentence)
+		//sb->safeMemcpy ( s , e-s );
+
+		// let's highlight with gigabits and query terms
+		SafeBuf tmpBuf;
+		Highlight h;
+		h.set ( &tmpBuf , // print it out here
+			s , // content
+			e - s , // len
+			si->m_queryLangId , // from m_defaultSortLang
+			gigabitQuery , // the gigabit "query" in quotes
+			true , // stemming? -- unused
+			false , // use anchors?
+			NULL , // baseurl
+			"<u>", // front tag
+			"</u>", // back tag
+			0 , // fieldCode
+			0 ); // niceness
+		// now highlight the original query as well but in black bold
+		h.set ( sb , // print it out here
+			tmpBuf.getBufStart() , // content
+			tmpBuf.length() , // len
+			si->m_queryLangId , // from m_defaultSortLang
+			&si->m_q , // the regular query
+			true , // stemming? -- unused
+			false , // use anchors?
+			NULL , // baseurl
+			"<b>" , // front tag
+			"</b>", // back tag
+			0 , // fieldCode
+			0 ); // niceness
+
+
+		fi->m_printed = 1;
+		saveOffset = sb->length();
+		sb->safePrintf(" <a href=/get?cnsp=0&"
+			       "strip=1&d=%lli>",reply->m_docId);
+		long dlen; char *dom = getDomFast(reply->ptr_ubuf,&dlen);
+		sb->safeMemcpy(dom,dlen);
+		sb->safePrintf("</a>\n");
+		//lastDocId = reply->m_docId;
+
+		if ( first ) {
+			sb->safePrintf("</div>");
+		}
+
+		if ( second ) {
+			second = false;
+		}
+
+		if ( first ) {
+			first = false;
+			second = true;
+			// print first gigabit all over again but in 2nd div
+			goto again;
+		}
+	}
+
+	// we counted the first one twice since we had to throw it into
+	// the hidden div too!
+	if ( numNuggets > 1 ) numNuggets--;
+
+	// do not print the double down arrow if no nuggets printed
+	if ( numNuggets <= 0 ) {
+		sb->m_length = revert;
+		sb->safePrintf("</nobr>");
+	}
+	// just remove down arrow if only 1...
+	else if ( numNuggets == 1 ) {
+		char *dst = sb->getBufStart()+spaceOutOff;
+		dst[0] = ' ';
+		dst[1] = ' ';
+		dst[2] = ' ';
+	}
+	// store the # of nuggets in ()'s like (10 )
+	else {
+		char tmp[10];
+		sprintf(tmp,"(%li)",numNuggets);
+		char *src = tmp;
+		// starting storing digits after "( "
+		char *dst = sb->getBufStart()+numOff;
+		long srcLen = gbstrlen(tmp);
+		if ( srcLen > 5 ) srcLen = 5;
+		for ( long k = 0 ; k < srcLen ; k++ )
+			dst[k] = src[k];
+	}
+
+	s_gigabitCount++;
+
+	if ( printedSecond ) {
+		sb->safePrintf("</div>");
+	}
+
+	return true;
+}
+
+
+/*
 // print all sentences containing this gigabit
 static bool printGigabit ( State0 *st,
 			   SafeBuf *sb ,
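Note: the new function renders each gigabit twice. The first matching sentence goes into an always-visible div (id=fd<N>), then via the `goto again` the same sentence plus all the rest goes into a hidden, scrollable div (id=sd<N>) that the ccc(N) script added later in this diff toggles; that is also why numNuggets is decremented once, since the first sentence is counted in both passes. A minimal sketch of the two-div layout, with illustrative names, not the real SafeBuf API:

#include <cstdio>
#include <string>
#include <vector>

int main ( ) {
	std::vector<std::string> facts = { "fact one.", "fact two.", "fact three." };
	std::string sb;
	long gigabitCount = 0;

	// visible div: first fact only
	sb += "<div id=fd" + std::to_string(gigabitCount) + ">";
	sb += facts[0];
	sb += "</div>";

	// hidden div: every fact, the first one repeated
	sb += "<div style=\"display:none;\" id=sd" + std::to_string(gigabitCount) + ">";
	for ( size_t i = 0 ; i < facts.size() ; i++ ) sb += facts[i] + "<br>";
	sb += "</div>";

	printf ( "%s\n" , sb.c_str() );
	return 0;
}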
@@ -757,6 +970,7 @@ static bool printGigabit ( State0 *st,
 
 	return true;
 }
+*/
 
 class StateAU {
 public:
@@ -1883,9 +2097,49 @@ bool printSearchResultsHeader ( State0 *st ) {
 	Gigabit *gigabits = (Gigabit *)gbuf->getBufStart();
 	//long numCols = 5;
 	//long perRow = numGigabits / numCols;
 
+	if ( numGigabits && si->m_format == FORMAT_HTML )
+		// gigabit unhide function
+		sb->safePrintf (
+			"<script>"
+			"function ccc ( gn ) {\n"
+			"var e = document.getElementById('fd'+gn);\n"
+			"var f = document.getElementById('sd'+gn);\n"
+			"if ( e.style.display == 'none' ){\n"
+			"e.style.display = '';\n"
+			"f.style.display = 'none';\n"
+			"}\n"
+			"else {\n"
+			"e.style.display = 'none';\n"
+			"f.style.display = '';\n"
+			"}\n"
+			"}\n"
+			"</script>\n"
+			);
+
 	if ( numGigabits && si->m_format == FORMAT_HTML )
 		sb->safePrintf("<table cellspacing=7 bgcolor=lightgray>"
 			       "<tr><td width=200px; valign=top>");
+
+	Query gigabitQuery;
+	SafeBuf ttt;
+	// limit it to 40 gigabits for now
+	for ( long i = 0 ; i < numGigabits && i < 40 ; i++ ) {
+		Gigabit *gi = &gigabits[i];
+		ttt.pushChar('\"');
+		ttt.safeMemcpy(gi->m_term,gi->m_termLen);
+		ttt.pushChar('\"');
+		ttt.pushChar(' ');
+	}
+	if ( numGigabits > 0 )
+		gigabitQuery.set2 ( ttt.getBufStart() ,
+				    si->m_queryLangId ,
+				    true , // queryexpansion?
+				    true ); // usestopwords?
+
+
+
+
 	for ( long i = 0 ; i < numGigabits ; i++ ) {
 		if ( i > 0 && si->m_format == FORMAT_HTML )
 			sb->safePrintf("<hr>");
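Note: the hunk above builds one Query out of every gigabit term, each wrapped in quotes, so that the Highlight pass inside printGigabitContainingSentences() underlines exact gigabit phrases in the fast facts (the user's own query is bolded in a second pass). A sketch of the quoted-query assembly, with Query::set2() replaced by a plain string for illustration:

#include <cstdio>
#include <string>
#include <vector>

int main ( ) {
	std::vector<std::string> terms = { "search engine", "open source" };
	std::string ttt;
	for ( size_t i = 0 ; i < terms.size() && i < 40 ; i++ ) // capped at 40
		ttt += "\"" + terms[i] + "\" ";
	printf ( "%s\n" , ttt.c_str() ); // "search engine" "open source"
	return 0;
}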
@@ -1893,8 +2147,11 @@ bool printSearchResultsHeader ( State0 *st ) {
 		// sb->safePrintf("</td><td valign=top>");
 		// print all sentences containing this gigabit
 		Gigabit *gi = &gigabits[i];
-		printGigabit ( st,sb , msg40 , gi , si );
-		sb->safePrintf("<br>");
+		//printGigabit ( st,sb , msg40 , gi , si );
+		//sb->safePrintf("<br>");
+		printGigabitContainingSentences(st,sb,msg40,gi,si,
+						&gigabitQuery);
+		sb->safePrintf("<br><br>");
 	}
 	if ( numGigabits && si->m_format == FORMAT_HTML )
 		sb->safePrintf("</td></tr></table>");
Parms.cpp (41 changed lines)
@@ -991,9 +991,9 @@ bool Parms::setGigablastRequest ( TcpSocket *socket ,
 	long obj = OBJ_GBREQUEST;
 
 	//
-	// reset THIS to defaults
+	// reset THIS to defaults. use NULL for cr since mostly for SearchInput
 	//
-	setToDefault ( THIS , obj );
+	setToDefault ( THIS , obj , NULL);
 
 	// loop through cgi parms
 	for ( long i = 0 ; i < hr->getNumFields() ; i++ ) {
@@ -1533,7 +1533,7 @@ bool Parms::printParms2 ( SafeBuf* sb ,
 		page = PAGE_SECURITY;
 
 	GigablastRequest gr;
-	g_parms.setToDefault ( (char *)&gr , OBJ_GBREQUEST );
+	g_parms.setToDefault ( (char *)&gr , OBJ_GBREQUEST , NULL);
 
 	// find in parms list
 	for ( long i = 0 ; i < m_numParms ; i++ ) {
@@ -2628,11 +2628,15 @@ void Parms::setParm ( char *THIS , Parm *m , long mm , long j , char *s ,
 	float oldVal = 0;
 	float newVal = 0;
 
-	if ( ! s && m->m_type != TYPE_CHARPTR) {
+	if ( ! s &&
+	     m->m_type != TYPE_CHARPTR &&
+	     m->m_type != TYPE_FILEUPLOADBUTTON &&
+	     m->m_defOff==-1) {
 		s = "0";
 		char *tit = m->m_title;
 		if ( ! tit || ! tit[0] ) tit = m->m_xml;
-		log(LOG_LOGIC,"admin: Parm \"%s\" had NULL default value.",
+		log(LOG_LOGIC,"admin: Parm \"%s\" had NULL default value. "
+		    "Forcing to 0.",
 		    tit);
 		//char *xx = NULL; *xx = 0;
 	}
@@ -2916,8 +2920,9 @@ void Parms::setToDefault ( char *THIS , char objType , CollectionRec *argcr ) {
 		if ( m->m_obj != objType ) continue;
 		if ( m->m_obj == OBJ_NONE ) continue;
 		if ( m->m_type == TYPE_COMMENT ) continue;
-		if ( m->m_type == TYPE_FILEUPLOADBUTTON )
-			continue;
+		// no, we gotta set GigablastRequest::m_contentFile to NULL
+		//if ( m->m_type == TYPE_FILEUPLOADBUTTON )
+		//	continue;
 		if ( m->m_type == TYPE_MONOD2 ) continue;
 		if ( m->m_type == TYPE_MONOM2 ) continue;
 		if ( m->m_type == TYPE_CMD ) continue;
@@ -2958,6 +2963,7 @@ void Parms::setToDefault ( char *THIS , char objType , CollectionRec *argcr ) {
 		if ( m->m_max <= 1 ) {
 			//if ( i == 282 ) // "query" parm
 			//	log("hey");
+			//if ( ! m->m_def ) { char *xx=NULL;*xx=0; }
 			setParm ( THIS , m, i, 0, m->m_def, false/*not enc.*/,
 				  false );
 			//((CollectionRec *)THIS)->m_orig[i] = 1;
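Note: the m_defOff checks above are the heart of the change: a parm may now declare that its default lives at an offset inside CollectionRec (see the snrp parm added below) instead of in a literal m_def string, which is why setToDefault() needs the CollectionRec argument. A hedged sketch of how a long-typed parm could resolve its default under that scheme; field names mirror the diff, the plumbing is simplified and not the real implementation:

#include <cstdlib>

struct Parm {
	long m_off;        // offset of the target field inside the object (THIS)
	long m_defOff;     // offset of the default inside CollectionRec, or -1
	const char *m_def; // literal default, used when m_defOff == -1
};

void setLongToDefault ( char *THIS , Parm *m , char *argcr ) {
	if ( m->m_defOff >= 0 && argcr ) {
		// default lives in the collection record (e.g. snrp defaults
		// from CollectionRec::m_refs_numToGenerate)
		*(long *)(THIS + m->m_off) = *(long *)(argcr + m->m_defOff);
		return;
	}
	// fall back to the literal string default ("0" if NULL, per setParm)
	const char *def = m->m_def ? m->m_def : "0";
	*(long *)(THIS + m->m_off) = atol ( def );
}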
@@ -5150,6 +5156,7 @@ void Parms::init ( ) {
 	m->m_page = PAGE_BASIC_SETTINGS;
 	m->m_obj = OBJ_COLL;
 	m->m_off = 0;
+	m->m_def = NULL;
 	m->m_type = TYPE_FILEUPLOADBUTTON;
 	m->m_flags = PF_NOSAVE | PF_DUP;
 	m++;
@@ -6987,6 +6994,21 @@ void Parms::init ( ) {
 	m->m_obj = OBJ_COLL;
 	m++;
 
+	m->m_title = "number of reference pages to generate";
+	m->m_desc  = "What is the number of "
+		"reference pages to generate per query? Set to 0 to save "
+		"CPU time.";
+	m->m_cgi = "snrp";
+	m->m_off = (char *)&si.m_refs_numToGenerate - y;
+	m->m_type = TYPE_LONG;
+	m->m_defOff =(char *)&cr.m_refs_numToGenerate - x;
+	m->m_priv = 0;
+	m->m_smin = 0;
+	m->m_flags = PF_HIDDEN | PF_NOSAVE;
+	m->m_page = PAGE_NONE;
+	m->m_obj = OBJ_SI;
+	m++;
+
 	m->m_title = "number of reference pages to display";
 	m->m_desc  = "What is the number of "
 		"reference pages to display per query?";
@@ -8312,6 +8334,7 @@ void Parms::init ( ) {
 	m->m_cgi = "iw";
 	m->m_page = PAGE_NONE;
 	m->m_obj = OBJ_SI;
+	m->m_def = "200";
 	m++;
 
 	m->m_title = "image height";
@@ -8322,6 +8345,7 @@ void Parms::init ( ) {
 	m->m_cgi = "ih";
 	m->m_page = PAGE_NONE;
 	m->m_obj = OBJ_SI;
+	m->m_def = "200";
 	m++;
 
 	// m->m_title = "password";
@@ -13352,6 +13376,7 @@ void Parms::init ( ) {
 	m->m_cgi = "urls";
 	m->m_page = PAGE_ADDURL2;
 	m->m_obj = OBJ_NONE;
+	m->m_def = NULL;
 	m->m_type = TYPE_FILEUPLOADBUTTON;
 	m++;
 	*/
@@ -14152,7 +14177,7 @@ void Parms::init ( ) {
 	m->m_cgi = "dsrt";
 	m->m_off = (char *)&cr.m_docsToScanForTopics - x;
 	m->m_type = TYPE_LONG;
-	m->m_def = "300";
+	m->m_def = "30";
 	m->m_flags = PF_API;
 	m->m_page = PAGE_SEARCH;
 	m->m_obj = OBJ_COLL;
Parms.h (2 changed lines)
@@ -368,7 +368,7 @@ class Parms {
 			bool isHtmlEncoded , bool fromRequest ) ;
 
 	void setToDefault ( char *THIS , char objType ,
-			    CollectionRec *argcr = NULL ) ;
+			    CollectionRec *argcr );//= NULL ) ;
 
 	bool setFromFile ( void *THIS ,
 			   char *filename ,
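Note: the header drops the `= NULL` default rather than keeping it, which turns every stale call site into a compile error until someone states the CollectionRec explicitly; that is exactly the audit the rest of this commit performs. Illustrative only:

// Dropping a default argument forces an explicit decision at each caller.
class CollectionRec;
struct ParmsSketch {
	// old: void setToDefault ( char *THIS , char objType ,
	//                          CollectionRec *argcr = NULL );
	void setToDefault ( char *THIS , char objType , CollectionRec *argcr );
};
// g_parms.setToDefault ( p , OBJ_CONF );        // no longer compiles
// g_parms.setToDefault ( p , OBJ_CONF , NULL ); // caller must choose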
SearchInput.cpp
@@ -287,7 +287,7 @@ bool SearchInput::set ( TcpSocket *sock , HttpRequest *r ) { //, Query *q ) {
 	}
 
 	// and set from the http request. will set m_coll, etc.
-	g_parms.setToDefault ( (char *)this , OBJ_SI );
+	g_parms.setToDefault ( (char *)this , OBJ_SI , cr );
 
 
 	///////
@@ -17162,6 +17162,11 @@ bool Sectiondb::init ( ) {
 	// . just hard-code 5MB for now
 	long pcmem = 5000000; // = g_conf.m_sectiondbMaxDiskPageCacheMem;
 
+	// do not use for now i think we use posdb and store the 32bit
+	// val in the key for facet type stuff
+	pcmem = 0;
+	maxTreeMem = 100000;
+	maxTreeNodes = 1000;
 
 	/*
 	key128_t k;
XmlDoc.cpp (33 changed lines)
@@ -2090,8 +2090,8 @@ void XmlDoc::getRevisedSpiderRequest ( SpiderRequest *revisedReq ) {
 	// this must be valid for us of course
 	if ( ! m_firstIpValid ) { char *xx=NULL;*xx=0; }
 
-	// wtf?
-	if ( m_firstIp == 0 || m_firstIp == -1 ) { char *xx=NULL;*xx=0; }
+	// wtf? it might be invalid!!! parent caller will handle it...
+	//if ( m_firstIp == 0 || m_firstIp == -1 ) { char *xx=NULL;*xx=0; }
 
 	// store the real ip in there now
 	revisedReq->m_firstIp = m_firstIp;
@@ -22122,8 +22122,6 @@ char *XmlDoc::getMetaList ( bool forDelete ) {
 	setStatus("adding spider request");
 	// checkpoint
 	saved = m_p;
-	// copy it
-	*m_p++ = RDB_SPIDERDB;
 	// store it here
 	SpiderRequest revisedReq;
 	// this fills it in
@@ -22141,6 +22139,8 @@ char *XmlDoc::getMetaList ( bool forDelete ) {
 		goto skipNewAdd2;
 	}
 
+	// copy it
+	*m_p++ = RDB_SPIDERDB;
 	// store it back
 	memcpy ( m_p , &revisedReq , revisedReq.getRecSize() );
 	// skip over it
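Note: the two getMetaList() hunks above move the `*m_p++ = RDB_SPIDERDB;` tag byte to after the `goto skipNewAdd2` guard. Each meta-list record is a one-byte rdbId followed by its payload, so writing the tag before deciding whether to add the record would leave a stray byte in the buffer whenever the add is skipped. A hedged sketch of the append pattern; the RDB_SPIDERDB value here is a placeholder, not the real enum value:

#include <cstring>

static const char RDB_SPIDERDB = 7; // placeholder value

char *appendRecord ( char *p , const void *rec , long recSize , bool skip ) {
	if ( skip ) return p;   // nothing written, buffer stays intact
	*p++ = RDB_SPIDERDB;    // tag byte, written only on the append path
	memcpy ( p , rec , recSize );
	return p + recSize;
}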
@@ -28530,15 +28530,15 @@ Msg20Reply *XmlDoc::getMsg20Reply ( ) {
 		reply->size_content = size_utf8Content;
 	}
 
-	if ( m_req->m_getSectionVotingInfo && m_tmpBuf3.getCapacity() <= 0 ) {
-		Sections *ss = getSections();
-		if ( ! ss || ss == (void *)-1) return (Msg20Reply *)ss;
-		// will at least store a \0 in there, but will not count
-		// as part of the m_tmpBuf.length()
-		ss->printVotingInfoInJSON ( &m_tmpBuf3 );
-		reply-> ptr_sectionVotingInfo = m_tmpBuf3.getBufStart();
-		reply->size_sectionVotingInfo = m_tmpBuf3.length() + 1;
-	}
+	// if ( m_req->m_getSectionVotingInfo && m_tmpBuf3.getCapacity() <=0) {
+	// 	Sections *ss = getSections();
+	// 	if ( ! ss || ss == (void *)-1) return (Msg20Reply *)ss;
+	// 	// will at least store a \0 in there, but will not count
+	// 	// as part of the m_tmpBuf.length()
+	// 	ss->printVotingInfoInJSON ( &m_tmpBuf3 );
+	// 	reply-> ptr_sectionVotingInfo = m_tmpBuf3.getBufStart();
+	// 	reply->size_sectionVotingInfo = m_tmpBuf3.length() + 1;
+	// }
 
 	// breathe
 	QUICKPOLL ( m_niceness );
@@ -29697,7 +29697,7 @@ SafeBuf *XmlDoc::getSampleForGigabits ( ) {
 		       e[-2] == '!' ) )
 			endsInPeriod = true;
 
-	long off = reply.length();
+	//long off = reply.length();
 
 	if ( ! reply.safePrintFilterTagsAndLines ( p , e-p ,false ) )
 		return NULL;
@@ -29734,8 +29734,9 @@ SafeBuf *XmlDoc::getSampleForGigabits ( ) {
 	*pc = '\0';
 
 	// debug
-	char *x = reply.getBufStart() + off;
-	log("fastfact: %s",x);
+	//char *x = reply.getBufStart() + off;
+	// turn off fast fact debug for now
+	//log("fastfact: fastfact: %s",x);
 	// revert back to |
 	*pc = '|';