Merge branch 'diffbot' of github.com:gigablast/open-source-search-engine into diffbot

Matt Wells 2014-01-30 13:11:48 -08:00
commit 40f373c9e0
113 changed files with 5815 additions and 3112 deletions

@ -849,9 +849,8 @@ bool AutoBan::printTable( TcpSocket *s , HttpRequest *r ) {
setCodesFromConf();
}
sb.safePrintf("\n<br><br><table width=100%% bgcolor=#%s "
"cellpadding=4 border=1>\n",
BABY_BLUE);
sb.safePrintf("\n<br><br><table %s>\n",TABLE_STYLE);
getCalendarFromMs((now - m_codeResetTime) * 1000,
&days,
&hours,
@ -1134,9 +1133,7 @@ bool AutoBan::printTable( TcpSocket *s , HttpRequest *r ) {
sb.safePrintf("\n<table width=100%% bgcolor=#%s "
"cellpadding=4 border=1>\n",
BABY_BLUE);
sb.safePrintf("\n<table %s>\n",TABLE_STYLE);
sb.safePrintf("<tr><td colspan=2 bgcolor=#%s>"
"<center><b>Add IPs</b></center></td></tr>",
DARK_BLUE);
@ -1174,9 +1171,7 @@ bool AutoBan::printTable( TcpSocket *s , HttpRequest *r ) {
/////////////////////////////////////////////////////////////////////
sb.safePrintf("\n<table width=100%% bgcolor=#%s "
"cellpadding=4 border=1>\n",
BABY_BLUE);
sb.safePrintf("\n<table %s>\n",TABLE_STYLE);
sb.safePrintf("<tr><td colspan=3 bgcolor=#%s>"
"<center><b>Watched Ips</b></center></td></tr>",
@ -1315,9 +1310,7 @@ bool AutoBan::printTable( TcpSocket *s , HttpRequest *r ) {
// MDW moved from here
sb.safePrintf("\n<br><br><table width=100%% bgcolor=#%s "
"cellpadding=4 border=1>\n",
BABY_BLUE);
sb.safePrintf("\n<br><br><table %s>\n",TABLE_STYLE);
sb.safePrintf("<tr><td colspan=5 bgcolor=#%s>"
"<center><b>Control Panel</b></center></td></tr>",
@ -1362,9 +1355,7 @@ bool AutoBan::printTable( TcpSocket *s , HttpRequest *r ) {
}
sb.safePrintf("\n<br><br><table width=100%% bgcolor=#%s "
"cellpadding=4 border=1>\n",
BABY_BLUE);
sb.safePrintf("\n<br><br><table %s>\n",TABLE_STYLE);
sb.safePrintf("<tr><td colspan=6 bgcolor=#%s>"
"<center><b>Queries Today</b></center></td></tr>",

@ -569,7 +569,9 @@ bool BigFile::readwrite ( void *buf ,
}
// otherwise, thread spawn failed, do it blocking then
g_errno = 0;
if ( ! g_threads.m_disabled ) {
// if threads are manually disabled don't print these msgs because
// we redbox the fact above the controls in Pages.cpp
if ( g_conf.m_useThreads && ! g_threads.m_disabled ) {
static long s_lastTime = 0;
long now = getTime();
if ( now - s_lastTime >= 1 ) {

@ -651,7 +651,10 @@ void Blaster::gotDoc2 ( void *state, TcpSocket *s){
false,
0,
false,
TITLEREC_CURRENT_VERSION)){
TITLEREC_CURRENT_VERSION ,
true , // set parents
0 , // niceness
CT_XML )){ // content type
log(LOG_WARN,"blaster: Couldn't set XML1 Class in gotDoc2");
}
Links links1;
@ -679,7 +682,10 @@ void Blaster::gotDoc2 ( void *state, TcpSocket *s){
false,
0,
false,
TITLEREC_CURRENT_VERSION)){
TITLEREC_CURRENT_VERSION,
true , // setparents
0 , // niceness
CT_XML )){
log(LOG_WARN,"blaster: Couldn't set XML2 Class in gotDoc2");
}
Links links2;
@ -1170,7 +1176,10 @@ void Blaster::gotDoc4 ( void *state, TcpSocket *s){
false,
0,
false,
TITLEREC_CURRENT_VERSION)){
TITLEREC_CURRENT_VERSION,
true, // setparents
0, // niceness
CT_XML )){
log(LOG_WARN,"blaster: Couldn't set XML Class in gotDoc4");
}
Links links;

@ -71,9 +71,9 @@ bool Cachedb::init ( ) {
return false;
// add the base since it is a collectionless rdb
return m_rdb.addColl ( NULL );
return m_rdb.addRdbBase1 ( NULL );
}
/*
bool Cachedb::addColl ( char *coll, bool doVerify ) {
if ( ! m_rdb.addColl ( coll ) ) return false;
if ( ! doVerify ) return true;
@ -85,7 +85,7 @@ bool Cachedb::addColl ( char *coll, bool doVerify ) {
log ( "db: Verify failed, but scaling is allowed, passing." );
return true;
}
*/
bool Cachedb::verify ( char *coll ) {
// coll is NULL here methinks
log ( LOG_DEBUG, "db: Verifying %s...",m_name );

@ -84,7 +84,7 @@ bool Catdb::init ( ) {
// Rdb::getBase(collnum_t) will return. however, for collectionless
// rdb databases we set Rdb::m_collectionlessBase special here.
// This was in Rdb.cpp::init().
return m_rdb.addColl ( NULL );
return m_rdb.addRdbBase1 ( NULL );
}
bool Catdb::init2 ( long treeMem ) {
@ -112,6 +112,7 @@ bool Catdb::init2 ( long treeMem ) {
// end support for "cache recs"
//
/*
bool Catdb::addColl ( char *coll, bool doVerify ) {
if ( ! m_rdb.addColl ( coll ) ) return false;
// verify
@ -123,6 +124,7 @@ bool Catdb::addColl ( char *coll, bool doVerify ) {
log ( "db: Verify failed, but scaling is allowed, passing." );
return true;
}
*/
bool Catdb::verify ( char *coll ) {
char *rdbName = "Catdb";

@ -337,7 +337,7 @@ bool Clusterdb::init2 ( long treeMem ) {
12 , // key size
true ); // bias disk page cache
}
/*
bool Clusterdb::addColl ( char *coll, bool doVerify ) {
if ( ! m_rdb.addColl ( coll ) ) return false;
if ( ! doVerify ) return true;
@ -349,7 +349,7 @@ bool Clusterdb::addColl ( char *coll, bool doVerify ) {
log ( "db: Verify failed, but scaling is allowed, passing." );
return true;
}
*/
bool Clusterdb::verify ( char *coll ) {
log ( LOG_DEBUG, "db: Verifying Clusterdb for coll %s...", coll );
g_threads.disableThreads();

@ -34,7 +34,13 @@ Collectiondb g_collectiondb;
Collectiondb::Collectiondb ( ) {
m_numRecs = 0;
m_numRecsUsed = 0;
m_lastUpdateTime = 0LL;
//m_lastUpdateTime = 0LL;
m_needsSave = false;
// sanity
if ( RDB_END2 >= RDB_END ) return;
log("db: increase RDB_END2 to at least %li in "
"Collectiondb.h",(long)RDB_END);
char *xx=NULL;*xx=0;
}
// reset rdb
@ -51,6 +57,7 @@ void Collectiondb::reset() {
g_collTable.reset();
}
/*
bool Collectiondb::init ( bool isDump ) {
reset();
if ( g_isYippy ) return true;
@ -77,6 +84,7 @@ bool Collectiondb::init ( bool isDump ) {
// otherwise, true, even if reloadList() blocked
return true;
}
*/
// . save to disk
// . returns false if blocked, true otherwise
@ -95,7 +103,12 @@ bool Collectiondb::save ( ) {
return true;
}
bool Collectiondb::load ( bool isDump ) {
///////////
//
// fill up our m_recs[] array based on the coll.*.*/coll.conf files
//
///////////
bool Collectiondb::loadAllCollRecs ( ) {
char dname[1024];
// MDW: sprintf ( dname , "%s/collections/" , g_hostdb.m_dir );
sprintf ( dname , "%s" , g_hostdb.m_dir );
@ -104,7 +117,7 @@ bool Collectiondb::load ( bool isDump ) {
if ( ! d.open ()) return log("admin: Could not load collection config "
"files.");
// note it
log(LOG_INFO,"db: Loading collection config files.");
//log(LOG_INFO,"db: loading collection config files.");
// . scan through all subdirs in the collections dir
// . they should be like, "coll.main/" and "coll.mycollection/"
char *f;
@ -122,16 +135,23 @@ bool Collectiondb::load ( bool isDump ) {
// get collnum
collnum_t collnum = atol ( pp + 1 );
// add it
if ( ! addExistingColl ( coll , collnum ,isDump ) )
if ( ! addExistingColl ( coll , collnum ) )
return false;
}
// note it
log(LOG_INFO,"db: Loaded data for %li collections. Ranging from "
"collection #0 to #%li.",m_numRecsUsed,m_numRecs-1);
//log(LOG_INFO,"db: Loaded data for %li collections. Ranging from "
// "collection #0 to #%li.",m_numRecsUsed,m_numRecs-1);
// update the time
updateTime();
//updateTime();
// don't clean the tree if just dumpin
if ( isDump ) return true;
//if ( isDump ) return true;
return true;
}
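
For reference, a minimal standalone sketch of how a coll.<name>.<collnum> directory entry can be split into its collection name and collnum, mirroring the atol(pp+1) parse in the loop above (the function name and layout are illustrative, not part of this commit):

#include <cstdlib>
#include <cstring>

// parse a "coll.main.0" style directory name into name + collnum;
// returns false if the entry is not a collection directory
static bool parseCollDirName ( const char *f , char *name , long nameSize ,
                               long *collnum ) {
	// must start with "coll."
	if ( strncmp ( f , "coll." , 5 ) != 0 ) return false;
	// the last '.' precedes the collnum
	const char *pp = strrchr ( f , '.' );
	if ( ! pp || pp <= f + 4 ) return false;
	// copy out the name between "coll." and the last '.'
	long len = pp - ( f + 5 );
	if ( len <= 0 || len >= nameSize ) return false;
	memcpy ( name , f + 5 , len );
	name[len] = '\0';
	// the trailing number is the collnum, like atol(pp+1) above
	*collnum = atol ( pp + 1 );
	return true;
}

// e.g. parseCollDirName("coll.main.0",buf,sizeof(buf),&collnum)
// yields name "main" and collnum 0.
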
// after we've initialized all rdbs in main.cpp call this to clean out
// our rdb trees
bool Collectiondb::cleanTrees ( ) {
// remove any nodes with illegal collnums
Rdb *r;
//r = g_indexdb.getRdb();
@ -158,7 +178,7 @@ bool Collectiondb::load ( bool isDump ) {
// success
return true;
}
/*
void Collectiondb::updateTime() {
// get time now in milliseconds
long long newTime = gettimeofdayInMilliseconds();
@ -169,14 +189,13 @@ void Collectiondb::updateTime() {
// we need a save
m_needsSave = true;
}
*/
#include "Statsdb.h"
#include "Cachedb.h"
#include "Syncdb.h"
bool Collectiondb::addExistingColl ( char *coll,
collnum_t collnum ,
bool isDump ) {
bool Collectiondb::addExistingColl ( char *coll, collnum_t collnum ) {
long i = collnum;
@ -221,7 +240,7 @@ bool Collectiondb::addExistingColl ( char *coll,
"\"%s\".",coll);
}
if ( ! registerCollRec ( cr , isDump , false ) ) return false;
if ( ! registerCollRec ( cr , false ) ) return false;
// we need to compile the regular expressions or update the url
// filters with new logic that maps crawlbot parms to url filters
@ -454,6 +473,16 @@ bool Collectiondb::addNewColl ( char *coll ,
memset ( &cr->m_localCrawlInfo , 0 , sizeof(CrawlInfo) );
memset ( &cr->m_globalCrawlInfo , 0 , sizeof(CrawlInfo) );
// note that
log("colldb: initial revival for %s",cr->m_coll);
// . assume we got some urls ready to spider
// . Spider.cpp will wait SPIDER_DONE_TIME seconds and if it has no
// urls it spidered in that time these will get set to 0 and it
// will send out an email alert if m_sentCrawlDoneAlert is not true.
cr->m_localCrawlInfo.m_hasUrlsReadyToSpider = 1;
cr->m_globalCrawlInfo.m_hasUrlsReadyToSpider = 1;
// set some defaults. max spiders for all priorities in this
// collection. NO, default is in Parms.cpp.
//cr->m_maxNumSpiders = 10;
@ -496,46 +525,66 @@ bool Collectiondb::addNewColl ( char *coll ,
}
return registerCollRec ( cr , false , true );
if ( ! registerCollRec ( cr , true ) )
return false;
// add the rdbbases for this coll, CollectionRec::m_bases[]
if ( ! addRdbBasesForCollRec ( cr ) )
return false;
return true;
}
// . called only by addNewColl() and by addExistingColl()
bool Collectiondb::registerCollRec ( CollectionRec *cr ,
bool isDump ,
bool isNew ) {
bool Collectiondb::registerCollRec ( CollectionRec *cr , bool isNew ) {
// add m_recs[] and to hashtable
if ( ! setRecPtr ( cr->m_collnum , cr ) )
return false;
bool verify = true;
return true;
}
bool Collectiondb::addRdbBaseToAllRdbsForEachCollRec ( ) {
for ( long i = 0 ; i < m_numRecs ; i++ ) {
CollectionRec *cr = m_recs[i];
if ( ! cr ) continue;
// add rdb base files etc. for it
addRdbBasesForCollRec ( cr );
}
return true;
}
bool Collectiondb::addRdbBasesForCollRec ( CollectionRec *cr ) {
char *coll = cr->m_coll;
//////
//
// if we are doing a dump from the command line, skip this stuff
if ( isDump ) return true;
if ( isNew ) verify = false;
//
//////
if ( g_dumpMode ) return true;
// tell rdbs to add one, too
//if ( ! g_indexdb.addColl ( coll, verify ) ) goto hadError;
if ( ! g_posdb.addColl ( coll, verify ) ) goto hadError;
//if ( ! g_datedb.addColl ( coll, verify ) ) goto hadError;
//if ( ! g_indexdb.getRdb()->addRdbBase1 ( coll ) ) goto hadError;
if ( ! g_posdb.getRdb()->addRdbBase1 ( coll ) ) goto hadError;
//if ( ! g_datedb.getRdb()->addRdbBase1 ( coll ) ) goto hadError;
if ( ! g_titledb.addColl ( coll, verify ) ) goto hadError;
//if ( ! g_revdb.addColl ( coll, verify ) ) goto hadError;
//if ( ! g_sectiondb.addColl ( coll, verify ) ) goto hadError;
if ( ! g_tagdb.addColl ( coll, verify ) ) goto hadError;
//if ( ! g_catdb.addColl ( coll, verify ) ) goto hadError;
//if ( ! g_checksumdb.addColl ( coll, verify ) ) goto hadError;
//if ( ! g_tfndb.addColl ( coll, verify ) ) goto hadError;
if ( ! g_clusterdb.addColl ( coll, verify ) ) goto hadError;
if ( ! g_linkdb.addColl ( coll, verify ) ) goto hadError;
if ( ! g_spiderdb.addColl ( coll, verify ) ) goto hadError;
if ( ! g_doledb.addColl ( coll, verify ) ) goto hadError;
if ( ! g_titledb.getRdb()->addRdbBase1 ( coll ) ) goto hadError;
//if ( ! g_revdb.getRdb()->addRdbBase1 ( coll ) ) goto hadError;
//if ( ! g_sectiondb.getRdb()->addRdbBase1 ( coll ) ) goto hadError;
if ( ! g_tagdb.getRdb()->addRdbBase1 ( coll ) ) goto hadError;
//if ( ! g_catdb.getRdb()->addRdbBase1 ( coll ) ) goto hadError;
//if ( ! g_checksumdb.getRdb()->addRdbBase1 ( coll ) ) goto hadError;
//if ( ! g_tfndb.getRdb()->addRdbBase1 ( coll ) ) goto hadError;
if ( ! g_clusterdb.getRdb()->addRdbBase1 ( coll ) ) goto hadError;
if ( ! g_linkdb.getRdb()->addRdbBase1 ( coll ) ) goto hadError;
if ( ! g_spiderdb.getRdb()->addRdbBase1 ( coll ) ) goto hadError;
if ( ! g_doledb.getRdb()->addRdbBase1 ( coll ) ) goto hadError;
// now clean the trees
cleanTrees();
// debug message
//log ( LOG_INFO, "db: verified collection \"%s\" (%li).",
@ -637,6 +686,22 @@ bool Collectiondb::deleteRec ( char *coll , WaitEntry *we ) {
}
*/
// if there is an outstanding disk read thread or merge thread then
// Spider.cpp will handle the delete in the callback.
void Collectiondb::deleteSpiderColl ( SpiderColl *sc ) {
sc->m_deleteMyself = true;
// if not currently being accessed nuke it now
if ( ! sc->m_msg5.m_waitingForList &&
! sc->m_msg5b.m_waitingForList &&
! sc->m_msg1.m_mcast.m_inUse ) {
mdelete ( sc, sizeof(SpiderColl),"nukecr2");
delete ( sc );
return;
}
}
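
A condensed sketch of the deferred-delete ("death row") pattern used above, with illustrative stand-in fields; the real SpiderColl tracks more in-flight state than this:

// stand-in for the SpiderColl fields involved in the deferred delete
struct SpiderCollLike {
	bool m_deleteMyself;   // set by the owner when it wants us gone
	bool m_readInFlight;   // stands in for m_msg5/m_msg5b.m_waitingForList
	bool m_addInFlight;    // stands in for m_msg1.m_mcast.m_inUse
};

// owner side: mark for deletion, free now only if nothing is outstanding
void requestDelete ( SpiderCollLike *sc ) {
	sc->m_deleteMyself = true;
	if ( ! sc->m_readInFlight && ! sc->m_addInFlight ) {
		delete sc;          // nothing in flight, nuke it right away
		return;
	}
	// otherwise the completion callback below frees it later
}

// I/O side: when the last outstanding operation completes, honor
// the pending delete request
void onOperationDone ( SpiderCollLike *sc ) {
	sc->m_readInFlight = false;
	if ( sc->m_deleteMyself && ! sc->m_addInFlight ) delete sc;
}
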
bool Collectiondb::deleteRec2 ( collnum_t collnum ) { //, WaitEntry *we ) {
// do not allow this if in repair mode
if ( g_repairMode > 0 ) {
@ -724,10 +789,14 @@ bool Collectiondb::deleteRec2 ( collnum_t collnum ) { //, WaitEntry *we ) {
SpiderColl *sc = g_spiderCache.getSpiderCollIffNonNull(collnum);
if ( sc ) {
// remove locks from lock table:
sc->clear();
sc->clearLocks();
//sc->m_collnum = newCollnum;
sc->reset();
mdelete ( sc, sizeof(SpiderColl),"nukecr2");
//sc->reset();
// this will put it on "death row" so it will be deleted
// once Msg5::m_waitingForList/Merge is NULL
deleteSpiderColl ( sc );
//mdelete ( sc, sizeof(SpiderColl),"nukecr2");
//delete ( sc );
cr->m_spiderColl = NULL;
}
@ -872,7 +941,7 @@ bool Collectiondb::setRecPtr ( collnum_t collnum , CollectionRec *cr ) {
}
// update the time
updateTime();
//updateTime();
return true;
}
@ -926,8 +995,19 @@ bool Collectiondb::resetColl2( collnum_t oldCollnum,
// reset spider info
SpiderColl *sc = g_spiderCache.getSpiderCollIffNonNull(oldCollnum);
if ( sc ) {
sc->clear();
sc->m_collnum = newCollnum;
// remove locks from lock table:
sc->clearLocks();
// don't do this anymore, just nuke it in case
// m_populatingDoledb was true etc. there are too many
// flags to worry about
//sc->m_collnum = newCollnum;
//sc->reset();
// this will put it on "death row" so it will be deleted
// once Msg5::m_waitingForList/Merge is NULL
deleteSpiderColl ( sc );
//mdelete ( sc, sizeof(SpiderColl),"nukecr2");
//delete ( sc );
cr->m_spiderColl = NULL;
}
// reset spider round
@ -1052,7 +1132,7 @@ bool addCollToTable ( char *coll , collnum_t collnum ) {
// get coll rec specified in the HTTP request
CollectionRec *Collectiondb::getRec ( HttpRequest *r ) {
CollectionRec *Collectiondb::getRec ( HttpRequest *r , bool useDefaultRec ) {
char *coll = r->getString ( "c" );
if ( coll && ! coll[0] ) coll = NULL;
// maybe it is crawlbot?
@ -1067,6 +1147,18 @@ CollectionRec *Collectiondb::getRec ( HttpRequest *r ) {
snprintf(tmp,MAX_COLL_LEN,"%s-%s",token,name);
coll = tmp;
}
// default to main first
if ( ! coll && useDefaultRec ) {
CollectionRec *cr = g_collectiondb.getRec("main");
if ( cr ) return cr;
}
// try next in line
if ( ! coll && useDefaultRec ) {
return getFirstRec ();
}
// give up?
if ( ! coll ) return NULL;
//if ( ! coll || ! coll[0] ) coll = g_conf.m_defaultColl;
@ -1296,7 +1388,7 @@ CollectionRec::CollectionRec() {
//m_spiderStatusMsg = NULL;
// for Url::getSite()
m_updateSiteRulesTable = 1;
m_lastUpdateTime = 0LL;
//m_lastUpdateTime = 0LL;
m_clickNScrollEnabled = false;
// inits for sortbydatetable
m_inProgress = false;
@ -1359,6 +1451,10 @@ void CollectionRec::setToDefaults ( ) {
void CollectionRec::reset() {
// . grows dynamically
// . setting to 0 buckets should never have error
//m_pageCountTable.set ( 4,4,0,NULL,0,false,MAX_NICENESS,"pctbl" );
// regex_t types
if ( m_hasucr ) regfree ( &m_ucr );
if ( m_hasupr ) regfree ( &m_upr );
@ -1378,6 +1474,27 @@ void CollectionRec::reset() {
rdb->resetBase ( m_collnum );
}
for ( long i = 0 ; i < g_process.m_numRdbs ; i++ ) {
RdbBase *base = m_bases[i];
if ( ! base ) continue;
mdelete (base, sizeof(RdbBase), "Rdb Coll");
delete (base);
}
SpiderColl *sc = m_spiderColl;
// if never made one, we are done
if ( ! sc ) return;
// spider coll also!
sc->m_deleteMyself = true;
// if not currently being accessed nuke it now
if ( ! sc->m_msg5.m_waitingForList &&
! sc->m_msg5b.m_waitingForList &&
! sc->m_msg1.m_mcast.m_inUse ) {
mdelete ( sc, sizeof(SpiderColl),"nukecr2");
delete ( sc );
}
}
CollectionRec *g_cr = NULL;
@ -1404,7 +1521,8 @@ bool CollectionRec::load ( char *coll , long i ) {
m_collLen = gbstrlen ( coll );
strcpy ( m_coll , coll );
log(LOG_INFO,"db: loading data for %s",coll);
log(LOG_INFO,"db: loading conf for collection %s (%li)",coll,
(long)m_collnum);
// collection name HACK for backwards compatibility
//if ( strcmp ( coll , "main" ) == 0 ) {
@ -1440,6 +1558,43 @@ bool CollectionRec::load ( char *coll , long i ) {
//m_localCrawlInfo.setFromSafeBuf(&sb);
// it is binary now
memcpy ( &m_localCrawlInfo , sb.getBufStart(),sb.length() );
log("coll: loaded %s (%li) local hasurlsready=%li",
m_coll,
(long)m_collnum,
(long)m_localCrawlInfo.m_hasUrlsReadyToSpider);
// we introduced the "this round" counts, so don't start them at 0!!
if ( m_spiderRoundNum == 0 &&
m_localCrawlInfo.m_pageDownloadSuccessesThisRound <
m_localCrawlInfo.m_pageDownloadSuccesses ) {
log("coll: fixing process count this round for %s",m_coll);
m_localCrawlInfo.m_pageDownloadSuccessesThisRound =
m_localCrawlInfo.m_pageDownloadSuccesses;
}
// we introduced the "this round" counts, so don't start them at 0!!
if ( m_spiderRoundNum == 0 &&
m_localCrawlInfo.m_pageProcessSuccessesThisRound <
m_localCrawlInfo.m_pageProcessSuccesses ) {
log("coll: fixing process count this round for %s",m_coll);
m_localCrawlInfo.m_pageProcessSuccessesThisRound =
m_localCrawlInfo.m_pageProcessSuccesses;
}
// fix from old bug that was fixed
//if ( m_spiderRoundNum == 0 &&
// m_collectiveRespiderFrequency > 0.0 &&
// m_localCrawlInfo.m_sentCrawlDoneAlert ) {
// log("coll: bug fix: resending email alert for coll %s (%li) "
// "of respider freq %f",m_coll,(long)m_collnum,
// m_collectiveRespiderFrequency);
// m_localCrawlInfo.m_sentCrawlDoneAlert = false;
//}
// LOAD GLOBAL
snprintf ( tmp1 , 1023, "%scoll.%s.%li/globalcrawlinfo.dat",
g_hostdb.m_dir , m_coll , (long)m_collnum );
@ -1451,20 +1606,23 @@ bool CollectionRec::load ( char *coll , long i ) {
// it is binary now
memcpy ( &m_globalCrawlInfo , sb.getBufStart(),sb.length() );
log("coll: loaded %s (%li) global hasurlsready=%li",
m_coll,
(long)m_collnum,
(long)m_globalCrawlInfo.m_hasUrlsReadyToSpider);
////////////
//
// PAGE COUNT TABLE for doing quotas in url filters
//
/////////////
// . grows dynamically
// . setting to 0 buckets should never have error
m_pageCountTable.set ( 4,4,0,NULL,0,false,MAX_NICENESS,"pctbl" );
// log it up if there on disk
snprintf ( tmp1 , 1023, "/coll.%s.%li/pagecounts.dat",
m_coll , (long)m_collnum );
if ( ! m_pageCountTable.load ( g_hostdb.m_dir , tmp1 ) && g_errno )
log("db: failed to load page count table: %s",
mstrerror(g_errno));
//snprintf ( tmp1 , 1023, "/coll.%s.%li/pagecounts.dat",
// m_coll , (long)m_collnum );
//if ( ! m_pageCountTable.load ( g_hostdb.m_dir , tmp1 ) && g_errno )
// log("db: failed to load page count table: %s",
// mstrerror(g_errno));
// ignore errors i guess
g_errno = 0;
@ -1619,11 +1777,11 @@ void CollectionRec::setUrlFiltersToDefaults ( ) {
m_spiderIpWaits[n] = 1000;
m_numRegExs5++;
m_spiderIpMaxSpiders[n] = 1;
m_spiderIpMaxSpiders[n] = 7;
m_numRegExs6++;
m_spidersEnabled[n] = 1;
m_numRegExs7++;
//m_spidersEnabled[n] = 1;
//m_numRegExs7++;
m_harvestLinks[n] = 1;
m_numRegExs8++;
@ -1724,19 +1882,24 @@ bool CollectionRec::save ( ) {
tmp,mstrerror(g_errno));
g_errno = 0;
}
// save page count table which has # of pages indexed per
// subdomain/site and firstip for doing quotas in url filters table
snprintf ( tmp , 1023, "coll.%s.%li/pagecounts.dat",
m_coll , (long)m_collnum );
if ( ! m_pageCountTable.save ( g_hostdb.m_dir , tmp ) ) {
log("db: failed to save file %s : %s",tmp,mstrerror(g_errno));
g_errno = 0;
}
// do not need a save now
m_needsSave = false;
// waiting tree is saved in SpiderCache::save() called by Process.cpp
//SpiderColl *sc = m_spiderColl;
//if ( ! sc ) return true;
// save page count table which has # of pages indexed per
// subdomain/site and firstip for doing quotas in url filters table
//snprintf ( tmp , 1023, "coll.%s.%li/pagecounts.dat",
// m_coll , (long)m_collnum );
//if ( ! m_pageCountTable.save ( g_hostdb.m_dir , tmp ) ) {
// log("db: failed to save file %s : %s",tmp,mstrerror(g_errno));
// g_errno = 0;
//}
return true;
}
@ -1937,10 +2100,10 @@ bool CollectionRec::rebuildUrlFilters ( ) {
for ( long i = 0 ; i < MAX_FILTERS ; i++ ) {
m_regExs[i].purge();
m_spiderPriorities[i] = 0;
m_maxSpidersPerRule [i] = 10;
m_maxSpidersPerRule [i] = 100;
m_spiderIpWaits [i] = wait;
m_spiderIpMaxSpiders[i] = 7; // keep it respectful
m_spidersEnabled [i] = 1;
//m_spidersEnabled [i] = 1;
m_spiderFreqs [i] =m_collectiveRespiderFrequency;
//m_spiderDiffbotApiUrl[i].purge();
m_harvestLinks[i] = true;
@ -1961,6 +2124,24 @@ bool CollectionRec::rebuildUrlFilters ( ) {
i++;
}
// and for docs that have a single error, retry quickly (~86 seconds)
m_regExs[i].set("errorcount==1");
m_spiderPriorities [i] = 40;
m_spiderFreqs [i] = 0.001; // 86 seconds
i++;
// and for docs with two errors, retry every couple of hours
m_regExs[i].set("errorcount==2");
m_spiderPriorities [i] = 40;
m_spiderFreqs [i] = 0.1; // 2.4 hrs
i++;
// excessive errors? (tcp/dns timed out, etc.) retry once per month?
m_regExs[i].set("errorcount>=3");
m_spiderPriorities [i] = 30;
m_spiderFreqs [i] = 30; // 30 days
i++;
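
The frequencies in these rules are in days, consistent with the inline comments; a quick check of the conversions (standalone, illustrative only):

#include <cstdio>

int main ( ) {
	// m_spiderFreqs values from the rules above, expressed in days
	printf ( "errorcount==1: %.0f seconds\n" , 0.001 * 86400.0 ); // ~86 s
	printf ( "errorcount==2: %.1f hours\n"   , 0.1   * 24.0    ); // 2.4 hrs
	printf ( "errorcount>=3: %.0f days\n"    , 30.0             ); // monthly
	return 0;
}
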
// 3rd rule for respidering
if ( m_collectiveRespiderFrequency > 0.0 ) {
m_regExs[i].set("lastspidertime>={roundstart}");
@ -1968,7 +2149,11 @@ bool CollectionRec::rebuildUrlFilters ( ) {
m_spiderPriorities [i] = 10;
// just turn off spidering. if we were to set priority to
// filtered it would be removed from index!
m_spidersEnabled [i] = 0;
//m_spidersEnabled [i] = 0;
m_maxSpidersPerRule[i] = 0;
// temp hack so it processes in xmldoc.cpp::getUrlFilterNum()
// which has been obsoleted, but we are running old code now!
//m_spiderDiffbotApiUrl[i].set ( api );
i++;
}
// if collectiverespiderfreq is 0 or less then do not RE-spider
@ -1981,22 +2166,14 @@ bool CollectionRec::rebuildUrlFilters ( ) {
m_spiderPriorities [i] = 10;
// just turn off spidering. if we were to set priority to
// filtered it would be removed from index!
m_spidersEnabled [i] = 0;
//m_spidersEnabled [i] = 0;
m_maxSpidersPerRule[i] = 0;
// temp hack so it processes in xmldoc.cpp::getUrlFilterNum()
// which has been obsoleted, but we are running old code now!
//m_spiderDiffbotApiUrl[i].set ( api );
i++;
}
// and for docs that have errors respider once every 5 hours
m_regExs[i].set("errorcount>0 && errcount<3");
m_spiderPriorities [i] = 40;
m_spiderFreqs [i] = 0.2; // half a day
i++;
// excessive errors? (tcp/dns timed out, etc.) retry once per month?
m_regExs[i].set("errorcount>=3");
m_spiderPriorities [i] = 30;
m_spiderFreqs [i] = 30; // 30 days
i++;
// url crawl and process pattern
if ( ucp && upp ) {
m_regExs[i].set("matchesucp && matchesupp");
@ -2060,7 +2237,7 @@ bool CollectionRec::rebuildUrlFilters ( ) {
m_numRegExs10 = i;
m_numRegExs5 = i;
m_numRegExs6 = i;
m_numRegExs7 = i;
//m_numRegExs7 = i;
m_numRegExs8 = i;
//m_numRegExs11 = i;

@ -27,6 +27,7 @@ public:
char *m_parmEnd;
class UdpSlot *m_slot;
bool m_doRebuilds;
bool m_updatedRound;
collnum_t m_collnum;
bool m_registered;
long m_errno;
@ -45,10 +46,18 @@ class Collectiondb {
// . returns false and sets errno on error
// . each collection as a CollectionRec class for it and
// is loaded up from the appropriate config file
bool init ( bool isDump = false );
bool init ( );
// this loads all the recs from host #0
bool load ( bool isDump = false );
//bool load ( bool isDump = false );
// called by main.cpp to fill in our m_recs[] array with
// all the coll.*.*/coll.conf info
bool loadAllCollRecs ( );
// after main.cpp loads all rdb trees it calls this to remove
// bogus collnums from the trees i guess
bool cleanTrees ( ) ;
// . this will save all conf files back to disk that need it
// . returns false and sets g_errno on error, true on success
@ -63,7 +72,8 @@ class Collectiondb {
char *getColl ( collnum_t collnum ) {return getCollName(collnum);};
// get coll rec specified in the HTTP request
class CollectionRec *getRec ( class HttpRequest *r );
class CollectionRec *getRec ( class HttpRequest *r ,
bool useDefaultRec = true );
// . get collectionRec from name
// returns NULL if not available
@ -81,7 +91,7 @@ class Collectiondb {
// . how many collections we have in here
// . only counts valid existing collections
long getNumRecs() { return m_numRecsUsed; };
long getNumRecsUsed() { return m_numRecsUsed; };
// . does this requester have root admin privileges???
// . uses the root collection record!
@ -92,9 +102,9 @@ class Collectiondb {
// what collnum will be used the next time a coll is added?
collnum_t reserveCollNum ( ) ;
long long getLastUpdateTime () { return m_lastUpdateTime; };
//long long getLastUpdateTime () { return m_lastUpdateTime; };
// updates m_lastUpdateTime so g_spiderCache know when to reload
void updateTime ();
//void updateTime ();
// private:
@ -105,9 +115,8 @@ class Collectiondb {
// bool saveRec ); // = true
bool addExistingColl ( char *coll,
collnum_t collnum ,
bool isDump ) ;
bool addExistingColl ( char *coll, collnum_t collnum );
bool addNewColl ( char *coll ,
char customCrawl ,
char *cpc ,
@ -115,9 +124,10 @@ class Collectiondb {
bool saveIt ,
collnum_t newCollnum ) ;
bool registerCollRec ( CollectionRec *cr ,
bool isDump ,
bool isNew ) ;
bool registerCollRec ( CollectionRec *cr , bool isNew ) ;
bool addRdbBaseToAllRdbsForEachCollRec ( ) ;
bool addRdbBasesForCollRec ( CollectionRec *cr ) ;
bool setRecPtr ( collnum_t collnum , CollectionRec *cr ) ;
@ -128,6 +138,8 @@ class Collectiondb {
//bool updateRec ( CollectionRec *newrec );
bool deleteRecs ( class HttpRequest *r ) ;
void deleteSpiderColl ( class SpiderColl *sc );
// returns false if blocked, true otherwise.
//bool resetColl ( char *coll , WaitEntry *we , bool purgeSeeds );
bool resetColl2 ( collnum_t oldCollnum,
@ -149,7 +161,7 @@ class Collectiondb {
long m_numRecs;
long m_numRecsUsed;
long long m_lastUpdateTime;
//long long m_lastUpdateTime;
};
extern class Collectiondb g_collectiondb;
@ -249,6 +261,7 @@ class CrawlInfo {
long long m_pageProcessSuccesses; // 7
long long m_urlsHarvested; // 8
long m_lastUpdateTime;
// this is non-zero if urls are available to be spidered right now.
@ -268,6 +281,12 @@ class CrawlInfo {
//long m_numUrlsLaunched;
long m_dummy1;
// keep separate because when we receive a crawlinfo struct from
// a host we only add these in if it matches our round #
long long m_pageDownloadSuccessesThisRound;
long long m_pageProcessSuccessesThisRound;
void reset() { memset ( this , 0 , sizeof(CrawlInfo) ); };
//bool print (class SafeBuf *sb ) ;
//bool setFromSafeBuf (class SafeBuf *sb ) ;
@ -348,7 +367,7 @@ class CollectionRec {
bool m_urlFiltersHavePageCounts;
// moved from SpiderColl so we can load up at startup
HashTableX m_pageCountTable;
//HashTableX m_pageCountTable;
// . when was the last time we changed?
//long long m_lastUpdateTime;
@ -385,7 +404,9 @@ class CollectionRec {
// spidered and begin the next round
long m_spiderRoundNum;
char m_useDatedb ;
char m_indexBody;
//char m_useDatedb ;
//char m_addUrlEnabled ; // TODO: use at http interface lvl
//char m_spiderLinks ; use url filters now!
char m_sameHostLinks ; // spider links from same host only?
@ -691,8 +712,8 @@ class CollectionRec {
//long m_respiderWaits [ MAX_FILTERS ];
//long m_numRegExs8;
// spidering on or off?
long m_numRegExs7;
char m_spidersEnabled [ MAX_FILTERS ];
//long m_numRegExs7;
//char m_spidersEnabled [ MAX_FILTERS ];
// should urls in this queue be sent to diffbot for processing
// when we are trying to index them?

Conf.h (3)

@ -216,6 +216,7 @@ class Conf {
//long long m_tfndbMaxUrls;
long m_maxCpuThreads;
long m_maxCpuMergeThreads;
long m_deadHostTimeout;
long m_sendEmailTimeout;
@ -300,6 +301,8 @@ class Conf {
long m_robotdbMaxCacheMem ;
bool m_robotdbSaveCache;
long m_maxTotalSpiders;
// indexdb has a max cached age for getting IndexLists (10 mins deflt)
long m_indexdbMaxTreeMem ;
long m_indexdbMaxCacheMem;

@ -1285,6 +1285,10 @@ bool CountryCode::loadHashTable(void) {
return(s_catToCountry.load(g_hostdb.m_dir, "catcountry.dat"));
}
void CountryCode::reset ( ) {
s_catToCountry.reset();
}
int CountryCode::getNumCodes(void) {
return(s_numCountryCodes);
}

@ -25,6 +25,7 @@ class CountryCode {
uint8_t getLanguageFromDMOZ(long catid);
int createHashTable(void);
bool loadHashTable(void);
void reset();
long getNumEntries(void);
void debugDumpNumbers(void);
uint64_t getLanguagesWritten(int index);

@ -121,7 +121,7 @@ bool Datedb::init2 ( long treeMem ) {
false , // preload dskpagecache
16 );// key size
}
/*
bool Datedb::addColl ( char *coll, bool doVerify ) {
if ( ! m_rdb.addColl ( coll ) ) return false;
if ( ! doVerify ) return true;
@ -133,7 +133,7 @@ bool Datedb::addColl ( char *coll, bool doVerify ) {
log ( "db: Verify failed, but scaling is allowed, passing." );
return true;
}
*/
bool Datedb::verify ( char *coll ) {
log ( LOG_INFO, "db: Verifying Datedb for coll %s...", coll );
g_threads.disableThreads();

@ -108,6 +108,9 @@ bool DiskPageCache::init ( const char *dbname ,
// void (*rmVfd2)(DiskPageCache*, long) ) {
reset();
// fix cores while rebalancing
//maxMem = 0;
m_rdbId = rdbId;
bool *tog = NULL;

@ -166,6 +166,7 @@ case EBADJSONPARSER: return "Bad JSON parser";
case EFAKEFIRSTIP: return "Fake firstIp";
case EBADHOSTSCONF: return "A hosts.conf is out of sync";
case EWAITINGTOSYNCHOSTSCONF: return "Wait to ensure hosts.conf in sync";
case EDOCNONCANONICAL: return "Url was dup of canonical page";
}
// if the remote error bit is clear it must be a regular errno
//if ( ! ( errnum & REMOTE_ERROR_BIT ) ) return strerror ( errnum );

@ -169,6 +169,7 @@ enum {
EBADJSONPARSER,
EFAKEFIRSTIP,
EBADHOSTSCONF,
EWAITINGTOSYNCHOSTSCONF
EWAITINGTOSYNCHOSTSCONF,
EDOCNONCANONICAL
};
#endif

@ -187,6 +187,8 @@ bool HashTableX::addKey ( void *key , void *val , long *slot ) {
g_errno = ETRYAGAIN;
return false;
}
// never got initialized? call HashTableX::init()
if ( m_ks <= 0 ){ char *xx=NULL; *xx=0; }
// check to see if we should grow the table. now we grow
// when 25% full to make operations faster so getLongestString()
// doesn't return such big numbers!

@ -68,6 +68,16 @@ Hostdb::~Hostdb () {
}
void Hostdb::reset ( ) {
for ( long i = 0 ; m_hosts && i < m_numHosts ; i++ ) {
Host *h = &m_hosts[i];
if ( ! h->m_lastKnownGoodCrawlInfoReply ) continue;
mfree ( h->m_lastKnownGoodCrawlInfoReply ,
h->m_lastKnownGoodCrawlInfoReplyEnd -
h->m_lastKnownGoodCrawlInfoReply , "lknown" );
h->m_lastKnownGoodCrawlInfoReply = NULL;
}
if ( m_hosts )
mfree ( m_hosts, m_allocSize,"Hostdb" );
if ( m_ips ) mfree ( m_ips , m_numIps * 4, "Hostdb" );
@ -121,8 +131,7 @@ bool Hostdb::init ( char *filename , long hostId , char *netName ,
// make sure our hostId is in our conf file
if ( hostId < 0 )
return log(
"conf: Negative hostId %li supplied in "
"hosts.conf.",hostId);
"conf: Negative hostId %li supplied",hostId);
// set early for calling log()
m_hostId = hostId;
// set clock in sync in fctypes.cpp

@ -273,6 +273,9 @@ class Host {
char m_inSync ;
char m_isPermanentOutOfSync ;
char *m_lastKnownGoodCrawlInfoReply;
char *m_lastKnownGoodCrawlInfoReplyEnd;
// . used by Parms.cpp for broadcasting parm change requests
// . each parm change request has an id
// . this let's us know which id is in progress and what the last

@ -925,6 +925,10 @@ bool HttpServer::sendReply ( TcpSocket *s , HttpRequest *r , bool isAdmin) {
strncmp ( path , "/v2/bulk/download/" ,18 ) == 0 )
return sendBackDump ( s , r );
// "GET /download/mycoll_urls.csv"
if ( strncmp ( path , "/download/", 10 ) == 0 )
return sendBackDump ( s , r );
// . is it a diffbot api request, like "GET /api/*"
// . ie "/api/startcrawl" or "/api/stopcrawl" etc.?
//if ( strncmp ( path , "/api/" , 5 ) == 0 )

@ -173,7 +173,7 @@ bool Indexdb::init2 ( long treeMem ) {
return true;
}
/*
bool Indexdb::addColl ( char *coll, bool doVerify ) {
if ( ! m_rdb.addColl ( coll ) ) return false;
if ( ! doVerify ) return true;
@ -187,6 +187,7 @@ bool Indexdb::addColl ( char *coll, bool doVerify ) {
log ( "db: Verify failed, but scaling is allowed, passing." );
return true;
}
*/
bool Indexdb::verify ( char *coll ) {
return true;

@ -96,7 +96,13 @@ JsonItem *Json::parseJsonStringIntoJsonItems ( char *json , long niceness ) {
// plus a \0 for the value and a \0 for the name of each jsonitem
need += 2;
// prevent cores for now
need += 10000;
need += 10;
// . to prevent safebuf from reallocating do this
// . safeMemcpy() calls reserve(m_length+len), and reserve()
// tries to alloc m_length + (m_length+len), so since
// m_length+len should never be more than "need" we need to
// double up here
need *= 2;
// this should be enough
if ( ! m_sb.reserve ( need ) ) return NULL;
// for testing if we realloc

@ -378,7 +378,7 @@ void LangList::reset ( ) {
// . looks under the langlist/ directory for langlist.# files
// each number corresponds to a language
bool LangList::loadLists ( ) {
log ( LOG_INIT, "lang: Loading Language Lists.");
//log ( LOG_INIT, "lang: Loading Language Lists.");
// init the term table
m_langTable.set(8,4,100000*MAX_LANGUAGES,NULL,0,false,0,"tbl-lang");
// loop over the languages and load the files
@ -476,6 +476,7 @@ bool LangList::loadLists ( ) {
// count the list
listCount++;
if ( wordsInList > 0 )
log ( LOG_DEBUG,
"lang: Successfully Loaded %li out of %li (%li bytes) "
"words from %s dictionary.",

@ -173,7 +173,7 @@ bool Linkdb::init2 ( long treeMem ) {
sizeof(key224_t), // key size
true );// bias disk page cache
}
/*
bool Linkdb::addColl ( char *coll, bool doVerify ) {
if ( ! m_rdb.addColl ( coll ) ) return false;
if ( ! doVerify ) return true;
@ -185,7 +185,7 @@ bool Linkdb::addColl ( char *coll, bool doVerify ) {
log ( "db: Verify failed, but scaling is allowed, passing." );
return true;
}
*/
bool Linkdb::verify ( char *coll ) {
log ( LOG_DEBUG, "db: Verifying Linkdb for coll %s...", coll );
g_threads.disableThreads();
@ -3648,7 +3648,8 @@ bool Inlink::setXmlFromRSS ( Xml *xml , long niceness ) {
true , // pure xml?
TITLEREC_CURRENT_VERSION ,
false , // no need to now
niceness );
niceness ,
CT_XML );
}
// only Title.cpp uses this right now

@ -1791,6 +1791,11 @@ void Loop::quickPoll(long niceness, const char* caller, long lineno) {
if(m_inQuickPoll) {
log(LOG_WARN,
"admin: tried to quickpoll from inside quickpoll");
// this happens when handleRequest3f is called from
// a quickpoll and it deletes a collection and BigFile::close
// calls ThreadQueue::removeThreads and Msg3::doneScanning()
// has niceness 2 and calls quickpoll again!
return;
//if(g_conf.m_quickpollCoreOnError) {
char*xx=NULL;*xx=0;
// }

@ -662,7 +662,8 @@ fctypes.o: fctypes.cpp gb-include.h types.h fctypes.h Unicode.h \
openssl/ssl23.h openssl/srtp.h Collectiondb.h HashTableX.h PingServer.h \
Entities.h UCWordIterator.h Timedb.h Rdb.h RdbBase.h RdbScan.h BigFile.h \
RdbMap.h RdbList.h RdbDump.h RdbTree.h RdbMem.h RdbBuckets.h RdbCache.h \
Msg5.h Msg3.h RdbMerge.h Dir.h Titledb.h DiskPageCache.h Threads.h
Msg5.h Msg3.h RdbMerge.h Dir.h Titledb.h DiskPageCache.h Threads.h \
HttpMime.h
File.o: File.cpp gb-include.h types.h fctypes.h Unicode.h \
UnicodeProperties.h UCPropTable.h iconv.h hash.h Errno.h Log.h File.h \
Mem.h Conf.h Xml.h XmlNode.h Lang.h Iso8859.h iana_charset.h ip.h \
@ -1325,13 +1326,14 @@ Mem.o: Mem.cpp gb-include.h types.h fctypes.h Unicode.h \
openssl/pem2.h openssl/hmac.h openssl/kssl.h openssl/ssl2.h \
openssl/ssl3.h openssl/tls1.h openssl/dtls1.h openssl/pqueue.h \
openssl/ssl23.h openssl/srtp.h Collectiondb.h HashTableX.h PingServer.h \
Threads.h malloc.c Msg20.h UdpServer.h UdpSlot.h UdpProtocol.h \
Multicast.h Summary.h matches2.h Query.h Words.h StopWords.h Titledb.h \
Rdb.h RdbBase.h RdbScan.h BigFile.h RdbMap.h RdbList.h RdbDump.h \
RdbTree.h RdbMem.h RdbBuckets.h RdbCache.h Msg5.h Msg3.h RdbMerge.h \
Dir.h DiskPageCache.h Bits.h Pos.h Matches.h HashTableT.h Domains.h \
CountryCode.h Tagdb.h Msg0.h Indexdb.h Events.h Sections.h IndexList.h \
Dates.h
Threads.h Pages.h HttpServer.h TcpServer.h openssl/err.h MsgC.h \
UdpServer.h UdpSlot.h UdpProtocol.h Dns.h DnsProtocol.h RdbCache.h \
RdbList.h Multicast.h Rdb.h RdbBase.h RdbScan.h BigFile.h RdbMap.h \
RdbDump.h RdbTree.h RdbMem.h RdbBuckets.h Msg5.h Msg3.h RdbMerge.h Dir.h \
HttpMime.h PageCrawlBot.h malloc.c Msg20.h Summary.h matches2.h Query.h \
Words.h StopWords.h Titledb.h DiskPageCache.h Bits.h Pos.h Matches.h \
HashTableT.h Domains.h CountryCode.h Tagdb.h Msg0.h Indexdb.h Events.h \
Sections.h IndexList.h Dates.h
MemPool.o: MemPool.cpp gb-include.h types.h fctypes.h Unicode.h \
UnicodeProperties.h UCPropTable.h iconv.h hash.h Errno.h Log.h MemPool.h \
MemPoolTree.h Mem.h Conf.h Xml.h XmlNode.h Lang.h Iso8859.h \
@ -1930,7 +1932,7 @@ Msg4.o: Msg4.cpp gb-include.h types.h fctypes.h Unicode.h \
Msg13.h Msge0.h Msge1.h Msg8b.h SearchInput.h Msg40.h Msg39.h Msg37.h \
Posdb.h TopTree.h IndexTable2.h Msg51.h Msg17.h IndexReadInfo.h Msg3a.h \
Stats.h PostQueryRerank.h Sanity.h SiteGetter.h Title.h Address.h zlib.h \
zconf.h Syncdb.h
zconf.h Syncdb.h Process.h
Msg51.o: Msg51.cpp gb-include.h types.h fctypes.h Unicode.h \
UnicodeProperties.h UCPropTable.h iconv.h hash.h Errno.h Log.h Msg51.h \
Msg0.h UdpServer.h Mem.h Conf.h Xml.h XmlNode.h Lang.h Iso8859.h \
@ -3178,7 +3180,8 @@ RdbBase.o: RdbBase.cpp gb-include.h types.h fctypes.h Unicode.h \
Repair.h XmlDoc.h Phrases.h LangList.h Images.h Msg36.h Msg13.h Msge0.h \
Msge1.h MsgC.h Dns.h DnsProtocol.h Msg8b.h SearchInput.h Msg40.h Msg39.h \
Msg37.h TopTree.h IndexTable2.h Msg51.h Msg17.h Msg3a.h \
PostQueryRerank.h Sanity.h SiteGetter.h Title.h Address.h HttpMime.h
PostQueryRerank.h Sanity.h SiteGetter.h Title.h Address.h HttpMime.h \
Rebalance.h
RdbBuckets.o: RdbBuckets.cpp RdbBuckets.h Mem.h Conf.h Xml.h XmlNode.h \
gb-include.h types.h fctypes.h Unicode.h UnicodeProperties.h \
UCPropTable.h iconv.h hash.h Errno.h Log.h Lang.h Iso8859.h \
@ -3334,7 +3337,12 @@ RdbMerge.o: RdbMerge.cpp gb-include.h types.h fctypes.h Unicode.h \
openssl/ssl23.h openssl/srtp.h Collectiondb.h HashTableX.h PingServer.h \
RdbScan.h BigFile.h RdbMap.h RdbList.h RdbDump.h RdbTree.h RdbMem.h \
RdbBuckets.h RdbCache.h Msg5.h Msg3.h RdbMerge.h Dir.h Indexdb.h \
DiskPageCache.h Titledb.h Process.h
DiskPageCache.h Titledb.h Process.h Spider.h Msg4.h Msg1.h UdpServer.h \
UdpSlot.h UdpProtocol.h Multicast.h Threads.h Msg0.h Clusterdb.h \
Linkdb.h Msg2.h Query.h Msg20.h Summary.h matches2.h Words.h StopWords.h \
Bits.h Pos.h Matches.h HashTableT.h Domains.h CountryCode.h Tagdb.h \
Events.h Sections.h IndexList.h Dates.h Msg22.h CatRec.h Categories.h \
HashTable.h Catdb.h Datedb.h
RdbScan.o: RdbScan.cpp gb-include.h types.h fctypes.h Unicode.h \
UnicodeProperties.h UCPropTable.h iconv.h hash.h Errno.h Log.h RdbScan.h \
BigFile.h File.h Mem.h Conf.h Xml.h XmlNode.h Lang.h Iso8859.h \
@ -4508,7 +4516,8 @@ Xml.o: Xml.cpp gb-include.h types.h fctypes.h Unicode.h \
openssl/ssl23.h openssl/srtp.h Collectiondb.h HashTableX.h PingServer.h \
Titledb.h Rdb.h RdbBase.h RdbScan.h BigFile.h RdbMap.h RdbList.h \
RdbDump.h RdbTree.h RdbMem.h RdbBuckets.h RdbCache.h Msg5.h Msg3.h \
RdbMerge.h Dir.h DiskPageCache.h Words.h StopWords.h Entities.h
RdbMerge.h Dir.h DiskPageCache.h Words.h StopWords.h HttpMime.h \
Entities.h
XmlDoc.o: XmlDoc.cpp gb-include.h types.h fctypes.h Unicode.h \
UnicodeProperties.h UCPropTable.h iconv.h hash.h Errno.h Log.h XmlDoc.h \
Lang.h Iso8859.h iana_charset.h Words.h Xml.h XmlNode.h SafeBuf.h \

Mem.cpp (107)

@ -10,11 +10,13 @@
//#include "MemPoolVar.h"
//#include "malloc.h"
//#include "Stats.h"
#include "Pages.h"
// put me back
//#define _EFENCE_
//#define EFENCE
#define EFENCE_SIZE 100000
// uncomment this for _EFENCE_ to do underflow checks instead of the
// uncomment this for EFENCE to do underflow checks instead of the
// default overflow checks
//#define _CHECKUNDERFLOW_
@ -50,7 +52,7 @@
// there because it will hit a different PAGE, to be more sure we could
// make UNDERPAD and OVERPAD PAGE bytes, although the overrun could still write
// to another allocated area of memory and we can never catch it.
#ifdef _EFENCE_
#ifdef EFENCE
#define UNDERPAD 0
#define OVERPAD 0
#else
@ -66,7 +68,7 @@ extern bool g_isYippy;
bool freeCacheMem();
#ifdef _EFENCE_
#ifdef EFENCE
static void *getElecMem ( long size ) ;
static void freeElecMem ( void *p ) ;
#endif
@ -148,7 +150,9 @@ void mutexUnlock ( ) {
// make it big for production machines
//#define DMEMTABLESIZE (1024*602)
// there should not be too many mallocs any more
#define DMEMTABLESIZE (1024*302)
// i boosted from 300k to 600k so we can get summaries for 150k results
// for the csv download...
#define DMEMTABLESIZE (1024*602)
//#define DMEMTABLESIZE (1024*202)
// and small for local machine
//#define DMEMTABLESIZE (1024*50)
@ -248,7 +252,7 @@ void * operator new (size_t size) throw (std::bad_alloc) {
throw std::bad_alloc();
//throw 1;
}
#ifdef _EFENCE_
#ifdef EFENCE
void *mem = getElecMem(size);
#else
//void *mem = dlmalloc ( size );
@ -266,7 +270,7 @@ newmemloop:
//return NULL;
}
if ( (unsigned long)mem < 0x00010000 ) {
#ifdef _EFENCE_
#ifdef EFENCE
void *remem = getElecMem(size);
#else
void *remem = sysmalloc(size);
@ -274,7 +278,7 @@ newmemloop:
log ( LOG_WARN, "mem: Caught low memory allocation at %08lx, "
"reallocated to %08lx", (unsigned long)mem,
(unsigned long)remem );
#ifdef _EFENCE_
#ifdef EFENCE
freeElecMem (mem);
#else
sysfree(mem);
@ -326,7 +330,7 @@ void * operator new [] (size_t size) throw (std::bad_alloc) {
throw std::bad_alloc();
//throw 1;
}
#ifdef _EFENCE_
#ifdef EFENCE
void *mem = getElecMem(size);
#else
//void *mem = dlmalloc ( size );
@ -345,7 +349,7 @@ newmemloop:
//return NULL;
}
if ( (unsigned long)mem < 0x00010000 ) {
#ifdef _EFENCE_
#ifdef EFENCE
void *remem = getElecMem(size);
#else
void *remem = sysmalloc(size);
@ -353,7 +357,7 @@ newmemloop:
log ( LOG_WARN, "mem: Caught low memory allocation at %08lx, "
"reallocated to %08lx",
(long)mem, (long)remem );
#ifdef _EFENCE_
#ifdef EFENCE
freeElecMem (mem);
#else
sysfree(mem);
@ -423,6 +427,7 @@ pid_t Mem::getPid() {
bool Mem::init ( long long maxMem ) {
// set main process pid
s_pid = getpid();
// . don't swap our memory out, man...
// . damn, linux 2.4.17 seems to crash the kernel sometimes w/ this
//if ( mlockall( MCL_CURRENT | MCL_FUTURE ) == -1 ) {
@ -440,10 +445,37 @@ bool Mem::init ( long long maxMem ) {
if ( g_conf.m_detectMemLeaks )
log(LOG_INIT,"mem: Memory leak checking is enabled.");
#ifdef _EFENCE_
#ifdef EFENCE
log(LOG_INIT,"mem: using electric fence!!!!!!!");
#endif
// if we can't alloc 3gb exit and retry
long long start = gettimeofdayInMilliseconds();
char *pools[30];
long long count = 0LL;
long long chunk = 100000000LL; // 100MB chunks
long long need = 3000000000LL; // 3GB
long i = 0; for ( i = 0 ; i < 30 ; i++ ) {
pools[i] = (char *)mmalloc(chunk,"testmem");
count += chunk;
if ( pools[i] ) continue;
count -= chunk;
log("mem: could only alloc %lli bytes of the "
"%lli required to run gigablast. exiting.",
count , need );
break;
}
for ( long j = 0 ; j < i ; j++ )
mfree ( pools[j] , chunk , "testmem" );
long long now = gettimeofdayInMilliseconds();
long long took = now - start;
if ( took > 20 ) log("mem: took %lli ms to check memory ceiling",took);
// return if could not alloc the full 3GB
if ( i < 30 ) return false;
// reset this, our max mem used over time ever because we don't
// want the mem test we did above to count towards it
m_maxAlloced = 0;
// init or own malloc stuff in malloc.c (from doug leay)
//if ( mdw_init_sbrk ( maxMem ) ) return true;
// bitch
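
The new block above is a preflight check: temporarily allocate the full 3GB ceiling in 100MB chunks, free it all, and refuse to start if the whole amount was not obtainable. A minimal standalone version of the same idea, using plain malloc instead of mmalloc (illustrative only):

#include <cstdlib>

// returns true only if "need" bytes could be allocated in "chunk"-sized
// pieces, i.e. the configured memory ceiling is actually available
static bool memoryCeilingAvailable ( long long need , long long chunk ) {
	long n = (long)( ( need + chunk - 1 ) / chunk );
	char **pools = (char **)calloc ( n , sizeof(char *) );
	if ( ! pools ) return false;
	bool ok = true;
	for ( long i = 0 ; i < n ; i++ ) {
		pools[i] = (char *)malloc ( (size_t)chunk );
		if ( ! pools[i] ) { ok = false; break; }
	}
	// give it all back; this was only a probe
	for ( long i = 0 ; i < n ; i++ )
		if ( pools[i] ) free ( pools[i] );
	free ( pools );
	return ok;
}

// usage mirroring Mem::init(): 3GB in 100MB chunks
// if ( ! memoryCeilingAvailable ( 3000000000LL , 100000000LL ) ) return false;
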
@ -652,24 +684,24 @@ bool Mem::printMemBreakdownTable ( SafeBuf* sb,
// make sure the admin viewing this table knows that there will be
// frees in here that are delayed if electric fence is enabled.
#ifdef _EFENCE_
#ifdef EFENCE
ss = " <font color=red>*DELAYED FREES ENABLED*</font>";
#endif
sb->safePrintf (
"<table>"
"<table cellpadding=4 width=100%% bgcolor=#%s border=1>"
"<table %s>"
"<tr>"
"<td colspan=3 bgcolor=#%s>"
"<center><b>Mem Breakdown%s</b></td></tr>\n"
"<tr>"
"<tr bgcolor=#%s>"
"<td><b>allocator</b></td>"
"<td><b>num allocs</b></td>"
"<td><b>allocated</b></td>"
"</tr>" ,
lightblue, darkblue , ss );
TABLE_STYLE, darkblue , ss , darkblue );
long n = m_numAllocated * 2;
MemEntry *e = (MemEntry *)mcalloc ( sizeof(MemEntry) * n , "Mem" );
@ -756,11 +788,12 @@ bool Mem::printMemBreakdownTable ( SafeBuf* sb,
// now print into buffer
for ( long i = 0 ; i < count ; i++ )
sb->safePrintf (
"<tr>"
"<tr bgcolor=%s>"
"<td>%s</td>"
"<td>%li</td>"
"<td>%li</td>"
"</tr>\n",
LIGHT_BLUE,
winners[i]->m_label,
winners[i]->m_numAllocs,
winners[i]->m_allocated);
@ -1242,14 +1275,24 @@ void *Mem::gbmalloc ( int size , const char *note ) {
return NULL;
}
void *mem;
// to find bug that cores on malloc do this
//printBreeches(true);
//g_errno=ENOMEM;return (void *)log("Mem::malloc: reached mem limit");}
#ifdef _EFENCE_
void *mem = getElecMem(size+UNDERPAD+OVERPAD);
#else
#ifdef EFENCE
mem = getElecMem(size+UNDERPAD+OVERPAD);
// conditional electric fence?
#elif EFENCE_BIG
if ( size >= EFENCE_SIZE )
mem = getElecMem(size+0+0);
else
mem = (void *)sysmalloc ( size + UNDERPAD + OVERPAD );
#else
//void *mem = dlmalloc ( size );
void *mem = (void *)sysmalloc ( size + UNDERPAD + OVERPAD );
mem = (void *)sysmalloc ( size + UNDERPAD + OVERPAD );
#endif
// initialization debug
//char *pend = (char *)mem + UNDERPAD + size;
@ -1321,7 +1364,7 @@ mallocmemloop:
return NULL;
}
if ( (unsigned long)mem < 0x00010000 ) {
#ifdef _EFENCE_
#ifdef EFENCE
void *remem = getElecMem(size);
#else
void *remem = sysmalloc(size);
@ -1329,7 +1372,7 @@ mallocmemloop:
log ( LOG_WARN, "mem: Caught low memory allocation at %08lx, "
"reallocated to %08lx",
(unsigned long)mem, (unsigned long)remem );
#ifdef _EFENCE_
#ifdef EFENCE
freeElecMem (mem);
#else
sysfree(mem);
@ -1392,7 +1435,9 @@ void *Mem::gbrealloc ( void *ptr , int oldSize , int newSize ,
char *mem;
#ifdef _EFENCE_
// even though size may be < 100k for EFENCE_BIG, do it this way
// for simplicity...
#if defined(EFENCE) || defined(EFENCE_BIG)
mem = (char *)mmalloc ( newSize , note );
if ( ! mem ) return NULL;
// copy over to it
@ -1471,10 +1516,19 @@ void Mem::gbfree ( void *ptr , int size , const char *note ) {
char *xx = NULL; *xx = 0;
}
#ifdef _EFENCE_
#ifdef EFENCE
// this does a delayed free so do not call rmMem() just yet
freeElecMem ((char *)ptr - UNDERPAD );
#else
return;
#endif
#ifdef EFENCE_BIG
if ( size >= EFENCE_SIZE ) {
freeElecMem ((char *)ptr - 0 );
return;
}
#endif
bool isnew = s_isnew[slot];
// if this returns false it was an unbalanced free
@ -1482,7 +1536,6 @@ void Mem::gbfree ( void *ptr , int size , const char *note ) {
if ( isnew ) sysfree ( (char *)ptr );
else sysfree ( (char *)ptr - UNDERPAD );
#endif
}
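
Taken together, the EFENCE_BIG hunks gate the electric-fence allocator by size: only allocations of EFENCE_SIZE bytes or more pay the guard-page cost, smaller ones stay on the normal padded malloc path. A self-contained sketch of that gating (getElecMem/freeElecMem are stubbed with plain malloc here; the real ones presumably back the block with protected pages):

#include <cstdlib>

#define EFENCE_SIZE 100000   // same threshold as the diff

// stubs so the sketch links; the real versions use guarded memory
static void *getElecMem ( long size ) { return malloc ( (size_t)size ); }
static void  freeElecMem ( void *p )  { free ( p ); }

static void *gatedMalloc ( long size ) {
	if ( size >= EFENCE_SIZE )
		return getElecMem ( size );   // big buffer: catch overruns hard
	return malloc ( (size_t)size );       // small buffer: stay cheap
}

static void gatedFree ( void *p , long size ) {
	if ( size >= EFENCE_SIZE ) { freeElecMem ( p ); return; }
	free ( p );
}
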
long getLowestLitBitLL ( unsigned long long bits ) {

@ -53,7 +53,7 @@ bool Monitordb::init ( ) {
sizeof(key96_t) ,
true ); // bias page cache? (true!)
}
/*
bool Monitordb::addColl ( char *coll, bool doVerify ) {
if ( ! m_rdb.addColl ( coll ) ) return false;
if ( ! doVerify ) return true;
@ -65,7 +65,7 @@ bool Monitordb::addColl ( char *coll, bool doVerify ) {
log ( "db: Verify failed, but scaling is allowed, passing." );
return true;
}
*/
bool Monitordb::verify ( char *coll ) {
log ( LOG_INFO, "db: Verifying Monitordb for coll %s...", coll );
g_threads.disableThreads();

@ -1156,10 +1156,12 @@ void gotHttpReply2 ( void *state ,
// . if no user-agent line matches * or gigabot/flurbot we
// will get just a \0 for the reply, replySize=1!
//char *ua = "ProCogBot";//"EventGuruBot";//r->m_userAgent;
char *ua = "Gigabot";
long uaLen = gbstrlen(ua);
replySize = filterRobotsTxt (reply,replySize,&mime,niceness,
ua,uaLen);
// take this out until it works for
// user-agent: *\ndisallow: blah
//char *ua = "Gigabot";
//long uaLen = gbstrlen(ua);
//replySize = filterRobotsTxt (reply,replySize,&mime,niceness,
// ua,uaLen);
// record in the stats
docsPtr = &g_stats.m_compressRobotsTxtDocs;
bytesInPtr = &g_stats.m_compressRobotsTxtBytesIn;
@ -2020,7 +2022,7 @@ bool getIframeExpandedContent ( Msg13Request *r , TcpSocket *ts ) {
xd->m_r = r;
// so XmlDoc::getExtraDoc doesn't have any issues
xd->m_firstIp = 0;
xd->m_firstIp = 123456;
xd->m_firstIpValid = true;
// try using xmldoc to do it

@ -20,9 +20,16 @@ void Msg20::constructor () {
void Msg20::destructor () { reset(); m_mcast.destructor(); }
#include "Process.h"
void Msg20::reset() {
// not allowed to reset one in progress
if ( m_inProgress ) { char *xx=NULL;*xx=0; }
if ( m_inProgress ) {
// do not core on abrupt exits!
if (g_process.m_mode == EXIT_MODE ) return;
// otherwise core
char *xx=NULL;*xx=0;
}
m_launched = false;
if ( m_request && m_request != m_requestBuf )
mfree ( m_request , m_requestSize , "Msg20rb" );

@ -334,7 +334,8 @@ void handleRequest22 ( UdpSlot *slot , long netnice ) {
// get base, returns NULL and sets g_errno to ENOCOLLREC on error
RdbBase *tbase;
if ( ! (tbase=getRdbBase(RDB_TITLEDB,coll) ) ) {
log("db: Could not get title rec in collection \"%s\".",
log("db: Could not get title rec in collection \"%s\" "
"because rdbbase is null.",
coll);
g_errno = EBADENGINEER;
us->sendErrorReply ( slot , g_errno );

@ -427,16 +427,24 @@ bool Msg39::getLists () {
// if we have twins, then make sure the twins read different
// pieces of the same docid range to make things 2x faster
bool useTwins = false;
if ( g_hostdb.getNumStripes() == 2 ) useTwins = true;
if ( useTwins ) {
long long delta2 = ( docIdEnd - docIdStart ) / 2;
if ( m_r->m_stripe == 0 ) docIdEnd = docIdStart + delta2;
else docIdStart = docIdStart + delta2;
}
//bool useTwins = false;
//if ( g_hostdb.getNumStripes() == 2 ) useTwins = true;
//if ( useTwins ) {
// long long delta2 = ( docIdEnd - docIdStart ) / 2;
// if ( m_r->m_stripe == 0 ) docIdEnd = docIdStart + delta2;
// else docIdStart = docIdStart + delta2;
//}
// new striping logic:
long numStripes = g_hostdb.getNumStripes();
long long delta2 = ( docIdEnd - docIdStart ) / numStripes;
long stripe = g_hostdb.getMyHost()->m_stripe;
docIdStart += delta2 * stripe; // is this right?
docIdEnd = docIdStart + delta2;
// add 1 to be safe so we don't lose a docid
docIdEnd++;
// TODO: add triplet support later for this to split the
// read 3 ways. 4 ways for quads, etc.
if ( g_hostdb.getNumStripes() >= 3 ) { char *xx=NULL;*xx=0;}
//if ( g_hostdb.getNumStripes() >= 3 ) { char *xx=NULL;*xx=0;}
// do not go over MAX_DOCID because it gets masked and
// ends up being 0!!! and we get empty lists
if ( docIdEnd > MAX_DOCID ) docIdEnd = MAX_DOCID;
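
A compact restatement of the new striping math above as a hypothetical helper (not in the commit): every stripe reads an equal slice of the docid range, plus one extra docid so the truncating division cannot drop one at the slice boundary.

// split [docIdStart,docIdEnd] into numStripes contiguous slices and
// return the slice this host's stripe should read
static void getStripeDocIdRange ( long long  docIdStart ,
                                  long long  docIdEnd   ,
                                  long       numStripes ,
                                  long       stripe     ,
                                  long long *outStart   ,
                                  long long *outEnd     ) {
	long long delta = ( docIdEnd - docIdStart ) / numStripes;
	*outStart = docIdStart + delta * stripe;
	*outEnd   = *outStart + delta;
	// add 1 to be safe so we don't lose a docid to truncation
	*outEnd  += 1;
}

// e.g. two stripes over docids [0,1000]: stripe 0 reads [0,501],
// stripe 1 reads [500,1001]; the caller still caps the end at MAX_DOCID.
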

@ -541,8 +541,9 @@ bool Msg4::addMetaList ( char *metaList ,
s_msg4Tail->m_next = this;
// we are the new tail
s_msg4Tail = this;
// debug log
log("msg4: queueing body msg4=0x%lx",(long)this);
// debug log. seems to happen a lot if not using threads..
if ( g_conf.m_useThreads )
log("msg4: queueing body msg4=0x%lx",(long)this);
// mark it
m_inUse = true;
// all done then, but return false so caller does not free
@ -556,8 +557,10 @@ bool Msg4::addMetaList ( char *metaList ,
// sanity check
if ( s_msg4Head || s_msg4Tail ) { char *xx=NULL; *xx=0; }
// spider hang bug
logf(LOG_DEBUG,"msg4: queueing head msg4=0x%lx",(long)this);
// . spider hang bug
// . debug log. seems to happen a lot if not using threads..
if ( g_conf.m_useThreads )
logf(LOG_DEBUG,"msg4: queueing head msg4=0x%lx",(long)this);
// mark it
m_inUse = true;
@ -1062,8 +1065,10 @@ void storeLineWaiters ( ) {
// . if his callback was NULL, then was loaded in loadAddsInProgress()
// . we no longer do that so callback should never be null now
if ( ! msg4->m_callback ) { char *xx=NULL;*xx=0; }
// log this now i guess
logf(LOG_DEBUG,"msg4: calling callback for msg4=0x%lx",(long)msg4);
// log this now i guess. seems to happen a lot if not using threads
if ( g_conf.m_useThreads )
logf(LOG_DEBUG,"msg4: calling callback for msg4=0x%lx",
(long)msg4);
// release it
msg4->m_inUse = false;
// call his callback
@ -1074,7 +1079,7 @@ void storeLineWaiters ( ) {
goto loop;
}
#include "Process.h"
// . destroys the slot if false is returned
// . this is registered in Msg4::set() to handle add rdb record msgs

@ -17,7 +17,7 @@
// increasing this doesn't seem to improve performance any on a single
// node cluster....
#define MAX_OUTSTANDING_MSG20S 50
#define MAX_OUTSTANDING_MSG20S 200
//static void handleRequest40 ( UdpSlot *slot , long netnice );
//static void gotExternalReplyWrapper ( void *state , void *state2 ) ;
@ -1184,6 +1184,10 @@ bool gotSummaryWrapper ( void *state ) {
Msg40 *THIS = (Msg40 *)state;
// inc it here
THIS->m_numReplies++;
// log every 1000 i guess
if ( (THIS->m_numReplies % 1000) == 0 )
log("msg40: got %li summaries out of %li",THIS->m_numReplies,
THIS->m_msg3a.m_numDocIds);
// it returns false if we're still awaiting replies
if ( ! THIS->gotSummary ( ) ) return false;
// now call callback, we're done
@ -1217,11 +1221,24 @@ bool Msg40::gotSummary ( ) {
// reset g_errno
g_errno = 0;
}
/*
// sanity check
for ( long i = 0 ; i < m_msg3a.m_numDocIds ; i++ ) {
// stop as soon as we hit a gap breaking our contiguity...
Msg20 *m = m_msg20[i];
if ( ! m ) continue;
Msg20Reply *mr = m->m_r;
if ( ! mr ) continue;
char *cc = mr->ptr_content;
if ( ! cc ) continue;
//if ( ! strstr(cc,"Modern Marketing KF400032MA") ) continue;
//log("hey");
//fprintf(stderr,"msg %li = %s\n",i,cc );
if ( i == 48329 ) { char *xx=NULL;*xx=0; }
mr->ptr_content = NULL;
}
*/
// . ok, now i wait for everybody.
// . TODO: evaluate if this hurts us
if ( m_numReplies < m_numRequests )
return false;
doAgain:
@ -1245,6 +1262,11 @@ bool Msg40::gotSummary ( ) {
//char *xx=NULL; *xx=0;
}
// . ok, now i wait for everybody.
// . TODO: evaluate if this hurts us
if ( m_numReplies < m_numRequests )
return false;
// save this before we increment m_numContiguous
//long oldNumContiguous = m_numContiguous;

@ -22,6 +22,7 @@ long g_numCorrupt = 0;
Msg5::Msg5() {
m_waitingForList = false;
//m_waitingForMerge = false;
m_numListPtrs = 0;
m_mergeLists = true;
reset();
@ -33,7 +34,7 @@ Msg5::~Msg5() {
// frees m_treeList
void Msg5::reset() {
if ( m_waitingForList ) {
if ( m_waitingForList ) { // || m_waitingForMerge ) {
log("disk: Trying to reset a class waiting for a reply.");
// might being doing an urgent exit (mainShutdown(1)) or
// g_process.shutdown(), so do not core here
@ -45,7 +46,6 @@ void Msg5::reset() {
m_prevCount = 0;
//m_prevKey.setMin();
KEYMIN(m_prevKey,MAX_KEY_BYTES);// m_ks); m_ks is invalid
m_waitingForList = false;
// free lists if m_mergeLists was false
for ( long i = 0 ; ! m_mergeLists && i < m_numListPtrs ; i++ )
m_listPtrs[i]->freeList();
@ -203,6 +203,13 @@ bool Msg5::getList ( char rdbId ,
// remember stuff
m_rdbId = rdbId;
m_coll = coll;
m_collnum = g_collectiondb.getCollnum ( coll );
if ( m_collnum < 0 ) {
g_errno = ENOCOLLREC;
return true;
}
m_list = list;
//m_startKey = startKey;
//m_endKey = endKey;
@ -466,7 +473,12 @@ bool Msg5::getList ( char rdbId ,
// timing debug
//log("Msg5:getting list startKey.n1=%lu",m_startKey.n1);
// start the read loop - hopefully, will only loop once
return readList ( );
if ( readList ( ) ) return true;
// tell Spider.cpp not to nuke us until we get back!!!
m_waitingForList = true;
// we blocked!!! must call m_callback
return false;
}
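
The m_waitingForList changes follow an in-flight-flag pattern: set the flag only when the read actually blocks, and clear it in the completion wrappers, so reset() and Spider.cpp's collection delete can tell whether the Msg5 may be torn down. A generic sketch of that pattern under those assumptions (Msg5 itself carries far more state than this):

struct AsyncReader {
	bool m_waitingForList;               // true while a read is outstanding

	AsyncReader ( ) : m_waitingForList(false) { }

	bool readList ( ) { return true; }    // stand-in for Msg5::readList()

	// returns true if the read completed synchronously, false if it blocked
	bool startRead ( ) {
		if ( readList ( ) ) return true; // done right away, flag stays false
		m_waitingForList = true;          // blocked: do not nuke me yet
		return false;
	}

	// called by the I/O layer when the blocked read finishes
	void onReadDone ( ) {
		m_waitingForList = false;         // safe to reset/delete again
		// ... invoke the user callback here ...
	}
};
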
// . returns false if blocked, true otherwise
// . sets g_errno on error
@ -725,7 +737,7 @@ bool Msg5::readList ( ) {
if ( m_treeList.m_ks != m_ks ) { char *xx = NULL; *xx = 0; }
// we are waiting for the list
m_waitingForList = true;
//m_waitingForList = true;
// clear just in case
g_errno = 0;
@ -915,6 +927,8 @@ void gotListWrapper ( void *state ) {
if ( THIS->m_calledCallback ) { char *xx=NULL;*xx=0; }
// set it now
THIS->m_calledCallback = 1;
// we are no longer waiting for the list
THIS->m_waitingForList = false;
// when completely done call the callback
THIS->m_callback ( THIS->m_state , THIS->m_list , THIS );
}
@ -931,7 +945,7 @@ static void *mergeListsWrapper_r ( void *state , ThreadEntry *t ) ;
bool Msg5::gotList ( ) {
// we are no longer waiting for the list
m_waitingForList = false;
//m_waitingForList = false;
// debug msg
//log("msg5 got lists from msg3 (msg5=%lu)",(long)this);
@ -1064,8 +1078,15 @@ bool Msg5::gotList2 ( ) {
// sanity check
//if ( KEYNEG(m_listPtrs[i]->getEndKey()) ) {
// char *xx=NULL;*xx=0; }
if ( KEYCMP(m_listPtrs[i]->getEndKey(),m_minEndKey,m_ks)<0 )
if ( KEYCMP(m_listPtrs[i]->getEndKey(),m_minEndKey,m_ks)<0 ) {
KEYSET(m_minEndKey,m_listPtrs[i]->getEndKey(),m_ks);
// crap, if list is all negative keys, then the
// end key seems negative too! however in this
// case RdbScan::m_endKey seems positive so
// maybe we got a negative endkey in constrain?
//if (! (m_minEndKey[0] & 0x01) )
// log("msg5: list had bad endkey");
}
}
// sanity check
//if ( KEYNEG( m_minEndKey) ) {char *xx=NULL;*xx=0; }
@ -1152,7 +1173,7 @@ bool Msg5::gotList2 ( ) {
// filter happens and we have a chance to weed out old titleRecs
if ( m_rdbId == RDB_TITLEDB && m_numFiles != 1 && n == 1 &&
m_isRealMerge ) {
log(LOG_LOGIC,"db: Adding dummy list.");
//log(LOG_LOGIC,"db: Adding dummy list.");
//m_tfns [n] = 255;
m_dummy.set ( NULL , // list data
0 , // list data size
@ -1377,6 +1398,8 @@ bool Msg5::gotList2 ( ) {
// skip it for now
//goto skipThread;
//m_waitingForMerge = true;
// . if size is big, make a thread
// . let's always make niceness 0 since it wasn't being very
// aggressive before
@ -1386,8 +1409,11 @@ bool Msg5::gotList2 ( ) {
threadDoneWrapper ,
mergeListsWrapper_r ) )
return false;
//m_waitingForMerge = false;
// thread creation failed
if ( ! g_threads.areThreadsDisabled() )
if ( g_conf.m_useThreads && ! g_threads.m_disabled )
log(LOG_INFO,
"net: Failed to create thread to merge lists. Doing "
"blocking merge. Hurts performance.");
@ -1441,6 +1467,8 @@ void threadDoneWrapper ( void *state , ThreadEntry *t ) {
if ( THIS->needsRecall() && ! THIS->readList() ) return;
// sanity check
if ( THIS->m_calledCallback ) { char *xx=NULL;*xx=0; }
// we are no longer waiting for the list
THIS->m_waitingForList = false;
// set it now
THIS->m_calledCallback = 3;
// when completely done call the callback
@ -1716,6 +1744,8 @@ void Msg5::mergeLists_r ( ) {
// . we are left with an empty list
bool Msg5::doneMerging ( ) {
//m_waitingForMerge = false;
// get base, returns NULL and sets g_errno to ENOCOLLREC on error
RdbBase *base; if (!(base=getRdbBase(m_rdbId,m_coll))) return true;
@ -2017,6 +2047,8 @@ void gotRemoteListWrapper( void *state ) { // , RdbList *list ) {
if ( ! THIS->gotRemoteList() ) return;
// sanity check
if ( THIS->m_calledCallback ) { char *xx=NULL;*xx=0; }
// we are no longer waiting for the list
THIS->m_waitingForList = false;
// set it now
THIS->m_calledCallback = 4;
// if it doesn't block call the callback, g_errno may be set

Msg5.h

@ -293,6 +293,8 @@ class Msg5 {
bool m_mergeLists;
char m_waitingForList;
//char m_waitingForMerge;
collnum_t m_collnum;
// actually part of a different algo than m_waitingForList!
unsigned long long m_waitingKey;
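The Msg5 hunks above change where m_waitingForList is raised and lowered: it is now set only when getList() actually blocks and cleared in the completion wrappers, apparently so reset() (and Spider.cpp) can refuse to tear the object down while a reply is still pending. A minimal sketch of that in-flight guard, assuming hypothetical names:

#include <cstdio>
#include <functional>

// Hypothetical async reader guarded by an "in flight" flag so the owner
// never resets it while a callback is still pending.
struct AsyncReader {
    bool m_waitingForList = false;
    std::function<void()> m_callback;

    // returns false if it blocked (callback will fire later)
    bool getList(bool willBlock, std::function<void()> cb) {
        m_callback = cb;
        if (!willBlock) return true;      // completed inline
        m_waitingForList = true;          // raise the guard only when we block
        return false;
    }

    // completion wrapper: lower the guard before invoking the callback
    void gotListWrapper() {
        m_waitingForList = false;
        m_callback();
    }

    void reset() {
        if (m_waitingForList)
            printf("reset while a reply is pending -- refusing\n");
    }
};

int main() {
    AsyncReader r;
    if (!r.getList(true, [] { printf("list arrived\n"); })) {
        r.reset();            // guard is up, reset is refused
        r.gotListWrapper();   // simulated completion lowers the guard
    }
    r.reset();                // now safe
    return 0;
}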

@ -118,8 +118,14 @@ bool Msge0::launchRequests ( long starti ) {
loop:
// stop if no more urls. return true if we got all replies! no block.
if ( m_n >= m_numUrls ) return (m_numRequests == m_numReplies);
// if all hosts are getting a diffbot reply with 50 spiders and they
// all timeout at the same time we can very easily clog up the
// udp sockets, so use this to limit... i've seen the whole
// spider tables stuck with "getting outlink tag rec vector" statuses
long maxOut = MAX_OUTSTANDING_MSGE0;
if ( g_udpServer.m_numUsedSlots > 500 ) maxOut = 1;
// if we are maxed out, we basically blocked!
if (m_numRequests - m_numReplies >= MAX_OUTSTANDING_MSGE0)return false;
if (m_numRequests - m_numReplies >= maxOut ) return false;
// . skip if "old"
// . we are not planning on adding this to spiderdb, so Msg16
// want to skip the ip lookup, etc.
@ -145,7 +151,8 @@ bool Msge0::launchRequests ( long starti ) {
// . grab a slot
// . m_msg8as[i], m_msgCs[i], m_msg50s[i], m_msg20s[i]
long i;
for ( i = starti ; i < MAX_OUTSTANDING_MSGE0 ; i++ )
// make this 0 since "maxOut" now changes!!
for ( i = 0 /*starti*/ ; i < MAX_OUTSTANDING_MSGE0 ; i++ )
if ( ! m_used[i] ) break;
// sanity check
if ( i >= MAX_OUTSTANDING_MSGE0 ) { char *xx = NULL; *xx = 0; }
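The Msge0 hunks above throttle how many outlink lookups may be outstanding at once: when the UDP server already has more than 500 slots in use, the cap drops from MAX_OUTSTANDING_MSGE0 to 1 so a burst of timed-out diffbot replies cannot clog the sockets. A standalone sketch of that adaptive cap, with the thresholds taken from the diff and everything else hypothetical:

#include <cstdio>

const int MAX_OUTSTANDING_MSGE0 = 30;   // hypothetical compile-time cap

// Pick the effective cap from current UDP slot pressure, mirroring the
// logic in launchRequests(): back off to a single outstanding request
// when the transport layer is already congested.
int effectiveCap(int usedUdpSlots) {
    int maxOut = MAX_OUTSTANDING_MSGE0;
    if (usedUdpSlots > 500) maxOut = 1;
    return maxOut;
}

bool canLaunch(int numRequests, int numReplies, int usedUdpSlots) {
    // if we are maxed out, we basically blocked
    return (numRequests - numReplies) < effectiveCap(usedUdpSlots);
}

int main() {
    printf("idle network: %d\n",      canLaunch(5, 3, 100));  // 1: launch more
    printf("congested network: %d\n", canLaunch(5, 4, 900));  // 0: hold off
    return 0;
}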

@ -443,6 +443,8 @@ void Multicast::gotReply2 ( UdpSlot *slot ) {
long now = getTime();
if (now - s_elastTime > 10) {s_elastTime = now; logIt=true;}
}
// don't log ETRYAGAIN, may come across as bad when it is normal
if ( m_errnos[i] == ETRYAGAIN ) logIt = false;
// log a failure msg
if ( logIt ) { // m_errnos[i] != ETRYAGAIN ) {
Host *h = m_hostdb->getHost ( slot->m_ip ,slot->m_port );

@ -31,7 +31,7 @@ bool sendPageAddDelColl ( TcpSocket *s , HttpRequest *r , bool add ) {
char *msg = NULL;
// if any host in network is dead, do not do this
if ( g_hostdb.hasDeadHost() ) msg = "A host in the network is dead.";
//if ( g_hostdb.hasDeadHost() ) msg = "A host in the network is dead.";
// . are we adding a collection?
// . return if error adding, might already exist!
@ -85,15 +85,18 @@ bool sendPageAddDelColl ( TcpSocket *s , HttpRequest *r , bool add ) {
// print the add collection box
if ( add /*&& (! nc[0] || g_errno ) */ ) {
p.safePrintf (
"<center>\n<table border=1 cellpadding=4 "
"width=100%% bgcolor=#%s>\n"
"<tr><td colspan=2 bgcolor=#%s>"
"<center>\n<table %s>\n"
"<tr class=hdrow><td colspan=2>"
"<center><b>Add Collection</b></center>"
"</td></tr>\n",LIGHT_BLUE,DARK_BLUE);
"</td></tr>\n",
TABLE_STYLE);
p.safePrintf (
"<tr><td><b>name of new collection to add</td>\n"
"<tr bgcolor=#%s>"
"<td><b>name of new collection to add</td>\n"
"<td><input type=text name=addColl size=30>"
"</td></tr>\n");
"</td></tr>\n"
, LIGHT_BLUE
);
// now list collections from which to copy the config
//p.safePrintf (
// "<tr><td><b>copy configuration from this "
@ -118,27 +121,31 @@ bool sendPageAddDelColl ( TcpSocket *s , HttpRequest *r , bool add ) {
// print all collections out in a checklist so you can check the
// ones you want to delete, the values will be the id of that collectn
p.safePrintf (
"<center>\n<table border=1 cellpadding=4 "
"width=100%% bgcolor=#%s>\n"
"<tr><td bgcolor=#%s><center><b>Delete Collections"
"<center>\n<table %s>\n"
"<tr class=hdrow><td><center><b>Delete Collections"
"</b></center></td></tr>\n"
"<tr><td>"
"<tr bgcolor=#%s><td>"
"<center><b>Select the collections you wish to delete. "
//"<font color=red>This feature is currently under "
//"development.</font>"
"</b></center></td></tr>\n"
"<tr><td>"
"<tr bgcolor=#%s><td>"
// table within a table
"<center><table width=20%%>\n",
LIGHT_BLUE,DARK_BLUE);
TABLE_STYLE,
LIGHT_BLUE,
DARK_BLUE
);
for ( long i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) {
CollectionRec *cr = g_collectiondb.m_recs[i];
if ( ! cr ) continue;
p.safePrintf (
"<tr><td>"
"<input type=checkbox name=delete value=\"%s\"> "
"%s</td></tr>\n",cr->m_coll,cr->m_coll);
"<tr bgcolor=#%s><td>"
"<input type=checkbox name=delColl value=\"%s\"> "
"%s</td></tr>\n",
DARK_BLUE,
cr->m_coll,cr->m_coll);
}
p.safePrintf( "</table></center></td></tr></table><br>\n" );
skip:

@ -89,7 +89,7 @@ bool sendPageAddUrl ( TcpSocket *s , HttpRequest *r ) {
collLen = gbstrlen(coll);
}
// get collection rec
CollectionRec *cr = g_collectiondb.getRec ( coll );
CollectionRec *cr = g_collectiondb.getRec ( r ); // coll );
// bitch if no collection rec found
if ( ! cr ) {
g_errno = ENOCOLLREC;
@ -248,8 +248,6 @@ bool sendPageAddUrl ( TcpSocket *s , HttpRequest *r ) {
//
SpiderRequest *sreq = &st1->m_sreq;
// set the SpiderRequest from this add url
if ( ! sreq->setFromAddUrl ( st1->m_url ) ) {
if ( ! g_errno ) { char *xx=NULL;*xx=0; }

@ -149,30 +149,54 @@ bool sendReply ( void *state ) {
// . do not print big links if only an assassin, just print host ids
g_pages.printAdminTop ( &sb, st->m_socket , &st->m_r );
sb.safePrintf ( "<table width=100%% bgcolor=#%s border=1 cellpadding=4>"
"<tr><td bgcolor=#%s colspan=2>"
sb.safePrintf(
"<style>"
".poo { background-color:#%s;}\n"
"</style>\n" ,
LIGHT_BLUE );
sb.safePrintf ( "<table %s>"
"<tr><td colspan=2>"
"<center><font size=+1><b>Catdb</b></font></center>"
"</td></tr>", LIGHT_BLUE , DARK_BLUE );
"</td></tr>", TABLE_STYLE );
// instructions
sb.safePrintf("<tr bgcolor=#%s>"
"<td colspan=3>"
"<font size=-2>"
"<center>"
"Don't just start using this, you need to follow the "
"instructions in the <i>admin guide</i> for adding "
"DMOZ support."
"</center>"
"</font>"
"</td>"
"</tr>"
,DARK_BLUE
);
// print the generate Catdb link
sb.safePrintf ( "<tr><td>Update Catdb from DMOZ data.</td>"
sb.safePrintf ( "<tr class=poo><td>Update Catdb from DMOZ data.</td>"
"<td><center>"
"<a href=\"/master/catdb?c=%s&gencatdb=2\">"
"Update Catdb</a> "
"</center></td></tr>",
st->m_coll );
sb.safePrintf ( "<tr><td>Generate New Catdb from DMOZ data.</td>"
sb.safePrintf ( "<tr class=poo>"
"<td>Generate New Catdb from DMOZ data.</td>"
"<td><center>"
"<a href=\"/master/catdb?c=%s&gencatdb=1\">"
"Generate Catdb</a> "
"</center></td></tr>",
st->m_coll );
if (st->m_genCatdb)
sb.safePrintf ( "<tr><td> Catdb Generation took %lli ms."
sb.safePrintf ( "<tr class=poo>"
"<td> Catdb Generation took %lli ms."
"</td></tr>",
endTime - st->m_startTime );
// print Url Category Lookup
sb.safePrintf ( "<tr><td>Lookup Category of Url.</td>"
sb.safePrintf ( "<tr class=poo><td>Lookup Category of Url.</td>"
"<td><input type=text name=caturl size=80"
" value=\"");
if (st->m_catLookup) {

@ -160,6 +160,10 @@ bool sendBackDump ( TcpSocket *sock, HttpRequest *hr ) {
rdbId = RDB_SPIDERDB;
fmt = FMT_CSV;
}
else if ( ( xx = strstr ( path , "_urls.txt" ) ) ) {
rdbId = RDB_SPIDERDB;
fmt = FMT_TXT;
}
else if ( ( xx = strstr ( path , "_pages.txt" ) ) ) {
rdbId = RDB_TITLEDB;
fmt = FMT_TXT;
@ -204,6 +208,10 @@ bool sendBackDump ( TcpSocket *sock, HttpRequest *hr ) {
SafeBuf sb2(tmp2,5000);
sb2.safePrintf("GET /search.csv?icc=1&format=csv&sc=0&dr=0&"
"c=%s&n=1000000&"
// no gigabits
"dsrt=0&"
// do not compute summary. 0 lines.
"ns=0&"
"q=gbsortby%%3Agbspiderdate&"
"prepend=type%%3Ajson"
"\r\n\r\n"
@ -231,6 +239,7 @@ bool sendBackDump ( TcpSocket *sock, HttpRequest *hr ) {
return g_httpServer.sendErrorReply(sock,500,mstrerror(g_errno));
}
mnew ( st , sizeof(StateCD), "statecd");
// initialize the new state
st->m_rdbId = rdbId;
st->m_downloadJSON = downloadJSON;
@ -266,13 +275,60 @@ bool sendBackDump ( TcpSocket *sock, HttpRequest *hr ) {
return true;
}
// . all wrappers call this
// . returns false if would block, true otherwise
bool readAndSendLoop ( StateCD *st , bool readFirst ) {
subloop:
// if we had a broken pipe on the sendChunk() call then hopefully
// this will kick in...
if ( g_errno ) {
log("crawlbot: readAndSendLoop: %s",mstrerror(g_errno));
readFirst = true;
st->m_someoneNeedsMore = false;
}
// wait if some are outstanding. how can this happen?
if ( st->m_numRequests > st->m_numReplies ) {
log("crawlbot: only got %li of %li replies. waiting for "
"all to come back in.",
st->m_numReplies,st->m_numRequests);
return false;
}
// are we all done?
if ( readFirst && ! st->m_someoneNeedsMore ) {
log("crawlbot: done sending for download request");
mdelete ( st , sizeof(StateCD) , "stcd" );
delete st;
return true;
}
// begin reading from each shard and sending the spiderdb records
// over the network. return if that blocked
if ( readFirst && ! st->readDataFromRdb ( ) ) return false;
// send it to the browser socket. returns false if blocks.
if ( ! st->sendList() ) return false;
// read again i guess
readFirst = true;
// hey, it did not block... tcpserver caches writes...
goto subloop;
}
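The new readAndSendLoop() above replaces the old ad-hoc goto loops with one driver that alternates between reading a chunk from the shards and streaming it to the browser, returning whenever either step blocks and cleaning up when nobody needs more data. A minimal sketch of that alternating read/send driver, assuming hypothetical readChunk()/sendChunk() primitives:

#include <cstdio>

// Hypothetical data source/sink used only to illustrate the driver shape.
struct DumpState {
    int  chunksLeft       = 3;
    bool someoneNeedsMore = true;

    bool readChunk() {                 // returns false if it blocked
        if (--chunksLeft <= 0) someoneNeedsMore = false;
        printf("read a chunk (%d left)\n", chunksLeft);
        return true;
    }
    bool sendChunk() {                 // returns false if it blocked
        printf("sent a chunk\n");
        return true;
    }
};

// returns false if it blocked, true when the dump is complete
bool readAndSendLoop(DumpState *st, bool readFirst) {
subloop:
    if (readFirst && !st->someoneNeedsMore) {
        printf("done sending for download request\n");
        return true;                   // the real code frees the state here
    }
    if (readFirst && !st->readChunk()) return false;  // read blocked
    if (!st->sendChunk())              return false;  // send blocked
    readFirst = true;                  // tcp write was buffered, keep going
    goto subloop;
}

int main() { DumpState st; readAndSendLoop(&st, true); return 0; }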
void StateCD::sendBackDump2 ( ) {
m_numRequests = 0;
m_numReplies = 0;
// read 10MB from each shard's spiderdb at a time
m_minRecSizes = 9999999;
//m_minRecSizes = 9999999;
// 100k to be more fluid
m_minRecSizes = 99999;
// we stop reading from all shards when this becomes false
m_someoneNeedsMore = true;
@ -284,20 +340,22 @@ void StateCD::sendBackDump2 ( ) {
KEYMIN((char *)&m_titledbStartKeys[i],sizeof(key_t));
}
subloop:
// begin reading from each shard and sending the spiderdb records
// over the network. return if that blocked
if ( ! readDataFromRdb ( ) ) return;
// send it to the browser socket
if ( ! sendList() ) return;
// . hey, it did not block... i guess no data to send out
// . but if all shards are exhausted from the dump, just return
if ( m_someoneNeedsMore ) goto subloop;
// note it
log("crawlbot: nobody needs more 1");
// begin reading from the shards and transmitting back on m_socket
readAndSendLoop ( this , true );
}
void sendListWrapper ( void *state ) ;
static void gotListWrapper7 ( void *state ) {
// get the Crawler dump State
StateCD *st = (StateCD *)state;
// inc it up here
st->m_numReplies++;
// wait for all
if ( st->m_numReplies < st->m_numRequests ) return;
// read and send loop
readAndSendLoop( st , false );
}
bool StateCD::readDataFromRdb ( ) {
@ -341,7 +399,7 @@ bool StateCD::readDataFromRdb ( ) {
// records
m_minRecSizes,
this,
sendListWrapper ,
gotListWrapper7 ,
niceness ) ) {
log("crawlbot: blocked getting list from shard");
// continue if it blocked
@ -360,22 +418,6 @@ bool StateCD::readDataFromRdb ( ) {
return true;
}
void sendListWrapper ( void *state ) {
// get the Crawler dump State
StateCD *st = (StateCD *)state;
// inc it up here
st->m_numReplies++;
subloop:
// if this blocked sending back some data, return
if ( ! st->sendList() ) return;
// otherwise, read more, maybe had no data to send from list
if ( ! st->readDataFromRdb () ) return;
// send and read more
if ( st->m_someoneNeedsMore ) goto subloop;
// note it
log("crawlbot: nobody needs more 2");
}
bool StateCD::sendList ( ) {
// get the Crawler dump State
// inc it
@ -403,6 +445,7 @@ bool StateCD::sendList ( ) {
// then do so here, the content-length will not be in there
// because we might have to call for more spiderdb data
if ( m_needsMime ) {
m_needsMime = false;
HttpMime mime;
mime.makeMime ( -1, // total content-length is unknown!
0 , // do not cache (cacheTime)
@ -496,6 +539,13 @@ bool StateCD::sendList ( ) {
list->freeList();
}
//log("rdbid=%li fmt=%li some=%li printed=%li",
// (long)m_rdbId,(long)m_fmt,(long)m_someoneNeedsMore,
// (long)m_printedEndingBracket);
bool lastChunk = false;
if ( ! m_someoneNeedsMore )
lastChunk = true;
// if nobody needs to read more...
if ( m_rdbId == RDB_TITLEDB &&
@ -504,113 +554,31 @@ bool StateCD::sendList ( ) {
! m_printedEndingBracket ) {
m_printedEndingBracket = true;
// end array of json objects. might be empty!
sb.safePrintf("\n]");
sb.safePrintf("\n]\n");
//log("adding ]. len=%li",sb.length());
}
// if first time, send it back
if ( m_needsMime ) {
// only do once
m_needsMime = false;
TcpServer *tcp = &g_httpServer.m_tcp;
sendLoop:
// start the send process
TcpServer *tcp = &g_httpServer.m_tcp;
if ( ! tcp->sendMsg ( m_socket ,
sb.getBufStart(), // sendBuf ,
sb.getCapacity(),//sendBufSize ,
sb.length(),//sendBufSize ,
sb.length(), // msgtotalsize
this , // data for callback
doneSendingWrapper ) ) { // callback
// do not free sendbuf we are transmitting it
sb.detachBuf();
return false;
}
// error?
//TcpSocket *s = m_socket;
// sometimes it does not block and is successful because
// it just writes its buffer out in one write call.
//if ( ! g_errno )
sb.detachBuf();
// . transmit the chunk in sb
// . steals the allocated buffer from sb and stores in the
// TcpSocket::m_sendBuf, which it frees when socket is
// ultimately destroyed or we call sendChunk() again.
// . when TcpServer is done transmitting, it does not close the
// socket but rather calls doneSendingWrapper() which can call
// this function again to send another chunk
// . when we are truly done sending all the data, then we set lastChunk
// to true and TcpServer.cpp will destroy m_socket when done
if ( ! tcp->sendChunk ( m_socket ,
&sb ,
this ,
doneSendingWrapper ,
lastChunk ) )
return false;
// log it
//log("crawlbot: nuking state. strange");
// nuke state
//delete this;
//mdelete ( this , sizeof(StateCD) , "stcd" );
//if ( g_errno )
log("diffbot: tcp sendmsg did not block: %s",
mstrerror(g_errno));
//g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));
// wait for doneSendingWrapper to be called.
//return false;
//
// it did not block... so just keep going. that just
// means the socket sent the data. it's probably buffered.
//
// but we DO have to free the sendbuffer here since
// we did not block
mfree ( m_socket->m_sendBuf ,
m_socket->m_sendBufSize ,
"dbsbuf");
m_socket->m_sendBuf = NULL;
return true;
}
// if nothing to send back we are done. return true since we
// did not block sending back.
if ( sb.length() == 0 ) {
//log("crawlbot: nuking state.");
//delete this;
//mdelete ( this , sizeof(StateCD) , "stcd" );
return true;
}
// how can this be?
if ( m_socket->m_sendBuf ) { char *xx=NULL;*xx=0; }
// put socket in sending-again mode
m_socket->m_sendBuf = sb.getBufStart();
m_socket->m_sendBufSize = sb.getCapacity();
m_socket->m_sendBufUsed = sb.length();
m_socket->m_sendOffset = 0;
m_socket->m_totalSent = 0;
m_socket->m_totalToSend = sb.length();
// tell TcpServer.cpp to send this latest buffer! HACK!
//m_socket->m_sockState = ST_SEND_AGAIN;//ST_WRITING;//SEND_AGAIN;
// this does nothing if we were not called indirectly by
// TcpServer::writeSocketWrapper_r(). so if we should call
// sendMsg() ourselves in such a situation.
// so if the sendMsg() did not block, the first time, and we came
// here empty except for the ending ']' the 2nd time, then
// write it out this way... calling sendMsg() directly
if ( m_socket->m_sockState == ST_NEEDS_CLOSE ) {
//m_socket->m_sockState = ST_SEND_AGAIN;
goto sendLoop;
}
// do not let safebuf free this, we will take care of it
sb.detachBuf();
// . when it is done sending call this callback, don't hang up!
// . if m_someoneNeedsMore is false then this callback should just
// destroy the socket and delete "this"
m_socket->m_callback = doneSendingWrapper;
m_socket->m_state = this;
//if ( m_socket->m_sendBufUsed == 79 )
// log("hey");
// log it
log("crawlbot: resending %li bytes on socket",m_socket->m_sendBufUsed);
// we blocked sending back
return false;
// we are done sending this chunk, i guess tcp write was cached
// in the network card buffer or something
return true;
}
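Per the comments above, sendList() now hands each buffer to a sendChunk()-style call that takes ownership of the SafeBuf and invokes doneSendingWrapper() once the socket has drained, with a lastChunk flag telling the server to close the socket after the final piece. A minimal sketch of that chunked-transfer handshake, with every name hypothetical:

#include <cstdio>
#include <string>
#include <functional>

// Hypothetical chunked sender: the callback fires when the previous chunk
// has been written out, so the producer can generate the next one.
struct ChunkedSender {
    std::function<void()> onDrained;

    // returns false if the write blocked (callback fires later),
    // true if the kernel buffered it immediately.
    bool sendChunk(const std::string &chunk, bool lastChunk,
                   std::function<void()> cb) {
        onDrained = cb;
        printf("queued %zu bytes%s\n", chunk.size(),
               lastChunk ? " (last chunk, close when drained)" : "");
        return false;                      // pretend the socket blocked
    }
    void socketDrained() { if (onDrained) onDrained(); }
};

int main() {
    ChunkedSender tcp;
    int chunksLeft = 2;
    std::function<void()> produce = [&]() {
        if (chunksLeft == 0) return;       // nothing more to send
        chunksLeft--;
        tcp.sendChunk("data", /*lastChunk=*/chunksLeft == 0, produce);
    };
    produce();              // first chunk
    tcp.socketDrained();    // done-sending callback produces the next one
    tcp.socketDrained();    // final drain, nothing more to produce
    return 0;
}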
// TcpServer.cpp calls this when done sending TcpSocket's m_sendBuf
@ -618,83 +586,16 @@ void doneSendingWrapper ( void *state , TcpSocket *sock ) {
StateCD *st = (StateCD *)state;
TcpSocket *socket = st->m_socket;
//TcpSocket *socket = st->m_socket;
log("crawlbot: done sending on socket %li/%li bytes",
sock->m_totalSent,
sock->m_sendBufUsed);
// . if the final callback
// . sometimes m_sendBuf is NULL if we freed it below and tried to
// read more, only to read 0 bytes
// . but it will be non-null if we read 0 bytes the first time
// and just have a mime to send. because sendReply() above
// returned true, and then doneSendingWrapper() got called.
if ( //! socket->m_sendBuf &&
st->m_numRequests <= st->m_numReplies &&
! st->m_someoneNeedsMore ) {
log("crawlbot: done sending for download request");
delete st;
mdelete ( st , sizeof(StateCD) , "stcd" );
//log("mdel1: st=%lx",(long)st);
return;
}
// if the timer called us, just return
if ( ! socket->m_sendBuf ) {
log("crawlbot: timer callback");
socket->m_sockState = ST_SEND_AGAIN;
return;
}
readAndSendLoop ( st , true );
// free the old sendbuf then i guess since we might replace it
// in the above function.
mfree ( socket->m_sendBuf ,
socket->m_sendBufSize ,
"dbsbuf");
// in case we have nothing to send back do not let socket free
// what we just freed above. it'll core.
socket->m_sendBuf = NULL;
// sometimes this wrapper is called just from the timer...
// so if we have outstanding msg0s then we gotta wait
if ( st->m_numRequests > st->m_numReplies ) {
char *xx=NULL;*xx=0;
socket->m_sockState = ST_SEND_AGAIN;
return;
}
// all done?
if ( st->m_someoneNeedsMore ) {
// make sure socket doesn't close up on us!
socket->m_sockState = ST_SEND_AGAIN;
log("crawlbot: reading more download data");
// just enter the little loop here
subloop:
// otherwise, read more, maybe had no data to send from list
if ( ! st->readDataFromRdb () ) return;
// if this blocked sending back some data, return
if ( ! st->sendList() ) return;
// note that
log("crawlbot: sendList did not block");
// send and read more
if ( st->m_someoneNeedsMore ) goto subloop;
// note it
log("crawlbot: nobody needs more 3");
// sanity
if ( st->m_numRequests>st->m_numReplies){char *xx=NULL;*xx=0;}
}
log("crawlbot: no more data available");
// it's possible that readDataFromRdb() did not block and called
// sendList which set the socket m_sendBuf again... so check
// for that... it needs to be sent yet before we delete this state
//if ( st->m_socket->m_sendBuf ) return;
return;
}
void StateCD::printSpiderdbList ( RdbList *list,SafeBuf *sb,char **lastKeyPtr){
@ -804,7 +705,9 @@ void StateCD::printSpiderdbList ( RdbList *list,SafeBuf *sb,char **lastKeyPtr){
nowGlobalMS,
false,
MAX_NICENESS,
cr);
cr,
false, // isoutlink?
NULL);
char *expression = NULL;
long priority = -4;
// sanity check
@ -821,7 +724,9 @@ void StateCD::printSpiderdbList ( RdbList *list,SafeBuf *sb,char **lastKeyPtr){
// when spidering rounds we use the
// lastspidertime>={roundstart} --> spiders disabled rule
// so that we do not spider a url twice in the same round
if ( ufn >= 0 && ! cr->m_spidersEnabled[ufn] ) {
if ( ufn >= 0 && //! cr->m_spidersEnabled[ufn] ) {
// we set this to 0 instead of using the checkbox
cr->m_maxSpidersPerRule[ufn] <= 0 ) {
priority = -5;
}
@ -837,7 +742,10 @@ void StateCD::printSpiderdbList ( RdbList *list,SafeBuf *sb,char **lastKeyPtr){
m_isFirstTime = false;
sb->safePrintf("\"Url\","
"\"Entry Method\","
"\"Processed?\","
);
if ( cr->m_isCustomCrawl )
sb->safePrintf("\"Processed?\",");
sb->safePrintf(
"\"Add Time\","
"\"Last Crawled\","
"\"Last Status\","
@ -869,12 +777,15 @@ void StateCD::printSpiderdbList ( RdbList *list,SafeBuf *sb,char **lastKeyPtr){
// but default to csv
else {
sb->safePrintf("\"%s\",\"%s\","
"%li,%lu,%lu,\"%s\",\"%s\",\""
//",%s"
//"\n"
, sreq->m_url
, as
, (long)isProcessed
);
if ( cr->m_isCustomCrawl )
sb->safePrintf("%li,",(long)isProcessed);
sb->safePrintf(
"%lu,%lu,\"%s\",\"%s\",\""
//",%s"
//"\n"
// when was it first added to spiderdb?
, sreq->m_addedTime
// last time spidered, 0 if none
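The two hunks above make the "Processed?" CSV column conditional: both the header row and each data row emit it only when the collection is a custom (diffbot) crawl, so the column counts stay in sync. A small sketch of keeping a conditional column consistent between header and rows, with hypothetical names and values:

#include <cstdio>
#include <string>

// Hypothetical CSV writer: the same flag gates the optional column in the
// header and in every row, so the column count always matches.
void printCsv(bool isCustomCrawl) {
    std::string header = "\"Url\",\"Entry Method\",";
    if (isCustomCrawl) header += "\"Processed?\",";
    header += "\"Add Time\",\"Last Crawled\"";
    printf("%s\n", header.c_str());

    std::string row = "\"http://example.com/\",\"manual\",";
    if (isCustomCrawl) row += "1,";
    row += "1391111111,1391111222";
    printf("%s\n", row.c_str());
}

int main() {
    printCsv(true);    // custom crawl: extra column present
    printCsv(false);   // regular crawl: column omitted everywhere
    return 0;
}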
@ -991,8 +902,11 @@ void StateCD::printTitledbList ( RdbList *list,SafeBuf *sb,char **lastKeyPtr){
m_printedItem = true;
if ( ! sb->safeStrcpyPrettyJSON ( json ) )
log("diffbot: error printing json in dump");
//if ( ! sb->safeStrcpyPrettyJSON ( json ) )
// log("diffbot: error printing json in dump");
sb->safeStrcpy ( json );
sb->nullTerm();
// separate each JSON object with \n i guess
//sb->pushChar('\n');
@ -1132,8 +1046,8 @@ void printCrawlStatsWrapper ( void *state ) {
// save before nuking state
TcpSocket *sock = sxx->m_socket;
// nuke the state
delete sxx;
mdelete ( sxx , sizeof(StateXX) , "stxx" );
delete sxx;
// and send back now
g_httpServer.sendDynamicPage ( sock ,
sb.getBufStart(),
@ -1383,8 +1297,8 @@ void addedUrlsToSpiderdbWrapper ( void *state ) {
NULL ,
&rr ,
st->m_collnum );
delete st;
mdelete ( st , sizeof(StateCD) , "stcd" );
delete st;
//log("mdel2: st=%lx",(long)st);
}
/*
@ -1460,8 +1374,8 @@ void injectedUrlWrapper ( void *state ) {
response,
NULL ,
st->m_collnum );
delete st;
mdelete ( st , sizeof(StateCD) , "stcd" );
delete st;
}
*/
@ -1587,8 +1501,8 @@ void collOpDoneWrapper ( void *state ) {
StateCD *st = (StateCD *)state;
TcpSocket *socket = st->m_socket;
log("crawlbot: done with blocked op.");
delete st;
mdelete ( st , sizeof(StateCD) , "stcd" );
delete st;
//log("mdel3: st=%lx",(long)st);
g_httpServer.sendDynamicPage (socket,"OK",2);
}
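Several hunks in this file swap `delete st` and `mdelete(...)` so the allocation is unregistered before the object is destroyed and its memory returned; mdelete is presumably the accounting counterpart of the mnew call made when the state was allocated. A rough standalone sketch of why that ordering matters for a tracker keyed by pointer (the mnew/mdelete bodies here are stand-ins, not the real Gigablast implementations):

#include <cstdio>
#include <cstddef>
#include <map>

// Stand-in allocation tracker (NOT the real mnew/mdelete): it records live
// allocations so leaks can be reported by label.
static std::map<void *, const char *> s_live;

void mnew   (void *p, size_t, const char *label) { s_live[p] = label; }
void mdelete(void *p, size_t, const char *)      { s_live.erase(p);   }

struct StateCD { /* request state */ };

int main() {
    StateCD *st = new StateCD;
    mnew(st, sizeof(StateCD), "statecd");

    // Unregister first, then destroy: the tracker never holds a pointer to
    // memory that has already been freed, which is the ordering the hunks
    // above enforce.
    mdelete(st, sizeof(StateCD), "stcd");
    delete st;

    printf("live allocations left: %zu\n", s_live.size());
    return 0;
}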
@ -1648,6 +1562,29 @@ bool sendPageCrawlbot ( TcpSocket *socket , HttpRequest *hr ) {
// . put in xml or json if format=xml or format=json or
// xml=1 or json=1 ...
char fmt = FMT_JSON;
// token is always required. get from json or html form input
//char *token = getInputString ( "token" );
char *token = hr->getString("token");
char *name = hr->getString("name");
// . try getting token-name from ?c=
// . the name of the collection is encoded as <token>-<crawlname>
char *c = hr->getString("c");
char tmp[MAX_COLL_LEN+100];
if ( ! token && c ) {
strncpy ( tmp , c , MAX_COLL_LEN );
token = tmp;
name = strstr(tmp,"-");
if ( name ) {
*name = '\0';
name++;
}
// change default formatting to html
fmt = FMT_HTML;
}
char *fs = hr->getString("format",NULL,NULL);
// give john a json api
if ( fs && strcmp(fs,"html") == 0 ) fmt = FMT_HTML;
@ -1656,9 +1593,7 @@ bool sendPageCrawlbot ( TcpSocket *socket , HttpRequest *hr ) {
// if we got json as input, give it as output
//if ( JS.getFirstItem() ) fmt = FMT_JSON;
// token is always required. get from json or html form input
//char *token = getInputString ( "token" );
char *token = hr->getString("token");
if ( ! token && fmt == FMT_JSON ) { // (cast==0|| fmt == FMT_JSON ) ) {
char *msg = "invalid token";
@ -1718,8 +1653,6 @@ bool sendPageCrawlbot ( TcpSocket *socket , HttpRequest *hr ) {
bool restartColl = hr->hasField("restart");
char *name = hr->getString("name");
//if ( delColl && ! && cast == 0 ) {
// log("crawlbot: no collection found to delete.");
// char *msg = "Could not find crawl to delete.";
@ -1906,8 +1839,8 @@ bool sendPageCrawlbot ( TcpSocket *socket , HttpRequest *hr ) {
em.safePrintf("Invalid regular expresion: %s",rx2);
}
if ( status1 || status2 ) {
delete st;
mdelete ( st , sizeof(StateCD) , "stcd" );
delete st;
char *msg = em.getBufStart();
return sendErrorReply2(socket,fmt,msg);
}
@ -1965,8 +1898,8 @@ bool sendPageCrawlbot ( TcpSocket *socket , HttpRequest *hr ) {
if ( resetColl ) msg = "No such collection";
if ( restartColl ) msg = "No such collection";
// nuke it
delete st;
mdelete ( st , sizeof(StateCD) , "stcd" );
delete st;
// log it
log("crawlbot: cr is null. %s",msg);
// make sure this returns in json if required
@ -1992,8 +1925,8 @@ bool sendPageCrawlbot ( TcpSocket *socket , HttpRequest *hr ) {
if ( ! g_collectiondb.deleteRec ( collName , we ) )
return false;
// nuke it
delete st;
mdelete ( st , sizeof(StateCD) , "stcd" );
delete st;
// all done
return g_httpServer.sendDynamicPage (socket,"OK",2);
}
@ -2017,14 +1950,14 @@ bool sendPageCrawlbot ( TcpSocket *socket , HttpRequest *hr ) {
// to avoid user confusion
if ( cr ) cr->m_spideringEnabled = 1;
// nuke it
delete st;
mdelete ( st , sizeof(StateCD) , "stcd" );
delete st;
// all done
return g_httpServer.sendDynamicPage (socket,"OK",2);
}
// nuke it
delete st;
mdelete ( st , sizeof(StateCD) , "stcd" );
delete st;
// this will set the the collection parms from json
//setSpiderParmsFromJSONPost ( socket , hr , cr , &JS );
// this is a cast, so just return simple response
@ -2050,8 +1983,8 @@ bool sendPageCrawlbot ( TcpSocket *socket , HttpRequest *hr ) {
if ( name && name[0] )
msg = "Failed to add crawl. Crawl name is illegal.";
// nuke it
delete st;
mdelete ( st , sizeof(StateCD) , "stcd" );
delete st;
//log("crawlbot: no collection found. need to add a crawl");
return sendErrorReply2(socket,fmt, msg);
}
@ -2101,15 +2034,15 @@ bool sendPageCrawlbot ( TcpSocket *socket , HttpRequest *hr ) {
// error?
if ( ! status ) {
// nuke it
delete st;
mdelete ( st , sizeof(StateCD) , "stcd" );
delete st;
return sendErrorReply2(socket,fmt,mstrerror(g_errno));
}
// if not list
if ( ! size ) {
// nuke it
delete st;
mdelete ( st , sizeof(StateCD) , "stcd" );
delete st;
return sendErrorReply2(socket,fmt,"no urls found");
}
// add to spiderdb
@ -2163,8 +2096,8 @@ bool sendPageCrawlbot ( TcpSocket *socket , HttpRequest *hr ) {
printCrawlBotPage2 ( socket,hr,fmt,NULL,NULL,cr->m_collnum);
// get rid of that state
delete st;
mdelete ( st , sizeof(StateCD) , "stcd" );
delete st;
//log("mdel4: st=%lx",(long)st);
return true;
}
@ -2281,8 +2214,11 @@ bool printCrawlDetailsInJson ( SafeBuf &sb , CollectionRec *cx ) {
//"\"urlsExamined\":%lli,\n"
"\"pageCrawlAttempts\":%lli,\n"
"\"pageCrawlSuccesses\":%lli,\n"
"\"pageCrawlSuccessesThisRound\":%lli,\n"
"\"pageProcessAttempts\":%lli,\n"
"\"pageProcessSuccesses\":%lli,\n"
"\"pageProcessSuccessesThisRound\":%lli,\n"
"\"maxRounds\":%li,\n"
"\"repeat\":%f,\n"
@ -2303,8 +2239,11 @@ bool printCrawlDetailsInJson ( SafeBuf &sb , CollectionRec *cx ) {
//,cx->m_globalCrawlInfo.m_urlsConsidered
, cx->m_globalCrawlInfo.m_pageDownloadAttempts
, cx->m_globalCrawlInfo.m_pageDownloadSuccesses
, cx->m_globalCrawlInfo.m_pageDownloadSuccessesThisRound
, cx->m_globalCrawlInfo.m_pageProcessAttempts
, cx->m_globalCrawlInfo.m_pageProcessSuccesses
, cx->m_globalCrawlInfo.m_pageProcessSuccessesThisRound
, (long)cx->m_maxCrawlRounds
, cx->m_collectiveRespiderFrequency
@ -2619,8 +2558,12 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
"<td><b>URLs Examined</b></td>"
"<td><b>Page Download Attempts</b></td>"
"<td><b>Page Download Successes</b></td>"
"<td><b>Page Download Successes This Round"
"</b></td>"
"<td><b>Page Process Attempts</b></td>"
"<td><b>Page Process Successes</b></td>"
"<td><b>Page Process Successes This Round"
"</b></td>"
"</tr>"
);
}
@ -2667,6 +2610,8 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
"<td>%lli</td>"
"<td>%lli</td>"
"<td>%lli</td>"
"<td>%lli</td>"
"<td>%lli</td>"
"</tr>"
, cx->m_coll
, cx->m_globalCrawlInfo.m_objectsAdded -
@ -2675,8 +2620,10 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
//, cx->m_globalCrawlInfo.m_urlsConsidered
, cx->m_globalCrawlInfo.m_pageDownloadAttempts
, cx->m_globalCrawlInfo.m_pageDownloadSuccesses
, cx->m_globalCrawlInfo.m_pageDownloadSuccessesThisRound
, cx->m_globalCrawlInfo.m_pageProcessAttempts
, cx->m_globalCrawlInfo.m_pageProcessSuccesses
, cx->m_globalCrawlInfo.m_pageProcessSuccessesThisRound
);
}
if ( summary && fmt == FMT_HTML ) {
@ -2732,6 +2679,8 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
return false;
// shortcut
XmlDoc **docs = g_spiderLoop.m_docs;
// row count
long j = 0;
// first print the spider recs we are spidering
for ( long i = 0 ; i < (long)MAX_SPIDERS ; i++ ) {
// get it
@ -2739,17 +2688,18 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
// skip if empty
if ( ! xd ) continue;
// sanity check
if ( ! xd->m_oldsrValid ) { char *xx=NULL;*xx=0; }
if ( ! xd->m_sreqValid ) { char *xx=NULL;*xx=0; }
// skip if not our coll rec!
//if ( xd->m_cr != cr ) continue;
if ( xd->m_collnum != cr->m_collnum ) continue;
// grab it
SpiderRequest *oldsr = &xd->m_oldsr;
SpiderRequest *oldsr = &xd->m_sreq;
// get status
char *status = xd->m_statusMsg;
// show that
if ( ! oldsr->printToTableSimple ( &sb , status,xd) )
if ( ! oldsr->printToTableSimple ( &sb , status,xd,j))
return false;
j++;
}
// end the table
@ -2888,6 +2838,9 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
//
if ( fmt == FMT_HTML ) {
char *seedStr = cr->m_diffbotSeeds.getBufStart();
if ( ! seedStr ) seedStr = "";
SafeBuf tmp;
long crawlStatus = -1;
getSpiderStatusMsg ( cr , &tmp , &crawlStatus );
@ -2927,6 +2880,11 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
"<td>%s</td>"
"</tr>"
"<tr>"
"<td><b>Seeds:</td>"
"<td>%s</td>"
"</tr>"
"<tr>"
"<td><b>Crawl Status:</td>"
"<td>%li</td>"
@ -2942,6 +2900,11 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
"<td>%li</td>"
"</tr>"
"<tr>"
"<td><b>Has Urls Ready to Spider:</td>"
"<td>%li</td>"
"</tr>"
// this will have to be in crawlinfo too!
//"<tr>"
@ -2975,6 +2938,11 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
"<td>%lli</td>"
"</tr>"
"<tr>"
"<td><b>Page Crawl Successes This Round</b></td>"
"<td>%lli</td>"
"</tr>"
"<tr>"
"<td><b>Page Process Attempts</b></td>"
"<td>%lli</td>"
@ -2985,6 +2953,11 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
"<td>%lli</td>"
"</tr>"
"<tr>"
"<td><b>Page Process Successes This Round</b></td>"
"<td>%lli</td>"
"</tr>"
, cr->m_diffbotCrawlName.getBufStart()
@ -2992,9 +2965,12 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
, cr->m_diffbotToken.getBufStart()
, seedStr
, crawlStatus
, tmp.getBufStart()
, cr->m_spiderRoundNum
, cr->m_globalCrawlInfo.m_hasUrlsReadyToSpider
, cr->m_globalCrawlInfo.m_objectsAdded -
cr->m_globalCrawlInfo.m_objectsDeleted
@ -3003,9 +2979,11 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
, cr->m_globalCrawlInfo.m_pageDownloadAttempts
, cr->m_globalCrawlInfo.m_pageDownloadSuccesses
, cr->m_globalCrawlInfo.m_pageDownloadSuccessesThisRound
, cr->m_globalCrawlInfo.m_pageProcessAttempts
, cr->m_globalCrawlInfo.m_pageProcessSuccesses
, cr->m_globalCrawlInfo.m_pageProcessSuccessesThisRound
);
@ -3841,6 +3819,9 @@ bool getSpiderRequestMetaList ( char *doc ,
SpiderRequest sreq;
sreq.reset();
sreq.m_firstIp = url.getHostHash32(); // fakeip!
// avoid ips of 0 or -1
if ( sreq.m_firstIp == 0 || sreq.m_firstIp == -1 )
sreq.m_firstIp = 1;
sreq.m_hostHash32 = url.getHostHash32();
sreq.m_domHash32 = url.getDomainHash32();
sreq.m_siteHash32 = url.getHostHash32();
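getSpiderRequestMetaList() above derives a fake m_firstIp from the host hash and then nudges it away from 0 and -1, which appear to be the "no IP yet"/"invalid" sentinels elsewhere in the spider code. A tiny sketch of that sentinel-avoiding hash, with the hash function itself a hypothetical stand-in for getHostHash32():

#include <cstdio>
#include <cstdint>

// Hypothetical 32-bit string hash standing in for getHostHash32().
static int32_t hostHash32(const char *s) {
    uint32_t h = 0;
    for (; *s; s++) h = h * 31 + (unsigned char)*s;
    return (int32_t)h;
}

// Fake "first IP" for a URL we have not resolved yet: any value is fine as
// long as it is stable for the host and never collides with the 0 / -1
// sentinels.
int32_t fakeFirstIp(const char *url) {
    int32_t ip = hostHash32(url);
    if (ip == 0 || ip == -1) ip = 1;
    return ip;
}

int main() {
    printf("fake ip for example.com: %ld\n", (long)fakeFirstIp("example.com"));
    return 0;
}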

@ -7527,7 +7527,7 @@ bool printTopBarNav ( SafeBuf &sb , State7 *st ) {
"</tr>"
// - shadow row
//"<tr cellspacing=5 height=5px><td colspan=9 "
//"bgcolor=%s></td></tr>"
//"bgcolor=#%s></td></tr>"
// END TOP TABLE
"</table>"
//, GRAD2
@ -12671,7 +12671,7 @@ bool gotResults ( void *state ) {
">"
"<tr>"
"<td valign=top>"
// bgcolor=%s
// bgcolor=#%s
//, GRAD1
//, bg
);

@ -712,7 +712,7 @@ bool processLoop ( void *state ) {
//Words *ww = xd->getWords();
if ( ! xml.set ( content , contentLen , false ,
0 , false , TITLEREC_CURRENT_VERSION ,
false , 0 ) ) { // niceness is 0
false , 0 , CT_HTML ) ) { // niceness is 0
//if ( buf ) mfree ( buf , bufMaxSize , "PageGet2" );
return sendErrorReply ( st , g_errno );
}

@ -108,13 +108,13 @@ skipReplaceHost:
refreshRate);
// ignore
char *username = g_users.getUsername ( r );
char *password = NULL;
User *user = NULL;
if ( username ) user = g_users.getUser (username );
if ( user ) password = user->m_password;
if ( ! password ) password = "";
if ( ! username ) username = "";
//char *username = g_users.getUsername ( r );
//char *password = NULL;
//User *user = NULL;
//if ( username ) user = g_users.getUser (username );
//if ( user ) password = user->m_password;
//if ( ! password ) password = "";
//if ( ! username ) username = "";
// print standard header
// char *pp = sb.getBuf();
@ -131,26 +131,26 @@ skipReplaceHost:
colspan = "31";
//shotcol = "<td><b>ip2</b></td>";
sprintf ( shotcol, "<td><a href=\"/master/hosts?c=%s"
"&sort=2&username=%s&pwd=%s\">"
"&sort=2\">"
"<b>ping2</b></td></a>",
coll,username,password);
coll);
}
// print host table
sb.safePrintf (
"<table cellpadding=4 border=1 width=100%% bgcolor=#%s>"
"<tr><td colspan=%s bgcolor=#%s><center>"
"<table %s>"
"<tr><td colspan=%s><center>"
//"<font size=+1>"
"<b>Hosts "
"(<a href=\"/master/hosts?c=%s&sort=%li&reset=1\">"
"reset)</b>"
//"</font>"
"</td></tr>"
"<tr>"
"<td><a href=\"/master/hosts?c=%s&sort=0&username=%s&"
"password=%s\">"
"<tr bgcolor=#%s>"
"<td><a href=\"/master/hosts?c=%s&sort=0\">"
"<b>hostId</b></td>"
"<td><b>host name</b></td>"
"<td><b>host ip</b></td>"
"<td><b>shard</b></td>" // mirror group
"<td><b>stripe</b></td>"
@ -187,49 +187,49 @@ skipReplaceHost:
//"<td><b>resends sent</td>"
//"<td><b>errors recvd</td>"
//"<td><b>ETRYAGAINS recvd</td>"
"<td><a href=\"/master/hosts?c=%s&username=%s&pwd=%s&sort=3\">"
"<td><a href=\"/master/hosts?c=%s&sort=3\">"
"<b>dgrams resent</a></td>"
"<td><a href=\"/master/hosts?c=%s&username=%s&pwd=%s&sort=4\">"
"<td><a href=\"/master/hosts?c=%s&sort=4\">"
"<b>errors recvd</a></td>"
"<td><a href=\"/master/hosts?c=%s&username=%s&pwd=%s&sort=5\">"
"<td><a href=\"/master/hosts?c=%s&sort=5\">"
"<b>ETRY AGAINS recvd</a></td>"
"<td><a href=\"/master/hosts?c=%s&username=%s&pwd=%s&sort=6\">"
"<td><a href=\"/master/hosts?c=%s&sort=6\">"
"<b>dgrams to</a></td>"
"<td><a href=\"/master/hosts?c=%s&username=%s&pwd=%s&sort=7\">"
"<td><a href=\"/master/hosts?c=%s&sort=7\">"
"<b>dgrams from</a></td>"
//"<td><a href=\"/master/hosts?c=%s&username=%s&pwd=%s&sort=8\">"
//"<td><a href=\"/master/hosts?c=%s&sort=8\">"
//"<b>loadavg</a></td>"
"<td><a href=\"/master/hosts?c=%s&username=%s&pwd=%s&sort=13\">"
"<td><a href=\"/master/hosts?c=%s&sort=13\">"
"<b>avg split time</a></td>"
"<td><b>splits done</a></td>"
"<td><a href=\"/master/hosts?c=%s&username=%s&pwd=%s&sort=12\">"
"<td><a href=\"/master/hosts?c=%s&sort=12\">"
"<b>status</a></td>"
"<td><a href=\"/master/hosts?c=%s&username=%s&pwd=%s&sort=15\">"
"<td><a href=\"/master/hosts?c=%s&sort=15\">"
"<b>slow reads</a></td>"
"<td><b>docs indexed</a></td>"
"<td><a href=\"/master/hosts?c=%s&username=%s&pwd=%s&sort=9\">"
"<td><a href=\"/master/hosts?c=%s&sort=9\">"
"<b>mem used</a></td>"
"<td><a href=\"/master/hosts?c=%s&username=%s&pwd=%s&sort=10\">"
"<td><a href=\"/master/hosts?c=%s&sort=10\">"
"<b>cpu</a></td>"
"<td><a href=\"/master/hosts?c=%s&username=%s&pwd=%s&sort=14\">"
"<td><a href=\"/master/hosts?c=%s&sort=14\">"
"<b>max ping1</a></td>"
"<td><a href=\"/master/hosts?c=%s&username=%s&pwd=%s&sort=11\">"
"<td><a href=\"/master/hosts?c=%s&sort=11\">"
"<b>ping1 age</a></td>"
//"<td><b>ip1</td>"
"<td><a href=\"/master/hosts?c=%s&username=%s&pwd=%s&sort=1\">"
"<td><a href=\"/master/hosts?c=%s&sort=1\">"
"<b>ping1</a></td>"
"%s"// "<td><b>ip2</td>"
@ -237,25 +237,26 @@ skipReplaceHost:
//"<td>avg roundtrip</td>"
//"<td>std. dev.</td></tr>"
"<td><b>note</td>",
LIGHT_BLUE ,
TABLE_STYLE ,
colspan ,
DARK_BLUE ,
coll, sort,
coll, username, password,
coll, username, password,
coll, username, password,
coll, username, password,
coll, username, password,
coll, username, password,
coll, username, password,
coll, username, password,
coll, username, password,
coll, username, password,
//coll,username, password,
coll, username, password,
coll, username, password,
coll, username, password,
coll, username, password,
DARK_BLUE ,
coll,
coll,
coll,
coll,
coll,
coll,
coll,
coll,
coll,
coll,
coll,
coll,
coll,
coll,
shotcol );
// loop through each host we know and print it's stats
@ -396,13 +397,14 @@ skipReplaceHost:
"in disagreement with ours.\">H</b></font>");
// rebalancing?
if ( h->m_flags & PFLAG_REBALANCING )
fb.safePrintf("<b title=\"Current rebalancing\">R</b>");
fb.safePrintf("<b title=\"Currently "
"rebalancing\">R</b>");
// has recs that should be in another shard? indicates
// we need to rebalance or there is a bad hosts.conf
if ( h->m_flags & PFLAG_FOREIGNRECS )
fb.safePrintf("<font color=red><b title=\"Foreign data "
"detected. Needs rebalance.\">F"
"</b></font");
"</b></font>");
// if it has spiders going on say "S"
if ( h->m_flags & PFLAG_HASSPIDERS )
fb.safePrintf ( "<span title=\"Spidering\">S</span>");
@ -423,11 +425,15 @@ skipReplaceHost:
if ( fb.length() == 0 )
fb.safePrintf("&nbsp;");
char *bg = LIGHT_BLUE;
if ( h->m_ping >= g_conf.m_deadHostTimeout )
bg = "ffa6a6";
// print it
sb.safePrintf (
"<tr>"
"<tr bgcolor=#%s>"
"<td><a href=\"http://%s:%hi/master/hosts?"
"username=%s&pwd=%s&"
""
"c=%s"
"&sort=%li\">%li</a></td>"
@ -496,8 +502,8 @@ skipReplaceHost:
//"<td>%lims</td>"
"<td nowrap=1>%s</td>"
"</tr>" ,
bg,//LIGHT_BLUE ,
ipbuf3, h->m_httpPort,
username, password,
coll, sort,
i ,
h->m_hostname,
@ -552,15 +558,16 @@ skipReplaceHost:
// end the table now
sb.safePrintf ( "</table><br>\n" );
// print spare hosts table
sb.safePrintf (
"<table cellpadding=4 border=1 width=100%% bgcolor=#%s>"
"<tr><td colspan=10 bgcolor=#%s><center>"
"<table %s>"
"<tr class=hdrow><td colspan=10><center>"
//"<font size=+1>"
"<b>Spares</b>"
//"</font>"
"</td></tr>"
"<tr>"
"<tr bgcolor=#%s>"
"<td><b>spareId</td>"
"<td><b>host name</td>"
"<td><b>ip1</td>"
@ -575,7 +582,7 @@ skipReplaceHost:
//"<td><b>ide channel</td>"
"<td><b>note</td>",
LIGHT_BLUE ,
TABLE_STYLE,
DARK_BLUE );
for ( long i = 0; i < g_hostdb.m_numSpareHosts; i++ ) {
@ -589,7 +596,7 @@ skipReplaceHost:
// print it
sb.safePrintf (
"<tr>"
"<tr bgcolor=#%s>"
"<td>%li</td>"
"<td>%s</td>"
"<td>%s</td>"
@ -602,6 +609,7 @@ skipReplaceHost:
//"<td>%li</td>" // ide channel
"<td>%s</td>"
"</tr>" ,
LIGHT_BLUE,
i ,
h->m_hostname,
ipbuf1,
@ -618,13 +626,13 @@ skipReplaceHost:
// print proxy hosts table
sb.safePrintf (
"<table cellpadding=4 border=1 width=100%% bgcolor=#%s>"
"<tr><td colspan=12 bgcolor=#%s><center>"
"<table %s>"
"<tr class=hdrow><td colspan=12><center>"
//"<font size=+1>"
"<b>Proxies</b>"
//"</font>"
"</td></tr>"
"<tr>"
"<tr bgcolor=#%s>"
"<td><b>proxyId</b></td>"
"<td><b>type</b></td>"
"<td><b>host name</b></td>"
@ -645,8 +653,9 @@ skipReplaceHost:
//"<td><b>ide channel</td>"
"<td><b>note</td>",
LIGHT_BLUE ,
DARK_BLUE );
TABLE_STYLE,
DARK_BLUE
);
for ( long i = 0; i < g_hostdb.m_numProxyHosts; i++ ) {
// get the ith host (hostId)
Host *h = g_hostdb.getProxy ( i );
@ -677,10 +686,10 @@ skipReplaceHost:
// print it
sb.safePrintf (
"<tr>"
"<tr bgcolor=#%s>"
"<td><a href=\"http://%s:%hi/master/hosts?"
"username=%s&pwd=%s&"
""
"c=%s\">"
"%li</a></td>"
@ -700,10 +709,9 @@ skipReplaceHost:
"<td>%s </td>"
"</tr>" ,
LIGHT_BLUE,
ipbuf3,
h->m_httpPort,
username,
password,
coll,
i ,
@ -724,24 +732,31 @@ skipReplaceHost:
}
sb.safePrintf ( "</table><br><br>" );
sb.safePrintf(
"<style>"
".poo { background-color:#%s;}\n"
"</style>\n" ,
LIGHT_BLUE );
// print help table
sb.safePrintf (
"<table cellpadding=4 border=1 width=100%% bgcolor=#%s>"
"<tr><td colspan=10 bgcolor=#%s><center>"
"<table %s>"
"<tr class=hdrow><td colspan=10><center>"
//"<font size=+1>"
"<b>Key</b>"
//"</font>"
"</td></tr>"
"<tr>"
"<tr class=poo>"
"<td>shard</td>"
"<td>"
"The index is split into shards. Which shard does this "
"host server?"
"host serve?"
"</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>stripe</td>"
"<td>"
"Hosts with the same stripe serve the same shard "
@ -749,41 +764,41 @@ skipReplaceHost:
"</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>ip1</td>"
"<td>The primary IP address of the host."
"</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>ip2</td>"
"<td>The secondary IP address of the host."
"</td>"
"</tr>\n"
/*
"<tr>"
"<tr class=poo>"
"<td>udp port</td>"
"<td>The UDP port the host uses to send and recieve "
"datagrams."
"</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>dns client port</td>"
"<td>The UDP port used to send and receive dns traffic with."
"</td>"
"</tr>\n"
*/
"<tr>"
"<tr class=poo>"
"<td>http port</td>"
"<td>The port you can connect a browser to."
"</td>"
"</tr>\n"
/*
"<tr>"
"<tr class=poo>"
"<td>best switch id</td>"
"<td>The host prefers to be on this switch because it "
"needs to send a lot of data to other hosts on this swtich. "
@ -794,7 +809,7 @@ skipReplaceHost:
*/
/*
"<tr>"
"<tr class=poo>"
"<td>switch id</td>"
"<td>Hosts that share the same switch id are "
"physically on the same switch."
@ -802,7 +817,7 @@ skipReplaceHost:
"</tr>\n"
*/
"<tr>"
"<tr class=poo>"
"<td>dgrams resent</td>"
"<td>How many datagrams have had to be resent to a host "
"because it was not ACKed quick enough or because it was "
@ -811,7 +826,7 @@ skipReplaceHost:
"</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>errors recvd</td>"
"<td>How many errors were received from a host in response "
"to a request to retrieve or insert data."
@ -819,7 +834,7 @@ skipReplaceHost:
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>ETRYAGAINS recvd</td>"
"<td>How many ETRYAGAIN were received in response to a "
"request to add data. Usually because the host's memory "
@ -830,7 +845,7 @@ skipReplaceHost:
"</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>dgrams to</td>"
"<td>How many datagrams were sent to the host from the "
"selected host since startup. Includes ACK datagrams. This "
@ -841,46 +856,46 @@ skipReplaceHost:
"</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>dgrams from</td>"
"<td>How many datagrams were received from the host by the "
"selected host since startup. Includes ACK datagrams."
"</td>"
"</tr>\n"
//"<tr>"
//"<tr class=poo>"
//"<td>loadavg</td>"
//"<td>1-minute sliding-window load average from "
//"/proc/loadavg."
//"</td>"
//"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>mem used</td>"
"<td>percentage of memory currently used."
"</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>cpu usage</td>"
"<td>percentage of cpu resources in use by the gb process."
"</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>ping1 age</td>"
"<td>How long ago the last ping request was sent to "
"this host. Let's us know how fresh the ping time is."
"</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>ping1</td>"
"<td>Ping time to this host on the primary network."
"</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>ping2</td>"
"<td>Ping time to this host on the seconday/shotgun "
"network. This column is not visible if the shotgun "
@ -888,25 +903,25 @@ skipReplaceHost:
"</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>M (status flag)</td>"
"<td>Indicates host is merging files on disk."
"</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>D (status flag)</td>"
"<td>Indicates host is dumping data to disk."
"</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>S (status flag)</td>"
"<td>Indicates host has outstanding spiders."
"</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>y (status flag)</td>"
"<td>Indicates host is performing the daily merge."
"</td>"
@ -914,8 +929,8 @@ skipReplaceHost:
,
LIGHT_BLUE ,
DARK_BLUE );
TABLE_STYLE
);
sb.safePrintf ( "</table><br></form><br>" );

@ -52,6 +52,8 @@ bool sendPageInject ( TcpSocket *s , HttpRequest *r ) {
msg7->m_crawlbotAPI = crawlbotAPI;
strncpy(msg7->m_coll,coll,MAX_COLL_LEN);
// for diffbot
if ( crawlbotAPI )
msg7->m_hr.copy ( r );
@ -63,7 +65,6 @@ bool sendPageInject ( TcpSocket *s , HttpRequest *r ) {
// qts is html encoded? NO! fix that below then...
//char *uf="http://www.google.com/search?num=50&"
// "q=%s&scoring=d&filter=0";
strncpy(msg7->m_coll,coll,MAX_COLL_LEN);
msg7->m_isScrape = true;
msg7->m_qbuf.safeStrcpy(qts);
msg7->m_linkDedupTable.set(4,0,512,NULL,0,false,0,"ldtab");
@ -193,6 +194,12 @@ bool sendReply ( void *state ) {
// pm = msg;
//}
sb.safePrintf(
"<style>"
".poo { background-color:#%s;}\n"
"</style>\n" ,
LIGHT_BLUE );
//char *c = msg7->m_coll;
char bb [ MAX_COLL_LEN + 60 ];
bb[0]='\0';
@ -204,39 +211,50 @@ bool sendReply ( void *state ) {
"<b>%s</b>\n\n" // the url msg
//"<FORM method=POST action=/inject>\n\n"
"<FORM method=GET action=/inject>\n\n"
//"<input type=hidden name=pwd value=\"%s\">\n"
//"<input type=hidden name=username value=\"%s\">\n"
"<table width=100%% bgcolor=#%s cellpadding=4 border=1>"
"<tr><td bgcolor=#%s colspan=2>"
"<table %s>"
"<tr class=hdrow><td colspan=2>"
"<center>"
//"<font size=+1>"
"<b>"
"Inject URL</b>%s"
//"</font>"
"<br>"
//"Enter the information below to inject "
//"a URL. This allows you to specify the URL as well as the "
//"content for the URL."
"</td></tr>\n\n"
"<tr><td><b>url</b></td>"
"<td>\n"
"<tr class=poo><td><b>url</b>"
"<br>"
"<font size=-2>"
"Specify the URL that will be immediately crawled and "
"indexed in real time "
"while you wait. The browser will return the "
"final index status code. Alternatively, "
"use the <i>add urls</i> page "
"to add URLs in bulk or to just add to the spider queue "
"without having to wait for the page or pages to be "
"actually indexed in realtime."
"</font>"
"</td>"
"<td width=50%%>\n"
"<input type=text name=u value=\"\" size=50>"
"</td></tr>\n\n"
"<tr><td><b>query to scrape</b></td>"
"<tr class=poo><td><b>query to scrape</b></td>"
"<td>\n"
"<input type=text name=qts value=\"\" size=50>"
"</td></tr>\n\n"
//"<tr><td><b>use ahrefs.com</b></td>"
//"<tr class=poo><td><b>use ahrefs.com</b></td>"
//"<td>\n"
//"<input type=radio name=useahrefs value=0 checked>no &nbsp; "
//"<input type=radio name=useahrefs value=1>yes "
//"</td></tr>\n\n"
"<tr><td><b>spider links</b></td>"
"<tr class=poo><td><b>spider links</b></td>"
"<td>\n"
"<input type=radio name=spiderlinks value=0>no &nbsp; "
"<input type=radio name=spiderlinks value=1 checked>yes "
@ -249,18 +267,18 @@ bool sendReply ( void *state ) {
"<tr><td><b>inject scraped links</b></td>"
"<tr class=poo><td><b>inject scraped links</b></td>"
"<td>\n"
"<input type=radio name=injectlinks value=0 checked>no &nbsp; "
"<input type=radio name=injectlinks value=1>yes "
"</td></tr>\n\n"
"<tr><td><b>collection</b></td>"
"<tr class=poo><td><b>collection</b></td>"
"<td>\n"
"<input type=text name=c value=\"%s\" size=15>"
"</td></tr>\n\n"
"<tr><td><b>quick reply?</b><br>"
"<tr class=poo><td><b>quick reply?</b><br>"
"<font size=1>Should reply be short? "
"Default: no"
"</td>"
@ -269,7 +287,7 @@ bool sendReply ( void *state ) {
"<input type=radio name=quick value=1>yes "
"</td></tr>\n\n"
"<tr><td><b>only inject new docs?</b><br>"
"<tr class=poo><td><b>only inject new docs?</b><br>"
"<font size=1>Skips injection if docs already indexed. "
"Default: no"
"</td>"
@ -279,17 +297,17 @@ bool sendReply ( void *state ) {
"</td></tr>\n\n"
"<tr><td><b>delete?</b><br>"
"<tr class=poo><td><b>delete url?</b><br>"
"<font size=1>Should this url be deleted from the index? "
"Default: no"
"</td>"
"<td>\n"
"<input type=radio name=delete value=0 checked>no &nbsp; "
"<input type=radio name=delete value=1>yes "
"<input type=radio name=deleteurl value=0 checked>no &nbsp; "
"<input type=radio name=deleteurl value=1>yes "
"</td></tr>\n\n"
"<tr><td><b>recycle content?</b><br>"
"<tr class=poo><td><b>recycle content?</b><br>"
"<font size=1>Should page content be recycled if "
"reindexing? "
"Default: no"
@ -299,16 +317,18 @@ bool sendReply ( void *state ) {
"<input type=radio name=recycle value=1>yes "
"</td></tr>\n\n"
"<tr><td><b>ip</b><br>"
/*
"<tr class=poo><td><b>ip</b><br>"
"<font size=1>IP address of the url. If blank then "
"Gigablast will look up. "
"Default: blank"
"</td>"
"<td>\n<input type=text name=ip value=\"\" size=15>"
"</td></tr>\n\n"
*/
/*
"<tr><td><b>do ip lookups?</b><br>"
"<tr class=poo><td><b>do ip lookups?</b><br>"
"<font size=1>Should Gigablast look up the IP address "
"of the url, if it is not provided. "
"Default: yes"
@ -319,7 +339,7 @@ bool sendReply ( void *state ) {
"</td></tr>\n\n"
*/
//"<tr><td><b>is url new?</b><br>"
//"<tr class=poo><td><b>is url new?</b><br>"
//"<font size=1>Is this url new to the index? If unsure "
//"then you should say no here. "
//"Default: yes"
@ -329,7 +349,7 @@ bool sendReply ( void *state ) {
//"<input type=radio name=isnew value=1 checked>yes "
//"</td></tr>\n\n"
"<tr><td><b>dedup?</b><br>"
"<tr class=poo><td><b>dedup?</b><br>"
"<font size=1>Should this url be skipped if there is "
"already a url in the index from this same domain with "
"this same content? "
@ -339,14 +359,14 @@ bool sendReply ( void *state ) {
"<input type=radio name=dedup value=0>no &nbsp; "
"<input type=radio name=dedup value=1 checked>yes "
"</td></tr>\n\n" ,
//"<tr><td><b>ruleset</b><br>"
//"<tr class=poo><td><b>ruleset</b><br>"
//"<font size=1>Use this ruleset to index the URL. "
//"Default: auto"
//"</td>"
//"<td>\n<select name=rs>" ,
pm , // msg7->m_pwd ,
//msg7->m_username,
LIGHT_BLUE , DARK_BLUE , bb , msg7->m_coll );
TABLE_STYLE , bb , msg7->m_coll );
//p += gbstrlen(p);
@ -382,7 +402,7 @@ bool sendReply ( void *state ) {
// make a table, each row will be an injectable parameter
sb.safePrintf (
"<tr><td><b>content has mime</b><br>"
"<tr class=poo><td><b>content has mime</b><br>"
"<font size=1>IP address of the url. If blank then "
"Gigablast will look up. "
"Default: blank"
@ -392,10 +412,13 @@ bool sendReply ( void *state ) {
"<input type=radio name=hasmime value=1>yes "
"</td></tr>\n\n"
"<tr><td colspan=2>"
"<tr class=poo><td colspan=2>"
"<center>"
"<b>content</b><br>"
"<font size=1>Enter the content here. Enter MIME header "
"<font size=1>If you want to supply the URL's content "
"rather than have Gigablast download it, then "
"enter the content here. "
"Enter MIME header "
"first if \"content has mime\" is set to true above. "
"Separate MIME from actual content with two returns."
"<br>"
@ -404,11 +427,15 @@ bool sendReply ( void *state ) {
"\n"
"<textarea rows=32 cols=80 name=content>"
"</textarea>"
"<br>"
"<br>\n\n"
"<input type=submit value=Submit>"
"</center>"
"</td></tr></table>\n"
"<br>"
"<br>\n\n"
"<center>"
"<input type=submit value=Submit>"
"</center>"
"</form>\n"
);
@ -463,34 +490,48 @@ bool Msg7::inject ( TcpSocket *s ,
long contentLen;
// get the junk
char *coll = r->getString ( "c" , NULL , NULL /*default*/);
//char *coll = r->getString ( "c" , NULL , NULL /*default*/);
//if ( ! coll ) coll = "main";
// sometimes crawlbot will add or reset a coll and do an inject
// in PageCrawlBot.cpp
//if ( ! coll ) coll = r->getString("addcoll");
//if ( ! coll ) coll = r->getString("resetcoll");
if ( ! coll ) coll = collOveride;
//if ( ! coll ) coll = collOveride;
// default to main
if ( ! coll || ! coll[0] ) coll = "main";
//if ( ! coll || ! coll[0] ) coll = "main";
if ( collOveride && ! collOveride[0] ) collOveride = NULL;
CollectionRec *cr = NULL;
if ( collOveride ) cr = g_collectiondb.getRec ( collOveride );
else cr = g_collectiondb.getRec ( r );
if ( ! cr ) {
g_errno = ENOCOLLREC;
return true;
}
char *coll = cr->m_coll;
bool quickReply = r->getLong ( "quick" , 0 );
//char *pwd = r->getString ( "pwd" , NULL );
char *url = r->getString ( "u" , NULL , NULL /*default*/);
// for diffbot.cpp api
if ( ! url ) url = r->getString("injecturl",NULL,NULL);
if ( ! url ) url = r->getString("url",NULL,NULL);
// PageCrawlBot.cpp uses "seed"
if ( ! url ) url = r->getString("seed",NULL,NULL);
bool recycleContent = r->getLong ( "recycle",0);
char *ips = r->getString ( "ip" , NULL , NULL );
//char *ips = r->getString ( "ip" , NULL , NULL );
//char *username = g_users.getUsername(r);
long firstIndexed = r->getLongLong("firstindexed",0LL);
long lastSpidered = r->getLongLong("lastspidered",0LL);
//long firstIndexed = r->getLongLong("firstindexed",0LL);
//long lastSpidered = r->getLongLong("lastspidered",0LL);
long hopCount = r->getLong("hopcount",-1);
long newOnly = r->getLong("newonly",0);
long charset = r->getLong("charset",-1);
long deleteIt = r->getLong("delete",0);
long deleteUrl = r->getLong("deleteurl",0);
char hasMime = r->getLong("hasmime",0);
// do consistency testing?
bool doConsistencyTesting = r->getLong("dct",0);
@ -502,7 +543,7 @@ bool Msg7::inject ( TcpSocket *s ,
long forcedIp = 0;
if ( ips ) forcedIp = atoip ( ips , gbstrlen(ips) );
//if ( ips ) forcedIp = atoip ( ips , gbstrlen(ips) );
char *content = r->getString ( "content" , &contentLen , NULL );
// mark doesn't like to url-encode his content
@ -543,17 +584,20 @@ bool Msg7::inject ( TcpSocket *s ,
niceness,
state,
callback,
firstIndexed,
lastSpidered,
//firstIndexed,
//lastSpidered,
hopCount,
newOnly,
charset,
spiderLinks,
deleteIt,
deleteUrl,
hasMime,
doConsistencyTesting);
}
// . returns false if blocked, true otherwise
// . if returns false will call your callback(state) when is done
// . returns true and sets g_errno on error
bool Msg7::inject ( char *url ,
long forcedIp ,
char *content ,
@ -567,13 +611,13 @@ bool Msg7::inject ( char *url ,
long niceness,
void *state ,
void (*callback)(void *state),
long firstIndexed,
long lastSpidered,
//long firstIndexed,
//long lastSpidered,
long hopCount,
char newOnly,
short charset,
char spiderLinks,
char deleteIt,
char deleteUrl,
char hasMime,
bool doConsistencyTesting
) {
@ -581,11 +625,14 @@ bool Msg7::inject ( char *url ,
m_quickReply = quickReply;
// store coll
if ( ! coll ) { g_errno = ENOCOLLREC; return true; }
long collLen = gbstrlen ( coll );
if ( collLen > MAX_COLL_LEN ) collLen = MAX_COLL_LEN;
strncpy ( m_coll , coll , collLen );
m_coll [ collLen ] = '\0';
//if ( ! coll ) { g_errno = ENOCOLLREC; return true; }
// long collLen = gbstrlen ( coll );
//if ( collLen > MAX_COLL_LEN ) collLen = MAX_COLL_LEN;
//strncpy ( m_coll , coll , collLen );
//m_coll [ collLen ] = '\0';
CollectionRec *cr = g_collectiondb.getRec ( coll );
if ( ! cr ) { g_errno = ENOCOLLREC; return true; }
// store user
//long ulen = 0;
@ -612,149 +659,36 @@ bool Msg7::inject ( char *url ,
if ( g_repairMode ) { g_errno = EREPAIRING; return true; }
// send template reply if no content supplied
if ( ! content && ! recycleContent ) {
log("inject: no content supplied to inject command and "
"recycleContent is false.");
//return true;
}
// clean url?
// normalize and add www. if it needs it
Url uu;
uu.set ( url , gbstrlen(url) , true );
// remove >'s i guess and store in st1->m_url[] buffer
char cleanUrl[MAX_URL_LEN+1];
urlLen = cleanInput ( cleanUrl,
MAX_URL_LEN,
uu.getUrl(),
uu.getUrlLen() );
// this can go on the stack since set4() copies it
SpiderRequest sreq;
sreq.reset();
strcpy(sreq.m_url, cleanUrl );
// parentdocid of 0
long firstIp = hash32n(cleanUrl);
if ( firstIp == -1 || firstIp == 0 ) firstIp = 1;
sreq.setKey( firstIp,0LL, false );
sreq.m_isInjecting = 1;
sreq.m_isPageInject = 1;
sreq.m_hopCount = hopCount;
sreq.m_hopCountValid = 1;
sreq.m_fakeFirstIp = 1;
sreq.m_firstIp = firstIp;
//if ( ! content && ! recycleContent ) {
// log("inject: no content supplied to inject command and "
// "recycleContent is false.");
// //return true;
//}
// shortcut
XmlDoc *xd = &m_xd;
// log it now
//log("inject: injecting doc %s",cleanUrl);
if ( ! xd->injectDoc ( url ,
cr ,
content ,
hasMime , // content starts with http mime?
hopCount,
charset,
static char s_dummy[3];
// sometimes the content is indeed NULL...
if ( newOnly && ! content ) {
// don't let it be NULL because then xmldoc will
// try to download the page!
s_dummy[0] = '\0';
content = s_dummy;
//char *xx=NULL;*xx=0; }
}
deleteUrl,
contentType, // CT_HTML, CT_XML
spiderLinks ,
newOnly, // index iff new
state,
callback ) )
// we blocked...
return false;
// . use the enormous power of our new XmlDoc class
// . this returns false with g_errno set on error
if ( //m_needsSet &&
! xd->set4 ( &sreq ,
NULL ,
m_coll ,
NULL , // pbuf
// give it a niceness of 1, we have to be
// careful since we are a niceness of 0!!!!
niceness, // 1 ,
// inject this content
content ,
deleteIt, // false, // deleteFromIndex ,
forcedIp ,
contentType ,
lastSpidered ,
hasMime )) {
// g_errno should be set if that returned false
if ( ! g_errno ) { char *xx=NULL;*xx=0; }
return true;
}
// do not re-call the set
//m_needsSet = false;
// make this our callback in case something blocks
xd->setCallback ( state , callback );
xd->m_doConsistencyTesting = doConsistencyTesting;
// . set xd from the old title rec if recycle is true
// . can also use XmlDoc::m_loadFromOldTitleRec flag
if ( recycleContent ) xd->m_recycleContent = true;
// othercrap
if ( firstIndexed ) {
xd->m_firstIndexedDate = firstIndexed;
xd->m_firstIndexedDateValid = true;
}
if ( lastSpidered ) {
xd->m_spideredTime = lastSpidered;
xd->m_spideredTimeValid = true;
}
if ( hopCount != -1 ) {
xd->m_hopCount = hopCount;
xd->m_hopCountValid = true;
}
if ( charset != -1 && charset != csUnknown ) {
xd->m_charset = charset;
xd->m_charsetValid = true;
}
// avoid looking up ip of each outlink to add "firstip" tag to tagdb
// because that can be slow!!!!!!!
xd->m_spiderLinks = spiderLinks;
xd->m_spiderLinks2 = spiderLinks;
xd->m_spiderLinksValid = true;
// . newOnly is true --> do not inject if document is already indexed!
// . maybe just set indexCode
xd->m_newOnly = newOnly;
// do not re-lookup the robots.txt
xd->m_isAllowed = true;
xd->m_isAllowedValid = true;
xd->m_crawlDelay = -1; // unknown
xd->m_crawlDelayValid = true;
// set this now
g_inPageInject = true;
// log it now
//log("inject: indexing injected doc %s",cleanUrl);
// . now tell it to index
// . this returns false if blocked
bool status = xd->indexDoc ( );
// log it. i guess only for errors when it does not block?
// because xmldoc.cpp::indexDoc calls logIt()
if ( status ) xd->logIt();
// undo it
g_inPageInject = false;
// note that it blocked
//if ( ! status ) log("inject: blocked for %s",cleanUrl);
// return false if it blocked
return status;
return true;
}
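For reference, a minimal sketch of the fake-firstIp derivation that the replaced inline code performed before handing the SpiderRequest off: the cleaned URL is hashed to a non-zero 32-bit value so an injected doc gets a deterministic key without any DNS lookup. The hash below is a stand-in for the project's hash32n(), which is assumed rather than reproduced; only the clamping of 0 and -1 to 1 mirrors the code above.

#include <cstdint>
#include <cstdio>

// Stand-in for hash32n(): FNV-1a over the URL bytes, for illustration only.
static int32_t hash32Sketch ( const char *s ) {
	uint32_t h = 2166136261u;
	for ( ; *s ; s++ ) { h ^= (unsigned char)*s; h *= 16777619u; }
	return (int32_t)h;
}

// Derive a deterministic, non-zero "first IP" for an injected URL so the
// SpiderRequest has a usable key even though no real IP was looked up.
static int32_t fakeFirstIp ( const char *cleanUrl ) {
	int32_t firstIp = hash32Sketch ( cleanUrl );
	// 0 and -1 are reserved/error values, so bump them to 1
	if ( firstIp == 0 || firstIp == -1 ) firstIp = 1;
	return firstIp;
}

int main ( ) {
	printf ( "fake firstIp = 0x%x\n" ,
	         (unsigned)fakeFirstIp ( "http://www.example.com/" ) );
	return 0;
}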
///////////////
//
// SCRAPE GOOGLE

@ -53,8 +53,8 @@ public:
long niceness,
void *state ,
void (*callback)(void *state),
long firstIndexedDate = 0,
long spiderDate = 0,
//long firstIndexedDate = 0,
//long spiderDate = 0,
long hopCount = -1 ,
char newOnly = 0 ,
short charset = -1 ,

@ -28,8 +28,8 @@ struct StateLogView {
static char *s_magicStr = "4j3.8x*";
#define BABY_BLUE "e0e0d0"
#define LIGHT_BLUE "d0d0e0"
#define DARK_BLUE "c0c0f0"
//#define LIGHT_BLUE "d0d0e0"
//#define DARK_BLUE "c0c0f0"
bool sendPageLogView ( TcpSocket *s , HttpRequest *r ) {
@ -79,15 +79,21 @@ bool sendPageLogView ( TcpSocket *s , HttpRequest *r ) {
"</SCRIPT> ");
p->safePrintf("<form name=\"fo\">");
p->safePrintf("\n<table width=100%% bgcolor=#%s "
"cellpadding=4 border=1>\n", BABY_BLUE);
p->safePrintf("\n<table %s>\n",TABLE_STYLE);
p->safePrintf("<tr class=hdrow><td colspan=2>"
"<center><b>Log View</b></center>"
"</td></tr>");
p->safePrintf("<tr><td>Refresh Rate:</td><td><input type=\"text\""
p->safePrintf("<tr bgcolor=%s>"
"<td>Refresh Rate:</td><td><input type=\"text\""
" name=\"rr\" value=\"%li\" size=\"4\"></td></tr>",
LIGHT_BLUE,
refreshRate);
p->safePrintf("<tr><td>Sample Size:</td><td><input type=\"text\""
" name=\"ss\" value=\"%li\" size=\"4\"></td></tr>",
p->safePrintf("<tr bgcolor=%s>"
"<td>Sample Size:</td><td><input type=\"text\""
" name=\"ss\" value=\"%li\" size=\"4\">",
LIGHT_BLUE,
sampleSize);
p->safePrintf("<input type=\"hidden\" "
@ -96,6 +102,7 @@ bool sendPageLogView ( TcpSocket *s , HttpRequest *r ) {
p->safePrintf("<input type=\"hidden\" "
"name=\"dontlog\" value=\"1\">");
p->safePrintf("</td></tr>");
// . count the number of hosts we are getting logs for:
long numOn = 0;
@ -134,7 +141,8 @@ bool sendPageLogView ( TcpSocket *s , HttpRequest *r ) {
st->m_filterStr[6] = "INFO";
st->m_filterStr[7] = "INIT";
p->safePrintf("<tr><td>Filter Types:</td><td>");
p->safePrintf("<tr bgcolor=#%s><td>Filter Types:</td><td>",
LIGHT_BLUE);
char *checked;
st->m_numFilts = 0;
for(long i = 7; i >= 0; i--) {
@ -183,7 +191,8 @@ bool sendPageLogView ( TcpSocket *s , HttpRequest *r ) {
p->safePrintf("<tr><td>Hosts:</td><td>");
p->safePrintf("<tr bgcolor=#%s><td>Hosts:</td><td>",
LIGHT_BLUE);
for ( long i = 0 ; i < nh ; i++ ) {
// skip dead hosts, i don't want to wait for them to timeout.
if ( g_hostdb.isDead ( i ) ) continue;
@ -214,9 +223,9 @@ bool sendPageLogView ( TcpSocket *s , HttpRequest *r ) {
p->safePrintf("</td></tr>\n");
p->safePrintf("<tr><td>\n");
p->safePrintf("<tr bgcolor=#%s><td>\n",LIGHT_BLUE);
p->safePrintf("<input type=\"submit\" value=\"Update\"> ");
p->safePrintf("</td></tr></table>\n");
p->safePrintf("</td><td></td></tr></table>\n");
p->safePrintf("</form>");
if(!blocked)
@ -227,6 +236,14 @@ bool sendPageLogView ( TcpSocket *s , HttpRequest *r ) {
}
bool showLine ( SafeBuf *sb , char *s , long len ) {
return sb->brify ( s , len ,
0 , // niceness
80 , // cols
"<br>",
false ); // isHtml?
}
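showLine() above just forwards to SafeBuf::brify() with an 80-column width and a <br> break string so raw log lines wrap in the HTML view. As a rough illustration of the wrapping behavior only (not the project's brify(), which also takes a niceness level and an isHtml flag), a hedged sketch:

#include <string>

// Sketch of brify-style wrapping: append `len` bytes of `s` to `out`,
// inserting the break string at the first space once `cols` characters
// have accumulated on the current line.
static void brifySketch ( std::string &out , const char *s , long len ,
                          long cols , const char *brk ) {
	long lineLen = 0;
	for ( long i = 0 ; i < len ; i++ ) {
		out += s[i];
		lineLen++;
		if ( lineLen >= cols && s[i] == ' ' ) {
			out += brk;   // e.g. "<br>"
			lineLen = 0;
		}
	}
}
// usage: std::string html; brifySketch ( html , line , lineLen , 80 , "<br>" );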
void gotRemoteLogWrapper(void *state, UdpSlot *slot) {
@ -329,25 +346,25 @@ void gotRemoteLogWrapper(void *state, UdpSlot *slot) {
if(matchNum >= 0 || st->m_numFilts == 0) {
if(matchNum == 0) {
p->safePrintf("<font color=red>");
p->safeMemcpy(st->m_readBufPtrs[ndx], lineLen);
showLine(p,st->m_readBufPtrs[ndx], lineLen);
p->safePrintf("\n");
p->safePrintf("</font>");
}
else if(matchNum == 1) {
p->safePrintf("<font color=green>");
p->safeMemcpy(st->m_readBufPtrs[ndx], lineLen);
showLine(p,st->m_readBufPtrs[ndx], lineLen);
p->safePrintf("\n");
p->safePrintf("</font>");
}
else if(matchNum == 2) {
p->safePrintf("<font color=blue>");
p->safeMemcpy(st->m_readBufPtrs[ndx], lineLen);
showLine(p,st->m_readBufPtrs[ndx], lineLen);
p->safePrintf("\n");
p->safePrintf("</font>");
}
else {
p->safeMemcpy(st->m_readBufPtrs[ndx], lineLen);
showLine(p,st->m_readBufPtrs[ndx], lineLen);
p->safePrintf("\n");
}
}

@ -211,11 +211,18 @@ bool sendPageParser2 ( TcpSocket *s ,
if ( st->m_render ) render = " checked";
if ( st->m_oips ) oips = " checked";
xbuf->safePrintf(
"<style>"
".poo { background-color:#%s;}\n"
"</style>\n" ,
LIGHT_BLUE );
long clen;
char *contentParm = r->getString("content",&clen,"");
// print the input form
xbuf->safePrintf ("<br>"
xbuf->safePrintf (
"<style>\n"
"h2{font-size: 12px; color: #666666;}\n"
@ -233,21 +240,30 @@ bool sendPageParser2 ( TcpSocket *s ,
".hs {color: #009900;}"
"</style>\n"
"<center>"
"<table cellpadding=3>"
"<tr>"
"<table %s>"
"<tr><td colspan=5><center><b>"
"Parser"
"</b></center></td></tr>\n"
"<tr class=poo>"
"<td>"
"Url:"
"<b>url</b>"
"<br><font size=-2>"
"Type in <b>FULL</b> url to parse."
"</font>"
"</td>"
"</td>"
"<td>"
"<input type=text name=u value=\"%s\" size=\"40\">\n"
"</td>"
"<td>"
"Type in <b>FULL</b> url\n"
"</td>"
"</tr>"
"<tr>"
/*
"<tr class=poo>"
"<td>"
"Parser version to use: "
"</td>"
@ -258,9 +274,10 @@ bool sendPageParser2 ( TcpSocket *s ,
"(-1 means to use latest title rec version)<br>"
"</td>"
"</tr>"
*/
/*
"<tr>"
"<tr class=poo>"
"<td>"
"Hop count to use: "
"</td>"
@ -273,20 +290,22 @@ bool sendPageParser2 ( TcpSocket *s ,
"</tr>"
*/
"<tr>"
"<tr class=poo>"
"<td>"
"Use cached:"
"<b>use cached</b>"
"<br><font size=-2>"
"Load page from cache (titledb)?"
"</font>"
"</td>"
"<td>"
"<input type=checkbox name=old value=1%s> "
"</td>"
"<td>"
"Load page from cache (titledb)?"
"</td>"
"</tr>"
/*
"<tr>"
"<tr class=poo>"
"<td>"
"Reparse root:"
"</td>"
@ -299,20 +318,23 @@ bool sendPageParser2 ( TcpSocket *s ,
"</tr>"
*/
"<tr>"
"<tr class=poo>"
"<td>"
"Recycle Link Info:"
"<b>recycle link info</b>"
"<br><font size=-2>"
"Recycle the link info from the title rec"
"Load page from cache (titledb)?"
"</font>"
"</td>"
"<td>"
"<input type=checkbox name=recycle value=1%s> "
"</td>"
"<td>"
"Recycle the link info from the title rec"
"</td>"
"</tr>"
/*
"<tr>"
"<tr class=poo>"
"<td>"
"Recycle Link Info Imported:"
"</td>"
@ -325,20 +347,22 @@ bool sendPageParser2 ( TcpSocket *s ,
"</tr>"
*/
"<tr>"
"<tr class=poo>"
"<td>"
"Render HTML:"
"<b>render html</b>"
"<br><font size=-2>"
"Render document content as HTML"
"</font>"
"</td>"
"<td>"
"<input type=checkbox name=render value=1%s> "
"</td>"
"<td>"
"Render document content as HTML"
"</td>"
"</tr>"
/*
"<tr>"
"<tr class=poo>"
"<td>"
"Lookup outlinks' ruleset, ips, quality:"
"</td>"
@ -351,7 +375,7 @@ bool sendPageParser2 ( TcpSocket *s ,
"</td>"
"</tr>"
"<tr>"
"<tr class=poo>"
"<td>"
"LinkInfo Coll:"
"</td>"
@ -364,49 +388,59 @@ bool sendPageParser2 ( TcpSocket *s ,
"</tr>"
*/
"<tr>"
"<tr class=poo>"
"<td>"
"Optional query:"
"<b>optional query</b>"
"<br><font size=-2>"
"Leave empty usually. For title generation only."
"</font>"
"</td>"
"<td>"
"<input type=text name=\"q\" size=\"20\" value=\"\"> "
"</td>"
"<td>"
"Leave empty usually. For title generation only."
"</td>"
"</tr>"
"<tr>"
"<td>"
"Content Below is XML:"
"</td>"
"<tr class=poo>"
"<td>"
"<b>content below is xml</b>"
"<br><font size=-2>"
"Is the content below XML?"
"</font>"
"</td>"
"<td>"
"<input type=checkbox name=xml value=1> "
"</td>"
"<td>"
//""
"</td>"
"</tr>"
"<tr>"
"<td colspan=3>"
"<tr class=poo>"
"<td><b>content</b>"
"<br><font size=-2>"
"Use this content for the provided <i>url</i> "
"rather than downloading it from the web."
"</td>"
"<td>"
"<textarea rows=10 cols=80 name=content>"
"%s"
"</textarea>"
"</td>"
"</tr>"
"<tr>"
"<td colspan=\"3\">"
"<input type=submit value=OK>"
"</td>"
"</tr>"
"</table>"
"</center>"
"</form>"
"<br>",
TABLE_STYLE,
us ,
//(long)st->m_hopCount,
//rtu,
@ -420,8 +454,11 @@ bool sendPageParser2 ( TcpSocket *s ,
xbuf->safePrintf(
"<center>"
"<input type=submit value=Submit>"
"</center>"
);
// just print the page if no url given

@ -99,14 +99,15 @@ bool sendPagePerf ( TcpSocket *s , HttpRequest *r ) {
//skip request path
while (!isspace(*rbufEnd)) rbufEnd++;
*rbufEnd = '\0';
char* refresh = strstr(rbuf, "&rr=");
//char* refresh = strstr(rbuf, "&rr=");
// print resource table
// columns are the dbs
p.safePrintf(
//"<center>Disk Statistics<br><br>"
"<center><br>"
"<center>"
//"<br>"
//"<img name=\"diskgraph\"
//src=/diskGraph%li.gif><br><br>",
//g_hostdb.m_hostId );
@ -115,12 +116,13 @@ bool sendPagePerf ( TcpSocket *s , HttpRequest *r ) {
// now try using absolute divs instead of a GIF
g_stats.printGraphInHtml ( p );
/*
if(autoRefresh > 0) {
if(refresh) *(refresh+4) = '0';
p.safePrintf(
"<center><a href=\"%s\">Auto Refresh Off</a>"
"</center>",
rbuf + 4/*skip over GET*/);
rbuf + 4); // skip over GET
p.safePrintf( "<input type=\"hidden\" "
"name=\"dontlog\" value=\"1\">");
@ -132,20 +134,26 @@ bool sendPagePerf ( TcpSocket *s , HttpRequest *r ) {
p.safePrintf(
"<center><a href=\"%s%s\">Auto Refresh</a>"
"</center>",
rbuf + 4/*skip over GET*/, rr);
rbuf + 4, rr); // skip over "GET "
}
*/
// print the key
p.safePrintf (
"<br>"
"<center>"
"<table border=1 cellpadding=2>"
//"<table %s>"
//"<tr>%s</tr></table>"
"<tr>%s</tr></table>"
"<style>"
".poo { background-color:#%s;}\n"
"</style>\n"
"<table border=1 cellpadding=2>"
"<table %s>"
// black
"<tr>"
"<tr class=poo>"
"<td bgcolor=#000000>&nbsp; &nbsp;</td>"
"<td> High priority disk read. "
"Thicker lines for bigger reads.</td>"
@ -158,7 +166,7 @@ bool sendPagePerf ( TcpSocket *s , HttpRequest *r ) {
// red
"<tr>"
"<tr class=poo>"
"<td bgcolor=#ff0000>&nbsp; &nbsp;</td>"
"<td> Disk write. "
"Thicker lines for bigger writes.</td>"
@ -170,7 +178,7 @@ bool sendPagePerf ( TcpSocket *s , HttpRequest *r ) {
// dark brown
"<tr>"
"<tr class=poo>"
"<td bgcolor=#753d30>&nbsp; &nbsp;</td>"
"<td> Processing raw query. Has raw= parm.</td>"
@ -181,7 +189,7 @@ bool sendPagePerf ( TcpSocket *s , HttpRequest *r ) {
// pinkish purple
"<tr>"
"<tr class=poo>"
"<td bgcolor=#aa00aa>&nbsp; &nbsp;</td>"
"<td> Send data over network. (low priority)"
"Thicker lines for bigger sends.</td>"
@ -193,7 +201,7 @@ bool sendPagePerf ( TcpSocket *s , HttpRequest *r ) {
"</tr>"
// pinkish purple
"<tr>"
"<tr class=poo>"
"<td bgcolor=#ff00ff>&nbsp; &nbsp;</td>"
"<td> Send data over network. (high priority)"
"Thicker lines for bigger sends.</td>"
@ -206,7 +214,7 @@ bool sendPagePerf ( TcpSocket *s , HttpRequest *r ) {
// dark purple
"<tr>"
"<tr class=poo>"
"<td bgcolor=#8220ff>&nbsp; &nbsp;</td>"
"<td> Get all summaries for results.</td>"
@ -218,7 +226,7 @@ bool sendPagePerf ( TcpSocket *s , HttpRequest *r ) {
// white
"<tr>"
"<tr class=poo>"
"<td bgcolor=#ffffff>&nbsp; &nbsp;</td>"
"<td> Uncompress cached document.</td>"
@ -229,7 +237,7 @@ bool sendPagePerf ( TcpSocket *s , HttpRequest *r ) {
// bright green
"<tr>"
"<tr class=poo>"
"<td bgcolor=#00ff00>&nbsp; &nbsp;</td>"
"<td> Compute search results. "
"All terms required. rat=1.</td>"
@ -241,7 +249,7 @@ bool sendPagePerf ( TcpSocket *s , HttpRequest *r ) {
"</tr>"
// bright green
"<tr>"
"<tr class=poo>"
"<td bgcolor=#ccffcc>&nbsp; &nbsp;</td>"
"<td> Compute reference pages. "
"</td>"
@ -252,7 +260,7 @@ bool sendPagePerf ( TcpSocket *s , HttpRequest *r ) {
"</td>"
"</tr>"
"<tr>"
"<tr class=poo>"
"<td bgcolor=#d1e1ff>&nbsp; &nbsp;</td>"
"<td> Compute Gigabits. "
@ -265,7 +273,7 @@ bool sendPagePerf ( TcpSocket *s , HttpRequest *r ) {
"</tr>"
"<tr>"
"<tr class=poo>"
"<td bgcolor=#0000b0>&nbsp; &nbsp;</td>"
"<td> \"Summary\" extraction (low priority) "
@ -279,10 +287,12 @@ bool sendPagePerf ( TcpSocket *s , HttpRequest *r ) {
"</table>"
"</center>",
g_stats.m_keyCols.getBufStart() &&
g_conf.m_dynamicPerfGraph ?
g_stats.m_keyCols.getBufStart() : ""
"</center>"
, LIGHT_BLUE
, TABLE_STYLE
//,g_stats.m_keyCols.getBufStart() &&
//g_conf.m_dynamicPerfGraph ?
//g_stats.m_keyCols.getBufStart() : ""
);
if(autoRefresh > 0) p.safePrintf("</body>");

@ -108,7 +108,7 @@ bool sendPageReindex ( TcpSocket *s , HttpRequest *r ) {
// if they are NOT submitting a request print the interface
// and we're not running, just print the interface
t = r->getString ("action" , &len );
if ( len != 2 ) { // && ! s_isRunning ) {
if ( len < 2 ) { // && ! s_isRunning ) {
//p = g_pages.printAdminTop ( p , pend , s , r );
//p = printInterface ( p , pend,q,username,coll,NULL,qlangStr);
g_pages.printAdminTop ( &sb , s , r );
@ -315,19 +315,25 @@ bool printInterface (SafeBuf *sb, char *q , //long user ,
errmsg );
}
sb->safePrintf(
"<style>"
".poo { background-color:#%s;}\n"
"</style>\n" ,
LIGHT_BLUE );
char bb [ MAX_COLL_LEN + 60 ];
bb[0]='\0';
//if ( user == USER_MASTER && c && c[0] ) sprintf ( bb , " (%s)", c);
// print the reindex interface
sb->safePrintf (
"<table width=100%% bgcolor=#%s cellpadding=4 border=1>"
"<tr><td colspan=3 bgcolor=#%s><center>"
"<table %s>"
"<tr><td colspan=3><center>"
//"<font size=+1>"
"<b>"
"Reindex Urls"
"</b>%s</td></tr>"
"<tr><td colspan=3>"
"<tr bgcolor=#%s><td colspan=3>"
"<font size=1>"
"Reindex the URLs that match this query. If URLs are "
"banned in tagdb they will be removed from the index. "
@ -339,7 +345,7 @@ bool printInterface (SafeBuf *sb, char *q , //long user ,
"whatever rule they match in the URL Filters table."
"</td></tr>"
"<tr><td><b>query</b>"
"<tr class=poo><td><b>query</b>"
"<br><font size=1>"
"URLs matching this query will be added to the spider "
"queue for re-spidering."
@ -359,32 +365,32 @@ bool printInterface (SafeBuf *sb, char *q , //long user ,
"name=updatetags>"
"</td></tr>"
*/
, LIGHT_BLUE , DARK_BLUE , bb , q );
, TABLE_STYLE , bb , DARK_BLUE , q );
if ( ! qlangStr ) qlangStr = "";
sb->safePrintf (
"<tr><td><b>start result number</b>"
"<tr class=poo><td><b>start result number</b>"
"<font size=1>"
"<br>Start at this search result number. Default 0.</td>"
"<td><input type=text name=srn value=0 size=10>"
"</td></tr>"
"<tr><td><b>end result number</b>"
"<tr class=poo><td><b>end result number</b>"
"<font size=1>"
"<br>Stop at this search result number. "
"Default 2000000. (2M)</td>"
"<td><input type=text name=ern size=10 value=2000000>"
"</td></tr>"
"<tr><td><b>query language</b>"
"<tr class=poo><td><b>query language</b>"
"<font size=1>"
"<br>Language that helps determine sort result ranking.</td>"
"<td><input type=text name=qlang size=6 value=\"%s\">"
"</td></tr>"
"<tr><td><b>FORCE DELETE</b>"
"<tr class=poo><td><b>FORCE DELETE</b>"
"<font size=1>"
"<br>Check this checkbox to "
"delete every search result matching the above "
@ -434,7 +440,7 @@ bool printInterface (SafeBuf *sb, char *q , //long user ,
// submit button
sb->safePrintf(
"<center>"
"<input type=submit name=action value=OK>"
"<input type=submit name=action value=Submit>"
"</center>"
"</form></html>");

@ -57,6 +57,11 @@ public:
// for printing our search result json items in csv:
HashTableX m_columnTable;
long m_numCSVColumns;
// stuff for doing redownloads
bool m_didRedownload;
XmlDoc *m_xd;
long m_oldContentHash32;
};
static bool printResult ( SafeBuf &sb,
@ -467,6 +472,11 @@ bool sendPageResults ( TcpSocket *s , HttpRequest *hr ) {
}
mnew ( st , sizeof(State0) , "PageResults2" );
// init some stuff
st->m_didRedownload = false;
st->m_xd = NULL;
st->m_oldContentHash32 = 0;
// copy yhits
if ( ! st->m_hr.copy ( hr ) )
return sendReply ( st , NULL );
@ -615,6 +625,15 @@ bool sendPageResults ( TcpSocket *s , HttpRequest *hr ) {
return status2;
}
// if the returned json result is older than maxagebeforeredownload then we
// redownload the page and, if its checksum has changed, return empty results
void doneRedownloadingWrapper ( void *state ) {
// cast our State0 class from this
State0 *st = (State0 *) state;
// resume
gotResults ( st );
}
/*
void gotSpellingWrapper( void *state ){
// cast our State0 class from this
@ -749,6 +768,85 @@ bool gotResults ( void *state ) {
return sendReply(st,NULL);
}
/*
//
// BEGIN REDOWNLOAD LOGIC
//
////////////
//
// if caller wants a certain freshness we might have to redownload the
// parent url to get the new json
//
////////////
// get the first result
Msg20 *m20first = msg40->m_msg20[0];
long mabr = st->m_hr.getLong("maxagebeforeredownload",-1);
if ( mabr >= 0 &&
numResults > 0 &&
// only do this once
! st->m_didRedownload &&
// need at least one result
m20first &&
// get the last spidered time from the msg20 reply of that result
now - m20first->m_r->m_lastSpidered > mabr ) {
// make a new xmldoc to do the redownload
XmlDoc *xd;
try { xd = new (XmlDoc); }
catch ( ... ) {
g_errno = ENOMEM;
log("query: Failed to alloc xmldoc.");
}
if ( g_errno ) return sendReply (st,NULL);
mnew ( xd , sizeof(XmlDoc) , "mabrxd");
// save it
st->m_xd = xd;
// get this
st->m_oldContentHash32 = m20first->m_r->m_contentHash32;
// do not re-do redownload
st->m_didRedownload = true;
// set it
xd->setUrl(parentUrl);
xd->setCallback ( st , doneRedownloadingWrapper );
// get the checksum
if ( xd->getContentChecksum32Fast() == (void *)-1 )
// return false if it blocked
return false;
// error?
if ( g_errno ) return sendReply (st,NULL);
// how did this not block
log("page: redownload did not would block adding parent");
}
// if we did the redownload and checksum changed, return 0 results
if ( st->m_didRedownload ) {
// get the doc we downloaded
XmlDoc *xd = st->m_xd;
// get it
long newHash32 = xd->getContentHash32();
// log it
if ( newHash32 != st->m_oldContentHash32 )
// note it in logs for now
log("results: content changed for %s",xd->m_firstUrl.m_url);
// free it
mdelete(xd, sizeof(XmlDoc), "mabrxd" );
delete xd;
// null it out so we don't try to re-free
st->m_xd = NULL;
// if content is significantly different, return 0 results
if ( newHash32 != st->m_oldContentHash32 ) {
SafeBuf sb;
// empty json i guess
sb.safePrintf("[]\n");
return sendReply(st,sb.getBufStart());
}
// otherwise, print the diffbot json results, they are still valid
}
//
// END REDOWNLOAD LOGIC
//
*/
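The block above is checked in commented out, but the intended flow is spelled out by its comments: when the caller passes maxagebeforeredownload and the top JSON result is older than that, re-fetch the parent page, compare a 32-bit content checksum against the one stored with the result, and serve an empty JSON array if the page changed. A stand-alone, hedged sketch of that control flow; fetchPage() and contentHash32() here are hypothetical stand-ins, not XmlDoc methods:

#include <cstdint>
#include <string>

// Hypothetical stand-ins so the sketch is self-contained.
static std::string fetchPage ( const std::string &url ) {
	return "<html>" + url + "</html>"; // the real code downloads the page
}
static uint32_t contentHash32 ( const std::string &content ) {
	uint32_t h = 2166136261u;
	for ( unsigned char c : content ) { h ^= c; h *= 16777619u; }
	return h;
}

// Return the JSON to serve: the cached results when the parent page is
// still fresh or unchanged, "[]" when it has changed since it was spidered.
static std::string maybeInvalidate ( const std::string &parentUrl ,
                                     uint32_t oldContentHash32 ,
                                     long lastSpidered ,   // unix time
                                     long now ,
                                     long maxAgeBeforeRedownload ,
                                     const std::string &cachedJson ) {
	// caller did not ask for a freshness check
	if ( maxAgeBeforeRedownload < 0 ) return cachedJson;
	// the result is still young enough, no re-fetch needed
	if ( now - lastSpidered <= maxAgeBeforeRedownload ) return cachedJson;
	// re-download the parent and compare checksums
	uint32_t newHash32 = contentHash32 ( fetchPage ( parentUrl ) );
	if ( newHash32 != oldContentHash32 ) return "[]\n";
	// the page did not change, the cached JSON is still valid
	return cachedJson;
}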
//
// BEGIN ADDING URL
@ -1061,7 +1159,8 @@ bool gotResults ( void *state ) {
// otherwise, we had no error
if ( numResults == 0 && si->m_format == FORMAT_HTML ) {
sb.safePrintf ( "No results found." );
sb.safePrintf ( "No results found in <b>%s</b> collection.",
cr->m_coll);
}
else if ( moreFollow && si->m_format == FORMAT_HTML ) {
if ( isAdmin && si->m_docsToScanForReranking > 1 )
@ -1128,11 +1227,8 @@ bool gotResults ( void *state ) {
if ( collLen == 4 && strncmp ( coll, "main", 4) == 0 ) isMain = true;
// print "in collection ***" if we had a collection
if ( collLen > 0 && ! isMain && isAdmin ) {
sb.safePrintf (" in collection '<b>");
sb.safeMemcpy ( coll , collLen );
sb.safeMemcpy ( "</b>'" , 5 );
}
if ( collLen > 0 && ! isMain && si->m_format == FORMAT_HTML )
sb.safePrintf (" in collection <b>%s</b>",coll);
char *pwd = si->m_pwd;
@ -2409,7 +2505,7 @@ static bool printResult ( SafeBuf &sb,
mr->m_docId );
// the new links
if ( si->m_format == FORMAT_HTML ) {
if ( si->m_format == FORMAT_HTML && g_conf.m_isMattWells ) {
//sb.safePrintf(" - <a href=\"/scoring?"
// "c=%s&\">scoring</a>",
// coll );
@ -4724,21 +4820,26 @@ bool printLogoAndSearchBox ( SafeBuf &sb , HttpRequest *hr , long catId ) {
else
sb.safePrintf("<a title=\"Search the web\" href=/>web</a>");
sb.safePrintf(" &nbsp;&nbsp;&nbsp;&nbsp; " );
// SEO functionality not included yet - so redir to gigablast.
if ( g_conf.m_isMattWells )
sb.safePrintf("<a title=\"Rank higher in "
"Google\" href='/seo'>");
else
sb.safePrintf("<a title=\"Rank higher in "
"Google\" href='https://www.gigablast."
"com/seo'>");
if ( g_conf.m_isMattWells ) {
// SEO functionality not included yet - so redir to gigablast.
if ( g_conf.m_isMattWells )
sb.safePrintf("<a title=\"Rank higher in "
"Google\" href='/seo'>");
else
sb.safePrintf("<a title=\"Rank higher in "
"Google\" href='https://www.gigablast."
"com/seo'>");
sb.safePrintf(
"seo</a>"
" &nbsp;&nbsp;&nbsp;&nbsp; "
);
sb.safePrintf(
"seo</a>"
" &nbsp;&nbsp;&nbsp;&nbsp; "
);
}
if (catId <= 0 )
sb.safePrintf("<a title=\"Browse the DMOZ directory\" "
@ -4757,12 +4858,12 @@ bool printLogoAndSearchBox ( SafeBuf &sb , HttpRequest *hr , long catId ) {
// i'm not sure why this was removed. perhaps
// because it is not working yet because of
// some bugs...
"<!-- <a title=\"Advanced web search\" "
"<a title=\"Advanced web search\" "
"href=/adv.html>"
"advanced"
"</a>"
" &nbsp;&nbsp;&nbsp;&nbsp; -->"
" &nbsp;&nbsp;&nbsp;&nbsp;"
"<a title=\"Add your url to the index\" "
"href=/addurl>"
@ -4945,6 +5046,11 @@ bool printCSVHeaderRow ( SafeBuf *sb , State0 *st ) {
Msg20 *m20 = msg40->m_msg20[i];
Msg20Reply *mr = m20->m_r;
if ( ! mr ) {
log("results: missing msg20 reply for result #%li",i);
continue;
}
// get content
char *json = mr->ptr_content;
// how can it be empty?

@ -61,20 +61,23 @@ bool printNav ( SafeBuf &sb , HttpRequest *r ) {
"<a href=%s/privacy.html>Privacy Policy</a>"
" &nbsp; &nbsp; "
"<a href=%s/searchfeed.html>Search API</a>"
" &nbsp; &nbsp; "
"<a href=%s/seoapi.html>SEO API</a>"
" &nbsp; &nbsp; "
"<a href=%s/account>My Account</a> "
, root
, root
, root
, root
, root
, root
, rootSecure
//" &nbsp; &nbsp; <a href=/logout>Logout</a>"
);
if ( g_conf.m_isMattWells )
sb.safePrintf(" &nbsp; &nbsp; "
"<a href=%s/seoapi.html>SEO API</a>"
" &nbsp; &nbsp; "
"<a href=%s/account>My Account</a> "
, root
, rootSecure
//" &nbsp; &nbsp; <a href=/logout>Logout</a>"
);
if ( r->isLocal() )
sb.safePrintf("&nbsp; &nbsp;[<a href=\"/master?\">Admin</a>]");
sb.safePrintf("</p></b></center></body></html>");
@ -152,7 +155,15 @@ bool printWebHomePage ( SafeBuf &sb , HttpRequest *r ) {
// submit to https now
sb.safePrintf("<form method=get "
"action=/search name=f>\n");
CollectionRec *cr = g_collectiondb.getRec ( r );
if ( cr )
sb.safePrintf("<input type=hidden name=c value=\"%s\">",
cr->m_coll);
sb.safePrintf("<input name=q type=text size=60 value=\"\">&nbsp;<input type=\"submit\" value=\"Search\">\n");
sb.safePrintf("\n");
sb.safePrintf("</form>\n");
sb.safePrintf("<br>\n");
@ -381,7 +392,12 @@ bool printAddUrlHomePage ( SafeBuf &sb , char *url , HttpRequest *r ) {
sb.safePrintf("<br><br>\n");
sb.safePrintf("<br><br><br>\n");
sb.safePrintf("<a href=/>web</a> &nbsp;&nbsp;&nbsp;&nbsp; <a href=http://www.gigablast.com/seo>seo</a> &nbsp;&nbsp;&nbsp;&nbsp; <a href=\"/Top\">directory</a> &nbsp;&nbsp;&nbsp;&nbsp; \n");
sb.safePrintf("<a href=/>web</a> &nbsp;&nbsp;&nbsp;&nbsp; ");
if ( g_conf.m_isMattWells )
sb.safePrintf("<a href=http://www.gigablast.com/seo>seo"
"</a> &nbsp;&nbsp;&nbsp;&nbsp; " );
sb.safePrintf("<a href=\"/Top\">directory</a> "
"&nbsp;&nbsp;&nbsp;&nbsp; \n");
sb.safePrintf("<a href=/adv.html>advanced search</a>");
sb.safePrintf(" &nbsp;&nbsp;&nbsp;&nbsp; ");
sb.safePrintf("<b title=\"Instantly add your url to Gigablast's "
@ -391,8 +407,17 @@ bool printAddUrlHomePage ( SafeBuf &sb , char *url , HttpRequest *r ) {
sb.safePrintf("<br><br>\n");
sb.safePrintf("<form method=get action=/addurl name=f>\n");
//CollectionRec *cr = g_collectiondb.getRec ( "main" );
//sb.safePrintf("<input type=hidden name=c value=\"%s\">",cr->m_coll);
CollectionRec *cr = g_collectiondb.getRec ( r );
// the collection we want to add the url to
char *coll = NULL;
if ( cr )
coll = cr->m_coll;
if ( coll )
sb.safePrintf("<input type=hidden name=c value=\"%s\">",coll);
if ( ! coll )
coll = "";
sb.safePrintf("<input name=u type=text size=60 value=\"");
if ( url ) {
SafeBuf tmp;
@ -416,6 +441,9 @@ bool printAddUrlHomePage ( SafeBuf &sb , char *url , HttpRequest *r ) {
// or if in read-only mode
if ( g_conf.m_readOnlyMode )
msg = "Add url is temporarily disabled";
sb.safePrintf("<br><br>Add a url to the <b>%s</b> collection",coll);
// if url is non-empty the ajax will receive this identical msg
// and display it in the div, so do not duplicate the msg!
if ( msg && ! url )
@ -453,11 +481,12 @@ bool printAddUrlHomePage ( SafeBuf &sb , char *url , HttpRequest *r ) {
unsigned long long rand64 = gettimeofdayInMillisecondsLocal();
// msg7 needs an explicit collection for /addurl for injecting
// in PageInject.cpp. it does not use defaults for safety.
sb.safePrintf("&id=%lu&c=main&rand=%llu';\n"
sb.safePrintf("&id=%lu&c=%s&rand=%llu';\n"
"client.open('GET', url );\n"
"client.send();\n"
"</script>\n"
, h32
, coll
, rand64
);
sb.safePrintf("</div>\n");
@ -526,9 +555,21 @@ bool printDirHomePage ( SafeBuf &sb , HttpRequest *r ) {
sb.safePrintf("<br><br>\n");
sb.safePrintf("<br><br><br>\n");
sb.safePrintf("<a href=/>web</a> &nbsp;&nbsp;&nbsp;&nbsp; <a href=http://www.gigablast.com/seo>seo</a> &nbsp;&nbsp;&nbsp;&nbsp; <b>directory</b> &nbsp;&nbsp;&nbsp;&nbsp; \n");
sb.safePrintf("<a href=http://www.gigablast.com/events>events</a>"
" &nbsp;&nbsp;&nbsp;&nbsp; \n");
sb.safePrintf("<a href=/>web</a> &nbsp;&nbsp;&nbsp;&nbsp; ");
if ( g_conf.m_isMattWells )
sb.safePrintf("<a href=http://www.gigablast.com/seo>seo"
"</a> &nbsp;&nbsp;&nbsp;&nbsp; " );
sb.safePrintf("<a href=\"/Top\"><b>directory</b></a> "
"&nbsp;&nbsp;&nbsp;&nbsp; \n");
if ( g_conf.m_isMattWells )
sb.safePrintf("<a href=http://www.gigablast.com/events>"
"events</a>"
" &nbsp;&nbsp;&nbsp;&nbsp; \n");
sb.safePrintf("<a href=/adv.html>advanced search</a>");
sb.safePrintf(" &nbsp;&nbsp;&nbsp;&nbsp; ");
char *root = "";
@ -578,18 +619,13 @@ bool sendPageRoot ( TcpSocket *s , HttpRequest *r, char *cookie ) {
//long qlen;
//char *q = r->getString ( "q" , &qlen , NULL );
// insert collection name too
long collLen;
char *coll = r->getString("c",&collLen);
if ( ! coll || ! coll[0] ) {
//coll = g_conf.m_defaultColl;
coll = g_conf.getDefaultColl( r->getHost(), r->getHostLen() );
collLen = gbstrlen(coll);
}
// ensure collection not too big
if ( collLen >= MAX_COLL_LEN ) {
g_errno = ECOLLTOOBIG;
CollectionRec *cr = g_collectiondb.getRec(r);
if ( ! cr ) {
g_errno = ENOCOLLREC;
return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));
}
// get the collection rec
/*
CollectionRec *cr = g_collectiondb.getRec ( coll );
@ -1271,7 +1307,9 @@ bool sendPageAddUrl ( TcpSocket *s , HttpRequest *r ) {
// collLen = gbstrlen(coll);
//}
// get collection rec
CollectionRec *cr = g_collectiondb.getRec ( r );
// bitch if no collection rec found
if ( ! cr ) {
g_errno = ENOCOLLREC;
@ -1552,6 +1590,8 @@ void doneInjectingWrapper3 ( void *st ) {
//CollectionRec *cr = g_collectiondb.getRec ( st1->m_coll );
// collection name
char *coll = st1->m_coll;
if ( ! coll ) coll = "";
//char tt [ 128 ];
//tt[0] = '\0';
@ -1658,8 +1698,10 @@ void doneInjectingWrapper3 ( void *st ) {
unsigned long rand32 = rand();
// in the mime to 0 seconds!
sb.safePrintf("<b>Url successfully added. "
"<a href=/search?rand=%lu&q=url%%3A",
rand32);
"<a href=/search?rand=%lu&"
"c=%s&q=url%%3A",
rand32,
coll);
sb.urlEncode(url);
sb.safePrintf(">Check it</a> or "
"<a href=http://www.gigablast.com/seo?u=");

@ -131,14 +131,14 @@ bool sendPageSockets ( TcpSocket *s , HttpRequest *r ) {
void printTcpTable ( SafeBuf* p, char *title, TcpServer *server ) {
// table headers for urls currently being spidered
p->safePrintf ( "<table width=100%% bgcolor=#d0d0f0 border=1>"
"<tr><td bgcolor=#c0c0f0 colspan=19>"
p->safePrintf ( "<table %s>"
"<tr class=hdrow><td colspan=19>"
"<center>"
//"<font size=+1>"
"<b>%s</b>"
//"</font>"
"</td></tr>"
"<tr>"
"<tr bgcolor=#%s>"
"<td><b>#</td>"
"<td><b>fd</td>"
"<td><b>age</td>"
@ -151,7 +151,11 @@ void printTcpTable ( SafeBuf* p, char *title, TcpServer *server ) {
"<td><b>bytes to read</td>"
"<td><b>bytes sent</td>"
"<td><b>bytes to send</td>"
"</tr>\n" , title );
"</tr>\n"
, TABLE_STYLE
, title
, DARK_BLUE
);
// current time in milliseconds
long long now = gettimeofdayInMilliseconds();
// store in buffer for sorting
@ -202,12 +206,12 @@ void printTcpTable ( SafeBuf* p, char *title, TcpServer *server ) {
case ST_CLOSE_CALLED: st="close called"; break;
}
// bgcolor is lighter for incoming requests
char *bg = "#c0c0f0";
if ( s->m_isIncoming ) bg = "#e8e8ff";
char *bg = "c0c0f0";
if ( s->m_isIncoming ) bg = "e8e8ff";
// times
long elapsed1 = now - s->m_startTime ;
long elapsed2 = now - s->m_lastActionTime ;
p->safePrintf ("<tr bgcolor=%s>"
p->safePrintf ("<tr bgcolor=#%s>"
"<td>%li</td>" // i
"<td>%i</td>" // fd
"<td>%lims</td>" // elapsed seconds since start
@ -301,26 +305,30 @@ void printUdpTable ( SafeBuf *p, char *title, UdpServer *server ,
msgCount1[s->m_msgType]++;
}
// print the counts
p->safePrintf ( "<table bgcolor=#d0d0f0 border=1>"
"<tr><td bgcolor=#c0c0f0 colspan=19>"
p->safePrintf ( "<table %s>"
"<tr class=hdrow><td colspan=19>"
"<center>"
"<b>%s Summary</b> (%li transactions)"
"</td></tr>"
"<tr>"
"<tr bgcolor=#%s>"
"<td><b>niceness</td>"
"<td><b>msg type</td>"
"<td><b>total</td>"
"</tr>",
title , server->getNumUsedSlots() );
TABLE_STYLE,
title , server->getNumUsedSlots() ,
DARK_BLUE );
for ( long i = 0; i < 96; i++ ) {
if ( msgCount0[i] <= 0 ) continue;
p->safePrintf("<tr><td>0</td><td>0x%lx</td><td>%li</td></tr>",
i, msgCount0[i]);
p->safePrintf("<tr bgcolor=#%s>"
"<td>0</td><td>0x%lx</td><td>%li</td></tr>",
LIGHT_BLUE,i, msgCount0[i]);
}
for ( long i = 0; i < 96; i++ ) {
if ( msgCount1[i] <= 0 ) continue;
p->safePrintf("<tr><td>1</td><td>0x%lx</td><td>%li</td></tr>",
i, msgCount1[i]);
p->safePrintf("<tr bgcolor=#%s>"
"<td>1</td><td>0x%lx</td><td>%li</td></tr>",
LIGHT_BLUE,i, msgCount1[i]);
}
p->safePrintf ( "</table><br>" );
@ -333,15 +341,15 @@ void printUdpTable ( SafeBuf *p, char *title, UdpServer *server ,
dd = //"<td><b>dns ip</b></td>"
"<td><b>hostname</b></td>";
}
// table headers for urls currently being spidered
p->safePrintf ( "<table width=100%% bgcolor=#d0d0f0 border=1>"
"<tr><td bgcolor=#c0c0f0 colspan=19>"
p->safePrintf ( "<table %s>"
"<tr class=hdrow><td colspan=19>"
"<center>"
//"<font size=+1>"
"<b>%s</b> (%li transactions)"
//"</font>"
"</td></tr>"
"<tr>"
"<tr bgcolor=#%s>"
"<td><b>age</td>"
"<td><b>last read</td>"
"<td><b>last send</td>"
@ -362,7 +370,11 @@ void printUdpTable ( SafeBuf *p, char *title, UdpServer *server ,
"<td><b>dgrams to send</td>"
"<td><b>acks read</td>"
"<td><b>resends</td>"
"</tr>\n" , title , server->getNumUsedSlots() , dd );
"</tr>\n" ,
TABLE_STYLE,
title , server->getNumUsedSlots() ,
DARK_BLUE ,
dd );
// now fill in the columns
@ -385,9 +397,9 @@ void printUdpTable ( SafeBuf *p, char *title, UdpServer *server ,
if ( s->m_lastReadTime == 0LL ) strcpy ( e1 , "--" );
if ( s->m_lastSendTime == 0LL ) strcpy ( e2 , "--" );
// bgcolor is lighter for incoming requests
char *bg = "#c0c0f0";
char *bg = LIGHT_BLUE;//"c0c0f0";
// is it incoming
if ( ! s->m_callback ) bg = "#e8e8ff";
if ( ! s->m_callback ) bg = LIGHTER_BLUE;//"e8e8ff";
Host *h = g_hostdb.getHost ( s->m_ip , s->m_port );
char *eip = "??";
unsigned short eport = 0 ;
@ -494,7 +506,7 @@ void printUdpTable ( SafeBuf *p, char *title, UdpServer *server ,
if ( msgType == 0x25 ) desc = "get link info";
if ( msgType == 0xfd ) desc = "proxy forward";
p->safePrintf ( "<tr bgcolor=%s>"
p->safePrintf ( "<tr bgcolor=#%s>"
"<td>%s</td>" // age
"<td>%s</td>" // last read
"<td>%s</td>" // last send
@ -540,22 +552,25 @@ void printUdpTable ( SafeBuf *p, char *title, UdpServer *server ,
cf2);
}
if ( ! isDns )
if ( ! isDns ) {
//"<td>%s</td>" // ip
//"<td>%hu</td>" // port
// clickable hostId
char *toFrom = "to";
if ( ! s->m_callback ) toFrom = "from";
//"<td><a href=http://%s:%hu/cgi/15.cgi>%li</a></td>"
p->safePrintf ( "<td>0x%hhx</td>" // msgtype
"<td><nobr>%s</nobr></td>" // desc
"<td><a href=http://%s:%hu/"
"<td><nobr>%s <a href=http://%s:%hu/"
"master/sockets?"
"c=%s>%s</a></td>"
"c=%s>%s</a></nobr></td>"
"<td>%s%li%s</td>" , // niceness
s->m_msgType ,
desc,
//iptoa(s->m_ip) ,
//s->m_port ,
// begin clickable hostId
toFrom,
eip ,
eport ,
coll ,
@ -565,6 +580,7 @@ void printUdpTable ( SafeBuf *p, char *title, UdpServer *server ,
cf2
// end clickable hostId
);
}
p->safePrintf ( "<td>%lu</td>" // transId
"<td>%i</td>" // called handler

File diff suppressed because it is too large

@ -194,9 +194,6 @@ void sendReply ( void *state ) {
//g_pages.printAdminTop2 ( &buf , st->m_socket , &st->m_request, NULL ,
// tmpBuf.getBufStart(), tmpBuf.length() );
// write the controls section of the page
writeControls( &buf, st );
// Debug print of CGI parameters and errors
char startTimeStr[30];
char endTimeStr[30];
@ -211,10 +208,10 @@ void sendReply ( void *state ) {
"Turn on in the master controls.</b>"
"</font>\n" );
buf.safePrintf("<table cellpadding=10 border=0>\n");
buf.safePrintf("<table %s>\n",TABLE_STYLE);
buf.safePrintf("<tr><td>"
"<center>");
buf.safePrintf("<tr><td bgcolor=#%s>"
"<center>",LIGHT_BLUE);
/////////////////////////
//
@ -246,6 +243,9 @@ void sendReply ( void *state ) {
buf.safePrintf("</center>");
// write the controls section of the page
writeControls( &buf, st );
// print the bottom of the page
g_pages.printAdminBottom2( &buf );

@ -34,8 +34,8 @@ bool sendPageThreads ( TcpSocket *s , HttpRequest *r ) {
long hiActive = q->m_hiLaunched - q->m_hiReturned;
long total = loActive + mdActive + hiActive;
p.safePrintf ( "<table width=100%% bgcolor=#d0d0f0 border=1>"
"<tr><td bgcolor=#c0c0f0 colspan=\"11\">"
p.safePrintf ( "<table %s>"
"<tr class=hdrow><td colspan=\"11\">"
//"<center>"
//"<font size=+1>"
"<b>Thread Type: %s"
@ -43,12 +43,13 @@ bool sendPageThreads ( TcpSocket *s , HttpRequest *r ) {
" med: %li"
" high: %li"
" total: %li)</td></tr>",
TABLE_STYLE,
q->getThreadType(),
loActive, mdActive,
hiActive, total);
p.safePrintf ("<tr>"
p.safePrintf ("<tr bgcolor=#%s>"
"<td><b>Status</b></td>"
"<td><b>Niceness</b></td>"
"<td><b>Queued Time</b></td>"
@ -60,7 +61,9 @@ bool sendPageThreads ( TcpSocket *s , HttpRequest *r ) {
"<td><b>Bytes Done</b></td>"
"<td><b>KBytes/Sec</b></td>"
"<td><b>Read|Write</b></td>"
"</tr>");
"</tr>"
, LIGHT_BLUE
);
for ( long j = 0 ; j < q->m_top ; j++ ) {
ThreadEntry *t = &q->m_entries[j];
@ -73,7 +76,7 @@ bool sendPageThreads ( TcpSocket *s , HttpRequest *r ) {
// might have got pre-called from EDISKSTUCK
if ( ! t->m_callback ) fs = NULL;
p.safePrintf("<tr>");
p.safePrintf("<tr bgcolor=#%s>", DARK_BLUE );
if(t->m_isDone) {
p.safePrintf("<td><font color='red'><b>done</b></font></td>");
@ -109,7 +112,7 @@ bool sendPageThreads ( TcpSocket *s , HttpRequest *r ) {
if(diskThread && fs ) {
long long took = (now - t->m_launchedTime);
if(took <= 0) took = 1;
p.safePrintf("<td>???/%li</td>", t->m_bytesToGo);
p.safePrintf("<td>%c%c%c/%li</td>", '?','?','?',t->m_bytesToGo);
p.safePrintf("<td>%.2f kbps</td>", 0.0);//(float)fs->m_bytesDone/took);
p.safePrintf("<td>%s</td>",t->m_doWrite? "Write":"Read");
}
@ -159,41 +162,50 @@ bool sendPageThreads ( TcpSocket *s , HttpRequest *r ) {
long hiActiveMed = disk->m_hiLaunchedMed - disk->m_hiReturnedMed;
long hiActiveSma = disk->m_hiLaunchedSma - disk->m_hiReturnedSma;
long activeWrites = disk->m_writesLaunched - disk->m_writesReturned;
p.safePrintf ( "<table width=100%% bgcolor=#d0d0f0 border=1>"
"<tr><td bgcolor=#c0c0f0 colspan=\"5\">");
p.safePrintf ( "<table %s>"
"<tr class=hdrow><td colspan=\"5\">"
, TABLE_STYLE );
p.safePrintf ( "<center><b>Active Read Threads</b></center></td></tr>"
"<tr><td></td><td colspan='3'><center><b>Priority</b></center></td></tr>"
"<tr>"
"<tr bgcolor=#%s>"
"<td></td><td colspan='3'>"
"<center><b>Priority</b></center></td></tr>"
"<tr bgcolor=#%s>"
"<td><b>Size</b></td><td>Low</td><td>Medium</td><td>High</td>"
"</tr>"
// "<tr>"
// "<td>Size</td>"
// "</tr>"
"<tr>"
"<tr bgcolor=#%s>"
"<td>Small</td> <td>%li</td><td>%li</td><td>%li</td>"
"</tr>"
"<tr>"
"<tr bgcolor=#%s>"
"<td>Medium</td> <td>%li</td><td>%li</td><td>%li</td>"
"</tr>"
"<tr>"
"<tr bgcolor=#%s>"
"<td>Large</td> <td>%li</td><td>%li</td><td>%li</td>"
"</tr>"
"</table><br><br>",
LIGHT_BLUE,
LIGHT_BLUE,
DARK_BLUE,
loActiveSma,
mdActiveSma,
hiActiveSma,
DARK_BLUE,
loActiveMed,
mdActiveMed,
hiActiveMed,
DARK_BLUE,
loActiveBig,
mdActiveBig,
hiActiveBig);
p.safePrintf ("<table width=100%% bgcolor=#d0d0f0 border=1>");
p.safePrintf ("<tr>"
"<td bgcolor=#c0c0f0><b>Active Write Threads</b></td><td>%li</td>"
p.safePrintf ("<table %s>",TABLE_STYLE);
p.safePrintf ("<tr class=hdrow>"
"<td><b>Active Write Threads</b></td><td>%li</td>"
"</tr></table>",
activeWrites);

Pages.cpp (278 changed lines)

@ -222,11 +222,11 @@ static WebPage s_pages[] = {
//USER_ADMIN | USER_MASTER ,
"page filter page",
sendPageGeneric , 0 } ,
{ PAGE_INJECT , "admin/inject" , 0 , "inject urls" , 0 , 1 ,
{ PAGE_INJECT , "admin/inject" , 0 , "inject url" , 0 , 1 ,
//USER_ADMIN | USER_MASTER ,
"inject url in the index here",
sendPageInject , 2 } ,
{ PAGE_ADDURL2 , "admin/addurl" , 0 , "add url" , 0 , 0 ,
{ PAGE_ADDURL2 , "admin/addurl" , 0 , "add urls" , 0 , 0 ,
//USER_ADMIN | USER_MASTER ,
"add url page",
sendPageAddUrl , 0 } ,
@ -913,7 +913,9 @@ bool Pages::printAdminTop ( SafeBuf *sb ,
if ( user ) pwd = user->m_password;
sb->safePrintf(
"<html>\n"
"<html>\n");
sb->safePrintf(
"<head>\n"
"<title>%s | gigablast admin</title>\n"
"<meta http-equiv=\"Content-Type\" "
@ -961,21 +963,24 @@ bool Pages::printAdminTop ( SafeBuf *sb ,
coll, NULL, fromIp, qs );
}
// end table
sb->safePrintf ("</td></tr></table><br/><br/>\n");
sb->safePrintf ("</td></tr></table><br/>\n");//<br/>\n");
SafeBuf mb;
long adds = 0;
PingServer *ps = &g_pingServer;
mb.safePrintf("<center>"
mb.safePrintf(//"<center>"
"<table cellpadding=5 "
"style=\""
//"border:2px solid black;"
"max-width:600px\" "
"background-color:#ff6666;"
"border:2px #8f0000 solid;"
"border-radius:5px;"
"max-width:600px;"
"\" "
"border=0"
">"
"<tr><td bgcolor=#ff6666>");
"<tr><td>");
// emergency message box
if ( g_pingServer.m_hostsConfInDisagreement ) {
@ -997,8 +1002,9 @@ bool Pages::printAdminTop ( SafeBuf *sb ,
*needsRebalance ) {
if ( adds ) mb.safePrintf("<br><br>");
adds++;
mb.safePrintf("This host requires a shard rebalance. "
"Click 'rebalance shards' in master controls.");
mb.safePrintf("A host requires a shard rebalance. "
"Click 'rebalance shards' in master controls to "
"rebalance all hosts.");
}
if ( ps->m_numHostsDead ) {
@ -1010,39 +1016,22 @@ bool Pages::printAdminTop ( SafeBuf *sb ,
"pings.",ps->m_numHostsDead ,s );
}
mb.safePrintf("</td></tr></table></center><br>");
if ( ! g_conf.m_useThreads || g_threads.m_disabled ) {
if ( adds ) mb.safePrintf("<br><br>");
adds++;
mb.safePrintf("Threads are disabled. Severely hurts "
"performance.");
}
// a new table. on the left is collections, on right is other stuff
sb->safePrintf("<TABLE "
"cellpadding=5 border=0>"
"<TR>"
"<TD valign=top>"
"<div "
"style="
"max-height:600px;"
//"max-width:225px;"
//"min-width:225px;"
"overflow-y:auto;"
"overflow-x:hidden>"
);
// collection under that
status &= printCollectionNavBar ( sb, page , username , coll,pwd, qs );
// then collection page links and parms
sb->safePrintf("</div></TD><TD valign=top><br>");
// print emergency msg box
if ( adds )
sb->safePrintf("%s",mb.getBufStart());
// print the links
status &= printAdminLinks ( sb, page , username , coll , pwd, true );
// print the links
status &= printAdminLinks ( sb, page , username , coll ,pwd , false );
mb.safePrintf("</td></tr></table>"
//"</center>"
"<br>");
////////
//
// . the form
//
////////
// . we cannot use the GET method if there is more than a few k of
// parameters, like in the case of the Search Controls page. The
// browser simply will not send the request if it is that big.
@ -1054,7 +1043,6 @@ bool Pages::printAdminTop ( SafeBuf *sb ,
sb->safePrintf ("<form name=\"SubmitInput\" method=\"get\" "
"action=\"/%s\">\n",
s_pages[page].m_filename);
// pass on this stuff
//if ( ! pwd ) pwd = "";
//sb->safePrintf ( "<input type=hidden name=pwd value=\"%s\">\n",pwd);
@ -1065,11 +1053,66 @@ bool Pages::printAdminTop ( SafeBuf *sb ,
if ( g_users.hasPermission ( username, PAGE_ADMIN ) ){
sb->safePrintf("<input type=hidden name=master value=0>\n");
}
// should any changes be broadcasted to all hosts?
sb->safePrintf ("<input type=hidden name=cast value=\"%li\">\n",
(long)s_pages[page].m_cast);
// a new table. on the left is collections, on right is other stuff
sb->safePrintf("<TABLE "
"cellpadding=5 border=0>"
"<TR>"
"<td></td>"
);
// then collection page links and parms
sb->safePrintf("<TD valign=top>");
// print emergency msg box
if ( adds )
sb->safePrintf("<br>%s",mb.getBufStart());
// print the links
status &= printAdminLinks ( sb, page , username , coll , pwd, true );
// print the links
status &= printAdminLinks ( sb, page , username , coll ,pwd , false );
// begin 2nd row in big table
sb->safePrintf("</td></TR>");
sb->safePrintf(
"<TR>"
"<TD valign=top>"
"<div "
"style=\""
"max-height:600px;"
"max-width:200px;"
"min-width:200px;"
"padding:4px;" // same as TABLE_STYLE
"background-color:#d0d0d0;"
"border-radius:10px;"
"border:2px #606060 solid;"
//"border-width:2px;"
//"border-color:#606060;"
"overflow-y:auto;"
"overflow-x:hidden;"
"line-height:23px;"
"\""
">"
);
// collection under that
status &= printCollectionNavBar ( sb, page , username , coll,pwd, qs );
sb->safePrintf("</div></TD>");
// the controls will go here
sb->safePrintf("<TD valign=top>");
return true;
}
@ -1783,7 +1826,7 @@ bool Pages::printAdminLinks ( SafeBuf *sb,
//sprintf(p,"<font size=+1>\n" );
//p += gbstrlen(p);
sb->safePrintf ("<center>\n" );
//sb->safePrintf ("<center>\n" );
// sometimes we do not want to be USER_MASTER for testing
char buf [ 64 ];
@ -1807,6 +1850,10 @@ bool Pages::printAdminLinks ( SafeBuf *sb,
if ( ! g_conf.m_isMattWells && i == PAGE_SEO )
continue;
// skip page autoban link
if ( ! g_conf.m_isMattWells && i == PAGE_AUTOBAN )
continue;
// ignore these for now
if ( i == PAGE_SECURITY ) continue;
if ( i == PAGE_ACCESS ) continue;
@ -1815,38 +1862,55 @@ bool Pages::printAdminLinks ( SafeBuf *sb,
if ( i == PAGE_SEARCHBOX ) continue;
if ( i == PAGE_TITLEDB ) continue;
// print "url download" before "inject url"
// GET /mycollname_urls.csv
if ( i == PAGE_INJECT ) {
sb->safePrintf (
"<b>"
"<a style=text-decoration:none; "
"href=\"/download/%s_urls.txt\">"
"url download"
"</a>"
"</b>"
" &nbsp; \n",
coll );
}
if ( cr && ! cr->m_isCustomCrawl && i == PAGE_CRAWLBOT )
continue;
// print it out
if ( i == PAGE_LOGIN || i == PAGE_LOGIN2 )
sb->safePrintf(
"<span style=\"white-space:nowrap\">"
"<a href=\"/%s?"
//"user=%s&pwd=%s&"
"c=%s%s\">%s</a>"
"</span>"
" &nbsp; \n",s_pages[i].m_filename,
//username,pwd,
coll,
buf,s_pages[i].m_name);
//"<span style=\"white-space:nowrap\">"
"<a href=\"/%s?"
//"user=%s&pwd=%s&"
"c=%s%s\">%s</a>"
//"</span>"
" &nbsp; \n",s_pages[i].m_filename,
//username,pwd,
coll,
buf,s_pages[i].m_name);
else if ( page == i )
sb->safePrintf(
"<span style=\"white-space:nowrap\">"
"<a href=\"/%s?c=%s%s\"><b>"
"<font color=red>%s</font></b></a>"
"</span>"
" &nbsp; \n",s_pages[i].m_filename,
coll,
buf,s_pages[i].m_name);
//"<span style=\"white-space:nowrap\">"
"<a href=\"/%s?c=%s%s\"><b>"
"<font color=red>%s</font></b></a>"
//"</span>"
" &nbsp; \n",s_pages[i].m_filename,
coll,
buf,s_pages[i].m_name);
else
sb->safePrintf(
"<span style=\"white-space:nowrap\">"
"<a href=\"/%s?c=%s%s\">%s</a>"
"</span>"
" &nbsp; \n",s_pages[i].m_filename,
coll,
buf,s_pages[i].m_name);
//"<span style=\"white-space:nowrap\">"
"<b>"
"<a style=text-decoration:none; "
"href=\"/%s?c=%s%s\">%s</a>"
"</b>"
//"</span>"
" &nbsp; \n",s_pages[i].m_filename,
coll,
buf,s_pages[i].m_name);
// print <br> after the last master admin control
/*
if ( i == PAGE_DELCOLL && user == USER_MASTER ) {
@ -1861,7 +1925,24 @@ bool Pages::printAdminLinks ( SafeBuf *sb,
}
*/
}
sb->safePrintf("</center><br/>" );
// print documentation links
if ( top ) {
sb->safePrintf(" <a style=text-decoration:none "
"href=/admin.html>"
"<b>"
"admin guide"
"</b></a> "
"&nbsp; "
" <a style=text-decoration:none; "
"href=/developer.html>"
"<b>dev guide</b></a>" );
}
//sb->safePrintf("</center>" );
sb->safePrintf("<br/>" );
if ( top ) sb->safePrintf("<br/>" );
if ( top ) return status;
@ -1956,6 +2037,7 @@ bool Pages::printCollectionNavBar ( SafeBuf *sb ,
bool status = true;
//if ( ! pwd ) pwd = "";
if ( ! qs ) qs = "";
// if not admin just print collection name
if ( g_collectiondb.m_numRecsUsed == 0 ) {
sb->safePrintf ( "<center>"
@ -1990,26 +2072,42 @@ bool Pages::printCollectionNavBar ( SafeBuf *sb ,
char *s = "s";
if ( g_collectiondb.m_numRecsUsed == 1 ) s = "";
sb->safePrintf ( "<center><b>%li Collection%s</b></center><br>\n",
sb->safePrintf ( "<center><nobr><b>%li Collection%s</b></nobr>"
"</center><br>\n",
g_collectiondb.m_numRecsUsed , s );
char *color = "red";
//if ( page >= PAGE_CGIPARMS ) color = "red";
//else color = "black";
// style for printing collection names
sb->safePrintf("<style>.x{text-decoration:none;font-weight:bold;}"
".e{background-color:#e0e0e0;}"
"</style>\n");
long row = 0;
//for ( long i = a ; i < b ; i++ ) {
for ( long i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) {
CollectionRec *cc = g_collectiondb.m_recs[i];
if ( ! cc ) continue;
char *cname = cc->m_coll;
row++;
//if ( p + gbstrlen(cname) + 100 >= pend ) return p;
// collection name HACK for backwards compatibility
//if ( ! cname[0] ) cname = "main";
// every other coll in a darker div
if ( (row % 2) == 0 )
sb->safePrintf("<div class=e>");
sb->safePrintf("<nobr>");
if ( i != collnum || ! highlight )// || ! coll || ! coll[0])
sb->safePrintf ( "<a title=\"%s\" "
"class=x "
"href=\"/%s?c=%s%s\">%s"
"</a> &nbsp;",
cname,
@ -2017,12 +2115,17 @@ bool Pages::printCollectionNavBar ( SafeBuf *sb ,
cname ,
qs, cname );
else
sb->safePrintf ( "<b><font title=\"%s\" "
"color=%s>%s</font></b> "
sb->safePrintf ( "<u><b><font title=\"%s\" "
"color=%s>%s</font></b></u> "
"&nbsp; ",
cname, color , cname );
sb->safePrintf("</nobr>");
sb->safePrintf("<br>\n");
// every other coll in a darker div
if ( (row % 2) == 0 )
sb->safePrintf("</div>");
else
sb->safePrintf("<br>\n");
}
//sb->safePrintf ( "</center><br/>" );
@ -2383,21 +2486,32 @@ bool sendPageCgiParms ( TcpSocket *s , HttpRequest *r ) {
// p.incrementLength ( pp - p.getBuf() );
// }
p.safePrintf ( "<table width=100%% cellpadding=2 "
"bgcolor=#%s border=1>"
"<tr><td colspan=4 bgcolor=#%s>"
p.safePrintf ( "<table %s>"
"<tr class=hdrow><td colspan=8>"
"<center><b>CGI Parameters</b></tr></tr>"
"<tr><td><b>CGI</b></td><td><b>Type</b></td>"
"<tr bgcolor=#%s><td><b>CGI</b></td>"
"<td><b>Page</b></td>"
"<td><b>Type</b></td>"
"<td><b>Name</b></td><td><b>Description</b></td></tr>\n",
LIGHT_BLUE, DARK_BLUE );
TABLE_STYLE , DARK_BLUE);
for ( long i = 0; i < g_parms.m_numParms; i++ ) {
Parm *parm = &g_parms.m_parms[i];
if ( !parm->m_sparm ) continue;
// use m_cgi if no m_scgi
char *cgi = parm->m_cgi;
if ( parm->m_scgi ) cgi = parm->m_scgi;
// skip if hidden
if ( parm->m_flags & PF_HIDDEN ) continue;
char *page = parm->m_scmd;
if ( ! page ) page = "";
// print the parm
p.safePrintf ( "<tr><td><b>%s</b></td><td nowrap=1>", cgi );
p.safePrintf ( "<tr bgcolor=#%s><td><b>%s</b></td>",
LIGHT_BLUE , cgi );
p.safePrintf("<td>%s</td>",page);
p.safePrintf("<td nowrap=1>");
switch ( parm->m_type ) {
case TYPE_BOOL: p.safePrintf ( "BOOL" ); break;
case TYPE_BOOL2: p.safePrintf ( "BOOL" ); break;
@ -2417,14 +2531,13 @@ bool sendPageCgiParms ( TcpSocket *s , HttpRequest *r ) {
}
p.safePrintf ( "</table><br><br>" );
p.safePrintf ( "<table width=100%% cellpadding=2 "
"bgcolor=#%s border=1>"
"<tr><td colspan=2 bgcolor=#%s>"
p.safePrintf ( "<table %s>"
"<tr class=hdrow><td colspan=2>"
"<center><b>Query Operators</b></td></tr>"
"<tr><td><b>Operator</b></td>"
"<td><b>Description</b>"
"</td></tr>\n",
LIGHT_BLUE, DARK_BLUE );
TABLE_STYLE );
// table of the query keywords
long n = getNumFieldCodes();
for ( long i = 0 ; i < n ; i++ ) {
@ -2434,8 +2547,9 @@ bool sendPageCgiParms ( TcpSocket *s , HttpRequest *r ) {
char *d = f->desc;
// fix table internal cell bordering
if ( d[0] == '\0' ) d = "&nbsp;";
p.safePrintf("<tr><td><b>%s</b>:</td><td>%s</td></tr>\n",
f->text,d);
p.safePrintf("<tr bgcolor=#%s>"
"<td><b>%s</b>:</td><td>%s</td></tr>\n",
LIGHT_BLUE,f->text,d);
}
p.safePrintf("</body></html>");

@ -14,8 +14,12 @@
#include "SafeBuf.h"
#include "PageCrawlBot.h" // sendPageCrawlBot()
#define LIGHTER_BLUE "e8e8ff"
#define LIGHT_BLUE "d0d0e0"
#define DARK_BLUE "c0c0f0"
#define DARKER_BLUE "a0a0f0"
#define DARKEST_BLUE "8080f0"
#define TABLE_STYLE " style=\"border-radius:10px;border:#6060f0 2px solid;\" width=100% bgcolor=#a0a0f0 cellpadding=4 border=0 "
extern char *g_msg;
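One detail worth noting about TABLE_STYLE: the macro body contains a bare % (width=100%), so the call sites in this commit pass it through a %s conversion as data, never splice it into the format literal where that % would be misread as a conversion specifier. A tiny sketch of the safe form, using plain printf and a shortened, hypothetical copy of the macro:

#include <cstdio>

// shortened copy of the Pages.h macro for illustration; note the bare '%'
#define TABLE_STYLE_SKETCH " width=100% bgcolor=#a0a0f0 cellpadding=4 border=0 "

int main ( ) {
	// safe: the macro travels through %s, so its '%' is plain data
	printf ( "<table %s>\n" , TABLE_STYLE_SKETCH );
	// unsafe: using "<table " TABLE_STYLE_SKETCH ">\n" as the format
	// string would leave a stray '%' conversion in the format itself
	return 0;
}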

Parms.cpp (1018 changed lines)

File diff suppressed because it is too large

@ -93,9 +93,10 @@ class Page {
#define PF_API 0x10
#define PF_REBUILDURLFILTERS 0x20
#define PF_NOSYNC 0x40
#define PF_CUSTOMCRAWLONLY 0x80
#define PF_DIFFBOT 0x80
#define PF_HIDDEN 0x0100
#define PF_NOSAVE 0x0200
class Parm {
@ -342,7 +343,8 @@ class Parms {
bool sendToGrunts = true ,
bool sendToProxies = false ,
// send to this single hostid? -1 means all
long hostId = -1 );
long hostId = -1 ,
long hostId2 = -1 ); // hostid range?
bool doParmSendingLoop ( ) ;
bool syncParmsWithHost0 ( ) ;
bool makeSyncHashList ( SafeBuf *hashList ) ;

@ -2677,8 +2677,14 @@ void checkKernelErrors( int fd, void *state ){
// klogctl reads the last 4k bytes of the kernel ring buffer
short bufLen = klogctl(3,buf,4096);
long long took = gettimeofdayInMilliseconds() - st;
if ( took > 1 )
if ( took >= 3 ) {
long len = bufLen;
if ( len > 200 ) len = 200;
char c = buf[len];
buf[len] = '\0';
log("db: klogctl took %lli ms to read %s",took, buf);
buf[len] = c;
}
if ( bufLen < 0 ){
log ("db: klogctl returned error: %s",mstrerror(errno));

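The klogctl() change above only logs when the read takes 3 ms or longer, and prints at most 200 bytes of the buffer so a slow read cannot flood the log. A stand-alone sketch of the same pattern (Linux-only; command 3 is SYSLOG_ACTION_READ_ALL and may require root or CAP_SYSLOG):

#include <sys/klog.h>
#include <sys/time.h>
#include <cstdio>

static long long nowMs ( ) {
	struct timeval tv;
	gettimeofday ( &tv , NULL );
	return (long long)tv.tv_sec * 1000 + tv.tv_usec / 1000;
}

int main ( ) {
	char buf[4096 + 1];
	long long start = nowMs ( );
	// 3 == SYSLOG_ACTION_READ_ALL: copy the tail of the kernel ring buffer
	int bufLen = klogctl ( 3 , buf , 4096 );
	long long took = nowMs ( ) - start;
	if ( bufLen < 0 ) { perror ( "klogctl" ); return 1; }
	if ( took >= 3 ) {
		// log only a bounded prefix of what was read
		int len = bufLen < 200 ? bufLen : 200;
		buf[len] = '\0';
		printf ( "klogctl took %lld ms to read: %s\n" , took , buf );
	}
	return 0;
}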
@ -89,7 +89,7 @@ bool Placedb::init2 ( long treeMem ) {
return false;
return true;
}
/*
bool Placedb::addColl ( char *coll, bool doVerify ) {
if ( ! m_rdb.addColl ( coll ) ) return false;
if ( ! doVerify ) return true;
@ -101,7 +101,7 @@ bool Placedb::addColl ( char *coll, bool doVerify ) {
log ( "db: Verify failed, but scaling is allowed, passing." );
return true;
}
*/
bool Placedb::verify ( char *coll ) {
log ( LOG_INFO, "db: Verifying Placedb for coll %s...", coll );
g_threads.disableThreads();

@ -227,7 +227,7 @@ bool Posdb::init2 ( long treeMem ) {
bool Posdb::addColl ( char *coll, bool doVerify ) {
if ( ! m_rdb.addColl ( coll ) ) return false;
if ( ! m_rdb.addRdbBase1 ( coll ) ) return false;
if ( ! doVerify ) return true;
// verify
if ( verify(coll) ) return true;

@ -108,6 +108,10 @@ char *g_files[] = {
"pdftohtml", // pdf
"pstotext" , // postscript
//"ppthtml" , // powerpoint
// required for SSL server support for both getting web pages
// on https:// sites and for serving https:// pages
"gb.pem",
//"dict/unifiedDict",
//"dict/thesaurus.txt",
@ -187,6 +191,7 @@ char *g_files[] = {
bool Process::checkFiles ( char *dir ) {
/*
// check these by hand since you need one or the other
File f1;
File f2;
@ -199,15 +204,14 @@ bool Process::checkFiles ( char *dir ) {
if ( //( ! f3.doesExist() || ! f4.doesExist() ) &&
( ! f4.doesExist() ) &&
( ! f1.doesExist() || ! f2.doesExist() ) ) {
/*
log("db: need either (%s and %s) or (%s and %s)",
f3.getFilename() ,
f4.getFilename() ,
f1.getFilename() ,
f2.getFilename() );
*/
//return false;
}
*/
// check for email subdir
//f1.set ( dir , "/html/email/");
@ -410,7 +414,7 @@ bool Process::init ( ) {
//m_rdbs[m_numRdbs++] = g_tfndb.getRdb ();
m_rdbs[m_numRdbs++] = g_titledb.getRdb ();
//m_rdbs[m_numRdbs++] = g_revdb.getRdb ();
//m_rdbs[m_numRdbs++] = g_sectiondb.getRdb ();
m_rdbs[m_numRdbs++] = g_sectiondb.getRdb ();
m_rdbs[m_numRdbs++] = g_posdb.getRdb ();
//m_rdbs[m_numRdbs++] = g_datedb.getRdb ();
m_rdbs[m_numRdbs++] = g_spiderdb.getRdb ();
@ -430,7 +434,7 @@ bool Process::init ( ) {
//m_rdbs[m_numRdbs++] = g_tfndb2.getRdb ();
m_rdbs[m_numRdbs++] = g_titledb2.getRdb ();
//m_rdbs[m_numRdbs++] = g_revdb2.getRdb ();
//m_rdbs[m_numRdbs++] = g_sectiondb2.getRdb ();
m_rdbs[m_numRdbs++] = g_sectiondb2.getRdb ();
m_rdbs[m_numRdbs++] = g_posdb2.getRdb ();
//m_rdbs[m_numRdbs++] = g_datedb2.getRdb ();
m_rdbs[m_numRdbs++] = g_spiderdb2.getRdb ();
@ -1426,6 +1430,13 @@ bool Process::shutdown2 ( ) {
// at least destroy the page caches that have shared memory
// because they seem to not clean it up
resetPageCaches();
// let's ensure our core file can dump
struct rlimit lim;
lim.rlim_cur = lim.rlim_max = RLIM_INFINITY;
if ( setrlimit(RLIMIT_CORE,&lim) )
log("gb: setrlimit: %s.", mstrerror(errno) );
// . force an abnormal termination which will cause a core dump
// . do not dump core on SIGHUP signals any more though
abort();
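The shutdown path now raises the core-file size limit to unlimited immediately before abort(), so the deliberate crash actually leaves a usable core even when the process inherited a small or zero RLIMIT_CORE. A stand-alone sketch of that idiom (an unprivileged process can only raise the soft limit up to its hard limit, which is why the error is logged rather than treated as fatal):

#include <sys/resource.h>
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <cerrno>

// Raise the core-file limit as far as the kernel allows, then abort so a
// core dump is written for post-mortem debugging.
static void dumpCoreAndExit ( ) {
	struct rlimit lim;
	lim.rlim_cur = lim.rlim_max = RLIM_INFINITY;
	if ( setrlimit ( RLIMIT_CORE , &lim ) != 0 )
		fprintf ( stderr , "setrlimit: %s\n" , strerror ( errno ) );
	abort ( );
}

int main ( ) { dumpCoreAndExit ( ); }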
@ -1478,7 +1489,7 @@ void Process::disableTreeWrites ( ) {
rdb->disableWrites();
}
// disable all spider trees and tables
for ( long i = 0 ; i < g_collectiondb.getNumRecs() ; i++ ) {
for ( long i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) {
SpiderColl *sc = g_spiderCache.getSpiderCollIffNonNull(i);
if ( ! sc ) continue;
sc->m_waitingTree .disableWrites();
@ -1495,7 +1506,7 @@ void Process::enableTreeWrites ( ) {
rdb->enableWrites();
}
// enable all waiting trees
for ( long i = 0 ; i < g_collectiondb.getNumRecs() ; i++ ) {
for ( long i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) {
SpiderColl *sc = g_spiderCache.getSpiderCollIffNonNull(i);
if ( ! sc ) continue;
sc->m_waitingTree .enableWrites();
@ -1771,6 +1782,8 @@ void Process::resetAll ( ) {
g_wiktionary.reset();
g_countryCode.reset();
s_clusterdbQuickCache.reset();
s_hammerCache.reset();
s_table32.reset();
@ -1824,7 +1837,7 @@ void Process::resetPageCaches ( ) {
//g_datedb .getDiskPageCache()->reset();
g_linkdb .getDiskPageCache()->reset();
g_titledb .getDiskPageCache()->reset();
//g_sectiondb .getDiskPageCache()->reset();
g_sectiondb .getDiskPageCache()->reset();
g_tagdb .getDiskPageCache()->reset();
g_spiderdb .getDiskPageCache()->reset();
//g_tfndb .getDiskPageCache()->reset();

@ -66,7 +66,7 @@ bool Profiler::init() {
return false;
if ( ! m_activeFns.set(4,4,256,NULL,0,false,0,"activefns") )
return false;
return m_fn.set(4,sizeof(FnInfo),256,NULL,0,false,0,"fntbl");
return m_fn.set(4,sizeof(FnInfo),65536,NULL,0,false,0,"fntbl");
}
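Bumping the function-info table from 256 to 65536 initial slots means the profiler no longer grows and rehashes the table while it is busy sampling. With a generic hash map the equivalent move is reserving capacity up front; a small sketch, with a simplified stand-in for the project's FnInfo:

#include <unordered_map>
#include <cstdint>

struct FnInfoSketch { long long timesCalled; long long totalTimeMs; };

int main ( ) {
	std::unordered_map<uint32_t, FnInfoSketch> fnTable;
	// pre-size so inserts during profiling do not trigger rehashes
	fnTable.reserve ( 65536 );
	fnTable[0x804a1f0] = FnInfoSketch{ 1 , 0 };
	return 0;
}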
@ -751,17 +751,19 @@ bool Profiler::printInfo(SafeBuf *sb,char *username, //long user,
}
sb->safePrintf( "<center>\n<table border=1 cellpadding=4 "
"width=100%% bgcolor=#%s>\n"
"<tr><td colspan=9 bgcolor=#%s>"
sb->safePrintf( "<center>\n<table %s>\n"
"<tr class=hdrow><td colspan=9>"
"<center><b>Profiler "//- Since Startup</b></center>"
"<a href=\"/admin/profiler?c=%s"//"
"&profilerreset=1\">"
"(reset)</a></b></center>"
"</td></tr>\n",LIGHT_BLUE,DARK_BLUE,
"</td></tr>\n",
TABLE_STYLE,
coll);
sb->safePrintf("<tr><td><b>Address</b></td><td><b>Function</b></td>");
sb->safePrintf("<tr bgcolor=#%s>"
"<td><b>Address</b></td><td><b>Function</b></td>"
, LIGHT_BLUE);
sb->safePrintf("<td><b><a href=/admin/profiler?sorts=3&c=%s>"
"Times Called</a></b></td></td>",coll);
sb->safePrintf("<td><b><a href=/admin/profiler?sorts=4&c=%s>"
@ -858,12 +860,13 @@ bool Profiler::printInfo(SafeBuf *sb,char *username, //long user,
//Now to print the table of functions called in the last 10 seconds
sb->safePrintf( "<center>\n<table border=1 cellpadding=4 "
"width=100%% bgcolor=#%s>\n"
"<tr><td colspan=8 bgcolor=#%s>"
sb->safePrintf( "<center>\n<table %s>\n"
"<tr class=hdrow><td colspan=8>"
"<center><b>Profiler - Last 10 seconds</b></center>"
"</td></tr>\n",LIGHT_BLUE,DARK_BLUE);
sb->safePrintf("<tr><td><b>Address</b></td><td><b>Function</b></td>");
"</td></tr>\n",TABLE_STYLE);
sb->safePrintf("<tr bgcolor=#%s>"
"<td><b>Address</b></td><td><b>Function</b></td>",
LIGHT_BLUE);
sb->safePrintf("<td><b><a href=/admin/profiler?sort10=3&c=%s&"
">"
"Times Called</a></b></td></td>",coll);
@ -1003,22 +1006,24 @@ bool Profiler::printInfo(SafeBuf *sb,char *username, //long user,
numSlots = m_quickpolls.getNumSlots();
numSlotsUsed = m_quickpolls.getNumSlotsUsed();
sb->safePrintf("<center>\n<table border=1 cellpadding=4 "
"width=100%% bgcolor=#%s>\n"
"<tr><td colspan=5 bgcolor=#%s>"
sb->safePrintf("<center>\n<table %s>\n"
"<tr class=hdrow><td colspan=5>"
"<center><b>Triggered Quickpolls "
"<a href=\"/admin/profiler?c=%s"
"&qpreset=1\">"
"(reset)</a></b></center>"
"</td></tr>\n",LIGHT_BLUE,DARK_BLUE,
"</td></tr>\n",
TABLE_STYLE,
coll);
sb->safePrintf("<tr><td><b>Between Functions</b></td>"
sb->safePrintf("<tr bgcolor=#%s>"
"<td><b>Between Functions</b></td>"
"<td><b>max blocked(msec)</b></td>"
"<td><b>avg time(msec)</b></td>"
"<td><b>times triggered</b></td>"
"<td><b>total(msec)</b></td>"
"</tr>");
"</tr>"
, LIGHT_BLUE );
if(numSlotsUsed == 0) {
sb->safePrintf("</table>");
@ -1539,15 +1544,13 @@ Profiler::printRealTimeInfo(SafeBuf *sb,
int realTimeSortMode,
int realTimeShowAll) {
if(!m_realTimeProfilerRunning) {
sb->safePrintf("<table border=1 cellpadding=4 bgcolor=#%s "
"width=100%%\n>",
LIGHT_BLUE);
sb->safePrintf("<tr><td colspan=7 bgcolor=#%s>"
sb->safePrintf("<table %s>",TABLE_STYLE);
sb->safePrintf("<tr class=hdrow><td colspan=7>"
"<center><b>Real Time Profiler "
"<a href=\"/admin/profiler?c=%s"
"&rtstart=1\">"
"(Start)</a></b></center>"
"</td></tr>\n",DARK_BLUE,coll);
"</td></tr>\n",coll);
sb->safePrintf("</table><br><br>\n");
return true;
}
@ -1562,16 +1565,14 @@ Profiler::printRealTimeInfo(SafeBuf *sb,
//}
rtNumEntries = realTimeProfilerData.getNumUsedSlots();
if(!rtNumEntries) {
sb->safePrintf("<table border=1 cellpadding=4 bgcolor=#%s "
"width=100%%\n>",
LIGHT_BLUE);
sb->safePrintf("<tr><td colspan=7 bgcolor=#%s>"
sb->safePrintf("<table %s>",TABLE_STYLE);
sb->safePrintf("<tr class=hdrow><td colspan=7>"
"<center><b>Real Time Profiler started, refresh page "
"after some time."
"<a href=\"/admin/profiler?c=%s"
"&rtstop=1\">"
"(Stop)</a></b></center>"
"</td></tr>\n",DARK_BLUE,coll);
"</td></tr>\n",coll);
sb->safePrintf("</table><br><br>\n");
startRealTimeProfiler();
return true;
@ -1626,9 +1627,7 @@ Profiler::printRealTimeInfo(SafeBuf *sb,
hitEntries[i].missedQuickPollsPerFunc=missedQuickPolls;
}
}
sb->safePrintf("<table border=1 cellpadding=4 bgcolor=#%s "
"width=100%%>\n",
LIGHT_BLUE);
sb->safePrintf("<table %s>",TABLE_STYLE);
char *showMessage;
int rtall;
if(realTimeShowAll) {
@ -1638,11 +1637,11 @@ Profiler::printRealTimeInfo(SafeBuf *sb,
showMessage = "(show all)";
rtall = 1;
}
sb->safePrintf("<tr><td colspan=7 bgcolor=#%s>"
sb->safePrintf("<tr class=hdrow><td colspan=7>"
"<center><b>Real Time Profiler "
"<a href=\"/admin/profiler?c=%s"
"&rtall=%i\">%s</a>"
,DARK_BLUE,coll,
,coll,
rtall, showMessage);
sb->safePrintf("<a href=\"/admin/profiler?c=%s&rtstop=1\">"
"(Stop)</a></b></center></td></tr>\n",

@ -280,7 +280,7 @@ bool Proxy::initProxy ( long proxyId, unsigned short udpPort,
//need to init collectiondb too because of addurl
//set isdump to true because we aren't going to store any data in the
//collection
if ( !g_collectiondb.init( true ) ){ //isDump
if ( !g_collectiondb.loadAllCollRecs( ) ){ //isDump
log ("db: collectiondb init failed.");
return false;
}

@ -2190,6 +2190,7 @@ bool Query::setQWords ( char boolFlag ,
// if we're hashing a url:, link:, site: or ip: term,
// then we need to hash ALL up to the first space
if ( fieldCode == FIELD_URL ||
fieldCode == FIELD_GBPARENTURL ||
fieldCode == FIELD_EXT ||
fieldCode == FIELD_LINK ||
fieldCode == FIELD_ILINK||
@ -2225,6 +2226,7 @@ bool Query::setQWords ( char boolFlag ,
// should we have normalized before hashing?
if ( fieldCode == FIELD_URL ||
fieldCode == FIELD_GBPARENTURL ||
fieldCode == FIELD_LINK ||
fieldCode == FIELD_ILINK ||
fieldCode == FIELD_SITELINK ||
@ -2237,6 +2239,8 @@ bool Query::setQWords ( char boolFlag ,
if ( fieldCode == FIELD_ILINK) addwww = true;
if ( fieldCode == FIELD_LINKS) addwww = true;
if ( fieldCode == FIELD_URL ) addwww = true;
if ( fieldCode == FIELD_GBPARENTURL )
addwww = true;
if ( fieldCode == FIELD_SITELINK)
addwww = true;
url.set ( w , wlen , addwww );
@ -3006,7 +3010,7 @@ struct QueryField g_fields[] = {
{"ilink", FIELD_ILINK, true,"Similar to above."},
{"sitelink", FIELD_SITELINK, true,"Matches all pages that link to the given site. Example:sitelink:www.gigablast.com matches all pages that link to some page on the www.gigablast.com site."},
{"site", FIELD_SITE, true,"Matches all pages from the given site. Example: site:www.gigablast.com will return all the pages on the gigablast site"},
{"coll", FIELD_COLL, true,"Not sure if this works."},
//{"coll", FIELD_COLL, true,"Not sure if this works."},
{"ip", FIELD_IP, true,"Matches all pages with the given ip. Example:1.2.3.4 will match all pages whose urls have that IP address."},
{"inurl", FIELD_SUBURL, true,"Matches all pages that have the given terms in the url. Example inurl:water will match all pages whose url has the word water in it, but the word must be delineated by punctuation."},
{"suburl", FIELD_SUBURL, true,"Same as inurl."},
@ -3038,8 +3042,8 @@ struct QueryField g_fields[] = {
{"gbhasext", FIELD_GBOTHER, false,""},
{"gbsubmiturl", FIELD_GBOTHER, false,""},
{"qdom", FIELD_QUOTA, false,""},
{"qhost", FIELD_QUOTA, false,""},
//{"qdom", FIELD_QUOTA, false,""},
//{"qhost", FIELD_QUOTA, false,""},
{"gbtagvector", FIELD_GBTAGVECTOR, false,""},
{"gbgigabitvector", FIELD_GBGIGABITVECTOR, false,""},
@ -3061,7 +3065,7 @@ struct QueryField g_fields[] = {
{"gbduphash" ,FIELD_GBOTHER,false,"Internal use only."},
{"gbsitetemplate" ,FIELD_GBOTHER,false,"Internal use only."},
{"gboutlinkedtitle" ,FIELD_GBOTHER,false,"gboutlinkedtitle:0 and gboutlinkedtitle:1 matches events whose title is not in and in a hyperlink, respectively."},
{"gbisaggregator" ,FIELD_GBOTHER,false,"gbisaggregator:0|1 depending on if the event came from an event aggregator website, like eviesays.com."},
//{"gbisaggregator" ,FIELD_GBOTHER,false,"gbisaggregator:0|1 depending on if the event came from an event aggregator website, like eviesays.com."},
{"gbdeduped" ,FIELD_GBOTHER,false,""},
{"gbinjected", FIELD_GBOTHER,false,"Was the event injected?."},
@ -3070,7 +3074,8 @@ struct QueryField g_fields[] = {
//{"gbendrange",FIELD_GBENDRANGE,false,""},
{"gbpermalink",FIELD_GBPERMALINK,false,""},
{"gbcsenum",FIELD_GBCSENUM,false,""},
//{"gbcsenum",FIELD_GBCSENUM,false,""},
{"gbparenturl", FIELD_GBPARENTURL, true,"Match the json urls that were extract from this parent url. Example: gbparenturl:www.gigablast.com/addurl.htm"},
{"gbdocid",FIELD_GBDOCID,false,"restrict results to this docid"}
};

@ -108,6 +108,7 @@ typedef unsigned long long qvec_t;
#define FIELD_GBREVSORTBY 55 // i.e. sortby:price -> low to high
#define FIELD_GBNUMBERMIN 56
#define FIELD_GBNUMBERMAX 57
#define FIELD_GBPARENTURL 58
#define FIELD_GBOTHER 92

Rdb.cpp (22 changed lines)

@ -90,7 +90,7 @@ RdbBase *Rdb::getBase ( collnum_t collnum ) {
return cr->m_bases[(unsigned char)m_rdbId];
}
// used by Rdb::addColl
// used by Rdb::addBase1()
void Rdb::addBase ( collnum_t collnum , RdbBase *base ) {
// if we are collectionless, like g_statsdb.m_rdb or
// g_cachedb.m_rdb, etc.. shared by all collections essentially.
@ -468,12 +468,17 @@ bool Rdb::updateToRebuildFiles ( Rdb *rdb2 , char *coll ) {
// . returns false and sets g_errno on error, returns true on success
// . if this rdb is collectionless we set m_collectionlessBase in addBase()
bool Rdb::addColl ( char *coll ) {
bool Rdb::addRdbBase1 ( char *coll ) { // addColl()
collnum_t collnum = g_collectiondb.getCollnum ( coll );
return addColl2 ( collnum );
return addRdbBase2 ( collnum );
}
bool Rdb::addColl2 ( collnum_t collnum ) {
bool Rdb::addRdbBase2 ( collnum_t collnum ) { // addColl2()
if ( ! m_initialized ) {
g_errno = EBADENGINEER;
return log("db: adding coll to uninitialized rdb!");
}
	// catdb,statsdb,accessdb,facebookdb,syncdb
if ( m_isCollectionLess )
@ -501,8 +506,9 @@ bool Rdb::addColl2 ( collnum_t collnum ) {
RdbBase *base = getBase ( collnum );
if ( base ) { // m_bases [ collnum ] ) {
g_errno = EBADENGINEER;
return log("db: %s: Rdb for collection \"%s\" exists.",
m_dbname,coll);
return log("db: Rdb for db \"%s\" and "
"collection \"%s\" (collnum %li) exists.",
m_dbname,coll,(long)collnum);
}
// make a new one
RdbBase *newColl = NULL;
@ -616,7 +622,7 @@ bool Rdb::deleteColl ( collnum_t collnum , collnum_t newCollnum ) {
// . TODO: what about outstanding merge or dump operations?
// . it seems like we can't really recycle this too easily
// because reset it not resetting filenames or directory name?
// just nuke it and rebuild using addColl2()...
// just nuke it and rebuild using addRdbBase2()...
RdbBase *oldBase = getBase ( collnum );
mdelete (oldBase, sizeof(RdbBase), "Rdb Coll");
delete (oldBase);
@ -632,7 +638,7 @@ bool Rdb::deleteColl ( collnum_t collnum , collnum_t newCollnum ) {
// if just resetting recycle base
if ( collnum != newCollnum ) {
addColl2 ( newCollnum );
addRdbBase2 ( newCollnum );
// make a new base now
//RdbBase *newBase = mnew
// new cr

Rdb.h (4 changed lines)

@ -86,8 +86,8 @@ class Rdb {
Rdb ( );
~Rdb ( );
bool addColl ( char *coll );
bool addColl2 ( collnum_t collnum );
bool addRdbBase1 ( char *coll );
bool addRdbBase2 ( collnum_t collnum );
bool delColl ( char *coll );
bool resetBase ( collnum_t collnum );
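
The renamed pair keeps the old addColl()/addColl2() split: addRdbBase1() resolves a collection name to a collnum, and addRdbBase2() is what actually allocates the RdbBase. A minimal sketch of the intended call pattern, following the collectionless NULL usage that Statsdb/Syncdb/Cachedb adopt elsewhere in this commit (the wrapper function name is hypothetical):

#include "Rdb.h"
#include "Collectiondb.h"

// sketch only: how the renamed entry points are meant to be called
bool initBasesFor ( Rdb *rdb , char *coll ) {
	// collectionless rdbs (cachedb, statsdb, syncdb, ...) pass NULL and
	// share the single RdbBase that Rdb::addBase() stores for them
	if ( ! coll ) return rdb->addRdbBase1 ( NULL );
	// addRdbBase1() resolves the name to a collnum and hands off to
	// addRdbBase2(), which builds the RdbBase for that collection
	return rdb->addRdbBase1 ( coll );
}

Repair.cpp below switches its g_*db2.addColl(m_coll) calls to exactly this getRdb()->addRdbBase1(m_coll) form.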

@ -21,6 +21,7 @@
#include "Collectiondb.h"
//#include "CollectionRec.h"
#include "Repair.h"
#include "Rebalance.h"
//#include "Msg3.h" // debug include
// how many rdbs are in "urgent merge" mode?
@ -613,8 +614,22 @@ bool RdbBase::setFiles ( ) {
return false;
}
// everyone should start with file 0001.dat or 0000.dat
if ( m_numFiles > 0 && m_fileIds[0] > 1 ) {
log("db: missing file id 0001.dat for %s in coll %s. "
"Fix this or it'll core later. Just rename the next file "
"in line to 0001.dat/map. We probably cored at a "
"really bad time during the end of a merge process.",
m_dbname, m_coll );
char *xx=NULL; *xx=0;
}
m_dir.close();
// ensure files are sharded correctly
verifyFileSharding();
if ( ! converting ) return true;
// now if we are converting old titledb names to new...
@ -655,6 +670,7 @@ long RdbBase::addFile ( long id , bool isNew , long mergeNum , long id2 ,
(long)MAX_RDB_FILES);
return -1;
}
// HACK: skip to avoid a OOM lockup. if RdbBase cannot dump
// its data to disk it can backlog everyone and memory will
// never get freed up.
@ -1558,10 +1574,11 @@ void RdbBase::gotTokenForMerge ( ) {
if ( m_rdb == g_tfndb.getRdb() ) m = &g_merge2;
// sanity check
if ( m_isMerging || m->isMerging() ) {
if ( m_doLog )
log(LOG_INFO,
"merge: Someone already merging. Waiting for merge token "
"in order to merge %s.",m_dbname);
//if ( m_doLog )
//log(LOG_INFO,
//"merge: Someone already merging. Waiting for "
//"merge token "
//"in order to merge %s.",m_dbname);
return;
}
// clear for take-off
@ -1959,8 +1976,8 @@ void RdbBase::gotTokenForMerge ( ) {
// . if we have no g_errno that is bad!!!
// . we should dump core here or something cuz we have to remove the
// merge file still to be correct
if ( ! g_errno )
log(LOG_INFO,"merge: Got token without blocking.");
//if ( ! g_errno )
// log(LOG_INFO,"merge: Got token without blocking.");
// we now set this in init() by calling m_merge.init() so it
// can pre-alloc it's lists in it's s_msg3 class
// g_conf.m_mergeMaxBufSize ) ) return ;
@ -2185,3 +2202,104 @@ void RdbBase::verifyDiskPageCache ( ) {
m_pc->verify(f);
}
}
bool RdbBase::verifyFileSharding ( ) {
if ( m_rdb->m_isCollectionLess ) return true;
//log ( "db: Verifying %s for coll %s (collnum=%li)...",
// m_dbname , m_coll , (long)m_collnum );
g_threads.disableThreads();
Msg5 msg5;
//Msg5 msg5b;
RdbList list;
char startKey[MAX_KEY_BYTES];
char endKey[MAX_KEY_BYTES];
KEYMIN(startKey,MAX_KEY_BYTES);
KEYMAX(endKey,MAX_KEY_BYTES);
long minRecSizes = 64000;
char rdbId = m_rdb->m_rdbId;
if ( rdbId == RDB_TITLEDB ) minRecSizes = 640000;
if ( ! msg5.getList ( m_rdb->m_rdbId, //RDB_POSDB ,
m_coll ,
&list ,
startKey ,
endKey ,
minRecSizes ,
true , // includeTree ,
false , // add to cache?
0 , // max cache age
0 , // startFileNum ,
-1 , // numFiles ,
NULL , // state
NULL , // callback
0 , // niceness
false , // err correction?
NULL ,
0 ,
-1 ,
true ,
-1LL ,
NULL , // &msg5b ,
true )) {
g_threads.enableThreads();
return log("db: HEY! it did not block");
}
long count = 0;
long got = 0;
long printed = 0;
char k[MAX_KEY_BYTES];
for ( list.resetListPtr() ; ! list.isExhausted() ;
list.skipCurrentRecord() ) {
//key144_t k;
list.getCurrentKey(k);
count++;
//unsigned long groupId = k.n1 & g_hostdb.m_groupMask;
//unsigned long groupId = getGroupId ( RDB_POSDB , &k );
//if ( groupId == g_hostdb.m_groupId ) got++;
unsigned long shardNum = getShardNum( rdbId , k );
if ( shardNum == getMyShardNum() ) {
got++;
continue;
}
if ( ++printed > 100 ) continue;
// avoid log spam... comment this out
//log ( "db: Found bad key in list belongs to shard %li",
// shardNum);
}
g_threads.enableThreads();
//if ( got )
// log("db: verified %li recs for %s in coll %s",
// got,m_dbname,m_coll);
if ( got == count ) return true;
// tally it up
g_rebalance.m_numForeignRecs += count - got;
log ("db: Out of first %li records in %s for %s.%li, only %li belong "
"to our group.",count,m_dbname,m_coll,(long)m_collnum,got);
// exit if NONE, we probably got the wrong data
//if ( got == 0 ) log("db: Are you sure you have the "
// "right data in the right directory? ");
//log ( "db: Exiting due to Posdb inconsistency." );
g_threads.enableThreads();
return true;//g_conf.m_bypassValidation;
//log(LOG_DEBUG, "db: Posdb passed verification successfully for %li "
// "recs.", count );
// DONE
//return true;
}
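
Stripped of the Msg5 plumbing, the check verifyFileSharding() performs is one test per record: hash the key to a shard and compare it with this host's shard. A sketch of that core test pulled into a standalone helper (the helper name is hypothetical; the list iteration and shard helpers are the same ones used above):

// sketch: the per-record test at the heart of verifyFileSharding()
static bool listIsShardedForUs ( RdbList *list , char rdbId ) {
	char k[MAX_KEY_BYTES];
	long count = 0;
	long got   = 0;
	for ( list->resetListPtr() ;
	      ! list->isExhausted() ;
	      list->skipCurrentRecord() ) {
		list->getCurrentKey ( k ); // full key, all key bytes
		count++;
		// every key hashes to exactly one shard for its rdb
		if ( getShardNum ( rdbId , k ) == getMyShardNum() ) got++;
	}
	// any shortfall is a foreign record that Rebalance.cpp must move
	return ( got == count );
}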

@ -262,6 +262,8 @@ class RdbBase {
void verifyDiskPageCache ( );
bool verifyFileSharding ( );
// . add a (new) file to the m_files/m_maps/m_fileIds arrays
// . both return array position we added it to
// . both return -1 and set errno on error

@ -1503,9 +1503,12 @@ bool RdbCache::load ( char *dbname ) {
// does the file exist?
File f;
f.set ( g_hostdb.m_dir , filename );
// having cache file not existing on disk is not so bad, it's a cache
if ( ! f.doesExist() )
return log("db: Could not load cache from %s: does not exist.",
f.getFilename());
return false;
// return log("db: Could not load cache from %s: does not exist.",
// f.getFilename());
// open the file
if ( ! f.open ( O_RDWR ) )
return log("db: Could not open cache save file for %s: %s.",

@ -204,11 +204,14 @@ void RdbDump::doneDumping ( ) {
m_totalPosDumped , m_totalNegDumped ,
m_totalPosDumped + m_totalNegDumped );
// map verify
log("db: map # pos=%lli neg=%lli",
m_map->getNumPositiveRecs(),
m_map->getNumNegativeRecs()
);
// . map verify
// . if continueDumping called us with no collectionrec, it got
// deleted so RdbBase::m_map is nuked too i guess
if ( saved != ENOCOLLREC )
log("db: map # pos=%lli neg=%lli",
m_map->getNumPositiveRecs(),
m_map->getNumNegativeRecs()
);
// free the list's memory
if ( m_list ) m_list->freeList();
@ -1015,11 +1018,16 @@ void RdbDump::continueDumping() {
// if someone reset/deleted the collection we were dumping...
CollectionRec *cr = g_collectiondb.getRec ( m_collnum );
if ( ! cr ) g_errno = ENOCOLLREC;
if ( ! cr ) {
g_errno = ENOCOLLREC;
// m_file is invalid if collrec got nuked because so did
	// the RdbBase which has the files
log("db: continue dumping lost collection");
}
// bitch about errors
if (g_errno)log("db: Dump to %s had error writing: %s.",
m_file->getFilename(),mstrerror(g_errno));
else if (g_errno)log("db: Dump to %s had error writing: %s.",
m_file->getFilename(),mstrerror(g_errno));
// go back now if we were NOT dumping a tree
if ( ! (m_tree || m_buckets) ) {
m_isDumping = false;

@ -1631,9 +1631,12 @@ void RdbList::merge_r ( RdbList **lists ,
// . we don't want any positive recs to go un annhilated
// . but don't worry about this check if start and end keys are equal
//if ( m_startKey != m_endKey && (m_endKey.n0 & 0x01) == 0x00 )
if ( KEYCMP(m_startKey,m_endKey,m_ks)!=0 && KEYNEG(m_endKey) )
if ( KEYCMP(m_startKey,m_endKey,m_ks)!=0 && KEYNEG(m_endKey) ) {
log(LOG_LOGIC,"db: rdblist: merge_r: Illegal endKey for "
"merging.");
"merging. fixing.");
// make it legal so it will be read first NEXT time
KEYSUB(m_endKey,1,m_ks);
}
// do nothing if no lists passed in
if ( numLists <= 0 ) return;
// inherit the key size of what we merge
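
The fix leans on the key-parity convention spelled out in the old commented-out test above: a key whose low bit is 0 is a negative (delete) key, and a key whose low bit is 1 is positive. A one-byte illustration of what the KEYSUB(m_endKey,1,m_ks) adjustment does (real keys are m_ks bytes, so this shows only the low byte):

	// illustration only, showing just the low byte of the key:
	//   m_endKey = 0x08         -> low bit 0 -> negative key, illegal merge end
	//   KEYSUB(m_endKey,1,m_ks)
	//   m_endKey = 0x07         -> low bit 1 -> positive key, legal merge end
	// the merge now stops just before the negative key, so that key and the
	// positive record it is meant to annihilate are read together on the
	// next pass instead of being split across two merges.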

@ -172,7 +172,11 @@ void RdbTree::reset ( ) {
// unprotect it all
if ( m_useProtection ) unprotect ( );
	// make sure string is NULL terminated. this gbstrlen() should
if ( m_numNodes > 0 && m_dbname[0] && gbstrlen(m_dbname) >= 0 )
if ( m_numNodes > 0 &&
m_dbname[0] &&
gbstrlen(m_dbname) >= 0 &&
	     // don't be spammy, we can have thousands of these, one per coll
strcmp(m_dbname,"waitingtree") )
log(LOG_INFO,"db: Resetting tree for %s.",m_dbname);
// liberate all the nodes
@ -279,7 +283,7 @@ long RdbTree::clear ( ) {
for ( long i = 0 ; i < nc ; i++ ) {
CollectionRec *cr = g_collectiondb.getRec(i);
if ( ! cr ) continue;
//if ( ((unsigned char)m_rdbId)>=RDB_END){char *xx=NULL;*xx=0; }
//if (((unsigned char)m_rdbId)>=RDB_END){char *xx=NULL;*xx=0; }
cr->m_numNegKeysInTree[(unsigned char)m_rdbId] = 0;
cr->m_numPosKeysInTree[(unsigned char)m_rdbId] = 0;
}
@ -633,9 +637,11 @@ long RdbTree::addNode ( collnum_t collnum ,
// crap, when fixing a tree this will segfault because
// m_recs[collnum] is NULL.
if ( m_rdbId >= 0 && g_collectiondb.m_recs[collnum] ) {
//if( ((unsigned char)m_rdbId)>=RDB_END){char *xx=NULL;*xx=0; }
g_collectiondb.m_recs[collnum]->
m_numNegKeysInTree[(unsigned char)m_rdbId]++;
//if( ((unsigned char)m_rdbId)>=RDB_END){
//char *xx=NULL;*xx=0; }
CollectionRec *cr ;
cr = g_collectiondb.m_recs[collnum];
if(cr)cr->m_numNegKeysInTree[(unsigned char)m_rdbId]++;
}
}
else {
@ -644,9 +650,11 @@ long RdbTree::addNode ( collnum_t collnum ,
// crap, when fixing a tree this will segfault because
// m_recs[collnum] is NULL.
if ( m_rdbId >= 0 && g_collectiondb.m_recs[collnum] ) {
//if( ((unsigned char)m_rdbId)>=RDB_END){char *xx=NULL;*xx=0; }
g_collectiondb.m_recs[collnum]->
m_numPosKeysInTree[(unsigned char)m_rdbId]++;
//if( ((unsigned char)m_rdbId)>=RDB_END){
//char *xx=NULL;*xx=0; }
CollectionRec *cr ;
cr = g_collectiondb.m_recs[collnum];
if(cr)cr->m_numPosKeysInTree[(unsigned char)m_rdbId]++;
}
}
// debug2 msg
@ -839,16 +847,20 @@ void RdbTree::deleteNode ( long i , bool freeData ) {
if ( KEYNEG(m_keys,i,m_ks) ) {
m_numNegativeKeys--;
//m_numNegKeysPerColl[m_collnums[i]]--;
if ( m_rdbId >= 0 )
g_collectiondb.m_recs[m_collnums[i]]->
m_numPosKeysInTree[(unsigned char)m_rdbId]--;
if ( m_rdbId >= 0 ) {
CollectionRec *cr;
cr = g_collectiondb.m_recs[m_collnums[i]];
if(cr)cr->m_numNegKeysInTree[(unsigned char)m_rdbId]--;
}
}
else {
m_numPositiveKeys--;
//m_numPosKeysPerColl[m_collnums[i]]--;
if ( m_rdbId >= 0 )
g_collectiondb.m_recs[m_collnums[i]]->
m_numPosKeysInTree[(unsigned char)m_rdbId]--;
if ( m_rdbId >= 0 ) {
CollectionRec *cr;
cr = g_collectiondb.m_recs[m_collnums[i]];
if(cr)cr->m_numPosKeysInTree[(unsigned char)m_rdbId]--;
}
}
// debug step -- check chain from iparent down making sure that
//printTree();
@ -874,11 +886,14 @@ void RdbTree::deleteNode ( long i , bool freeData ) {
//m_numNegKeysPerColl[m_collnums[i]] = 0;
//m_numPosKeysPerColl[m_collnums[i]] = 0;
if ( m_rdbId >= 0 ) {
//if ( ((unsigned char)m_rdbId)>=RDB_END){char *xx=NULL;*xx=0; }
g_collectiondb.m_recs[m_collnums[i]]->
m_numNegKeysInTree[(unsigned char)m_rdbId] = 0;
g_collectiondb.m_recs[m_collnums[i]]->
m_numPosKeysInTree[(unsigned char)m_rdbId] = 0;
//if ( ((unsigned char)m_rdbId)>=RDB_END){
//char *xx=NULL;*xx=0; }
CollectionRec *cr ;
cr = g_collectiondb.m_recs[m_collnums[i]];
if(cr){
cr->m_numNegKeysInTree[(unsigned char)m_rdbId] = 0;
cr->m_numPosKeysInTree[(unsigned char)m_rdbId] = 0;
}
}
@ -945,8 +960,9 @@ void RdbTree::deleteNode ( long i , bool freeData ) {
//m_numNegKeysPerColl[m_collnums[i]]--;
if ( m_rdbId >= 0 ) {
//if( ((unsigned char)m_rdbId)>=RDB_END){char *xx=NULL;*xx=0; }
g_collectiondb.m_recs[m_collnums[i]]->
m_numNegKeysInTree[(unsigned char)m_rdbId]--;
CollectionRec *cr ;
cr = g_collectiondb.m_recs[m_collnums[i]];
if(cr)cr->m_numNegKeysInTree[(unsigned char)m_rdbId]--;
}
}
else {
@ -954,8 +970,9 @@ void RdbTree::deleteNode ( long i , bool freeData ) {
//m_numPosKeysPerColl[m_collnums[i]]--;
if ( m_rdbId >= 0 ) {
//if( ((unsigned char)m_rdbId)>=RDB_END){char *xx=NULL;*xx=0; }
g_collectiondb.m_recs[m_collnums[i]]->
m_numPosKeysInTree[(unsigned char)m_rdbId]--;
CollectionRec *cr ;
cr = g_collectiondb.m_recs[m_collnums[i]];
if(cr)cr->m_numPosKeysInTree[(unsigned char)m_rdbId]--;
}
}
// debug step -- check chain from iparent down making sure that
@ -3059,8 +3076,9 @@ void RdbTree::cleanTree ( ) { // char **bases ) {
deleteNode ( i , true );
// remove it otherwise
// don't actually remove it!!!! in case collection gets
// moved accidentally
//deleteNode ( i , true );
// moved accidentally.
// no... otherwise it can clog up the tree forever!!!!
deleteNode ( i , true );
count++;
// save it
collnum = m_collnums[i];
@ -3070,8 +3088,8 @@ void RdbTree::cleanTree ( ) { // char **bases ) {
if ( count == 0 ) return;
log(LOG_LOGIC,"db: Removed %li records from %s tree for invalid "
"collection number %i.",count,m_dbname,collnum);
log(LOG_LOGIC,"db: Records not actually removed for safety. Except "
"for those with negative colnums.");
//log(LOG_LOGIC,"db: Records not actually removed for safety. Except "
// "for those with negative colnums.");
static bool s_print = true;
if ( ! s_print ) return;
s_print = false;

@ -127,6 +127,7 @@ char *Rebalance::getNeedsRebalance ( ) {
hexToBin(keyStr,gbstrlen(keyStr), (char *)&m_nextKey);
m_collnum = cn;
//m_collnum = 4695; //debug skip
// we are valid now either way
m_needsRebalanceValid = true;
// assume ok
@ -217,8 +218,9 @@ void Rebalance::scanLoop ( ) {
if ( rdb->m_rdbId == RDB_STATSDB ) continue;
// log it as well
if ( m_lastRdb != rdb ) {
log("rebal: scanning %s [%s]",
cr->m_coll,rdb->m_dbname);
log("rebal: scanning %s (%li) [%s]",
cr->m_coll,(long)cr->m_collnum,
rdb->m_dbname);
// only do this once per rdb/coll
m_lastRdb = rdb;
// reset key cursor as well!!!
@ -235,8 +237,11 @@ void Rebalance::scanLoop ( ) {
// scan it. returns true if done, false if blocked
if ( ! scanRdb ( ) ) return;
// note it
log("rebal: moved %lli of %lli recs scanned",
m_rebalanceCount,m_scannedCount);
log("rebal: moved %lli of %lli recs scanned in "
"%s for coll.%s.%li",
m_rebalanceCount,m_scannedCount,
rdb->m_dbname,cr->m_coll,(long)cr->m_collnum);
//if ( m_rebalanceCount ) goto done;
m_rebalanceCount = 0;
m_scannedCount = 0;
m_lastPercent = -1;
@ -245,6 +250,7 @@ void Rebalance::scanLoop ( ) {
m_rdbNum = 0;
}
// done:
// all done
m_isScanning = false;
m_needsRebalance = false;
@ -318,6 +324,8 @@ bool Rebalance::scanRdb ( ) {
readAnother:
if ( g_process.m_mode == EXIT_MODE ) return false;
//log("rebal: loading list start = %s",KEYSTR(m_nextKey,rdb->m_ks));
if ( ! m_msg5.getList ( rdb->m_rdbId ,
@ -391,22 +399,27 @@ bool Rebalance::gotList ( ) {
return true;
}
char *last = NULL;
//char *last = NULL;
for ( ; ! m_list.isExhausted() ; m_list.skipCurrentRec() ) {
		// get that rec
char *rec = m_list.getCurrentRec();
//char *rec = m_list.getCurrentRec();
// get it
m_list.getCurrentKey ( m_nextKey );
// skip if negative... wtf?
if ( KEYNEG(m_nextKey) ) continue;
// get shard
long shard = getShardNum ( rdbId , rec );
long shard = getShardNum ( rdbId , m_nextKey );
// save last ptr
last = rec;
//last = rec;
// debug!
//m_list.getKey ( rec , m_nextKey );
//log("rebal: checking key %s",KEYSTR(m_nextKey,ks));
// count as scanned
m_scannedCount++;
// skip it if it belongs with us
if ( shard == myShard ) continue;
// note it
//log("rebal: shard is %li",shard);
// count it
m_rebalanceCount++;
// otherwise, it does not!
@ -445,18 +458,21 @@ bool Rebalance::gotList ( ) {
//log("rebal: done reading list");
// update nextkey
if ( last ) {
//if ( last ) {
if ( ! m_list.isEmpty() ) {
// get the last key we scanned, all "ks" bytes of it.
// because some keys are compressed and we take the
// more significant compressed out bytes from m_list.m_*
// member vars
m_list.getKey ( last , m_nextKey );
//m_list.getKey ( last , m_nextKey );
		// if it is not maxed out, then increment it for the
// next scan round
if ( KEYCMP ( m_nextKey , KEYMAX() , ks ) != 0 )
KEYADD ( m_nextKey , 1 , ks );
}
//else {
// log("rebal: got empty list");
//}
if ( ! m_msg4a.addMetaList ( &m_posMetaList ,
m_collnum ,

@ -836,7 +836,8 @@ void Repair::getNextCollToRepair ( ) {
// add collection to secondary rdbs
if ( m_rebuildTitledb ) {
if ( ! g_titledb2.addColl ( m_coll ) &&
if ( //! g_titledb2.addColl ( m_coll ) &&
! g_titledb2.getRdb()->addRdbBase1(m_coll) &&
g_errno != EEXIST ) goto hadError;
}
@ -851,7 +852,7 @@ void Repair::getNextCollToRepair ( ) {
//}
if ( m_rebuildPosdb ) {
if ( ! g_posdb2.addColl ( m_coll ) &&
if ( ! g_posdb2.getRdb()->addRdbBase1 ( m_coll ) &&
g_errno != EEXIST ) goto hadError;
}
@ -861,7 +862,7 @@ void Repair::getNextCollToRepair ( ) {
//}
if ( m_rebuildClusterdb ) {
if ( ! g_clusterdb2.addColl ( m_coll ) &&
if ( ! g_clusterdb2.getRdb()->addRdbBase1 ( m_coll ) &&
g_errno != EEXIST ) goto hadError;
}
@ -871,7 +872,7 @@ void Repair::getNextCollToRepair ( ) {
//}
if ( m_rebuildSpiderdb ) {
if ( ! g_spiderdb2.addColl ( m_coll ) &&
if ( ! g_spiderdb2.getRdb()->addRdbBase1 ( m_coll ) &&
g_errno != EEXIST ) goto hadError;
}
@ -881,7 +882,7 @@ void Repair::getNextCollToRepair ( ) {
//}
if ( m_rebuildLinkdb ) {
if ( ! g_linkdb2.addColl ( m_coll ) &&
if ( ! g_linkdb2.getRdb()->addRdbBase1 ( m_coll ) &&
g_errno != EEXIST ) goto hadError;
}
@ -2254,80 +2255,116 @@ bool Repair::printRepairStatus ( SafeBuf *sb , long fromIp ) {
}
// now show the rebuild status
sb->safePrintf ( "<table>"
sb->safePrintf (
"<table%s"
" id=\"repairstatustable\">"
"<table width=100%% bgcolor=#%s cellpadding=4 "
"border=1 id=\"repairstatustable\">"
"<tr><td bgcolor=%s colspan=2><b><center>"
"<tr class=hdrow><td colspan=2><b><center>"
"Repair Status</center></b></td></tr>\n"
"<tr bgcolor=#%s><td colspan=2>"
"<font size=-2>"
"Use this to rebuild a database or to reindex "
"all pages to pick up new link text."
"</font>"
"</td></tr>"
// status (see list of above statuses)
"<tr><td width=50%%><b>status</b></td>"
"<tr bgcolor=#%s><td width=50%%><b>status</b></td>"
"<td>%s</td></tr>\n"
"<tr><td width=50%%><b>repair mode</b></td>"
"<tr bgcolor=#%s><td width=50%%><b>repair mode</b>"
"</td>"
"<td>%li</td></tr>\n"
"<tr><td width=50%%><b>min repair mode</b></td>"
"<tr bgcolor=#%s>"
"<td width=50%%><b>min repair mode</b></td>"
"<td>%li</td></tr>\n"
"<tr><td width=50%%><b>host ID with min repair mode"
"<tr bgcolor=#%s>"
"<td width=50%%><b>host ID with min repair mode"
"</b></td>"
"<td><a href=\"http://%s:%hu/master/repair\">"
"%li</a></td></tr>\n"
"<tr><td><b>old collection</b></td>"
"<tr bgcolor=#%s><td><b>old collection</b></td>"
"<td>%s</td></tr>"
"<tr><td><b>new collection</b></td>"
"<tr bgcolor=#%s><td><b>new collection</b></td>"
"<td>%s</td></tr>"
,
TABLE_STYLE ,
LIGHT_BLUE ,
LIGHT_BLUE ,
status ,
LIGHT_BLUE ,
(long)g_repairMode,
LIGHT_BLUE ,
(long)g_pingServer.m_minRepairMode,
LIGHT_BLUE ,
minIpBuf, // ip string
minPort, // port
(long)minHostId,
LIGHT_BLUE ,
oldColl ,
LIGHT_BLUE ,
newColl
);
sb->safePrintf (
// docs done, includes overwritten title recs
"<tr bgcolor=%s><td><b>titledb recs scanned</b></td>"
"<tr bgcolor=#%s><td><b>titledb recs scanned</b></td>"
"<td>%lli of %lli</td></tr>\n"
// percent complete
"<tr bgcolor=%s><td><b>titledb recs scanned "
"<tr bgcolor=#%s><td><b>titledb recs scanned "
"progress</b></td>"
"<td>%.2f%%</td></tr>\n"
// title recs set errors, parsing errors, etc.
//"<tr bgcolor=%s><td><b>title recs injected</b></td>"
//"<tr bgcolor=#%s><td><b>title recs injected</b></td>"
//"<td>%lli</td></tr>\n"
// title recs set errors, parsing errors, etc.
"<tr bgcolor=%s><td><b>titledb rec error count</b></td>"
"<tr bgcolor=#%s><td><b>titledb rec error count</b></td>"
"<td>%lli</td></tr>\n"
// sub errors
"<tr bgcolor=%s><td> &nbsp; key out of order</b></td>"
"<tr bgcolor=#%s><td> &nbsp; key out of order</b></td>"
"<td>%lli</td></tr>\n"
"<tr bgcolor=%s><td> &nbsp; set errors</b></td>"
"<tr bgcolor=#%s><td> &nbsp; set errors</b></td>"
"<td>%lli</td></tr>\n"
"<tr bgcolor=%s><td> &nbsp; corrupt errors</b></td>"
"<tr bgcolor=#%s><td> &nbsp; corrupt errors</b></td>"
"<td>%lli</td></tr>\n"
"<tr bgcolor=%s><td> &nbsp; xml errors</b></td>"
"<tr bgcolor=#%s><td> &nbsp; xml errors</b></td>"
"<td>%lli</td></tr>\n"
"<tr bgcolor=%s><td> &nbsp; dup docid errors</b></td>"
"<tr bgcolor=#%s><td> &nbsp; dup docid errors</b></td>"
"<td>%lli</td></tr>\n"
"<tr bgcolor=%s><td> &nbsp; negative keys</b></td>"
"<tr bgcolor=#%s><td> &nbsp; negative keys</b></td>"
"<td>%lli</td></tr>\n"
//"<tr bgcolor=%s><td> &nbsp; overwritten recs</b></td>"
//"<tr bgcolor=#%s><td> &nbsp; overwritten recs</b></td>"
//"<td>%lli</td></tr>\n"
"<tr bgcolor=%s><td> &nbsp; twin's "
"<tr bgcolor=#%s><td> &nbsp; twin's "
"respsponsibility</b></td>"
"<td>%lli</td></tr>\n"
"<tr bgcolor=%s><td> &nbsp; wrong shard</b></td>"
"<tr bgcolor=#%s><td> &nbsp; wrong shard</b></td>"
"<td>%lli</td></tr>\n"
"<tr bgcolor=%s><td> &nbsp; root urls</b></td>"
"<tr bgcolor=#%s><td> &nbsp; root urls</b></td>"
"<td>%lli</td></tr>\n"
"<tr bgcolor=%s><td> &nbsp; non-root urls</b></td>"
"<tr bgcolor=#%s><td> &nbsp; non-root urls</b></td>"
"<td>%lli</td></tr>\n"
"<tr bgcolor=%s><td> &nbsp; no title rec</b></td>"
"<tr bgcolor=#%s><td> &nbsp; no title rec</b></td>"
"<td>%lli</td></tr>\n"
//"<tr><td><b> &nbsp; Other errors</b></td>"
@ -2337,49 +2374,7 @@ bool Repair::printRepairStatus ( SafeBuf *sb , long fromIp ) {
//"<tr><td><b>Time Left in Phase %li</b></td>"
//"<td>%.2f hrs</td></tr>\n"
// spider recs done
"<tr><td><b>spider recs scanned</b></td>"
"<td>%lli of %lli</td></tr>\n"
// percent complete
"<tr><td><b>spider recs scanned progress</b></td>"
"<td>%.2f%%</td></tr>\n"
// spider recs set errors, parsing errors, etc.
"<tr><td><b>spider rec not assigned to us</b></td>"
"<td>%li</td></tr>\n"
// spider recs set errors, parsing errors, etc.
"<tr><td><b>spider rec errors</b></td>"
"<td>%lli</td></tr>\n"
// spider recs set errors, parsing errors, etc.
"<tr><td><b>spider rec bad tld</b></td>"
"<td>%li</td></tr>\n"
// time left in hours
//"<tr><td><b>Time Left in Phase %li</b></td>"
//"<td>%.2f hrs</td></tr>\n"
,
LIGHT_BLUE ,
DARK_BLUE ,
status ,
(long)g_repairMode,
(long)g_pingServer.m_minRepairMode,
minIpBuf, // ip string
minPort, // port
(long)minHostId,
oldColl ,
newColl ,
DARK_BLUE,
ns ,
nr ,
@ -2415,13 +2410,49 @@ bool Repair::printRepairStatus ( SafeBuf *sb , long fromIp ) {
m_recsNonRoot ,
DARK_BLUE,
m_noTitleRecs,
m_noTitleRecs
);
sb->safePrintf(
// spider recs done
"<tr bgcolor=#%s><td><b>spider recs scanned</b></td>"
"<td>%lli of %lli</td></tr>\n"
// percent complete
"<tr bgcolor=#%s><td><b>spider recs scanned "
"progress</b></td>"
"<td>%.2f%%</td></tr>\n"
// spider recs set errors, parsing errors, etc.
"<tr bgcolor=#%s><td><b>spider rec not "
"assigned to us</b></td>"
"<td>%li</td></tr>\n"
// spider recs set errors, parsing errors, etc.
"<tr bgcolor=#%s><td><b>spider rec errors</b></td>"
"<td>%lli</td></tr>\n"
// spider recs set errors, parsing errors, etc.
"<tr bgcolor=#%s><td><b>spider rec bad tld</b></td>"
"<td>%li</td></tr>\n"
// time left in hours
//"<tr bgcolor=#%s><td><b>"
//"Time Left in Phase %li</b></td>"
//"<td>%.2f hrs</td></tr>\n"
,
LIGHT_BLUE ,
ns2 ,
nr2 ,
LIGHT_BLUE ,
ratio2 ,
LIGHT_BLUE ,
m_spiderRecNotAssigned ,
LIGHT_BLUE ,
errors2,
LIGHT_BLUE ,
m_spiderRecBadTLD
);
@ -2439,7 +2470,7 @@ bool Repair::printRepairStatus ( SafeBuf *sb , long fromIp ) {
// m_dbname will be 0
if ( tr == 0 ) continue;
sb->safePrintf(
"<tr bgcolor=%s><td><b>%s2 recs</b></td>"
"<tr bgcolor=#%s><td><b>%s2 recs</b></td>"
"<td>%lli</td></tr>\n" ,
bg,
rdb->m_dbname,
@ -2495,81 +2526,94 @@ bool Repair::printRepairStatus ( SafeBuf *sb , long fromIp ) {
sb->safePrintf (
"<table width=100%% bgcolor=#%s cellpadding=4 "
"border=1 id=\"repairstatustable2\">"
"<table %s "
"id=\"repairstatustable2\">"
// current collection being repaired
"<tr><td bgcolor=%s colspan=2><b><center>"
"<tr class=hdrow><td colspan=2><b><center>"
"Repair Settings In Use</center></b></td></tr>"
// . print parms for this repair
// . they may differ than current controls because
// the current controls were changed after the
// repair started
"<tr><td width=50%%><b>full rebuild</b></td>"
"<tr bgcolor=#%s>"
"<td width=50%%><b>full rebuild</b></td>"
"<td>%s</td></tr>\n"
//"<tr><td><b>recycle link info</b></td>"
//"<tr bgcolor=#%s><td><b>recycle link info</b></td>"
//"<td>%s</td></tr>\n"
"<tr><td><b>rebuild titledb</b></td>"
"<tr bgcolor=#%s><td><b>rebuild titledb</b></td>"
"<td>%s</td></tr>\n"
//"<tr><td><b>rebuild tfndb</b></td>"
//"<tr bgcolor=#%s><td><b>rebuild tfndb</b></td>"
//"<td>%s</td></tr>\n"
//"<tr><td><b>rebuild indexdb</b></td>"
//"<tr bgcolor=#%s><td><b>rebuild indexdb</b></td>"
//"<td>%s</td></tr>\n"
"<tr><td><b>rebuild posdb</b></td>"
"<tr bgcolor=#%s><td><b>rebuild posdb</b></td>"
"<td>%s</td></tr>\n"
//"<tr><td><b>rebuild datedb</b></td>"
//"<tr bgcolor=#%s><td><b>rebuild datedb</b></td>"
//"<td>%s</td></tr>\n"
"<tr><td><b>rebuild clusterdb</b></td>"
"<tr bgcolor=#%s><td><b>rebuild clusterdb</b></td>"
"<td>%s</td></tr>\n"
//"<tr><td><b>rebuild checksumdb</b></td>"
//"<tr bgcolor=#%s><td><b>rebuild checksumdb</b></td>"
//"<td>%s</td></tr>\n"
"<tr><td><b>rebuild spiderdb</b></td>"
"<tr bgcolor=#%s><td><b>rebuild spiderdb</b></td>"
"<td>%s</td></tr>\n"
"<tr><td><b>rebuild linkdb</b></td>"
"<tr bgcolor=#%s><td><b>rebuild linkdb</b></td>"
"<td>%s</td></tr>\n"
//"<tr><td><b>rebuild tagdb</b></td>"
//"<tr bgcolor=#%s><td><b>rebuild tagdb</b></td>"
//"<td>%s</td></tr>\n"
//"<tr><td><b>rebuild placedb</b></td>"
//"<tr bgcolor=#%s><td><b>rebuild placedb</b></td>"
//"<td>%s</td></tr>\n"
//"<tr><td><b>rebuild sectiondb</b></td>"
//"<tr bgcolor=#%s><td><b>rebuild sectiondb</b></td>"
//"<td>%s</td></tr>\n"
//"<tr><td><b>rebuild revdb</b></td>"
//"<tr bgcolor=#%s><td><b>rebuild revdb</b></td>"
//"<td>%s</td></tr>\n"
"<tr><td><b>rebuild root urls</b></td>"
"<tr bgcolor=#%s><td><b>rebuild root urls</b></td>"
"<td>%s</td></tr>\n"
"<tr><td><b>rebuild non-root urls</b></td>"
"<tr bgcolor=#%s>"
"<td><b>rebuild non-root urls</b></td>"
"<td>%s</td></tr>\n"
"</table>\n"
"<br>\n"
,
TABLE_STYLE,
LIGHT_BLUE,
DARK_BLUE,
rr[0],
//rr[10],
LIGHT_BLUE,
rr[1],
//rr[2],
LIGHT_BLUE,
rr[3],
//rr[4],
LIGHT_BLUE,
rr[5],
//rr[6],
LIGHT_BLUE,
rr[7],
//rr[8],
LIGHT_BLUE,
rr[9],
//rr[13],
@ -2578,7 +2622,10 @@ bool Repair::printRepairStatus ( SafeBuf *sb , long fromIp ) {
//rr[16],
//rr[17],
LIGHT_BLUE,
rr[11],
LIGHT_BLUE,
rr[12]
);
return true;

@ -67,7 +67,7 @@ bool Revdb::init2 ( long treeMem ) {
return false;
return true;
}
/*
bool Revdb::addColl ( char *coll, bool doVerify ) {
if ( ! m_rdb.addColl ( coll ) ) return false;
if ( ! doVerify ) return true;
@ -79,7 +79,7 @@ bool Revdb::addColl ( char *coll, bool doVerify ) {
log ( "db: Verify failed, but scaling is allowed, passing." );
return true;
}
*/
bool Revdb::verify ( char *coll ) {
log ( LOG_INFO, "db: Verifying Revdb for coll %s...", coll );
g_threads.disableThreads();

@ -1761,7 +1761,12 @@ Tag *SafeBuf::addTag ( char *mysite ,
bool SafeBuf::addTag ( Tag *tag ) {
long recSize = tag->getSize();
//tag->setDataSize();
if ( tag->m_recDataSize <= 16 ) { char *xx=NULL;*xx=0; }
if ( tag->m_recDataSize <= 16 ) {
// note it
return log("safebuf: encountered corrupted tag datasize=%li.",
tag->m_recDataSize);
//char *xx=NULL;*xx=0; }
}
return safeMemcpy ( (char *)tag , recSize );
}
@ -2703,6 +2708,7 @@ bool SafeBuf::decodeJSONToUtf8 ( long niceness ) {
// diffbot
// . really we could leave the newlines decoded etc, but it is prettier
// for printing
/*
bool SafeBuf::safeStrcpyPrettyJSON ( char *decodedJson ) {
// how much space do we need?
// each single byte \t char for instance will need 2 bytes
@ -2762,6 +2768,7 @@ bool SafeBuf::safeStrcpyPrettyJSON ( char *decodedJson ) {
return true;
}
*/
bool SafeBuf::safeUtf8ToJSON ( char *utf8 ) {

@ -104,7 +104,7 @@ struct SafeBuf {
bool safeMemcpy(SafeBuf *c){return safeMemcpy(c->m_buf,c->m_length);};
bool safeMemcpy ( class Words *w , long a , long b ) ;
bool safeStrcpy ( char *s ) ;
bool safeStrcpyPrettyJSON ( char *decodedJson ) ;
//bool safeStrcpyPrettyJSON ( char *decodedJson ) ;
bool safeUtf8ToJSON ( char *utf8 ) ;
bool csvEncode ( char *s , long len , long niceness = 0 );

@ -224,8 +224,10 @@ class SearchInput {
//long m_formatStrLen;
//char *m_formatStr;
char m_formatTmp[11];
// can be 0 for FORMAT_HTML, 1 = FORMAT_XML, 2=FORMAT_JSON, 3=csv
char m_format;
long m_format;
// this should be part of the key because it will affect the results!
char m_queryExpansion;

@ -17238,7 +17238,7 @@ bool Sectiondb::init2 ( long treeMem ) {
return false;
return true;
}
/*
bool Sectiondb::addColl ( char *coll, bool doVerify ) {
if ( ! m_rdb.addColl ( coll ) ) return false;
if ( ! doVerify ) return true;
@ -17250,7 +17250,7 @@ bool Sectiondb::addColl ( char *coll, bool doVerify ) {
log ( "db: sectiondb verify failed, but scaling is allowed, passing.");
return true;
}
*/
bool Sectiondb::verify ( char *coll ) {
log ( LOG_INFO, "db: Verifying Sectiondb for coll %s...", coll );
g_threads.disableThreads();

Spider.cpp (1839 changed lines): file diff suppressed because it is too large

Spider.h (110 changed lines)

@ -1,11 +1,5 @@
// Matt Wells, copyright Nov 2002
//
// . Spider.h/.cpp contains all the code related to spider scheduling
// . Spiderdb holds the SpiderRecs which indicate the time to spider a url
// . there are 2 types of SpiderRecs: SpiderRequest and SpiderReply recs
//
#ifndef _SPIDER_H_
#define _SPIDER_H_
@ -45,6 +39,7 @@
#define SP_ADMIN_PAUSED 8 // g_conf.m_spideringEnabled = false
#define SP_COMPLETED 9 // crawl is done, and no repeatCrawl is scheduled
void spiderRoundIncremented ( class CollectionRec *cr ) ;
bool testPatterns ( ) ;
bool doesStringContainPattern ( char *content , char *pattern ) ;
@ -57,6 +52,29 @@ bool getSpiderStatusMsg ( class CollectionRec *cx ,
// this new spider algorithm ensures that urls get spidered even if a host
// is dead, and even if the url was being spidered by a host that suddenly
// went dead.
//
// . Spider.h/.cpp contains all the code related to spider scheduling
// . Spiderdb holds the SpiderRecs which indicate the time to spider a url
// . there are 2 types of SpiderRecs: SpiderRequest and SpiderReply recs
//
//
// There are 3 main components to the spidering process:
// 1) spiderdb
// 2) the "waiting tree"
// 3) doledb
//
// spiderdb holds all the spiderrequests/spiderreplies sorted by
// their IP
//
// the waiting tree holds at most one entry for an IP indicating that
// we should scan all the spiderrequests/spiderreplies for that IP in
// spiderdb, find the "best" one(s) and add it (them) to doledb.
//
// doledb holds the best spiderrequests from spiderdb sorted by
// "priority". priorities range from 0 to 127, the highest priority.
// basically doledb holds the urls that are ready for spidering now.
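
The three structures behave like a small per-IP scheduler. A toy model of the flow just described, for illustration only (this is not Gigablast code; it uses standard containers, and the real logic lives in SpiderColl::addToWaitingTree(), evalIpLoop(), populateDoledbFromWaitingTree() and addWinnerToDoledb()):

#include <cstdint>
#include <map>
#include <queue>
#include <utility>

struct ToyRequest { uint32_t firstIp; int priority; long long dueMs; };

struct ToyScheduler {
	typedef std::multimap<uint32_t,ToyRequest>::iterator ReqIter;
	// "spiderdb": every request, grouped by firstIp
	std::multimap<uint32_t,ToyRequest>             spiderdb;
	// "waiting tree": at most one due-time entry per IP
	std::map<uint32_t,long long>                   waitingTree;
	// "doledb": (priority, ip) pairs ready to spider now, best first
	std::priority_queue< std::pair<int,uint32_t> > doledb;

	void addRequest ( const ToyRequest &r ) {
		spiderdb.insert ( std::make_pair ( r.firstIp , r ) );
		// keep only the earliest due time for this IP
		std::map<uint32_t,long long>::iterator it =
			waitingTree.find ( r.firstIp );
		if ( it == waitingTree.end() || r.dueMs < it->second )
			waitingTree[r.firstIp] = r.dueMs;
	}

	// roughly populateDoledbFromWaitingTree() + evalIpLoop(): for each IP
	// whose due time has passed, pick its best request and dole it out
	void populateDoledb ( long long nowMs ) {
		std::map<uint32_t,long long>::iterator it = waitingTree.begin();
		while ( it != waitingTree.end() ) {
			if ( it->second > nowMs ) { ++it; continue; }
			int best = -1;
			std::pair<ReqIter,ReqIter> range =
				spiderdb.equal_range ( it->first );
			for ( ReqIter r = range.first ; r != range.second ; ++r )
				if ( r->second.priority > best )
					best = r->second.priority;
			if ( best >= 0 )
				doledb.push ( std::make_pair ( best , it->first ) );
			waitingTree.erase ( it++ ); // one entry per IP, now doled
		}
	}
};
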
// Spiderdb
@ -242,10 +260,10 @@ bool getSpiderStatusMsg ( class CollectionRec *cx ,
// can spider any request/url in doledb provided they get the lock.
// scanSpiderdb()
// evalIpLoop()
//
// The waiting tree is populated at startup by scanning spiderdb (see
// SpiderColl::scanSpiderdb()), which might take a while to complete,
// SpiderColl::evalIpLoop()), which might take a while to complete,
// so it is running in the background while the gb server is up. it will
// log "10836674298 spiderdb bytes scanned for waiting tree re-population"
// periodically in the log as it tries to do a complete spiderdb scan
@ -255,7 +273,7 @@ bool getSpiderStatusMsg ( class CollectionRec *cx ,
// It will also perform a background scan if the admin changes the url
// filters table, which dictates that we recompute everything.
//
// scanSpiderdb() will recompute the "url filter number" (matching row)
// evalIpLoop() will recompute the "url filter number" (matching row)
// in the url filters table for each url in each SpiderRequest it reads.
// it will ignore spider requests whose urls
// are "filtered" or "banned". otherwise they will have a spider priority >= 0.
@ -270,18 +288,18 @@ bool getSpiderStatusMsg ( class CollectionRec *cx ,
// by preferring those with the highest priority. Tied spider priorities
// should be resolved by minimum hopCount probably.
//
// If the spidertime of the URL is overdue then scanSpiderdb() will NOT add
// If the spidertime of the URL is overdue then evalIpLoop() will NOT add
// it to waiting tree, but will add it to doledb directly to make it available
// for spidering immediately. It calls m_msg4.addMetaList() to add it to
// doledb on all hosts in its group (shard). It uses s_ufnTree for keeping
// track of the best urls to spider for a given IP/spiderPriority.
//
// scanSpiderdb() can also be called with its m_nextKey/m_endKey limited
// evalIpLoop() can also be called with its m_nextKey/m_endKey limited
// to just scan the SpiderRequests for a specific IP address. It does
// this after adding a SpiderReply. addSpiderReply() calls addToWaitingTree()
// with the "0" time entry, and addToWaitingTree() calls
// populateDoledbFromWaitingTree() which will see that "0" entry and call
// scanSpiderdb(true) after setting m_nextKey/m_endKey for that IP.
// evalIpLoop(true) after setting m_nextKey/m_endKey for that IP.
@ -289,7 +307,7 @@ bool getSpiderStatusMsg ( class CollectionRec *cx ,
//
// SpiderColl::populateDoledbFromWaitingTree() scans the waiting tree for
// entries whose spider time is due. so it gets the IP address and spider
// priority from the waiting tree. but then it calls scanSpiderdb()
// priority from the waiting tree. but then it calls evalIpLoop()
// restricted to that IP (using m_nextKey,m_endKey) to get the best
// SpiderRequest from spiderdb for that IP to add to doledb for immediate
// spidering. populateDoledbFromWaitingTree() is called a lot to try to
@ -505,8 +523,28 @@ class SpiderRequest {
// . this is zero if none or invalid
long m_contentHash32;
/*
char m_reserved1;
// the new add url control will allow user to control link spidering
// on each url they add. they can also specify file:// instead of
// http:// to index local files. so we have to allow file://
char m_onlyAddSameDomainLinks :1;
char m_onlyAddSameSubdomainLinks :1;
char m_onlyDoNotAddLinksLinks :1; // max hopcount 1
char m_onlyDoNotAddLinksLinksLinks :1; // max hopcount 2
char m_reserved2d:1;
char m_reserved2e:1;
char m_reserved2f:1;
char m_reserved2g:1;
char m_reserved2h:1;
// . each request can have a different hop count
// . this is only valid if m_hopCountValid is true!
short m_hopCount;
*/
long m_hopCount;
// . this is now computed dynamically often based on the latest
@ -711,16 +749,17 @@ class SpiderRequest {
long print( class SafeBuf *sb );
long printToTable ( SafeBuf *sb , char *status ,
class XmlDoc *xd ) ;
class XmlDoc *xd , long row ) ;
// for diffbot...
long printToTableSimple ( SafeBuf *sb , char *status ,
class XmlDoc *xd ) ;
class XmlDoc *xd , long row ) ;
static long printTableHeader ( SafeBuf *sb , bool currentlSpidering ) ;
static long printTableHeaderSimple ( SafeBuf *sb ,
bool currentlSpidering ) ;
// returns false and sets g_errno on error
bool setFromAddUrl ( char *url ) ;
bool setFromInject ( char *url ) ;
};
// . XmlDoc adds this record to spiderdb after attempting to spider a url
@ -826,7 +865,11 @@ class SpiderReply {
long m_isContacty :1;
long m_hasAddress :1;
long m_hasTOD :1;
long m_hasSiteVenue :1;
// make this "INvalid" not valid since it was set to 0 before
// and we want to be backwards compatible
long m_isIndexedINValid :1;
//long m_hasSiteVenue :1;
// expires after a certain time or if ownership changed
long m_inGoogleValid :1;
@ -835,7 +878,8 @@ class SpiderReply {
long m_isContactyValid :1;
long m_hasAddressValid :1;
long m_hasTODValid :1;
long m_hasSiteVenueValid :1;
//long m_hasSiteVenueValid :1;
long m_reserved2 :1;
long m_siteNumInlinksValid :1;
// was the request an injection request
long m_fromInjectionRequest :1;
@ -989,7 +1033,7 @@ class SpiderColl {
~SpiderColl ( );
SpiderColl ( ) ;
void clear();
void clearLocks();
// called by main.cpp on exit to free memory
void reset();
@ -1028,7 +1072,8 @@ class SpiderColl {
// for scanning the wait tree...
bool m_isPopulating;
// for reading from spiderdb
bool m_isReadDone;
//bool m_isReadDone;
bool m_didRead;
Msg4 m_msg4;
Msg1 m_msg1;
@ -1111,7 +1156,8 @@ class SpiderColl {
bool m_countingPagesIndexed;
HashTableX m_localTable;
long long m_lastReqUh48;
long long m_lastReqUh48a;
long long m_lastReqUh48b;
long long m_lastRepUh48;
// move to CollectionRec so it can load at startup and save it
//HashTableX m_pageCountTable;
@ -1127,8 +1173,17 @@ class SpiderColl {
bool addToWaitingTree ( uint64_t spiderTime , long firstIp ,
bool callForScan );
long getNextIpFromWaitingTree ( );
void populateDoledbFromWaitingTree ( bool reentry );
bool scanSpiderdb ( bool needList );
void populateDoledbFromWaitingTree ( );
//bool scanSpiderdb ( bool needList );
// broke up scanSpiderdb into simpler functions:
bool evalIpLoop ( ) ;
bool readListFromSpiderdb ( ) ;
bool scanListForWinners ( ) ;
bool addWinnerToDoledb ( ) ;
void populateWaitingTreeFromSpiderdb ( bool reentry ) ;
@ -1138,7 +1193,11 @@ class SpiderColl {
key_t m_waitingTreeKey;
bool m_waitingTreeKeyValid;
long m_scanningIp;
bool m_gotNewRequestsForScanningIp;
long m_gotNewDataForScanningIp;
long m_lastListSize;
long m_lastScanningIp;
char m_deleteMyself;
// start key for reading doledb
key_t m_msg5StartKey;
@ -1292,7 +1351,7 @@ void handleRequestc1 ( UdpSlot *slot , long niceness ) ;
// . max spiders we can have going at once for this process
// . limit to 70 to prevent OOM conditions
#define MAX_SPIDERS 70
#define MAX_SPIDERS 100
class SpiderLoop {
@ -1412,6 +1471,7 @@ long getUrlFilterNum ( class SpiderRequest *sreq ,
bool isForMsg20 ,
long niceness ,
class CollectionRec *cr ,
bool isOutlink = false ) ;
bool isOutlink , // = false ,
HashTableX *quotaTable );//= NULL ) ;
#endif

@ -557,6 +557,11 @@ void Stats::printGraphInHtml ( SafeBuf &sb ) {
//
sb.safePrintf("<div style=\"position:relative;"
"background-color:#c0c0c0;"
// match style of tables
"border-radius:10px;"
"border:#6060f0 2px solid;"
//"overflow-y:hidden;"
"overflow-x:hidden;"
"z-index:-10;"
@ -567,10 +572,11 @@ void Stats::printGraphInHtml ( SafeBuf &sb ) {
"min-height:%lipx;"
//"width:100%%;"
//"min-height:600px;"
"margin-top:10px;"
//"margin-top:10px;"
"margin-bottom:10px;"
"margin-right:10px;"
"margin-left:10px;\">"
//"margin-right:10px;"
//"margin-left:10px;"
"\">"
,(long)DX
,(long)DY +20); // add 10 more for "2s" labels etc.

@ -25,9 +25,10 @@ class StatPoint {
#define MAX_POINTS 6000
#define MAX_WIDTH 6
#define DY 1000 // pixels vertical
//#define DY 1000 // pixels vertical
#define DY 500 // pixels vertical
#define DX 1000 // pixels across
#define DT (20*1000) // time window, 20 seconds
#define DT (10*1000) // time window, 10 seconds
#define MAX_LINES (DY / (MAX_WIDTH+1)) // leave free pixel above each line
#define STAT_GENERIC 0

@ -209,17 +209,17 @@ bool Statsdb::init ( ) {
// will init the CollectionRec::m_rdbBase, which is what
// Rdb::getBase(collnum_t) will return. however, for collectionless
	// rdb databases we set Rdb::m_collectionlessBase specially here.
return m_rdb.addColl ( NULL );
return m_rdb.addRdbBase1 ( NULL );
}
// Make sure we need this function.
// main.cpp currently uses the addColl from m_rdb
bool Statsdb::addColl ( char *coll, bool doVerify ) {
if ( ! m_rdb.addColl ( coll ) ) return false;
return true;
}
//bool Statsdb::addColl ( char *coll, bool doVerify ) {
// if ( ! m_rdb.addColl ( coll ) ) return false;
// return true;
//}
void flushStatsWrapper ( int fd , void *state ) {
g_statsdb.addDocsIndexed();
@ -532,7 +532,8 @@ bool Statsdb::makeGIF ( long t1Arg ,
#define MAX_POINTS 6000
#define MAX_WIDTH 6
#define DY2 600 // pixels vertical
//#define DY2 600 // pixels vertical
#define DY2 400 // pixels vertical
#define DX2 1000 // pixels across
#define MAX_LINES2 (DY2 / (MAX_WIDTH+1)) // leave free pixel above each line

@ -992,7 +992,7 @@ bool Syncdb::init ( ) {
// clear it all!
m_qt.clear();
// add the base since it is a collectionless rdb
return m_rdb.addColl ( NULL );
return m_rdb.addRdbBase1 ( NULL );
}
// . save our crap
@ -1432,9 +1432,10 @@ void Syncdb::syncDone ( ) {
m_rcpStarted = false;
}
/*
// TODO: Provide verification.
bool Syncdb::addColl ( char *coll, bool doVerify ) {
if ( ! m_rdb.addColl ( coll ) ) return false;
return true;
}
*/

Tagdb.cpp (148 changed lines)

@ -1854,7 +1854,7 @@ bool Tagdb::init2 ( long treeMem ) {
false ); // bias disk page cache?
}
/*
bool Tagdb::addColl ( char *coll, bool doVerify ) {
if ( ! m_rdb.addColl ( coll ) ) return false;
if ( ! doVerify ) return true;//false;
@ -1867,7 +1867,7 @@ bool Tagdb::addColl ( char *coll, bool doVerify ) {
//return true;
return false;
}
*/
bool Tagdb::verify ( char *coll ) {
@ -2761,14 +2761,16 @@ bool Msg8a::launchGetRequests ( ) {
long shardNum = getShardNum ( m_rdbId , &startKey );//, true );
Host *group = g_hostdb.getShard ( shardNum );
long numTwins = g_hostdb.getNumHostsPerShard();
//long numTwins = g_hostdb.getNumHostsPerShard();
// use top byte!
uint8_t *sks = (uint8_t *)&startKey;
uint8_t top = sks[sizeof(TAGDB_KEY)-1];
long hostNum = 0;
if ( numTwins == 2 && (top & 0x80) ) hostNum = 1;
//long hostNum = 0;
//if ( numTwins == 2 && (top & 0x80) ) hostNum = 1;
// TODO: fix this!
if ( numTwins >= 3 ) { char *xx=NULL;*xx=0; }
//if ( numTwins >= 3 ) { char *xx=NULL;*xx=0; }
// support more than 2 stripes now...
long hostNum = top % g_hostdb.getNumHostsPerShard();
long hostId = group[hostNum].m_hostId;
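
The removed lines special-cased exactly two hosts per shard; the replacement spreads tagdb lookups over any number of twins by taking the key's top byte modulo the hosts-per-shard count. Pulled out into a standalone helper (the helper name is hypothetical; the real code does this inline above), the selection is:

// sketch: choose which twin in the shard serves a given tagdb key
long pickTwinForKey ( void *startKey , long numHostsPerShard ) {
	uint8_t *sks = (uint8_t *)startKey;
	uint8_t top  = sks[sizeof(TAGDB_KEY)-1]; // most significant key byte
	// spreads load evenly whether a shard has 1, 2, 3 or more twins
	return top % numHostsPerShard;
}

so the hostNum line above is equivalent to pickTwinForKey ( &startKey , g_hostdb.getNumHostsPerShard() ).
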
@ -4440,27 +4442,62 @@ bool sendReply2 ( void *state ) {
char bb [ MAX_COLL_LEN + 60 ];
bb[0]='\0';
sb.safePrintf(
"<style>"
".poo { background-color:#%s;}\n"
"</style>\n" ,
LIGHT_BLUE );
// print interface to add sites
sb.safePrintf (
"<table width=100%% bgcolor=#%s border=1 cellpadding=4>"
"<tr><td bgcolor=#%s colspan=21>"
"<center><font size=+1><b>Tagdb</b>%s</font></center>"
"</td></tr>", LIGHT_BLUE , DARK_BLUE , bb );
"<table %s>"
"<tr><td colspan=2>"
"<center><b>Tagdb</b>%s</center>"
"</td></tr>", TABLE_STYLE , bb );
// sometimes we add a huge # of urls, so don't display them because
// it like freezes the silly browser
char *uu = st->m_urls;
if ( st->m_urlsLen > 100000 ) uu = "";
sb.safePrintf ( "<tr><td colspan=21>");
//sb.safePrintf ( "<tr bgcolor=#%s><td colspan=2>"
// "<center>"
// "</center>"
// "</td></tr>",
// DARK_BLUE);
sb.safePrintf ( "<tr class=poo><td>"
"<b>urls</b>"
"<br>"
"<font size=-2>"
"Enter a single URL and then click <i>Get Tags</i> to "
"get back its tags. Enter multiple URLs and select "
"the tags names and values in the other table "
"below in order to tag "
"them all with those tags when you click "
"<i>Add Tags</i>. "
"On the command line you can also issue a "
"<i>./gb 0 dump S main 0 -1 1</i>"
"command, for instance, to dump out the tagdb "
"contents for the <i>main</i> collection on "
"<i>host #0</i>. "
"</font>"
"</td>");
// text area for adding space separated sites/urls
//char *pp = "put sites here";
//char *pp = "";
//if ( st->m_bufLen > 0 ) pp = st->m_buf; // no, print out "urls"
sb.safePrintf ("<center>"
sb.safePrintf (""
"<td width=70%%>"
"<br>"
"<textarea rows=16 cols=64 name=u>"
"%s</textarea><br><br>" , uu );
"%s</textarea></td></tr>" , uu );
// spam assassins should not use this much power, too risky
//if ( st->m_isAdmin ) {
@ -4470,30 +4507,61 @@ bool sendReply2 ( void *state ) {
// allow filename to load them from
//if ( st->m_isAdmin ) {
sb.safePrintf("or specify a file of them: <input name=ufu "
"type=text size=40><br>"
"<i>file can also be dumped output of "
"tagdb from the <b>gb dump S ...</b> "
"command.</i>"
"<br><br>" );
sb.safePrintf("<tr class=poo>"
"<td>"
"<b>file of urls to tag</b>"
"<br>"
"<font size=-2>"
"If provided, Gigablast will read the URLs from "
"this file as if you pasted them into the text "
"area above. The text area will also be ignored."
"</font>"
"</td>"
"<td><input name=ufu "
"type=text size=40>"//<br>"
//"<i>file can also be dumped output of "
//"tagdb from the <b>gb dump S ...</b> "
//"command.</i>"
//"<br><br>" );
"</td></tr>"
);
//}
// this is applied to every tag that is added for accountability
sb.safePrintf("<br>Username: <input name=username type=text size=6 "
"value=\"admin\"> " );//,st->m_username);
sb.safePrintf("<tr class=poo><td>"
"<b>username</b>"
"<br><font size=-2>"
"Stored with each tag you add for accountability."
"</font>"
"</td><td>"
"<input name=username type=text size=6 "
"value=\"admin\"> "
"</td></tr>"
);//,st->m_username);
// as a safety, this must be checked for any delete operation
sb.safePrintf ("&nbsp; delete operation<input type=\"checkbox\" "
"value=\"1\" name=\"delop\"><br>");
sb.safePrintf ("<tr class=poo><td><b>delete operation</b>"
"<br>"
"<font size=-2>"
"If checked "
"then the tag names you specify below will be "
"deleted for the URLs you provide in the text area "
"when you click <i>Add Tags</i>."
"</font>"
"</td><td><input type=\"checkbox\" "
"value=\"1\" name=\"delop\"></td></tr>");
// close up
sb.safePrintf ("<br><center>"
sb.safePrintf ("<tr bgcolor=#%s><td colspan=2>"
"<center>"
// this is merge all by default right now but since
// zak is really only using eventtaghashxxxx.com we
// should be ok
"<input type=submit name=get "
"value=\"get tags\" border=0>"
"value=\"Get Tags\" border=0>"
//"<input type=submit name=get "
//"value=\"get best rec\" border=0>"
@ -4506,7 +4574,11 @@ bool sendReply2 ( void *state ) {
// "</form>"
"</center>"
"</tr>\n");
"</td></tr></table>"
"<br><br>"
, DARK_BLUE
);
// . show all tags we got values for
// . put a delete checkbox next to each one
@ -4515,6 +4587,13 @@ bool sendReply2 ( void *state ) {
// for some reason the "selected" option tags do not show up below
// on firefox unless i have this line.
sb.safePrintf (
"<table %s>"
"<tr><td colspan=20>"
"<center><b>Add Tag</b></center>"
"</td></tr>", TABLE_STYLE );
// count how many "tagRecs" we are taking tags from
Tag *jtag = st->m_tagRec.getFirstTag();
long numTagRecs = 0;
@ -4532,13 +4611,14 @@ bool sendReply2 ( void *state ) {
bool canEdit = (numTagRecs <= 1);
if ( ! canEdit )
sb.safePrintf("<tr><td colspan=20><center><font color=red>"
sb.safePrintf("<tr class=poo>"
"<td colspan=10><center><font color=red>"
"<b>Can not edit because more than one "
"TagRecs were merged</b></font></center>"
"</td></tr>\n" );
// headers
sb.safePrintf("<tr bgcolor=%s>"
sb.safePrintf("<tr bgcolor=#%s>"
//"<td><b>delete?</b></td>"
"<td><b>del?</b></td>"
"<td><b>tag name</b></td>"
@ -4574,9 +4654,9 @@ bool sendReply2 ( void *state ) {
// if we are NULL, print out 3 empty tags
if ( ! ctag ) empty++;
// start the section
sb.safePrintf("<tr bgcolor=%s>",DARK_BLUE);
sb.safePrintf("<tr class=poo>");
// the delete tag checkbox
//sb.safePrintf("<tr bgcolor=%s><td>",DARK_BLUE);
//sb.safePrintf("<tr bgcolor=#%s><td>",DARK_BLUE);
sb.safePrintf("<td>");
if ( ctag && canEdit ) // && tag->m_type != ST_SITE )
sb.safePrintf("<input name=deltag%li "
@ -4624,7 +4704,7 @@ bool sendReply2 ( void *state ) {
// was selected will have this score
if ( canEdit )
sb.safePrintf("<input type=text name=tagdata%li "
"size=70 value=\"",count);
"size=50 value=\"",count);
// show the value
if ( ctag ) ctag->printDataToBuf ( &sb );
// close up the input tag
@ -4693,10 +4773,10 @@ bool sendReply2 ( void *state ) {
// do not print add or del tags buttons if we got tags from more
// than one TagRec!
if ( canEdit )
sb.safePrintf ("<tr bgcolor=%s><td colspan=21><center>"
sb.safePrintf ("<tr bgcolor=#%s><td colspan=10><center>"
"<input type=submit name=add "
"value=\"add tags\" border=0>"
"value=\"Add Tags\" border=0>"
"</center></td>"
"</tr>\n",DARK_BLUE);

@ -115,7 +115,7 @@ char *getTagStrFromType ( long tagType ) ;
//#define MAX_TAGREC_SIZE 1024
// max "oustanding" msg0 requests sent by TagRec::lookup()
#define MAX_TAGDB_REQUESTS 5
#define MAX_TAGDB_REQUESTS 3
// . the latest version of the TagRec
//#define TAGREC_CURRENT_VERSION 0

@ -604,6 +604,7 @@ bool TcpServer::sendMsg ( long ip ,
s->m_maxOtherDocLen = maxOtherDocLen ;
s->m_ssl = NULL;
s->m_udpSlot = NULL;
s->m_streamingMode = false;
// . call the connect routine to try to connect it asap
// . this does not block however
// . this returns false if blocked, true otherwise
@ -694,11 +695,17 @@ bool TcpServer::sendMsg ( TcpSocket *s ,
// . this will also unregister all our callbacks for the socket
// . TODO: deleting nodes from under Loop::callCallbacks is dangerous!!
if ( g_errno ) { destroySocket ( s ); return true; }
// if in streaming mode just return true, do not set sockState
// to ST_NEEDS_CLOSE lest it be destroyed. streaming mode needs
// to get more data to send on the socket.
if ( s->m_streamingMode ) return true;
// reset the socket iff it was a reply that we finished writing
// hmmm else if ( s->m_readBuf ) { recycleSocket ( s ); return true; }
// we can't close it here any more for some reason the browser truncates
// the content we transmit otherwise... i've tried SO_LINGER and couldnt get
// that to work...
	// we can't close it here any more; for some reason the browser truncates
	// the content we transmit otherwise... i've tried SO_LINGER and
	// couldn't get that to work...
if ( s->m_readBuf ) { s->m_sockState = ST_NEEDS_CLOSE; return true; }
// we're blocking on the reply (readBuf is empty)
return false;
@ -906,6 +913,8 @@ TcpSocket *TcpServer::wrapSocket ( int sd , long niceness , bool isIncoming ) {
s->m_lastActionTime = s->m_startTime;
// set if it's incoming connection or not
s->m_isIncoming = isIncoming;
// turn this off
s->m_streamingMode = false;
// . a 30 sec timeout, we don't want slow guys using all our sockets
// . they could easily flood us anyway though
// . we need to wait possibly a few minutes for a large inject of
@ -1434,7 +1443,7 @@ void writeSocketWrapper ( int sd , void *state ) {
// if socket has nothing to send yet cuz we're waiting, wait...
if ( s->m_sendBufUsed == 0 ) return;
sendAgain:
// sendAgain:
// . writeSocket returns false if blocked, true otherwise
// . it also sets g_errno on error
@ -1451,13 +1460,16 @@ void writeSocketWrapper ( int sd , void *state ) {
// if callback changed socket status to ST_SEND_AGAIN
// then let's send the new buffer that it has. Diffbot.cpp uses this.
if ( s->m_sockState == ST_SEND_AGAIN ) {
s->m_sockState = ST_WRITING;
// if nothing left to send just return
if ( ! s->m_sendBuf ) return;
// otherwise send it
goto sendAgain;
}
//if ( s->m_sockState == ST_SEND_AGAIN ) {
// s->m_sockState = ST_WRITING;
// // if nothing left to send just return
// if ( ! s->m_sendBuf ) return;
// // otherwise send it
// goto sendAgain;
//}
// wait for it to exit streaming mode before destroying
if ( s->m_streamingMode ) return;
// . destroy the socket on error, recycle on transaction completion
// . this will also unregister all our callbacks for the socket
@ -1673,6 +1685,14 @@ connected:
// . calls the callback governing "s" if it has one
void TcpServer::destroySocket ( TcpSocket *s ) {
if ( ! s ) return ;
// sanity, must exit streaming mode before destruction
if ( s->m_streamingMode ) {
log("tcp: destroying socket in streaming mode. err=%s",
mstrerror(g_errno));
//char *xx=NULL;*xx=0; }
}
// sanity check
if ( s->m_udpSlot ) {
log("tcp: sending back error on udp slot err=%s",
@ -1864,6 +1884,7 @@ void TcpServer::recycleSocket ( TcpSocket *s ) {
//s->m_timeout = 60*1000;
s->m_timeout = 10*60*1000;
s->m_udpSlot = NULL;
s->m_streamingMode = false;
// keep it alive for other dialogs
s->m_sockState = ST_AVAILABLE;
s->m_startTime = gettimeofdayInMilliseconds();
@ -2097,6 +2118,7 @@ TcpSocket *TcpServer::acceptSocket ( ) {
s->m_sockState = ST_READING;
s->m_this = this;
s->m_udpSlot = NULL;
s->m_streamingMode = false;
if ( ! m_useSSL ) return s;
@ -2214,3 +2236,56 @@ void TcpServer::cancel ( void *state ) {
destroySocket ( s );
}
}
#include "SafeBuf.h"
bool TcpServer::sendChunk ( TcpSocket *s ,
SafeBuf *sb ,
void *state ,
// call this function when done sending this chunk
// so that it can read another chunk and call
// sendChunk() again.
void (* doneSendingWrapper)( void *,TcpSocket *) ,
bool lastChunk ) {
log("tcp: sending chunk of %li bytes", sb->length() );
// if socket had shit on there already, free that memory
// just like TcpServer::destroySocket would
if ( s->m_sendBuf ) {
mfree (s->m_sendBuf, s->m_sendBufSize,"TcpServer");
s->m_sendBuf = NULL;
}
// reset send stats just in case
s->m_sendOffset = 0;
s->m_totalSent = 0;
s->m_totalToSend = 0;
// let it know not to close the socket while this is set
if ( ! lastChunk ) s->m_streamingMode = true;
else s->m_streamingMode = false;
// . start the send process
// . returns false if send did not complete
// . returns true and sets g_errno on error
if ( ! sendMsg ( s ,
sb->getBufStart(), // sendBuf ,
sb->getCapacity(),//sendBufSize ,
sb->length(),//sendBufSize ,
sb->length(), // msgtotalsize
state , // data for callback
doneSendingWrapper ) ) { // callback
// do not free the sendbuf; we are transmitting it
sb->detachBuf();
return false;
}
// we sent without blocking
sb->detachBuf();
// a problem?
if ( g_errno ) return true;
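// (true either way: as with sendMsg(), true just means we did not block;
// the caller checks g_errno to tell an error from a clean completion)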
return true;
}
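
A minimal caller sketch (not part of this commit) of how streaming mode is meant to be driven: the done-sending wrapper keeps feeding sendChunk() the next piece until it passes lastChunk=true, which clears m_streamingMode so writeSocketWrapper can finally recycle or close the socket. StreamState, m_chunksLeft and the m_tcp pointer are illustrative assumptions, not names from the codebase; the first chunk would be queued the same way by whatever request handler owns the socket.

#include "TcpServer.h"   // TcpSocket, sendChunk()
#include "SafeBuf.h"

// hypothetical caller state, for illustration only
class StreamState {
public:
        TcpServer *m_tcp;        // server that owns the socket
        long       m_chunksLeft; // stand-in for a real data source
};

// called by TcpServer each time the previous chunk has been written out
static void doneSendingWrapper ( void *state , TcpSocket *s ) {
        StreamState *ss = (StreamState *)state;
        // nothing left? then the final chunk already went out with
        // lastChunk=true and m_streamingMode is already off
        if ( ss->m_chunksLeft <= 0 ) return;
        SafeBuf sb;
        sb.safePrintf ( "chunk #%li\n" , ss->m_chunksLeft );
        ss->m_chunksLeft--;
        bool lastChunk = ( ss->m_chunksLeft <= 0 );
        // sendChunk() detaches sb's buffer so it is not freed when sb
        // goes out of scope; on the last chunk m_streamingMode is cleared
        // and the socket can then be recycled or closed normally
        ss->m_tcp->sendChunk ( s , &sb , ss , doneSendingWrapper , lastChunk );
}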

@ -99,6 +99,15 @@ class TcpServer {
long maxOtherDocLen );
bool sendChunk ( class TcpSocket *s ,
class SafeBuf *sb ,
void *state ,
// call this function when done sending this chunk
// so that it can read another chunk and call
// sendChunk() again.
void (* doneSendingWrapper)( void *state,TcpSocket *),
bool lastChunk );
// . returns false if blocked, true otherwise
// . sets errno on error
// . use this for sending a msg to another host

@ -29,7 +29,7 @@
// hack to repopulate the socket's send buf when it's done sending
// its current sendbuf in order to transmit large amounts of data that
// can't all fit in memory at the same time:
#define ST_SEND_AGAIN 10
//#define ST_SEND_AGAIN 10
#define TCP_READ_BUF_SIZE 1024
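
For contrast, a rough sketch of the pattern this retires, reconstructed from the commented-out ST_SEND_AGAIN block in writeSocketWrapper above; the buffer size, fill step and allocation note string are placeholders. The completion callback swapped a fresh buffer onto the socket and set ST_SEND_AGAIN instead of calling sendChunk().

// old pattern, now removed: repopulate the socket's send buffer inside
// the completion callback, then flag it so writeSocketWrapper loops again
static void oldStyleDoneWrapper ( void *state , TcpSocket *s ) {
        long  size = 16*1024;                            // placeholder size
        char *buf  = (char *)mmalloc ( size , "chunk" ); // placeholder note
        if ( ! buf ) return;
        // ... fill buf with the next piece of the reply ...
        s->m_sendBuf     = buf;
        s->m_sendBufSize = size;
        s->m_sendBufUsed = size;
        s->m_sendOffset  = 0;
        s->m_totalToSend = size;
        s->m_totalSent   = 0;
        s->m_sockState   = ST_SEND_AGAIN; // made writeSocketWrapper loop again
}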
@ -117,6 +117,7 @@ class TcpSocket {
long m_maxOtherDocLen; // if reading other doc types
char m_niceness;
char m_streamingMode;
long m_shutdownStart;

@ -105,7 +105,7 @@ bool Tfndb::init2 ( long treeMem ) {
return false;
return true;
}
/*
bool Tfndb::addColl ( char *coll, bool doVerify ) {
if ( ! m_rdb.addColl ( coll ) ) return false;
if ( ! doVerify ) return true;
@ -117,7 +117,7 @@ bool Tfndb::addColl ( char *coll, bool doVerify ) {
log ( "db: Verify failed, but scaling is allowed, passing." );
return true;
}
*/
bool Tfndb::verify ( char *coll ) {
log ( LOG_INFO, "db: Verifying Tfndb for coll %s...", coll );
g_threads.disableThreads();

@ -27,7 +27,7 @@ class Tfndb {
bool verify ( char *coll );
bool addColl ( char *coll, bool doVerify = true );
//bool addColl ( char *coll, bool doVerify = true );
// set up our private rdb
bool init ( );

@ -284,7 +284,12 @@ bool Threads::init ( ) {
// with high niceness cuz it would hold up high priority ones!
// . TODO: is there a better way? cancel it when UdpServer calls
// Threads::suspendLowPriorityThreads() ?
if ( ! g_threads.registerType ( MERGE_THREAD , 2/*maxThreads*/,1000) )
// . this used to be 2 but now defaults to 10 in Parms.cpp. i found
// i have fewer long gray lines in the performance graph when i
// did that on trinity.
long max2 = g_conf.m_maxCpuMergeThreads;
if ( max2 < 1 ) max2 = 1;
if ( ! g_threads.registerType ( MERGE_THREAD , max2,1000) )
return log("thread: Failed to register thread type." );
// will raising this from 1 to 2 make it faster too?
// i raised since global specs new servers have 2 (hyperthreaded?) cpus
@ -300,7 +305,11 @@ bool Threads::init ( ) {
return log("thread: Failed to register thread type." );
// . File.cpp spawns a rename thread for doing renames and unlinks
// . doing a tight merge on titldb can be ~250 unlinks
if ( ! g_threads.registerType ( UNLINK_THREAD,1/*maxThreads*/,3000) )
// . MDW up from 1 to 30 max, after doing a ddump on 3000+ collections
// it was taking forever to go one at a time through the unlink
// thread queue. seemed like a 1 second space between unlinks.
// 1/23/2014
if ( ! g_threads.registerType ( UNLINK_THREAD,30/*maxThreads*/,3000) )
return log("thread: Failed to register thread type." );
// generic multipurpose
if ( ! g_threads.registerType (GENERIC_THREAD,100/*maxThreads*/,100) )
@ -1120,7 +1129,7 @@ void makeCallback ( ThreadEntry *t ) {
// then set it
if ( t->m_niceness >= 1 ) g_niceness = 1;
else g_niceness = 0;
t->m_callback ( t->m_state , t );
// time it?

@ -124,7 +124,7 @@ bool Titledb::init2 ( long treeMem ) {
// validate
//return verify ( );
}
/*
bool Titledb::addColl ( char *coll, bool doVerify ) {
if ( ! m_rdb.addColl ( coll ) ) return false;
if ( ! doVerify ) return true;
@ -136,7 +136,7 @@ bool Titledb::addColl ( char *coll, bool doVerify ) {
log ( "db: Verify failed, but scaling is allowed, passing." );
return true;
}
*/
bool Titledb::verify ( char *coll ) {
log ( LOG_DEBUG, "db: Verifying Titledb for coll %s...", coll );
g_threads.disableThreads();

@ -39,7 +39,7 @@ class Titledb {
bool verify ( char *coll );
bool addColl ( char *coll, bool doVerify = true );
//bool addColl ( char *coll, bool doVerify = true );
// init m_rdb
bool init ();

@ -131,8 +131,8 @@ static char s_compBuf[COMPBUFSIZE];
// Kompatible Decomposition table must be loaded before calling this
bool initCompositionTable(){
if ( ! s_isInitialized ) {
log(LOG_INFO,"conf: UCNormalizer: "
"initializing Full Composition table");
//log(LOG_INFO,"conf: UCNormalizer: "
// "initializing Full Composition table");
// set up the hash table
//if ( ! s_compositions.set ( 8,4,16384 ) )
if (!s_compositions.set(8,4,65536,s_compBuf,(long)COMPBUFSIZE,

Some files were not shown because too many files have changed in this diff.