Merge branch 'testing'

2014-04-05 19:25:35 -07:00 · 2014-04-05 19:25:35 -07:00 · fa7216f978
commit fa7216f978
parent 34f7540160 5ff88fafbc
161 changed files with 16104 additions and 9987 deletions
--- a/Address.cpp
+++ b/Address.cpp
@ -646,7 +646,8 @@ bool Addresses::set ( Sections  *sections    ,
 		      TagRec    *gr          ,
 		      Url       *url         ,
 		      long long  docId       ,
-		      char      *coll        ,
+		      //char      *coll        ,
+		      collnum_t collnum ,
 		      long       domHash32   ,
 		      long       ip          ,
 		      //long       tagPairHash ,
@ -678,7 +679,7 @@ bool Addresses::set ( Sections  *sections    ,
 	m_gr          = gr;
 	m_url         = url;
 	m_docId       = docId;
-	m_coll        = coll;
+	m_collnum        = collnum;
 	m_domHash32   = domHash32;
 	m_ip          = ip;
 	//m_tagPairHash = tagPairHash;
@ -1090,7 +1091,7 @@ bool Addresses::set ( Sections  *sections    ,
 	// parsing consistency
 	if ( //! m_addressReplyValid && 
 	    ! m_msg2c->verifyAddresses ( this         ,
-					 m_coll       ,
+					 m_collnum       ,
 					 m_domHash32 ,
 					 m_ip         ,
 					 m_niceness   ,
@ -10257,7 +10258,7 @@ void Addresses::print ( SafeBuf *pbuf , long long uh64 ) {
 			 "be a KEY in placedb. So you generally need two "
 			 "places inlining the same name before that will "
 			 "happen.</i>");
-	pbuf->safePrintf("</br>\n");
+	pbuf->safePrintf("<br>\n");

 }

@ -16102,7 +16103,8 @@ void Msg2c::reset() {
 //   into the TitleRec for re-parsing purposes later on, so we consistently
 //   re-parse
 bool Msg2c::verifyAddresses ( Addresses  *aa         ,
-			      char       *coll       ,
+			      //char       *coll       ,
+			      collnum_t collnum ,
 			      long        domHash32  ,
 			      long        ip         ,
 			      long        niceness   ,
@ -16111,7 +16113,7 @@ bool Msg2c::verifyAddresses ( Addresses  *aa         ,
 	
 	m_niceness   = niceness;
 	m_addresses  = aa;
-	m_coll       = coll;
+	m_collnum = collnum;
 	m_domHash32  = domHash32;
 	m_ip         = ip;
 	m_callback   = callback;
@ -16255,9 +16257,11 @@ bool Msg2c::launchRequests ( ) {
 	char isName = ( a->m_street->m_flags2 & PLF2_IS_NAME ) ;
 	*(char *)p = isName  ; p += 1;
 	// collection
-	long collSize = gbstrlen(m_coll) + 1;
-	memcpy ( p , m_coll , collSize );
-	p += collSize;
+	//long collSize = gbstrlen(m_coll) + 1;
+	//memcpy ( p , m_coll , collSize );
+	//p += collSize;
+	*(collnum_t *)p = m_collnum;
+	p += sizeof(collnum_t);
 	// end of it
 	char *pend = requestBuf + REQBUFSIZE; // s_requestBuf + max;
 	// . then the address string, semicolon separated, null terminated
@ -16495,14 +16499,16 @@ void handleRequest2c ( UdpSlot *slot , long nicenessWTF ) {
 	// save it
 	st->m_niceness = niceness;
 	// get coll
-	char *coll = p; p += gbstrlen(p) + 1;
+	//char *coll = p; p += gbstrlen(p) + 1;
+	collnum_t collnum = *(collnum_t *)p;
+	p += sizeof(collnum_t);
 	// the address string, semicolon separated, NULL terminated
 	st->m_addrStr = p; p += gbstrlen(p) + 1;

 	// . get from msg5, return if it blocked
 	// . will probably not block since in the disk page cache a lot
 	if ( ! st->m_msg5.getList ( RDB_PLACEDB ,
-				    coll        ,
+				    collnum        ,
 				    &st->m_list ,
 				    (char *)&startKey    ,
 				    (char *)&endKey      ,
--- a/Address.h
+++ b/Address.h
@ -483,7 +483,8 @@ class Msg2c {
 	// . closest matching "site" is used as the "site" (the site url)
 	// . stores the tagRec in your "tagRec"
 	bool verifyAddresses ( class Addresses *aa         ,
-			       char            *coll       , 
+			       //char            *coll       , 
+			       collnum_t collnum,
 			       long             domHash32  ,
 			       long             ip         ,
 			       //HashTableX      *avt        ,
@ -494,8 +495,8 @@ class Msg2c {
 	bool launchRequests ( );

 	// some specified input
-	char  *m_coll;
-	long   m_collLen;
+	//char  *m_coll;
+	//long   m_collLen;
 	collnum_t m_collnum;
 	void    (*m_callback ) ( void *state );
 	void     *m_state;
@ -597,7 +598,8 @@ class Addresses {
 		   class TagRec   *gr          ,
 		   class Url      *url         ,
 		   long long       docId       ,
-		   char           *coll        ,
+		   //char           *coll        ,
+		   collnum_t collnum,
 		   long            domHash32   ,
 		   long            ip          ,
 		   //long            tagPairHash ,
@ -716,7 +718,8 @@ class Addresses {
 	RdbList         m_list;
 	class Url      *m_url;
 	long long       m_docId;
-	char           *m_coll;
+	//char           *m_coll;
+	collnum_t m_collnum;
 	long long       m_termId;
 	long            m_domHash32;
 	long            m_ip;
--- a/AutoBan.cpp
+++ b/AutoBan.cpp
@ -802,7 +802,7 @@ bool AutoBan::printTable( TcpSocket *s , HttpRequest *r ) {
 	SafeBuf sb(512 * 512,"autobbuf");
 	//read in all of the possible cgi parms off the bat:
 	//long  user     = g_pages.getUserType( s , r );
-	char *username = g_users.getUsername(r);
+	//char *username = g_users.getUsername(r);
 	//char *pwd  = r->getString ("pwd");

 	char *coll = r->getString ("c");
@ -831,8 +831,8 @@ bool AutoBan::printTable( TcpSocket *s , HttpRequest *r ) {

 // 	char *ss = sb.getBuf();
 // 	char *ssend = sb.getBufEnd();
-	g_pages.printAdminTop ( &sb, PAGE_AUTOBAN, username,
-				coll , NULL , s->m_ip );
+	g_pages.printAdminTop ( &sb, s , r );
+
 	//sb.incrementLength(sss - ss);

 	// MDW: moved to here
@ -859,7 +859,7 @@ bool AutoBan::printTable( TcpSocket *s , HttpRequest *r ) {
 			  &msecs);
 	sb.safePrintf("<tr><td colspan=18 bgcolor=#%s>"
 		      "<center><b>Code Usage "
-		      "(<a href=\"/master/"
+		      "(<a href=\"/admin/"
 		      "autoban?c=%s&resetcodes=1\">reset</a> "
 		      "%li days %li hours %li "
 		      "minutes %li sec ago)"
@ -1271,15 +1271,15 @@ bool AutoBan::printTable( TcpSocket *s , HttpRequest *r ) {
 // 			      "%li days %li hrs %li min ago"
 // 			      "</center></td>"

-			      "<td><center><a href=\"/master/"
+			      "<td><center><a href=\"/admin/"
 			      "autoban?c=%s&allow=%s&showAllIps=%li\">" 
 			      "allow/</a>"

-			      "<a href=\"/master/"
+			      "<a href=\"/admin/"
 			      "autoban?c=%s&deny=%s&showAllIps=%li\">" 
 			      "deny/</a>"

-			      "<a href=\"/master/"
+			      "<a href=\"/admin/"
 			      "autoban?c=%s&clear=%s&showAllIps=%li\">"
 			      "clear</a></center>"
 			      "</td>",color, 
@ -1320,22 +1320,22 @@ bool AutoBan::printTable( TcpSocket *s , HttpRequest *r ) {
 		      "<td bgcolor=#%s><center><b>Show Ips by Number of Queries"
 		      "</b></center></td>",
 		      LIGHT_BLUE);
-	sb.safePrintf("<td><center><font color=red><b><a href=\"/master/"
+	sb.safePrintf("<td><center><font color=red><b><a href=\"/admin/"
 		      "autoban?c=%s&showAllIps=0\">"
 		      "0 Queries</a></b>"
 		      "</font></center></td>",
 		      coll);
-	sb.safePrintf("<td><center><font color=red><b><a href=\"/master/"
+	sb.safePrintf("<td><center><font color=red><b><a href=\"/admin/"
 		      "autoban?c=%s&showAllIps=1\">"
 		      "1 Query</a></b>"
 		      "</font></center></td>",
 		      coll);
-	sb.safePrintf("<td><center><font color=red><b><a href=\"/master/"
+	sb.safePrintf("<td><center><font color=red><b><a href=\"/admin/"
 		      "autoban?c=%s&showAllIps=10\">"
 		      "10 Queries</a></b>"
 		      "</font></center></td>",
 		      coll);
-	sb.safePrintf("<td><center><font color=red><b><a href=\"/master/"
+	sb.safePrintf("<td><center><font color=red><b><a href=\"/admin/"
 		      "autoban?c=%s&showAllIps=100\">"
 		      "100 Queries</a></b>"
 		      "</font></center></td></tr>",
@ -1469,10 +1469,10 @@ bool AutoBan::printTable( TcpSocket *s , HttpRequest *r ) {
 				      m_detectVals[i].m_timesBanned);
 		}
 		sb.safePrintf("<td><center>"
-			      "<a href=\"/master/"
+			      "<a href=\"/admin/"
 			      "autoban?c=%s&allow=%s&showAllIps=%li\">" 
 			      "allow/</a>"
-			      "<a href=\"/master/"
+			      "<a href=\"/admin/"
 			      "autoban?c=%s&deny=%s&showAllIps=%li\">" 
 			      "deny</a></center>"
 			      "</td>",
--- a/BigFile.cpp
+++ b/BigFile.cpp
@ -468,6 +468,9 @@ bool BigFile::readwrite ( void         *buf      ,
 	fstate->m_callback    = callback;
 	fstate->m_niceness    = niceness;
 	fstate->m_flags       = m_flags;
+	// sanity
+	if ( fstate->m_bytesToGo > 150000000 )
+		log("file: huge read of %lli bytes",(long long)size);
 	// . set our fd's before entering the thread in case RdbMerge
 	//   calls our unlinkPart() 
 	// . it's thread-UNsafe to call getfd() from within the thread
@ -563,10 +566,12 @@ bool BigFile::readwrite ( void         *buf      ,
 	// request originated through Multicast, then multicast will sleep
 	// and retry. Msg3 could retry, the multicast thing should be more
 	// for running out of udp slots though...
-	if ( g_errno && ! doWrite && g_errno != ENOTHREADSLOTS ) {
-		log (LOG_INFO,"disk: May retry later.");
-		return true;
-	}
+	// crap, call to clone() now fails a lot since we use pthreads
+	// library ... so assume that is it i guess (MDW 3/15/2014)
+	//if ( g_errno && ! doWrite && g_errno != ENOTHREADSLOTS ) {
+	//	log (LOG_INFO,"disk: May retry later.");
+	//	return true;
+	//}
 	// otherwise, thread spawn failed, do it blocking then
 	g_errno = 0;
 	// if threads are manually disabled don't print these msgs because
@ -577,7 +582,8 @@ bool BigFile::readwrite ( void         *buf      ,
 		if ( now - s_lastTime >= 1 ) {
 			s_lastTime = now;
 			log (LOG_INFO,
-			     "disk: Doing blocking disk access. This will hurt "
+			     "disk: Doing blocking disk access. "
+			     "This will hurt "
 			     "performance. isWrite=%li.",(long)doWrite);
 		}
 	}
--- a/Cachedb.cpp
+++ b/Cachedb.cpp
@ -99,9 +99,10 @@ bool Cachedb::verify ( char *coll ) {
 	startKey.setMin();
 	endKey.setMax();
 	long minRecSizes = 64000;
-	
+	CollectionRec *cr = g_collectiondb.getRec(coll);
+
 	if ( ! msg5.getList ( m_rdbId,//RDB_CACHEDB   ,
-			      coll          ,
+			      cr->m_collnum ,
 			      &list         ,
 			      (char*)&startKey      ,
 			      (char*)&endKey        ,
--- a/Catdb.cpp
+++ b/Catdb.cpp
@ -141,7 +141,7 @@ bool Catdb::verify ( char *coll ) {
 	//long minRecSizes = 64000;
 	
 	if ( ! msg5.getList ( RDB_CATDB     ,
-			      "",//coll          ,
+			      0,//collnum          ,
 			      &list         ,
 			      startKey      ,
 			      endKey        ,
--- a/Clusterdb.cpp
+++ b/Clusterdb.cpp
@ -362,9 +362,10 @@ bool Clusterdb::verify ( char *coll ) {
 	startKey.setMin();
 	endKey.setMax();
 	//long minRecSizes = 64000;
+	CollectionRec *cr = g_collectiondb.getRec(coll);
 	
 	if ( ! msg5.getList ( RDB_CLUSTERDB ,
-			      coll          ,
+			      cr->m_collnum          ,
 			      &list         ,
 			      startKey      ,
 			      endKey        ,
@ -394,6 +395,8 @@ bool Clusterdb::verify ( char *coll ) {
 	for ( list.resetListPtr() ; ! list.isExhausted() ;
 	      list.skipCurrentRecord() ) {
 		key_t k = list.getCurrentKey();
+		// skip negative keys
+		if ( (k.n0 & 0x01) == 0x00 ) continue;
 		count++;
 		//unsigned long groupId = getGroupId ( RDB_CLUSTERDB , &k );
 		//if ( groupId == g_hostdb.m_groupId ) got++;
--- a/Collectiondb.cpp
+++ b/Collectiondb.cpp
@ -138,6 +138,19 @@ bool Collectiondb::loadAllCollRecs ( ) {
 		if ( ! addExistingColl ( coll , collnum ) )
 			return false;
 	}
+	// if no existing recs added... add coll.main.0 always at startup
+	if ( m_numRecs == 0 ) {
+		log("admin: adding main collection.");
+		addNewColl ( "main",
+			     0 , // customCrawl ,
+			     NULL, 
+			     0 ,
+			     true , // bool saveIt ,
+			     // Parms.cpp reserves this so it can be sure
+			     // to add the same collnum to every shard
+			     0 );
+	}
+		
 	// note it
 	//log(LOG_INFO,"db: Loaded data for %li collections. Ranging from "
 	//    "collection #0 to #%li.",m_numRecsUsed,m_numRecs-1);
@ -449,10 +462,10 @@ bool Collectiondb::addNewColl ( char *coll ,
 		// show the ban links in the search results. the 
 		// collection name is cryptographic enough to show that
 		cr->m_isCustomCrawl = customCrawl;
-		cr->m_diffbotOnlyProcessIfNew = true;
+		cr->m_diffbotOnlyProcessIfNewUrl = true;
 		// default respider to off
 		cr->m_collectiveRespiderFrequency = 0.0;
-		cr->m_restrictDomain = true;
+		//cr->m_restrictDomain = true;
 		// reset the crawl stats
 		// . this will core if a host was dead and then when it came
 		//   back up host #0's parms.cpp told it to add a new coll
@ -604,7 +617,7 @@ bool Collectiondb::addRdbBasesForCollRec ( CollectionRec *cr ) {



-
+/*
 bool Collectiondb::isAdmin ( HttpRequest *r , TcpSocket *s ) {
 	if ( r->getLong("admin",1) == 0 ) return false;
 	if ( g_conf.isMasterAdmin ( s , r ) ) return true;
@ -615,7 +628,6 @@ bool Collectiondb::isAdmin ( HttpRequest *r , TcpSocket *s ) {
 	//return cr->hasPermission ( r , s );
 }

-/*
 void savingCheckWrapper1 ( int fd , void *state ) {
 	WaitEntry *we = (WaitEntry *)state;
 	// no state?
@ -688,6 +700,8 @@ bool Collectiondb::deleteRec ( char *coll , WaitEntry *we ) {

 // if there is an outstanding disk read thread or merge thread then
 // Spider.cpp will handle the delete in the callback.
+// this is now tryToDeleteSpiderColl in Spider.cpp
+/*
 void Collectiondb::deleteSpiderColl ( SpiderColl *sc ) {

 	sc->m_deleteMyself = true;
@ -701,10 +715,11 @@ void Collectiondb::deleteSpiderColl ( SpiderColl *sc ) {
 		return;
 	}
 }
+*/

 bool Collectiondb::deleteRec2 ( collnum_t collnum ) { //, WaitEntry *we ) {
 	// do not allow this if in repair mode
-	if ( g_repairMode > 0 ) {
+	if ( g_repair.isRepairActive() && g_repair.m_collnum == collnum ) {
 		log("admin: Can not delete collection while in repair mode.");
 		g_errno = EBADENGINEER;
 		return true;
@ -794,7 +809,7 @@ bool Collectiondb::deleteRec2 ( collnum_t collnum ) { //, WaitEntry *we ) {
 		//sc->reset();
 		// this will put it on "death row" so it will be deleted
 		// once Msg5::m_waitingForList/Merge is NULL
-		deleteSpiderColl ( sc );
+		tryToDeleteSpiderColl ( sc );
 		//mdelete ( sc, sizeof(SpiderColl),"nukecr2");
 		//delete ( sc );
 		cr->m_spiderColl = NULL;
@ -836,8 +851,8 @@ bool Collectiondb::resetColl ( char *coll ,  bool purgeSeeds) {
 		return true;
 	}

-	// get the CollectionRec for "test"
-	CollectionRec *cr = getRec ( coll ); // "test" );
+	// get the CollectionRec for "qatest123"
+	CollectionRec *cr = getRec ( coll ); // "qatest123" );

 	// must be there. if not, we create test i guess
 	if ( ! cr ) { 
@ -849,6 +864,47 @@ bool Collectiondb::resetColl ( char *coll ,  bool purgeSeeds) {
 }
 */

+// . makes sure m_recs[] can hold a ptr at index "collnum" and NULLs that slot
+bool Collectiondb::growRecPtrBuf ( collnum_t collnum ) {
+	// . returns true on success, false (after logging) if the grow failed
+	// . "need" = bytes for ptrs 0..collnum, "have" = bytes in use now
+	long need = ((long)collnum+1)*sizeof(CollectionRec *);
+	long have = m_recPtrBuf.getLength();
+	long need2 = need - have;
+
+	// already big enough: just reset the slot (any old ptr is overwritten)
+	if ( need2 <= 0 ) {
+		m_recs [ collnum ] = NULL;
+		return true;
+	}
+
+	// . true here means to clear the new space to zeroes
+	// . this works based on m_length not m_capacity (hence the shortfall)
+	if ( ! m_recPtrBuf.reserve ( need2 ,NULL, true ) ) {
+		log("admin: error growing rec ptr buf2.");
+		return false;
+	}
+
+	// sanity: deliberately write through NULL to core if still too small
+	if ( m_recPtrBuf.getCapacity() < need ) { char *xx=NULL;*xx=0; }
+
+	// re-ref m_recs in case the buffer start is different now
+	m_recs = (CollectionRec **)m_recPtrBuf.getBufStart();
+
+	// update length of used bytes in case we re-alloc
+	m_recPtrBuf.setLength ( need );
+
+	// how many ptrs fit in the buffer's capacity now
+	long max = m_recPtrBuf.getCapacity() / sizeof(CollectionRec *);
+	// sanity: core if the slot is still out of range
+	if ( collnum >= max ) { char *xx=NULL;*xx=0; }
+
+	// initialize slot
+	m_recs [ collnum ] = NULL;
+
+	return true;
+}
+

 bool Collectiondb::setRecPtr ( collnum_t collnum , CollectionRec *cr ) {

@ -891,29 +947,12 @@ bool Collectiondb::setRecPtr ( collnum_t collnum , CollectionRec *cr ) {
 		return true;
 	}

-	// an add, make sure big enough
-	long need = ((long)collnum+1)*sizeof(CollectionRec *);
-	long have = m_recPtrBuf.getLength();
-	long need2 = need - have;
-	// . true here means to clear the new space to zeroes
-	// . this shit works based on m_length not m_capacity
-	if ( need2 > 0 && ! m_recPtrBuf.reserve ( need2 ,NULL, true ) ) {
-		log("admin: error growing rec ptr buf2.");
+	// ensure m_recs[] is big enough for m_recs[collnum] to be a ptr
+	if ( ! growRecPtrBuf ( collnum ) )
 		return false;
-	}

 	// sanity
 	if ( cr->m_collnum != collnum ) { char *xx=NULL;*xx=0; }
-	// update length of used bytes in case we re-alloc
-	m_recPtrBuf.setLength ( need );
-	// sanity
-	if ( m_recPtrBuf.getCapacity() < need ) { char *xx=NULL;*xx=0; }
-	// re-ref it in case it is different
-	m_recs = (CollectionRec **)m_recPtrBuf.getBufStart();
-	// re-max
-	max = m_recPtrBuf.getCapacity() / sizeof(CollectionRec *);
-	// sanity
-	if ( collnum >= max ) { char *xx=NULL;*xx=0; }

 	// add to hash table to map name to collnum_t
 	long long h64 = hash64n(cr->m_coll);
@ -946,6 +985,39 @@ bool Collectiondb::setRecPtr ( collnum_t collnum , CollectionRec *cr ) {
 	return true;
 }

+// Moves "src" to "dest": tries rename() first and, when that fails (e.g. a
+// cross-device move), copies the bytes and then removes "src".
+// Returns 0 on success; returns -1 on failure and leaves "src" untouched.
+int mv(char* src, char* dest) {
+    if (rename( src , dest ) == 0)
+        return 0;
+    FILE *fsrc = fopen(src, "rb");
+    if (fsrc == NULL)
+        return -1;
+    FILE *fdest = fopen(dest, "wb");
+    if (fdest == NULL) {
+        fclose(fsrc);
+        return -1;
+    }
+    char buf[1024];
+    bool ok = true;
+    size_t got;
+    // a short write means the copy is incomplete, so stop right away
+    while (ok && (got = fread(buf, 1, sizeof(buf), fsrc)) > 0)
+        ok = (fwrite(buf, 1, got, fdest) == got);
+    // test the error flag BEFORE fclose(); the stream is invalid afterwards
+    if (ferror(fsrc)) ok = false;
+    fclose(fsrc);
+    // fclose() flushes buffered output, so its failure is a write failure
+    if (fclose(fdest) != 0) ok = false;
+    if (!ok) {
+        remove(dest); // do not leave a truncated copy behind
+        return -1;
+    }
+    remove(src);
+    return 0;
+}
+
 // . returns false if we need a re-call, true if we completed
 // . returns true with g_errno set on error
 bool Collectiondb::resetColl2( collnum_t oldCollnum,
@ -956,8 +1028,8 @@ bool Collectiondb::resetColl2( collnum_t oldCollnum,
 	// save parms in case we block
 	//we->m_purgeSeeds = purgeSeeds;

-	// now must be "test" only for now
-	//if ( strcmp(coll,"test") ) { char *xx=NULL;*xx=0; }
+	// now must be "qatest123" only for now
+	//if ( strcmp(coll,"qatest123") ) { char *xx=NULL;*xx=0; }
 	// no spiders can be out. they may be referencing the CollectionRec
 	// in XmlDoc.cpp... quite likely.
 	//if ( g_conf.m_spideringEnabled ||
@ -968,7 +1040,7 @@ bool Collectiondb::resetColl2( collnum_t oldCollnum,
 	//}

 	// do not allow this if in repair mode
-	if ( g_repairMode > 0 ) {
+	if ( g_repair.isRepairActive() && g_repair.m_collnum == oldCollnum ) {
 		log("admin: Can not delete collection while in repair mode.");
 		g_errno = EBADENGINEER;
 		return true;
@ -992,6 +1064,18 @@ bool Collectiondb::resetColl2( collnum_t oldCollnum,
 	//collnum_t oldCollnum = cr->m_collnum;
 	//collnum_t newCollnum = m_numRecs;

+	// in case of bulk job, be sure to save list of spots
+	// copy existing list to a /tmp, where they will later be transferred back to the new folder
+	char oldbulkurlsname[1036];
+	snprintf(oldbulkurlsname, 1036, "%scoll.%s.%li/bulkurls.txt",g_hostdb.m_dir,cr->m_coll,(long)oldCollnum);
+	char newbulkurlsname[1036];
+	snprintf(newbulkurlsname, 1036, "%scoll.%s.%li/bulkurls.txt",g_hostdb.m_dir,cr->m_coll,(long)newCollnum);
+	char tmpbulkurlsname[1036];
+	snprintf(tmpbulkurlsname, 1036, "/tmp/coll.%s.%li.bulkurls.txt",cr->m_coll,(long)oldCollnum);
+
+	if (cr->m_isCustomCrawl == 2)
+	    mv( oldbulkurlsname , tmpbulkurlsname );
+
 	// reset spider info
 	SpiderColl *sc = g_spiderCache.getSpiderCollIffNonNull(oldCollnum);
 	if ( sc ) {
@ -1004,7 +1088,7 @@ bool Collectiondb::resetColl2( collnum_t oldCollnum,
 		//sc->reset();
 		// this will put it on "death row" so it will be deleted
 		// once Msg5::m_waitingForList/Merge is NULL
-		deleteSpiderColl ( sc );
+		tryToDeleteSpiderColl ( sc );
 		//mdelete ( sc, sizeof(SpiderColl),"nukecr2");
 		//delete ( sc );
 		cr->m_spiderColl = NULL;
@ -1101,14 +1185,18 @@ bool Collectiondb::resetColl2( collnum_t oldCollnum,
 	// save coll.conf to new directory
 	cr->save();

+	// be sure to copy back the bulk urls for bulk jobs
+	if (cr->m_isCustomCrawl == 2)
+	    mv( tmpbulkurlsname, newbulkurlsname );

 	// and clear the robots.txt cache in case we recently spidered a
 	// robots.txt, we don't want to use it, we want to use the one we
 	// have in the test-parser subdir so we are consistent
-	RdbCache *robots = Msg13::getHttpCacheRobots();
-	RdbCache *others = Msg13::getHttpCacheOthers();
-	robots->clear ( oldCollnum );
-	others->clear ( oldCollnum );
+	//RdbCache *robots = Msg13::getHttpCacheRobots();
+	//RdbCache *others = Msg13::getHttpCacheOthers();
+	// clear() was removed due to possible corruption
+	//robots->clear ( oldCollnum );
+	//others->clear ( oldCollnum );

 	//g_templateTable.reset();
 	//g_templateTable.save( g_hostdb.m_dir , "turkedtemplates.dat" );
@ -1329,6 +1417,9 @@ collnum_t Collectiondb::reserveCollNum ( ) {

 	if ( m_numRecs < 0x7fff ) {
 		collnum_t next = m_numRecs;
+		// make the ptr NULL at least, to accommodate the
+		// loops that scan up to m_numRecs, lest we core
+		growRecPtrBuf ( next );
 		m_numRecs++;
 		return next;
 	}
@ -1458,6 +1549,9 @@ void CollectionRec::reset() {
 	if ( m_hasucr ) regfree ( &m_ucr );
 	if ( m_hasupr ) regfree ( &m_upr );

+	m_hasucr = false;
+	m_hasupr = false;
+
 	// make sure we do not leave spiders "hanging" waiting for their
 	// callback to be called... and it never gets called
 	//if ( m_callbackQueue.length() > 0 ) { char *xx=NULL;*xx=0; }
@ -1759,31 +1853,193 @@ void CollectionRec::setUrlFiltersToDefaults ( ) {

 	long n = 0;

-	//strcpy(m_regExs   [n],"default");
+	/*
 	m_regExs[n].set("default");
 	m_regExs[n].nullTerm();
-	m_numRegExs++;
-
 	m_spiderFreqs     [n] = 30; // 30 days default
-	m_numRegExs2++;
-
 	m_spiderPriorities[n] = 0;
-	m_numRegExs3++;
-
 	m_maxSpidersPerRule[n] = 99;
-	m_numRegExs10++;
-
 	m_spiderIpWaits[n] = 1000;
-	m_numRegExs5++;
-
 	m_spiderIpMaxSpiders[n] = 7;
-	m_numRegExs6++;
-
-	//m_spidersEnabled[n] = 1;
-	//m_numRegExs7++;
-
 	m_harvestLinks[n] = 1;
-	m_numRegExs8++;
+	*/
+
+	m_regExs[n].set("isdocidbased");
+	m_harvestLinks       [n] = 1;
+	m_spiderFreqs        [n] = 0; // 30 days default
+	m_maxSpidersPerRule  [n] = 99; // max spiders
+	m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
+	m_spiderIpWaits      [n] = 1000; // same ip wait
+	m_spiderPriorities   [n] = 80;
+	n++;
+
+	m_regExs[n].set("ismedia");
+	m_harvestLinks       [n] = 1;
+	m_spiderFreqs        [n] = 0; // 30 days default
+	m_maxSpidersPerRule  [n] = 99; // max spiders
+	m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
+	m_spiderIpWaits      [n] = 1000; // same ip wait
+	m_spiderPriorities   [n] = -3; // delete!
+	n++;
+
+	// if not in the site list then nuke it
+	m_regExs[n].set("!insitelist");
+	m_harvestLinks       [n] = 1;
+	m_spiderFreqs        [n] = 0; // 30 days default
+	m_maxSpidersPerRule  [n] = 99; // max spiders
+	m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
+	m_spiderIpWaits      [n] = 1000; // same ip wait
+	m_spiderPriorities   [n] = -3; // delete!
+	n++;
+
+	m_regExs[n].set("errorcount>=3 && hastmperror");
+	m_harvestLinks       [n] = 1;
+	m_spiderFreqs        [n] = 1; // 30 days default
+	m_maxSpidersPerRule  [n] = 1; // max spiders
+	m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
+	m_spiderIpWaits      [n] = 1000; // same ip wait
+	m_spiderPriorities   [n] = 3;
+	n++;
+
+	m_regExs[n].set("errorcount>=1 && hastmperror");
+	m_harvestLinks       [n] = 1;
+	m_spiderFreqs        [n] = 1; // 30 days default
+	m_maxSpidersPerRule  [n] = 1; // max spiders
+	m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
+	m_spiderIpWaits      [n] = 1000; // same ip wait
+	m_spiderPriorities   [n] = 45;
+	n++;
+
+	m_regExs[n].set("isaddurl");
+	m_harvestLinks       [n] = 1;
+	m_spiderFreqs        [n] = 7; // 30 days default
+	m_maxSpidersPerRule  [n] = 99; // max spiders
+	m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
+	m_spiderIpWaits      [n] = 1000; // same ip wait
+	m_spiderPriorities   [n] = 85;
+	n++;
+
+	m_regExs[n].set("hopcount==0 && iswww && isnew");
+	m_harvestLinks       [n] = 1;
+	m_spiderFreqs        [n] = 7; // 30 days default
+	m_maxSpidersPerRule  [n] = 9; // max spiders
+	m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
+	m_spiderIpWaits      [n] = 1000; // same ip wait
+	m_spiderPriorities   [n] = 50;
+	n++;
+
+	m_regExs[n].set("hopcount==0 && iswww");
+	m_harvestLinks       [n] = 1;
+	m_spiderFreqs        [n] = 7; // 30 days default
+	m_maxSpidersPerRule  [n] = 9; // max spiders
+	m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
+	m_spiderIpWaits      [n] = 1000; // same ip wait
+	m_spiderPriorities   [n] = 48;
+	n++;
+
+	m_regExs[n].set("hopcount==0 && isnew");
+	m_harvestLinks       [n] = 1;
+	m_spiderFreqs        [n] = 7; // 30 days default
+	m_maxSpidersPerRule  [n] = 9; // max spiders
+	m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
+	m_spiderIpWaits      [n] = 1000; // same ip wait
+	m_spiderPriorities   [n] = 49;
+	n++;
+
+	m_regExs[n].set("hopcount==0");
+	m_harvestLinks       [n] = 1;
+	m_spiderFreqs        [n] = 10; // 30 days default
+	m_maxSpidersPerRule  [n] = 9; // max spiders
+	m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
+	m_spiderIpWaits      [n] = 1000; // same ip wait
+	m_spiderPriorities   [n] = 47;
+	n++;
+
+	m_regExs[n].set("hopcount==1 && isnew");
+	m_harvestLinks       [n] = 1;
+	m_spiderFreqs        [n] = 20; // 30 days default
+	m_maxSpidersPerRule  [n] = 9; // max spiders
+	m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
+	m_spiderIpWaits      [n] = 1000; // same ip wait
+	m_spiderPriorities   [n] = 40;
+	n++;
+
+	m_regExs[n].set("hopcount==1");
+	m_harvestLinks       [n] = 1;
+	m_spiderFreqs        [n] = 20; // 30 days default
+	m_maxSpidersPerRule  [n] = 9; // max spiders
+	m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
+	m_spiderIpWaits      [n] = 1000; // same ip wait
+	m_spiderPriorities   [n] = 39;
+	n++;
+
+	m_regExs[n].set("hopcount==2 && isnew");
+	m_harvestLinks       [n] = 1;
+	m_spiderFreqs        [n] = 40; // 30 days default
+	m_maxSpidersPerRule  [n] = 9; // max spiders
+	m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
+	m_spiderIpWaits      [n] = 1000; // same ip wait
+	m_spiderPriorities   [n] = 30;
+	n++;
+
+	m_regExs[n].set("hopcount==2");
+	m_harvestLinks       [n] = 1;
+	m_spiderFreqs        [n] = 40; // 30 days default
+	m_maxSpidersPerRule  [n] = 9; // max spiders
+	m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
+	m_spiderIpWaits      [n] = 1000; // same ip wait
+	m_spiderPriorities   [n] = 29;
+	n++;
+
+	m_regExs[n].set("hopcount>=3 && isnew");
+	m_harvestLinks       [n] = 1;
+	m_spiderFreqs        [n] = 60; // 30 days default
+	m_maxSpidersPerRule  [n] = 9; // max spiders
+	m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
+	m_spiderIpWaits      [n] = 1000; // same ip wait
+	m_spiderPriorities   [n] = 20;
+	n++;
+
+	m_regExs[n].set("hopcount>=3");
+	m_harvestLinks       [n] = 1;
+	m_spiderFreqs        [n] = 60; // 30 days default
+	m_maxSpidersPerRule  [n] = 9; // max spiders
+	m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
+	m_spiderIpWaits      [n] = 1000; // same ip wait
+	m_spiderPriorities   [n] = 19;
+	n++;
+
+	m_regExs[n].set("isnew");
+	m_harvestLinks       [n] = 1;
+	m_spiderFreqs        [n] = 30; // 30 days default
+	m_maxSpidersPerRule  [n] = 9; // max spiders
+	m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
+	m_spiderIpWaits      [n] = 1000; // same ip wait
+	m_spiderPriorities   [n] = 2;
+	n++;
+
+	m_regExs[n].set("default");
+	m_harvestLinks       [n] = 1;
+	m_spiderFreqs        [n] = 30; // 30 days default
+	m_maxSpidersPerRule  [n] = 9; // max spiders
+	m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
+	m_spiderIpWaits      [n] = 1000; // same ip wait
+	m_spiderPriorities   [n] = 1;
+	n++;
+
+
+	m_numRegExs   = n;
+	m_numRegExs2  = n;
+	m_numRegExs3  = n;
+	m_numRegExs10 = n;
+	m_numRegExs5  = n;
+	m_numRegExs6  = n;
+	m_numRegExs8  = n;
+
+	// more rules
+
+
+

 	//m_spiderDiffbotApiNum[n] = 1;
 	//m_numRegExs11++;
@ -1859,7 +2115,9 @@ bool CollectionRec::save ( ) {
 	snprintf ( tmp , 1023, "%scoll.%s.%li/localcrawlinfo.dat",
 		  g_hostdb.m_dir , m_coll , (long)m_collnum );
 	//log("coll: saving %s",tmp);
-	SafeBuf sb;
+	// in case emergency save from malloc core, do not alloc
+	char stack[1024];
+	SafeBuf sb(stack,1024);
 	//m_localCrawlInfo.print ( &sb );
 	// binary now
 	sb.safeMemcpy ( &m_localCrawlInfo , sizeof(CrawlInfo) );
@ -2029,6 +2287,8 @@ bool CollectionRec::hasSearchPermission ( TcpSocket *s , long encapIp ) {
 }

 bool expandRegExShortcuts ( SafeBuf *sb ) ;
+bool updateSiteListTables ( collnum_t collnum,bool addSeeds,char *siteListArg);
+void nukeDoledb ( collnum_t collnum );

 // . anytime the url filters are updated, this function is called
 // . it is also called on load of the collection at startup
@ -2058,6 +2318,48 @@ bool CollectionRec::rebuildUrlFilters ( ) {
 		}
 	}

+	// if collection is brand new being called from addNewColl()
+	// then sc will be NULL
+	SpiderColl *sc = g_spiderCache.getSpiderCollIffNonNull(m_collnum);
+
+	// . do not do this at startup
+	// . this essentially resets doledb
+	if ( g_doledb.m_rdb.m_initialized && 
+	     // somehow this is initialized before we set m_recs[m_collnum]
+	     // so we gotta do the two checks below...
+	     sc &&
+	     // must be a valid coll
+	     m_collnum < g_collectiondb.m_numRecs &&
+	     g_collectiondb.m_recs[m_collnum] ) {
+
+
+		log("coll: resetting doledb for %s (%li)",m_coll,
+		    (long)m_collnum);
+		
+		// clear doledb recs from tree
+		//g_doledb.getRdb()->deleteAllRecs ( m_collnum );
+		nukeDoledb ( m_collnum );
+		
+		// add it back
+		//if ( ! g_doledb.getRdb()->addRdbBase2 ( m_collnum ) ) 
+		//	log("coll: error re-adding doledb for %s",m_coll);
+		
+		// just start this over...
+		// . MDW left off here
+		//tryToDelete ( sc );
+		// maybe this is good enough
+		//if ( sc ) sc->m_waitingTreeNeedsRebuild = true;
+		
+		CollectionRec *cr = sc->m_cr;
+
+		// . rebuild sitetable? in PageBasic.cpp.
+		// . re-adds seed spdierrequests using msg4
+		// . true = addSeeds
+		updateSiteListTables ( m_collnum , 
+				       true , 
+				       cr->m_siteListBuf.getBufStart() );
+	}
+

 	// only for diffbot custom crawls
 	if ( m_isCustomCrawl != 1 && // crawl api
@ -2082,6 +2384,66 @@ bool CollectionRec::rebuildUrlFilters ( ) {
 	if ( ! upp ) upp = m_diffbotUrlProcessRegEx.getBufStart();
 	if ( upp && ! upp[0] ) upp = NULL;

+	///////
+	//
+	// recompile regular expressions
+	//
+	///////
+
+
+	if ( m_hasucr ) {
+		regfree ( &m_ucr );
+		m_hasucr = false;
+	}
+
+	if ( m_hasupr ) {
+		regfree ( &m_upr );
+		m_hasupr = false;
+	}
+
+	// copy into tmpbuf
+	SafeBuf tmp;
+
+	char *rx = m_diffbotUrlCrawlRegEx.getBufStart();
+	if ( rx && ! rx[0] ) rx = NULL;
+	if ( rx ) {
+		tmp.reset();
+		tmp.safeStrcpy ( rx );
+		expandRegExShortcuts ( &tmp );
+		m_hasucr = true;
+	}
+	if ( rx && regcomp ( &m_ucr , tmp.getBufStart() ,
+			     REG_EXTENDED| //REG_ICASE|
+			     REG_NEWLINE ) ) { // |REG_NOSUB) ) {
+		// error!
+		log("coll: regcomp %s failed: %s. "
+			   "Ignoring.",
+			   rx,mstrerror(errno));
+		regfree ( &m_ucr );
+		m_hasucr = false;
+	}
+
+
+	rx = m_diffbotUrlProcessRegEx.getBufStart();
+	if ( rx && ! rx[0] ) rx = NULL;
+	if ( rx ) m_hasupr = true;
+	if ( rx ) {
+		tmp.reset();
+		tmp.safeStrcpy ( rx );
+		expandRegExShortcuts ( &tmp );
+		m_hasupr = true;
+	}
+	if ( rx && regcomp ( &m_upr , tmp.getBufStart() ,
+			     REG_EXTENDED| // REG_ICASE|
+			     REG_NEWLINE ) ) { // |REG_NOSUB) ) {
+		// error!
+		log("coll: regcomp %s failed: %s. "
+		    "Ignoring.",
+		    rx,mstrerror(errno));
+		regfree ( &m_upr );
+		m_hasupr = false;
+	}
+

 	// what diffbot url to use for processing
 	char *api = m_diffbotApiUrl.getBufStart();
@ -2092,6 +2454,9 @@ bool CollectionRec::rebuildUrlFilters ( ) {
 	// default to 250ms i guess. -1 means unset i think.
 	if ( m_collectiveCrawlDelay < 0.0 ) wait = 250;

+	bool isEthan = false;
+	if (m_coll)isEthan=strstr(m_coll,"2b44a0e0bb91bbec920f7efd29ce3d5b");
+
 	// make the gigablast regex table just "default" so it does not
 	// filtering, but accepts all urls. we will add code to pass the urls
 	// through m_diffbotUrlCrawlPattern alternatively. if that itself
@ -2102,6 +2467,9 @@ bool CollectionRec::rebuildUrlFilters ( ) {
 		m_maxSpidersPerRule [i] = 100;
 		m_spiderIpWaits     [i] = wait;
 		m_spiderIpMaxSpiders[i] = 7; // keep it respectful
+		// ethan wants some speed
+		if ( isEthan )
+			m_spiderIpMaxSpiders[i] = 30;
 		//m_spidersEnabled    [i] = 1;
 		m_spiderFreqs       [i] =m_collectiveRespiderFrequency;
 		//m_spiderDiffbotApiUrl[i].purge();
@ -2110,33 +2478,53 @@ bool CollectionRec::rebuildUrlFilters ( ) {

 	long i = 0;

+	// 1st one! for query reindex/ query delete
+	m_regExs[i].set("isdocidbased");
+	m_spiderIpMaxSpiders [i] = 10;
+	m_spiderPriorities   [i] = 70;
+	i++;

-	// 1st default url filter
+	// 2nd default url filter
 	m_regExs[i].set("ismedia && !ismanualadd");
 	m_spiderPriorities   [i] = SPIDER_PRIORITY_FILTERED;
 	i++;

 	// 2nd default filter
-	if ( m_restrictDomain ) {
+	// always turn this on for now. they need to add domains they want
+	// to crawl as seeds so they do not spider the web.
+	// no because FTB seeds with link pages that link to another
+	// domain. they just need to be sure to supply a crawl pattern
+	// to avoid spidering the whole web.
+	//
+	// if they did not EXPLICITLY provide a url crawl pattern or
+	// url crawl regex then restrict to seeds to prevent from spidering
+	// the entire internet
+	if ( ! ucp && ! m_hasucr ) { // m_restrictDomain ) {
 		m_regExs[i].set("!isonsamedomain && !ismanualadd");
 		m_spiderPriorities   [i] = SPIDER_PRIORITY_FILTERED;
 		i++;
 	}

+	// rule for urls whose error is not a temporary one: spiders are
+	// turned off for it, so the respider frequency has no effect
+	m_regExs[i].set("errorcount>=1 && !hastmperror");
+	m_spiderPriorities   [i] = 15;
+	m_spiderFreqs        [i] = 0.00; // 0 days
+	m_maxSpidersPerRule  [i] = 0; // turn off spiders if not tmp error
+	i++;
+
 	// and for docs that have errors respider once every 5 hours
-	m_regExs[i].set("errorcount==1");
+	m_regExs[i].set("errorcount==1 && hastmperror");
 	m_spiderPriorities   [i] = 40;
 	m_spiderFreqs        [i] = 0.001; // 86 seconds
 	i++;

 	// and for docs that have errors respider once every 5 hours
-	m_regExs[i].set("errorcount==2");
+	m_regExs[i].set("errorcount==2 && hastmperror");
 	m_spiderPriorities   [i] = 40;
 	m_spiderFreqs        [i] = 0.1; // 2.4 hrs
 	i++;

 	// excessive errors? (tcp/dns timed out, etc.) retry once per month?
-	m_regExs[i].set("errorcount>=3");
+	m_regExs[i].set("errorcount>=3 && hastmperror");
 	m_spiderPriorities   [i] = 30;
 	m_spiderFreqs        [i] = 30; // 30 days
 	i++;
@ -2240,63 +2628,9 @@ bool CollectionRec::rebuildUrlFilters ( ) {
 	m_numRegExs8  = i;
 	//m_numRegExs11 = i;

-	///////
-	//
-	// recompile regular expressions
-	//
-	///////

-
-	if ( m_hasucr ) {
-		regfree ( &m_ucr );
-		m_hasucr = false;
-	}
-
-	if ( m_hasupr ) {
-		regfree ( &m_upr );
-		m_hasupr = false;
-	}
-
-	// copy into tmpbuf
-	SafeBuf tmp;
-
-	char *rx = m_diffbotUrlCrawlRegEx.getBufStart();
-	if ( rx && ! rx[0] ) rx = NULL;
-	if ( rx ) {
-		tmp.safeStrcpy ( rx );
-		expandRegExShortcuts ( &tmp );
-		m_hasucr = true;
-	}
-	if ( rx && regcomp ( &m_ucr , tmp.getBufStart() ,
-			     REG_EXTENDED| //REG_ICASE|
-			     REG_NEWLINE ) ) { // |REG_NOSUB) ) {
-		// error!
-		log("coll: regcomp %s failed: %s. "
-			   "Ignoring.",
-			   rx,mstrerror(errno));
-		regfree ( &m_ucr );
-		m_hasucr = false;
-	}
-
-
-	rx = m_diffbotUrlProcessRegEx.getBufStart();
-	if ( rx && ! rx[0] ) rx = NULL;
-	if ( rx ) m_hasupr = true;
-	if ( rx ) {
-		tmp.safeStrcpy ( rx );
-		expandRegExShortcuts ( &tmp );
-		m_hasupr = true;
-	}
-	if ( rx && regcomp ( &m_upr , tmp.getBufStart() ,
-			     REG_EXTENDED| // REG_ICASE|
-			     REG_NEWLINE ) ) { // |REG_NOSUB) ) {
-		// error!
-		log("coll: regcomp %s failed: %s. "
-		    "Ignoring.",
-		    rx,mstrerror(errno));
-		regfree ( &m_upr );
-		m_hasupr = false;
-	}
+	//char *x = "http://staticpages.diffbot.com/testCrawl/article1.html";
+	//if(m_hasupr && regexec(&m_upr,x,0,NULL,0) ) { char *xx=NULL;*xx=0; }

 	return true;
 }
--- a/Collectiondb.h
+++ b/Collectiondb.h
@ -95,7 +95,7 @@ class Collectiondb  {

 	// . does this requester have root admin privledges???
 	// . uses the root collection record!
-	bool isAdmin ( class HttpRequest *r , class TcpSocket *s );
+	//bool isAdmin ( class HttpRequest *r , class TcpSocket *s );

 	//collnum_t getNextCollnum ( collnum_t collnum );

@ -129,6 +129,7 @@ class Collectiondb  {
 	bool addRdbBaseToAllRdbsForEachCollRec ( ) ;
 	bool addRdbBasesForCollRec ( CollectionRec *cr ) ;

+	bool growRecPtrBuf ( collnum_t collnum ) ;
 	bool setRecPtr ( collnum_t collnum , CollectionRec *cr ) ;

 	// returns false if blocked, true otherwise. 
@ -138,7 +139,7 @@ class Collectiondb  {
 	//bool updateRec ( CollectionRec *newrec );
 	bool deleteRecs ( class HttpRequest *r ) ;

-	void deleteSpiderColl ( class SpiderColl *sc );
+	//void deleteSpiderColl ( class SpiderColl *sc );

 	// returns false if blocked, true otherwise. 
 	//bool resetColl ( char *coll , WaitEntry *we , bool purgeSeeds );
@ -310,10 +311,10 @@ class CollectionRec {
 	// . set ourselves the cgi parms in an http request
 	// . unspecified cgi parms will be assigned default values
 	// . returns false and sets errno on error
-	bool set ( class HttpRequest *r , TcpSocket *s );
+	bool set ( class HttpRequest *r , class TcpSocket *s );

 	// calls hasPermission() below
-	bool hasPermission ( class HttpRequest *r , TcpSocket *s ) ;
+	bool hasPermission ( class HttpRequest *r , class TcpSocket *s ) ;

 	// . does this user have permission for editing this collection?
 	// . "p" is the password for this collection in question
@ -326,7 +327,7 @@ class CollectionRec {
 	// . can this ip perform a search or add url on this collection?
 	// . mamma.com provides encapsulated ips of their queriers so we
 	//   can ban them by ip
-	bool hasSearchPermission ( TcpSocket *s , long encapIp = 0 );
+	bool hasSearchPermission ( class TcpSocket *s , long encapIp = 0 );

 	// how many bytes would this record occupy in raw binary format?
 	//long getStoredSize () { return m_recSize; };
@ -458,7 +459,7 @@ class CollectionRec {
 	char  m_enforceNewQuotas        ;
 	char  m_doIpLookups             ; // considered iff using proxy
 	char  m_useRobotsTxt            ;
-	char  m_restrictDomain          ; // say on same domain as seeds?
+	//char  m_restrictDomain          ; // say on same domain as seeds?
 	char  m_doTuringTest            ; // for addurl
 	char  m_applyFilterToText       ; // speeds us up
 	char  m_allowHttps              ; // read HTTPS using SSL
@ -640,7 +641,7 @@ class CollectionRec {
 	long    m_hasucr:1;
 	long    m_hasupr:1;

-	char    m_diffbotOnlyProcessIfNew;
+	char    m_diffbotOnlyProcessIfNewUrl;

 	//SafeBuf m_diffbotClassify;
 	//char m_diffbotClassify;
@ -678,6 +679,9 @@ class CollectionRec {
 	// for storing callbacks waiting in line for freshest crawl info
 	//SafeBuf m_callbackQueue;
 	
+	// list of url patterns to be indexed.
+	SafeBuf m_siteListBuf;
+	char m_spiderToo;

 	// . now the url regular expressions
 	// . we chain down the regular expressions
--- a/Conf.cpp
+++ b/Conf.cpp
@ -18,6 +18,7 @@ Conf::Conf ( ) {
 // . master admin can administer ALL collections
 // . use CollectionRec::hasPermission() to see if has permission
 //   to adminster one particular collection
+/*
 bool Conf::isMasterAdmin ( TcpSocket *s , HttpRequest *r ) {
 	// sometimes they don't want to be admin intentionally for testing
 	if ( r->getLong ( "master" , 1 ) == 0 ) return false;
@ -64,37 +65,84 @@ bool Conf::isMasterAdmin ( TcpSocket *s , HttpRequest *r ) {
 	// check admin ips
 	// scan the passwords
 	// MDW: no! too vulnerable to attacks!
-	/*
-	for ( long i = 0 ; i < m_numMasterPwds ; i++ ) {
-		if ( strcmp ( m_masterPwds[i], p ) != 0 ) continue;
-		// . matching one password is good enough now, default OR
-		// . because just matching an IP is good enough security,
-		//   there is really no need for both IP AND passwd match
-		return true;
-	}
-	*/
+	//for ( long i = 0 ; i < m_numMasterPwds ; i++ ) {
+	//	if ( strcmp ( m_masterPwds[i], p ) != 0 ) continue;
+	//	// . matching one password is good enough now, default OR
+	//	// . because just matching an IP is good enough security,
+	//	//   there is really no need for both IP AND passwd match
+	//	return true;
+	//}
 	// ok, make sure they came from an acceptable IP
-	if ( isAdminIp ( ip ) )
+	if ( isRootIp ( ip ) )
 		// they also have a matching IP, so they now have permission
 		return true;
 	// if no security, allow all
 	// MDW: nonononono!!!!
-	/*
-	if ( m_numMasterPwds == 0 && 
-	     m_numMasterIps  == 0   ) return true;
-	*/
+	//if ( m_numMasterPwds == 0 && 
+	//     m_numMasterIps  == 0   ) return true;
 	// if they did not match an ip or password, even if both lists
 	// are empty, do not allow access... this prevents security breeches
 	// by accident
 	return false;
 }
+*/
+
+// . collection-level admin check
+// . there is no per-collection test here yet, it simply returns
+//   whatever isRootAdmin() returns for the same socket and request
+bool Conf::isCollAdmin ( TcpSocket *socket , HttpRequest *hr ) {
+	// until we have coll tokens use this...
+	return isRootAdmin ( socket , hr );
+}
+
+// . root administrator check
+// . passes when neither connect ips nor master passwords are configured,
+//   or the socket's ip is a root ip, or the request has a root password
+// . ip *OR* password is sufficient, both are not required
+bool Conf::isRootAdmin ( TcpSocket *socket , HttpRequest *hr ) {
+	// nothing configured at all means totally open access
+	bool noIps  = ( m_numConnectIps <= 0 );
+	bool noPwds = ( m_numMasterPwds <= 0 );
+	if ( noIps && noPwds ) return true;
+	// . coming from a root ip gets you in, otherwise try the password
+	// . the isConnectIp() test stays disabled
+	return isRootIp ( socket->m_ip ) || hasRootPwd ( hr );
+}
+
+
+// . does the request carry one of the configured master passwords?
+// . looks at cgi parm "pwd", then "password", then the "pwd" cookie
+bool Conf::hasRootPwd ( HttpRequest *hr ) {
+	// no passwords configured, nothing can match
+	if ( m_numMasterPwds == 0 ) return false;
+	// first source that supplies a value wins
+	char *given = hr->getString("pwd");
+	if ( ! given ) given = hr->getString("password");
+	if ( ! given ) given = hr->getStringFromCookie("pwd");
+	if ( ! given ) return false;
+	// matching any one configured password is enough
+	long n = 0;
+	while ( n < m_numMasterPwds ) {
+		if ( strcmp ( m_masterPwds[n], given ) == 0 ) return true;
+		n++;
+	}
+	return false;
+}

 // . check this ip in the list of admin ips
-bool Conf::isAdminIp ( unsigned long ip ) {
-	for ( long i = 0 ; i < m_numMasterIps ; i++ ) 
-		if ( m_masterIps[i] == (long)ip )
+bool Conf::isRootIp ( unsigned long ip ) {
+
+	//if ( m_numMasterIps == 0 ) return false;
+	if ( m_numConnectIps == 0 ) return false;
+
+	for ( long i = 0 ; i < m_numConnectIps ; i++ ) 
+		if ( m_connectIps[i] == (long)ip )
 			return true;
+
 	//if ( ip == atoip("10.5.0.2",8) ) return true;
+
 	// no match
 	return false;
 }
@ -124,8 +172,17 @@ bool Conf::init ( char *dir ) { // , long hostId ) {
 	g_parms.setToDefault ( (char *)this );
 	m_save = true;
 	char fname[1024];
-	if ( dir ) sprintf ( fname , "%sgb.conf", dir );
-	else       sprintf ( fname , "./gb.conf" );
+	if ( dir ) sprintf ( fname , "%slocalgb.conf", dir );
+	else       sprintf ( fname , "./localgb.conf" );
+	File f;
+	f.set ( fname );
+	m_isLocal = true;
+	if ( ! f.doesExist() ) {
+		m_isLocal = false;
+		if ( dir ) sprintf ( fname , "%sgb.conf", dir );
+		else       sprintf ( fname , "./gb.conf" );
+	}
+
 	// make sure g_mem.maxMem is big enough temporarily
 	if ( g_mem.m_maxMem < 10000000 ) g_mem.m_maxMem = 10000000;
 	bool status = g_parms.setFromFile ( this , fname , NULL );
@ -351,7 +408,9 @@ bool Conf::save ( ) {
 	bool status = g_parms.saveToXml ( (char *)this , fname );
 	if ( status ) {
 		char fname2[1024];
-		sprintf( fname2 , "%sgb.conf" , g_hostdb.m_dir );
+		char *local = "";
+		if ( m_isLocal ) local = "local";
+		sprintf( fname2 , "%s%sgb.conf" , g_hostdb.m_dir , local );
 		if(access(fname2, F_OK) == 0) unlink(fname2);
 		if(link(fname, fname2) == 0) {
 			unlink(fname);
--- a/Conf.h
+++ b/Conf.h
@ -24,7 +24,7 @@
 #include "Collectiondb.h"

 #define MAX_MASTER_IPS        15
-#define MAX_MASTER_PASSWORDS  10
+#define MAX_MASTER_PASSWORDS  5

 #define USERAGENTMAXSIZE      128

@ -49,9 +49,13 @@ class Conf {
 	
 	Conf();

-	bool isMasterAdmin  ( class TcpSocket *s , class HttpRequest *r );
-	bool isSpamAssassin ( class TcpSocket *s , class HttpRequest *r );
-	bool isAdminIp      ( unsigned long ip );
+	bool isCollAdmin ( TcpSocket *socket , HttpRequest *hr ) ;
+
+	bool isRootAdmin ( TcpSocket *socket , HttpRequest *hr ) ;
+	//bool isMasterAdmin  ( class TcpSocket *s , class HttpRequest *r );
+	//bool isSpamAssassin ( class TcpSocket *s , class HttpRequest *r );
+	bool hasRootPwd ( HttpRequest *hr ) ;
+	bool isRootIp      ( unsigned long ip );
 	bool isConnectIp    ( unsigned long ip );

 	// loads conf parms from this file "{dir}/gb.conf"
@ -94,6 +98,8 @@ class Conf {
 	// a core dump saving them
 	char m_save;

+	bool m_isLocal;
+
 	//director info (optional) (used iff m_isTrustedNet is false)
 	//public_key  m_dirPubKey;  // everyone should know director's pub key
 	//private_key m_dirPrivKey;   // this is 0 if we don't know it
@ -663,9 +669,10 @@ class Conf {

 	long m_numMasterPwds;
 	char m_masterPwds[MAX_MASTER_PASSWORDS][PASSWORD_MAX_LEN];
-	long m_numMasterIps;
-	long m_masterIps[MAX_MASTER_IPS];
+	//long m_numMasterIps;
+	//long m_masterIps[MAX_MASTER_IPS];

+	// these are the new master ips
 	long  m_numConnectIps;
 	long  m_connectIps [ MAX_CONNECT_IPS ];

--- a/Datedb.cpp
+++ b/Datedb.cpp
@ -145,10 +145,11 @@ bool Datedb::verify ( char *coll ) {
 	key_t endKey;
 	startKey.setMin();
 	endKey.setMax();
+	CollectionRec *cr = g_collectiondb.getRec(coll);
 	//long minRecSizes = 64000;
 	
 	if ( ! msg5.getList ( RDB_DATEDB    ,
-			      coll          ,
+			      cr->m_collnum ,
 			      &list         ,
 			      startKey      ,
 			      endKey        ,
--- a/Dates.cpp
+++ b/Dates.cpp
@ -1318,7 +1318,7 @@ sections. -- todo -- might be an alignment issue... check out later

 // . make a whole new set of urls for pub date detection
 // . grab that sample set from buzz wiki page
-// . record the correct pub date for urls in the "test" coll and make sure
+// . record the correct pub date for urls in the "qatest123" coll and make sure
 //   we get them each time, otherwise core dump!!
 // . check the date we extract with the rss feed. that is a good test too!
 //   report on that accuracy in the logs and on the stats page.
@ -2428,7 +2428,7 @@ bool Dates::setPart1 ( //char       *u        ,
 	//if ( m_nw != words->m_numWords ) { char *xx=NULL; *xx=0; }

 	// . get the current time in utc
-	// . NO! to ensure the "test" collection re-injects docs exactly
+	// . NO! to ensure the "qatest123" collection re-injects docs exactly
 	//   the same, use the spideredTime from the doc
 	// . we make sure to save this in the test subdir somehow..
 	//m_now      = nd->m_spideredTime; // getTimeSynced();
@ -3283,7 +3283,7 @@ bool Dates::setPart1 ( //char       *u        ,
 	//    DF_NOTCLOCK flags from this.

 	// . current time. sync'd with host #0 who uses ntp supposedly...! :(
-	// . to ensure that the "test" subdir re-injects docs exactly the
+	// . to ensure that the "qatest123" subdir re-injects docs exactly the
 	//   same, we need to use this date now
 	long now = nd->m_spideredTime; 
 	// how long has elapsed since we downloaded it last approx.?
@ -3294,7 +3294,8 @@ bool Dates::setPart1 ( //char       *u        ,
 	// might have been different than ours... actually i think our
 	// spiderdate.txt file had an older date in it from a previous round!
 	// so disable this when test spidering.
-	if ( elapsed<0 && g_conf.m_testSpiderEnabled && !strcmp(m_coll,"test"))
+	if ( elapsed<0 && g_conf.m_testSpiderEnabled && !strcmp(m_coll,
+								"qatest123"))
 		elapsed = 0;
 	// is true.
 	if ( elapsed < 0 ) { 
--- a/DiskPageCache.cpp
+++ b/DiskPageCache.cpp
@ -108,7 +108,10 @@ bool DiskPageCache::init ( const char *dbname ,
 			//   void (*rmVfd2)(DiskPageCache*, long) ) {
 	reset();

-	// fix cores while rebalancing
+	// seems like we lose data when it prints "Caught add breach"
+	// so let's stop using until we fix that... happens while we are
+	// dumping i think and somehow the data seems to get lost that
+	// we were dumping.
 	//maxMem = 0;

 	m_rdbId = rdbId;
--- a/Errno.cpp
+++ b/Errno.cpp
@ -167,6 +167,7 @@ case	EFAKEFIRSTIP: return "Fake firstIp";
 case	EBADHOSTSCONF: return "A hosts.conf is out of sync";
 case    EWAITINGTOSYNCHOSTSCONF: return "Wait to ensure hosts.conf in sync";
 case	EDOCNONCANONICAL: return "Url was dup of canonical page";
+case    ECUSTOMCRAWLMISMATCH: return "Job name/type mismatch. Job name has already been used for a crawl or bulk job.";
 	}
 	// if the remote error bit is clear it must be a regulare errno
 	//if ( ! ( errnum & REMOTE_ERROR_BIT ) ) return strerror ( errnum );
--- a/Errno.h
+++ b/Errno.h
@ -170,6 +170,7 @@ enum {
 	EFAKEFIRSTIP,
 	EBADHOSTSCONF,
 	EWAITINGTOSYNCHOSTSCONF,
-	EDOCNONCANONICAL
+	EDOCNONCANONICAL,
+	ECUSTOMCRAWLMISMATCH  // a crawl request was made with a name that already existed for bulk request (or the other way around)
 };
 #endif
--- a/HashTableX.h
+++ b/HashTableX.h
@ -21,6 +21,9 @@ class HashTableX {
 		   char *allocName       ,
 		   bool  useKeyMagic = false );

+	// . true if the key size, m_ks, is non-zero
+	// . key size is 0 if UNinitialized
+	bool isInitialized ( ) { return (m_ks != 0); };
+
 	 HashTableX       ( );
 	~HashTableX       ( );
 	void constructor ();
@ -389,6 +392,10 @@ class HashTableX {
 	long getNumSlotsUsed ( ) { return m_numSlotsUsed; };
 	long getNumUsedSlots ( ) { return m_numSlotsUsed; };

+	// true if no slots are in use
+	bool isEmpty() { return ( m_numSlotsUsed == 0 ); };
+
 	// how many are there total? used and unused.
 	long getNumSlots ( ) { return m_numSlots; };

--- a/Hostdb.cpp
+++ b/Hostdb.cpp
@ -2315,10 +2315,10 @@ uint32_t Hostdb::getShardNum ( char rdbId,void *k ) { // ,bool split ) {
 	else if ( rdbId == RDB_LINKDB || rdbId == RDB2_LINKDB2 ) {
 		return m_map [(*(uint16_t *)((char *)k + 26))>>3];	
 	}
-	else if ( rdbId == RDB_TFNDB || rdbId == RDB2_TFNDB2 ) {
-		unsigned long long d = g_tfndb.getDocId ( (key_t *)k );
-		return m_map [ ((d>>14)^(d>>7)) & (MAX_KSLOTS-1) ];
-	}
+	//else if ( rdbId == RDB_TFNDB || rdbId == RDB2_TFNDB2 ) {
+	//	unsigned long long d = g_tfndb.getDocId ( (key_t *)k );
+	//	return m_map [ ((d>>14)^(d>>7)) & (MAX_KSLOTS-1) ];
+	//}
 	else if ( rdbId == RDB_TITLEDB || rdbId == RDB2_TITLEDB2 ) {
 		unsigned long long d = g_titledb.getDocId ( (key_t *)k );
 		return m_map [ ((d>>14)^(d>>7)) & (MAX_KSLOTS-1) ];
--- a/Hostdb.h
+++ b/Hostdb.h
@ -53,7 +53,7 @@ enum {
 #define PFLAG_RECOVERYMODE   0x80

 // added slow disk reads to it, 4 bytes (was 52)
-#define MAX_PING_SIZE (44+4)
+#define MAX_PING_SIZE (44+4+4)

 #define HT_GRUNT   0x01
 #define HT_SPARE   0x02
@ -144,6 +144,8 @@ class Host {
 	// cpu usage
 	float          m_cpuUsage;

+	float          m_diskUsage;
+
 	long          m_slowDiskReads;

 	// doc count
--- a/HttpRequest.cpp
+++ b/HttpRequest.cpp
@ -6,8 +6,21 @@
 HttpRequest::HttpRequest () { m_cgiBuf = NULL; m_cgiBuf2 = NULL; reset(); }
 HttpRequest::~HttpRequest() { reset();      }

+// . which FORMAT_* should the reply use, per the "format" cgi parm
+// . "json" and "xml" select those formats, anything else (or no parm)
+//   gives FORMAT_HTML
+// . the answer is cached in m_replyFormat until reset() clears
+//   m_replyFormatValid
+char HttpRequest::getReplyFormat() {
+	// only parse the parm the first time through
+	if ( ! m_replyFormatValid ) {
+		char *fmtStr = getString("format",NULL,NULL);
+		char  chosen = FORMAT_HTML;
+		if ( fmtStr ) {
+			if      ( ! strcmp(fmtStr,"json") ) chosen = FORMAT_JSON;
+			else if ( ! strcmp(fmtStr,"xml" ) ) chosen = FORMAT_XML;
+		}
+		m_replyFormat      = chosen;
+		m_replyFormatValid = true;
+	}
+	return m_replyFormat;
+}
+
 void HttpRequest::reset() {
 	m_numFields = 0;
+	m_replyFormatValid = false;
 	//if ( m_cgiBuf ) mfree ( m_cgiBuf , m_cgiBufMaxLen , "HttpRequest");
 	m_cgiBufLen    = 0;
 	m_cgiBuf       = NULL;
--- a/HttpRequest.h
+++ b/HttpRequest.h
@ -27,6 +27,16 @@
 #include "Url.h"       // Url class
 #include "TcpSocket.h"

+// values for HttpRequest::m_replyFormat
+#define FORMAT_HTML 1
+#define FORMAT_XML  2
+#define FORMAT_JSON 3
+#define FORMAT_CSV  4
+#define FORMAT_TXT  5
+#define FORMAT_PROCOG 6
+
+
+
 class HttpRequest {

 public:
@ -59,6 +69,11 @@ class HttpRequest {
 		//return m_buf;
 	};

+	// FORMAT_HTML FORMAT_JSON FORMAT_XML
+	char getReplyFormat();
+	bool m_replyFormatValid;
+	char m_replyFormat;
+
 	// get the referer field of the MIME header
 	char *getReferer () { return m_ref; };

--- a/HttpServer.cpp
+++ b/HttpServer.cpp
@ -986,6 +986,7 @@ bool HttpServer::sendReply ( TcpSocket  *s , HttpRequest *r , bool isAdmin) {
 	if ( strncmp(path,"/crawlbot",9) == 0 ) n = PAGE_CRAWLBOT;
 	if ( strncmp(path,"/v2/crawl",9) == 0 ) n = PAGE_CRAWLBOT;
 	if ( strncmp(path,"/v2/bulk" ,8) == 0 ) n = PAGE_CRAWLBOT;
+	// compare all 10 chars of "/v2/search"; a length of 8 would also
+	// route any other path starting with "/v2/sear" to the results page
+	if ( strncmp(path,"/v2/search",10) == 0 ) n = PAGE_RESULTS;
 	
 	bool isProxy = g_proxy.isProxy();
 	// . prevent coring
@ -1401,6 +1402,7 @@ bool HttpServer::sendReply2 ( char *mime,

 	// . store the login/logout links after <body> tag
 	// . only proxy should provide a non-null hr right now
+	/*
 	if ( hr ) {
 		long newReplySize;
 		char *newReply = g_proxy.storeLoginBar ( sendBuf, 
@ -1417,6 +1419,7 @@ bool HttpServer::sendReply2 ( char *mime,
 		sendBufSize  = newReplySize;
 		sendBufAlloc = newReplySize;
 	}
+	*/

 	// . send it away
 	// . this returns false if blocked, true otherwise
@ -1900,7 +1903,7 @@ long getMsgSize ( char *buf, long bufSize, TcpSocket *s ) {
 			max = 0x7fffffff; // maxOtherDocLen not available
 		// if post is a /cgi/12.cgi (tagdb) allow 10 megs
 		//if ( pp + 11 < ppend && strncmp ( pp ,"/cgi/12.cgi",11)==0)
-		if ( pp + 11 < ppend && strncmp ( pp ,"/master/tagdb",13)==0)
+		if ( pp + 12 < ppend && strncmp ( pp ,"/admin/tagdb",12)==0)
 			max = 10*1024*1024;
 		if ( pp + 4 < ppend && strncmp ( pp ,"/vec",4)==0)
 			max = 0x7fffffff;
--- a/Images.cpp
+++ b/Images.cpp
@ -221,7 +221,7 @@ bool Images::getThumbnail ( char *pageSite ,
 			    long  siteLen  ,
 			    long long docId ,
 			    XmlDoc *xd ,
-			    char *coll ,
+			    collnum_t collnum,//char *coll ,
 			    char **statusPtr ,
 			    long hopCount,
 			    void *state ,
@ -246,7 +246,7 @@ bool Images::getThumbnail ( char *pageSite ,
 	// save these
 	m_statusPtr = statusPtr;
 	// save this
-	m_coll  = coll;
+	m_collnum = collnum;
 	m_docId = docId;

 	// if no candidates, we are done, no error
@ -280,7 +280,7 @@ bool Images::getThumbnail ( char *pageSite ,
 	// store the termid
 	long long termId = q.getTermId(0);

-	if ( ! m_msg36.getTermFreq ( coll               ,
+	if ( ! m_msg36.getTermFreq ( m_collnum               ,
 				     0                  , // maxAge
 				     termId             ,
 				     this               ,
@ -340,7 +340,7 @@ bool Images::launchRequests ( ) {
 					0     , // maxAge
 					false , // addToCache?
 					RDB_INDEXDB ,
-					m_coll      ,
+					m_collnum      ,
 					&m_list     , // RdbList ptr
 					startKey    ,
 					endKey      ,
@ -414,6 +414,9 @@ bool Images::downloadImages () {
 		mfree ( m_imgBuf , m_imgBufMaxLen , "Image" );
 		m_imgBuf = NULL;
 	}
+
+	CollectionRec *cr = g_collectiondb.getRec(m_collnum);
+
 	// . download each leftover image
 	// . stop as soon as we get one with good dimensions
 	// . make a thumbnail of that one
@ -442,7 +445,7 @@ bool Images::downloadImages () {
 		r->reset();
 		r->m_maxTextDocLen  = 200000;
 		r->m_maxOtherDocLen = 500000;
-		if ( ! strcmp(m_coll,"test")) {
+		// . only the "qatest123" collection uses the test cache
+		// . the getRec() lookup of m_collnum is not otherwise checked,
+		//   so do not dereference cr if it came back NULL
+		if ( cr && ! strcmp(cr->m_coll,"qatest123")) {
 			r->m_useTestCache   = 1;
 			r->m_addToTestCache = 1;
 		}
--- a/Images.h
+++ b/Images.h
@ -41,7 +41,7 @@ class Images {
 			    long  siteLen  ,
 			    long long docId ,
 			    class XmlDoc *xd ,
-			    char *coll ,
+			    collnum_t collnum,
 			    char **statusPtr ,
 			    long hopCount,
 			    void   *state ,
@ -71,7 +71,7 @@ class Images {
 	bool      m_stopDownloading;
 	char    **m_statusPtr;
 	char      m_statusBuf[128];
-	char     *m_coll;
+	collnum_t m_collnum;

 	long long   m_docId;
 	IndexList   m_list;
--- a/Indexdb.cpp
+++ b/Indexdb.cpp
@ -202,9 +202,10 @@ bool Indexdb::verify ( char *coll ) {
 	startKey.setMin();
 	endKey.setMax();
 	//long minRecSizes = 64000;
+	CollectionRec *cr = g_collectiondb.getRec(coll);
 	
 	if ( ! msg5.getList ( RDB_INDEXDB   ,
-			      coll          ,
+			      cr->m_collnum ,
 			      &list         ,
 			      startKey      ,
 			      endKey        ,
@ -293,6 +294,7 @@ void Indexdb::deepVerify ( char *coll ) {
 	RdbBase *rdbBase = g_indexdb.m_rdb.getBase(collnum);
 	long numFiles = rdbBase->getNumFiles();
 	long currentFile = 0;
+	CollectionRec *cr = g_collectiondb.getRec(coll);
 	
 deepLoop:
 	// done after scanning all files
@ -304,7 +306,7 @@ deepLoop:
 	}
 	// scan this file
 	if ( ! msg5.getList ( RDB_INDEXDB   ,
-			      coll          ,
+			      cr->m_collnum ,
 			      &list         ,
 			      startKey      ,
 			      endKey        ,
@ -389,7 +391,7 @@ key_t Indexdb::makeKey ( long long          termId   ,

 // . accesses RdbMap to estimate size of the indexList for this termId
 // . returns an UPPER BOUND
-long long Indexdb::getTermFreq ( char *coll , long long termId ) {
+long long Indexdb::getTermFreq ( collnum_t collnum , long long termId ) {
 	// establish the list boundary keys
 	key_t startKey = makeStartKey ( termId );
 	key_t endKey   = makeEndKey   ( termId );
@ -403,7 +405,7 @@ long long Indexdb::getTermFreq ( char *coll , long long termId ) {
 	long oldTrunc = 100000;
 	// get maxKey for only the top "oldTruncLimit" docids because when
 	// we increase the trunc limit we screw up our extrapolation! BIG TIME!
-	maxRecs = m_rdb.getListSize(coll,startKey,endKey,&maxKey,oldTrunc )/6;
+	maxRecs=m_rdb.getListSize(collnum,startKey,endKey,&maxKey,oldTrunc )/6;
 	// . TRUNCATION NOW OBSOLETE
 	return maxRecs;
 	
@ -427,7 +429,7 @@ long long Indexdb::getTermFreq ( char *coll , long long termId ) {
 	// . modify maxKey
 	key_t midKey = g_indexdb.makeKey   ( termId , shy , 0LL , true );
 	// get # of recs that have this termId and score
-	long  lastChunk = m_rdb.getListSize(coll,
+	long  lastChunk = m_rdb.getListSize(collnum,
 					    midKey,endKey,&maxKey,oldTrunc)/ 6;
 	// now interpolate number of uncounted docids for the score "shy"
 	long remaining = (((long long)lastChunk) * lastDocId) / 
--- a/Indexdb.h
+++ b/Indexdb.h
@ -164,7 +164,7 @@ class Indexdb {
 	// . accesses RdbMap to estimate size of the indexList for this termId
 	// . returns a pretty tight upper bound if indexList not truncated
 	// . if truncated, it's does linear interpolation (use exponential!)
-	long long getTermFreq ( char *coll , long long termId ) ;
+	long long getTermFreq ( collnum_t collnum , long long termId ) ;

 	//long getTruncationLimit ( ){return g_conf.m_indexdbTruncationLimit;};

--- a/Json.cpp
+++ b/Json.cpp
@ -421,3 +421,23 @@ bool JsonItem::isInArray ( ) {
 	}
 	return false;
 }
+
+// . return this item's value as text
+// . strings are returned as stored; any other type is formatted from
+//   m_valueLong / m_valueDouble
+// . sets *valueLen to the length of the returned text
+// . non-string results live in a static buffer that the next call
+//   overwrites, so copy the text if it must outlive that call
+char *JsonItem::getValueAsString ( long *valueLen ) {
+
+	// strings are the same
+	if ( m_type == JT_STRING ) {
+		*valueLen = getValueLen();
+		return getValue();
+	}
+
+	// numbers...
+	static char s_numBuf[64];
+	long len;
+	// . print as an integer when the double holds the same whole value
+	// . compare as double, not float: a float has a 24-bit mantissa,
+	//   so whole numbers above 2^24 failed this test and came out
+	//   in "%f" form
+	// . snprintf keeps a huge "%f" expansion from overrunning s_numBuf
+	if ( (double)m_valueLong == m_valueDouble )
+		len = snprintf ( s_numBuf,sizeof(s_numBuf),
+				 "%li", m_valueLong );
+	else
+		len = snprintf ( s_numBuf,sizeof(s_numBuf),
+				 "%f", m_valueDouble );
+	// snprintf returns the untruncated length (or < 0 on error), so
+	// report only what is actually stored
+	if ( len < 0 ) { len = 0; s_numBuf[0] = '\0'; }
+	if ( len >= (long)sizeof(s_numBuf) ) len = (long)sizeof(s_numBuf) - 1;
+	*valueLen = len;
+	return s_numBuf;
+}
--- a/Json.h
+++ b/Json.h
@ -51,6 +51,9 @@ class JsonItem {
 		return (char *)this + sizeof(JsonItem);
 	};

+	// convert numbers and bools to strings for this one
+	char *getValueAsString ( long *valueLen ) ;
+
 	// like acme.product.offerPrice if "acme:{product:{offerprice:1.23}}"
 	bool getCompoundName ( SafeBuf &nameBuf ) ;

--- a/4
+++ b/4
@ -198,7 +198,3 @@ license that then you can arrange a licensing agreement with Matt Wells.

 Likewise, the Event datamining logic is in Events.cpp and must be separately licensed 
 as well.
-
-And any code in between "#ifdef NEEDLICENSE" and "#endif" statements is not
-covered by this license and must be licensed separately, too. That code is
-not compiled by default and only pertains to a few isolated things.
--- a/Linkdb.cpp
+++ b/Linkdb.cpp
@ -14,6 +14,7 @@ void Linkdb::reset() {
 }

 bool Linkdb::init ( ) {
+
 	key224_t  k;
 	// sanity tests
 	uint32_t    linkeeSiteHash32 = (uint32_t)rand();
@ -198,9 +199,10 @@ bool Linkdb::verify ( char *coll ) {
 	startKey.setMin();
 	endKey.setMax();
 	long minRecSizes = 64000;
+	CollectionRec *cr = g_collectiondb.getRec(coll);
 	
 	if ( ! msg5.getList ( RDB_LINKDB   ,
-			      coll          ,
+			      cr->m_collnum      ,
 			      &list         ,
 			      (char*)&startKey      ,
 			      (char*)&endKey        ,
@ -231,6 +233,8 @@ bool Linkdb::verify ( char *coll ) {
 	      list.skipCurrentRecord() ) {
 		key224_t k;
 		list.getCurrentKey((char*)&k);
+		// skip negative keys
+		if ( (k.n0 & 0x01) == 0x00 ) continue;
 		count++;
 		//uint32_t shardNum = getShardNum ( RDB_LINKDB , &k );
 		//if ( groupId == g_hostdb.m_groupId ) got++;
@ -393,7 +397,7 @@ key224_t Linkdb::makeKey_uk ( uint32_t  linkeeSiteHash32       ,

 //static void gotRootTitleRecWrapper25 ( void *state ) ;
 //static void gotTermFreqWrapper       ( void *state ) ;
-static void gotListWrapper           ( void *state );//, RdbList *list );
+static void gotListWrapper           ( void *state ,RdbList *list,Msg5 *msg5);
 static bool gotLinkTextWrapper       ( void *state );
 //static void sendLinkInfoReplyWrapper ( void *state );//, LinkInfo *info ) ;
 //static void gotReplyWrapper25        ( void *state , void *state2 ) ;
@ -404,7 +408,7 @@ Msg25::Msg25() {
 	// set minhopcount to unknown
 	//m_minInlinkerHopCount = -1;
 	m_numReplyPtrs = 0;
-	m_linkInfo = NULL;
+	//m_linkInfo = NULL;
 	m_ownReplies = true;
 }

@ -423,7 +427,7 @@ void Msg25::reset() {
 	//if ( m_linkInfo ) 
 	//	mfree ( m_linkInfo , m_linkInfo->getStoredSize(),"msg25s");
 	// this now points into m_linkInfoBuf safebuf, just NULL it
-	m_linkInfo = NULL;
+	//m_linkInfo = NULL;

 	m_table.reset();
 	m_ipTable.reset();
@ -435,12 +439,417 @@ void Msg25::reset() {
 #define MODE_PAGELINKINFO 1
 #define MODE_SITELINKINFO 2

-// . get the inlinkers to this SITE (any page on this site)
-// . use that to compute a site quality
-// . also get the inlinkers sorted by date and see how many good inlinkers
-//   we had since X days ago. (each inlinker needs a pub/birth date)
+// . we got a reply back from the msg25 request
+// . reply should just be a serialized LinkInfo class
+// . "state" is the Msg25Request that getLinkInfo() passed to Multicast::send()
+// . copy the reply into the caller's safebuf, Msg25Request::m_linkInfoBuf,
+//   then call req->m_callback(req->m_state)
+// . on error (g_errno set) the callback is called without touching the buf
+void gotMulticastReplyWrapper25 ( void *state , void *state2 ) {
+
+	Msg25Request *req = (Msg25Request *)state;
+
+	// call callback now if error is set
+	if ( g_errno ) {
+		req->m_callback ( req->m_state );
+		return;
+	}
+
+	Multicast *mcast = req->m_mcast;
+
+	long  replySize;
+	long  replyMaxSize;
+	bool  freeit;
+	char *reply = mcast->getBestReply (&replySize,&replyMaxSize,&freeit);
+
+	// . store a copy of the reply in caller's linkInfoBuf
+	// . if the copy fails the callback still runs; caller checks g_errno
+	req->m_linkInfoBuf->safeMemcpy ( reply , replySize );
+
+	// . we copied it, so release the multicast's reply buffer
+	// . only free it if getBestReply() says it is ours to free, and use
+	//   the allocated size (replyMaxSize), not the used size (replySize)
+	// . NOTE(review): assumes "freeit" means the caller owns the buffer
+	//   and replyMaxSize is its allocation size -- confirm in Multicast
+	if ( reply && freeit )
+		mfree ( reply , replyMaxSize , "rep25" );
+
+	req->m_callback ( req->m_state );
+}


+// . builds a Msg25Request in "reqBuf" and sends it via "mcast" (msg type 0x25)
+// . returns false if it blocked; sets g_errno and returns true on launch error
+// . calls req->m_callback(req->m_state) when the reply is in, if it blocked
+bool getLinkInfo ( SafeBuf   *reqBuf              ,
+		   Multicast *mcast               ,
+		   char      *site                ,
+		   char      *url                 ,
+		   bool       isSiteLinkInfo      ,
+		   long       ip                  ,
+		   long long  docId               ,
+		   collnum_t  collnum             ,
+		   char      *qbuf,
+		   long       qbufSize,
+		   void      *state               ,
+		   void (* callback)(void *state) ,
+		   bool       isInjecting         ,
+		   SafeBuf   *pbuf                ,
+		   bool       printInXml          ,
+		   long       siteNumInlinks      ,
+		   LinkInfo  *oldLinkInfo         ,
+		   long       niceness            ,
+		   bool       doLinkSpamCheck     ,
+		   bool       oneVotePerIpDom     ,
+		   bool       canBeCancelled      ,
+		   long       lastUpdateTime      ,
+		   bool       onlyNeedGoodInlinks ,
+		   bool       getLinkerTitles     ,
+		   long       ourHostHash32       ,
+		   long       ourDomHash32        ,
+		   SafeBuf   *linkInfoBuf         ) {
+
+	long siteLen = gbstrlen(site);
+	long urlLen  = gbstrlen(url);
+
+	long oldLinkSize = 0;
+	if ( oldLinkInfo )
+		oldLinkSize = oldLinkInfo->getSize();
+
+	long need = sizeof(Msg25Request) + siteLen+1 + urlLen+1 + oldLinkSize;
+
+	// keep it in a safebuf so caller can just add "SafeBuf m_msg25Req;"
+	// to his .h file and not have to worry about freeing it.
+	reqBuf->purge();
+
+	// clear = true zeroes the buffer, so fields not set below stay 0
+	if ( ! reqBuf->reserve ( need ,"m25req", true ) ) return true;
+
+	Msg25Request *req = (Msg25Request *)reqBuf->getBufStart();
+
+	req->m_linkInfoBuf = linkInfoBuf;
+
+	req->m_mcast = mcast;
+
+	req->ptr_site = site;
+	req->size_site = siteLen + 1;
+
+	req->ptr_url  = url;
+	req->size_url  = urlLen  + 1;
+
+	req->ptr_oldLinkInfo = (char *)oldLinkInfo;
+	if ( oldLinkInfo ) req->size_oldLinkInfo = oldLinkInfo->getSize();
+	else               req->size_oldLinkInfo = 0;
+
+	if ( isSiteLinkInfo ) req->m_mode = MODE_SITELINKINFO;
+	else                  req->m_mode = MODE_PAGELINKINFO;
+	
+	req->m_ip = ip;
+	req->m_docId = docId;
+	req->m_collnum = collnum;
+	req->m_state = state;
+	req->m_callback = callback;
+	req->m_isInjecting = isInjecting;
+	req->m_printInXml = printInXml;
+	req->m_siteNumInlinks = siteNumInlinks;
+	req->m_niceness = niceness;
+	req->m_doLinkSpamCheck = doLinkSpamCheck;
+	req->m_oneVotePerIpDom = oneVotePerIpDom;
+	req->m_canBeCancelled = canBeCancelled;
+	req->m_lastUpdateTime = lastUpdateTime;
+	req->m_onlyNeedGoodInlinks = onlyNeedGoodInlinks;
+	req->m_getLinkerTitles = getLinkerTitles;
+	req->m_ourHostHash32 = ourHostHash32;
+	req->m_ourDomHash32 = ourDomHash32;
+
+	if ( g_conf.m_logDebugLinkInfo )
+		req->m_printDebugMsgs = true;
+
+	Url u;
+	u.set ( req->ptr_url );
+
+	req->m_linkHash64 = (uint64_t)u.getUrlHash64();
+
+
+	req->m_siteHash32 = 0LL;
+	req->m_siteHash64 = 0LL;
+	if ( req->ptr_site ) {
+		// fold the collection # into the 64-bit hash; the 32-bit is site only
+		long long h64 = hash64n ( req->ptr_site );
+		h64 = hash64 ((char *)&req->m_collnum,sizeof(collnum_t),h64);
+		req->m_siteHash64 = h64;
+		req->m_siteHash32 = hash32n ( req->ptr_site );
+	}
+
+	// send to host for local linkdb lookup
+	key224_t startKey ;
+	//long siteHash32 = hash32n ( req->ptr_site );
+	// access different parts of linkdb depending on the "mode"
+	if ( req->m_mode == MODE_SITELINKINFO )
+		startKey = g_linkdb.makeStartKey_uk ( req->m_siteHash32 );
+	else
+		startKey = g_linkdb.makeStartKey_uk (req->m_siteHash32,
+						     req->m_linkHash64 );
+	// what group has this linkdb list?
+	unsigned long shardNum = getShardNum ( RDB_LINKDB, &startKey );
+	// use a biased lookup: the site hash picks which twin in the shard
+	long numTwins = g_hostdb.getNumHostsPerShard();
+	long long sectionWidth = (0xffffffff/(long long)numTwins) + 1;
+	// x is the 32-bit site hash; x / sectionWidth maps it onto a twin #
+	unsigned long x = req->m_siteHash32;//(startKey.n1 >> 32);
+	long hostNum = x / sectionWidth;
+	long numHosts = g_hostdb.getNumHostsPerShard();
+	Host *hosts = g_hostdb.getShard ( shardNum); // Group ( groupId );
+	if ( hostNum >= numHosts ) { char *xx = NULL; *xx = 0; } // sanity core
+	long hostId = hosts [ hostNum ].m_hostId ;
+
+	// . serialize the string buffers
+	// . copies them into req->m_buf[], the space reserved via "need" above
+	// . turns the ptr_* members into offsets into req->m_buf[]
+	req->serialize();
+
+	// this should always block
+	if ( ! mcast->send ( 
+			    (char *)req ,
+			    req->getStoredSize() ,
+			    0x25 ,
+			    false        , // does multicast own request?
+			    shardNum ,
+			    false        , // send to whole group?
+			    0            , // key is passed on startKey
+			    req          , // state for gotMulticastReplyWrapper25
+			    NULL         , // state2, unused
+			    gotMulticastReplyWrapper25 ,
+			    1000      , // timeout in seconds (was 30)
+			    req->m_niceness ,
+			    false, // realtime     ,
+			    hostId )) {// firstHostId  ,
+		log("linkdb: Failed to send multicast for %s err=%s",
+		    u.getUrl(),mstrerror(g_errno));
+		return true;
+	}
+
+	// wait for req->m_callback(req->m_state) to be called
+	return false;
+}
+
+HashTableX g_lineTable; // Msg25Request::m_siteHash64 -> head Msg25Request of the line of waiting site requests (see handleRequest25)
+
+// . called when Msg25::getLinkInfo2() launched by handleRequest25() is done
+// . "state" is the Msg25; its m_linkInfoBuf holds the reply to send back
+// . sends a copy of that reply (or an error reply) to the head request and
+//   to every request chained to it through Msg25Request::m_next
+// . destroys the Msg25 when done
+static void sendReplyWrapper ( void *state ) {
+
+	// save the error now since calls below may change g_errno
+	long saved = g_errno;
+
+	Msg25 *m25 = (Msg25 *)state;
+	// the original request
+	Msg25Request *mr = m25->m_req25;
+	// get udp slot for sending back reply
+	UdpSlot *slot2 = mr->m_udpSlot;
+	// shortcut
+	SafeBuf *info = m25->m_linkInfoBuf;
+	// the reply we dup for each requester
+	char *reply1 = info->getBufStart();
+	long  replySize = info->length();
+	// an empty reply without an error is itself an error. this can
+	// happen if the collrec was not found.
+	if ( ! saved && replySize <= 0 ) { 
+		saved = g_errno = EBADENGINEER;
+		log("linkdb: sending back empty link text reply. did "
+		    "coll get deleted?");
+		//char *xx=NULL;*xx=0; }
+	}
+	// get original request
+	Msg25Request *req = (Msg25Request *)slot2->m_readBuf;
+	// sanity
+	if ( req->m_udpSlot != slot2 ) { char *xx=NULL;*xx=0;}
+	// if in table, nuke it
+	g_lineTable.removeKey ( &req->m_siteHash64 );
+
+	// reply to the head request and then everybody waiting in line
+	while ( req ) {
+
+		UdpSlot *udpSlot = req->m_udpSlot;
+
+		// . advance before replying
+		// . req lives in udpSlot's read buffer, so presumably it must
+		//   not be touched once the reply has been handed off
+		req = req->m_next;
+
+		// . dup the reply for each one
+		// . but not if we already have an error, because the error
+		//   path below never frees the copy and it would leak
+		char *reply2 = NULL;
+		if ( ! saved )
+			reply2 = (char *)mdup(reply1,replySize,"m25repd");
+
+		// error?
+		if ( saved || ! reply2 ) {
+			long err = saved;
+			// mdup failed, so it should have set g_errno
+			if ( ! err ) err = g_errno;
+			if ( ! err ) { char *xx=NULL;*xx=0; }
+			g_udpServer.sendErrorReply(udpSlot,err);
+			continue;
+		}
+
+		// send it back to requester
+		g_udpServer.sendReply_ass ( reply2 ,
+					    replySize ,
+					    reply2 ,
+					    replySize,
+					    udpSlot );
+	}
+
+	// the destructor
+	mdelete ( m25 ,sizeof(Msg25),"msg25");
+	delete ( m25 );
+}
+
+
+// . handles a msg 0x25 request, the Msg25Request sent by getLinkInfo()
+// . for MODE_SITELINKINFO requests, if a request with the same m_siteHash64
+//   is already being processed we just chain this one onto it and return;
+//   sendReplyWrapper() replies to everybody in the chain
+// . otherwise makes a Msg25, calls Msg25::getLinkInfo2() and replies through
+//   sendReplyWrapper() when that completes
+void  handleRequest25 ( UdpSlot *slot , long netnice ) {
+
+	Msg25Request *req = (Msg25Request *)slot->m_readBuf;
+
+	// turn the ptr_* offsets back into pointers into req->m_buf[]
+	req->deserialize();
+
+	// make sure this is always NULL for our linked list logic
+	req->m_next = NULL;
+
+	// udp socket for sending back the final linkInfo in m_linkInfoBuf
+	// used by sendReplyWrapper()
+	req->m_udpSlot = slot;
+
+	// . set up the hashtable if our first time
+	// . key is the 8-byte m_siteHash64, value is a Msg25Request ptr
+	// . size the value as a pointer. a hard-coded 4 only holds a
+	//   pointer on 32-bit builds.
+	if ( ! g_lineTable.isInitialized() )
+		g_lineTable.set ( 8,sizeof(Msg25Request *),256,NULL,0,false,
+				  MAX_NICENESS,"lht25");
+
+	// . if already working on this same request, wait for it, don't
+	//   overload server with duplicate requests
+	// . hashkey is m_siteHash64, set in getLinkInfo() from the site and
+	//   the collection #
+	// . TODO: ensure does not send duplicate "page" link info requests
+	//   just "site" link info requests
+	long slotNum = -1;
+	bool isSiteLinkInfo = false;
+	if ( req->m_mode == MODE_SITELINKINFO ) {
+		slotNum = g_lineTable.getSlot ( &req->m_siteHash64 );
+		isSiteLinkInfo = true;
+	}
+
+	if ( slotNum >= 0 ) {
+		// insert ourselves right after the head of the line
+		Msg25Request *head ;
+		head = *(Msg25Request **)g_lineTable.getValueFromSlot(slotNum);
+		if ( head->m_next ) 
+			req->m_next = head->m_next;
+		head->m_next = req;
+		// note it for debugging
+		log("build: msg25 request waiting in line for %s slot=0x%lx",
+		    req->ptr_url,(long)slot);
+		// we will send a reply back for this guy when done
+		// getting the reply for the head msg25request
+		return;
+	}
+
+	// make a new Msg25
+	Msg25 *m25;
+	try { m25 = new ( Msg25 ); }
+	catch ( ... ) {
+		g_errno = ENOMEM;
+		// cast so the arg matches the %i in the format
+		log("build: msg25: new(%i): %s", 
+		    (int)sizeof(Msg25),mstrerror(g_errno));
+		g_udpServer.sendErrorReply ( slot , g_errno );
+		return;
+	}
+	mnew ( m25 , sizeof(Msg25) , "Msg25" );
+
+	if ( isSiteLinkInfo ) {
+		// . add the initial entry, we are the head of the line
+		// . NOTE(review): the result of addKey() is not checked. if
+		//   it fails, duplicates of this request just won't wait.
+		g_lineTable.addKey ( &req->m_siteHash64 , &req );
+	}
+
+	// point to a real safebuf here for populating with data
+	m25->m_linkInfoBuf = &m25->m_realBuf;
+
+	// set some new stuff. should probably be set in getLinkInfo2()
+	// but we are trying to leave that as unaltered as possible to
+	// try to reduce debugging.
+	m25->m_req25 = req;
+
+	// this should call our callback when done
+	if ( ! m25->getLinkInfo2 ( req->ptr_site ,
+				   req->ptr_url ,
+				   isSiteLinkInfo      ,
+				   req->m_ip ,
+				   req->m_docId ,
+				   req->m_collnum , // coll
+				   NULL, // qbuf
+				   0 , // qbufSize
+				   m25 , // state
+				   sendReplyWrapper , // CALLBACK!
+				   req->m_isInjecting         ,
+				   req->m_printDebugMsgs ,
+				   //XmlDoc *xd ,
+				   req->m_printInXml ,
+				   req->m_siteNumInlinks      ,
+				   (LinkInfo *)req->ptr_oldLinkInfo ,
+				   req->m_niceness            ,
+				   req->m_doLinkSpamCheck     ,
+				   req->m_oneVotePerIpDom     ,
+				   req->m_canBeCancelled      ,
+				   req->m_lastUpdateTime      ,
+				   req->m_onlyNeedGoodInlinks  ,
+				   req->m_getLinkerTitles ,
+				   req->m_ourHostHash32 ,
+				   req->m_ourDomHash32 ,
+				   m25->m_linkInfoBuf ) ) // SafeBuf 4 output
+		// it blocked. sendReplyWrapper() will be called later.
+		return;
+
+	// sanity: it did not block, so we need either a reply or an error
+	if(m25->m_linkInfoBuf->getLength()<=0&&!g_errno){char *xx=NULL;*xx=0;}
+
+	// sanity
+	if ( g_errno == ETRYAGAIN ) { char *xx=NULL;*xx=0; }
+
+	if ( g_errno )
+		log("linkdb: error getting linkinfo: %s",mstrerror(g_errno));
+
+	// it did not block... g_errno will be set on error so
+	// sendReplyWrapper() should in that case send an error reply.
+	sendReplyWrapper ( m25 );
+}
+
+// . # of bytes this request takes up when sent over the network
+// . the fixed-size class itself plus the three buffers that serialize()
+//   stores in m_buf[]
+long Msg25Request::getStoredSize() {
+	long total = sizeof(Msg25Request);
+	total += size_url;
+	total += size_site;
+	total += size_oldLinkInfo;
+	return total;
+}
+
+// . fix the char ptrs for sending over the network
+// . copies url, site and oldLinkInfo, in that order, into m_buf[] and
+//   replaces each ptr_* member with its byte offset into m_buf[]
+// . use a for loop like we do in Msg20.cpp if we get too many strings
+void Msg25Request::serialize ( ) {
+
+	// byte offset into m_buf[] where the next buffer gets stored
+	long off = 0;
+
+	// the url
+	memcpy ( m_buf + off , ptr_url , size_url );
+	ptr_url = (char *)off;
+	off += size_url;
+
+	// the site
+	memcpy ( m_buf + off , ptr_site , size_site );
+	ptr_site = (char *)off;
+	off += size_site;
+
+	// the old link info
+	memcpy ( m_buf + off , ptr_oldLinkInfo , size_oldLinkInfo );
+	ptr_oldLinkInfo = (char *)off;
+	off += size_oldLinkInfo;
+}
+
+// . undo serialize() on the receiving end
+// . the buffers sit back to back in m_buf[] in the order url, site,
+//   oldLinkInfo, so walk m_buf[] using the size_* members
+// . a buffer with a size of 0 gets a NULL ptr
+void Msg25Request::deserialize ( ) {
+
+	// where the next buffer starts in m_buf[]
+	char *cur = m_buf;
+
+	ptr_url = size_url ? cur : NULL;
+	cur += size_url;
+
+	ptr_site = size_site ? cur : NULL;
+	cur += size_site;
+
+	ptr_oldLinkInfo = size_oldLinkInfo ? cur : NULL;
+}
+
+//////
+//
+// OLD interface below here. use the stuff above now so we can send
+// the request to a single host and multiple incoming requests can
+// wait in line, and we can set network bandwidth too.
+//
+/////

 // . returns false if blocked, true otherwise
 // . sets g_errno on error
@ -448,21 +857,23 @@ void Msg25::reset() {
 // . NOTE: make sure no input vars are on the stack in case we block
 // . reallyGetLinkInfo is set to false if caller does not want it but calls
 //   us anyway for some reason forgotten...
-bool Msg25::getLinkInfo ( char      *site                ,
+bool Msg25::getLinkInfo2( char      *site                ,
 			  char      *url                 ,
 			  // either MODE_PAGELINKINFO or MODE_SITELINKINFO
 			  bool       isSiteLinkInfo      ,
 			  long       ip                  ,
 			  long long  docId               ,
-			  char      *coll                ,
+			  //char      *coll                ,
+			  collnum_t collnum,
 			  char      *qbuf                ,
 			  long       qbufSize            ,
 			  void      *state               ,
 			  void (* callback)(void *state) ,
 			  bool       isInjecting         ,
-			  SafeBuf   *pbuf                ,
-			  XmlDoc *xd ,
-			  //bool     printInXml ,
+			  //SafeBuf   *pbuf                ,
+			  bool     printDebugMsgs ,
+			  //XmlDoc *xd ,
+			  bool     printInXml ,
 			  long       siteNumInlinks      ,
 			  //long       sitePop             ,
 			  LinkInfo  *oldLinkInfo         ,
@ -475,19 +886,26 @@ bool Msg25::getLinkInfo ( char      *site                ,
 			  bool       getLinkerTitles ,
 			  long       ourHostHash32 ,
 			  long       ourDomHash32 ,
+			  // put LinkInfo output class in here
 			  SafeBuf   *linkInfoBuf ) {

 	// reset the ip table
 	reset();
+
 	//long mode = MODE_PAGELINKINFO;
 	//m_printInXml = printInXml;
 	if ( isSiteLinkInfo ) m_mode = MODE_SITELINKINFO;
 	else                  m_mode = MODE_PAGELINKINFO;
-	m_xd = xd;
-	m_printInXml = false;
-	if ( m_xd ) m_printInXml = m_xd->m_printInXml;
+	//m_xd = xd;
+	//m_printInXml = false;
+	//if ( m_xd ) m_printInXml = m_xd->m_printInXml;
+	m_printInXml = printInXml;
+
+	if ( printDebugMsgs ) m_pbuf = &m_tmp;
+	else                  m_pbuf = NULL;
+
 	// sanity check
-	if ( ! coll ) { char *xx=NULL; *xx=0; }
+	//if ( ! coll ) { char *xx=NULL; *xx=0; }
 	m_onlyNeedGoodInlinks = onlyNeedGoodInlinks;
 	m_getLinkerTitles     = getLinkerTitles;
 	// save safebuf ptr, where we store the link info
@ -498,10 +916,10 @@ bool Msg25::getLinkInfo ( char      *site                ,
 	// must have a valid ip
 	//if ( ! ip || ip == -1 ) { char *xx = NULL; *xx = 0; }
 	// get collection rec for our collection
-	CollectionRec *cr = g_collectiondb.getRec ( coll );//, collLen );
+	CollectionRec *cr = g_collectiondb.getRec ( collnum );//, collLen );
 	// bail if NULL
 	if ( ! cr ) {
-		g_errno = ENOTFOUND;
+		g_errno = ENOCOLLREC;
 		log("build: No collection record found when getting "
 		    "link info.");
 		return true;
@ -532,7 +950,8 @@ bool Msg25::getLinkInfo ( char      *site                ,
 	m_linkSpamLinkdb      = 0;
 	//m_url                 = url;
 	m_docId               = docId;
-	m_coll                = coll;
+	//m_coll                = coll;
+	m_collnum = collnum;
 	//m_collLen             = collLen;
 	m_callback            = callback;
 	m_state               = state;
@ -545,7 +964,7 @@ bool Msg25::getLinkInfo ( char      *site                ,
 	m_qbufSize            = qbufSize;
 	m_isInjecting         = isInjecting;
 	m_oldLinkInfo         = oldLinkInfo;
-	m_pbuf                = pbuf;
+	//m_pbuf                = pbuf;
 	m_ip                  = ip;
 	m_top                 = iptop(m_ip);
 	m_lastUpdateTime      = lastUpdateTime;
@ -601,6 +1020,7 @@ bool Msg25::getLinkInfo ( char      *site                ,
 	// must have a valid ip
 	if ( ! ip || ip == -1 ) { //char *xx = NULL; *xx = 0; }
 		log("linkdb: no inlinks because ip is invalid");
+		g_errno = EBADENGINEER;
 		return true;
 	}

@ -651,7 +1071,7 @@ bool Msg25::doReadLoop ( ) {
 	long numFiles = -1;
 	// NO, DON't restrict because it will mess up the hopcount.
 	bool includeTree = true;
-
+	/*
 	// what group has this linkdb list?
 	//unsigned long groupId = getGroupId ( RDB_LINKDB , &startKey );
 	unsigned long shardNum = getShardNum ( RDB_LINKDB, &startKey );
@ -665,7 +1085,7 @@ bool Msg25::doReadLoop ( ) {
 	Host *hosts = g_hostdb.getShard ( shardNum); // Group ( groupId );
 	if ( hostNum >= numHosts ) { char *xx = NULL; *xx = 0; }
 	long hostId = hosts [ hostNum ].m_hostId ;
-
+	*/
 	// debug log
 	if ( g_conf.m_logDebugLinkInfo ) {
 		char *ms = "page";
@ -677,6 +1097,15 @@ bool Msg25::doReadLoop ( ) {

 	m_gettingList = true;

+	CollectionRec *cr = g_collectiondb.getRec ( m_collnum );
+	if ( ! cr ) {
+		log("linkdb: no coll for collnum %li",(long)m_collnum);
+		g_errno = ENOCOLLREC;
+		return true;
+	}
+
+	//char *coll = cr->m_coll;
+
 	// . get the linkdb list
 	// . we now get the WHOLE list so we can see how many linkers there are
 	// . we need a high timeout because udp server was getting suspended
@ -685,27 +1114,22 @@ bool Msg25::doReadLoop ( ) {
 	//   Now we hang indefinitely. We also fixed UdpServer to resend
 	//   requests after 30 seconds even though it was fully acked in case
 	//   the receiving host went down and is now back up.
-	if ( ! m_msg0.getList ( -1              , // hostId, -1 if none
-				0               , // hostId ip
-				0               , // hostId port
-				0               , // max cache age in seconds
-				false           , // addToCache?
+	if ( ! m_msg5.getList ( 
 				RDB_LINKDB      ,
-				m_coll          ,
+				cr->m_collnum          ,
 				&m_list         ,
 				(char*)&startKey,
 				(char*)&endKey  ,
 				m_minRecSizes   ,
+				includeTree     ,
+				false , // add to cache?
+				0 , // maxcacheage
+				0               , // startFileNum
+				numFiles        ,
 				this            ,
 				gotListWrapper  ,
 				m_niceness      ,
-				true            , // error correct?
-				includeTree     ,
-				true            , // do merge
-				hostId          , // firstHostId
-				0               , // startFileNum
-				numFiles        ,
-				60*60*24*365    )){// timeout of one year
+				true            )){ // error correct?
 		//log("debug: msg0 blocked this=%lx",(long)this);
 		return false;
 	}
@ -725,7 +1149,7 @@ bool Msg25::doReadLoop ( ) {
 	return gotList();
 }

-void gotListWrapper ( void *state ) { // , RdbList *list ) {
+void gotListWrapper ( void *state , RdbList *list , Msg5 *msg5 ) {
 	Msg25 *THIS = (Msg25 *) state;

 	//log("debug: entering gotlistwrapper this=%lx",(long)THIS);
@ -964,6 +1388,13 @@ bool Msg25::sendRequests ( ) {
 	if ( ourMax > MAX_MSG20_OUTSTANDING )
 		ourMax = MAX_MSG20_OUTSTANDING;

+	CollectionRec *cr = g_collectiondb.getRec ( m_collnum );
+	if ( ! cr ) {
+		log("linkdb: collnum %li is gone",(long)m_collnum);
+		return true;
+	}
+	//char *coll = cr->m_coll;
+
 	// if more than 300 sockets in use max this 1. prevent udp socket clog.
 	if ( g_udpServer.m_numUsedSlots >= 300 ) ourMax = 1;

@ -1204,8 +1635,9 @@ bool Msg25::sendRequests ( ) {
 			r-> ptr_linkee = m_site;
 			r->size_linkee = gbstrlen(m_site)+1; // include \0
 		}
-		r-> ptr_coll         = m_coll;
-		r->size_coll         = gbstrlen(m_coll) + 1; // include \0
+		//r-> ptr_coll         = coll;
+		//r->size_coll         = gbstrlen(coll) + 1; // include \0
+		r->m_collnum = cr->m_collnum;
 		r->m_docId           = docId;
 		r->m_expected        = true; // false;
 		r->m_niceness        = m_niceness;
@ -1532,6 +1964,7 @@ bool Msg25::gotLinkText ( Msg20Request *req ) { // LinkTextReply *linkText ) {
 		    mstrerror(g_errno),docId);
 		// this is a special case
 		if ( g_errno == ECANCELLED || 
+		     g_errno == ENOCOLLREC ||
 		     g_errno == ENOMEM     ||
 		     g_errno == ENOSLOTS    ) {
 			m_errors++;
@ -1826,7 +2259,8 @@ bool Msg25::gotLinkText ( Msg20Request *req ) { // LinkTextReply *linkText ) {
 			log("linkdb: recalling round=%li for %s=%s",
 			    m_round,ms,m_site);
 		}
-		// and re-call
+		// and re-call. returns true if did not block.
+		// returns true with g_errno set on error.
 		if ( ! doReadLoop() ) return false;
 		// it did not block!! wtf? i guess it read no more or
 		// launched no more requests.
@ -1898,11 +2332,18 @@ bool Msg25::gotLinkText ( Msg20Request *req ) { // LinkTextReply *linkText ) {
 		    ms,m_site,m_url,m_docId);
 	}

+	CollectionRec *cr = g_collectiondb.getRec ( m_collnum );
+	if ( ! cr ) {
+		log("linkdb: collnum %li is gone",(long)m_collnum);
+		return true;
+	}
+	char *coll = cr->m_coll;
+
 	// . this returns NULL and sets g_errno on error
 	// . returns an allocated ptr to a LinkInfo class
 	// . we are responsible for freeing
 	// . LinkInfo::getSize() returns the allocated size
-	m_linkInfo = makeLinkInfo ( m_coll            ,
+	makeLinkInfo ( coll            ,
 				    m_ip              ,
 				    m_siteNumInlinks  ,
 				    //m_sitePop         ,
@ -1919,7 +2360,7 @@ bool Msg25::gotLinkText ( Msg20Request *req ) { // LinkTextReply *linkText ) {
 				    this ,
 				    m_linkInfoBuf );
 	// return true with g_errno set on error
-	if ( ! m_linkInfo ) {
+	if ( ! m_linkInfoBuf->length() ) {
 		log("build: msg25 linkinfo set: %s",mstrerror(g_errno));
 		return true;
 	}
@ -1973,7 +2414,9 @@ bool Msg25::gotLinkText ( Msg20Request *req ) { // LinkTextReply *linkText ) {
 	char *ss = "site";
 	if ( m_mode == MODE_PAGELINKINFO ) ss = "page";

-	long siteRank = ::getSiteRank ( m_linkInfo->m_numGoodInlinks );
+	LinkInfo *info = (LinkInfo *)m_linkInfoBuf->getBufStart();
+
+	long siteRank = ::getSiteRank ( info->m_numGoodInlinks );

 	if ( m_printInXml ) { // && m_xd ) {

@ -1983,19 +2426,24 @@ bool Msg25::gotLinkText ( Msg20Request *req ) { // LinkTextReply *linkText ) {
 				   "</sampleCreatedUTC>\n"
 				   , m_lastUpdateTime
 				   );
-		char *u = NULL;
-		if ( m_xd ) u = m_xd->ptr_firstUrl;
+		//char *u = NULL;
+		//if ( m_xd ) u = m_xd->ptr_firstUrl;
+		// m_url should point into the Msg25Request buffer
+		char *u = m_url;
 		if ( u )
 			m_pbuf->safePrintf("\t<url><![CDATA[%s]]></url>\n",u);

-		char *site = NULL;
-		if ( m_xd ) site = m_xd->ptr_site;
+		//char *site = NULL;
+		//if ( m_xd ) site = m_xd->ptr_site;
+		// m_site should point into the Msg25Request buffer
+		char *site = m_site;
 		if ( site )
 			m_pbuf->safePrintf("\t<site><![CDATA[%s]]></site>\n",
 					   site);

-		long long d = 0LL;
-		if ( m_xd ) d = m_xd->m_docId;
+		//long long d = 0LL;
+		//if ( m_xd ) d = m_xd->m_docId;
+		long long d = m_docId;
 		if ( d && d != -1LL )
 			m_pbuf->safePrintf("\t<docId>%lli</docId>\n",d);
 			
@ -2018,7 +2466,7 @@ bool Msg25::gotLinkText ( Msg20Request *req ) { // LinkTextReply *linkText ) {
 				   // the total # of inlinkers. we may not have
 				   // read all of them from disk though.
 				   , m_numDocIds
-				   , m_linkInfo->m_numGoodInlinks
+				   , info->m_numGoodInlinks
 				   , m_cblocks
 				   , m_uniqueIps
 				   );
@ -2142,10 +2590,10 @@ bool Msg25::gotLinkText ( Msg20Request *req ) { // LinkTextReply *linkText ) {
 		for ( long j = 0 ; j < MAX_ENTRY_DOCIDS ; j++ ) {
 			if ( e->m_docIds[j] == -1LL ) break;
 			if ( ! m_printInXml )
-				m_pbuf->safePrintf ("<a href=\"/master/titledb"
+				m_pbuf->safePrintf ("<a href=\"/admin/titledb"
 						    "?c=%s&d=%lli\">"
 						    "%li</a> ",
-						    m_coll,e->m_docIds[j],j);
+						    coll,e->m_docIds[j],j);
 		}
 		if ( ! m_printInXml )
 			m_pbuf->safePrintf ( "&nbsp; </td></tr>\n" );
@ -2225,7 +2673,7 @@ bool Msg25::gotLinkText ( Msg20Request *req ) { // LinkTextReply *linkText ) {
 				   (long)m_ipDupsLinkdb   ,
 				   (long)m_docIdDupsLinkdb   ,
 				   (long)m_linkSpamLinkdb ,
-				   m_linkInfo->m_numGoodInlinks
+				   info->m_numGoodInlinks
 				   // good and max
 				   //(long)m_linkInfo->getNumInlinks() ,
 				   );
@ -2490,7 +2938,7 @@ bool Msg25::gotLinkText ( Msg20Request *req ) { // LinkTextReply *linkText ) {
 		m_pbuf->safePrintf("<td><a href=\"/search?q=ip%%3A"
 				   "%s&c=%s&n=200\">%s</a></td>"  // ip
 				   , iptoa(r->m_ip)
-				   , m_coll
+				   , coll
 				   , iptoa(r->m_ip)
 				   );
 		m_pbuf->safePrintf("<td>%s</td>"
@ -3487,7 +3935,7 @@ LinkInfo *makeLinkInfo ( char        *coll                    ,
 	// . how many unique ips link to us?
 	// . this count includes internal IPs as well
 	info->m_numUniqueIps           = msg25->m_uniqueIps;
-	// keep things consistent for the "test" coll
+	// keep things consistent for the "qatest123" coll
 	info->m_reserved1              = 0;
 	info->m_reserved2              = 0;
 	// how many total GOOD inlinks we got. does not include internal cblock
@ -3551,6 +3999,7 @@ LinkInfo *makeLinkInfo ( char        *coll                    ,
 	// how many guys that we stored were internal?
 	info->m_numInlinksInternal = (char)icount3;

+	linkInfoBuf->setLength ( need );

 	// sanity parse it
 	//long ss = 0;
@ -4161,7 +4610,7 @@ bool LinkInfo::print ( SafeBuf *sb , char *coll ) {
 			       "<tr><td colspan=2>link #%04li "
 			       "("
 			       //"baseScore=%010li, "
-			       "d=<a href=\"/master/titledb?c=%s&"
+			       "d=<a href=\"/admin/titledb?c=%s&"
 			       "d=%lli\">%016lli</a>, "
 			       "siterank=%li, "
 			       "hopcount=%03li "
--- a/Linkdb.h
+++ b/Linkdb.h
@ -35,6 +35,123 @@
 #include "DiskPageCache.h"
 #include "Titledb.h"

+void  handleRequest25 ( UdpSlot *slot , long netnice ) ;
+
+// . get the inlinkers to this SITE (any page on this site)
+// . use that to compute a site quality
+// . also get the inlinkers sorted by date and see how many good inlinkers
+//   we had since X days ago. (each inlinker needs a pub/birth date)
+class Msg25Request { // msg 0x25 request: filled in by getLinkInfo(), handled by handleRequest25()
+public:
+	// either MODE_PAGELINKINFO or MODE_SITELINKINFO
+	char       m_mode; // bool       m_isSiteLinkInfo    ;
+	long       m_ip                ;
+	long long  m_docId             ;
+	collnum_t  m_collnum           ;
+	bool       m_isInjecting       ;
+	bool       m_printInXml        ;
+
+	// when we get a reply we call m_callback(m_state)
+	void      *m_state               ;
+	void    (* m_callback)(void *state) ;
+
+	// server-side parms so it doesn't have to allocate a state
+	//SafeBuf    m_pbuf        ;
+	//SafeBuf    m_linkInfoBuf ;
+
+	//char    *coll              ;
+	//char    *qbuf              ;
+	//long     qbufSize          ;
+	//XmlDoc  *xd                ;
+
+	long       m_siteNumInlinks      ;
+	class LinkInfo  *m_oldLinkInfo         ;
+	long       m_niceness            ;
+	bool       m_doLinkSpamCheck     ;
+	bool       m_oneVotePerIpDom     ;
+	bool       m_canBeCancelled      ;
+	long       m_lastUpdateTime      ;
+	bool       m_onlyNeedGoodInlinks  ;
+	bool       m_getLinkerTitles ;
+	long       m_ourHostHash32 ;
+	long       m_ourDomHash32 ;
+
+	// new stuff. the hashes are set in getLinkInfo() in Linkdb.cpp
+	long       m_siteHash32;
+	long long  m_siteHash64; // key into g_lineTable
+	long long  m_linkHash64;
+	// for linked list of these guys in g_lineTable in Linkdb.cpp
+	// but only used on the server end, not client end
+	class Msg25Request *m_next;
+	// the multicast we use to send this request
+	class Multicast *m_mcast;
+	UdpSlot *m_udpSlot; // set in handleRequest25(), used to send the reply
+	bool m_printDebugMsgs;
+	// store final LinkInfo reply in here
+	SafeBuf   *m_linkInfoBuf;
+
+
+	char      *ptr_site; // the ptr_* are offsets into m_buf[] after serialize()
+	char      *ptr_url;
+	char      *ptr_oldLinkInfo;
+
+	long       size_site; // size_site and size_url include the \0
+	long       size_url;
+	long       size_oldLinkInfo;
+
+	char m_buf[0]; // serialize() stores url, site, oldLinkInfo here
+
+	long getStoredSize();
+	void serialize();
+	void deserialize();
+};
+
+// . returns false if blocked, true otherwise
+// . sets g_errno on error
+// . if it blocks, callback(state) is called once the reply, a serialized LinkInfo, has been copied into myLinkInfoBuf
+bool getLinkInfo ( SafeBuf *reqBuf , // store msg25 request in here
+		   Multicast *mcast , // use this to send msg 0x25 request
+		   char      *site ,
+		   char      *url  ,
+		   bool       isSiteLinkInfo ,
+		   long       ip                  ,
+		   long long  docId               ,
+		   collnum_t collnum ,
+		   char      *qbuf                ,
+		   long       qbufSize            ,
+		   void      *state               ,
+		   void (* callback)(void *state) ,
+		   bool       isInjecting         ,
+		   SafeBuf   *pbuf                ,
+		   //class XmlDoc *xd ,
+		   bool printInXml ,
+		   long       siteNumInlinks      ,
+		   //long       sitePop             ,
+		   LinkInfo  *oldLinkInfo         ,
+		   long       niceness            ,
+		   bool       doLinkSpamCheck     ,
+		   bool       oneVotePerIpDom     ,
+		   bool       canBeCancelled      ,
+		   long       lastUpdateTime      ,
+		   bool       onlyNeedGoodInlinks  ,
+		   bool       getLinkerTitles , //= false ,
+		   // if an inlinking document has an outlink
+		   // of one of these hashes then we set
+		   // Msg20Reply::m_hadLinkToOurDomOrHost.
+		   // it is used to remove an inlinker to a related
+		   // docid, which also links to our main seo url
+		   // being processed. so we do not recommend
+		   // such links since they already link to a page
+		   // on your domain or hostname. set BOTH to zero
+		   // to not perform this algo in handleRequest20()'s
+		   // call to XmlDoc::getMsg20Reply().
+		   long       ourHostHash32 , // = 0 ,
+		   long       ourDomHash32 , // = 0 );
+		   SafeBuf *myLinkInfoBuf );
+
+
+void  handleRequest25 ( UdpSlot *slot , long netnice ) ;
+
 long getSiteRank ( long sni ) ;

 class Linkdb {
@ -307,19 +424,22 @@ class Msg25 {
 	//   any link text and return true right away, really saves a bunch 
 	//   of disk seeks when spidering small collections that don't need 
 	//   link text/info indexing/analysis
-	bool getLinkInfo ( char      *site ,
+	bool getLinkInfo2 (char      *site ,
 			   char      *url  ,
 			   bool       isSiteLinkInfo ,
 			   long       ip                  ,
 			   long long  docId               ,
-			   char      *coll                ,
+			   //char      *coll                ,
+			   collnum_t collnum,
 			   char      *qbuf                ,
 			   long       qbufSize            ,
 			   void      *state               ,
 			   void (* callback)(void *state) ,
 			   bool       isInjecting         ,
-			   SafeBuf   *pbuf                ,
-			   class XmlDoc *xd ,
+			   //SafeBuf   *pbuf                ,
+			   bool       printDebugMsgs , // into "Msg25::m_pbuf"
+			   //class XmlDoc *xd ,
+			   bool       printInXml ,
 			   long       siteNumInlinks      ,
 			   //long       sitePop             ,
 			   LinkInfo  *oldLinkInfo         ,
@ -363,17 +483,21 @@ class Msg25 {

 	//char getMinInlinkerHopCount () { return m_minInlinkerHopCount; };

+	// a new parm referencing the request we got over the network
+	class Msg25Request * m_req25;

 	class Msg20Reply *getLoser (class Msg20Reply *r, class Msg20Reply *p);
 	char             *isDup    (class Msg20Reply *r, class Msg20Reply *p);

 	bool addNote ( char *note , long noteLen , long long docId );

-	class LinkInfo *getLinkInfo () { return m_linkInfo; };
+	//class LinkInfo *getLinkInfo () { return m_linkInfo; };

 	// m_linkInfo ptr references into here. provided by caller.
 	SafeBuf *m_linkInfoBuf;

+	SafeBuf m_realBuf;
+
 	// private:
 	// these need to be public for wrappers to call:
 	bool gotTermFreq ( bool msg42Called ) ;
@ -409,9 +533,10 @@ class Msg25 {
 	bool       m_onlyNeedGoodInlinks;
 	bool       m_getLinkerTitles;
 	long long  m_docId;
-	char      *m_coll;
+	//char      *m_coll;
+	collnum_t m_collnum;
 	//long       m_collLen;
-	LinkInfo  *m_linkInfo;
+	//LinkInfo  *m_linkInfo;
 	void      *m_state;
 	void     (* m_callback) ( void *state );

@ -419,7 +544,7 @@ class Msg25 {
 	//long m_sitePop;
 	long m_mode;
 	bool m_printInXml;
-	class XmlDoc  *m_xd;
+	//class XmlDoc  *m_xd;

 	// private:

@ -437,7 +562,8 @@ class Msg25 {
 	// . the href: IndexList's docIds are docs that link to us
 	// . we now use Msg2 since it has "restrictIndexdb" support to limit
 	//   indexdb searches to just the root file to decrease disk seeks
-	Msg0  m_msg0;
+	//Msg0  m_msg0;
+	Msg5 m_msg5;
 	RdbList m_list;

 	class Inlink *m_k;
@ -499,7 +625,12 @@ class Msg25 {
 	// this is used for link ban checks
 	//Msg18     m_msg18;

-	SafeBuf  *m_pbuf;
+	SafeBuf   m_tmp;
+	SafeBuf  *m_pbuf; // will point to m_tmp if m_printDebugMsgs
+
+	// for holding the final linkinfo output
+	//SafeBuf m_linkInfoBuf;
+
 	// copied from CollectionRec
 	bool  m_oneVotePerIpDom           ;
 	bool  m_doLinkSpamCheck           ;
--- a/Make.depend
+++ b/Make.depend
--- a/19
+++ b/19
@ -2,11 +2,11 @@ SHELL = /bin/bash

 CC=g++

-OBJS =  Tfndb.o UdpSlot.o Rebalance.o \
+OBJS =  UdpSlot.o Rebalance.o \
 	Msg13.o Mime.o IndexReadInfo.o \
-	PageGet.o PageHosts.o PageIndexdb.o PageLogin.o \
+	PageGet.o PageHosts.o PageIndexdb.o \
 	PageParser.o PageInject.o PagePerf.o PageReindex.o PageResults.o \
-	PageRoot.o PageSockets.o PageStats.o \
+	PageAddUrl.o PageRoot.o PageSockets.o PageStats.o \
 	PageTitledb.o \
 	PageAddColl.o \
 	hash.o Domains.o \
@ -57,9 +57,10 @@ OBJS =  Tfndb.o UdpSlot.o Rebalance.o \
 	PostQueryRerank.o Msge0.o Msge1.o \
 	CountryCode.o DailyMerge.o CatRec.o Tagdb.o \
 	Users.o Images.o Wiki.o Wiktionary.o Scraper.o \
-	Dates.o Sections.o SiteGetter.o Syncdb.o \
+	Dates.o Sections.o SiteGetter.o Syncdb.o qa.o \
 	Placedb.o Address.o Test.o GeoIP.o GeoIPCity.o Synonyms.o \
-	Cachedb.o Monitordb.o dlstubs.o PageCrawlBot.o Json.o
+	Cachedb.o Monitordb.o dlstubs.o PageCrawlBot.o Json.o PageBasic.o
+

 CHECKFORMATSTRING = -D_CHECK_FORMAT_STRING_

@ -76,7 +77,8 @@ ifeq ("titan","$(HOST)")
 # in 2013. So it just uses clone() and does its own "threading". Unfortunately,
 # the way it works is not even possible on newer kernels because they no longer
 # allow you to override the _errno_location() function. -- matt
-CPPFLAGS = -m32 -g -Wall -pipe -Wno-write-strings -Wstrict-aliasing=0 -Wno-uninitialized -static -DMATTWELLS -DNEEDLICENSE
+# -DMATTWELLS
+CPPFLAGS = -m32 -g -Wall -pipe -Wno-write-strings -Wstrict-aliasing=0 -Wno-uninitialized -static -DTITAN
 LIBS = ./libz.a ./libssl.a ./libcrypto.a ./libiconv.a ./libm.a
 else
 # use -m32 to force 32-bit mode compilation.
@ -326,8 +328,9 @@ Rdb.o:
 RdbBase.o:
 	$(CC) $(DEFS) $(CPPFLAGS) -O2 -c $*.cpp 

-RdbCache.o:
-	$(CC) $(DEFS) $(CPPFLAGS) -O2 -c $*.cpp 
+# RdbCache.cpp gets "corrupted" with -O2... like RdbTree.cpp
+#RdbCache.o:
+#	$(CC) $(DEFS) $(CPPFLAGS) -O2 -c $*.cpp 

 # fast dictionary generation and spelling recommendations
 #Speller.o:
--- a/Mem.cpp
+++ b/Mem.cpp
@ -14,7 +14,7 @@

 // put me back
 //#define EFENCE
-#define EFENCE_SIZE 100000
+//#define EFENCE_SIZE 50000

 // uncomment this for EFENCE to do underflow checks instead of the
 // default overflow checks
@ -52,7 +52,7 @@
 // there because it will hit a different PAGE, to be more sure we could
 // make UNDERPAD and OVERPAD PAGE bytes, although the overrun could still write
 // to another allocated area of memory and we can never catch it.
-#ifdef EFENCE
+#if defined(EFENCE) || defined(EFENCE_SIZE)
 #define UNDERPAD 0
 #define OVERPAD  0
 #else
@ -68,7 +68,7 @@ extern bool g_isYippy;

 bool freeCacheMem();

-#ifdef EFENCE
+#if defined(EFENCE) || defined(EFENCE_SIZE)
 static void *getElecMem ( long size ) ;
 static void  freeElecMem ( void *p  ) ;
 #endif
@ -254,6 +254,12 @@ void * operator new (size_t size) throw (std::bad_alloc) {
 	}
 #ifdef EFENCE
 	void *mem = getElecMem(size);
+#elif EFENCE_SIZE
+	void *mem; // conditional electric fence: only fence the big allocs
+	if ( size >= EFENCE_SIZE ) // same ">=" cutoff gbmalloc()/gbfree() use
+		mem = getElecMem(size);
+	else
+		mem = sysmalloc ( size );
 #else
 	//void *mem = dlmalloc ( size );
 	void *mem = sysmalloc ( size );
@ -332,6 +338,12 @@ void * operator new [] (size_t size) throw (std::bad_alloc) {
 	}
 #ifdef EFENCE
 	void *mem = getElecMem(size);
+#elif EFENCE_SIZE
+	void *mem; // conditional electric fence: only fence the big allocs
+	if ( size >= EFENCE_SIZE ) // same ">=" cutoff gbmalloc()/gbfree() use
+		mem = getElecMem(size);
+	else
+		mem = sysmalloc ( size );
 #else
 	//void *mem = dlmalloc ( size );
 	void *mem = sysmalloc ( size );
@ -445,10 +457,11 @@ bool Mem::init  ( long long maxMem ) {
 	if ( g_conf.m_detectMemLeaks )
 		log(LOG_INIT,"mem: Memory leak checking is enabled.");

-#ifdef EFENCE
+#if defined(EFENCE) || defined(EFENCE_SIZE)
 	log(LOG_INIT,"mem: using electric fence!!!!!!!");
 #endif

+#ifndef TITAN
 	// if we can't alloc 3gb exit and retry
 	long long start = gettimeofdayInMilliseconds();
 	char *pools[30];
@ -471,6 +484,7 @@ bool Mem::init  ( long long maxMem ) {
 	if ( took > 20 ) log("mem: took %lli ms to check memory ceiling",took);
 	// return if could not alloc the full 3GB
 	if ( i < 30 ) return false;
+#endif

 	// reset this, our max mem used over time ever because we don't
 	// want the mem test we did above to count towards it
@ -500,6 +514,15 @@ void Mem::addMem ( void *mem , long size , const char *note , char isnew ) {

 	//validate();

+	if ( (long)m_numAllocated + 100 >= (long)m_memtablesize ) { // <=100 slots left
+		static bool s_printed = false; // must be static so we only dump once
+		if ( ! s_printed ) {
+			log("mem: using too many slots");
+			printMem();
+			s_printed = true;
+		}
+	}
+
 	// sanity check
 	if ( g_inSigHandler ) {
 		log(LOG_LOGIC,"mem: In sig handler.");
@ -1284,7 +1307,7 @@ void *Mem::gbmalloc ( int size , const char *note ) {
 	mem = getElecMem(size+UNDERPAD+OVERPAD);

 	// conditional electric fence?
-#elif EFENCE_BIG
+#elif EFENCE_SIZE
 	if ( size >= EFENCE_SIZE )
 		mem = getElecMem(size+0+0);
 	else
@ -1435,9 +1458,9 @@ void *Mem::gbrealloc ( void *ptr , int oldSize , int newSize ,

 	char *mem;

-	// even though size may be < 100k for EFENCE_BIG, do it this way
+	// even though size may be < 100k for EFENCE_SIZE, do it this way
 	// for simplicity...
-#if defined(EFENCE) || defined(EFENCE_BIG)
+#if defined(EFENCE) || defined(EFENCE_SIZE)
 	mem = (char *)mmalloc ( newSize , note );
 	if ( ! mem ) return NULL;
 	// copy over to it
@ -1516,21 +1539,22 @@ void Mem::gbfree ( void *ptr , int size , const char *note ) {
 		char *xx = NULL; *xx = 0;
 	}

+	bool isnew = s_isnew[slot];
+
 #ifdef EFENCE
 	// this does a delayed free so do not call rmMem() just yet
 	freeElecMem ((char *)ptr - UNDERPAD );
 	return;
 #endif

-#ifdef EFENCE_BIG
+#ifdef EFENCE_SIZE
+	if ( size == -1 ) size = s_sizes[slot];
 	if ( size >= EFENCE_SIZE ) {
 		freeElecMem ((char *)ptr - 0 );
 		return;
 	}
 #endif	

-	bool isnew = s_isnew[slot];
-
 	// if this returns false it was an unbalanced free
 	if ( ! rmMem ( ptr , size , note ) ) return;

--- a/Monitordb.cpp
+++ b/Monitordb.cpp
@ -78,9 +78,10 @@ bool Monitordb::verify ( char *coll ) {
 	startKey.setMin();
 	endKey.setMax();
 	long minRecSizes = 64000;
-	
+	CollectionRec *cr = g_collectiondb.getRec(coll);
+
 	if ( ! msg5.getList ( RDB_MONITORDB   ,
-			      coll          ,
+			      cr->m_collnum,
 			      &list         ,
 			      (char*)&startKey      ,
 			      (char*)&endKey        ,
--- a/Msg0.cpp
+++ b/Msg0.cpp
@ -103,7 +103,8 @@ bool Msg0::getList ( long long hostId      , // host to ask (-1 if none)
 		     long      maxCacheAge , // max cached age in seconds
 		     bool      addToCache  , // add net recv'd list to cache?
 		     char      rdbId       , // specifies the rdb
-		     char     *coll        ,
+		     //char     *coll        ,
+		     collnum_t collnum ,
 		     RdbList  *list        ,
 		     //key_t     startKey    , 
 		     //key_t     endKey      , 
@ -143,7 +144,7 @@ bool Msg0::getList ( long long hostId      , // host to ask (-1 if none)
 	//if ( doIndexdbSplit ) 
 	//	logf(LOG_DEBUG,"net: doing msg0 with indexdb split true");
 	// warning
-	if ( ! coll ) log(LOG_LOGIC,"net: NULL collection. msg0.");
+	if ( collnum < 0 ) log(LOG_LOGIC,"net: NULL collection. msg0.");

 	//if ( doIndexdbSplit ) { char *xx=NULL;*xx=0; }

@ -189,7 +190,7 @@ bool Msg0::getList ( long long hostId      , // host to ask (-1 if none)
 	KEYSET(m_endKey,endKey,m_ks);
 	m_minRecSizes   = minRecSizes;
 	m_rdbId         = rdbId;
-	m_coll          = coll;
+	m_collnum = collnum;//          = coll;
 	m_isRealMerge   = isRealMerge;
 	m_allowPageCache = allowPageCache;

@ -349,7 +350,7 @@ bool Msg0::getList ( long long hostId      , // host to ask (-1 if none)
 		*/
 		QUICKPOLL(m_niceness);
 		if ( ! m_msg5->getList ( rdbId,
-					 coll ,
+					 m_collnum ,
 					 m_list ,
 					 m_startKey ,
 					 m_endKey   ,
@ -462,7 +463,8 @@ skip:
 	KEYSET(p,m_startKey,m_ks);          ; p+=m_ks;
 	KEYSET(p,m_endKey,m_ks);            ; p+=m_ks;
 	// NULL terminated collection name
-	strcpy ( p , coll ); p += gbstrlen ( coll ); *p++ = '\0';
+	//strcpy ( p , coll ); p += gbstrlen ( coll ); *p++ = '\0';
+	*(collnum_t *)p = m_collnum; p += sizeof(collnum_t);
 	m_requestSize    = p - m_request;
 	// ask an individual host for this list if hostId is NOT -1
 	if ( m_hostId != -1 ) {
@ -957,7 +959,8 @@ void handleRequest0 ( UdpSlot *slot , long netnice ) {
 	char     *startKey           = p; p+=ks;
 	char     *endKey             = p; p+=ks;
 	// then null terminated collection
-	char     *coll               = p;
+	//char     *coll               = p;
+	collnum_t collnum = *(collnum_t *)p; p += sizeof(collnum_t);


 	// error set from XmlDoc::cacheTermLists()?
@ -1175,7 +1178,7 @@ void handleRequest0 ( UdpSlot *slot , long netnice ) {
 	// . return if this blocks
 	// . we'll call sendReply later
 	if ( ! st0->m_msg5.getList ( rdbId             ,
-				     coll              ,
+				     collnum           ,
 				     &st0->m_list      ,
 				     startKey          ,
 				     endKey            ,
--- a/Msg0.h
+++ b/Msg0.h
@ -36,7 +36,7 @@ bool getRecFromTermListCache ( char *coll,
 */

 //#define MSG0_REQ_SIZE (8 + 2 * sizeof(key_t) + 16 + 5 + MAX_COLL_LEN + 1 )
-#define MSG0_REQ_SIZE (8 + 2 * MAX_KEY_BYTES + 16 + 5 + MAX_COLL_LEN + 1 + 1 )
+#define MSG0_REQ_SIZE (8 + 2 * MAX_KEY_BYTES + 16 + 5 + 4 + 1 + 1 )

 class Msg0 {

@ -68,7 +68,8 @@ class Msg0 {
 		       long      maxCacheAge , // max cached age in seconds
 		       bool      addToCache  , // add net recv'd list to cache?
 		       char      rdbId       , // specifies the rdb
-		       char     *coll        ,
+		       //char     *coll        ,
+		       collnum_t collnum ,
 		       class RdbList  *list  ,
 		       //key_t     startKey    , 
 		       //key_t     endKey      , 
@ -106,7 +107,8 @@ class Msg0 {
 		       long      maxCacheAge , // max cached age in seconds
 		       bool      addToCache  , // add net recv'd list to cache?
 		       char      rdbId       , // specifies the rdb
-		       char     *coll        ,
+		       //char     *coll        ,
+		       collnum_t collnum ,
 		       class RdbList  *list  ,
 		       key_t     startKey    , 
 		       key_t     endKey      , 
@ -144,7 +146,7 @@ class Msg0 {
 				 maxCacheAge ,
 				 addToCache  ,
 				 rdbId       ,
-				 coll        ,
+				 collnum     ,
 				 list  ,
 				 (char *)&startKey    , 
 				 (char *)&endKey      , 
@ -256,7 +258,8 @@ class Msg0 {
 	char  m_endKey[MAX_KEY_BYTES];
 	long  m_minRecSizes ;
 	char  m_rdbId       ;
-	char *m_coll        ;
+	//char *m_coll        ;
+	collnum_t m_collnum;

 	class Msg5  *m_msg5 ;
 	class Msg5  *m_msg5b;
--- a/Msg1.cpp
+++ b/Msg1.cpp
@ -95,7 +95,7 @@ bool Msg1::addRecord ( char *rec ,
 		   sizeof(key_t));
 	return addList ( &m_tmpList ,
 			 rdbId ,
-			 g_collectiondb.m_recs[collnum]->m_coll ,
+			 collnum,//g_collectiondb.m_recs[collnum]->m_coll ,
 			 state ,
 			 callback ,
 			 false , // force local?
@ -111,7 +111,7 @@ bool Msg1::addRecord ( char *rec ,
 //   when the reply does come back we do NOT call the callback
 bool Msg1::addList ( RdbList      *list              ,
 		     char          rdbId             ,
-		     char         *coll              ,
+		     collnum_t collnum, // char         *coll              ,
 		     void         *state             ,
 		     void (* callback)(void *state)  ,
 		     bool          forceLocal        ,
@ -120,7 +120,7 @@ bool Msg1::addList ( RdbList      *list              ,
 		     bool          waitForReply      ,
 		     bool         *inTransit         ) {
 	// warning
-	if ( ! coll ) log(LOG_LOGIC,"net: NULL collection. msg1.cpp.");
+	if ( collnum<0 ) log(LOG_LOGIC,"net: bad collection. msg1.cpp.");
 	// if list has no records in it return true
 	if ( ! list || list->isEmpty() ) return true;
 	// sanity check
@ -175,7 +175,7 @@ bool Msg1::addList ( RdbList      *list              ,
 		bool inTransit;
 		bool status = Y->addList ( &Y->m_ourList ,
 					   rdbId         ,
-					   coll          ,
+					   collnum       ,
 					   Y             , // state
 					   returnMsg1    , // callback
 					   forceLocal    ,
@ -205,7 +205,7 @@ bool Msg1::addList ( RdbList      *list              ,
 	// remember these vars
 	m_list          = list;
 	m_rdbId         = rdbId;
-	m_coll          = coll;
+	m_collnum       = collnum;
 	m_state         = state;
 	m_callback      = callback;
 	m_forceLocal    = forceLocal;
@ -451,6 +451,11 @@ bool Msg1::sendData ( unsigned long shardNum, char *listData , long listSize) {
 	*/
 	// if the data is being added to our group, don't send ourselves
 	// a msg1, if we can add it right now
+	// MDW: crap this is getting ETRYAGAIN and it isn't being tried again
+	// i guess and Spider.cpp fails to add to doledb but the doleiptable
+	// maintains a positive count, thereby hanging the spiders. let's
+	// just always go through multicast so it will auto-retry ETRYAGAIN
+	/*
 	bool sendToSelf = true;
 	if ( shardNum == getMyShardNum() &&
 	     ! g_conf.m_interfaceMachine ) {
@ -485,7 +490,8 @@ bool Msg1::sendData ( unsigned long shardNum, char *listData , long listSize) {
 		// if no error, no need to use a Msg1 UdpSlot for ourselves
 		if ( ! g_errno ) sendToSelf = false;
 		else {
-			log("rdb: msg1 had error: %s",mstrerror(g_errno));
+			log("rdb: msg1 coll=%s rdb=%s had error: %s",
+			    m_coll,rdb->m_dbname,mstrerror(g_errno));
 			// this is messing up generate catdb's huge rdblist add
 			// why did we put it in there??? from msg9b.cpp
 			//return true;
@ -497,16 +503,17 @@ bool Msg1::sendData ( unsigned long shardNum, char *listData , long listSize) {
 		     g_hostdb.getNumHostsPerShard() == 1 ) return true;
 	}
 skip:
+	*/
 	// . make an add record request to multicast to a bunch of machines
 	// . this will alloc new space, returns NULL on failure
 	//char *request = makeRequest ( listData, listSize, groupId , 
 	//m_rdbId , &requestLen );
-	long collLen = gbstrlen ( m_coll );
+	//long collLen = gbstrlen ( m_coll );
 	// . returns NULL and sets g_errno on error
 	// . calculate total size of the record
 	// . 1 byte for rdbId, 1 byte for flags,
 	//   then collection NULL terminated, then list
-	long requestLen = 1 + 1 + collLen + 1 + listSize ;
+	long requestLen = 1 + 1 + sizeof(collnum_t) + listSize ;
 	// make the request
 	char *request = (char *) mmalloc ( requestLen ,"Msg1" );
 	if ( ! request ) return true;
@ -518,16 +525,18 @@ skip:
 	if ( m_injecting ) *p |= 0x80;
 	p++;
 	// then collection name
-	memcpy ( p , m_coll , collLen );
-	p += collLen;
-	*p++ = '\0';
+	//memcpy ( p , m_coll , collLen );
+	//p += collLen;
+	//*p++ = '\0';
+	*(collnum_t *)p = m_collnum;
+	p += sizeof(collnum_t);
 	// sanity check
-	if ( collLen <= 0 ) {
-		log(LOG_LOGIC,"net: No collection specified for list add.");
-		//char *xx = NULL; *xx = 0;
-		g_errno = ENOCOLLREC;
-		return true;
-	}
+	//if ( collLen <= 0 ) {
+	//	log(LOG_LOGIC,"net: No collection specified for list add.");
+	//	//char *xx = NULL; *xx = 0;
+	//	g_errno = ENOCOLLREC;
+	//	return true;
+	//}
 	//if ( m_deleteRecs    ) request[1] |= 0x80;
 	//if ( m_overwriteRecs ) request[1] |= 0x40;
 	// store the list after coll
@ -573,7 +582,7 @@ skip:
 			    k    , // cache key
 			    RDB_NONE , // bogus rdbId
 			    -1    , // unknown minRecSizes read size
-			    sendToSelf ))
+			    true )) // sendToSelf ))
 		return false;

 	QUICKPOLL(m_niceness);
@ -675,8 +684,10 @@ void handleRequest1 ( UdpSlot *slot , long netnice ) {
 	else             injecting = false;
 	p++;
 	// then collection
-	char *coll = p;
-	p += gbstrlen (p) + 1;
+	//char *coll = p;
+	//p += gbstrlen (p) + 1;
+	collnum_t collnum = *(collnum_t *)p;
+	p += sizeof(collnum_t);
 	// . make a list from this data
 	// . skip over the first 4 bytes which is the rdbId
 	// . TODO: embed the rdbId in the msgtype or something...
@ -694,7 +705,7 @@ void handleRequest1 ( UdpSlot *slot , long netnice ) {
 	//log("msg1: handlerequest1 calling addlist niceness=%li",niceness);
 	//log("msg1: handleRequest1 niceness=%li",niceness);
 	// this returns false and sets g_errno on error
-	rdb->addList ( coll , &list , niceness);
+	rdb->addList ( collnum , &list , niceness);
 	// if titledb, add tfndb recs to map the title recs
 	//if ( ! g_errno && rdb == g_titledb.getRdb() && injecting ) 
 	//	updateTfndb ( coll , &list , true, 0);
--- a/Msg1.h
+++ b/Msg1.h
@ -59,7 +59,7 @@ class Msg1 {
 	// . when deleteRecs is true, the recs in the list are really just keys
 	bool addList ( RdbList  *list  ,
 		       char      rdbId ,
-		       char     *coll  ,
+		       collnum_t collnum, // char     *coll  ,
 		       void     *state ,
 		       void    (*callback)(void *state) ,
 		       bool      forceLocal    ,
@ -95,7 +95,8 @@ class Msg1 {

 	// rdb id to add to ( see Msg0::getRdb(char rdbId) )
 	char        m_rdbId;
-	char       *m_coll;
+	//char       *m_coll;
+	collnum_t m_collnum;

 	// groupId to send to (may be -1 if it's up to us to decide)
 	unsigned long m_groupId;
--- a/Msg13.cpp
+++ b/Msg13.cpp
@ -721,6 +721,25 @@ void downloadTheDocForReals ( Msg13Request *r ) {
 			"(compatible; MSIE 6.0; Windows 98; "
 			"Win 9x 4.90)" ;

+	// for bulk jobs avoid actual downloads of the page for efficiency
+	if ( r->m_isCustomCrawl == 2 ) {
+		char *s = 
+			"HTTP/1.0 200 (OK)\r\n"
+			"Content-Length: 0\r\n"
+			"Connection: Close\r\n"
+			"Content-Type: text/html\r\n\r\n";
+		long slen = gbstrlen(s);
+		long fakeBufSize = slen + 1;
+		char *fakeBuf = mdup ( s , fakeBufSize , "fkblk");
+		gotHttpReply2 ( r , 
+				fakeBuf,
+				fakeBufSize, // include \0
+				fakeBufSize, // allocsize
+				NULL ); // tcpsock
+		return;
+	}
+
+
 	// download it
 	if ( ! g_httpServer.getDoc ( r->m_url             ,
 				     r->m_urlIp           ,
@ -1390,7 +1409,7 @@ void passOnReply ( void *state , UdpSlot *slot ) {

 //
 //
-// . UTILITY FUNCTIONS for injecting into the "test" collection
+// . UTILITY FUNCTIONS for injecting into the "qatest123" collection
 // . we need to ensure that the web pages remain constant so we store them
 //
 //
@ -1400,7 +1419,7 @@ void passOnReply ( void *state , UdpSlot *slot ) {
 // . now that we are lower level in Msg13.cpp, set "ts" not "slot"
 bool getTestDoc ( char *u , TcpSocket *ts , Msg13Request *r ) {
 	// sanity check
-	//if ( strcmp(m_coll,"test") ) { char *xx=NULL;*xx=0; }
+	//if ( strcmp(m_coll,"qatest123") ) { char *xx=NULL;*xx=0; }
 	// hash the url into 64 bits
 	long long h = hash64 ( u , gbstrlen(u) );
 	// read the spider date file first
@ -1547,7 +1566,7 @@ bool addTestSpideredDate ( Url *u , long spideredTime , char *testDir ) {
 	return true;
 }

-// add it to our "test" subdir
+// add it to our "qatest123" subdir
 bool addTestDoc ( long long urlHash64 , char *httpReply , long httpReplySize ,
 		  long err , Msg13Request *r ) {

@ -1796,7 +1815,7 @@ long hasGoodDates ( char *content ,
 			NULL     , // tag rec
 			NULL     , // url
 			0        , // docid
-			NULL     , // coll
+			0     , // collnum
 			0        , // domhash32
 			0        , // ip
 			niceness ,
--- a/Msg13.h
+++ b/Msg13.h
@ -32,6 +32,8 @@ public:
 	// if doing spider compression, compute contentHash32 of document
 	// downloaded, and if it matches this then send back EDOCUNCHANGED
 	long  m_contentHash32;
+	// copy of CollectionRec::m_customCrawl, 0 1 for crawls or 2 for bulks
+	char m_isCustomCrawl;
 	// send back error ENOGOODDATE if it does not have one. but if
 	// harvestLinks is true, just send back a filtered list of links
 	long  m_requireGoodDate:1;
--- a/Msg17.cpp
+++ b/Msg17.cpp
@ -67,7 +67,8 @@ bool Msg17::getFromCache ( char   cacheId,
 			   key_t  key,
 			   char **recPtr,
 			   long  *recSize,
-			   char  *coll ,
+			   //char  *coll ,
+			   collnum_t collnum ,
 			   void  *state ,
 			   void (*callback) (void *state) ,
 			   long   niceness ,
@ -107,7 +108,7 @@ bool Msg17::getFromCache ( char   cacheId,
 	if ( c ) {
 		time_t cachedTime;
 		// return true if not found in our local cache
-		if ( ! c->getRecord ( coll      ,
+		if ( ! c->getRecord ( collnum   ,
 				      m_key     ,
 				      recPtr    ,
 				      recSize   ,
@ -148,7 +149,8 @@ bool Msg17::getFromCache ( char   cacheId,
 	*p++ = m_cacheId;
 	// the flag (0 means read request, 1 means store request)
 	*p++ = 0;
-	strcpy ( p , coll ); p += gbstrlen ( coll ) + 1;
+	memcpy ( p , &collnum, sizeof(collnum_t)); p += sizeof(collnum_t);
+	//strcpy ( p , coll ); p += gbstrlen ( coll ) + 1;
        // . send the request to the key host
 	// . this returns false and sets g_errno on error
 	// . now wait for 1 sec before timing out
@ -317,13 +319,14 @@ void handleRequest17 ( UdpSlot *slot , long niceness  ) {
 	// then 1-byte flag (0 means read request, 1 means store request)
 	char flag = *p++;
 	// NULL terminated collection name follows
-	char *coll = p; p += gbstrlen ( coll ) + 1 ;
+	//char *coll = p; p += gbstrlen ( coll ) + 1 ;
+	collnum_t collnum = *(collnum_t *)p; p += sizeof(collnum_t);

 	RdbCache *c = &g_genericCache[(int)cacheId];

 	// if flag is 1 then it is a request to store a compressed Msg40
 	if ( flag == 1 ) {
-		if ( ! c->addRecord ( coll ,
+		if ( ! c->addRecord ( collnum ,
 				      k, 
 				      p, 
 				      pend - p ) )
@ -338,7 +341,7 @@ void handleRequest17 ( UdpSlot *slot , long niceness  ) {
 	long   recSize;
 	time_t cachedTime;
 	// send back nothing if not in cache
-	if ( ! c->getRecord ( coll     ,
+	if ( ! c->getRecord ( collnum  ,
 			      k        ,
 			      &rec     ,
 			      &recSize ,
@ -386,7 +389,7 @@ bool Msg17::storeInCache ( char   cacheId ,
 			   key_t  key ,
 			   char  *recPtr ,
 			   long   recSize ,
-			   char  *coll ,
+			   collnum_t collnum, // char  *coll ,
 			   long   niceness ,
 			   long   timeout  ) {

@ -446,7 +449,8 @@ bool Msg17::storeInCache ( char   cacheId ,
 	// use "1" for a store request
 	*p++ = 1;
 	//char *coll = si->m_coll;
-	strcpy ( p , coll ); p += gbstrlen(coll) + 1; // includes '\0'
+	//strcpy ( p , coll ); p += gbstrlen(coll) + 1; // includes '\0'
+	memcpy ( p ,&collnum ,sizeof(collnum_t)); p += sizeof(collnum_t);

 	QUICKPOLL(niceness);

@ -466,7 +470,7 @@ bool Msg17::storeInCache ( char   cacheId ,
 		long avail = pend - p;
 		// save it
 		long saved = avail;
-		long clen = gbstrlen(coll);
+		//long clen = gbstrlen(coll);
 		// compress "tmp" into m_buf, but leave leading bytes
 		// for the key
 		int err = gbcompress ( (unsigned char *)p ,
@ -479,10 +483,10 @@ bool Msg17::storeInCache ( char   cacheId ,
 		if ( err != Z_OK ) { 
 			g_errno = ECOMPRESSFAILED; 
 			log("query: Compression of cache cacheId=%i "
-			    "failed err=%li avail=%li collLen=%li "
+			    "failed err=%li avail=%li collnum=%li "
 			    "recSize=%li.", 
 			    cacheId , (long)err ,
-			    saved , clen , recSize );
+			    saved , (long)collnum , recSize );
 			return true;
 		}
 	}
@ -506,7 +510,7 @@ bool Msg17::storeInCache ( char   cacheId ,
 	// if we are that host, store it ourselves right now
 	if ( host->m_hostId == g_hostdb.m_hostId ) {
 		RdbCache *c = &g_genericCache[(int)m_cacheId];
-		if ( ! c->addRecord ( coll ,
+		if ( ! c->addRecord ( collnum ,
 				      key  ,
 				      cacheRec     ,
 				      cacheRecSize ) )
--- a/Msg17.h
+++ b/Msg17.h
@ -53,7 +53,7 @@ class Msg17 {
 			    key_t  key ,
 			    char **recPtr ,
 			    long  *recSize ,
-			    char  *coll,
+			    collnum_t collnum,
 			    void  *state ,
 			    void (* callback) (void *state) ,
 			    long   niceness,
@ -83,7 +83,7 @@ class Msg17 {
 			    key_t key ,
 			    char *recPtr ,
 			    long  recSize ,
-			    char *coll ,
+			    collnum_t collnum,
 			    long  niceness ,
 			    long  timeout );

@ -116,7 +116,7 @@ class Msg17 {
 	// ptr to "it"
 	class Msg40 *m_msg40;

-	char m_request [ 12 + 1 + MAX_COLL_LEN + 1 ]; // key + flag + coll name
+	char m_request [ 12 + 1 + 8 ]; // key + flag + collnum_t
 };

 // allow our cache to be used by PageStats.cpp to display its stats
--- a/Msg2.cpp
+++ b/Msg2.cpp
@ -28,7 +28,7 @@ Msg2 *g_msg2;
 //   other termlists have a componentCode of -2. These are typically taken
 //   from the Query.cpp class.
 bool Msg2::getLists ( long     rdbId       ,
-		      char    *coll        ,
+		      collnum_t collnum , // char    *coll        ,
 		      long     maxAge      ,
 		      bool     addToCache  ,
 		      //QueryTerm *qterms ,
@ -53,7 +53,7 @@ bool Msg2::getLists ( long     rdbId       ,
 		      char     forceParitySplit    ,
 		      bool     checkCache          ) {
 	// warning
-	if ( ! coll ) log(LOG_LOGIC,"net: NULL collection. msg2.");
+	if ( collnum < 0 ) log(LOG_LOGIC,"net: bad collection. msg2.");
 	if ( ! minRecSizes ) { 
 		g_errno = EBADENGINEER;
 		log(LOG_LOGIC,"net: MinRecSizes is NULL.");
@ -82,7 +82,7 @@ bool Msg2::getLists ( long     rdbId       ,
 	m_getComponents       = false;
 	m_rdbId               = rdbId;
 	m_addToCache          = addToCache;
-	m_coll                = coll;
+	m_collnum             = collnum;
 	m_restrictPosdb       = restrictPosdb;
 	m_forceParitySplit    = forceParitySplit;
 	m_checkCache          = checkCache;
@ -278,7 +278,7 @@ bool Msg2::getLists ( ) {
 		// . we now always compress the list for 2x faster transmits
 		if ( ! msg5->getList ( 
 					   m_rdbId         , // rdbid
-					   m_coll        ,
+					   m_collnum      ,
 					   &m_lists[m_i], // listPtr
 					   sk2,//&m_startKeys  [i*ks],
 					   ek2,//&m_endKeys    [i*ks],
@ -410,7 +410,7 @@ bool Msg2::getLists ( ) {
 		// start up the read. thread will wait in thread queue to 
 		// launch if too many threads are out.
 		if ( ! msg5->getList ( 	   m_rdbId         , // rdbid
-					   m_coll        ,
+					   m_collnum        ,
 					   &m_whiteLists[m_w], // listPtr
 					   &sk3,//&m_startKeys  [i*ks],
 					   &ek3,//&m_endKeys    [i*ks],
--- a/Msg2.h
+++ b/Msg2.h
@ -33,7 +33,7 @@ class Msg2 {
 	// . sets errno on error
 	// . "termIds/termFreqs" should NOT be on the stack in case we block
 	bool getLists ( long     rdbId       ,
-			char    *coll        ,
+			collnum_t collnum ,//char    *coll        ,
 			long     maxAge      ,
 			bool     addToCache  ,
 			//key_t   *startKeys   ,
@ -123,7 +123,8 @@ class Msg2 {
 	bool     m_getComponents;
 	char     m_rdbId;
 	bool     m_addToCache;
-	char    *m_coll;
+	//char    *m_coll;
+	collnum_t m_collnum;
 	bool     m_restrictPosdb;
 	long     m_compoundListMaxSize;
 	char     m_forceParitySplit;
--- a/Msg20.cpp
+++ b/Msg20.cpp
@ -14,6 +14,7 @@ void Msg20::constructor () {
 	m_r       = NULL;
 	m_inProgress = false;
 	m_launched = false;
+	m_ii = -1;
 	reset();
 	m_mcast.constructor();
 }
@ -22,6 +23,18 @@ void Msg20::destructor  () { reset(); m_mcast.destructor(); }

 #include "Process.h"

+void Msg20::freeReply() {
+	// . the msg20 reply sometimes carries a merged buffer that msg40
+	//   constructed (ptr_eventSummaryLines) during a merge operation, so
+	//   call the reply's destructor before letting go of the reply
+	// . this fixes the "merge20buf1" memory leak from Msg40.cpp
+	if ( m_r ) {
+		m_r->destructor();
+		if ( m_ownReply ) mfree ( m_r, m_replyMaxSize , "Msg20b"  );
+		m_r = NULL;
+	}
+}
+
 void Msg20::reset() { 
 	// not allowed to reset one in progress
 	if ( m_inProgress ) { 
@ -33,15 +46,12 @@ void Msg20::reset() {
 	m_launched = false;
 	if ( m_request && m_request   != m_requestBuf )
 		mfree ( m_request , m_requestSize  , "Msg20rb" );
-	// sometimes the msg20 reply carries an merged bffer from
-	// msg40 that is a constructed ptr_eventSummaryLines from a
-	// merge operation in msg40. this fixes the "merge20buf1" memory
-	// leak from Msg40.cpp
-	if ( m_r ) m_r->destructor();
-	if ( m_r && m_ownReply ) //&& (char *)m_r != m_replyBuf )
-		mfree ( m_r       , m_replyMaxSize , "Msg20b"  );
+	freeReply();
+	//if ( m_r ) m_r->destructor();
+	//if ( m_r && m_ownReply ) //&& (char *)m_r != m_replyBuf )
+	//	mfree ( m_r       , m_replyMaxSize , "Msg20b"  );
+	//m_r            = NULL; // the reply ptr
 	m_request      = NULL; // the request buf ptr
-	m_r            = NULL; // the reply ptr
 	m_gotReply     = false;
 	m_errno        = 0;
 	m_requestDocId = -1LL;
@ -268,6 +278,13 @@ void Msg20::gotReply ( UdpSlot *slot ) {
 	m_inProgress = false;
 	// sanity check
 	if ( m_r ) { char *xx = NULL; *xx = 0; }
+
+	// free our serialized request buffer to save mem
+	if ( m_request && m_request   != m_requestBuf ) {
+		mfree ( m_request , m_requestSize  , "Msg20rb" );
+		m_request = NULL;
+	}
+
 	// save error so Msg40 can look at it
 	if ( g_errno ) { 
 		m_errno = g_errno; 
@ -349,7 +366,7 @@ void handleRequest20 ( UdpSlot *slot , long netnice ) {
 	if ( nb != slot->m_readBufSize ) { char *xx = NULL; *xx = 0; }

 	// sanity check, the size include the \0
-	if ( req->size_coll <= 1 || *req->ptr_coll == '\0' ) {
+	if ( req->m_collnum < 0 ) {
 		log("query: Got empty collection in msg20 handler. FIX!");
 		char *xx =NULL; *xx = 0; 
 	}
--- a/Msg20.h
+++ b/Msg20.h
@ -121,6 +121,8 @@ class Msg20Request {
 	//   serialized using Address::serialize(), and all the start dates
 	//   from now onward
 	long       m_eventId                   ;
+	// we now use the numeric collection # and not the ptr_coll
+	collnum_t  m_collnum;
 	// set this to true when you pass in m_eventIdBits...
 	char       m_getEventSummary           ;
 	char       m_summaryMode               ;
@ -189,7 +191,7 @@ class Msg20Request {
 	char      *ptr_termFreqs     ;
 	char      *ptr_affWeights    ;
 	char      *ptr_linkee        ; // used by Msg25 for getting link text
-	char      *ptr_coll          ;
+	//char      *ptr_coll          ;
 	char      *ptr_imgUrl        ;
 	char      *ptr_displayMetas  ;

@ -206,7 +208,7 @@ class Msg20Request {
 	long       size_termFreqs    ;
 	long       size_affWeights   ;
 	long       size_linkee       ; // size includes terminating \0
-	long       size_coll         ; // size includes terminating \0
+	//long       size_coll         ; // size includes terminating \0
 	long       size_imgUrl       ;
 	long       size_displayMetas ; // size includes terminating \0

@ -309,6 +311,7 @@ public:
 	//long       m_numLikers           ;
        bool       m_datedbDateIsEstimated;
 	long       m_errno               ; // LinkInfo uses it for LinkTextRepl
+	collnum_t  m_collnum             ; // collection # we came from
 	char       m_sumFromDmoz         ; // unused
 	long       m_hostHash            ;
 	char       m_noArchive           ;
@ -334,7 +337,7 @@ public:
 	//long     m_numCatIds           ; // use size_catIds
 	//long     m_numIndCatIds        ; // use size_indCatIds
 	long       m_contentLen          ; // was m_docLen
-	//long       m_contentHash         ;
+	long       m_contentHash32       ;  // for deduping diffbot json objects streaming
 	//long     m_docSummaryScore     ;
 	//long     m_inSectionScore      ;
 	//float      m_proximityScore      ;
@ -780,9 +783,11 @@ class Msg20 {
 	// so we can alloc arrays of these using mmalloc()
 	void constructor ();
 	void destructor  ();
+	void freeReply   ();
 	void reset       ();

 	long m_hack;
+	long m_ii;

 	// is the reply in progress? if msg20 has not launched a request
 	// this is false. if msg20 received its reply, this is false. 
--- a/Msg22.cpp
+++ b/Msg22.cpp
@ -320,7 +320,7 @@ void handleRequest22 ( UdpSlot *slot , long netnice ) {
 	// get the request
 	Msg22Request *r = (Msg22Request *)slot->m_readBuf;
       // get this
-       char *coll = g_collectiondb.getCollName ( r->m_collnum );
+	//char *coll = g_collectiondb.getCollName ( r->m_collnum );

 	// sanity check
 	long  requestSize = slot->m_readBufSize;
@ -333,10 +333,10 @@ void handleRequest22 ( UdpSlot *slot , long netnice ) {

 	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
 	RdbBase *tbase; 
-	if ( ! (tbase=getRdbBase(RDB_TITLEDB,coll) ) ) {
-		log("db: Could not get title rec in collection \"%s\" "
+	if ( ! (tbase=getRdbBase(RDB_TITLEDB,r->m_collnum) ) ) {
+		log("db: Could not get title rec in collection # %li "
 		    "because rdbbase is null.",
-		    coll);
+		    (long)r->m_collnum);
 		g_errno = EBADENGINEER;
 		us->sendErrorReply ( slot , g_errno ); 
 		return; 
@ -763,7 +763,7 @@ void gotUrlListWrapper ( void *state , RdbList *list , Msg5 *msg5 ) {
 	// . our file range should be solid
 	// . use 500 million for min recsizes to get all in range
 	if ( ! st->m_msg5.getList ( RDB_TITLEDB       ,
-				    coll              ,
+				    r->m_collnum ,
 				    &st->m_tlist      ,
 				    startKey          , // startKey
 				    endKey            , // endKey
--- a/Msg3.cpp
+++ b/Msg3.cpp
@ -63,7 +63,7 @@ void Msg3::reset() {
 //   in Sync class can just read from titledb*.dat files that were formed
 //   since the last sync point.
 bool Msg3::readList  ( char           rdbId         ,
-		       char          *coll          ,
+		       collnum_t collnum ,
 		       //key_t          startKey      , 
 		       //key_t          endKey        , 
 		       char          *startKeyArg      , 
@ -94,10 +94,10 @@ bool Msg3::readList  ( char           rdbId         ,
 	// reset m_alloc and data in all lists in case we are a re-call
 	reset();
 	// warning
-	if ( ! coll ) log(LOG_LOGIC,"net: NULL collection. msg3.");
+	if ( collnum < 0 ) log(LOG_LOGIC,"net: NULL collection. msg3.");
 	// remember the callback
 	m_rdbId              = rdbId;
-	m_coll               = coll;
+	m_collnum = collnum;
 	m_callback           = callback;
 	m_state              = state;
 	m_niceness           = niceness;
@ -136,7 +136,7 @@ bool Msg3::readList  ( char           rdbId         ,
 	long max ;

 	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
-	RdbBase *base; if (!(base=getRdbBase(m_rdbId,m_coll))) return true;
+	RdbBase *base; if (!(base=getRdbBase(m_rdbId,m_collnum))) return true;

 	// if caller specified exactly
 	/*
@ -673,7 +673,7 @@ void doneScanningWrapper ( void *state ) {
 	// if we had an error, remember it
 	if ( g_errno ) { 
 		// get base, returns NULL and sets g_errno to ENOCOLLREC on err
-		RdbBase *base; base=getRdbBase(THIS->m_rdbId,THIS->m_coll);
+		RdbBase *base; base=getRdbBase(THIS->m_rdbId,THIS->m_collnum);
 		char *dbname = "NOT FOUND";
 		if ( base ) dbname = base->m_dbname;
 		long tt = LOG_WARN;
@ -783,7 +783,7 @@ bool Msg3::doneScanning ( ) {
 	}

 	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
-	RdbBase *base; if (!(base=getRdbBase(m_rdbId,m_coll))) return true;
+	RdbBase *base; if (!(base=getRdbBase(m_rdbId,m_collnum))) return true;

 	// this really slows things down because it blocks the cpu so
 	// leave it out for now
@ -964,7 +964,7 @@ bool Msg3::doneSleeping ( ) {
 	g_loop.unregisterSleepCallback(this,doneSleepingWrapper3);
 	// read again
 	if ( ! readList ( m_rdbId            ,
-			  m_coll             ,
+			  m_collnum          ,
 			  m_startKey         ,
 			  m_endKeyOrig       ,
 			  m_minRecSizesOrig  ,
--- a/Msg3.h
+++ b/Msg3.h
@ -50,7 +50,8 @@ class Msg3 {
 	//   by Msg5.cpp to constrain the endKey so it can read the recs
 	//   from the tree using that endKey, and not waste time.
 	bool readList  ( char           rdbId         ,
-			 char          *coll          ,
+			 //char          *coll          ,
+			 collnum_t collnum ,
 			 //key_t          startKey      , 
 			 //key_t          endKey        , 
 			 char          *startKey      , 
@ -110,7 +111,8 @@ class Msg3 {

 	// the rdb we're scanning for
 	char  m_rdbId;
-	char *m_coll;
+	//char *m_coll;
+	collnum_t m_collnum;

 	// the scan classes, 1 per file, used to read from that file
 	RdbScan *m_scans ; // [ MAX_RDB_FILES ];
--- a/Msg36.cpp
+++ b/Msg36.cpp
@ -36,7 +36,7 @@ bool Msg36::registerHandler ( ) {
 // . sets g_errno on error
 // . "termIds/termFreqs" should NOT be on the stack in case we block
 // . i based this on ../titledb/Msg23.cpp 
-bool Msg36::getTermFreq ( char      *coll       ,
+bool Msg36::getTermFreq ( collnum_t collnum , // char      *coll       ,
 			  long       maxAge     ,
 			  long long  termId     ,
 			  void      *state      ,
@ -53,7 +53,7 @@ bool Msg36::getTermFreq ( char      *coll       ,
 		return true;
 	}
 	// warning
-	if ( ! coll ) log(LOG_LOGIC,"quota: msg36: NULL collection.");
+	if ( collnum < 0 ) log(LOG_LOGIC,"quota: msg36: bad collection.");
 	// no more quotas here!
 	if ( incCount || decCount ) { char *xx = NULL; *xx = 0; }
 	// sanity check
@ -117,7 +117,7 @@ bool Msg36::getTermFreq ( char      *coll       ,
 		//unsigned long i = ((unsigned long)groupId/*key*/) % numHosts;
 		// if it's us then no need to multicast to ourselves
 		//if(hosts[i].m_hostId==g_hostdb.m_hostId||g_conf.m_fullSplit) {
-		m_termFreq = g_posdb.getTermFreq ( coll , termId );
+		m_termFreq = g_posdb.getTermFreq ( collnum , termId );
 		// clear g_errno
 		g_errno = 0;
 		return true;
@ -134,7 +134,8 @@ bool Msg36::getTermFreq ( char      *coll       ,
 	if ( m_niceness ) *p |= 0x08;
 	p++;
 	*(long long *)p = termId ; p += sizeof(long long);
-	strcpy ( p , coll ); p += gbstrlen(coll) + 1; // copy includes \0
+	//strcpy ( p , coll ); p += gbstrlen(coll) + 1; // copy includes \0
+	*(collnum_t *)p = collnum; p += sizeof(collnum_t);

 	long timeout = 5;
 	//if ( incCount || decCount ) timeout = 9999999;
@ -339,12 +340,13 @@ void handleRequest36 ( UdpSlot *slot , long netnice ) {
 	//if ( *request & 0x04 ) decCount   = true;
 	if ( *request & 0x08 ) niceness   = MAX_NICENESS;
 	long long  termId = *(long long *) (request+1) ; 
-	char      *coll   = request + 8 + 1;
+	//char      *coll   = request + 8 + 1;
+	collnum_t collnum = *(collnum_t *)(request + 8 + 1);

 	// if there is no way this termlist size exceeds exactMax, then just
 	// return the approximation we got, saves on disk seeks
 	if ( ! exactCount ) {//&& ! incCount && ! decCount ) { //max<exactMax){
-		long long termFreq = g_posdb.getTermFreq(coll,termId);
+		long long termFreq = g_posdb.getTermFreq(collnum,termId);
 		// no need to malloc since we have the tmp buf
 		char *reply = slot->m_tmpBuf;
 		*(long long *)reply = termFreq ;
@ -355,7 +357,7 @@ void handleRequest36 ( UdpSlot *slot , long netnice ) {
 	}

 	// check our cache for this termid and collection, 
-	collnum_t collnum = g_collectiondb.getCollnum(coll);
+	//collnum_t collnum = g_collectiondb.getCollnum(coll);
 	if ( collnum < 0 ) {
 		g_errno = ENOCOLLREC;
 		log("quota: msg36: collection does not exist.");
@ -508,10 +510,10 @@ void callMsg5 ( State36 *st , key144_t startKey , key144_t endKey  ) {
 	// . TODO: if quota is over about 30 million docs for a particular site
 	//   then we will need to fix this code, cuz it only reads up to 
 	//   200MB (MRS) if the site: termlist
-	char *coll = g_collectiondb.getCollName ( st->m_collnum );
+	//char *coll = g_collectiondb.getCollName ( st->m_collnum );
 	//log (LOG_WARN,"build: getting frequency from disk");
 	if ( ! st->m_msg5.getList ( RDB_POSDB    ,
-				    coll           ,
+				    st->m_collnum           ,
 				    &st->m_list    ,
 				    &startKey       ,
 				    &endKey         ,
--- a/Msg36.h
+++ b/Msg36.h
@ -28,7 +28,7 @@ class Msg36 {
 	// . sets errno on error
 	// . "termFreq" should NOT be on the stack in case we block
 	// . sets *termFreq to UPPER BOUND on # of records with that "termId"
-	bool getTermFreq ( char       *coll       ,
+	bool getTermFreq ( collnum_t collnum,//char       *coll       ,
 			   long        maxAge     ,
 			   long long   termId     ,
 			   void       *state      ,
--- a/Msg37.cpp
+++ b/Msg37.cpp
@ -9,7 +9,7 @@ static void gotTermFreqWrapper ( void *state ) ;
 // . "termIds/termFreqs" should NOT be on the stack in case we block
 // . i based this on ../titled/Msg25.cpp since it sends out multiple msgs at 
 //   the same time, too
-bool Msg37::getTermFreqs ( char       *coll       ,
+bool Msg37::getTermFreqs ( collnum_t collnum,//char       *coll       ,
 			   long        maxAge     ,
 			   long long  *termIds    ,
 			   long        numTerms   ,
@ -20,7 +20,7 @@ bool Msg37::getTermFreqs ( char       *coll       ,
 			   bool        exactCount ) {

 	// warning
-	if ( ! coll ) log(LOG_LOGIC,"net: NULL collection. msg37.");
+	if ( collnum < 0 ) log(LOG_LOGIC,"net: bad collection. msg37.");
 	// we haven't got any responses as of yet or sent any requests
 	m_callback    = callback;
 	m_state       = state;
@ -31,7 +31,8 @@ bool Msg37::getTermFreqs ( char       *coll       ,
 	m_errno       = 0;
 	m_numTerms    = numTerms;
 	m_termFreqs   = termFreqs;
-	m_coll        = coll;
+	m_collnum     = collnum;
+	//m_coll        = coll;
 	m_maxAge      = maxAge;
 	m_termIds     = termIds;
 	// set all to 1 in case there's an error
@ -84,7 +85,7 @@ bool Msg37::launchRequests ( ) {
 		m_msg36[j].m_i    = m_i;
 		// . start up a Msg36 to get it
 		// . this will return false if blocks
-		if ( ! m_msg36[j].getTermFreq ( m_coll ,
+		if ( ! m_msg36[j].getTermFreq ( m_collnum ,
 						m_maxAge ,
 						m_termIds[m_i] ,
 						&m_msg36[j],
--- a/Msg37.h
+++ b/Msg37.h
@ -22,7 +22,7 @@ class Msg37 {
 	// . returns false if blocked, true otherwise
 	// . sets errno on error
 	// . "termIds/termFreqs" should NOT be on the stack in case we block
-	bool getTermFreqs ( char       *coll       ,
+	bool getTermFreqs ( collnum_t collnum ,
 			    long        maxAge     ,
 			    long long  *termIds    ,
 			    long        numTermIds ,
@ -58,7 +58,7 @@ class Msg37 {

 	bool  m_exactCount;

-	char *m_coll;
+	collnum_t m_collnum;

 	long        m_maxAge;
 	long long  *m_termIds ;
--- a/Msg39.cpp
+++ b/Msg39.cpp
@ -151,7 +151,7 @@ void Msg39::getDocIds ( UdpSlot *slot ) {
 	// deserialize it before we do anything else
 	long finalSize = deserializeMsg ( sizeof(Msg39Request) ,
 					  &m_r->size_readSizes ,
-					  &m_r->size_coll ,
+					  &m_r->size_whiteList,//coll ,
 					  &m_r->ptr_readSizes,
 					  m_r->m_buf );

@ -176,15 +176,17 @@ void Msg39::getDocIds2 ( Msg39Request *req ) {
 	if ( g_conf.m_logTimingQuery ) m_debug = true;

        // ensure it's size is ok
-        if ( m_r->size_coll <= 0 ) {
+	/*
+        if ( m_r->size_whiteList <= 0 ) {
 		g_errno = ENOCOLLREC;
 		log(LOG_LOGIC,"query: msg39: getDocIds: %s." , 
 		    mstrerror(g_errno) );
 		sendReply ( m_slot , this , NULL , 0 , 0 , true );
 		return ; 
 	}
+	*/

-        CollectionRec *cr = g_collectiondb.getRec ( m_r->ptr_coll );
+        CollectionRec *cr = g_collectiondb.getRec ( m_r->m_collnum );
        if ( ! cr ) {
 		g_errno = ENOCOLLREC;
 		log(LOG_LOGIC,"query: msg39: getDocIds: %s." , 
@ -541,7 +543,7 @@ bool Msg39::getLists () {
 			     "component=%li "
 			     "otermLen=%li "
 			     "isSynonym=%li "
-			     "querylangid=%li ",
+			     "querylangid=%li " ,
 			     (long)this ,
 			     i          ,
 			     qt->m_term,//bb ,
@ -567,7 +569,7 @@ bool Msg39::getLists () {
 			     (long)m_tmpq.m_componentCodes[i],
 			     (long)m_tmpq.getTermLen(i) ,
 			     isSynonym,
-			     (long)m_tmpq.m_langId); // ,tt
+			     (long)m_tmpq.m_langId ); // ,tt
 			// put it back
 			*tpc = tmp;
 			if ( st ) {
@ -614,7 +616,7 @@ bool Msg39::getLists () {
 	long split = g_hostdb.m_myHost->m_shardNum;
 	// call msg2
 	if ( ! m_msg2.getLists ( rdbId                      ,
-				 m_r->ptr_coll              ,
+				 m_r->m_collnum,//m_r->ptr_coll              ,
 				 m_r->m_maxAge              ,
 				 m_r->m_addToCache          ,
 				 //m_tmpq.m_qterms ,
@ -659,6 +661,7 @@ void gotListsWrapper ( void *state ) {
 	Msg39 *THIS = (Msg39 *) state;
 	// . hash the lists into our index table
 	// . this will send back a reply or recycle and read more list data
+
 	if ( ! THIS->gotLists ( true ) ) return;

 	// . if he did not block and there was an errno we send reply
@ -669,6 +672,12 @@ void gotListsWrapper ( void *state ) {
 		log("msg39: sending back error reply = %s",mstrerror(g_errno));
 		sendReply ( THIS->m_slot , THIS , NULL , 0 , 0 ,true);
 	}
+
+	// did not block? call the docid split loop
+	//if ( numDocIdSplits <= 1 ) return;
+
+	// if we get the lists and processed them without blocking, repeat!
+	THIS->doDocIdSplitLoop();
 }

 // . now come here when we got the necessary index lists
@ -677,6 +686,7 @@ void gotListsWrapper ( void *state ) {
 bool Msg39::gotLists ( bool updateReadInfo ) {
 	// bail on error
 	if ( g_errno ) { 
+	hadError:
 		log("msg39: Had error getting termlists: %s.",
 		    mstrerror(g_errno));
 		if ( ! g_errno ) { char *xx=NULL;*xx=0; }
@ -694,6 +704,13 @@ bool Msg39::gotLists ( bool updateReadInfo ) {
 	// breathe
 	QUICKPOLL ( m_r->m_niceness );

+	// ensure collection not deleted from under us
+	CollectionRec *cr = g_collectiondb.getRec ( m_r->m_collnum );
+	if ( ! cr ) {
+		g_errno = ENOCOLLREC;
+		goto hadError;
+	}
+
 	// . set the IndexTable so it can set it's score weights from the
 	//   termFreqs of each termId in the query
 	// . this now takes into account the special termIds used for sorting
@ -707,7 +724,7 @@ bool Msg39::gotLists ( bool updateReadInfo ) {
 			    m_debug              ,
 			    this                   ,
 			    &m_tt                  ,
-			    m_r->ptr_coll          , 
+			    m_r->m_collnum,//ptr_coll          , 
 			    &m_msg2 , // m_lists                ,
 			    //m_tmpq.m_numTerms      , // m_numLists
 			    m_r                              );
@ -743,10 +760,25 @@ bool Msg39::gotLists ( bool updateReadInfo ) {
 	// . now we must call this separately here, not in allocTopTree()
 	// . we have to re-set the QueryTermInfos with each docid range split
 	//   since it will set the list ptrs from the msg2 lists
-	if ( m_r->m_useNewAlgo && ! m_posdbTable.setQueryTermInfo () ) {
-		return true;
+	if ( ! m_posdbTable.setQueryTermInfo () ) return true;
+
+	// print query term bit numbers here
+	for ( long i = 0 ; 
+	      m_debug && i < m_tmpq.getNumTerms() ; i++ ) {
+		QueryTerm *qt = &m_tmpq.m_qterms[i];
+		//utf16ToUtf8(bb, 256, qt->m_term, qt->m_termLen);
+		char *tpc = qt->m_term + qt->m_termLen;
+		char  tmp = *tpc;
+		*tpc = '\0';
+		SafeBuf sb;
+		sb.safePrintf("query: msg39: BITNUM query term #%li \"%s\" "
+			      "bitnum=%li ", i , qt->m_term, qt->m_bitNum );
+		// put it back
+		*tpc = tmp;
+		logf(LOG_DEBUG,"%s",sb.getBufStart());
 	}

+
 	// timestamp log
 	if ( m_debug ) {
 		log(LOG_DEBUG,"query: msg39: [%lu] Preparing to intersect "
@ -777,7 +809,8 @@ bool Msg39::gotLists ( bool updateReadInfo ) {

 	// . create the thread
 	// . only one of these type of threads should be launched at a time
-	if ( g_threads.call ( INTERSECT_THREAD  , // threadType
+	if ( ! m_debug &&
+	     g_threads.call ( INTERSECT_THREAD  , // threadType
 			      m_r->m_niceness   ,
 			      this              , // top 4 bytes must be cback
 			      threadDoneWrapper ,
@ -806,6 +839,7 @@ bool Msg39::gotLists ( bool updateReadInfo ) {
 	// time it
 	diff = gettimeofdayInMilliseconds() - start;
 	if ( diff > 10 ) log("query: Took %lli ms for intersection",diff);
+
 	// returns false if blocked, true otherwise
 	return addedLists ();
 }
@ -982,7 +1016,7 @@ bool Msg39::setClusterRecs ( ) {
 					m_clusterLevels       ,
 					m_clusterRecs         ,
 					m_numClusterDocIds    ,
-					m_r->ptr_coll         ,
+					m_r->m_collnum ,
 					0                     , // maxAge
 					false                 , // addToCache
 					this                  ,
@ -1095,7 +1129,7 @@ void Msg39::estimateHits ( ) {

 	// convenience ptrs. we will store the docids/scores into these arrays
 	long long *topDocIds;
-	float     *topScores;
+	double    *topScores;
 	key_t     *topRecs;

 	// numDocIds counts docs in all tiers when using toptree.
@ -1162,7 +1196,7 @@ void Msg39::estimateHits ( ) {
 		mr.ptr_clusterRecs  = NULL;
 		// this is how much space to reserve
 		mr.size_docIds      = 8 * numDocIds; // long long
-		mr.size_scores      = 4 * numDocIds; // float
+		mr.size_scores      = sizeof(double) * numDocIds; // double
 		// if not doing site clustering, we won't have these perhaps...
 		if ( m_gotClusterRecs ) 
 			mr.size_clusterRecs = sizeof(key_t) *numDocIds;
@ -1190,7 +1224,7 @@ void Msg39::estimateHits ( ) {
 			return ; 
 		}
 		topDocIds    = (long long *) mr.ptr_docIds;
-		topScores    = (float     *) mr.ptr_scores;
+		topScores    = (double    *) mr.ptr_scores;
 		topRecs      = (key_t     *) mr.ptr_clusterRecs;
 	}

@ -1224,6 +1258,8 @@ void Msg39::estimateHits ( ) {
 		//add it to the reply
 		topDocIds         [docCount] = t->m_docId;
 		topScores         [docCount] = t->m_score;
+		if ( m_tt.m_useIntScores ) 
+			topScores[docCount] = (double)t->m_intScore;
 		// supply clusterdb rec? only for full splits
 		if ( m_gotClusterRecs ) 
 			topRecs [docCount] = t->m_clusterRec;
--- a/Msg39.h
+++ b/Msg39.h
@ -49,6 +49,7 @@ class Msg39Request {
 		m_useMinAlgo              = false;
 		m_fastIntersection        = -1;
 		m_stripe                  = 0;
+		m_collnum                 = -1;
 		m_useQueryStopWords       = true;
 		m_useNewAlgo              = true;
 		m_doMaxScoreAlgo          = true;
@ -58,12 +59,12 @@ class Msg39Request {
 		ptr_readSizes             = NULL;
 		ptr_query                 = NULL; // in utf8?
 		ptr_whiteList             = NULL;
-		ptr_coll                  = NULL;
+		//ptr_coll                  = NULL;

 		size_readSizes            = 0;
 		size_query                = 0;
 		size_whiteList            = 0;
-		size_coll                 = 0;
+		//size_coll                 = 0;

 		m_getDocIdScoringInfo = 1;

@ -115,6 +116,8 @@ class Msg39Request {
 	char    m_useMinAlgo;
 	char    m_fastIntersection;

+	collnum_t m_collnum;
+
 	long long m_minDocId;
 	long long m_maxDocId;
 	bool      m_makeReply;
@ -128,13 +131,13 @@ class Msg39Request {
 	char   *ptr_termFreqWeights;
 	char   *ptr_query; // in utf8?
 	char   *ptr_whiteList;
-	char   *ptr_coll;
+	//char   *ptr_coll;
 	
 	long    size_readSizes;
 	long    size_termFreqWeights;
 	long    size_query;
 	long    size_whiteList;
-	long    size_coll;
+	//long    size_coll;

 	char    m_buf[0];
 };
@ -158,7 +161,7 @@ public:
 	long   m_errno;

 	char  *ptr_docIds         ; // the results, long long
-	char  *ptr_scores;        ; // floats
+	char  *ptr_scores;        ; // now doubles! so we can have intScores
 	char  *ptr_scoreInfo      ; // transparency info
 	char  *ptr_pairScoreBuf   ; // transparency info
 	char  *ptr_singleScoreBuf ; // transparency info
--- a/Msg3a.cpp
+++ b/Msg3a.cpp
@ -20,6 +20,7 @@ void Msg3a::constructor ( ) {
 	m_finalBuf     = NULL;
 	m_docsToGet    = 0;
 	m_numDocIds    = 0;
+	m_collnums     = NULL;

 	// need to call all safebuf constructors now to set m_label
 	m_rbuf2.constructor();
@ -68,6 +69,8 @@ void Msg3a::reset ( ) {
 	m_docsToGet    = 0;
 	m_errno        = 0;
 	m_numDocIds    = 0;
+	m_collnums     = NULL;
+	m_numTotalEstimatedHits = 0LL;
 }

 Msg39Request *g_r = NULL;
@ -139,8 +142,9 @@ bool Msg3a::getDocIds ( Msg39Request *r          ,
 	m_state    = state;

 	// warning. coll size includes \0
-	if ( ! m_r->ptr_coll || m_r->size_coll-1 <= 0 ) 
-		log(LOG_LOGIC,"net: NULL or bad collection. msg3a.");
+	if ( m_r->m_collnum < 0 ) // negative collnum (e.g. unset -1) is invalid
+		log(LOG_LOGIC,"net: bad collection. msg3a. %li",
+		    (long)m_r->m_collnum);

 	//m_indexdbSplit = g_hostdb.m_indexSplits;
 	// certain query term, like, gbdom:xyz.com, are NOT split
@ -171,7 +175,7 @@ bool Msg3a::getDocIds ( Msg39Request *r          ,
 		return true;
 	// . set g_errno if not found and return true
 	// . coll is null terminated
-	CollectionRec *cr = g_collectiondb.getRec(r->ptr_coll, r->size_coll-1);
+	CollectionRec *cr = g_collectiondb.getRec(r->m_collnum);
 	if ( ! cr ) { g_errno = ENOCOLLREC; return true; }

 	// query is truncated if had too many terms in it
@ -201,7 +205,7 @@ bool Msg3a::getDocIds ( Msg39Request *r          ,
 	if ( m_r->m_useSeoResultsCache ) {
 		// the all important seo results cache key
 		m_ckey.n0 = hash64 ( m_r->ptr_query ,m_r->size_query - 1 ,0 );
-		m_ckey.n0 = hash64 ( m_r->ptr_coll,m_r->size_coll,  m_ckey.n0);
+		m_ckey.n0 = hash64h ( (long long)m_r->m_collnum,  m_ckey.n0);
 		m_ckey.n0 = hash64 ( (char *)&m_r->m_language,1 ,  m_ckey.n0 );
 		m_ckey.n0 = hash64 ( (char *)&m_r->m_docsToGet,4,  m_ckey.n0 );
 		// this should be non-zero so g_hostdb.getGroupId(RDB_SERPDB)
@ -236,7 +240,7 @@ bool Msg3a::getDocIds ( Msg39Request *r          ,
 					0 , // maxcacheage
 					false, // addtocache?
 					RDB_SERPDB,//RDB_CACHEDB,
-					m_r->ptr_coll,
+					m_r->m_collnum,//ptr_coll,
 					&m_seoCacheList,
 					(char *)&startKey ,
 					(char *)&endKey,
@ -277,8 +281,8 @@ bool Msg3a::gotCacheReply ( ) {
 		m_docIds = (long long *)p;
 		p += 8 * m_numDocIds;
 		// scores
-		m_scores = (float *)p;
-		p += sizeof(float) * m_numDocIds;
+		m_scores = (double *)p;
+		p += sizeof(double) * m_numDocIds;
 		// site hashes
 		m_siteHashes26 = (long *)p;
 		p += 4 * m_numDocIds;
@ -303,10 +307,10 @@ bool Msg3a::gotCacheReply ( ) {
 		return true;
 	}

-	CollectionRec *cr;
-	cr = g_collectiondb.getRec(m_r->ptr_coll,m_r->size_coll-1);
+	//CollectionRec *cr;
+	//cr = g_collectiondb.getRec(m_r->ptr_coll,m_r->size_coll-1);

-	setTermFreqWeights ( cr->m_coll,m_q,m_termFreqs , m_termFreqWeights );
+	setTermFreqWeights ( m_r->m_collnum,m_q,m_termFreqs,m_termFreqWeights);

 	if ( m_debug ) {
 		//long long *termIds = m_q->getTermIds();
@ -402,7 +406,7 @@ bool Msg3a::gotCacheReply ( ) {
 	//   end up copying over ourselves.
 	m_rbufPtr = serializeMsg ( sizeof(Msg39Request),
 				   &m_r->size_readSizes,
-				   &m_r->size_coll,
+				   &m_r->size_whiteList,
 				   &m_r->ptr_readSizes,
 				   m_r,
 				   &m_rbufSize , 
@ -727,20 +731,20 @@ bool Msg3a::gotAllSplitReplies ( ) {
 		if ( ! m_debug ) continue;
 		// cast these for printing out
 		long long *docIds    = (long long *)mr->ptr_docIds;
-		score_t   *scores    = (score_t   *)mr->ptr_scores;
+		double    *scores    = (double    *)mr->ptr_scores;
 		// print out every docid in this split reply
 		for ( long j = 0; j < mr->m_numDocIds ; j++ ) {
 			// print out score_t
 			logf( LOG_DEBUG,
 			     "query: msg3a: [%lu] %03li) "
 			     "split=%li docId=%012llu domHash=0x%02lx "
-			     "score=%lu"                     ,
+			     "score=%f"                     ,
 			     (unsigned long)this                      ,
 			     j                                        , 
 			     i                                        ,
 			     docIds [j] ,
 			     (long)g_titledb.getDomHash8FromDocId(docIds[j]),
-			      (long)scores[j] );
+			      (float)scores[j] );
 		}
 	}

@ -772,7 +776,7 @@ bool Msg3a::gotAllSplitReplies ( ) {
 	for ( long i = 0 ; i < max ; i++ ) 
 		cr.pushLongLong(m_docIds[i] );
 	for ( long i = 0 ; i < max ; i++ ) 
-		cr.pushFloat(m_scores[i]);
+		cr.pushDouble(m_scores[i]);
 	for ( long i = 0 ; i < max ; i++ ) 
 		cr.pushLong(getSiteHash26(i));
 	// sanity
@ -807,7 +811,7 @@ bool Msg3a::gotAllSplitReplies ( ) {
 	// this will often block, but who cares!? it just sends a request off
 	if ( ! m_msg1.addList ( &m_seoCacheList ,
 				RDB_SERPDB,//RDB_CACHEDB,
-				m_r->ptr_coll,
+				m_r->m_collnum,//ptr_coll,
 				this, // state
 				gotSerpdbReplyWrapper, // callback
 				false, // forcelocal?
@ -849,7 +853,7 @@ bool Msg3a::mergeLists ( ) {
 	// . tcPtr = term count. how many required query terms does the doc 
 	//   have? formerly called topExplicits in IndexTable2.cpp
 	long long     *diPtr [MAX_INDEXDB_SPLIT];
-	float         *rsPtr [MAX_INDEXDB_SPLIT];
+	double        *rsPtr [MAX_INDEXDB_SPLIT];
 	key_t         *ksPtr [MAX_INDEXDB_SPLIT];
 	long long     *diEnd [MAX_INDEXDB_SPLIT];
 	for ( long j = 0; j < m_numHosts ; j++ ) {
@ -863,7 +867,7 @@ bool Msg3a::mergeLists ( ) {
 			continue;
 		}
 		diPtr [j] = (long long *)mr->ptr_docIds;
-		rsPtr [j] = (float     *)mr->ptr_scores;
+		rsPtr [j] = (double    *)mr->ptr_scores;
 		ksPtr [j] = (key_t     *)mr->ptr_clusterRecs;
 		diEnd [j] = (long long *)(mr->ptr_docIds +
 					  mr->m_numDocIds * 8);
@ -919,7 +923,8 @@ bool Msg3a::mergeLists ( ) {

 	// . how much do we need to store final merged docids, etc.?
 	// . docid=8 score=4 bitScore=1 clusterRecs=key_t clusterLevls=1
-	long need = m_docsToGet * (8+4+sizeof(key_t)+sizeof(DocIdScore *)+1);
+	long need = m_docsToGet * (8+sizeof(double)+
+				   sizeof(key_t)+sizeof(DocIdScore *)+1);
 	// allocate it
 	m_finalBuf     = (char *)mmalloc ( need , "finalBuf" );
 	m_finalBufSize = need;
@ -928,7 +933,7 @@ bool Msg3a::mergeLists ( ) {
 	// hook into it
 	char *p = m_finalBuf;
 	m_docIds        = (long long *)p; p += m_docsToGet * 8;
-	m_scores        = (float     *)p; p += m_docsToGet * sizeof(float);
+	m_scores        = (double    *)p; p += m_docsToGet * sizeof(double);
 	m_clusterRecs   = (key_t     *)p; p += m_docsToGet * sizeof(key_t);
 	m_clusterLevels = (char      *)p; p += m_docsToGet * 1;
 	m_scoreInfos    = (DocIdScore **)p;p+=m_docsToGet*sizeof(DocIdScore *);
@ -1078,7 +1083,7 @@ bool Msg3a::mergeLists ( ) {

 			// turn it into a float, that is what rscore_t is.
 			// we do this to make it easier for PostQueryRerank.cpp
-			m_scores    [m_numDocIds]=(float)*rsPtr[maxj];
+			m_scores    [m_numDocIds]=(double)*rsPtr[maxj];
 			if ( m_r->m_doSiteClustering ) 
 				m_clusterRecs[m_numDocIds]= *ksPtr[maxj];
 			// clear this out
@ -1142,7 +1147,7 @@ bool Msg3a::mergeLists ( ) {
 long Msg3a::getStoredSize ( ) {
 	// docId=8, scores=sizeof(rscore_t), clusterLevel=1 bitScores=1
 	// eventIds=1
-	long need = m_numDocIds * ( 8 + sizeof(rscore_t) + 1 ) + 
+	long need = m_numDocIds * ( 8 + sizeof(double) + 1 ) + 
 		4 + // m_numDocIds
 		8 ; // m_numTotalEstimatedHits (estimated # of results)
 	return need;
@ -1158,8 +1163,8 @@ long Msg3a::serialize   ( char *buf , char *bufEnd ) {
 	// store each docid, 8 bytes each
 	memcpy ( p , m_docIds , m_numDocIds * 8 ); p += m_numDocIds * 8;
 	// store scores
-	memcpy ( p , m_scores , m_numDocIds * sizeof(rscore_t) );
-	p +=  m_numDocIds * sizeof(rscore_t) ;
+	memcpy ( p , m_scores , m_numDocIds * sizeof(double) );
+	p +=  m_numDocIds * sizeof(double) ;
 	// store cluster levels
 	memcpy ( p , m_clusterLevels , m_numDocIds ); p += m_numDocIds;
 	// sanity check
@ -1178,7 +1183,7 @@ long Msg3a::deserialize ( char *buf , char *bufEnd ) {
 	// get each docid, 8 bytes each
 	m_docIds = (long long *)p; p += m_numDocIds * 8;
 	// get scores
-	m_scores = (rscore_t *)p; p += m_numDocIds * sizeof(rscore_t) ;
+	m_scores = (double *)p; p += m_numDocIds * sizeof(double) ;
 	// get cluster levels
 	m_clusterLevels = (char *)p; p += m_numDocIds;
 	// sanity check
@ -1214,13 +1219,13 @@ void Msg3a::printTerms ( ) {
 	}
 }

-void setTermFreqWeights ( char *coll,
+void setTermFreqWeights ( collnum_t collnum , // char *coll,
 			  Query *q , 
 			  long long *termFreqs, 
 			  float *termFreqWeights ) {

 	long long numDocsInColl = 0;
-	RdbBase *base = getRdbBase ( RDB_CLUSTERDB  , coll );	
+	RdbBase *base = getRdbBase ( RDB_CLUSTERDB  , collnum );	
 	if ( base ) numDocsInColl = base->getNumGlobalRecs();
 	// issue? set it to 1000 if so
 	if ( numDocsInColl < 0 ) {
@ -1232,7 +1237,7 @@ void setTermFreqWeights ( char *coll,
 	long long *termIds = q->getTermIds();
 	// just use rdbmap to estimate!
 	for ( long i = 0 ; i < q->getNumTerms(); i++ ) {
-		long long tf = g_posdb.getTermFreq ( coll ,termIds[i]);
+		long long tf = g_posdb.getTermFreq ( collnum ,termIds[i]);
 		if ( termFreqs ) termFreqs[i] = tf;
 		float tfw = getTermFreqWeight(tf,numDocsInColl);
 		termFreqWeights[i] = tfw;
--- a/Msg3a.h
+++ b/Msg3a.h
@ -11,7 +11,7 @@
 // 90MB for 32 nodes we got now with about 1.3B docs
 #define DEFAULT_POSDB_READSIZE 90000000

-void setTermFreqWeights ( char *coll,
+void setTermFreqWeights ( collnum_t collnum, // char *coll,
 			  class Query *q , 
 			  long long *termFreqs, 
 			  float *termFreqWeights ) ;
@ -61,7 +61,7 @@ public:
 	// we basically turn the scores we get from each msg39 split into
 	// floats (rscore_t) and store them as floats so that PostQueryRerank
 	// has an easier time
-	float *getScores        ( ) { return m_scores;        };
+	double *getScores        ( ) { return m_scores;        };
 	long   getNumDocIds     ( ) { return m_numDocIds; };

 	long getSiteHash26 ( long i ) { 
@ -125,6 +125,10 @@ public:
 	// this is set if IndexTable::addLists() had an error
 	long       m_errno;

+	// this is now in here so Msg40 can send out one Msg3a per
+	// collection if it wants to search an entire token
+	Msg39Request m_rrr;
+
 	// use msg37 to get TermFreqs
 	//Msg37      m_msg37;
 	long long  m_termFreqs      [MAX_QUERY_TERMS];
@ -160,16 +164,25 @@ public:

 	// final merged lists go here
 	long long      *m_docIds        ;
-	float          *m_scores        ;
+	double         *m_scores        ;
 	class DocIdScore **m_scoreInfos ;
 	//key_t          *m_recs          ; // clusterdb recs
 	key_t          *m_clusterRecs   ;
 	char           *m_clusterLevels ;
+	// this is new
+	collnum_t      *m_collnums;
 	long            m_numDocIds     ;
 	// the above ptrs point into this buffer
 	char           *m_finalBuf;
 	long            m_finalBufSize;

+	// when merging this list of docids into a final list keep
+	// track of the cursor into m_docIds[]
+	long m_cursor;
+
+	// what collection # are these docids from if m_collnums[] is NULL
+	//collnum_t m_collnum;
+
 	//
 	// new things for seoresults cache
 	//
--- a/Msg4.cpp
+++ b/Msg4.cpp
@ -159,7 +159,7 @@ public:
 };


-// . injecting into the "test" coll flushes after each inject
+// . injecting into the "qatest123" coll flushes after each inject
 // . returns false if blocked and callback will be called
 bool flushMsg4Buffers ( void *state , void (* callback) (void *) ) {
 	// if all empty, return true now
@ -535,6 +535,8 @@ bool Msg4::addMetaList ( char      *metaList                 ,
 	m_next         = NULL;
 	m_shardOverride = shardOverride;

+ retry:
+
 	// get in line if there's a line
 	if ( s_msg4Head ) {
 		// add ourselves to the line
@ -554,8 +556,21 @@ bool Msg4::addMetaList ( char      *metaList                 ,
 	// then do it
 	if ( addMetaList2 ( ) ) return true;

-	// sanity check
-	if ( s_msg4Head || s_msg4Tail ) { char *xx=NULL; *xx=0; }
+	// . sanity check
+	// . we sometimes get called with niceness 0 from possibly
+	//   an injection or something and from a quickpoll
+	//   inside addMetaList2() in which case our addMetaList2() will
+	//   fail, assuming s_msg4Head got set, BUT it SHOULD be OK because
+	//   being interrupted at the one QUICKPOLL() in addMetaList2()
+	//   doesn't seem like it would hurt.
+	// . FURTHERMORE the multicast seems to always be called with
+	//   MAX_NICENESS so i'm not sure how niceness 0 will really help
+	//   with any of this stuff.
+	//if ( s_msg4Head || s_msg4Tail ) { char *xx=NULL; *xx=0; }
+	if ( s_msg4Head || s_msg4Tail ) {
+		log("msg4: got unexpected head"); // :)
+		goto retry;
+	}

 	// . spider hang bug
 	// . debug log. seems to happen a lot if not using threads..
@ -708,6 +723,9 @@ bool Msg4::addMetaList2 ( ) {
 	// flush them buffers
 	//flushLocal();
 			       
+	// in case this was being used to hold the data, free it
+	m_tmpBuf.purge();
+
 	return true;
 }

--- a/Msg4.h
+++ b/Msg4.h
@ -17,6 +17,8 @@ bool addMetaList ( char *p , class UdpSlot *slot = NULL ) ;

 bool isInMsg4LinkedList ( class Msg4 *msg4 ) ;

+#include "SafeBuf.h"
+
 class Msg4 {

 public:
@ -90,6 +92,8 @@ class Msg4 {
 	void         (*m_callback ) ( void *state );
 	void          *m_state;

+	SafeBuf m_tmpBuf;
+
 	char      m_rdbId;
 	char      m_inUse;
 	collnum_t m_collnum;
--- a/Msg40.cpp
+++ b/Msg40.cpp
--- a/Msg40.h
+++ b/Msg40.h
@ -117,7 +117,7 @@ class Msg40 {
 	bool computeGigabits( class TopicGroup *tg );
 	SafeBuf m_gigabitBuf;

-#ifdef NEEDLICENSE
+	// nuggabits...
 	bool computeFastFacts ( );
 	bool addFacts ( HashTableX *queryTable,
 			HashTableX *gbitTable ,
@ -126,13 +126,14 @@ class Msg40 {
 			bool debugGigabits ,
 			class Msg20Reply *reply,
 			SafeBuf *factBuf ) ;
-#endif

 	SafeBuf m_factBuf;

 	// keep these public since called by wrapper functions
 	bool gotDocIds        ( ) ;
 	bool launchMsg20s     ( bool recalled ) ;
+	class Msg20 *getAvailMsg20();
+	class Msg20 *getCompletedSummary ( long ix );
 	bool getSummaries     ( ) ;
 	bool gotSummary       ( ) ;
 	bool reallocMsg20Buf ( ) ;
@ -158,8 +159,8 @@ class Msg40 {
 	// . these routines give us back our inputted parameters we saved
 	char *getQuery              ( ) { return m_si->m_q->getQuery(); };
 	long  getQueryLen           ( ) { return m_si->m_q->getQueryLen(); };
-	char *getColl               ( ) { return m_si->m_coll2; };
-	long  getCollLen            ( ) { return m_si->m_collLen2; };
+	//char *getColl               ( ) { return m_si->m_coll2; };
+	//long  getCollLen            ( ) { return m_si->m_collLen2; };
 	long  getDocsWanted         ( ) { return m_si->m_docsWanted; };
 	long  getFirstResultNum     ( ) { return m_si->m_firstResultNum; };

@ -171,7 +172,10 @@ class Msg40 {
 	long long getDocId  ( long i ){return m_msg3a.m_docIds[i]; };
 	long long *getDocIds(        ){return m_msg3a.m_docIds; };
 	float  getScore  ( long i ){return m_msg3a.m_scores[i]; };
-	class DocIdScore *getScoreInfo(long i){return m_msg3a.m_scoreInfos[i];}
+	class DocIdScore *getScoreInfo(long i){
+		if ( ! m_msg3a.m_scoreInfos ) return NULL;
+		return m_msg3a.m_scoreInfos[i];
+	}
 	//LinkInfo *getLinkInfo( long i){return m_msg20[i]->m_linkInfo; }
 	bool  moreResultsFollow ( )   {return m_moreToCome; };
 	time_t getCachedTime ( )      {return m_cachedTime; };
@ -202,8 +206,21 @@ class Msg40 {
 	// Msg39 and all Msg20s must use the same clock timestamp
 	time_t m_nowUTC;

+	long m_lastHeartbeat;
+
+	bool printSearchResult9 ( long ix ) ;
+	HashTableX m_columnTable;
+	bool printCSVHeaderRow ( class SafeBuf *sb );
+	bool printJsonItemInCSV ( class State0 *st , long ix );
+	long m_numCSVColumns;
+
+
+	HashTableX m_dedupTable;
+
 	long m_msg3aRecallCnt;
-	Msg39Request m_r;
+	// this goes into msg3a now so we can send multiple msg3as out,
+	// 1 per collection
+	//Msg39Request m_r;

 	long       m_docsToGet;
 	long       m_docsToGetVisible;
@ -211,7 +228,9 @@ class Msg40 {
 	// incoming parameters 
 	void       *m_state;
 	void      (* m_callback ) ( void *state );
-	
+
+	long m_needFirstReplies;
+
 	// max outstanding msg20s
 	//long       m_maxOutstanding;

@ -237,6 +256,17 @@ class Msg40 {
 	char      *m_msg20StartBuf;
 	long       m_numToFree;

+	bool m_hadPrintError ;
+	long m_numPrinted    ;
+	bool m_printedHeader ;
+	bool m_printedTail   ;
+	bool m_lastChunk     ;
+	long m_sendsOut      ;
+	long m_sendsIn       ;
+	long m_printi        ;
+	long m_socketHadError;
+
+
 	// use msg3a to get docIds
 	Msg3a      m_msg3a;

@ -307,6 +337,14 @@ class Msg40 {
 	// Msg2b for generating a directory
 	//Msg2b  m_msg2b;

+	bool mergeDocIdsIntoBaseMsg3a();
+	long m_numCollsToSearch;
+	class Msg3a **m_msg3aPtrs;
+	SafeBuf m_msg3aPtrBuf;
+	long m_num3aRequests;
+	long m_num3aReplies;
+	collnum_t m_firstCollnum;
+
 	PostQueryRerank m_postQueryRerank;

        HashTableT<uint64_t, uint64_t> m_urlTable;
--- a/Msg5.cpp
+++ b/Msg5.cpp
@ -114,7 +114,7 @@ void  makeCacheKey ( char *startKey     ,
 //   another special meaning. it tells msg5 to tell RdbTree's getList() to 
 //   pre-allocate the list size by counting the recs ahead of time.
 bool Msg5::getList ( char     rdbId         ,
-		     char    *coll          ,
+		     collnum_t collnum ,
 		     RdbList *list          ,
 		     //key_t    startKey      , 
 		     //key_t    endKey        , 
@ -157,7 +157,7 @@ bool Msg5::getList ( char     rdbId         ,
 	// sanity
 	if ( ! list && mergeLists ) { char *xx=NULL;*xx=0; }
 	// warning
-	if ( ! coll ) log(LOG_LOGIC,"net: NULL collection. msg5.");
+	if ( collnum < 0 ) log(LOG_LOGIC,"net: bad collection. msg5.");
 	// MUST have this
 	//if ( rdbId == RDB_TITLEDB && ! msg5b ) {
 	//	log(LOG_LOGIC,"net: No msg5b supplied. 1.");
@ -202,10 +202,10 @@ bool Msg5::getList ( char     rdbId         ,
 	//m_startTime = gettimeofdayInMilliseconds();
 	// remember stuff
 	m_rdbId         = rdbId;
-	m_coll          = coll;
+	m_collnum          = collnum;

-	m_collnum = g_collectiondb.getCollnum ( coll );
-	if ( m_collnum < 0 ) {
+	CollectionRec *ttt = g_collectiondb.getRec ( m_collnum );
+	if ( ! ttt ) {
 		g_errno = ENOCOLLREC;
 		return true;
 	}
@ -237,7 +237,7 @@ bool Msg5::getList ( char     rdbId         ,
 	m_mergeLists         = mergeLists;

 	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
-	RdbBase *base; if (!(base=getRdbBase(m_rdbId,m_coll))) return true;
+	RdbBase *base; if (!(base=getRdbBase(m_rdbId,m_collnum))) return true;
 	// point to cache
 	//RdbCache *cache = base->m_rdb->getCache();
 	// . these 2 vars are used for error correction
@ -487,7 +487,7 @@ bool Msg5::getList ( char     rdbId         ,
 // . loops until m_minRecSizes is satisfied OR m_endKey is reached
 bool Msg5::readList ( ) {
 	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
-	RdbBase *base; if (!(base=getRdbBase(m_rdbId,m_coll))) return true;
+	RdbBase *base; if (!(base=getRdbBase(m_rdbId,m_collnum))) return true;
 readMore:
 	// . reset our tree list
 	// . sets fixedDataSize here in case m_includeTree is false because
@ -525,7 +525,7 @@ bool Msg5::readList ( ) {
 	if ( m_isRealMerge ) niceness = 1;
 	if ( compute ) {
 		m_msg3.readList  ( m_rdbId          ,
-				   m_coll           , 
+				   m_collnum        , 
 				   m_fileStartKey   , // modified by gotList()
 				   m_endKey         ,
 				   m_newMinRecSizes , // modified by gotList()
@ -722,6 +722,11 @@ bool Msg5::readList ( ) {
 		}
 	}

+	// limit to 20MB so we don't go OOM!
+	if ( m_newMinRecSizes > 2 * m_minRecSizes &&
+	     m_newMinRecSizes > 20000000 )
+		m_newMinRecSizes = 20000000;
+	     

 	QUICKPOLL((m_niceness));
 	// debug msg
@ -747,7 +752,7 @@ bool Msg5::readList ( ) {
 	// . if compensateForMerge is true then m_startFileNum/m_numFiles
 	//   will be appropriately mapped around the merge
 	if ( ! m_msg3.readList  ( m_rdbId          ,
-				  m_coll           , 
+				  m_collnum        , 
 				  m_fileStartKey   , // modified by gotList()
 				  diskEndKey       ,
 				  m_newMinRecSizes , // modified by gotList()
@ -794,10 +799,10 @@ void Msg5::copyAndSendBackList ( RdbList *listSrc ) {
 bool Msg5::needsRecall ( ) {
 	bool logIt;
 	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
-	RdbBase *base = getRdbBase ( m_rdbId , m_coll );
+	RdbBase *base = getRdbBase ( m_rdbId , m_collnum );
 	// if collection was deleted from under us, base will be NULL
 	if ( ! base && ! g_errno ) {
-		log("msg5: base lost for coll %s",m_coll);
+		log("msg5: base lost for collnum %li",(long)m_collnum);
 		return false;
 	}
 	// sanity check
@ -849,11 +854,14 @@ bool Msg5::needsRecall ( ) {
 	// seems to be very common for doledb, so don't log unless extreme
 	//if ( m_rdbId == RDB_DOLEDB && m_round < 15 ) logIt = false;
 	if ( m_round > 100 && (m_round % 1000) != 0 ) logIt = false;
+	// seems very common when doing rebalancing then merging to have
+	// to do at least one round of re-reading, so note that
+	if ( m_round == 0 ) logIt = false;
 	if ( logIt )
 		logf(LOG_DEBUG,"db: Reading %li again from %s (need %li total "
-		     "got %li) this=0x%lx round=%li.", 
+		     "got %li) cn=%li this=0x%lx round=%li.", 
 		     m_newMinRecSizes , base->m_dbname , m_minRecSizes, 
-		     m_list->m_listSize, (long)this , m_round );
+		     m_list->m_listSize, (long)m_collnum,(long)this, m_round );
 	m_round++;
 	// record how many screw ups we had so we know if it hurts performance
 	base->m_rdb->didReSeek ( );
@ -1167,7 +1175,7 @@ bool Msg5::gotList2 ( ) {
 	}
 	
 	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
-	RdbBase *base; if (!(base=getRdbBase(m_rdbId,m_coll))) return true;
+	RdbBase *base; if (!(base=getRdbBase(m_rdbId,m_collnum))) return true;

 	// if not enough lists, use a dummy list to trigger merge so tfndb
 	// filter happens and we have a chance to weed out old titleRecs
@ -1523,7 +1531,7 @@ void Msg5::repairLists_r ( ) {
 		// . logging the key ranges gives us an idea of how long
 		//   it will take to patch the bad data
 		long nn = m_msg3.m_numFileNums;
-		RdbBase *base = getRdbBase ( m_rdbId , m_coll );
+		RdbBase *base = getRdbBase ( m_rdbId , m_collnum );
 		if ( i < nn && base ) {
 			long fn = m_msg3.m_fileNums[i];
 			BigFile *bf = base->getFile ( fn );
@ -1574,7 +1582,7 @@ void Msg5::mergeLists_r ( ) {
 	if ( KEYCMP(m_prevKey,m_fileStartKey,m_ks)>=0 ) m_prevCount = 0;

 	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
-	RdbBase *base; if (!(base=getRdbBase(m_rdbId,m_coll))) {
+	RdbBase *base; if (!(base=getRdbBase(m_rdbId,m_collnum))) {
 		log("No collection found."); return; }

 	/*
@ -1747,7 +1755,7 @@ bool Msg5::doneMerging ( ) {
 	//m_waitingForMerge = false;

 	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
-	RdbBase *base; if (!(base=getRdbBase(m_rdbId,m_coll))) return true;
+	RdbBase *base; if (!(base=getRdbBase(m_rdbId,m_collnum))) return true;

 	// . if there was a merge error, bitch about it
 	// . Thread class should propagate g_errno when it was set in a thread
@ -1764,8 +1772,8 @@ bool Msg5::doneMerging ( ) {
 	//   our first merge
 	if ( m_hadCorruption ) {
 		// log it here, cuz logging in thread doesn't work too well
-		log("net: Encountered a corrupt list in rdb=%s coll=%s",
-		    base->m_dbname,m_coll);
+		log("net: Encountered a corrupt list in rdb=%s collnum=%li",
+		    base->m_dbname,(long)m_collnum);
 		// remove error condition, we removed the bad data in thread
 		
 		m_hadCorruption = false;
@ -1891,7 +1899,7 @@ bool Msg5::doneMerging ( ) {

 	// . for every round we get call increase by 10 percent
 	// . try to fix all those negative recs in the rebalance re-run
-	m_newMinRecSizes *= (1.0 + (m_round * .10));
+	m_newMinRecSizes = (long)(m_newMinRecSizes * (1.0 + (m_round * .10))); // cast the product, not the factor, so the 10% growth is kept

 	// wrap around?
 	if ( m_newMinRecSizes < 0 || m_newMinRecSizes > 1000000000 )
@ -2003,7 +2011,7 @@ bool Msg5::getRemoteList ( ) {
 				 0                    , // max cached age
 				 false                , // add to cache?
 				 m_rdbId              , // rdbId
-				 m_coll               ,
+				 m_collnum            ,
 				 m_list               ,
 				 m_startKey           ,
 				 m_endKey             ,
--- a/Msg5.h
+++ b/Msg5.h
@ -66,7 +66,8 @@ class Msg5 {
 	// . if maxCacheAge is > 0, we lookup in cache first
 	bool getList ( //class RdbBase *base      ,
 		       char       rdbId         ,
-		       char      *coll          ,
+		       //char      *coll          ,
+		       collnum_t collnum ,
 		       RdbList   *list          ,
 		       //key_t      startKey      , 
 		       //key_t      endKey        , 
@ -99,7 +100,8 @@ class Msg5 {

 	bool getList ( //class RdbBase *base      ,
 		       char       rdbId         ,
-		       char      *coll          ,
+		       //char      *coll          ,
+		       collnum_t collnum ,
 		       RdbList   *list          ,
 		       key_t      startKey      , 
 		       key_t      endKey        , 
@ -125,7 +127,7 @@ class Msg5 {
 		       bool        allowPageCache = true ,
 		       bool        hitDisk        = true ) {
 		return getList ( rdbId         ,
-				 coll          ,
+				 collnum       ,
 				 list          ,
 				 (char *)&startKey      , 
 				 (char *)&endKey        , 
@ -216,7 +218,7 @@ class Msg5 {
 	long      m_startFileNum;
 	long      m_minRecSizes;
 	//RdbBase  *m_base;
-	char     *m_coll;
+	//char     *m_coll;
 	char      m_rdbId;

 	// . cache may modify these
--- a/Msg51.cpp
+++ b/Msg51.cpp
@ -75,7 +75,8 @@ bool Msg51::getClusterRecs ( long long     *docIds                   ,
 			     char          *clusterLevels            ,
 			     key_t         *clusterRecs              ,
 			     long           numDocIds                ,
-			     char          *coll                     ,
+			     //char          *coll                     ,
+			     collnum_t collnum ,
 			     long           maxCacheAge              ,
 			     bool           addToCache               ,
 			     void          *state                    ,
@ -87,12 +88,13 @@ bool Msg51::getClusterRecs ( long long     *docIds                   ,
 	// reset this msg
 	reset();
 	// warning
-	if ( ! coll ) log(LOG_LOGIC,"net: NULL collection. msg51.");
+	if ( collnum < 0 ) log(LOG_LOGIC,"net: bad collection. msg51.");
 	// get the collection rec
-	CollectionRec *cr = g_collectiondb.getRec ( coll );
+	CollectionRec *cr = g_collectiondb.getRec ( collnum );
 	// return true on error, g_errno should already be set
 	if ( ! cr ) {
-		log("db: msg51. Collection rec null for coll %s.", coll);
+		log("db: msg51. Collection rec null for collnum %li.", 
+		    (long)collnum);
 		g_errno = EBADENGINEER;
 		char *xx=NULL; *xx=0;
 		return true;
@ -102,8 +104,9 @@ bool Msg51::getClusterRecs ( long long     *docIds                   ,
 	m_addToCache    = addToCache;
 	m_state         = state;
 	m_callback      = callback;
-	m_coll          = coll;
-	m_collLen       = gbstrlen(coll);
+	//m_coll          = coll;
+	//m_collLen       = gbstrlen(coll);
+	m_collnum = collnum;
 	// these are storage for the requester
 	m_docIds        = docIds;
 	m_clusterLevels = clusterLevels;
@ -186,7 +189,7 @@ bool Msg51::sendRequests ( long k ) {
 	key_t     ckey = (key_t)m_docIds[m_nexti];
 	bool found = false;
 	if ( c )
-		found = c->getRecord ( m_coll    ,
+		found = c->getRecord ( m_collnum    ,
 				       ckey      , // cache key
 				       &crecPtr  , // pointer to it
 				       &crecSize ,
@ -292,7 +295,7 @@ bool Msg51::sendRequest ( long    i ) {
 				     m_maxCacheAge ,
 				     m_addToCache  ,
 				     RDB_CLUSTERDB ,
-				     m_coll        ,
+				     m_collnum        ,
 				     &m_lists[i]   ,
 				     (char *)&startKey      ,
 				     (char *)&endKey        ,
@ -437,7 +440,7 @@ void Msg51::gotClusterRec ( Msg0 *msg0 ) { //, RdbList *list ) {
 	// . add the record to our quick cache as a long long
 	// . ignore any error
 	if ( s_cacheInit )
-		c->addRecord ( m_coll        ,
+		c->addRecord ( m_collnum        ,
 			       (key_t)docId  , // docid is key
 			       (char *)rec   ,
 			       sizeof(key_t) , // recSize
--- a/Msg51.h
+++ b/Msg51.h
@ -108,7 +108,8 @@ class Msg51 {
 			      char          *clusterLevels            ,
 			      key_t         *clusterRecs              ,
 			      long           numDocIds                ,
-			      char          *coll                     ,
+			      //char          *coll                     ,
+			      collnum_t collnum ,
 			      long           maxCacheAge              ,
 			      bool           addToCache               ,
 			      void          *state                    ,
@ -169,8 +170,9 @@ class Msg51 {
 	long       m_firstNode;
 	long       m_nextNode;

-	char      *m_coll;
-	long       m_collLen;
+	//char      *m_coll;
+	//long       m_collLen;
+	collnum_t m_collnum;
 	
 	// cache info
 	long       m_maxCacheAge;
--- a/Msg8b.cpp
+++ b/Msg8b.cpp
@ -187,7 +187,7 @@ bool Msg8b::getCatRec  ( Url     *url              ,
 				0        , // max cached age in seconds (60)
 			        false    , // add net recv'd list to cache?
 				RDB_CATDB, // specifies the rdb, 1 = tagdb
-				"",//NULL,//m_coll   ,
+				0,//collnum"",//NULL,//m_coll   ,
 				//&m_list  ,
 				m_list   ,
 				startKey ,
--- a/Msg9b.cpp
+++ b/Msg9b.cpp
@ -172,7 +172,8 @@ bool Msg9b::addCatRecs ( char *urls        ,
 	// . use high priority (niceness of 0)
 	// . i raised niceness from 0 to 1 so multicast does not use the
 	//   small UdpSlot::m_tmpBuf... might have a big file...
-	return m_msg1.addList ( &m_list, RDB_CATDB, coll ,
+	return m_msg1.addList ( &m_list, RDB_CATDB, 
+				(collnum_t)0 ,
 				state , callback ,
 				false , // force local?
 				niceness     ); // niceness 
--- a/Msge0.cpp
+++ b/Msge0.cpp
@ -51,7 +51,7 @@ bool Msge0::getTagRecs ( char        **urlPtrs           ,
 			// if skipOldLinks && urlFlags[i]&LF_OLDLINK, skip it
 			 bool          skipOldLinks      ,
 			 TagRec       *baseTagRec        ,
-			 char         *coll              ,
+			 collnum_t     collnum,
 			 long          niceness          ,
 			 void         *state             ,
 			 void        (*callback)(void *state) ) {
@ -65,7 +65,7 @@ bool Msge0::getTagRecs ( char        **urlPtrs           ,
 	m_numUrls          = numUrls;
 	m_skipOldLinks     = skipOldLinks;
 	m_baseTagRec       = baseTagRec;
-	m_coll             = coll;
+	m_collnum          = collnum;
 	m_niceness         = niceness;
 	m_state            = state;
 	m_callback         = callback;
@ -240,7 +240,7 @@ bool Msge0::sendMsg8a ( long i ) {
 	//   subsite.
 	if ( ! m->getTagRec ( &m_urls[i]    ,
 			      NULL, // sites[i] ,
-			      m_coll        ,
+			      m_collnum        ,
 			      // if domain is banned, we will miss that here!
 			      true          , // skip domain lookup?
 			      m_niceness    ,
--- a/Msge0.h
+++ b/Msge0.h
@ -21,7 +21,7 @@ public:
 			  long          numUrls      ,
 			  bool          skipOldLinks ,
 			  class TagRec *baseTagRec ,
-			  char         *coll         ,
+			  collnum_t  collnum,
 			  long          niceness     ,
 			  void         *state        ,
 			  void (*callback)(void *state) ) ;
@ -32,7 +32,7 @@ public:
 	bool sendMsg8a      ( long i );
 	bool doneSending    ( long i );

-	char *m_coll      ;
+	collnum_t m_collnum;
 	long  m_niceness  ;

 	char **m_urlPtrs;
--- a/Msge1.cpp
+++ b/Msge1.cpp
@ -116,7 +116,7 @@ bool Msge1::getFirstIps ( TagRec **grv ,
 	if ( ! launchRequests ( 0 ) ) return false;

 	// save it? might be a page parser
-	//if ( ! strcmp(m_coll,"test") ) saveTestBuf();
+	//if ( ! strcmp(m_coll,"qatest123") ) saveTestBuf();

 	// none blocked, we are done
 	return true;
@ -219,7 +219,7 @@ bool Msge1::launchRequests ( long starti ) {

 	/*
 	// look up in our m_testBuf.
-	if ( m_coll && ! strcmp(m_coll,"test") ) {
+	if ( m_coll && ! strcmp(m_coll,"qatest123") ) {
 		bool found = false;
 		// do we got it?
 		long quickIp ; bool status = getTestIp ( p , &quickIp, &found);
@ -300,7 +300,7 @@ bool Msge1::sendMsgC ( long i , char *host , long hlen ) {


 	// look up in our m_testBuf.
-	if ( m_coll && ! strcmp(m_coll,"test") ) {
+	if ( m_coll && ! strcmp(m_coll,"qatest123") ) {
 		bool found = false;
 		// shortcut
 		//char *p = m_urlPtrs[n];
@ -340,7 +340,7 @@ void gotMsgCWrapper ( void *state , long ip ) {
 	if ( ! THIS->launchRequests(i) ) return;
 	// . save it if we should. might be a page parser
 	// . mdw i uncommented this when we cored all the time
-	//if ( ! strcmp(THIS->m_coll,"test")) saveTestBuf();
+	//if ( ! strcmp(THIS->m_coll,"qatest123")) saveTestBuf();
 	// must be all done, call the callback
 	THIS->m_callback ( THIS->m_state );
 }
@ -364,7 +364,7 @@ bool Msge1::doneSending ( long i ) {
 	//      n, i,  m_urls[i].getUrl() ,iptoa(ip));

 	// store it?
-	if ( ! strcmp(m_coll,"test") ) {
+	if ( ! strcmp(m_coll,"qatest123") ) {
 		// get host
 		long  hlen = 0;
 		char *host = getHostFast ( m_urlPtrs[n] , &hlen );
@ -511,9 +511,9 @@ static char *s_last         = NULL ;
 static long  s_lastLen      = 0    ;
 static HashTableX s_ht;

-// . only call this if the collection is "test"
+// . only call this if the collection is "qatest123"
 // . we try to get the ip by accessing the "./test/ips.txt" file
-// . we also ad ips we lookup to that file in the collection is "test"
+// . we also add ips we look up to that file if the collection is "qatest123"
 // . returns false and sets g_errno on error, true on success
 bool getTestIp ( char *url , long *retIp , bool *found , long niceness ,
 		 char *testDir ) {
@ -533,8 +533,8 @@ bool getTestIp ( char *url , long *retIp , bool *found , long niceness ,
 	// assume not found
 	*found = false;

-	// . if we are the "test" collection, check for "./test/ips.txt" file
-	//   that gives us the ips of the given urls. 
+	// . if we are the "qatest123" collection, check for "./test/ips.txt"
+	//   file that gives us the ips of the given urls. 
 	// . if we end up doing some lookups we should append to that file
 	if ( ! s_testBuf || s_needsReload ) {
 		// assume needs reload now
--- a/PageAddUrl.cpp
+++ b/PageAddUrl.cpp
@ -2,107 +2,64 @@

 #include "Pages.h"
 #include "Collectiondb.h"
-#include "HashTable.h"
 #include "Msg4.h"
-#include "TuringTest.h"
-#include "AutoBan.h"
-//#include "CollectionRec.h"
-//#include "Links.h"
-#include "Users.h"
-#include "HashTableT.h"
 #include "Spider.h"
+#include "Parms.h"

 static bool sendReply        ( void *state  , bool addUrlEnabled );
-static bool canSubmit        (unsigned long h, long now, long maxUrlsPerIpDom);

 static void addedStuff ( void *state );

-void resetPageAddUrl ( ) ;
-
-class State2 {
-public:
-	Url        m_url;
-	char      *m_buf;
-	long       m_bufLen;
-	long       m_bufMaxLen;
-};
-
 class State1 {
 public:
 	Msg4       m_msg4;
 	TcpSocket *m_socket;
-        bool       m_isAdmin;
-	char       m_coll[MAX_COLL_LEN+1];
-	bool       m_goodAnswer;
-	bool       m_doTuringTest;
-	long       m_ufuLen;
-	char       m_ufu[MAX_URL_LEN];
+
+	HttpRequest m_hr;

 	long       m_urlLen;
 	char       m_url[MAX_URL_LEN];

-	char       m_username[MAX_USER_SIZE];
 	bool       m_strip;
 	bool       m_spiderLinks;
-	bool       m_forceRespider;
- 	// buf filled by the links coming from google, msn, yahoo, etc
-	State2     m_state2[5]; // gb, goog, yahoo, msn, ask
+
 	long       m_numSent;
 	long       m_numReceived;
-	//long       m_raw;
 	SpiderRequest m_sreq;
 };

-// only allow up to 1 Msg10's to be in progress at a time
-static bool s_inprogress = false;
-
 // . returns false if blocked, true otherwise
 // . sets g_errno on error
-bool sendPageAddUrl ( TcpSocket *s , HttpRequest *r ) {
+// . add url page for admin, users use sendPageAddUrl() in PageRoot.cpp
+bool sendPageAddUrl2 ( TcpSocket *s , HttpRequest *r ) {
 	// . get fields from cgi field of the requested url
 	// . get the search query
 	long  urlLen = 0;
 	char *url = r->getString ( "u" , &urlLen , NULL /*default*/);
+	// also try "url" and "urls"
+	if ( ! url ) url = r->getString ( "url" , &urlLen , NULL );
+	if ( ! url ) url = r->getString ( "urls" , &urlLen , NULL );

 	// see if they provided a url of a file of urls if they did not
 	// provide a url to add directly
-	//bool isAdmin = g_collectiondb.isAdmin ( r , s );
-	bool isAdmin = r->getIsLocal();
-	long  ufuLen = 0;
-	char *ufu = NULL;
-	if ( isAdmin )
-		// get the url of a file of urls (ufu)
-		ufu = r->getString ( "ufu" , &ufuLen , NULL );

 	// can't be too long, that's obnoxious
-	if ( urlLen > MAX_URL_LEN || ufuLen > MAX_URL_LEN ) {
+	if ( urlLen > MAX_URL_LEN ) {
 		g_errno = EBUFTOOSMALL;
 		g_msg = " (error: url too long)";
 		return g_httpServer.sendErrorReply(s,500,"url too long");
 	}
-	// get the collection
-	long  collLen = 0;
-	char *coll    = r->getString("c",&collLen);
-	if ( ! coll || ! coll[0] ) {
-		//coll    = g_conf.m_defaultColl;
-		coll = g_conf.getDefaultColl( r->getHost(), r->getHostLen() );
-		collLen = gbstrlen(coll);
-	}
+
 	// get collection rec
-	CollectionRec *cr = g_collectiondb.getRec ( r ); // coll );
+	CollectionRec *cr = g_collectiondb.getRec ( r );
 	// bitch if no collection rec found
 	if ( ! cr ) {
 		g_errno = ENOCOLLREC;
 		g_msg = " (error: no collection)";
 		return g_httpServer.sendErrorReply(s,500,"no coll rec");
 	}
-	// . make sure the ip is not banned
-	// . we may also have an exclusive list of IPs for private collections
-	if ( ! cr->hasSearchPermission ( s ) ) {
-		g_errno = ENOPERM;
-		g_msg = " (error: permission denied)";
-		return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));
-	}
+
+
 	// make a new state
 	State1 *st1 ;
 	try { st1 = new (State1); }
@ -112,9 +69,11 @@ bool sendPageAddUrl ( TcpSocket *s , HttpRequest *r ) {
 		    sizeof(State1),mstrerror(g_errno));
 		return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno)); }
 	mnew ( st1 , sizeof(State1) , "PageAddUrl" );
-	// save socket and isAdmin
+
+
 	st1->m_socket  = s;
-	st1->m_isAdmin = isAdmin;
+
+	st1->m_hr.copy ( r );

 	// assume no url buf yet, set below
 	//st1->m_ubuf      = NULL;
@ -126,7 +85,9 @@ bool sendPageAddUrl ( TcpSocket *s , HttpRequest *r ) {
 	if ( url ) {
 		// normalize and add www. if it needs it
 		Url uu;
-		uu.set ( url , gbstrlen(url) , true );
+		// do not convert xyz.com to www.xyz.com because sometimes
+		// people want xyz.com exactly
+		uu.set ( url , gbstrlen(url) , false ); // true );
 		// remove >'s i guess and store in st1->m_url[] buffer
 		st1->m_urlLen=cleanInput ( st1->m_url,
 					   MAX_URL_LEN, 
@ -138,63 +99,11 @@ bool sendPageAddUrl ( TcpSocket *s , HttpRequest *r ) {
 		//st1->m_ubufAlloc = NULL; // do not free it!
 	}

-	// save the "ufu" (url of file of urls)
-	st1->m_ufu[0] = '\0';
-	st1->m_ufuLen  = ufuLen;
-	memcpy ( st1->m_ufu , ufu , ufuLen );
-	st1->m_ufu[ufuLen] = '\0';
-
-	st1->m_doTuringTest = cr->m_doTuringTest;
-	char *username     = g_users.getUsername(r);
-	if(username) strcpy(st1->m_username,username);
-	//st1->m_user    = g_pages.getUserType ( s , r );
 	st1->m_spiderLinks = true;
 	st1->m_strip   = true;
-	//st1->m_raw = r->getLong("raw",0);

-	// init state2
-	for ( long i = 0; i < 5; i++ ){
-		st1->m_state2[i].m_buf = NULL;
-		st1->m_state2[i].m_bufLen = 0;
-		st1->m_state2[i].m_bufMaxLen = 0;
-	}
-
-	// save the collection name in the State1 class
-	if ( collLen > MAX_COLL_LEN ) collLen = MAX_COLL_LEN;
-	strncpy ( st1->m_coll , coll , collLen );
-	st1->m_coll [ collLen ] = '\0';
-
-	// assume they answered turing test correctly
-	st1->m_goodAnswer = true;
-	// if addurl is turned off, just print "disabled" msg
-	if ( ! g_conf.m_addUrlEnabled ) return sendReply ( st1 , false );
-	// can also be turned off in the collection rec
-	if ( ! cr->m_addUrlEnabled    ) return sendReply ( st1 , false );
 	// or if in read-only mode
-	if (   g_conf.m_readOnlyMode  ) return sendReply ( st1 , false );
-	// cannot add if another Msg10 from here is still in progress
-	if ( s_inprogress ) return sendReply ( st1 , true );
-	// use now as the spiderTime
-
-	// get ip of submitter
-	//unsigned long h = ipdom ( s->m_ip );
-	// . use top 2 bytes now, some isps have large blocks
-	// . if this causes problems, then they can do pay for inclusion
-	unsigned long h = iptop ( s->m_ip );
-	long codeLen;
-	char* code = r->getString("code", &codeLen);
-	if(g_autoBan.hasCode(code, codeLen, s->m_ip)) {
-		long uipLen = 0;
-		char* uip = r->getString("uip",&uipLen);
-		long hip = 0;
-		//use the uip when we have a raw query to test if 
-		//we can submit
-		if(uip) {
-			hip = atoip(uip, uipLen);
-			h = iptop( hip );
-		}
-	}
-
+	if ( g_conf.m_readOnlyMode  ) return sendReply ( st1 , false );

 	st1->m_strip = r->getLong("strip",0);
 	// Remember, for cgi, if the box is not checked, then it is not 
@ -208,36 +117,7 @@ bool sendPageAddUrl ( TcpSocket *s , HttpRequest *r ) {
 	// . use to manually update spider times for a url
 	// . however, will not remove old scheduled spider times
 	// . mdw: made force on the default
-	st1->m_forceRespider = r->getLong("force",1); // 0);
-
-	long now = getTimeGlobal();
-
-	// . allow 1 submit every 1 hour
-	// . restrict by submitter domain ip
-	if ( ! st1->m_isAdmin &&
-	     ! canSubmit ( h , now , cr->m_maxAddUrlsPerIpDomPerDay ) ) {
-		// return error page
-		g_errno = ETOOEARLY;
-		return sendReply ( st1 , true );
-	}
-
-
-	//st1->m_query = r->getString( "qts", &st1->m_queryLen );
-
-
-	// check it, if turing test is enabled for this collection
-	if ( ! st1->m_isAdmin && cr->m_doTuringTest && 
-	     ! g_turingTest.isHuman(r) )  {
-		// log note so we know it didn't make it
-		g_msg = " (error: bad answer)";
-		//log("PageAddUrl:: addurl failed for %s : bad answer",
-		//    iptoa(s->m_ip));
-		st1->m_goodAnswer = false;
-		return sendReply ( st1 , true /*addUrl enabled?*/ );
-	}
-
-	//if ( st1->m_queryLen > 0 )
-	//	return getPages( st1 );
+	//st1->m_forceRespider = r->getLong("force",1); // 0);

 	// if no url given, just print a blank page
 	if ( ! url ) return sendReply (  st1 , true );
@ -262,7 +142,7 @@ bool sendPageAddUrl ( TcpSocket *s , HttpRequest *r ) {
 	// now add that to spiderdb using msg4
 	if ( ! m->addMetaList ( (char *)sreq    ,
 				sreq->getRecSize() ,
-				coll            ,
+				cr->m_coll            ,
 				st1             , // state
 				addedStuff      ,
 				MAX_NICENESS    ,
@ -283,7 +163,7 @@ void addedStuff ( void *state ) {

 bool sendReply ( void *state , bool addUrlEnabled ) {
 	// allow others to add now
-	s_inprogress = false;
+	//s_inprogress = false;
 	// get the state properly
 	State1 *st1 = (State1 *) state;
 	// in order to see what sites are being added log it, then we can
@ -292,7 +172,6 @@ bool sendReply ( void *state , bool addUrlEnabled ) {
 	log(LOG_INFO,"http: add url %s (%s)",st1->m_url ,mstrerror(g_errno));
 	// extract info from state
 	TcpSocket *s       = st1->m_socket;
-	bool       isAdmin = st1->m_isAdmin;
 	char      *url     = NULL;
 	if ( st1->m_urlLen ) url = st1->m_url;
 	// re-null it out if just http://
@ -315,255 +194,49 @@ bool sendReply ( void *state , bool addUrlEnabled ) {

 	char tt [ 128 ];
 	tt[0] = '\0';
-	if ( st1->m_coll[0] != '\0' && ! isAdmin ) 
-		sprintf ( tt , " for %s", st1->m_coll );
-	// the bg colors and style
-	g_pages.printColors (&sb);
-	sb.safePrintf ( "<title>Gigablast Add a Url</title>"
-			"<table><tr><td valign=bottom><a href=/>"
-		      //"<img width=200 length=25 border=0 src=/logo2.gif></a>"
-			"<img width=210 height=25 border=0 src=/logo2.gif></a>"
-			"&nbsp;&nbsp;</font></td><td><font size=+1>"
-			"<b>Add Url%s</td></tr></table>" , tt );
+
+	g_pages.printAdminTop ( &sb , st1->m_socket , &st1->m_hr );
+
 	// watch out for NULLs
 	if ( ! url ) url = "http://";
-	// blank out url if adding a url of a file of urls
-	//	if ( st1->m_ufu ) url = "http://";
+
 	// if there was an error let them know
 	char msg[MAX_URL_LEN + 1024];
 	char *pm = "";
 	if ( g_errno ) {
-		if ( g_errno == ETOOEARLY ) {
-			pm = "Error. 100 urls have "
-			"already been submitted by "
-			"this IP address for the last 24 hours. "
-			"<a href=/addurlerror.html>Explanation</a>.";
-			log("addurls: Failed for user at %s: "
-			    "quota breeched.", iptoa(s->m_ip));
-
-			//rb.safePrintf("Error. %li urls have "
-			//	      "already been submitted by "
-			//	      "this IP address for the "
-			//	      "last 24 hours. ",
-			//	      cr->m_maxAddUrlsPerIpDomPerDay);
-		}
-		else {
-			sprintf ( msg ,"Error adding url(s): <b>%s[%i]</b>", 
-				  mstrerror(g_errno) , g_errno);
-			pm = msg;
-			//rb.safePrintf("Error adding url(s): %s[%i]", 
-			//	      mstrerror(g_errno) , g_errno);
-		}
+		sprintf ( msg ,"Error adding url(s): <b>%s[%i]</b>", 
+			  mstrerror(g_errno) , g_errno);
+		pm = msg;
+		//rb.safePrintf("Error adding url(s): %s[%i]", 
+		//	      mstrerror(g_errno) , g_errno);
 	}
-	else {
-		if      ( ! addUrlEnabled ) {//g_conf.m_addUrlEnabled ) 
-			pm = "<font color=#ff0000>"
-				"Sorry, this feature is temporarily disabled. "
-				"Please try again later.</font>";
-			if ( st1->m_urlLen ) 
-				log("addurls: failed for user at %s: "
-				    "add url is disabled. "
-				    "Enable add url on the "
-				    "Master Controls page and "
-				    "on the Spider Controls page for "
-				    "this collection.", 
-				    iptoa(s->m_ip));
-
-			//rb.safePrintf("Sorry, this feature is temporarily "
-			//	      "disabled. Please try again later.");
-		}
-		else if ( s_inprogress ) {
-			pm = "Add url busy. Try again later.";
-			log("addurls: Failed for user at %s: "
-			    "busy adding another.", iptoa(s->m_ip));
-			//rb.safePrintf("Add url busy. Try again later.");
-
-		}
-		// did they fail the turing test?
-		else if ( ! st1->m_goodAnswer ) {
-			pm = "<font color=#ff0000>"
-				"Oops, you did not enter the 4 large letters "
-				"you see below. Please try again.</font>";
-			//rb.safePrintf("could not add the url"
-			//	      " because the turing test"
-			//	      " is enabled.");
-
-		}
-		if ( url && ! st1->m_ufu[0] && url[0] && printUrl ) {
-				sprintf ( msg ,"<u>%s</u> added to spider "
-					  "queue "
-					  "successfully", url );
-				//rb.safePrintf("%s added to spider "
-				//	      "queue successfully", url );
-		}
-		else if ( st1->m_ufu[0] ) {
-			sprintf ( msg ,"urls in <u>%s</u> "
-				  "added to spider queue "
-				  "successfully", st1->m_ufu );
-
-			//rb.safePrintf("urls in %s added to spider "
-			//	      "queue successfully", url );
-
-		}
-		else {
-			sprintf(msg,"Add the url you want:");
-			//rb.safePrintf("Add the url you want:");
-		}
-		
+	else if ( url && printUrl && url[0] ) {
+		sprintf ( msg ,"<b><u>%s</u></b> added to spider "
+			  "queue "
+			  "successfully<br><br>", url );
+		//rb.safePrintf("%s added to spider "
+		//	      "queue successfully", url );
 		pm = msg;
 		url = "http://";
 		//else
 		//	pm = "Don't forget to <a href=/gigaboost.html>"
 		//		"Gigaboost</a> your URL.";
 	}
-
-	// TODO: show them a list of the urls they added
-	// print the addUrl page in here with a status msg
-	sb.safePrintf (
-		  "<br><br><br><center>"
-		  "<b>%s</b>" // the url msg
-		  "<br><br>"
-		  "<FORM method=get action=/addurl>" 
-		  "<input type=text name=u value=\"%s\" size=50> "
-		  "<input type=submit value=\"add url\" border=0><br>",pm,url);
-	// if we're coming from local ip print the collection box
-	if ( isAdmin ) 
-		sb.safePrintf (
-			  "\n"
-
-			  "<br><b>or specify the url of a "
-			  "file of urls to add:</b>"
-			  "<br>\n"
-			  "<input type=text name=ufu size=50> "
-			  "<input type=submit value=\"add file\" border=0><br>"
-			  "<br>"
-
-			  //"<br><b>or a query to scrape from major engines:</b>"
-			  //"<br>\n"
-			  // qts = query to scrape
-			  //"<input type=text name=qts size=49> "
-			  //"<input type=submit value=\"add query\" border=0><br>"
-			  //"<br>"
-
-			  "<br><b>collection to add to:</b> "
-			  "<input type=text name=c size=20 value=\"%s\">"
-			  "<br><br>\n",
-			  st1->m_coll );
-	// otherwise hide it
-	else 
-		sb.safePrintf ( "<input type=hidden name=c value=\"%s\">" ,
-			  st1->m_coll );
-
 	
-	char *ss = "";
-	if ( st1->m_strip ) ss =" checked";
-	sb.safePrintf ("<br>"
-		       "<input type=checkbox name=strip value=1%s> "
-		       "strip sessionids<br>", ss );
-	
-	sb.safePrintf("<br>\n");

- 	//Adding spider links box
- 	char *sl = "";
- 	if ( st1->m_spiderLinks ) sl =" checked";
- 	sb.safePrintf ("<input type=checkbox name=spiderLinks value=1%s> "
- 		       "spider (harvest) links from page<br><br>\n", sl );
+	g_parms.printParmTable ( &sb , st1->m_socket , &st1->m_hr );

-	if ( ! s_inprogress && addUrlEnabled && st1->m_doTuringTest ) {
-		g_turingTest.printTest(&sb);
-	}
-
-	// . print the url box, etc...
-	// . assume user is always forcing their url
-	// sprintf ( p ,
-	//	  "<br><br>"
-	//	  "<input type=checkbox name=force value=1 checked> "
-	//        "force respider<br>" );
-	//p += gbstrlen ( p );
-	/*
-	sprintf ( p , 
-		  "<br>"
-		  "<a href=/?redir="
-		  "http://www.searchengineguide.com/submit/gigablast.html>"
-		  "<b>Search Engine Marketing News</b></a><br>"
-		  "If you would like to stay up to date with the "
-		  "latest articles on using search engines to market "
-		  "your web site, we recommend subscribing to the "
-		  "Search Engine Marketing weekly newsletter. Once a "
-		  "week, a digest of articles from the top search "
-		  "engine marketing experts is delivered straight to "
-		  "your inbox for free.<br><br>");
-	p += gbstrlen(p);
-	*/
 	// print the final tail
-	g_pages.printTail ( &sb, st1->m_isAdmin ); // local?
+	g_pages.printTail ( &sb, true ); // admin?
 	// clear g_errno, if any, so our reply send goes through
 	g_errno = 0;
-	//bool raw = st1->m_raw;
-	// free the buffer
-	//if ( st1->m_ubufAlloc )
-	//	mfree ( st1->m_ubufAlloc , st1->m_ubufAllocSize,"pau");
-	//if ( st1->m_metaList )
-	//	mfree ( st1->m_metaList , st1->m_metaListAllocSize,"pau");
+
 	// nuke state
 	mdelete ( st1 , sizeof(State1) , "PageAddUrl" );
 	delete (st1);
-	// . send this page
-	// . encapsulates in html header and tail
-	// . make a Mime
-	// . i thought we need -2 for cacheTime, but i guess not
-	//rb.safePrintf("</status>\n");
-	//if(raw)	return g_httpServer.sendDynamicPage (s, 
-	//					     rb.getBufStart(), 
-	//					     rb.length(),
-	//					     -1/*cachetime*/,
-	//					     false, // POSTREply? 
-	//					     "text/xml"// content type
-	//					     );

-	return g_httpServer.sendDynamicPage (s, sb.getBufStart(), 
+	return g_httpServer.sendDynamicPage (s, 
+					     sb.getBufStart(), 
 					     sb.length(),
-					     -1/*cachetime*/);
+					     -1 ); // cachetime
 }
-
-
-// we get like 100k submissions a day!!!
-static HashTable s_htable;
-static bool      s_init = false;
-static long      s_lastTime = 0;
-bool canSubmit ( unsigned long h , long now , long maxAddUrlsPerIpDomPerDay ) {
-	// . sometimes no limit
-	// . 0 means no limit because if they don't want any submission they
-	//   can just turn off add url and we want to avoid excess 
-	//   troubleshooting for why a url can't be added
-	if ( maxAddUrlsPerIpDomPerDay <= 0 ) return true;
-	// init the table
-	if ( ! s_init ) {
-		s_htable.set ( 50000 );
-		s_init = true;
-	}
-	// clean out table every 24 hours
-	if ( now - s_lastTime > 24*60*60 ) {
-		s_lastTime = now;
-		s_htable.clear();
-	}
-	// . if table almost full clean out ALL slots
-	// . TODO: just clean out oldest slots
-	if ( s_htable.getNumSlotsUsed() > 47000 ) s_htable.clear ();
-	// . how many times has this IP domain submitted?
-	// . allow 10 times per day
-	long n = s_htable.getValue ( h );
-	// if over 24hr limit then bail
-	if ( n >= maxAddUrlsPerIpDomPerDay ) return false;
-	// otherwise, inc it
-	n++;
-	// add to table, will replace old values
-	s_htable.addKey ( h , n );
-	return true;
-}
-
-
-void resetPageAddUrl ( ) {
-	s_htable.reset();
-}
-
--- a/PageBasic.cpp
+++ b/PageBasic.cpp
@ -0,0 +1,876 @@
+#include "SafeBuf.h"
+#include "HttpRequest.h"
+#include "SearchInput.h"
+#include "Pages.h"
+#include "Parms.h"
+#include "Spider.h"
+
+//bool printSitePatternExamples ( SafeBuf *sb , HttpRequest *hr ) ;
+
+///////////
+//
+// main > Basic > Settings
+//
+///////////
+/*
+bool sendPageBasicSettings ( TcpSocket *socket , HttpRequest *hr ) {
+
+	char  buf [ 128000 ];
+	SafeBuf sb(buf,128000);
+
+	// true = usedefault coll?
+	CollectionRec *cr = g_collectiondb.getRec ( hr , true );
+	if ( ! cr ) {
+		g_httpServer.sendErrorReply(socket,500,"invalid collection");
+		return true;
+	}
+
+	// process any incoming request
+	handleSettingsRequest ( socket , hr );
+
+	// . print standard header 
+	// . this prints the <form tag as well
+	g_pages.printAdminTop ( &sb , socket , hr );
+
+
+	g_parms.printParms ( &sb , socket , hr );
+
+
+	printSitePatternExamples ( &sb , hr );
+
+	// wrap up the form, print a submit button
+	g_pages.printAdminBottom ( &sb );
+
+
+	return g_httpServer.sendDynamicPage ( socket,
+					      sb.getBufStart() ,
+					      sb.length()      , 
+					      -1               ,
+					      false,//POSTReply        ,
+					      NULL             , // contType
+					      -1               , // httpstatus
+					      NULL,//cookie           ,
+					      NULL             );// charset
+}
+*/
+
+// One parsed line of the site list, as built by updateSiteListTables().
+// Instances are stored by value in SpiderColl::m_siteListDomTable, keyed by
+// the 32-bit domain hash of the line's url, and are read back by
+// getMatchingUrlPattern().
+class PatternData {
+public:
+	// hash of the subdomain or domain for this line in sitelist
+	long m_thingHash32;
+	// ptr to the line in CollectionRec::m_siteListBuf
+	// (set to the start of the line inside the site list text that was
+	// handed to updateSiteListTables(), including any leading directive)
+	char *m_patternStr;
+	// offset of the url path in the pattern, 0 means none
+	// (measured from m_patternStr to the '/' that starts the path)
+	short m_pathOff; 
+	// number of bytes in the path, counting that leading '/'. only
+	// assigned when a path was found, so only read it if m_pathOff != 0
+	short m_pathLen;
+};
+
+
+// . rebuilds the site list lookup structures kept on the SpiderColl from
+//   the newline-separated text in "siteListArg": the domain hash table
+//   (m_siteListDomTable) and the positive/negative "contains:" substring
+//   buffers (m_posSubstringBuf/m_negSubstringBuf)
+// . Collectiondb.cpp calls this when any parm flagged with
+//   PF_REBUILDURLFILTERS is updated
+// . if "addSeeds" is true then every url line that is not already in the
+//   current cr->m_siteListBuf, and is not a "site:" or "contains:"
+//   directive, is also added to spiderdb as a SpiderRequest
+// . this returns false if it blocks
+// . returns true and sets g_errno on error
+// . uses msg4 to add seeds to spiderdb if necessary
+// . only adds seeds for the shard we are on iff we are responsible for
+//   the fake firstip!!!
+bool updateSiteListTables ( collnum_t collnum , 
+			    bool addSeeds ,
+			    char *siteListArg ) {
+
+	CollectionRec *cr = g_collectiondb.getRec ( collnum );
+	if ( ! cr ) return true;
+
+	// this might make a new spidercoll...
+	SpiderColl *sc = g_spiderCache.getSpiderColl ( cr->m_collnum );
+
+	// do not dereference a NULL spidercoll below.
+	// NOTE(review): presumably getSpiderColl() sets g_errno when it
+	// fails -- confirm.
+	if ( ! sc ) {
+		log("basic: could not get spider collection to update "
+		    "site list.");
+		return true;
+	}
+
+	// sanity. if in use we should not even be here
+	if ( sc->m_msg4x.m_inUse ) { 
+		log("basic: trying to update site list while previous "
+		    "update still outstanding.");
+		g_errno = EBADENGINEER;
+		return true;
+	}
+
+	// when sitelist is update Parms.cpp should invalidate this flag!
+	//if ( sc->m_siteListTableValid ) return true;
+
+	// hash current sitelist entries, each line so we don't add
+	// dup requests into spiderdb i guess...
+	HashTableX dedup;
+	if ( ! dedup.set ( 4,0,1024,NULL,0,false,0,"sldt") ) return true;
+	// this is a safebuf PARM in Parms.cpp now HOWEVER, not really
+	// because we set it here from a call to CommandUpdateSiteList()
+	// because it requires all this computational crap.
+	char *op = cr->m_siteListBuf.getBufStart();
+	// . scan and hash each line in it
+	// . "op" is NULL if the buf is empty, so check it before reading
+	while ( op && *op ) {
+		// start of this line
+		char *s = op;
+		// skip to end of line marker
+		for ( ; *op && *op != '\n' ; op++ ) ;
+		// back up over trailing whitespace so this hash is made over
+		// the same bytes as the hash of each new line below
+		char *e = op;
+		for ( ; e > s && is_wspace_a(e[-1]) ; e-- );
+		// keep it simple
+		long h32 = hash32 ( s , e - s );
+		// for deduping
+		if ( ! dedup.addKey ( &h32 ) ) return true;
+		// . advance over '\n' for next line
+		// . never step over the terminating \0. the old loop did an
+		//   unconditional op++ here and read past the end of the buf
+		//   when the last line had no '\n'
+		if ( *op == '\n' ) op++;
+	}
+
+	// get the old sitelist Domain Hash to PatternData mapping table
+	// which tells us what domains, subdomains or paths we can or
+	// can not spider...
+	HashTableX *dt = &sc->m_siteListDomTable;
+
+	// reset it
+	if ( ! dt->set ( 4 , 
+			 sizeof(PatternData),
+			 1024 ,
+			 NULL , 
+			 0 ,
+			 true , // allow dup keys?
+			 0 , // niceness - at least for now
+			 "sldt" ) )
+		return true;
+
+
+	// clear the old substring lists
+	sc->m_posSubstringBuf.purge();
+	sc->m_negSubstringBuf.purge();
+
+	// we can now free the old site list methinks
+	//cr->m_siteListBuf.purge();
+
+	// reset flags
+	//sc->m_siteListAsteriskLine = NULL;
+	sc->m_siteListHasNegatives = false;
+	sc->m_siteListIsEmpty = true;
+
+	// use this so it will be free automatically when msg4 completes!
+	SafeBuf *spiderReqBuf = &sc->m_msg4x.m_tmpBuf;
+
+	//char *siteList = cr->m_siteListBuf.getBufStart();
+
+	// scan the list
+	char *pn = siteListArg;
+
+	// completely empty?
+	if ( ! pn ) return true;
+
+	long lineNum = 1;
+
+	long added = 0;
+
+	Url u;
+
+	for ( ; *pn ; lineNum++ ) {
+
+		// start of this line
+		char *s = pn;
+		// skip to end of line marker
+		for ( ; *pn && *pn != '\n' ; pn++ ) ;
+
+		// remember where the line starts, directives and all
+		char *start = s;
+
+		// back pe up over spaces in case ended in spaces
+		char *pe = pn;
+		for ( ; pe > s && is_wspace_a(pe[-1]) ; pe-- );
+
+		// advance over '\n' for next line
+		if ( *pn && *pn == '\n' ) pn++;
+
+		// make hash of the line
+		long h32 = hash32 ( s , pe - s );
+
+		// should we add the url as a seed?
+		bool seedMe = true;
+		// false for "contains:" substrings
+		bool isUrl = true;
+		// did it have a leading '-'?
+		bool isNeg = false;
+		// false for "seed:" lines, which are not used as filters
+		bool isFilter = true;
+
+	innerLoop:
+		// . skip spaces at start of line, or after a directive
+		// . skip ALL of them. the old code only skipped one.
+		// . chars before "pe" never end in whitespace so this stops
+		//   before "pe"
+		while ( s < pe && *s == ' ' ) s++;
+
+		// comment?
+		if ( *s == '#' ) continue;
+
+		// empty line?
+		if ( *s == '\n' ) continue;
+
+		// all?
+		//if ( *s == '*' ) {
+		//	sc->m_siteListAsteriskLine = start;
+		//	continue;
+		//}
+
+		if ( *s == '-' ) {
+			sc->m_siteListHasNegatives = true;
+			isNeg = true;
+			s++;
+		}
+
+		// exact:?
+		//if ( strncmp(s,"exact:",6) == 0 ) {
+		//	s += 6;
+		//	goto innerLoop;
+		//}
+
+		// these will be manual adds and should pass url filters
+		// because they have the "ismanual" directive override
+		if ( strncmp(s,"seed:",5) == 0 ) {
+			s += 5;
+			isFilter = false;
+			goto innerLoop;
+		}
+
+		if ( strncmp(s,"site:",5) == 0 ) {
+			s += 5;
+			seedMe = false;
+			goto innerLoop;
+		}
+
+		if ( strncmp(s,"contains:",9) == 0 ) {
+			s += 9;
+			seedMe = false;
+			isUrl = false;
+			goto innerLoop;
+		}
+
+		long slen = pe - s;
+
+		// empty line?
+		if ( slen <= 0 ) 
+			continue;
+
+		if ( ! isUrl ) {
+			// add to the negative string buffer, \0 separated
+			if (   isNeg ) {
+				if ( !sc->m_negSubstringBuf.safeMemcpy(s,slen))
+					return true;
+				if ( !sc->m_negSubstringBuf.pushChar('\0') )
+					return true;
+				continue;
+			}
+			// add to the positive string buffer, \0 separated
+			if ( ! sc->m_posSubstringBuf.safeMemcpy(s,slen) )
+				return true;
+			if ( ! sc->m_posSubstringBuf.pushChar('\0') )
+				return true;
+			continue;
+		}
+
+
+		u.set ( s , slen );
+
+		// error? skip it then...
+		if ( u.getHostLen() <= 0 ) {
+			log("basic: error on line #%li in sitelist",lineNum);
+			continue;
+		}
+
+		// is fake ip assigned to us?
+		long firstIp = getFakeIpForUrl2 ( &u );
+
+		if ( ! isAssignedToUs( firstIp ) ) continue;
+
+		// see if in existing table for existing site list
+		if ( addSeeds &&
+		     // a "site:" directive mean no seeding
+		     // a "contains:" directive mean no seeding
+		     seedMe &&
+		     ! dedup.isInTable ( &h32 ) ) {
+			// make spider request
+			SpiderRequest sreq;
+			sreq.setFromAddUrl ( u.getUrl() );
+			if ( 
+			    // . add this url to spiderdb as a spiderrequest
+			     // . calling msg4 will be the last thing we do
+			    !spiderReqBuf->safeMemcpy(&sreq,sreq.getRecSize()))
+				return true;
+			// count it
+			added++;
+
+		}
+
+		// if it is a "seed: xyz.com" thing it is seed only
+		// do not use it for a filter rule
+		if ( ! isFilter ) continue;
+		
+		
+		// make the data node used for filtering urls during spidering
+		PatternData pd;
+		// hash of the subdomain or domain for this line in sitelist
+		pd.m_thingHash32 = u.getHostHash32();
+		// . ptr to the start of this line in the site list text
+		// . includes pointing to "exact:" too i guess and tag: later.
+		pd.m_patternStr = start;
+		// offset of the url path in the pattern, 0 means none
+		pd.m_pathOff = 0;
+		// do not store uninitialized stack bytes in the table when
+		// the pattern has no path
+		pd.m_pathLen = 0;
+		// scan url pattern, it should start at "s"
+		char *x = s;
+		// go all the way to the end
+		for ( ; *x && x < pe ; x++ ) {
+			// skip ://
+			if ( x[0] == ':' && x[1] =='/' && x[2] == '/' ) {
+				x += 2;
+				continue;
+			}
+			// stop if we hit another /, that is path start
+			if ( x[0] != '/' ) continue;
+			x++;
+			// empty path besides the /?
+			if (  x >= pe   ) break;
+			// ok, we got something here i think.
+			// sanity: force a core if Url disagrees with us
+			if ( u.getPathLen() <= 1 ) { char *xx=NULL;*xx=0; }
+			// calc length from "start" of line so we can
+			// jump to the path quickly for compares. inc "/"
+			pd.m_pathOff = (x-1) - start;
+			pd.m_pathLen = pe - (x-1);
+			break;
+		}
+
+		// add to new dt
+		long domHash32 = u.getDomainHash32();
+		if ( ! dt->addKey ( &domHash32 , &pd ) )
+			return true;
+
+		// we have some patterns in there
+		sc->m_siteListIsEmpty = false;
+	}
+
+	// go back to a high niceness
+	dt->m_niceness = MAX_NICENESS;
+
+	//long siteListLen = gbstrlen(siteList);
+	//cr->m_siteListBuf.safeMemcpy ( siteList , siteListLen + 1 );
+
+	if ( ! addSeeds ) return true;
+
+	log("spider: adding %li seed urls",added);
+
+	// use spidercoll to contain this msg4 but if in use it
+	// won't be able to be deleted until it comes back..
+	if ( ! sc->m_msg4x.addMetaList ( spiderReqBuf ,
+					 sc->m_collnum ,
+					 // no need for callback since m_msg4x
+					 // should set msg4::m_inUse to false
+					 // when it comes back
+					 NULL , // state
+					 NULL , // callback 
+					 MAX_NICENESS ,
+					 RDB_SPIDERDB
+					 ) )
+		return false;
+
+	return true;
+}
+
+// . Spider.cpp calls this to see if the url in "sreq" is in our "site list"
+// . returns NULL if the site list is empty, if the url contains one of the
+//   negative "contains:" substrings, or if nothing matches
+// . otherwise returns the FIRST match: either a ptr to the start of the
+//   matching site list line (PatternData::m_patternStr) or a ptr to the
+//   matching positive substring
+// . the url patterns all contain a domain now, so the domain hash is used
+//   to find the candidate lines quickly
+// . the ptr is to the start of the line in case it has "tag:" i guess
+char *getMatchingUrlPattern ( SpiderColl *sc , SpiderRequest *sreq ) {
+
+	// if it has * and no negatives, we are in!
+	//if ( sc->m_siteListAsteriskLine && ! sc->m_siteListHasNegatives )
+	//	return sc->m_siteListAsteriskLine;
+
+	// a site list of just comments or blank lines matches nothing
+	if ( sc->m_siteListIsEmpty ) return NULL;
+
+	// . the negative substrings are stored back to back, \0 terminated
+	// . we have to linear scan them; one hit and the url is out
+	char *neg    = sc->m_negSubstringBuf.getBufStart();
+	char *negEnd = neg + sc->m_negSubstringBuf.getLength();
+	while ( neg && neg < negEnd ) {
+		if ( strstr ( sreq->m_url , neg ) ) return NULL;
+		// on to the next substring
+		neg += strlen(neg) + 1;
+	}
+
+	// the domain specific table
+	HashTableX *domTable = &sc->m_siteListDomTable;
+
+	// the collection this spidercoll is for
+	CollectionRec *coll = sc->m_cr;
+
+	// . table not built yet? make it from the stored site list.
+	// . false = do not add seeds, just make siteListDomTable, etc.
+	if ( domTable->getNumSlotsUsed() == 0 && coll )
+		updateSiteListTables ( sc->m_collnum , 
+				       false ,
+				       coll->m_siteListBuf.getBufStart() );
+
+	// still nothing in it? then no line can match
+	if ( domTable->getNumSlotsUsed() == 0 ) return NULL;
+
+	// set the first time a pattern with a path needs it
+	char *urlPath = NULL;
+
+	// . the table maps a 32-bit domain hash to a PatternData. only for
+	//   those urls that have firstIps that we handle.
+	// . walk every pattern stored under this url's domain hash and
+	//   return the first one we match
+	for ( long slot = domTable->getSlot ( &sreq->m_domHash32 ) ;
+	      slot >= 0 ;
+	      slot = domTable->getNextSlot ( slot , &sreq->m_domHash32 ) ) {
+		PatternData *pd ;
+		pd = (PatternData *)domTable->getValueFromSlot ( slot );
+		// is it negative? return NULL if so so url will be ignored
+		//if ( pd->m_patternStr[0] == '-' ) 
+		//	return NULL;
+		// the pattern has a path, so our path must start with it
+		if ( pd->m_pathOff ) {
+			if ( ! urlPath ) urlPath = sreq->getUrlPath();
+			char *patPath = pd->m_patternStr + pd->m_pathOff;
+			bool samePath = 
+				( strncmp ( urlPath ,
+					    patPath ,
+					    pd->m_pathLen ) == 0 );
+			if ( ! samePath ) continue;
+		}
+		// . the line was either just a domain, and it is ours, or
+		//   it was a subdomain, and it is ours
+		// . the caller gets the whole line back, negative or not
+		if ( pd->m_thingHash32 == sreq->m_domHash32 ||
+		     pd->m_thingHash32 == sreq->m_hostHash32 )
+			return pd->m_patternStr;
+	}
+
+
+	// . now the positive substrings, stored like the negative ones
+	// . return the first substring that is in the url
+	char *pos    = sc->m_posSubstringBuf.getBufStart();
+	char *posEnd = pos + sc->m_posSubstringBuf.length();
+	while ( pos && pos < posEnd ) {
+		if ( strstr ( sreq->m_url , pos ) ) return pos;
+		// on to the next substring
+		pos += strlen(pos) + 1;
+	}
+
+
+	// is there an '*' in the patterns?
+	//if ( sc->m_siteListAsteriskLine ) return sc->m_siteListAsteriskLine;
+
+	// matched nothing
+	return NULL;
+}
+
+// . appends an html table of example site list lines, each with a short
+//   description of what the line does, to "sb"
+// . prints nothing if "hr" does not resolve to a collection
+// . always returns true
+bool printSitePatternExamples ( SafeBuf *sb , HttpRequest *hr ) {
+
+	// true = useDefault?
+	CollectionRec *cr = g_collectiondb.getRec ( hr , true );
+	if ( ! cr ) return true;
+
+	/*
+	// it is a safebuf parm
+	char *siteList = cr->m_siteListBuf.getBufStart();
+	if ( ! siteList ) siteList = "";
+
+	SafeBuf msgBuf;
+	char *status = "";
+	long max = 1000000;
+	if ( cr->m_siteListBuf.length() > max ) {
+		msgBuf.safePrintf( "<font color=red><b>"
+				   "Site list is over %li bytes large, "
+				   "too many to "
+				   "display on this web page. Please use the "
+				   "file upload feature only for now."
+				   "</b></font>"
+				   , max );
+		status = " disabled";
+	}
+	*/
+
+
+	/*
+	sb->safePrintf(
+		       "On the command line you can issue a command like "
+
+		       "<i>"
+		       "gb addurls &lt; fileofurls.txt"
+		       "</i> or "
+
+		       "<i>"
+		       "gb addfile &lt; *.html"
+		       "</i> or "
+
+		       "<i>"
+		       "gb injecturls &lt; fileofurls.txt"
+		       "</i> or "
+
+		       "<i>"
+		       "gb injectfile &lt; *.html"
+		       "</i> or "
+
+		       "to schedule downloads or inject content directly "
+		       "into Gigablast."
+
+		       "</td><td>"
+
+		       "<input "
+		       "size=20 "
+		       "type=file "
+		       "name=urls>"
+		       "</td></tr>"
+
+		       );
+	*/	      
+
+	// . example table header row
+	// . close the <center> and <td> we open. this used to print
+	//   "</tr></tr>" which left both of them open.
+	sb->safePrintf ( "<a name=examples></a>"
+			 "<table %s>"
+			 "<tr class=hdrow><td colspan=2>"
+			 "<center><b>Site List Examples</b></center></td></tr>"
+			 //"<tr bgcolor=#%s>"
+			 //"<td>"
+			 ,TABLE_STYLE );//, DARK_BLUE);
+			 
+
+	// one row per example: the line on the left, what it does on the
+	// right
+	sb->safePrintf(
+		       //"*"
+		       //"</td>"
+		       //"<td>Spider all urls encountered. If you just submit "
+		       //"this by itself, then Gigablast will initiate spidering "
+		       //"automatically at dmoz.org, an internet "
+		      //"directory of good sites.</td>"
+		       //"</tr>"
+
+		      // plain domain
+		      "<tr>"
+		      "<td>goodstuff.com</td>"
+		      "<td>"
+		      "Spider the url <i>goodstuff.com/</i> and spider "
+		      "any links we harvest that have the domain "
+		      "<i>goodstuff.com</i>"
+		      "</td>"
+		      "</tr>"
+
+		      // protocol and subdomain match
+		      "<tr>"
+		      "<td>http://www.goodstuff.com/</td>"
+		      "<td>"
+		      "Spider the url "
+		      "<i>http://www.goodstuff.com/</i> and spider "
+		      "any links we harvest that start with "
+		      "<i>http://www.goodstuff.com/</i>"
+		      "</td>"
+		      "</tr>"
+
+		      // seed only
+		      "<tr>"
+		      "<td>seed:www.goodstuff.com/myurl.html</td>"
+		      "<td>"
+		      "Spider the url <i>www.goodstuff.com/myurl.html</i>. "
+		      "Add any outlinks we find into the "
+		      "spider queue, but those outlinks will only be "
+		      "spidered if they "
+		      "match ANOTHER line in this site list."
+		      "</td>"
+		      "</tr>"
+
+
+		      // protocol and subdomain match
+		      "<tr>"
+		      "<td>site:http://www.goodstuff.com/</td>"
+		      "<td>"
+		      "Allow any urls starting with "
+		      "<i>http://www.goodstuff.com/</i> to be spidered "
+		      "if encountered."
+		      "</td>"
+		      "</tr>"
+
+		      // subdomain match
+		      "<tr>"
+		      "<td>site:www.goodstuff.com</td>"
+		      "<td>"
+		      "Allow any urls starting with "
+		      "<i>www.goodstuff.com/</i> to be spidered "
+		      "if encountered."
+		      "</td>"
+		      "</tr>"
+
+		      // negative subdomain match. the description used to
+		      // end in a leftover "to be spidered if encountered."
+		      "<tr>"
+		      "<td>-site:bad.goodstuff.com</td>"
+		      "<td>"
+		      "Do not spider any urls starting with "
+		      "<i>bad.goodstuff.com/</i>."
+		      "</td>"
+		      "</tr>"
+
+		      // domain match
+		      "<tr>"
+		      "<td>site:goodstuff.com</td>"
+		      "<td>"
+		      "Allow any urls starting with "
+		      "<i>goodstuff.com/</i> to be spidered "
+		      "if encountered."
+		      "</td>"
+		      "</tr>"
+
+		      // spider this subdir
+		      "<tr>"
+		      "<td><nobr>site:"
+		      "http://www.goodstuff.com/goodir/anotherdir/</nobr></td>"
+		      "<td>"
+		      "Allow any urls starting with "
+		      "<i>http://www.goodstuff.com/goodir/anotherdir/</i> "
+		      "to be spidered "
+		      "if encountered."
+		      "</td>"
+		      "</tr>"
+
+
+		      // exact match
+		      
+		      //"<tr>"
+		      //"<td>exact:http://xyz.goodstuff.com/myurl.html</td>"
+		      //"<td>"
+		      //"Allow this specific url."
+		      //"</td>"
+		      //"</tr>"
+
+		      /*
+		      // local subdir match
+		      "<tr>"
+		      "<td>file://C/mydir/mysubdir/"
+		      "<td>"
+		      "Spider all files in the given subdirectory or lower. "
+		      "</td>"
+		      "</tr>"
+
+		      "<tr>"
+		      "<td>-file://C/mydir/mysubdir/baddir/"
+		      "<td>"
+		      "Do not spider files in this subdirectory."
+		      "</td>"
+		      "</tr>"
+		      */
+
+		      // connect to a device and index it as a stream
+		      //"<tr>"
+		      //"<td>stream:/dev/eth0"
+		      //"<td>"
+		      //"Connect to a device and index it as a stream. "
+		      //"It will be treated like a single huge document for "
+		      //"searching purposes with chunks being indexed in "
+		      //"realtime. Or chunk it up into individual document "
+		      //"chunks, but proximity term searching will have to "
+		      //"be adjusted to compute query term distances "
+		      //"inter-document."
+		      //"</td>"
+		      //"</tr>"
+
+		      // positive substring match. the example used to say
+		      // "goodtuff" which did not agree with its description.
+		      "<tr>"
+		      "<td>contains:goodstuff</td>"
+		      "<td>Spider any url containing <i>goodstuff</i>."
+		      "</td>"
+		      "</tr>"
+
+		      // negative substring match
+		      "<tr>"
+		      "<td>-contains:badstuff</td>"
+		      "<td>Do not spider any url containing <i>badstuff</i>."
+		      "</td>"
+		      "</tr>"
+
+		      /*
+		      "<tr>"
+		      "<td>regexp:-pid=[0-9A-Z]+/</td>"
+		      "<td>Url must match this regular expression. "
+		      "Try to avoid using these if possible; they can slow "
+		      "things down and are confusing to use."
+		      "</td>"
+		      "</tr>"
+		      
+
+		      // tag match
+		      "<tr><td>"
+		      //"<td>tag:boots contains:boots<br>"
+		      "<nobr>tag:boots site:www.westernfootwear."
+		      "</nobr>com<br>"
+		      "tag:boots site:www.cowboyshop.com<br>"
+		      "tag:boots site:www.moreboots.com<br>"
+		      "<nobr>tag:boots site:www.lotsoffootwear.com"
+		      "</nobr><br>"
+		      //"<td>t:boots -contains:www.cowboyshop.com/shoes/</td>"
+		      "</td><td>"
+		      "Advance users only. "
+		      "Tag any urls matching these 4 url patterns "
+		      "so we can use "
+		      "the expression <i>tag:boots</i> in the "
+		      "<a href=/scheduler>spider scheduler</a> and perhaps "
+		      "give such urls higher spider priority."
+		      "For more "
+		      "precise spidering control over url subsets. "
+		      "Preceed any pattern with the tagname followed by "
+		      "space to tag it."
+		      "</td>"
+		      "</tr>"
+		      */
+
+		      // comment lines
+		      "<tr>"
+		      "<td># This line is a comment.</td>"
+		      "<td>Empty lines and lines starting with # are "
+		      "ignored."
+		      "</td>"
+		      "</tr>"
+
+		      "</table>"
+		      );
+
+	return true;
+}
+
+
+// from pagecrawlbot.cpp for printCrawlDetailsInJson()
+#include "PageCrawlBot.h"
+
+///////////
+//
+// main > Basic > Status
+//
+///////////
+// . serves the main > Basic > Status page for the collection given in "hr"
+// . the "format" cgi parm selects the output: "json" sends what
+//   printCrawlDetailsInJson() prints, "html" (the default) sends the admin
+//   page header followed by a table of crawl stats
+// . NOTE(review): "xml" is recognized but nothing is printed for it, so an
+//   empty page is sent -- confirm that is intended
+// . sends a 500 error reply and returns true if there is no collection,
+//   otherwise returns whatever sendDynamicPage() returns
+bool sendPageBasicStatus ( TcpSocket *socket , HttpRequest *hr ) {
+
+	char  buf [ 128000 ];
+	SafeBuf sb(buf,128000);
+	sb.reset();
+
+	// what format do they want the reply in? default to html.
+	char *fs = hr->getString("format",NULL,NULL);
+	char fmt = FORMAT_HTML;
+	if ( fs && strcmp(fs,"html") == 0 ) fmt = FORMAT_HTML;
+	if ( fs && strcmp(fs,"json") == 0 ) fmt = FORMAT_JSON;
+	if ( fs && strcmp(fs,"xml") == 0 ) fmt = FORMAT_XML;
+
+
+	// true = usedefault coll?
+	CollectionRec *cr = g_collectiondb.getRec ( hr , true );
+	if ( ! cr ) {
+		g_httpServer.sendErrorReply(socket,500,"invalid collection");
+		return true;
+	}
+
+	// . "fmt" is set from the FORMAT_* values above so test it against
+	//   FORMAT_JSON
+	// . this used to test FMT_JSON, a different macro that is not what
+	//   we assigned to "fmt"
+	if ( fmt == FORMAT_JSON ) {
+		printCrawlDetailsInJson ( &sb , cr );
+		return g_httpServer.sendDynamicPage (socket, 
+						     sb.getBufStart(), 
+						     sb.length(),
+						     0); // cachetime
+	}
+
+
+	// print standard header 
+	if ( fmt == FORMAT_HTML )
+		// this prints the <form tag as well
+		g_pages.printAdminTop ( &sb , socket , hr );
+
+
+	//
+	// show stats
+	//
+	if ( fmt == FORMAT_HTML ) {
+
+		// get the status code and its description
+		SafeBuf tmp;
+		long crawlStatus = -1;
+		getSpiderStatusMsg ( cr , &tmp , &crawlStatus );
+
+		// do not hand a NULL to %s if nothing was printed into tmp
+		char *statusMsg = tmp.getBufStart();
+		if ( ! statusMsg ) statusMsg = "";
+
+		//sb.safePrintf(
+		//	      "<form method=get action=/crawlbot>"
+		//	      "%s"
+		//	      , sb.getBufStart() // hidden input token/name/..
+		//	      );
+
+		// "Has Urls Ready to Spider" as Yes or No
+		char *hurts = "No";
+		if ( cr->m_globalCrawlInfo.m_hasUrlsReadyToSpider )
+			hurts = "Yes";
+
+		// . every <b> and both tables are closed now
+		// . the old page left them all open
+		sb.safePrintf("<TABLE border=0>"
+			      "<TR><TD valign=top>"
+
+			      "<table border=0 cellpadding=5>"
+
+			      "<tr>"
+			      "<td><b>Crawl Status Code:</b></td>"
+			      "<td>%li</td>"
+			      "</tr>"
+
+			      "<tr>"
+			      "<td><b>Crawl Status Msg:</b></td>"
+			      "<td>%s</td>"
+			      "</tr>"
+
+			      //"<tr>"
+			      //"<td><b>Rounds Completed:</td>"
+			      //"<td>%li</td>"
+			      //"</tr>"
+
+			      "<tr>"
+			      "<td><b>Has Urls Ready to Spider:</b></td>"
+			      "<td>%s</td>"
+			      "</tr>"
+
+
+			      // this will  have to be in crawlinfo too!
+			      //"<tr>"
+			      //"<td><b>pages indexed</b>"
+			      //"<td>%lli</td>"
+			      //"</tr>"
+
+			      "<tr>"
+			      "<td><b>URLs Harvested</b> "
+			      "(may include dups)</td>"
+			      "<td>%lli</td>"
+			      "</tr>"
+
+			      //"<tr>"
+			      //"<td><b>URLs Examined</b></td>"
+			      //"<td>%lli</td>"
+			      //"</tr>"
+
+			      "<tr>"
+			      "<td><b>Page Crawl Attempts</b></td>"
+			      "<td>%lli</td>"
+			      "</tr>"
+
+			      "<tr>"
+			      "<td><b>Page Crawl Successes</b></td>"
+			      "<td>%lli</td>"
+			      "</tr>"
+
+			      "</table>"
+
+			      "</TD></TR>"
+			      "</TABLE>"
+			      , crawlStatus
+			      , statusMsg
+			      //, cr->m_spiderRoundNum
+			      //, cr->m_globalCrawlInfo.m_hasUrlsReadyToSpider
+			      , hurts
+
+			      , cr->m_globalCrawlInfo.m_urlsHarvested
+			      //, cr->m_globalCrawlInfo.m_urlsConsidered
+
+			      , cr->m_globalCrawlInfo.m_pageDownloadAttempts
+			      , cr->m_globalCrawlInfo.m_pageDownloadSuccesses
+			      );
+
+	}
+
+	//if ( fmt != FORMAT_JSON )
+	//	// wrap up the form, print a submit button
+	//	g_pages.printAdminBottom ( &sb );
+
+	return g_httpServer.sendDynamicPage (socket, 
+					     sb.getBufStart(), 
+					     sb.length(),
+					     0); // cachetime
+}
+	
--- a/PageCatdb.cpp
+++ b/PageCatdb.cpp
@ -28,7 +28,7 @@ public:
 // . sets g_errno on error
 bool sendPageCatdb ( TcpSocket *s , HttpRequest *r ) {
 	// are we the admin?
-	bool isAdmin    = g_collectiondb.isAdmin    ( r , s );
+	//bool isAdmin    = g_collectiondb.hasPermission ( r , s );
 	// get the collection record
 	CollectionRec *cr = g_collectiondb.getRec ( r );
 	if ( ! cr ) {
@ -38,16 +38,18 @@ bool sendPageCatdb ( TcpSocket *s , HttpRequest *r ) {
 		return g_httpServer.sendErrorReply ( s , 500 ,
 						  "collection does not exist");
 	}
+	/*
 	bool isAssassin = cr->isAssassin ( s->m_ip );
 	if ( isAdmin ) isAssassin = true;
 	// bail if permission denied
 	if ( ! isAssassin && ! cr->hasPermission ( r , s ) ) {
 		log("admin: Bad collection name or password. Could not add "
 		    "sites to tagdb. Permission denied.");
-		return sendPageLogin ( s , r , 
+		return sendPagexxxx ( s , r , 
 						    "Collection name or "
 						    "password is incorrect");
 	}
+	*/
 	// get the collection
 	long collLen = 0;
 	char *coll   = r->getString("c", &collLen, NULL);
@ -179,14 +181,14 @@ bool sendReply ( void *state ) {
 	// print the generate Catdb link
 	sb.safePrintf ( "<tr class=poo><td>Update Catdb from DMOZ data.</td>"
 			"<td><center>"
-			"<a href=\"/master/catdb?c=%s&gencatdb=2\">"
+			"<a href=\"/admin/catdb?c=%s&gencatdb=2\">"
 			"Update Catdb</a> "
 			"</center></td></tr>",
 			st->m_coll );
 	sb.safePrintf ( "<tr class=poo>"
 			"<td>Generate New Catdb from DMOZ data.</td>"
 			"<td><center>"
-			"<a href=\"/master/catdb?c=%s&gencatdb=1\">"
+			"<a href=\"/admin/catdb?c=%s&gencatdb=1\">"
 			"Generate Catdb</a> "
 			"</center></td></tr>",
 			st->m_coll );
--- a/PageCrawlBot.cpp
+++ b/PageCrawlBot.cpp
--- a/PageCrawlBot.h
+++ b/PageCrawlBot.h
@ -2,6 +2,8 @@
 #ifndef CRAWLBOT_H
 #define CRAWLBOT_H

+bool printCrawlDetailsInJson ( class SafeBuf *sb , class CollectionRec *cx ) ;
+
 // values for the diffbot dropdown
 /*
 #define DBA_NONE 0
--- a/PageDirectory.cpp
+++ b/PageDirectory.cpp
@ -58,15 +58,15 @@ bool sendPageDirectory ( TcpSocket *s , HttpRequest *r ) {
 	//
 	else {
 		// search box
-		printLogoAndSearchBox(sb,r,catId);
+		printLogoAndSearchBox(&sb,r,catId);
 		// radio buttons for search dmoz. no, this is printed
 		// from call to printLogoAndSearchBox()
 		//printDmozRadioButtons(sb,catId);
 		// the dmoz breadcrumb
-		printDMOZCrumb ( sb,catId,xml);
+		printDMOZCrumb ( &sb,catId,xml);
 		// print the subtopcis in this topic. show as links above
 		// the search results
-		printDMOZSubTopics ( sb, catId , xml );
+		printDMOZSubTopics ( &sb, catId , xml );
 		// ok, for now just print the dmoz topics since our search
 		// results will be empty... until populated!
 		g_categories->printUrlsInTopic ( &sb , catId );
--- a/PageEvents.cpp
+++ b/PageEvents.cpp
@ -7610,8 +7610,8 @@ bool printAdminLinks ( SafeBuf &sb , State7 *st ) {
 		// get the filename directly
 		sb.safePrintf (" &nbsp; "
 			       "<font color=red><b>"
-			       //"<a href=\"/master/tagdb?f=%li&c=%s&u=%s\">"
-			       "<a href=\"/master/tagdb?"
+			       //"<a href=\"/admin/tagdb?f=%li&c=%s&u=%s\">"
+			       "<a href=\"/admin/tagdb?"
 			       //"tagid0=%li&"
 			       "tagtype0=manualban&"
 			       "tagdata0=1&"
@ -7631,7 +7631,7 @@ bool printAdminLinks ( SafeBuf &sb , State7 *st ) {
 		//long bannedTagId = getTagTypeFromStr("manualban",9);
 		sb.safePrintf (" &nbsp; "
 			       "<font color=red><b>"
-			       "<a href=\"/master/tagdb?"
+			       "<a href=\"/admin/tagdb?"
 			       //"tagid0=%li&"
 			       "tagtype0=manualban&"
 			       "tagdata0=1&"
@ -7876,7 +7876,7 @@ void printAdminEventOptions ( SafeBuf* sb,
 	sb->safePrintf("Ban By Domain: ");
 	
 	//long bannedTagId = getTagTypeFromStr("manualban",9);
-	sb->safePrintf("<a href=\"/master/tagdb?"
+	sb->safePrintf("<a href=\"/admin/tagdb?"
 		       "tagtype0=manualban&"
 		       "tagdata0=1&"
 		       "u=%s&c=%s\">"
@ -8561,13 +8561,13 @@ static bool printResult ( CollectionRec *cr,
 		// . if it's local, don't put the hostname/port in
 		//   there cuz it will mess up Global Spec's machine
 		//if ( h->m_groupId == g_hostdb.m_groupId ) 
-		sb.safePrintf(" - <a href=\"/master/titledb?c=%s&"
+		sb.safePrintf(" - <a href=\"/admin/titledb?c=%s&"
 			      "d=%lli",coll,mr->m_docId);
 		// then the [info] link to show the TitleRec
 		sb.safePrintf ( "\">[info]</a>" );
 		
 		// now the analyze link
-		sb.safePrintf (" - <a href=\"/master/parser?c=%s&"
+		sb.safePrintf (" - <a href=\"/admin/parser?c=%s&"
 			       "old=1&hc=%li&u=", 
 			       coll,
 			       (long)mr->m_hopcount);
@ -8629,7 +8629,7 @@ static bool printResult ( CollectionRec *cr,
 			       dbuf ,
 			       coll , dbuf );
 		sb.safePrintf(" - "
-				  " <a href=\"/master/tagdb?"
+				  " <a href=\"/admin/tagdb?"
 				  "tagtype0=manualban&"
 				  "tagdata0=1&"
 				  "u=%s&c=%s\">"
@ -8641,7 +8641,7 @@ static bool printResult ( CollectionRec *cr,
 		memcpy ( dbuf , uu.getHost() , dlen );
 		dbuf [ dlen ] = '\0';
 		sb.safePrintf(" - "
-				  " <a href=\"/master/tagdb?"
+				  " <a href=\"/admin/tagdb?"
 				  "tagtype0=manualban&"
 				  "tagdata0=1&"
 				  "u=%s&c=%s\">"
@ -17616,7 +17616,7 @@ bool gotCaptchaReply ( State9 *st9 , TcpSocket *s ) {
 	if ( st9->m_isAdmin && 1 == 2) {
 		SafeBuf ttt;
 		ttt.safePrintf("<br>"
-			       "<a href=/master/parser?"
+			       "<a href=/admin/parser?"
 			       //"user=mwells&pwd=mwell62&"
 			       "c=%s&u=%s&content=",
 			       st9->m_coll,
--- a/PageGet.cpp
+++ b/PageGet.cpp
@ -120,7 +120,7 @@ bool sendPageGet ( TcpSocket *s , HttpRequest *r ) {
 	mnew ( st , sizeof(State2) , "PageGet1" );
 	// save the socket and if Host: is local in the Http request Mime
 	st->m_socket   = s;
-	st->m_isAdmin  = g_collectiondb.isAdmin ( r , s );
+	st->m_isAdmin  = g_conf.isCollAdmin ( s , r );
 	st->m_isLocal  = r->isLocal();
 	st->m_docId    = docId;
 	// include header ... "this page cached by Gigablast on..."
--- a/PageHosts.cpp
+++ b/PageHosts.cpp
@ -23,6 +23,7 @@ static int dgramsFromSort ( const void *i1, const void *i2 );
 //static int loadAvgSort    ( const void *i1, const void *i2 );
 static int memUsedSort    ( const void *i1, const void *i2 );
 static int cpuUsageSort   ( const void *i1, const void *i2 );
+static int diskUsageSort  ( const void *i1, const void *i2 );

 long generatePingMsg( Host *h, long long nowms, char *buffer );

@ -130,7 +131,7 @@ skipReplaceHost:
 	if ( g_conf.m_useShotgun ) {
 		colspan = "31";
 		//shotcol = "<td><b>ip2</b></td>";
-		sprintf ( shotcol, "<td><a href=\"/master/hosts?c=%s"
+		sprintf ( shotcol, "<td><a href=\"/admin/hosts?c=%s"
 			 	   "&sort=2\">"
 			  "<b>ping2</b></td></a>",
 			  coll);
@ -142,17 +143,17 @@ skipReplaceHost:
 		  "<tr><td colspan=%s><center>"
 		  //"<font size=+1>"
 		  "<b>Hosts "
-		  "(<a href=\"/master/hosts?c=%s&sort=%li&reset=1\">"
+		  "(<a href=\"/admin/hosts?c=%s&sort=%li&reset=1\">"
 		  "reset)</b>"
 		  //"</font>"
 		  "</td></tr>" 
 		  "<tr bgcolor=#%s>"
-		  "<td><a href=\"/master/hosts?c=%s&sort=0\">"
+		  "<td><a href=\"/admin/hosts?c=%s&sort=0\">"

 		  "<b>hostId</b></td>"
 		  "<td><b>host ip</b></td>"
-		  "<td><b>shard</b></td>" // mirror group
-		  "<td><b>stripe</b></td>"
+		  "<td><b>shard</b></td>"
+		  "<td><b>mirror</b></td>" // mirror # within the shard

 		  // i don't remember the last time i used this, so let's
 		  // just comment it out to save space
@ -187,49 +188,52 @@ skipReplaceHost:
 		  //"<td><b>resends sent</td>"
 		  //"<td><b>errors recvd</td>"
 		  //"<td><b>ETRYAGAINS recvd</td>"
-		  "<td><a href=\"/master/hosts?c=%s&sort=3\">"
+		  "<td><a href=\"/admin/hosts?c=%s&sort=3\">"
 		  "<b>dgrams resent</a></td>"
-		  "<td><a href=\"/master/hosts?c=%s&sort=4\">"
+		  "<td><a href=\"/admin/hosts?c=%s&sort=4\">"
 		  "<b>errors recvd</a></td>"
-		  "<td><a href=\"/master/hosts?c=%s&sort=5\">"
+		  "<td><a href=\"/admin/hosts?c=%s&sort=5\">"
 		  "<b>ETRY AGAINS recvd</a></td>"

-		  "<td><a href=\"/master/hosts?c=%s&sort=6\">"
+		  "<td><a href=\"/admin/hosts?c=%s&sort=6\">"
 		  "<b>dgrams to</a></td>"
-		  "<td><a href=\"/master/hosts?c=%s&sort=7\">"
+		  "<td><a href=\"/admin/hosts?c=%s&sort=7\">"
 		  "<b>dgrams from</a></td>"

-		  //"<td><a href=\"/master/hosts?c=%s&sort=8\">"
+		  //"<td><a href=\"/admin/hosts?c=%s&sort=8\">"
 		  //"<b>loadavg</a></td>"


-		  "<td><a href=\"/master/hosts?c=%s&sort=13\">"
+		  "<td><a href=\"/admin/hosts?c=%s&sort=13\">"
 		  "<b>avg split time</a></td>"

 		  "<td><b>splits done</a></td>"

-		  "<td><a href=\"/master/hosts?c=%s&sort=12\">"
+		  "<td><a href=\"/admin/hosts?c=%s&sort=12\">"
 		  "<b>status</a></td>"

-		  "<td><a href=\"/master/hosts?c=%s&sort=15\">"
+		  "<td><a href=\"/admin/hosts?c=%s&sort=15\">"
 		  "<b>slow reads</a></td>"

 		  "<td><b>docs indexed</a></td>"

-		  "<td><a href=\"/master/hosts?c=%s&sort=9\">"
+		  "<td><a href=\"/admin/hosts?c=%s&sort=9\">"
 		  "<b>mem used</a></td>"

-		  "<td><a href=\"/master/hosts?c=%s&sort=10\">"
+		  "<td><a href=\"/admin/hosts?c=%s&sort=10\">"
 		  "<b>cpu</a></td>"

-		  "<td><a href=\"/master/hosts?c=%s&sort=14\">"
+		  "<td><a href=\"/admin/hosts?c=%s&sort=17\">"
+		  "<b>disk</a></td>"
+
+		  "<td><a href=\"/admin/hosts?c=%s&sort=14\">"
 		  "<b>max ping1</a></td>"

-		  "<td><a href=\"/master/hosts?c=%s&sort=11\">"
+		  "<td><a href=\"/admin/hosts?c=%s&sort=11\">"
 		  "<b>ping1 age</a></td>"

 		  //"<td><b>ip1</td>"
-		  "<td><a href=\"/master/hosts?c=%s&sort=1\">"
+		  "<td><a href=\"/admin/hosts?c=%s&sort=1\">"
 		  "<b>ping1</a></td>"

 		  "%s"// "<td><b>ip2</td>"
@ -257,6 +261,7 @@ skipReplaceHost:
 		  coll,
 		  coll,
 		  coll,
+		  coll,
 		  shotcol    );

 	// loop through each host we know and print it's stats
@ -295,6 +300,7 @@ skipReplaceHost:
 	case 14:gbsort ( hostSort, nh, sizeof(long), pingMaxSort    ); break;
 	case 15:gbsort ( hostSort, nh, sizeof(long), slowDiskSort    ); break;
 	case 16:gbsort ( hostSort, nh, sizeof(long), defaultSort    ); break;
+	case 17:gbsort ( hostSort, nh, sizeof(long), diskUsageSort   ); break;
 	}

 	// we are the only one that uses these flags, so set them now
@ -379,6 +385,15 @@ skipReplaceHost:
 		if ( cpu > 100.0 ) cpu = 100.0;
 		if ( cpu < 0.0   ) cpu = -1.0;

+		char diskUsageMsg[64]; // text for the "disk" column cell
+		const size_t dlen = sizeof(diskUsageMsg); // bound every write
+		snprintf(diskUsageMsg,dlen,"%.1f%%",h->m_diskUsage);
+		if ( h->m_diskUsage < 0.0 ) snprintf(diskUsageMsg,dlen,"???");
+		if ( h->m_diskUsage >= 98.0 ) // nearly full, print in red
+			snprintf(diskUsageMsg,dlen,"<font color=red><b>%.1f%%"
+				 "</b></font>",h->m_diskUsage);
+
+
 		// split time, don't divide by zero!
 		long splitTime = 0;
 		if ( h->m_splitsDone ) 
@ -437,7 +452,7 @@ skipReplaceHost:
 		// print it
 		sb.safePrintf (
 			  "<tr bgcolor=#%s>"
-			  "<td><a href=\"http://%s:%hi/master/hosts?"
+			  "<td><a href=\"http://%s:%hi/admin/hosts?"
 			  ""
 			  "c=%s"
 			  "&sort=%li\">%li</a></td>"
@ -494,6 +509,8 @@ skipReplaceHost:
 			  "<td>%s%.1f%%%s</td>"
 			  // cpu usage
 			  "<td>%.1f%%</td>"
+			  // disk usage
+			  "<td>%s</td>"

 			  // ping max
 			  "<td>%s</td>"
@ -547,6 +564,7 @@ skipReplaceHost:
 			  h->m_percentMemUsed, // float
 			  fontTagBack,
 			  cpu, // float
+			  diskUsageMsg,

 			  // ping max
 			  pms,
@ -564,6 +582,7 @@ skipReplaceHost:
 	sb.safePrintf ( "</table><br>\n" );

 	
+	/*
 	// print spare hosts table
 	sb.safePrintf ( 
 		  "<table %s>"
@ -628,7 +647,9 @@ skipReplaceHost:
 			  h->m_note );
 	}
 	sb.safePrintf ( "</table><br>" );
+	*/

+	/*
 	// print proxy hosts table
 	sb.safePrintf ( 
 		  "<table %s>"
@ -693,7 +714,7 @@ skipReplaceHost:
 		sb.safePrintf (
 			  "<tr bgcolor=#%s>"

-			  "<td><a href=\"http://%s:%hi/master/hosts?"
+			  "<td><a href=\"http://%s:%hi/admin/hosts?"
 			  ""
 			  "c=%s\">"
 			  "%li</a></td>"
@ -736,6 +757,7 @@ skipReplaceHost:
 			  h->m_note );
 	}
 	sb.safePrintf ( "</table><br><br>" );
+	*/

 	sb.safePrintf(
 		      "<style>"
@ -753,6 +775,12 @@ skipReplaceHost:
 		  //"</font>"
 		  "</td></tr>" 

+		  "<tr class=poo>"
+		  "<td>host ip</td>"
+		  "<td>The primary IP address of the host."
+		  "</td>"
+		  "</tr>\n"
+
 		  "<tr class=poo>"
 		  "<td>shard</td>"
 		  "<td>"
@ -762,26 +790,20 @@ skipReplaceHost:
 		  "</tr>\n"

 		  "<tr class=poo>"
-		  "<td>stripe</td>"
+		  "<td>mirror</td>"
 		  "<td>"
-		  "Hosts with the same stripe serve the same shard "
-		  "of data."
-		  "</td>"
-		  "</tr>\n"
-
-		  "<tr class=poo>"
-		  "<td>ip1</td>"
-		  "<td>The primary IP address of the host."
+		  "A shard can be mirrored multiple times for "
+		  "data redundancy."
 		  "</td>"
 		  "</tr>\n"

+		  /*
 		  "<tr class=poo>"
 		  "<td>ip2</td>"
 		  "<td>The secondary IP address of the host."
 		  "</td>"
 		  "</tr>\n"

-		  /*
 		  "<tr class=poo>"
 		  "<td>udp port</td>"
 		  "<td>The UDP port the host uses to send and recieve "
@ -794,7 +816,6 @@ skipReplaceHost:
 		  "<td>The UDP port used to send and receive dns traffic with."
 		  "</td>"
 		  "</tr>\n"
-		  */

 		  "<tr class=poo>"
 		  "<td>http port</td>"
@ -802,7 +823,6 @@ skipReplaceHost:
 		  "</td>"
 		  "</tr>\n"

-		  /*
 		  "<tr class=poo>"
 		  "<td>best switch id</td>"
 		  "<td>The host prefers to be on this switch because it "
@ -868,6 +888,43 @@ skipReplaceHost:
 		  "</td>"
 		  "</tr>\n"

+		  "<tr class=poo>"
+		  "<td>avg split time</td>"
+		  "<td>Average time this host took to compute the docids "
+		  "for a query. Useful for guaging the slowness of a host "
+		  "compare to other hosts."
+		  "</td>"
+		  "</tr>\n"
+
+		  "<tr class=poo>"
+		  "<td>splits done</td>"
+		  "<td>Number of queries this host completed. Used in "
+		  "computation of the <i>avg split time</i>."
+		  "</td>"
+		  "</tr>\n"
+
+		  "<tr class=poo>"
+		  "<td>status</td>"
+		  "<td>Status flags for the host. See key below."
+		  "</td>"
+		  "</tr>\n"
+
+		  "<tr class=poo>"
+		  "<td>slow reads</td>"
+		  "<td>Number of slow disk reads the host has had. "
+		  "When this is big compared to other hosts it is a good "
+		  "indicator its drives are relatively slow."
+		  "</td>"
+		  "</tr>\n"
+
+		  "<tr class=poo>"
+		  "<td>docs indexed</td>"
+		  "<td>Number of documents this host has indexed over all "
+		  "collections. All hosts should have close to the same "
+		  "number in a well-sharded situation."
+		  "</td>"
+		  "</tr>\n"
+
 		  //"<tr class=poo>"
 		  //"<td>loadavg</td>"
 		  //"<td>1-minute sliding-window load average from "
@ -877,13 +934,26 @@ skipReplaceHost:

 		  "<tr class=poo>"
 		  "<td>mem used</td>"
-		  "<td>percentage of memory currently used."
+		  "<td>Percentage of memory currently used."
 		  "</td>"
 		  "</tr>\n"

 		  "<tr class=poo>"
 		  "<td>cpu usage</td>"
-		  "<td>percentage of cpu resources in use by the gb process."
+		  "<td>Percentage of cpu resources in use by the gb process."
+		  "</td>"
+		  "</tr>\n"
+
+		  "<tr class=poo>"
+		  "<td>disk usage</td>"
+		  "<td>Percentage of disk in use. When this gets close to "
+		  "100%% you need to do something."
+		  "</td>"
+		  "</tr>\n"
+
+		  "<tr class=poo>"
+		  "<td>max ping1</td>"
+		  "<td>The worst ping latency from host to host."
 		  "</td>"
 		  "</tr>\n"

@ -900,6 +970,7 @@ skipReplaceHost:
 		  "</td>"
 		  "</tr>\n"

+		  /*
 		  "<tr class=poo>"
 		  "<td>ping2</td>"
 		  "<td>Ping time to this host on the seconday/shotgun "
@ -907,6 +978,7 @@ skipReplaceHost:
 		  "network is not enabled in the master controls."
 		  "</td>"
 		  "</tr>\n"
+		  */

 		  "<tr class=poo>"
 		  "<td>M (status flag)</td>"
@ -932,6 +1004,27 @@ skipReplaceHost:
 		  "</td>"
 		  "</tr>\n"

+		  "<tr class=poo>"
+		  "<td>R (status flag)</td>"
+		  "<td>Indicates host is performing a rebalance operation."
+		  "</td>"
+		  "</tr>\n"
+
+		  "<tr class=poo>"
+		  "<td>F (status flag)</td>"
+		  "<td>Indicates host has foreign records and requires "
+		  "a rebalance operation."
+		  "</td>"
+		  "</tr>\n"
+
+		  "<tr class=poo>"
+		  "<td>x (status flag)</td>"
+		  "<td>Indicates host has abruptly exited due to a fatal "
+		  "error (cored) and "
+		  "restarted itself."
+		  "</td>"
+		  "</tr>\n"
+

 		  ,
 		  TABLE_STYLE
@ -1156,3 +1249,11 @@ int cpuUsageSort ( const void *i1, const void *i2 ) {
 	if ( h1->m_cpuUsage < h2->m_cpuUsage ) return  1;
 	return 0;
 }
+
+int diskUsageSort ( const void *i1, const void *i2 ) { // gbsort cmp (sort=17)
+	Host *h1 = g_hostdb.getHost ( *(long*)i1 ); // i1 points at a long handed to getHost()
+	Host *h2 = g_hostdb.getHost ( *(long*)i2 ); // NOTE(review): getHost() result is not NULL-checked
+	if ( h1->m_diskUsage > h2->m_diskUsage ) return -1; // presumably puts the fuller host first (qsort-style) -- confirm
+	if ( h1->m_diskUsage < h2->m_diskUsage ) return  1;
+	return 0; // neither disk usage is greater
+}
--- a/PageIndexdb.cpp
+++ b/PageIndexdb.cpp
@ -29,6 +29,7 @@ public:
 	Msg1      m_msg1;
 	IndexList m_list;
 	//IndexList m_list2;
+	collnum_t m_collnum;
 	char      m_query[MAX_QUERY_LEN+1];
 	long      m_queryLen;
 	//char      m_coll[MAX_COLL_LEN+1];
@ -84,6 +85,10 @@ bool sendPageIndexdb ( TcpSocket *s , HttpRequest *r ) {
 		g_errno = ECOLLTOOBIG; 
 		return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno)); 
 	}
+	CollectionRec *cr = g_collectiondb.getRec(coll);
+	if ( ! cr ) {
+		return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno)); 
+	}
 	// make a state
 	State10 *st ;
 	try { st = new (State10); }
@ -121,10 +126,11 @@ bool sendPageIndexdb ( TcpSocket *s , HttpRequest *r ) {
 	//st->m_collLen  = collLen;
 	//st->m_coll [ collLen ] ='\0';
 	st->m_coll = coll;
+	st->m_collnum = cr->m_collnum;
 	// save the TcpSocket
 	st->m_socket = s;
 	// and if the request is local/internal or not
-	st->m_isAdmin = g_collectiondb.isAdmin ( r , s );
+	st->m_isAdmin = g_conf.isCollAdmin ( s , r );
 	st->m_isLocal = r->isLocal();
 	st->m_r.copy ( r );
 	// . check for add/delete request
@ -151,7 +157,7 @@ bool sendPageIndexdb ( TcpSocket *s , HttpRequest *r ) {
 		// call msg1 to add/delete key
 		if ( ! st->m_msg1.addList ( &st->m_keyList,
 					     RDB_INDEXDB,
-					     st->m_coll,
+					     st->m_collnum,
 					     st,
 					     addedKeyWrapper,
 					     false,
@ -174,7 +180,7 @@ bool sendPageIndexdb ( TcpSocket *s , HttpRequest *r ) {
 	// skip if nothing
 	else return gotTermFreq ( st );
 	// get the termfreq of this term!
-	if ( ! st->m_msg36.getTermFreq ( coll ,
+	if ( ! st->m_msg36.getTermFreq ( st->m_collnum ,
 					 0 , 
 					 st->m_termId,
 					 st ,
@ -258,7 +264,7 @@ loop:
 			    0  ,    // max cache age
 			    false , // add to cache?
 			    rdbId , // RDB_INDEXDB  , // rdbId of 2 = indexdb
-			    st->m_coll ,
+			    st->m_collnum ,
 			    &st->m_list  ,
 			    startKey  ,
 			    endKey    ,
@ -405,7 +411,7 @@ bool gotIndexList2 ( void *state , RdbList *list ) {

 	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
 	RdbBase *base; 
-	if (!(base=getRdbBase((uint8_t)RDB_INDEXDB,st->m_coll))) return true;
+	if (!(base=getRdbBase((uint8_t)RDB_INDEXDB,st->m_collnum)))return true;

 	// print the standard header for admin pages
 	pbuf->safePrintf ( 
@ -529,8 +535,8 @@ bool gotIndexList2 ( void *state , RdbList *list ) {
 			  "<tr><td>%li.</td>"
 			  "<td>%s%i</td>"
 			  "<td>"
-			  //"<a href=http://%s:%hu/master/titledb?d=%llu>"
-			  "<a href=/master/titledb?c=%s&d=%llu>"
+			  //"<a href=http://%s:%hu/admin/titledb?d=%llu>"
+			  "<a href=/admin/titledb?c=%s&d=%llu>"
 			  "%llu"
 			  //"<td><a href=/cgi/4.cgi?d=%llu>%llu"
 			  "</td>"
@ -596,8 +602,8 @@ bool gotIndexList2 ( void *state , RdbList *list ) {
 			  "<td>%llu</td>"
 			  "<td>%lu</td><td>%i</td>"
 			  "<td>"
-			  //"<a href=http://%s:%hu/master/titledb?d=%llu>"
-			  "<a href=/master/titledb?c=%s&d=%llu>"
+			  //"<a href=http://%s:%hu/admin/titledb?d=%llu>"
+			  "<a href=/admin/titledb?c=%s&d=%llu>"
 			  "%llu"
 			  //"<td><a href=/cgi/4.cgi?d=%llu>%llu"
 			  "</td></tr>\n" ,
--- a/PageInject.cpp
+++ b/PageInject.cpp
@ -55,8 +55,8 @@ bool sendPageInject ( TcpSocket *s , HttpRequest *r ) {
 	strncpy(msg7->m_coll,coll,MAX_COLL_LEN);

 	// for diffbot
-	if ( crawlbotAPI ) 
-		msg7->m_hr.copy ( r );
+	//if ( crawlbotAPI ) 
+	msg7->m_hr.copy ( r );

 	// a scrape request?
 	char *qts = r->getString("qts",NULL);
@ -169,12 +169,7 @@ bool sendReply ( void *state ) {
 	SafeBuf sb;

 	// print admin bar
-	g_pages.printAdminTop ( &sb, // p , pend , 
-				PAGE_INJECT, 
-				NULL, // msg7->m_username ,
-				msg7->m_coll , 
-				NULL ,  // pwd
-				s->m_ip );
+	g_pages.printAdminTop ( &sb, s , &msg7->m_hr );

 	// if there was an error let them know
 	char msg[1024];
@ -231,10 +226,23 @@ bool sendReply ( void *state ) {
 		  "indexed in real time "
 		  "while you wait. The browser will return the "
 		  "final index status code. Alternatively, "
-		  "use the <i>add urls</i> page "
-		  "to add URLs in bulk or to just add to the spider queue "
-		  "without having to wait for the page or pages to be "
-		  "actually indexed in realtime."
+		  "use the <a href=/admin/addurl>add url</a> page "
+		  "to add urls individually or in bulk "
+		  "without having to wait for the pages to be "
+		  "actually indexed in realtime. "
+
+		  "By default, injected urls "
+		  "take precedence over the \"insitelist\" directive in the "
+		  "<a href=/admin/filters>url filters</a> "
+		  "so injected urls need not match the "
+		  "<a href=/admin/sites>spider sites</a> patterns. You can "
+		  "change that behavior in the <a href=/admin/filters>url "
+		  "filters</a> if you want. "
+		  "Injected urls will have a "
+		  "<a href=/admin/filters#hopcount>hopcount</a> of 0. "
+		  "The injection api is described on the "
+		  "<a href=/admin/api>api</a> page."
+
 		  "</font>"
 		  "</td>"

@ -242,7 +250,15 @@ bool sendReply ( void *state ) {
 		  "<input type=text name=u value=\"\" size=50>"
 		  "</td></tr>\n\n"

-		  "<tr class=poo><td><b>query to scrape</b></td>"
+		  "<tr class=poo><td><b>query to scrape</b>"
+
+		  "<br>"
+		  "<font size=-2>"
+		  "Scrape other search engines and inject their links "
+		  "for this query. "
+		  "</font>"
+
+		  "</td>"
 		  "<td>\n"
 		  "<input type=text name=qts value=\"\" size=50>"
 		  "</td></tr>\n\n"
--- a/PageLogView.cpp
+++ b/PageLogView.cpp
@ -49,9 +49,9 @@ bool sendPageLogView    ( TcpSocket *s , HttpRequest *r ) {
 	p->reserve2x(65535);
 	
 	//long  user     = g_pages.getUserType( s , r );
- 	char *username = g_users.getUsername(r);
-	char *pwd  = r->getString ("pwd");
- 	char *coll = r->getString ("c");
+ 	//char *username = g_users.getUsername(r);
+	//char *pwd  = r->getString ("pwd");
+ 	//char *coll = r->getString ("c");
 	long refreshRate = r->getLong("rr", 0);
 	long sampleSize  =  r->getLong("ss", 2048);
 	if(refreshRate > 0) 
@ -61,8 +61,8 @@ bool sendPageLogView    ( TcpSocket *s , HttpRequest *r ) {

 	// 	char *ss = p->getBuf();
 	// 	char *ssend = p->getBufEnd();
-	g_pages.printAdminTop ( p , PAGE_LOGVIEW, username,
-				coll , pwd , s->m_ip );
+	g_pages.printAdminTop ( p, s, r );
+
 	//	p->incrementLength(sss - ss);

 	long nh = g_hostdb.getNumHosts();
--- a/PageLogin.cpp
+++ b/PageLogin.cpp
@ -1,148 +0,0 @@
-#include "gb-include.h"
-
-#include "Pages.h"
-#include "Parms.h"
-#include "Users.h"
-
-bool sendPageLogin ( TcpSocket *s , HttpRequest *r ) {
-	return sendPageLogin ( s , r, NULL);
-}
-
-bool sendPageLogin ( TcpSocket *s , HttpRequest *r , char *emsg ) {
-
-	// get the collection
-	long  collLen = 0;
-	char *coll    = r->getString("c",&collLen);
-	if ( ! coll || ! coll[0] ) {
-		//coll    = g_conf.m_defaultColl;
-		coll = g_conf.getDefaultColl( r->getHost(), r->getHostLen() );
-		collLen = gbstrlen(coll);
-	}
-
-	// does collection exist? ...who cares, proxy doesn't have coll data.
-	//CollectionRec *cr = g_collectiondb.getRec ( coll );
-	//if ( ! cr ) emsg = "Collection does not exist.";
-
-
-	// log off user whose username is in the cookie
-	char *username = r->getStringFromCookie("username",NULL);
-	char *password = r->getString("pwd",NULL);
-	if ( username && !password ) g_users.logoffUser( username, s->m_ip );
-	
-	//  get username from the request
-	username = NULL;
-	username = r->getString("username",NULL);
-	
-	// reset emsg if user is coming for the first time
-	long page = g_pages.getDynamicPageNumber(r);
-	if ( !username && !password && 
-		(page == PAGE_LOGIN || page == PAGE_LOGIN2) && emsg)
-		emsg ="";
-
-	// just make cookie same format as an http request for ez parsing
-	char cookieData[2024];
-	char host[1024]="";
-	/*if ( cr && userType == USER_MASTER && username ) 
-		return g_parms.sendPageGeneric ( s , r , PAGE_MASTER , cookie);
-	if ( userType == USER_ADMIN && username )
-		return g_parms.sendPageGeneric ( s , r , PAGE_SEARCH , cookie);
-	*/
-
-
-	// print it
-	char  buf [ 2*1024 ];
-	char *p    = buf;
-	char *pend = buf + 2*1024;
-
-	// print colors
-	p = g_pages.printColors ( p , pend );
-	// start table
-	sprintf ( p , "<table><tr><td>");
-	p += gbstrlen ( p );
-	// print logo
-	p = g_pages.printLogo   ( p , pend , coll );
-
-	// make it printable
-	char *pu = g_users.getUsername(r);
-	if ( ! pu ) pu = "";
-
-	// then Login
-	if ( r->getHostLen() < 1024 )
-		strncpy ( host, r->getHost(), r->getHostLen() );
-
-	char *cookie = NULL;
-	User *user = NULL;
-	if ( username && host[0] ) user = g_users.getUser(username);
-	if ( user && !emsg ){	
-		sprintf ( cookieData , "username=%s;expires=0;"
-					,username);
-		
-		// try to the get reference Page
-		long refPage = r->getLong("ref",-1);
-		if ( refPage >= 0 && refPage != PAGE_LOGIN && refPage != PAGE_LOGIN2
-			&& g_users.hasPermission(username,refPage)){
-			WebPage *page = g_pages.getPage(refPage);
-			sprintf ( p, "<meta http-equiv=\"refresh\" content=\"0;"
-		              	"http://%s/%s?c=%s\">",
-				host,page->m_filename,coll);
-		}
-		else{	
-			long pageNum = user->firstPage();
-			char *path = g_pages.getPath(pageNum); 
-			sprintf ( p, "<meta http-equiv=\"refresh\" content=\"0;"
-			              "http://%s/%s?c=%s\">",
-					host,path,coll);
-		}
-		p += gbstrlen ( p );
-		cookie = cookieData;
-	}
-	
-	if ( !emsg ) emsg = "";
-	sprintf ( p ,
-		  "&nbsp; &nbsp; "
-		  "</td><td><font size=+1><b>Login</b></font></td></tr>"
-		  "</table>" 
-		  "<form method=post action=\"/login\" name=f>"
-		  "<input type=hidden name=ref value=\"%li\">"
-		  "<center>"
-		  "<br><br>"
-		  "<font color=ff0000><b>%s</b></font>"
-		  "<br><br>"
-
-		  "<table cellpadding=2><tr><td>"
-		  "<b>Username</td><td>"
-		  "<input type=text name=username size=30 value=\"%s\">"
-		  "</td><td></td></tr>"
-		  "<tr><td>"
-
-		  "<b>Collection</td><td>"
-		  "<input type=text name=c size=30 value=\"%s\">"
-		  "</td><td></td></tr>"
-		  "<tr><td>"
-		  "<b>Password</td><td><input type=password name=pwd size=30>"
-		  "</td><td>"
-		  "<input type=submit value=ok border=0></td>"
-		  "</tr></table>"
-		  "</center>"
-		  "<br><br>",
-		  page, emsg , pu , coll );
-	p += gbstrlen ( p );
-	// master test
-	/*
-	long user = g_pages.getUserType ( s , r );
-	if ( user != USER_MASTER ) {
-		sprintf ( p , "\n<input type=hidden name=master value=0>\n"
-			  "</form>" );
-		p += gbstrlen ( p );
-	}
-	*/
-	// print the tail
-	p = g_pages.printTail ( p , pend , r->isLocal() ); // pwd
-	// send the page
-	return g_httpServer.sendDynamicPage ( s , buf , p - buf ,
-					      -1    , // cacheTime
-					      false , // POSTReply?
-					      NULL  , // contentType
-					      -1   ,
-					      cookie);// Forbidden http status
-}
--- a/PageOverview.cpp
+++ b/PageOverview.cpp
@ -1451,8 +1451,8 @@ bool sendPageOverview ( TcpSocket *s , HttpRequest *r ) {
 "You can specify different indexing and spider parameters on a per URL basis by one or more of the following methods:\n"
 "<br><br>\n"
 "<ul>\n"
-"<li>Using the <a href=\"/master/tagdb\">tagdb interface</a>, you can assign a <a href=#ruleset>ruleset</a> to a set of sites. All you do is provide Gigablast with a list of sites and the ruleset to use for those sites.\n"
-"You can enter the sites via the <a href=\"/master/tagdb\">HTML form</a> or you can provide Gigablast with a file of the sites. Each file must be limited to 1 Megabyte, but you can add hundreds of millions of sites. \n"
+"<li>Using the <a href=\"/admin/tagdb\">tagdb interface</a>, you can assign a <a href=#ruleset>ruleset</a> to a set of sites. All you do is provide Gigablast with a list of sites and the ruleset to use for those sites.\n"
+"You can enter the sites via the <a href=\"/admin/tagdb\">HTML form</a> or you can provide Gigablast with a file of the sites. Each file must be limited to 1 Megabyte, but you can add hundreds of millions of sites. \n"
 "Sites can be full URLs, hostnames, domain names or IP addresses.\n"
 "If you add a site which is just a canonical domain name with no explicit host name, like gigablast.com, then any URL with the same domain name, regardless of its host name will match that site. That is, \"hostname.gigablast.com\" will match the site \"gigablast.com\" and therefore be assigned the associated ruleset.\n"
 "Sites may also use IP addresses instead of domain names. If the least significant byte of an IP address that you submit to tagdb is 0 then any URL with the same top 3 IP bytes as that IP will be considered a match.\n"
@ -1917,7 +1917,7 @@ bool sendPageOverview ( TcpSocket *s , HttpRequest *r ) {
 "<br>\n"
 "After the base score is computed, it is multiplied by the number of occurences of the word or phrase in the portion of the document being indexed as specified by the index rule. This score may then be reduced if spam detection occurred and the word or phrase was deemed repetitious. Spam detection is triggered when the quality of the document is at or below the value specified in the &lt;minQualityForSpamDetect&gt; tag in the index rule. Finally, the score is mapped into an 8 bit value, from 1 to 255, and stored in the index."
 "<br><br>\n"
-"To see the scoring algorithm in action you can use the <b><a href=\"/master/parser\">Parser Tool</a></b>. It will show each indexed word and phrase and its associated score, as well as some attributes associated with the indexed document."
+"To see the scoring algorithm in action you can use the <b><a href=\"/admin/parser\">Parser Tool</a></b>. It will show each indexed word and phrase and its associated score, as well as some attributes associated with the indexed document."
 ""
 "<br>\n"
 "<br>\n"
--- a/PageParser.cpp
+++ b/PageParser.cpp
@ -1,8 +1,8 @@
 #include "gb-include.h"

 #include "PageParser.h"
-#include "IndexTable.h"
-#include "IndexTable2.h"
+//#include "IndexTable.h"
+//#include "IndexTable2.h"
 //#include "XmlDoc.h" // addCheckboxSpan()

 bool g_inPageParser = false;
@ -101,7 +101,7 @@ bool sendPageParser2 ( TcpSocket   *s ,
 	st->m_termFreqs       = termFreqs;
 	st->m_termFreqWeights = termFreqWeights;
 	st->m_affWeights      = affWeights;
-	st->m_total           = (score_t)-1;
+	//st->m_total           = (score_t)-1;
 	st->m_indexCode       = 0;
 	st->m_blocked         = false;
 	st->m_didRootDom      = false;
@ -561,7 +561,7 @@ bool processLoop ( void *state ) {
 		// . save the ips.txt file if we are the test coll
 		// . saveTestBuf() is a function in Msge1.cpp
 		CollectionRec *cr = xd->getCollRec();
-		if ( xd && cr && cr->m_coll && ! strcmp ( cr->m_coll,"test") )
+		if ( xd && cr && cr->m_coll && !strcmp(cr->m_coll,"qatest123"))
 			// use same dir that XmlDoc::getTestDir() would use
 			saveTestBuf ( "test-page-parser" );
 		// now get the meta list, in the process it will print out a 
@ -654,7 +654,7 @@ bool sendPageAnalyze ( TcpSocket *s , HttpRequest *r ) {
 	//st->m_termFreqs       = termFreqs;
 	//st->m_termFreqWeights = termFreqWeights;
 	//st->m_affWeights      = affWeights;
-	st->m_total           = (score_t)-1;
+	//st->m_total           = (score_t)-1;
 	st->m_indexCode       = 0;
 	st->m_blocked         = false;
 	st->m_didRootDom      = false;
@ -855,7 +855,7 @@ bool gotXmlDoc ( void *state ) {

 	// . save the ips.txt file if we are the test coll
 	// . saveTestBuf() is a function in Msge1.cpp
-	//if ( xd && xd->m_coll && ! strcmp ( xd->m_coll , "test")) 
+	//if ( xd && xd->m_coll && ! strcmp ( xd->m_coll , "qatest123")) 
 	//	// use same dir that XmlDoc::getTestDir() would use
 	//	saveTestBuf ( "test-page-parser" );

--- a/PageParser.h
+++ b/PageParser.h
@ -80,7 +80,7 @@ public:
 	long long *m_termFreqs;
 	float     *m_termFreqWeights;
 	float     *m_affWeights;
-	score_t    m_total;
+	//score_t    m_total;
 	bool       m_freeIt;
 	bool       m_blocked;

--- a/PageReindex.cpp
+++ b/PageReindex.cpp
@ -29,11 +29,12 @@ static bool printInterface ( SafeBuf *sb , char *q ,//long user ,
 class State13 {
 public:
 	char       m_query [ MAX_QUERY_LEN + 1];
-	char       m_isAdmin;
+	//char       m_isAdmin;
 	Msg1c      m_msg1c;
 	//Msg1d      m_msg1d;
-	char       m_coll [ MAX_COLL_LEN + 1];
-	long       m_collLen;
+	//char       m_coll [ MAX_COLL_LEN + 1];
+	//long       m_collLen;
+	collnum_t m_collnum;
 	TcpSocket *m_socket;
 	//char       m_replyBuf[64*1024];
 	//long       m_replyBufSize;
@ -61,7 +62,7 @@ bool sendPageReindex ( TcpSocket *s , HttpRequest *r ) {
 	pwd [ len ] = '\0';

 	// are we the admin?
-	bool isAdmin = g_collectiondb.isAdmin ( r , s );
+	//bool isAdmin = g_collectiondb.isAdmin ( r , s );
 	//long user    = g_pages.getUserType ( s , r );
 	char *username = g_users.getUsername ( r );
 	char *errmsg = NULL;
@ -75,6 +76,7 @@ bool sendPageReindex ( TcpSocket *s , HttpRequest *r ) {
 		return g_httpServer.sendErrorReply ( s , 500 ,
 						"Collection does not exist.");
 	}
+	/*
 	bool isAssassin = cr->isAssassin ( s->m_ip );
 	if ( isAdmin ) isAssassin = true;

@ -82,15 +84,14 @@ bool sendPageReindex ( TcpSocket *s , HttpRequest *r ) {
 	if ( ! isAssassin && ! cr->hasPermission ( r , s ) ) {
 		log("admin: Bad collection name "
 		    "or password. Query reindex failed. Permission denied.");
-		return sendPageLogin ( s , r , 
+		return sendPagexxxx ( s , r , 
 				       "Collection name or "
 				       "password is incorrect.");
 	}
-
+	*/
 	// get collection name and its length
 	char *coll    = cr->m_coll;
-	long  collLen = gbstrlen ( coll );
-
+	//long  collLen = gbstrlen ( coll );

 	//char buf[64*1024];
 	//char *p    = buf;
@ -131,7 +132,7 @@ bool sendPageReindex ( TcpSocket *s , HttpRequest *r ) {
 	mnew ( st , sizeof(State13) , "PageReindex" );

 	// set stuff now
-	st->m_isAdmin    = isAdmin;
+	//st->m_isAdmin    = isAdmin;
 	

 	// save the query to static buffer
@ -140,6 +141,8 @@ bool sendPageReindex ( TcpSocket *s , HttpRequest *r ) {
 	memcpy ( st->m_query , t , len );
 	st->m_query[len] = '\0';

+	st->m_collnum = cr->m_collnum;
+
 	// save start and end numbers
 	long startNum = r->getLong   ( "srn" , 0 );
 	long endNum   = r->getLong   ( "ern" , 0 );
@ -152,9 +155,9 @@ bool sendPageReindex ( TcpSocket *s , HttpRequest *r ) {
 	bool updateTags = r->getLong ( "updatetags", 0 );

 	// copy collection
-	memcpy ( st->m_coll , coll , collLen );
-	st->m_coll [ collLen ] = '\0';
-	st->m_collLen=collLen;
+	//memcpy ( st->m_coll , coll , collLen );
+	//st->m_coll [ collLen ] = '\0';
+	//st->m_collLen=collLen;

 	// fix parms
 	if ( startNum <  0 ) startNum = 0 ;
@ -202,7 +205,7 @@ bool sendPageReindex ( TcpSocket *s , HttpRequest *r ) {
 	// place holder, for holding response when we're done adding
 	// all these docids to the spider queue
 	st->m_placeOff = rp->length() ;
-	for ( long i = 0 ; i < 100 ; i++ )
+	for ( long i = 0 ; i < 200 ; i++ )
 		rp->pushChar(' ');
 	//memset ( rp , ' ' , 100 );
 	//rp += 100;
@ -239,7 +242,7 @@ bool sendPageReindex ( TcpSocket *s , HttpRequest *r ) {
 	*/
 		// let msg1d do all the work now
 		if ( ! st->m_msg1c.reindexQuery ( st->m_query ,
-						  st->m_coll,
+						  st->m_collnum,
 						  startNum ,
 						  endNum   ,
 						  (bool)forceDel ,
@ -266,6 +269,7 @@ void doneReindexing ( void *state ) {
 		g_httpServer.sendErrorReply(sock,500,mstrerror(g_errno));
 		mdelete ( st , sizeof(State13) , "PageTagdb" );
 		delete (st);
+		return; // st was deleted just above; stop here so we never fall through into the no-error path
 	}	
 	// if no error, send the pre-generated page
 	// this must be under 100 chars or it messes our reply buf up
@ -462,12 +466,12 @@ static void addedListWrapper ( void *state ) ;
 Msg1c::Msg1c() {
 	m_numDocIds = 0;
 	m_numDocIdsAdded = 0;
-	m_coll = NULL;
+	m_collnum = -1;
 	m_callback = NULL;
 }

 bool Msg1c::reindexQuery ( char *query ,
-			   char *coll  ,
+			   collnum_t collnum ,//char *coll  ,
 			   long startNum ,
 			   long endNum ,
 			   bool forceDel ,
@ -475,7 +479,7 @@ bool Msg1c::reindexQuery ( char *query ,
 			   void *state ,
 			   void (* callback) (void *state ) ) {

-	m_coll           = coll;
+	m_collnum = collnum;//           = coll;
 	m_startNum       = startNum;
 	m_endNum         = endNum;
 	m_forceDel       = forceDel;
@ -489,12 +493,15 @@ bool Msg1c::reindexQuery ( char *query ,
 	// langunknown?
 	m_qq.set2 ( query , langId , true ); // /*bool flag*/ );

+	//CollectionRec *cr = g_collectiondb.getRec ( collnum );
+
 	//CollectionRec *cr = g_collectiondb.getRec ( coll );
 	// reset again just in case
 	m_req.reset();
 	// set our Msg39Request
-	m_req.ptr_coll                    = coll;
-	m_req.size_coll                   = gbstrlen(coll)+1;
+	//m_req.ptr_coll                    = coll;
+	//m_req.size_coll                   = gbstrlen(coll)+1;
+	m_req.m_collnum = m_collnum;
 	m_req.m_docsToGet                 = endNum;
 	m_req.m_niceness                  = 0,
 	m_req.m_getDocIdScoringInfo       = false;
@ -507,6 +514,7 @@ bool Msg1c::reindexQuery ( char *query ,
 	m_req.m_queryExpansion            = true; // so it's like regular rslts
 	// add language dropdown or take from [query reindex] link
 	m_req.m_language                  = langId;
+	//m_req.m_debug = 1;

 	// log for now
 	logf(LOG_DEBUG,"reindex: qlangid=%li q=%s",langId,query);
@ -661,7 +669,7 @@ bool Msg1c::gotList ( ) {

 	if ( ! m_msg4.addMetaList ( m_sb.getBufStart() ,
 				    m_sb.length() ,
-				    m_coll ,
+				    m_collnum ,
 				    this ,
 				    addedListWrapper ,
 				    0 , // niceness
--- a/PageReindex.h
+++ b/PageReindex.h
@ -13,7 +13,7 @@ public:
 	Msg1c();

 	bool reindexQuery ( char *query ,
-			    char *coll  ,
+			    collnum_t collnum, // char *coll  ,
 			    long startNum ,
 			    long endNum ,
 			    bool forceDel ,
@ -23,7 +23,8 @@ public:
 	
 	bool gotList ( );
 	
-	char *m_coll;
+	//char *m_coll;
+	collnum_t m_collnum;
 	long m_startNum;
 	long m_endNum;
 	bool m_forceDel;
--- a/PageResults.cpp
+++ b/PageResults.cpp
--- a/PageResults.h
+++ b/PageResults.h
@ -2,15 +2,68 @@
 #define _PAGERESULTS_H_

 #include "SafeBuf.h"
+#include "Language.h" // MAX_FRAG_SIZE
+#include "Msg40.h"
+#include "Msg0.h"

-bool printDmozRadioButtons ( SafeBuf &sb , long catId ) ;
-bool printLogoAndSearchBox ( SafeBuf &sb , class HttpRequest *hr, long catId );
+class State0 { // state for one search results page; passed to printSearchResultsHeader(), printResult() and printSearchResultsTail()
+public:

-bool printTermPairs ( SafeBuf &sb , class Query *q , class PairScore *ps ) ;
-bool printSingleTerm ( SafeBuf &sb , class Query *q , class SingleScore *ss );
+	// store results page in this safebuf
+	SafeBuf      m_sb;
+
+	// if socket closes before we get a chance to send back
+	// search results, we will know by comparing this to
+	// m_socket->m_numDestroys
+	long         m_numDestroys;
+	bool         m_header;
+
+	collnum_t    m_collnum;
+        Query        m_q;
+	SearchInput  m_si;
+	Msg40        m_msg40;
+	TcpSocket   *m_socket; // its m_numDestroys is what the saved m_numDestroys above gets compared to
+	Msg0         m_msg0;
+	long long    m_startTime; // NOTE(review): presumably when handling of the request began -- confirm
+	//Ads          m_ads;
+	bool         m_gotAds;
+	bool         m_gotResults;
+	char         m_spell  [MAX_FRAG_SIZE]; // spelling recommendation
+	bool         m_gotSpell;
+	long         m_errno;
+	Query        m_qq3;
+        long         m_numDocIds;
+	long long    m_took; // how long it took to get the results
+	HttpRequest  m_hr;
+	bool         m_printedHeaderRow;
+	char         m_qe[MAX_QUERY_LEN+1]; // NOTE(review): the +1 presumably leaves room for a terminating NUL -- confirm
+
+	// for printing our search result json items in csv:
+	HashTableX   m_columnTable;
+	long         m_numCSVColumns;
+
+	// stuff for doing redownloads
+	bool    m_didRedownload;
+	XmlDoc *m_xd;
+	long    m_oldContentHash32;
+};


-bool printEventAddress ( SafeBuf &sb , char *addrStr , class SearchInput *si ,
+bool printSearchResultsHeader ( class State0 *st ) ;
+bool printResult ( class State0 *st,  long ix );
+bool printSearchResultsTail ( class State0 *st ) ;
+
+
+
+
+bool printDmozRadioButtons ( SafeBuf *sb , long catId ) ;
+bool printLogoAndSearchBox ( SafeBuf *sb , class HttpRequest *hr, long catId );
+
+bool printTermPairs ( SafeBuf *sb , class Query *q , class PairScore *ps ) ;
+bool printSingleTerm ( SafeBuf *sb , class Query *q , class SingleScore *ss );
+
+
+bool printEventAddress ( SafeBuf *sb , char *addrStr , class SearchInput *si ,
 			 double *lat , double *lon , bool isXml ,
 			 // use this for printing distance if lat/lon above
 			 // is invalid. only for non-xml printing though.
@ -20,10 +73,10 @@ bool printEventAddress ( SafeBuf &sb , char *addrStr , class SearchInput *si ,
 			 double eventGeocoderLon,
 			 char *eventBestPlaceName );

-bool printDMOZCrumb ( SafeBuf &sb , long catId , bool xml ) ;
-bool printDMOZSubTopics ( SafeBuf&  sb, long catId, bool inXml ) ;
+bool printDMOZCrumb ( SafeBuf *sb , long catId , bool xml ) ;
+bool printDMOZSubTopics ( SafeBuf *sb, long catId, bool inXml ) ;

-bool printEventCountdown2 ( SafeBuf &sb ,
+bool printEventCountdown2 ( SafeBuf *sb ,
 			    SearchInput *si,
 		       long now ,
 		       long timeZoneOffset ,
--- a/PageRoot.cpp
+++ b/PageRoot.cpp
@ -78,8 +78,9 @@ bool printNav ( SafeBuf &sb , HttpRequest *r ) {
 			      //" &nbsp; &nbsp; <a href=/logout>Logout</a>"
 			      );

-	if ( r->isLocal() )
-	     sb.safePrintf("&nbsp; &nbsp;[<a href=\"/master?\">Admin</a>]");
+	//if ( r->isLocal() )
+	    sb.safePrintf("&nbsp; &nbsp; [<a href=\"/admin/settings\">"
+			  "<font color=red>Admin</font></a>]");
 	sb.safePrintf("</p></b></center></body></html>");
 	return true;
 }
@ -168,6 +169,11 @@ bool printWebHomePage ( SafeBuf &sb , HttpRequest *r ) {
 	sb.safePrintf("</form>\n");
 	sb.safePrintf("<br>\n");
 	sb.safePrintf("\n");
+
+	// print any red boxes we might need to
+	if ( printRedBox2 ( &sb , true ) )
+		sb.safePrintf("<br>\n");
+
 	sb.safePrintf("<table cellpadding=3>\n");
 	sb.safePrintf("\n");

@ -1285,7 +1291,7 @@ bool sendPageAddUrl ( TcpSocket *s , HttpRequest *r ) {

 	// see if they provided a url of a file of urls if they did not
 	// provide a url to add directly
-	bool isAdmin = g_collectiondb.isAdmin ( r , s );
+	bool isAdmin = g_conf.isCollAdmin ( s , r );
 	long  ufuLen = 0;
 	char *ufu = NULL;
 	if ( isAdmin )
@ -1561,7 +1567,7 @@ void doneInjectingWrapper3 ( void *st ) {
 	// allow others to add now
 	s_inprogress = false;
 	// get the state properly
-	//State1 *st1 = (State1 *) state;
+	//State1i *st1 = (State1i *) state;
 	// in order to see what sites are being added log it, then we can
 	// more easily remove sites from sitesearch.gigablast.com that are
 	// being added but not being searched
--- a/PageSockets.cpp
+++ b/PageSockets.cpp
@ -537,7 +537,7 @@ void printUdpTable ( SafeBuf *p, char *title, UdpServer *server ,
 			long dlen;
 			char *dbuf = ::getDomFast ( hostname,&dlen,false);
 			p->safePrintf(
-			      " <a href=\"/master/tagdb?"
+			      " <a href=\"/admin/tagdb?"
 			      "user=admin&"
 			      "tagtype0=manualban&"
 			      "tagdata0=1&"
--- a/PageStats.cpp
+++ b/PageStats.cpp
@ -620,7 +620,7 @@ bool sendPageStats ( TcpSocket *s , HttpRequest *r ) {
 		       "<td colspan=50>"
 		       "<center><b>Spider Compression Proxy Stats</b> "

-		       " &nbsp; [<a href=\"/master/stats?reset=2\">"
+		       " &nbsp; [<a href=\"/admin/stats?reset=2\">"
 		       "reset</a>]</td></tr>\n"

 		       "<tr class=poo>"
@ -828,7 +828,7 @@ bool sendPageStats ( TcpSocket *s , HttpRequest *r ) {
 		       "<td colspan=50>"
 		       "<center><b>Message Stats</b> "

-		       " &nbsp; [<a href=\"/master/stats?reset=1\">"
+		       " &nbsp; [<a href=\"/admin/stats?reset=1\">"
 		       "reset</a>]</td></tr>\n"

 		       "<tr class=poo>"
--- a/PageThesaurus.cpp
+++ b/PageThesaurus.cpp
@ -284,8 +284,8 @@ bool sendPageThesaurus( TcpSocket *s, HttpRequest *r ) {
 		  "</font>"
 		  "</td>"
 		  "<td width=12%% bgcolor=#0000ff>"
-		  "<center><b><a href=\"/master/thesaurus?rebuild=1&%s\">"
-		  "rebuild all data</a> <a href=\"/master/thesaurus?"
+		  "<center><b><a href=\"/admin/thesaurus?rebuild=1&%s\">"
+		  "rebuild all data</a> <a href=\"/admin/thesaurus?"
 		  "rebuild=1&full=1&%s\">(full)</a></b></center>"
 		  "</td>"
 		  "</tr>\n", getBuf, getBuf);
@ -300,7 +300,7 @@ bool sendPageThesaurus( TcpSocket *s, HttpRequest *r ) {
 		  "</font>"
 		  "</td>"
 		  "<td width=12%% bgcolor=#0000ff>"
-		  "<center><b><a href=\"/master/thesaurus?distribute=1&%s\">"
+		  "<center><b><a href=\"/admin/thesaurus?distribute=1&%s\">"
 		  "distribute data</a></b></center>"
 		  "</td>"
 		  "</tr>\n", getBuf);
@ -314,7 +314,7 @@ bool sendPageThesaurus( TcpSocket *s, HttpRequest *r ) {
 		  "</td>"
 		  "<td width=12%% bgcolor=#0000ff>"
 		  "<center><b>"
-		  "<a href=\"/master/thesaurus?reload=1&cast=0&%s\">"
+		  "<a href=\"/admin/thesaurus?reload=1&cast=0&%s\">"
 		  "reload data</a></b></center>"
 		  "</td>"
 		  "</tr>\n", getBuf);
@ -328,7 +328,7 @@ bool sendPageThesaurus( TcpSocket *s, HttpRequest *r ) {
 		  "</td>"
 		  "<td width=12%% bgcolor=#0000ff>"
 		  "<center><b>"
-		  "<a href=\"/master/thesaurus?reload=1&cast=1&%s\">"
+		  "<a href=\"/admin/thesaurus?reload=1&cast=1&%s\">"
 		  "reload data (all hosts)</a></b></center>"
 		  "</td>"
 		  "</tr>\n", getBuf);
@ -342,7 +342,7 @@ bool sendPageThesaurus( TcpSocket *s, HttpRequest *r ) {
 		  "</font>"
 		  "</td>"
 		  "<td width=12%%>"
-		  "<form action=\"/master/thesaurus>\">"
+		  "<form action=\"/admin/thesaurus\">"
 		  "<input type=text name=synonym size=20>"
 		  "<input type=submit value=Submit>"
 		  "%s"
@ -365,7 +365,7 @@ bool sendPageThesaurus( TcpSocket *s, HttpRequest *r ) {
 		  "</font>"
 		  "</td>"
 		  "<td width=12%% bgcolor=#0000ff>"
-		  "<center><b><a href=\"/master/thesaurus?cancel=1&%s\">"
+		  "<center><b><a href=\"/admin/thesaurus?cancel=1&%s\">"
 		  "cancel running rebuild</a></b></center>"
 		  "</td>"
 		  "</tr>\n", getBuf);
@ -380,8 +380,8 @@ bool sendPageThesaurus( TcpSocket *s, HttpRequest *r ) {
 		  "</font>"
 		  "</td>"
 		  "<td width=12%% bgcolor=#0000ff>"
-		  "<center><b><a href=\"/master/thesaurus?rebuildaff=1&%s\">"
-		  "rebuild affinity</a> <a href=\"/master/thesaurus?"
+		  "<center><b><a href=\"/admin/thesaurus?rebuildaff=1&%s\">"
+		  "rebuild affinity</a> <a href=\"/admin/thesaurus?"
 		  "rebuildaff=1&full=1&%s\">(full)</a></b></center>"
 		  "</td>"
 		  "</tr>\n", getBuf, getBuf);
@ -405,7 +405,7 @@ bool sendPageThesaurus( TcpSocket *s, HttpRequest *r ) {
 		  "character, optionally followed by another pipe and a type "
 		  "designation; any badly formatted lines will be silently "
 		  "ignored</font><br>\n"
-		  "<form action=\"/master/thesaurus\" method=post>"
+		  "<form action=\"/admin/thesaurus\" method=post>"
 		  "<textarea name=\"manualadd\" rows=20 cols=80>");

 	if (manualAdd && manualAddLen) {
@ -434,7 +434,7 @@ bool sendPageThesaurus( TcpSocket *s, HttpRequest *r ) {
 		  "that these pairs will only work if the thesaurus otherwise "
 		  "has an entry for them, so add them to the manual add file "
 		  "above if need be</font><br>\n"
-		  "<form action=\"/master/thesaurus\" method=post>"
+		  "<form action=\"/admin/thesaurus\" method=post>"
 		  "<textarea name=\"affinityadd\" rows=20 cols=80>");

 	if (affinityAdd && affinityAddLen) {
--- a/PageTitledb.cpp
+++ b/PageTitledb.cpp
@ -58,7 +58,7 @@ bool sendPageTitledb ( TcpSocket *s , HttpRequest *r ) {
 	// copy it
 	st->m_r.copy ( r );
 	// remember if http request is internal/local or not
-	st->m_isAdmin = g_collectiondb.isAdmin ( r , s );
+	st->m_isAdmin = g_conf.isCollAdmin ( s , r );
 	st->m_isLocal = r->isLocal();
 	st->m_docId   = docId;
 	// password, too
--- a/Pages.cpp
+++ b/Pages.cpp
--- a/Pages.h
+++ b/Pages.h
@ -5,6 +5,9 @@
 #ifndef _PAGES_H_
 #define _PAGES_H_

+bool printRedBox2 ( SafeBuf *sb , bool isRootWebPage = false ) ;
+bool printRedBox ( SafeBuf *mb , bool isRootWebPage = false ) ;
+
 // for PageEvents.cpp and Accessdb.cpp
 //#define RESULTSWIDTHSTR "550px"

@ -26,13 +29,18 @@ extern char *g_msg;
 // . declare all dynamic functions here
 // . these are all defined in Page*.cpp files
 // . these are called to send a dynamic page
+bool sendPageBasicSettings   ( TcpSocket *s , HttpRequest *r );
+bool sendPageBasicStatus     ( TcpSocket *s , HttpRequest *r );
+//bool sendPageBasicDiffbot    ( TcpSocket *s , HttpRequest *r );
+
+
+
 bool sendPageRoot     ( TcpSocket *s , HttpRequest *r );
 bool sendPageRoot     ( TcpSocket *s , HttpRequest *r, char *cookie );
 bool sendPageResults  ( TcpSocket *s , HttpRequest *r );
 //bool sendPageEvents   ( TcpSocket *s , HttpRequest *r );
 bool sendPageAddUrl   ( TcpSocket *s , HttpRequest *r );
 bool sendPageGet      ( TcpSocket *s , HttpRequest *r );
-bool sendPageLogin    ( TcpSocket *s , HttpRequest *r , char  *emsg );
 bool sendPageLogin    ( TcpSocket *s , HttpRequest *r );
 bool sendPageStats    ( TcpSocket *s , HttpRequest *r );
 bool sendPageHosts    ( TcpSocket *s , HttpRequest *r );
@ -156,35 +164,6 @@ class Pages {
 				  char* bodyJavascript = "" );


-	char *printAdminTop            ( char        *p    , 
-					 char        *pend , 
-					 TcpSocket   *s    ,
-					 HttpRequest *r    ,
-					 char        *qs = NULL,
-					 char* bodyJavascript = "" ) ;
-
-	bool printAdminTop             ( SafeBuf *sb    ,
-					 long    page   ,
-					 //long    user   ,
-					 char   *username,
-					 char   *coll   ,
-					 char   *pwd    ,
-					 long    fromIp ,
-					 char   *qs = NULL, 
-					 char* bodyJavascript = "" );
-
-	char *printAdminTop            ( char *p      , 
-					 char *pend   , 
-					 long  page   ,
-					 //long  user   ,
-					 char *username,
-					 char *coll   ,
-					 char *pwd    ,
-					 long  fromIp ,
-					 char *qs     = "",
-					 char* bodyJavascript = "" ) ;
-
-
 	bool printAdminTop2 	       ( SafeBuf     *sb   ,
 					 TcpSocket   *s    ,
 					 HttpRequest *r    ,
@ -206,32 +185,33 @@ class Pages {
 	void printFormTop(  SafeBuf *sb, HttpRequest *r );
 	void printFormData( SafeBuf *sb, TcpSocket *s, HttpRequest *r );

-	char *printAdminBottom         ( char *p, char *pend, HttpRequest *r );
-	char *printAdminBottom         ( char *p, char *pend);
+	//char *printAdminBottom       ( char *p, char *pend, HttpRequest *r );
+	//char *printAdminBottom       ( char *p, char *pend);
 	bool  printAdminBottom         ( SafeBuf *sb, HttpRequest *r );
 	bool  printAdminBottom         ( SafeBuf *sb);
 	bool  printAdminBottom2        ( SafeBuf *sb, HttpRequest *r );
 	bool  printAdminBottom2        ( SafeBuf *sb);
 	bool  printTail                ( SafeBuf* sb, 
 					 bool isLocal );
+	bool printSubmit ( SafeBuf *sb ) ;
 					 //long user , 
 					 //char *username,
 					 //char *pwd );
-	char *printTail                ( char *p    ,
-					 char *pend ,
-					 bool isLocal );
+	//char *printTail                ( char *p    ,
+	//				 char *pend ,
+	//				 bool isLocal );
 	//long  user ,
 	//char *username,
 	//char *pwd  ) ;
 	bool  printColors              ( SafeBuf *sb , char* bodyJavascript = "" ) ;
-	char *printColors              ( char *p , char *pend , 
-					 char* bodyJavascript = "");
+	//char *printColors              ( char *p , char *pend , 
+	//				 char* bodyJavascript = "");

-	char *printColors2             ( char *p , char *pend ) ;
+	//char *printColors2           ( char *p , char *pend ) ;
 	bool  printColors3	       ( SafeBuf *sb ) ;
-	char *printFocus               ( char *p , char *pend ) ;
+	//char *printFocus             ( char *p , char *pend ) ;
 	bool  printLogo                ( SafeBuf *sb, char *coll ) ;
-	char *printLogo                ( char *p , char *pend , char *coll ) ;
+	//char *printLogo              ( char *p , char *pend , char *coll ) ;
 	bool  printHostLinks           ( SafeBuf *sb  ,
 					 long  page   ,
 					 char *username ,
@ -240,7 +220,7 @@ class Pages {
 					 char *pwd    ,
 					 long  fromIp ,
 					 char *qs = NULL ) ;
-
+	/*
 	char *printHostLinks           ( char *p      ,
 					 char *pend   ,
 					 long  page   ,
@ -248,14 +228,12 @@ class Pages {
 					 char *pwd    ,
 					 long  fromIp ,
 					 char *qs = NULL ) ;
+	*/
 	bool  printAdminLinks          ( SafeBuf *sb, 
 					 long  page ,
-					 //long  user ,
-					 char *username,
 					 char *coll ,
-					 char *pwd  ,
-					 bool  top  ) ;
-
+					 bool isBasic );
+	/*
 	char *printAdminLinks          ( char *p    , 
 					 char *pend , 
 					 long  page ,
@ -264,6 +242,7 @@ class Pages {
 					 char *coll ,
 					 char *pwd  ,
 					 bool  top  ) ;
+	*/
 	bool  printCollectionNavBar ( SafeBuf *sb     ,
 				      long  page     ,
 				      //long  user     ,
@ -271,7 +250,7 @@ class Pages {
 				      char *coll     ,
 				      char *pwd      ,
 				      char *qs       );
-
+	/*
 	char *printCollectionNavBar    ( char *p    ,
 					 char *pend , 
 					 long  page ,
@ -280,7 +259,7 @@ class Pages {
 					 char *coll ,
 					 char *pwd  ,
 					 char *qs = NULL );
-
+	*/
 	/*
 	bool printRulesetDropDown ( SafeBuf *sb        ,
 				    long  user         ,
@ -321,25 +300,43 @@ enum {
 	PAGE_DIRECTORY   ,
 	PAGE_REPORTSPAM  ,
 	//PAGE_WORDVECTOR  ,
-	
+
+	// basic controls page /admin/basic
+	PAGE_BASIC_SETTINGS ,
+	PAGE_BASIC_STATUS ,
+	//PAGE_BASIC_SEARCH , // TODO
+	//PAGE_BASIC_DIFFBOT , // TODO
+	PAGE_BASIC_SECURITY ,
+	PAGE_BASIC_SEARCH ,
+
 	// master admin pages
 	PAGE_MASTER      , 
+	PAGE_SEARCH      ,  
+	PAGE_SPIDER      ,
+	PAGE_LOG         ,
+	PAGE_SECURITY    ,
+	PAGE_ADDCOLL     ,	
+	PAGE_DELCOLL     , 
+	PAGE_REPAIR      ,
+	PAGE_SITES , // site filters
+	PAGE_FILTERS     ,
+	PAGE_INJECT      , 
+	PAGE_ADDURL2     ,
+	PAGE_REINDEX     ,	
+
 	PAGE_HOSTS       ,
 	PAGE_STATS       , // 10
 	PAGE_STATSDB	 ,
 	PAGE_PERF        ,
 	PAGE_SOCKETS     ,
-	PAGE_LOG         ,
+
 	PAGE_LOGVIEW     ,
 //	PAGE_SYNC        , 
-	PAGE_SECURITY    ,
-	PAGE_ADDCOLL     ,	
-	PAGE_DELCOLL     , 
 	PAGE_AUTOBAN     , // 20
 	//PAGE_SPIDERLOCKS ,
 	PAGE_PROFILER    ,
 	PAGE_THREADS     ,
-	PAGE_REPAIR      ,
+
 //	PAGE_THESAURUS   , 

 	// . non master-admin pages (collection controls)
@ -352,15 +349,9 @@ enum {
 	PAGE_TITLEDB     ,  
 	//PAGE_STATSDB	 ,

-	PAGE_SEARCH      ,  
-	PAGE_SPIDER      ,
 	PAGE_CRAWLBOT    , // 35
 	PAGE_SPIDERDB    , 
 	//PAGE_PRIORITIES  ,  // priority queue controls
-	PAGE_FILTERS     ,
-	PAGE_INJECT      , 
-	PAGE_ADDURL2     ,
-	PAGE_REINDEX     ,	
 	//PAGE_KEYWORDS    ,
 	PAGE_SEO         ,
 	PAGE_ACCESS      ,  //40	
--- a/Parms.cpp
+++ b/Parms.cpp
--- a/Parms.h
+++ b/Parms.h
@ -24,7 +24,9 @@ enum {
 enum {
 	OBJ_CONF    = 1 ,
 	OBJ_COLL        ,
-	OBJ_SI          }; // SearchInput class
+	OBJ_SI          , // SearchInput class
+	OBJ_NONE
+};

 enum {
 	TYPE_BOOL       = 1 ,
@ -56,8 +58,8 @@ enum {
 	TYPE_MONOM2         ,
 	TYPE_LONG_CONST     ,
 	TYPE_SITERULE       , // 29
-	TYPE_SAFEBUF        
-	//TYPE_DIFFBOT_DROPDOWN
+	TYPE_SAFEBUF        ,
+	TYPE_FILEUPLOADBUTTON
 };

 //forward decls to make compiler happy:
@ -95,9 +97,10 @@ class Page {
 #define PF_NOSYNC            0x40
 #define PF_DIFFBOT           0x80

-#define PF_HIDDEN 0x0100
-#define PF_NOSAVE 0x0200
-
+#define PF_HIDDEN   0x0100
+#define PF_NOSAVE   0x0200
+#define PF_DUP      0x0400
+#define PF_TEXTAREA 0x0800

 class Parm {
 public:
@ -197,29 +200,22 @@ class Parms {

 	void init();
 	
-	bool sendPageGeneric ( class TcpSocket *s, class HttpRequest *r, 
-			       long page , char *cookie = NULL ,
-			       // Diffbot.cpp uses this to print the
-			       // url filters into
-			       SafeBuf *pageBuf = NULL ,
-			       // used by diffbot.cpp
-			       char *collOverride = NULL ,
-			       bool isJSON = false ) ;
-
-	bool sendPageGeneric2 ( class TcpSocket *s , class HttpRequest *r , 
-				long page , char *coll , char *pwd ) ;
+	bool sendPageGeneric ( class TcpSocket *s, class HttpRequest *r );

+	bool printParmTable ( SafeBuf *sb , TcpSocket *s , HttpRequest *r );

 	//char *printParms (char *p, char *pend, TcpSocket *s, HttpRequest *r);
 	bool printParms (SafeBuf* sb, TcpSocket *s , HttpRequest *r );

-	//char *printParms (char *p,char *pend,long page,char *username,
-	//                  void *THIS, char *coll , char *pwd , 
-	//		  long nc , long pd ) ;
-	bool printParms (SafeBuf* sb, long page,char *username,void *THIS,
-			 char *coll , char *pwd , long nc , long pd ,
-			 bool isCrawlbot = false ,
-			 bool isJSON = false );
+	bool printParms2 (SafeBuf* sb, 
+			  long page,
+			  CollectionRec *cr,
+			  long nc , 
+			  long pd ,
+			  bool isCrawlbot ,
+			  bool isJSON,
+			  TcpSocket *sock
+			  );

 	/*
 	char *printParm ( char *p    , 
--- a/Show More
+++ b/Show More