Merge branch 'master' into diffbot

Conflicts:
	Hostdb.cpp
	Makefile
	PageResults.cpp
	PageRoot.cpp
	Pages.cpp
	Rdb.cpp
	SearchInput.cpp
	SearchInput.h
	Spider.cpp
	Spider.h
	XmlDoc.cpp
This commit is contained in:
Matt Wells
2013-10-16 14:28:42 -07:00
59 changed files with 3678 additions and 3262 deletions

@ -198,6 +198,7 @@ bool CatRec::set ( Url *url , char *data , long dataSize , bool gotByIp ) {
log ( "tagdb: Deserialized datasize %i != %li for url %s so "
"ignoring tagdb record.",
p - m_data, m_dataSize , url->getUrl() );
return false;
char *xx = NULL; *xx = 0;
}
@ -308,7 +309,9 @@ bool CatRec::set ( Url *site ,
// add the ids
m_catids = (long*)p;
memcpy(p, catids, 4*m_numCatids);
p += 4*m_numCatids;
// skip over "numCatids" NOT m_numCatids which is TRUNCATED
// to MAX_CATIDS
p += 4*numCatids;
//}
// point to the filenum so we can mod it!
//m_filenumPtr = p;

@ -29,7 +29,11 @@ bool Catdb::init ( ) {
// . what's max # of tree nodes?
// . assume avg tagdb rec size (siteUrl) is about 82 bytes we get:
// . NOTE: 32 bytes of the 82 are overhead
long treeMem = g_conf.m_catdbMaxTreeMem;
//long treeMem = g_conf.m_catdbMaxTreeMem;
// speed up gen catdb, use 15MB. later maybe once gen is complete
// we can free this tree or something...
// TODO!
long treeMem = 15000000;
//long treeMem = 100000000;
//long maxTreeNodes = g_conf.m_catdbMaxTreeMem / 82;
long maxTreeNodes = treeMem / 82;
@ -51,14 +55,14 @@ bool Catdb::init ( ) {
// . initialize our own internal rdb
// . i no longer use cache so changes to tagdb are instant
// . we still use page cache however, which is good enough!
if ( this == &g_catdb )
return m_rdb.init ( g_hostdb.m_dir ,
//if ( this == &g_catdb )
if ( ! m_rdb.init ( g_hostdb.m_dir ,
"catdb" ,
true , // dedup same keys?
-1 , // fixed record size
//g_hostdb.m_groupMask ,
//g_hostdb.m_groupId ,
g_conf.m_catdbMinFilesToMerge ,
2,//g_conf.m_catdbMinFilesToMerge ,
treeMem ,//g_conf.m_catdbMaxTreeMem ,
maxTreeNodes ,
// now we balance so Sync.cpp can ordered huge list
@ -70,9 +74,17 @@ bool Catdb::init ( ) {
&m_pc ,
false,
false,
12,
12, // keysize
false,
true ); // is collectionless?
true )) // is collectionless?
return false;
// normally Collectiondb.addColl() will call Rdb::addColl() which
// will init the CollectionRec::m_rdbBase, which is what
// Rdb::getBase(collnum_t) will return. however, for collectionless
// rdb databases we set Rdb::m_collectionlessBase special here.
// This is in Rdb.cpp::init() now.
//return m_rdb.addColl ( NULL );
return true;
}
@ -119,7 +131,7 @@ bool Catdb::verify ( char *coll ) {
g_threads.disableThreads();
Msg5 msg5;
Msg5 msg5b;
//Msg5 msg5b;
RdbList list;
key_t startKey;
key_t endKey;
@ -128,7 +140,7 @@ bool Catdb::verify ( char *coll ) {
//long minRecSizes = 64000;
if ( ! msg5.getList ( RDB_CATDB ,
coll ,
"",//coll ,
&list ,
startKey ,
endKey ,
@ -147,7 +159,7 @@ bool Catdb::verify ( char *coll ) {
-1 ,
true ,
-1LL ,
&msg5b ,
NULL,//&msg5b ,
true )) {
g_threads.enableThreads();
return log("db: HEY! it did not block");
@ -311,6 +323,19 @@ void Catdb::listSearch ( RdbList *list,
// for small lists, just loop through the list
if (list->getListSize() < 16*1024) {
while ( ! list->isExhausted() ) {
// for debug!
/*
CatRec crec;
crec.set ( NULL,
list->getCurrentData(),
list->getCurrentDataSize(),
false);
log("catdb: caturl=%s #catid=%li version=%li"
,crec.m_url
,(long)crec.m_numCatids
,(long)crec.m_version
);
*/
// check the current key
if ( list->getCurrentKey() != exactKey ) {
// miss, next

@ -50,6 +50,7 @@ void Categories::reset() {
}
}
// filename usually ./catdb/gbdmoz.structure.dat
long Categories::loadCategories ( char *filename ) {
//ifstream inStream;
int inStream;
@ -69,6 +70,7 @@ long Categories::loadCategories ( char *filename ) {
return 1;
}
// read in the number of cats
// filename usually ./catdb/gbdmoz.structure.dat
if ( fileRead ( inStream, &m_numCats, sizeof(long) ) != sizeof(long) ) {
log("cat: Error reading structure file: %s", filename);
close(inStream);
@ -114,7 +116,8 @@ long Categories::loadCategories ( char *filename ) {
g_errno = ENOMEM;
return 1;
}
// read the rest of the file into the temp buffer
// . read the rest of the file into the temp buffer
// . filename usually ./catdb/gbdmoz.structure.dat
if ( fileRead ( inStream, tempBuffer, readSize ) != readSize ) {
log("cat: Error reading structure file: %s", filename);
close(inStream);
@ -212,6 +215,15 @@ long Categories::loadCategories ( char *filename ) {
long long start = gettimeofdayInMilliseconds();
// sort the category hash by hash value
gbsort(m_catHash, m_numCats, sizeof(CategoryHash), sortCatHash);
// sanity check - no dups allowed
unsigned long last = 0xffffffff;
for ( long i = 0 ; i < m_numCats ; i++ ) {
if ( m_catHash[i].m_hash == last )
log("dmoz: hash collision on %lu",last);
last = m_catHash[i].m_hash;
}
// time it
long long took = gettimeofdayInMilliseconds();
if ( took - start > 100 ) log(LOG_INIT,"admin: Took %lli ms to "
@ -327,9 +339,15 @@ long Categories::getIndexFromPath ( char *str, long strLen ) {
// check for top
if (strLen == 3 &&
strncasecmp(str, "Top", 3) == 0)
// it is catid 2 right? but i guess zero is symbolic for us!
return 0;
// get the hash
unsigned long hash = hash32Lower_a(str, strLen, 0);
// debug
//char c = str[strLen];
//str[strLen] = '\0';
//log("dmoz: looking up hash %lu for %s",hash,str);
//str[strLen] = c;
// binary search
while (low <= high) {
// next check spot
@ -349,6 +367,7 @@ long Categories::getIndexFromPath ( char *str, long strLen ) {
// return the catid from the given path
long Categories::getIdFromPath ( char *str, long strLen ) {
if ( ! m_cats ) return -1;
long index = getIndexFromPath(str, strLen);
return m_cats[index].m_catid;
}
@ -497,7 +516,7 @@ void Categories::printPathFromId ( SafeBuf *sb ,
long catIndex;
// get the index
catIndex = getIndexFromId(catid);
if (catIndex < 1) return;
//if (catIndex < 1) return;
printPathFromIndex(sb, catIndex, raw, isRTL);
}
@ -509,8 +528,22 @@ void Categories::printPathFromIndex ( SafeBuf *sb ,
if (catIndex < 1) return;
// get the parent
parentId = m_cats[catIndex].m_parentid;
// print the parent(s) first
if (parentId > 1) {
long catid = m_cats[catIndex].m_catid;
// include Top now. in newer dmoz it is catid2.
//if ( catid == 2 ) {
// sb->safePrintf("Top");
// return;
//}
// . print the parent(s) first
// . the new dmoz data dumps signify a parentless topic by
// having its parentid equal its catid, so avoid infinite
// loops by checking for that here now. mdw oct 2013.
// . the new DMOZ has Top as catid 2 now, even though it is
// mistakenly labelled as Top/World, which is really catid 3.
// so make this parentId > 2...
if (parentId >= 1 && parentId != catid ) {
bool isParentRTL = isIdRTLStart(parentId);
// print spacing here if RTL
//if (isRTL && !raw)
@ -558,7 +591,7 @@ void Categories::printPathCrumbFromId ( SafeBuf *sb ,
long catIndex;
// get the index
catIndex = getIndexFromId(catid);
if (catIndex < 1) return;
//if (catIndex < 1) return;
printPathCrumbFromIndex(sb, catIndex, isRTL);
}
@ -569,8 +602,20 @@ void Categories::printPathCrumbFromIndex ( SafeBuf *sb,
if (catIndex < 1) return;
// get the parent
parentId = m_cats[catIndex].m_parentid;
// print the parent(s) first
if (parentId > 1) {
long catid = m_cats[catIndex].m_catid;
// include Top now. in newer dmoz it is catid2.
// seems to already be included below... because you made it
// parentId>1 not parentId>2
//if ( catid == 2 ) {
// sb->safePrintf("Top");
// return;
//}
// . print the parent(s) first
// . the new dmoz has Top as parentid 2 now, and Top/World is
// catid 3. so make this parentId > 2 not parentId > 1
if (parentId > 1 && parentId != catid ) {
bool isParentRTL = isIdRTLStart(parentId);
printPathCrumbFromId(sb, parentId, isRTL);
// print a spacing
@ -793,7 +838,7 @@ long Categories::fixUrl ( char *url, long urlLen ) {
return newUrlLen;
}
bool Categories::addUrlsToBadHashTable ( long catid ) {
bool Categories::addUrlsToBadHashTable ( long catid ) {
return getTitleAndSummary ( NULL , // urlorig
0 , // urloriglen
catid ,
@ -810,6 +855,183 @@ long Categories::fixUrl ( char *url, long urlLen ) {
true );// just add to table
}
// just show the urls in dmoz
bool Categories::printUrlsInTopic ( SafeBuf *sb, long catid ) {
long catIndex;
unsigned long fileOffset;
unsigned long n;
char* p;
unsigned long readSize;
char title[1024];
char summ[5000];
long maxTitleLen = 1024;
long maxSummLen = 5000;
long titleLen;
long summLen;
long urlStrLen;
char urlStr[MAX_URL_LEN];
long niceness = 0;
bool printedStart = false;
// lookup the index for this catid
catIndex = getIndexFromId(catid);
if (catIndex < 0)
goto errEnd;
// get the file offset
fileOffset = m_cats[catIndex].m_contentOffset;
QUICKPOLL( niceness );
// . open the file
char filename[512];
sprintf(filename, "%scatdb/%s", g_hostdb.m_dir, RDFCONTENT_FILE);
m_rdfStream = open(filename, O_RDONLY | O_NONBLOCK);
if ( m_rdfStream < 0 ) {
log("cat: Error Opening %s\n", filename);
goto errEnd;
}
// . seek to the offset
n = lseek ( m_rdfStream, fileOffset, SEEK_SET );
if ( n != fileOffset ) {
log("cat: Error seeking to Content Offset %li", fileOffset);
goto errEnd;
}
// . read in a chunk
m_rdfBuffer = m_rdfSmallBuffer;
m_rdfBufferSize = RDFSMALLBUFFER_SIZE;
p = m_rdfBuffer;
readSize = m_rdfBufferSize;
readLoop:
n = read ( m_rdfStream, p, readSize );
if(n > 0 && n != readSize) {
p += n;
readSize -= n;
}
//log(LOG_WARN,"build: reading %li bytes out of %li",n,m_rdfBufferSize);
QUICKPOLL(niceness);
if(n < 0 && errno == EAGAIN) goto readLoop;
if ( n <= 0 || n > (unsigned long)m_rdfBufferSize ) {
log("cat: Error Reading Content");
goto errEnd;
}
m_rdfPtr = m_rdfBuffer;
m_rdfEnd = &m_rdfBuffer[n];
m_currOffset = fileOffset;
// . parse to the correct url
// parse the first topic and catid
if (rdfNextTag() < 0)
goto errEnd;
if (rdfNextTag() < 0)
goto errEnd;
// parse until "ExternalPage"
nextTag:
QUICKPOLL((niceness));
if (rdfNextTag() < 0)
goto errEnd;
// check for catid of next topic to stop looking
if (m_tagLen == 5 &&
strncmp(m_tagRecfer, "catid", 5) == 0)
goto errEnd;
if (m_tagLen != 12 ) goto nextTag;
if ( strncmp(m_tagRecfer, "ExternalPage", 12) != 0) goto nextTag;
//
// got one
//
// get the next string
urlStrLen = fillNextString(urlStr, MAX_URL_LEN-1);
if (urlStrLen < 0)
goto errEnd;
// html decode the url
/*
urlStrLen = htmlDecode(decodedUrl, urlStr, urlStrLen,false,
niceness);
memcpy(urlStr, decodedUrl, urlStrLen);
normUrl.set(urlStr, urlStrLen, true);
g_catdb.normalizeUrl(&normUrl, &normUrl);
// copy it back
urlStrLen = normUrl.getUrlLen();
memcpy(urlStr, normUrl.getUrl(), urlStrLen);
// make sure there's a trailing / on root urls
// and no www.
//urlStrLen = fixUrl(urlStr, urlStrLen);
// check for an anchor
urlAnchor = NULL;
urlAnchorLen = 0;
//for (long i = 0; i < urlStrLen; i++) {
//if (urlStr[i] == '#') {
if (normUrl.getAnchorLen() > 0) {
//urlAnchor = &urlStr[i];
//urlAnchorLen = urlStrLen - i;
//urlStrLen = i;
urlAnchor = normUrl.getAnchor();
urlAnchorLen = normUrl.getAnchorLen();
//break;
}
*/
// . parse out the title
if (rdfParse("d:Title") < 0)
goto errEnd;
titleLen = fillNextTagBody(title, maxTitleLen);
QUICKPOLL(niceness);
// . parse out the summary
if (rdfParse("d:Description") < 0)
goto errEnd;
summLen = fillNextTagBody(summ, maxSummLen);
if ( ! printedStart ) {
printedStart = true;
sb->safePrintf("<ul>");
}
// print it out
sb->safePrintf("<li><a href=\"");
sb->safeMemcpy ( urlStr , urlStrLen );
sb->safePrintf("\">");
sb->safeMemcpy ( title , titleLen );
sb->safePrintf("</a><br>");
sb->safeMemcpy( summ, summLen );
sb->safePrintf("<br>");//<br>");
/*
// . fill the anchor
if (anchor) {
if (urlAnchor) {
if (urlAnchorLen > maxAnchorLen)
urlAnchorLen = maxAnchorLen;
memcpy(anchor, urlAnchor, urlAnchorLen);
*anchorLen = urlAnchorLen;
}
else
*anchorLen = 0;
}
*/
// DO NEXT tag
goto nextTag;
errEnd:
sb->safePrintf("</ul>");
close(m_rdfStream);
return false;
}
// . get the title and summary for a specific url
// and catid
bool Categories::getTitleAndSummary ( char *urlOrig,
@ -857,7 +1079,7 @@ bool Categories::getTitleAndSummary ( char *urlOrig,
// . open the file
char filename[512];
sprintf(filename, "%scat/%s", g_hostdb.m_dir, RDFCONTENT_FILE);
sprintf(filename, "%scatdb/%s", g_hostdb.m_dir, RDFCONTENT_FILE);
//m_rdfStream.clear();
//m_rdfStream.open(filename, ifstream::in);
m_rdfStream = open(filename, O_RDONLY | O_NONBLOCK);
@ -1011,13 +1233,17 @@ errEnd:
return false;
}
// generate sub categories for a given catid
// . generate sub categories for a given catid
// . store list of SubCategories into "subCatBuf" return # stored
long Categories::generateSubCats ( long catid,
SubCategory *subCats,
char **catBuffer,
long *catBufferSize,
long *catBufferLen,
bool allowRealloc ) {
SafeBuf *subCatBuf
//SubCategory *subCats,
//char **catBuffer,
//long *catBufferSize,
//long *catBufferLen,
//bool allowRealloc
) {
long catIndex;
unsigned long fileOffset;
unsigned long n;
@ -1029,17 +1255,24 @@ long Categories::generateSubCats ( long catid,
long prefixLen;
long nameStart;
long nameLen;
long catp = 0;
long catBufferInc = *catBufferSize;
// lookup the index for this catid
long need ;
SubCategory *cat;
char *p ;
//long catp = 0;
//long catBufferInc = *catBufferSize;
// . lookup the index for this catid
// . binary step, guessing to approximate place
// and then scanning from there
catIndex = getIndexFromId(catid);
if (catIndex < 0)
goto errEnd;
// get the file offset
fileOffset = m_cats[catIndex].m_structureOffset;
// open the structure file
// catdb/structure.rdf.u8 in utf8
char filename[512];
sprintf(filename, "%scat/%s", g_hostdb.m_dir, RDFSTRUCTURE_FILE);
sprintf(filename, "%scatdb/%s", g_hostdb.m_dir, RDFSTRUCTURE_FILE);
//m_rdfStream.clear();
//m_rdfStream.open(filename, ifstream::in);
m_rdfStream = open(filename, O_RDONLY);
@ -1066,12 +1299,16 @@ long Categories::generateSubCats ( long catid,
log("cat: Error Reading Structure Offset");
goto errEnd;
}
// point to the buffer we just read with m_rdfPtr
m_rdfPtr = m_rdfBuffer;
m_rdfEnd = &m_rdfBuffer[n];
m_currOffset = fileOffset;
// parse tags for the sub categories or until we hit /Topic
nextTag:
// . this increments m_rdfPtr until it points to the beginning of a tag
// . it may end up reading another chunk from disk
// . it memcopies m_tagRecfer to be the name of the tag it points to
if (rdfNextTag() < 0)
goto gotSubCats;
// check for /Topic
@ -1121,6 +1358,9 @@ nextTag:
false,
0);
memcpy(catStr, htmlDecoded, catStrLen);
// reset this offset
nameStart = 0;
nameLen = catStrLen;
// get the prefix and name position/length
switch (currType) {
case SUBCAT_ALTLANG:
@ -1130,14 +1370,14 @@ nextTag:
// prefix is at the start
prefixStart = 0;
prefixLen = 0;
nameStart = 0;
//nameStart = 0;
// go to the end of the prefix
while (catStr[nameStart] != ':') {
nameStart++;
prefixLen++;
}
// skip the :Top/
nameStart += 5;
// skip the : in :Top/
nameStart += 1;
nameLen = catStrLen - nameStart;
break;
case SUBCAT_LETTERBAR:
@ -1145,9 +1385,9 @@ nextTag:
prefixStart = catStrLen - 1;
prefixLen = 1;
// skip the Top/ for the name
nameStart = 4;
//nameStart = 4;
// lose the Top/, keep the end letter
nameLen = catStrLen - 4;
//nameLen = catStrLen - 4;
break;
// . don't do this because of ltr?
//case SUBCAT_RELATED:
@ -1167,43 +1407,56 @@ nextTag:
prefixStart--;
prefixLen++;
}
// name skips Top/
nameStart = 4;
nameLen = catStrLen - 4;
// name skips Top/ ... no! we include Top now
// because we need it so PageResults.cpp can call
// currIndex=g_categories->getIndexFromPath(catName,catNameLen)
// on this name, and it needs "Top/" because it was part
// of the hash of the full name for the category now.
// and we lookup the Category record by that hash
// in getIndexFromPath().
//nameStart = 4;
//nameLen = catStrLen - 4;
break;
}
// . fill the next sub category
if (catp + prefixLen + nameLen >= *catBufferSize) {
if (!allowRealloc)
goto gotSubCats;
// realloc the buffer
char *re_catBuffer = (char*)mrealloc ( *catBuffer,
*catBufferSize,
*catBufferSize+catBufferInc,
"Categories" );
if (!re_catBuffer) {
log ( "Could not allocate %li bytes for catBuffer",
*catBufferSize+catBufferInc );
g_errno = ENOMEM;
goto errEnd;
}
*catBuffer = re_catBuffer;
*catBufferSize += catBufferInc;
}
// fill the prefix and name in the buffer and subcat
// . fill the prefix and name in the buffer and subcat
need = sizeof(SubCategory) + prefixLen + 1 + nameLen + 1;
// reserve space in safebuf for it
if ( ! subCatBuf->reserve(need) ) goto errEnd;
// point to it in safebuf
cat = (SubCategory *)(subCatBuf->getBuf());
cat->m_prefixLen = prefixLen;
cat->m_nameLen = nameLen;
cat->m_type = currType;
p = cat->m_buf;
memcpy ( p , catStr + prefixStart , prefixLen );
p += prefixLen;
*p++ = '\0';
memcpy ( p , catStr + nameStart , nameLen );
p += nameLen;
*p++ = '\0';
// update safebuf length
subCatBuf->incrementLength ( cat->getRecSize() );
/*
subCats[numSubCats].m_prefixOffset = catp;
subCats[numSubCats].m_prefixLen = prefixLen;
if (prefixLen > 0) {
memcpy(&((*catBuffer)[catp]), &catStr[prefixStart], prefixLen);
catp += prefixLen;
}
subCats[numSubCats].m_nameOffset = catp;
subCats[numSubCats].m_nameOffset = catBuf->length();//catp;
subCats[numSubCats].m_nameLen = nameLen;
if (nameLen > 0) {
memcpy(&((*catBuffer)[catp]), &catStr[nameStart], nameLen);
catp += nameLen;
}
subCats[numSubCats].m_type = currType;
*/
// next sub cat
numSubCats++;
if (numSubCats >= MAX_SUB_CATS) {
@ -1214,14 +1467,14 @@ nextTag:
// next tag
goto nextTag;
gotSubCats:
*catBufferLen = catp;
//*catBufferLen = catp;
//m_rdfStream.close();
//m_rdfStream.clear();
close(m_rdfStream);
return numSubCats;
errEnd:
*catBufferLen = 0;
//*catBufferLen = 0;
//m_rdfStream.close();
//m_rdfStream.clear();
close(m_rdfStream);
@ -1259,8 +1512,13 @@ long Categories::createDirSearchRequest ( char *requestBuf,
char *rrr = r->m_reqBuf.getBufStart();
if ( rrr && rrr[0] == 'Z' ) cmd = "ZET";
// request
p += sprintf(p, "%s /search?dir=%li&dr=0&sc=0&sdir=%li&sdirt=0&c=",
cmd, catid, catid);
//p += sprintf(p, "%s /search?dir=%li&dr=0&sc=0&sdir=%li&sdirt=0&c=",
// cmd, catid, catid);
p += sprintf(p,
"%s /search?q=gbcatid%%3A%li&dir=%li&dr=0&sc=0&c="
, cmd
, catid
, catid);
// coll
memcpy(p, coll, collLen);
p += collLen;
@ -1314,7 +1572,7 @@ bool Categories::loadLangTables(void) {
unsigned long entries = 0L;
char *cp;
char *cpEnd = line + 10239;
if(!(content = fopen("cat/content.rdf.u8", "r"))) {
if(!(content = fopen("catdb/content.rdf.u8", "r"))) {
log(LOG_INFO, "cat: could not open content file.\n");
return(false);
}

@ -26,7 +26,7 @@
#define MAX_TAG_LEN 127
#define MAX_URL_CATIDS 64
#define MAX_URLTXT_SIZE 500000
#define MAX_CATIDS 64
#define MAX_CATIDS 96
#define MAX_CATNAME_LEN 1024
#define HASHTABLE_SIZE (1024*1024)
@ -61,11 +61,15 @@ struct CategoryHash {
};
struct SubCategory {
long m_prefixOffset;
//long m_prefixOffset;
long m_prefixLen;
long m_nameOffset;
//long m_nameOffset;
long m_nameLen;
char m_type;
long getRecSize () { return sizeof(SubCategory)+m_prefixLen+m_nameLen+2;};
char *getPrefix() { return m_buf; };
char *getName () { return m_buf+m_prefixLen+1;};
char m_buf[0];
};
class Categories {
@ -133,6 +137,8 @@ public:
long catid,
bool isRTL = false );
bool printUrlsInTopic ( class SafeBuf *sb , long catid ) ;
// . get the title and summary for a specific url
// and catid
bool getTitleAndSummary ( char *url,
@ -153,15 +159,13 @@ public:
// normalize a url string
long fixUrl ( char *url, long urlLen );
// generate sub categories for a given catid
long generateSubCats ( long catid,
SubCategory *subCats,
char **catBuffer,
long *catBufferSize,
long *catBufferLen,
bool allowRealloc = true );
// . generate sub categories for a given catid
// . store list of SubCategories into "subCatBuf" return # stored
// . hits disk without using threads... so kinda sucks...
long generateSubCats ( long catid, SafeBuf *subCatBuf );
long getNumUrlsFromIndex ( long catIndex ) {
if ( ! m_cats ) return 0;
return m_cats[catIndex].m_numUrls; };
// creates a directory search request url

2
Conf.h

@ -164,7 +164,7 @@ class Conf {
long m_catdbMaxTreeMem;
long m_catdbMaxDiskPageCacheMem;
long m_catdbMaxCacheMem;
long m_catdbMinFilesToMerge;
//long m_catdbMinFilesToMerge;
long m_revdbMaxTreeMem;
long m_timedbMaxTreeMem;

@ -2359,6 +2359,9 @@ uint32_t Hostdb::getShardNum ( char rdbId,void *k,bool split ) {
//else if ( rdbId == RDB_CATDB || rdbId == RDB2_CATDB2 ) {
// return m_map [(*(uint16_t *)((char *)k + 10))>>3];
//}
else if ( rdbId == RDB_CATDB || rdbId == RDB2_CATDB2 ) {
return m_map [(*(uint16_t *)((char *)k + 10))>>3];
}
// core -- must be provided
char *xx = NULL; *xx = 0;
//groupId=key.n1 & g_hostdb.m_groupMask;

@ -518,6 +518,7 @@ long HttpMime::getContentTypePrivate ( char *s ) {
else if (!strcasecmp(s,"image/jpeg" ) ) ct = CT_JPG;
else if (!strcasecmp(s,"image/png" ) ) ct = CT_PNG;
else if (!strcasecmp(s,"image/tiff" ) ) ct = CT_TIFF;
else if (!strncasecmp(s,"image/",6 ) ) ct = CT_IMAGE;
else if (!strcasecmp(s,"application/javascript" ) ) ct = CT_JS;
else if (!strcasecmp(s,"application/x-javascript") ) ct = CT_JS;
else if (!strcasecmp(s,"text/javascript" ) ) ct = CT_JS;

@ -36,6 +36,7 @@ time_t atotime5 ( char *s ) ;
#define CT_JS 14
#define CT_CSS 15
#define CT_JSON 16
#define CT_IMAGE 17
#define ET_IDENTITY 0
#define ET_GZIP 1

@ -23,6 +23,7 @@ void HttpRequest::reset() {
m_userIP = 0;
m_isMSIE = false;
m_reqBufValid = false;
m_reqBuf.purge();
if (m_cgiBuf2) {
mfree(m_cgiBuf2, m_cgiBuf2Size, "extraParms");

@ -459,7 +459,11 @@ unsigned char getLanguageFromUserAgent(char *abbr) {
return langUnknown;
}
// these are going to be adult, in any language
// . these are going to be adult, in any language
// . this seems only to be used by Speller.cpp when splitting up words
// in the url domain.
// . s/slen is a full word that is found in our "dictionary" so using
// phrases like biglittlestuff probably should not go here.
bool isAdult( char *s, long slen, char **loc ) {
char **p = NULL;
char *a = NULL;

@ -33,13 +33,13 @@ OBJS = Tfndb.o UdpSlot.o \
HttpMime.o Hostdb.o \
Highlight.o File.o Errno.o Entities.o \
Dns.o Dir.o Conf.o Bits.o \
Stats.o BigFile.o AdultBit.o Msg17.o \
Stats.o BigFile.o Msg17.o \
Speller.o DiskPageCache.o \
PingServer.o StopWords.o TopTree.o \
Parms.o Pages.o Msg28.o Msg30.o \
Unicode.o iana_charset.o Iso8859.o \
SearchInput.o \
Categories.o Msg2a.o PageCatdb.o PageDirectory.o Msg2b.o \
Categories.o Msg2a.o PageCatdb.o PageDirectory.o \
SafeBuf.o Datedb.o \
UCNormalizer.o UCPropTable.o UnicodeProperties.o \
Pops.o Title.o Pos.o LangList.o \
@ -99,7 +99,7 @@ endif
# let's keep the libraries in the repo for easier bug reporting and debugging
# in general if we can. the includes are still in /usr/include/ however...
# which is kinda strange but seems to work so far.
#LIBS= -L. ./libplotter.a ./libplot.a ./libz.a ./libssl.a ./libcrypto.a ./libiconv.a ./libm.a ./libgcc.a ./libpthread.a ./libc.a ./libstdc++.a
#LIBS= -L. ./libz.a ./libssl.a ./libcrypto.a ./libiconv.a ./libm.a ./libgcc.a ./libpthread.a ./libc.a ./libstdc++.a

@ -1275,10 +1275,12 @@ mallocmemloop:
long long avail = (long long)m_maxMem -
(long long)m_used;
if ( now - s_lastTime >= 1000LL ) {
log("mem: system malloc(%i) availShouldBe=%lli: "
log("mem: system malloc(%i,%s) availShouldBe=%lli: "
"%s (%s) (ooms suppressed since "
"last log msg = %li)",
size+UNDERPAD+OVERPAD,avail,
size+UNDERPAD+OVERPAD,
note,
avail,
mstrerror(g_errno),
note,
s_missed);

@ -388,6 +388,12 @@ bool Msg1::sendSomeOfList ( ) {
if ( m_list->m_fixedDataSize != getDataSizeFromRdbId(m_rdbId) ) {
char *xx=NULL;*xx=0; }
// little debug thing for genCatdb from msg9b's huge list add
//if ( m_list->m_listSize > 10000000 )
// log("msg1: adding chunk @ %li of %li bytes",
// (long)(dataStart - m_list->m_list) ,
// (long)m_list->m_listSize );
// . now send this list to the host
// . this returns false if blocked, true otherwise
// . it also sets g_errno on error
@ -480,7 +486,9 @@ bool Msg1::sendData ( unsigned long shardNum, char *listData , long listSize) {
if ( ! g_errno ) sendToSelf = false;
else {
log("rdb: msg1 had error: %s",mstrerror(g_errno));
return true;
// this is messing up generate catdb's huge rdblist add
// why did we put it in there??? from msg9b.cpp
//return true;
}
QUICKPOLL(m_niceness);

@ -58,9 +58,9 @@ bool Msg2a::makeCatdb( char *coll,
char inFile[256];
// url info (content) file
if ( m_updateFromNew )
sprintf(inFile, "%scat/gbdmoz.content.dat.new", g_hostdb.m_dir);
sprintf(inFile, "%scatdb/gbdmoz.content.dat.new", g_hostdb.m_dir);
else
sprintf(inFile, "%scat/gbdmoz.content.dat", g_hostdb.m_dir);
sprintf(inFile, "%scatdb/gbdmoz.content.dat", g_hostdb.m_dir);
//m_inStream.open(inFile, ifstream::in);
m_inStream = open(inFile, O_RDONLY);
//if (!m_inStream.is_open()) {
@ -118,7 +118,7 @@ bool Msg2a::makeCatdb( char *coll,
// open the new diff file
//ifstream diffInStream;
int diffInStream;
sprintf(inFile, "%scat/gbdmoz.content.dat.new.diff",
sprintf(inFile, "%scatdb/gbdmoz.content.dat.new.diff",
g_hostdb.m_dir);
//diffInStream.open(inFile, ifstream::in);
diffInStream = open(inFile, O_RDONLY);
@ -328,6 +328,12 @@ bool Msg2a::makeCatdb( char *coll,
// null terminate
m_urls[urlp] = '\0';
currUrl++;
// debug
//SafeBuf sb;
//sb.safeMemcpy(&m_urls[urlp-urlLen],urlLen);
//sb.nullTerm();
//log("gencat: url=%s",sb.getBufStart());
}
log(LOG_INFO, "db: Wrote %li urls to update (%li)\n",
currUrl - m_numRemoveUrls, m_numUpdateIndexes);
@ -581,9 +587,9 @@ void handleRequest2a ( UdpSlot *slot, long netnice ) {
otherCategories = &g_categories1;
// load the new file
if ( updateFromNew )
sprintf(buff, "%scat/gbdmoz.structure.dat.new", g_hostdb.m_dir);
sprintf(buff, "%scatdb/gbdmoz.structure.dat.new", g_hostdb.m_dir);
else
sprintf(buff, "%scat/gbdmoz.structure.dat", g_hostdb.m_dir);
sprintf(buff, "%scatdb/gbdmoz.structure.dat", g_hostdb.m_dir);
if (otherCategories->loadCategories(buff) != 0) {
log("db: Loading Categories From %s Failed", buff);
// send error reply
@ -605,51 +611,51 @@ void handleRequest2a ( UdpSlot *slot, long netnice ) {
}
// move the current files to .old
sprintf(buff, "mv %scat/content.rdf.u8 %scat/content.rdf.u8.old",
sprintf(buff, "mv %scatdb/content.rdf.u8 %scatdb/content.rdf.u8.old",
g_hostdb.m_dir, g_hostdb.m_dir);
log ( LOG_INFO, "%s", buff);
system ( buff );
sprintf(buff, "mv %scat/structure.rdf.u8 %scat/structure.rdf.u8.old",
sprintf(buff, "mv %scatdb/structure.rdf.u8 %scatdb/structure.rdf.u8.old",
g_hostdb.m_dir, g_hostdb.m_dir);
log ( LOG_INFO, "%s", buff);
system ( buff );
sprintf(buff, "mv %scat/gbdmoz.content.dat "
"%scat/gbdmoz.content.dat.old",
sprintf(buff, "mv %scatdb/gbdmoz.content.dat "
"%scatdb/gbdmoz.content.dat.old",
g_hostdb.m_dir, g_hostdb.m_dir);
log ( LOG_INFO, "%s", buff);
system ( buff );
sprintf(buff, "mv %scat/gbdmoz.structure.dat "
"%scat/gbdmoz.structure.dat.old",
sprintf(buff, "mv %scatdb/gbdmoz.structure.dat "
"%scatdb/gbdmoz.structure.dat.old",
g_hostdb.m_dir, g_hostdb.m_dir);
log ( LOG_INFO, "%s", buff);
system ( buff );
sprintf(buff, "mv %scat/gbdmoz.content.dat.diff "
"%scat/gbdmoz.content.dat.diff.old",
sprintf(buff, "mv %scatdb/gbdmoz.content.dat.diff "
"%scatdb/gbdmoz.content.dat.diff.old",
g_hostdb.m_dir, g_hostdb.m_dir);
log ( LOG_INFO, "%s", buff);
system ( buff );
// move the .new files to current
sprintf(buff, "mv %scat/content.rdf.u8.new %scat/content.rdf.u8",
sprintf(buff, "mv %scatdb/content.rdf.u8.new %scatdb/content.rdf.u8",
g_hostdb.m_dir, g_hostdb.m_dir);
log ( LOG_INFO, "%s", buff);
system ( buff );
sprintf(buff, "mv %scat/structure.rdf.u8.new %scat/structure.rdf.u8",
sprintf(buff, "mv %scatdb/structure.rdf.u8.new %scatdb/structure.rdf.u8",
g_hostdb.m_dir, g_hostdb.m_dir);
log ( LOG_INFO, "%s", buff);
system ( buff );
sprintf(buff, "mv %scat/gbdmoz.content.dat.new "
"%scat/gbdmoz.content.dat",
sprintf(buff, "mv %scatdb/gbdmoz.content.dat.new "
"%scatdb/gbdmoz.content.dat",
g_hostdb.m_dir, g_hostdb.m_dir);
log ( LOG_INFO, "%s", buff);
system ( buff );
sprintf(buff, "mv %scat/gbdmoz.structure.dat.new "
"%scat/gbdmoz.structure.dat",
sprintf(buff, "mv %scatdb/gbdmoz.structure.dat.new "
"%scatdb/gbdmoz.structure.dat",
g_hostdb.m_dir, g_hostdb.m_dir);
log ( LOG_INFO, "%s", buff);
system ( buff );
//sprintf(buff, "mv %scat/gbdmoz.content.dat.new.diff "
// "%scat/gbdmoz.content.dat.diff",
//sprintf(buff, "mv %scatdb/gbdmoz.content.dat.new.diff "
// "%scatdb/gbdmoz.content.dat.diff",
// g_hostdb.m_dir, g_hostdb.m_dir);
//log ( LOG_INFO, "%s", buff);
//system ( buff );

@ -148,6 +148,10 @@ bool Msg40::getResults ( SearchInput *si ,
// we need this info for caching as well
//m_numGigabitInfos = 0;
//just getfrom searchinput
//.... m_catId = hr->getLong("catid",0);m_si->m_catId;
m_postQueryRerank.set1( this, si );
// get the collection rec
@ -680,6 +684,20 @@ bool Msg40::gotDocIds ( ) {
// if ( ! m_msg1a.generateReferences(m_si,(void*)this,didTaskWrapper) )
// m_tasksRemaining++;
//
// call Msg2b to generate directory
//
// why is this here? it does not depend on the docids. (mdw 9/25/13)
// dissect it and fix it!!
//
//if ( m_si->m_catId &&
// ! m_msg2b.generateDirectory ( m_si->m_catId,
// (void*)this,
// didTaskWrapper ) )
// m_tasksRemaining++;
return launchMsg20s ( false );
}
@ -878,7 +896,6 @@ bool Msg40::reallocMsg20Buf ( ) {
return true;
}
/*
void didTaskWrapper ( void* state ) {
Msg40 *THIS = (Msg40 *) state;
// one less task
@ -888,7 +905,6 @@ void didTaskWrapper ( void* state ) {
// we are done, call the callback
THIS->m_callback ( THIS->m_state );
}
*/
bool Msg40::launchMsg20s ( bool recalled ) {
@ -2128,7 +2144,7 @@ long Msg40::getStoredSize ( ) {
//size += m_msg24.getStoredSize ( );
//size += m_msg1a.getStoredSize ( );
// cache msg2b if we have it
size += m_msg2b.getStoredSize();
//size += m_msg2b.getStoredSize();
return size;
}
@ -2203,9 +2219,9 @@ long Msg40::serialize ( char *buf , long bufLen ) {
//if ( y == -1 ) return -1;
//p += y;
long z = m_msg2b.serialize (p, pend - p);
if ( z == -1 ) return -1;
p += z;
//long z = m_msg2b.serialize (p, pend - p);
//if ( z == -1 ) return -1;
//p += z;
if ( m_r.m_debug )
log("query: msg40 serialize nd=%li "
@ -2258,9 +2274,9 @@ long Msg40::deserialize ( char *buf , long bufSize ) {
}
// msg2b
long z = m_msg2b.deserialize ( p , pend - p );
if ( z == -1 ) return -1;
p += z;
//long z = m_msg2b.deserialize ( p , pend - p );
//if ( z == -1 ) return -1;
//p += z;
// return bytes read
return p - buf;

@ -14,7 +14,7 @@
#include "Msg39.h" // getTermFreqs()
#include "Msg20.h" // for getting summary from docId
#include "Msg17.h" // a distributed cache of serialized/compressed Msg40s
#include "Msg2b.h" // for generating directories
//#include "Msg2b.h" // for generating directories
#include "IndexReadInfo.h" // STAGE0,...
#include "Msg3a.h"
#include "PostQueryRerank.h"
@ -302,7 +302,7 @@ class Msg40 {
long m_docsToScanForTopics;
// Msg2b for generating a directory
Msg2b m_msg2b;
//Msg2b m_msg2b;
PostQueryRerank m_postQueryRerank;

@ -45,13 +45,13 @@ bool Msg8b::getCatRec ( Url *url ,
// clear g_errno
g_errno = 0;
// warning
if ( ! coll ) log(LOG_LOGIC,"net: NULL collection. msg8b.");
//if ( ! coll ) log(LOG_LOGIC,"net: NULL collection. msg8b.");
// store the calling parameters in this class for retrieval by callback
m_state = state;
m_callback = callback;
m_url = url;
m_coll = coll;
m_collLen = collLen;
//m_coll = coll;
//m_collLen = collLen;
m_cr = cr;
m_niceness = niceness;
@ -68,10 +68,10 @@ bool Msg8b::getCatRec ( Url *url ,
//m_coll = g_conf.m_dirColl;
//m_collLen = gbstrlen(m_coll);
// catdb uses a dummy collection now, should not be looked at
m_coll = "catdb";
m_collLen = 5;
//m_coll = "catdb";
//m_collLen = 5;
m_collnum = g_collectiondb.getCollnum ( m_coll , m_collLen );
//m_collnum = g_collectiondb.getCollnum ( m_coll , m_collLen );
// . first, try it by canonical domain name
// . if that finds no matches, then try it by ip domain
@ -90,7 +90,7 @@ bool Msg8b::getCatRec ( Url *url ,
//
if ( getMyShardNum() != m_shardNum ) {//g_hostdb.m_groupId!=m_groupId){
// coll, url, niceness(1), rdbid(1), useCanonicalName(1)
long requestSize = m_collLen + m_url->getUrlLen() + 4 + 4;
long requestSize = m_url->getUrlLen() + 4 + 3;
// make the request
char *p = m_request;
*(long *)p = m_url->getIp() ; p+=4;
@ -98,10 +98,10 @@ bool Msg8b::getCatRec ( Url *url ,
*p = (char)niceness ; p++;
*p = (char)useCanonicalName; p++;
// coll
memcpy(p, m_coll, m_collLen);
p += m_collLen;
*p = '\0';
p++;
//memcpy(p, m_coll, m_collLen);
//p += m_collLen;
//*p = '\0';
//p++;
// url
memcpy(p, m_url->getUrl(), m_url->getUrlLen());
p += m_url->getUrlLen();
@ -187,7 +187,7 @@ bool Msg8b::getCatRec ( Url *url ,
0 , // max cached age in seconds (60)
false , // add net recv'd list to cache?
RDB_CATDB, // specifies the rdb, 1 = tagdb
m_coll ,
"",//NULL,//m_coll ,
//&m_list ,
m_list ,
startKey ,
@ -546,7 +546,7 @@ bool Msg8b::gotList ( ) {
char *rec;
//rec = g_catdb->getRec ( &m_list , m_url , &recSize );
rec = g_catdb.getRec(m_list,m_url,&recSize,m_coll,m_collLen);
rec = g_catdb.getRec(m_list,m_url,&recSize,NULL,0);//m_coll,m_collLen);
// if record found then set it and also set gotIt to true
if ( rec ) {
@ -589,8 +589,8 @@ void Msg8b::getIndirectCatids ( ) {
matchRecs,
matchRecSizes,
MAX_IND_CATIDS,
m_coll,
m_collLen);
NULL,//m_coll,
0);//m_collLen);
// parse out the catids from the matches
m_cr->m_numIndCatids = 0;
for ( long i = 0; i < numMatches; i++ ) {

@ -68,11 +68,11 @@ class Msg8b {
void cleanSlot ( );
// some specified input
char *m_coll;
long m_collLen;
//char *m_coll;
//long m_collLen;
Url *m_url;
collnum_t m_collnum;
//collnum_t m_collnum;
void (*m_callback ) ( void *state );//, CatRec *rec );
void *m_state; // ptr to caller's private state data

@ -93,10 +93,17 @@ bool Msg9b::addCatRecs ( char *urls ,
char *e = p; while ( *e && ! is_wspace_a (*e) ) e++;
// . set the url
// . but don't add the "www."
// . watch out for
// http://twitter.com/#!/ronpaul to http://www.twitter.com/
// so do not strip # hashtags
Url site;
site.set ( p , e - p , false/*addwww?*/);
site.set ( p , e - p , false ); // addwww?
// normalize the url
g_catdb.normalizeUrl(&site, &site);
// sanity
if ( numCatids[k] > MAX_CATIDS ) { char *xx=NULL;*xx=0; }
// make a siteRec from this url
CatRec sr;
// returns false and sets g_errno on error
@ -107,6 +114,16 @@ bool Msg9b::addCatRecs ( char *urls ,
char *data = sr.getData ();
long dataSize = sr.getDataSize ();
key_t key;
// sanity test
CatRec cr2;
if ( ! cr2.set ( NULL , sr.getData(), sr.getDataSize(),false)){
char *xx=NULL;*xx=0; }
// debug when generating catdb
//char *x = p;
//for ( ; x<e ; x++ ) {
// if ( x[0] == '#' )
// log("hey");
//}
if ( numCatids[k] == 0 )
key = g_catdb.makeKey(&site, true);
else
@ -123,7 +140,23 @@ bool Msg9b::addCatRecs ( char *urls ,
}
else if ( ! m_list.addRecord ( key, dataSize, data ) )
return true;
/*
// debug point
SafeBuf sb;
//sb.safeMemcpy(p , e-p );
sb.safeStrcpy(sr.m_url);
sb.safePrintf(" ");
for ( long i = 0 ; i < numCatids[k] ; i++ )
sb.safePrintf ( "%li " , catids[c+i] );
log("catdb: adding key=%s url=%s",
KEYSTR(&key,12),
sb.getBufStart());
*/
// debug
//log("gencat: adding url=%s",sr.m_url);
//skip:
// now advance p to e
p = e;
@ -133,7 +166,8 @@ bool Msg9b::addCatRecs ( char *urls ,
QUICKPOLL((niceness));
}
log ( LOG_INFO, "Msg9b: %li sites and %li links added", k , c );
log ( LOG_INFO, "Msg9b: %li sites and %li links added. "
"listSize=%li", k , c , m_list.m_listSize );
// . now add the m_list to tagdb using msg1
// . use high priority (niceness of 0)
// . i raised niceness from 0 to 1 so multicast does not use the

@ -66,7 +66,8 @@ bool sendPageAddUrl ( TcpSocket *s , HttpRequest *r ) {
// see if they provided a url of a file of urls if they did not
// provide a url to add directly
bool isAdmin = g_collectiondb.isAdmin ( r , s );
//bool isAdmin = g_collectiondb.isAdmin ( r , s );
bool isAdmin = r->getIsLocal();
long ufuLen = 0;
char *ufu = NULL;
if ( isAdmin )

@ -105,8 +105,8 @@ bool sendPageCatdb ( TcpSocket *s , HttpRequest *r ) {
st->m_url.set(url, urlLen);
// call msg8b to lookup in catdb
if (!st->m_msg8b.getCatRec ( &st->m_url,
st->m_coll,
st->m_collLen,
NULL,//st->m_coll,
0,//st->m_collLen,
true,
1,
&st->m_catRec,

@ -3,6 +3,10 @@
#include "CollectionRec.h"
#include "Pages.h"
#include "Categories.h"
#include "PageResults.h" // printDMOZSubtopics()
// function is in PageRoot.cpp:
bool printDirHomePage ( SafeBuf &sb , HttpRequest *r ) ;
// . returns false if blocked, true otherwise
// . sets g_errno on error
@ -36,14 +40,61 @@ bool sendPageDirectory ( TcpSocket *s , HttpRequest *r ) {
break;
}
}
// look it up
// look it up. returns catId <= 0 if dmoz not setup yet.
long catId = g_categories->getIdFromPath(decodedPath, decodedPathLen);
SafeBuf sb;
long xml = r->getLong("xml",0);
// if /Top print the directory homepage
if ( catId == 1 || catId <= 0 ) {
// this is in PageRoot.cpp
printDirHomePage(sb,r);
}
//
// try printing this shit out not as search results right now
// but just verbatim from dmoz files
//
else {
// search box
printLogoAndSearchBox(sb,r,catId);
// radio buttons for search dmoz. no, this is printed
// from call to printLogoAndSearchBox()
//printDmozRadioButtons(sb,catId);
// the dmoz breadcrumb
printDMOZCrumb ( sb,catId,xml);
// print the subtopcis in this topic. show as links above
// the search results
printDMOZSubTopics ( sb, catId , xml );
// ok, for now just print the dmoz topics since our search
// results will be empty... until populated!
g_categories->printUrlsInTopic ( &sb , catId );
}
return g_httpServer.sendDynamicPage ( s,
(char*) sb.getBufStart(),
sb.length(),
// 120 seconds cachetime
// don't cache anymore
// since
// we have the login bar
// @ the top of the page
0,//120, // cachetime
false,// post?
"text/html",
200,
NULL, // cookie
"UTF-8",
r);
// . make a new request for PageResults
//Url dirUrl;
char requestBuf[1024+MAX_COLL_LEN+128];
long requestBufSize = 1024+MAX_COLL_LEN+128;
//g_categories.createDirectorySearchUrl ( &dirUrl,
log("dmoz: creating search request");
long requestBufLen = g_categories->createDirSearchRequest(
requestBuf,
requestBufSize,

@ -2494,14 +2494,14 @@ z 122 7a { 123 7b\
" query with \"prefix:sufix\", i.e. \"gbpdcat:1\" will"
" list all pages under the Top category (or all pages"
" in the entire directory).<br>"
" <ul><li>gbdcat - The page is listed directly"
" <ul><li>gbcatid - The page is listed directly"
" under this base category.<br>"
" <li>gbpdcat - The page is listed under this"
" <li>gbpcatid - The page is listed under this"
" category or any child of this category.<br>"
" <li>gbicat - The page is listed indirectly"
" <li>gbicatid - The page is listed indirectly"
" under this base category, meaning it is a page found"
" under a site listed in the base category.<br>"
" <li>gbpicat - The page is listed indirectly"
" <li>gbipcat - The page is listed indirectly"
" under this category, meaning it is a page found under"
" a site listed under this category or any child of"
" this category.<br>"

@ -14,11 +14,11 @@ bool sendPagePerf ( TcpSocket *s , HttpRequest *r ) {
// allow connection if i'm running this on lenny, too
//if ( s->m_ip != matt1 && s->m_ip != matt2 )
// return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));
long refreshLen = 0;
if(r->getString ( "refresh" , &refreshLen) ) {
g_stats.dumpGIF ();
return g_httpServer.sendDynamicPage ( s , "x", 1 );
}
//long refreshLen = 0;
//if(r->getString ( "refresh" , &refreshLen) ) {
// g_stats.dumpGIF ();
// return g_httpServer.sendDynamicPage ( s , "x", 1 );
//}
// don't allow pages bigger than 128k in cache
char buf [ 64*1024 ];
@ -77,7 +77,7 @@ bool sendPagePerf ( TcpSocket *s , HttpRequest *r ) {
// dump stats to /tmp/diskGraph.gif
g_stats.dumpGIF ();
//g_stats.dumpGIF ();
if(autoRefresh > 0)
p.safePrintf("<body onLoad=\"timeit();\">");
@ -105,8 +105,13 @@ bool sendPagePerf ( TcpSocket *s , HttpRequest *r ) {
p.safePrintf(
//"<center>Disk Statistics<br><br>"
"<center><br>"
"<img name=\"diskgraph\" src=/diskGraph%li.gif><br><br>",
g_hostdb.m_hostId );
//"<img name=\"diskgraph\"
//src=/diskGraph%li.gif><br><br>",
//g_hostdb.m_hostId );
);
// now try using absolute divs instead of a GIF
g_stats.printGraphInHtml ( p );
if(autoRefresh > 0) {
if(refresh) *(refresh+4) = '0';

File diff suppressed because it is too large Load Diff

@ -3,6 +3,9 @@
#include "SafeBuf.h"
bool printDmozRadioButtons ( SafeBuf &sb , long catId ) ;
bool printLogoAndSearchBox ( SafeBuf &sb , class HttpRequest *hr, long catId );
bool printTermPairs ( SafeBuf &sb , class Query *q , class PairScore *ps ) ;
bool printSingleTerm ( SafeBuf &sb , class Query *q , class SingleScore *ss );
@ -17,6 +20,9 @@ bool printEventAddress ( SafeBuf &sb , char *addrStr , class SearchInput *si ,
double eventGeocoderLon,
char *eventBestPlaceName );
bool printDMOZCrumb ( SafeBuf &sb , long catId , bool xml ) ;
bool printDMOZSubTopics ( SafeBuf& sb, long catId, bool inXml ) ;
bool printEventCountdown2 ( SafeBuf &sb ,
SearchInput *si,
long now ,

@ -115,7 +115,7 @@ bool printWebHomePage ( SafeBuf &sb , HttpRequest *r ) {
sb.safePrintf("<br><br>\n");
sb.safePrintf("<br><br><br>\n");
sb.safePrintf("<b>web</b> &nbsp;&nbsp;&nbsp;&nbsp; <a href=/seo>seo</a> &nbsp;&nbsp;&nbsp;&nbsp; <a href=\"http://www.gigablast.com/?c=dmoz3\">directory</a> &nbsp;&nbsp;&nbsp;&nbsp; \n");
sb.safePrintf("<b>web</b> &nbsp;&nbsp;&nbsp;&nbsp; <a href=/seo>seo</a> &nbsp;&nbsp;&nbsp;&nbsp; <a href=\"/Top\">directory</a> &nbsp;&nbsp;&nbsp;&nbsp; \n");
sb.safePrintf("<a href=/adv.html>advanced search</a>");
sb.safePrintf(" &nbsp;&nbsp;&nbsp;&nbsp; ");
sb.safePrintf("<a href=/addurl title=\"Instantly add your url to "
@ -325,7 +325,7 @@ bool printAddUrlHomePage ( SafeBuf &sb , char *url , HttpRequest *r ) {
sb.safePrintf("<br><br>\n");
sb.safePrintf("<br><br><br>\n");
sb.safePrintf("<a href=/>web</a> &nbsp;&nbsp;&nbsp;&nbsp; <a href=/seo>seo</a> &nbsp;&nbsp;&nbsp;&nbsp; <a href=\"http://www.gigablast.com/?c=dmoz3\">directory</a> &nbsp;&nbsp;&nbsp;&nbsp; \n");
sb.safePrintf("<a href=/>web</a> &nbsp;&nbsp;&nbsp;&nbsp; <a href=/seo>seo</a> &nbsp;&nbsp;&nbsp;&nbsp; <a href=\"/Top\">directory</a> &nbsp;&nbsp;&nbsp;&nbsp; \n");
sb.safePrintf("<a href=/adv.html>advanced search</a>");
sb.safePrintf(" &nbsp;&nbsp;&nbsp;&nbsp; ");
sb.safePrintf("<b title=\"Instantly add your url to Gigablast's "
@ -474,6 +474,8 @@ bool printDirHomePage ( SafeBuf &sb , HttpRequest *r ) {
sb.safePrintf("<form method=get "
"action=/search name=f>\n");
sb.safePrintf("<input name=q type=text size=60 value=\"\">&nbsp;<input type=\"submit\" value=\"Search Green\">\n");
sb.safePrintf("<input type=hidden "
"name=prepend value=\"gbipcatid:2\">");
sb.safePrintf("\n");
sb.safePrintf("</form>\n");
sb.safePrintf("<br>\n");
@ -570,10 +572,10 @@ bool sendPageRoot ( TcpSocket *s , HttpRequest *r, char *cookie ) {
*/
if ( ! strcmp(coll,"dmoz3" ) )
printDirHomePage(sb,r);
else
printWebHomePage(sb,r);
//if ( ! strcmp(coll,"dmoz" ) )
// printDirHomePage(sb,r);
//else
printWebHomePage(sb,r);
// . print last 5 queries
@ -947,136 +949,151 @@ long printLastQueries ( char *p , char *pend ) {
//char *printTopDirectory ( char *p, char *pend ) {
bool printTopDirectory ( SafeBuf& sb ) {
// if no recs in catdb, print instructions
if ( g_catdb.getRdb()->getNumTotalRecs() == 0 )
return sb.safePrintf("<center>"
"<b>DMOZ functionality is not set up.</b>"
"<br>"
"<br>"
"<b>"
"Please follow the set up "
"<a href=/admin.html#dmoz>"
"instructions"
"</a>."
"</b>"
"</center>");
//char topList[4096];
//sprintf(topList,
return sb.safePrintf (
"<center>"
"<table cellspacing=\"4\" cellpadding=\"4\"><tr><td valign=top>\n"
"<b><a href=\"/Arts/\">Arts</a></b><br>"
"<b><a href=\"/Top/Arts/\">Arts</a></b><br>"
"<small>"
"<a href=\"/Arts/Movies/\">Movies</a>, "
"<a href=\"/Arts/Television/\">Television</a>, "
"<a href=\"/Arts/Music/\">Music</a>..."
"<a href=\"/Top/Arts/Movies/\">Movies</a>, "
"<a href=\"/Top/Arts/Television/\">Television</a>, "
"<a href=\"/Top/Arts/Music/\">Music</a>..."
"</small>\n"
"</td><td valign=top>"
"<b><a href=\"/Business/\">Business</a></b><br>"
"<b><a href=\"/Top/Business/\">Business</a></b><br>"
"<small>"
"<a href=\"/Business/Employment/\">Jobs</a>, "
"<a href=\"/Business/Real_Estate/\">Real Estate</a>, "
"<a href=\"/Business/Investing/\">Investing</a>..."
"<a href=\"/Top/Business/Employment/\">Jobs</a>, "
"<a href=\"/Top/Business/Real_Estate/\">Real Estate</a>, "
"<a href=\"/Top/Business/Investing/\">Investing</a>..."
"</small>\n"
"</td><td valign=top>"
"<b><a href=\"/Computers/\">Computers</a></b><br>"
"<b><a href=\"/Top/Computers/\">Computers</a></b><br>"
"<small>"
"<a href=\"/Computers/Internet/\">Internet</a>, "
"<a href=\"/Computers/Software/\">Software</a>, "
"<a href=\"/Computers/Hardware/\">Hardware</a>..."
"<a href=\"/Top/Computers/Internet/\">Internet</a>, "
"<a href=\"/Top/Computers/Software/\">Software</a>, "
"<a href=\"/Top/Computers/Hardware/\">Hardware</a>..."
"</small>\n"
"</td></tr><tr><td valign=top>"
"<b><a href=\"/Games/\">Games</a></b><br>"
"<b><a href=\"/Top/Games/\">Games</a></b><br>"
"<small>"
"<a href=\"/Games/Video_Games/\">Video Games</a>, "
"<a href=\"/Games/Roleplaying/\">RPGs</a>, "
"<a href=\"/Games/Gambling/\">Gambling</a>..."
"<a href=\"/Top/Games/Video_Games/\">Video Games</a>, "
"<a href=\"/Top/Games/Roleplaying/\">RPGs</a>, "
"<a href=\"/Top/Games/Gambling/\">Gambling</a>..."
"</small>\n"
"</td><td valign=top>"
"<b><a href=\"/Health/\">Health</a></b><br>"
"<b><a href=\"/Top/Health/\">Health</a></b><br>"
"<small>"
"<a href=\"/Health/Fitness/\">Fitness</a>, "
"<a href=\"/Health/Medicine/\">Medicine</a>, "
"<a href=\"/Health/Alternative/\">Alternative</a>..."
"<a href=\"/Top/Health/Fitness/\">Fitness</a>, "
"<a href=\"/Top/Health/Medicine/\">Medicine</a>, "
"<a href=\"/Top/Health/Alternative/\">Alternative</a>..."
"</small>\n"
"</td><td valign=top>"
"<b><a href=\"/Home/\">Home</a></b><br>"
"<b><a href=\"/Top/Home/\">Home</a></b><br>"
"<small>"
"<a href=\"/Home/Family/\">Family</a>, "
"<a href=\"/Home/Consumer_Information/\">Consumers</a>, "
"<a href=\"/Home/Cooking/\">Cooking</a>..."
"<a href=\"/Top/Home/Family/\">Family</a>, "
"<a href=\"/Top/Home/Consumer_Information/\">Consumers</a>, "
"<a href=\"/Top/Home/Cooking/\">Cooking</a>..."
"</small>\n"
"</td></tr><tr><td valign=top>"
//"<b><a href=\"/Kids_and_Teens/\">"
//"<b><a href=\"/Top/Kids_and_Teens/\">"
//"<font color=\"#ff0000\">K</font>"
//"<font color=\"339900\">i</font>"
//"<font color=\"#ff6600\">d</font>"
//"<font color=\"#0066ff\">s</font>"
//" and Teens</a></b><br>"
"<b><a href=\"/Kids_and_Teens/\">Kids and Teens</a></b><br>"
"<b><a href=\"/Top/Kids_and_Teens/\">Kids and Teens</a></b><br>"
"<small>"
"<a href=\"/Kids_and_Teens/Arts/\">Arts</a>, "
"<a href=\"/Kids_and_Teens/School_Time/\">School Time</a>, "
"<a href=\"/Kids_and_Teens/Teen_Life/\">Teen Life</a>..."
"<a href=\"/Top/Kids_and_Teens/Arts/\">Arts</a>, "
"<a href=\"/Top/Kids_and_Teens/School_Time/\">School Time</a>, "
"<a href=\"/Top/Kids_and_Teens/Teen_Life/\">Teen Life</a>..."
"</small>\n"
"</td><td valign=top>"
"<b><a href=\"/News/\">News</a></b><br>"
"<b><a href=\"/Top/News/\">News</a></b><br>"
"<small>"
"<a href=\"/News/Media/\">Media</a>, "
"<a href=\"/News/Newspapers/\">Newspapers</a>, "
"<a href=\"/News/Weather/\">Weather</a>..."
"<a href=\"/Top/News/Media/\">Media</a>, "
"<a href=\"/Top/News/Newspapers/\">Newspapers</a>, "
"<a href=\"/Top/News/Weather/\">Weather</a>..."
"</small>\n"
"</td><td valign=top>"
"<b><a href=\"/Recreation/\">Recreation</a></b><br>"
"<b><a href=\"/Top/Recreation/\">Recreation</a></b><br>"
"<small>"
"<a href=\"/Recreation/Travel/\">Travel</a>, "
"<a href=\"/Recreation/Food/\">Food</a>, "
"<a href=\"/Recreation/Outdoors/\">Outdoors</a>, "
"<a href=\"/Recreation/Humor/\">Humor</a>..."
"<a href=\"/Top/Recreation/Travel/\">Travel</a>, "
"<a href=\"/Top/Recreation/Food/\">Food</a>, "
"<a href=\"/Top/Recreation/Outdoors/\">Outdoors</a>, "
"<a href=\"/Top/Recreation/Humor/\">Humor</a>..."
"</small>\n"
"</td></tr><tr><td valign=top>"
"<b><a href=\"/Reference/\">Reference</a></b><br>"
"<b><a href=\"/Top/Reference/\">Reference</a></b><br>"
"<small>"
"<a href=\"/Reference/Maps/\">Maps</a>, "
"<a href=\"/Reference/Education/\">Education</a>, "
"<a href=\"/Reference/Libraries/\">Libraries</a>..."
"<a href=\"/Top/Reference/Maps/\">Maps</a>, "
"<a href=\"/Top/Reference/Education/\">Education</a>, "
"<a href=\"/Top/Reference/Libraries/\">Libraries</a>..."
"</small>\n"
"</td><td valign=top>"
"<b><a href=\"/Regional/\">Regional</a></b><br>"
"<b><a href=\"/Top/Regional/\">Regional</a></b><br>"
"<small>"
"<a href=\"/Regional/North_America/United_States/\">US</a>, "
"<a href=\"/Regional/North_America/Canada/\">Canada</a>, "
"<a href=\"/Regional/Europe/United_Kingdom/\">UK</a>, "
"<a href=\"/Regional/Europe/\">Europe</a>..."
"<a href=\"/Top/Regional/North_America/United_States/\">US</a>, "
"<a href=\"/Top/Regional/North_America/Canada/\">Canada</a>, "
"<a href=\"/Top/Regional/Europe/United_Kingdom/\">UK</a>, "
"<a href=\"/Top/Regional/Europe/\">Europe</a>..."
"</small>\n"
"</td><td valign=top>"
"<b><a href=\"/Science/\">Science</a></b><br>"
"<b><a href=\"/Top/Science/\">Science</a></b><br>"
"<small>"
"<a href=\"/Science/Biology/\">Biology</a>, "
"<a href=\"/Science/Social_Sciences/Psychology/\">Psychology</a>, "
"<a href=\"/Science/Physics/\">Physics</a>..."
"<a href=\"/Top/Science/Biology/\">Biology</a>, "
"<a href=\"/Top/Science/Social_Sciences/Psychology/\">Psychology</a>, "
"<a href=\"/Top/Science/Physics/\">Physics</a>..."
"</small>\n"
"</td></tr><tr><td valign=top>"
"<b><a href=\"/Shopping/\">Shopping</a></b><br>"
"<b><a href=\"/Top/Shopping/\">Shopping</a></b><br>"
"<small>"
"<a href=\"/Shopping/Vehicles/Autos/\">Autos</a>, "
"<a href=\"/Shopping/Clothing/\">Clothing</a>, "
"<a href=\"/Shopping/Gifts/\">Gifts</a>..."
"<a href=\"/Top/Shopping/Vehicles/Autos/\">Autos</a>, "
"<a href=\"/Top/Shopping/Clothing/\">Clothing</a>, "
"<a href=\"/Top/Shopping/Gifts/\">Gifts</a>..."
"</small>\n"
"</td><td valign=top>"
"<b><a href=\"/Society/\">Society</a></b><br>"
"<b><a href=\"/Top/Society/\">Society</a></b><br>"
"<small>"
"<a href=\"/Society/People/\">People</a>, "
"<a href=\"/Society/Religion_and_Spirituality/\">Religion</a>, "
"<a href=\"/Society/Issues/\">Issues</a>..."
"<a href=\"/Top/Society/People/\">People</a>, "
"<a href=\"/Top/Society/Religion_and_Spirituality/\">Religion</a>, "
"<a href=\"/Top/Society/Issues/\">Issues</a>..."
"</small>\n"
"</td><td valign=top>"
"<b><a href=\"/Sports/\">Sports</a></b><br>"
"<b><a href=\"/Top/Sports/\">Sports</a></b><br>"
"<small>"
"<a href=\"/Sports/Baseball/\">Baseball</a>, "
"<a href=\"/Sports/Soccer/\">Soccer</a>, "
"<a href=\"/Sports/Basketball/\">Basketball</a>..."
"<a href=\"/Top/Sports/Baseball/\">Baseball</a>, "
"<a href=\"/Top/Sports/Soccer/\">Soccer</a>, "
"<a href=\"/Top/Sports/Basketball/\">Basketball</a>..."
"</small>\n"
"</td></tr>"
"<tr><td colspan=3 valign=top>"
"<b><a href=\"/World/\">World</a></b><br>"
"<b><a href=\"/Top/World/\">World</a></b><br>"
"<small>"
"<a href=\"/World/Deutsch/\">Deutsch</a>, "
"<a href=\"/World/Espa%%c3%%b1ol/\">Espa%c%col</a>, "
"<a href=\"/World/Fran%%c3%%a7ais/\">Fran%c%cais</a>, "
"<a href=\"/World/Italiano/\">Italiano</a>, "
"<a href=\"/World/Japanese/\">Japanese</a>, "
"<a href=\"/World/Nederlands/\">Nederlands</a>, "
"<a href=\"/World/Polska/\">Polska</a>, "
"<a href=\"/World/Dansk/\">Dansk</a>, "
"<a href=\"/World/Svenska/\">Svenska</a>..."
"<a href=\"/Top/World/Deutsch/\">Deutsch</a>, "
"<a href=\"/Top/World/Espa%%c3%%b1ol/\">Espa%c%col</a>, "
"<a href=\"/Top/World/Fran%%c3%%a7ais/\">Fran%c%cais</a>, "
"<a href=\"/Top/World/Italiano/\">Italiano</a>, "
"<a href=\"/Top/World/Japanese/\">Japanese</a>, "
"<a href=\"/Top/World/Nederlands/\">Nederlands</a>, "
"<a href=\"/Top/World/Polska/\">Polska</a>, "
"<a href=\"/Top/World/Dansk/\">Dansk</a>, "
"<a href=\"/Top/World/Svenska/\">Svenska</a>..."
"</small>\n"
"</td></tr></table></center>\n",
195, 177, 195, 167);

@ -1087,12 +1087,12 @@ bool sendPageStats ( TcpSocket *s , HttpRequest *r ) {
//g_tfndb.getRdb(),
g_tagdb.getRdb(),
g_clusterdb.getRdb(),
//g_catdb.getRdb(),
g_linkdb.getRdb(),
g_cachedb.getRdb(),
g_serpdb.getRdb(),
g_monitordb.getRdb(),
g_statsdb.getRdb()
g_statsdb.getRdb(),
g_catdb.getRdb()
//g_placedb.getRdb() ,
//g_sectiondb.getRdb()
};

@ -67,7 +67,8 @@ bool sendPageStatsdb ( TcpSocket *s, HttpRequest *r ) {
st->m_niceness = MAX_NICENESS;
st->m_socket = s;
st->m_request = *r;
//st->m_request = *r;
st->m_request.copy ( r );
// hostId must be one of the following:
// 0-n - a valid hostId
@ -120,7 +121,9 @@ bool sendPageStatsdb ( TcpSocket *s, HttpRequest *r ) {
st->m_endDate = st->m_endDateR;
}
//
// this is no longer a gif, but an html graph in g_statsdb.m_sb
//
if ( ! g_statsdb.makeGIF ( st->m_endDateR ,
st->m_startDateR ,
st->m_samples ,
@ -211,15 +214,28 @@ void sendReply ( void *state ) {
buf.safePrintf("<table cellpadding=10 border=0>\n");
buf.safePrintf("<tr><td>"
"<center>"
"<img src=\"/stats%li.gif\" height=%li width=%li "
"border=\"0px\">"
"</center>"
"<center>");
/////////////////////////
//
// insert the div graph here
//
/////////////////////////
buf.cat ( g_statsdb.m_gw );
// purge it
g_statsdb.m_gw.purge();
g_statsdb.m_dupTable.reset();
//"<img src=\"/stats%li.gif\" height=%li width=%li "
//"border=\"0px\">"
//st->m_hostId,
//g_statsdb.getImgHeight(),
//g_statsdb.getImgWidth());
buf.safePrintf("</center>"
//"class=\"statsdb_image\">"
"</td></tr>\n",
st->m_hostId,
g_statsdb.getImgHeight(),
g_statsdb.getImgWidth());
"</td></tr>\n");
// the map key
buf.safePrintf("<tr><td>");

@ -384,7 +384,10 @@ long Pages::getDynamicPageNumber ( HttpRequest *r ) {
}
// sanity
if ( ! g_categories ) log("process: no categories loaded");
// look it up for a category
//
// dmoz - look it up for a category
//
if ( g_categories &&
g_categories->getIndexFromPath(decodedPath, decodedPathLen) >= 0)
return PAGE_DIRECTORY;
@ -497,6 +500,10 @@ bool Pages::sendDynamicReply ( TcpSocket *s , HttpRequest *r , long page ) {
// log("login: access denied 3 from ip=%s",iptoa(s->m_ip));
// return sendPageLogin(s,r,"Access Denied. Bad or no password.");
//}
if ( ! publicPage && ! isLocal && ! isLoopback ) {
log("login: access denied 2 from ip=%s",iptoa(s->m_ip));
return sendPageLogin ( s , r, "Access Denied. No permission.");
}
g_errno = 0;
@ -635,7 +642,6 @@ bool Pages::sendDynamicReply ( TcpSocket *s , HttpRequest *r , long page ) {
// . now, so it can be responsible for calling pg->m_function
//if ( userType > USER_PUBLIC ) {
// check if user has public page access
//if ( g_users.hasPermission( r, page , s ) ) {
if ( isLocal ) { //g_users.hasPermission( r, page , s )){
// . this will set various parms
// . we know the request came from a host in the cluster

@ -4657,6 +4657,7 @@ void Parms::init ( ) {
m->m_type = TYPE_LONG;
m++;
/*
m->m_title = "catdb min files to merge";
m->m_desc = "";
m->m_off = (char *)&g_conf.m_catdbMinFilesToMerge - g;
@ -4665,7 +4666,6 @@ void Parms::init ( ) {
m->m_save = 0;
m++;
/*
m->m_title = "revdb max tree mem";
m->m_desc = "Revdb holds the meta list we added for this doc.";
m->m_off = (char *)&g_conf.m_revdbMaxTreeMem - g;

@ -6,7 +6,7 @@
#include "Clusterdb.h"
#include "Hostdb.h"
#include "Tagdb.h"
//#include "Catdb.h"
#include "Catdb.h"
#include "Posdb.h"
#include "Cachedb.h"
#include "Monitordb.h"
@ -56,7 +56,7 @@ long g_qbufNeedSave = 0;
extern void resetPageAddUrl ( );
extern void resetHttpMime ( );
extern void reset_iana_charset ( );
extern void resetAdultBit ( );
//extern void resetAdultBit ( );
extern void resetDomains ( );
extern void resetEntities ( );
extern void resetQuery ( );
@ -411,7 +411,7 @@ bool Process::init ( ) {
m_rdbs[m_numRdbs++] = g_spiderdb.getRdb ();
m_rdbs[m_numRdbs++] = g_clusterdb.getRdb ();
m_rdbs[m_numRdbs++] = g_tagdb.getRdb ();
//m_rdbs[m_numRdbs++] = g_catdb.getRdb ();
m_rdbs[m_numRdbs++] = g_catdb.getRdb ();
m_rdbs[m_numRdbs++] = g_statsdb.getRdb ();
m_rdbs[m_numRdbs++] = g_linkdb.getRdb ();
m_rdbs[m_numRdbs++] = g_cachedb.getRdb ();
@ -1660,7 +1660,7 @@ void Process::resetAll ( ) {
rdb->reset();
}
//g_catdb .reset();
g_catdb .reset();
g_collectiondb .reset();
g_categories1 .reset();
g_categories2 .reset();
@ -1712,7 +1712,7 @@ void Process::resetAll ( ) {
resetPageAddUrl();
resetHttpMime();
reset_iana_charset();
resetAdultBit();
//resetAdultBit();
resetDomains();
resetEntities();
resetQuery();
@ -1761,7 +1761,7 @@ void Process::resetPageCaches ( ) {
//g_tfndb .getDiskPageCache()->reset();
//g_checksumdb .getDiskPageCache()->reset();
g_clusterdb .getDiskPageCache()->reset();
//g_catdb .getDiskPageCache()->reset();
g_catdb .getDiskPageCache()->reset();
//g_placedb .getDiskPageCache()->reset();
g_doledb .getDiskPageCache()->reset();
//g_statsdb .getDiskPageCache()->reset();

@ -256,7 +256,7 @@ bool Proxy::initProxy ( long proxyId, unsigned short udpPort,
g_pages.init ( );
// load up the dmoz categories here
char structureFile[256];
sprintf(structureFile, "%scat/gbdmoz.structure.dat", g_hostdb.m_dir);
sprintf(structureFile, "%scatdb/gbdmoz.structure.dat", g_hostdb.m_dir);
g_categories = &g_categories1;
if (g_categories->loadCategories(structureFile) != 0) {
log("cat: Loading Categories From %s Failed.",

36
Rdb.cpp

@ -5,7 +5,7 @@
#include "Clusterdb.h"
#include "Hostdb.h"
#include "Tagdb.h"
//#include "Catdb.h"
#include "Catdb.h"
#include "Indexdb.h"
#include "Posdb.h"
#include "Cachedb.h"
@ -302,8 +302,20 @@ bool Rdb::init ( char *dir ,
if ( ! loadTree ( ) ) return false;
// add the single dummy collection for catdb
//if ( g_catdb.getRdb() == this ) //||
// return g_catdb.addColl ( NULL );
if ( g_catdb.getRdb() == this )
return g_catdb.addColl ( NULL );
if ( g_statsdb.getRdb() == this )
return g_statsdb.addColl ( NULL );
if ( g_cachedb.getRdb() == this )
return g_cachedb.addColl ( NULL );
if ( g_serpdb.getRdb() == this )
return g_serpdb.addColl ( NULL );
//else if ( g_accessdb.getRdb() == this )
// return g_accessdb.addColl ( NULL );
//else if ( g_facebookdb.getRdb() == this )
// return g_facebookdb.addColl ( NULL );
if ( g_syncdb.getRdb() == this )
return g_syncdb.addColl ( NULL );
// set this for use below
//*(long long *)m_gbcounteventsTermId =
@ -1404,7 +1416,7 @@ void attemptMergeAll ( int fd , void *state ) {
g_titledb.getRdb()->attemptMerge ( 1 , false , !state);
//g_tfndb.getRdb()->attemptMerge ( 1 , false , !state);
g_tagdb.getRdb()->attemptMerge ( 1 , false , !state);
//g_catdb.getRdb()->attemptMerge ( 1 , false , !state);
g_catdb.getRdb()->attemptMerge ( 1 , false , !state);
g_clusterdb.getRdb()->attemptMerge ( 1 , false , !state);
g_statsdb.getRdb()->attemptMerge ( 1 , false , !state);
g_syncdb.getRdb()->attemptMerge ( 1 , false , !state);
@ -2035,6 +2047,13 @@ bool Rdb::addRecord ( collnum_t collnum,
}
*/
// debug testing
//if ( m_rdbId == RDB_CATDB ) {
// // show key
// log("rdb: adding key=%s to tree n=%li",KEYSTR(key,12) ,n);
//}
//jumpdown:
// if it exists then annihilate it
@ -2423,7 +2442,7 @@ Rdb *getRdbFromId ( uint8_t rdbId ) {
s_table9 [ RDB_DOLEDB ] = g_doledb.getRdb();
s_table9 [ RDB_TFNDB ] = g_tfndb.getRdb();
s_table9 [ RDB_CLUSTERDB ] = g_clusterdb.getRdb();
//s_table9 [ RDB_CATDB ] = g_catdb.getRdb();
s_table9 [ RDB_CATDB ] = g_catdb.getRdb();
s_table9 [ RDB_DATEDB ] = g_datedb.getRdb();
s_table9 [ RDB_LINKDB ] = g_linkdb.getRdb();
s_table9 [ RDB_CACHEDB ] = g_cachedb.getRdb();
@ -2453,7 +2472,7 @@ Rdb *getRdbFromId ( uint8_t rdbId ) {
// the opposite of the above
char getIdFromRdb ( Rdb *rdb ) {
if ( rdb == g_tagdb.getRdb () ) return RDB_TAGDB;
//if ( rdb == g_catdb.getRdb () ) return RDB_CATDB;
if ( rdb == g_catdb.getRdb () ) return RDB_CATDB;
if ( rdb == g_indexdb.getRdb () ) return RDB_INDEXDB;
if ( rdb == g_posdb.getRdb () ) return RDB_POSDB;
if ( rdb == g_datedb.getRdb () ) return RDB_DATEDB;
@ -2474,7 +2493,7 @@ char getIdFromRdb ( Rdb *rdb ) {
if ( rdb == g_revdb.getRdb () ) return RDB_REVDB;
//if ( rdb == g_sitedb.getRdb () ) return RDB_SITEDB;
//if ( rdb == g_tagdb2.getRdb () ) return RDB2_SITEDB2;
//if ( rdb == g_catdb.getRdb () ) return RDB_CATDB;
if ( rdb == g_catdb.getRdb () ) return RDB_CATDB;
if ( rdb == g_indexdb2.getRdb () ) return RDB2_INDEXDB2;
if ( rdb == g_posdb2.getRdb () ) return RDB2_POSDB2;
if ( rdb == g_datedb2.getRdb () ) return RDB2_DATEDB2;
@ -2498,7 +2517,7 @@ char getIdFromRdb ( Rdb *rdb ) {
char isSecondaryRdb ( uint8_t rdbId ) {
switch ( rdbId ) {
//case RDB2_SITEDB2 : return true;
//case RDB_CATDB2 : return g_catdb2.getRdb();
case RDB2_CATDB2 : return true;
case RDB2_INDEXDB2 : return true;
case RDB2_POSDB2 : return true;
case RDB2_DATEDB2 : return true;
@ -2606,6 +2625,7 @@ long getDataSizeFromRdbId ( uint8_t rdbId ) {
else if ( i == RDB2_TITLEDB2 ||
i == RDB2_REVDB2 ||
i == RDB2_TAGDB2 ||
i == RDB2_CATDB2 ||
i == RDB2_SPIDERDB2 ||
i == RDB2_PLACEDB2 )
ds = -1;

1
Rdb.h

@ -53,6 +53,7 @@ enum {
RDB2_REVDB2,
RDB2_TAGDB2,
RDB2_POSDB2, // 31
RDB2_CATDB2,
RDB_END
};
// how many rdbs are in "urgent merge" mode?

@ -1244,7 +1244,8 @@ void initTable ( ) {
}
}
bool SafeBuf::urlEncode ( bool spaceToPlus ) {
// url encode the whole buffer
bool SafeBuf::urlEncodeAllBuf ( bool spaceToPlus ) {
// this makes things faster
if ( ! s_init23 ) initTable();
// how many chars do we need?

@ -229,11 +229,15 @@ struct SafeBuf {
bool requestPath = false,
bool encodeApostrophes = false );
bool urlEncode (char *s ,
bool encodeApostrophes = false ) {
bool urlEncode (char *s ) {
return urlEncode ( s,strlen(s),false,false); };
bool urlEncode2 (char *s ,
bool encodeApostrophes ) { // usually false
return urlEncode ( s,strlen(s),false,encodeApostrophes); };
bool urlEncode ( bool spaceToPlus = true );
bool urlEncodeAllBuf ( bool spaceToPlus = true );
bool latin1CdataEncode(char *s, long len);
bool utf8CdataEncode(char *s, long len);

@ -711,7 +711,6 @@ m if (! cr->hasSearchPermission ( sock, encapIp ) ) {
// . sets m_qbuf1 and m_qbuf2
if ( ! setQueryBuffers ( r ) ) return false;
/* --- Virtual host language detection --- */
if(r->getHost()) {
bool langset = getLanguageFromAbbr(m_defaultSortLanguage);
@ -1226,6 +1225,40 @@ bool SearchInput::setQueryBuffers ( HttpRequest *hr ) {
m_displayQuery,
m_displayQueryLen);
//////////
//
// show DMOZ BREADCRUMB if doing a
// "gbpcatid:<catid> |" (Search restricted to category)
// "gbcatid:<catid>" (DMOZ urls in that topic, c=dmoz3)
//
//////////
long pcatId = -1;
long dcatId = -1;
// get the final query
char *q =m_sbuf1.getBufStart();
if ( q ) sscanf(q,"gbpcatid:%li",&pcatId);
if ( q ) sscanf(q,"gbcatid:%li",&dcatId);
// pick the one that is valid
long catId = -1;
if ( pcatId >= 0 ) catId = pcatId;
if ( dcatId >= 0 ) catId = dcatId;
//////
//
// save catid into the state
m_catId = catId;
//
///////
// are we a right to left language like hebrew?
if ( catId > 0 && g_categories->isIdRTL(catId) )
m_isRTL = true;
else
m_isRTL = false;
return true;
}

@ -402,6 +402,9 @@ class SearchInput {
SafeBuf m_sbuf2;
SafeBuf m_sbuf3;
long m_catId;
bool m_isRTL;
// make a cookie from parms with m_flags of PF_COOKIE set
SafeBuf m_cookieBuf;

@ -36,8 +36,10 @@ Sections::Sections ( ) {
}
void Sections::reset() {
if ( m_sections && m_needsFree )
mfree ( m_sections , m_sectionsBufSize , "Sections" );
//if ( m_sections && m_needsFree )
// mfree ( m_sections , m_sectionsBufSize , "Sections" );
m_sectionBuf.purge();
m_sectionPtrBuf.purge();
if ( m_buf && m_bufSize )
mfree ( m_buf , m_bufSize , "sdata" );
if ( m_buf2 && m_bufSize2 )
@ -228,10 +230,20 @@ bool Sections::set ( Words *w ,
max++;
// and each section may create a sentence section
max *= 2;
// truncate if excessive. growSections() will kick in then i guess
// if we need more sections.
if ( max > 1000000 ) {
log("sections: truncating max sections to 1000000");
max = 1000000;
}
//max += 5000;
long need = max * sizeof(Section);
// and we need one section ptr for every word!
need += nw * 4;
//need += nw * 4;
// and a section ptr for m_sorted[]
//need += max * sizeof(Section *);
// set this
@ -240,8 +252,21 @@ bool Sections::set ( Words *w ,
// breathe
QUICKPOLL(m_niceness);
// allocate m_sections[] buffer
// separate buf now for section ptr for each word
if ( ! m_sectionPtrBuf.reserve ( nw *4 ) ) return true;
m_sectionPtrs = (Section **)m_sectionPtrBuf.getBufStart();
m_sectionPtrsEnd = (Section **)m_sectionPtrBuf.getBufEnd();
// allocate m_sectionBuf
m_sections = NULL;
if ( ! m_sectionBuf.reserve ( need ) )
return true;
// point into it
m_sections = (Section *)m_sectionBuf.getBufStart();
/*
// assume no malloc
m_needsFree = false;
if ( need < SECTIONS_LOCALBUFSIZE ) {
@ -259,6 +284,7 @@ bool Sections::set ( Words *w ,
m_sectionsBufSize = need;
m_needsFree = true;
}
*/
// clear it nicely
//memset_nice ( m_sections , 0 , m_sectionsBufSize, m_niceness );
@ -270,20 +296,20 @@ bool Sections::set ( Words *w ,
m_titleEnd = -1;
// bail if no luck
if ( ! m_sections ) return true;
//if ( ! m_sections ) return true;
// point to buf
char *ppp = (char *)m_sections;
//char *ppp = (char *)m_sections;
// skip Sections array
ppp += max * sizeof(Section);
//ppp += max * sizeof(Section);
// assign space for m_sorted
//m_sorted = (Section **)ppp;
// skip that
//ppp += max * sizeof(Section *);
// assign space for our ptrs that are 1-1 with the words array
m_sectionPtrs = (Section **)ppp;
//m_sectionPtrs = (Section **)ppp;
// the end
m_sectionPtrsEnd = (Section **)(ppp + nw * 4);
//m_sectionPtrsEnd = (Section **)(ppp + nw * 4);
// save this too
m_nw = nw;
@ -375,6 +401,10 @@ bool Sections::set ( Words *w ,
if ( fullTid == TAG_INPUT ||
fullTid == TAG_HR ||
fullTid == TAG_COMMENT ) {
// try to realloc i guess. should keep ptrs intact.
if ( m_numSections >= m_maxNumSections &&
! growSections() )
return true;
// get the section
Section *sn = &m_sections[m_numSections];
// clear
@ -397,6 +427,10 @@ bool Sections::set ( Words *w ,
// a section of multiple br tags in a sequence
if ( fullTid == TAG_BR ) {
// try to realloc i guess. should keep ptrs intact.
if ( m_numSections >= m_maxNumSections &&
! growSections() )
return true;
// get the section
Section *sn = &m_sections[m_numSections];
// clear
@ -884,6 +918,9 @@ bool Sections::set ( Words *w ,
// with the address above it, and it shouldn't do that!
if ( tid == TAG_FONT ) continue;
// try to realloc i guess. should keep ptrs intact.
if ( m_numSections >= m_maxNumSections && ! growSections() )
return true;
// get the section
Section *sn = &m_sections[m_numSections];
// clear
@ -11034,8 +11071,11 @@ Section *Sections::insertSubSection ( Section *parentArg , long a , long b ,
// debug
//log("sect: inserting subsection [%li,%li)",a,b);
// sanity check
if ( m_numSections >= m_maxNumSections ) { char *xx=NULL;*xx=0;}
// try to realloc i guess. should keep ptrs intact.
if ( m_numSections >= m_maxNumSections )
// try to realloc i guess
if ( ! growSections() ) return NULL;
//char *xx=NULL;*xx=0;}
//
// make a new section
@ -17270,3 +17310,95 @@ bool Sections::setListFlags ( ) {
Section *ps;
*/
}
bool Sections::growSections ( ) {
// make a log note b/c this should not happen a lot because it's slow
log("build: growing sections!");
// record old buf start
char *oldBuf = m_sectionBuf.getBufStart();
// grow by 20MB at a time
if ( ! m_sectionBuf.reserve ( 20000000 ) ) return false;
// for fixing ptrs:
char *newBuf = m_sectionBuf.getBufStart();
// set the new max
m_maxNumSections = m_sectionBuf.getCapacity() / sizeof(Section);
// update ptrs in the old sections
for ( long i = 0 ; i < m_numSections ; i++ ) {
// breathe
QUICKPOLL(m_niceness);
Section *si = &m_sections[i];
if ( si->m_parent ) {
char *np = (char *)si->m_parent;
np = np - oldBuf + newBuf;
si->m_parent = (Section *)np;
}
if ( si->m_next ) {
char *np = (char *)si->m_next;
np = np - oldBuf + newBuf;
si->m_next = (Section *)np;
}
if ( si->m_prev ) {
char *np = (char *)si->m_prev;
np = np - oldBuf + newBuf;
si->m_prev = (Section *)np;
}
if ( si->m_listContainer ) {
char *np = (char *)si->m_listContainer;
np = np - oldBuf + newBuf;
si->m_listContainer = (Section *)np;
}
if ( si->m_prevBrother ) {
char *np = (char *)si->m_prevBrother;
np = np - oldBuf + newBuf;
si->m_prevBrother = (Section *)np;
}
if ( si->m_nextBrother ) {
char *np = (char *)si->m_nextBrother;
np = np - oldBuf + newBuf;
si->m_nextBrother = (Section *)np;
}
if ( si->m_sentenceSection ) {
char *np = (char *)si->m_sentenceSection;
np = np - oldBuf + newBuf;
si->m_sentenceSection = (Section *)np;
}
if ( si->m_prevSent ) {
char *np = (char *)si->m_prevSent;
np = np - oldBuf + newBuf;
si->m_prevSent = (Section *)np;
}
if ( si->m_nextSent ) {
char *np = (char *)si->m_nextSent;
np = np - oldBuf + newBuf;
si->m_nextSent = (Section *)np;
}
if ( si->m_tableSec ) {
char *np = (char *)si->m_tableSec;
np = np - oldBuf + newBuf;
si->m_tableSec = (Section *)np;
}
if ( si->m_headColSection ) {
char *np = (char *)si->m_headColSection;
np = np - oldBuf + newBuf;
si->m_headColSection = (Section *)np;
}
if ( si->m_headRowSection ) {
char *np = (char *)si->m_headRowSection;
np = np - oldBuf + newBuf;
si->m_headRowSection = (Section *)np;
}
if ( si->m_leftCell ) {
char *np = (char *)si->m_leftCell;
np = np - oldBuf + newBuf;
si->m_leftCell = (Section *)np;
}
if ( si->m_aboveCell ) {
char *np = (char *)si->m_aboveCell;
np = np - oldBuf + newBuf;
si->m_aboveCell = (Section *)np;
}
}
return true;
}

@ -680,7 +680,9 @@ class Sections {
long getStoredSize ( ) ;
static long getStoredSize ( char *p ) ;
long serialize ( char *p ) ;
long getMemUsed ( ) { return m_sectionsBufSize; };
//long getMemUsed ( ) { return m_sectionsBufSize; };
bool growSections ( );
bool getSectiondbList ( );
bool gotSectiondbList ( bool *needsRecall ) ;
@ -828,10 +830,17 @@ class Sections {
// allocate m_sections[] buffer
class Section *m_sections;
long m_sectionsBufSize;
//long m_sectionsBufSize;
long m_numSections;
long m_maxNumSections;
// this holds the Sections instances in a growable array
SafeBuf m_sectionBuf;
// this holds ptrs to sections 1-1 with words array, so we can
// see what section a word is in.
SafeBuf m_sectionPtrBuf;
long m_numSentenceSections;
bool m_firstDateValid;

@ -1537,7 +1537,8 @@ bool Speller::findNext( char *s, char *send, char **nextWord, bool *isPorn,
long slen = send - s;
// check if there is an adult word in there
// NOTE: The word 'adult' gives a lot of false positives, so even
// though it is in the isAdult() list, skip it
// though it is in the isAdult() list, skip it.
// s/slen constitutes an individual word.
if ( isAdult ( s, slen, &loc ) && strncmp ( s, "adult", 5 ) != 0 ){
// if this string starts with the adult word, don't check
// further

@ -3878,7 +3878,7 @@ void SpiderLoop::spiderDoledUrls ( ) {
if ( m_cri >= g_collectiondb.m_numRecs ) m_cri = 0;
// get rec
cr = g_collectiondb.m_recs[m_cri];
// skip if empty
// skip if gone
if ( ! cr ) continue;
// stop if not enabled
if ( ! cr->m_spideringEnabled ) continue;

@ -601,7 +601,13 @@ class SpiderRequest {
// this 0 and to not avoid spidering the links.
long m_avoidSpiderLinks:1;
// for identifying address heavy sites...
long m_tagYellowPages:1;
//long m_tagYellowPages:1;
// when indexing urls for dmoz, i.e. the urls outputted from
// 'dmozparse urldump -s' we need to index them even if there
// was a ETCPTIMEDOUT because we have to have indexed the same
// urls that dmoz has in it in order to be identical to dmoz.
long m_ignoreExternalErrors:1;
// called XmlDoc::set4() from PageSubmit.cpp?
//long m_isPageSubmit:1;

215
Stats.cpp

@ -2,7 +2,7 @@
#include <errno.h>
#include "Stats.h"
#define X_DISPLAY_MISSING 1
//#define X_DISPLAY_MISSING 1
//#include <plotter.h>
#include <math.h>
#include "Conf.h"
@ -133,6 +133,7 @@ void Stats::addStat_r ( long numBytes ,
//pthread_mutex_unlock ( &s_lock );
}
/*
// . dump a graph to /tmp/diskGraph.gif
// . use libplotter.a or .so ?
// . docs at http://www.gnu.org/manual/plotutils/html_mono/plotutils.html#SEC54
@ -341,7 +342,7 @@ void Stats::dumpGIF ( long long startTime , long long endTime ) {
mfree(lrgBuf, lrgSize, "Stats.cpp");
#endif
}
*/
void Stats::addPoint (StatPoint **points ,
long *numPoints ,
@ -486,3 +487,213 @@ void Stats::addSpiderPoint ( long errCode, bool isNew ) {
m_allErrorsOld[errCode]++;
}
}
// . draw a HORIZONTAL line in html by emitting one absolutely-positioned
//   div into "sb"
// . the segment runs from pixel x1 to x2 at vertical position fy1 with
//   the given thickness ("width", in pixels) and background "color"
void drawLine2 ( SafeBuf &sb ,
		 long x1 ,
		 long x2 ,
		 long fy1 ,
		 long color ,
		 long width ) {
	// center the div vertically on fy1, then shift up 20 pixels to
	// line up with the graphing window
	long top = (fy1 - width/2) - 20;
	// horizontal extent of the segment in pixels
	long len = x2 - x1;
	sb.safePrintf("<div style=\"position:absolute;"
		      "left:%li;"
		      "top:%li;"
		      "background-color:#%lx;"
		      "z-index:-5;"
		      "min-height:%lipx;"
		      "min-width:%lipx;\"></div>\n"
		      , x1
		      , top
		      , color
		      , width
		      , len
		      );
}
//
// new code for drawing the graph in html with absolute divs instead
// of using the GIF plotter library which had issues
//
// . print the performance graph into "sb" as a set of absolutely
//   positioned html divs
// . plots the stat points in the m_pts[] ring buffer covering the last
//   DT milliseconds into a DX x DY pixel window, one horizontal bar
//   (drawLine2) per stat point
void Stats::printGraphInHtml ( SafeBuf &sb ) {
	// find time ranges. t2 = the latest end time over all stat points.
	long long t2 = 0;
	for ( long i = 0 ; i < MAX_POINTS ; i++ ) {
		// skip empties
		if ( m_pts[i].m_startTime == 0 ) continue;
		// set max
		if ( m_pts[i].m_endTime > t2 ) t2 = m_pts[i].m_endTime;
	}
	// now compute the start time for the graph
	long long t1 = 0x7fffffffffffffffLL;
	// now recompute t1
	for ( long i = 0 ; i < MAX_POINTS ; i++ ) {
		// skip empties
		if ( m_pts[i].m_startTime == 0 ) continue;
		// ignore points entirely before the window start (t2 - DT)
		if ( m_pts[i].m_startTime < t2 - DT ) continue;
		// otherwise, it's a candidate for the first time
		if ( m_pts[i].m_startTime < t1 ) t1 = m_pts[i].m_startTime;
	}
	//
	// main graphing window
	//
	sb.safePrintf("<div style=\"position:relative;"
		      "background-color:#c0c0c0;"
		      "overflow-x:hidden;"
		      "z-index:-10;"
		      // the tick marks we print below are based on it
		      // being a window of the last 20 seconds... and using
		      // DX pixels
		      "min-width:%lipx;"
		      "min-height:%lipx;"
		      "margin-top:10px;"
		      "margin-bottom:10px;"
		      "margin-right:10px;"
		      "margin-left:10px;\">"
		      ,(long)DX
		      ,(long)DY +20); // add 10 more for "2s" labels etc.
	// x-axis tick marks, one every DX/20 pixels
	for ( int x = DX/20 ; x <= DX ; x += DX/20 ) {
		// tick mark
		sb.safePrintf("<div style=\"position:absolute;"
			      "left:%li;"
			      "bottom:0;"
			      "background-color:#000000;"
			      "z-index:110;"
			      "min-height:20px;"
			      "min-width:3px;\"></div>\n"
			      , (long)x-1
			      );
		// LABEL: elapsed seconds at this tick
		sb.safePrintf("<div style=\"position:absolute;"
			      "left:%li;"
			      "bottom:20;"
			      "z-index:110;"
			      "min-height:20px;"
			      "min-width:3px;\">%lis</div>\n"
			      , (long)x-10
			      // the label:
			      ,(long)(DT * (long long)x / (long long)DX)/1000
			      );
	}
	// . each line consists of several points
	// . we need to know each point for adding other lines
	// . is about [400/6][1024] = 70k
	// . each line can contain multiple data points
	// . each data point is expressed as a horizontal line segment
	void *lrgBuf;
	long lrgSize = 0;
	lrgSize += MAX_LINES * MAX_POINTS * sizeof(StatPoint *);
	lrgSize += MAX_LINES * sizeof(long);
	lrgBuf = (char *) mmalloc(lrgSize, "Stats.cpp");
	if (! lrgBuf) {
		// fixed message: was two adjacent literals with no
		// separator ("...Stats.cpp%li bytes needed")
		log("could not allocate memory for local buffer in "
		    "Stats.cpp. %li bytes needed", lrgSize);
		return;
	}
	// carve the single allocation into points[] and numPoints[]
	char *lrgPtr = (char *)lrgBuf;
	StatPoint **points = (StatPoint **)lrgPtr;
	lrgPtr += MAX_LINES * MAX_POINTS * sizeof(StatPoint *);
	long *numPoints = (long *)lrgPtr;
	lrgPtr += MAX_LINES * sizeof(long);
	memset ( (char *)numPoints , 0 , MAX_LINES * sizeof(long) );
	// . store the data points into "lines"
	// . scan the MAX_POINTS ring buffer slots exactly once starting at
	//   m_next. was "count >= 0" which iterated MAX_POINTS+1 times and
	//   visited the starting slot twice.
	long count = MAX_POINTS;
	for ( long i = m_next ; count > 0 ; i++ , count-- ) {
		// wrap around the array
		if ( i >= MAX_POINTS ) i = 0;
		// skip point if empty
		if ( m_pts[i].m_startTime == 0 ) continue;
		// skip if too early
		if ( m_pts[i].m_endTime < t1 ) continue;
		// . find the lowest line that will hold us
		// . this adds point to points[x][n] where x is determined
		addPoint ( points , numPoints , &m_pts[i] );
	}
	int y1 = 21;
	// plot the points (segments) in each line
	for ( long i = 0 ; i < MAX_LINES ; i++ ) {
		// increase vert
		y1 += MAX_WIDTH + 1;
		// wrap back down if necessary
		if ( y1 >= DY ) y1 = 21;
		// plot all points in this row
		for ( long j = 0 ; j < numPoints[i] ; j++ ) {
			// get the point
			StatPoint *p = points[MAX_POINTS * i + j];
			// transform time to x coordinates
			int x1 = (p->m_startTime - t1) * (long long)DX / DT;
			int x2 = (p->m_endTime   - t1) * (long long)DX / DT;
			// if x2 is negative, skip it
			if ( x2 < 0 ) continue;
			// if x1 is negative, boost it to -2
			if ( x1 < 0 ) x1 = -2;
			// . line thickness is function of read/write size
			// . take logs (math log(), not the logging fn)
			int w = (int)log(((double)p->m_numBytes)/8192.0) + 3;
			if ( w < 3 ) w = 3;
			if ( w > MAX_WIDTH ) w = MAX_WIDTH;
			// ensure at least 3 units wide for visibility
			if ( x2 < x1 + 3 ) x2 = x1 + 3;
			// . flip the y so we don't have to scroll the
			//   browser down
			// . DY does not include the axis and tick marks
			long fy1 = DY - y1 + 20 ;
			// plot it in the color given to addStat_r()
			drawLine2 ( sb , x1 , x2 , fy1 , p->m_color , w );
		}
	}
	// close the main graphing window div
	sb.safePrintf("</div>\n");
	mfree(lrgBuf, lrgSize, "Stats.cpp");
}

@ -25,9 +25,9 @@ class StatPoint {
#define MAX_POINTS 6000
#define MAX_WIDTH 6
#define DY 900 // pixels vertical
#define DY 600 // pixels vertical
#define DX 1000 // pixels across
#define DT (20*1000) // time window, 10 seconds
#define DT (20*1000) // time window, 20 seconds
#define MAX_LINES (DY / (MAX_WIDTH+1)) // leave free pixel above each line
#define STAT_GENERIC 0
@ -53,7 +53,10 @@ class Stats {
// . dumps a bar graph
// . each bar represents a stat in time, from inception to completion
// . useful for seeing possible sources of contention
void dumpGIF ( long long startTime = -1 , long long endTime = -1 );
//void dumpGIF ( long long startTime = -1 , long long endTime = -1 );
void printGraphInHtml ( SafeBuf &sb );
// this graphs:
// 1. stats per second

@ -80,7 +80,7 @@ static Label s_labels[] = {
// . max = -1, means dynamic size the ymax!
// . use 1B for now again...
// . color=pink
{GRAPH_QUANTITY,1000000000.0,"docs_indexed", .1,"%.0fK docs" , .001 , 0x00cc0099,"docs indexed" }
{GRAPH_QUANTITY,50000000.0,"docs_indexed", .1,"%.0fK docs" , .001 , 0x00cc0099,"docs indexed" }
//{ "termlist_intersect",0x0000ff00},
@ -101,6 +101,13 @@ static Label s_labels[] = {
//{ "parm_change",0xffc0c0} // pink?
};
void drawLine3 ( SafeBuf &sb ,
long x1 ,
long x2 ,
long fy1 ,
long color ,
long width ) ;
Label *Statsdb::getLabel ( long labelHash ) {
Label **label = (Label **)m_labelTable.getValue ( &labelHash );
if ( ! label ) return NULL;
@ -116,7 +123,7 @@ bool Statsdb::init ( ) {
// 20 pixel borders
m_bx = 10;
m_by = 30;
m_by = 40;
// keep it at least at 20MB otherwise it is filling up the tree
// constantly and dumping
@ -477,6 +484,11 @@ bool Statsdb::makeGIF ( long t1Arg ,
m_sb3.reset();
m_ht3.reset();
// print graph in here as a bunch of divs now:
m_gw.purge();
m_dupTable.reset();
m_dupTable.set(4,0,20000,NULL,0,false,0,"statstbl");
// . start at t1 and get stats lists, up to 1MB of stats at a time
// . subtract 60 seconds so we can have a better shot at having
// a moving average for the last SAMPLE points
@ -495,6 +507,7 @@ bool Statsdb::makeGIF ( long t1Arg ,
return true;
// open the file for the gif
/*
char fname [ 1024 ];
sprintf ( fname , "%s/stats%li.gif" ,
g_hostdb.m_httpRootDir , g_hostdb.m_hostId );
@ -504,13 +517,16 @@ bool Statsdb::makeGIF ( long t1Arg ,
fname , mstrerror(errno) );
return true;
}
*/
return gifLoop ();
}
#define POINTWIDTH 8
#define MAX_POINTS 6000
#define MAX_WIDTH 6
#define DY 900 // pixels vertical
#define DY 600 // pixels vertical
#define DX 1000 // pixels across
#define MAX_LINES (DY / (MAX_WIDTH+1)) // leave free pixel above each line
@ -542,9 +558,9 @@ bool Statsdb::gifLoop ( ) {
// shortcut
Msg5 *m = &m_msg5;
#ifndef _USEPLOTTER_
return true;
#endif
//#ifndef _USEPLOTTER_
//return true;
//#endif
// loop over all the lists in the time range, [m_t1,m_t2]
for ( ; ! m_done ; ) {
@ -576,53 +592,87 @@ bool Statsdb::gifLoop ( ) {
}
// define time delta - commented out because it's currently not used.
//long dt = m_t2 - m_t1;
long dt = m_t2 - m_t1;
#ifdef _USEPLOTTER_
//#ifdef _USEPLOTTER_
// gif size
char tmp[64];
//char tmp[64];
// dimensions of the gif
sprintf ( tmp , "%lix%li", (long)DX+m_bx*2 , (long)DY+m_by*2 );
GIFPlotter::parampl ( "BITMAPSIZE" , (void *)tmp );
//sprintf ( tmp , "%lix%li", (long)DX+m_bx*2 , (long)DY+m_by*2 );
//GIFPlotter::parampl ( "BITMAPSIZE" , (void *)tmp );
// create one
GIFPlotter plotter ( NULL , m_fd , NULL );
//GIFPlotter plotter ( NULL , m_fd , NULL );
// open it
plotter.openpl ( );
//plotter.openpl ( );
// define the space with boundaries 100 unit wide boundaries
//plotter.space ( -m_bx , -m_by , DX + m_bx , DY + m_by );
plotter.space ( 0 , 0 , DX + m_bx * 2 , DY + m_by * 2 );
//plotter.space ( 0 , 0 , DX + m_bx * 2 , DY + m_by * 2 );
// line thickness in user coordinates (pixels for us)
plotter.linewidth ( 1 );
//plotter.linewidth ( 1 );
// set bg color to gray (r/g/b)
plotter.bgcolor ( 0xd600 , 0xce00 , 0xd600 );
// set bg color to white (r/g/b)
//plotter.bgcolor ( 0xff00 , 0xff00 , 0xff00 );
//plotter.bgcolor ( 0xd600 , 0xce00 , 0xd600 );
// erase Plotter's graphics display
plotter.erase ();
//plotter.erase ();
// draw axises in black
plotter.pencolorname ("black");
//plotter.pencolorname ("black");
//
// main graphing window
//
m_gw.safePrintf("<div style=\"position:relative;"
"background-color:#c0c0c0;"
//"overflow-y:hidden;"
"overflow-x:hidden;"
"z-index:-10;"
// the tick marks we print below are based on it
// being a window of the last 20 seconds... and using
// DX pixels
"min-width:%lipx;"
"min-height:%lipx;"
//"width:100%%;"
//"min-height:600px;"
"margin-top:10px;"
"margin-bottom:10px;"
"margin-right:10px;"
"margin-left:10px;\">"
,(long)DX + 2 *m_bx
,(long)DY + 2*m_by);
// draw the x-axis
plotter.line ( m_bx , m_by , DX + m_bx , m_by );
// draw the y-axis
plotter.line ( m_bx , m_by , m_bx , DY + m_by);
//plotter.line ( m_bx , m_by , DX + m_bx , m_by );
// 10 x-axis tick marks
for ( int x = DX/10 + m_bx ; x < DX - m_bx ; x += DX/10 ) {
for ( int x = DX/20 ; x <= DX ; x += DX/20 ) {
// tick mark
plotter.line ( x , m_by - 15 , x , m_by + 15 );
// generate label
long xv = (long)(dt * (long long)x / (long long)DX) -(long)dt;
char buf [ 32 ];
// in seconds, so put "s" in there
sprintf ( buf , "%lis" , xv );//(float)xv / 1000.0 );
// move cursor
plotter.move ( x , m_by - m_by / 2 - 9 );
// plot label
plotter.alabel ( 'c' , 'c' , buf );
//plotter.line ( x , -20 , x , 20 );
m_gw.safePrintf("<div style=\"position:absolute;"
"left:%li;"
"bottom:0;"
"background-color:#000000;"
"z-index:110;"
"min-height:20px;"
"min-width:3px;\"></div>\n"
, m_bx + (long)x-1
);
long xv = (long)(dt * (long long)x/(long long)DX)-(long)dt;
// LABEL
m_gw.safePrintf("<div style=\"position:absolute;"
"left:%li;"
"bottom:20;"
//"background-color:#000000;"
"z-index:110;"
"min-height:20px;"
"min-width:3px;\">%lis</div>\n"
, (long)x-10 + m_bx
// the label:
, xv
);
}
HashTableX tmpht;
tmpht.set(4,0,0,NULL,0,false,m_niceness,"statsparms");
@ -651,7 +701,7 @@ bool Statsdb::gifLoop ( ) {
// . graph this single graph of this color
// . returns ptr to first point of different color!
plotGraph ( p , pend , gh , &plotter , zoff );
plotGraph ( p , pend , gh , m_gw , zoff );
// prevent collisions
zoff += 20;
@ -709,7 +759,7 @@ bool Statsdb::gifLoop ( ) {
}
// set the line width
plotter.linewidth ( pp->m_thickness );
//plotter.linewidth ( pp->m_thickness );
// get parm hash
long colorHash = pp->m_parmHash;
@ -720,9 +770,9 @@ bool Statsdb::gifLoop ( ) {
// . is really the parm hash in disguise
long c1 = colorHash & 0x00ffffff;
// use the color specified from addStat_r() for this line/pt
plotter.pencolor ( ((c1 >> 16) & 0xff) << 8 ,
((c1 >> 8) & 0xff) << 8 ,
((c1 >> 0) & 0xff) << 8 );
//plotter.pencolor ( ((c1 >> 16) & 0xff) << 8 ,
// ((c1 >> 8) & 0xff) << 8 ,
// ((c1 >> 0) & 0xff) << 8 );
long x1 = pp->m_a;
long x2 = pp->m_b;
@ -731,9 +781,10 @@ bool Statsdb::gifLoop ( ) {
if ( x2 < x1 + 10 ) x2 = x1 + 10;
// . flip the y so we don't have to scroll the browser down
// . DY does not include the axis and tick marks
long fy1 = DY - y1 + m_by ;
//long fy1 = DY - y1 + m_by ;
// plot it
plotter.line ( x1 , fy1 , x2 , fy1 );
//plotter.line ( x1 , fy1 , x2 , fy1 );
drawLine3 ( m_gw , x1 , x2 , y1 , c1 , pp->m_thickness );
// add to map key? only if we haven't already
if ( tmpht.isInTable ( &colorHash ) ) continue;
@ -785,12 +836,15 @@ bool Statsdb::gifLoop ( ) {
//
// all done
if ( plotter.closepl () < 0 )
log("admin: Could not close performance graph object.");
//if ( plotter.closepl () < 0 )
// log("admin: Could not close performance graph object.");
// close the file
fclose ( m_fd );
//fclose ( m_fd );
#endif
//#endif
// close main graphing window
m_gw.safePrintf("</div>\n");
return true;
}
@ -799,15 +853,10 @@ bool Statsdb::gifLoop ( ) {
char *Statsdb::plotGraph ( char *pstart ,
char *pend ,
long graphHash ,
GIFPlotter *plotter ,
//GIFPlotter *plotter ,
SafeBuf &gw ,
long zoff ) {
#ifndef _USEPLOTTER_
return NULL;
#else
// . use "graphHash" to map to unit display
// . this is a disk read volume
Label *label = getLabel ( graphHash );
@ -857,20 +906,16 @@ char *Statsdb::plotGraph ( char *pstart ,
char *retp = p;
// set the line width
plotter->linewidth ( 1 );
//plotter->linewidth ( 1 );
long color = label->m_color;
// use the color specified from addStat_r() for this line/pt
plotter->pencolor ( ((color >> 16) & 0xff) << 8 ,
((color >> 8) & 0xff) << 8 ,
((color >> 0) & 0xff) << 8 );
//plotter->pencolor ( ((color >> 16) & 0xff) << 8 ,
// ((color >> 8) & 0xff) << 8 ,
// ((color >> 0) & 0xff) << 8 );
// how many points per pixel do we have now
//float res = (ymax - ymin) / (float)DY;
// . the minimum difference between ymax and ymin is minDiff.
// . this prevents us from zooming in too close!
float minDiff = (float)DY * label->m_minRes ;
@ -896,7 +941,7 @@ char *Statsdb::plotGraph ( char *pstart ,
// set the line width
plotter->linewidth ( 2 );
//plotter->linewidth ( 2 );
// reset for 2nd scan
p = pstart;
@ -940,8 +985,8 @@ char *Statsdb::plotGraph ( char *pstart ,
// . flip the y so we don't have to scroll the browser down
// . DY does not include the axis and tick marks
// . do not flip y any more for statsdb graphs
long fy1 = (long)(y1+.5) + m_by ;
long fy2 = (long)(y2+.5) + m_by ;
long fy1 = (long)(y1+.5);// + m_by ;
long fy2 = (long)(y2+.5);// + m_by ;
// how are we getting -.469 for "query" point?
if ( fy1 < 0 ) continue;
@ -949,7 +994,10 @@ char *Statsdb::plotGraph ( char *pstart ,
// skip if can't make a line
if ( firstPoint ) {
plotter->circle ( x2 , fy2 , 2 );
//plotter->circle ( x2 , fy2 , 2 );
long width = POINTWIDTH;
// draw a 4x4 box now:
drawLine3(m_gw,x2-width/2,x2+width/2,fy2,color,width);
firstPoint = false;
continue;
}
@ -963,32 +1011,38 @@ char *Statsdb::plotGraph ( char *pstart ,
// plot it
// BUT only iff not more than 5 seconds difference
float secondsPerPixel = (m_t2-m_t1)/(float)DX;
float dt = (x2 - x1) * secondsPerPixel;
//float secondsPerPixel = (m_t2-m_t1)/(float)DX;
// avoid this for now. mdw oct 14 2013.
//float dt = (x2 - x1) * secondsPerPixel;
//if ( dt <= 13 || x2 - x1 <= 10 )
// plotter->line ( x1 , fy1 , x2 , fy2 );
if ( dt <= 13 || x2 - x1 <= 10 )
plotter->line ( x1 , fy1 , x2 , fy2 );
// circle second point
plotter->circle ( x1 , fy1 , 2 );
plotter->circle ( x2 , fy2 , 2 );
//plotter->circle ( x1 , fy1 , 2 );
//plotter->circle ( x2 , fy2 , 2 );
// draw a 4x4 boxes now:
long width = POINTWIDTH;
drawLine3 ( m_gw,x1-width/2, x1+width/2, fy1,color, width);
drawLine3 ( m_gw,x2-width/2, x2+width/2, fy2,color, width);
}
plotter->linewidth ( 1 );
//plotter->linewidth ( 1 );
// plot unit lines
float deltaz = (ymax-ymin) / 6;
if ( strstr(label->m_keyDesc,"latency" ) ) {
// draw it
drawHR ( 400.0 - 111.0 , ymin , ymax , plotter , label , zoff,0xff0000);
drawHR ( 600.0 - 111.0 , ymin , ymax , plotter , label , zoff , color);
drawHR ( 400.0 - 111.0 , ymin,ymax,m_gw,label,zoff,0xff0000);
drawHR ( 600.0-111.0,ymin,ymax,m_gw,label,zoff,color);
}
if ( strstr(label->m_keyDesc,"queries per sec" ) ) {
// draw it
//deltaz /= 2;
//drawHR ( 120.0 , ymin , ymax , plotter , label , zoff , color );
//drawHR ( 130.0 , ymin , ymax , plotter , label , zoff , color );
drawHR ( 140.0 , ymin , ymax , plotter , label , zoff , color );
//drawHR(120.0, ymin , ymax , plotter , label , zoff , color );
//drawHR(130.0, ymin , ymax , plotter , label , zoff , color );
drawHR ( 140.0 , ymin , ymax ,m_gw , label , zoff , color );
}
@ -996,18 +1050,19 @@ char *Statsdb::plotGraph ( char *pstart ,
// breathe
QUICKPOLL ( m_niceness );
// draw it
drawHR ( z , ymin , ymax , plotter , label , zoff , color );
drawHR ( z , ymin , ymax , m_gw , label , zoff , color );
}
return retp;
#endif
//#endif
}
void Statsdb::drawHR ( float z ,
float ymin ,
float ymax ,
GIFPlotter *plotter ,
//GIFPlotter *plotter ,
SafeBuf &gw,
Label *label ,
float zoff ,
long color ) {
@ -1017,29 +1072,34 @@ void Statsdb::drawHR ( float z ,
// avoid collisions with other graphs
z2 += zoff;
// border
z2 += m_by;
//z2 += m_by;
// round off error
z2 += 0.5;
// for adjustment
//float ptsPerPixel = (ymax-ymin)/ (float)DY;
float ptsPerPixel = (ymax-ymin)/ (float)DY;
// make an adjustment to the label then! -- Commented out because it's currently not used.
//float zadj = zoff * ptsPerPixel;
float zadj = zoff * ptsPerPixel;
#ifdef _USEPLOTTER_
//#ifdef _USEPLOTTER_
// use the color specified from addStat_r() for this line/pt
plotter->pencolor ( ((color >> 16) & 0xff) << 8 ,
((color >> 8) & 0xff) << 8 ,
((color >> 0) & 0xff) << 8 );
//plotter->pencolor ( ((color >> 16) & 0xff) << 8 ,
// ((color >> 8) & 0xff) << 8 ,
// ((color >> 0) & 0xff) << 8 );
// horizontal line
plotter->line ( m_bx, (long)z2 , DX + m_bx, (long)z2 );
//plotter->line ( m_bx, (long)z2 , DX + m_bx, (long)z2 );
long width = 1;
drawLine3 ( m_gw, 0, DX , (long)z2,color, width);
// make label
char tmp[128];
// . use "graphHash" to map to unit display
// . this is a disk read volume
sprintf(tmp,label->m_format,z +zadj);//* label->m_yscalar);
/*
// a white shadow
plotter->pencolor ( 0xffff,0xffff,0xffff );
plotter->move ( m_bx + 80 + 2 , z2 + 10 - 2 );
@ -1060,7 +1120,24 @@ void Statsdb::drawHR ( float z ,
plotter->move ( m_bx + 80 , z2 + 10 );
// plot label
plotter->alabel ( 'c' , 'c' , tmp );
#endif
*/
// LABEL
gw.safePrintf("<div style=\"position:absolute;"
"left:%li;"
"bottom:%li;"
"color:#%lx;"
"z-index:110;"
"font-size:14px;"
"min-height:20px;"
"min-width:3px;\">%s</div>\n"
, (long)(m_bx)
, (long)z2 +m_by
, color
// the label:
, tmp
);
}
void gotListWrapper ( void *state , RdbList *list, Msg5 *msg5 ) {
@ -1289,7 +1366,7 @@ bool Statsdb::addPoint ( long x ,
// convert x into pixel position
float xf = (float)DX * (float)(x - m_t1) / (float)(m_t2 - m_t1);
// round it to nearest pixel
long x2 = (long)(xf + .5) + m_bx;
long x2 = (long)(xf + .5) ;//+ m_bx;
// make this our y pos
float y2 = y;
// average values if tied
@ -1371,7 +1448,7 @@ bool Statsdb::addEventPoint ( long t1 ,
// convert t1 into pixel position
float af = (float)DX * (float)(t1 - m_t1) / (float)(m_t2 - m_t1);
// round it to nearest pixel
long a = (long)(af + .5) + m_bx;
long a = (long)(af + .5) ;//+ m_bx;
// convert t2 into pixel position
//float bf = (float)DX * (float)(t2 - m_t1) / (float)(m_t2 - m_t1);
@ -1439,3 +1516,43 @@ bool Statsdb::addEventPoint ( long t1 ,
log("stats: no room in graph for event");
return true;
}
//////////
//
// NEW CODE HERE
//
//////////
// . draw a HORIZONTAL line in html for the statsdb graph by emitting one
//   absolutely-positioned div into "sb"
// . identical segments are only emitted once (deduped via m_dupTable)
//   because with a ton of points we would otherwise emit tons of
//   identical divs
void Statsdb::drawLine3 ( SafeBuf &sb ,
			  long x1 ,
			  long x2 ,
			  long fy1 ,
			  long color ,
			  long width ) {
	// chain-hash all five segment parms into a single dedup key,
	// in the same order every time so identical segments collide
	long parms[5];
	parms[0] = x1;
	parms[1] = x2;
	parms[2] = fy1;
	parms[3] = color;
	parms[4] = width;
	long key32 = 0;
	for ( long i = 0 ; i < 5 ; i++ )
		key32 = hash32h ( parms[i] , key32 );
	// do not draw repeats in the case we have a ton of points to plot
	if ( m_dupTable.isInTable(&key32) ) return;
	m_dupTable.addKey(&key32);
	// offset by the graph border (m_bx/m_by) and center vertically
	sb.safePrintf("<div style=\"position:absolute;"
		      "left:%li;"
		      "bottom:%li;"
		      "background-color:#%lx;"
		      "z-index:-5;"
		      "min-height:%lipx;"
		      "min-width:%lipx;\"></div>\n"
		      , x1 + m_bx
		      , (fy1 - width/2) + m_by
		      , color
		      , width
		      , x2 - x1
		      );
}

@ -73,13 +73,22 @@ class Statsdb {
char *plotGraph ( char *pstart ,
char *pend ,
long graphHash ,
class GIFPlotter *plotter ,
//class GIFPlotter *plotter ,
SafeBuf &gw,
long zoff );
void drawLine3 ( SafeBuf &sb ,
long x1 ,
long x2 ,
long fy1 ,
long color ,
long width ) ;
void drawHR ( float z ,
float ymin ,
float ymax ,
class GIFPlotter *plotter ,
//class GIFPlotter *plotter ,
SafeBuf &gw,
class Label *label ,
float zoff ,
long color ) ;
@ -119,6 +128,10 @@ class Statsdb {
RdbList m_list;
Msg1 m_msg1;
// the graphing window. now a bunch of absolute divs in html
SafeBuf m_gw;
HashTableX m_dupTable;
SafeBuf m_sb0;
SafeBuf m_sb1;

@ -1735,7 +1735,10 @@ void TcpServer::destroySocket ( TcpSocket *s ) {
//log("tcp: closing fd=%i",sd);
// TODO: does this block or what?
long cret = ::close ( sd );
long cret = 0;
// if sd is 0 do not really close it. seems to fix that bug.
// 0 is the FD for stdin so i don't know how that is happening.
if ( sd != 0 ) cret = ::close ( sd );
if ( cret != 0 ) // == -1 )
log("tcp: close(%li) = %li = %s",
(long)sd,cret,mstrerror(errno));

File diff suppressed because it is too large Load Diff

@ -495,6 +495,13 @@ class XmlDoc {
long **getIndCatIds ( ) ;
long **getCatIds ( ) ;
class CatRec *getCatRec ( ) ;
long *getNumDmozEntries() ;
char **getDmozTitles ( ) ;
char **getDmozSummaries ( ) ;
char **getDmozAnchors ( ) ;
bool setDmozInfo () ;
long long **getWikiDocIds ( ) ;
void gotWikiResults ( class UdpSlot *slot );
long *getPubDate ( ) ;
@ -663,6 +670,8 @@ class XmlDoc {
int8_t *getNextSpiderPriority ( ) ;
long *getPriorityQueueNum ( ) ;
class TagRec ***getOutlinkTagRecVector () ;
char *hasNoIndexMetaTag();
char *hasFakeIpsMetaTag ( );
long **getOutlinkFirstIpVector () ;
//char **getOutlinkIsIndexedVector () ;
long *getRegExpNum ( long outlinkNum ) ;
@ -678,6 +687,7 @@ class XmlDoc {
bool getIsInjecting();
long *getSpiderPriority ( ) ;
long *getIndexCode ( ) ;
long *getIndexCode2 ( ) ;
SafeBuf *getNewTagBuf ( ) ;
char *updateTagdb ( ) ;
@ -733,6 +743,7 @@ class XmlDoc {
bool hashZipCodes ( class HashTableX *table ) ;
bool hashMetaZip ( class HashTableX *table ) ;
bool hashContentType ( class HashTableX *table ) ;
bool hashDMOZCategories ( class HashTableX *table ) ;
bool hashLinks ( class HashTableX *table ) ;
bool hashUrl ( class HashTableX *table ) ;
bool hashSections ( class HashTableX *table ) ;
@ -1038,7 +1049,6 @@ class XmlDoc {
char m_fragBufValid;
char m_wordSpamBufValid;
char m_finalSummaryBufValid;
char m_matchingQueryBufValid;
char m_relatedQueryBufValid;
char m_queryLinkBufValid;
@ -1143,6 +1153,7 @@ class XmlDoc {
bool m_dmozTitlesValid;
bool m_dmozSummsValid;
bool m_dmozAnchorsValid;
bool m_dmozInfoValid;
bool m_rawUtf8ContentValid;
bool m_expandedUtf8ContentValid;
bool m_utf8ContentValid;
@ -1239,6 +1250,8 @@ class XmlDoc {
bool m_priorityQueueNumValid;
bool m_outlinkTagRecVectorValid;
bool m_outlinkIpVectorValid;
bool m_hasNoIndexMetaTagValid;
bool m_hasUseFakeIpsMetaTagValid;
bool m_outlinkIsIndexedVectorValid;
bool m_isSiteRootValid;
bool m_wasInjectedValid;
@ -1499,8 +1512,15 @@ class XmlDoc {
Msge0 m_msge0;
// this points into m_msge1 i guess
//long *m_outlinkIpVector;
long *m_outlinkIpVector;
SafeBuf m_outlinkTagRecPtrBuf;
SafeBuf m_fakeIpBuf;
char m_hasNoIndexMetaTag;
char m_hasUseFakeIpsMetaTag;
Msge1 m_msge1;
TagRec **m_outlinkTagRecVector;
SafeBuf m_fakeTagRecPtrBuf;
TagRec m_fakeTagRec;
//
// diffbot parms for indexing diffbot's json output
@ -1860,7 +1880,9 @@ class XmlDoc {
char m_isErrorPage;
char m_isHijacked;
//char m_isVisible;
char m_dmozBuf[12000];
//char m_dmozBuf[12000];
SafeBuf m_dmozBuf;
long m_numDmozEntries;
// stuff
char *m_statusMsg;

@ -21,6 +21,11 @@
// . no-op stand-ins for symbols referenced by object files linked into
//   this standalone dmozparse tool; presumably the real implementations
//   live only in the main gb binary -- TODO(review): confirm against
//   the Makefile link line
bool closeAll ( void *state , void (* callback)(void *state) ) { return true; }
bool allExit ( ) { return true; };
bool sendPageSEO(TcpSocket *s, HttpRequest *hr) {return true;}
//long g_qbufNeedSave = false;
//SafeBuf g_qbuf;
#define RDFBUFFER_SIZE (1024*1024*10)
#define RDFSTRUCTURE_FILE "structure.rdf.u8"
#define RDFCONTENT_FILE "content.rdf.u8"
@ -167,14 +172,18 @@ char* incRdfPtr( long skip = 1 ) {
// parse the rdf file up past a given start tag
long rdfParse ( char *tagName ) {
bool inQuote = false;
//bool inQuote = false;
do {
long matchPos = 0;
// move to the next tag
while (*rdfPtr != '<' || inQuote ) {
// . quotes are no longer escaped out in the newer
// dmoz files in oct 2013... so take that out. i do
// this < is &lt; though.. perhaps only check for
// quotes when in a tag?
while (*rdfPtr != '<' ) { // || inQuote ) {
// check for quotes
if (*rdfPtr == '"')
inQuote = !inQuote;
//if (*rdfPtr == '"')
// inQuote = !inQuote;
// next char
if (!incRdfPtr())
return -1;
@ -200,12 +209,15 @@ long rdfParse ( char *tagName ) {
// move to the next tag in the file
long rdfNextTag ( ) {
bool inQuote = false;
//bool inQuote = false;
// move to the next tag
while (*rdfPtr != '<' || inQuote ) {
while (*rdfPtr != '<' ) { // || inQuote ) {
// check for quotes
if (*rdfPtr == '"')
inQuote = !inQuote;
// NO! too many unbalanced quotes all over the place!
// and i think quotes in tags do not have < or > in them
// because they should be encoded as &gt; and &lt;
//if (*rdfPtr == '"')
// inQuote = !inQuote;
// next char
if (!incRdfPtr())
return -1;
@ -395,6 +407,11 @@ long getIndexFromId ( long catid ) {
else
low = currCat+1;
}
//printf("catid %li not found. sanity checking.\n",catid);
// sanity check our algo
//for ( long i = 0 ; i < numRdfCats ; i++ ) {
// if ( rdfCats[i].m_catid == catid ) { char *xx=NULL;*xx=0;}
//}
// not found
return -1;
}
@ -518,7 +535,7 @@ bool isGoodUrl ( char *url, long urlLen ) {
if ( urlLen <= 0 )
return false;
for (long i = 0; i < urlLen; i++) {
if (is_space(url[i]))
if (is_wspace_a(url[i]))
return false;
}
// check for [prot]://[url]
@ -546,8 +563,27 @@ long printCatPath ( char *str, long catid, bool raw ) {
return 0;
// get the parent
parentId = rdfCats[catIndex].m_parentid;
// print the parent(s) first
if (parentId > 1) {
// . print the parent(s) first
// . in NEWER DMOZ dumps, "Top" is catid 2 and catid 1 is an
// empty title. really catid 2 is Top/World but that is an
// error that we correct below. (see "Top/World" below).
// but do not include the "Top/" as part of the path name
if ( catid == 2 ) {
// no! we now include Top as part of the path. let's
// be consistent. i'd rather have www.gigablast.com/Top
// and www.gigablast.com/Top/Arts etc. then i know if the
// path starts with /Top that it is dmoz!!
sprintf(p,"Top");
return 3;
}
if (parentId > 1 &&
// the newer dmoz files have the catid == the parent id of
// i guess top most categories, like "Top/Arts"... i would think
// it should have a parentId of 1 like the old dmoz files,
// so it's probably a bug on dmoz's end
parentId != catid ) {
p += printCatPath(p, parentId, raw);
// print spacing
if (!raw) p += sprintf(p, " / ");
@ -621,18 +657,22 @@ long fixUrl ( char *url, long urlLen ) {
memmove(&url[slashi-1], &url[slashi], newUrlLen - slashi);
newUrlLen--;
}
if (is_space(url[slashi])) {
if (is_wspace_a(url[slashi])) {
memmove(&url[slashi], &url[slashi+1], newUrlLen - (slashi+1));
newUrlLen--;
}
}
// remove any anchor
// mdw, sep 2013, no because there is twitter.com/#!/ronpaul
// and others...
/*
for (long i = 0; i < newUrlLen; i++) {
if (url[i] == '#') {
newUrlLen = i;
break;
}
}
*/
// remove any trailing /
if (url[newUrlLen-1] == '/')
newUrlLen--;
@ -670,6 +710,38 @@ long fileWrite ( int fileid, void *buf, size_t count ) {
return sizeWrote;
}
// . write the special gigablast meta tags into a dmoz url dump file so
//   the spider handles it correctly: spider the listed links themselves
//   but NOT the links of those links, ignore external errors like
//   ETCPTIMEDOUT when indexing a dmoz url (so every url lands under its
//   proper category and gbcatid:xxx searches stay complete), do not
//   index the dump page itself, and use a fake ip (hash of the
//   subdomain) for each outlink to skip a dns lookup per outlink at
//   add time. see XmlDoc.cpp addOutlinksSpiderRecsToMetaList() and
//   indexDoc() for where these tags are consumed.
// . "outStream2" is an already-open file descriptor. a short or failed
//   write is only logged to stdout; we keep going (best-effort, like
//   the rest of this tool).
void writeMetaTags ( int outStream2 ) {
	char *metaTags =
		"<!-- do not spider the links of the links -->\n"
		"<meta name=spiderlinkslinks content=0>\n"
		"<!--ignore tcp timeouts, dns timeouts, etc.-->\n"
		"<meta name=ignorelinksexternalerrors content=1>\n"
		"<!--do not index this document, but get links from it-->\n"
		"<meta name=noindex content=1>\n"
		// skipping the per-outlink dns lookup saves time up front;
		// the real ip gets resolved later when the doc itself is
		// spidered
		"<!-- do not lookup the ip address of every outlink, "
		"but use hash of the subdomain as the ip -->\n"
		"<meta name=usefakeips content=1>\n"
		;
	long metaLen = gbstrlen ( metaTags );
	long nw = write ( outStream2 , metaTags , metaLen );
	if ( nw != metaLen )
		printf("Error writing to outStream2b\n");
}
// main parser
int main ( int argc, char *argv[] ) {
long n;
@ -678,7 +750,7 @@ int main ( int argc, char *argv[] ) {
long m = 0;
long newNameBufferSize = 0;
long newOffset = 0;
char filename[256];
char filename[1256];
long urlTxtCount = 0;
long urlTxtFile = 0;
Url normUrl;
@ -695,6 +767,8 @@ int main ( int argc, char *argv[] ) {
bool splitUrls = false;
char mode = MODE_NONE;
long totalNEC = 0;
char *dir="";
bool firstTime;
// check the options and mode
for (long i = 0; i < argc; i++) {
@ -783,20 +857,29 @@ int main ( int argc, char *argv[] ) {
goto errExit;
}
dir = "";
retry:
// open the structure file
if ( mode == MODE_NEW || mode == MODE_CATDUMP )
sprintf(filename, "%s", RDFSTRUCTURE_FILE);
sprintf(filename, "%s%s", dir,RDFSTRUCTURE_FILE);
else
sprintf(filename, "%s.new", RDFSTRUCTURE_FILE);
sprintf(filename, "%s%s.new", dir,RDFSTRUCTURE_FILE);
//rdfStream.open(filename, ifstream::in);
rdfStream = open ( filename, O_RDONLY );
// make sure it openned okay
// make sure it opened okay
//if (!rdfStream.is_open()) {
if ( rdfStream < 0 ) {
printf("Error Openning %s\n", filename);
// try ./catdb/ subdir if not found
if ( ! dir[0] ) {
dir = "./catdb/";
goto retry;
}
printf("Error Opening %s\n", filename);
goto errExit;
}
printf("Openned Structure File: %s\n", filename);
printf("Opened Structure File: %s\n", filename);
// take the first chunk
//rdfStream.read(rdfBuffer, RDFBUFFER_SIZE);
@ -809,6 +892,7 @@ int main ( int argc, char *argv[] ) {
rdfPtr = rdfBuffer;
rdfEnd = &rdfBuffer[n];
currOffset = 0;
firstTime = true;
// read and parse the file
printf("Parsing Topics...\n");
@ -820,6 +904,13 @@ int main ( int argc, char *argv[] ) {
unsigned long catOffset = currOffset - 6;
// get the topic name, preserve it on the buffer
long nameOffset = nameBufferLen;
// the name inserted by this function into "nameBuffer"
// does not seem to contain "Top/" at the beginning.
// it is from structure.rdf.u8, but it seems to be there!
// yeah, later on we hack the name buffer and nameOffset
// so it is just the last word in the directory to save
// mem. then we print out all the parent names to
// reconstruct.
long nameLen = fillNextString();
if (nameLen == -1)
goto fileEnd;
@ -827,18 +918,48 @@ int main ( int argc, char *argv[] ) {
printf("Out of Memory!\n");
goto errExit1;
}
// fix <Topic r:id=\"\"> in the newer content.rdf.u8
if ( nameLen == 0 ) {
// only do this once!
if ( ! firstTime ) {
printf("Encountered zero length name");
continue;
}
memcpy(nameBuffer+nameOffset,"Top\0",4);
nameLen = 3;
firstTime = false;
}
// html decode it
if (nameLen > MAX_HTTP_FILENAME_LEN)
nameLen = MAX_HTTP_FILENAME_LEN;
nameLen = htmlDecode ( htmlDecoded,
&nameBuffer[nameOffset],
nameLen );
memcpy(&nameBuffer[nameOffset], htmlDecoded, nameLen);
nameBufferLen += nameLen;
nameLen ,
false,
0);
// parse the catid
long catid = parseNextCatid();
if (catid == -1)
goto fileEnd;
// crap, in the new dmoz structure.rdf.u8 catid 1 is
// empty name and catid 2 has Topic tag "Top/World" but
// Title tag "Top".
// but it should probably be "Top" and not "World". There is
// another catid 3 in structure.rdf.u8 that has
// <Topic r:id="Top/World"> and catid 3 which is the real one,
// so catid 2 is just "Top". this is a bug in the dmoz output
// i think, so fix it here.
if ( catid == 2 ) {
nameLen = 3;
memcpy(&nameBuffer[nameOffset],"Top",nameLen);
nameBufferLen += nameLen;
}
else {
memcpy(&nameBuffer[nameOffset], htmlDecoded, nameLen);
nameBufferLen += nameLen;
}
// . fill the current cat
// make sure there's room
if (numRdfCats >= rdfCatsSize) {
@ -856,6 +977,11 @@ int main ( int argc, char *argv[] ) {
printf("Out of Memory!\n");
goto errExit1;
}
// debug
//printf("gbcat=");
//for ( long i = 0 ; i < nameLen ; i++ )
// printf("%c",htmlDecoded[i]);
//printf("\n");
// fill it
rdfCats[numRdfCats].m_catid = catid;
rdfCats[numRdfCats].m_parentid = 0;
@ -923,10 +1049,16 @@ fileEnd:
rdfEnd = &rdfBuffer[n];
currOffset = 0;
//
// set m_parentid using structure.rdf.u8
//
// read and parse the file again
printf("Building Hierarchy...\n");
while (true) {
// parse the next catid
// parse the next catid in the file, sequentially
//if ( currOffset == 545468935 )
// printf("shit\n");
long catid = parseNextCatid();
if (catid == -1)
goto fileEnd1;
@ -977,8 +1109,18 @@ nextChildTag:
childNameLen = MAX_HTTP_FILENAME_LEN;
childNameLen = htmlDecode ( htmlDecoded,
childName,
childNameLen );
childNameLen ,
false,
0);
memcpy(childName, htmlDecoded, childNameLen);
// debug log
//if ( currOffset >= 506362430 ) // 556362463
// printf("off=%li\n",currOffset);
// debug point
//if ( currOffset == 545467573 )
// printf("GOT DEBUG POINT before giant skip\n");
// cut off the leading label if symbolic
// if (parentType == 2) {
// while (*childName != ':') {
@ -988,20 +1130,27 @@ nextChildTag:
// childName++;
// childNameLen--;
// }
// debug point
//if (strcmp(childName,"Top/World/Català/Arts") == 0 )
// printf("hey\n");
// get the catid for the child
long childid = getCatHash(childName, childNameLen);
// get the cat for this id
long cat = getIndexFromId(childid);
// make sure we have a match
if (cat == -1) {
//printf("Warning: Child Topic Not Found: ");
//for (long i = 0; i < childNameLen; i++)
// printf("%c", childName[i]);
//printf("\n");
// debug. why does Top/World/Catala/Arts
// not have a parent??
printf("Warning: Child Topic Not Found: ");
for (long i = 0; i < childNameLen; i++)
printf("%c", childName[i]);
printf("\n");
m++;
goto nextChildTag;
}
// assign the parent to the cat
// . assign the parent to the cat
// . this means we are in a "child" tag within the "catid"
// . catid 84192
if (parentType == 1) {
if (rdfCats[cat].m_parentid != 0)
printf("Warning: Overwriting Parent Id!\n");
@ -1033,6 +1182,14 @@ fileEnd1:
printf(" Total Topics: %li\n", numRdfCats);
printf(" Topics with Parents: %li\n", t);
printf(" Topics Linked but Nonexistent: %li\n", m);
if ( t != numRdfCats ) {
printf("\n"
" *Topics without parents is bad because they\n"
" can not have their entired rawPath printed out\n"
" in order to get their proper hash\n");
}
//printf(" Number of Symbolic Links: %li\n", numSymParents);
printf("\n");
@ -1066,25 +1223,45 @@ fileEnd1:
for (long i = 0; i < numRdfCats; i++) {
// get the hash of the path
rawPathLen = printCatPath(rawPath, rdfCats[i].m_catid, true);
rdfCats[i].m_catHash = hash32Lower(rawPath, rawPathLen, 0);
// crap, this rawpath contains "Top/" in the beginning
// but the rdfCats[i].m_nameOffset refers to a name
// that does not include "Top/"
rdfCats[i].m_catHash = hash32Lower_a(rawPath, rawPathLen, 0);
// fix. so that xyz/Arts does not just hash "Arts"
// because it has no parent...
if ( rdfCats[i].m_parentid == 0 ) {
printf("Missing parent for catid %li. Will be "
"excluded from DMOZ so we avoid hash "
"collisions.\n",rdfCats[i].m_catid);
}
//
// DEBUG!
// print this shit out to find the collisions
//
continue;
printf("hash32=%lu catid=%li parentid=%li path=%s\n",
rdfCats[i].m_catHash,
rdfCats[i].m_catid,
rdfCats[i].m_parentid,
rawPath);
}
// . now we want to serialize the needed data into
// one (or more?) file(s) to be quickly read by gb
if ( mode == MODE_NEW )
sprintf(filename, "%s", STRUCTURE_OUTPUT_FILE);
sprintf(filename, "%s%s", dir,STRUCTURE_OUTPUT_FILE);
else
sprintf(filename, "%s.new", STRUCTURE_OUTPUT_FILE);
sprintf(filename, "%s%s.new", dir,STRUCTURE_OUTPUT_FILE);
//outStream.open(filename, ofstream::out|ofstream::trunc);
outStream = open ( filename, O_CREAT|O_WRONLY|O_TRUNC,
S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP );
// make sure it openned okay
// make sure it opened okay
//if (!outStream.is_open()) {
if ( outStream < 0 ) {
printf("Error Openning %s\n", filename);
printf("Error Opening %s\n", filename);
goto errExit;
}
printf("\nOpenned %s for writing.\n", filename);
printf("\nOpened %s for writing.\n", filename);
// write the size of the truncated name buffer
//outStream.write((char*)&newNameBufferSize, sizeof(long));
@ -1149,21 +1326,26 @@ contentParse:
printf("Out of Memory!\n");
goto errExit;
}
again:
// open the content file
if ( mode == MODE_NEW || mode == MODE_URLDUMP )
sprintf(filename, "%s", RDFCONTENT_FILE);
sprintf(filename, "%s%s", dir,RDFCONTENT_FILE);
else
sprintf(filename, "%s.new", RDFCONTENT_FILE);
sprintf(filename, "%s%s.new", dir,RDFCONTENT_FILE);
//rdfStream.open(filename, ifstream::in);
rdfStream = open ( filename, O_RDONLY );
// make sure it openned okay
// make sure it opened okay
//if (!rdfStream.is_open()) {
if ( rdfStream < 0 ) {
printf("Error Openning %s\n", filename);
if ( ! dir[0] ) {
dir = "./catdb/";
goto again;
}
printf("Error Opening %s\n", filename);
goto errExit;
}
printf("\nOpenned Content File: %s\n", filename);
printf("\nOpened Content File: %s\n", filename);
// take the first chunk
//rdfStream.read(rdfBuffer, RDFBUFFER_SIZE);
@ -1184,28 +1366,32 @@ contentParse:
// write another file for the urls
if ( mode == MODE_URLDUMP ) {
if (!splitUrls)
sprintf(filename, "%s", URLTEXT_OUTPUT_FILE);
sprintf(filename, "html/%s", URLTEXT_OUTPUT_FILE);
else
sprintf(filename, "%s.0", URLTEXT_OUTPUT_FILE);
// put them directly into html/ now for
// easy add url'ing
sprintf(filename, "html/%s.0", URLTEXT_OUTPUT_FILE);
}
else {
if (!splitUrls)
sprintf(filename, "%s",
sprintf(filename, "html/%s",
DIFFURLTEXT_OUTPUT_FILE);
else
sprintf(filename, "%s.0",
sprintf(filename, "html/%s.0",
DIFFURLTEXT_OUTPUT_FILE);
}
//outStream2.open(filename, ofstream::out|ofstream::trunc);
outStream2 = open ( filename, O_CREAT|O_WRONLY|O_TRUNC,
S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP );
// make sure it openned okay
// make sure it opened okay
//if (!outStream2.is_open()) {
if ( outStream2 < 0 ) {
printf("Error Openning %s\n", filename);
printf("Error Opening %s\n", filename);
goto errExit1;
}
printf("Openned %s for writing.\n", filename);
printf("Opened %s for writing.\n", filename);
writeMetaTags ( outStream2 );
// if we're doing a diffurldump, load up the diff file first
if ( mode == MODE_DIFFURLDUMP ) {
@ -1219,10 +1405,10 @@ contentParse:
diffInStream = open(filename, O_RDONLY);
//if (!diffInStream.is_open()) {
if ( diffInStream < 0 ) {
printf("Error Openning %s\n", filename);
printf("Error Opening %s\n", filename);
goto errExit;
}
printf("Openned Diff File: %s\n", filename);
printf("Opened Diff File: %s\n", filename);
// read in the number of urls to update/add
//diffInStream.read((char*)&numUpdateIndexes,
@ -1318,7 +1504,7 @@ contentParse:
printf("Completed Writing File.\n");
// write another file for the urls
urlTxtFile++;
sprintf(filename, "%s.%li",
sprintf(filename, "html/%s.%li",
URLTEXT_OUTPUT_FILE,
urlTxtFile);
//outStream2.open(filename,
@ -1326,14 +1512,14 @@ contentParse:
outStream2 = open ( filename,
O_CREAT|O_WRONLY|O_TRUNC,
S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP );
// make sure it openned okay
// make sure it opened okay
//if (!outStream2.is_open()) {
if ( outStream2 < 0 ) {
printf("Error Openning %s\n",
printf("Error Opening %s\n",
filename);
goto errExit1;
}
printf("Openned %s for writing.\n",
printf("Opened %s for writing.\n",
filename);
urlTxtCount = 0;
}
@ -1348,20 +1534,20 @@ contentParse:
}
else {
if ( mode == MODE_NEW )
sprintf(filename, "%s", CONTENT_OUTPUT_FILE);
sprintf(filename, "%s%s", dir,CONTENT_OUTPUT_FILE);
else
sprintf(filename, "%s.new", CONTENT_OUTPUT_FILE);
sprintf(filename, "%s%s.new", dir,CONTENT_OUTPUT_FILE);
// stream the urls into the content
//outStream.open(filename, ofstream::out|ofstream::trunc);
outStream = open ( filename, O_CREAT|O_WRONLY|O_TRUNC,
S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP );
// make sure it openned okay
// make sure it opened okay
//if (!outStream.is_open()) {
if ( outStream < 0 ) {
printf("Error Openning %s\n", filename);
printf("Error Opening %s\n", filename);
goto errExit;
}
printf("Openned %s for writing.\n", filename);
printf("Opened %s for writing.\n", filename);
// store a space for the number of urls at the start of the file
//outStream.write((char*)&numUrlInfos, sizeof(long));
@ -1371,7 +1557,7 @@ contentParse:
goto errExit;
}
}
// read and parse the file again
printf("Building Links...\n");
while (true) {
@ -1389,6 +1575,9 @@ contentParse:
if ( mode == MODE_URLDUMP || mode == MODE_DIFFURLDUMP )
goto nextLink;
// . set the content offset for this cat
// . it's missing catid 425187... why? because it had
// a double quote in it like '4"'!! so i took out inQuotes
// logic above.
cat = getIndexFromId(catid);
if (cat == -1) {
totalNEC++;
@ -1442,15 +1631,35 @@ hashLink:
// html decode the url
if (urlLen > MAX_URL_LEN)
urlLen = MAX_URL_LEN;
urlLen = htmlDecode(decodedUrl, &urlBuffer[urlOffset], urlLen);
urlLen = htmlDecode(decodedUrl, &urlBuffer[urlOffset], urlLen,
false,0);
// debug point
//if ( strcmp(decodedUrl,"http://twitter.com/#!/ronpaul")==0)
// printf("hey\n");
// ignore any url with # in it for now like
// http://twitter.com/#!/ronpaul because it bastardizes
// the meaning of the # (hashtag) and we need to protest that
if ( strchr ( decodedUrl , '#' ) )
goto nextLink;
memcpy(&urlBuffer[urlOffset], decodedUrl, urlLen);
// fix up bad urls
urlLen = fixUrl(&urlBuffer[urlOffset], urlLen);
if (urlLen == 0)
goto nextLink;
// normalize with Url
normUrl.set(&urlBuffer[urlOffset], urlLen,
true, false, false, true);
// . normalize with Url
// . watch out for
// http://twitter.com/#!/ronpaul to http://www.twitter.com/
// so do not strip # hashtags
normUrl.set(&urlBuffer[urlOffset],
urlLen,
true, // addwww?
false, // stripsessionid
false, // strippound?
true); // stripcommonfile? (i.e. index.htm)
// debug print
//printf("gburl %s -> %s\n",decodedUrl,normUrl.getUrl());
// put it back
urlLen = normUrl.getUrlLen();
if (urlBufferLen+urlLen+10 >= urlBufferSize) {
@ -1473,7 +1682,7 @@ hashLink:
//urlBufferLen += urlLen;
// get the hash value
unsigned long long urlHash =
hash64Lower(&urlBuffer[urlOffset], urlLen, 0);
hash64Lower_a(&urlBuffer[urlOffset], urlLen, 0);
//unsigned long urlHash2 =
// hash32Lower(&urlBuffer[urlOffset], urlLen, 0);
// see if it's already indexed
@ -1491,6 +1700,10 @@ hashLink:
currUrl == updateIndexes[currDiffIndex] ) {
//outStream2.write(&urlBuffer[urlOffset],
// urlLen);
// print it in an anchor tag
// now so gigablast can spider
// these links
write ( outStream2,"<a href=\"",9);
if ( write ( outStream2,
&urlBuffer[urlOffset],
urlLen ) != urlLen ) {
@ -1498,6 +1711,7 @@ hashLink:
"outStream2\n");
goto errExit1;
}
write ( outStream2,"\"></a>",6);
//outStream2.write("\n", 1);
if (write(outStream2, "\n", 1) != 1) {
printf("Error writing to "
@ -1518,11 +1732,11 @@ hashLink:
// write another file for the urls
urlTxtFile++;
if ( mode == MODE_URLDUMP )
sprintf(filename, "%s.%li",
sprintf(filename, "html/%s.%li",
URLTEXT_OUTPUT_FILE,
urlTxtFile);
else
sprintf(filename, "%s.%li",
sprintf(filename, "html/%s.%li",
DIFFURLTEXT_OUTPUT_FILE,
urlTxtFile);
//outStream2.open(filename,
@ -1530,15 +1744,16 @@ hashLink:
outStream2 = open ( filename,
O_CREAT|O_WRONLY|O_TRUNC,
S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP );
// make sure it openned okay
// make sure it opened okay
//if (!outStream2.is_open()) {
if ( outStream2 < 0 ) {
printf("Error Openning %s\n",
printf("Error Opening %s\n",
filename);
goto errExit1;
}
printf("Openned %s for writing.\n",
printf("Opened %s for writing.\n",
filename);
writeMetaTags ( outStream2 );
urlTxtCount = 0;
}
}
@ -1634,8 +1849,17 @@ hashLink:
long currIndex = getIndexFromId(catid);
while (currIndex >= 0) {
rdfCats[currIndex].m_numUrls++;
// the new dmoz files have catids whose parents
// are the same cat id! so stop infinite loops
if ( rdfCats[currIndex].m_parentid ==
rdfCats[currIndex].m_catid )
break;
// otherwise, make "currIndex" point to the parent
currIndex = getIndexFromId(
rdfCats[currIndex].m_parentid );
// in the newer dmoz files 0 is a bad catid i guess
// not -1 any more?
// ??????
}
goto nextLink;
@ -1697,19 +1921,19 @@ fileEnd2:
// load the content and url files
// url info (content) file
sprintf(filename, "%s", CONTENT_OUTPUT_FILE);
sprintf(filename, "%s%s", dir,CONTENT_OUTPUT_FILE);
//rdfStream.open(filename, ifstream::in);
rdfStream = open ( filename, O_RDONLY );
//if (!rdfStream.is_open()) {
if ( rdfStream < 0 ) {
printf("Error Openning %s\n", CONTENT_OUTPUT_FILE);
printf("Error Opening %s\n", filename);
goto oldErrExit;
}
// read in the number of urls
//rdfStream.read((char*)&oldNumUrls, sizeof(long));
if (fileRead(rdfStream, &oldNumUrls, sizeof(long)) !=
sizeof(long)) {
printf("Error Reading %s\n", CONTENT_OUTPUT_FILE);
printf("Error Reading %s\n", filename);
goto oldErrExit;
}
@ -1749,8 +1973,8 @@ fileEnd2:
//rdfStream.read((char*)&urlLen, sizeof(short));
long n = fileRead(rdfStream, &urlLen, sizeof(short));
if ( n < 0 || n > (long)sizeof(short) ) {
printf("Error Reading %s\n",
CONTENT_OUTPUT_FILE);
printf("Error Reading %s\n",filename);
//CONTENT_OUTPUT_FILE);
goto oldErrExit;
}
if ( n == 0 )
@ -1780,8 +2004,8 @@ fileEnd2:
}
n = fileRead(rdfStream, &oldUrls[urlp], urlLen);
if ( n < 0 || n > urlLen ) {
printf("Error Reading %s\n",
CONTENT_OUTPUT_FILE);
printf("Error Reading %s\n",filename);
//CONTENT_OUTPUT_FILE);
goto oldErrExit;
}
if ( n == 0 )
@ -1791,7 +2015,7 @@ fileEnd2:
urlLen = fixUrl(&oldUrls[urlp], urlLen);
// make the hash
oldUrlHashes[currUrl] =
hash64Lower(&oldUrls[urlp], urlLen, 0);
hash64Lower_a(&oldUrls[urlp], urlLen, 0);
removeOldUrl[currUrl] = 0;
// increment the buffer pointer
if (urlLen <= 0) {
@ -1814,8 +2038,8 @@ fileEnd2:
//rdfStream.read((char*)&oldNumCatids[currUrl], 1);
long n = fileRead(rdfStream, &oldNumCatids[currUrl], 1);
if ( n < 0 || n > 1 ) {
printf("Error Reading %s\n",
CONTENT_OUTPUT_FILE);
printf("Error Reading %s\n",filename);
//CONTENT_OUTPUT_FILE);
goto oldErrExit;
}
if ( n == 0 )
@ -1839,8 +2063,8 @@ fileEnd2:
long readSize = sizeof(long)*oldNumCatids[currUrl];
n = fileRead(rdfStream, &oldCatids[catidp], readSize);
if ( n < 0 || n > readSize ) {
printf("Error Reading %s\n",
CONTENT_OUTPUT_FILE);
printf("Error Reading %s\n",filename);
//CONTENT_OUTPUT_FILE);
goto oldErrExit;
}
if ( n == 0 )
@ -1907,17 +2131,17 @@ oldIsDifferent:
// also urls to remove
//
// open the new diff file for writing
sprintf(filename, "%s.new.diff", CONTENT_OUTPUT_FILE);
sprintf(filename, "%s%s.new.diff", dir,CONTENT_OUTPUT_FILE);
//outStream.open(filename, ofstream::out|ofstream::trunc);
outStream = open ( filename, O_CREAT|O_WRONLY|O_TRUNC,
S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP );
// make sure it openned okay
// make sure it opened okay
//if (!outStream.is_open()) {
if ( outStream < 0 ) {
printf("Error Openning %s\n", filename);
printf("Error Opening %s\n", filename);
goto oldErrExit;
}
printf("\nOpenned %s for writing.\n", filename);
printf("\nOpened %s for writing.\n", filename);
// write out the number of urls to update/add
//outStream.write(&numUpdateUrls, sizeof(long));
@ -2027,19 +2251,19 @@ oldGoodExit:
// . now we want to serialize the needed data into
// one (or more?) file(s) to be quickly read by gb
if ( mode == MODE_NEW )
sprintf(filename, "%s", STRUCTURE_OUTPUT_FILE);
sprintf(filename, "%s%s", dir,STRUCTURE_OUTPUT_FILE);
else
sprintf(filename, "%s.new", STRUCTURE_OUTPUT_FILE);
sprintf(filename, "%s%s.new", dir,STRUCTURE_OUTPUT_FILE);
//outStream.open(filename, ofstream::out|ofstream::ate);
outStream = open ( filename, O_WRONLY|O_APPEND,
S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP );
// make sure it openned okay
// make sure it opened okay
//if (!outStream.is_open()) {
if ( outStream < 0 ) {
printf("Error Openning %s\n", filename);
printf("Error Opening %s\n", filename);
goto errExit;
}
printf("\nOpenned %s for writing.\n", filename);
printf("\nOpened %s for writing.\n", filename);
// write the cats
//outStream.write((char*)rdfCats, sizeof(RdfCat)*numRdfCats);
@ -2109,21 +2333,21 @@ oldGoodExit:
// write another file for the urls
if ( mode == MODE_NEW )
sprintf(filename, "%s", CONTENT_OUTPUT_FILE);
sprintf(filename, "%s%s", dir,CONTENT_OUTPUT_FILE);
else
sprintf(filename, "%s.new", CONTENT_OUTPUT_FILE);
sprintf(filename, "%s%s.new", dir,CONTENT_OUTPUT_FILE);
//outStream.open(filename, ofstream::out|ofstream::ate);
outStream = open ( filename, O_WRONLY,
S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP );
//outStream.open(filename, ofstream::out|ofstream::trunc);
//endpos = outStream.tellp();
// make sure it openned okay
// make sure it opened okay
//if (!outStream.is_open()) {
if ( outStream < 0 ) {
printf("Error Openning %s\n", filename);
printf("Error Opening %s\n", filename);
goto errExit;
}
printf("\nOpenned %s for writing.\n", filename);
printf("\nOpened %s for writing.\n", filename);
//outStream.seekp(0);
lseek(outStream, 0, SEEK_SET);

File diff suppressed because one or more lines are too long

BIN
libplot.a

Binary file not shown.

Binary file not shown.

118
main.cpp

@ -22,7 +22,7 @@
#include "Titledb.h"
#include "Revdb.h"
#include "Tagdb.h"
//#include "Catdb.h"
#include "Catdb.h"
#include "Users.h"
#include "Tfndb.h"
#include "Spider.h"
@ -1390,7 +1390,7 @@ int main ( int argc , char *argv[] ) {
char structureFile[256];
g_conf.m_maxMem = 1000000000LL; // 1G
g_mem.m_maxMem = 1000000000LL; // 1G
sprintf(structureFile, "%scat/gbdmoz.structure.dat", g_hostdb.m_dir);
sprintf(structureFile, "%scatdb/gbdmoz.structure.dat", g_hostdb.m_dir);
g_categories = &g_categories1;
if (g_categories->loadCategories(structureFile) != 0) {
log("cat: Loading Categories From %s Failed.", structureFile);
@ -2633,8 +2633,8 @@ int main ( int argc , char *argv[] ) {
if ( ! g_tagdb.init() ) {
log("db: Tagdb init failed." ); return 1; }
// the catdb, it's an instance of tagdb, pass RDB_CATDB
//if ( ! g_catdb.init() ) {
// log("db: Catdb1 init failed." ); return 1; }
if ( ! g_catdb.init() ) {
log("db: Catdb1 init failed." ); return 1; }
// initialize Users
if ( ! g_users.init() ){
log("db: Users init failed. "); return 1;}
@ -2842,7 +2842,7 @@ int main ( int argc , char *argv[] ) {
// load up the dmoz categories here
char structureFile[256];
sprintf(structureFile, "%scat/gbdmoz.structure.dat", g_hostdb.m_dir);
sprintf(structureFile, "%scatdb/gbdmoz.structure.dat", g_hostdb.m_dir);
g_categories = &g_categories1;
if (g_categories->loadCategories(structureFile) != 0) {
log("cat: Loading Categories From %s Failed.",
@ -4511,8 +4511,8 @@ int install ( install_flag_konst_t installFlag , long hostId , char *dir ,
if ( h2->m_hostId == 0 ) continue;
sprintf(tmp,
"rcp "
"%scat/content.rdf.u8 "
"%s:%scat/content.rdf.u8",
"%scatdb/content.rdf.u8 "
"%s:%scatdb/content.rdf.u8",
dir,
iptoa(h2->m_ip),
h2->m_dir);
@ -4520,8 +4520,8 @@ int install ( install_flag_konst_t installFlag , long hostId , char *dir ,
system ( tmp );
sprintf(tmp,
"rcp "
"%scat/structure.rdf.u8 "
"%s:%scat/structure.rdf.u8",
"%scatdb/structure.rdf.u8 "
"%s:%scatdb/structure.rdf.u8",
dir,
iptoa(h2->m_ip),
h2->m_dir);
@ -4529,8 +4529,8 @@ int install ( install_flag_konst_t installFlag , long hostId , char *dir ,
system ( tmp );
sprintf(tmp,
"rcp "
"%scat/gbdmoz.structure.dat "
"%s:%scat/gbdmoz.structure.dat",
"%scatdb/gbdmoz.structure.dat "
"%s:%scatdb/gbdmoz.structure.dat",
dir,
iptoa(h2->m_ip),
h2->m_dir);
@ -4538,8 +4538,8 @@ int install ( install_flag_konst_t installFlag , long hostId , char *dir ,
system ( tmp );
sprintf(tmp,
"rcp "
"%scat/gbdmoz.content.dat "
"%s:%scat/gbdmoz.content.dat",
"%scatdb/gbdmoz.content.dat "
"%s:%scatdb/gbdmoz.content.dat",
dir,
iptoa(h2->m_ip),
h2->m_dir);
@ -4547,8 +4547,8 @@ int install ( install_flag_konst_t installFlag , long hostId , char *dir ,
//system ( tmp );
//sprintf(tmp,
// "rcp "
// "%scat/gbdmoz.content.dat.diff "
// "%s:%scat/gbdmoz.content.dat.diff",
// "%scatdb/gbdmoz.content.dat.diff "
// "%s:%scatdb/gbdmoz.content.dat.diff",
// dir,
// iptoa(h2->m_ip),
// h2->m_dir);
@ -4561,8 +4561,8 @@ int install ( install_flag_konst_t installFlag , long hostId , char *dir ,
if ( h2->m_hostId == 0 ) continue;
sprintf(tmp,
"rcp "
"%scat/content.rdf.u8.new "
"%s:%scat/content.rdf.u8.new",
"%scatdb/content.rdf.u8.new "
"%s:%scatdb/content.rdf.u8.new",
dir,
iptoa(h2->m_ip),
h2->m_dir);
@ -4570,8 +4570,8 @@ int install ( install_flag_konst_t installFlag , long hostId , char *dir ,
system ( tmp );
sprintf(tmp,
"rcp "
"%scat/structure.rdf.u8.new "
"%s:%scat/structure.rdf.u8.new",
"%scatdb/structure.rdf.u8.new "
"%s:%scatdb/structure.rdf.u8.new",
dir,
iptoa(h2->m_ip),
h2->m_dir);
@ -4579,8 +4579,8 @@ int install ( install_flag_konst_t installFlag , long hostId , char *dir ,
system ( tmp );
sprintf(tmp,
"rcp "
"%scat/gbdmoz.structure.dat.new "
"%s:%scat/gbdmoz.structure.dat.new",
"%scatdb/gbdmoz.structure.dat.new "
"%s:%scatdb/gbdmoz.structure.dat.new",
dir,
iptoa(h2->m_ip),
h2->m_dir);
@ -4588,8 +4588,8 @@ int install ( install_flag_konst_t installFlag , long hostId , char *dir ,
system ( tmp );
sprintf(tmp,
"rcp "
"%scat/gbdmoz.content.dat.new "
"%s:%scat/gbdmoz.content.dat.new",
"%scatdb/gbdmoz.content.dat.new "
"%s:%scatdb/gbdmoz.content.dat.new",
dir,
iptoa(h2->m_ip),
h2->m_dir);
@ -4597,8 +4597,8 @@ int install ( install_flag_konst_t installFlag , long hostId , char *dir ,
system ( tmp );
sprintf(tmp,
"rcp "
"%scat/gbdmoz.content.dat.new.diff "
"%s:%scat/gbdmoz.content.dat.new.diff",
"%scatdb/gbdmoz.content.dat.new.diff "
"%s:%scatdb/gbdmoz.content.dat.new.diff",
dir,
iptoa(h2->m_ip),
h2->m_dir);
@ -4694,8 +4694,8 @@ int install ( install_flag_konst_t installFlag , long hostId , char *dir ,
if ( h2->m_hostId == 0 ) continue;
sprintf(tmp,
"rcp "
"%scat/content.rdf.u8 "
"%s:%scat/content.rdf.u8",
"%scatdb/content.rdf.u8 "
"%s:%scatdb/content.rdf.u8",
dir,
iptoa(h2->m_ipShotgun),
h2->m_dir);
@ -4703,8 +4703,8 @@ int install ( install_flag_konst_t installFlag , long hostId , char *dir ,
system ( tmp );
sprintf(tmp,
"rcp "
"%scat/structure.rdf.u8 "
"%s:%scat/structure.rdf.u8",
"%scatdb/structure.rdf.u8 "
"%s:%scatdb/structure.rdf.u8",
dir,
iptoa(h2->m_ipShotgun),
h2->m_dir);
@ -4712,8 +4712,8 @@ int install ( install_flag_konst_t installFlag , long hostId , char *dir ,
system ( tmp );
sprintf(tmp,
"rcp "
"%scat/gbdmoz.structure.dat "
"%s:%scat/gbdmoz.structure.dat",
"%scatdb/gbdmoz.structure.dat "
"%s:%scatdb/gbdmoz.structure.dat",
dir,
iptoa(h2->m_ipShotgun),
h2->m_dir);
@ -4721,8 +4721,8 @@ int install ( install_flag_konst_t installFlag , long hostId , char *dir ,
system ( tmp );
sprintf(tmp,
"rcp "
"%scat/gbdmoz.content.dat "
"%s:%scat/gbdmoz.content.dat",
"%scatdb/gbdmoz.content.dat "
"%s:%scatdb/gbdmoz.content.dat",
dir,
iptoa(h2->m_ipShotgun),
h2->m_dir);
@ -4730,8 +4730,8 @@ int install ( install_flag_konst_t installFlag , long hostId , char *dir ,
//system ( tmp );
//sprintf(tmp,
// "rcp "
// "%scat/gbdmoz.content.dat.diff "
// "%s:%scat/gbdmoz.content.dat.diff",
// "%scatdb/gbdmoz.content.dat.diff "
// "%s:%scatdb/gbdmoz.content.dat.diff",
// dir,
// iptoa(h2->m_ip),
// h2->m_dir);
@ -4745,8 +4745,8 @@ int install ( install_flag_konst_t installFlag , long hostId , char *dir ,
if ( h2->m_hostId == 0 ) continue;
sprintf(tmp,
"rcp "
"%scat/content.rdf.u8.new "
"%s:%scat/content.rdf.u8.new",
"%scatdb/content.rdf.u8.new "
"%s:%scatdb/content.rdf.u8.new",
dir,
iptoa(h2->m_ipShotgun),
h2->m_dir);
@ -4754,8 +4754,8 @@ int install ( install_flag_konst_t installFlag , long hostId , char *dir ,
system ( tmp );
sprintf(tmp,
"rcp "
"%scat/structure.rdf.u8.new "
"%s:%scat/structure.rdf.u8.new",
"%scatdb/structure.rdf.u8.new "
"%s:%scatdb/structure.rdf.u8.new",
dir,
iptoa(h2->m_ipShotgun),
h2->m_dir);
@ -4763,8 +4763,8 @@ int install ( install_flag_konst_t installFlag , long hostId , char *dir ,
system ( tmp );
sprintf(tmp,
"rcp "
"%scat/gbdmoz.structure.dat.new "
"%s:%scat/gbdmoz.structure.dat.new",
"%scatdb/gbdmoz.structure.dat.new "
"%s:%scatdb/gbdmoz.structure.dat.new",
dir,
iptoa(h2->m_ipShotgun),
h2->m_dir);
@ -4772,8 +4772,8 @@ int install ( install_flag_konst_t installFlag , long hostId , char *dir ,
system ( tmp );
sprintf(tmp,
"rcp "
"%scat/gbdmoz.content.dat.new "
"%s:%scat/gbdmoz.content.dat.new",
"%scatdb/gbdmoz.content.dat.new "
"%s:%scatdb/gbdmoz.content.dat.new",
dir,
iptoa(h2->m_ipShotgun),
h2->m_dir);
@ -4781,8 +4781,8 @@ int install ( install_flag_konst_t installFlag , long hostId , char *dir ,
system ( tmp );
sprintf(tmp,
"rcp "
"%scat/gbdmoz.content.dat.new.diff "
"%s:%scat/gbdmoz.content.dat.new.diff",
"%scatdb/gbdmoz.content.dat.new.diff "
"%s:%scatdb/gbdmoz.content.dat.new.diff",
dir,
iptoa(h2->m_ipShotgun),
h2->m_dir);
@ -11036,7 +11036,8 @@ void dumpTagdb (char *coll,long startFileNum,long numFiles,bool includeTree,
//g_conf.m_spiderdbMaxTreeMem = 1024*1024*30;
g_tagdb.init ();
g_collectiondb.init(true);
g_tagdb.addColl ( coll, false );
if ( rdbId == RDB_TAGDB ) g_tagdb.addColl ( coll, false );
if ( rdbId == RDB_CATDB ) g_catdb.init();
key128_t startKey ;
key128_t endKey ;
startKey.setMin();
@ -11101,6 +11102,23 @@ void dumpTagdb (char *coll,long startFileNum,long numFiles,bool includeTree,
printf("corrupt tagdb rec k.n0=%llu",k.n0);
continue;
}
// catdb?
if ( rdbId == RDB_CATDB ) {
// for debug!
CatRec crec;
crec.set ( NULL,
data ,
size ,
false);
fprintf(stdout,
"key=%s caturl=%s #catids=%li version=%li\n"
,KEYSTR(&k,12)
,crec.m_url
,(long)crec.m_numCatids
,(long)crec.m_version
);
continue;
}
// parse it up
//TagRec *tagRec = (TagRec *)rec;
Tag *tag = (Tag *)rec;
@ -13997,10 +14015,10 @@ void saveRdbs ( int fd , void *state ) {
last = rdb->getLastWriteTime();
if ( now - last > delta )
if ( ! rdb->close(NULL,NULL,false,false)) return;
//rdb = g_catdb.getRdb();
//last = rdb->getLastWriteTime();
//if ( now - last > delta )
// if ( ! rdb->close(NULL,NULL,false,false)) return;
rdb = g_catdb.getRdb();
last = rdb->getLastWriteTime();
if ( now - last > delta )
if ( ! rdb->close(NULL,NULL,false,false)) return;
//rdb = g_indexdb.getRdb();
//last = rdb->getLastWriteTime();
//if ( now - last > delta )

@ -6,7 +6,8 @@
#include "HashTableT.h"
//make the key, it is just the needles ptr
static HashTableT<unsigned long long , char*> s_quickTables;
//static HashTableT<unsigned long long , char*> s_quickTables;
static HashTableX s_quickTables;
/*
// returns false and sets g_errno on error
@ -63,6 +64,9 @@ bool fast_highlight ( // highlight these query terms:
// to lower and store into tmp[]. TODO.
// . a space (includes \r \n) in a needle will match a consecutive sequence
// of spaces in the haystack
#define BITVEC unsigned long long
char *getMatches2 ( Needle *needles ,
long numNeedles ,
char *haystack ,
@ -108,51 +112,69 @@ char *getMatches2 ( Needle *needles ,
// . TODO: use a static cache of like 4 of these tables where the key
// is the Needles ptr ... done
long numNeedlesToInit = numNeedles;
char space[256 * 5 * sizeof(unsigned long)];
char space[256 * 6 * sizeof(BITVEC)];
char *buf = NULL;
unsigned long *s0;
unsigned long *s1;
unsigned long *s2;
unsigned long *s3;
unsigned long *s4;
BITVEC *s0;
BITVEC *s1;
BITVEC *s2;
BITVEC *s3;
BITVEC *s4;
BITVEC *s5;
/*
static bool s_quickTableInit = false;
static char s_qtbuf[128*(12+1)*2];
long slot = -1;
if(saveQuickTables) {
uint64_t key = (uint32_t)needles;
long slot = s_quickTables.getSlot(key);
if(slot == -1) {
buf = (char*)mcalloc(sizeof(unsigned long)*256*5,
"matches");
if(buf) s_quickTables.addKey(key, buf);
//sanity check, no reason why there needs to be a
//limit, I just don't expect there to be this many
//static needles at this point.
if(s_quickTables.getNumSlotsUsed() > 32){
char *xx=NULL; *xx = 0;
}
if ( ! s_quickTableInit ) {
s_quickTableInit = true;
s_quickTables.set(8,4,128,s_qtbuf,256*13,false,0,"qx");
}
else {
uint64_t key = (uint32_t)needles;
slot = s_quickTables.getSlot(&key);
if ( slot >= 0 ) {
buf = s_quickTables.getValueFromSlot(slot);
numNeedlesToInit = 0;
}
}
*/
if(!buf) {
buf = space;
memset ( buf , 0 , sizeof(unsigned long)*256*5);
memset ( buf , 0 , sizeof(BITVEC)*256*6);
}
long offset = 0;
s0 = (unsigned long*)(buf + offset);
offset += sizeof(unsigned long)*256;
s1 = (unsigned long*)(buf + offset);
offset += sizeof(unsigned long)*256;
s2 = (unsigned long*)(buf + offset);
offset += sizeof(unsigned long)*256;
s3 = (unsigned long*)(buf + offset);
offset += sizeof(unsigned long)*256;
s4 = (unsigned long*)(buf + offset);
/*
if( useQuickTables && slot == -1 ) {
//buf = (char*)mcalloc(sizeof(unsigned long)*256*5,
// "matches");
if(buf) s_quickTables.addKey(&key, &buf);
//sanity check, no reason why there needs to be a
//limit, I just don't expect there to be this many
//static needles at this point.
if(s_quickTables.getNumSlotsUsed() > 32){
char *xx=NULL; *xx = 0;
}
}
*/
unsigned long mask;
// try 64 bit bit vectors now since we doubled # of needles
long offset = 0;
s0 = (BITVEC *)(buf + offset);
offset += sizeof(BITVEC)*256;
s1 = (BITVEC *)(buf + offset);
offset += sizeof(BITVEC)*256;
s2 = (BITVEC *)(buf + offset);
offset += sizeof(BITVEC)*256;
s3 = (BITVEC *)(buf + offset);
offset += sizeof(BITVEC)*256;
s4 = (BITVEC *)(buf + offset);
offset += sizeof(BITVEC)*256;
s5 = (BITVEC *)(buf + offset);
BITVEC mask;
// set the letter tables, s0[] through sN[], for each needle
for ( long i = 0 ; i < numNeedlesToInit ; i++ ) {
@ -160,7 +182,8 @@ char *getMatches2 ( Needle *needles ,
QUICKPOLL(niceness);
unsigned char *w = (unsigned char *)needles[i].m_string;
unsigned char *wend = w + needles[i].m_stringSize;
mask = (1<<(i&0x1f)); // (1<<(i%32));
// BITVEC is now 64 bits
mask = (1<<(i&0x3f)); // (1<<(i%64));
// if the needle is small, fill up the remaining letter tables
// with its mask... so it matches any character in haystack.
s0[(unsigned char)to_lower_a(*w)] |= mask;
@ -172,6 +195,7 @@ char *getMatches2 ( Needle *needles ,
s2[j] |= mask;
s3[j] |= mask;
s4[j] |= mask;
s5[j] |= mask;
}
continue;
}
@ -184,6 +208,7 @@ char *getMatches2 ( Needle *needles ,
s2[j] |= mask;
s3[j] |= mask;
s4[j] |= mask;
s5[j] |= mask;
}
continue;
}
@ -195,6 +220,7 @@ char *getMatches2 ( Needle *needles ,
for ( long j = 0 ; j < 256 ; j++ ) {
s3[j] |= mask;
s4[j] |= mask;
s5[j] |= mask;
}
continue;
}
@ -206,12 +232,24 @@ char *getMatches2 ( Needle *needles ,
if ( w >= wend ) {
for ( long j = 0 ; j < 256 ; j++ ) {
s4[j] |= mask;
s5[j] |= mask;
}
continue;
}
s4[(unsigned char)to_lower_a(*w)] |= mask;
s4[(unsigned char)to_upper_a(*w)] |= mask;
w += 1;//step;
if ( w >= wend ) {
for ( long j = 0 ; j < 256 ; j++ ) {
s5[j] |= mask;
}
continue;
}
s5[(unsigned char)to_lower_a(*w)] |= mask;
s5[(unsigned char)to_upper_a(*w)] |= mask;
w += 1;//step;
}
// return a ptr to the first match if we should, this is it
@ -245,6 +283,8 @@ char *getMatches2 ( Needle *needles ,
if ( ! mask ) continue;
mask &= s4[*(p+4)];
if ( ! mask ) continue;
mask &= s5[*(p+5)];
if ( ! mask ) continue;
//debugCount++;
/*
// display
@ -273,7 +313,7 @@ char *getMatches2 ( Needle *needles ,
// we got a good candidate, loop through all the needles
for ( long j = 0 ; j < numNeedles ; j++ ) {
// skip if does not match mask, will save time
if ( ! ((1<<(j&0x1f)) & mask) ) continue;
if ( ! ((1<<(j&0x3f)) & mask) ) continue;
if( needles[j].m_stringSize > 3) {
// ensure first 4 bytes matches this needle's
if (needles[j].m_string[0]!=to_lower_a(*(p+0)))
@ -421,7 +461,7 @@ char *getMatches2 ( Needle *needles ,
// we got a good candidate, loop through all the needles
for ( long j = 0 ; j < numNeedles ; j++ ) {
// skip if does not match mask, will save time
if ( ! ((1<<(j&0x1f)) & mask) ) continue;
if ( ! ((1<<(j&0x3f)) & mask) ) continue;
if( needles[j].m_stringSize > 3) {
// ensure first 4 bytes matches this needle's
if (needles[j].m_string[0]!=to_lower_a(*(p+0)))

2520
plotter.h

File diff suppressed because it is too large Load Diff

@ -16,8 +16,10 @@
bool mainShutdown ( bool urgent ) { return true; }
bool closeAll ( void *state , void (* callback)(void *state) ) {return true;}
bool allExit ( ) { return true; }
long g_qbufNeedSave = false;
SafeBuf g_qbuf;
//long g_qbufNeedSave = false;
//SafeBuf g_qbuf;
bool sendPageSEO(class TcpSocket *s, class HttpRequest *hr) {return true;}
int main ( int argc , char *argv[] ) {
bool addWWW = true;