Remove commented-out DateParse & other dead code from main.cpp

Ai Lin Chia
2015-12-07 18:25:05 +01:00
parent eaa948fa20
commit 4b7020bf88
10 changed files with 8 additions and 302 deletions

@@ -66,7 +66,6 @@ class Address *g_address; // for debug
#include "gb-include.h"
#include "Address.h"
#include "Sections.h"
//#include "DateParse2.h"
#include "Abbreviations.h"
#include "Phrases.h"
//#include "Weights.h"

@@ -3,7 +3,6 @@
#include "CatRec.h"
//#include "SiteBonus.h"
#include "Lang.h"
//#include "DateParse.h"
//static int32_t getY(Xml *xml, int32_t n0,int32_t n1,int32_t X,
// char *strx,char *stry,int32_t def);
@@ -922,15 +921,4 @@ char* CatRec::getAdultStr() {
}
char *CatRec::getPubDateFmtStr() {
int32_t fmt = getScoreForType(SiteType::DATE_FORMAT);
switch (fmt) {
case DateParse::DATE_FMT_AMER:
return "American";
case DateParse::DATE_FMT_EURO:
return "European";
}
return "Unknown/Ambiguous";
}
*/

@@ -123,8 +123,6 @@ class CatRec {
bool isKidSafe() { return m_adultLevel == RATED_G; }
char* getAdultStr();
char *getPubDateFmtStr();
int32_t getTimeStamp() { return m_timeStamp; }
char *getComment() { return m_comment; }
char *getUsername() { return m_username; }

@@ -6,7 +6,6 @@
#include "Dns.h"
//#include "Thesaurus.h" // SynonymLinkGroup structure
#include "PostQueryRerank.h" // for ComTopInDmozRec
//#include "DateParse.h" // TimeZoneInfo structure
#include "types.h"
template<class Key_t, class Val_t>

@@ -16,7 +16,6 @@
#include "Users.h"
#include "Tagdb.h"
#include "Spider.h"
//#include "DateParse2.h"
// TODO: meta redirect tag to host if hostId not ours
static bool gotTitleRec ( void *state );

Pops.cpp (183 lines changed)

@@ -14,189 +14,6 @@ Pops::~Pops() {
mfree ( m_pops , m_popsSize , "Pops" );
}
/*
// should be one for each host in the network
bool Pops::readPopFiles ( ) {
int32_t n = g_hostdb.getNumGroups();
for ( int32_t i = 0 ; i < n ; i++ ) {
// note it
log(LOG_INIT,"db: Reading %s/pops.%"INT32" of %"INT32".",
g_conf.m_dir,i,n);
//
}
}
bool Pops::makeFinalPopFile ( char *coll ) {
int32_t n = g_hostdb.getNumGroups();
// tell each host to write his pop file to html directory
Msg3e msg3e;
for ( int32_t i = 0 ; i < n ; i++ )
msg3e.sendRequest ( );
// no more than 4096 groups supported for this now, but up later maybe
char *buf [ 4096 ];
// retrieve it from each host (msg3f = getFile)
for ( int32_t i = 0 ; i < n ; i++ ) {
// get over http
g_httpServer.getDoc ( ... );
// save to disk
out[i].write ( content , contentLen );
}
// merge out file
BigFile out;
// then merge all of them out
for ( int32_t i = 0 ; i < n ; i++ ) {
}
// merge final
// distribute final copy to all
// clean up locals
}
// . make the pop file from indexdb
// . a bunch of wordhash/#docs pairs
// . word hash is lower 4 bytes of the termid
// . first int64_t in file is the # of docs
bool Pops::makeLocalPopFile ( char *coll ) {
// get the rdbmap of the first indexdb file
RdbBase *base = g_indexdb.getBase ( coll );
//RdbMap *map = base->getMap(0);
if ( ! base )
return log("admin: Collection \"%s\" does not exist.",coll);
BigFile *f = base->getFile(0);
// term must be in at least this many docs
int32_t minDocs = 4000;
// log it
log(LOG_INFO,"admin: Making popularity file from %s for coll \"%s\".",
f->getFilename(),coll);
log(LOG_INFO,"admin: Using cutoff of %"INT32" docs.",minDocs);
// output the wordId/count pairs to this file
BigFile out;
char outFilename[256];
sprintf(outFilename,"%s/popout.%"INT32"",g_conf.m_dir,g_hostdb.m_hostId);
out.set ( outFilename );
// store # of docs
int64_t n = g_titledb.getGlobalNumDocs();
out.write ( &n , 8 );
// store key read from disk into here
char tmp [ MAX_KEY_BYTES ];
//
//
// this part is taken from main.cpp:dumpIndexdb()
//
//
char buf [ 1000000 ];
int32_t bufSize = 1000000;
if ( ! f.open ( O_RDONLY ) ) return;
// init our vars
bool haveTop = false;
char top[6];
memset ( top , 0 , 6 );
bool warned = false;
// how big is this guy?
int64_t filesize = f.getFileSize();
// reset error number
g_errno = 0;
// the big read loop
loop:
int64_t readSize = bufSize;
if ( off + readSize > filesize ) readSize = filesize - off;
// return if we're done reading the whole file
if ( readSize <= 0 ) return;
// read in as much as we can
f.read ( buf , readSize , off );
// bail on read error
if ( g_errno ) {
log("admin: Read of %s failed.",f.getFilename());
return;
}
char *p = buf;
char *pend = buf + readSize;
inner:
// parse out the keys
int32_t size;
if ( ((*p) & 0x02) == 0x00 ) size = ks;
else size = ks-6;
if ( p + size > pend ) {
// skip what we read
off += readSize ;
// back up so we don't split a key we should not
off -= ( pend - p );
// read more
goto loop;
}
// new top?
if ( size == ks ) { gbmemcpy ( top , p + (ks-6) , 6 ); haveTop = true; }
// warning msg
if ( ! haveTop && ! warned ) {
warned = true;
log("admin: Warning: first key is a half key.");
}
//
// BUT i added this part to the main.cpp stuff
//
// was it the same as last key?
if ( ks == 6 )
count++;
// ok, this starts a new key
else {
// did the previous key meet the min count requirement?
if ( count >= minDocs ) {
// if so, store the upper 4 bytes of the termid
int32_t h;
gbmemcpy ( &h , tmp+8 , 4 );
// write it out
out.write ( &h , 4 );
// and the count
out.write ( &count , 4 );
}
// reset, we got a new termid
count = 1;
}
//
// end new stuff
//
// make the key
gbmemcpy ( tmp , p , ks-6 );
gbmemcpy ( tmp + ks-6 , top , 6 );
// print the key
//if ( ks == 12 )
// fprintf(stdout,"%08lli) %08"XINT32" %016"XINT64"\n",
// off + (p - buf) ,
// *(int32_t *)(tmp+8),*(int64_t *)tmp );
//else
// fprintf(stdout,"%08lli) %016"XINT64" %016"XINT64"\n",
// off + (p - buf) ,
// *(int64_t *)(tmp+8),*(int64_t *)tmp );
// go to next key
p += size;
// loop up
goto inner;
}
*/
bool Pops::set ( Words *words , int32_t a , int32_t b ) {
int32_t nw = words->getNumWords();
int64_t *wids = words->getWordIds ();
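
The deleted Pops code above documents the pop-file format: the first int64_t on disk is the global document count, followed by repeated pairs of a 4-byte word hash taken from the termid and a 4-byte document count, emitted only for terms appearing in at least minDocs (4000) documents. The scan it was lifted from also shows indexdb's key compression: a key whose 0x02 bit is clear is full-size and refreshes the shared top 6 bytes, while a key with the bit set is a 6-byte half key that reuses them. A minimal standalone reader for that pop-file layout, as a sketch under those assumptions (the file path and names are illustrative, not from the commit):

// sketch only: dumps the popout layout the deleted comments describe
#include <cstdio>
#include <cstdint>

int main ( int argc , char *argv[] ) {
	if ( argc != 2 ) {
		fprintf ( stderr , "usage: %s <popout file>\n" , argv[0] );
		return 1;
	}
	FILE *f = fopen ( argv[1] , "rb" );
	if ( ! f ) { perror ( "fopen" ); return 1; }
	// header: total # of docs in the index
	int64_t numDocs;
	if ( fread ( &numDocs , 8 , 1 , f ) != 1 ) { fclose ( f ); return 1; }
	printf ( "docs in index: %lld\n" , (long long)numDocs );
	// body: (word hash , doc count) int32_t pairs, one per popular term
	int32_t hash , count;
	while ( fread ( &hash  , 4 , 1 , f ) == 1 &&
	        fread ( &count , 4 , 1 , f ) == 1 )
		printf ( "%08x %d\n" , (unsigned)hash , (int)count );
	fclose ( f );
	return 0;
}
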

@@ -1992,8 +1992,6 @@ bool Process::saveBlockingFiles2 ( ) {
// this one too
// g_classifier.save();
//g_siteBonus.save();
// save state for top docs
//g_pageTopDocs.saveStateToDisk();
// save the turk url cache, urls and user states
//g_pageTurk.saveCache();
@@ -2034,8 +2032,6 @@ void Process::resetAll ( ) {
g_profiler .reset();
g_autoBan .reset();
//g_qtable .reset();
//g_pageTopDocs .destruct();
//g_pageNetTest .destructor();
for ( int32_t i = 0; i < MAX_GENERIC_CACHES; i++ )
g_genericCache[i].reset();

@@ -6,7 +6,6 @@
#include "Unicode.h" // for html entities that return unicode
#include "Titledb.h"
#include "Words.h"
//#include "DateParse2.h"
Xml::Xml () {
m_xml = NULL;

@@ -541,7 +541,6 @@ class XmlDoc {
int64_t **getWikiDocIds ( ) ;
void gotWikiResults ( class UdpSlot *slot );
//class DateParse2 *getDateParse2 ( ) ;
//class HashTableX *getClockCandidatesTable();
int32_t getOutlinkAge ( int32_t outlinkNum ) ;
char *getIsPermalink ( ) ;
@@ -1448,7 +1447,6 @@ class XmlDoc {
bool m_isWWWDupValid;
bool m_linkInfo1Valid;
bool m_linkSiteHashesValid;
//bool m_dateParse2Valid;
bool m_sectionsReplyValid;
bool m_sectionsVotesValid;
bool m_sectiondbDataValid;
@@ -1542,7 +1540,6 @@ class XmlDoc {
//Query m_wq;
//SearchInput m_si;
//Msg40 m_msg40;
//DateParse2 m_dateParse2;
bool m_printedMenu;
//HashTableX m_clockCandidatesTable;
//SafeBuf m_cctbuf;

main.cpp (102 lines changed)

@@ -93,8 +93,6 @@ bool registerMsgHandlers3 ( ) ;
void allExitWrapper ( int fd , void *state ) ;
//bool QuerySerializeTest( char *ff ); // Query.cpp
void rmTest();
int g_inMemcpy=0;
@@ -202,8 +200,6 @@ void countdomains( char* coll, int32_t numRecs, int32_t verb, int32_t output );
UdpProtocol g_dp; // Default Proto
//void zlibtest ( );
// installFlag konstants
typedef enum {
ifk_install = 1,
@@ -650,14 +646,6 @@ int main2 ( int argc , char *argv[] ) {
"<file>. Used to only check performance of "
"getPhrasePopularity.\n\n"
//"stemmertest <file>\n"
//"\truns the stemmer on words in <file>.\n\n"
//"queryserializetest <file>\n"
//"\tserializes every query in <file> and tracks "
//"statistics, as well as \t\nverifying consistency; "
//"takes raw strings or URLs as input\n\n"
// less common things
"gendict <coll> [numWordsToDump]\n\tgenerate "
"dictionary used for spellchecker "
@@ -3035,24 +3023,10 @@ int main2 ( int argc , char *argv[] ) {
//return 1;
}
//if( !g_pageTopDocs.init() ) {
// log( "init: PageTopDocs init failed." );
// return 1;
//}
//if( !g_pageNetTest.init() ) {
// log( "init: PageNetTest init failed." );
// return 1;
//}
//if(!Msg6a::init()) {
// log( "init: Quality Agent init failed." );
//}
//if ( ! DateParse::init() ) {
// log("db: DateParse init failed." ); return 1;
//}
//countdomains was HERE, moved up to access more mem.
// load up the dmoz categories here
@@ -3090,12 +3064,6 @@ int main2 ( int argc , char *argv[] ) {
// //return 1;
//}
// deprecated in favor of Msg13-based throttling
//if ( !g_msg6.init() ) {
// log ( "init: msg6 init failed." );
// return 1;
//}
// if(!g_profiler.init()) {
// log("profiler: init failed.");
// }
@@ -3211,26 +3179,31 @@ int main2 ( int argc , char *argv[] ) {
// 20 , // pollTime in ms
// 1000 )){ // max udp slots
// log("db: UdpServer2 init failed." ); return 1; }
// start pinging right away
if ( ! g_pingServer.init() ) {
log("db: PingServer init failed." ); return 1; }
// start up repair loop
if ( ! g_repair.init() ) {
log("db: Repair init failed." ); return 1; }
// start up daily merge loop
if ( ! g_dailyMerge.init() ) {
log("db: Daily merge init failed." ); return 1; }
// . then dns Distributed client
// . server should listen to a socket and register with g_loop
// . Only the distributed cache shall call the dns server.
if ( ! g_dns.init( h9->m_dnsClientPort ) ) {
log("db: Dns distributed client init failed." ); return 1; }
// . then dns Local client
//if ( ! g_dnsLocal.init( 0 , false ) ) {
// log("db: Dns local client init failed." ); return 1; }
// . then webserver
// . server should listen to a socket and register with g_loop
// again:
if ( ! g_httpServer.init( h9->m_httpPort, h9->m_httpsPort ) ) {
log("db: HttpServer init failed. Another gb already "
"running?" );
@@ -3254,8 +3227,7 @@ int main2 ( int argc , char *argv[] ) {
if ( ! registerMsgHandlers() ) {
log("db: registerMsgHandlers failed" ); return 1; }
// for Events.cpp event extraction we need to parse out "places" from
// each doc
// for Events.cpp event extraction we need to parse out "places" from each doc
//if ( ! initPlaceDescTable ( ) ) {
// log("events: places table init failed"); return 1; }
@@ -3292,35 +3264,6 @@ int main2 ( int argc , char *argv[] ) {
return 0;
}
// gb stemmertest
//if ( strcmp ( cmd , "stemmertest" ) == 0 ) {
// if ( argc != cmdarg + 2 ) goto printHelp;
// g_stemmer.test ( argv[cmdarg + 1] );
// return 0;
//}
// gb queryserializetest
/*
if ( strcmp ( cmd , "queryserializetest" ) == 0 ) {
if ( argc != cmdarg + 2 ) goto printHelp;
int64_t starttime = gettimeofdayInMilliseconds();
QuerySerializeTest( argv[cmdarg + 1] );
log(LOG_INFO, "query: took %"INT64"msecs for query serialize" \
"test on %s", gettimeofdayInMilliseconds() - starttime,
argv[cmdarg + 1]);
return 0;
}
*/
#ifdef _LIMIT10_
// how many pages have we indexed so far?
//int64_t numPages = g_titledb.getRdb()->getNumGlobalRecs();
int64_t numPages = g_clusterdb.getRdb()->getNumGlobalRecs();
if ( numPages > 10123466 )
log("WARNING: Over 10 million documents are in the index. "
"You have exceeded the terms of your license. "
"Please contact mwells@gigablast.com for a new license.");
#endif
// bdflush needs to be turned off because we need to control the
// writes directly. we do this by killing the write thread.
// we kill it when we need to do important reads, otherwise, if
@@ -3338,13 +3281,6 @@ int main2 ( int argc , char *argv[] ) {
//log("REMINDER: remove mem leack checking");
//log("REMINDER: put thread back in Msg39");
// . now check with gigablast.com (216.243.113.1) to see if we
// are licensed, for now, just get the doc
// . TODO: implement this (GET /license.html \r\n
// Host: www.gigablast.com\r\n\r)
// do the zlib test
//zlibtest();
// . now m_minToMerge might have changed so try to do a merge
// . only does one merge at a time
// . other rdb's will sleep and retry until it's their turn
@@ -3352,6 +3288,7 @@ int main2 ( int argc , char *argv[] ) {
//g_loop.registerSleepCallback ( 1000 ,
// NULL ,
// tryMergingWrapper );
// . register a callback to try to merge everything every 2 seconds
// . do not exit if we couldn't do this, not a huge deal
// . put this in here instead of Rdb.cpp because we don't want
@@ -3372,29 +3309,6 @@ int main2 ( int argc , char *argv[] ) {
if ( ! g_loop.registerSleepCallback(1000,NULL,tryToSyncWrapper,0))
return false;
//if( !g_loop.registerSleepCallback(2000,(void *)1,controlDumpTopDocs) )
// log("db: Failed to init dump TopDocs sleep callback.");
// MTS: removing nettest, this breaks NetGear switches when all links
// are transmitting full bore and full duplex.
//if( !g_loop.registerSleepCallback(2000,(void *)1,controlNetTest) )
// log("db: Failed to init network test sleep callback.");
//if( !g_loop.registerSleepCallback(60000,(void *)1,takeSnapshotWrapper))
// log("db: Failed to init Statsdb snapshot sleep callback.");
// check to make sure we have the latest parms
//Msg3e msg3e;
//msg3e.checkForNewParms();
// this stuff is similar to alden's msg3e but will sync collections
// that were added/deleted
//if ( ! g_parms.syncParmsWithHost0() ) {
// log("parms: error syncing parms: %s",mstrerror(g_errno));
// return 0;
//}
if(g_recoveryMode) {
//now that everything is init-ed send the message.
char buf[256];
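
The periodic hooks deleted above (the TopDocs dump, the network test, the Statsdb snapshot) were all registered through the same g_loop sleep-callback pattern that the surviving tryToSyncWrapper registration still uses. A minimal sketch of that pattern, assuming the Loop API visible in the surrounding calls; maintenanceWrapper is a hypothetical stand-in:

// hypothetical callback: sleep callbacks ignore fd; state is the
// cookie passed at registration time (NULL below)
static void maintenanceWrapper ( int fd , void *state ) {
	// ... periodic work goes here ...
}

// in main2(), once g_loop is up: fire roughly every 2000 ms; failure
// is logged but not fatal, matching the removed call sites
if ( ! g_loop.registerSleepCallback ( 2000 , NULL , maintenanceWrapper ) )
	log("db: Failed to init maintenance sleep callback.");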