Remove commented-out code

2025-07-14 02:36:06 -04:00 · 2016-02-25 22:05:04 +01:00
parent 0e392c9921
commit ab09100b1f
2 changed files with 20 additions and 133 deletions
--- a/XmlDoc.cpp
+++ b/XmlDoc.cpp
@ -5062,13 +5062,6 @@ HashTableX *XmlDoc::getCountTable ( ) {
 	char *fv = getFragVec();
 	if ( ! fv || fv == (void *)-1 ) return (HashTableX *)fv;

-	//LinkInfo *info2    = getLinkInfo2();
-	//if ( ! info2 || info2 == (LinkInfo *)-1 ) return (HashTableX *)info2;
-
-	// init our count table otherwise
-	//if(! m_countTable.set( 8,4,1024,NULL,0,false,m_niceness,"xmlcnttbl"))
-	//	return NULL;
-
 	// breathe
 	QUICKPOLL ( m_niceness );

@ -5079,9 +5072,6 @@ HashTableX *XmlDoc::getCountTable ( ) {
 	// shortcut
 	HashTableX *ct = &m_countTable;

-	// reset the counts, just in case set() below does not
-	//ct->reset();
-
 	// ez var
 	int64_t  *wids  = words->getWordIds    ();
 	nodeid_t   *tids  = words->getTagIds     ();
@ -5096,9 +5086,6 @@ HashTableX *XmlDoc::getCountTable ( ) {
 	if (!ct->set(8,4,numSlots,NULL,0,false,m_niceness,"xmlct"))
 	  return (HashTableX *)NULL;

-	//char *ff = getFragVec ( ) ;
-	//if ( ! ff ) return false;
-
 	// . now hash all the phrase ids we have in order to see if the phrase
 	//   is unique or not. if phrase is repeated a lot we punish the scores
 	//   of the individual words in the phrase and boost the score of the
@ -5108,8 +5095,7 @@ HashTableX *XmlDoc::getCountTable ( ) {
 		QUICKPOLL ( m_niceness );
 		// add the word
 		if ( wids[i] == 0LL ) continue;
-		//if ( wids[i] == 708411945052722517LL )
-		//	log("hey4 got new pid=%"INT64" i=%"INT32"",pids[i],i);
+
 		// . skip if in repeated fragment
 		// . unfortunately we truncate the frag vec to like
 		//   the first 80,000 words for performance reasons
@ -5133,7 +5119,7 @@ HashTableX *XmlDoc::getCountTable ( ) {
 		// breathe
 		QUICKPOLL ( m_niceness );
 		// skip if not a meta tag
-		if ( tids[i] != 68 ) continue;
+		if ( tids[i] != TAG_META ) continue;
 		// find the "content=" word
 		char *w    = wptrs[i];
 		int32_t  wlen = wlens[i];
@ -5146,9 +5132,10 @@ HashTableX *XmlDoc::getCountTable ( ) {
 		p += 8;
 		// skip if empty meta content
 		if ( wend - p <= 0 ) continue;
+
 		// our ouw hash
 		if ( ! hashString_ct ( ct , p , wend - p ) )
-		  return (HashTableX *)NULL;
+			return (HashTableX *)NULL;
 	}
 	// add each incoming link text
 	for ( Inlink *k=NULL ; info1 && (k=info1->getNextInlink(k)) ; ) {
@ -24294,15 +24281,9 @@ bool getDensityRanks ( int64_t *wids ,
 // . string is usually the document body or inlink text of an inlinker or
 //   perhaps meta keywords. it could be anything. so we need to create this
 //   vector based on that string, which is represented by words/phrases here.
-bool getDiversityVec ( Words *words ,
-		       Phrases *phrases ,
-		       HashTableX *countTable ,
-		       SafeBuf *sbWordVec ,
-		       //SafeBuf *sbPhraseVec ,
-		       int32_t niceness ) {
-
+bool getDiversityVec( Words *words, Phrases *phrases, HashTableX *countTable, SafeBuf *sbWordVec,
+					  int32_t niceness ) {
 	int64_t  *wids  = words->getWordIds ();
-	//nodeid_t   *tids  = words->getTagIds  ();
 	int32_t        nw    = words->getNumWords();
 	int64_t  *pids  = phrases->getPhraseIds2();

@ -24337,7 +24318,7 @@ bool getDiversityVec ( Words *words ,
 		int64_t pid = pids[i];
 		// get the word and phrase weights for term #i
 		float ww2;
-		//float pw2;
+
 		getWordToPhraseRatioWeights ( lastPid  , // pids[i-1],
 					      wids[i]  ,
 					      pid      ,
@ -24687,7 +24668,6 @@ void getWordToPhraseRatioWeights ( int64_t   pid1 , // pre phrase
 				   int64_t   pid2 ,
 				   int64_t   wid2 , // post word
 				   float      *retww   ,
-				   //float      *retpw   ,
 				   HashTableX *tt1  ,
 				   int32_t        titleRecVersion ) {

--- a/XmlDoc_Indexing.cpp
+++ b/XmlDoc_Indexing.cpp
@ -2675,107 +2675,12 @@ bool XmlDoc::hashIsAdult ( HashTableX *tt ) {
 	return true;
 }

-
-/*
-  BR 20160106 removed. We don't want to store this in posdb as we don't use it.
-
-// hash destination urls for embedded gb search boxes
-bool XmlDoc::hashSubmitUrls ( HashTableX *tt ) {
-
-	setStatus ( "hashing submit urls" );
-
-	Url *baseUrl = getBaseUrl();
-	if ( ! baseUrl || baseUrl == (Url *)-1) { char*xx=NULL;*xx=0;}
-
-	for ( int32_t i = 0 ; i < m_xml.getNumNodes() ; i++ ) {
-		// Find forms
-		if ( m_xml.getNodeId(i) != TAG_FORM ) continue;
-		if ( m_xml.isBackTag(i) ) continue;
-		int32_t score =  *getSiteNumInlinks8() * 256;
-		if ( score <= 0 ) score = 1;
-		int32_t len;
-		char *s = m_xml.getString ( i , "action" , &len );
-		if (!s || len == 0) continue;
-		Url url; url.set(baseUrl, s, len, true);
-
-		char *buf  = url.getUrl();
-		int32_t  blen = url.getUrlLen();
-
-		// update hash parms
-		HashInfo hi;
-		hi.m_tt        = tt;
-		hi.m_hashGroup = HASHGROUP_INTAG;
-		hi.m_prefix    = "gbsubmiturl";
-		hi.m_desc      = "submit url for form";
-
-		// this returns false on failure
-		if ( ! hashString ( buf,blen,&hi ) ) return false;
-	}
-	return true;
-}
-*/
-
-
-/*
-bool XmlDoc::hashSingleTerm ( int64_t termId , HashInfo *hi ) {
-	// combine with a non-NULL prefix
-	if ( hi->m_prefix ) {
-		int64_t prefixHash = hash64b ( hi->m_prefix );
-		// sanity test, make sure it is in supported list
-		if ( getFieldCode3 ( prefixHash ) == FIELD_GENERIC ) {
-			char *xx=NULL;*xx=0; }
-		termId = hash64 ( termId , prefixHash );
-	}
-
-	// save it?
-	if ( m_wts && ! ::storeTerm ( "binary",6,termId,hi,0,0,
-				      MAXDENSITYRANK,
-				      MAXDIVERSITYRANK,
-				      MAXWORDSPAMRANK,
-				      hi->m_hashGroup,
-				      false,&m_wbuf,m_wts,false) )
-		return false;
-
-	// shortcut
-	HashTableX *dt = hi->m_tt;
-	// sanity check
-	if ( dt->m_ks != sizeof(key_t) ) { char *xx=NULL;*xx=0; }
-	// make the key like we do in hashWords()
-	key96_t k;
-	k.n1 = hi->m_date;
-	k.n0 = termId;
-	// get current score for this wordid
-	int32_t slot = dt->getSlot ( &k );
-	// does this termid/date already exist?
-	if ( slot >= 0 ) {
-		// done
-		return true;
-	}
-	// otherwise, add a new slot
-	char val = 1;
-	if ( ! hi->m_tt->addKey ( (char *)k , &val ) )
-		return false;
-	// return true on success
-	return true;
-}
-*/
-
-
-bool XmlDoc::hashSingleTerm ( char       *s         ,
-			      int32_t        slen      ,
-			      HashInfo   *hi        ) {
+bool XmlDoc::hashSingleTerm( char *s, int32_t slen, HashInfo *hi ) {
 	// empty?
 	if ( slen <= 0 ) return true;
 	if ( ! m_versionValid    ) { char *xx=NULL;*xx=0; }
 	if ( hi->m_useCountTable && ! m_countTableValid){char *xx=NULL;*xx=0; }

-	//
-	// POSDB HACK: temporarily turn off posdb until we hit 1B pages!
-	//
-	//if ( ! m_storeTermListInfo )
-	//	return true;
-
-
 	// a single blob hash
        int64_t termId = hash64 ( s , slen );
 	// combine with prefix
@ -2841,12 +2746,13 @@ bool XmlDoc::hashSingleTerm ( char       *s         ,
 bool XmlDoc::hashString ( char *s, HashInfo *hi ) {
 	return hashString ( s , gbstrlen(s), hi ); }

-bool XmlDoc::hashString ( char       *s          ,
-			  int32_t        slen       ,
-			  HashInfo   *hi         ) {
+bool XmlDoc::hashString( char *s, int32_t slen, HashInfo *hi ) {
 	if ( ! m_versionValid        ) { char *xx=NULL;*xx=0; }
+
 	if ( hi->m_useCountTable && ! m_countTableValid){char *xx=NULL;*xx=0; }
+
 	if ( ! m_siteNumInlinksValid ) { char *xx=NULL;*xx=0; }
+
 	int32_t *sni = getSiteNumInlinks();
 	return   hashString3( s                ,
 			      slen             ,
@ -2874,8 +2780,7 @@ bool XmlDoc::hashString3( char       *s              ,
 	Words   words;
 	Bits    bits;
 	Phrases phrases;
-	//Weights weights;
-	//Synonyms synonyms;
+
 	if ( ! words.set   ( s , slen , true , niceness ) )
 		return false;
 	if ( ! bits.set    ( &words , version , niceness ) )
@ -3021,8 +2926,9 @@ bool XmlDoc::hashWords3 ( //int32_t        wordStart ,
 	// phrase score. thus, a search for 'mexico' should not bring up
 	// the page for university of new mexico!
 	SafeBuf dwbuf;
-	if(!getDiversityVec ( words,phrases,countTable,&dwbuf,niceness))
+	if ( !getDiversityVec( words, phrases, countTable, &dwbuf, niceness ) ) {
 		return false;
+	}
 	char *wdv = dwbuf.getBufStart();

 	int32_t nw = words->getNumWords();
@ -3154,10 +3060,11 @@ bool XmlDoc::hashWords3 ( //int32_t        wordStart ,
 		if ( m_wts && langVec ) langId = langVec[i];

 		char wd;
-		if ( hi->m_useCountTable ) wd = wdv[i];
-		else                       wd = MAXDIVERSITYRANK;
-
-
+		if ( hi->m_useCountTable ) {
+			wd = wdv[i];
+		} else {
+			wd = MAXDIVERSITYRANK;
+		}

 		// BR 20160115: Don't hash 'junk' words
 		bool skipword = false;