Remove commented out code

This commit is contained in:
Ai Lin Chia
2016-02-25 22:05:04 +01:00
parent 0e392c9921
commit ab09100b1f
2 changed files with 20 additions and 133 deletions

@ -5062,13 +5062,6 @@ HashTableX *XmlDoc::getCountTable ( ) {
char *fv = getFragVec();
if ( ! fv || fv == (void *)-1 ) return (HashTableX *)fv;
//LinkInfo *info2 = getLinkInfo2();
//if ( ! info2 || info2 == (LinkInfo *)-1 ) return (HashTableX *)info2;
// init our count table otherwise
//if(! m_countTable.set( 8,4,1024,NULL,0,false,m_niceness,"xmlcnttbl"))
// return NULL;
// breathe
QUICKPOLL ( m_niceness );
@ -5079,9 +5072,6 @@ HashTableX *XmlDoc::getCountTable ( ) {
// shortcut
HashTableX *ct = &m_countTable;
// reset the counts, just in case set() below does not
//ct->reset();
// ez var
int64_t *wids = words->getWordIds ();
nodeid_t *tids = words->getTagIds ();
@ -5096,9 +5086,6 @@ HashTableX *XmlDoc::getCountTable ( ) {
if (!ct->set(8,4,numSlots,NULL,0,false,m_niceness,"xmlct"))
return (HashTableX *)NULL;
//char *ff = getFragVec ( ) ;
//if ( ! ff ) return false;
// . now hash all the phrase ids we have in order to see if the phrase
// is unique or not. if phrase is repeated a lot we punish the scores
// of the individual words in the phrase and boost the score of the
@ -5108,8 +5095,7 @@ HashTableX *XmlDoc::getCountTable ( ) {
QUICKPOLL ( m_niceness );
// add the word
if ( wids[i] == 0LL ) continue;
//if ( wids[i] == 708411945052722517LL )
// log("hey4 got new pid=%"INT64" i=%"INT32"",pids[i],i);
// . skip if in repeated fragment
// . unfortunately we truncate the frag vec to like
// the first 80,000 words for performance reasons
@ -5133,7 +5119,7 @@ HashTableX *XmlDoc::getCountTable ( ) {
// breathe
QUICKPOLL ( m_niceness );
// skip if not a meta tag
if ( tids[i] != 68 ) continue;
if ( tids[i] != TAG_META ) continue;
// find the "content=" word
char *w = wptrs[i];
int32_t wlen = wlens[i];
@ -5146,9 +5132,10 @@ HashTableX *XmlDoc::getCountTable ( ) {
p += 8;
// skip if empty meta content
if ( wend - p <= 0 ) continue;
// our own hash
if ( ! hashString_ct ( ct , p , wend - p ) )
return (HashTableX *)NULL;
return (HashTableX *)NULL;
}
// add each incoming link text
for ( Inlink *k=NULL ; info1 && (k=info1->getNextInlink(k)) ; ) {
@ -24294,15 +24281,9 @@ bool getDensityRanks ( int64_t *wids ,
// . string is usually the document body or inlink text of an inlinker or
// perhaps meta keywords. it could be anything. so we need to create this
// vector based on that string, which is represented by words/phrases here.
bool getDiversityVec ( Words *words ,
Phrases *phrases ,
HashTableX *countTable ,
SafeBuf *sbWordVec ,
//SafeBuf *sbPhraseVec ,
int32_t niceness ) {
bool getDiversityVec( Words *words, Phrases *phrases, HashTableX *countTable, SafeBuf *sbWordVec,
int32_t niceness ) {
int64_t *wids = words->getWordIds ();
//nodeid_t *tids = words->getTagIds ();
int32_t nw = words->getNumWords();
int64_t *pids = phrases->getPhraseIds2();
@ -24337,7 +24318,7 @@ bool getDiversityVec ( Words *words ,
int64_t pid = pids[i];
// get the word and phrase weights for term #i
float ww2;
//float pw2;
getWordToPhraseRatioWeights ( lastPid , // pids[i-1],
wids[i] ,
pid ,
@ -24687,7 +24668,6 @@ void getWordToPhraseRatioWeights ( int64_t pid1 , // pre phrase
int64_t pid2 ,
int64_t wid2 , // post word
float *retww ,
//float *retpw ,
HashTableX *tt1 ,
int32_t titleRecVersion ) {

@ -2675,107 +2675,12 @@ bool XmlDoc::hashIsAdult ( HashTableX *tt ) {
return true;
}
/*
BR 20160106 removed. We don't want to store this in posdb as we don't use it.
// hash destination urls for embedded gb search boxes
bool XmlDoc::hashSubmitUrls ( HashTableX *tt ) {
setStatus ( "hashing submit urls" );
Url *baseUrl = getBaseUrl();
if ( ! baseUrl || baseUrl == (Url *)-1) { char*xx=NULL;*xx=0;}
for ( int32_t i = 0 ; i < m_xml.getNumNodes() ; i++ ) {
// Find forms
if ( m_xml.getNodeId(i) != TAG_FORM ) continue;
if ( m_xml.isBackTag(i) ) continue;
int32_t score = *getSiteNumInlinks8() * 256;
if ( score <= 0 ) score = 1;
int32_t len;
char *s = m_xml.getString ( i , "action" , &len );
if (!s || len == 0) continue;
Url url; url.set(baseUrl, s, len, true);
char *buf = url.getUrl();
int32_t blen = url.getUrlLen();
// update hash parms
HashInfo hi;
hi.m_tt = tt;
hi.m_hashGroup = HASHGROUP_INTAG;
hi.m_prefix = "gbsubmiturl";
hi.m_desc = "submit url for form";
// this returns false on failure
if ( ! hashString ( buf,blen,&hi ) ) return false;
}
return true;
}
*/
/*
bool XmlDoc::hashSingleTerm ( int64_t termId , HashInfo *hi ) {
// combine with a non-NULL prefix
if ( hi->m_prefix ) {
int64_t prefixHash = hash64b ( hi->m_prefix );
// sanity test, make sure it is in supported list
if ( getFieldCode3 ( prefixHash ) == FIELD_GENERIC ) {
char *xx=NULL;*xx=0; }
termId = hash64 ( termId , prefixHash );
}
// save it?
if ( m_wts && ! ::storeTerm ( "binary",6,termId,hi,0,0,
MAXDENSITYRANK,
MAXDIVERSITYRANK,
MAXWORDSPAMRANK,
hi->m_hashGroup,
false,&m_wbuf,m_wts,false) )
return false;
// shortcut
HashTableX *dt = hi->m_tt;
// sanity check
if ( dt->m_ks != sizeof(key_t) ) { char *xx=NULL;*xx=0; }
// make the key like we do in hashWords()
key96_t k;
k.n1 = hi->m_date;
k.n0 = termId;
// get current score for this wordid
int32_t slot = dt->getSlot ( &k );
// does this termid/date already exist?
if ( slot >= 0 ) {
// done
return true;
}
// otherwise, add a new slot
char val = 1;
if ( ! hi->m_tt->addKey ( (char *)k , &val ) )
return false;
// return true on success
return true;
}
*/
bool XmlDoc::hashSingleTerm ( char *s ,
int32_t slen ,
HashInfo *hi ) {
bool XmlDoc::hashSingleTerm( char *s, int32_t slen, HashInfo *hi ) {
// empty?
if ( slen <= 0 ) return true;
if ( ! m_versionValid ) { char *xx=NULL;*xx=0; }
if ( hi->m_useCountTable && ! m_countTableValid){char *xx=NULL;*xx=0; }
//
// POSDB HACK: temporarily turn off posdb until we hit 1B pages!
//
//if ( ! m_storeTermListInfo )
// return true;
// a single blob hash
int64_t termId = hash64 ( s , slen );
// combine with prefix
@ -2841,12 +2746,13 @@ bool XmlDoc::hashSingleTerm ( char *s ,
bool XmlDoc::hashString ( char *s, HashInfo *hi ) {
return hashString ( s , gbstrlen(s), hi ); }
bool XmlDoc::hashString ( char *s ,
int32_t slen ,
HashInfo *hi ) {
bool XmlDoc::hashString( char *s, int32_t slen, HashInfo *hi ) {
if ( ! m_versionValid ) { char *xx=NULL;*xx=0; }
if ( hi->m_useCountTable && ! m_countTableValid){char *xx=NULL;*xx=0; }
if ( ! m_siteNumInlinksValid ) { char *xx=NULL;*xx=0; }
int32_t *sni = getSiteNumInlinks();
return hashString3( s ,
slen ,
@ -2874,8 +2780,7 @@ bool XmlDoc::hashString3( char *s ,
Words words;
Bits bits;
Phrases phrases;
//Weights weights;
//Synonyms synonyms;
if ( ! words.set ( s , slen , true , niceness ) )
return false;
if ( ! bits.set ( &words , version , niceness ) )
@ -3021,8 +2926,9 @@ bool XmlDoc::hashWords3 ( //int32_t wordStart ,
// phrase score. thus, a search for 'mexico' should not bring up
// the page for university of new mexico!
SafeBuf dwbuf;
if(!getDiversityVec ( words,phrases,countTable,&dwbuf,niceness))
if ( !getDiversityVec( words, phrases, countTable, &dwbuf, niceness ) ) {
return false;
}
char *wdv = dwbuf.getBufStart();
int32_t nw = words->getNumWords();
@ -3154,10 +3060,11 @@ bool XmlDoc::hashWords3 ( //int32_t wordStart ,
if ( m_wts && langVec ) langId = langVec[i];
char wd;
if ( hi->m_useCountTable ) wd = wdv[i];
else wd = MAXDIVERSITYRANK;
if ( hi->m_useCountTable ) {
wd = wdv[i];
} else {
wd = MAXDIVERSITYRANK;
}
// BR 20160115: Don't hash 'junk' words
bool skipword = false;