Remove Words::m_version

Removed the m_version member, which was write-only. Removed the version parameter from the set() methods.
2015-11-30 16:24:58 +01:00
parent 29f4afa20d
commit 80f7c4017f
13 changed files with 15 additions and 47 deletions
--- a/Address.cpp
+++ b/Address.cpp
@ -3804,7 +3804,6 @@ bool setHashes ( Place *p , Words *ww , int32_t niceness ) {
 		// return false with g_errno set on error
 		if ( ! tmp.set ( p->m_str , 
 				 p->m_strlen ,
-				 TITLEREC_CURRENT_VERSION ,
 				 true ,
 				 niceness ) ) return false;
 		// set it up
--- a/Dates.cpp
+++ b/Dates.cpp
@ -2496,7 +2496,6 @@ bool Dates::setPart1 ( //char       *u        ,
 		Words ww;
 		if ( ! ww.set ( date                     ,
 				dateLen                  ,
-				TITLEREC_CURRENT_VERSION ,
 				true                     , // compute Ids?
 				m_niceness               ))
 			// return false with g_errno set on error
--- a/Highlight.cpp
+++ b/Highlight.cpp
@ -92,7 +92,6 @@ int32_t Highlight::set ( SafeBuf *sb,
 	Words words;
 	if ( ! words.set ( content      , 
 			   contentLen   , 
-			   TITLEREC_CURRENT_VERSION,
 			   true         , // computeId
 			   true         ) ) // has html entites?
 		return -1;
--- a/Matches.cpp
+++ b/Matches.cpp
@ -785,7 +785,6 @@ bool Matches::addMatches ( char      *s         ,
 	// set the words class for this match group
 	if ( ! wp->set ( s                        ,
 			 slen                     , // in bytes
-			 TITLEREC_CURRENT_VERSION ,
 			 true                     , // computeIds?
 			 niceness                 ))
 		return false;
--- a/PageGet.cpp
+++ b/PageGet.cpp
@ -595,7 +595,6 @@ bool processLoop ( void *state ) {
 	Words qw;
 	qw.set ( q            ,  // content being highlighted, utf8
 		 qlen         ,  // content being highlighted, utf8
-		 TITLEREC_CURRENT_VERSION,
 		 true         ,  // computeIds
 		 false        ); // hasHtmlEntities?
 	// . assign scores of 0 to query words that should be ignored
--- a/Query.cpp
+++ b/Query.cpp
@ -2073,7 +2073,7 @@ bool Query::setQWords ( char boolFlag ,
 	//   their own separate Word, so tell "words" we're setting a query
 	//Words words;
 	if ( ! words.set ( m_sb.getBufStart() , m_sb.length() ,
-			    TITLEREC_CURRENT_VERSION, true, 1 ) )
+			    true, 1 ) )
 		return log("query: Had error parsing query: %s.",
 			   mstrerror(g_errno));
 	int32_t numWords = words.getNumWords();
--- a/Title.cpp
+++ b/Title.cpp
@ -352,7 +352,6 @@ bool Title::setTitle4 ( XmlDoc   *xd            ,
 			// now the words.
 			if ( ! tw[ti].set ( k->getLinkText() ,
 					    k->size_linkText-1, // len
-					    TITLEREC_CURRENT_VERSION ,
 					    true              , // computeIds
 					    0                 ))// niceness
 				return false;
@ -390,7 +389,6 @@ bool Title::setTitle4 ( XmlDoc   *xd            ,
 		// now set words to that
 		if ( ! tw[ti].set ( ts                       ,
 				    tslen                    ,
-				    TITLEREC_CURRENT_VERSION ,
 				    true       , // compute wordIds?
 				    0          ))// niceness
 			return false;
@ -502,7 +500,6 @@ bool Title::setTitle4 ( XmlDoc   *xd            ,
 		// ok, process it
 		if ( ! tw[ti].set ( atitle            ,
 				    atlen             , // len
-				    TITLEREC_CURRENT_VERSION ,
 				    true              , // computeIds
 				    0                 ))// niceness
 			return false;
@ -790,7 +787,6 @@ bool Title::setTitle4 ( XmlDoc   *xd            ,
 		// now set words to that
 		if ( ! tw[ti].set ( p                        , // string
 				    pend - p                 , // len
-				    TITLEREC_CURRENT_VERSION ,
 				    true       , // compute wordIds?
 				    0          ))// niceness
 			return false;
--- a/Wiki.cpp
+++ b/Wiki.cpp
@ -122,7 +122,6 @@ bool Wiki::loadText ( int32_t fileSize ) {
 		Words w;
 		if ( ! w.set ( p            , // s
 			       eol - p      , // slen
-			       TITLEREC_CURRENT_VERSION ,
 			       true         , // computeIds?
 			       MAX_NICENESS ) ) 
 			return false;
@ -557,7 +556,6 @@ void Wiki::doneReadingWiki ( ) {
 		Words w;
 		if ( ! w.set ( p            , // s
 			       eol - p      , // slen
-			       TITLEREC_CURRENT_VERSION ,
 			       true         , // computeIds?
 			       MAX_NICENESS ) ) {
 			m_errno = g_errno;
--- a/Words.cpp
+++ b/Words.cpp
@ -36,7 +36,7 @@ void Words::reset ( ) {
 	m_localBufSize2 = 0;
 }

-bool Words::set ( char *s, int32_t slen, int32_t version, 
+bool Words::set ( char *s, int32_t slen,
 		  bool computeWordIds,
 		  int32_t niceness) {
 	// bail if nothing
@ -48,7 +48,7 @@ bool Words::set ( char *s, int32_t slen, int32_t version,

 	char c = s[slen];
 	if ( c != '\0' ) s[slen]='\0';
-	bool status = set ( s , version, computeWordIds , niceness );
+	bool status = set ( s , computeWordIds , niceness );
 	if ( c != '\0' ) s[slen] = c;
 	return status;
 }
@ -110,7 +110,6 @@ bool Words::set ( Xml *xml,
 	if ( m_xml == xml ) { char *xx=NULL;*xx=0; }
 	reset();
 	m_xml = xml;
-	m_version = xml->getVersion();

 	// if xml is empty, bail
 	if   ( ! xml->getContent() ) return true;
@ -172,7 +171,6 @@ bool Words::set ( Xml *xml,

 bool Words::set11 ( char *s , char *send , int32_t niceness ) {
 	reset();
-	m_version = TITLEREC_CURRENT_VERSION;
 	m_s = s;
 	// this will make addWords() scan for tags
 	m_hasTags = true;
@ -199,7 +197,6 @@ bool Words::setxi ( char *s , char *buf, int32_t bufSize, int32_t niceness ) {
 	// prevent setting with the same string
 	if ( m_s == s ) { char *xx=NULL;*xx=0; }
 	reset();
-	m_version = TITLEREC_CURRENT_VERSION;
 	// save for sanity check
 	m_s = s;
 	m_localBuf2 = buf;
@ -220,7 +217,7 @@ bool Words::setxi ( char *s , char *buf, int32_t bufSize, int32_t niceness ) {
 // . doesn't do tags, only text nodes in "xml"
 // . our definition of a word is as close to English as we can get it
 // . BUT we also consider a string of punctuation characters to be a word
-bool Words::set ( char *s , int32_t version, 
+bool Words::set ( char *s ,
 		  bool computeWordIds ,
 		  int32_t niceness ) {

@ -228,11 +225,9 @@ bool Words::set ( char *s , int32_t version,
 	if ( m_s == s ) { char *xx=NULL;*xx=0; }

 	reset();
-	m_version = version;
 	// save for sanity check
 	m_s = s;

-	m_version = version;
 	// determine rough upper bound on number of words by counting
 	// punct/alnum boundaries
 	m_preCount = countWords ( s );
@ -574,8 +569,6 @@ bool Words::set2 ( Xml *xml,
 		   int32_t niceness) {
 	reset();
 	m_xml = xml;
-	m_version = xml->getVersion();
-	m_version = xml->getVersion();
 	register char *p = (char *)xml->getContent();
 	if ( *p ) p++;
 	register int32_t x = 0;
@ -762,13 +755,6 @@ int32_t Words::getLanguage( Sections *sections ,
 			 int32_t maxSamples,
 			 int32_t niceness,
 			 int32_t *langScore) {
-	// calculate scores if not given
-	//Scores calcdScores;
-	//if ( ! scores ) {
-	//	if ( ! calcdScores.set( this,m_version,false ) )
-	//		return -1;
-	//	scores = &calcdScores;
-	//}

 	// . take a random sample of words and look them up in the
 	//   language dictionary
--- a/Words.h
+++ b/Words.h
@ -43,8 +43,6 @@ unsigned char getCharacterLanguage ( char *utf8Char ) ;

 #define NUM_LANGUAGE_SAMPLES 1000

-//#define TITLEREC_CURRENT_VERSION 114
-
 // this bit is set in the tag id to indicate a back tag
 #define BACKBIT     ((nodeid_t)0x8000)
 #define BACKBITCOMP ((nodeid_t)0x7fff)
@ -59,28 +57,27 @@ class Words {
 	// . there is typically no html in "s"
 	// . html tags are NOT parsed out
 	bool set ( char *s , 
-		   int32_t version , // = TITLEREC_CURRENT_VERSION , 
 		   bool computeIds , // = true ,
 		   int32_t niceness ); // = 0);

-	// assume TITLEREC_CURRENT_VERSION and computeIds is true
+	// assume computeIds is true
 	bool set9 ( char *s , int32_t niceness ) {
-		return set ( s , TITLEREC_CURRENT_VERSION, true , niceness);};
+		return set ( s , true , niceness);}

 	bool setxi ( char *s , char *buf, int32_t bufSize, int32_t niceness ) ;

 	bool setx ( char *s , int32_t slen , int32_t niceness ) {
-		return set ( s,slen,TITLEREC_CURRENT_VERSION,true,niceness);};
+		return set ( s,slen,true,niceness);}

 	bool set11 ( char *s , char *send , int32_t niceness ) ;

 	// . similar to above
 	// . but we temporarily stick a \0 @ s[slen] for parsing purposes
-	bool set ( char *s , int32_t slen , int32_t version, 
+	bool set ( char *s , int32_t slen ,
 		   bool computeIds ,
 		   int32_t niceness = 0);

-	bool set3 ( char *s ) {return set(s,TITLEREC_CURRENT_VERSION,true,0);};
+	bool set3 ( char *s ) {return set(s,true,0);};

 	// . new function to set directly from an Xml, rather than extracting
 	//   text first
@ -111,7 +108,6 @@ class Words {
 		    int32_t            prefixLen2  ,
 		    bool            useStems    ,
 		    bool            hashUniqueOnly ,
-		    int32_t            titleRecVersion ,
 		    class Phrases  *phrases                ,//= NULL  ,
 		    bool            hashWordIffNotInPhrase ,//= false,
 		    int32_t            niceness               );//= 0);
@ -387,7 +383,6 @@ class Words {
 	int32_t           m_numAlnumWords;

 	int32_t           m_totalLen;  // of all words
-	int32_t           m_version; // titlerec version

 	bool           m_hasTags;

--- a/XmlDoc.cpp
+++ b/XmlDoc.cpp
@ -9016,7 +9016,7 @@ bool XmlDoc::hashString_ct ( HashTableX *ct , char *s , int32_t slen ) {
 	Words   words;
 	Bits    bits;
 	Phrases phrases;
-	if ( ! words.set   ( s , slen , m_version , true , m_niceness ) ) 
+	if ( ! words.set   ( s , slen , true , m_niceness ) ) 
 		return false;
 	if ( ! bits.set    ( &words , m_version , m_niceness ) )
 		return false;
@ -35231,7 +35231,7 @@ bool XmlDoc::hashString3( char       *s              ,
 	Phrases phrases;
 	//Weights weights;
 	//Synonyms synonyms;
-	if ( ! words.set   ( s , slen , version , true , niceness ) ) 
+	if ( ! words.set   ( s , slen , true , niceness ) ) 
 		return false;
 	if ( ! bits.set    ( &words , version , niceness ) )
 		return false;
@ -42548,7 +42548,6 @@ SafeBuf *XmlDoc::getTermInfoBuf ( ) {
 		Words ww2;
 		if ( ! ww2.set ( k->getLinkText()   ,
 				 k->size_linkText-1, // len
-				 TITLEREC_CURRENT_VERSION ,
 				 true              , // computeIds
 				 m_niceness        ))// niceness
 			// g_errno set on error, return NULL
@ -42592,7 +42591,6 @@ SafeBuf *XmlDoc::getTermInfoBuf ( ) {
 		Words ww2;
 		if ( ! ww2.set ( k->getLinkText()   ,
 				 k->size_linkText-1, // len
-				 TITLEREC_CURRENT_VERSION ,
 				 true              , // computeIds
 				 m_niceness        ))// niceness
 			// g_errno set on error, return NULL
--- a/main.cpp
+++ b/main.cpp
@ -10618,7 +10618,7 @@ bool parseTest ( char *coll , int64_t docId , char *query ) {
 	t = gettimeofdayInMilliseconds_force();
 	for ( int32_t i = 0 ; i < 100 ; i++ ) 
 		//if ( ! words.set ( &xml , true , true ) )
-		if ( ! words.set ( content , TITLEREC_CURRENT_VERSION,
+		if ( ! words.set ( content ,
 				   true, 0 ) )
 			return log("build: speedtestxml: words set: %s",
 				   mstrerror(g_errno));
@ -10819,7 +10819,7 @@ bool parseTest ( char *coll , int64_t docId , char *query ) {
 				     false     ); // use <stop index> tag?
 		if ( ! bufLen ) return log("build: speedtestxml: getText: %s",
 					   mstrerror(g_errno));
-		if ( ! words.set ( buf,TITLEREC_CURRENT_VERSION,true,0) )
+		if ( ! words.set ( buf,true,0) )
 			return log("build: speedtestxml: words set: %s",
 				   mstrerror(g_errno));
 	}
--- a/test_parser.cpp
+++ b/test_parser.cpp
@ -214,7 +214,7 @@ void parse_doc_8859_1(char *s, int len, bool doHash,char *charset)
 	Words words;

 	// just tokenize words
-	words.set(false, text_buf, TITEREC_CURRENT_VERSION, doHash);
+	words.set(false, text_buf, doHash);
 	free(text_buf);
 }

@ -236,7 +236,7 @@ void parse_doc_icu(char *s, int len, bool doHash, char *charset){
 				   doFilterSpaces,
 				   false);
 	Words w;
-	w.set(true,false, text_buf, textLen, TITLEREC_CURRENT_VERSION,doHash);
+	w.set(true,false, text_buf, textLen, doHash);
 	free(text_buf);
 }