Remove Words::m_version

Removed the m_version member, which was write-only. Removed the version
parameter from the set() methods.
This commit is contained in:
Ivan Skytte Jørgensen
2015-11-30 16:24:58 +01:00
parent 29f4afa20d
commit 80f7c4017f
13 changed files with 15 additions and 47 deletions

@ -3804,7 +3804,6 @@ bool setHashes ( Place *p , Words *ww , int32_t niceness ) {
// return false with g_errno set on error
if ( ! tmp.set ( p->m_str ,
p->m_strlen ,
TITLEREC_CURRENT_VERSION ,
true ,
niceness ) ) return false;
// set it up

@ -2496,7 +2496,6 @@ bool Dates::setPart1 ( //char *u ,
Words ww;
if ( ! ww.set ( date ,
dateLen ,
TITLEREC_CURRENT_VERSION ,
true , // compute Ids?
m_niceness ))
// return false with g_errno set on error

@ -92,7 +92,6 @@ int32_t Highlight::set ( SafeBuf *sb,
Words words;
if ( ! words.set ( content ,
contentLen ,
TITLEREC_CURRENT_VERSION,
true , // computeId
true ) ) // has html entites?
return -1;

@ -785,7 +785,6 @@ bool Matches::addMatches ( char *s ,
// set the words class for this match group
if ( ! wp->set ( s ,
slen , // in bytes
TITLEREC_CURRENT_VERSION ,
true , // computeIds?
niceness ))
return false;

@ -595,7 +595,6 @@ bool processLoop ( void *state ) {
Words qw;
qw.set ( q , // content being highlighted, utf8
qlen , // content being highlighted, utf8
TITLEREC_CURRENT_VERSION,
true , // computeIds
false ); // hasHtmlEntities?
// . assign scores of 0 to query words that should be ignored

@ -2073,7 +2073,7 @@ bool Query::setQWords ( char boolFlag ,
// their own separate Word, so tell "words" we're setting a query
//Words words;
if ( ! words.set ( m_sb.getBufStart() , m_sb.length() ,
TITLEREC_CURRENT_VERSION, true, 1 ) )
true, 1 ) )
return log("query: Had error parsing query: %s.",
mstrerror(g_errno));
int32_t numWords = words.getNumWords();

@ -352,7 +352,6 @@ bool Title::setTitle4 ( XmlDoc *xd ,
// now the words.
if ( ! tw[ti].set ( k->getLinkText() ,
k->size_linkText-1, // len
TITLEREC_CURRENT_VERSION ,
true , // computeIds
0 ))// niceness
return false;
@ -390,7 +389,6 @@ bool Title::setTitle4 ( XmlDoc *xd ,
// now set words to that
if ( ! tw[ti].set ( ts ,
tslen ,
TITLEREC_CURRENT_VERSION ,
true , // compute wordIds?
0 ))// niceness
return false;
@ -502,7 +500,6 @@ bool Title::setTitle4 ( XmlDoc *xd ,
// ok, process it
if ( ! tw[ti].set ( atitle ,
atlen , // len
TITLEREC_CURRENT_VERSION ,
true , // computeIds
0 ))// niceness
return false;
@ -790,7 +787,6 @@ bool Title::setTitle4 ( XmlDoc *xd ,
// now set words to that
if ( ! tw[ti].set ( p , // string
pend - p , // len
TITLEREC_CURRENT_VERSION ,
true , // compute wordIds?
0 ))// niceness
return false;

@ -122,7 +122,6 @@ bool Wiki::loadText ( int32_t fileSize ) {
Words w;
if ( ! w.set ( p , // s
eol - p , // slen
TITLEREC_CURRENT_VERSION ,
true , // computeIds?
MAX_NICENESS ) )
return false;
@ -557,7 +556,6 @@ void Wiki::doneReadingWiki ( ) {
Words w;
if ( ! w.set ( p , // s
eol - p , // slen
TITLEREC_CURRENT_VERSION ,
true , // computeIds?
MAX_NICENESS ) ) {
m_errno = g_errno;

@ -36,7 +36,7 @@ void Words::reset ( ) {
m_localBufSize2 = 0;
}
bool Words::set ( char *s, int32_t slen, int32_t version,
bool Words::set ( char *s, int32_t slen,
bool computeWordIds,
int32_t niceness) {
// bail if nothing
@ -48,7 +48,7 @@ bool Words::set ( char *s, int32_t slen, int32_t version,
char c = s[slen];
if ( c != '\0' ) s[slen]='\0';
bool status = set ( s , version, computeWordIds , niceness );
bool status = set ( s , computeWordIds , niceness );
if ( c != '\0' ) s[slen] = c;
return status;
}
@ -110,7 +110,6 @@ bool Words::set ( Xml *xml,
if ( m_xml == xml ) { char *xx=NULL;*xx=0; }
reset();
m_xml = xml;
m_version = xml->getVersion();
// if xml is empty, bail
if ( ! xml->getContent() ) return true;
@ -172,7 +171,6 @@ bool Words::set ( Xml *xml,
bool Words::set11 ( char *s , char *send , int32_t niceness ) {
reset();
m_version = TITLEREC_CURRENT_VERSION;
m_s = s;
// this will make addWords() scan for tags
m_hasTags = true;
@ -199,7 +197,6 @@ bool Words::setxi ( char *s , char *buf, int32_t bufSize, int32_t niceness ) {
// prevent setting with the same string
if ( m_s == s ) { char *xx=NULL;*xx=0; }
reset();
m_version = TITLEREC_CURRENT_VERSION;
// save for sanity check
m_s = s;
m_localBuf2 = buf;
@ -220,7 +217,7 @@ bool Words::setxi ( char *s , char *buf, int32_t bufSize, int32_t niceness ) {
// . doesn't do tags, only text nodes in "xml"
// . our definition of a word is as close to English as we can get it
// . BUT we also consider a string of punctuation characters to be a word
bool Words::set ( char *s , int32_t version,
bool Words::set ( char *s ,
bool computeWordIds ,
int32_t niceness ) {
@ -228,11 +225,9 @@ bool Words::set ( char *s , int32_t version,
if ( m_s == s ) { char *xx=NULL;*xx=0; }
reset();
m_version = version;
// save for sanity check
m_s = s;
m_version = version;
// determine rough upper bound on number of words by counting
// punct/alnum boundaries
m_preCount = countWords ( s );
@ -574,8 +569,6 @@ bool Words::set2 ( Xml *xml,
int32_t niceness) {
reset();
m_xml = xml;
m_version = xml->getVersion();
m_version = xml->getVersion();
register char *p = (char *)xml->getContent();
if ( *p ) p++;
register int32_t x = 0;
@ -762,13 +755,6 @@ int32_t Words::getLanguage( Sections *sections ,
int32_t maxSamples,
int32_t niceness,
int32_t *langScore) {
// calculate scores if not given
//Scores calcdScores;
//if ( ! scores ) {
// if ( ! calcdScores.set( this,m_version,false ) )
// return -1;
// scores = &calcdScores;
//}
// . take a random sample of words and look them up in the
// language dictionary

15
Words.h

@ -43,8 +43,6 @@ unsigned char getCharacterLanguage ( char *utf8Char ) ;
#define NUM_LANGUAGE_SAMPLES 1000
//#define TITLEREC_CURRENT_VERSION 114
// this bit is set in the tag id to indicate a back tag
#define BACKBIT ((nodeid_t)0x8000)
#define BACKBITCOMP ((nodeid_t)0x7fff)
@ -59,28 +57,27 @@ class Words {
// . there is typically no html in "s"
// . html tags are NOT parsed out
bool set ( char *s ,
int32_t version , // = TITLEREC_CURRENT_VERSION ,
bool computeIds , // = true ,
int32_t niceness ); // = 0);
// assume TITLEREC_CURRENT_VERSION and computeIds is true
// assume computeIds is true
bool set9 ( char *s , int32_t niceness ) {
return set ( s , TITLEREC_CURRENT_VERSION, true , niceness);};
return set ( s , true , niceness);}
bool setxi ( char *s , char *buf, int32_t bufSize, int32_t niceness ) ;
bool setx ( char *s , int32_t slen , int32_t niceness ) {
return set ( s,slen,TITLEREC_CURRENT_VERSION,true,niceness);};
return set ( s,slen,true,niceness);}
bool set11 ( char *s , char *send , int32_t niceness ) ;
// . similar to above
// . but we temporarily stick a \0 @ s[slen] for parsing purposes
bool set ( char *s , int32_t slen , int32_t version,
bool set ( char *s , int32_t slen ,
bool computeIds ,
int32_t niceness = 0);
bool set3 ( char *s ) {return set(s,TITLEREC_CURRENT_VERSION,true,0);};
bool set3 ( char *s ) {return set(s,true,0);};
// . new function to set directly from an Xml, rather than extracting
// text first
@ -111,7 +108,6 @@ class Words {
int32_t prefixLen2 ,
bool useStems ,
bool hashUniqueOnly ,
int32_t titleRecVersion ,
class Phrases *phrases ,//= NULL ,
bool hashWordIffNotInPhrase ,//= false,
int32_t niceness );//= 0);
@ -387,7 +383,6 @@ class Words {
int32_t m_numAlnumWords;
int32_t m_totalLen; // of all words
int32_t m_version; // titlerec version
bool m_hasTags;

@ -9016,7 +9016,7 @@ bool XmlDoc::hashString_ct ( HashTableX *ct , char *s , int32_t slen ) {
Words words;
Bits bits;
Phrases phrases;
if ( ! words.set ( s , slen , m_version , true , m_niceness ) )
if ( ! words.set ( s , slen , true , m_niceness ) )
return false;
if ( ! bits.set ( &words , m_version , m_niceness ) )
return false;
@ -35231,7 +35231,7 @@ bool XmlDoc::hashString3( char *s ,
Phrases phrases;
//Weights weights;
//Synonyms synonyms;
if ( ! words.set ( s , slen , version , true , niceness ) )
if ( ! words.set ( s , slen , true , niceness ) )
return false;
if ( ! bits.set ( &words , version , niceness ) )
return false;
@ -42548,7 +42548,6 @@ SafeBuf *XmlDoc::getTermInfoBuf ( ) {
Words ww2;
if ( ! ww2.set ( k->getLinkText() ,
k->size_linkText-1, // len
TITLEREC_CURRENT_VERSION ,
true , // computeIds
m_niceness ))// niceness
// g_errno set on error, return NULL
@ -42592,7 +42591,6 @@ SafeBuf *XmlDoc::getTermInfoBuf ( ) {
Words ww2;
if ( ! ww2.set ( k->getLinkText() ,
k->size_linkText-1, // len
TITLEREC_CURRENT_VERSION ,
true , // computeIds
m_niceness ))// niceness
// g_errno set on error, return NULL

@ -10618,7 +10618,7 @@ bool parseTest ( char *coll , int64_t docId , char *query ) {
t = gettimeofdayInMilliseconds_force();
for ( int32_t i = 0 ; i < 100 ; i++ )
//if ( ! words.set ( &xml , true , true ) )
if ( ! words.set ( content , TITLEREC_CURRENT_VERSION,
if ( ! words.set ( content ,
true, 0 ) )
return log("build: speedtestxml: words set: %s",
mstrerror(g_errno));
@ -10819,7 +10819,7 @@ bool parseTest ( char *coll , int64_t docId , char *query ) {
false ); // use <stop index> tag?
if ( ! bufLen ) return log("build: speedtestxml: getText: %s",
mstrerror(g_errno));
if ( ! words.set ( buf,TITLEREC_CURRENT_VERSION,true,0) )
if ( ! words.set ( buf,true,0) )
return log("build: speedtestxml: words set: %s",
mstrerror(g_errno));
}

@ -214,7 +214,7 @@ void parse_doc_8859_1(char *s, int len, bool doHash,char *charset)
Words words;
// just tokenize words
words.set(false, text_buf, TITEREC_CURRENT_VERSION, doHash);
words.set(false, text_buf, doHash);
free(text_buf);
}
@ -236,7 +236,7 @@ void parse_doc_icu(char *s, int len, bool doHash, char *charset){
doFilterSpaces,
false);
Words w;
w.set(true,false, text_buf, textLen, TITLEREC_CURRENT_VERSION,doHash);
w.set(true,false, text_buf, textLen, doHash);
free(text_buf);
}