forked from Mirrors/privacore-open-source-search-engine
Remove Words::m_version
Removed the m_version member, which was write-only. Removed the version parameter from the set() methods.
This commit is contained in:
@ -3804,7 +3804,6 @@ bool setHashes ( Place *p , Words *ww , int32_t niceness ) {
|
||||
// return false with g_errno set on error
|
||||
if ( ! tmp.set ( p->m_str ,
|
||||
p->m_strlen ,
|
||||
TITLEREC_CURRENT_VERSION ,
|
||||
true ,
|
||||
niceness ) ) return false;
|
||||
// set it up
|
||||
|
@ -2496,7 +2496,6 @@ bool Dates::setPart1 ( //char *u ,
|
||||
Words ww;
|
||||
if ( ! ww.set ( date ,
|
||||
dateLen ,
|
||||
TITLEREC_CURRENT_VERSION ,
|
||||
true , // compute Ids?
|
||||
m_niceness ))
|
||||
// return false with g_errno set on error
|
||||
|
@ -92,7 +92,6 @@ int32_t Highlight::set ( SafeBuf *sb,
|
||||
Words words;
|
||||
if ( ! words.set ( content ,
|
||||
contentLen ,
|
||||
TITLEREC_CURRENT_VERSION,
|
||||
true , // computeId
|
||||
true ) ) // has html entites?
|
||||
return -1;
|
||||
|
@ -785,7 +785,6 @@ bool Matches::addMatches ( char *s ,
|
||||
// set the words class for this match group
|
||||
if ( ! wp->set ( s ,
|
||||
slen , // in bytes
|
||||
TITLEREC_CURRENT_VERSION ,
|
||||
true , // computeIds?
|
||||
niceness ))
|
||||
return false;
|
||||
|
@ -595,7 +595,6 @@ bool processLoop ( void *state ) {
|
||||
Words qw;
|
||||
qw.set ( q , // content being highlighted, utf8
|
||||
qlen , // content being highlighted, utf8
|
||||
TITLEREC_CURRENT_VERSION,
|
||||
true , // computeIds
|
||||
false ); // hasHtmlEntities?
|
||||
// . assign scores of 0 to query words that should be ignored
|
||||
|
@ -2073,7 +2073,7 @@ bool Query::setQWords ( char boolFlag ,
|
||||
// their own separate Word, so tell "words" we're setting a query
|
||||
//Words words;
|
||||
if ( ! words.set ( m_sb.getBufStart() , m_sb.length() ,
|
||||
TITLEREC_CURRENT_VERSION, true, 1 ) )
|
||||
true, 1 ) )
|
||||
return log("query: Had error parsing query: %s.",
|
||||
mstrerror(g_errno));
|
||||
int32_t numWords = words.getNumWords();
|
||||
|
@ -352,7 +352,6 @@ bool Title::setTitle4 ( XmlDoc *xd ,
|
||||
// now the words.
|
||||
if ( ! tw[ti].set ( k->getLinkText() ,
|
||||
k->size_linkText-1, // len
|
||||
TITLEREC_CURRENT_VERSION ,
|
||||
true , // computeIds
|
||||
0 ))// niceness
|
||||
return false;
|
||||
@ -390,7 +389,6 @@ bool Title::setTitle4 ( XmlDoc *xd ,
|
||||
// now set words to that
|
||||
if ( ! tw[ti].set ( ts ,
|
||||
tslen ,
|
||||
TITLEREC_CURRENT_VERSION ,
|
||||
true , // compute wordIds?
|
||||
0 ))// niceness
|
||||
return false;
|
||||
@ -502,7 +500,6 @@ bool Title::setTitle4 ( XmlDoc *xd ,
|
||||
// ok, process it
|
||||
if ( ! tw[ti].set ( atitle ,
|
||||
atlen , // len
|
||||
TITLEREC_CURRENT_VERSION ,
|
||||
true , // computeIds
|
||||
0 ))// niceness
|
||||
return false;
|
||||
@ -790,7 +787,6 @@ bool Title::setTitle4 ( XmlDoc *xd ,
|
||||
// now set words to that
|
||||
if ( ! tw[ti].set ( p , // string
|
||||
pend - p , // len
|
||||
TITLEREC_CURRENT_VERSION ,
|
||||
true , // compute wordIds?
|
||||
0 ))// niceness
|
||||
return false;
|
||||
|
2
Wiki.cpp
2
Wiki.cpp
@ -122,7 +122,6 @@ bool Wiki::loadText ( int32_t fileSize ) {
|
||||
Words w;
|
||||
if ( ! w.set ( p , // s
|
||||
eol - p , // slen
|
||||
TITLEREC_CURRENT_VERSION ,
|
||||
true , // computeIds?
|
||||
MAX_NICENESS ) )
|
||||
return false;
|
||||
@ -557,7 +556,6 @@ void Wiki::doneReadingWiki ( ) {
|
||||
Words w;
|
||||
if ( ! w.set ( p , // s
|
||||
eol - p , // slen
|
||||
TITLEREC_CURRENT_VERSION ,
|
||||
true , // computeIds?
|
||||
MAX_NICENESS ) ) {
|
||||
m_errno = g_errno;
|
||||
|
20
Words.cpp
20
Words.cpp
@ -36,7 +36,7 @@ void Words::reset ( ) {
|
||||
m_localBufSize2 = 0;
|
||||
}
|
||||
|
||||
bool Words::set ( char *s, int32_t slen, int32_t version,
|
||||
bool Words::set ( char *s, int32_t slen,
|
||||
bool computeWordIds,
|
||||
int32_t niceness) {
|
||||
// bail if nothing
|
||||
@ -48,7 +48,7 @@ bool Words::set ( char *s, int32_t slen, int32_t version,
|
||||
|
||||
char c = s[slen];
|
||||
if ( c != '\0' ) s[slen]='\0';
|
||||
bool status = set ( s , version, computeWordIds , niceness );
|
||||
bool status = set ( s , computeWordIds , niceness );
|
||||
if ( c != '\0' ) s[slen] = c;
|
||||
return status;
|
||||
}
|
||||
@ -110,7 +110,6 @@ bool Words::set ( Xml *xml,
|
||||
if ( m_xml == xml ) { char *xx=NULL;*xx=0; }
|
||||
reset();
|
||||
m_xml = xml;
|
||||
m_version = xml->getVersion();
|
||||
|
||||
// if xml is empty, bail
|
||||
if ( ! xml->getContent() ) return true;
|
||||
@ -172,7 +171,6 @@ bool Words::set ( Xml *xml,
|
||||
|
||||
bool Words::set11 ( char *s , char *send , int32_t niceness ) {
|
||||
reset();
|
||||
m_version = TITLEREC_CURRENT_VERSION;
|
||||
m_s = s;
|
||||
// this will make addWords() scan for tags
|
||||
m_hasTags = true;
|
||||
@ -199,7 +197,6 @@ bool Words::setxi ( char *s , char *buf, int32_t bufSize, int32_t niceness ) {
|
||||
// prevent setting with the same string
|
||||
if ( m_s == s ) { char *xx=NULL;*xx=0; }
|
||||
reset();
|
||||
m_version = TITLEREC_CURRENT_VERSION;
|
||||
// save for sanity check
|
||||
m_s = s;
|
||||
m_localBuf2 = buf;
|
||||
@ -220,7 +217,7 @@ bool Words::setxi ( char *s , char *buf, int32_t bufSize, int32_t niceness ) {
|
||||
// . doesn't do tags, only text nodes in "xml"
|
||||
// . our definition of a word is as close to English as we can get it
|
||||
// . BUT we also consider a string of punctuation characters to be a word
|
||||
bool Words::set ( char *s , int32_t version,
|
||||
bool Words::set ( char *s ,
|
||||
bool computeWordIds ,
|
||||
int32_t niceness ) {
|
||||
|
||||
@ -228,11 +225,9 @@ bool Words::set ( char *s , int32_t version,
|
||||
if ( m_s == s ) { char *xx=NULL;*xx=0; }
|
||||
|
||||
reset();
|
||||
m_version = version;
|
||||
// save for sanity check
|
||||
m_s = s;
|
||||
|
||||
m_version = version;
|
||||
// determine rough upper bound on number of words by counting
|
||||
// punct/alnum boundaries
|
||||
m_preCount = countWords ( s );
|
||||
@ -574,8 +569,6 @@ bool Words::set2 ( Xml *xml,
|
||||
int32_t niceness) {
|
||||
reset();
|
||||
m_xml = xml;
|
||||
m_version = xml->getVersion();
|
||||
m_version = xml->getVersion();
|
||||
register char *p = (char *)xml->getContent();
|
||||
if ( *p ) p++;
|
||||
register int32_t x = 0;
|
||||
@ -762,13 +755,6 @@ int32_t Words::getLanguage( Sections *sections ,
|
||||
int32_t maxSamples,
|
||||
int32_t niceness,
|
||||
int32_t *langScore) {
|
||||
// calculate scores if not given
|
||||
//Scores calcdScores;
|
||||
//if ( ! scores ) {
|
||||
// if ( ! calcdScores.set( this,m_version,false ) )
|
||||
// return -1;
|
||||
// scores = &calcdScores;
|
||||
//}
|
||||
|
||||
// . take a random sample of words and look them up in the
|
||||
// language dictionary
|
||||
|
15
Words.h
15
Words.h
@ -43,8 +43,6 @@ unsigned char getCharacterLanguage ( char *utf8Char ) ;
|
||||
|
||||
#define NUM_LANGUAGE_SAMPLES 1000
|
||||
|
||||
//#define TITLEREC_CURRENT_VERSION 114
|
||||
|
||||
// this bit is set in the tag id to indicate a back tag
|
||||
#define BACKBIT ((nodeid_t)0x8000)
|
||||
#define BACKBITCOMP ((nodeid_t)0x7fff)
|
||||
@ -59,28 +57,27 @@ class Words {
|
||||
// . there is typically no html in "s"
|
||||
// . html tags are NOT parsed out
|
||||
bool set ( char *s ,
|
||||
int32_t version , // = TITLEREC_CURRENT_VERSION ,
|
||||
bool computeIds , // = true ,
|
||||
int32_t niceness ); // = 0);
|
||||
|
||||
// assume TITLEREC_CURRENT_VERSION and computeIds is true
|
||||
// assume computeIds is true
|
||||
bool set9 ( char *s , int32_t niceness ) {
|
||||
return set ( s , TITLEREC_CURRENT_VERSION, true , niceness);};
|
||||
return set ( s , true , niceness);}
|
||||
|
||||
bool setxi ( char *s , char *buf, int32_t bufSize, int32_t niceness ) ;
|
||||
|
||||
bool setx ( char *s , int32_t slen , int32_t niceness ) {
|
||||
return set ( s,slen,TITLEREC_CURRENT_VERSION,true,niceness);};
|
||||
return set ( s,slen,true,niceness);}
|
||||
|
||||
bool set11 ( char *s , char *send , int32_t niceness ) ;
|
||||
|
||||
// . similar to above
|
||||
// . but we temporarily stick a \0 @ s[slen] for parsing purposes
|
||||
bool set ( char *s , int32_t slen , int32_t version,
|
||||
bool set ( char *s , int32_t slen ,
|
||||
bool computeIds ,
|
||||
int32_t niceness = 0);
|
||||
|
||||
bool set3 ( char *s ) {return set(s,TITLEREC_CURRENT_VERSION,true,0);};
|
||||
bool set3 ( char *s ) {return set(s,true,0);};
|
||||
|
||||
// . new function to set directly from an Xml, rather than extracting
|
||||
// text first
|
||||
@ -111,7 +108,6 @@ class Words {
|
||||
int32_t prefixLen2 ,
|
||||
bool useStems ,
|
||||
bool hashUniqueOnly ,
|
||||
int32_t titleRecVersion ,
|
||||
class Phrases *phrases ,//= NULL ,
|
||||
bool hashWordIffNotInPhrase ,//= false,
|
||||
int32_t niceness );//= 0);
|
||||
@ -387,7 +383,6 @@ class Words {
|
||||
int32_t m_numAlnumWords;
|
||||
|
||||
int32_t m_totalLen; // of all words
|
||||
int32_t m_version; // titlerec version
|
||||
|
||||
bool m_hasTags;
|
||||
|
||||
|
@ -9016,7 +9016,7 @@ bool XmlDoc::hashString_ct ( HashTableX *ct , char *s , int32_t slen ) {
|
||||
Words words;
|
||||
Bits bits;
|
||||
Phrases phrases;
|
||||
if ( ! words.set ( s , slen , m_version , true , m_niceness ) )
|
||||
if ( ! words.set ( s , slen , true , m_niceness ) )
|
||||
return false;
|
||||
if ( ! bits.set ( &words , m_version , m_niceness ) )
|
||||
return false;
|
||||
@ -35231,7 +35231,7 @@ bool XmlDoc::hashString3( char *s ,
|
||||
Phrases phrases;
|
||||
//Weights weights;
|
||||
//Synonyms synonyms;
|
||||
if ( ! words.set ( s , slen , version , true , niceness ) )
|
||||
if ( ! words.set ( s , slen , true , niceness ) )
|
||||
return false;
|
||||
if ( ! bits.set ( &words , version , niceness ) )
|
||||
return false;
|
||||
@ -42548,7 +42548,6 @@ SafeBuf *XmlDoc::getTermInfoBuf ( ) {
|
||||
Words ww2;
|
||||
if ( ! ww2.set ( k->getLinkText() ,
|
||||
k->size_linkText-1, // len
|
||||
TITLEREC_CURRENT_VERSION ,
|
||||
true , // computeIds
|
||||
m_niceness ))// niceness
|
||||
// g_errno set on error, return NULL
|
||||
@ -42592,7 +42591,6 @@ SafeBuf *XmlDoc::getTermInfoBuf ( ) {
|
||||
Words ww2;
|
||||
if ( ! ww2.set ( k->getLinkText() ,
|
||||
k->size_linkText-1, // len
|
||||
TITLEREC_CURRENT_VERSION ,
|
||||
true , // computeIds
|
||||
m_niceness ))// niceness
|
||||
// g_errno set on error, return NULL
|
||||
|
4
main.cpp
4
main.cpp
@ -10618,7 +10618,7 @@ bool parseTest ( char *coll , int64_t docId , char *query ) {
|
||||
t = gettimeofdayInMilliseconds_force();
|
||||
for ( int32_t i = 0 ; i < 100 ; i++ )
|
||||
//if ( ! words.set ( &xml , true , true ) )
|
||||
if ( ! words.set ( content , TITLEREC_CURRENT_VERSION,
|
||||
if ( ! words.set ( content ,
|
||||
true, 0 ) )
|
||||
return log("build: speedtestxml: words set: %s",
|
||||
mstrerror(g_errno));
|
||||
@ -10819,7 +10819,7 @@ bool parseTest ( char *coll , int64_t docId , char *query ) {
|
||||
false ); // use <stop index> tag?
|
||||
if ( ! bufLen ) return log("build: speedtestxml: getText: %s",
|
||||
mstrerror(g_errno));
|
||||
if ( ! words.set ( buf,TITLEREC_CURRENT_VERSION,true,0) )
|
||||
if ( ! words.set ( buf,true,0) )
|
||||
return log("build: speedtestxml: words set: %s",
|
||||
mstrerror(g_errno));
|
||||
}
|
||||
|
@ -214,7 +214,7 @@ void parse_doc_8859_1(char *s, int len, bool doHash,char *charset)
|
||||
Words words;
|
||||
|
||||
// just tokenize words
|
||||
words.set(false, text_buf, TITEREC_CURRENT_VERSION, doHash);
|
||||
words.set(false, text_buf, doHash);
|
||||
free(text_buf);
|
||||
}
|
||||
|
||||
@ -236,7 +236,7 @@ void parse_doc_icu(char *s, int len, bool doHash, char *charset){
|
||||
doFilterSpaces,
|
||||
false);
|
||||
Words w;
|
||||
w.set(true,false, text_buf, textLen, TITLEREC_CURRENT_VERSION,doHash);
|
||||
w.set(true,false, text_buf, textLen, doHash);
|
||||
free(text_buf);
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user