mirror of
https://github.com/privacore/open-source-search-engine.git
synced 2025-07-16 02:46:08 -04:00
Remove m_colorNum from Query & Matches
This commit is contained in:
118
Highlight.cpp
118
Highlight.cpp
@ -6,51 +6,33 @@
|
||||
#include "Synonyms.h"
|
||||
#include "XmlDoc.h"
|
||||
|
||||
|
||||
// use different front tags for matching different term #'s
|
||||
static char *s_frontTags[] = {
|
||||
"<span class=\"gbcnst gbcnst00\">" ,
|
||||
"<span class=\"gbcnst gbcnst01\">" ,
|
||||
"<span class=\"gbcnst gbcnst02\">" ,
|
||||
"<span class=\"gbcnst gbcnst03\">" ,
|
||||
"<span class=\"gbcnst gbcnst04\">" ,
|
||||
"<span class=\"gbcnst gbcnst05\">" ,
|
||||
"<span class=\"gbcnst gbcnst06\">" ,
|
||||
"<span class=\"gbcnst gbcnst07\">" ,
|
||||
"<span class=\"gbcnst gbcnst08\">" ,
|
||||
"<span class=\"gbcnst gbcnst09\">"
|
||||
static const char *s_frontTags[] = {
|
||||
"<span class='gbcnst00'>" ,
|
||||
"<span class='gbcnst01'>" ,
|
||||
"<span class='gbcnst02'>" ,
|
||||
"<span class='gbcnst03'>" ,
|
||||
"<span class='gbcnst04'>" ,
|
||||
"<span class='gbcnst05'>" ,
|
||||
"<span class='gbcnst06'>" ,
|
||||
"<span class='gbcnst07'>" ,
|
||||
"<span class='gbcnst08'>" ,
|
||||
"<span class='gbcnst09'>"
|
||||
};
|
||||
|
||||
int32_t s_frontTagLen=gbstrlen("<span class=\"gbcnst gbcnst00\">");
|
||||
|
||||
static char *s_styleSheet =
|
||||
"<style type=\"text/css\">"
|
||||
"span.gbcns{font-weight:600}"
|
||||
"span.gbcnst00{color:black;background-color:#ffff66}"
|
||||
"span.gbcnst01{color:black;background-color:#a0ffff}"
|
||||
"span.gbcnst02{color:black;background-color:#99ff99}"
|
||||
"span.gbcnst03{color:black;background-color:#ff9999}"
|
||||
"span.gbcnst04{color:black;background-color:#ff66ff}"
|
||||
"span.gbcnst05{color:white;background-color:#880000}"
|
||||
"span.gbcnst06{color:white;background-color:#00aa00}"
|
||||
"span.gbcnst07{color:white;background-color:#886800}"
|
||||
"span.gbcnst08{color:white;background-color:#004699}"
|
||||
"span.gbcnst09{color:white;background-color:#990099}"
|
||||
"span.gbcnst00x{color:white;background-color:black;border:2px solid #ffff66}"
|
||||
"span.gbcnst01x{color:white;background-color:black;border:2px solid #a0ffff}"
|
||||
"span.gbcnst02x{color:white;background-color:black;border:2px solid #99ff99}"
|
||||
"span.gbcnst03x{color:white;background-color:black;border:2px solid #ff9999}"
|
||||
"span.gbcnst04x{color:white;background-color:black;border:2px solid #ff66ff}"
|
||||
"span.gbcnst05x{color:white;background-color:black;border:2px solid #880000}"
|
||||
"span.gbcnst06x{color:white;background-color:black;border:2px solid #00aa00}"
|
||||
"span.gbcnst07x{color:white;background-color:black;border:2px solid #886800}"
|
||||
"span.gbcnst08x{color:white;background-color:black;border:2px solid #004699}"
|
||||
"span.gbcnst09x{color:white;background-color:black;border:2px solid #990099}"
|
||||
static const char *s_styleSheet =
|
||||
"<style type='text/css'>"
|
||||
"span.gbcnst00{color:black;background-color:#ffff66}"
|
||||
"span.gbcnst01{color:black;background-color:#a0ffff}"
|
||||
"span.gbcnst02{color:black;background-color:#99ff99}"
|
||||
"span.gbcnst03{color:black;background-color:#ff9999}"
|
||||
"span.gbcnst04{color:black;background-color:#ff66ff}"
|
||||
"span.gbcnst05{color:white;background-color:#880000}"
|
||||
"span.gbcnst06{color:white;background-color:#00aa00}"
|
||||
"span.gbcnst07{color:white;background-color:#886800}"
|
||||
"span.gbcnst08{color:white;background-color:#004699}"
|
||||
"span.gbcnst09{color:white;background-color:#990099}"
|
||||
"</style>";
|
||||
int32_t s_styleSheetLen = gbstrlen( s_styleSheet );
|
||||
|
||||
//buffer for writing term list items
|
||||
char s_termList[1024];
|
||||
|
||||
// . return length stored into "buf"
|
||||
// . content must be NULL terminated
|
||||
@ -124,22 +106,6 @@ bool Highlight::highlightWords ( Words *words , Matches *m, Query *q ) {
|
||||
char *w;
|
||||
int32_t wlen;
|
||||
|
||||
// length of our front tag should be constant
|
||||
int32_t frontTagLen ;
|
||||
if ( m_frontTag ) frontTagLen = m_frontTagLen;
|
||||
else frontTagLen = s_frontTagLen;
|
||||
// set the back tag, should be constant
|
||||
const char *backTag ;
|
||||
int32_t backTagLen;
|
||||
if ( m_backTag ) {
|
||||
backTag = m_backTag;
|
||||
backTagLen = m_backTagLen;
|
||||
}
|
||||
else {
|
||||
backTag = "</span>";
|
||||
backTagLen = 7;
|
||||
}
|
||||
|
||||
// set nexti to the word # of the first word that matches a query word
|
||||
int32_t nextm = -1;
|
||||
int32_t nexti = -1;
|
||||
@ -150,26 +116,20 @@ bool Highlight::highlightWords ( Words *words , Matches *m, Query *q ) {
|
||||
|
||||
int32_t backTagi = -1;
|
||||
bool inTitle = false;
|
||||
bool endHead = false;
|
||||
bool endHtml = false;
|
||||
|
||||
for ( int32_t i = 0 ; i < numWords ; i++ ) {
|
||||
// set word's info
|
||||
w = words->getWord(i);
|
||||
wlen = words->getWordLen(i);
|
||||
endHead = false;
|
||||
endHtml = false;
|
||||
bool endHead = false;
|
||||
bool endHtml = false;
|
||||
|
||||
if ( (words->getTagId(i) ) == TAG_TITLE ) {
|
||||
inTitle = !(words->isBackTag(i));
|
||||
} else if ( (words->getTagId(i) ) == TAG_HTML ) {
|
||||
if ( words->isBackTag( i ) ) {
|
||||
endHtml = true;
|
||||
}
|
||||
endHtml = words->isBackTag( i );
|
||||
} else if ( (words->getTagId(i) ) == TAG_HEAD ) {
|
||||
if (words->isBackTag(i) ) {
|
||||
endHead = true;
|
||||
}
|
||||
endHead = words->isBackTag(i);
|
||||
}
|
||||
|
||||
// match class ptr
|
||||
@ -187,26 +147,20 @@ bool Highlight::highlightWords ( Words *words , Matches *m, Query *q ) {
|
||||
}
|
||||
}
|
||||
else {
|
||||
// now each match is the entire quote, so write the
|
||||
// front tag right now
|
||||
const char *frontTag;
|
||||
// now each match is the entire quote, so write the front tag right now
|
||||
if ( m_frontTag ) {
|
||||
frontTag = m_frontTag;
|
||||
m_sb->safeStrcpy ( m_frontTag );
|
||||
} else {
|
||||
frontTag = s_frontTags[mat->m_colorNum%10];
|
||||
m_sb->safeStrcpy( s_frontTags[(mat->m_qwordNum % 10)] );
|
||||
}
|
||||
|
||||
m_sb->safeStrcpy ( (char *)frontTag );
|
||||
|
||||
// when to write the back tag? add the number of
|
||||
// words in the match to i.
|
||||
backTagi = i + mat->m_numWords;
|
||||
}
|
||||
}
|
||||
else if ( endHead ) {
|
||||
// include the tags style sheet immediately before
|
||||
// the closing </HEAD> tag
|
||||
m_sb->safeMemcpy( s_styleSheet , s_styleSheetLen );
|
||||
} else if ( endHead ) {
|
||||
// include the tags style sheet immediately before the closing </HEAD> tag
|
||||
m_sb->safeStrcpy( s_styleSheet );
|
||||
}
|
||||
|
||||
if ( i == nexti ) {
|
||||
@ -224,7 +178,11 @@ bool Highlight::highlightWords ( Words *words , Matches *m, Query *q ) {
|
||||
// back tag
|
||||
if ( i == backTagi-1 ) {
|
||||
// store the back tag
|
||||
m_sb->safeMemcpy ( (char *)backTag , backTagLen );
|
||||
if ( m_backTag ) {
|
||||
m_sb->safeMemcpy( m_backTag, m_backTagLen );
|
||||
} else {
|
||||
m_sb->safeStrcpy("</span>");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
77
Matches.cpp
77
Matches.cpp
@ -10,9 +10,6 @@
|
||||
#include "Sections.h"
|
||||
#include "XmlDoc.h"
|
||||
|
||||
//#define DEBUG_MATCHES 1
|
||||
|
||||
|
||||
// TODO: have Matches set itself from all the meta tags, titles, link text,
|
||||
// neighborhoods and body. then proximity algo can utilize that info
|
||||
// as well as the summary generator, Summary.cpp. right now prox algo
|
||||
@ -35,12 +32,10 @@ void Matches::reset ( ) {
|
||||
|
||||
void Matches::reset2() {
|
||||
m_numMatches = 0;
|
||||
//m_maxNQT = -1;
|
||||
m_numAlnums = 0;
|
||||
// free all the classes' buffers
|
||||
for ( int32_t i = 0 ; i < m_numMatchGroups ; i++ ) {
|
||||
m_wordsArray [i].reset();
|
||||
//m_sectionsArray[i].reset();
|
||||
m_posArray [i].reset();
|
||||
m_bitsArray [i].reset();
|
||||
}
|
||||
@ -48,25 +43,17 @@ void Matches::reset2() {
|
||||
}
|
||||
|
||||
bool Matches::isMatchableTerm ( QueryTerm *qt ) { // , int32_t i ) {
|
||||
// . skip if negative sign
|
||||
// . no, we need to match negative words/phrases now so we can
|
||||
// big hack them out...
|
||||
//if ( qw->m_wordSign == '-' ) return false;
|
||||
QueryWord *qw = qt->m_qword;
|
||||
// not derived from a query word? how?
|
||||
if ( ! qw ) return false;
|
||||
if ( qw->m_ignoreWord == IGNORE_DEFAULT ) return false;
|
||||
if ( qw->m_ignoreWord == IGNORE_FIELDNAME ) return false;
|
||||
if ( qw->m_ignoreWord == IGNORE_BOOLOP ) return false;
|
||||
// stop words in 'all the king's men' query need to be highlighted
|
||||
//if ( qw->m_isQueryStopWord && ! qw->m_inQuotes ) return false;
|
||||
//if ( qw->m_isStopWord && ! qw->m_inQuotes ) return false;
|
||||
// take this out for now so we highlight for title: terms
|
||||
if ( qw->m_fieldCode && qw->m_fieldCode != FIELD_TITLE ) return false;
|
||||
// what word # are we?
|
||||
int32_t qwn = qw - m_q->m_qwords;
|
||||
// do not include if in a quote and does not start it!!
|
||||
//if ( qw->m_inQuotes && i-1 != qw->m_quoteStart ) return false;
|
||||
if ( qw->m_quoteStart >= 0 && qw->m_quoteStart != qwn ) return false;
|
||||
// if query is too long, a query word can be truncated!
|
||||
// this happens for some words if they are ignored, too!
|
||||
@ -74,12 +61,6 @@ bool Matches::isMatchableTerm ( QueryTerm *qt ) { // , int32_t i ) {
|
||||
// after a NOT operator?
|
||||
if ( qw->m_underNOT )
|
||||
return false;
|
||||
// in a field?
|
||||
//if ( qw->m_fieldCode != fieldCode ) continue;
|
||||
// skip if a query stop word w/o a sign and ignored
|
||||
//if ( q->m_isStopWord[i] &&
|
||||
// q->m_termSigns[i] == '\0' &&
|
||||
// q->m_ignore[i] ) continue;
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -111,15 +92,10 @@ void Matches::setQuery ( Query *q ) {
|
||||
// how many query words do we have that can be matched?
|
||||
int32_t numToMatch = 0;
|
||||
for ( int32_t i = 0 ; i < nqt ; i++ ) {
|
||||
// reset this
|
||||
//m_qwordFlags[i] = 0;
|
||||
// get query word #i
|
||||
//QueryWord *qw = &m_q->m_qwords[i];
|
||||
QueryTerm *qt = &m_q->m_qterms[i];
|
||||
// skip if ignored *in certain ways only*
|
||||
if ( ! isMatchableTerm ( qt ) ) {
|
||||
//if( (qw->m_wordSign == '-') && !qw->m_fieldCode )
|
||||
// m_numNegTerms++;
|
||||
continue;
|
||||
}
|
||||
// count it
|
||||
@ -147,38 +123,26 @@ void Matches::setQuery ( Query *q ) {
|
||||
// clear hash table
|
||||
memset ( m_qtableIds , 0 , m_numSlots * 8 );
|
||||
memset ( m_qtableFlags , 0 , m_numSlots );
|
||||
//memset ( m_qtableNegIds, 0 , m_numNegTerms );
|
||||
|
||||
// alternate colors for highlighting
|
||||
int32_t colorNum = 0;
|
||||
|
||||
//int32_t negIds = 0;
|
||||
// . hash all the query terms into the hash table
|
||||
// . the term's score should be 100 for a very rare term,
|
||||
// and 1 for a stop word.
|
||||
//m_maxNQT = nqt;
|
||||
for ( int32_t i = 0 ; i < nqt ; i++ ) {
|
||||
// get query word #i
|
||||
//QueryWord *qw = &m_q->m_qwords[i];
|
||||
QueryTerm *qt = &m_q->m_qterms[i];
|
||||
// skip if ignored *in certain ways only*
|
||||
if ( ! isMatchableTerm ( qt ) ) {
|
||||
//if( (qw->m_wordSign == '-') && !qw->m_fieldCode )
|
||||
// m_qtableNegIds[negIds++] = qw->m_rawWordId;
|
||||
continue;
|
||||
}
|
||||
|
||||
// get the word it is from
|
||||
QueryWord *qw = qt->m_qword;
|
||||
|
||||
// get word #
|
||||
int32_t qwn = qw - q->m_qwords;
|
||||
|
||||
// assign color # for term highlighting with different colors
|
||||
qw->m_colorNum = colorNum++;
|
||||
|
||||
// do not overfill table
|
||||
if ( colorNum > MAX_QUERY_WORDS_TO_MATCH ) {
|
||||
if ( i >= MAX_QUERY_WORDS_TO_MATCH ) {
|
||||
break;
|
||||
}
|
||||
|
||||
// this should be equivalent to the word id
|
||||
int64_t qid = qt->m_rawTermId;//qw->m_rawWordId;
|
||||
|
||||
@ -194,15 +158,19 @@ void Matches::setQuery ( Query *q ) {
|
||||
|
||||
// put in hash table
|
||||
n = ((uint32_t)qid) & mask;
|
||||
|
||||
// chain to an empty slot
|
||||
while ( m_qtableIds[n] && m_qtableIds[n] != qid )
|
||||
if ( ++n >= m_numSlots ) n = 0;
|
||||
|
||||
// . if already occupied, do not overwrite this, keep this
|
||||
// first word, the other is often ignored as IGNORE_REPEAT
|
||||
// . what word # in the query are we. save this.
|
||||
if ( ! m_qtableIds[n] ) m_qtableWordNums[n] = qwn;
|
||||
|
||||
// store it
|
||||
m_qtableIds[n] = qid;
|
||||
|
||||
// in quotes? this term may appear multiple times in the
|
||||
// query, in some cases in quotes, and in some cases not.
|
||||
// we need to know either way for logic below.
|
||||
@ -371,7 +339,6 @@ bool Matches::addMatches( char *s, int32_t slen, mf_t flags, int32_t niceness )
|
||||
|
||||
// get some new ptrs for this match group
|
||||
Words *wp = &m_wordsArray [ m_numMatchGroups ];
|
||||
//Sections *sp = &m_sectionsArray [ m_numMatchGroups ];
|
||||
Sections *sp = NULL;
|
||||
Bits *bp = &m_bitsArray [ m_numMatchGroups ];
|
||||
Pos *pb = &m_posArray [ m_numMatchGroups ];
|
||||
@ -381,9 +348,6 @@ bool Matches::addMatches( char *s, int32_t slen, mf_t flags, int32_t niceness )
|
||||
return false;
|
||||
}
|
||||
|
||||
// scores vector
|
||||
//if ( ! sp->set ( wp , TITLEREC_CURRENT_VERSION , false ) )
|
||||
// return false;
|
||||
// bits vector
|
||||
if ( ! bp->setForSummary ( wp ) ) {
|
||||
return false;
|
||||
@ -424,25 +388,6 @@ bool Matches::addMatches( char *s, int32_t slen, mf_t flags, int32_t niceness )
|
||||
return status;
|
||||
}
|
||||
|
||||
bool Matches::getMatchGroup ( mf_t matchFlag ,
|
||||
Words **wp ,
|
||||
Pos **pp ,
|
||||
Sections **sp ) {
|
||||
|
||||
for ( int32_t i = 0 ; i < m_numMatchGroups ; i++ ) {
|
||||
// must be the type we want
|
||||
if ( m_flags[i] != matchFlag ) continue;
|
||||
// get it
|
||||
*wp = &m_wordsArray [i];
|
||||
*pp = &m_posArray [i];
|
||||
//*sp = &m_sectionsArray [i];
|
||||
*sp = NULL;
|
||||
return true;
|
||||
}
|
||||
// not found
|
||||
return false;
|
||||
}
|
||||
|
||||
// . TODO: support stemming later. each word should then have multiple ids.
|
||||
// . add to our m_matches[] array iff addToMatches is true, otherwise we just
|
||||
// set the m_foundTermVector for doing the BIG HACK described in Summary.cpp
|
||||
@ -734,12 +679,6 @@ bool Matches::addMatches(Words *words, Phrases *phrases, Sections *sections, Bit
|
||||
// get the first query word # of this match
|
||||
qw = &m_q->m_qwords[qwn];
|
||||
|
||||
// get its color. for highlighting under different colors.
|
||||
m->m_colorNum = qw->m_colorNum;
|
||||
|
||||
// sanity check
|
||||
if ( m->m_colorNum < 0 ) { char *xx = NULL; *xx = 0; }
|
||||
|
||||
// convenience, used by Summary.cpp
|
||||
m->m_words = words;
|
||||
m->m_sections = sections;
|
||||
|
@ -64,10 +64,6 @@ class Match {
|
||||
// this is 1
|
||||
int32_t m_numQWords;
|
||||
|
||||
// . used for highlighting under different colors (Highlight.cpp)
|
||||
// . words in the same quote should use the same highlight color
|
||||
int32_t m_colorNum;
|
||||
|
||||
// "match group" or type of match. i.e. MF_TITLETAG, MF_METASUMM, ...
|
||||
mf_t m_flags;
|
||||
|
||||
@ -161,8 +157,6 @@ class Matches {
|
||||
int64_t *m_pids3;
|
||||
int64_t *m_pids4;
|
||||
int64_t *m_pids5;
|
||||
|
||||
bool getMatchGroup( mf_t matchFlag, Words **wp, Pos **pp, Sections **sp );
|
||||
};
|
||||
|
||||
#endif
|
||||
|
3
Query.h
3
Query.h
@ -239,9 +239,6 @@ class QueryWord {
|
||||
// is this query word before a | (pipe) operator?
|
||||
bool m_piped;
|
||||
|
||||
// used by Matches.cpp for highlighting under different colors
|
||||
int32_t m_colorNum;
|
||||
|
||||
// for min/max score ranges like gbmin:price:1.99
|
||||
float m_float;
|
||||
|
||||
|
Reference in New Issue
Block a user