Remove m_colorNum from Query & Matches

This commit is contained in:
Ai Lin Chia
2016-03-04 17:50:24 +01:00
parent 1391a86a20
commit 56861e10b7
4 changed files with 46 additions and 158 deletions

@ -6,51 +6,33 @@
#include "Synonyms.h"
#include "XmlDoc.h"
// use different front tags for matching different term #'s
static char *s_frontTags[] = {
"<span class=\"gbcnst gbcnst00\">" ,
"<span class=\"gbcnst gbcnst01\">" ,
"<span class=\"gbcnst gbcnst02\">" ,
"<span class=\"gbcnst gbcnst03\">" ,
"<span class=\"gbcnst gbcnst04\">" ,
"<span class=\"gbcnst gbcnst05\">" ,
"<span class=\"gbcnst gbcnst06\">" ,
"<span class=\"gbcnst gbcnst07\">" ,
"<span class=\"gbcnst gbcnst08\">" ,
"<span class=\"gbcnst gbcnst09\">"
static const char *s_frontTags[] = {
"<span class='gbcnst00'>" ,
"<span class='gbcnst01'>" ,
"<span class='gbcnst02'>" ,
"<span class='gbcnst03'>" ,
"<span class='gbcnst04'>" ,
"<span class='gbcnst05'>" ,
"<span class='gbcnst06'>" ,
"<span class='gbcnst07'>" ,
"<span class='gbcnst08'>" ,
"<span class='gbcnst09'>"
};
int32_t s_frontTagLen=gbstrlen("<span class=\"gbcnst gbcnst00\">");
static char *s_styleSheet =
"<style type=\"text/css\">"
"span.gbcns{font-weight:600}"
"span.gbcnst00{color:black;background-color:#ffff66}"
"span.gbcnst01{color:black;background-color:#a0ffff}"
"span.gbcnst02{color:black;background-color:#99ff99}"
"span.gbcnst03{color:black;background-color:#ff9999}"
"span.gbcnst04{color:black;background-color:#ff66ff}"
"span.gbcnst05{color:white;background-color:#880000}"
"span.gbcnst06{color:white;background-color:#00aa00}"
"span.gbcnst07{color:white;background-color:#886800}"
"span.gbcnst08{color:white;background-color:#004699}"
"span.gbcnst09{color:white;background-color:#990099}"
"span.gbcnst00x{color:white;background-color:black;border:2px solid #ffff66}"
"span.gbcnst01x{color:white;background-color:black;border:2px solid #a0ffff}"
"span.gbcnst02x{color:white;background-color:black;border:2px solid #99ff99}"
"span.gbcnst03x{color:white;background-color:black;border:2px solid #ff9999}"
"span.gbcnst04x{color:white;background-color:black;border:2px solid #ff66ff}"
"span.gbcnst05x{color:white;background-color:black;border:2px solid #880000}"
"span.gbcnst06x{color:white;background-color:black;border:2px solid #00aa00}"
"span.gbcnst07x{color:white;background-color:black;border:2px solid #886800}"
"span.gbcnst08x{color:white;background-color:black;border:2px solid #004699}"
"span.gbcnst09x{color:white;background-color:black;border:2px solid #990099}"
static const char *s_styleSheet =
"<style type='text/css'>"
"span.gbcnst00{color:black;background-color:#ffff66}"
"span.gbcnst01{color:black;background-color:#a0ffff}"
"span.gbcnst02{color:black;background-color:#99ff99}"
"span.gbcnst03{color:black;background-color:#ff9999}"
"span.gbcnst04{color:black;background-color:#ff66ff}"
"span.gbcnst05{color:white;background-color:#880000}"
"span.gbcnst06{color:white;background-color:#00aa00}"
"span.gbcnst07{color:white;background-color:#886800}"
"span.gbcnst08{color:white;background-color:#004699}"
"span.gbcnst09{color:white;background-color:#990099}"
"</style>";
int32_t s_styleSheetLen = gbstrlen( s_styleSheet );
//buffer for writing term list items
char s_termList[1024];
// . return length stored into "buf"
// . content must be NULL terminated
@ -124,22 +106,6 @@ bool Highlight::highlightWords ( Words *words , Matches *m, Query *q ) {
char *w;
int32_t wlen;
// length of our front tag should be constant
int32_t frontTagLen ;
if ( m_frontTag ) frontTagLen = m_frontTagLen;
else frontTagLen = s_frontTagLen;
// set the back tag, should be constant
const char *backTag ;
int32_t backTagLen;
if ( m_backTag ) {
backTag = m_backTag;
backTagLen = m_backTagLen;
}
else {
backTag = "</span>";
backTagLen = 7;
}
// set nexti to the word # of the first word that matches a query word
int32_t nextm = -1;
int32_t nexti = -1;
@ -150,26 +116,20 @@ bool Highlight::highlightWords ( Words *words , Matches *m, Query *q ) {
int32_t backTagi = -1;
bool inTitle = false;
bool endHead = false;
bool endHtml = false;
for ( int32_t i = 0 ; i < numWords ; i++ ) {
// set word's info
w = words->getWord(i);
wlen = words->getWordLen(i);
endHead = false;
endHtml = false;
bool endHead = false;
bool endHtml = false;
if ( (words->getTagId(i) ) == TAG_TITLE ) {
inTitle = !(words->isBackTag(i));
} else if ( (words->getTagId(i) ) == TAG_HTML ) {
if ( words->isBackTag( i ) ) {
endHtml = true;
}
endHtml = words->isBackTag( i );
} else if ( (words->getTagId(i) ) == TAG_HEAD ) {
if (words->isBackTag(i) ) {
endHead = true;
}
endHead = words->isBackTag(i);
}
// match class ptr
@ -187,26 +147,20 @@ bool Highlight::highlightWords ( Words *words , Matches *m, Query *q ) {
}
}
else {
// now each match is the entire quote, so write the
// fron tag right now
const char *frontTag;
// now each match is the entire quote, so write the front tag right now
if ( m_frontTag ) {
frontTag = m_frontTag;
m_sb->safeStrcpy ( m_frontTag );
} else {
frontTag = s_frontTags[mat->m_colorNum%10];
m_sb->safeStrcpy( s_frontTags[(mat->m_qwordNum % 10)] );
}
m_sb->safeStrcpy ( (char *)frontTag );
// when to write the back tag? add the number of
// words in the match to i.
backTagi = i + mat->m_numWords;
}
}
else if ( endHead ) {
// include the tags style sheet immediately before
// the closing </TITLE> tag
m_sb->safeMemcpy( s_styleSheet , s_styleSheetLen );
} else if ( endHead ) {
// include the tags style sheet immediately before the closing </HEAD> tag
m_sb->safeStrcpy( s_styleSheet );
}
if ( i == nexti ) {
@ -224,7 +178,11 @@ bool Highlight::highlightWords ( Words *words , Matches *m, Query *q ) {
// back tag
if ( i == backTagi-1 ) {
// store the back tag
m_sb->safeMemcpy ( (char *)backTag , backTagLen );
if ( m_backTag ) {
m_sb->safeMemcpy( m_backTag, m_backTagLen );
} else {
m_sb->safeStrcpy("</span>");
}
}
}

@ -10,9 +10,6 @@
#include "Sections.h"
#include "XmlDoc.h"
//#define DEBUG_MATCHES 1
// TODO: have Matches set itself from all the meta tags, titles, link text,
// neighborhoods and body. then proximity algo can utilize that info
// as well as the summary generator, Summary.cpp. right now prox algo
@ -35,12 +32,10 @@ void Matches::reset ( ) {
void Matches::reset2() {
m_numMatches = 0;
//m_maxNQT = -1;
m_numAlnums = 0;
// free all the classes' buffers
for ( int32_t i = 0 ; i < m_numMatchGroups ; i++ ) {
m_wordsArray [i].reset();
//m_sectionsArray[i].reset();
m_posArray [i].reset();
m_bitsArray [i].reset();
}
@ -48,25 +43,17 @@ void Matches::reset2() {
}
bool Matches::isMatchableTerm ( QueryTerm *qt ) { // , int32_t i ) {
// . skip if negative sign
// . no, we need to match negative words/phrases now so we can
// big hack them out...
//if ( qw->m_wordSign == '-' ) return false;
QueryWord *qw = qt->m_qword;
// not derived from a query word? how?
if ( ! qw ) return false;
if ( qw->m_ignoreWord == IGNORE_DEFAULT ) return false;
if ( qw->m_ignoreWord == IGNORE_FIELDNAME ) return false;
if ( qw->m_ignoreWord == IGNORE_BOOLOP ) return false;
// stop words in 'all the king's men' query need to be highlighted
//if ( qw->m_isQueryStopWord && ! qw->m_inQuotes ) return false;
//if ( qw->m_isStopWord && ! qw->m_inQuotes ) return false;
// take this out for now so we highlight for title: terms
if ( qw->m_fieldCode && qw->m_fieldCode != FIELD_TITLE ) return false;
// what word # are we?
int32_t qwn = qw - m_q->m_qwords;
// do not include if in a quote and does not start it!!
//if ( qw->m_inQuotes && i-1 != qw->m_quoteStart ) return false;
if ( qw->m_quoteStart >= 0 && qw->m_quoteStart != qwn ) return false;
// if query is too long, a query word can be truncated!
// this happens for some words if they are ignored, too!
@ -74,12 +61,6 @@ bool Matches::isMatchableTerm ( QueryTerm *qt ) { // , int32_t i ) {
// after a NOT operator?
if ( qw->m_underNOT )
return false;
// in a field?
//if ( qw->m_fieldCode != fieldCode ) continue;
// skip if a query stop word w/o a sign and ignored
//if ( q->m_isStopWord[i] &&
// q->m_termSigns[i] == '\0' &&
// q->m_ignore[i] ) continue;
return true;
}
@ -111,15 +92,10 @@ void Matches::setQuery ( Query *q ) {
// how many query words do we have that can be matched?
int32_t numToMatch = 0;
for ( int32_t i = 0 ; i < nqt ; i++ ) {
// rest this
//m_qwordFlags[i] = 0;
// get query word #i
//QueryWord *qw = &m_q->m_qwords[i];
QueryTerm *qt = &m_q->m_qterms[i];
// skip if ignored *in certain ways only*
if ( ! isMatchableTerm ( qt ) ) {
//if( (qw->m_wordSign == '-') && !qw->m_fieldCode )
// m_numNegTerms++;
continue;
}
// count it
@ -147,38 +123,26 @@ void Matches::setQuery ( Query *q ) {
// clear hash table
memset ( m_qtableIds , 0 , m_numSlots * 8 );
memset ( m_qtableFlags , 0 , m_numSlots );
//memset ( m_qtableNegIds, 0 , m_numNegTerms );
// alternate colors for highlighting
int32_t colorNum = 0;
//int32_t negIds = 0;
// . hash all the query terms into the hash table
// . the term's score should be 100 for a very rare term,
// and 1 for a stop word.
//m_maxNQT = nqt;
for ( int32_t i = 0 ; i < nqt ; i++ ) {
// get query word #i
//QueryWord *qw = &m_q->m_qwords[i];
QueryTerm *qt = &m_q->m_qterms[i];
// skip if ignored *in certain ways only*
if ( ! isMatchableTerm ( qt ) ) {
//if( (qw->m_wordSign == '-') && !qw->m_fieldCode )
// m_qtableNegIds[negIds++] = qw->m_rawWordId;
continue;
}
// get the word it is from
QueryWord *qw = qt->m_qword;
// get word #
int32_t qwn = qw - q->m_qwords;
// assign color # for term highlighting with different colors
qw->m_colorNum = colorNum++;
// do not overfill table
if ( colorNum > MAX_QUERY_WORDS_TO_MATCH ) {
if ( i >= MAX_QUERY_WORDS_TO_MATCH ) {
break;
}
// this should be equivalent to the word id
int64_t qid = qt->m_rawTermId;//qw->m_rawWordId;
@ -194,15 +158,19 @@ void Matches::setQuery ( Query *q ) {
// put in hash table
n = ((uint32_t)qid) & mask;
// chain to an empty slot
while ( m_qtableIds[n] && m_qtableIds[n] != qid )
if ( ++n >= m_numSlots ) n = 0;
// . if already occupied, do not overwrite this, keep this
// first word, the other is often ignored as IGNORE_REPEAT
// . what word # in the query are we. save this.
if ( ! m_qtableIds[n] ) m_qtableWordNums[n] = qwn;
// store it
m_qtableIds[n] = qid;
// in quotes? this term may appear multiple times in the
// query, in some cases in quotes, and in some cases not.
// we need to know either way for logic below.
@ -371,7 +339,6 @@ bool Matches::addMatches( char *s, int32_t slen, mf_t flags, int32_t niceness )
// get some new ptrs for this match group
Words *wp = &m_wordsArray [ m_numMatchGroups ];
//Sections *sp = &m_sectionsArray [ m_numMatchGroups ];
Sections *sp = NULL;
Bits *bp = &m_bitsArray [ m_numMatchGroups ];
Pos *pb = &m_posArray [ m_numMatchGroups ];
@ -381,9 +348,6 @@ bool Matches::addMatches( char *s, int32_t slen, mf_t flags, int32_t niceness )
return false;
}
// scores vector
//if ( ! sp->set ( wp , TITLEREC_CURRENT_VERSION , false ) )
// return false;
// bits vector
if ( ! bp->setForSummary ( wp ) ) {
return false;
@ -424,25 +388,6 @@ bool Matches::addMatches( char *s, int32_t slen, mf_t flags, int32_t niceness )
return status;
}
// . look up the first match group whose flag equals "matchFlag"
// . on success store pointers to that group's Words and Pos objects in
//   *wp and *pp, set *sp to NULL and return true
// . *sp is always NULL on success because the per-group sections array
//   is no longer referenced here (see the commented-out assignment below)
// . return false, leaving *wp, *pp and *sp untouched, if no group has
//   that flag
bool Matches::getMatchGroup ( mf_t matchFlag ,
Words **wp ,
Pos **pp ,
Sections **sp ) {
// scan the match groups in the order they were added
for ( int32_t i = 0 ; i < m_numMatchGroups ; i++ ) {
// must be the type we want
if ( m_flags[i] != matchFlag ) continue;
// hand back pointers into our own arrays; nothing is copied
*wp = &m_wordsArray [i];
*pp = &m_posArray [i];
//*sp = &m_sectionsArray [i];
// no sections are available for a match group, so report none
*sp = NULL;
return true;
}
// not found
return false;
}
// . TODO: support stemming later. each word should then have multiple ids.
// . add to our m_matches[] array iff addToMatches is true, otherwise we just
// set the m_foundTermVector for doing the BIG HACK described in Summary.cpp
@ -734,12 +679,6 @@ bool Matches::addMatches(Words *words, Phrases *phrases, Sections *sections, Bit
// get the first query word # of this match
qw = &m_q->m_qwords[qwn];
// get its color. for highlighting under different colors.
m->m_colorNum = qw->m_colorNum;
// sanity check
if ( m->m_colorNum < 0 ) { char *xx = NULL; *xx = 0; }
// convenience, used by Summary.cpp
m->m_words = words;
m->m_sections = sections;

@ -64,10 +64,6 @@ class Match {
// this is 1
int32_t m_numQWords;
// . used for highlighting under different colors (Highlight.cpp)
// . words in the same quote should use the same highlight color
int32_t m_colorNum;
// "match group" or type of match. i.e. MF_TITLETAG, MF_METASUMM, ...
mf_t m_flags;
@ -161,8 +157,6 @@ class Matches {
int64_t *m_pids3;
int64_t *m_pids4;
int64_t *m_pids5;
bool getMatchGroup( mf_t matchFlag, Words **wp, Pos **pp, Sections **sp );
};
#endif

@ -239,9 +239,6 @@ class QueryWord {
// is this query word before a | (pipe) operator?
bool m_piped;
// used by Matches.cpp for highlighting under different colors
int32_t m_colorNum;
// for min/max score ranges like gbmin:price:1.99
float m_float;