forked from Mirrors/privacore-open-source-search-engine
facet updates
This commit is contained in:
15
Msg39.cpp
15
Msg39.cpp
@ -1400,7 +1400,10 @@ void Msg39::estimateHitsAndSendReply ( ) {
|
||||
for ( long i = 0 ; i < m_tmpq.m_numTerms; i++ ) {
|
||||
QueryTerm *qt = &m_tmpq.m_qterms[i];
|
||||
// skip if not facet
|
||||
if ( qt->m_fieldCode != FIELD_GBFACET ) continue;
|
||||
if ( qt->m_fieldCode != FIELD_GBFACETSTR &&
|
||||
qt->m_fieldCode != FIELD_GBFACETINT &&
|
||||
qt->m_fieldCode != FIELD_GBFACETFLOAT )
|
||||
continue;
|
||||
HashTableX *ft = &qt->m_facetHashTable;
|
||||
if ( ft->m_numSlotsUsed == 0 ) continue;
|
||||
long used = ft->m_numSlotsUsed;
|
||||
@ -1430,7 +1433,10 @@ void Msg39::estimateHitsAndSendReply ( ) {
|
||||
for ( long i = 0 ; i < m_tmpq.m_numTerms ; i++ ) {
|
||||
QueryTerm *qt = &m_tmpq.m_qterms[i];
|
||||
// skip if not facet
|
||||
if ( qt->m_fieldCode != FIELD_GBFACET ) continue;
|
||||
if ( qt->m_fieldCode != FIELD_GBFACETSTR &&
|
||||
qt->m_fieldCode != FIELD_GBFACETINT &&
|
||||
qt->m_fieldCode != FIELD_GBFACETFLOAT )
|
||||
continue;
|
||||
// get all the facet hashes and their counts
|
||||
HashTableX *ft = &qt->m_facetHashTable;
|
||||
// skip if none
|
||||
@ -1440,6 +1446,9 @@ void Msg39::estimateHitsAndSendReply ( ) {
|
||||
p += 8;
|
||||
long used = ft->getNumSlotsUsed();
|
||||
if ( used > (long)MAX_FACETS ) used = (long)MAX_FACETS;
|
||||
// store count
|
||||
*(long *)p = used;
|
||||
p += 4;
|
||||
long count = 0;
|
||||
// for sanity check
|
||||
char *pend = p + (used * 8);
|
||||
@ -1460,7 +1469,7 @@ void Msg39::estimateHitsAndSendReply ( ) {
|
||||
}
|
||||
// now point to that so it can be serialized below
|
||||
mr.ptr_facetHashList = tmp.getBufStart();
|
||||
mr.size_facetHashList = tmp.length();
|
||||
mr.size_facetHashList = p - tmp.getBufStart();//tmp.length();
|
||||
|
||||
/////////////
|
||||
//
|
||||
|
17
Msg3a.cpp
17
Msg3a.cpp
@ -963,9 +963,14 @@ bool Msg3a::mergeLists ( ) {
|
||||
// facethashlists from each shard into
|
||||
//long long tid = m_q->m_qterms[i].m_termId;
|
||||
// we hold all the facet values
|
||||
if ( ! qt->m_facetHashTable.set(4,0,128,NULL,0,false,
|
||||
m_r->m_niceness,"fhtqt"))
|
||||
HashTableX *ht = &qt->m_facetHashTable;
|
||||
// we have to manually call this
|
||||
ht->constructor();
|
||||
// 4 byte key, 4 byte score for counting facet values
|
||||
if ( ! ht->set(4,4,128,NULL,0,false,m_r->m_niceness,"fhtqt"))
|
||||
return true;
|
||||
// sanity
|
||||
if ( ! ht->m_isWritable ) {char *xx=NULL;*xx=0;}
|
||||
}
|
||||
|
||||
// now scan each facethashlist from each shard and compile into
|
||||
@ -996,6 +1001,12 @@ bool Msg3a::mergeLists ( ) {
|
||||
p += 4;
|
||||
// get that query term
|
||||
QueryTerm *qt = m_q->getQueryTermByTermId64 ( termId );
|
||||
// sanity
|
||||
if ( ! qt ) {
|
||||
log("msg3a: query: could not find query term with "
|
||||
"termid %llu for facet",termId);
|
||||
break;
|
||||
}
|
||||
// the end point
|
||||
char *pend = p + 4 * nh;
|
||||
// now compile the facet hash list into there
|
||||
@ -1003,7 +1014,7 @@ bool Msg3a::mergeLists ( ) {
|
||||
// debug
|
||||
//log("msg3a: got facethash %li) %lu",k,p[k]);
|
||||
// hash it up, no dups!
|
||||
if ( ! qt->m_facetHashTable.addScore((long *)p) )
|
||||
if ( ! qt->m_facetHashTable.addScore((long *)p,1) )
|
||||
return true;
|
||||
}
|
||||
// now get the next gbfacet: term if there was one
|
||||
|
@ -2131,7 +2131,11 @@ bool printSearchResultsHeader ( State0 *st ) {
|
||||
// only for html for now i guess
|
||||
if ( si->m_format != FORMAT_HTML ) break;
|
||||
QueryTerm *qt = &si->m_q.m_qterms[i];
|
||||
if ( qt->m_fieldCode != FIELD_GBFACET ) continue;
|
||||
// skip if not facet
|
||||
if ( qt->m_fieldCode != FIELD_GBFACETSTR &&
|
||||
qt->m_fieldCode != FIELD_GBFACETINT &&
|
||||
qt->m_fieldCode != FIELD_GBFACETFLOAT )
|
||||
continue;
|
||||
HashTableX *fht = &qt->m_facetHashTable;
|
||||
// a new table for each facet query term
|
||||
bool needTable = true;
|
||||
|
16
Posdb.cpp
16
Posdb.cpp
@ -796,7 +796,10 @@ bool PosdbTable::allocTopTree ( ) {
|
||||
for ( long i = 0 ; i < m_q->m_numTerms ; i++ ) {
|
||||
QueryTerm *qt = &m_q->m_qterms[i];
|
||||
// skip if not facet
|
||||
if ( qt->m_fieldCode != FIELD_GBFACET ) continue;
|
||||
if ( qt->m_fieldCode != FIELD_GBFACETSTR &&
|
||||
qt->m_fieldCode != FIELD_GBFACETINT &&
|
||||
qt->m_fieldCode != FIELD_GBFACETFLOAT )
|
||||
continue;
|
||||
// how big?
|
||||
long long total = m_msg2->m_lists[i].getListSize();
|
||||
// skip if empty
|
||||
@ -4309,7 +4312,11 @@ bool PosdbTable::setQueryTermInfo ( ) {
|
||||
qti->m_bigramFlags[nn]|=BF_NUMBER;
|
||||
|
||||
|
||||
if (qt->m_fieldCode == FIELD_GBFACET )
|
||||
if (qt->m_fieldCode == FIELD_GBFACETSTR )
|
||||
qti->m_bigramFlags[nn]|=BF_FACET;
|
||||
if (qt->m_fieldCode == FIELD_GBFACETINT )
|
||||
qti->m_bigramFlags[nn]|=BF_FACET;
|
||||
if (qt->m_fieldCode == FIELD_GBFACETFLOAT )
|
||||
qti->m_bigramFlags[nn]|=BF_FACET;
|
||||
|
||||
// add list of member terms
|
||||
@ -6867,7 +6874,10 @@ void PosdbTable::intersectLists10_r ( ) {
|
||||
// QueryTerm::m_facetHashTable/m_dt
|
||||
for ( long i = 0 ; i < m_q->m_numTerms ; i++ ) {
|
||||
QueryTerm *qt = &m_q->m_qterms[i];
|
||||
if ( qt->m_fieldCode != FIELD_GBFACET ) continue;
|
||||
if ( qt->m_fieldCode != FIELD_GBFACETSTR &&
|
||||
qt->m_fieldCode != FIELD_GBFACETINT &&
|
||||
qt->m_fieldCode != FIELD_GBFACETFLOAT )
|
||||
continue;
|
||||
char *p = miniMergedList[i];
|
||||
//char *pend = miniMergedEnd [i];
|
||||
//
|
||||
|
22
Query.cpp
22
Query.cpp
@ -2280,7 +2280,9 @@ bool Query::setQWords ( char boolFlag ,
|
||||
fieldCode == FIELD_GBREVSORTBYINT ||
|
||||
fieldCode == FIELD_GBNUMBERMININT ||
|
||||
fieldCode == FIELD_GBNUMBERMAXINT ||
|
||||
fieldCode == FIELD_GBFACET ||
|
||||
fieldCode == FIELD_GBFACETSTR ||
|
||||
fieldCode == FIELD_GBFACETINT ||
|
||||
fieldCode == FIELD_GBFACETFLOAT ||
|
||||
|
||||
fieldCode == FIELD_GBAD ) {
|
||||
// . find 1st space -- that terminates the field value
|
||||
@ -3232,8 +3234,22 @@ struct QueryField g_fields[] = {
|
||||
"spidered in seconds since the epoch in UTC."
|
||||
},
|
||||
|
||||
{"gbfacet", FIELD_GBFACET, false,
|
||||
"Example: 'gbfacet:price' will return facets in the search results "
|
||||
{"gbfacetstr", FIELD_GBFACETSTR, false,
|
||||
"Example: 'gbfacet:color' will return facets in the search results "
|
||||
"by their color field. Any other "
|
||||
"field name can follow the gbfacet: operator."
|
||||
},
|
||||
|
||||
{"gbfacetint", FIELD_GBFACETINT, false,
|
||||
"Example: 'gbfacet:numReviews' will return "
|
||||
"facets in the search results "
|
||||
"with the # of documents for each number of reviews. Any other "
|
||||
"field name can follow the gbfacet: operator."
|
||||
},
|
||||
|
||||
{"gbfacetfloat", FIELD_GBFACETFLOAT, false,
|
||||
"Example: 'gbfacetfloat:price' will return facets in the "
|
||||
"search results "
|
||||
"with the # of documents that have certain price ranges. Any other "
|
||||
"field name can follow the gbfacet: operator."
|
||||
},
|
||||
|
4
Query.h
4
Query.h
@ -121,7 +121,9 @@ typedef unsigned long long qvec_t;
|
||||
#define FIELD_GBREVSORTBYINT 60
|
||||
#define FIELD_GBNUMBERMININT 61
|
||||
#define FIELD_GBNUMBERMAXINT 62
|
||||
#define FIELD_GBFACET 63
|
||||
#define FIELD_GBFACETSTR 63
|
||||
#define FIELD_GBFACETINT 64
|
||||
#define FIELD_GBFACETFLOAT 65
|
||||
|
||||
|
||||
#define FIELD_GBOTHER 92
|
||||
|
63
XmlDoc.cpp
63
XmlDoc.cpp
@ -6620,7 +6620,7 @@ SectionStats *XmlDoc::getSectionStats ( long long secHash64 , long sentHash32){
|
||||
// the statistics for all the values in the posdb keys of this
|
||||
// termlist, which happen to be innerHTML hashes for all pages
|
||||
// with this same xpath and on this same site.
|
||||
sprintf(qbuf,"gbfacet:gbxpathsitehash%llu",secHash64);
|
||||
sprintf(qbuf,"gbfacetstr:gbxpathsitehash%llu",secHash64);
|
||||
|
||||
CollectionRec *cr = getCollRec();
|
||||
if ( ! cr ) return NULL;
|
||||
@ -26707,7 +26707,9 @@ bool XmlDoc::hashSections ( HashTableX *tt ) {
|
||||
hi.m_prefix = prefix;
|
||||
|
||||
// we already have the hash of the inner html of the section
|
||||
hashFacet2 ( prefix, (long)(unsigned long)ih64 , hi.m_tt );
|
||||
hashFacet2 ( "gbfacetstr",
|
||||
prefix,
|
||||
(long)(unsigned long)ih64 , hi.m_tt );
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -31645,10 +31647,27 @@ bool XmlDoc::hashFacet1 ( char *term ,
|
||||
// hash the whole string as one value, the value of the facet
|
||||
long val32 = hash32 ( a , b - a );
|
||||
|
||||
return hashFacet2 ( term, val32 , tt );
|
||||
if ( ! hashFacet2 ( "gbfacetstr",term, val32 , tt ) ) return false;
|
||||
|
||||
// if it's a number hash as float and int
|
||||
if ( nw != 1 ) return true;
|
||||
char **wptrs = words->m_words;
|
||||
if ( ! is_digit ( wptrs[0][0] ) ) return true;
|
||||
|
||||
// hash with a float val
|
||||
float f = atof(wptrs[0]);
|
||||
long vf32 = *(long *)&f;
|
||||
if ( ! hashFacet2 ( "gbfacetfloat",term, vf32 , tt ) ) return false;
|
||||
|
||||
// and an int val
|
||||
long vi32 = atoi(wptrs[0]);
|
||||
if ( ! hashFacet2 ( "gbfacetint",term, vi32 , tt ) ) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool XmlDoc::hashFacet2 ( char *term ,
|
||||
bool XmlDoc::hashFacet2 ( char *prefix,
|
||||
char *term ,
|
||||
long val32 ,
|
||||
HashTableX *tt ) {
|
||||
|
||||
@ -31664,15 +31683,16 @@ bool XmlDoc::hashFacet2 ( char *term ,
|
||||
// now any field has to support gbfacet:thatfield
|
||||
// and store the 32-bit termid into where we normally put
|
||||
// the word position bits, etc.
|
||||
static long long s_facetPrefixHash = 0LL;
|
||||
if ( ! s_facetPrefixHash )
|
||||
s_facetPrefixHash = hash64n ( "gbfacet" );
|
||||
//static long long s_facetPrefixHash = 0LL;
|
||||
//if ( ! s_facetPrefixHash )
|
||||
// s_facetPrefixHash = hash64n ( "gbfacet" );
|
||||
long long prefixHash = hash64n ( prefix );
|
||||
|
||||
long long termId64 = hash64n ( term );
|
||||
|
||||
// combine with the "gbfacet" prefix. old prefix hash on right.
|
||||
// like "price" on right.
|
||||
long long ph2 = hash64 ( s_facetPrefixHash , termId64);//prefixHash );
|
||||
// like "price" on left and "gbfacetfloat" on right... see Query.cpp
|
||||
long long ph2 = hash64 ( termId64, prefixHash );
|
||||
|
||||
// . now store it
|
||||
// . use field hash as the termid. normally this would just be
|
||||
@ -31730,21 +31750,28 @@ bool XmlDoc::hashFacet2 ( char *term ,
|
||||
if ( ! m_wts )
|
||||
return true;
|
||||
|
||||
// store in buffer
|
||||
bool isFloat = false;
|
||||
if ( strcmp(prefix,"gbfacetfloat")==0 ) isFloat = true;
|
||||
|
||||
// store in buffer for display on pageparser.cpp output
|
||||
char buf[128];
|
||||
long bufLen = sprintf(buf,"facetField=%s facetVal32=%lu",
|
||||
term,val32);
|
||||
long bufLen;
|
||||
if ( isFloat )
|
||||
bufLen=sprintf(buf,"facetField=%s facetVal32=%f",term,
|
||||
*(float *)&val32);
|
||||
else
|
||||
bufLen=sprintf(buf,"facetField=%s facetVal32=%lu",term,val32);
|
||||
|
||||
// make a special hashinfo for this facet
|
||||
HashInfo hi;
|
||||
hi.m_tt = tt;
|
||||
hi.m_prefix = "gbfacet";
|
||||
hi.m_prefix = prefix;//"gbfacet";
|
||||
|
||||
// add to wts for PageParser.cpp display
|
||||
// store it
|
||||
if ( ! storeTerm ( buf,
|
||||
bufLen,
|
||||
s_facetPrefixHash,
|
||||
prefixHash, // s_facetPrefixHash,
|
||||
&hi,
|
||||
0, // word#, i,
|
||||
0, // wordPos
|
||||
@ -31836,7 +31863,7 @@ bool XmlDoc::hashNumber ( char *beginBuf ,
|
||||
// . CHROME DETECTION
|
||||
// . hash a special "gbxpathsitehash12345678" term which has the hash of the
|
||||
// innerHTML content embedded in it.
|
||||
// . we do this for doing gbfacet:gbxpathsitehash12345678 etc. on every
|
||||
// . we do this for doing gbfacetstr:gbxpathsitehash12345678 etc. on every
|
||||
// section with innerHTML so we can figure out the histogram of each
|
||||
// section on this page relative to its subdomain. like the distribution
|
||||
// of the innerHTML for this section as it appears on other pages from
|
||||
@ -33369,7 +33396,9 @@ bool XmlDoc::printDoc ( SafeBuf *sb ) {
|
||||
prefix = start + tp[i]->m_prefixOff;
|
||||
|
||||
bool isFacet = false;
|
||||
if ( prefix && prefix[0]=='g' && strcmp(prefix,"gbfacet")== 0 )
|
||||
if ( prefix &&
|
||||
prefix[0]=='g' &&
|
||||
strncmp(prefix,"gbfacet",7)== 0 )
|
||||
isFacet = true;
|
||||
|
||||
sb->safePrintf ( "<tr>"
|
||||
@ -33501,7 +33530,7 @@ bool XmlDoc::printDoc ( SafeBuf *sb ) {
|
||||
// there is no prefix for such terms now
|
||||
// TODO: store actual key in there i guess?? or just this bit.
|
||||
long val32 = 0;
|
||||
if ( strcmp(prefix,"gbfacet") == 0 )
|
||||
if ( strncmp(prefix,"gbfacet",7) == 0 )
|
||||
val32 = g_posdb.getInt(&tp[i]->m_key);
|
||||
|
||||
// . this is like gbxpathsitehash1234567
|
||||
|
4
XmlDoc.h
4
XmlDoc.h
@ -890,9 +890,9 @@ class XmlDoc {
|
||||
// class HashInfo *hi ,
|
||||
// long sentHash32 ) ;
|
||||
|
||||
bool hashFacet1 ( char *prefix, class Words *words , HashTableX *dt) ;
|
||||
bool hashFacet1 ( char *term, class Words *words , HashTableX *dt) ;
|
||||
|
||||
bool hashFacet2 ( char *prefix, long val32 , HashTableX *dt) ;
|
||||
bool hashFacet2 ( char *prefix,char *term,long val32, HashTableX *dt) ;
|
||||
|
||||
|
||||
bool hashNumber ( char *beginBuf ,
|
||||
|
Reference in New Issue
Block a user