Merge branch 'testing' of github.com:gigablast/open-source-search-engine into testing
Collectiondb.cpp
@@ -240,7 +240,7 @@ bool Collectiondb::addExistingColl ( char *coll, collnum_t collnum ) {
 	//cr->m_collnum = oldCollnum;
 
 	// get the default.conf from working dir if there
-	g_parms.setToDefault( (char *)cr , OBJ_COLL );
+	g_parms.setToDefault( (char *)cr , OBJ_COLL , cr );
 
 	strcpy ( cr->m_coll , coll );
 	cr->m_collLen = gbstrlen ( coll );
@@ -383,7 +383,7 @@ bool Collectiondb::addNewColl ( char *coll ,
 	// . get the default.conf from working dir if there
 	// . i think this calls CollectionRec::reset() which resets all of its
 	//   rdbbase classes for its collnum so m_collnum needs to be right
-	g_parms.setToDefault( (char *)cr , OBJ_COLL );
+	g_parms.setToDefault( (char *)cr , OBJ_COLL , cr );
 
 	/*
 	// the default conf file
@@ -1657,7 +1657,7 @@ bool CollectionRec::load ( char *coll , long i ) {
 	// also reset some counts not included in parms list
 	reset();
 	// before we load, set to defaults in case some are not in xml file
-	g_parms.setToDefault ( (char *)this , OBJ_COLL );
+	g_parms.setToDefault ( (char *)this , OBJ_COLL , this );
 	// get the filename with that id
 	File f;
 	char tmp2[1024];
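Note: the pattern above recurs throughout this commit: Parms::setToDefault() gains a third argument naming the CollectionRec that can supply per-collection defaults. A sketch of the resulting calling convention, with the class shape abbreviated from the diff:

// Sketch only: every setToDefault() call now states which CollectionRec,
// if any, supplies per-collection defaults.
class CollectionRec;
class Parms {
public:
	void setToDefault ( char *THIS , char objType , CollectionRec *argcr );
};
// Call sites after this commit, as seen in the diff:
//   g_parms.setToDefault ( (char *)cr   , OBJ_COLL      , cr   ); // Collectiondb.cpp
//   g_parms.setToDefault ( (char *)this , OBJ_CONF      , NULL ); // Conf.cpp: g_conf is global
//   g_parms.setToDefault ( (char *)&gr  , OBJ_GBREQUEST , NULL ); // Parms.cpp
//   g_parms.setToDefault ( (char *)this , OBJ_SI        , cr   ); // SearchInput.cpp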
Conf.cpp (4 changed lines)
@@ -165,12 +165,12 @@ bool Conf::isConnectIp ( unsigned long ip ) {
 
 // . set all member vars to their default values
 void Conf::reset ( ) {
-	g_parms.setToDefault ( (char *)this , OBJ_CONF );
+	g_parms.setToDefault ( (char *)this , OBJ_CONF ,NULL);
 	m_save = true;
 }
 
 bool Conf::init ( char *dir ) { // , long hostId ) {
-	g_parms.setToDefault ( (char *)this , OBJ_CONF );
+	g_parms.setToDefault ( (char *)this , OBJ_CONF ,NULL);
 	m_save = true;
 	char fname[1024];
 	if ( dir ) sprintf ( fname , "%slocalgb.conf", dir );
DiskPageCache.cpp
@@ -28,7 +28,10 @@ DiskPageCache::DiskPageCache () {
 	// sometimes db may pass an unitialized DiskPageCache to a BigFile
 	// so make sure when BigFile::close calls DiskPageCache::rmVfd() our
 	// m_memOff vector is all NULLed out, otherwise it will core
-	memset ( m_memOff , 0 , 4 * MAX_NUM_VFDS2 );
+	//memset ( m_memOff , 0 , sizeof(long *) * MAX_NUM_VFDS2 );
+	for ( long i = 0 ; i < MAX_NUM_VFDS2 ; i++ )
+		m_memOff[i] = NULL;
+
 	m_availMemOff = NULL;
 	//m_isOverriden = false;
 	reset();
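Note: the constructor fix above matters on 64-bit builds. m_memOff holds pointers, so memset(m_memOff, 0, 4 * MAX_NUM_VFDS2) assumed 4-byte slots and cleared only the first half of the array when pointers are 8 bytes, leaving garbage for DiskPageCache::rmVfd() to trip over. A standalone illustration (the constant is shrunk for the example):

#include <cstdio>
#include <cstring>

static const long MAX_NUM_VFDS2 = 8; // tiny stand-in for the real constant

int main ( ) {
	long *memOff[MAX_NUM_VFDS2];
	// old, broken on 64-bit: clears 4*8 = 32 of the 64 bytes
	memset ( memOff , 0 , 4 * MAX_NUM_VFDS2 );
	// fixed: either size the memset correctly...
	memset ( memOff , 0 , sizeof(long *) * MAX_NUM_VFDS2 );
	// ...or NULL each slot explicitly, as the committed code now does
	for ( long i = 0 ; i < MAX_NUM_VFDS2 ; i++ )
		memOff[i] = NULL;
	printf ( "slots cleared: %ld\n" , MAX_NUM_VFDS2 );
	return 0;
}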
Makefile (4 changed lines)
@@ -546,8 +546,8 @@ master-rpm:
 # DEBIAN PACKAGE SECTION BEGIN
 
 # need to do 'apt-get intall dh-make'
-testing-deb:
-	git archive --format=tar --prefix=gb-1.0/ testing > ../gb_1.0.orig.tar
+master-deb:
+	git archive --format=tar --prefix=gb-1.0/ master > ../gb_1.0.orig.tar
 	rm -rf debian
 # change "-p gb_1.0" to "-p gb_1.1" to update version for example
 	dh_make -e gigablast@mail.com -p gb_1.0 -f ../gb_1.0.orig.tar
Msg39.cpp (27 changed lines)
@@ -37,7 +37,7 @@ Msg39::Msg39 () {
 void Msg39::reset() {
 	if ( m_inUse ) { char *xx=NULL;*xx=0; }
 	m_allocedTree = false;
-	m_numDocIdSplits = 1;
+	//m_numDocIdSplits = 1;
 	m_tmpq.reset();
 	m_numTotalHits = 0;
 	m_gotClusterRecs = 0;
@@ -241,29 +241,6 @@ void Msg39::getDocIds2 ( Msg39Request *req ) {
 
 	QUICKPOLL ( m_r->m_niceness );
 
-	// assume not doing special docid splitting
-	m_numDocIdSplits = 1;
-
-	// . do not do splits if caller is already specifying a docid range
-	//   like for gbdocid: queries i guess.
-	// . make sure m_msg2 is non-NULL, because if it is NULL we are
-	//   evaluating a query for a single docid for seo tools
-	if ( m_r->m_minDocId == -1 ) { // && m_msg2 ) {
-		long nt = m_tmpq.getNumTerms();
-		m_numDocIdSplits = nt / 2;
-		if ( m_numDocIdSplits == 0 ) m_numDocIdSplits = 1;
-	}
-
-	// for testing
-	//m_numDocIdSplits = 3;
-
-	//if ( ! g_conf.m_doDocIdRangeSplitting )
-	//	m_numDocIdSplits = 1;
-
-	// limit to 10
-	if ( m_numDocIdSplits > 10 )
-		m_numDocIdSplits = 10;
-
 	// . if caller already specified a docid range, then be loyal to that!
 	// . or if we do not have enough query terms to warrant splitting
 	//if ( m_numDocIdSplits == 1 ) {
@@ -351,7 +328,7 @@ bool Msg39::controlLoop ( ) {
 		// the starting docid...
 		long long d0 = m_ddd;
 		// shortcut
-		long long delta = MAX_DOCID / (long long)m_numDocIdSplits;
+		long long delta = MAX_DOCID / (long long)m_r->m_numDocIdSplits;
 		// advance to point to the exclusive endpoint
 		m_ddd += delta;
 		// ensure this is exclusive of ddd since it will be
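Note: with this change the split count travels in the request (m_r->m_numDocIdSplits) instead of being computed inside each Msg39. controlLoop() still walks the docid space in fixed-size chunks. A minimal sketch of that range arithmetic; the MAX_DOCID value below is an illustrative placeholder, not the engine's real constant:

#include <cstdio>

static const long long MAX_DOCID = (1LL << 38) - 1; // placeholder value

int main ( ) {
	long numDocIdSplits = 4; // would arrive in the Msg39Request
	long long ddd = 0;       // m_ddd: start of the current range
	long long delta = MAX_DOCID / (long long)numDocIdSplits;
	for ( long pass = 0 ; pass < numDocIdSplits ; pass++ ) {
		long long d0 = ddd; // inclusive start
		ddd += delta;       // advance to the exclusive endpoint
		printf ( "pass %ld: docids [%lld, %lld)\n" , pass , d0 , ddd );
	}
	// integer division can leave a small remainder above the last
	// endpoint; the real loop is responsible for covering it
	return 0;
}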
Msg39.h (9 changed lines)
@@ -69,8 +69,10 @@ class Msg39Request {
 		m_getDocIdScoringInfo = 1;
 
 		// -1 means to not to docid range restriction
-		m_minDocId = -1;
-		m_maxDocId = -1;
+		m_minDocId = -1LL;
+		m_maxDocId = -1LL;
+
+		m_numDocIdSplits = 1;
 
 		// for widget, to only get results to append to last docid
 		m_maxSerpScore = 0.0;
@@ -92,6 +94,7 @@ class Msg39Request {
 	char      m_niceness;
 	long      m_maxAge;
 	long      m_maxQueryTerms;
+	long      m_numDocIdSplits;
 	//long      m_compoundListMaxSize;
 	char      m_boolFlag;
 	uint8_t   m_language;
@@ -238,7 +241,7 @@ class Msg39 {
 
 	char      m_debug;
 
-	long      m_numDocIdSplits;
+	//long      m_numDocIdSplits;
 	bool      m_allocedTree;
 	long long m_ddd;
 	long long m_dddEnd;
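Note: m_numDocIdSplits now lives in Msg39Request, so the requester (Msg40) chooses the split count and every Msg39 obeys it. The -1 to -1LL edit is cosmetic: assigning the int literal -1 to a long long already sign-extends to -1LL. A quick self-contained check; the struct shape is illustrative, not the real class:

#include <cstdio>

struct Msg39RequestSketch {
	long long m_minDocId;
	long long m_maxDocId;
	long      m_numDocIdSplits;
	Msg39RequestSketch ( ) {
		m_minDocId = -1;   // int -1 sign-extends: same stored value as -1LL
		m_maxDocId = -1LL; // explicit about the width
		m_numDocIdSplits = 1; // requester (Msg40) overrides before sending
	}
};

int main ( ) {
	Msg39RequestSketch r;
	printf ( "%d\n" , (int)(r.m_minDocId == -1LL && r.m_maxDocId == -1LL) ); // prints 1
	return 0;
}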
Msg40.cpp (66 changed lines)
@@ -245,10 +245,10 @@ bool Msg40::getResults ( SearchInput *si ,
 	if ( /*m_si->m_firstResultNum == 0 && */get < m_docsToScanForTopics )
 		get = m_docsToScanForTopics;
 	// for alden's reranking. often this is 50!
-	if ( get < m_si->m_docsToScanForReranking )
-		get = m_si->m_docsToScanForReranking;
+	//if ( get < m_si->m_docsToScanForReranking )
+	//	get = m_si->m_docsToScanForReranking;
 	// for zak's reference pages
-	if ( get < m_si->m_refs_numToGenerate ) get=m_si->m_refs_numToGenerate;
+	// if(get<m_si->m_refs_numToGenerate ) get=m_si->m_refs_numToGenerate;
 	// limit to this ceiling though for peformance reasons
 	//if ( get > m_maxDocIdsToCompute ) get = m_maxDocIdsToCompute;
 	// ok, need some sane limit though to prevent malloc from
@@ -562,6 +562,43 @@ bool Msg40::getDocIds ( bool recall ) {
 	mr.m_minSerpDocId = m_si->m_minSerpDocId;
 	mr.m_maxSerpScore = m_si->m_maxSerpScore;
 
+	//
+	// how many docid splits should we do to avoid going OOM?
+	//
+	CollectionRec *cr = g_collectiondb.getRec(m_firstCollnum);
+	RdbBase *base = NULL;
+	if ( cr ) g_titledb.getRdb()->getBase(cr->m_collnum);
+	long long numDocs = 0;
+	if ( base ) numDocs = base->getNumTotalRecs();
+	// for every 5M docids per host, lets split up the docid range
+	// to avoid going OOM
+	long mult = numDocs / 5000000;
+	if ( mult <= 0 ) mult = 1;
+	// . do not do splits if caller is already specifying a docid range
+	//   like for gbdocid: queries i guess.
+	// . make sure m_msg2 is non-NULL, because if it is NULL we are
+	//   evaluating a query for a single docid for seo tools
+	//if ( m_r->m_minDocId == -1 ) { // && m_msg2 ) {
+	long nt = m_si->m_q.getNumTerms();
+	long numDocIdSplits = nt / 2; // ;/// 2;
+	if ( numDocIdSplits <= 0 ) numDocIdSplits = 1;
+	// and mult based on index size
+	numDocIdSplits *= mult;
+	//if ( numDocIdSplits < 5 ) numDocIdSplits = 5;
+	//}
+	// for testing
+	//m_numDocIdSplits = 3;
+	//if ( ! g_conf.m_doDocIdRangeSplitting )
+	//	m_numDocIdSplits = 1;
+	// limit to 10
+	if ( numDocIdSplits > 15 )
+		numDocIdSplits = 15;
+	// store it in the reuquest now
+	mr.m_numDocIdSplits = numDocIdSplits;
+
+
+
+
 	// . get the docIds
 	// . this sets m_msg3a.m_clusterLevels[] for us
 	//if(! m_msg3a.getDocIds ( &m_r, m_si->m_q, this , gotDocIdsWrapper))
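Note: two things about the block above. First, as committed, the result of getBase() is never assigned to base, so numDocs stays 0 and mult stays 1; presumably `base = g_titledb.getRdb()->getBase(cr->m_collnum);` was intended. Second, the heuristic itself: roughly one split per two query terms, multiplied by one for every 5M docids on the host, capped at 15. A hedged sketch of that computation, assuming the intended assignment; the parameter names stand in for the diff's locals:

// numDocsInIndex stands in for base->getNumTotalRecs(), numQueryTerms for
// m_si->m_q.getNumTerms(); the constants come from the diff.
long computeNumDocIdSplits ( long long numDocsInIndex , long numQueryTerms ) {
	// one extra multiple for every 5M docids per host, to avoid OOM
	long mult = (long)(numDocsInIndex / 5000000);
	if ( mult <= 0 ) mult = 1;
	// more terms means more termlist data per docid range, so split more
	long numDocIdSplits = numQueryTerms / 2;
	if ( numDocIdSplits <= 0 ) numDocIdSplits = 1;
	numDocIdSplits *= mult;
	// cap it; the Msg39-side code this replaces capped at 10
	if ( numDocIdSplits > 15 ) numDocIdSplits = 15;
	return numDocIdSplits;
}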
@@ -1240,6 +1277,18 @@ bool Msg40::launchMsg20s ( bool recalled ) {
 		if ( m_si->m_streamResults &&
 		     i >= m_printi + MAX_OUTSTANDING_MSG20S - 1 )
 			break;
+
+		// if we have printed enough summaries then do not launch
+		// any more, wait for them to come back in
+		if ( m_printi >= m_docsToGetVisible ) {
+			logf(LOG_DEBUG,"query: got %li summaries. done. "
+			     "waiting on remaining "
+			     "%li to return."
+			     , m_printi
+			     , m_numRequests-m_numReplies);
+			break;
+		}
+
 		// do not double count!
 		//if ( i <= m_lastProcessedi ) continue;
 		// do not repeat for this i
@@ -1552,6 +1601,9 @@ bool Msg40::gotSummary ( ) {
 	State0 *st = (State0 *)m_state;
 
 	// keep socket alive if not streaming. like downloading csv...
+	// this fucks up HTTP replies by inserting a space before the "HTTP"
+	// it does not render properly on the browser...
+	/*
 	long now2 = getTimeLocal();
 	if ( now2 - m_lastHeartbeat >= 10 && ! m_si->m_streamResults &&
 	     // incase socket is closed and recycled for another connection
@@ -1561,7 +1613,7 @@ bool Msg40::gotSummary ( ) {
 		log("msg40: sent heartbeat of %li bytes on sd=%li",
 		    (long)n,(long)st->m_socket->m_sd);
 	}
-
+	*/
 
 
 	/*
@@ -4979,12 +5031,14 @@ static int factCmp ( const void *a, const void *b ) {
 	return 0;
 }
 
 // . aka NUGGABITS
 // . now make the fast facts from the gigabits and the samples.
 // . these are sentences containing the query and a gigabit.
 // . sets m_factBuf
 bool Msg40::computeFastFacts ( ) {
 
+	// skip for now
+	return true;
 	//return true;
 
 	bool debugGigabits = m_si->m_debugGigabits;
@@ -5037,7 +5091,7 @@ bool Msg40::computeFastFacts ( ) {
 
 
 	//
-	// store Facts (sentences) into this safebuf (nuggets)
+	// store Facts (sentences) into this safebuf (nuggets)(nuggabits)
 	//
 	char ftmp[100000];
 	SafeBuf factBuf(ftmp,100000);
PageResults.cpp (261 changed lines)
@@ -718,6 +718,219 @@ void gotState ( void *state ){
 }
 
 
+// print all sentences containing this gigabit (fast facts) (nuggabits)
+static bool printGigabitContainingSentences ( State0 *st,
+					      SafeBuf *sb ,
+					      Msg40 *msg40 ,
+					      Gigabit *gi ,
+					      SearchInput *si ,
+					      Query *gigabitQuery ) {
+
+	static long s_gigabitCount = 0;
+
+	sb->safePrintf("<nobr><b>");
+	//"<img src=http://search.yippy.com/"
+	//"images/new/button-closed.gif><b>");
+
+	HttpRequest *hr = &st->m_hr;
+
+	// make a new query
+	sb->safePrintf("<a href=\"/search?gigabits=1&q=");
+	sb->urlEncode(gi->m_term,gi->m_termLen);
+	sb->safeMemcpy("+|+",3);
+	char *q = hr->getString("q",NULL,"");
+	sb->urlEncode(q);
+	sb->safePrintf("\">");
+	sb->safeMemcpy(gi->m_term,gi->m_termLen);
+	sb->safePrintf("</a></b>");
+	sb->safePrintf(" <font color=gray size=-1>");
+	long numOff = sb->m_length;
+	sb->safePrintf("     ");//,gi->m_numPages);
+	sb->safePrintf("</font>");
+	sb->safePrintf("</b>");
+	if ( si->m_isAdmin )
+		sb->safePrintf("[%.0f]{%li}",
+			       gi->m_gbscore,
+			       gi->m_minPop);
+
+	long revert = sb->length();
+
+	sb->safePrintf("<font color=blue style=align:right;>"
+		       "<a onclick=ccc(%li);>"
+		       , s_gigabitCount
+		       );
+	long spaceOutOff = sb->length();
+	sb->safePrintf( "%c%c%c",
+			0xe2,
+			0x87,
+			0x93);
+	sb->safePrintf(//"[more]"
+		       "</a></font>");
+
+
+	sb->safePrintf("</nobr>"); // <br>
+
+	// get facts
+	long numNuggets = 0;
+	long numFacts = msg40->m_factBuf.length() / sizeof(Fact);
+	Fact *facts = (Fact *)msg40->m_factBuf.getBufStart();
+	bool first = true;
+	bool second = false;
+	bool printedSecond = false;
+	//long long lastDocId = -1LL;
+	long saveOffset = 0;
+	for ( long i = 0 ; i < numFacts ; i++ ) {
+		Fact *fi = &facts[i];
+
+		// if printed for a higher scoring gigabit, skip
+		if ( fi->m_printed ) continue;
+
+		// check gigabit match
+		long k; for ( k = 0 ; k < fi->m_numGigabits ; k++ )
+			if ( fi->m_gigabitPtrs[k] == gi ) break;
+		// skip this fact/sentence if does not contain gigabit
+		if ( k >= fi->m_numGigabits ) continue;
+
+		// do not print if no period at end
+		char *s = fi->m_fact;
+		char *e = s + fi->m_factLen;
+		if ( e[-1] != '*' ) continue;
+		e--;
+
+	again:
+
+		// first time, print in the single fact div
+		if ( first ) {
+			sb->safePrintf("<div "
+				       "style=\"border:1px lightgray solid;\" "
+				       "id=fd%li>",s_gigabitCount);
+		}
+
+		if ( second ) {
+			sb->safePrintf("<div style=\"max-height:300px;"
+				       "display:none;"
+				       "overflow-x:hidden;"
+				       "overflow-y:auto;"//scroll;"
+				       "border:1px lightgray solid;\" "
+				       "id=sd%li>",s_gigabitCount);
+			printedSecond = true;
+		}
+
+		Msg20Reply *reply = fi->m_reply;
+
+		// ok, print it out
+		if ( ! first && ! second ) {
+			//if ( reply->m_docId != lastDocId )
+			sb->safePrintf("<br><br>\n");
+			//else {
+			//	sb->setLength ( saveOffset );
+			//	sb->safePrintf("<br><br>\n");
+			//}
+		}
+		else {
+			sb->safePrintf("<br>");
+		}
+
+
+		numNuggets++;
+
+		// print the fast fact (sentence)
+		//sb->safeMemcpy ( s , e-s );
+
+		// let's highlight with gigabits and query terms
+		SafeBuf tmpBuf;
+		Highlight h;
+		h.set ( &tmpBuf , // print it out here
+			s , // content
+			e - s , // len
+			si->m_queryLangId , // from m_defaultSortLang
+			gigabitQuery , // the gigabit "query" in quotes
+			true , // stemming? -- unused
+			false , // use anchors?
+			NULL , // baseurl
+			"<u>", // front tag
+			"</u>", // back tag
+			0 , // fieldCode
+			0 ); // niceness
+		// now highlight the original query as well but in black bold
+		h.set ( sb , // print it out here
+			tmpBuf.getBufStart() , // content
+			tmpBuf.length() , // len
+			si->m_queryLangId , // from m_defaultSortLang
+			&si->m_q , // the regular query
+			true , // stemming? -- unused
+			false , // use anchors?
+			NULL , // baseurl
+			"<b>" , // front tag
+			"</b>", // back tag
+			0 , // fieldCode
+			0 ); // niceness
+
+
+		fi->m_printed = 1;
+		saveOffset = sb->length();
+		sb->safePrintf(" <a href=/get?cnsp=0&"
+			       "strip=1&d=%lli>",reply->m_docId);
+		long dlen; char *dom = getDomFast(reply->ptr_ubuf,&dlen);
+		sb->safeMemcpy(dom,dlen);
+		sb->safePrintf("</a>\n");
+		//lastDocId = reply->m_docId;
+
+		if ( first ) {
+			sb->safePrintf("</div>");
+		}
+
+		if ( second ) {
+			second = false;
+		}
+
+		if ( first ) {
+			first = false;
+			second = true;
+			// print first gigabit all over again but in 2nd div
+			goto again;
+		}
+	}
+
+	// we counted the first one twice since we had to throw it into
+	// the hidden div too!
+	if ( numNuggets > 1 ) numNuggets--;
+
+	// do not print the double down arrow if no nuggets printed
+	if ( numNuggets <= 0 ) {
+		sb->m_length = revert;
+		sb->safePrintf("</nobr>");
+	}
+	// just remove down arrow if only 1...
+	else if ( numNuggets == 1 ) {
+		char *dst = sb->getBufStart()+spaceOutOff;
+		dst[0] = ' ';
+		dst[1] = ' ';
+		dst[2] = ' ';
+	}
+	// store the # of nuggets in ()'s like (10 )
+	else {
+		char tmp[10];
+		sprintf(tmp,"(%li)",numNuggets);
+		char *src = tmp;
+		// starting storing digits after "( "
+		char *dst = sb->getBufStart()+numOff;
+		long srcLen = gbstrlen(tmp);
+		if ( srcLen > 5 ) srcLen = 5;
+		for ( long k = 0 ; k < srcLen ; k++ )
+			dst[k] = src[k];
+	}
+
+	s_gigabitCount++;
+
+	if ( printedSecond ) {
+		sb->safePrintf("</div>");
+	}
+
+	return true;
+}
+
+
+/*
 // print all sentences containing this gigabit
 static bool printGigabit ( State0 *st,
 			   SafeBuf *sb ,
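Note: the new function renders each gigabit twice. The first matching sentence goes into an always-visible div (id=fd<N>), then via the `goto again` the same sentence plus all the rest goes into a hidden, scrollable div (id=sd<N>) that the ccc(N) script added later in this diff toggles; that is also why numNuggets is decremented once, since the first sentence is counted in both passes. A minimal sketch of the two-div layout, with illustrative names, not the real SafeBuf API:

#include <cstdio>
#include <string>
#include <vector>

int main ( ) {
	std::vector<std::string> facts = { "fact one.", "fact two.", "fact three." };
	std::string sb;
	long gigabitCount = 0;

	// visible div: first fact only
	sb += "<div id=fd" + std::to_string(gigabitCount) + ">";
	sb += facts[0];
	sb += "</div>";

	// hidden div: every fact, the first one repeated
	sb += "<div style=\"display:none;\" id=sd" + std::to_string(gigabitCount) + ">";
	for ( size_t i = 0 ; i < facts.size() ; i++ ) sb += facts[i] + "<br>";
	sb += "</div>";

	printf ( "%s\n" , sb.c_str() );
	return 0;
}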
@@ -757,6 +970,7 @@ static bool printGigabit ( State0 *st,
 
 	return true;
 }
+*/
 
 class StateAU {
 public:
@@ -1883,9 +2097,49 @@ bool printSearchResultsHeader ( State0 *st ) {
 	Gigabit *gigabits = (Gigabit *)gbuf->getBufStart();
 	//long numCols = 5;
 	//long perRow = numGigabits / numCols;
 
+	if ( numGigabits && si->m_format == FORMAT_HTML )
+		// gigabit unhide function
+		sb->safePrintf (
+			"<script>"
+			"function ccc ( gn ) {\n"
+			"var e = document.getElementById('fd'+gn);\n"
+			"var f = document.getElementById('sd'+gn);\n"
+			"if ( e.style.display == 'none' ){\n"
+			"e.style.display = '';\n"
+			"f.style.display = 'none';\n"
+			"}\n"
+			"else {\n"
+			"e.style.display = 'none';\n"
+			"f.style.display = '';\n"
+			"}\n"
+			"}\n"
+			"</script>\n"
+			);
+
 	if ( numGigabits && si->m_format == FORMAT_HTML )
 		sb->safePrintf("<table cellspacing=7 bgcolor=lightgray>"
 			       "<tr><td width=200px; valign=top>");
+
+	Query gigabitQuery;
+	SafeBuf ttt;
+	// limit it to 40 gigabits for now
+	for ( long i = 0 ; i < numGigabits && i < 40 ; i++ ) {
+		Gigabit *gi = &gigabits[i];
+		ttt.pushChar('\"');
+		ttt.safeMemcpy(gi->m_term,gi->m_termLen);
+		ttt.pushChar('\"');
+		ttt.pushChar(' ');
+	}
+	if ( numGigabits > 0 )
+		gigabitQuery.set2 ( ttt.getBufStart() ,
+				    si->m_queryLangId ,
+				    true , // queryexpansion?
+				    true ); // usestopwords?
+
+
+
+
 	for ( long i = 0 ; i < numGigabits ; i++ ) {
 		if ( i > 0 && si->m_format == FORMAT_HTML )
 			sb->safePrintf("<hr>");
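Note: the hunk above builds one Query out of every gigabit term, each wrapped in quotes, so that the Highlight pass inside printGigabitContainingSentences() underlines exact gigabit phrases in the fast facts (the user's own query is bolded in a second pass). A sketch of the quoted-query assembly, with Query::set2() replaced by a plain string for illustration:

#include <cstdio>
#include <string>
#include <vector>

int main ( ) {
	std::vector<std::string> terms = { "search engine", "open source" };
	std::string ttt;
	for ( size_t i = 0 ; i < terms.size() && i < 40 ; i++ ) // capped at 40
		ttt += "\"" + terms[i] + "\" ";
	printf ( "%s\n" , ttt.c_str() ); // "search engine" "open source"
	return 0;
}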
@@ -1893,8 +2147,11 @@ bool printSearchResultsHeader ( State0 *st ) {
 		// sb->safePrintf("</td><td valign=top>");
 		// print all sentences containing this gigabit
 		Gigabit *gi = &gigabits[i];
-		printGigabit ( st,sb , msg40 , gi , si );
-		sb->safePrintf("<br>");
+		//printGigabit ( st,sb , msg40 , gi , si );
+		//sb->safePrintf("<br>");
+		printGigabitContainingSentences(st,sb,msg40,gi,si,
+						&gigabitQuery);
+		sb->safePrintf("<br><br>");
 	}
 	if ( numGigabits && si->m_format == FORMAT_HTML )
 		sb->safePrintf("</td></tr></table>");
Parms.cpp (41 changed lines)
@@ -991,9 +991,9 @@ bool Parms::setGigablastRequest ( TcpSocket *socket ,
 	long obj = OBJ_GBREQUEST;
 
 	//
-	// reset THIS to defaults
+	// reset THIS to defaults. use NULL for cr since mostly for SearchInput
 	//
-	setToDefault ( THIS , obj );
+	setToDefault ( THIS , obj , NULL);
 
 	// loop through cgi parms
 	for ( long i = 0 ; i < hr->getNumFields() ; i++ ) {
@@ -1533,7 +1533,7 @@ bool Parms::printParms2 ( SafeBuf* sb ,
 		page = PAGE_SECURITY;
 
 	GigablastRequest gr;
-	g_parms.setToDefault ( (char *)&gr , OBJ_GBREQUEST );
+	g_parms.setToDefault ( (char *)&gr , OBJ_GBREQUEST , NULL);
 
 	// find in parms list
 	for ( long i = 0 ; i < m_numParms ; i++ ) {
@@ -2628,11 +2628,15 @@ void Parms::setParm ( char *THIS , Parm *m , long mm , long j , char *s ,
 	float oldVal = 0;
 	float newVal = 0;
 
-	if ( ! s && m->m_type != TYPE_CHARPTR) {
+	if ( ! s &&
+	     m->m_type != TYPE_CHARPTR &&
+	     m->m_type != TYPE_FILEUPLOADBUTTON &&
+	     m->m_defOff==-1) {
 		s = "0";
 		char *tit = m->m_title;
 		if ( ! tit || ! tit[0] ) tit = m->m_xml;
-		log(LOG_LOGIC,"admin: Parm \"%s\" had NULL default value.",
+		log(LOG_LOGIC,"admin: Parm \"%s\" had NULL default value. "
+		    "Forcing to 0.",
 		    tit);
 		//char *xx = NULL; *xx = 0;
 	}
@@ -2916,8 +2920,9 @@ void Parms::setToDefault ( char *THIS , char objType , CollectionRec *argcr ) {
 		if ( m->m_obj != objType ) continue;
 		if ( m->m_obj == OBJ_NONE ) continue;
 		if ( m->m_type == TYPE_COMMENT ) continue;
-		if ( m->m_type == TYPE_FILEUPLOADBUTTON )
-			continue;
+		// no, we gotta set GigablastRequest::m_contentFile to NULL
+		//if ( m->m_type == TYPE_FILEUPLOADBUTTON )
+		//	continue;
 		if ( m->m_type == TYPE_MONOD2 ) continue;
 		if ( m->m_type == TYPE_MONOM2 ) continue;
 		if ( m->m_type == TYPE_CMD ) continue;
@@ -2958,6 +2963,7 @@ void Parms::setToDefault ( char *THIS , char objType , CollectionRec *argcr ) {
 		if ( m->m_max <= 1 ) {
 			//if ( i == 282 ) // "query" parm
 			//	log("hey");
+			//if ( ! m->m_def ) { char *xx=NULL;*xx=0; }
 			setParm ( THIS , m, i, 0, m->m_def, false/*not enc.*/,
 				  false );
 			//((CollectionRec *)THIS)->m_orig[i] = 1;
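Note: the m_defOff checks above are the heart of the change: a parm may now declare that its default lives at an offset inside CollectionRec (see the snrp parm added below) instead of in a literal m_def string, which is why setToDefault() needs the CollectionRec argument. A hedged sketch of how a long-typed parm could resolve its default under that scheme; field names mirror the diff, the plumbing is simplified and not the real implementation:

#include <cstdlib>

struct Parm {
	long m_off;        // offset of the target field inside the object (THIS)
	long m_defOff;     // offset of the default inside CollectionRec, or -1
	const char *m_def; // literal default, used when m_defOff == -1
};

void setLongToDefault ( char *THIS , Parm *m , char *argcr ) {
	if ( m->m_defOff >= 0 && argcr ) {
		// default lives in the collection record (e.g. snrp defaults
		// from CollectionRec::m_refs_numToGenerate)
		*(long *)(THIS + m->m_off) = *(long *)(argcr + m->m_defOff);
		return;
	}
	// fall back to the literal string default ("0" if NULL, per setParm)
	const char *def = m->m_def ? m->m_def : "0";
	*(long *)(THIS + m->m_off) = atol ( def );
}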
@@ -5150,6 +5156,7 @@ void Parms::init ( ) {
 	m->m_page = PAGE_BASIC_SETTINGS;
 	m->m_obj = OBJ_COLL;
 	m->m_off = 0;
+	m->m_def = NULL;
 	m->m_type = TYPE_FILEUPLOADBUTTON;
 	m->m_flags = PF_NOSAVE | PF_DUP;
 	m++;
@@ -6987,6 +6994,21 @@ void Parms::init ( ) {
 	m->m_obj = OBJ_COLL;
 	m++;
 
+	m->m_title = "number of reference pages to generate";
+	m->m_desc  = "What is the number of "
+		"reference pages to generate per query? Set to 0 to save "
+		"CPU time.";
+	m->m_cgi = "snrp";
+	m->m_off = (char *)&si.m_refs_numToGenerate - y;
+	m->m_type = TYPE_LONG;
+	m->m_defOff =(char *)&cr.m_refs_numToGenerate - x;
+	m->m_priv = 0;
+	m->m_smin = 0;
+	m->m_flags = PF_HIDDEN | PF_NOSAVE;
+	m->m_page = PAGE_NONE;
+	m->m_obj = OBJ_SI;
+	m++;
+
 	m->m_title = "number of reference pages to display";
 	m->m_desc  = "What is the number of "
 		"reference pages to display per query?";
@@ -8312,6 +8334,7 @@ void Parms::init ( ) {
 	m->m_cgi = "iw";
 	m->m_page = PAGE_NONE;
 	m->m_obj = OBJ_SI;
+	m->m_def = "200";
 	m++;
 
 	m->m_title = "image height";
@@ -8322,6 +8345,7 @@ void Parms::init ( ) {
 	m->m_cgi = "ih";
 	m->m_page = PAGE_NONE;
 	m->m_obj = OBJ_SI;
+	m->m_def = "200";
 	m++;
 
 	// m->m_title = "password";
@@ -13352,6 +13376,7 @@ void Parms::init ( ) {
 	m->m_cgi = "urls";
 	m->m_page = PAGE_ADDURL2;
 	m->m_obj = OBJ_NONE;
+	m->m_def = NULL;
 	m->m_type = TYPE_FILEUPLOADBUTTON;
 	m++;
 	*/
@@ -14152,7 +14177,7 @@ void Parms::init ( ) {
 	m->m_cgi = "dsrt";
 	m->m_off = (char *)&cr.m_docsToScanForTopics - x;
 	m->m_type = TYPE_LONG;
-	m->m_def = "300";
+	m->m_def = "30";
 	m->m_flags = PF_API;
 	m->m_page = PAGE_SEARCH;
 	m->m_obj = OBJ_COLL;
Parms.h (2 changed lines)
@@ -368,7 +368,7 @@ class Parms {
 			bool isHtmlEncoded , bool fromRequest ) ;
 
 	void setToDefault ( char *THIS , char objType ,
-			    CollectionRec *argcr = NULL ) ;
+			    CollectionRec *argcr );//= NULL ) ;
 
 	bool setFromFile ( void *THIS ,
 			   char *filename ,
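Note: the header drops the `= NULL` default rather than keeping it, which turns every stale call site into a compile error until someone states the CollectionRec explicitly; that is exactly the audit the rest of this commit performs. Illustrative only:

// Dropping a default argument forces an explicit decision at each caller.
class CollectionRec;
struct ParmsSketch {
	// old: void setToDefault ( char *THIS , char objType ,
	//                          CollectionRec *argcr = NULL );
	void setToDefault ( char *THIS , char objType , CollectionRec *argcr );
};
// g_parms.setToDefault ( p , OBJ_CONF );        // no longer compiles
// g_parms.setToDefault ( p , OBJ_CONF , NULL ); // caller must choose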
SearchInput.cpp
@@ -287,7 +287,7 @@ bool SearchInput::set ( TcpSocket *sock , HttpRequest *r ) { //, Query *q ) {
 	}
 
 	// and set from the http request. will set m_coll, etc.
-	g_parms.setToDefault ( (char *)this , OBJ_SI );
+	g_parms.setToDefault ( (char *)this , OBJ_SI , cr );
 
 
 	///////
@@ -17162,6 +17162,11 @@ bool Sectiondb::init ( ) {
 	// . just hard-code 5MB for now
 	long pcmem = 5000000; // = g_conf.m_sectiondbMaxDiskPageCacheMem;
 
+	// do not use for now i think we use posdb and store the 32bit
+	// val in the key for facet type stuff
+	pcmem = 0;
+	maxTreeMem = 100000;
+	maxTreeNodes = 1000;
 
 	/*
 	key128_t k;
XmlDoc.cpp (33 changed lines)
@@ -2090,8 +2090,8 @@ void XmlDoc::getRevisedSpiderRequest ( SpiderRequest *revisedReq ) {
 	// this must be valid for us of course
 	if ( ! m_firstIpValid ) { char *xx=NULL;*xx=0; }
 
-	// wtf?
-	if ( m_firstIp == 0 || m_firstIp == -1 ) { char *xx=NULL;*xx=0; }
+	// wtf? it might be invalid!!! parent caller will handle it...
+	//if ( m_firstIp == 0 || m_firstIp == -1 ) { char *xx=NULL;*xx=0; }
 
 	// store the real ip in there now
 	revisedReq->m_firstIp = m_firstIp;
@@ -22122,8 +22122,6 @@ char *XmlDoc::getMetaList ( bool forDelete ) {
 	setStatus("adding spider request");
 	// checkpoint
 	saved = m_p;
-	// copy it
-	*m_p++ = RDB_SPIDERDB;
 	// store it here
 	SpiderRequest revisedReq;
 	// this fills it in
@@ -22141,6 +22139,8 @@ char *XmlDoc::getMetaList ( bool forDelete ) {
 		goto skipNewAdd2;
 	}
 
+	// copy it
+	*m_p++ = RDB_SPIDERDB;
 	// store it back
 	memcpy ( m_p , &revisedReq , revisedReq.getRecSize() );
 	// skip over it
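Note: the two getMetaList() hunks above move the `*m_p++ = RDB_SPIDERDB;` tag byte to after the `goto skipNewAdd2` guard. Each meta-list record is a one-byte rdbId followed by its payload, so writing the tag before deciding whether to add the record would leave a stray byte in the buffer whenever the add is skipped. A hedged sketch of the append pattern; the RDB_SPIDERDB value here is a placeholder, not the real enum value:

#include <cstring>

static const char RDB_SPIDERDB = 7; // placeholder value

char *appendRecord ( char *p , const void *rec , long recSize , bool skip ) {
	if ( skip ) return p;   // nothing written, buffer stays intact
	*p++ = RDB_SPIDERDB;    // tag byte, written only on the append path
	memcpy ( p , rec , recSize );
	return p + recSize;
}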
@@ -28530,15 +28530,15 @@ Msg20Reply *XmlDoc::getMsg20Reply ( ) {
 		reply->size_content = size_utf8Content;
 	}
 
-	if ( m_req->m_getSectionVotingInfo && m_tmpBuf3.getCapacity() <= 0 ) {
-		Sections *ss = getSections();
-		if ( ! ss || ss == (void *)-1) return (Msg20Reply *)ss;
-		// will at least store a \0 in there, but will not count
-		// as part of the m_tmpBuf.length()
-		ss->printVotingInfoInJSON ( &m_tmpBuf3 );
-		reply-> ptr_sectionVotingInfo = m_tmpBuf3.getBufStart();
-		reply->size_sectionVotingInfo = m_tmpBuf3.length() + 1;
-	}
+	// if ( m_req->m_getSectionVotingInfo && m_tmpBuf3.getCapacity() <=0) {
+	// 	Sections *ss = getSections();
+	// 	if ( ! ss || ss == (void *)-1) return (Msg20Reply *)ss;
+	// 	// will at least store a \0 in there, but will not count
+	// 	// as part of the m_tmpBuf.length()
+	// 	ss->printVotingInfoInJSON ( &m_tmpBuf3 );
+	// 	reply-> ptr_sectionVotingInfo = m_tmpBuf3.getBufStart();
+	// 	reply->size_sectionVotingInfo = m_tmpBuf3.length() + 1;
+	// }
 
 	// breathe
 	QUICKPOLL ( m_niceness );
@@ -29697,7 +29697,7 @@ SafeBuf *XmlDoc::getSampleForGigabits ( ) {
 		       e[-2] == '!' ) )
 			endsInPeriod = true;
 
-	long off = reply.length();
+	//long off = reply.length();
 
 	if ( ! reply.safePrintFilterTagsAndLines ( p , e-p ,false ) )
 		return NULL;
@@ -29734,8 +29734,9 @@ SafeBuf *XmlDoc::getSampleForGigabits ( ) {
 	*pc = '\0';
 
 	// debug
-	char *x = reply.getBufStart() + off;
-	log("fastfact: %s",x);
+	//char *x = reply.getBufStart() + off;
+	// turn off fast fact debug for now
+	//log("fastfact: fastfact: %s",x);
 	// revert back to |
 	*pc = '|';