Merge branch 'diffbot' of github.com:gigablast/open-source-search-engine into diffbot
This commit is contained in:
commit
129937168d
@ -73,8 +73,9 @@ char s_termList[1024];
|
||||
// . content must be NULL terminated
|
||||
// . if "useAnchors" is true we do click and scroll
|
||||
// . if "isQueryTerms" is true, we do typical anchors in a special way
|
||||
long Highlight::set ( char *buf ,
|
||||
long bufLen ,
|
||||
long Highlight::set ( SafeBuf *sb,
|
||||
//char *buf ,
|
||||
//long bufLen ,
|
||||
char *content ,
|
||||
long contentLen ,
|
||||
// primary language of the document (for synonyms)
|
||||
@ -119,8 +120,9 @@ long Highlight::set ( char *buf ,
|
||||
// store
|
||||
m_numMatches = matches.getNumMatches();
|
||||
|
||||
return set ( buf ,
|
||||
bufLen ,
|
||||
return set ( sb ,
|
||||
//buf ,
|
||||
//bufLen ,
|
||||
&words ,
|
||||
&matches ,
|
||||
doStemming ,
|
||||
@ -133,8 +135,9 @@ long Highlight::set ( char *buf ,
|
||||
}
|
||||
|
||||
// New version
|
||||
long Highlight::set ( char *buf ,
|
||||
long bufLen ,
|
||||
long Highlight::set ( SafeBuf *sb ,
|
||||
//char *buf ,
|
||||
//long bufLen ,
|
||||
Words *words ,
|
||||
Matches *matches ,
|
||||
bool doStemming ,
|
||||
@ -162,18 +165,20 @@ long Highlight::set ( char *buf ,
|
||||
if ( m_frontTag ) m_frontTagLen = gbstrlen ( frontTag );
|
||||
if ( m_backTag ) m_backTagLen = gbstrlen ( backTag );
|
||||
// point to buffer to store highlighted text into
|
||||
m_buf = buf;
|
||||
m_bufLen = bufLen;
|
||||
m_bufPtr = buf;
|
||||
//m_buf = buf;
|
||||
//m_bufLen = bufLen;
|
||||
//m_bufPtr = buf;
|
||||
m_sb = sb;
|
||||
// save room for terminating \0
|
||||
m_bufEnd = m_buf + m_bufLen - 1;
|
||||
//m_bufEnd = m_buf + m_bufLen - 1;
|
||||
|
||||
if ( ! highlightWords ( words, matches, q ) ) return 0;
|
||||
|
||||
// null terminate
|
||||
*m_bufPtr = '\0';
|
||||
//*m_bufPtr = '\0';
|
||||
m_sb->nullTerm();
|
||||
// return the length
|
||||
return m_bufPtr - m_buf;
|
||||
return m_sb->length();//m_bufPtr - m_buf;
|
||||
}
|
||||
|
||||
bool Highlight::highlightWords ( Words *words , Matches *m, Query *q ) {
|
||||
@ -228,6 +233,7 @@ bool Highlight::highlightWords ( Words *words , Matches *m, Query *q ) {
|
||||
endHead = false;
|
||||
endHtml = false;
|
||||
// bail now if out of room
|
||||
/*
|
||||
if ( m_bufPtr + MAX_URL_LEN + 1024 + wlen >= m_bufEnd ) {
|
||||
// don't spam the logs
|
||||
static long long s_lastTime = 0;
|
||||
@ -238,6 +244,7 @@ bool Highlight::highlightWords ( Words *words , Matches *m, Query *q ) {
|
||||
s_lastTime = now;
|
||||
return true;
|
||||
}
|
||||
*/
|
||||
if ( (words->getTagId(i) ) == TAG_TITLE ) { //<TITLE>
|
||||
if ( words->isBackTag(i) ) inTitle = false;
|
||||
else inTitle = true;
|
||||
@ -282,8 +289,9 @@ bool Highlight::highlightWords ( Words *words , Matches *m, Query *q ) {
|
||||
//else frontTag = s_frontTags [ p[i] % 10];
|
||||
else frontTag =s_frontTags[mat->m_colorNum%10];
|
||||
// OK...this is UTF-8 output, and ASCII Text
|
||||
strcpy ( m_bufPtr , frontTag );
|
||||
m_bufPtr += frontTagLen;
|
||||
//strcpy ( m_bufPtr , frontTag );
|
||||
//m_bufPtr += frontTagLen;
|
||||
m_sb->safeStrcpy ( (char *)frontTag );
|
||||
//log(LOG_DEBUG,
|
||||
// "Highlight: starting phrase %d at word %d\n",
|
||||
// p[i], i);
|
||||
@ -296,8 +304,9 @@ bool Highlight::highlightWords ( Words *words , Matches *m, Query *q ) {
|
||||
else if ( endHead ) {
|
||||
// include the tags style sheet immediately before
|
||||
// the closing </TITLE> tag
|
||||
memcpy( m_bufPtr, s_styleSheet, s_styleSheetLen );
|
||||
m_bufPtr += s_styleSheetLen;
|
||||
//memcpy( m_bufPtr, s_styleSheet, s_styleSheetLen );
|
||||
m_sb->safeMemcpy( s_styleSheet , s_styleSheetLen );
|
||||
//m_bufPtr += s_styleSheetLen;
|
||||
}
|
||||
//else if ( endHtml ) {
|
||||
// ;
|
||||
@ -326,14 +335,16 @@ bool Highlight::highlightWords ( Words *words , Matches *m, Query *q ) {
|
||||
// write the alnum word
|
||||
//m_bufPtr +=latin1ToUtf8(m_bufPtr, m_bufEnd-m_bufPtr,w, wlen);
|
||||
// everything is utf8 now
|
||||
memcpy ( m_bufPtr, w , wlen );
|
||||
m_bufPtr += wlen;
|
||||
//memcpy ( m_bufPtr, w , wlen );
|
||||
//m_bufPtr += wlen;
|
||||
m_sb->safeMemcpy ( w , wlen );
|
||||
|
||||
// back tag
|
||||
if ( i == backTagi-1 ) {
|
||||
// store the back tag
|
||||
memcpy ( m_bufPtr , backTag , backTagLen );
|
||||
m_bufPtr += backTagLen ;
|
||||
//memcpy ( m_bufPtr , backTag , backTagLen );
|
||||
//m_bufPtr += backTagLen ;
|
||||
m_sb->safeMemcpy ( (char *)backTag , backTagLen );
|
||||
//log(LOG_DEBUG,
|
||||
// "Highlight: ending phrase %d at word %d\n",
|
||||
// p[i], i);
|
||||
|
19
Highlight.h
19
Highlight.h
@ -19,8 +19,9 @@ class Highlight {
|
||||
// . we highlight Query "q" in "xml" as best as we can
|
||||
// . store highlighted text into "buf"
|
||||
// . return length stored into "buf"
|
||||
long set ( char *buf ,
|
||||
long bufLen ,
|
||||
long set ( //char *buf ,
|
||||
//long bufLen ,
|
||||
SafeBuf *sb,
|
||||
char *content ,
|
||||
long contentLen ,
|
||||
char docLangId ,
|
||||
@ -33,8 +34,9 @@ class Highlight {
|
||||
long fieldCode , // = 0 ,
|
||||
long niceness ) ;
|
||||
|
||||
long set ( char *buf ,
|
||||
long bufLen ,
|
||||
long set ( //char *buf ,
|
||||
//long bufLen ,
|
||||
SafeBuf *sb ,
|
||||
Words *words ,
|
||||
Matches *matches ,
|
||||
bool doStemming ,
|
||||
@ -52,10 +54,11 @@ class Highlight {
|
||||
bool highlightWords ( Words *words , Matches *m , Query *q=NULL );
|
||||
|
||||
// null terminate and store the highlighted content in m_buf
|
||||
char *m_buf ;
|
||||
long m_bufLen;
|
||||
char *m_bufPtr;
|
||||
char *m_bufEnd;
|
||||
//char *m_buf ;
|
||||
//long m_bufLen;
|
||||
//char *m_bufPtr;
|
||||
//char *m_bufEnd;
|
||||
class SafeBuf *m_sb;
|
||||
|
||||
//Words m_words;
|
||||
Matches m_matches;
|
||||
|
71
Hostdb.cpp
71
Hostdb.cpp
@ -194,6 +194,7 @@ bool Hostdb::init ( char *filename , long hostId , char *netName ,
|
||||
// skip known directives
|
||||
if ( ! strncmp(p,"port-offset:",12) ||
|
||||
! strncmp(p,"index-splits:",13) ||
|
||||
! strncmp(p,"num-mirrors:",12) ||
|
||||
! strncmp(p,"working-dir:",12) )
|
||||
p = p;
|
||||
// check if this is a spare host
|
||||
@ -243,13 +244,14 @@ bool Hostdb::init ( char *filename , long hostId , char *netName ,
|
||||
if ( ! m_hosts ) return log(
|
||||
"conf: Memory allocation failed.");
|
||||
|
||||
unsigned long maxShard = 0;
|
||||
//unsigned long maxShard = 0;
|
||||
long numGrunts = 0;
|
||||
|
||||
// now fill up m_hosts
|
||||
p = m_buf;
|
||||
i = 0;
|
||||
long line = 1;
|
||||
unsigned long lastShard = 0;
|
||||
//unsigned long lastShard = 0;
|
||||
long proxyNum = 0;
|
||||
|
||||
// assume defaults
|
||||
@ -257,6 +259,7 @@ bool Hostdb::init ( char *filename , long hostId , char *netName ,
|
||||
long indexSplits = 0;
|
||||
char *wdir2 = NULL;
|
||||
long wdirlen2 = 0;
|
||||
long numMirrors = -1;
|
||||
|
||||
for ( ; *p ; p++ , line++ ) {
|
||||
if ( is_wspace_a (*p) ) continue;
|
||||
@ -273,6 +276,15 @@ bool Hostdb::init ( char *filename , long hostId , char *netName ,
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( ! strncmp(p,"num-mirrors:",12) ) {
|
||||
p += 12;
|
||||
// skip spaces after the colon
|
||||
while ( is_wspace_a(*p) ) p++;
|
||||
numMirrors = atol(p);
|
||||
while ( *p && *p != '\n' ) p++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// does the line say "working-dir: xxxx" ?
|
||||
if ( ! strncmp(p,"working-dir:",12) ) {
|
||||
p += 12;
|
||||
@ -351,13 +363,6 @@ bool Hostdb::init ( char *filename , long hostId , char *netName ,
|
||||
// skip numeric hostid or "proxy" keyword
|
||||
while ( ! is_wspace_a(*p) ) p++;
|
||||
|
||||
if ( indexSplits == 0 ) {
|
||||
g_errno = EBADENGINEER;
|
||||
log("admin: need index-splits: xxx directive "
|
||||
"in hosts.conf");
|
||||
return false;
|
||||
}
|
||||
|
||||
// read in switch id
|
||||
//h->m_switchId = atoi(p);
|
||||
|
||||
@ -590,7 +595,7 @@ bool Hostdb::init ( char *filename , long hostId , char *netName ,
|
||||
// our group is based on our split!
|
||||
//h->m_group = i % g_hostdb.m_indexSplits; // # grps
|
||||
//h->m_group = i % indexSplits; // # grps
|
||||
h->m_shardNum = i % indexSplits;
|
||||
//h->m_shardNum = i % indexSplits;
|
||||
// i guess proxy and spares don't count
|
||||
if ( h->m_type != HT_GRUNT ) h->m_shardNum = 0;
|
||||
|
||||
@ -665,9 +670,12 @@ bool Hostdb::init ( char *filename , long hostId , char *netName ,
|
||||
h->m_externalHttpsPort = h->m_httpsPort;
|
||||
|
||||
// get max group number
|
||||
if ( h->m_shardNum > maxShard && h->m_type==HT_GRUNT )
|
||||
maxShard = h->m_shardNum;
|
||||
//if ( h->m_shardNum > maxShard && h->m_type==HT_GRUNT )
|
||||
// maxShard = h->m_shardNum;
|
||||
if ( h->m_type == HT_GRUNT )
|
||||
numGrunts++;
|
||||
|
||||
/*
|
||||
if ( h->m_shardNum <= lastShard && h->m_shardNum != 0
|
||||
&& !(h->m_type&(HT_ALL_PROXIES)) ) {
|
||||
g_errno = EBADENGINEER;
|
||||
@ -678,6 +686,7 @@ bool Hostdb::init ( char *filename , long hostId , char *netName ,
|
||||
filename,line);
|
||||
}
|
||||
lastShard = h->m_shardNum;
|
||||
*/
|
||||
|
||||
// skip line now
|
||||
while ( *p && *p != '\n' )
|
||||
@ -742,10 +751,46 @@ bool Hostdb::init ( char *filename , long hostId , char *netName ,
|
||||
//m_numHosts = i;
|
||||
m_numTotalHosts = i;
|
||||
// how many shards are we configured for?
|
||||
m_numShards = maxShard + 1; // g_conf.m_numGroups;
|
||||
//m_numShards = maxShard + 1; // g_conf.m_numGroups;
|
||||
|
||||
|
||||
// # of mirrors is zero if no mirrors,
|
||||
// if it is 1 then each host has ONE MIRROR host
|
||||
if ( numMirrors == 0 )
|
||||
indexSplits = numGrunts;
|
||||
if ( numMirrors > 0 )
|
||||
indexSplits = numGrunts / (numMirrors+1);
|
||||
|
||||
if ( indexSplits == 0 ) {
|
||||
g_errno = EBADENGINEER;
|
||||
log("admin: need num-mirrors: xxx or "
|
||||
"index-splits: xxx directive "
|
||||
"in hosts.conf");
|
||||
return false;
|
||||
}
|
||||
|
||||
numMirrors = (numGrunts / indexSplits) - 1 ;
|
||||
|
||||
if ( numMirrors < 0 ) {
|
||||
g_errno = EBADENGINEER;
|
||||
log("admin: need num-mirrors: xxx or "
|
||||
"index-splits: xxx directive "
|
||||
"in hosts.conf (2)");
|
||||
return false;
|
||||
}
|
||||
|
||||
m_indexSplits = indexSplits;
|
||||
|
||||
m_numShards = numGrunts / (numMirrors+1);
|
||||
|
||||
//
|
||||
// set Host::m_shardNum
|
||||
//
|
||||
for ( long i = 0 ; i < numGrunts ; i++ ) {
|
||||
Host *h = &m_hosts[i];
|
||||
h->m_shardNum = i % indexSplits;
|
||||
}
|
||||
|
||||
// assign spare hosts
|
||||
if ( m_numSpareHosts > MAX_SPARES ) {
|
||||
log ( "conf: Number of spares (%li) exceeds max of %i, "
|
||||
|
150
PageGet.cpp
150
PageGet.cpp
@ -305,23 +305,24 @@ bool processLoop ( void *state ) {
|
||||
SafeBuf sb;
|
||||
|
||||
// alloc buffer now
|
||||
char *buf = NULL;
|
||||
long bufMaxSize = 0;
|
||||
//char *buf = NULL;
|
||||
//long bufMaxSize = 0;
|
||||
//bufMaxSize = len + ( 32 * 1024 ) ;
|
||||
bufMaxSize = contentLen + ( 32 * 1024 ) ;
|
||||
buf = (char *)mmalloc ( bufMaxSize , "PageGet2" );
|
||||
char *p = buf;
|
||||
char *bufEnd = buf + bufMaxSize;
|
||||
if ( ! buf ) {
|
||||
return sendErrorReply ( st , g_errno );
|
||||
}
|
||||
//bufMaxSize = contentLen + ( 32 * 1024 ) ;
|
||||
//buf = (char *)mmalloc ( bufMaxSize , "PageGet2" );
|
||||
//char *p = buf;
|
||||
//char *bufEnd = buf + bufMaxSize;
|
||||
//if ( ! buf ) {
|
||||
// return sendErrorReply ( st , g_errno );
|
||||
//}
|
||||
|
||||
// for undoing the header
|
||||
char *start1 = p;
|
||||
//char *start1 = p;
|
||||
long startLen1 = sb.length();
|
||||
|
||||
// we are always utf8
|
||||
if ( strip != 2 )
|
||||
p += sprintf(p, "<meta http-equiv=\"Content-Type\" "
|
||||
sb.safePrintf( "<meta http-equiv=\"Content-Type\" "
|
||||
"content=\"text/html;charset=utf8\">\n");
|
||||
|
||||
// base href
|
||||
@ -332,20 +333,21 @@ bool processLoop ( void *state ) {
|
||||
if ( xd->ptr_redirUrl ) base = xd->ptr_redirUrl;
|
||||
//Url *redir = *xd->getRedirUrl();
|
||||
if ( strip != 2 ) {
|
||||
sprintf ( p , "<BASE HREF=\"%s\">" , base );
|
||||
p += gbstrlen ( p );
|
||||
sb.safePrintf ( "<BASE HREF=\"%s\">" , base );
|
||||
//p += gbstrlen ( p );
|
||||
}
|
||||
|
||||
// default colors in case css files missing
|
||||
if ( strip != 2 ) {
|
||||
sprintf ( p , "\n<style type=\"text/css\">\n"
|
||||
sb.safePrintf( "\n<style type=\"text/css\">\n"
|
||||
"body{background-color:white;color:black;}\n"
|
||||
"</style>\n");
|
||||
p += gbstrlen ( p );
|
||||
//p += gbstrlen ( p );
|
||||
}
|
||||
|
||||
|
||||
char *start2 = p;
|
||||
// for undoing the stuff below
|
||||
long startLen2 = sb.length();//p;
|
||||
|
||||
// query should be NULL terminated
|
||||
char *q = st->m_q;
|
||||
@ -376,7 +378,7 @@ bool processLoop ( void *state ) {
|
||||
// CNS: if ( ! st->m_clickNScroll ) {
|
||||
if ( printDisclaimer ) {
|
||||
|
||||
sprintf ( p ,
|
||||
sb.safePrintf(//sprintf ( p ,
|
||||
//"<BASE HREF=\"%s\">"
|
||||
//"<table border=1 width=100%%>"
|
||||
//"<tr><td>"
|
||||
@ -394,28 +396,31 @@ bool processLoop ( void *state ) {
|
||||
"<a href=\"%s\" style=\"%s\">%s</a>"
|
||||
"" , styleTitle, f->getUrl(), styleLink,
|
||||
f->getUrl() );
|
||||
p += gbstrlen ( p );
|
||||
//p += gbstrlen ( p );
|
||||
// then the rest
|
||||
sprintf(p ,
|
||||
//sprintf(p ,
|
||||
sb.safePrintf(
|
||||
"<span style=\"%s\">. "
|
||||
"Gigablast is not responsible for the content of "
|
||||
"this page.</span>", styleTitle );
|
||||
p += gbstrlen ( p );
|
||||
//p += gbstrlen ( p );
|
||||
|
||||
sprintf ( p , "<br/><span style=\"%s\">"
|
||||
sb.safePrintf ( "<br/><span style=\"%s\">"
|
||||
"Cached: </span>"
|
||||
"<span style=\"%s\">",
|
||||
styleTitle, styleText );
|
||||
p += gbstrlen ( p );
|
||||
//p += gbstrlen ( p );
|
||||
|
||||
// then the spider date in GMT
|
||||
time_t lastSpiderDate = xd->m_spideredTime;
|
||||
struct tm *timeStruct = gmtime ( &lastSpiderDate );
|
||||
strftime ( p, 100,"%b %d, %Y UTC", timeStruct);
|
||||
p += gbstrlen ( p );
|
||||
char tbuf[100];
|
||||
strftime ( tbuf, 100,"%b %d, %Y UTC", timeStruct);
|
||||
//p += gbstrlen ( p );
|
||||
sb.safeStrcpy(tbuf);
|
||||
|
||||
// Moved over from PageResults.cpp
|
||||
p += sprintf (p, "</span> - <a href=\""
|
||||
sb.safePrintf( "</span> - <a href=\""
|
||||
"/get?"
|
||||
"q=%s&c=%s&rtq=%li&"
|
||||
"d=%lli&strip=1\""
|
||||
@ -427,7 +432,7 @@ bool processLoop ( void *state ) {
|
||||
|
||||
// a link to older copies on the Internet Archive (web.archive.org)
|
||||
if ( f->getUrlLen() > 5 ) {
|
||||
p += sprintf (p, " - <a href=\"http:"
|
||||
sb.safePrintf( " - <a href=\"http:"
|
||||
"//web.archive.org/web/*/%s\""
|
||||
" style=\"%s\">"
|
||||
"[older copies]</a>" ,
|
||||
@ -435,29 +440,29 @@ bool processLoop ( void *state ) {
|
||||
}
|
||||
|
||||
if (st->m_noArchive){
|
||||
p += sprintf(p, " - <span style=\"%s\"><b>"
|
||||
sb.safePrintf( " - <span style=\"%s\"><b>"
|
||||
"[NOARCHIVE]</b></span>",
|
||||
styleTell );
|
||||
}
|
||||
if (st->m_isBanned){
|
||||
p += sprintf(p, " - <span style=\"%s\"><b>"
|
||||
sb.safePrintf(" - <span style=\"%s\"><b>"
|
||||
"[BANNED]</b></span>",
|
||||
styleTell );
|
||||
}
|
||||
|
||||
// only print this if we got a query
|
||||
if ( qlen > 0 ) {
|
||||
sprintf (p,"<br/><br/><span style=\"%s\"> "
|
||||
sb.safePrintf("<br/><br/><span style=\"%s\"> "
|
||||
"These search terms have been "
|
||||
"highlighted: ",
|
||||
styleText );
|
||||
p += gbstrlen ( p );
|
||||
//p += gbstrlen ( p );
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// how much space left in p?
|
||||
long avail = bufEnd - p;
|
||||
//long avail = bufEnd - p;
|
||||
// . make the url that we're outputting for (like in PageResults.cpp)
|
||||
// . "thisUrl" is the baseUrl for click & scroll
|
||||
char thisUrl[MAX_URL_LEN];
|
||||
@ -487,7 +492,7 @@ bool processLoop ( void *state ) {
|
||||
//sprintf ( x, "&seq=%li&rtq=%lid=%lli",
|
||||
// (long)st->m_seq,(long)st->m_rtq,st->m_msg22.getDocId());
|
||||
sprintf ( x, "&d=%lli",st->m_docId );
|
||||
x += gbstrlen(p);
|
||||
x += gbstrlen(x);
|
||||
// set our query for highlighting
|
||||
Query qq;
|
||||
qq.set2 ( q, st->m_langId , true );
|
||||
@ -523,16 +528,18 @@ bool processLoop ( void *state ) {
|
||||
// CNS: if ( ! st->m_clickNScroll ) {
|
||||
// and highlight the matches
|
||||
if ( printDisclaimer ) {
|
||||
hilen = hi.set ( p ,
|
||||
avail ,
|
||||
hilen = hi.set ( //p ,
|
||||
//avail ,
|
||||
&sb ,
|
||||
&qw , // words to highlight
|
||||
&m , // matches relative to qw
|
||||
false , // doSteming
|
||||
false , // st->m_clickAndScroll ,
|
||||
(char *)thisUrl );// base url for ClcknScrll
|
||||
p += hilen;
|
||||
//p += hilen;
|
||||
// now an hr
|
||||
memcpy ( p , "</span></table></table>\n" , 24 ); p += 24;
|
||||
//memcpy ( p , "</span></table></table>\n" , 24 ); p += 24;
|
||||
sb.safeStrcpy("</span></table></table>\n");
|
||||
}
|
||||
|
||||
|
||||
@ -547,8 +554,8 @@ bool processLoop ( void *state ) {
|
||||
if ( ! includeHeader ) {
|
||||
// including base href is off by default when not including
|
||||
// the header, so the caller must explicitly turn it back on
|
||||
if ( st->m_includeBaseHref ) p = start2;
|
||||
else p = start1;
|
||||
if ( st->m_includeBaseHref ) sb.m_length=startLen2;//p=start2;
|
||||
else sb.m_length=startLen1;//p=start1;
|
||||
}
|
||||
|
||||
// identify start of <title> tag we wrote out
|
||||
@ -596,7 +603,8 @@ bool processLoop ( void *state ) {
|
||||
char *ebuf = st->m_r.getString("eb");
|
||||
if ( ! ebuf ) ebuf = "";
|
||||
|
||||
p += sprintf ( p ,
|
||||
//p += sprintf ( p ,
|
||||
sb.safePrintf(
|
||||
"<table border=1 "
|
||||
"cellpadding=10 "
|
||||
"cellspacing=0 "
|
||||
@ -606,7 +614,7 @@ bool processLoop ( void *state ) {
|
||||
long printLinks = st->m_r.getLong("links",0);
|
||||
|
||||
if ( ! printDisclaimer && printLinks )
|
||||
p += sprintf ( p ,
|
||||
sb.safePrintf(//p += sprintf ( p ,
|
||||
// first put cached and live link
|
||||
"<tr>"
|
||||
"<td bgcolor=lightyellow>"
|
||||
@ -637,7 +645,7 @@ bool processLoop ( void *state ) {
|
||||
);
|
||||
|
||||
if ( printLinks ) {
|
||||
p += sprintf ( p ,
|
||||
sb.safePrintf(//p += sprintf ( p ,
|
||||
"<tr><td bgcolor=pink>"
|
||||
"<span style=\"font-size:18px;"
|
||||
"font-weight:600;"
|
||||
@ -646,11 +654,11 @@ bool processLoop ( void *state ) {
|
||||
"<b>PAGE TITLE:</b> "
|
||||
);
|
||||
long tlen = titleEnd - titleStart;
|
||||
memcpy ( p , titleStart , tlen ); p += tlen;
|
||||
p += sprintf ( p , "</span></td></tr>" );
|
||||
sb.safeMemcpy ( titleStart , tlen );
|
||||
sb.safePrintf ( "</span></td></tr>" );
|
||||
}
|
||||
|
||||
p += sprintf ( p , "</table><br>\n" );
|
||||
sb.safePrintf( "</table><br>\n" );
|
||||
|
||||
}
|
||||
|
||||
@ -661,9 +669,9 @@ bool processLoop ( void *state ) {
|
||||
if ( ctype == CT_DOC ) pre = true ; // filtered msword
|
||||
if ( ctype == CT_PS ) pre = true ; // filtered postscript
|
||||
// if it is content-type text, add a <pre>
|
||||
if ( p + 5 < bufEnd && pre ) {
|
||||
memcpy ( p , "<pre>" , 5 );
|
||||
p += 5;
|
||||
if ( pre ) {//p + 5 < bufEnd && pre ) {
|
||||
sb.safePrintf("<pre>");
|
||||
//p += 5;
|
||||
}
|
||||
|
||||
if ( st->m_strip == 1 )
|
||||
@ -671,7 +679,7 @@ bool processLoop ( void *state ) {
|
||||
(long)xd->m_version, st->m_strip );
|
||||
// it returns -1 and sets g_errno on error, like OOM
|
||||
if ( contentLen == -1 ) {
|
||||
if ( buf ) mfree ( buf , bufMaxSize , "PageGet2" );
|
||||
//if ( buf ) mfree ( buf , bufMaxSize , "PageGet2" );
|
||||
return sendErrorReply ( st , g_errno );
|
||||
}
|
||||
|
||||
@ -688,8 +696,8 @@ bool processLoop ( void *state ) {
|
||||
|
||||
|
||||
if ( ! queryHighlighting ) {
|
||||
memcpy ( p , content , contentLen );
|
||||
p += contentLen ;
|
||||
sb.safeMemcpy ( content , contentLen );
|
||||
//p += contentLen ;
|
||||
}
|
||||
else {
|
||||
// get the content as xhtml (should be NULL terminated)
|
||||
@ -697,37 +705,38 @@ bool processLoop ( void *state ) {
|
||||
if ( ! xml.set ( content , contentLen , false ,
|
||||
0 , false , TITLEREC_CURRENT_VERSION ,
|
||||
false , 0 ) ) { // niceness is 0
|
||||
if ( buf ) mfree ( buf , bufMaxSize , "PageGet2" );
|
||||
//if ( buf ) mfree ( buf , bufMaxSize , "PageGet2" );
|
||||
return sendErrorReply ( st , g_errno );
|
||||
}
|
||||
if ( ! ww.set ( &xml , true , 0 ) ) { // niceness is 0
|
||||
if ( buf ) mfree ( buf , bufMaxSize , "PageGet2" );
|
||||
//if ( buf ) mfree ( buf , bufMaxSize , "PageGet2" );
|
||||
return sendErrorReply ( st , g_errno );
|
||||
}
|
||||
// sanity check
|
||||
//if ( ! xd->m_wordsValid ) { char *xx=NULL;*xx=0; }
|
||||
// how much space left in p?
|
||||
avail = bufEnd - p;
|
||||
//avail = bufEnd - p;
|
||||
|
||||
Matches m;
|
||||
m.setQuery ( &qq );
|
||||
m.addMatches ( &ww );
|
||||
hilen = hi.set ( p , avail , &ww , &m ,
|
||||
hilen = hi.set ( &sb , // p , avail ,
|
||||
&ww , &m ,
|
||||
false /*doStemming?*/ ,
|
||||
st->m_clickAndScroll ,
|
||||
thisUrl /*base url for click & scroll*/);
|
||||
p += hilen;
|
||||
//p += hilen;
|
||||
log(LOG_DEBUG, "query: Done highlighting cached page content");
|
||||
}
|
||||
|
||||
// if it is content-type text, add a </pre>
|
||||
if ( p + 6 < bufEnd && pre ) {
|
||||
memcpy ( p , "</pre>" , 6 );
|
||||
p += 6;
|
||||
if ( pre ) { // p + 6 < bufEnd && pre ) {
|
||||
sb.safeMemcpy ( "</pre>" , 6 );
|
||||
//p += 6;
|
||||
}
|
||||
|
||||
// calculate bufLen
|
||||
long bufLen = p - buf;
|
||||
//long bufLen = p - buf;
|
||||
|
||||
long ct = xd->m_contentType;
|
||||
|
||||
@ -737,16 +746,19 @@ bool processLoop ( void *state ) {
|
||||
|
||||
if ( ct == CT_XML ) {
|
||||
// encode the xml tags into &lt;tagname&gt; sequences
|
||||
if ( !newbuf.htmlEncodeXmlTags ( buf , p-buf,0)){// niceness=0
|
||||
if ( buf ) mfree ( buf , bufMaxSize , "PageGet2" );
|
||||
if ( !newbuf.htmlEncodeXmlTags ( sb.getBufStart() ,
|
||||
sb.getLength(),
|
||||
0)){// niceness=0
|
||||
//if ( buf ) mfree ( buf , bufMaxSize , "PageGet2" );
|
||||
return sendErrorReply ( st , g_errno );
|
||||
}
|
||||
// free our buffer that we alloc'd before returning since this
|
||||
// should have copied it into another buffer
|
||||
if ( buf ) mfree ( buf , bufMaxSize , "PageGet2" );
|
||||
//if ( buf ) mfree ( buf , bufMaxSize , "PageGet2" );
|
||||
// reassign
|
||||
buf = newbuf.getBufStart();
|
||||
bufLen = newbuf.length();
|
||||
//buf = newbuf.getBufStart();
|
||||
//bufLen = newbuf.length();
|
||||
sb.stealBuf ( &newbuf );
|
||||
}
|
||||
|
||||
// now encapsulate it in html head/tail and send it off
|
||||
@ -763,14 +775,18 @@ bool processLoop ( void *state ) {
|
||||
mdelete ( st , sizeof(State2) , "PageGet1" );
|
||||
delete (st);
|
||||
|
||||
bool status = g_httpServer.sendDynamicPage (s,buf,bufLen,-1,false,
|
||||
bool status = g_httpServer.sendDynamicPage (s,
|
||||
//buf,bufLen,
|
||||
sb.getBufStart(),
|
||||
sb.getLength(),
|
||||
-1,false,
|
||||
contentType,
|
||||
-1, NULL, "utf8" );
|
||||
// free our buffer that we alloc'd before returning since this
|
||||
// should have copied it into another buffer
|
||||
|
||||
if ( ct == CT_XML ) newbuf.purge();
|
||||
else if ( buf ) mfree ( buf , bufMaxSize , "PageGet2" );
|
||||
//if ( ct == CT_XML ) newbuf.purge();
|
||||
//else if ( buf ) mfree ( buf , bufMaxSize , "PageGet2" );
|
||||
|
||||
// and convey the status
|
||||
return status;
|
||||
|
@ -1515,8 +1515,8 @@ bool printInlinkText ( SafeBuf &sb , Msg20Reply *mr , SearchInput *si ,
|
||||
if ( ! si->m_doQueryHighlighting && ! si->m_xml ) continue;
|
||||
char *str = k-> ptr_linkText;
|
||||
long strLen = k->size_linkText;
|
||||
char tt[1024*3];
|
||||
char *ttend = tt + 1024*3;
|
||||
//char tt[1024*3];
|
||||
//char *ttend = tt + 1024*3;
|
||||
char *frontTag =
|
||||
"<font style=\"color:black;background-color:yellow\">" ;
|
||||
char *backTag = "</font>";
|
||||
@ -1525,8 +1525,9 @@ bool printInlinkText ( SafeBuf &sb , Msg20Reply *mr , SearchInput *si ,
|
||||
backTag = "</b>";
|
||||
}
|
||||
Highlight hi;
|
||||
long hlen = hi.set ( tt ,
|
||||
ttend - tt ,
|
||||
SafeBuf hb;
|
||||
long hlen = hi.set ( &hb,//tt ,
|
||||
//ttend - tt ,
|
||||
str,
|
||||
strLen ,
|
||||
mr->m_language, // docLangId
|
||||
@ -1573,7 +1574,10 @@ bool printInlinkText ( SafeBuf &sb , Msg20Reply *mr , SearchInput *si ,
|
||||
// inc it
|
||||
inlinkId++;
|
||||
// encode it for xml
|
||||
if ( !sb.htmlEncode ( tt,hlen,false)) return false;
|
||||
if ( !sb.htmlEncode ( hb.getBufStart(),
|
||||
hb.length(),
|
||||
false))
|
||||
return false;
|
||||
sb.safePrintf("\"/>\n");
|
||||
continue;
|
||||
}
|
||||
@ -1604,7 +1608,7 @@ bool printInlinkText ( SafeBuf &sb , Msg20Reply *mr , SearchInput *si ,
|
||||
//k->ptr_urlBuf);
|
||||
,si->m_cr->m_coll
|
||||
,k->m_docId);
|
||||
if ( ! sb.safeMemcpy(tt,hlen) ) return false;
|
||||
if ( ! sb.safeMemcpy(&hb) ) return false;
|
||||
long hostLen = 0;
|
||||
char *host = getHostFast(k->ptr_urlBuf,&hostLen,NULL);
|
||||
sb.safePrintf("</td><td>");
|
||||
@ -1871,8 +1875,8 @@ static int printResult ( SafeBuf &sb,
|
||||
|
||||
long hlen;
|
||||
//copy all summary and title excerpts for this result into here
|
||||
char tt[1024*32];
|
||||
char *ttend = tt + 1024*32;
|
||||
//char tt[1024*32];
|
||||
//char *ttend = tt + 1024*32;
|
||||
char *frontTag =
|
||||
"<font style=\"color:black;background-color:yellow\">" ;
|
||||
char *backTag = "</font>";
|
||||
@ -1882,9 +1886,11 @@ static int printResult ( SafeBuf &sb,
|
||||
}
|
||||
long cols = 80;
|
||||
if ( si->m_xml ) sb.safePrintf("\t\t<title><![CDATA[");
|
||||
SafeBuf hb;
|
||||
if ( str && strLen && si->m_doQueryHighlighting ) {
|
||||
hlen = hi.set ( tt ,
|
||||
ttend - tt ,
|
||||
hlen = hi.set ( &hb,
|
||||
//tt ,
|
||||
//ttend - tt ,
|
||||
str,
|
||||
strLen ,
|
||||
mr->m_language, // docLangId
|
||||
@ -1897,7 +1903,10 @@ static int printResult ( SafeBuf &sb,
|
||||
0,
|
||||
0 ); // niceness
|
||||
//if (!sb.utf8Encode2(tt, hlen)) return false;
|
||||
if ( ! sb.brify ( tt,hlen,0,cols) ) return false;
|
||||
if ( ! sb.brify ( hb.getBufStart(),
|
||||
hb.getLength(),
|
||||
0,
|
||||
cols) ) return false;
|
||||
}
|
||||
else if ( str && strLen ) {
|
||||
// determine if TiTle wraps, if it does add a <br> count for
|
||||
|
15
SafeBuf.cpp
15
SafeBuf.cpp
@ -2514,6 +2514,9 @@ bool SafeBuf::decodeJSON ( long niceness ) {
|
||||
|
||||
// . this should support the case when the src and dst buffers are the same!
|
||||
// so decodeJSONToUtf8() function below will work
|
||||
// . this is used by xmldoc.cpp to PARTIALLY decode a json buf so we do not
|
||||
// index letters in escapes like \n \r \f \t \uxxxx \\ \/
|
||||
// . SO we do keep \"
|
||||
bool SafeBuf::safeDecodeJSONToUtf8 ( char *json, long jsonLen, long niceness) {
|
||||
|
||||
// how much space to reserve for the copy?
|
||||
@ -2687,11 +2690,13 @@ bool SafeBuf::safeStrcpyPrettyJSON ( char *decodedJson ) {
|
||||
*dst++ = '\\';
|
||||
continue;
|
||||
}
|
||||
//if ( *src == '\/' ) {
|
||||
// *dst++ = '\\';
|
||||
// *dst++ = '/';
|
||||
// continue;
|
||||
//}
|
||||
// mdw: why was this commented out?
|
||||
// we converted '\/' above to a single / so we must undo here
|
||||
if ( *src == '/' ) {
|
||||
*dst++ = '\\';
|
||||
*dst++ = '/';
|
||||
continue;
|
||||
}
|
||||
|
||||
*dst++ = *src;
|
||||
|
||||
|
11
XmlDoc.cpp
11
XmlDoc.cpp
@ -26441,11 +26441,13 @@ char *XmlDoc::getHighlightedSummary ( ) {
|
||||
|
||||
if ( ! m_langIdValid ) { char *xx=NULL;*xx=0; }
|
||||
|
||||
char tt[5000];
|
||||
//char tt[5000];
|
||||
Highlight hi;
|
||||
SafeBuf hb;
|
||||
// highlight the query in it
|
||||
long hlen = hi.set ( tt ,
|
||||
4999 ,
|
||||
hi.set ( &hb,
|
||||
//tt ,
|
||||
//4999 ,
|
||||
sum,
|
||||
sumLen,
|
||||
m_langId,
|
||||
@ -26459,8 +26461,9 @@ char *XmlDoc::getHighlightedSummary ( ) {
|
||||
m_niceness );
|
||||
|
||||
// store into our safebuf then
|
||||
m_finalSummaryBuf.safeMemcpy ( tt , hlen + 1 );
|
||||
m_finalSummaryBuf.safeMemcpy ( &hb );//tt , hlen + 1 );
|
||||
m_finalSummaryBufValid = true;
|
||||
m_finalSummaryBuf.nullTerm();
|
||||
|
||||
char *fsum = m_finalSummaryBuf.getBufStart();
|
||||
if ( ! fsum ) fsum = (char *)0x01;
|
||||
|
12
hosts.conf
12
hosts.conf
@ -2,12 +2,14 @@
|
||||
# Tells us what hosts are participating in the distributed search engine.
|
||||
|
||||
|
||||
# This is how many pieces you want the index split into.
|
||||
# So if you have 64 machines, and you want a unique piece of index on
|
||||
# each machine, then make this 64. But if you have 64 machines and you
|
||||
# want one level of redundancy then make this 32.
|
||||
# How many mirrors do you want? If this is 0 then your data
|
||||
# will NOT be replicated. If it is 1 then each host listed
|
||||
# below will have one host that mirrors it, thereby decreasing
|
||||
# total index capacity, but increasing redundancy. If this is
|
||||
# 1 then the first half of hosts will be replicated by the
|
||||
# second half of the hosts listed below.
|
||||
|
||||
index-splits: 1
|
||||
num-mirrors: 0
|
||||
|
||||
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user