forked from Mirrors/privacore-open-source-search-engine
Merge branch 'ia-zak' of https://github.com/gigablast/open-source-search-engine into ia-zak
This commit is contained in:
2
Conf.h
2
Conf.h
@ -292,6 +292,7 @@ class Conf {
|
||||
//bool m_refreshFacebookUsersEnabled;
|
||||
bool m_injectionsEnabled ;
|
||||
bool m_queryingEnabled ;
|
||||
bool m_returnResultsAnyway;
|
||||
// qa testing loop going on? uses "test" subdir
|
||||
bool m_testParserEnabled ;
|
||||
bool m_testSpiderEnabled ;
|
||||
@ -574,6 +575,7 @@ class Conf {
|
||||
int64_t m_tagdbFileCacheSize;
|
||||
int64_t m_clusterdbFileCacheSize;
|
||||
int64_t m_titledbFileCacheSize;
|
||||
int64_t m_spiderdbFileCacheSize;
|
||||
|
||||
//bool m_quickpollCoreOnError;
|
||||
bool m_useShotgun;
|
||||
|
@ -195,6 +195,7 @@ case EADMININTERFERENCE: return "Adminstrative interference";
|
||||
case EDNSERROR : return "DNS lookup error";
|
||||
case ETHREADSDISABLED:return "Threads Disabled";
|
||||
case EMALFORMEDQUERY: return "Malformed query";
|
||||
case ESHARDDOWN: return "One or more shards are down";
|
||||
}
|
||||
// if the remote error bit is clear it must be a regular errno
|
||||
//if ( ! ( errnum & REMOTE_ERROR_BIT ) ) return strerror ( errnum );
|
||||
|
3
Errno.h
3
Errno.h
@ -199,6 +199,7 @@ enum {
|
||||
EADMININTERFERENCE,
|
||||
EDNSERROR ,
|
||||
ETHREADSDISABLED,
|
||||
EMALFORMEDQUERY
|
||||
EMALFORMEDQUERY,
|
||||
ESHARDDOWN
|
||||
};
|
||||
#endif
|
||||
|
9
Msg3.cpp
9
Msg3.cpp
@ -49,7 +49,7 @@ key192_t makeCacheKey ( int64_t vfd ,
|
||||
return k;
|
||||
}
|
||||
|
||||
RdbCache g_rdbCaches[4];
|
||||
RdbCache g_rdbCaches[5];
|
||||
|
||||
class RdbCache *getDiskPageCache ( char rdbId ) {
|
||||
|
||||
@ -86,6 +86,13 @@ class RdbCache *getDiskPageCache ( char rdbId ) {
|
||||
maxRecs = maxMem / 3000;
|
||||
dbname = "titdbcache";
|
||||
}
|
||||
if ( rdbId == RDB_SPIDERDB ) {
|
||||
rpc = &g_rdbCaches[4];
|
||||
maxSizePtr = &g_conf.m_spiderdbFileCacheSize;
|
||||
maxMem = *maxSizePtr;
|
||||
maxRecs = maxMem / 3000;
|
||||
dbname = "spdbcache";
|
||||
}
|
||||
|
||||
if ( ! rpc )
|
||||
return NULL;
|
||||
|
@ -581,11 +581,29 @@ bool sendHttpReply ( void *state ) {
|
||||
//
|
||||
////////////
|
||||
|
||||
XmlDoc *s_injectHead = NULL;
|
||||
XmlDoc *s_injectTail = NULL;
|
||||
|
||||
// Accessor for the front of the list of injections currently in progress.
// Yields NULL while no injection is linked in.
XmlDoc *getInjectHead ( ) {
	XmlDoc *head = s_injectHead;
	return head;
}
|
||||
|
||||
// send back a reply to the originator of the msg7 injection request
|
||||
void sendUdpReply7 ( void *state ) {
|
||||
|
||||
XmlDoc *xd = (XmlDoc *)state;
|
||||
|
||||
// remove from linked list
|
||||
if ( xd->m_nextInject )
|
||||
xd->m_nextInject->m_prevInject = xd->m_prevInject;
|
||||
if ( xd->m_prevInject )
|
||||
xd->m_prevInject->m_nextInject = xd->m_nextInject;
|
||||
if ( s_injectHead == xd )
|
||||
s_injectHead = xd->m_nextInject;
|
||||
if ( s_injectTail == xd )
|
||||
s_injectTail = xd->m_prevInject;
|
||||
xd->m_nextInject = NULL;
|
||||
xd->m_prevInject = NULL;
|
||||
|
||||
|
||||
UdpSlot *slot = xd->m_injectionSlot;
|
||||
|
||||
uint32_t statColor = 0xccffcc;
|
||||
@ -654,6 +672,19 @@ void handleRequest7 ( UdpSlot *slot , int32_t netnice ) {
|
||||
xd->m_injectionSlot = slot;
|
||||
xd->m_injectStartTime = gettimeofdayInMilliseconds();
|
||||
|
||||
// add to linked list
|
||||
xd->m_nextInject = NULL;
|
||||
xd->m_prevInject = NULL;
|
||||
if ( s_injectTail ) {
|
||||
s_injectTail->m_nextInject = xd;
|
||||
xd->m_prevInject = s_injectTail;
|
||||
s_injectTail = xd;
|
||||
}
|
||||
else {
|
||||
s_injectHead = xd;
|
||||
s_injectTail = xd;
|
||||
}
|
||||
|
||||
if ( ! xd->injectDoc ( ir->ptr_url , // m_injectUrlBuf.getBufStart() ,
|
||||
cr ,
|
||||
ir->ptr_content , // start , // content ,
|
||||
|
@ -1,6 +1,10 @@
|
||||
#ifndef GBINJECT_H
|
||||
#define GBINJECT_H
|
||||
|
||||
// for getting list of injections currently being processed on this host
|
||||
// for printing in the Spider Queue table in Spider.cpp
|
||||
class XmlDoc *getInjectHead ( ) ;
|
||||
|
||||
void handleRequest7Import ( class UdpSlot *slot , int32_t netnice ) ;
|
||||
|
||||
void handleRequest7 ( class UdpSlot *slot , int32_t netnice ) ;
|
||||
|
@ -1183,6 +1183,23 @@ bool gotResults ( void *state ) {
|
||||
st->m_socket->m_totalSent == 0 )
|
||||
return sendReply(st,NULL);
|
||||
|
||||
|
||||
// if we skipped a shard because it was dead, usually we provide
|
||||
// the results anyway, but if this switch is true then return an
|
||||
// error code instead. this is the 'all or nothing' switch.
|
||||
if ( msg40->m_msg3a.m_skippedShards > 0 &&
|
||||
! g_conf.m_returnResultsAnyway ) {
|
||||
char reply[256];
|
||||
sprintf ( reply ,
|
||||
"%"INT32" shard(s) out of %"INT32" did not "
|
||||
"respond to query."
|
||||
, msg40->m_msg3a.m_skippedShards
|
||||
, g_hostdb.m_numShards );
|
||||
g_errno = ESHARDDOWN;
|
||||
return sendReply(st,reply);
|
||||
}
|
||||
|
||||
|
||||
// if already printed from Msg40.cpp, bail out now
|
||||
if ( si->m_streamResults ) {
|
||||
// this will be our final send
|
||||
|
30
Parms.cpp
30
Parms.cpp
@ -9939,6 +9939,21 @@ void Parms::init ( ) {
|
||||
m->m_obj = OBJ_CONF;
|
||||
m++;
|
||||
|
||||
// "rra" switch on the master page, stored in g_conf.m_returnResultsAnyway.
// Defaults to on ("1"); the description below is shown to the admin, so
// its wording was corrected (lets / an error message / duplicated words).
m->m_title = "return results even if a shard is down";
m->m_desc  = "If you turn this off then Gigablast will return "
	"an error message if a shard was down and did not return "
	"results for a query. The XML and JSON feed lets you know "
	"when a shard is down and will give you the results back "
	"anyway, but if you would rather have just an error message "
	"and no results, then set this to 'NO'.";
m->m_cgi   = "rra";
m->m_off   = (char *)&g_conf.m_returnResultsAnyway - g;
m->m_type  = TYPE_BOOL;
m->m_def   = "1";
m->m_page  = PAGE_MASTER;
m->m_obj   = OBJ_CONF;
m++;
|
||||
|
||||
m->m_title = "max mem";
|
||||
m->m_desc = "Mem available to this process. May be exceeded due "
|
||||
"to fragmentation.";
|
||||
@ -11515,6 +11530,21 @@ void Parms::init ( ) {
|
||||
m->m_group = 0;
|
||||
m++;
|
||||
|
||||
// Size in bytes of the spiderdb file cache, stored in
// g_conf.m_spiderdbFileCacheSize. The previous description was copied
// from the titledb cache parameter and described Titledb, so it has been
// reduced to what actually applies to this setting.
m->m_title = "spiderdb disk cache size";
m->m_desc  = "How much file cache size to use in bytes for spiderdb?";
m->m_cgi   = "dpcsy";
m->m_off   = (char *)&g_conf.m_spiderdbFileCacheSize - g;
m->m_type  = TYPE_LONG_LONG;
m->m_def   = "30000000";
m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
m->m_page  = PAGE_MASTER;
m->m_obj   = OBJ_CONF;
m->m_group = 0;
m++;
|
||||
|
||||
|
||||
|
||||
/*
|
||||
|
18
Spider.cpp
18
Spider.cpp
@ -9501,6 +9501,24 @@ bool sendPage ( State11 *st ) {
|
||||
// inc count
|
||||
j++;
|
||||
}
|
||||
// now print the injections as well!
|
||||
XmlDoc *xd = getInjectHead ( ) ;
|
||||
for ( ; xd ; xd = xd->m_nextInject ) {
|
||||
// how does this happen?
|
||||
if ( ! xd->m_sreqValid ) continue;
|
||||
// grab it
|
||||
SpiderRequest *oldsr = &xd->m_sreq;
|
||||
// get status
|
||||
SafeBuf xb;
|
||||
xb.safePrintf("[<font color=red><b>injecting</b></font>] %s",
|
||||
xd->m_statusMsg);
|
||||
char *status = xb.getBufStart();
|
||||
// show that
|
||||
if ( ! oldsr->printToTable ( &sb , status,xd,j) ) return false;
|
||||
// inc count
|
||||
j++;
|
||||
}
|
||||
|
||||
// end the table
|
||||
sb.safePrintf ( "</table>\n" );
|
||||
sb.safePrintf ( "<br>\n" );
|
||||
|
3
XmlDoc.h
3
XmlDoc.h
@ -1011,6 +1011,9 @@ class XmlDoc {
|
||||
int64_t m_startTime;
|
||||
int64_t m_injectStartTime;
|
||||
|
||||
class XmlDoc *m_prevInject;
|
||||
class XmlDoc *m_nextInject;
|
||||
|
||||
// when set() was called by Msg20.cpp so we can time how long it took
|
||||
// to generate the summary
|
||||
int64_t m_setTime;
|
||||
|
Reference in New Issue
Block a user