mirror of https://github.com/privacore/open-source-search-engine.git
Merge branch 'master' into dev-language
Changed files:
Collectiondb.cpp, Conf.cpp, Conf.h, DailyMerge.cpp, Doledb.cpp, HttpServer.cpp, InstanceInfoExchange.cpp, Loop.cpp, Loop.h, Matches.cpp, Msg13.cpp, Msg20.cpp, Msg22.cpp, Msg25.cpp, Msg3.cpp, Msg4Out.cpp, MsgC.cpp, Multicast.cpp, PageInject.cpp, Parms.cpp, PosdbTable.cpp, PosdbTable.h, Process.cpp, Process.h, Query.cpp, Query.h, Rdb.cpp, RdbBase.cpp, RdbBase.h, RdbBuckets.cpp, RdbBuckets.h, RdbDump.cpp, RdbIndex.cpp, RdbIndex.h, RdbIndexQuery.cpp, RdbIndexQuery.h, RdbList.cpp, RdbList.h, RdbMap.cpp, RdbMerge.cpp, RdbMerge.h, RdbTree.cpp, RdbTree.h, Rebalance.cpp, Repair.cpp, SearchInput.cpp, SpiderLoop.cpp, SpiderProxy.cpp, Statistics.cpp, Tagdb.cpp, TcpServer.cpp, UdpServer.cpp, UdpServer.h, UdpSlot.cpp, UdpSlot.h, UrlBlockList.cpp, XmlDoc.cpp, XmlDoc_Indexing.cpp, main.cpp, tools
Collectiondb.cpp
@@ -1470,7 +1470,7 @@ bool CollectionRec::rebuildPrivacoreRules () {
const char *langWhitelistStr = "xx,en,bg,sr,ca,cs,da,et,fi,fr,de,el,hu,is,ga,it,la,lv,lt,lb,nl,pl,pt,ro,es,sv,no,vv";
// max spiders per ip
int32_t ipms = 7;
int32_t ipms = 1;
int32_t n = 0;
@@ -2078,8 +2078,9 @@ bool CollectionRec::save ( ) {
//File f;
char tmp[1024];
snprintf(tmp, 1023, "%scoll.%s.%" PRId32"/coll.conf", g_hostdb.m_dir, m_coll, (int32_t)m_collnum);
log(LOG_INFO, "coll: Saving %s", tmp);
if (!g_parms.saveToXml((char *)this, tmp, OBJ_COLL)) {
// we didn't save successfully
m_needsSave = true;
Conf.cpp
@@ -89,6 +89,7 @@ Conf::Conf ( ) {
m_vagusPort = 8720;
m_vagusKeepaliveSendInterval = 500;
m_vagusKeepaliveLifetime = 5000;
m_vagusMaxDeadTime = 5;
m_maxDocsWanted = 0;
m_maxFirstResultNum = 0;
min_docid_splits = 0;
@@ -168,6 +169,7 @@ Conf::Conf ( ) {
m_profilingEnabled = false;
m_logHttpRequests = false;
m_logAutobannedQueries = false;
m_logLoopTimeThreshold = 500;
m_logQueryTimeThreshold = 0;
m_logDiskReadTimeThreshold = 0;
m_logQueryReply = false;
@@ -216,6 +218,7 @@ Conf::Conf ( ) {
m_logDebugUdp = false;
m_logDebugUnicode = false;
m_logDebugUrlAttempts = false;
m_logDebugVagus = false;
m_logTraceBigFile = false;
m_logTraceDns = false;
m_logTraceFile = false;
@@ -260,7 +263,7 @@ Conf::Conf ( ) {
m_useTmpCluster = false;
m_allowScale = true;
m_bypassValidation = false;
m_maxCallbackDelay = 0;
m_maxCallbackDelay = -1;
m_repairingEnabled = false;
m_maxRepairinjections = 0;
m_repairMem = 0;
@@ -509,6 +512,8 @@ bool Conf::save ( ) {
// fix so if we core in malloc/free we can still save conf
StackBuf<1024> fn;
fn.safePrintf("%sgb.conf",g_hostdb.m_dir);
log(LOG_INFO, "db: Saving %s", fn.getBufStart());
bool status = g_parms.saveToXml ( (char *)this , fn.getBufStart(), OBJ_CONF );
return status;
Conf.h
@@ -157,6 +157,7 @@ class Conf {
int32_t m_vagusPort;
int32_t m_vagusKeepaliveSendInterval; //milliseconds
int32_t m_vagusKeepaliveLifetime; //milliseconds
int32_t m_vagusMaxDeadTime; //minutes
int32_t m_maxDocsWanted; //maximum number of results in one go. Puts a limit on SearchInput::m_docsWanted
int32_t m_maxFirstResultNum; //maximum document offset / result-page. Puts a limit on SearchInput::m_firstResultNum
@@ -304,6 +305,8 @@ class Conf {
bool m_logHttpRequests;
bool m_logAutobannedQueries;
int32_t m_logLoopTimeThreshold;
// if query took this or more milliseconds, log its time
int32_t m_logQueryTimeThreshold;
// if disk read took this or more milliseconds, log its time
@@ -361,6 +364,7 @@ class Conf {
bool m_logDebugUdp;
bool m_logDebugUnicode;
bool m_logDebugUrlAttempts;
bool m_logDebugVagus;
bool m_logTraceBigFile;
bool m_logTraceDns;
DailyMerge.cpp
@@ -25,7 +25,7 @@ bool DailyMerge::init ( ) {
m_mergeMode = 0;
m_didDaily = false;
// check every 10 seconds
if( ! g_loop.registerSleepCallback(10*1000,NULL,dailyMergeWrapper ) ){
if (!g_loop.registerSleepCallback(10 * 1000, NULL, dailyMergeWrapper, "DailyMerge::dailyMergeWrapper")) {
log( LOG_WARN, "repair: Failed register callback.");
return false;
}
Doledb.cpp
@@ -54,7 +54,7 @@ void nukeDoledb ( collnum_t collnum ) {
// . it will unlink the files and maps for doledb for this collnum
// . it will remove all recs of this collnum from its tree too
if (g_doledb.getRdb()->isSavingTree()) {
g_loop.registerSleepCallback(100, &collnum, nukeDoledbWrapper);
g_loop.registerSleepCallback(100, &collnum, nukeDoledbWrapper, "Doledb::nukeDoledbWrapper");
return;
}
HttpServer.cpp
@@ -1600,17 +1600,13 @@ static int32_t getMsgPiece ( TcpSocket *s ) {
// . this means we block, and cleanUp should be called to unregister it
// . this also makes f non-blocking
if (tcp->m_useSSL) {
if ( g_loop.registerReadCallback ( f->getfd(),
(void *)(PTRTYPE)(s->m_sd),
getSSLMsgPieceWrapper ,
s->m_niceness ) )
if (g_loop.registerReadCallback(f->getfd(), (void *)(PTRTYPE)(s->m_sd), getSSLMsgPieceWrapper,
"HttpServer::getSSLMsgPieceWrapper", s->m_niceness))
return n;
}
else {
if ( g_loop.registerReadCallback ( f->getfd(),
(void *)(PTRTYPE)(s->m_sd),
getMsgPieceWrapper ,
s->m_niceness ) )
if (g_loop.registerReadCallback(f->getfd(), (void *)(PTRTYPE)(s->m_sd), getMsgPieceWrapper,
"HttpServer::getMsgPieceWrapper", s->m_niceness))
return n;
}
// . TODO: deal with this better
InstanceInfoExchange.cpp
@@ -71,25 +71,48 @@ static int connect_to_vagus(int port) {
sin.sin_port = htons(port);
if(connect(fd,(sockaddr*)(void*)&sin,sizeof(sin))!=0) {
log(LOG_ERROR,"vagus: connect() failed with errno=%d (%s)", errno, strerror(errno));
close(fd);
close(fd);
return -1;
}
log(LOG_DEBUG,"vagus: Connected to Vagus on fd %d", fd);
log(LOG_INFO, "vagus: Connected to Vagus on fd %d", fd);
return fd;
}
static void process_alive_hosts(std::map<int,std::string> &alive_hosts) {
//log(LOG_DEBUG,"vagus: got %zu alive hosts form vagus. hosts.conf says there should be %d",
//	alive_hosts.size(), g_hostdb.getNumHosts());
if(alive_hosts.size() != (unsigned)g_hostdb.getNumHosts()) {
log(LOG_WARN, "vagus: got %zu alive hosts instead of %d", alive_hosts.size(), g_hostdb.getNumHosts());
char hosts[g_hostdb.getNumHosts()];
memset(hosts, '.', sizeof(hosts));
for (const auto &iter : alive_hosts) {
hosts[iter.first] = '+';
}
log(LOG_WARN, "vagus: %.*s", g_hostdb.getNumHosts(), hosts);
}
time_t now = time(NULL);
static time_t s_ownLastAliveTime = now;
if ((now - s_ownLastAliveTime) > (g_conf.m_vagusMaxDeadTime * 60)) {
log(LOG_ERROR, "vagus: We have not seen ourself alive for the past %d minutes. Aborting", g_conf.m_vagusMaxDeadTime);
gbshutdownResourceError();
}
std::vector<int> alive_hosts_ids;
alive_hosts_ids.reserve(alive_hosts.size());
for(auto iter : alive_hosts) {
int hostid = iter.first;
if(hostid<0 || hostid>=g_hostdb.getNumHosts())
continue;
if (hostid == g_hostdb.getMyHostId()) {
s_ownLastAliveTime = now;
}
alive_hosts_ids.push_back(hostid);
char extra_information[256];
if(iter.second.length()>=sizeof(extra_information))
@@ -205,6 +228,7 @@ static bool do_vagus_poll(int fd) {
static void *poll_thread(void *) {
pthread_setname_np(pthread_self(),"vaguspoll");
struct pollfd pfd[2];
memset(pfd,0,sizeof(pfd));
pfd[0].fd = fd_pipe[0];
@@ -225,7 +249,7 @@ static void *poll_thread(void *) {
//unexpected input or lost connection.
(void)::close(pfd[1].fd);
pfd[1].fd = -1;
log(LOG_DEBUG,"vagus: lost connection to Vagus");
log(LOG_INFO,"vagus: lost connection to Vagus");
}
if(pfd[1].fd<0)
@@ -263,7 +287,7 @@ bool InstanceInfoExchange::initialize() {
struct passwd pwd, *pwdptr;
if(getpwuid_r(geteuid(), &pwd,buf,sizeof(buf),&pwdptr)==0) {
sprintf(vagus_cluster_name, "gb-%s", pwd.pw_name);
log(LOG_DEBUG,"Using vagus cluster id '%s'",vagus_cluster_name);
log(LOG_INFO,"Using vagus cluster id '%s'",vagus_cluster_name);
} else {
log(LOG_ERROR,"getpwuid(geteuid()...) failed with errno=%d (%s)", errno,strerror(errno));
return false;
@@ -329,8 +353,8 @@ void InstanceInfoExchange::weAreAlive() {
g_hostdb.getMyHostId(),
g_conf.m_vagusKeepaliveLifetime,
extra_information);
//log(LOG_DEBUG,"vagus: command='%s'",command);
//logDebug(g_conf.m_logDebugVagus, "vagus: command='%s'",command);
size_t bytes_to_write = strlen(command);
ssize_t bytes_written = ::write(fd_keepalive, command, bytes_to_write);
if((size_t)bytes_written != bytes_to_write) {
@@ -338,7 +362,7 @@ void InstanceInfoExchange::weAreAlive() {
::close(fd_keepalive); fd_keepalive = -1;
return;
}
//log(LOG_TRACE,"vagus: sent keepalive to Vagus");
logDebug(g_conf.m_logDebugVagus, "vagus: sent keepalive to Vagus");
char ignored_response[10];
(void)read(fd_keepalive,ignored_response,sizeof(ignored_response));
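
The dead-detection added above is a dead-man's switch: remember the last time this host saw its own id in the alive list coming back from Vagus, and abort the process if that timestamp gets older than the configured limit. A minimal self-contained sketch of that idea follows; the abort() call and the 5-minute constant stand in for gbshutdownResourceError() and g_conf.m_vagusMaxDeadTime and are placeholders, not the engine's API.

    #include <ctime>
    #include <cstdio>
    #include <cstdlib>

    // Placeholder for the configured limit (minutes); the real value is g_conf.m_vagusMaxDeadTime.
    static const int kMaxDeadMinutes = 5;

    // Called each time a fresh alive-host list arrives; aborts if we have not
    // seen our own host id in that list for too long.
    void checkSelfAlive(bool sawOurselves) {
        static time_t lastSeenAlive = time(NULL);
        time_t now = time(NULL);
        if (sawOurselves)
            lastSeenAlive = now;
        if (now - lastSeenAlive > kMaxDeadMinutes * 60) {
            fprintf(stderr, "vagus: not seen alive for %d minutes, aborting\n", kMaxDeadMinutes);
            abort();   // stands in for gbshutdownResourceError()
        }
    }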
Loop.cpp
@@ -50,12 +50,18 @@ class Slot {
void (* m_callback)(int fd, void *state);
// the next Slot thats registerd on this fd
Slot *m_next;
// save niceness level for doPoll() to segregate
int32_t m_niceness;
// this callback should be called every X milliseconds
int32_t m_tick;
// when we were last called in ms time (only valid for sleep callbacks)
int64_t m_lastCall;
const char *m_description;
// linked list of available slots
Slot *m_nextAvail;
};
@@ -180,9 +186,10 @@ void Loop::unregisterCallback(Slot **slots, int fd, void *state, void (* callbac
return;
}
bool Loop::registerReadCallback ( int fd, void *state, void (* callback)(int fd,void *state ) , int32_t niceness ) {
bool Loop::registerReadCallback(int fd, void *state, void (*callback)(int fd, void *state),
                                const char *description, int32_t niceness) {
// the "true" answers the question "for reading?"
if ( addSlot ( true, fd, state, callback, niceness ) ) {
if (addSlot(true, fd, state, callback, niceness, description)) {
return true;
}
@@ -191,9 +198,10 @@ bool Loop::registerReadCallback ( int fd, void *state, void (* callback)(int fd
}
bool Loop::registerWriteCallback ( int fd, void *state, void (* callback)(int fd, void *state ) , int32_t niceness ) {
bool Loop::registerWriteCallback(int fd, void *state, void (*callback)(int fd, void *state),
                                 const char *description, int32_t niceness) {
// the "false" answers the question "for reading?"
if ( addSlot ( false, fd, state, callback, niceness ) ) {
if (addSlot(false, fd, state, callback, niceness, description)) {
return true;
}
@@ -202,9 +210,9 @@ bool Loop::registerWriteCallback ( int fd, void *state, void (* callback)(int fd
}
// tick is in milliseconds
bool Loop::registerSleepCallback ( int32_t tick, void *state, void (* callback)(int fd,void *state ),
                                   int32_t niceness, bool immediate ) {
if ( ! addSlot ( true, MAX_NUM_FDS, state, callback, niceness, tick, immediate ) ) {
bool Loop::registerSleepCallback(int32_t tick, void *state, void (*callback)(int fd, void *state),
                                 const char *description, int32_t niceness, bool immediate) {
if (!addSlot(true, MAX_NUM_FDS, state, callback, niceness, description, tick, immediate)) {
log( LOG_WARN, "loop: Unable to register sleep callback" );
return false;
}
@@ -218,8 +226,8 @@ bool Loop::registerSleepCallback ( int32_t tick, void *state, void (* callback)(
}
// . returns false and sets g_errno on error
bool Loop::addSlot ( bool forReading , int fd, void *state, void (* callback)(int fd, void *state),
                     int32_t niceness , int32_t tick, bool immediate ) {
bool Loop::addSlot(bool forReading, int fd, void *state, void (*callback)(int fd, void *state),
                   int32_t niceness, const char *description, int32_t tick, bool immediate) {
// ensure fd is >= 0
if ( fd < 0 ) {
g_errno = EBADENGINEER;
@@ -277,11 +285,6 @@ bool Loop::addSlot ( bool forReading , int fd, void *state, void (* callback)(in
s_readFds[s_numReadFds++] = fd;
FD_SET ( fd,&s_selectMaskRead );
}
// fd == MAX_NUM_FDS if it's a sleep callback
//if ( fd < MAX_NUM_FDS ) {
//FD_SET ( fd , &m_readfds );
//FD_SET ( fd , &m_exceptfds );
//}
}
else {
next = m_writeSlots [ fd ];
@@ -301,6 +304,7 @@ bool Loop::addSlot ( bool forReading , int fd, void *state, void (* callback)(in
// set our callback and state
s->m_callback = callback;
s->m_state = state;
s->m_description = description;
// point to the guy that was registered for fd before us
s->m_next = next;
@@ -314,9 +318,6 @@ bool Loop::addSlot ( bool forReading , int fd, void *state, void (* callback)(in
// the last called time
s->m_lastCall = immediate ? 0 : gettimeofdayInMilliseconds();
// debug msg
//log("Loop::registered fd=%i state=%" PRIu32,fd,state);
// if fd == MAX_NUM_FDS if it's a sleep callback
if ( fd == MAX_NUM_FDS ) {
return true;
@@ -409,19 +410,30 @@ void Loop::callCallbacks_ass ( bool forReading , int fd , int64_t now , int32_t
// NOTE: callback can unregister fd for Slot s, so get next
m_callbacksNext = s->m_next;
logDebug( g_conf.m_logDebugLoop, "loop: enter fd callback fd=%d nice=%" PRId32, fd, s->m_niceness );
logDebug(g_conf.m_logDebugLoop, "loop: enter fd callback '%s' fd=%d nice=%" PRId32,
         s->m_description, fd, s->m_niceness);
// sanity check. -1 no longer supported
if ( s->m_niceness < 0 ) {
if (s->m_niceness < 0) {
g_process.shutdownAbort(true);
}
int64_t took = 0;
m_slotMutex.unlock();
s->m_callback ( fd , s->m_state );
{
int64_t start = gettimeofdayInMilliseconds();
s->m_callback(fd, s->m_state);
took = gettimeofdayInMilliseconds() - start;
}
m_slotMutex.lock();
logDebug( g_conf.m_logDebugLoop, "loop: exit fd callback fd=%" PRId32" nice=%" PRId32,
          (int32_t)fd,(int32_t)s->m_niceness );
if (took > g_conf.m_logLoopTimeThreshold) {
log(LOG_WARN, "loop: %s took %" PRId64"ms", s->m_description, took);
}
logDebug(g_conf.m_logDebugLoop, "loop: exit fd callback '%s' fd=%d nice=%" PRId32,
         s->m_description, fd, s->m_niceness);
// inc the flag
numCalled++;
@@ -696,7 +708,7 @@ void Loop::doPoll ( ) {
}
if(m_lastKeepaliveTimestamp + g_conf.m_vagusKeepaliveSendInterval <= gettimeofdayInMilliseconds()) {
m_lastKeepaliveTimestamp = gettimeofdayInMilliseconds() + g_conf.m_vagusKeepaliveSendInterval;
m_lastKeepaliveTimestamp = gettimeofdayInMilliseconds();
InstanceInfoExchange::weAreAlive();
}
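
The timing block added to callCallbacks_ass() is the heart of the new "log loop callback time threshold" feature: wrap each callback invocation in a wall-clock measurement and warn with the slot's description when it runs longer than the threshold. A minimal sketch of the same pattern outside the engine; std::chrono replaces gettimeofdayInMilliseconds() and the 500 ms constant stands in for g_conf.m_logLoopTimeThreshold, both assumptions for the sake of a self-contained example.

    #include <chrono>
    #include <cstdio>
    #include <functional>

    static const long kSlowCallbackMs = 500;   // placeholder for g_conf.m_logLoopTimeThreshold

    // Run a callback and warn if it exceeded the threshold, naming it in the log line.
    void invokeTimed(const char *description, const std::function<void()> &callback) {
        auto start = std::chrono::steady_clock::now();
        callback();
        auto took = std::chrono::duration_cast<std::chrono::milliseconds>(
                        std::chrono::steady_clock::now() - start).count();
        if (took > kSlowCallbackMs)
            fprintf(stderr, "loop: %s took %ldms\n", description, (long)took);
    }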
Loop.h
@@ -60,23 +60,19 @@ class Loop {
// . register this "fd" with "callback"
// . "callback" will be called when fd is ready for reading
// . "timeout" is -1 if this never timesout
bool registerReadCallback ( int fd ,
                            void *state ,
                            void (* callback)(int fd,void *state ) ,
                            int32_t niceness );//= MAX_NICENESS ) ;
bool registerReadCallback(int fd, void *state, void (*callback)(int fd, void *state),
                          const char *description, int32_t niceness);
// . register this "fd" with "callback"
// . "callback" will be called when fd is ready for reading
// . "callback" will be called when there is an error on fd
bool registerWriteCallback ( int fd ,
                             void *state ,
                             void (* callback)(int fd, void *state ) ,
                             int32_t niceness );
bool registerWriteCallback(int fd, void *state, void (*callback)(int fd, void *state),
                           const char *description, int32_t niceness);
// . register this callback to be called every second
// . TODO: implement "seconds" parameter
bool registerSleepCallback ( int32_t milliseconds, void *state, void (* callback)(int fd,void *state ),
                             int32_t niceness = 1, bool immediate = false );
bool registerSleepCallback(int32_t milliseconds, void *state, void (*callback)(int fd, void *state),
                           const char *description, int32_t niceness = 1, bool immediate = false);
// unregister call back for reading, writing or sleeping
void unregisterReadCallback ( int fd, void *state , void (* callback)(int fd,void *state) );
@@ -110,11 +106,8 @@ class Loop {
void (* callback)(int fd,void *state) ,
bool forReading );
bool addSlot ( bool forReading , int fd , void *state , void (* callback)(int fd , void *state ),
               int32_t niceness , int32_t tick = 0x7fffffff, bool immediate = false ) ;
// set how long to pause waiting for singals (in milliseconds)
void setSigWaitTime ( int32_t ms ) ;
bool addSlot(bool forReading, int fd, void *state, void (*callback)(int fd, void *state),
             int32_t niceness, const char *description, int32_t tick = 0x7fffffff, bool immediate = false);
// now we use a linked list of pre-allocated slots to avoid a malloc
// failure which can cause the merge to dump with "URGENT MERGE FAILED"
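
With the extra description argument, every registration site now passes a human-readable name that Loop prints in its slow-callback warnings. A caller-side sketch of the updated sleep-callback signature; the module, wrapper function, and includes are hypothetical, while the Loop method, the g_loop global, and log(LOG_WARN, ...) are the ones used throughout this commit.

    #include "Loop.h"   // assumed to declare class Loop and the g_loop global

    // Hypothetical subsystem timer; name and module are made up for illustration.
    static void myTickWrapper(int fd, void *state) {
        (void)fd; (void)state;
        // ... periodic work ...
    }

    bool setUpMyTimer(void *state) {
        // the description string is what Loop.cpp now logs when this callback is slow
        if (!g_loop.registerSleepCallback(10 * 1000, state, myTickWrapper, "MyModule::myTickWrapper")) {
            log(LOG_WARN, "mymodule: failed to register sleep callback");
            return false;
        }
        return true;
    }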
Matches.cpp
@@ -77,9 +77,6 @@ bool Matches::isMatchableTerm(const QueryTerm *qt) const {
// if query is too long, a query word can be truncated!
// this happens for some words if they are ignored, too!
if ( ! qw->m_queryWordTerm && ! qw->m_queryPhraseTerm ) return false;
// after a NOT operator?
if ( qw->m_underNOT )
return false;
return true;
}
Msg13.cpp
@@ -125,7 +125,7 @@ bool Msg13::registerHandler ( ) {
if ( ! s_rt.set ( 8 ,sizeof(UdpSlot *),0,NULL,0,true,"wait13tbl") )
return false;
if ( ! g_loop.registerSleepCallback(10, NULL, scanHammerQueue ) ) {
if (!g_loop.registerSleepCallback(10, NULL, scanHammerQueue, "Msg13::scanHammerQueue")) {
log( "build: Failed to register timer callback for hammer queue." );
return false;
}
Msg20.cpp
@@ -190,7 +190,7 @@ bool Msg20::getSummary ( Msg20Request *req ) {
if(nc==0) {
log(LOG_DEBUG, "msg20: no live candidate hosts for shard %d", shardNum);
if(g_conf.m_msg20FallbackToAllHosts) {
log(LOG_DEBUG,"msg20: No live disired hosts in shard %d - falling back to all hosts in the shard", shardNum);
log(LOG_DEBUG,"msg20: No alive desired hosts in shard %d - falling back to all hosts in the shard", shardNum);
for(int32_t i = 0; i < allNumHosts; i++) {
cand[nc++] = &allHosts[i];
}
Msg22.cpp
@@ -583,7 +583,7 @@ void gotTitleList ( void *state , RdbList *list , Msg5 *msg5 ) {
// . ok, return an available docid
if ( r->m_url[0] || r->m_justCheckTfndb || r->m_getAvailDocIdOnly ) {
// store docid in reply
char *p = st->m_slot->m_tmpBuf;
char *p = st->m_slot->m_shortSendBuffer;
// send back the available docid
*(int64_t *)p = st->m_availDocId;
// send it
Msg25.cpp
@@ -176,10 +176,28 @@ static void gotMulticastReplyWrapper25(void *state, void *state2) {
int32_t replyMaxSize;
bool freeit;
char *reply = mcast->getBestReply (&replySize,&replyMaxSize,&freeit);
{
// validate linkinfo
LinkInfo *linkInfo = (LinkInfo *)reply;
if (linkInfo->m_version != 0 ||
    linkInfo->m_lisize < 0 || linkInfo->m_lisize != replySize ||
    linkInfo->m_numStoredInlinks < 0 || linkInfo->m_numGoodInlinks < 0) {
gbshutdownCorrupted();
}
}
// . store reply in caller's linkInfoBuf i guess
// . mcast should free the reply
req->m_linkInfoBuf->safeMemcpy ( reply , replySize );
{
// validate linkinfo
LinkInfo *linkInfo = (LinkInfo *)req->m_linkInfoBuf->getBufStart();
if (linkInfo->m_version != 0 ||
    linkInfo->m_lisize < 0 || linkInfo->m_lisize != req->m_linkInfoBuf->length() ||
    linkInfo->m_numStoredInlinks < 0 || linkInfo->m_numGoodInlinks < 0) {
gbshutdownCorrupted();
}
}
// i guess we gotta free this
mfree ( reply , replyMaxSize , "rep25" );
@@ -352,6 +370,17 @@ static void sendReplyWrapper(void *state) {
Msg25Request *mr = m25->m_req25;
// get udp slot for sending back reply
UdpSlot *slot2 = mr->m_udpSlot;
if (m25->m_linkInfoBuf->length() > 0) {
// validate linkinfo
LinkInfo *linkInfo = (LinkInfo *)m25->m_linkInfoBuf->getBufStart();
if (linkInfo->m_version != 0 ||
    linkInfo->m_lisize < 0 || linkInfo->m_lisize != m25->m_linkInfoBuf->length() ||
    linkInfo->m_numStoredInlinks < 0 || linkInfo->m_numGoodInlinks < 0) {
gbshutdownCorrupted();
}
}
// shortcut
SafeBuf *info = m25->m_linkInfoBuf;
// steal this buffer
@@ -383,6 +412,16 @@ static void sendReplyWrapper(void *state) {
// just dup the reply for each one
char *reply2 = (char *)mdup(reply1,replySize,"m25repd");
if (reply2) {
// validate linkinfo
LinkInfo *linkInfo = (LinkInfo *)reply2;
if (linkInfo->m_version != 0 ||
    linkInfo->m_lisize < 0 || linkInfo->m_lisize != m25->m_linkInfoBuf->length() ||
    linkInfo->m_numStoredInlinks < 0 || linkInfo->m_numGoodInlinks < 0) {
gbshutdownCorrupted();
}
}
// error?
if ( saved || ! reply2 ) {
int32_t err = saved;
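
The same four-field sanity check on a serialized LinkInfo now appears three times in this file. A possible follow-up would be to fold it into one helper; a hedged sketch, where the helper name is made up and the field names are the ones checked above (LinkInfo itself is assumed to come from the engine's link-database header):

    // Hypothetical helper; mirrors the inline checks added in this commit.
    static bool looksLikeValidLinkInfo(const LinkInfo *li, int32_t expectedSize) {
        return li->m_version == 0 &&
               li->m_lisize >= 0 && li->m_lisize == expectedSize &&
               li->m_numStoredInlinks >= 0 &&
               li->m_numGoodInlinks >= 0;
    }

    // usage: if (!looksLikeValidLinkInfo((LinkInfo *)reply, replySize)) gbshutdownCorrupted();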
Msg3.cpp
@@ -837,7 +837,7 @@ bool Msg3::doneScanning ( ) {
}
// wait
if ( g_loop.registerSleepCallback ( wait, this, doneSleepingWrapper3, m_niceness ) ) {
if (g_loop.registerSleepCallback(wait, this, doneSleepingWrapper3, "Msg3::doneSleepingWrapper3", m_niceness)) {
return false;
}
Msg4Out.cpp
@@ -121,7 +121,7 @@ bool Msg4::initializeOutHandling() {
// to speed up spidering so it would harvest outlinks
// faster and be able to spider them right away.
// . returns false on failure
bool rc = g_loop.registerSleepCallback( MSG4_WAIT, NULL, sleepCallback4 );
bool rc = g_loop.registerSleepCallback(MSG4_WAIT, NULL, sleepCallback4, "Msg4Out::sleepCallback4");
logTrace( g_conf.m_logTraceMsg4, "END - returning %s", rc?"true":"false");
MsgC.cpp
@@ -293,10 +293,10 @@ void gotMsgCIpWrapper( void *state, int32_t ip){
//to fit the ip address
//char reply[12];
// don't put it on the stack because sendReply does not copy!
char *reply = slot->m_tmpBuf;
char *reply = slot->m_shortSendBuffer;
int32_t replySize=12;
#if TMPBUFSIZE < 12
#error Slot::m_tmpBuf must be at least 12 bytes
#if SHORTSENDBUFFERSIZE < 12
#error Slot::m_shortSendBuffer must be at least 12 bytes
#endif
// reply=(char*) mmalloc(replySize,"MsgC");
char *p = reply;
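
The #if/#error guard keeps the 12-byte MsgC reply from silently overflowing the renamed m_shortSendBuffer. In C++11 the same guarantee can be stated next to the use site with a static_assert; a one-line sketch, assuming SHORTSENDBUFFERSIZE stays a preprocessor-visible integer constant:

    static_assert(SHORTSENDBUFFERSIZE >= 12, "UdpSlot::m_shortSendBuffer must hold a 12-byte MsgC reply");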
Multicast.cpp
@@ -229,7 +229,7 @@ void Multicast::sendToWholeGroup() {
// continue if we're already registered for sleep callbacks
if ( m_registeredSleep ) continue;
// otherwise register for sleep callback to try again
g_loop.registerSleepCallback( 5000/*ms*/, this, sleepWrapper2, m_niceness );
g_loop.registerSleepCallback(5000, this, sleepWrapper2, "Multicast::sleepWrapper2", m_niceness);
m_registeredSleep = true;
}
// if we had an error then we'll be called again in a second
@@ -324,7 +324,7 @@ void Multicast::gotReply2 ( UdpSlot *slot ) {
return;
// . otherwise register for sleep callback to try again
// . sleepWrapper2() will call sendToWholeGroup() for us
g_loop.registerSleepCallback( 5000/*ms*/, this, sleepWrapper2, m_niceness );
g_loop.registerSleepCallback(5000/*ms*/, this, sleepWrapper2, "Multicast::sleepWrapper2", m_niceness);
m_registeredSleep = true;
}
@@ -585,7 +585,7 @@ bool Multicast::sendToHost ( int32_t i ) {
if(wait>=0) {
// . otherwise register for sleep callback to try again
// . sleepCallback1Wrapper() will call sendToHostLoop() for us
g_loop.registerSleepCallback(wait/*ms*/, this, sleepCallback1Wrapper, m_niceness );
g_loop.registerSleepCallback(wait, this, sleepCallback1Wrapper, "Multicast::sleepCallback1Wrapper", m_niceness);
m_registeredSleep = true;
}
}
@@ -921,13 +921,6 @@ void Multicast::closeUpShop ( UdpSlot *slot ) {
g_errno = slot->getErrno();
}
// . sometimes UdpServer will read the reply into a temporary buffer
// . this happens if the udp server is hot (async signal based) and
//   m_replyBuf is NULL because he cannot malloc a buf to read into
//   because malloc is not async signal safe
if (slot->m_tmpBuf == slot->m_readBuf) {
m_freeReadBuf = false;
}
// don't let UdpServer free the readBuf now that we point to it
slot->m_readBuf = NULL;
slot->m_readBufSize = 0;
PageInject.cpp
@@ -505,7 +505,7 @@ static void sendUdpReply7(void *state) {
}
// just send back the 4 byte indexcode, which is 0 on success,
// otherwise it is the errno
char *tmp = slot->m_tmpBuf;
char *tmp = slot->m_shortSendBuffer;
char *p = tmp;
memcpy ( p , (char *)&indexCode , 4 );
p += 4;
Parms.cpp
@@ -5154,6 +5154,18 @@ void Parms::init ( ) {
m->m_group = false;
m++;
m->m_title = "Vagus dead detection";
m->m_desc = "How long before we abort due to main thread hanging";
m->m_cgi = "vagus_max_dead_time";
simple_m_set(Conf,m_vagusMaxDeadTime);
m->m_smin = 1;
m->m_smax = 60;
m->m_def = "5";
m->m_units = "minutes";
m->m_page = PAGE_MASTER;
m->m_group = false;
m++;
m->m_title = "max corrupt index lists";
m->m_desc = "If we reach this many corrupt index lists, send "
            "an admin email. Set to -1 to disable.";
@@ -8077,6 +8089,16 @@ void Parms::init ( ) {
m->m_page = PAGE_LOG;
m++;
m->m_title = "log loop callback time threshold";
m->m_desc = "If a loop callback took this many millliseconds or longer, then log the "
            "description and the time it took to process.";
m->m_cgi = "lltt";
simple_m_set(Conf,m_logLoopTimeThreshold);
m->m_def = "500";
m->m_units = "milliseconds";
m->m_page = PAGE_LOG;
m++;
m->m_title = "log query time threshold";
m->m_desc = "If a query took this many millliseconds or longer, then log the "
            "query and the time it took to process.";
@@ -8437,6 +8459,13 @@ void Parms::init ( ) {
m->m_page = PAGE_LOG;
m++;
m->m_title = "log debug vagus messages";
m->m_cgi = "ldv";
simple_m_set(Conf,m_logDebugVagus);
m->m_def = "0";
m->m_page = PAGE_LOG;
m++;
////////////////////
// log trace
////////////////////
@@ -9897,7 +9926,7 @@ bool Parms::doParmSendingLoop ( ) {
s_inLoop = true;
if ( !s_registeredSleep && !g_loop.registerSleepCallback( 2000, NULL, parmLoop, 0 ) ) {
if (!s_registeredSleep && !g_loop.registerSleepCallback(2000, NULL, parmLoop, "Parms::parmLoop", 0)) {
log( LOG_WARN, "parms: failed to reg parm loop" );
}
@@ -10079,7 +10108,7 @@ void Parms::handleRequest3fLoop(void *weArg) {
// . try again in 100ms
//
////////////
if( !g_loop.registerSleepCallback( 100, we, handleRequest3fLoop3, 0 ) ){
if (!g_loop.registerSleepCallback(100, we, handleRequest3fLoop3, "Parms::handleRequest3fLoop3", 0)) {
log( LOG_WARN, "parms: failed to reg sleeper");
return;
}
@@ -10288,7 +10317,7 @@ bool Parms::syncParmsWithHost0 ( ) {
void Parms::handleRequest3e(UdpSlot *slot, int32_t /*niceness*/) {
// right now we must be host #0
if ( g_hostdb.m_hostId != 0 ) {
hadError:
log(LOG_WARN,"parms: got request 0x3f but we are not host #0");
g_errno = EBADENGINEER;
g_udpServer.sendErrorReply( slot, g_errno );
return;
@@ -10341,8 +10370,11 @@ hadError:
c,
NULL,
-1,
"delete"))
goto hadError;
"delete")) {
g_errno = EBADENGINEER;
g_udpServer.sendErrorReply( slot, g_errno );
return;
}
// ok, get next collection hash
continue;
}
@@ -10350,7 +10382,11 @@ hadError:
// get our parmlist for that collnum
tmp.reset();
// c is -1 for g_conf
if ( ! g_parms.addAllParmsToList ( &tmp, c ) ) goto hadError;
if ( ! g_parms.addAllParmsToList ( &tmp, c ) ) {
g_errno = EBADENGINEER;
g_udpServer.sendErrorReply( slot, g_errno );
return;
}
// get checksum of that
int64_t m64 = hash64 ( tmp.getBufStart(),tmp.length() );
// if match, keep chugging, that's in sync
@@ -10358,7 +10394,12 @@ hadError:
// note in log
logf(LOG_INFO,"sync: sending all parms for collnum %" PRId32" to host #%" PRId32, (int32_t)c, hostId);
// otherwise, send him the list
if ( ! replyBuf.safeMemcpy ( &tmp ) ) goto hadError;
if ( ! replyBuf.safeMemcpy ( &tmp ) ) {
log(LOG_WARN,"parms: Could not build reply buffer");
g_errno = EBADENGINEER;
g_udpServer.sendErrorReply( slot, g_errno );
return;
}
}
//
@@ -10380,17 +10421,27 @@ hadError:
(collnum_t)i,
cr->m_coll, // parm val
-1,
cmdStr ) )
goto hadError;
cmdStr ) ) {
g_errno = EBADENGINEER;
g_udpServer.sendErrorReply( slot, g_errno );
return;
}
// and the parmlist for it
if (!g_parms.addAllParmsToList (&replyBuf, i ) ) goto hadError;
if (!g_parms.addAllParmsToList (&replyBuf, i ) ) {
g_errno = EBADENGINEER;
g_udpServer.sendErrorReply( slot, g_errno );
return;
}
}
// . final parm is the in sync stamp of approval which will set
//   g_parms.m_inSyncWithHost0 to true. CommandInSync()
// . use -1 for collnum for this cmd
if ( ! g_parms.addNewParmToList1 ( &replyBuf,-1,NULL,-1,"insync"))
goto hadError;
if ( ! g_parms.addNewParmToList1 ( &replyBuf,-1,NULL,-1,"insync")) {
g_errno = EBADENGINEER;
g_udpServer.sendErrorReply( slot, g_errno );
return;
}
// this should at least have the in sync command
log("parms: sending %" PRId32" bytes of parms to sync to host #%" PRId32,
PosdbTable.cpp
@@ -19,6 +19,7 @@
#include "GbMutex.h"
#include "ScopedLock.h"
#include <math.h>
#include <valarray>
#ifdef _VALGRIND_
#include <valgrind/memcheck.h>
@@ -100,7 +101,6 @@ void PosdbTable::reset() {
m_qpos = NULL;
m_wikiPhraseIds = NULL;
m_quotedStartIds = NULL;
m_qdist = 0;
m_freqWeights = NULL;
m_bflags = NULL;
m_qtermNums = NULL;
@@ -229,13 +229,12 @@ float PosdbTable::getBestScoreSumForSingleTerm(int32_t i, const char *wpi, const
// assume no terms!
*highestScoringNonBodyPos = NULL;
// Sanity check
if( wpi >= endi ) {
logTrace(g_conf.m_logTracePosdb, "END, wpi %p >= %p", wpi, endi);
return -1.0;
}
if ( wpi ) {
// Sanity check
if( wpi >= endi ) {
logTrace(g_conf.m_logTracePosdb, "END, wpi %p >= %p", wpi, endi);
return -1.0;
}
#ifdef _VALGRIND_
VALGRIND_CHECK_MEM_IS_DEFINED(wpi,endi-wpi);
#endif
@@ -723,7 +722,7 @@ float PosdbTable::getMaxScoreForNonBodyTermPair(const char *wpi, const char *wp
float PosdbTable::getScoreForTermPair( const char *wpi, const char *wpj, int32_t fixedDistance ) {
float PosdbTable::getScoreForTermPair(const char *wpi, const char *wpj, int32_t fixedDistance, int32_t qdist) {
logTrace(g_conf.m_logTracePosdb, "BEGIN.");
if ( ! wpi ) {
@@ -771,7 +770,7 @@ float PosdbTable::getScoreForTermPair( const char *wpi, const char *wpj, int32_t
// are exactly the same!
if ( dist < 2 ) dist = 2;
// subtract from the dist the terms are apart in the query
if ( dist >= m_qdist ) dist = dist - m_qdist;
if ( dist >= qdist ) dist = dist - qdist;
// out of order? penalize by 1 unit
if ( p2 < p1 ) dist += 1;
}
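
Passing qdist into getScoreForTermPair() instead of reading the shared m_qdist member makes the distance adjustment a pure function of its arguments. The adjustment itself is unchanged: clamp the document distance to at least 2, subtract the query distance when possible, and add 1 if the terms appear out of order. A small runnable sketch of just that step; the function and variable names are illustrative, not engine API:

    #include <cstdint>
    #include <cstdlib>

    // p1, p2: word positions of the two terms in the document
    // qdist:  how far apart the terms sit in the query
    int32_t adjustedDistance(int32_t p1, int32_t p2, int32_t qdist) {
        int32_t dist = std::abs(p2 - p1);
        if (dist < 2) dist = 2;            // identical/adjacent positions count as 2
        if (dist >= qdist) dist -= qdist;  // credit for matching the query spacing
        if (p2 < p1) dist += 1;            // out-of-order penalty
        return dist;
    }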
@@ -1510,7 +1509,6 @@ bool PosdbTable::setQueryTermInfo ( ) {
logTrace(g_conf.m_logTracePosdb, "BEGIN.");
// alloc space. assume max
//int32_t qneed = sizeof(QueryTermInfo) * m_msg2->getNumLists();
int32_t qneed = sizeof(QueryTermInfo) * m_q->m_numTerms;
if ( ! m_qiBuf.reserve(qneed,"qibuf") ) {
return false; // label it too!
@@ -1538,7 +1536,6 @@ bool PosdbTable::setQueryTermInfo ( ) {
m_hasMaxSerpScore = true;
}
//for ( int32_t i = 0 ; i < m_msg2->getNumLists() ; i++ ) {
for ( int32_t i = 0 ; i < m_q->m_numTerms ; i++ ) {
QueryTerm *qt = &m_q->m_qterms[i];
@@ -1550,7 +1547,6 @@ bool PosdbTable::setQueryTermInfo ( ) {
// set this stff
const QueryWord *qw = qt->m_qword;
//int32_t wordNum = qw - &m_q->m_qwords[0];
// get one
QueryTermInfo *qti = &qtibuf[nrg];
// and set it
@@ -1606,8 +1602,6 @@ bool PosdbTable::setQueryTermInfo ( ) {
const QueryTerm *rightTerm = qt->m_rightPhraseTerm;
bool leftAlreadyAdded = false;
bool rightAlreadyAdded = false;
//int64_t totalTermFreq = 0;
//int64_t *tfreqs = (int64_t *)m_msg39req->ptr_termFreqs;
//
// add the non-bigram list AFTER the
// bigrams, which we like to do when we PREFER the bigram
@@ -1625,7 +1619,6 @@ bool PosdbTable::setQueryTermInfo ( ) {
// assume added
leftAlreadyAdded = true;
// get list
//list = m_msg2->getList(left);
RdbList *list = m_q->m_qterms[left].m_posdbListPtr;
// add list ptr into our required group
qti->m_subLists[nn] = list;
@@ -1634,7 +1627,6 @@ bool PosdbTable::setQueryTermInfo ( ) {
// before a pipe operator?
if ( qt->m_piped ) qti->m_bigramFlags[nn] |= BF_PIPED;
// add list of member terms as well
//qti->m_qtermList[nn] = &m_q->m_qterms[left];
m_q->m_qterms[left].m_bitNum = nrg;
// only really add if useful
if ( list && !list->isEmpty() ) {
@@ -1643,14 +1635,12 @@ bool PosdbTable::setQueryTermInfo ( ) {
// add bigram synonyms! like "new jersey" bigram
// has the synonym "nj"
//for ( int32_t k = 0 ; k < m_msg2->getNumLists() ; k++) {
for ( int32_t k = 0 ; k < m_q->m_numTerms ; k++ ) {
QueryTerm *bt = &m_q->m_qterms[k];
if ( bt->m_synonymOf != leftTerm ) {
continue;
}
//list = m_msg2->getList(k);
list = m_q->m_qterms[k].m_posdbListPtr;
qti->m_subLists[nn] = list;
qti->m_bigramFlags[nn] = 0;
@@ -1660,7 +1650,6 @@ bool PosdbTable::setQueryTermInfo ( ) {
qti->m_bigramFlags[nn]|=BF_PIPED;
}
// add list of member terms as well
//qti->m_qtermList[nn] = bt;
bt->m_bitNum = nrg;
if ( list && !list->isEmpty() ) {
nn++;
@@ -1674,7 +1663,6 @@ bool PosdbTable::setQueryTermInfo ( ) {
// assume added
rightAlreadyAdded = true;
// get list
//list = m_msg2->getList(right);
RdbList *list = m_q->m_qterms[right].m_posdbListPtr;
// add list ptr into our required group
qti->m_subLists[nn] = list;
@@ -1683,7 +1671,6 @@ bool PosdbTable::setQueryTermInfo ( ) {
// before a pipe operator?
if ( qt->m_piped ) qti->m_bigramFlags[nn] |= BF_PIPED;
// add list of member terms as well
//qti->m_qtermList[nn] = &m_q->m_qterms[right];
m_q->m_qterms[right].m_bitNum = nrg;
// only really add if useful
if ( list && !list->isEmpty() ) {
@@ -1692,14 +1679,12 @@ bool PosdbTable::setQueryTermInfo ( ) {
// add bigram synonyms! like "new jersey" bigram
// has the synonym "nj"
//for (int32_t k = 0 ; k < m_msg2->getNumLists() ; k++ ) {
for ( int32_t k = 0 ; k < m_q->m_numTerms ; k++ ) {
QueryTerm *bt = &m_q->m_qterms[k];
if ( bt->m_synonymOf != rightTerm ) {
continue;
}
//list = m_msg2->getList(k);
list = m_q->m_qterms[k].m_posdbListPtr;
qti->m_subLists[nn] = list;
qti->m_bigramFlags[nn] = 0;
@@ -1709,7 +1694,6 @@ bool PosdbTable::setQueryTermInfo ( ) {
qti->m_bigramFlags[nn]|=BF_PIPED;
}
// add list of member terms as well
//qti->m_qtermList[nn] = bt;
bt->m_bitNum = nrg;
if ( list && !list->isEmpty() ) {
nn++;
@@ -1722,20 +1706,17 @@ bool PosdbTable::setQueryTermInfo ( ) {
//
// add to it. add backwards since we give precedence to
// the first list and we want that to be the NEWEST list!
//list = m_msg2->getList(i);
RdbList *list = m_q->m_qterms[i].m_posdbListPtr;
// add list ptr into our required group
qti->m_subLists[nn] = list;
// how many in there?
//int32_t count = m_msg2->getNumListsInGroup(left);
// base term is #1
//bigramSet[nrg][nn] = 1;
// special flags
qti->m_bigramFlags[nn] = 0;
// before a pipe operator?
if ( qt->m_piped ) qti->m_bigramFlags[nn] |= BF_PIPED;
if ( qt->m_piped )
	qti->m_bigramFlags[nn] |= BF_PIPED;
// is it a negative term?
if ( qt->m_termSign=='-')qti->m_bigramFlags[nn]|=BF_NEGATIVE;
if ( qt->m_termSign=='-')
	qti->m_bigramFlags[nn] |= BF_NEGATIVE;
// numeric posdb termlist flags. instead of word position
// they have a float stored there for sorting etc.
@@ -1762,7 +1743,6 @@ bool PosdbTable::setQueryTermInfo ( ) {
qti->m_bigramFlags[nn]|=BF_NUMBER;
// add list of member terms
//qti->m_qtermList[nn] = qt;
qt->m_bitNum = nrg;
// only really add if useful
@@ -1778,7 +1758,6 @@ bool PosdbTable::setQueryTermInfo ( ) {
//
if ( left >= 0 && ! leftAlreadyAdded ) {
// get list
//list = m_msg2->getList(left);
list = m_q->m_qterms[left].m_posdbListPtr;
// add list ptr into our required group
qti->m_subLists[nn] = list;
@@ -1787,7 +1766,6 @@ bool PosdbTable::setQueryTermInfo ( ) {
// before a pipe operator?
if ( qt->m_piped ) qti->m_bigramFlags[nn] |= BF_PIPED;
// add list of member terms as well
//qti->m_qtermList[nn] = &m_q->m_qterms[left];
m_q->m_qterms[left].m_bitNum = nrg;
// only really add if useful
if ( list && !list->isEmpty() ) {
@@ -1796,14 +1774,12 @@ bool PosdbTable::setQueryTermInfo ( ) {
// add bigram synonyms! like "new jersey" bigram
// has the synonym "nj"
//for( int32_t k = 0 ; k < m_msg2->getNumLists() ; k++ ) {
for ( int32_t k = 0 ; k < m_q->m_numTerms ; k++ ) {
QueryTerm *bt = &m_q->m_qterms[k];
if ( bt->m_synonymOf != leftTerm ) {
continue;
}
//list = m_msg2->getList(k);
list = m_q->m_qterms[k].m_posdbListPtr;
qti->m_subLists[nn] = list;
qti->m_bigramFlags[nn] = 0;
@@ -1812,7 +1788,6 @@ bool PosdbTable::setQueryTermInfo ( ) {
qti->m_bigramFlags[nn]|=BF_PIPED;
}
// add list of member terms as well
//qti->m_qtermList[nn] = bt;
bt->m_bitNum = nrg;
if ( list && !list->isEmpty() ) {
nn++;
@@ -1825,7 +1800,6 @@ bool PosdbTable::setQueryTermInfo ( ) {
//
if ( right >= 0 && ! rightAlreadyAdded ) {
// get list
//list = m_msg2->getList(right);
list = m_q->m_qterms[right].m_posdbListPtr;
// add list ptr into our required group
qti->m_subLists[nn] = list;
@@ -1834,7 +1808,6 @@ bool PosdbTable::setQueryTermInfo ( ) {
// before a pipe operator?
if ( qt->m_piped ) qti->m_bigramFlags[nn] |= BF_PIPED;
// add list of member terms as well
//qti->m_qtermList[nn] = &m_q->m_qterms[right];
m_q->m_qterms[right].m_bitNum = nrg;
// only really add if useful
if ( list && !list->isEmpty() ) {
@@ -1843,14 +1816,12 @@ bool PosdbTable::setQueryTermInfo ( ) {
// add bigram synonyms! like "new jersey" bigram
// has the synonym "nj"
//for (int32_t k = 0 ; k < m_msg2->getNumLists() ; k++ ) {
for ( int32_t k = 0 ; k < m_q->m_numTerms ; k++ ) {
QueryTerm *bt = &m_q->m_qterms[k];
if ( bt->m_synonymOf != rightTerm ) {
continue;
}
//list = m_msg2->getList(k);
list = m_q->m_qterms[k].m_posdbListPtr;
qti->m_subLists[nn] = list;
qti->m_bigramFlags[nn] = 0;
@@ -1870,7 +1841,6 @@ bool PosdbTable::setQueryTermInfo ( ) {
//
// ADD SYNONYM TERMS
//
//for ( int32_t k = 0 ; k < m_msg2->getNumLists() ; k++ ) {
for ( int32_t k = 0 ; k < m_q->m_numTerms ; k++ ) {
QueryTerm *qt2 = &m_q->m_qterms[k];
const QueryTerm *st = qt2->m_synonymOf;
@@ -1880,7 +1850,6 @@ bool PosdbTable::setQueryTermInfo ( ) {
}
// its a synonym, add it!
//list = m_msg2->getList(k);
list = m_q->m_qterms[k].m_posdbListPtr;
// add list ptr into our required group
qti->m_subLists[nn] = list;
@@ -1888,8 +1857,6 @@ bool PosdbTable::setQueryTermInfo ( ) {
qti->m_bigramFlags[nn] = BF_SYNONYM;
// before a pipe operator?
if ( qt->m_piped ) qti->m_bigramFlags[nn] |= BF_PIPED;
// add list of member terms as well
//qti->m_qtermList[nn] = qt2;
// set bitnum here i guess
qt2->m_bitNum = nrg;
// only really add if useful
@@ -1899,12 +1866,6 @@ bool PosdbTable::setQueryTermInfo ( ) {
}
// empty implies no results!!!
//if ( nn == 0 && qt->m_termSign != '-' ) {
//	//log("query: MISSING REQUIRED TERM IN QUERY!");
//	return;
//}
// store # lists in required group. nn might be zero!
qti->m_numSubLists = nn;
// set the term freqs for this list group/set
@@ -1922,8 +1883,6 @@ bool PosdbTable::setQueryTermInfo ( ) {
for ( int32_t q = 0 ; q < qti->m_numSubLists ; q++ ) {
// add list ptr into our required group
RdbList *l = qti->m_subLists[q];
// set end ptr
//qti->m_subListEnds[q]=list->m_list +list->m_listSize;
// get it
int64_t listSize = l->getListSize();
// add it up
@@ -1934,28 +1893,6 @@ bool PosdbTable::setQueryTermInfo ( ) {
nrg++;
}
//
// now set QueryTerm::m_bitNum for use by Expression::isTruth()
// in Query.cpp for boolean queries, so we can get the bit vector
// of a docid that is 1-1 with the queryterminfos and see which
// query words in the boolean expression it contains.
// used by matchesBoolQuery() which we call below.
//
/*
for ( int32_t i = 0 ; i < nrg ; i++ ) {
	// get one
	QueryTermInfo *qti = &qtibuf[i];
	// how many query terms are in this group?
	for ( int32_t j = 0 ; j < qti->m_numSubLists ; j++ ) {
		// get the query term
		QueryTerm *qt = qti->m_qtermList[j];
		// set the bit num member
		qt->m_bitNum = i;
	}
}
*/
//
// get the query term with the least data in posdb including syns
//
@@ -2281,21 +2218,16 @@ bool PosdbTable::findCandidateDocIds() {
// that doing a filter on 200MB of termlists wouldn't be more than
// 50-100ms since we can read 4GB/s from main memory.
//
for ( int32_t i = 0 ; i < m_numQueryTermInfos ; i++ ) {
// get it
QueryTermInfo *qti = &qtibuf[i];
// do not consider for adding if negative ('my house -home')
if ( qti->m_bigramFlags[0] & BF_NEGATIVE ) {
continue;
}
// remove docids from each termlist that are not in
// m_docIdVoteBuf (the intersection)
delNonMatchingDocIdsFromSubLists( qti );
}
delNonMatchingDocIdsFromSubLists();
logTrace(g_conf.m_logTracePosdb, "Shrunk SubLists");
if(g_conf.m_logTracePosdb) {
log(LOG_TRACE,"Shrunk sublists, m_numQueryTermInfos=%d", m_numQueryTermInfos);
for(int i=0; i<m_numQueryTermInfos; i++) {
log(LOG_TRACE,"  qti #%d: m_numSubLists=%d m_numMatchingSubLists=%d", i, qtibuf[i].m_numSubLists, qtibuf[i].m_numMatchingSubLists);
for(int j=0; j<qtibuf[i].m_numMatchingSubLists; j++)
log(LOG_TRACE,"    matchlist #%d: %d bytes %p - %p", j, qtibuf[i].m_matchingSubListSize[j], qtibuf[i].m_matchingSubListStart[j], qtibuf[i].m_matchingSubListEnd[j]);
}
}
if ( m_debug ) {
now = gettimeofdayInMilliseconds();
@@ -2954,14 +2886,6 @@ bool PosdbTable::prefilterMaxPossibleScoreByDistance(const QueryTermInfo *qtibuf
// term.
//
void PosdbTable::mergeTermSubListsForDocId(QueryTermInfo *qtibuf, char *miniMergeBuf, const char **miniMergedList, const char **miniMergedEnd, int *highestInlinkSiteRank) {
char *mptr;
char *mptrEnd;
char *lastMptr = NULL;
const char *nwp[MAX_SUBLISTS];
const char *nwpEnd[MAX_SUBLISTS];
char nwpFlags[MAX_SUBLISTS];
logTrace(g_conf.m_logTracePosdb, "BEGIN.");
// we got a docid that has all the query terms, so merge
@@ -2969,8 +2893,9 @@ void PosdbTable::mergeTermSubListsForDocId(QueryTermInfo *qtibuf, char *miniMerg
//
// all posdb keys for this docid should fit in here, the
// mini merge buf:
mptr = miniMergeBuf;
mptrEnd = miniMergeBuf + 299000;
char *mptr = miniMergeBuf;
char *mptrEnd = miniMergeBuf + 299000;
char *lastMptr = NULL;
// Merge each set of sublists, like we merge a term's list with
// its two associated bigram lists, if there, the left bigram and
@@ -2999,6 +2924,9 @@ void PosdbTable::mergeTermSubListsForDocId(QueryTermInfo *qtibuf, char *miniMerg
miniMergedList[j] = mptr;
bool isFirstKey = true;
const char *nwp[MAX_SUBLISTS];
const char *nwpEnd[MAX_SUBLISTS];
char nwpFlags[MAX_SUBLISTS];
// populate the nwp[] arrays for merging
int32_t nsub = 0;
for ( int32_t k = 0 ; k < qti->m_numMatchingSubLists ; k++ ) {
@@ -3078,6 +3006,7 @@ void PosdbTable::mergeTermSubListsForDocId(QueryTermInfo *qtibuf, char *miniMerg
// second check means it occurred as two separate terms
// or could be like bob and occurred as "bob's".
// see XmlDoc::hashWords3().
// nwp[mink][2] & 0x03 is the posdb entry original/synonym/hyponym/.. flags
if ( ! ((nwpFlags[mink] & BF_BIGRAM) && (nwp[mink][2] & 0x03)) ) {
// if the first key in our merged list store the docid crap
@@ -3400,13 +3329,10 @@ float PosdbTable::getMinSingleTermScoreSum(const char **miniMergedList, const ch
// m_bestMinTermPairWindowPtrs : Pointers to query term positions giving the best minimum score
//
void PosdbTable::findMinTermPairScoreInWindow(const char **ptrs, const char **highestScoringNonBodyPos, float *scoreMatrix) {
const char *wpi;
const char *wpj;
float wikiWeight;
int32_t qdist = 0;
float minTermPairScoreInWindow = 999999999.0;
bool scoredTerms = false;
logTrace(g_conf.m_logTracePosdb, "BEGIN.");
// TODO: only do this loop on the (i,j) pairs where i or j
@@ -3429,15 +3355,10 @@ void PosdbTable::findMinTermPairScoreInWindow(const char **ptrs, const char **hi
// from the title/linktext/etc.
//else wpi = highestScoringNonBodyPos[i];
wpi = ptrs[i];
// only evaluate pairs that have the advanced term in them
// to save time.
int32_t j = i + 1;
int32_t maxj = m_numQueryTermInfos;
const char *wpi = ptrs[i];
// loop over other terms
for ( ; j < maxj ; j++ ) {
for(int32_t j = i + 1; j < m_numQueryTermInfos; j++) {
// skip if to the left of a pipe operator
if ( m_bflags[j] & (BF_PIPED|BF_NEGATIVE|BF_NUMBER) )
@@ -3455,21 +3376,22 @@ void PosdbTable::findMinTermPairScoreInWindow(const char **ptrs, const char **hi
// from the title/linktext/etc.
//else wpj = highestScoringNonBodyPos[j];
wpj = ptrs[j];
const char *wpj = ptrs[j];
float wikiWeight;
// in same wikipedia phrase?
if ( m_wikiPhraseIds[j] == m_wikiPhraseIds[i] &&
     // zero means not in a phrase
     m_wikiPhraseIds[j] ) {
// try to get dist that matches qdist exactly
m_qdist = m_qpos[j] - m_qpos[i];
qdist = m_qpos[j] - m_qpos[i];
// wiki weight
wikiWeight = WIKI_WEIGHT; // .50;
}
else {
// basically try to get query words as close
// together as possible
m_qdist = 2;
qdist = 2;
// fix 'what is an unsecured loan' to get the
// exact phrase with higher score
//m_qdist = m_qpos[j] - m_qpos[i];
@@ -3478,26 +3400,26 @@ void PosdbTable::findMinTermPairScoreInWindow(const char **ptrs, const char **hi
}
// this will be -1 if wpi or wpj is NULL
float max = getScoreForTermPair(wpi, wpj, 0);
float max = getScoreForTermPair(wpi, wpj, 0, qdist);
scoredTerms = true;
// try sub-ing in the best title occurence or best
// inlink text occurence. cuz if the term is in the title
// but these two terms are really far apart, we should
// get a better score
float score = getScoreForTermPair ( highestScoringNonBodyPos[i], wpj, FIXED_DISTANCE );
float score = getScoreForTermPair(highestScoringNonBodyPos[i], wpj, FIXED_DISTANCE, qdist);
if ( score > max ) {
max = score;
}
// a double pair sub should be covered in the
// getMaxScoreForNonBodyTermPair() function
score = getScoreForTermPair ( highestScoringNonBodyPos[i], highestScoringNonBodyPos[j], FIXED_DISTANCE );
score = getScoreForTermPair(highestScoringNonBodyPos[i], highestScoringNonBodyPos[j], FIXED_DISTANCE, qdist);
if ( score > max ) {
max = score;
}
score = getScoreForTermPair ( wpi, highestScoringNonBodyPos[j], FIXED_DISTANCE );
score = getScoreForTermPair(wpi, highestScoringNonBodyPos[j], FIXED_DISTANCE, qdist);
if ( score > max ) {
max = score;
}
@ -5023,126 +4945,128 @@ bool PosdbTable::allocTopScoringDocIdsData() {
|
||||
// Run through each term sublist and remove all docids not
|
||||
// found in the docid vote buffer
|
||||
//
|
||||
void PosdbTable::delNonMatchingDocIdsFromSubLists(QueryTermInfo *qti) {
|
||||
void PosdbTable::delNonMatchingDocIdsFromSubLists() {
|
||||
|
||||
logTrace(g_conf.m_logTracePosdb, "BEGIN.");
|
||||
|
||||
// reset count of new sublists
|
||||
qti->m_numMatchingSubLists = 0;
|
||||
|
||||
// scan each sublist vs. the docid list
|
||||
for ( int32_t i = 0 ; i < qti->m_numSubLists ; i++ ) {
|
||||
|
||||
//phase 1: shrink the rdblists for all queryterms (except those with a minus sign)
|
||||
std::valarray<char *> newEndPtr(m_q->m_numTerms);
|
||||
for(int i=0; i<m_q->m_numTerms; i++) {
|
||||
newEndPtr[i] = NULL;
|
||||
if(m_q->m_qterms[i].m_termSign=='-')
|
||||
continue;
|
||||
RdbList *list = m_q->m_qterms[i].m_posdbListPtr;
|
||||
if(!list || list->isEmpty())
|
||||
continue;
|
||||
|
||||
// get that sublist
|
||||
char *subListPtr = qti->m_subLists[i]->getList();
|
||||
char *subListEnd = qti->m_subLists[i]->getListEnd();
|
||||
// reset docid list ptrs
|
||||
char *dp = m_docIdVoteBuf.getBufStart();
|
||||
char *dpEnd = dp + m_docIdVoteBuf.length();
|
||||
|
||||
// re-copy into the same buffer!
|
||||
char *subListPtr = list->getList();
|
||||
char *subListEnd = list->getListEnd();
|
||||
char *dst = subListPtr;
|
||||
// save it
|
||||
char *savedDst = dst;
|
||||
|
||||
|
||||
handleNextRecord:
|
||||
// scan the docid list for the current docid in this termlist
|
||||
for ( ; ; dp += 6 ) {
|
||||
// no docids in list? no need to skip any more subListPtrs!
|
||||
if ( dp >= dpEnd ) {
|
||||
goto doneWithSubList;
|
||||
}
|
||||
|
||||
// if current docid in docid list is >= the docid
|
||||
// in the sublist, stop. docid in list is 6 bytes and
|
||||
// subListPtr must be pointing to a 12 byte posdb rec.
|
||||
if ( *(uint32_t *)(dp+1) > *(uint32_t *)(subListPtr+8) ) {
|
||||
break;
|
||||
}
|
||||
|
||||
// try to catch up docid if it is behind
|
||||
if ( *(uint32_t *)(dp+1) < *(uint32_t *)(subListPtr+8) ) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// check lower byte if equal
|
||||
if ( *(unsigned char *)(dp) > (*(unsigned char *)(subListPtr+7) & 0xfc ) ) {
|
||||
break;
|
||||
}
|
||||
|
||||
if ( *(unsigned char *)(dp) < (*(unsigned char *)(subListPtr+7) & 0xfc ) ) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// copy over the 12 byte key
|
||||
*(int64_t *)dst = *(int64_t *)subListPtr;
|
||||
*(int32_t *)(dst+8) = *(int32_t *)(subListPtr+8);
|
||||
|
||||
// skip that
|
||||
dst += 12;
|
||||
subListPtr += 12;
|
||||
|
||||
// copy over any 6 bytes keys following
|
||||
for ( ; ; ) {
|
||||
if ( subListPtr >= subListEnd ) {
|
||||
// give up on this exhausted term list!
|
||||
goto doneWithSubList;
|
||||
}
|
||||
|
||||
// next docid willbe next 12 bytekey
|
||||
if ( ! ( subListPtr[0] & 0x04 ) ) {
|
||||
// reset docid list ptrs
|
||||
const char *dp = m_docIdVoteBuf.getBufStart();
|
||||
const char *dpEnd = dp + m_docIdVoteBuf.length();
|
||||
//log(LOG_INFO,"@@@@ i#%d subListPtr=%p subListEnd=%p", i, subListPtr, subListEnd);
|
||||
|
||||
for(;;) {
|
||||
// scan the docid list for the current docid in this termlist
|
||||
for ( ; dp < dpEnd; dp += 6 ) {
|
||||
// if current docid in docid list is >= the docid
|
||||
// in the sublist, stop. docid in list is 6 bytes and
|
||||
// subListPtr must be pointing to a 12 byte posdb rec.
|
||||
if ( *(uint32_t *)(dp+1) > *(uint32_t *)(subListPtr+8) ) {
|
||||
break;
|
||||
}
|
||||
|
||||
// otherwise it's 6 bytes
|
||||
*(int32_t *)dst = *(int32_t *)subListPtr;
|
||||
*(int16_t *)(dst+4) = *(int16_t *)(subListPtr+4);
|
||||
dst += 6;
|
||||
// try to catch up docid if it is behind
|
||||
if ( *(uint32_t *)(dp+1) < *(uint32_t *)(subListPtr+8) ) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// check lower byte if equal
|
||||
if ( *(unsigned char *)(dp) > (*(unsigned char *)(subListPtr+7) & 0xfc ) ) {
|
||||
break;
|
||||
}
|
||||
|
||||
if ( *(unsigned char *)(dp) < (*(unsigned char *)(subListPtr+7) & 0xfc ) ) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// copy over the 12 byte key
|
||||
*(int64_t *)dst = *(int64_t *)subListPtr;
|
||||
*(int32_t *)(dst+8) = *(int32_t *)(subListPtr+8);
|
||||
|
||||
// skip that
|
||||
dst += 12;
|
||||
subListPtr += 12;
|
||||
|
||||
// copy over any 6 bytes keys following
|
||||
for ( ; ; ) {
|
||||
if ( subListPtr >= subListEnd ) {
|
||||
// give up on this exhausted term list!
|
||||
goto doneWithSubList;
|
||||
}
|
||||
|
||||
// next docid will be the next 12-byte key
|
||||
if ( ! ( subListPtr[0] & 0x04 ) ) {
|
||||
break;
|
||||
}
|
||||
|
||||
// otherwise it's 6 bytes
|
||||
*(int32_t *)dst = *(int32_t *)subListPtr;
|
||||
*(int16_t *)(dst+4) = *(int16_t *)(subListPtr+4);
|
||||
dst += 6;
|
||||
subListPtr += 6;
|
||||
}
|
||||
}
|
||||
|
||||
// skip that docid record in our termlist. it MUST have been
|
||||
// 12 bytes, a docid heading record.
|
||||
subListPtr += 12;
|
||||
|
||||
// skip any following keys that are 6 bytes, that means they
|
||||
// share the same docid
|
||||
for ( ; ; ) {
|
||||
// list exhausted?
|
||||
if ( subListPtr >= subListEnd ) {
|
||||
goto doneWithSubList;
|
||||
}
|
||||
|
||||
// stop if next key is 12 bytes, that is a new docid
|
||||
if ( ! (subListPtr[0] & 0x04) ) {
|
||||
break;
|
||||
}
|
||||
|
||||
// skip it
|
||||
subListPtr += 6;
|
||||
}
|
||||
// continue the docid loop for this new subListPtr
|
||||
continue;
|
||||
}
|
||||
|
||||
// skip that docid record in our termlist. it MUST have been
|
||||
// 12 bytes, a docid heading record.
|
||||
subListPtr += 12;
|
||||
|
||||
// skip any following keys that are 6 bytes, that means they
|
||||
// share the same docid
|
||||
for ( ; ; ) {
|
||||
// list exhausted?
|
||||
if ( subListPtr >= subListEnd ) {
|
||||
goto doneWithSubList;
|
||||
}
|
||||
|
||||
// stop if next key is 12 bytes, that is a new docid
|
||||
if ( ! (subListPtr[0] & 0x04) ) {
|
||||
break;
|
||||
}
|
||||
|
||||
// skip it
|
||||
subListPtr += 6;
|
||||
}
|
||||
|
||||
// process the next rec ptr now
|
||||
goto handleNextRecord;
|
||||
|
||||
doneWithSubList:
|
||||
|
||||
// Now set "shortcut" info for the reduced sublist so
|
||||
// we end up with an array of matching sublists that
|
||||
// only contain the matching docids
|
||||
int32_t x = qti->m_numMatchingSubLists;
|
||||
qti->m_matchingSubListSize [x] = dst - savedDst;
|
||||
qti->m_matchingSubListStart [x] = savedDst;
|
||||
qti->m_matchingSubListEnd [x] = dst;
|
||||
qti->m_matchingSubListCursor [x] = savedDst;
|
||||
qti->m_matchingSubListSavedCursor [x] = savedDst;
|
||||
|
||||
if ( qti->m_matchingSubListSize[x] ) {
|
||||
qti->m_numMatchingSubLists++;
|
||||
//log(LOG_INFO,"@@@ shrunk #%d to %ld (%p-%p)", i, dst - list->getList(), list->getList(), dst);
|
||||
newEndPtr[i] = dst;
|
||||
}
|
||||
|
||||
//phase 2: set the matchingsublist pointers in qti
|
||||
for(int i=0; i<m_numQueryTermInfos; i++) {
|
||||
QueryTermInfo *qti = ((QueryTermInfo*)m_qiBuf.getBufStart()) + i;
|
||||
if(qti->m_bigramFlags[0]&BF_NEGATIVE)
|
||||
continue; //don't modify sublist for negative terms
|
||||
qti->m_numMatchingSubLists = 0;
|
||||
for(int j=0; j<qti->m_numSubLists; j++) {
|
||||
for(int k=0; k<m_q->m_numTerms; k++) {
|
||||
if(qti->m_subLists[j] == m_q->m_qterms[k].m_posdbListPtr) {
|
||||
char *newStartPtr = m_q->m_qterms[k].m_posdbListPtr->getList(); //same as always
|
||||
int32_t x = qti->m_numMatchingSubLists;
|
||||
qti->m_matchingSubListSize [x] = newEndPtr[k] - newStartPtr;
|
||||
qti->m_matchingSubListStart [x] = newStartPtr;
|
||||
qti->m_matchingSubListEnd [x] = newEndPtr[k];
|
||||
qti->m_matchingSubListCursor [x] = newStartPtr;
|
||||
qti->m_matchingSubListSavedCursor [x] = newStartPtr;
|
||||
qti->m_numMatchingSubLists++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
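The shrink logic above leans on posdb's on-disk key layout: every docid starts a 12-byte key, and any immediately following key whose first byte has the 0x04 bit set is a 6-byte key sharing that docid. As a minimal standalone sketch (simplified layout, illustrative names only, not the engine's API), walking such a termlist looks like this:

#include <cstddef>

// Sketch: count docids in a posdb-style termlist.
// Assumption: a 12-byte key starts a new docid; a following key with the
// 0x04 bit set in its first byte is a 6-byte continuation of the same docid.
static size_t countDocIds(const char *p, const char *end) {
	size_t numDocIds = 0;
	while (p < end) {
		numDocIds++;            // 12-byte docid-heading record
		p += 12;
		while (p < end && (p[0] & 0x04)) {
			p += 6;         // 6-byte key, same docid
		}
	}
	return numDocIds;
}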
|
||||
|
||||
|
@ -90,7 +90,7 @@ class PosdbTable {
|
||||
|
||||
float getMaxScoreForNonBodyTermPair(const char *wpi, const char *wpj, const char *endi, const char *endj, int32_t qdist);
|
||||
float getBestScoreSumForSingleTerm(int32_t i, const char *wpi, const char *endi, DocIdScore *pdcs, const char **highestScoringNonBodyPos);
|
||||
float getScoreForTermPair(const char *wpi, const char *wpj, int32_t fixedDistance);
|
||||
float getScoreForTermPair(const char *wpi, const char *wpj, int32_t fixedDistance, int32_t qdist);
|
||||
void findMinTermPairScoreInWindow(const char **ptrs, const char **highestScoringNonBodyPos, float *scoreMatrix);
|
||||
|
||||
float getTermPairScoreForAny ( int32_t i, int32_t j,
|
||||
@ -148,7 +148,6 @@ class PosdbTable {
|
||||
int32_t *m_qpos;
|
||||
int32_t *m_wikiPhraseIds;
|
||||
int32_t *m_quotedStartIds;
|
||||
int32_t m_qdist;
|
||||
float *m_freqWeights;
|
||||
char *m_bflags;
|
||||
int32_t *m_qtermNums;
|
||||
@ -224,7 +223,7 @@ class PosdbTable {
|
||||
|
||||
// sets stuff used by intersect10_r()
|
||||
bool setQueryTermInfo ( );
|
||||
void delNonMatchingDocIdsFromSubLists( QueryTermInfo *qti );
|
||||
void delNonMatchingDocIdsFromSubLists();
|
||||
|
||||
// for intersecting docids
|
||||
void addDocIdVotes( const QueryTermInfo *qti , int32_t listGroupNum );
|
||||
|
Process.cpp
@ -303,18 +303,18 @@ bool Process::init ( ) {
|
||||
m_calledSave = false;
|
||||
|
||||
// heartbeat check
|
||||
if ( ! g_loop.registerSleepCallback(100,NULL,heartbeatWrapper,0)) {
|
||||
if (!g_loop.registerSleepCallback(100, NULL, heartbeatWrapper, "Process::heartbeatWrapper", 0)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// . continually call this once per second
|
||||
// . once every half second now so that autosaves are closer together
|
||||
// in time between all hosts
|
||||
if ( ! g_loop.registerSleepCallback(500,NULL,processSleepWrapper)) {
|
||||
if (!g_loop.registerSleepCallback(500, NULL, processSleepWrapper, "Process::processSleepWrapper")) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!g_loop.registerSleepCallback(60000, NULL, reloadDocid2SiteFlags, 0)) {
|
||||
if (!g_loop.registerSleepCallback(60000, NULL, reloadDocid2SiteFlags, "Process::reloadDocid2SiteFlags", 0)) {
|
||||
return false;
|
||||
}
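The three registrations above all move to the registerSleepCallback variant that also takes a callback name. A hedged usage sketch, assuming the signature shown in this diff (interval in milliseconds, opaque state, callback, name) and the engine's global g_loop from Loop.h:

#include "Loop.h"

// illustrative wrapper only; any periodic task follows the same shape
static void myTickWrapper(int /*fd*/, void * /*state*/) {
	// periodic work goes here
}

static bool registerMyTick() {
	// a human-readable name makes slow or misbehaving callbacks easier to attribute
	return g_loop.registerSleepCallback(1000, NULL, myTickWrapper, "MyModule::myTickWrapper");
}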
|
||||
|
||||
@ -332,10 +332,6 @@ bool Process::isAnyTreeSaving() {
|
||||
return false;
|
||||
}
|
||||
|
||||
void Process::callHeartbeat () {
|
||||
heartbeatWrapper ( 0 , NULL );
|
||||
}
|
||||
|
||||
void heartbeatWrapper(int /*fd*/, void * /*state*/) {
|
||||
static int64_t s_last = 0LL;
|
||||
int64_t now = gettimeofdayInMilliseconds();
|
||||
@ -593,37 +589,43 @@ bool Process::shutdown2() {
|
||||
}
|
||||
|
||||
if ( m_urgent ) {
|
||||
log(LOG_INFO,"gb: Shutting down urgently. Timed try #%" PRId32".", m_try++);
|
||||
log(LOG_INFO,"gb: Shutting down urgently. Timed try #%" PRId32".", m_try);
|
||||
} else {
|
||||
log(LOG_INFO,"gb: Shutting down. Timed try #%" PRId32".", m_try++);
|
||||
log(LOG_INFO,"gb: Shutting down. Timed try #%" PRId32".", m_try);
|
||||
}
|
||||
|
||||
// we should only finalize these once
|
||||
if (m_try == 0) {
|
||||
InstanceInfoExchange::finalize();
|
||||
|
||||
finalizeRealtimeUrlClassification();
|
||||
|
||||
Statistics::finalize();
|
||||
|
||||
log("gb: disabling threads");
|
||||
|
||||
// always disable threads at this point so g_jobScheduler.submit() will
|
||||
// always return false and we do not queue any new jobs for spawning
|
||||
g_jobScheduler.disallow_new_jobs();
|
||||
|
||||
// Stop merging
|
||||
g_merge.haltMerge();
|
||||
|
||||
RdbBase::finalizeGlobalIndexThread();
|
||||
Msg4In::finalizeIncomingThread();
|
||||
|
||||
Rdb::finalizeRdbDumpThread();
|
||||
|
||||
g_jobScheduler.cancel_all_jobs_for_shutdown();
|
||||
}
|
||||
|
||||
m_try++;
|
||||
|
||||
// switch to urgent if having problems
|
||||
if ( m_try >= 10 ) {
|
||||
m_urgent = true;
|
||||
}
|
||||
|
||||
InstanceInfoExchange::finalize();
|
||||
|
||||
finalizeRealtimeUrlClassification();
|
||||
|
||||
Statistics::finalize();
|
||||
|
||||
log("gb: disabling threads");
|
||||
// now disable threads so we don't exit while threads are
|
||||
// outstanding
|
||||
g_jobScheduler.disallow_new_jobs();
|
||||
|
||||
// Stop merging
|
||||
g_merge.haltMerge();
|
||||
|
||||
RdbBase::finalizeGlobalIndexThread();
|
||||
Msg4In::finalizeIncomingThread();
|
||||
|
||||
Rdb::finalizeRdbDumpThread();
|
||||
|
||||
g_jobScheduler.cancel_all_jobs_for_shutdown();
|
||||
|
||||
static bool s_printed = false;
|
||||
|
||||
// wait for all threads to return
|
||||
@ -716,10 +718,6 @@ bool Process::shutdown2() {
|
||||
saveBlockingFiles2() ;
|
||||
}
|
||||
|
||||
// always disable threads at this point so g_jobScheduler.submit() will
|
||||
// always return false and we do not queue any new jobs for spawning
|
||||
g_jobScheduler.disallow_new_jobs();
|
||||
|
||||
// urgent means we need to dump core, SEGV or something
|
||||
if ( m_urgent ) {
|
||||
// log it
|
||||
@ -879,6 +877,8 @@ bool Process::saveRdbIndexes() {
|
||||
return true;
|
||||
}
|
||||
|
||||
log(LOG_INFO, "db: Saving rdb indexes");
|
||||
|
||||
// loop over all Rdbs and save them
|
||||
for (int32_t i = 0; i < m_numRdbs; i++) {
|
||||
Rdb *rdb = m_rdbs[i];
|
||||
@ -897,6 +897,8 @@ bool Process::saveRdbMaps() {
|
||||
return true;
|
||||
}
|
||||
|
||||
log(LOG_INFO, "db: Saving rdb maps");
|
||||
|
||||
// loop over all Rdbs and save them
|
||||
for ( int32_t i = 0 ; i < m_numRdbs ; i++ ) {
|
||||
Rdb *rdb = m_rdbs[i];
|
||||
@ -913,9 +915,11 @@ bool Process::saveBlockingFiles1 ( ) {
|
||||
|
||||
// save the gb.conf file now
|
||||
g_conf.save();
|
||||
|
||||
// save the conf files
|
||||
// if autosave and we have over 20 colls, just make host #0 do it
|
||||
g_collectiondb.save();
|
||||
g_collectiondb.save();
|
||||
|
||||
// . save repair state
|
||||
// . this is repeated above too
|
||||
// . keep it here for auto-save
|
||||
|
@ -82,8 +82,6 @@ class Process {
|
||||
// a timestamp for the sig alarm handler in Loop.cpp
|
||||
int64_t m_lastHeartbeatApprox;
|
||||
|
||||
void callHeartbeat ();
|
||||
|
||||
bool m_suspendAutoSave;
|
||||
|
||||
bool m_exiting;
|
||||
|
Query.cpp
@ -86,13 +86,11 @@ void Query::reset ( ) {
|
||||
m_queryWordBuf.purge();
|
||||
m_qwords = NULL;
|
||||
m_numExpressions = 0;
|
||||
m_hasUOR = false;
|
||||
// the site: and ip: query terms will disable site clustering & caching
|
||||
m_hasPositiveSiteField = false;
|
||||
m_hasIpField = false;
|
||||
m_hasUrlField = false;
|
||||
m_hasSubUrlField = false;
|
||||
m_hasQuotaField = false;
|
||||
m_truncated = false;
|
||||
}
|
||||
|
||||
@ -467,8 +465,6 @@ bool Query::setQTerms ( const Words &words ) {
|
||||
m_queryTermBuf.setLabel("stkbuf3");
|
||||
const char *pp = m_queryTermBuf.getBufStart();
|
||||
m_qterms = (QueryTerm *)pp;
|
||||
pp += sizeof(QueryTerm);
|
||||
if ( pp > m_queryTermBuf.getBufEnd() ) { g_process.shutdownAbort(true); }
|
||||
}
|
||||
|
||||
// call constructor on each one here
|
||||
@ -507,8 +503,6 @@ bool Query::setQTerms ( const Words &words ) {
|
||||
qt->m_qword = qw ;
|
||||
qt->m_piped = qw->m_piped;
|
||||
qt->m_isPhrase = true ;
|
||||
qt->m_isUORed = false;
|
||||
qt->m_UORedTerm = NULL;
|
||||
qt->m_synonymOf = NULL;
|
||||
qt->m_ignored = false;
|
||||
qt->m_term = NULL;
|
||||
@ -600,8 +594,6 @@ bool Query::setQTerms ( const Words &words ) {
|
||||
qt->m_qword = qw ;
|
||||
qt->m_piped = qw->m_piped;
|
||||
qt->m_isPhrase = false ;
|
||||
qt->m_isUORed = false;
|
||||
qt->m_UORedTerm = NULL;
|
||||
qt->m_synonymOf = NULL;
|
||||
// ignore some synonym terms if tf is too low
|
||||
qt->m_ignored = qw->m_ignoreWord;
|
||||
@ -638,15 +630,14 @@ bool Query::setQTerms ( const Words &words ) {
|
||||
int32_t fieldStart=-1;
|
||||
int32_t fieldLen=0;
|
||||
|
||||
if ( pw == 0 && m_qwords[pw].m_ignoreWord==IGNORE_FIELDNAME)
|
||||
if(pw == 0 && m_qwords[pw].m_ignoreWord==IGNORE_FIELDNAME)
|
||||
fieldStart = pw;
|
||||
|
||||
if ( pw > 0&& m_qwords[pw-1].m_ignoreWord==IGNORE_FIELDNAME ){
|
||||
if(pw > 0 && m_qwords[pw-1].m_ignoreWord==IGNORE_FIELDNAME) {
|
||||
pw -= 1;
|
||||
fieldStart = pw;
|
||||
}
|
||||
while (pw>0 &&
|
||||
((m_qwords[pw].m_ignoreWord == IGNORE_FIELDNAME))) {
|
||||
while(pw > 0 && m_qwords[pw].m_ignoreWord == IGNORE_FIELDNAME) {
|
||||
pw--;
|
||||
fieldStart = pw;
|
||||
}
|
||||
@ -666,8 +657,8 @@ bool Query::setQTerms ( const Words &words ) {
|
||||
pw++;
|
||||
|
||||
fieldLen = m_qwords[pw-1].m_word +
|
||||
m_qwords[pw-1].m_wordLen -
|
||||
m_qwords[fieldStart].m_word;
|
||||
m_qwords[pw-1].m_wordLen -
|
||||
m_qwords[fieldStart].m_word;
|
||||
}
|
||||
// do not use an explicit bit up if we have a hard count
|
||||
qt->m_hardCount = qw->m_hardCount;
|
||||
@ -715,21 +706,16 @@ bool Query::setQTerms ( const Words &words ) {
|
||||
qt->m_leftPhraseTermNum = -1;
|
||||
qt->m_rightPhraseTerm = NULL;
|
||||
qt->m_leftPhraseTerm = NULL;
|
||||
QueryTerm *qt2 = qt->m_UORedTerm;
|
||||
if (!qt2) continue;
|
||||
// chase down first term in UOR chain
|
||||
while (qt2->m_UORedTerm) qt2 = qt2->m_UORedTerm;
|
||||
}
|
||||
|
||||
// . set implicit bits, m_implicitBits
|
||||
// . set m_inPhrase
|
||||
for (int32_t i = 0; i < m_numWords ; i++ ){
|
||||
for (int32_t i = 0; i < m_numWords ; i++ ) {
|
||||
const QueryWord *qw = &m_qwords[i];
|
||||
QueryTerm *qt = qw->m_queryWordTerm;
|
||||
if (!qt) continue;
|
||||
if ( qw->m_queryPhraseTerm )
|
||||
qw->m_queryPhraseTerm->m_implicitBits |=
|
||||
qt->m_explicitBit;
|
||||
qw->m_queryPhraseTerm->m_implicitBits |= qt->m_explicitBit;
|
||||
// set flag if in a phrase, and set phrase term num
|
||||
if ( qw->m_queryPhraseTerm ) {
|
||||
QueryTerm *pt = qw->m_queryPhraseTerm;
|
||||
@ -856,8 +842,6 @@ bool Query::setQTerms ( const Words &words ) {
|
||||
qt->m_qword = qw; // NULL;
|
||||
qt->m_piped = qw->m_piped;
|
||||
qt->m_isPhrase = false ;
|
||||
qt->m_isUORed = false;
|
||||
qt->m_UORedTerm = NULL;
|
||||
qt->m_langIdBits = 0;
|
||||
// synonym of this term...
|
||||
qt->m_synonymOf = origTerm;
|
||||
@ -969,15 +953,9 @@ bool Query::setQTerms ( const Words &words ) {
|
||||
// repeated terms in setWords()
|
||||
// . we need to support: "trains AND (perl OR python) NOT python"
|
||||
for ( int32_t i = 0 ; i < n ; i++ ) {
|
||||
// BUT NOT IF in a UOR'd list!!!
|
||||
if ( m_qterms[i].m_isUORed ) continue;
|
||||
// that didn't seem to fix it right, for dup terms that
|
||||
// are the FIRST term in a UOR sequence... they don't seem
|
||||
// to have m_isUORed set
|
||||
if ( m_hasUOR ) continue;
|
||||
for ( int32_t j = 0 ; j < i ; j++ ) {
|
||||
// skip if not a termid match
|
||||
if(m_qterms[i].m_termId!=m_qterms[j].m_termId)continue;
|
||||
if(m_qterms[i].m_termId!=m_qterms[j].m_termId) continue;
|
||||
m_qterms[i].m_explicitBit = m_qterms[j].m_explicitBit;
|
||||
// if doing phrases, ignore the unrequired phrase
|
||||
if ( m_qterms[i].m_isPhrase ) {
|
||||
@ -1155,7 +1133,7 @@ bool Query::setQTerms ( const Words &words ) {
|
||||
//
|
||||
int32_t shift = 0;
|
||||
m_requiredBits = 0;
|
||||
for ( int32_t i = 0; i < n ; i++ ){
|
||||
for ( int32_t i = 0; i < n ; i++ ) {
|
||||
QueryTerm *qt = &m_qterms[i];
|
||||
qt->m_explicitBit = 0;
|
||||
if ( ! qt->m_isRequired ) continue;
|
||||
@ -1167,7 +1145,7 @@ bool Query::setQTerms ( const Words &words ) {
|
||||
if ( shift >= (int32_t)(sizeof(qvec_t)*8) ) break;
|
||||
}
|
||||
// now implicit bits
|
||||
for ( int32_t i = 0; i < n ; i++ ){
|
||||
for ( int32_t i = 0; i < n ; i++ ) {
|
||||
QueryTerm *qt = &m_qterms[i];
|
||||
// make it explicit bit at least
|
||||
qt->m_implicitBits = qt->m_explicitBit;
|
||||
@ -1924,16 +1902,6 @@ bool Query::setQWords ( char boolFlag ,
|
||||
// if query is all in upper case and we're doing boolean
|
||||
// DETECT, then assume not boolean
|
||||
if ( allUpper && boolFlag == 2 ) boolFlag = 0;
|
||||
// . having the UOR opcode does not mean we are boolean because
|
||||
// we want to keep it fast.
|
||||
// . we need to set this opcode so the UOR logic in setQTerms()
|
||||
// works, because it checks the m_opcode value. otherwise
|
||||
// Msg20 won't think we are a boolean query and set boolFlag
|
||||
// to 0 when setting the query for summary generation and
|
||||
// will not recognize the UOR word as being an operator
|
||||
if ( wlen==3 && w[0]=='U' && w[1]=='O' && w[2]=='R' &&
|
||||
! firstWord ) {
|
||||
opcode = OP_UOR; m_hasUOR = true; goto skipin; }
|
||||
// . is this word a boolean operator?
|
||||
// . cannot be in quotes or field
|
||||
if ( boolFlag >= 1 && ! inQuotes && ! fieldCode ) {
|
||||
@ -1953,7 +1921,6 @@ bool Query::setQWords ( char boolFlag ,
|
||||
else if ( wlen==5 && w[0]=='R' && w[1]=='i' &&
|
||||
w[2]=='G' && w[3]=='h' && w[4]=='P' )
|
||||
opcode = OP_RIGHTPAREN;
|
||||
skipin:
|
||||
// no pair across or even include any boolean op phrs
|
||||
if ( opcode ) {
|
||||
bits.m_bits[i] &= ~D_CAN_PAIR_ACROSS;
|
||||
@ -3464,8 +3431,6 @@ void QueryTerm::constructor ( ) {
|
||||
m_userWeight = 0;
|
||||
m_piped = false;
|
||||
m_ignored = false;
|
||||
m_isUORed = false;
|
||||
m_UORedTerm = NULL;
|
||||
m_synonymOf = NULL;
|
||||
m_synWids0 = 0;
|
||||
m_synWids1 = 0;
|
||||
|
Query.h
@ -238,8 +238,6 @@ class QueryWord {
|
||||
float m_userWeightForPhrase;
|
||||
|
||||
bool m_queryOp;
|
||||
// is it after a NOT operator? i.e. NOT ( x UOR y UOR ... )
|
||||
bool m_underNOT;
|
||||
// is this query word before a | (pipe) operator?
|
||||
bool m_piped;
|
||||
|
||||
@ -335,17 +333,13 @@ class QueryTerm {
|
||||
float m_userWeight;
|
||||
|
||||
// . is this query term before a | (pipe) operator?
|
||||
// . if so we must read the whole termlist, like m_underNOT above
|
||||
// . if so we must read the whole termlist
|
||||
bool m_piped;
|
||||
|
||||
// . we ignore component terms unless their compound term is not cached
|
||||
// . now this is used to ignore low tf synonym terms only
|
||||
bool m_ignored ;
|
||||
|
||||
// is it part of a UOR chain?
|
||||
bool m_isUORed;
|
||||
QueryTerm *m_UORedTerm;
|
||||
|
||||
// . if synonymOf is not NULL, then m_term points into m_synBuf, not
|
||||
// m_buf
|
||||
const QueryTerm *m_synonymOf;
|
||||
@ -502,7 +496,6 @@ public:
|
||||
bool m_hasIpField;
|
||||
bool m_hasUrlField;
|
||||
bool m_hasSubUrlField;
|
||||
bool m_hasQuotaField;
|
||||
|
||||
// . we set this to true if it is a boolean query
|
||||
// . when calling Query::set() above you can tell it explicitly
|
||||
@ -529,8 +522,6 @@ public:
|
||||
bool m_queryExpansion;
|
||||
|
||||
bool m_truncated;
|
||||
|
||||
bool m_hasUOR;
|
||||
};
|
||||
|
||||
#endif // GB_QUERY_H
|
||||
|
Rdb.cpp
@ -672,18 +672,14 @@ bool Rdb::loadTree ( ) {
|
||||
log( LOG_ERROR, "db: Could not load saved tree." );
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
if ( !m_buckets.loadBuckets(m_dbname) ) {
|
||||
log( LOG_ERROR, "db: Could not load saved buckets." );
|
||||
return false;
|
||||
}
|
||||
|
||||
int32_t numKeys = m_buckets.getNumKeys();
|
||||
|
||||
// log("db: Loaded %" PRId32" recs from %s's buckets on disk.",
|
||||
// numKeys, m_dbname);
|
||||
|
||||
|
||||
if(!m_buckets.testAndRepair()) {
|
||||
log( LOG_ERROR, "db: unrepairable buckets, remove and restart." );
|
||||
g_process.shutdownAbort(true);
|
||||
|
RdbBase.cpp
@ -888,7 +888,7 @@ int32_t RdbBase::addFile ( bool isNew, int32_t fileId, int32_t fileId2, int32_t
|
||||
char mapName[1024];
|
||||
generateMapFilename(mapName,sizeof(mapName),fileId,fileId2,0,-1);
|
||||
m->set(dirName, mapName, m_fixedDataSize, m_useHalfKeys, m_ks, m_pageSize);
|
||||
if ( ! isNew && ! m->readMap ( f ) ) {
|
||||
if ( ! isNew && !isInMergeDir && ! m->readMap ( f ) ) {
|
||||
// if out of memory, do not try to regen for that
|
||||
if ( g_errno == ENOMEM ) {
|
||||
return -1;
|
||||
@ -937,8 +937,8 @@ int32_t RdbBase::addFile ( bool isNew, int32_t fileId, int32_t fileId2, int32_t
|
||||
|
||||
// set the index file's filename
|
||||
generateIndexFilename(indexName,sizeof(indexName),fileId,fileId2,0,-1);
|
||||
in->set(dirName, indexName, m_fixedDataSize, m_useHalfKeys, m_ks, m_rdb->getRdbId(), !isNew);
|
||||
if (!isNew && !(in->readIndex() && in->verifyIndex())) {
|
||||
in->set(dirName, indexName, m_fixedDataSize, m_useHalfKeys, m_ks, m_rdb->getRdbId(), (!isNew && !isInMergeDir));
|
||||
if (!isNew && !isInMergeDir && !(in->readIndex() && in->verifyIndex())) {
|
||||
// if out of memory, do not try to regen for that
|
||||
if (g_errno == ENOMEM) {
|
||||
return -1;
|
||||
@ -1543,7 +1543,7 @@ void RdbBase::renamesDone() {
|
||||
}
|
||||
if ( wait ) {
|
||||
log("db: waiting for read thread to exit on unlinked file");
|
||||
if ( !g_loop.registerSleepCallback( 100, this, checkThreadsAgainWrapper ) ) {
|
||||
if (!g_loop.registerSleepCallback(100, this, checkThreadsAgainWrapper, "RdbBase::checkThreadsAgainWrapper")) {
|
||||
gbshutdownResourceError();
|
||||
}
|
||||
return;
|
||||
@ -1864,9 +1864,13 @@ bool RdbBase::attemptMerge(int32_t niceness, bool forceMergeAll, int32_t minToMe
|
||||
}
|
||||
|
||||
int32_t endMergeFileNum = mergeFileNum;
|
||||
for(int32_t j = mergeFileNum+1; j < mergeFileNum+mergeFileCount && j<m_numFiles; j++) {
|
||||
if(m_fileInfo[j].m_fileId <= endMergeFileId) {
|
||||
for (int32_t j = mergeFileNum+1; j<m_numFiles; j++) {
|
||||
if (m_fileInfo[j].m_fileId <= endMergeFileId) {
|
||||
endMergeFileNum = j;
|
||||
|
||||
if (m_fileInfo[j].m_fileId == endMergeFileId) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1876,7 +1880,7 @@ bool RdbBase::attemptMerge(int32_t niceness, bool forceMergeAll, int32_t minToMe
|
||||
if(currentFilesToMerge<0)
|
||||
gbshutdownLogicError();
|
||||
|
||||
if(currentFilesToMerge <= mergeFileCount) {
|
||||
if(currentFilesToMerge < mergeFileCount) {
|
||||
log(LOG_INFO, "merge: Only merging %" PRId32" instead of the original %" PRId32" files.", currentFilesToMerge, mergeFileCount);
|
||||
} else if(currentFilesToMerge == mergeFileCount) {
|
||||
//excellent
|
||||
@ -2478,7 +2482,7 @@ bool RdbBase::verifyFileSharding ( ) {
|
||||
0 , // niceness
|
||||
false , // err correction?
|
||||
-1 , // maxRetries
|
||||
true)) { // isRealMerge
|
||||
false)) { // isRealMerge
|
||||
log( LOG_DEBUG, "db: HEY! it did not block");
|
||||
return false;
|
||||
}
|
||||
@ -2531,13 +2535,13 @@ void RdbBase::finalizeGlobalIndexThread() {
|
||||
m_globalIndexThreadQueue.finalize();
|
||||
}
|
||||
|
||||
docids_ptr_t RdbBase::prepareGlobalIndexJob(bool markFileReadable, int32_t fileId) {
|
||||
std::vector<std::pair<int32_t, docidsconst_ptr_t>> RdbBase::prepareGlobalIndexJob(bool markFileReadable, int32_t fileId) {
|
||||
ScopedLock sl(m_mtxFileInfo);
|
||||
return prepareGlobalIndexJob_unlocked(markFileReadable, fileId);
|
||||
}
|
||||
|
||||
docids_ptr_t RdbBase::prepareGlobalIndexJob_unlocked(bool markFileReadable, int32_t fileId) {
|
||||
docids_ptr_t tmpDocIdFileIndex(new docids_t);
|
||||
std::vector<std::pair<int32_t, docidsconst_ptr_t>> RdbBase::prepareGlobalIndexJob_unlocked(bool markFileReadable, int32_t fileId) {
|
||||
std::vector<std::pair<int32_t, docidsconst_ptr_t>> docIdFileIndexes;
|
||||
|
||||
// global index does not include RdbIndex from tree/buckets
|
||||
for (int32_t i = 0; i < m_numFiles; i++) {
|
||||
@ -2546,16 +2550,11 @@ docids_ptr_t RdbBase::prepareGlobalIndexJob_unlocked(bool markFileReadable, int3
|
||||
}
|
||||
|
||||
if(m_fileInfo[i].m_allowReads || m_fileInfo[i].m_pendingGenerateIndex) {
|
||||
auto docIds = m_fileInfo[i].m_index->getDocIds();
|
||||
tmpDocIdFileIndex->reserve(tmpDocIdFileIndex->size() + docIds->size());
|
||||
std::transform(docIds->begin(), docIds->end(), std::back_inserter(*tmpDocIdFileIndex),
|
||||
[i](uint64_t docId) {
|
||||
return ((docId << s_docIdFileIndex_docIdOffset) | i); // docId has delete key
|
||||
});
|
||||
docIdFileIndexes.emplace_back(i, m_fileInfo[i].m_index->getDocIds());
|
||||
}
|
||||
}
|
||||
|
||||
return tmpDocIdFileIndex;
|
||||
return docIdFileIndexes;
|
||||
}
|
||||
|
||||
void RdbBase::submitGlobalIndexJob(bool markFileReadable, int32_t fileId) {
|
||||
@ -2593,25 +2592,37 @@ void RdbBase::generateGlobalIndex(void *item) {
|
||||
|
||||
log(LOG_INFO, "db: Processing job %p to generate global index", item);
|
||||
|
||||
std::stable_sort(queueItem->m_docIdFileIndex->begin(), queueItem->m_docIdFileIndex->end(),
|
||||
docids_ptr_t tmpDocIdFileIndex(new docids_t);
|
||||
for (auto it = queueItem->m_docIdFileIndexes.begin(); it != queueItem->m_docIdFileIndexes.end(); ++it) {
|
||||
auto i = it->first;
|
||||
const auto &docIds = it->second;
|
||||
|
||||
tmpDocIdFileIndex->reserve(tmpDocIdFileIndex->size() + docIds->size());
|
||||
std::transform(docIds->begin(), docIds->end(), std::back_inserter(*tmpDocIdFileIndex),
|
||||
[i](uint64_t docId) {
|
||||
return ((docId << s_docIdFileIndex_docIdOffset) | i); // docId has delete key
|
||||
});
|
||||
}
|
||||
|
||||
std::stable_sort(tmpDocIdFileIndex->begin(), tmpDocIdFileIndex->end(),
|
||||
[](uint64_t a, uint64_t b) {
|
||||
return (a & s_docIdFileIndex_docIdMask) < (b & s_docIdFileIndex_docIdMask);
|
||||
});
|
||||
|
||||
// in reverse because we want to keep the highest file position
|
||||
auto it = std::unique(queueItem->m_docIdFileIndex->rbegin(), queueItem->m_docIdFileIndex->rend(),
|
||||
auto it = std::unique(tmpDocIdFileIndex->rbegin(), tmpDocIdFileIndex->rend(),
|
||||
[](uint64_t a, uint64_t b) {
|
||||
return (a & s_docIdFileIndex_docIdMask) == (b & s_docIdFileIndex_docIdMask);
|
||||
});
|
||||
queueItem->m_docIdFileIndex->erase(queueItem->m_docIdFileIndex->begin(), it.base());
|
||||
tmpDocIdFileIndex->erase(tmpDocIdFileIndex->begin(), it.base());
|
||||
|
||||
// free up used space
|
||||
queueItem->m_docIdFileIndex->shrink_to_fit();
|
||||
tmpDocIdFileIndex->shrink_to_fit();
|
||||
|
||||
// replace with new index
|
||||
ScopedLock sl(queueItem->m_base->m_mtxFileInfo);
|
||||
ScopedLock sl2(queueItem->m_base->m_docIdFileIndexMtx);
|
||||
queueItem->m_base->m_docIdFileIndex.swap(queueItem->m_docIdFileIndex);
|
||||
queueItem->m_base->m_docIdFileIndex.swap(tmpDocIdFileIndex);
|
||||
|
||||
if (queueItem->m_markFileReadable) {
|
||||
for (auto i = 0; i < queueItem->m_base->m_numFiles; ++i) {
|
||||
|
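The refactor above defers the expensive packing, sorting and deduplication of the per-file docid lists to the worker thread. The core trick is unchanged: pack (docid << shift) | filePos into one 64-bit value per entry, stable_sort by docid, then run std::unique over reverse iterators so the entry kept for each docid is the one from the highest file position. A self-contained sketch of that dedup, with assumed bit widths and illustrative names (the per-file lists are taken to be ordered oldest file first):

#include <algorithm>
#include <cstdint>
#include <utility>
#include <vector>

// Sketch of the "keep the highest file position per docid" dedup.
// Assumption: the low 16 bits carry the file position, the rest the docid.
static const int kDocIdShift = 16;

std::vector<uint64_t> buildGlobalIndex(const std::vector<std::pair<int32_t, std::vector<uint64_t> > > &perFile) {
	std::vector<uint64_t> combined;
	for (size_t f = 0; f < perFile.size(); f++) {
		const uint64_t filePos = (uint64_t)perFile[f].first;
		for (size_t j = 0; j < perFile[f].second.size(); j++) {
			combined.push_back((perFile[f].second[j] << kDocIdShift) | filePos);
		}
	}
	// sort by docid only; stable, so entries from later (newer) files stay later
	std::stable_sort(combined.begin(), combined.end(),
	                 [](uint64_t a, uint64_t b) { return (a >> kDocIdShift) < (b >> kDocIdShift); });
	// dedup in reverse so the survivor per docid is the one with the highest file position
	auto it = std::unique(combined.rbegin(), combined.rend(),
	                      [](uint64_t a, uint64_t b) { return (a >> kDocIdShift) == (b >> kDocIdShift); });
	combined.erase(combined.begin(), it.base());
	return combined;
}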
RdbBase.h
@ -222,15 +222,15 @@ public:
|
||||
static void generateGlobalIndex(void *item);
|
||||
|
||||
struct ThreadQueueItem {
|
||||
ThreadQueueItem(RdbBase *base, docids_ptr_t docIdFileIndex, bool markFileReadable, int32_t fileId)
|
||||
ThreadQueueItem(RdbBase *base, std::vector<std::pair<int32_t, docidsconst_ptr_t>> docIdFileIndexes, bool markFileReadable, int32_t fileId)
|
||||
: m_base(base)
|
||||
, m_docIdFileIndex(docIdFileIndex)
|
||||
, m_docIdFileIndexes(docIdFileIndexes)
|
||||
, m_markFileReadable(markFileReadable)
|
||||
, m_fileId(fileId) {
|
||||
}
|
||||
|
||||
RdbBase *m_base;
|
||||
docids_ptr_t m_docIdFileIndex;
|
||||
std::vector<std::pair<int32_t, docidsconst_ptr_t>> m_docIdFileIndexes;
|
||||
bool m_markFileReadable;
|
||||
int32_t m_fileId;
|
||||
};
|
||||
@ -250,8 +250,8 @@ public:
|
||||
static const uint64_t s_docIdFileIndex_filePosMask = 0x000000000000ffffULL;
|
||||
|
||||
private:
|
||||
docids_ptr_t prepareGlobalIndexJob(bool markFileReadable, int32_t fileId);
|
||||
docids_ptr_t prepareGlobalIndexJob_unlocked(bool markFileReadable, int32_t fileId);
|
||||
std::vector<std::pair<int32_t, docidsconst_ptr_t>> prepareGlobalIndexJob(bool markFileReadable, int32_t fileId);
|
||||
std::vector<std::pair<int32_t, docidsconst_ptr_t>> prepareGlobalIndexJob_unlocked(bool markFileReadable, int32_t fileId);
|
||||
|
||||
void selectFilesToMerge(int32_t mergeNum, int32_t numFiles, int32_t *p_mini);
|
||||
|
||||
|
@ -689,10 +689,11 @@ RdbBuckets::~RdbBuckets( ) {
|
||||
reset_unlocked();
|
||||
}
|
||||
|
||||
// we don't lock because variable is already atomic
|
||||
bool RdbBuckets::isSaving() const {
|
||||
ScopedLock sl(m_mtx);
|
||||
return m_isSaving;
|
||||
}
|
||||
|
||||
bool RdbBuckets::needsSave() const {
|
||||
ScopedLock sl(m_mtx);
|
||||
return m_needsSave;
|
||||
@ -2095,7 +2096,8 @@ bool RdbBuckets::fastSave(const char *dir, bool useThread, void *state, void (*c
|
||||
}
|
||||
|
||||
// return true if already in the middle of saving
|
||||
if (m_isSaving) {
|
||||
bool isSaving = m_isSaving.exchange(true);
|
||||
if (isSaving) {
|
||||
logTrace(g_conf.m_logTraceRdbBuckets, "END. Is already saving. Returning false.");
|
||||
return false;
|
||||
}
|
||||
@ -2109,8 +2111,6 @@ bool RdbBuckets::fastSave(const char *dir, bool useThread, void *state, void (*c
|
||||
m_callback = callback;
|
||||
// assume no error
|
||||
m_errno = 0;
|
||||
// no adding to the tree now
|
||||
m_isSaving = true;
|
||||
|
||||
if (useThread) {
|
||||
// make this a thread now
|
||||
@ -2151,15 +2151,6 @@ void RdbBuckets::saveWrapper(void *state) {
|
||||
// this returns false and sets g_errno on error
|
||||
that->fastSave_unlocked();
|
||||
|
||||
// . resume adding to the tree
|
||||
// . this will also allow other threads to be queued
|
||||
// . if we did this at the end of the thread we could end up with
|
||||
// an overflow of queued SAVETHREADs
|
||||
that->m_isSaving = false;
|
||||
|
||||
// we do not need to be saved now?
|
||||
that->m_needsSave = false;
|
||||
|
||||
if (g_errno && !that->m_errno) {
|
||||
that->m_errno = g_errno;
|
||||
}
|
||||
@ -2171,6 +2162,15 @@ void RdbBuckets::saveWrapper(void *state) {
|
||||
that->m_dbname, that->m_numKeysApprox, that->m_bytesWritten);
|
||||
}
|
||||
|
||||
// . resume adding to the tree
|
||||
// . this will also allow other threads to be queued
|
||||
// . if we did this at the end of the thread we could end up with
|
||||
// an overflow of queued SAVETHREADs
|
||||
that->m_isSaving = false;
|
||||
|
||||
// we do not need to be saved now?
|
||||
that->m_needsSave = false;
|
||||
|
||||
logTrace(g_conf.m_logTraceRdbBuckets, "END");
|
||||
}
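The fastSave()/saveWrapper() changes above turn m_isSaving into a std::atomic<bool> and use exchange(true) as a test-and-set, so only one caller can start a save, and the flag is cleared only after the save has actually finished. A minimal sketch of that guard outside the engine's classes:

#include <atomic>
#include <cstdio>

// exchange(true) returns the previous value: exactly one caller sees
// "false" and proceeds; concurrent callers back off.
static std::atomic<bool> s_isSaving(false);

bool startSave() {
	if (s_isSaving.exchange(true)) {
		return false;                  // a save is already in flight
	}
	printf("saving...\n");                 // the actual write would happen here
	s_isSaving.store(false);               // clear only after the save completes
	return true;
}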
|
||||
|
||||
|
@ -25,6 +25,7 @@
|
||||
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <atomic>
|
||||
#include "rdbid_t.h"
|
||||
#include "types.h"
|
||||
#include "GbMutex.h"
|
||||
@ -44,8 +45,6 @@ public:
|
||||
void clear();
|
||||
void reset();
|
||||
|
||||
GbMutex& getLock() { return m_mtx; }
|
||||
|
||||
bool set(int32_t fixedDataSize, int32_t maxMem, const char *allocName, rdbid_t rdbId, const char *dbname,
|
||||
char keySize);
|
||||
|
||||
@ -100,6 +99,8 @@ public:
|
||||
bool loadBuckets(const char *dbname);
|
||||
|
||||
private:
|
||||
GbMutex& getLock() { return m_mtx; }
|
||||
|
||||
static void saveWrapper(void *state);
|
||||
static void saveDoneWrapper(void *state, job_exit_t exit_type);
|
||||
|
||||
@ -156,9 +157,11 @@ private:
|
||||
int32_t m_sortBufSize;
|
||||
|
||||
bool m_repairMode;
|
||||
bool m_isSaving;
|
||||
|
||||
std::atomic<bool> m_isSaving;
|
||||
// true if buckets was modified and needs to be saved
|
||||
bool m_needsSave;
|
||||
|
||||
const char *m_dir;
|
||||
void *m_state;
|
||||
|
||||
|
@ -303,7 +303,7 @@ bool RdbDump::dumpTree(bool recall) {
|
||||
logError("db: Had error getting data for dump: %s. Retrying.", mstrerror(g_errno));
|
||||
|
||||
// retry for the remaining two types of errors
|
||||
if (!g_loop.registerSleepCallback(1000, this, tryAgainWrapper2)) {
|
||||
if (!g_loop.registerSleepCallback(1000, this, tryAgainWrapper2, "RdbDump::tryAgainWrapper2")) {
|
||||
log(LOG_WARN, "db: Retry failed. Could not register callback.");
|
||||
|
||||
logTrace(g_conf.m_logTraceRdbDump, "END - retry failed, returning true");
|
||||
|
RdbIndex.cpp
@ -36,9 +36,10 @@ RdbIndex::RdbIndex()
|
||||
, m_version(s_rdbIndexCurrentVersion)
|
||||
, m_docIds(new docids_t)
|
||||
, m_docIdsMtx()
|
||||
, m_pendingDocIdsMtx()
|
||||
, m_pendingMergeMtx()
|
||||
, m_pendingMergeCond(PTHREAD_COND_INITIALIZER)
|
||||
, m_pendingMerge(false)
|
||||
, m_pendingDocIdsMtx()
|
||||
, m_pendingDocIds(new docids_t)
|
||||
, m_prevPendingDocId(MAX_DOCID + 1)
|
||||
, m_lastMergeTime(gettimeofdayInMilliseconds())
|
||||
@ -53,9 +54,9 @@ RdbIndex::~RdbIndex() {
|
||||
g_loop.unregisterSleepCallback(this, &timedMerge);
|
||||
}
|
||||
|
||||
ScopedLock sl(m_pendingDocIdsMtx);
|
||||
ScopedLock sl(m_pendingMergeMtx);
|
||||
while (m_pendingMerge) { // spurious wakeup
|
||||
pthread_cond_wait(&m_pendingMergeCond, &(m_pendingDocIdsMtx.mtx));
|
||||
pthread_cond_wait(&m_pendingMergeCond, &(m_pendingMergeMtx.mtx));
|
||||
}
|
||||
}
|
||||
|
||||
@ -92,22 +93,14 @@ void RdbIndex::clear() {
|
||||
void RdbIndex::timedMerge(int /*fd*/, void *state) {
|
||||
RdbIndex *index = static_cast<RdbIndex*>(state);
|
||||
|
||||
ScopedLock sl(index->m_pendingDocIdsMtx);
|
||||
ScopedLock sl(index->m_pendingMergeMtx);
|
||||
|
||||
// make sure there is only a single merge job at one time
|
||||
if (index->m_pendingMerge) {
|
||||
if (index->m_pendingMerge || index->m_generatingIndex) {
|
||||
return;
|
||||
}
|
||||
|
||||
// don't need to merge if it's empty
|
||||
if (index->m_pendingDocIds->empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
if ((index->m_pendingDocIds->size() >= (index->m_generatingIndex ? s_generateMaxPendingSize : s_defaultMaxPendingSize)) ||
|
||||
(gettimeofdayInMilliseconds() - index->m_lastMergeTime >= s_defaultMaxPendingTimeMs)) {
|
||||
index->m_pendingMerge = g_jobScheduler.submit(mergePendingDocIds, NULL, state, thread_type_index_merge, 0);
|
||||
}
|
||||
index->m_pendingMerge = g_jobScheduler.submit(mergePendingDocIds, NULL, state, thread_type_index_merge, 0);
|
||||
}
|
||||
|
||||
void RdbIndex::mergePendingDocIds(void *state) {
|
||||
@ -115,12 +108,15 @@ void RdbIndex::mergePendingDocIds(void *state) {
|
||||
|
||||
ScopedLock sl(index->m_pendingDocIdsMtx);
|
||||
|
||||
// we check criteria again to avoid running merge when it's not needed
|
||||
if ((index->m_pendingDocIds->size() >= (index->m_generatingIndex ? s_generateMaxPendingSize : s_defaultMaxPendingSize)) ||
|
||||
(gettimeofdayInMilliseconds() - index->m_lastMergeTime >= s_defaultMaxPendingTimeMs)) {
|
||||
(void)index->mergePendingDocIds_unlocked();
|
||||
// don't merge if it's empty
|
||||
if (!index->m_pendingDocIds->empty()) {
|
||||
if ((index->m_pendingDocIds->size() >= (index->m_generatingIndex ? s_generateMaxPendingSize : s_defaultMaxPendingSize)) ||
|
||||
(gettimeofdayInMilliseconds() - index->m_lastMergeTime >= s_defaultMaxPendingTimeMs)) {
|
||||
(void)index->mergePendingDocIds_unlocked();
|
||||
}
|
||||
}
|
||||
|
||||
ScopedLock sl2(index->m_pendingMergeMtx);
|
||||
index->m_pendingMerge = false;
|
||||
pthread_cond_signal(&(index->m_pendingMergeCond));
|
||||
}
|
||||
@ -140,7 +136,7 @@ void RdbIndex::set(const char *dir, const char *indexFilename, int32_t fixedData
|
||||
|
||||
if (!isStatic) {
|
||||
/// if we're not merging/adding record we don't need to merge
|
||||
m_registeredCallback = g_loop.registerSleepCallback(1000, this, &timedMerge);
|
||||
m_registeredCallback = g_loop.registerSleepCallback(1000, this, &timedMerge, "RdbIndex::timedMerge");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -44,6 +44,7 @@ public:
|
||||
}
|
||||
|
||||
const char *getFilename() const { return m_file.getFilename(); }
|
||||
int64_t getFileSize() const { return m_file.getFileSize(); }
|
||||
|
||||
BigFile *getFile ( ) { return &m_file; }
|
||||
|
||||
@ -109,11 +110,12 @@ private:
|
||||
docidsconst_ptr_t m_docIds;
|
||||
GbMutex m_docIdsMtx;
|
||||
|
||||
// newest record pending merge into m_docIds
|
||||
GbMutex m_pendingDocIdsMtx;
|
||||
GbMutex m_pendingMergeMtx;
|
||||
pthread_cond_t m_pendingMergeCond;
|
||||
bool m_pendingMerge;
|
||||
|
||||
// newest record pending merge into m_docIds
|
||||
GbMutex m_pendingDocIdsMtx;
|
||||
docids_ptr_t m_pendingDocIds;
|
||||
uint64_t m_prevPendingDocId;
|
||||
|
||||
|
@ -5,13 +5,15 @@
|
||||
RdbIndexQuery::RdbIndexQuery(RdbBase *base)
|
||||
: RdbIndexQuery(base ? (base->getGlobalIndex() ? base->getGlobalIndex() : docidsconst_ptr_t()) : docidsconst_ptr_t(),
|
||||
base ? (base->getTreeIndex() ? base->getTreeIndex()->getDocIds() : docidsconst_ptr_t()) : docidsconst_ptr_t(),
|
||||
base ? base->getNumFiles() : 0) {
|
||||
base ? base->getNumFiles() : 0,
|
||||
base ? base->hasPendingGlobalIndexJob() : false) {
|
||||
}
|
||||
|
||||
RdbIndexQuery::RdbIndexQuery(docidsconst_ptr_t globalIndexData, docidsconst_ptr_t treeIndexData, int32_t numFiles)
|
||||
RdbIndexQuery::RdbIndexQuery(docidsconst_ptr_t globalIndexData, docidsconst_ptr_t treeIndexData, int32_t numFiles, bool hasPendingGlobalIndexJob)
|
||||
: m_globalIndexData(globalIndexData)
|
||||
, m_treeIndexData(treeIndexData)
|
||||
, m_numFiles(numFiles) {
|
||||
, m_numFiles(numFiles)
|
||||
, m_hasPendingGlobalIndexJob(hasPendingGlobalIndexJob) {
|
||||
}
|
||||
|
||||
RdbIndexQuery::~RdbIndexQuery() {
|
||||
|
@ -15,6 +15,7 @@ public:
bool documentIsInFile(uint64_t docId, int32_t filenum) const;

int32_t getNumFiles() const { return m_numFiles; }
bool hasPendingGlobalIndexJob() const { return m_hasPendingGlobalIndexJob; }

void printIndex() const;

@ -23,11 +24,12 @@ private:
RdbIndexQuery(const RdbIndexQuery&);
RdbIndexQuery& operator=(const RdbIndexQuery&);

RdbIndexQuery(docidsconst_ptr_t globalIndexData, docidsconst_ptr_t treeIndexData, int32_t numFiles);
RdbIndexQuery(docidsconst_ptr_t globalIndexData, docidsconst_ptr_t treeIndexData, int32_t numFiles, bool hasPendingGlobalIndexJob);

docidsconst_ptr_t m_globalIndexData;
docidsconst_ptr_t m_treeIndexData;
int32_t m_numFiles;
bool m_hasPendingGlobalIndexJob;
};

#endif // GB_RDBINDEXQUERY_H
@ -2152,7 +2152,7 @@ skip:
|
||||
///////
|
||||
|
||||
bool RdbList::posdbMerge_r(RdbList **lists, int32_t numLists, const char *startKey, const char *endKey, int32_t minRecSizes,
|
||||
rdbid_t rdbId, bool removeNegKeys, bool useIndexFile, collnum_t collNum, int32_t startFileNum, bool isRealMerge) {
|
||||
rdbid_t rdbId, bool removeNegKeys, bool useIndexFile, collnum_t collNum, int32_t startFileIndex, bool isRealMerge) {
|
||||
logTrace(g_conf.m_logTraceRdbList, "BEGIN");
|
||||
|
||||
// sanity
|
||||
@ -2374,7 +2374,8 @@ bool RdbList::posdbMerge_r(RdbList **lists, int32_t numLists, const char *startK
|
||||
}
|
||||
|
||||
if (index->exist(docId)) {
|
||||
if (i != filePos) {
|
||||
// cater for newly dumped file that are not in global index
|
||||
if (i != filePos && !rdbIndexQuery.hasPendingGlobalIndexJob() && i != (rdbIndexQuery.getNumFiles() - 1)) {
|
||||
// docId found in newer file
|
||||
gbshutdownCorrupted();
|
||||
}
|
||||
@ -2393,7 +2394,7 @@ bool RdbList::posdbMerge_r(RdbList **lists, int32_t numLists, const char *startK
|
||||
|
||||
logTrace(g_conf.m_logTraceRdbList, "Found docId=%" PRIu64" with filePos=%" PRId32, docId, filePos);
|
||||
|
||||
if (filePos > (mini + listOffset) + startFileNum) {
|
||||
if (filePos > (mini + listOffset) + startFileIndex) {
|
||||
// docId is present in newer file
|
||||
logTrace(g_conf.m_logTraceRdbList, "docId in newer list. skip. filePos=%" PRId32" mini=%" PRId16" listOffset=%" PRId32,
|
||||
filePos, mini, listOffset);
|
||||
|
@ -273,7 +273,7 @@ private:
|
||||
int32_t hintOffset, const char *hintKey, const char *filename);
|
||||
|
||||
bool posdbMerge_r(RdbList **lists, int32_t numLists, const char *startKey, const char *endKey, int32_t minRecSizes,
|
||||
rdbid_t rdbId, bool removeNegKeys, bool useIndexFile, collnum_t collNum, int32_t startFileNum, bool isRealMerge);
|
||||
rdbid_t rdbId, bool removeNegKeys, bool useIndexFile, collnum_t collNum, int32_t startFileIndex, bool isRealMerge);
|
||||
|
||||
// the unaltered raw list. keys may be outside of [m_startKey,m_endKey]
|
||||
char *m_list;
|
||||
|
@ -145,6 +145,8 @@ bool RdbMap::writeMap ( bool allDone ) {
return true;
}

log(LOG_INFO, "db: Saving %s", m_file.getFilename());

// open a new file
if ( ! m_file.open ( O_RDWR | O_CREAT | O_TRUNC ) ) {
log(LOG_ERROR, "%s:%s: END. Could not open %s for writing: %s. Returning false.",
|
RdbMerge.cpp
@ -11,6 +11,8 @@ RdbMerge g_merge;
|
||||
|
||||
RdbMerge::RdbMerge()
|
||||
: m_mergeSpaceCoordinator(NULL),
|
||||
m_isAcquireLockJobSubmited(false),
|
||||
m_isLockAquired(false),
|
||||
m_doneMerging(false),
|
||||
m_getListOutstanding(false),
|
||||
m_startFileNum(0),
|
||||
@ -19,6 +21,7 @@ RdbMerge::RdbMerge()
|
||||
m_targetFile(NULL),
|
||||
m_targetMap(NULL),
|
||||
m_targetIndex(NULL),
|
||||
m_doneRegenerateFiles(false),
|
||||
m_isMerging(false),
|
||||
m_isHalted(false),
|
||||
m_dump(),
|
||||
@ -57,11 +60,11 @@ bool RdbMerge::merge(rdbid_t rdbId,
|
||||
int32_t niceness)
|
||||
{
|
||||
if(m_isHalted) {
|
||||
logTrace(g_conf.m_logTraceRdbBase, "END, merging is halted");
|
||||
logTrace(g_conf.m_logTraceRdbMerge, "END, merging is halted");
|
||||
return true;
|
||||
}
|
||||
if(m_isMerging) {
|
||||
logTrace(g_conf.m_logTraceRdbBase, "END, already merging");
|
||||
logTrace(g_conf.m_logTraceRdbMerge, "END, already merging");
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -89,6 +92,7 @@ bool RdbMerge::merge(rdbid_t rdbId,
|
||||
m_numFiles = numFiles;
|
||||
m_fixedDataSize = base->getFixedDataSize();
|
||||
m_niceness = niceness;
|
||||
m_doneRegenerateFiles = false;
|
||||
m_doneMerging = false;
|
||||
m_ks = rdb->getKeySize();
|
||||
|
||||
@ -96,19 +100,11 @@ bool RdbMerge::merge(rdbid_t rdbId,
|
||||
// . just get from the files, not tree (not cache?)
|
||||
KEYMIN(m_startKey,m_ks);
|
||||
|
||||
// if we're resuming a killed merge, set m_startKey to last
|
||||
// key the map knows about.
|
||||
// the dump will start dumping at the end of the targetMap's data file.
|
||||
if ( m_targetMap->getNumRecs() > 0 ) {
|
||||
log(LOG_INIT,"db: Resuming a killed merge.");
|
||||
m_targetMap->getLastKey(m_startKey);
|
||||
KEYINC(m_startKey,m_ks);
|
||||
}
|
||||
|
||||
//calculate how much space we need for resulting merged file
|
||||
m_spaceNeededForMerge = base->getSpaceNeededForMerge(m_startFileNum,m_numFiles);
|
||||
|
||||
if(!g_loop.registerSleepCallback(5000, this, getLockWrapper, 0, true))
|
||||
|
||||
|
||||
if(!g_loop.registerSleepCallback(5000, this, getLockWrapper, "RdbMerge::getLockWrapper", 0, true))
|
||||
return true;
|
||||
|
||||
// we're now merging since we accepted to try
|
||||
@ -117,36 +113,126 @@ bool RdbMerge::merge(rdbid_t rdbId,
|
||||
return false;
|
||||
}
|
||||
|
||||
void RdbMerge::acquireLockWrapper(void *state) {
|
||||
RdbMerge *that = static_cast<RdbMerge*>(state);
|
||||
|
||||
if(that->m_mergeSpaceCoordinator->acquire(that->m_spaceNeededForMerge)) {
|
||||
log(LOG_INFO,"Rdbmerge(%p)::getLock(), m_rdbId=%d: got lock for %" PRIu64 " bytes",
|
||||
that, (int)that->m_rdbId, that->m_spaceNeededForMerge);
|
||||
g_loop.unregisterSleepCallback(that, getLockWrapper);
|
||||
that->m_isLockAquired = true;
|
||||
} else {
|
||||
log(LOG_INFO, "Rdbmerge(%p)::getLock(), m_rdbId=%d: Didn't get lock for %" PRIu64 " bytes; retrying in a bit...",
|
||||
that, (int)that->m_rdbId, that->m_spaceNeededForMerge);
|
||||
}
|
||||
}
|
||||
|
||||
void RdbMerge::acquireLockDoneWrapper(void *state, job_exit_t exit_type) {
|
||||
RdbMerge *that = static_cast<RdbMerge*>(state);
|
||||
|
||||
that->m_isAcquireLockJobSubmited = false;
|
||||
|
||||
if (exit_type != job_exit_normal) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (that->m_isLockAquired) {
|
||||
that->gotLock();
|
||||
}
|
||||
}
|
||||
|
||||
void RdbMerge::getLockWrapper(int /*fd*/, void *state) {
|
||||
log(LOG_TRACE,"RdbMerge::getLockWrapper(%p)",state);
|
||||
logTrace(g_conf.m_logTraceRdbMerge, "RdbMerge::getLockWrapper(%p)", state);
|
||||
RdbMerge *that = static_cast<RdbMerge*>(state);
|
||||
that->getLock();
|
||||
}
|
||||
|
||||
|
||||
void RdbMerge::getLock() {
|
||||
log(LOG_DEBUG,"Rdbmerge(%p)::getLock(), m_rdbId=%d",this,(int)m_rdbId);
|
||||
if(m_mergeSpaceCoordinator->acquire(m_spaceNeededForMerge)) {
|
||||
log(LOG_INFO,"Rdbmerge(%p)::getLock(), m_rdbId=%d: got lock for %" PRIu64 " bytes", this, (int)m_rdbId, m_spaceNeededForMerge);
|
||||
g_loop.unregisterSleepCallback(this,getLockWrapper);
|
||||
logDebug(g_conf.m_logDebugMerge, "Rdbmerge(%p)::getLock(), m_rdbId=%d",this,(int)m_rdbId);
|
||||
bool isAcquireLockJobSubmited = m_isAcquireLockJobSubmited.exchange(true);
|
||||
if (isAcquireLockJobSubmited) {
|
||||
return;
|
||||
}
|
||||
|
||||
gotLock();
|
||||
} else
|
||||
log(LOG_INFO,"Rdbmerge(%p)::getLock(), m_rdbId=%d: Didn't get lock for %" PRIu64 " bytes; retrying in a bit...", this, (int)m_rdbId, m_spaceNeededForMerge);
|
||||
if (g_jobScheduler.submit(acquireLockWrapper, acquireLockDoneWrapper, this, thread_type_file_merge, 0)) {
|
||||
return;
|
||||
}
|
||||
|
||||
log(LOG_WARN, "db: merge: Unable to submit acquire lock job. Running on main thread!");
|
||||
m_isAcquireLockJobSubmited = false;
|
||||
acquireLockWrapper(this);
|
||||
acquireLockDoneWrapper(this, job_exit_normal);
|
||||
}
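getLock() above submits the blocking disk-space acquisition to the job scheduler and, if the submit fails, runs the work and its done-callback inline so the merge state machine still advances. A hedged sketch of that submit-or-run-inline shape; submitJob(), doWork() and doWorkDone() are placeholders, not the engine's JobScheduler API:

#include <cstdio>

typedef void (*work_fn)(void *state);
typedef void (*done_fn)(void *state, int exit_type);

// stand-in for a thread-pool submit; pretend the pool refused the job
static bool submitJob(work_fn, done_fn, void *) { return false; }

static void doWork(void *)           { printf("blocking work\n"); }
static void doWorkDone(void *, int)  { printf("resume state machine\n"); }

void kickOffWork(void *state) {
	if (submitJob(doWork, doWorkDone, state)) {
		return;                          // completes asynchronously via doWorkDone()
	}
	// worker pool unavailable: degrade gracefully and run on the calling thread
	doWork(state);
	doWorkDone(state, 0 /* normal exit */);
}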
|
||||
|
||||
void RdbMerge::gotLockWrapper(int /*fd*/, void *state) {
|
||||
log(LOG_TRACE,"RdbMerge::gotLockWrapper(%p)", state);
|
||||
logTrace(g_conf.m_logTraceRdbMerge, "RdbMerge::gotLockWrapper(%p)", state);
|
||||
RdbMerge *that = static_cast<RdbMerge*>(state);
|
||||
g_loop.unregisterSleepCallback(state, gotLockWrapper);
|
||||
|
||||
that->gotLock();
|
||||
}
|
||||
|
||||
void RdbMerge::regenerateFilesWrapper(void *state) {
|
||||
RdbMerge *that = static_cast<RdbMerge*>(state);
|
||||
if (that->m_targetMap->getFileSize() == 0) {
|
||||
log( LOG_INFO, "db: merge: Attempting to generate map file for data file %s* of %" PRId64" bytes. May take a while.",
|
||||
that->m_targetFile->getFilename(), that->m_targetFile->getFileSize() );
|
||||
|
||||
// this returns false and sets g_errno on error
|
||||
if (!that->m_targetMap->generateMap(that->m_targetFile)) {
|
||||
log(LOG_ERROR, "db: merge: Map generation failed.");
|
||||
gbshutdownCorrupted();
|
||||
}
|
||||
|
||||
log(LOG_INFO, "db: merge: Map generation succeeded.");
|
||||
}
|
||||
|
||||
if (that->m_targetIndex && that->m_targetIndex->getFileSize() == 0) {
|
||||
log(LOG_INFO, "db: merge: Attempting to generate index file for data file %s* of %" PRId64" bytes. May take a while.",
|
||||
that->m_targetFile->getFilename(), that->m_targetFile->getFileSize() );
|
||||
|
||||
// this returns false and sets g_errno on error
|
||||
if (!that->m_targetIndex->generateIndex(that->m_targetFile)) {
|
||||
logError("db: merge: Index generation failed for %s.", that->m_targetFile->getFilename());
|
||||
gbshutdownCorrupted();
|
||||
}
|
||||
|
||||
log(LOG_INFO, "db: merge: Index generation succeeded.");
|
||||
}
|
||||
}
|
||||
|
||||
void RdbMerge::regenerateFilesDoneWrapper(void *state, job_exit_t exit_type) {
|
||||
RdbMerge *that = static_cast<RdbMerge*>(state);
|
||||
that->m_doneRegenerateFiles = true;
|
||||
that->gotLock();
|
||||
}
|
||||
|
||||
// . returns false if blocked, true otherwise
|
||||
// . sets g_errno on error
|
||||
bool RdbMerge::gotLock() {
|
||||
// regenerate map/index if needed
|
||||
if (!m_doneRegenerateFiles &&
|
||||
m_targetFile->getFileSize() > 0 &&
|
||||
((m_targetIndex && m_targetIndex->getFileSize() == 0) || m_targetMap->getFileSize() == 0)) {
|
||||
log(LOG_WARN, "db: merge: Regenerating map/index from a killed merge.");
|
||||
|
||||
if (g_jobScheduler.submit(regenerateFilesWrapper, regenerateFilesDoneWrapper, this, thread_type_file_merge, 0)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
log(LOG_WARN, "db: merge: Unable to submit regenerate files job. Running on main thread!");
|
||||
regenerateFilesWrapper(this);
|
||||
}
|
||||
|
||||
// if we're resuming a killed merge, set m_startKey to last
|
||||
// key the map knows about.
|
||||
// the dump will start dumping at the end of the targetMap's data file.
|
||||
if (m_targetMap->getNumRecs() > 0) {
|
||||
log(LOG_INIT,"db: Resuming a killed merge.");
|
||||
m_targetMap->getLastKey(m_startKey);
|
||||
KEYINC(m_startKey,m_ks);
|
||||
}
|
||||
|
||||
// . get last mapped offset
|
||||
// . this may actually be smaller than the file's actual size
|
||||
// but the excess is not in the map, so we need to do it again
|
||||
@ -177,7 +263,8 @@ bool RdbMerge::gotLock() {
|
||||
/// global index jobs. means this mechanism will have to be changed.
|
||||
if (base->hasPendingGlobalIndexJob()) {
|
||||
// wait until no more pending global index job
|
||||
g_loop.registerSleepCallback(1000, this, gotLockWrapper);
|
||||
g_loop.registerSleepCallback(1000, this, gotLockWrapper, "RdbMerge::gotLockWrapper");
|
||||
log(LOG_INFO, "db: merge: Waiting for global index job to complete");
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -230,7 +317,7 @@ void RdbMerge::haltMerge() {
|
||||
void RdbMerge::doSleep() {
|
||||
log(LOG_WARN, "db: Merge had error: %s. Sleeping and retrying.", mstrerror(g_errno));
|
||||
g_errno = 0;
|
||||
g_loop.registerSleepCallback(1000, this, tryAgainWrapper);
|
||||
g_loop.registerSleepCallback(1000, this, tryAgainWrapper, "RdbMerge::tryAgainWrapper");
|
||||
}
|
||||
|
||||
// . return false if blocked, otherwise true
|
||||
@ -246,7 +333,7 @@ bool RdbMerge::resumeMerge() {
|
||||
// . sets g_errno on error
|
||||
// . we return true if it blocked
|
||||
if (!getNextList()) {
|
||||
logTrace(g_conf.m_logTraceRdbDump, "END. getNextList blocked. list=%p", &m_list);
|
||||
logTrace(g_conf.m_logTraceRdbMerge, "END. getNextList blocked. list=%p", &m_list);
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -254,27 +341,27 @@ bool RdbMerge::resumeMerge() {
|
||||
// so we should sleep and retry...
|
||||
if (g_errno == ENOMEM) {
|
||||
doSleep();
|
||||
logTrace(g_conf.m_logTraceRdbDump, "END. out of memory. list=%p", &m_list);
|
||||
logTrace(g_conf.m_logTraceRdbMerge, "END. out of memory. list=%p", &m_list);
|
||||
return false;
|
||||
}
|
||||
|
||||
// if list is empty or we had an error then we're done
|
||||
if (g_errno || m_doneMerging) {
|
||||
doneMerging();
|
||||
logTrace(g_conf.m_logTraceRdbDump, "END. error/done merging. list=%p", &m_list);
|
||||
logTrace(g_conf.m_logTraceRdbMerge, "END. error/done merging. list=%p", &m_list);
|
||||
return true;
|
||||
}
|
||||
|
||||
// return if this blocked
|
||||
if (!filterList()) {
|
||||
logTrace(g_conf.m_logTraceRdbDump, "END. filterList blocked. list=%p", &m_list);
|
||||
logTrace(g_conf.m_logTraceRdbMerge, "END. filterList blocked. list=%p", &m_list);
|
||||
return false;
|
||||
}
|
||||
|
||||
// . otherwise dump the list we read to our target file
|
||||
// . this returns false if blocked, true otherwise
|
||||
if (!dumpList()) {
|
||||
logTrace(g_conf.m_logTraceRdbDump, "END. dumpList blocked. list=%p", &m_list);
|
||||
logTrace(g_conf.m_logTraceRdbMerge, "END. dumpList blocked. list=%p", &m_list);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -308,7 +395,7 @@ bool RdbMerge::getNextList() {
|
||||
}
|
||||
|
||||
bool RdbMerge::getAnotherList() {
|
||||
log(LOG_DEBUG,"db: Getting another list for merge.");
|
||||
logDebug(g_conf.m_logDebugMerge, "db: Getting another list for merge.");
|
||||
|
||||
// clear it up in case it was already set
|
||||
g_errno = 0;
|
||||
@ -381,32 +468,32 @@ void RdbMerge::gotListWrapper(void *state, RdbList * /*list*/, Msg5 * /*msg5*/)
|
||||
// so we should sleep and retry
|
||||
if (g_errno == ENOMEM) {
|
||||
THIS->doSleep();
|
||||
logTrace(g_conf.m_logTraceRdbDump, "END. out of memory. list=%p", &THIS->m_list);
|
||||
logTrace(g_conf.m_logTraceRdbMerge, "END. out of memory. list=%p", &THIS->m_list);
|
||||
return;
|
||||
}
|
||||
|
||||
// if g_errno we're done
|
||||
if (g_errno || THIS->m_doneMerging) {
|
||||
THIS->doneMerging();
|
||||
logTrace(g_conf.m_logTraceRdbDump, "END. error/done merging. list=%p", &THIS->m_list);
|
||||
logTrace(g_conf.m_logTraceRdbMerge, "END. error/done merging. list=%p", &THIS->m_list);
|
||||
return;
|
||||
}
|
||||
|
||||
// return if this blocked
|
||||
if (!THIS->filterList()) {
|
||||
logTrace(g_conf.m_logTraceRdbDump, "END. filterList blocked. list=%p", &THIS->m_list);
|
||||
logTrace(g_conf.m_logTraceRdbMerge, "END. filterList blocked. list=%p", &THIS->m_list);
|
||||
return;
|
||||
}
|
||||
|
||||
// return if this blocked
|
||||
if (!THIS->dumpList()) {
|
||||
logTrace(g_conf.m_logTraceRdbDump, "END. dumpList blocked. list=%p", &THIS->m_list);
|
||||
logTrace(g_conf.m_logTraceRdbMerge, "END. dumpList blocked. list=%p", &THIS->m_list);
|
||||
return;
|
||||
}
|
||||
|
||||
// return if this blocked
|
||||
if (!THIS->getNextList()) {
|
||||
logTrace(g_conf.m_logTraceRdbDump, "END. getNextList blocked. list=%p", &THIS->m_list);
|
||||
logTrace(g_conf.m_logTraceRdbMerge, "END. getNextList blocked. list=%p", &THIS->m_list);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -427,7 +514,7 @@ void RdbMerge::tryAgainWrapper(int /*fd*/, void *state) {
|
||||
|
||||
// return if this blocked
|
||||
if (!THIS->getNextList()) {
|
||||
logTrace(g_conf.m_logTraceRdbDump, "END. getNextList blocked. list=%p", &THIS->m_list);
|
||||
logTrace(g_conf.m_logTraceRdbMerge, "END. getNextList blocked. list=%p", &THIS->m_list);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -438,7 +525,7 @@ void RdbMerge::tryAgainWrapper(int /*fd*/, void *state) {
|
||||
void RdbMerge::filterListWrapper(void *state) {
|
||||
RdbMerge *THIS = (RdbMerge *)state;
logTrace(g_conf.m_logTraceRdbDump, "BEGIN. list=%p m_startKey=%s", &THIS->m_list, KEYSTR(THIS->m_startKey, THIS->m_ks));
logTrace(g_conf.m_logTraceRdbMerge, "BEGIN. list=%p m_startKey=%s", &THIS->m_list, KEYSTR(THIS->m_startKey, THIS->m_ks));
if (THIS->m_rdbId == RDB_SPIDERDB) {
dedupSpiderdbList(&(THIS->m_list));

@ -446,7 +533,7 @@ void RdbMerge::filterListWrapper(void *state) {
// filterTitledbList(&(THIS->m_list));
}
logTrace(g_conf.m_logTraceRdbDump, "END. list=%p", &THIS->m_list);
logTrace(g_conf.m_logTraceRdbMerge, "END. list=%p", &THIS->m_list);
}
// similar to gotListWrapper but we call dumpList() before dedupList()

@ -454,18 +541,18 @@ void RdbMerge::filterDoneWrapper(void *state, job_exit_t exit_type) {
// get a ptr to ourselves
RdbMerge *THIS = (RdbMerge *)state;
logTrace(g_conf.m_logTraceRdbDump, "BEGIN. list=%p m_startKey=%s", &THIS->m_list, KEYSTR(THIS->m_startKey, THIS->m_ks));
logTrace(g_conf.m_logTraceRdbMerge, "BEGIN. list=%p m_startKey=%s", &THIS->m_list, KEYSTR(THIS->m_startKey, THIS->m_ks));
for (;;) {
// return if this blocked
if (!THIS->dumpList()) {
logTrace(g_conf.m_logTraceRdbDump, "END. dumpList blocked. list=%p", &THIS->m_list);
logTrace(g_conf.m_logTraceRdbMerge, "END. dumpList blocked. list=%p", &THIS->m_list);
return;
}
// return if this blocked
if (!THIS->getNextList()) {
logTrace(g_conf.m_logTraceRdbDump, "END. getNextList blocked. list=%p", &THIS->m_list);
logTrace(g_conf.m_logTraceRdbMerge, "END. getNextList blocked. list=%p", &THIS->m_list);
return;
}

@ -473,20 +560,20 @@ void RdbMerge::filterDoneWrapper(void *state, job_exit_t exit_type) {
// so we should sleep and retry
if (g_errno == ENOMEM) {
THIS->doSleep();
logTrace(g_conf.m_logTraceRdbDump, "END. out of memory. list=%p", &THIS->m_list);
logTrace(g_conf.m_logTraceRdbMerge, "END. out of memory. list=%p", &THIS->m_list);
return;
}
// if g_errno we're done
if (g_errno || THIS->m_doneMerging) {
THIS->doneMerging();
logTrace(g_conf.m_logTraceRdbDump, "END. error/done merging. list=%p", &THIS->m_list);
logTrace(g_conf.m_logTraceRdbMerge, "END. error/done merging. list=%p", &THIS->m_list);
return;
}
// return if this blocked
if (!THIS->filterList()) {
logTrace(g_conf.m_logTraceRdbDump, "END. filterList blocked. list=%p", &THIS->m_list);
logTrace(g_conf.m_logTraceRdbMerge, "END. filterList blocked. list=%p", &THIS->m_list);
return;
}

@ -544,7 +631,7 @@ bool RdbMerge::filterList() {
// similar to gotListWrapper but we call getNextList() before dumpList()
void RdbMerge::dumpListWrapper(void *state) {
// debug msg
log(LOG_DEBUG,"db: Dump of list completed: %s.",mstrerror(g_errno));
logDebug(g_conf.m_logDebugMerge, "db: Dump of list completed: %s.",mstrerror(g_errno));
// get a ptr to ourselves
RdbMerge *THIS = (RdbMerge *)state;

@ -556,12 +643,12 @@ void RdbMerge::dumpListWrapper(void *state) {
// collection reset or deleted while RdbDump.cpp was writing out?
if (g_errno == ENOCOLLREC) {
THIS->doneMerging();
logTrace(g_conf.m_logTraceRdbDump, "END. error/done merging. list=%p", &THIS->m_list);
logTrace(g_conf.m_logTraceRdbMerge, "END. error/done merging. list=%p", &THIS->m_list);
return;
}
// return if this blocked
if (!THIS->getNextList()) {
logTrace(g_conf.m_logTraceRdbDump, "END. getNextList blocked. list=%p", &THIS->m_list);
logTrace(g_conf.m_logTraceRdbMerge, "END. getNextList blocked. list=%p", &THIS->m_list);
return;
}

@ -574,7 +661,7 @@ void RdbMerge::dumpListWrapper(void *state) {
// m_startKey back to the startkey of this list, because
// it is *now* only advanced on successful dump!!
THIS->doSleep();
logTrace(g_conf.m_logTraceRdbDump, "END. out of memory. list=%p", &THIS->m_list);
logTrace(g_conf.m_logTraceRdbMerge, "END. out of memory. list=%p", &THIS->m_list);
return;
}

@ -582,19 +669,19 @@ void RdbMerge::dumpListWrapper(void *state) {
// . if list is empty we're done
if (g_errno || THIS->m_doneMerging) {
THIS->doneMerging();
logTrace(g_conf.m_logTraceRdbDump, "END. error/done merging. list=%p", &THIS->m_list);
logTrace(g_conf.m_logTraceRdbMerge, "END. error/done merging. list=%p", &THIS->m_list);
return;
}
// return if this blocked
if (!THIS->filterList()) {
logTrace(g_conf.m_logTraceRdbDump, "END. filterList blocked. list=%p", &THIS->m_list);
logTrace(g_conf.m_logTraceRdbMerge, "END. filterList blocked. list=%p", &THIS->m_list);
return;
}
// return if this blocked
if (!THIS->dumpList()) {
logTrace(g_conf.m_logTraceRdbDump, "END. dumpList blocked. list=%p", &THIS->m_list);
logTrace(g_conf.m_logTraceRdbMerge, "END. dumpList blocked. list=%p", &THIS->m_list);
return;
}

@ -612,7 +699,7 @@ bool RdbMerge::dumpList() {
m_doneMerging = true;
}
log(LOG_DEBUG,"db: Dumping list.");
logDebug(g_conf.m_logDebugMerge, "db: Dumping list.");
logTrace(g_conf.m_logTraceRdbMerge, "list=%p startKey=%s",
&m_list, KEYSTR(m_startKey, m_ks));

@ -673,8 +760,10 @@ void RdbMerge::doneMerging() {
void RdbMerge::relinquishMergespaceLock() {
if(m_mergeSpaceCoordinator)
if(m_mergeSpaceCoordinator) {
m_mergeSpaceCoordinator->relinquish();
m_isLockAquired = false;
}
}
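
The last hunk above is easy to miss: relinquishMergespaceLock() now also clears m_isLockAquired, so the next merge round knows it has to re-acquire the merge-space lock instead of assuming it still holds it. A minimal standalone sketch of that pattern (the Coordinator and Merger types here are hypothetical stand-ins, not the project's MergeSpaceCoordinator/RdbMerge):

#include <cstdio>

// Hypothetical stand-in for the real merge-space coordinator.
struct Coordinator {
	bool acquire()    { return true; }   // pretend the shared merge-space lock is granted
	void relinquish() { }                // give the shared lock back
};

struct Merger {
	Coordinator *m_coordinator = nullptr;
	bool m_isLockAcquired = false;

	bool acquireLock() {
		if (m_coordinator && m_coordinator->acquire())
			m_isLockAcquired = true;
		return m_isLockAcquired;
	}

	// Mirrors the fixed relinquishMergespaceLock(): giving the lock back
	// must also reset the flag, otherwise the next merge round believes
	// it still owns merge space it has already released.
	void relinquishLock() {
		if (m_coordinator) {
			m_coordinator->relinquish();
			m_isLockAcquired = false;
		}
	}
};

int main() {
	Coordinator c;
	Merger m;
	m.m_coordinator = &c;
	m.acquireLock();
	m.relinquishLock();
	std::printf("lock held after relinquish: %d\n", m.m_isLockAcquired ? 1 : 0); // prints 0
	return 0;
}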

RdbMerge.h

@ -56,15 +56,20 @@ public:
bool isMerging() const { return m_isMerging; }
rdbid_t getRdbId() const { return m_rdbId; }
// stop further actions
void haltMerge();
void mergeIncorporated(const RdbBase *);
private:
static void acquireLockWrapper(void *state);
static void acquireLockDoneWrapper(void *state, job_exit_t exit_type);
static void getLockWrapper(int /*fd*/, void *state);
static void regenerateFilesWrapper(void *state);
static void regenerateFilesDoneWrapper(void *state, job_exit_t exit_type);
void getLock();
static void filterListWrapper(void *state);
static void filterDoneWrapper(void *state, job_exit_t exit_type);

@ -93,6 +98,9 @@ private:
MergeSpaceCoordinator *m_mergeSpaceCoordinator;
std::atomic<bool> m_isAcquireLockJobSubmited;
bool m_isLockAquired;
// set to true when m_startKey wraps back to 0
bool m_doneMerging;

@ -103,9 +111,11 @@ private:
int32_t m_startFileNum;
int32_t m_numFiles;
int32_t m_fixedDataSize;
BigFile *m_targetFile;
RdbMap *m_targetMap;
RdbIndex *m_targetIndex;
bool m_doneRegenerateFiles;
char m_startKey[MAX_KEY_BYTES];

RdbTree.cpp

@ -1629,8 +1629,8 @@ bool RdbTree::collExists(collnum_t coll) const {
return true;
}
// we don't lock because variable is already atomic
bool RdbTree::isSaving() const {
ScopedLock sl(m_mtx);
return m_isSaving;
}

@ -1923,7 +1923,8 @@ bool RdbTree::fastSave(const char *dir, bool useThread, void *state, void (*call
}
// return true if already in the middle of saving
if (m_isSaving) {
bool isSaving = m_isSaving.exchange(true);
if (isSaving) {
logTrace(g_conf.m_logTraceRdbTree, "END. Is already saving. Returning false.");
return false;
}

@ -1941,9 +1942,6 @@ bool RdbTree::fastSave(const char *dir, bool useThread, void *state, void (*call
// assume no error
m_errno = 0;
// no adding to the tree now
m_isSaving = true;
if (useThread) {
// make this a thread now
if (g_jobScheduler.submit(saveWrapper, saveDoneWrapper, this, thread_type_unspecified_io, 1/*niceness*/)) {

@ -1982,15 +1980,6 @@ void RdbTree::saveWrapper ( void *state ) {
// this returns false and sets g_errno on error
that->fastSave_unlocked();
// . resume adding to the tree
// . this will also allow other threads to be queued
// . if we did this at the end of the thread we could end up with
// an overflow of queued SAVETHREADs
that->m_isSaving = false;
// we do not need to be saved now?
that->m_needsSave = false;
if (g_errno && !that->m_errno) {
that->m_errno = g_errno;
}

@ -2002,6 +1991,15 @@ void RdbTree::saveWrapper ( void *state ) {
that->m_dbname, that->m_numUsedNodes, that->m_bytesWritten);
}
// . resume adding to the tree
// . this will also allow other threads to be queued
// . if we did this at the end of the thread we could end up with
// an overflow of queued SAVETHREADs
that->m_isSaving = false;
// we do not need to be saved now?
that->m_needsSave = false;
logTrace(g_conf.m_logTraceRdbTree, "END");
}

@ -269,7 +269,7 @@ private:
std::atomic<bool> m_isSaving;
// true if tree was modified and needs to be saved
std::atomic<bool> m_needsSave;
bool m_needsSave;
char m_rdbId;
char m_dir[128];
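
The fastSave() hunk above swaps an unsynchronized read-then-set of m_isSaving for a single std::atomic<bool>::exchange, which closes the window where two callers both see false and both start saving. A small self-contained sketch of that guard (the Saver class here is illustrative, not the project's RdbTree):

#include <atomic>
#include <cstdio>

struct Saver {
	std::atomic<bool> m_isSaving{false};

	// Returns false if a save is already running; otherwise claims the
	// "saving" flag and runs the save. exchange() makes the test and the
	// set one atomic step, so two concurrent callers cannot both proceed.
	bool fastSave() {
		if (m_isSaving.exchange(true)) {
			std::puts("already saving, skipping");
			return false;
		}
		std::puts("saving...");
		m_isSaving = false;   // resume normal operation when done
		return true;
	}
};

int main() {
	Saver s;
	s.fastSave();   // "saving..."
	s.fastSave();   // runs again, since the first call already cleared the flag
	return 0;
}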

@ -359,7 +359,7 @@ bool Rebalance::scanRdb ( ) {
if ( base && base->isMerging() ) {
log("rebal: waiting for merge on %s for coll #%" PRId32" to complete",
rdb->getDbname(),(int32_t)m_collnum);
g_loop.registerSleepCallback ( 1000,NULL,sleepWrapper,1);
g_loop.registerSleepCallback(1000, NULL, sleepWrapper, "Rebalance::sleepWrapper", 1);
m_registered = true;
// we blocked, return false
return false;

@ -368,7 +368,7 @@ bool Rebalance::scanRdb ( ) {
if ( rdb->isMerging() ) {
log("rebal: waiting for merge on %s for coll ??? to complete",
rdb->getDbname());
g_loop.registerSleepCallback ( 1000,NULL,sleepWrapper,1);
g_loop.registerSleepCallback(1000, NULL, sleepWrapper, "Rebalance::sleepWrapper", 1);
m_registered = true;
// we blocked, return false
return false;

@ -129,7 +129,7 @@ bool Repair::init ( ) {
m_isSuspended = false;
m_saveRepairState = false;
m_isRetrying = false;
if( ! g_loop.registerSleepCallback( 1 , NULL , repairWrapper ) ) {
if (!g_loop.registerSleepCallback(1, NULL, repairWrapper, "Repair::repairWrapper")) {
log(LOG_WARN, "repair: Failed register callback.");
return false;
}
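
From here on, most hunks follow the same pattern: Loop's registerSleepCallback/registerReadCallback/registerWriteCallback calls gain a string naming the callback, so a slow or misbehaving callback can be reported in the log by name rather than by a bare function pointer. A self-contained sketch of why carrying a name with each registered callback helps (this MiniLoop is only an illustration of the idea, not the project's Loop class):

#include <chrono>
#include <cstdio>
#include <functional>
#include <string>
#include <vector>

// Illustrative event loop: each callback is stored with a human-readable
// name so a slow callback can be reported as "Repair::repairWrapper"
// instead of an anonymous function pointer.
struct MiniLoop {
	struct Entry { std::function<void()> fn; std::string name; };
	std::vector<Entry> m_callbacks;

	void registerSleepCallback(std::function<void()> fn, const char *name) {
		m_callbacks.push_back({std::move(fn), name});
	}

	void runOnce(double slowThresholdMs) {
		for (const Entry &e : m_callbacks) {
			auto start = std::chrono::steady_clock::now();
			e.fn();
			double ms = std::chrono::duration<double, std::milli>(
				std::chrono::steady_clock::now() - start).count();
			if (ms > slowThresholdMs)
				std::printf("slow callback '%s': %.3f ms\n", e.name.c_str(), ms);
		}
	}
};

int main() {
	MiniLoop loop;
	loop.registerSleepCallback([] { /* pretend work */ }, "Repair::repairWrapper");
	loop.runOnce(-1.0);   // negative threshold: report every callback by name
	return 0;
}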

@ -431,11 +431,6 @@ bool SearchInput::set ( TcpSocket *sock , HttpRequest *r ) {
m_doSiteClustering = false;
}
if ( m_q.m_hasQuotaField ) {
m_doSiteClustering = false;
m_doDupContentRemoval = false;
}
if ( ! m_doSiteClustering )
m_hideAllClustered = false;

@ -121,8 +121,7 @@ void SpiderLoop::init() {
}
// sleep for .1 seconds = 100ms
if (!g_loop.registerSleepCallback(50,this,doneSleepingWrapperSL))
{
if (!g_loop.registerSleepCallback(50, this, doneSleepingWrapperSL, "SpiderLoop::doneSleepingWrapperSL")) {
log(LOG_ERROR, "build: Failed to register timer callback. Spidering is permanently disabled. Restart to fix.");
}

@ -739,11 +739,11 @@ static void handleRequest54(UdpSlot *udpSlot, int32_t /*niceness*/) {
winnersp->m_timesUsed++;
// sanity
if ( (int32_t)sizeof(ProxyReply) > TMPBUFSIZE ){g_process.shutdownAbort(true);}
if ( (int32_t)sizeof(ProxyReply) > SHORTSENDBUFFERSIZE ) { g_process.shutdownAbort(true); }
// and give proxy ip/port back to the requester so they can
// use that to download their url
ProxyReply *prep = (ProxyReply *)udpSlot->m_tmpBuf;
ProxyReply *prep = (ProxyReply *)udpSlot->m_shortSendBuffer;
prep->m_proxyIp = winnersp->m_ip;
prep->m_proxyPort = winnersp->m_port;

@ -761,7 +761,7 @@ static void handleRequest54(UdpSlot *udpSlot, int32_t /*niceness*/) {
// sensitive to the spider policy.
prep->m_numBannedProxies = numBannedProxies;
//char *p = udpSlot->m_tmpBuf;
//char *p = udpSlot->m_shortSendBuffer;
//*(int32_t *)p = winnersp->m_ip ; p += 4;
//*(int16_t *)p = winnersp->m_port; p += 2;
// and the loadbucket id

@ -814,7 +814,7 @@ static void handleRequest54(UdpSlot *udpSlot, int32_t /*niceness*/) {
}
// send the proxy ip/port/LBid back to user
g_udpServer.sendReply(udpSlot->m_tmpBuf, sizeof(ProxyReply), udpSlot->m_tmpBuf, sizeof(ProxyReply), udpSlot);
g_udpServer.sendReply(udpSlot->m_shortSendBuffer, sizeof(ProxyReply), udpSlot->m_shortSendBuffer, sizeof(ProxyReply), udpSlot);
}
// . use msg 0x55 to say you are done using the proxy

@ -881,7 +881,7 @@ bool initSpiderProxyStuff() {
buildProxyTable ();
// reset spider proxy stats every hour to alleviate false positives (moved from Process.cpp)
if (!g_loop.registerSleepCallback(3600000, NULL, resetProxyStatWrapper, 0)) {
if (!g_loop.registerSleepCallback(3600000, NULL, resetProxyStatWrapper, "SpiderProxy::resetProxyStatWrapper", 0)) {
gbshutdownResourceError();
}
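
The handleRequest54() hunks are part of a rename that runs through this commit: the per-slot m_tmpBuf (TMPBUFSIZE) becomes m_shortSendBuffer (SHORTSENDBUFFERSIZE), making it explicit that the buffer exists so short replies can be built without a heap allocation. Building a small reply in such a fixed buffer looks roughly like the sketch below; the Reply/Slot types are illustrative only, not the project's ProxyReply/UdpSlot, and the static_assert is a compile-time variant of the runtime size check in the diff.

#include <cstdint>
#include <cstring>
#include <cstdio>

constexpr int SHORT_SEND_BUFFER_SIZE = 250;   // mirrors SHORTSENDBUFFERSIZE (250)

// Illustrative stand-ins for the real UdpSlot / ProxyReply.
struct Reply { int32_t ip; int16_t port; };

struct Slot {
	char m_shortSendBuffer[SHORT_SEND_BUFFER_SIZE];
};

int main() {
	// A reply that cannot fit in the short buffer is a programming error.
	static_assert(sizeof(Reply) <= SHORT_SEND_BUFFER_SIZE,
	              "reply must fit in the short send buffer");

	Slot slot;
	Reply r = {0x7f000001, 8080};
	std::memcpy(slot.m_shortSendBuffer, &r, sizeof(r));   // no malloc needed
	std::printf("built %zu-byte reply in the slot's short buffer\n", sizeof(r));
	return 0;
}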

@ -429,8 +429,8 @@ static void dump_assorted_statistics(FILE *fp) {
fprintf(fp,"socket:limit_hit:%lu\n",socket_limit_hit_count.load());
fprintf(fp,"socket:slots_incoming:%d\n",g_udpServer.getNumUsedSlotsIncoming());
fprintf(fp,"socket:tcp_in_use:%d\n",g_httpServer.m_tcp.m_numUsed.load());
fprintf(fp,"misc::corrupt_list_reads:%d\n",g_numCorrupt);
fprintf(fp,"spider:current_spiders:%d\n",g_spiderLoop.getNumSpidersOut());
fprintf(fp,"misc:corrupt_list_reads:%d\n",g_numCorrupt);
fprintf(fp,"misc:current_spiders:%d\n",g_spiderLoop.getNumSpidersOut());
}

@ -1686,7 +1686,7 @@ static void sendReplyWrapper3(int fd, void *state) {
/// and we want to get the updated data instead of the old data
static void sendReplyWrapper2 ( void *state ) {
// delay for 1sec hoping that msg4 has been processed
g_loop.registerSleepCallback(1000, state, sendReplyWrapper3);
g_loop.registerSleepCallback(1000, state, sendReplyWrapper3, "Tagdb::sendReplyWrapper3");
return;
}

@ -247,16 +247,16 @@ bool TcpServer::init ( void (* requestHandler)(TcpSocket *s) ,
// . accept/connects generate both POLLIN and POLLOUT bands @ same time
// . use a niceness of 0 so traffic from our server to a browser takes
// precedence over spider traffic
if ( ! g_loop.registerReadCallback (m_sock,this,acceptSocketWrapper,0))
if (!g_loop.registerReadCallback(m_sock, this, acceptSocketWrapper, "TcpServer::acceptSocketWrapper", 0))
return false;
}
// . register to receives wake up calls every 500ms so we can
// check for TcpSockets that have timed out
// . check every 500ms now since we have timeout of 1000ms for ads
if ( ! g_loop.registerSleepCallback (500,this,readTimeoutPollWrapper,0))
if (!g_loop.registerSleepCallback(500, this, readTimeoutPollWrapper, "TcpServer::readTimeoutPollWrapper", 0))
return false;
if ( ! g_loop.registerSleepCallback (30*1000,this,timePollWrapper,0))
if (!g_loop.registerSleepCallback(30 * 1000, this, timePollWrapper, "TcpServer::timePollWrapper", 0))
return false;
// return true on success
m_ready = true;

@ -971,7 +971,7 @@ TcpSocket *TcpServer::wrapSocket ( int sd , int32_t niceness , bool isIncoming )
// . TODO: we'd have to set timestamps in Loop to check for timeou
// . use niceness levels of 0 so this server-to-browser traffic takes
// precedence over spider traffic
if (g_loop.registerReadCallback (sd,this,readSocketWrapper,niceness)) {
if (g_loop.registerReadCallback(sd, this, readSocketWrapper, "TcpServer::readSocketWrapper", niceness)) {
return s;
}

@ -1810,10 +1810,8 @@ int32_t TcpServer::writeSocket ( TcpSocket *s ) {
// need to listen for writability now since our write
// failed to write everythin gout
if ( ! s->m_writeRegistered &&
! g_loop.registerWriteCallback(s->m_sd,
this,
writeSocketWrapper,
s->m_niceness)){
!g_loop.registerWriteCallback(s->m_sd, this, writeSocketWrapper,
"TcpServer::writeSocketWrapper", s->m_niceness)) {
log("tcp: failed to reg write callback1 for "
"sd=%i", s->m_sd);
return -1;

@ -1840,12 +1838,9 @@ int32_t TcpServer::writeSocket ( TcpSocket *s ) {
// need to listen for writability now since our write
// failed to write everythin gout
if ( ! s->m_writeRegistered &&
! g_loop.registerWriteCallback(s->m_sd,
this,
writeSocketWrapper,
s->m_niceness)){
log("tcp: failed to reg write callback1 for "
"sd=%i", s->m_sd);
!g_loop.registerWriteCallback(s->m_sd, this, writeSocketWrapper,
"TcpServer::writeSocketWrapper", s->m_niceness)) {
log(LOG_WARN, "tcp: failed to reg write callback1 for sd=%i", s->m_sd);
return -1;
}
// do not keep doing it otherwise select() goes crazy

@ -2012,7 +2007,8 @@ int32_t TcpServer::connectSocket ( TcpSocket *s ) {
return 0;
// make select() listen on this fd for when it can write
if(!g_loop.registerWriteCallback( s->m_sd, this, writeSocketWrapper, s->m_niceness)) {
if (!g_loop.registerWriteCallback(s->m_sd, this, writeSocketWrapper,
"TcpServer::writeSocketWrapper", s->m_niceness)) {
log("tcp: failed to reg write callback2 for sd=%i", s->m_sd);
return -1;
}

@ -2761,10 +2757,8 @@ int TcpServer::sslHandshake ( TcpSocket *s ) {
// read for all file descriptors at all times. it is only
// writes we have to turn on and off.
if ( ! s->m_writeRegistered &&
! g_loop.registerWriteCallback(s->m_sd,
this,
writeSocketWrapper,
s->m_niceness)){
!g_loop.registerWriteCallback(s->m_sd, this, writeSocketWrapper,
"TcpServer::writeSocketWrapper", s->m_niceness)) {
log("tcp: failed to reg write callback3 for "
"sd=%i", s->m_sd);
return -1;

@ -265,7 +265,7 @@ bool UdpServer::init ( uint16_t port, UdpProtocol *proto,
// we have a handler registered with the Loop class
// . this makes m_sock non-blocking, too
// . use the original niceness for this
if ( ! g_loop.registerReadCallback ( m_sock, this, readPollWrapper, 0 )) {
if (!g_loop.registerReadCallback(m_sock, this, readPollWrapper, "UdpServer::readPollWrapper", 0)) {
return false;
}

@ -274,7 +274,7 @@ bool UdpServer::init ( uint16_t port, UdpProtocol *proto,
// . it's low so we can claim any unclaimed tokens!
// . now resends are at 20ms... i'd go lower, but sigtimedqueue() only
// has a timer resolution of 20ms, probably due to kernel time slicin
if ( ! g_loop.registerSleepCallback ( pollTime, this, timePollWrapper, 0 )) {
if (!g_loop.registerSleepCallback(pollTime, this, timePollWrapper, "UdpServer::timePollWrapper", 0)) {
return false;
}

@ -492,7 +492,7 @@ void UdpServer::sendErrorReply_unlocked(UdpSlot *slot, int32_t errnum) {
g_errno = 0;
// make a little msg
char *msg = slot->m_tmpBuf;
char *msg = slot->m_shortSendBuffer;
*(int32_t *)msg = htonl(errnum) ;
// set the m_localErrno in "slot" so it will set the dgrams error bit

@ -648,7 +648,8 @@ bool UdpServer::doSending_unlocked(UdpSlot *slot, bool allowResends, int64_t now
m_needToSend = true;
// ok, now it should
if ( ! m_writeRegistered ) {
if( !g_loop.registerWriteCallback ( m_sock, this, sendPollWrapper, 0 ) ) {
if (!g_loop.registerWriteCallback(m_sock, this, sendPollWrapper,
"UdpServer::sendPollWrapper", 0)) {
logError("registerWriteCallback failed");
return false;
}

@ -1064,7 +1065,7 @@ int32_t UdpServer::readSock(UdpSlot **slotPtr, int64_t now) {
// . if this blocks, that sucks, we'll probably get
// another untethered read... oh well...
// . ack from 0 to infinite to prevent more from coming
tmp.sendAck(m_sock,now,dgramNum, 1/*weInit'ed?*/, true/*cancelTrans?*/);
tmp.sendCancelAck(m_sock, now, dgramNum);
//return 1;
goto discard;
}

@ -1988,9 +1989,6 @@ bool UdpServer::readTimeoutPoll ( int64_t now ) {
slot->getResendCount() >= slot->m_maxResends &&
// did not get all acks
slot->m_sentBitsOn > slot->m_readAckBitsOn &&
// fix too many timing out slot msgs when a host is
// hogging the cpu on a niceness 0 thing...
//elapsed > 5000 &&
// respect slot's timeout too!
elapsed > timeout &&
// only do this when sending a request

@ -2011,7 +2009,7 @@ bool UdpServer::readTimeoutPoll ( int64_t now ) {
slot->getHostId(),elapsed);
// keep going
continue;
}
}
// . this should clear the sentBits of all unacked dgrams
// so they can be resent
// . this doubles m_resendTime and updates m_resendCount

@ -2064,9 +2062,8 @@ void UdpServer::destroySlot_unlocked( UdpSlot *slot ) {
char *sbuf = slot->m_sendBufAlloc;
int32_t sbufSize = slot->m_sendBufAllocSize;
// don't free our static buffer
if ( rbuf == slot->m_tmpBuf ) rbuf = NULL;
// sometimes handlers will use our slots m_tmpBuf to store the reply
if ( sbuf == slot->m_tmpBuf ) sbuf = NULL;
// sometimes handlers will use our slots m_shortSendBuffer to store the reply
if ( sbuf == slot->m_shortSendBuffer ) sbuf = NULL;
// nothing allocated. used by Msg13.cpp g_fakeBuf
if ( sbufSize == 0 ) sbuf = NULL;

@ -175,6 +175,8 @@ public:
std::vector<UdpStatistic> getStatistics() const;
GbMutex& getLock() { return m_mtx; }
private:
static void readPollWrapper(int fd, void *state);
static void timePollWrapper(int fd, void *state);

UdpSlot.cpp

@ -285,12 +285,31 @@ bool UdpSlot::sendSetup(char *msg, int32_t msgSize, char *alloc, int32_t allocSi
// resets a UdpSlot for a resend
void UdpSlot::prepareForResend ( int64_t now , bool resendAll ) {
// clear all if reset is true
if ( resendAll ) {
for ( int32_t i = 0 ; i < m_dgramsToSend ; i++ )
clrBit ( i , m_readAckBits2 );
// clear all if resend is true
if (resendAll) {
for (int32_t i = 0; i < m_dgramsToSend; ++i) {
clrBit(i, m_readAckBits2);
}
// we should clear previously receive dgrams as it could be different
for (int32_t i = 0; i < m_dgramsToRead; ++i) {
clrBit(i, m_readBits2);
clrBit(i, m_sentAckBits2);
}
m_dgramsToRead = 0;
m_readBitsOn = 0;
m_sentAckBitsOn = 0;
m_readAckBitsOn = 0;
if (m_readBuf) {
mfree(m_readBuf, m_readBufMaxSize, "UdpSlot");
m_readBuf = NULL;
m_readBufMaxSize = 0;
m_readBufSize = 0;
}
}
// how many sentBits we cleared
int32_t cleared = 0;
// clear each sent bit if it hasn't gotten an ACK

@ -476,7 +495,7 @@ int32_t UdpSlot::sendDatagramOrAck ( int sock, bool allowResends, int64_t now ){
//log("sendDatagramOrAck");
// if acks we've sent isn't caught up to what we read, send an ack
if ( m_sentAckBitsOn < m_readBitsOn && m_proto->useAcks() )
return sendAck ( sock , now );
return sendPlainAck ( sock , now );
// we may have received an ack for an implied resend (from ack gap)
// so we clear some bits, but then got an ACK back later
while ( m_nextToSend < m_dgramsToSend &&

@ -1192,7 +1211,6 @@ bool UdpSlot::readDatagramOrAck ( const void *readBuffer_,
// . this dgram should let us know how big the entire msg is
// . so allocate space for m_readBuf
// . we may already have a read buf if caller passed one in
retry:
if ( ! m_readBuf ) {
if ( ! makeReadBuf ( msgSize , m_dgramsToRead ) ) {
log(LOG_WARN, "udp: Failed to allocate %" PRId32" bytes to read request or reply for udp socket.", msgSize);

@ -1200,11 +1218,10 @@ bool UdpSlot::readDatagramOrAck ( const void *readBuffer_,
}
}
// if we don't have enough room alloc a read buffer
if ( msgSize > m_readBufMaxSize ) {
// now we must alloc a buffer
m_readBuf = NULL;
goto retry;
// message size shouldn't change
if (msgSize > m_readBufMaxSize) {
g_udpServer.getLock().unlock();
gbshutdownLogicError();
}
// return false if we have no room for the entire reply

@ -1429,6 +1446,9 @@ bool UdpSlot::makeReadBuf ( int32_t msgSize , int32_t numDgrams ) {
log(LOG_WARN, "udp: Failed to allocate %" PRId32" bytes to read request or reply on udp socket.", msgSize);
return false;
}
// initialize readBuf to track down corruption
memset(m_readBuf, 0xfe, msgSize);
}
m_readBufMaxSize = msgSize;
// let the caller know we're good

UdpSlot.h

@ -54,7 +54,7 @@
// . the max size of an incoming request for a hot udp server
// . we cannot call malloc so it must fit in here
// . now we need tens of thousands of udp slots, so keep this small
#define TMPBUFSIZE (250)
#define SHORTSENDBUFFERSIZE (250)

class Host;

@ -121,7 +121,7 @@ public:
int32_t m_readBufSize; // w/o the dgram headers.
int32_t m_readBufMaxSize;
protected:
protected: //actually private but UdpServer references it.
// set the UdpSlot's protocol, endpoint info, transId, timeout
void connect(UdpProtocol *proto, sockaddr_in *endPoint, Host *host, int32_t hostId, int32_t transId,
int64_t timeout, int64_t now, int32_t niceness);

@ -257,7 +257,13 @@ private:
// . returns -2 if nothing to send, -1 on error, 0 if blocked,
// 1 if sent something
// . should only be called by sendDatagramOrAck() above
int32_t sendAck(int sock, int64_t now, int32_t dgramNum = -1, int32_t weInitiated = -2, bool cancelTrans = false);
int32_t sendPlainAck(int sock, int64_t now) {
return sendAck(sock, now, -1, -2, false);
}
int32_t sendCancelAck(int sock, int64_t now, int32_t dgramNum) {
return sendAck(sock, now, dgramNum, 1, true);
}
int32_t sendAck(int sock, int64_t now, int32_t dgramNum, int32_t weInitiated, bool cancelTrans);

// . or by readDataGramOrAck() to read a faked ack for protocols that
// don't use ACKs

@ -428,11 +434,9 @@ protected:
} m_slotStatus;

public:
// . for the hot udp server, we cannot call malloc in the sig handler
// so we set m_readBuf to this to read in int16_t requests
// . caller should pre-allocated m_readBuf when calling sendRequest() if he expects a large reply
// . incoming requests simply cannot be bigger than this for the hot udp server
char m_tmpBuf[TMPBUFSIZE];
// In some places allocating a buffer for sendign is inconvenient, especially for realyl short replies.
// Those places can use this buffer
char m_shortSendBuffer[SHORTSENDBUFFERSIZE];
};

extern int32_t g_cancelAcksSent;
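
The UdpSlot.h hunk above replaces a single sendAck() with default arguments (dgramNum = -1, weInitiated = -2, cancelTrans = false) by two intention-revealing wrappers, so call sites read sendPlainAck(...) or sendCancelAck(...) instead of a string of magic values. A standalone sketch of that refactoring (the Slot type here is illustrative, not the project's UdpSlot):

#include <cstdint>
#include <cstdio>

// Illustrative slot with one fully explicit worker and two named wrappers,
// mirroring the sendAck/sendPlainAck/sendCancelAck split in the diff.
struct Slot {
	// the explicit worker: every parameter must be spelled out
	int32_t sendAck(int sock, int64_t now, int32_t dgramNum,
	                int32_t weInitiated, bool cancelTrans) {
		std::printf("ack sock=%d dgram=%d init=%d cancel=%d\n",
		            sock, dgramNum, weInitiated, (int)cancelTrans);
		(void)now;
		return 1;
	}

	// "just acknowledge what we have read so far"
	int32_t sendPlainAck(int sock, int64_t now) {
		return sendAck(sock, now, -1, -2, false);
	}

	// "acknowledge dgramNum and cancel the whole transaction"
	int32_t sendCancelAck(int sock, int64_t now, int32_t dgramNum) {
		return sendAck(sock, now, dgramNum, 1, true);
	}
};

int main() {
	Slot s;
	s.sendPlainAck(3, 0);
	s.sendCancelAck(3, 0, 7);   // reads better than sendAck(3, 0, 7, 1, true)
	return 0;
}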

@ -21,7 +21,7 @@ UrlBlockList::UrlBlockList()
bool UrlBlockList::init() {
log(LOG_INFO, "Initializing UrlBlockList with %s", m_filename);
if (!g_loop.registerSleepCallback(60000, this, &reload, 0, true)) {
if (!g_loop.registerSleepCallback(60000, this, &reload, "UrlBlockList::reload", 0, true)) {
log(LOG_WARN, "UrlBlockList: Failed register callback.");
return false;
}

XmlDoc.cpp

@ -6906,14 +6906,11 @@ int32_t *XmlDoc::getIp ( ) {
logTrace( g_conf.m_logTraceXmlDoc, "SLEEPING %" PRId32" msecs", delay);
// make a callback wrapper.
// this returns false and sets g_errno on error
if ( g_loop.registerSleepCallback ( delay ,
m_masterState ,
delayWrapper,//m_masterLoop
m_niceness ))
if (g_loop.registerSleepCallback(delay, m_masterState, delayWrapper, "XmlDoc::delayWrapper", m_niceness))
// wait for it, return -1 since we blocked
return (int32_t *)-1;
// if was not able to register, ignore delay
}
return (int32_t *)-1;
// if was not able to register, ignore delay
}

if ( m_didDelay && ! m_didDelayUnregister ) {
g_loop.unregisterSleepCallback(m_masterState,delayWrapper);

@ -1226,7 +1226,7 @@ bool XmlDoc::hashUrl ( HashTableX *tt, bool urlOnly ) { // , bool isStatusDoc )
hi.m_prefix = "urlhash";
if ( ! hashString(buf,blen,&hi) ) return false;
if (m_contentLen > 0) {
if (m_contentLen > 0 || (m_setFromTitleRec && size_utf8Content > 0)) {
setStatus("hashing url mid domain");
// update parms

main.cpp

@ -1774,12 +1774,12 @@ int main2 ( int argc , char *argv[] ) {
// . put this in here instead of Rdb.cpp because we don't want generator commands merging on us
// . niceness is 1
// BR: Upped from 2 sec to 60. No need to check for merge every 2 seconds.
if ( !g_loop.registerSleepCallback( 60000, NULL, attemptMergeAllCallback, 1, true ) ) {
if (!g_loop.registerSleepCallback(60000, NULL, attemptMergeAllCallback, "Rdb::attemptMergeAllCallback", 1, true)) {
log( LOG_WARN, "db: Failed to init merge sleep callback." );
}
// try to sync parms (and collection recs) with host 0
if ( !g_loop.registerSleepCallback(1000, NULL, Parms::tryToSyncWrapper, 0 ) ) {
if (!g_loop.registerSleepCallback(1000, NULL, Parms::tryToSyncWrapper, "Parms::tryToSyncWrapper", 0)) {
return 0;
}

@ -1856,7 +1856,7 @@ bool doCmd ( const char *cmd , int32_t hostId , const char *filename ,
// register sleep callback to get started
if ( ! g_loop.registerSleepCallback(1, NULL, doCmdAll , 0 ) ) {
if (!g_loop.registerSleepCallback(1, NULL, doCmdAll, "doCmdAll", 0)) {
log(LOG_WARN, "admin: Loop init failed.");
return false;
}

@ -20,7 +20,7 @@ int main(int argc, char **argv) {
return 1;
}
if (strcmp(argv[1], "--h") == 0 || strcmp(argv[1], "--help") == 0 ) {
if (strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "--help") == 0 ) {
print_usage(argv[0]);
return 1;
}

@ -177,7 +177,7 @@ int main(int argc, char **argv) {
RdbIndexQuery rdbIndexQuery(base);
for (auto it = testData.begin(); it != testData.end(); ++it) {
assert(rdbIndexQuery.getFilePos(*it >> RdbBase::s_docIdFileIndex_docIdOffset) == static_cast<int32_t>(*it & RdbBase::s_docIdFileIndex_filePosMask));
assert(rdbIndexQuery.getFilePos(*it >> RdbBase::s_docIdFileIndex_docIdOffset, false) == static_cast<int32_t>(*it & RdbBase::s_docIdFileIndex_filePosMask));
}
uint64_t diff = gettimeofdayInMicroseconds() - start;