forked from Mirrors/privacore-open-source-search-engine
Merge branch 'master' of github.com:privacore/open-source-search-engine
This commit is contained in:
21
Hostdb.cpp
21
Hostdb.cpp
@ -1668,22 +1668,33 @@ int32_t Hostdb::getHostIdWithSpideringEnabled ( uint32_t shardNum ) {
|
||||
return hosts [ hostNum ].m_hostId ;
|
||||
}
|
||||
|
||||
|
||||
// NOTE(review): this span looks like a rendered diff of
// Hostdb::getLeastLoadedInShard() -- the old one-argument signature and the
// new (shardNum, niceness) signature both appear, and several statements show
// up twice (old form first, then new form). The '|' / '||||' lines are
// presumably diff-viewer residue, not C++ -- confirm against the real file.
//
// What the code shown does: walks the m_numHostsPerShard hosts returned by
// getShard(shardNum) and remembers the index of the live (not isDead()) host
// with the smallest m_pingInfo.m_udpSlotsInUseIncoming. In the niceness-aware
// form, hosts with m_spiderEnabled false are skipped when niceness > 0 and
// hosts with m_queryEnabled false are skipped when niceness == 0.
//
// Returns &shard[index] for the chosen host. If no host was chosen, the old
// form returns 'shard' itself (i.e. &shard[0]) and the new form returns
// bestDead.
Host *Hostdb::getLeastLoadedInShard ( uint32_t shardNum ) {
|
||||
// if niceness 0 can't pick noquery host.
|
||||
// if niceness 1 can't pick nospider host.
|
||||
Host *Hostdb::getLeastLoadedInShard ( uint32_t shardNum , char niceness ) {
|
||||
int32_t minOutstandingRequests = 0x7fffffff;
|
||||
int32_t minOutstandingRequestsIndex = -1;
|
||||
Host *shard = getShard ( shardNum );
|
||||
Host *bestDead = NULL;
|
||||
for(int32_t i = 0; i < m_numHostsPerShard; i++) {
|
||||
Host *hh = &shard[i];
|
||||
// don't pick a 'no spider' host if niceness is 1
|
||||
if ( niceness > 0 && ! hh->m_spiderEnabled ) continue;
|
||||
// don't pick a 'no query' host if niceness is 0
|
||||
if ( niceness == 0 && ! hh->m_queryEnabled ) continue;
|
||||
// bestDead latches the first host that passed the niceness filters above;
// it is assigned before the isDead() check, so it may be alive or dead.
if ( ! bestDead ) bestDead = hh;
|
||||
if(isDead(hh)) continue;
|
||||
// log("host %"INT32 " numOutstanding is %"INT32, hh->m_hostId,
|
||||
// hh->m_pingInfo.m_udpSlotsInUseIncoming);
|
||||
// strict '>' means a host that ties the current minimum replaces it, so the
// last host among equals is the one selected.
if(hh->m_pingInfo.m_udpSlotsInUseIncoming > minOutstandingRequests) continue;
|
||||
if ( hh->m_pingInfo.m_udpSlotsInUseIncoming >
|
||||
minOutstandingRequests )
|
||||
continue;
|
||||
|
||||
minOutstandingRequests = hh->m_pingInfo.m_udpSlotsInUseIncoming;
|
||||
minOutstandingRequests =hh->m_pingInfo.m_udpSlotsInUseIncoming;
|
||||
minOutstandingRequestsIndex = i;
|
||||
}
|
||||
if(minOutstandingRequestsIndex == -1) return shard;
|
||||
// we should never return a nospider/noquery host depending on
|
||||
// the niceness, so return bestDead
|
||||
// NOTE(review): bestDead is still NULL when every host in the shard was
// skipped by the niceness filters, so this line can return NULL. The
// Msg22.cpp and Tagdb.cpp call sites shown in this diff read
// firstHost->m_hostId without a NULL check -- confirm whether that
// configuration is possible.
if(minOutstandingRequestsIndex == -1) return bestDead;//shard;
|
||||
return &shard[minOutstandingRequestsIndex];
|
||||
}
|
||||
|
||||
|
2
Hostdb.h
2
Hostdb.h
@ -450,7 +450,7 @@ class Hostdb {
|
||||
|
||||
//Host *getLiveHostInGroup ( int32_t groupId );
|
||||
Host *getLiveHostInShard ( int32_t shardNum );
|
||||
Host *getLeastLoadedInShard ( uint32_t shardNum );
|
||||
Host *getLeastLoadedInShard ( uint32_t shardNum , char niceness );
|
||||
int32_t getHostIdWithSpideringEnabled ( uint32_t shardNum );
|
||||
|
||||
// in the entire cluster. return host #0 if it's alive, otherwise
|
||||
|
46
Msg22.cpp
46
Msg22.cpp
@ -4,6 +4,7 @@
|
||||
#include "Titledb.h"
|
||||
#include "UdpServer.h"
|
||||
|
||||
|
||||
static void handleRequest22 ( UdpSlot *slot , int32_t netnice ) ;
|
||||
|
||||
Msg22Request::Msg22Request()
|
||||
@ -165,46 +166,13 @@ bool Msg22::getTitleRec ( Msg22Request *r ,
|
||||
if ( hostNum >= numHosts ) { char *xx = NULL; *xx = 0; }
|
||||
firstHostId = hosts [ hostNum ].m_hostId ;
|
||||
*/
|
||||
|
||||
Host *firstHost ;
|
||||
// if niceness 0 can't pick noquery host.
|
||||
// if niceness 1 can't pick nospider host.
|
||||
firstHost = g_hostdb.getLeastLoadedInShard ( shardNum, r->m_niceness );
|
||||
int32_t firstHostId = firstHost->m_hostId;
|
||||
|
||||
// get our group
|
||||
int32_t allNumHosts = g_hostdb.getNumHostsPerShard();
|
||||
Host *allHosts = g_hostdb.getShard ( shardNum );//Group ( groupId );
|
||||
|
||||
// put all alive hosts in this array
|
||||
Host *cand[32];
|
||||
int64_t nc = 0;
|
||||
for ( int32_t i = 0 ; i < allNumHosts ; i++ ) {
|
||||
// get that host
|
||||
Host *hh = &allHosts[i];
|
||||
// skip if dead
|
||||
if ( g_hostdb.isDead(hh) ) continue;
|
||||
// add it if alive
|
||||
cand[nc++] = hh;
|
||||
}
|
||||
// if none alive, make them all candidates then
|
||||
bool allDead = (nc == 0);
|
||||
for ( int32_t i = 0 ; allDead && i < allNumHosts ; i++ )
|
||||
cand[nc++] = &allHosts[i];
|
||||
|
||||
// route based on docid region, not parity, because we want to hit
|
||||
// the urldb page cache as much as possible
|
||||
int64_t sectionWidth =((128LL*1024*1024)/nc)+1;//(DOCID_MASK/nc)+1LL;
|
||||
// we mod by 1MB since tied scores resort to sorting by docid
|
||||
// so we don't want to overload the host responsible for the lowest
|
||||
// range of docids. CAUTION: do this for msg22 too!
|
||||
// in this way we should still ensure a pretty good biased urldb
|
||||
// cache...
|
||||
// . TODO: fix the urldb cache preload logic
|
||||
int32_t hostNum = (docId % (128LL*1024*1024)) / sectionWidth;
|
||||
if ( hostNum < 0 ) hostNum = 0; // watch out for negative docids
|
||||
if ( hostNum >= nc ) { char *xx = NULL; *xx = 0; }
|
||||
int32_t firstHostId = cand [ hostNum ]->m_hostId ;
|
||||
|
||||
// while this prevents tfndb seeks, it also causes bottlenecks
|
||||
// if one host is particularly slow, because load balancing is
|
||||
// bypassed.
|
||||
//if ( ! g_conf.m_useBiasedTfndb ) firstHostId = -1;
|
||||
// flag it
|
||||
m_outstanding = true;
|
||||
r->m_inUse = 1;
|
||||
|
||||
|
23
Tagdb.cpp
23
Tagdb.cpp
@ -2803,24 +2803,15 @@ bool Msg8a::launchGetRequests ( ) {
|
||||
//uint32_t gid = g_hostdb.getGroupId ( m_rdbId , &startKey , true );
|
||||
//Host *group = g_hostdb.getGroup ( gid );
|
||||
int32_t shardNum = getShardNum ( m_rdbId , &startKey );//, true );
|
||||
Host *group = g_hostdb.getShard ( shardNum );
|
||||
|
||||
//int32_t numTwins = g_hostdb.getNumHostsPerShard();
|
||||
// use top byte!
|
||||
uint8_t *sks = (uint8_t *)&startKey;
|
||||
uint8_t top = sks[sizeof(TAGDB_KEY)-1];
|
||||
//int32_t hostNum = 0;
|
||||
//if ( numTwins == 2 && (top & 0x80) ) hostNum = 1;
|
||||
// TODO: fix this!
|
||||
//if ( numTwins >= 3 ) { char *xx=NULL;*xx=0; }
|
||||
// support more than 2 stripes now...
|
||||
int32_t hostNum = top % g_hostdb.getNumHostsPerShard();
|
||||
int32_t hostId = group[hostNum].m_hostId;
|
||||
|
||||
Host *firstHost ;
|
||||
// if niceness 0 can't pick noquery host.
|
||||
// if niceness 1 can't pick nospider host.
|
||||
firstHost = g_hostdb.getLeastLoadedInShard ( shardNum , m_niceness );
|
||||
int32_t firstHostId = firstHost->m_hostId;
|
||||
|
||||
// . launch this request, even if to ourselves
|
||||
// . TODO: just use msg0!!
|
||||
bool status = m->getList ( hostId , // hostId
|
||||
bool status = m->getList ( firstHostId , // hostId
|
||||
0 , // ip
|
||||
0 , // port
|
||||
0 , // maxCacheAge
|
||||
@ -2837,7 +2828,7 @@ bool Msg8a::launchGetRequests ( ) {
|
||||
true , // error correction?
|
||||
true , // include tree?
|
||||
true , // doMerge?
|
||||
-1 , // firstHostId
|
||||
firstHostId , // firstHostId
|
||||
0 , // startFileNum
|
||||
-1 , // numFiles
|
||||
3600*24*365 );// timeout
|
||||
|
11
XmlDoc.cpp
11
XmlDoc.cpp
@ -20898,6 +20898,10 @@ char *XmlDoc::getIsSiteRoot ( ) {
|
||||
if ( ! site || site == (char *)-1 ) return (char *)site;
|
||||
// get our url without the http:// or https://
|
||||
char *u = getFirstUrl()->getHost();
|
||||
if ( ! u ) {
|
||||
g_errno = EBADURL;
|
||||
return NULL;
|
||||
}
|
||||
// assume valid now
|
||||
m_isSiteRootValid = true;
|
||||
// get it
|
||||
@ -21808,7 +21812,12 @@ bool XmlDoc::logIt ( SafeBuf *bb ) {
|
||||
// like how we index it, do not include the filename. so we can
|
||||
// have a bunch of pathdepth 0 urls with filenames like xyz.com/abc.htm
|
||||
if ( m_firstUrlValid ) {
|
||||
int32_t pd = m_firstUrl.getPathDepth(false);
|
||||
int32_t pd = -1;
|
||||
// fix core
|
||||
if ( m_firstUrl.m_url &&
|
||||
m_firstUrl.m_ulen > 0 &&
|
||||
m_firstUrl.m_path )
|
||||
pd = m_firstUrl.getPathDepth(false);
|
||||
sb->safePrintf("pathdepth=%"INT32" ",pd);
|
||||
}
|
||||
else {
|
||||
|
Reference in New Issue
Block a user