Merge branch 'master' of github.com:privacore/open-source-search-engine

This commit is contained in:
Ivan Skytte Jørgensen
2015-11-17 16:17:27 +01:00
5 changed files with 41 additions and 62 deletions

@ -1668,22 +1668,33 @@ int32_t Hostdb::getHostIdWithSpideringEnabled ( uint32_t shardNum ) {
return hosts [ hostNum ].m_hostId ;
}
// Select a host from shard `shardNum` to send a request to.
//
// Among the hosts of the shard that pass the niceness filter and are not
// reported dead by isDead(), the one with the smallest
// m_pingInfo.m_udpSlotsInUseIncoming is chosen.
//
// NOTE(review): this listing comes from a merge diff whose +/- markers were
// lost, so several statements appear in both their old and new form (the two
// signatures, the two "> minOutstandingRequests" tests, the two assignments
// and the two "== -1" fallbacks). Confirm against the real Hostdb.cpp which
// of each pair is live before relying on the details below.
Host *Hostdb::getLeastLoadedInShard ( uint32_t shardNum ) {
// if niceness 0 can't pick noquery host.
// if niceness 1 can't pick nospider host.
Host *Hostdb::getLeastLoadedInShard ( uint32_t shardNum , char niceness ) {
// smallest incoming-slot count seen so far, and the index of its host;
// index -1 means "no live, eligible host found yet"
int32_t minOutstandingRequests = 0x7fffffff;
int32_t minOutstandingRequestsIndex = -1;
Host *shard = getShard ( shardNum );
// NOTE(review): despite the name, this is set to the FIRST host that passes
// the niceness filters below, before the isDead() test, so it may be a live
// host. It stays NULL when no host in the shard passes the filters.
Host *bestDead = NULL;
for(int32_t i = 0; i < m_numHostsPerShard; i++) {
Host *hh = &shard[i];
// don't pick a 'no spider' host if niceness is 1
// (the test is "niceness > 0", so any positive niceness is treated this way)
if ( niceness > 0 && ! hh->m_spiderEnabled ) continue;
// don't pick a 'no query' host if niceness is 0
if ( niceness == 0 && ! hh->m_queryEnabled ) continue;
// remember the first eligible host as the fallback return value
if ( ! bestDead ) bestDead = hh;
// dead hosts are never chosen as the least-loaded host
if(isDead(hh)) continue;
// log("host %"INT32 " numOutstanding is %"INT32, hh->m_hostId,
// hh->m_pingInfo.m_udpSlotsInUseIncoming);
// skip hosts that are strictly busier than the best so far; because the
// comparison is ">", a host with an equal count replaces the earlier one
if(hh->m_pingInfo.m_udpSlotsInUseIncoming > minOutstandingRequests) continue;
if ( hh->m_pingInfo.m_udpSlotsInUseIncoming >
minOutstandingRequests )
continue;
minOutstandingRequests = hh->m_pingInfo.m_udpSlotsInUseIncoming;
minOutstandingRequests =hh->m_pingInfo.m_udpSlotsInUseIncoming;
minOutstandingRequestsIndex = i;
}
// no live eligible host was found: the older form falls back to the first
// host of the shard regardless of its spider/query flags
if(minOutstandingRequestsIndex == -1) return shard;
// we should never return a nospider/noquery host depending on
// the niceness, so return bestDead
// NOTE(review): bestDead is NULL when every host was filtered out by
// niceness; the callers visible in this diff read ->m_hostId from the
// result without a NULL check -- confirm that case cannot occur.
if(minOutstandingRequestsIndex == -1) return bestDead;//shard;
return &shard[minOutstandingRequestsIndex];
}

@ -450,7 +450,7 @@ class Hostdb {
//Host *getLiveHostInGroup ( int32_t groupId );
Host *getLiveHostInShard ( int32_t shardNum );
Host *getLeastLoadedInShard ( uint32_t shardNum );
Host *getLeastLoadedInShard ( uint32_t shardNum , char niceness );
int32_t getHostIdWithSpideringEnabled ( uint32_t shardNum );
// in the entire cluster. return host #0 if it's alive, otherwise

@ -4,6 +4,7 @@
#include "Titledb.h"
#include "UdpServer.h"
static void handleRequest22 ( UdpSlot *slot , int32_t netnice ) ;
Msg22Request::Msg22Request()
@ -165,46 +166,13 @@ bool Msg22::getTitleRec ( Msg22Request *r ,
if ( hostNum >= numHosts ) { char *xx = NULL; *xx = 0; }
firstHostId = hosts [ hostNum ].m_hostId ;
*/
Host *firstHost ;
// if niceness 0 can't pick noquery host.
// if niceness 1 can't pick nospider host.
firstHost = g_hostdb.getLeastLoadedInShard ( shardNum, r->m_niceness );
int32_t firstHostId = firstHost->m_hostId;
// get our group
int32_t allNumHosts = g_hostdb.getNumHostsPerShard();
Host *allHosts = g_hostdb.getShard ( shardNum );//Group ( groupId );
// put all alive hosts in this array
Host *cand[32];
int64_t nc = 0;
for ( int32_t i = 0 ; i < allNumHosts ; i++ ) {
// get that host
Host *hh = &allHosts[i];
// skip if dead
if ( g_hostdb.isDead(hh) ) continue;
// add it if alive
cand[nc++] = hh;
}
// if none alive, make them all candidates then
bool allDead = (nc == 0);
for ( int32_t i = 0 ; allDead && i < allNumHosts ; i++ )
cand[nc++] = &allHosts[i];
// route based on docid region, not parity, because we want to hit
// the urldb page cache as much as possible
int64_t sectionWidth =((128LL*1024*1024)/nc)+1;//(DOCID_MASK/nc)+1LL;
// we mod by 1MB since tied scores resort to sorting by docid
// so we don't want to overload the host responsible for the lowest
// range of docids. CAUTION: do this for msg22 too!
// in this way we should still ensure a pretty good biased urldb
// cache...
// . TODO: fix the urldb cache preload logic
int32_t hostNum = (docId % (128LL*1024*1024)) / sectionWidth;
if ( hostNum < 0 ) hostNum = 0; // watch out for negative docids
if ( hostNum >= nc ) { char *xx = NULL; *xx = 0; }
int32_t firstHostId = cand [ hostNum ]->m_hostId ;
// while this prevents tfndb seeks, it also causes bottlenecks
// if one host is particularly slow, because load balancing is
// bypassed.
//if ( ! g_conf.m_useBiasedTfndb ) firstHostId = -1;
// flag it
m_outstanding = true;
r->m_inUse = 1;

@ -2803,24 +2803,15 @@ bool Msg8a::launchGetRequests ( ) {
//uint32_t gid = g_hostdb.getGroupId ( m_rdbId , &startKey , true );
//Host *group = g_hostdb.getGroup ( gid );
int32_t shardNum = getShardNum ( m_rdbId , &startKey );//, true );
Host *group = g_hostdb.getShard ( shardNum );
//int32_t numTwins = g_hostdb.getNumHostsPerShard();
// use top byte!
uint8_t *sks = (uint8_t *)&startKey;
uint8_t top = sks[sizeof(TAGDB_KEY)-1];
//int32_t hostNum = 0;
//if ( numTwins == 2 && (top & 0x80) ) hostNum = 1;
// TODO: fix this!
//if ( numTwins >= 3 ) { char *xx=NULL;*xx=0; }
// support more than 2 stripes now...
int32_t hostNum = top % g_hostdb.getNumHostsPerShard();
int32_t hostId = group[hostNum].m_hostId;
Host *firstHost ;
// if niceness 0 can't pick noquery host.
// if niceness 1 can't pick nospider host.
firstHost = g_hostdb.getLeastLoadedInShard ( shardNum , m_niceness );
int32_t firstHostId = firstHost->m_hostId;
// . launch this request, even if to ourselves
// . TODO: just use msg0!!
bool status = m->getList ( hostId , // hostId
bool status = m->getList ( firstHostId , // hostId
0 , // ip
0 , // port
0 , // maxCacheAge
@ -2837,7 +2828,7 @@ bool Msg8a::launchGetRequests ( ) {
true , // error correction?
true , // include tree?
true , // doMerge?
-1 , // firstHostId
firstHostId , // firstHostId
0 , // startFileNum
-1 , // numFiles
3600*24*365 );// timeout

@ -20898,6 +20898,10 @@ char *XmlDoc::getIsSiteRoot ( ) {
if ( ! site || site == (char *)-1 ) return (char *)site;
// get our url without the http:// or https://
char *u = getFirstUrl()->getHost();
if ( ! u ) {
g_errno = EBADURL;
return NULL;
}
// assume valid now
m_isSiteRootValid = true;
// get it
@ -21808,7 +21812,12 @@ bool XmlDoc::logIt ( SafeBuf *bb ) {
// like how we index it, do not include the filename. so we can
// have a bunch of pathdepth 0 urls with filenames like xyz.com/abc.htm
if ( m_firstUrlValid ) {
int32_t pd = m_firstUrl.getPathDepth(false);
int32_t pd = -1;
// fix core
if ( m_firstUrl.m_url &&
m_firstUrl.m_ulen > 0 &&
m_firstUrl.m_path )
pd = m_firstUrl.getPathDepth(false);
sb->safePrintf("pathdepth=%"INT32" ",pd);
}
else {