mirror of
https://github.com/privacore/open-source-search-engine.git
synced 2025-07-15 02:36:08 -04:00
Merge remote-tracking branch 'origin/master' into nomerge2
This commit is contained in:
2
Conf.cpp
2
Conf.cpp
@ -131,6 +131,8 @@ Conf::Conf ( ) {
|
||||
m_hashGroupWeightInUrl = 0.0;
|
||||
m_hashGroupWeightInMenu = 0.0;
|
||||
m_synonymWeight = 0.0;
|
||||
m_pageTemperatureWeightMin = 0.0;
|
||||
m_pageTemperatureWeightMax = 0.0;
|
||||
m_usePageTemperatureForRanking = true;
|
||||
m_numFlagScoreMultipliers = 26;
|
||||
m_numFlagRankAdjustments = 26;
|
||||
|
3
Conf.h
3
Conf.h
@ -226,8 +226,9 @@ class Conf {
|
||||
float m_hashGroupWeightInternalLinkText;
|
||||
float m_hashGroupWeightInUrl;
|
||||
float m_hashGroupWeightInMenu;
|
||||
|
||||
float m_synonymWeight;
|
||||
float m_pageTemperatureWeightMin;
|
||||
float m_pageTemperatureWeightMax;
|
||||
|
||||
bool m_usePageTemperatureForRanking;
|
||||
|
||||
|
159
DailyMerge.cpp
159
DailyMerge.cpp
@ -70,9 +70,6 @@ void DailyMerge::dailyMergeLoop ( ) {
|
||||
// how many MINUTES into the day are we? (in UTC)
|
||||
int32_t elapsedMins = tt->tm_hour * 60 + tt->tm_min ;
|
||||
|
||||
// what collnum to merge?
|
||||
collnum_t i ;
|
||||
|
||||
// . if we are not 0, just use host #0's collnum
|
||||
// . an error here will screw up the whole daily merge process
|
||||
if ( hid != 0 && m_mergeMode == 0 ) {
|
||||
@ -83,7 +80,7 @@ void DailyMerge::dailyMergeLoop ( ) {
|
||||
// hostid #0 must NOT be in mode 0
|
||||
if ( h->m_pingInfo.m_flags & PFLAG_MERGEMODE0 ) return;
|
||||
// get the collnum that host #0 is currently daily merging
|
||||
i = g_hostdb.m_hosts[0].m_pingInfo.m_dailyMergeCollnum;
|
||||
collnum_t i = g_hostdb.m_hosts[0].m_pingInfo.m_dailyMergeCollnum;
|
||||
// this means host #0 is not daily merging a collnum now
|
||||
if ( i < 0 ) return;
|
||||
// if it is valid, the CollectionRec MUST be there
|
||||
@ -99,84 +96,86 @@ void DailyMerge::dailyMergeLoop ( ) {
|
||||
}
|
||||
|
||||
// . only host #0 should do this loop!!!
|
||||
// . loop through each collection to check the time
|
||||
for (i=0; hid==0&&m_mergeMode==0 && i<g_collectiondb.getNumRecs(); i++) {
|
||||
// get collection rec for collnum #i
|
||||
CollectionRec *cr = g_collectiondb.getRec ( i );
|
||||
// skip if empty, it was deleted at some point
|
||||
if ( ! cr ) continue;
|
||||
// skip if daily merge trigger is < 0 (do not do dailies)
|
||||
if ( cr->m_dailyMergeTrigger < 0 ) continue;
|
||||
// . skip if not time yet
|
||||
// . !!!!!THIS IS IN MINUTES!!!!!!!!
|
||||
if ( (int32_t)elapsedMins < (int32_t)cr->m_dailyMergeTrigger )
|
||||
continue;
|
||||
// do not start more than 15 mins after the trigger time,
|
||||
// if we miss that cuz we are down, then too bad
|
||||
if ( (int32_t)elapsedMins > (int32_t)cr->m_dailyMergeTrigger + 15 )
|
||||
continue;
|
||||
// . how long has it been (in seconds)
|
||||
// . !!!!!THIS IS IN SECONDS!!!!!!!!
|
||||
int32_t diff = nowSynced - cr->m_dailyMergeStarted;
|
||||
// crazy?
|
||||
if ( diff < 0 ) continue;
|
||||
// if less than 24 hours ago, we already did it
|
||||
if ( diff < 24*3600 ) continue;
|
||||
// . we must now match the day of week
|
||||
// . use <= 0 to do it every day
|
||||
// . 0 = sunday ... 6 = saturday
|
||||
// . comma separated list is ok ("0,1, 6")
|
||||
// . leave blank or at least no numbers to do every day
|
||||
char *s = cr->m_dailyMergeDOWList;
|
||||
char dowCounts[8];
|
||||
memset(dowCounts,0,8);
|
||||
for ( ; *s ; s++ ) {
|
||||
if ( ! is_digit(*s) ) continue;
|
||||
int32_t num = atoi(s);
|
||||
if ( num < 0 ) continue;
|
||||
if ( num > 6 ) continue;
|
||||
dowCounts[num]++;
|
||||
}
|
||||
// get our dow
|
||||
int32_t todayDOW = tt->tm_wday + 1;
|
||||
// make sure 1 to 7
|
||||
if ( todayDOW < 0 || todayDOW > 6 ) {
|
||||
log(LOG_WARN, "merge: bad today dow of %i for coll %s",
|
||||
(int)todayDOW,cr->m_coll);
|
||||
return;
|
||||
}
|
||||
//if ( todayDOW > 6 ) { g_process.shutdownAbort(true); }
|
||||
// skip if not a dayofweek to merge on
|
||||
if ( dowCounts [ todayDOW ] == 0 ) continue;
|
||||
if(hid==0) {
|
||||
// . loop through each collection to check the time
|
||||
for (collnum_t i=0; m_mergeMode==0 && i<g_collectiondb.getNumRecs(); i++) {
|
||||
// get collection rec for collnum #i
|
||||
CollectionRec *cr = g_collectiondb.getRec ( i );
|
||||
// skip if empty, it was deleted at some point
|
||||
if ( ! cr ) continue;
|
||||
// skip if daily merge trigger is < 0 (do not do dailies)
|
||||
if ( cr->m_dailyMergeTrigger < 0 ) continue;
|
||||
// . skip if not time yet
|
||||
// . !!!!!THIS IS IN MINUTES!!!!!!!!
|
||||
if ( (int32_t)elapsedMins < (int32_t)cr->m_dailyMergeTrigger )
|
||||
continue;
|
||||
// do not start more than 15 mins after the trigger time,
|
||||
// if we miss that cuz we are down, then too bad
|
||||
if ( (int32_t)elapsedMins > (int32_t)cr->m_dailyMergeTrigger + 15 )
|
||||
continue;
|
||||
// . how long has it been (in seconds)
|
||||
// . !!!!!THIS IS IN SECONDS!!!!!!!!
|
||||
int32_t diff = nowSynced - cr->m_dailyMergeStarted;
|
||||
// crazy?
|
||||
if ( diff < 0 ) continue;
|
||||
// if less than 24 hours ago, we already did it
|
||||
if ( diff < 24*3600 ) continue;
|
||||
// . we must now match the day of week
|
||||
// . use <= 0 to do it every day
|
||||
// . 0 = sunday ... 6 = saturday
|
||||
// . comma separated list is ok ("0,1, 6")
|
||||
// . leave blank or at least no numbers to do every day
|
||||
char *s = cr->m_dailyMergeDOWList;
|
||||
char dowCounts[8];
|
||||
memset(dowCounts,0,8);
|
||||
for ( ; *s ; s++ ) {
|
||||
if ( ! is_digit(*s) ) continue;
|
||||
int32_t num = atoi(s);
|
||||
if ( num < 0 ) continue;
|
||||
if ( num > 6 ) continue;
|
||||
dowCounts[num]++;
|
||||
}
|
||||
// get our dow
|
||||
int32_t todayDOW = tt->tm_wday + 1;
|
||||
// make sure 1 to 7
|
||||
if ( todayDOW < 0 || todayDOW > 6 ) {
|
||||
log(LOG_WARN, "merge: bad today dow of %i for coll %s",
|
||||
(int)todayDOW,cr->m_coll);
|
||||
return;
|
||||
}
|
||||
//if ( todayDOW > 6 ) { g_process.shutdownAbort(true); }
|
||||
// skip if not a dayofweek to merge on
|
||||
if ( dowCounts [ todayDOW ] == 0 ) continue;
|
||||
|
||||
// set the start time here, but don't commit to m_cr just yet
|
||||
m_savedStartTime = nowSynced;
|
||||
// . wait for everyone to be in mode #0 in case they just
|
||||
// finished another daily merge. only host #0 does this loop.
|
||||
// . PROBLEM: if host #0 crashes before everyone can get into
|
||||
// mode 1+ and then host #0 is brought back up, then
|
||||
// obviously, we will not be able to meet this condition,
|
||||
// therefore only check to see if this condition is
|
||||
// satisfied our "second time around" (so we must complete
|
||||
// one daily merge before checking this again). that is why
|
||||
// i added "m_didDaily". -- MDW
|
||||
for ( int32_t i = 0 ; m_didDaily && i<g_hostdb.getNumHosts() ; i++){
|
||||
// skip ourselves, obviously we are in merge mode 2
|
||||
if ( &g_hostdb.m_hosts[i] == g_hostdb.m_myHost )
|
||||
continue;
|
||||
// that's good if he is in mode 0
|
||||
if ( g_hostdb.m_hosts[i].m_pingInfo.m_flags &
|
||||
PFLAG_MERGEMODE0 )
|
||||
continue;
|
||||
// oops, someone is not mode 0
|
||||
return;
|
||||
// set the start time here, but don't commit to m_cr just yet
|
||||
m_savedStartTime = nowSynced;
|
||||
// . wait for everyone to be in mode #0 in case they just
|
||||
// finished another daily merge. only host #0 does this loop.
|
||||
// . PROBLEM: if host #0 crashes before everyone can get into
|
||||
// mode 1+ and then host #0 is brought back up, then
|
||||
// obviously, we will not be able to meet this condition,
|
||||
// therefore only check to see if this condition is
|
||||
// satisfied our "second time around" (so we must complete
|
||||
// one daily merge before checking this again). that is why
|
||||
// i added "m_didDaily". -- MDW
|
||||
for ( int32_t i = 0 ; m_didDaily && i<g_hostdb.getNumHosts() ; i++){
|
||||
// skip ourselves, obviously we are in merge mode 2
|
||||
if ( &g_hostdb.m_hosts[i] == g_hostdb.m_myHost )
|
||||
continue;
|
||||
// that's good if he is in mode 0
|
||||
if ( g_hostdb.m_hosts[i].m_pingInfo.m_flags &
|
||||
PFLAG_MERGEMODE0 )
|
||||
continue;
|
||||
// oops, someone is not mode 0
|
||||
return;
|
||||
}
|
||||
// got one, save it
|
||||
m_cr = cr;
|
||||
// if we were hostid 0, go into merge mode 1 now
|
||||
m_mergeMode = 1;
|
||||
// bust out of loop
|
||||
break;
|
||||
}
|
||||
// got one, save it
|
||||
m_cr = cr;
|
||||
// if we were hostid 0, go into merge mode 1 now
|
||||
m_mergeMode = 1;
|
||||
// bust out of loop
|
||||
break;
|
||||
}
|
||||
|
||||
// can we advance to merge mode 1?
|
||||
|
12
Hostdb.cpp
12
Hostdb.cpp
@ -683,7 +683,7 @@ createFile:
|
||||
m_hosts[i].m_emailCode = -2;
|
||||
// reset these
|
||||
m_hosts[i].m_pingInfo.m_flags = 0;
|
||||
m_hosts[i].m_pingInfo.m_cpuUsage = 0.0;
|
||||
m_hosts[i].m_pingInfo.m_unused4 = 0.0;
|
||||
m_hosts[i].m_loadAvg = 0.0;
|
||||
|
||||
m_hosts[i].m_lastResponseReceiveTimestamp = 0;
|
||||
@ -1429,14 +1429,14 @@ int32_t Hostdb::getBestHosts2IP(const Host *h) {
|
||||
void Hostdb::updatePingInfo(Host *h, const PingInfo &pi) {
|
||||
ScopedLock sl(m_mtxPinginfo);
|
||||
|
||||
h->m_pingInfo.m_localHostTimeMS = pi.m_localHostTimeMS;
|
||||
h->m_pingInfo.m_unused0 = 0;
|
||||
h->m_pingInfo.m_hostId = pi.m_hostId;
|
||||
h->m_pingInfo.m_loadAvg = pi.m_loadAvg;
|
||||
h->m_pingInfo.m_unused2 = 0;
|
||||
h->m_pingInfo.m_percentMemUsed = pi.m_percentMemUsed;
|
||||
h->m_pingInfo.m_cpuUsage = pi.m_cpuUsage;
|
||||
h->m_pingInfo.m_unused4 = 0.0;
|
||||
h->m_pingInfo.m_totalDocsIndexed = pi.m_totalDocsIndexed;
|
||||
h->m_pingInfo.m_hostsConfCRC = pi.m_hostsConfCRC;
|
||||
h->m_pingInfo.m_diskUsage = pi.m_diskUsage;
|
||||
h->m_pingInfo.m_unused7 = 0.0;
|
||||
h->m_pingInfo.m_flags = pi.m_flags;
|
||||
h->m_pingInfo.m_numCorruptDiskReads = pi.m_numCorruptDiskReads;
|
||||
h->m_pingInfo.m_numOutOfMems = pi.m_numOutOfMems;
|
||||
@ -1451,7 +1451,7 @@ void Hostdb::updatePingInfo(Host *h, const PingInfo &pi) {
|
||||
h->m_pingInfo.m_dailyMergeCollnum = pi.m_dailyMergeCollnum;
|
||||
memcpy(h->m_pingInfo.m_gbVersionStr,pi.m_gbVersionStr,sizeof(pi.m_gbVersionStr));
|
||||
h->m_pingInfo.m_repairMode = pi.m_repairMode;
|
||||
h->m_pingInfo.m_recoveryLevel = pi.m_recoveryLevel;
|
||||
h->m_pingInfo.m_unused18 = 0;
|
||||
}
|
||||
|
||||
|
||||
|
12
Hostdb.h
12
Hostdb.h
@ -43,16 +43,14 @@ int32_t *getLocalIps ( ) ;
|
||||
|
||||
class PingInfo {
|
||||
public:
|
||||
// this timestamp MUST be on top because we set requestSize to 8
|
||||
// and treat it like an old 8-byte ping in PingServer.cpp
|
||||
int64_t m_localHostTimeMS;
|
||||
int64_t m_unused0; //used to be a timestamp for clock synchronization
|
||||
int32_t m_hostId;
|
||||
int32_t m_loadAvg;
|
||||
int32_t m_unused2; //used for the m_loadAvg
|
||||
float m_percentMemUsed;
|
||||
float m_cpuUsage;
|
||||
float m_unused4; //used to be m_cpuUsage
|
||||
int32_t m_totalDocsIndexed;
|
||||
int32_t m_hostsConfCRC;
|
||||
float m_diskUsage;
|
||||
float m_unused7; //used to be m_diskUsage
|
||||
int32_t m_flags;
|
||||
// some new stuff
|
||||
int32_t m_numCorruptDiskReads;
|
||||
@ -67,7 +65,7 @@ public:
|
||||
|
||||
char m_gbVersionStr[21];
|
||||
char m_repairMode;
|
||||
uint8_t m_recoveryLevel;
|
||||
uint8_t m_unused18;
|
||||
};
|
||||
|
||||
class Host {
|
||||
|
@ -105,7 +105,10 @@ void Msg39Request::reset() {
|
||||
m_useQueryStopWords = true;
|
||||
m_doMaxScoreAlgo = true;
|
||||
m_synonymWeight = 0.9;
|
||||
m_pageTemperatureWeightMin = 1.0;
|
||||
m_pageTemperatureWeightMax = 20.0;
|
||||
m_usePageTemperatureForRanking = true;
|
||||
|
||||
for(int i=0; i<26; i++)
|
||||
m_flagScoreMultiplier[i] = 1.0;
|
||||
for(int i=0; i<26; i++)
|
||||
|
2
Msg39.h
2
Msg39.h
@ -61,6 +61,8 @@ class Msg39Request {
|
||||
|
||||
ScoringWeights m_scoringWeights;
|
||||
float m_synonymWeight;
|
||||
float m_pageTemperatureWeightMin;
|
||||
float m_pageTemperatureWeightMax;
|
||||
bool m_usePageTemperatureForRanking;
|
||||
|
||||
float m_flagScoreMultiplier[26];
|
||||
|
@ -345,6 +345,9 @@ bool Msg40::federatedLoop ( ) {
|
||||
m_si->m_hashGroupWeightInUrl,
|
||||
m_si->m_hashGroupWeightInMenu);
|
||||
mr.m_synonymWeight = m_si->m_synonymWeight;
|
||||
mr.m_pageTemperatureWeightMin = m_si->m_pageTemperatureWeightMin;
|
||||
mr.m_pageTemperatureWeightMax = m_si->m_pageTemperatureWeightMax;
|
||||
|
||||
mr.m_usePageTemperatureForRanking = m_si->m_usePageTemperatureForRanking;
|
||||
memcpy(mr.m_flagScoreMultiplier, m_si->m_flagScoreMultiplier, sizeof(mr.m_flagScoreMultiplier));
|
||||
memcpy(mr.m_flagRankAdjustment, m_si->m_flagRankAdjustment, sizeof(mr.m_flagRankAdjustment));
|
||||
|
@ -22,8 +22,6 @@ static int tryagainSort ( const void *i1, const void *i2 );
|
||||
static int dgramsToSort ( const void *i1, const void *i2 );
|
||||
static int dgramsFromSort ( const void *i1, const void *i2 );
|
||||
static int memUsedSort ( const void *i1, const void *i2 );
|
||||
static int cpuUsageSort ( const void *i1, const void *i2 );
|
||||
static int diskUsageSort ( const void *i1, const void *i2 );
|
||||
|
||||
static int32_t generatePingMsg( Host *h, int64_t nowms, char *buffer );
|
||||
|
||||
@ -159,12 +157,6 @@ skipReplaceHost:
|
||||
"<td><a href=\"/admin/hosts?c=%s&sort=9\">"
|
||||
"<b>mem used</a></td>"
|
||||
|
||||
"<td><a href=\"/admin/hosts?c=%s&sort=10\">"
|
||||
"<b>cpu used</b></a></td>"
|
||||
|
||||
"<td><a href=\"/admin/hosts?c=%s&sort=17\">"
|
||||
"<b>disk used</b></a></td>"
|
||||
|
||||
"<td><a href=\"/admin/hosts?c=%s&sort=14\">"
|
||||
"<b>max ping1</b></a></td>"
|
||||
|
||||
@ -191,8 +183,6 @@ skipReplaceHost:
|
||||
cs,
|
||||
cs,
|
||||
cs,
|
||||
cs,
|
||||
cs,
|
||||
shotcol );
|
||||
|
||||
// loop through each host we know and print it's stats
|
||||
@ -226,14 +216,12 @@ skipReplaceHost:
|
||||
case 7: gbsort ( hostSort, nh, sizeof(int32_t), dgramsFromSort ); break;
|
||||
//case 8:
|
||||
case 9: gbsort ( hostSort, nh, sizeof(int32_t), memUsedSort ); break;
|
||||
case 10:gbsort ( hostSort, nh, sizeof(int32_t), cpuUsageSort ); break;
|
||||
case 11:gbsort ( hostSort, nh, sizeof(int32_t), pingAgeSort ); break;
|
||||
case 12:gbsort ( hostSort, nh, sizeof(int32_t), flagSort ); break;
|
||||
case 13:gbsort ( hostSort, nh, sizeof(int32_t), splitTimeSort ); break;
|
||||
case 14:gbsort ( hostSort, nh, sizeof(int32_t), pingMaxSort ); break;
|
||||
//case 15:
|
||||
case 16:gbsort ( hostSort, nh, sizeof(int32_t), defaultSort ); break;
|
||||
case 17:gbsort ( hostSort, nh, sizeof(int32_t), diskUsageSort ); break;
|
||||
|
||||
}
|
||||
|
||||
@ -317,19 +305,6 @@ skipReplaceHost:
|
||||
fontTagBack = "</font>";
|
||||
}
|
||||
|
||||
float cpu = h->m_pingInfo.m_cpuUsage;
|
||||
if ( cpu > 100.0 ) cpu = 100.0;
|
||||
if ( cpu < 0.0 ) cpu = -1.0;
|
||||
|
||||
char diskUsageMsg[64];
|
||||
sprintf(diskUsageMsg,"%.1f%%",h->m_pingInfo.m_diskUsage);
|
||||
if ( h->m_pingInfo.m_diskUsage < 0.0 )
|
||||
sprintf(diskUsageMsg,"???");
|
||||
if ( h->m_pingInfo.m_diskUsage>=98.0 && format == FORMAT_HTML )
|
||||
sprintf(diskUsageMsg,"<font color=red><b>%.1f%%"
|
||||
"</b></font>",h->m_pingInfo.m_diskUsage);
|
||||
|
||||
|
||||
// split time, don't divide by zero!
|
||||
int32_t splitTime = 0;
|
||||
if ( h->m_splitsDone )
|
||||
@ -384,13 +359,6 @@ skipReplaceHost:
|
||||
if ((flags & PFLAG_RECOVERYMODE)&& format == FORMAT_HTML ) {
|
||||
fb.safePrintf("<b title=\"Recovered from core"
|
||||
"\">x</b>");
|
||||
// this is only 8-bits at the moment so it's capped
|
||||
// at 255. this level is 1 the first time we core
|
||||
// and are restarted.
|
||||
if ( h->m_pingInfo.m_recoveryLevel > 1 )
|
||||
fb.safePrintf("<sup>%" PRId32"</sup>",
|
||||
(int32_t)
|
||||
h->m_pingInfo.m_recoveryLevel);
|
||||
}
|
||||
|
||||
if ((flags & PFLAG_RECOVERYMODE)&& format != FORMAT_HTML )
|
||||
@ -602,14 +570,6 @@ skipReplaceHost:
|
||||
"</percentMemUsed>",
|
||||
h->m_pingInfo.m_percentMemUsed); // float
|
||||
|
||||
sb.safePrintf("\t\t<cpuUsage>%.1f%%"
|
||||
"</cpuUsage>",
|
||||
cpu );
|
||||
|
||||
sb.safePrintf("\t\t<percentDiskUsed><![CDATA[%s]]>"
|
||||
"</percentDiskUsed>",
|
||||
diskUsageMsg);
|
||||
|
||||
sb.safePrintf("\t\t<maxPing1>%s</maxPing1>\n",
|
||||
pms );
|
||||
|
||||
@ -710,11 +670,6 @@ skipReplaceHost:
|
||||
sb.safePrintf("\t\t\t\t\"percentMemUsed\":\"%.1f%%\",\n",
|
||||
h->m_pingInfo.m_percentMemUsed); // float
|
||||
|
||||
sb.safePrintf("\t\t\t\t\"cpuUsage\":\"%.1f%%\",\n",cpu);
|
||||
|
||||
sb.safePrintf("\t\t\t\t\"percentDiskUsed\":\"%s\",\n",
|
||||
diskUsageMsg);
|
||||
|
||||
sb.safePrintf("\t\t\t\t\"maxPing1\":\"%s\",\n",pms);
|
||||
|
||||
sb.safePrintf("\t\t\t\t\"maxPingAge1\":\"%" PRId32"ms\",\n",
|
||||
@ -801,10 +756,6 @@ skipReplaceHost:
|
||||
|
||||
// percent mem used
|
||||
"<td>%s%.1f%%%s</td>"
|
||||
// cpu usage
|
||||
"<td>%.1f%%</td>"
|
||||
// disk usage
|
||||
"<td>%s</td>"
|
||||
|
||||
// ping max
|
||||
"<td>%s</td>"
|
||||
@ -847,8 +798,6 @@ skipReplaceHost:
|
||||
fontTagFront,
|
||||
h->m_pingInfo.m_percentMemUsed, // float
|
||||
fontTagBack,
|
||||
cpu, // float
|
||||
diskUsageMsg,
|
||||
|
||||
// ping max
|
||||
pms,
|
||||
@ -1351,23 +1300,3 @@ int memUsedSort ( const void *i1, const void *i2 ) {
|
||||
if ( p1->m_percentMemUsed < p2->m_percentMemUsed ) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cpuUsageSort ( const void *i1, const void *i2 ) {
|
||||
Host *h1 = g_hostdb.getHost ( *(int32_t*)i1 );
|
||||
Host *h2 = g_hostdb.getHost ( *(int32_t*)i2 );
|
||||
PingInfo *p1 = &h1->m_pingInfo;
|
||||
PingInfo *p2 = &h2->m_pingInfo;
|
||||
if ( p1->m_cpuUsage > p2->m_cpuUsage ) return -1;
|
||||
if ( p1->m_cpuUsage < p2->m_cpuUsage ) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int diskUsageSort ( const void *i1, const void *i2 ) {
|
||||
Host *h1 = g_hostdb.getHost ( *(int32_t*)i1 );
|
||||
Host *h2 = g_hostdb.getHost ( *(int32_t*)i2 );
|
||||
PingInfo *p1 = &h1->m_pingInfo;
|
||||
PingInfo *p2 = &h2->m_pingInfo;
|
||||
if ( p1->m_diskUsage > p2->m_diskUsage ) return -1;
|
||||
if ( p1->m_diskUsage < p2->m_diskUsage ) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
@ -1,10 +1,11 @@
|
||||
#include "PageTemperatureRegistry.h"
|
||||
#include "ScalingFunctions.h"
|
||||
#include "Log.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include <math.h>
|
||||
|
||||
PageTemperatureRegistry g_pageTemperatureRegistry;
|
||||
|
||||
@ -97,13 +98,18 @@ bool PageTemperatureRegistry::load() {
|
||||
|
||||
temperature_range_for_scaling = max_temperature-min_temperature;
|
||||
|
||||
min_temperature_log = log(min_temperature);
|
||||
max_temperature_log = log(max_temperature);
|
||||
temperature_range_for_scaling_log = log(temperature_range_for_scaling);
|
||||
default_temperature_log = log(default_temperature);
|
||||
|
||||
if(!using_meta)
|
||||
log(LOG_WARN, "meta-file %s could not be loaded. Using default temperature of %u which can scew results for new pages", meta_filename, default_temperature);
|
||||
|
||||
log(LOG_DEBUG, "pagetemp: min_temperature=%u",min_temperature);
|
||||
log(LOG_DEBUG, "pagetemp: max_temperature=%u",max_temperature);
|
||||
log(LOG_DEBUG, "pagetemp: default_temperature=%u",default_temperature);
|
||||
|
||||
|
||||
log(LOG_DEBUG, "%s loaded (%lu items)", filename, (unsigned long)new_entries);
|
||||
return true;
|
||||
}
|
||||
@ -129,11 +135,15 @@ unsigned PageTemperatureRegistry::query_page_temperature_internal(uint64_t docid
|
||||
}
|
||||
|
||||
|
||||
double PageTemperatureRegistry::query_page_temperature(uint64_t docid) const {
|
||||
double PageTemperatureRegistry::query_page_temperature(uint64_t docid, double range_min, double range_max) const {
|
||||
if(hash_table_size==0)
|
||||
return 1.0;
|
||||
unsigned temperature_26bit = query_page_temperature_internal(docid);
|
||||
return scale_linear(default_temperature_log, min_temperature_log, max_temperature_log, range_min, range_max);
|
||||
|
||||
double temperature_26bit_log = log((double)query_page_temperature_internal(docid));
|
||||
//Then scale to a number in the rangte [0..1]
|
||||
//It is a bit annoying to do this computation for each lookup but it saves memory
|
||||
return ((double)(temperature_26bit - min_temperature)) / temperature_range_for_scaling;
|
||||
// return ((double)(temperature_26bit - min_temperature)) / temperature_range_for_scaling;
|
||||
return scale_linear(temperature_26bit_log, min_temperature_log, max_temperature_log, range_min, range_max);
|
||||
}
|
||||
|
||||
|
||||
|
@ -15,6 +15,12 @@ class PageTemperatureRegistry {
|
||||
unsigned max_temperature;
|
||||
unsigned temperature_range_for_scaling;
|
||||
unsigned default_temperature;
|
||||
|
||||
double min_temperature_log;
|
||||
double max_temperature_log;
|
||||
double temperature_range_for_scaling_log;
|
||||
double default_temperature_log;
|
||||
|
||||
unsigned query_page_temperature_internal(uint64_t docid) const;
|
||||
public:
|
||||
PageTemperatureRegistry()
|
||||
@ -26,7 +32,7 @@ public:
|
||||
bool load();
|
||||
void unload();
|
||||
|
||||
double query_page_temperature(uint64_t docid) const;
|
||||
double query_page_temperature(uint64_t docid, double range_min, double range_max) const;
|
||||
|
||||
bool empty() const { return entries==0; }
|
||||
};
|
||||
|
21
Pages.cpp
21
Pages.cpp
@ -2411,25 +2411,6 @@ bool printRedBox ( SafeBuf *mb , TcpSocket *sock , HttpRequest *hr ) {
|
||||
mb->safePrintf("%s",boxEnd);
|
||||
}
|
||||
|
||||
// out of disk space?
|
||||
int32_t out = 0;
|
||||
for ( int32_t i = 0 ; i < g_hostdb.getNumHosts() ; i++ ) {
|
||||
Host *h = &g_hostdb.m_hosts[i];
|
||||
if ( h->m_pingInfo.m_diskUsage < 98.0 ) continue;
|
||||
out++;
|
||||
}
|
||||
if ( out > 0 ) {
|
||||
if ( adds ) mb->safePrintf("<br>");
|
||||
adds++;
|
||||
const char *s = "s are";
|
||||
if ( out == 1 ) s = " is";
|
||||
mb->safePrintf("%s",box);
|
||||
mb->safePrintf("%" PRId32" host%s over 98%% disk usage. "
|
||||
"See the <a href=/admin/hosts?c=%s>"
|
||||
"hosts</a> table.",out,s,coll);
|
||||
mb->safePrintf("%s",boxEnd);
|
||||
}
|
||||
|
||||
// injections disabled?
|
||||
if ( ! g_conf.m_injectionsEnabled ) {
|
||||
if ( adds ) mb->safePrintf("<br>");
|
||||
@ -2487,7 +2468,7 @@ bool printRedBox ( SafeBuf *mb , TcpSocket *sock , HttpRequest *hr ) {
|
||||
if ( adds ) mb->safePrintf("<br>");
|
||||
adds++;
|
||||
const char *s = "s are";
|
||||
if ( out == 1 ) s = " is";
|
||||
if ( jammedHosts == 1 ) s = " is";
|
||||
mb->safePrintf("%s",box);
|
||||
mb->safePrintf("%" PRId32" host%s jammed with "
|
||||
"over %" PRId32" unhandled "
|
||||
|
40
Parms.cpp
40
Parms.cpp
@ -3665,6 +3665,26 @@ void Parms::init ( ) {
|
||||
m->m_page = PAGE_RESULTS;
|
||||
m++;
|
||||
|
||||
m->m_title = "Page temp weight min";
|
||||
m->m_desc = "Page temp is scaled to be between the min and max";
|
||||
m->m_cgi = "pagetempweightmin";
|
||||
simple_m_set(SearchInput,m_pageTemperatureWeightMin);
|
||||
m->m_defOff2 = offsetof(Conf,m_pageTemperatureWeightMin);
|
||||
m->m_def = "1.000000";
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_RESULTS;
|
||||
m++;
|
||||
|
||||
m->m_title = "Page temp weight max";
|
||||
m->m_desc = "Page temp is scaled to be between the min and max";
|
||||
m->m_cgi = "pagetempweightmax";
|
||||
simple_m_set(SearchInput,m_pageTemperatureWeightMax);
|
||||
m->m_defOff2 = offsetof(Conf,m_pageTemperatureWeightMax);
|
||||
m->m_def = "20.000000";
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_RESULTS;
|
||||
m++;
|
||||
|
||||
m->m_title = "Use page temperature";
|
||||
m->m_desc = "Use page temperature (if available) for ranking";
|
||||
m->m_cgi = "use_page_temperature";
|
||||
@ -4021,6 +4041,26 @@ void Parms::init ( ) {
|
||||
m->m_page = PAGE_RANKING;
|
||||
m++;
|
||||
|
||||
m->m_title = "Page temp weight min";
|
||||
m->m_desc = "Page temp is scaled to be between the min and max";
|
||||
m->m_cgi = "pagetempweightmin";
|
||||
simple_m_set(Conf,m_pageTemperatureWeightMin);
|
||||
m->m_def = "1.000000";
|
||||
m->m_group = false;
|
||||
m->m_flags = PF_REBUILDRANKINGSETTINGS;
|
||||
m->m_page = PAGE_RANKING;
|
||||
m++;
|
||||
|
||||
m->m_title = "Page temp weight max";
|
||||
m->m_desc = "Page temp is scaled to be between the min and max";
|
||||
m->m_cgi = "pagetempweightmax";
|
||||
simple_m_set(Conf,m_pageTemperatureWeightMax);
|
||||
m->m_def = "20.000000";
|
||||
m->m_group = false;
|
||||
m->m_flags = PF_REBUILDRANKINGSETTINGS;
|
||||
m->m_page = PAGE_RANKING;
|
||||
m++;
|
||||
|
||||
m->m_title = "Use page temperature";
|
||||
m->m_desc = "Use page temperature (if available) for ranking";
|
||||
m->m_cgi = "use_page_temperature";
|
||||
|
@ -210,9 +210,7 @@ void PingServer::pingHost ( Host *h , uint32_t ip , uint16_t port ) {
|
||||
// let the receiver know our repair mode
|
||||
newPingInfo.m_repairMode = g_repairMode;
|
||||
|
||||
int32_t l_loadavg = (int32_t) (g_process.getLoadAvg() * 100.0);
|
||||
//gbmemcpy(p, &l_loadavg, sizeof(int32_t)); p += sizeof(int32_t);
|
||||
newPingInfo.m_loadAvg = l_loadavg ;
|
||||
newPingInfo.m_unused2 = 0;
|
||||
|
||||
// then our percent mem used
|
||||
float mem = g_mem.getUsedMemPercentage();
|
||||
@ -229,7 +227,7 @@ void PingServer::pingHost ( Host *h , uint32_t ip , uint16_t port ) {
|
||||
if ( g_hostdb.getCRC() == 0 ) { g_process.shutdownAbort(true); }
|
||||
|
||||
// disk usage (df -ka)
|
||||
newPingInfo.m_diskUsage = g_process.m_diskUsage;
|
||||
newPingInfo.m_unused7 = 0.0;
|
||||
|
||||
// flags indicating our state
|
||||
int32_t flags = 0;
|
||||
@ -247,9 +245,7 @@ void PingServer::pingHost ( Host *h , uint32_t ip , uint16_t port ) {
|
||||
if ( g_dailyMerge.m_mergeMode ==0 || g_dailyMerge.m_mergeMode == 6 )
|
||||
flags |= PFLAG_MERGEMODE0OR6;
|
||||
|
||||
uint8_t rv8 = (uint8_t)g_recoveryLevel;
|
||||
if ( g_recoveryLevel > 255 ) rv8 = 255;
|
||||
newPingInfo.m_recoveryLevel = rv8;
|
||||
newPingInfo.m_unused18 = 0;
|
||||
|
||||
//*(int32_t *)p = flags; p += 4; // 4 bytes
|
||||
newPingInfo.m_flags = flags;
|
||||
@ -261,14 +257,13 @@ void PingServer::pingHost ( Host *h , uint32_t ip , uint16_t port ) {
|
||||
|
||||
newPingInfo.m_hostId = me->m_hostId;
|
||||
|
||||
newPingInfo.m_localHostTimeMS = gettimeofdayInMilliseconds();
|
||||
newPingInfo.m_unused0 = 0;
|
||||
|
||||
newPingInfo.m_udpSlotsInUseIncoming = g_udpServer.getNumUsedSlotsIncoming();
|
||||
|
||||
newPingInfo.m_tcpSocketsInUse = g_httpServer.m_tcp.m_numUsed;
|
||||
|
||||
// from Loop.cpp
|
||||
newPingInfo.m_cpuUsage = 0.0;
|
||||
newPingInfo.m_unused4 = 0.0;
|
||||
|
||||
// store the gbVersionStrBuf now, just a date with a \0 included
|
||||
char *v = getVersion();
|
||||
|
@ -4312,13 +4312,12 @@ void PosdbTable::intersectLists10_r ( ) {
|
||||
|
||||
if(m_msg39req->m_usePageTemperatureForRanking) {
|
||||
use_page_temperature = true;
|
||||
page_temperature = g_pageTemperatureRegistry.query_page_temperature(m_docId);
|
||||
page_temperature = g_pageTemperatureRegistry.query_page_temperature(m_docId, m_msg39req->m_pageTemperatureWeightMin, m_msg39req->m_pageTemperatureWeightMax);
|
||||
score *= page_temperature;
|
||||
logTrace(g_conf.m_logTracePosdb, "Page temperature for docId %" PRIu64 " is %.4f, score %f->%f", m_docId, page_temperature, score_before_page_temp, score);
|
||||
logTrace(g_conf.m_logTracePosdb, "Page temperature for docId %" PRIu64 " is %.14f, score %f -> %f", m_docId, page_temperature, score_before_page_temp, score);
|
||||
}
|
||||
|
||||
|
||||
|
||||
//#
|
||||
//# Handle sortby int/float and minimum docid/score pairs
|
||||
//#
|
||||
|
20
RdbBase.cpp
20
RdbBase.cpp
@ -1239,16 +1239,22 @@ bool RdbBase::incorporateMerge ( ) {
|
||||
//note: also seen when resuming an interrupted merge, inwhich case there is probably nothing wrong
|
||||
}
|
||||
|
||||
if ( postmergePositiveRecords < m_premergeNumPositiveRecords - m_premergeNumNegativeRecords ) {
|
||||
if ( postmergePositiveRecords < (m_premergeNumPositiveRecords - m_premergeNumNegativeRecords) ) {
|
||||
int64_t lostPositive = m_premergeNumPositiveRecords - postmergePositiveRecords;
|
||||
double lostPercentage = (lostPositive * 100.00) / m_premergeNumPositiveRecords;
|
||||
|
||||
log(LOG_INFO,"merge: %s: lost %" PRId64" (%.2f%%) positives", m_dbname, lostPositive, lostPercentage);
|
||||
if (m_premergeNumPositiveRecords > 0) {
|
||||
double lostPercentage = (lostPositive * 100.00) / m_premergeNumPositiveRecords;
|
||||
|
||||
int32_t maxLostPercentage = getMaxLostPositivesPercentage(m_rdb->getRdbId());
|
||||
if (lostPercentage > maxLostPercentage) {
|
||||
log(LOG_ERROR, "merge: %s: lost more than %d%% of positive records. Aborting.", m_dbname, maxLostPercentage);
|
||||
gbshutdownCorrupted();
|
||||
log(LOG_INFO,"merge: %s: lost %" PRId64" (%.2f%%) positives", m_dbname, lostPositive, lostPercentage);
|
||||
|
||||
int32_t maxLostPercentage = getMaxLostPositivesPercentage(m_rdb->getRdbId());
|
||||
if (lostPercentage > maxLostPercentage) {
|
||||
log(LOG_ERROR, "merge: %s: lost more than %d%% of positive records. Aborting.", m_dbname, maxLostPercentage);
|
||||
gbshutdownCorrupted();
|
||||
}
|
||||
} else {
|
||||
// this case is unlikely, but coverity complained about it
|
||||
log(LOG_INFO,"merge: %s: lost %" PRId64" positives", m_dbname, lostPositive);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -65,6 +65,8 @@ SearchInput::SearchInput() {
|
||||
memset(m_queryId, 0, sizeof(m_queryId));
|
||||
m_doMaxScoreAlgo = false;
|
||||
m_synonymWeight = 0.9;
|
||||
m_pageTemperatureWeightMin = 1.0;
|
||||
m_pageTemperatureWeightMax = 20.0;
|
||||
m_usePageTemperatureForRanking = true;
|
||||
m_numFlagScoreMultipliers=26;
|
||||
for(int i=0; i<26; i++)
|
||||
|
@ -162,8 +162,9 @@ public:
|
||||
float m_hashGroupWeightInternalLinkText;
|
||||
float m_hashGroupWeightInUrl;
|
||||
float m_hashGroupWeightInMenu;
|
||||
|
||||
float m_synonymWeight;
|
||||
float m_pageTemperatureWeightMin;
|
||||
float m_pageTemperatureWeightMax;
|
||||
bool m_usePageTemperatureForRanking;
|
||||
|
||||
int32_t m_numFlagScoreMultipliers;
|
||||
|
Reference in New Issue
Block a user