forked from Mirrors/privacore-open-source-search-engine
Merge branch 'master' into nomerge2
This commit is contained in:
23
Doledb.cpp
23
Doledb.cpp
@ -43,36 +43,24 @@ static void nukeDoledbWrapper ( int fd , void *state ) {
|
||||
}
|
||||
|
||||
void nukeDoledb ( collnum_t collnum ) {
|
||||
|
||||
//g_spiderLoop.m_winnerListCache.verify();
|
||||
// in case we changed url filters for this collection #
|
||||
{
|
||||
RdbCacheLock rcl(g_spiderLoop.m_winnerListCache);
|
||||
g_spiderLoop.m_winnerListCache.clear ( collnum );
|
||||
}
|
||||
|
||||
//g_spiderLoop.m_winnerListCache.verify();
|
||||
|
||||
//if ( we->m_registered )
|
||||
// g_loop.unregisterSleepCallback ( we , doDoledbNuke );
|
||||
|
||||
// . nuke doledb for this collnum
|
||||
// . it will unlink the files and maps for doledb for this collnum
|
||||
// . it will remove all recs of this collnum from its tree too
|
||||
if ( g_doledb.getRdb()->isSavingTree () ) {
|
||||
g_loop.registerSleepCallback(100,&collnum,nukeDoledbWrapper);
|
||||
//we->m_registered = true;
|
||||
if (g_doledb.getRdb()->isSavingTree()) {
|
||||
g_loop.registerSleepCallback(100, &collnum, nukeDoledbWrapper);
|
||||
return;
|
||||
}
|
||||
|
||||
// . ok, tree is not saving, it should complete entirely from this call
|
||||
g_doledb.getRdb()->deleteAllRecs ( collnum );
|
||||
|
||||
// re-add it back so the RdbBase is new'd
|
||||
//g_doledb.getRdb()->addColl2 ( we->m_collnum );
|
||||
|
||||
SpiderColl *sc = g_spiderCache.getSpiderCollIffNonNull ( collnum );
|
||||
g_doledb.getRdb()->deleteAllRecs(collnum);
|
||||
|
||||
SpiderColl *sc = g_spiderCache.getSpiderCollIffNonNull(collnum);
|
||||
if ( sc ) {
|
||||
// . make sure to nuke m_doleIpTable as well
|
||||
sc->m_doleIpTable.clear();
|
||||
@ -80,8 +68,7 @@ void nukeDoledb ( collnum_t collnum ) {
|
||||
//sc->m_ufnMapValid = false;
|
||||
|
||||
// log it
|
||||
log("spider: rebuilding %s from doledb nuke",
|
||||
sc->getCollName());
|
||||
log("spider: rebuilding %s from doledb nuke", sc->getCollName());
|
||||
// activate a scan if not already activated
|
||||
sc->m_waitingTreeNeedsRebuild = true;
|
||||
// if a scan is ongoing, this will re-set it
|
||||
|
12
Msg0.cpp
12
Msg0.cpp
@ -801,18 +801,10 @@ void doneSending_ass ( void *state , UdpSlot *slot ) {
|
||||
if ( st0->m_rdbId == RDB_TAGDB ) {
|
||||
}
|
||||
else if(slot->getNiceness() > 0) {
|
||||
g_stats.addStat_r ( slot->m_sendBufSize ,
|
||||
st0->m_startTime ,
|
||||
now ,
|
||||
//"transmit_data_nice",
|
||||
0x00aa00aa);
|
||||
g_stats.addStat_r(slot->m_sendBufSize, st0->m_startTime, now, 0x00aa00aa);
|
||||
}
|
||||
else {
|
||||
g_stats.addStat_r ( slot->m_sendBufSize ,
|
||||
st0->m_startTime ,
|
||||
now ,
|
||||
//"transmit_data",
|
||||
0x00ff00ff );
|
||||
g_stats.addStat_r(slot->m_sendBufSize, st0->m_startTime, now, 0x00ff00ff);
|
||||
}
|
||||
|
||||
|
||||
|
34
Msg5.cpp
34
Msg5.cpp
@ -174,6 +174,8 @@ bool Msg5::getTreeList(RdbList *result, const void *startKey, const void *endKey
|
||||
base->useHalfKeys()))
|
||||
return true;
|
||||
structName = "buckets";
|
||||
*memUsedByTree = buckets->getMemOccupied();
|
||||
*numUsedNodes = buckets->getNumKeys();
|
||||
}
|
||||
|
||||
int64_t now = gettimeofdayInMilliseconds();
|
||||
@ -476,28 +478,16 @@ bool Msg5::readList ( ) {
|
||||
// . use an avg. rec size for variable-length records
|
||||
// . just use tree to estimate avg. rec size
|
||||
if ( rs == -1) {
|
||||
if(rdb->useTree()) {
|
||||
// get avg record size
|
||||
if ( numRecs > 0 ) rs = memUsedByTree / numRecs;
|
||||
// add 10% for deviations
|
||||
rs = (rs * 110) / 100;
|
||||
// what is the minimal record size?
|
||||
int32_t minrs = sizeof(key96_t) + 4;
|
||||
// ensure a minimal record size
|
||||
if ( rs < minrs ) rs = minrs;
|
||||
}
|
||||
else {
|
||||
RdbBuckets *buckets = rdb->getBuckets();
|
||||
if( !buckets ) {
|
||||
log(LOG_WARN,"%s:%s:%d: No buckets!", __FILE__, __func__, __LINE__);
|
||||
return false;
|
||||
}
|
||||
|
||||
rs = buckets->getNumKeys() / buckets->getMemOccupied();
|
||||
int32_t minrs = buckets->getRecSize() + 4;
|
||||
// ensure a minimal record size
|
||||
if ( rs < minrs ) rs = minrs;
|
||||
}
|
||||
// what is the minimal record size?
|
||||
int32_t minrs = rdb->getKeySize() + 4;
|
||||
// get avg record size
|
||||
if(numRecs > 0)
|
||||
rs = memUsedByTree / numRecs;
|
||||
// add 10% for deviations
|
||||
rs = (rs * 110) / 100;
|
||||
// ensure a minimal record size
|
||||
if(rs < minrs)
|
||||
rs = minrs;
|
||||
}
|
||||
|
||||
// . TODO: get avg recSize in this rdb (avgRecSize*numNeg..)
|
||||
|
@ -488,13 +488,6 @@ static void sendUdpReply7(void *state) {
|
||||
|
||||
UdpSlot *slot = xd->m_injectionSlot;
|
||||
|
||||
uint32_t statColor = 0xccffcc;
|
||||
if(xd->m_indexCode) {
|
||||
statColor = 0xaaddaa;//0x4e99e9;
|
||||
}
|
||||
g_stats.addStat_r ( xd->m_rawUtf8ContentSize, xd->m_injectStartTime, gettimeofdayInMilliseconds(), statColor );
|
||||
|
||||
|
||||
// injecting a warc seems to not set m_indexCodeValid to true
|
||||
// for the container doc... hmmm...
|
||||
int32_t indexCode = -1;
|
||||
|
107
PagePerf.cpp
107
PagePerf.cpp
@ -83,51 +83,18 @@ bool sendPagePerf ( TcpSocket *s , HttpRequest *r ) {
|
||||
//skip request path
|
||||
while (!isspace(*rbufEnd)) rbufEnd++;
|
||||
*rbufEnd = '\0';
|
||||
//char* refresh = strstr(rbuf, "&rr=");
|
||||
|
||||
|
||||
// print resource table
|
||||
// columns are the dbs
|
||||
p.safePrintf(
|
||||
//"<center>Disk Statistics<br><br>"
|
||||
"<center>"
|
||||
//"<br>"
|
||||
//"<img name=\"diskgraph\"
|
||||
//src=/diskGraph%" PRId32".gif><br><br>",
|
||||
//g_hostdb.m_hostId );
|
||||
);
|
||||
p.safePrintf("<center>");
|
||||
|
||||
// now try using absolute divs instead of a GIF
|
||||
g_stats.printGraphInHtml ( p );
|
||||
|
||||
/*
|
||||
if(autoRefresh > 0) {
|
||||
if(refresh) *(refresh+4) = '0';
|
||||
p.safePrintf(
|
||||
"<center><a href=\"%s\">Auto Refresh Off</a>"
|
||||
"</center>",
|
||||
rbuf + 4); // skip over GET
|
||||
p.safePrintf( "<input type=\"hidden\" "
|
||||
"name=\"dontlog\" value=\"1\">");
|
||||
|
||||
}
|
||||
else {
|
||||
char* rr = "";
|
||||
if(refresh) *(refresh+4) = '1';
|
||||
else rr = "&rr=1";
|
||||
p.safePrintf(
|
||||
"<center><a href=\"%s%s\">Auto Refresh</a>"
|
||||
"</center>",
|
||||
rbuf + 4, rr); // skip over "GET "
|
||||
}
|
||||
*/
|
||||
|
||||
// print the key
|
||||
p.safePrintf (
|
||||
"<br>"
|
||||
"<center>"
|
||||
//"<table %s>"
|
||||
//"<tr>%s</tr></table>"
|
||||
|
||||
"<style>"
|
||||
".poo { background-color:#%s;}\n"
|
||||
@ -155,17 +122,6 @@ bool sendPagePerf ( TcpSocket *s , HttpRequest *r ) {
|
||||
"<td> Disk write. "
|
||||
"Thicker lines for bigger writes.</td>"
|
||||
|
||||
// light brown
|
||||
"<td bgcolor=#b58869> </td>"
|
||||
"<td> Processing end user query. No raw= parm.</td>"
|
||||
"</tr>"
|
||||
|
||||
|
||||
// dark brown
|
||||
"<tr class=poo>"
|
||||
"<td bgcolor=#753d30> </td>"
|
||||
"<td> Processing raw query. Has raw= parm.</td>"
|
||||
|
||||
// blue
|
||||
"<td bgcolor=#0000ff> </td>"
|
||||
"<td> Summary extraction for one document.</td>"
|
||||
@ -178,25 +134,12 @@ bool sendPagePerf ( TcpSocket *s , HttpRequest *r ) {
|
||||
"<td> Send data over network. (low priority)"
|
||||
"Thicker lines for bigger sends.</td>"
|
||||
|
||||
// yellow
|
||||
"<td bgcolor=#aaaa00> </td>"
|
||||
"<td> Read all termlists (msg2). (low priority)"
|
||||
"Thicker lines for bigger reads.</td>"
|
||||
"</tr>"
|
||||
|
||||
// pinkish purple
|
||||
"<tr class=poo>"
|
||||
"<td bgcolor=#ff00ff> </td>"
|
||||
"<td> Send data over network. (high priority)"
|
||||
"Thicker lines for bigger sends.</td>"
|
||||
|
||||
// light yellow
|
||||
"<td bgcolor=#ffff00> </td>"
|
||||
"<td> Read all termlists (msg2). (high priority)"
|
||||
"Thicker lines for bigger reads.</td>"
|
||||
"</tr>"
|
||||
|
||||
|
||||
// dark purple
|
||||
"<tr class=poo>"
|
||||
"<td bgcolor=#8220ff> </td>"
|
||||
@ -208,65 +151,24 @@ bool sendPagePerf ( TcpSocket *s , HttpRequest *r ) {
|
||||
"Thicker lines for bigger merges.</td>"
|
||||
"</tr>"
|
||||
|
||||
|
||||
// white
|
||||
"<tr class=poo>"
|
||||
"<td bgcolor=#ffffff> </td>"
|
||||
"<td> Uncompress cached document.</td>"
|
||||
|
||||
// orange
|
||||
"<td bgcolor=#fea915> </td>"
|
||||
"<td> Parse a document. Blocks CPU.</td>"
|
||||
"</tr>"
|
||||
|
||||
|
||||
// bright green
|
||||
"<tr class=poo>"
|
||||
"<td bgcolor=#00ff00> </td>"
|
||||
"<td> Compute search results. "
|
||||
"All terms required. rat=1.</td>"
|
||||
|
||||
// dark green
|
||||
"<td bgcolor=#008000> </td>"
|
||||
"<td> Compute search results. "
|
||||
"Not all terms required. rat=0.</td>"
|
||||
"</tr>"
|
||||
|
||||
// bright green
|
||||
"<tr class=poo>"
|
||||
"<td bgcolor=#ccffcc> </td>"
|
||||
"<td> Inject a document"
|
||||
"</td>"
|
||||
|
||||
// dark green
|
||||
"<td bgcolor=#FFFACD> </td>"
|
||||
"<td> Compute related pages. "
|
||||
"</td>"
|
||||
"<td> Compute search results. </td>"
|
||||
"</tr>"
|
||||
|
||||
"<tr class=poo>"
|
||||
|
||||
"<td bgcolor=#d1e1ff> </td>"
|
||||
"<td> Compute Gigabits. "
|
||||
"</td>"
|
||||
|
||||
"<td bgcolor=#009fe5> </td>"
|
||||
"<td> Quick Poll. "
|
||||
"</td>"
|
||||
|
||||
"</tr>"
|
||||
|
||||
|
||||
"<tr class=poo>"
|
||||
|
||||
"<td bgcolor=#0000b0> </td>"
|
||||
"<td> \"Summary\" extraction (low priority) "
|
||||
"</td>"
|
||||
|
||||
"<td bgcolor=#ffffff> </td>"
|
||||
"<td> </td>"
|
||||
"<td> "
|
||||
"</td>"
|
||||
|
||||
"</tr>"
|
||||
|
||||
|
||||
@ -274,9 +176,6 @@ bool sendPagePerf ( TcpSocket *s , HttpRequest *r ) {
|
||||
"</center>"
|
||||
, LIGHT_BLUE
|
||||
, TABLE_STYLE
|
||||
//,g_stats.m_keyCols.getBufStart() &&
|
||||
//g_conf.m_dynamicPerfGraph ?
|
||||
//g_stats.m_keyCols.getBufStart() : ""
|
||||
);
|
||||
|
||||
if(autoRefresh > 0) p.safePrintf("</body>");
|
||||
|
@ -93,22 +93,7 @@ static bool sendReply(State0 *st, char *reply) {
|
||||
if ( reply ) rlen = strlen(reply);
|
||||
logf(LOG_DEBUG,"gb: sending back %" PRId32" bytes",rlen);
|
||||
|
||||
// . use light brown if coming directly from an end user
|
||||
// . use darker brown if xml feed
|
||||
int32_t color = 0x00b58869;
|
||||
if ( si->m_format != FORMAT_HTML ) {
|
||||
color = 0x00753d30 ;
|
||||
}
|
||||
|
||||
int64_t nowms = gettimeofdayInMilliseconds();
|
||||
int64_t took = nowms - st->m_startTime ;
|
||||
g_stats.addStat_r ( took ,
|
||||
st->m_startTime ,
|
||||
nowms,
|
||||
color ,
|
||||
STAT_QUERY );
|
||||
|
||||
Statistics::register_query_time(si->m_q.m_numWords, si->m_queryLangId, took);
|
||||
Statistics::register_query_time(si->m_q.m_numWords, si->m_queryLangId, (gettimeofdayInMilliseconds() - st->m_startTime));
|
||||
|
||||
// . log the time
|
||||
// . do not do this if g_errno is set lest m_sbuf1 be bogus b/c
|
||||
|
28
Proxy.cpp
28
Proxy.cpp
@ -617,22 +617,10 @@ void Proxy::gotReplyPage ( void *state, UdpSlot *slot ) {
|
||||
// add stat for stats graph
|
||||
if ( stC->m_isQuery ) {
|
||||
g_stats.logAvgQueryTime(stC->m_startTime);
|
||||
// i dont check if query is raw or not
|
||||
int32_t color = 0x00b58869;
|
||||
if ( stC->m_raw ) color = 0x00753d30;
|
||||
int64_t nowms = gettimeofdayInMilliseconds();
|
||||
// . add the stat
|
||||
// . use brown for the stat
|
||||
g_stats.addStat_r ( 0 ,
|
||||
stC->m_startTime ,
|
||||
nowms ,
|
||||
//"query",
|
||||
color ,
|
||||
STAT_QUERY );
|
||||
g_stats.m_numSuccess++;
|
||||
|
||||
/// @todo ALC use proper query language
|
||||
Statistics::register_query_time(stC->m_numQueryTerms, langUnknown, (nowms - stC->m_startTime));
|
||||
Statistics::register_query_time(stC->m_numQueryTerms, langUnknown, (gettimeofdayInMilliseconds() - stC->m_startTime));
|
||||
}
|
||||
|
||||
// let tcp server free it when done
|
||||
@ -653,22 +641,10 @@ void Proxy::gotReplyPage ( void *state, UdpSlot *slot ) {
|
||||
|
||||
if ( stC->m_isQuery && httpStatus == 200 ){
|
||||
g_stats.logAvgQueryTime(stC->m_startTime);
|
||||
// i dont check if query is raw or not
|
||||
int32_t color = 0x00b58869;
|
||||
if ( stC->m_raw ) color = 0x00753d30;
|
||||
int64_t nowms = gettimeofdayInMilliseconds();
|
||||
// . add the stat
|
||||
// . use brown for the stat
|
||||
g_stats.addStat_r ( 0 ,
|
||||
stC->m_startTime ,
|
||||
nowms ,
|
||||
//"query",
|
||||
color ,
|
||||
STAT_QUERY );
|
||||
g_stats.m_numSuccess++;
|
||||
|
||||
/// @todo ALC use proper query language
|
||||
Statistics::register_query_time(stC->m_numQueryTerms, langUnknown, (nowms - stC->m_startTime));
|
||||
Statistics::register_query_time(stC->m_numQueryTerms, langUnknown, (gettimeofdayInMilliseconds() - stC->m_startTime));
|
||||
}
|
||||
else if ( stC->m_isQuery && httpStatus != 200 )
|
||||
g_stats.m_numFails++;
|
||||
|
1
Stats.h
1
Stats.h
@ -29,7 +29,6 @@ class StatPoint {
|
||||
#define MAX_LINES (DY / (MAX_WIDTH+1)) // leave free pixel above each line
|
||||
|
||||
#define STAT_GENERIC 0
|
||||
#define STAT_QUERY 1
|
||||
#define MAX_BUCKETS 16
|
||||
|
||||
class Stats {
|
||||
|
10
XmlDoc.cpp
10
XmlDoc.cpp
@ -970,10 +970,7 @@ bool XmlDoc::set2 ( char *titleRec ,
|
||||
}
|
||||
// . add the stat
|
||||
// . use white for the stat
|
||||
g_stats.addStat_r ( 0 ,
|
||||
startTime ,
|
||||
gettimeofdayInMilliseconds(),
|
||||
0x00ffffff );
|
||||
g_stats.addStat_r(0, startTime, gettimeofdayInMilliseconds(), 0x00ffffff);
|
||||
|
||||
// first 2 bytes in m_ubuf is the header size
|
||||
int32_t headerSize = *(uint16_t *)m_ubuf;
|
||||
@ -3063,10 +3060,7 @@ SafeBuf *XmlDoc::getTitleRecBuf ( ) {
|
||||
|
||||
// . add the stat
|
||||
// . use white for the stat
|
||||
g_stats.addStat_r ( 0 ,
|
||||
startTime ,
|
||||
gettimeofdayInMilliseconds(),
|
||||
0x00ffffff );
|
||||
g_stats.addStat_r(0, startTime, gettimeofdayInMilliseconds(), 0x00ffffff);
|
||||
|
||||
char *cbuf = m_titleRecBuf.getBufStart();
|
||||
m_titleRecKey = *(key96_t *)cbuf;
|
||||
|
Reference in New Issue
Block a user