formatting

This commit is contained in:
Matt Wells
2014-01-19 10:44:19 -08:00
parent e6eb9003b5
commit 471599e9e7
5 changed files with 191 additions and 162 deletions

@ -2606,6 +2606,8 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
return false;
// shortcut
XmlDoc **docs = g_spiderLoop.m_docs;
// row count
long j = 0;
// first print the spider recs we are spidering
for ( long i = 0 ; i < (long)MAX_SPIDERS ; i++ ) {
// get it
@ -2622,8 +2624,9 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
// get status
char *status = xd->m_statusMsg;
// show that
if ( ! oldsr->printToTableSimple ( &sb , status,xd) )
if ( ! oldsr->printToTableSimple ( &sb , status,xd,j))
return false;
j++;
}
// end the table

@ -557,7 +557,7 @@ skipReplaceHost:
// print spare hosts table
sb.safePrintf (
"<table %s>"
"<tr><td colspan=10 bgcolor=#%s><center>"
"<tr class=hdrow><td colspan=10><center>"
//"<font size=+1>"
"<b>Spares</b>"
//"</font>"
@ -578,7 +578,6 @@ skipReplaceHost:
"<td><b>note</td>",
TABLE_STYLE,
DARK_BLUE ,
DARK_BLUE );
for ( long i = 0; i < g_hostdb.m_numSpareHosts; i++ ) {
@ -623,7 +622,7 @@ skipReplaceHost:
// print proxy hosts table
sb.safePrintf (
"<table %s>"
"<tr><td colspan=12 bgcolor=#%s><center>"
"<tr class=hdrow><td colspan=12><center>"
//"<font size=+1>"
"<b>Proxies</b>"
//"</font>"
@ -650,7 +649,6 @@ skipReplaceHost:
"<td><b>note</td>",
TABLE_STYLE,
DARK_BLUE ,
DARK_BLUE
);
for ( long i = 0; i < g_hostdb.m_numProxyHosts; i++ ) {
@ -739,7 +737,7 @@ skipReplaceHost:
// print help table
sb.safePrintf (
"<table %s>"
"<tr><td colspan=10 bgcolor=#%s><center>"
"<tr class=hdrow><td colspan=10><center>"
//"<font size=+1>"
"<b>Key</b>"
//"</font>"
@ -926,8 +924,8 @@ skipReplaceHost:
,
TABLE_STYLE ,
DARK_BLUE );
TABLE_STYLE
);
sb.safePrintf ( "</table><br></form><br>" );

@ -2390,15 +2390,14 @@ bool sendPageCgiParms ( TcpSocket *s , HttpRequest *r ) {
// p.incrementLength ( pp - p.getBuf() );
// }
p.safePrintf ( "<table width=100%% cellpadding=4 "
"bgcolor=#%s border=1>"
"<tr><td colspan=8 bgcolor=#%s>"
p.safePrintf ( "<table %s>"
"<tr class=hdrow><td colspan=8>"
"<center><b>CGI Parameters</b></tr></tr>"
"<tr><td><b>CGI</b></td>"
"<tr bgcolor=#%s><td><b>CGI</b></td>"
"<td><b>Page</b></td>"
"<td><b>Type</b></td>"
"<td><b>Name</b></td><td><b>Description</b></td></tr>\n",
LIGHT_BLUE, DARK_BLUE );
TABLE_STYLE , DARK_BLUE);
for ( long i = 0; i < g_parms.m_numParms; i++ ) {
Parm *parm = &g_parms.m_parms[i];
if ( !parm->m_sparm ) continue;
@ -2413,7 +2412,8 @@ bool sendPageCgiParms ( TcpSocket *s , HttpRequest *r ) {
if ( ! page ) page = "";
// print the parm
p.safePrintf ( "<tr><td><b>%s</b></td>", cgi );
p.safePrintf ( "<tr bgcolor=#%s><td><b>%s</b></td>",
LIGHT_BLUE , cgi );
p.safePrintf("<td>%s</td>",page);
p.safePrintf("<td nowrap=1>");
switch ( parm->m_type ) {
@ -2435,14 +2435,13 @@ bool sendPageCgiParms ( TcpSocket *s , HttpRequest *r ) {
}
p.safePrintf ( "</table><br><br>" );
p.safePrintf ( "<table width=100%% cellpadding=2 "
"bgcolor=#%s border=1>"
"<tr><td colspan=2 bgcolor=#%s>"
p.safePrintf ( "<table %s>"
"<tr class=hdrow><td colspan=2>"
"<center><b>Query Operators</b></td></tr>"
"<tr><td><b>Operator</b></td>"
"<td><b>Description</b>"
"</td></tr>\n",
LIGHT_BLUE, DARK_BLUE );
TABLE_STYLE );
// table of the query keywords
long n = getNumFieldCodes();
for ( long i = 0 ; i < n ; i++ ) {
@ -2452,8 +2451,9 @@ bool sendPageCgiParms ( TcpSocket *s , HttpRequest *r ) {
char *d = f->desc;
// fix table internal cell bordering
if ( d[0] == '\0' ) d = "&nbsp;";
p.safePrintf("<tr><td><b>%s</b>:</td><td>%s</td></tr>\n",
f->text,d);
p.safePrintf("<tr bgcolor=#%s>"
"<td><b>%s</b>:</td><td>%s</td></tr>\n",
LIGHT_BLUE,f->text,d);
}
p.safePrintf("</body></html>");

@ -252,14 +252,15 @@ long SpiderReply::print ( SafeBuf *sbarg ) {
long SpiderRequest::printToTable ( SafeBuf *sb , char *status ,
XmlDoc *xd ) {
XmlDoc *xd , long row ) {
sb->safePrintf("<tr>\n");
sb->safePrintf("<tr bgcolor=#%s>\n",LIGHT_BLUE);
// show elapsed time
if ( xd ) {
long long now = gettimeofdayInMilliseconds();
long long elapsed = now - xd->m_startTime;
sb->safePrintf(" <td>%li</td>\n",row);
sb->safePrintf(" <td>%llims</td>\n",elapsed);
sb->safePrintf(" <td>%li</td>\n",(long)xd->m_collnum);
}
@ -361,10 +362,11 @@ long SpiderRequest::printToTable ( SafeBuf *sb , char *status ,
long SpiderRequest::printTableHeaderSimple ( SafeBuf *sb ,
bool currentlySpidering) {
sb->safePrintf("<tr>\n");
sb->safePrintf("<tr bgcolor=#%s>\n",DARK_BLUE);
// how long its been being spidered
if ( currentlySpidering ) {
sb->safePrintf(" <td><b>#</b></td>\n");
sb->safePrintf(" <td><b>elapsed</b></td>\n");
sb->safePrintf(" <td><b>coll</b></td>\n");
}
@ -384,14 +386,15 @@ long SpiderRequest::printTableHeaderSimple ( SafeBuf *sb ,
}
long SpiderRequest::printToTableSimple ( SafeBuf *sb , char *status ,
XmlDoc *xd ) {
XmlDoc *xd , long row ) {
sb->safePrintf("<tr>\n");
sb->safePrintf("<tr bgcolor=#%s>\n",LIGHT_BLUE);
// show elapsed time
if ( xd ) {
long long now = gettimeofdayInMilliseconds();
long long elapsed = now - xd->m_startTime;
sb->safePrintf(" <td>%li</td>\n",row);
sb->safePrintf(" <td>%llims</td>\n",elapsed);
}
@ -465,10 +468,11 @@ long SpiderRequest::printToTableSimple ( SafeBuf *sb , char *status ,
long SpiderRequest::printTableHeader ( SafeBuf *sb , bool currentlySpidering) {
sb->safePrintf("<tr>\n");
sb->safePrintf("<tr bgcolor=#%s>\n",DARK_BLUE);
// how long its been being spidered
if ( currentlySpidering ) {
sb->safePrintf(" <td><b>#</b></td>\n");
sb->safePrintf(" <td><b>elapsed</b></td>\n");
sb->safePrintf(" <td><b>coll</b></td>\n");
}
@ -7309,6 +7313,8 @@ bool printList ( State11 *st ) {
SafeBuf *sbTable = &st->m_safeBuf;
// shorcuts
RdbList *list = &st->m_list;
// row count
long j = 0;
// put it in there
for ( ; ! list->isExhausted() ; list->skipCurrentRecord() ) {
// stop if we got enough
@ -7337,7 +7343,10 @@ bool printList ( State11 *st ) {
// get the spider rec, encapsed in the data of the doledb rec
SpiderRequest *sreq = (SpiderRequest *)rec;
// print it into sbTable
if ( ! sreq->printToTable ( sbTable,"ready",NULL))return false;
if ( ! sreq->printToTable ( sbTable,"ready",NULL,j))
return false;
// count row
j++;
}
// need to load more?
if ( st->m_count >= st->m_numRecs ||
@ -7408,21 +7417,24 @@ bool sendPage ( State11 *st ) {
// begin the table
sb.safePrintf ( "<table width=100%% border=1 cellpadding=4 "
"bgcolor=#%s>\n"
"<tr><td colspan=50 bgcolor=#%s>"
"<b>Currently Spidering on This Host</b> (%li spiders)"
sb.safePrintf ( "<table %s>\n"
"<tr><td colspan=50>"
"<center>"
"<b>Currently Spidering on This Host</b>"
//" (%li spiders)"
//" (%li locks)"
"</center>"
"</td></tr>\n" ,
LIGHT_BLUE,
DARK_BLUE,
(long)g_spiderLoop.m_numSpidersOut
TABLE_STYLE
//(long)g_spiderLoop.m_numSpidersOut
//g_spiderLoop.m_lockTable.m_numSlotsUsed);
);
// the table headers so SpiderRequest::printToTable() works
if ( ! SpiderRequest::printTableHeader ( &sb , true ) ) return false;
// shortcut
XmlDoc **docs = g_spiderLoop.m_docs;
// count # of spiders out
long j = 0;
// first print the spider recs we are spidering
for ( long i = 0 ; i < (long)MAX_SPIDERS ; i++ ) {
// get it
@ -7436,113 +7448,15 @@ bool sendPage ( State11 *st ) {
// get status
char *status = xd->m_statusMsg;
// show that
if ( ! oldsr->printToTable ( &sb , status,xd) ) return false;
if ( ! oldsr->printToTable ( &sb , status,xd,j) ) return false;
// inc count
j++;
}
// end the table
sb.safePrintf ( "</table>\n" );
sb.safePrintf ( "<br>\n" );
// begin the table
sb.safePrintf ( "<table width=100%% border=1 cellpadding=4 "
"bgcolor=#%s>\n"
"<tr><td colspan=50 bgcolor=#%s>"
"<b>Ready to Spider (doledb)(coll = "
"<font color=red><b>%s</b>"
"</font>)"
,
LIGHT_BLUE,
DARK_BLUE ,
st->m_coll );
// print time format: 7/23/1971 10:45:32
time_t nowUTC = getTimeGlobal();
struct tm *timeStruct ;
char time[256];
timeStruct = gmtime ( &nowUTC );
strftime ( time , 256 , "%b %e %T %Y UTC", timeStruct );
sb.safePrintf("</b> (current time = %s = %lu) "
"</td></tr>\n"
,time,nowUTC);
// the table headers so SpiderRequest::printToTable() works
if ( ! SpiderRequest::printTableHeader ( &sb ,false ) ) return false;
// the the doledb spider recs
char *bs = sbTable->getBufStart();
if ( bs && ! sb.safePrintf("%s",bs) ) return false;
// end the table
sb.safePrintf ( "</table>\n" );
sb.safePrintf ( "<br>\n" );
// then spider collection
//SpiderColl *sc = g_spiderCache.m_spiderColls[collnum];
SpiderColl *sc = g_spiderCache.getSpiderColl(collnum);
/////////////////
//
// PRINT WAITING TREE
//
// each row is an ip. print the next url to spider for that ip.
//
/////////////////
sb.safePrintf ( "<table width=100%% border=1 cellpadding=4 "
"bgcolor=#%s>\n"
"<tr><td colspan=50 bgcolor=#%s>"
"<b>IPs Waiting for Selection Scan (coll = "
"<font color=red><b>%s</b>"
"</font>)"
,
LIGHT_BLUE,
DARK_BLUE ,
st->m_coll );
// print time format: 7/23/1971 10:45:32
long long timems = gettimeofdayInMillisecondsGlobal();
sb.safePrintf("</b> (current time = %llu)(totalcount=%li)"
"(waittablecount=%li)</td></tr>\n",
timems,
sc->m_waitingTree.getNumUsedNodes(),
sc->m_waitingTable.getNumUsedSlots());
sb.safePrintf("<tr>");
sb.safePrintf("<td><b>spidertime (MS)</b></td>\n");
sb.safePrintf("<td><b>firstip</b></td>\n");
sb.safePrintf("</tr>\n");
// the the waiting tree
long node = sc->m_waitingTree.getFirstNode();
long count = 0;
for ( ; node >= 0 ; node = sc->m_waitingTree.getNextNode(node) ) {
// breathe
QUICKPOLL(MAX_NICENESS);
// get key
key_t *key = (key_t *)sc->m_waitingTree.getKey(node);
// get ip from that
long firstIp = (key->n0) & 0xffffffff;
// get the time
unsigned long long spiderTimeMS = key->n1;
// shift upp
spiderTimeMS <<= 32;
// or in
spiderTimeMS |= (key->n0 >> 32);
// get the rest of the data
sb.safePrintf("<tr>"
"<td>%llu</td>"
"<td>%s</td>"
"</tr>\n",
spiderTimeMS,
iptoa(firstIp));
// stop after 20
if ( ++count == 20 ) break;
}
// ...
if ( count )
sb.safePrintf("<tr><td colspan=10>...</td></tr>\n");
// end the table
sb.safePrintf ( "</table>\n" );
sb.safePrintf ( "<br>\n" );
/*
if ( g_spiderCache.m_numMsgSamples > 0 ) {
sb.safePrintf (
@ -7642,6 +7556,11 @@ bool sendPage ( State11 *st ) {
*/
// then spider collection
//SpiderColl *sc = g_spiderCache.m_spiderColls[collnum];
SpiderColl *sc = g_spiderCache.getSpiderColl(collnum);
//
// spiderdb rec stats, from scanning spiderdb
//
@ -7697,12 +7616,19 @@ bool sendPage ( State11 *st ) {
(double)(sampleOld);
}
sb.safePrintf(
"<style>"
".poo { background-color:#%s;}\n"
"</style>\n" ,
LIGHT_BLUE );
sb.safePrintf (
"<table cellpadding=4 width=100%% bgcolor=#%s border=1>"
"<table %s>"
"<tr>"
"<td colspan=7 bgcolor=#%s>"
"<td colspan=7>"
"<center><b>Spider Stats</b></td></tr>\n"
"<tr><td>"
"<tr bgcolor=#%s><td>"
"</td><td><b>Total</b></td>"
"<td><b>Total New</b></td>"
"<td><b>Total Old</b></td>"
@ -7711,20 +7637,21 @@ bool sendPage ( State11 *st ) {
"<td><b>Sample Old</b></b>"
"</td></tr>"
"<tr><td><b>Total Spiders</n>"
"<tr class=poo><td><b>Total Spiders</n>"
"</td><td>%lli</td><td>%lli</td><td>%lli</td>\n"
"</td><td>%li</td><td>%li</td><td>%li</td></tr>\n"
//"<tr><td><b>Successful Spiders</n>"
//"<tr class=poo><td><b>Successful Spiders</n>"
//"</td><td>%lli</td><td>%lli</td><td>%lli</td>\n"
//"</td><td>%li</td><td>%li</td><td>%li</td></tr>\n"
//"<tr><td><b>Failed Spiders</n>"
//"<tr class=poo><td><b>Failed Spiders</n>"
//"</td><td>%lli</td><td>%lli</td><td>%lli</td>\n"
//"</td><td>%li</td><td>%li</td><td>%li</td></tr>\n"
"<tr><td><b>Success Rate</b>"
"<tr class=poo><td><b>Success Rate</b>"
"</td><td>%.02f%%</td><td>%.02f%%</td>"
"</td><td>%.02f%%</td><td>%.02f%%</td>"
"</td><td>%.02f%%</td><td>%.02f%%</td></tr>",
LIGHT_BLUE, DARK_BLUE,
TABLE_STYLE,
DARK_BLUE,
totalPoints,
totalNew,
totalOld,
@ -7772,7 +7699,7 @@ bool sendPage ( State11 *st ) {
g_stats.m_allErrorsOld[i] == 0 &&
bucketsNew[i] == 0 && bucketsOld[i] == 0 ) continue;
sb.safePrintf (
"<tr><td><b>%s</b></td>"
"<tr bgcolor=#%s><td><b>%s</b></td>"
"<td>%lli</td>"
"<td>%lli</td>"
"<td>%lli</td>"
@ -7780,6 +7707,7 @@ bool sendPage ( State11 *st ) {
"<td>%li</td>"
"<td>%li</td>"
"</tr>\n" ,
LIGHT_BLUE,
mstrerror(i),
g_stats.m_allErrorsNew[i] +
g_stats.m_allErrorsOld[i],
@ -7790,7 +7718,7 @@ bool sendPage ( State11 *st ) {
bucketsOld[i] );
}
sb.safePrintf ( "</table><br><br>\n" );
sb.safePrintf ( "</table><br>\n" );
@ -7799,48 +7727,48 @@ bool sendPage ( State11 *st ) {
sb.safePrintf (
"<table width=100%% bgcolor=#%s "
"cellpadding=4 border=1>"
"<tr>"
"<tr class=poo>"
"<td colspan=2 bgcolor=#%s>"
"<b>Field descriptions</b>"
"</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>hits</td><td>The number of attempts that were "
"made by the spider to read a url from the spider "
"queue cache.</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>misses</td><td>The number of those attempts that "
"failed to get a url to spider.</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>cached</td><td>The number of urls that are "
"currently in the spider queue cache.</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>water</td><td>The number of urls that were in the "
"spider queue cache at any one time, since the start "
"of the last disk scan.</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>kicked</td><td>The number of urls that were "
"replaced in the spider queue cache with urls loaded "
"from disk, since the start of the last disk scan.</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>added</td><td>The number of urls that were added "
"to the spider queue cache since the start of the last "
"disk scan. After a document is spidered its url "
"if often added again to the spider queue cache.</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>attempted</td><td>The number of urls that "
"Gigablast attempted to add to the spider queue cache "
"since the start of the last disk scan. In "
@ -7852,42 +7780,42 @@ bool sendPage ( State11 *st ) {
"spider time.</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>nl</td><td>This is 1 iff Gigablast currently "
"needs to reload the spider queue cache from disk.</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>rnl</td><td>This is 1 iff Gigablast currently "
"really needs to reload the spider queue cache from "
"disk.</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>more</td><td>This is 1 iff there are urls on "
"the disk that are not in the spider queue cache.</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>loading</td><td>This is 1 iff Gigablast is "
"currently loading this spider cache queue from "
"disk.</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>scanned</td><td>The number of bytes that were "
"read from disk since the start of the last disk "
"scan.</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>reads</td><td>The number of disk read "
"operations since the start of the last disk "
"scan.</td>"
"</tr>\n"
"<tr>"
"<tr class=poo>"
"<td>elapsed</td><td>The time in seconds that has "
"elapsed since the start or end of the last disk "
"scan, depending on if a scan is currently in "
@ -7900,6 +7828,106 @@ bool sendPage ( State11 *st ) {
DARK_BLUE );
*/
/////
//
// READY TO SPIDER table
//
/////
// begin the table
sb.safePrintf ( "<table %s>\n"
"<tr><td colspan=50>"
"<b>URLs Ready to Spider for collection "
"<font color=red><b>%s</b>"
"</font>"
,
TABLE_STYLE,
st->m_coll );
// print time format: 7/23/1971 10:45:32
time_t nowUTC = getTimeGlobal();
struct tm *timeStruct ;
char time[256];
timeStruct = gmtime ( &nowUTC );
strftime ( time , 256 , "%b %e %T %Y UTC", timeStruct );
sb.safePrintf("</b>" // (current time = %s = %lu) "
"</td></tr>\n"
//,time,nowUTC
);
// the table headers so SpiderRequest::printToTable() works
if ( ! SpiderRequest::printTableHeader ( &sb ,false ) ) return false;
// the the doledb spider recs
char *bs = sbTable->getBufStart();
if ( bs && ! sb.safePrintf("%s",bs) ) return false;
// end the table
sb.safePrintf ( "</table>\n" );
sb.safePrintf ( "<br>\n" );
/////////////////
//
// PRINT WAITING TREE
//
// each row is an ip. print the next url to spider for that ip.
//
/////////////////
sb.safePrintf ( "<table %s>\n"
"<tr><td colspan=50>"
"<b>IPs Waiting for Selection Scan for collection "
"<font color=red><b>%s</b>"
"</font>"
,
TABLE_STYLE,
st->m_coll );
// print time format: 7/23/1971 10:45:32
long long timems = gettimeofdayInMillisecondsGlobal();
sb.safePrintf("</b> (current time = %llu)(totalcount=%li)"
"(waittablecount=%li)</td></tr>\n",
timems,
sc->m_waitingTree.getNumUsedNodes(),
sc->m_waitingTable.getNumUsedSlots());
sb.safePrintf("<tr bgcolor=#%s>",DARK_BLUE);
sb.safePrintf("<td><b>spidertime (MS)</b></td>\n");
sb.safePrintf("<td><b>firstip</b></td>\n");
sb.safePrintf("</tr>\n");
// the the waiting tree
long node = sc->m_waitingTree.getFirstNode();
long count = 0;
for ( ; node >= 0 ; node = sc->m_waitingTree.getNextNode(node) ) {
// breathe
QUICKPOLL(MAX_NICENESS);
// get key
key_t *key = (key_t *)sc->m_waitingTree.getKey(node);
// get ip from that
long firstIp = (key->n0) & 0xffffffff;
// get the time
unsigned long long spiderTimeMS = key->n1;
// shift upp
spiderTimeMS <<= 32;
// or in
spiderTimeMS |= (key->n0 >> 32);
// get the rest of the data
sb.safePrintf("<tr bgcolor=#%s>"
"<td>%llu</td>"
"<td>%s</td>"
"</tr>\n",
LIGHT_BLUE,
spiderTimeMS,
iptoa(firstIp));
// stop after 20
if ( ++count == 20 ) break;
}
// ...
if ( count )
sb.safePrintf("<tr bgcolor=#%s>"
"<td colspan=10>...</td></tr>\n",
LIGHT_BLUE);
// end the table
sb.safePrintf ( "</table>\n" );
sb.safePrintf ( "<br>\n" );
// get the socket
TcpSocket *s = st->m_socket;
// then we can nuke the state

@ -711,10 +711,10 @@ class SpiderRequest {
long print( class SafeBuf *sb );
long printToTable ( SafeBuf *sb , char *status ,
class XmlDoc *xd ) ;
class XmlDoc *xd , long row ) ;
// for diffbot...
long printToTableSimple ( SafeBuf *sb , char *status ,
class XmlDoc *xd ) ;
class XmlDoc *xd , long row ) ;
static long printTableHeader ( SafeBuf *sb , bool currentlSpidering ) ;
static long printTableHeaderSimple ( SafeBuf *sb ,
bool currentlSpidering ) ;