Fix pagecrawlbot.cpp to accept the collection as &c=<token>-<name>.

Clean up memory more thoroughly at process exit.
This commit is contained in:
Matt Wells
2014-01-22 23:40:38 -08:00
parent df063dbdf2
commit bc35b7d0ec
6 changed files with 46 additions and 9 deletions

@ -1465,6 +1465,13 @@ void CollectionRec::reset() {
Rdb *rdb = g_process.m_rdbs[i];
rdb->resetBase ( m_collnum );
}
// free every RdbBase still referenced from m_bases[]. clear each slot
// after deleting it so a later call to reset() sees NULL and skips it
// instead of deleting the same object a second time.
for ( long i = 0 ; i < g_process.m_numRdbs ; i++ ) {
	RdbBase *base = m_bases[i];
	if ( ! base ) continue;
	// keep the memory accounting in sync before the real delete
	mdelete (base, sizeof(RdbBase), "Rdb Coll");
	delete (base);
	m_bases[i] = NULL;
}
}
CollectionRec *g_cr = NULL;

@ -1285,6 +1285,10 @@ bool CountryCode::loadHashTable(void) {
return(s_catToCountry.load(g_hostdb.m_dir, "catcountry.dat"));
}
// Resets s_catToCountry, the table that loadHashTable() loads from
// "catcountry.dat".
void CountryCode::reset() {
	s_catToCountry.reset();
}
// Returns the current value of s_numCountryCodes.
int CountryCode::getNumCodes(void) {
	return s_numCountryCodes;
}

@ -25,6 +25,7 @@ class CountryCode {
uint8_t getLanguageFromDMOZ(long catid);
int createHashTable(void);
bool loadHashTable(void);
void reset();
long getNumEntries(void);
void debugDumpNumbers(void);
uint64_t getLanguagesWritten(int index);

@ -1537,6 +1537,29 @@ bool sendPageCrawlbot ( TcpSocket *socket , HttpRequest *hr ) {
// . put in xml or json if format=xml or format=json or
// xml=1 or json=1 ...
char fmt = FMT_JSON;
// token is always required. get from json or html form input
//char *token = getInputString ( "token" );
char *token = hr->getString("token");
char *name = hr->getString("name");
// . try getting token-name from ?c=
// . the name of the collection is encoded as <token>-<crawlname>
char *c = hr->getString("c");
// scratch copy of the ?c= value so it can be split in place
char tmp[MAX_COLL_LEN+100];
if ( ! token && c ) {
	// strncpy() writes no terminating \0 when "c" holds MAX_COLL_LEN
	// or more chars, and tmp[] is uninitialized stack memory, so
	// terminate it ourselves before treating it as a string.
	strncpy ( tmp , c , MAX_COLL_LEN );
	tmp[MAX_COLL_LEN] = '\0';
	token = tmp;
	// split "<token>-<crawlname>" at the first hyphen. name is set
	// to NULL when there is no hyphen.
	name = strstr(tmp,"-");
	if ( name ) {
		*name = '\0';
		name++;
	}
	// change default formatting to html
	fmt = FMT_HTML;
}
char *fs = hr->getString("format",NULL,NULL);
// give john a json api
if ( fs && strcmp(fs,"html") == 0 ) fmt = FMT_HTML;
@ -1545,9 +1568,7 @@ bool sendPageCrawlbot ( TcpSocket *socket , HttpRequest *hr ) {
// if we got json as input, give it as output
//if ( JS.getFirstItem() ) fmt = FMT_JSON;
// token is always required. get from json or html form input
//char *token = getInputString ( "token" );
char *token = hr->getString("token");
if ( ! token && fmt == FMT_JSON ) { // (cast==0|| fmt == FMT_JSON ) ) {
char *msg = "invalid token";
@ -1607,8 +1628,6 @@ bool sendPageCrawlbot ( TcpSocket *socket , HttpRequest *hr ) {
bool restartColl = hr->hasField("restart");
char *name = hr->getString("name");
//if ( delColl && ! && cast == 0 ) {
// log("crawlbot: no collection found to delete.");
// char *msg = "Could not find crawl to delete.";

@ -414,7 +414,7 @@ bool Process::init ( ) {
//m_rdbs[m_numRdbs++] = g_tfndb.getRdb ();
m_rdbs[m_numRdbs++] = g_titledb.getRdb ();
//m_rdbs[m_numRdbs++] = g_revdb.getRdb ();
//m_rdbs[m_numRdbs++] = g_sectiondb.getRdb ();
m_rdbs[m_numRdbs++] = g_sectiondb.getRdb ();
m_rdbs[m_numRdbs++] = g_posdb.getRdb ();
//m_rdbs[m_numRdbs++] = g_datedb.getRdb ();
m_rdbs[m_numRdbs++] = g_spiderdb.getRdb ();
@ -434,7 +434,7 @@ bool Process::init ( ) {
//m_rdbs[m_numRdbs++] = g_tfndb2.getRdb ();
m_rdbs[m_numRdbs++] = g_titledb2.getRdb ();
//m_rdbs[m_numRdbs++] = g_revdb2.getRdb ();
//m_rdbs[m_numRdbs++] = g_sectiondb2.getRdb ();
m_rdbs[m_numRdbs++] = g_sectiondb2.getRdb ();
m_rdbs[m_numRdbs++] = g_posdb2.getRdb ();
//m_rdbs[m_numRdbs++] = g_datedb2.getRdb ();
m_rdbs[m_numRdbs++] = g_spiderdb2.getRdb ();
@ -1782,6 +1782,8 @@ void Process::resetAll ( ) {
g_wiktionary.reset();
g_countryCode.reset();
s_clusterdbQuickCache.reset();
s_hammerCache.reset();
s_table32.reset();
@ -1835,7 +1837,7 @@ void Process::resetPageCaches ( ) {
//g_datedb .getDiskPageCache()->reset();
g_linkdb .getDiskPageCache()->reset();
g_titledb .getDiskPageCache()->reset();
//g_sectiondb .getDiskPageCache()->reset();
g_sectiondb .getDiskPageCache()->reset();
g_tagdb .getDiskPageCache()->reset();
g_spiderdb .getDiskPageCache()->reset();
//g_tfndb .getDiskPageCache()->reset();

@ -263,7 +263,11 @@ long SpiderRequest::printToTable ( SafeBuf *sb , char *status ,
long long elapsed = now - xd->m_startTime;
sb->safePrintf(" <td>%li</td>\n",row);
sb->safePrintf(" <td>%llims</td>\n",elapsed);
sb->safePrintf(" <td>%li</td>\n",(long)xd->m_collnum);
collnum_t collnum = xd->m_collnum;
CollectionRec *cr = g_collectiondb.getRec(collnum);
char *cs = ""; if ( cr ) cs = cr->m_coll;
sb->safePrintf(" <td><a href=/crawlbot?c=%s>%li</a></td>\n",
cs,(long)collnum);
}
sb->safePrintf(" <td><nobr>");