forked from Mirrors/privacore-open-source-search-engine
Fix pagecrawlbot.cpp to accept &c=<token>-<crawlname> when no separate token parameter is given.
Clean up memory more thoroughly at process exit.
This commit is contained in:
@ -1465,6 +1465,13 @@ void CollectionRec::reset() {
|
||||
Rdb *rdb = g_process.m_rdbs[i];
|
||||
rdb->resetBase ( m_collnum );
|
||||
}
|
||||
|
||||
for ( long i = 0 ; i < g_process.m_numRdbs ; i++ ) {
|
||||
RdbBase *base = m_bases[i];
|
||||
if ( ! base ) continue;
|
||||
mdelete (base, sizeof(RdbBase), "Rdb Coll");
|
||||
delete (base);
|
||||
}
|
||||
}
|
||||
|
||||
CollectionRec *g_cr = NULL;
|
||||
|
@ -1285,6 +1285,10 @@ bool CountryCode::loadHashTable(void) {
|
||||
return(s_catToCountry.load(g_hostdb.m_dir, "catcountry.dat"));
|
||||
}
|
||||
|
||||
// Resets s_catToCountry, the same table that loadHashTable() loads from
// "catcountry.dat". Takes no arguments and returns nothing.
// NOTE(review): presumably this releases the table's memory so that
// process-exit cleanup leaves nothing allocated -- confirm against the
// table type's reset() implementation, which is not visible here.
void CountryCode::reset ( ) {
|
||||
s_catToCountry.reset();
|
||||
}
|
||||
|
||||
// Returns the current value of s_numCountryCodes.
// NOTE(review): s_numCountryCodes is defined outside this view; presumably
// it is the number of entries in the country-code table -- confirm.
int CountryCode::getNumCodes(void) {
|
||||
return(s_numCountryCodes);
|
||||
}
|
||||
|
@ -25,6 +25,7 @@ class CountryCode {
|
||||
uint8_t getLanguageFromDMOZ(long catid);
|
||||
int createHashTable(void);
|
||||
bool loadHashTable(void);
|
||||
void reset();
|
||||
long getNumEntries(void);
|
||||
void debugDumpNumbers(void);
|
||||
uint64_t getLanguagesWritten(int index);
|
||||
|
@ -1537,6 +1537,29 @@ bool sendPageCrawlbot ( TcpSocket *socket , HttpRequest *hr ) {
|
||||
// . put in xml or json if format=xml or format=json or
|
||||
// xml=1 or json=1 ...
|
||||
char fmt = FMT_JSON;
|
||||
|
||||
// token is always required. get from json or html form input
|
||||
//char *token = getInputString ( "token" );
|
||||
char *token = hr->getString("token");
|
||||
char *name = hr->getString("name");
|
||||
|
||||
// . try getting token-name from ?c=
|
||||
// . the name of the collection is encoded as <token>-<crawlname>
|
||||
char *c = hr->getString("c");
|
||||
char tmp[MAX_COLL_LEN+100];
|
||||
if ( ! token && c ) {
|
||||
strncpy ( tmp , c , MAX_COLL_LEN );
|
||||
token = tmp;
|
||||
name = strstr(tmp,"-");
|
||||
if ( name ) {
|
||||
*name = '\0';
|
||||
name++;
|
||||
}
|
||||
// change default formatting to html
|
||||
fmt = FMT_HTML;
|
||||
}
|
||||
|
||||
|
||||
char *fs = hr->getString("format",NULL,NULL);
|
||||
// give john a json api
|
||||
if ( fs && strcmp(fs,"html") == 0 ) fmt = FMT_HTML;
|
||||
@ -1545,9 +1568,7 @@ bool sendPageCrawlbot ( TcpSocket *socket , HttpRequest *hr ) {
|
||||
// if we got json as input, give it as output
|
||||
//if ( JS.getFirstItem() ) fmt = FMT_JSON;
|
||||
|
||||
// token is always required. get from json or html form input
|
||||
//char *token = getInputString ( "token" );
|
||||
char *token = hr->getString("token");
|
||||
|
||||
|
||||
if ( ! token && fmt == FMT_JSON ) { // (cast==0|| fmt == FMT_JSON ) ) {
|
||||
char *msg = "invalid token";
|
||||
@ -1607,8 +1628,6 @@ bool sendPageCrawlbot ( TcpSocket *socket , HttpRequest *hr ) {
|
||||
bool restartColl = hr->hasField("restart");
|
||||
|
||||
|
||||
char *name = hr->getString("name");
|
||||
|
||||
//if ( delColl && ! && cast == 0 ) {
|
||||
// log("crawlbot: no collection found to delete.");
|
||||
// char *msg = "Could not find crawl to delete.";
|
||||
|
@ -414,7 +414,7 @@ bool Process::init ( ) {
|
||||
//m_rdbs[m_numRdbs++] = g_tfndb.getRdb ();
|
||||
m_rdbs[m_numRdbs++] = g_titledb.getRdb ();
|
||||
//m_rdbs[m_numRdbs++] = g_revdb.getRdb ();
|
||||
//m_rdbs[m_numRdbs++] = g_sectiondb.getRdb ();
|
||||
m_rdbs[m_numRdbs++] = g_sectiondb.getRdb ();
|
||||
m_rdbs[m_numRdbs++] = g_posdb.getRdb ();
|
||||
//m_rdbs[m_numRdbs++] = g_datedb.getRdb ();
|
||||
m_rdbs[m_numRdbs++] = g_spiderdb.getRdb ();
|
||||
@ -434,7 +434,7 @@ bool Process::init ( ) {
|
||||
//m_rdbs[m_numRdbs++] = g_tfndb2.getRdb ();
|
||||
m_rdbs[m_numRdbs++] = g_titledb2.getRdb ();
|
||||
//m_rdbs[m_numRdbs++] = g_revdb2.getRdb ();
|
||||
//m_rdbs[m_numRdbs++] = g_sectiondb2.getRdb ();
|
||||
m_rdbs[m_numRdbs++] = g_sectiondb2.getRdb ();
|
||||
m_rdbs[m_numRdbs++] = g_posdb2.getRdb ();
|
||||
//m_rdbs[m_numRdbs++] = g_datedb2.getRdb ();
|
||||
m_rdbs[m_numRdbs++] = g_spiderdb2.getRdb ();
|
||||
@ -1782,6 +1782,8 @@ void Process::resetAll ( ) {
|
||||
|
||||
g_wiktionary.reset();
|
||||
|
||||
g_countryCode.reset();
|
||||
|
||||
s_clusterdbQuickCache.reset();
|
||||
s_hammerCache.reset();
|
||||
s_table32.reset();
|
||||
@ -1835,7 +1837,7 @@ void Process::resetPageCaches ( ) {
|
||||
//g_datedb .getDiskPageCache()->reset();
|
||||
g_linkdb .getDiskPageCache()->reset();
|
||||
g_titledb .getDiskPageCache()->reset();
|
||||
//g_sectiondb .getDiskPageCache()->reset();
|
||||
g_sectiondb .getDiskPageCache()->reset();
|
||||
g_tagdb .getDiskPageCache()->reset();
|
||||
g_spiderdb .getDiskPageCache()->reset();
|
||||
//g_tfndb .getDiskPageCache()->reset();
|
||||
|
@ -263,7 +263,11 @@ long SpiderRequest::printToTable ( SafeBuf *sb , char *status ,
|
||||
long long elapsed = now - xd->m_startTime;
|
||||
sb->safePrintf(" <td>%li</td>\n",row);
|
||||
sb->safePrintf(" <td>%llims</td>\n",elapsed);
|
||||
sb->safePrintf(" <td>%li</td>\n",(long)xd->m_collnum);
|
||||
collnum_t collnum = xd->m_collnum;
|
||||
CollectionRec *cr = g_collectiondb.getRec(collnum);
|
||||
char *cs = ""; if ( cr ) cs = cr->m_coll;
|
||||
sb->safePrintf(" <td><a href=/crawlbot?c=%s>%li</a></td>\n",
|
||||
cs,(long)collnum);
|
||||
}
|
||||
|
||||
sb->safePrintf(" <td><nobr>");
|
||||
|
Reference in New Issue
Block a user