mirror of
https://github.com/privacore/open-source-search-engine.git
synced 2025-04-18 14:48:48 -04:00
Made spider status #defines into strongly typed enum
This commit is contained in:
parent
0e9cec3133
commit
d07ce5c4df
@ -706,7 +706,7 @@ bool Collectiondb::resetColl2(collnum_t oldCollnum, collnum_t newCollnum) {
|
||||
cr->m_spiderColl = NULL;
|
||||
}
|
||||
|
||||
cr->m_spiderStatus = SP_INITIALIZING; // this is 0
|
||||
cr->m_spiderStatus = spider_status_t::SP_INITIALIZING; // this is 0
|
||||
//cr->m_spiderStatusMsg = NULL;
|
||||
|
||||
// so XmlDoc.cpp can detect if the collection was reset since it
|
||||
@ -768,7 +768,7 @@ bool Collectiondb::resetColl2(collnum_t oldCollnum, collnum_t newCollnum) {
|
||||
g_linkdb.getRdb()->deleteColl ( oldCollnum , newCollnum );
|
||||
|
||||
// reset crawl status too!
|
||||
cr->m_spiderStatus = SP_INITIALIZING;
|
||||
cr->m_spiderStatus = spider_status_t::SP_INITIALIZING;
|
||||
|
||||
// . set m_recs[oldCollnum] to NULL and remove from hash table
|
||||
// . do after calls to deleteColl() above so it wont crash
|
||||
@ -958,7 +958,7 @@ CollectionRec::CollectionRec() {
|
||||
m_overflow = 0x12345678;
|
||||
m_overflow2 = 0x12345678;
|
||||
// the spiders are currently uninhibited i guess
|
||||
m_spiderStatus = SP_INITIALIZING; // this is 0
|
||||
m_spiderStatus = spider_status_t::SP_INITIALIZING; // this is 0
|
||||
// inits for sortbydatetable
|
||||
m_msg5 = NULL;
|
||||
// JAB - track which regex parsers have been initialized
|
||||
|
@ -8,6 +8,7 @@
|
||||
#include <atomic>
|
||||
#include "SafeBuf.h"
|
||||
#include "rdbid_t.h"
|
||||
#include "spider_status_t.h"
|
||||
#include "GbMutex.h"
|
||||
|
||||
|
||||
@ -236,7 +237,7 @@ public:
|
||||
|
||||
int32_t m_maxQueryTerms;
|
||||
|
||||
char m_spiderStatus;
|
||||
spider_status_t m_spiderStatus;
|
||||
|
||||
//ranking settings
|
||||
float m_sameLangWeight;
|
||||
|
@ -841,7 +841,7 @@ bool sendPageBasicStatus ( TcpSocket *socket , HttpRequest *hr ) {
|
||||
// show stats
|
||||
//
|
||||
const char *crawlMsg;
|
||||
int32_t crawlStatus = -1;
|
||||
spider_status_t crawlStatus;
|
||||
getSpiderStatusMsg ( cr , &crawlMsg, &crawlStatus );
|
||||
|
||||
sb.safePrintf(
|
||||
@ -856,7 +856,7 @@ bool sendPageBasicStatus ( TcpSocket *socket , HttpRequest *hr ) {
|
||||
"<td><b>Crawl Status Msg:</td>"
|
||||
"<td>%s</td>"
|
||||
"</tr>"
|
||||
, crawlStatus
|
||||
, (int)crawlStatus
|
||||
, crawlMsg);
|
||||
|
||||
// print link to embed the code in their own site
|
||||
|
@ -35,7 +35,7 @@
|
||||
|
||||
bool printCrawlDetails2 (SafeBuf *sb , CollectionRec *cx , char format ) {
|
||||
const char *crawlMsg;
|
||||
int32_t crawlStatus = -1;
|
||||
spider_status_t crawlStatus;
|
||||
getSpiderStatusMsg ( cx , &crawlMsg, &crawlStatus );
|
||||
|
||||
if ( format == FORMAT_JSON ) {
|
||||
@ -43,7 +43,7 @@ bool printCrawlDetails2 (SafeBuf *sb , CollectionRec *cx , char format ) {
|
||||
"\"response\":{\n"
|
||||
"\t\"statusCode\":%" PRId32",\n"
|
||||
"\t\"statusMsg\":\"%s\",\n"
|
||||
, crawlStatus, crawlMsg);
|
||||
, (int)crawlStatus, crawlMsg);
|
||||
sb->safePrintf("\t\"processStartTime\":%" PRId64",\n", (g_process.m_processStartTime / 1000));
|
||||
sb->safePrintf("\t\"currentTime\":%" PRIu32"\n", (uint32_t)getTimeGlobal() );
|
||||
sb->safePrintf("\t}\n");
|
||||
@ -51,7 +51,7 @@ bool printCrawlDetails2 (SafeBuf *sb , CollectionRec *cx , char format ) {
|
||||
}
|
||||
|
||||
if ( format == FORMAT_XML ) {
|
||||
sb->safePrintf("<response>\n\t<statusCode>%" PRId32"</statusCode>\n", crawlStatus);
|
||||
sb->safePrintf("<response>\n\t<statusCode>%" PRId32"</statusCode>\n", (int)crawlStatus);
|
||||
sb->safePrintf("\t<statusMsg><![CDATA[%s]]></statusMsg>\n", crawlMsg);
|
||||
sb->safePrintf("\t<currentTime>%" PRIu32"</currentTime>\n", (uint32_t)getTimeGlobal() );
|
||||
sb->safePrintf("\t<currentTimeUTC>%" PRIu32"</currentTimeUTC>\n", (uint32_t)getTimeGlobal() );
|
||||
|
@ -224,10 +224,10 @@ static bool printList ( State11 *st ) {
|
||||
|
||||
static bool generatePageHTML(CollectionRec *cr, SafeBuf *sb, const SafeBuf *doledbbuf) {
|
||||
// print reason why spiders are not active for this collection
|
||||
int32_t tmp2;
|
||||
spider_status_t tmp2;
|
||||
const char *crawlMsg;
|
||||
getSpiderStatusMsg ( cr , &crawlMsg, &tmp2 );
|
||||
if ( crawlMsg && tmp2 != SP_INITIALIZING )
|
||||
if ( crawlMsg && tmp2 != spider_status_t::SP_INITIALIZING )
|
||||
sb->safePrintf("<table cellpadding=5 style=\"max-width:600px\" border=0>"
|
||||
"<tr>"
|
||||
"<td>"
|
||||
@ -464,11 +464,11 @@ static bool generatePageHTML(CollectionRec *cr, SafeBuf *sb, const SafeBuf *dole
|
||||
static bool generatePageJSON(CollectionRec *cr, SafeBuf *sb, const SafeBuf *doledbbuf) {
|
||||
sb->safePrintf("{\n\"response\": {\n");
|
||||
|
||||
int32_t crawlStatus;
|
||||
spider_status_t crawlStatus;
|
||||
const char *crawlMsg;
|
||||
getSpiderStatusMsg ( cr , &crawlMsg , &crawlStatus );
|
||||
|
||||
sb->safePrintf("\t\"statusCode\": %d,\n", crawlStatus);
|
||||
sb->safePrintf("\t\"statusCode\": %d,\n", (int)crawlStatus);
|
||||
sb->safePrintf("\t\"statusMsg\": \"%s\",\n", crawlMsg);
|
||||
sb->safePrintf("\t\"spiderCount\": %d,\n", g_spiderLoop.getNumSpidersOut());
|
||||
|
||||
|
26
Spider.cpp
26
Spider.cpp
@ -2785,34 +2785,34 @@ void dedupSpiderdbList ( RdbList *list ) {
|
||||
|
||||
|
||||
|
||||
void getSpiderStatusMsg(const CollectionRec *cx, const char **msg, int32_t *status) {
|
||||
void getSpiderStatusMsg(const CollectionRec *cx, const char **msg, spider_status_t *status) {
|
||||
if ( ! g_conf.m_spideringEnabled ) {
|
||||
*status = SP_ADMIN_PAUSED;
|
||||
*status = spider_status_t::SP_ADMIN_PAUSED;
|
||||
*msg = "Spidering disabled in master controls. You can turn it back on there.";
|
||||
return;
|
||||
}
|
||||
|
||||
if ( g_conf.m_readOnlyMode ) {
|
||||
*status = SP_ADMIN_PAUSED;
|
||||
*status = spider_status_t::SP_ADMIN_PAUSED;
|
||||
*msg = "In read-only mode. Spidering off.";
|
||||
return;
|
||||
}
|
||||
|
||||
if ( g_dailyMerge.m_mergeMode ) {
|
||||
*status = SP_ADMIN_PAUSED;
|
||||
*status = spider_status_t::SP_ADMIN_PAUSED;
|
||||
*msg = "Daily merge engaged, spidering paused.";
|
||||
return;
|
||||
}
|
||||
|
||||
if ( g_repairMode ) {
|
||||
*status = SP_ADMIN_PAUSED;
|
||||
*status = spider_status_t::SP_ADMIN_PAUSED;
|
||||
*msg = "In repair mode, spidering paused.";
|
||||
return;
|
||||
}
|
||||
|
||||
// do not spider until collections/parms in sync with host #0
|
||||
if ( ! g_parms.inSyncWithHost0() ) {
|
||||
*status = SP_ADMIN_PAUSED;
|
||||
*status = spider_status_t::SP_ADMIN_PAUSED;
|
||||
*msg = "Parms not in sync with host #0, spidering paused";
|
||||
return;
|
||||
}
|
||||
@ -2820,25 +2820,25 @@ void getSpiderStatusMsg(const CollectionRec *cx, const char **msg, int32_t *stat
|
||||
// don't spider if not all hosts are up, or they do not all
|
||||
// have the same hosts.conf.
|
||||
if ( g_hostdb.hostsConfInDisagreement() ) {
|
||||
*status = SP_ADMIN_PAUSED;
|
||||
*status = spider_status_t::SP_ADMIN_PAUSED;
|
||||
*msg = "Hosts.conf discrepancy, spidering paused.";
|
||||
return;
|
||||
}
|
||||
|
||||
if ( ! cx->m_spideringEnabled ) {
|
||||
*status = SP_PAUSED;
|
||||
*status = spider_status_t::SP_PAUSED;
|
||||
*msg = "Spidering disabled in spider controls.";
|
||||
return;
|
||||
}
|
||||
|
||||
if ( cx->m_spiderStatus == SP_INITIALIZING ) {
|
||||
*status = SP_INITIALIZING;
|
||||
if ( cx->m_spiderStatus == spider_status_t::SP_INITIALIZING ) {
|
||||
*status = spider_status_t::SP_INITIALIZING;
|
||||
*msg = "Job is initializing.";
|
||||
return;
|
||||
}
|
||||
|
||||
if ( ! g_conf.m_spideringEnabled ) {
|
||||
*status = SP_ADMIN_PAUSED;
|
||||
*status = spider_status_t::SP_ADMIN_PAUSED;
|
||||
*msg = "All crawling temporarily paused by root administrator for maintenance.";
|
||||
return;
|
||||
}
|
||||
@ -2847,14 +2847,14 @@ void getSpiderStatusMsg(const CollectionRec *cx, const char **msg, int32_t *stat
|
||||
// host's counts tallied into it, which could make a difference on
|
||||
// whether we have exceed a maxtocrawl limit or some such, so wait...
|
||||
if (g_hostdb.hasDeadHost()) {
|
||||
*status = SP_ADMIN_PAUSED;
|
||||
*status = spider_status_t::SP_ADMIN_PAUSED;
|
||||
*msg = "All crawling temporarily paused because a shard is down.";
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// otherwise in progress?
|
||||
*status = SP_INPROGRESS;
|
||||
*status = spider_status_t::SP_INPROGRESS;
|
||||
*msg = "Spider is in progress.";
|
||||
}
|
||||
|
||||
|
15
Spider.h
15
Spider.h
@ -8,6 +8,7 @@
|
||||
#include "Rdb.h"
|
||||
#include "Titledb.h" //DOCID_MASK
|
||||
#include "hash.h"
|
||||
#include "spider_status_t.h"
|
||||
|
||||
|
||||
class RdbList;
|
||||
@ -35,20 +36,8 @@ class SafeBuf;
|
||||
// we reduce this below if the spiderdb is smaller.
|
||||
#define MAX_WINNER_NODES 2000
|
||||
|
||||
// . values for CollectionRec::m_spiderStatus
|
||||
// . reasons why crawl is not happening
|
||||
#define SP_INITIALIZING 0
|
||||
//#define SP_UNUSED_1 1
|
||||
//#define SP_UNUSED_2 2
|
||||
//#define SP_UNUSED_3 3
|
||||
//#define SP_UNUSED_4 4
|
||||
//#define SP_UNUSED_5 5
|
||||
#define SP_PAUSED 6 // user paused spider
|
||||
#define SP_INPROGRESS 7 // it is going on!
|
||||
#define SP_ADMIN_PAUSED 8 // g_conf.m_spideringEnabled = false
|
||||
//#define SP_UNUSED_9 9
|
||||
|
||||
void getSpiderStatusMsg(const CollectionRec *cx, const char **msg, int32_t *status);
|
||||
void getSpiderStatusMsg(const CollectionRec *cx, const char **msg, spider_status_t *status);
|
||||
|
||||
|
||||
|
||||
|
@ -907,7 +907,7 @@ skipDoledbRec:
|
||||
}
|
||||
|
||||
// reset reason why crawl is not running, because we basically are now
|
||||
cr->m_spiderStatus = SP_INPROGRESS; // this is 7
|
||||
cr->m_spiderStatus = spider_status_t::SP_INPROGRESS;
|
||||
|
||||
// be sure to save state so we do not re-send emails
|
||||
cr->setNeedsSave();
|
||||
|
14
spider_status_t.h
Normal file
14
spider_status_t.h
Normal file
@ -0,0 +1,14 @@
|
||||
#ifndef COLLECTION_SPIDER_STATUS_T_H_
|
||||
#define COLLECTION_SPIDER_STATUS_T_H_
|
||||
|
||||
// . values for CollectionRec::m_spiderStatus
|
||||
// . reasons why crawl is not happening
|
||||
enum class spider_status_t : char {
|
||||
SP_INITIALIZING = 0,
|
||||
SP_PAUSED = 6, // user paused spider
|
||||
SP_INPROGRESS = 7, // it is going on!
|
||||
SP_ADMIN_PAUSED = 8, // g_conf.m_spideringEnabled = false
|
||||
};
|
||||
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user