1ec5c427b9
Only rebuilds spiderdb with the first-URL of each titlerec. The links in documents are not added. Useful for cleaning out obsolete/useless/unwanted links in spiderdb.
161 lines
3.7 KiB
C++
161 lines
3.7 KiB
C++
// Copyright Gigablast, Inc. Mar 2007
|
|
|
|
#ifndef GB_REPAIR_H
|
|
#define GB_REPAIR_H
|
|
|
|
#include "RdbList.h"
|
|
#include "Msg5.h"
|
|
#include "repair_mode.h"
|
|
|
|
// NOTE(review): presumably a buffer size; nothing in this header refers to
// it, so check the .cpp users before changing or removing it.
#define SR_BUFSIZE 2048
|
|
|
|
|
|
// Forward declaration; this header only names XmlDoc as a pointer parameter type.
class XmlDoc;
|
|
// Forward declaration; this header only holds a CollectionRec pointer (m_cr).
class CollectionRec;
|
|
|
|
|
|
class Repair {
|
|
public:
|
|
|
|
Repair();
|
|
|
|
// is the scan active and adding recs to the secondary rdbs?
|
|
bool isRepairActive() const;
|
|
bool isRepairingColl(collnum_t coll) const { return m_collnum==coll; }
|
|
|
|
bool init();
|
|
// if we core, call this so repair can resume where it left off
|
|
bool save();
|
|
|
|
// called by Parms.cpp
|
|
bool printRepairStatus(SafeBuf *sb);
|
|
|
|
bool linkdbRebuildPending() const { return m_rebuildLinkdb; }
|
|
|
|
private:
|
|
//void allHostsReady();
|
|
void initScan();
|
|
void resetForNewCollection();
|
|
void getNextCollToRepair();
|
|
bool loop();
|
|
bool dumpLoop();
|
|
void resetSecondaryRdbs();
|
|
bool dumpsCompleted();
|
|
void updateRdbs ( ) ;
|
|
|
|
// titledbscan functions
|
|
bool scanRecs();
|
|
bool gotScanRecList ( );
|
|
bool getTitleRec ( );
|
|
bool injectTitleRec ( ) ; // TitleRec *tr );
|
|
bool injectTitleRecSmall(char *titleRec, int32_t titleRecSize);
|
|
static void smallInjectCallback(void *state);
|
|
|
|
|
|
bool load();
|
|
|
|
// general scan vars
|
|
Msg5 m_msg5;
|
|
RdbList m_titleRecList;
|
|
int64_t m_docId;
|
|
int64_t m_totalMem;
|
|
int32_t m_stage ;
|
|
bool m_updated;
|
|
|
|
// titledb scan vars
|
|
key96_t m_nextTitledbKey;
|
|
key96_t m_endKey;
|
|
|
|
// . state info
|
|
// . indicator of what we save to disk
|
|
char m_SAVE_START;
|
|
int64_t m_lastDocId;
|
|
int64_t m_prevDocId;
|
|
bool m_completedFirstScan ;
|
|
bool m_completedSpiderdbScan ;
|
|
key96_t m_lastTitledbKey;
|
|
key96_t m_lastSpiderdbKey;
|
|
|
|
int64_t m_recsScanned;
|
|
int64_t m_recsetErrors;
|
|
int64_t m_recsCorruptErrors;
|
|
int64_t m_recsDupDocIds;
|
|
int64_t m_recsNegativeKeys;
|
|
int64_t m_recsUnassigned;
|
|
int64_t m_recsWrongGroupId;
|
|
int64_t m_recsInjected;
|
|
int64_t m_nonIndexableExtensions;
|
|
int64_t m_urlBlocked;
|
|
int64_t m_urlUnwanted;
|
|
|
|
// spiderdb scan stats
|
|
int32_t m_spiderRecsScanned ;
|
|
int32_t m_spiderRecSetErrors ;
|
|
int32_t m_spiderRecNotAssigned ;
|
|
int32_t m_spiderRecBadTLD ;
|
|
|
|
// generic scan parms
|
|
bool m_rebuildTitledb ;
|
|
bool m_rebuildPosdb ;
|
|
bool m_rebuildClusterdb ;
|
|
bool m_rebuildSpiderdb ;
|
|
bool m_rebuildSpiderdbSmall;
|
|
bool m_rebuildSitedb ;
|
|
bool m_rebuildLinkdb ;
|
|
bool m_rebuildTagdb ;
|
|
bool m_fullRebuild ;
|
|
|
|
bool m_rebuildRoots ;
|
|
bool m_rebuildNonRoots ;
|
|
|
|
// current collection being repaired
|
|
collnum_t m_collnum;
|
|
|
|
// . m_colli is the index into m_colls
|
|
// . m_colli is the index into g_collectiondb.m_recs if the list
|
|
// of collections to repair was empty
|
|
int32_t m_colli;
|
|
|
|
// list of collections to repair, only valid of g_conf.m_collsToRepair
|
|
// is not empty
|
|
static const int32_t maxCollections = 100;
|
|
int32_t m_collOffs[maxCollections];
|
|
int32_t m_collLens[maxCollections];
|
|
int32_t m_numColls;
|
|
// end the stuff to be saved
|
|
char m_SAVE_END;
|
|
|
|
// i'd like to save these but they are ptrs
|
|
CollectionRec *m_cr;
|
|
|
|
//for timing a repair process
|
|
int64_t m_startTime;
|
|
|
|
// if repairing is disabled in the middle of a repair
|
|
bool m_isSuspended;
|
|
|
|
// keep track of how many injects we have out
|
|
int32_t m_numOutstandingInjects;
|
|
|
|
// sanity check
|
|
bool m_msg5InUse ;
|
|
|
|
bool m_saveRepairState;
|
|
|
|
bool m_isRetrying;
|
|
|
|
static void repairWrapper(int fd, void *state);
|
|
static void loopWrapper(void *state, RdbList *list, Msg5 *msg5);
|
|
|
|
static bool saveAllRdbs();
|
|
static bool anyRdbNeedsSave();
|
|
static void doneSavingRdb(void *state);
|
|
static void doneWithIndexDoc(XmlDoc *xd);
|
|
static void doneWithIndexDocWrapper(void *state);
|
|
};
|
|
|
|
// the global Repair instance
|
|
// Declaration only ('extern'): the object itself must be defined in exactly one source file.
extern Repair g_repair;
|
|
|
|
#endif // GB_REPAIR_H
|