mirror of
https://github.com/privacore/open-source-search-engine.git
synced 2025-07-17 02:56:07 -04:00
Add skiphash as a parameter for injecting a document, so we can force a reindex even when the content is unchanged
This commit is contained in:
@ -596,7 +596,7 @@ void handleRequest7 ( UdpSlot *slot , int32_t netnice ) {
|
||||
ir->ptr_contentTypeStr, // text/html text/xml
|
||||
ir->m_spiderLinks ,
|
||||
ir->m_newOnly, // index iff new
|
||||
|
||||
ir->m_skipContentHashCheck,
|
||||
xd, // state ,
|
||||
sendUdpReply7 ,
|
||||
|
||||
|
@ -30,6 +30,7 @@ class InjectionRequest {
|
||||
bool m_spiderLinks;
|
||||
bool m_shortReply;
|
||||
bool m_newOnly;
|
||||
bool m_skipContentHashCheck;
|
||||
bool m_deleteUrl;
|
||||
bool m_hasMime;
|
||||
bool m_doConsistencyTesting;
|
||||
|
10
Parms.cpp
10
Parms.cpp
@ -6698,6 +6698,16 @@ void Parms::init ( ) {
|
||||
simple_m_set_checkbox(InjectionRequest,m_newOnly);
|
||||
m++;
|
||||
|
||||
m->m_title = "skip content hash check";
|
||||
m->m_desc = "Skip content hash check to force reindexing of document even "
|
||||
"when content is identical";
|
||||
m->m_cgi = "skiphash";
|
||||
m->m_def = "0";
|
||||
m->m_flags = PF_API;
|
||||
m->m_page = PAGE_INJECT;
|
||||
simple_m_set_checkbox(InjectionRequest,m_skipContentHashCheck);
|
||||
m++;
|
||||
|
||||
m->m_title = "delete from index";
|
||||
m->m_desc = "Delete the specified url from the index.";
|
||||
m->m_cgi = "deleteurl";
|
||||
|
11
XmlDoc.cpp
11
XmlDoc.cpp
@ -218,6 +218,7 @@ void XmlDoc::reset ( ) {
|
||||
|
||||
// if this is true, then only index if new
|
||||
m_newOnly = 0;
|
||||
m_skipContentHashCheck = false;
|
||||
|
||||
if ( m_httpReplyValid && m_httpReply ) {
|
||||
mfree(m_httpReply,m_httpReplyAllocSize,"httprep");
|
||||
@ -1287,7 +1288,7 @@ bool XmlDoc::injectDoc ( const char *url ,
|
||||
const char *contentTypeStr, // text/html application/json
|
||||
bool spiderLinks ,
|
||||
char newOnly, // index iff new
|
||||
|
||||
bool skipContentHashCheck,
|
||||
void *state,
|
||||
void (*callback)(void *state) ,
|
||||
|
||||
@ -1398,6 +1399,8 @@ bool XmlDoc::injectDoc ( const char *url ,
|
||||
// . maybe just set indexCode
|
||||
m_newOnly = newOnly;
|
||||
|
||||
m_skipContentHashCheck = skipContentHashCheck;
|
||||
|
||||
// do not re-lookup the robots.txt
|
||||
m_isAllowed = true;
|
||||
m_isAllowedValid = true;
|
||||
@ -2512,8 +2515,10 @@ int32_t *XmlDoc::getIndexCode ( ) {
|
||||
|
||||
// disable content hash check if language differ (we could have overridden language when injecting doc)
|
||||
bool checkContentHash = true;
|
||||
if (m_wasContentInjected && m_langIdValid && m_langId != od->m_langId) {
|
||||
checkContentHash = false;
|
||||
if (m_wasContentInjected) {
|
||||
if (m_skipContentHashCheck || (m_langIdValid && m_langId != od->m_langId)) {
|
||||
checkContentHash = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (checkContentHash && *ch32 == od->m_contentHash32) {
|
||||
|
3
XmlDoc.h
3
XmlDoc.h
@ -907,6 +907,7 @@ public:
|
||||
// for limiting # of iframe tag expansions
|
||||
int32_t m_numExpansions;
|
||||
char m_newOnly;
|
||||
bool m_skipContentHashCheck;
|
||||
char m_isWWWDup; // May be -1
|
||||
|
||||
SafeBuf m_linkSiteHashBuf;
|
||||
@ -1143,7 +1144,7 @@ public:
|
||||
const char *contentTypeStr, // text/html, text/xml etc.
|
||||
bool spiderLinks ,
|
||||
char newOnly, // index iff new
|
||||
|
||||
bool skipContentHashCheck,
|
||||
void *state,
|
||||
void (*callback)(void *state) ,
|
||||
|
||||
|
Reference in New Issue
Block a user