Add "skiphash" as a parameter for injecting a document, so we can force a reindex even when the content is unchanged

This commit is contained in:
Ai Lin Chia
2017-02-17 12:49:35 +01:00
parent 19db02a16e
commit 63392dd835
5 changed files with 22 additions and 5 deletions

@ -596,7 +596,7 @@ void handleRequest7 ( UdpSlot *slot , int32_t netnice ) {
ir->ptr_contentTypeStr, // text/html text/xml
ir->m_spiderLinks ,
ir->m_newOnly, // index iff new
ir->m_skipContentHashCheck,
xd, // state ,
sendUdpReply7 ,

@ -30,6 +30,7 @@ class InjectionRequest {
bool m_spiderLinks;
bool m_shortReply;
bool m_newOnly;
bool m_skipContentHashCheck;
bool m_deleteUrl;
bool m_hasMime;
bool m_doConsistencyTesting;

@ -6698,6 +6698,16 @@ void Parms::init ( ) {
simple_m_set_checkbox(InjectionRequest,m_newOnly);
m++;
m->m_title = "skip content hash check";
m->m_desc = "Skip content hash check to force reindexing of document even "
"when content is identical";
m->m_cgi = "skiphash";
m->m_def = "0";
m->m_flags = PF_API;
m->m_page = PAGE_INJECT;
simple_m_set_checkbox(InjectionRequest,m_skipContentHashCheck);
m++;
m->m_title = "delete from index";
m->m_desc = "Delete the specified url from the index.";
m->m_cgi = "deleteurl";

@ -218,6 +218,7 @@ void XmlDoc::reset ( ) {
// if this is true, then only index if new
m_newOnly = 0;
m_skipContentHashCheck = false;
if ( m_httpReplyValid && m_httpReply ) {
mfree(m_httpReply,m_httpReplyAllocSize,"httprep");
@ -1287,7 +1288,7 @@ bool XmlDoc::injectDoc ( const char *url ,
const char *contentTypeStr, // text/html application/json
bool spiderLinks ,
char newOnly, // index iff new
bool skipContentHashCheck,
void *state,
void (*callback)(void *state) ,
@ -1398,6 +1399,8 @@ bool XmlDoc::injectDoc ( const char *url ,
// . maybe just set indexCode
m_newOnly = newOnly;
m_skipContentHashCheck = skipContentHashCheck;
// do not re-lookup the robots.txt
m_isAllowed = true;
m_isAllowedValid = true;
@ -2512,8 +2515,10 @@ int32_t *XmlDoc::getIndexCode ( ) {
// disable the content hash check if the language differs (the language may have been overridden when injecting the doc)
bool checkContentHash = true;
if (m_wasContentInjected && m_langIdValid && m_langId != od->m_langId) {
checkContentHash = false;
if (m_wasContentInjected) {
if (m_skipContentHashCheck || (m_langIdValid && m_langId != od->m_langId)) {
checkContentHash = false;
}
}
if (checkContentHash && *ch32 == od->m_contentHash32) {

@ -907,6 +907,7 @@ public:
// for limiting # of iframe tag expansions
int32_t m_numExpansions;
char m_newOnly;
bool m_skipContentHashCheck;
char m_isWWWDup; // May be -1
SafeBuf m_linkSiteHashBuf;
@ -1143,7 +1144,7 @@ public:
const char *contentTypeStr, // text/html, text/xml etc.
bool spiderLinks ,
char newOnly, // index iff new
bool skipContentHashCheck,
void *state,
void (*callback)(void *state) ,