forked from Mirrors/privacore-open-source-search-engine
Move document indexing from main thread to a job
XmlDoc::indexDoc/indexDoc2/getMetaList() was being called from a callback in the main thread. Move that work to a thread. Due to the multiple async calls done by getMetaList() there will be some overhead from this, but at least the main thread won't be clogged when encountering 5MB+ documents anymore.
This commit is contained in:
@ -409,6 +409,7 @@ bool JobScheduler_impl::submit(thread_type_t thread_type, JobEntry &e)
|
||||
case thread_type_spider_write: job_queue = &cpu_job_queue; break;
|
||||
case thread_type_spider_filter: job_queue = &external_job_queue; break;
|
||||
case thread_type_spider_query: job_queue = &cpu_job_queue; break;
|
||||
case thread_type_spider_index: job_queue = &cpu_job_queue; break;
|
||||
case thread_type_merge_filter: job_queue = &merge_job_queue; break;
|
||||
case thread_type_replicate_write: job_queue = &cpu_job_queue; break;
|
||||
case thread_type_replicate_read: job_queue = &cpu_job_queue; break;
|
||||
|
@ -32,6 +32,7 @@ enum thread_type_t {
|
||||
thread_type_spider_write,
|
||||
thread_type_spider_filter, //pdf2html/doc2html/...
|
||||
thread_type_spider_query, //?
|
||||
thread_type_spider_index,
|
||||
thread_type_merge_filter,
|
||||
thread_type_replicate_write,
|
||||
thread_type_replicate_read,
|
||||
|
@ -18,6 +18,7 @@ static const char *thread_type_name(thread_type_t tt) {
|
||||
case thread_type_spider_write: return "spider-write";
|
||||
case thread_type_spider_filter: return "spider-filter";
|
||||
case thread_type_spider_query: return "spider-query";
|
||||
case thread_type_spider_index: return "spider-index";
|
||||
case thread_type_merge_filter: return "merge-filter";
|
||||
case thread_type_replicate_write: return "replicate-write";
|
||||
case thread_type_replicate_read: return "replicate-read";
|
||||
|
54
XmlDoc.cpp
54
XmlDoc.cpp
@ -91,6 +91,7 @@ XmlDoc::XmlDoc() {
|
||||
void *pend = &m_VALIDEND;
|
||||
memset ( p , 0 , (char *)pend - (char *)p );//(int32_t)pend-(int32_t)p
|
||||
m_msg22Request.m_inUse = 0;
|
||||
m_indexedDoc = false;
|
||||
m_msg4Waiting = false;
|
||||
m_msg4Launched = false;
|
||||
m_dupTrPtr = NULL;
|
||||
@ -148,6 +149,7 @@ void XmlDoc::reset ( ) {
|
||||
|
||||
m_loaded = false;
|
||||
|
||||
m_indexedDoc = false;
|
||||
m_msg4Launched = false;
|
||||
|
||||
//m_downloadAttempted = false;
|
||||
@ -1228,6 +1230,31 @@ void XmlDoc::setCallback ( void *state, bool (*callback) (void *state) ) {
|
||||
|
||||
|
||||
|
||||
// Job body for document indexing: runs XmlDoc::indexDoc() on the XmlDoc
// passed in as the opaque state pointer.
// . state must point to an XmlDoc
// . if indexDoc() returns false (blocked) nothing else is done here
// . if indexDoc() returns true, m_indexedDoc is set so that indexedDoc3()
//   knows it has to call the caller callback
// . does not call the caller callback itself
static void indexDoc3(void *state) {
|
||||
XmlDoc *that = reinterpret_cast<XmlDoc*>(state);
|
||||
logTrace( g_conf.m_logTraceXmlDoc, "Calling XmlDoc::indexDoc" );
|
||||
// return if it blocked; m_indexedDoc is left untouched in that case
|
||||
if (!that->indexDoc()) {
|
||||
logTrace(g_conf.m_logTraceXmlDoc, "END, indexDoc blocked");
|
||||
return;
|
||||
}
|
||||
|
||||
// otherwise indexing completed; record that so indexedDoc3() calls the caller callback
|
||||
|
||||
that->m_indexedDoc = true;
|
||||
|
||||
logTrace(g_conf.m_logTraceXmlDoc, "END");
|
||||
}
|
||||
|
||||
// Companion to indexDoc3(): calls the XmlDoc's caller callback, but only
// if indexDoc3() marked the document as completely indexed (m_indexedDoc).
// . state must point to an XmlDoc
// . exit_type is not inspected here
// . presumably invoked by the job scheduler once the indexDoc3 job has
//   finished (it is the second function handed to submit()) -- TODO confirm
// . NOTE(review): the direct fallback call of indexDoc3() in
//   indexDocWrapper() does not appear to be followed by a call to this
//   function -- confirm the caller callback still fires on that path
static void indexedDoc3(void *state, job_exit_t exit_type) {
|
||||
XmlDoc *that = reinterpret_cast<XmlDoc*>(state);
|
||||
// if indexDoc() blocked, m_indexedDoc was not set and no callback is made here
if(that->m_indexedDoc) {
|
||||
logTrace(g_conf.m_logTraceXmlDoc, "Calling callback");
|
||||
that->callCallback();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void indexDocWrapper ( void *state ) {
|
||||
logTrace( g_conf.m_logTraceXmlDoc, "BEGIN" );
|
||||
|
||||
@ -1237,31 +1264,18 @@ static void indexDocWrapper ( void *state ) {
|
||||
// note it
|
||||
THIS->setStatus ( "in index doc wrapper" );
|
||||
|
||||
logTrace( g_conf.m_logTraceXmlDoc, "Calling XmlDoc::indexDoc" );
|
||||
// return if it blocked
|
||||
if ( ! THIS->indexDoc( ) )
|
||||
{
|
||||
logTrace( g_conf.m_logTraceXmlDoc, "END, indexDoc blocked" );
|
||||
//shovel this off to a thread
|
||||
if(g_jobScheduler.submit(&indexDoc3,indexedDoc3,THIS,thread_type_spider_index,THIS->m_niceness)) {
|
||||
//excellent
|
||||
logTrace( g_conf.m_logTraceXmlDoc, "END, queued for thread" );
|
||||
return;
|
||||
}
|
||||
|
||||
// otherwise, all done, call the caller callback
|
||||
|
||||
// g_statsdb.addStat ( MAX_NICENESS,
|
||||
// "docs_indexed",
|
||||
// 20,
|
||||
// 21,
|
||||
// );
|
||||
|
||||
|
||||
logTrace( g_conf.m_logTraceXmlDoc, "Calling callback" );
|
||||
THIS->callCallback();
|
||||
|
||||
logTrace( g_conf.m_logTraceXmlDoc, "END" );
|
||||
//threads not available (or oom or simmilar)
|
||||
indexDoc3(THIS);
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
// . the highest level function in here
|
||||
// . user is requesting to inject this url
|
||||
// . returns false if blocked and your callback will be called when done
|
||||
|
2
XmlDoc.h
2
XmlDoc.h
@ -1116,6 +1116,8 @@ public:
|
||||
|
||||
bool m_freed;
|
||||
|
||||
bool m_indexedDoc; //indexDoc() perfomrned completely
|
||||
|
||||
bool m_msg4Waiting;
|
||||
bool m_msg4Launched;
|
||||
|
||||
|
Reference in New Issue
Block a user