Mirror of https://github.com/privacore/open-source-search-engine.git
Merge branch 'diffbot-testing' into testing
@@ -201,11 +201,12 @@ static WebPage s_pages[] = {
	sendPageCloneColl , 0 ,NULL,NULL,
	PG_MASTERADMIN|PG_ACTIVE},

	// let's replace this with query reindex for the most part
	{ PAGE_REPAIR , "admin/rebuild" , 0 , "rebuild" , 1 , 0 ,
	"rebuild data",
	//USER_MASTER ,
	sendPageGeneric , 0 ,NULL,NULL,
	PG_MASTERADMIN |PG_ACTIVE},
	PG_MASTERADMIN },//|PG_ACTIVE},

	{ PAGE_FILTERS , "admin/filters", 0 , "url filters" , 1 ,M_POST,
	"prioritize urls for spidering",
@@ -272,11 +273,12 @@ static WebPage s_pages[] = {
	sendPageAutoban , 0 ,NULL,NULL,
	PG_NOAPI|PG_MASTERADMIN},

	// deactivate until works on 64-bit... mdw 12/14/14
	{ PAGE_PROFILER , "admin/profiler" , 0 , "profiler" , 0 ,M_POST,
	//USER_MASTER ,
	"profiler",
	sendPageProfiler , 0 ,NULL,NULL,
	PG_NOAPI|PG_MASTERADMIN|PG_ACTIVE},
	PG_NOAPI|PG_MASTERADMIN},//|PG_ACTIVE},

	{ PAGE_THREADS , "admin/threads" , 0 , "threads" , 0 , 0 ,
	//USER_MASTER ,
Parms.cpp
@@ -10288,11 +10288,18 @@ void Parms::init ( ) {

	m->m_title = "ask for gzipped docs when downloading";
	m->m_desc = "If this is true, gb will send Accept-Encoding: gzip "
		"to web servers when doing http downloads.";
		"to web servers when doing http downloads. It does have "
		"a tendency to cause out-of-memory errors when you enable "
		"this, so until that is fixed better, it's probably a good "
		"idea to leave this disabled.";
	m->m_cgi = "afgdwd";
	m->m_off = (char *)&g_conf.m_gzipDownloads - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	// keep this default off because it seems some pages are huge
	// uncompressed causing OOM errors and possibly corrupting stuff?
	// not sure exactly, but i don't like going OOM. so maybe until
	// that is fixed leave this off.
	m->m_def = "0";
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;
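For context, a minimal sketch, not taken from the commit, of what this flag controls: when it is on, gb advertises gzip support in the headers of its http download requests and must then inflate the response body before parsing it, which is where the memory spikes described above come from. The stand-in flag and buildGetRequest() below are hypothetical illustrations; in the real code the setting is g_conf.m_gzipDownloads as registered above.

#include <string>

// hypothetical stand-in for g_conf.m_gzipDownloads; the commit flips the
// default to off
static bool s_gzipDownloads = false;

// illustrative request builder: only advertise gzip support when the flag
// is on. A server that honors it replies with "Content-Encoding: gzip",
// and the body must be inflated before parsing, which is where huge pages
// can balloon in memory.
static std::string buildGetRequest ( const std::string &path ,
                                     const std::string &host ) {
	std::string req = "GET " + path + " HTTP/1.0\r\nHost: " + host + "\r\n";
	if ( s_gzipDownloads )
		req += "Accept-Encoding: gzip\r\n";
	req += "\r\n";
	return req;
}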
@@ -17015,12 +17022,17 @@ void Parms::init ( ) {
	m->m_title = "enable link voting";
	m->m_desc = "If this is true Gigablast will "
		"index hyper-link text and use hyper-link "
		"structures to boost the quality of indexed documents.";
		"structures to boost the quality of indexed documents. "
		"You can disable this when doing a ton of injections to "
		"keep things fast. Then do a posdb (index) rebuild "
		"after re-enabling this when you are done injecting. Or "
		"if you simply do not want link voting this will speed up "
		"your injections and spidering a bit.";
	m->m_cgi = "glt";
	m->m_off = (char *)&cr.m_getLinkInfo - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_flags = PF_CLONE|PF_API;//PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;
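The XmlDoc.cpp hunk further down is what actually consumes this per-collection flag. As a condensed, self-contained illustration of the idea (the structs below are stand-ins, not the real CollectionRec or LinkInfo), skipping link voting amounts to returning an empty inlink record instead of doing the expensive Linkdb lookup:

#include <string.h>
#include <stdint.h>

// stand-in types, for illustration only
struct LinkInfo { int32_t m_lisize; /* inlink text, scores, ... */ };
struct CollRec  { bool m_getLinkInfo; };

static LinkInfo s_emptyInfo;

// return inlink info for a document, or an empty record when the
// collection has link voting disabled (e.g. during bulk injection)
static LinkInfo *getLinkInfoFor ( CollRec *cr ) {
	if ( ! cr->m_getLinkInfo ) {
		// skip the expensive lookup entirely
		memset ( &s_emptyInfo , 0 , sizeof(LinkInfo) );
		s_emptyInfo.m_lisize = sizeof(LinkInfo);
		return &s_emptyInfo;
	}
	// ... otherwise fall through to the real getLinkInfo() call ...
	return NULL;
}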
Repair.cpp

@@ -1087,6 +1087,10 @@ bool Repair::loop ( void *state ) {
	// . titledb scan
	// . build g_checksumdb2, g_spiderdb2, g_clusterdb2, g_tfndb2
loop1:

	if ( g_process.m_mode == EXIT_MODE )
		return true;

	if ( m_stage == STAGE_TITLEDB_0 ) {
		m_stage++;
		if ( ! scanRecs() ) return false;
XmlDoc.cpp
@@ -13658,12 +13658,25 @@ LinkInfo s_dummy2;
// . returns -1 if blocked, will re-call m_callback
LinkInfo *XmlDoc::getLinkInfo1 ( ) {

	if ( m_linkInfo1Valid && ptr_linkInfo1 )
		return ptr_linkInfo1;

	// just return nothing if not doing link voting
	CollectionRec *cr = getCollRec();
	if ( ! cr ) return NULL;
	// to keep things fast we avoid getting link info for some collections
	if ( ! m_linkInfo1Valid && ! cr->m_getLinkInfo ) {
		ptr_linkInfo1 = NULL;
		m_linkInfo1Valid = true;
	}

	// sometimes it is NULL in title rec when setting from title rec
	if ( m_linkInfo1Valid && ! ptr_linkInfo1 ) {
		memset ( &s_dummy2 , 0 , sizeof(LinkInfo) );
		s_dummy2.m_lisize = sizeof(LinkInfo);
		ptr_linkInfo1 = &s_dummy2;
		size_linkInfo1 = sizeof(LinkInfo);
		return ptr_linkInfo1;
	}

	// return if we got it
@@ -13673,9 +13686,6 @@ LinkInfo *XmlDoc::getLinkInfo1 ( ) {
	// change status
	setStatus ( "getting local inlinkers" );

	CollectionRec *cr = getCollRec();
	if ( ! cr ) return NULL;

	XmlDoc **od = getOldXmlDoc ( );
	if ( ! od || od == (XmlDoc **)-1 ) return (LinkInfo *)od;
	int32_t *sni = getSiteNumInlinks();
@@ -13805,7 +13815,7 @@ LinkInfo *XmlDoc::getLinkInfo1 ( ) {
	// onlyNeedGoodInlinks = false;
	//}

	// call it
	// call it. this is defined in Linkdb.cpp
	char *url = getFirstUrl()->getUrl();
	if ( ! getLinkInfo ( &m_tmpBuf12,
			     &m_mcast12,