Merge branch 'diffbot-testing' into testing

This commit is contained in:
Matt
2014-12-15 10:20:59 -08:00
4 changed files with 38 additions and 10 deletions

@ -201,11 +201,12 @@ static WebPage s_pages[] = {
sendPageCloneColl , 0 ,NULL,NULL,
PG_MASTERADMIN|PG_ACTIVE},
// let's replace this with query reindex for the most part
{ PAGE_REPAIR , "admin/rebuild" , 0 , "rebuild" , 1 , 0 ,
"rebuild data",
//USER_MASTER ,
sendPageGeneric , 0 ,NULL,NULL,
PG_MASTERADMIN |PG_ACTIVE},
PG_MASTERADMIN },//|PG_ACTIVE},
{ PAGE_FILTERS , "admin/filters", 0 , "url filters" , 1 ,M_POST,
"prioritize urls for spidering",
@ -272,11 +273,12 @@ static WebPage s_pages[] = {
sendPageAutoban , 0 ,NULL,NULL,
PG_NOAPI|PG_MASTERADMIN},
// deactivate until works on 64-bit... mdw 12/14/14
{ PAGE_PROFILER , "admin/profiler" , 0 , "profiler" , 0 ,M_POST,
//USER_MASTER ,
"profiler",
sendPageProfiler , 0 ,NULL,NULL,
PG_NOAPI|PG_MASTERADMIN|PG_ACTIVE},
PG_NOAPI|PG_MASTERADMIN},//|PG_ACTIVE},
{ PAGE_THREADS , "admin/threads" , 0 , "threads" , 0 , 0 ,
//USER_MASTER ,

@ -10288,11 +10288,18 @@ void Parms::init ( ) {
m->m_title = "ask for gzipped docs when downloading";
m->m_desc = "If this is true, gb will send Accept-Encoding: gzip "
"to web servers when doing http downloads.";
"to web servers when doing http downloads. It does have "
"a tendency to cause out-of-memory errors when you enable "
"this, so until that is fixed better, it's probably a good "
"idea to leave this disabled.";
m->m_cgi = "afgdwd";
m->m_off = (char *)&g_conf.m_gzipDownloads - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
// keep this default off because it seems some pages are huge when
// uncompressed, causing OOM errors and possibly corrupting stuff?
// not sure exactly, but i don't like going OOM. so until
// that is fixed, leave this off.
m->m_def = "0";
m->m_page = PAGE_MASTER;
m->m_obj = OBJ_CONF;
m++;
@ -17015,12 +17022,17 @@ void Parms::init ( ) {
m->m_title = "enable link voting";
m->m_desc = "If this is true Gigablast will "
"index hyper-link text and use hyper-link "
"structures to boost the quality of indexed documents.";
"structures to boost the quality of indexed documents. "
"You can disable this when doing a ton of injections to "
"keep things fast. Then do a posdb (index) rebuild "
"after re-enabling this when you are done injecting. Or "
"if you simply do not want link voting this will speed up"
"your injections and spidering a bit.";
m->m_cgi = "glt";
m->m_off = (char *)&cr.m_getLinkInfo - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_flags = PF_CLONE|PF_API;//PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_SPIDER;
m->m_obj = OBJ_COLL;
m++;

@ -1087,6 +1087,10 @@ bool Repair::loop ( void *state ) {
// . titledb scan
// . build g_checksumdb2, g_spiderdb2, g_clusterdb2, g_tfndb2
loop1:
if ( g_process.m_mode == EXIT_MODE )
return true;
if ( m_stage == STAGE_TITLEDB_0 ) {
m_stage++;
if ( ! scanRecs() ) return false;

@ -13658,12 +13658,25 @@ LinkInfo s_dummy2;
// . returns -1 if blocked, will re-call m_callback
LinkInfo *XmlDoc::getLinkInfo1 ( ) {
if ( m_linkInfo1Valid && ptr_linkInfo1 )
return ptr_linkInfo1;
// just return nothing if not doing link voting
CollectionRec *cr = getCollRec();
if ( ! cr ) return NULL;
// to keep things fast we avoid getting link info for some collections
if ( ! m_linkInfo1Valid && ! cr->m_getLinkInfo ) {
ptr_linkInfo1 = NULL;
m_linkInfo1Valid = true;
}
// sometimes it is NULL in title rec when setting from title rec
if ( m_linkInfo1Valid && ! ptr_linkInfo1 ) {
memset ( &s_dummy2 , 0 , sizeof(LinkInfo) );
s_dummy2.m_lisize = sizeof(LinkInfo);
ptr_linkInfo1 = &s_dummy2;
size_linkInfo1 = sizeof(LinkInfo);
return ptr_linkInfo1;
}
// return if we got it
@ -13673,9 +13686,6 @@ LinkInfo *XmlDoc::getLinkInfo1 ( ) {
// change status
setStatus ( "getting local inlinkers" );
CollectionRec *cr = getCollRec();
if ( ! cr ) return NULL;
XmlDoc **od = getOldXmlDoc ( );
if ( ! od || od == (XmlDoc **)-1 ) return (LinkInfo *)od;
int32_t *sni = getSiteNumInlinks();
@ -13805,7 +13815,7 @@ LinkInfo *XmlDoc::getLinkInfo1 ( ) {
// onlyNeedGoodInlinks = false;
//}
// call it
// call it. this is defined in Linkdb.cpp
char *url = getFirstUrl()->getUrl();
if ( ! getLinkInfo ( &m_tmpBuf12,
&m_mcast12,