minor updates

This commit is contained in:
Matt Wells
2014-02-16 13:38:54 -08:00
parent dc8b9090e8
commit 4930243de3
2 changed files with 11 additions and 5 deletions

@ -2096,6 +2096,9 @@ bool CollectionRec::rebuildUrlFilters ( ) {
// default to 250ms i guess. -1 means unset i think.
if ( m_collectiveCrawlDelay < 0.0 ) wait = 250;
bool isEthan = false;
if (m_coll)isEthan=strstr(m_coll,"2b44a0e0bb91bbec920f7efd29ce3d5b");
// make the gigablast regex table just "default" so it does not do any
// filtering, but accepts all urls. we will add code to pass the urls
// through m_diffbotUrlCrawlPattern alternatively. if that itself
@ -2106,6 +2109,9 @@ bool CollectionRec::rebuildUrlFilters ( ) {
m_maxSpidersPerRule [i] = 100;
m_spiderIpWaits [i] = wait;
m_spiderIpMaxSpiders[i] = 7; // keep it respectful
// ethan wants some speed
if ( isEthan )
m_spiderIpMaxSpiders[i] = 30;
//m_spidersEnabled [i] = 1;
m_spiderFreqs [i] =m_collectiveRespiderFrequency;
//m_spiderDiffbotApiUrl[i].purge();

10
gb.conf

@ -328,10 +328,10 @@
<collectionsToRepairOrRebuild><![CDATA[main]]></>
# In bytes.
<memoryToUseForRepair>300000000</>
<memoryToUseForRepair>200000000</>
# Maximum number of outstanding inject spiders for repair.
<maxRepairSpiders>32</>
<maxRepairSpiders>2</>
# If enabled, gigablast will reinject the content of all title recs into a
# secondary rdb system. That will become the primary rdb system when complete.
@ -342,13 +342,13 @@
<keepNewSpiderdbRecs>1</>
# If enabled, gigablast will recycle the link info when rebuilding titledb.
<recycleLinkInfo>0</>
<recycleLinkInfo>1</>
# If enabled, gigablast will rebuild this rdb
<rebuildTitledb>1</>
# If enabled, gigablast will rebuild this rdb
<rebuildPosdb>0</>
<rebuildPosdb>1</>
# If enabled, gigablast will rebuild this rdb
<rebuildClusterdb>0</>
@ -368,4 +368,4 @@
# When rebuilding spiderdb and scanning it for new spiderdb records, should a
# tagdb lookup be performed? Runs much much faster without it. Will also keep
# the original doc quality and spider priority intact.
<skipTagdbLookup>0</>
<skipTagdbLookup>1</>