Moved email logic from XmlDoc into Spider.cpp.
Added maxCrawlRounds parm. Added crawlStatus msg to the JSON output to indicate why the crawl stopped.
This commit is contained in:
parent
9595f65542
commit
469be5f216
@ -2649,7 +2649,7 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
|
||||
//if ( cx->m_collectionNameAlias.length() > 0 )
|
||||
// alias=cx->m_collectionNameAlias.getBufStart();
|
||||
//long paused = 1;
|
||||
char *ss = "normal";
|
||||
char *ss = "Normal";
|
||||
if ( cx->m_spiderStatusMsg )
|
||||
ss = cx->m_spiderStatusMsg;
|
||||
//if ( cx->m_spideringEnabled ) paused = 0;
|
||||
@ -2698,7 +2698,7 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
|
||||
sb.safeUtf8ToJSON ( cx->m_diffbotSeeds.getBufStart());
|
||||
sb.safePrintf("\",\n");
|
||||
|
||||
sb.safePrintf("\"crawlRoundNumber\":%li,\n",
|
||||
sb.safePrintf("\"crawlRoundsCompleted\":%li,\n",
|
||||
cx->m_spiderRoundNum);
|
||||
|
||||
sb.safePrintf("\"crawlRoundStartTime\":%lu,\n",
|
||||
|
13
Parms.cpp
13
Parms.cpp
@ -8373,7 +8373,7 @@ void Parms::init ( ) {
|
||||
m->m_off = (char *)&cr.m_maxToCrawl - x;
|
||||
m->m_type = TYPE_LONG_LONG;
|
||||
m->m_page = PAGE_NONE;
|
||||
m->m_def = "";
|
||||
m->m_def = "100001";
|
||||
m++;
|
||||
|
||||
m->m_cgi = "maxtoprocess";
|
||||
@ -8382,7 +8382,16 @@ void Parms::init ( ) {
|
||||
m->m_off = (char *)&cr.m_maxToProcess - x;
|
||||
m->m_type = TYPE_LONG_LONG;
|
||||
m->m_page = PAGE_NONE;
|
||||
m->m_def = "";
|
||||
m->m_def = "100001";
|
||||
m++;
|
||||
|
||||
m->m_cgi = "maxCrawlRounds";
|
||||
m->m_title = "max crawl rounds";
|
||||
m->m_xml = "maxCrawlRounds";
|
||||
m->m_off = (char *)&cr.m_maxCrawlRounds - x;
|
||||
m->m_type = TYPE_LONG;
|
||||
m->m_page = PAGE_NONE;
|
||||
m->m_def = "-1";
|
||||
m++;
|
||||
|
||||
/*
|
||||
|
22
Spider.cpp
22
Spider.cpp
@ -3947,7 +3947,8 @@ void doneSendingNotification ( void *state ) {
|
||||
if ( ! cr ) return;
|
||||
|
||||
// we can re-use the EmailInfo class now
|
||||
ei->m_inUse = false;
|
||||
// pingserver.cpp sets this
|
||||
//ei->m_inUse = false;
|
||||
|
||||
// mark it as sent. anytime a new url is spidered will mark this
|
||||
// as false again! use LOCAL crawlInfo, since global is reset often.
|
||||
@ -3956,6 +3957,8 @@ void doneSendingNotification ( void *state ) {
|
||||
// sanity check
|
||||
if ( g_hostdb.m_myHost->m_hostId != 0 ) { char *xx=NULL;*xx=0; }
|
||||
|
||||
// if not round done we are done
|
||||
if ( cr->m_spiderStatus != SP_ROUNDDONE ) return;
|
||||
|
||||
// this should have been set below
|
||||
if ( cr->m_spiderRoundStartTime == 0 ) { char *xx=NULL;*xx=0; }
|
||||
@ -4030,7 +4033,8 @@ bool sendNotificationForCollRec ( CollectionRec *cr ) {
|
||||
// in use already?
|
||||
if ( ei->m_inUse ) return true;
|
||||
|
||||
ei->m_inUse = true;
|
||||
// pingserver.cpp sets this
|
||||
//ei->m_inUse = true;
|
||||
|
||||
// set it up
|
||||
ei->m_finalCallback = doneSendingNotification;
|
||||
@ -4135,9 +4139,10 @@ void SpiderLoop::spiderDoledUrls ( ) {
|
||||
if ( ! cr->m_spideringEnabled ) continue;
|
||||
|
||||
// hit crawl round max?
|
||||
if ( cr->m_spiderRoundNum >= cr->m_maxCrawlRounds ) {
|
||||
if ( //cr->m_maxCrawlRounds > 0 &&
|
||||
cr->m_spiderRoundNum >= cr->m_maxCrawlRounds ) {
|
||||
cr->m_spiderStatus = SP_MAXROUNDS;
|
||||
cr->m_spiderStatusMsg = "Hit max rounds limit.";
|
||||
cr->m_spiderStatusMsg = "Hit maxCrawlRounds limit.";
|
||||
sendNotificationForCollRec ( cr );
|
||||
continue;
|
||||
}
|
||||
@ -4146,7 +4151,7 @@ void SpiderLoop::spiderDoledUrls ( ) {
|
||||
if ( cr->m_globalCrawlInfo.m_pageDownloadSuccesses >=
|
||||
cr->m_maxToCrawl ) {
|
||||
cr->m_spiderStatus = SP_MAXTOCRAWL;
|
||||
cr->m_spiderStatusMsg = "Hit max pages limit.";
|
||||
cr->m_spiderStatusMsg = "Hit maxToCrawl limit.";
|
||||
sendNotificationForCollRec ( cr );
|
||||
continue;
|
||||
}
|
||||
@ -4155,7 +4160,7 @@ void SpiderLoop::spiderDoledUrls ( ) {
|
||||
if ( cr->m_globalCrawlInfo.m_pageProcessSuccesses >=
|
||||
cr->m_maxToProcess ) {
|
||||
cr->m_spiderStatus = SP_MAXTOPROCESS;
|
||||
cr->m_spiderStatusMsg = "Hit max process limit.";
|
||||
cr->m_spiderStatusMsg = "Hit maxToProcess limit.";
|
||||
sendNotificationForCollRec ( cr );
|
||||
continue;
|
||||
}
|
||||
@ -4847,6 +4852,11 @@ bool SpiderLoop::gotDoledbList2 ( ) {
|
||||
// a spider
|
||||
m_sc->m_lastSpiderCouldLaunch = nowGlobal;
|
||||
|
||||
// set crawl done email sent flag so another email can be sent again
|
||||
// in case the user upped the maxToCrawl limit, for instance,
|
||||
// so that the crawl could continue.
|
||||
m_sc->m_cr->m_localCrawlInfo.m_sentCrawlDoneAlert = 0;
|
||||
|
||||
// assume not an empty read
|
||||
//m_sc->m_encounteredDoledbRecs = true;
|
||||
|
||||
|
2
XmlDoc.h
2
XmlDoc.h
@ -1567,7 +1567,7 @@ class XmlDoc {
|
||||
long *nukeJSONObjects ( ) ;
|
||||
long m_joc;
|
||||
|
||||
EmailInfo m_emailInfo;
|
||||
//EmailInfo m_emailInfo;
|
||||
|
||||
//
|
||||
// functions and vars for the seo query matching tool
|
||||
|
@ -1,12 +1,15 @@
|
||||
<diffbotToken><![CDATA[]]></>
|
||||
<diffbotCrawlName><![CDATA[]]></>
|
||||
<notifyEmail><![CDATA[]]></>
|
||||
<notifyUrl><![CDATA[ccc]]></>
|
||||
<frequency>0.000</>
|
||||
<collectiveRespiderFrequency>0.000000</>
|
||||
<diffbotPageProcessPattern><![CDATA[]]></>
|
||||
<diffbotOnlyProcessIfNew>1</>
|
||||
<diffbotSeeds><![CDATA[]]></>
|
||||
<isCustomCrawl>0</>
|
||||
<diffbotMaxToCrawl>0</>
|
||||
<diffbotMaxToProcess>0</>
|
||||
<maxToCrawl>100001</>
|
||||
<maxToProcess>100001</>
|
||||
<maxCrawlRounds>3</>
|
||||
|
||||
# All <, >, " and # characters that are values for a field contained herein
|
||||
# must be represented as &lt;, &gt;, &#34; and &#35; respectively.
|
||||
@ -67,7 +70,7 @@
|
||||
|
||||
# If less than this many days have elapsed since the last time we added the
|
||||
# outlinks to spiderdb, do not re-add them to spiderdb. Saves resources.
|
||||
<doNotReaddOldOutlinksMoreThanThisManyDays>30.000</>
|
||||
<doNotReaddOldOutlinksMoreThanThisManyDays>30.000000</>
|
||||
|
||||
# When the spider round started
|
||||
<spiderRoundStartTime>0</>
|
||||
@ -218,7 +221,7 @@
|
||||
# is unchanged then the link info will not be updated. When getting the link
|
||||
# info or quality of the root url from an external cluster, Gigablast will
|
||||
# tell the external cluster to recompute it if its age is this or higher.
|
||||
<updateLinkInfoFrequency>60.000</>
|
||||
<updateLinkInfoFrequency>60.000000</>
|
||||
|
||||
# If this is true Gigablast will only allow one vote per the top 2 significant
|
||||
# bytes of the IP address. Otherwise, multiple pages from the same top IP can
|
||||
@ -319,12 +322,12 @@
|
||||
<maxRobotstxtCacheAge>86400</>
|
||||
|
||||
# Only spider URLs scheduled to be spidered at this time or after. In UTC.
|
||||
<spiderStartTime>16 Jan 1970 12:00 UTC</>
|
||||
<spiderStartTime>16 Jan 1970 20:00 UTC</>
|
||||
|
||||
# Only spider URLs scheduled to be spidered at this time or before. If "use
|
||||
# current time" is true then the current local time is used for this value
|
||||
# instead. in UTC.
|
||||
<spiderEndTime>16 Jan 2010 12:00 UTC</>
|
||||
<spiderEndTime>16 Jan 2010 20:00 UTC</>
|
||||
|
||||
# Use the current time as the spider end time?
|
||||
<useCurrentTime>1</>
|
||||
@ -414,19 +417,19 @@
|
||||
# Demotion factor of non-relevant languages. Score will be penalized by this
|
||||
# factor as a percent if its language is foreign. A safe value is probably
|
||||
# anywhere from 0.5 to 1.
|
||||
<demotionForForeignLanguages>0.999</>
|
||||
<demotionForForeignLanguages>0.999000</>
|
||||
|
||||
# Demotion factor for unknown languages. Page's score will be penalized by
|
||||
# this factor as a percent if its language is not known. A safe value is 0,
|
||||
# as these pages will be reranked by country (see below). 0 means no demotion.
|
||||
<demotionForUnknownLanguages>0.000</>
|
||||
<demotionForUnknownLanguages>0.000000</>
|
||||
|
||||
# Demotion for pages where the country of the page writes in the same language
|
||||
# as the country of the query. If query language is the same as the language
|
||||
# of the page, then if a language written in the country of the page matches a
|
||||
# language written by the country of the query, then page's score will be
|
||||
# demoted by this factor as a percent. A safe range is between 0.5 and 1.
|
||||
<demotionForPagesWhereTheCountryOfThePageWritesInTheSameLanguageAsTheCountryOfTheQuery>0.980</>
|
||||
<demotionForPagesWhereTheCountryOfThePageWritesInTheSameLanguageAsTheCountryOfTheQuery>0.980000</>
|
||||
|
||||
# Demotion factor for query terms or gigabits in a result's url. Score will be
|
||||
# penalized by this factor times the number of query terms or gigabits in the
|
||||
@ -440,7 +443,7 @@
|
||||
# terms or gigabits in its url will be demoted 10%; and 10 or more query terms
|
||||
# or gigabits in the url will not be demoted at all. 0 means no demotion. A
|
||||
# safe range is from 0 to 0.35.
|
||||
<demotionForQueryTermsOrGigabitsInUrl>0.000</>
|
||||
<demotionForQueryTermsOrGigabitsInUrl>0.000000</>
|
||||
|
||||
# Max number of query terms or gigabits in a url. Pages with a number of query
|
||||
# terms or gigabits in their urls greater than or equal to this value will not
|
||||
@ -456,7 +459,7 @@
|
||||
# the formula (max quality - page's quality) * this factor / the max value
|
||||
# given below. Generally, a page will not be demoted more than this factor as
|
||||
# a percent. 0 means no demotion. A safe range is between 0 to 1.
|
||||
<demotionForPagesThatAreNotHighQuality>0.000</>
|
||||
<demotionForPagesThatAreNotHighQuality>0.000000</>
|
||||
|
||||
# Max page quality. Pages with a quality level equal to or higher than this
|
||||
# value will not be demoted.
|
||||
@ -466,7 +469,7 @@
|
||||
# as a percent multiplied by the number of paths in the url divided by the max
|
||||
# value below. Generally, the page will not be demoted more than this value as
|
||||
# a percent. 0 means no demotion. A safe range is from 0 to 0.75.
|
||||
<demotionForPagesThatAreNotRootOrHaveManyPathsInTheUrl>0.000</>
|
||||
<demotionForPagesThatAreNotRootOrHaveManyPathsInTheUrl>0.000000</>
|
||||
|
||||
# Max number of paths in a url. This should be set to a value representing a
|
||||
# very high number of paths for a url. Lower values increase the difference
|
||||
@ -475,7 +478,7 @@
|
||||
|
||||
# Demotion factor for pages that do not have a catid. Score will be penalized
|
||||
# by this factor as a percent. A safe range is from 0 to 0.2.
|
||||
<demotionForPagesThatDoNotHaveACatid>0.000</>
|
||||
<demotionForPagesThatDoNotHaveACatid>0.000000</>
|
||||
|
||||
# Demotion factor for pages where smallest catid has a lot of super topics.
|
||||
# Page will be penalized by the number of super topics multiplied by this
|
||||
@ -483,7 +486,7 @@
|
||||
# demoted more than this factor as a percent. Note: pages with no catid are
|
||||
# demoted by this factor as a percent so as not to penalize pages with a
|
||||
# catid. 0 means no demotion. A safe range is between 0 and 0.25.
|
||||
<demotionForPagesWhereSmallestCatidHasALotOfSuperTopics>0.000</>
|
||||
<demotionForPagesWhereSmallestCatidHasALotOfSuperTopics>0.000000</>
|
||||
|
||||
# Max number of super topics. Pages whose smallest catid that has more super
|
||||
# topics than this will be demoted by the maximum amount given by the factor
|
||||
@ -496,7 +499,7 @@
|
||||
# this factor divided by the max page size below. Generally, a page will not
|
||||
# be demoted more than this factor as a percent. 0 means no demotion. A safe
|
||||
# range is between 0 and 0.25.
|
||||
<demotionForLargerPages>0.000</>
|
||||
<demotionForLargerPages>0.000000</>
|
||||
|
||||
# Max page size. Pages with a size greater than or equal to this will be
|
||||
# demoted by the max amount (the factor above as a percent).
|
||||
@ -508,7 +511,7 @@
|
||||
# factor divided by the max place population specified below. Generally, a
|
||||
# page will not be demoted more than this value as a percent. 0 means no
|
||||
# demotion.
|
||||
<demotionForNonlocationSpecificQueriesWithALocationSpecificTitle>0.990</>
|
||||
<demotionForNonlocationSpecificQueriesWithALocationSpecificTitle>0.990000</>
|
||||
|
||||
# Demotion factor for non-location specific queries with a location specific
|
||||
# summary. Pages which contain a location in their summary which is not in the
|
||||
@ -516,7 +519,7 @@
|
||||
# factor divided by the max place population specified below. Generally, a
|
||||
# page will not be demoted more than this value as a percent. 0 means no
|
||||
# demotion.
|
||||
<demotionForNonlocationSpecificQueriesWithALocationSpecificSummary>0.950</>
|
||||
<demotionForNonlocationSpecificQueriesWithALocationSpecificSummary>0.950000</>
|
||||
|
||||
# Demotion factor for non-location specific queries with a location specific
|
||||
# dmoz regional category. Pages which contain a location in their dmoz which
|
||||
@ -524,7 +527,7 @@
|
||||
# multiplied by this factor divided by the max place population specified
|
||||
# below. Generally, a page will not be demoted more than this value as a
|
||||
# percent. 0 means no demotion.
|
||||
<demotionForNonlocationSpecificQueriesWithALocationSpecificDmozCategory>0.950</>
|
||||
<demotionForNonlocationSpecificQueriesWithALocationSpecificDmozCategory>0.950000</>
|
||||
|
||||
# Demote locations that appear in gigabits.
|
||||
<demoteLocationsThatAppearInGigabits>1</>
|
||||
@ -537,20 +540,20 @@
|
||||
# Demotion factor for content type that is non-html. Pages which do not have
|
||||
# an html content type will be demoted by this factor as a percent. 0 means no
|
||||
# demotion. A safe range is between 0 and 0.35.
|
||||
<demotionForNonhtml>0.000</>
|
||||
<demotionForNonhtml>0.000000</>
|
||||
|
||||
# Demotion factor for content type that is xml. Pages which have an xml
|
||||
# content type will be demoted by this factor as a percent. 0 means no
|
||||
# demotion. Any value between 0 and 1 is safe if demotion for non-html is set
|
||||
# to 0. Otherwise, 0 should probably be used.
|
||||
<demotionForXml>0.950</>
|
||||
<demotionForXml>0.950000</>
|
||||
|
||||
# Demotion factor for pages with fewer other pages from same hostname. Pages
|
||||
# with results from the same host will be demoted by this factor times each
|
||||
# fewer host than the max value given below, divided by the max value.
|
||||
# Generally, a page will not be demoted more than this factor as a percent. 0
|
||||
# means no demotion. A safe range is between 0 and 0.35.
|
||||
<demotionForPagesWithOtherPagesFromSameHostname>0.000</>
|
||||
<demotionForPagesWithOtherPagesFromSameHostname>0.000000</>
|
||||
|
||||
# Max number of pages from same domain. Pages which have this many or more
|
||||
# pages from the same domain will not be demoted.
|
||||
@ -564,13 +567,13 @@
|
||||
# common topics in dmoz as other results, as the number of pages with common
|
||||
# topics in dmoz increases. 0 means no demotion. A safe range is between 0 and
|
||||
# 0.35.
|
||||
<initialDemotionForPagesWithCommonTopicsInDmozAsOtherResults>0.000</>
|
||||
<initialDemotionForPagesWithCommonTopicsInDmozAsOtherResults>0.000000</>
|
||||
|
||||
# Decay factor for pages with common topics in dmoz as other results. The
|
||||
# initial demotion factor will be decayed by this factor as a percent as the
|
||||
# number of common topics increase. 0 means no decay. A safe range is between
|
||||
# 0 and 0.25.
|
||||
<decayForPagesWithCommonTopicsInDmozAsOtherResults>0.000</>
|
||||
<decayForPagesWithCommonTopicsInDmozAsOtherResults>0.000000</>
|
||||
|
||||
# Max number of common topics in dmoz as other results. Pages with a number of
|
||||
# common topics equal to or greater than this value will be demoted to the
|
||||
@ -583,7 +586,7 @@
|
||||
# this factor, divided by the max value. Generally, a page will not be demoted
|
||||
# more than this value as a percent. 0 means no demotion. A safe range is
|
||||
# between 0 and 0.3.
|
||||
<demotionForPagesWhereDmozCategoryNamesContainQueryTermsOrTheirSynonyms>0.000</>
|
||||
<demotionForPagesWhereDmozCategoryNamesContainQueryTermsOrTheirSynonyms>0.000000</>
|
||||
|
||||
# Max number of query terms and their synonyms in a page's dmoz category name.
|
||||
# Pages with a number of query terms or their synonyms in all dmoz category
|
||||
@ -595,7 +598,7 @@
|
||||
# fewer than the max value given below divided by the max value. Generally, a
|
||||
# page will not be demoted more than this factor as a percent. 0 means no
|
||||
# demotion. A safe range is between 0 and 0.3.
|
||||
<demotionForPagesWhereDmozCategoryNamesContainGigabits>0.000</>
|
||||
<demotionForPagesWhereDmozCategoryNamesContainGigabits>0.000000</>
|
||||
|
||||
# Max number of pages where dmoz category names contain a gigabit. Pages with
|
||||
# a number of gigabits in all dmoz category names greater than or equal to
|
||||
@ -608,7 +611,7 @@
|
||||
# page's date and the max date, divided by the max date. Generally, a page
|
||||
# will not be demoted more than this value as a percent. 0 means no demotion.
|
||||
# A safe range is between 0 and 0.4.
|
||||
<demotionForPagesBasedOnDatedbDate>0.000</>
|
||||
<demotionForPagesBasedOnDatedbDate>0.000000</>
|
||||
|
||||
# Pages with a publish date equal to or earlier than this date will be demoted
|
||||
# to the max (the factor above as a percent). Use this parm in conjunction
|
||||
@ -629,17 +632,17 @@
|
||||
# Demotion factor for proximity of query terms in a document. The closer
|
||||
# together terms occur in a document, the higher it will score. 0 means no
|
||||
# demotion.
|
||||
<demotionForPagesBasedOnProximity>0.000</>
|
||||
<demotionForPagesBasedOnProximity>0.000000</>
|
||||
|
||||
# Demotion factor for where the query terms occur in the document. If the
|
||||
# terms only occur in a menu, a link, or a list, the document will be
|
||||
# punished. 0 means no demotion.
|
||||
<demotionForPagesBasedOnQueryTermsSection>0.000</>
|
||||
<demotionForPagesBasedOnQueryTermsSection>0.000000</>
|
||||
|
||||
# The proportion that the original score affects its rerank position. A factor
|
||||
# of 1 will maintain the original score, 0 will only use the indexed score to
|
||||
# break ties.
|
||||
<weightOfIndexedScoreOnPqr>1.000</>
|
||||
<weightOfIndexedScoreOnPqr>1.000000</>
|
||||
|
||||
# Max summary score where no more demotion occurs above. Pages with a summary
|
||||
# score greater than or equal to this value will not be demoted.
|
||||
@ -647,15 +650,15 @@
|
||||
|
||||
# Search result which contains the query terms only as a subphrase of a larger
|
||||
# phrase will have its score reduced by this percent.
|
||||
<demotionForQueryBeingExclusivlyInASubphrase>0.000</>
|
||||
<demotionForQueryBeingExclusivlyInASubphrase>0.000000</>
|
||||
|
||||
# Based on the number of inlinks a search result has which are in common with
|
||||
# another search result.
|
||||
<demotionBasedOnCommonInlinks>0.500</>
|
||||
<demotionBasedOnCommonInlinks>0.500000</>
|
||||
|
||||
# Allows more results to be gathered in the case of an index having a high
|
||||
# rate of duplicate results. Generally expressed as 1.2
|
||||
<numberOfDocumentCallsMultiplier>1.200</>
|
||||
<numberOfDocumentCallsMultiplier>1.200000</>
|
||||
|
||||
# Limit number of linksdb inlinks requested per result.
|
||||
<maxRealTimeInlinks>10000</>
|
||||
@ -741,7 +744,7 @@
|
||||
|
||||
# Use this multiplier to fetch more than the required number of reference
|
||||
# pages. fetches N * (this parm) references and displays the top scoring N.
|
||||
<pageFetchMultiplierForReferences>1.500</>
|
||||
<pageFetchMultiplierForReferences>1.500000</>
|
||||
|
||||
# A in A * numLinks + B * quality + C * numLinks/totalLinks.
|
||||
<numberOfLinksCoefficient>0</>
|
||||
@ -765,7 +768,7 @@
|
||||
<maximumAllowedValueForMaxLinkersParameter>5000</>
|
||||
|
||||
# maximum allowed value for additionalTRFetch parameter
|
||||
<maximumAllowedValueForAdditionalTRFetch>10.000</>
|
||||
<maximumAllowedValueForAdditionalTRFetch>10.000000</>
|
||||
|
||||
# number of related pages to generate.
|
||||
<numberOfRelatedPagesToGenerate>0</>
|
||||
@ -837,7 +840,7 @@
|
||||
# results are mostly imported from a large collection they will usually have
|
||||
# higher scores because of having more link texts or whatever, so tone it down
|
||||
# a bit to put it on par with the integrating collection.
|
||||
<importedScoreWeight>0.800</>
|
||||
<importedScoreWeight>0.800000</>
|
||||
|
||||
# The urls of imported search results must be linked to by at least this many
|
||||
# documents in the primary collection.
|
||||
@ -1037,33 +1040,33 @@
|
||||
<spidersEnabled>0</>
|
||||
<spidersEnabled>1</>
|
||||
<spidersEnabled>0</>
|
||||
<filterFrequency>0.000</>
|
||||
<filterFrequency>0.000</>
|
||||
<filterFrequency>0.000</>
|
||||
<filterFrequency>0.000</>
|
||||
<filterFrequency>0.000</>
|
||||
<filterFrequency>0.000</>
|
||||
<filterFrequency>0.000</>
|
||||
<filterFrequency>0.000</>
|
||||
<filterFrequency>0.000</>
|
||||
<filterFrequency>0.000</>
|
||||
<filterFrequency>0.000</>
|
||||
<filterFrequency>0.000</>
|
||||
<filterFrequency>1.000</>
|
||||
<filterFrequency>1.000</>
|
||||
<filterFrequency>1.000</>
|
||||
<filterFrequency>0.000</>
|
||||
<filterFrequency>7.000</>
|
||||
<filterFrequency>0.000</>
|
||||
<filterFrequency>10.000</>
|
||||
<filterFrequency>0.000</>
|
||||
<filterFrequency>20.000</>
|
||||
<filterFrequency>0.000</>
|
||||
<filterFrequency>0.000</>
|
||||
<filterFrequency>0.000</>
|
||||
<filterFrequency>0.000</>
|
||||
<filterFrequency>30.000</>
|
||||
<filterFrequency>30.000</>
|
||||
<filterFrequency>0.000000</>
|
||||
<filterFrequency>0.000000</>
|
||||
<filterFrequency>0.000000</>
|
||||
<filterFrequency>0.000000</>
|
||||
<filterFrequency>0.000000</>
|
||||
<filterFrequency>0.000000</>
|
||||
<filterFrequency>0.000000</>
|
||||
<filterFrequency>0.000000</>
|
||||
<filterFrequency>0.000000</>
|
||||
<filterFrequency>0.000000</>
|
||||
<filterFrequency>0.000000</>
|
||||
<filterFrequency>0.000000</>
|
||||
<filterFrequency>1.000000</>
|
||||
<filterFrequency>1.000000</>
|
||||
<filterFrequency>1.000000</>
|
||||
<filterFrequency>0.000000</>
|
||||
<filterFrequency>7.000000</>
|
||||
<filterFrequency>0.000000</>
|
||||
<filterFrequency>10.000000</>
|
||||
<filterFrequency>0.000000</>
|
||||
<filterFrequency>20.000000</>
|
||||
<filterFrequency>0.000000</>
|
||||
<filterFrequency>0.000000</>
|
||||
<filterFrequency>0.000000</>
|
||||
<filterFrequency>0.000000</>
|
||||
<filterFrequency>30.000000</>
|
||||
<filterFrequency>30.000000</>
|
||||
|
||||
# Do not allow more than this many outstanding spiders for all urls in this
|
||||
# priority.
|
||||
|
Loading…
x
Reference in New Issue
Block a user