forked from Mirrors/privacore-open-source-search-engine
Defect #2122: For a custom crawl with no urlCrawlPattern or urlCrawlRegEx values, only return URLs from the seed domain
This commit is contained in:
@ -875,6 +875,17 @@ void StateCD::printSpiderdbList ( RdbList *list,SafeBuf *sb,char **lastKeyPtr){
|
||||
);
|
||||
// but default to csv
|
||||
else {
|
||||
// For a custom crawl with no urlCrawlPattern or urlCrawlRegEx values, only return URLs from the seed domain
|
||||
if (cr
|
||||
&& cr->m_isCustomCrawl == 1
|
||||
&& cr->m_diffbotUrlCrawlPattern.m_length == 0
|
||||
&& cr->m_diffbotUrlProcessPattern.m_length == 0
|
||||
&& expression
|
||||
&& strstr(expression,"!isonsamedomain")) {
|
||||
continue;
|
||||
}
|
||||
fprintf(stderr, "%s", expression);
|
||||
|
||||
sb->safePrintf("\"%s\",\"%s\","
|
||||
, sreq->m_url
|
||||
, as
|
||||
|
Reference in New Issue
Block a user