forked from Mirrors/privacore-open-source-search-engine
added more langs to url filters drop down
Collectiondb.cpp (149 changed lines)
@@ -1948,12 +1948,12 @@ bool CollectionRec::countEvents ( ) {
 */

 bool CollectionRec::rebuildUrlFilters2 ( ) {

-	bool rebuild = false;
+	bool rebuild = true;
 	if ( m_numRegExs == 0 )
 		rebuild = true;

 	// don't touch it if not supposed to as long as we have some already
-	if ( m_urlFiltersProfile != UFP_NONE )
-		rebuild = true;
+	//if ( m_urlFiltersProfile != UFP_NONE )
+	//	rebuild = true;

 	// never for custom crawls however
 	if ( m_isCustomCrawl )
 		rebuild = false;
@@ -1961,12 +1961,47 @@ bool CollectionRec::rebuildUrlFilters2 ( ) {
 	// addDefault = true;
 	if ( ! rebuild ) return true;

-	if ( m_urlFiltersProfile == UFP_CHINESE )
-		return rebuildChineseRules();
+	char *s = m_urlFiltersProfile.getBufStart();

-	if ( m_urlFiltersProfile == UFP_SHALLOW )
+	if ( !strcmp(s,"shallow" ) )
 		return rebuildShallowRules();

+	//if ( strcmp(s,"web") )
+	// just fall through for that

+	if ( !strcmp(s,"english") )
+		return rebuildLangRules( "en","com,us,gov");

+	if ( !strcmp(s,"german") )
+		return rebuildLangRules( "de","de");

+	if ( !strcmp(s,"french") )
+		return rebuildLangRules( "fr","fr");

+	if ( !strcmp(s,"norwegian") )
+		return rebuildLangRules( "nl","nl");

+	if ( !strcmp(s,"spanish") )
+		return rebuildLangRules( "es","es");

+	//if ( m_urlFiltersProfile == UFP_EURO )
+	//	return rebuildLangRules( "de,fr,nl,es,sv,no,it",
+	//		"com,gov,org,de,fr,nl,es,sv,no,it");

+	if ( !strcmp(s,"romantic") )
+		return rebuildLangRules("en,de,fr,nl,es,sv,no,it,fi,pt",
+					"de,fr,nl,es,sv,no,it,fi,pt,"
+					"com,gov,org"
+					);

+	if ( !strcmp(s,"chinese") )
+		return rebuildLangRules( "zh_cn,zh_tw","cn");

 	long n = 0;

 	/*
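Note on the hunk above: the url filters profile is no longer an enum; it is read as a plain string with getBufStart() and matched with strcmp(), and each named language profile is handed to rebuildLangRules() as a language list plus a TLD list. A minimal standalone sketch of that dispatch pattern, with std::string standing in for the engine's SafeBuf (the function bodies, names and output below are illustrative assumptions, not code from this commit):

#include <cstdio>
#include <string>

// stand-in for CollectionRec::rebuildLangRules ( char *langStr , char *tldStr )
static bool rebuildLangRules ( const std::string &langStr ,
                               const std::string &tldStr ) {
	std::printf ( "building rules for parentlang==%s,xx and tld==%s\n" ,
	              langStr.c_str() , tldStr.c_str() );
	return true;
}

// stand-in for the strcmp() chain in CollectionRec::rebuildUrlFilters2()
static bool rebuildUrlFilters2 ( const std::string &profile ) {
	if ( profile == "english" ) return rebuildLangRules ( "en" , "com,us,gov" );
	if ( profile == "german"  ) return rebuildLangRules ( "de" , "de" );
	if ( profile == "french"  ) return rebuildLangRules ( "fr" , "fr" );
	if ( profile == "spanish" ) return rebuildLangRules ( "es" , "es" );
	if ( profile == "chinese" ) return rebuildLangRules ( "zh_cn,zh_tw" , "cn" );
	// "web", "custom" and anything unrecognized fall through to the generic rules
	return true;
}

int main ( ) {
	return rebuildUrlFilters2 ( "german" ) ? 0 : 1;
}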
@@ -2024,7 +2059,7 @@ bool CollectionRec::rebuildUrlFilters2 ( ) {
 	m_spiderIpMaxSpiders [n] = 1; // max spiders per ip
 	m_spiderIpWaits [n] = 1000; // same ip wait
 	m_spiderPriorities [n] = 45;
-	if ( m_urlFiltersProfile == UFP_NEWS )
+	if ( ! strcmp(s,"news") )
 		m_spiderFreqs [n] = .00347; // 5 mins
 	n++;

@@ -2035,7 +2070,7 @@ bool CollectionRec::rebuildUrlFilters2 ( ) {
 	m_spiderIpMaxSpiders [n] = 7; // max spiders per ip
 	m_spiderIpWaits [n] = 1000; // same ip wait
 	m_spiderPriorities [n] = 85;
-	if ( m_urlFiltersProfile == UFP_NEWS )
+	if ( ! strcmp(s,"news") )
 		m_spiderFreqs [n] = .00347; // 5 mins
 	n++;

@@ -2046,7 +2081,7 @@ bool CollectionRec::rebuildUrlFilters2 ( ) {
 	m_spiderIpMaxSpiders [n] = 7; // max spiders per ip
 	m_spiderIpWaits [n] = 1000; // same ip wait
 	m_spiderPriorities [n] = 50;
-	if ( m_urlFiltersProfile == UFP_NEWS )
+	if ( ! strcmp(s,"news") )
 		m_spiderFreqs [n] = .00347; // 5 mins
 	n++;

@@ -2057,7 +2092,7 @@ bool CollectionRec::rebuildUrlFilters2 ( ) {
 	m_spiderIpMaxSpiders [n] = 7; // max spiders per ip
 	m_spiderIpWaits [n] = 1000; // same ip wait
 	m_spiderPriorities [n] = 48;
-	if ( m_urlFiltersProfile == UFP_NEWS )
+	if ( ! strcmp(s,"news") )
 		m_spiderFreqs [n] = .00347; // 5 mins
 	n++;

@@ -2068,7 +2103,7 @@ bool CollectionRec::rebuildUrlFilters2 ( ) {
 	m_spiderIpMaxSpiders [n] = 7; // max spiders per ip
 	m_spiderIpWaits [n] = 1000; // same ip wait
 	m_spiderPriorities [n] = 49;
-	if ( m_urlFiltersProfile == UFP_NEWS )
+	if ( ! strcmp(s,"news") )
 		m_spiderFreqs [n] = .00347; // 5 mins
 	n++;

@@ -2079,7 +2114,7 @@ bool CollectionRec::rebuildUrlFilters2 ( ) {
 	m_spiderIpMaxSpiders [n] = 7; // max spiders per ip
 	m_spiderIpWaits [n] = 1000; // same ip wait
 	m_spiderPriorities [n] = 47;
-	if ( m_urlFiltersProfile == UFP_NEWS )
+	if ( ! strcmp(s,"news") )
 		m_spiderFreqs [n] = .00347; // 5 mins
 	n++;

@@ -2090,7 +2125,7 @@ bool CollectionRec::rebuildUrlFilters2 ( ) {
 	m_spiderIpMaxSpiders [n] = 7; // max spiders per ip
 	m_spiderIpWaits [n] = 1000; // same ip wait
 	m_spiderPriorities [n] = 40;
-	if ( m_urlFiltersProfile == UFP_NEWS )
+	if ( ! strcmp(s,"news") )
 		m_spiderFreqs [n] = .04166; // 60 minutes
 	n++;

@@ -2101,7 +2136,7 @@ bool CollectionRec::rebuildUrlFilters2 ( ) {
 	m_spiderIpMaxSpiders [n] = 7; // max spiders per ip
 	m_spiderIpWaits [n] = 1000; // same ip wait
 	m_spiderPriorities [n] = 39;
-	if ( m_urlFiltersProfile == UFP_NEWS )
+	if ( ! strcmp(s,"news") )
 		m_spiderFreqs [n] = .04166; // 60 minutes
 	n++;

@@ -2113,7 +2148,7 @@ bool CollectionRec::rebuildUrlFilters2 ( ) {
 	m_spiderIpWaits [n] = 1000; // same ip wait
 	m_spiderPriorities [n] = 30;
 	// do not harvest links if we are spiderings NEWS
-	if ( m_urlFiltersProfile == UFP_NEWS ) {
+	if ( ! strcmp(s,"news") ) {
 		m_spiderFreqs [n] = 5.0;
 		m_harvestLinks [n] = 0;
 	}

@@ -2127,7 +2162,7 @@ bool CollectionRec::rebuildUrlFilters2 ( ) {
 	m_spiderIpWaits [n] = 1000; // same ip wait
 	m_spiderPriorities [n] = 29;
 	// do not harvest links if we are spiderings NEWS
-	if ( m_urlFiltersProfile == UFP_NEWS ) {
+	if ( ! strcmp(s,"news") ) {
 		m_spiderFreqs [n] = 5.0;
 		m_harvestLinks [n] = 0;
 	}

@@ -2141,7 +2176,7 @@ bool CollectionRec::rebuildUrlFilters2 ( ) {
 	m_spiderIpWaits [n] = 1000; // same ip wait
 	m_spiderPriorities [n] = 20;
 	// turn off spidering if hopcount is too big and we are spiderings NEWS
-	if ( m_urlFiltersProfile == UFP_NEWS ) {
+	if ( ! strcmp(s,"news") ) {
 		m_maxSpidersPerRule [n] = 0;
 		m_harvestLinks [n] = 0;
 	}

@@ -2157,7 +2192,7 @@ bool CollectionRec::rebuildUrlFilters2 ( ) {
 	m_spiderIpWaits [n] = 1000; // same ip wait
 	m_spiderPriorities [n] = 19;
 	// turn off spidering if hopcount is too big and we are spiderings NEWS
-	if ( m_urlFiltersProfile == UFP_NEWS ) {
+	if ( ! strcmp(s,"news") ) {
 		m_maxSpidersPerRule [n] = 0;
 		m_harvestLinks [n] = 0;
 	}

@@ -2183,7 +2218,7 @@ bool CollectionRec::rebuildUrlFilters2 ( ) {
 	m_spiderIpMaxSpiders [n] = 7; // max spiders per ip
 	m_spiderIpWaits [n] = 1000; // same ip wait
 	m_spiderPriorities [n] = 1;
-	if ( m_urlFiltersProfile == UFP_NEWS ) {
+	if ( ! strcmp(s,"news") ) {
 		m_maxSpidersPerRule [n] = 0;
 		m_harvestLinks [n] = 0;
 	}
@@ -2212,7 +2247,8 @@ bool CollectionRec::rebuildUrlFilters2 ( ) {
 	return true;
 }

-bool CollectionRec::rebuildChineseRules ( ) {
+bool CollectionRec::rebuildLangRules ( char *langStr , char *tldStr ) {

 	long n = 0;

@@ -2271,7 +2307,9 @@ bool CollectionRec::rebuildChineseRules ( ) {
 	m_spiderPriorities [n] = 85;
 	n++;

-	m_regExs[n].set("hopcount==0 && iswww && isnew && tld==cn");
+	m_regExs[n].reset();
+	m_regExs[n].safePrintf("hopcount==0 && iswww && isnew && tld==%s",
+			       tldStr);
 	m_harvestLinks [n] = 1;
 	m_spiderFreqs [n] = 7; // 30 days default
 	m_maxSpidersPerRule [n] = 9; // max spiders

@@ -2280,7 +2318,10 @@ bool CollectionRec::rebuildChineseRules ( ) {
 	m_spiderPriorities [n] = 50;
 	n++;

-	m_regExs[n].set("hopcount==0 && iswww && isnew && parentlang==zh_cn,zh_tw,xx");
+	m_regExs[n].reset();
+	m_regExs[n].safePrintf("hopcount==0 && iswww && isnew && "
+			       "parentlang==%s,xx"
+			       ,langStr);
 	m_harvestLinks [n] = 1;
 	m_spiderFreqs [n] = 7; // 30 days default
 	m_maxSpidersPerRule [n] = 9; // max spiders

@@ -2300,7 +2341,8 @@ bool CollectionRec::rebuildChineseRules ( ) {

-	m_regExs[n].set("hopcount==0 && iswww && tld==cn");
+	m_regExs[n].reset();
+	m_regExs[n].safePrintf("hopcount==0 && iswww && tld==%s",tldStr);
 	m_harvestLinks [n] = 1;
 	m_spiderFreqs [n] = 7.0; // days b4 respider
 	m_maxSpidersPerRule [n] = 9; // max spiders

@@ -2309,7 +2351,9 @@ bool CollectionRec::rebuildChineseRules ( ) {
 	m_spiderPriorities [n] = 48;
 	n++;

-	m_regExs[n].set("hopcount==0 && iswww && parentlang==zh_cn,zh_tw,xx");
+	m_regExs[n].reset();
+	m_regExs[n].safePrintf("hopcount==0 && iswww && parentlang==%s,xx",
+			       langStr);
 	m_harvestLinks [n] = 1;
 	m_spiderFreqs [n] = 7.0; // days b4 respider
 	m_maxSpidersPerRule [n] = 9; // max spiders

@@ -2331,7 +2375,8 @@ bool CollectionRec::rebuildChineseRules ( ) {

-	m_regExs[n].set("hopcount==0 && isnew && tld==cn");
+	m_regExs[n].reset();
+	m_regExs[n].safePrintf("hopcount==0 && isnew && tld==%s",tldStr);
 	m_harvestLinks [n] = 1;
 	m_spiderFreqs [n] = 7.0;
 	m_maxSpidersPerRule [n] = 9; // max spiders

@@ -2340,7 +2385,9 @@ bool CollectionRec::rebuildChineseRules ( ) {
 	m_spiderPriorities [n] = 49;
 	n++;

-	m_regExs[n].set("hopcount==0 && isnew && parentlang==zh_cn,zh_tw,xx");
+	m_regExs[n].reset();
+	m_regExs[n].safePrintf("hopcount==0 && isnew && parentlang==%s,xx",
+			       langStr);
 	m_harvestLinks [n] = 1;
 	m_spiderFreqs [n] = 7.0;
 	m_maxSpidersPerRule [n] = 9; // max spiders

@@ -2360,7 +2407,8 @@ bool CollectionRec::rebuildChineseRules ( ) {

-	m_regExs[n].set("hopcount==0 && tld==cn");
+	m_regExs[n].reset();
+	m_regExs[n].safePrintf("hopcount==0 && tld==%s",tldStr);
 	m_harvestLinks [n] = 1;
 	m_spiderFreqs [n] = 10.0;
 	m_maxSpidersPerRule [n] = 9; // max spiders

@@ -2369,7 +2417,8 @@ bool CollectionRec::rebuildChineseRules ( ) {
 	m_spiderPriorities [n] = 47;
 	n++;

-	m_regExs[n].set("hopcount==0 && parentlang==zh_cn,zh_tw,xx");
+	m_regExs[n].reset();
+	m_regExs[n].safePrintf("hopcount==0 && parentlang==%s,xx",langStr);
 	m_harvestLinks [n] = 1;
 	m_spiderFreqs [n] = 10.0;
 	m_maxSpidersPerRule [n] = 9; // max spiders

@@ -2390,7 +2439,8 @@ bool CollectionRec::rebuildChineseRules ( ) {

-	m_regExs[n].set("hopcount==1 && isnew && tld==cn");
+	m_regExs[n].reset();
+	m_regExs[n].safePrintf("hopcount==1 && isnew && tld==%s",tldStr);
 	m_harvestLinks [n] = 1;
 	m_spiderFreqs [n] = 20.0;
 	m_maxSpidersPerRule [n] = 9; // max spiders

@@ -2399,7 +2449,9 @@ bool CollectionRec::rebuildChineseRules ( ) {
 	m_spiderPriorities [n] = 40;
 	n++;

-	m_regExs[n].set("hopcount==1 && isnew && parentlang==zh_cn,zh_tw,xx");
+	m_regExs[n].reset();
+	m_regExs[n].safePrintf("hopcount==1 && isnew && parentlang==%s,xx",
+			       tldStr);
 	m_harvestLinks [n] = 1;
 	m_spiderFreqs [n] = 20.0;
 	m_maxSpidersPerRule [n] = 9; // max spiders
@@ -2419,7 +2471,8 @@ bool CollectionRec::rebuildChineseRules ( ) {

-	m_regExs[n].set("hopcount==1 && tld==cn");
+	m_regExs[n].reset();
+	m_regExs[n].safePrintf("hopcount==1 && tld==%s",tldStr);
 	m_harvestLinks [n] = 1;
 	m_spiderFreqs [n] = 20.0;
 	m_maxSpidersPerRule [n] = 9; // max spiders

@@ -2428,7 +2481,8 @@ bool CollectionRec::rebuildChineseRules ( ) {
 	m_spiderPriorities [n] = 39;
 	n++;

-	m_regExs[n].set("hopcount==1 && parentlang==zh_cn,zh_tw,xx");
+	m_regExs[n].reset();
+	m_regExs[n].safePrintf("hopcount==1 && parentlang==%s,xx",langStr);
 	m_harvestLinks [n] = 1;
 	m_spiderFreqs [n] = 20.0;
 	m_maxSpidersPerRule [n] = 9; // max spiders

@@ -2448,8 +2502,8 @@ bool CollectionRec::rebuildChineseRules ( ) {

-	m_regExs[n].set("hopcount==2 && isnew && tld==cn");
+	m_regExs[n].reset();
+	m_regExs[n].safePrintf("hopcount==2 && isnew && tld==%s",tldStr);
 	m_harvestLinks [n] = 1;
 	m_spiderFreqs [n] = 40;
 	m_maxSpidersPerRule [n] = 9; // max spiders

@@ -2458,7 +2512,9 @@ bool CollectionRec::rebuildChineseRules ( ) {
 	m_spiderPriorities [n] = 30;
 	n++;

-	m_regExs[n].set("hopcount==2 && isnew && parentlang==zh_cn,zh_tw,xx");
+	m_regExs[n].reset();
+	m_regExs[n].safePrintf("hopcount==2 && isnew && parentlang==%s,xx",
+			       langStr);
 	m_harvestLinks [n] = 1;
 	m_spiderFreqs [n] = 40;
 	m_maxSpidersPerRule [n] = 9; // max spiders

@@ -2479,7 +2535,8 @@ bool CollectionRec::rebuildChineseRules ( ) {

-	m_regExs[n].set("hopcount==2 && tld==cn");
+	m_regExs[n].reset();
+	m_regExs[n].safePrintf("hopcount==2 && tld==%s",tldStr);
 	m_harvestLinks [n] = 1;
 	m_spiderFreqs [n] = 40;
 	m_maxSpidersPerRule [n] = 9; // max spiders

@@ -2488,7 +2545,8 @@ bool CollectionRec::rebuildChineseRules ( ) {
 	m_spiderPriorities [n] = 29;
 	n++;

-	m_regExs[n].set("hopcount==2 && parentlang==zh_cn,zh_tw,xx");
+	m_regExs[n].reset();
+	m_regExs[n].safePrintf("hopcount==2 && parentlang==%s,xx",langStr);
 	m_harvestLinks [n] = 1;
 	m_spiderFreqs [n] = 40;
 	m_maxSpidersPerRule [n] = 9; // max spiders

@@ -2509,7 +2567,8 @@ bool CollectionRec::rebuildChineseRules ( ) {

-	m_regExs[n].set("hopcount>=3 && isnew && tld==cn");
+	m_regExs[n].reset();
+	m_regExs[n].safePrintf("hopcount>=3 && isnew && tld==%s",tldStr);
 	m_harvestLinks [n] = 1;
 	m_spiderFreqs [n] = 60;
 	m_maxSpidersPerRule [n] = 9; // max spiders

@@ -2518,7 +2577,9 @@ bool CollectionRec::rebuildChineseRules ( ) {
 	m_spiderPriorities [n] = 22;
 	n++;

-	m_regExs[n].set("hopcount>=3 && isnew && parentlang==zh_cn,zh_tw,xx");
+	m_regExs[n].reset();
+	m_regExs[n].safePrintf("hopcount>=3 && isnew && parentlang==%s,xx",
+			       langStr);
 	m_harvestLinks [n] = 1;
 	m_spiderFreqs [n] = 60;
 	m_maxSpidersPerRule [n] = 9; // max spiders

@@ -2539,7 +2600,8 @@ bool CollectionRec::rebuildChineseRules ( ) {

-	m_regExs[n].set("hopcount>=3 && tld==cn");
+	m_regExs[n].safePrintf("hopcount>=3 && tld==%s",tldStr);
+	m_regExs[n].reset();
 	m_harvestLinks [n] = 1;
 	m_spiderFreqs [n] = 60;
 	m_maxSpidersPerRule [n] = 9; // max spiders

@@ -2548,7 +2610,8 @@ bool CollectionRec::rebuildChineseRules ( ) {
 	m_spiderPriorities [n] = 21;
 	n++;

-	m_regExs[n].set("hopcount>=3 && parentlang==zh_cn,zh_tw,xx");
+	m_regExs[n].reset();
+	m_regExs[n].safePrintf("hopcount>=3 && parentlang==%s,xx",langStr);
 	m_harvestLinks [n] = 1;
 	m_spiderFreqs [n] = 60;
 	m_maxSpidersPerRule [n] = 9; // max spiders
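Note on the rebuildLangRules() hunks above: every rule the old rebuildChineseRules() hard-coded for tld==cn and parentlang==zh_cn,zh_tw is now printed from a template with the caller's language and TLD lists, one tld-based and one parentlang-based rule per hopcount tier. A rough standalone sketch of that templating idea, with std::string and snprintf standing in for SafeBuf::safePrintf (the frequencies and priorities below are placeholders, not the values from the diff):

#include <cstdio>
#include <string>
#include <vector>

struct SpiderRule {
	std::string expr;     // url-filter expression, e.g. "hopcount==0 && tld==de"
	double      freqDays; // respider frequency in days
	int         priority; // spider priority
};

static std::vector<SpiderRule> buildLangRules ( const char *langStr ,
                                                const char *tldStr ) {
	std::vector<SpiderRule> rules;
	char buf[256];
	for ( int hop = 0 ; hop <= 2 ; hop++ ) {
		// placeholder tiering: respider less often and at lower priority
		// the farther a page is from the seeds
		double freq = 7.0 + 10.0 * hop;
		int    pri  = 50 - 10 * hop;
		std::snprintf ( buf, sizeof(buf),
		                "hopcount==%d && tld==%s", hop, tldStr );
		rules.push_back ( SpiderRule{ buf, freq, pri } );
		std::snprintf ( buf, sizeof(buf),
		                "hopcount==%d && parentlang==%s,xx", hop, langStr );
		rules.push_back ( SpiderRule{ buf, freq, pri - 1 } );
	}
	return rules;
}

int main ( ) {
	for ( const SpiderRule &r : buildLangRules ( "de" , "de" ) )
		std::printf ( "%-40s freq=%.1f pri=%d\n",
		              r.expr.c_str(), r.freqDays, r.priority );
	return 0;
}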
@@ -3053,8 +3116,8 @@ void nukeDoledb ( collnum_t collnum );
 bool CollectionRec::rebuildUrlFilters ( ) {

 	if ( ! g_conf.m_doingCommandLine )
-		log("coll: Rebuilding url filters for %s ufp=%li",m_coll,
-		    (long)m_urlFiltersProfile);
+		log("coll: Rebuilding url filters for %s ufp=%s",m_coll,
+		    m_urlFiltersProfile.getBufStart());

 	// if not a custom crawl, and no expressions, add a default one
 	//if ( m_numRegExs == 0 && ! m_isCustomCrawl ) {
Collectiondb.h
@@ -385,7 +385,7 @@ class CollectionRec {
 	// for regular crawls
 	bool rebuildUrlFilters2();

-	bool rebuildChineseRules();
+	bool rebuildLangRules( char *lang , char *tld );

 	bool rebuildShallowRules();

@@ -730,8 +730,8 @@ class CollectionRec {
 	SafeBuf m_siteListBuf;
 	char m_spiderToo;

-	// see UFP_* values in Parms.h. i.e. UFP_NEWS for crawling for NEWS
-	char m_urlFiltersProfile;
+	// can be "web" "english" "romantic" "german" etc.
+	SafeBuf m_urlFiltersProfile;

 	// . now the url regular expressions
 	// . we chain down the regular expressions
Parms.cpp (57 changed lines)
@@ -1601,17 +1601,48 @@ bool printDropDown ( long n , SafeBuf* sb, char *name, long select,
 	return true;
 }

-bool printDropDownProfile ( SafeBuf* sb, char *name, long select ) {
+class DropLangs {
+public:
+	char *m_title;
+	char *m_lang;
+	char *m_tld;
+};

+DropLangs g_drops[] = {
+	{"custom",NULL,NULL},
+	{"web",NULL,NULL},
+	{"news",NULL,NULL},
+	{"english","en","com,us.gov,org"},
+	{"german","de","de"},
+	{"french","fr","fr"},
+	{"norweigian","nl","nl"},
+	{"spanish","es","es"},
+	{"italian","it","it"},
+	{"romantic","en,de,fr,nl,es,it","com,us.gov,org,de,fr,nl,es,it"}
+};

+// "url filters profile" values. used to set default crawl rules
+// in Collectiondb.cpp's CollectionRec::setUrlFiltersToDefaults().
+// for instance, UFP_NEWS spiders sites more frequently but less deep in
+// order to get "news" pages and articles
+bool printDropDownProfile ( SafeBuf* sb, char *name, CollectionRec *cr ) {
 	sb->safePrintf ( "<select name=%s>", name );
 	// the type of url filters profiles
-	char *items[] = {"custom","web","news","chinese","shallow"};
-	char *s;
-	for ( long i = 0 ; i < 5 ; i++ ) {
-		if ( i == select ) s = " selected";
-		else s = "";
-		sb->safePrintf ("<option value=%li%s>%s",i,s,items[i]);
+	//char *items[] = {"custom","web","news","chinese","shallow"};
+	long nd = sizeof(g_drops)/sizeof(DropLangs);
+	for ( long i = 0 ; i < nd ; i++ ) {
+		//if ( i == select ) s = " selected";
+		//else s = "";
+		char *x = cr->m_urlFiltersProfile.getBufStart();
+		char *s;
+		if ( strcmp(g_drops[i].m_title, x) == 0 ) s = " selected";
+		else s = "";
+		sb->safePrintf ("<option value=%s%s>%s",
+				g_drops[i].m_title,
+				s,
+				g_drops[i].m_title );
 	}
-	sb->safePrintf ( "</select>" );
+	sb->safePrintf ( "</select>");
 	return true;
 }
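Note on the drop-down change above: the hard-coded five-entry items[] list is replaced by the g_drops table, and the selected entry is found by comparing each row's title against the collection's stored profile string. A small self-contained sketch of the same table-driven rendering, with printf standing in for SafeBuf::safePrintf and only a subset of the table rows (the glue code here is illustrative, not the committed code):

#include <cstdio>
#include <cstring>

struct DropLangs { const char *m_title; const char *m_lang; const char *m_tld; };

// subset of the g_drops table from the hunk above
static const DropLangs g_drops[] = {
	{ "custom"  , nullptr , nullptr },
	{ "web"     , nullptr , nullptr },
	{ "news"    , nullptr , nullptr },
	{ "english" , "en"    , "com,us.gov,org" },
	{ "german"  , "de"    , "de" },
	{ "french"  , "fr"    , "fr" },
};

// emit one <option> per table row and mark the row that matches the
// collection's current profile string as selected
static void printDropDownProfile ( const char *name , const char *current ) {
	std::printf ( "<select name=%s>" , name );
	long nd = sizeof(g_drops) / sizeof(DropLangs);
	for ( long i = 0 ; i < nd ; i++ ) {
		const char *sel =
			( std::strcmp ( g_drops[i].m_title , current ) == 0 )
			? " selected" : "";
		std::printf ( "<option value=%s%s>%s" ,
		              g_drops[i].m_title , sel , g_drops[i].m_title );
	}
	std::printf ( "</select>\n" );
}

int main ( ) {
	printDropDownProfile ( "ufp" , "german" );
	return 0;
}

With this layout, adding a language to the url filters drop-down is one new g_drops row plus a matching strcmp() branch in CollectionRec::rebuildUrlFilters2().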
@@ -2354,9 +2385,11 @@ bool Parms::printParm ( SafeBuf* sb,
 	//else if ( t == TYPE_DIFFBOT_DROPDOWN ) {
 	//	char *xx=NULL;*xx=0;
 	//}
-	else if ( t == TYPE_UFP )
+	//else if ( t == TYPE_UFP )
+	else if ( t == TYPE_SAFEBUF &&
+		  strcmp(m->m_title,"url filters profile")==0)
 		// url filters profile drop down "ufp"
-		printDropDownProfile ( sb , "ufp" , *s );
+		printDropDownProfile ( sb , "ufp" , cr );//*s );
 	else if ( t == TYPE_RETRIES )
 		printDropDown ( 4 , sb , cgi , *s , false , false );
 	else if ( t == TYPE_FILEUPLOADBUTTON ) {
@@ -12491,8 +12524,8 @@ void Parms::init ( ) {
 		"to the table will be lost.";
 	m->m_off = (char *)&cr.m_urlFiltersProfile - x;
 	m->m_colspan = 3;
-	m->m_type = TYPE_UFP;// 1 byte dropdown menu
-	m->m_def = "1"; // UFP_WEB
+	m->m_type = TYPE_SAFEBUF;//UFP;// 1 byte dropdown menu
+	m->m_def = "web"; // UFP_WEB
 	m->m_flags = PF_REBUILDURLFILTERS | PF_CLONE;
 	m->m_page = PAGE_FILTERS;
 	m->m_obj = OBJ_COLL;
Parms.h (2 changed lines)
@@ -23,7 +23,7 @@ enum {
 	UFP_NONE = 0 ,
 	UFP_WEB = 1 ,
 	UFP_NEWS = 2 ,
-	UFP_CHINESE = 3,
+	UFP_LANG = 3,
 	UFP_SHALLOW = 4
 };