forked from Mirrors/privacore-open-source-search-engine
Remove more unused variables/settings/functions
This commit is contained in:
@ -1446,11 +1446,6 @@ CollectionRec::CollectionRec() {
|
||||
m_overflow2 = 0x12345678;
|
||||
// the spiders are currently uninhibited i guess
|
||||
m_spiderStatus = SP_INITIALIZING; // this is 0
|
||||
//m_spiderStatusMsg = NULL;
|
||||
// for Url::getSite()
|
||||
m_updateSiteRulesTable = 1;
|
||||
//m_lastUpdateTime = 0LL;
|
||||
m_clickNScrollEnabled = false;
|
||||
// inits for sortbydatetable
|
||||
m_msg5 = NULL;
|
||||
m_importState = NULL;
|
||||
|
@ -357,14 +357,10 @@ class CollectionRec {
|
||||
char m_getLinkInfo ; // turn off to save seeks
|
||||
char m_computeSiteNumInlinks ;
|
||||
char m_indexInlinkNeighborhoods;
|
||||
char m_newAlgo ; // use new links: termlist algo
|
||||
char m_removeBannedPages ;
|
||||
|
||||
char m_dedupURLDefault ;
|
||||
float m_numDocsMultiplier ;
|
||||
int32_t m_percentSimilarSummary ; // Dedup by summary similiarity
|
||||
int32_t m_summDedupNumLines ;
|
||||
int32_t m_contentLenMaxForSummary ;
|
||||
|
||||
int32_t m_maxQueryTerms;
|
||||
|
||||
@ -373,11 +369,7 @@ class CollectionRec {
|
||||
float m_sameLangWeight;
|
||||
|
||||
// Language stuff
|
||||
float m_languageUnknownWeight;
|
||||
float m_languageWeightFactor;
|
||||
char m_enableLanguageSorting;
|
||||
char m_defaultSortLanguage2[6];
|
||||
char m_defaultSortCountry[3];
|
||||
|
||||
// for Spider.cpp
|
||||
int32_t m_updateRoundNum;
|
||||
@ -399,8 +391,6 @@ class CollectionRec {
|
||||
|
||||
char m_dedupResultsByDefault ;
|
||||
char m_doTagdbLookups ;
|
||||
char m_useOldIps ;
|
||||
int32_t m_summaryMode ;
|
||||
char m_deleteTimeouts ; // can delete docs that time out?
|
||||
char m_allowAdultDocs ;
|
||||
char m_useCanonicalRedirects ;
|
||||
@ -559,13 +549,6 @@ class CollectionRec {
|
||||
// how long a robots.txt can be in the cache (Msg13.cpp/Robotdb.cpp)
|
||||
int32_t m_maxRobotsCacheAge;
|
||||
|
||||
// for importing search results from another cluster
|
||||
int32_t m_numResultsToImport ;
|
||||
float m_importWeight;
|
||||
int32_t m_numLinkerWeight;
|
||||
int32_t m_minLinkersPerImportedResult ;
|
||||
char m_importColl [ MAX_COLL_LEN + 1 ];
|
||||
|
||||
// use query expansion for this collection?
|
||||
char m_queryExpansion;
|
||||
|
||||
@ -574,45 +557,6 @@ class CollectionRec {
|
||||
|
||||
char m_hideAllClustered;
|
||||
|
||||
// display indexed date, last modified date, datedb (published) date
|
||||
char m_displayIndexedDate;
|
||||
char m_displayLastModDate;
|
||||
char m_displayPublishDate;
|
||||
|
||||
// data feed parms
|
||||
char m_useDFAcctServer;
|
||||
int32_t m_dfAcctIp;
|
||||
int32_t m_dfAcctPort;
|
||||
|
||||
// enable click 'n' scroll
|
||||
char m_clickNScrollEnabled;
|
||||
|
||||
// post query reranking
|
||||
int32_t m_pqr_docsToScan; // also for # docs for language
|
||||
float m_pqr_demFactCountry; // demotion for foreign countries
|
||||
float m_pqr_demFactPaths; // demotion factor for more paths
|
||||
int32_t m_pqr_maxValPaths; // max value for more paths
|
||||
float m_pqr_demFactPageSize; // demotion factor for higher page sizes
|
||||
int32_t m_pqr_maxValPageSize; // max value for higher page sizes
|
||||
int32_t m_pqr_maxValLoc; // max value for non-location specific queries with location specific results
|
||||
float m_pqr_demFactNonHtml; // demotion factor for non-html content type
|
||||
float m_pqr_demFactXml; // demotion factor for xml content type
|
||||
float m_pqr_demFactOthFromHost; // demotion factor for no other pages from same host
|
||||
int32_t m_pqr_maxValOthFromHost; // max value for no other pages from same host
|
||||
float m_pqr_demFactDatedbDate; // demotion for datedb date
|
||||
int32_t m_pqr_minValDatedbDate; // dates earlier than this will be demoted to the max
|
||||
int32_t m_pqr_maxValDatedbDate; // dates later than this will not be demoted
|
||||
float m_pqr_demFactProximity; // demotion for proximity of query terms
|
||||
int32_t m_pqr_maxValProximity; // max value for proximity of query terms
|
||||
int32_t m_pqr_maxValInSection; // max value for section of query terms
|
||||
float m_pqr_demFactOrigScore;
|
||||
|
||||
float m_pqr_demFactSubPhrase;
|
||||
float m_pqr_demFactCommonInlinks;
|
||||
|
||||
// lookup table for sitedb filter
|
||||
char m_updateSiteRulesTable;
|
||||
|
||||
// special var to prevent Collectiondb.cpp from copying the crap
|
||||
// below here
|
||||
char m_END_COPY;
|
||||
|
4
Conf.cpp
4
Conf.cpp
@ -296,10 +296,6 @@ bool Conf::init ( char *dir ) { // , int32_t hostId ) {
|
||||
log("db: Increase MAX_SHARDS");
|
||||
char *xx = NULL; *xx = 0;
|
||||
}
|
||||
// and always keep a decent site quality cache of at least 3M
|
||||
if ( g_conf.m_siteQualityMaxCacheMem < 3000000 )
|
||||
g_conf.m_siteQualityMaxCacheMem = 3000000;
|
||||
|
||||
|
||||
// HACK: set this now
|
||||
setRootIps();
|
||||
|
63
Conf.h
63
Conf.h
@ -195,86 +195,30 @@ class Conf {
|
||||
// that yields higher performance when dumping/merging on disk
|
||||
bool m_isLive;
|
||||
|
||||
// for holding robots.txt files for various hostnames
|
||||
int32_t m_robotdbMaxCacheMem ;
|
||||
bool m_robotdbSaveCache;
|
||||
|
||||
int32_t m_maxTotalSpiders;
|
||||
|
||||
// indexdb has a max cached age for getting IndexLists (10 mins deflt)
|
||||
int32_t m_indexdbMaxTreeMem ;
|
||||
int32_t m_indexdbMaxCacheMem;
|
||||
//int32_t m_indexdbMaxDiskPageCacheMem; // for DiskPageCache class only
|
||||
int32_t m_indexdbMaxIndexListAge;
|
||||
int32_t m_indexdbTruncationLimit;
|
||||
int32_t m_indexdbMinFilesToMerge;
|
||||
bool m_indexdbSaveCache;
|
||||
|
||||
int32_t m_datedbMaxTreeMem ;
|
||||
int32_t m_datedbMaxCacheMem;
|
||||
//int32_t m_datedbMaxDiskPageCacheMem; // for DiskPageCache class only
|
||||
int32_t m_datedbMaxIndexListAge;
|
||||
int32_t m_datedbTruncationLimit;
|
||||
int32_t m_datedbMinFilesToMerge;
|
||||
bool m_datedbSaveCache;
|
||||
// for caching exact quotas in Msg36.cpp
|
||||
|
||||
// used by qa.cpp and Msg13.cpp
|
||||
//bool m_qaBuildMode;
|
||||
|
||||
//int32_t m_quotaTableMaxMem;
|
||||
|
||||
//bool m_useBuckets;
|
||||
|
||||
// port of the main udp server
|
||||
int16_t m_udpPort;
|
||||
|
||||
// TODO: parse these out!!!!
|
||||
//char m_httpRootDir[256] ;
|
||||
//int16_t m_httpPort ; now in hosts.conf only
|
||||
int32_t m_httpMaxSockets ;
|
||||
int32_t m_httpsMaxSockets ;
|
||||
//int32_t m_httpMaxReadBufSize ;
|
||||
int32_t m_httpMaxSendBufSize ;
|
||||
//int32_t m_httpMaxDownloadSockets ;
|
||||
|
||||
// a search results cache (for Msg40)
|
||||
int32_t m_searchResultsMaxCacheMem ;
|
||||
int32_t m_searchResultsMaxCacheAge ; // in seconds
|
||||
bool m_searchResultsSaveCache;
|
||||
int64_t m_docSummaryWithDescriptionMaxCacheAge; //cache timeout for document summaries for documents with a meta-tag with description, in milliseconds
|
||||
|
||||
// a sitelinkinfo cache (for Msg25)
|
||||
int32_t m_siteLinkInfoMaxCacheMem;
|
||||
int32_t m_siteLinkInfoMaxCacheAge;
|
||||
bool m_siteLinkInfoSaveCache;
|
||||
|
||||
// a site quality cache (for MsgD)
|
||||
int32_t m_siteQualityMaxCacheMem;
|
||||
int32_t m_siteQualityMaxCacheAge;
|
||||
bool m_siteQualitySaveCache;
|
||||
|
||||
// a sitelinkinfo cache (for Msg25)
|
||||
|
||||
// for downloading an rdb
|
||||
//int32_t m_downloadBufSize; // how big should hosts read buf be?
|
||||
|
||||
// . how many incoming links should we sample?
|
||||
// . used for linkText and quality weighting from number of links
|
||||
// and their total base quality
|
||||
int32_t m_maxIncomingLinksToSample;
|
||||
|
||||
// phrase weighting
|
||||
float m_queryPhraseWeight;
|
||||
|
||||
// for Weights.cpp
|
||||
int32_t m_sliderParm;
|
||||
|
||||
//int32_t m_indexTableIntersectionAlgo;
|
||||
// . maximum relative weight of a query term (1.0 to inf)
|
||||
// . default about 8?
|
||||
//float m_queryMaxMultiplier;
|
||||
|
||||
// use sendmail to forward emails we send out
|
||||
char m_sendmailIp[MAX_MX_LEN];
|
||||
|
||||
@ -286,14 +230,7 @@ class Conf {
|
||||
char m_delayEmailsAfter[6];
|
||||
//delay emails before
|
||||
char m_delayEmailsBefore[6];
|
||||
//bool m_sendEmailAlertsToMattTmobile;
|
||||
//bool m_sendEmailAlertsToMattAlltell;
|
||||
//bool m_sendEmailAlertsToJavier;
|
||||
//bool m_sendEmailAlertsToMelissa;
|
||||
//bool m_sendEmailAlertsToPartap;
|
||||
//bool m_sendEmailAlertsToCinco;
|
||||
bool m_sendEmailAlertsToSysadmin;
|
||||
//bool m_sendEmailAlertsToZak;
|
||||
|
||||
bool m_sendEmailAlertsToEmail1;
|
||||
char m_email1MX[MAX_MX_LEN];
|
||||
|
20
Msg39.cpp
20
Msg39.cpp
@ -520,15 +520,7 @@ bool Msg39::getLists () {
|
||||
if ( qt->m_rightPhraseTerm &&
|
||||
qt->m_rightPhraseTerm->m_isWikiHalfStopBigram )
|
||||
rightwikibigram = 1;
|
||||
/*
|
||||
char c = m_tmpq.getTermSign(i);
|
||||
char tt[512];
|
||||
int32_t ttlen = m_tmpq.getTermLen(i);
|
||||
if ( ttlen > 254 ) ttlen = 254;
|
||||
if ( ttlen < 0 ) ttlen = 0;
|
||||
// old:painful: convert each term from unicode to ascii
|
||||
gbmemcpy ( tt , m_tmpq.getTerm(i) , ttlen );
|
||||
*/
|
||||
|
||||
int32_t isSynonym = 0;
|
||||
QueryTerm *st = qt->m_synonymOf;
|
||||
if ( st ) isSynonym = true;
|
||||
@ -655,16 +647,6 @@ bool Msg39::getLists () {
|
||||
return false;
|
||||
}
|
||||
|
||||
// error?
|
||||
//if ( g_errno ) {
|
||||
// log("msg39: Had error getting termlists2: %s.",
|
||||
// mstrerror(g_errno));
|
||||
// // don't bail out here because we are in docIdSplitLoop()
|
||||
// //sendReply (m_slot,this,NULL,0,0,true);
|
||||
// return true;
|
||||
//}
|
||||
|
||||
//return gotLists ( true );
|
||||
return true;
|
||||
}
|
||||
|
||||
|
12
Msg40.cpp
12
Msg40.cpp
@ -767,16 +767,6 @@ bool Msg40::reallocMsg20Buf ( ) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// . wrapper taking an opaque "state" pointer, which is cast back to a Msg40
//   (presumably registered as a task-completion callback -- confirm at the
//   registration site)
// . decrements the Msg40's outstanding task count, then re-enters
//   launchMsg20s(false)
// . if launchMsg20s() returns true, invokes the Msg40's m_callback with
//   m_state; otherwise returns without calling it
void didTaskWrapper ( void* state ) {
|
||||
Msg40 *THIS = (Msg40 *) state;
|
||||
// one less task outstanding
|
||||
THIS->m_tasksRemaining--;
|
||||
// launchMsg20s() returns false if it blocked; bail without calling back
|
||||
if ( ! THIS->launchMsg20s ( false ) ) return;
|
||||
// it did not block so we are done, call the callback with its state
|
||||
THIS->m_callback ( THIS->m_state );
|
||||
}
|
||||
|
||||
bool Msg40::launchMsg20s ( bool recalled ) {
|
||||
|
||||
// don't launch any more if client browser closed socket
|
||||
@ -956,8 +946,6 @@ bool Msg40::launchMsg20s ( bool recalled ) {
|
||||
// let "ns" parm override
|
||||
req.m_numSummaryLines = m_si->m_numLinesInSummary;
|
||||
|
||||
if ( m_si->m_pqr_demFactCommonInlinks > 0.0 )
|
||||
req.m_getLinkInfo = true;
|
||||
// . buzz likes to do the &inlinks=1 parm to get inlinks
|
||||
// . use "&inlinks=1" for realtime inlink info, use
|
||||
// "&inlinks=2" to just get it from the title rec, which is
|
||||
|
5
Msg40.h
5
Msg40.h
@ -2,8 +2,8 @@
|
||||
|
||||
// . gets the title/summary/docLen/url results from a query
|
||||
|
||||
#ifndef _MSG40_H_
|
||||
#define _MSG40_H_
|
||||
#ifndef MSG40_H
|
||||
#define MSG40_H
|
||||
|
||||
#define SAMPLE_VECTOR_SIZE (32*4)
|
||||
|
||||
@ -75,7 +75,6 @@ public:
|
||||
// returns m_firstResultNum as stored in the search input (m_si)
int32_t getFirstResultNum ( ) { return m_si->m_firstResultNum; }
|
||||
|
||||
// returns the docid count reported by m_msg3a (same value as getNumDocIds())
int32_t getNumResults ( ){return m_msg3a.getNumDocIds(); }
|
||||
// returns the docid count reported by m_msg3a
int32_t getNumDocIds ( ){return m_msg3a.getNumDocIds(); }
|
||||
|
||||
// returns entry "i" of m_msg3a's cluster levels array; "i" is not
// range-checked here
char getClusterLevel(int32_t i){return m_msg3a.getClusterLevels()[i];}
|
||||
|
||||
|
@ -1338,10 +1338,6 @@ bool printSearchResultsHeader ( State0 *st ) {
|
||||
"kick in.");
|
||||
}
|
||||
else if ( moreFollow && si->m_format == FORMAT_HTML ) {
|
||||
if ( isAdmin && si->m_docsToScanForReranking > 1 ) {
|
||||
sb->safePrintf ( "PQR'd " );
|
||||
}
|
||||
|
||||
sb->safePrintf ("Results <b>%"INT32"</b> to <b>%"INT32"</b> of "
|
||||
"exactly <b>%s</b> from an index "
|
||||
"of about %s pages" ,
|
||||
@ -1353,8 +1349,6 @@ bool printSearchResultsHeader ( State0 *st ) {
|
||||
}
|
||||
// otherwise, we didn't get enough results to show this page
|
||||
else if ( si->m_format == FORMAT_HTML ) {
|
||||
if ( isAdmin && si->m_docsToScanForReranking > 1 )
|
||||
sb->safePrintf ( "PQR'd " );
|
||||
sb->safePrintf ("Results <b>%"INT32"</b> to <b>%"INT32"</b> of "
|
||||
"exactly <b>%s</b> from an index "
|
||||
"of about %s pages" ,
|
||||
@ -3021,26 +3015,18 @@ bool printResult ( State0 *st, int32_t ix , int32_t *numPrintedSoFar ) {
|
||||
char *coll = "UNKNOWN";
|
||||
if ( scr ) coll = scr->m_coll;
|
||||
|
||||
if ( si->m_format == FORMAT_HTML ) {
|
||||
if ( printCached && cr->m_clickNScrollEnabled )
|
||||
sb->safePrintf ( " - <a href=/scroll.html?page="
|
||||
"get?"
|
||||
"q=%s&c=%s&d=%"INT64">"
|
||||
"cached</a>\n",
|
||||
st->m_qesb.getBufStart() , coll ,
|
||||
mr->m_docId );
|
||||
else if ( printCached )
|
||||
sb->safePrintf ( "<a href=\""
|
||||
"/get?"
|
||||
"q=%s&"
|
||||
"qlang=%s&"
|
||||
"c=%s&d=%"INT64"&cnsp=0\">"
|
||||
"cached</a>\n",
|
||||
st->m_qesb.getBufStart() ,
|
||||
// "qlang" parm
|
||||
si->m_defaultSortLang,
|
||||
coll ,
|
||||
mr->m_docId );
|
||||
if ( si->m_format == FORMAT_HTML && printCached ) {
|
||||
sb->safePrintf ( "<a href=\""
|
||||
"/get?"
|
||||
"q=%s&"
|
||||
"qlang=%s&"
|
||||
"c=%s&d=%"INT64"&cnsp=0\">"
|
||||
"cached</a>\n",
|
||||
st->m_qesb.getBufStart() ,
|
||||
// "qlang" parm
|
||||
si->m_defaultSortLang,
|
||||
coll ,
|
||||
mr->m_docId );
|
||||
}
|
||||
|
||||
// unhide the divs on click
|
||||
|
821
Parms.cpp
821
Parms.cpp
@ -3658,27 +3658,6 @@ void Parms::init ( ) {
|
||||
m->m_obj = OBJ_CONF;
|
||||
m++;
|
||||
|
||||
m->m_title = "robotdb max cache mem";
|
||||
m->m_desc = "Robotdb caches robot.txt files.";
|
||||
m->m_off = (char *)&g_conf.m_robotdbMaxCacheMem - g;
|
||||
m->m_def = "128000";
|
||||
m->m_type = TYPE_LONG;
|
||||
m->m_flags = PF_NOSYNC|PF_NOAPI;
|
||||
m->m_page = PAGE_NONE;
|
||||
m->m_obj = OBJ_CONF;
|
||||
m++;
|
||||
|
||||
m->m_title = "robotdb save cache";
|
||||
m->m_cgi = "rdbsc";
|
||||
m->m_desc = "";
|
||||
m->m_off = (char *)&g_conf.m_robotdbSaveCache - g;
|
||||
m->m_def = "0";
|
||||
m->m_type = TYPE_BOOL;
|
||||
m->m_page = PAGE_NONE;
|
||||
m->m_obj = OBJ_CONF;
|
||||
m->m_flags = PF_NOAPI;
|
||||
m++;
|
||||
|
||||
m->m_title = "statsdb max tree mem";
|
||||
m->m_desc = "";
|
||||
m->m_off = (char *)&g_conf.m_statsdbMaxTreeMem - g;
|
||||
@ -4190,21 +4169,6 @@ void Parms::init ( ) {
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
|
||||
m->m_title = "fast results";
|
||||
m->m_desc = "Use &fast=1 to obtain seach results from the much "
|
||||
"faster Gigablast index, although the results are not "
|
||||
"searched as thoroughly.";
|
||||
m->m_obj = OBJ_SI;
|
||||
m->m_page = PAGE_RESULTS;
|
||||
m->m_off = (char *)&si.m_query - y;
|
||||
m->m_type = TYPE_CHARPTR;//STRING;
|
||||
m->m_def = "0";
|
||||
m->m_cgi = "fast";
|
||||
m->m_flags = PF_COOKIE | PF_WIDGET_PARM | PF_API;
|
||||
m++;
|
||||
|
||||
|
||||
m->m_title = "query";
|
||||
m->m_desc = "The query to perform. See <a href=/help.html>help</a>. "
|
||||
"See the <a href=#qops>query operators</a> below for "
|
||||
@ -4488,22 +4452,6 @@ void Parms::init ( ) {
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
|
||||
m->m_title = "use language weights";
|
||||
m->m_desc = "Use Language weights to sort query results. "
|
||||
"This will give results that match the specified &qlang "
|
||||
"higher ranking.";
|
||||
m->m_cgi = "lsort";
|
||||
m->m_off = (char *)&cr.m_enableLanguageSorting - x;
|
||||
m->m_type = TYPE_BOOL;
|
||||
m->m_def = "1";
|
||||
m->m_group = 1;
|
||||
m->m_smin = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "sort language preference";
|
||||
m->m_desc = "Default language to use for ranking results. "
|
||||
//"This should only be used on limited collections. "
|
||||
@ -4538,393 +4486,6 @@ void Parms::init ( ) {
|
||||
m->m_obj = OBJ_SI;
|
||||
m++;
|
||||
|
||||
|
||||
m->m_title = "sort country preference";
|
||||
m->m_desc = "Default country to use for ranking results. "
|
||||
//"This should only be used on limited collections. "
|
||||
"Value should be any country code abbreviation, for example "
|
||||
"\"us\" for United States. This is currently not working.";
|
||||
m->m_cgi = "qcountry";
|
||||
m->m_off = (char *)&si.m_defaultSortCountry - y;
|
||||
m->m_type = TYPE_CHARPTR;
|
||||
m->m_size = 2+1;
|
||||
m->m_def = "us";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_API;
|
||||
m->m_page = PAGE_RESULTS;
|
||||
m->m_obj = OBJ_SI;
|
||||
m->m_flags = PF_NOAPI;
|
||||
m++;
|
||||
|
||||
m->m_title = "docs to check for post query";
|
||||
m->m_desc = "How many search results should we "
|
||||
"scan for post query demotion? "
|
||||
"0 disables all post query reranking. ";
|
||||
m->m_cgi = "pqrds";
|
||||
m->m_off = (char *)&si.m_docsToScanForReranking - y;
|
||||
m->m_type = TYPE_LONG;
|
||||
m->m_def = "0";
|
||||
m->m_group = 1;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_RESULTS;
|
||||
m->m_obj = OBJ_SI;
|
||||
m++;
|
||||
|
||||
m->m_title = "demotion for foreign languages";
|
||||
m->m_desc = "Demotion factor of non-relevant languages. Score "
|
||||
"will be penalized by this factor as a percent if "
|
||||
"it's language is foreign. "
|
||||
"A safe value is probably anywhere from 0.5 to 1. ";
|
||||
m->m_cgi = "pqrlang";
|
||||
m->m_off = (char *)&cr.m_languageWeightFactor - x;
|
||||
m->m_type = TYPE_FLOAT;
|
||||
m->m_def = "0.999";
|
||||
m->m_group = 0;
|
||||
m->m_smin = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "demotion for unknown languages";
|
||||
m->m_desc = "Demotion factor for unknown languages. "
|
||||
"Page's score will be penalized by this factor as a percent "
|
||||
"if it's language is not known. "
|
||||
"A safe value is 0, as these pages will be reranked by "
|
||||
"country (see below). "
|
||||
"0 means no demotion.";
|
||||
m->m_cgi = "pqrlangunk";
|
||||
m->m_off = (char *)&cr.m_languageUnknownWeight- x;
|
||||
m->m_type = TYPE_FLOAT;
|
||||
m->m_def = "0.0";
|
||||
m->m_group = 0;
|
||||
m->m_smin = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "demotion for pages where the country of the page writes "
|
||||
"in the same language as the country of the query";
|
||||
m->m_desc = "Demotion for pages where the country of the page writes "
|
||||
"in the same language as the country of the query. "
|
||||
"If query language is the same as the language of the page, "
|
||||
"then if a language written in the country of the page matches "
|
||||
"a language written by the country of the query, then page's "
|
||||
"score will be demoted by this factor as a percent. "
|
||||
"A safe range is between 0.5 and 1. ";
|
||||
m->m_cgi = "pqrcntry";
|
||||
m->m_off = (char *)&cr.m_pqr_demFactCountry - x;
|
||||
m->m_type = TYPE_FLOAT;
|
||||
m->m_def = "0.98";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "demotion for pages that are not "
|
||||
"root or have many paths in the url";
|
||||
m->m_desc = "Demotion factor each path in the url. "
|
||||
"Score will be demoted by this factor as a percent "
|
||||
"multiplied by the number of paths in the url divided "
|
||||
"by the max value below. "
|
||||
"Generally, the page will not be demoted more than this "
|
||||
"value as a percent. "
|
||||
"0 means no demotion. "
|
||||
"A safe range is from 0 to 0.75. ";
|
||||
m->m_cgi = "pqrpaths";
|
||||
m->m_off = (char *)&cr.m_pqr_demFactPaths - x;
|
||||
m->m_type = TYPE_FLOAT;
|
||||
m->m_def = "0";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "max value for pages that have many paths in the url";
|
||||
m->m_desc = "Max number of paths in a url. "
|
||||
"This should be set to a value representing a very high "
|
||||
"number of paths for a url. Lower values increase the "
|
||||
"difference between how much each additional path demotes. ";
|
||||
m->m_cgi = "pqrpathsm";
|
||||
m->m_off = (char *)&cr.m_pqr_maxValPaths - x;
|
||||
m->m_type = TYPE_LONG;
|
||||
m->m_def = "16";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "demotion for larger pages";
|
||||
m->m_desc = "Demotion factor for larger pages. "
|
||||
"Page will be penalized by its size times this factor "
|
||||
"divided by the max page size below. "
|
||||
"Generally, a page will not be demoted more than this "
|
||||
"factor as a percent. "
|
||||
"0 means no demotion. "
|
||||
"A safe range is between 0 and 0.25. ";
|
||||
m->m_cgi = "pqrpgsz";
|
||||
m->m_off = (char *)&cr.m_pqr_demFactPageSize - x;
|
||||
m->m_type = TYPE_FLOAT;
|
||||
m->m_def = "0";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "max value for larger pages";
|
||||
m->m_desc = "Max page size. "
|
||||
"Pages with a size greater than or equal to this will be "
|
||||
"demoted by the max amount (the factor above as a percent). ";
|
||||
m->m_cgi = "pqrpgszm";
|
||||
m->m_off = (char *)&cr.m_pqr_maxValPageSize - x;
|
||||
m->m_type = TYPE_LONG;
|
||||
m->m_def = "524288";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "max value for non-location specific queries "
|
||||
"with location specific results";
|
||||
m->m_desc = "Max place population. "
|
||||
"Places with a population greater than or equal to this "
|
||||
"will be demoted to the maximum amount given by the "
|
||||
"factor above as a percent. ";
|
||||
m->m_cgi = "pqrlocm";
|
||||
m->m_off = (char *)&cr.m_pqr_maxValLoc - x;
|
||||
m->m_type = TYPE_LONG;
|
||||
// charlottesville was getting missed when this was 1M
|
||||
m->m_def = "100000";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "demotion for non-html";
|
||||
m->m_desc = "Demotion factor for content type that is non-html. "
|
||||
"Pages which do not have an html content type will be "
|
||||
"demoted by this factor as a percent. "
|
||||
"0 means no demotion. "
|
||||
"A safe range is between 0 and 0.35. ";
|
||||
m->m_cgi = "pqrhtml";
|
||||
m->m_off = (char *)&cr.m_pqr_demFactNonHtml - x;
|
||||
m->m_type = TYPE_FLOAT;
|
||||
m->m_def = "0";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "demotion for xml";
|
||||
m->m_desc = "Demotion factor for content type that is xml. "
|
||||
"Pages which have an xml content type will be "
|
||||
"demoted by this factor as a percent. "
|
||||
"0 means no demotion. "
|
||||
"Any value between 0 and 1 is safe if demotion for non-html "
|
||||
"is set to 0. Otherwise, 0 should probably be used. ";
|
||||
m->m_cgi = "pqrxml";
|
||||
m->m_off = (char *)&cr.m_pqr_demFactXml - x;
|
||||
m->m_type = TYPE_FLOAT;
|
||||
m->m_def = "0.95";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "demotion for pages with other pages from same "
|
||||
"hostname";
|
||||
m->m_desc = "Demotion factor for pages with fewer other pages from "
|
||||
"same hostname. "
|
||||
"Pages with results from the same host will be "
|
||||
"demoted by this factor times each fewer host than the max "
|
||||
"value given below, divided by the max value. "
|
||||
"Generally, a page will not be demoted more than this "
|
||||
"factor as a percent. "
|
||||
"0 means no demotion. "
|
||||
"A safe range is between 0 and 0.35. ";
|
||||
m->m_cgi = "pqrfsd";
|
||||
m->m_off = (char *)&cr.m_pqr_demFactOthFromHost - x;
|
||||
m->m_type = TYPE_FLOAT;
|
||||
m->m_def = "0";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "max value for pages with other pages from same "
|
||||
"domain";
|
||||
m->m_desc = "Max number of pages from same domain. "
|
||||
"Pages which have this many or more pages from the same "
|
||||
"domain will not be demoted. ";
|
||||
m->m_cgi = "pqrfsdm";
|
||||
m->m_off = (char *)&cr.m_pqr_maxValOthFromHost - x;
|
||||
m->m_type = TYPE_LONG;
|
||||
m->m_def = "12";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "demotion for pages based on datedb date";
|
||||
m->m_desc = "Demotion factor for pages based on datedb date. "
|
||||
"Pages will be penalized for being published earlier than the "
|
||||
"max date given below. "
|
||||
"The older the page, the more it will be penalized based on "
|
||||
"the time difference between the page's date and the max date, "
|
||||
"divided by the max date. "
|
||||
"Generally, a page will not be demoted more than this "
|
||||
"value as a percent. "
|
||||
"0 means no demotion. "
|
||||
"A safe range is between 0 and 0.4. ";
|
||||
m->m_cgi = "pqrdate";
|
||||
m->m_off = (char *)&cr.m_pqr_demFactDatedbDate - x;
|
||||
m->m_type = TYPE_FLOAT;
|
||||
m->m_def = "0";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "min value for demotion based on datedb date ";
|
||||
m->m_desc = "Pages with a publish date equal to or earlier than "
|
||||
"this date will be demoted to the max (the factor above as "
|
||||
"a percent). "
|
||||
"Use this parm in conjunction with the max value below "
|
||||
"to specify the range of dates where demotion occurs. "
|
||||
"If you set this parm near the estimated earliest publish "
|
||||
"date that occurs somewhat frequently, this method can better "
|
||||
"control the additional demotion per publish day. "
|
||||
"This number is given as seconds since the epoch, January 1st, "
|
||||
"1970 divided by 1000. "
|
||||
"0 means use the epoch. ";
|
||||
m->m_cgi = "pqrdatei";
|
||||
m->m_off = (char *)&cr.m_pqr_minValDatedbDate - x;
|
||||
m->m_type = TYPE_LONG;
|
||||
m->m_def = "631177"; // Jan 01, 1990
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "max value for demotion based on datedb date ";
|
||||
m->m_desc = "Pages with a publish date greater than or equal to "
|
||||
"this value divided by 1000 will not be demoted. "
|
||||
"Use this parm in conjunction with the min value above "
|
||||
"to specify the range of dates where demotion occurs. "
|
||||
"This number is given as seconds before the current date "
|
||||
"and time taken from the system clock divided by 1000. "
|
||||
"0 means use the current time of the current day. ";
|
||||
m->m_cgi = "pqrdatem";
|
||||
m->m_off = (char *)&cr.m_pqr_maxValDatedbDate - x;
|
||||
m->m_type = TYPE_LONG;
|
||||
m->m_def = "0";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "demotion for pages based on proximity";
|
||||
m->m_desc = "Demotion factor for proximity of query terms in "
|
||||
"a document. The closer together terms occur in a "
|
||||
"document, the higher it will score."
|
||||
"0 means no demotion. ";
|
||||
m->m_cgi = "pqrprox";
|
||||
m->m_off = (char *)&cr.m_pqr_demFactProximity - x;
|
||||
m->m_type = TYPE_FLOAT;
|
||||
m->m_def = "0";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "weight of indexed score on pqr";
|
||||
m->m_desc = "The proportion that the original score affects "
|
||||
"its rerank position. A factor of 1 will maintain "
|
||||
"the original score, 0 will only use the indexed "
|
||||
"score to break ties.";
|
||||
m->m_cgi = "pqrorig";
|
||||
m->m_off = (char *)&cr.m_pqr_demFactOrigScore - x;
|
||||
m->m_type = TYPE_FLOAT;
|
||||
m->m_def = "1";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
|
||||
|
||||
m->m_title = "max value for demotion for pages based on proximity";
|
||||
m->m_desc = "Max summary score where no more demotion occurs above. "
|
||||
"Pages with a summary score greater than or equal to this "
|
||||
"value will not be demoted. ";
|
||||
m->m_cgi = "pqrproxm";
|
||||
m->m_off = (char *)&cr.m_pqr_maxValProximity - x;
|
||||
m->m_type = TYPE_LONG;
|
||||
m->m_def = "100000";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
|
||||
m->m_title = "demotion for query being exclusivly in a subphrase";
|
||||
m->m_desc = "Search result which contains the query terms only"
|
||||
" as a subphrase of a larger phrase will have its score "
|
||||
" reduced by this percent.";
|
||||
m->m_cgi = "pqrspd";
|
||||
m->m_off = (char *)&cr.m_pqr_demFactSubPhrase - x;
|
||||
m->m_type = TYPE_FLOAT;
|
||||
m->m_def = "0";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "demotion based on common inlinks";
|
||||
m->m_desc = "Based on the number of inlinks a search results has "
|
||||
"which are in common with another search result.";
|
||||
m->m_cgi = "pqrcid";
|
||||
m->m_off = (char *)&cr.m_pqr_demFactCommonInlinks - x;
|
||||
m->m_type = TYPE_FLOAT;
|
||||
m->m_def = ".5";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "number of document calls multiplier";
|
||||
m->m_desc = "Allows more results to be gathered in the case of "
|
||||
"an index having a high rate of duplicate results. Generally"
|
||||
" expressed as 1.2";
|
||||
m->m_cgi = "ndm";
|
||||
m->m_off = (char *)&cr.m_numDocsMultiplier - x;
|
||||
m->m_type = TYPE_FLOAT;
|
||||
m->m_def = "1.2";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "max query terms";
|
||||
m->m_desc = "Do not allow more than this many query terms. Helps "
|
||||
"prevent big queries from resource hogging.";
|
||||
@ -4938,81 +4499,6 @@ void Parms::init ( ) {
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
// import search results controls
|
||||
m->m_title = "how many imported results should we insert";
|
||||
m->m_desc = "Gigablast will import X search results from the "
|
||||
"external cluster given by hosts2.conf and merge those "
|
||||
"search results into the current set of search results. "
|
||||
"Set to 0 to disable.";
|
||||
m->m_cgi = "imp";
|
||||
m->m_off = (char *)&cr.m_numResultsToImport - x;
|
||||
m->m_type = TYPE_LONG;
|
||||
m->m_def = "0";
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "imported score weight";
|
||||
m->m_desc = "The score of all imported results will be multiplied "
|
||||
"by this number. Since results are mostly imported from "
|
||||
"a large collection they will usually have higher scores "
|
||||
"because of having more link texts or whatever, so tone it "
|
||||
"down a bit to put it on par with the integrating collection.";
|
||||
m->m_cgi = "impw";
|
||||
m->m_off = (char *)&cr.m_importWeight - x;
|
||||
m->m_type = TYPE_FLOAT;
|
||||
m->m_def = ".80";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "how many linkers must each imported result have";
|
||||
m->m_desc = "The urls of imported search results must be linked to "
|
||||
"by at least this many documents in the primary collection.";
|
||||
m->m_cgi = "impl";
|
||||
m->m_off = (char *)&cr.m_minLinkersPerImportedResult - x;
|
||||
m->m_type = TYPE_LONG;
|
||||
m->m_def = "3";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "num linkers weight";
|
||||
m->m_desc = "The number of linkers an imported result has from "
|
||||
"the base collection is multiplied by this weight and then "
|
||||
"added to the final score. The higher this is the more an "
|
||||
"imported result with a lot of linkers will be boosted. "
|
||||
"Currently, 100 is the max number of linkers permitted.";
|
||||
m->m_cgi = "impnlw";
|
||||
m->m_off = (char *)&cr.m_numLinkerWeight - x;
|
||||
m->m_type = TYPE_LONG;
|
||||
m->m_def = "50";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "the name of the collection to import from";
|
||||
m->m_desc = "Gigablast will import X search results from this "
|
||||
"external collection and merge them into the current search "
|
||||
"results.";
|
||||
m->m_cgi = "impc";
|
||||
m->m_off = (char *)&cr.m_importColl - x;
|
||||
m->m_type = TYPE_STRING;
|
||||
m->m_size = MAX_COLL_LEN;
|
||||
m->m_def = "main";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "Minimum number of in linkers required to consider getting"
|
||||
" the title from in linkers";
|
||||
m->m_desc = "Minimum number of in linkers required to consider getting"
|
||||
@ -5038,22 +4524,6 @@ void Parms::init ( ) {
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "summary mode";
|
||||
m->m_desc = "0 = old compatibility mode, 1 = UTF-8 mode, "
|
||||
"2 = fast ASCII mode, "
|
||||
"3 = Ascii Proximity Summary, "
|
||||
"4 = Utf8 Proximity Summary, "
|
||||
"5 = Ascii Pre Proximity Summary, "
|
||||
"6 = Utf8 Pre Proximity Summary:";
|
||||
m->m_cgi = "smd";
|
||||
m->m_off = (char *)&cr.m_summaryMode - x;
|
||||
m->m_type = TYPE_LONG;
|
||||
m->m_def = "0";
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "number of summary excerpts";
|
||||
m->m_desc = "How many summary excerpts to display per search result?";
|
||||
m->m_cgi = "ns";
|
||||
@ -5624,32 +5094,6 @@ void Parms::init ( ) {
|
||||
m->m_flags = PF_NOAPI;
|
||||
m++;
|
||||
|
||||
// buzz
|
||||
m->m_title = "spider results";
|
||||
m->m_desc = "Results of this query will be forced into the spider "
|
||||
"queue for reindexing.";
|
||||
m->m_off = (char *)&si.m_spiderResults - y;
|
||||
m->m_type = TYPE_BOOL;
|
||||
m->m_def = "0";
|
||||
m->m_cgi = "spiderresults";
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_RESULTS;
|
||||
m->m_obj = OBJ_SI;
|
||||
m++;
|
||||
|
||||
// buzz
|
||||
m->m_title = "spider result roots";
|
||||
m->m_desc = "Root urls of the results of this query will be forced "
|
||||
"into the spider queue for reindexing.";
|
||||
m->m_off = (char *)&si.m_spiderResultRoots - y;
|
||||
m->m_type = TYPE_BOOL;
|
||||
m->m_def = "0";
|
||||
m->m_cgi = "spiderresultroots";
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_RESULTS;
|
||||
m->m_obj = OBJ_SI;
|
||||
m++;
|
||||
|
||||
m->m_title = "include cached copy of page";
|
||||
m->m_desc = "Will cause a cached copy of content to be returned "
|
||||
"instead of summary.";
|
||||
@ -7634,20 +7078,6 @@ void Parms::init ( ) {
|
||||
m->m_obj = OBJ_CONF;
|
||||
m++;
|
||||
|
||||
m->m_title = "phrase weight";
|
||||
m->m_desc = "Percent to weight phrases in queries.";
|
||||
m->m_cgi = "qp";
|
||||
m->m_off = (char *)&g_conf.m_queryPhraseWeight - g;
|
||||
m->m_type = TYPE_FLOAT;
|
||||
// was 350, but 'new mexico tourism' and 'boots uk'
|
||||
// emphasized the phrase terms too much!!
|
||||
m->m_def = "100";
|
||||
m->m_units = "%%";
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_MASTER;
|
||||
m->m_obj = OBJ_CONF;
|
||||
m++;
|
||||
|
||||
m->m_title = "weights.cpp slider parm (tmp)";
|
||||
m->m_desc = "Percent of how much to use words to phrase ratio weights.";
|
||||
m->m_cgi = "wsp";
|
||||
@ -8715,21 +8145,6 @@ void Parms::init ( ) {
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
|
||||
m->m_title = "dedup URLs by default";
|
||||
m->m_desc = "Should we dedup URLs with case insensitivity? This is "
|
||||
"mainly to correct duplicate wiki pages.";
|
||||
m->m_cgi = "ddu";
|
||||
m->m_off = (char *)&cr.m_dedupURLDefault - x;
|
||||
m->m_type = TYPE_BOOL;
|
||||
m->m_def = "0";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_API | PF_CLONE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
|
||||
m->m_title = "sort language preference default";
|
||||
m->m_desc = "Default language to use for ranking results. "
|
||||
@ -8749,39 +8164,6 @@ void Parms::init ( ) {
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
|
||||
m->m_title = "sort country preference default";
|
||||
m->m_desc = "Default country to use for ranking results. "
|
||||
//"This should only be used on limited collections. "
|
||||
"Value should be any country code abbreviation, for example "
|
||||
"\"us\" for United States. This is currently not working.";
|
||||
m->m_cgi = "qcountry";
|
||||
m->m_off = (char *)&cr.m_defaultSortCountry - x;
|
||||
m->m_type = TYPE_STRING;
|
||||
m->m_size = 2+1;
|
||||
m->m_def = "us";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_API | PF_CLONE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
// for post query reranking
|
||||
m->m_title = "docs to check for post query demotion by default";
|
||||
m->m_desc = "How many search results should we "
|
||||
"scan for post query demotion? "
|
||||
"0 disables all post query reranking. ";
|
||||
m->m_cgi = "pqrds";
|
||||
m->m_off = (char *)&cr.m_pqr_docsToScan - x;
|
||||
m->m_type = TYPE_LONG;
|
||||
m->m_def = "0";
|
||||
m->m_group = 1;
|
||||
//m->m_scgi = "pqrds";
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "max summary len";
|
||||
m->m_desc = "What is the maximum number of "
|
||||
"characters displayed in a summary for a search result?";
|
||||
@ -8837,19 +8219,6 @@ void Parms::init ( ) {
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "bytes of doc to scan for summary generation";
|
||||
m->m_desc = "Truncating this will miss out on good summaries, but "
|
||||
"performance will increase.";
|
||||
m->m_cgi = "clmfs";
|
||||
m->m_off = (char *)&cr.m_contentLenMaxForSummary - x;
|
||||
m->m_type = TYPE_LONG;
|
||||
m->m_def = "70000";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_API | PF_CLONE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "front highlight tag";
|
||||
m->m_desc = "Front html tag used for highlightig query terms in the "
|
||||
"summaries displated in the search results.";
|
||||
@ -8878,90 +8247,6 @@ void Parms::init ( ) {
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "display indexed date";
|
||||
m->m_desc = "Display the indexed date along with results.";
|
||||
m->m_cgi = "didt";
|
||||
m->m_off = (char *)&cr.m_displayIndexedDate - x;
|
||||
m->m_type = TYPE_BOOL;
|
||||
m->m_def = "1";
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "display last modified date";
|
||||
m->m_desc = "Display the last modified date along with results.";
|
||||
m->m_cgi = "dlmdt";
|
||||
m->m_off = (char *)&cr.m_displayLastModDate - x;
|
||||
m->m_type = TYPE_BOOL;
|
||||
m->m_def = "1";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "display published date";
|
||||
m->m_desc = "Display the published date along with results.";
|
||||
m->m_cgi = "dipt";
|
||||
m->m_off = (char *)&cr.m_displayPublishDate - x;
|
||||
m->m_type = TYPE_BOOL;
|
||||
m->m_def = "1";
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "enable click 'n' scroll";
|
||||
m->m_desc = "The [cached] link on results pages loads click n "
|
||||
"scroll.";
|
||||
m->m_cgi = "ecns";
|
||||
m->m_off = (char *)&cr.m_clickNScrollEnabled - x;
|
||||
m->m_type = TYPE_BOOL;
|
||||
m->m_def = "0";
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "use data feed account server";
|
||||
m->m_desc = "Enable/disable the use of a remote account verification "
|
||||
"for Data Feed Customers.";
|
||||
m->m_cgi = "dfuas";
|
||||
m->m_off = (char *)&cr.m_useDFAcctServer - x;
|
||||
m->m_type = TYPE_BOOL;
|
||||
m->m_def = "0";
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "data feed server ip";
|
||||
m->m_desc = "The ip address of the Gigablast data feed server to "
|
||||
"retrieve customer account information from.";
|
||||
m->m_cgi = "dfip";
|
||||
m->m_off = (char *)&cr.m_dfAcctIp - x;
|
||||
m->m_type = TYPE_IP;
|
||||
m->m_def = "2130706433";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "data feed server port";
|
||||
m->m_desc = "The port of the Gigablast data feed server to retrieve "
|
||||
"customer account information from.";
|
||||
m->m_cgi = "dfport";
|
||||
m->m_off = (char *)&cr.m_dfAcctPort - x;
|
||||
m->m_type = TYPE_LONG;
|
||||
m->m_def = "8040";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "home page";
|
||||
static SafeBuf s_tmpBuf;
|
||||
s_tmpBuf.setLabel("stmpb1");
|
||||
@ -8999,18 +8284,6 @@ void Parms::init ( ) {
|
||||
"<br><br>"
|
||||
"<b>My Search Engine</b>"
|
||||
"<br><br>"
|
||||
// "<br><br><br>"
|
||||
// "<b>web</b> "
|
||||
// " "
|
||||
// "<a href=\"/Top\">directory</a> "
|
||||
// " "
|
||||
// "<a href=/adv.html>advanced search</a> "
|
||||
// " "
|
||||
// "<a href=/addurl "
|
||||
// "title=\"Instantly add your url to "
|
||||
//"the index\">"
|
||||
// "add url</a>"
|
||||
// "<br><br>"
|
||||
"<form method=get action=/search name=f>"
|
||||
"<input type=hidden name=c value=\"%c\">"
|
||||
"<input name=q type=text size=60 value=\"\">"
|
||||
@ -9029,9 +8302,7 @@ void Parms::init ( ) {
|
||||
m->m_xml = "homePageHtml";
|
||||
m->m_cgi = "hp";
|
||||
m->m_off = (char *)&cr.m_htmlRoot - x;
|
||||
//m->m_plen = (char *)&cr.m_htmlRootLen - x; // length of string
|
||||
m->m_type = TYPE_SAFEBUF;//STRINGBOX;
|
||||
//m->m_size = MAX_HTML_LEN + 1;
|
||||
m->m_def = "";
|
||||
m->m_page = PAGE_SEARCH;
|
||||
m->m_obj = OBJ_COLL;
|
||||
@ -9050,11 +8321,6 @@ void Parms::init ( ) {
|
||||
"%q, to represent the query to display in a "
|
||||
"text box. "
|
||||
"Use %e to print the url encoded query. "
|
||||
//"Use %e to print the page encoding. "
|
||||
// i guess this is out for now
|
||||
//"Use %D to "
|
||||
//"print a drop down "
|
||||
//"menu for the number of search results to return. "
|
||||
"Use %S "
|
||||
"to print sort by date or relevance link. Use "
|
||||
"%L to "
|
||||
@ -9069,22 +8335,6 @@ void Parms::init ( ) {
|
||||
"not duplicate them in the html tail. "
|
||||
"Use %f to display "
|
||||
"the family filter radio buttons. "
|
||||
// take this out for now
|
||||
//"Directory: Use %s to display the directory "
|
||||
//"search type options. "
|
||||
//"Use %l to specify the "
|
||||
//"location of "
|
||||
//"dir=rtl in the body tag for RTL pages. "
|
||||
//"Use %where and %when to substitute the where "
|
||||
//"and when of "
|
||||
//"the query. "
|
||||
//"These values may be set based on the cookie "
|
||||
//"if "
|
||||
//"none was explicitly given. "
|
||||
//"IMPORTANT: In the xml configuration file, "
|
||||
//"this html "
|
||||
//"must be encoded (less thans mapped to <, "
|
||||
//"etc.).";
|
||||
"Example to paste into textbox: <br><i>";
|
||||
s_tmpBuf2.safeStrcpy(fff);
|
||||
s_tmpBuf2.htmlEncode(
|
||||
@ -9097,23 +8347,12 @@ void Parms::init ( ) {
|
||||
"content=\"text/html; charset=utf-8\">\n"
|
||||
"</head>\n"
|
||||
"<body%l>\n"
|
||||
|
||||
//"<form method=\"get\" action=\"/search\" name=\"f\">\n"
|
||||
// . %F prints the <form method=...> tag
|
||||
// . method will be GET or POST depending on the size of the
|
||||
// input data. MSIE can't handle sending large GETs requests
|
||||
// that are more than like 1k or so, which happens a lot with
|
||||
// our CTS technology (the sites= cgi parm can be very large)
|
||||
"%F"
|
||||
"<table cellpadding=\"2\" cellspacing=\"0\" border=\"0\">\n"
|
||||
"<tr>\n"
|
||||
"<td valign=top>"
|
||||
// this prints the Logo
|
||||
"%L"
|
||||
//"<a href=\"/\">"
|
||||
//"<img src=\"logo2.gif\" alt=\"Gigablast Logo\" "
|
||||
//"width=\"210\" height=\"25\" border=\"0\" valign=\"top\">"
|
||||
//"</a>"
|
||||
"</td>\n"
|
||||
|
||||
"<td valign=top>\n"
|
||||
@ -9126,15 +8365,12 @@ void Parms::init ( ) {
|
||||
// family filter
|
||||
// %R radio button for site(s) search
|
||||
"<br>%f %R\n"
|
||||
// directory search options
|
||||
// MDW: i guess this is out for now
|
||||
//"</td><td>%s</td>\n"
|
||||
"</tr>\n"
|
||||
"</table>\n"
|
||||
// %H prints the hidden for vars. Print them *after* the input
|
||||
// text boxes, radio buttons, etc. so these hidden vars can be
|
||||
// %H prints the hidden for vars. Print them *after* the input
|
||||
// text boxes, radio buttons, etc. so these hidden vars can be
|
||||
// overriden as they should be.
|
||||
"%H");
|
||||
"%H");
|
||||
s_tmpBuf2.safePrintf("</i>");
|
||||
m->m_desc = s_tmpBuf2.getBufStart();
|
||||
m->m_xml = "htmlHead";
|
||||
@ -9157,21 +8393,8 @@ void Parms::init ( ) {
|
||||
s_tmpBuf3.safeStrcpy(fff);
|
||||
s_tmpBuf3.htmlEncode (
|
||||
"<br>\n"
|
||||
//"%F"
|
||||
"<table cellpadding=2 cellspacing=0 border=0>\n"
|
||||
"<tr><td></td>\n"
|
||||
//"<td valign=top align=center>\n"
|
||||
// this old query is overriding a newer query above so
|
||||
// i commented out. mfd 6/2014
|
||||
//"<nobr>"
|
||||
//"<input type=text name=q size=60 value=\"%q\"> %D\n"
|
||||
//"<input type=submit value=\"Blast It!\" border=0>\n"
|
||||
//"</nobr>"
|
||||
// family filter
|
||||
//"<br>%f %R\n"
|
||||
//"<br>"
|
||||
//"%R\n"
|
||||
//"</td>"
|
||||
"<td>%s</td>\n"
|
||||
"</tr>\n"
|
||||
"</table>\n"
|
||||
@ -9179,15 +8402,8 @@ void Parms::init ( ) {
|
||||
"<a href=http://www.google.com/search?q=%e>google</a> \n"
|
||||
"<a href=http://search.yahoo.com/bin/search?p=%e>yahoo</a> "
|
||||
" \n"
|
||||
//"<a href=http://www.alltheweb.com/search?query=%e>alltheweb"
|
||||
//"</a>\n"
|
||||
"<a href=http://search.dmoz.org/cgi-bin/search?search=%e>"
|
||||
"dmoz</a> \n"
|
||||
//"<a href=http://search01.altavista.com/web/results?q=%e>"
|
||||
//"alta vista</a>\n"
|
||||
//"<a href=http://s.teoma.com/search?q=%e>teoma</a> \n"
|
||||
//"<a href=http://wisenut.com/search/query.dll?q=%e>wisenut"
|
||||
//"</a>\n"
|
||||
"</font></body>\n");
|
||||
s_tmpBuf3.safePrintf("</i>");
|
||||
m->m_desc = s_tmpBuf3.getBufStart();
|
||||
@ -9857,22 +9073,6 @@ void Parms::init ( ) {
|
||||
m->m_flags = PF_CLONE;
|
||||
m++;
|
||||
|
||||
m->m_title = "use new link algo";
|
||||
m->m_desc = "Use the links: termlists instead of link:. Also "
|
||||
"allows pages linking from the same domain or IP to all "
|
||||
"count as a single link from a different IP. This is also "
|
||||
"required for incorporating RSS and Atom feed information "
|
||||
"when indexing a document.";
|
||||
m->m_cgi = "na";
|
||||
m->m_off = (char *)&cr.m_newAlgo - x;
|
||||
m->m_type = TYPE_BOOL;
|
||||
m->m_def = "1";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SPIDER;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "index inlink neighborhoods";
|
||||
m->m_desc = "If this is true Gigablast will "
|
||||
"index the plain text surrounding the hyper-link text. The "
|
||||
@ -9929,21 +9129,6 @@ void Parms::init ( ) {
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "use old IPs";
|
||||
m->m_desc = "Should the stored IP "
|
||||
"of documents we are reindexing be used? Useful for "
|
||||
"pages banned by IP address and then reindexed with "
|
||||
"the reindexer tool.";
|
||||
m->m_cgi = "useOldIps";
|
||||
m->m_off = (char *)&cr.m_useOldIps - x;
|
||||
m->m_type = TYPE_BOOL;
|
||||
m->m_def = "0";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SPIDER;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "remove banned pages";
|
||||
m->m_desc = "Remove banned pages from the index. Pages can be "
|
||||
"banned using tagdb or the Url Filters table.";
|
||||
|
@ -2481,8 +2481,7 @@ bool Query::setQWords ( char boolFlag ,
|
||||
|
||||
int32_t wkid = 0;
|
||||
int32_t upTo = -1;
|
||||
int32_t wk_start;
|
||||
int32_t wk_nwk;
|
||||
|
||||
//
|
||||
// set the wiki phrase ids
|
||||
//
|
||||
@ -2503,9 +2502,7 @@ bool Query::setQWords ( char boolFlag ,
|
||||
nwk = g_wiki.getNumWordsInWikiPhrase ( i , &words );
|
||||
// bail if none
|
||||
if ( nwk <= 1 ) continue;
|
||||
// save these too
|
||||
wk_start = i;
|
||||
wk_nwk = nwk;
|
||||
|
||||
// inc it
|
||||
wkid++;
|
||||
// store it
|
||||
@ -2514,7 +2511,6 @@ bool Query::setQWords ( char boolFlag ,
|
||||
upTo = i + nwk;
|
||||
}
|
||||
|
||||
|
||||
// consider terms strongly connected like wikipedia title phrases
|
||||
for ( int32_t i = 0 ; i + 2 < m_numWords ; i++ ) {
|
||||
// get ith word
|
||||
|
@ -101,9 +101,6 @@ public:
|
||||
|
||||
char m_debug; // msg40
|
||||
|
||||
char m_spiderResults;
|
||||
char m_spiderResultRoots;
|
||||
|
||||
char m_spellCheck;
|
||||
|
||||
char *m_displayMetas; // msg40
|
||||
@ -137,8 +134,6 @@ public:
|
||||
|
||||
// prefer what lang in the results. it gets a 20x boost. "en" "xx" "fr"
|
||||
char *m_defaultSortLang;
|
||||
// prefer what country in the results. currently unused. support later.
|
||||
char *m_defaultSortCountry;
|
||||
|
||||
// general parameters
|
||||
char m_dedupURL;
|
||||
@ -182,9 +177,6 @@ public:
|
||||
char *m_highlightQuery;
|
||||
Query m_hqq;
|
||||
|
||||
int32_t m_docsToScanForReranking;
|
||||
float m_pqr_demFactCommonInlinks;
|
||||
|
||||
// . buzz stuff (buzz)
|
||||
// . these controls the set of results, so should be in the makeKey()
|
||||
// as it is, in between the start and end hash vars
|
||||
|
@ -575,12 +575,6 @@ bool downloadTestUrlFromProxies ( ) {
|
||||
char *tu = g_conf.m_proxyTestUrl.getBufStart();
|
||||
if ( ! tu ) continue;
|
||||
|
||||
//spip *ss = (spip *)mmalloc(8,"sptb");
|
||||
// if ( ! ss ) return false;
|
||||
// ss->m_ip = sp->m_ip;
|
||||
// ss->m_port = sp->m_port;
|
||||
|
||||
|
||||
sp->m_isWaiting = true;
|
||||
|
||||
sp->m_lastDownloadTestAttemptMS = nowms;
|
||||
|
8
Wiki.cpp
8
Wiki.cpp
@ -53,13 +53,7 @@ bool Wiki::load() {
|
||||
// "dir" is NULL since already included in ff2
|
||||
return m_ht.load ( NULL , ff2 );
|
||||
}
|
||||
// if we got a newer binary version, use that
|
||||
// add in 10 seconds i guess
|
||||
if ( ! errno2 && ! errno1 && stats2.st_mtime +10> stats1.st_mtime ) {
|
||||
log(LOG_INFO,"wiki: Loading %s",ff2);
|
||||
// "dir" is NULL since already included in ff2
|
||||
return m_ht.load ( NULL , ff2 );
|
||||
}
|
||||
|
||||
// if no text file that is bad
|
||||
if ( errno1 ) {
|
||||
g_errno = errno1 ;
|
||||
|
2
Wiki.h
2
Wiki.h
@ -15,8 +15,6 @@ public:
|
||||
|
||||
int32_t getNumWordsInWikiPhrase ( int32_t i , class Words *words );
|
||||
|
||||
bool isInWiki ( uint32_t h ) { return ( m_ht.getSlot ( &h ) >= 0 ); }
|
||||
|
||||
// . load from disk
|
||||
// . wikititles.txt (loads wikititles.dat if and date is newer)
|
||||
bool load();
|
||||
|
Reference in New Issue
Block a user