#include "gb-include.h"

#include "Parms.h"
#include "File.h"
#include "Conf.h"
#include "TcpSocket.h"
#include "HttpRequest.h"
#include "Pages.h" // g_pages
#include "Tagdb.h" // g_tagdb
#include "Catdb.h"
#include "Collectiondb.h"
#include "HttpMime.h" // atotime()
#include "SearchInput.h"
#include "Unicode.h"
#include "Threads.h"
#include "Spider.h" // MAX_SPIDER_PRIORITIES
#include "Statsdb.h"
#include "Msg17.h"
#include "Process.h"
#include "Repair.h"
#include "LanguagePages.h"
#include "PingServer.h"
#include "Proxy.h"
#include "hash.h"
#include "Test.h"
#include "Rebalance.h"
#include "SpiderProxy.h" // buildProxyTable()
#include "PageInject.h" // InjectionRequest

// width of input box in characters for url filter expression

Parms g_parms;

#include "Clusterdb.h"

//
// new functions to extricate info from parm recs
//

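// a parm rec is laid out like an rdb record:
//   key96_t key (12 bytes) | int32_t dataSize (4 bytes) | data (dataSize bytes)
// the key packs the collnum and the parm's cgi hash; see makeParmKey() below
// for the exact bit layout of the key.
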
int32_t getDataSizeFromParmRec ( char *rec ) {
	return *(int32_t *)(rec+sizeof(key96_t));
}

char *getDataFromParmRec ( char *rec ) {
	return rec+sizeof(key96_t)+4;
}

collnum_t getCollnumFromParmRec ( char *rec ) {
	key96_t *k = (key96_t *)rec;
	return (collnum_t)k->n1;
}

// for parms that are arrays...
int16_t getOccNumFromParmRec ( char *rec ) {
	key96_t *k = (key96_t *)rec;
	return (int16_t)((k->n0>>16));
}

Parm *getParmFromParmRec ( char *rec ) {
	key96_t *k = (key96_t *)rec;
	int32_t cgiHash32 = (k->n0 >> 32);
	return g_parms.getParmFast2 ( cgiHash32 );
}

int32_t getHashFromParmRec ( char *rec ) {
	key96_t *k = (key96_t *)rec;
	int32_t cgiHash32 = (k->n0 >> 32);
	return cgiHash32;
}

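// k.n0 bit layout as built by makeParmKey() below:
//   bits 63..32  m->m_cgiHash (32 bits)
//   bits 31..16  occNum (16 bits)
//   bits 15..1   blanks (unused)
//   bit   0      delbit. 1 means positive key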
// . occNum is index # for parms that are arrays. it is -1 if not used.
// . collnum is -1 for g_conf, which is not a collrec
// . occNum is -1 for a non-array parm
key96_t makeParmKey ( collnum_t collnum , Parm *m , int16_t occNum ) {
	key96_t k;
	k.n1 = collnum;
	k.n0 = (uint32_t)m->m_cgiHash; // 32 bit
	k.n0 <<= 16;
	k.n0 |= (uint16_t)occNum;
	// blanks
	k.n0 <<= 16;
	// delbit. 1 means positive key
	k.n0 |= 0x01;
	// test
	if ( getCollnumFromParmRec ((char *)&k)!=collnum){char*xx=NULL;*xx=0;}
	if ( getOccNumFromParmRec ((char *)&k)!=occNum){char*xx=NULL;*xx=0;}
	return k;
}

bool printUrlExpressionExamples ( SafeBuf *sb ) ;

//////////////////////////////////////////////
//
// Command Functions. All return false if they block... yadayada
//
//////////////////////////////////////////////

////////
//
// . do commands this way now
// . when handleRequest4 receives a special "command" parmdb rec
//   it executes the cmd, one of the functions listed below
// . all these Command*() functions are called in updateParm() below
// . they return false if they would block, and they'll call your callback
//   specified in your "we", the WaitEntry
// . they return true with g_errno set on error, set to 0 on success
//
////////

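// a minimal sketch of that convention (cf. CommandDeleteColl() below, which
// returns false when Collectiondb::deleteRec2() has to wait on a tree save):
//
//   bool CommandFoo ( char *rec , WaitEntry *we ) {
//           if ( ! startAsyncOp ( we->m_callback , we ) )
//                   return false; // blocked; we->m_callback fires later
//           return true;          // done. g_errno set on error, else 0
//   }
//
// "CommandFoo" and "startAsyncOp" are hypothetical names for illustration.
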
// from PageBasic.cpp:
bool updateSiteListBuf(collnum_t collnum,bool addSeeds,char *siteListArg);

bool CommandUpdateSiteList ( char *rec ) {
	// caller must specify collnum
	collnum_t collnum = getCollnumFromParmRec ( rec );
	if ( collnum < 0 ) {
		log("parms: bad collnum for update site list");
		g_errno = ENOCOLLREC;
		return true;
	}
	// sanity
	int32_t dataSize = getDataSizeFromParmRec ( rec );
	if ( dataSize < 0 ) {
		log("parms: bad site list size = %" INT32 " bad!",dataSize);
		g_errno = EBADENGINEER;
		return true;
	}
	// need this
	CollectionRec *cr = g_collectiondb.getRec ( collnum );
	if ( ! cr ) {
		log("parms: no cr for collnum %" INT32 " to update",(int32_t)collnum);
		return true;
	}
	// get the sitelist
	char *data = getDataFromParmRec ( rec );
	// update the table that maps site to whether we should spider it
	// and also add newly introduced sites in "data" into spiderdb.
	updateSiteListBuf ( collnum ,
			    true , // add NEW seeds?
			    data // entire sitelist
			    );
	// now that we deduped the old site list with the new one for
	// purposes of adding NEW seeds, we can do the final copy
	cr->m_siteListBuf.set ( data );
	return true;
}

// . require the user to manually execute this to prevent us from fucking up
//   the data initially because of a bad hosts.conf file!!!
// . maybe put a red 'A' in the hosts table on the web page to indicate
//   we detected records that don't belong to our shard so user knows to
//   rebalance?
// . we'll show it in a special msg box on all admin pages if required
bool CommandRebalance ( char *rec ) {
	g_rebalance.m_userApproved = true;
	// force this to on so it goes through
	g_rebalance.m_numForeignRecs = 1;
	g_rebalance.m_needsRebalanceValid = false;
	return true;
}

bool CommandInsertUrlFiltersRow ( char *rec ) {
	// caller must specify collnum
	collnum_t collnum = getCollnumFromParmRec ( rec );
	if ( collnum < 0 ) {
		log("parms: bad collnum for insert row");
		g_errno = ENOCOLLREC;
		return true;
	}
	// sanity
	int32_t dataSize = getDataSizeFromParmRec ( rec );
	if ( dataSize <= 1 ) {
		log("parms: insert row data size = %" INT32 " bad!",dataSize);
		g_errno = EBADENGINEER;
		return true;
	}
	// need this
	CollectionRec *cr = g_collectiondb.getRec ( collnum );
	// get the row #
	char *data = getDataFromParmRec ( rec );
	int32_t rowNum = atol(data);//*(int32_t *)data;
	// scan all parms for url filter parms
	for ( int32_t i = 0 ; i < g_parms.m_numParms ; i++ ) {
		Parm *m = &g_parms.m_parms[i];
		// parm must be a url filters parm
		if ( m->m_page != PAGE_FILTERS ) continue;
		// must be an array!
		if ( ! m->isArray() ) continue;
		// sanity check
		if ( m->m_obj != OBJ_COLL ) { char *xx=NULL;*xx=0; }
		// . add that row
		// . returns false and sets g_errno on error
		if ( ! g_parms.insertParm ( i, rowNum,(char *)cr)) return true;
	}
	return true;
}

bool CommandRemoveConnectIpRow ( char *rec ) {
	// caller must specify collnum
	//collnum_t collnum = getCollnumFromParmRec ( rec );
	//if ( collnum < 0 ) {
	//	g_errno = ENOCOLLREC;
	//	log("parms: bad collnum for remove row");
	//	return true;
	//}
	// sanity
	int32_t dataSize = getDataSizeFromParmRec ( rec );
	if ( dataSize <= 1 ) {
		log("parms: remove row data size = %" INT32 " bad!",dataSize);
		g_errno = EBADENGINEER;
		return true;
	}
	// need this
	//CollectionRec *cr = g_collectiondb.getRec ( collnum );
	// get the row #
	char *data = getDataFromParmRec ( rec );
	int32_t rowNum = atol(data);
	// scan all parms for master ip parms
	for ( int32_t i = 0 ; i < g_parms.m_numParms ; i++ ) {
		Parm *m = &g_parms.m_parms[i];
		// parm must be a master passwords page parm
		if ( m->m_page != PAGE_MASTERPASSWORDS ) continue;
		// must be an array!
		if ( ! m->isArray() ) continue;
		// sanity check
		if ( m->m_obj != OBJ_CONF ) { char *xx=NULL;*xx=0; }
		// must be masterip
		if ( m->m_type != TYPE_IP ) continue;
		// . nuke that parm's element
		// . returns false and sets g_errno on error
		if (!g_parms.removeParm(i,rowNum,(char *)&g_conf))return true;
	}
	return true;
}

bool CommandRemovePasswordRow ( char *rec ) {
	// sanity
	int32_t dataSize = getDataSizeFromParmRec ( rec );
	if ( dataSize <= 1 ) {
		log("parms: remove row data size = %" INT32 " bad!",dataSize);
		g_errno = EBADENGINEER;
		return true;
	}
	// get the row #
	char *data = getDataFromParmRec ( rec );
	int32_t rowNum = atol(data);
	// scan all parms for master password parms
	for ( int32_t i = 0 ; i < g_parms.m_numParms ; i++ ) {
		Parm *m = &g_parms.m_parms[i];
		// parm must be a master passwords page parm
		if ( m->m_page != PAGE_MASTERPASSWORDS ) continue;
		// must be an array!
		if ( ! m->isArray() ) continue;
		// sanity check
		if ( m->m_obj != OBJ_CONF ) { char *xx=NULL;*xx=0; }
		// must be master password
		if ( m->m_type != TYPE_STRINGNONEMPTY ) continue;
		// . nuke that parm's element
		// . returns false and sets g_errno on error
		if (!g_parms.removeParm(i,rowNum,(char *)&g_conf))return true;
	}
	return true;
}

bool CommandRemoveUrlFiltersRow ( char *rec ) {
	// caller must specify collnum
	collnum_t collnum = getCollnumFromParmRec ( rec );
	if ( collnum < 0 ) {
		g_errno = ENOCOLLREC;
		log("parms: bad collnum for remove row");
		return true;
	}
	// sanity
	int32_t dataSize = getDataSizeFromParmRec ( rec );
	if ( dataSize <= 1 ) {
		log("parms: remove row data size = %" INT32 " bad!",dataSize);
		g_errno = EBADENGINEER;
		return true;
	}
	// need this
	CollectionRec *cr = g_collectiondb.getRec ( collnum );
	// get the row #
	char *data = getDataFromParmRec ( rec );
	int32_t rowNum = atol(data);
	// scan all parms for url filter parms
	for ( int32_t i = 0 ; i < g_parms.m_numParms ; i++ ) {
		Parm *m = &g_parms.m_parms[i];
		// parm must be a url filters parm
		if ( m->m_page != PAGE_FILTERS ) continue;
		// must be an array!
		if ( ! m->isArray() ) continue;
		// sanity check
		if ( m->m_obj != OBJ_COLL ) { char *xx=NULL;*xx=0; }
		// . nuke that parm's element
		// . returns false and sets g_errno on error
		if ( ! g_parms.removeParm ( i,rowNum,(char *)cr)) return true;
	}
	return true;
}

// after we add a new coll, or at any time after, we can clone it
bool CommandCloneColl ( char *rec ) {

	// the collnum we want to affect.
	collnum_t dstCollnum = getCollnumFromParmRec ( rec );

	// . data is the collnum in ascii.
	// . from "&restart=467" for example
	char *data = rec + sizeof(key96_t) + 4;
	int32_t dataSize = *(int32_t *)(rec + sizeof(key96_t));
	//if ( dataSize < 1 ) { char *xx=NULL;*xx=0; }
	// copy parm settings from this collection name
	char *srcColl = data;

	// return if none to clone from
	if ( dataSize <= 0 ) return true;
	// avoid defaulting to main collection
	if ( ! data[0] ) return true;

	CollectionRec *srcRec = NULL;
	CollectionRec *dstRec = NULL;
	srcRec = g_collectiondb.getRec ( srcColl ); // get from name
	dstRec = g_collectiondb.getRec ( dstCollnum ); // get from #

	if ( ! srcRec )
		return log("parms: invalid coll %s to clone from",
			   srcColl);
	if ( ! dstRec )
		return log("parms: invalid collnum %" INT32 " to clone to",
			   (int32_t)dstCollnum);

	log ("parms: cloning parms from collection %s to %s",
	     srcRec->m_coll,dstRec->m_coll);

	g_parms.cloneCollRec ( (char *)dstRec , (char *)srcRec );

	return true;
}

// customCrawl:
// 0 for regular collection
// 1 for custom crawl
// 2 for bulk job
// . returns false if it blocks, true otherwise
bool CommandAddColl ( char *rec , char customCrawl ) {

	// caller must specify collnum
	collnum_t newCollnum = getCollnumFromParmRec ( rec );

	// sanity.
	if ( newCollnum < 0 ) {
		g_errno = ENOCOLLREC;
		log("parms: bad collnum for AddColl");
		return true;
	}

	char *data = rec + sizeof(key96_t) + 4;
	int32_t dataSize = *(int32_t *)(rec + sizeof(key96_t));
	// collection name must be at least 2 bytes (includes \0)
	if ( dataSize <= 1 ) { char *xx=NULL;*xx=0; }

	// then collname, \0 terminated
	char *collName = data;

	if ( gbstrlen(collName) > MAX_COLL_LEN ) {
		log("crawlbot: collection name too long");
		return true;
	}

	// if ( ! g_parms.m_inSyncWithHost0 ) {
	//	log("parms: can not add coll #%i %s until in sync with host 0",
	//	    (int)newCollnum,collName);
	//	g_errno = EBADENGINEER;
	//	return true;
	// }

	// this saves it to disk! returns false and sets g_errno on error.
	if ( ! g_collectiondb.addNewColl ( collName,
					   customCrawl ,
					   NULL , // copy from
					   0 , // copy from len
					   true , // save?
					   newCollnum
					   ) )
		// error! g_errno should be set
		return true;

	return true;
}

// all nodes are guaranteed to add the same collnum for the given name
bool CommandAddColl0 ( char *rec ) { // regular collection
	return CommandAddColl ( rec , 0 );
}

bool CommandAddColl1 ( char *rec ) { // custom crawl
	return CommandAddColl ( rec , 1 );
}

bool CommandAddColl2 ( char *rec ) { // bulk job
	return CommandAddColl ( rec , 2 );
}

bool CommandResetProxyTable ( char *rec ) {
	// from SpiderProxy.h
	return resetProxyStats();
}

// . returns true and sets g_errno on error
// . returns false if would block
bool CommandDeleteColl ( char *rec , WaitEntry *we ) {
	collnum_t collnum = getCollnumFromParmRec ( rec );

	// if ( ! g_parms.m_inSyncWithHost0 ) {
	//	log("parms: can not del collnum %i until in sync with host 0",
	//	    (int)collnum);
	//	g_errno = EBADENGINEER;
	//	return true;
	// }

	// the delete might block because the tree is saving and we can't
	// remove our collnum recs from it while it is doing that
	if ( ! g_collectiondb.deleteRec2 ( collnum ) )
		// we blocked, we->m_callback will be called when done
		return false;
	// delete is successful
	return true;
}

// . returns true and sets g_errno on error
// . returns false if would block
bool CommandDeleteColl2 ( char *rec , WaitEntry *we ) {
	char *data = rec + sizeof(key96_t) + 4;
	char *coll = (char *)data;
	collnum_t collnum = g_collectiondb.getCollnum ( coll );

	// if ( ! g_parms.m_inSyncWithHost0 ) {
	//	log("parms: can not del collnum %i until in sync with host 0",
	//	    (int)collnum);
	//	g_errno = EBADENGINEER;
	//	return true;
	// }

	if ( collnum < 0 ) {
		g_errno = ENOCOLLREC;
		return true;
	}
	// the delete might block because the tree is saving and we can't
	// remove our collnum recs from it while it is doing that
	if ( ! g_collectiondb.deleteRec2 ( collnum ) )
		// we blocked, we->m_callback will be called when done
		return false;
	// delete is successful
	return true;
}

bool CommandForceNextSpiderRound ( char *rec ) {

	// caller must specify collnum
	collnum_t collnum = getCollnumFromParmRec ( rec );
	// need this
	CollectionRec *cr = g_collectiondb.getRec ( collnum );
	if ( ! cr ) {
		g_errno = ENOCOLLREC;
		log("parms: bad collnum %" INT32 " for restart spider round",
		    (int32_t)collnum);
		return true;
	}

	// seems like parmlist is an rdblist, so we have a key_t followed
	// by 4 bytes of datasize then the data... which is an ascii string
	// in our case...
	char *data = getDataFromParmRec ( rec );
	uint32_t roundStartTime;
	int32_t newRoundNum;
	// see the HACK: in Parms::convertHttpRequestToParmList() where we
	// construct this data in response to a "roundStart" cmd. we used
	// sprintf() so it's natural to use sscanf() to parse it out.
	sscanf ( data , "%" UINT32 ",%" INT32 "",
		 &roundStartTime,
		 &newRoundNum);

	cr->m_spiderRoundStartTime = roundStartTime;
	cr->m_spiderRoundNum = newRoundNum;

	// if we don't have this it prints out "skipping0 ... " for urls
	// we try to spider in Spider.cpp.
	cr->m_spiderStatus = SP_INPROGRESS;

	// reset the round counts. this will log a msg. resetting the
	// round counts will prevent maxToProcess/maxToCrawl from holding
	// us back...
	spiderRoundIncremented ( cr );

	// yeah, if we don't nuke doledb then it doesn't work...
	cr->rebuildUrlFilters();

	return true;
}

// . returns true and sets g_errno on error
// . returns false if would block
bool CommandRestartColl ( char *rec , WaitEntry *we ) {

	collnum_t newCollnum = getCollnumFromParmRec ( rec );

	// . data is the collnum in ascii.
	// . from "&restart=467" for example
	char *data = rec + sizeof(key96_t) + 4;
	int32_t dataSize = *(int32_t *)(rec + sizeof(key96_t));
	if ( dataSize < 1 ) { char *xx=NULL;*xx=0; }
	collnum_t oldCollnum = atol(data);

	if ( oldCollnum < 0 ||
	     oldCollnum >= g_collectiondb.m_numRecs ||
	     ! g_collectiondb.m_recs[oldCollnum] ) {
		log("parms: invalid collnum %" INT32 " to restart",(int32_t)oldCollnum);
		return true;
	}

	// this can block if tree is saving, it has to wait
	// for tree save to complete before removing old
	// collnum recs from tree
	if ( ! g_collectiondb.resetColl2 ( oldCollnum ,
					   newCollnum ,
					   false ) ) // purgeSeeds?
		// we blocked, we->m_callback will be called when done
		return false;

	// turn on spiders on new collrec. collname is same but collnum
	// will be different.
	CollectionRec *cr = g_collectiondb.getRec ( newCollnum );
	// if reset from crawlbot api page then enable spiders
	// to avoid user confusion
	//if ( cr ) cr->m_spideringEnabled = 1;

	if ( ! cr ) return true;

	//
	// repopulate spiderdb with the same sites
	//

	char *oldSiteList = cr->m_siteListBuf.getBufStart();
	// do not let it have the buf any more
	cr->m_siteListBuf.detachBuf();
	// can't leave it NULL, safebuf parms do not like to be null
	cr->m_siteListBuf.nullTerm();
	// re-add the buf so it re-seeds spiderdb. it will not dedup these
	// urls in "oldSiteList" with "m_siteListBuf" which is now empty.
	// "true" = addSeeds.
	updateSiteListBuf ( newCollnum , true , oldSiteList );
	// now put it back
	if ( oldSiteList ) cr->m_siteListBuf.safeStrcpy ( oldSiteList );

	// all done
	return true;
}

// . returns true and sets g_errno on error
// . returns false if would block
bool CommandResetColl ( char *rec , WaitEntry *we ) {

	collnum_t newCollnum = getCollnumFromParmRec ( rec );

	// . data is the collnum in ascii.
	// . from "&restart=467" for example
	char *data = rec + sizeof(key96_t) + 4;
	int32_t dataSize = *(int32_t *)(rec + sizeof(key96_t));
	if ( dataSize < 1 ) { char *xx=NULL;*xx=0; }
	collnum_t oldCollnum = atol(data);

	if ( oldCollnum < 0 ||
	     oldCollnum >= g_collectiondb.m_numRecs ||
	     ! g_collectiondb.m_recs[oldCollnum] ) {
		log("parms: invalid collnum %" INT32 " to reset",(int32_t)oldCollnum);
		return true;
	}

	// this will not go through if tree is saving, it has to wait
	// for tree save to complete before removing old
	// collnum recs from tree. so return false in that case so caller
	// will know to re-call later.
	if ( ! g_collectiondb.resetColl2 ( oldCollnum ,
					   newCollnum ,
					   true ) ) // purgeSeeds?
		// we blocked, we->m_callback will be called when done
		return false;

	// turn on spiders on new collrec. collname is same but collnum
	// will be different.
	CollectionRec *cr = g_collectiondb.getRec ( newCollnum );

	if ( ! cr ) return true;

	//
	// repopulate spiderdb with the same sites
	//

	char *oldSiteList = cr->m_siteListBuf.getBufStart();
	// do not let it have the buf any more
	cr->m_siteListBuf.detachBuf();
	// can't leave it NULL, safebuf parms do not like to be null
	cr->m_siteListBuf.nullTerm();
	// re-add the buf so it re-seeds spiderdb. it will not dedup these
	// urls in "oldSiteList" with "m_siteListBuf" which is now empty.
	// "true" = addSeeds.
	updateSiteListBuf ( newCollnum , true , oldSiteList );
	// now put it back
	if ( oldSiteList ) cr->m_siteListBuf.safeStrcpy ( oldSiteList );

	// turn spiders off
	//if ( cr ) cr->m_spideringEnabled = 0;

	return true;
}

bool CommandParserTestInit ( char *rec ) {
	// enable testing for all other hosts
	g_conf.m_testParserEnabled = 1;
	// reset all files
	g_test.removeFiles();
	// turn spiders on globally
	g_conf.m_spideringEnabled = 1;
	//g_conf.m_webSpideringEnabled = 1;
	// turn on for test coll too
	CollectionRec *cr = g_collectiondb.getRec("qatest123");
	// turn on spiders
	if ( cr ) cr->m_spideringEnabled = 1;
	// tell spider loop to update active list
	g_spiderLoop.m_activeListValid = false;
	// only host 0 runs the test loop below; other hosts are done now
	if ( g_hostdb.m_myHost->m_hostId != 0 ) return true;
	// start the test loop to inject urls for parsing/spidering
	g_test.initTestRun();
	// done
	return true;
}

bool CommandSpiderTestInit ( char *rec ) {
	// enable testing for all other hosts
	g_conf.m_testSpiderEnabled = 1;
	// reset all files
	g_test.removeFiles();
	// turn spiders on globally
	g_conf.m_spideringEnabled = 1;
	//g_conf.m_webSpideringEnabled = 1;
	// turn on for test coll too
	CollectionRec *cr = g_collectiondb.getRec("qatest123");
	// turn on spiders
	if ( cr ) cr->m_spideringEnabled = 1;
	// tell spider loop to update active list
	g_spiderLoop.m_activeListValid = false;
	// only host 0 runs the test loop below; other hosts are done now
	if ( g_hostdb.m_myHost->m_hostId != 0 ) return true;
	// start the test loop to inject urls for parsing/spidering
	g_test.initTestRun();
	// done
	return true;
}

bool CommandSpiderTestCont ( char *rec ) {
	// enable testing for all other hosts
	g_conf.m_testSpiderEnabled = 1;
	// turn spiders on globally
	g_conf.m_spideringEnabled = 1;
	//g_conf.m_webSpideringEnabled = 1;
	// turn on for test coll too
	CollectionRec *cr = g_collectiondb.getRec("qatest123");
	// turn on spiders
	if ( cr ) cr->m_spideringEnabled = 1;
	// tell spider loop to update active list
	g_spiderLoop.m_activeListValid = false;
	// done
	return true;
}

// some of these can block a little. if threads are off, a lot!
bool CommandMerge ( char *rec ) {
	forceMergeAll ( RDB_POSDB ,1);
	forceMergeAll ( RDB_TITLEDB ,1);
	forceMergeAll ( RDB_TAGDB ,1);
	forceMergeAll ( RDB_SPIDERDB ,1);
	forceMergeAll ( RDB_LINKDB ,1);
	// most of these are probably already in good shape
	//g_checksumdb.getRdb()->attemptMerge (1,true);
	// g_clusterdb.getRdb()->attemptMerge (1,true); // niceness, force?
	// g_tagdb.getRdb()->attemptMerge (1,true);
	// g_catdb.getRdb()->attemptMerge (1,true);
	// //g_tfndb.getRdb()->attemptMerge (1,true);
	// g_spiderdb.getRdb()->attemptMerge (1,true);
	// // these 2 will probably need the merge the most
	// g_indexdb.getRdb()->attemptMerge (1,true);
	// g_datedb.getRdb()->attemptMerge (1,true);
	// g_titledb.getRdb()->attemptMerge (1,true);
	// //g_sectiondb.getRdb()->attemptMerge (1,true);
	// g_statsdb.getRdb()->attemptMerge (1,true);
	// g_linkdb .getRdb()->attemptMerge (1,true);
	return true;
}

bool CommandMergePosdb ( char *rec ) {
	forceMergeAll ( RDB_POSDB ,1);
	// set this for each posdb base
	return true;
}

bool CommandMergeSectiondb ( char *rec ) {
	//g_sectiondb.getRdb()->attemptMerge (1,true); // nice , force
	return true;
}

bool CommandMergeTitledb ( char *rec ) {
	forceMergeAll ( RDB_TITLEDB ,1);
	//g_titledb.getRdb()->attemptMerge (1,true);
	return true;
}

bool CommandMergeSpiderdb ( char *rec ) {
	forceMergeAll ( RDB_SPIDERDB ,1);
	//g_spiderdb.getRdb()->attemptMerge (1,true);
	return true;
}

bool CommandDiskPageCacheOff ( char *rec ) {
	g_process.resetPageCaches();
	return true;
}

bool CommandForceIt ( char *rec ) {
	g_conf.m_forceIt = true;
	return true;
}

bool CommandDiskDump ( char *rec ) {
	//g_checksumdb.getRdb()->dumpTree ( 1 ); // niceness
	g_clusterdb.getRdb()->dumpTree ( 1 );
	g_tagdb.getRdb()->dumpTree ( 1 );
	g_catdb.getRdb()->dumpTree ( 1 );
	//g_tfndb.getRdb()->dumpTree ( 1 );
	g_spiderdb.getRdb()->dumpTree ( 1 );
	g_posdb.getRdb()->dumpTree ( 1 );
	//g_datedb.getRdb()->dumpTree ( 1 );
	g_titledb.getRdb()->dumpTree ( 1 );
	//g_sectiondb.getRdb()->dumpTree ( 1 );
	g_statsdb.getRdb()->dumpTree ( 1 );
	g_linkdb.getRdb()->dumpTree ( 1 );
	g_errno = 0;
	return true;
}

bool CommandJustSave ( char *rec ) {
	// returns false if blocked, true otherwise
	g_process.save ();
	// always return true here
	return true;
}

bool CommandSaveAndExit ( char *rec ) {
	// this may block, but we always return true here
	g_process.shutdown ( false , NULL , NULL );
	return true;
}

bool CommandUrgentSaveAndExit ( char *rec ) {
	// "true" means urgent
	g_process.shutdown ( true );
	return true;
}

bool CommandReloadLanguagePages ( char *rec ) {
	g_languagePages.reloadPages();
	return true;
}

bool CommandClearKernelError ( char *rec ) {
	g_hostdb.m_myHost->m_pingInfo.m_kernelErrors = 0;
	return true;
}

bool CommandPowerNotice ( int32_t hasPower ) {

	//int32_t hasPower = r->getLong("haspower",-1);
	log("powermo: received haspower=%" INT32 "",hasPower);
	if ( hasPower != 0 && hasPower != 1 ) return true;

	// did power state change? if not just return true
	if ( g_process.m_powerIsOn && hasPower ) return true;
	if ( ! g_process.m_powerIsOn && ! hasPower ) return true;

	if ( hasPower ) {
		log("powermo: power is regained");
		g_process.m_powerIsOn = true;
		return true;
	}

	// if it was on and went off...
	// now it is off
	log("powermo: power was lost");
	// . SpiderLoop.cpp will not launch any more spiders as
	//   long as the power is off
	// . autosave should kick in every 30 seconds
	g_process.m_powerIsOn = false;
	// note the autosave
	log("powermo: disabling spiders, suspending merges, disabling "
	    "tree writes and saving.");
	// tell Process.cpp::save2() to save the blocking caches too!
	//g_process.m_pleaseSaveCaches = true;
	// . save everything now... this may block some when saving the
	//   caches... then do not do ANY writes...
	// . RdbMerge suspends all merging if power is off
	// . Rdb.cpp does not allow any adds if power is off. it will
	//   send back an ETRYAGAIN...
	// . if a tree is being dumped, this will keep re-calling
	//   Process.cpp::save2()
	g_process.save();

	// also send an email if we are host #0
	if ( g_hostdb.m_myHost->m_hostId != 0 ) return true;
	if ( g_proxy.isProxy() ) return true;

	char tmp[128];
	Host *h0 = g_hostdb.getHost ( 0 );
	int32_t ip0 = 0;
	if ( h0 ) ip0 = h0->m_ip;
	sprintf(tmp,"%s: POWER IS OFF",iptoa(ip0));

	g_pingServer.sendEmail ( NULL , // Host ptr
				 tmp , // msg
				 true , // sendToAdmin
				 false , // oom?
				 false , // kernel error?
				 true , // parm change?
				 // force it? even if disabled?
				 false );
	return true;
}

bool CommandPowerOnNotice ( char *rec ) {
	return CommandPowerNotice ( 1 );
}

bool CommandPowerOffNotice ( char *rec ) {
	return CommandPowerNotice ( 0 );
}

bool CommandInSync ( char *rec ) {
	g_parms.m_inSyncWithHost0 = true;
	return true;
}

//////////////////////
//
// end new commands
//
//////////////////////

static bool printDropDown ( int32_t n , SafeBuf* sb, char *name,
			    int32_t select ,
			    bool includeMinusOne ,
			    bool includeMinusTwo ) ;

extern bool closeAll ( void *state, void (* callback)(void *state) );
extern bool allExit ( ) ;

/*
class Checksum {
public:
	Checksum() : m_sum1( 0xffff ), m_sum2( 0xffff ) {}

	void addIn( const uint16_t *data, size_t size, FILE *f = 0 ) {
		// if an odd len of data, add first byte, then do rest below
		if ( size % 2 != 0 ) {
			m_sum1 += (uint16_t)*(uint8_t *)data;
			m_sum2 += m_sum1;

			size--;
			data = (uint16_t *)(((uint8_t *)data)+1);
		}

		size_t len = size/2;
		while ( len ) {
			unsigned tlen = len;
			// . 360 is largest amount of sums that can be
			//   performed without overflow
			if ( len > 360 ) tlen = 360;
			len -= tlen;
			do {
				m_sum1 += *data++;
				m_sum2 += m_sum1;
			} while ( --tlen );

			m_sum1 = (m_sum1 & 0xffff) + (m_sum1 >> 16);
			m_sum2 = (m_sum2 & 0xffff) + (m_sum2 >> 16);
		}
	}

	void addInStrings( const uint16_t *data, int32_t cnt, int32_t size ) {
		while ( cnt ) {
			const uint16_t *origData = data;
			int32_t len = gbstrlen((char *)data);

			// if an odd len of data, add first byte,
			// then do rest below
			if ( len % 2 != 0 ) {
				m_sum1 += (uint16_t)*(uint8_t *)data;
				m_sum2 += m_sum1;

				len--;
				data = (uint16_t *)(((uint8_t *)data)+1);
			}

			len /= 2;
			while ( len ) {
				unsigned tlen = len;
				// . 360 = largest amount of sums that can be
				//   performed without overflow
				if ( len > 360 ) tlen = 360;
				len -= tlen;
				do {
					m_sum1 += *data++;
					m_sum2 += m_sum1;
				} while ( --tlen );

				m_sum1 = (m_sum1 & 0xffff) + (m_sum1 >> 16);
				m_sum2 = (m_sum2 & 0xffff) + (m_sum2 >> 16);
			}

			cnt--;
			data = (uint16_t *)((char *)origData + size);
		}
	}

	void finalize() {
		m_sum1 = (m_sum1 & 0xffff) + (m_sum1 >> 16);
		m_sum2 = (m_sum2 & 0xffff) + (m_sum2 >> 16);
	}

	uint32_t getSum() const {
		return ( m_sum2 << 16 | m_sum1 );
	}

private:
	uint32_t m_sum1;
	uint32_t m_sum2;
};
*/

Parms::Parms ( ) {
	m_isDefaultLoaded = false;
	m_inSyncWithHost0 = false;
	m_triedToSync = false;
}

void Parms::detachSafeBufs ( CollectionRec *cr ) {
	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
		Parm *m = &m_parms[i];
		if ( m->m_type != TYPE_SAFEBUF ) continue;
		if ( m->m_obj != OBJ_COLL ) continue;
		if ( m->m_off < 0 ) continue;
		int32_t max = 1;
		// this will be zero if not an array.
		// otherwise it is the # of elements in the array
		if ( m->m_size > max ) max = m->m_size;
		// an array of safebufs? m->m_size will be > 1 then.
		for ( int32_t j = 0 ; j < max ; j++ ) {
			// get it
			SafeBuf *sb = (SafeBuf *)((char *)cr + m->m_off +
						  j*sizeof(SafeBuf));
			sb->detachBuf();
		}
	}
}

/*
uint32_t Parms::calcChecksum() {
	Checksum cs;

	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
		Parm *m = &m_parms[i];
		if ( m->m_obj == OBJ_SI ) continue;
		if ( m->m_off < 0 ) continue;
		if ( m->m_type == TYPE_COMMENT ) continue;
		if ( m->m_type == TYPE_MONOD2 ) continue;
		if ( m->m_type == TYPE_MONOM2 ) continue;
		if ( m->m_type == TYPE_CMD ) continue;
		if ( m->m_type == TYPE_LONG_CONST ) continue;

		int32_t size = 0;
		if ( m->m_type == TYPE_CHECKBOX ) size = 1;
		if ( m->m_type == TYPE_CHAR ) size = 1;
		if ( m->m_type == TYPE_CHAR2 ) size = 1;
		if ( m->m_type == TYPE_BOOL ) size = 1;
		if ( m->m_type == TYPE_BOOL2 ) size = 1;
		if ( m->m_type == TYPE_PRIORITY ) size = 1;
		if ( m->m_type == TYPE_PRIORITY2 ) size = 1;
		//if ( m->m_type == TYPE_DIFFBOT_DROPDOWN) size = 1;
		if ( m->m_type == TYPE_PRIORITY_BOXES ) size = 1;
		if ( m->m_type == TYPE_RETRIES ) size = 1;
		if ( m->m_type == TYPE_TIME ) size = 6;
		if ( m->m_type == TYPE_DATE2 ) size = 4;
		if ( m->m_type == TYPE_DATE ) size = 4;
		if ( m->m_type == TYPE_FLOAT ) size = 4;
		if ( m->m_type == TYPE_IP ) size = 4;
		if ( m->m_type == TYPE_RULESET ) size = 4;
		if ( m->m_type == TYPE_LONG ) size = 4;
		if ( m->m_type == TYPE_LONG_LONG ) size = 8;
		if ( m->m_type == TYPE_STRING ) size = m->m_size;
		if ( m->m_type == TYPE_STRINGBOX ) size = m->m_size;
		if ( m->m_type == TYPE_STRINGNONEMPTY ) size = m->m_size;
		if ( m->m_type == TYPE_SAFEBUF ) size = m->m_size;
		if ( m->m_type == TYPE_SITERULE ) size = 4;

		// if we have an array
		int32_t cnt = 1;
		if (m->m_fixed > 0) {
			size *= m->m_fixed;
			cnt = m->m_fixed;
		}
		else {
			size *= m->m_max;
			cnt = m->m_max;
		}

		uint16_t *p = NULL;
		if ( m->m_obj == OBJ_CONF ) {
			p = (uint16_t *)((char *)&g_conf + m->m_off);
			if (m->m_type == TYPE_STRING ||
			    m->m_type == TYPE_STRINGBOX ||
			    m->m_type == TYPE_STRINGNONEMPTY ) {
				cs.addInStrings( p,
						 cnt,
						 m->m_size );
			}
			else if ( m->m_type == TYPE_SAFEBUF ) {
				uint16_t *p2;
				SafeBuf *sb2 = (SafeBuf *)p;
				p2 = (uint16_t *)sb2->getBufStart();
				cs.addIn( p2 , sb2->length() );
			}
			else {
				cs.addIn( p, size );
			}
		}
		else if ( m->m_obj == OBJ_COLL ) {
			collnum_t j = g_collectiondb.getFirstCollnum ();
			while ( j >= 0 ) {
				CollectionRec *cr = g_collectiondb.getRec( j );
				p = (uint16_t *)((char *)cr + m->m_off);
				if (m->m_type == TYPE_STRING ||
				    m->m_type == TYPE_STRINGBOX ||
				    m->m_type == TYPE_STRINGNONEMPTY ) {
					cs.addInStrings( p,
							 cnt,
							 m->m_size );
				}
				else if ( m->m_type == TYPE_SAFEBUF ) {
					uint16_t *p2;
					SafeBuf *sb2 = (SafeBuf *)p;
					p2 = (uint16_t *)sb2->getBufStart();
					cs.addIn( p2 , sb2->length() );
				}
				else {
					cs.addIn( p, size );
				}
				j = g_collectiondb.getNextCollnum ( j );
			}
		}
	}

	cs.finalize();

	return cs.getSum();
}
*/

// from Pages.cpp
bool printApiForPage ( SafeBuf *sb , int32_t PAGENUM , CollectionRec *cr ) ;

// returns false and sets g_errno on error
bool Parms::setGigablastRequest ( TcpSocket *socket ,
				  HttpRequest *hrArg ,
				  GigablastRequest *gr ) {
	// get the page from the path... like /sockets --> PAGE_SOCKETS
	int32_t page = g_pages.getDynamicPageNumber ( hrArg );
	// is it a collection?
	char *THIS = (char *)gr;

	// ensure valid
	if ( ! THIS ) {
		// it is null when no collection explicitly specified...
		log("admin: THIS is null for page %" INT32 ".",page);
		return false;
	}

	// just in case
	gr->clear();

	gr->m_socket = socket;

	// make a copy of the httprequest because the original is on the stack
	// in HttpServer::requestHandler()
	if ( ! gr->m_hr.copy ( hrArg ) ) {
		log("admin: failed to copy httprequest: %s",
		    mstrerror(g_errno));
		return false;
	}

	// use the one we copied which won't disappear/beFreed on us
	HttpRequest *hr = &gr->m_hr;

	// need this
	int32_t obj = OBJ_GBREQUEST;

	//
	// reset THIS to defaults. use NULL for cr since mostly for SearchInput
	//
	setToDefault ( THIS , obj , NULL);

	// map PAGE_ADDURL to PAGE_ADDURL2 so
	// /addurl is same as /admin/addurl as far as parms.
	if ( page == PAGE_ADDURL )
		page = PAGE_ADDURL2;

	// loop through cgi parms
	for ( int32_t i = 0 ; i < hr->getNumFields() ; i++ ) {
		// get cgi parm name
		char *field = hr->getField ( i );
		//int32_t flen = hr->getFieldLen ( i );
		// find in parms list
		int32_t j;
		Parm *m;
		for ( j = 0 ; j < m_numParms ; j++ ) {
			// get it
			m = &m_parms[j];
			// must be of this type
			if ( m->m_obj != obj ) continue;
			// page must match
			if ( m->m_page != page ) continue;
			// skip if no cgi parm, may not be configurable now
			if ( ! m->m_cgi ) continue;
			// otherwise, must match the cgi name exactly
			if ( strcmp ( field,m->m_cgi ) == 0 ) break;
			//if ( ! m->m_cgi2 ) continue; // alias check
			//if ( strcmp ( field,m->m_cgi2 ) == 0 ) break;
			//if ( ! m->m_cgi2 ) continue; // alias check
			//if ( strcmp ( field,m->m_cgi3 ) == 0 ) break;
			//if ( ! m->m_cgi3 ) continue; // alias check
			//if ( strcmp ( field,m->m_cgi4 ) == 0 ) break;
		}
		// bail if the cgi field is not in the parms list
		if ( j >= m_numParms ) {
			//log("parms: missing cgi parm %s",field);
			continue;
		}
		// value of cgi parm (null terminated)
		char *v = hr->getValue ( i );
		// . skip if no value was provided
		// . unless it was a string! so we can make them empty.
		if ( v[0] == '\0' &&
		     m->m_type != TYPE_CHARPTR &&
		     m->m_type != TYPE_STRING &&
		     m->m_type != TYPE_STRINGBOX ) continue;
		// skip if offset is negative, that means none
		if ( m->m_off < 0 ) continue;
		// skip if no permission
		//if ( (m->m_perms & user) == 0 ) continue;
		// set it. now our TYPE_CHARPTR will just be set to it directly
		// to save memory...
		setParm ( (char *)THIS , m, j, 0, v, false,//not html enc
			  false ); // true );
		// need to save it
		//if ( THIS != (char *)&g_conf )
		//	((CollectionRec *)THIS)->m_needsSave = true;
	}

	return true;
}

bool printSitePatternExamples ( SafeBuf *sb , HttpRequest *hr );

// . returns false if blocked, true otherwise
// . sets g_errno on error
// . must ultimately send reply back on "s"
// . called by Pages.cpp's sendDynamicReply() when it calls pg->function()
//   which is called by HttpServer::sendReply(s,r) when it gets an http request
bool Parms::sendPageGeneric ( TcpSocket *s , HttpRequest *r ) {

	char buf [ 128000 ];
	SafeBuf stackBuf(buf,128000);

	SafeBuf *sb = &stackBuf;

	int32_t page = g_pages.getDynamicPageNumber ( r );

	char format = r->getReplyFormat();

	char guide = r->getLong("guide",0);

	bool isMasterAdmin = g_conf.isMasterAdmin ( s , r );
	bool isCollAdmin = g_conf.isCollAdmin ( s , r );
	if ( ! g_conf.m_allowCloudUsers &&
	     ! isMasterAdmin &&
	     ! isCollAdmin ) {
		char *msg = "NO PERMISSION";
		return g_httpServer.sendDynamicPage (s, msg,gbstrlen(msg));
	}

	//
	// CLOUD SEARCH ENGINE SUPPORT
	//
	char *action = r->getString("action",NULL);
	if ( page == PAGE_BASIC_SETTINGS &&
	     guide &&
	     // this is non-null if handling a submit request
	     action &&
	     format == FORMAT_HTML ) {
		//return g_parms.sendPageGeneric ( s, r, PAGE_BASIC_SETTINGS );
		// just redirect to it
		char *coll = r->getString("c",NULL);
		if ( coll ) {
			sb->safePrintf("<meta http-equiv=Refresh "
				       "content=\"0; URL=/widgets.html"
				       "?guide=1&c=%s\">",
				       coll);
			return g_httpServer.sendDynamicPage (s,
							     sb->getBufStart(),
							     sb->length());
		}
	}

	//
	// some "generic" pages do additional processing on the provided input
	// so we need to call those functions here...
	//

	// if we were an addurl page..
	//if ( page == PAGE_ADDURL2 ) {
	//	// this returns false if blocked and it should re-call
	//	// sendPageGeneric when completed
	//	if ( ! processAddUrlRequest ( s , r ) )
	//		return false;
	//}

	char *bodyjs = NULL;
	if ( page == PAGE_BASIC_SETTINGS )
		bodyjs =" onload=document.getElementById('tabox').focus();";

	// print standard header
	if ( format != FORMAT_XML && format != FORMAT_JSON )
		g_pages.printAdminTop ( sb , s , r , NULL , bodyjs );

	// xml/json header
	char *res = NULL;
	if ( format == FORMAT_XML )
		res = "<response>\n"
			"\t<statusCode>0</statusCode>\n"
			"\t<statusMsg>Success</statusMsg>\n";
	if ( format == FORMAT_JSON )
		res = "{ \"response\":{\n"
			"\t\"statusCode\":0,\n"
			"\t\"statusMsg\":\"Success\"\n";
	if ( res )
		sb->safeStrcpy ( res );

	// do not show the parms and their current values unless showsettings=1
	// was explicitly given for the xml/json feeds
	int32_t show = 1;
	if ( format != FORMAT_HTML )
		show = r->getLong("show",0);
	if ( show )
		printParmTable ( sb , s , r );

	// xml/json tail
	if ( format == FORMAT_XML )
		res = "</response>\n";
	if ( format == FORMAT_JSON )
		res = "\t}\n}\n";
	if ( res )
		sb->safeStrcpy ( res );

	bool POSTReply = g_pages.getPage ( page )->m_usePost;

	char *ct = "text/html";
	if ( format == FORMAT_XML ) ct = "text/xml";
	if ( format == FORMAT_JSON ) ct = "application/json";

	return g_httpServer.sendDynamicPage ( s ,
					      sb->getBufStart() ,
					      sb->length() ,
					      -1 ,
					      POSTReply ,
					      ct , // contType
					      -1 , // httpstatus
					      NULL,//cookie ,
					      NULL );// charset
}

bool Parms::printParmTable ( SafeBuf *sb , TcpSocket *s , HttpRequest *r ) {

	int32_t page = g_pages.getDynamicPageNumber ( r );

	int32_t fromIp = s->m_ip;

	char format = r->getReplyFormat();
	/*
	if ( format == FORMAT_HTML )
		sb->safePrintf (
			"<script type=\"text/javascript\">"
			"function filterRow(str) {"
			//"alert ('string: ' + str);"
			"var tab = document.all ? document.all"
			"['parmtable'] :"
			" document.getElementById ?"
			"document.getElementById('parmtable') : null;"
			" for(var j = 1; j < tab.rows.length;j++) {"
			" if(tab.rows[j].innerHTML.indexOf(str) < 0) {"
			" tab.rows[j].style.display = 'none';"
			" } else {"
			" tab.rows[j].style.display = '';"
			" }"
			" }"
			"}\n"
			"function checkAll(form, name, num) {\n "
			" for (var i = 0; i < num; i++) {\n"
			" var nombre;\n"
			" if( i > 0) nombre = name + i;\n"
			" else nombre = name;\n"
			" var e = document.getElementById(nombre);\n"
			" e.checked = !e.checked;\n"
			//" if ( e.value == 'Y' ) e.value='N';"
			//" else if ( e.value == 'N' ) e.value='Y';"
			" }\n"
			"}\n"
			"</script>");
	*/

	if ( page == PAGE_COLLPASSWORDS2 )
		page = PAGE_COLLPASSWORDS;

	// print the start of the table
	char *tt = "None";
	if ( page == PAGE_LOG ) tt = "Log Controls";
	if ( page == PAGE_MASTER ) tt = "Master Controls";
	if ( page == PAGE_INJECT ) tt = "Inject Url";
	if ( page == PAGE_MASTERPASSWORDS ) tt = "Master Passwords";
	if ( page == PAGE_ADDURL2 ) tt = "Add Urls";
	if ( page == PAGE_SPIDER ) tt = "Spider Controls";
	if ( page == PAGE_SEARCH ) tt = "Search Controls";
	if ( page == PAGE_ACCESS ) tt = "Access Controls";
	if ( page == PAGE_FILTERS ) tt = "Url Filters";
	if ( page == PAGE_BASIC_SETTINGS ) tt = "Settings";
	if ( page == PAGE_COLLPASSWORDS ) tt = "Collection Passwords";
	//if ( page == PAGE_SITES ) tt = "Site List";
	//if ( page == PAGE_PRIORITIES ) tt = "Priority Controls";
	//if ( page == PAGE_RULES ) tt = "Site Rules";
	//if ( page == PAGE_SYNC ) tt = "Sync";
	if ( page == PAGE_REPAIR ) tt = "Rebuild Controls";
	//if ( page == PAGE_ADFEED ) tt = "Ad Feed Controls";

	// special messages for spider controls
	char *e1 = "";
	char *e2 = "";
	if ( page == PAGE_SPIDER && ! g_conf.m_spideringEnabled )
		e1 = "<tr><td colspan=20><font color=#ff0000><b><center>"
			"Spidering is temporarily disabled in Master Controls."
			"</font></td></tr>\n";
	if ( page == PAGE_SPIDER && ! g_conf.m_addUrlEnabled )
		e2 = "<tr><td colspan=20><font color=#ff0000><b><center>"
			"Add url is temporarily disabled in Master Controls."
			"</font></td></tr>\n";

	if ( format == FORMAT_XML || format == FORMAT_JSON ) {
		char *coll = g_collectiondb.getDefaultColl(r);
		CollectionRec *cr = g_collectiondb.getRec(coll);//2(r,true);
		bool isMasterAdmin = g_conf.isMasterAdmin ( s , r );
		bool isCollAdmin = g_conf.isCollAdmin ( s , r );
		g_parms.printParms2 ( sb ,
				      page ,
				      cr ,
				      1 , // int32_t nc , # cols?
				      1 , // int32_t pd , print desc?
				      false , // isCrawlbot
				      format ,
				      NULL , // TcpSocket *sock
				      isMasterAdmin ,
				      isCollAdmin );
		return true;
	}

	// . page repair (PageRepair.cpp) has a status table BEFORE the parms
	//   iff we are doing a repair
	// . only one page for all collections, we have a parm that is
	//   a comma-separated list of the collections to repair. leave blank
	//   to repair all collections.
	if ( page == PAGE_REPAIR )
		g_repair.printRepairStatus ( sb , fromIp );

	// start the table
	sb->safePrintf(
		       "\n"
		       "<table %s "
		       //"style=\"border-radius:15px;"
		       //"border:#6060f0 2px solid;"
		       //"\" "
		       //"width=100%% bgcolor=#%s "
		       //"bgcolor=black "
		       //"cellpadding=4 "
		       //"border=0 "//border=1 "
		       "id=\"parmtable\">"
		       "<tr><td colspan=20>"// bgcolor=#%s>"
		       ,TABLE_STYLE
		       //,DARKER_BLUE
		       //,DARK_BLUE
		       );

	/*
	take this out since we took out a ton of parms for
	simplicity's sake

	if ( page != PAGE_FILTERS )
		sb->safePrintf("<div style=\"float:left;\">"
			       "filter:<input type=\"text\" "
			       "onkeyup=\"filterRow(this.value)\" "
			       "value=\"\"></div>"
			       );
	*/

	sb->safePrintf(//"<div style=\"margin-left:45%%;\">"
		       //"<font size=+1>"
		       "<center>"
		       "<b>%s</b>"
		       //"</font>"
		       "</center>"
		       //"</div>"
		       "</td></tr>%s%s\n",
		       tt,e1,e2);

	//bool isCrawlbot = false;
	//if ( collOveride ) isCrawlbot = true;

	// print the table(s) of controls
	//p= g_parms.printParms (p, pend, page, user, THIS, coll, pwd, nc, pd);
	g_parms.printParms ( sb , s , r );

	// end the table
	sb->safePrintf ( "</table>\n" );

	// this must be outside of table, submit button follows
	sb->safePrintf ( "<br>\n" );

	if ( page == PAGE_SPIDERPROXIES ) {
		// wrap up the form, print a submit button
		g_pages.printSubmit ( sb );
		printSpiderProxyTable ( sb );
		// do not print another submit button
		return true;
	}

	// url filter page has a test table
	if ( page == PAGE_FILTERS ) {
		// wrap up the form, print a submit button
		g_pages.printSubmit ( sb );
		printUrlExpressionExamples ( sb );
	}
	else if ( page == PAGE_BASIC_SETTINGS ) {
		// wrap up the form, print a submit button
		g_pages.printSubmit ( sb );
		printSitePatternExamples ( sb , r );
	}
	else if ( page == PAGE_SPIDER ) { // PAGE_SITES
		// wrap up the form, print a submit button
		g_pages.printSubmit ( sb );
		printSitePatternExamples ( sb , r );
	}
	else {
		// wrap up the form, print a submit button
		g_pages.printAdminBottom ( sb );
	}

	// extra sync table
	/*
	if ( page == PAGE_SYNC ) {
		// a table that shows the progress of a sync process
		sb.safePrintf (
			       "<br>"
			       "<table width=100%% border=1 bgcolor=#d0d0e0 "
			       "cellpadding=4 border=0>"
			       //"<tr><td colspan=2 bgcolor=#d0c0d0>"
			       "<tr><td colspan=2 bgcolor=#%s>"
			       "<center>"
			       //"<font size=+1>"
			       "<b>Sync Progress</b>"
			       //"</font>"
			       "</td></tr>\n" , DARK_BLUE);

		for ( int32_t i = RDB_START ; i < RDB_END ; i++ ) {
			Rdb *r = getRdbFromId ( i );
			if ( ! r ) continue;
			float pd = g_sync.getPercentDone ( i );
			sb.safePrintf (
				       "<tr>"
				       "<td>%s</td>"
				       "<td>%.1f%%</td></tr>\n",
				       r->m_dbname , pd );
		}
		sb.safePrintf ( "</table>\n");
	}
	*/

	// if just printing into a buffer, return now
	//if ( pageBuf ) return true;

	return true;
}

/*
char *printDropDown ( int32_t n , char *p, char *pend, char *name, int32_t select,
		      bool includeMinusOne ,
		      bool includeMinusTwo ) {
	// begin the drop down menu
	sprintf ( p , "<select name=%s>", name );
	p += gbstrlen ( p );
	char *s;
	int32_t i = -1;
	if ( includeMinusOne ) i = -1;
	// . by default, minus 2 includes minus 3, the new "FILTERED" priority
	// . it is like "BANNED" but does not mean the url is low quality
	//   necessarily
	if ( includeMinusTwo ) i = -3;
	for ( ; i < n ; i++ ) {
		if ( i == select ) s = " selected";
		else s = "";
		if ( i == -3 )
			sprintf (p,"<option value=%" INT32 "%s>FILTERED",i,s);
		else if ( i == -2 )
			sprintf (p,"<option value=%" INT32 "%s>BANNED",i,s);
		else if ( i == -1 )
			sprintf (p,"<option value=%" INT32 "%s>undefined",i,s);
		else
			sprintf (p,"<option value=%" INT32 "%s>%" INT32 "",i,s,i);
		p += gbstrlen ( p );
	}
	sprintf ( p , "</select>" );
	p += gbstrlen ( p );
	return p;
}

bool printDiffbotDropDown ( SafeBuf *sb,char *name,char *THIS , SafeBuf *sx) {
	//CollectionRec *cr = (CollectionRec *)THIS;
	// . get the string we have selected
	// . the list of available strings to select is in
	//   m_diffbotApiList for this collection, and that can
	//   be changed by john to add custom diffbot api urls.
	// . should just be m_spiderDiffbotApiUrl[i] safebuf
	char *usingApi = sx->getBufStart();
	if ( sx->length() == 0 ) usingApi = NULL;
	// now scan each item in the list. see the setting of
	// "m_def" for "diffbotApiList" below to see the
	// comma separated list of default strings. each item in
	// this list is of the format "<title>|<urlPath>,"
	//char *p = cr->m_diffbotApiList.getBufStart();
	char *p =
		"None|none,"
		"All|http://www.diffbot.com/api/analyze?mode=auto&fields=*,"
		"Article (autodetect)|http://www.diffbot.com/api/analyze?mode=article&fields=*,"
		"Article (force)|http://www.diffbot.com/api/article?fields=*,"
		"Product (autodetect)|http://www.diffbot.com/api/analyze?mode=product&fields=*,"
		"Product (force)|http://www.diffbot.com/v2/product?fields=*,"
		"Image (autodetect)|http://www.diffbot.com/api/analyze?mode=image&fields=*,"
		"Image (force)|http://www.diffbot.com/api/image?fields=*,"
		"FrontPage (autodetect)|http://www.diffbot.com/api/analyze?mode=frontpage&fields=*,"
		"FrontPage (force)|http://www.diffbot.com/api/frontpage?fields=*"
		;

	// wtf?
	if ( ! p ) return true;
	// print out. cgi is "dapi%" INT32 "".
	sb->safePrintf("<select name=%s>\n",name);
	// print "none" as the first option
	//char *sel = "";
	//if ( ! usingApi ) sel = " selected";
	//sb->safePrintf("<option value=\"\"%s>None</option>",sel);
	// the various "diffbot urls" are separated by commas
	for ( ; *p ; ) {
		// point to start of item name
		char *name = p;
		// p should now point to name of the item
		char *end1 = p;
		// point to start of url for that item
		for ( ; *end1 && *end1 != '|' ;end1++);
		// save that
		char *url = end1;
		if ( *url == '|' ) url++;
		// find end of url
		char *urlEnd = url;
		for ( ; *urlEnd && *urlEnd != ',' ; urlEnd++ );
		// do we match it?
		char *sel = "";
		if ( usingApi && strncmp(usingApi,url,urlEnd-url)== 0 )
			sel = " selected";
		if ( ! usingApi && urlEnd - url == 0 )
			sel = " selected";
		// advance p
		p = urlEnd;
		// skip over comma to get next one
		if ( *p == ',' ) p++;
		// use the hash as the identifier
		sb->safePrintf("<option value=\"");
		sb->safeMemcpy ( url, urlEnd - url );
		sb->safePrintf("\"%s>",sel);
		// print item name
		sb->safeMemcpy ( name , end1 - name );
		sb->safePrintf("</option>\n");
	}
	sb->safePrintf("</select>");
	return true;
}
*/

bool printDropDown ( int32_t n , SafeBuf* sb, char *name, int32_t select,
		     bool includeMinusOne ,
		     bool includeMinusTwo ) { // begin the drop down menu
	sb->safePrintf ( "<select name=%s>", name );
	char *s;
	int32_t i = -1;
	if ( includeMinusOne ) i = -1;
	// . by default, minus 2 includes minus 3, the new "FILTERED" priority
	// . it is like "BANNED" but does not mean the url is low quality
	//   necessarily
	if ( includeMinusTwo ) i = -3;

	// no more DELETE, etc.
	i = 0;
	if ( select < 0 ) select = 0;

	for ( ; i < n ; i++ ) {
		if ( i == select ) s = " selected";
		else s = "";
		if ( i == -3 )
			sb->safePrintf ("<option value=%" INT32 "%s>DELETE",i,s);
		else if ( i == -2 )
			//sb->safePrintf ("<option value=%" INT32 "%s>BANNED",i,s);
			continue;
		else if ( i == -1 )
			//sb->safePrintf ("<option value=%" INT32 "%s>undefined",i,s);
			continue;
		else
			sb->safePrintf ("<option value=%" INT32 "%s>%" INT32 "",i,s,i);
	}
	sb->safePrintf ( "</select>" );
	return true;
}

class DropLangs {
public:
	char *m_title;
	char *m_lang;
	char *m_tld;
};

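// each g_drops entry below is: menu title, comma-separated language codes,
// comma-separated tlds (NULL = none specified)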
DropLangs g_drops[] = {
	{"custom",NULL,NULL},
	{"web",NULL,NULL},
	{"news",NULL,NULL},
	{"english","en","com,us.gov,org"},
	{"german","de","de"},
	{"french","fr","fr"},
	{"norweigian","nl","nl"},
	{"spanish","es","es"},
	{"italian","it","it"},
	{"romantic","en,de,fr,nl,es,it","com,us.gov,org,de,fr,nl,es,it"}
};

// "url filters profile" values. used to set default crawl rules
|
|
// in Collectiondb.cpp's CollectionRec::setUrlFiltersToDefaults().
|
|
// for instance, UFP_NEWS spiders sites more frequently but less deep in
|
|
// order to get "news" pages and articles
|
|
bool printDropDownProfile ( SafeBuf* sb, char *name, CollectionRec *cr ) {
|
|
sb->safePrintf ( "<select name=%s>", name );
|
|
// the type of url filters profiles
|
|
//char *items[] = {"custom","web","news","chinese","shallow"};
|
|
int32_t nd = sizeof(g_drops)/sizeof(DropLangs);
|
|
for ( int32_t i = 0 ; i < nd ; i++ ) {
|
|
//if ( i == select ) s = " selected";
|
|
//else s = "";
|
|
char *x = cr->m_urlFiltersProfile.getBufStart();
|
|
char *s;
|
|
if ( strcmp(g_drops[i].m_title, x) == 0 ) s = " selected";
|
|
else s = "";
|
|
sb->safePrintf ("<option value=%s%s>%s",
|
|
g_drops[i].m_title,
|
|
s,
|
|
g_drops[i].m_title );
|
|
}
|
|
sb->safePrintf ( "</select>");
|
|
return true;
|
|
}
|
|
|
|
/*
char *printCheckBoxes ( int32_t n , char *p, char *pend, char *name, char *array){
	for ( int32_t i = 0 ; i < n ; i++ ) {
		if ( i > 0 )
			sprintf (p, "<input type=checkbox value=1 name=%s%" INT32 "",
				 name,i);
		else
			sprintf (p, "<input type=checkbox value=1 name=%s",
				 name);
		p += gbstrlen ( p );
		if ( array[i] ) {
			sprintf ( p , " checked");
			p += gbstrlen ( p );
		}
		sprintf ( p , ">%" INT32 " " , i );
		p += gbstrlen ( p );
		//if i is single digit, add another nbsp so that everything's
		//aligned
		if ( i < 10 )
			sprintf(p," ");
		p +=gbstrlen(p);

		if ( i > 0 && (i+1) % 6 == 0 )
			sprintf(p,"<br>\n");
		p+=gbstrlen(p);
	}
	return p;
}
*/

bool printCheckBoxes ( int32_t n , SafeBuf* sb, char *name, char *array){
	for ( int32_t i = 0 ; i < n ; i++ ) {
		if ( i > 0 )
			sb->safePrintf ("<input type=checkbox value=1 name=%s%" INT32 "",
					name,i);
		else
			sb->safePrintf ("<input type=checkbox value=1 name=%s",
					name);
		if ( array[i] ) {
			sb->safePrintf ( " checked");
		}
		sb->safePrintf ( ">%" INT32 " " , i );
		//if i is single digit, add another nbsp so that everything's
		//aligned
		if ( i < 10 )
			sb->safePrintf(" ");

		if ( i > 0 && (i+1) % 6 == 0 )
			sb->safePrintf("<br>\n");
	}
	return true;
}

bool Parms::printParms (SafeBuf* sb, TcpSocket *s , HttpRequest *r) {
	int32_t page = g_pages.getDynamicPageNumber ( r );
	int32_t nc = r->getLong("nc",1);
	int32_t pd = r->getLong("pd",1);
	char *coll = g_collectiondb.getDefaultColl(r);
	CollectionRec *cr = g_collectiondb.getRec(coll);//2(r,true);

	bool isMasterAdmin = g_conf.isMasterAdmin ( s , r );
	bool isCollAdmin = g_conf.isCollAdmin ( s , r );

	//char *coll = r->getString ( "c" );
	//if ( ! coll || ! coll[0] ) coll = "main";
	//CollectionRec *cr = g_collectiondb.getRec ( coll );
	// if "main" collection does not exist, try another
	//if ( ! cr ) cr = getCollRecFromHttpRequest ( r );
	printParms2 ( sb, page, cr, nc, pd,0,0 , s,isMasterAdmin,isCollAdmin);
	return true;
}

static int32_t s_count = 0;

bool Parms::printParms2 ( SafeBuf* sb ,
			  int32_t page ,
			  CollectionRec *cr ,
			  int32_t nc ,
			  int32_t pd ,
			  bool isCrawlbot ,
			  char format , // bool isJSON ,
			  TcpSocket *sock ,
			  bool isMasterAdmin ,
			  bool isCollAdmin ) {
	bool status = true;
	s_count = 0;
	// background color
	char *bg1 = LIGHT_BLUE;
	char *bg2 = DARK_BLUE;
	// background color
	char *bg = NULL;

	char *coll = NULL;
	if ( cr ) coll = cr->m_coll;

	// page aliases
	//if ( page == PAGE_COLLPASSWORDS )
	//	page = PAGE_MASTERPASSWORDS;

	if ( page == PAGE_COLLPASSWORDS2 )
		page = PAGE_COLLPASSWORDS;

	GigablastRequest gr;
	g_parms.setToDefault ( (char *)&gr , OBJ_GBREQUEST , NULL);

	InjectionRequest ir;
	g_parms.setToDefault ( (char *)&ir , OBJ_IR , NULL);

	// Begin "parms":[]
	if (format == FORMAT_JSON ) {
		sb->safePrintf ("\"parms\":[\n");
	}

	// find in parms list
	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
		// get it
		Parm *m = &m_parms[i];
		// make sure we got the right parms for what we want
		if ( m->m_page != page ) continue;
		// and same object type. but allow OBJ_NONE for
		// PageAddUrl.cpp
		//if ( m->m_obj != parmObj && m->m_obj != OBJ_NONE ) continue;
		// skip if offset is negative, that means none
		// well then use OBJ_NONE now!!!
		//if ( m->m_off < 0 &&
		//     m->m_type != TYPE_MONOD2 &&
		//     m->m_type != TYPE_MONOM2 &&
		//     m->m_type != TYPE_CMD ) continue;
		// skip if hidden
		if ( m->m_flags & PF_HIDDEN ) continue;

		// or if should not show in html, like the
		// name of the collection, the "c" parm we do not show
		// generally on the html page even though it is a required parm
		// we have it in a hidden html input tag in Pages.cpp.
		if ( (m->m_flags & PF_NOHTML) &&
		     format != FORMAT_JSON &&
		     format != FORMAT_XML )
			continue;

		// get right ptr
		char *THIS = NULL;
		if ( m->m_obj == OBJ_CONF )
			THIS = (char *)&g_conf;
		if ( m->m_obj == OBJ_COLL ) {
			THIS = (char *)cr;
			if ( ! THIS ) continue;
		}
		if ( m->m_obj == OBJ_GBREQUEST )
			THIS = (char *)&gr;
		if ( m->m_obj == OBJ_IR )
			THIS = (char *)&ir;
		// might have an array, do not exceed the array size
		int32_t jend = m->m_max;
		int32_t size = jend ;
		char *ss = ((char *)THIS + m->m_off - 4);
		if ( m->m_type == TYPE_MONOD2 ) ss = NULL;
		if ( m->m_type == TYPE_MONOM2 ) ss = NULL;
		if ( m->m_max > 1 && ss ) size = *(int32_t *)ss;
		if ( size < jend ) jend = size;
		// toggle background color on group boundaries...
		if ( m->m_group == 1 ) {
			if ( bg == bg1 ) bg = bg2;
			else            bg = bg1;
		}

		//
		// mdw just debug to here ... left off here
		//char *xx=NULL;*xx=0;

		// . do we have an array? if so print title on next row
		//   UNLESS these are priority checkboxes, those can all
		//   cluster together onto one row
		// . only add if not in a row of controls
		if ( m->m_max > 1 && m->m_type != TYPE_PRIORITY_BOXES &&
		     m->m_rowid == -1 &&
		     format != FORMAT_JSON &&
		     format != FORMAT_XML ) { // ! isJSON ) {
			//
			// make a separate table for array of parms
			sb->safePrintf (
				//"<table width=100%% bgcolor=#d0d0e0 "
				//"cellpadding=4 border=1>\n"
				"<tr><td colspan=20 bgcolor=#%s>"
				"<center>"
				//"<font size=+1>"
				"<b>%s"
				"</b>"
				//"</font>"
				"</td></tr>\n"
				"<tr><td colspan=20><font size=-1>"
				,DARK_BLUE,m->m_title);
			// print the description
			sb->safePrintf ( "%s" , m->m_desc );
			// end the description
			sb->safePrintf("</font></td></tr>\n");

		}

		// arrays always have blank line for adding stuff
		if ( m->m_max > 1 )
			// not for PAGE_PRIORITIES!
			//m->m_page != PAGE_PRIORITIES )
			size++;
		// if m_rowid of consecutive parms are the same then they
		// are all printed in the same row, otherwise the inner loop
		// has no effect
		int32_t rowid = m_parms[i].m_rowid;
		// if not part of a complex row, just print this array right up
		if ( rowid == -1 ) {
			for ( int32_t j = 0 ; j < size ; j++ )
				status &=printParm ( sb,NULL,&m_parms[i],i,
						     j, jend, (char *)THIS,
						     coll,NULL,
						     bg,nc,pd,
						     false,
						     isCrawlbot,
						     format,
						     isMasterAdmin,
						     isCollAdmin,
						     sock);
			continue;
		}
		// if not first in a row, skip it, we printed it already
		if ( i > 0 && m_parms[i-1].m_rowid == rowid ) continue;

		// otherwise print everything in the row
		for ( int32_t j = 0 ; j < size ; j++ ) {
			// flip j if in this page
			int32_t newj = j;
			//if ( m->m_page == PAGE_PRIORITIES )
			//	newj = size - 1 - j;
			for ( int32_t k = i ;
			      k < m_numParms &&
			      m_parms[k].m_rowid == rowid;
			      k++ ) {

				status &=printParm(sb,NULL,&m_parms[k],k,
						   newj,jend,(char *)THIS,coll,NULL,
						   bg,nc,pd, j==size-1,
						   isCrawlbot,format,
						   isMasterAdmin,
						   isCollAdmin,
						   sock);
			}
		}

		// end array table
		//if ( m->m_max > 1 ) {
		//	sprintf ( p , "</table><br>\n");
		//	p += gbstrlen ( p );
		//}
	}

	// end "parms":[]
	if ( format == FORMAT_JSON ) {
		if ( m_numParms != 0 ) sb->m_length -= 2;
		sb->safePrintf("\n]\n");
	}

	return status;
}

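// For FORMAT_JSON the net effect of printParms2() is an envelope like
// this sketch (names and values illustrative). Each printParm() call
// appends one object plus a trailing ",\n", which is why m_length is
// backed up by 2 above before the closing "]" is written:
//
//   "parms":[
//        {
//             "title":"some parm",
//             "desc":"what it does",
//             "cgi":"xyz",
//             "defaultValue":"1",
//             "currentValue":"1"
//        }
//   ]
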
bool Parms::printParm ( SafeBuf* sb,
			//int32_t user ,
			char *username,
			Parm *m ,
			int32_t mm , // m = &m_parms[mm]
			int32_t j ,
			int32_t jend ,
			char *THIS ,
			char *coll ,
			char *pwd ,
			char *bg ,
			int32_t nc , // # column?
			int32_t pd , // print description
			bool lastRow ,
			bool isCrawlbot ,
			//bool isJSON ) {
			char format ,
			bool isMasterAdmin ,
			bool isCollAdmin ,
			TcpSocket *sock ) {
	bool status = true;
	// do not print if no permissions
	//if ( m->m_perms != 0 && !g_users.hasPermission(username,m->m_perms) )
	//	return status;
	//if ( m->m_perms != 0 && (m->m_perms & user) == 0 ) return status;
	// do not print some if #define _CLIENT_ is true
	//#ifdef _GLOBALSPEC_
	//if ( m->m_priv == 2 ) return status;
	//if ( m->m_priv == 3 ) return status;
	//#elif _CLIENT_
	//if ( m->m_priv ) return status;
	//#elif _METALINCS_
	//if ( m->m_priv == 2 ) return status;
	//if ( m->m_priv == 3 ) return status;
	//#endif
	// priv of 4 means do not print at all
	if ( m->m_priv == 4 ) return true;

	// do not print comments, those are for the xml conf file
	if ( m->m_type == TYPE_COMMENT ) return true;

	if ( m->m_flags & PF_HIDDEN ) return true;

	CollectionRec *cr = NULL;
	collnum_t collnum = -1;
	if ( coll ) {
		cr = g_collectiondb.getRec ( coll );
		if ( cr ) collnum = cr->m_collnum;
	}

	if ( format == FORMAT_XML || format == FORMAT_JSON ) {
		// the upload button has no val, cmds too
		if ( m->m_type == TYPE_FILEUPLOADBUTTON ) return true;
	}

	int32_t page = m->m_page;

	if ( format == FORMAT_XML ) {
		sb->safePrintf ( "\t<parm>\n");
		sb->safePrintf ( "\t\t<title><![CDATA[");
		sb->cdataEncode ( m->m_title );
		sb->safePrintf ( "]]></title>\n");
		sb->safePrintf ( "\t\t<desc><![CDATA[");
		sb->cdataEncode ( m->m_desc );
		sb->safePrintf ( "]]></desc>\n");
		if ( m->m_flags & PF_REQUIRED )
			sb->safePrintf("\t\t<required>1</required>\n");
		sb->safePrintf ( "\t\t<cgi>%s</cgi>\n",m->m_cgi);
		// and default value if it exists
		char *def = m->m_def;
		if ( ! def ) def = "";
		sb->safePrintf ( "\t\t<defaultValue><![CDATA[");
		sb->cdataEncode ( def );
		sb->safePrintf ( "]]></defaultValue>\n");
		if ( page == PAGE_MASTER ||
		     page == PAGE_SEARCH ||
		     page == PAGE_SPIDER ||
		     page == PAGE_SPIDERPROXIES ||
		     page == PAGE_FILTERS ||
		     page == PAGE_MASTERPASSWORDS ||
		     page == PAGE_REPAIR ||
		     page == PAGE_LOG ) {
			sb->safePrintf ( "\t\t<currentValue><![CDATA[");
			SafeBuf xb;
			m->printVal ( &xb , collnum , 0 );//occNum
			sb->cdataEncode ( xb.getBufStart() );
			sb->safePrintf ( "]]></currentValue>\n");
		}
		sb->safePrintf ( "\t</parm>\n");
		return true;
	}

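	// A sample of the per-parm xml the branch above emits, with
	// illustrative title/cgi/values only:
	//
	//   <parm>
	//        <title><![CDATA[some parm]]></title>
	//        <desc><![CDATA[what it does]]></desc>
	//        <cgi>xyz</cgi>
	//        <defaultValue><![CDATA[1]]></defaultValue>
	//        <currentValue><![CDATA[1]]></currentValue>
	//   </parm>
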
	if ( format == FORMAT_JSON ) {
		sb->safePrintf ( "\t{\n");
		sb->safePrintf ( "\t\t\"title\":\"%s\",\n",m->m_title);
		sb->safePrintf ( "\t\t\"desc\":\"");
		sb->jsonEncode ( m->m_desc );
		sb->safePrintf("\",\n");
		if ( m->m_flags & PF_REQUIRED )
			sb->safePrintf("\t\t\"required\":1,\n");
		sb->safePrintf ( "\t\t\"cgi\":\"%s\",\n",m->m_cgi);
		// and default value if it exists
		char *def = m->m_def;
		if ( ! def ) def = "";
		sb->safePrintf ( "\t\t\"defaultValue\":\"");
		sb->jsonEncode(def);
		sb->safePrintf("\",\n");
		if ( page == PAGE_MASTER ||
		     page == PAGE_SEARCH ||
		     page == PAGE_SPIDER ||
		     page == PAGE_SPIDERPROXIES ||
		     page == PAGE_FILTERS ||
		     page == PAGE_MASTERPASSWORDS ||
		     page == PAGE_REPAIR ||
		     page == PAGE_LOG ) {
			sb->safePrintf ( "\t\t\"currentValue\":\"");
			SafeBuf js;
			m->printVal ( &js , collnum , 0 );//occNum );
			sb->jsonEncode(js.getBufStart());
			sb->safePrintf("\",\n");
		}
		sb->m_length -= 2; // hack off the trailing comma
		sb->safePrintf("\n\t},\n");
		return true;
	}

	// . if printing on crawlbot page hide these
	// . we repeat this logic below when printing parm titles
	//   for the column headers in the table
	//char *vt = "";
	//if ( isCrawlbot &&
	//     m->m_page == PAGE_FILTERS &&
	//     (strcmp(m->m_xml,"spidersEnabled") == 0 ||
	//      //strcmp(m->m_xml,"maxSpidersPerRule")==0||
	//      //strcmp(m->m_xml,"maxSpidersPerIp") == 0||
	//      strcmp(m->m_xml,"spiderIpWait") == 0
	//      ) )
	//	vt = " style=display:none;";

	// what type of parameter?
	char t = m->m_type;
	// point to the data in THIS
	char *s = THIS + m->m_off + m->m_size * j ;

	// if THIS is NULL then it must be GigablastRequest or something
	// and is not really a persistent thing, but a one-shot deal.
	if ( ! THIS ) s = NULL;

	// . if an array, past our end, this is the blank line at the end
	// . USE THIS EMPTY/DEFAULT LINE TO ADD NEW DATA TO AN ARRAY
	// . make at least as big as a int64_t
	if ( j >= jend ) s = "\0\0\0\0\0\0\0\0";
	// delimit each cgi var if we need to
	if ( m->m_cgi && gbstrlen(m->m_cgi) > 45 ) {
		log(LOG_LOGIC,"admin: Cgi variable is TOO big.");
		char *xx = NULL; *xx = 0;
	}
	char cgi[64];
	if ( m->m_cgi ) {
		if ( j > 0 ) sprintf ( cgi , "%s%" INT32 "" , m->m_cgi , j );
		else         sprintf ( cgi , "%s" , m->m_cgi );
		// let's try dropping the index # and just doing dup parms
		//sprintf ( cgi , "%s" , m->m_cgi );
	}
	// . display title and description of the control/parameter
	// . the input cell of some parameters are colored
	char *color = "";
	if ( t == TYPE_CMD || t == TYPE_BOOL2 )
		color = " bgcolor=#6060ff";
	if ( t == TYPE_BOOL ) {
		if ( *s ) color = " bgcolor=#00ff00";
		else      color = " bgcolor=#ff0000";
	}
	if ( t == TYPE_BOOL || t == TYPE_BOOL2 ) {
		// disable controls not allowed in read only mode
		if ( g_conf.m_readOnlyMode && m->m_rdonly )
			color = " bgcolor=#ffff00";
	}

	bool firstInRow = false;
	if ( (s_count % nc) == 0 ) firstInRow = true;
	s_count++;

	if ( mm > 0 && m->m_rowid >= 0 && m_parms[mm-1].m_rowid == m->m_rowid )
		firstInRow = false;

	int32_t firstRow = 0;
	//if ( m->m_page==PAGE_PRIORITIES ) firstRow = MAX_PRIORITY_QUEUES - 1;
	// . use a separate table for arrays
	// . make title and description header of that table
	// . do not print all headers if not m_hdrs, a special case for the
	//   default line in the url filters table
	if ( j == firstRow && m->m_rowid >= 0 && firstInRow && m->m_hdrs ) {
		// print description as big comment
		if ( m->m_desc && pd == 1 ) {
			// url FILTERS table description row
			sb->safePrintf ( "<td colspan=20 bgcolor=#%s>"
					 "<font size=-1>\n" , DARK_BLUE);

			//p = htmlEncode ( p , pend , m->m_desc ,
			//		 m->m_desc + gbstrlen ( m->m_desc ) );
			sb->safePrintf ( "%s" , m->m_desc );
			sb->safePrintf ( "</font></td></tr>"
					 // for "#,expression,harvestlinks.."
					 // header row in url FILTERS table
					 "<tr bgcolor=#%s>\n" ,DARK_BLUE);
		}
		// # column
		// do not show this for PAGE_PRIORITIES it is confusing
		if ( m->m_max > 1 ) {
			//m->m_page != PAGE_PRIORITIES ) {
			sb->safePrintf ( "<td><b>#</b></td>\n" );
		}
		// print all headers
		for ( int32_t k = mm ;
		      k<m_numParms && m_parms[k].m_rowid==m->m_rowid; k++ ) {
			// parm shortcut
			Parm *mk = &m_parms[k];
			// not if printing json
			//if ( format != FORMAT_HTML )continue;//isJSON )
			// skip if hidden
			if ( cr && ! cr->m_isCustomCrawl &&
			     (mk->m_flags & PF_DIFFBOT) )
				continue;

			// . hide table column headers that are too advanced
			// . we repeat this logic above for the actual parms
			//char *vt = "";
			//if ( isCrawlbot &&
			//     m->m_page == PAGE_FILTERS &&
			//     (strcmp(mk->m_xml,"spidersEnabled") == 0 ||
			//      //strcmp(mk->m_xml,"maxSpidersPerRule")==0||
			//      //strcmp(mk->m_xml,"maxSpidersPerIp") == 0||
			//      strcmp(mk->m_xml,"spiderIpWait") == 0 ) )
			//	vt = " style=display:none;display:none;";
			//sb->safePrintf ( "<td%s>" , vt );
			sb->safePrintf ( "<td>" );
			// if its of type checkbox in a table make it
			// toggle them all on/off
			if ( mk->m_type == TYPE_CHECKBOX &&
			     mk->m_page == PAGE_FILTERS ) {
				sb->safePrintf("<a href=# "
					       "onclick=\"checkAll(this, "
					       "'id_%s', %" INT32 ");\">",
					       m_parms[k].m_cgi, m->m_max);
			}
			sb->safePrintf ( "<b>%s</b>", m_parms[k].m_title );
			if ( mk->m_type == TYPE_CHECKBOX &&
			     mk->m_page == PAGE_FILTERS )
				sb->safePrintf("</a>");
			/*
			if ( m->m_page == PAGE_PRIORITIES &&
			     m_parms[k].m_type == TYPE_CHECKBOX)
				sb->safePrintf("<br><a href=# "
					       "onclick=\"checkAll(this, "
					       "'id_%s', %" INT32 ");\">(toggle)</a>",
					       m_parms[k].m_cgi, m->m_max);
			*/
			sb->safePrintf ("</td>\n");
		}
		//if ( format == FORMAT_HTML )
		sb->safePrintf ( "</tr>\n" ); // mdw added
	}

	// skip if hidden. diffbot api url only for custom crawls.
	//if(cr && ! cr->m_isCustomCrawl && (m->m_flags & PF_DIFFBOT) )
	//	return true;

	// print row start for single parm
	if ( m->m_max <= 1 && ! m->m_hdrs ) {
		if ( firstInRow ) {
			sb->safePrintf ( "<tr bgcolor=#%s><td>" , bg );
		}
		sb->safePrintf ( "<td width=%" INT32 "%%>" , 100/nc/2 );
	}

	// if parm value is not default, use orange!
	char rr[1024];
	SafeBuf val1(rr,1024);
	if ( m->m_type != TYPE_FILEUPLOADBUTTON )
		m->printVal ( &val1 , collnum , j ); // occNum );
	// test it
	if ( m->m_def &&
	     m->m_obj != OBJ_NONE &&
	     m->m_obj != OBJ_IR && // do not do for injectionrequest
	     m->m_obj != OBJ_GBREQUEST && // do not do for GigablastRequest
	     strcmp ( val1.getBufStart() , m->m_def ) )
		// put non-default valued parms in orange!
		bg = "ffa500";

	// print the title/description in current table for non-arrays
	if ( m->m_max <= 1 && m->m_hdrs ) { // j == 0 && m->m_rowid < 0 ) {
		if ( firstInRow )
			sb->safePrintf ( "<tr bgcolor=#%s>",bg);

		if ( t == TYPE_STRINGBOX ) {
			sb->safePrintf ( "<td colspan=2><center>"
					 "<b>%s</b><br><font size=-1>",m->m_title );
			if ( pd ) {
				status &= sb->htmlEncode (m->m_desc,
							  gbstrlen(m->m_desc),
							  false);
				// is it required?
				if ( m->m_flags & PF_REQUIRED )
					sb->safePrintf(" <b><font color=green>"
						       "REQUIRED</font></b>");
			}

			sb->safePrintf ( "</font><br>\n" );
		}
		if ( t != TYPE_STRINGBOX ) {
			// this td will be invisible if isCrawlbot and the
			// parm is too advanced to display
			sb->safePrintf ( "<td " );
			if ( m->m_colspan > 0 )
				sb->safePrintf ( "colspan=%" INT32 " ",
						 (int32_t)m->m_colspan);
			sb->safePrintf ( "width=%" INT32 "%%>"//"<td width=78%%>
					 "<b>%s</b><br><font size=1>",
					 3*100/nc/2/4, m->m_title );

			// the "site list" parm has html in description
			if ( pd ) {
				status &= sb->safeStrcpy(m->m_desc);
				//status &= sb->htmlEncode (m->m_desc,
				//			    gbstrlen(m->m_desc),
				//			    false);
				// is it required?
				if ( m->m_flags & PF_REQUIRED )
					sb->safePrintf(" <b><font color=green>"
						       "REQUIRED</font></b>");

				// print users current ip if showing the list
				// of "Master IPs" for admin access
				if ( ( m->m_page == PAGE_MASTERPASSWORDS ||
				       m->m_page == PAGE_COLLPASSWORDS ) &&
				     sock &&
				     m->m_title &&
				     strstr(m->m_title,"IP") )
					sb->safePrintf(" <b>Your current IP "
						       "is %s.</b>",
						       iptoa(sock->m_ip));
			}

			// and cgi parm if it exists
			//if ( m->m_def && m->m_scgi )
			//	sb->safePrintf(" CGI override: %s.",m->m_scgi);
			// just let them see the api page for this...
			//sb->safePrintf(" CGI: %s.",m->m_cgi);
			// and default value if it exists
			if ( m->m_def && m->m_def[0] && t != TYPE_CMD ) {
				char *d = m->m_def;
				if ( t == TYPE_BOOL || t == TYPE_CHECKBOX ) {
					if ( d[0]=='0' ) d = "NO";
					else             d = "YES";
					sb->safePrintf ( " <nobr>"
							 "Default: %s."
							 "</nobr>",d);
				}
				else {
					sb->safePrintf (" Default: ");
					status &= sb->htmlEncode (d,
								  gbstrlen(d),
								  false);
				}
			}
			sb->safePrintf ( "</font></td>\n<td%s width=%" INT32 "%%>" ,
					 color , 100/nc/2/4 );
		}
	}

	// . print the row number if this parm is an array
	// . used for url filters table, etc.
	if ( m->m_max > 1 ) {
		// bg color alternates
		char *bgc = LIGHT_BLUE;
		if ( j % 2 ) bgc = DARK_BLUE;
		// do not print this if doing json
		//if ( format != FORMAT_HTML );//isJSON ) ;
		// but if it is in same row as previous, do not repeat it
		// for this same row, silly
		if ( firstInRow ) // && m->m_page != PAGE_PRIORITIES )
			sb->safePrintf ( "<tr bgcolor=#%s>"
					 "<td>%" INT32 "</td>\n<td>",
					 bgc,
					 j );//j+1
		else if ( firstInRow )
			sb->safePrintf ( "<tr><td>" );
		else
			//sb->safePrintf ( "<td%s>" , vt);
			sb->safePrintf ( "<td>" );
	}

	//int32_t cast = m->m_cast;
	//if ( g_proxy.isProxy() ) cast = 0;

	// print the input box
	if ( t == TYPE_BOOL ) {
		char *tt, *v;
		if ( *s ) { tt = "YES"; v = "0"; }
		else      { tt = "NO" ; v = "1"; }
		if ( g_conf.m_readOnlyMode && m->m_rdonly )
			sb->safePrintf ( "<b>read-only mode</b>" );
		// if cast=1, command IS broadcast to all hosts
		else
			sb->safePrintf ( "<b><a href=\"/%s?c=%s&"
					 "%s=%s\">" // &cast=%" INT32 "\">"
					 "<center>%s</center></a></b>",
					 g_pages.getPath(m->m_page),coll,
					 cgi,v,//cast,
					 tt);
	}
	else if ( t == TYPE_BOOL2 ) {
		if ( g_conf.m_readOnlyMode && m->m_rdonly )
			sb->safePrintf ( "<b><center>read-only mode"
					 "</center></b>");
		// always use m_def as the value for TYPE_BOOL2
		else
			sb->safePrintf ( "<b><a href=\"/%s?c=%s&%s=%s\">"
					 //"cast=1\">"
					 "<center>%s</center></a></b>",
					 g_pages.getPath(m->m_page),coll,
					 cgi,m->m_def, m->m_title);
	}
	else if ( t == TYPE_CHECKBOX ) {
		//char *ddd1 = "";
		//char *ddd2 = "";
		//if ( *s ) ddd1 = " checked";
		//else      ddd2 = " checked";
		// just show the parm name and value if printing in json
		// if ( format == FORMAT_JSON ) { // isJSON ) {
		// 	if ( ! lastRow ) {
		// 		int32_t val = 0;
		// 		if ( *s ) val = 1;
		// 		sb->safePrintf("\"%s\":%" INT32 ",\n",cgi,val);
		// 	}
		// }
		//sb->safePrintf("<center><nobr>");
		sb->safePrintf("<nobr>");
		// this is part of the "HACK" fix below. you have to
		// specify the cgi parm in the POST request, and
		// unchecked checkboxes are not included in the POST
		// request.
		//if ( lastRow && m->m_page == PAGE_FILTERS )
		//	sb->safePrintf("<input type=hidden ");
		//char *val = "Y";
		//if ( ! *s ) val = "N";
		char *val = "";
		// "s" is invalid if parm has no "object"
		if ( m->m_obj == OBJ_NONE && m->m_def[0] != '0' )
			val = " checked";
		if ( m->m_obj != OBJ_NONE && s && *s )
			val = " checked";
		// s is NULL for GigablastRequest parms
		if ( ! s && m->m_def && m->m_def[0]=='1' )
			val = " checked";
		// in case it is not checked, submit that!
		// if it gets checked this should be overridden then
		sb->safePrintf("<input type=hidden name=%s value=0>"
			       , cgi );
		//else
		sb->safePrintf("<input type=checkbox value=1 ");
		//"<nobr><input type=button ");
		if ( m->m_page == PAGE_FILTERS)
			sb->safePrintf("id=id_%s ",cgi);

		sb->safePrintf("name=%s%s"
			       //" onmouseup=\""
			       //"if ( this.value=='N' ) {"
			       //"this.value='Y';"
			       //"} "
			       //"else if ( this.value=='Y' ) {"
			       //"this.value='N';"
			       //"}"
			       //"\" "
			       ">"
			       ,cgi
			       ,val);//,ddd);
		//
		// repeat for off position
		//
		//if ( ! lastRow || m->m_page != PAGE_FILTERS ) {
		//	sb->safePrintf(" Off:<input type=radio ");
		//	if ( m->m_page == PAGE_FILTERS)
		//		sb->safePrintf("id=id_%s ",cgi);
		//	sb->safePrintf("value=0 name=%s%s>",
		//		       cgi,ddd2);
		//}
		sb->safePrintf("</nobr>"
			       //"</center>"
			       );
	}
	else if ( t == TYPE_CHAR )
		sb->safePrintf ("<input type=text name=%s value=\"%" INT32 "\" "
				"size=3>",cgi,(int32_t)(*s));
	/* else if ( t == TYPE_CHAR2 )
		sprintf (p,"<input type=text name=%s value=\"%" INT32 "\" "
			 "size=3>",cgi,*(char*)s);*/
	else if ( t == TYPE_PRIORITY )
		printDropDown ( MAX_SPIDER_PRIORITIES , sb , cgi , *s ,
				false , false );
	else if ( t == TYPE_PRIORITY2 ) {
		// just show the parm name and value if printing in json
		// if ( format==FORMAT_JSON) // isJSON )
		// 	sb->safePrintf("\"%s\":%" INT32 ",\n",cgi,(int32_t)*(char *)s);
		// else
		printDropDown ( MAX_SPIDER_PRIORITIES , sb , cgi , *s ,
				true , true );
	}
	// this url filters parm is an array of SAFEBUFs now, so each is
	// a string and that string is the diffbot api url to use.
	// the string is empty or zero length to indicate none.
	//else if ( t == TYPE_DIFFBOT_DROPDOWN ) {
	//	char *xx=NULL;*xx=0;
	//}
	//else if ( t == TYPE_UFP )
	else if ( t == TYPE_SAFEBUF &&
		  strcmp(m->m_title,"url filters profile")==0)
		// url filters profile drop down "ufp"
		printDropDownProfile ( sb , "ufp" , cr );//*s );

	// do not expose master passwords or IPs to non-root admins
	else if ( ( m->m_flags & PF_PRIVATE ) &&
		  m->m_obj == OBJ_CONF &&
		  ! isMasterAdmin )
		return true;

	// do not expose master passwords or IPs to non-root admins
	else if ( ( m->m_flags & PF_PRIVATE ) &&
		  m->m_obj == OBJ_COLL &&
		  ! isCollAdmin )
		return true;

	else if ( t == TYPE_RETRIES )
		printDropDown ( 4 , sb , cgi , *s , false , false );
	else if ( t == TYPE_FILEUPLOADBUTTON ) {
		sb->safePrintf("<input type=file name=%s>",cgi);
	}
	else if ( t == TYPE_PRIORITY_BOXES ) {
		// print ALL the checkboxes when we get the first parm
		if ( j != 0 ) return status;
		printCheckBoxes ( MAX_SPIDER_PRIORITIES , sb , cgi , s );
	}
	else if ( t == TYPE_CMD )
		// if cast=0 it will be executed, otherwise it will be
		// broadcasted with cast=1 to all hosts and they will all
		// execute it
		sb->safePrintf ( "<b><a href=\"/%s?c=%s&%s=1\">" // cast=%" INT32 "
				 "<center>%s</center></a></b>",
				 g_pages.getPath(m->m_page),coll,
				 cgi,m->m_title);
	else if ( t == TYPE_FLOAT ) {
		// just show the parm name and value if printing in json
		// if ( format == FORMAT_JSON )//isJSON )
		// 	sb->safePrintf("\"%s\":%f,\n",cgi,*(float *)s);
		// else
		sb->safePrintf ("<input type=text name=%s "
				"value=\"%f\" "
				// 3 was ok on firefox but need 6
				// on chrome
				"size=7>",cgi,*(float *)s);
	}
	else if ( t == TYPE_IP ) {
		if ( m->m_max > 0 && j == jend )
			sb->safePrintf ("<input type=text name=%s value=\"\" "
					"size=12>",cgi);
		else
			sb->safePrintf ("<input type=text name=%s value=\"%s\" "
					"size=12>",cgi,iptoa(*(int32_t *)s));
	}
	else if ( t == TYPE_LONG ) {
		// just show the parm name and value if printing in json
		// if ( format == FORMAT_JSON ) // isJSON )
		// 	sb->safePrintf("\"%s\":%" INT32 ",\n",cgi,*(int32_t *)s);
		// else
		sb->safePrintf ("<input type=text name=%s "
				"value=\"%" INT32 "\" "
				// 3 was ok on firefox but need 6
				// on chrome
				"size=6>",cgi,*(int32_t *)s);
	}
	else if ( t == TYPE_LONG_CONST )
		sb->safePrintf ("%" INT32 "",*(int32_t *)s);
	else if ( t == TYPE_LONG_LONG )
		sb->safePrintf ("<input type=text name=%s value=\"%" INT64 "\" "
				"size=12>",cgi,*(int64_t *)s);
	else if ( t == TYPE_STRING || t == TYPE_STRINGNONEMPTY ) {
		int32_t size = m->m_size;
		// give regular expression box on url filters page more room
		//if ( m->m_page == PAGE_FILTERS ) {
		//	if ( size > REGEX_TXT_MAX ) size = REGEX_TXT_MAX;
		//}
		//else {
		if ( size > 20 ) size = 20;
		//}
		sb->safePrintf ("<input type=text name=%s size=%" INT32 " value=\"",
				cgi,size);

		// if it has PF_DEFAULTCOLL flag set then use the coll
		if ( cr && (m->m_flags & PF_COLLDEFAULT) )
			sb->safePrintf("%s",cr->m_coll);
		else
			sb->dequote ( s , gbstrlen(s) );

		sb->safePrintf ("\">");
	}

	// HACK: print a drop down not a textbox for selecting the
	// m_spiderDiffbotApiUrl[]. we can't just store this selection
	// as a number because m_diffbotApiList (a string of comma separated
	// items to select from) can change! it is not a typical dropdown.
	// so we have to record the actual text we selected, which is
	// basically the diffbot api url. this is because john can add
	// custom diffbot api urls at anytime to the list.
	/*
	else if ( t == TYPE_SAFEBUF && strcmp(m->m_cgi,"dapi") == 0 ) {
		SafeBuf *sx = (SafeBuf *)s;
		// just show the parm name and value if printing in json
		if ( isJSON ) {
			// this can be empty for the empty row i guess
			if ( sx->length() ) {
				// convert diffbot # to string
				sb->safePrintf("\"%s\":\"",cgi);
				// this is just the url path, not the title
				// of the menu option... so this would be
				// like "/api/article?u="
				sb->safeUtf8ToJSON (sx->getBufStart() );
				sb->safePrintf("\",\n");
			}
		}
		else
			printDiffbotDropDown ( sb , cgi , THIS , sx );
	}
	*/
	else if ( t == TYPE_CHARPTR ) {
		int32_t size = m->m_size;
		char *sp = NULL;
		if ( s && *s ) sp = *(char **)s;
		if ( ! sp ) sp = "";
		if ( m->m_flags & PF_TEXTAREA ) {
			sb->safePrintf ("<textarea name=%s rows=10 cols=80>",
					cgi);
			if ( m->m_obj != OBJ_NONE )
				sb->htmlEncode(sp,gbstrlen(sp),false);
			sb->safePrintf ("</textarea>");
		}
		else {
			sb->safePrintf ("<input type=text name=%s size=%" INT32 " "
					"value=\"",cgi,size);
			// if it has PF_DEFAULTCOLL flag set then use the coll
			if ( cr && (m->m_flags & PF_COLLDEFAULT) )
				sb->safePrintf("%s",cr->m_coll);
			else if ( sp )
				sb->dequote ( sp , gbstrlen(sp) );
			sb->safePrintf ("\">");
		}
	}
	else if ( t == TYPE_SAFEBUF ) {
		int32_t size = m->m_size;
		// give regular expression box on url filters page more room
		if ( m->m_page == PAGE_FILTERS ) {
			//if ( size > REGEX_TXT_MAX ) size = REGEX_TXT_MAX;
			size = 40;
		}
		else {
			if ( size > 20 ) size = 20;
		}
		SafeBuf *sx = (SafeBuf *)s;

		SafeBuf tmp;
		// if printing a parm in a one-shot deal like GigablastRequest
		// then s and sx will always be NULL, so set to default
		if ( ! sx ) {
			sx = &tmp;
			char *def = m->m_def;
			// if it has PF_DEFAULTCOLL flag set then use the coll
			if ( cr && (m->m_flags & PF_COLLDEFAULT) )
				def = cr->m_coll;
			tmp.safePrintf("%s",def);
		}

		// just show the parm name and value if printing in json
		// if ( format == FORMAT_JSON ) { // isJSON ) {
		// 	// this can be empty for the empty row i guess
		// 	if ( sx->length() ) {
		// 		// convert diffbot # to string
		// 		sb->safePrintf("\"%s\":\"",cgi);
		// 		if ( m->m_obj != OBJ_NONE )
		// 			sb->safeUtf8ToJSON (sx->getBufStart());
		// 		sb->safePrintf("\",\n");
		// 	}
		// }
		if ( m->m_flags & PF_TEXTAREA ) {
			int rows = 10;
			if ( m->m_flags & PF_SMALLTEXTAREA )
				rows = 4;
			sb->safePrintf ("<textarea id=tabox "
					"name=%s rows=%i cols=80>",
					cgi,rows);
			//sb->dequote ( s , gbstrlen(s) );
			// note it
			//log("hack: %s",sx->getBufStart());
			//sb->dequote ( sx->getBufStart() , sx->length() );
			if ( m->m_obj != OBJ_NONE )
				sb->htmlEncode(sx->getBufStart(),
					       sx->length(),false);
			sb->safePrintf ("</textarea>");
		}
		else {
			sb->safePrintf ("<input type=text name=%s size=%" INT32 " "
					"value=\"",
					cgi,size);
			//sb->dequote ( s , gbstrlen(s) );
			// note it
			//log("hack: %s",sx->getBufStart());

			if ( cr &&
			     (m->m_flags & PF_COLLDEFAULT) &&
			     sx &&
			     sx->length() <= 0 )
				sb->dequote ( cr->m_coll,gbstrlen(cr->m_coll));

			// if parm is OBJ_NONE there is no stored value
			else if ( m->m_obj != OBJ_NONE )
				sb->dequote ( sx->getBufStart(), sx->length());

			sb->safePrintf ("\">");
		}
	}
	else if ( t == TYPE_STRINGBOX ) {
		sb->safePrintf("<textarea id=tabox rows=10 cols=64 name=%s>",
			       cgi);
		//p += urlEncode ( p , pend - p , s , gbstrlen(s) );
		//p += htmlDecode ( p , s , gbstrlen(s) );
		sb->htmlEncode ( s , gbstrlen(s), false );
		//sprintf ( p , "%s" , s );
		//p += gbstrlen(p);
		sb->safePrintf ("</textarea>\n");
	}
	else if ( t == TYPE_CONSTANT )
		sb->safePrintf ("%s",m->m_title);
	else if ( t == TYPE_MONOD2 )
		sb->safePrintf ("%" INT32 "",j / 2 );
	else if ( t == TYPE_MONOM2 ) {
		/*
		if ( m->m_page == PAGE_PRIORITIES ) {
			if ( j % 2 == 0 ) sb->safePrintf ("old");
			else              sb->safePrintf ("new");
		}
		else
		*/
		sb->safePrintf ("%" INT32 "",j % 2 );
	}

	else if ( t == TYPE_RULESET ) ;
	// subscript is already included in "cgi"
	//g_pages.printRulesetDropDown ( sb ,
	//			       user ,
	//			       cgi ,
	//			       *(int32_t *)s , // selected
	//			       -1 ); // subscript
	else if ( t == TYPE_TIME ) {
		//time is stored as a string
		//if time is not stored properly, just write 00:00
		if ( s[2] != ':' )
			strcpy ( s, "00:00" );

		char hr[3];
		char min[3];
		gbmemcpy ( hr, s, 2 );
		gbmemcpy ( min, s + 3, 2 );
		hr[2] = '\0';
		min[2] = '\0';
		// print the time in the input forms
		sb->safePrintf("<input type=text name=%shr size=2 "
			       "value=%s>h "
			       "<input type=text name=%smin size=2 "
			       "value=%s>m " ,
			       cgi ,
			       hr ,
			       cgi ,
			       min );
	}

	else if ( t == TYPE_DATE || t == TYPE_DATE2 ) {
		// time is stored as int32_t
		int32_t ct = *(int32_t *)s;
		// copy into a real time_t first; casting the address of an
		// int32_t straight to time_t* reads 8 bytes on 64-bit systems
		time_t ctt = (time_t)ct;
		// get the time struct
		struct tm *tp = gmtime ( &ctt ) ;
		// set the "selected" month for the drop down
		char *ss[12];
		for ( int32_t i = 0 ; i < 12 ; i++ ) ss[i]="";
		int32_t month = tp->tm_mon;
		if ( month < 0 || month > 11 ) month = 0; // Jan
		ss[month] = " selected";
		// print the date in the input forms
		sb->safePrintf(
			"<input type=text name=%sday "
			"size=2 value=%" INT32 "> "
			"<select name=%smon>"
			"<option value=0%s>Jan"
			"<option value=1%s>Feb"
			"<option value=2%s>Mar"
			"<option value=3%s>Apr"
			"<option value=4%s>May"
			"<option value=5%s>Jun"
			"<option value=6%s>Jul"
			"<option value=7%s>Aug"
			"<option value=8%s>Sep"
			"<option value=9%s>Oct"
			"<option value=10%s>Nov"
			"<option value=11%s>Dec"
			"</select>\n"
			"<input type=text name=%syr size=4 value=%" INT32 ">"
			"<br>"
			"<input type=text name=%shr size=2 "
			"value=%02" INT32 ">h "
			"<input type=text name=%smin size=2 "
			"value=%02" INT32 ">m "
			"<input type=text name=%ssec size=2 "
			"value=%02" INT32 ">s" ,
			cgi ,
			(int32_t)tp->tm_mday ,
			cgi ,
			ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[8],
			ss[9],ss[10],ss[11],
			cgi ,
			(int32_t)tp->tm_year + 1900 ,
			cgi ,
			(int32_t)tp->tm_hour ,
			cgi ,
			(int32_t)tp->tm_min ,
			cgi ,
			(int32_t)tp->tm_sec );
		/*
		if ( t == TYPE_DATE2 ) {
			p += gbstrlen ( p );
			// a int32_t after the int32_t is used for this
			int32_t ct = *(int32_t *)(THIS+m->m_off+4);
			char *ss = "";
			if ( ct ) ss = " checked";
			sprintf ( p , "<br><input type=checkbox "
				  "name=%sct value=1%s> use current "
				  "time\n",cgi,ss);
		}
		*/
	}
	else if ( t == TYPE_SITERULE ) {
		// print the siterec rules as a drop down
		char *ss[5];
		for ( int32_t i = 0; i < 5; i++ ) ss[i] = "";
		int32_t v = *(int32_t*)s;
		if ( v < 0 || v > 4 ) v = 0;
		ss[v] = " selected";
		// note: only rules 0-3 are rendered below, so ss[4] never
		// shows as selected even though v==4 passes the check above
		sb->safePrintf ( "<select name=%s>"
				 "<option value=0%s>Hostname"
				 "<option value=1%s>Path Depth 1"
				 "<option value=2%s>Path Depth 2"
				 "<option value=3%s>Path Depth 3"
				 "</select>\n",
				 cgi, ss[0], ss[1], ss[2], ss[3] );
	}

	// end the input cell
	sb->safePrintf ( "</td>\n");

	// "insert above" link? used for arrays only, where order matters
	if ( m->m_addin && j < jend ) {//! isJSON ) {
		sb->safePrintf ( "<td><a href=\"?c=%s&" // cast=1&"
				 //"ins_%s=1\">insert</td>\n",coll,cgi );
				 // insert=<rowNum>
				 // "j" is the row #
				 "insert=%" INT32 "\">insert</a></td>\n",coll,j );
	}

	// does next guy start a new row?
	bool lastInRow = true; // assume yes
	if (mm+1<m_numParms&&m->m_rowid>=0&&m_parms[mm+1].m_rowid==m->m_rowid)
		lastInRow = false;
	if ( ((s_count-1) % nc) != (nc-1) ) lastInRow = false;

	// . display the remove link for arrays if we need to
	// . but don't display if next guy does NOT start a new row
	//if ( m->m_max > 1 && lastInRow && ! isJSON ) {
	if ( m->m_addin && j < jend ) { //! isJSON ) {
		// m->m_page != PAGE_PRIORITIES ) {
		// show remove link?
		bool show = true;
		//if ( j >= jend ) show = false;
		// get # of rows
		int32_t *nr = (int32_t *)((char *)THIS + m->m_off - 4);
		// are we the last row?
		bool lastRow = false;
		// yes, if this is true
		if ( j == *nr - 1 ) lastRow = true;
		// do not allow removal of last default url filters rule
		//if ( lastRow && !strcmp(m->m_cgi,"fsp")) show = false;
		char *suffix = "";
		if ( m->m_page == PAGE_MASTERPASSWORDS &&
		     m->m_type == TYPE_IP )
			suffix = "ip";
		if ( m->m_page == PAGE_MASTERPASSWORDS &&
		     m->m_type == TYPE_STRINGNONEMPTY )
			suffix = "pwd";
		if ( show )
			sb->safePrintf ("<td><a href=\"?c=%s&" // cast=1&"
					//"rm_%s=1\">"
					// remove=<rownum>
					"remove%s=%" INT32 "\">"
					"remove</a></td>\n",coll,//cgi );
					suffix,
					j); // j is row #

		else
			sb->safePrintf ( "<td></td>\n");
	}

	if ( lastInRow ) sb->safePrintf ("</tr>\n");
	return status;
}

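// A rough sketch of the links the array rows above produce, for row
// j=2 on collection "main" (values illustrative; the hrefs are relative
// so the page path comes from the browser):
//
//   insert link: ?c=main&insert=2
//   remove link: ?c=main&removeip=2
//
// the "ip"/"pwd" suffix on remove disambiguates which of the two
// arrays on PAGE_MASTERPASSWORDS the row number refers to; other
// pages use plain "remove".
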
/*
// get the object of our desire
char *Parms::getTHIS ( HttpRequest *r , int32_t page ) {
	// if not master controls, must be a collection rec
	//if ( page < PAGE_CGIPARMS ) return (char *)&g_conf;
	char *coll = r->getString ( "c" );
	// support john wanting to use "id" for the crawl id which is really
	// the collection id, hopefully won't conflict with other things.
	if ( ! coll ) coll = r->getString ( "id" );
	if ( ! coll || ! coll[0] )
		//coll = g_conf.m_defaultColl;
		coll = g_conf.getDefaultColl( r->getHost(), r->getHostLen() );
	CollectionRec *cr = g_collectiondb.getRec ( coll );
	if ( ! cr ) log("admin: Collection \"%s\" not found.",
			r->getString("c") );
	return (char *)cr;
}
*/

// now we use this to set SearchInput and GigablastRequest
bool Parms::setFromRequest ( HttpRequest *r ,
			     TcpSocket* s,
			     CollectionRec *newcr ,
			     char *THIS ,
			     int32_t objType ) {

	// get the page from the path... like /sockets --> PAGE_SOCKETS
	//int32_t page = g_pages.getDynamicPageNumber ( r );

	// use convertHttpRequestToParmList() for these because they
	// are persistent records that are updated on every shard.
	if ( objType == OBJ_COLL ) { char *xx=NULL;*xx=0; }
	if ( objType == OBJ_CONF ) { char *xx=NULL;*xx=0; }

	// ensure valid
	if ( ! THIS ) {
		// it is null when no collection explicitly specified...
		log(LOG_LOGIC,"admin: THIS is null for setFromRequest");
		char *xx=NULL;*xx=0;
	}

	// need this for searchInput which takes default from "cr"
	//CollectionRec *cr = g_collectiondb.getRec ( r , true );

	// no SearchInput.cpp does this and then overrides if xml feed
	// to set m_docsToScanForTopics
	//setToDefault ( THIS , objType , cr );

	// loop through cgi parms
	for ( int32_t i = 0 ; i < r->getNumFields() ; i++ ) {
		// get cgi parm name
		char *field = r->getField ( i );
		// find in parms list
		int32_t j;
		Parm *m;
		for ( j = 0 ; j < m_numParms ; j++ ) {
			// get it
			m = &m_parms[j];
			// skip if not our type
			if ( m->m_obj != objType ) continue;
			// skip if offset is negative, that means none
			if ( m->m_off < 0 ) continue;
			// skip if no cgi parm, may not be configurable now
			if ( ! m->m_cgi ) continue;
			// otherwise, must match the cgi name exactly
			if ( strcmp ( field,m->m_cgi ) == 0 ) break;
		}
		// bail if the cgi field is not in the parms list
		if ( j >= m_numParms ) continue;
		// get the value of cgi parm (null terminated)
		char *v = r->getValue ( i );
		// empty?
		if ( ! v ) continue;
		// . skip if no value was provided
		// . unless it was a string! so we can make them empty.
		if ( v[0] == '\0' &&
		     m->m_type != TYPE_STRING &&
		     m->m_type != TYPE_STRINGBOX ) continue;
		// set it
		setParm ( (char *)THIS , m, j, 0, v, false,//not html enc
			  false );//true );
	}

	return true;
}

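// Minimal usage sketch (illustrative; kept in a comment so it is not
// compiled). Callers default the one-shot object first, then overlay
// any cgi fields that match a parm's m_cgi name:
//
//   GigablastRequest gr;
//   g_parms.setToDefault ( (char *)&gr , OBJ_GBREQUEST , NULL );
//   g_parms.setFromRequest ( r , sock , NULL , (char *)&gr ,
//                            OBJ_GBREQUEST );
//
// OBJ_COLL and OBJ_CONF objects would hit the sanity aborts above
// since those are updated through the parmdb broadcast path instead.
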
bool Parms::insertParm ( int32_t i , int32_t an , char *THIS ) {
	Parm *m = &m_parms[i];
	// . shift everyone above down
	// . first int32_t at offset is always the count
	//   for arrays
	char *pos = (char *)THIS + m->m_off ;
	int32_t num = *(int32_t *)(pos - 4);
	// ensure we are valid
	if ( an >= num || an < 0 ) {
		log("admin: Invalid insertion of element "
		    "%" INT32 " in array of size %" INT32 " for \"%s\".",
		    an,num,m->m_title);
		return false;
	}
	// also ensure that we have space to put the parm in, because in
	// case of URL filters, it is bounded by MAX_FILTERS
	if ( num >= MAX_FILTERS ){
		log("admin: Invalid insert of element %" INT32 ", array is full "
		    "in size %" INT32 " for \"%s\".",an, num, m->m_title);
		return false;
	}
	// point to the place where the element is to be inserted
	char *src = pos + m->m_size * an;

	//point to where it is to be moved
	char *dst = pos + m->m_size * ( an + 1 );

	// how much to move
	int32_t size = ( num - an ) * m->m_size ;
	// move them
	memmove ( dst , src , size );
	// if the src was a TYPE_SAFEBUF clear it so we don't end up doing
	// a double free, etc.!
	memset ( src , 0 , m->m_size );
	// inc the count
	*(int32_t *)(pos-4) = (*(int32_t *)(pos-4)) + 1;
	// put the defaults in the inserted line
	setParm ( (char *)THIS , m , i , an , m->m_def , false ,false );
	return true;
}

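// Memory layout sketch assumed by insertParm() above (and removeParm()
// below): the int32_t element count sits 4 bytes before the array data
// at m_off. E.g. inserting the default at an=1 into a 3-element array:
//
//   before:  [count=3][ A ][ B ][ C ]
//   after:   [count=4][ A ][def][ B ][ C ]
//
// the vacated slot is zeroed before setParm() fills it so that a stale
// SafeBuf pointer copied up by the memmove() can not be double-freed.
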
bool Parms::removeParm ( int32_t i , int32_t an , char *THIS ) {
	Parm *m = &m_parms[i];
	// . shift everyone above down
	// . first int32_t at offset is always the count
	//   for arrays
	char *pos = (char *)THIS + m->m_off ;
	int32_t num = *(int32_t *)(pos - 4);
	// ensure we are valid
	if ( an >= num || an < 0 ) {
		log("admin: Invalid removal of element "
		    "%" INT32 " in array of size %" INT32 " for \"%s\".",
		    an,num,m->m_title);
		return false;
	}
	// point to the element being removed
	char *dst = pos + m->m_size * an;
	// free memory pointed to by safebuf, if we are safebuf, before
	// overwriting it... prevents a memory leak
	if ( m->m_type == TYPE_SAFEBUF ) {
		SafeBuf *dx = (SafeBuf *)dst;
		dx->purge();
	}
	// then point to the good stuff
	char *src = pos + m->m_size * (an+1);
	// how much to bury it with
	int32_t size = (num - an - 1 ) * m->m_size ;
	// bury it
	gbmemcpy ( dst , src , size );

	// and detach the buf on the tail so it doesn't core in Mem.cpp
	// when it tries to free...
	if ( m->m_type == TYPE_SAFEBUF ) {
		SafeBuf *tail = (SafeBuf *)(pos + m->m_size * (num-1));
		tail->detachBuf();
	}

	// dec the count
	*(int32_t *)(pos-4) = (*(int32_t *)(pos-4)) - 1;
	return true;
}

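// And the inverse sketch for removeParm() at an=1:
//
//   before:  [count=3][ A ][ B ][ C ]
//   after:   [count=2][ A ][ C ][ C(tail) ]
//
// the gbmemcpy() leaves a duplicate of the last element in the tail
// slot, so for TYPE_SAFEBUF that tail is detachBuf()'d to keep its
// buffer from being freed twice.
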
void Parms::setParm ( char *THIS , Parm *m , int32_t mm , int32_t j , char *s ,
		      bool isHtmlEncoded , bool fromRequest ) {

	if ( fromRequest ) { char *xx=NULL;*xx=0; }

	// . this is just for setting CollectionRecs, so skip if offset < 0
	// . some parms are just for SearchInput (search parms)
	if ( m->m_off < 0 ) return;

	if ( m->m_obj == OBJ_NONE ) return ;

	float oldVal = 0;
	float newVal = 0;

	if ( ! s &&
	     m->m_type != TYPE_CHARPTR &&
	     m->m_type != TYPE_FILEUPLOADBUTTON &&
	     m->m_defOff==-1) {
		s = "0";
		char *tit = m->m_title;
		if ( ! tit || ! tit[0] ) tit = m->m_xml;
		log(LOG_LOGIC,"admin: Parm \"%s\" had NULL default value. "
		    "Forcing to 0.",
		    tit);
		//char *xx = NULL; *xx = 0;
	}

	// sanity check
	if ( &m_parms[mm] != m ) {
		log(LOG_LOGIC,"admin: Not sane parameters.");
		char *xx = NULL; *xx = 0;
	}

	// if attempting to add beyond array max, bail out
	if ( j >= m->m_max && j >= m->m_fixed ) {
		log ( "admin: Attempted to set parm beyond limit. Aborting." );
		return;
	}

	// if we are setting a guy in an array AND he is NOT the first
	// in his row, ensure the guy before has a count of j+1 or more.
	//
	// crap, on the url filters page if you do not check "spidering
	// enabled" checkbox when adding a new rule at the bottom of the
	// table, then the spidering enabled parameter does not transmit so
	// the "respider frequency" ends up checking the "spider enabled"
	// array whose "count" was not incremented like it should have been.
	// HACK: make new line at bottom always have spidering enabled
	// checkbox set and make it impossible to unset.
	/*
	if ( m->m_max > 1 && m->m_rowid >= 0 && mm > 0 &&
	     m_parms[mm-1].m_rowid == m->m_rowid ) {
		char *pos = (char *)THIS + m_parms[mm-1].m_off - 4 ;
		int32_t maxcount = *(int32_t *)pos;
		if ( j >= maxcount ) {
			log("admin: parm before \"%s\" is limiting us",
			    m_parms[mm-1].m_title);
			//log("admin: try nuking the url filters or whatever "
			//    "and re-adding");
			return;
		}
	}
	*/

	// ensure array count at least j+1
	if ( m->m_max > 1 ) {
		// . is this element we're adding bumping up the count?
		// . array count is 4 bytes before the array
		char *pos = (char *)THIS + m->m_off - 4 ;
		// set the count to it if it is bigger than current count
		if ( j + 1 > *(int32_t *)pos ) *(int32_t *)pos = j + 1;
	}

	char t = m->m_type;

	if ( t == TYPE_CHAR ||
	     t == TYPE_CHAR2 ||
	     t == TYPE_CHECKBOX ||
	     t == TYPE_BOOL ||
	     t == TYPE_BOOL2 ||
	     t == TYPE_PRIORITY ||
	     t == TYPE_PRIORITY2 ||
	     //t == TYPE_DIFFBOT_DROPDOWN ||
	     t == TYPE_UFP ||
	     t == TYPE_PRIORITY_BOXES ||
	     t == TYPE_RETRIES ||
	     t == TYPE_FILTER ) {
		if ( fromRequest && *(char *)(THIS + m->m_off + j) == atol(s))
			return;
		if ( fromRequest)oldVal = (float)*(char *)(THIS + m->m_off +j);
		*(char *)(THIS + m->m_off + j) = atol ( s );
		newVal = (float)*(char *)(THIS + m->m_off + j);
		goto changed; }
	else if ( t == TYPE_CHARPTR ) {
		// "s" might be NULL or m->m_def...
		*(char **)(THIS + m->m_off + j) = s;
	}
	else if ( t == TYPE_FILEUPLOADBUTTON ) {
		// "s" might be NULL or m->m_def...
		*(char **)(THIS + m->m_off + j) = s;
	}
	else if ( t == TYPE_CMD ) {
		log(LOG_LOGIC, "conf: Parms: TYPE_CMD is not a cgi var.");
		return; }
	else if ( t == TYPE_DATE2 || t == TYPE_DATE ) {
		int32_t v = (int32_t)atotime ( s );
		if ( fromRequest && *(int32_t *)(THIS + m->m_off + 4*j) == v )
			return;
		*(int32_t *)(THIS + m->m_off + 4*j) = v;
		if ( v < 0 ) log("conf: Date for <%s> of \""
				 "%s\" is not in proper format like: "
				 "01 Jan 1980 22:45",m->m_xml,s);
		goto changed; }
	else if ( t == TYPE_FLOAT ) {
		if( fromRequest &&
		    *(float *)(THIS + m->m_off + 4*j) == (float)atof ( s ) )
			return;
		// if changed within .00001 that is ok too, do not count
		// as changed, the atof() has roundoff errors
		//float curVal = *(float *)(THIS + m->m_off + 4*j);
		//float newVal = atof(s);
		//if ( newVal < curVal && newVal + .000001 >= curVal ) return;
		//if ( newVal > curVal && newVal - .000001 <= curVal ) return;
		if ( fromRequest ) oldVal = *(float *)(THIS + m->m_off + 4*j);
		*(float *)(THIS + m->m_off + 4*j) = (float)atof ( s );
		newVal = *(float *)(THIS + m->m_off + 4*j);
		goto changed; }
	else if ( t == TYPE_DOUBLE ) {
		// doubles are 8 bytes wide, so index the array by 8*j,
		// not 4*j, or elements past the first would overlap
		if( fromRequest &&
		    *(double *)(THIS + m->m_off + 8*j) == (double)atof ( s ) )
			return;
		if ( fromRequest ) oldVal = *(double *)(THIS + m->m_off + 8*j);
		*(double *)(THIS + m->m_off + 8*j) = (double)atof ( s );
		newVal = *(double *)(THIS + m->m_off + 8*j);
		goto changed; }
	else if ( t == TYPE_IP ) {
		if ( fromRequest && *(int32_t *)(THIS + m->m_off + 4*j) ==
		     (int32_t)atoip (s,gbstrlen(s) ) )
			return;
		*(int32_t *)(THIS + m->m_off + 4*j) = (int32_t)atoip (s,gbstrlen(s) );
		goto changed; }
	else if ( t == TYPE_LONG || t == TYPE_LONG_CONST || t == TYPE_RULESET||
		  t == TYPE_SITERULE ) {
		int32_t v = atol ( s );
		// min is considered valid if >= 0
		if ( m->m_min >= 0 && v < m->m_min ) v = m->m_min;
		if ( fromRequest && *(int32_t *)(THIS + m->m_off + 4*j) == v )
			return;
		if ( fromRequest)oldVal=(float)*(int32_t *)(THIS + m->m_off +4*j);
		*(int32_t *)(THIS + m->m_off + 4*j) = v;
		newVal = (float)*(int32_t *)(THIS + m->m_off + 4*j);
		goto changed; }
	else if ( t == TYPE_LONG_LONG ) {
		if ( fromRequest &&
		     *(uint64_t *)(THIS + m->m_off+8*j)==
		     strtoull(s,NULL,10))
			return;
		*(int64_t *)(THIS + m->m_off + 8*j) = strtoull(s,NULL,10);
		goto changed; }
	// like TYPE_STRING but dynamically allocates
	else if ( t == TYPE_SAFEBUF ) {
		int32_t len = gbstrlen(s);
		// no need to truncate since safebuf is dynamic
		//if ( len >= m->m_size ) len = m->m_size - 1; // truncate!!
		//char *dst = THIS + m->m_off + m->m_size*j ;
		// point to the safebuf, in the case of an array of
		// SafeBufs "j" is the # in the array, starting at 0
		SafeBuf *sb = (SafeBuf *)(THIS+m->m_off+(j*sizeof(SafeBuf)) );
		int32_t oldLen = sb->length();
		// why was this commented out??? we need it now that we
		// send email alerts when parms change!
		if ( fromRequest &&
		     ! isHtmlEncoded && oldLen == len &&
		     memcmp ( sb->getBufStart() , s , len ) == 0 )
			return;
		// nuke it
		sb->purge();
		// this means that we can not use string POINTERS as parms!!
		if ( ! isHtmlEncoded ) sb->safeMemcpy ( s , len );
		else len = sb->htmlDecode (s,len,false,0);
		// tag it
		sb->setLabel ( "parm1" );
		// ensure null terminated
		sb->nullTerm();
		// note it
		//log("hack: %s",s);

		// null term it all
		//dst[len] = '\0';
		//sb->reserve ( 1 );
		// null terminate but do not include as m_length so the
		// memcmp() above still works right
		//sb->m_buf[sb->m_length] = '\0';
		// . might have to set length
		// . used for CollectionRec::m_htmlHeadLen and m_htmlTailLen
		//if ( m->m_plen >= 0 )
		//	*(int32_t *)(THIS + m->m_plen) = len ;
		goto changed;
	}
	else if ( t == TYPE_STRING ||
		  t == TYPE_STRINGBOX ||
		  t == TYPE_STRINGNONEMPTY ||
		  t == TYPE_TIME ) {
		int32_t len = gbstrlen(s);
		if ( len >= m->m_size ) len = m->m_size - 1; // truncate!!
		char *dst = THIS + m->m_off + m->m_size*j ;
		// why was this commented out??? we need it now that we
		// send email alerts when parms change!
		if ( fromRequest &&
		     ! isHtmlEncoded && (int32_t)gbstrlen(dst) == len &&
		     memcmp ( dst , s , len ) == 0 )
			return;
		// this means that we can not use string POINTERS as parms!!
		if ( ! isHtmlEncoded ) {gbmemcpy ( dst , s , len ); }
		else len = htmlDecode (dst , s,len,false,0);
		dst[len] = '\0';
		// . might have to set length
		// . used for CollectionRec::m_htmlHeadLen and m_htmlTailLen
		if ( m->m_plen >= 0 )
			*(int32_t *)(THIS + m->m_plen) = len ;
		goto changed; }
 changed:
	// tell gigablast the value is EXPLICITLY given -- no longer based
	// on default.conf
	//if ( m->m_obj == OBJ_COLL ) ((CollectionRec *)THIS)->m_orig[mm] = 2;

	// we do not recognize timezones correctly when this is serialized
	// into coll.conf, it says UTC, which is ignored in HttpMime.cpp's
	// atotime() function. and when we submit it i think we use the
	// local time zone, so the values end up changing every time we
	// submit!!! i think it might read it in as UTC then write it out
	// as local time, or vice versa.
	if ( t == TYPE_DATE || t == TYPE_DATE2 ) return;

	// do not send if setting from startup
	if ( ! fromRequest ) return;

	// note it in the log
	log("admin: parm \"%s\" changed value",m->m_title);

	int64_t nowms = gettimeofdayInMillisecondsLocal();

	// . note it in statsdb
	// . record what parm changed and from/to what value
	g_statsdb.addStat ( 0, // niceness ,
			    "parm_change" ,
			    nowms,
			    nowms,
			    0 , // value
			    m->m_hash , // parmHash
			    oldVal,
			    newVal);

	// if they turn spiders on or off then tell spiderloop to update
	// the active list
	//if ( strcmp(m->m_cgi,"cse") )
	//	g_spiderLoop.m_activeListValid = false;

	// only send email alerts if we are host 0 since everyone syncs up
	// with host #0 anyway
	if ( g_hostdb.m_hostId != 0 ) return;

	// send an email alert notifying the admins that this parm was changed
	// BUT ALWAYS send it if email alerts were just TURNED OFF
	// ("sea" = Send Email Alerts)
	if ( ! g_conf.m_sendEmailAlerts && strcmp(m->m_cgi,"sea") != 0 )
		return;

	// if spiders were turned on, do not send an email alert, cuz we
	// turn them on when we restart the cluster
	if ( strcmp(m->m_cgi,"se")==0 && g_conf.m_spideringEnabled )
		return;


	char tmp[1024];
	Host *h0 = g_hostdb.getHost ( 0 );
	int32_t ip0 = 0;
	if ( h0 ) ip0 = h0->m_ip;
	sprintf(tmp,"%s: parm \"%s\" changed value",iptoa(ip0),m->m_title);
	g_pingServer.sendEmail ( NULL , // Host ptr
				 tmp , // msg
				 true , // sendToAdmin
				 false , // oom?
				 false , // kernel error?
				 true , // parm change?
				 true );// force it? even if disabled?

	// now the spider collection can just check the collection rec
	//int64_t nowms = gettimeofdayInMilliseconds();
	//((CollectionRec *)THIS)->m_lastUpdateTime = nowms;

	return;
}

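// A concrete sketch of the dispatch above (illustrative values only):
// a TYPE_LONG parm with cgi value s="50" and element j=0 reduces to
//
//   int32_t v = atol ( "50" );              // -> 50
//   if ( m->m_min >= 0 && v < m->m_min )    // clamp to m_min if set
//           v = m->m_min;
//   *(int32_t *)(THIS + m->m_off) = v;      // store into the object
//
// whereas TYPE_SAFEBUF stores an owned copy of the bytes, which is why
// plain string POINTERS can never be used as parm storage.
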
Parm *Parms::getParmFromParmHash ( int32_t parmHash ) {
	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
		Parm *m = &m_parms[i];
		if ( m->m_hash != parmHash ) continue;
		return m;
	}
	return NULL;
}

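// Usage sketch (illustrative): map the parm hash recorded in a statsdb
// "parm_change" stat back to its Parm for display:
//
//   Parm *m = g_parms.getParmFromParmHash ( parmHash );
//   if ( m ) log("stats: parm \"%s\" changed",m->m_title);
//
// a NULL return just means no current parm has that hash, e.g. the
// record came from a build with a different parm set.
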
void Parms::setToDefault ( char *THIS , char objType , CollectionRec *argcr ) {
	// init if we should
	init();

	// . clear out any coll rec to get the diffbotApiNum dropdowns
	// . this is a backwards-compatibility hack since this new parm
	//   will not be in old coll.conf files and will not be properly
	//   initialized when displaying a url filter row.
	//if ( THIS != (char *)&g_conf ) {
	//	CollectionRec *cr = (CollectionRec *)THIS;
	//	memset ( cr->m_spiderDiffbotApiNum , 0 , MAX_FILTERS);
	//}

	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
		Parm *m = &m_parms[i];
		if ( m->m_obj != objType ) continue;
		if ( m->m_obj == OBJ_NONE ) continue;
		if ( m->m_type == TYPE_COMMENT ) continue;
		// no, we gotta set GigablastRequest::m_contentFile to NULL
		//if ( m->m_type == TYPE_FILEUPLOADBUTTON )
		//	continue;
		if ( m->m_type == TYPE_MONOD2 ) continue;
		if ( m->m_type == TYPE_MONOM2 ) continue;
		if ( m->m_type == TYPE_CMD ) continue;
		if (THIS == (char *)&g_conf && m->m_obj != OBJ_CONF ) continue;
		if (THIS != (char *)&g_conf && m->m_obj == OBJ_CONF ) continue;
		// what is this?
		//if ( m->m_obj == OBJ_COLL ) {
		//	CollectionRec *cr = (CollectionRec *)THIS;
		//	if ( cr->m_bases[1] ) { char *xx=NULL;*xx=0; }
		//}
		// sanity check, make sure it does not overflow
		if ( m->m_obj == OBJ_COLL &&
		     m->m_off > (int32_t)sizeof(CollectionRec)){
			log(LOG_LOGIC,"admin: Parm in Parms.cpp should use "
			    "OBJ_COLL not OBJ_CONF");
			char *xx = NULL; *xx = 0;
		}
		//if ( m->m_page == PAGE_PRIORITIES )
		//	log("hey");
		// or
		if ( m->m_page > PAGE_API && // CGIPARMS &&
		     m->m_page != PAGE_NONE &&
		     m->m_obj == OBJ_CONF ) {
			log(LOG_LOGIC,"admin: Page can not reference "
			    "g_conf and be declared AFTER PAGE_CGIPARMS in "
			    "Pages.h. Title=%s",m->m_title);
			char *xx = NULL; *xx = 0;
		}
		// if defOff >= 0 get from cr like for searchInput vals
		// whose default is from the collectionRec...
		if ( m->m_defOff >= 0 && argcr ) {
			if ( ! argcr ) { char *xx=NULL;*xx=0; }
			char *def = m->m_defOff+(char *)argcr;
			char *dst = (char *)THIS + m->m_off;
			gbmemcpy ( dst , def , m->m_size );
			continue;
		}
		// leave arrays empty, set everything else to default
		if ( m->m_max <= 1 ) {
			//if ( i == 282 ) // "query" parm
			//	log("hey");
			//if ( ! m->m_def ) { char *xx=NULL;*xx=0; }
			setParm ( THIS , m, i, 0, m->m_def, false/*not enc.*/,
				  false );
			//((CollectionRec *)THIS)->m_orig[i] = 1;
			//m->m_orig = 0; // set in setToDefaults()
		}
		// these are special, fixed size arrays
		if ( m->m_fixed > 0 ) {
			for ( int32_t k = 0 ; k < m->m_fixed ; k++ ) {
				setParm(THIS,m,i,k,m->m_def,false/*not enc.*/,
					false);
				//m->m_orig = 0; // set in setToDefaults()
				//((CollectionRec *)THIS)->m_orig[i] = 1;
			}
			continue;
		}
		// make array sizes 0
		if ( m->m_max <= 1 ) continue;
		// otherwise, array is not fixed size
		char *s = THIS + m->m_off ;
		// set count to 1 if a default is present
		//if ( m->m_def[0] ) *(int32_t *)(s-4) = 1;
		//else               *(int32_t *)(s-4) = 0;
		*(int32_t *)(s-4) = 0;
	}
}

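// Call-order sketch (illustrative): one-shot request objects are fully
// defaulted before any cgi overlay, exactly as printParms2() does:
//
//   InjectionRequest ir;
//   g_parms.setToDefault ( (char *)&ir , OBJ_IR , NULL );
//
// passing a CollectionRec as argcr makes any parm with m_defOff >= 0
// copy its default out of that rec instead of using m_def.
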
// . returns false and sets g_errno on error
// . you should set your "THIS" to its defaults before calling this
bool Parms::setFromFile ( void *THIS ,
			  char *filename ,
			  char *filenameDef ,
			  char  objType ) {
	// make sure we're init'd
	init();
	// let em know
	//if ( THIS == &g_conf) log (LOG_INIT,"conf: Reading %s." , filename );
	// . let the log know what we are doing
	// . filename is NULL if a call from CollectionRec::setToDefaults()
	Xml xml;
	//char buf [ MAX_XML_CONF ];
	SafeBuf sb;
	if ( filename&&!setXmlFromFile(&xml,filename,&sb)){//buf,MAX_XML_CONF))
		log("parms: error setting from file %s: %s",filename,
		    mstrerror(g_errno));
		return false;
	}

	// . all the collectionRecs have the same default file in
	//   the workingDir/collections/default.conf
	// . so use our built in buffer for that
	/*
	if ( THIS != &g_conf && ! m_isDefaultLoaded ) {
		m_isDefaultLoaded = true;
		File f;
		f.set ( filenameDef );
		if ( ! f.doesExist() ) {
			log(LOG_INIT,
			    "db: Default collection configuration file "
			    "%s was not found. Newly created collections "
			    "will use hard coded defaults.",f.getFilename());
			goto skip;
		}
		if ( ! setXmlFromFile ( &m_xml2 ,
					filenameDef ,
					m_buf ,
					MAX_XML_CONF ) ) return false;
	}

 skip:
	*/
	int32_t vlen;
	char   *v ;
	//char c ;
	int32_t numNodes  = xml.getNumNodes();
	int32_t numNodes2 = m_xml2.getNumNodes();

	// now set THIS based on the parameters in the xml file
	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
		// get it
		Parm *m = &m_parms[i];
		if ( m->m_obj != objType ) continue;
		if ( m->m_obj == OBJ_NONE ) continue;
		//log(LOG_DEBUG, "Parms: %s: parm: %s", filename, m->m_xml);
		// . there are 2 object types, coll recs and g_conf, aka
		//   OBJ_COLL and OBJ_CONF.
		// . make sure we got the right parms for what we want
		if ( THIS == &g_conf && m->m_obj != OBJ_CONF ) continue;
		if ( THIS != &g_conf && m->m_obj == OBJ_CONF ) continue;
		// skip comments and commands
		if ( m->m_type == TYPE_COMMENT ) continue;
		if ( m->m_type == TYPE_FILEUPLOADBUTTON ) continue;
		if ( m->m_type == TYPE_MONOD2 ) continue;
		if ( m->m_type == TYPE_MONOM2 ) continue;
		if ( m->m_type == TYPE_CMD ) continue;
		if ( m->m_type == TYPE_CONSTANT ) continue;
		// these are special commands really
		if ( m->m_type == TYPE_BOOL2 ) continue;
		//if ( strcmp ( m->m_xml , "forceDeleteUrls" ) == 0 )
		//	log("got it");
		// we did not get one from the first xml file yet
		bool first = true;
		// array count
		int32_t j = 0;
		// node number
		int32_t nn = 0;
		// a tmp thingy
		char tt[1];
		int32_t nb;
		int32_t newnn;
	loop:
		if ( m->m_obj == OBJ_NONE ) { char *xx=NULL;*xx=0; }
		// get xml node number of m->m_xml in the "xml" file
		newnn = xml.getNodeNum(nn,1000000,m->m_xml,gbstrlen(m->m_xml));
#ifdef _GLOBALSPEC_
		if ( m->m_priv == 2 ) continue;
		if ( m->m_priv == 3 ) continue;
#elif _CLIENT_
		// always use default value if client is not allowed control
		// of this parm
		if ( m->m_priv ) continue;
#elif _METALINCS_
		if ( m->m_priv == 2 ) continue;
		if ( m->m_priv == 3 ) continue;
#endif

		// debug
		//log("%s --> %" INT32 "",m->m_xml,nn);
		// try default xml file if none, but only if first try
		if ( newnn < 0 && first ) goto try2;
		// it is valid, use it
		nn = newnn;
		// set the flag, we've committed the array to the first file
		first = false;
		// otherwise, we had some in this file, but now we're out
		if ( nn < 0 ) continue;
		// . next node is the value of this tag
		// . skip if none there
		if ( nn + 1 >= numNodes ) continue;
		// point to it
		v    = xml.getNode    ( nn + 1 );
		vlen = xml.getNodeLen ( nn + 1 );
		// if a back tag... set the value to the empty string
		if ( v[0] == '<' && v[1] == '/' ) vlen = 0;
		// now, extricate from the <![CDATA[ ... ]]> tag if we need to
		if ( m->m_type == TYPE_STRING ||
		     m->m_type == TYPE_STRINGBOX ||
		     m->m_type == TYPE_SAFEBUF ||
		     m->m_type == TYPE_STRINGNONEMPTY ) {
			char   *oldv    = v;
			int32_t oldvlen = vlen;
			// if next guy is NOT a tag node, try the next one
			if ( v[0] != '<' && nn + 2 < numNodes ) {
				v    = xml.getNode    ( nn + 2 );
				vlen = xml.getNodeLen ( nn + 2 );
			}
			// should be a <![CDATA[...]]>
			if ( vlen<12 || strncasecmp(v,"<![CDATA[",9)!=0 ) {
				log("conf: No <![CDATA[...]]> tag found "
				    "for \"<%s>\" tag. Trying without CDATA.",
				    m->m_xml);
				v    = oldv;
				vlen = oldvlen;
			}
			// point to the nugget
			else {
				v    += 9;
				vlen -= 12;
			}
		}
		// get the value
		//v = xml.getString ( nn , nn+2 , m->m_xml , &vlen );
		// this only happens when tag is there, but without a value
		if ( ! v || vlen == 0 ) { vlen = 0; v = tt; }
		//c = v[vlen];
		v[vlen]='\0';
		if ( vlen == 0 ){
			// . this is generally ok
			// . this is spamming the log so i am commenting
			//   it out! (MDW)
			//log(LOG_INFO, "parms: %s: Empty value.", m->m_xml);
			// allow an empty string
			//continue;
		}

		// now use proper cdata
		// we can't do this and be backwards compatible right now
		//nb = cdataDecode ( v , v , 0 );//, vlen , false ,0);
		// now decode it into itself
		nb = htmlDecode ( v , v , vlen , false ,0);
		v[nb] = '\0';
		// set our parm
		setParm ( (char *)THIS, m, i, j, v, false/*is html encoded?*/,
			  false );
		// we were set from the explicit file
		//((CollectionRec *)THIS)->m_orig[i] = 2;
		// go back
		//v[vlen] = c;
		// do not repeat same node
		nn++;
		// try to get the next node if we're an array
		if ( ++j < m->m_max || j < m->m_fixed ) { goto loop; }
		// otherwise, if not an array, go to next parm
		continue;
	try2:
		// get xml node number of m->m_xml in the "m_xml2" file
		nn = m_xml2.getNodeNum(nn,1000000,m->m_xml,gbstrlen(m->m_xml));
		// otherwise, we had one in file, but now we're out
		if ( nn < 0 ) {
			// if it was ONLY a search input parm, with no
			// default value that can be changed in the
			// CollectionRec then skip it
			// if ( m->m_soff  != -1 &&
			//      m->m_off   == -1 &&
			//      m->m_smaxc == -1 )
			//	continue;
			// . if it is a string, like <adminIp> and default is
			//   NULL then don't worry about reporting it
			// . no, just make the default "" then
			//if ( m->m_type==TYPE_STRING && ! m->m_def) continue;
			// bitch that it was not found
			//if ( ! m->m_def[0] )
			//	log("conf: %s does not have <%s> tag. "
			//	    "Omitting.",filename,m->m_xml);
			//else
			/*
			if ( ! m->m_def ) //m->m_def[0] )
				log("conf: %s does not have <%s> tag. Using "
				    "default value of \"%s\".", filename,
				    m->m_xml,m->m_def);
			*/
			continue;
		}
		// . next node is the value of this tag
		// . skip if none there
		if ( nn + 1 >= numNodes2 ) continue;
		// point to it
		v    = m_xml2.getNode    ( nn + 1 );
		vlen = m_xml2.getNodeLen ( nn + 1 );
		// if a back tag... set the value to the empty string
		if ( v[0] == '<' && v[1] == '/' ) vlen = 0;
		// now, extricate from the <![CDATA[ ... ]]> tag if we need to
		if ( m->m_type == TYPE_STRING ||
		     m->m_type == TYPE_STRINGBOX ||
		     m->m_type == TYPE_STRINGNONEMPTY ) {
			char   *oldv    = v;
			int32_t oldvlen = vlen;
			// reset if not a tag node
			if ( v[0] != '<' && nn + 2 < numNodes2 ) {
				v    = m_xml2.getNode    ( nn + 2 );
				vlen = m_xml2.getNodeLen ( nn + 2 );
			}
			// should be a <![CDATA[...]]>
			if ( vlen<12 || strncasecmp(v,"<![CDATA[",9)!=0 ) {
				log("conf: No <![CDATA[...]]> tag found "
				    "for \"<%s>\" tag. Trying without CDATA.",
				    m->m_xml);
				v    = oldv;
				vlen = oldvlen;
			}
			// point to the nugget
			else {
				v    += 9;
				vlen -= 12;
			}
		}
		// get the value
		//v = m_xml2.getString ( nn , nn+2 , m->m_xml , &vlen );
		// this only happens when tag is there, but without a value
		if ( ! v || vlen == 0 ) { vlen = 0; v = tt; }
		//c = v[vlen];
		v[vlen]='\0';
		// now decode it into itself
		nb = htmlDecode ( v , v , vlen , false,0);
		v[nb] = '\0';
		// set our parm
		setParm ( (char *)THIS, m, i, j, v, false/*is html encoded?*/,
			  false );
		// we were set from the backup default file
		//((CollectionRec *)THIS)->m_orig[i] = 1;
		// go back
		//v[vlen] = c;
		// do not repeat same node
		nn++;
		// try to get the next node if we're an array
		if ( ++j < m->m_max || j < m->m_fixed ) { goto loop; }
		// otherwise, if not an array, go to next parm
		continue;
	}

	// backwards compatible hack for old <masterPassword> tags
	for ( int32_t i = 1 ; i < numNodes ; i++ ) {
		if ( objType != OBJ_CONF ) break;
		XmlNode *pn = &xml.m_nodes[i-1];
		XmlNode *xn = &xml.m_nodes[i];
		// look for <masterPassword>
		if ( pn->m_tagNameLen != 14 ) continue;
		if ( xn->m_tagNameLen !=  8 ) continue;
		// if it is not the OLD supported tag then skip
		if ( strncmp ( pn->m_tagName,"masterPassword",14 ) ) continue;
		if ( strncmp ( xn->m_tagName,"![CDATA[",8 ) ) continue;
		// otherwise append to buf
		char   *text = xn->m_node + 9;
		int32_t tlen = xn->m_nodeLen - 12;
		g_conf.m_masterPwds.safeMemcpy(text,tlen);
		// a \n
		g_conf.m_masterPwds.pushChar('\n');
		g_conf.m_masterPwds.nullTerm();
	}
	// another backwards compatible hack for old masterIp tags
	for ( int32_t i = 1 ; i < numNodes ; i++ ) {
		if ( objType != OBJ_CONF ) break;
		XmlNode *xn = &xml.m_nodes[i];
		XmlNode *pn = &xml.m_nodes[i-1];
		// look for <masterIp>
		if ( pn->m_tagNameLen != 8 ) continue;
		if ( xn->m_tagNameLen != 8 ) continue;
		// if it is not the OLD supported tag then skip
		if ( strncmp ( pn->m_tagName,"masterIp",8 ) ) continue;
		if ( strncmp ( xn->m_tagName,"![CDATA[",8 ) ) continue;
		// otherwise append to buf
		char   *text = xn->m_node + 9;
		int32_t tlen = xn->m_nodeLen - 12;
		g_conf.m_connectIps.safeMemcpy(text,tlen);
		// a \n
		g_conf.m_connectIps.pushChar('\n');
		g_conf.m_connectIps.nullTerm();
	}

	/*
	// no! now we warn with a redbox alert

	// always make sure we got some admin security
	if ( g_conf.m_numMasterIps <= 0 && g_conf.m_numMasterPwds <= 0 ) {
		//log(LOG_INFO,
		//    "conf: No master IP or password provided. Using default "
		//    "password 'footbar23'." );
		//g_conf.m_masterIps[0] = atoip ( "64.139.94.202", 13 );
		//g_conf.m_numMasterIps = 1;
		strcpy ( g_conf.m_masterPwds[0] , "footbar23" );
		g_conf.m_numMasterPwds = 1;
	}
	*/

	return true;
}

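// For reference, a sketch of the two tag shapes the loop above accepts
// (tag names here are illustrative only, not necessarily real parms):
//
//   <someNumericParm>1000</someNumericParm>
//   <adminIp><![CDATA[1.2.3.4]]></adminIp>   <-- string types, CDATA-wrapped
//
// String types fall back to the raw node text when the CDATA wrapper is
// missing, logging a warning instead of failing.
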
// returns false and sets g_errno on error
bool Parms::setXmlFromFile(Xml *xml, char *filename, SafeBuf *sb ) {
	// File f;
	// f.set ( filename );
	// is it too big?
	// int32_t fsize = f.getFileSize();
	// if ( fsize > bufSize ) {
	//	log ("conf: File size of %s is %" INT32 ", must be "
	//	     "less than %" INT32 ".",f.getFilename(),fsize,bufSize );
	//	char *xx = NULL; *xx = 0;
	// }
	// open it for reading
	// f.set ( filename );
	// if ( ! f.open ( O_RDONLY ) )
	//	return log("conf: Could not open %s: %s.",
	//		   filename,mstrerror(g_errno));
	// // read in the file
	// int32_t numRead = f.read ( buf , bufSize , 0 /*offset*/ );
	// f.close ( );
	// if ( numRead != fsize )
	//	return log ("conf: Could not read %s : %s.",
	//		    filename,mstrerror(g_errno));
	// // null terminate it
	// buf [ fsize ] = '\0';

	sb->load ( filename );
	char *buf = sb->getBufStart();
	if ( ! buf )
		return log ("conf: Could not read %s : %s.",
			    filename,mstrerror(g_errno));

	// . remove all comments in case they contain tags
	// . if you have a # as part of your string, it must be html encoded,
	//   just like you encode < and >
	char *s = buf;
	char *d = buf;
	while ( *s ) {
		// . skip comments
		// . watch out for html encoded pound signs though
		if ( *s == '#' ) {
			if (s>buf && *(s-1)=='&' && is_digit(*(s+1))) goto ok;
			while ( *s && *s != '\n' ) s++;
			continue;
		}
		// otherwise, transcribe over
	ok:
		*d++ = *s++;
	}
	*d = '\0';
	int32_t bufSize = d - buf;
	// . set to xml
	// . use version of 0
	return xml->set ( buf ,
			  bufSize ,
			  false , // ownData
			  0 ,     // allocSize
			  false , // pureXml?
			  0 ,     // version
			  true ,  // setParents
			  0 ,     // niceness
			  CT_XML );
}

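// A quick illustration of the '#' comment stripping above (tag name and
// value assumed, not from a real conf file):
//
//   input : "<maxMem>4000000</>  # half of physical ram\n"
//   output: "<maxMem>4000000</>  \n"
//
// An html-encoded pound sign like "&#35;" survives because of the
// '&'-before / digit-after check.
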
//#define MAX_CONF_SIZE 200000

// returns false and sets g_errno on error
bool Parms::saveToXml ( char *THIS , char *f , char objType ) {
	if ( g_conf.m_readOnlyMode ) return true;
	// print into buffer
	// "seeds" can be pretty big so go with safebuf now
	// fix so if we core in malloc/free we can still save conf
	char tmpbuf[200000];
	SafeBuf sb(tmpbuf,200000);
	//char *p    = buf;
	//char *pend = buf + MAX_CONF_SIZE;
	int32_t len ;
	//int32_t n ;
	File ff ;
	int32_t j ;
	int32_t count;
	char *s;
	CollectionRec *cr = NULL;
	if ( THIS != (char *)&g_conf ) cr = (CollectionRec *)THIS;
	// now write each parm in THIS out to the xml buffer
	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
		// get it
		Parm *m = &m_parms[i];
		if ( m->m_obj != objType ) continue;
		// . there are 2 object types, coll recs and g_conf, aka
		//   OBJ_COLL and OBJ_CONF.
		// . make sure we got the right parms for what we want
		if ( m->m_obj == OBJ_NONE ) continue;
		// skip dups
		if ( m->m_flags & PF_DUP ) continue;
		// do not allow searchinput parms through
		if ( m->m_obj == OBJ_SI ) continue;
		if ( THIS == (char *)&g_conf && m->m_obj != OBJ_CONF) continue;
		if ( THIS != (char *)&g_conf && m->m_obj == OBJ_CONF) continue;
		if ( m->m_type == TYPE_MONOD2 ) continue;
		if ( m->m_type == TYPE_MONOM2 ) continue;
		if ( m->m_type == TYPE_CMD ) continue;
		if ( m->m_type == TYPE_BOOL2 ) continue;
		if ( m->m_type == TYPE_FILEUPLOADBUTTON ) continue;
		// ignore if hidden as well! no, have to keep those separate
		// since spiderroundnum/starttime is hidden but should be saved
		if ( m->m_flags & PF_NOSAVE ) continue;
		// ignore if diffbot and we are not a diffbot/custom crawl
		if ( cr &&
		     ! cr->m_isCustomCrawl &&
		     (m->m_flags & PF_DIFFBOT) ) continue;
		// skip if we should not save to xml
		if ( ! m->m_save ) continue;
		// allow comments though
		if ( m->m_type == TYPE_COMMENT ) goto skip2;
		// skip if this was compiled for a client and they should not
		// see this control
		//#ifdef _GLOBALSPEC_
		//	if ( m->m_priv == 2 ) continue;
		//	if ( m->m_priv == 3 ) continue;
		//#elif _CLIENT_
		//	if ( m->m_priv ) continue;
		//#elif _METALINCS_
		//	if ( m->m_priv == 2 ) continue;
		//	if ( m->m_priv == 3 ) continue;
		//#endif
		// point to the parm's value
		s = (char *)THIS + m->m_off ;
		// if array, count can be 0 or more than 1
		count = 1;
		if ( m->m_max   > 1 ) count = *(int32_t *)(s-4);
		if ( m->m_fixed > 0 ) count = m->m_fixed;
		// sanity check
		if ( count > 100000 ) {
			log(LOG_LOGIC,"admin: Outrageous array size for "
			    "parameter %s. Does the array max size int32_t "
			    "precede it in the conf class?",m->m_title);
			exit(-1);
		}
	skip2:
		// description, do not wrap words around lines
		char *d = m->m_desc;
		// if empty array, mod description to include the tag name
		char tmp [10*1024];
		if ( m->m_max > 1 && count == 0 && gbstrlen(d) < 9000 &&
		     m->m_xml && m->m_xml[0] ) {
			char *cc = "";
			if ( d && d[0] ) cc = "\n";
			sprintf ( tmp , "%s%sUse <%s> tag.",d,cc,m->m_xml);
			d = tmp;
		}
		char *END = d + gbstrlen(d);
		char *dend;
		char *last;
		char *start;
		// just print tag if it has no description
		if ( ! *d ) goto skip;
		//if ( p + gbstrlen(d)+5 >= pend ) goto hadError;
		//if ( p > buf ) *p++='\n';
		if ( sb.length() ) sb.pushChar('\n');
	loop:
		dend = d + 77;
		if ( dend > END ) dend = END;
		last  = d;
		start = d;
		while ( *d && d < dend ) {
			if ( *d == ' '  ) last = d;
			if ( *d == '\n' ) { last = d; break; }
			d++;
		}
		if ( ! *d ) last = d;
		//gbmemcpy ( p , "# " , 2 );
		//p += 2;
		sb.safeMemcpy("# ",2);
		//gbmemcpy ( p , start , last - start );
		//p += last - start;
		sb.safeMemcpy(start,last-start);
		//*p++='\n';
		sb.pushChar('\n');
		d = last + 1;
		if ( d < END && *d ) goto loop;
		// bail if comment
		if ( m->m_type == TYPE_COMMENT ) {
			//sprintf ( p , "\n" );
			//p += gbstrlen ( p );
			continue;
		}
		if ( m->m_type == TYPE_MONOD2 ) continue;
		if ( m->m_type == TYPE_MONOM2 ) continue;

	skip:
		/* . note: this code commented out because it was specific to
		     an old client
		// if value is from default collection file, do not
		// explicitly list it
		if ( m->m_obj == OBJ_COLL &&
		     ((CollectionRec *)THIS)->m_orig[i] == 1 ) {
			sprintf ( p ,"# Value for <%s> tag taken from "
				  "default.conf.\n",m->m_xml );
			p += gbstrlen ( p );
			continue;
		}
		*/

		// debug point
		//if ( m->m_type == TYPE_SAFEBUF )
		//	log("hey");

		// loop over all in this potential array
		for ( j = 0 ; j < count ; j++ ) {
			// the xml
			//if ( p + gbstrlen(m->m_xml) >= pend ) goto hadError;
			if ( g_errno ) goto hadError;
			//sprintf ( p , "<%s>" , m->m_xml );
			//p += gbstrlen ( p );
			sb.safePrintf("<%s>" , m->m_xml );
			// print CDATA if string
			if ( m->m_type == TYPE_STRING ||
			     m->m_type == TYPE_STRINGBOX ||
			     m->m_type == TYPE_SAFEBUF ||
			     m->m_type == TYPE_STRINGNONEMPTY ) {
				//sprintf ( p , "<![CDATA[" );
				//p += gbstrlen ( p );
				sb.safeStrcpy( "<![CDATA[" );
			}
			// break point
			//if (strcmp ( m->m_xml , "filterRulesetDefault")==0)
			//	log("got it");
			// . represent it in ascii form
			// . this escapes out <'s and >'s
			// . this ALSO encodes #'s (xml comment indicators)
			//p = getParmHtmlEncoded(p,pend,m,s);
			getParmHtmlEncoded(&sb,m,s);
			// print CDATA if string
			if ( m->m_type == TYPE_STRING ||
			     m->m_type == TYPE_STRINGBOX ||
			     m->m_type == TYPE_SAFEBUF ||
			     m->m_type == TYPE_STRINGNONEMPTY ) {
				//sprintf ( p , "]]>" );
				//p += gbstrlen ( p );
				sb.safeStrcpy("]]>" );
			}
			// this is NULL if it ran out of room
			//if ( ! p ) goto hadError;
			if ( g_errno ) goto hadError;
			// advance to next element in array, if it is one
			s = s + m->m_size;
			// close the xml tag
			//if ( p + 4 >= pend ) goto hadError;
			//sprintf ( p , "</>\n" );
			//p += gbstrlen ( p );
			sb.safeStrcpy("</>\n" );
			if ( g_errno ) goto hadError;
		}
	}
	//*p = '\0';
	sb.nullTerm();

	//ff.set ( f );
	//if ( ! ff.open ( O_RDWR | O_CREAT | O_TRUNC ) )
	//	return log("db: Could not open %s : %s",
	//		   ff.getFilename(),mstrerror(g_errno));

	// save the parm to the file
	//len = gbstrlen(buf);
	len = sb.length();
	// use -1 for offset so we do not use pwrite() so it will not leave
	// garbage at end of file
	//n = ff.write ( buf , len , -1 );
	//n = ff.write ( sb.getBufStart() , len , -1 );
	//ff.close();
	//if ( n == len ) return true;

	// save to filename "f". returns # of bytes written, -1 on error.
	if ( sb.safeSave ( f ) >= 0 )
		return true;

	return log("admin: Could not write to file %s.",f);
 hadError:
	return log("admin: Error writing to %s: %s",f,mstrerror(g_errno));

	//File bigger than %" INT32 " bytes."
	//     " Please increase #define in Parms.cpp.",
	//     (int32_t)MAX_CONF_SIZE);
}

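// The writer above emits entries like this (using the dns cache parm
// defined in init() below, with its default value):
//
//   # How many bytes should be used for caching DNS replies?
//   <dnsMaxCacheMem>128000</>
//
// i.e. description lines wrapped near 77 columns and prefixed with "# ",
// followed by the value closed with the abbreviated "</>" back tag.
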
Parm *Parms::getParm ( char *cgi ) {
	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
		if ( ! m_parms[i].m_cgi ) continue ;
		if ( m_parms[i].m_cgi[0] != cgi[0] ) continue;
		if ( m_parms[i].m_cgi[1] != cgi[1] ) continue;
		if ( strcmp ( m_parms[i].m_cgi , cgi ) == 0 )
			return &m_parms[i];
	}
	return NULL;
}

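// e.g. g_parms.getParm("c") returns one of the "collection" parms defined
// in init() below. The two leading byte compares are just a cheap reject
// before the full strcmp().
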
/*
Parm *Parms::getParm2 ( char *cgi , int32_t cgiLen ) {
	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
		if ( ! m_parms[i].m_cgi ) continue ;
		if ( m_parms[i].m_cgi[0] != cgi[0] ) continue;
		if ( cgiLen >= 2 && m_parms[i].m_cgi[1] != cgi[1] ) continue;
		// only compare as many letters as the cgi name has
		if ( strncmp ( m_parms[i].m_cgi , cgi , cgiLen ) ) continue;
		// that means we gotta check lengths next
		if ( gbstrlen(m_parms[i].m_cgi) != cgiLen ) continue;
		// got a match
		return &m_parms[i];
	}
	return NULL;
}
*/
/*
#define PHTABLE_SIZE (MAX_PARMS*2)

Parm *Parms::getParm ( char *cgi ) {
	// make the hash table for the first call
	static int32_t s_phtable [ PHTABLE_SIZE ];
	static Parm   *s_phparm  [ PHTABLE_SIZE ];
	static bool    s_init = false;
	// do not re-make the table if we already did
	if ( s_init ) goto skipMakeTable;
	// ok, now make the table
	s_init = true;
	memset ( s_phparm , 0 , PHTABLE_SIZE * sizeof(Parm *) );
	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
		if ( ! m_parms[i].m_cgi ) continue ;
		int32_t h = hash32 ( m_parms[i].m_cgi );
		int32_t n = h % PHTABLE_SIZE;
		while ( s_phparm[n] ) {
			// . sanity check
			// . we don't have that many parms, they should never
			//   collide!!... but it is possible i guess.
			if ( s_phtable[n] == h ) {
				log(LOG_LOGIC,"Parms: collisions forbidden in "
				    "getParm(). Duplicate cgi name?");
				char *xx = NULL; *xx = 0;
			}
			if (++n >= PHTABLE_SIZE) n = 0;
		}
		s_phtable[n] = h;           // fill the bucket
		s_phparm [n] = &m_parms[i]; // the parm
	}
 skipMakeTable:
	// look up in table
	int32_t h = hash32 ( cgi );
	int32_t n = h % PHTABLE_SIZE;
	// while bucket is occupied and does not equal our hash... chain
	while ( s_phparm[n] && s_phtable[n] != h )
		if (++n >= PHTABLE_SIZE) n = 0;
	// if empty, no match
	return s_phparm[n];
}
*/

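// (Both lookup variants above are disabled; the linear scan in the live
// getParm() above them is what actually runs.)
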
bool Parms::getParmHtmlEncoded ( SafeBuf *sb , Parm *m , char *s ) {
	// do not breach the buffer
	//if ( p + 100 >= pend ) return p;
	// print it out
	char t = m->m_type;
	if ( t == TYPE_CHAR || t == TYPE_BOOL ||
	     t == TYPE_CHECKBOX ||
	     t == TYPE_PRIORITY || t == TYPE_PRIORITY2 ||
	     //t == TYPE_DIFFBOT_DROPDOWN ||
	     t == TYPE_UFP ||
	     t == TYPE_PRIORITY_BOXES || t == TYPE_RETRIES ||
	     t == TYPE_FILTER ||
	     t == TYPE_BOOL2 || t == TYPE_CHAR2 )
		sb->safePrintf("%" INT32 "",(int32_t)*s);
	else if ( t == TYPE_FLOAT )
		sb->safePrintf("%f",*(float *)s);
	else if ( t == TYPE_IP )
		sb->safePrintf("%s",iptoa(*(int32_t *)s));
	else if ( t == TYPE_LONG || t == TYPE_LONG_CONST || t == TYPE_RULESET||
		  t == TYPE_SITERULE )
		sb->safePrintf("%" INT32 "",*(int32_t *)s);
	else if ( t == TYPE_LONG_LONG )
		sb->safePrintf("%" INT64 "",*(int64_t *)s);
	else if ( t == TYPE_SAFEBUF ) {
		SafeBuf *sb2 = (SafeBuf *)s;
		char *buf = sb2->getBufStart();
		//int32_t blen = 0;
		//if ( buf ) blen = gbstrlen(buf);
		//p = htmlEncode ( p , pend , buf , buf + blen , true ); // #?*
		// we can't do proper cdata and be backwards compatible
		//sb->cdataEncode ( buf );//, blen );//, true ); // #?*
		if ( buf ) sb->htmlEncode ( buf );
	}
	else if ( t == TYPE_STRING ||
		  t == TYPE_STRINGBOX ||
		  t == TYPE_STRINGNONEMPTY ||
		  t == TYPE_TIME ) {
		//int32_t slen = gbstrlen ( s );
		// this returns the length of what was written, it may
		// not have converted everything if pend-p was too small...
		//p += saftenTags2 ( p , pend - p , s , len );
		//p = htmlEncode ( p , pend , s , s + slen , true /*#?*/);
		// we can't do proper cdata and be backwards compatible
		//sb->cdataEncode ( s );//, slen );//, true /*#?*/);
		sb->htmlEncode ( s );
	}
	else if ( t == TYPE_DATE || t == TYPE_DATE2 ) {
		// time is stored as int32_t
		int32_t ct = *(int32_t *)s;
		// get the time struct
		struct tm *tp = localtime ( (time_t *)&ct ) ;
		// set the "selected" month for the drop down
		char tmp[100];
		strftime ( tmp , 100 , "%d %b %Y %H:%M UTC" , tp );
		sb->safeStrcpy ( tmp );
		sb->setLabel("parm3");
	}
	//p += gbstrlen ( p );
	//return p;
	return true;
}

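// Sketch of the round trip: saveToXml() funnels values through the
// htmlEncode() calls above, and setFromFile() undoes that with
// htmlDecode(), so literal '<', '>' and '#' bytes never confuse the conf
// parser or the comment stripper in setXmlFromFile().
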
/*
// returns the size needed to serialize parms
int32_t Parms::getStoredSize() {
	int32_t size = 0;

	// calling serialize with no ptr gets size
	serialize( NULL, &size );
	return size;
}

// . serialize parms to buffer
// . accepts addr of buffer ptr and addr of buffer size
// . on entry buf can be NULL to determine required size
// . if buf is not NULL, *bufSize must specify the size of buf
// . on exit *buf is filled with serialized parms
// . on exit *bufSize is set to the actual len of *buf
bool Parms::serialize( char *buf, int32_t *bufSize ) {
	g_errno = 0;
	if ( ! bufSize ) {
		g_errno = EBADENGINEER;
		log( "admin: serialize: bad engineer: no bufSize ptr" );
		*bufSize = 0;
		return false;
	}
	bool  sizeChk = false;
	char *end     = NULL;
	if ( ! buf ) sizeChk = true;       // just calc size
	else         end = buf + *bufSize; // for overrun checking

	// serialize OBJ_CONF and OBJ_COLL parms
	*bufSize = 0;
	char *p = buf;

	// now the parms
	struct SerParm *sp = NULL;
	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
		Parm *m = &m_parms[i];

		// ignore these:
		if ( m->m_obj == OBJ_SI ) continue;
		if ( m->m_off < 0 ) continue;
		if ( m->m_type == TYPE_COMMENT ) continue;
		if ( m->m_type == TYPE_MONOD2 ) continue;
		if ( m->m_type == TYPE_MONOM2 ) continue;
		if ( m->m_type == TYPE_CMD ) continue;
		if ( m->m_type == TYPE_LONG_CONST ) continue;
		if ( ! m->m_sync ) continue; // parm is not to be synced

		// determine the size of the parm value
		int32_t size = 0;
		if ( m->m_type == TYPE_CHAR           ) size = 1;
		if ( m->m_type == TYPE_CHAR2          ) size = 1;
		if ( m->m_type == TYPE_CHECKBOX       ) size = 1;
		if ( m->m_type == TYPE_BOOL           ) size = 1;
		if ( m->m_type == TYPE_BOOL2          ) size = 1;
		if ( m->m_type == TYPE_PRIORITY       ) size = 1;
		if ( m->m_type == TYPE_PRIORITY2      ) size = 1;
		//if ( m->m_type == TYPE_DIFFBOT_DROPDOWN) size = 1;
		if ( m->m_type == TYPE_PRIORITY_BOXES ) size = 1;
		if ( m->m_type == TYPE_RETRIES        ) size = 1;
		if ( m->m_type == TYPE_TIME           ) size = 6;
		if ( m->m_type == TYPE_DATE2          ) size = 4;
		if ( m->m_type == TYPE_DATE           ) size = 4;
		if ( m->m_type == TYPE_FLOAT          ) size = 4;
		if ( m->m_type == TYPE_IP             ) size = 4;
		if ( m->m_type == TYPE_RULESET        ) size = 4;
		if ( m->m_type == TYPE_LONG           ) size = 4;
		if ( m->m_type == TYPE_LONG_LONG      ) size = 8;
		if ( m->m_type == TYPE_STRING         ) size = m->m_size;
		if ( m->m_type == TYPE_STRINGBOX      ) size = m->m_size;
		if ( m->m_type == TYPE_STRINGNONEMPTY ) size = m->m_size;
		if ( m->m_type == TYPE_SAFEBUF        ) size = m->m_size;
		if ( m->m_type == TYPE_SITERULE       ) size = 4;

		// . set size to the total size of array
		// . set cnt to the number of items
		int32_t cnt = 1;
		if (m->m_fixed > 0) {
			size *= m->m_fixed;
			cnt   = m->m_fixed;
		}
		else {
			size *= m->m_max;
			cnt   = m->m_max;
		}

		if ( m->m_obj == OBJ_CONF ) {
			bool overflew = serializeConfParm( m, i, &p, end,
							   size, cnt,
							   sizeChk, bufSize );
			if ( overflew ) goto overflow;
		}
		else if ( m->m_obj == OBJ_COLL ) {
			collnum_t j = g_collectiondb.getFirstCollnum ();
			while ( j >= 0 ) {
				CollectionRec *cr = g_collectiondb.getRec( j );
				bool overflew = serializeCollParm( cr,
								   m, i, &p,
								   end,
								   size, cnt,
								   sizeChk,
								   bufSize );
				if ( overflew ) goto overflow;
				j = g_collectiondb.getNextCollnum ( j );
			}
		}
	}
	if ( ! sizeChk ) {
		// set the final marker to 0s to indicate the end
		sp = (struct SerParm *)p;
		sp->i    = 0;
		sp->obj  = 0;
		sp->size = 0;
		sp->cnt  = 0;
	}
	*bufSize += sizeof( struct SerParm );

	return true;

 overflow:
	g_errno = EBADENGINEER;
	log(LOG_WARN, "admin: serialize: bad engineer: overflow" );
	*bufSize = 0;
	return false;
}

// . serialize a conf parm
// . if sizeChk is true then we do not serialize, but just get the
//   bytes required if we did serialize
// . serialize parm into *p, the cursor i guess, buf end is "end"
bool Parms::serializeConfParm( Parm *m, int32_t i, char **p, char *end,
			       int32_t size, int32_t cnt,
			       bool sizeChk, int32_t *bufSz ) {
	SerParm *sp = NULL;

	// safebuf not supported here yet, but it is for coll recs below
	// so copy code from there if you need it
	if ( m->m_type == TYPE_SAFEBUF ) { char *xx=NULL;*xx=0;}

	if (m->m_type == TYPE_STRING ||
	    m->m_type == TYPE_STRINGBOX ||
	    m->m_type == TYPE_STRINGNONEMPTY ) {
		char *sVal = NULL;
		if ( ! sizeChk ) {
			sp = (SerParm *)*p;
			sp->i    = i;        // index of parm
			sp->obj  = OBJ_CONF;
			sp->size = 0L;       // 0 for strings
			sp->cnt  = cnt;      // # of strings
			// if an array, get num of member
			if ( cnt > 1 ) {
				sp->off = m->m_off - sizeof(int32_t);
				sp->num = *(int32_t *)((char *)&g_conf
						       + sp->off);
			}
			else {
				sp->off = 0;
				sp->num = 0;
			}

			sVal = sp->val;
		}
		char   *sConf  = (char *)&g_conf + m->m_off;
		int32_t totLen = 0;
		int32_t tcnt   = cnt;
		while ( tcnt ) {
			int32_t len = gbstrlen( sConf );
			if ( ! sizeChk ) {
				// copy the parm value
				if ( sVal + len > end )
					return true; // overflow
				strcpy( sVal, sConf );
			}
			totLen += len + 1; // incl the NULL
			// inc conf ptr by size of strings
			sConf += m->m_size;
			// inc ser value by len of str + NULL
			sVal += len + 1;
			tcnt--;
		}
		if ( ! sizeChk ) {
			// inc by tot len of compacted strings
			*p += sizeof( *sp ) + totLen;
		}
		*bufSz += sizeof( SerParm ) + totLen;
	}
	else {
		if ( ! sizeChk ) {
			sp = (SerParm *)*p;
			sp->i    = i;
			sp->obj  = OBJ_CONF;
			sp->size = size; // tot size if array
			sp->cnt  = cnt;  // num of items
			// if array, get num of member
			if ( cnt > 1 ) {
				sp->off = m->m_off - sizeof(int32_t);
				sp->num = *(int32_t *)((char *)&g_conf
						       + sp->off);
			}
			else {
				sp->off = 0;
				sp->num = 0;
			}

			// copy the parm's whole value
			if ( sp->val + size > end )
				return true; // overflow
			gbmemcpy( sp->val,
				  (char *)&g_conf + m->m_off, size );
			// inc by tot size if array
			*p += sizeof( *sp ) + size;
		}
		*bufSz += sizeof( SerParm ) + size;
	}

	return false;
}

// . serialize a coll parm in CollectionRec.h
// . if sizeChk is true then we do not serialize, but just get the
//   bytes required if we did serialize
// . serialize parm into *p, the cursor i guess, buf end is "end"
bool Parms::serializeCollParm( CollectionRec *cr,
			       Parm *m, int32_t i, char **p, char *end,
			       int32_t size, int32_t cnt,
			       bool sizeChk, int32_t *bufSize) {
	SerParm *sp = NULL;

	if (m->m_type == TYPE_STRING ||
	    m->m_type == TYPE_STRINGBOX ||
	    m->m_type == TYPE_SAFEBUF ||
	    m->m_type == TYPE_STRINGNONEMPTY ) {
		char *sVal = NULL;
		if ( ! sizeChk ) {
			sp = (SerParm *)*p;
			sp->i    = i;        // index of parm
			sp->obj  = OBJ_COLL;
			sp->size = 0L;       // 0 for strings
			sp->cnt  = cnt;      // # of strings
			// is this parm an array of parms?
			if ( cnt > 1 ) {
				// the offset of the "count" or the
				// "number of elements" in the array.
				// it precedes the value of the first element
				// as can be seen for parms in
				// CollectionRec.h.
				sp->off = m->m_off - sizeof(int32_t);
				// store the # of them into "num"
				sp->num = *(int32_t *)((char *)cr + sp->off);
			}
			else {
				sp->off = 0;
				sp->num = 0;
			}
			sVal = sp->val;
		}
		// point to the actual parm itself
		char   *sColl  = (char *)cr + m->m_off;
		int32_t totLen = 0;
		// "cnt" is how many elements in the array
		int32_t tcnt = cnt;
		while ( tcnt ) {
			// the length of the string
			int32_t len;
			// the string
			char *pstr;
			// if a safebuf, point to string it has
			if ( m->m_type == TYPE_SAFEBUF ) {
				SafeBuf *sx = (SafeBuf *)sColl;
				pstr = sx->getBuf();
				len  = sx->length();
				if ( ! pstr ) pstr = "";
			}
			// get length of the string. if not a safebuf it will
			// just be an outright string in CollectionRec.h
			else {
				pstr = sColl;
				len  = gbstrlen( sColl );
			}
			if ( ! sizeChk ) {
				// copy the string
				if ( sVal+len > end ) {
					log("parms: buffer too small");
					return true;
				}
				// this puts a \0 at the end
				strcpy( sVal, pstr );
			}
			totLen += len + 1; // incl NULL
			// . inc cr ptr by size of strs
			// . this is the size of the SafeBuf for TYPE_SAFEBUF
			sColl += m->m_size;
			// . inc the write cursor by string length + the \0
			sVal += len + 1;
			tcnt--;
		}
		if ( ! sizeChk ) {
			// inc by tot len of cmpctd str
			*p += sizeof( *sp ) + totLen;
		}
		*bufSize += sizeof( SerParm ) + totLen;
	}
	else {
		if ( ! sizeChk ) {
			sp = (SerParm *)*p;
			sp->i    = i;
			sp->obj  = OBJ_COLL;
			sp->size = size; // tot size
			sp->cnt  = cnt;  // num of items
			// get num of member
			if ( cnt > 1 ) {
				sp->off = m->m_off - sizeof(int32_t);
				sp->num = *(int32_t *)((char *)cr + sp->off);
			}
			else {
				sp->off = 0;
				sp->num = 0;
			}
			// copy whole value
			if ( sp->val + size > end )
				return true;
			gbmemcpy( sp->val,
				  (char *)cr + m->m_off,
				  size );
			// inc by whole size of value
			*p += sizeof( *sp ) + size;
		}
		*bufSize += sizeof( SerParm ) + size;
	}

	return false;
}

// deserialize parms from buffer and set our values to the new values
void Parms::deserialize( char *buf ) {
	g_errno = 0;
	char *p = buf;
	bool confChgd = false;

	SerParm *sp = (SerParm *)p;
	int32_t numLooped = 0;
	const int32_t MAX_LOOP = (int32_t)(MAX_PARMS*1.5);
	// if one of these is non-zero, we're still working
	while ( (sp->obj || sp->size || sp->cnt) &&
		(sp->obj > 0 && sp->size > 0 && sp->cnt > 0) &&
		numLooped < MAX_LOOP ) {
		// grab the parm we're working on
		if ( sp->i < 0 || sp->i >= m_numParms ) {
			log( "admin: invalid parm # in Parms::deserialize" );
			char *xx = NULL; *xx = 0;
		}
		Parm *m = &m_parms[ sp->i ];

		if ( sp->obj == OBJ_CONF ) {
			deserializeConfParm( m, sp, &p, &confChgd );
			sp = (struct SerParm *)p;
		}
		else if ( sp->obj == OBJ_COLL ) {
			collnum_t j = g_collectiondb.getFirstCollnum ();
			//if(j <= 0) {
			//	log("coll: Collectiondb does not have a rec" );
			//	return;
			//}
			while ( j >= 0 ) {
				CollectionRec *cr = g_collectiondb.getRec( j );
				deserializeCollParm( cr,
						     m, sp, &p );
				sp = (SerParm *)p;
				j = g_collectiondb.getNextCollnum ( j );
			}
		}

		// setup the next rec
		sp = (SerParm *)p;
		numLooped++;
	}
	if (numLooped >= MAX_LOOP) {
		log( "admin: infinite loop in Parms::deserialize(). halting!");
		char *xx = NULL; *xx = 0;
	}

	// if we changed the conf, we need to save it
	if ( confChgd ) {
		g_conf.save ();
	}

	// if we changed a CollectionRec, we need to save it
	int32_t j = g_collectiondb.getFirstCollnum ();
	while ( j >= 0 ) {
		CollectionRec *cr = g_collectiondb.getRec( j );
		if ( cr->m_needsSave ) {
			cr->save ();
			// so g_spiderCache can reload if sameDomainWait, etc.
			// have changed
			g_collectiondb.updateTime();
		}
		j = g_collectiondb.getNextCollnum ( j );
	}
}

void Parms::deserializeConfParm( Parm *m, SerParm *sp, char **p,
				 bool *confChgd ) {
	if ( m->m_off + sp->size > (int32_t)sizeof(g_conf) ||
	     m->m_off + sp->size < 0 ){
		log(LOG_WARN, "admin: deserializing parm would overflow "
		    "g_conf!");
		char *xx =0; *xx = 0;
	}
	if ( sp->size == 0 ) { // string
		char   *sVal   = sp->val;
		char   *sConf  = (char *)&g_conf + m->m_off;
		int32_t totLen = 0;
		bool goodParm  = true;
		int32_t tcnt   = sp->cnt;
		while ( tcnt ) {
			goodParm = (goodParm && 0 == strcmp( sVal, sConf ));
			int32_t len = gbstrlen( sVal );
			totLen += len + 1;
			// inc ser value by len of str + NULL
			sVal += len + 1;
			// inc conf ptr by size of strings
			sConf += m->m_size;
			tcnt--;
		}
		if ( goodParm ) {
			// . inc by sizeof rec and tot len of compacted array
			*p += sizeof( *sp ) + totLen;
			return;
		}
		// parms don't match
		sVal   = sp->val;
		sConf  = (char *)&g_conf + m->m_off;
		totLen = 0;
		tcnt   = sp->cnt;
		while ( tcnt ) {
			// copy an array value to this parm
			strcpy( sConf, sVal );
			int32_t len = gbstrlen( sVal );
			totLen += len + 1; // incl the NULL
			// inc conf ptr by size of strings
			sConf += m->m_size;
			// inc ser value by len of str + NULL
			sVal += len + 1;
			tcnt--;
		}

		// set num of member
		if ( sp->off ) {
			int32_t *tmp = (int32_t *)((char *)&g_conf + sp->off);
			*tmp = sp->num;
		}

		// log the changed parm
		log( LOG_INFO, "admin: Parm "
		     "#%" INT32 " \"%s\" (\"%s\") in conf "
		     "changed on sync.",
		     sp->i, m->m_cgi, m->m_title );

		*confChgd = true;

		// inc by sizeof rec and tot len of compacted array
		*p += sizeof( *sp ) + totLen;
	}
	else {
		bool goodParm = ( 0 == memcmp( sp->val,
					       (char *)&g_conf + m->m_off,
					       sp->size ) );
		if ( ! goodParm ) {
			// copy the new parm to m's loc
			gbmemcpy( (char *)&g_conf + m->m_off, sp->val,
				  sp->size );

			// set num of member
			if ( sp->off ) {
				int32_t *tmp = (int32_t *)((char *)&g_conf
							   + sp->off);
				*tmp = sp->num;
			}

			// log the changed parm
			log( LOG_INFO, "admin: Parm "
			     "#%" INT32 " \"%s\" (\"%s\") in conf "
			     "changed on sync.",
			     sp->i, m->m_cgi, m->m_title );

			*confChgd = true;
		}
		// increase by rec size and size of parm
		*p += sizeof( *sp ) + sp->size;
	}
}

void Parms::deserializeCollParm( CollectionRec *cr,
				 Parm *m, SerParm *sp, char **p ) {
	if ( m->m_off + sp->size > (int32_t)sizeof(CollectionRec) ||
	     m->m_off + sp->size < 0 ) {
		log(LOG_WARN, "admin: deserializing parm would overflow "
		    "the collection rec!");
		char *xx =0; *xx = 0;
	}
	if ( sp->size == 0 ) { // strings
		char   *sVal   = sp->val;                // the sent string buffer i guess
		char   *sColl  = (char *)cr + m->m_off;  // what we have
		int32_t totLen = 0;
		int32_t tcnt   = sp->cnt;                // # of strings
		bool goodParm  = true;
		while ( tcnt ) {

			char *pstr;
			if ( m->m_type == TYPE_SAFEBUF ) {
				SafeBuf *sx = (SafeBuf *)sColl;
				pstr = sx->getBuf();
			}
			else {
				pstr = sColl;
			}

			// set goodParm to true if unchanged
			goodParm= (goodParm && 0 == strcmp(sVal, pstr));
			// get length of what was sent to us
			int32_t len = gbstrlen( sVal );
			totLen += len + 1; //incl NULL
			// this is a list of strings with \0s (sent to us)
			sVal += len + 1; //incl NULL
			// inc by size of strs. point to next string we have
			// stored in our array of strings in CollectionRec.
			// for TYPE_SAFEBUF this size is sizeof(SafeBuf).
			sColl += m->m_size;
			tcnt--;
		}
		// if parm was an exact match return now
		if ( goodParm ) {
			// . inc by sizeof rec and
			//   tot len of compacted array
			// . skip the SerParm and following string buffer.
			*p += sizeof( *sp ) + totLen;
			return;
		}
		//
		// if parms don't match, we need to update our stuff
		//
		//
		// point to the sent string buffer
		sVal = sp->val;
		// point to the local parm, array of strings or safebufs
		sColl = (char *)cr + m->m_off;
		totLen = 0;
		// how many strings or safebufs in there?
		tcnt = sp->cnt;
		// loop over each one
		while ( tcnt ) {
			if ( m->m_type == TYPE_SAFEBUF ) {
				SafeBuf *sx = (SafeBuf *)sColl;
				sx->set ( sVal );
				sx->nullTerm ( );
			}
			else {
				// copy an array value to this parm
				strcpy( sColl, sVal );
			}
			// get length of string we copied
			int32_t len = gbstrlen( sVal );
			totLen += len + 1; // +the NULL
			// . inc conf ptr by size
			//   of strings
			sColl += m->m_size;
			// . inc ser value by len of str + NULL
			sVal += len + 1;
			tcnt--;
		}
		// we changed the record
		cr->m_needsSave = true;

		// set num of member
		if ( sp->off ) {
			int32_t *tmp = (int32_t *)((char *)cr + sp->off);
			*tmp = sp->num;
		}

		// log the changed parm
		log( LOG_INFO, "admin: Parm "
		     "#%" INT32 " \"%s\" (\"%s\") in "
		     "collection \"%s\" "
		     "changed on sync.",
		     sp->i, m->m_cgi, m->m_title,
		     cr->m_coll );

		// . inc by sizeof rec and
		//   tot len of compacted array
		*p += sizeof( *sp ) + totLen;
	}
	else {
		// sanity
		if ( m->m_type == TYPE_SAFEBUF ) { char *xx=NULL;*xx=0; }

		if ( 0 != memcmp( sp->val, (char *)cr + m->m_off, sp->size) ) {
			// copy the new value
			gbmemcpy( (char *)cr + m->m_off,
				  sp->val,
				  sp->size );

			// set num of member
			if ( sp->off ) {
				int32_t *tmp = (int32_t *)((char *)cr + sp->off);
				*tmp = sp->num;
			}

			// log the changed parm
			log( LOG_INFO, "admin: Parm "
			     "#%" INT32 " \"%s\" (\"%s\") "
			     "in collection \"%s\" "
			     "changed on sync.",
			     sp->i, m->m_cgi,
			     m->m_title,
			     cr->m_coll );

			// we changed the record
			cr->m_needsSave = true;
		}
		// inc by rec size and tot len of array
		*p += sizeof( *sp ) + sp->size;
	}
}
*/

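// For reference, the (disabled) sync wire format above is a flat run of
// SerParm records, each followed immediately by its payload:
//
//   [ i | obj | size | cnt | off | num | val... ] ... [ all-zero record ]
//
// "size" is 0 for string parms, whose payload is "cnt" NUL-terminated
// strings packed back to back.
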
void Parms::init ( ) {
	// initialize the Parms class if we need to, only do it once
	static bool s_init = false ;
	if ( s_init ) return;
	s_init = true ;

	// default all
	for ( int32_t i = 0 ; i < MAX_PARMS ; i++ ) {
		m_parms[i].m_parmNum= i;
		m_parms[i].m_hash  = 0 ;
		m_parms[i].m_title = "" ;   // for detecting if not set
		m_parms[i].m_desc  = "" ;   // for detecting if not set
		m_parms[i].m_cgi   = NULL ; // for detecting if not set
		m_parms[i].m_off   = -1 ;   // for detecting if not set
		// for PAGE_FILTERS url filters for printing the url
		// filter profile parm above the url filters table rows.
		m_parms[i].m_colspan= -1;
		m_parms[i].m_def   = NULL ; // for detecting if not set
		m_parms[i].m_defOff = -1;   // if default pts to collrec parm
		m_parms[i].m_type  = TYPE_NONE ; // for detecting if not set
		m_parms[i].m_page  = -1 ; // for detecting if not set
		m_parms[i].m_obj   = -1 ; // for detecting if not set
		m_parms[i].m_max   =  1 ; // max elements in array
		m_parms[i].m_fixed =  0 ; // size of fixed size array
		m_parms[i].m_size  =  0 ; // max string size
		m_parms[i].m_cast  =  1 ; // send to all hosts?
		m_parms[i].m_rowid = -1 ; // rowid of -1 means not in row
		m_parms[i].m_addin =  0 ; // add insert row command?
		m_parms[i].m_rdonly=  0 ; // is command off in read-only mode?
		m_parms[i].m_hdrs  =  1 ; // assume to always print headers
		m_parms[i].m_perms =  0 ; // same as containing WebPages perms
		m_parms[i].m_plen  = -1 ; // offset for strings length
		m_parms[i].m_group =  1 ; // start of a new group of controls?
		m_parms[i].m_priv  =  0 ; // is it private?
		m_parms[i].m_save  =  1 ; // save to xml file?
		m_parms[i].m_min   = -1 ; // min value (for int32_t parms)
		// search fields
		//m_parms[i].m_sparm = 0;
		//m_parms[i].m_scmd  = NULL;//"/search";
		//m_parms[i].m_scgi  = NULL;// defaults to m_cgi
		m_parms[i].m_flags = 0;
		m_parms[i].m_icon  = NULL;
		m_parms[i].m_class = NULL;
		m_parms[i].m_qterm = NULL;
		m_parms[i].m_subMenu= 0;
		m_parms[i].m_spriv = 0;
		// m_sdefo = -1; // just use m_off for this!
		m_parms[i].m_sminc = -1; // min in collection rec
		m_parms[i].m_smaxc = -1; // max in collection rec
		m_parms[i].m_smin  = 0x80000000; // 0xffffffff;
		m_parms[i].m_smax  = 0x7fffffff;
		//m_parms[i].m_soff = -1; // offset into SearchInput
		m_parms[i].m_sprpg = 1; // propagate to other pages via GET
		m_parms[i].m_sprpp = 1; // propagate to other pages via POST
		m_parms[i].m_sync  = true;
	}

	// inherit perms from page
	//for ( int32_t i = 1 ; i < MAX_PARMS ; i++ )
	//	if ( m_parms[i].m_page )
	//		m_parms[i].m_perms = m_parms[i-1].m_perms;

	Parm *m = &m_parms [ 0 ];

	CollectionRec cr;
	SearchInput   si;

	///////////////////////////////////////////
	// CAN ONLY BE CHANGED IN CONF AT STARTUP (no cgi field)
	///////////////////////////////////////////

	char *g = (char *)&g_conf;
	char *x = (char *)&cr;
	char *y = (char *)&si;

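	// Each block below fills in one Parm and advances the cursor, e.g.
	// (pattern only, not a real parm):
	//
	//   m->m_title = "...";
	//   m->m_off   = (char *)&g_conf.m_someMember - g; // byte offset
	//   m++;
	//
	// Because m_off is a plain byte offset, one table can drive g_conf,
	// CollectionRec and the request objects alike.
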
//////////////
|
|
//
|
|
// now for Pages.cpp printApiForPage() we need these
|
|
//
|
|
//////////////
|
|
|
|
|
|
GigablastRequest gr;
|
|
|
|
InjectionRequest ir;
|
|
|
|
/*
|
|
m->m_title = "delete collection";
|
|
m->m_desc = "A collection name to delete. You can specify multiple "
|
|
"&delColl= parms in the request to delete multiple "
|
|
"collections.";
|
|
m->m_cgi = "delColl";
|
|
m->m_page = PAGE_DELCOLL;
|
|
m->m_obj = OBJ_GBREQUEST;
|
|
m->m_type = TYPE_CHARPTR;//SAFEBUF;
|
|
m->m_def = NULL;
|
|
m->m_flags = 0;//PF_API | PF_REQUIRED;
|
|
m->m_off = (char *)&gr.m_coll - (char *)&gr;
|
|
m++;
|
|
|
|
m->m_title = "delete collection";
|
|
m->m_desc = "A collection name to delete. You can specify multiple "
|
|
"&delColl= parms in the request to delete multiple "
|
|
"collections.";
|
|
// camelcase as opposed to above lowercase
|
|
m->m_cgi = "delcoll";
|
|
m->m_page = PAGE_DELCOLL;
|
|
m->m_obj = OBJ_GBREQUEST;
|
|
m->m_type = TYPE_CHARPTR;//SAFEBUF;
|
|
m->m_def = NULL;
|
|
m->m_flags = PF_API | PF_REQUIRED;
|
|
m->m_off = (char *)&gr.m_coll - (char *)&gr;
|
|
m++;
|
|
|
|
m->m_title = "add collection";
|
|
m->m_desc = "A collection name to add.";
|
|
// camelcase support
|
|
m->m_cgi = "addColl";
|
|
m->m_page = PAGE_ADDCOLL;
|
|
m->m_obj = OBJ_GBREQUEST;
|
|
m->m_type = TYPE_CHARPTR;//SAFEBUF;
|
|
m->m_def = NULL;
|
|
m->m_flags = PF_API | PF_REQUIRED;
|
|
m->m_off = (char *)&gr.m_coll - (char *)&gr;
|
|
m++;
|
|
|
|
m->m_title = "add collection";
|
|
m->m_desc = "A collection name to add.";
|
|
// lowercase support
|
|
m->m_cgi = "addcoll";
|
|
m->m_page = PAGE_ADDCOLL;
|
|
m->m_obj = OBJ_GBREQUEST;
|
|
m->m_type = TYPE_CHARPTR;//SAFEBUF;
|
|
m->m_def = NULL;
|
|
m->m_flags = PF_HIDDEN;
|
|
m->m_off = (char *)&gr.m_coll - (char *)&gr;
|
|
m++;
|
|
*/
|
|
|
|
m->m_title = "collection";
|
|
m->m_desc = "Clone settings INTO this collection.";
|
|
m->m_cgi = "c";
|
|
m->m_page = PAGE_CLONECOLL;
|
|
m->m_obj = OBJ_GBREQUEST;
|
|
m->m_type = TYPE_CHARPTR;//SAFEBUF;
|
|
m->m_def = NULL;
|
|
m->m_flags = PF_API | PF_REQUIRED;
|
|
m->m_off = (char *)&gr.m_coll - (char *)&gr;
|
|
m++;
|
|
|
|
m->m_title = "collection";
|
|
m->m_desc = "Use this collection.";
|
|
m->m_cgi = "c";
|
|
m->m_page = PAGE_BASIC_STATUS;
|
|
m->m_obj = OBJ_GBREQUEST;
|
|
m->m_type = TYPE_CHARPTR;//SAFEBUF;
|
|
m->m_def = NULL;
|
|
m->m_flags = PF_API | PF_REQUIRED;
|
|
m->m_off = (char *)&gr.m_coll - (char *)&gr;
|
|
m++;
|
|
|
|
m->m_title = "collection";
|
|
m->m_desc = "Use this collection.";
|
|
m->m_cgi = "c";
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_GBREQUEST;
|
|
m->m_type = TYPE_CHARPTR;//SAFEBUF;
|
|
m->m_def = NULL;
|
|
// do not show in html controls
|
|
m->m_flags = PF_API | PF_REQUIRED | PF_NOHTML;
|
|
m->m_off = (char *)&gr.m_coll - (char *)&gr;
|
|
m++;
|
|
|
|
m->m_title = "collection";
|
|
m->m_desc = "Use this collection.";
|
|
m->m_cgi = "c";
|
|
m->m_page = PAGE_SPIDER;
|
|
m->m_obj = OBJ_GBREQUEST;
|
|
m->m_type = TYPE_CHARPTR;//SAFEBUF;
|
|
m->m_def = NULL;
|
|
// do not show in html controls
|
|
m->m_flags = PF_API | PF_REQUIRED | PF_NOHTML;
|
|
m->m_off = (char *)&gr.m_coll - (char *)&gr;
|
|
m++;
|
|
|
|
m->m_title = "collection";
|
|
m->m_desc = "Use this collection.";
|
|
m->m_cgi = "c";
|
|
m->m_page = PAGE_SPIDERDB;
|
|
m->m_obj = OBJ_GBREQUEST;
|
|
m->m_type = TYPE_CHARPTR;//SAFEBUF;
|
|
m->m_def = NULL;
|
|
// do not show in html controls
|
|
m->m_flags = PF_API | PF_REQUIRED | PF_NOHTML;
|
|
m->m_off = (char *)&gr.m_coll - (char *)&gr;
|
|
m++;
|
|
|
|
m->m_title = "collection";
|
|
m->m_desc = "Use this collection.";
|
|
m->m_cgi = "c";
|
|
m->m_page = PAGE_SITEDB;
|
|
m->m_obj = OBJ_GBREQUEST;
|
|
m->m_type = TYPE_CHARPTR;//SAFEBUF;
|
|
m->m_def = NULL;
|
|
// do not show in html controls
|
|
m->m_flags = PF_API | PF_REQUIRED | PF_NOHTML;
|
|
m->m_off = (char *)&gr.m_coll - (char *)&gr;
|
|
m++;
|
|
|
|
m->m_title = "collection";
|
|
m->m_desc = "Inject into this collection.";
|
|
m->m_cgi = "c";
|
|
m->m_obj = OBJ_GBREQUEST;
|
|
m->m_type = TYPE_CHARPTR;
|
|
m->m_def = NULL;
|
|
// PF_COLLDEFAULT: so it gets set to default coll on html page
|
|
m->m_flags = PF_API|PF_REQUIRED|PF_NOHTML;
|
|
m->m_page = PAGE_INJECT;
|
|
m->m_off = (char *)&gr.m_coll - (char *)&gr;
|
|
m++;
|
|
|
|
// //
|
|
// // more global-ish parms
|
|
// //
|
|
|
|
// m->m_title = "show settings";
|
|
// m->m_desc = "show settings or values for this page.";
|
|
// m->m_cgi = "showsettings";
|
|
// m->m_page = PAGE_MASTER;
|
|
// m->m_obj = OBJ_NONE;
|
|
// m->m_type = TYPE_BOOL;
|
|
// m->m_def = "1";
|
|
// // do not show in html controls
|
|
// m->m_flags = PF_API | PF_NOHTML;
|
|
// m->m_off = (char *)&gr.m_coll - (char *)&gr;
|
|
// m++;
|
|
|
|
|
|
|
|
////////////
|
|
//
|
|
// end stuff for printApiForPage()
|
|
//
|
|
////////////
|
|
|
|
// just a comment in the conf file
|
|
m->m_desc =
|
|
"All <, >, \" and # characters that are values for a field "
|
|
"contained herein must be represented as "
|
|
"<, >, " and # respectively.";
|
|
m->m_type = TYPE_COMMENT;
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
// if the next guy has no description (m_desc) he is assumed to
|
|
// share the description of the previous parm with one.
|
|
/*
|
|
m->m_title = "main external ip";
|
|
m->m_desc = "This is the IP and port that a user connects to in "
|
|
"order to search this Gigablast network. This should be the "
|
|
"same for all gb processes.";
|
|
m->m_off = (char *)&g_conf.m_mainExternalIp - g;
|
|
m->m_def = "127.0.0.1"; // if no default, it is required!
|
|
m->m_type = TYPE_IP;
|
|
m++;
|
|
|
|
m->m_title = "main external port";
|
|
m->m_desc = "";
|
|
m->m_off = (char *)&g_conf.m_mainExternalPort - g;
|
|
m->m_def = "80";
|
|
m->m_type = TYPE_LONG;
|
|
m++;
|
|
*/
|
|
|
|
/*
|
|
m->m_title = "indexdb split";
|
|
m->m_desc = "Number of times to split indexdb across groups. "
|
|
"Must be a power of 2.";
|
|
m->m_off = (char *)&g_hostdb.m_indexSplits - g;
|
|
// -1 means to do a full split just based on docid, just like titledb
|
|
m->m_def = "-1"; // "1";
|
|
m->m_type = TYPE_LONG;
|
|
m++;
|
|
|
|
m->m_title = "full indexdb split";
|
|
m->m_desc = "Set to 1 (true) if indexdb is fully split. Performance "
|
|
"is much better for fully split indexes.";
|
|
m->m_off = (char *)&g_conf.m_fullSplit - g;
|
|
m->m_def = "0";
|
|
m->m_type = TYPE_BOOL;
|
|
m++;
|
|
|
|
m->m_title = "legacy indexdb split";
|
|
m->m_desc = "Set to 1 (true) if using legacy indexdb splitting. For "
|
|
"data generated with farmington release.";
|
|
m->m_off = (char *)&g_conf.m_legacyIndexdbSplit - g;
|
|
m->m_def = "0";
|
|
m->m_type = TYPE_BOOL;
|
|
m++;
|
|
|
|
m->m_title = "tfndb extension bits";
|
|
m->m_desc = "Number of extension bits to use in Tfndb. Increased for "
|
|
"large indexes.";
|
|
m->m_off = (char *)&g_conf.m_tfndbExtBits - g;
|
|
m->m_def = "7";
|
|
m->m_type = TYPE_LONG;
|
|
m++;
|
|
*/
|
|
|
|
/*
|
|
m->m_title = "checksumdb key size";
|
|
m->m_desc = "This determines the key size for checksums. "
|
|
"Must be set for every host.";
|
|
//m->m_cgi = "";
|
|
m->m_off = (char *)&g_conf.m_checksumdbKeySize - g;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "12";
|
|
m++;
|
|
*/
|
|
|
|
// just a comment in the conf file
|
|
m->m_desc =
|
|
"Below the various Gigablast databases are configured.\n"
|
|
"<*dbMaxTreeMem> - mem used for holding new recs\n"
|
|
"<*dbMaxDiskPageCacheMem> - disk page cache mem for this db\n"
|
|
"<*dbMaxCacheMem> - cache mem for holding single recs\n"
|
|
//"<*dbMinFilesToMerge> - required # files to trigger merge\n"
|
|
"<*dbSaveCache> - save the rec cache on exit?\n"
|
|
"<*dbMaxCacheAge> - max age (seconds) for recs in rec cache\n"
|
|
"See that Stats page for record counts and stats.\n";
|
|
m->m_type = TYPE_COMMENT;
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "dns max cache mem";
|
|
m->m_desc = "How many bytes should be used for caching DNS replies?";
|
|
m->m_off = (char *)&g_conf.m_dnsMaxCacheMem - g;
|
|
m->m_def = "128000";
|
|
m->m_type = TYPE_LONG;
|
|
m->m_flags = PF_NOSYNC|PF_NOAPI;
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
// g_dnsDistributed always saves now. main.cpp inits it that way.
|
|
//m->m_title = "dns save cache";
|
|
//m->m_desc = "Should the DNS reply cache be saved/loaded on "
|
|
// "exit/startup?";
|
|
//m->m_off = (char *)&g_conf.m_dnsSaveCache - g;
|
|
//m->m_def = "0";
|
|
//m->m_type = TYPE_BOOL;
|
|
//m++;
|
|
|
|
m->m_title = "tagdb max tree mem";
|
|
m->m_desc = "A tagdb record "
|
|
"assigns a url or site to a ruleset. Each tagdb record is "
|
|
"about 100 bytes or so.";
|
|
m->m_off = (char *)&g_conf.m_tagdbMaxTreeMem - g;
|
|
m->m_def = "1028000";
|
|
m->m_type = TYPE_LONG;
|
|
m->m_flags = PF_NOSYNC|PF_NOAPI;
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
// m->m_title = "tagdb max page cache mem";
|
|
// m->m_desc = "";
|
|
// m->m_off = (char *)&g_conf.m_tagdbMaxDiskPageCacheMem - g;
|
|
// m->m_def = "200000";
|
|
// m->m_type = TYPE_LONG;
|
|
// m->m_flags = PF_NOSYNC|PF_NOAPI;
|
|
// m->m_page = PAGE_NONE;
|
|
// m->m_obj = OBJ_CONF;
|
|
// m++;
|
|
|
|
//m->m_title = "tagdb max cache mem";
|
|
//m->m_desc = "";
|
|
//m->m_off = (char *)&g_conf.m_tagdbMaxCacheMem - g;
|
|
//m->m_def = "128000";
|
|
//m->m_type = TYPE_LONG;
|
|
//m++;
|
|
|
|
//m->m_title = "tagdb min files to merge";
|
|
//m->m_desc = "";
|
|
//m->m_off = (char *)&g_conf.m_tagdbMinFilesToMerge - g;
|
|
//m->m_def = "2";
|
|
//m->m_type = TYPE_LONG;
|
|
//m->m_save = 0;
|
|
//m++;
|
|
|
|
m->m_title = "catdb max tree mem";
|
|
m->m_desc = "A catdb record "
|
|
"assigns a url or site to DMOZ categories. Each catdb record "
|
|
"is about 100 bytes.";
|
|
m->m_off = (char *)&g_conf.m_catdbMaxTreeMem - g;
|
|
m->m_def = "1000000";
|
|
m->m_type = TYPE_LONG;
|
|
m->m_flags = PF_NOSYNC|PF_NOAPI;
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
// m->m_title = "catdb max page cache mem";
|
|
// m->m_desc = "";
|
|
// m->m_off = (char *)&g_conf.m_catdbMaxDiskPageCacheMem - g;
|
|
// m->m_def = "25000000";
|
|
// m->m_type = TYPE_LONG;
|
|
// m->m_flags = PF_NOSYNC|PF_NOAPI;
|
|
// m->m_page = PAGE_NONE;
|
|
// m->m_obj = OBJ_CONF;
|
|
// m++;
|
|
|
|
m->m_title = "catdb max cache mem";
|
|
m->m_desc = "";
|
|
m->m_off = (char *)&g_conf.m_catdbMaxCacheMem - g;
|
|
m->m_def = "0";
|
|
m->m_type = TYPE_LONG;
|
|
m->m_flags = PF_NOSYNC|PF_NOAPI;
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
/*
|
|
m->m_title = "catdb min files to merge";
|
|
m->m_desc = "";
|
|
m->m_off = (char *)&g_conf.m_catdbMinFilesToMerge - g;
|
|
m->m_def = "2";
|
|
m->m_type = TYPE_LONG;
|
|
m->m_save = 0;
|
|
m++;
|
|
|
|
m->m_title = "revdb max tree mem";
|
|
m->m_desc = "Revdb holds the meta list we added for this doc.";
|
|
m->m_off = (char *)&g_conf.m_revdbMaxTreeMem - g;
|
|
m->m_def = "30000000";
|
|
m->m_type = TYPE_LONG;
|
|
m++;
|
|
*/
|
|
|
|
/*
|
|
m->m_title = "timedb max tree mem";
|
|
m->m_desc = "Timedb holds event time intervals";
|
|
m->m_off = (char *)&g_conf.m_timedbMaxTreeMem - g;
|
|
m->m_def = "30000000";
|
|
m->m_type = TYPE_LONG;
|
|
m++;
|
|
*/
|
|
|
|
/*
|
|
m->m_title = "titledb max tree mem";
|
|
m->m_desc = "Titledb holds the compressed documents that have been "
|
|
"indexed.";
|
|
m->m_off = (char *)&g_conf.m_titledbMaxTreeMem - g;
|
|
m->m_def = "10000000";
|
|
m->m_type = TYPE_LONG;
|
|
m++;
|
|
|
|
m->m_title = "titledb max cache mem";
|
|
m->m_desc = "";
|
|
m->m_off = (char *)&g_conf.m_titledbMaxCacheMem - g;
|
|
m->m_def = "1000000";
|
|
m->m_type = TYPE_LONG;
|
|
m++;
|
|
|
|
m->m_title = "titledb max cache age";
|
|
m->m_desc = "";
|
|
m->m_off = (char *)&g_conf.m_titledbMaxCacheAge - g;
|
|
m->m_def = "86400"; // 1 day
|
|
m->m_type = TYPE_LONG;
|
|
m++;
|
|
|
|
m->m_title = "titledb save cache";
|
|
m->m_desc = "";
|
|
m->m_off = (char *)&g_conf.m_titledbSaveCache - g;
|
|
m->m_def = "0";
|
|
m->m_type = TYPE_BOOL;
|
|
m++;
|
|
*/
|
|
|
|
m->m_title = "clusterdb max tree mem";
|
|
m->m_desc = "Clusterdb caches small records for site clustering "
|
|
"and deduping.";
|
|
m->m_off = (char *)&g_conf.m_clusterdbMaxTreeMem - g;
|
|
m->m_def = "1000000";
|
|
m->m_type = TYPE_LONG;
|
|
m->m_flags = PF_NOSYNC|PF_NOAPI;
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
	/*
	m->m_title = "clusterdb max cache mem";
	m->m_desc = "";
	m->m_off = (char *)&g_conf.m_clusterdbMaxCacheMem - g;
	m->m_def = "100000000";
	m->m_type = TYPE_LONG;
	m++;

	m->m_title = "clusterdb max page cache mem";
	m->m_desc = "";
	m->m_off = (char *)&g_conf.m_clusterdbMaxDiskPageCacheMem - g;
	m->m_def = "100000000";
	m->m_type = TYPE_LONG;
	m++;
	*/

	// this is overridden by collection
	m->m_title = "clusterdb min files to merge";
	m->m_desc = "";
	m->m_cgi = "cmftm";
	m->m_off = (char *)&g_conf.m_clusterdbMinFilesToMerge - g;
	//m->m_def = "2";
	m->m_def = "-1"; // -1 means to use collection rec
	m->m_type = TYPE_LONG;
	m->m_save = 0;
	m->m_page = PAGE_NONE;
	m->m_obj = OBJ_CONF;
	m->m_flags = PF_NOAPI;
	m++;

	m->m_title = "clusterdb save cache";
	m->m_desc = "";
	m->m_cgi = "cdbsc";
	m->m_off = (char *)&g_conf.m_clusterdbSaveCache - g;
	m->m_def = "0";
	m->m_type = TYPE_BOOL;
	m->m_page = PAGE_NONE;
	m->m_obj = OBJ_CONF;
	m->m_flags = PF_NOAPI;
	m++;

	m->m_title = "max vector cache mem";
	m->m_desc = "Max memory for dup vector cache.";
	m->m_off = (char *)&g_conf.m_maxVectorCacheMem - g;
	m->m_def = "10000000";
	m->m_type = TYPE_LONG;
	m->m_flags = PF_NOSYNC|PF_NOAPI;
	m->m_page = PAGE_NONE;
	m->m_obj = OBJ_CONF;
	m++;

	/*
	m->m_title = "checksumdb max tree mem";
	m->m_desc = "Checksumdb is used for deduping same-site urls at "
		"index time.";
	m->m_off = (char *)&g_conf.m_checksumdbMaxTreeMem - g;
	m->m_def = "1000000";
	m->m_type = TYPE_LONG;
	m++;

	m->m_title = "checksumdb max cache mem";
	m->m_desc = "";
	m->m_off = (char *)&g_conf.m_checksumdbMaxCacheMem - g;
	m->m_def = "2000000";
	m->m_type = TYPE_LONG;
	m++;

	m->m_title = "checksumdb max page cache mem";
	m->m_desc = "";
	m->m_off = (char *)&g_conf.m_checksumdbMaxDiskPageCacheMem - g;
	m->m_def = "1000000";
	m->m_type = TYPE_LONG;
	m++;

	// this is overridden by collection
	m->m_title = "checksumdb min files to merge";
	m->m_desc = "";
	m->m_off = (char *)&g_conf.m_checksumdbMinFilesToMerge - g;
	//m->m_def = "2";
	m->m_def = "-1"; // -1 means to use collection rec
	m->m_type = TYPE_LONG;
	m->m_save = 0;
	m++;
	*/

	/*
	m->m_title = "tfndb max tree mem";
	m->m_desc = "Tfndb holds small records for each url in Spiderdb or "
		"Titledb.";
	m->m_off = (char *)&g_conf.m_tfndbMaxTreeMem - g;
	m->m_def = "1000000";
	m->m_type = TYPE_LONG;
	m++;

	m->m_title = "tfndb max page cache mem";
	m->m_desc = "";
	m->m_off = (char *)&g_conf.m_tfndbMaxDiskPageCacheMem - g;
	m->m_def = "5000000";
	m->m_type = TYPE_LONG;
	m++;
	*/

	/*
	// this is overridden by collection
	m->m_title = "tfndb min files to merge";
	m->m_desc = "";
	m->m_off = (char *)&g_conf.m_tfndbMinFilesToMerge - g;
	m->m_def = "2";
	m->m_type = TYPE_LONG;
	m->m_save = 0;
	m++;
	*/

	/*
	m->m_title = "spiderdb max tree mem";
	m->m_desc = "Spiderdb holds urls to be spidered.";
	m->m_off = (char *)&g_conf.m_spiderdbMaxTreeMem - g;
	m->m_def = "1000000";
	m->m_type = TYPE_LONG;
	m++;

	m->m_title = "spiderdb max cache mem";
	m->m_desc = "";
	m->m_off = (char *)&g_conf.m_spiderdbMaxCacheMem - g;
	m->m_def = "0";
	m->m_type = TYPE_LONG;
	m++;

	m->m_title = "spiderdb max page cache mem";
	m->m_desc = "";
	m->m_off = (char *)&g_conf.m_spiderdbMaxDiskPageCacheMem - g;
	m->m_def = "500000";
	m->m_type = TYPE_LONG;
	m++;

	// this is overridden by collection
	m->m_title = "spiderdb min files to merge";
	m->m_desc = "";
	m->m_off = (char *)&g_conf.m_spiderdbMinFilesToMerge - g;
	//m->m_def = "2";
	m->m_def = "-1"; // -1 means to use collection rec
	m->m_type = TYPE_LONG;
	m->m_save = 0;
	m++;
	*/
m->m_title = "robotdb max cache mem";
|
|
m->m_desc = "Robotdb caches robot.txt files.";
|
|
m->m_off = (char *)&g_conf.m_robotdbMaxCacheMem - g;
|
|
m->m_def = "128000";
|
|
m->m_type = TYPE_LONG;
|
|
m->m_flags = PF_NOSYNC|PF_NOAPI;
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "robotdb save cache";
|
|
m->m_cgi = "rdbsc";
|
|
m->m_desc = "";
|
|
m->m_off = (char *)&g_conf.m_robotdbSaveCache - g;
|
|
m->m_def = "0";
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_CONF;
|
|
m->m_flags = PF_NOAPI;
|
|
m++;
|
|
|
|
	/*
	m->m_title = "indexdb max tree mem";
	m->m_desc = "Indexdb holds the terms extracted from spidered "
		"documents.";
	m->m_off = (char *)&g_conf.m_indexdbMaxTreeMem - g;
	m->m_def = "10000000";
	m->m_type = TYPE_LONG;
	m++;

	m->m_title = "indexdb max cache mem";
	m->m_desc = "";
	m->m_off = (char *)&g_conf.m_indexdbMaxCacheMem - g;
	m->m_def = "5000000";
	m->m_type = TYPE_LONG;
	m++;

	m->m_title = "indexdb max page cache mem";
	m->m_desc = "";
	m->m_off = (char *)&g_conf.m_indexdbMaxDiskPageCacheMem - g;
	m->m_def = "50000000";
	m->m_type = TYPE_LONG;
	m++;
	*/

	// m->m_title = "linkdb max page cache mem";
	// m->m_desc = "";
	// m->m_off = (char *)&g_conf.m_linkdbMaxDiskPageCacheMem - g;
	// m->m_def = "0";
	// m->m_type = TYPE_LONG;
	// m->m_flags = PF_NOSYNC|PF_NOAPI;
	// m->m_page = PAGE_NONE;
	// m->m_obj = OBJ_CONF;
	// m++;

	/*
	// this is overridden by collection
	m->m_title = "indexdb min files to merge";
	m->m_desc = "";
	m->m_off = (char *)&g_conf.m_indexdbMinFilesToMerge - g;
	//m->m_def = "6";
	m->m_def = "-1"; // -1 means to use collection rec
	m->m_type = TYPE_LONG;
	m->m_save = 0;
	m++;

	m->m_title = "indexdb max index list age";
	m->m_desc = "";
	m->m_off = (char *)&g_conf.m_indexdbMaxIndexListAge - g;
	m->m_def = "60";
	m->m_type = TYPE_LONG;
	m++;

	//m->m_title = "indexdb truncation limit";
	//m->m_desc = "";
	//m->m_off = (char *)&g_conf.m_indexdbTruncationLimit - g;
	//m->m_def = "50000000";
	//m->m_type = TYPE_LONG;
	//m++;

	m->m_title = "indexdb save cache";
	m->m_desc = "";
	m->m_off = (char *)&g_conf.m_indexdbSaveCache - g;
	m->m_def = "0";
	m->m_type = TYPE_BOOL;
	m++;
	*/

	/*
	m->m_title = "datedb max tree mem";
	m->m_desc = "Datedb holds the terms extracted from spidered "
		"documents.";
	m->m_off = (char *)&g_conf.m_datedbMaxTreeMem - g;
	m->m_def = "10000000";
	m->m_type = TYPE_LONG;
	m++;

	m->m_title = "datedb max cache mem";
	m->m_desc = "";
	m->m_off = (char *)&g_conf.m_datedbMaxCacheMem - g;
	m->m_def = "1000000";
	m->m_type = TYPE_LONG;
	m++;

	// this is overridden by collection
	m->m_title = "datedb min files to merge";
	m->m_desc = "";
	m->m_off = (char *)&g_conf.m_datedbMinFilesToMerge - g;
	//m->m_def = "8";
	m->m_def = "-1"; // -1 means to use collection rec
	m->m_type = TYPE_LONG;
	m->m_save = 0;
	m++;

	m->m_title = "datedb max index list age";
	m->m_desc = "";
	m->m_off = (char *)&g_conf.m_datedbMaxIndexListAge - g;
	m->m_def = "60";
	m->m_type = TYPE_LONG;
	m++;

	m->m_title = "datedb save cache";
	m->m_desc = "";
	m->m_off = (char *)&g_conf.m_datedbSaveCache - g;
	m->m_def = "0";
	m->m_type = TYPE_BOOL;
	m++;
	*/

	/*
	m->m_title = "linkdb max tree mem";
	m->m_desc = "Linkdb stores linking information";
	m->m_off = (char *)&g_conf.m_linkdbMaxTreeMem - g;
	m->m_def = "20000000";
	m->m_type = TYPE_LONG;
	m++;

	// this is overridden by collection
	m->m_title = "linkdb min files to merge";
	m->m_desc = "";
	m->m_off = (char *)&g_conf.m_linkdbMinFilesToMerge - g;
	m->m_def = "-1"; // -1 means to use collection rec
	m->m_type = TYPE_LONG;
	//m->m_save = 0;
	m++;
	*/

	/*
	m->m_title = "quota table max mem";
	m->m_desc = "For caching and keeping tabs on exact quotas per "
		"domain without having to do a disk seek. If you are using "
		"exact quotas and see a lot of disk seeks on Indexdb, try "
		"increasing this.";
	m->m_off = (char *)&g_conf.m_quotaTableMaxMem - g;
	m->m_def = "1000000";
	m->m_type = TYPE_LONG;
	m++;
	*/

	m->m_title = "statsdb max tree mem";
	m->m_desc = "";
	m->m_off = (char *)&g_conf.m_statsdbMaxTreeMem - g;
	m->m_def = "5000000";
	m->m_type = TYPE_LONG;
	m->m_flags = PF_NOSYNC|PF_NOAPI;
	m->m_page = PAGE_NONE;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "statsdb max cache mem";
	m->m_desc = "";
	m->m_off = (char *)&g_conf.m_statsdbMaxCacheMem - g;
	m->m_def = "0";
	m->m_type = TYPE_LONG;
	m->m_flags = PF_NOSYNC|PF_NOAPI;
	m->m_page = PAGE_NONE;
	m->m_obj = OBJ_CONF;
	m++;

	// m->m_title = "statsdb max disk page cache mem";
	// m->m_desc = "";
	// m->m_off = (char *)&g_conf.m_statsdbMaxDiskPageCacheMem - g;
	// m->m_def = "1000000";
	// m->m_type = TYPE_LONG;
	// m->m_flags = PF_NOSYNC|PF_NOAPI;
	// m->m_page = PAGE_NONE;
	// m->m_obj = OBJ_CONF;
	// m++;

	//m->m_title = "statsdb min files to merge";
	//m->m_desc = "";
	//m->m_off = (char *)&g_conf.m_statsdbMinFilesToMerge - g;
	//m->m_def = "5";
	//m->m_type = TYPE_LONG;
	//m++;

	/*
	m->m_title = "use buckets for in memory recs";
	m->m_desc = "Use buckets for in memory recs for indexdb, datedb, "
		"and linkdb.";
	m->m_off = (char *)&g_conf.m_useBuckets - g;
	m->m_def = "1";
	m->m_type = TYPE_BOOL;
	m++;
	*/
m->m_title = "http max send buf size";
|
|
m->m_desc = "Maximum bytes of a doc that can be sent before having "
|
|
"to read more from disk";
|
|
m->m_cgi = "hmsbs";
|
|
m->m_off = (char *)&g_conf.m_httpMaxSendBufSize - g;
|
|
m->m_def = "128000";
|
|
m->m_type = TYPE_LONG;
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_CONF;
|
|
m->m_flags = PF_NOAPI;
|
|
m++;
|
|
|
|
m->m_title = "search results max cache mem";
|
|
m->m_desc = "Bytes to use for caching search result pages.";
|
|
m->m_off = (char *)&g_conf.m_searchResultsMaxCacheMem - g;
|
|
m->m_def = "100000";
|
|
m->m_type = TYPE_LONG;
|
|
m->m_flags = PF_NOSYNC|PF_NOAPI;
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
//m->m_title = "search results max cache age";
|
|
//m->m_desc = "Maximum age to cache search results page in seconds.";
|
|
//m->m_off = (char *)&g_conf.m_searchResultsMaxCacheAge - g;
|
|
//m->m_def = "86400";
|
|
//m->m_type = TYPE_LONG;
|
|
//m++;
|
|
|
|
//m->m_title = "search results save cache";
|
|
//m->m_desc = "Should the search results cache be saved to disk?";
|
|
//m->m_off = (char *)&g_conf.m_searchResultsSaveCache - g;
|
|
//m->m_def = "0";
|
|
//m->m_type = TYPE_BOOL;
|
|
//m++;
|
|
|
|
//m->m_title = "site link info max cache mem";
|
|
//m->m_desc = "Bytes to use for site link info data.";
|
|
//m->m_off = (char *)&g_conf.m_siteLinkInfoMaxCacheMem - g;
|
|
//m->m_def = "100000";
|
|
//m->m_type = TYPE_LONG;
|
|
//m++;
|
|
|
|
//m->m_title = "site link info max cache age";
|
|
//m->m_desc = "Maximum age to cache site link info data in seconds.";
|
|
//m->m_off = (char *)&g_conf.m_siteLinkInfoMaxCacheAge - g;
|
|
//m->m_def = "3600";
|
|
//m->m_type = TYPE_LONG;
|
|
//m++;
|
|
|
|
//m->m_title = "site link info save cache";
|
|
//m->m_desc = "Should the site link info cache be saved to disk?";
|
|
//m->m_off = (char *)&g_conf.m_siteLinkInfoSaveCache - g;
|
|
//m->m_def = "0";
|
|
//m->m_type = TYPE_BOOL;
|
|
//m++;
|
|
|
|
//m->m_title = "site quality max cache mem";
|
|
//m->m_desc = "Bytes to use for site or root page quality.";
|
|
//m->m_off = (char *)&g_conf.m_siteQualityMaxCacheMem - g;
|
|
//m->m_def = "2000000"; // 2MB
|
|
//m->m_type = TYPE_LONG;
|
|
//m++;
|
|
|
|
//m->m_title = "site quality save cache";
|
|
//m->m_desc = "Should the site link info cache be saved to disk?";
|
|
//m->m_off = (char *)&g_conf.m_siteQualitySaveCache - g;
|
|
//m->m_def = "0";
|
|
//m->m_type = TYPE_BOOL;
|
|
//m++;
|
|
|
|
//m->m_title = "max incoming links to sample";
|
|
//m->m_desc = "Max linkers to a doc that are sampled to determine "
|
|
// "quality and for gathering link text.";
|
|
//m->m_off = (char *)&g_conf.m_maxIncomingLinksToSample - g;
|
|
//m->m_def = "100";
|
|
//m->m_type = TYPE_LONG;
|
|
//m++;
|
|
|
|
//m->m_title = "allow async signals";
|
|
//m->m_desc = "Allow software interrupts?";
|
|
//m->m_off = (char *)&g_conf.m_allowAsyncSignals - g;
|
|
//m->m_def = "1";
|
|
//m->m_type = TYPE_BOOL;
|
|
//m++;
|
|
|
|
/*
|
|
m->m_title = "qa build mode";
|
|
m->m_desc = "When on Msg13.cpp saves docs in the qatest123 coll "
|
|
"to qa/ subdir, when off "
|
|
"if downloading a doc for qatest123 coll and not in "
|
|
"qa subdir then it returns a 404.";
|
|
m->m_cgi = "qabuildmode";
|
|
m->m_off = (char *)&g_conf.m_qaBuildMode - g;
|
|
m->m_def = "0";
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_CONF;
|
|
m->m_flags = PF_NOAPI | PF_HIDDEN;
|
|
m++;
|
|
*/
|
|
|
|
m->m_title = "read only mode";
|
|
m->m_desc = "Read only mode does not allow spidering.";
|
|
m->m_cgi = "readonlymode";
|
|
m->m_off = (char *)&g_conf.m_readOnlyMode - g;
|
|
m->m_def = "0";
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_CONF;
|
|
m->m_flags = PF_NOAPI;
|
|
m++;
|
|
|
|
/*
|
|
Disable this until it works.
|
|
m->m_title = "use merge token";
|
|
m->m_desc = "Restrict merging to one host per token group? Hosts "
|
|
"that use the same disk and mirror hosts are generally in the "
|
|
"same token group so that only one host in the group can be "
|
|
"doing a merge at a time. This prevents query response time "
|
|
"from suffering too much.";
|
|
m->m_off = (char *)&g_conf.m_useMergeToken - g;
|
|
m->m_def = "1";
|
|
m->m_type = TYPE_BOOL;
|
|
m++;
|
|
*/
|
|
|
|
/*
|
|
m->m_title = "do spell checking";
|
|
m->m_desc = "Spell check using the dictionary. Will be available "
|
|
"again soon.";
|
|
m->m_off = (char *)&g_conf.m_doSpellChecking - g;
|
|
m->m_cgi = "dospellchecking";
|
|
m->m_def = "1";
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
*/
|
|
|
|
m->m_title = "do narrow search";
|
|
m->m_desc = "give narrow search suggestions.";
|
|
m->m_off = (char *)&g_conf.m_doNarrowSearch - g;
|
|
m->m_cgi = "donarrowsearch";
|
|
m->m_def = "0";
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
///////////////////////////////////////////
|
|
// BASIC SETTINGS
|
|
///////////////////////////////////////////
|
|
|
|
m->m_title = "spidering enabled";
|
|
m->m_desc = "Pause and resumes spidering for this collection.";
|
|
m->m_cgi = "bcse";
|
|
m->m_off = (char *)&cr.m_spideringEnabled - x;
|
|
m->m_page = PAGE_BASIC_SETTINGS;
|
|
m->m_obj = OBJ_COLL;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "1";
|
|
m->m_flags = PF_DUP|PF_CLONE;
|
|
m++;
|
|
|
|
m->m_title = "site list";
|
|
m->m_xml = "siteList";
|
|
m->m_desc = "List of sites to spider, one per line. "
|
|
"See <a href=#examples>example site list</a> below. "
|
|
"<br>"
|
|
"<br>"
|
|
"Example #1: <b>mysite.com myothersite.com</b>"
|
|
"<br>"
|
|
"<i>This will spider just those two sites.</i>"
|
|
"<br>"
|
|
"<br>"
|
|
"Example #2: <b>seed:dmoz.org</b>"
|
|
"<br>"
|
|
"<i>This will spider the whole web starting with the website "
|
|
"dmoz.org</i>"
|
|
"<br><br>"
|
|
"Gigablast uses the "
|
|
"<a href=/admin/filters#insitelist>insitelist</a> "
|
|
"directive on "
|
|
"the <a href=/admin/filters>url filters</a> "
|
|
"page to make sure that the spider only indexes urls "
|
|
"that match the site patterns you specify here, other than "
|
|
"urls you add individually via the add urls or inject url "
|
|
"tools. "
|
|
"Limit list to 300MB. If you have a lot of INDIVIDUAL urls "
|
|
"to add then consider using the <a href=/admin/addurl>add "
|
|
"urls</a> interface.";
|
|
m->m_cgi = "sitelist";
|
|
m->m_off = (char *)&cr.m_siteListBuf - x;
|
|
m->m_page = PAGE_BASIC_SETTINGS;
|
|
m->m_obj = OBJ_COLL;
|
|
m->m_type = TYPE_SAFEBUF;
|
|
m->m_func = CommandUpdateSiteList;
|
|
m->m_def = "";
|
|
// rebuild urlfilters now will nuke doledb and call updateSiteList()
|
|
m->m_flags = PF_TEXTAREA | PF_DUP | PF_REBUILDURLFILTERS;
|
|
m++;
|
|
|
|
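	// Example site list (drawn from the description above) as it might
	// be entered in the "sitelist" textarea, one entry per line:
	//
	//   mysite.com
	//   myothersite.com
	//   seed:dmoz.org
	//
	// Plain entries restrict spidering to those sites; per the desc, a
	// seed: entry also starts a whole-web crawl from that url.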
	/*
	m->m_title = "spider sites";
	m->m_desc = "Attempt to spider and index urls in the "
		"\"site patterns\" above. Saves you from having to add "
		"the same list of sites on the <a href=/admin/addurl>"
		"add url</a> page.";
	m->m_cgi = "spiderToo";
	m->m_off = (char *)&cr.m_spiderToo - x;
	m->m_page = PAGE_BASIC_SETTINGS;
	m->m_obj = OBJ_COLL;
	m->m_type = TYPE_CHECKBOX;
	m->m_def = "1";
	m->m_flags = PF_NOSAVE | PF_DUP;
	m++;
	*/

	/*
	// the new upload post submit button
	m->m_title = "upload site list";
	m->m_desc = "Upload your file of site patterns. Completely replaces "
		"the site list in the text box above.";
	m->m_cgi = "uploadsitelist";
	m->m_page = PAGE_BASIC_SETTINGS;
	m->m_obj = OBJ_COLL;
	m->m_off = 0;
	m->m_def = NULL;
	m->m_type = TYPE_FILEUPLOADBUTTON;
	m->m_flags = PF_NOSAVE | PF_DUP;
	m++;
	*/

	m->m_title = "restart collection";
	m->m_desc = "Remove all documents from the collection and re-add "
		"seed urls from site list.";
	// If you do this accidentally there "
	//"is a <a href=/faq.html#recover>recovery procedure</a> to "
	// "get back the trashed data.";
	m->m_cgi = "restart";
	m->m_page = PAGE_BASIC_SETTINGS;
	m->m_obj = OBJ_COLL;
	m->m_type = TYPE_CMD;
	m->m_func2 = CommandRestartColl;
	m++;
	///////////////////////////////////////////
	// SITE LIST
	///////////////////////////////////////////

	/*
	m->m_title = "spider sites";
	m->m_desc = "Attempt to spider and index urls in the "
		"\"site patterns\" above. Saves you from having to add "
		"the same list of sites on the <a href=/admin/addurl>"
		"add url</a> page.";
	m->m_cgi = "spiderToo";
	m->m_off = (char *)&cr.m_spiderToo - x;
	m->m_page = PAGE_SITES;
	m->m_obj = OBJ_COLL;
	m->m_type = TYPE_CHECKBOX;
	m->m_def = "1";
	m->m_flags = PF_NOSAVE;
	m++;
	*/

	///////////////////////////////////////////
	// SYNC CONTROLS
	///////////////////////////////////////////
	/*

	m->m_title = "sync enabled";
	m->m_desc = "Turn data synchronization on or off. When a host comes "
		"up it will perform an incremental synchronization with a "
		"twin if it detects that it was unable to save its data "
		"when it last exited.";
	m->m_cgi = "sye";
	m->m_off = (char *)&g_conf.m_syncEnabled - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_page = PAGE_SYNC;
	m++;

	m->m_title = "dry run";
	m->m_desc = "Should Gigablast just run through and log the changes "
		"it would make without actually making them?";
	m->m_cgi = "sdr";
	m->m_off = (char *)&g_conf.m_syncDryRun - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m++;

	m->m_title = "sync indexdb";
	m->m_desc = "Turn data synchronization on or off for indexdb. "
		"Indexdb holds the index information.";
	m->m_cgi = "si";
	m->m_off = (char *)&g_conf.m_syncIndexdb - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m++;

	m->m_title = "sync logging";
	m->m_desc = "Log fixes?";
	m->m_cgi = "slf";
	m->m_off = (char *)&g_conf.m_syncLogging - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m++;

	m->m_title = "union titledb and spiderdb";
	m->m_desc = "If a host being sync'd has a title record (cached web "
		"page) that the "
		"remote host does not, normally, it would be deleted. "
		"But if this is true then it is kept. "
		"Useful for reducing title rec not found errors.";
	m->m_cgi = "sdu";
	m->m_off = (char *)&g_conf.m_syncDoUnion - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m++;

	m->m_title = "force out of sync";
	m->m_desc = "Forces this host to be out of sync.";
	m->m_cgi = "foos";
	m->m_type = TYPE_CMD;
	m->m_func = CommandForceOutOfSync;
	m->m_cast = 0;
	m++;

	m->m_title = "bytes per second";
	m->m_desc = "How many bytes to read per second for syncing. "
		"Decrease to reduce impact of syncing on query "
		"response time.";
	m->m_cgi = "sbps";
	m->m_off = (char *)&g_conf.m_syncBytesPerSecond - g;
	m->m_type = TYPE_LONG;
	m->m_def = "10000000";
	m->m_units = "bytes";
	m++;
	*/
	/////////////////////
	//
	// DIFFBOT CRAWLBOT PARMS
	//
	//////////////////////

	///////////
	//
	// DO NOT INSERT parms above here, unless you set
	// m_obj = OBJ_COLL !!! otherwise it thinks it belongs to
	// OBJ_CONF as used in the above parms.
	//
	///////////

	m->m_cgi = "dbtoken";
	m->m_xml = "diffbotToken";
	m->m_off = (char *)&cr.m_diffbotToken - x;
	m->m_type = TYPE_SAFEBUF;
	m->m_page = PAGE_NONE;
	m->m_obj = OBJ_COLL;
	m->m_def = "";
	m->m_flags = PF_DIFFBOT;
	m++;

	m->m_cgi = "createdtime";
	m->m_xml = "collectionCreatedTime";
	m->m_desc = "Time when this collection was created, or time of "
		"the last reset or restart.";
	m->m_off = (char *)&cr.m_diffbotCrawlStartTime - x;
	m->m_type = TYPE_LONG;
	m->m_page = PAGE_NONE;
	m->m_obj = OBJ_COLL;
	m->m_def = "0";
	m->m_flags = PF_NOAPI; //PF_DIFFBOT; no i want to saveToXml
	m++;

	m->m_cgi = "spiderendtime";
	m->m_xml = "crawlEndTime";
	m->m_desc = "If the spider is done, the time at which it finished.";
	m->m_off = (char *)&cr.m_diffbotCrawlEndTime - x;
	m->m_type = TYPE_LONG;
	m->m_page = PAGE_NONE;
	m->m_obj = OBJ_COLL;
	m->m_def = "0";
	m->m_flags = PF_NOAPI; //PF_DIFFBOT; no i want to saveToXml
	m++;
m->m_cgi = "dbcrawlname";
|
|
m->m_xml = "diffbotCrawlName";
|
|
m->m_off = (char *)&cr.m_diffbotCrawlName - x;
|
|
m->m_type = TYPE_SAFEBUF;
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_COLL;
|
|
m->m_def = "";
|
|
m->m_flags = PF_DIFFBOT;
|
|
m++;
|
|
|
|
m->m_cgi = "notifyEmail";
|
|
m->m_title = "notify email";
|
|
m->m_xml = "notifyEmail";
|
|
m->m_off = (char *)&cr.m_notifyEmail - x;
|
|
m->m_type = TYPE_SAFEBUF;
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_COLL;
|
|
m->m_def = "";
|
|
m->m_flags = PF_DIFFBOT;
|
|
m++;
|
|
|
|
m->m_cgi = "notifyWebhook";
|
|
m->m_xml = "notifyWebhook";
|
|
m->m_title = "notify webhook";
|
|
m->m_off = (char *)&cr.m_notifyUrl - x;
|
|
m->m_type = TYPE_SAFEBUF;
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_COLL;
|
|
m->m_def = "";
|
|
m->m_flags = PF_DIFFBOT;
|
|
m++;
|
|
|
|
// collective respider frequency (for pagecrawlbot.cpp)
|
|
m->m_title = "collective respider frequency (days)";
|
|
m->m_cgi = "repeat";
|
|
m->m_xml = "collectiveRespiderFrequency";
|
|
m->m_off = (char *)&cr.m_collectiveRespiderFrequency - x;
|
|
m->m_type = TYPE_FLOAT;
|
|
m->m_def = "0.0"; // 0.0
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_COLL;
|
|
m->m_units = "days";
|
|
m->m_flags = PF_REBUILDURLFILTERS | PF_DIFFBOT;
|
|
m++;
|
|
|
|
m->m_title = "collective crawl delay (seconds)";
|
|
m->m_cgi = "crawlDelay";
|
|
m->m_xml = "collectiveCrawlDelay";
|
|
m->m_off = (char *)&cr.m_collectiveCrawlDelay - x;
|
|
m->m_type = TYPE_FLOAT;
|
|
m->m_def = ".250"; // 250 ms
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_COLL;
|
|
m->m_flags = PF_REBUILDURLFILTERS | PF_DIFFBOT;
|
|
m->m_units = "seconds";
|
|
m++;
|
|
|
|
m->m_cgi = "urlCrawlPattern";
|
|
m->m_xml = "diffbotUrlCrawlPattern";
|
|
m->m_title = "url crawl pattern";
|
|
m->m_off = (char *)&cr.m_diffbotUrlCrawlPattern - x;
|
|
m->m_type = TYPE_SAFEBUF;
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_COLL;
|
|
m->m_def = "";
|
|
m->m_flags = PF_REBUILDURLFILTERS | PF_DIFFBOT;
|
|
m++;
|
|
|
|
m->m_cgi = "urlProcessPattern";
|
|
m->m_xml = "diffbotUrlProcessPattern";
|
|
m->m_title = "url process pattern";
|
|
m->m_off = (char *)&cr.m_diffbotUrlProcessPattern - x;
|
|
m->m_type = TYPE_SAFEBUF;
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_COLL;
|
|
m->m_def = "";
|
|
m->m_flags = PF_REBUILDURLFILTERS | PF_DIFFBOT;
|
|
m++;
|
|
|
|
m->m_cgi = "pageProcessPattern";
|
|
m->m_xml = "diffbotPageProcessPattern";
|
|
m->m_title = "page process pattern";
|
|
m->m_off = (char *)&cr.m_diffbotPageProcessPattern - x;
|
|
m->m_type = TYPE_SAFEBUF;
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_COLL;
|
|
m->m_def = "";
|
|
m->m_flags = PF_REBUILDURLFILTERS | PF_DIFFBOT;
|
|
m++;
|
|
|
|
m->m_cgi = "urlCrawlRegEx";
|
|
m->m_xml = "diffbotUrlCrawlRegEx";
|
|
m->m_title = "url crawl regex";
|
|
m->m_off = (char *)&cr.m_diffbotUrlCrawlRegEx - x;
|
|
m->m_type = TYPE_SAFEBUF;
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_COLL;
|
|
m->m_def = "";
|
|
m->m_flags = PF_REBUILDURLFILTERS | PF_DIFFBOT;
|
|
m++;
|
|
|
|
m->m_cgi = "urlProcessRegEx";
|
|
m->m_xml = "diffbotUrlProcessRegEx";
|
|
m->m_title = "url process regex";
|
|
m->m_off = (char *)&cr.m_diffbotUrlProcessRegEx - x;
|
|
m->m_type = TYPE_SAFEBUF;
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_COLL;
|
|
m->m_def = "";
|
|
m->m_flags = PF_REBUILDURLFILTERS | PF_DIFFBOT;
|
|
m++;
|
|
|
|
m->m_cgi = "maxHops";
|
|
m->m_xml = "diffbotHopcount";
|
|
m->m_title = "diffbot max hopcount";
|
|
m->m_off = (char *)&cr.m_diffbotMaxHops - x;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_COLL;
|
|
m->m_def = "-1";
|
|
m->m_flags = PF_REBUILDURLFILTERS | PF_DIFFBOT;
|
|
m++;
|
|
|
|
m->m_cgi = "onlyProcessIfNew";
|
|
m->m_xml = "diffbotOnlyProcessIfNew";
|
|
m->m_title = "onlyProcessIfNew";
|
|
m->m_off = (char *)&cr.m_diffbotOnlyProcessIfNewUrl - x;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_COLL;
|
|
m->m_def = "1";
|
|
m->m_flags = PF_REBUILDURLFILTERS | PF_DIFFBOT;
|
|
m++;
|
|
|
|
m->m_cgi = "seeds";
|
|
m->m_xml = "diffbotSeeds";
|
|
m->m_off = (char *)&cr.m_diffbotSeeds - x;
|
|
m->m_type = TYPE_SAFEBUF;
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_COLL;
|
|
m->m_flags = PF_DIFFBOT;
|
|
m->m_def = "";
|
|
m++;
|
|
|
|
m->m_xml = "isCustomCrawl";
|
|
m->m_off = (char *)&cr.m_isCustomCrawl - x;
|
|
m->m_type = TYPE_CHAR;
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_COLL;
|
|
m->m_cgi = "isCustomCrawl";
|
|
m->m_def = "0";
|
|
m->m_flags = PF_DIFFBOT;
|
|
m++;
|
|
|
|
m->m_cgi = "maxToCrawl";
|
|
m->m_title = "max to crawl";
|
|
m->m_xml = "maxToCrawl";
|
|
m->m_off = (char *)&cr.m_maxToCrawl - x;
|
|
m->m_type = TYPE_LONG_LONG;
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_COLL;
|
|
m->m_def = "100000";
|
|
m->m_flags = PF_DIFFBOT;
|
|
m++;
|
|
|
|
m->m_cgi = "maxToProcess";
|
|
m->m_title = "max to process";
|
|
m->m_xml = "maxToProcess";
|
|
m->m_off = (char *)&cr.m_maxToProcess - x;
|
|
m->m_type = TYPE_LONG_LONG;
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_COLL;
|
|
m->m_def = "-1";
|
|
m->m_flags = PF_DIFFBOT;
|
|
m++;
|
|
|
|
m->m_cgi = "maxRounds";
|
|
m->m_title = "max crawl rounds";
|
|
m->m_xml = "maxCrawlRounds";
|
|
m->m_off = (char *)&cr.m_maxCrawlRounds - x;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_page = PAGE_NONE;
|
|
m->m_obj = OBJ_COLL;
|
|
m->m_def = "-1";
|
|
m->m_flags = PF_DIFFBOT;
|
|
m++;
|
|
|
|
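	// A hypothetical request sketch tying the crawlbot parms above
	// together (the endpoint and values are illustrative, not taken
	// from this source):
	//
	//   GET /crawlbot?c=mycoll&seeds=http://www.example.com/
	//       &maxToCrawl=100000&maxToProcess=-1&maxRounds=-1
	//       &repeat=7.0&crawlDelay=0.250&maxHops=2
	//
	// Each cgi name maps to one parm rec above and is written into the
	// CollectionRec at the recorded m_off offset.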
	/////////////////////
	//
	// new cmd parms
	//
	/////////////////////

	m->m_title = "insert parm row";
	m->m_desc = "insert a row into a parm";
	m->m_cgi = "insert";
	m->m_type = TYPE_CMD;
	m->m_page = PAGE_NONE;
	m->m_obj = OBJ_COLL;
	m->m_func = CommandInsertUrlFiltersRow;
	m->m_cast = 1;
	m->m_flags = PF_REBUILDURLFILTERS;
	m++;

	m->m_title = "remove parm row";
	m->m_desc = "remove a row from a parm";
	m->m_cgi = "remove";
	m->m_type = TYPE_CMD;
	m->m_page = PAGE_NONE;
	m->m_obj = OBJ_COLL;
	m->m_func = CommandRemoveUrlFiltersRow;
	m->m_cast = 1;
	m->m_flags = PF_REBUILDURLFILTERS;
	m++;

	m->m_title = "delete collection";
	m->m_desc = "delete a collection";
	m->m_cgi = "delete";
	m->m_type = TYPE_CMD;
	m->m_page = PAGE_NONE;
	m->m_obj = OBJ_COLL;
	m->m_func2 = CommandDeleteColl;
	m->m_cast = 1;
	m++;

	m->m_title = "delete collection 2";
	m->m_desc = "delete the specified collection";
	m->m_cgi = "delColl";
	m->m_type = TYPE_CMD;
	m->m_page = PAGE_NONE;
	m->m_obj = OBJ_COLL;
	m->m_func2 = CommandDeleteColl2;
	m->m_cast = 1;
	m++;

	m->m_title = "delete collection";
	m->m_desc = "Delete the specified collection. You can specify "
		"multiple &delcoll= parms in a single request to delete "
		"multiple collections at once.";
	// lowercase as opposed to camelcase above
	m->m_cgi = "delcoll";
	m->m_type = TYPE_CMD;
	m->m_page = PAGE_DELCOLL;
	m->m_obj = OBJ_COLL;
	m->m_func2 = CommandDeleteColl2;
	m->m_cast = 1;
	m->m_flags = PF_API | PF_REQUIRED;
	m++;

	// arg is the collection # to clone from
	m->m_title = "clone collection";
	m->m_desc = "Clone collection settings FROM this collection.";
	m->m_cgi = "clonecoll";
	m->m_type = TYPE_CMD;
	m->m_page = PAGE_CLONECOLL;
	m->m_obj = OBJ_COLL;
	m->m_func = CommandCloneColl;
	m->m_cast = 1;
	m->m_flags = PF_API | PF_REQUIRED;
	m++;

	m->m_title = "add collection";
	m->m_desc = "add a new collection";
	// camelcase support
	m->m_cgi = "addColl";
	m->m_type = TYPE_CMD;
	m->m_page = PAGE_NONE;
	m->m_obj = OBJ_COLL;
	m->m_func = CommandAddColl0;
	m->m_cast = 1;
	m++;

	m->m_title = "add collection";
	m->m_desc = "Add a new collection with this name. No spaces or "
		"strange characters allowed. Max of 64 characters.";
	// lower case support
	m->m_cgi = "addcoll";
	m->m_type = TYPE_CMD;
	m->m_page = PAGE_ADDCOLL;
	m->m_obj = OBJ_COLL;
	m->m_func = CommandAddColl0;
	m->m_cast = 1;
	m->m_flags = PF_API | PF_REQUIRED;
	m++;
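	// Illustrative admin requests built from the command parms above
	// (paths follow the m_page hints; exact urls may differ):
	//
	//   GET /admin/addcoll?addcoll=mycoll              // create a coll
	//   GET /admin/clonecoll?c=mycoll&clonecoll=main   // copy settings
	//   GET /admin/delcoll?delcoll=mycoll&delcoll=test // delete two
	//
	// As the desc above notes, multiple &delcoll= parms may appear in
	// one request.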
	//
	// CLOUD SEARCH ENGINE SUPPORT
	//
	// used to prevent a guest ip adding more than one coll
	m->m_title = "user ip";
	m->m_desc = "IP of user adding collection.";
	m->m_cgi = "userip";
	m->m_xml = "userIp";
	m->m_off = (char *)&cr.m_userIp - x;
	m->m_type = TYPE_STRING;
	m->m_size = 16;
	m->m_def = "";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN; // | PF_NOSAVE;
	m->m_page = PAGE_ADDCOLL;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "add custom crawl";
	m->m_desc = "add custom crawl";
	m->m_cgi = "addCrawl";
	m->m_type = TYPE_CMD;
	m->m_page = PAGE_NONE;
	m->m_obj = OBJ_COLL;
	m->m_func = CommandAddColl1;
	m->m_cast = 1;
	m++;

	m->m_title = "add bulk job";
	m->m_desc = "add bulk job";
	m->m_cgi = "addBulk";
	m->m_type = TYPE_CMD;
	m->m_page = PAGE_NONE;
	m->m_obj = OBJ_COLL;
	m->m_func = CommandAddColl2;
	m->m_cast = 1;
	m++;

	m->m_title = "in sync";
	m->m_desc = "signify in sync with host 0";
	m->m_cgi = "insync";
	m->m_type = TYPE_CMD;
	m->m_page = PAGE_NONE;
	m->m_obj = OBJ_COLL;
	m->m_func = CommandInSync;
	m->m_cast = 1;
	m++;
	///////////////////////////////////////////
	// SEARCH CONTROLS
	///////////////////////////////////////////

	//m->m_title = "allow RAID style list intersection";
	//m->m_desc = "Allow using RAID style lookup for intersecting term "
	//	"lists and getting docIds for queries.";
	//m->m_cgi = "uraid";
	//m->m_off = (char *)&cr.m_allowRaidLookup - x;
	//m->m_type = TYPE_BOOL;
	//m->m_def = "0";
	//m++;

	//m->m_title = "allow RAIDed term list read";
	//m->m_desc = "Allow splitting up the term list read for large lists "
	//	"amongst twins.";
	//m->m_cgi = "ulraid";
	//m->m_off = (char *)&cr.m_allowRaidListRead - x;
	//m->m_type = TYPE_BOOL;
	//m->m_def = "0";
	//m->m_group = 0;
	//m++;

	//m->m_title = "max RAID mercenaries";
	//m->m_desc = "Max number of mercenaries to use in RAID lookup and "
	//	"intersection.";
	//m->m_cgi = "raidm";
	//m->m_off = (char *)&cr.m_maxRaidMercenaries - x;
	//m->m_type = TYPE_LONG;
	//m->m_def = "2";
	//m->m_group = 0;
	//m++;

	//m->m_title = "min term list size to RAID";
	//m->m_desc = "Term list size to begin doing term list RAID";
	//m->m_cgi = "raidsz";
	//m->m_off = (char *)&cr.m_minRaidListSize - x;
	//m->m_type = TYPE_LONG;
	//m->m_def = "1000000";
	//m->m_group = 0;
	//m++;

	m->m_title = "restrict indexdb for queries";
	m->m_desc = "If this is true Gigablast will only search the root "
		"index file for docIds. Saves on disk seeks, "
		"but may use older versions of indexed web pages.";
	m->m_cgi = "riq";
	m->m_off = (char *)&cr.m_restrictIndexdbForQuery - x;
	m->m_type = TYPE_BOOL;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m->m_def = "0";
	//m->m_sparm = 1;
	//m->m_scgi = "ri";
	//m->m_soff = (char *)&si.m_restrictIndexdbForQuery - y;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m++;

	m->m_title = "restrict indexdb for xml feed";
	m->m_desc = "Like above, but specifically for XML feeds.";
	m->m_cgi = "rix";
	m->m_off = (char *)&cr.m_restrictIndexdbForXML - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	//m->m_title = "restrict indexdb for queries in xml feed";
	//m->m_desc = "Same as above, but just for the XML feed.";
	//m->m_cgi = "riqx";
	//m->m_off = (char *)&cr.m_restrictIndexdbForQueryRaw - x;
	//m->m_type = TYPE_BOOL;
	//m->m_def = "1";
	//m->m_group = 0;
	//m++;

	m->m_title = "read from cache by default";
	m->m_desc = "Should we read search results from the cache? Set "
		"to false to fix dmoz bug.";
	m->m_cgi = "rcd";
	m->m_off = (char *)&cr.m_rcache - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;
m->m_title = "fast results";
|
|
m->m_desc = "Use &fast=1 to obtain search results from the much "
|
|
"faster Gigablast index, although the results are not "
|
|
"searched as thoroughly.";
|
|
m->m_obj = OBJ_SI;
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_off = (char *)&si.m_query - y;
|
|
m->m_type = TYPE_CHARPTR;//STRING;
|
|
m->m_def = "0";
|
|
m->m_cgi = "fast";
|
|
//m->m_size = MAX_QUERY_LEN;
|
|
m->m_flags = PF_COOKIE | PF_WIDGET_PARM | PF_API;
|
|
m++;
|
|
|
|
|
|
m->m_title = "query";
|
|
m->m_desc = "The query to perform. See <a href=/help.html>help</a>. "
|
|
"See the <a href=#qops>query operators</a> below for "
|
|
"more info.";
|
|
m->m_obj = OBJ_SI;
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_off = (char *)&si.m_query - y;
|
|
m->m_type = TYPE_CHARPTR;//STRING;
|
|
m->m_cgi = "q";
|
|
//m->m_size = MAX_QUERY_LEN;
|
|
m->m_flags = PF_REQUIRED | PF_COOKIE | PF_WIDGET_PARM | PF_API;
|
|
m++;
|
|
|
|
// m->m_title = "query2";
|
|
// m->m_desc = "The query on which to score inlinkers.";
|
|
// m->m_obj = OBJ_SI;
|
|
// m->m_page = PAGE_NONE;
|
|
// m->m_off = (char *)&si.m_query2 - y;
|
|
// m->m_type = TYPE_CHARPTR;//STRING;
|
|
// m->m_cgi = "qq";
|
|
// m->m_size = MAX_QUERY_LEN;
|
|
// m->m_sprpg = 0; // do not store query, needs to be last so related
|
|
// m->m_sprpp = 0; // topics can append to it
|
|
// m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
// m++;
|
|
|
|
m->m_title = "collection";
|
|
m->m_desc = "Search this collection. Use multiple collection names "
|
|
"separated by a whitespace to search multiple collections at "
|
|
"once.";
|
|
m->m_cgi = "c";
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_obj = OBJ_SI;
|
|
m->m_type = TYPE_CHARPTR;//SAFEBUF;
|
|
m->m_def = NULL;
|
|
m->m_flags = PF_API | PF_REQUIRED;
|
|
m->m_off = (char *)&si.m_coll - y;
|
|
m++;
|
|
|
|
m->m_title = "number of results per query";
|
|
m->m_desc = "The number of results returned per page.";
|
|
// make it 25 not 50 since we only have like 26 balloons
|
|
m->m_def = "10";
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_obj = OBJ_SI;
|
|
m->m_off = (char *)&si.m_docsWanted - y;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_cgi = "n";
|
|
m->m_flags = PF_WIDGET_PARM | PF_API;
|
|
m->m_smin = 0;
|
|
m++;
|
|
|
|
|
|
m->m_title = "first result num";
|
|
m->m_desc = "Start displaying at search result #X. Starts at 0.";
|
|
m->m_def = "0";
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_obj = OBJ_SI;
|
|
m->m_off = (char *)&si.m_firstResultNum - y;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_cgi = "s";
|
|
m->m_smin = 0;
|
|
m->m_sprpg = 0;
|
|
m->m_sprpp = 0;
|
|
m->m_flags = PF_REDBOX;
|
|
m++;
|
|
|
|
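	// A minimal results-page request assembled from the parms above
	// (path and values illustrative):
	//
	//   GET /search?c=main&q=gigablast&n=10&s=0
	//
	// c= selects the collection, q= is the required query, n= sets
	// si.m_docsWanted and s= sets si.m_firstResultNum.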
m->m_title = "show errors";
|
|
m->m_desc = "Show errors from generating search result summaries "
|
|
"rather than just hide the docid. Useful for debugging.";
|
|
m->m_cgi = "showerrors";
|
|
m->m_off = (char *)&si.m_showErrors - y;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_flags = PF_API;
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_obj = OBJ_SI;
|
|
m++;
|
|
|
|
m->m_title = "site cluster";
|
|
m->m_desc = "Should search results be site clustered? This "
|
|
"limits each site to appearing at most twice in the "
|
|
"search results. Sites are subdomains for the most part, "
|
|
"like abc.xyz.com.";
|
|
m->m_cgi = "sc";
|
|
m->m_off = (char *)&si.m_doSiteClustering - y;
|
|
m->m_defOff= (char *)&cr.m_siteClusterByDefault - x;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_flags = PF_API;
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_obj = OBJ_SI;
|
|
m++;
|
|
|
|
m->m_title = "hide all clustered results";
|
|
m->m_desc = "Only display at most one result per site.";
|
|
m->m_cgi = "hacr";
|
|
m->m_off = (char *)&si.m_hideAllClustered - y;
|
|
m->m_defOff= (char *)&cr.m_hideAllClustered - x;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_obj = OBJ_SI;
|
|
m->m_def = "0";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_API;
|
|
m++;
|
|
|
|
|
|
m->m_title = "dedup results";
|
|
m->m_desc = "Should duplicate search results be removed? This is "
|
|
"based on a content hash of the entire document. "
|
|
"So documents must be exactly the same for the most part.";
|
|
m->m_cgi = "dr"; // dedupResultsByDefault";
|
|
m->m_off = (char *)&si.m_doDupContentRemoval - y;
|
|
m->m_defOff= (char *)&cr.m_dedupResultsByDefault - x;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_group = 1;
|
|
m->m_cgi = "dr";
|
|
m->m_flags = PF_API;
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_obj = OBJ_SI;
|
|
m++;
|
|
|
|
m->m_title = "percent similar dedup summary";
|
|
m->m_desc = "If document summary (and title) are "
|
|
"this percent similar "
|
|
"to a document summary above it, then remove it from the "
|
|
"search results. 100 means only to remove if exactly the "
|
|
"same. 0 means no summary deduping. You must also supply "
|
|
"dr=1 for this to work.";
|
|
m->m_cgi = "pss";
|
|
m->m_off = (char *)&si.m_percentSimilarSummary - y;
|
|
m->m_defOff= (char *)&cr.m_percentSimilarSummary - x;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_group = 0;
|
|
m->m_smin = 0;
|
|
m->m_smax = 100;
|
|
m->m_flags = PF_API;
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_obj = OBJ_SI;
|
|
m++;
|
|
|
|
|
|
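	// Example combining the clustering and dedup parms above in one
	// request (values illustrative):
	//
	//   GET /search?c=main&q=test&sc=1&dr=1&pss=80
	//
	// sc=1 site-clusters, dr=1 removes exact content dups, and pss=80
	// also drops results whose summaries are 80% or more similar to a
	// higher result (the desc notes pss requires dr=1).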
m->m_title = "dedup URLs";
|
|
m->m_desc = "Should we dedup URLs with case insensitivity? This is "
|
|
"mainly to correct duplicate wiki pages.";
|
|
m->m_cgi = "ddu";
|
|
m->m_off = (char *)&si.m_dedupURL - y;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_API;
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_obj = OBJ_SI;
|
|
m++;
|
|
|
|
|
|
m->m_title = "do spell checking";
|
|
m->m_desc = "If enabled while using the XML feed, "
|
|
"when Gigablast finds a spelling recommendation it will be "
|
|
"included in the XML <spell> tag. Default is 0 if using an "
|
|
"XML feed, 1 otherwise. Will be available again soon.";
|
|
m->m_cgi = "spell";
|
|
m->m_off = (char *)&si.m_spellCheck - y;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_obj = OBJ_SI;
|
|
m->m_def = "1";
|
|
m->m_flags = PF_API;
|
|
m++;
|
|
|
|
m->m_title = "stream search results";
|
|
m->m_desc = "Stream search results back on socket as they arrive. "
|
|
"Useful when thousands/millions of search results are "
|
|
"requested. Required when doing such things otherwise "
|
|
"Gigablast could run out of memory. Only supported for "
|
|
"JSON and XML formats, not HTML.";
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_obj = OBJ_SI;
|
|
m->m_off = (char *)&si.m_streamResults - y;
|
|
m->m_type = TYPE_CHAR;
|
|
m->m_def = "0";
|
|
m->m_cgi = "stream";
|
|
m->m_flags = PF_API;
|
|
m->m_sprpg = 0; // propagate to next 10
|
|
m->m_sprpp = 0;
|
|
m++;
|
|
|
|
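	// Streaming sketch per the desc above: for very large result sets,
	// stream=1 sends results as they arrive instead of buffering them
	// all in memory (XML/JSON formats only). Illustrative request:
	//
	//   GET /search?c=main&q=test&n=1000000&stream=1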
m->m_title = "seconds back";
|
|
m->m_desc = "Limit results to pages spidered this many seconds ago. "
|
|
"Use 0 to disable.";
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_obj = OBJ_SI;
|
|
m->m_off = (char *)&si.m_secsBack - y;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "0";
|
|
m->m_cgi = "secsback";
|
|
m->m_flags = PF_API;
|
|
m++;
|
|
|
|
m->m_title = "sort by";
|
|
m->m_desc = "Use 0 to sort results by relevance, 1 to sort by "
|
|
"most recent spider date down, and 2 to sort by oldest "
|
|
"spidered results first.";
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_obj = OBJ_SI;
|
|
m->m_off = (char *)&si.m_sortBy - y;
|
|
m->m_type = TYPE_CHAR;
|
|
m->m_def = "0"; // this means relevance
|
|
m->m_cgi = "sortby";
|
|
m->m_flags = PF_API;
|
|
m++;
|
|
|
|
m->m_title = "filetype";
|
|
m->m_desc = "Restrict results to this filetype. Supported "
|
|
"filetypes are pdf, doc, html xml, json, xls.";
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_obj = OBJ_SI;
|
|
m->m_off = (char *)&si.m_filetype - y;
|
|
m->m_type = TYPE_CHARPTR;
|
|
m->m_def = "";
|
|
m->m_cgi = "filetype";
|
|
m->m_flags = PF_API;
|
|
m++;
|
|
|
|
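	// Example restricting a search to recently spidered PDFs with the
	// three parms above (values illustrative; 604800 seconds = 7 days,
	// sortby=1 = newest first):
	//
	//   GET /search?c=main&q=report&filetype=pdf&sortby=1&secsback=604800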
m->m_title = "get scoring info";
|
|
m->m_desc = "Get scoring information for each result so you "
|
|
"can see how each result is scored. You must explicitly "
|
|
"request this using &scores=1 for the XML feed because it "
|
|
"is not included by default.";
|
|
m->m_cgi = "scores"; // dedupResultsByDefault";
|
|
m->m_off = (char *)&si.m_getDocIdScoringInfo - y;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_obj = OBJ_SI;
|
|
m->m_def = NULL;
|
|
m->m_flags = PF_API;
|
|
// get default from collectionrec item
|
|
m->m_defOff= (char *)&cr.m_getDocIdScoringInfo - x;
|
|
m++;
|
|
|
|
|
|
|
|
m->m_title = "do query expansion";
|
|
m->m_desc = "If enabled, query expansion will expand your query "
|
|
"to include the various forms and "
|
|
"synonyms of the query terms.";
|
|
m->m_off = (char *)&si.m_queryExpansion - y;
|
|
m->m_defOff= (char *)&cr.m_queryExpansion - x;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_cgi = "qe";
|
|
m->m_flags = PF_API;
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_obj = OBJ_SI;
|
|
m++;
|
|
|
|
// more general parameters
|
|
m->m_title = "max search results";
|
|
m->m_desc = "What is the maximum total number "
|
|
"of returned search results.";
|
|
m->m_cgi = "msr";
|
|
m->m_off = (char *)&cr.m_maxSearchResults - x;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "1000";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
m->m_title = "max search results per query";
|
|
m->m_desc = "What is the limit to the total number "
|
|
"of returned search results per query?";
|
|
m->m_cgi = "msrpq";
|
|
m->m_off = (char *)&cr.m_maxSearchResultsPerQuery - x;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "100";
|
|
m->m_flags = 0;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
m->m_title = "max search results for paying clients";
|
|
m->m_desc = "What is the limit to the total number "
|
|
"of returned search results for clients.";
|
|
m->m_cgi = "msrfpc";
|
|
m->m_off = (char *)&cr.m_maxSearchResultsForClients - x;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "1000";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
m->m_title = "max search results per query for paying clients";
|
|
m->m_desc = "What is the limit to the total number "
|
|
"of returned search results per query for paying clients? "
|
|
"Auto ban must be enabled for this to work.";
|
|
m->m_cgi = "msrpqfc";
|
|
m->m_off = (char *)&cr.m_maxSearchResultsPerQueryForClients - x;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "1000";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
|
|
m->m_title = "user ip";
|
|
m->m_desc = "The ip address of the searcher. We can pass back "
|
|
"for use in the autoban technology which bans abusive IPs.";
|
|
m->m_obj = OBJ_SI;
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_off = (char *)&si.m_userIpStr - y;
|
|
m->m_type = TYPE_CHARPTR;//STRING;
|
|
m->m_cgi = "uip";
|
|
m->m_flags = PF_COOKIE | PF_WIDGET_PARM | PF_API;
|
|
m++;
|
|
|
|
|
|
|
|
m->m_title = "use min ranking algo";
|
|
m->m_desc = "Should search results be ranked using this algo?";
|
|
//m->m_cgi = "uma";
|
|
//m->m_off = (char *)&cr.m_siteClusterByDefault - x;
|
|
m->m_off = (char *)&si.m_useMinAlgo - y;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_obj = OBJ_SI;
|
|
// seems, good, default it on
|
|
m->m_def = "1";
|
|
m->m_cgi = "uma";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m++;
|
|
|
|
|
|
// limit to this # of the top term pairs from inlink text whose
|
|
// score is accumulated
|
|
m->m_title = "real max top";
|
|
m->m_desc = "Only score up to this many inlink text term pairs";
|
|
m->m_off = (char *)&si.m_realMaxTop - y;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_obj = OBJ_SI;
|
|
m->m_def = "10";
|
|
m->m_cgi = "rmt";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m++;
|
|
|
|
m->m_title = "use new ranking algo";
|
|
m->m_desc = "Should search results be ranked using this new algo?";
|
|
m->m_off = (char *)&si.m_useNewAlgo - y;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_obj = OBJ_SI;
|
|
// seems, good, default it on
|
|
m->m_def = "1";
|
|
m->m_cgi = "una";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m++;
|
|
|
|
m->m_title = "do max score algo";
|
|
m->m_desc = "Quickly eliminated docids using max score algo";
|
|
m->m_off = (char *)&si.m_doMaxScoreAlgo - y;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_obj = OBJ_SI;
|
|
m->m_def = "1";
|
|
m->m_cgi = "dmsa";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m++;
|
|
|
|
|
|
m->m_title = "use fast intersection algo";
|
|
m->m_desc = "Should we try to speed up search results generation?";
|
|
m->m_off = (char *)&si.m_fastIntersection - y;
|
|
m->m_type = TYPE_CHAR;
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_obj = OBJ_SI;
|
|
// turn off until we debug
|
|
m->m_def = "-1";
|
|
m->m_cgi = "fi";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m++;
|
|
|
|
m->m_title = "max number of facets to return";
|
|
m->m_desc = "Max number of facets to return";
|
|
m->m_off = (char *)&si.m_maxFacets - y;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "50";
|
|
m->m_group = 1;
|
|
m->m_cgi = "nf";
|
|
m->m_flags = PF_API;
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_obj = OBJ_SI;
|
|
m++;
|
|
|
|
// m->m_title = "special query";
|
|
// m->m_desc = "List of docids to restrain results to.";
|
|
// m->m_cgi = "sq";
|
|
// m->m_off = (char *)&si.m_sq - y;
|
|
// m->m_type = TYPE_CHARPTR;
|
|
// m->m_def = NULL;
|
|
// m->m_group = 0;
|
|
// m++;
|
|
|
|
// m->m_title = "negative docids";
|
|
// m->m_desc = "List of docids to ignore.";
|
|
// m->m_cgi = "nodocids";
|
|
// m->m_off = (char *)&si.m_noDocIds - y;
|
|
// m->m_type = TYPE_CHARPTR;
|
|
// m->m_def = NULL;
|
|
// m->m_group = 0;
|
|
// m++;
|
|
|
|
// m->m_title = "negative siteids";
|
|
// m->m_desc = "Whitespace-separated list of 32-bit sitehashes "
|
|
//"to ignore.";
|
|
// m->m_cgi = "nositeids";
|
|
// m->m_off = (char *)&si.m_noSiteIds - y;
|
|
// m->m_type = TYPE_CHARPTR;
|
|
// m->m_def = NULL;
|
|
// m->m_group = 0;
|
|
// m++;
|
|
|
|
m->m_title = "language weight";
|
|
m->m_desc = "Default language weight if document matches quer "
|
|
"language. Use this to give results that match the specified "
|
|
"the specified &qlang higher ranking, or docs whose language "
|
|
"is unnknown. Can be override with "
|
|
"&langw in the query url.";
|
|
m->m_cgi = "langweight";
|
|
m->m_off = (char *)&cr.m_sameLangWeight - x;
|
|
m->m_type = TYPE_FLOAT;
|
|
m->m_def = "20.000000";
|
|
m->m_group = 1;
|
|
m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
|
|
m->m_title = "use language weights";
|
|
m->m_desc = "Use Language weights to sort query results. "
|
|
"This will give results that match the specified &qlang "
|
|
"higher ranking.";
|
|
m->m_cgi = "lsort";
|
|
m->m_off = (char *)&cr.m_enableLanguageSorting - x;
|
|
//m->m_soff = (char *)&si.m_enableLanguageSorting - y;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "1";
|
|
m->m_group = 1;
|
|
//m->m_scgi = "lsort";
|
|
m->m_smin = 0;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
m->m_title = "sort language preference";
|
|
m->m_desc = "Default language to use for ranking results. "
|
|
//"This should only be used on limited collections. "
|
|
"Value should be any language abbreviation, for example "
|
|
"\"en\" for English. Use <i>xx</i> to give ranking "
|
|
"boosts to no language in particular. See the language "
|
|
"abbreviations at the bottom of the "
|
|
"<a href=/admin/filters>url filters</a> page.";
|
|
m->m_cgi = "qlang";
|
|
m->m_off = (char *)&si.m_defaultSortLang - y;
|
|
m->m_type = TYPE_CHARPTR;
|
|
//m->m_size = 6; // up to 5 chars + NULL, e.g. "en_US"
|
|
m->m_def = "";//"xx";//_US";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_API;
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_obj = OBJ_SI;
|
|
m++;
|
|
|
|
m->m_title = "language weight";
|
|
m->m_desc = "Use this to override the default language weight "
|
|
"for this collection. The default language weight can be "
|
|
"set in the search controls and is usually something like "
|
|
"20.0. Which means that we multiply a result's score by 20 "
|
|
"if from the same language as the query or the language is "
|
|
"unknown.";
|
|
m->m_off = (char *)&si.m_sameLangWeight - y;
|
|
m->m_defOff= (char *)&cr.m_sameLangWeight - x;
|
|
m->m_type = TYPE_FLOAT;
|
|
m->m_cgi = "langw";
|
|
m->m_flags = PF_API;
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_obj = OBJ_SI;
|
|
m++;
|
|
|
|
|
|
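	// Example of the qlang/langw pair above (values illustrative):
	//
	//   GET /search?c=main&q=test&qlang=en&langw=20.0
	//
	// Results in the query language (or of unknown language) get their
	// score multiplied by langw; langw overrides the collection default
	// stored in cr.m_sameLangWeight.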
m->m_title = "sort country preference";
|
|
m->m_desc = "Default country to use for ranking results. "
|
|
//"This should only be used on limited collections. "
|
|
"Value should be any country code abbreviation, for example "
|
|
"\"us\" for United States. This is currently not working.";
|
|
m->m_cgi = "qcountry";
|
|
m->m_off = (char *)&si.m_defaultSortCountry - y;
|
|
m->m_type = TYPE_CHARPTR;
|
|
m->m_size = 2+1;
|
|
m->m_def = "us";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_API;
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_obj = OBJ_SI;
|
|
m->m_flags = PF_NOAPI;
|
|
m++;
|
|
|
|
/*
|
|
m->m_title = "language method weights";
|
|
m->m_desc = "Language method weights for spider language "
|
|
"detection. A string of ascii numerals that "
|
|
"should default to 895768712";
|
|
m->m_cgi = "lmweights";
|
|
m->m_off = (char *)&cr.m_languageMethodWeights - x;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = 10; // up to 9 chars + NULL
|
|
m->m_def = "894767812";
|
|
m->m_group = 0;
|
|
// m->m_sparm = 1;
|
|
m++;
|
|
|
|
m->m_title = "language detection sensitivity";
|
|
m->m_desc = "Language detection sensitivity. Higher"
|
|
" values mean higher hitrate, but lower accuracy."
|
|
" Suggested values are from 2 to 20";
|
|
m->m_cgi = "lmbailout";
|
|
m->m_off = (char *)&cr.m_languageBailout - x;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "5";
|
|
m->m_group = 0;
|
|
// m->m_sparm = 1;
|
|
m++;
|
|
|
|
m->m_title = "language detection threshold";
|
|
m->m_desc = "Language detection threshold sensitivity."
|
|
" Higher values mean better accuracy, but lower hitrate."
|
|
" Suggested values are from 2 to 20";
|
|
m->m_cgi = "lmthreshold";
|
|
m->m_off = (char *)&cr.m_languageThreshold - x;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "3";
|
|
m->m_group = 0;
|
|
// m->m_sparm = 1;
|
|
m++;
|
|
|
|
m->m_title = "language detection samplesize";
|
|
m->m_desc = "Language detection size. Higher values"
|
|
" mean more accuracy, but longer processing time."
|
|
" Suggested values are 300-1000";
|
|
m->m_cgi = "lmsamples";
|
|
m->m_off = (char *)&cr.m_languageSamples - x;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "600";
|
|
m->m_group = 0;
|
|
// m->m_sparm = 1;
|
|
m++;
|
|
|
|
m->m_title = "language detection spider samplesize";
|
|
m->m_desc = "Language detection page sample size. "
|
|
"Higher values mean more accuracy, but longer "
|
|
"spider time."
|
|
" Suggested values are 3000-10000";
|
|
m->m_cgi = "lpsamples";
|
|
m->m_off = (char *)&cr.m_langPageLimit - x;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "6000";
|
|
m->m_group = 0;
|
|
// m->m_sparm = 1;
|
|
m++;
|
|
*/
|
|
|
|
m->m_title = "docs to check for post query";
|
|
m->m_desc = "How many search results should we "
|
|
"scan for post query demotion? "
|
|
"0 disables all post query reranking. ";
|
|
m->m_cgi = "pqrds";
|
|
m->m_off = (char *)&si.m_docsToScanForReranking - y;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "0";
|
|
m->m_group = 1;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_RESULTS;
|
|
m->m_obj = OBJ_SI;
|
|
m++;
|
|
|
|
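	// Post-query reranking is off by default (pqrds=0). An illustrative
	// request scanning the top 30 results for the demotions below:
	//
	//   GET /search?c=main&q=test&pqrds=30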
m->m_title = "demotion for foreign languages";
|
|
m->m_desc = "Demotion factor of non-relevant languages. Score "
|
|
"will be penalized by this factor as a percent if "
|
|
"it's language is foreign. "
|
|
"A safe value is probably anywhere from 0.5 to 1. ";
|
|
m->m_cgi = "pqrlang";
|
|
m->m_off = (char *)&cr.m_languageWeightFactor - x;
|
|
//m->m_soff = (char *)&si.m_languageWeightFactor - y;
|
|
m->m_type = TYPE_FLOAT;
|
|
m->m_def = "0.999";
|
|
m->m_group = 0;
|
|
//m->m_scgi = "pqrlang";
|
|
m->m_smin = 0;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
m->m_title = "demotion for unknown languages";
|
|
m->m_desc = "Demotion factor for unknown languages. "
|
|
"Page's score will be penalized by this factor as a percent "
|
|
"if it's language is not known. "
|
|
"A safe value is 0, as these pages will be reranked by "
|
|
"country (see below). "
|
|
"0 means no demotion.";
|
|
m->m_cgi = "pqrlangunk";
|
|
m->m_off = (char *)&cr.m_languageUnknownWeight- x;
|
|
//m->m_soff = (char *)&si.m_languageUnknownWeight- y;
|
|
m->m_type = TYPE_FLOAT;
|
|
m->m_def = "0.0";
|
|
m->m_group = 0;
|
|
//m->m_scgi = "pqrlangunk";
|
|
m->m_smin = 0;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
m->m_title = "demotion for pages where the country of the page writes "
|
|
"in the same language as the country of the query";
|
|
m->m_desc = "Demotion for pages where the country of the page writes "
|
|
"in the same language as the country of the query. "
|
|
"If query language is the same as the language of the page, "
|
|
"then if a language written in the country of the page matches "
|
|
"a language written by the country of the query, then page's "
|
|
"score will be demoted by this factor as a percent. "
|
|
"A safe range is between 0.5 and 1. ";
|
|
m->m_cgi = "pqrcntry";
|
|
m->m_off = (char *)&cr.m_pqr_demFactCountry - x;
|
|
m->m_type = TYPE_FLOAT;
|
|
m->m_def = "0.98";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
m->m_title = "demotion for query terms or gigabits in url";
|
|
m->m_desc = "Demotion factor for query terms or gigabits "
|
|
"in a result's url. "
|
|
"Score will be penalized by this factor times the number "
|
|
"of query terms or gigabits in the url divided by "
|
|
"the max value below such that fewer "
|
|
"query terms or gigabits in the url causes the result "
|
|
"to be demoted more heavily, depending on the factor. "
|
|
"Higher factors demote more per query term or gigabit "
|
|
"in the page's url. "
|
|
"Generally, a page may not be demoted more than this "
|
|
"factor as a percent. Also, how it is demoted is "
|
|
"dependent on the max value. For example, "
|
|
"a factor of 0.2 will demote the page 20% if it has no "
|
|
"query terms or gigabits in its url. And if the max value is "
|
|
"10, then a page with 5 query terms or gigabits in its "
|
|
"url will be demoted 10%; and 10 or more query terms or "
|
|
"gigabits in the url will not be demoted at all. "
|
|
"0 means no demotion. "
|
|
"A safe range is from 0 to 0.35. ";
|
|
m->m_cgi = "pqrqttiu";
|
|
m->m_off = (char *)&cr.m_pqr_demFactQTTopicsInUrl - x;
|
|
m->m_type = TYPE_FLOAT;
|
|
m->m_def = "0";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
m->m_title = "max value for pages with query terms or gigabits "
|
|
"in url";
|
|
m->m_desc = "Max number of query terms or gigabits in a url. "
|
|
"Pages with a number of query terms or gigabits in their "
|
|
"urls greater than or equal to this value will not be "
|
|
"demoted. "
|
|
"This controls the range of values expected to represent "
|
|
"the number of query terms or gigabits in a url. It should "
|
|
"be set to or near the estimated max number of query terms "
|
|
"or topics that can be in a url. Setting to a lower value "
|
|
"increases the penalty per query term or gigabit that is "
|
|
"not in a url, but decreases the range of values that "
|
|
"will be demoted.";
|
|
m->m_cgi = "pqrqttium";
|
|
m->m_off = (char *)&cr.m_pqr_maxValQTTopicsInUrl - x;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "10";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
m->m_title = "demotion for pages that are not high quality";
|
|
m->m_desc = "Demotion factor for pages that are not high quality. "
|
|
"Score is penalized by this number as a percent times level "
|
|
"of quality. A pqge will be demoted by the formula "
|
|
"(max quality - page's quality) * this factor / the max "
|
|
"value given below. Generally, a page will not be "
|
|
"demoted more than this factor as a percent. "
|
|
"0 means no demotion. "
|
|
"A safe range is between 0 to 1. ";
|
|
m->m_cgi = "pqrqual";
|
|
m->m_off = (char *)&cr.m_pqr_demFactQual - x;
|
|
m->m_type = TYPE_FLOAT;
|
|
m->m_def = "0";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
m->m_title = "max value for pages that are not high quality";
|
|
m->m_desc = "Max page quality. Pages with a quality level "
|
|
"equal to or higher than this value "
|
|
"will not be demoted. ";
|
|
m->m_cgi = "pqrqualm";
|
|
m->m_off = (char *)&cr.m_pqr_maxValQual - x;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "100";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
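
	// Worked example (numbers are illustrative): with factor f = 0.5 and
	// max quality M = 100, a page of quality q = 60 is demoted by
	// f * (M - q) / M = 0.5 * (100 - 60) / 100 = 0.20, a 20% penalty;
	// a page at quality 100 is not demoted at all.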

	m->m_title = "demotion for pages that are not "
		"root or have many paths in the url";
	m->m_desc = "Demotion factor for each path in the url. "
		"Score will be demoted by this factor as a percent "
		"multiplied by the number of paths in the url divided "
		"by the max value below. "
		"Generally, the page will not be demoted more than this "
		"value as a percent. "
		"0 means no demotion. "
		"A safe range is from 0 to 0.75. ";
	m->m_cgi = "pqrpaths";
	m->m_off = (char *)&cr.m_pqr_demFactPaths - x;
	m->m_type = TYPE_FLOAT;
	m->m_def = "0";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "max value for pages that have many paths in the url";
	m->m_desc = "Max number of paths in a url. "
		"This should be set to a value representing a very high "
		"number of paths for a url. Lower values increase the "
		"difference between how much each additional path demotes. ";
	m->m_cgi = "pqrpathsm";
	m->m_off = (char *)&cr.m_pqr_maxValPaths - x;
	m->m_type = TYPE_LONG;
	m->m_def = "16";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;
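
	// Worked example (numbers are illustrative): with factor f = 0.75 and
	// max paths M = 16, a url with n = 4 path components is demoted by
	// f * n / M = 0.75 * 4 / 16 = 0.1875, about an 18.75% penalty; a
	// root page (n = 0) is not demoted.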

	m->m_title = "demotion for pages that do not have a catid";
	m->m_desc = "Demotion factor for pages that do not have a catid. "
		"Score will be penalized by this factor as a percent. "
		"A safe range is from 0 to 0.2. ";
	m->m_cgi = "pqrcatid";
	m->m_off = (char *)&cr.m_pqr_demFactNoCatId - x;
	m->m_type = TYPE_FLOAT;
	m->m_def = "0";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "demotion for pages where smallest "
		"catid has a lot of super topics";
	m->m_desc = "Demotion factor for pages where the smallest "
		"catid has a lot of super topics. "
		"Page will be penalized by the number of super topics "
		"multiplied by this factor divided by the max value given "
		"below. "
		"Generally, the page will not be demoted more than this "
		"factor as a percent. "
		"Note: pages with no catid are demoted by this factor as "
		"a percent so as not to penalize pages with a catid. "
		"0 means no demotion. "
		"A safe range is between 0 and 0.25. ";
	m->m_cgi = "pqrsuper";
	m->m_off = (char *)&cr.m_pqr_demFactCatidHasSupers - x;
	m->m_type = TYPE_FLOAT;
	m->m_def = "0";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "max value for pages where smallest catid has a lot "
		"of super topics";
	m->m_desc = "Max number of super topics. "
		"Pages whose smallest catid has more super "
		"topics than this will be demoted by the maximum amount "
		"given by the factor above as a percent. "
		"This should be set to a value representing a very high "
		"number of super topics for a category id. "
		"Lower values increase the difference between how much each "
		"additional super topic demotes. ";
	m->m_cgi = "pqrsuperm";
	m->m_off = (char *)&cr.m_pqr_maxValCatidHasSupers - x;
	m->m_type = TYPE_LONG;
	m->m_def = "11";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "demotion for larger pages";
	m->m_desc = "Demotion factor for larger pages. "
		"Page will be penalized by its size times this factor "
		"divided by the max page size below. "
		"Generally, a page will not be demoted more than this "
		"factor as a percent. "
		"0 means no demotion. "
		"A safe range is between 0 and 0.25. ";
	m->m_cgi = "pqrpgsz";
	m->m_off = (char *)&cr.m_pqr_demFactPageSize - x;
	m->m_type = TYPE_FLOAT;
	m->m_def = "0";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "max value for larger pages";
	m->m_desc = "Max page size. "
		"Pages with a size greater than or equal to this will be "
		"demoted by the max amount (the factor above as a percent). ";
	m->m_cgi = "pqrpgszm";
	m->m_off = (char *)&cr.m_pqr_maxValPageSize - x;
	m->m_type = TYPE_LONG;
	m->m_def = "524288";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;
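
	// Worked example (numbers are illustrative): with factor f = 0.25 and
	// max page size M = 524288 bytes, a 262144-byte page is demoted by
	// f * size / M = 0.25 * 262144 / 524288 = 0.125, a 12.5% penalty;
	// pages of 524288 bytes or more take the full 25% penalty.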

	m->m_title = "demotion for non-location specific queries "
		"with a location specific title";
	m->m_desc = "Demotion factor for non-location specific queries "
		"with a location specific title. "
		"Pages which contain a location in their title which is "
		"not in the query or the gigabits will be demoted by their "
		"population multiplied by this factor divided by the max "
		"place population specified below. "
		"Generally, a page will not be demoted more than this "
		"value as a percent. "
		"0 means no demotion. ";
	m->m_cgi = "pqrloct";
	m->m_off = (char *)&cr.m_pqr_demFactLocTitle - x;
	//m->m_scgi = "pqrloct";
	//m->m_soff = (char *)&si.m_pqr_demFactLocTitle - y;
	m->m_type = TYPE_FLOAT;
	m->m_def = "0.99";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "demotion for non-location specific queries "
		"with a location specific summary";
	m->m_desc = "Demotion factor for non-location specific queries "
		"with a location specific summary. "
		"Pages which contain a location in their summary which is "
		"not in the query or the gigabits will be demoted by their "
		"population multiplied by this factor divided by the max "
		"place population specified below. "
		"Generally, a page will not be demoted more than this "
		"value as a percent. "
		"0 means no demotion. ";
	m->m_cgi = "pqrlocs";
	m->m_off = (char *)&cr.m_pqr_demFactLocSummary - x;
	//m->m_scgi = "pqrlocs";
	//m->m_soff = (char *)&si.m_pqr_demFactLocSummary - y;
	m->m_type = TYPE_FLOAT;
	m->m_def = "0.95";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "demotion for non-location specific queries "
		"with a location specific dmoz category";
	m->m_desc = "Demotion factor for non-location specific queries "
		"with a location specific dmoz regional category. "
		"Pages which contain a location in their dmoz which is "
		"not in the query or the gigabits will be demoted by their "
		"population multiplied by this factor divided by the max "
		"place population specified below. "
		"Generally, a page will not be demoted more than this "
		"value as a percent. "
		"0 means no demotion. ";
	m->m_cgi = "pqrlocd";
	m->m_off = (char *)&cr.m_pqr_demFactLocDmoz - x;
	//m->m_scgi = "pqrlocd";
	//m->m_soff = (char *)&si.m_pqr_demFactLocDmoz - y;
	m->m_type = TYPE_FLOAT;
	m->m_def = "0.95";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "demote locations that appear in gigabits";
	m->m_desc = "Demote locations that appear in gigabits.";
	m->m_cgi = "pqrlocg";
	m->m_off = (char *)&cr.m_pqr_demInTopics - x;
	//m->m_scgi = "pqrlocg";
	//m->m_soff = (char *)&si.m_pqr_demInTopics - y;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "max value for non-location specific queries "
		"with location specific results";
	m->m_desc = "Max place population. "
		"Places with a population greater than or equal to this "
		"will be demoted by the maximum amount given by the "
		"factor above as a percent. ";
	m->m_cgi = "pqrlocm";
	m->m_off = (char *)&cr.m_pqr_maxValLoc - x;
	m->m_type = TYPE_LONG;
	// charlottesville was getting missed when this was 1M
	m->m_def = "100000";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "demotion for non-html";
	m->m_desc = "Demotion factor for content type that is non-html. "
		"Pages which do not have an html content type will be "
		"demoted by this factor as a percent. "
		"0 means no demotion. "
		"A safe range is between 0 and 0.35. ";
	m->m_cgi = "pqrhtml";
	m->m_off = (char *)&cr.m_pqr_demFactNonHtml - x;
	m->m_type = TYPE_FLOAT;
	m->m_def = "0";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "demotion for xml";
	m->m_desc = "Demotion factor for content type that is xml. "
		"Pages which have an xml content type will be "
		"demoted by this factor as a percent. "
		"0 means no demotion. "
		"Any value between 0 and 1 is safe if demotion for non-html "
		"is set to 0. Otherwise, 0 should probably be used. ";
	m->m_cgi = "pqrxml";
	m->m_off = (char *)&cr.m_pqr_demFactXml - x;
	m->m_type = TYPE_FLOAT;
	m->m_def = "0.95";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "demotion for pages with other pages from same "
		"hostname";
	m->m_desc = "Demotion factor for pages with fewer other pages from "
		"the same hostname. "
		"Pages with results from the same host will be "
		"demoted by this factor times how many fewer such pages "
		"they have than the max value given below, divided by the "
		"max value. "
		"Generally, a page will not be demoted more than this "
		"factor as a percent. "
		"0 means no demotion. "
		"A safe range is between 0 and 0.35. ";
	m->m_cgi = "pqrfsd";
	m->m_off = (char *)&cr.m_pqr_demFactOthFromHost - x;
	m->m_type = TYPE_FLOAT;
	m->m_def = "0";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "max value for pages with other pages from same "
		"hostname";
	m->m_desc = "Max number of pages from the same host. "
		"Pages which have this many or more pages from the same "
		"host will not be demoted. ";
	m->m_cgi = "pqrfsdm";
	m->m_off = (char *)&cr.m_pqr_maxValOthFromHost - x;
	m->m_type = TYPE_LONG;
	m->m_def = "12";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "initial demotion for pages with common "
		"topics in dmoz as other results";
	m->m_desc = "Initial demotion factor for pages with common "
		"topics in dmoz as other results. "
		"Pages will be penalized by the number of common topics "
		"in dmoz times this factor divided by the max value "
		"given below. "
		"Generally, a page will not be demoted by more than this "
		"factor as a percent. "
		"Note: this factor is decayed by the factor specified in "
		"the parm below, decay for pages with common topics in "
		"dmoz as other results, as the number of pages with "
		"common topics in dmoz increases. "
		"0 means no demotion. "
		"A safe range is between 0 and 0.35. ";
	m->m_cgi = "pqrctid";
	m->m_off = (char *)&cr.m_pqr_demFactComTopicInDmoz - x;
	m->m_type = TYPE_FLOAT;
	m->m_def = "0";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "decay for pages with common topics in dmoz "
		"as other results";
	m->m_desc = "Decay factor for pages with common topics in "
		"dmoz as other results. "
		"The initial demotion factor will be decayed by this factor "
		"as a percent as the number of common topics increases. "
		"0 means no decay. "
		"A safe range is between 0 and 0.25. ";
	m->m_cgi = "pqrctidd";
	m->m_off = (char *)&cr.m_pqr_decFactComTopicInDmoz - x;
	m->m_type = TYPE_FLOAT;
	m->m_def = "0";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "max value for pages with common topics in dmoz "
		"as other results";
	m->m_desc = "Max number of common topics in dmoz as other results. "
		"Pages with a number of common topics equal to or greater "
		"than this value will be demoted to the maximum as given "
		"by the initial factor above as a percent. ";
	m->m_cgi = "pqrctidm";
	m->m_off = (char *)&cr.m_pqr_maxValComTopicInDmoz - x;
	m->m_type = TYPE_LONG;
	m->m_def = "32";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "demotion for pages where dmoz category names "
		"contain query terms or their synonyms";
	m->m_desc = "Demotion factor for pages where dmoz category names "
		"contain fewer query terms or their synonyms. "
		"Pages will be penalized for each query term or synonym of "
		"a query term less than the max value given below multiplied "
		"by this factor, divided by the max value. "
		"Generally, a page will not be demoted more than this value "
		"as a percent. "
		"0 means no demotion. "
		"A safe range is between 0 and 0.3. ";
	m->m_cgi = "pqrdcndcqt";
	m->m_off = (char *)&cr.m_pqr_demFactDmozCatNmNoQT - x;
	m->m_type = TYPE_FLOAT;
	m->m_def = "0";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "max value for pages where dmoz category names "
		"contain query terms or their synonyms";
	m->m_desc = "Max number of query terms and their synonyms "
		"in a page's dmoz category names. "
		"Pages with a number of query terms or their synonyms in all "
		"dmoz category names greater than or equal to this value "
		"will not be demoted. ";
	m->m_cgi = "pqrcndcqtm";
	m->m_off = (char *)&cr.m_pqr_maxValDmozCatNmNoQT - x;
	m->m_type = TYPE_LONG;
	m->m_def = "10";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;
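
	// Worked example (numbers are illustrative): with factor f = 0.3 and
	// max value M = 10, a page whose dmoz category names contain n = 4
	// query terms/synonyms is demoted by
	// f * (M - n) / M = 0.3 * (10 - 4) / 10 = 0.18, an 18% penalty;
	// n >= 10 means no demotion.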

	m->m_title = "demotion for pages where dmoz category names "
		"contain gigabits";
	m->m_desc = "Demotion factor for pages where dmoz category "
		"names contain fewer gigabits. "
		"Pages will be penalized by the number of gigabits in all "
		"dmoz category names fewer than the max value given below "
		"divided by the max value. "
		"Generally, a page will not be demoted more than this "
		"factor as a percent. "
		"0 means no demotion. "
		"A safe range is between 0 and 0.3. ";
	m->m_cgi = "pqrdcndcgb";
	m->m_off = (char *)&cr.m_pqr_demFactDmozCatNmNoGigabits - x;
	m->m_type = TYPE_FLOAT;
	m->m_def = "0";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "max value for pages where dmoz category names "
		"contain gigabits";
	m->m_desc = "Max number of gigabits in a page's dmoz "
		"category names. "
		"Pages with a number of gigabits in all dmoz category names "
		"greater than or equal to this value will not be demoted. ";
	m->m_cgi = "pqrdcndcgbm";
	m->m_off = (char *)&cr.m_pqr_maxValDmozCatNmNoGigabits - x;
	m->m_type = TYPE_LONG;
	m->m_def = "16";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "demotion for pages based on datedb date";
	m->m_desc = "Demotion factor for pages based on datedb date. "
		"Pages will be penalized for being published earlier than the "
		"max date given below. "
		"The older the page, the more it will be penalized based on "
		"the time difference between the page's date and the max date, "
		"divided by the max date. "
		"Generally, a page will not be demoted more than this "
		"value as a percent. "
		"0 means no demotion. "
		"A safe range is between 0 and 0.4. ";
	m->m_cgi = "pqrdate";
	m->m_off = (char *)&cr.m_pqr_demFactDatedbDate - x;
	m->m_type = TYPE_FLOAT;
	m->m_def = "0";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "min value for demotion based on datedb date";
	m->m_desc = "Pages with a publish date equal to or earlier than "
		"this date will be demoted to the max (the factor above as "
		"a percent). "
		"Use this parm in conjunction with the max value below "
		"to specify the range of dates where demotion occurs. "
		"If you set this parm near the estimated earliest publish "
		"date that occurs somewhat frequently, this method can better "
		"control the additional demotion per publish day. "
		"This number is given as seconds since the epoch, January 1st, "
		"1970, divided by 1000. "
		"0 means use the epoch. ";
	m->m_cgi = "pqrdatei";
	m->m_off = (char *)&cr.m_pqr_minValDatedbDate - x;
	m->m_type = TYPE_LONG;
	m->m_def = "631177"; // Jan 01, 1990
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "max value for demotion based on datedb date";
	m->m_desc = "Pages with a publish date greater than or equal to "
		"this value divided by 1000 will not be demoted. "
		"Use this parm in conjunction with the min value above "
		"to specify the range of dates where demotion occurs. "
		"This number is given as seconds before the current date "
		"and time taken from the system clock, divided by 1000. "
		"0 means use the current time of the current day. ";
	m->m_cgi = "pqrdatem";
	m->m_off = (char *)&cr.m_pqr_maxValDatedbDate - x;
	m->m_type = TYPE_LONG;
	m->m_def = "0";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;
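
	// Note on the encoding used by the two date parms above: values are
	// stored as unix seconds divided by 1000, so the default min of
	// 631177 stands for 631177 * 1000 = 631,177,000 seconds after the
	// epoch, which is approximately Jan 01, 1990, as the comment on the
	// default notes.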

	m->m_title = "demotion for pages based on proximity";
	m->m_desc = "Demotion factor for proximity of query terms in "
		"a document. The closer together terms occur in a "
		"document, the higher it will score. "
		"0 means no demotion. ";
	m->m_cgi = "pqrprox";
	m->m_off = (char *)&cr.m_pqr_demFactProximity - x;
	//m->m_scgi = "pqrprox";
	//m->m_soff = (char *)&si.m_pqr_demFactProximity - y;
	m->m_type = TYPE_FLOAT;
	m->m_def = "0";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "demotion for pages based on query terms section";
	m->m_desc = "Demotion factor for where the query terms occur "
		"in the document. If the terms only occur in a menu, "
		"a link, or a list, the document will be punished. "
		"0 means no demotion. ";
	m->m_cgi = "pqrinsec";
	//m->m_scgi = "pqrinsec";
	m->m_off = (char *)&cr.m_pqr_demFactInSection - x;
	//m->m_soff = (char *)&si.m_pqr_demFactInSection - y;
	m->m_type = TYPE_FLOAT;
	m->m_def = "0";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "weight of indexed score on pqr";
	m->m_desc = "The proportion that the original score affects "
		"its rerank position. A factor of 1 will maintain "
		"the original score, 0 will only use the indexed "
		"score to break ties.";
	m->m_cgi = "pqrorig";
	//m->m_scgi = "pqrorig";
	m->m_off = (char *)&cr.m_pqr_demFactOrigScore - x;
	//m->m_soff = (char *)&si.m_pqr_demFactOrigScore - y;
	m->m_type = TYPE_FLOAT;
	m->m_def = "1";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "max value for demotion for pages based on proximity";
	m->m_desc = "Max summary score above which no more demotion occurs. "
		"Pages with a summary score greater than or equal to this "
		"value will not be demoted. ";
	m->m_cgi = "pqrproxm";
	m->m_off = (char *)&cr.m_pqr_maxValProximity - x;
	m->m_type = TYPE_LONG;
	m->m_def = "100000";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "demotion for query being exclusively in a subphrase";
	m->m_desc = "A search result which contains the query terms only "
		"as a subphrase of a larger phrase will have its score "
		"reduced by this percent.";
	m->m_cgi = "pqrspd";
	m->m_off = (char *)&cr.m_pqr_demFactSubPhrase - x;
	//m->m_soff = (char *)&si.m_pqr_demFactSubPhrase - y;
	//m->m_scgi = "pqrspd";
	m->m_type = TYPE_FLOAT;
	m->m_def = "0";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "demotion based on common inlinks";
	m->m_desc = "Based on the number of inlinks a search result has "
		"which are in common with another search result.";
	m->m_cgi = "pqrcid";
	m->m_off = (char *)&cr.m_pqr_demFactCommonInlinks - x;
	//m->m_soff = (char *)&si.m_pqr_demFactCommonInlinks - y;
	//m->m_scgi = "pqrcid";
	m->m_type = TYPE_FLOAT;
	m->m_def = ".5";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "number of document calls multiplier";
	m->m_desc = "Allows more results to be gathered in the case of "
		"an index having a high rate of duplicate results. Generally "
		"expressed as 1.2.";
	m->m_cgi = "ndm";
	m->m_off = (char *)&cr.m_numDocsMultiplier - x;
	m->m_type = TYPE_FLOAT;
	m->m_def = "1.2";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	/*
	m->m_title = "max documents to compute per host";
	m->m_desc = "Limit number of documents to search that do not provide"
		" the required results.";
	m->m_cgi = "mdi";
	m->m_off = (char *)&cr.m_maxDocIdsToCompute - x;
	m->m_type = TYPE_LONG;
	m->m_def = "1000";
	m->m_group = 0;
	m++;
	*/

	m->m_title = "max real time inlinks";
	m->m_desc = "Limit number of linksdb inlinks requested per result.";
	m->m_cgi = "mrti";
	m->m_off = (char *)&cr.m_maxRealTimeInlinks - x;
	//m->m_soff = (char *)&si.m_maxRealTimeInlinks - y;
	m->m_type = TYPE_LONG;
	m->m_def = "10000";
	m->m_group = 0;
	//m->m_scgi = "mrti";
	m->m_smin = 0;
	m->m_smax = 100000;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "percent topic similar default";
	m->m_desc = "Like above, but used for deciding when to cluster "
		"results by topic for the news collection.";
	m->m_cgi = "ptcd";
	m->m_off = (char *)&cr.m_topicSimilarCutoffDefault - x;
	m->m_type = TYPE_LONG;
	m->m_def = "50";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "max query terms";
	m->m_desc = "Do not allow more than this many query terms. Helps "
		"prevent big queries from resource hogging.";
	m->m_cgi = "mqt";
	m->m_off = (char *)&cr.m_maxQueryTerms - x;
	//m->m_soff = (char *)&si.m_maxQueryTerms - y;
	m->m_type = TYPE_LONG;
	m->m_def = "999999"; // now we got synonyms... etc
	m->m_group = 0;
	m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	/*
	m->m_title = "dictionary site";
	m->m_desc = "Where do we send requests for definitions of search "
		"terms. Set to the empty string to turn this feature off.";
	m->m_cgi = "dictionarySite";
	m->m_off = (char *)&cr.m_dictionarySite - x;
	m->m_type = TYPE_STRING;
	m->m_size = SUMMARYHIGHLIGHTTAGMAXSIZE;
	m->m_def = "http://www.answers.com/";
	m++;
	*/

	/*
	m->m_title = "allow links: searches";
	m->m_desc = "Allows anyone access to perform links: searches on this "
		"collection.";
	m->m_cgi = "als";
	m->m_off = (char *)&cr.m_allowLinksSearch - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m++;
	*/

	// REFERENCE PAGES CONTROLS
	m->m_title = "number of reference pages to generate";
	m->m_desc = "What is the number of "
		"reference pages to generate per query? Set to 0 to save "
		"CPU time.";
	m->m_cgi = "nrp";
	m->m_off = (char *)&cr.m_refs_numToGenerate - x;
	//m->m_soff = (char *)&si.m_refs_numToGenerate - y;
	m->m_smaxc = (char *)&cr.m_refs_numToGenerateCeiling - x;
	m->m_type = TYPE_LONG;
	m->m_def = "0";
	m->m_priv = 0;
	m->m_smin = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "number of reference pages to generate";
	m->m_desc = "What is the number of "
		"reference pages to generate per query? Set to 0 to save "
		"CPU time.";
	m->m_cgi = "snrp";
	m->m_off = (char *)&si.m_refs_numToGenerate - y;
	m->m_type = TYPE_LONG;
	m->m_defOff = (char *)&cr.m_refs_numToGenerate - x;
	m->m_priv = 0;
	m->m_smin = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	m->m_title = "number of reference pages to display";
	m->m_desc = "What is the number of "
		"reference pages to display per query?";
	m->m_cgi = "nrpdd";
	m->m_off = (char *)&cr.m_refs_numToDisplay - x;
	//m->m_soff = (char *)&si.m_refs_numToDisplay - y;
	m->m_type = TYPE_LONG;
	m->m_def = "0";
	m->m_group = 0;
	m->m_priv = 0; // allow the (more) link
	m->m_sprpg = 0; // do not propagate
	m->m_sprpp = 0; // do not propagate
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "docs to scan for reference pages";
	m->m_desc = "How many search results should we "
		"scan for reference pages per query?";
	m->m_cgi = "dsrp";
	m->m_off = (char *)&cr.m_refs_docsToScan - x;
	//m->m_soff = (char *)&si.m_refs_docsToScan - y;
	m->m_smaxc = (char *)&cr.m_refs_docsToScanCeiling - x;
	m->m_type = TYPE_LONG;
	m->m_def = "30";
	m->m_group = 0;
	m->m_priv = 0;
	m->m_smin = 0;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m++;

	m->m_title = "min references quality";
	m->m_desc = "References with page quality below this "
		"will be excluded. (Set to 101 to disable references while "
		"still generating related pages.)";
	m->m_cgi = "mrpq";
	m->m_off = (char *)&cr.m_refs_minQuality - x;
	//m->m_soff = (char *)&si.m_refs_minQuality - y;
	m->m_type = TYPE_LONG;
	m->m_def = "1";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "min links per reference";
	m->m_desc = "References need this many links to results to "
		"be included.";
	m->m_cgi = "mlpr";
	m->m_off = (char *)&cr.m_refs_minLinksPerReference - x;
	//m->m_soff = (char *)&si.m_refs_minLinksPerReference - y;
	m->m_type = TYPE_LONG;
	m->m_def = "2";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "max linkers to consider for references per page";
	m->m_desc = "Stop processing referencing pages after hitting this "
		"limit.";
	m->m_cgi = "mrpl";
	m->m_off = (char *)&cr.m_refs_maxLinkers - x;
	//m->m_soff = (char *)&si.m_refs_maxLinkers - y;
	m->m_smaxc = (char *)&cr.m_refs_maxLinkersCeiling - x;
	m->m_type = TYPE_LONG;
	m->m_def = "500";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_smin = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "page fetch multiplier for references";
	m->m_desc = "Use this multiplier to fetch more than the required "
		"number of reference pages. Fetches N * (this parm) "
		"references and displays the top scoring N.";
	m->m_cgi = "ptrfr";
	m->m_off = (char *)&cr.m_refs_additionalTRFetch - x;
	//m->m_soff = (char *)&si.m_refs_additionalTRFetch - y;
	m->m_smaxc = (char *)&cr.m_refs_additionalTRFetchCeiling - x;
	m->m_type = TYPE_FLOAT;
	m->m_def = "1.5";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "number of links coefficient";
	m->m_desc = "A in A * numLinks + B * quality + C * "
		"numLinks/totalLinks.";
	m->m_cgi = "nlc";
	m->m_off = (char *)&cr.m_refs_numLinksCoefficient - x;
	//m->m_soff = (char *)&si.m_refs_numLinksCoefficient - y;
	m->m_type = TYPE_LONG;
	m->m_def = "0";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "quality coefficient";
	m->m_desc = "B in A * numLinks + B * quality + C * "
		"numLinks/totalLinks.";
	m->m_cgi = "qc";
	m->m_off = (char *)&cr.m_refs_qualityCoefficient - x;
	//m->m_soff = (char *)&si.m_refs_qualityCoefficient - y;
	m->m_type = TYPE_LONG;
	m->m_def = "1";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "link density coefficient";
	m->m_desc = "C in A * numLinks + B * quality + C * "
		"numLinks/totalLinks.";
	m->m_cgi = "ldc";
	m->m_off = (char *)&cr.m_refs_linkDensityCoefficient - x;
	//m->m_soff = (char *)&si.m_refs_linkDensityCoefficient - y;
	m->m_type = TYPE_LONG;
	m->m_def = "1000";
	m->m_group = 0;
	m->m_priv = 2;
	//m->m_sparm = 1;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "add or multiply quality times link density";
	m->m_desc = "[+|*] in A * numLinks + B * quality [+|*]"
		" C * numLinks/totalLinks.";
	m->m_cgi = "mrs";
	m->m_off = (char *)&cr.m_refs_multiplyRefScore - x;
	//m->m_soff = (char *)&si.m_refs_multiplyRefScore - y;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;
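
	// Sketch of the reference score the three coefficients above feed,
	// as stated by the descs (the exact code lives elsewhere and may
	// differ): with multiplyRefScore false the terms are summed,
	//   refScore = A * numLinks + B * quality + C * numLinks/totalLinks,
	// and with multiplyRefScore true the quality and link-density terms
	// are multiplied rather than added:
	//   refScore = A * numLinks + B * quality * (C * numLinks/totalLinks).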

	// reference pages ceiling parameters
	m->m_title = "maximum allowed value for "
		"numReferences parameter";
	m->m_desc = "Maximum allowed value for "
		"the numReferences parameter.";
	m->m_cgi = "nrpc";
	m->m_off = (char *)&cr.m_refs_numToGenerateCeiling - x;
	m->m_type = TYPE_LONG;
	m->m_def = "100";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "maximum allowed value for "
		"docsToScanForReferences parameter";
	m->m_desc = "Maximum allowed value for "
		"the docsToScanForReferences parameter.";
	m->m_cgi = "dsrpc";
	m->m_off = (char *)&cr.m_refs_docsToScanCeiling - x;
	m->m_type = TYPE_LONG;
	m->m_def = "100";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "maximum allowed value for "
		"maxLinkers parameter";
	m->m_desc = "Maximum allowed value for "
		"the maxLinkers parameter.";
	m->m_cgi = "mrplc";
	m->m_off = (char *)&cr.m_refs_maxLinkersCeiling - x;
	m->m_type = TYPE_LONG;
	m->m_def = "5000";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "maximum allowed value for "
		"additionalTRFetch parameter";
	m->m_desc = "Maximum allowed value for "
		"the additionalTRFetch parameter.";
	m->m_cgi = "ptrfrc";
	m->m_off = (char *)&cr.m_refs_additionalTRFetchCeiling - x;
	m->m_type = TYPE_FLOAT;
	m->m_def = "10";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	// related pages parameters
	m->m_title = "number of related pages to generate";
	m->m_desc = "Number of related pages to generate.";
	m->m_cgi = "nrpg";
	m->m_off = (char *)&cr.m_rp_numToGenerate - x;
	//m->m_soff = (char *)&si.m_rp_numToGenerate - y;
	m->m_smaxc = (char *)&cr.m_rp_numToGenerateCeiling - x;
	m->m_type = TYPE_LONG;
	m->m_def = "0";
	m->m_priv = 0;
	m->m_smin = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "number of related pages to display";
	m->m_desc = "Number of related pages to display.";
	m->m_cgi = "nrpd";
	m->m_off = (char *)&cr.m_rp_numToDisplay - x;
	//m->m_soff = (char *)&si.m_rp_numToDisplay - y;
	m->m_type = TYPE_LONG;
	m->m_def = "0";
	m->m_group = 0;
	m->m_priv = 0; // allow the (more) link
	m->m_sprpg = 0; // do not propagate
	m->m_sprpp = 0; // do not propagate
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "number of links to scan for related pages";
	m->m_desc = "Number of links per reference page to scan for related "
		"pages.";
	m->m_cgi = "nlpd";
	m->m_off = (char *)&cr.m_rp_numLinksPerDoc - x;
	//m->m_soff = (char *)&si.m_rp_numLinksPerDoc - y;
	m->m_smaxc = (char *)&cr.m_rp_numLinksPerDocCeiling - x;
	m->m_type = TYPE_LONG;
	m->m_def = "1024";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_smin = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "min related page quality";
	m->m_desc = "Related pages with a quality lower than this will be "
		"ignored.";
	m->m_cgi = "merpq";
	m->m_off = (char *)&cr.m_rp_minQuality - x;
	//m->m_soff = (char *)&si.m_rp_minQuality - y;
	m->m_type = TYPE_LONG;
	m->m_def = "30";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "min related page score";
	m->m_desc = "Related pages with an adjusted score lower than this "
		"will be ignored.";
	m->m_cgi = "merps";
	m->m_off = (char *)&cr.m_rp_minScore - x;
	//m->m_soff = (char *)&si.m_rp_minScore - y;
	m->m_type = TYPE_LONG;
	m->m_def = "1";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "min related page links";
	m->m_desc = "Related pages with fewer than this number of links"
		" will be ignored.";
	m->m_cgi = "merpl";
	m->m_off = (char *)&cr.m_rp_minLinks - x;
	//m->m_soff = (char *)&si.m_rp_minLinks - y;
	m->m_type = TYPE_LONG;
	m->m_def = "2";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "coefficient for number of links in related pages score "
		"calculation";
	m->m_desc = "A in A * numLinks + B * avgLnkrQlty + C * PgQlty"
		" + D * numSRPLinks.";
	m->m_cgi = "nrplc";
	m->m_off = (char *)&cr.m_rp_numLinksCoeff - x;
	//m->m_soff = (char *)&si.m_rp_numLinksCoeff - y;
	m->m_type = TYPE_LONG;
	m->m_def = "10";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "coefficient for average linker quality in related pages "
		"score calculation";
	m->m_desc = "B in A * numLinks + B * avgLnkrQlty + C * PgQlty"
		" + D * numSRPLinks.";
	m->m_cgi = "arplqc";
	m->m_off = (char *)&cr.m_rp_avgLnkrQualCoeff - x;
	//m->m_soff = (char *)&si.m_rp_avgLnkrQualCoeff - y;
	m->m_type = TYPE_LONG;
	m->m_def = "1";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "coefficient for page quality in related pages "
		"score calculation";
	m->m_desc = "C in A * numLinks + B * avgLnkrQlty + C * PgQlty"
		" + D * numSRPLinks.";
	m->m_cgi = "qrpc";
	m->m_off = (char *)&cr.m_rp_qualCoeff - x;
	//m->m_soff = (char *)&si.m_rp_qualCoeff - y;
	m->m_type = TYPE_LONG;
	m->m_def = "1";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "coefficient for search result links in related pages "
		"score calculation";
	m->m_desc = "D in A * numLinks + B * avgLnkrQlty + C * PgQlty"
		" + D * numSRPLinks.";
	m->m_cgi = "srprpc";
	m->m_off = (char *)&cr.m_rp_srpLinkCoeff - x;
	//m->m_soff = (char *)&si.m_rp_srpLinkCoeff - y;
	m->m_type = TYPE_LONG;
	m->m_def = "1";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;
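
	// Worked example of the related-page score the four coefficients
	// above combine (defaults A=10, B=1, C=1, D=1; other numbers are
	// illustrative): a candidate with numLinks=3, avgLnkrQlty=40,
	// PgQlty=50 and numSRPLinks=2 scores
	// 10*3 + 1*40 + 1*50 + 1*2 = 122.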

	m->m_title = "number of related page summary excerpts";
	m->m_desc = "What is the maximum number of "
		"excerpts displayed in the summary of a related page?";
	m->m_cgi = "nrps";
	m->m_off = (char *)&cr.m_rp_numSummaryLines - x;
	//m->m_soff = (char *)&si.m_rp_numSummaryLines - y;
	m->m_smaxc = (char *)&cr.m_rp_numSummaryLinesCeiling - x;
	m->m_type = TYPE_LONG;
	m->m_def = "1";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_smin = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "highlight query terms in related pages summary";
	m->m_desc = "Highlight query terms in related pages summary.";
	m->m_cgi = "hqtirps";
	m->m_off = (char *)&cr.m_rp_doRelatedPageSumHighlight - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "number of characters to display in title before "
		"truncating";
	m->m_desc = "Truncates a related page title after this many "
		"characters and adds '...'.";
	m->m_cgi = "ttl";
	m->m_off = (char *)&cr.m_rp_titleTruncateLimit - x;
	//m->m_soff = (char *)&si.m_rp_titleTruncateLimit - y;
	m->m_type = TYPE_LONG;
	m->m_def = "50";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "use results pages as references";
	m->m_desc = "Use the search results' links in order to generate "
		"related pages.";
	m->m_cgi = "urar";
	m->m_off = (char *)&cr.m_rp_useResultsAsReferences - x;
	//m->m_soff = (char *)&si.m_rp_useResultsAsReferences - y;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "get related pages from other cluster";
	m->m_desc = "Say yes here to make Gigablast check another Gigablast "
		"cluster for the title recs of related pages. Gigablast will "
		"use the hosts2.conf file in the working directory to "
		"tell it what hosts belong to the other cluster.";
	m->m_cgi = "erp"; // external related pages
	m->m_off = (char *)&cr.m_rp_getExternalPages - x;
	//m->m_soff = (char *)&si.m_rp_getExternalPages - y;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "collection for other related pages cluster";
	m->m_desc = "Gigablast will fetch the related pages title record "
		"from this collection in the other cluster.";
	m->m_cgi = "erpc"; // external related pages collection
	m->m_off = (char *)&cr.m_rp_externalColl - x;
	//m->m_soff = (char *)&si.m_rp_externalColl - y;
	m->m_type = TYPE_STRING;
	m->m_size = MAX_COLL_LEN;
	m->m_def = "main";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	// related pages ceiling parameters
	m->m_title = "maximum allowed value for numToGenerate parameter";
	m->m_desc = "Maximum allowed value for the numToGenerate parameter.";
	m->m_cgi = "nrpgc";
	m->m_off = (char *)&cr.m_rp_numToGenerateCeiling - x;
	m->m_type = TYPE_LONG;
	m->m_def = "100";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "maximum allowed value for numRPLinksPerDoc parameter";
	m->m_desc = "Maximum allowed value for the numRPLinksPerDoc "
		"parameter.";
	m->m_cgi = "nlpdc";
	m->m_off = (char *)&cr.m_rp_numLinksPerDocCeiling - x;
	m->m_type = TYPE_LONG;
	m->m_def = "5000";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "maximum allowed value for numSummaryLines parameter";
	m->m_desc = "Maximum allowed value for the numSummaryLines "
		"parameter.";
	m->m_cgi = "nrpsc";
	m->m_off = (char *)&cr.m_rp_numSummaryLinesCeiling - x;
	m->m_type = TYPE_LONG;
	m->m_def = "10";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	// import search results controls
	m->m_title = "how many imported results should we insert";
	m->m_desc = "Gigablast will import X search results from the "
		"external cluster given by hosts2.conf and merge those "
		"search results into the current set of search results. "
		"Set to 0 to disable.";
	m->m_cgi = "imp";
	m->m_off = (char *)&cr.m_numResultsToImport - x;
	//m->m_soff = (char *)&si.m_numResultsToImport - y;
	m->m_type = TYPE_LONG;
	m->m_def = "0";
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "imported score weight";
	m->m_desc = "The score of all imported results will be multiplied "
		"by this number. Since results are mostly imported from "
		"a large collection they will usually have higher scores "
		"because of having more link texts, so tone it "
		"down a bit to put it on par with the integrating collection.";
	m->m_cgi = "impw";
	m->m_off = (char *)&cr.m_importWeight - x;
	//m->m_soff = (char *)&si.m_importWeight - y;
	m->m_type = TYPE_FLOAT;
	m->m_def = ".80";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "how many linkers must each imported result have";
	m->m_desc = "The urls of imported search results must be linked to "
		"by at least this many documents in the primary collection.";
	m->m_cgi = "impl";
	m->m_off = (char *)&cr.m_minLinkersPerImportedResult - x;
	//m->m_soff = (char *)&si.m_minLinkersPerImportedResult - y;
	m->m_type = TYPE_LONG;
	m->m_def = "3";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "num linkers weight";
	m->m_desc = "The number of linkers an imported result has from "
		"the base collection is multiplied by this weight and then "
		"added to the final score. The higher this is the more an "
		"imported result with a lot of linkers will be boosted. "
		"Currently, 100 is the max number of linkers permitted.";
	m->m_cgi = "impnlw";
	m->m_off = (char *)&cr.m_numLinkerWeight - x;
	//m->m_soff = (char *)&si.m_numLinkerWeight - y;
	m->m_type = TYPE_LONG;
	m->m_def = "50";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;
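
	// Sketch of how an imported result's score is adjusted per the two
	// descs above (numbers are illustrative; the exact code lives
	// elsewhere): with importWeight = 0.80 and numLinkerWeight = 50, a
	// result with an original score of 1000 and 3 linkers from the base
	// collection ends up with roughly 1000 * 0.80 + 3 * 50 = 950.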

	m->m_title = "the name of the collection to import from";
	m->m_desc = "Gigablast will import X search results from this "
		"external collection and merge them into the current search "
		"results.";
	m->m_cgi = "impc";
	m->m_off = (char *)&cr.m_importColl - x;
	//m->m_soff = (char *)&si.m_importColl - y;
	m->m_type = TYPE_STRING;
	m->m_size = MAX_COLL_LEN;
	m->m_def = "main";
	m->m_group = 0;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "max similar results for cluster by topic";
	m->m_desc = "Max similar results to show when clustering by topic.";
	m->m_cgi = "ncbt";
	m->m_off = (char *)&cr.m_maxClusterByTopicResults - x;
	m->m_type = TYPE_LONG;
	m->m_def = "10";
	m->m_group = 0;
	//m->m_scgi = "ncbt";
	//m->m_soff = (char *)&si.m_maxClusterByTopicResults - y;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "number of extra results to get for cluster by topic";
	m->m_desc = "Number of extra results to get for cluster by topic.";
	m->m_cgi = "ntwo";
	m->m_off = (char *)&cr.m_numExtraClusterByTopicResults - x;
	m->m_type = TYPE_LONG;
	m->m_def = "100";
	m->m_group = 0;
	//m->m_scgi = "ntwo";
	//m->m_soff = (char *)&si.m_numExtraClusterByTopicResults - y;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "minimum number of inlinkers required to consider "
		"getting the title from inlinkers";
	m->m_desc = "Minimum number of inlinkers required to consider "
		"getting the title from inlinkers.";
	m->m_cgi = "mininlinkers";
	m->m_off = (char *)&cr.m_minTitleInLinkers - x;
	m->m_type = TYPE_LONG;
	m->m_def = "10";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "max number of inlinkers to consider";
	m->m_desc = "Max number of inlinkers to consider for getting "
		"inlinker titles.";
	m->m_cgi = "maxinlinkers";
	m->m_off = (char *)&cr.m_maxTitleInLinkers - x;
	m->m_type = TYPE_LONG;
	m->m_def = "128";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "max title len";
	m->m_desc = "What is the maximum number of "
		"characters allowed in titles displayed in the search "
		"results?";
	m->m_cgi = "tml";
	m->m_defOff = (char *)&cr.m_titleMaxLen - x;
	m->m_off = (char *)&si.m_titleMaxLen - y;
	m->m_type = TYPE_LONG;
	m->m_flags = PF_API;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	/*
	m->m_title = "use new summary generator";
	m->m_desc = "Also used for gigabits and titles.";
	m->m_cgi = "uns"; // external related pages
	m->m_off = (char *)&cr.m_useNewSummaries - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_sparm = 1;
	m->m_scgi = "uns";
	m->m_soff = (char *)&si.m_useNewSummaries - y;
	m++;
	*/

	m->m_title = "summary mode";
	m->m_desc = "0 = old compatibility mode, 1 = UTF-8 mode, "
		"2 = fast ASCII mode, "
		"3 = Ascii Proximity Summary, "
		"4 = Utf8 Proximity Summary, "
		"5 = Ascii Pre Proximity Summary, "
		"6 = Utf8 Pre Proximity Summary:";
	m->m_cgi = "smd";
	m->m_off = (char *)&cr.m_summaryMode - x;
	m->m_type = TYPE_LONG;
	m->m_def = "0";
	//m->m_scgi = "smd";
	//m->m_soff = (char *)&si.m_summaryMode - y;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "number of summary excerpts";
	m->m_desc = "How many summary excerpts to display per search result?";
	m->m_cgi = "ns";
	m->m_type = TYPE_LONG;
	m->m_defOff = (char *)&cr.m_summaryMaxNumLines - x;
	m->m_group = 0;
	m->m_off = (char *)&si.m_numLinesInSummary - y;
	m->m_flags = PF_API;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	m->m_title = "max summary line width";
	m->m_desc = "<br> tags are inserted to keep the number "
		"of chars in the summary per line at or below this width. "
		"Also affects the title. "
		"Strings without spaces that exceed this "
		"width are not split. Has no effect on xml or json feeds; "
		"only works on html.";
	m->m_cgi = "sw";
	//m->m_off = (char *)&cr.m_summaryMaxWidth - x;
	m->m_off = (char *)&si.m_summaryMaxWidth - y;
	m->m_defOff = (char *)&cr.m_summaryMaxWidth - x;
	m->m_type = TYPE_LONG;
	m->m_group = 0;
	m->m_flags = PF_API;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	m->m_title = "max summary excerpt length";
	m->m_desc = "What is the maximum number of "
		"characters allowed per summary excerpt?";
	m->m_cgi = "smxcpl";
	m->m_off = (char *)&si.m_summaryMaxNumCharsPerLine - y;
	m->m_defOff = (char *)&cr.m_summaryMaxNumCharsPerLine - x;
	m->m_type = TYPE_LONG;
	m->m_group = 0;
	m->m_flags = PF_API;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	/*
	m->m_title = "enable page turk";
	m->m_desc = "If enabled, search results shall feed the page turk, "
		"which is used to mechanically rank websites.";
	m->m_cgi = "ept";
	m->m_def = "0";
	m->m_off = (char *)&cr.m_pageTurkEnabled - x;
	m->m_type = TYPE_BOOL;
	m++;
	*/

	m->m_title = "results to scan for gigabits generation";
	m->m_desc = "How many search results should we "
		"scan for gigabit (related topics) generation? Set this to "
		"zero to disable gigabits!";
	m->m_cgi = "dsrt";
	m->m_off = (char *)&si.m_docsToScanForTopics - y;
	m->m_type = TYPE_LONG;
	m->m_defOff = (char *)&cr.m_docsToScanForTopics - x;
	m->m_flags = PF_API;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	m->m_title = "ip restriction for gigabits";
	m->m_desc = "Should Gigablast only get one document per IP domain "
		"and per domain for gigabits (related topics) generation?";
	m->m_cgi = "ipr";
	m->m_off = (char *)&si.m_ipRestrictForTopics - y;
	m->m_defOff = (char *)&cr.m_ipRestrict - x;
	m->m_type = TYPE_BOOL;
	m->m_group = 0;
	m->m_flags = PF_API;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	m->m_title = "number of gigabits to show";
	m->m_desc = "What is the number of gigabits (related topics) "
		"displayed per query? Set to 0 to save a little CPU time.";
	m->m_cgi = "nrt";
	m->m_defOff = (char *)&cr.m_numTopics - x;
	m->m_off = (char *)&si.m_numTopicsToDisplay - y;
	m->m_type = TYPE_LONG;
	m->m_def = "11";
	m->m_group = 0;
	m->m_sprpg = 0; // do not propagate
	m->m_sprpp = 0; // do not propagate
	m->m_flags = PF_API;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	m->m_title = "min topics score";
	m->m_desc = "Gigabits (related topics) with scores below this "
		"will be excluded. Scores range from 0% to over 100%.";
	m->m_cgi = "mts";
	m->m_defOff = (char *)&cr.m_minTopicScore - x;
	m->m_off = (char *)&si.m_minTopicScore - y;
	m->m_type = TYPE_LONG;
	m->m_group = 0;
	m->m_flags = PF_API;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	m->m_title = "min gigabit doc count by default";
	m->m_desc = "How many documents must contain the gigabit "
		"(related topic) in order for it to be displayed.";
	m->m_cgi = "mdc";
	m->m_defOff = (char *)&cr.m_minDocCount - x;
	m->m_off = (char *)&si.m_minDocCount - y;
	m->m_type = TYPE_LONG;
	m->m_def = "2";
	m->m_group = 0;
	m->m_flags = PF_API;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	m->m_title = "dedup doc percent for gigabits (related topics)";
	m->m_desc = "If a document is this percent similar to another "
		"document with a higher score, then it will not contribute "
		"to the gigabit generation.";
	m->m_cgi = "dsp";
	m->m_defOff = (char *)&cr.m_dedupSamplePercent - x;
	m->m_off = (char *)&si.m_dedupSamplePercent - y;
	m->m_type = TYPE_LONG;
	m->m_def = "80";
	m->m_group = 0;
	m->m_flags = PF_API;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	///////////////////////////////////////////
	//
	// SPIDER PROXY CONTROLS
	//
	///////////////////////////////////////////

	m->m_title = "always use spider proxies for all collections";
	m->m_desc = "ALWAYS use the spider proxies listed below for "
		"spidering. If none are "
		"listed then gb will not use any. Applies to all collections. "
		"If you want to regulate this on a per collection basis then "
		"set this to <b>NO</b> here and adjust the "
		"proxy controls on the "
		"<b>spider controls</b> page. If the list of proxy IPs below "
		"is empty, then of course, no proxies will be used.";
	m->m_cgi = "useproxyips";
	m->m_xml = "useSpiderProxies";
	m->m_off = (char *)&g_conf.m_useProxyIps - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	// hide this for now. just make it a per collection parm.
	m->m_flags = PF_HIDDEN;
	m->m_page = PAGE_SPIDERPROXIES;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "automatically use spider proxies for all collections";
	m->m_desc = "AUTOMATICALLY use the spider proxies listed below for "
		"spidering. If none are "
		"listed then gb will not use any. Applies to all collections. "
		"If you want to regulate this on a per collection basis then "
		"set this to <b>NO</b> here and adjust the "
		"proxy controls on the "
		"<b>spider controls</b> page. If the list of proxy IPs below "
		"is empty, then of course, no proxies will be used.";
	m->m_cgi = "autouseproxyips";
	m->m_xml = "automaticallyUseSpiderProxies";
	m->m_off = (char *)&g_conf.m_automaticallyUseProxyIps - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	// hide this for now. just make it a per collection parm.
	m->m_flags = PF_HIDDEN;
	m->m_page = PAGE_SPIDERPROXIES;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "spider proxy ips";
	m->m_desc = "List of whitespace-separated spider proxy IPs. Put "
		"them in IP:port format. Example: <i>1.2.3.4:80 4.5.6.7:99</i>. "
		"You can also use <i>username:password@1.2.3.4:80</i>. "
		"If a proxy itself times out when downloading through it, "
		"it will be perceived as a normal download timeout and the "
		"page will be retried according to the url filters table, so "
		"you might want to modify the url filters to retry network "
		"errors more aggressively. Search for 'private proxies' on "
		"google to find proxy providers. Try to ensure all your "
		"proxies are on different class C IPs if possible. "
		"That is, the first 3 numbers in the IP addresses are all "
		"different.";
	m->m_cgi = "proxyips";
	m->m_xml = "proxyIps";
	m->m_off = (char *)&g_conf.m_proxyIps - g;
	m->m_type = TYPE_SAFEBUF; // TYPE_IP;
	m->m_def = "";
	m->m_flags = PF_TEXTAREA | PF_REBUILDPROXYTABLE;
	m->m_page = PAGE_SPIDERPROXIES;
	m->m_obj = OBJ_CONF;
	m++;
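
	// Example proxyips values, using the formats the desc above allows
	// (addresses and credentials are illustrative):
	//   "1.2.3.4:80 4.5.6.7:99"               -- plain ip:port entries
	//   "user:pass@1.2.3.4:80 4.5.6.7:99"     -- entry with authentication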

	m->m_title = "spider proxy test url";
	m->m_desc = "Download this url every minute through each proxy "
		"listed above to ensure they are up. Typically you should "
		"make this a URL you own so you do not aggravate another "
		"webmaster.";
	m->m_xml = "proxyTestUrl";
	m->m_cgi = "proxytesturl";
	m->m_off = (char *)&g_conf.m_proxyTestUrl - g;
	m->m_type = TYPE_SAFEBUF;
	m->m_def = "http://www.gigablast.com/";
	m->m_flags = 0;
	m->m_page = PAGE_SPIDERPROXIES;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "reset proxy table";
	m->m_desc = "Reset the proxy statistics in the table below so that "
		"all your proxies are treated like new again.";
	m->m_cgi = "resetproxytable";
	m->m_type = TYPE_CMD;
	m->m_func = CommandResetProxyTable;
	m->m_cast = 1;
	m->m_page = PAGE_SPIDERPROXIES;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "mix up user agents";
	m->m_desc = "Use random user-agents when downloading through "
		"a spider proxy listed above to "
		"protect gb's anonymity. The User-Agent used is a function "
		"of the proxy IP/port and IP of the url being downloaded. "
		"That way it is consistent when downloading the same website "
		"through the same proxy.";
	m->m_cgi = "userandagents";
	m->m_xml = "useRandAgents";
	m->m_off = (char *)&g_conf.m_useRandAgents - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_flags = 0;
	m->m_page = PAGE_SPIDERPROXIES;
	m->m_obj = OBJ_CONF;
	m++;
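
	// Sketch of a deterministic per-(proxy,target) agent pick that is
	// consistent with the description above: the same proxy and target
	// IP always map to the same agent. The agent table and mixing
	// constants are hypothetical placeholders, not the actual code:
	//
	//   static const char *s_agents[] = { "Agent-A","Agent-B","Agent-C" };
	//   const char *pickAgent ( int32_t proxyIp , uint16_t proxyPort ,
	//                           int32_t urlIp ) {
	//           uint32_t h = (uint32_t)proxyIp * 2654435761U;
	//           h ^= (uint32_t)proxyPort * 40503U;
	//           h ^= (uint32_t)urlIp * 2654435761U;
	//           return s_agents [ h % (sizeof(s_agents)/
	//                                  sizeof(s_agents[0])) ];
	//   }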

	m->m_title = "squid proxy authorized users";
	m->m_desc = "Gigablast can also simulate a squid proxy, "
		"complete with "
		"caching. It will forward your request to the proxies you "
		"list above, if any. This list consists of space-separated "
		"<i>username:password</i> items. Leave this list empty "
		"to disable squid caching behaviour. The default cache "
		"size for this is 10MB per shard. Use item *:* to allow "
		"anyone access.";
	m->m_xml = "proxyAuth";
	m->m_cgi = "proxyAuth";
	m->m_off = (char *)&g_conf.m_proxyAuth - g;
	m->m_type = TYPE_SAFEBUF;
	m->m_def = "";
	m->m_flags = PF_TEXTAREA;
	m->m_page = PAGE_SPIDERPROXIES;
	m->m_obj = OBJ_CONF;
	m++;
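
	// Example of exercising the squid behaviour described above with a
	// stock http client, assuming "user1:pass1" is in the list and gb
	// listens on gbhost:8000 (host, port and credentials are all
	// placeholders):
	//
	//   curl -x gbhost:8000 -U user1:pass1 http://example.com/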

	m->m_title = "max words per gigabit (related topic) by default";
	m->m_desc = "Maximum number of words a gigabit (related topic) "
		"can have. Affects xml feeds, too.";
	m->m_cgi = "mwpt";
	m->m_defOff= (char *)&cr.m_maxWordsPerTopic - x;
	m->m_off = (char *)&si.m_maxWordsPerTopic - y;
	m->m_type = TYPE_LONG;
	m->m_def = "6";
	m->m_group = 0;
	m->m_flags = PF_API;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	m->m_title = "show images";
	m->m_desc = "Should we return or show the thumbnail images in the "
		"search results?";
	m->m_cgi = "showimages";
	m->m_off = (char *)&si.m_showImages - y;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_sprpg = 0;
	m->m_sprpp = 0;
	m->m_flags = PF_NOSAVE;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	m->m_title = "use cache";
	m->m_desc = "Use 0 if Gigablast should not read from or write to "
		"any caches at any level.";
	m->m_def = "-1";
	m->m_off = (char *)&si.m_useCache - y;
	m->m_type = TYPE_CHAR;
	m->m_cgi = "usecache";
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	m->m_title = "read from cache";
	m->m_desc = "Should we read search results from the cache? Set "
		"to false to fix dmoz bug.";
	m->m_cgi = "rcache";
	m->m_off = (char *)&si.m_rcache - y;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_sprpg = 0;
	m->m_sprpp = 0;
	m->m_flags = PF_NOSAVE;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	m->m_title = "write to cache";
	m->m_desc = "Use 0 if Gigablast should not write to "
		"any caches at any level.";
	m->m_def = "-1";
	m->m_off = (char *)&si.m_wcache - y;
	m->m_type = TYPE_CHAR;
	m->m_cgi = "wcache";
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	m->m_title = "min serp docid";
	m->m_desc = "Start displaying results after this score/docid pair. "
		"Used by widget to append results to end when index is "
		"volatile.";
	m->m_def = "0";
	m->m_off = (char *)&si.m_minSerpDocId - y;
	m->m_type = TYPE_LONG_LONG;
	m->m_cgi = "minserpdocid";
	m->m_flags = PF_API;
	m->m_smin = 0;
	m->m_sprpg = 0;
	m->m_sprpp = 0;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	m->m_title = "max serp score";
	m->m_desc = "Start displaying results after this score/docid pair. "
		"Used by widget to append results to end when index is "
		"volatile.";
	m->m_def = "0";
	m->m_off = (char *)&si.m_maxSerpScore - y;
	m->m_type = TYPE_DOUBLE;
	m->m_cgi = "maxserpscore";
	m->m_flags = PF_API;
	m->m_smin = 0;
	m->m_sprpg = 0;
	m->m_sprpp = 0;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;
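
	// These two parms work as a pair: a widget paginating a volatile
	// index passes the score and docid of the LAST result it already
	// has, and the next page starts strictly after that pair. A
	// hypothetical follow-up request might look like:
	//
	//   /search?q=test&n=10&maxserpscore=1.234567&minserpdocid=123456789
	//
	// (the query and values are placeholders for illustration only)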

	m->m_title = "restrict search to this url";
	m->m_desc = "Does a url: query.";
	m->m_off = (char *)&si.m_url - y;
	m->m_type = TYPE_CHARPTR;//STRING;
	//m->m_size = MAX_URL_LEN;
	m->m_cgi = "url";
	m->m_sprpg = 0;
	m->m_sprpp = 0;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m->m_flags = PF_NOAPI;
	m++;

	m->m_title = "restrict search to pages that link to this url";
	m->m_desc = "The url which the pages must link to.";
	m->m_off = (char *)&si.m_link - y;
	m->m_type = TYPE_CHARPTR;//STRING;
	//m->m_size = MAX_URL_LEN;
	m->m_cgi = "link";
	m->m_sprpg = 0;
	m->m_sprpp = 0;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	m->m_title = "search for this phrase quoted";
	m->m_desc = "The phrase which will be quoted in the query. From the "
		"advanced search page, adv.html.";
	m->m_off = (char *)&si.m_quote1 - y;
	m->m_type = TYPE_CHARPTR;//STRING;
	//m->m_size = 512;
	m->m_cgi = "quotea";
	m->m_sprpg = 0;
	m->m_sprpp = 0;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m->m_flags = PF_NOAPI;
	m++;

	m->m_title = "search for this second phrase quoted";
	m->m_desc = "The phrase which will be quoted in the query. From the "
		"advanced search page, adv.html.";
	m->m_off = (char *)&si.m_quote2 - y;
	m->m_type = TYPE_CHARPTR;//STRING;
	//m->m_size = 512;
	m->m_cgi = "quoteb";
	m->m_sprpg = 0;
	m->m_sprpp = 0;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m->m_flags = PF_NOAPI;
	m++;

	/*
	m->m_title = "restrict results to this site";
	m->m_desc = "Returned results will have URLs from this site, X.";
	m->m_off = (char *)&si.m_site - y;
	m->m_type = TYPE_CHARPTR;//STRING;
	m->m_cgi = "site";
	m->m_size = 1024; // MAX_SITE_LEN;
	m->m_sprpg = 1;
	m->m_sprpp = 1;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;
	*/

	m->m_title = "restrict results to these sites";
	m->m_desc = "Returned results will have URLs from this "
		"space-separated list of sites. Can have up to 200 sites. "
		"A site can include sub folders. This allows you to build "
		"a <a href=\"/cts.html\">Custom Topic Search Engine</a>.";
	m->m_off = (char *)&si.m_sites - y;
	m->m_type = TYPE_CHARPTR;
	//m->m_size = 32*1024; // MAX_SITES_LEN;
	m->m_cgi = "sites";
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m->m_sprpg = 1;
	m->m_sprpp = 1;
	m++;

	m->m_title = "require these query terms";
	m->m_desc = "Returned results will have all the words in X. "
		"From the advanced search page, adv.html.";
	m->m_off = (char *)&si.m_plus - y;
	m->m_def = NULL;
	m->m_type = TYPE_CHARPTR;//STRING;
	m->m_cgi = "plus";
	//m->m_size = 500;
	m->m_sprpg = 0;
	m->m_sprpp = 0;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m->m_flags = PF_NOAPI;
	m++;

	m->m_title = "avoid these query terms";
	m->m_desc = "Returned results will NOT have any of the words in X. "
		"From the advanced search page, adv.html.";
	m->m_off = (char *)&si.m_minus - y;
	m->m_type = TYPE_CHARPTR;//STRING;
	m->m_cgi = "minus";
	//m->m_size = 500;
	m->m_sprpg = 0;
	m->m_sprpp = 0;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m->m_flags = PF_NOAPI;
	m++;

	m->m_title = "format of the returned search results";
	m->m_desc = "Can be html, xml or json to get results back in that "
		"format.";
	m->m_def = "html";
	m->m_off = (char *)&si.m_formatStr - y;
	m->m_type = TYPE_CHARPTR;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m->m_cgi = "format";
	m->m_flags = PF_NOAPI; // already in the api, so don't repeat
	m++;
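
	// Example requests for the three supported formats (the collection
	// name "main" is a placeholder):
	//
	//   /search?c=main&q=test&format=html
	//   /search?c=main&q=test&format=xml
	//   /search?c=main&q=test&format=json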

	m->m_title = "family filter";
	m->m_desc = "Remove objectionable results if this is enabled.";
	m->m_def = "0";
	m->m_off = (char *)&si.m_familyFilter - y;
	m->m_type = TYPE_BOOL;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m->m_cgi = "ff";
	m++;

	m->m_title = "highlight query terms in summaries";
	m->m_desc = "Use to disable or enable "
		"highlighting of the query terms in the summaries.";
	m->m_def = "1";
	m->m_off = (char *)&si.m_doQueryHighlighting - y;
	m->m_type = TYPE_BOOL;
	m->m_cgi = "qh";
	m->m_smin = 0;
	m->m_smax = 8;
	m->m_sprpg = 1; // turn off for now
	m->m_sprpp = 1;
	m->m_flags = PF_API;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	m->m_title = "cached page highlight query";
	m->m_desc = "Highlight the terms in this query instead.";
	m->m_def = NULL;
	m->m_off = (char *)&si.m_highlightQuery - y;
	m->m_type = TYPE_CHARPTR;//STRING;
	m->m_cgi = "hq";
	//m->m_size = 1000;
	m->m_sprpg = 0; // no need to propagate this one
	m->m_sprpp = 0;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	/*
	m->m_title = "highlight event date in summaries.";
	m->m_desc = "Can be 0 or 1 to respectively disable or enable "
		"highlighting of the event date terms in the summaries.";
	m->m_def = "0";
	m->m_off = (char *)&si.m_doDateHighlighting - y;
	m->m_type = TYPE_BOOL;
	m->m_cgi = "dh";
	m->m_smin = 0;
	m->m_smax = 8;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;
	*/

	/*
	m->m_title = "limit search results to this ruleset";
	m->m_desc = "limit search results to this ruleset";
	m->m_def = "0";
	m->m_off = (char *)&si.m_ruleset - y;
	m->m_type = TYPE_LONG;
	m->m_cgi = "ruleset";
	m->m_smin = 0;
	m++;
	*/

	m->m_title = "Query match offsets";
	m->m_desc = "Return a list of the offsets of each query word "
		"actually matched in the document. 1 means byte offset, "
		"and 2 means word offset.";
	m->m_def = "0";
	m->m_off = (char *)&si.m_queryMatchOffsets - y;
	m->m_type = TYPE_LONG;
	m->m_cgi = "qmo";
	m->m_smin = 0;
	m->m_smax = 2;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m->m_flags = PF_NOAPI;
	m++;

	m->m_title = "boolean status";
	m->m_desc = "Can be 0 or 1 or 2. 0 means the query is NOT boolean, "
		"1 means the query is boolean and 2 means to auto-detect.";
	m->m_def = "2";
	m->m_off = (char *)&si.m_boolFlag - y;
	m->m_type = TYPE_LONG;
	m->m_cgi = "bq";
	m->m_smin = 0;
	m->m_smax = 2;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;
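
	// For instance, a hypothetical request that forces boolean
	// evaluation on rather than relying on the default auto-detection:
	//
	//   /search?c=main&q=cat+AND+dog&bq=1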

	m->m_title = "meta tags to display";
	m->m_desc = "A space-separated string of <b>meta tag names</b>. "
		"Do not forget to url-encode the spaces to +'s or %%20's. "
		"Gigablast will extract the contents of these specified meta "
		"tags out of the pages listed in the search results and "
		"display that content after each summary. i.e. "
		"<i>&dt=description</i> will display the meta description of "
		"each search result. <i>&dt=description:32+keywords:64</i> "
		"will display the meta description and meta keywords of each "
		"search result and limit the fields to 32 and 64 characters "
		"respectively. When used in an XML feed the <i><display "
		"name=\"meta_tag_name\">meta_tag_content</></i> XML "
		"tag will be used to convey each requested meta tag's "
		"content.";
	m->m_off = (char *)&si.m_displayMetas - y;
	m->m_type = TYPE_CHARPTR;
	m->m_cgi = "dt";
	//m->m_size = 3000;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	/*
	// . you can have multiple topics= parms in you query url...
	// . this is used to set the TopicGroups array in SearchInput
	m->m_title = "related topic parameters";
	m->m_desc =
		"X=<b>NUM+MAX+SCAN+MIN+MAXW+META+DEL+IDF+DEDUP</b>\n"
		"<br><br>\n"
		"<b>NUM</b> is how many <b>related topics</b> you want "
		"returned.\n"
		"<br><br>\n"
		"<b>MAX</b> is the maximum number of topics to generate "
		"and store in cache, so if TW is increased, but still below "
		"MT, it will result in a fast cache hit.\n"
		"<br><br>\n"
		"<b>SCAN</b> is how many documents to scan for related "
		"topics. If this is 30, for example, then Gigablast will "
		"scan the first 30 search results for related topics.\n"
		"<br><br>\n"
		"<b>MIN</b> is the minimum score of returned topics. Ranges "
		"from 0%% to over 100%%. 50%% is considered pretty good. "
		"BUG: This must be at least 1 to get any topics back.\n"
		"<br><br>\n"
		"<b>MAXW</b> is the maximum number of words per topic.\n"
		"<br><br>\n"
		"<b>META</b> is the meta tag name to which Gigablast will "
		"restrict the content used to generate the topics. Do not "
		"specify this field to restrict the content to the body of "
		"each document, that is the default.\n"
		"<br><br>\n"
		"<b>DEL</b> is a single character delimiter which defines "
		"the topic candidates. All candidates must be separated from "
		"the other candidates with the delimiter. So <meta "
		"name=test content=\" cat dog ; pig rabbit horse\"> "
		"when using the ; as a delimiter would only have two topic "
		"candidates: \"cat dog\" and \"pig rabbit horse\". If no "
		"delimiter is provided, default functionality is assumed.\n"
		"<br><br>\n"
		"<b>IDF</b> is 1, the default, if you want Gigablast to "
		"weight topic candidates by their idf, 0 otherwise."
		"<br><br>\n"
		"<b>DEDUP</b> is 1, the default, if the topics should be "
		"deduped. This involves removing topics that are substrings "
		"or superstrings of other higher-scoring topics."
		"<br><br>\n"
		"Example: topics=49+100+30+1+6+author+%%3B+0+0"
		"<br><br>\n"
		"The default values for those parameters with unspecified "
		"defaults can be defined on the \"Search Controls\" page. "
		"<br><br>\n"
		"XML feeds will contain the generated topics like: "
		"<topic><name><![CDATA[some topic]]><"
		"/name><score>13</score><from>"
		"metaTagName</from></topic>"
		"<br><br>\n"
		"Even though somewhat nonstandard, you can specify multiple "
		"<i>&topic=</i> parameters to get back multiple topic "
		"groups."
		"<br><br>\n"
		"Performance will decrease if you increase the MAX, SCAN or "
		"MAXW.";
	m->m_type = TYPE_STRING;
	m->m_size = 512;
	m->m_cgi = "topics";
	m->m_size = 100;
	// MDW: NO NO NO... was causing a write breach!!! -- take this all out
	m->m_off = -2; // bogus offset
	//m->m_off = (char *)&si.m_topics - y;
	m++;
	*/

	m->m_title = "niceness";
	m->m_desc = "Can be 0 or 1. 0 is usually a faster, high-priority "
		"query, 1 is a slower, lower-priority query.";
	m->m_def = "0";
	m->m_off = (char *)&si.m_niceness - y;
	m->m_type = TYPE_LONG;
	m->m_cgi = "niceness";
	m->m_smin = 0;
	m->m_smax = 1;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	m->m_title = "debug flag";
	m->m_desc = "Is 1 to log debug information, 0 otherwise.";
	m->m_def = "0";
	m->m_off = (char *)&si.m_debug - y;
	m->m_type = TYPE_BOOL;
	m->m_cgi = "debug";
	//m->m_priv = 1;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	m->m_title = "return number of docs per topic";
	m->m_desc = "Use 1 if you want Gigablast to return the number of "
		"documents in the search results that contained each topic "
		"(gigabit).";
	m->m_def = "1";
	m->m_off = (char *)&si.m_returnDocIdCount - y;
	m->m_type = TYPE_BOOL;
	m->m_cgi = "rdc";
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	m->m_title = "return docids per topic";
	m->m_desc = "Use 1 if you want Gigablast to return the list of "
		"docIds from the search results that contained each topic "
		"(gigabit).";
	m->m_def = "0";
	m->m_off = (char *)&si.m_returnDocIds - y;
	m->m_type = TYPE_BOOL;
	m->m_cgi = "rd";
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	m->m_title = "return popularity per topic";
	m->m_desc = "Use 1 if you want Gigablast to return the popularity "
		"of each topic (gigabit).";
	m->m_def = "0";
	m->m_off = (char *)&si.m_returnPops - y;
	m->m_type = TYPE_BOOL;
	m->m_cgi = "rp";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	//m->m_title = "compound list max size";
	//m->m_desc = "Is the max size in bytes of the compound termlist. "
	//	"Each document id is 6 bytes.";
	//m->m_def = "-1";
	//m->m_off = (char *)&si.m_compoundListMaxSize - y;
	//m->m_type = TYPE_LONG;
	//m->m_cgi = "clms";
	//m->m_smin = 0;
	//m->m_priv = 1;
	//m++;

	m->m_title = "debug gigabits flag";
	m->m_desc = "Is 1 to log gigabits debug information, 0 otherwise.";
	m->m_def = "0";
	m->m_off = (char *)&si.m_debugGigabits - y;
	m->m_type = TYPE_BOOL;
	m->m_cgi = "debuggigabits";
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	m->m_title = "return docids only";
	m->m_desc = "Is 1 to return only docids as query results.";
	m->m_def = "0";
	m->m_off = (char *)&si.m_docIdsOnly - y;
	m->m_type = TYPE_BOOL;
	m->m_cgi = "dio";
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	m->m_title = "image url";
	m->m_desc = "The url of an image to co-brand on the search "
		"results page.";
	m->m_off = (char *)&si.m_imgUrl - y;
	m->m_type = TYPE_CHARPTR;//STRING;
	m->m_def = NULL;
	//m->m_size = 512;
	m->m_cgi = "iu";
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m->m_flags = PF_NOAPI;
	m++;

	m->m_title = "image link";
	m->m_desc = "The hyperlink to use on the image to co-brand on "
		"the search results page.";
	m->m_off = (char *)&si.m_imgLink - y;
	m->m_type = TYPE_CHARPTR;//STRING;
	m->m_def = NULL;
	//m->m_size = 512;
	m->m_cgi = "ix";
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m->m_flags = PF_NOAPI;
	m++;

	m->m_title = "image width";
	m->m_desc = "The width of the image on the search results page.";
	m->m_off = (char *)&si.m_imgWidth - y;
	m->m_type = TYPE_LONG;
	m->m_cgi = "iw";
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m->m_def = "200";
	m->m_flags = PF_NOAPI;
	m++;

	m->m_title = "image height";
	m->m_desc = "The height of the image on the search results "
		"page.";
	m->m_off = (char *)&si.m_imgHeight - y;
	m->m_type = TYPE_LONG;
	m->m_cgi = "ih";
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m->m_def = "200";
	m->m_flags = PF_NOAPI;
	m++;

	// m->m_title = "password";
	// m->m_desc = "The password.";
	// m->m_off = (char *)&si.m_pwd - y;
	// m->m_type = TYPE_CHARPTR;//STRING;
	// m->m_cgi = "pwd";
	// m->m_size = 32;
	// m->m_flags = PF_HIDDEN | PF_NOSAVE;
	// m->m_page = PAGE_RESULTS;
	// m->m_obj = OBJ_SI;
	// m++;

	m->m_title = "admin override";
	m->m_desc = "admin override";
	m->m_off = (char *)&si.m_isMasterAdmin - y;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_cgi = "admin";
	m->m_sprpg = 1; // propagate on GET request
	m->m_sprpp = 1; // propagate on POST request
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	/*
	m->m_title = "language";
	m->m_desc = "Language code to restrict search. 0 = All. Uses "
		"Clusterdb to filter languages. This is being phased out "
		"please do not use much, use gblang instead.";
	m->m_off = (char *)&si.m_languageCode - y;
	m->m_type = TYPE_STRING;
	m->m_size = 5+1;
	m->m_def = "none";
	// our google gadget gets &lang=en passed to it from google, so
	// change this!!
	m->m_cgi = "clang";
	m++;
	*/

	/*
	this should be a hash on the lang abbr line gblang:en
	m->m_title = "GB language";
	m->m_desc = "Language code to restrict search. 0 = All. Uses "
		"the gblang: keyword to filter languages.";
	m->m_off = (char *)&si.m_gblang - y;
	m->m_type = TYPE_LONG;
	m->m_def = "0";
	m->m_cgi = "gblang";
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;
	*/

	// prepend to query
	m->m_title = "prepend";
	m->m_desc = "prepend this to the supplied query followed by a |.";
	m->m_off = (char *)&si.m_prepend - y;
	m->m_type = TYPE_CHARPTR;
	m->m_def = NULL;
	m->m_cgi = "prepend";
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	m->m_title = "GB Country";
	m->m_desc = "Country code to restrict search";
	m->m_off = (char *)&si.m_gbcountry - y;
	m->m_type = TYPE_CHARPTR;//STRING;
	//m->m_size = 4+1;
	m->m_def = NULL;
	//m->m_def = "iso-8859-1";
	m->m_cgi = "gbcountry";
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m->m_flags = PF_NOAPI;
	m++;

	/*
	m->m_title = "rerank ruleset";
	m->m_desc = "Use this ruleset to rerank the search results. Will "
		"rerank at least the first X results specified with &n=X. "
		"And be sure to say &recycle=0 to recompute the quality "
		"of each page in the search results.";
	m->m_off = (char *)&si.m_rerankRuleset - y;
	m->m_type = TYPE_LONG;
	m->m_def = "-1";
	m->m_cgi = "rerank";
	m++;

	m->m_title = "apply ruleset to roots";
	m->m_desc = "Recompute the quality of the root urls of each "
		"search result in order to compute the quality of that "
		"search result, since it depends on its root quality. This "
		"can take a lot longer when enabled.";
	m->m_off = (char *)&si.m_artr - y;
	m->m_type = TYPE_LONG;
	m->m_def = "0";
	m->m_cgi = "artr";
	m++;
	*/

	m->m_title = "show banned pages";
	m->m_desc = "show banned pages";
	m->m_off = (char *)&si.m_showBanned - y;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_cgi = "sb";
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	m->m_title = "allow punctuation in query phrases";
	m->m_desc = "allow punctuation in query phrases";
	m->m_off = (char *)&si.m_allowPunctInPhrase - y;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_cgi = "apip";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	/*
	m->m_title = "use ad feed num";
	m->m_desc = "use ad feed num";
	m->m_off = (char *)&si.m_useAdFeedNum - y;
	m->m_type = TYPE_LONG;
	m->m_def = "0";
	m->m_cgi = "uafn";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;
	*/

	/*
	m->m_title = "do bot detection";
	m->m_desc = "Passed in for raw feeds that want bot detection cgi "
		"parameters passed back in the XML.";
	m->m_off = (char *)&si.m_doBotDetection - y;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_cgi = "bd";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;
	*/

	/*
	m->m_title = "bot detection query";
	m->m_desc = "Passed in for raw feeds that want bot detection cgi "
		"parameters passed back in the XML. Use this variable "
		"when an actual query against gigablast is not needed "
		"(i.e. - image/video/news searches).";
	m->m_off = (char *)&si.m_botDetectionQuery - y;
	m->m_type = TYPE_CHARPTR;//STRING;
	m->m_cgi = "bdq";
	m->m_def = NULL;
	m->m_size = MAX_QUERY_LEN;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;
	*/

	m->m_title = "queryCharset";
	m->m_desc = "Charset in which the query is encoded";
	m->m_off = (char *)&si.m_queryCharset - y;
	m->m_type = TYPE_CHARPTR;//STRING;
	//m->m_size = 32+1;
	m->m_def = "utf-8";
	//m->m_def = "iso-8859-1";
	m->m_cgi = "qcs";
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m->m_flags = PF_NOAPI;
	m++;

	// buzz
	m->m_title = "display inlinks";
	m->m_desc = "Display all inlinks of each result.";
	m->m_off = (char *)&si.m_displayInlinks - y;
	m->m_type = TYPE_LONG;
	m->m_def = "0";
	m->m_cgi = "inlinks";
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m->m_flags = PF_NOAPI;
	m++;

	// buzz
	m->m_title = "display outlinks";
	m->m_desc = "Display all outlinks of each result. outlinks=1 "
		"displays only external outlinks. outlinks=2 displays "
		"external and internal outlinks.";
	m->m_off = (char *)&si.m_displayOutlinks - y;
	m->m_type = TYPE_LONG;
	m->m_def = "0";
	m->m_cgi = "outlinks";
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m->m_flags = PF_NOAPI;
	m++;

	// buzz
	m->m_title = "display term frequencies";
	m->m_desc = "Display Terms and Frequencies in results.";
	m->m_off = (char *)&si.m_displayTermFreqs - y;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_cgi = "tf";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	// buzz
	m->m_title = "spider results";
	m->m_desc = "Results of this query will be forced into the spider "
		"queue for reindexing.";
	m->m_off = (char *)&si.m_spiderResults - y;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_cgi = "spiderresults";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	// buzz
	m->m_title = "spider result roots";
	m->m_desc = "Root urls of the results of this query will be forced "
		"into the spider queue for reindexing.";
	m->m_off = (char *)&si.m_spiderResultRoots - y;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_cgi = "spiderresultroots";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	// buzz
	m->m_title = "just mark clusterlevels";
	m->m_desc = "Check for deduping, but just mark the cluster levels "
		"and the doc deduped against, don't remove the result.";
	m->m_off = (char *)&si.m_justMarkClusterLevels - y;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_cgi = "jmcl";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m++;

	m->m_title = "include cached copy of page";
	m->m_desc = "Will cause a cached copy of content to be returned "
		"instead of summary.";
	m->m_off = (char *)&si.m_includeCachedCopy - y;
	m->m_type = TYPE_LONG;
	m->m_def = "0";
	m->m_cgi = "icc";
	m->m_page = PAGE_RESULTS;
	m->m_obj = OBJ_SI;
	m->m_flags = PF_API;
	m++;

	// m->m_title = "get section voting info in json";
	// m->m_desc = "Will cause section voting info to be returned.";
	// m->m_off = (char *)&si.m_getSectionVotingInfo - y;
	// m->m_type = TYPE_CHAR;
	// m->m_def = "0";
	// m->m_cgi = "sectionvotes";
	// m->m_page = PAGE_RESULTS;
	// m->m_obj = OBJ_SI;
	// m->m_flags = PF_API;
	// m++;

	//////////////
	// END /search
	//////////////

	//////////
	// PAGE GET (cached web pages)
	///////////

	m->m_title = "docId";
	m->m_desc = "The docid of the cached page to view.";
	m->m_off = (char *)&gr.m_docId - (char *)&gr;
	m->m_type = TYPE_LONG_LONG;
	m->m_page = PAGE_GET;
	m->m_obj = OBJ_GBREQUEST; // generic request class
	m->m_def = "0";
	m->m_cgi = "d";
	m->m_flags = PF_API | PF_REQUIRED;
	m++;

	m->m_title = "url";
	m->m_desc = "Instead of specifying a docid, you can get the "
		"cached webpage by url as well.";
	m->m_off = (char *)&gr.m_url - (char *)&gr;
	m->m_type = TYPE_CHARPTR; // reference into the HttpRequest
	m->m_page = PAGE_GET;
	m->m_obj = OBJ_GBREQUEST; // generic request class
	m->m_def = NULL;
	m->m_cgi = "url";
	m->m_flags = PF_API | PF_REQUIRED;
	m++;

	m->m_title = "collection";
	m->m_desc = "Get the cached page from this collection.";
	m->m_cgi = "c";
	m->m_page = PAGE_GET;
	m->m_obj = OBJ_GBREQUEST;
	m->m_off = (char *)&gr.m_coll - (char *)&gr;
	m->m_type = TYPE_CHARPTR;//SAFEBUF;
	m->m_def = NULL;
	m->m_flags = PF_REQUIRED | PF_API;
	m++;
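
	// Putting the /get parms together, two hypothetical requests for a
	// cached page, one by docid and one by url ("main" is a placeholder
	// collection name):
	//
	//   /get?c=main&d=123456789&strip=1&ih=0
	//   /get?c=main&url=http%3A%2F%2Fexample.com%2F&q=test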

	m->m_title = "strip";
	m->m_desc = "Is 1 or 2 to strip various tags from the "
		"cached content.";
	m->m_off = (char *)&gr.m_strip - (char *)&gr;
	m->m_page = PAGE_GET;
	m->m_obj = OBJ_GBREQUEST;
	m->m_cgi = "strip";
	m->m_def = "0";
	m->m_type = TYPE_LONG;
	m->m_flags = PF_API;
	m++;

	m->m_title = "include header";
	m->m_desc = "Is 1 to include the Gigablast header at the top of "
		"the cached page, 0 to exclude the header.";
	m->m_def = "1";
	m->m_type = TYPE_BOOL;
	m->m_page = PAGE_GET;
	m->m_obj = OBJ_GBREQUEST;
	m->m_cgi = "ih";
	m->m_off = (char *)&gr.m_includeHeader - (char *)&gr;
	m->m_flags = PF_API;
	m++;

	m->m_title = "query";
	m->m_desc = "Highlight this query in the page.";
	m->m_def = "";
	m->m_type = TYPE_CHARPTR;
	m->m_page = PAGE_GET;
	m->m_obj = OBJ_GBREQUEST;
	m->m_cgi = "q";
	m->m_off = (char *)&gr.m_query - (char *)&gr;
	m->m_flags = PF_API;
	m++;

	/*
	// for /get
	m->m_title = "query highlighting query";
	m->m_desc = "Is 1 to highlight query terms in the cached page.";
	m->m_def = "1";
	m->m_type = TYPE_BOOL;
	m->m_cgi = "qh";
	m->m_off = (char *)&si.m_queryHighlighting - y;
	m++;
	*/

	// for /addurl
	/*
	m->m_title = "url to add";
	m->m_desc = "Used by add url page.";
	m->m_type = TYPE_STRING;
	m->m_size = MAX_URL_LEN;
	m->m_cgi = "u";
	m->m_off = (char *)&si.m_url2 - y;
	m++;
	*/

	// Process.cpp calls Msg28::massConfig with &haspower=[0|1] to
	// indicate power loss or coming back on from a power loss
	m->m_title = "power on status notification";
	m->m_desc = "Indicates power is back on.";
	m->m_cgi = "poweron";
	m->m_type = TYPE_CMD;
	m->m_func = CommandPowerOnNotice;
	m->m_cast = 0;
	m->m_page = PAGE_NONE;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "power off status notification";
	m->m_desc = "Indicates power is off.";
	m->m_cgi = "poweroff";
	m->m_type = TYPE_CMD;
	m->m_func = CommandPowerOffNotice;
	m->m_cast = 0;
	m->m_page = PAGE_NONE;
	m->m_obj = OBJ_CONF;
	m++;

	//////////////
	// END PAGE_GET
	//////////////

	///////////////////////////////////////////
	// MASTER CONTROLS
	///////////////////////////////////////////

	m->m_title = "spidering enabled";
	m->m_desc = "Controls all spidering for all collections";
	m->m_cgi = "se";
	m->m_off = (char *)&g_conf.m_spideringEnabled - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	//m->m_cast = 0;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "injections enabled";
	m->m_desc = "Controls injecting for all collections";
	m->m_cgi = "injen";
	m->m_off = (char *)&g_conf.m_injectionsEnabled - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "querying enabled";
	m->m_desc = "Controls querying for all collections";
	m->m_cgi = "qryen";
	m->m_off = (char *)&g_conf.m_queryingEnabled - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "return results even if a shard is down";
	m->m_desc = "If you turn this off then Gigablast will return "
		"an error message if a shard was down and did not return "
		"results for a query. The XML and JSON feeds let you know "
		"when a shard is down and will give you the results back "
		"anyway, but if you would rather have just an error message "
		"and no results, then set this to 'NO'.";
	m->m_cgi = "rra";
	m->m_off = (char *)&g_conf.m_returnResultsAnyway - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "max mem";
	m->m_desc = "Mem available to this process. May be exceeded due "
		"to fragmentation.";
	m->m_cgi = "maxmem";
	m->m_off = (char *)&g_conf.m_maxMem - g;
	m->m_def = "8000000000";
	m->m_obj = OBJ_CONF;
	m->m_page = PAGE_MASTER; // PAGE_NONE;
	m->m_type = TYPE_LONG_LONG;
	//m->m_flags = PF_NOAPI;
	m++;

	m->m_title = "max total spiders";
	m->m_desc = "What is the maximum number of web "
		"pages the spider is allowed to download "
		"simultaneously for ALL collections PER HOST? Caution: "
		"raising this too high could result in some Out of Memory "
		"(OOM) errors. The hard limit is currently 300. Each "
		"collection has its own limit in the <i>spider controls</i> "
		"that you may have to increase as well.";
	m->m_cgi = "mtsp";
	m->m_off = (char *)&g_conf.m_maxTotalSpiders - g;
	m->m_type = TYPE_LONG;
	m->m_def = "100";
	m->m_group = 0;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;
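
	// Since this limit is per host, the cluster-wide ceiling scales
	// with hosts.conf. For example, with the default of 100 and a
	// hypothetical 4-host cluster, up to 4 * 100 = 400 pages can be
	// downloading at once across the cluster.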

	/*
	m->m_title = "web spidering enabled";
	m->m_desc = "Spiders events on web";
	m->m_cgi = "wse";
	m->m_off = (char *)&g_conf.m_webSpideringEnabled - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m++;
	*/

	m->m_title = "add url enabled";
	m->m_desc = "Can people use the add url interface to add urls "
		"to the index?";
	m->m_cgi = "ae";
	m->m_off = (char *)&g_conf.m_addUrlEnabled - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	//m->m_cast = 0;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "use collection passwords";
	m->m_desc = "Should collections have individual password settings "
		"so different users can administer different collections? "
		"If not, then only the master passwords and IPs will be able "
		"to administer any collection.";
	m->m_cgi = "ucp";
	m->m_off = (char *)&g_conf.m_useCollectionPasswords - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "allow cloud users";
	m->m_desc = "Can guest users create and administer "
		"a collection? Limit: 1 "
		"collection per IP address. This is mainly for doing "
		"demos on the gigablast.com domain.";
	m->m_cgi = "acu";
	m->m_off = (char *)&g_conf.m_allowCloudUsers - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "auto save frequency";
	m->m_desc = "Save data in memory to disk after this many minutes "
		"have passed without the data having been dumped or saved "
		"to disk. Use 0 to disable.";
	m->m_cgi = "asf";
	m->m_off = (char *)&g_conf.m_autoSaveFrequency - g;
	m->m_type = TYPE_LONG;
	m->m_def = "5";
	m->m_units = "mins";
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "max http sockets";
	m->m_desc = "Maximum sockets available to serve incoming HTTP "
		"requests. Too many outstanding requests will increase "
		"query latency. Excess requests will simply have their "
		"sockets closed.";
	m->m_cgi = "ms";
	m->m_off = (char *)&g_conf.m_httpMaxSockets - g;
	m->m_type = TYPE_LONG;
	// up this some, am seeing sockets closed because of using gb
	// as a cache...
	m->m_def = "300";
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "max https sockets";
	m->m_desc = "Maximum sockets available to serve incoming HTTPS "
		"requests. Like max http sockets, but for secure sockets.";
	m->m_cgi = "mss";
	m->m_off = (char *)&g_conf.m_httpsMaxSockets - g;
	m->m_type = TYPE_LONG;
	m->m_def = "100";
	m->m_group = 0;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "spider user agent";
	m->m_desc = "Identification seen by web servers when "
		"the Gigablast spider downloads their web pages. "
		"It is polite to insert a contact email address here so "
		"webmasters that experience problems from the Gigablast "
		"spider have somewhere to vent.";
	m->m_cgi = "sua";
	m->m_off = (char *)&g_conf.m_spiderUserAgent - g;
	m->m_type = TYPE_STRING;
	m->m_size = USERAGENTMAXSIZE;
	m->m_def = "GigablastOpenSource/1.0";
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "use temporary cluster";
	m->m_desc = "Used by proxy to point to a temporary cluster while the "
		"original cluster is updated with a new binary. The "
		"temporary cluster is the same as the original cluster but "
		"the ports are all incremented by one from what is in "
		"the hosts.conf. This should ONLY be used for the proxy.";
	m->m_cgi = "aotp";
	m->m_off = (char *)&g_conf.m_useTmpCluster - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	/*
	m->m_title = "url injection enabled";
	m->m_desc = "If enabled you can directly inject URLs into the index.";
	m->m_cgi = "ie";
	m->m_off = (char *)&g_conf.m_injectionEnabled - g;
	m->m_type = TYPE_BOOL;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m->m_def = "1";
	m++;
	*/

	m->m_title = "init QA tests";
	m->m_desc = "If initiated, gb performs some integrity tests "
		"to ensure injecting, spidering and searching work "
		"properly. Uses ./test/ subdirectory. Injects "
		"urls in ./test/inject.txt. Spiders urls "
		"in ./test/spider.txt. "
		"Each of those two files is essentially a simple format of "
		"a url followed by the http reply received from the server "
		"for that url. "
		// TODO: generate these files
		;
	m->m_cgi = "qasptei";
	m->m_type = TYPE_CMD;
	m->m_func = CommandSpiderTestInit;
	m->m_def = "1";
	m->m_cast = 1;
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "init parser test run";
	m->m_desc = "If enabled, gb injects the urls in the "
		"./test-parser/urls.txt "
		"file and outputs ./test-parser/qa.html";
	m->m_cgi = "qaptei";
	m->m_type = TYPE_CMD;
	m->m_func = CommandParserTestInit;
	m->m_def = "1";
	m->m_cast = 1;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "init spider test run";
	m->m_desc = "If enabled, gb injects the urls in "
		"./test-spider/spider.txt "
		"and spiders links.";
	m->m_cgi = "qasptei";
	m->m_type = TYPE_CMD;
	m->m_func = CommandSpiderTestInit;
	m->m_def = "1";
	m->m_cast = 1;
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "continue spider test run";
	m->m_desc = "Resumes the test.";
	m->m_cgi = "qaspter";
	m->m_type = TYPE_CMD;
	m->m_func = CommandSpiderTestCont;
	m->m_def = "1";
	m->m_cast = 1;
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	/*
	m->m_title = "do docid range splitting";
	m->m_desc = "Split msg39 docids into ranges to save mem?";
	m->m_cgi = "ddrs";
	m->m_off = (char *)&g_conf.m_doDocIdRangeSplitting - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_group = 0;
	m++;
	*/

	m->m_title = "qa search test enabled";
	m->m_desc = "If enabled, gb does the search queries in "
		"./test-search/queries.txt and compares to the last run and "
		"outputs the diffs for inspection and validation.";
	m->m_cgi = "qasste";
	m->m_off = (char *)&g_conf.m_testSearchEnabled - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	//m->m_cast = 0;
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	/*
	m->m_title = "just save";
	m->m_desc = "Copies the data in memory to disk for just this host. "
		"Does Not exit.";
	m->m_cgi = "js";
	m->m_type = TYPE_CMD;
	m->m_func = CommandJustSave;
	m->m_page = PAGE_MASTER;
	m->m_cast = 0;
	m++;
	*/

	m->m_title = "save";
	m->m_desc = "Saves in-memory data for ALL hosts. Does Not exit.";
	m->m_cgi = "js";
	m->m_type = TYPE_CMD;
	m->m_func = CommandJustSave;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	/*
	m->m_title = "all spiders on";
	m->m_desc = "Enable spidering on all hosts";
	m->m_cgi = "ase";
	m->m_def = "1";
	m->m_off = (char *)&g_conf.m_spideringEnabled - g;
	m->m_type = TYPE_BOOL2; // no yes or no, just a link
	m++;

	m->m_title = "all spiders off";
	m->m_desc = "Disable spidering on all hosts";
	m->m_cgi = "ase";
	m->m_def = "0";
	m->m_off = (char *)&g_conf.m_spideringEnabled - g;
	m->m_type = TYPE_BOOL2; // no yes or no, just a link
	m++;
	*/

	/*
	m->m_title = "save & exit";
	m->m_desc = "Copies the data in memory to disk for just this host "
		"and then shuts down the gb process.";
	m->m_cgi = "save";
	m->m_type = TYPE_CMD;
	m->m_func = CommandSaveAndExit;
	m->m_cast = 0;
	m++;

	m->m_title = "urgent save & exit";
	m->m_desc = "Copies the data in memory to disk for just this host "
		"and then shuts down the gb process.";
	m->m_cgi = "usave";
	m->m_type = TYPE_CMD;
	m->m_func = CommandUrgentSaveAndExit;
	m->m_cast = 0;
	m->m_priv = 4;
	m++;
	*/

	m->m_title = "save & exit";
	m->m_desc = "Saves the data and exits for ALL hosts.";
	m->m_cgi = "save";
	m->m_type = TYPE_CMD;
	m->m_func = CommandSaveAndExit;
	m->m_group = 0;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "rebalance shards";
	m->m_desc = "Tell all hosts to scan all records in all databases, "
		"and move "
		"records to the shard they belong to. You only need to run "
		"this if Gigablast tells you to, when you are changing "
		"hosts.conf to add or remove more nodes/hosts.";
	m->m_cgi = "rebalance";
	m->m_type = TYPE_CMD;
	m->m_func = CommandRebalance;
	m->m_group = 0;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "dump to disk";
	m->m_desc = "Flushes all records in memory to the disk on all hosts.";
	m->m_cgi = "dump";
	m->m_type = TYPE_CMD;
	m->m_func = CommandDiskDump;
	m->m_cast = 1;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "force reclaim";
	m->m_desc = "Force reclaim of doledb mem.";
	m->m_cgi = "forceit";
	m->m_type = TYPE_CMD;
	m->m_func = CommandForceIt;
	m->m_cast = 1;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m++;

	m->m_title = "tight merge posdb";
	m->m_desc = "Merges all outstanding posdb (index) files.";
	m->m_cgi = "pmerge";
	m->m_type = TYPE_CMD;
	m->m_func = CommandMergePosdb;
	m->m_cast = 1;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	//m->m_title = "tight merge sectiondb";
	//m->m_desc = "Merges all outstanding sectiondb files.";
	//m->m_cgi = "smerge";
	//m->m_type = TYPE_CMD;
	//m->m_func = CommandMergeSectiondb;
	//m->m_cast = 1;
	//m++;

	m->m_title = "tight merge titledb";
	m->m_desc = "Merges all outstanding titledb (web page cache) files.";
	m->m_cgi = "tmerge";
	m->m_type = TYPE_CMD;
	m->m_func = CommandMergeTitledb;
	m->m_cast = 1;
	m->m_group = 0;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "tight merge spiderdb";
	m->m_desc = "Merges all outstanding spiderdb files.";
	m->m_cgi = "spmerge";
	m->m_type = TYPE_CMD;
	m->m_func = CommandMergeSpiderdb;
	m->m_cast = 1;
	m->m_group = 0;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "clear kernel error message";
	m->m_desc = "Clears the kernel error message. You must do this "
		"to stop getting email alerts for a kernel ring buffer "
		"error.";
	m->m_cgi = "clrkrnerr";
	m->m_type = TYPE_CMD;
	m->m_func = CommandClearKernelError;
	m->m_cast = 1;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "disk page cache off";
	m->m_desc = "Disable all disk page caches to save mem for "
		"tmp cluster. Run "
		"gb cacheoff to do this for all hosts.";
	m->m_cgi = "dpco";
	m->m_type = TYPE_CMD;
	m->m_func = CommandDiskPageCacheOff;
	m->m_cast = 1;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	//m->m_title = "http server enabled";
	//m->m_desc = "Disable this if you do not want anyone hitting your "
	//	"http server. Admin and local IPs are still permitted, "
	//	"however.";
	//m->m_cgi = "hse";
	//m->m_off = (char *)&g_conf.m_httpServerEnabled - g;
	//m->m_type = TYPE_BOOL;
	//m->m_def = "1";
	//m++;

	/*
	m->m_title = "ad feed enabled";
	m->m_desc = "Serves ads unless pure=1 is in cgi parms.";
	m->m_cgi = "afe";
	m->m_off = (char *)&g_conf.m_adFeedEnabled - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_scgi = "ads";
	m->m_soff = (char *)&si.m_adFeedEnabled - y;
	m->m_sparm = 1;
	m->m_priv = 2;
	m++;
	*/

	m->m_title = "do stripe balancing";
	m->m_desc = "Stripe #n contains twin #n from each group. Doing "
		"stripe balancing helps prevent too many query requests "
		"coming into one host. This parm is only for the proxy. "
		"Stripe balancing is done by default unless the parm is "
		"disabled on the proxy in which case it appends a "
		"&dsb=0 to the query url it sends to the host. The proxy "
		"alternates to which host it forwards the incoming query "
		"based on the stripe. It takes the number of query terms in "
		"the query into account to make a more even balance.";
	m->m_cgi = "dsb";
	m->m_off = (char *)&g_conf.m_doStripeBalancing - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	//m->m_scgi = "dsb";
	//m->m_soff = (char *)&si.m_doStripeBalancing - y;
	//m->m_sparm = 1;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;
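
	// One plausible rotation scheme consistent with the description
	// above (a hypothetical sketch, not the actual proxy code):
	// advance the stripe cursor further for queries with more terms
	// so heavy queries spread across stripes faster.
	//
	//   static int32_t s_stripeCursor = 0;
	//   int32_t pickStripe ( int32_t numQueryTerms , int32_t numStripes ) {
	//           int32_t stripe = s_stripeCursor % numStripes;
	//           s_stripeCursor += numQueryTerms;
	//           return stripe;
	//   }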

	m->m_title = "is live cluster";
	m->m_desc = "Is this cluster part of a live production cluster? "
		"If this is true we make sure that elvtune is being "
		"set properly for best performance, otherwise, gb will "
		"not start up.";
	m->m_cgi = "live";
	m->m_off = (char *)&g_conf.m_isLive - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	/*
	m->m_title = "is BuzzLogic";
	m->m_desc = "Is this a BuzzLogic cluster?";
	m->m_cgi = "isbuzz";
	m->m_off = (char *)&g_conf.m_isBuzzLogic - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m++;
	*/

	// we use wikipedia cluster for quick categorization
	m->m_title = "is wikipedia cluster";
	m->m_desc = "Is this cluster just used for indexing wikipedia pages?";
	m->m_cgi = "iswiki";
	m->m_off = (char *)&g_conf.m_isWikipedia - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "ask for gzipped docs when downloading";
	m->m_desc = "If this is true, gb will send Accept-Encoding: gzip "
		"to web servers when doing http downloads. It does have "
		"a tendency to cause out-of-memory errors when you enable "
		"this, so until that is fixed better, it's probably a good "
		"idea to leave this disabled.";
	m->m_cgi = "afgdwd";
	m->m_off = (char *)&g_conf.m_gzipDownloads - g;
	m->m_type = TYPE_BOOL;
	// keep this default off because it seems some pages are huge
	// uncompressed causing OOM errors and possibly corrupting stuff?
	// not sure exactly, but i don't like going OOM. so maybe until
	// that is fixed leave this off.
	m->m_def = "0";
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "search results cache max age";
	m->m_desc = "How many seconds should we cache a search results "
		"page for?";
	m->m_cgi = "srcma";
	m->m_off = (char *)&g_conf.m_searchResultsMaxCacheAge - g;
	m->m_def = "10800"; // 3 hrs
	m->m_type = TYPE_LONG;
	m->m_units = "seconds";
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "autoban IPs which violate the queries per day quotas";
	m->m_desc = "Keep track of ips which do queries, disallow "
		"non-customers from hitting us too hard.";
	m->m_cgi = "ab";
	m->m_off = (char *)&g_conf.m_doAutoBan - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	if ( g_isYippy ) {
		m->m_title = "Max outstanding search requests out for yippy";
		m->m_desc = "Max outstanding search requests out for yippy";
		m->m_cgi = "ymo";
		m->m_off = (char *)&g_conf.m_maxYippyOut - g;
		m->m_type = TYPE_LONG;
		m->m_def = "150";
		m->m_page = PAGE_MASTER;
		m->m_obj = OBJ_CONF;
		m++;
	}

	m->m_title = "free queries per day";
	m->m_desc = "Non-customers get this many queries per day before "
		"being autobanned";
	m->m_cgi = "nfqpd";
	m->m_off = (char *)&g_conf.m_numFreeQueriesPerDay - g;
	m->m_type = TYPE_LONG;
	m->m_def = "1024";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "free queries per minute";
	m->m_desc = "Non-customers get this many queries per minute before "
		"being autobanned";
	m->m_cgi = "nfqpm";
	m->m_off = (char *)&g_conf.m_numFreeQueriesPerMinute - g;
	m->m_type = TYPE_CHAR;
	m->m_def = "30";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;
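
	// Note how the two quotas interact: at the default per-minute cap
	// of 30, a client could in principle issue 30 * 60 * 24 = 43200
	// queries a day, so for sustained traffic the per-day cap of 1024
	// is the one that actually binds.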

	m->m_title = "max heartbeat delay in milliseconds";
	m->m_desc = "If a heartbeat is delayed this many milliseconds "
		"dump a core so we can see where the CPU was. "
		"Logs 'db: missed heartbeat by %" INT64 " ms'. "
		"Use 0 or less to disable.";
	m->m_cgi = "mhdms";
	m->m_off = (char *)&g_conf.m_maxHeartbeatDelay - g;
	m->m_type = TYPE_LONG;
	m->m_def = "0";
	m->m_flags = PF_CLONE; // PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "max delay before logging a callback or handler";
	m->m_desc = "If a call to a message callback or message handler "
		"in the udp server takes more than this many milliseconds, "
		"then log it. "
		"Logs 'udp: Took %" INT64 " ms to call callback for msgType="
		"0x%hhx niceness=%" INT32 "'. "
		"Use -1 or less to disable the logging.";
	m->m_cgi = "mdch";
	m->m_off = (char *)&g_conf.m_maxCallbackDelay - g;
	m->m_type = TYPE_LONG;
	m->m_def = "-1";
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "sendmail IP";
	m->m_desc = "We send crawlbot notification emails to this sendmail "
		"server which forwards them to the specified email address.";
	m->m_cgi = "smip";
	m->m_off = (char *)&g_conf.m_sendmailIp - g;
	m->m_type = TYPE_STRING;
	m->m_def = "10.5.54.47";
	m->m_size = MAX_MX_LEN;
	m->m_priv = 2;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "send email alerts";
	m->m_desc = "Sends emails to admin if a host goes down.";
	m->m_cgi = "sea";
	m->m_off = (char *)&g_conf.m_sendEmailAlerts - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_priv = 2;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "delay non critical email alerts";
	m->m_desc = "Do not send email alerts about dead hosts to "
		"anyone except sysadmin@gigablast.com between the times "
		"given below unless all the twins of the dead host are "
		"also dead. Instead, wait until after that window and "
		"send the alert only if the host is still dead.";
	m->m_cgi = "dnca";
	m->m_off = (char *)&g_conf.m_delayNonCriticalEmailAlerts - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_priv = 2;
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	//m->m_title = "send email alerts to matt at tmobile 450-3518";
	//m->m_desc = "Sends to cellphone.";
	//m->m_cgi = "seatmt";
	//m->m_off = (char *)&g_conf.m_sendEmailAlertsToMattTmobile - g;
	//m->m_type = TYPE_BOOL;
	//m->m_def = "1";
	//m->m_priv = 2;
	//m->m_group = 0;
	//m++;

	//m->m_title = "send email alerts to matt at alltel 362-6809";
	/*
	m->m_title = "send email alerts to matt at alltel 450-3518";
	m->m_desc = "Sends to cellphone.";
	m->m_cgi = "seatmv";
	m->m_off = (char *)&g_conf.m_sendEmailAlertsToMattAlltell - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_priv = 2;
	m->m_group = 0;
	m++;

	m->m_title = "send email alerts to javier";
	m->m_desc = "Sends to cellphone.";
	m->m_cgi = "seatj";
	m->m_off = (char *)&g_conf.m_sendEmailAlertsToJavier - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_priv = 2;
	m->m_group = 0;
	m++;
	*/

	// m->m_title = "send email alerts to melissa";
	// m->m_desc = "Sends to cell phone.";
	// m->m_cgi = "seatme";
	// m->m_off = (char *)&g_conf.m_sendEmailAlertsToMelissa - g;
	// m->m_type = TYPE_BOOL;
	// m->m_def = "0";
	// m->m_priv = 2;
	// m->m_group = 0;
	// m++;

	/*
	m->m_title = "send email alerts to partap";
	m->m_desc = "Sends to cell phone.";
	m->m_cgi = "seatp";
	m->m_off = (char *)&g_conf.m_sendEmailAlertsToPartap - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_priv = 2;
	m->m_group = 0;
	m++;
	*/

	// m->m_title = "send email alerts to cinco";
	// m->m_desc = "Sends to cell phone.";
	// m->m_cgi = "seatc";
	// m->m_off = (char *)&g_conf.m_sendEmailAlertsToCinco - g;
	// m->m_type = TYPE_BOOL;
	// m->m_def = "0";
	// m->m_priv = 2;
	// m->m_group = 0;
	// m++;

	/* m->m_title = "maximum hops from parent page";
	m->m_desc = "Only index pages that are within a particular number "
		"of hops from the parent page given in Page Add Url. -1 means "
		"that max hops is infinite.";
	m->m_cgi = "mnh";
	m->m_off = (char *)&cr.m_maxNumHops - x;
	m->m_type = TYPE_CHAR2;
	m->m_def = "-1";
	m->m_group = 0;
	m++;*/

	m->m_title = "cluster name";
	m->m_desc = "Email alerts will include the cluster name";
	m->m_cgi = "cn";
	m->m_off = (char *)&g_conf.m_clusterName - g;
	m->m_type = TYPE_STRING;
	m->m_size = 32;
	m->m_def = "unspecified";
	m->m_page = PAGE_MASTER;
	m->m_obj = OBJ_CONF;
	m++;

	m->m_title = "spider round start time";
	m->m_desc = "When the next spider round starts. If you force this to "
		"zero it sets it to the current time. That way you can "
		"respider all the urls that were already spidered, and urls "
		"that were not yet spidered in the round will still be "
		"spidered.";
	m->m_cgi = "spiderRoundStart";
	m->m_size = 0;
	m->m_off = (char *)&cr.m_spiderRoundStartTime - x;
	m->m_type = TYPE_LONG;
	m->m_def = "0";
	m->m_group = 0;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_HIDDEN | PF_REBUILDURLFILTERS ;
	m++;
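
	// Example of the reset described above, forcing the round start
	// back to the current time over the admin spider page (the
	// collection name is a placeholder):
	//
	//   /admin/spider?c=main&spiderRoundStart=0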
|
|
|
|
// DIFFBOT:
|
|
// this http parm actually ads the "forceround" parm to the parmlist
|
|
// below with the appropriate args.
|
|
m->m_title = "manually restart a spider round";
|
|
m->m_desc = "Updates round number and resets local processed "
|
|
"and crawled counts to 0.";
|
|
m->m_cgi = "roundStart";
|
|
m->m_type = TYPE_CMD;
|
|
m->m_func = NULL;
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_SPIDER;
|
|
m->m_obj = OBJ_COLL;
|
|
m->m_flags = PF_HIDDEN;
|
|
m++;
|
|
|
|
// DIFFBOT:
|
|
// . this is sent to each shard by issuing a "&roundStart=1" cmd
|
|
// . similar to the "addcoll" cmd we add args to it and make it
|
|
// the "forceround" cmd parm and add THAT to the parmlist.
|
|
// so "roundStart=1" is really an alias for us.
|
|
m->m_title = "manually restart a spider round on shard";
|
|
m->m_desc = "Updates round number and resets local processed "
|
|
"and crawled counts to 0.";
|
|
m->m_cgi = "forceround";
|
|
//m->m_off = (char *)&cr.m_spiderRoundStartTime - x;
|
|
m->m_type = TYPE_CMD;
|
|
m->m_func = CommandForceNextSpiderRound;
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_SPIDER;
|
|
m->m_obj = OBJ_COLL;
|
|
m->m_flags = PF_HIDDEN | PF_REBUILDURLFILTERS ;
|
|
m++;
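
// To summarize the alias flow sketched in the comments above (no new
// function names beyond CommandForceNextSpiderRound() and updateParm(),
// which this file already references): an incoming "&roundStart=1"
// http parm is rewritten into a "forceround" parm rec with the
// appropriate args, that rec is broadcast in the parmlist to every
// shard, and each shard's updateParm() then invokes
// CommandForceNextSpiderRound().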
|
|
|
|
m->m_title = "spider round num";
|
|
m->m_desc = "The spider round number.";
|
|
m->m_cgi = "spiderRoundNum";
|
|
m->m_off = (char *)&cr.m_spiderRoundNum - x;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "0";
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_SPIDER;
|
|
m->m_obj = OBJ_COLL;
|
|
m->m_flags = PF_HIDDEN ;
|
|
m++;
|
|
|
|
m->m_title = "send email alerts to sysadmin";
|
|
m->m_desc = "Sends to sysadmin@gigablast.com.";
|
|
m->m_cgi = "seatsa";
|
|
m->m_off = (char *)&g_conf.m_sendEmailAlertsToSysadmin - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_priv = 2;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
/*
|
|
m->m_title = "send email alerts to zak";
|
|
m->m_desc = "Sends to zak@gigablast.com.";
|
|
m->m_cgi = "seatz";
|
|
m->m_off = (char *)&g_conf.m_sendEmailAlertsToZak - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 2;
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "send email alerts to sabino";
|
|
m->m_desc = "Sends to cell phone.";
|
|
m->m_cgi = "seatms";
|
|
m->m_off = (char *)&g_conf.m_sendEmailAlertsToSabino - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 2;
|
|
m->m_group = 0;
|
|
m++;
|
|
*/
|
|
|
|
m->m_title = "dead host timeout";
|
|
m->m_desc = "Consider a host in the Gigablast network to be dead if "
|
|
"it does not respond to successive pings for this number of "
|
|
"seconds. Gigablast does not send requests to dead hosts. "
|
|
"Outstanding requests may be re-routed to a twin.";
|
|
m->m_cgi = "dht";
|
|
m->m_off = (char *)&g_conf.m_deadHostTimeout - g;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "4000";
|
|
m->m_units = "milliseconds";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "send email timeout";
|
|
m->m_desc = "Send an email after a host has not responded to "
|
|
"successive pings for this many milliseconds.";
|
|
m->m_cgi = "set";
|
|
m->m_off = (char *)&g_conf.m_sendEmailTimeout - g;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "62000";
|
|
m->m_priv = 2;
|
|
m->m_units = "milliseconds";
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "ping spacer";
|
|
m->m_desc = "Wait this many milliseconds before pinging the next "
|
|
"host. Each host pings all other hosts in the network.";
|
|
m->m_cgi = "ps";
|
|
m->m_off = (char *)&g_conf.m_pingSpacer - g;
|
|
m->m_min = 50; // i've seen values of 0 hammer the cpu
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "100";
|
|
m->m_units = "milliseconds";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
//m->m_title = "max query time";
|
|
//m->m_desc = "When computing the average query latency "
|
|
// "truncate query latency times to this so that "
|
|
// "a single insanely int32_t query latency time does "
|
|
// "not trigger the alarm. This is in seconds.";
|
|
//m->m_cgi = "mqlr";
|
|
//m->m_off = (char *)&g_conf.m_maxQueryTime - g;
|
|
//m->m_type = TYPE_FLOAT;
|
|
//m->m_def = "30.0";
|
|
//m->m_priv = 2;
|
|
//m->m_group = 0;
|
|
//m++;
|
|
|
|
m->m_title = "query success rate threshold";
|
|
m->m_desc = "Send email alerts when query success rate goes below "
|
|
"this threshold. (percent rate between 0.0 and 1.0)";
|
|
m->m_cgi = "qsrt";
|
|
m->m_off = (char *)&g_conf.m_querySuccessThreshold - g;
|
|
m->m_type = TYPE_FLOAT;
|
|
m->m_def = "0.850000";
|
|
m->m_priv = 2;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "average query latency threshold";
|
|
m->m_desc = "Send email alerts when average query latency goes above "
|
|
"this threshold. (in seconds)";
|
|
m->m_cgi = "aqpst";
|
|
m->m_off = (char *)&g_conf.m_avgQueryTimeThreshold - g;
|
|
m->m_type = TYPE_FLOAT;
|
|
// a titlerec fetch times out after 2 seconds and is re-routed
|
|
m->m_def = "2.000000";
|
|
m->m_priv = 2;
|
|
m->m_units = "seconds";
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "number of query times in average";
|
|
m->m_desc = "Record this number of query times before calculating "
|
|
"average query latency.";
|
|
m->m_cgi = "nqt";
|
|
m->m_off = (char *)&g_conf.m_numQueryTimes - g;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "300";
|
|
m->m_priv = 2;
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
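
// A worked example with the defaults above (assumed semantics, read
// off the three descriptions): over a window of the last 300 queries,
// an email alert fires if the success rate drops below 0.85 (i.e.
// more than 45 of the 300 fail) or if their average latency exceeds
// 2.0 seconds.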
|
|
|
|
|
|
m->m_title = "max corrupt index lists";
|
|
m->m_desc = "If we reach this many corrupt index lists, send "
|
|
"an admin email. Set to -1 to disable.";
|
|
m->m_cgi = "mcil";
|
|
m->m_off = (char *)&g_conf.m_maxCorruptLists - g;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "5";
|
|
m->m_priv = 2;
|
|
m->m_group = 0;
|
|
m->m_flags = PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "max hard drive temperature";
|
|
m->m_desc = "At what temperature in Celsius should we send "
|
|
"an email alert if a hard drive reaches it?";
|
|
m->m_cgi = "mhdt";
|
|
m->m_off = (char *)&g_conf.m_maxHardDriveTemp - g;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "45";
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
/*
|
|
m->m_title = "delay emails after";
|
|
m->m_desc = "If delay non critical email alerts is on, don't send "
|
|
"emails after this time. Time is hh:mm. Time is take from "
|
|
"host #0's system clock in UTC.";
|
|
m->m_cgi = "dea";
|
|
m->m_off = (char *)&g_conf.m_delayEmailsAfter - g;
|
|
m->m_type = TYPE_TIME; // time format -- very special
|
|
m->m_def = "00:00";
|
|
m->m_priv = 2;
|
|
m++;
|
|
|
|
m->m_title = "delay emails before";
|
|
m->m_desc = "If delay non critical email alerts is on, don't send "
|
|
"emails before this time. Time is hh:mm Time is take from "
|
|
"host #0's system clock in UTC.";
|
|
m->m_cgi = "deb";
|
|
m->m_off = (char *)&g_conf.m_delayEmailsBefore - g;
|
|
m->m_type = TYPE_TIME; // time format -- very special
|
|
m->m_def = "00:00";
|
|
m->m_priv = 2;
|
|
m++;
|
|
*/
|
|
|
|
|
|
/*
|
|
Disable this until it works.
|
|
m->m_title = "use merge token";
|
|
m->m_desc = "If used, prevents twins, or hosts on the same ide "
|
|
"channel, from merging simultaneously.";
|
|
m->m_cgi = "umt";
|
|
m->m_off = (char *)&g_conf.m_useMergeToken - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "1";
|
|
m++;
|
|
*/
|
|
|
|
m->m_title = "error string 1";
|
|
m->m_desc = "Look for this string in the kernel buffer for sending "
|
|
"email alert. Useful for detecting some strange "
|
|
"hard drive failures that really slow performance.";
|
|
m->m_cgi = "errstrone";
|
|
m->m_off = (char *)&g_conf.m_errstr1 - g;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_def = "I/O error";
|
|
m->m_size = MAX_URL_LEN;
|
|
m->m_priv = 2;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "error string 2";
|
|
m->m_desc = "Look for this string in the kernel buffer for sending "
|
|
"email alert. Useful for detecting some strange "
|
|
"hard drive failures that really slow performance.";
|
|
m->m_cgi = "errstrtwo";
|
|
m->m_off = (char *)&g_conf.m_errstr2 - g;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_def = "";
|
|
m->m_size = MAX_URL_LEN;
|
|
m->m_priv = 2;
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "error string 3";
|
|
m->m_desc = "Look for this string in the kernel buffer for sending "
|
|
"email alert. Useful for detecting some strange "
|
|
"hard drive failures that really slow performance.";
|
|
m->m_cgi = "errstrthree";
|
|
m->m_off = (char *)&g_conf.m_errstr3 - g;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_def = "";
|
|
m->m_size = MAX_URL_LEN;
|
|
m->m_priv = 2;
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "send email alerts to email 1";
|
|
m->m_desc = "Sends to email address 1 through email server 1.";
|
|
m->m_cgi = "seatone";
|
|
m->m_off = (char *)&g_conf.m_sendEmailAlertsToEmail1 - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 2;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "send parm change email alerts to email 1";
|
|
m->m_desc = "Sends to email address 1 through email server 1 if "
|
|
"any parm is changed.";
|
|
m->m_cgi = "seatonep";
|
|
m->m_off = (char *)&g_conf.m_sendParmChangeAlertsToEmail1 - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 2;
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "email server 1";
|
|
m->m_desc = "Connects to this IP or hostname "
|
|
"directly when sending email 1. "
|
|
"Use <i>apt-get install sendmail</i> to install sendmail "
|
|
"on that IP or hostname. Add <i>From:10.5 RELAY</i> to "
|
|
"/etc/mail/access to allow sendmail to forward email it "
|
|
"receives from gigablast if gigablast hosts are on the "
|
|
"10.5.*.* IPs. Then run <i>/etc/init.d/sendmail restart</i> "
|
|
"as root to pick up those changes so sendmail will forward "
|
|
"Gigablast's email to the email address you give below.";
|
|
m->m_cgi = "esrvone";
|
|
m->m_off = (char *)&g_conf.m_email1MX - g;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_def = "127.0.0.1";
|
|
m->m_size = MAX_MX_LEN;
|
|
m->m_priv = 2;
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
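
// A concrete version of the sendmail setup from the description above
// (the 10.5.*.* network is just the description's example):
//
//   # append to /etc/mail/access on the "email server 1" box:
//   From:10.5 RELAY
//
//   # then, as root, reload sendmail to pick up the change:
//   /etc/init.d/sendmail restart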
|
|
|
|
m->m_title = "email address 1";
|
|
m->m_desc = "Sends to this address when sending email 1 ";
|
|
m->m_cgi = "eaddrone";
|
|
m->m_off = (char *)&g_conf.m_email1Addr - g;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_def = "4081234567@vtext.com";
|
|
m->m_size = MAX_EMAIL_LEN;
|
|
m->m_priv = 2;
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "from email address 1";
|
|
m->m_desc = "The from field when sending email 1 ";
|
|
m->m_cgi = "efaddrone";
|
|
m->m_off = (char *)&g_conf.m_email1From - g;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_def = "sysadmin@mydomain.com";
|
|
m->m_size = MAX_EMAIL_LEN;
|
|
m->m_priv = 2;
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "send email alerts to email 2";
|
|
m->m_desc = "Sends to email address 2 through email server 2.";
|
|
m->m_cgi = "seattwo";
|
|
m->m_off = (char *)&g_conf.m_sendEmailAlertsToEmail2 - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 2;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "send parm change email alerts to email 2";
|
|
m->m_desc = "Sends to email address 2 through email server 2 if "
|
|
"any parm is changed.";
|
|
m->m_cgi = "seattwop";
|
|
m->m_off = (char *)&g_conf.m_sendParmChangeAlertsToEmail2 - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 2;
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "email server 2";
|
|
m->m_desc = "Connects to this server directly when sending email 2 ";
|
|
m->m_cgi = "esrvtwo";
|
|
m->m_off = (char *)&g_conf.m_email2MX - g;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_def = "mail.mydomain.com";
|
|
m->m_size = MAX_MX_LEN;
|
|
m->m_priv = 2;
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "email address 2";
|
|
m->m_desc = "Sends to this address when sending email 2 ";
|
|
m->m_cgi = "eaddrtwo";
|
|
m->m_off = (char *)&g_conf.m_email2Addr - g;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_def = "";
|
|
m->m_size = MAX_EMAIL_LEN;
|
|
m->m_priv = 2;
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "from email address 2";
|
|
m->m_desc = "The from field when sending email 2 ";
|
|
m->m_cgi = "efaddrtwo";
|
|
m->m_off = (char *)&g_conf.m_email2From - g;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_def = "sysadmin@mydomain.com";
|
|
m->m_size = MAX_EMAIL_LEN;
|
|
m->m_priv = 2;
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "send email alerts to email 3";
|
|
m->m_desc = "Sends to email address 3 through email server 3.";
|
|
m->m_cgi = "seatthree";
|
|
m->m_off = (char *)&g_conf.m_sendEmailAlertsToEmail3 - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 2;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "send parm change email alerts to email 3";
|
|
m->m_desc = "Sends to email address 3 through email server 3 if "
|
|
"any parm is changed.";
|
|
m->m_cgi = "seatthreep";
|
|
m->m_off = (char *)&g_conf.m_sendParmChangeAlertsToEmail3 - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 2;
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "email server 3";
|
|
m->m_desc = "Connects to this server directly when sending email 3 ";
|
|
m->m_cgi = "esrvthree";
|
|
m->m_off = (char *)&g_conf.m_email3MX - g;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_def = "mail.mydomain.com";
|
|
m->m_size = MAX_MX_LEN;
|
|
m->m_priv = 2;
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "email address 3";
|
|
m->m_desc = "Sends to this address when sending email 3 ";
|
|
m->m_cgi = "eaddrthree";
|
|
m->m_off = (char *)&g_conf.m_email3Addr - g;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_def = "";
|
|
m->m_size = MAX_EMAIL_LEN;
|
|
m->m_priv = 2;
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "from email address 3";
|
|
m->m_desc = "The from field when sending email 3 ";
|
|
m->m_cgi = "efaddrthree";
|
|
m->m_off = (char *)&g_conf.m_email3From - g;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_def = "sysadmin@mydomain.com";
|
|
m->m_size = MAX_EMAIL_LEN;
|
|
m->m_priv = 2;
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
|
|
m->m_title = "send email alerts to email 4";
|
|
m->m_desc = "Sends to email address 4 through email server 4.";
|
|
m->m_cgi = "seatfour";
|
|
m->m_off = (char *)&g_conf.m_sendEmailAlertsToEmail4 - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 2;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "send parm change email alerts to email 4";
|
|
m->m_desc = "Sends to email address 4 through email server 4 if "
|
|
"any parm is changed.";
|
|
m->m_cgi = "seatfourp";
|
|
m->m_off = (char *)&g_conf.m_sendParmChangeAlertsToEmail4 - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 2;
|
|
m->m_group = 0;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "email server 4";
|
|
m->m_desc = "Connects to this server directly when sending email 4 ";
|
|
m->m_cgi = "esrvfour";
|
|
m->m_off = (char *)&g_conf.m_email4MX - g;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_def = "mail.mydomain.com";
|
|
m->m_size = MAX_MX_LEN;
|
|
m->m_priv = 2;
|
|
m->m_group = 0;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "email address 4";
|
|
m->m_desc = "Sends to this address when sending email 4 ";
|
|
m->m_cgi = "eaddrfour";
|
|
m->m_off = (char *)&g_conf.m_email4Addr - g;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_def = "";
|
|
m->m_size = MAX_EMAIL_LEN;
|
|
m->m_priv = 2;
|
|
m->m_group = 0;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "from email address 4";
|
|
m->m_desc = "The from field when sending email 4 ";
|
|
m->m_cgi = "efaddrfour";
|
|
m->m_off = (char *)&g_conf.m_email4From - g;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_def = "sysadmin@mydomain.com";
|
|
m->m_size = MAX_EMAIL_LEN;
|
|
m->m_priv = 2;
|
|
m->m_group = 0;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
|
|
|
|
m->m_title = "prefer local reads";
|
|
m->m_desc = "If you have scsi drives or a slow network, say yes here "
|
|
"to minimize data fetches across the network.";
|
|
m->m_cgi = "plr";
|
|
m->m_off = (char *)&g_conf.m_preferLocalReads - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
/*
|
|
m->m_title = "use biased tfndb";
|
|
m->m_desc = "Should we always send titledb record lookup requests "
|
|
"to a particular host in order to increase tfndb page cache "
|
|
"hits? This bypasses load balancing and may result in "
|
|
"slower hosts being more of a bottleneck. Keep this disabled "
|
|
"unless you notice tfndb disk seeks slowing things down.";
|
|
m->m_cgi = "ubu";
|
|
m->m_off = (char *)&g_conf.m_useBiasedTfndb - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "1";
|
|
m->m_group = 0;
|
|
m++;
|
|
*/
|
|
|
|
// this is ifdef'd out in Msg3.cpp for performance reasons,
|
|
// so do it here, too
|
|
#ifdef GBSANITYCHECK
|
|
m->m_title = "max corrupted read retries";
|
|
m->m_desc = "How many times to retry disk reads that had corrupted "
|
|
"data before requesting the list from a twin, and, if that "
|
|
"fails, removing the bad data.";
|
|
m->m_cgi = "crr";
|
|
m->m_off = (char *)&g_conf.m_corruptRetries - g;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "100";
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
#endif
|
|
|
|
m->m_title = "do incremental updating";
|
|
m->m_desc = "When reindexing a document, do not re-add data "
|
|
"that should already be in index or clusterdb "
|
|
"since the last time the document was indexed. Otherwise, "
|
|
"re-add the data regardless.";
|
|
m->m_cgi = "oic";
|
|
//m->m_off = (char *)&g_conf.m_onlyAddUnchangedTermIds - g;
|
|
m->m_off = (char *)&g_conf.m_doIncrementalUpdating - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "1";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
// you can really screw up the index if this is false, so
|
|
// comment it out for now
|
|
/*
|
|
m->m_title = "index deletes";
|
|
m->m_desc = "Should we allow indexdb recs to be deleted? This is "
|
|
"always true, except in very rare indexdb rebuilds.";
|
|
m->m_cgi = "id";
|
|
m->m_off = (char *)&g_conf.m_indexDeletes - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "1";
|
|
m->m_group = 0;
|
|
m++;
|
|
*/
|
|
|
|
m->m_title = "use etc hosts";
|
|
m->m_desc = "Use /etc/hosts file to resolve hostnames? the "
|
|
"/etc/host file is reloaded every minute, so if you make "
|
|
"a change to it you might have to wait one minute for the "
|
|
"change to take affect.";
|
|
m->m_cgi = "ueh";
|
|
m->m_off = (char *)&g_conf.m_useEtcHosts - g;
|
|
m->m_def = "0";
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "twins are split";
|
|
m->m_desc = "If enabled, Gigablast assumes the first half of "
|
|
"machines in hosts.conf "
|
|
"are on a different network switch than the second half, "
|
|
"and minimizes transmits between the switches.";
|
|
m->m_cgi = "stw";
|
|
m->m_off = (char *)&g_conf.m_splitTwins - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
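
// e.g. with 8 hosts in hosts.conf and this parm enabled, hosts 0-3
// are treated as being on one switch and hosts 4-7 on the other
// (illustrative split; the halves follow hosts.conf order per the
// description).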
|
|
|
|
m->m_title = "do out of memory testing";
|
|
m->m_desc = "When enabled Gigablast will randomly fail at "
|
|
"allocating memory. Used for testing stability.";
|
|
m->m_cgi = "dot";
|
|
m->m_off = (char *)&g_conf.m_testMem - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "do consistency testing";
|
|
m->m_desc = "When enabled Gigablast will make sure it reparses "
|
|
"the document exactly the same way. It does this every "
|
|
"1000th document anyway, but enabling this makes it do it "
|
|
"for every document.";
|
|
m->m_cgi = "dct";
|
|
m->m_off = (char *)&g_conf.m_doConsistencyTesting - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "use shotgun";
|
|
m->m_desc = "If enabled, all servers must have two gigabit "
|
|
"ethernet ports hooked up and Gigablast will round robin "
|
|
"packets between both ethernet ports when sending to another "
|
|
"host. Can speed up network transmissions as much as 2x.";
|
|
m->m_cgi = "usht";
|
|
m->m_off = (char *)&g_conf.m_useShotgun - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "use quickpoll";
|
|
m->m_desc = "If enabled, Gigablast will use quickpoll. Significantly "
|
|
"improves performance. Only turn this off for testing.";
|
|
m->m_cgi = "uqp";
|
|
m->m_off = (char *)&g_conf.m_useQuickpoll - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "1";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
// m->m_title = "quickpoll core on error";
|
|
// m->m_desc = "If enabled, quickpoll will terminate the process and "
|
|
// "generate a core file when callbacks are called with the "
|
|
// "wrong niceness.";
|
|
// m->m_cgi = "qpoe";
|
|
// m->m_off = (char *)&g_conf.m_quickpollCoreOnError - g;
|
|
// m->m_type = TYPE_BOOL;
|
|
// m->m_def = "1";
|
|
// m++;
|
|
|
|
|
|
|
|
// . this will leak the shared mem if the process is Ctrl+C'd
|
|
// . that is expected behavior
|
|
// . you can clean up the leaks using 'gb freecache 20000000'
|
|
// and use 'ipcs -m' to see what leaks you got
|
|
// . generally, only the main gb should use shared mem, so
//   keep this off for testing
m->m_title = "use shared mem";
m->m_desc = "If enabled, Gigablast will use shared memory. "
"Should really only be used on the live cluster; "
"keep this off on the testing cluster since it can "
"leak easily.";
|
|
m->m_cgi = "ushm";
|
|
m->m_off = (char *)&g_conf.m_useSHM - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
// disable disk caches... for testing really
|
|
/*
|
|
m->m_title = "use disk page cache for indexdb";
|
|
m->m_desc = "Use disk page cache?";
|
|
m->m_cgi = "udpci";
|
|
m->m_off = (char *)&g_conf.m_useDiskPageCacheIndexdb - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "1";
|
|
m++;
|
|
*/
|
|
|
|
m->m_title = "posdb disk cache size";
|
|
m->m_desc = "How much file cache size to use in bytes? Posdb is "
|
|
"the index.";
|
|
m->m_cgi = "dpcsp";
|
|
m->m_off = (char *)&g_conf.m_posdbFileCacheSize - g;
|
|
m->m_type = TYPE_LONG_LONG;
|
|
m->m_def = "30000000";
|
|
m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "tagdb disk cache size";
|
|
m->m_desc = "How much file cache size to use in bytes? Tagdb is "
|
|
"consulted at spider time and query time to determine "
|
|
"if a url or outlink is banned or what its siterank is, etc.";
|
|
m->m_cgi = "dpcst";
|
|
m->m_off = (char *)&g_conf.m_tagdbFileCacheSize - g;
|
|
m->m_type = TYPE_LONG_LONG;
|
|
m->m_def = "30000000";
|
|
m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "clusterdb disk cache size";
|
|
m->m_desc = "How much file cache size to use in bytes? "
|
|
"Gigablast does a "
|
|
"lookup in clusterdb for each search result at query time to "
|
|
"get its site information for site clustering. If you "
|
|
"disable site clustering in the search controls then "
|
|
"clusterdb will not be consulted.";
|
|
m->m_cgi = "dpcsc";
|
|
m->m_off = (char *)&g_conf.m_clusterdbFileCacheSize - g;
|
|
m->m_type = TYPE_LONG_LONG;
|
|
m->m_def = "30000000";
|
|
m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "titledb disk cache size";
|
|
m->m_desc = "How much file cache size to use in bytes? Titledb "
|
|
"holds the cached web pages, compressed. Gigablast consults "
|
|
"it to generate a summary for a search result, or to see if "
|
|
"a url Gigablast is spidering is already in the index.";
|
|
m->m_cgi = "dpcsx";
|
|
m->m_off = (char *)&g_conf.m_titledbFileCacheSize - g;
|
|
m->m_type = TYPE_LONG_LONG;
|
|
m->m_def = "30000000";
|
|
m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "spiderdb disk cache size";
|
|
m->m_desc = "How much file cache size to use in bytes? Titledb "
|
|
"holds the cached web pages, compressed. Gigablast consults "
|
|
"it to generate a summary for a search result, or to see if "
|
|
"a url Gigablast is spidering is already in the index.";
|
|
m->m_cgi = "dpcsy";
|
|
m->m_off = (char *)&g_conf.m_spiderdbFileCacheSize - g;
|
|
m->m_type = TYPE_LONG_LONG;
|
|
m->m_def = "30000000";
|
|
m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m->m_group = 0;
|
|
m++;
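
// Note: with the defaults above, the five file caches (posdb, tagdb,
// clusterdb, titledb, spiderdb) sum to 5 x 30MB = roughly 150MB of
// RAM per gb process.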
|
|
|
|
|
|
|
|
/*
|
|
m->m_title = "exclude link text";
|
|
m->m_desc = "Exclude search results that have one or more query "
|
|
"that only appear in the incoming link text";
|
|
m->m_cgi = "exlt";
|
|
m->m_off = (char *)&g_conf.m_excludeLinkText - g;
|
|
m->m_sparm = 1;
|
|
m->m_soff = (char *)&si.m_excludeLinkText - y;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_scgi = "excludelinktext";
|
|
m++;
|
|
|
|
m->m_title = "exclude meta text";
|
|
m->m_desc = "Exclude search results that have one or more query "
|
|
"that only appear in the meta text";
|
|
m->m_cgi = "exmt";
|
|
m->m_off = (char *)&g_conf.m_excludeMetaText - g;
|
|
m->m_sparm = 1;
|
|
m->m_soff = (char *)&si.m_excludeMetaText - y;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_scgi = "excludemetatext";
|
|
m++;
|
|
*/
|
|
|
|
m->m_title = "scan all if not found";
|
|
m->m_desc = "Scan all titledb files if rec not found. You should "
|
|
"keep this on to avoid corruption. Do not turn it off unless "
|
|
"you are Matt Wells.";
|
|
m->m_cgi = "sainf";
|
|
m->m_off = (char *)&g_conf.m_scanAllIfNotFound - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "1";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "interface machine";
|
|
m->m_desc = "for specifying if this is an interface machine"
|
|
"messages are rerouted from this machine to the main"
|
|
"cluster set in the hosts.conf.";
|
|
m->m_cgi = "intmch";
|
|
m->m_off = (char *)&g_conf.m_interfaceMachine - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 2;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "generate vector at query time";
|
|
m->m_desc = "At query time, should Gigablast generate content "
|
|
"vectors for title records lacking them? This is an "
|
|
"expensive operation, so is really just for testing purposes.";
|
|
m->m_cgi = "gv";
|
|
m->m_off = (char *)&g_conf.m_generateVectorAtQueryTime - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
|
|
|
|
m->m_title = "redirect non-raw traffic";
|
|
m->m_desc = "If this is non empty, http traffic will be redirected "
|
|
"to the specified address.";
|
|
m->m_cgi = "redir";
|
|
m->m_off = (char *)&g_conf.m_redirect - g;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_URL_LEN;
|
|
m->m_def = "";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "send requests to compression proxy";
|
|
m->m_desc = "If this is true, gb will route download requests for"
|
|
" web pages to proxies in hosts.conf. Proxies will"
|
|
" download and compress docs before sending back. ";
|
|
m->m_cgi = "srtcp";
|
|
m->m_off = (char *)&g_conf.m_useCompressionProxy - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "synchronize proxy to cluster time";
|
|
m->m_desc = "Enable/disable the ability to synchronize time between "
|
|
"the cluster and the proxy";
|
|
m->m_cgi = "sptct";
|
|
m->m_off = (char *)&g_conf.m_timeSyncProxy - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
|
|
|
|
/*
|
|
m->m_title = "use data feed account server";
|
|
m->m_desc = "Enable/disable the use of a remote account verification "
|
|
"for Data Feed Customers. This should ONLY be used for the "
|
|
"proxy.";
|
|
m->m_cgi = "pdfuas";
|
|
m->m_off = (char *)&g_conf.m_useDFAcctServer - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m++;
|
|
|
|
m->m_title = "data feed server ip";
|
|
m->m_desc = "The ip address of the Gigablast data feed server to "
|
|
"retrieve customer account information from. This should ONLY "
|
|
"be used for the proxy.";
|
|
m->m_cgi = "pdfip";
|
|
m->m_off = (char *)&g_conf.m_dfAcctIp - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "2130706433";
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "data feed server port";
|
|
m->m_desc = "The port of the Gigablast data feed server to retrieve "
|
|
"customer account information from. This should ONLY be used "
|
|
"for the proxy";
|
|
m->m_cgi = "pdfport";
|
|
m->m_off = (char *)&g_conf.m_dfAcctPort - g;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "8040";
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "data feed server collection";
|
|
m->m_desc = "The collection on the Gigablast data feed server to "
|
|
"retrieve customer account information from. This should ONLY "
|
|
"be used for the proxy.";
|
|
m->m_cgi = "pdfcoll";
|
|
m->m_off = (char *)&g_conf.m_dfAcctColl - g;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_COLL_LEN;
|
|
m->m_def = "customers";
|
|
m->m_group = 0;
|
|
m++;
|
|
*/
|
|
|
|
m->m_title = "allow scaling of hosts";
|
|
m->m_desc = "Allows scaling up of hosts by deleting recs not in "
|
|
"the correct group. This should only happen why copying "
|
|
"a set of servers to the new hosts. Otherwise corrupted "
|
|
"data will cause a halt.";
|
|
m->m_cgi = "asoh";
|
|
m->m_off = (char *)&g_conf.m_allowScale - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "allow bypass of db validation";
|
|
m->m_desc = "Allows bypass of db validation so gigablast will not "
|
|
"halt if a corrupt db is discovered during load. Use this "
|
|
"when attempting to load with a collection that has known "
|
|
"corruption.";
|
|
m->m_cgi = "abov";
|
|
m->m_off = (char *)&g_conf.m_bypassValidation - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
/*
|
|
m->m_title = "reload language pages";
|
|
m->m_desc = "Reloads language specific pages.";
|
|
m->m_cgi = "rlpages";
|
|
m->m_type = TYPE_CMD;
|
|
m->m_func = CommandReloadLanguagePages;
|
|
m->m_cast = 0;
|
|
m++;
|
|
|
|
m->m_title = "proxy port";
|
|
m->m_desc = "Retrieve pages from the proxy on "
|
|
"this port.";
|
|
m->m_cgi = "proxyport";
|
|
m->m_off = (char *)&cr.m_proxyPort - x;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "0";
|
|
m++;
|
|
|
|
m->m_title = "all reload language pages";
|
|
m->m_desc = "Reloads language specific pages for all hosts.";
|
|
m->m_cgi = "rlpages";
|
|
m->m_type = TYPE_CMD;
|
|
m++;
|
|
*/
|
|
|
|
// do we need this any more?
|
|
/*
|
|
m->m_title = "give up on dead hosts";
|
|
m->m_desc = "Give up requests to dead hosts. Only set this when you "
|
|
"know a host is dead and will not come back online without "
|
|
"a restarting all hosts. Messages will timeout on the dead "
|
|
"host but will not error, allowing outstanding spidering to "
|
|
"finish to the twin.";
|
|
m->m_cgi = "gvup";
|
|
m->m_off = (char *)&g_conf.m_giveupOnDeadHosts - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m++;
|
|
*/
|
|
|
|
/*
|
|
m->m_title = "ask root name servers";
|
|
m->m_desc = "if enabled Gigablast will direct DNS requests to "
|
|
"the root DNS servers, otherwise it will continue to "
|
|
"send DNS queries to the bind9 servers defined in "
|
|
"the Master Controls.";
|
|
m->m_cgi = "bdns";
|
|
m->m_off = (char *)&g_conf.m_askRootNameservers - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "1";
|
|
m++;
|
|
*/
|
|
|
|
/*
|
|
m->m_title = "do dig sanity checks";
|
|
m->m_desc = "call dig @nameServer hostname and on timedout lookups"
|
|
" and see if dig also timed out";
|
|
m->m_cgi = "dig";
|
|
m->m_off = (char *)&g_conf.m_useDig - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m++;
|
|
*/
|
|
|
|
/*
|
|
m->m_title = "dns root name server 1";
|
|
m->m_desc = "IP address of a DNS root server. Assumes UDP "
|
|
"port 53.";
|
|
m->m_cgi = "rnsa";
|
|
m->m_off = (char *)&g_conf.m_rnsIps[0] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "192.228.79.201";
|
|
m++;
|
|
|
|
m->m_title = "dns root name server 2";
|
|
m->m_desc = "IP address of a DNS root server. Assumes UDP "
|
|
"port 53.";
|
|
m->m_cgi = "rnsb";
|
|
m->m_off = (char *)&g_conf.m_rnsIps[1] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "192.33.4.12";
|
|
m++;
|
|
|
|
m->m_title = "dns root name server 3";
|
|
m->m_desc = "IP address of a DNS root server. Assumes UDP "
|
|
"port 53.";
|
|
m->m_cgi = "rnsc";
|
|
m->m_off = (char *)&g_conf.m_rnsIps[2] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "128.8.10.90";
|
|
m++;
|
|
|
|
m->m_title = "dns root name server 4";
|
|
m->m_desc = "IP address of a DNS root server. Assumes UDP "
|
|
"port 53.";
|
|
m->m_cgi = "rnsd";
|
|
m->m_off = (char *)&g_conf.m_rnsIps[3] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "192.203.230.10";
|
|
m++;
|
|
|
|
m->m_title = "dns root name server 5";
|
|
m->m_desc = "IP address of a DNS root server. Assumes UDP "
|
|
"port 53.";
|
|
m->m_cgi = "rnse";
|
|
m->m_off = (char *)&g_conf.m_rnsIps[4] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "192.5.5.241";
|
|
m++;
|
|
|
|
m->m_title = "dns root name server 6";
|
|
m->m_desc = "IP address of a DNS root server. Assumes UDP "
|
|
"port 53.";
|
|
m->m_cgi = "rnsf";
|
|
m->m_off = (char *)&g_conf.m_rnsIps[5] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "192.112.36.4";
|
|
m++;
|
|
|
|
m->m_title = "dns root name server 7";
|
|
m->m_desc = "IP address of a DNS root server. Assumes UDP "
|
|
"port 53.";
|
|
m->m_cgi = "rnsg";
|
|
m->m_off = (char *)&g_conf.m_rnsIps[6] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "128.63.2.53";
|
|
m++;
|
|
|
|
m->m_title = "dns root name server 8";
|
|
m->m_desc = "IP address of a DNS root server. Assumes UDP "
|
|
"port 53.";
|
|
m->m_cgi = "rnsh";
|
|
m->m_off = (char *)&g_conf.m_rnsIps[7] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "192.36.148.17";
|
|
m++;
|
|
|
|
m->m_title = "dns root name server 9";
|
|
m->m_desc = "IP address of a DNS root server. Assumes UDP "
|
|
"port 53.";
|
|
m->m_cgi = "rnsi";
|
|
m->m_off = (char *)&g_conf.m_rnsIps[8] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "192.58.128.30";
|
|
m++;
|
|
|
|
m->m_title = "dns root name server 10";
|
|
m->m_desc = "IP address of a DNS root server. Assumes UDP "
|
|
"port 53.";
|
|
m->m_cgi = "rnsj";
|
|
m->m_off = (char *)&g_conf.m_rnsIps[9] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "193.0.14.129";
|
|
m++;
|
|
|
|
m->m_title = "dns root name server 11";
|
|
m->m_desc = "IP address of a DNS root server. Assumes UDP "
|
|
"port 53.";
|
|
m->m_cgi = "rnsk";
|
|
m->m_off = (char *)&g_conf.m_rnsIps[10] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "198.32.64.12";
|
|
m++;
|
|
|
|
m->m_title = "dns root name server 12";
|
|
m->m_desc = "IP address of a DNS root server. Assumes UDP "
|
|
"port 53.";
|
|
m->m_cgi = "rnsl";
|
|
m->m_off = (char *)&g_conf.m_rnsIps[11] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "202.12.27.33";
|
|
m++;
|
|
|
|
m->m_title = "dns root name server 13";
|
|
m->m_desc = "IP address of a DNS root server. Assumes UDP "
|
|
"port 53.";
|
|
m->m_cgi = "rnsm";
|
|
m->m_off = (char *)&g_conf.m_rnsIps[12] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "198.41.0.4";
|
|
m++;
|
|
*/
|
|
|
|
m->m_title = "dns 0";
|
|
m->m_desc = "IP address of the primary DNS server. Assumes UDP "
|
|
"port 53. REQUIRED FOR SPIDERING! Use Google's "
|
|
"public DNS 8.8.8.8 as default.";
|
|
m->m_cgi = "pdns";
|
|
m->m_off = (char *)&g_conf.m_dnsIps[0] - g;
|
|
m->m_type = TYPE_IP;
|
|
// default to google public dns #1
|
|
m->m_def = "8.8.8.8";
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "dns 1";
|
|
m->m_desc = "IP address of the secondary DNS server. Assumes UDP "
|
|
"port 53. Will be accessed in conjunction with the primary "
|
|
"dns, so make sure this is always up. An ip of 0 means "
|
|
"disabled. Google's secondary public DNS is 8.8.4.4.";
|
|
m->m_cgi = "sdns";
|
|
m->m_off = (char *)&g_conf.m_dnsIps[1] - g;
|
|
m->m_type = TYPE_IP;
|
|
// default to google public dns #2
|
|
m->m_def = "8.8.4.4";
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "dns 2";
|
|
m->m_desc = "All hosts send to these DNSes based on hash "
|
|
"of the subdomain to try to split DNS load evenly.";
|
|
m->m_cgi = "sdnsa";
|
|
m->m_off = (char *)&g_conf.m_dnsIps[2] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "0.0.0.0";
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
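
// A minimal sketch of the subdomain-hash selection described above
// (assumed logic, for illustration only; the real resolver lives
// elsewhere, and numExtraDnsIps is a hypothetical count of the
// non-zero entries in slots 2..15):
/*
	int32_t h = hash32Lower_a ( subdomain , subdomainLen , 0 );
	// the same subdomain always maps to the same dns ip:
	int32_t i = 2 + ( (uint32_t)h % numExtraDnsIps );
	int32_t dnsIp = g_conf.m_dnsIps[i];
	// ... send the udp request to dnsIp on port 53 ...
*/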
|
|
|
|
m->m_title = "dns 3";
|
|
m->m_desc = "";
|
|
m->m_cgi = "sdnsb";
|
|
m->m_off = (char *)&g_conf.m_dnsIps[3] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "0.0.0.0";
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "dns 4";
|
|
m->m_desc = "";
|
|
m->m_cgi = "sdnsc";
|
|
m->m_off = (char *)&g_conf.m_dnsIps[4] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "0.0.0.0";
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "dns 5";
|
|
m->m_desc = "";
|
|
m->m_cgi = "sdnsd";
|
|
m->m_off = (char *)&g_conf.m_dnsIps[5] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "0.0.0.0";
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "dns 6";
|
|
m->m_desc = "";
|
|
m->m_cgi = "sdnse";
|
|
m->m_off = (char *)&g_conf.m_dnsIps[6] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "0.0.0.0";
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "dns 7";
|
|
m->m_desc = "";
|
|
m->m_cgi = "sdnsf";
|
|
m->m_off = (char *)&g_conf.m_dnsIps[7] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "0.0.0.0";
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "dns 8";
|
|
m->m_desc = "";
|
|
m->m_cgi = "sdnsg";
|
|
m->m_off = (char *)&g_conf.m_dnsIps[8] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "0.0.0.0";
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "dns 9";
|
|
m->m_desc = "";
|
|
m->m_cgi = "sdnsh";
|
|
m->m_off = (char *)&g_conf.m_dnsIps[9] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "0.0.0.0";
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "dns 10";
|
|
m->m_desc = "";
|
|
m->m_cgi = "sdnsi";
|
|
m->m_off = (char *)&g_conf.m_dnsIps[10] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "0.0.0.0";
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "dns 11";
|
|
m->m_desc = "";
|
|
m->m_cgi = "sdnsj";
|
|
m->m_off = (char *)&g_conf.m_dnsIps[11] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "0.0.0.0";
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "dns 12";
|
|
m->m_desc = "";
|
|
m->m_cgi = "sdnsk";
|
|
m->m_off = (char *)&g_conf.m_dnsIps[12] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "0.0.0.0";
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "dns 13";
|
|
m->m_desc = "";
|
|
m->m_cgi = "sdnsl";
|
|
m->m_off = (char *)&g_conf.m_dnsIps[13] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "0.0.0.0";
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "dns 14";
|
|
m->m_desc = "";
|
|
m->m_cgi = "sdnsm";
|
|
m->m_off = (char *)&g_conf.m_dnsIps[14] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "0.0.0.0";
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "dns 15";
|
|
m->m_desc = "";
|
|
m->m_cgi = "sdnsn";
|
|
m->m_off = (char *)&g_conf.m_dnsIps[15] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "0.0.0.0";
|
|
m->m_group = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
|
|
m->m_title = "geocoder IP #1";
|
|
m->m_desc = "";
|
|
m->m_cgi = "gca";
|
|
m->m_off = (char *)&g_conf.m_geocoderIps[0] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "10.5.66.11"; // sp1
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "geocoder IP #2";
|
|
m->m_desc = "";
|
|
m->m_cgi = "gcb";
|
|
m->m_off = (char *)&g_conf.m_geocoderIps[1] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "0.0.0.0";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "geocoder IP #3";
|
|
m->m_desc = "";
|
|
m->m_cgi = "gcc";
|
|
m->m_off = (char *)&g_conf.m_geocoderIps[2] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "0.0.0.0";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "geocoder IP #4";
|
|
m->m_desc = "";
|
|
m->m_cgi = "gcd";
|
|
m->m_off = (char *)&g_conf.m_geocoderIps[3] - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "0.0.0.0";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "wiki proxy ip";
|
|
m->m_desc = "Access the wiki coll through this proxy ip";
|
|
m->m_cgi = "wpi";
|
|
m->m_off = (char *)&g_conf.m_wikiProxyIp - g;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "0";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
|
|
m->m_title = "wiki proxy port";
|
|
m->m_desc = "Access the wiki coll through this proxy port";
|
|
m->m_cgi = "wpp";
|
|
m->m_off = (char *)&g_conf.m_wikiProxyPort - g;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "0";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
|
|
m->m_title = "default collection";
|
|
m->m_desc = "When no collection is explicitly specified, assume "
|
|
"this collection name.";
|
|
m->m_cgi = "dcn";
|
|
m->m_off = (char *)&g_conf.m_defaultColl - g;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_COLL_LEN+1;
|
|
m->m_def = "";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "directory collection";
|
|
m->m_desc = "Collection to be used for directory searching and "
|
|
"display of directory topic pages.";
|
|
m->m_cgi = "dircn";
|
|
m->m_off = (char *)&g_conf.m_dirColl - g;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_COLL_LEN+1;
|
|
m->m_def = "main";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "directory hostname";
|
|
m->m_desc = "Hostname of the server providing the directory. "
|
|
"Leave empty to use this host.";
|
|
m->m_cgi = "dirhn";
|
|
m->m_off = (char *)&g_conf.m_dirHost - g;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_URL_LEN;
|
|
m->m_def = "";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "max incoming bandwidth for spider";
|
|
m->m_desc = "Total incoming bandwidth used by all spiders should "
|
|
"not exceed this many kilobits per second. ";
|
|
m->m_cgi = "mkbps";
|
|
m->m_off = (char *)&g_conf.m_maxIncomingKbps - g;
|
|
m->m_type = TYPE_FLOAT;
|
|
m->m_def = "999999.0";
|
|
m->m_units = "Kbps";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "max 1-minute sliding-window loadavg";
|
|
m->m_desc = "Spiders will shed load when their host exceeds this "
|
|
"value for the 1-minute load average in /proc/loadavg. "
|
|
"The value 0.0 disables this feature.";
|
|
m->m_cgi = "mswl";
|
|
m->m_off = (char *)&g_conf.m_maxLoadAvg - g;
|
|
m->m_type = TYPE_FLOAT;
|
|
m->m_def = "0.0";
|
|
m->m_units = "";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
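
// A sketch of the check implied by the description above (assumed;
// the actual test lives in the spider code, not here):
/*
	float oneMin = 0.0;
	FILE *f = fopen ( "/proc/loadavg" , "r" );
	if ( f ) { fscanf ( f , "%f" , &oneMin ); fclose ( f ); }
	// 0.0 disables the feature per the description
	if ( g_conf.m_maxLoadAvg > 0.0 && oneMin > g_conf.m_maxLoadAvg )
		return; // shed load: do not launch another spider
*/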
|
|
|
|
m->m_title = "max pages per second";
|
|
m->m_desc = "Maximum number of pages to index or delete from index "
|
|
"per second for all hosts combined.";
|
|
m->m_cgi = "mpps";
|
|
m->m_off = (char *)&g_conf.m_maxPagesPerSecond - g;
|
|
m->m_type = TYPE_FLOAT;
|
|
m->m_def = "999999.0";
|
|
m->m_units = "pages/second";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
/*
|
|
m->m_title = "distributed spider balance";
|
|
m->m_desc = "Max number of ready domains a host can have distributed "
|
|
"to it by all other host. This should be some multiple of the "
|
|
"total number of hosts in the cluster.";
|
|
m->m_cgi = "dsb";
|
|
m->m_off = (char *)&g_conf.m_distributedSpiderBalance - g;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "1024";
|
|
m->m_units = "domains";
|
|
m++;
|
|
|
|
m->m_title = "distributed same ip wait (hack)";
|
|
m->m_desc = "Amount of time to wait if this IP is already being "
|
|
"downloaded by a host. Works only in conjunction with "
|
|
"distribute spider downloads by ip in Spider Controls.";
|
|
m->m_cgi = "dsiw";
|
|
m->m_off = (char *)&g_conf.m_distributedIpWait - g;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "0";
|
|
m->m_units = "ms";
|
|
m->m_group = 0;
|
|
m->m_min = 0;
|
|
m++;
|
|
*/
|
|
|
|
|
|
/*
|
|
m->m_title = "root quality max cache age base";
|
|
m->m_desc = "Maximum age to cache quality of a root url in seconds. "
|
|
"Computing "
|
|
"the quality of especially root urls can be expensive. "
|
|
"This number is multiplied by (Q-30)/10 where Q is the cached "
|
|
"quality of the root url. Therefore, higher quality and more "
|
|
"stable root urls are updated less often, which is a good thing "
|
|
"since they are more expensive to recompute.";
|
|
m->m_cgi = "rqmca";
|
|
m->m_off = (char *)&g_conf.m_siteQualityMaxCacheAge - g;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "7257600"; // 3 months (in seconds)
|
|
m->m_units = "seconds";
|
|
m++;
|
|
*/
|
|
|
|
|
|
|
|
m->m_title = "use threads";
|
|
m->m_desc = "If enabled, Gigablast will use threads.";
|
|
m->m_cgi = "ut";
|
|
m->m_off = (char *)&g_conf.m_useThreads - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "1";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
// turn off for now. after seeing how SLOOOOOW brian's merge op was
|
|
// when all 16 shards on a 16-core machine were merging (even w/ SSDs)
|
|
// i turned threads off and it was over 100x faster. so until we have
|
|
// pooling or something turn these off
|
|
m->m_title = "use threads for disk";
|
|
m->m_desc = "If enabled, Gigablast will use threads for disk ops. "
|
|
"Now that Gigablast uses pthreads more effectively, "
|
|
"leave this enabled for optimal performance in all cases.";
|
|
//"Until pthreads is any good leave this off. If you have "
|
|
//"SSDs performance can be as much as 100x better.";
|
|
m->m_cgi = "utfd";
|
|
m->m_off = (char *)&g_conf.m_useThreadsForDisk - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "1";
|
|
m->m_flags = 0;//PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "use threads for intersects and merges";
|
|
m->m_desc = "If enabled, Gigablast will use threads for these ops. "
|
|
"Default is now on in the event you have simultaneous queries "
|
|
"so one query does not hold back the other. There seems "
|
|
"to be a bug so leave this ON for now.";
|
|
//"Until pthreads is any good leave this off.";
|
|
m->m_cgi = "utfio";
|
|
m->m_off = (char *)&g_conf.m_useThreadsForIndexOps - g;
|
|
m->m_type = TYPE_BOOL;
|
|
// enable this in the event of multiple cores available and
|
|
// large simultaneous queries coming in
|
|
m->m_def = "1";
|
|
m->m_flags = 0;//PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "use threads for system calls";
|
|
m->m_desc = "Gigablast does not make too many system calls so "
|
|
"leave this on in case the system call is slow.";
|
|
m->m_cgi = "utfsc";
|
|
m->m_off = (char *)&g_conf.m_useThreadsForSystemCalls - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "1";
|
|
m->m_flags = 0;//PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
|
|
m->m_title = "max cpu threads";
|
|
m->m_desc = "Maximum number of threads to use per Gigablast process "
|
|
"for intersecting docid lists.";
|
|
m->m_cgi = "mct";
|
|
m->m_off = (char *)&g_conf.m_maxCpuThreads - g;
|
|
m->m_type = TYPE_LONG;
|
|
// make it 3 for new gb in case one query takes way longer
|
|
// than the others
|
|
m->m_def = "6"; // "2";
|
|
m->m_units = "threads";
|
|
m->m_min = 1;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "max cpu merge threads";
|
|
m->m_desc = "Maximum number of threads to use per Gigablast process "
|
|
"for merging lists read from disk.";
|
|
m->m_cgi = "mcmt";
|
|
m->m_off = (char *)&g_conf.m_maxCpuMergeThreads - g;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "10";
|
|
m->m_units = "threads";
|
|
m->m_min = 1;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "max write threads";
|
|
m->m_desc = "Maximum number of threads to use per Gigablast process "
|
|
"for writing data to the disk. "
|
|
"Keep low to reduce file interlace effects and impact "
|
|
"on query response time.";
|
|
m->m_cgi = "mwt";
|
|
m->m_off = (char *)&g_conf.m_maxWriteThreads - g;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "1";
|
|
m->m_units = "threads";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "flush disk writes";
|
|
m->m_desc = "If enabled then all writes will be flushed to disk. "
|
|
"If not enabled, then gb uses the Linux disk write cache.";
|
|
m->m_cgi = "fw";
|
|
m->m_off = (char *)&g_conf.m_flushWrites - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_API;//PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
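
// Assumed effect of the parm above: when enabled, gb follows writes
// with an fsync()-style flush so data reaches the disk; when
// disabled, writes sit in the Linux page cache until the kernel
// flushes them.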
|
|
|
|
/*
|
|
m->m_title = "files group writable";
|
|
m->m_desc = "Make all created files group writable? If you have "
|
|
"multiple user accounts starting Gigablast processes you "
|
|
"will want the files to be group writable. You will "
|
|
"need to make sure you run gigablast under the "
|
|
"primary group you want to use for gigablast administration.";
|
|
m->m_cgi = "afgw";
|
|
m->m_off = (char *)&g_conf.m_makeAllFilesGroupWritable - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_API;//PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m->m_group = 0;
|
|
m++;
|
|
*/
|
|
|
|
m->m_title = "verify written lists";
|
|
m->m_desc = "Ensure lists being written to disk are not corrupt. "
|
|
"That title recs appear valid, etc. Helps isolate sources "
|
|
"of corruption. Used for debugging.";
|
|
m->m_cgi = "vwl";
|
|
m->m_off = (char *)&g_conf.m_verifyDumpedLists - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_group = 0;
|
|
m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
|
|
m->m_title = "verify disk writes";
|
|
m->m_desc = "Read what was written in a verification step. Decreases "
|
|
"performance, but may help fight disk corruption mostly on "
|
|
"Maxtors and Western Digitals.";
|
|
m->m_cgi = "vdw";
|
|
m->m_off = (char *)&g_conf.m_verifyWrites - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "max spider read threads";
|
|
m->m_desc = "Maximum number of threads to use per Gigablast process "
|
|
"for accessing the disk "
|
|
"for index-building purposes. Keep low to reduce impact "
|
|
"on query response time. Increase for fast disks or when "
|
|
"preferring build speed over lower query latencies";
|
|
m->m_cgi = "smdt";
|
|
m->m_off = (char *)&g_conf.m_spiderMaxDiskThreads - g;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "20";
|
|
m->m_units = "threads";
|
|
m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
/*
|
|
m->m_title = "max spider big read threads";
|
|
m->m_desc = "This particular number applies to all disk "
|
|
"reads above 1MB. "
|
|
"The number of total threads is also "
|
|
"limited to MAX_STACKS which is currently 20.";
|
|
m->m_cgi = "smbdt";
|
|
m->m_off = (char *)&g_conf.m_spiderMaxBigDiskThreads - g;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "2";
|
|
m->m_units = "threads";
|
|
m->m_group = 0;
|
|
m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "max spider medium read threads";
|
|
m->m_desc = "This particular number applies to all disk "
|
|
"reads above 100K. "
|
|
"The number of total threads is also "
|
|
"limited to MAX_STACKS which is currently 20.";
|
|
m->m_cgi = "smmdt";
|
|
m->m_off = (char *)&g_conf.m_spiderMaxMedDiskThreads - g;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "4";
|
|
m->m_units = "threads";
|
|
m->m_group = 0;
|
|
m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "max spider small read threads";
|
|
m->m_desc = "This particular number applies to all disk "
|
|
"reads above 1MB. "
|
|
"The number of total threads is also "
|
|
"limited to MAX_STACKS which is currently 20.";
|
|
m->m_cgi = "smsdt";
|
|
m->m_off = (char *)&g_conf.m_spiderMaxSmaDiskThreads - g;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "15";
|
|
m->m_units = "threads";
|
|
m->m_group = 0;
|
|
m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
*/
|
|
|
|
m->m_title = "separate disk reads";
|
|
m->m_desc = "If enabled then we will not launch a low priority "
|
|
"disk read or write while a high priority is outstanding. "
|
|
"Help improve query response time at the expense of "
|
|
"spider performance.";
|
|
m->m_cgi = "sdt";
|
|
m->m_off = (char *)&g_conf.m_separateDiskReads - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "1";
|
|
m->m_flags = 0;
|
|
m->m_page = PAGE_MASTER;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;

/*
m->m_title = "max query read threads";
m->m_desc = "Maximum number of threads to use per Gigablast process "
"for accessing the disk "
"for querying purposes.";
//IDE systems tend to be more "
// "responsive when this is low. Increase for SCSI or RAID "
// "systems.";
m->m_cgi = "qmdt";
m->m_off = (char *)&g_conf.m_queryMaxDiskThreads - g;
m->m_type = TYPE_LONG;
m->m_def = "100";
m->m_units = "threads";
m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_MASTER;
m->m_obj = OBJ_CONF;
m->m_group = 0;
m++;
*/

/*
m->m_title = "max query big read threads";
m->m_desc = "This particular number applies to all reads above 1MB. "
"The number of total threads is also "
"limited to MAX_STACKS which is currently 20.";
m->m_cgi = "qmbdt";
m->m_off = (char *)&g_conf.m_queryMaxBigDiskThreads - g;
m->m_type = TYPE_LONG;
m->m_def = "20"; // 1
m->m_units = "threads";
m->m_group = 0;
m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_MASTER;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "max query medium read threads";
m->m_desc = "This particular number applies to all disk "
"reads above 100K. "
"The number of total threads is also "
"limited to MAX_STACKS which is currently 20.";
m->m_cgi = "qmmdt";
m->m_off = (char *)&g_conf.m_queryMaxMedDiskThreads - g;
m->m_type = TYPE_LONG;
m->m_def = "20"; // 3
m->m_units = "threads";
m->m_group = 0;
m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_MASTER;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "max query small read threads";
m->m_desc = "This particular number applies to all disk "
"reads of 100K or less. "
"The number of total threads is also "
"limited to MAX_STACKS which is currently 20.";
m->m_cgi = "qmsdt";
m->m_off = (char *)&g_conf.m_queryMaxSmaDiskThreads - g;
m->m_type = TYPE_LONG;
m->m_def = "20";
m->m_units = "threads";
m->m_group = 0;
m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_MASTER;
m->m_obj = OBJ_CONF;
m++;
*/

m->m_title = "min popularity for speller";
m->m_desc = "Word or phrase must be present in this percent "
"of documents in order to qualify as a spelling "
"recommendation.";
m->m_cgi = "mps";
m->m_off = (char *)&g_conf.m_minPopForSpeller - g;
m->m_type = TYPE_FLOAT;
m->m_def = ".01";
m->m_units = "%%";
m->m_priv = 2;
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_MASTER;
m->m_obj = OBJ_CONF;
m++;
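
// Worked example of this threshold: with the default of .01%, an
// index of 100,000,000 documents requires a word or phrase to appear
// in at least 100,000,000 * 0.0001 = 10,000 documents before the
// speller will recommend it. (Illustrative numbers only.)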

m->m_title = "phrase weight";
m->m_desc = "Percent to weight phrases in queries.";
m->m_cgi = "qp";
m->m_off = (char *)&g_conf.m_queryPhraseWeight - g;
m->m_type = TYPE_FLOAT;
// was 350, but 'new mexico tourism' and 'boots uk'
// emphasized the phrase terms too much!!
m->m_def = "100";
m->m_units = "%%";
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_MASTER;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "weights.cpp slider parm (tmp)";
m->m_desc = "Percent of how much to use words to phrase ratio weights.";
m->m_cgi = "wsp";
m->m_off = (char *)&g_conf.m_sliderParm - g;
m->m_type = TYPE_LONG;
m->m_def = "90";
m->m_units = "%%";
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_MASTER;
m->m_obj = OBJ_CONF;
m++;

/*
m->m_title = "indextable intersection algo to use";
m->m_desc = "0 means add the term scores, 1 means average them "
"and 2 means take the RMS.";
m->m_cgi = "iia";
m->m_off = (char *)&g_conf.m_indexTableIntersectionAlgo - g;
m->m_type = TYPE_LONG;
m->m_def = "2";
m->m_group = 0;
m++;
*/

/*
m->m_title = "max weight";
m->m_desc = "Maximum, relative query term weight. Set to 0 or less "
"to indicate no max. 10.0 or 20.0 might be a good value.";
m->m_cgi = "qm";
m->m_off = (char *)&g_conf.m_queryMaxMultiplier - g;
m->m_type = TYPE_FLOAT;
m->m_def = "0.0";
m->m_group = 0;
m++;
*/

/*
m->m_title = "query term exponent";
m->m_desc = "Raise the weights of the query "
"terms to this power. The weight of a query term is "
"basically the log of its term frequency. Increasing "
"this will increase the effects of the term frequency "
"related to each term in the query. Term frequency is "
"also known as the term popularity. Very common words "
"typically have lower weights tied to them, but the effects "
"of such weighting will be increased if you increase this "
"exponent.";
m->m_cgi = "qte";
m->m_off = (char *)&g_conf.m_queryExp - g;
m->m_type = TYPE_FLOAT;
m->m_def = "1.1";
m->m_group = 0;
m++;
*/

/*
m->m_title = "use dynamic phrase weighting";
m->m_desc = "A new algorithm which reduces the weight on a query "
"word term if the query phrase terms it is in are of "
"similar popularity (term frequency) to that of the word "
"term.";
m->m_cgi = "udpw";
m->m_off = (char *)&g_conf.m_useDynamicPhraseWeighting - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
*/

m->m_title = "maximum serialized query size";
m->m_desc = "When passing queries around the network, send the raw "
"string instead of the serialized query if the required "
"buffer is bigger than this. Smaller values decrease network "
"traffic for large queries at the expense of processing time.";
m->m_cgi = "msqs";
m->m_off = (char *)&g_conf.m_maxSerializedQuerySize - g;
m->m_type = TYPE_LONG;
m->m_def = "8192";
m->m_units = "bytes";
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_MASTER;
m->m_obj = OBJ_CONF;
m++;
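
// Sketch of the decision this parm drives (hypothetical helper and
// member names; the real send path lives in the query code):
//
//   int32_t need = q->getSerializedSize();
//   if ( need > g_conf.m_maxSerializedQuerySize )
//       // too big: ship the raw query string and let the
//       // receiving host re-parse it
//       sendRawQueryString  ( hostId , q );
//   else
//       // small enough: ship the pre-parsed form to spare the
//       // receiver the parsing work
//       sendSerializedQuery ( hostId , q );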

m->m_title = "merge buf size";
m->m_desc = "Read and write this many bytes at a time when merging "
"files. Smaller values are kinder to query performance, "
"but the merge takes longer. Use at least 1000000 for "
"fast merging.";
m->m_cgi = "mbs";
m->m_off = (char *)&g_conf.m_mergeBufSize - g;
m->m_type = TYPE_LONG;
// keep this way smaller than that 800k we had in here, 100k seems
// to be way better performance for qps
m->m_def = "500000";
m->m_units = "bytes";
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_MASTER;
m->m_obj = OBJ_CONF;
m++;
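
// The buffer bounds the chunk size of the copy loop inside the merge;
// a simplified sketch with hypothetical readChunk/writeChunk helpers:
//
//   int32_t bufSize = g_conf.m_mergeBufSize;
//   char *buf = (char *)mmalloc ( bufSize , "mergebuf" );
//   int64_t off = 0;
//   int32_t n;
//   while ( (n = readChunk ( src , buf , bufSize , off )) > 0 ) {
//       writeChunk ( dst , buf , n );
//       // smaller chunks return control to the event loop more
//       // often, so queries wait less for the disk
//       off += n;
//   }
//   mfree ( buf , bufSize , "mergebuf" );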

m->m_title = "catdb minRecSizes";
m->m_desc = "minRecSizes for Catdb lookups.";
m->m_cgi = "catmsr";
m->m_off = (char *)&g_conf.m_catdbMinRecSizes - g;
m->m_type = TYPE_LONG;
m->m_def = "100000000"; // 100 million
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_MASTER;
m->m_obj = OBJ_CONF;
m++;

/*
m->m_title = "max http download sockets";
m->m_desc = "Maximum sockets available to spiders for downloading "
"web pages.";
m->m_cgi = "mds";
m->m_off = (char *)&g_conf.m_httpMaxDownloadSockets - g;
m->m_type = TYPE_LONG;
m->m_def = "5000";
m->m_group = 0;
m++;
*/

m->m_title = "doc count adjustment";
m->m_desc = "Add this number to the total document count in the "
"index. Just used for displaying on the homepage.";
m->m_cgi = "dca";
m->m_off = (char *)&g_conf.m_docCountAdjustment - g;
m->m_type = TYPE_LONG;
m->m_def = "0";
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_MASTER;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "dynamic performance graph";
m->m_desc = "Generates profiling data for callbacks on page "
"performance.";
m->m_cgi = "dpg";
m->m_off = (char *)&g_conf.m_dynamicPerfGraph - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_MASTER;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "enable profiling";
m->m_desc = "Enable profiler to do accounting of time taken by "
"functions.";
m->m_cgi = "enp";
m->m_off = (char *)&g_conf.m_profilingEnabled - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_MASTER;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "minimum profiling threshold";
m->m_desc = "Profiler will not show functions which take less "
"than this many milliseconds "
"in the log or on the performance graph.";
m->m_cgi = "mpt";
m->m_off = (char *)&g_conf.m_minProfThreshold - g;
m->m_type = TYPE_LONG;
m->m_def = "10";
m->m_group = 0;
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_MASTER;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "sequential profiling";
m->m_desc = "Produce a LOG_TIMING log message for each "
"callback called, along with the time it took. "
"Profiler must be enabled.";
m->m_cgi = "ensp";
m->m_off = (char *)&g_conf.m_sequentialProfiling - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_MASTER;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "use statsdb";
m->m_desc = "Archive system statistics information in Statsdb.";
m->m_cgi = "usdb";
m->m_off = (char *)&g_conf.m_useStatsdb - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_MASTER;
m->m_obj = OBJ_CONF;
m++;

/*
m->m_title = "statsdb snapshots";
m->m_desc = "Archive system statistics information in Statsdb. "
"Takes one snapshot every minute.";
m->m_cgi = "sdbss";
m->m_off = (char *)&g_conf.m_statsdbSnapshots - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;

m->m_title = "statsdb web interface";
m->m_desc = "Enable the Statsdb page for viewing stats history.";
m->m_cgi = "sdbwi";
m->m_off = (char *)&g_conf.m_statsdbPageEnabled - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;
*/

/*
m->m_title = "max synonyms";
m->m_desc = "Maximum possible synonyms to expand a word to.";
m->m_cgi = "msyn";
m->m_off = (char *)&g_conf.m_maxSynonyms - g;
m->m_def = "5";
m->m_type = TYPE_LONG;
m++;

m->m_title = "default affinity";
m->m_desc = "Spelling/number synonyms get this number as their "
"affinity; negative values mean treat them as unknown, "
"values higher than 1.0 get treated as 1.0.";
m->m_cgi = "daff";
m->m_off = (char *)&g_conf.m_defaultAffinity - g;
m->m_def = "0.9";
m->m_type = TYPE_FLOAT;
m++;

m->m_title = "frequency threshold";
m->m_desc = "The minimum frequency a synonym term must have, relative "
"to its master term, in order to be considered as a synonym.";
m->m_cgi = "fqth";
m->m_off = (char *)&g_conf.m_frequencyThreshold - g;
m->m_def = "0.25";
m->m_type = TYPE_FLOAT;
m++;

m->m_title = "maximum affinity requests";
m->m_desc = "Maximum number of outstanding requests the affinity "
"builder can generate. Keep this number at 10 or lower for "
"local servers, higher for internet servers or servers with "
"high latency.";
m->m_cgi = "mar";
m->m_off = (char *)&g_conf.m_maxAffinityRequests - g;
m->m_def = "10";
m->m_type = TYPE_LONG;
m->m_group = 0;
m++;

m->m_title = "maximum affinity errors";
m->m_desc = "Maximum number of times the affinity builder should "
"encounter an error before giving up entirely.";
m->m_cgi = "mae";
m->m_off = (char *)&g_conf.m_maxAffinityErrors - g;
m->m_def = "100";
m->m_type = TYPE_LONG;
m->m_group = 0;
m++;

m->m_title = "affinity timeout";
m->m_desc = "Amount of time in milliseconds to wait for a response to "
"an affinity query. You shouldn't have to touch this unless "
"the network is slow or overloaded.";
m->m_cgi = "ato";
m->m_off = (char *)&g_conf.m_affinityTimeout - g;
m->m_def = "30000";
m->m_type = TYPE_LONG;
m->m_group = 0;
m++;

m->m_title = "affinity rebuild server";
m->m_desc = "Use this server:port to rebuild the affinity.";
m->m_cgi = "ars";
m->m_off = (char *)&g_conf.m_affinityServer - g;
m->m_def = "localhost:8000";
m->m_type = TYPE_STRING;
m->m_size = MAX_URL_LEN;
m->m_group = 0;
m++;

m->m_title = "additional affinity parameters";
m->m_desc = "Additional parameters to pass in the query. Tweak these "
"to get better/faster responses. Don't touch the raw parameter "
"unless you know what you are doing.";
m->m_cgi = "aap";
m->m_off = (char *)&g_conf.m_affinityParms - g;
m->m_def = "&raw=5&dio=1&n=1000&code=gbmonitor";
m->m_type = TYPE_STRING;
m->m_size = MAX_URL_LEN;
m->m_group = 0;
m++;
*/

//////
// END MASTER CONTROLS
//////

///////////////////////////////////////////
// ACCESS CONTROLS
///////////////////////////////////////////

/*
// ARRAYS
// each will have its own table, title will be in first row
// of that table, 2nd row is description, then one row per
// element in the array, then a final row for adding new elements
// if not exceeding our m->m_max limit.
m->m_title = "Passwords Required to Search this Collection";
m->m_desc = "Passwords allowed to perform searches on this collection."
" If no passwords are specified, then anyone can search it.";
m->m_cgi = "searchpwd";
m->m_xml = "searchPassword";
m->m_max = MAX_SEARCH_PASSWORDS;
m->m_off = (char *)cr.m_searchPwds - x;
m->m_type = TYPE_STRINGNONEMPTY;
m->m_size = PASSWORD_MAX_LEN+1; // string size max
m->m_page = PAGE_ACCESS;
m->m_def = "";
m++;

m->m_title = "IPs Banned from Searching this Collection";
m->m_desc = "These IPs are not allowed to search this collection or "
"use add url. Useful to keep out miscreants. Use zero for the "
"last number of the IP to ban an entire IP domain.";
m->m_cgi = "bip";
m->m_xml = "bannedIp";
m->m_max = MAX_BANNED_IPS;
m->m_off = (char *)cr.m_banIps - x;
m->m_type = TYPE_IP;
m->m_def = "";
m++;

m->m_title = "Only These IPs can Search this Collection";
m->m_desc = "Only these IPs are allowed to search the collection and "
"use the add url facilities. If you'd like to make your "
"collection publicly searchable then do not add any IPs "
"here. Use zero for the "
"last number of the IP to restrict to an entire "
"IP domain, i.e. 1.2.3.0.";
m->m_cgi = "searchip";
m->m_xml = "searchIp";
m->m_max = MAX_SEARCH_IPS;
m->m_off = (char *)cr.m_searchIps - x;
m->m_type = TYPE_IP;
m->m_def = "";
m++;

m->m_title = "Spam Assassin IPs";
m->m_desc = "Browsers coming from these IPs are deemed to be spam "
"assassins and have access to a subset of the controls to "
"ban and remove domains and IPs from the index.";
m->m_cgi = "assip";
m->m_xml = "assassinIp";
m->m_max = MAX_SPAM_IPS;
m->m_off = (char *)cr.m_spamIps - x;
m->m_type = TYPE_IP;
m->m_def = "";
m++;

m->m_title = "Admin Passwords";
m->m_desc = "Passwords allowed to edit this collection record. "
"First password can only be deleted by the master "
"administrator. If no password or Admin IP is given at time "
"of creation then the default password of 'footbar23' will "
"be assigned.";
m->m_cgi = "apwd";
m->m_xml = "adminPassword";
m->m_max = MAX_ADMIN_PASSWORDS;
m->m_off = (char *)cr.m_adminPwds - x;
m->m_type = TYPE_STRINGNONEMPTY;
m->m_size = PASSWORD_MAX_LEN+1;
m->m_def = "";
m++;

m->m_title = "Admin IPs";
m->m_desc = "If someone connects from one of these IPs and provides "
"a password from the table above then they will have full "
"administrative privileges for this collection. If you "
"specified no Admin Passwords above then they need only "
"connect from an IP in this table to get the privileges.";
m->m_cgi = "adminip";
m->m_xml = "adminIp";
m->m_max = MAX_ADMIN_IPS;
m->m_off = (char *)cr.m_adminIps - x;
m->m_type = TYPE_IP;
m->m_def = "";
m++;
*/

///////////////////////////////////////////
// URL FILTERS
///////////////////////////////////////////

//m->m_title = "Url Filters";
// this is description just for the conf file.
//m->m_cdesc = "See overview.html for a description of URL filters.";
//m->m_type = TYPE_COMMENT;
//m++;
m->m_cgi = "ufp";
|
|
m->m_title = "url filters profile";
|
|
m->m_xml = "urlFiltersProfile";
|
|
m->m_desc = "Rather than editing the table below, you can select "
|
|
"a predefined set of url instructions in this drop down menu "
|
|
"that will update the table for you. Selecting <i>custom</i> "
|
|
"allows you to make custom changes to the table. "
|
|
"Selecting <i>web</i> configures the table for spidering "
|
|
"the web in general. "
|
|
"Selecting <i>news</i> configures the table for spidering "
|
|
"new sites. "
|
|
"Selecting <i>chinese</i> makes the spider prioritize the "
|
|
"spidering of chinese pages, etc. "
|
|
"Selecting <i>shallow</i> makes the spider go deep on "
|
|
"all sites unless they are tagged <i>shallow</i> in the "
|
|
"site list. "
|
|
"Important: "
|
|
"If you select a profile other than <i>custom</i> "
|
|
"then your changes "
|
|
"to the table will be lost.";
|
|
m->m_off = (char *)&cr.m_urlFiltersProfile - x;
|
|
m->m_colspan = 3;
|
|
m->m_type = TYPE_SAFEBUF;//UFP;// 1 byte dropdown menu
|
|
m->m_def = "web"; // UFP_WEB
|
|
m->m_flags = PF_REBUILDURLFILTERS | PF_CLONE;
|
|
m->m_page = PAGE_FILTERS;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
m->m_title = "expression";
|
|
m->m_desc = "Before downloading the contents of a URL, Gigablast "
|
|
"first chains down this "
|
|
"list of "
|
|
"expressions</a>, "
|
|
"starting with expression #0. "
|
|
//"This table is also consulted "
|
|
//"for every outlink added to spiderdb. "
|
|
"The first expression it matches is the ONE AND ONLY "
|
|
"matching row for that url. "
|
|
"It then uses the "
|
|
//"<a href=/overview.html#spiderfreq>"
|
|
"respider frequency, "
|
|
//"<a href=/overview.html#spiderpriority>"
|
|
"spider priority, etc. on the MATCHING ROW when spidering "
|
|
//"and <a href=/overview.html#ruleset>ruleset</a> to "
|
|
"that URL. "
|
|
"If you specify the <i>expression</i> as "
|
|
"<i><b>default</b></i> then that MATCHES ALL URLs. "
|
|
"URLs with high spider priorities take spidering "
|
|
"precedence over "
|
|
"URLs with lower spider priorities. "
|
|
"The respider frequency dictates how often a URL will "
|
|
"be respidered. "
|
|
|
|
"See the help table below for examples of all the supported "
|
|
"expressions. "
|
|
"Use the <i>&&</i> operator to string multiple expressions "
|
|
"together in the same expression text box. "
|
|
"If you check the <i>delete</i> checkbox then urls matching "
|
|
"that row will be deleted if already indexed, otherwise, "
|
|
"they just won't be indexed."
|
|
//"A <i>spider priority</i> of "
|
|
//"<i>FILTERED</i> or <i>BANNED</i> "
|
|
// "<i>DELETE</i> "
|
|
// "will cause the URL to not be spidered, "
|
|
// "or if it has already "
|
|
// "been indexed, it will be deleted when it is respidered."
|
|
"<br><br>";
|
|
|
|
/*
|
|
"A URL is respidered according to the "
|
|
"spider frequency. If this is blank then Gigablast will "
|
|
"use the spider frequency explicitly dictated by the rule "
|
|
"set. If the ruleset does not contain a <spiderFrequency> "
|
|
"xml tag, then Gigablast will "
|
|
"intelligently determine the best time to respider that "
|
|
"URL.<br><br>"
|
|
|
|
"If the "
|
|
"<a href=/overview.html#spiderpriority>"
|
|
"spider priority</a> of a URL is undefined then "
|
|
"Gigablast will use the spider priority explicitly "
|
|
"dictated by the ruleset. If the ruleset does not contain "
|
|
"a <spiderPriority> xml tag, then Gigablast "
|
|
"will spider that URL with a priority of its linking parent "
|
|
"minus 1, "
|
|
"resulting in breadth first spidering. A URL of spider "
|
|
"priority X will be placed in spider priority queue #X. "
|
|
"Many spider parameters can be configured on a per "
|
|
"spider priority queue basis. For instance, spidering "
|
|
"can be toggled on a per queue basis, as can link "
|
|
"harvesting.<br><br>"
|
|
|
|
"The <b>ruleset</b> you select corresponds to a file on "
|
|
"disk named tagdb*.xml, where the '*' is a number. Each of "
|
|
"these files is a set of rules in XML that dictate how to "
|
|
"index and spider a document. "
|
|
"You can add your own ruleset file to Gigablast's working "
|
|
"directory and it will automatically be "
|
|
"included in the ruleset drop down menu. Once a document "
|
|
"has been indexed with a ruleset, then the corresponding "
|
|
"ruleset file cannot be deleted without risk of corruption."
|
|
"<br><br>"
|
|
|
|
"You can have up to 32 regular expressions. "
|
|
"Example: <b>^http://.*\\.uk/</b> would match all urls from "
|
|
"the UK. See this "
|
|
"<a href=/?redir="
|
|
"http://www.phpbuilder.com/columns/dario19990616.php3>"
|
|
"tutorial by example</a> for more information."
|
|
|
|
"<br><br>"
|
|
"Gigablast also supports the following special \"regular "
|
|
"expressions\": "
|
|
"link:gigablast and doc:quality<X and doc:quality>X.";
|
|
*/
|
|
|
|
m->m_cgi = "fe";
|
|
m->m_xml = "filterExpression";
|
|
m->m_max = MAX_FILTERS;
|
|
// array of safebufs i guess...
|
|
m->m_off = (char *)cr.m_regExs - x;
|
|
// this is a safebuf, dynamically allocated string really
|
|
m->m_type = TYPE_SAFEBUF;//STRINGNONEMPTY
|
|
// the size of each element in the array:
|
|
m->m_size = sizeof(SafeBuf);//MAX_REGEX_LEN+1;
|
|
m->m_page = PAGE_FILTERS;
|
|
m->m_rowid = 1; // if we START a new row
|
|
m->m_def = "";
|
|
m->m_flags = PF_REBUILDURLFILTERS | PF_CLONE;
|
|
m->m_page = PAGE_FILTERS;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
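
// A minimal sketch of the first-match-wins scan described in the
// desc above (matchesExpression() is a hypothetical stand-in for the
// real expression matcher in the spider code):
//
//   int32_t getMatchingFilterRow ( CollectionRec *cr , Url *u ) {
//       for ( int32_t i = 0 ; i < cr->m_numRegExs ; i++ ) {
//           char *expr = cr->m_regExs[i].getBufStart();
//           // "default" matches every url
//           if ( strcmp ( expr , "default" ) == 0 ) return i;
//           if ( matchesExpression ( expr , u ) ) return i;
//       }
//       // no row matched; caller decides the fallback
//       return -1;
//   }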

m->m_title = "harvest links";
m->m_cgi = "hspl";
m->m_xml = "harvestLinks";
m->m_max = MAX_FILTERS;
m->m_off = (char *)cr.m_harvestLinks - x;
m->m_type = TYPE_CHECKBOX;
m->m_def = "1";
m->m_page = PAGE_FILTERS;
m->m_rowid = 1;
m->m_flags = PF_REBUILDURLFILTERS | PF_CLONE;
m->m_obj = OBJ_COLL;
m++;

/*
m->m_title = "spidering enabled";
m->m_cgi = "cspe";
m->m_xml = "spidersEnabled";
m->m_max = MAX_FILTERS;
m->m_off = (char *)cr.m_spidersEnabled - x;
m->m_type = TYPE_CHECKBOX;
m->m_def = "1";
m->m_page = PAGE_FILTERS;
m->m_rowid = 1;
m->m_flags = PF_REBUILDURLFILTERS;
m++;
*/

m->m_title = "respider frequency (days)";
m->m_cgi = "fsf";
m->m_xml = "filterFrequency";
m->m_max = MAX_FILTERS;
m->m_off = (char *)cr.m_spiderFreqs - x;
m->m_type = TYPE_FLOAT;
// why was this default 0 days?
m->m_def = "30.0"; // 0.0
m->m_page = PAGE_FILTERS;
m->m_obj = OBJ_COLL;
m->m_units = "days";
m->m_rowid = 1;
m->m_flags = PF_REBUILDURLFILTERS | PF_CLONE;
m++;

m->m_title = "max spiders";
m->m_desc = "Do not allow more than this many outstanding spiders "
"for all urls in this priority."; // was "per rule"
m->m_cgi = "mspr";
m->m_xml = "maxSpidersPerRule";
m->m_max = MAX_FILTERS;
m->m_off = (char *)cr.m_maxSpidersPerRule - x;
m->m_type = TYPE_LONG;
m->m_def = "99";
m->m_page = PAGE_FILTERS;
m->m_obj = OBJ_COLL;
m->m_rowid = 1;
m->m_flags = PF_REBUILDURLFILTERS | PF_CLONE;
m++;

m->m_title = "max spiders per ip";
m->m_desc = "Allow this many spiders per IP.";
m->m_cgi = "mspi";
m->m_xml = "maxSpidersPerIp";
m->m_max = MAX_FILTERS;
m->m_off = (char *)cr.m_spiderIpMaxSpiders - x;
m->m_type = TYPE_LONG;
m->m_def = "7";
m->m_page = PAGE_FILTERS;
m->m_obj = OBJ_COLL;
m->m_rowid = 1;
m->m_flags = PF_REBUILDURLFILTERS | PF_CLONE;
m++;

m->m_title = "same ip wait (ms)";
m->m_desc = "Wait at least this long before downloading urls from "
"the same IP address.";
m->m_cgi = "xg";
m->m_xml = "spiderIpWait";
m->m_max = MAX_FILTERS;
//m->m_fixed = MAX_PRIORITY_QUEUES;
m->m_off = (char *)cr.m_spiderIpWaits - x;
m->m_type = TYPE_LONG;
m->m_def = "1000";
m->m_page = PAGE_FILTERS;
m->m_obj = OBJ_COLL;
m->m_units = "milliseconds";
m->m_rowid = 1;
m->m_flags = PF_REBUILDURLFILTERS | PF_CLONE;
m++;

/*
m->m_title = "page quota";
m->m_cgi = "fsq";
m->m_xml = "filterQuota";
m->m_max = MAX_FILTERS;
m->m_off = (char *)cr.m_spiderQuotas - x;
m->m_type = TYPE_LONG_LONG;
m->m_def = "-1"; // -1 means no quota
m->m_page = PAGE_FILTERS;
m->m_units = "pages";
m->m_rowid = 1;
m++;
*/

m->m_title = "delete";
m->m_cgi = "fdu";
m->m_xml = "forceDeleteUrls";
m->m_max = MAX_FILTERS;
m->m_off = (char *)cr.m_forceDelete - x;
m->m_type = TYPE_CHECKBOX;
m->m_def = "0";
m->m_page = PAGE_FILTERS;
m->m_rowid = 1;
m->m_flags = PF_REBUILDURLFILTERS | PF_CLONE;
m->m_obj = OBJ_COLL;
m++;

m->m_title = "spider priority";
m->m_cgi = "fsp";
m->m_xml = "filterPriority";
m->m_max = MAX_FILTERS;
m->m_off = (char *)cr.m_spiderPriorities - x;
m->m_type = TYPE_PRIORITY2; // includes UNDEFINED priority in dropdown
m->m_page = PAGE_FILTERS;
m->m_obj = OBJ_COLL;
m->m_rowid = 1;
m->m_def = "50";
m->m_flags = PF_REBUILDURLFILTERS | PF_CLONE;
m->m_addin = 1; // "insert" follows?
m++;

/*
m->m_title = "diffbot api";
m->m_cgi = "dapi";
m->m_xml = "diffbotAPI";
m->m_max = MAX_FILTERS;
m->m_off = (char *)cr.m_spiderDiffbotApiUrl - x;
// HACK: we print a dropdown for this but the value is a string
// because the items in the drop down can change so we can't store
// an item # here, it has to be a string, i.e. the diffbot api url.
// john might add a new custom api to m_diffbotApiList at any time.
// so we select the item in the drop down if it matches THIS string.
m->m_type = TYPE_SAFEBUF;//DIFFBOT_DROPDOWN;
m->m_def = "";
m->m_page = PAGE_FILTERS;
m->m_size = sizeof(SafeBuf);
m->m_rowid = 1;
m->m_addin = 1; // "insert" follows?
m->m_flags = PF_REBUILDURLFILTERS | PF_DIFFBOT;
m++;
*/

//m->m_title = "<a href=/overview.html#ruleset>ruleset</a>";
//m->m_cgi = "frs";
//m->m_xml = "filterRuleset";
//m->m_max = MAX_FILTERS;
//m->m_off = (char *)cr.m_rulesets - x;
//m->m_type = TYPE_RULESET; // int32_t with dropdown of rulesets
//m->m_page = PAGE_FILTERS;
//m->m_rowid = 1;
//m->m_addin = 1; // "insert" follows?
//m->m_def = "";
//m++;

/*
// default rule
m->m_title = "<b>DEFAULT</b>";
m->m_desc = "Use the following values by default if no ruleset in "
"tagdb matches the URL.";
m->m_type = TYPE_CONSTANT;
m->m_page = PAGE_FILTERS;
m->m_rowid = 2;
m->m_hdrs = 0;
m++;

//m->m_cdesc = "The default parameters if no reg exs above matched.";
m->m_cgi = "fsfd";
m->m_xml = "filterFrequencyDefault";
m->m_off = (char *)&cr.m_defaultSpiderFrequency - x;
m->m_type = TYPE_FLOAT;
m->m_def = "0.0";
m->m_page = PAGE_FILTERS;
m->m_units = "days";
m->m_rowid = 2;
m->m_hdrs = 0;
m++;

m->m_cgi = "fsqd";
m->m_xml = "filterQuotaDefault";
m->m_off = (char *)&cr.m_defaultSpiderQuota - x;
m->m_type = TYPE_LONG_LONG;
m->m_def = "-1";
m->m_page = PAGE_FILTERS;
m->m_units = "pages";
m->m_rowid = 2;
m->m_hdrs = 0;
m++;

m->m_cgi = "fspd";
m->m_xml = "filterPriorityDefault";
m->m_off = (char *)&cr.m_defaultSpiderPriority - x;
m->m_type = TYPE_PRIORITY2; // includes UNDEFINED priority in dropdown
m->m_def = "4";
m->m_page = PAGE_FILTERS;
m->m_rowid = 2;
m->m_hdrs = 0;
m++;
*/

/*
m->m_cgi = "frsd";
m->m_xml = "filterRulesetDefault";
m->m_off = (char *)&cr.m_defaultSiteFileNum - x;
m->m_type = TYPE_RULESET; // int32_t with dropdown of rulesets
m->m_def = "0";
m->m_page = PAGE_FILTERS;
m->m_rowid = 2;
m->m_hdrs = 0;
m++;
*/

/*
///////////////////////////////////////////
// PRIORITY CONTROLS
///////////////////////////////////////////

// . show the priority in this column
// . a monotonic sequence repeating each number twice,
// basically, div 2 is what "D2" means
// . so we get 0,0,1,1,2,2,3,3, ...
m->m_title = "priority";
//m->m_desc = "What priority is this spider queue?";
m->m_max = MAX_PRIORITY_QUEUES;
m->m_fixed = MAX_PRIORITY_QUEUES;
m->m_type = TYPE_MONOD2;
m->m_page = PAGE_PRIORITIES;
m->m_rowid = 3;
m++;

// . show an alternating 0 and 1 in this column
// because it is type MONOM2, a monotonic sequence
// modulus 2.
// . so we get 0,1,0,1,0,1,0,1,0,1, ...
m->m_title = "is new";
m->m_desc = "Does this priority contain new (unindexed) urls?";
m->m_max = MAX_PRIORITY_QUEUES;
m->m_fixed = MAX_PRIORITY_QUEUES;
m->m_type = TYPE_MONOM2;
m->m_page = PAGE_PRIORITIES;
m->m_rowid = 3;
m++;

m->m_title = "spidering enabled";
m->m_desc = "Are spiders enabled for this priority?";
m->m_cgi = "xa";
m->m_xml = "spiderPrioritySpideringEnabled";
m->m_max = MAX_PRIORITY_QUEUES;
m->m_fixed = MAX_PRIORITY_QUEUES;
m->m_off = (char *)cr.m_pq_spideringEnabled - x;
m->m_type = TYPE_CHECKBOX;
m->m_def = "1";
m->m_page = PAGE_PRIORITIES;
m->m_rowid = 3;
m++;

m->m_title = "time slice weight";
m->m_desc = "What percentage of the time to draw urls from "
"this priority?";
m->m_cgi = "xb";
m->m_xml = "spiderPriotiyTimeSlice";
m->m_max = MAX_PRIORITY_QUEUES;
m->m_fixed = MAX_PRIORITY_QUEUES;
m->m_off = (char *)cr.m_pq_timeSlice - x;
m->m_type = TYPE_FLOAT;
m->m_page = PAGE_PRIORITIES;
m->m_rowid = 3; // if we START a new row
m->m_def = "100.0";
m->m_units = "%%";
m++;

m->m_title = "spidered";
m->m_desc = "How many urls we have spidered in the last 5 minutes.";
m->m_cgi = "sps";
m->m_xml = "spiderPriotiySpidered";
m->m_max = MAX_PRIORITY_QUEUES;
m->m_fixed = MAX_PRIORITY_QUEUES;
m->m_off = (char *)cr.m_pq_spidered - x;
m->m_type = TYPE_LONG_CONST;
m->m_page = PAGE_PRIORITIES;
m->m_rowid = 3; // if we START a new row
m->m_def = "0";
m->m_sync = false; // do not sync this parm
m++;

m->m_title = "spider links";
m->m_desc = "Harvest links from the content and add to spiderdb.";
m->m_cgi = "xc";
m->m_xml = "spiderPrioritySpiderLinks";
m->m_max = MAX_PRIORITY_QUEUES;
m->m_fixed = MAX_PRIORITY_QUEUES;
m->m_off = (char *)cr.m_pq_spiderLinks - x;
m->m_type = TYPE_CHECKBOX;
m->m_def = "1";
m->m_page = PAGE_PRIORITIES;
m->m_rowid = 3;
m++;

m->m_title = "spider same host outlinks only";
m->m_desc = "Harvest links to the same hostnames (www.xyz.com) "
"and add to spiderdb.";
m->m_cgi = "xd";
m->m_xml = "spiderPrioritySpiderSameHostnameLinks";
m->m_max = MAX_PRIORITY_QUEUES;
m->m_fixed = MAX_PRIORITY_QUEUES;
m->m_off = (char *)cr.m_pq_spiderSameHostnameLinks - x;
m->m_type = TYPE_CHECKBOX;
m->m_def = "0";
m->m_page = PAGE_PRIORITIES;
m->m_rowid = 3;
m++;

m->m_title = "force links into queue";
m->m_desc = "If slated to be added to this queue, and link is "
"already in a non-forced queue, force it into this queue. "
"Keep a cache to reduce repetitious adds to this queue.";
m->m_cgi = "xdd";
m->m_xml = "spiderPriorityForceQueue";
m->m_max = MAX_PRIORITY_QUEUES;
m->m_fixed = MAX_PRIORITY_QUEUES;
m->m_off = (char *)cr.m_pq_autoForceQueue - x;
m->m_type = TYPE_CHECKBOX;
m->m_def = "0";
m->m_page = PAGE_PRIORITIES;
m->m_rowid = 3;
m++;

m->m_title = "max spiders per ip";
m->m_desc = "Do not allow more than this many simultaneous "
"downloads per IP address.";
m->m_cgi = "xe";
m->m_xml = "spiderPriorityMaxSpidersPerIp";
m->m_max = MAX_PRIORITY_QUEUES;
m->m_fixed = MAX_PRIORITY_QUEUES;
m->m_off = (char *)cr.m_pq_maxSpidersPerIp - x;
m->m_type = TYPE_LONG;
m->m_def = "1";
m->m_page = PAGE_PRIORITIES;
m->m_rowid = 3;
m++;

m->m_title = "max spiders per domain";
m->m_desc = "Do not allow more than this many simultaneous "
"downloads per domain.";
m->m_cgi = "xf";
m->m_xml = "spiderPriorityMaxSpidersPerDom";
m->m_max = MAX_PRIORITY_QUEUES;
m->m_fixed = MAX_PRIORITY_QUEUES;
m->m_off = (char *)cr.m_pq_maxSpidersPerDom - x;
m->m_type = TYPE_LONG;
m->m_def = "1";
m->m_page = PAGE_PRIORITIES;
m->m_rowid = 3;
m++;

m->m_title = "max respider wait (days)";
m->m_desc = "Do not wait longer than this before attempting to "
"respider.";
m->m_cgi = "xr";
m->m_xml = "spiderPriorityMaxRespiderWait";
m->m_max = MAX_PRIORITY_QUEUES;
m->m_fixed = MAX_PRIORITY_QUEUES;
m->m_off = (char *)cr.m_pq_maxRespiderWait - x;
m->m_type = TYPE_FLOAT;
m->m_def = "180.0";
m->m_page = PAGE_PRIORITIES;
m->m_rowid = 3;
m->m_units = "days";
m++;

m->m_title = "first respider wait (days)";
m->m_desc = "Reschedule a new url for respidering this many days "
"from the first time it is actually spidered.";
m->m_cgi = "xfrw";
m->m_xml = "spiderPriorityFirstRespiderWait";
m->m_max = MAX_PRIORITY_QUEUES;
m->m_fixed = MAX_PRIORITY_QUEUES;
m->m_off = (char *)cr.m_pq_firstRespiderWait - x;
m->m_type = TYPE_FLOAT;
m->m_def = "60.0";
m->m_page = PAGE_PRIORITIES;
m->m_rowid = 3;
m->m_units = "days";
m++;

m->m_title = "same ip wait (ms)";
m->m_desc = "Wait at least this long before downloading urls from "
"the same IP address.";
m->m_cgi = "xg";
m->m_xml = "spiderPrioritySameIpWait";
m->m_max = MAX_PRIORITY_QUEUES;
m->m_fixed = MAX_PRIORITY_QUEUES;
m->m_off = (char *)cr.m_pq_sameIpWait - x;
m->m_type = TYPE_LONG;
m->m_def = "10000";
m->m_page = PAGE_PRIORITIES;
m->m_rowid = 3;
m->m_units = "milliseconds";
m++;

m->m_title = "same domain wait (ms)";
m->m_desc = "Wait at least this long before downloading urls from "
"the same domain.";
m->m_cgi = "xh";
m->m_xml = "spiderPrioritySameDomainWait";
m->m_max = MAX_PRIORITY_QUEUES;
m->m_fixed = MAX_PRIORITY_QUEUES;
m->m_off = (char *)cr.m_pq_sameDomainWait - x;
m->m_type = TYPE_LONG;
m->m_def = "10000";
m->m_page = PAGE_PRIORITIES;
m->m_rowid = 3;
m->m_units = "milliseconds";
m++;
*/

///////////////////////////////////////////
// SITEDB FILTERS
///////////////////////////////////////////

/*
m->m_title = "site expression";
m->m_desc = "The site of a url is a substring of that url, which "
"defines a set of urls which are all primarily controlled "
"by the same entity. The smallest such site of a url is "
"returned, because a url can have multiple sites. For "
"example, fred.blogspot.com is a site, and the blogspot.com "
"site contains that site.";
m->m_cgi = "sdbfe";
m->m_xml = "siteExpression";
m->m_max = MAX_SITE_EXPRESSIONS;
m->m_off = (char *)cr.m_siteExpressions - x;
m->m_type = TYPE_STRINGNONEMPTY;
m->m_size = MAX_SITE_EXPRESSION_LEN+1;
m->m_page = PAGE_RULES;
m->m_rowid = 1; // if we START a new row
m->m_def = "";
m++;

m->m_title = "site rule";
m->m_cgi = "sdbsrs";
m->m_xml = "siteRule";
m->m_max = MAX_SITE_EXPRESSIONS;
m->m_off = (char *)cr.m_siteRules - x;
m->m_type = TYPE_SITERULE;
m->m_page = PAGE_RULES;
m->m_rowid = 1;
m->m_def = "0";
m++;
*/

/*
m->m_title = "siterec default ruleset";
m->m_cgi = "sdbfdr";
m->m_xml = "siterecDefaultRuleset";
m->m_max = MAX_SITEDB_FILTERS;
m->m_off = (char *)cr.m_sitedbFilterRulesets - x;
m->m_type = TYPE_RULESET;
m->m_page = PAGE_FILTERS2;
m->m_rowid = 1;
m->m_def = "-1";
m++;

m->m_title = "ban subdomains";
m->m_cgi = "sdbbsd";
m->m_xml = "siterecBanSubdomains";
m->m_max = MAX_SITEDB_FILTERS;
m->m_off = (char *)cr.m_sitedbFilterBanSubdomains - x;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_FILTERS2;
m->m_rowid = 1;
m->m_addin = 1; // "insert" follows
m->m_def = "0";
m++;
*/

// ///////////////////////////////////////////
// // SPAM CONTROLS //
// ///////////////////////////////////////////

// m->m_title = "char in url";
// m->m_desc = "url has - or _ or a digit in the domain, "
// "has a plus in the cgi part.";
// m->m_cgi = "spamctrla";
// m->m_off = (char *)&cr.m_spamTests[CHAR_IN_URL] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "20";
// //m->m_smaxc = (char *)&cr.m_spamMaxes[CHAR_IN_URL] - x;
// m->m_group = 1;
// m->m_sparm = 0;
// m++;

// m->m_title = "bad tld";
// m->m_desc = "tld is info or biz";
// m->m_cgi = "spamctrlb";
// m->m_off = (char *)&cr.m_spamTests[BAD_TLD] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "20";
// m->m_group = 0;
// m->m_sparm = 0;
// m++;

// m->m_title = "good tld";
// m->m_desc = "tld is gov, edu or mil";
// m->m_cgi = "spamctrlc";
// m->m_off = (char *)&cr.m_spamTests[GOOD_TLD] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "-20";
// m->m_group = 0;
// m->m_sparm = 0;
// m++;

// m->m_title = "title has spammy words";
// m->m_desc = "Title has spammy words, is all lower case, "
// "or has > 200 chars. ";
// m->m_cgi = "spamctrld";
// m->m_off = (char *)&cr.m_spamTests[WORD_IN_TITLE] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "20";
// m->m_group = 0;
// m->m_sparm = 0;
// m++;

// m->m_title = "img src to other domains";
// m->m_desc = "Page has img src to other domains. ";
// m->m_cgi = "spamctrle";
// m->m_off = (char *)&cr.m_spamTests[IMG_SRC_OTHER_DOMAIN] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "5";
// m->m_group = 0;
// m->m_sparm = 0;
// m++;

// m->m_title = "page has spammy words";
// m->m_desc = "Page has spammy words. ";
// m->m_cgi = "spamctrlf";
// m->m_off = (char *)&cr.m_spamTests[SPAMMY_WORDS] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "5";
// m->m_group = 0;
// m->m_sparm = 0;
// m++;

// m->m_title = "consecutive link text";
// m->m_desc = "Three consecutive link texts "
// "contain the same word. ";
// m->m_cgi = "spamctrlg";
// m->m_off = (char *)&cr.m_spamTests[CONSECUTIVE_LINK_TEXT] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "10";
// m->m_group = 0;
// m->m_sparm = 0;
// m++;

// m->m_title = "affiliate company links";
// m->m_desc = "links to amazon, allposters, or zappos. ";
// m->m_cgi = "spamctrlh";
// m->m_off = (char *)&cr.m_spamTests[AFFILIATE_LINKS] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "10";
// m->m_group = 0;
// m->m_sparm = 0;
// m++;

// m->m_title = "affiliate in links";
// m->m_desc = "Has string 'affiliate' in the links. ";
// m->m_cgi = "spamctrli";
// m->m_off = (char *)&cr.m_spamTests[AFFILIATE_LINKS2] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "40";
// m->m_group = 0;
// m->m_sparm = 0;
// m++;

// m->m_title = "Iframe to amazon";
// m->m_desc = "Has an iframe whose src is amazon. ";
// m->m_cgi = "spamctrlj";
// m->m_off = (char *)&cr.m_spamTests[IFRAME_TO_AMAZON] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "30";
// m->m_group = 0;
// m->m_sparm = 0;
// m++;

// m->m_title = "long links";
// m->m_desc = "Links to urls which are > 128 chars. ";
// m->m_cgi = "spamctrlk";
// m->m_off = (char *)&cr.m_spamTests[LINKS_OVER_128_CHARS] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "5";
// m->m_group = 0;
// m->m_sparm = 0;
// m++;

// m->m_title = "links to queries";
// m->m_desc = "links have ?q= or &q= in them. ";
// m->m_cgi = "spamctrll";
// m->m_off = (char *)&cr.m_spamTests[LINKS_HAVE_QUERIES] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "5";
// m->m_group = 0;
// m->m_sparm = 0;
// m++;

// m->m_title = "google ad client";
// m->m_desc = "Page has a google ad client. ";
// m->m_cgi = "spamctrlm";
// m->m_off = (char *)&cr.m_spamTests[GOOGLE_AD_CLIENT] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "20";
// m->m_group = 0;
// m->m_sparm = 0;
// m++;

// m->m_title = "percent text in links";
// m->m_desc = "percent of text in links (over 50 percent). ";
// m->m_cgi = "spamctrln";
// m->m_off = (char *)&cr.m_spamTests[PERCENT_IN_LINKS] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "15";
// m->m_group = 0;
// m->m_sparm = 0;
// m++;

// m->m_title = "links to a url with a - or _ in the domain";
// m->m_desc = "Links to a url with a - or _ in the domain";
// m->m_cgi = "spamctrlo";
// m->m_off = (char *)&cr.m_spamTests[DASH_IN_LINK] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "2";
// m->m_group = 0;
// m->m_sparm = 0;
// m++;

// m->m_title = "links to a url which is .info or .biz";
// m->m_desc = "Links to a url which is .info or .biz.";
// m->m_cgi = "spamctrlp";
// m->m_off = (char *)&cr.m_spamTests[LINK_TO_BADTLD] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "2";
// m->m_group = 0;
// m->m_sparm = 0;
// m++;

// m->m_title = "links to a dmoz category";
// m->m_desc = "Links to a dmoz category.";
// m->m_cgi = "spamctrlq";
// m->m_off = (char *)&cr.m_spamTests[LINKS_ARE_DMOZ_CATS] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "4";
// m->m_group = 0;
// m->m_sparm = 0;
// m++;

// m->m_title = "consecutive bold text";
// m->m_desc = "Three consecutive bold texts "
// "contain the same word. ";
// m->m_cgi = "spamctrlr";
// m->m_off = (char *)&cr.m_spamTests[CONSECUTIVE_BOLD_TEXT] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "10";
// m->m_group = 0;
// m->m_sparm = 0;
// m++;

// m->m_title = "link text doesn't match domain";
// m->m_desc = "Link text looks like a domain, but the link doesn't go there";
// m->m_cgi = "spamctrls";
// m->m_off = (char *)&cr.m_spamTests[LINK_TEXT_NEQ_DOMAIN] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "10";
// m->m_group = 0;
// m->m_sparm = 0;
// m++;

// m->m_title = "force multiplier";
// m->m_desc = "Multiply this by the number of spam categories "
// "that have points times the total points, for the final"
// " score. Range between 0 and 1.";
// m->m_cgi = "frcmult";
// m->m_off = (char *)&cr.m_forceMultiplier - x;
// m->m_type = TYPE_FLOAT;
// m->m_page = PAGE_SPAM;
// m->m_def = "0.01";
// m->m_group = 1;
// m->m_sparm = 0;
// m++;

// /////////////////////// MAXES FOR SPAM CONTROLS ///////////////////////

// m->m_title = "max points for char in url";
// m->m_desc = "Max points for url has - or _ or a digit in the domain";
// m->m_cgi = "spammaxa";
// m->m_off = (char *)&cr.m_spamMaxes[CHAR_IN_URL] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "300";
// m->m_group = 1;
// m->m_sparm = 0;
// m++;

// m->m_title = "max points for bad tld";
// m->m_desc = "Max points for tld is info or biz";
// m->m_cgi = "spammaxb";
// m->m_off = (char *)&cr.m_spamMaxes[BAD_TLD] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_group = 0;
// m->m_def = "300";
// m->m_sparm = 0;
// m++;

// m->m_title = "max points for good tld";
// m->m_desc = "Max points for tld is gov, edu or mil";
// m->m_cgi = "spammaxc";
// m->m_off = (char *)&cr.m_spamMaxes[GOOD_TLD] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "300";
// m->m_group = 0;
// m->m_sparm = 0;
// m++;

// m->m_title = "max points for title has spammy words";
// m->m_desc = "Max points for Title has spammy words. ";
// m->m_cgi = "spammaxd";
// m->m_off = (char *)&cr.m_spamMaxes[WORD_IN_TITLE] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "300";
// m->m_group = 0;
// m->m_sparm = 0;
// m++;

// m->m_title = "max points for img src to other domains";
// m->m_desc = "Max points for Page has img src to other domains. ";
// m->m_cgi = "spammaxe";
// m->m_off = (char *)&cr.m_spamMaxes[IMG_SRC_OTHER_DOMAIN] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "300";
// m->m_group = 0;
// m->m_sparm = 0;
// m++;

// m->m_title = "max points for page has spammy words";
// m->m_desc = "Max points for Page has spammy words. ";
// m->m_cgi = "spammaxf";
// m->m_off = (char *)&cr.m_spamMaxes[SPAMMY_WORDS] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "300";
// m->m_group = 0;
// m->m_sparm = 0;
// m++;

// m->m_title = "max points for consecutive link text";
// m->m_desc = "Max points for three consecutive link texts "
// "contain the same word. ";
// m->m_cgi = "spammaxg";
// m->m_off = (char *)&cr.m_spamMaxes[CONSECUTIVE_LINK_TEXT] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "300";
// m->m_group = 0;
// m->m_sparm = 0;
// m++;

// m->m_title = "max points for affiliate company links";
// m->m_desc = "Max points for links to amazon, allposters, or zappos. ";
// m->m_cgi = "spammaxh";
// m->m_off = (char *)&cr.m_spamMaxes[AFFILIATE_LINKS] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "300";
// m->m_group = 0;
// m->m_sparm = 0;
// m++;

// m->m_title = "max points for affiliate in links";
// m->m_desc = "Max points for Has string 'affiliate' in the links. ";
// m->m_cgi = "spammaxi";
// m->m_off = (char *)&cr.m_spamMaxes[AFFILIATE_LINKS2] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "300";
// m->m_group = 0;
// m->m_sparm = 0;
// m++;

// m->m_title = "max points for Iframe to amazon";
// m->m_desc = "Max points for Has an iframe whose src is amazon. ";
// m->m_cgi = "spammaxj";
// m->m_off = (char *)&cr.m_spamMaxes[IFRAME_TO_AMAZON] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "300";
// m->m_group = 0;
// m->m_sparm = 0;
// m++;

// m->m_title = "max points for long links";
// m->m_desc = "Max points for Links to urls which are > 128 chars. ";
// m->m_cgi = "spammaxk";
// m->m_off = (char *)&cr.m_spamMaxes[LINKS_OVER_128_CHARS] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "300";
// m->m_group = 0;
// m->m_sparm = 0;
// m++;

// m->m_title = "max points for links to queries";
// m->m_desc = "Max points for links have ?q= or &q= in them. ";
// m->m_cgi = "spammaxl";
// m->m_off = (char *)&cr.m_spamMaxes[LINKS_HAVE_QUERIES] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "300";
// m->m_group = 0;
// m->m_sparm = 0;
// m++;

// m->m_title = "max points for google ad client";
// m->m_desc = "Max points for Page has a google ad client. ";
// m->m_cgi = "spammaxm";
// m->m_off = (char *)&cr.m_spamMaxes[GOOGLE_AD_CLIENT] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "300";
// m->m_group = 0;
// m++;

// m->m_title = "max points for percent text in links";
// m->m_desc = "Max points for percent of text in links (over 50 percent). ";
// m->m_cgi = "spammaxn";
// m->m_off = (char *)&cr.m_spamMaxes[PERCENT_IN_LINKS] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "300";
// m->m_group = 0;
// m++;

// m->m_title = "max points for links have - or _";
// m->m_desc = "Max points for links have - or _";
// m->m_cgi = "spammaxo";
// m->m_off = (char *)&cr.m_spamMaxes[DASH_IN_LINK] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "300";
// m->m_group = 0;
// m++;

// m->m_title = "max points for links to .info or .biz";
// m->m_desc = "Max points for links to .info or .biz ";
// m->m_cgi = "spammaxp";
// m->m_off = (char *)&cr.m_spamMaxes[LINK_TO_BADTLD] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "300";
// m->m_group = 0;
// m++;

// m->m_title = "max points for links to a dmoz category";
// m->m_desc = "Max points for links to a dmoz category.";
// m->m_cgi = "spammaxq";
// m->m_off = (char *)&cr.m_spamMaxes[LINKS_ARE_DMOZ_CATS] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "300";
// m->m_group = 0;
// m++;

// m->m_title = "max points for consecutive bold text";
// m->m_desc = "Max points for three consecutive bold texts "
// "contain the same word. ";
// m->m_cgi = "spammaxr";
// m->m_off = (char *)&cr.m_spamMaxes[CONSECUTIVE_BOLD_TEXT] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "300";
// m->m_group = 0;
// m++;

// m->m_title = "max points for link text doesn't match domain";
// m->m_desc = "Max points for link text doesn't match domain";
// m->m_cgi = "spammaxs";
// m->m_off = (char *)&cr.m_spamMaxes[LINK_TEXT_NEQ_DOMAIN] - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_SPAM;
// m->m_def = "300";
// m->m_group = 0;
// m++;

// ///////////////////////////////////////////
// // END SPAM CONTROLS //
// ///////////////////////////////////////////

///////////////////////////////////////////
// QUALITY AGENT CONTROLS
///////////////////////////////////////////

/*
m->m_title = "all agents on";
m->m_desc = "Enable quality agent on all hosts for this collection.";
m->m_cgi = "aqae";
m->m_obj = OBJ_COLL;
m->m_def = "1";
m->m_off = (char *)&cr.m_qualityAgentEnabled - x;
m->m_type = TYPE_BOOL2; // no yes or no, just a link
m->m_page = PAGE_QAGENT;
m++;

m->m_title = "all agents off";
m->m_desc = "Disable quality agent on all hosts for this collection.";
m->m_cgi = "aqad";
m->m_def = "0";
m->m_off = (char *)&cr.m_qualityAgentEnabled - x;
m->m_type = TYPE_BOOL2; // no yes or no, just a link
m++;

m->m_title = "quality agent enabled";
m->m_desc = "If enabled, the agent will find quality modifiers for "
"all of the sites found in titledb.";
m->m_cgi = "qae";
m->m_off = (char *)&cr.m_qualityAgentEnabled - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_cast = 0;
m->m_page = PAGE_QAGENT;
m++;

m->m_title = "quality agent continuous loop";
m->m_desc = "If enabled, the agent will loop when it reaches "
"the end of titledb. Otherwise, it will disable itself.";
m->m_cgi = "qale";
m->m_off = (char *)&cr.m_qualityAgentLoop - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_page = PAGE_QAGENT;
m->m_cast = 1;
m++;

m->m_title = "ban subsites";
m->m_desc = "If enabled, the agent will look at the paths of"
" its titlerec sample; if the offending spam scores"
" all come from the same subsite, we just ban that one."
" Good for banning hijacked forums or spammed archives.";
m->m_cgi = "qabs";
m->m_off = (char *)&cr.m_qualityAgentBanSubSites - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_page = PAGE_QAGENT;
m->m_cast = 1;
m++;

m->m_title = "start document";
m->m_desc = "The agent will start at this docid when scanning "
"titledb looking for sites.";
m->m_cgi = "qasd";
m->m_off = (char *)&cr.m_qualityAgentStartDoc - x;
m->m_type = TYPE_LONG_LONG;
m->m_def = "0";
m->m_cast = 1;
m->m_page = PAGE_QAGENT;
m->m_sync = false; // do not sync this parm
m++;

m->m_title = "site quality refresh rate";
m->m_desc = "The quality agent will try to reexamine entries in "
"tagdb which were added more than this many seconds ago.";
m->m_cgi = "qasqrr";
m->m_off = (char *)&cr.m_tagdbRefreshRate - x;
m->m_type = TYPE_LONG;
m->m_page = PAGE_QAGENT;
m->m_group = 1;
m->m_cast = 1;
m->m_def = "2592000";
m++;

m->m_title = "link samples to get";
m->m_desc = "Lookup the qualities of this many links in tagdb.";
m->m_cgi = "lstg";
m->m_off = (char *)&cr.m_linkSamplesToGet - x;
m->m_type = TYPE_LONG;
m->m_page = PAGE_QAGENT;
m->m_cast = 1;
m->m_def = "256";
m++;

m->m_title = "min pages to evaluate";
m->m_desc = "The quality agent will skip this site if there are"
" fewer than this many pages to evaluate.";
m->m_cgi = "mpte";
m->m_off = (char *)&cr.m_minPagesToEvaluate - x;
m->m_type = TYPE_LONG;
m->m_page = PAGE_QAGENT;
m->m_cast = 1;
m->m_def = "1";
m++;

m->m_title = "link bonus divisor";
m->m_desc = "Decrease a page's spam score if it has a high "
"link quality. The bonus is computed by dividing the "
"page's link quality by this parm. LinkInfos older "
"than 30 days are considered stale and are not used.";
m->m_cgi = "lbd";
m->m_off = (char *)&cr.m_linkBonusDivisor - x;
m->m_type = TYPE_LONG;
m->m_page = PAGE_QAGENT;
m->m_cast = 1;
m->m_def = "20";
m++;

m->m_title = "points per banned link";
m->m_desc = "Subtract x points per banned site that a site links to.";
m->m_cgi = "nppbl";
m->m_off = (char *)&cr.m_negPointsPerBannedLink - x;
m->m_type = TYPE_LONG;
m->m_page = PAGE_QAGENT;
m->m_cast = 1;
m->m_def = "3";
m++;

m->m_title = "points per link to different sites on the same IP";
m->m_desc = "Subtract x points per site linked to that is on the "
"same IP as other links. Good for catching domain parking "
"lots and spammers in general, but looking up the IPs "
"slows down the agent considerably. (Set to 0 to disable.)";
m->m_cgi = "pfltdssi";
m->m_off = (char *)&cr.m_penaltyForLinksToDifferentSiteSameIp - x;
m->m_type = TYPE_LONG;
m->m_page = PAGE_QAGENT;
m->m_cast = 1;
m->m_def = "0";
m++;

m->m_title = "number of sites on an ip to sample";
m->m_desc = "Examine this many sites on the same ip as this site.";
m->m_cgi = "nsoits";
m->m_off = (char *)&cr.m_numSitesOnIpToSample - x;
m->m_type = TYPE_LONG;
m->m_page = PAGE_QAGENT;
m->m_cast = 1;
m->m_def = "100";
m++;

m->m_title = "points per banned site on ip";
m->m_desc = "Subtract x points from a site quality for each banned "
"site on the ip.";
m->m_cgi = "nppbsoi";
m->m_off = (char *)&cr.m_negPointsPerBannedSiteOnIp - x;
m->m_type = TYPE_LONG;
m->m_page = PAGE_QAGENT;
m->m_cast = 1;
m->m_def = "2";
m++;

m->m_title = "max penalty from being on a bad IP";
m->m_desc = "The penalty for being on a bad IP will not"
" exceed this value.";
m->m_cgi = "qampfboabi";
m->m_off = (char *)&cr.m_maxPenaltyFromIp - x;
m->m_type = TYPE_LONG;
m->m_page = PAGE_QAGENT;
m->m_cast = 1;
m->m_def = "-30";
m++;

m->m_title = "max sites per second";
m->m_desc = "The agent will not process more than this many"
" sites per second. Can be less than 1.";
m->m_cgi = "msps";
m->m_off = (char *)&cr.m_maxSitesPerSecond - x;
m->m_type = TYPE_FLOAT;
m->m_page = PAGE_QAGENT;
m->m_cast = 1;
m->m_def = "99999.0";
m++;

m->m_title = "site agent banned ruleset";
m->m_desc = "Site agent will assign this ruleset to documents "
"which are determined to be low quality.";
m->m_cgi = "";
m->m_off = (char *)&cr.m_qualityAgentBanRuleset - x;
m->m_type = TYPE_RULESET; // int32_t with dropdown of rulesets
m->m_page = PAGE_QAGENT;
m->m_cast = 1;
m->m_def = "30";
m++;

m->m_title = "ban quality threshold";
m->m_desc = "If the site has a spam score greater than this parm, it will"
" be inserted into the above ruleset.";
m->m_cgi = "tttsb";
m->m_off = (char *)&cr.m_siteQualityBanThreshold - x;
m->m_type = TYPE_LONG;
m->m_page = PAGE_QAGENT;
m->m_cast = 1;
m->m_def = "-100";
m++;

m->m_title = "threshold to trigger site reindex";
m->m_desc = "If the site has a quality less than this parm, it will"
" be added to the spider queue for reindexing.";
m->m_cgi = "tttsr";
m->m_off = (char *)&cr.m_siteQualityReindexThreshold - x;
m->m_type = TYPE_LONG;
m->m_page = PAGE_QAGENT;
m->m_cast = 1;
m->m_def = "-100";
m++;

// m->m_title = "";
// m->m_desc = "";
// m->m_cgi = "";
// m->m_off = (char *)&cr.m_ - x;
// m->m_type = TYPE_LONG;
// m->m_page = PAGE_QAGENT;
// m->m_def = "";
// m++;
*/

///////////////////////////////////////////
// END QUALITY AGENT CONTROLS
///////////////////////////////////////////
|
|
|
|
|
|
///////////////////////////////////////////
|
|
// AD FEED CONTROLS
|
|
///////////////////////////////////////////
|
|
/*
|
|
m->m_title = "num ads in paid inclusion ad feed";
|
|
m->m_desc = "The number of ads we would like returned from the ad"
|
|
" server. This applies to all paid inclusion ads below.";
|
|
m->m_cgi = "apin";
|
|
m->m_off = (char *)&cr.m_adPINumAds - x;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "2";
|
|
m->m_page = PAGE_ADFEED;
|
|
m++;
|
|
|
|
m->m_title = "num ads in skyscraper ad feed";
|
|
m->m_desc = "The number of ads we would like returned from the ad"
|
|
" server. This applies to all skyscraper ads below.";
|
|
m->m_cgi = "assn";
|
|
m->m_off = (char *)&cr.m_adSSNumAds - x;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "5";
|
|
m->m_page = PAGE_ADFEED;
|
|
m++;
|
|
|
|
m->m_title = "skyscraper ad width";
|
|
m->m_desc = "The width of the skyscraper ad column in pixels";
|
|
m->m_cgi = "awd";
|
|
m->m_off = (char *)&cr.m_adWidth - x;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "300";
|
|
m->m_page = PAGE_ADFEED;
|
|
m++;
|
|
|
|
m->m_title = "ad feed timeout";
|
|
m->m_desc = "The time (in milliseconds) to wait for an ad list to be "
|
|
"returned before timing out and displaying the results "
|
|
"without any ads. This applies to all ads below.";
|
|
m->m_cgi = "afto";
|
|
m->m_off = (char *)&cr.m_adFeedTimeOut - x;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "1000";
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "(1) paid inclusion ad enable";
|
|
m->m_desc = "Enable/Disable the paid inclusion ad.";
|
|
m->m_cgi = "apie";
|
|
m->m_off = (char *)&cr.m_adPIEnable - x;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "1";
|
|
m++;
|
|
|
|
m->m_title = "(1) paid inclusion ad feed link";
|
|
m->m_desc = "Full link with address and parameters to retrieve an ad "
|
|
"feed. To specify parameter input: %q for query, %n "
|
|
"for num results, %p for page number, %i for query ip, "
|
|
"and %% for %.";
|
|
m->m_cgi = "apicgi";
|
|
m->m_off = (char *)cr.m_adCGI[0] - x;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_CGI_URL;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "";
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "(1) paid inclusion ad feed xml result tag";
|
|
m->m_desc = "Specify the full xml path for a result.";
|
|
m->m_cgi = "apirx";
|
|
m->m_off = (char *)cr.m_adResultXml[0] - x;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_XML_LEN;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "";
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "(1) paid inclusion ad feed xml title tag";
|
|
m->m_desc = "Specify the full xml path for the results title.";
|
|
m->m_cgi = "apitx";
|
|
m->m_off = (char *)cr.m_adTitleXml[0] - x;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_XML_LEN;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "";
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "(1) paid inclusion ad feed xml description tag";
|
|
m->m_desc = "Specify the full xml path for the results description.";
|
|
m->m_cgi = "apidx";
|
|
m->m_off = (char *)cr.m_adDescXml[0] - x;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_XML_LEN;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "";
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "(1) paid inclusion ad feed xml link tag";
|
|
m->m_desc = "Specify the full xml path for the results link. This "
|
|
"is the link that is shown as plain text, not an actual "
|
|
"link, below the ad description.";
|
|
m->m_cgi = "apilx";
|
|
m->m_off = (char *)cr.m_adLinkXml[0] - x;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_XML_LEN;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "";
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "(1) paid inclusion ad feed xml url tag";
|
|
m->m_desc = "Specify the full xml path for the results url. This is "
|
|
"the link associated with the title.";
|
|
m->m_cgi = "apiux";
|
|
m->m_off = (char *)cr.m_adUrlXml[0] - x;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_XML_LEN;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "";
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "(1) paid inclusion backup ad feed link";
|
|
m->m_desc = "Full link with address and parameters to retrieve an ad "
|
|
"feed. To specify parameter input: %q for query, %n "
|
|
"for num results, %p for page number, %i for query ip, "
|
|
"and %% for %.";
|
|
m->m_cgi = "apicgib";
|
|
m->m_off = (char *)cr.m_adCGI[1] - x;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_CGI_URL;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "";
|
|
m++;
|
|
|
|
m->m_title = "(1) paid inclusion backup ad feed xml result tag";
|
|
m->m_desc = "Specify the full xml path for a result.";
|
|
m->m_cgi = "apirxb";
|
|
m->m_off = (char *)cr.m_adResultXml[1] - x;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_XML_LEN;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "";
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "(1) paid inclusion backup ad feed xml title tag";
|
|
m->m_desc = "Specify the full xml path for the results title.";
|
|
m->m_cgi = "apitxb";
|
|
m->m_off = (char *)cr.m_adTitleXml[1] - x;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_XML_LEN;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "";
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "(1) paid inclusion backup ad feed xml description tag";
|
|
m->m_desc = "Specify the full xml path for the results description.";
|
|
m->m_cgi = "apidxb";
|
|
m->m_off = (char *)cr.m_adDescXml[1] - x;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_XML_LEN;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "";
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "(1) paid inclusion backup ad feed xml link tag";
|
|
m->m_desc = "Specify the full xml path for the results link. This "
|
|
"is the link that is shown as plain text, not an actual "
|
|
"link, below the ad description.";
|
|
m->m_cgi = "apilxb";
|
|
m->m_off = (char *)cr.m_adLinkXml[1] - x;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_XML_LEN;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "";
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "(1) paid inclusion backup ad feed xml url tag";
|
|
m->m_desc = "Specify the full xml path for the results url. This is "
|
|
"the link associated with the title.";
|
|
m->m_cgi = "apiuxb";
|
|
m->m_off = (char *)cr.m_adUrlXml[1] - x;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_XML_LEN;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "";
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "(1) paid inclusion format text";
|
|
m->m_desc = "Specify the formatting text from the <div tag in";
|
|
m->m_cgi = "apift";
|
|
m->m_off = (char *)cr.m_adPIFormat - x;
|
|
m->m_plen = (char *)&cr.m_adPIFormatLen - x; // length of string
|
|
m->m_type = TYPE_STRINGBOX;
|
|
m->m_size = MAX_HTML_LEN + 1;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "style=\"padding: 3px;"
|
|
"text-align: left; background-color: "
|
|
"lightyellow;\"><span style=\"font-size: larger; "
|
|
"font-weight: bold;\">Sponsored Results</span>\n"
|
|
"<br><br>";
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "(1) skyscraper ad enable";
|
|
m->m_desc = "Enable/Disable the skyscraper ad.";
|
|
m->m_cgi = "asse";
|
|
m->m_off = (char *)&cr.m_adSSEnable - x;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "1";
|
|
m++;
|
|
|
|
m->m_title = "(1) skyscraper ad feed same as paid inclusion";
|
|
m->m_desc = "Use the same feed CGI as used above for the paid "
|
|
"inclusion.";
|
|
m->m_cgi = "asssap";
|
|
m->m_off = (char *)&cr.m_adSSSameasPI - x;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "0";
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "(1) skyscraper ad feed link";
|
|
m->m_desc = "Full link with address and parameters to retrieve an ad "
|
|
"feed. To specify parameter input: %q for query, %n "
|
|
"for num results, %p for page number, %i for query ip, "
|
|
"and %% for %.";
|
|
m->m_cgi = "asscgi";
|
|
m->m_off = (char *)cr.m_adCGI[2] - x;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_CGI_URL;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "";
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "(1) skyscraper ad feed xml result tag";
|
|
m->m_desc = "Specify the full xml path for a result.";
|
|
m->m_cgi = "assrx";
|
|
m->m_off = (char *)cr.m_adResultXml[2] - x;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_XML_LEN;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "";
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "(1) skyscraper ad feed xml title tag";
|
|
m->m_desc = "Specify the full xml path for the results title.";
|
|
m->m_cgi = "asstx";
|
|
m->m_off = (char *)cr.m_adTitleXml[2] - x;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_XML_LEN;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "";
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "(1) skyscraper ad feed xml description tag";
|
|
m->m_desc = "Specify the full xml path for the results description.";
|
|
m->m_cgi = "assdx";
|
|
m->m_off = (char *)cr.m_adDescXml[2] - x;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_XML_LEN;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "";
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "(1) skyscraper ad feed xml link tag";
|
|
m->m_desc = "Specify the full xml path for the results link. This "
|
|
"is the link that is shown as plain text, not an actual "
|
|
"link, below the ad description.";
|
|
m->m_cgi = "asslx";
|
|
m->m_off = (char *)cr.m_adLinkXml[2] - x;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_XML_LEN;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "";
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "(1) skyscraper ad feed xml url tag";
|
|
m->m_desc = "Specify the full xml path for the results url. This is "
|
|
"the link associated with the title.";
|
|
m->m_cgi = "assux";
|
|
m->m_off = (char *)cr.m_adUrlXml[2] - x;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_XML_LEN;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "";
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "(1) skyscraper backup ad feed same as paid inclusion";
|
|
m->m_desc = "Use the same feed CGI as used above for the backup paid "
|
|
"inclusion.";
|
|
m->m_cgi = "asssapb";
|
|
m->m_off = (char *)&cr.m_adBSSSameasBPI - x;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "0";
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "(1) skyscraper backup ad feed link";
|
|
m->m_desc = "Full link with address and parameters to retrieve an ad "
|
|
"feed. To specify parameter input: %q for query, %n "
|
|
"for num results, %p for page number, %i for query ip, "
|
|
"and %% for %.";
|
|
m->m_cgi = "asscgib";
|
|
m->m_off = (char *)cr.m_adCGI[3] - x;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_CGI_URL;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "";
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "(1) skyscraper backup ad feed xml result tag";
|
|
m->m_desc = "Specify the full xml path for a result.";
|
|
m->m_cgi = "assrxb";
|
|
m->m_off = (char *)cr.m_adResultXml[3] - x;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_XML_LEN;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "";
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "(1) skyscraper backup ad feed xml title tag";
|
|
m->m_desc = "Specify the full xml path for the results title.";
|
|
m->m_cgi = "asstxb";
|
|
m->m_off = (char *)cr.m_adTitleXml[3] - x;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_XML_LEN;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "";
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "(1) skyscraper backup ad feed xml description tag";
|
|
m->m_desc = "Specify the full xml path for the results description.";
|
|
m->m_cgi = "assdxb";
|
|
m->m_off = (char *)cr.m_adDescXml[3] - x;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_XML_LEN;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "";
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "(1) skyscraper backup ad feed xml link tag";
|
|
m->m_desc = "Specify the full xml path for the results link. This "
|
|
"is the link that is shown as plain text, not an actual "
|
|
"link, below the ad description.";
|
|
m->m_cgi = "asslxb";
|
|
m->m_off = (char *)cr.m_adLinkXml[3] - x;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_XML_LEN;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "";
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "(1) skyscraper backup ad feed xml url tag";
|
|
m->m_desc = "Specify the full xml path for the results url. This is "
|
|
"the link associated with the title.";
|
|
m->m_cgi = "assuxb";
|
|
m->m_off = (char *)cr.m_adUrlXml[3] - x;
|
|
m->m_type = TYPE_STRING;
|
|
m->m_size = MAX_XML_LEN;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "";
|
|
m->m_group = 0;
|
|
m++;
|
|
|
|
m->m_title = "(1) skyscraper format text";
|
|
m->m_desc = "Specify the formatting text from the <div tag in";
|
|
m->m_cgi = "assft";
|
|
m->m_off = (char *)cr.m_adSSFormat - x;
|
|
m->m_plen = (char *)&cr.m_adSSFormatLen - x; // length of string
|
|
m->m_size = MAX_HTML_LEN + 1;
|
|
m->m_type = TYPE_STRINGBOX;
|
|
m->m_page = PAGE_ADFEED;
|
|
m->m_def = "style=\"height: 100%; padding: 3px;"
|
|
"text-align: center;background-color: "
|
|
"lightyellow;\"><span style=\""
|
|
"font-size: larger; font-weight: bold;\">"
|
|
"Sponsored Results</span><br><br> ";
|
|
m->m_group = 0;
|
|
m++;
|
|
*/
|
|
|
|
///////////////////////////////////////////
// END AD FEED CONTROLS
///////////////////////////////////////////


///////////////////////////////////////////
// SEARCH URL CONTROLS
// these are only specified in the search url when doing a search
///////////////////////////////////////////


/////
//
// OLDER SEARCH INPUTS
//
////


// when we do &qa=1 we do not show things like responseTime in
// search results so we can verify serp checksum consistency for QA
// in qa.cpp
/*
m->m_title = "quality assurance";
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_SI;
m->m_desc = "This is 1 if doing a QA test in qa.cpp";
m->m_def = "0";
m->m_soff = (char *)&si.m_qa - y;
m->m_type = TYPE_CHAR;
m->m_sparm = 1;
m->m_scgi = "qa";
m++;
*/

//m->m_title = "show turk forms";
//m->m_desc = "If enabled, summaries in search results will be "
//	"turkable input forms.";
//m->m_def = "0";
//m->m_soff = (char *)&si.m_getTurkForm - y;
//m->m_type = TYPE_BOOL;
//m->m_sparm = 1;
//m->m_scgi = "turk";
//m++;


// IMPORT PARMS
m->m_title = "enable document importation";
m->m_desc = "Import documents into this collection.";
m->m_cgi = "import";
m->m_page = PAGE_IMPORT;
m->m_obj = OBJ_COLL;
m->m_off = (char *)&cr.m_importEnabled - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_flags = PF_API;
m++;

// m->m_title = "collection";
// m->m_desc = "Collection to import documents into.";
// m->m_cgi = "c";
// m->m_page = PAGE_IMPORT;
// m->m_obj = OBJ_GBREQUEST;
// m->m_off = (char *)&cr.m_imcoll - (char *)&gr;
// m->m_type = TYPE_CHARPTR;
// m->m_def = NULL;
// // PF_COLLDEFAULT: so it gets set to default coll on html page
// m->m_flags = PF_API|PF_REQUIRED|PF_NOHTML;
// m++;

m->m_title = "directory containing titledb files";
m->m_desc = "Import documents contained in titledb files in this "
"directory. This is an ABSOLUTE directory path.";
m->m_cgi = "importdir";
m->m_xml = "importDir";
m->m_page = PAGE_IMPORT;
m->m_obj = OBJ_COLL;
m->m_off = (char *)&cr.m_importDir - x;
m->m_type = TYPE_SAFEBUF;
m->m_def = "";
m->m_flags = PF_API;
m++;

m->m_title = "number of simultaneous injections";
m->m_desc = "Typically try one or two injections per host in "
"your cluster.";
m->m_cgi = "numimportinjects";
m->m_xml = "numImportInjects";
m->m_page = PAGE_IMPORT;
m->m_obj = OBJ_COLL;
m->m_off = (char *)&cr.m_numImportInjects - x;
m->m_type = TYPE_LONG;
m->m_def = "2";
m->m_flags = PF_API;
m++;
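
// Illustrative sketch only (not from the original source): combining the
// import parms above, a titledb import could be kicked off with a request
// along these lines. The page path is assumed from PAGE_IMPORT and the
// collection name and directory are hypothetical:
//
//   /admin/import?c=main&import=1&importdir=/data/titledb&numimportinjects=2
//
// Note importdir must be an ABSOLUTE path per the parm description above.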
|
|
|
|
|
|
|
|
///////////
|
|
//
|
|
// ADD URL PARMS
|
|
//
|
|
///////////
|
|
|
|
m->m_title = "collection";
|
|
m->m_desc = "Add urls into this collection.";
|
|
m->m_cgi = "c";
|
|
m->m_page = PAGE_ADDURL2;
|
|
m->m_obj = OBJ_GBREQUEST;
|
|
m->m_off = (char *)&gr.m_coll - (char *)&gr;
|
|
m->m_type = TYPE_CHARPTR;
|
|
m->m_def = NULL;
|
|
// PF_COLLDEFAULT: so it gets set to default coll on html page
|
|
m->m_flags = PF_API|PF_REQUIRED|PF_NOHTML;
|
|
m++;
|
|
|
|
m->m_title = "urls to add";
|
|
m->m_desc = "List of urls to index. One per line or space separated. "
|
|
"If your url does not index as you expect you "
|
|
"can check it's spider history by doing a url: search on it. "
|
|
"Added urls will have a "
|
|
"<a href=/admin/filters#hopcount>hopcount</a> of 0. "
|
|
"Added urls will match the <i><a href=/admin/filters#isaddurl>"
|
|
"isaddurl</a></i> directive on "
|
|
"the url filters page. "
|
|
"The add url api is described on the "
|
|
"<a href=/admin/api>api</a> page.";
|
|
m->m_cgi = "urls";
|
|
m->m_page = PAGE_ADDURL2;
|
|
m->m_obj = OBJ_GBREQUEST; // do not store in g_conf or collectionrec
|
|
m->m_off = (char *)&gr.m_urlsBuf - (char *)&gr;
|
|
m->m_type = TYPE_CHARPTR;
|
|
m->m_def = NULL;
|
|
m->m_flags = PF_TEXTAREA | PF_NOSAVE | PF_API|PF_REQUIRED;
|
|
m++;
|
|
|
|
/*
|
|
// the new upload post submit button
|
|
m->m_title = "upload urls";
|
|
m->m_desc = "Upload your file of urls.";
|
|
m->m_cgi = "urls";
|
|
m->m_page = PAGE_ADDURL2;
|
|
m->m_obj = OBJ_NONE;
|
|
m->m_def = NULL;
|
|
m->m_type = TYPE_FILEUPLOADBUTTON;
|
|
m++;
|
|
*/
|
|
|
|
m->m_title = "strip sessionids";
|
|
m->m_desc = "Strip added urls of their session ids.";
|
|
m->m_cgi = "strip";
|
|
m->m_page = PAGE_ADDURL2;
|
|
m->m_obj = OBJ_GBREQUEST;
|
|
m->m_off = (char *)&gr.m_stripBox - (char *)&gr;
|
|
m->m_type = TYPE_CHECKBOX;
|
|
m->m_def = "1";
|
|
m->m_flags = PF_API;
|
|
m++;
|
|
|
|
m->m_title = "harvest links";
|
|
m->m_desc = "Harvest links of added urls so we can spider them?.";
|
|
m->m_cgi = "spiderlinks";
|
|
m->m_page = PAGE_ADDURL2;
|
|
m->m_obj = OBJ_GBREQUEST;
|
|
m->m_off = (char *)&gr.m_harvestLinks - (char *)&gr;
|
|
m->m_type = TYPE_CHECKBOX;
|
|
m->m_def = "1";
|
|
m->m_flags = PF_API;
|
|
m++;
|
|
|
|
/*
|
|
m->m_title = "force respider";
|
|
m->m_desc = "Force an immediate respider even if the url "
|
|
"is already indexed.";
|
|
m->m_cgi = "force";
|
|
m->m_page = PAGE_ADDURL2;
|
|
m->m_obj = OBJ_GBREQUEST;
|
|
m->m_off = (char *)&gr.m_forceRespiderBox - (char *)&gr;
|
|
m->m_type = TYPE_CHECKBOX;
|
|
m->m_def = "0";
|
|
m->m_flags = PF_API;
|
|
m++;
|
|
*/
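
// Illustrative sketch only (not from the original source): with the add
// url parms above, a bulk add could look like the following request to
// the add url page (/admin/addurl per the descriptions above); the
// collection name and urls are hypothetical, and multiple urls are
// space separated (here url-encoded as '+'):
//
//   /admin/addurl?c=main&strip=1&spiderlinks=1&urls=http://a.com/+http://b.com/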

////////
//
// now the new injection parms
//
////////

m->m_title = "url";
m->m_desc = "Specify the URL that will be immediately crawled "
"and indexed in real time while you wait. The browser "
"will return the "
"final index status code. Alternatively, "
"use the <a href=/admin/addurl>add url</a> page "
"to add urls individually or in bulk "
"without having to wait for the pages to be "
"actually indexed in real time. "

"By default, injected urls "
"take precedence over the \"insitelist\" expression in the "
"<a href=/admin/filters>url filters</a> "
"so injected urls need not match the patterns in your "
"<a href=/admin/sites>site list</a>. You can "
"change that behavior in the <a href=/admin/filters>url "
"filters</a> if you want. "
"Injected urls will have a "
"<a href=/admin/filters#hopcount>hopcount</a> of 0. "
"The injection api is described on the "
"<a href=/admin/api>api</a> page. "
"Make up a fake url if you are injecting content that "
"does not have one."
"<br>"
"<br>"
"If the url ends in .warc or .arc or .warc.gz or .arc.gz "
"Gigablast will index the contained documents as individual "
"documents, using the appropriate dates and other meta "
"information contained in the containing archive file."
;
m->m_cgi = "url";
//m->m_cgi2 = "u";
//m->m_cgi3 = "seed"; // pagerawlbot
//m->m_cgi4 = "injecturl";
m->m_obj = OBJ_IR;
m->m_type = TYPE_CHARPTR;
m->m_def = NULL;
m->m_flags = PF_API | PF_REQUIRED;
m->m_page = PAGE_INJECT;
m->m_off = (char *)&ir.ptr_url - (char *)&ir;
m++;

// alias #1
m->m_title = "url";
m->m_cgi = "u";
m->m_obj = OBJ_IR;
m->m_type = TYPE_CHARPTR;
m->m_def = NULL;
m->m_flags = PF_HIDDEN;
m->m_page = PAGE_INJECT;
m->m_off = (char *)&ir.ptr_url - (char *)&ir;
m++;

// alias #2
m->m_title = "url";
m->m_cgi = "seed";
m->m_obj = OBJ_IR;
m->m_type = TYPE_CHARPTR;
m->m_def = NULL;
m->m_flags = PF_HIDDEN | PF_DIFFBOT;
m->m_page = PAGE_INJECT;
m->m_off = (char *)&ir.ptr_url - (char *)&ir;
m++;

// alias #3
m->m_title = "url";
m->m_cgi = "injecturl";
m->m_obj = OBJ_IR;
m->m_type = TYPE_CHARPTR;
m->m_def = NULL;
m->m_flags = PF_HIDDEN;
m->m_page = PAGE_INJECT;
m->m_off = (char *)&ir.ptr_url - (char *)&ir;
m++;


m->m_title = "query to scrape";
m->m_desc = "Scrape popular search engines for this query "
"and inject their links. You are not required to supply "
"the <i>url</i> parm if you supply this parm.";
m->m_cgi = "qts";
m->m_obj = OBJ_IR;
m->m_type = TYPE_CHARPTR;
m->m_def = NULL;
m->m_flags = PF_API;
m->m_page = PAGE_INJECT;
m->m_off = (char *)&ir.ptr_queryToScrape - (char *)&ir;
m++;

m->m_title = "inject links";
m->m_desc = "Should we inject the links found in the injected "
"content as well?";
m->m_cgi = "injectlinks";
m->m_obj = OBJ_IR;
m->m_type = TYPE_CHECKBOX;
m->m_def = "0";
m->m_flags = PF_API;
m->m_page = PAGE_INJECT;
m->m_off = (char *)&ir.m_injectLinks - (char *)&ir;
m++;


m->m_title = "spider links";
m->m_desc = "Add the outlinks of the injected content into spiderdb "
"for spidering?";
m->m_cgi = "spiderlinks";
m->m_obj = OBJ_IR;
m->m_type = TYPE_CHECKBOX;
// leave off because could start spidering whole web unintentionally
m->m_def = "0";
m->m_flags = PF_API;
m->m_page = PAGE_INJECT;
m->m_off = (char *)&ir.m_spiderLinks - (char *)&ir;
m++;

m->m_title = "short reply";
m->m_desc = "Should the injection response be short and simple?";
m->m_cgi = "quick";
m->m_obj = OBJ_IR;
m->m_type = TYPE_CHECKBOX;
m->m_def = "0";
m->m_flags = PF_HIDDEN;
m->m_page = PAGE_INJECT;
m->m_off = (char *)&ir.m_shortReply - (char *)&ir;
m++;

m->m_title = "only inject content if new";
m->m_desc = "If the specified url is already in the index then "
"skip the injection.";
m->m_cgi = "newonly";
m->m_obj = OBJ_IR;
m->m_type = TYPE_CHECKBOX;
m->m_def = "0";
m->m_flags = PF_API;
m->m_page = PAGE_INJECT;
m->m_off = (char *)&ir.m_newOnly - (char *)&ir;
m++;

m->m_title = "delete from index";
m->m_desc = "Delete the specified url from the index.";
m->m_cgi = "deleteurl";
m->m_obj = OBJ_IR;
m->m_type = TYPE_CHECKBOX;
m->m_def = "0";
m->m_flags = PF_API;
m->m_page = PAGE_INJECT;
m->m_off = (char *)&ir.m_deleteUrl - (char *)&ir;
m++;

m->m_title = "recycle content";
m->m_desc = "If the url is already in the index, then do not "
"re-download the content, just use the content that was "
"stored in the cache from last time.";
m->m_cgi = "recycle";
m->m_obj = OBJ_IR;
m->m_type = TYPE_CHECKBOX;
m->m_def = "0";
m->m_flags = PF_API;
m->m_page = PAGE_INJECT;
m->m_off = (char *)&ir.m_recycle - (char *)&ir;
m++;

m->m_title = "dedup url";
m->m_desc = "Do not index the url if there is already another "
"url in the index with the same content.";
m->m_cgi = "dedup";
m->m_obj = OBJ_IR;
m->m_type = TYPE_CHECKBOX;
m->m_def = "0";
m->m_flags = PF_API;
m->m_page = PAGE_INJECT;
m->m_off = (char *)&ir.m_dedup - (char *)&ir;
m++;

m->m_title = "do consistency checking";
m->m_desc = "Turn this on for debugging.";
m->m_cgi = "consist";
m->m_obj = OBJ_IR;
m->m_type = TYPE_CHECKBOX;
m->m_def = "0";
m->m_flags = PF_HIDDEN; // | PF_API
m->m_page = PAGE_INJECT;
m->m_off = (char *)&ir.m_doConsistencyTesting - (char *)&ir;
m++;

m->m_title = "hop count";
m->m_desc = "Use this hop count when injecting the page.";
m->m_cgi = "hopcount";
m->m_obj = OBJ_IR;
m->m_type = TYPE_LONG;
m->m_def = "0";
m->m_flags = PF_HIDDEN; // | PF_API
m->m_page = PAGE_INJECT;
m->m_off = (char *)&ir.m_hopCount - (char *)&ir;
m++;

m->m_title = "url IP";
m->m_desc = "Use this IP when injecting the document. Do not use "
"this, or set it to 0.0.0.0, if unknown. If provided, it will "
"save an IP lookup.";
m->m_cgi = "urlip";
m->m_obj = OBJ_IR;
m->m_type = TYPE_IP;
m->m_def = "0.0.0.0";
m->m_flags = PF_API;
m->m_page = PAGE_INJECT;
m->m_off = (char *)&ir.m_injectDocIp - (char *)&ir;
m++;

m->m_title = "last spider time";
m->m_desc = "Override last time spidered.";
m->m_cgi = "lastspidered";
m->m_obj = OBJ_IR;
m->m_type = TYPE_LONG;
m->m_def = "0";
m->m_flags = PF_HIDDEN; // | PF_API
m->m_page = PAGE_INJECT;
m->m_off = (char *)&ir.m_lastSpidered - (char *)&ir;
m++;

m->m_title = "first indexed";
m->m_desc = "Override first indexed time.";
m->m_cgi = "firstindexed";
m->m_obj = OBJ_IR;
m->m_type = TYPE_LONG;
m->m_def = "0";
m->m_flags = PF_HIDDEN; // | PF_API
m->m_page = PAGE_INJECT;
m->m_off = (char *)&ir.m_firstIndexed - (char *)&ir;
m++;


m->m_title = "content has mime";
m->m_desc = "If the content of the url is provided below, does "
"it begin with an HTTP mime header?";
m->m_cgi = "hasmime";
m->m_obj = OBJ_IR;
m->m_type = TYPE_CHECKBOX;
m->m_def = "0";
m->m_flags = PF_API;
m->m_page = PAGE_INJECT;
m->m_off = (char *)&ir.m_hasMime - (char *)&ir;
m++;

m->m_title = "content delimiter";
m->m_desc = "If the content of the url is provided below, then "
"it consists of multiple documents separated by this "
"delimiter. Each such item will be injected as an "
"independent document. Some possible delimiters: "
"<i>========</i> or <i><doc></i>. If you set "
"<i>hasmime</i> above to true then Gigablast will check "
"for a url after the delimiter and use that url as the "
"injected url. Otherwise it will append numbers to the "
"url you provide above.";
m->m_cgi = "delim";
m->m_obj = OBJ_IR;
m->m_type = TYPE_CHARPTR;
m->m_def = NULL;
m->m_flags = PF_API;
m->m_page = PAGE_INJECT;
m->m_off = (char *)&ir.ptr_contentDelim - (char *)&ir;
m++;
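
// Illustrative sketch only (not from the original source): with
// delim=<doc>, the "content" payload defined below would hold several
// documents back to back, each preceded by the delimiter, e.g.
//
//   <doc>
//   <html>first document</html>
//   <doc>
//   <html>second document</html>
//
// Per the description above, with hasmime set to true Gigablast looks
// for a url after each delimiter and uses it as that item's injected
// url; otherwise it appends numbers to the url supplied in "url".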


m->m_title = "content type";
m->m_desc = "If you supply content in the text box below without "
"an HTTP mime header, "
"then you need to enter the content type. "
"Possible values: <b>text/html text/plain text/xml "
"application/json</b>";
m->m_cgi = "contenttype";
m->m_obj = OBJ_IR;
m->m_type = TYPE_CHARPTR; //text/html application/json application/xml
m->m_def = "text/html";
m->m_flags = PF_API;
m->m_page = PAGE_INJECT;
m->m_off = (char *)&ir.ptr_contentTypeStr - (char *)&ir;
m++;

m->m_title = "content charset";
m->m_desc = "A number representing the charset of the content "
"if provided below and no HTTP mime header "
"is given. Defaults to utf8 "
"which is 106. "
"See iana_charset.h for the numeric values.";
m->m_cgi = "charset";
m->m_obj = OBJ_IR;
m->m_type = TYPE_LONG;
m->m_def = "106";
m->m_flags = PF_API;
m->m_page = PAGE_INJECT;
m->m_off = (char *)&ir.m_charset - (char *)&ir;
m++;

m->m_title = "upload content file";
m->m_desc = "Instead of specifying the content to be injected in "
"the text box below, upload this file for it.";
m->m_cgi = "file";
m->m_obj = OBJ_IR;
m->m_type = TYPE_FILEUPLOADBUTTON;
m->m_def = NULL;
m->m_flags = PF_NOAPI;
m->m_page = PAGE_INJECT;
m->m_off = (char *)&ir.ptr_contentFile - (char *)&ir;
m++;

m->m_title = "content";
m->m_desc = "If you want to supply the URL's content "
"rather than have Gigablast download it, then "
"enter the content here. "
"Enter MIME header "
"first if \"content has mime\" is set to true above. "
"Separate MIME from actual content with two returns. "
"At least put a single space in here if you want to "
"inject empty content, otherwise the content will "
"be downloaded from the url. This is because the "
"page injection form always submits the content text area "
"even if it is empty, which should signify that the "
"content should be downloaded.";
m->m_cgi = "content";
m->m_obj = OBJ_IR;
m->m_type = TYPE_CHARPTR;
m->m_def = NULL;
m->m_flags = PF_API|PF_TEXTAREA;
m->m_page = PAGE_INJECT;
m->m_off = (char *)&ir.ptr_content - (char *)&ir;
m++;

m->m_title = "metadata";
m->m_desc = "JSON encoded metadata to be indexed with the document.";
m->m_cgi = "metadata";
m->m_obj = OBJ_IR;
m->m_type = TYPE_CHARPTR;
m->m_def = NULL;
m->m_flags = PF_API|PF_TEXTAREA;
m->m_page = PAGE_INJECT;
m->m_off = (char *)&ir.ptr_metadata - (char *)&ir;
m++;


m->m_title = "get sectiondb voting info";
m->m_desc = "Return section information of injected content for "
"the injected subdomain.";
m->m_cgi = "sections";
m->m_obj = OBJ_IR;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_flags = PF_API|PF_NOHTML;
m->m_page = PAGE_INJECT;
m->m_off = (char *)&ir.m_getSections - (char *)&ir;
m++;

m->m_title = "diffbot reply";
m->m_desc = "Used exclusively by diffbot. Do not use.";
m->m_cgi = "diffbotreply";
m->m_obj = OBJ_IR;
m->m_type = TYPE_CHARPTR;
m->m_def = NULL;
m->m_flags = PF_API|PF_TEXTAREA|PF_NOHTML; // do not show in our api
m->m_page = PAGE_INJECT;
m->m_off = (char *)&ir.ptr_diffbotReply - (char *)&ir;
m++;
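
// Illustrative sketch only (not from the original source): a minimal
// real-time injection using the parms above. The /admin/inject path is
// assumed from PAGE_INJECT and the admin page links in the descriptions;
// the url is hypothetical:
//
//   /admin/inject?c=main&url=http://example.com/&spiderlinks=0&quick=1
//
// To push content without a download, also pass contenttype=text/html
// and a non-empty content=... value, per the "content" parm above.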


///////////////////
//
// QUERY REINDEX
//
///////////////////

m->m_title = "collection";
m->m_desc = "Query reindex in this collection.";
m->m_cgi = "c";
m->m_obj = OBJ_GBREQUEST;
m->m_type = TYPE_CHARPTR;
m->m_def = NULL;
// PF_COLLDEFAULT: so it gets set to default coll on html page
m->m_flags = PF_API|PF_REQUIRED|PF_NOHTML;
m->m_page = PAGE_REINDEX;
m->m_off = (char *)&gr.m_coll - (char *)&gr;
m++;

m->m_title = "query to reindex or delete";
m->m_desc = "We either reindex or delete the search results of "
"this query. Reindexing them will redownload them and "
"possibly update the siterank, which is based on the "
"number of links to the site. This will add the url "
"requests to "
"the spider queue so ensure your spiders are enabled.";
m->m_cgi = "q";
m->m_off = (char *)&gr.m_query - (char *)&gr;
m->m_type = TYPE_CHARPTR;
m->m_page = PAGE_REINDEX;
m->m_obj = OBJ_GBREQUEST;
m->m_def = NULL;
m->m_flags = PF_API | PF_REQUIRED;
m++;

m->m_title = "start result number";
m->m_desc = "Starting with this result #. Starts at 0.";
m->m_cgi = "srn";
m->m_off = (char *)&gr.m_srn - (char *)&gr;
m->m_type = TYPE_LONG;
m->m_page = PAGE_REINDEX;
m->m_obj = OBJ_GBREQUEST;
m->m_def = "0";
m->m_flags = PF_API;
m++;

m->m_title = "end result number";
m->m_desc = "Ending with this result #. 0 is the first result #.";
m->m_cgi = "ern";
m->m_off = (char *)&gr.m_ern - (char *)&gr;
m->m_type = TYPE_LONG;
m->m_page = PAGE_REINDEX;
m->m_obj = OBJ_GBREQUEST;
m->m_def = "99999999";
m->m_flags = PF_API;
m++;

m->m_title = "query language";
m->m_desc = "The language the query is in. Used to rank results. "
"Just use xx to indicate no language in particular. But "
"you should use the same qlang value you used for doing "
"the query if you want consistency.";
m->m_cgi = "qlang";
m->m_off = (char *)&gr.m_qlang - (char *)&gr;
m->m_type = TYPE_CHARPTR;
m->m_page = PAGE_REINDEX;
m->m_obj = OBJ_GBREQUEST;
m->m_def = "en";
m->m_flags = PF_API;
m++;


m->m_title = "recycle content";
m->m_desc = "If you check this box then Gigablast will not "
"re-download the content, but use the content that was "
"stored in the cache from last time. Useful for rebuilding "
"the index to pick up new inlink text or fresher "
"sitenuminlinks counts which influence ranking.";
m->m_cgi = "qrecycle";
m->m_obj = OBJ_GBREQUEST;
m->m_type = TYPE_CHECKBOX;
m->m_def = "0";
m->m_flags = PF_API;
m->m_page = PAGE_REINDEX;
m->m_off = (char *)&gr.m_recycleContent - (char *)&gr;
m++;


m->m_title = "FORCE DELETE";
m->m_desc = "Check this checkbox to delete the results, not just "
"reindex them.";
m->m_cgi = "forcedel";
m->m_off = (char *)&gr.m_forceDel - (char *)&gr;
m->m_type = TYPE_CHECKBOX;
m->m_page = PAGE_REINDEX;
m->m_obj = OBJ_GBREQUEST;
m->m_def = "0";
m->m_flags = PF_API;
m++;
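
// Illustrative sketch only (not from the original source): a query-based
// reindex of the first 100 results using the parms above. The page path
// /admin/reindex is a guess based on the PAGE_REINDEX naming and the
// other admin page links; the query and collection are hypothetical:
//
//   /admin/reindex?c=main&q=gigablast&qlang=en&srn=0&ern=99&qrecycle=1
//
// Adding forcedel=1 would delete those results instead of reindexing.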


///////////////////
//
// SEARCH CONTROLS
//
///////////////////


m->m_title = "do spell checking by default";
m->m_desc = "If enabled while using the XML feed, "
"when Gigablast finds a spelling recommendation it will be "
"included in the XML <spell> tag. Default is 0 if using an "
"XML feed, 1 otherwise.";
m->m_cgi = "spell";
m->m_off = (char *)&cr.m_spellCheck - x;
//m->m_soff = (char *)&si.m_spellCheck - y;
//m->m_sparm = 1;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m->m_def = "1";
m->m_flags = PF_API | PF_NOSAVE | PF_CLONE;
m++;

m->m_title = "get scoring info by default";
m->m_desc = "Get scoring information for each result so you "
"can see how each result is scored. You must explicitly "
"request this using &scores=1 for the XML feed because it "
"is not included by default.";
m->m_cgi = "scores";
m->m_off = (char *)&cr.m_getDocIdScoringInfo - x;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m->m_def = "1";
m->m_flags = PF_API | PF_CLONE;
m++;

m->m_title = "do query expansion by default";
m->m_desc = "If enabled, query expansion will expand your query "
"to include the various forms and "
"synonyms of the query terms.";
m->m_def = "1";
m->m_off = (char *)&cr.m_queryExpansion - x;
m->m_type = TYPE_BOOL;
m->m_cgi = "qe";
m->m_page = PAGE_SEARCH;
m->m_flags = PF_API | PF_CLONE;
m->m_obj = OBJ_COLL;
m++;

m->m_title = "highlight query terms in summaries by default";
m->m_desc = "Use to disable or enable "
"highlighting of the query terms in the summaries.";
m->m_def = "1";
m->m_off = (char *)&cr.m_doQueryHighlighting - x;
m->m_type = TYPE_BOOL;
m->m_cgi = "qh";
m->m_smin = 0;
m->m_smax = 8;
m->m_sprpg = 1; // turn off for now
m->m_sprpp = 1;
m->m_flags = PF_API | PF_CLONE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;

m->m_title = "max title len";
m->m_desc = "What is the maximum number of "
"characters allowed in titles displayed in the search "
"results?";
m->m_cgi = "tml";
m->m_off = (char *)&cr.m_titleMaxLen - x;
m->m_type = TYPE_LONG;
m->m_flags = PF_API | PF_CLONE;
m->m_def = "80";
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;

m->m_title = "consider titles from body";
m->m_desc = "Can Gigablast make titles from the document content? "
"Used mostly for the news collection where the title tags "
"are not very reliable.";
m->m_cgi = "gtfb";
m->m_off = (char *)&cr.m_considerTitlesFromBody - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
//m->m_soff = (char *)&si.m_considerTitlesFromBody - y;
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;

m->m_title = "site cluster by default";
m->m_desc = "Should search results be site clustered? This "
"limits each site to appearing at most twice in the "
"search results. Sites are subdomains for the most part, "
"like abc.xyz.com.";
m->m_cgi = "scd";
m->m_off = (char *)&cr.m_siteClusterByDefault - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_flags = PF_API | PF_CLONE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;

// buzz
m->m_title = "hide all clustered results";
m->m_desc = "Only display at most one result per site.";
m->m_cgi = "hacr";
m->m_off = (char *)&cr.m_hideAllClustered - x;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m->m_def = "0";
m->m_group = 0;
m->m_flags = PF_API | PF_CLONE;
m++;

m->m_title = "dedup results by default";
m->m_desc = "Should duplicate search results be removed? This is "
"based on a content hash of the entire document. "
"So documents must be exactly the same for the most part.";
m->m_cgi = "drd"; // dedupResultsByDefault";
m->m_off = (char *)&cr.m_dedupResultsByDefault - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 1;
m->m_flags = PF_API | PF_CLONE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;

m->m_title = "do tagdb lookups for queries";
m->m_desc = "For each search result a tagdb lookup is made, "
"usually across the network on distributed clusters, to "
"see if the URL's site has been manually banned in tagdb. "
"If you don't manually ban sites then turn this off for "
"extra speed.";
m->m_cgi = "stgdbl";
m->m_off = (char *)&cr.m_doTagdbLookups - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 1;
m->m_flags = PF_API | PF_CLONE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;

m->m_title = "percent similar dedup summary default value";
m->m_desc = "If a document's summary (and title) is "
"this percent similar "
"to the summary of a document above it, then remove it from "
"the search results. 100 means only to remove if exactly the "
"same. 0 means no summary deduping.";
m->m_cgi = "psds";
m->m_off = (char *)&cr.m_percentSimilarSummary - x;
m->m_type = TYPE_LONG;
m->m_def = "90";
m->m_group = 0;
m->m_smin = 0;
m->m_smax = 100;
m->m_flags = PF_API | PF_CLONE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;

m->m_title = "number of lines to use in summary to dedup";
m->m_desc = "Sets the number of summary lines to generate for summary "
"deduping. This helps the deduping process avoid throwing "
"out valid summaries when the normally displayed summaries "
"are shorter. Requires percent similar dedup summary to "
"be non-zero.";
m->m_cgi = "msld";
m->m_off = (char *)&cr.m_summDedupNumLines - x;
m->m_type = TYPE_LONG;
m->m_def = "4";
m->m_group = 0;
m->m_flags = PF_API | PF_CLONE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;


m->m_title = "dedup URLs by default";
m->m_desc = "Should we dedup URLs with case insensitivity? This is "
"mainly to correct duplicate wiki pages.";
m->m_cgi = "ddu";
m->m_off = (char *)&cr.m_dedupURLDefault - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m->m_flags = PF_API | PF_CLONE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;

m->m_title = "use vhost language detection";
m->m_desc = "Use language specific pages for home, etc.";
m->m_cgi = "vhost";
m->m_off = (char *)&cr.m_useLanguagePages - x;
//m->m_soff = (char *)&si.m_useLanguagePages - y;
m->m_type = TYPE_BOOL;
m->m_def = "1";
//m->m_scgi = "vhost";
m->m_smin = 0;
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;

m->m_title = "sort language preference default";
m->m_desc = "Default language to use for ranking results. "
//"This should only be used on limited collections. "
"Value should be any language abbreviation, for example "
"\"en\" for English. Use <i>xx</i> to give ranking "
"boosts to no language in particular. See the language "
"abbreviations at the bottom of the "
"<a href=/admin/filters>url filters</a> page.";
m->m_cgi = "defqlang";
m->m_off = (char *)&cr.m_defaultSortLanguage2 - x;
m->m_type = TYPE_STRING;
m->m_size = 6; // up to 5 chars + NULL, e.g. "en_US"
m->m_def = "xx";//_US";
//m->m_group = 0;
m->m_flags = PF_API | PF_CLONE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;


m->m_title = "sort country preference default";
m->m_desc = "Default country to use for ranking results. "
//"This should only be used on limited collections. "
"Value should be any country code abbreviation, for example "
"\"us\" for United States. This is currently not working.";
m->m_cgi = "qcountry";
m->m_off = (char *)&cr.m_defaultSortCountry - x;
m->m_type = TYPE_STRING;
m->m_size = 2+1;
m->m_def = "us";
m->m_group = 0;
m->m_flags = PF_API | PF_CLONE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;

// for post query reranking
m->m_title = "docs to check for post query demotion by default";
m->m_desc = "How many search results should we "
"scan for post query demotion? "
"0 disables all post query reranking.";
m->m_cgi = "pqrds";
m->m_off = (char *)&cr.m_pqr_docsToScan - x;
//m->m_soff = (char *)&si.m_docsToScanForReranking - y;
m->m_type = TYPE_LONG;
m->m_def = "0";
m->m_group = 1;
//m->m_scgi = "pqrds";
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;

m->m_title = "max summary len";
m->m_desc = "What is the maximum number of "
"characters displayed in a summary for a search result?";
m->m_cgi = "sml";
m->m_off = (char *)&cr.m_summaryMaxLen - x;
m->m_type = TYPE_LONG;
m->m_def = "512";
m->m_flags = PF_API | PF_CLONE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;

m->m_title = "max summary excerpts";
m->m_desc = "What is the maximum number of "
"excerpts displayed in the summary of a search result?";
m->m_cgi = "smnl";
m->m_off = (char *)&cr.m_summaryMaxNumLines - x;
m->m_type = TYPE_LONG;
m->m_def = "4";
m->m_group = 0;
m->m_flags = PF_API | PF_CLONE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;

m->m_title = "max summary excerpt length";
m->m_desc = "What is the maximum number of "
"characters allowed per summary excerpt?";
m->m_cgi = "smxcpl";
m->m_off = (char *)&cr.m_summaryMaxNumCharsPerLine - x;
m->m_type = TYPE_LONG;
m->m_def = "90";
m->m_group = 0;
m->m_flags = PF_API | PF_CLONE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;

/*
m->m_title = "default number of summary excerpts by default";
m->m_desc = "What is the default number of "
"summary excerpts displayed per search result?";
m->m_cgi = "sdnl";
m->m_off = (char *)&cr.m_summaryDefaultNumLines - x;
m->m_type = TYPE_LONG;
m->m_def = "3";
m->m_group = 0;
m->m_flags = PF_API;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;
*/

m->m_title = "max summary line width by default";
m->m_desc = "<br> tags are inserted to keep the number "
"of chars in the summary per line at or below this width. "
"Also affects title. "
"Strings without spaces that exceed this "
"width are not split. Has no effect on xml or json feed, "
"only works on html.";
m->m_cgi = "smw";
m->m_off = (char *)&cr.m_summaryMaxWidth - x;
m->m_type = TYPE_LONG;
m->m_def = "80";
m->m_group = 0;
m->m_flags = PF_API | PF_CLONE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;

m->m_title = "bytes of doc to scan for summary generation";
m->m_desc = "Truncating this will miss out on good summaries, but "
"performance will increase.";
m->m_cgi = "clmfs";
m->m_off = (char *)&cr.m_contentLenMaxForSummary - x;
m->m_type = TYPE_LONG;
m->m_def = "70000";
m->m_group = 0;
m->m_flags = PF_API | PF_CLONE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;

m->m_title = "Prox summary carver radius";
m->m_desc = "Maximum number of characters to allow in between "
"search terms.";
m->m_cgi = "pscr";
m->m_off = (char *)&cr.m_proxCarveRadius - x;
m->m_type = TYPE_LONG;
m->m_def = "256";
m->m_group = 0;
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;

m->m_title = "front highlight tag";
m->m_desc = "Front html tag used for highlighting query terms in the "
"summaries displayed in the search results.";
m->m_cgi = "sfht";
m->m_off = (char *)cr.m_summaryFrontHighlightTag - x;
m->m_type = TYPE_STRING;
m->m_size = SUMMARYHIGHLIGHTTAGMAXSIZE;
m->m_def = "<b style=\"color:black;background-color:#ffff66\">";
m->m_group = 0;
m->m_flags = PF_API | PF_CLONE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;

m->m_title = "back highlight tag";
m->m_desc = "Back html tag used for highlighting query terms in the "
"summaries displayed in the search results.";
m->m_cgi = "sbht";
m->m_off = (char *)cr.m_summaryBackHighlightTag - x;
m->m_type = TYPE_STRING;
m->m_size = SUMMARYHIGHLIGHTTAGMAXSIZE;
m->m_def = "</b>";
m->m_group = 0;
m->m_flags = PF_API | PF_CLONE;
m->m_page = PAGE_SEARCH;
m->m_obj = OBJ_COLL;
m++;
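
// Illustrative sketch only (not from the original source): with the
// default sfht/sbht values above, a summary excerpt matching the query
// term "foo" would render roughly as
//
//   ...text <b style="color:black;background-color:#ffff66">foo</b> text...
//
// i.e. the front tag is emitted before each highlighted term and the
// back tag after it.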
|
|
|
|
m->m_title = "results to scan for gigabits generation by default";
|
|
m->m_desc = "How many search results should we "
|
|
"scan for gigabit (related topics) generation. Set this to "
|
|
"zero to disable gigabits generation by default.";
|
|
m->m_cgi = "dsrt";
|
|
m->m_off = (char *)&cr.m_docsToScanForTopics - x;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "30";
|
|
m->m_flags = PF_API | PF_CLONE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
m->m_title = "ip restriction for gigabits by default";
|
|
m->m_desc = "Should Gigablast only get one document per IP domain "
|
|
"and per domain for gigabits (related topics) generation?";
|
|
m->m_cgi = "ipr";
|
|
m->m_off = (char *)&cr.m_ipRestrict - x;
|
|
m->m_type = TYPE_BOOL;
|
|
// default to 0 since newspaperarchive only has docs from same IP dom
|
|
m->m_def = "0";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_API | PF_CLONE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
|
|
m->m_title = "remove overlapping topics";
|
|
m->m_desc = "Should Gigablast remove overlapping topics (gigabits)?";
|
|
m->m_cgi = "rot";
|
|
m->m_off = (char *)&cr.m_topicRemoveOverlaps - x;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "1";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_API | PF_CLONE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
m->m_title = "number of gigabits to show by default";
|
|
m->m_desc = "What is the number of "
|
|
"related topics (gigabits) "
|
|
"displayed per query? Set to 0 to save "
|
|
"CPU time.";
|
|
m->m_cgi = "nrt";
|
|
m->m_off = (char *)&cr.m_numTopics - x;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "11";
|
|
m->m_group = 0;
|
|
m->m_sprpg = 0; // do not propagate
|
|
m->m_sprpp = 0; // do not propagate
|
|
m->m_flags = PF_API | PF_CLONE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
|
|
|
|
m->m_title = "min gigabit score by default";
|
|
m->m_desc = "Gigabits (related topics) with scores below this "
|
|
"will be excluded. Scores range from 0% to over 100%.";
|
|
m->m_cgi = "mts";
|
|
m->m_off = (char *)&cr.m_minTopicScore - x;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "5";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_API | PF_CLONE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
m->m_title = "min gigabit doc count by default";
|
|
m->m_desc = "How many documents must contain the gigabit "
|
|
"(related topic) in order for it to be displayed.";
|
|
m->m_cgi = "mdc";
|
|
m->m_off = (char *)&cr.m_minDocCount - x;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "2";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_API | PF_CLONE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
m->m_title = "dedup doc percent for gigabits (related topics)";
|
|
m->m_desc = "If a document is this percent similar to another "
|
|
"document with a higher score, then it will not contribute "
|
|
"to the gigabit generation.";
|
|
m->m_cgi = "dsp";
|
|
m->m_off = (char *)&cr.m_dedupSamplePercent - x;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "80";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_API | PF_CLONE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
m->m_title = "max words per gigabit (related topic) by default";
|
|
m->m_desc = "Maximum number of words a gigabit (related topic) "
|
|
"can have. Affects xml feeds, too.";
|
|
m->m_cgi = "mwpt";
|
|
m->m_off = (char *)&cr.m_maxWordsPerTopic - x;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "6";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_API | PF_CLONE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
|
|
m->m_title = "gigabit max sample size";
|
|
m->m_desc = "Max chars to sample from each doc for gigabits "
|
|
"(related topics).";
|
|
m->m_cgi = "tmss";
|
|
m->m_off = (char *)&cr.m_topicSampleSize - x;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "4096";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_API | PF_CLONE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
m->m_title = "gigabit max punct len";
|
|
m->m_desc = "Max sequential punct chars allowed in a gigabit "
|
|
"(related topic). "
|
|
" Set to 1 for speed, 5 or more for best topics but twice as "
|
|
"slow.";
|
|
m->m_cgi = "tmpl";
|
|
m->m_off = (char *)&cr.m_topicMaxPunctLen - x;
|
|
m->m_type = TYPE_LONG;
|
|
m->m_def = "1";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
|
|
m->m_title = "display dmoz categories in results";
|
|
m->m_desc = "If enabled, results in dmoz will display their "
|
|
"categories on the results page.";
|
|
m->m_cgi = "ddc";
|
|
m->m_off = (char *)&cr.m_displayDmozCategories - x;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "1";
|
|
m->m_flags = PF_API | PF_CLONE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
m->m_title = "display indirect dmoz categories in results";
|
|
m->m_desc = "If enabled, results in dmoz will display their "
|
|
"indirect categories on the results page.";
|
|
m->m_cgi = "didc";
|
|
m->m_off = (char *)&cr.m_displayIndirectDmozCategories - x;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_API | PF_CLONE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
m->m_title = "display Search Category link to query category of result";
|
|
m->m_desc = "If enabled, a link will appear next to each category "
|
|
"on each result allowing the user to perform their query "
|
|
"on that entire category.";
|
|
m->m_cgi = "dscl";
|
|
m->m_off = (char *)&cr.m_displaySearchCategoryLink - x;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_API | PF_CLONE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
m->m_title = "use dmoz for untitled";
|
|
m->m_desc = "Yes to use DMOZ given title when a page is untitled but "
|
|
"is in DMOZ.";
|
|
m->m_cgi = "udfu";
|
|
m->m_off = (char *)&cr.m_useDmozForUntitled - x;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "1";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_API | PF_CLONE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
m->m_title = "show dmoz summaries";
|
|
m->m_desc = "Yes to always show DMOZ summaries with search results "
|
|
"that are in DMOZ.";
|
|
m->m_cgi = "udsm";
|
|
m->m_off = (char *)&cr.m_showDmozSummary - x;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "1";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_API | PF_CLONE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
m->m_title = "show adult category on top";
|
|
m->m_desc = "Yes to display the Adult category in the Top category";
|
|
m->m_cgi = "sacot";
|
|
m->m_off = (char *)&cr.m_showAdultCategoryOnTop - x;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_API | PF_CLONE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
/*
|
|
m->m_title = "show sensitive info in xml feed";
|
|
m->m_desc = "If enabled, we show certain tagb tags for each "
|
|
"search result, allow &inlinks=1 cgi parms, show "
|
|
"<docsInColl>, etc. in the xml feed. Created for buzzlogic.";
|
|
m->m_cgi = "sss";
|
|
m->m_off = (char *)&cr.m_showSensitiveStuff - x;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m++;
|
|
*/
|
|
|
|
m->m_title = "display indexed date";
|
|
m->m_desc = "Display the indexed date along with results.";
|
|
m->m_cgi = "didt";
|
|
m->m_off = (char *)&cr.m_displayIndexedDate - x;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "1";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
m->m_title = "display last modified date";
|
|
m->m_desc = "Display the last modified date along with results.";
|
|
m->m_cgi = "dlmdt";
|
|
m->m_off = (char *)&cr.m_displayLastModDate - x;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "1";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
m->m_title = "display published date";
|
|
m->m_desc = "Display the published date along with results.";
|
|
m->m_cgi = "dipt";
|
|
m->m_off = (char *)&cr.m_displayPublishDate - x;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "1";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
m->m_title = "enable click 'n' scroll";
|
|
m->m_desc = "The [cached] link on results pages loads click n "
|
|
"scroll.";
|
|
m->m_cgi = "ecns";
|
|
m->m_off = (char *)&cr.m_clickNScrollEnabled - x;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
m->m_title = "use data feed account server";
|
|
m->m_desc = "Enable/disable the use of a remote account verification "
|
|
"for Data Feed Customers.";
|
|
m->m_cgi = "dfuas";
|
|
m->m_off = (char *)&cr.m_useDFAcctServer - x;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;
|
|
|
|
m->m_title = "data feed server ip";
|
|
m->m_desc = "The ip address of the Gigablast data feed server to "
|
|
"retrieve customer account information from.";
|
|
m->m_cgi = "dfip";
|
|
m->m_off = (char *)&cr.m_dfAcctIp - x;
|
|
m->m_type = TYPE_IP;
|
|
m->m_def = "2130706433";
|
|
m->m_group = 0;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_SEARCH;
|
|
m->m_obj = OBJ_COLL;
|
|
m++;

	m->m_title = "data feed server port";
	m->m_desc = "The port of the Gigablast data feed server to retrieve "
		"customer account information from.";
	m->m_cgi = "dfport";
	m->m_off = (char *)&cr.m_dfAcctPort - x;
	m->m_type = TYPE_LONG;
	m->m_def = "8040";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m++;

	/*
	m->m_title = "data feed server collection";
	m->m_desc = "The collection on the Gigablast data feed server to "
		"retrieve customer account information from.";
	m->m_cgi = "dfcoll";
	m->m_off = (char *)&cr.m_dfAcctColl - x;
	m->m_type = TYPE_STRING;
	m->m_size = MAX_COLL_LEN;
	m->m_def = "customers";
	m->m_group = 0;
	m++;
	*/

	//
	// not sure cols=x goes here or not
	//
	/*
	m->m_title = "Number Of Columns (1-6)";
	m->m_desc = "How many columns results should be shown in. (1-6)";
	m->m_cgi = "cols";
	m->m_smin = 1;
	m->m_smax = 6;
	m->m_off = (char *)&cr.m_numCols - x;
	m->m_soff = (char *)&si.m_numCols - y;
	m->m_type = TYPE_LONG;
	m->m_def = "1";
	m->m_group = 0;
	m->m_sparm = 1;
	m++;
	*/

	//
	// Gets the screen width
	//
	/*
	m->m_title = "Screen Width";
	m->m_desc = "screen size of browser window";
	m->m_cgi = "ws";
	m->m_smin = 600;
	m->m_off = (char *)&cr.m_screenWidth - x;
	m->m_soff = (char *)&si.m_screenWidth - y;
	m->m_type = TYPE_LONG;
	m->m_def = "1100";
	m->m_group = 0;
	m->m_sparm = 1;
	m++;
	*/

	/*
	m->m_title = "collection hostname";
	m->m_desc = "Hostname that will default to this collection. Blank"
		" for none or default collection.";
	m->m_cgi = "chstn";
	m->m_off = (char *)cr.m_collectionHostname - x;
	m->m_type = TYPE_STRING;
	m->m_size = MAX_URL_LEN;
	m->m_def = "";
	m++;

	m->m_title = "collection hostname (1)";
	m->m_desc = "Hostname that will default to this collection. Blank"
		" for none or default collection.";
	m->m_cgi = "chstna";
	m->m_off = (char *)cr.m_collectionHostname1 - x;
	m->m_type = TYPE_STRING;
	m->m_size = MAX_URL_LEN;
	m->m_def = "";
	m->m_group = 0;
	m++;

	m->m_title = "collection hostname (2)";
	m->m_desc = "Hostname that will default to this collection. Blank"
		" for none or default collection.";
	m->m_cgi = "chstnb";
	m->m_off = (char *)cr.m_collectionHostname2 - x;
	m->m_type = TYPE_STRING;
	m->m_size = MAX_URL_LEN;
	m->m_def = "";
	m->m_group = 0;
	m++;
	*/

	m->m_title = "home page";
	static SafeBuf s_tmpBuf;
	s_tmpBuf.setLabel("stmpb1");
	s_tmpBuf.safePrintf (
		"Html to display for the home page. "
		"Leave empty for default home page. "
		"Use %%N for total "
		"number of pages indexed. Use %%n for number of "
		"pages indexed "
		"for the current collection. "
		//"Use %%H so Gigablast knows where to insert "
		//"the hidden form input tags, which must be there. "
		"Use %%c to insert the current collection name. "
		//"Use %T to display the standard footer. "
		"Use %%q to display the query in "
		"a text box. "
		"Use %%t to display the directory TOP. "
		"Example to paste into textbox: "
		"<br><i>"
		);
	s_tmpBuf.htmlEncode (
		"<html>"
		"<title>My Gigablast Search Engine</title>"
		"<script>\n"
		//"<!--"
		"function x(){document.f.q.focus();}"
		//"// -->"
		"\n</script>"
		"<body onload=\"x()\">"
		"<br><br>"
		"<center>"
		"<a href=/>"
		"<img border=0 width=500 height=122 "
		"src=/logo-med.jpg></a>"
		"<br><br>"
		"<b>My Search Engine</b>"
		"<br><br>"
		// "<br><br><br>"
		// "<b>web</b> "
		// " "
		// "<a href=\"/Top\">directory</a> "
		// " "
		// "<a href=/adv.html>advanced search</a> "
		// " "
		// "<a href=/addurl "
		// "title=\"Instantly add your url to "
		//"the index\">"
		// "add url</a>"
		// "<br><br>"
		"<form method=get action=/search name=f>"
		"<input type=hidden name=c value=\"%c\">"
		"<input name=q type=text size=60 value=\"\">"
		" "
		"<input type=\"submit\" value=\"Search\">"
		"</form>"
		"<br>"
		"<center>"
		"Searching the <b>%c</b> collection of %n "
		"documents."
		"</center>"
		"<br>"
		"</body></html>");
	s_tmpBuf.safePrintf("</i>");
	m->m_desc = s_tmpBuf.getBufStart();
	m->m_xml = "homePageHtml";
	m->m_cgi = "hp";
	m->m_off = (char *)&cr.m_htmlRoot - x;
	//m->m_plen = (char *)&cr.m_htmlRootLen - x; // length of string
	m->m_type = TYPE_SAFEBUF;//STRINGBOX;
	//m->m_size = MAX_HTML_LEN + 1;
	m->m_def = "";
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_TEXTAREA | PF_CLONE;
	m++;
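
	// Note: the doubled "%%" in the safePrintf() call above is the
	// printf-style escape for a single literal '%', so the admin-facing
	// description text shows "%N", "%n", "%c", etc. The htmlEncode()'d
	// example below it is not run through a format string, so single
	// '%' characters there are left alone.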

	m->m_title = "html head";
	static SafeBuf s_tmpBuf2;
	s_tmpBuf2.setLabel("stmpb2");
	s_tmpBuf2.safePrintf("Html to display before the search results. ");
	char *fff = "Leave empty for default. "
		"Convenient "
		"for changing colors and displaying logos. Use "
		"the variable, "
		"%q, to represent the query to display in a "
		"text box. "
		"Use %e to print the url encoded query. "
		//"Use %e to print the page encoding. "
		// i guess this is out for now
		//"Use %D to "
		//"print a drop down "
		//"menu for the number of search results to return. "
		"Use %S "
		"to print sort by date or relevance link. Use "
		"%L to "
		"display the logo. Use %R to display radio "
		"buttons for site "
		"search. Use %F to begin the form and %H to "
		"insert "
		"hidden text "
		"boxes of parameters like the current search result "
		"page number. "
		"BOTH %F and %H are necessary for the html head, but do "
		"not duplicate them in the html tail. "
		"Use %f to display "
		"the family filter radio buttons. "
		// take this out for now
		//"Directory: Use %s to display the directory "
		//"search type options. "
		//"Use %l to specify the "
		//"location of "
		//"dir=rtl in the body tag for RTL pages. "
		//"Use %where and %when to substitute the where "
		//"and when of "
		//"the query. "
		//"These values may be set based on the cookie "
		//"if "
		//"none was explicitly given. "
		//"IMPORTANT: In the xml configuration file, "
		//"this html "
		//"must be encoded (less thans mapped to <, "
		//"etc.).";
		"Example to paste into textbox: <br><i>";
	s_tmpBuf2.safeStrcpy(fff);
	s_tmpBuf2.htmlEncode(
		"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 "
		"Transitional//EN\">\n"
		"<html>\n"
		"<head>\n"
		"<title>My Gigablast Search Results</title>\n"
		"<meta http-equiv=\"Content-Type\" "
		"content=\"text/html; charset=utf-8\">\n"
		"</head>\n"
		"<body%l>\n"

		//"<form method=\"get\" action=\"/search\" name=\"f\">\n"
		// . %F prints the <form method=...> tag
		// . method will be GET or POST depending on the size of the
		//   input data. MSIE can't handle sending large GET requests
		//   that are more than like 1k or so, which happens a lot with
		//   our CTS technology (the sites= cgi parm can be very large)
		"%F"
		"<table cellpadding=\"2\" cellspacing=\"0\" border=\"0\">\n"
		"<tr>\n"
		"<td valign=top>"
		// this prints the Logo
		"%L"
		//"<a href=\"/\">"
		//"<img src=\"logo2.gif\" alt=\"Gigablast Logo\" "
		//"width=\"210\" height=\"25\" border=\"0\" valign=\"top\">"
		//"</a>"
		"</td>\n"

		"<td valign=top>\n"
		"<nobr>\n"
		"<input type=\"text\" name=\"q\" size=\"60\" value=\"%q\"> "
		// %D is the number of results drop down menu
		"%D"
		"<input type=\"submit\" value=\"Blast It!\" border=\"0\">\n"
		"</nobr>\n"
		// family filter
		// %R radio button for site(s) search
		"<br>%f %R\n"
		// directory search options
		// MDW: i guess this is out for now
		//"</td><td>%s</td>\n"
		"</tr>\n"
		"</table>\n"
		// %H prints the hidden form vars. Print them *after* the input
		// text boxes, radio buttons, etc. so these hidden vars can be
		// overridden as they should be.
		"%H");
	s_tmpBuf2.safePrintf("</i>");
	m->m_desc = s_tmpBuf2.getBufStart();
	m->m_xml = "htmlHead";
	m->m_cgi = "hh";
	m->m_off = (char *)&cr.m_htmlHead - x;
	m->m_type = TYPE_SAFEBUF;//STRINGBOX;
	m->m_def = "";
	//m->m_sparm = 1;
	//m->m_soff = (char *)&si.m_htmlHead - y;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_TEXTAREA | PF_CLONE;
	m++;

	m->m_title = "html tail";
	static SafeBuf s_tmpBuf3;
	s_tmpBuf3.setLabel("stmpb3");
	s_tmpBuf3.safePrintf("Html to display after the search results. ");
	s_tmpBuf3.safeStrcpy(fff);
	s_tmpBuf3.htmlEncode (
		"<br>\n"
		//"%F"
		"<table cellpadding=2 cellspacing=0 border=0>\n"
		"<tr><td></td>\n"
		//"<td valign=top align=center>\n"
		// this old query was overriding a newer query above so
		// i commented it out. mfd 6/2014
		//"<nobr>"
		//"<input type=text name=q size=60 value=\"%q\"> %D\n"
		//"<input type=submit value=\"Blast It!\" border=0>\n"
		//"</nobr>"
		// family filter
		//"<br>%f %R\n"
		//"<br>"
		//"%R\n"
		//"</td>"
		"<td>%s</td>\n"
		"</tr>\n"
		"</table>\n"
		"Try your search on \n"
		"<a href=http://www.google.com/search?q=%e>google</a> \n"
		"<a href=http://search.yahoo.com/bin/search?p=%e>yahoo</a> "
		" \n"
		//"<a href=http://www.alltheweb.com/search?query=%e>alltheweb"
		//"</a>\n"
		"<a href=http://search.dmoz.org/cgi-bin/search?search=%e>"
		"dmoz</a> \n"
		//"<a href=http://search01.altavista.com/web/results?q=%e>"
		//"alta vista</a>\n"
		//"<a href=http://s.teoma.com/search?q=%e>teoma</a> \n"
		//"<a href=http://wisenut.com/search/query.dll?q=%e>wisenut"
		//"</a>\n"
		"</font></body>\n");
	s_tmpBuf3.safePrintf("</i>");
	m->m_desc = s_tmpBuf3.getBufStart();
	m->m_xml = "htmlTail";
	m->m_cgi = "ht";
	m->m_off = (char *)&cr.m_htmlTail - x;
	m->m_type = TYPE_SAFEBUF;//STRINGBOX;
	m->m_def = "";
	//m->m_sparm = 1;
	//m->m_soff = (char *)&si.m_htmlHead - y;
	m->m_page = PAGE_SEARCH;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_TEXTAREA | PF_CLONE;
	m++;

	///////////////////////////////////////////
	// PAGE SPIDER CONTROLS
	///////////////////////////////////////////

	// just a comment in the conf file
	m->m_desc =
		"All <, >, \" and # characters that are values for a field "
		"contained herein must be represented as "
		"&lt;, &gt;, &#34; and &#035; respectively.";
	m->m_type = TYPE_COMMENT;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "spidering enabled";
	m->m_desc = "Controls just the spiders for this collection.";
	m->m_cgi = "cse";
	m->m_off = (char *)&cr.m_spideringEnabled - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	// this linked list of colls is in Spider.cpp and used to only
	// poll the active spider colls for spidering. so if coll
	// gets paused/unpaused we have to update it.
	m->m_flags = PF_CLONE | PF_REBUILDACTIVELIST;
	m++;

	m->m_title = "site list";
	m->m_xml = "siteList";
	m->m_desc = "List of sites to spider, one per line. "
		"See <a href=#examples>example site list</a> below. "
		"Gigablast uses the "
		"<a href=/admin/filters#insitelist>insitelist</a> "
		"directive on "
		"the <a href=/admin/filters>url filters</a> "
		"page to make sure that the spider only indexes urls "
		"that match the site patterns you specify here, other than "
		"urls you add individually via the add urls or inject url "
		"tools. "
		"Limit list to 300MB. If you have a lot of INDIVIDUAL urls "
		"to add then consider using the <a href=/admin/addurl>addurl"
		"</a> interface.";
	m->m_cgi = "sitelist";
	m->m_off = (char *)&cr.m_siteListBuf - x;
	m->m_page = PAGE_SPIDER;// PAGE_SITES;
	m->m_obj = OBJ_COLL;
	m->m_type = TYPE_SAFEBUF;
	m->m_func = CommandUpdateSiteList;
	m->m_def = "";
	// rebuild urlfilters now will nuke doledb and call updateSiteList()
	m->m_flags = PF_TEXTAREA | PF_REBUILDURLFILTERS | PF_CLONE;
	m++;
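
	// A minimal sketch of what an admin might paste into this
	// textarea, one site pattern per line (the hostnames below are
	// hypothetical; the full pattern syntax is documented on the
	// url filters page):
	//
	//   example.com
	//   www.blog.example.org
	//   http://shop.example.net/products/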

	m->m_title = "reset collection";
	m->m_desc = "Remove all documents from the collection and turn "
		"spiders off.";
	m->m_cgi = "reset";
	m->m_type = TYPE_CMD;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_func2 = CommandResetColl;
	m->m_cast = 1;
	m->m_flags = PF_HIDDEN;
	m++;

	m->m_title = "restart collection";
	m->m_desc = "Remove all documents from the collection and re-add "
		"seed urls from site list.";
	m->m_cgi = "restart";
	m->m_type = TYPE_CMD;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_func2 = CommandRestartColl;
	m->m_cast = 1;
	m++;
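
	// TYPE_CMD parms like "reset" and "restart" store no value of
	// their own: when the parm rec arrives, updateParm() invokes the
	// m_func2 command handler (CommandResetColl/CommandRestartColl)
	// instead of writing to a CollectionRec field. m_cast = 1 appears
	// to request that the command be broadcast to all hosts.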

	/*
	m->m_title = "new spidering enabled";
	m->m_desc = "When enabled the spider adds NEW "
		"pages to your index. ";
	m->m_cgi = "nse";
	m->m_off = (char *)&cr.m_newSpideringEnabled - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m++;

	m->m_title = "old spidering enabled";
	m->m_desc = "When enabled the spider will re-visit "
		"and update pages that are already in your index.";
	m->m_cgi = "ose";
	m->m_off = (char *)&cr.m_oldSpideringEnabled - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_group = 0;
	m++;

	m->m_title = "new spider weight";
	m->m_desc = "Weight time slices of new spiders in the priority "
		"page by this factor relative to the old spider queues.";
	m->m_cgi = "nsw";
	m->m_off = (char *)&cr.m_newSpiderWeight - x;
	m->m_type = TYPE_FLOAT;
	m->m_def = "1.0";
	m->m_group = 0;
	m++;
	*/

	m->m_title = "max spiders";
	m->m_desc = "What is the maximum number of web "
		"pages the spider is allowed to download "
		"simultaneously PER HOST for THIS collection? The "
		"maximum number of spiders over all collections is "
		"controlled in the <i>master controls</i>.";
	m->m_cgi = "mns";
	m->m_off = (char *)&cr.m_maxNumSpiders - x;
	m->m_type = TYPE_LONG;
	// make it the hard max so control is really in the master controls
	m->m_def = "300";
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;

	m->m_title = "spider delay in milliseconds";
	m->m_desc = "Make each spider wait this many milliseconds before "
		"getting the ip and downloading the page.";
	m->m_cgi = "sdms";
	m->m_off = (char *)&cr.m_spiderDelayInMilliseconds - x;
	m->m_type = TYPE_LONG;
	m->m_def = "0";
	m->m_group = 0;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;

	m->m_title = "obey robots.txt";
	m->m_xml = "useRobotstxt";
	m->m_desc = "If this is true Gigablast will respect "
		"the robots.txt convention and rel nofollow meta tags.";
	m->m_cgi = "obeyRobots";
	m->m_off = (char *)&cr.m_useRobotsTxt - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;
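
	// What obeying the convention means in practice (a standard
	// robots.txt example, not Gigablast-specific):
	//
	//   User-agent: *
	//   Disallow: /private/
	//   Crawl-delay: 10
	//
	// Disallowed paths are skipped; Crawl-delay, where honored,
	// throttles fetches from that host.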

	m->m_title = "obey rel no follow links";
	m->m_desc = "If this is true Gigablast will respect "
		"the rel nofollow link attribute.";
	m->m_cgi = "obeyRelNoFollow";
	m->m_off = (char *)&cr.m_obeyRelNoFollowLinks - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;

	m->m_title = "max robots.txt cache age";
	m->m_desc = "How many seconds to cache a robots.txt file for. "
		"86400 is 1 day. 0 means Gigablast will not read from the "
		"cache at all and will download the robots.txt before every "
		"page if robots.txt use is enabled above. However, if this is "
		"0 then Gigablast will still store robots.txt files in the "
		"cache.";
	m->m_cgi = "mrca";
	m->m_off = (char *)&cr.m_maxRobotsCacheAge - x;
	m->m_type = TYPE_LONG;
	m->m_def = "86400"; // 24*60*60 = 1 day
	m->m_units = "seconds";
	m->m_group = 0;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;

	m->m_title = "always use spider proxies";
	m->m_desc = "If this is true Gigablast will ALWAYS use the proxies "
		"listed on the <a href=/admin/proxies>proxies</a> "
		"page for "
		"spidering for "
		"this collection."
		//"regardless whether the proxies are enabled "
		//"on the <a href=/admin/proxies>proxies</a> page."
		;
	m->m_cgi = "useproxies";
	m->m_off = (char *)&cr.m_forceUseFloaters - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;

	m->m_title = "automatically use spider proxies";
	m->m_desc = "Use the spider proxies listed on the proxies page "
		"if gb detects that "
		"a webserver is throttling the spiders. This way we can "
		"learn the webserver's spidering policy so that our spiders "
		"can be more polite. If no proxies are listed on the "
		"proxies page then this parameter will have no effect.";
	m->m_cgi = "automaticallyuseproxies";
	m->m_off = (char *)&cr.m_automaticallyUseProxies - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_group = 0;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;

	m->m_title = "automatically back off";
	m->m_desc = "Set the crawl delay to 5 seconds if gb detects "
		"that an IP is throttling or banning gigabot from crawling "
		"it. The crawl delay just applies to that IP. "
		"Such throttling will be logged.";
	m->m_cgi = "automaticallybackoff";
	m->m_xml = "automaticallyBackOff";
	m->m_off = (char *)&cr.m_automaticallyBackOff - x;
	m->m_type = TYPE_BOOL;
	// a lot of pages have recaptcha links but they have valid content
	// so leave this off for now... they have it in a hidden div which
	// pops up to email the article link or whatever to someone.
	m->m_def = "0";
	m->m_group = 0;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;

	m->m_title = "use time axis";
	m->m_desc = "If this is true Gigablast will index the same "
		"url multiple times if its content varies over time, "
		"rather than overwriting the older version in the index. "
		"Useful for archiving web pages as they change over time.";
	m->m_cgi = "usetimeaxis";
	m->m_off = (char *)&cr.m_useTimeAxis - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;

	m->m_title = "index warc or arc files";
	m->m_desc = "If this is true Gigablast will index .warc and .arc "
		"files by injecting the pages contained in them as if they "
		"were spidered with the content in the .warc or .arc file. "
		"The spidered time will be taken from the archive file "
		"as well.";
	m->m_cgi = "indexwarcs";
	m->m_off = (char *)&cr.m_indexWarcs - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;

	/*
	m->m_title = "add url enabled";
	m->m_desc = "If this is enabled others can add "
		"web pages to your index via the add url page.";
	m->m_cgi = "aue";
	m->m_off = (char *)&cr.m_addUrlEnabled - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m++;
	*/

	m->m_title = "daily merge time";
	m->m_desc = "Do a tight merge on posdb and titledb at this time "
		"every day. This is expressed in MINUTES past midnight UTC. "
		"UTC is 5 hours ahead "
		"of EST and 7 hours ahead of MST. Leave this as -1 to "
		"NOT perform a daily merge. To merge at midnight EST use "
		"60*5=300 and midnight MST use 60*7=420.";
	m->m_cgi = "dmt";
	m->m_off = (char *)&cr.m_dailyMergeTrigger - x;
	m->m_type = TYPE_LONG;
	m->m_def = "-1";
	m->m_units = "minutes";
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;
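
	// Worked example: midnight PST is UTC-8, so 8*60 = 480 minutes
	// past midnight UTC triggers the merge at midnight PST. (These
	// are standard-time offsets and ignore daylight saving.)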

	m->m_title = "daily merge days";
	m->m_desc = "Comma separated list of days to merge on. Use "
		"0 for Sunday, 1 for Monday, ... 6 for Saturday. Leaving "
		"this parameter empty or without any numbers will make the "
		"daily merge happen every day.";
	m->m_cgi = "dmdl";
	m->m_off = (char *)&cr.m_dailyMergeDOWList - x;
	m->m_type = TYPE_STRING;
	m->m_size = 48;
	// make sunday the default
	m->m_def = "0";
	m->m_group = 0;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;

	m->m_title = "daily merge last started";
	m->m_desc = "When the daily merge was last kicked off. Expressed in "
		"UTC in seconds since the epoch.";
	m->m_cgi = "dmls";
	m->m_off = (char *)&cr.m_dailyMergeStarted - x;
	m->m_type = TYPE_LONG_CONST;
	m->m_def = "-1";
	m->m_group = 0;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_NOAPI;
	m++;

	/*
	m->m_title = "use datedb";
	m->m_desc = "Index documents for generating results sorted by date "
		"or constrained by date range. Only documents indexed while "
		"this is enabled will be returned for date-related searches.";
	m->m_cgi = "ud";
	m->m_off = (char *)&cr.m_useDatedb - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m++;

	m->m_title = "age cutoff for datedb";
	m->m_desc = "Do not index pubdates into datedb that are more "
		"than this many days old. Use -1 for no limit. A value "
		"of zero essentially turns off datedb. Pre-existing pubdates "
		"in datedb that fail to meet this constraint WILL BE "
		"COMPLETELY ERASED when datedb is merged.";
	m->m_cgi = "dbc";
	m->m_off = (char *)&cr.m_datedbCutoff - x;
	m->m_type = TYPE_LONG;
	m->m_def = "-1";
	m->m_units = "days";
	m++;

	m->m_title = "datedb default timezone";
	m->m_desc = "Default timezone to use when none specified on parsed "
		"time. Use offset from GMT, e.g. 0400 (AMT) or -0700 (MST)";
	m->m_cgi = "ddbdt";
	m->m_off = (char *)&cr.m_datedbDefaultTimezone - x;
	m->m_type = TYPE_LONG;
	m->m_def = "0";
	m->m_group = 0;
	m++;
	*/

	//m->m_title = "days before now to index";
	//m->m_desc = "Only index page if the datedb date was found to be "
	//	"within this many days of the current time. Use 0 to index "
	//	"all dates. Parm is float for fine control.";
	//m->m_cgi = "ddbdbn";
	//m->m_off = (char *)&cr.m_datedbDaysBeforeNow - x;
	//m->m_type = TYPE_FLOAT;
	//m->m_def = "0";
	//m->m_group = 0;
	//m++;

	m->m_title = "turing test enabled";
	m->m_desc = "If this is true, users will have to "
		"pass a simple Turing test to add a url. This prevents "
		"automated url submission.";
	m->m_cgi = "dtt";
	m->m_off = (char *)&cr.m_doTuringTest - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;

	m->m_title = "max add urls";
	m->m_desc = "Maximum number of urls that can be "
		"submitted via the addurl interface, per IP domain, per "
		"24 hour period. A value less than or equal to zero "
		"implies no limit.";
	m->m_cgi = "mau";
	m->m_off = (char *)&cr.m_maxAddUrlsPerIpDomPerDay - x;
	m->m_type = TYPE_LONG;
	m->m_def = "0";
	m->m_group = 0;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;

	// use url filters harvest links parm for this now
	/*
	m->m_title = "spider links";
	m->m_desc = "If this is false, the spider will not "
		"harvest links from web pages it visits. Links that it does "
		"harvest will be attempted to be indexed at a later time. ";
	m->m_cgi = "sl";
	m->m_off = (char *)&cr.m_spiderLinks - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m++;
	*/

	/*
	MDW: use the "onsite" directive in the url filters page now...

	m->m_title = "only spider links from same host";
	m->m_desc = "If this is true the spider will only harvest links "
		"to pages that are contained on the same host as the page "
		"that is being spidered. "
		"Example: When spidering a page from "
		"www.gigablast.com, only links to pages that are from "
		"www.gigablast.com would "
		"be harvested, if this switch were enabled. This allows you "
		"to seed the spider with URLs from a specific set of hosts "
		"and ensure that only links to pages that are from those "
		"hosts are harvested.";
	m->m_cgi = "slsh";
	m->m_off = (char *)&cr.m_sameHostLinks - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_group = 0;
	m++;
	*/

	m->m_title = "do not re-add old outlinks for this many days";
	m->m_desc = "If less than this many days have elapsed since the "
		"last time we added the outlinks to spiderdb, do not re-add "
		"them to spiderdb. Saves resources.";
	m->m_cgi = "slrf";
	m->m_off = (char *)&cr.m_outlinksRecycleFrequencyDays - x;
	m->m_type = TYPE_FLOAT;
	m->m_def = "30";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	/*
	m->m_title = "spider links by priority";
	m->m_desc = "Specify priorities for which links should be spidered. "
		"If the <i>spider links</i> option above is "
		"disabled then these settings will have no effect.";
	m->m_cgi = "slp";
	m->m_xml = "spiderLinksByPriority";
	m->m_off = (char *)&cr.m_spiderLinksByPriority - x;
	m->m_type = TYPE_PRIORITY_BOXES; // array of numbered (0-(MAX_SPIDER_PRIORITIES-1)) checkboxes
	m->m_fixed = MAX_SPIDER_PRIORITIES;
	m->m_def = "1"; // default for each one is on
	m->m_group = 0;
	m++;
	*/

	/*
	m->m_title = "min link priority";
	m->m_desc = "Only add links to the spider "
		"queue if their spider priority is this or higher. "
		"This can make the spider process more efficient "
		"since a lot of disk seeks are used when adding "
		"links.";
	m->m_cgi = "mlp";
	m->m_off = (char *)&cr.m_minLinkPriority - x;
	m->m_type = TYPE_PRIORITY;
	m->m_def = "0";
	m->m_group = 0;
	m++;
	*/

	/* m->m_title = "maximum hops from parent page";
	m->m_desc = "Only index pages that are within a particular number "
		"of hops from the parent page given in Page Add Url. -1 means "
		"that max hops is infinite.";
	m->m_cgi = "mnh";
	m->m_off = (char *)&cr.m_maxNumHops - x;
	m->m_type = TYPE_CHAR2;
	m->m_def = "-1";
	m->m_group = 0;
	m++;*/

	m->m_title = "scraping enabled procog";
	m->m_desc = "Do searches for queries in this host's part of the "
		"query log.";
	m->m_cgi = "scrapepc";
	m->m_off = (char *)&cr.m_scrapingEnabledProCog - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "scraping enabled web";
	m->m_desc = "Perform random searches on Google's news search engine "
		"to add sites with ingoogle tags into tagdb.";
	m->m_cgi = "scrapeweb";
	m->m_off = (char *)&cr.m_scrapingEnabledWeb - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "scraping enabled news";
	m->m_desc = "Perform random searches on Google's news search engine "
		"to add sites with news and goognews and ingoogle "
		"tags into tagdb.";
	m->m_cgi = "scrapenews";
	m->m_off = (char *)&cr.m_scrapingEnabledNews - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "scraping enabled blogs";
	m->m_desc = "Perform random searches on Google's news search engine "
		"to add sites with blogs and googblogs and ingoogle "
		"tags into tagdb.";
	m->m_cgi = "scrapeblogs";
	m->m_off = (char *)&cr.m_scrapingEnabledBlogs - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	/*
	m->m_title = "subsite detection enabled";
	m->m_desc = "Add the \"sitepathdepth\" to tagdb if a hostname "
		"is determined to have subsites at a particular depth.";
	m->m_cgi = "ssd";
	m->m_off = (char *)&cr.m_subsiteDetectionEnabled - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m++;
	*/

	m->m_title = "deduping enabled";
	m->m_desc = "When enabled, the spider will "
		"discard web pages which are identical to other web pages "
		"that are already in the index. "//AND that are from the same "
		//"hostname.
		//"An example of a hostname is www1.ibm.com. "
		"However, root urls, urls that have no path, are never "
		"discarded. It most likely has to hit disk to do these "
		"checks so it does cause some slowdown. Only use it if you "
		"need it.";
	m->m_cgi = "de";
	m->m_off = (char *)&cr.m_dedupingEnabled - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;

	m->m_title = "deduping enabled for www";
	m->m_desc = "When enabled, the spider will "
		"discard web pages which, when a www is prepended to the "
		"page's url, result in a url already in the index.";
	m->m_cgi = "dew";
	m->m_off = (char *)&cr.m_dupCheckWWW - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_group = 0;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;

	m->m_title = "detect custom error pages";
	m->m_desc = "Detect and do not index pages which have a 200 status"
		" code, but are likely to be error pages.";
	m->m_cgi = "dcep";
	m->m_off = (char *)&cr.m_detectCustomErrorPages - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;

	m->m_title = "delete 404s";
	m->m_desc = "Should pages be removed from the index if they are no "
		"longer accessible on the web?";
	m->m_cgi = "dnf";
	m->m_off = (char *)&cr.m_delete404s - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_HIDDEN;
	m++;

	m->m_title = "delete timed out docs";
	m->m_desc = "Should documents be deleted from the index "
		"if they have been retried enough times and the "
		"last received error is a timeout? "
		"If your internet connection is flaky you may say "
		"no here to ensure you do not lose important docs.";
	m->m_cgi = "dtod";
	m->m_off = (char *)&cr.m_deleteTimeouts - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "use simplified redirects";
	m->m_desc = "If this is true, the spider, when a url redirects "
		"to a \"simpler\" url, will add that simpler url into "
		"the spider queue and abandon the spidering of the current "
		"url.";
	m->m_cgi = "usr";
	m->m_off = (char *)&cr.m_useSimplifiedRedirects - x;
	m->m_type = TYPE_BOOL;
	// turned off for now. spider-time deduping should handle any
	// issues caused by disabling this.
	m->m_def = "0";
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;

	m->m_title = "use canonical redirects";
	m->m_desc = "If a page has a <link canonical> on it then treat it "
		"as a redirect, add it to spiderdb for spidering "
		"and abandon the indexing of the current url.";
	m->m_cgi = "ucr";
	m->m_off = (char *)&cr.m_useCanonicalRedirects - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m->m_group = 0;
	m++;

	m->m_title = "use ifModifiedSince";
	m->m_desc = "If this is true, the spider, when "
		"updating a web page that is already in the index, will "
		"not even download the whole page if it hasn't been "
		"updated since the last time Gigablast spidered it. "
		"This is primarily a bandwidth saving feature. It relies on "
		"the remote webserver's returned Last-Modified field "
		"being accurate.";
	m->m_cgi = "uims";
	m->m_off = (char *)&cr.m_useIfModifiedSince - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;
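
	// The conditional GET this enables is standard HTTP, not
	// Gigablast-specific: the spider sends
	//   If-Modified-Since: <Last-Modified value from the prior fetch>
	// and a "304 Not Modified" response lets it skip downloading
	// the body entirely.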

	m->m_title = "build similarity vector from content only";
	m->m_desc = "If this is true, the spider, when checking whether "
		"the page has changed enough to reindex or update the "
		"published date, will build the vector only from "
		"the content located on that page.";
	m->m_cgi = "bvfc";
	m->m_off = (char *)&cr.m_buildVecFromCont - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "use content similarity to index publish date";
	m->m_desc = "This requires build similarity from content only to be "
		"on. This indexes the publish date (only if the content "
		"has changed enough) to be between the last two spider "
		"dates.";
	m->m_cgi = "uspd";
	m->m_off = (char *)&cr.m_useSimilarityPublishDate - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "max percentage similar to update publish date";
	m->m_desc = "This requires build similarity from content only and "
		"use content similarity to index publish date to be "
		"on. This percentage is the maximum similarity that can "
		"exist between an old document and a new one before the "
		"publish date will be updated.";
	m->m_cgi = "mpspd";
	m->m_off = (char *)&cr.m_maxPercentSimilarPublishDate - x;
	m->m_type = TYPE_LONG;
	m->m_def = "80";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	// use url filters for this. this is a crawlbot parm really.
	/*
	m->m_title = "restrict domain";
	m->m_desc = "Keep crawler on same domain as seed urls?";
	m->m_cgi = "restrictDomain";
	m->m_off = (char *)&cr.m_restrictDomain - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	// we need to save this, it is a diffbot parm
	m->m_flags = PF_HIDDEN | PF_DIFFBOT;// | PF_NOSAVE;
	m++;
	*/

	m->m_title = "do url spam checking";
	m->m_desc = "If this is true and the spider finds "
		"lewd words in the hostname of a url it will throw "
		"that url away. It will also throw away urls that have 5 or "
		"more hyphens in their hostname.";
	m->m_cgi = "dusc";
	m->m_off = (char *)&cr.m_doUrlSpamCheck - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	/*
	m->m_title = "hours before adding unspiderable url to spiderdb";
	m->m_desc = "Hours to wait after trying to add an unspiderable url "
		"to spiderdb again.";
	m->m_cgi = "dwma";
	m->m_off = (char *)&cr.m_deadWaitMaxAge - x;
	m->m_type = TYPE_LONG;
	m->m_def = "24";
	m++;
	*/

	//m->m_title = "link text anomaly threshold";
	//m->m_desc = "Prevent pages from link voting for "
	//	"another page if its link text has a "
	//	"word which doesn't occur in at least this "
	//	"many other link texts. (set to 1 to disable)";
	//m->m_cgi = "ltat";
	//m->m_off = (char *)&cr.m_linkTextAnomalyThresh - x;
	//m->m_type = TYPE_LONG;
	//m->m_def = "2";
	//m++;

	/*
	m->m_title = "enforce domain quotas on new docs";
	m->m_desc = "If this is true then new documents will be removed "
		"from the index if the quota for their domain "
		"has been breached.";
	m->m_cgi = "enq";
	m->m_off = (char *)&cr.m_enforceNewQuotas - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m++;

	m->m_title = "enforce domain quotas on indexed docs";
	m->m_desc = "If this is true then indexed documents will be removed "
		"from the index if the quota for their domain has been "
		"breached.";
	m->m_cgi = "eoq";
	m->m_off = (char *)&cr.m_enforceOldQuotas - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_group = 0;
	m++;

	m->m_title = "use exact quotas";
	m->m_desc = "Does not use approximations so will do more disk seeks "
		"and may impact indexing performance significantly.";
	m->m_cgi = "ueq";
	m->m_off = (char *)&cr.m_exactQuotas - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_group = 0;
	m++;

	m->m_title = "restrict indexdb for spidering";
	m->m_desc = "If this is true then only the root indexdb file is "
		"searched for linkers. Saves on disk seeks, "
		"but may use older versions of indexed web pages.";
	m->m_cgi = "ris";
	m->m_off = (char *)&cr.m_restrictIndexdbForSpider - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m++;
	*/

	/*
	m->m_title = "indexdb max total files to merge";
	m->m_desc = "Do not merge more than this many files during a single "
		"merge operation. Merge does not scale well to numbers above "
		"50 or so.";
	m->m_cgi = "mttftm";
	m->m_off = (char *)&cr.m_indexdbMinTotalFilesToMerge - x;
	m->m_def = "50";
	//m->m_max = 100;
	m->m_type = TYPE_LONG;
	m++;

	m->m_title = "indexdb min files needed to trigger merge";
	m->m_desc = "Merge is triggered when this many indexdb data files "
		"are on disk.";
	m->m_cgi = "miftm";
	m->m_off = (char *)&cr.m_indexdbMinFilesToMerge - x;
	m->m_def = "6"; // default to high query performance, not spider
	m->m_type = TYPE_LONG;
	m->m_group = 0;
	m++;

	m->m_title = "datedb min files needed to trigger a merge";
	m->m_desc = "Merge is triggered when this many datedb data files "
		"are on disk.";
	m->m_cgi = "mdftm";
	m->m_off = (char *)&cr.m_datedbMinFilesToMerge - x;
	m->m_def = "5";
	m->m_type = TYPE_LONG;
	m->m_group = 0;
	m++;

	m->m_title = "spiderdb min files needed to trigger a merge";
	m->m_desc = "Merge is triggered when this many spiderdb data files "
		"are on disk.";
	m->m_cgi = "msftm";
	m->m_off = (char *)&cr.m_spiderdbMinFilesToMerge - x;
	m->m_def = "2";
	m->m_type = TYPE_LONG;
	m->m_group = 0;
	m++;

	m->m_title = "checksumdb min files needed to trigger a merge";
	m->m_desc = "Merge is triggered when this many checksumdb data files "
		"are on disk.";
	m->m_cgi = "mcftm";
	m->m_off = (char *)&cr.m_checksumdbMinFilesToMerge - x;
	m->m_def = "2";
	m->m_type = TYPE_LONG;
	m->m_group = 0;
	m++;

	m->m_title = "clusterdb min files needed to trigger a merge";
	m->m_desc = "Merge is triggered when this many clusterdb data files "
		"are on disk.";
	m->m_cgi = "mclftm";
	m->m_off = (char *)&cr.m_clusterdbMinFilesToMerge - x;
	m->m_def = "2";
	m->m_type = TYPE_LONG;
	m->m_group = 0;
	m++;
	*/

	m->m_title = "linkdb min files needed to trigger a merge";
	m->m_desc = "Merge is triggered when this many linkdb data files "
		"are on disk. Raise this when initially growing an index "
		"in order to keep merging down.";
	m->m_cgi = "mlkftm";
	m->m_off = (char *)&cr.m_linkdbMinFilesToMerge - x;
	m->m_def = "6";
	m->m_type = TYPE_LONG;
	m->m_group = 0;
	m->m_flags = PF_CLONE;//PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "tagdb min files to merge";
	m->m_desc = "Merge is triggered when this many tagdb data files "
		"are on disk.";
	m->m_cgi = "mtftgm";
	m->m_off = (char *)&cr.m_tagdbMinFilesToMerge - x;
	m->m_def = "2";
	m->m_type = TYPE_LONG;
	m->m_group = 0;
	m->m_flags = PF_CLONE;//PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	// this is overridden by collection
	m->m_title = "titledb min files needed to trigger a merge";
	m->m_desc = "Merge is triggered when this many titledb data files "
		"are on disk.";
	m->m_cgi = "mtftm";
	m->m_off = (char *)&cr.m_titledbMinFilesToMerge - x;
	m->m_def = "6";
	m->m_type = TYPE_LONG;
	//m->m_save = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	//m->m_title = "sectiondb min files to merge";
	//m->m_desc = "Merge is triggered when this many sectiondb data files "
	//	"are on disk.";
	//m->m_cgi = "mscftm";
	//m->m_off = (char *)&cr.m_sectiondbMinFilesToMerge - x;
	//m->m_def = "4";
	//m->m_type = TYPE_LONG;
	//m->m_group = 0;
	//m++;

	m->m_title = "posdb min files needed to trigger a merge";
	m->m_desc = "Merge is triggered when this many posdb data files "
		"are on disk. Raise this while doing massive injections "
		"and not doing much querying. Then when done injecting "
		"keep this low to make queries fast.";
	m->m_cgi = "mpftm";
	m->m_off = (char *)&cr.m_posdbMinFilesToMerge - x;
	m->m_def = "6";
	m->m_type = TYPE_LONG;
	m->m_group = 0;
	m->m_flags = PF_CLONE;//PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;
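
	// These *MinFilesToMerge parms trade indexing throughput against
	// query latency, LSM-tree style: letting more data files pile up
	// on disk means fewer, larger merges while writing, but every
	// lookup must consult every file, costing extra disk seeks.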

	m->m_title = "recycle content";
	m->m_desc = "Rather than downloading the content again when "
		"indexing old urls, use the stored content. Useful for "
		"reindexing documents under a different ruleset or for "
		"rebuilding an index. You usually "
		"should turn off the 'use robots.txt' switch. "
		"And turn on the 'use old ips' and "
		"'recycle link votes' switches for speed. If rebuilding an "
		"index then you should turn off the 'only index changes' "
		"switches.";
	m->m_cgi = "rc";
	m->m_off = (char *)&cr.m_recycleContent - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "enable link voting";
	m->m_desc = "If this is true Gigablast will "
		"index hyper-link text and use hyper-link "
		"structures to boost the quality of indexed documents. "
		"You can disable this when doing a ton of injections to "
		"keep things fast. Then do a posdb (index) rebuild "
		"after re-enabling this when you are done injecting. Or "
		"if you simply do not want link voting this will speed up "
		"your injections and spidering a bit.";
	m->m_cgi = "glt";
	m->m_off = (char *)&cr.m_getLinkInfo - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_flags = PF_CLONE|PF_API;//PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "compute inlinks to sites";
	m->m_desc = "If this is true Gigablast will "
		"compute the number of site inlinks for the sites it "
		"indexes. This is a measure of the site's popularity and is "
		"used for ranking and sometimes spidering prioritization. "
		"It will cache the site information in tagdb. "
		"The greater the number of inlinks, the longer the cached "
		"time, because the site is considered more stable. If this "
		"is NOT true then Gigablast will use the included file, "
		"sitelinks.txt, which stores the site inlinks of millions "
		"of the most popular sites. This is the fastest way. If you "
		"notice a lot of <i>getting link info</i> requests in the "
		"<i>sockets table</i> you may want to disable this "
		"parm.";
	m->m_cgi = "csni";
	m->m_off = (char *)&cr.m_computeSiteNumInlinks - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_group = 0;
	m->m_flags = PF_CLONE|PF_API;//PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "do link spam checking";
	m->m_desc = "If this is true, do not allow spammy inlinks to vote. "
		"This check is "
		"too aggressive for some collections, i.e. it "
		"does not allow pages with cgi in their urls to vote.";
	m->m_cgi = "dlsc";
	m->m_off = (char *)&cr.m_doLinkSpamCheck - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_group = 0;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;

	m->m_title = "restrict link voting by ip";
	m->m_desc = "If this is true Gigablast will "
		"only allow one vote per the top 2 significant bytes "
		"of the IP address. Otherwise, multiple pages "
		"from the same top IP can contribute to the link text and "
		"link-based quality ratings of a particular URL. "
		"Furthermore, no votes will be accepted from IPs that have "
		"the same top 2 significant bytes as the IP of the page "
		"being indexed.";
	m->m_cgi = "ovpid";
	m->m_off = (char *)&cr.m_oneVotePerIpDom - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_group = 0;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;
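
	// Example: with this on, inlinks from 93.184.216.34 and
	// 93.184.5.7 share their top two IP bytes (93.184.x.x) and so
	// count as one vote between them, and neither may vote for a
	// page whose own IP also begins with 93.184.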

	m->m_title = "use new link algo";
	m->m_desc = "Use the links: termlists instead of link:. Also "
		"allows pages linking from the same domain or IP to all "
		"count as a single link from a different IP. This is also "
		"required for incorporating RSS and Atom feed information "
		"when indexing a document.";
	m->m_cgi = "na";
	m->m_off = (char *)&cr.m_newAlgo - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	/*
	m->m_title = "recycle link votes";
	m->m_desc = "If this is true Gigablast will "
		"use the old links and link text when re-indexing old urls "
		"and not do any link voting when indexing new urls.";
	m->m_cgi = "rv";
	m->m_off = (char *)&cr.m_recycleVotes - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_group = 0;
	m++;
	*/

	m->m_title = "update link info frequency";
	m->m_desc = "How often should Gigablast recompute the "
		"link info for a url. "
		"Also applies to getting the quality of a site "
		"or root url, which is based on the link info. "
		"In days. Can use decimals. 0 means to update "
		"the link info every time the url's content is re-indexed. "
		"If the content is not reindexed because it is unchanged "
		"then the link info will not be updated. When getting the "
		"link info or quality of the root url from an "
		"external cluster, Gigablast will tell the external cluster "
		"to recompute it if its age is this or higher.";
	m->m_cgi = "uvf";
	m->m_off = (char *)&cr.m_updateVotesFreq - x;
	m->m_type = TYPE_FLOAT;
	m->m_def = "60.000000";
	m->m_group = 0;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;

	/*
	m->m_title = "recycle imported link info";
	m->m_desc = "If true, we ALWAYS recycle the imported link info and "
		"NEVER recompute it again. Otherwise, recompute it when we "
		"recompute the local link info.";
	m->m_cgi = "rili";
	m->m_off = (char *)&cr.m_recycleLinkInfo2 - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_group = 0;
	m++;
	*/

	/*
	m->m_title = "use imported link info for quality";
	m->m_desc = "If true, we will use the imported link info to "
		"help us determine the quality of the page we are indexing.";
	m->m_cgi = "uifq";
	m->m_off = (char *)&cr.m_useLinkInfo2ForQuality - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_group = 0;
	m++;
	*/

	// this can hurt us too much if mis-assigned, remove it
	/*
	m->m_title = "restrict link voting to roots";
	m->m_desc = "If this is true Gigablast will "
		"not perform link analysis on urls that are not "
		"root urls.";
	m->m_cgi = "rvr";
	m->m_off = (char *)&cr.m_restrictVotesToRoots - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_group = 0;
	m++;
	*/

	/*
	m->m_title = "index link text";
	m->m_desc = "If this is true Gigablast will "
		"index both incoming and outgoing link text for the "
		"appropriate documents, depending on url filters and "
		"site rules, under the gbinlinktext: and gboutlinktext: "
		"fields. Generally, you want this disabled, it was for "
		"a client.";
	m->m_cgi = "ilt";
	m->m_off = (char *)&cr.m_indexLinkText - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_group = 0;
	m++;
	*/

	/*
	m->m_title = "index incoming link text";
	m->m_desc = "If this is false no incoming link text is indexed.";
	m->m_cgi = "iilt";
	m->m_off = (char *)&cr.m_indexLinkText - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_group = 0;
	m++;
	*/

	m->m_title = "index inlink neighborhoods";
	m->m_desc = "If this is true Gigablast will "
		"index the plain text surrounding the hyper-link text. The "
		"score will be x times that of the hyper-link text, where x "
		"is the scalar below.";
	m->m_cgi = "iin";
	m->m_off = (char *)&cr.m_indexInlinkNeighborhoods - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	/*
	// this is now hard-coded in XmlNode.cpp, currently .8
	m->m_title = "inlink neighborhoods score scalar";
	m->m_desc = "Gigablast can "
		"index the plain text surrounding the hyper-link text. The "
		"score will be x times that of the hyper-link text, where x "
		"is this number.";
	m->m_cgi = "inss";
	m->m_off = (char *)&cr.m_inlinkNeighborhoodsScoreScalar - x;
	m->m_type = TYPE_FLOAT;
	m->m_def = ".20";
	m->m_group = 0;
	m++;
	*/

	/*
	m->m_title = "break web rings";
	m->m_desc = "If this is true Gigablast will "
		"attempt to detect link spamming rings and decrease "
		"their influence on the link text for a URL.";
	m->m_cgi = "bwr";
	m->m_off = (char *)&cr.m_breakWebRings - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_group = 0;
	m++;
	*/

	/*
	m->m_title = "break log spam";
	m->m_desc = "If this is true Gigablast will attempt to detect "
		"dynamically generated pages and remove their voting power. "
		"Additionally, pages over 100k will not have their "
		"outgoing links counted. Pages that have a form which POSTS "
		"to a cgi page will not be considered either.";
	m->m_cgi = "bls";
	m->m_off = (char *)&cr.m_breakLogSpam - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m++;
	*/

	m->m_title = "tagdb collection name";
	m->m_desc = "Sometimes you want the spiders to use the tagdb of "
		"another collection, like the <i>main</i> collection. "
		"If this is empty it defaults to the current collection.";
	m->m_cgi = "tdbc";
	m->m_off = (char *)&cr.m_tagdbColl - x;
	m->m_type = TYPE_STRING;
	m->m_size = MAX_COLL_LEN+1;
	m->m_def = "";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "catdb lookups enabled";
	m->m_desc = "Spiders will look to see if the current page is in "
		"catdb. If it is, all Directory information for that page "
		"will be indexed with it.";
	m->m_cgi = "cdbe";
	m->m_off = (char *)&cr.m_catdbEnabled - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "recycle catdb info";
	m->m_desc = "Rather than requesting new info from DMOZ, like "
		"titles and topic ids, grab it from the old record. Increases "
		"performance if you are seeing a lot of "
		"\"getting catdb record\" entries in the spider queues.";
	m->m_cgi = "rci";
	m->m_off = (char *)&cr.m_recycleCatdb - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "allow banning of pages in catdb";
	m->m_desc = "If this is 'NO' then pages that are in catdb can not "
		"be banned via tagdb or the url filters page.";
	m->m_cgi = "abpc";
	m->m_off = (char *)&cr.m_catdbPagesCanBeBanned - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "override spider errors for catdb";
	m->m_desc = "Ignore and skip spider errors if the spidered site"
		" is found in Catdb (DMOZ).";
	m->m_cgi = "catose";
	m->m_off = (char *)&cr.m_overrideSpiderErrorsForCatdb - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	//m->m_title = "only spider root urls";
	//m->m_desc = "Only spider urls that are roots.";
	//m->m_cgi = "osru";
	//m->m_off = (char *)&cr.m_onlySpiderRoots - x;
	//m->m_type = TYPE_BOOL;
	//m->m_def = "0";
	//m++;

	m->m_title = "allow asian docs";
	m->m_desc = "If this is disabled the spider "
		"will not allow any docs from the gb2312 charset "
		"into the index.";
	m->m_cgi = "aad";
	m->m_off = (char *)&cr.m_allowAsianDocs - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "allow adult docs";
	m->m_desc = "If this is disabled the spider "
		"will not allow any docs which contain adult content "
		"into the index (overrides tagdb).";
	m->m_cgi = "aprnd";
	m->m_off = (char *)&cr.m_allowAdultDocs - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "allow xml docs";
	m->m_desc = "If this is disabled the spider "
		"will not allow any xml "
		"into the index.";
	m->m_cgi = "axd";
	m->m_off = (char *)&cr.m_allowXmlDocs - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "do serp detection";
	m->m_desc = "If this is enabled the spider "
		"will not allow any docs which are determined to "
		"be serps (search engine results pages).";
	m->m_cgi = "dsd";
	m->m_off = (char *)&cr.m_doSerpDetection - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;

	m->m_title = "do IP lookup";
	m->m_desc = "If this is disabled and the proxy "
		"IP below is not zero then Gigablast will assume "
		"all spidered URLs have an IP address of 1.2.3.4.";
	m->m_cgi = "dil";
	m->m_off = (char *)&cr.m_doIpLookups - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "use old IPs";
	m->m_desc = "Should the stored IP "
		"of documents we are reindexing be used? Useful for "
		"pages banned by IP address and then reindexed with "
		"the reindexer tool.";
	m->m_cgi = "useOldIps";
	m->m_off = (char *)&cr.m_useOldIps - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	m->m_title = "remove banned pages";
	m->m_desc = "Remove banned pages from the index. Pages can be "
		"banned using tagdb or the Url Filters table.";
	m->m_cgi = "rbp";
	m->m_off = (char *)&cr.m_removeBannedPages - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;

	/*
	m->m_title = "ban domains of urls banned by IP";
	m->m_desc = "Most urls are banned by IP "
		"address. But owners often will keep the same "
		"domains and change their IP address. So when "
		"banning a url that was banned by IP, should its domain "
		"be banned too? (obsolete)";
	m->m_cgi = "banDomains";
	m->m_off = (char *)&cr.m_banDomains - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m++;
	*/

	m->m_title = "allow HTTPS pages using SSL";
	m->m_desc = "If this is true, spiders will read "
		"HTTPS pages using SSL protocols.";
	m->m_cgi = "ahttps";
	m->m_off = (char *)&cr.m_allowHttps - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "1";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page = PAGE_SPIDER;
	m->m_obj = OBJ_COLL;
	m++;
|
|
|
|
/*
m->m_title = "require dollar sign";
m->m_desc = "If this is YES, then do not allow documents to be "
"indexed if they do not contain a dollar sign ($), but the "
"links will still be harvested. Used for building a shopping "
"index.";
m->m_cgi = "nds";
m->m_off = (char *)&cr.m_needDollarSign - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
*/

/*
m->m_title = "require numbers in url";
m->m_desc = "If this is YES, then do not allow documents to be "
"indexed if they do not have two back-to-back digits in the "
"path of the url, but the links will still be harvested. Used "
"to build a news index.";
m->m_cgi = "nniu";
m->m_off = (char *)&cr.m_needNumbersInUrl - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m++;

m->m_title = "index news topics";
m->m_desc = "If this is YES, Gigablast will attempt to categorize "
"every page as being in particular news categories like "
"sports, business, etc. and will be searchable by doing a "
"query like \"newstopic:sports\".";
m->m_cgi = "int";
m->m_off = (char *)&cr.m_getNewsTopic - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
*/

m->m_title = "follow RSS links";
m->m_desc = "If an item on a page has an RSS feed link, add the "
"RSS link to the spider queue and index the RSS pages "
"instead of the current page.";
m->m_cgi = "frss";
m->m_off = (char *)&cr.m_followRSSLinks - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_SPIDER;
m->m_obj = OBJ_COLL;
m++;

m->m_title = "only index articles from RSS feeds";
m->m_desc = "Only index pages that were linked to by an RSS feed. "
"Follow RSS Links must be enabled (above).";
m->m_cgi = "orss";
m->m_off = (char *)&cr.m_onlyIndexRSS - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_SPIDER;
m->m_obj = OBJ_COLL;
m++;

m->m_title = "max text doc length";
m->m_desc = "Gigablast will not download, index or "
"store more than this many bytes of an HTML or text "
"document. XML is NOT considered to be HTML or text; use "
"the rule below to control the maximum length of an XML "
"document. "
"Use -1 for no max.";
m->m_cgi = "mtdl";
m->m_off = (char *)&cr.m_maxTextDocLen - x;
m->m_type = TYPE_LONG;
m->m_def = "1048576"; // 1MB
m->m_page = PAGE_SPIDER;
m->m_obj = OBJ_COLL;
m->m_flags = PF_CLONE|PF_API;
m++;

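// Illustrative note, not part of the parm table: like any TYPE_LONG parm,
// the limit above is settable over HTTP via its m_cgi name, so a request
// such as (hypothetical admin path and collection name)
//   /admin/spider?c=main&mtdl=2097152
// would raise the max text doc length for collection "main" to 2 MB.
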
m->m_title = "max other doc length";
m->m_desc = "Gigablast will not download, index or "
"store more than this many bytes of a non-html, non-text "
"document. XML documents will be restricted to this "
"length. "
"Use -1 for no max.";
m->m_cgi = "modl";
m->m_off = (char *)&cr.m_maxOtherDocLen - x;
m->m_type = TYPE_LONG;
m->m_def = "1048576"; // 1MB
m->m_group = 0;
m->m_page = PAGE_SPIDER;
m->m_obj = OBJ_COLL;
m->m_flags = PF_CLONE|PF_API;
m++;

//m->m_title = "indexdb truncation limit";
//m->m_cgi = "itl";
//m->m_desc = "How many documents per term? Keep this very high.";
//m->m_off = (char *)&cr.m_indexdbTruncationLimit - x;
//m->m_def = "50000000";
//m->m_type = TYPE_LONG;
//m->m_min = MIN_TRUNC; // from Indexdb.h
//m++;

m->m_title = "apply filter to text pages";
m->m_desc = "If this is false then the filter "
"will not be used on html or text pages.";
m->m_cgi = "aft";
m->m_off = (char *)&cr.m_applyFilterToText - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_page = PAGE_SPIDER;
m->m_obj = OBJ_COLL;
m->m_flags = PF_CLONE;
m++;

m->m_title = "filter name";
m->m_desc = "Program to spawn to filter all HTTP "
"replies the spider receives. Leave blank for none.";
m->m_cgi = "filter";
m->m_def = "";
m->m_off = (char *)&cr.m_filter - x;
m->m_type = TYPE_STRING;
m->m_size = MAX_FILTER_LEN+1;
m->m_group = 0;
m->m_page = PAGE_SPIDER;
m->m_obj = OBJ_COLL;
m->m_flags = PF_CLONE;
m++;

m->m_title = "filter timeout";
m->m_desc = "Kill filter shell after this many seconds. Assume it "
"stalled permanently.";
m->m_cgi = "fto";
m->m_def = "40";
m->m_off = (char *)&cr.m_filterTimeout - x;
m->m_type = TYPE_LONG;
m->m_group = 0;
m->m_page = PAGE_SPIDER;
m->m_obj = OBJ_COLL;
m->m_flags = PF_CLONE;
m++;

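// Illustrative pairing of the two parms above (the program name is
// hypothetical): setting "filter" to /usr/bin/myfilter spawns that program
// on every HTTP reply the spider receives, and with "filter timeout" at
// its default of 40 the filter shell is killed if it has not finished
// within 40 seconds.
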
/*
m->m_title = "proxy ip";
m->m_desc = "Retrieve pages from the proxy at this IP address.";
m->m_cgi = "proxyip";
m->m_off = (char *)&cr.m_proxyIp - x;
m->m_type = TYPE_IP;
m->m_def = "0.0.0.0";
m->m_page = PAGE_SPIDER;
m->m_obj = OBJ_COLL;
m->m_flags = PF_CLONE;
m++;

m->m_title = "proxy port";
m->m_desc = "Retrieve pages from the proxy on "
"this port.";
m->m_cgi = "proxyport";
m->m_off = (char *)&cr.m_proxyPort - x;
m->m_type = TYPE_LONG;
m->m_def = "0";
m->m_group = 0;
m->m_page = PAGE_SPIDER;
m->m_obj = OBJ_COLL;
m->m_flags = PF_CLONE;
m++;
*/

m->m_title = "make image thumbnails";
m->m_desc = "Try to find the best image on each page and "
"store it as a thumbnail for presenting in the search "
"results.";
m->m_cgi = "mit";
m->m_off = (char *)&cr.m_makeImageThumbnails - x;
m->m_type = TYPE_BOOL;
// default to off since it slows things down to do this
m->m_def = "0";
m->m_page = PAGE_SPIDER;
m->m_obj = OBJ_COLL;
m->m_flags = PF_CLONE;
m++;

m->m_title = "max thumbnail width or height";
m->m_desc = "This is in pixels and limits the size of the thumbnail. "
"Gigablast tries to make at least the width or the height "
"equal to this maximum, but, unless the thumbnail is square, "
"one side will be longer than the other.";
m->m_cgi = "mtwh";
m->m_off = (char *)&cr.m_thumbnailMaxWidthHeight - x;
m->m_type = TYPE_LONG;
m->m_def = "250";
m->m_group = 0;
m->m_page = PAGE_SPIDER;
m->m_obj = OBJ_COLL;
m->m_flags = PF_CLONE;
m++;

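// Worked example of the scaling described above (illustrative numbers):
// with the default of 250, a 1024x768 source image is scaled by 250/1024,
// giving a thumbnail of about 250x188; the longer side hits the cap and
// the shorter side keeps the aspect ratio.
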
m->m_title = "index spider status documents";
m->m_desc = "Index a spider status \"document\" "
"for every url the spider "
"attempts to spider. Search for them using special "
"query operators like type:status or gberrorstr:success or "
"stats:gberrornum to get a histogram. "
"See <a href=/syntax.html>syntax</a> page for more examples. "
"They will not otherwise "
"show up in the search results.";
// "This will not work for "
// "diffbot crawlbot collections yet until it has proven "
// "more stable.";
m->m_cgi = "isr";
m->m_off = (char *)&cr.m_indexSpiderReplies - x;
m->m_type = TYPE_BOOL;
// default off for now until we fix it better. 5/26/14 mdw
// turn back on 6/21 now that we do not index plain text terms
// and we add gbdocspidertime and gbdocindextime terms so you
// can use those to sort regular docs and not have spider reply
// status docs in the serps.
// back on 4/21/2015 seems pretty stable.
// but it uses disk space so turn off for now again. 6/16/2015
m->m_def = "0";
m->m_page = PAGE_SPIDER;
m->m_obj = OBJ_COLL;
m->m_flags = PF_CLONE;
m++;

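// Illustrative queries against these status documents, using the operators
// named in the description above: "type:status" restricts results to the
// status docs themselves, "gberrorstr:success" finds successful fetches,
// and "stats:gberrornum" renders a histogram of the error codes seen.
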
// i put this in here so i can save disk space for my global
// diffbot json index
m->m_title = "index body";
m->m_desc = "Index the body of the documents so you can search it. "
"Required for searching that. You will pretty much always "
"want to keep this enabled. Does not apply to JSON "
"documents.";
m->m_cgi = "ib";
m->m_off = (char *)&cr.m_indexBody - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_page = PAGE_SPIDER;
m->m_obj = OBJ_COLL;
m->m_flags = PF_CLONE ;//| PF_HIDDEN;
m++;

m->m_cgi = "apiUrl";
m->m_desc = "Send every spidered url to this url and index "
"the reply in addition to the normal indexing process. "
"Example: by specifying http://api.diffbot.com/v3/"
"analyze?mode=high-precision&token=<yourDiffbotToken> here "
"you can index the structured JSON replies from diffbot for "
"every url that is spidered. "
"Gigablast will automatically "
"append a &url=<urlBeingSpidered> to this url "
"before sending it to diffbot.";
m->m_xml = "diffbotApiUrl";
m->m_title = "diffbot api url";
m->m_off = (char *)&cr.m_diffbotApiUrl - x;
m->m_type = TYPE_SAFEBUF;
m->m_page = PAGE_SPIDER;
m->m_flags = PF_REBUILDURLFILTERS | PF_CLONE;
m->m_def = "";
m->m_obj = OBJ_COLL;
m++;

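// Illustrative expansion of the &url= appending described above (the
// URL-encoding shown is an assumption): with the api url set to
//   http://api.diffbot.com/v3/analyze?mode=high-precision&token=TOKEN
// spidering http://example.com/a.html would result in a fetch of
//   http://api.diffbot.com/v3/analyze?mode=high-precision&token=TOKEN&url=http%3A%2F%2Fexample.com%2Fa.html
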
m->m_cgi = "urlProcessPatternTwo";
m->m_desc = "Only send urls that match this simple substring "
"pattern to Diffbot. Separate substrings with two pipe "
"operators, ||. Leave empty for no restrictions.";
m->m_xml = "diffbotUrlProcessPattern";
m->m_title = "diffbot url process pattern";
m->m_off = (char *)&cr.m_diffbotUrlProcessPattern - x;
m->m_type = TYPE_SAFEBUF;
m->m_page = PAGE_SPIDER;
m->m_obj = OBJ_COLL;
m->m_def = "";
m->m_group = 0;
m->m_flags = PF_REBUILDURLFILTERS | PF_CLONE;
m++;

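// Illustrative value for the pattern above (hypothetical substrings):
// a setting of "/product/||/item/" sends only urls containing
// "/product/" or "/item/" to Diffbot.
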
m->m_cgi = "urlProcessRegExTwo";
m->m_desc = "Only send urls that match this regular expression "
"to Diffbot. "
"Leave empty for no restrictions.";
m->m_xml = "diffbotUrlProcessRegEx";
m->m_title = "diffbot url process regex";
m->m_off = (char *)&cr.m_diffbotUrlProcessRegEx - x;
m->m_type = TYPE_SAFEBUF;
m->m_page = PAGE_SPIDER;
m->m_obj = OBJ_COLL;
m->m_def = "";
m->m_group = 0;
m->m_flags = PF_REBUILDURLFILTERS | PF_CLONE;
m++;

m->m_cgi = "pageProcessPatternTwo";
m->m_desc = "Only send urls whose content matches this simple "
"substring "
"pattern to Diffbot. Separate substrings with two pipe "
"operators, ||. "
"Leave empty for no restrictions.";
m->m_xml = "diffbotPageProcessPattern";
m->m_title = "diffbot page process pattern";
m->m_off = (char *)&cr.m_diffbotPageProcessPattern - x;
m->m_type = TYPE_SAFEBUF;
m->m_page = PAGE_SPIDER;
m->m_obj = OBJ_COLL;
m->m_def = "";
m->m_group = 0;
m->m_flags = PF_REBUILDURLFILTERS | PF_CLONE;
m++;

m->m_title = "spider start time";
m->m_desc = "Only spider URLs scheduled to be spidered "
"at this time or after. In UTC.";
m->m_cgi = "sta";
m->m_off = (char *)&cr.m_spiderTimeMin - x;
m->m_type = TYPE_DATE; // date format -- very special
m->m_def = "01 Jan 1970";
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_SPIDER;
m->m_obj = OBJ_COLL;
m++;

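// Note on the TYPE_DATE format, inferred from the defaults above and
// below: values are written as "DD Mon YYYY", e.g. "01 Jan 1970", and are
// interpreted in UTC per the parm descriptions.
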
m->m_title = "spider end time";
m->m_desc = "Only spider URLs scheduled to be spidered "
"at this time or before. If \"use current time\" is true "
"then the current local time is used for this value instead. "
"In UTC.";
m->m_cgi = "stb";
m->m_off = (char *)&cr.m_spiderTimeMax - x;
m->m_type = TYPE_DATE2;
m->m_def = "01 Jan 2010";
m->m_group = 0;
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_SPIDER;
m->m_obj = OBJ_COLL;
m++;

m->m_title = "use current time";
m->m_desc = "Use the current time as the spider end time?";
m->m_cgi = "uct";
m->m_off = (char *)&cr.m_useCurrentTime - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_page = PAGE_SPIDER;
m->m_obj = OBJ_COLL;
m++;

/*
m->m_title = "default ruleset site file num";
m->m_desc = "Use this as the current Sitedb file num for Sitedb "
"entries that always use the current default";
m->m_cgi = "dftsfn";
m->m_off = (char *)&cr.m_defaultSiteRec - x;
m->m_type = TYPE_LONG;
m->m_def = "16";
m++;

m->m_title = "RSS ruleset site file num";
m->m_desc = "Use this Sitedb file num ruleset for RSS feeds";
m->m_cgi = "rssrs";
m->m_off = (char *)&cr.m_rssSiteRec - x;
m->m_type = TYPE_LONG;
m->m_def = "25";
m->m_group = 0;
m++;

m->m_title = "TOC ruleset site file num";
m->m_desc = "Use this Sitedb file num ruleset "
"for Table of Contents pages";
m->m_cgi = "tocrs";
m->m_off = (char *)&cr.m_tocSiteRec - x;
m->m_type = TYPE_LONG;
m->m_def = "29";
m->m_group = 0;
m++;
*/

/*
m->m_title = "store topics vector";
m->m_desc = "Should Gigablast compute and store a topics vector "
"for every document indexed. This allows Gigablast to "
"do topic clustering without having to compute this vector "
"at query time. You can turn topic clustering on in the "
"Search Controls page.";
m->m_cgi = "utv";
m->m_off = (char *)&cr.m_useGigabitVector - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;

m->m_title = "use gigabits for vector";
m->m_desc = "For news collection. "
"Should Gigablast form the similarity vector using "
"Gigabits, as opposed to a straight out random sample. "
"This does clustering more "
"by topic rather than by explicit content in common.";
m->m_cgi = "uct";
m->m_off = (char *)&cr.m_useGigabitVector - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;

m->m_title = "max similarity to reindex";
m->m_desc = "If the url's content is over X% similar to what we "
"already "
"have indexed, then do not reindex it, and treat the content "
"as if it were unchanged for intelligent spider scheduling "
"purposes. Set to 100% to always reindex the document, "
"regardless, although the use-ifModifiedSince check "
"above may still be in effect, as well as the "
"deduping-enabled check. This will also affect the re-spider "
"time, because Gigablast spiders documents that change "
"frequently faster.";
m->m_cgi = "msti";
m->m_off = (char *)&cr.m_maxSimilarityToIndex - x;
m->m_type = TYPE_LONG;
m->m_def = "100";
m->m_group = 0;
m++;
*/

// this is obsolete -- we can use the reg exp "isroot"
/*
m->m_title = "root url priority";
m->m_desc = "What spider priority should root urls "
"be assigned? Spider priorities range from 0 to 31. If no "
"urls are scheduled to be spidered in the priority 31 "
"bracket, the spider moves down to 30, etc., until it finds "
"a url to spider. If this priority is undefined "
"then that url's priority is determined based on the rules "
"on the URL filters page. If the priority is still "
"undefined then the priority is taken to be the priority of "
"the parent minus one, which results in a breadth first "
"spidering algorithm."; // html
m->m_cgi = "srup";
m->m_off = (char *)&cr.m_spiderdbRootUrlPriority - x;
m->m_type = TYPE_PRIORITY2;// 0-(MAX_SPIDER_PRIORITIES-1)dropdown menu
m->m_def = "15";
m++;
*/

/*
-- mdw, now in urlfilters using "isaddurl" "reg exp"
m->m_title = "add url priority";
m->m_desc = "What is the priority of a url which "
"is added to the spider queue via the "
"add url page?"; // html
m->m_cgi = "saup";
m->m_off = (char *)&cr.m_spiderdbAddUrlPriority - x;
m->m_type = TYPE_PRIORITY; // 0-(MAX_SPIDER_PRIORITIES-1)dropdown menu
m->m_def = "16";
m->m_group = 0;
m++;
*/

/*
m->m_title = "new spider by priority";
m->m_desc = "Specify priorities for which "
"new urls not yet in the index should be spidered.";
m->m_cgi = "sn";
m->m_xml = "spiderNewBits";
m->m_off = (char *)&cr.m_spiderNewBits - x;
m->m_type = TYPE_PRIORITY_BOXES; // array of numbered (0-(MAX_SPIDER_PRIORITIES-1)) checkboxes
m->m_fixed = MAX_SPIDER_PRIORITIES;
m->m_def = "1"; // default for each one is on
m++;

m->m_title = "old spider by priority";
m->m_desc = "Specify priorities for which old "
"urls already in the index should be spidered.";
m->m_cgi = "so";
m->m_xml = "spiderOldBits";
m->m_off = (char *)&cr.m_spiderOldBits - x;
m->m_type = TYPE_PRIORITY_BOXES; // array of numbered (0-(MAX_SPIDER_PRIORITIES-1)) checkboxes
m->m_fixed = MAX_SPIDER_PRIORITIES;
m->m_def = "1"; // default for each one is on
m->m_group = 0;
m++;

m->m_title = "max spiders per domain";
m->m_desc = "How many pages should the spider "
"download simultaneously from any one domain? This can "
"prevent the spider from hitting one server too hard.";
m->m_cgi = "mspd";
m->m_off = (char *)&cr.m_maxSpidersPerDomain - x;
m->m_type = TYPE_LONG;
m->m_def = "1";
m++;

m->m_title = "same domain wait";
m->m_desc = "How many milliseconds should Gigablast wait "
"between spidering a second url from the same domain. "
"This is used to prevent the spiders from hitting a "
"website too hard.";
m->m_cgi = "sdw";
m->m_off = (char *)&cr.m_sameDomainWait - x;
m->m_type = TYPE_LONG;
m->m_def = "500";
m->m_group = 0;
m++;

m->m_title = "same ip wait";
m->m_desc = "How many milliseconds should Gigablast wait "
"between spidering a second url from the same IP address. "
"This is used to prevent the spiders from hitting a "
"website too hard.";
m->m_cgi = "siw";
m->m_off = (char *)&cr.m_sameIpWait - x;
m->m_type = TYPE_LONG;
m->m_def = "10000";
m->m_group = 0;
m++;
*/

/*
m->m_title = "use distributed spider lock";
m->m_desc = "Enable distributed spider locking to strictly enforce "
"same domain waits at a global level.";
m->m_cgi = "udsl";
m->m_off = (char *)&cr.m_useSpiderLocks - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;

m->m_title = "distribute spider download based on ip";
m->m_desc = "Distribute web downloads based on the ip of the host so "
"only one spider ip hits the same hosting ip. Helps "
"webmaster's logs look nicer.";
m->m_cgi = "udsd";
m->m_off = (char*)&cr.m_distributeSpiderGet - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_group = 0;
m++;

m->m_title = "percent of water mark to reload queues";
m->m_desc = "When a spider queue drops below this percent of its "
"max level it will reload from disk.";
m->m_cgi = "rlqp";
m->m_off = (char*)&cr.m_reloadQueuePercent - x;
m->m_type = TYPE_LONG;
m->m_def = "25";
m++;
*/

/*
m->m_title = "min respider wait";
m->m_desc = "What is the minimum number of days "
"the spider should wait before re-visiting a particular "
"web page? "
"The spider attempts to determine the update cycle of "
"each web page and it tries to visit them as needed, but it "
"will not wait less than this number of days regardless.";
m->m_cgi = "mrw";
m->m_off = (char *)&cr.m_minRespiderWait - x;
m->m_type = TYPE_FLOAT;
m->m_def = "1.0";
m++;

m->m_title = "max respider wait";
m->m_desc = "What is the maximum number of days "
"the spider should wait before re-visiting a particular "
"web page?";
m->m_cgi = "xrw";
m->m_off = (char *)&cr.m_maxRespiderWait - x;
m->m_type = TYPE_FLOAT;
m->m_def = "90.0";
m->m_group = 0;
m++;

m->m_title = "first respider wait";
m->m_desc = "What is the number of days "
"Gigablast should wait before spidering a particular web page "
"for the second time? Tag in ruleset will override this value "
"if it is present.";
m->m_cgi = "frw";
m->m_off = (char *)&cr.m_firstRespiderWait - x;
m->m_type = TYPE_FLOAT;
m->m_def = "30.0";
m->m_group = 0;
m++;

m->m_title = "error respider wait";
m->m_desc = "If a spidered web page has a network "
"error, such as a DNS not found error, or a time out error, "
"how many days should Gigablast wait before reattempting "
"to spider that web page?";
m->m_cgi = "erw";
m->m_off = (char *)&cr.m_errorRespiderWait - x;
m->m_type = TYPE_FLOAT;
m->m_def = "2.0";
m->m_group = 0;
m++;

m->m_title = "doc not found error respider wait";
m->m_desc = "If a spidered web page has a http status "
"error, such as a 404 page not found error, "
"how many days should Gigablast wait before reattempting "
"to spider that web page?";
m->m_cgi = "dnferw";
m->m_off = (char *)&cr.m_docNotFoundErrorRespiderWait - x;
m->m_type = TYPE_FLOAT;
m->m_def = "7.0";
m->m_group = 0;
m++;
*/

/*
m->m_title = "spider max kbps";
m->m_desc = "The maximum kilobits per second "
"that the spider can download.";
m->m_cgi = "cmkbps";
m->m_off = (char *)&cr.m_maxKbps - x;
m->m_type = TYPE_FLOAT;
m->m_def = "999999.0";
m++;

m->m_title = "spider max pages per second";
m->m_desc = "The maximum number of pages per "
"second that can be indexed or deleted from the index.";
m->m_cgi = "cmpps";
m->m_off = (char *)&cr.m_maxPagesPerSecond - x;
m->m_type = TYPE_FLOAT;
m->m_def = "999999.0";
m->m_group = 0;
m++;
*/

/*
m->m_title = "spider new percent";
m->m_desc = "Approximate percentage of new vs. old docs to spider. "
"If set to a negative number, the old alternating "
"priority algorithm is used.";
m->m_cgi = "snp";
m->m_off = (char *)&cr.m_spiderNewPct - x;
m->m_type = TYPE_FLOAT;
m->m_def = "-1.0";
m->m_group = 0;
m++;
*/

/*
m->m_title = "number retries per url";
m->m_desc = "How many times should the spider be "
"allowed to fail to download a particular web page before "
"it gives up? "
"Failure may result from temporary loss of internet "
"connectivity on the remote end, dns or routing problems.";
m->m_cgi = "nr";
m->m_off = (char *)&cr.m_numRetries - x;
m->m_type = TYPE_RETRIES; // dropdown from 0 to 3
m->m_def = "1";
m++;

m->m_title = "priority of urls being retried";
m->m_desc = "Keep this pretty high so that we get problem urls "
"out of the index fast, otherwise, you might be waiting "
"months for another retry. Use <i>undefined</i> to indicate "
"no change in the priority of the url.";
m->m_cgi = "rtp";
m->m_off = (char *)&cr.m_retryPriority - x;
m->m_type = TYPE_PRIORITY2; // -1 to 31
m->m_def = "-1";
m->m_group = 0;
m++;

m->m_title = "max pages in index";
m->m_desc = "What is the maximum number of "
"pages that are permitted for this collection?";
m->m_cgi = "mnp";
m->m_off = (char *)&cr.m_maxNumPages - x;
m->m_type = TYPE_LONG_LONG;
m->m_def = "10000000000"; // 10 billion
m++;

m->m_title = "import link info"; // from other cluster";
m->m_desc = "Say yes here to make Gigablast import "
"link text from another collection into this one "
"when spidering urls. Gigablast will "
"use the hosts.conf file in the working directory to "
"tell it what hosts belong to the cluster to import from. "
"Gigablast "
"will use the \"update link votes frequency\" parm above "
"to determine if the info should be recomputed on the other "
"cluster.";
m->m_cgi = "eli"; // external link info
m->m_off = (char *)&cr.m_getExternalLinkInfo - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 2;
m++;

m->m_title = "use hosts2.conf for import cluster";
m->m_desc = "Tell Gigablast to import from the cluster defined by "
"hosts2.conf in the working directory, rather than "
"hosts.conf";
m->m_cgi = "elib"; // external link info
m->m_off = (char *)&cr.m_importFromHosts2Conf - x;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_priv = 2;
m->m_group = 0;
m++;

//m->m_title = "get link info from other cluster in real-time";
//m->m_desc = "Say yes here to make Gigablast tell the other "
// "cluster to compute the link info, not just return a "
// "stale copy from the last time it computed it.";
//m->m_cgi = "elif"; // external link info fresh
//m->m_off = (char *)&cr.m_getExternalLinkInfoFresh - x;
//m->m_type = TYPE_BOOL;
//m->m_def = "0";
//m->m_group = 0;
//m->m_priv = 2;
//m++;

m->m_title = "collection to import from";
m->m_desc = "Gigablast will fetch the link info from this "
"collection.";
m->m_cgi = "elic"; // external link info
m->m_off = (char *)&cr.m_externalColl - x;
m->m_type = TYPE_STRING;
m->m_size = MAX_COLL_LEN+1;
m->m_def = "";
m->m_group = 0;
m->m_priv = 2;
m++;

m->m_title = "turk tags to display";
m->m_desc = "Tell pageturk to display the tag questions "
"for the comma separated tag names. "
"No spaces allowed.";
m->m_cgi = "ttags";
m->m_xml = "turkTags";
m->m_type = TYPE_STRING;
m->m_size = 256;
m->m_def = "blog,spam,news";
m->m_off = (char *)&cr.m_turkTags - x;
m->m_group = 0;
m->m_priv = 2;
m++;
*/

/*
// now we store this in title recs, so we can change it on the fly
m->m_title = "title weight";
m->m_desc = "Weight title this much more or less. The units are "
"percentage. A 100 means to not give the title any special "
"weight. Generally, though, you want to give it significantly "
"more weight than that, so 2400 is the default.";
m->m_cgi = "tw";

m->m_off = (char *)&cr.m_titleWeight - x;
m->m_type = TYPE_LONG;
m->m_def = "4600";
m->m_min = 0;
m++;

// now we store this in title recs, so we can change it on the fly
m->m_title = "header weight";
m->m_desc = "Weight terms in header tags by this much more or less. "
"The units are "
"percentage. A 100 means to not give the header any special "
"weight. Generally, though, you want to give it significantly "
"more weight than that, so 600 is the default.";
m->m_cgi = "hw";
m->m_off = (char *)&cr.m_headerWeight - x;
m->m_type = TYPE_LONG;
m->m_def = "600";
m->m_min = 0;
m->m_group = 0;
m++;

// now we store this in title recs, so we can change it on the fly
m->m_title = "url path word weight";
m->m_desc = "Weight text in url path this much more. "
"The units are "
"percentage. A 100 means to not give any special "
"weight. Generally, though, you want to give it significantly "
"more weight than that, so 600 is the default.";
m->m_cgi = "upw";
m->m_off = (char *)&cr.m_urlPathWeight - x;
m->m_type = TYPE_LONG;
m->m_def = "1600";
m->m_min = 0;
m->m_group = 0;
m++;

// now we store this in title recs, so we can change it on the fly
m->m_title = "external link text weight";
m->m_desc = "Weight text in the incoming external link text this "
"much more. The units are percentage. It already receives a "
"decent amount of weight naturally.";
m->m_cgi = "eltw";
m->m_off = (char *)&cr.m_externalLinkTextWeight - x;
m->m_type = TYPE_LONG;
m->m_def = "600";
m->m_min = 0;
m->m_group = 0;
m++;

// now we store this in title recs, so we can change it on the fly
m->m_title = "internal link text weight";
m->m_desc = "Weight text in the incoming internal link text this "
"much more. The units are percentage. It already receives a "
"decent amount of weight naturally.";
m->m_cgi = "iltw";
m->m_off = (char *)&cr.m_internalLinkTextWeight - x;
m->m_type = TYPE_LONG;
m->m_def = "200";
m->m_min = 0;
m->m_group = 0;
m++;

// now we store this in title recs, so we can change it on the fly
m->m_title = "concept weight";
m->m_desc = "Weight concepts this much more. "
"The units are "
"percentage. It already receives a decent amount of weight "
"naturally. AKA: surrounding text boost.";
m->m_cgi = "cw";
m->m_off = (char *)&cr.m_conceptWeight - x;
m->m_type = TYPE_LONG;
m->m_def = "50";
m->m_min = 0;
m->m_group = 0;
m++;
*/

/*
// now we store this in title recs, so we can change it on the fly
m->m_title = "site num inlinks boost base";
m->m_desc = "Boost the score of all terms in the document using "
"this number. "
"The boost itself is expressed as a percentage. "
"The boost is B^X, where X is the number of good "
"inlinks to the document's site "
"and B is this boost base. "
"The score of each term in the "
"document is multiplied by the boost. That product "
"becomes the new score of that term. "
"For purposes of this calculation we limit X to 1000.";
m->m_cgi = "qbe";
m->m_off = (char *)&cr.m_siteNumInlinksBoostBase - x;
m->m_type = TYPE_FLOAT;
m->m_def = "1.005";
m->m_min = 0;
m->m_group = 0;
m++;
*/

/*
// use menu elimination technology?
m->m_title = "only index article content";
m->m_desc = "If this is true gigablast will only index the "
"article content on pages identified as permalinks. It will "
"NOT index any page content on non-permalink pages, and it "
"will avoid indexing menu content on any page. It will not "
"index meta tags on any page. It will only index incoming "
"link text for permalink pages. Useful when "
"indexing blog or news sites.";
m->m_cgi = "met";
m->m_off = (char *)&cr.m_eliminateMenus - x;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m++;
*/

// replaced by lang== lang!= in url filters
//m->m_title = "collection language";
//m->m_desc = "Only spider pages determined to be in "
// "this language (see Language.h)";
//m->m_cgi = "clang";
//m->m_off = (char *)&cr.m_language - x;
//m->m_type = TYPE_LONG;
//m->m_def = "0";
//m++;

////////////////
// END PAGE SPIDER CONTROLS
////////////////


///////////////////////////////////////////
// PAGE REPAIR CONTROLS
///////////////////////////////////////////

m->m_title = "rebuild mode enabled";
m->m_desc = "If enabled, gigablast will rebuild the rdbs as "
"specified by the parameters below. When a particular "
"collection is in rebuild mode, it cannot spider or merge "
"titledb files.";
m->m_cgi = "rme";
m->m_off = (char *)&g_conf.m_repairingEnabled - g;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_REPAIR;
m->m_obj = OBJ_CONF;
m->m_def = "0";
m->m_sync = false; // do not sync this parm
m++;

m->m_title = "collection to rebuild";
m->m_xml = "collectionToRebuild";
m->m_desc = "Name of collection to rebuild.";
// m->m_desc = "Comma or space separated list of the collections "
// "to rebuild.";
m->m_cgi = "rctr"; // repair collections to repair
m->m_off = (char *)&g_conf.m_collsToRepair - g;
m->m_type = TYPE_SAFEBUF;//STRING;
//m->m_size = 1024;
m->m_def = "";
m->m_page = PAGE_REPAIR;
m->m_obj = OBJ_CONF;
m->m_group = 0;
m->m_flags = PF_REQUIRED;// | PF_COLLDEFAULT;//| PF_NOHTML;
m++;

m->m_title = "rebuild ALL collections";
m->m_desc = "If enabled, gigablast will rebuild all collections.";
m->m_cgi = "rac";
m->m_off = (char *)&g_conf.m_rebuildAllCollections - g;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_REPAIR;
m->m_obj = OBJ_CONF;
m->m_def = "0";
m->m_group = 0;
m++;

m->m_title = "memory to use for rebuild";
m->m_desc = "In bytes.";
m->m_cgi = "rmtu"; // repair mem to use
m->m_off = (char *)&g_conf.m_repairMem - g;
m->m_type = TYPE_LONG;
m->m_page = PAGE_REPAIR;
m->m_obj = OBJ_CONF;
m->m_def = "200000000";
m->m_units = "bytes";
m->m_group = 0;
m++;

m->m_title = "max rebuild injections";
m->m_desc = "Maximum number of outstanding injections for "
"rebuild.";
m->m_cgi = "mrps";
m->m_off = (char *)&g_conf.m_maxRepairSpiders - g;
m->m_type = TYPE_LONG;
m->m_page = PAGE_REPAIR;
m->m_obj = OBJ_CONF;
m->m_def = "2";
m->m_group = 0;
m++;

m->m_title = "full rebuild";
m->m_desc = "If enabled, gigablast will reinject the content of "
"all title recs into a secondary rdb system. That will "
"replace the primary rdb system when complete.";
m->m_cgi = "rfr"; // repair full rebuild
m->m_off = (char *)&g_conf.m_fullRebuild - g;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_REPAIR;
m->m_obj = OBJ_CONF;
m->m_def = "1";
m->m_group = 0;
m++;

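// The flow implied by the description above: title recs are scanned from
// the primary rdbs, their content is reinjected into a parallel secondary
// set of rdb files, and when the scan completes the secondary files are
// swapped in as the primary ones. (Summary of the parm description above,
// not a statement of additional behavior.)
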
m->m_title = "add spiderdb recs of non indexed urls";
m->m_desc = "If enabled, gigablast will add the spiderdb "
"records of unindexed urls "
"when doing the full rebuild or the spiderdb "
"rebuild. Otherwise, only the indexed urls will get "
"spiderdb records in spiderdb. This can be faster because "
"Gigablast does not have to do an IP lookup on every url "
"if its IP address is not in tagdb already.";
m->m_cgi = "rfrknsx";
m->m_off = (char *)&g_conf.m_rebuildAddOutlinks - g;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_REPAIR;
m->m_obj = OBJ_CONF;
m->m_def = "0";
m->m_group = 0;
m++;

m->m_title = "recycle link text";
m->m_desc = "If enabled, gigablast will recycle the link text "
"when rebuilding titledb. "
"The siterank, which is determined by the "
"number of inlinks to a site, is stored/cached in tagdb "
"so that is a separate item. If you want to pick up new "
"link text you will want to set this to <i>NO</i> and "
"make sure to rebuild titledb, since that stores the "
"link text.";
m->m_cgi = "rrli"; // rebuild recycle link info
m->m_off = (char *)&g_conf.m_rebuildRecycleLinkInfo - g;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_REPAIR;
m->m_obj = OBJ_CONF;
m->m_def = "1";
m->m_group = 0;
m++;

/*
m->m_title = "recycle imported link info";
m->m_desc = "If enabled, gigablast will recycle the imported "
"link info when rebuilding titledb.";
m->m_cgi = "rrlit"; // repair full rebuild
m->m_off = (char *)&g_conf.m_rebuildRecycleLinkInfo2 - g;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_REPAIR;
m->m_def = "1";
m->m_group = 0;
m++;
*/

/*
m->m_title = "remove bad pages";
m->m_desc = "If enabled, gigablast just scans the titledb recs "
"in the given collection and removes those that are "
"banned or filtered according to the url filters table. It "
"will also lookup in tagdb.";
m->m_cgi = "rbadp";
m->m_off = (char *)&g_conf.m_removeBadPages - g;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_REPAIR;
m->m_def = "0";
m++;
*/

m->m_title = "rebuild titledb";
m->m_desc = "If enabled, gigablast will rebuild this rdb";
m->m_cgi = "rrt"; // repair rebuild titledb
m->m_off = (char *)&g_conf.m_rebuildTitledb - g;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_REPAIR;
m->m_obj = OBJ_CONF;
m->m_def = "0";
m++;

/*
m->m_title = "rebuild tfndb";
m->m_desc = "If enabled, gigablast will rebuild this rdb";
m->m_cgi = "rru"; // repair rebuild tfndb
m->m_off = (char *)&g_conf.m_rebuildTfndb - g;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_REPAIR;
m->m_def = "0";
m->m_group = 0;
m++;

m->m_title = "rebuild indexdb";
m->m_desc = "If enabled, gigablast will rebuild this rdb";
m->m_cgi = "rri";
m->m_off = (char *)&g_conf.m_rebuildIndexdb - g;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_REPAIR;
m->m_def = "0";
m->m_group = 0;
m++;
*/

m->m_title = "rebuild posdb";
m->m_desc = "If enabled, gigablast will rebuild this rdb";
m->m_cgi = "rri";
m->m_off = (char *)&g_conf.m_rebuildPosdb - g;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_REPAIR;
m->m_obj = OBJ_CONF;
m->m_def = "0";
m->m_group = 0;
m++;

/*
m->m_title = "rebuild no splits";
m->m_desc = "If enabled, gigablast will just re-add the no split "
"lists from all the current title recs back into indexdb.";
m->m_cgi = "rns";
m->m_off = (char *)&g_conf.m_rebuildNoSplits - g;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_REPAIR;
m->m_def = "0";
m->m_group = 0;
m++;

m->m_title = "rebuild datedb";
m->m_desc = "If enabled, gigablast will rebuild this rdb";
m->m_cgi = "rrd";
m->m_off = (char *)&g_conf.m_rebuildDatedb - g;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_REPAIR;
m->m_def = "0";
m->m_group = 0;
m++;

m->m_title = "rebuild checksumdb";
m->m_desc = "If enabled, gigablast will rebuild this rdb";
m->m_cgi = "rrch";
m->m_off = (char *)&g_conf.m_rebuildChecksumdb - g;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_REPAIR;
m->m_def = "0";
m->m_group = 0;
m++;
*/

m->m_title = "rebuild clusterdb";
m->m_desc = "If enabled, gigablast will rebuild this rdb";
m->m_cgi = "rrcl";
m->m_off = (char *)&g_conf.m_rebuildClusterdb - g;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_REPAIR;
m->m_obj = OBJ_CONF;
m->m_def = "0";
m->m_group = 0;
m++;

m->m_title = "rebuild spiderdb";
m->m_desc = "If enabled, gigablast will rebuild this rdb";
m->m_cgi = "rrsp";
m->m_off = (char *)&g_conf.m_rebuildSpiderdb - g;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_REPAIR;
m->m_obj = OBJ_CONF;
m->m_def = "0";
m->m_group = 0;
m++;

/*
m->m_title = "rebuild tagdb";
m->m_desc = "If enabled, gigablast will rebuild this rdb";
m->m_cgi = "rrsi";
m->m_off = (char *)&g_conf.m_rebuildSitedb - g;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_REPAIR;
m->m_def = "0";
m->m_group = 0;
m++;
*/

m->m_title = "rebuild linkdb";
m->m_desc = "If enabled, gigablast will rebuild this rdb";
m->m_cgi = "rrld";
m->m_off = (char *)&g_conf.m_rebuildLinkdb - g;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_REPAIR;
m->m_obj = OBJ_CONF;
m->m_def = "0";
m->m_group = 0;
m++;

/*
m->m_title = "rebuild tagdb";
m->m_desc = "If enabled, gigablast will rebuild this rdb";
m->m_cgi = "rrtgld";
m->m_off = (char *)&g_conf.m_rebuildTagdb - g;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_REPAIR;
m->m_def = "0";
m->m_group = 0;
m++;

m->m_title = "rebuild placedb";
m->m_desc = "If enabled, gigablast will rebuild this rdb";
m->m_cgi = "rrpld";
m->m_off = (char *)&g_conf.m_rebuildPlacedb - g;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_REPAIR;
m->m_def = "0";
m->m_group = 0;
m++;

m->m_title = "rebuild timedb";
m->m_desc = "If enabled, gigablast will rebuild this rdb";
m->m_cgi = "rrtmd";
m->m_off = (char *)&g_conf.m_rebuildTimedb - g;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_REPAIR;
m->m_def = "0";
m->m_group = 0;
m++;

m->m_title = "rebuild sectiondb";
m->m_desc = "If enabled, gigablast will rebuild this rdb";
m->m_cgi = "rrsnd";
m->m_off = (char *)&g_conf.m_rebuildSectiondb - g;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_REPAIR;
m->m_def = "0";
m->m_group = 0;
m++;

m->m_title = "rebuild revdb";
m->m_desc = "If enabled, gigablast will rebuild this rdb";
m->m_cgi = "rrrvd";
m->m_off = (char *)&g_conf.m_rebuildRevdb - g;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_REPAIR;
m->m_def = "0";
m->m_group = 0;
m++;
*/

m->m_title = "rebuild root urls";
m->m_desc = "If disabled, gigablast will skip root urls.";
m->m_cgi = "ruru";
m->m_off = (char *)&g_conf.m_rebuildRoots - g;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_REPAIR;
m->m_obj = OBJ_CONF;
m->m_def = "1";
m++;

m->m_title = "rebuild non-root urls";
m->m_desc = "If disabled, gigablast will skip non-root urls.";
m->m_cgi = "runru";
m->m_off = (char *)&g_conf.m_rebuildNonRoots - g;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_REPAIR;
m->m_obj = OBJ_CONF;
m->m_def = "1";
m->m_group = 0;
m++;

/*
m->m_title = "skip tagdb lookup";
m->m_desc = "When rebuilding spiderdb and scanning it for new "
"spiderdb records, should a tagdb lookup be performed? "
"Runs much much "
"faster without it. Will also keep the original doc quality "
"and "
"spider priority intact.";
m->m_cgi = "rssl";
m->m_off = (char *)&g_conf.m_rebuildSkipSitedbLookup - g;
m->m_type = TYPE_BOOL;
m->m_page = PAGE_REPAIR;
m->m_obj = OBJ_CONF;
m->m_def = "0";
m->m_group = 0;
m++;
*/

///////////////////////////////////////////
// END PAGE REPAIR //
///////////////////////////////////////////


///////////////////////////////////////////
// AUTOBAN CONTROLS
//
///////////////////////////////////////////

m->m_title = "ban IPs";
m->m_desc = "Add IPs here to bar them from accessing this "
"gigablast server.";
m->m_cgi = "banIps";
m->m_xml = "banIps";
m->m_off = (char *)g_conf.m_banIps - g;
m->m_type = TYPE_STRINGBOX;
m->m_page = PAGE_AUTOBAN;
m->m_obj = OBJ_CONF;
m->m_size = AUTOBAN_TEXT_SIZE;
m->m_group = 1;
m->m_def = "";
m->m_plen = (char *)&g_conf.m_banIpsLen - g; // length of string
m++;

m->m_title = "allow IPs";
m->m_desc = "Add IPs here to give them an infinite query quota.";
m->m_cgi = "allowIps";
m->m_xml = "allowIps";
m->m_off = (char *)g_conf.m_allowIps - g;
m->m_type = TYPE_STRINGBOX;
m->m_page = PAGE_AUTOBAN;
m->m_size = AUTOBAN_TEXT_SIZE;
m->m_group = 1;
m->m_def = "";
m->m_plen = (char *)&g_conf.m_allowIpsLen - g; // length of string
m->m_obj = OBJ_CONF;
m++;

m->m_title = "valid search codes";
m->m_desc = "Don't try to autoban queries that have one "
"of these codes. Also, the code must be valid for us "
"to use &uip=IPADDRESS as the IP address of the submitter "
"for purposes of autoban AND purposes of addurl daily quotas.";
m->m_cgi = "validCodes";
m->m_xml = "validCodes";
m->m_off = (char *)g_conf.m_validCodes - g;
m->m_type = TYPE_STRINGBOX;
m->m_page = PAGE_AUTOBAN;
m->m_size = AUTOBAN_TEXT_SIZE;
m->m_group = 1;
m->m_def = "";
m->m_plen = (char *)&g_conf.m_validCodesLen - g; // length of string
m->m_obj = OBJ_CONF;
m++;

m->m_title = "Extra Parms";
m->m_desc = "Append extra default parms to queries that match "
"certain substrings. Format: text to match in url, "
"followed by a space, then the list of extra parms as "
"they would appear appended to the url. "
"One match per line.";
m->m_cgi = "extraParms";
m->m_xml = "extraParms";
m->m_off = (char *)g_conf.m_extraParms - g;
m->m_type = TYPE_STRINGBOX;
m->m_page = PAGE_AUTOBAN;
m->m_size = AUTOBAN_TEXT_SIZE;
m->m_group = 1;
m->m_def = "";
m->m_plen = (char *)&g_conf.m_extraParmsLen - g; // length of string
m->m_obj = OBJ_CONF;
m++;

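// Illustrative extraParms entry following the format described above
// (hypothetical values): the line
//   /search &n=25
// would append "&n=25" to any incoming request url containing "/search".
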
m->m_title = "ban substrings";
m->m_desc = "Ban any query that matches this list of "
"substrings. Must match all comma-separated strings "
"on the same line. ('\\n' = OR, ',' = AND)";
m->m_cgi = "banRegex";
m->m_xml = "banRegex";
m->m_off = (char *)g_conf.m_banRegex - g;
m->m_type = TYPE_STRINGBOX;
m->m_page = PAGE_AUTOBAN;
m->m_size = AUTOBAN_TEXT_SIZE;
m->m_group = 1;
m->m_def = "";
m->m_plen = (char *)&g_conf.m_banRegexLen - g; // length of string
m->m_obj = OBJ_CONF;
m++;

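// Illustrative entries showing the AND/OR semantics above (hypothetical
// substrings):
//   casino,poker
//   viagra
// bans queries containing both "casino" and "poker", or containing
// "viagra".
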
/////////////
// END AUTOBAN CONTROLS
/////////////

///////////////////////////////////////////
// ROOT PASSWORDS page
///////////////////////////////////////////


m->m_title = "Master Passwords";
m->m_desc = "Whitespace separated list of passwords. "
"Any matching password will have administrative access "
"to Gigablast and all collections.";
//"If no Admin Password or Admin IP is specified then "
//"Gigablast will only allow local IPs to connect to it "
//"as the master admin.";
m->m_cgi = "masterpwds";
m->m_xml = "masterPasswords";
m->m_def = "";
m->m_obj = OBJ_CONF;
m->m_off = (char *)&g_conf.m_masterPwds - g;
m->m_type = TYPE_SAFEBUF; // STRINGNONEMPTY;
m->m_page = PAGE_MASTERPASSWORDS;
//m->m_max = MAX_MASTER_PASSWORDS;
//m->m_size = PASSWORD_MAX_LEN+1;
//m->m_addin = 1; // "insert" follows?
m->m_flags = PF_PRIVATE | PF_TEXTAREA | PF_SMALLTEXTAREA;
m++;

m->m_title = "Master IPs";
//m->m_desc = "Allow UDP requests from this list of IPs. Any datagram "
// "received not coming from one of these IPs, or an IP in "
// "hosts.conf, is dropped. If another cluster is accessing this "
// "cluster for getting link text or whatever, you will need to "
// "list the IPs of the accessing machines here. These IPs are "
// "also used to allow access to the HTTP server even if it "
// "was disabled in the Master Controls. IPs that have 0 as "
// "their Least Significant Byte are treated as wildcards for "
// "IP blocks. That is, 1.2.3.0 means 1.2.3.*.";
m->m_desc = "Whitespace separated list of IPs. "
"Any IPs in this list will have administrative access "
"to Gigablast and all collections.";
m->m_cgi = "masterips";
m->m_xml = "masterIps";
m->m_page = PAGE_MASTERPASSWORDS;
m->m_off = (char *)&g_conf.m_connectIps - g;
m->m_type = TYPE_SAFEBUF;//IP;
m->m_def = "";
//m->m_max = MAX_CONNECT_IPS;
//m->m_priv = 2;
//m->m_addin = 1; // "insert" follows?
//m->m_flags = PF_HIDDEN | PF_NOSAVE;
m->m_obj = OBJ_CONF;
m->m_flags = PF_PRIVATE | PF_TEXTAREA | PF_SMALLTEXTAREA;
m++;

// m->m_title = "remove connect ip";
// m->m_desc = "remove a connect ip";
// m->m_cgi = "removeip";
// m->m_type = TYPE_CMD;
// m->m_page = PAGE_NONE;
// m->m_func = CommandRemoveConnectIpRow;
// m->m_cast = 1;
// m->m_obj = OBJ_CONF;
// m++;

// m->m_title = "remove a password";
// m->m_desc = "remove a password";
// m->m_cgi = "removepwd";
// m->m_type = TYPE_CMD;
// m->m_page = PAGE_NONE;
// m->m_func = CommandRemovePasswordRow;
// m->m_cast = 1;
// m->m_obj = OBJ_CONF;
// m++;


/*
m->m_title = "Super Turks";
m->m_desc = "Add facebook user IDs here so those people can "
"turk the results. Later we may limit each person to "
"turking a geographic region.";
m->m_cgi = "supterturks";
m->m_xml = "supterturks";
m->m_def = "";
m->m_off = (char *)&g_conf.m_superTurks - g;
m->m_type = TYPE_STRINGBOX;
m->m_perms = PAGE_MASTER;
m->m_size = USERS_TEXT_SIZE;
m->m_plen = (char *)&g_conf.m_superTurksLen - g;
m->m_page = PAGE_MASTERPASSWORDS;
m->m_flags = PF_HIDDEN | PF_NOSAVE;
m++;
*/

/*
m->m_title = "Users";
m->m_desc = "Add users here. The format is "
"collection:ip:username:password:relogin:pages:tagnames"
" Username and password cannot be blank."
" You can specify "
"* for collection to indicate all collections. "
" * can be used in IP as wildcard. "
" * in pages means user has access to all pages. Also"
" you can specify individual pages. A \'-\' sign at the"
" start of page means user is not allowed to access that"
" page. Please refer the page reference table at the bottom "
"of this page for available pages. If you want to just login "
" once and avoid relogin for gb shutdowns then set relogin=1,"
" else set it to 0. If relogin is 1 your login will never expire either."
"<br>"
" Ex: 1. master user -> *:*:master:master:1:*:english<br>"
" 2. public user -> *:*:public:1234:0:index.html"
",get,search,login,dir:english<br>"
"3. turk user -> 66.28.58.122:main:turk:1234:0:pageturkhome,"
"pageturk,pageturkget,get,login:english";
m->m_cgi = "users";
m->m_xml = "users";
m->m_off = (char *)&g_conf.m_users - g;
m->m_type = TYPE_STRINGBOX;
m->m_perms = PAGE_MASTER;
m->m_size = USERS_TEXT_SIZE;
m->m_plen = (char *)&g_conf.m_usersLen - g;
m->m_page = PAGE_MASTERPASSWORDS;
m++;
*/

/*
m->m_title = "Master IPs";
m->m_desc = "If someone connects from one of these IPs "
"then they will have full "
"master administrator privileges. "
"If no IPs are specified, then master administrators can "
"get access for any IP. "
"Connecting from 127.0.0.1 always grants master privileges. "
"If no Master Password or Master IP is specified then "
"Gigablast will assign a default password of footbar23.";
m->m_cgi = "masterip";
m->m_xml = "masterIp";
m->m_max = MAX_MASTER_IPS;
m->m_off = (char *)g_conf.m_masterIps - g;
m->m_type = TYPE_IP;
m++;
*/

m->m_title = "Collection Passwords";
m->m_desc = "Whitespace separated list of passwords. "
"Any matching password will have administrative access "
"to the controls for just this collection. The master "
"password and IPs are controlled through the "
"<i>master passwords</i> link under the ADVANCED controls "
"tab. The master passwords or IPs have administrative "
"access to all collections.";
m->m_cgi = "collpwd";
m->m_xml = "collectionPasswords";
m->m_obj = OBJ_COLL;
m->m_off = (char *)&cr.m_collectionPasswords - x;
m->m_def = "";
m->m_type = TYPE_SAFEBUF; // STRINGNONEMPTY;
m->m_page = PAGE_COLLPASSWORDS;
m->m_flags = PF_PRIVATE | PF_TEXTAREA | PF_SMALLTEXTAREA;
m++;

m->m_title = "Collection IPs";
m->m_desc = "Whitespace separated list of IPs. "
"Any matching IP will have administrative access "
"to the controls for just this collection.";
m->m_cgi = "collips";
m->m_xml = "collectionIps";
m->m_obj = OBJ_COLL;
m->m_off = (char *)&cr.m_collectionIps - x;
m->m_def = "";
m->m_type = TYPE_SAFEBUF; // STRINGNONEMPTY;
m->m_page = PAGE_COLLPASSWORDS;
m->m_flags = PF_PRIVATE | PF_TEXTAREA | PF_SMALLTEXTAREA;
m++;


//////
// END SECURITY CONTROLS
//////


///////////////////////////////////////////
// LOG CONTROLS
///////////////////////////////////////////

m->m_title = "log http requests";
m->m_desc = "Log GET and POST requests received from the "
"http server?";
m->m_cgi = "hr";
m->m_off = (char *)&g_conf.m_logHttpRequests - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_page = PAGE_LOG;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "log autobanned queries";
m->m_desc = "Should we log queries that are autobanned? "
"They can really fill up the log.";
m->m_cgi = "laq";
m->m_off = (char *)&g_conf.m_logAutobannedQueries - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_page = PAGE_LOG;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "log query time threshold";
m->m_desc = "If a query took this many milliseconds or longer, then log the "
"query and the time it took to process.";
m->m_cgi = "lqtt";
m->m_off = (char *)&g_conf.m_logQueryTimeThreshold - g;
m->m_type = TYPE_LONG;
m->m_def = "5000";
m->m_page = PAGE_LOG;
m->m_obj = OBJ_CONF;
m++;

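// Illustrative behavior at the default of 5000: a query that takes
// 6200 ms is logged along with its processing time; a 300 ms query is not.
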
m->m_title = "log query reply";
m->m_desc = "Log the query reply in the proxy, but only for queries "
"that exceed the time threshold above.";
m->m_cgi = "lqr";
m->m_off = (char *)&g_conf.m_logQueryReply - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_group = 0;
m->m_page = PAGE_LOG;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "log spidered urls";
m->m_desc = "Log status of spidered or injected urls?";
m->m_cgi = "lsu";
m->m_off = (char *)&g_conf.m_logSpideredUrls - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_page = PAGE_LOG;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "log network congestion";
m->m_desc = "Log messages if Gigablast runs out of udp sockets?";
m->m_cgi = "lnc";
m->m_off = (char *)&g_conf.m_logNetCongestion - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m->m_page = PAGE_LOG;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "log informational messages";
m->m_desc = "Log messages not related to an error condition, "
"but meant more to give an idea of the state of "
"the gigablast process. These can be useful when "
"diagnosing problems.";
m->m_cgi = "li";
m->m_off = (char *)&g_conf.m_logInfo - g;
m->m_type = TYPE_BOOL;
m->m_def = "1";
m->m_page = PAGE_LOG;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "log limit breaches";
m->m_desc = "Log it when a document is not added due to a quota "
"breach. Log it when a url is too long and it gets "
"truncated.";
m->m_cgi = "ll";
m->m_off = (char *)&g_conf.m_logLimits - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m->m_page = PAGE_LOG;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "log debug admin messages";
m->m_desc = "Log various debug messages.";
m->m_cgi = "lda";
m->m_off = (char *)&g_conf.m_logDebugAdmin - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m->m_page = PAGE_LOG;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "log debug build messages";
m->m_cgi = "ldb";
m->m_off = (char *)&g_conf.m_logDebugBuild - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m->m_page = PAGE_LOG;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "log debug build time messages";
m->m_cgi = "ldbt";
m->m_off = (char *)&g_conf.m_logDebugBuildTime - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m->m_page = PAGE_LOG;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "log debug database messages";
m->m_cgi = "ldd";
m->m_off = (char *)&g_conf.m_logDebugDb - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m->m_page = PAGE_LOG;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "log debug dirty messages";
m->m_cgi = "lddm";
m->m_off = (char *)&g_conf.m_logDebugDirty - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m->m_page = PAGE_LOG;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "log debug disk messages";
m->m_cgi = "lddi";
m->m_off = (char *)&g_conf.m_logDebugDisk - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m->m_page = PAGE_LOG;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "log debug disk page cache";
m->m_cgi = "ldpc";
m->m_off = (char *)&g_conf.m_logDebugDiskPageCache - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m->m_page = PAGE_LOG;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "log debug dns messages";
m->m_cgi = "lddns";
m->m_off = (char *)&g_conf.m_logDebugDns - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m->m_page = PAGE_LOG;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "log debug http messages";
m->m_cgi = "ldh";
m->m_off = (char *)&g_conf.m_logDebugHttp - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m->m_page = PAGE_LOG;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "log debug image messages";
m->m_cgi = "ldi";
m->m_off = (char *)&g_conf.m_logDebugImage - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m->m_page = PAGE_LOG;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "log debug loop messages";
m->m_cgi = "ldl";
m->m_off = (char *)&g_conf.m_logDebugLoop - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m->m_page = PAGE_LOG;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "log debug language detection messages";
m->m_cgi = "ldg";
m->m_off = (char *)&g_conf.m_logDebugLang - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m->m_page = PAGE_LOG;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "log debug link info";
m->m_cgi = "ldli";
m->m_off = (char *)&g_conf.m_logDebugLinkInfo - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m->m_page = PAGE_LOG;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "log debug mem messages";
m->m_cgi = "ldm";
m->m_off = (char *)&g_conf.m_logDebugMem - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m->m_page = PAGE_LOG;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "log debug mem usage messages";
m->m_cgi = "ldmu";
m->m_off = (char *)&g_conf.m_logDebugMemUsage - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m->m_page = PAGE_LOG;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "log debug net messages";
m->m_cgi = "ldn";
m->m_off = (char *)&g_conf.m_logDebugNet - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m->m_page = PAGE_LOG;
m->m_obj = OBJ_CONF;
m++;

m->m_title = "log debug post query rerank messages";
|
|
m->m_cgi = "ldpqr";
|
|
m->m_off = (char *)&g_conf.m_logDebugPQR - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log debug query messages";
|
|
m->m_cgi = "ldq";
|
|
m->m_off = (char *)&g_conf.m_logDebugQuery - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log debug quota messages";
|
|
m->m_cgi = "ldqta";
|
|
m->m_off = (char *)&g_conf.m_logDebugQuota - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log debug robots messages";
|
|
m->m_cgi = "ldr";
|
|
m->m_off = (char *)&g_conf.m_logDebugRobots - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log debug spider cache messages";
|
|
m->m_cgi = "lds";
|
|
m->m_off = (char *)&g_conf.m_logDebugSpcache - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
	/*
	m->m_title = "log debug spider wait messages";
	m->m_cgi = "ldspw";
	m->m_off = (char *)&g_conf.m_logDebugSpiderWait - g;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_priv = 1;
	m++;
	*/
m->m_title = "log debug speller messages";
|
|
m->m_cgi = "ldsp";
|
|
m->m_off = (char *)&g_conf.m_logDebugSpeller - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log debug sections messages";
|
|
m->m_cgi = "ldscc";
|
|
m->m_off = (char *)&g_conf.m_logDebugSections - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log debug seo insert messages";
|
|
m->m_cgi = "ldsi";
|
|
m->m_off = (char *)&g_conf.m_logDebugSEOInserts - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log debug seo messages";
|
|
m->m_cgi = "ldseo";
|
|
m->m_off = (char *)&g_conf.m_logDebugSEO - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log debug stats messages";
|
|
m->m_cgi = "ldst";
|
|
m->m_off = (char *)&g_conf.m_logDebugStats - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log debug summary messages";
|
|
m->m_cgi = "ldsu";
|
|
m->m_off = (char *)&g_conf.m_logDebugSummary - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log debug spider messages";
|
|
m->m_cgi = "ldspid";
|
|
m->m_off = (char *)&g_conf.m_logDebugSpider - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log debug msg13 messages";
|
|
m->m_cgi = "ldspmth";
|
|
m->m_off = (char *)&g_conf.m_logDebugMsg13 - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "disable host0 for msg13 reception hack";
|
|
m->m_cgi = "dmth";
|
|
m->m_off = (char *)&g_conf.m_diffbotMsg13Hack - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log debug spider proxies";
|
|
m->m_cgi = "ldspr";
|
|
m->m_off = (char *)&g_conf.m_logDebugProxies - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log debug url attempts";
|
|
m->m_cgi = "ldspua";
|
|
m->m_off = (char *)&g_conf.m_logDebugUrlAttempts - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log debug spider downloads";
|
|
m->m_cgi = "ldsd";
|
|
m->m_off = (char *)&g_conf.m_logDebugDownloads - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log debug facebook";
|
|
m->m_cgi = "ldfb";
|
|
m->m_off = (char *)&g_conf.m_logDebugFacebook - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log debug tagdb messages";
|
|
m->m_cgi = "ldtm";
|
|
m->m_off = (char *)&g_conf.m_logDebugTagdb - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log debug tcp messages";
|
|
m->m_cgi = "ldt";
|
|
m->m_off = (char *)&g_conf.m_logDebugTcp - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log debug tcp buffer messages";
|
|
m->m_cgi = "ldtb";
|
|
m->m_off = (char *)&g_conf.m_logDebugTcpBuf - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log debug thread messages";
|
|
m->m_cgi = "ldth";
|
|
m->m_off = (char *)&g_conf.m_logDebugThread - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log debug title messages";
|
|
m->m_cgi = "ldti";
|
|
m->m_off = (char *)&g_conf.m_logDebugTitle - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log debug timedb messages";
|
|
m->m_cgi = "ldtim";
|
|
m->m_off = (char *)&g_conf.m_logDebugTimedb - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log debug topic messages";
|
|
m->m_cgi = "ldto";
|
|
m->m_off = (char *)&g_conf.m_logDebugTopics - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log debug topDoc messages";
|
|
m->m_cgi = "ldtopd";
|
|
m->m_off = (char *)&g_conf.m_logDebugTopDocs - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log debug udp messages";
|
|
m->m_cgi = "ldu";
|
|
m->m_off = (char *)&g_conf.m_logDebugUdp - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log debug unicode messages";
|
|
m->m_cgi = "ldun";
|
|
m->m_off = (char *)&g_conf.m_logDebugUnicode - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log debug repair messages";
|
|
m->m_cgi = "ldre";
|
|
m->m_off = (char *)&g_conf.m_logDebugRepair - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log debug pub date extraction messages";
|
|
m->m_cgi = "ldpd";
|
|
m->m_off = (char *)&g_conf.m_logDebugDate - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log timing messages for build";
|
|
m->m_desc = "Log various timing related messages.";
|
|
m->m_cgi = "ltb";
|
|
m->m_off = (char *)&g_conf.m_logTimingBuild - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log timing messages for admin";
|
|
m->m_desc = "Log various timing related messages.";
|
|
m->m_cgi = "ltadm";
|
|
m->m_off = (char *)&g_conf.m_logTimingAdmin - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log timing messages for database";
|
|
m->m_cgi = "ltd";
|
|
m->m_off = (char *)&g_conf.m_logTimingDb - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log timing messages for network layer";
|
|
m->m_cgi = "ltn";
|
|
m->m_off = (char *)&g_conf.m_logTimingNet - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log timing messages for query";
|
|
m->m_cgi = "ltq";
|
|
m->m_off = (char *)&g_conf.m_logTimingQuery - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log timing messages for spcache";
|
|
m->m_desc = "Log various timing related messages.";
|
|
m->m_cgi = "ltspc";
|
|
m->m_off = (char *)&g_conf.m_logTimingSpcache - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log timing messages for related topics";
|
|
m->m_cgi = "ltt";
|
|
m->m_off = (char *)&g_conf.m_logTimingTopics - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
m->m_title = "log reminder messages";
|
|
m->m_desc = "Log reminders to the programmer. You do not need this.";
|
|
m->m_cgi = "lr";
|
|
m->m_off = (char *)&g_conf.m_logReminders - g;
|
|
m->m_type = TYPE_BOOL;
|
|
m->m_def = "0";
|
|
m->m_priv = 1;
|
|
m->m_page = PAGE_LOG;
|
|
m->m_obj = OBJ_CONF;
|
|
m++;
|
|
|
|
	/////
	// END PAGE LOG CONTROLS
	/////

	// END PARMS PARM END PARMS END

	m_numParms = m - m_parms;

	// sanity check
	if ( m_numParms >= MAX_PARMS ) {
		log("admin: Boost MAX_PARMS.");
		exit(-1);
	}

	// make xml tag names and store in here
	static char s_tbuf [ 18000 ];
	char *p = s_tbuf;
	char *pend = s_tbuf + 18000;
	int32_t size;
	char t;

	// . set hashes of title
	// . used by Statsdb.cpp for identifying a parm
	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
		if ( ! m_parms[i].m_title ) continue;
		m_parms[i].m_hash = hash32n ( m_parms[i].m_title );
	}

	// cgi hashes
	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
		if ( ! m_parms[i].m_cgi ) continue;
		m_parms[i].m_cgiHash = hash32n ( m_parms[i].m_cgi );
	}

	// sanity check: ensure all cgi parms are different
	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
		for ( int32_t j = 0 ; j < m_numParms ; j++ ) {
			if ( j == i ) continue;
			if ( m_parms[i].m_type == TYPE_BOOL2 ) continue;
			if ( m_parms[j].m_type == TYPE_BOOL2 ) continue;
			if ( m_parms[i].m_type == TYPE_CMD ) continue;
			if ( m_parms[j].m_type == TYPE_CMD ) continue;
			if ( m_parms[i].m_type == TYPE_FILEUPLOADBUTTON ) continue;
			if ( m_parms[j].m_type == TYPE_FILEUPLOADBUTTON ) continue;
			if ( m_parms[i].m_obj == OBJ_NONE ) continue;
			if ( m_parms[j].m_obj == OBJ_NONE ) continue;
			if ( m_parms[i].m_flags & PF_DUP ) continue;
			if ( m_parms[j].m_flags & PF_DUP ) continue;
			// hack to allow "c" for search, inject, addurls
			if ( m_parms[j].m_page != m_parms[i].m_page &&
			     m_parms[i].m_obj != OBJ_COLL &&
			     m_parms[i].m_obj != OBJ_CONF )
				continue;
			if ( ! m_parms[i].m_cgi ) continue;
			if ( ! m_parms[j].m_cgi ) continue;
			// gotta be on same page now i guess
			int32_t obj1 = m_parms[i].m_obj;
			int32_t obj2 = m_parms[j].m_obj;
			if ( obj1 != OBJ_COLL && obj1 != OBJ_CONF ) continue;
			if ( obj2 != OBJ_COLL && obj2 != OBJ_CONF ) continue;
			//if ( m_parms[i].m_page != m_parms[j].m_page ) continue;
			// a different m_scmd means a different cgi parm really...
			//if ( m_parms[i].m_sparm && m_parms[j].m_sparm &&
			//     strcmp ( m_parms[i].m_scmd, m_parms[j].m_scmd) != 0 )
			//	continue;
			if ( strcmp ( m_parms[i].m_cgi , m_parms[j].m_cgi ) != 0 &&
			     // ensure cgi hashes are different as well!
			     m_parms[i].m_cgiHash != m_parms[j].m_cgiHash )
				continue;
			// upload file buttons are always dup of another parm
			if ( m_parms[j].m_type == TYPE_FILEUPLOADBUTTON )
				continue;
			log(LOG_LOGIC,"conf: Cgi parm for #%" INT32 " \"%s\" "
			    "matches #%" INT32 " \"%s\". Exiting.",
			    i,m_parms[i].m_cgi,j,m_parms[j].m_cgi);
			exit(-1);
		}
	}
	int32_t mm = (int32_t)sizeof(CollectionRec);
	if ( (int32_t)sizeof(Conf)        > mm ) mm = (int32_t)sizeof(Conf);
	if ( (int32_t)sizeof(SearchInput) > mm ) mm = (int32_t)sizeof(SearchInput);

	// . set size of each parm based on its type
	// . also do page and obj inheritance
	// . also do sanity checking
	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
		// sanity check
		if ( m_parms[i].m_off > mm ||
		     //m_parms[i].m_soff > mm ||
		     m_parms[i].m_smaxc > mm ) {
			log(LOG_LOGIC,"conf: Bad offset in parm #%" INT32 " %s."
			    " (%" INT32 ",%" INT32 ",%" INT32 "). Did you FORGET to include "
			    "an & before the cr.myVariable when setting "
			    "m_off for this parm? Or subtract 'x' instead "
			    "of 'g' or vice versa.",
			    i,m_parms[i].m_title,
			    mm,
			    m_parms[i].m_off,
			    //m_parms[i].m_soff,
			    m_parms[i].m_smaxc);
			exit(-1);
		}
		// do not allow numbers in cgi parms, they are used for
		// denoting array indices
		int32_t j = 0;
		for ( ; m_parms[i].m_cgi && m_parms[i].m_cgi[j] ; j++ ) {
			if ( is_digit ( m_parms[i].m_cgi[j] ) ) {
				log(LOG_LOGIC,"conf: Parm #%" INT32 " \"%s\" has "
				    "number in cgi name.",
				    i,m_parms[i].m_title);
				exit(-1);
			}
		}
		// this inheriting causes too many problems when moving
		// parms around in the array
		// inherit page
		//if ( i > 0 && m_parms[i].m_page == -1 )
		//	m_parms[i].m_page = m_parms[i-1].m_page;
		// inherit obj
		//if ( i > 0 && m_parms[i].m_obj == -1 )
		//	m_parms[i].m_obj = m_parms[i-1].m_obj;
		// sanity now
		if ( m_parms[i].m_page == -1 ) {
			log("parms: bad page \"%s\"",m_parms[i].m_title);
			char *xx=NULL;*xx=0; }
		if ( m_parms[i].m_obj == -1 ) {
			log("parms: bad obj \"%s\"",m_parms[i].m_title);
			char *xx=NULL;*xx=0; }

		// if its a fixed size then make sure m_size is not set
		if ( m_parms[i].m_fixed > 0 ) {
			if ( m_parms[i].m_size != 0 ) {
				log(LOG_LOGIC,"conf: Parm #%" INT32 " \"%s\" is "
				    "fixed but size is not 0.",
				    i,m_parms[i].m_title);
				exit(-1);
			}
		}
		// string sizes should already be set!
		size = 0;
		t = m_parms[i].m_type;
		if ( t == -1 ) {
			log(LOG_LOGIC,"conf: Parm #%" INT32 " \"%s\" has no type.",
			    i,m_parms[i].m_title);
			exit(-1);
		}
		// skip if already set
		if ( m_parms[i].m_size ) goto skipSize;
		if ( t == TYPE_CHAR ) size = 1;
		if ( t == TYPE_CHAR2 ) size = 1;
		if ( t == TYPE_BOOL ) size = 1;
		if ( t == TYPE_BOOL2 ) size = 1;
		if ( t == TYPE_CHECKBOX ) size = 1;
		if ( t == TYPE_PRIORITY ) size = 1;
		if ( t == TYPE_PRIORITY2 ) size = 1;
		//if ( t == TYPE_DIFFBOT_DROPDOWN) size = 1;
		if ( t == TYPE_UFP ) size = 1;
		if ( t == TYPE_PRIORITY_BOXES ) size = 1;
		if ( t == TYPE_RETRIES ) size = 1;
		if ( t == TYPE_TIME ) size = 6;
		if ( t == TYPE_DATE2 ) size = 4;
		if ( t == TYPE_DATE ) size = 4;
		if ( t == TYPE_FLOAT ) size = 4;
		if ( t == TYPE_DOUBLE ) size = 8;
		if ( t == TYPE_IP ) size = 4;
		if ( t == TYPE_RULESET ) size = 4;
		if ( t == TYPE_LONG ) size = 4;
		if ( t == TYPE_LONG_CONST ) size = 4;
		if ( t == TYPE_LONG_LONG ) size = 8;
		if ( t == TYPE_STRING ) size = m_parms[i].m_size;
		if ( t == TYPE_STRINGBOX ) size = m_parms[i].m_size;
		if ( t == TYPE_STRINGNONEMPTY ) size = m_parms[i].m_size;
		if ( t == TYPE_SITERULE ) size = 4;

		// comments and commands do not control underlying variables
		if ( size == 0 && t != TYPE_COMMENT && t != TYPE_CMD &&
		     t != TYPE_SAFEBUF &&
		     t != TYPE_FILEUPLOADBUTTON &&
		     t != TYPE_CONSTANT &&
		     t != TYPE_CHARPTR &&
		     t != TYPE_MONOD2 &&
		     t != TYPE_MONOM2 ) {
			log(LOG_LOGIC,"conf: Size of parm #%" INT32 " \"%s\" "
			    "not set.", i,m_parms[i].m_title);
			exit(-1);
		}
		m_parms[i].m_size = size;
	skipSize:
		// check offset
		if ( m_parms[i].m_obj == OBJ_NONE ) continue;
		if ( t == TYPE_COMMENT ) continue;
		if ( t == TYPE_FILEUPLOADBUTTON ) continue;
		if ( t == TYPE_CMD ) continue;
		if ( t == TYPE_CONSTANT ) continue;
		if ( t == TYPE_MONOD2 ) continue;
		if ( t == TYPE_MONOM2 ) continue;
		if ( t == TYPE_SAFEBUF ) continue;
		// search parms do not need an offset
		if ( m_parms[i].m_off == -1 ) { //&& m_parms[i].m_sparm == 0 ) {
			log(LOG_LOGIC,"conf: Parm #%" INT32 " \"%s\" has no offset.",
			    i,m_parms[i].m_title);
			exit(-1);
		}
		if ( m_parms[i].m_off < -1 ) {
			log(LOG_LOGIC,"conf: Parm #%" INT32 " \"%s\" has bad offset "
			    "of %" INT32 ".", i,m_parms[i].m_title,m_parms[i].m_off);
			exit(-1);
		}
		// note: these checks used to read through "m", which points
		// one past the last parm at this point, so check m_parms[i]
		if ( m_parms[i].m_obj == OBJ_CONF &&
		     m_parms[i].m_off >= (int32_t)sizeof(Conf) ) {
			log("admin: Parm %s has bad m_off value.",
			    m_parms[i].m_title);
			char *xx = NULL; *xx = 0;
		}
		if ( m_parms[i].m_obj == OBJ_COLL &&
		     m_parms[i].m_off >= (int32_t)sizeof(CollectionRec) ) {
			log("admin: Parm %s has bad m_off value.",
			    m_parms[i].m_title);
			char *xx = NULL; *xx = 0;
		}
		if ( m_parms[i].m_off >= 0 &&
		     m_parms[i].m_obj == OBJ_SI &&
		     m_parms[i].m_off >= (int32_t)sizeof(SearchInput) ) {
			log("admin: Parm %s has bad m_off value.",
			    m_parms[i].m_title);
			char *xx = NULL; *xx = 0;
		}
		if ( m_parms[i].m_page == -1 ) {
			log(LOG_LOGIC,"conf: Parm #%" INT32 " \"%s\" has no page.",
			    i,m_parms[i].m_title);
			exit(-1);
		}
		if ( m_parms[i].m_obj == -1 ) {
			log(LOG_LOGIC,"conf: Parm #%" INT32 " \"%s\" has no object.",
			    i,m_parms[i].m_title);
			exit(-1);
		}
		//if ( ! m_parms[i].m_title[0] ) {
		//	log(LOG_LOGIC,"conf: Parm #%" INT32 " \"%s\" has no title.",
		//	    i,m_parms[i].m_cgi);
		//	exit(-1);
		//}
		// continue if already have the xml name
		if ( m_parms[i].m_xml ) continue;
		// set xml based on title
		char *tt = m_parms[i].m_title;
		if ( p + gbstrlen(tt) >= pend ) {
			log(LOG_LOGIC,"conf: Not enough room to store xml "
			    "tag name in buffer.");
			exit(-1);
		}
		m_parms[i].m_xml = p;
		for ( int32_t k = 0 ; tt[k] ; k++ ) {
			if ( ! is_alnum_a(tt[k]) ) continue;
			if ( k > 0 && tt[k-1]==' ') *p++ = to_upper_a(tt[k]);
			else *p++ = tt[k];
		}
		*p++ = '\0';
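		// e.g. a title of "log debug tcp messages" yields the xml
		// tag "logDebugTcpMessages": non-alphanumeric chars are
		// dropped and a letter following a space is uppercased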
	}

	// set m_searchParms
	int32_t n = 0;
	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
		//if ( ! m_parms[i].m_sparm ) continue;
		if ( m_parms[i].m_obj != OBJ_SI ) continue;
		m_searchParms[n++] = &m_parms[i];
		// sanity check
		if ( m_parms[i].m_off == -1 ) {
			log(LOG_LOGIC,"conf: SEARCH Parm #%" INT32 " \"%s\" has "
			    "m_off < 0 (offset into SearchInput).",
			    i,m_parms[i].m_title);
			exit(-1);
		}
	}
	m_numSearchParms = n;

	// . sanity check
	// . we should have it all covered!
	si.test();

	//
	// parm overlap detector
	//
	// . fill in each parm's buffer with byte #b
	// . inc b for each parm
#ifndef _VALGRIND_
	overlapTest(+1);
	overlapTest(-1);
#endif
}
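
// A minimal sketch of the overlap-detection idea used below, assuming
// two hypothetical parms in the same object, #1 at m_off=8 and #2 at
// m_off=10, both 4 bytes (these offsets are illustrative only):
//
//	memset ( obj + 8  , 1 , 4 ); // parm #1 stamps bytes 8..11 with 1
//	memset ( obj + 10 , 2 , 4 ); // parm #2 stamps bytes 10..13 with 2
//	// re-scanning parm #1's region now finds bytes 10..11 hold 2,
//	// so the two parms overlap and we log the collision and exit
//
// Running the test with step=+1 and again with step=-1 catches the
// overlap no matter which of the two colliding parms is stamped last.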
void Parms::overlapTest ( char step ) {

	int32_t start = 0;
	if ( step == -1 ) start = m_numParms - 1;

	//log("conf: Using step=%" INT32 "",(int32_t)step);

	SearchInput tmpsi;
	GigablastRequest tmpgr;
	InjectionRequest tmpir;
	CollectionRec tmpcr;
	Conf tmpconf;
	char b;
	char *p1 , *p2;
	int32_t i;
	// sanity check: ensure parms do not overlap
	for ( i = start ; i < m_numParms && i >= 0 ; i += step ) {

		// skip comments
		if ( m_parms[i].m_type == TYPE_COMMENT ) continue;
		if ( m_parms[i].m_type == TYPE_FILEUPLOADBUTTON ) continue;
		// skip if it is a broadcast switch, like "all spiders on"
		// because that modifies another parm, "spidering enabled"
		if ( m_parms[i].m_type == TYPE_BOOL2 ) continue;

		if ( m_parms[i].m_type == TYPE_SAFEBUF ) continue;

		// we use cr->m_spideringEnabled for PAGE_BASIC_SETTINGS too!
		if ( m_parms[i].m_flags & PF_DUP ) continue;

		p1 = NULL;
		if ( m_parms[i].m_obj == OBJ_COLL      ) p1 = (char *)&tmpcr;
		if ( m_parms[i].m_obj == OBJ_CONF      ) p1 = (char *)&tmpconf;
		if ( m_parms[i].m_obj == OBJ_SI        ) p1 = (char *)&tmpsi;
		if ( m_parms[i].m_obj == OBJ_GBREQUEST ) p1 = (char *)&tmpgr;
		if ( m_parms[i].m_obj == OBJ_IR        ) p1 = (char *)&tmpir;
		if ( p1 ) p1 += m_parms[i].m_off;
		p2 = NULL;
		int32_t size = m_parms[i].m_size;
		// use i now
		b = (char)i;
		// string box type is a pointer!!
		if ( p1 ) memset ( p1 , b , size );
		//log("conf: setting %" INT32 " bytes for %s at 0x%" XINT32 " char=0x%hhx",
		//    size,m_parms[i].m_title,(int32_t)p1,b);
		// search input uses character ptrs!!
		if ( m_parms[i].m_type == TYPE_STRINGBOX ) size = 4;
		if ( m_parms[i].m_type == TYPE_STRING    ) size = 4;
		if ( m_parms[i].m_fixed > 0 ) size *= m_parms[i].m_fixed ;
		if ( p2 ) memset ( p2 , b , size );
		//log("conf: setting %" INT32 " bytes for %s at 0x%" XINT32 " char=0x%hhx "
		//    "i=%" INT32 "", size,m_parms[i].m_title,(int32_t)p2,b,i);
	}

	//
	// now make sure they are the same
	//
	if ( step == -1 ) b--;
	else              b = 0;
	char *objStr = "none";
	int32_t obj;
	char infringerB;
	int32_t j;
	int32_t savedi = -1;

	for ( i = 0 ; i < m_numParms ; i++ ) {

		// skip comments
		if ( m_parms[i].m_type == TYPE_COMMENT ) continue;
		if ( m_parms[i].m_type == TYPE_FILEUPLOADBUTTON ) continue;
		// skip if it is a broadcast switch, like "all spiders on"
		// because that modifies another parm, "spidering enabled"
		if ( m_parms[i].m_type == TYPE_BOOL2 ) continue;

		if ( m_parms[i].m_type == TYPE_SAFEBUF ) continue;

		// we use cr->m_spideringEnabled for PAGE_BASIC_SETTINGS too!
		if ( m_parms[i].m_flags & PF_DUP ) continue;

		p1 = NULL;
		if ( m_parms[i].m_obj == OBJ_COLL      ) p1 = (char *)&tmpcr;
		if ( m_parms[i].m_obj == OBJ_CONF      ) p1 = (char *)&tmpconf;
		if ( m_parms[i].m_obj == OBJ_SI        ) p1 = (char *)&tmpsi;
		if ( m_parms[i].m_obj == OBJ_GBREQUEST ) p1 = (char *)&tmpgr;
		if ( m_parms[i].m_obj == OBJ_IR        ) p1 = (char *)&tmpir;
		if ( p1 ) p1 += m_parms[i].m_off;
		p2 = NULL;
		int32_t size = m_parms[i].m_size;
		b = (char) i;
		// save it
		obj = m_parms[i].m_obj;

		//log("conf: testing %" INT32 " bytes for %s at 0x%" XINT32 " char=0x%hhx "
		//    "i=%" INT32 "", size,m_parms[i].m_title,(int32_t)p1,b,i);

		for ( j = 0 ; p1 && j < size ; j++ ) {
			if ( p1[j] == b ) continue;
			// this has multiple parms pointing to it!
			//if ( m_parms[i].m_type == TYPE_BOOL2 ) continue;
			// or special cases...
			//if ( p1 == (char *)&tmpconf.m_spideringEnabled )
			//	continue;
			// set object type
			objStr = "??????";
			if ( m_parms[i].m_obj == OBJ_COLL )
				objStr = "CollectionRec.h";
			if ( m_parms[i].m_obj == OBJ_CONF )
				objStr = "Conf.h";
			if ( m_parms[i].m_obj == OBJ_SI )
				objStr = "SearchInput.h";
			if ( m_parms[i].m_obj == OBJ_GBREQUEST )
				objStr = "GigablastRequest/Parms.h";
			if ( m_parms[i].m_obj == OBJ_IR )
				objStr = "InjectionRequest/PageInject.h";
			// save it
			infringerB = p1[j];
			savedi = i;
			goto error;
		}
		// search input uses character ptrs!!
		if ( m_parms[i].m_type == TYPE_STRINGBOX ) size = 4;
		if ( m_parms[i].m_type == TYPE_STRING    ) size = 4;
		if ( m_parms[i].m_fixed > 0 ) size *= m_parms[i].m_fixed ;
		objStr = "SearchInput.h";

		//log("conf: testing %" INT32 " bytes for %s at 0x%" XINT32 " char=0x%hhx "
		//    "i=%" INT32 "", size,m_parms[i].m_title,(int32_t)p2,b,i);

		for ( j = 0 ; p2 && j < size ; j++ ) {
			if ( p2[j] == b ) continue;
			// save it
			infringerB = p2[j];
			savedi = i;
			log("conf: got b=0x%hhx when it should have been "
			    "b=0x%hhx",p2[j],b);
			goto error;
		}
	}

	return;

 error:
	log("conf: Had a parm value collision. Parm #%" INT32 " "
	    "\"%s\" (size=%" INT32 ") in %s has overlapped with another parm. "
	    "Your TYPE_* for this parm or a neighbor of it "
	    "does not agree with what you have declared it as in the *.h "
	    "file.",i,m_parms[i].m_title,m_parms[i].m_size,objStr);
	if ( step == -1 ) b--;
	else              b = 0;
	// show possible parms that could have overwritten it!
	for ( i = start ; i < m_numParms && i >= 0 ; i += step ) {
		//char *p1 = NULL;
		//if ( m_parms[i].m_obj == OBJ_COLL ) p1 = (char *)&tmpcr;
		//if ( m_parms[i].m_obj == OBJ_CONF ) p1 = (char *)&tmpconf;
		// skip if comment
		if ( m_parms[i].m_type == TYPE_COMMENT ) continue;
		if ( m_parms[i].m_type == TYPE_FILEUPLOADBUTTON ) continue;
		if ( m_parms[i].m_flags & PF_DUP ) continue;
		if ( m_parms[i].m_obj != m_parms[savedi].m_obj ) continue;
		// skip if no match
		//bool match = false;
		//if ( m_parms[i].m_obj == obj ) match = true;
		//if ( m_parms[i].m_sparm &&
		// NOTE: these need to be fixed!!!
		b = (char) i;
		if ( b == infringerB )
			log("conf: possible overlap with parm #%" INT32 " in %s "
			    "\"%s\" (size=%" INT32 ") "
			    "xml=%s "
			    "desc=\"%s\"",
			    i,objStr,m_parms[i].m_title,
			    m_parms[i].m_size,
			    m_parms[i].m_xml,
			    m_parms[i].m_desc);
	}

	log("conf: try including \"m->m_obj = OBJ_COLL;\" or "
	    "\"m->m_obj = OBJ_CONF;\" in your parm definitions");
	log("conf: failed overlap test. exiting.");
	exit(-1);

}

bool Parm::getValueAsBool ( SearchInput *si ) {
	if ( m_obj != OBJ_SI ) { char *xx=NULL;*xx=0; }
	char *p = (char *)si + m_off;
	return *(bool *)p;
}

int32_t Parm::getValueAsLong ( SearchInput *si ) {
	if ( m_obj != OBJ_SI ) { char *xx=NULL;*xx=0; }
	char *p = (char *)si + m_off;
	return *(int32_t *)p;
}

char *Parm::getValueAsString ( SearchInput *si ) {
	if ( m_obj != OBJ_SI ) { char *xx=NULL;*xx=0; }
	char *p = (char *)si + m_off;
	return *(char **)p;
}
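
// A minimal usage sketch for the accessors above ("m" is assumed to be
// a Parm descriptor whose m_obj is OBJ_SI; hypothetical, not from the
// original source):
//
//	SearchInput si;
//	// ... si filled in from an HttpRequest ...
//	bool on = m->getValueAsBool ( &si );
//
// Each accessor just adds Parm::m_off to the SearchInput base pointer
// and casts, so the caller must know the parm's real underlying type.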

/////
//
// new functions
//
/////

bool Parms::addNewParmToList1 ( SafeBuf *parmList ,
				collnum_t collnum ,
				char *parmValString ,
				int32_t occNum ,
				char *parmName ) {
	// get the parm descriptor
	Parm *m = getParmFast1 ( parmName , NULL );
	if ( ! m ) return log("parms: got bogus parm2 %s",parmName );
	return addNewParmToList2 ( parmList,collnum,parmValString,occNum,m );
}

// . make a parm rec using the provided string
// . used to convert http requests into a parmlist
// . string could be a float or int32_t or int64_t in ascii, as well as a string
// . returns false w/ g_errno set on error
bool Parms::addNewParmToList2 ( SafeBuf *parmList ,
				collnum_t collnum ,
				char *parmValString ,
				int32_t occNum ,
				Parm *m ) {
	// get value
	char *val = NULL;
	int32_t valSize = 0;

	//char buf[2+MAX_COLL_LEN];

	int32_t val32;
	int64_t val64;
	char    val8;
	float   valf;

	/*
	char *obj = NULL;
	// we might be adding a collnum if a collection that is being
	// added via the CommandAddColl0() "addColl" or "addCrawl" or
	// "addBulk" commands. they will reserve the collnum, so it might
	// not be ready yet.
	if ( collnum != -1 ) {
		CollectionRec *cr = g_collectiondb.getRec ( collnum );
		if ( cr ) obj = (char *)cr;
		//	log("parms: no coll rec for %" INT32 "",(int32_t)collnum);
		//	return false;
		//}
		//obj = (char *)cr;
	}
	else {
		obj = (char *)&g_conf;
	}
	*/

	if ( m->m_type == TYPE_STRING ||
	     m->m_type == TYPE_STRINGBOX ||
	     m->m_type == TYPE_SAFEBUF ||
	     m->m_type == TYPE_STRINGNONEMPTY ) {
		// point to string
		//val = obj + m->m_off;
		// Parm::m_size is the max string size
		//if ( occNum > 0 ) val += occNum * m->m_size;
		// stringlength + 1. no just make it the whole string in
		// case it does not use the \0 protocol
		//valSize = m->m_max;
		val = parmValString;
		// include \0
		valSize = gbstrlen(val)+1;
		// sanity
		if ( val[valSize-1] != '\0' ) { char *xx=NULL;*xx=0; }
	}
	else if ( m->m_type == TYPE_LONG ) {
		// watch out for unsigned 32-bit numbers, so use atoll()
		val64 = atoll(parmValString);
		val = (char *)&val64;
		valSize = 4;
	}
	else if ( m->m_type == TYPE_FLOAT ) {
		valf = atof(parmValString);
		val = (char *)&valf;
		valSize = 4;
	}
	else if ( m->m_type == TYPE_LONG_LONG ) {
		val64 = atoll(parmValString);
		val = (char *)&val64;
		valSize = 8;
	}
	else if ( m->m_type == TYPE_BOOL ||
		  m->m_type == TYPE_BOOL2 ||
		  m->m_type == TYPE_CHECKBOX ||
		  m->m_type == TYPE_PRIORITY2 ||
		  m->m_type == TYPE_UFP ||
		  m->m_type == TYPE_CHAR ) {
		val8 = atol(parmValString);
		//if ( parmValString && to_lower_a(parmValString[0]) == 'y' )
		//	val8 = 1;
		//if ( parmValString && to_lower_a(parmValString[0]) == 'n' )
		//	val8 = 0;
		val = (char *)&val8;
		valSize = 1;
	}
	// for resetting or restarting a coll i think the ascii arg is
	// the NEW reserved collnum, but for other commands then parmValString
	// will be NULL
	else if ( m->m_type == TYPE_CMD ) {
		val = parmValString;
		if ( val ) valSize = gbstrlen(val)+1;
		// . addcoll collection can not be too long
		// . TODO: supply a Parm::m_checkValFunc to ensure val is
		//   legitimate, and set g_errno on error
		if ( strcmp(m->m_cgi,"addcoll") == 0 && valSize-1>MAX_COLL_LEN){
			log("admin: addcoll coll too long");
			g_errno = ECOLLTOOBIG;
			return false;
		}
		// scan for holes if we hit the limit
		//if ( g_collectiondb.m_numRecs >= 1LL>>sizeof(collnum_t) )
	}
	else if ( m->m_type == TYPE_IP ) {
		val32 = atoip(parmValString);
		// store ip in binary format
		val = (char *)&val32;
		valSize = 4;
	}
	else {
		log("parms: unsupported parm type");
		char *xx=NULL;*xx=0;
	}

	key96_t key = makeParmKey ( collnum , m , occNum );

	// then key
	if ( ! parmList->safeMemcpy ( &key , sizeof(key) ) )
		return false;

	// datasize
	if ( ! parmList->pushLong ( valSize ) )
		return false;

	// and data
	if ( val && valSize && ! parmList->safeMemcpy ( val , valSize ) )
		return false;

	return true;
}
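
// A minimal usage sketch (hypothetical values; not from the original
// source): queue an update of a collection's spidering-enabled boolean
// (cgi "cse") for broadcast, then ship it to all grunts.
//
//	SafeBuf parmList;
//	if ( g_parms.addNewParmToList1 ( &parmList ,
//					 cr->m_collnum ,
//					 "1" ,   // ascii value
//					 -1 ,    // occNum (not an array)
//					 "cse" ) )
//		g_parms.broadcastParmList ( &parmList , NULL , NULL ,
//					    true ,  // sendToGrunts
//					    false , // sendToProxies
//					    -1 , -1 );
//
// The resulting rec layout is key96_t + 4-byte dataSize + data, as read
// back by the getDataSizeFromParmRec()/getDataFromParmRec() helpers at
// the top of this file.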

// g_parms.addCurrentParmToList1 ( &parmList , cr , "spiderRoundNum" );
bool Parms::addCurrentParmToList1 ( SafeBuf *parmList ,
				    CollectionRec *cr ,
				    char *parmName ) {
	collnum_t collnum = -1;
	if ( cr ) collnum = cr->m_collnum;
	// get the parm descriptor
	int32_t occNum;
	Parm *m = getParmFast1 ( parmName , &occNum );
	if ( ! m ) return log("parms: got bogus parm1 %s",parmName );
	return addCurrentParmToList2 ( parmList , collnum, -1 , m );
}

// . use the current value of the parm to make this record
// . parm class itself already helps us reference the binary parm value
bool Parms::addCurrentParmToList2 ( SafeBuf *parmList ,
				    collnum_t collnum ,
				    int32_t occNum ,
				    Parm *m ) {

	char *obj = NULL;

	if ( collnum != -1 ) {
		CollectionRec *cr = g_collectiondb.getRec ( collnum );
		if ( ! cr ) return false;
		obj = (char *)cr;
	}
	else {
		obj = (char *)&g_conf;
	}

	char *data = obj + m->m_off;
	// Parm::m_size is the max string size
	int32_t dataSize = m->m_size;
	if ( occNum > 0 ) data += occNum * m->m_size;

	if ( m->m_type == TYPE_STRING ||
	     m->m_type == TYPE_STRINGBOX ||
	     m->m_type == TYPE_SAFEBUF ||
	     m->m_type == TYPE_STRINGNONEMPTY )
		// include \0 in string
		dataSize = gbstrlen(data) + 1;

	// if a safebuf, point to the string within
	if ( m->m_type == TYPE_SAFEBUF ) {
		SafeBuf *sb = (SafeBuf *)data;
		data = sb->getBufStart();
		dataSize = sb->length();
		// sanity
		if ( dataSize > 0 && !data[dataSize-1]){char *xx=NULL;*xx=0;}
		// include the \0 since we do it for strings above
		if ( dataSize > 0 ) dataSize++;
		// empty? make it \0 then to be like strings i guess
		if ( dataSize == 0 ) {
			data = "\0";
			dataSize = 1;
		}
		// sanity check
		if ( dataSize > 0 && data[dataSize-1] ) {char *xx=NULL;*xx=0;}
		// if just a \0 then make it empty
		//if ( dataSize && !data[0] ) {
		//	data = NULL;
		//	dataSize = 0;
		//}
	}

	//int32_t occNum = -1;
	key96_t key = makeParmKey ( collnum , m , occNum );
	/*
	// debug it
	log("parms: adding parm collnum=%i title=%s "
	    "key=%s datasize=%i data=%s hash=%"UINT32
	    ,(int)collnum
	    ,m->m_title
	    ,KEYSTR(&key,sizeof(key))
	    ,(int)dataSize
	    ,data
	    ,(uint32_t)hash32(data,dataSize));
	*/
	// then key
	if ( ! parmList->safeMemcpy ( &key , sizeof(key) ) )
		return false;

	// size
	if ( ! parmList->pushLong ( dataSize ) )
		return false;

	// and data
	if ( dataSize && ! parmList->safeMemcpy ( data , dataSize ) )
		return false;

	return true;
}

// returns false and sets g_errno on error
bool Parms::convertHttpRequestToParmList (HttpRequest *hr, SafeBuf *parmList,
					  int32_t page , TcpSocket *sock ) {

	// false = useDefaultRec?
	CollectionRec *cr = g_collectiondb.getRec ( hr , false );

	//if ( c ) {
	//	cr = g_collectiondb.getRec ( hr );
	//	if ( ! cr ) log("parms: coll not found");
	//}

	bool isMasterAdmin = g_conf.isMasterAdmin ( sock , hr );

	// does this user have permission to update the parms?
	bool isCollAdmin = g_conf.isCollAdmin ( sock , hr ) ;

	// might be g_conf specific, not coll specific
	//bool hasPerm = false;
	// just knowing the collection name of a custom crawl means you
	// know the token, so you have permission
	//if ( cr && cr->m_isCustomCrawl ) hasPerm = true;
	//if ( hr->isLocal() ) hasPerm = true;

	// fix jenkins "GET /v2/crawl?token=crawlbottesting" request
	char *name  = hr->getString("name");
	char *token = hr->getString("token");
	//if ( ! cr && token ) hasPerm = true;

	//if ( ! hasPerm ) {
	//	//log("parms: no permission to set parms");
	//	//g_errno = ENOPERM;
	//	//return false;
	//	// just leave the parm list empty and fail silently
	//	return true;
	//}

	// we set the parms in this collnum
	collnum_t parmCollnum = -1;
	if ( cr ) parmCollnum = cr->m_collnum;

	// turn the collnum into an ascii string for providing as args
	// when &reset=1 &restart=1 &delete=1 is given along with a
	// &c= or a &name=/&token= pair.
	char oldCollName[MAX_COLL_LEN+1];
	oldCollName[0] = '\0';
	if ( cr ) sprintf(oldCollName,"%" INT32 "",(int32_t)cr->m_collnum);

	////////
	//
	// HACK: if crawlbot user supplies a token, name, and seeds, and the
	// corresponding collection does not exist then assume it is an add
	//
	////////
	char customCrawl = 0;
	char *path = hr->getPath();
	// i think /crawlbot is only used by me to see PageCrawlBot.cpp
	// so don't bother...
	if ( strncmp(path,"/crawlbot",9) == 0 ) customCrawl = 0;
	if ( strncmp(path,"/v2/crawl",9) == 0 ) customCrawl = 1;
	if ( strncmp(path,"/v2/bulk" ,8) == 0 ) customCrawl = 2;
	if ( strncmp(path,"/v3/crawl",9) == 0 ) customCrawl = 1;
	if ( strncmp(path,"/v3/bulk" ,8) == 0 ) customCrawl = 2;

	// throw error if collection record custom crawl type doesn't equal
	// the crawl type of current request
	if ( cr && customCrawl && customCrawl != cr->m_isCustomCrawl ) {
		g_errno = ECUSTOMCRAWLMISMATCH;
		return false;
	}

	bool hasAddCrawl = hr->hasField("addCrawl");
	bool hasAddBulk  = hr->hasField("addBulk");
	bool hasAddColl  = hr->hasField("addColl");
	// sometimes they try to delete a collection that is not there so do
	// not apply this logic in that case!
	bool hasDelete   = hr->hasField("delete");
	bool hasRestart  = hr->hasField("restart");
	bool hasReset    = hr->hasField("reset");
	bool hasSeeds    = hr->hasField("seeds");
	// check for bulk jobs as well
	if ( ! hasSeeds ) hasSeeds = hr->hasField("urls");
	if ( ! cr &&
	     token &&
	     name &&
	     customCrawl &&
	     hasSeeds &&
	     ! hasDelete &&
	     ! hasRestart &&
	     ! hasReset &&
	     ! hasAddCrawl &&
	     ! hasAddBulk &&
	     ! hasAddColl ) {
		// reserve a new collnum for adding this crawl
		parmCollnum = g_collectiondb.reserveCollNum();
		// must be there!
		if ( parmCollnum == -1 ) {
			g_errno = EBADENGINEER;
			return false;
		}
		// log it for now
		log("parms: trying to add custom crawl (%" INT32 ")",
		    (int32_t)parmCollnum);
		// formulate name
		char newName[MAX_COLL_LEN+1];
		snprintf(newName,MAX_COLL_LEN,"%s-%s",token,name);
		char *cmdStr = "addCrawl";
		if ( customCrawl == 2 ) cmdStr = "addBulk";
		// add to parm list
		if ( ! addNewParmToList1 ( parmList ,
					   parmCollnum ,
					   newName ,
					   -1 , // occNum
					   cmdStr ) )
			return false;
	}
	// loop through cgi parms
	for ( int32_t i = 0 ; i < hr->getNumFields() ; i++ ) {
		// get cgi parm name
		char *field = hr->getField ( i );
		// get value of the cgi field
		char *val = hr->getValue (i);
		// convert field to parm
		int32_t occNum;
		// parm names can be shared across pages, like "c"
		// for search, addurl, inject, etc.
		Parm *m = getParmFast1 ( field , &occNum );
		if ( ! m ) continue;

		// skip if not a command parm, like "addcoll"
		if ( m->m_type != TYPE_CMD ) continue;

		if ( m->m_obj != OBJ_CONF && m->m_obj != OBJ_COLL )
			continue;

		//
		// HACK
		//
		// if its a resetcoll/restartcoll/addcoll we have to
		// get the next available collnum and use that for setting
		// any additional parms. that is the coll it will act on.
		if ( strcmp(m->m_cgi,"addColl") == 0 ||
		     // lowercase support. camelcase is obsolete.
		     strcmp(m->m_cgi,"addcoll") == 0 ||
		     strcmp(m->m_cgi,"addCrawl") == 0 ||
		     strcmp(m->m_cgi,"addBulk" ) == 0 ||
		     strcmp(m->m_cgi,"reset"   ) == 0 ||
		     strcmp(m->m_cgi,"restart" ) == 0 ) {
			// if we wanted to we could make the data the
			// new parmCollnum since we already store the old
			// collnum in the parm rec key
			parmCollnum = g_collectiondb.reserveCollNum();
			//
			// NOTE: the old collnum is in the "val" already
			// like "&reset=462" or "&addColl=test"
			//
			// sanity. if all are full we hit our limit of
			// 32k collections. should increase collnum_t from
			// int16_t to int32_t...
			if ( parmCollnum == -1 ) {
				g_errno = EBADENGINEER;
				return false;
			}
		}

		// . DIFFBOT HACK: so ppl can manually restart a spider round
		// . val can be 0 or 1 or anything. i.e. roundStart=0 works.
		// . map this parm to another parm with the round start
		//   time (current time) and the new round # as the args.
		// . this will call the CommandForceNextSpiderRound() function
		//   on every shard with these args, "tmpVal".
		if ( cr && strcmp(m->m_cgi,"roundStart") == 0 ) {
			// use the current time so anything spidered before
			// this time (the round start time) will be respidered
			//sprintf(tmp,"%" UINT32 "",getTimeGlobalNoCore());
			//val = tmp;
			char tmpVal[64];
			// use the same round start time for all shards
			sprintf(tmpVal,
				"%" UINT32 ",%" INT32 ""
				,(uint32_t)getTimeGlobalNoCore()
				,cr->m_spiderRoundNum+1
				);
			// . also add a command to reset crawl/process counts
			//   so if you hit maxToProcess/maxToCrawl it will
			//   not stop the round from restarting
			// . CommandResetCrawlCounts()
			if ( ! addNewParmToList1 ( parmList ,
						   parmCollnum ,
						   tmpVal, // a string
						   0 , // occNum (for arrays)
						   "forceround" ) )
				return false;
			// don't bother going below
			continue;
		}

		// if a collection name was also provided, assume that is
		// the target of the reset/delete/restart. we still
		// need PageAddDelete.cpp to work...
		if ( cr &&
		     ( strcmp(m->m_cgi,"reset"   ) == 0 ||
		       strcmp(m->m_cgi,"delete"  ) == 0 ||
		       strcmp(m->m_cgi,"restart" ) == 0 ) )
			// the collnum to reset/restart/del
			// given as a string.
			val = oldCollName;

		//
		// CLOUD SEARCH ENGINE SUPPORT
		//
		// if this is the "delcoll" parm then "c" may have been
		// excluded from the http request, therefore isCollAdmin and
		// isMasterAdmin may be false, so see if they have permission
		// for the "val" collection for this one...
		bool hasPerm = false;
		if ( m->m_page == PAGE_DELCOLL &&
		     strcmp(m->m_cgi,"delcoll") == 0 ) {
			// permission override for /admin/delcoll cmd & parm
			hasPerm = g_conf.isCollAdminForColl (sock,hr,val);
		}

		// if this IP c-block has already added a collection then do
		// not allow it to add another.
		if ( m->m_page == PAGE_ADDCOLL &&
		     g_conf.m_allowCloudUsers &&
		     ! isMasterAdmin &&
		     strcmp(m->m_cgi,"addcoll")==0 ) {
			// see if user's c-block has already added a collection
			int32_t numAdded = 0;
			if ( numAdded >= 1 ) {
				g_errno = ENOPERM;
				log("parms: already added a collection from "
				    "this cloud user's c-block.");
				return false;
			}
			hasPerm = true;
		}

		// master controls require root permission
		if ( m->m_obj == OBJ_CONF && ! isMasterAdmin ) {
			log("parms: could not run root parm \"%s\" no perm.",
			    m->m_title);
			continue;
		}

		// need to have permission for the collection for collrec parms
		if ( m->m_obj == OBJ_COLL && ! isCollAdmin && ! hasPerm ) {
			log("parms: could not run coll parm \"%s\" no perm.",
			    m->m_title);
			continue;
		}

		// add the cmd parm
		if ( ! addNewParmToList2 ( parmList ,
					   // it might be a collection-less
					   // command like 'gb stop' which
					   // uses the "save=1" parm.
					   // this is the "new" collnum to
					   // create in the case of
					   // add/reset/restart, but in the
					   // case of delete it is -1 or old.
					   parmCollnum ,
					   // the argument to the function...
					   // in the case of delete, the
					   // collnum to delete in ascii.
					   // in the case of add, the name
					   // of the new coll. in the case
					   // of reset/restart the OLD
					   // collnum in ascii to delete.
					   val,
					   occNum ,
					   m ) )
			return false;
	}
	// if we are on the url filters page, turn off all checkboxes!
	// html should really transmit them as =0 if they are unchecked!!
	// "fe" is a url filter expression for the first row.
	//if ( hr->hasField("fe") && page == PAGE_FILTERS && cr ) {
	//	for ( int32_t i = 0 ; i < cr->m_numRegExs ; i++ ) {
	//		//cr->m_harvestLinks [i] = 0;
	//		//cr->m_spidersEnabled[i] = 0;
	//		if ( ! addNewParmToList2 ( parmList ,
	//					   cr->m_collnum,
	//					   "0",
	//					   i,
	//	}
	//}

	//
	// CLOUD SEARCH ENGINE SUPPORT
	//
	// provide userip so when adding a new collection we can
	// store it in the collection rec to ensure that the same
	// IP address cannot add more than one collection.
	//
	if ( sock && page == PAGE_ADDCOLL ) {
		char *ipStr = iptoa(sock->m_ip);
		int32_t occNum;
		Parm *um = getParmFast1 ( "userip" , &occNum );
		if ( ! addNewParmToList2 ( parmList ,
					   // HACK! operate on the to-be-added
					   // collrec, if there was an addcoll
					   // reset or restart coll cmd...
					   parmCollnum ,
					   ipStr, // val
					   occNum ,
					   um ) )
			return false;
	}

	//
	// now add the parms that are NOT commands
	//

	// loop through cgi parms
	for ( int32_t i = 0 ; i < hr->getNumFields() ; i++ ) {
		// get cgi parm name
		char *field = hr->getField ( i );
		// get value of the cgi field
		char *val = hr->getValue (i);

		// get the occurrence # if its regex. this is the row #
		// in the url filters table, since those parms repeat names.
		// url filter expression.
		//if ( strcmp(field,"fe") == 0 ) occNum++;

		// convert field to parm
		int32_t occNum;
		Parm *m = getParmFast1 ( field , &occNum );

		//
		// map "pause" to spidering enabled
		//
		if ( strcmp(field,"pause"     ) == 0 ||
		     strcmp(field,"pauseCrawl") == 0 ) {
			m = getParmFast1 ( "cse", &occNum );
			if      ( val && val[0] == '0' ) val = "1";
			else if ( val && val[0] == '1' ) val = "0";
			if ( ! m ) { char *xx=NULL;*xx=0; }
		}

		if ( ! m ) continue;

		// skip if it IS a command parm, like "addcoll", we did those above
		if ( m->m_type == TYPE_CMD )
			continue;

		if ( m->m_obj != OBJ_CONF && m->m_obj != OBJ_COLL )
			continue;

		//
		// CLOUD SEARCH ENGINE SUPPORT
		//
		// master controls require root permission. otherwise, just
		// knowing the collection name is enough for a cloud user
		// to change settings.
		//
		bool hasPerm = false;

		// master controls require root permission
		if ( m->m_obj == OBJ_CONF && ! isMasterAdmin ) {
			log("parms: could not set root parm \"%s\" no perm.",
			    m->m_title);
			continue;
		}

		// need to have permission for the collection for collrec parms
		if ( m->m_obj == OBJ_COLL && ! isCollAdmin && ! hasPerm ) {
			log("parms: could not set coll parm \"%s\" no perm.",
			    m->m_title);
			continue;
		}

		// convert spiderRoundStartTime=0 (roundStart=0 roundStart=1)
		// to spiderRoundStartTime=<currenttime>+30secs
		// so that will force the next spider round to kick in
		/*
		bool restartRound = false;
		char tmp[24];
		if ( strcmp(field,"roundStart")==0 &&
		     val && (val[0]=='0'||val[0]=='1') && val[1]==0 ) {
			sprintf(tmp,"%" UINT32 "",(int32_t)getTimeGlobalNoCore()+0);
			val = tmp;
		}
		*/

		// add it to a list now
		if ( ! addNewParmToList2 ( parmList ,
					   // HACK! operate on the to-be-added
					   // collrec, if there was an addcoll
					   // reset or restart coll cmd...
					   parmCollnum ,
					   val ,
					   occNum ,
					   m ) )
			return false;
	}

	return true;
}
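
// In short, the flow is: an admin http request arrives, the cgi fields
// are converted into parm recs by the function above (command parms
// first, then plain settings), and the resulting parmList is handed to
// broadcastParmList() below so every host applies the same update in
// the same order.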

Parm *Parms::getParmFast2 ( int32_t cgiHash32 ) {
	static HashTableX s_pht;
	static char s_phtBuf[26700];
	static bool s_init = false;

	if ( ! s_init ) {
		// init hashtable
		s_pht.set ( 4,sizeof(char *),2048,s_phtBuf,26700,
			    false,0,"phttab" );
		// reduce hash collisions:
		s_pht.m_useKeyMagic = true;
		// wtf?
		if ( m_numParms <= 0 ) init();
		if ( m_numParms <= 0 ) { char *xx=NULL;*xx=0; }
		// fill up the hashtable
		for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
			// get it
			Parm *parm = &m_parms[i];
			// skip parms that are not for conf or coll lest
			// it bitch that "c" is duplicated...
			if ( parm->m_obj != OBJ_CONF &&
			     parm->m_obj != OBJ_COLL )
				continue;
			// skip comments
			if ( parm->m_type == TYPE_COMMENT ) continue;
			if ( parm->m_type == TYPE_FILEUPLOADBUTTON ) continue;
			// skip if no cgi
			if ( ! parm->m_cgi ) continue;
			// get the hash of its cgi
			int32_t ph32 = parm->m_cgiHash;
			// sanity!
			if ( s_pht.isInTable ( &ph32 ) ) {
				// get the dup guy
				Parm *dup = *(Parm **)s_pht.getValue(&ph32);
				// same underlying parm?
				// like for "all spiders on" vs.
				// "all spiders off"?
				if ( dup->m_off == parm->m_off )
					continue;
				// otherwise bitch about it and drop core
				log("parms: dup parm h32=%" INT32 " "
				    "\"%s\" vs \"%s\"",
				    ph32, dup->m_title,parm->m_title);
				char *xx=NULL;*xx=0;
			}
			// add that to the hash table
			s_pht.addKey ( &ph32 , &parm );
		}
		// do not do this again
		s_init = true;
	}

	Parm **pp = (Parm **)s_pht.getValue ( &cgiHash32 );
	if ( ! pp ) return NULL;
	return *pp;
}

Parm *Parms::getParmFast1 ( char *cgi , int32_t *occNum ) {
	// strip off any trailing number, like the 3 in 'fe3',
	// because that is the occurrence # for parm arrays.
	int32_t clen = gbstrlen(cgi);

	char *d = NULL;

	if ( clen > 1 ) {
		d = cgi + clen - 1;
		while ( is_digit(*d) ) d--;
		d++;
	}

	int32_t h32;

	// assume not an array
	if ( occNum ) *occNum = -1;

	if ( d && *d ) {
		if ( occNum ) *occNum = atol(d);
		h32 = hash32 ( cgi , d - cgi );
	}
	else
		h32 = hash32n ( cgi );

	Parm *m = getParmFast2 ( h32 );

	if ( ! m ) return NULL;

	// the first element does not have a number after it
	if ( m->isArray() && occNum && *occNum == -1 )
		*occNum = 0;

	return m;
}
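
// e.g. getParmFast1 ( "fe3" , &occNum ) strips the trailing digits,
// hashes "fe" to find the url filter expression parm, and sets
// occNum=3 (the row in the url filters table); a bare "fe" maps to
// occNum=0 for array parms.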

////////////
//
// functions for distributing/syncing parms to/with all hosts
//
////////////

class ParmNode {
public:
	SafeBuf m_parmList;
	int32_t m_numRequests;
	int32_t m_numReplies;
	int32_t m_numGoodReplies;
	int32_t m_numHostsTotal;
	class ParmNode *m_prevNode;
	class ParmNode *m_nextNode;
	int64_t m_parmId;
	bool    m_calledCallback;
	int32_t m_startTime;
	void   *m_state;
	void  (* m_callback)(void *state);
	bool    m_sendToGrunts;
	bool    m_sendToProxies;
	int32_t m_hostId; // -1 means send parm update to all hosts
	// . if not -1 then [m_hostId,m_hostId2] is a range
	// . used by main.cpp cmd line cmds like 'gb stop 3-5'
	int32_t m_hostId2;
};

static ParmNode *s_headNode = NULL;
static ParmNode *s_tailNode = NULL;
static int64_t   s_parmId   = 0LL;

// . will send the parm update request to each host and retry forever,
//   until dead hosts come back up
// . keeps parm update requests in order received
// . returns true and sets g_errno on error
// . returns false if blocked and will call your callback
bool Parms::broadcastParmList ( SafeBuf *parmList ,
				void *state ,
				void (* callback)(void *) ,
				bool sendToGrunts ,
				bool sendToProxies ,
				// this is -1 if sending to all hosts
				int32_t hostId ,
				// this is not -1 if its range [hostId,hostId2]
				int32_t hostId2 ) {

	// empty list?
	if ( parmList->getLength() <= 0 ) return true;

	// only us? no need for this then. we now do this...
	//if ( g_hostdb.m_numHosts <= 1 ) return true;

	// make a new parm transmit node
	ParmNode *pn = (ParmNode *)mmalloc ( sizeof(ParmNode) , "parmnode" );
	if ( ! pn ) return true;
	pn->m_parmList.constructor();

	// . update the ticket #. we use this to keep things ordered too.
	// . it starts at zero but is incremented before use, so a parmId
	//   of zero is never handed out.
	s_parmId++;

	// set it
	pn->m_parmList.stealBuf ( parmList );
	pn->m_numRequests    = 0;
	pn->m_numReplies     = 0;
	pn->m_numGoodReplies = 0;
	pn->m_numHostsTotal  = 0;
	pn->m_prevNode       = NULL;
	pn->m_nextNode       = NULL;
	pn->m_parmId         = s_parmId; // take a ticket
	pn->m_calledCallback = false;
	pn->m_startTime      = getTimeLocal();
	pn->m_state          = state;
	pn->m_callback       = callback;
	pn->m_sendToGrunts   = sendToGrunts;
	pn->m_sendToProxies  = sendToProxies;
	pn->m_hostId         = hostId;
	pn->m_hostId2        = hostId2; // a range? then not -1 here.

	// store it ordered in our linked list of parm transmit nodes
	if ( ! s_tailNode ) {
		s_headNode = pn;
		s_tailNode = pn;
	}
	else {
		// link pn at the end of the tail
		s_tailNode->m_nextNode = pn;
		pn->m_prevNode = s_tailNode;
		// pn becomes the new tail
		s_tailNode = pn;
	}

	// just the regular proxies, not compression proxies
	if ( pn->m_sendToProxies )
		pn->m_numHostsTotal += g_hostdb.getNumProxies();

	if ( pn->m_sendToGrunts )
		pn->m_numHostsTotal += g_hostdb.getNumGrunts();

	if ( hostId >= 0 )
		pn->m_numHostsTotal = 1;

	// pump the parms out to the other hosts in the network
	doParmSendingLoop ( );

	// . if waiting for more replies to come in, they should be in soon
	// . doParmSendingLoop() is called when a reply comes in so that
	//   the next requests can be sent out
	//if ( waitingForLiveHostsToReply() ) return false;

	// all done. how did this happen?
	//return true;

	// wait for replies
	return false;
}
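
// A minimal caller sketch (hypothetical; not from the original source):
// broadcastParmList() returns false while replies are outstanding, so a
// page handler typically blocks on it and resumes in its callback.
//
//	if ( ! g_parms.broadcastParmList ( &parmList , st ,
//					   donePageWrapper , // called later
//					   true , true , -1 , -1 ) )
//		return false; // blocked, callback will be invoked
//	// a true return here means error (g_errno set) or an empty list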
|
|
|
|
void tryToCallCallbacks ( ) {
|
|
|
|
ParmNode *pn = s_headNode;
|
|
int32_t now = getTimeLocal();
|
|
|
|
for ( ; pn ; pn = pn->m_nextNode ) {
|
|
// skip if already called callback
|
|
if ( pn->m_calledCallback ) continue;
|
|
// should we call the callback?
|
|
bool callIt = false;
|
|
if ( pn->m_numReplies >= pn->m_numRequests ) callIt = true;
|
|
// sometimes we don't launch any requests to update parms
|
|
// because we are jammed up. same logic as we use for
|
|
// freeing the pn below.
|
|
if ( pn->m_numGoodReplies < pn->m_numHostsTotal )
|
|
callIt = false;
|
|
|
|
// 8 seconds is enough to wait for all replies to come in.
|
|
// a host might be dead, so we need this here lest the
|
|
// underlying page handler (i.e. sendPageCrawlbot()) never
|
|
// get called if a host is dead. if you are updating some
|
|
// parms you want the page to return.
|
|
if ( now - pn->m_startTime > 8 &&
|
|
! callIt &&
|
|
g_hostdb.hasDeadHost() )
|
|
callIt = true;
|
|
|
|
if ( ! callIt ) continue;
|
|
// callback is NULL for updating parms like spiderRoundNum
|
|
// in Spider.cpp
|
|
if ( pn->m_callback ) pn->m_callback ( pn->m_state );
|
|
pn->m_calledCallback = true;
|
|
}
|
|
}
|
|
|
|
void gotParmReplyWrapper ( void *state , UdpSlot *slot ) {

	// don't let udpserver free the send buf! that's the ParmNode parmlist
	slot->m_sendBufAlloc = NULL;

	// in case host table is dynamically modified, go by #
	Host *h = g_hostdb.getHost((int32_t)(PTRTYPE)state);

	int32_t parmId = h->m_currentParmIdInProgress;

	ParmNode *pn = h->m_currentNodePtr;

	// inc this count
	pn->m_numReplies++;

	// nothing in progress now
	h->m_currentParmIdInProgress = 0;
	h->m_currentNodePtr = NULL;

	// this is usually a timeout on a dead host i guess
	if ( g_errno ) {
		log("parms: got parm update reply from host #%" INT32 ": %s",
		    h->m_hostId,mstrerror(g_errno));
	}

	// . note it so we do not retry every 1ms!
	// . and only retry on time outs or no mem errors for now...
	// . it'll retry once every 10 seconds using the sleep
	//   wrapper below
	if ( g_errno != EUDPTIMEDOUT && g_errno != ENOMEM )
		g_errno = 0;

	if ( g_errno ) {
		// remember error info for retry
		h->m_lastTryError = g_errno;
		h->m_lastTryTime = getTimeLocal();
		// if a host timed out he could be dead, so try to call
		// the callback for this "pn" anyway. if the only hosts we
		// do not have replies for are dead, then we'll call the
		// callback, but still keep trying to send to them.
		tryToCallCallbacks ();
		// try to send more i guess? i think this is right otherwise
		// the callback might not ever get called
		g_parms.doParmSendingLoop();
		return;
	}

	// otherwise, no error
	h->m_lastTryError = 0;

	// successfully completed
	h->m_lastParmIdCompleted = parmId;

	// inc this count
	pn->m_numGoodReplies++;

	// . this will try to call any callback that can be called
	// . for instance, if the "pn" has recvd all the replies
	// . OR if the remaining hosts are "DEAD"
	// . the callback is in the "pn"
	tryToCallCallbacks ();

	// nuke it?
	if ( pn->m_numGoodReplies >= pn->m_numHostsTotal &&
	     pn->m_numReplies     >= pn->m_numRequests ) {

		// . we must always be the head lest we send out of order.
		// . ParmNodes only destined to a specific hostid are ignored
		//   for this check, only look at those whose m_hostId is -1
		if ( pn != s_headNode && pn->m_hostId == -1 ) {
			log("parms: got parm request out of band. not head.");
		}

		// a new head
		if ( pn == s_headNode ) {
			// sanity
			if ( pn->m_prevNode ) { char *xx=NULL;*xx=0; }
			// the guy after us is the new head
			s_headNode = pn->m_nextNode;
		}

		// a new tail?
		if ( pn == s_tailNode ) {
			// sanity
			if ( pn->m_nextNode ) { char *xx=NULL;*xx=0; }
			// the guy before us is the new tail
			s_tailNode = pn->m_prevNode;
		}

		// empty?
		if ( ! s_headNode ) s_tailNode = NULL;

		// wtf?
		if ( ! pn->m_calledCallback ) { char *xx=NULL;*xx=0; }

		// do callback first before freeing pn
		//if ( pn->m_callback ) pn->m_callback ( pn->m_state );

		if ( pn->m_prevNode )
			pn->m_prevNode->m_nextNode = pn->m_nextNode;

		if ( pn->m_nextNode )
			pn->m_nextNode->m_prevNode = pn->m_prevNode;

		mfree ( pn , sizeof(ParmNode) , "pndfr");
	}

	// try to send more for him
	g_parms.doParmSendingLoop();
}
void parmLoop ( int fd , void *state ) {
	g_parms.doParmSendingLoop();
}

static bool s_registeredSleep = false;
static bool s_inLoop = false;

// . host #0 runs this to send out parms in the parm queue (linked list)
//   to all other hosts.
// . he also sends to himself, if m_sendToGrunts is true
bool Parms::doParmSendingLoop ( ) {

	if ( ! s_headNode ) return true;

	if ( g_isDumpingRdbFromMain ) return true;

	if ( s_inLoop ) return true;

	s_inLoop = true;

	if ( ! s_registeredSleep &&
	     ! g_loop.registerSleepCallback(2000,NULL,parmLoop,0) )
		log("parms: failed to reg parm loop");

	// do not re-register
	s_registeredSleep = true;

	int32_t now = getTimeLocal();

	// try to send a parm update request to each host
	for ( int32_t i = 0 ; i < g_hostdb.m_numHosts ; i++ ) {
		// get it
		Host *h = g_hostdb.getHost(i);
		// we used to skip ourselves, host #0, but we now send to
		// ourselves so updateParm() will be called on us...
		//if ( h->m_hostId == g_hostdb.m_myHostId ) continue;
		// . if in progress, gotta wait for that to complete
		// . 0 is not a legit parmid, it starts at 1
		if ( h->m_currentParmIdInProgress ) continue;
		// if his last completed parmid is the current he is up-to-date
		if ( h->m_lastParmIdCompleted == s_parmId ) continue;
		// if last try had an error, wait 10 secs i guess
		if ( h->m_lastTryError &&
		     h->m_lastTryError != EUDPTIMEDOUT &&
		     now - h->m_lastTryTime < 10 )
			continue;
		// otherwise get him the next to send
		ParmNode *pn = s_headNode;
		for ( ; pn ; pn = pn->m_nextNode ) {
			// stop when we got a parmnode we have not sent to
			// him yet, we'll send it now
			if ( pn->m_parmId > h->m_lastParmIdCompleted ) break;
		}
		// nothing? strange. something is not right.
		if ( ! pn ) {
			log("parms: pn is null");
			break;
		}

		// give him a free pass? some parm updates are directed to
		// a single host, we use this for syncing parms at startup.
		if ( pn->m_hostId >= 0 &&
		     pn->m_hostId2 == -1 && // not a range
		     h->m_hostId != pn->m_hostId ) {
			// assume we sent it to him
			h->m_lastParmIdCompleted = pn->m_parmId;
			h->m_currentNodePtr = NULL;
			continue;
		}

		// range? if not in range, give free pass
		if ( pn->m_hostId >= 0 &&
		     pn->m_hostId2 >= 0 &&
		     ( h->m_hostId < pn->m_hostId ||
		       h->m_hostId > pn->m_hostId2 ) ) {
			// assume we sent it to him
			h->m_lastParmIdCompleted = pn->m_parmId;
			h->m_currentNodePtr = NULL;
			continue;
		}

		// force completion if we should NOT send to him
		if ( (h->isProxy() && ! pn->m_sendToProxies) ||
		     (h->isGrunt() && ! pn->m_sendToGrunts ) ) {
			h->m_lastParmIdCompleted = pn->m_parmId;
			h->m_currentNodePtr = NULL;
			continue;
		}

		// debug log
		log(LOG_INFO,"parms: sending parm request id %i "
		    "to hostid %" INT32 "",(int)pn->m_parmId,h->m_hostId);

		// count it
		pn->m_numRequests++;
		// ok, he's available
		if ( ! g_udpServer.sendRequest ( pn->m_parmList.getBufStart(),
						 pn->m_parmList.length() ,
						 // a new msgtype
						 0x3f,
						 h->m_ip, // ip
						 h->m_port, // port
						 h->m_hostId ,
						 NULL, // retslot
						 (void *)(PTRTYPE)h->m_hostId , // state
						 gotParmReplyWrapper ,
						 30 , // timeout secs
						 -1 , // backoff
						 -1 , // maxwait
						 NULL , // replybuf
						 0 , // replybufmaxsize
						 0 ) ) { // niceness
			log("parms: failed to send: %s",mstrerror(g_errno));
			continue;
		}
		// flag this
		h->m_currentParmIdInProgress = pn->m_parmId;
		h->m_currentNodePtr = pn;
	}

	s_inLoop = false;

	return true;
}
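
// . a minimal restatement of the per-host selection rule above, assuming
//   the same statics and Host fields. not compiled, illustration only.
#if 0
static ParmNode *getNextNodeFor ( Host *h ) {
	// hosts receive ParmNodes strictly in parmId order, one in flight
	// at a time, so parm updates apply in the same order everywhere
	for ( ParmNode *pn = s_headNode ; pn ; pn = pn->m_nextNode )
		if ( pn->m_parmId > h->m_lastParmIdCompleted )
			return pn;
	return NULL;
}
#endif
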
void handleRequest3fLoop ( void *weArg ) ;

void handleRequest3fLoop2 ( void *state , UdpSlot *slot ) {
	handleRequest3fLoop(state);
}

// if a tree is saving while we are trying to delete a collnum (or reset)
// then the call to updateParm() below returns false and we must re-call
// it in this sleep wrapper here
void handleRequest3fLoop3 ( int fd , void *state ) {
	g_loop.unregisterSleepCallback(state,handleRequest3fLoop3);
	handleRequest3fLoop(state);
}
// . host #0 is requesting that we update some parms
void handleRequest3fLoop ( void *weArg ) {
	WaitEntry *we = (WaitEntry *)weArg;

	CollectionRec *cx = NULL;

	// process them
	char *p = we->m_parmPtr;
	for ( ; p < we->m_parmEnd ; ) {
		// shortcut
		char *rec = p;
		// get size
		int32_t dataSize = *(int32_t *)(rec+sizeof(key96_t));
		int32_t recSize = sizeof(key96_t) + 4 + dataSize;
		// skip it
		p += recSize;

		// get the actual parm
		Parm *parm = getParmFromParmRec ( rec );

		if ( ! parm ) {
			int32_t h32 = getHashFromParmRec(rec);
			log("parms: unknown parm sent to us hash=%" INT32 "",h32);
			for ( int32_t i = 0 ; i < g_parms.m_numParms ; i++ ) {
				Parm *x = &g_parms.m_parms[i];
				if ( x->m_cgiHash != h32 ) continue;
				log("parms: unknown parm=%s",x->m_title);
				break;
			}
			continue;
		}

		// if it was the cmd to save & exit then first send a reply back
		if ( ! we->m_sentReply &&
		     parm->m_cgi &&
		     strcmp ( parm->m_cgi , "save" ) == 0 ) {
			// do not re-do this
			we->m_sentReply = 1;
			// note it
			log("parms: sending early parm update reply");
			// wait for reply to be sent and ack'd
			g_udpServer.sendReply_ass ( NULL,0,
						    NULL,0,
						    we->m_slot,
						    8, // timeout in secs
						    // come back here when done
						    we ,
						    handleRequest3fLoop2 );
			return;
		}

		// . determine if it alters the url filters
		// . if those were changed we have to nuke doledb and
		//   waiting tree in Spider.cpp and rebuild them!
		if ( parm->m_flags & PF_REBUILDURLFILTERS )
			we->m_doRebuilds = true;

		if ( parm->m_flags & PF_REBUILDPROXYTABLE )
			we->m_doProxyRebuild = true;

		if ( parm->m_flags & PF_REBUILDACTIVELIST )
			we->m_rebuildActiveList = true;

		// get collnum i guess
		if ( parm->m_type != TYPE_CMD )
			we->m_collnum = getCollnumFromParmRec ( rec );

		// see if our spider round changes. initialize oldRound each
		// pass so we never compare against an uninitialized value.
		int32_t oldRound = -1;
		if ( we->m_collnum >= 0 && ! cx )
			cx = g_collectiondb.getRec ( we->m_collnum );
		// the coll might have gotten deleted! so check cx
		if ( cx ) oldRound = cx->m_spiderRoundNum;

		// . this returns false if blocked, returns true and sets
		//   g_errno on error
		// . it'll block if trying to delete a coll when the tree
		//   is saving or something (CommandDeleteColl())
		if ( ! g_parms.updateParm ( rec , we ) ) {
			////////////
			//
			// . it blocked! it will call we->m_callback when done
			// . we must re-call
			// . try again in 100ms
			//
			////////////
			if ( ! g_loop.registerSleepCallback(100,
							    we ,
							    handleRequest3fLoop3,
							    0 ) ) { // niceness
				log("parms: failed to reg sleeper");
				return;
			}
			log("parms: updateParm blocked. waiting.");
			return;
		}

		if ( cx && oldRound != cx->m_spiderRoundNum )
			we->m_updatedRound = true;

		// do the next parm
		we->m_parmPtr = p;

		// error?
		if ( ! g_errno ) continue;
		// this could mean failed to add coll b/c out of disk or
		// something else that is bad
		we->m_errno = g_errno;
	}

	// one last thing... kinda hacky. if we change certain spidering parms
	// we have to do a couple rebuilds.

	// reset page round counts
	if ( we->m_updatedRound && cx ) {
		// Spider.cpp will reset the *ThisRound page counts and
		// the sent notification flag
		spiderRoundIncremented ( cx );
	}

	// basically resetting the spider here...
	if ( we->m_doRebuilds && cx ) {
		// . this tells Spider.cpp to rebuild the spider queues
		// . this is NULL if spider stuff never initialized yet,
		//   like if you just added the collection
		if ( cx->m_spiderColl )
			cx->m_spiderColl->m_waitingTreeNeedsRebuild = true;
		// . assume we have urls ready to spider too
		// . no, because if they change the filters and there are
		//   still no urls to spider i don't want to get another
		//   email alert!!
		//cr->m_localCrawlInfo .m_hasUrlsReadyToSpider = true;
		//cr->m_globalCrawlInfo.m_hasUrlsReadyToSpider = true;
		// . reconstruct the url filters if we were a custom crawl
		// . this is used to abstract away the complexity of url
		//   filters in favor of simple regular expressions and
		//   substring matching for diffbot
		cx->rebuildUrlFilters();
	}

	if ( we->m_rebuildActiveList && cx )
		g_spiderLoop.m_activeListValid = false;

	// if user changed the list of proxy ips rebuild the binary
	// array representation of the proxy ips we have
	if ( we->m_doProxyRebuild )
		buildProxyTable();

	// note it
	if ( ! we->m_sentReply )
		log("parms: sending parm update reply");

	// send back reply now. empty reply for the most part
	if ( we->m_errno && ! we->m_sentReply )
		g_udpServer.sendErrorReply ( we->m_slot,we->m_errno,0 );
	else if ( ! we->m_sentReply )
		g_udpServer.sendReply_ass ( NULL,0,NULL,0,we->m_slot);
	// all done
	mfree ( we , sizeof(WaitEntry) , "weparm" );
	return;
}
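
// . the parm recs walked above are laid out as:
//     key96_t key | int32_t dataSize | dataSize bytes of data
//   with the collnum, cgi hash and occNum packed into the key by
//   makeParmKey(). a minimal, not-compiled sketch of walking a list
//   using only the accessors defined in this file:
#if 0
static void walkParmList ( char *p , char *pend ) {
	while ( p < pend ) {
		int32_t dataSize   = getDataSizeFromParmRec ( p );
		collnum_t collnum  = getCollnumFromParmRec ( p );
		Parm *parm         = getParmFromParmRec ( p ); // NULL if unknown hash
		// ... process one rec here ...
		p += sizeof(key96_t) + 4 + dataSize;
	}
}
#endif
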
// . host #0 is requesting that we update some parms
// . the readbuf in the request is the list of the parms
void handleRequest3f ( UdpSlot *slot , int32_t niceness ) {

	// sending to host #0 is not right...
	//if ( g_hostdb.m_hostId == 0 ) { char *xx=NULL;*xx=0; }

	char *parmRecs = slot->m_readBuf;
	char *parmEnd  = parmRecs + slot->m_readBufSize;

	log("parms: got parm update request. size=%" INT32 ".",
	    (int32_t)(parmEnd-parmRecs));

	// make a new waiting entry
	WaitEntry *we ;
	we = (WaitEntry *) mmalloc ( sizeof(WaitEntry),"weparm");
	if ( ! we ) {
		g_udpServer.sendErrorReply(slot,g_errno,60);
		return;
	}
	we->m_slot = slot;
	we->m_callback = handleRequest3fLoop;
	we->m_parmPtr = parmRecs;
	we->m_parmEnd = parmEnd;
	we->m_errno = 0;
	we->m_doRebuilds = false;
	we->m_rebuildActiveList = false;
	we->m_updatedRound = false;
	we->m_doProxyRebuild = false;
	we->m_collnum = -1;
	we->m_sentReply = 0;

	handleRequest3fLoop ( we );
}
////
//
// functions for syncing parms with host #0
//
////

// 1. we do not accept any recs into rdbs until in sync with host #0
// 2. at startup we send the hash of all parms for each collrec and
//    for g_conf (collnum -1) to host #0, then he will send us all the
//    parms for a collrec (or g_conf) if we are out of sync.
// 3. when host #0 changes a parm it lets everyone know via broadcastParmList()
// 4. only host #0 may initiate parm changes. so don't let that go down!
// 5. once in sync a host can drop recs for collnums that are invalid
// 6. until in parm sync with host #0 reject adds to collnums we don't
//    have with ETRYAGAIN in Msg4.cpp
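
// . the startup handshake in miniature, summarizing the functions below.
//   illustrative comment only; msg types 0x3e/0x3f and the helper names
//   are the real ones used in this file.
//
//   grunt (any host but #0), at startup:
//     makeSyncHashList(&hashList);         // (collnum, nameHash, parmHash) per coll
//     sendRequest(..., 0x3e, host #0);     // "here is what i think i have"
//   host #0, in handleRequest3e():
//     for each entry: matching hash -> skip; stale -> queue full parmlist;
//     collnum unknown on #0 -> queue "delete"; coll missing on grunt ->
//     queue "addcoll" plus its parmlist; finally queue "insync" and
//     broadcastParmList() the whole reply over 0x3f to that one host
//   grunt: applies the 0x3f recs in order; CommandInSync() flips
//     g_parms.m_inSyncWithHost0 so rdb adds stop being rejected
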
void tryToSyncWrapper ( int fd , void *state ) {
	g_parms.syncParmsWithHost0();
}
// host #0 just sends back an empty reply, but it will hit us with
// 0x3f parmlist requests. that way it uses the same mechanism and can
// guarantee ordering of the parm update requests
void gotReplyFromHost0Wrapper ( void *state , UdpSlot *slot ) {
	// ignore his reply unless error?
	if ( g_errno ) {
		log("parms: got error syncing with host 0: %s. Retrying.",
		    mstrerror(g_errno));
		// re-try it!
		g_parms.m_triedToSync = false;
	}
	else {
		log("parms: synced with host #0");
		// do not re-call
		g_loop.unregisterSleepCallback(NULL,tryToSyncWrapper);
	}

	g_errno = 0;
}
// returns false and sets g_errno on error, true otherwise
bool Parms::syncParmsWithHost0 ( ) {

	if ( m_triedToSync ) return true;

	m_triedToSync = true;

	m_inSyncWithHost0 = false;

	// don't sync with ourselves
	if ( g_hostdb.m_hostId == 0 ) {
		m_inSyncWithHost0 = true;
		return true;
	}

	// only grunts for now can sync, not proxies, so stop if we are a proxy
	if ( g_hostdb.m_myHost->m_type != HT_GRUNT ) {
		m_inSyncWithHost0 = true;
		return true;
	}

	SafeBuf hashList;

	if ( ! makeSyncHashList ( &hashList ) ) return false;

	// copy for sending
	SafeBuf sendBuf;
	if ( ! sendBuf.safeMemcpy ( &hashList ) ) return false;
	if ( sendBuf.getCapacity() != hashList.length() ){char *xx=NULL;*xx=0;}
	if ( sendBuf.length()      != hashList.length() ){char *xx=NULL;*xx=0;}

	// allow udpserver to free it
	char *request = sendBuf.getBufStart();
	int32_t requestLen = sendBuf.length();
	sendBuf.detachBuf();

	Host *h = g_hostdb.getHost(0);

	log("parms: trying to sync with host #0");

	// . send it off. use 3e i guess
	// . host #0 will reply using msg4 really
	// . msg4 guarantees ordering of requests
	// . there will be a record that is CMD_INSYNC so when we get
	//   that we set g_parms.m_inSyncWithHost0 to true
	if ( ! g_udpServer.sendRequest ( request ,//hashList.getBufStart() ,
					 requestLen, //hashList.length() ,
					 0x3e , // msgtype
					 h->m_ip, // ip
					 h->m_port, // port
					 h->m_hostId , // hostid , host #0!!!
					 NULL, // retslot
					 NULL , // state
					 gotReplyFromHost0Wrapper ,
					 99999999 ) ) { // timeout in secs
		log("parms: error syncing with host 0: %s",mstrerror(g_errno));
		return false;
	}

	// wait now
	return true;
}
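
// . the copy-then-detach above is the SafeBuf ownership-transfer pattern:
//   once detachBuf() is called the SafeBuf forgets its allocation, so the
//   udp layer can free the request when the slot completes without a
//   double free. a minimal, not-compiled sketch using only calls that
//   already appear in this file:
#if 0
SafeBuf sendBuf;
sendBuf.safeMemcpy ( &hashList );          // private copy for this request
char   *request    = sendBuf.getBufStart();
int32_t requestLen = sendBuf.length();
sendBuf.detachBuf();                       // udpserver now owns the buffer
// ... hand request/requestLen to g_udpServer.sendRequest() ...
#endif
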
// . here host #0 is receiving a sync request from another host
// . host #0 scans this list of hashes to make sure the requesting host is
//   in sync
// . host #0 will broadcast parm updates by calling broadcastParmList() which
//   uses 0x3f, so this just returns an empty reply on success
// . sends CMD "addcoll" and "delcoll" cmd parms as well
// . includes an "insync" command parm as the last parm
void handleRequest3e ( UdpSlot *slot , int32_t niceness ) {

	// right now we must be host #0
	if ( g_hostdb.m_hostId != 0 ) {
		g_errno = EBADENGINEER;
	hadError:
		g_udpServer.sendErrorReply(slot,g_errno,60);
		return;
	}

	//
	// 0. scan our collections and clear a flag
	//
	for ( int32_t i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) {
		// skip if empty
		CollectionRec *cr = g_collectiondb.m_recs[i];
		if ( ! cr ) continue;
		// clear flag
		cr->m_hackFlag = 0;
	}

	Host *host = slot->m_host;
	int32_t hostId = -1;
	if ( host ) hostId = host->m_hostId;

	SafeBuf replyBuf;

	//
	// 1. update parms on collections we both have
	// 2. tell him to delete collections we do not have but he does
	//
	SafeBuf tmp;
	char *p = slot->m_readBuf;
	char *pend = p + slot->m_readBufSize;
	for ( ; p < pend ; ) {
		// get collnum
		collnum_t c = *(collnum_t *)p;
		p += sizeof(collnum_t);
		// then coll NAME hash
		uint32_t collNameHash32 = *(uint32_t *)p;
		p += 4;
		// sanity check. -1 means g_conf. i guess.
		if ( c < -1 ) { char *xx=NULL;*xx=0; }
		// and parm hash
		int64_t h64 = *(int64_t *)p;
		p += 8;
		// if we, being host #0, do not have this collnum tell
		// him to delete it!
		CollectionRec *cr = NULL;
		if ( c >= 0 ) cr = g_collectiondb.getRec ( c );

		// if collection names are different delete it
		if ( cr && collNameHash32 != hash32n ( cr->m_coll ) ) {
			log("sync: host had collnum %i but wrong name, "
			    "name not %s like it should be",(int)c,cr->m_coll);
			cr = NULL;
		}

		if ( c >= 0 && ! cr ) {
			// note in log
			logf(LOG_INFO,"sync: telling host #%" INT32 " to delete "
			     "collnum %" INT32 "", hostId,(int32_t)c);
			// add the parm rec as a parm cmd
			if ( ! g_parms.addNewParmToList1( &replyBuf,
							  c,
							  NULL,
							  -1,
							  "delete"))
				goto hadError;
			// ok, get next collection hash
			continue;
		}
		// set our hack flag so we know he has this collection
		if ( cr ) cr->m_hackFlag = 1;
		// get our parmlist for that collnum
		tmp.reset();
		// c is -1 for g_conf
		if ( ! g_parms.addAllParmsToList ( &tmp, c ) ) goto hadError;
		// get checksum of that
		int64_t m64 = hash64 ( tmp.getBufStart(),tmp.length() );
		// if match, keep chugging, that's in sync
		if ( h64 == m64 ) continue;
		// note in log
		logf(LOG_INFO,"sync: sending all parms for collnum %" INT32 " "
		     "to host #%" INT32 "", (int32_t)c, hostId);
		// otherwise, send him the list
		if ( ! replyBuf.safeMemcpy ( &tmp ) ) goto hadError;
	}

	//
	// 3. now if he's missing one of our collections tell him to add it
	//
	for ( int32_t i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) {
		// skip if empty
		CollectionRec *cr = g_collectiondb.m_recs[i];
		if ( ! cr ) continue;
		// skip if he already has this collection
		if ( cr->m_hackFlag ) continue;
		//char *cmdStr = "addColl";
		// now use lowercase, not camelcase
		char *cmdStr = "addcoll";
		if ( cr->m_isCustomCrawl == 1 ) cmdStr = "addCrawl";
		if ( cr->m_isCustomCrawl == 2 ) cmdStr = "addBulk";
		// note in log
		logf(LOG_INFO,"sync: telling host #%" INT32 " to add "
		     "collnum %" INT32 " coll=%s", hostId,(int32_t)cr->m_collnum,
		     cr->m_coll);
		// add the parm rec as a parm cmd
		if ( ! g_parms.addNewParmToList1 ( &replyBuf,
						   (collnum_t)i,
						   cr->m_coll, // parm val
						   -1,
						   cmdStr ) )
			goto hadError;
		// and the parmlist for it
		if ( ! g_parms.addAllParmsToList (&replyBuf, i ) ) goto hadError;
	}

	// . final parm is the in-sync stamp of approval which will set
	//   g_parms.m_inSyncWithHost0 to true. CommandInSync()
	// . use -1 for collnum for this cmd
	if ( ! g_parms.addNewParmToList1 ( &replyBuf,-1,NULL,-1,"insync"))
		goto hadError;

	// this should at least have the in sync command
	log("parms: sending %" INT32 " bytes of parms to sync to host #%" INT32 "",
	    replyBuf.length(),hostId);

	// . use the broadcast call here so things keep their order!
	// . we do not need a callback when they have been completely
	//   broadcasted to all hosts so use NULL for that
	// . crap, we only want to send this to host #x ...
	g_parms.broadcastParmList ( &replyBuf , NULL , NULL ,
				    true , // sendToGrunts?
				    false , // sendToProxies?
				    hostId );

	// but do send back an empty reply to this 0x3e request
	g_udpServer.sendReply_ass ( NULL,0,NULL,0,slot);

	// send that back now
	//g_udpServer.sendReply_ass ( replyBuf.getBufStart() ,
	//			      replyBuf.length() ,
	//			      replyBuf.getBufStart() ,
	//			      replyBuf.getCapacity() ,
	//			      slot );
	// udpserver will free it
	//replyBuf.detachBuf();
}
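
// . the m_hackFlag passes above amount to a two-sided set difference.
//   a not-compiled sketch with hypothetical helpers (lookup, queue*):
#if 0
for ( int32_t i = 0 ; i < g_collectiondb.m_numRecs ; i++ )
	if ( g_collectiondb.m_recs[i] )
		g_collectiondb.m_recs[i]->m_hackFlag = 0;   // clear marks
// pass 1: walk the grunt's (collnum,nameHash,parmHash) entries
//   unknown here        -> queueDelete(collnum)   (grunt-only collection)
//   known, hash match   -> mark m_hackFlag = 1    (in sync, nothing to do)
//   known, hash differs -> mark + queueFullParmList(cr)
// pass 2: every local cr still unmarked is missing on the grunt
//   -> queueAddColl(cr) + queueFullParmList(cr)
#endif
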
// get the hash of every collection's parmlist
bool Parms::makeSyncHashList ( SafeBuf *hashList ) {
	SafeBuf tmp;

	// first do g_conf, collnum -1!
	for ( int32_t i = -1 ; i < g_collectiondb.m_numRecs ; i++ ) {
		// shortcut
		CollectionRec *cr = NULL;
		if ( i >= 0 ) cr = g_collectiondb.m_recs[i];
		// skip if empty
		if ( i >= 0 && ! cr ) continue;
		// clear since last time
		tmp.reset();
		// g_conf? if i is -1 do g_conf
		if ( ! addAllParmsToList ( &tmp , i ) )
			return false;
		// store collnum first, as sizeof(collnum_t) bytes
		if ( ! hashList->safeMemcpy ( &i , sizeof(collnum_t) ) )
			return false;
		// then store the collection name hash, 32 bit hash
		uint32_t collNameHash32 = 0;
		if ( cr ) collNameHash32 = hash32n ( cr->m_coll );
		if ( ! hashList->safeMemcpy ( &collNameHash32, 4 ) )
			return false;
		// hash the parms
		int64_t h64 = hash64 ( tmp.getBufStart(),tmp.length() );
		// and store it
		if ( ! hashList->pushLongLong ( h64 ) )
			return false;
	}
	return true;
}
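
// . each entry appended above is fixed-width on the wire and is read back
//   with the same widths by handleRequest3e(). assuming the usual 2-byte
//   collnum_t that is 14 bytes per collection, with collnum -1 standing
//   in for g_conf. sketch only, not compiled; the struct name is
//   hypothetical:
#if 0
struct SyncHashEntrySketch {
	collnum_t collnum;          // -1 = g_conf
	uint32_t  collNameHash32;   // hash32n(cr->m_coll), 0 for g_conf
	int64_t   parmListHash64;   // hash64 over addAllParmsToList() output
};
#endif
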
int32_t Parm::getNumInArray ( collnum_t collnum ) {
	char *obj = (char *)&g_conf;
	if ( m_obj == OBJ_COLL ) {
		CollectionRec *cr = g_collectiondb.getRec ( collnum );
		if ( ! cr ) return -1;
		obj = (char *)cr;
	}
	// the # of elements in the array is stored right before it
	return *(int32_t *)(obj+m_off-4);
}
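
// . sketch of the count-before-array layout getNumInArray() relies on:
//   the element count lives in the 4 bytes immediately preceding the
//   array inside the object, hence the m_off-4 read. not compiled;
//   names here are hypothetical.
#if 0
struct ArrayParmLayoutSketch {
	int32_t m_numElements;      // what *(int32_t *)(obj+m_off-4) reads
	char    m_elements[1];      // m_off points here; each m_size bytes
};
#endif
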
// . we use this for syncing parms between hosts
// . called by convertAllCollRecsToParmList
// . returns false and sets g_errno on error
// . "rec" can be CollectionRec or g_conf ptr
bool Parms::addAllParmsToList ( SafeBuf *parmList, collnum_t collnum ) {

	// loop over parms
	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
		// get it
		Parm *parm = &m_parms[i];
		// skip comments
		if ( parm->m_type == TYPE_COMMENT ) continue;
		if ( parm->m_type == TYPE_FILEUPLOADBUTTON ) continue;
		// cmds
		if ( parm->m_type == TYPE_CMD ) continue;
		if ( parm->m_type == TYPE_BOOL2 ) continue;

		// daily merge last started. do not sync this...
		if ( parm->m_type == TYPE_LONG_CONST ) continue;

		if ( collnum == -1 && parm->m_obj != OBJ_CONF ) continue;
		if ( collnum >=  0 && parm->m_obj != OBJ_COLL ) continue;
		if ( collnum < -1 ) { char *xx=NULL;*xx=0; }

		// like 'statsdb max cache mem' etc.
		if ( parm->m_flags & PF_NOSYNC ) continue;

		// sanity, need cgi hash to look up the parm on the
		// receiving end
		if ( parm->m_cgiHash == 0 ) {
			log("parms: no cgi for parm %s",parm->m_title);
			char *xx=NULL; *xx=0;
		}

		int32_t occNum = -1;
		int32_t maxOccNum = 0;

		if ( parm->isArray() ) {
			maxOccNum = parm->getNumInArray(collnum) ;
			occNum = 0;
		}

		for ( ; occNum < maxOccNum ; occNum++ ) {
			// add each occ # to list
			if ( ! addCurrentParmToList2 ( parmList ,
						       collnum ,
						       occNum ,
						       parm ) )
				return false;
			/*
			//
			// use this to debug parm list checksums being off
			//
			int64_t h64 ;
			h64 = hash64 ( parmList->getBufStart(),
				       parmList->length() );
			// note it for debugging hash
			SafeBuf xb;
			parm->printVal ( &xb ,collnum,occNum);
			log("parms: adding (h=%" XINT64 ") parm %s = %s",
			    h64,parm->m_title,xb.getBufStart());
			*/
		}
	}
	return true;
}
void resetImportLoopFlag () ;

// . this adds the key, if not a cmd key, to the parmdb rdbtree
// . this executes cmds
// . this updates the CollectionRec which may disappear later and be fully
//   replaced by Parmdb, just an RdbTree really.
// . returns false if blocked
// . returns true and sets g_errno on error
bool Parms::updateParm ( char *rec , WaitEntry *we ) {

	collnum_t collnum = getCollnumFromParmRec ( rec );

	g_errno = 0;

	Parm *parm = getParmFromParmRec ( rec );

	if ( ! parm ) {
		log("parmdb: could not find parm for rec");
		g_errno = EBADENGINEER;
		return true;
	}

	// cmd to execute?
	if ( parm->m_type == TYPE_CMD ||
	     // sitelist is a safebuf but it requires special deduping
	     // logic to update it so it uses CommandUpdateSiteList() to
	     // do the updating
	     parm->m_func ) {
		// all parm rec data for TYPE_CMD should be ascii/utf8 chars
		// and should be \0 terminated
		char *data = getDataFromParmRec ( rec );
		int32_t dataSize = getDataSizeFromParmRec ( rec );
		if ( dataSize == 0 ) data = NULL;
		log("parmdb: running function for "
		    "parm \"%s\" (collnum=%" INT32 ") args=\"%s\""
		    , parm->m_title
		    , (int32_t)collnum
		    , data
		    );

		// sets g_errno on error
		if ( parm->m_func ) {
			parm->m_func ( rec );
			return true;
		}

		// fix core from using "roundstart=1" on non-existent coll
		if ( ! parm->m_func2 ) {
			return true;
		}

		// . returns true and sets g_errno on error
		// . returns false if blocked
		// . this is for CommandDeleteColl() and CommandResetColl()
		if ( parm->m_func2 ( rec , we ) ) return true;

		// . it did not complete.
		// . we need to re-call it using the sleep wrapper above
		return false;
	}

	// "cr" will remain null when updating g_conf and collnum -1
	CollectionRec *cr = NULL;
	if ( collnum >= 0 ) {
		cr = g_collectiondb.getRec ( collnum );
		if ( ! cr ) {
			char *ps = "unknown parm";
			if ( parm ) ps = parm->m_title;
			log("parmdb: invalid collnum %" INT32 " for parm \"%s\"",
			    (int32_t)collnum,ps);
			g_errno = ENOCOLLREC;
			return true;
		}
	}
	// what are we updating?
	void *base = NULL;

	// we might have a collnum specified even if the parm is global;
	// maybe there are some collection/local parms specified as well
	// that that collnum applies to
	if ( parm->m_obj == OBJ_COLL ) base = cr;
	else base = &g_conf;

	if ( ! base ) {
		log("parms: no collrec (%" INT32 ") to change parm",(int32_t)collnum);
		g_errno = ENOCOLLREC;
		return true;
	}

	int32_t occNum = getOccNumFromParmRec ( rec );

	// get data
	int32_t dataSize = *(int32_t *)(rec+sizeof(key96_t));
	char *data = rec+sizeof(key96_t)+4;

	// point to where to copy the data into the collrec
	char *dst = (char *)base + parm->m_off;
	// point to count in case it is an array
	int32_t *countPtr = NULL;
	// array?
	if ( parm->isArray() ) {
		if ( occNum < 0 ) {
			log("parms: bad occnum for %s",parm->m_title);
			// this is an error, not a block, so return true
			// with g_errno set lest the caller retry forever
			g_errno = EBADENGINEER;
			return true;
		}
		// point to count in case it is an array
		countPtr = (int32_t *)(dst - 4);
		// now point "dst" to the occNum-th element
		dst += parm->m_size * occNum;
	}

	//
	// compare parm to see if it changed value
	//
	SafeBuf val1;
	parm->printVal ( &val1 , collnum , occNum );

	// if parm is a safebuf...
	if ( parm->m_type == TYPE_SAFEBUF ) {
		// point to it
		SafeBuf *sb = (SafeBuf *)dst;
		// nuke it
		sb->purge();
		// require that the \0 be part of the update i guess
		//if ( ! data || dataSize <= 0 ) { char *xx=NULL;*xx=0; }
		// check for \0
		if ( data && dataSize > 0 ) {
			if ( data[dataSize-1] != '\0') { char *xx=NULL;*xx=0;}
			// this means that we can not use string POINTERS as
			// parms!! don't include \0 as part of length
			sb->safeStrcpy ( data ); // , dataSize );
			// ensure null terminated
			sb->nullTerm();
			sb->setLabel("parm2");
		}
		//return true;
		// sanity
		// we no longer include the \0 in the dataSize...so a dataSize
		// of 0 means empty string...
		//if ( data[dataSize-1] != '\0' ) { char *xx=NULL;*xx=0; }
	}
	else {
		// and copy the data into collrec or g_conf
		gbmemcpy ( dst , data , dataSize );
	}

	SafeBuf val2;
	parm->printVal ( &val2 , collnum , occNum );

	// did this parm change value? getBufStart() can be NULL if
	// nothing was printed, so guard it before the strcmp.
	char *s1 = val1.getBufStart();
	char *s2 = val2.getBufStart();
	if ( ! s1 ) s1 = "";
	if ( ! s2 ) s2 = "";
	bool changed = ( strcmp ( s1 , s2 ) != 0 );

	// . update array count if necessary
	// . parm might not have changed value based on what was in there
	//   by default, but for PAGE_FILTERS the default value in the row
	//   for this parm might have been zero! so we gotta update its
	//   "count" in that scenario even though the parm val was unchanged.
	if ( parm->isArray() ) {
		// the int32_t before the array is the # of elements
		int32_t currentCount = *countPtr;
		// update our # elements in our array if this is bigger
		int32_t newCount = occNum + 1;
		bool updateCount = false;
		if ( newCount > currentCount ) updateCount = true;
		// do not update counts if we are url filters
		// and we are currently >= the expression count. we have
		// to have a non-empty expression at the end in order to
		// add the expression. this prevents the empty line from
		// being added!
		if ( parm->m_page == PAGE_FILTERS &&
		     cr &&
		     cr->m_regExs[occNum].getLength() == 0 )
			updateCount = false;
		// and for other pages, like master ips, skip if empty!
		// PAGE_PASSWORDS, PAGE_MASTERPASSWORDS, ...
		if ( parm->m_page != PAGE_FILTERS && ! changed )
			updateCount = false;

		// ok, increment the array count of items in the array
		if ( updateCount )
			*countPtr = newCount;
	}

	// all done if value was unchanged
	if ( ! changed )
		return true;

	// show it
	log("parms: updating parm \"%s\" "
	    "(%s[%" INT32 "]) (collnum=%" INT32 ") from \"%s\" -> \"%s\"",
	    parm->m_title,
	    parm->m_cgi,
	    occNum,
	    (int32_t)collnum,
	    s1,
	    s2);

	if ( cr ) cr->m_needsSave = true;

	// HACK #2
	if ( base == cr && dst == (char *)&cr->m_importEnabled )
		resetImportLoopFlag();

	//
	// HACK
	//
	// special hack. if spidering re-enabled then reset last spider
	// attempt time to 0 to avoid the "has no more urls to spider"
	// msg followed by the reviving url msg.
	if ( base == cr && dst == (char *)&cr->m_spideringEnabled )
		cr->m_localCrawlInfo.m_lastSpiderAttempt = 0;
	if ( base == &g_conf && dst == (char *)&g_conf.m_spideringEnabled ) {
		for ( int32_t i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) {
			CollectionRec *cr = g_collectiondb.m_recs[i];
			if ( ! cr ) continue;
			cr->m_localCrawlInfo.m_lastSpiderAttempt = 0;
		}
	}

	//
	// if user changed the crawl/process max then reset here so
	// spiders will resume
	//
	if ( base == cr &&
	     dst == (char *)&cr->m_maxToCrawl &&
	     cr->m_spiderStatus == SP_MAXTOCRAWL ) {
		// reset this for rebuilding of active spider collections
		// so this collection can be in the linked list again
		cr->m_spiderStatus = SP_INPROGRESS;
		// rebuild list of active spider collections then
		g_spiderLoop.m_activeListValid = false;
	}

	if ( base == cr &&
	     dst == (char *)&cr->m_maxToProcess &&
	     cr->m_spiderStatus == SP_MAXTOPROCESS ) {
		// reset this for rebuilding of active spider collections
		// so this collection can be in the linked list again
		cr->m_spiderStatus = SP_INPROGRESS;
		// rebuild list of active spider collections then
		g_spiderLoop.m_activeListValid = false;
	}

	if ( base == cr &&
	     dst == (char *)&cr->m_maxCrawlRounds &&
	     cr->m_spiderStatus == SP_MAXROUNDS ) {
		// reset this for rebuilding of active spider collections
		// so this collection can be in the linked list again
		cr->m_spiderStatus = SP_INPROGRESS;
		// rebuild list of active spider collections then
		g_spiderLoop.m_activeListValid = false;
	}

	//
	// END HACK
	//

	// all done
	return true;
}
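
// . updateParm() above detects a change by printing the value before and
//   after the copy and comparing strings, one uniform path for every parm
//   type. a minimal, not-compiled sketch of the idea (applyNewValue is a
//   hypothetical helper standing in for the copy logic above):
#if 0
SafeBuf before, after;
parm->printVal ( &before , collnum , occNum );
applyNewValue ( parm , collnum , occNum , data , dataSize );
parm->printVal ( &after , collnum , occNum );
bool changed = strcmp ( before.getBufStart() , after.getBufStart() ) != 0;
#endif
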
bool Parm::printVal ( SafeBuf *sb , collnum_t collnum , int32_t occNum ) {

	CollectionRec *cr = NULL;
	if ( collnum >= 0 ) cr = g_collectiondb.getRec ( collnum );

	// no value if no storage record offset
	//if ( m_off < 0 ) return true;

	char *base;
	if ( m_obj == OBJ_COLL ) base = (char *)cr;
	else base = (char *)&g_conf;

	if ( ! base ) {
		log("parms: no collrec (%" INT32 ") to print parm",(int32_t)collnum);
		g_errno = ENOCOLLREC;
		return true;
	}

	// point to the parm's data in the collrec or g_conf
	char *val = (char *)base + m_off;

	if ( isArray() && occNum < 0 ) {
		log("parms: bad occnum for %s",m_title);
		return false;
	}

	// add array index to ptr
	if ( isArray() ) val += m_size * occNum;

	if ( m_type == TYPE_SAFEBUF ) {
		// point to it
		SafeBuf *sb2 = (SafeBuf *)val;
		// an empty safebuf can have a NULL buffer; print an empty
		// string in that case
		char *s = sb2->getBufStart();
		return sb->safePrintf("%s", s ? s : "" );
	}

	if ( m_type == TYPE_STRING ||
	     m_type == TYPE_STRINGBOX ||
	     m_type == TYPE_STRINGNONEMPTY )
		return sb->safePrintf("%s",val);

	if ( m_type == TYPE_LONG || m_type == TYPE_LONG_CONST )
		return sb->safePrintf("%" INT32 "",*(int32_t *)val);

	if ( m_type == TYPE_DATE )
		return sb->safePrintf("%" INT32 "",*(int32_t *)val);

	if ( m_type == TYPE_DATE2 )
		return sb->safePrintf("%" INT32 "",*(int32_t *)val);

	if ( m_type == TYPE_FLOAT )
		return sb->safePrintf("%f",*(float *)val);

	if ( m_type == TYPE_LONG_LONG )
		return sb->safePrintf("%" INT64 "",*(int64_t *)val);

	if ( m_type == TYPE_CHARPTR ) {
		if ( val ) return sb->safePrintf("%s",val);
		return true;
	}

	if ( m_type == TYPE_BOOL ||
	     m_type == TYPE_BOOL2 ||
	     m_type == TYPE_CHECKBOX ||
	     m_type == TYPE_PRIORITY2 ||
	     m_type == TYPE_UFP ||
	     m_type == TYPE_CHAR )
		return sb->safePrintf("%hhx",*val);

	if ( m_type == TYPE_CMD )
		return sb->safePrintf("CMD");

	if ( m_type == TYPE_IP )
		// may print 0.0.0.0
		return sb->safePrintf("%s",iptoa(*(int32_t *)val) );

	log("parms: missing parm type!!");

	char *xx=NULL;*xx=0;
	return false;
}
bool printUrlExpressionExamples ( SafeBuf *sb ) {

	/*
	CollectionRec *cr = (CollectionRec *)THIS;
	// if testUrl is provided, find in the table
	char testUrl [ 1025 ];
	char *tt = r->getString ( "qatest123" , NULL );
	testUrl[0]='\0';
	if ( tt ) strncpy ( testUrl , tt , 1024 );
	char *tu = testUrl;
	if ( ! tu ) tu = "";
	char matchString[12];
	matchString[0] = '\0';
	if ( testUrl[0] ) {
		Url u;
		u.set ( testUrl , gbstrlen(testUrl) );
		// since we don't know the doc's quality, sfn, or
		// other stuff, just give default values
		int32_t n = cr->getRegExpNum ( &u ,
					       false , // links2gb?
					       false , // searchboxToGB
					       false , // onsite?
					       -1 , // docQuality
					       -1 , // hopCount
					       false , // siteInDmoz?
					       //-1 , // ruleset #
					       -1 , // langId
					       -1 , // parent priority
					       0 , // niceness
					       NULL , // tagRec
					       false , // isRSS?
					       false , // isPermalink?
					       false , // new outlink?
					       -1 , // age
					       NULL , // LinkInfo
					       NULL , // parentUrl
					       -1 , // priority
					       false , // isAddUrl
					       false , // parentRSS?
					       false , // parentIsNew?
					       false , // parentIsPermlnk
					       false );// isIndexed?
		if ( n == -1 ) sprintf ( matchString , "default" );
		else           sprintf ( matchString, "%" INT32 "", n+1 );
	}
	// test table
	sb.safePrintf (
		//"</form><form method=get action=/cgi/14.cgi>"
		//"<input type=hidden name="
		"<table width=100%% cellpadding=4 border=1 "
		"bgcolor=#%s>"
		"<tr><td colspan=2 bgcolor=#%s><center>"
		//"<font size=+1>"
		"<b>"
		"URL Filters Test</b>"
		//"</font>"
		"</td></tr>"
		"<tr><td colspan=2>"
		"<font size=1>"
		"To test your URL filters simply enter a URL into "
		"this box and submit it. The URL filter line number "
		"that it matches will be displayed to the right."
		"</font>"
		"</td></tr>"
		"<tr>"
		"<td><b>Test URL</b></td>"
		"<td><b>Matching Expression #</b></td>"
		"</tr>"
		"<tr>"
		"<td><input type=text size=55 value=\"%s\" "
		"name=test> "
		"<input type=submit name=action value=test></td>"
		"<td>%s</td></tr></table><br><br>\n" ,
		LIGHT_BLUE , DARK_BLUE , testUrl , matchString );
	*/
	sb->safePrintf(
		"<style>"
		".poo { background-color:#%s;}\n"
		"</style>\n" ,
		LIGHT_BLUE );

	sb->safePrintf (
		"<table %s>"
		"<tr><td colspan=2><center>"
		"<b>"
		"Supported Expressions</b>"
		"</td></tr>"

		"<tr class=poo><td>default</td>"
		"<td>Matches every url."
		"</td></tr>"

		"<tr class=poo><td>^http://whatever</td>"
		"<td>Matches if the url begins with "
		"<i>http://whatever</i>"
		"</td></tr>"

		"<tr class=poo><td>$.css</td>"
		"<td>Matches if the url ends with \".css\"."
		"</td></tr>"

		"<tr class=poo><td>foobar</td>"
		"<td>Matches if the url CONTAINS <i>foobar</i>."
		"</td></tr>"

		"<tr class=poo><td>tld==uk,jp</td>"
		"<td>Matches if url's TLD ends in \"uk\" or \"jp\"."
		"</td></tr>"

		/*
		"<tr class=poo><td>doc:quality<40</td>"
		"<td>Matches if document quality is "
		"less than 40. Can be used for assigning to spider "
		"priority.</td></tr>"

		"<tr class=poo><td>doc:quality<40 && tag:ruleset==22</td>"
		"<td>Matches if document quality is less than 40 and "
		"it belongs to ruleset 22. Only for assigning to "
		"spider priority.</td></tr>"

		"<tr class=poo><td><nobr>"
		"doc:quality<40 && tag:manualban==1</nobr></td>"
		"<td>Matches if document quality is less than 40 and "
		"it has a value of \"1\" for its \"manualban\" "
		"tag.</td></tr>"

		"<tr class=poo><td>tag:ruleset==33 && doc:quality<40</td>"
		"<td>Matches if document quality is less than 40 and "
		"it belongs to ruleset 33. Only for assigning to "
		"spider priority or a banned ruleset.</td></tr>"
		*/

		"<tr class=poo><td><a name=hopcount></a>"
		"hopcount<4 && iswww</td>"
		"<td>Matches if document has a hop count less than 4, and "
		"is a \"www\" url (or domain-only url).</td></tr>"

		"<tr class=poo><td>hopcount</td>"
		"<td>All root urls, those that have only a single "
		"slash for their path, and no cgi parms, have a "
		"hop count of 0. Also, all RSS urls, ping "
		"server urls and site roots (as defined in the "
		"site rules table) have a hop count of 0. Their "
		"outlinks have a hop count of 1, and the outlinks "
		"of those outlinks a hop count of 2, etc."
		"</td></tr>"

		"<tr class=poo><td>sitepages</td>"
		"<td>The number of pages that are currently indexed "
		"for the subdomain of the URL. "
		"Used for doing quotas."
		"</td></tr>"

		// MDW: 7/11/2014 take this out until it works.
		// problem is that the quota table m_localTable
		// in Spider.cpp gets reset for each firstIp scan,
		// and we have a.walmart.com and b.walmart.com
		// with different first ips even though on same
		// domain. perhaps we should use the domain as the
		// key to getting the firstip for any subdomain.
		// but our whole selection algo in spider.cpp is
		// firstIp based, so it scans all the spiderrequests
		// from a single firstip to get the winner for that
		// firstip.

		// "<tr class=poo><td>domainpages</td>"
		// "<td>The number of pages that are currently indexed "
		// "for the domain of the URL. "
		// "Used for doing quotas."
		// "</td></tr>"

		"<tr class=poo><td>siteadds</td>"
		"<td>The number of URLs manually added to the "
		"subdomain of the URL. Used to gauge a subdomain's "
		"popularity."
		"</td></tr>"

		// taken out for the same reason as domainpages
		// above was taken out. see explanation up there.
		// "<tr class=poo><td>domainadds</td>"
		// "<td>The number of URLs manually added to the "
		// "domain of the URL. Used to gauge a domain's "
		// "popularity."
		// "</td></tr>"

		"<tr class=poo><td>isrss | !isrss</td>"
		"<td>Matches if document is an RSS feed. Will "
		"only match this rule if the document has been "
		"successfully spidered before, because it requires "
		"downloading the document content to see if it "
		"truly is an RSS feed."
		"</td></tr>"

		"<tr class=poo><td>isrssext | !isrssext</td>"
		"<td>Matches if url ends in .xml .rss or .atom. "
		"TODO: Or if the link was in an "
		"alternative link tag."
		"</td></tr>"

		//"<tr class=poo><td>!isrss</td>"
		//"<td>Matches if document is NOT an rss feed."
		//"</td></tr>"

		"<tr class=poo><td>ispermalink | !ispermalink</td>"
		"<td>Matches if document is a permalink. "
		"When harvesting outlinks we <i>guess</i> if they "
		"are a permalink by looking at the structure "
		"of the url.</td></tr>"

		//"<tr class=poo><td>!ispermalink</td>"
		//"<td>Matches if document is NOT a permalink."
		//"</td></tr>"

		/*
		"<tr class=poo><td>outlink | !outlink</td>"
		"<td>"
		"<b>This is true if the url being added to spiderdb "
		"is an outlink from the page being spidered. "
		"Otherwise, the url being added to spiderdb "
		"directly represents the page being spidered. It "
		"is often VERY useful to partition the Spiderdb "
		"records based on this criteria."
		"</td></tr>"
		*/

		"<tr class=poo><td><nobr>isnewoutlink | !isnewoutlink"
		"</nobr></td>"
		"<td>"
		"This is true if the outlink was not there "
		"the last time we spidered the page we harvested "
		"it from."
		"</td></tr>"

		"<tr class=poo><td>hasreply | !hasreply</td>"
		"<td>"
		"This is true if we have tried to spider "
		"this url, even if we got an error while trying."
		"</td></tr>"

		"<tr class=poo><td>isnew | !isnew</td>"
		"<td>"
		"This is the opposite of hasreply above. A url "
		"is new if it has no spider reply, including "
		"error replies. So once a url has been attempted to "
		"be spidered then this will be false even if there "
		"was any kind of error."
		"</td></tr>"

		"<tr class=poo><td>lastspidertime >= "
		"<b>{roundstart}</b></td>"
		"<td>"
		"This is true if the url's last spidered time "
		"indicates it was spidered already for this "
		"current round of spidering. When no more urls "
		"are available for spidering, then gigablast "
		"automatically sets {roundstart} to the current "
		"time so all the urls can be spidered again. This "
		"is how you do round-based spidering. "
		"You have to use the respider frequency as well "
		"to adjust how often you want things respidered."
		"</td></tr>"

		"<tr class=poo><td>urlage</td>"
		"<td>"
		"This is the time, in seconds, since a url was first "
		"added to spiderdb to be spidered. This is "
		"its discovery date. "
		"Can use <, >, <=, >=, ==, != comparison operators."
		"</td></tr>"

		//"<tr class=poo><td>!newoutlink</td>"
		//"<td>Matches if document is NOT a new outlink."
		//"</td></tr>"

		"<tr class=poo><td>age</td>"
		"<td>"
		"How old is the document <b>in seconds</b>. "
		"The age is based on the publication date of "
		"the document, which could also be the "
		"time that the document was last significantly "
		"modified. If this date is unknown then the age "
		"will be -1 and only match the expression "
		"<i>age==-1</i>. "
		"When harvesting links, we guess the publication "
		"date of the outlink by detecting dates contained "
		"in the url itself, which is popular among some "
		"forms of permalinks. This allows us to put "
		"older permalinks into a slower spider queue."
		"</td></tr>"

		"<tr class=poo><td>spiderwaited < 3600</td>"
		"<td>"
		"<i>spiderwaited</i> is how many seconds have elapsed "
		"since the last time "
		"we tried to spider/download the url. "
		"The constraint containing <i>spiderwaited</i> will "
		"fail to be matched if the url has never been "
		"attempted to be spidered/downloaded before. Therefore, "
		"it will only ever match urls that have a spider reply "
		"of some sort, so there is no need to add an additional "
		"<i>hasreply</i>-based constraint."
		"</td></tr>"
"<tr class=poo><td>"
|
|
"<a name=insitelist>"
|
|
"insitelist | !insitelist"
|
|
"</a>"
|
|
"</td>"
|
|
"<td>"
|
|
"This is true if the url matches a pattern in "
|
|
"the list of sites on the <a href=/admin/sites>"
|
|
"site list</a> page. That site list is useful for "
|
|
"adding a large number of sites that can not be "
|
|
"accommodated by the url filters table. Plus "
|
|
"it is higher performance and easier to use, but "
|
|
"lacks the url filter table's "
|
|
"fine level of control."
|
|
"</td></tr>"
|
|
|
|
"<tr class=poo><td>"
|
|
"<a name=isaddurl>"
|
|
"isaddurl | !isaddurl"
|
|
"</a>"
|
|
"</td>"
|
|
"<td>"
|
|
"This is true if the url was added from the add "
|
|
"url interface or API."
|
|
//"This replaces the add url priority "
|
|
//"parm."
|
|
"</td></tr>"
|
|
|
|
"<tr class=poo><td>isinjected | !isinjected</td>"
|
|
"<td>"
|
|
"This is true if the url was directly "
|
|
"injected from the "
|
|
"<a href=/admin/inject>inject page</a> or API."
|
|
"</td></tr>"
|
|
|
|
"<tr class=poo><td>isreindex | !isreindex</td>"
|
|
"<td>"
|
|
"This is true if the url was added from the "
|
|
"<a href=/admin/reindex>query reindex</a> "
|
|
"interface. The request does not contain "
|
|
"a url, but only a docid, that way we can add "
|
|
"millions of search results very quickly without "
|
|
"having to lookup each of their urls. You should "
|
|
"definitely have this if you use the reindexing "
|
|
"feature. "
|
|
"You can set max spiders to 0 "
|
|
"for non "
|
|
"isreindex requests while you reindex or delete "
|
|
"the results of a query for extra speed."
|
|
"</td></tr>"
|
|
|
|
"<tr class=poo><td>ismanualadd | !ismanualadd</td>"
|
|
"<td>"
|
|
"This is true if the url was added manually. "
|
|
"Which means it matches isaddurl, isinjected, "
|
|
" or isreindex. as opposed to only "
|
|
"being discovered from the spider. "
|
|
"</td></tr>"
|
|
|
|
"<tr class=poo><td><nobr>inpingserver | !inpingserver"
|
|
"</nobr></td>"
|
|
"<td>"
|
|
"This is true if the url has an inlink from "
|
|
"a recognized ping server. Ping server urls are "
|
|
"hard-coded in Url.cpp. <b><font color=red> "
|
|
"pingserver urls are assigned a hop count of 0"
|
|
"</font></b>"
|
|
"</td></tr>"
|
|
|
|
"<tr class=poo><td>isparentrss | !isparentrss</td>"
|
|
"<td>"
|
|
"If a parent of the URL was an RSS page "
|
|
"then this will be matched."
|
|
"</td></tr>"
|
|
|
|
"<tr class=poo><td>isparentsitemap | "
|
|
"!isparentsitemap</td>"
|
|
"<td>"
|
|
"If a parent of the URL was a sitemap.xml page "
|
|
"then this will be matched."
|
|
"</td></tr>"
|
|
|
|
/*
|
|
"<tr class=poo><td>parentisnew | !parentisnew</td>"
|
|
"<td>"
|
|
"<b>Parent providing this outlink is not currently "
|
|
"in the index but is trying to be added right now. "
|
|
"</b>This is a special expression in that "
|
|
"it only applies to assigning spider priorities "
|
|
"to outlinks we are harvesting on a page.</b>"
|
|
"</td></tr>"
|
|
*/
|
|
|
|
"<tr class=poo><td>isindexed | !isindexed</td>"
|
|
"<td>"
|
|
"This url matches this if in the index already. "
|
|
"</td></tr>"
|
|
|
|
"<tr class=poo><td>errorcount==1</td>"
|
|
"<td>"
|
|
"The number of times the url has failed to "
|
|
"be indexed. 1 means just the last time, two means "
|
|
"the last two times. etc. Any kind of error parsing "
|
|
"the document (bad utf8, bad charset, etc.) "
|
|
"or any HTTP status error, like 404 or "
|
|
"505 is included in this count, in addition to "
|
|
"\"temporary\" errors like DNS timeouts."
|
|
"</td></tr>"
|
|
|
|
"<tr class=poo><td>errorcode==32880</td>"
|
|
"<td>"
|
|
"If the last time it was spidered it had this "
|
|
"numeric error code. See the error codes in "
|
|
"Errno.cpp. In this particular example 32880 is "
|
|
"for EBADURL."
|
|
"</td></tr>"
|
|
|
|
"<tr class=poo><td>hastmperror</td>"
|
|
"<td>"
|
|
"This is true if the last spider attempt resulted "
|
|
"in an error like EDNSTIMEDOUT or a similar error, "
|
|
"usually indicative of a temporary internet "
|
|
"failure, or local resource failure, like out of "
|
|
"memory, and should be retried soon. "
|
|
"Currently: "
|
|
"dns timed out, "
|
|
"tcp timed out, "
|
|
"dns dead, "
|
|
"network unreachable, "
|
|
"host unreachable, "
|
|
"diffbot internal error, "
|
|
"out of memory."
|
|
"</td></tr>"
|
|
|
|
"<tr class=poo><td>percentchangedperday<=5</td>"
|
|
"<td>"
|
|
"Looks at how much a url's page content has changed "
|
|
"between the last two times it was spidered, and "
|
|
"divides that percentage by the number of days. "
|
|
"So if a URL's last two downloads were 10 days "
|
|
"apart and its page content changed 30%% then "
|
|
"the <i>percentchangedperday</i> will be 3. "
|
|
"Can use <, >, <=, >=, ==, != comparison operators. "
|
|
"</td></tr>"
|
|
|
|
"<tr class=poo><td>sitenuminlinks>20</td>"
|
|
"<td>"
|
|
"How many inlinks does the URL's site have? "
|
|
"We only count non-spammy inlinks, and at most only "
|
|
"one inlink per IP address C-Class is counted "
|
|
"so that a webmaster who owns an entire C-Class "
|
|
"of IP addresses will only have his inlinks counted "
|
|
"once."
|
|
"Can use <, >, <=, >=, ==, != comparison operators. "
|
|
"</td></tr>"
|
|
|
|
|
|
"<tr class=poo><td>numinlinks>20</td>"
|
|
"<td>"
|
|
"How many inlinks does the URL itself have? "
|
|
"We only count one link per unique C-Class IP "
|
|
"address "
|
|
"so that a webmaster who owns an entire C-Class "
|
|
"of IP addresses will only have her inlinks counted "
|
|
"once."
|
|
"Can use <, >, <=, >=, ==, != comparison operators. "
|
|
"This is useful for spidering popular URLs quickly."
|
|
"</td></tr>"
|
|
|
|
|
|
"<tr class=poo><td>httpstatus==404</td>"
|
|
"<td>"
|
|
"For matching the URL based on the http status "
|
|
"of its last download. Does not apply to URLs "
|
|
"that have not yet been successfully downloaded."
|
|
"Can use <, >, <=, >=, ==, != comparison operators. "
|
|
"</td></tr>"
|
|
|
|
/*
|
|
"<tr class=poo><td>priority==30</td>"
|
|
"<td>"
|
|
"<b>If the current priority of the url is 30, then "
|
|
"it will match this expression. Does not apply "
|
|
"to outlinks, of course."
|
|
"</td></tr>"
|
|
|
|
"<tr class=poo><td>parentpriority==30</td>"
|
|
"<td>"
|
|
"<b>This is a special expression in that "
|
|
"it only applies to assigning spider priorities "
|
|
"to outlinks we are harvesting on a page.</b> "
|
|
"Matches if the url being added to spider queue "
|
|
"is from a parent url in priority queue 30. "
|
|
"The parent's priority queue is the one it got "
|
|
"moved into while being spidered. So if it was "
|
|
"in priority 20, but ended up in 25, then 25 will "
|
|
"be used when scanning the URL Filters table for "
|
|
"each of its outlinks. Only applies "
|
|
"to the FIRST time the url is added to spiderdb. "
|
|
"Use <i>parentpriority==-3</i> to indicate the "
|
|
"parent was FILTERED and <i>-2</i> to indicate "
|
|
"the parent was BANNED. A parentpriority of "
|
|
"<i>-1</i>"
|
|
" means that the urls is not a link being added to "
|
|
"spiderdb but rather a url being spidered."
|
|
"</td></tr>"
|
|
|
|
"<tr class=poo><td>inlink==...</td>"
|
|
"<td>"
|
|
"If the url has an inlinker which contains the "
|
|
"given substring, then this rule is matched. "
|
|
"We use this like <i>inlink=www.weblogs.com/"
|
|
"int16_tChanges.xml</i> to detect if a page is in "
|
|
"the ping server or not, and if it is, then we "
|
|
"assign it to a slower-spidering queue, because "
|
|
"we can reply on the ping server for updates. Saves "
|
|
"us from having to spider all the blogspot.com "
|
|
"subdomains a couple times a day each."
|
|
"</td></tr>"
|
|
*/
|
|
|
|
//"NOTE: Until we get the link info to get the doc "
|
|
//"quality before calling msg8 in Msg16.cpp, we "
|
|
//"can not involve doc:quality for purposes of "
|
|
//"assigning a ruleset, unless banning it.</td>"
|
|
|
|
"<tr class=poo><td><nobr>tld!=com,org,edu"// && "
|
|
//"doc:quality<70"
|
|
"</nobr></td>"
|
|
"<td>Matches if the "
|
|
"url's TLD does NOT end in \"com\", \"org\" or "
|
|
"\"edu\". "
|
|
"</td></tr>"
|
|
|
|
"<tr class=poo><td><nobr>lang==zh_cn,de"
|
|
"</nobr></td>"
|
|
"<td>Matches if "
|
|
"the url's content is in the language \"zh_cn\" or "
|
|
"\"de\". See table below for supported language "
|
|
"abbreviations. Used to only keep certain languages "
|
|
"in the index. This is hacky because the language "
|
|
"may not be known at spider time, so Gigablast "
|
|
"will check after downloading the document to "
|
|
"see if the language <i>spider priority</i> is "
|
|
"DELETE thereby discarding it.</td></tr>"
|
|
//"NOTE: Until we move the language "
|
|
//"detection up before any call to XmlDoc::set1() "
|
|
//"in Msg16.cpp, we can not use for purposes of "
|
|
//"assigning a ruleset, unless banning it.</td>"
|
|
//"</tr>"
|
|
|
|
"<tr class=poo><td><nobr>lang!=xx,en,de"
|
|
"</nobr></td>"
|
|
"<td>Matches if "
|
|
"the url's content is NOT in the language \"xx\" "
|
|
"(unknown), \"en\" or \"de\". "
|
|
"See table below for supported language "
|
|
"abbreviations.</td></tr>"
|
|
|
|
"<tr class=poo><td><nobr>parentlang==zh_cn,zh_tw,xx"
|
|
"</nobr></td>"
|
|
"<td>Matches if "
|
|
"the url's referring parent url is primarily in "
|
|
"this language. Useful for prioritizing spidering "
|
|
"pages of a certain language."
|
|
"See table below for supported language "
|
|
"abbreviations."
|
|
"</td></tr>"
|
|
|
|
/*
|
|
"<tr class=poo><td>link:gigablast</td>"
|
|
"<td>Matches if the document links to gigablast."
|
|
"</td></tr>"
|
|
|
|
"<tr class=poo><td>searchbox:gigablast</td>"
|
|
"<td>Matches if the document has a submit form "
|
|
"to gigablast."
|
|
"</td></tr>"
|
|
|
|
"<tr class=poo><td>site:dmoz</td>"
|
|
"<td>Matches if the document is directly or "
|
|
"indirectly in the DMOZ directory."
|
|
"</td></tr>"
|
|
|
|
"<tr class=poo><td>tag:spam>X</td>"
|
|
"<td>Matches if the document's tagdb record "
|
|
"has a score greater than X for the sitetype, "
|
|
"'spam' in this case. "
|
|
"Can use <, >, <=, >=, ==, != comparison operators. "
|
|
"Other sitetypes include: "
|
|
"..."
|
|
"</td></tr>"
|
|
*/
|
|
|
|
"<tr class=poo><td>iswww | !iswww</td>"
|
|
"<td>Matches if the url's hostname is www or domain "
|
|
"only. For example: <i>www.xyz.com</i> would match, "
|
|
"and so would <i>abc.com</i>, but "
|
|
"<i>foo.somesite.com</i> would NOT match."
|
|
"</td></tr>"
|
|
|
|
|
|
"<tr class=poo><td>isroot | !isroot</td>"
|
|
"<td>Matches if the URL is a root URL. Like if "
|
|
"its path is just '/'. Example: http://www.abc.com "
|
|
"is a root ur but http://www.abc.com/foo is not. "
|
|
"</td></tr>"
|
|
|
|
|
|
"<tr class=poo><td>isonsamedomain | !isonsamedomain</td>"
|
|
"<td>"
|
|
"This is true if the url is from the same "
|
|
"DOMAIN as the page from which it was "
|
|
"harvested."
|
|
//"Only effective for links being added from a page "
|
|
//"being spidered, because this information is "
|
|
//"not preserved in the titleRec."
|
|
"</td></tr>"
|
|
|
|
|
|
"<tr class=poo><td><nobr>"
|
|
"isonsamesubdomain | !isonsamesubdomain"
|
|
"</nobr></td>"
|
|
"<td>"
|
|
"This is true if the url is from the same "
|
|
"SUBDOMAIN as the page from which it was "
|
|
"harvested."
|
|
//"Only effective for links being added from a page "
|
|
//"being spidered, because this information is "
|
|
//"not preserved in the titleRec."
|
|
"</td></tr>"
|
|
|
|
"<tr class=poo><td>ismedia | !ismedia</td>"
|
|
"<td>"
|
|
"Does the url have a media or css related "
|
|
"extension. Like gif, jpg, mpeg, css, etc.? "
|
|
"</td></tr>"
|
|
|
|
|
|
"<tr class=poo><td>tag:<i>tagname</i></td>"
|
|
"<td>"
|
|
"This is true if the url is tagged with this "
|
|
"<i>tagname</i> in the site list. Read about tags "
|
|
"on the <a href=/admin/settings>"//#examples>"
|
|
"site list</a> "
|
|
"page."
|
|
"</td></tr>"
|
|
|
|
|
|
|
|
"</td></tr></table><br><br>\n",
|
|
TABLE_STYLE );
|
|
|
|
|
|
// show the languages you can use
|
|
sb->safePrintf (
|
|
"<table %s>"
|
|
"<tr><td colspan=2><center>"
|
|
"<b>"
|
|
"Supported Language Abbreviations "
|
|
"for lang== Filter</b>"
|
|
"</td></tr>",
|
|
TABLE_STYLE );
|
|
for ( int32_t i = 0 ; i < 256 ; i++ ) {
|
|
char *lang1 = getLanguageAbbr ( i );
|
|
char *lang2 = getLanguageString ( i );
|
|
if ( ! lang1 ) continue;
|
|
sb->safePrintf("<tr class=poo>"
|
|
"<td>%s</td><td>%s</td></tr>\n",
|
|
lang1,lang2);
|
|
}
|
|
// wrap it up
|
|
sb->safePrintf("</table><br><br>");
|
|
return true;
|
|
}
|
|
|
|
// . copy/clone parms from one collrec to another
// . returns false and sets g_errno on error
// . if doing this after creating a new collection on host #0 we have to call
//   syncParmsWithHost0() to get all the shards in sync.
bool Parms::cloneCollRec ( char *dstCR , char *srcCR ) {

	// now set THIS based on the parameters in the xml file
	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {

		// get it
		Parm *m = &m_parms[i];
		if ( m->m_obj != OBJ_COLL ) continue;

		//log(LOG_DEBUG, "Parms: %s: parm: %s", filename, m->m_xml);
		// . there are 2 object types, coll recs and g_conf, aka
		//   OBJ_COLL and OBJ_CONF.

		// skip comments, commands and anything else not flagged
		// as cloneable
		if ( ! (m->m_flags & PF_CLONE) ) continue;

		// get parm data ptr
		char *src = srcCR + m->m_off;
		char *dst = dstCR + m->m_off;

		// if not an array use this
		if ( ! m->isArray() ) {
			if ( m->m_type == TYPE_SAFEBUF ) {
				SafeBuf *a = (SafeBuf *)src;
				SafeBuf *b = (SafeBuf *)dst;
				b->reset();
				b->safeMemcpy ( a );
				b->nullTerm();
			}
			else {
				// this should work for most types
				gbmemcpy ( dst , src , m->m_size );
			}
			continue;
		}

		//
		// arrays only below here
		//

		// for arrays only
		int32_t *srcNum = (int32_t *)(src-4);
		int32_t *dstNum = (int32_t *)(dst-4);

		// array can have multiple values
		for ( int32_t j = 0 ; j < *srcNum ; j++ ) {

			if ( m->m_type == TYPE_SAFEBUF ) {
				SafeBuf *a = (SafeBuf *)src;
				SafeBuf *b = (SafeBuf *)dst;
				b->reset();
				b->safeMemcpy ( a );
				b->nullTerm();
			}
			else {
				// this should work for most types
				gbmemcpy ( dst , src , m->m_size );
			}

			src += m->m_size;
			dst += m->m_size;
		}

		// update # elements in array
		*dstNum = *srcNum;
	}
	return true;
}