23351 lines
		
	
	
		
			687 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			23351 lines
		
	
	
		
			687 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| #include "gb-include.h"
 | |
| 
 | |
| #include "Parms.h"
 | |
| #include "File.h"
 | |
| #include "Conf.h"
 | |
| #include "TcpSocket.h"
 | |
| #include "HttpRequest.h"
 | |
| #include "Pages.h"         // g_pages
 | |
| #include "Tagdb.h"        // g_tagdb
 | |
| #include "Catdb.h"
 | |
| #include "Collectiondb.h"
 | |
| #include "HttpMime.h"      // atotime()
 | |
| #include "SearchInput.h"
 | |
| #include "Unicode.h"
 | |
| #include "Threads.h"
 | |
| #include "Spider.h" // MAX_SPIDER_PRIORITIES
 | |
| #include "Statsdb.h"
 | |
| #include "Msg17.h"
 | |
| #include "Process.h"
 | |
| #include "Repair.h"
 | |
| #include "LanguagePages.h"
 | |
| #include "PingServer.h"
 | |
| #include "Proxy.h"
 | |
| #include "hash.h"
 | |
| #include "Test.h"
 | |
| #include "Rebalance.h"
 | |
| #include "SpiderProxy.h" // buildProxyTable()
 | |
| #include "PageInject.h" // InjectionRequest
 | |
| 
 | |
| // width of input box in characters for url filter expression
 | |
| 
 | |
| Parms g_parms;
 | |
| 
 | |
| 
 | |
| #include "Clusterdb.h"
 | |
| 
 | |
| //
 | |
| // new functions to extricate info from parm recs
 | |
| //
 | |
| 
 | |
| int32_t getDataSizeFromParmRec ( char *rec ) {
 | |
| 	return *(int32_t *)(rec+sizeof(key96_t));
 | |
| }
 | |
| 
 | |
| char *getDataFromParmRec ( char *rec ) {
 | |
| 	return rec+sizeof(key96_t)+4;
 | |
| }
 | |
| 
 | |
| collnum_t getCollnumFromParmRec ( char *rec ) {
 | |
| 	key96_t *k = (key96_t *)rec;
 | |
| 	return (collnum_t)k->n1;
 | |
| }
 | |
| 
 | |
| // for parms that are arrays...
 | |
| int16_t getOccNumFromParmRec ( char *rec ) {
 | |
| 	key96_t *k = (key96_t *)rec;
 | |
| 	return (int16_t)((k->n0>>16));
 | |
| }
 | |
| 
 | |
| Parm *getParmFromParmRec ( char *rec ) {
 | |
| 	key96_t *k = (key96_t *)rec;
 | |
| 	int32_t cgiHash32 = (k->n0 >> 32);
 | |
| 	return g_parms.getParmFast2 ( cgiHash32 );
 | |
| }
 | |
| 
 | |
| int32_t getHashFromParmRec ( char *rec ) {
 | |
| 	key96_t *k = (key96_t *)rec;
 | |
| 	int32_t cgiHash32 = (k->n0 >> 32);
 | |
| 	return cgiHash32;
 | |
| }
 | |
| 
 | |
// . builds the 96-bit parmdb key for a (collnum, parm, occurrence) triple
// . layout: n1 = collnum; n0 = [cgiHash:32][occNum:16][blank:15][delbit:1]
// . occNum is index # for parms that are arrays. it is -1 if not used.
// . collnum is -1 for g_conf, which is not a collrec
// . occNum is -1 for a non-array parm
key96_t makeParmKey ( collnum_t collnum , Parm *m , int16_t occNum ) {
	key96_t k;
	k.n1 = collnum;
	// cgi-name hash ends up in bits 32-63 after the two shifts below
	k.n0 = (uint32_t)m->m_cgiHash; // 32 bit
	k.n0 <<= 16;
	// occurrence # in bits 16-31; (uint16_t)-1 == 0xffff for non-arrays
	k.n0 |= (uint16_t)occNum;
	// blanks
	k.n0 <<= 16;
	// delbit. 1 means positive key
	k.n0 |= 0x01;
	// sanity: round-trip through the extraction helpers; the null
	// deref on mismatch is this codebase's deliberate crash-assert idiom
	if ( getCollnumFromParmRec ((char *)&k)!=collnum){char*xx=NULL;*xx=0;}
	if ( getOccNumFromParmRec ((char *)&k)!=occNum){char*xx=NULL;*xx=0;}
	return k;
}
 | |
| 
 | |
| bool printUrlExpressionExamples ( SafeBuf *sb ) ;
 | |
| 
 | |
| 
 | |
| //////////////////////////////////////////////
 | |
| //
 | |
| // Command Functions. All return false if block... yadayada
 | |
| //
 | |
| //////////////////////////////////////////////
 | |
| 
 | |
| ////////
 | |
| //
 | |
| // . do commands this way now
 | |
| // . when handleRequest4 receives a special "command" parmdb rec
 | |
| //   it calls executes the cmd, one of the functions listed below
 | |
| // . all these Command*() functions are called in updateParm() below
 | |
| // . they return false if they would block and they'll call your callback
 | |
| //   specified in you "we" the WaitEntry
 | |
| // . they return true with g_errno set on error, set to 0 on success
 | |
| //
 | |
| ////////
 | |
| 
 | |
| 
 | |
| // from PageBasic.cpp:
 | |
| bool updateSiteListBuf(collnum_t collnum,bool addSeeds,char *siteListArg);
 | |
| 
 | |
| bool CommandUpdateSiteList ( char *rec ) {
 | |
| 	// caller must specify collnum
 | |
| 	collnum_t collnum = getCollnumFromParmRec ( rec );
 | |
| 	if ( collnum < 0 ) {
 | |
| 		log("parms: bad collnum for update site list");
 | |
| 		g_errno = ENOCOLLREC;
 | |
| 		return true;
 | |
| 	}
 | |
| 	// sanity
 | |
| 	int32_t dataSize = getDataSizeFromParmRec ( rec );
 | |
| 	if ( dataSize < 0 ) {
 | |
| 		log("parms: bad site list size = %" INT32 " bad!",dataSize);
 | |
| 		g_errno = EBADENGINEER;
 | |
| 		return true;
 | |
| 	}
 | |
| 	// need this
 | |
| 	CollectionRec *cr = g_collectiondb.getRec ( collnum );
 | |
| 	if ( ! cr ) {
 | |
| 		log("parms: no cr for collnum %" INT32 " to update",(int32_t)collnum);
 | |
| 		return true;
 | |
| 	}
 | |
| 	// get the sitelist
 | |
| 	char *data = getDataFromParmRec ( rec );
 | |
| 	// update the table that maps site to whether we should spider it
 | |
| 	// and also add newly introduced sites in "data" into spiderdb.
 | |
| 	updateSiteListBuf ( collnum ,
 | |
| 			    true , // add NEW seeds?
 | |
| 			    data // entire sitelist
 | |
| 			    );
 | |
| 	// now that we deduped the old site list with the new one for
 | |
| 	// purposes of adding NEW seeds, we can do the final copy
 | |
| 	cr->m_siteListBuf.set ( data );
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| // . require user manually execute this to prevent us fucking up the data
 | |
| //   at first initially because of a bad hosts.conf file!!!
 | |
| // . maybe put a red 'A' in the hosts table on the web page to indicate
 | |
| //   we detected records that don't belong to our shard so user knows to
 | |
| //   rebalance?
 | |
| // . we'll show it in a special msg box on all admin pages if required
 | |
| bool CommandRebalance ( char *rec ) {
 | |
| 	g_rebalance.m_userApproved = true;
 | |
| 	// force this to on so it goes through
 | |
| 	g_rebalance.m_numForeignRecs = 1;
 | |
| 	g_rebalance.m_needsRebalanceValid = false;
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| bool CommandInsertUrlFiltersRow ( char *rec ) {
 | |
| 	// caller must specify collnum
 | |
| 	collnum_t collnum = getCollnumFromParmRec ( rec );
 | |
| 	if ( collnum < 0 ) {
 | |
| 		log("parms: bad collnum for insert row");
 | |
| 		g_errno = ENOCOLLREC;
 | |
| 		return true;
 | |
| 	}
 | |
| 	// sanity
 | |
| 	int32_t dataSize = getDataSizeFromParmRec ( rec );
 | |
| 	if ( dataSize <= 1 ) {
 | |
| 		log("parms: insert row data size = %" INT32 " bad!",dataSize);
 | |
| 		g_errno = EBADENGINEER;
 | |
| 		return true;
 | |
| 	}
 | |
| 	// need this
 | |
| 	CollectionRec *cr = g_collectiondb.getRec ( collnum );
 | |
| 	// get the row #
 | |
| 	char *data = getDataFromParmRec ( rec );
 | |
| 	int32_t rowNum = atol(data);//*(int32_t *)data;
 | |
| 	// scan all parms for url filter parms
 | |
| 	for ( int32_t i = 0 ; i < g_parms.m_numParms ; i++ ) {
 | |
| 		Parm *m = &g_parms.m_parms[i];
 | |
| 		// parm must be a url filters parm
 | |
| 		if ( m->m_page != PAGE_FILTERS ) continue;
 | |
| 		// must be an array!
 | |
| 		if ( ! m->isArray() ) continue;
 | |
| 		// sanity check
 | |
| 		if ( m->m_obj != OBJ_COLL ) { char *xx=NULL;*xx=0; }
 | |
| 		// . add that row
 | |
| 		// . returns false and sets g_errno on error
 | |
| 		if ( ! g_parms.insertParm ( i, rowNum,(char *)cr)) return true;
 | |
| 	}
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| bool CommandRemoveConnectIpRow ( char *rec ) {
 | |
| 	// caller must specify collnum
 | |
| 	//collnum_t collnum = getCollnumFromParmRec ( rec );
 | |
| 	//if ( collnum < 0 ) {
 | |
| 	//	g_errno = ENOCOLLREC;
 | |
| 	//	log("parms: bad collnum for remove row");
 | |
| 	//	return true;
 | |
| 	//}
 | |
| 	// sanity
 | |
| 	int32_t dataSize = getDataSizeFromParmRec ( rec );
 | |
| 	if ( dataSize <= 1 ) {
 | |
| 		log("parms: insert row data size = %" INT32 " bad!",dataSize);
 | |
| 		g_errno = EBADENGINEER;
 | |
| 		return true;
 | |
| 	}
 | |
| 	// need this
 | |
| 	//CollectionRec *cr = g_collectiondb.getRec ( collnum );
 | |
| 	// get the row #
 | |
| 	char *data = getDataFromParmRec ( rec );
 | |
| 	int32_t rowNum = atol(data);
 | |
| 	// scan all parms for url filter parms
 | |
| 	for ( int32_t i = 0 ; i < g_parms.m_numParms ; i++ ) {
 | |
| 		Parm *m = &g_parms.m_parms[i];
 | |
| 		// parm must be a url filters parm
 | |
| 		if ( m->m_page != PAGE_MASTERPASSWORDS ) continue;
 | |
| 		// must be an array!
 | |
| 		if ( ! m->isArray() ) continue;
 | |
| 		// sanity check
 | |
| 		if ( m->m_obj != OBJ_CONF ) { char *xx=NULL;*xx=0; }
 | |
| 		// must be masterip
 | |
| 		if ( m->m_type != TYPE_IP ) continue;
 | |
| 		// . nuke that parm's element
 | |
| 		// . returns false and sets g_errno on error
 | |
| 		if (!g_parms.removeParm(i,rowNum,(char *)&g_conf))return true;
 | |
| 	}
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| bool CommandRemovePasswordRow ( char *rec ) {
 | |
| 	// sanity
 | |
| 	int32_t dataSize = getDataSizeFromParmRec ( rec );
 | |
| 	if ( dataSize <= 1 ) {
 | |
| 		log("parms: insert row data size = %" INT32 " bad!",dataSize);
 | |
| 		g_errno = EBADENGINEER;
 | |
| 		return true;
 | |
| 	}
 | |
| 	// get the row #
 | |
| 	char *data = getDataFromParmRec ( rec );
 | |
| 	int32_t rowNum = atol(data);
 | |
| 	// scan all parms for url filter parms
 | |
| 	for ( int32_t i = 0 ; i < g_parms.m_numParms ; i++ ) {
 | |
| 		Parm *m = &g_parms.m_parms[i];
 | |
| 		// parm must be a url filters parm
 | |
| 		if ( m->m_page != PAGE_MASTERPASSWORDS ) continue;
 | |
| 		// must be an array!
 | |
| 		if ( ! m->isArray() ) continue;
 | |
| 		// sanity check
 | |
| 		if ( m->m_obj != OBJ_CONF ) { char *xx=NULL;*xx=0; }
 | |
| 		// must be master password
 | |
| 		if ( m->m_type != TYPE_STRINGNONEMPTY ) continue;
 | |
| 		// . nuke that parm's element
 | |
| 		// . returns false and sets g_errno on error
 | |
| 		if (!g_parms.removeParm(i,rowNum,(char *)&g_conf))return true;
 | |
| 	}
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| bool CommandRemoveUrlFiltersRow ( char *rec ) {
 | |
| 	// caller must specify collnum
 | |
| 	collnum_t collnum = getCollnumFromParmRec ( rec );
 | |
| 	if ( collnum < 0 ) {
 | |
| 		g_errno = ENOCOLLREC;
 | |
| 		log("parms: bad collnum for remove row");
 | |
| 		return true;
 | |
| 	}
 | |
| 	// sanity
 | |
| 	int32_t dataSize = getDataSizeFromParmRec ( rec );
 | |
| 	if ( dataSize <= 1 ) {
 | |
| 		log("parms: insert row data size = %" INT32 " bad!",dataSize);
 | |
| 		g_errno = EBADENGINEER;
 | |
| 		return true;
 | |
| 	}
 | |
| 	// need this
 | |
| 	CollectionRec *cr = g_collectiondb.getRec ( collnum );
 | |
| 	// get the row #
 | |
| 	char *data = getDataFromParmRec ( rec );
 | |
| 	int32_t rowNum = atol(data);
 | |
| 	// scan all parms for url filter parms
 | |
| 	for ( int32_t i = 0 ; i < g_parms.m_numParms ; i++ ) {
 | |
| 		Parm *m = &g_parms.m_parms[i];
 | |
| 		// parm must be a url filters parm
 | |
| 		if ( m->m_page != PAGE_FILTERS ) continue;
 | |
| 		// must be an array!
 | |
| 		if ( ! m->isArray() ) continue;
 | |
| 		// sanity check
 | |
| 		if ( m->m_obj != OBJ_COLL ) { char *xx=NULL;*xx=0; }
 | |
| 		// . nuke that parm's element
 | |
| 		// . returns false and sets g_errno on error
 | |
| 		if ( ! g_parms.removeParm ( i,rowNum,(char *)cr)) return true;
 | |
| 	}
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| // after we add a new coll, or at anytime after we can clone it
 | |
| bool CommandCloneColl ( char *rec ) {
 | |
| 
 | |
| 	// the collnum we want to affect.
 | |
| 	collnum_t dstCollnum = getCollnumFromParmRec ( rec );
 | |
| 
 | |
| 	// . data is the collnum in ascii.
 | |
| 	// . from "&restart=467" for example
 | |
| 	char *data = rec + sizeof(key96_t) + 4;
 | |
| 	int32_t dataSize = *(int32_t *)(rec + sizeof(key96_t));
 | |
| 	//if ( dataSize < 1 ) { char *xx=NULL;*xx=0; }
 | |
| 	// copy parm settings from this collection name
 | |
| 	char *srcColl = data;
 | |
| 	
 | |
| 	// return if none to clone from
 | |
| 	if ( dataSize <= 0 ) return true;
 | |
| 	// avoid defaulting to main collection
 | |
| 	if ( ! data[0]  ) return true;
 | |
| 
 | |
| 	CollectionRec *srcRec = NULL;
 | |
| 	CollectionRec *dstRec = NULL;
 | |
| 	srcRec = g_collectiondb.getRec ( srcColl    ); // get from name
 | |
| 	dstRec = g_collectiondb.getRec ( dstCollnum ); // get from #
 | |
| 	
 | |
| 	if ( ! srcRec ) 
 | |
| 		return log("parms: invalid coll %s to clone from",
 | |
| 			   srcColl);
 | |
| 	if ( ! dstRec ) 
 | |
| 		return log("parms: invalid collnum %" INT32 " to clone to",
 | |
| 			   (int32_t)dstCollnum);
 | |
| 
 | |
| 	log ("parms: cloning parms from collection %s to %s",
 | |
| 	      srcRec->m_coll,dstRec->m_coll);
 | |
| 
 | |
| 	g_parms.cloneCollRec ( (char *)dstRec , (char *)srcRec );
 | |
| 
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
// . adds a new collection under the collnum carried in the parm rec key;
//   the rec's data payload is the \0-terminated collection name
// customCrawl:
// 0 for regular collection
// 1 for custom crawl
// 2 for bulk job
// . returns false if blocks true otherwise
bool CommandAddColl ( char *rec , char customCrawl ) {

	// caller must specify collnum
	collnum_t newCollnum = getCollnumFromParmRec ( rec );

	// sanity. a negative collnum cannot be a real collection slot.
	if ( newCollnum < 0 ) {
		g_errno = ENOCOLLREC;
		log("parms: bad collnum for AddColl");
		return true;
	}

	// payload follows the 96-bit key and the 4-byte dataSize field
	char *data = rec + sizeof(key96_t) + 4;
	int32_t dataSize = *(int32_t *)(rec + sizeof(key96_t));
	// collection name must be at least 2 bytes (includes \0);
	// the null deref is the codebase's deliberate crash-assert idiom
	if ( dataSize <= 1 ) { char *xx=NULL;*xx=0; }

	// then collname, \0 terminated
	char *collName = data;

	if ( gbstrlen(collName) > MAX_COLL_LEN ) {
		log("crawlbot: collection name too long");
		return true;
	}

	// historical: the add used to be refused until in sync with host 0
	// if ( ! g_parms.m_inSyncWithHost0 ) {
	// 	log("parms: can not add coll #%i %s until in sync with host 0",
	// 	    (int)newCollnum,collName);
	// 	g_errno = EBADENGINEER;
	// 	return true;
	// }

	// this saves it to disk! returns false and sets g_errno on error.
	if ( ! g_collectiondb.addNewColl ( collName,
					   customCrawl ,
					   NULL ,  // copy from
					   0  , // copy from len
					   true , // save?
					   newCollnum
					   ) )
		// error! g_errno should be set
		return true;

	return true;
}
 | |
| 
 | |
| // all nodes are guaranteed to add the same collnum for the given name
 | |
| bool CommandAddColl0 ( char *rec ) { // regular collection
 | |
| 	return CommandAddColl ( rec , 0 );
 | |
| }
 | |
| 
 | |
| bool CommandAddColl1 ( char *rec ) { // custom crawl
 | |
| 	return CommandAddColl ( rec , 1 );
 | |
| }
 | |
| 
 | |
| bool CommandAddColl2 ( char *rec ) { // bulk job
 | |
| 	return CommandAddColl ( rec , 2 );
 | |
| }
 | |
| 
 | |
| bool CommandResetProxyTable ( char *rec ) {
 | |
| 	// from SpiderProxy.h
 | |
| 	return resetProxyStats();
 | |
| }
 | |
| 
 | |
| // . returns true and sets g_errno on error
 | |
| // . returns false if would block
 | |
| bool CommandDeleteColl ( char *rec , WaitEntry *we ) {
 | |
| 	collnum_t collnum = getCollnumFromParmRec ( rec );
 | |
| 
 | |
| 	// if ( ! g_parms.m_inSyncWithHost0 ) {
 | |
| 	// 	log("parms: can not del collnum %i until in sync with host 0",
 | |
| 	// 	    (int)collnum);
 | |
| 	// 	g_errno = EBADENGINEER;
 | |
| 	// 	return true;
 | |
| 	// }
 | |
| 
 | |
| 	// the delete might block because the tree is saving and we can't
 | |
| 	// remove our collnum recs from it while it is doing that
 | |
| 	if ( ! g_collectiondb.deleteRec2 ( collnum ) )
 | |
| 		// we blocked, we->m_callback will be called when done
 | |
| 		return false;
 | |
| 	// delete is successful
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| // . returns true and sets g_errno on error
 | |
| // . returns false if would block
 | |
| bool CommandDeleteColl2 ( char *rec , WaitEntry *we ) {
 | |
| 	char *data = rec + sizeof(key96_t) + 4;
 | |
| 	char *coll = (char *)data;
 | |
| 	collnum_t collnum = g_collectiondb.getCollnum ( coll );
 | |
| 
 | |
| 	// if ( ! g_parms.m_inSyncWithHost0 ) {
 | |
| 	// 	log("parms: can not del collnum %i until in sync with host 0",
 | |
| 	// 	    (int)collnum);
 | |
| 	// 	g_errno = EBADENGINEER;
 | |
| 	// 	return true;
 | |
| 	// }
 | |
| 
 | |
| 	if ( collnum < 0 ) {
 | |
| 		g_errno = ENOCOLLREC;
 | |
| 		return true;;
 | |
| 	}
 | |
| 	// the delete might block because the tree is saving and we can't
 | |
| 	// remove our collnum recs from it while it is doing that
 | |
| 	if ( ! g_collectiondb.deleteRec2 ( collnum ) )
 | |
| 		// we blocked, we->m_callback will be called when done
 | |
| 		return false;
 | |
| 	// delete is successful
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
// . force the next spider round to start for the collection in the rec key
// . the data payload is "<roundStartTime>,<roundNum>" in ascii -- it is
//   sprintf()'d by the HACK: in Parms::convertHttpRequestToParmList(),
//   so the sscanf format here must stay in lockstep with that code
// . always returns true; sets g_errno on error
bool CommandForceNextSpiderRound ( char *rec ) {

	// caller must specify collnum
	collnum_t collnum = getCollnumFromParmRec ( rec );
	// need this
	CollectionRec *cr = g_collectiondb.getRec ( collnum );
	if ( ! cr ) {
		g_errno = ENOCOLLREC;
		log("parms: bad collnum %" INT32 " for restart spider round",
		    (int32_t)collnum);
		return true;
	}

	// seems like parmlist is an rdblist, so we have a key_t followed
	// by 4 bytes of datasize then the data... which is an ascii string
	// in our case...
	char *data = getDataFromParmRec ( rec );
	uint32_t roundStartTime;
	int32_t newRoundNum;
	// see the HACK: in Parms::convertHttpRequestToParmList() where we
	// construct this data in response to a "roundStart" cmd. we used
	// sprintf() so it's natural to use sscanf() to parse it out.
	sscanf ( data , "%" UINT32 ",%" INT32 "", 
		 &roundStartTime,
		 &newRoundNum);

	cr->m_spiderRoundStartTime = roundStartTime;
	cr->m_spiderRoundNum = newRoundNum;

	// without this, Spider.cpp prints "skipping0 ... " for urls
	// we try to spider.
	cr->m_spiderStatus = SP_INPROGRESS;

	// reset the round counts. this will log a msg. resetting the
	// round counts will prevent maxToProcess/maxToCrawl from holding
	// us back...
	spiderRoundIncremented ( cr );

	// doledb must be nuked for the new round to take effect; the
	// url-filter rebuild does that
	cr->rebuildUrlFilters();

	return true;
}
 | |
| 
 | |
// . restart a collection: reset its data under a NEW collnum (carried in
//   the rec key) but KEEP its seeds (purgeSeeds=false), then re-seed
//   spiderdb from the existing site list
// . returns true and sets g_errno on error
// . returns false if would block
bool CommandRestartColl ( char *rec , WaitEntry *we ) {

	collnum_t newCollnum = getCollnumFromParmRec ( rec );

	// . data is the OLD collnum in ascii.
	// . from "&restart=467" for example
	char *data = rec + sizeof(key96_t) + 4;
	int32_t dataSize = *(int32_t *)(rec + sizeof(key96_t));
	// deliberate crash-assert on an empty payload
	if ( dataSize < 1 ) { char *xx=NULL;*xx=0; }
	collnum_t oldCollnum = atol(data);

	if ( oldCollnum < 0 || 
	     oldCollnum >= g_collectiondb.m_numRecs ||
	     ! g_collectiondb.m_recs[oldCollnum] ) {
		log("parms: invalid collnum %" INT32 " to restart",(int32_t)oldCollnum);
		return true;
	}

	// this can block if tree is saving, it has to wait
	// for tree save to complete before removing old
	// collnum recs from tree
	if ( ! g_collectiondb.resetColl2 ( oldCollnum ,
					   newCollnum ,
					   false ) ) // purgeSeeds?
		// we blocked, we->m_callback will be called when done
		return false;

	// collname is the same but it now lives under the new collnum
	CollectionRec *cr = g_collectiondb.getRec ( newCollnum );
	// if reset from crawlbot api page then enable spiders
	// to avoid user confusion
	//if ( cr ) cr->m_spideringEnabled = 1;

	if ( ! cr ) return true;

	//
	// repopulate spiderdb with the same sites. NOTE: the detach /
	// nullTerm / safeStrcpy sequence below is order-sensitive.
	//

	char *oldSiteList = cr->m_siteListBuf.getBufStart();
	// do not let it have the buf any more
	cr->m_siteListBuf.detachBuf();
	// can't leave it NULL, safebuf parms do not like to be null
	cr->m_siteListBuf.nullTerm();
	// re-add the buf so it re-seeds spiderdb. it will not dedup these
	// urls in "oldSiteList" with "m_siteListBuf" which is now empty.
	// "true" = addSeeds.
	updateSiteListBuf ( newCollnum , true , oldSiteList );
	// now put it back
	if ( oldSiteList ) cr->m_siteListBuf.safeStrcpy ( oldSiteList );

	// all done
	return true;
}
 | |
| 
 | |
// . reset a collection: like CommandRestartColl() above but PURGES the
//   seeds (purgeSeeds=true) when resetting under the new collnum, then
//   re-seeds spiderdb from the preserved site list
// . returns true and sets g_errno on error
// . returns false if would block
bool CommandResetColl ( char *rec , WaitEntry *we ) {

	collnum_t newCollnum = getCollnumFromParmRec ( rec );

	// . data is the OLD collnum in ascii.
	// . from "&reset=467" for example
	char *data = rec + sizeof(key96_t) + 4;
	int32_t dataSize = *(int32_t *)(rec + sizeof(key96_t));
	// deliberate crash-assert on an empty payload
	if ( dataSize < 1 ) { char *xx=NULL;*xx=0; }
	collnum_t oldCollnum = atol(data);

	if ( oldCollnum < 0 || 
	     oldCollnum >= g_collectiondb.m_numRecs ||
	     ! g_collectiondb.m_recs[oldCollnum] ) {
		log("parms: invalid collnum %" INT32 " to reset",(int32_t)oldCollnum);
		return true;
	}

	// this will not go through if tree is saving, it has to wait
	// for tree save to complete before removing old
	// collnum recs from tree. so return false in that case so caller
	// will know to re-call later.
	if ( ! g_collectiondb.resetColl2 ( oldCollnum ,
					   newCollnum ,
					   true ) ) // purgeSeeds?
		// we blocked, we->m_callback will be called when done
		return false;

	// collname is the same but it now lives under the new collnum
	CollectionRec *cr = g_collectiondb.getRec ( newCollnum );

	if ( ! cr ) return true;

	//
	// repopulate spiderdb with the same sites. NOTE: the detach /
	// nullTerm / safeStrcpy sequence below is order-sensitive.
	//

	char *oldSiteList = cr->m_siteListBuf.getBufStart();
	// do not let it have the buf any more
	cr->m_siteListBuf.detachBuf();
	// can't leave it NULL, safebuf parms do not like to be null
	cr->m_siteListBuf.nullTerm();
	// re-add the buf so it re-seeds spiderdb. it will not dedup these
	// urls in "oldSiteList" with "m_siteListBuf" which is now empty.
	// "true" = addSeeds.
	updateSiteListBuf ( newCollnum , true , oldSiteList );
	// now put it back
	if ( oldSiteList ) cr->m_siteListBuf.safeStrcpy ( oldSiteList );

	// turn spiders off
	//if ( cr ) cr->m_spideringEnabled = 0;

	return true;
}
 | |
| 
 | |
| bool CommandParserTestInit ( char *rec ) {
 | |
| 	// enable testing for all other hosts
 | |
| 	g_conf.m_testParserEnabled = 1;
 | |
| 	// reset all files
 | |
| 	g_test.removeFiles();
 | |
| 	// turn spiders on globally
 | |
| 	g_conf.m_spideringEnabled = 1;
 | |
| 	//g_conf.m_webSpideringEnabled = 1;
 | |
| 	// turn on for test coll too
 | |
| 	CollectionRec *cr = g_collectiondb.getRec("qatest123");
 | |
| 	// turn on spiders
 | |
| 	if ( cr ) cr->m_spideringEnabled = 1;
 | |
| 	// tell spider loop to update active list
 | |
| 	g_spiderLoop.m_activeListValid = false;
 | |
| 	// if we are not host 0, turn on spiders for testing
 | |
| 	if ( g_hostdb.m_myHost->m_hostId != 0 ) return true;
 | |
| 	// start the test loop to inject urls for parsing/spidering
 | |
| 	g_test.initTestRun();
 | |
| 	// done 
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| bool CommandSpiderTestInit ( char *rec ) {
 | |
| 	// enable testing for all other hosts
 | |
| 	g_conf.m_testSpiderEnabled = 1;
 | |
| 	// reset all files
 | |
| 	g_test.removeFiles();
 | |
| 	// turn spiders on globally
 | |
| 	g_conf.m_spideringEnabled = 1;
 | |
| 	//g_conf.m_webSpideringEnabled = 1;
 | |
| 	// turn on for test coll too
 | |
| 	CollectionRec *cr = g_collectiondb.getRec("qatest123");
 | |
| 	// turn on spiders
 | |
| 	if ( cr ) cr->m_spideringEnabled = 1;
 | |
| 	// tell spider loop to update active list
 | |
| 	g_spiderLoop.m_activeListValid = false;
 | |
| 	// if we are not host 0, turn on spiders for testing
 | |
| 	if ( g_hostdb.m_myHost->m_hostId != 0 ) return true;
 | |
| 	// start the test loop to inject urls for parsing/spidering
 | |
| 	g_test.initTestRun();
 | |
| 	// done 
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| bool CommandSpiderTestCont ( char *rec ) {
 | |
| 	// enable testing for all other hosts
 | |
| 	g_conf.m_testSpiderEnabled = 1;
 | |
| 	// turn spiders on globally
 | |
| 	g_conf.m_spideringEnabled = 1;
 | |
| 	//g_conf.m_webSpideringEnabled = 1;
 | |
| 	// turn on for test coll too
 | |
| 	CollectionRec *cr = g_collectiondb.getRec("qatest123");
 | |
| 	// turn on spiders
 | |
| 	if ( cr ) cr->m_spideringEnabled = 1;
 | |
| 	// tell spider loop to update active list
 | |
| 	g_spiderLoop.m_activeListValid = false;
 | |
| 	// done 
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| // some of these can block a little. if threads are off, a lot!
 | |
| bool CommandMerge ( char *rec ) {
 | |
| 	forceMergeAll ( RDB_POSDB ,1);
 | |
| 	forceMergeAll ( RDB_TITLEDB ,1);
 | |
| 	forceMergeAll ( RDB_TAGDB ,1);
 | |
| 	forceMergeAll ( RDB_SPIDERDB ,1);
 | |
| 	forceMergeAll ( RDB_LINKDB ,1);
 | |
| 	// most of these are probably already in good shape
 | |
| 	//g_checksumdb.getRdb()->attemptMerge (1,true);
 | |
| 	// g_clusterdb.getRdb()->attemptMerge  (1,true); // niceness, force?
 | |
| 	// g_tagdb.getRdb()->attemptMerge     (1,true);
 | |
| 	// g_catdb.getRdb()->attemptMerge      (1,true);
 | |
| 	// //g_tfndb.getRdb()->attemptMerge      (1,true);
 | |
| 	// g_spiderdb.getRdb()->attemptMerge   (1,true);
 | |
| 	// // these 2 will probably need the merge the most
 | |
| 	// g_indexdb.getRdb()->attemptMerge    (1,true);
 | |
| 	// g_datedb.getRdb()->attemptMerge     (1,true);
 | |
| 	// g_titledb.getRdb()->attemptMerge    (1,true);
 | |
| 	// //g_sectiondb.getRdb()->attemptMerge  (1,true);
 | |
| 	// g_statsdb.getRdb()->attemptMerge    (1,true);
 | |
| 	// g_linkdb .getRdb()->attemptMerge    (1,true);
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| 
 | |
| bool CommandMergePosdb ( char *rec ) {
 | |
| 	forceMergeAll ( RDB_POSDB ,1);
 | |
| 	// set this for each posdb base
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| 
 | |
// No-op: the sectiondb merge call is disabled (sectiondb appears
// retired); the command is kept so callers still resolve.
bool CommandMergeSectiondb ( char *rec ) {
	return true;
}
 | |
| 
 | |
| 
 | |
| bool CommandMergeTitledb ( char *rec ) {
 | |
| 	forceMergeAll ( RDB_TITLEDB ,1);
 | |
| 	//g_titledb.getRdb()->attemptMerge    (1,true);
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| 
 | |
| bool CommandMergeSpiderdb ( char *rec ) {
 | |
| 	forceMergeAll ( RDB_SPIDERDB ,1);
 | |
| 	//g_spiderdb.getRdb()->attemptMerge    (1,true);
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| 
 | |
| bool CommandDiskPageCacheOff ( char *rec ) {
 | |
| 	g_process.resetPageCaches();
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| bool CommandForceIt ( char *rec ) {
 | |
| 	g_conf.m_forceIt = true;
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| bool CommandDiskDump ( char *rec ) {
 | |
| 	//g_checksumdb.getRdb()->dumpTree ( 1 ); // niceness
 | |
| 	g_clusterdb.getRdb()->dumpTree  ( 1 );
 | |
| 	g_tagdb.getRdb()->dumpTree     ( 1 );  
 | |
| 	g_catdb.getRdb()->dumpTree      ( 1 );
 | |
| 	//g_tfndb.getRdb()->dumpTree      ( 1 );   
 | |
| 	g_spiderdb.getRdb()->dumpTree   ( 1 );
 | |
| 	g_posdb.getRdb()->dumpTree    ( 1 );
 | |
| 	//g_datedb.getRdb()->dumpTree     ( 1 );
 | |
| 	g_titledb.getRdb()->dumpTree    ( 1 );
 | |
| 	//g_sectiondb.getRdb()->dumpTree  ( 1 );
 | |
| 	g_statsdb.getRdb()->dumpTree    ( 1 );
 | |
| 	g_linkdb.getRdb() ->dumpTree    ( 1 );
 | |
| 	g_errno = 0;
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| 
 | |
| bool CommandJustSave ( char *rec ) {
 | |
| 	// returns false if blocked, true otherwise
 | |
| 	g_process.save ();
 | |
| 	// always return true here
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| bool CommandSaveAndExit ( char *rec ) {
 | |
| 	// return true if this blocks
 | |
| 	g_process.shutdown ( false , NULL , NULL );
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| bool CommandUrgentSaveAndExit ( char *rec ) {
 | |
| 	// "true" means urgent
 | |
| 	g_process.shutdown ( true );
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| bool CommandReloadLanguagePages ( char *rec ) {
 | |
| 	g_languagePages.reloadPages();
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| bool CommandClearKernelError ( char *rec ) {
 | |
| 	g_hostdb.m_myHost->m_pingInfo.m_kernelErrors = 0;
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
// . handles a power-state change notice (hasPower is 1 or 0)
// . on power loss: saves everything and flips g_process.m_powerIsOn off,
//   which elsewhere disables spider launches, suspends merges and
//   blocks tree writes; host 0 (non-proxy) also sends an alert email
// . always returns true
bool CommandPowerNotice ( int32_t hasPower ) {

	//int32_t hasPower = r->getLong("haspower",-1);
	log("powermo: received haspower=%" INT32 "",hasPower);
	// ignore anything but an explicit 0/1
	if ( hasPower != 0 && hasPower != 1 ) return true;

	// did power state change? if not just return true
	if (   g_process.m_powerIsOn &&   hasPower ) return true;
	if ( ! g_process.m_powerIsOn && ! hasPower ) return true;

	if ( hasPower ) {
		log("powermo: power is regained");
		g_process.m_powerIsOn = true;
		return true;
	}

	// if it was on and went off...
	// now it is off
	log("powermo: power was lost");
	// . SpiderLoop.cpp will not launch any more spiders as
	//   int32_t as the power is off
	// . autosave should kick in every 30 seconds
	g_process.m_powerIsOn = false;
	// note the autosave
	log("powermo: disabling spiders, suspending merges, disabling "
	    "tree writes and saving.");
	// tell Process.cpp::save2() to save the blocking caches too!
	//g_process.m_pleaseSaveCaches = true;
	// . save everything now... this may block some when saving the
	//   caches... then do not do ANY writes... 
	// . RdbMerge suspends all merging if power is off
	// . Rdb.cpp does not allow any adds if power is off. it will
	//   send back an ETRYAGAIN...
	// . if a tree is being dumped, this will keep re-calling
	//   Process.cpp::save2()
	g_process.save();

	// also send an email if we are host #0 (proxies never send)
	if ( g_hostdb.m_myHost->m_hostId != 0 ) return true;
	if ( g_proxy.isProxy() ) return true;

	char tmp[128];
	Host *h0 = g_hostdb.getHost ( 0 );
	int32_t ip0 = 0;
	if ( h0 ) ip0 = h0->m_ip;
	sprintf(tmp,"%s: POWER IS OFF",iptoa(ip0));

	g_pingServer.sendEmail ( NULL  , // Host ptr
				 tmp   , // msg
				 true  , // sendToAdmin
				 false , // oom?
				 false , // kernel error?
				 true  , // parm change?
				 // force it? even if disabled?
				 false  );
	return true;
}
 | |
| 
 | |
| 
 | |
| bool CommandPowerOnNotice ( char *rec ) {
 | |
| 	return CommandPowerNotice ( 1 );
 | |
| }
 | |
| 
 | |
| bool CommandPowerOffNotice ( char *rec ) {
 | |
| 	return CommandPowerNotice ( 0 );
 | |
| }
 | |
| 
 | |
| bool CommandInSync ( char *rec ) {
 | |
| 	g_parms.m_inSyncWithHost0 = true;
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| //////////////////////
 | |
| //
 | |
| // end new commands
 | |
| //
 | |
| //////////////////////
 | |
| 
 | |
| 
 | |
| static bool printDropDown   ( int32_t n , SafeBuf* sb, char *name, 
 | |
| 			      int32_t selet , 
 | |
| 			      bool includeMinusOne ,
 | |
| 			      bool includeMinusTwo ) ;
 | |
| 
 | |
| extern bool closeAll ( void *state, void (* callback)(void *state) );
 | |
| extern bool allExit ( ) ;
 | |
| /*
 | |
| class Checksum {
 | |
| public:
 | |
| 	Checksum() : m_sum1( 0xffff ), m_sum2( 0xffff ) {}
 | |
| 
 | |
| 	void addIn( const uint16_t *data, size_t size, FILE *f = 0 ) {
 | |
| 		// if an odd len of data, add first byte, then do rest below
 | |
| 		if ( size % 2 != 0 ) { 
 | |
| 			m_sum1 += (uint16_t)*(uint8_t *)data;
 | |
| 			m_sum2 += m_sum1;
 | |
| 
 | |
| 			size--;
 | |
| 			data = (uint16_t *)(((uint8_t *)data)+1);
 | |
| 		}
 | |
| 
 | |
| 		size_t len = size/2;
 | |
| 		while ( len ) {
 | |
| 			unsigned tlen = len;
 | |
| 			// . 360 is largest amnt of sums that can be performed
 | |
| 			//   without overflow
 | |
| 			if ( len > 360 ) tlen = 360;
 | |
| 			len -= tlen;
 | |
| 			do {
 | |
| 				m_sum1 += *data++;
 | |
| 				m_sum2 += m_sum1;
 | |
| 			} while ( --tlen );
 | |
| 
 | |
| 			m_sum1 = (m_sum1 & 0xffff) + (m_sum1 >> 16);
 | |
| 			m_sum2 = (m_sum2 & 0xffff) + (m_sum2 >> 16);
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	void addInStrings( const uint16_t *data, int32_t cnt, int32_t size ) {
 | |
| 		while ( cnt ) {
 | |
| 			const uint16_t *origData = data;
 | |
| 			int32_t len = gbstrlen((char *)data);
 | |
| 
 | |
| 			// if an odd len of data, add first byte, 
 | |
| 			// then do rest below
 | |
| 			if ( len % 2 != 0 ) { 
 | |
| 				m_sum1 += (uint16_t)*(uint8_t *)data;
 | |
| 				m_sum2 += m_sum1;
 | |
| 
 | |
| 				len--;
 | |
| 				data = (uint16_t *)(((uint8_t *)data)+1);
 | |
| 			}
 | |
| 
 | |
| 			len /= 2;
 | |
| 			while ( len ) {
 | |
| 				unsigned tlen = len;
 | |
| 				// . 360 = largest amnt of sums that can be 
 | |
| 				//   performed without overflow
 | |
| 				if ( len > 360 ) tlen = 360;
 | |
| 				len -= tlen;
 | |
| 				do {
 | |
| 					m_sum1 += *data++;
 | |
| 					m_sum2 += m_sum1;
 | |
| 				} while ( --tlen );
 | |
| 
 | |
| 				m_sum1 = (m_sum1 & 0xffff) + (m_sum1 >> 16);
 | |
| 				m_sum2 = (m_sum2 & 0xffff) + (m_sum2 >> 16);
 | |
| 			}
 | |
| 
 | |
| 			cnt--;
 | |
| 			data = (uint16_t *)((char *)origData + size);
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	void finalize() {
 | |
| 		m_sum1 = (m_sum1 & 0xffff) + (m_sum1 >> 16);
 | |
| 		m_sum2 = (m_sum2 & 0xffff) + (m_sum2 >> 16);
 | |
| 	}
 | |
| 
 | |
| 	uint32_t getSum() const {
 | |
| 		return ( m_sum2 << 16 | m_sum1 );
 | |
| 	}
 | |
| 
 | |
| private:
 | |
| 	uint32_t m_sum1;
 | |
| 	uint32_t m_sum2;
 | |
| };
 | |
| */
 | |
| 
 | |
// Constructor. At startup the parm defaults have not been loaded yet
// and we have neither attempted nor completed a parm sync with host #0.
Parms::Parms ( ) {
	m_isDefaultLoaded = false;
	m_inSyncWithHost0 = false;
	m_triedToSync     = false;
}
 | |
| 
 | |
| void Parms::detachSafeBufs ( CollectionRec *cr ) {
 | |
| 	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
 | |
| 		Parm *m = &m_parms[i];
 | |
| 		if ( m->m_type != TYPE_SAFEBUF ) continue;
 | |
| 		if ( m->m_obj != OBJ_COLL ) continue;
 | |
| 		if ( m->m_off < 0 ) continue;
 | |
| 		int32_t max = 1;
 | |
| 		// this will be zero if not an array.
 | |
| 		// otherwise it is the # of elements in the array
 | |
| 		if ( m->m_size > max ) max = m->m_size;
 | |
| 		// an array of safebufs? m->m_size will be > 1 then.
 | |
| 		for ( int32_t j = 0 ; j < max ; j++ ) {
 | |
| 			// get it
 | |
| 			SafeBuf *sb = (SafeBuf *)((char *)cr + m->m_off +
 | |
| 						  j*sizeof(SafeBuf));
 | |
| 			sb->detachBuf();
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| /*
 | |
| uint32_t Parms::calcChecksum() {
 | |
| 	Checksum cs;
 | |
| 
 | |
| 	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
 | |
| 		Parm *m = &m_parms[i];
 | |
| 		if ( m->m_obj == OBJ_SI ) continue;
 | |
| 		if ( m->m_off < 0 ) continue;
 | |
| 		if ( m->m_type == TYPE_COMMENT ) continue;
 | |
| 		if ( m->m_type == TYPE_MONOD2  ) continue;
 | |
| 		if ( m->m_type == TYPE_MONOM2  ) continue;
 | |
| 		if ( m->m_type == TYPE_CMD     ) continue;
 | |
| 		if ( m->m_type == TYPE_LONG_CONST ) continue;
 | |
| 
 | |
| 		int32_t size = 0;
 | |
| 		if ( m->m_type == TYPE_CHECKBOX       ) size = 1;
 | |
| 		if ( m->m_type == TYPE_CHAR           ) size = 1;
 | |
| 		if ( m->m_type == TYPE_CHAR2          ) size = 1;
 | |
| 		if ( m->m_type == TYPE_BOOL           ) size = 1;
 | |
| 		if ( m->m_type == TYPE_BOOL2          ) size = 1;
 | |
| 		if ( m->m_type == TYPE_PRIORITY       ) size = 1;
 | |
| 		if ( m->m_type == TYPE_PRIORITY2      ) size = 1;
 | |
| 		//if ( m->m_type == TYPE_DIFFBOT_DROPDOWN) size = 1;
 | |
| 		if ( m->m_type == TYPE_PRIORITY_BOXES ) size = 1;
 | |
| 		if ( m->m_type == TYPE_RETRIES        ) size = 1;
 | |
| 		if ( m->m_type == TYPE_TIME           ) size = 6;
 | |
| 		if ( m->m_type == TYPE_DATE2          ) size = 4;
 | |
| 		if ( m->m_type == TYPE_DATE           ) size = 4;
 | |
| 		if ( m->m_type == TYPE_FLOAT          ) size = 4;
 | |
| 		if ( m->m_type == TYPE_IP             ) size = 4;
 | |
| 		if ( m->m_type == TYPE_RULESET        ) size = 4;
 | |
| 		if ( m->m_type == TYPE_LONG           ) size = 4;
 | |
| 		if ( m->m_type == TYPE_LONG_LONG      ) size = 8;
 | |
| 		if ( m->m_type == TYPE_STRING         ) size = m->m_size;
 | |
| 		if ( m->m_type == TYPE_STRINGBOX      ) size = m->m_size;
 | |
| 		if ( m->m_type == TYPE_STRINGNONEMPTY ) size = m->m_size;
 | |
| 		if ( m->m_type == TYPE_SAFEBUF        ) size = m->m_size;
 | |
| 		if ( m->m_type == TYPE_SITERULE       ) size = 4;
 | |
| 
 | |
| 
 | |
| 		// if we have an array
 | |
| 		int32_t cnt = 1;
 | |
| 		if (m->m_fixed > 0) {
 | |
| 			size *= m->m_fixed;
 | |
| 			cnt = m->m_fixed;
 | |
| 		}
 | |
| 		else {
 | |
| 			size *= m->m_max;
 | |
| 			cnt = m->m_max;
 | |
| 		}
 | |
| 
 | |
| 		uint16_t *p = NULL;
 | |
| 		if ( m->m_obj == OBJ_CONF ) {
 | |
| 			p = (uint16_t *)((char *)&g_conf + m->m_off);
 | |
| 			if (m->m_type == TYPE_STRING || 
 | |
| 			    m->m_type == TYPE_STRINGBOX || 
 | |
| 			    m->m_type == TYPE_STRINGNONEMPTY ) {
 | |
| 				cs.addInStrings( p, 
 | |
| 						 cnt,
 | |
| 						 m->m_size );
 | |
| 			}
 | |
| 			else if ( m->m_type == TYPE_SAFEBUF ) {
 | |
| 				uint16_t *p2;
 | |
| 				SafeBuf *sb2 = (SafeBuf *)p;
 | |
| 				p2 = (uint16_t *)sb2->getBufStart();
 | |
| 				cs.addIn( p2 , sb2->length() );
 | |
| 			}
 | |
| 			else {
 | |
| 				cs.addIn( p, size );
 | |
| 			}
 | |
| 		}
 | |
| 	       	else if ( m->m_obj == OBJ_COLL )  {
 | |
| 			collnum_t j = g_collectiondb.getFirstCollnum ();
 | |
| 			while ( j >= 0 ) {
 | |
| 				CollectionRec *cr = g_collectiondb.getRec( j );
 | |
| 				p = (uint16_t *)((char *)cr + m->m_off);
 | |
| 				if (m->m_type == TYPE_STRING || 
 | |
| 				    m->m_type == TYPE_STRINGBOX || 
 | |
| 				    m->m_type == TYPE_STRINGNONEMPTY ) {
 | |
| 					cs.addInStrings( p, 
 | |
| 							 cnt,
 | |
| 							 m->m_size );
 | |
| 				}
 | |
| 				else if ( m->m_type == TYPE_SAFEBUF ) {
 | |
| 					uint16_t *p2;
 | |
| 					SafeBuf *sb2 = (SafeBuf *)p;
 | |
| 					p2 = (uint16_t *)sb2->getBufStart();
 | |
| 					cs.addIn( p2 , sb2->length() );
 | |
| 				}
 | |
| 				else {
 | |
| 					cs.addIn( p, size );
 | |
| 				}
 | |
| 				j = g_collectiondb.getNextCollnum ( j );
 | |
| 			}
 | |
| 		} 
 | |
| 	}
 | |
| 
 | |
| 	cs.finalize();
 | |
| 
 | |
| 	return cs.getSum();
 | |
| }
 | |
| */
 | |
| 
 | |
| // from Pages.cpp
 | |
| bool printApiForPage ( SafeBuf *sb , int32_t PAGENUM , CollectionRec *cr ) ;
 | |
| 
 | |
// . populates the GigablastRequest "gr" from the cgi fields of the
//   http request "hrArg", using the OBJ_GBREQUEST entries of the global
//   parm table that belong to the requested page
// . unknown cgi fields are silently ignored
// . returns false and sets g_errno on error
bool Parms::setGigablastRequest ( TcpSocket *socket ,
				  HttpRequest *hrArg ,
				  GigablastRequest *gr ) {
	// get the page from the path... like /sockets --> PAGE_SOCKETS
	int32_t page = g_pages.getDynamicPageNumber ( hrArg );
	// treat the request object as a raw byte buffer for setParm()
	char *THIS = (char *)gr;

	// ensure valid
	if ( ! THIS ) {
		// it is null when no collection explicitly specified...
		log("admin: THIS is null for page %" INT32 ".",page);
		return false;
	}

	// just in case
	gr->clear();

	gr->m_socket = socket;

	// make a copy of the httprequest because the original is on the stack
	// in HttpServer::requestHandler()
	if ( ! gr->m_hr.copy ( hrArg ) ) {
		log("admin: failed to copy httprequest: %s",
		    mstrerror(g_errno));
		return false;
	}

	// use the one we copied which won't disappear/beFreed on us
	HttpRequest *hr = &gr->m_hr;

	// only parms belonging to GigablastRequest are considered below
	int32_t obj = OBJ_GBREQUEST;

	//
	// reset THIS to defaults. use NULL for cr since mostly for SearchInput
	//
	setToDefault ( THIS , obj , NULL);

	// map PAGE_ADDURL to PAGE_ADDURL2 so 
	// /addurl is same as /admin/addurl as far as parms.
	if ( page == PAGE_ADDURL ) 
		page = PAGE_ADDURL2;

	// loop through cgi parms
	for ( int32_t i = 0 ; i < hr->getNumFields() ; i++ ) {
		// get cgi parm name
		char *field = hr->getField    ( i );
		// find it in the parm table (linear scan per field)
		int32_t  j;
		Parm *m;
		for ( j = 0 ; j < m_numParms ; j++ ) {
			// get it
			m = &m_parms[j];
			// must be of this type
			if ( m->m_obj != obj ) continue;
			// page must match
			if ( m->m_page != page ) continue;
			// skip if no cgi parm, may not be configurable now
			if ( ! m->m_cgi ) continue;
			// otherwise, must match the cgi name exactly
			if ( strcmp ( field,m->m_cgi ) == 0 ) break;
		}
		// bail if the cgi field is not in the parms list
		if ( j >= m_numParms ) {
			continue;
		}
		// value of cgi parm (null terminated)
		char *v = hr->getValue ( i );
		// . skip if no value was provided
		// . unless it was a string! so we can make them empty.
		if ( v[0] == '\0' && 
		     m->m_type != TYPE_CHARPTR &&
		     m->m_type != TYPE_STRING && 
		     m->m_type != TYPE_STRINGBOX ) continue;
		// skip if offset is negative, that means none
		if ( m->m_off < 0 ) continue;
		// set it. now our TYPE_CHARPTR will just be set to it directly
		// to save memory...
		setParm ( (char *)THIS , m, j, 0, v, false,//not html enc
			  false ); // true );
	}
	
	return true;
}
 | |
| 
 | |
| bool printSitePatternExamples ( SafeBuf *sb , HttpRequest *hr );
 | |
| 
 | |
| // . returns false if blocked, true otherwise
 | |
| // . sets g_errno on error
 | |
| // . must ultimately send reply back on "s"
 | |
| // . called by Pages.cpp's sendDynamicReply() when it calls pg->function()
 | |
| //   which is called by HttpServer::sendReply(s,r) when it gets an http request
 | |
| bool Parms::sendPageGeneric ( TcpSocket *s , HttpRequest *r ) {
 | |
| 
 | |
| 	char  buf [ 128000 ];
 | |
| 	SafeBuf stackBuf(buf,128000);
 | |
| 
 | |
| 	SafeBuf *sb = &stackBuf;
 | |
| 
 | |
| 	int32_t page = g_pages.getDynamicPageNumber ( r );
 | |
| 
 | |
| 	char format = r->getReplyFormat();
 | |
| 
 | |
| 	char guide = r->getLong("guide",0);
 | |
| 
 | |
| 
 | |
| 	bool isMasterAdmin = g_conf.isMasterAdmin ( s , r );
 | |
| 	bool isCollAdmin = g_conf.isCollAdmin ( s , r );
 | |
| 	if ( ! g_conf.m_allowCloudUsers &&
 | |
| 	     ! isMasterAdmin &&
 | |
| 	     ! isCollAdmin ) {
 | |
| 		char *msg = "NO PERMISSION";
 | |
| 		return g_httpServer.sendDynamicPage (s, msg,gbstrlen(msg));
 | |
| 	}
 | |
| 
 | |
| 	//
 | |
| 	// CLOUD SEARCH ENGINE SUPPORT
 | |
| 	//
 | |
| 	char *action = r->getString("action",NULL);
 | |
| 	if ( page == PAGE_BASIC_SETTINGS &&
 | |
| 	     guide &&
 | |
| 	     // this is non-null if handling a submit request
 | |
| 	     action && 
 | |
| 	     format == FORMAT_HTML ) {
 | |
| 		//return g_parms.sendPageGeneric ( s, r, PAGE_BASIC_SETTINGS );
 | |
| 		// just redirect to it
 | |
| 		char *coll = r->getString("c",NULL);
 | |
| 		if ( coll ) {
 | |
| 			sb->safePrintf("<meta http-equiv=Refresh "
 | |
| 				      "content=\"0; URL=/widgets.html"
 | |
| 				      "?guide=1&c=%s\">",
 | |
| 				      coll);
 | |
| 			return g_httpServer.sendDynamicPage (s,
 | |
| 							     sb->getBufStart(),
 | |
| 							     sb->length());
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 
 | |
| 	//
 | |
| 	// some "generic" pages do additional processing on the provided input
 | |
| 	// so we need to call those functions here...
 | |
| 	//
 | |
| 
 | |
| 	// if we were an addurl page..
 | |
| 	//if ( page == PAGE_ADDURL2 ) {
 | |
| 	//	// this returns false if blocked and it should re-call
 | |
| 	//	// sendPageGeneric when completed
 | |
| 	//	if ( ! processAddUrlRequest ( s , r ) )
 | |
| 	//		return false;
 | |
| 	//}
 | |
| 
 | |
| 	char *bodyjs = NULL;
 | |
| 	if ( page == PAGE_BASIC_SETTINGS )
 | |
| 		bodyjs =" onload=document.getElementById('tabox').focus();";
 | |
| 
 | |
| 	// print standard header
 | |
| 	if ( format != FORMAT_XML && format != FORMAT_JSON )
 | |
| 		g_pages.printAdminTop ( sb , s , r , NULL , bodyjs );
 | |
| 
 | |
| 	// xml/json header
 | |
| 	char *res = NULL;
 | |
| 	if ( format == FORMAT_XML )
 | |
| 		res = "<response>\n"
 | |
| 			"\t<statusCode>0</statusCode>\n"
 | |
| 			"\t<statusMsg>Success</statusMsg>\n";
 | |
| 	if ( format == FORMAT_JSON )
 | |
| 		res = "{ \"response:\"{\n"
 | |
| 			"\t\"statusCode\":0,\n"
 | |
| 			"\t\"statusMsg\":\"Success\"\n";
 | |
| 	if ( res )
 | |
| 		sb->safeStrcpy ( res );
 | |
| 	
 | |
| 	// do not show the parms and their current values unless showsettings=1
 | |
| 	// was explicitly given for the xml/json feeds
 | |
| 	int32_t show = 1;
 | |
| 	if ( format != FORMAT_HTML )
 | |
| 	 	show = r->getLong("show",0);
 | |
| 	if ( show )
 | |
| 	 	printParmTable ( sb , s , r );
 | |
| 
 | |
| 	// xml/json tail
 | |
| 	if ( format == FORMAT_XML )
 | |
| 		res = "</response>\n";
 | |
| 	if ( format == FORMAT_JSON )
 | |
| 		res = "\t}\n}\n";
 | |
| 	if ( res )
 | |
| 		sb->safeStrcpy ( res );
 | |
| 
 | |
| 
 | |
| 	bool POSTReply = g_pages.getPage ( page )->m_usePost;
 | |
| 
 | |
| 	char *ct = "text/html";
 | |
| 	if ( format == FORMAT_XML ) ct = "text/xml";
 | |
| 	if ( format == FORMAT_JSON ) ct = "application/json";
 | |
| 
 | |
| 	return g_httpServer.sendDynamicPage ( s                , 
 | |
| 					      sb->getBufStart() ,
 | |
| 					      sb->length()      , 
 | |
| 					      -1               ,
 | |
| 					      POSTReply        ,
 | |
| 					      ct             , // contType
 | |
| 					      -1               , // httpstatus
 | |
| 					      NULL,//cookie           ,
 | |
| 					      NULL             );// charset
 | |
| }
 | |
| 
 | |
| 
 | |
// . prints the parm table for the page requested in "r" into "sb"
// . xml/json requests are delegated entirely to printParms2()
// . html requests get a titled <table> of parms plus per-page extras:
//   repair status (PAGE_REPAIR), spider proxy table, url filter
//   expression examples, site pattern examples, and the submit button
// . always returns true
bool Parms::printParmTable ( SafeBuf *sb , TcpSocket *s , HttpRequest *r ) {

	int32_t page = g_pages.getDynamicPageNumber ( r );

	// requester's ip, passed to the repair status table below
	int32_t  fromIp   = s->m_ip;

	char format = r->getReplyFormat();

	// page alias: both collection-password pages share one parm set
	if ( page == PAGE_COLLPASSWORDS2 )
		page = PAGE_COLLPASSWORDS;

	// pick the table title for this page
	char *tt = "None";
	if ( page == PAGE_LOG        ) tt = "Log Controls";
	if ( page == PAGE_MASTER     ) tt = "Master Controls";
	if ( page == PAGE_INJECT     ) tt = "Inject Url";
	if ( page == PAGE_MASTERPASSWORDS ) tt = "Master Passwords";
	if ( page == PAGE_ADDURL2    ) tt = "Add Urls";
	if ( page == PAGE_SPIDER     ) tt = "Spider Controls";
	if ( page == PAGE_SEARCH     ) tt = "Search Controls";
	if ( page == PAGE_ACCESS     ) tt = "Access Controls";
	if ( page == PAGE_FILTERS    ) tt = "Url Filters";
	if ( page == PAGE_BASIC_SETTINGS ) tt = "Settings";
	if ( page == PAGE_COLLPASSWORDS ) tt = "Collection Passwords";
	if ( page == PAGE_REPAIR     ) tt = "Rebuild Controls";

	// special warning banners for spider controls when the master
	// switches are off
	char *e1 = "";
	char *e2 = "";
	if ( page == PAGE_SPIDER && ! g_conf.m_spideringEnabled )
		e1 = "<tr><td colspan=20><font color=#ff0000><b><center>"
			"Spidering is temporarily disabled in Master Controls."
			"</font></td></tr>\n";
	if ( page == PAGE_SPIDER && ! g_conf.m_addUrlEnabled )
		e2 = "<tr><td colspan=20><font color=#ff0000><b><center>"
			"Add url is temporarily disabled in Master Controls."
			"</font></td></tr>\n";

	// xml/json feeds: print the parms directly and return
	if ( format == FORMAT_XML || format == FORMAT_JSON ) {
		char *coll = g_collectiondb.getDefaultColl(r);
		CollectionRec *cr = g_collectiondb.getRec(coll);
		bool isMasterAdmin = g_conf.isMasterAdmin ( s , r );
		bool isCollAdmin = g_conf.isCollAdmin ( s , r );
	 	g_parms.printParms2 ( sb ,
				      page ,
	 	 		      cr ,
	 	 		      1 , // int32_t nc , # cols?
	 	 		      1 , // int32_t pd , print desc?
	 	 		      false , // isCrawlbot
	 	 		      format ,
	 	 		      NULL , // TcpSocket *sock
				      isMasterAdmin ,
				      isCollAdmin ); 
		return true;
	}

	// . page repair (PageRepair.cpp) has a status table BEFORE the parms
	//   iff we are doing a repair
	// . only one page for all collections, we have a parm that is
	//   a comma-separated list of the collections to repair. leave blank
	//   to repair all collections.
	if ( page == PAGE_REPAIR )
		g_repair.printRepairStatus ( sb , fromIp );
	
	// start the table
	sb->safePrintf( 
		       "\n"
		       "<table %s "
		       "id=\"parmtable\">"
		       "<tr><td colspan=20>"
		       ,TABLE_STYLE
			);

	// centered title row, followed by any warning banners
	sb->safePrintf(
		       "<center>"
		       "<b>%s</b>"
		       "</center>"
		       "</td></tr>%s%s\n",
		       tt,e1,e2);

	// print the table(s) of controls
	g_parms.printParms ( sb , s , r );

	// end the table
	sb->safePrintf ( "</table>\n" );

	// this must be outside of table, submit button follows
	sb->safePrintf ( "<br>\n" );

	if ( page == PAGE_SPIDERPROXIES ) {
		// wrap up the form, print a submit button
		g_pages.printSubmit ( sb );
		printSpiderProxyTable ( sb );
		// do not print another submit button
		return true;
	}

	// url filter page has a test table
	if ( page == PAGE_FILTERS ) {
		// wrap up the form, print a submit button
		g_pages.printSubmit ( sb );
		printUrlExpressionExamples ( sb );
	}
	else if ( page == PAGE_BASIC_SETTINGS ) {
		// wrap up the form, print a submit button
		g_pages.printSubmit ( sb );
		printSitePatternExamples ( sb , r );
	}
	else if ( page == PAGE_SPIDER ) { // PAGE_SITES
		// wrap up the form, print a submit button
		g_pages.printSubmit ( sb );
		printSitePatternExamples ( sb , r );
	}
	else {
		// wrap up the form, print a submit button
		g_pages.printAdminBottom ( sb );
	}

	return true;
}
 | |
| 
 | |
| /*
 | |
| char *printDropDown ( int32_t n , char *p, char *pend, char *name, int32_t select,
 | |
| 		      bool includeMinusOne ,
 | |
| 		      bool includeMinusTwo ) {
 | |
| 	// begin the drop down menu
 | |
| 	sprintf ( p , "<select name=%s>", name );
 | |
| 	p += gbstrlen ( p );
 | |
| 	char *s;
 | |
| 	int32_t i = -1;
 | |
| 	if ( includeMinusOne ) i = -1;
 | |
| 	// . by default, minus 2 includes minus 3, the new "FILTERED" priority
 | |
| 	// . it is link "BANNED" but does not mean the url is low quality necessarily
 | |
| 	if ( includeMinusTwo ) i = -3;
 | |
| 	for ( ; i < n ; i++ ) {
 | |
| 		if ( i == select ) s = " selected";
 | |
| 		else               s = "";
 | |
| 		if      ( i == -3 ) 
 | |
| 			sprintf (p,"<option value=%" INT32 "%s>FILTERED",i,s);
 | |
| 		else if ( i == -2 ) 
 | |
| 			sprintf (p,"<option value=%" INT32 "%s>BANNED",i,s);
 | |
| 		else if ( i == -1 ) 
 | |
| 			sprintf (p,"<option value=%" INT32 "%s>undefined",i,s);
 | |
| 		else    
 | |
| 			sprintf (p,"<option value=%" INT32 "%s>%" INT32 "",i,s,i);
 | |
| 		p += gbstrlen ( p );
 | |
| 	}
 | |
| 	sprintf ( p , "</select>" );
 | |
| 	p += gbstrlen ( p );
 | |
| 	return p;
 | |
| }
 | |
| 
 | |
| bool printDiffbotDropDown ( SafeBuf *sb,char *name,char *THIS , SafeBuf *sx) {
 | |
| 	//CollectionRec *cr = (CollectionRec *)THIS;
 | |
| 	// . get the string we have selected
 | |
| 	// . the list of available strings to select is in
 | |
| 	//   m_diffbotApiList for this collection, and that can
 | |
| 	//   be changed by john to add custom diffbot api urls.
 | |
| 	// . should just be m_spiderDiffbotApiUrl[i] safebuf
 | |
| 	char *usingApi = sx->getBufStart();
 | |
| 	if ( sx->length() == 0 ) usingApi = NULL;
 | |
| 	// now scan each item in the list. see the setting of
 | |
| 	// "m_def" for "diffbotApiList" below to see the
 | |
| 	// comma separated list of default strings. each item in
 | |
| 	// this list is of the format "<title>|<urlPath>,"
 | |
| 	//char *p = cr->m_diffbotApiList.getBufStart();
 | |
| 	char *p = 
 | |
| 		"None|none,"
 | |
| 		"All|http://www.diffbot.com/api/analyze?mode=auto&fields=*,"
 | |
| 		"Article (autodetect)|http://www.diffbot.com/api/analyze?mode=article&fields=*,"
 | |
| 		"Article (force)|http://www.diffbot.com/api/article?fields=*,"
 | |
| 		"Product (autodetect)|http://www.diffbot.com/api/analyze?mode=product&fields=*,"
 | |
| 		"Product (force)|http://www.diffbot.com/v2/product?fields=*,"
 | |
| 		"Image (autodetect)|http://www.diffbot.com/api/analyze?mode=image&fields=*,"
 | |
| 		"Image (force)|http://www.diffbot.com/api/image?fields=*,"
 | |
| 		"FrontPage (autodetect)|http://www.diffbot.com/api/analyze?mode=frontpage&fields=*,"
 | |
| 		"FrontPage (force)|http://www.diffbot.com/api/frontpage?fields=*"
 | |
| 		;
 | |
| 
 | |
| 	// wtf?
 | |
| 	if ( ! p ) return true;
 | |
| 	// print out. cgi is "dapi%" INT32 "".
 | |
| 	sb->safePrintf("<select name=%s>\n",name);
 | |
| 	// print "none" as the first option
 | |
| 	//char *sel = "";
 | |
| 	//if ( ! usingApi ) sel = " selected";
 | |
| 	//sb->safePrintf("<option value=\"\"%s>None</option>",sel);
 | |
| 	// the various "diffbot urls" are separated by commas
 | |
| 	for ( ; *p ; ) {
 | |
| 		// point to start of item name
 | |
| 		char *name = p;
 | |
| 		// p should now point to name of the item
 | |
| 		char *end1 = p;
 | |
| 		// point to start of url for that item
 | |
| 		for ( ; *end1 && *end1 != '|' ;end1++);
 | |
| 		// save that
 | |
| 		char *url = end1;
 | |
| 		if ( *url == '|' ) url++;
 | |
| 		// find end of url
 | |
| 		char *urlEnd = url;
 | |
| 		for ( ; *urlEnd && *urlEnd != ',' ; urlEnd++ );
 | |
| 		// do we match it?
 | |
| 		char *sel = "";
 | |
| 		if ( usingApi && strncmp(usingApi,url,urlEnd-url)== 0 )
 | |
| 			sel = " selected";
 | |
| 		if ( ! usingApi && urlEnd - url == 0 )
 | |
| 			sel = " selected";
 | |
| 		// advance p
 | |
| 		p = urlEnd;
 | |
| 		// skip over comma to get next one
 | |
| 		if ( *p == ',' ) p++;
 | |
| 		// use the hash as the identifier
 | |
| 		sb->safePrintf("<option value=\"");
 | |
| 		sb->safeMemcpy ( url, urlEnd - url );
 | |
| 		sb->safePrintf("\"%s>",sel);
 | |
| 		// print item name
 | |
| 		sb->safeMemcpy ( name , end1 - name );
 | |
| 		sb->safePrintf("</option>\n");
 | |
| 	}
 | |
| 	sb->safePrintf("</select>");
 | |
| 	return true;
 | |
| }
 | |
| */
 | |
| 
 | |
| bool printDropDown ( int32_t n , SafeBuf* sb, char *name, int32_t select,
 | |
| 		     bool includeMinusOne ,
 | |
| 		     bool includeMinusTwo ) {	// begin the drop down menu
 | |
| 	sb->safePrintf ( "<select name=%s>", name );
 | |
| 	char *s;
 | |
| 	int32_t i = -1;
 | |
| 	if ( includeMinusOne ) i = -1;
 | |
| 	// . by default, minus 2 includes minus 3, the new "FILTERED" priority
 | |
| 	// . it is link "BANNED" but does not mean the url is low quality necessarily
 | |
| 	if ( includeMinusTwo ) i = -3;
 | |
| 
 | |
| 	// no more DELETE, etc.
 | |
| 	i = 0;
 | |
| 	if ( select < 0 ) select = 0;
 | |
| 
 | |
| 	for ( ; i < n ; i++ ) {
 | |
| 		if ( i == select ) s = " selected";
 | |
| 		else               s = "";
 | |
| 		if      ( i == -3 ) 
 | |
| 			sb->safePrintf ("<option value=%" INT32 "%s>DELETE",i,s);
 | |
| 		else if ( i == -2 ) 
 | |
| 			//sb->safePrintf ("<option value=%" INT32 "%s>BANNED",i,s);
 | |
| 			continue;
 | |
| 		else if ( i == -1 ) 
 | |
| 			//sb->safePrintf ("<option value=%" INT32 "%s>undefined",i,s);
 | |
| 			continue;
 | |
| 		else    
 | |
| 			sb->safePrintf ("<option value=%" INT32 "%s>%" INT32 "",i,s,i);
 | |
| 	}
 | |
| 	sb->safePrintf ( "</select>" );
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
// One entry of the "url filters profile" drop-down (see g_drops):
// a display title plus optional comma-separated language-code and
// tld lists associated with that profile.
class DropLangs {
public:
	char *m_title; // profile name shown (and submitted) in the menu
	char *m_lang;  // comma-separated language codes, NULL = none
	char *m_tld;   // comma-separated tlds, NULL = none
};
 | |
| 
 | |
// The available url-filters profiles. m_title doubles as the cgi value
// matched in printDropDownProfile() and in
// CollectionRec::setUrlFiltersToDefaults().
DropLangs g_drops[] = {
	{"custom",NULL,NULL},
	{"web",NULL,NULL},
	{"news",NULL,NULL},
	{"english","en","com,us.gov,org"},
	{"german","de","de"},
	{"french","fr","fr"},
	// NOTE(review): "nl" is the Dutch language/tld code, not
	// Norwegian ("no"/"no") -- this entry looks mislabeled; confirm
	// intent before changing since the title is matched by name
	{"norweigian","nl","nl"},
	{"spanish","es","es"},
	{"italian","it","it"},
	{"romantic","en,de,fr,nl,es,it","com,us.gov,org,de,fr,nl,es,it"}
};
 | |
| 
 | |
| // "url filters profile" values. used to set default crawl rules
 | |
| // in Collectiondb.cpp's CollectionRec::setUrlFiltersToDefaults(). 
 | |
| // for instance, UFP_NEWS spiders sites more frequently but less deep in
 | |
| // order to get "news" pages and articles
 | |
| bool printDropDownProfile ( SafeBuf* sb, char *name, CollectionRec *cr ) {
 | |
| 	sb->safePrintf ( "<select name=%s>", name );
 | |
| 	// the type of url filters profiles
 | |
| 	//char *items[] = {"custom","web","news","chinese","shallow"};
 | |
| 	int32_t nd = sizeof(g_drops)/sizeof(DropLangs);
 | |
| 	for ( int32_t i = 0 ; i < nd ; i++ ) {
 | |
| 		//if ( i == select ) s = " selected";
 | |
| 		//else               s = "";
 | |
| 		char *x = cr->m_urlFiltersProfile.getBufStart();
 | |
| 		char *s;
 | |
| 		if ( strcmp(g_drops[i].m_title, x) == 0 ) s = " selected";
 | |
| 		else                                      s = "";
 | |
| 		sb->safePrintf ("<option value=%s%s>%s",
 | |
| 				g_drops[i].m_title,
 | |
| 				s,
 | |
| 				g_drops[i].m_title );
 | |
| 	}
 | |
| 	sb->safePrintf ( "</select>");
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| /*
 | |
| char *printCheckBoxes ( int32_t n , char *p, char *pend, char *name, char *array){
 | |
| 	for ( int32_t i = 0 ; i < n ; i++ ) {
 | |
| 		if ( i > 0 ) 
 | |
| 			sprintf (p, "<input type=checkbox value=1 name=%s%" INT32 "",
 | |
| 				 name,i);
 | |
| 		else
 | |
| 			sprintf (p, "<input type=checkbox value=1 name=%s",
 | |
| 				 name);
 | |
| 		p += gbstrlen ( p );
 | |
| 		if ( array[i] ) {
 | |
| 			sprintf ( p , " checked");
 | |
| 			p += gbstrlen ( p );
 | |
| 		}
 | |
| 		sprintf ( p , ">%" INT32 "  " , i );
 | |
| 		p += gbstrlen ( p );
 | |
| 		//if i is single digit, add another nbsp so that everything's 
 | |
| 		//aligned
 | |
| 		if ( i < 10 )
 | |
| 			sprintf(p,"  ");
 | |
| 		p +=gbstrlen(p);
 | |
| 			
 | |
| 		if ( i > 0 && (i+1) % 6 == 0 )
 | |
| 			sprintf(p,"<br>\n");
 | |
| 		p+=gbstrlen(p);
 | |
| 	}
 | |
| 	return p;
 | |
| }
 | |
| */
 | |
| 
 | |
| bool printCheckBoxes ( int32_t n , SafeBuf* sb, char *name, char *array){
 | |
| 	for ( int32_t i = 0 ; i < n ; i++ ) {
 | |
| 		if ( i > 0 ) 
 | |
| 			sb->safePrintf ("<input type=checkbox value=1 name=%s%" INT32 "",
 | |
| 					name,i);
 | |
| 		else
 | |
| 			sb->safePrintf ("<input type=checkbox value=1 name=%s",
 | |
| 				 name);
 | |
| 		if ( array[i] ) {
 | |
| 			sb->safePrintf ( " checked");
 | |
| 		}
 | |
| 		sb->safePrintf ( ">%" INT32 "  " , i );
 | |
| 		//if i is single digit, add another nbsp so that everything's 
 | |
| 		//aligned
 | |
| 		if ( i < 10 )
 | |
| 			sb->safePrintf("  ");
 | |
| 			
 | |
| 		if ( i > 0 && (i+1) % 6 == 0 )
 | |
| 			sb->safePrintf("<br>\n");
 | |
| 	}
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| bool Parms::printParms (SafeBuf* sb, TcpSocket *s , HttpRequest *r) {
 | |
| 	int32_t  page = g_pages.getDynamicPageNumber ( r );
 | |
| 	int32_t nc = r->getLong("nc",1);
 | |
| 	int32_t pd = r->getLong("pd",1);
 | |
| 	char *coll = g_collectiondb.getDefaultColl(r);
 | |
| 	CollectionRec *cr = g_collectiondb.getRec(coll);//2(r,true);
 | |
| 
 | |
| 	bool isMasterAdmin = g_conf.isMasterAdmin ( s , r );
 | |
| 	bool isCollAdmin = g_conf.isCollAdmin ( s , r );
 | |
| 
 | |
| 	//char *coll = r->getString ( "c"   );
 | |
| 	//if ( ! coll || ! coll[0] ) coll = "main";
 | |
| 	//CollectionRec *cr = g_collectiondb.getRec ( coll );
 | |
| 	// if "main" collection does not exist, try another
 | |
| 	//if ( ! cr ) cr = getCollRecFromHttpRequest ( r );
 | |
| 	printParms2 ( sb, page, cr, nc, pd,0,0 , s,isMasterAdmin,isCollAdmin);
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
// running count of table cells printed so far; printParms2() resets it
// and printParm() tests (s_count % nc) to detect the first cell of each
// table row
static int32_t s_count = 0;
 | |
| 
 | |
// . print every parm belonging to "page" into "sb"
// . html output builds table rows/cells via printParm(); FORMAT_JSON
//   wraps the output in a "parms":[...] array instead
// . cr may be NULL; OBJ_COLL parms are then skipped
// . nc = columns per row, pd = whether descriptions are printed
// . returns false if any underlying printParm() call failed
bool Parms::printParms2 ( SafeBuf* sb , 
			  int32_t page , 
			  CollectionRec *cr , 
			  int32_t nc ,
			  int32_t pd , 
			  bool isCrawlbot , 
			  char format , // bool isJSON ,
			  TcpSocket *sock ,
			  bool isMasterAdmin ,
			  bool isCollAdmin ) {
	bool status = true;
	// reset the file-scope cell counter printParm() uses for row breaks
	s_count = 0;
	// background color
	char *bg1 = LIGHT_BLUE;
	char *bg2 = DARK_BLUE;
	// background color
	char *bg = NULL;

	char *coll = NULL;
	if ( cr ) coll = cr->m_coll;

	// page aliases
	//if ( page == PAGE_COLLPASSWORDS )
	//	page = PAGE_MASTERPASSWORDS;

	if ( page == PAGE_COLLPASSWORDS2 )
		page = PAGE_COLLPASSWORDS;

	// scratch request objects so OBJ_GBREQUEST / OBJ_IR parms have
	// default values to read from below
	GigablastRequest gr;
	g_parms.setToDefault ( (char *)&gr , OBJ_GBREQUEST , NULL);

	InjectionRequest ir;
	g_parms.setToDefault ( (char *)&ir , OBJ_IR , NULL);

	// Begin "parms":[]
	if (format == FORMAT_JSON ) {
		sb->safePrintf ("\"parms\":[\n");
	}

	// find in parms list
	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
		// get it
		Parm *m = &m_parms[i];
		// make sure we got the right parms for what we want
		if ( m->m_page != page ) continue;
		// and same object tpye. but allow OBJ_NONE for
		// PageAddUrl.cpp
		//if ( m->m_obj != parmObj && m->m_obj != OBJ_NONE ) continue;
		// skip if offset is negative, that means none
		// well then use OBJ_NONE now!!!
		//if ( m->m_off < 0 && 
		//     m->m_type != TYPE_MONOD2 &&
		//     m->m_type != TYPE_MONOM2 &&
		//     m->m_type != TYPE_CMD     ) continue;
		// skip if hidden
		if ( m->m_flags & PF_HIDDEN ) continue;

		// or if should not show in html, like the 
		// name of the collection, the "c" parm we do not show
		// generally on the html page even though it is a required parm
		// we have it in a hidden html input tag in Pages.cpp.
		if ( (m->m_flags & PF_NOHTML) && 
		     format != FORMAT_JSON &&
		     format != FORMAT_XML )
			continue;

		// get right ptr: which object this parm's offset indexes into
		char *THIS = NULL;
		if ( m->m_obj == OBJ_CONF ) 
			THIS = (char *)&g_conf;
		if ( m->m_obj == OBJ_COLL ) {
			THIS = (char *)cr;
			// no collection rec given -> cannot print coll parms
			if ( ! THIS ) continue;
		}
		if ( m->m_obj == OBJ_GBREQUEST )
			THIS = (char *)&gr;
		if ( m->m_obj == OBJ_IR )
			THIS = (char *)&ir;
		// might have an array, do not exceed the array size
		int32_t  jend = m->m_max;
		int32_t  size = jend ;
		// array parms store their current element count in the
		// int32_t located 4 bytes before the parm data (hence -4)
		char *ss   = ((char *)THIS + m->m_off - 4);
		if ( m->m_type == TYPE_MONOD2 ) ss = NULL;
		if ( m->m_type == TYPE_MONOM2 ) ss = NULL;
		if ( m->m_max > 1 && ss ) size = *(int32_t *)ss;
		if ( size < jend        ) jend = size;
		// toggle background color on group boundaries...
		if ( m->m_group == 1 ) {
			if ( bg == bg1 ) bg = bg2;
			else             bg = bg1;
		}

			//
			// mdw just debug to here ... left off here
			//char *xx=NULL;*xx=0;

		// . do we have an array? if so print title on next row
		//   UNLESS these are priority checkboxes, those can all 
		//   cluster together onto one row
		// . only add if not in a row of controls
		if ( m->m_max > 1 && m->m_type != TYPE_PRIORITY_BOXES &&
		     m->m_rowid == -1 &&
		     format != FORMAT_JSON &&
		     format != FORMAT_XML ) { // ! isJSON ) {
			//
			// make a separate table for array of parms
			sb->safePrintf (
				  //"<table width=100%% bgcolor=#d0d0e0 "
				  //"cellpadding=4 border=1>\n"
				  "<tr><td colspan=20 bgcolor=#%s>"
				  "<center>"
				  //"<font size=+1>"
				  "<b>%s"
				  "</b>"
				  //"</font>"
				  "</td></tr>\n"
				  "<tr><td colspan=20><font size=-1>"
				  ,DARK_BLUE,m->m_title);
			// print the description
			sb->safePrintf ( "%s" , m->m_desc );
			// end the description
			sb->safePrintf("</font></td></tr>\n");

		}

		// arrays always have blank line for adding stuff
		if ( m->m_max > 1 )
		     // not for PAGE_PRIORITIES!
		     //m->m_page != PAGE_PRIORITIES )
			size++;
		// if m_rowid of consecutive parms are the same then they
		// are all printed in the same row, otherwise the inner loop
		// has no effect
		int32_t rowid = m_parms[i].m_rowid;
		// if not part of a complex row, just print this array right up
		if ( rowid == -1 ) {
			for ( int32_t j = 0 ; j < size ; j++ )
				status &=printParm ( sb,NULL,&m_parms[i],i,
						     j, jend, (char *)THIS,
						     coll,NULL,
						     bg,nc,pd,
						     false,
						     isCrawlbot,
						     format,
						     isMasterAdmin,
						     isCollAdmin,
						     sock);
			continue;
		}
		// if not first in a row, skip it, we printed it already
		if ( i > 0 && m_parms[i-1].m_rowid == rowid ) continue;

		// otherwise print everything in the row: outer loop walks
		// array entries, inner loop walks the parms sharing rowid
		for ( int32_t j = 0 ; j < size ; j++ ) {
			// flip j if in this page
			int32_t newj = j;
			//if ( m->m_page == PAGE_PRIORITIES )
			//	newj = size - 1 - j;
			for ( int32_t k = i ; 
			      k < m_numParms && 
				      m_parms[k].m_rowid == rowid;  
			      k++ ) {

				status &=printParm(sb,NULL,&m_parms[k],k,
					    newj,jend,(char *)THIS,coll,NULL,
						   bg,nc,pd, j==size-1,
						   isCrawlbot,format,
						   isMasterAdmin,
						   isCollAdmin,
						   sock);
			}
		}

		// end array table
		//if ( m->m_max > 1 ) {
		//	sprintf ( p , "</table><br>\n");
		//	p += gbstrlen ( p );
		//}
	}

	// end "parms":[]
	if ( format == FORMAT_JSON ) {
		// strip the trailing ",\n" left by the last printParm()
		// NOTE(review): this only checks m_numParms != 0, not that a
		// parm for this page actually printed — confirm the trim
		// cannot eat other output when every parm was skipped
		if ( m_numParms != 0 ) sb->m_length -= 2;
		sb->safePrintf("\n]\n");
	}

	return status;
}
 | |
| 
 | |
| 
 | |
| bool Parms::printParm ( SafeBuf* sb,
 | |
| 			//int32_t  user ,
 | |
| 			char *username,
 | |
| 			Parm *m    , 
 | |
| 			int32_t  mm   , // m = &m_parms[mm]
 | |
| 			int32_t  j    ,
 | |
| 			int32_t  jend ,
 | |
| 			char *THIS ,
 | |
| 			char *coll ,
 | |
| 			char *pwd  ,
 | |
| 			char *bg   ,
 | |
| 			int32_t  nc   , // # column?
 | |
| 			int32_t  pd   , // print description
 | |
| 			bool lastRow ,
 | |
| 			bool isCrawlbot ,
 | |
| 			//bool isJSON ) {
 | |
| 			char format ,
 | |
| 			bool isMasterAdmin ,
 | |
| 			bool isCollAdmin ,
 | |
| 			TcpSocket *sock ) {
 | |
| 	bool status = true;
 | |
| 	// do not print if no permissions
 | |
| 	//if ( m->m_perms != 0 && !g_users.hasPermission(username,m->m_perms) )
 | |
| 	//	return status;
 | |
| 	//if ( m->m_perms != 0 && (m->m_perms & user) == 0 ) return status;
 | |
| 	// do not print some if #define _CLIENT_ is true
 | |
| 	//#ifdef _GLOBALSPEC_
 | |
| 	//if ( m->m_priv == 2 ) return status;
 | |
| 	//if ( m->m_priv == 3 ) return status;
 | |
| 	//#elif _CLIENT_
 | |
| 	//if ( m->m_priv ) return status;
 | |
| 	//#elif _METALINCS_
 | |
| 	//if ( m->m_priv == 2 ) return status;
 | |
| 	//if ( m->m_priv == 3 ) return status;
 | |
| 	//#endif
 | |
| 	// priv of 4 means do not print at all
 | |
| 	if ( m->m_priv == 4 ) return true;
 | |
| 
 | |
| 	// do not print comments, those are for the xml conf file
 | |
| 	if ( m->m_type == TYPE_COMMENT ) return true;
 | |
| 
 | |
| 	if ( m->m_flags & PF_HIDDEN ) return true;
 | |
| 
 | |
| 	CollectionRec *cr = NULL;
 | |
| 	collnum_t collnum = -1;
 | |
| 	if ( coll ) {
 | |
| 		cr = g_collectiondb.getRec ( coll );
 | |
| 		if ( cr ) collnum = cr->m_collnum;
 | |
| 	}
 | |
| 
 | |
| 	if ( format == FORMAT_XML || format == FORMAT_JSON ) {
 | |
| 		// the upload button has no val, cmds too
 | |
| 		if ( m->m_type == TYPE_FILEUPLOADBUTTON ) return true;
 | |
| 	}
 | |
| 
 | |
| 	int32_t page = m->m_page;
 | |
| 
 | |
| 	if ( format == FORMAT_XML ) {
 | |
| 		sb->safePrintf ( "\t<parm>\n");
 | |
| 		sb->safePrintf ( "\t\t<title><![CDATA[");
 | |
| 		sb->cdataEncode ( m->m_title );
 | |
| 		sb->safePrintf ( "]]></title>\n");
 | |
| 		sb->safePrintf ( "\t\t<desc><![CDATA[");
 | |
| 		sb->cdataEncode ( m->m_desc );
 | |
| 		sb->safePrintf ( "]]></desc>\n");
 | |
| 		if ( m->m_flags & PF_REQUIRED )
 | |
| 			sb->safePrintf("\t\t<required>1</required>\n");
 | |
| 		sb->safePrintf ( "\t\t<cgi>%s</cgi>\n",m->m_cgi);
 | |
| 		// and default value if it exists
 | |
| 		char *def = m->m_def;
 | |
| 		if ( ! def ) def = "";
 | |
| 		sb->safePrintf ( "\t\t<defaultValue><![CDATA[");
 | |
| 		sb->cdataEncode ( def );
 | |
| 		sb->safePrintf ( "]]></defaultValue>\n");
 | |
| 		if ( page == PAGE_MASTER ||
 | |
| 		     page == PAGE_SEARCH ||
 | |
| 		     page == PAGE_SPIDER ||
 | |
| 		     page == PAGE_SPIDERPROXIES ||
 | |
| 		     page == PAGE_FILTERS ||
 | |
| 		     page == PAGE_MASTERPASSWORDS ||
 | |
| 		     page == PAGE_REPAIR ||
 | |
| 		     page == PAGE_LOG ) {
 | |
| 			sb->safePrintf ( "\t\t<currentValue><![CDATA[");
 | |
| 			SafeBuf xb;
 | |
| 			m->printVal ( &xb , collnum , 0 );//occNum
 | |
| 			sb->cdataEncode ( xb.getBufStart() );
 | |
| 			sb->safePrintf ( "]]></currentValue>\n");
 | |
| 		}
 | |
| 		sb->safePrintf ( "\t</parm>\n");
 | |
| 		return true;
 | |
| 	}
 | |
| 
 | |
| 	if ( format == FORMAT_JSON ) {
 | |
| 		sb->safePrintf ( "\t{\n");
 | |
| 		sb->safePrintf ( "\t\t\"title\":\"%s\",\n",m->m_title);
 | |
| 		sb->safePrintf ( "\t\t\"desc\":\"");
 | |
| 		sb->jsonEncode ( m->m_desc );
 | |
| 		sb->safePrintf("\",\n");
 | |
| 		if ( m->m_flags & PF_REQUIRED )
 | |
| 			sb->safePrintf("\t\t\"required\":1,\n");
 | |
| 		sb->safePrintf ( "\t\t\"cgi\":\"%s\",\n",m->m_cgi);
 | |
| 		// and default value if it exists
 | |
| 		char *def = m->m_def;
 | |
| 		if ( ! def ) def = "";
 | |
| 		sb->safePrintf ( "\t\t\"defaultValue\":\"");
 | |
| 		sb->jsonEncode(def);
 | |
| 		sb->safePrintf("\",\n");
 | |
| 		if ( page == PAGE_MASTER ||
 | |
| 		     page == PAGE_SEARCH ||
 | |
| 		     page == PAGE_SPIDER ||
 | |
| 		     page == PAGE_SPIDERPROXIES ||
 | |
| 		     page == PAGE_FILTERS ||
 | |
| 		     page == PAGE_MASTERPASSWORDS ||
 | |
| 		     page == PAGE_REPAIR ||
 | |
| 		     page == PAGE_LOG ) {
 | |
| 			sb->safePrintf ( "\t\t\"currentValue\":\"");
 | |
| 			SafeBuf js;
 | |
| 			m->printVal ( &js , collnum , 0 );//occNum );
 | |
| 			sb->jsonEncode(js.getBufStart());
 | |
| 			sb->safePrintf("\",\n");
 | |
| 		}
 | |
|     sb->m_length -= 2; // hack of trailing comma
 | |
| 		sb->safePrintf("\n\t},\n");
 | |
| 		return true;
 | |
| 	}
 | |
| 
 | |
| 	// . if printing on crawlbot page hide these
 | |
| 	// . we repeat this logic below when printing parm titles
 | |
| 	//   for the column headers in the table
 | |
| 	//char *vt = "";
 | |
| 	//if ( isCrawlbot &&
 | |
| 	//     m->m_page == PAGE_FILTERS &&
 | |
| 	//     (strcmp(m->m_xml,"spidersEnabled") == 0 ||
 | |
| 	//      //strcmp(m->m_xml,"maxSpidersPerRule")==0||
 | |
| 	//      //strcmp(m->m_xml,"maxSpidersPerIp") == 0||
 | |
| 	//      strcmp(m->m_xml,"spiderIpWait") == 0 
 | |
| 	//      ) )
 | |
| 	//	vt = " style=display:none;";
 | |
| 
 | |
| 	// what type of parameter?
 | |
| 	char t = m->m_type;
 | |
| 	// point to the data in THIS
 | |
| 	char *s = THIS + m->m_off + m->m_size * j ;
 | |
| 
 | |
| 	// if THIS is NULL then it must be GigablastRequest or something
 | |
| 	// and is not really a persistent thing, but a one-shot deal.
 | |
| 	if ( ! THIS ) s = NULL;
 | |
| 
 | |
| 	// . if an array, passed our end, this is the blank line at the end
 | |
| 	// . USE THIS EMPTY/DEFAULT LINE TO ADD NEW DATA TO AN ARRAY
 | |
| 	// . make at least as big as a int64_t
 | |
| 	if ( j >= jend ) s = "\0\0\0\0\0\0\0\0";
 | |
| 	// delimit each cgi var if we need to
 | |
| 	if ( m->m_cgi && gbstrlen(m->m_cgi) > 45 ) {
 | |
| 		log(LOG_LOGIC,"admin: Cgi variable is TOO big.");
 | |
| 		char *xx = NULL; *xx = 0;
 | |
| 	}
 | |
| 	char cgi[64];
 | |
| 	if ( m->m_cgi ) {
 | |
| 		if ( j > 0 ) sprintf ( cgi , "%s%" INT32 "" , m->m_cgi , j );
 | |
| 		else         sprintf ( cgi , "%s"    , m->m_cgi     );
 | |
| 		// let's try dropping the index # and just doing dup parms
 | |
| 		//sprintf ( cgi , "%s"    , m->m_cgi     );
 | |
| 	}
 | |
| 	// . display title and description of the control/parameter
 | |
| 	// . the input cell of some parameters are colored
 | |
| 	char *color = "";
 | |
| 	if ( t == TYPE_CMD  || t == TYPE_BOOL2 ) 
 | |
| 		color = " bgcolor=#6060ff";
 | |
| 	if ( t == TYPE_BOOL ) {
 | |
| 		if ( *s ) color = " bgcolor=#00ff00";
 | |
| 		else      color = " bgcolor=#ff0000";
 | |
| 	}
 | |
| 	if ( t == TYPE_BOOL || t == TYPE_BOOL2 ) {
 | |
| 		// disable controls not allowed in read only mode
 | |
| 		if ( g_conf.m_readOnlyMode && m->m_rdonly )
 | |
| 			  color = " bgcolor=#ffff00";
 | |
| 	}
 | |
| 
 | |
| 	bool firstInRow = false;
 | |
| 	if ( (s_count % nc) == 0 ) firstInRow = true;
 | |
| 	s_count++;
 | |
| 
 | |
| 	if ( mm > 0 && m->m_rowid >= 0 && m_parms[mm-1].m_rowid == m->m_rowid )
 | |
| 		firstInRow = false;
 | |
| 
 | |
| 	int32_t firstRow = 0;
 | |
| 	//if ( m->m_page==PAGE_PRIORITIES ) firstRow = MAX_PRIORITY_QUEUES - 1;
 | |
| 	// . use a separate table for arrays
 | |
| 	// . make title and description header of that table
 | |
| 	// . do not print all headers if not m_hdrs, a special case for the 
 | |
| 	//   default line in the url filters table
 | |
| 	if ( j == firstRow && m->m_rowid >= 0 && firstInRow && m->m_hdrs ) {
 | |
| 		// print description as big comment
 | |
| 		if ( m->m_desc && pd == 1 ) {
 | |
| 			// url FILTERS table description row
 | |
| 			sb->safePrintf ( "<td colspan=20 bgcolor=#%s>"
 | |
| 					 "<font size=-1>\n" , DARK_BLUE);
 | |
| 
 | |
| 			//p = htmlEncode ( p , pend , m->m_desc ,
 | |
| 			//		 m->m_desc + gbstrlen ( m->m_desc ) );
 | |
| 			sb->safePrintf ( "%s" , m->m_desc );
 | |
| 			sb->safePrintf ( "</font></td></tr>"
 | |
| 					 // for "#,expression,harvestlinks.."
 | |
| 					 // header row in url FILTERS table
 | |
| 					 "<tr bgcolor=#%s>\n" ,DARK_BLUE);
 | |
| 		}
 | |
| 		// # column
 | |
| 		// do not show this for PAGE_PRIORITIES it is confusing
 | |
| 		if ( m->m_max > 1 ) {
 | |
| 		     //m->m_page != PAGE_PRIORITIES ) {
 | |
| 			sb->safePrintf (  "<td><b>#</b></td>\n" );
 | |
| 		}
 | |
| 		// print all headers
 | |
| 		for ( int32_t k = mm ; 
 | |
| 		      k<m_numParms && m_parms[k].m_rowid==m->m_rowid; k++ ) {
 | |
| 			// parm int16_tcut
 | |
| 			Parm *mk = &m_parms[k];
 | |
| 			// not if printing json
 | |
| 			//if ( format != FORMAT_HTML )continue;//isJSON )
 | |
| 			// skip if hidden
 | |
| 			if ( cr && ! cr->m_isCustomCrawl &&
 | |
| 			     (mk->m_flags & PF_DIFFBOT) )
 | |
| 				continue;
 | |
| 
 | |
| 			// . hide table column headers that are too advanced
 | |
| 			// . we repeat this logic above for the actual parms
 | |
| 			//char *vt = "";
 | |
| 			//if ( isCrawlbot &&
 | |
| 			//     m->m_page == PAGE_FILTERS &&
 | |
| 			//     (strcmp(mk->m_xml,"spidersEnabled") == 0 ||
 | |
| 			//      //strcmp(mk->m_xml,"maxSpidersPerRule")==0||
 | |
| 			//      //strcmp(mk->m_xml,"maxSpidersPerIp") == 0||
 | |
| 			//      strcmp(mk->m_xml,"spiderIpWait") == 0 ) )
 | |
| 			//	vt = " style=display:none;display:none;";
 | |
| 			//sb->safePrintf ( "<td%s>" , vt );
 | |
| 			sb->safePrintf ( "<td>" );
 | |
| 			// if its of type checkbox in a table make it
 | |
| 			// toggle them all on/off
 | |
| 			if ( mk->m_type == TYPE_CHECKBOX &&
 | |
| 			     mk->m_page == PAGE_FILTERS ) {
 | |
| 				sb->safePrintf("<a href=# "
 | |
| 					       "onclick=\"checkAll(this, "
 | |
| 					       "'id_%s', %" INT32 ");\">",
 | |
| 					       m_parms[k].m_cgi, m->m_max);
 | |
| 			}
 | |
| 			sb->safePrintf ( "<b>%s</b>", m_parms[k].m_title );
 | |
| 			if ( mk->m_type == TYPE_CHECKBOX &&
 | |
| 			     mk->m_page == PAGE_FILTERS ) 
 | |
| 				sb->safePrintf("</a>");
 | |
| 			/*
 | |
| 			if ( m->m_page == PAGE_PRIORITIES && 
 | |
| 			     m_parms[k].m_type == TYPE_CHECKBOX)
 | |
| 				sb->safePrintf("<br><a href=# "
 | |
| 					       "onclick=\"checkAll(this, "
 | |
| 					       "'id_%s', %" INT32 ");\">(toggle)</a>",
 | |
| 					       m_parms[k].m_cgi, m->m_max);
 | |
| 			*/
 | |
| 			sb->safePrintf ("</td>\n");
 | |
| 		}
 | |
| 		//if ( format == FORMAT_HTML ) 
 | |
| 		sb->safePrintf ( "</tr>\n" ); // mdw added
 | |
| 	}
 | |
| 
 | |
| 	// skip if hidden. diffbot api url only for custom crawls.
 | |
| 	//if(cr && ! cr->m_isCustomCrawl && (m->m_flags & PF_DIFFBOT) )
 | |
| 	//	return true;
 | |
| 
 | |
| 	// print row start for single parm
 | |
| 	if ( m->m_max <= 1 && ! m->m_hdrs ) {
 | |
| 		if ( firstInRow ) {
 | |
| 			sb->safePrintf ( "<tr bgcolor=#%s><td>" , bg );
 | |
| 		}
 | |
| 		sb->safePrintf ( "<td width=%" INT32 "%%>" , 100/nc/2 );
 | |
| 	}
 | |
| 
 | |
| 	// if parm value is not default, use orange!
 | |
| 	char rr[1024];
 | |
| 	SafeBuf val1(rr,1024);
 | |
| 	if ( m->m_type != TYPE_FILEUPLOADBUTTON )
 | |
| 		m->printVal ( &val1 , collnum , j ); // occNum );
 | |
| 	// test it
 | |
| 	if ( m->m_def && 
 | |
| 	     m->m_obj != OBJ_NONE &&
 | |
| 	     m->m_obj != OBJ_IR && // do not do for injectionrequest
 | |
| 	     m->m_obj != OBJ_GBREQUEST && // do not do for GigablastRequest
 | |
| 	     strcmp ( val1.getBufStart() , m->m_def ) )
 | |
| 		// put non-default valued parms in orange!
 | |
| 		bg = "ffa500";
 | |
| 
 | |
| 
 | |
| 	// print the title/description in current table for non-arrays
 | |
| 	if ( m->m_max <= 1 && m->m_hdrs ) { // j == 0 && m->m_rowid < 0 ) {
 | |
| 		if ( firstInRow )
 | |
| 			sb->safePrintf ( "<tr bgcolor=#%s>",bg);
 | |
| 
 | |
| 		if ( t == TYPE_STRINGBOX ) {
 | |
| 			sb->safePrintf ( "<td colspan=2><center>"
 | |
| 				  "<b>%s</b><br><font size=-1>",m->m_title );
 | |
| 			if ( pd ) {
 | |
| 				status &= sb->htmlEncode (m->m_desc,
 | |
| 							  gbstrlen(m->m_desc),
 | |
| 							  false);
 | |
| 				// is it required?
 | |
| 				if ( m->m_flags & PF_REQUIRED )
 | |
| 					sb->safePrintf(" <b><font color=green>"
 | |
| 						       "REQUIRED</font></b>");
 | |
| 			}
 | |
| 
 | |
| 			sb->safePrintf ( "</font><br>\n" );
 | |
| 		}
 | |
| 		if ( t != TYPE_STRINGBOX ) {
 | |
| 			// this td will be invisible if isCrawlbot and the
 | |
| 			// parm is too advanced to display
 | |
| 			sb->safePrintf ( "<td " );
 | |
| 			if ( m->m_colspan > 0 )
 | |
| 				sb->safePrintf ( "colspan=%" INT32 " ",
 | |
| 						 (int32_t)m->m_colspan);
 | |
| 			sb->safePrintf ( "width=%" INT32 "%%>"//"<td width=78%%>
 | |
| 					 "<b>%s</b><br><font size=1>",
 | |
| 					 3*100/nc/2/4, m->m_title );
 | |
| 
 | |
| 			// the "site list" parm has html in description
 | |
| 			if ( pd ) {
 | |
| 				status &= sb->safeStrcpy(m->m_desc);
 | |
| 				//status &= sb->htmlEncode (m->m_desc,
 | |
| 				//			  gbstrlen(m->m_desc),
 | |
| 				//			  false);
 | |
| 				// is it required?
 | |
| 				if ( m->m_flags & PF_REQUIRED )
 | |
| 					sb->safePrintf(" <b><font color=green>"
 | |
| 						       "REQUIRED</font></b>");
 | |
| 
 | |
| 				// print users current ip if showing the list
 | |
| 				// of "Master IPs" for admin access
 | |
| 				if ( ( m->m_page == PAGE_MASTERPASSWORDS ||
 | |
| 				       m->m_page == PAGE_COLLPASSWORDS ) &&
 | |
| 				     sock &&
 | |
| 				     m->m_title &&
 | |
| 				     strstr(m->m_title,"IP") )
 | |
| 					sb->safePrintf(" <b>Your current IP "
 | |
| 						       "is %s.</b>",
 | |
| 						       iptoa(sock->m_ip));
 | |
| 			}
 | |
| 
 | |
| 			// and cgi parm if it exists
 | |
| 			//if ( m->m_def && m->m_scgi )
 | |
| 			//	sb->safePrintf(" CGI override: %s.",m->m_scgi);
 | |
| 			// just let them see the api page for this...
 | |
| 			//sb->safePrintf(" CGI: %s.",m->m_cgi);
 | |
| 			// and default value if it exists
 | |
| 			if ( m->m_def && m->m_def[0] && t != TYPE_CMD ) {
 | |
| 				char *d = m->m_def;
 | |
| 				if ( t == TYPE_BOOL || t == TYPE_CHECKBOX ) {
 | |
| 					if ( d[0]=='0' ) d = "NO";
 | |
| 					else             d = "YES";
 | |
| 					sb->safePrintf ( " <nobr>"
 | |
| 							 "Default: %s."
 | |
| 							 "</nobr>",d);
 | |
| 				} 
 | |
| 				else {
 | |
| 					sb->safePrintf (" Default: ");
 | |
| 					status &= sb->htmlEncode (d,
 | |
| 								  gbstrlen(d),
 | |
| 								  false);
 | |
| 				}
 | |
| 			}
 | |
| 			sb->safePrintf ( "</font></td>\n<td%s width=%" INT32 "%%>" , 
 | |
| 				  color , 100/nc/2/4 );
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// . print number in row if array, start at 1 for clarity's sake
 | |
| 	// . used for url filters table, etc.
 | |
| 	if ( m->m_max > 1 ) {
 | |
| 		// bg color alternates
 | |
| 		char *bgc = LIGHT_BLUE;
 | |
| 		if ( j % 2 ) bgc = DARK_BLUE;
 | |
| 		// do not print this if doing json
 | |
| 		//if ( format != FORMAT_HTML );//isJSON ) ;
 | |
| 		// but if it is in same row as previous, do not repeat it
 | |
| 		// for this same row, silly
 | |
| 		if ( firstInRow ) // && m->m_page != PAGE_PRIORITIES ) 
 | |
| 			sb->safePrintf ( "<tr bgcolor=#%s>"
 | |
| 					 "<td>%" INT32 "</td>\n<td>", 
 | |
| 					 bgc,
 | |
| 					 j );//j+1
 | |
| 		else if ( firstInRow ) 
 | |
| 			sb->safePrintf ( "<tr><td>" );
 | |
| 		else    
 | |
| 			//sb->safePrintf ( "<td%s>" , vt);
 | |
| 			sb->safePrintf ( "<td>" );
 | |
| 	}
 | |
| 
 | |
| 	//int32_t cast = m->m_cast;
 | |
| 	//if ( g_proxy.isProxy() ) cast = 0;
 | |
| 
 | |
| 	// print the input box
 | |
| 	if ( t == TYPE_BOOL ) {
 | |
| 		char *tt, *v;
 | |
| 		if ( *s ) { tt = "YES"; v = "0"; }
 | |
| 		else      { tt = "NO" ; v = "1"; }
 | |
| 		if ( g_conf.m_readOnlyMode && m->m_rdonly )
 | |
| 			sb->safePrintf ( "<b>read-only mode</b>" );
 | |
| 		// if cast=1, command IS broadcast to all hosts
 | |
| 		else 
 | |
| 			sb->safePrintf ( "<b><a href=\"/%s?c=%s&"
 | |
| 					 "%s=%s\">" // &cast=%" INT32 "\">"
 | |
| 					 "<center>%s</center></a></b>", 
 | |
| 					 g_pages.getPath(m->m_page),coll,
 | |
| 					 cgi,v,//cast,
 | |
| 					 tt);
 | |
| 	}
 | |
| 	else if ( t == TYPE_BOOL2 ) {
 | |
| 		if ( g_conf.m_readOnlyMode && m->m_rdonly )
 | |
| 			sb->safePrintf ( "<b><center>read-only mode"
 | |
| 					 "</center></b>");
 | |
| 		// always use m_def as the value for TYPE_BOOL2
 | |
| 		else
 | |
| 			sb->safePrintf ( "<b><a href=\"/%s?c=%s&%s=%s\">"
 | |
| 					 //"cast=1\">"
 | |
| 					 "<center>%s</center></a></b>", 
 | |
| 					 g_pages.getPath(m->m_page),coll,
 | |
| 					 cgi,m->m_def, m->m_title);
 | |
| 	}
 | |
| 	else if ( t == TYPE_CHECKBOX ) {
 | |
| 		//char *ddd1 = "";
 | |
| 		//char *ddd2 = "";
 | |
| 		//if ( *s ) ddd1 = " checked";
 | |
| 		//else      ddd2 = " checked";
 | |
| 		// just show the parm name and value if printing in json
 | |
| 		// if ( format == FORMAT_JSON ) { // isJSON ) {
 | |
| 		// 	if ( ! lastRow ) {
 | |
| 		// 		int32_t val = 0;
 | |
| 		// 		if ( *s ) val = 1;
 | |
| 		// 		sb->safePrintf("\"%s\":%" INT32 ",\n",cgi,val);
 | |
| 		// 	}
 | |
| 		// }
 | |
| 		//sb->safePrintf("<center><nobr>");
 | |
| 		sb->safePrintf("<nobr>");
 | |
| 		// this is part of the "HACK" fix below. you have to
 | |
| 		// specify the cgi parm in the POST request, and 
 | |
| 		// unchecked checkboxes are not included in the POST 
 | |
| 		// request.
 | |
| 		//if ( lastRow && m->m_page == PAGE_FILTERS ) 
 | |
| 		//	sb->safePrintf("<input type=hidden ");
 | |
| 		//char *val = "Y";
 | |
| 		//if ( ! *s ) val = "N";
 | |
| 		char *val = "";
 | |
| 		// "s" is invalid of parm has no "object"
 | |
| 		if ( m->m_obj == OBJ_NONE && m->m_def[0] != '0' )
 | |
| 			val = " checked";
 | |
| 		if ( m->m_obj != OBJ_NONE && s && *s ) 
 | |
| 			val = " checked";
 | |
| 		// s is NULL for GigablastRequest parms
 | |
| 		if ( ! s && m->m_def && m->m_def[0]=='1' )
 | |
| 			val = " checked";
 | |
| 		// in case it is not checked, submit that!
 | |
| 		// if it gets checked this should be overridden then
 | |
| 		sb->safePrintf("<input type=hidden name=%s value=0>"
 | |
| 			       , cgi );
 | |
| 		//else
 | |
| 		sb->safePrintf("<input type=checkbox value=1 ");
 | |
| 		//"<nobr><input type=button ");
 | |
| 		if ( m->m_page == PAGE_FILTERS)
 | |
| 			sb->safePrintf("id=id_%s ",cgi);
 | |
| 			
 | |
| 		sb->safePrintf("name=%s%s"
 | |
| 			       //" onmouseup=\""
 | |
| 			       //"if ( this.value=='N' ) {"
 | |
| 			       //"this.value='Y';"
 | |
| 			       //"} "
 | |
| 			       //"else if ( this.value=='Y' ) {"
 | |
| 			       //"this.value='N';"
 | |
| 			       //"}"
 | |
| 			       //"\" "
 | |
| 			       ">"
 | |
| 			       ,cgi
 | |
| 			       ,val);//,ddd);
 | |
| 		//
 | |
| 		// repeat for off position
 | |
| 		//
 | |
| 		//if ( ! lastRow || m->m_page != PAGE_FILTERS )  {
 | |
| 		//	sb->safePrintf(" Off:<input type=radio ");
 | |
| 		//	if ( m->m_page == PAGE_FILTERS)
 | |
| 		//		sb->safePrintf("id=id_%s ",cgi);
 | |
| 		//	sb->safePrintf("value=0 name=%s%s>",
 | |
| 		//		       cgi,ddd2);
 | |
| 		//}
 | |
| 		sb->safePrintf("</nobr>"
 | |
| 			       //"</center>"
 | |
| 			       );
 | |
| 	}
 | |
| 	else if ( t == TYPE_CHAR )
 | |
| 		sb->safePrintf ("<input type=text name=%s value=\"%" INT32 "\" "
 | |
| 				"size=3>",cgi,(int32_t)(*s));
 | |
| 	/*	else if ( t == TYPE_CHAR2 )
 | |
| 		sprintf (p,"<input type=text name=%s value=\"%" INT32 "\" "
 | |
| 		"size=3>",cgi,*(char*)s);*/
 | |
| 	else if ( t == TYPE_PRIORITY ) 
 | |
| 		printDropDown ( MAX_SPIDER_PRIORITIES , sb , cgi , *s , 
 | |
| 				false , false );
 | |
| 	else if ( t == TYPE_PRIORITY2 ) {
 | |
| 		// just show the parm name and value if printing in json
 | |
| 		// if ( format==FORMAT_JSON) // isJSON )
 | |
| 		// 	sb->safePrintf("\"%s\":%" INT32 ",\n",cgi,(int32_t)*(char *)s);
 | |
| 		// else
 | |
| 		printDropDown ( MAX_SPIDER_PRIORITIES , sb , cgi , *s ,
 | |
| 				true , true );
 | |
| 	}
 | |
| 	// this url filters parm is an array of SAFEBUFs now, so each is
 | |
| 	// a string and that string is the diffbot api url to use. 
 | |
| 	// the string is empty or zero length to indicate none.
 | |
| 	//else if ( t == TYPE_DIFFBOT_DROPDOWN ) {
 | |
| 	//	char *xx=NULL;*xx=0;
 | |
| 	//}
 | |
| 	//else if ( t == TYPE_UFP )
 | |
| 	else if ( t == TYPE_SAFEBUF && 
 | |
| 		  strcmp(m->m_title,"url filters profile")==0)
 | |
| 		// url filters profile drop down "ufp"
 | |
| 		printDropDownProfile ( sb , "ufp" , cr );//*s );
 | |
| 
 | |
| 	// do not expose master passwords or IPs to non-root admins
 | |
| 	else if ( ( m->m_flags & PF_PRIVATE ) && 
 | |
| 		  m->m_obj == OBJ_CONF &&
 | |
| 		  ! isMasterAdmin )
 | |
| 		return true;
 | |
| 
 | |
| 	// do not expose master passwords or IPs to non-root admins
 | |
| 	else if ( ( m->m_flags & PF_PRIVATE ) && 
 | |
| 		  m->m_obj == OBJ_COLL &&
 | |
| 		  ! isCollAdmin )
 | |
| 		return true;
 | |
| 
 | |
| 	else if ( t == TYPE_RETRIES    ) 
 | |
| 		printDropDown ( 4 , sb , cgi , *s , false , false );
 | |
| 	else if ( t == TYPE_FILEUPLOADBUTTON    ) {
 | |
| 		sb->safePrintf("<input type=file name=%s>",cgi);
 | |
| 	}
 | |
| 	else if ( t == TYPE_PRIORITY_BOXES ) {
 | |
| 		// print ALL the checkboxes when we get the first parm
 | |
| 		if ( j != 0 ) return status;
 | |
| 		printCheckBoxes ( MAX_SPIDER_PRIORITIES , sb , cgi , s );
 | |
| 	}
 | |
| 	else if ( t == TYPE_CMD )
 | |
| 		// if cast=0 it will be executed, otherwise it will be
 | |
| 		// broadcasted with cast=1 to all hosts and they will all
 | |
| 		// execute it
 | |
| 		sb->safePrintf ( "<b><a href=\"/%s?c=%s&%s=1\">" // cast=%" INT32 "
 | |
| 			  "<center>%s</center></a></b>",
 | |
| 			  g_pages.getPath(m->m_page),coll,
 | |
| 			  cgi,m->m_title);
 | |
| 	else if ( t == TYPE_FLOAT ) {
 | |
| 		// just show the parm name and value if printing in json
 | |
| 		// if ( format == FORMAT_JSON )//isJSON )
 | |
| 		// 	sb->safePrintf("\"%s\":%f,\n",cgi,*(float *)s);
 | |
| 		// else
 | |
| 		sb->safePrintf ("<input type=text name=%s "
 | |
| 				"value=\"%f\" "
 | |
| 				// 3 was ok on firefox but need 6
 | |
| 				// on chrome
 | |
| 				"size=7>",cgi,*(float *)s);
 | |
| 	}
 | |
| 	else if ( t == TYPE_IP ) {
 | |
| 		if ( m->m_max > 0 && j == jend ) 
 | |
| 			sb->safePrintf ("<input type=text name=%s value=\"\" "
 | |
| 					"size=12>",cgi);
 | |
| 		else
 | |
| 			sb->safePrintf ("<input type=text name=%s value=\"%s\" "
 | |
| 					"size=12>",cgi,iptoa(*(int32_t *)s));
 | |
| 	}
 | |
| 	else if ( t == TYPE_LONG ) {
 | |
| 		// just show the parm name and value if printing in json
 | |
| 		// if ( format == FORMAT_JSON ) // isJSON )
 | |
| 		// 	sb->safePrintf("\"%s\":%" INT32 ",\n",cgi,*(int32_t *)s);
 | |
| 		// else
 | |
| 		sb->safePrintf ("<input type=text name=%s "
 | |
| 				"value=\"%" INT32 "\" "
 | |
| 				// 3 was ok on firefox but need 6
 | |
| 				// on chrome
 | |
| 				"size=6>",cgi,*(int32_t *)s);
 | |
| 	}
 | |
| 	else if ( t == TYPE_LONG_CONST ) 
 | |
| 		sb->safePrintf ("%" INT32 "",*(int32_t *)s);
 | |
| 	else if ( t == TYPE_LONG_LONG )
 | |
| 		sb->safePrintf ("<input type=text name=%s value=\"%" INT64 "\" "
 | |
| 				"size=12>",cgi,*(int64_t *)s);
 | |
| 	else if ( t == TYPE_STRING || t == TYPE_STRINGNONEMPTY ) {
 | |
| 		int32_t size = m->m_size;
 | |
| 		// give regular expression box on url filters page more room
 | |
| 		//if ( m->m_page == PAGE_FILTERS ) {
 | |
| 		//	if ( size > REGEX_TXT_MAX ) size = REGEX_TXT_MAX;
 | |
| 		//}
 | |
| 		//else {
 | |
| 		if ( size > 20 ) size = 20;
 | |
| 		//}
 | |
| 		sb->safePrintf ("<input type=text name=%s size=%" INT32 " value=\"",
 | |
| 				cgi,size);
 | |
| 
 | |
| 		// if it has PF_DEFAULTCOLL flag set then use the coll
 | |
| 		if ( cr && (m->m_flags & PF_COLLDEFAULT) )
 | |
| 			sb->safePrintf("%s",cr->m_coll);
 | |
| 		else
 | |
| 			sb->dequote ( s , gbstrlen(s) );
 | |
| 
 | |
| 		sb->safePrintf ("\">");
 | |
| 	}
 | |
| 	// HACK: print a drop down not a textbox for selecting the
 | |
| 	// m_spiderDiffbotApiUrl[]. we can't just store this selection
 | |
| 	// as a number because m_diffbotApiList (a string of comma separated
 | |
| 	// items to select from) can change! it is not a typical dropdown.
 | |
| 	// so we have to record the actual text we selected, which is
 | |
| 	// basically the diffbot api url. this is because john can add
 | |
| 	// custom diffbot api urls at anytime to the list.
 | |
| 	/*
 | |
| 	else if ( t == TYPE_SAFEBUF && strcmp(m->m_cgi,"dapi") == 0 ) {
 | |
| 		SafeBuf *sx = (SafeBuf *)s;
 | |
| 		// just show the parm name and value if printing in json
 | |
| 		if ( isJSON ) {
 | |
| 			// this can be empty for the empty row i guess
 | |
| 			if ( sx->length() ) {
 | |
| 				// convert diffbot # to string
 | |
| 				sb->safePrintf("\"%s\":\"",cgi);
 | |
| 				// this is just the url path, not the title
 | |
| 				// of the menu option... so this would be
 | |
| 				// like "/api/article?u="
 | |
| 				sb->safeUtf8ToJSON (sx->getBufStart() );
 | |
| 				sb->safePrintf("\",\n");
 | |
| 			}
 | |
| 		}
 | |
| 		else
 | |
| 			printDiffbotDropDown ( sb , cgi , THIS , sx );
 | |
| 	}
 | |
| 	*/
 | |
| 	else if ( t == TYPE_CHARPTR ) {
 | |
| 		int32_t size = m->m_size;
 | |
| 		char *sp = NULL;
 | |
| 		if ( s && *s ) sp = *(char **)s;
 | |
| 		if ( ! sp ) sp = "";
 | |
| 		if ( m->m_flags & PF_TEXTAREA ) {
 | |
| 			sb->safePrintf ("<textarea name=%s rows=10 cols=80>",
 | |
| 					cgi);
 | |
| 			if ( m->m_obj != OBJ_NONE )
 | |
| 				sb->htmlEncode(sp,gbstrlen(sp),false);
 | |
| 			sb->safePrintf ("</textarea>");
 | |
| 		}
 | |
| 		else {
 | |
| 			sb->safePrintf ("<input type=text name=%s size=%" INT32 " "
 | |
| 					"value=\"",cgi,size);
 | |
| 			// if it has PF_DEFAULTCOLL flag set then use the coll
 | |
| 			if ( cr && (m->m_flags & PF_COLLDEFAULT) )
 | |
| 				sb->safePrintf("%s",cr->m_coll);
 | |
| 			else if ( sp )
 | |
| 				sb->dequote ( sp , gbstrlen(sp) );
 | |
| 			sb->safePrintf ("\">");
 | |
| 		}
 | |
| 	}
 | |
| 	else if ( t == TYPE_SAFEBUF ) {
 | |
| 		int32_t size = m->m_size;
 | |
| 		// give regular expression box on url filters page more room
 | |
| 		if ( m->m_page == PAGE_FILTERS ) {
 | |
| 			//if ( size > REGEX_TXT_MAX ) size = REGEX_TXT_MAX;
 | |
| 			size = 40;
 | |
| 		}
 | |
| 		else {
 | |
| 			if ( size > 20 ) size = 20;
 | |
| 		}
 | |
| 		SafeBuf *sx = (SafeBuf *)s;
 | |
| 
 | |
| 		SafeBuf tmp;
 | |
| 		// if printing a parm in a one-shot deal like GigablastRequest
 | |
| 		// then s and sx will always be NULL, so set to default
 | |
| 		if ( ! sx ) {
 | |
| 			sx = &tmp;
 | |
| 			char *def = m->m_def;
 | |
| 			// if it has PF_DEFAULTCOLL flag set then use the coll
 | |
| 			if ( cr && (m->m_flags & PF_COLLDEFAULT) ) 
 | |
| 				def = cr->m_coll;
 | |
| 			tmp.safePrintf("%s",def);
 | |
| 		}
 | |
| 
 | |
| 		// just show the parm name and value if printing in json
 | |
| 		// if ( format == FORMAT_JSON ) { // isJSON ) {
 | |
| 		// 	// this can be empty for the empty row i guess
 | |
| 		// 	if ( sx->length() ) {
 | |
| 		// 		// convert diffbot # to string
 | |
| 		// 		sb->safePrintf("\"%s\":\"",cgi);
 | |
| 		// 		if ( m->m_obj != OBJ_NONE )
 | |
| 		// 			sb->safeUtf8ToJSON (sx->getBufStart());
 | |
| 		// 		sb->safePrintf("\",\n");
 | |
| 		// 	}
 | |
| 		// }
 | |
| 		if ( m->m_flags & PF_TEXTAREA ) {
 | |
| 			int rows = 10;
 | |
| 			if ( m->m_flags & PF_SMALLTEXTAREA )
 | |
| 				rows = 4;
 | |
| 			sb->safePrintf ("<textarea id=tabox "
 | |
| 					"name=%s rows=%i cols=80>",
 | |
| 					cgi,rows);
 | |
| 			//sb->dequote ( s , gbstrlen(s) );
 | |
| 			// note it
 | |
| 			//log("hack: %s",sx->getBufStart());
 | |
| 			//sb->dequote ( sx->getBufStart() , sx->length() );
 | |
| 			if ( m->m_obj != OBJ_NONE )
 | |
| 				sb->htmlEncode(sx->getBufStart(),
 | |
| 					       sx->length(),false);
 | |
| 			sb->safePrintf ("</textarea>");
 | |
| 		}
 | |
| 		else {
 | |
| 			sb->safePrintf ("<input type=text name=%s size=%" INT32 " "
 | |
| 					"value=\"",
 | |
| 					cgi,size);
 | |
| 			//sb->dequote ( s , gbstrlen(s) );
 | |
| 			// note it
 | |
| 			//log("hack: %s",sx->getBufStart());
 | |
| 
 | |
| 
 | |
| 			if ( cr && 
 | |
| 			     (m->m_flags & PF_COLLDEFAULT) &&
 | |
| 			     sx &&
 | |
| 			     sx->length() <= 0 ) 
 | |
| 				sb->dequote ( cr->m_coll,gbstrlen(cr->m_coll));
 | |
| 
 | |
| 			// if parm is OBJ_NONE there is no stored valued
 | |
| 			else if ( m->m_obj != OBJ_NONE )
 | |
| 				sb->dequote ( sx->getBufStart(), sx->length());
 | |
| 
 | |
| 			sb->safePrintf ("\">");
 | |
| 		}
 | |
| 	}
 | |
| 	else if ( t == TYPE_STRINGBOX ) {
 | |
| 		sb->safePrintf("<textarea id=tabox rows=10 cols=64 name=%s>",
 | |
| 			       cgi);
 | |
| 		//p += urlEncode ( p , pend - p , s , gbstrlen(s) );
 | |
| 		//p += htmlDecode ( p , s , gbstrlen(s) );
 | |
| 		sb->htmlEncode ( s , gbstrlen(s), false );
 | |
| 		//sprintf ( p , "%s" , s );
 | |
| 		//p += gbstrlen(p);		
 | |
| 		sb->safePrintf ("</textarea>\n");
 | |
| 	}
 | |
| 	else if ( t == TYPE_CONSTANT ) 
 | |
| 		sb->safePrintf ("%s",m->m_title);
 | |
| 	else if ( t == TYPE_MONOD2 )
 | |
| 		sb->safePrintf ("%" INT32 "",j / 2 );
 | |
| 	else if ( t == TYPE_MONOM2 ) {
 | |
| 		/*
 | |
| 		if ( m->m_page == PAGE_PRIORITIES ) {
 | |
| 			if ( j % 2 == 0 ) sb->safePrintf ("old");
 | |
| 			else              sb->safePrintf ("new");
 | |
| 		}
 | |
| 		else
 | |
| 		*/
 | |
| 			sb->safePrintf ("%" INT32 "",j % 2 );
 | |
| 	}
 | |
| 	else if ( t == TYPE_RULESET ) ;
 | |
| 		// subscript is already included in "cgi"
 | |
| 		//g_pages.printRulesetDropDown ( sb         ,
 | |
| 		//			       user       ,
 | |
| 		//			       cgi        ,
 | |
| 		//			       *(int32_t *)s ,  // selected  
 | |
| 		//			       -1         ); // subscript
 | |
| 	else if ( t == TYPE_TIME ) {
 | |
| 		//time is stored as a string
 | |
| 		//if time is not stored properly, just write 00:00
 | |
| 		if ( s[2] != ':' )
 | |
| 			strcpy ( s, "00:00" );
 | |
| 
 | |
| 		char hr[3];
 | |
| 		char min[3];
 | |
| 		gbmemcpy ( hr, s, 2 );
 | |
| 		gbmemcpy ( min, s + 3, 2 );
 | |
| 		hr[2] = '\0';
 | |
| 		min[2] = '\0';
 | |
| 		// print the time in the input forms
 | |
| 		sb->safePrintf("<input type=text name=%shr size=2 "
 | |
| 			       "value=%s>h " 
 | |
| 			       "<input type=text name=%smin size=2 "
 | |
| 			       "value=%s>m " ,
 | |
| 			       cgi    , 
 | |
| 			       hr ,
 | |
| 			       cgi    , 
 | |
| 			       min  );
 | |
| 	}
 | |
| 
 | |
| 	else if ( t == TYPE_DATE || t == TYPE_DATE2 ) {
 | |
| 		// time is stored as int32_t
 | |
| 		int32_t ct = *(int32_t *)s;
 | |
| 		// get the time struct
 | |
| 		struct tm *tp = gmtime ( (time_t *)&ct ) ;
 | |
| 		// set the "selected" month for the drop down
 | |
| 		char *ss[12];
 | |
| 		for ( int32_t i = 0 ; i < 12 ; i++ ) ss[i]="";
 | |
| 		int32_t month = tp->tm_mon;
 | |
| 		if ( month < 0 || month > 11 ) month = 0; // Jan
 | |
| 		ss[month] = " selected";
 | |
| 		// print the date in the input forms
 | |
| 		sb->safePrintf(
 | |
| 			"<input type=text name=%sday "
 | |
| 			"size=2 value=%" INT32 "> "
 | |
| 			"<select name=%smon>"
 | |
| 			"<option value=0%s>Jan"
 | |
| 			"<option value=1%s>Feb"
 | |
| 			"<option value=2%s>Mar"
 | |
| 			"<option value=3%s>Apr"
 | |
| 			"<option value=4%s>May"
 | |
| 			"<option value=5%s>Jun"
 | |
| 			"<option value=6%s>Jul"
 | |
| 			"<option value=7%s>Aug"
 | |
| 			"<option value=8%s>Sep"
 | |
| 			"<option value=9%s>Oct"
 | |
| 			"<option value=10%s>Nov"
 | |
| 			"<option value=11%s>Dec"
 | |
| 			"</select>\n"
 | |
| 			"<input type=text name=%syr size=4 value=%" INT32 ">"
 | |
| 			"<br>"
 | |
| 			"<input type=text name=%shr size=2 "
 | |
| 			"value=%02" INT32 ">h " 
 | |
| 			"<input type=text name=%smin size=2 "
 | |
| 			"value=%02" INT32 ">m " 
 | |
| 			"<input type=text name=%ssec size=2 "
 | |
| 			"value=%02" INT32 ">s" ,
 | |
| 			cgi    ,
 | |
| 			(int32_t)tp->tm_mday ,
 | |
| 			cgi    ,
 | |
| 			ss[0],ss[1],ss[2],ss[3],ss[4],ss[5],ss[6],ss[7],ss[8],
 | |
| 			ss[9],ss[10],ss[11],
 | |
| 			cgi    ,
 | |
| 			(int32_t)tp->tm_year + 1900 ,
 | |
| 			cgi    , 
 | |
| 			(int32_t)tp->tm_hour ,
 | |
| 			cgi    , 
 | |
| 			(int32_t)tp->tm_min  ,
 | |
| 			cgi    ,
 | |
| 			(int32_t)tp->tm_sec  );
 | |
| 		/*
 | |
| 		if ( t == TYPE_DATE2 ) {
 | |
| 			p += gbstrlen ( p );
 | |
| 			// a int32_t after the int32_t is used for this
 | |
| 			int32_t ct = *(int32_t *)(THIS+m->m_off+4);
 | |
| 			char *ss = "";
 | |
| 			if ( ct ) ss = " checked";
 | |
| 			sprintf ( p , "<br><input type=checkbox "
 | |
| 				  "name=%sct value=1%s> use current "
 | |
| 				  "time\n",cgi,ss);
 | |
| 		}
 | |
| 		*/
 | |
| 	}
 | |
| 	else if ( t == TYPE_SITERULE ) {
 | |
| 		// print the siterec rules as a drop down
 | |
| 		char *ss[5];
 | |
| 		for ( int32_t i = 0; i < 5; i++ ) ss[i] = "";
 | |
| 		int32_t v = *(int32_t*)s;
 | |
| 		if ( v < 0 || v > 4 ) v = 0;
 | |
| 		ss[v] = " selected";
 | |
| 		sb->safePrintf ( "<select name=%s>"
 | |
| 				 "<option value=0%s>Hostname"
 | |
| 				 "<option value=1%s>Path Depth 1"
 | |
| 				 "<option value=2%s>Path Depth 2"
 | |
| 				 "<option value=3%s>Path Depth 3"
 | |
| 				 "</select>\n",
 | |
| 				 cgi, ss[0], ss[1], ss[2], ss[3] );
 | |
| 	}
 | |
| 
 | |
| 
 | |
| 	// end the input cell
 | |
| 	sb->safePrintf ( "</td>\n");
 | |
| 
 | |
| 	// "insert above" link? used for arrays only, where order matters
 | |
| 	if ( m->m_addin && j < jend ) {//! isJSON ) {
 | |
| 		sb->safePrintf ( "<td><a href=\"?c=%s&" // cast=1&"
 | |
| 				 //"ins_%s=1\">insert</td>\n",coll,cgi );
 | |
| 				 // insert=<rowNum>
 | |
| 				 // "j" is the row #
 | |
| 				 "insert=%" INT32 "\">insert</td>\n",coll,j );
 | |
| 	}
 | |
| 
 | |
| 	// does next guy start a new row?
 | |
| 	bool lastInRow = true; // assume yes
 | |
| 	if (mm+1<m_numParms&&m->m_rowid>=0&&m_parms[mm+1].m_rowid==m->m_rowid)
 | |
| 		lastInRow = false;
 | |
| 	if ( ((s_count-1) % nc) != (nc-1) ) lastInRow = false;
 | |
| 
 | |
| 	// . display the remove link for arrays if we need to
 | |
| 	// . but don't display if next guy does NOT start a new row
 | |
| 	//if ( m->m_max > 1 && lastInRow && ! isJSON ) {
 | |
| 	if ( m->m_addin && j < jend ) { //! isJSON ) {
 | |
| 	//     m->m_page != PAGE_PRIORITIES ) {
 | |
| 		// show remove link?
 | |
| 		bool show = true;
 | |
| 		//if ( j >= jend )  show = false;
 | |
| 		// get # of rows
 | |
| 		int32_t *nr = (int32_t *)((char *)THIS + m->m_off - 4);
 | |
| 		// are we the last row?
 | |
| 		bool lastRow = false;
 | |
| 		// yes, if this is true
 | |
| 		if ( j == *nr - 1 ) lastRow = true;
 | |
| 		// do not allow removal of last default url filters rule
 | |
| 		//if ( lastRow && !strcmp(m->m_cgi,"fsp")) show = false;
 | |
| 		char *suffix = "";
 | |
| 		if ( m->m_page == PAGE_MASTERPASSWORDS &&
 | |
| 		     m->m_type == TYPE_IP ) 
 | |
| 			suffix = "ip";
 | |
| 		if ( m->m_page == PAGE_MASTERPASSWORDS &&
 | |
| 		     m->m_type == TYPE_STRINGNONEMPTY ) 
 | |
| 			suffix = "pwd";
 | |
| 		if ( show )
 | |
| 			sb->safePrintf ("<td><a href=\"?c=%s&" // cast=1&"
 | |
| 					//"rm_%s=1\">"
 | |
| 					// remove=<rownum>
 | |
| 					"remove%s=%" INT32 "\">"
 | |
| 					"remove</a></td>\n",coll,//cgi );
 | |
| 					suffix,
 | |
| 					j); // j is row #
 | |
| 					
 | |
| 		else
 | |
| 			sb->safePrintf ( "<td></td>\n");
 | |
| 	}
 | |
| 
 | |
| 	if ( lastInRow ) sb->safePrintf ("</tr>\n");
 | |
| 	return status;
 | |
| }
 | |
| 
 | |
| /*
 | |
| // get the object of our desire
 | |
| char *Parms::getTHIS ( HttpRequest *r , int32_t page ) {
 | |
| 	// if not master controls, must be a collection rec
 | |
| 	//if ( page < PAGE_CGIPARMS ) return (char *)&g_conf;
 | |
| 	char *coll = r->getString ( "c" );
 | |
| 	// support john wanting to use "id" for the crawl id which is really
 | |
| 	// the collection id, hopefully won't conflict with other things.
 | |
| 	if ( ! coll ) coll = r->getString ( "id" );
 | |
| 	if ( ! coll || ! coll[0] )
 | |
| 		//coll = g_conf.m_defaultColl;
 | |
| 		coll = g_conf.getDefaultColl( r->getHost(), r->getHostLen() );
 | |
| 	CollectionRec *cr = g_collectiondb.getRec ( coll );
 | |
| 	if ( ! cr ) log("admin: Collection \"%s\" not found.", 
 | |
| 			r->getString("c") );
 | |
| 	return (char *)cr;
 | |
| }
 | |
| */
 | |
| 
 | |
| 
 | |
| // now we use this to set SearchInput and GigablastRequest
 | |
| bool Parms::setFromRequest ( HttpRequest *r , 
 | |
| 			     TcpSocket* s,
 | |
| 			     CollectionRec *newcr ,
 | |
| 			     char *THIS ,
 | |
| 			     int32_t objType ) {
 | |
| 
 | |
| 	// get the page from the path... like /sockets --> PAGE_SOCKETS
 | |
| 	//int32_t page = g_pages.getDynamicPageNumber ( r );
 | |
| 
 | |
| 	// use convertHttpRequestToParmList() for these because they
 | |
| 	// are persistent records that are updated on every shard.
 | |
| 	if ( objType == OBJ_COLL ) { char *xx=NULL;*xx=0; }
 | |
| 	if ( objType == OBJ_CONF ) { char *xx=NULL;*xx=0; }
 | |
| 
 | |
| 	// ensure valid
 | |
| 	if ( ! THIS ) {
 | |
| 		// it is null when no collection explicitly specified...
 | |
| 		log(LOG_LOGIC,"admin: THIS is null for setFromRequest");
 | |
| 		char *xx=NULL;*xx=0; 
 | |
| 	}
 | |
| 
 | |
| 	// need this for searchInput which takes default from "cr"
 | |
| 	//CollectionRec *cr = g_collectiondb.getRec ( r , true );
 | |
| 
 | |
| 	// no SearchInput.cpp does this and then overrides if xml feed
 | |
| 	// to set m_docsToScanForTopics
 | |
| 	//setToDefault ( THIS , objType , cr );
 | |
| 
 | |
| 	// loop through cgi parms
 | |
| 	for ( int32_t i = 0 ; i < r->getNumFields() ; i++ ) {
 | |
| 		// get cgi parm name
 | |
| 		char *field = r->getField    ( i );
 | |
| 		// find in parms list
 | |
| 		int32_t  j;
 | |
| 		Parm *m;
 | |
| 		for ( j = 0 ; j < m_numParms ; j++ ) {
 | |
| 			// get it
 | |
| 			m = &m_parms[j];
 | |
| 			// skip if not our type
 | |
| 			if ( m->m_obj != objType ) continue;
 | |
| 			// skip if offset is negative, that means none
 | |
| 			if ( m->m_off < 0 ) continue;
 | |
| 			// skip if no cgi parm, may not be configurable now
 | |
| 			if ( ! m->m_cgi ) continue;
 | |
| 			// otherwise, must match the cgi name exactly
 | |
| 			if ( strcmp ( field,m->m_cgi ) == 0 ) break;
 | |
| 		}
 | |
| 		// bail if the cgi field is not in the parms list
 | |
| 		if ( j >= m_numParms ) continue;
 | |
| 		// get the value of cgi parm (null terminated)
 | |
| 		char *v = r->getValue ( i );
 | |
| 		// empty?
 | |
| 		if ( ! v ) continue;
 | |
| 		// . skip if no value was provided
 | |
| 		// . unless it was a string! so we can make them empty.
 | |
| 		if ( v[0] == '\0' && 
 | |
| 		     m->m_type != TYPE_STRING && 
 | |
| 		     m->m_type != TYPE_STRINGBOX ) continue;
 | |
| 		// set it
 | |
| 		setParm ( (char *)THIS , m, j, 0, v, false,//not html enc
 | |
| 			  false );//true );
 | |
| 	}
 | |
| 
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| 
 | |
| bool Parms::insertParm ( int32_t i , int32_t an ,  char *THIS ) {
 | |
| 	Parm *m = &m_parms[i];
 | |
| 	// . shift everyone above down
 | |
| 	// . first int32_t at offset is always the count
 | |
| 	//   for arrays
 | |
| 	char *pos =  (char *)THIS + m->m_off ;
 | |
| 	int32_t  num = *(int32_t *)(pos - 4);
 | |
| 	// ensure we are valid
 | |
| 	if ( an >= num || an < 0 ) {
 | |
| 		log("admin: Invalid insertion of element "
 | |
| 		    "%" INT32 " in array of size %" INT32 " for \"%s\".",
 | |
| 		    an,num,m->m_title);
 | |
| 		return false;
 | |
| 	}
 | |
| 	// also ensure that we have space to put the parm in, because in
 | |
| 	// case of URl filters, it is bounded by MAX_FILTERS
 | |
| 	if ( num >= MAX_FILTERS ){
 | |
| 		log("admin: Invalid insert of element %" INT32 ", array is full "
 | |
| 		    "in size %" INT32 " for \"%s\".",an, num, m->m_title);
 | |
| 		return false;
 | |
| 	}
 | |
| 	// point to the place where the element is to be inserted
 | |
| 	char *src = pos + m->m_size * an;
 | |
| 
 | |
| 	//point to where it is to be moved
 | |
| 	char *dst = pos + m->m_size * ( an + 1 );
 | |
| 
 | |
| 	// how much to move
 | |
| 	int32_t size = ( num - an ) * m->m_size ;
 | |
| 	// move them
 | |
| 	memmove ( dst , src , size );
 | |
| 	// if the src was a TYPE_SAFEBUF clear it so we don't end up doing
 | |
| 	// a double free, etc.!
 | |
| 	memset ( src , 0 , m->m_size );
 | |
| 	// inc the count
 | |
| 	*(int32_t *)(pos-4) = (*(int32_t *)(pos-4)) + 1;
 | |
| 	// put the defaults in the inserted line
 | |
| 	setParm ( (char *)THIS , m , i , an , m->m_def , false ,false );
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| bool Parms::removeParm ( int32_t i , int32_t an , char *THIS ) {
 | |
| 	Parm *m = &m_parms[i];
 | |
| 	// . shift everyone above down
 | |
| 	// . first int32_t at offset is always the count
 | |
| 	//   for arrays
 | |
| 	char *pos =  (char *)THIS + m->m_off ;
 | |
| 	int32_t  num = *(int32_t *)(pos - 4);
 | |
| 	// ensure we are valid
 | |
| 	if ( an >= num || an < 0 ) {
 | |
| 		log("admin: Invalid removal of element "
 | |
| 		    "%" INT32 " in array of size %" INT32 " for \"%s\".",
 | |
| 		    an,num,m->m_title);
 | |
| 		return false;
 | |
| 	}
 | |
| 	// point to the element being removed
 | |
| 	char *dst = pos + m->m_size * an;
 | |
| 	// free memory pointed to by safebuf, if we are safebuf, before
 | |
| 	// overwriting it... prevents a memory leak
 | |
| 	if ( m->m_type == TYPE_SAFEBUF ) {
 | |
| 		SafeBuf *dx = (SafeBuf *)dst;
 | |
| 		dx->purge();
 | |
| 	}
 | |
| 	// then point to the good stuf
 | |
| 	char *src = pos + m->m_size * (an+1);
 | |
| 	// how much to bury it with
 | |
| 	int32_t size = (num - an - 1 ) * m->m_size ;
 | |
| 	// bury it
 | |
| 	gbmemcpy ( dst , src , size );
 | |
| 
 | |
| 	// and detach the buf on the tail so it doesn't core in Mem.cpp
 | |
| 	// when it tries to free...
 | |
| 	if ( m->m_type == TYPE_SAFEBUF ) {
 | |
| 		SafeBuf *tail = (SafeBuf *)(pos + m->m_size * (num-1));
 | |
| 		tail->detachBuf();
 | |
| 	}
 | |
| 
 | |
| 	// dec the count
 | |
| 	*(int32_t *)(pos-4) = (*(int32_t *)(pos-4)) - 1;
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
// . parse the string value "s" and store it into the parm described by "m"
//   (which must be the mm'th entry of m_parms[]) inside the object THIS
//   (Conf / CollectionRec / SearchInput / GigablastRequest per m->m_obj)
// . "j" is the element index when the parm is an array, 0 otherwise
// . "isHtmlEncoded" means "s" must be html-decoded before storing
// . "fromRequest" is no longer supported on this path (asserted below);
//   request-driven updates now go through the parm-list mechanism
void Parms::setParm ( char *THIS , Parm *m , int32_t mm , int32_t j , char *s ,
		      bool isHtmlEncoded , bool fromRequest ) {

	// fromRequest calls must not come through here any more; crash hard
	// so a bad call path gets noticed
	if ( fromRequest ) { char *xx=NULL;*xx=0; }

	// . this is just for setting CollectionRecs, so skip if offset < 0
	// . some parms are just for SearchInput (search parms)
	if ( m->m_off < 0 ) return;

	if ( m->m_obj == OBJ_NONE ) return ;

	// old/new values recorded for the statsdb "parm_change" stat below.
	// NOTE(review): only some type branches actually populate these
	// (char-family, float, double, long); others leave them 0.
	float oldVal = 0;
	float newVal = 0;

	// a NULL value is only legal for pointer-ish types or parms whose
	// default comes from another parm (m_defOff >= 0); otherwise force
	// the string "0" and complain
	if ( ! s && 
	     m->m_type != TYPE_CHARPTR && 
	     m->m_type != TYPE_FILEUPLOADBUTTON && 
	     m->m_defOff==-1) {
		s = "0";
		char *tit = m->m_title;
		if ( ! tit || ! tit[0] ) tit = m->m_xml;
		log(LOG_LOGIC,"admin: Parm \"%s\" had NULL default value. "
		    "Forcing to 0.",
		    tit);
		//char *xx = NULL; *xx = 0;
	}

	// sanity check
	if ( &m_parms[mm] != m ) {
		log(LOG_LOGIC,"admin: Not sane parameters.");
		char *xx = NULL; *xx = 0;
	}

	// if attempting to add beyond array max, bail out
	if ( j >= m->m_max && j >= m->m_fixed ) {
		log ( "admin: Attempted to set parm beyond limit. Aborting." );
		return;
	}

	// if we are setting a guy in an array AND he is NOT the first
	// in his row, ensure the guy before has a count of j+1 or more.
	//
	// crap, on the url filters page if you do not check "spidering 
	// enabled" checkbox when adding a new rule at the bottom of the
	// table, , then the spidering enabled parameter does not transmit so
	// the "respider frequency" ends up checking the "spider enabled"
	// array whose "count" was not incremented like it should have been.
	// HACK: make new line at bottom always have spidering enabled
	// checkbox set and make it impossible to unset.
	/*
	if ( m->m_max > 1 && m->m_rowid >= 0 && mm > 0 &&
	     m_parms[mm-1].m_rowid == m->m_rowid ) {
		char *pos =  (char *)THIS + m_parms[mm-1].m_off - 4 ;
		int32_t maxcount = *(int32_t *)pos;
		if ( j >= maxcount ) {
			log("admin: parm before \"%s\" is limiting us",
			    m_parms[mm-1].m_title);
			//log("admin: try nuking the url filters or whatever "
			//    "and re-adding");
			return;
		}
	}
	*/

	// ensure array count at least j+1
	if ( m->m_max > 1 ) {
		// . is this element we're adding bumping up the count?
		// . array count is 4 bytes before the array
		char *pos =  (char *)THIS + m->m_off - 4 ;
		// set the count to it if it is bigger than current count
		if ( j + 1 > *(int32_t *)pos ) *(int32_t *)pos = j + 1;
	}

	char  t   = m->m_type;

	// type dispatch: each branch parses "s", stores the value at the
	// type-appropriate offset/stride, and jumps to the "changed:" label
	// (or returns early when fromRequest and the value is unchanged)

	if      ( t == TYPE_CHAR           || 
		  t == TYPE_CHAR2          || 
		  t == TYPE_CHECKBOX       ||
		  t == TYPE_BOOL           ||
		  t == TYPE_BOOL2          ||
		  t == TYPE_PRIORITY       || 
		  t == TYPE_PRIORITY2      || 
		  //t == TYPE_DIFFBOT_DROPDOWN ||
		  t == TYPE_UFP            ||
		  t == TYPE_PRIORITY_BOXES || 
		  t == TYPE_RETRIES        ||
		  t == TYPE_FILTER           ) {
		// 1-byte types: stride is 1, so offset is m_off + j
		if ( fromRequest && *(char *)(THIS + m->m_off + j) == atol(s))
			return;
		if ( fromRequest)oldVal = (float)*(char *)(THIS + m->m_off +j);
		*(char *)(THIS + m->m_off + j) = atol ( s );
 		newVal = (float)*(char *)(THIS + m->m_off + j);
		goto changed; }
	else if ( t == TYPE_CHARPTR ) {
		// "s" might be NULL or m->m_def...
		// NOTE(review): stores the pointer itself, no copy -- the
		// caller's buffer must outlive THIS
		*(char **)(THIS + m->m_off + j) = s;
	}
	else if ( t == 	TYPE_FILEUPLOADBUTTON ) {
		// "s" might be NULL or m->m_def...
		*(char **)(THIS + m->m_off + j) = s;
	}
	else if ( t == TYPE_CMD ) {
		log(LOG_LOGIC, "conf: Parms: TYPE_CMD is not a cgi var.");
		return;	}
	else if ( t == TYPE_DATE2 || t == TYPE_DATE ) {
		int32_t v = (int32_t)atotime ( s ); 
		if ( fromRequest && *(int32_t *)(THIS + m->m_off + 4*j) == v ) 
			return;
		*(int32_t *)(THIS + m->m_off + 4*j) = v;
		if ( v < 0 ) log("conf: Date for <%s> of \""
				 "%s\" is not in proper format like: "
				 "01 Jan 1980 22:45",m->m_xml,s);
		goto changed; }
	else if ( t == TYPE_FLOAT ) {
		if( fromRequest &&
		    *(float *)(THIS + m->m_off + 4*j) == (float)atof ( s ) ) 
			return;
		// if changed within .00001 that is ok too, do not count
		// as changed, the atof() has roundoff errors
		//float curVal = *(float *)(THIS + m->m_off + 4*j);
		//float newVal = atof(s);
		//if ( newVal < curVal && newVal + .000001 >= curVal ) return;
		//if ( newVal > curVal && newVal - .000001 <= curVal ) return;
		if ( fromRequest ) oldVal = *(float *)(THIS + m->m_off + 4*j);
		*(float *)(THIS + m->m_off + 4*j) = (float)atof ( s );
		newVal = *(float *)(THIS + m->m_off + 4*j);
		goto changed; }
	else if ( t == TYPE_DOUBLE ) {
		// NOTE(review): the stride here is 4*j but sizeof(double)
		// is 8 (compare TYPE_LONG_LONG, which uses 8*j). elements
		// past j=0 would overlap -- confirm whether any TYPE_DOUBLE
		// parm is ever an array before relying on j>0 here.
		if( fromRequest &&
		    *(double *)(THIS + m->m_off + 4*j) == (double)atof ( s ) ) 
			return;
		if ( fromRequest ) oldVal = *(double *)(THIS + m->m_off + 4*j);
		*(double *)(THIS + m->m_off + 4*j) = (double)atof ( s );
		newVal = *(double *)(THIS + m->m_off + 4*j);
		goto changed; }
	else if ( t == TYPE_IP ) {
		if ( fromRequest && *(int32_t *)(THIS + m->m_off + 4*j) == 
		     (int32_t)atoip (s,gbstrlen(s) ) ) 
			return;
		*(int32_t *)(THIS + m->m_off + 4*j) = (int32_t)atoip (s,gbstrlen(s) ); 
		goto changed; }
	else if ( t == TYPE_LONG || t == TYPE_LONG_CONST || t == TYPE_RULESET||
		  t == TYPE_SITERULE ) {
		int32_t v = atol ( s );
		// min is considered valid if >= 0
		if ( m->m_min >= 0 && v < m->m_min ) v = m->m_min;
		if ( fromRequest && *(int32_t *)(THIS + m->m_off + 4*j) == v ) 
			return;
		if ( fromRequest)oldVal=(float)*(int32_t *)(THIS + m->m_off +4*j);
		*(int32_t *)(THIS + m->m_off + 4*j) = v;
		newVal = (float)*(int32_t *)(THIS + m->m_off + 4*j);
		goto changed; }
	else if ( t == TYPE_LONG_LONG ) {
		if ( fromRequest &&
		     *(uint64_t *)(THIS + m->m_off+8*j)==
		     strtoull(s,NULL,10))
			return;
		*(int64_t *)(THIS + m->m_off + 8*j) = strtoull(s,NULL,10);
		goto changed; }
	// like TYPE_STRING but dynamically allocates
	else if ( t == TYPE_SAFEBUF ) {
		int32_t len = gbstrlen(s);
		// no need to truncate since safebuf is dynamic
		//if ( len >= m->m_size ) len = m->m_size - 1; // truncate!!
		//char *dst = THIS + m->m_off + m->m_size*j ;
		// point to the safebuf, in the case of an array of
		// SafeBufs "j" is the # in the array, starting at 0
		SafeBuf *sb = (SafeBuf *)(THIS+m->m_off+(j*sizeof(SafeBuf)) );
		int32_t oldLen = sb->length();
		// why was this commented out??? we need it now that we
		// send email alerts when parms change!
		if ( fromRequest &&
		     ! isHtmlEncoded && oldLen == len &&
		     memcmp ( sb->getBufStart() , s , len ) == 0 ) 
			return;
		// nuke it
		sb->purge();
		// this means that we can not use string POINTERS as parms!!
		if ( ! isHtmlEncoded ) sb->safeMemcpy ( s , len ); 
		else                   len = sb->htmlDecode (s,len,false,0);
		// tag it
		sb->setLabel ( "parm1" );
		// ensure null terminated
		sb->nullTerm();
		// note it
		//log("hack: %s",s);

		// null term it all
		//dst[len] = '\0';
		//sb->reserve ( 1 );
		// null terminate but do not include as m_length so the
		// memcmp() above still works right
		//sb->m_buf[sb->m_length] = '\0';
		// . might have to set length
		// . used for CollectionRec::m_htmlHeadLen and m_htmlTailLen
		//if ( m->m_plen >= 0 ) 
		//	*(int32_t *)(THIS + m->m_plen) = len ;
		goto changed; 
	}
	else if ( t == TYPE_STRING         || 
		  t == TYPE_STRINGBOX      || 
		  t == TYPE_STRINGNONEMPTY ||
		  t == TYPE_TIME            ) {
		// fixed-size in-place buffer of m->m_size bytes per element
		int32_t len = gbstrlen(s);
		if ( len >= m->m_size ) len = m->m_size - 1; // truncate!!
		char *dst = THIS + m->m_off + m->m_size*j ;
		// why was this commented out??? we need it now that we
		// send email alerts when parms change!
		if ( fromRequest &&
		     ! isHtmlEncoded && (int32_t)gbstrlen(dst) == len &&
		     memcmp ( dst , s , len ) == 0 ) 
			return;
		// this means that we can not use string POINTERS as parms!!
		if ( ! isHtmlEncoded ) {gbmemcpy ( dst , s , len ); }
		else                   len = htmlDecode (dst , s,len,false,0);
		dst[len] = '\0';
		// . might have to set length
		// . used for CollectionRec::m_htmlHeadLen and m_htmlTailLen
		if ( m->m_plen >= 0 ) 
			*(int32_t *)(THIS + m->m_plen) = len ;
		goto changed; }
	// every branch above either returns or falls/jumps through to here
 changed:
	// tell gigablast the value is EXPLICITLY given -- no longer based
	// on default.conf
	//if ( m->m_obj == OBJ_COLL ) ((CollectionRec *)THIS)->m_orig[mm] = 2;

	// we do not recognize timezones correctly when this is serialized
	// into coll.conf, it says UTC, which is ignored in HttpMime.cpp's
	// atotime() function. and when we submit it i think we use the
	// local time zone, so the values end up changing every time we 
	// submit!!! i think it might read it in as UTC then write it out
	// as local time, or vice versa.
	if ( t == TYPE_DATE || t == TYPE_DATE2 ) return;

	// do not send if setting from startup
	// (fromRequest is asserted false at the top of this function, so
	// everything below is effectively dead on this path)
	if ( ! fromRequest ) return;

	// note it in the log
	log("admin: parm \"%s\" changed value",m->m_title);

	int64_t nowms = gettimeofdayInMillisecondsLocal();

	// . note it in statsdb
	// . record what parm change and from/to what value
	g_statsdb.addStat ( 0, // niceness ,
			    "parm_change" ,
			    nowms,
			    nowms,
			    0         , // value
			    m->m_hash , // parmHash
			    oldVal,
			    newVal);

	// if they turn spiders on or off then tell spiderloop to update
	// the active list
	//if ( strcmp(m->m_cgi,"cse") )
	//	g_spiderLoop.m_activeListValid = false;

	// only send email alerts if we are host 0 since everyone syncs up
	// with host #0 anyway
	if ( g_hostdb.m_hostId != 0 ) return;

	// send an email alert notifying the admins that this parm was changed
	// BUT ALWAYS send it if email alerts were just TURNED OFF 
	// ("sea" = Send Email Alerts)
	if ( ! g_conf.m_sendEmailAlerts && strcmp(m->m_cgi,"sea") != 0 )
		return;

	// if spiders we turned on, do not send an email alert, cuz we
	// turn them on when we restart the cluster
	if ( strcmp(m->m_cgi,"se")==0 && g_conf.m_spideringEnabled )
		return;


	char tmp[1024];
	Host *h0 = g_hostdb.getHost ( 0 );
	int32_t ip0 = 0;
	if ( h0 ) ip0 = h0->m_ip;
	sprintf(tmp,"%s: parm \"%s\" changed value",iptoa(ip0),m->m_title);
	g_pingServer.sendEmail ( NULL  , // Host ptr
				 tmp   , // msg
				 true  , // sendToAdmin
				 false , // oom?
				 false , // kernel error?
				 true  , // parm change?
				 true  );// force it? even if disabled?

	// now the spider collection can just check the collection rec
	//int64_t nowms = gettimeofdayInMilliseconds();
	//((CollectionRec *)THIS)->m_lastUpdateTime = nowms;

	return;
}
 | |
| 
 | |
| Parm *Parms::getParmFromParmHash ( int32_t parmHash ) {
 | |
| 	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
 | |
| 		Parm *m = &m_parms[i];
 | |
| 		if ( m->m_hash != parmHash ) continue;
 | |
| 		return m;
 | |
| 	}
 | |
| 	return NULL;
 | |
| }
 | |
| 
 | |
| 
 | |
// . reset every parm of object type "objType" on the object at "THIS"
//   (&g_conf for OBJ_CONF parms, otherwise a CollectionRec cast to char*)
//   back to its compiled-in default value via setParm()
// . if "argcr" is given, parms whose m_defOff >= 0 (e.g. SearchInput
//   parms) copy their default bytes from that CollectionRec instead
// . variable-size parm arrays are emptied (count set to 0); fixed-size
//   arrays (m_fixed > 0) get every slot set to the default
void Parms::setToDefault ( char *THIS , char objType , CollectionRec *argcr ) {
	// init if we should
	init();

	// . clear out any coll rec to get the diffbotApiNum dropdowns
	// . this is a backwards-compatibility hack since this new parm
	//   will not be in old coll.conf files and will not be properly
	//   initialize when displaying a url filter row.
	//if ( THIS != (char *)&g_conf ) {
	//	CollectionRec *cr = (CollectionRec *)THIS;
	//	memset ( cr->m_spiderDiffbotApiNum , 0 , MAX_FILTERS);
	//}

	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
		Parm *m = &m_parms[i];
		// only touch parms belonging to the requested object type
		if ( m->m_obj != objType ) continue;
		if ( m->m_obj == OBJ_NONE ) continue;
		if ( m->m_type == TYPE_COMMENT ) continue;
		// no, we gotta set GigablastRequest::m_contentFile to NULL
		//if ( m->m_type == TYPE_FILEUPLOADBUTTON ) 
		//	continue;
		if ( m->m_type == TYPE_MONOD2  ) continue;
		if ( m->m_type == TYPE_MONOM2  ) continue;
		if ( m->m_type == TYPE_CMD     ) continue;
		// keep conf parms and coll parms strictly separated
		if (THIS == (char *)&g_conf && m->m_obj != OBJ_CONF ) continue;
		if (THIS != (char *)&g_conf && m->m_obj == OBJ_CONF ) continue;
		// what is this?
		//if ( m->m_obj == OBJ_COLL ) {
		//	CollectionRec *cr = (CollectionRec *)THIS;
		//	if ( cr->m_bases[1] ) { char *xx=NULL;*xx=0; }
		//}
		// sanity check, make sure it does not overflow
		if ( m->m_obj == OBJ_COLL &&
		     m->m_off > (int32_t)sizeof(CollectionRec)){
			log(LOG_LOGIC,"admin: Parm in Parms.cpp should use "
			    "OBJ_COLL not OBJ_CONF");
			char *xx = NULL; *xx = 0;
		}
		//if ( m->m_page == PAGE_PRIORITIES )
		//	log("hey");
		// or 
		if ( m->m_page > PAGE_API && // CGIPARMS &&
		     m->m_page != PAGE_NONE &&
		     m->m_obj == OBJ_CONF ) {
			log(LOG_LOGIC,"admin: Page can not reference "
			    "g_conf and be declared AFTER PAGE_CGIPARMS in "
			    "Pages.h. Title=%s",m->m_title);
			char *xx = NULL; *xx = 0;
		}
		// if defOff >= 0 get from cr like for searchInput vals
		// whose default is from the collectionRec...
		if ( m->m_defOff >= 0 && argcr ) {
			// NOTE(review): unreachable -- the outer condition
			// already requires argcr to be non-NULL
			if ( ! argcr ) { char *xx=NULL;*xx=0; }
			// raw byte copy of the default out of the coll rec
			char *def = m->m_defOff+(char *)argcr;
			char *dst = (char *)THIS + m->m_off;
			gbmemcpy ( dst , def , m->m_size );
			continue;
		}
		// leave arrays empty, set everything else to default
		if ( m->m_max <= 1 ) {
			//if ( i == 282 )  // "query" parm
			//	log("hey");
			//if ( ! m->m_def ) { char *xx=NULL;*xx=0; }
			setParm ( THIS , m, i, 0, m->m_def, false/*not enc.*/,
				  false );
			//((CollectionRec *)THIS)->m_orig[i] = 1;
			//m->m_orig = 0; // set in setToDefaults()
		}
		// these are special, fixed size arrays
		// (scalars fall through here; presumably m_fixed <= 0 for
		// scalars so this branch only fires for arrays -- TODO confirm)
		if ( m->m_fixed > 0 ) {
			for ( int32_t k = 0 ; k < m->m_fixed ; k++ ) {
				setParm(THIS,m,i,k,m->m_def,false/*not enc.*/,
					false);
				//m->m_orig = 0; // set in setToDefaults()
				//((CollectionRec *)THIS)->m_orig[i] = 1;
			}
			continue;
		}
		// make array sizes 0
		if ( m->m_max <= 1 ) continue;
		// otherwise, array is not fixed size
		char *s = THIS + m->m_off ;
		// set count to 1 if a default is present
		//if (   m->m_def[0] ) *(int32_t *)(s-4) = 1;
		//else                 *(int32_t *)(s-4) = 0;
		// the element count of a variable-size parm array lives in
		// the 4 bytes immediately before the array data; zero it to
		// leave the array empty
		*(int32_t *)(s-4) = 0;
	}
}
 | |
| 
 | |
// . returns false and sets g_errno on error
// . you should set your "THIS" to its defaults before calling this
// . parses the xml config file "filename" and, for every parm of type
//   "objType", copies each matching tag's (html-decoded) value into
//   "THIS" via setParm(); array parms consume repeated tags
// . tags missing from "filename" are looked up in the backup xml m_xml2
//   (the "try2" path)
// . NOTE(review): the code that loaded m_xml2 from "filenameDef" is
//   commented out below, so m_xml2 appears to stay empty and the try2
//   path likely never finds anything -- confirm before relying on
//   filenameDef
bool Parms::setFromFile ( void *THIS        , 
			  char *filename    , 
			  char *filenameDef ,
			  char  objType ) {
	// make sure we're init'd
	init();
	// let em know
	//if ( THIS == &g_conf) log (LOG_INIT,"conf: Reading %s." , filename );
	// . let the log know what we are doing
	// . filename is NULL if a call from CollectionRec::setToDefaults()
	Xml xml;
	//char buf [ MAX_XML_CONF ];
	SafeBuf sb;
	if ( filename&&!setXmlFromFile(&xml,filename,&sb)){//buf,MAX_XML_CONF))
		log("parms: error setting from file %s: %s",filename,
		    mstrerror(g_errno));
		return false;
	}

	// . all the collectionRecs have the same default file in
	//   the workingDir/collections/default.conf
	// . so use our built in buffer for that
	/*
	if ( THIS != &g_conf && ! m_isDefaultLoaded ) {
		m_isDefaultLoaded = true;
		File f;
		f.set ( filenameDef );
		if ( ! f.doesExist() ) {
			log(LOG_INIT,
			    "db: Default collection configuration file "
			    "%s was not found. Newly created collections "
			    "will use hard coded defaults.",f.getFilename());
			goto skip;
		}
		if ( ! setXmlFromFile ( &m_xml2     , 
					filenameDef ,
					m_buf       ,
					MAX_XML_CONF ) ) return false;
	}

 skip:
	*/
	int32_t  vlen;
	char *v ;
	//char  c ;
	int32_t numNodes  = xml.getNumNodes();
	int32_t numNodes2 = m_xml2.getNumNodes();

	// now set THIS based on the parameters in the xml file
	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
		// get it
		Parm *m = &m_parms[i];
		if ( m->m_obj != objType ) continue;
		if ( m->m_obj == OBJ_NONE ) continue;
		//log(LOG_DEBUG, "Parms: %s: parm: %s", filename, m->m_xml);
		// . there are 2 object types, coll recs and g_conf, aka
		//   OBJ_COLL and OBJ_CONF.
		// . make sure we got the right parms for what we want
		if ( THIS == &g_conf && m->m_obj != OBJ_CONF ) continue;
		if ( THIS != &g_conf && m->m_obj == OBJ_CONF ) continue;
		// skip comments and command
		if ( m->m_type == TYPE_COMMENT  ) continue;
		if ( m->m_type == TYPE_FILEUPLOADBUTTON ) continue;
		if ( m->m_type == TYPE_MONOD2   ) continue;
		if ( m->m_type == TYPE_MONOM2   ) continue;
		if ( m->m_type == TYPE_CMD      ) continue;
		if ( m->m_type == TYPE_CONSTANT ) continue;
		// these are special commands really
		if ( m->m_type == TYPE_BOOL2    ) continue;
		//if ( strcmp ( m->m_xml , "forceDeleteUrls" ) == 0 )
		//	log("got it");
		// we did not get one from first xml file yet
		bool first = true;
		// array count
		int32_t j = 0;
		// node number
		int32_t nn = 0;
		// a tmp thingy: 1-byte scratch used to stand in for an
		// empty value below
		char tt[1];
		int32_t nb;
		int32_t newnn;
	loop:
		if ( m->m_obj == OBJ_NONE ) { char *xx=NULL;*xx=0; }
		// get xml node number of m->m_xml in the "xml" file
		newnn = xml.getNodeNum(nn,1000000,m->m_xml,gbstrlen(m->m_xml));
#ifdef _GLOBALSPEC_
		if ( m->m_priv == 2 ) continue;
		if ( m->m_priv == 3 ) continue;
#elif _CLIENT_
		// always use default value if client not allowed control of
		// NOTE(review): "#elif _CLIENT_" breaks if _CLIENT_ is
		// defined empty; "#elif defined(_CLIENT_)" would be the
		// robust spelling -- TODO confirm build flags
		if ( m->m_priv ) continue;
#elif _METALINCS_
		if ( m->m_priv == 2 ) continue;
		if ( m->m_priv == 3 ) continue;
#endif

		// debug
		//log("%s --> %" INT32 "",m->m_xml,nn);
		// try default xml file if none, but only if first try
		if ( newnn < 0 && first ) goto try2; 
		// it is valid, use it
		nn = newnn;
		// set the flag, we've committed the array to the first file
		first = false;
		// otherwise, we had some in this file, but now we're out
		if ( nn < 0 ) continue;
		// . next node is the value of this tag
		// . skip if none there
		if ( nn + 1 >= numNodes ) continue;
		// point to it
		v    = xml.getNode    ( nn + 1 );
		vlen = xml.getNodeLen ( nn + 1 );
		// if a back tag... set the value to the empty string
		if ( v[0] == '<' && v[1] == '/' ) vlen = 0;
		// now, extricate from the <![CDATA[ ... ]]> tag if we need to
		if ( m->m_type == TYPE_STRING         || 
		     m->m_type == TYPE_STRINGBOX      ||
		     m->m_type == TYPE_SAFEBUF        ||
		     m->m_type == TYPE_STRINGNONEMPTY   ) {
			char *oldv    = v;
			int32_t  oldvlen = vlen;
			// if next guy is NOT a tag node, try the next one
			if ( v[0] != '<' && nn + 2 < numNodes ) {
				v    = xml.getNode    ( nn + 2 );
				vlen = xml.getNodeLen ( nn + 2 );
			}
			// should be a <![CDATA[...]]>
			if ( vlen<12 || strncasecmp(v,"<![CDATA[",9)!=0 ) {
				log("conf: No <![CDATA[...]]> tag found "
				    "for \"<%s>\" tag. Trying without CDATA.",
				    m->m_xml);
				v    = oldv;
				vlen = oldvlen;
			}
			// point to the nugget: strip "<![CDATA[" (9 bytes)
			// and the trailing "]]>" (3 bytes, 12 total)
			else {
				v    += 9;
				vlen -= 12;
			}
		}
		// get the value
		//v = xml.getString ( nn , nn+2 , m->m_xml , &vlen );
		// this only happens when tag is there, but without a value
		if ( ! v || vlen == 0 ) { vlen = 0; v = tt; }
		//c = v[vlen];
		// null terminate in place (writes one byte into the xml
		// buffer, which we own via "sb" above)
		v[vlen]='\0';
		if ( vlen == 0 ){ 
			// . this is generally ok
			// . this is spamming the log so i am commenting out! (MDW)
			//log(LOG_INFO, "parms: %s: Empty value.", m->m_xml);
			// Allow an empty string
			//continue;
		}

		// now use proper cdata
		// we can't do this and be backwards compatible right now
		//nb = cdataDecode ( v , v , 0 );//, vlen , false ,0);
		// now decode it into itself
		nb = htmlDecode ( v , v , vlen , false ,0);
		v[nb] = '\0';
		// set our parm
		setParm ( (char *)THIS, m, i, j, v, false/*is html encoded?*/,
			  false );
		// we were set from the explicit file
		//((CollectionRec *)THIS)->m_orig[i] = 2;
		// go back
		//v[vlen] = c;
		// do not repeat same node
		nn++;
		// try to get the next node if we're an array
		if ( ++j < m->m_max || j < m->m_fixed ) { goto loop; }
		// otherwise, if not an array, go to next parm
		continue;
	try2:
		// get xml node number of m->m_xml in the "m_xml" file
		nn = m_xml2.getNodeNum(nn,1000000,m->m_xml,gbstrlen(m->m_xml));
		// otherwise, we had one in file, but now we're out
		if ( nn < 0 ) {
			// if it was ONLY a search input parm, with no
			// default value that can be changed in the 
			// CollectionRec then skip it
			// if ( m->m_soff  != -1 && 
			//      m->m_off   == -1 &&
			//      m->m_smaxc == -1 ) 
			// 	continue;
			// . if it is a string, like <adminIp> and default is 
			//   NULL then don't worry about reporting it
			// . no, just make the default "" then
			//if ( m->m_type==TYPE_STRING && ! m->m_def) continue;
			// bitch that it was not found
			//if ( ! m->m_def[0] ) 
			//	log("conf: %s does not have <%s> tag. "
			//	    "Omitting.",filename,m->m_xml);
			//else 
			/*
			if ( ! m->m_def ) //m->m_def[0] )
				log("conf: %s does not have <%s> tag. Using "
				    "default value of \"%s\".", filename,
				    m->m_xml,m->m_def);
			*/
			continue;
		}
		// . next node is the value of this tag
		// . skip if none there
		if ( nn + 1 >= numNodes2 ) continue;
		// point to it
		v    = m_xml2.getNode    ( nn + 1 );
		vlen = m_xml2.getNodeLen ( nn + 1 );
		// if a back tag... set the value to the empty string
		if ( v[0] == '<' && v[1] == '/' ) vlen = 0;
		// now, extricate from the <![CDATA[ ... ]]> tag if we need to
		if ( m->m_type == TYPE_STRING         || 
		     m->m_type == TYPE_STRINGBOX      ||
		     m->m_type == TYPE_STRINGNONEMPTY   ) {
			char *oldv    = v;
			int32_t  oldvlen = vlen;
			// reset if not a tag node
			if ( v[0] != '<' && nn + 2 < numNodes2 ) {
				v    = m_xml2.getNode    ( nn + 2 );
				vlen = m_xml2.getNodeLen ( nn + 2 );
			}
			// should be a <![CDATA[...]]>
			if ( vlen<12 || strncasecmp(v,"<![CDATA[",9)!=0 ) {
				log("conf: No <![CDATA[...]]> tag found "
				    "for \"<%s>\" tag. Trying without CDATA.",
				    m->m_xml);
				v    = oldv;
				vlen = oldvlen;
			}
			// point to the nugget
			else {
				v    += 9;
				vlen -= 12;
			}
		}
		// get the value
		//v = m_xml2.getString ( nn , nn+2 , m->m_xml , &vlen );
		// this only happens when tag is there, but without a value
		if ( ! v || vlen == 0 ) { vlen = 0; v = tt; }
		//c = v[vlen];
		v[vlen]='\0';
		// now decode it into itself
		nb = htmlDecode ( v , v , vlen , false,0);
		v[nb] = '\0';
		// set our parm
		setParm ( (char *)THIS, m, i, j, v, false/*is html encoded?*/,
			  false );
		// we were set from the backup default file
		//((CollectionRec *)THIS)->m_orig[i] = 1;
		// go back
		//v[vlen] = c;
		// do not repeat same node
		nn++;
		// try to get the next node if we're an array
		// (note: "first" is still true here, so the loop label will
		// fall back into try2 again for subsequent array elements)
		if ( ++j < m->m_max || j < m->m_fixed ) { goto loop; }
		// otherwise, if not an array, go to next parm
		continue;
	}

	// backwards compatible hack for old <masterPassword> tags:
	// collect each CDATA payload into g_conf.m_masterPwds, one per line
	for ( int32_t i = 1 ; i < numNodes ; i++ ) {
		if ( objType != OBJ_CONF ) break;
		XmlNode *pn = &xml.m_nodes[i-1];
		XmlNode *xn = &xml.m_nodes[i];
		// look for <masterPassword>
		if ( pn->m_tagNameLen != 14 ) continue;
		if ( xn->m_tagNameLen != 8 ) continue;
		// if it is not the OLD supported tag then skip
		if ( strncmp ( pn->m_tagName,"masterPassword",14 ) ) continue;
		if ( strncmp ( xn->m_tagName,"![CDATA[",8 ) ) continue;
		// otherwise append to buf
		char *text = xn->m_node + 9;
		int32_t  tlen = xn->m_nodeLen - 12;
		g_conf.m_masterPwds.safeMemcpy(text,tlen);
		// a \n
		g_conf.m_masterPwds.pushChar('\n');
		g_conf.m_masterPwds.nullTerm();
	}
	// another backwards compatible hack for old masterIp tags:
	// collect each CDATA payload into g_conf.m_connectIps, one per line
	for ( int32_t i = 1 ; i < numNodes ; i++ ) {
		if ( objType != OBJ_CONF ) break;
		XmlNode *xn = &xml.m_nodes[i];
		XmlNode *pn = &xml.m_nodes[i-1];
		// look for <masterPassword>
		if ( pn->m_tagNameLen != 8 ) continue;
		if ( xn->m_tagNameLen != 8 ) continue;
		// if it is not the OLD supported tag then skip
		if ( strncmp ( pn->m_tagName,"masterIp",8 ) ) continue;
		if ( strncmp ( xn->m_tagName,"![CDATA[",8 ) ) continue;
		// otherwise append to buf
		char *text = xn->m_node + 9;
		int32_t  tlen = xn->m_nodeLen - 12;
		// otherwise append to buf
		g_conf.m_connectIps.safeMemcpy(text,tlen);
		// a \n
		g_conf.m_connectIps.pushChar('\n');
		g_conf.m_connectIps.nullTerm();
	}

	/*

	  // no! now we warn with a redbox alert

	// always make sure we got some admin security
	if ( g_conf.m_numMasterIps <= 0 && g_conf.m_numMasterPwds <= 0 ) {
		//log(LOG_INFO,
		//    "conf: No master IP or password provided. Using default "
		//    "password 'footbar23'." );
		//g_conf.m_masterIps[0] = atoip ( "64.139.94.202", 13 );
		//g_conf.m_numMasterIps = 1;
		strcpy ( g_conf.m_masterPwds[0] , "footbar23" );
		g_conf.m_numMasterPwds = 1;
	}
	*/

	return true;
}
 | |
| 
 | |
| // returns false and sets g_errno on error
 | |
| bool Parms::setXmlFromFile(Xml *xml, char *filename, SafeBuf *sb ) {
 | |
| 	// File f;
 | |
| 	// f.set ( filename );
 | |
| 	// is it too big?
 | |
| 	// int32_t fsize = f.getFileSize();
 | |
| 	// if ( fsize > bufSize ) {
 | |
| 	// 	log ("conf: File size of %s is %" INT32 ", must be "
 | |
| 	// 	     "less than %" INT32 ".",f.getFilename(),fsize,bufSize );
 | |
| 	// 	char *xx = NULL; *xx = 0;
 | |
| 	// }
 | |
| 	// open it for reading
 | |
| 	// f.set ( filename );
 | |
| 	// if ( ! f.open ( O_RDONLY ) )
 | |
| 	// 	return log("conf: Could not open %s: %s.",
 | |
| 	// 	    filename,mstrerror(g_errno));
 | |
| 	// // read in the file
 | |
| 	// int32_t numRead = f.read ( buf , bufSize , 0 /*offset*/ );
 | |
| 	// f.close ( );
 | |
| 	// if ( numRead != fsize )
 | |
| 	//       return log ("conf: Could not read %s : %s.",
 | |
| 	// 		  filename,mstrerror(g_errno));
 | |
| 	// // null terminate it
 | |
| 	// buf [ fsize ] = '\0';
 | |
| 
 | |
| 	sb->load ( filename );
 | |
| 	char *buf = sb->getBufStart();
 | |
| 	if ( ! buf )
 | |
| 		return log ("conf: Could not read %s : %s.",
 | |
| 			    filename,mstrerror(g_errno));
 | |
| 		
 | |
| 	// . remove all comments in case they contain tags
 | |
| 	// . if you have a # as part of your string, it must be html encoded,
 | |
| 	//   just like you encode < and >
 | |
| 	char *s = buf;
 | |
| 	char *d = buf;
 | |
| 	while ( *s ) {
 | |
| 		// . skip comments
 | |
| 		// . watch out for html encoded pound signs though
 | |
| 		if ( *s == '#' ) {
 | |
| 			if (s>buf && *(s-1)=='&' && is_digit(*(s+1))) goto ok;
 | |
| 			while ( *s && *s != '\n' ) s++; 
 | |
| 			continue; 
 | |
| 		}
 | |
| 		// otherwise, transcribe over
 | |
| 	ok:
 | |
| 		*d++ = *s++;
 | |
| 	}
 | |
| 	*d = '\0';
 | |
| 	int32_t bufSize = d - buf;
 | |
| 	// . set to xml
 | |
| 	// . use version of 0
 | |
| 	return xml->set ( buf     , 
 | |
| 			  bufSize , 
 | |
| 			  false   , // ownData
 | |
| 			  0       , // allocSize
 | |
| 			  false   , // pureXml?
 | |
| 			  0       , // version
 | |
| 			  true    , // setParents
 | |
| 			  0       , // niceness
 | |
| 			  CT_XML  );
 | |
| }
 | |
| 
 | |
| //#define MAX_CONF_SIZE 200000
 | |
| 
 | |
| // returns false and sets g_errno on error
 | |
| bool Parms::saveToXml ( char *THIS , char *f , char objType ) {
 | |
| 	if ( g_conf.m_readOnlyMode ) return true;
 | |
| 	// print into buffer
 | |
| 	// "seeds" can be pretty big so go with safebuf now
 | |
| 	// fix so if we core in malloc/free we can still save conf
 | |
| 	char  tmpbuf[200000];
 | |
| 	SafeBuf sb(tmpbuf,200000);
 | |
| 	//char *p    = buf;
 | |
| 	//char *pend = buf + MAX_CONF_SIZE;
 | |
| 	int32_t  len ;
 | |
| 	//int32_t  n   ;
 | |
| 	File  ff  ;
 | |
| 	int32_t  j   ;
 | |
| 	int32_t  count;
 | |
| 	char *s;
 | |
| 	CollectionRec *cr = NULL;
 | |
| 	if ( THIS != (char *)&g_conf ) cr = (CollectionRec *)THIS;
 | |
| 	// now set THIS based on the parameters in the xml file
 | |
| 	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
 | |
| 		// get it
 | |
| 		Parm *m = &m_parms[i];
 | |
| 		if ( m->m_obj != objType ) continue;
 | |
| 		// . there are 2 object types, coll recs and g_conf, aka
 | |
| 		//   OBJ_COLL and OBJ_CONF.
 | |
| 		// . make sure we got the right parms for what we want
 | |
| 		if ( m->m_obj == OBJ_NONE ) continue;
 | |
| 		// skip dups
 | |
| 		if ( m->m_flags & PF_DUP ) continue;
 | |
| 		// do not allow searchinput parms through
 | |
| 		if ( m->m_obj == OBJ_SI ) continue;
 | |
| 		if ( THIS == (char *)&g_conf && m->m_obj != OBJ_CONF) continue;
 | |
| 		if ( THIS != (char *)&g_conf && m->m_obj == OBJ_CONF) continue;
 | |
| 		if ( m->m_type == TYPE_MONOD2  ) continue;
 | |
| 		if ( m->m_type == TYPE_MONOM2  ) continue;
 | |
| 		if ( m->m_type == TYPE_CMD ) continue;
 | |
| 		if ( m->m_type == TYPE_BOOL2 ) continue;
 | |
| 		if ( m->m_type == TYPE_FILEUPLOADBUTTON ) continue;
 | |
| 		// ignore if hidden as well! no, have to keep those separate
 | |
| 		// since spiderroundnum/starttime is hidden but should be saved
 | |
| 		if ( m->m_flags & PF_NOSAVE ) continue;
 | |
| 		// ignore if diffbot and we are not a diffbot/custom crawl
 | |
| 		if ( cr && 
 | |
| 		     ! cr->m_isCustomCrawl &&
 | |
| 		     (m->m_flags & PF_DIFFBOT) ) continue;
 | |
| 		// skip if we should not save to xml
 | |
| 		if ( ! m->m_save ) continue;
 | |
| 		// allow comments though
 | |
| 		if ( m->m_type == TYPE_COMMENT ) goto skip2;
 | |
| 		// skip if this was compiled for a client and they should not
 | |
| 		// see this control
 | |
| 		//#ifdef _GLOBALSPEC_
 | |
| 		//		if ( m->m_priv == 2 ) continue;
 | |
| 		//		if ( m->m_priv == 3 ) continue;
 | |
| 		//#elif _CLIENT_
 | |
| 		//		if ( m->m_priv ) continue;
 | |
| 		//#elif _METALINCS_
 | |
| 		//		if ( m->m_priv == 2 ) continue;
 | |
| 		//		if ( m->m_priv == 3 ) continue;
 | |
| 		//#endif
 | |
| 		// skip if offset is negative, that means none
 | |
| 		s = (char *)THIS + m->m_off ;
 | |
| 		// if array, count can be 0 or more than 1
 | |
| 		count = 1;
 | |
| 		if ( m->m_max   > 1 ) count = *(int32_t *)(s-4);
 | |
| 		if ( m->m_fixed > 0 ) count = m->m_fixed;
 | |
| 		// sanity check
 | |
| 		if ( count > 100000 ) {
 | |
| 			log(LOG_LOGIC,"admin: Outrageous array size in for "
 | |
| 			    "parameter %s. Does the array max size int32_t "
 | |
| 			    "preceed it in the conf class?",m->m_title);
 | |
| 			exit(-1);
 | |
| 		}
 | |
| skip2:
 | |
| 		// description, do not wrap words around lines
 | |
| 		char *d = m->m_desc;
 | |
| 		// if empty array mod description to include the tag name
 | |
| 		char tmp [10*1024];
 | |
| 		if ( m->m_max > 1 && count == 0 && gbstrlen(d) < 9000 &&
 | |
| 		     m->m_xml && m->m_xml[0] ) {
 | |
| 			char *cc = "";
 | |
| 			if ( d && d[0] ) cc = "\n";
 | |
| 			sprintf ( tmp , "%s%sUse <%s> tag.",d,cc,m->m_xml);
 | |
| 			d = tmp;
 | |
| 		}
 | |
| 		char *END  = d + gbstrlen(d);
 | |
| 		char *dend;
 | |
| 		char *last;
 | |
| 		char *start;
 | |
| 		// just print tag if it has no description
 | |
| 		if ( ! *d ) goto skip;
 | |
| 		//if ( p + gbstrlen(d)+5 >= pend ) goto hadError;
 | |
| 		//if ( p > buf ) *p++='\n';
 | |
| 		if ( sb.length() ) sb.pushChar('\n');
 | |
| 	loop:
 | |
| 		dend  = d + 77;
 | |
| 		if ( dend > END ) dend = END;
 | |
| 		last  = d;
 | |
| 		start = d;
 | |
| 		while ( *d && d < dend ) { 
 | |
| 			if ( *d == ' '  ) last = d; 
 | |
| 			if ( *d == '\n' ) { last = d; break; }
 | |
| 			d++; 
 | |
| 		}
 | |
| 		if ( ! *d ) last = d;
 | |
| 		//gbmemcpy ( p , "# " , 2 ); 
 | |
| 		//p += 2;
 | |
| 		sb.safeMemcpy("# ",2);
 | |
| 		//gbmemcpy ( p , start , last - start );
 | |
| 		//p += last - start;
 | |
| 		sb.safeMemcpy(start,last-start);
 | |
| 		//*p++='\n';
 | |
| 		sb.pushChar('\n');
 | |
| 		d = last + 1;
 | |
| 		if ( d < END && *d ) goto loop;
 | |
| 		// bail if comment
 | |
| 		if ( m->m_type == TYPE_COMMENT ) {
 | |
| 			//sprintf ( p , "\n" );
 | |
| 			//p += gbstrlen ( p );
 | |
| 			continue;
 | |
| 		}
 | |
| 		if ( m->m_type == TYPE_MONOD2  ) continue;
 | |
| 		if ( m->m_type == TYPE_MONOM2  ) continue;
 | |
| 
 | |
| 	skip:
 | |
| 		/* . note: this code commented out because it was specific to
 | |
| 		     an old client
 | |
| 		// if value is from default collection file, do not
 | |
| 		// explicitly list it
 | |
| 		if ( m->m_obj == OBJ_COLL &&
 | |
| 		     ((CollectionRec *)THIS)->m_orig[i] == 1 ) {
 | |
| 			sprintf ( p ,"# Value for <%s> tag taken from "
 | |
| 				  "default.conf.\n",m->m_xml );
 | |
| 			p += gbstrlen ( p );
 | |
| 			continue;
 | |
| 		}
 | |
| 		*/
 | |
| 
 | |
| 		// debug point
 | |
| 		//if ( m->m_type == TYPE_SAFEBUF )
 | |
| 		//	log("hey");
 | |
| 
 | |
| 		// loop over all in this potential array
 | |
| 		for ( j = 0 ; j < count ; j++ ) {
 | |
| 			// the xml
 | |
| 			//if ( p + gbstrlen(m->m_xml) >= pend ) goto hadError;
 | |
| 			if ( g_errno ) goto hadError;
 | |
| 			//sprintf ( p , "<%s>" , m->m_xml );
 | |
| 			//p += gbstrlen ( p );
 | |
| 			sb.safePrintf("<%s>" , m->m_xml );
 | |
| 			// print CDATA if string
 | |
| 			if ( m->m_type == TYPE_STRING         || 
 | |
| 			     m->m_type == TYPE_STRINGBOX      ||
 | |
| 			     m->m_type == TYPE_SAFEBUF        ||
 | |
| 			     m->m_type == TYPE_STRINGNONEMPTY   ) {
 | |
| 				//sprintf ( p , "<![CDATA[" );
 | |
| 				//p += gbstrlen ( p );
 | |
| 				sb.safeStrcpy( "<![CDATA[" );
 | |
| 			}
 | |
| 			// break point
 | |
| 			//if (strcmp ( m->m_xml , "filterRulesetDefault")==0)
 | |
| 			//	log("got it");
 | |
| 			// . represent it in ascii form
 | |
| 			// . this escapes out <'s and >'s
 | |
| 			// . this ALSO encodes #'s (xml comment indicators)
 | |
| 			//p = getParmHtmlEncoded(p,pend,m,s);
 | |
| 			getParmHtmlEncoded(&sb,m,s);
 | |
| 			// print CDATA if string
 | |
| 			if ( m->m_type == TYPE_STRING         || 
 | |
| 			     m->m_type == TYPE_STRINGBOX      ||
 | |
| 			     m->m_type == TYPE_SAFEBUF        ||
 | |
| 			     m->m_type == TYPE_STRINGNONEMPTY   ) {
 | |
| 				//sprintf ( p , "]]>" );
 | |
| 				//p += gbstrlen ( p );
 | |
| 				sb.safeStrcpy("]]>" );
 | |
| 			}
 | |
| 			// this is NULL if it ran out of room
 | |
| 			//if ( ! p ) goto hadError;
 | |
| 			if ( g_errno ) goto hadError;
 | |
| 			// advance to next element in array, if it is one
 | |
| 			s = s + m->m_size;
 | |
| 			// close the xml tag
 | |
| 			//if ( p + 4 >= pend ) goto hadError;
 | |
| 			//sprintf ( p , "</>\n" );
 | |
| 			//p += gbstrlen ( p );
 | |
| 			sb.safeStrcpy("</>\n" );
 | |
| 			if ( g_errno ) goto hadError;
 | |
| 		}
 | |
| 	}
 | |
| 	//*p = '\0';
 | |
| 	sb.nullTerm();
 | |
| 
 | |
| 	//ff.set ( f );
 | |
| 	//if ( ! ff.open ( O_RDWR | O_CREAT | O_TRUNC ) )
 | |
| 	//	return log("db: Could not open %s : %s",
 | |
| 	//		   ff.getFilename(),mstrerror(g_errno));
 | |
| 
 | |
| 	// save the parm to the file
 | |
| 	//len = gbstrlen(buf);
 | |
| 	len = sb.length();
 | |
| 	// use -1 for offset so we do not use pwrite() so it will not leave
 | |
| 	// garbage at end of file
 | |
| 	//n    = ff.write ( buf , len , -1 );
 | |
| 	//n    = ff.write ( sb.getBufStart() , len , -1 );
 | |
| 	//ff.close();
 | |
| 	//if ( n == len ) return true;
 | |
| 
 | |
| 	// save to filename "f". returns # of bytes written. -1 on error.
 | |
| 	if ( sb.safeSave ( f ) >= 0 )
 | |
| 		return true;
 | |
| 
 | |
| 	return log("admin: Could not write to file %s.",f);
 | |
|  hadError:
 | |
| 	return log("admin: Error writing to %s: %s",f,mstrerror(g_errno));
 | |
| 
 | |
| 	//File bigger than %" INT32 " bytes."
 | |
| 	//	   "  Please increase #define in Parms.cpp.",
 | |
| 	//	   (int32_t)MAX_CONF_SIZE);
 | |
| }
 | |
| 
 | |
| Parm *Parms::getParm ( char *cgi ) {
 | |
| 	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
 | |
| 		if ( ! m_parms[i].m_cgi ) continue ;
 | |
| 		if (   m_parms[i].m_cgi[0] != cgi[0] ) continue;
 | |
| 		if (   m_parms[i].m_cgi[1] != cgi[1] ) continue;
 | |
| 		if (   strcmp ( m_parms[i].m_cgi , cgi ) == 0 ) 
 | |
| 			return &m_parms[i];
 | |
| 	}
 | |
| 	return NULL;
 | |
| }
 | |
| 
 | |
| /*
 | |
| Parm *Parms::getParm2 ( char *cgi , int32_t cgiLen ) {
 | |
| 	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
 | |
| 		if ( ! m_parms[i].m_cgi ) continue ;
 | |
| 		if (   m_parms[i].m_cgi[0] != cgi[0] ) continue;
 | |
| 		if (   cgiLen >=2 && m_parms[i].m_cgi[1] != cgi[1] ) continue;
 | |
| 		// only compare as many letters as the cgi name has
 | |
| 		if (   strncmp ( m_parms[i].m_cgi , cgi , cgiLen ) ) continue;
 | |
| 		// that means we gotta check lengths next
 | |
| 		if ( gbstrlen(m_parms[i].m_cgi) != cgiLen ) continue;
 | |
| 		// got a match
 | |
| 		return &m_parms[i];
 | |
| 	}
 | |
| 	return NULL;
 | |
| }
 | |
| */
 | |
| /*
 | |
| #define PHTABLE_SIZE (MAX_PARMS*2)
 | |
| 
 | |
| Parm *Parms::getParm ( char *cgi ) {
 | |
| 	// make the hash table for the first call
 | |
| 	static int32_t  s_phtable [ PHTABLE_SIZE ];
 | |
| 	static Parm *s_phparm  [ PHTABLE_SIZE ];
 | |
| 	static bool  s_init = false;
 | |
| 	// do not re-make the table if we already did
 | |
| 	if ( s_init ) goto skipMakeTable;
 | |
| 	// ok, now make the table
 | |
| 	s_init = true;
 | |
| 	memset ( s_phparm , 0 , PHTABLE_SIZE );
 | |
| 	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
 | |
| 		if ( ! m_parms[i].m_cgi ) continue ;
 | |
| 		int32_t h = hash32 ( m_parms[i].m_cgi );
 | |
| 		int32_t n = h % PHTABLE_SIZE;
 | |
| 	while ( s_phparm[n] ) {
 | |
| 		// . sanity check
 | |
| 			// . we don't have that many parms, they should never 
 | |
| 			//   collide!!... but it is possible i guess.
 | |
| 			if ( s_phtable[n] == h ) {
 | |
| 				log(LOG_LOGIC,"Parms: collisions forbidden in "
 | |
| 				    "getParm(). Duplicate cgi name?");
 | |
| 				char *xx = NULL; *xx = 0;
 | |
| 			}
 | |
| 			if (++n >= PHTABLE_SIZE) n = 0;
 | |
| 		}
 | |
| 		s_phtable[n] = h; // fill the bucket
 | |
| 		s_phparm [n] = m; // the parm
 | |
| 	}
 | |
| skipMakeTable:
 | |
| 	// look up in table
 | |
| 	int32_t h = hash32 ( cgi );
 | |
| 	int32_t n = h % PHTABLE_SIZE;
 | |
| 	// while bucket is occupied and does not equal our hash... chain
 | |
| 	while ( s_phparm[n] && s_phtable[n] != h ) 
 | |
| 		if (++n >= PHTABLE_SIZE) n = 0;
 | |
| 	// if empty, no match
 | |
| 	return s_phparm[n];
 | |
| }
 | |
| */
 | |
| 
 | |
| bool Parms::getParmHtmlEncoded ( SafeBuf *sb , Parm *m , char *s ) {
 | |
| 	// do not breech the buffer
 | |
| 	//if ( p + 100 >= pend ) return p;
 | |
| 	// print it out
 | |
| 	char t = m->m_type;
 | |
| 	if ( t == TYPE_CHAR           || t == TYPE_BOOL           ||
 | |
| 	     t == TYPE_CHECKBOX       ||
 | |
| 	     t == TYPE_PRIORITY       || t == TYPE_PRIORITY2      || 
 | |
| 	     //t == TYPE_DIFFBOT_DROPDOWN ||
 | |
| 	     t == TYPE_UFP            ||
 | |
| 	     t == TYPE_PRIORITY_BOXES || t == TYPE_RETRIES        ||
 | |
| 	     t == TYPE_RETRIES        || t == TYPE_FILTER         ||
 | |
| 	     t == TYPE_BOOL2          || t == TYPE_CHAR2           ) 
 | |
| 		sb->safePrintf("%" INT32 "",(int32_t)*s);
 | |
| 	else if ( t == TYPE_FLOAT )
 | |
| 		sb->safePrintf("%f",*(float *)s);
 | |
| 	else if ( t == TYPE_IP ) 
 | |
| 		sb->safePrintf("%s",iptoa(*(int32_t *)s));
 | |
| 	else if ( t == TYPE_LONG || t == TYPE_LONG_CONST || t == TYPE_RULESET||
 | |
| 		  t == TYPE_SITERULE ) 
 | |
| 		sb->safePrintf("%" INT32 "",*(int32_t *)s);
 | |
| 	else if ( t == TYPE_LONG_LONG )
 | |
| 		sb->safePrintf("%" INT64 "",*(int64_t *)s);
 | |
| 	else if ( t == TYPE_SAFEBUF ) {
 | |
| 		SafeBuf *sb2 = (SafeBuf *)s;
 | |
| 		char *buf = sb2->getBufStart();
 | |
| 		//int32_t blen = 0;
 | |
| 		//if ( buf ) blen = gbstrlen(buf);
 | |
| 		//p = htmlEncode ( p , pend , buf , buf + blen , true ); // #?*
 | |
| 		// we can't do proper cdata and be backwards compatible
 | |
| 		//sb->cdataEncode ( buf );//, blen );//, true ); // #?*
 | |
| 		if ( buf ) sb->htmlEncode ( buf );
 | |
| 	}
 | |
| 	else if ( t == TYPE_STRING         || 
 | |
| 		  t == TYPE_STRINGBOX      ||
 | |
| 		  t == TYPE_STRINGNONEMPTY ||
 | |
| 		  t == TYPE_TIME) {
 | |
| 		//int32_t slen = gbstrlen ( s );
 | |
| 		// this returns the length of what was written, it may
 | |
| 		// not have converted everything if pend-p was too small...
 | |
| 		//p += saftenTags2 ( p , pend - p , s , len );
 | |
| 		//p = htmlEncode ( p , pend , s , s + slen , true /*#?*/);
 | |
| 		// we can't do proper cdata and be backwards compatible
 | |
| 		//sb->cdataEncode ( s );//, slen );//, true /*#?*/);
 | |
| 		sb->htmlEncode ( s );
 | |
| 	}
 | |
| 	else if ( t == TYPE_DATE || t == TYPE_DATE2 ) {
 | |
| 		// time is stored as int32_t
 | |
| 		int32_t ct = *(int32_t *)s;
 | |
| 		// get the time struct
 | |
| 		struct tm *tp = localtime ( (time_t *)&ct ) ;
 | |
| 		// set the "selected" month for the drop down
 | |
| 		char tmp[100];
 | |
| 		strftime ( tmp , 100 , "%d %b %Y %H:%M UTC" , tp );
 | |
| 		sb->safeStrcpy ( tmp );
 | |
| 		sb->setLabel("parm3");
 | |
| 	}
 | |
| 	//p += gbstrlen ( p );
 | |
| 	//return p;
 | |
| 	return true;
 | |
| }
 | |
| /*
 | |
| // returns the size needed to serialize parms
 | |
| int32_t Parms::getStoredSize() {
 | |
| 	int32_t size = 0;
 | |
| 
 | |
| 	// calling serialize with no ptr gets size
 | |
| 	serialize( NULL, &size );
 | |
| 	return size;
 | |
| }
 | |
| 
 | |
| // . serialize parms to buffer
 | |
| // . accepts addr of buffer ptr and addr of buffer size
 | |
| // . on entry buf can be NULL to determine required size
 | |
| // . if buf is not NULL, *bufSize must specify the size of buf
 | |
| // . on exit *buf is filled with serialized parms
 | |
| // . on exit *bufSize is set to the actual len of *buf 
 | |
| bool Parms::serialize( char *buf, int32_t *bufSize ) {
 | |
| 	g_errno = 0;
 | |
|  	if ( ! bufSize ) {
 | |
| 		g_errno = EBADENGINEER;
 | |
| 		log( "admin: serialize: bad engineer: no bufSize ptr" );
 | |
| 		*bufSize = 0;
 | |
| 		return false;
 | |
| 	}
 | |
| 	bool sizeChk = false;
 | |
| 	char *end = NULL;
 | |
| 	if ( ! buf ) sizeChk = true; // just calc size
 | |
| 	else end = buf + *bufSize;   // for overrun checking
 | |
| 
 | |
| 	// serialize  OBJ_CONF and OBJ_COLL parms 
 | |
| 	*bufSize = 0;
 | |
| 	char *p = buf;
 | |
| 
 | |
| 	// now the parms
 | |
| 	struct SerParm *sp = NULL;
 | |
| 	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
 | |
| 		Parm *m = &m_parms[i];
 | |
| 		
 | |
| 		// ignore these:
 | |
| 		if ( m->m_obj == OBJ_SI ) continue;    
 | |
| 		if ( m->m_off < 0 ) continue;          
 | |
| 		if ( m->m_type == TYPE_COMMENT ) continue;
 | |
| 		if ( m->m_type == TYPE_MONOD2  ) continue;
 | |
| 		if ( m->m_type == TYPE_MONOM2  ) continue;
 | |
| 		if ( m->m_type == TYPE_CMD     ) continue;
 | |
| 		if ( m->m_type == TYPE_LONG_CONST ) continue;
 | |
| 		if ( ! m->m_sync ) continue;  // parm is not to be synced
 | |
| 
 | |
| 		// determine the size of the parm value
 | |
| 		int32_t size = 0;
 | |
| 		if ( m->m_type == TYPE_CHAR           ) size = 1;
 | |
| 		if ( m->m_type == TYPE_CHAR2          ) size = 1;
 | |
| 		if ( m->m_type == TYPE_CHECKBOX       ) size = 1;
 | |
| 		if ( m->m_type == TYPE_BOOL           ) size = 1;
 | |
| 		if ( m->m_type == TYPE_BOOL2          ) size = 1;
 | |
| 		if ( m->m_type == TYPE_PRIORITY       ) size = 1;
 | |
| 		if ( m->m_type == TYPE_PRIORITY2      ) size = 1;
 | |
| 		//if ( m->m_type == TYPE_DIFFBOT_DROPDOWN) size = 1;
 | |
| 		if ( m->m_type == TYPE_PRIORITY_BOXES ) size = 1;
 | |
| 		if ( m->m_type == TYPE_RETRIES        ) size = 1;
 | |
| 		if ( m->m_type == TYPE_TIME           ) size = 6;
 | |
| 		if ( m->m_type == TYPE_DATE2          ) size = 4;
 | |
| 		if ( m->m_type == TYPE_DATE           ) size = 4;
 | |
| 		if ( m->m_type == TYPE_FLOAT          ) size = 4;
 | |
| 		if ( m->m_type == TYPE_IP             ) size = 4;
 | |
| 		if ( m->m_type == TYPE_RULESET        ) size = 4;
 | |
| 		if ( m->m_type == TYPE_LONG           ) size = 4;
 | |
| 		if ( m->m_type == TYPE_LONG_LONG      ) size = 8;
 | |
| 		if ( m->m_type == TYPE_STRING         ) size = m->m_size;
 | |
| 		if ( m->m_type == TYPE_STRINGBOX      ) size = m->m_size;
 | |
| 		if ( m->m_type == TYPE_STRINGNONEMPTY ) size = m->m_size;
 | |
| 		if ( m->m_type == TYPE_SAFEBUF        ) size = m->m_size;
 | |
| 		if ( m->m_type == TYPE_SITERULE       ) size = 4;
 | |
| 
 | |
| 		// . set size to the total size of array
 | |
 | // . set cnt to the number of items
 | |
| 		int32_t cnt = 1;
 | |
| 		if (m->m_fixed > 0) {
 | |
| 			size *= m->m_fixed;
 | |
| 			cnt = m->m_fixed;
 | |
| 		}
 | |
| 		else {
 | |
| 			size *= m->m_max;
 | |
| 			cnt = m->m_max;
 | |
| 		}
 | |
| 
 | |
| 		if ( m->m_obj == OBJ_CONF ) {
 | |
| 			bool overflew = serializeConfParm( m, i, &p, end, 
 | |
| 							   size, cnt, 
 | |
| 							   sizeChk, bufSize );
 | |
| 			if ( overflew ) goto overflow;
 | |
| 		}
 | |
| 	       	else if ( m->m_obj == OBJ_COLL )  {
 | |
| 			collnum_t j = g_collectiondb.getFirstCollnum ();
 | |
| 			while ( j >= 0 ) {
 | |
| 				CollectionRec *cr = g_collectiondb.getRec( j );
 | |
| 				bool overflew = serializeCollParm( cr,
 | |
| 								   m, i, &p, 
 | |
| 								   end,
 | |
| 								   size, cnt,
 | |
| 								   sizeChk,
 | |
| 								   bufSize );
 | |
| 				if ( overflew ) goto overflow;
 | |
| 				j = g_collectiondb.getNextCollnum ( j );
 | |
| 			}
 | |
| 		} 
 | |
| 	}
 | |
| 	if ( ! sizeChk ) {
 | |
| 		// set the final marker to 0s to indicate the end
 | |
| 		sp = (struct SerParm *)p;
 | |
| 		sp->i = 0;
 | |
| 		sp->obj = 0;
 | |
| 		sp->size = 0;
 | |
| 		sp->cnt = 0;
 | |
| 	}
 | |
| 	*bufSize += sizeof( struct SerParm );
 | |
| 
 | |
| 	return true;
 | |
|  
 | |
|  overflow:
 | |
| 	g_errno = EBADENGINEER;
 | |
| 	log(LOG_WARN, "admin: serialize: bad engineer: overflow" );
 | |
| 	*bufSize = 0;
 | |
| 	return false;
 | |
| }
 | |
| 
 | |
| // . serialize a conf parm
 | |
| // . if sizeChk is true then we do not serialize, but just get the
 | |
| //   bytes required if we did serialize
 | |
| // . serialize parm into *p, the cursor i guess, buf end is "end"
 | |
| bool Parms::serializeConfParm( Parm *m, int32_t i, char **p, char *end,
 | |
| 			       int32_t size, int32_t cnt, 
 | |
| 			       bool sizeChk, int32_t *bufSz ) {
 | |
| 	SerParm *sp = NULL;
 | |
| 
 | |
| 	// safebuf not supported here yet, but it for coll recs below
 | |
| 	// so copy code from there if you need it
 | |
| 	if ( m->m_type == TYPE_SAFEBUF ) { char *xx=NULL;*xx=0;}
 | |
| 
 | |
| 	if (m->m_type == TYPE_STRING || 
 | |
| 	    m->m_type == TYPE_STRINGBOX || 
 | |
| 	    m->m_type == TYPE_STRINGNONEMPTY ) {
 | |
| 		char *sVal = NULL;
 | |
| 		if ( ! sizeChk ) {
 | |
| 			sp = (SerParm *)*p;
 | |
| 			sp->i = i;          // index of parm
 | |
| 			sp->obj = OBJ_CONF; 
 | |
| 			sp->size = 0L;      // 0 for strings
 | |
| 			sp->cnt = cnt;      // # of strings
 | |
| 			// if an array, get num of member
 | |
| 			if ( cnt > 1 ) {
 | |
| 				sp->off = m->m_off - sizeof(int32_t);
 | |
| 				sp->num = *(int32_t *)((char *)&g_conf 
 | |
| 					 + sp->off);
 | |
| 			}
 | |
| 			else {
 | |
| 				sp->off = 0;
 | |
| 				sp->num = 0;
 | |
| 			}
 | |
| 
 | |
| 			sVal = sp->val;
 | |
| 		}
 | |
| 		char *sConf = (char *)&g_conf + m->m_off;
 | |
| 		int32_t totLen = 0;
 | |
| 		int32_t tcnt = cnt;
 | |
| 		while ( tcnt ) {
 | |
| 			int32_t len = gbstrlen( sConf );
 | |
| 			if ( ! sizeChk ) {
 | |
| 				// copy the parm value
 | |
| 				if ( sVal + len > end ) 
 | |
| 					return true; // overflow
 | |
| 				strcpy( sVal, sConf );
 | |
| 			}
 | |
| 			totLen += len + 1; // incl the NULL
 | |
| 			// inc conf ptr by size of strings
 | |
| 			sConf += m->m_size;
 | |
| 			// inc ser value by len of str + NULL
 | |
| 			sVal += len + 1;
 | |
| 			tcnt--;
 | |
| 		}
 | |
| 		if ( ! sizeChk ) {
 | |
| 			// inc by tot len of compacted strings
 | |
| 			*p += sizeof( *sp ) + totLen;
 | |
| 		} 
 | |
| 		*bufSz += sizeof( SerParm ) + totLen;
 | |
| 	}
 | |
| 	else {
 | |
| 		if ( ! sizeChk ) {
 | |
| 			sp = (SerParm *)*p;
 | |
| 			sp->i = i;
 | |
| 			sp->obj = OBJ_CONF; 
 | |
| 			sp->size = size;   // tot size if array
 | |
| 			sp->cnt = cnt;     // num of items
 | |
| 			// if array, get num of member
 | |
| 			if ( cnt > 1 ) {
 | |
| 				sp->off = m->m_off - sizeof(int32_t);
 | |
| 				sp->num = *(int32_t *)((char *)&g_conf 
 | |
| 					 + sp->off);
 | |
| 			}
 | |
| 			else {
 | |
| 				sp->off = 0;
 | |
| 				sp->num = 0;
 | |
| 			}
 | |
| 			
 | |
| 			// copy the parm's whole value
 | |
| 			if ( sp->val + size > end )
 | |
| 				return true; // overflow
 | |
| 			gbmemcpy( sp->val, 
 | |
| 				(char *)&g_conf + m->m_off, size );
 | |
| 			// inc by tot size if array
 | |
| 			*p += sizeof( *sp ) + size; 
 | |
| 		}
 | |
| 		*bufSz += sizeof( SerParm ) + size;
 | |
| 	}
 | |
| 
 | |
| 	return false;
 | |
| }
 | |
| 
 | |
| // . serialize a coll parm in CollectionRec.h
 | |
| // . if sizeChk is true then we do not serialize, but just get the
 | |
| //   bytes required if we did serialize
 | |
| // . serialize parm into *p, the cursor i guess, buf end is "end"
 | |
| bool Parms::serializeCollParm( CollectionRec *cr,
 | |
| 			       Parm *m, int32_t i, char **p, char *end,
 | |
| 			       int32_t size, int32_t cnt,
 | |
| 			       bool sizeChk, int32_t *bufSize) {
 | |
| 	SerParm *sp = NULL;
 | |
| 
 | |
| 	if (m->m_type == TYPE_STRING || 
 | |
| 	    m->m_type == TYPE_STRINGBOX || 
 | |
| 	    m->m_type == TYPE_SAFEBUF ||
 | |
| 	    m->m_type == TYPE_STRINGNONEMPTY ) {
 | |
| 		char *sVal = NULL;
 | |
| 		if ( ! sizeChk ) {
 | |
| 			sp = (SerParm *)*p;
 | |
| 			sp->i = i;     // index of parm
 | |
| 			sp->obj = OBJ_COLL; 
 | |
| 			sp->size = 0L; // 0 for strings
 | |
| 			sp->cnt = cnt; // # of strings
 | |
 | 			// is this parm an array of parms?
 | |
| 			if ( cnt > 1 ) {
 | |
| 				// the offset of the "count" or the 
 | |
| 				// "number of elements" in the array.
 | |
 | 				// it precedes the value of the first element
 | |
 | 				// as can be seen in the parms in 
 | |
| 				// CollectionRec.h.
 | |
| 				sp->off = m->m_off - sizeof(int32_t);
 | |
 | 				// store the # of them into "num"
 | |
| 				sp->num = *(int32_t *)((char *)cr + sp->off);
 | |
| 			}
 | |
| 			else {
 | |
| 				sp->off = 0;
 | |
| 				sp->num = 0;
 | |
| 			}
 | |
| 			sVal = sp->val;
 | |
| 		}
 | |
| 		// point to the actual parm itself
 | |
| 		char *sColl = (char *)cr + m->m_off;
 | |
| 		int32_t totLen = 0;
 | |
| 		// "cnt" is how many elements in the array
 | |
| 		int32_t tcnt = cnt;
 | |
| 		while ( tcnt ) {
 | |
| 			// the  length of the string
 | |
| 			int32_t len;
 | |
| 			// the string
 | |
| 			char *pstr;
 | |
| 			// if a safebuf, point to string it has
 | |
| 			if ( m->m_type == TYPE_SAFEBUF ) {
 | |
| 				SafeBuf *sx = (SafeBuf *)sColl;
 | |
| 				pstr = sx->getBuf();
 | |
| 				len = sx->length();
 | |
| 				if ( ! pstr ) pstr = "";
 | |
| 			}
 | |
| 			// get length of the string. if not a safebuf it will
 | |
| 			// just be an outright string in CollectionRec.h
 | |
| 			else {
 | |
| 				pstr = sColl;
 | |
| 				len = gbstrlen( sColl );
 | |
| 			}
 | |
| 			if ( ! sizeChk ) {
 | |
| 				// copy the string
 | |
| 				if  ( sVal+len > end ) {
 | |
| 					log("parms: buffer too small");
 | |
| 					return true; 
 | |
| 				}
 | |
| 				// this puts a \0 at the end
 | |
| 				strcpy( sVal, pstr );
 | |
| 			}
 | |
| 			totLen += len + 1; // incl NULL
 | |
| 			// . inc cr ptr by size of strs
 | |
| 			// . this is the size of the SafeBuf for TYPE_SAFEBUF
 | |
| 			sColl += m->m_size;
 | |
| 			// . inc the write cursor by string length + the \0
 | |
| 			sVal += len + 1;
 | |
| 			tcnt--;
 | |
| 		}
 | |
| 		if ( ! sizeChk ) {
 | |
| 			// inc by tot len of cmpctd str
 | |
| 			*p += sizeof( *sp ) + totLen; 
 | |
| 		}
 | |
| 		*bufSize += sizeof( SerParm ) + totLen;
 | |
| 	}
 | |
| 	else {
 | |
| 		if ( ! sizeChk ) {
 | |
| 			sp = (SerParm *)*p;
 | |
| 			sp->i = i;
 | |
| 			sp->obj = OBJ_COLL;
 | |
| 			sp->size = size; // tot size
 | |
| 			sp->cnt = cnt;  // num of items
 | |
| 			// get num of member
 | |
| 			if ( cnt > 1 ) {
 | |
| 				sp->off = m->m_off - sizeof(int32_t);
 | |
| 				sp->num = *(int32_t *)((char *)cr + sp->off);
 | |
| 			}
 | |
| 			else {
 | |
| 				sp->off = 0;
 | |
| 				sp->num = 0;
 | |
| 			}
 | |
| 			// copy whole value
 | |
| 			if ( sp->val + size > end )
 | |
| 				return true;
 | |
| 			gbmemcpy( sp->val, 
 | |
| 				(char *)cr + m->m_off, 
 | |
| 				size );
 | |
| 			// inc by whole size of value
 | |
| 			*p += sizeof( *sp ) + size; 
 | |
| 		}
 | |
| 		*bufSize += sizeof( SerParm ) + size;
 | |
| 	}
 | |
| 
 | |
| 	return false;
 | |
| }
 | |
| 
 | |
| 
 | |
| // deserialize parms from buffer and set our values to the new values
 | |
| void Parms::deserialize( char *buf ) {
 | |
| 	g_errno = 0;
 | |
| 	char *p = buf;
 | |
| 	bool confChgd = false;
 | |
| 
 | |
| 	SerParm *sp = (SerParm *)p;
 | |
| 	int32_t numLooped = 0;
 | |
| 	const int32_t MAX_LOOP = (int32_t)(MAX_PARMS*1.5);
 | |
| 	// if one of these is non-zero, we're still working
 | |
| 	while ( (sp->obj || sp->size || sp->cnt) && 
 | |
| 		(sp->obj > 0 && sp->size > 0 && sp->cnt > 0) &&
 | |
| 		numLooped < MAX_LOOP ) {
 | |
| 		// grab the parm we're working on
 | |
| 		if ( sp->i < 0 || sp->i >= m_numParms ) {
 | |
| 			log( "admin: invalid parm # in Parms::deserialize" );
 | |
| 			char *xx = NULL; *xx = 0;
 | |
| 		}
 | |
| 		Parm *m = &m_parms[ sp->i ]; 
 | |
| 		
 | |
| 		if ( sp->obj == OBJ_CONF ) {
 | |
| 			deserializeConfParm( m, sp, &p, &confChgd );
 | |
| 			sp = (struct SerParm *)p;
 | |
| 		}
 | |
| 		else if ( sp->obj == OBJ_COLL ) {
 | |
| 			collnum_t j = g_collectiondb.getFirstCollnum ();
 | |
| 			//if(j <= 0) { 
 | |
|                         //      log("coll: Collectiondb does not have a rec" );
 | |
|                         //        return;
 | |
|                         //}
 | |
|                         while ( j >= 0 ) {
 | |
| 				CollectionRec *cr = g_collectiondb.getRec( j );
 | |
| 				deserializeCollParm( cr, 
 | |
| 						     m, sp, &p );
 | |
| 				sp = (SerParm *)p;
 | |
| 				j = g_collectiondb.getNextCollnum ( j );
 | |
| 
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		// setup the next rec
 | |
| 		sp = (SerParm *)p;
 | |
| 		numLooped++;
 | |
| 	}
 | |
| 	if (numLooped >= MAX_LOOP) {
 | |
| 		log( "admin: infinite loop in Parms::deserialize(). halting!");
 | |
| 		char *xx = NULL; *xx = 0;
 | |
| 	}
 | |
| 
 | |
| 	// if we changed the conf, we need to save it
 | |
| 	if ( confChgd ) {
 | |
| 		g_conf.save ();
 | |
| 	}
 | |
| 
 | |
| 	// if we changed a CollectionRec, we need to save it
 | |
| 	int32_t j = g_collectiondb.getFirstCollnum ();
 | |
| 	while ( j >= 0 ) {
 | |
| 		CollectionRec *cr = g_collectiondb.getRec( j );
 | |
| 		if ( cr->m_needsSave ) {
 | |
| 			cr->save ();
 | |
| 			// so g_spiderCache can reload if sameDomainWait, etc.
 | |
| 			// have changed
 | |
| 			g_collectiondb.updateTime();
 | |
| 		}
 | |
| 		j = g_collectiondb.getNextCollnum ( j );
 | |
| 	}
 | |
| }
 | |
| 
 | |
| void Parms::deserializeConfParm( Parm *m, SerParm *sp, char **p,
 | |
| 				 bool *confChgd ) {
 | |
| 	if ( m->m_off + sp->size > (int32_t)sizeof(g_conf) || 
 | |
| 	     m->m_off + sp->size < 0 ){
 | |
| 		log(LOG_WARN, "admin: deserializing parm would overflow "
 | |
| 		    "the collection rec!");
 | |
| 		char *xx =0; *xx = 0;
 | |
| 	}
 | |
| 	if ( sp->size == 0 ) { // string
 | |
| 		char *sVal = sp->val;
 | |
| 		char *sConf = (char *)&g_conf + m->m_off;
 | |
| 		int32_t totLen = 0;
 | |
| 		bool goodParm = true;
 | |
| 		int32_t tcnt = sp->cnt;
 | |
| 		while ( tcnt ) {
 | |
| 			goodParm = (goodParm && 0 == strcmp( sVal, sConf ));
 | |
| 			int32_t len = gbstrlen( sVal );
 | |
| 			totLen += len + 1;
 | |
| 			// inc ser value by len of str + NULL
 | |
| 			sVal += len + 1;
 | |
| 			// inc conf ptr by size of strings
 | |
| 			sConf += m->m_size;
 | |
| 			tcnt--;
 | |
| 		}
 | |
| 		if ( goodParm ) {
 | |
| 			// . inc by sizeof rec and tot len of compacted array
 | |
| 			*p += sizeof( *sp ) + totLen;
 | |
| 			return;
 | |
| 		}
 | |
| 		// parms don't match
 | |
| 		sVal = sp->val;
 | |
| 		sConf = (char *)&g_conf + m->m_off;
 | |
| 		totLen = 0;
 | |
| 		tcnt = sp->cnt;
 | |
| 		while ( tcnt ) {
 | |
| 			// copy an array value to this parm
 | |
| 			strcpy( sConf, sVal );
 | |
| 			int32_t len = gbstrlen( sVal );
 | |
| 			totLen += len + 1; // incl the NULL
 | |
| 			// inc conf ptr by size of strings
 | |
| 			sConf += m->m_size;
 | |
| 			// inc ser value by len of str + NULL
 | |
| 			sVal += len + 1;
 | |
| 			tcnt--;
 | |
| 		}
 | |
| 		
 | |
| 		// set num of member
 | |
| 		if ( sp->off ) {
 | |
| 			int32_t *tmp = (int32_t *)((char *)&g_conf + sp->off);
 | |
| 			*tmp = sp->num;
 | |
| 		}
 | |
| 		
 | |
| 		// log the changed parm
 | |
| 		log( LOG_INFO, "admin: Parm "
 | |
| 		     "#%" INT32 " \"%s\" (\"%s\") in conf "
 | |
| 		     "changed on sync.", 
 | |
| 		     sp->i, m->m_cgi, m->m_title );
 | |
| 		
 | |
| 		*confChgd = true;
 | |
| 		
 | |
| 		// inc by sizeof rec and tot len of compacted array
 | |
| 		*p += sizeof( *sp ) + totLen;
 | |
| 	}
 | |
| 	else {
 | |
| 		bool goodParm = ( 0 == memcmp( sp->val, 
 | |
| 					       (char *)&g_conf + m->m_off, 
 | |
| 					       sp->size ) );
 | |
| 		if ( ! goodParm ) {
 | |
| 			// copy the new parm to m's loc
 | |
| 			gbmemcpy( (char *)&g_conf + m->m_off, sp->val, 
 | |
| 				sp->size );
 | |
| 
 | |
| 			// set num of member
 | |
| 			if ( sp->off ) {             
 | |
| 				int32_t *tmp = (int32_t *)((char *)&g_conf 
 | |
| 						     + sp->off);
 | |
| 				*tmp = sp->num;
 | |
| 			}
 | |
| 			
 | |
| 			// log the changed parm
 | |
| 			log( LOG_INFO, "admin: Parm "
 | |
| 			     "#%" INT32 " \"%s\" (\"%s\") in conf "
 | |
| 			     "changed on sync.", 
 | |
| 			     sp->i, m->m_cgi, m->m_title );
 | |
| 			
 | |
| 			*confChgd = true;
 | |
| 		}
 | |
| 		// increase by rec size and size of parm
 | |
| 		*p += sizeof( *sp ) + sp->size;
 | |
| 	}
 | |
| }
 | |
| 
 | |
| void Parms::deserializeCollParm( CollectionRec *cr,
 | |
| 				 Parm *m, SerParm *sp, char **p ) {
 | |
| 	if ( m->m_off + sp->size > (int32_t)sizeof(CollectionRec) || 
 | |
| 	     m->m_off + sp->size < 0 ) {
 | |
| 		log(LOG_WARN, "admin: deserializing parm would overflow "
 | |
| 		    "the collection rec!");
 | |
| 		char *xx =0; *xx = 0;
 | |
| 	}
 | |
| 	if ( sp->size == 0 ) { // strings
 | |
| 		char *sVal = sp->val; // the sent string buffer i guess
 | |
| 		char *sColl = (char *)cr + m->m_off; // what we have
 | |
| 		int32_t totLen = 0;
 | |
| 		int32_t tcnt = sp->cnt; // # of strings
 | |
| 		bool goodParm = true;
 | |
| 		while ( tcnt ) {
 | |
| 
 | |
| 			char *pstr;
 | |
| 			if ( m->m_type == TYPE_SAFEBUF ) {
 | |
| 				SafeBuf *sx = (SafeBuf *)sColl;
 | |
| 				pstr = sx->getBuf();
 | |
| 			}
 | |
| 			else {
 | |
| 				pstr = sColl;
 | |
| 			}
 | |
| 
 | |
| 			// set goodParm to true if unchanged
 | |
| 			goodParm= (goodParm && 0 == strcmp(sVal, pstr));
 | |
| 			// get length of what was sent to us
 | |
| 			int32_t len = gbstrlen( sVal );
 | |
| 			totLen += len + 1; //incl NULL
 | |
| 			// this is a list of strings with \0s (sent to us)
 | |
| 			sVal += len + 1;   //incl NULL
 | |
| 			// inc by size of strs. point to next string we have
 | |
| 			// stored in our array of strings in CollectionRec.
 | |
| 			// for TYPE_SAFEBUF this size is sizeof(SafeBuf).
 | |
| 			sColl += m->m_size;
 | |
| 			tcnt--;
 | |
| 		}
 | |
| 		// if parm was an exact match return now
 | |
| 		if ( goodParm ) {
 | |
| 			// . inc by sizeof rec and 
 | |
| 			//   tot len of compacted array
 | |
| 			// . skip the SerParm and following string buffer.
 | |
| 			*p += sizeof( *sp ) + totLen;
 | |
| 			return;
 | |
| 		}
 | |
| 		//
 | |
| 		// if parms don't match, we need to update our stuff
 | |
| 		//
 | |
| 		//
 | |
| 		// point to the sent string buffer
 | |
| 		sVal = sp->val;
 | |
| 		// point to the local parm, array of strings or safebufs
 | |
| 		sColl = (char *)cr + m->m_off;
 | |
| 		totLen = 0;
 | |
| 		// how many strings or safebufs in there?
 | |
| 		tcnt = sp->cnt;
 | |
| 		// loop over each one
 | |
| 		while ( tcnt ) {
 | |
| 			if ( m->m_type == TYPE_SAFEBUF ) {
 | |
| 				SafeBuf *sx = (SafeBuf *)sColl;
 | |
| 				sx->set ( sVal );
 | |
| 				sx->nullTerm ( );
 | |
| 			}
 | |
| 			else {
 | |
| 				// copy an array value to this parm
 | |
| 				strcpy( sColl, sVal );
 | |
| 			}
 | |
| 			// get length of string we copied
 | |
| 			int32_t len = gbstrlen( sVal );
 | |
| 			totLen += len + 1; // +the NULL
 | |
| 			// . inc conf ptr by size 
 | |
| 			//   of strings
 | |
| 			sColl += m->m_size;
 | |
| 			// . inc ser value by len of str + NULL
 | |
| 			sVal += len + 1;
 | |
| 			tcnt--;
 | |
| 		}
 | |
| 		// we changed the record
 | |
| 		cr->m_needsSave = true;
 | |
| 		
 | |
| 		// set num of member
 | |
| 		if ( sp->off ) {
 | |
| 			int32_t *tmp = (int32_t *)((char *)cr + sp->off);
 | |
| 			*tmp = sp->num;
 | |
| 		}
 | |
| 		
 | |
| 		// log the changed parm
 | |
| 		log( LOG_INFO, "admin: Parm "
 | |
| 		     "#%" INT32 " \"%s\" (\"%s\") in "
 | |
| 		     "collection \"%s\" "
 | |
| 		     "changed on sync.", 
 | |
| 		     sp->i, m->m_cgi, m->m_title,
 | |
| 		     cr->m_coll );
 | |
| 		
 | |
| 		// . inc by sizeof rec and 
 | |
| 		//   tot len of compacted array
 | |
| 		*p += sizeof( *sp ) + totLen;
 | |
| 	}
 | |
| 	else {
 | |
| 		// sanity
 | |
| 		if ( m->m_type == TYPE_SAFEBUF ) { char *xx=NULL;*xx=0; }
 | |
| 
 | |
| 		if ( 0 != memcmp( sp->val, (char *)cr + m->m_off, sp->size) ) {
 | |
| 			// copy the new value
 | |
| 			gbmemcpy( (char *)cr + m->m_off, 
 | |
| 				sp->val,
 | |
| 				sp->size );
 | |
| 			
 | |
| 			// set num of member
 | |
| 			if ( sp->off ) {
 | |
| 				int32_t *tmp = (int32_t *)((char *)cr + sp->off);
 | |
| 				*tmp = sp->num;
 | |
| 			}
 | |
| 			
 | |
| 			// log the changed parm
 | |
| 			log( LOG_INFO, "admin: Parm "
 | |
| 			     "#%" INT32 " \"%s\" (\"%s\") "
 | |
| 			     "in collection \"%s\" "
 | |
| 			     "changed on sync.", 
 | |
| 			     sp->i, m->m_cgi, 
 | |
| 			     m->m_title,
 | |
| 			     cr->m_coll );
 | |
| 			
 | |
| 			// we changed the record
 | |
| 			cr->m_needsSave = true;
 | |
| 		}
 | |
| 		// inc by rec size and tot len of array
 | |
| 		*p += sizeof( *sp ) + sp->size;
 | |
| 	}
 | |
| }
 | |
| */
 | |
| 
 | |
| void Parms::init ( ) {
 | |
| 	// initialize the Parms class if we need to, only do it once
 | |
| 	static bool s_init = false ;
 | |
| 	if ( s_init ) return;
 | |
| 	s_init = true ;
 | |
| 
 | |
| 	// default all
 | |
| 	for ( int32_t i = 0 ; i < MAX_PARMS ; i++ ) {
 | |
| 		m_parms[i].m_parmNum= i;
 | |
| 		m_parms[i].m_hash   = 0         ;
 | |
| 		m_parms[i].m_title  = ""         ; // for detecting if not set
 | |
| 		m_parms[i].m_desc   = ""         ; // for detecting if not set
 | |
| 		m_parms[i].m_cgi    = NULL       ; // for detecting if not set
 | |
| 		m_parms[i].m_off    = -1         ; // for detecting if not set
 | |
| 		// for PAGE_FILTERS url filters for printing the url
 | |
| 		// filter profile parm above the url filters table rows.
 | |
| 		m_parms[i].m_colspan= -1; 
 | |
| 		m_parms[i].m_def    = NULL       ; // for detecting if not set
 | |
| 		m_parms[i].m_defOff = -1; // if default pts to collrec parm
 | |
| 		m_parms[i].m_type   = TYPE_NONE  ; // for detecting if not set
 | |
| 		m_parms[i].m_page   = -1         ; // for detecting if not set
 | |
| 		m_parms[i].m_obj    = -1         ; // for detecting if not set
 | |
| 		m_parms[i].m_max    =  1         ; // max elements in array
 | |
| 		m_parms[i].m_fixed  =  0         ; // size of fixed size array
 | |
| 		m_parms[i].m_size   =  0         ; // max string size
 | |
| 		m_parms[i].m_cast   =  1 ; // send to all hosts?
 | |
| 		m_parms[i].m_rowid  = -1 ; // rowid of -1 means not in row
 | |
| 		m_parms[i].m_addin  =  0 ; // add insert row command?
 | |
| 		m_parms[i].m_rdonly =  0 ; // is command off in read-only mode?
 | |
| 		m_parms[i].m_hdrs   =  1 ; // assume to always print headers
 | |
| 		m_parms[i].m_perms  =  0 ; // same as containing WebPages perms
 | |
| 		m_parms[i].m_plen   = -1 ; // offset for strings length
 | |
| 		m_parms[i].m_group  =  1 ; // start of a new group of controls?
 | |
| 		m_parms[i].m_priv   =  0 ; // is it private?
 | |
| 		m_parms[i].m_save   =  1 ; // save to xml file?
 | |
| 		m_parms[i].m_min    = -1 ; // min value (for int32_t parms)
 | |
| 		// search fields
 | |
| 		//m_parms[i].m_sparm  = 0;
 | |
| 		//m_parms[i].m_scmd   = NULL;//"/search";
 | |
| 		//m_parms[i].m_scgi   = NULL;// defaults to m_cgi
 | |
| 		m_parms[i].m_flags  = 0;
 | |
| 		m_parms[i].m_icon   = NULL;
 | |
| 		m_parms[i].m_class  = NULL;
 | |
| 		m_parms[i].m_qterm  = NULL;
 | |
| 		m_parms[i].m_subMenu= 0;
 | |
| 		m_parms[i].m_spriv  = 0;
 | |
| 		//         m_sdefo  = -1;   // just use m_off for this!
 | |
| 		m_parms[i].m_sminc  = -1;  // min in collection rec
 | |
| 		m_parms[i].m_smaxc  = -1;  // max in collection rec
 | |
| 		m_parms[i].m_smin   = 0x80000000; // 0xffffffff;
 | |
| 		m_parms[i].m_smax   = 0x7fffffff;
 | |
| 		//m_parms[i].m_soff   = -1; // offset into SearchInput
 | |
| 		m_parms[i].m_sprpg  =  1; // propagate to other pages via GET
 | |
| 		m_parms[i].m_sprpp  =  1; // propagate to other pages via POST
 | |
| 		m_parms[i].m_sync   = true;
 | |
| 	}
 | |
| 
 | |
| 	// inherit perms from page
 | |
| 	//for ( int32_t i = 1 ; i < MAX_PARMS ; i++ ) 
 | |
| 	//	if ( m_parms[i].m_page )
 | |
| 	//		m_parms[i].m_perms = m_parms[i-1].m_perms;
 | |
| 
 | |
| 	Parm *m = &m_parms [ 0 ];
 | |
| 
 | |
| 	CollectionRec cr;
 | |
| 	SearchInput   si;
 | |
| 
 | |
| 	///////////////////////////////////////////
 | |
| 	// CAN ONLY BE CHANGED IN CONF AT STARTUP (no cgi field)
 | |
| 	///////////////////////////////////////////
 | |
| 
 | |
| 	char *g = (char *)&g_conf;
 | |
| 	char *x = (char *)&cr;
 | |
| 	char *y = (char *)&si;
 | |
| 
 | |
| 
 | |
| 	//////////////
 | |
| 	// 
 | |
| 	// now for Pages.cpp printApiForPage() we need these
 | |
| 	//
 | |
| 	//////////////
 | |
| 
 | |
| 
 | |
| 	GigablastRequest gr;
 | |
| 
 | |
| 	InjectionRequest ir;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "delete collection";
 | |
| 	m->m_desc  = "A collection name to delete. You can specify multiple "
 | |
| 		"&delColl= parms in the request to delete multiple "
 | |
| 		"collections.";
 | |
| 	m->m_cgi   = "delColl";
 | |
| 	m->m_page  = PAGE_DELCOLL;
 | |
| 	m->m_obj   = OBJ_GBREQUEST;
 | |
| 	m->m_type  = TYPE_CHARPTR;//SAFEBUF;
 | |
| 	m->m_def   = NULL;
 | |
| 	m->m_flags = 0;//PF_API | PF_REQUIRED;
 | |
| 	m->m_off   = (char *)&gr.m_coll - (char *)&gr;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "delete collection";
 | |
| 	m->m_desc  = "A collection name to delete. You can specify multiple "
 | |
| 		"&delColl= parms in the request to delete multiple "
 | |
| 		"collections.";
 | |
| 	// camelcase as opposed to above lowercase
 | |
| 	m->m_cgi   = "delcoll";
 | |
| 	m->m_page  = PAGE_DELCOLL;
 | |
| 	m->m_obj   = OBJ_GBREQUEST;
 | |
| 	m->m_type  = TYPE_CHARPTR;//SAFEBUF;
 | |
| 	m->m_def   = NULL;
 | |
| 	m->m_flags = PF_API | PF_REQUIRED;
 | |
| 	m->m_off   = (char *)&gr.m_coll - (char *)&gr;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "add collection";
 | |
| 	m->m_desc  = "A collection name to add.";
 | |
| 	// camelcase support
 | |
| 	m->m_cgi   = "addColl";
 | |
| 	m->m_page  = PAGE_ADDCOLL;
 | |
| 	m->m_obj   = OBJ_GBREQUEST;
 | |
| 	m->m_type  = TYPE_CHARPTR;//SAFEBUF;
 | |
| 	m->m_def   = NULL;
 | |
| 	m->m_flags = PF_API | PF_REQUIRED;
 | |
| 	m->m_off   = (char *)&gr.m_coll - (char *)&gr;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "add collection";
 | |
| 	m->m_desc  = "A collection name to add.";
 | |
| 	// lowercase support
 | |
| 	m->m_cgi   = "addcoll";
 | |
| 	m->m_page  = PAGE_ADDCOLL;
 | |
| 	m->m_obj   = OBJ_GBREQUEST;
 | |
| 	m->m_type  = TYPE_CHARPTR;//SAFEBUF;
 | |
| 	m->m_def   = NULL;
 | |
| 	m->m_flags = PF_HIDDEN;
 | |
| 	m->m_off   = (char *)&gr.m_coll - (char *)&gr;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "collection";
 | |
| 	m->m_desc  = "Clone settings INTO this collection.";
 | |
| 	m->m_cgi   = "c";
 | |
| 	m->m_page  = PAGE_CLONECOLL;
 | |
| 	m->m_obj   = OBJ_GBREQUEST;
 | |
| 	m->m_type  = TYPE_CHARPTR;//SAFEBUF;
 | |
| 	m->m_def   = NULL;
 | |
| 	m->m_flags = PF_API | PF_REQUIRED;
 | |
| 	m->m_off   = (char *)&gr.m_coll - (char *)&gr;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "collection";
 | |
| 	m->m_desc  = "Use this collection.";
 | |
| 	m->m_cgi   = "c";
 | |
| 	m->m_page  = PAGE_BASIC_STATUS;
 | |
| 	m->m_obj   = OBJ_GBREQUEST;
 | |
| 	m->m_type  = TYPE_CHARPTR;//SAFEBUF;
 | |
| 	m->m_def   = NULL;
 | |
| 	m->m_flags = PF_API | PF_REQUIRED;
 | |
| 	m->m_off   = (char *)&gr.m_coll - (char *)&gr;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "collection";
 | |
| 	m->m_desc  = "Use this collection.";
 | |
| 	m->m_cgi   = "c";
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_GBREQUEST;
 | |
| 	m->m_type  = TYPE_CHARPTR;//SAFEBUF;
 | |
| 	m->m_def   = NULL;
 | |
| 	// do not show in html controls
 | |
| 	m->m_flags = PF_API | PF_REQUIRED | PF_NOHTML;
 | |
| 	m->m_off   = (char *)&gr.m_coll - (char *)&gr;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "collection";
 | |
| 	m->m_desc  = "Use this collection.";
 | |
| 	m->m_cgi   = "c";
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_GBREQUEST;
 | |
| 	m->m_type  = TYPE_CHARPTR;//SAFEBUF;
 | |
| 	m->m_def   = NULL;
 | |
| 	// do not show in html controls
 | |
| 	m->m_flags = PF_API | PF_REQUIRED | PF_NOHTML;
 | |
| 	m->m_off   = (char *)&gr.m_coll - (char *)&gr;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "collection";
 | |
| 	m->m_desc  = "Use this collection.";
 | |
| 	m->m_cgi   = "c";
 | |
| 	m->m_page  = PAGE_SPIDERDB;
 | |
| 	m->m_obj   = OBJ_GBREQUEST;
 | |
| 	m->m_type  = TYPE_CHARPTR;//SAFEBUF;
 | |
| 	m->m_def   = NULL;
 | |
| 	// do not show in html controls
 | |
| 	m->m_flags = PF_API | PF_REQUIRED | PF_NOHTML;
 | |
| 	m->m_off   = (char *)&gr.m_coll - (char *)&gr;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "collection";
 | |
| 	m->m_desc  = "Use this collection.";
 | |
| 	m->m_cgi   = "c";
 | |
| 	m->m_page  = PAGE_SITEDB;
 | |
| 	m->m_obj   = OBJ_GBREQUEST;
 | |
| 	m->m_type  = TYPE_CHARPTR;//SAFEBUF;
 | |
| 	m->m_def   = NULL;
 | |
| 	// do not show in html controls
 | |
| 	m->m_flags = PF_API | PF_REQUIRED | PF_NOHTML;
 | |
| 	m->m_off   = (char *)&gr.m_coll - (char *)&gr;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "collection";
 | |
| 	m->m_desc  = "Inject into this collection.";
 | |
| 	m->m_cgi   = "c";
 | |
| 	m->m_obj   = OBJ_GBREQUEST;
 | |
| 	m->m_type  = TYPE_CHARPTR;
 | |
| 	m->m_def   = NULL;
 | |
| 	// PF_COLLDEFAULT: so it gets set to default coll on html page
 | |
| 	m->m_flags = PF_API|PF_REQUIRED|PF_NOHTML; 
 | |
| 	m->m_page  = PAGE_INJECT;
 | |
| 	m->m_off   = (char *)&gr.m_coll - (char *)&gr;
 | |
| 	m++;
 | |
| 
 | |
| 	// //
 | |
| 	// // more global-ish parms
 | |
| 	// //
 | |
| 
 | |
| 	// m->m_title = "show settings";
 | |
| 	// m->m_desc  = "show settings or values for this page.";
 | |
| 	// m->m_cgi   = "showsettings";
 | |
| 	// m->m_page  = PAGE_MASTER;
 | |
| 	// m->m_obj   = OBJ_NONE;
 | |
| 	// m->m_type  = TYPE_BOOL;
 | |
| 	// m->m_def   = "1";
 | |
| 	// // do not show in html controls
 | |
| 	// m->m_flags = PF_API | PF_NOHTML;
 | |
| 	// m->m_off   = (char *)&gr.m_coll - (char *)&gr;
 | |
| 	// m++;
 | |
| 
 | |
| 
 | |
| 
 | |
| 	////////////
 | |
| 	//
 | |
| 	// end stuff for printApiForPage()
 | |
| 	//
 | |
| 	////////////
 | |
| 
 | |
| 	// just a comment in the conf file
 | |
| 	m->m_desc  = 
 | |
| 		"All <, >, \" and # characters that are values for a field "
 | |
| 		"contained herein must be represented as "
 | |
 | 		"&lt;, &gt;, &#034; and &#035; respectively.";
 | |
| 	m->m_type  = TYPE_COMMENT;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	// if the next guy has no description (m_desc) he is assumed to
 | |
| 	// share the description of the previous parm with one.
 | |
| 	/*
 | |
| 	m->m_title = "main external ip";
 | |
| 	m->m_desc  = "This is the IP and port that a user connects to in "
 | |
| 		"order to search this Gigablast network. This should be the "
 | |
| 		"same for all gb processes.";
 | |
| 	m->m_off   = (char *)&g_conf.m_mainExternalIp - g;
 | |
| 	m->m_def   = "127.0.0.1"; // if no default, it is required!
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "main external port";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off   = (char *)&g_conf.m_mainExternalPort - g;
 | |
| 	m->m_def   = "80";
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "indexdb split";
 | |
| 	m->m_desc  = "Number of times to split indexdb across groups. "
 | |
| 		"Must be a power of 2.";
 | |
| 	m->m_off   = (char *)&g_hostdb.m_indexSplits - g;
 | |
| 	// -1 means to do a full split just based on docid, just like titledb
 | |
| 	m->m_def   = "-1"; // "1";
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "full indexdb split";
 | |
| 	m->m_desc  = "Set to 1 (true) if indexdb is fully split. Performance "
 | |
| 		"is much better for fully split indexes.";
 | |
| 	m->m_off   = (char *)&g_conf.m_fullSplit - g;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "legacy indexdb split";
 | |
| 	m->m_desc  = "Set to 1 (true) if using legacy indexdb splitting.  For "
 | |
| 		"data generated with farmington release.";
 | |
| 	m->m_off   = (char *)&g_conf.m_legacyIndexdbSplit - g;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "tfndb extension bits";
 | |
| 	m->m_desc  = "Number of extension bits to use in Tfndb.  Increased for "
 | |
| 		"large indexes.";
 | |
| 	m->m_off   = (char *)&g_conf.m_tfndbExtBits - g;
 | |
| 	m->m_def   = "7";
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "checksumdb key size";
 | |
| 	m->m_desc  = "This determines the key size for checksums. " 
 | |
| 		     "Must be set for every host.";
 | |
| 	//m->m_cgi   = "";
 | |
| 	m->m_off   = (char *)&g_conf.m_checksumdbKeySize - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "12";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	// just a comment in the conf file
 | |
| 	m->m_desc  = 
 | |
|     "Below the various Gigablast databases are configured.\n"
 | |
|     "<*dbMaxTreeMem>          - mem used for holding new recs\n"
 | |
|     "<*dbMaxDiskPageCacheMem> - disk page cache mem for this db\n"
 | |
|     "<*dbMaxCacheMem>         - cache mem for holding single recs\n"
 | |
| //"<*dbMinFilesToMerge>     - required # files to trigger merge\n"
 | |
|     "<*dbSaveCache>           - save the rec cache on exit?\n"
 | |
|     "<*dbMaxCacheAge>         - max age (seconds) for recs in rec cache\n"
 | |
| 		"See that Stats page for record counts and stats.\n";
 | |
| 	m->m_type  = TYPE_COMMENT;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dns max cache mem";
 | |
| 	m->m_desc  = "How many bytes should be used for caching DNS replies?";
 | |
| 	m->m_off   = (char *)&g_conf.m_dnsMaxCacheMem - g;
 | |
| 	m->m_def   = "128000";
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_flags = PF_NOSYNC|PF_NOAPI;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	// g_dnsDistributed always saves now. main.cpp inits it that way.
 | |
| 	//m->m_title = "dns save cache";
 | |
| 	//m->m_desc  = "Should the DNS reply cache be saved/loaded on "
 | |
| 	//	"exit/startup?";
 | |
| 	//m->m_off   = (char *)&g_conf.m_dnsSaveCache - g;
 | |
| 	//m->m_def   = "0";
 | |
| 	//m->m_type  = TYPE_BOOL;
 | |
| 	//m++;
 | |
| 
 | |
| 	m->m_title = "tagdb max tree mem";
 | |
| 	m->m_desc  = "A tagdb record "
 | |
| 		"assigns a url or site to a ruleset. Each tagdb record is "
 | |
| 		"about 100 bytes or so.";
 | |
| 	m->m_off   = (char *)&g_conf.m_tagdbMaxTreeMem - g;
 | |
| 	m->m_def   = "1028000"; 
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_flags = PF_NOSYNC|PF_NOAPI;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	// m->m_title = "tagdb max page cache mem";
 | |
| 	// m->m_desc  = "";
 | |
| 	// m->m_off   = (char *)&g_conf.m_tagdbMaxDiskPageCacheMem - g;
 | |
| 	// m->m_def   = "200000"; 
 | |
| 	// m->m_type  = TYPE_LONG;
 | |
| 	// m->m_flags = PF_NOSYNC|PF_NOAPI;
 | |
| 	// m->m_page  = PAGE_NONE;
 | |
| 	// m->m_obj   = OBJ_CONF;
 | |
| 	// m++;
 | |
| 
 | |
| 	//m->m_title = "tagdb max cache mem";
 | |
| 	//m->m_desc  = "";
 | |
| 	//m->m_off   = (char *)&g_conf.m_tagdbMaxCacheMem - g;
 | |
| 	//m->m_def   = "128000"; 
 | |
| 	//m->m_type  = TYPE_LONG;
 | |
| 	//m++;
 | |
| 
 | |
| 	//m->m_title = "tagdb min files to merge";
 | |
| 	//m->m_desc  = "";
 | |
| 	//m->m_off   = (char *)&g_conf.m_tagdbMinFilesToMerge - g;
 | |
| 	//m->m_def   = "2"; 
 | |
| 	//m->m_type  = TYPE_LONG;
 | |
| 	//m->m_save  = 0;
 | |
| 	//m++;
 | |
| 
 | |
| 	m->m_title = "catdb max tree mem";
 | |
| 	m->m_desc  = "A catdb record "
 | |
| 		"assigns a url or site to DMOZ categories. Each catdb record "
 | |
| 		"is about 100 bytes.";
 | |
| 	m->m_off   = (char *)&g_conf.m_catdbMaxTreeMem - g;
 | |
| 	m->m_def   = "1000000"; 
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_flags = PF_NOSYNC|PF_NOAPI;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	// m->m_title = "catdb max page cache mem";
 | |
| 	// m->m_desc  = "";
 | |
| 	// m->m_off   = (char *)&g_conf.m_catdbMaxDiskPageCacheMem - g;
 | |
| 	// m->m_def   = "25000000";
 | |
| 	// m->m_type  = TYPE_LONG;
 | |
| 	// m->m_flags = PF_NOSYNC|PF_NOAPI;
 | |
| 	// m->m_page  = PAGE_NONE;
 | |
| 	// m->m_obj   = OBJ_CONF;
 | |
| 	// m++;
 | |
| 
 | |
| 	m->m_title = "catdb max cache mem";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off   = (char *)&g_conf.m_catdbMaxCacheMem - g;
 | |
| 	m->m_def   = "0"; 
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_flags = PF_NOSYNC|PF_NOAPI;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "catdb min files to merge";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off   = (char *)&g_conf.m_catdbMinFilesToMerge - g;
 | |
| 	m->m_def   = "2"; 
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_save  = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "revdb max tree mem";
 | |
| 	m->m_desc  = "Revdb holds the meta list we added for this doc.";
 | |
| 	m->m_off   = (char *)&g_conf.m_revdbMaxTreeMem - g;
 | |
| 	m->m_def   = "30000000"; 
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "timedb max tree mem";
 | |
| 	m->m_desc  = "Timedb holds event time intervals";
 | |
| 	m->m_off   = (char *)&g_conf.m_timedbMaxTreeMem - g;
 | |
| 	m->m_def   = "30000000"; 
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "titledb max tree mem";
 | |
| 	m->m_desc  = "Titledb holds the compressed documents that have been "
 | |
| 		"indexed.";
 | |
| 	m->m_off   = (char *)&g_conf.m_titledbMaxTreeMem - g;
 | |
| 	m->m_def   = "10000000"; 
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "titledb max cache mem";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off   = (char *)&g_conf.m_titledbMaxCacheMem - g;
 | |
| 	m->m_def   = "1000000"; 
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "titledb max cache age";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off   = (char *)&g_conf.m_titledbMaxCacheAge - g;
 | |
| 	m->m_def   = "86400";  // 1 day
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "titledb save cache";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off   = (char *)&g_conf.m_titledbSaveCache - g;
 | |
| 	m->m_def   = "0"; 
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "clusterdb max tree mem";
 | |
| 	m->m_desc  = "Clusterdb caches small records for site clustering "
 | |
| 		"and deduping.";
 | |
| 	m->m_off   = (char *)&g_conf.m_clusterdbMaxTreeMem - g;
 | |
| 	m->m_def   = "1000000";
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_flags = PF_NOSYNC|PF_NOAPI;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "clusterdb max cache mem";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off   = (char *)&g_conf.m_clusterdbMaxCacheMem - g;
 | |
| 	m->m_def   = "100000000"; 
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "clusterdb max page cache mem";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off =(char *)&g_conf.m_clusterdbMaxDiskPageCacheMem - g;
 | |
| 	m->m_def   = "100000000"; 
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	// this is overridden by collection
 | |
| 	m->m_title = "clusterdb min files to merge";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_cgi   = "cmftm";
 | |
| 	m->m_off   = (char *)&g_conf.m_clusterdbMinFilesToMerge - g;
 | |
| 	//m->m_def   = "2";
 | |
| 	m->m_def   = "-1"; // -1 means to use collection rec
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_save  = 0;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_flags = PF_NOAPI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "clusterdb save cache";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_cgi   = "cdbsc";
 | |
| 	m->m_off   = (char *)&g_conf.m_clusterdbSaveCache - g;
 | |
| 	m->m_def   = "0"; 
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_flags = PF_NOAPI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max vector cache mem";
 | |
| 	m->m_desc  = "Max memory for dup vector cache.";
 | |
| 	m->m_off   = (char *)&g_conf.m_maxVectorCacheMem - g;
 | |
| 	m->m_def   = "10000000";
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_flags = PF_NOSYNC|PF_NOAPI;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "checksumdb max tree mem";
 | |
| 	m->m_desc  = "Checksumdb is used for deduping same-site urls at "
 | |
| 		"index time.";
 | |
| 	m->m_off   = (char *)&g_conf.m_checksumdbMaxTreeMem - g;
 | |
| 	m->m_def   = "1000000"; 
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "checksumdb max cache mem";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off   = (char *)&g_conf.m_checksumdbMaxCacheMem - g;
 | |
| 	m->m_def   = "2000000"; 
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "checksumdb max page cache mem";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off =(char *)&g_conf.m_checksumdbMaxDiskPageCacheMem-g;
 | |
| 	m->m_def   = "1000000"; 
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 
 | |
| 	// this is overridden by collection
 | |
| 	m->m_title = "checksumdb min files to merge";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off   = (char *)&g_conf.m_checksumdbMinFilesToMerge- g;
 | |
| 	//m->m_def   = "2"; 
 | |
| 	m->m_def   = "-1"; // -1 means to use collection rec
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_save  = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "tfndb max tree mem";
 | |
| 	m->m_desc  = "Tfndb holds small records for each url in Spiderdb or "
 | |
| 		"Titledb.";
 | |
| 	m->m_off   = (char *)&g_conf.m_tfndbMaxTreeMem - g;
 | |
| 	m->m_def   = "1000000"; 
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "tfndb max page cache mem";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off   = (char *)&g_conf.m_tfndbMaxDiskPageCacheMem - g;
 | |
| 	m->m_def   = "5000000"; 
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	// this is overridden by collection
 | |
| 	m->m_title = "tfndb min files to merge";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off   = (char *)&g_conf.m_tfndbMinFilesToMerge - g;
 | |
| 	m->m_def   = "2"; 
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_save  = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "spiderdb max tree mem";
 | |
| 	m->m_desc  = "Spiderdb holds urls to be spidered.";
 | |
| 	m->m_off   = (char *)&g_conf.m_spiderdbMaxTreeMem - g;
 | |
| 	m->m_def   = "1000000"; 
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "spiderdb max cache mem";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off   = (char *)&g_conf.m_spiderdbMaxCacheMem - g;
 | |
| 	m->m_def   = "0"; 
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "spiderdb max page cache mem";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off   =(char *)&g_conf.m_spiderdbMaxDiskPageCacheMem-g;
 | |
| 	m->m_def   = "500000"; 
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 
 | |
| 	// this is overridden by collection
 | |
| 	m->m_title = "spiderdb min files to merge";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off   = (char *)&g_conf.m_spiderdbMinFilesToMerge - g;
 | |
| 	//m->m_def   = "2"; 
 | |
| 	m->m_def   = "-1"; // -1 means to use collection rec
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_save  = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "robotdb max cache mem";
 | |
| 	m->m_desc  = "Robotdb caches robot.txt files.";
 | |
| 	m->m_off   = (char *)&g_conf.m_robotdbMaxCacheMem - g;
 | |
| 	m->m_def   = "128000"; 
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_flags = PF_NOSYNC|PF_NOAPI;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "robotdb save cache";
 | |
| 	m->m_cgi   = "rdbsc";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off   = (char *)&g_conf.m_robotdbSaveCache - g;
 | |
| 	m->m_def   = "0"; 
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_flags = PF_NOAPI;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "indexdb max tree mem";
 | |
| 	m->m_desc  = "Indexdb holds the terms extracted from spidered "
 | |
| 		"documents."; 
 | |
| 	m->m_off   = (char *)&g_conf.m_indexdbMaxTreeMem - g;
 | |
| 	m->m_def   = "10000000";
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "indexdb max cache mem";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off   = (char *)&g_conf.m_indexdbMaxCacheMem - g;
 | |
| 	m->m_def   = "5000000";
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "indexdb max page cache mem";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off   = (char *)&g_conf.m_indexdbMaxDiskPageCacheMem - g;
 | |
| 	m->m_def   = "50000000";
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	// m->m_title = "linkdb max page cache mem";
 | |
| 	// m->m_desc  = "";
 | |
| 	// m->m_off   = (char *)&g_conf.m_linkdbMaxDiskPageCacheMem - g;
 | |
| 	// m->m_def   = "0";
 | |
| 	// m->m_type  = TYPE_LONG;
 | |
| 	// m->m_flags = PF_NOSYNC|PF_NOAPI;
 | |
| 	// m->m_page  = PAGE_NONE;
 | |
| 	// m->m_obj   = OBJ_CONF;
 | |
| 	// m++;
 | |
| 
 | |
| 	/*
 | |
| 	// this is overridden by collection
 | |
| 	m->m_title = "indexdb min files to merge";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off   = (char *)&g_conf.m_indexdbMinFilesToMerge - g;
 | |
| 	//m->m_def   = "6"; 
 | |
| 	m->m_def   = "-1"; // -1 means to use collection rec
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_save  = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "indexdb max index list age";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off   = (char *)&g_conf.m_indexdbMaxIndexListAge - g;
 | |
| 	m->m_def   = "60"; 
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 
 | |
| 	//m->m_title = "indexdb truncation limit";
 | |
| 	//m->m_desc  = "";
 | |
| 	//m->m_off   = (char *)&g_conf.m_indexdbTruncationLimit - g;
 | |
| 	//m->m_def   = "50000000"; 
 | |
| 	//m->m_type  = TYPE_LONG;
 | |
| 	//m++;
 | |
| 
 | |
| 	m->m_title = "indexdb save cache";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off   = (char *)&g_conf.m_indexdbSaveCache - g;
 | |
| 	m->m_def   = "0"; 
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "datedb max tree mem";
 | |
| 	m->m_desc  = "Datedb holds the terms extracted from spidered "
 | |
| 		"documents."; 
 | |
| 	m->m_off   = (char *)&g_conf.m_datedbMaxTreeMem - g;
 | |
| 	m->m_def   = "10000000";
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "datedb max cache mem";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off   = (char *)&g_conf.m_datedbMaxCacheMem - g;
 | |
| 	m->m_def   = "1000000";
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 
 | |
| 	// this is overridden by collection
 | |
| 	m->m_title = "datedb min files to merge";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off   = (char *)&g_conf.m_datedbMinFilesToMerge - g;
 | |
| 	//m->m_def   = "8"; 
 | |
| 	m->m_def   = "-1"; // -1 means to use collection rec
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_save  = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "datedb max index list age";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off   = (char *)&g_conf.m_datedbMaxIndexListAge - g;
 | |
| 	m->m_def   = "60"; 
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "datedb save cache";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off   = (char *)&g_conf.m_datedbSaveCache - g;
 | |
| 	m->m_def   = "0"; 
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "linkdb max tree mem";
 | |
| 	m->m_desc  = "Linkdb stores linking information";
 | |
| 	m->m_off   = (char *)&g_conf.m_linkdbMaxTreeMem - g;
 | |
| 	m->m_def   = "20000000";
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 
 | |
| 	// this is overridden by collection
 | |
| 	m->m_title = "linkdb min files to merge";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off   = (char *)&g_conf.m_linkdbMinFilesToMerge - g;
 | |
|   	m->m_def   = "-1"; // -1 means to use collection rec
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	//m->m_save  = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "quota table max mem";
 | |
| 	m->m_desc  = "For caching and keeping tabs on exact quotas per "
 | |
| 		"domain without having to do a disk seek. If you are using "
 | |
| 		"exact quotas and see a lot of disk seeks on Indexdb, try "
 | |
| 		"increasing this.";
 | |
| 	m->m_off   = (char *)&g_conf.m_quotaTableMaxMem - g;
 | |
| 	m->m_def   = "1000000";
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "statsdb max tree mem";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off   = (char *)&g_conf.m_statsdbMaxTreeMem - g;
 | |
| 	m->m_def   = "5000000";
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_flags = PF_NOSYNC|PF_NOAPI;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "statsdb max cache mem";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_off   = (char *)&g_conf.m_statsdbMaxCacheMem - g;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_flags = PF_NOSYNC|PF_NOAPI;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	// m->m_title = "statsdb max disk page cache mem";
 | |
| 	// m->m_desc  = "";
 | |
| 	// m->m_off   = (char *)&g_conf.m_statsdbMaxDiskPageCacheMem - g;
 | |
| 	// m->m_def   = "1000000";
 | |
| 	// m->m_type  = TYPE_LONG;
 | |
| 	// m->m_flags = PF_NOSYNC|PF_NOAPI;
 | |
| 	// m->m_page  = PAGE_NONE;
 | |
| 	// m->m_obj   = OBJ_CONF;
 | |
| 	// m++;
 | |
| 
 | |
| 	//m->m_title = "statsdb min files to merge";
 | |
| 	//m->m_desc  = "";
 | |
| 	//m->m_off   = (char *)&g_conf.m_statsdbMinFilesToMerge - g;
 | |
| 	//m->m_def   = "5";
 | |
| 	//m->m_type  = TYPE_LONG;
 | |
| 	//m++;
 | |
| 
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "use buckets for in memory recs";
 | |
| 	m->m_desc  = "Use buckets for in memory recs for indexdb, datedb, "
 | |
| 		"and linkdb.";
 | |
| 	m->m_off   = (char *)&g_conf.m_useBuckets - g;
 | |
| 	m->m_def   = "1"; 
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 
 | |
| 	m->m_title = "http max send buf size";
 | |
| 	m->m_desc  = "Maximum bytes of a doc that can be sent before having "
 | |
| 		"to read more from disk";
 | |
| 	m->m_cgi   = "hmsbs";
 | |
| 	m->m_off   = (char *)&g_conf.m_httpMaxSendBufSize - g;
 | |
| 	m->m_def   = "128000"; 
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_flags = PF_NOAPI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "search results max cache mem";
 | |
| 	m->m_desc  = "Bytes to use for caching search result pages.";
 | |
| 	m->m_off   = (char *)&g_conf.m_searchResultsMaxCacheMem - g;
 | |
| 	m->m_def   = "100000"; 
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_flags = PF_NOSYNC|PF_NOAPI;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	//m->m_title = "search results max cache age";
 | |
| 	//m->m_desc  = "Maximum age to cache search results page in seconds.";
 | |
| 	//m->m_off   = (char *)&g_conf.m_searchResultsMaxCacheAge - g;
 | |
| 	//m->m_def   = "86400"; 
 | |
| 	//m->m_type  = TYPE_LONG;
 | |
| 	//m++;
 | |
| 
 | |
| 	//m->m_title = "search results save cache";
 | |
| 	//m->m_desc  = "Should the search results cache be saved to disk?";
 | |
| 	//m->m_off   = (char *)&g_conf.m_searchResultsSaveCache - g;
 | |
| 	//m->m_def   = "0"; 
 | |
| 	//m->m_type  = TYPE_BOOL;
 | |
| 	//m++;
 | |
| 
 | |
| 	//m->m_title = "site link info max cache mem";
 | |
| 	//m->m_desc  = "Bytes to use for site link info data.";
 | |
| 	//m->m_off   = (char *)&g_conf.m_siteLinkInfoMaxCacheMem - g;
 | |
| 	//m->m_def   = "100000";
 | |
| 	//m->m_type  = TYPE_LONG;
 | |
| 	//m++;
 | |
| 
 | |
| 	//m->m_title = "site link info max cache age";
 | |
| 	//m->m_desc  = "Maximum age to cache site link info data in seconds.";
 | |
| 	//m->m_off   = (char *)&g_conf.m_siteLinkInfoMaxCacheAge - g;
 | |
| 	//m->m_def   = "3600";
 | |
| 	//m->m_type  = TYPE_LONG;
 | |
| 	//m++;
 | |
| 
 | |
| 	//m->m_title = "site link info save cache";
 | |
| 	//m->m_desc  = "Should the site link info cache be saved to disk?";
 | |
| 	//m->m_off   = (char *)&g_conf.m_siteLinkInfoSaveCache - g;
 | |
| 	//m->m_def   = "0";
 | |
| 	//m->m_type  = TYPE_BOOL;
 | |
| 	//m++;
 | |
| 
 | |
| 	//m->m_title = "site quality max cache mem";
 | |
| 	//m->m_desc  = "Bytes to use for site or root page quality.";
 | |
| 	//m->m_off   = (char *)&g_conf.m_siteQualityMaxCacheMem - g;
 | |
| 	//m->m_def   = "2000000"; // 2MB
 | |
| 	//m->m_type  = TYPE_LONG;
 | |
| 	//m++;
 | |
| 
 | |
| 	//m->m_title = "site quality save cache";
 | |
| 	//m->m_desc  = "Should the site link info cache be saved to disk?";
 | |
| 	//m->m_off   = (char *)&g_conf.m_siteQualitySaveCache - g;
 | |
| 	//m->m_def   = "0";
 | |
| 	//m->m_type  = TYPE_BOOL;
 | |
| 	//m++;
 | |
| 
 | |
| 	//m->m_title = "max incoming links to sample";
 | |
| 	//m->m_desc  = "Max linkers to a doc that are sampled to determine "
 | |
| 	//	"quality and for gathering link text.";
 | |
| 	//m->m_off   = (char *)&g_conf.m_maxIncomingLinksToSample - g;
 | |
| 	//m->m_def   = "100"; 
 | |
| 	//m->m_type  = TYPE_LONG;
 | |
| 	//m++;
 | |
| 
 | |
| 	//m->m_title = "allow async signals";
 | |
| 	//m->m_desc  = "Allow software interrupts?";
 | |
| 	//m->m_off   = (char *)&g_conf.m_allowAsyncSignals - g;
 | |
| 	//m->m_def   = "1"; 
 | |
| 	//m->m_type  = TYPE_BOOL;
 | |
| 	//m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "qa build mode";
 | |
| 	m->m_desc  = "When on Msg13.cpp saves docs in the qatest123 coll "
 | |
| 		"to qa/ subdir, when off "
 | |
| 		"if downloading a doc for qatest123 coll and not in "
 | |
| 		"qa subdir then it returns a 404.";
 | |
| 	m->m_cgi   = "qabuildmode";
 | |
| 	m->m_off   = (char *)&g_conf.m_qaBuildMode - g;
 | |
| 	m->m_def   = "0"; 
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_flags = PF_NOAPI | PF_HIDDEN;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "read only mode";
 | |
| 	m->m_desc  = "Read only mode does not allow spidering.";
 | |
| 	m->m_cgi   = "readonlymode";
 | |
| 	m->m_off   = (char *)&g_conf.m_readOnlyMode - g;
 | |
| 	m->m_def   = "0"; 
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_flags = PF_NOAPI;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	  Disable this until it works.
 | |
| 	m->m_title = "use merge token";
 | |
| 	m->m_desc  = "Restrict merging to one host per token group? Hosts "
 | |
| 		"that use the same disk and mirror hosts are generally in the "
 | |
| 		"same token group so that only one host in the group can be "
 | |
| 		"doing a merge at a time. This prevents query response time "
 | |
| 		"from suffering too much.";
 | |
| 	m->m_off   = (char *)&g_conf.m_useMergeToken - g;
 | |
| 	m->m_def   = "1"; 
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "do spell checking";
 | |
| 	m->m_desc  = "Spell check using the dictionary. Will be available "
 | |
| 		"again soon.";
 | |
| 	m->m_off   = (char *)&g_conf.m_doSpellChecking - g;
 | |
| 	m->m_cgi   = "dospellchecking";
 | |
| 	m->m_def   = "1"; 
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "do narrow search";
 | |
| 	m->m_desc  = "give narrow search suggestions.";
 | |
| 	m->m_off   = (char *)&g_conf.m_doNarrowSearch - g;
 | |
| 	m->m_cgi   = "donarrowsearch";
 | |
| 	m->m_def   = "0"; 
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	///////////////////////////////////////////
 | |
| 	// BASIC SETTINGS
 | |
| 	///////////////////////////////////////////
 | |
| 
 | |
| 	m->m_title = "spidering enabled";
 | |
| 	m->m_desc  = "Pause and resumes spidering for this collection.";
 | |
| 	m->m_cgi   = "bcse";
 | |
| 	m->m_off   = (char *)&cr.m_spideringEnabled - x;
 | |
| 	m->m_page  = PAGE_BASIC_SETTINGS;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_flags = PF_DUP|PF_CLONE;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "site list";
 | |
| 	m->m_xml   = "siteList";
 | |
| 	m->m_desc  = "List of sites to spider, one per line. "
 | |
| 		"See <a href=#examples>example site list</a> below. "
 | |
| 		"<br>"
 | |
| 		"<br>"
 | |
| 		"Example #1: <b>mysite.com myothersite.com</b>"
 | |
| 		"<br>"
 | |
| 		"<i>This will spider just those two sites.</i>"
 | |
| 		"<br>"
 | |
| 		"<br>"
 | |
| 		"Example #2: <b>seed:dmoz.org</b>"
 | |
| 		"<br>"
 | |
| 		"<i>This will spider the whole web starting with the website "
 | |
| 		"dmoz.org</i>"
 | |
| 		"<br><br>"
 | |
| 		"Gigablast uses the "
 | |
| 		"<a href=/admin/filters#insitelist>insitelist</a> "
 | |
| 		"directive on "
 | |
| 		"the <a href=/admin/filters>url filters</a> "
 | |
| 		"page to make sure that the spider only indexes urls "
 | |
| 		"that match the site patterns you specify here, other than "
 | |
| 		"urls you add individually via the add urls or inject url "
 | |
| 		"tools. "
 | |
| 		"Limit list to 300MB. If you have a lot of INDIVIDUAL urls "
 | |
| 		"to add then consider using the <a href=/admin/addurl>add "
 | |
| 		"urls</a> interface.";
 | |
| 	m->m_cgi   = "sitelist";
 | |
| 	m->m_off   = (char *)&cr.m_siteListBuf - x;
 | |
| 	m->m_page  = PAGE_BASIC_SETTINGS;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_type  = TYPE_SAFEBUF;
 | |
| 	m->m_func  = CommandUpdateSiteList;
 | |
| 	m->m_def   = "";
 | |
| 	// rebuild urlfilters now will nuke doledb and call updateSiteList()
 | |
| 	m->m_flags = PF_TEXTAREA | PF_DUP | PF_REBUILDURLFILTERS;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "spider sites";
 | |
| 	m->m_desc  = "Attempt to spider and index urls in the "
 | |
| 		"\"site patterns\" above. Saves you from having to add "
 | |
| 		"the same list of sites on the <a href=/admin/addurl>"
 | |
| 		"add url</a> page.";
 | |
| 	m->m_cgi   = "spiderToo";
 | |
| 	m->m_off   = (char *)&cr.m_spiderToo - x;
 | |
| 	m->m_page  = PAGE_BASIC_SETTINGS;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_type  = TYPE_CHECKBOX;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_flags = PF_NOSAVE | PF_DUP;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	// the new upload post submit button
 | |
| 	m->m_title = "upload site list";
 | |
| 	m->m_desc  = "Upload your file of site patterns. Completely replaces "
 | |
| 		"the site list in the text box above.";
 | |
| 	m->m_cgi   = "uploadsitelist";
 | |
| 	m->m_page  = PAGE_BASIC_SETTINGS;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_off   = 0;
 | |
| 	m->m_def   = NULL;
 | |
| 	m->m_type  = TYPE_FILEUPLOADBUTTON;
 | |
| 	m->m_flags = PF_NOSAVE | PF_DUP;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "restart collection";
 | |
| 	m->m_desc  = "Remove all documents from the collection and re-add "
 | |
| 		"seed urls from site list.";
 | |
| 	// If you do this accidentally there "
 | |
| 	//"is a <a href=/faq.html#recover>recovery procedure</a> to "
 | |
| 	//	"get back the trashed data.";
 | |
| 	m->m_cgi   = "restart";
 | |
| 	m->m_page  = PAGE_BASIC_SETTINGS;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_func2 = CommandRestartColl;
 | |
| 	m++;
 | |
| 
 | |
| 	///////////////////////////////////////////
 | |
| 	// SITE LIST
 | |
| 	///////////////////////////////////////////
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "spider sites";
 | |
| 	m->m_desc  = "Attempt to spider and index urls in the "
 | |
| 		"\"site patterns\" above. Saves you from having to add "
 | |
| 		"the same list of sites on the <a href=/admin/addurl>"
 | |
| 		"add url</a> page.";
 | |
| 	m->m_cgi   = "spiderToo";
 | |
| 	m->m_off   = (char *)&cr.m_spiderToo - x;
 | |
| 	m->m_page  = PAGE_SITES;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_type  = TYPE_CHECKBOX;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_flags = PF_NOSAVE ;
 | |
| 	m++;
 | |
| 	*/
 | |
| 	
 | |
| 	
 | |
| 	///////////////////////////////////////////
 | |
| 	// SYNC CONTROLS
 | |
| 	///////////////////////////////////////////
 | |
| 	/*
 | |
| 
 | |
| 	m->m_title = "sync enabled";
 | |
| 	m->m_desc  = "Turn data synchronization on or off. When a host comes "
 | |
| 		"up he will perform an incremental synchronization with a "
 | |
| 		"twin if he detects that he was unable to save his data "
 | |
| 		"when he last exited.";
 | |
| 	m->m_cgi   = "sye";
 | |
| 	m->m_off   = (char *)&g_conf.m_syncEnabled - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_page  = PAGE_SYNC;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dry run";
 | |
| 	m->m_desc  = "Should Gigablast just run through and log the changes "
 | |
| 		"it would make without actually making them?";
 | |
| 	m->m_cgi   = "sdr";
 | |
| 	m->m_off   = (char *)&g_conf.m_syncDryRun - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "sync indexdb";
 | |
| 	m->m_desc  = "Turn data synchronization on or off for indexdb. "
 | |
| 		"Indexdb holds the index information.";
 | |
| 	m->m_cgi   = "si";
 | |
| 	m->m_off   = (char *)&g_conf.m_syncIndexdb - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "sync logging";
 | |
| 	m->m_desc  = "Log fixes?";
 | |
| 	m->m_cgi   = "slf";
 | |
| 	m->m_off   = (char *)&g_conf.m_syncLogging - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "union titledb and spiderdb";
 | |
| 	m->m_desc  = "If a host being sync'd has a title record (cached web "
 | |
| 		"page) that the "
 | |
| 		"remote host does not, normally, it would be deleted. "
 | |
| 		"But if this is true then it is kept. "
 | |
| 		"Useful for reducing title rec not found errors.";
 | |
| 	m->m_cgi   = "sdu";
 | |
| 	m->m_off   = (char *)&g_conf.m_syncDoUnion - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "force out of sync";
 | |
| 	m->m_desc  = "Forces this host to be out of sync.";
 | |
| 	m->m_cgi   = "foos";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_func  = CommandForceOutOfSync;
 | |
| 	m->m_cast  = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "bytes per second";
 | |
| 	m->m_desc  = "How many bytes to read per second for syncing. "
 | |
| 		  "Decrease to reduce impact of syncing on query "
 | |
| 		"response time.";
 | |
| 	m->m_cgi   = "sbps";
 | |
| 	m->m_off   = (char *)&g_conf.m_syncBytesPerSecond - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "10000000";
 | |
| 	m->m_units = "bytes";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/////////////////////
 | |
| 	//
 | |
| 	// DIFFBOT CRAWLBOT PARMS
 | |
| 	//
 | |
| 	//////////////////////
 | |
| 
 | |
| 	///////////
 | |
| 	//
 | |
| 	// DO NOT INSERT parms above here, unless you set
 | |
| 	// m_obj = OBJ_COLL !!! otherwise it thinks it belongs to
 | |
| 	// OBJ_CONF as used in the above parms.
 | |
| 	//
 | |
| 	///////////
 | |
| 
 | |
| 	m->m_cgi   = "dbtoken";
 | |
| 	m->m_xml   = "diffbotToken";
 | |
| 	m->m_off   = (char *)&cr.m_diffbotToken - x;
 | |
| 	m->m_type  = TYPE_SAFEBUF;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_flags = PF_DIFFBOT;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_cgi   = "createdtime";
 | |
| 	m->m_xml   = "collectionCreatedTime";
 | |
| 	m->m_desc  = "Time when this collection was created, or time of "
 | |
| 		"the last reset or restart.";
 | |
| 	m->m_off   = (char *)&cr.m_diffbotCrawlStartTime - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_NOAPI;//PF_DIFFBOT; no i want to saveToXml
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_cgi   = "spiderendtime";
 | |
| 	m->m_xml   = "crawlEndTime";
 | |
| 	m->m_desc  = "If spider is done, when did it finish.";
 | |
| 	m->m_off   = (char *)&cr.m_diffbotCrawlEndTime - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_NOAPI;//PF_DIFFBOT; no i want to saveToXml
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_cgi   = "dbcrawlname";
 | |
| 	m->m_xml   = "diffbotCrawlName";
 | |
| 	m->m_off   = (char *)&cr.m_diffbotCrawlName - x;
 | |
| 	m->m_type  = TYPE_SAFEBUF;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_flags = PF_DIFFBOT;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_cgi   = "notifyEmail";
 | |
| 	m->m_title = "notify email";
 | |
| 	m->m_xml   = "notifyEmail";
 | |
| 	m->m_off   = (char *)&cr.m_notifyEmail - x;
 | |
| 	m->m_type  = TYPE_SAFEBUF;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_flags = PF_DIFFBOT;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_cgi   = "notifyWebhook";
 | |
| 	m->m_xml   = "notifyWebhook";
 | |
| 	m->m_title = "notify webhook";
 | |
| 	m->m_off   = (char *)&cr.m_notifyUrl - x;
 | |
| 	m->m_type  = TYPE_SAFEBUF;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_flags = PF_DIFFBOT;
 | |
| 	m++;
 | |
| 
 | |
| 	// collective respider frequency (for pagecrawlbot.cpp)
 | |
| 	m->m_title = "collective respider frequency (days)";
 | |
| 	m->m_cgi   = "repeat";
 | |
| 	m->m_xml   = "collectiveRespiderFrequency";
 | |
| 	m->m_off   = (char *)&cr.m_collectiveRespiderFrequency - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "0.0"; // 0.0
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_units = "days";
 | |
| 	m->m_flags = PF_REBUILDURLFILTERS | PF_DIFFBOT;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "collective crawl delay (seconds)";
 | |
| 	m->m_cgi   = "crawlDelay";
 | |
| 	m->m_xml   = "collectiveCrawlDelay";
 | |
| 	m->m_off   = (char *)&cr.m_collectiveCrawlDelay - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = ".250"; // 250 ms
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_REBUILDURLFILTERS | PF_DIFFBOT;
 | |
| 	m->m_units = "seconds";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_cgi   = "urlCrawlPattern";
 | |
| 	m->m_xml   = "diffbotUrlCrawlPattern";
 | |
| 	m->m_title = "url crawl pattern";
 | |
| 	m->m_off   = (char *)&cr.m_diffbotUrlCrawlPattern - x;
 | |
| 	m->m_type  = TYPE_SAFEBUF;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_flags = PF_REBUILDURLFILTERS | PF_DIFFBOT;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_cgi   = "urlProcessPattern";
 | |
| 	m->m_xml   = "diffbotUrlProcessPattern";
 | |
| 	m->m_title = "url process pattern";
 | |
| 	m->m_off   = (char *)&cr.m_diffbotUrlProcessPattern - x;
 | |
| 	m->m_type  = TYPE_SAFEBUF;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_flags = PF_REBUILDURLFILTERS | PF_DIFFBOT;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_cgi   = "pageProcessPattern";
 | |
| 	m->m_xml   = "diffbotPageProcessPattern";
 | |
| 	m->m_title = "page process pattern";
 | |
| 	m->m_off   = (char *)&cr.m_diffbotPageProcessPattern - x;
 | |
| 	m->m_type  = TYPE_SAFEBUF;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_flags = PF_REBUILDURLFILTERS | PF_DIFFBOT;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_cgi   = "urlCrawlRegEx";
 | |
| 	m->m_xml   = "diffbotUrlCrawlRegEx";
 | |
| 	m->m_title = "url crawl regex";
 | |
| 	m->m_off   = (char *)&cr.m_diffbotUrlCrawlRegEx - x;
 | |
| 	m->m_type  = TYPE_SAFEBUF;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_flags = PF_REBUILDURLFILTERS | PF_DIFFBOT;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_cgi   = "urlProcessRegEx";
 | |
| 	m->m_xml   = "diffbotUrlProcessRegEx";
 | |
| 	m->m_title = "url process regex";
 | |
| 	m->m_off   = (char *)&cr.m_diffbotUrlProcessRegEx - x;
 | |
| 	m->m_type  = TYPE_SAFEBUF;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_flags = PF_REBUILDURLFILTERS | PF_DIFFBOT;
 | |
| 	m++;
 | |
|   
 | |
| 	m->m_cgi   = "maxHops";
 | |
| 	m->m_xml   = "diffbotHopcount";
 | |
| 	m->m_title = "diffbot max hopcount";
 | |
| 	m->m_off   = (char *)&cr.m_diffbotMaxHops - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_def   = "-1";
 | |
| 	m->m_flags = PF_REBUILDURLFILTERS | PF_DIFFBOT;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_cgi   = "onlyProcessIfNew";
 | |
| 	m->m_xml   = "diffbotOnlyProcessIfNew";
 | |
| 	m->m_title = "onlyProcessIfNew";
 | |
| 	m->m_off   = (char *)&cr.m_diffbotOnlyProcessIfNewUrl - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_flags = PF_REBUILDURLFILTERS | PF_DIFFBOT;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_cgi   = "seeds";
 | |
| 	m->m_xml   = "diffbotSeeds";
 | |
| 	m->m_off   = (char *)&cr.m_diffbotSeeds - x;
 | |
| 	m->m_type  = TYPE_SAFEBUF;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_DIFFBOT;
 | |
| 	m->m_def   = "";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_xml   = "isCustomCrawl";
 | |
| 	m->m_off   = (char *)&cr.m_isCustomCrawl - x;
 | |
| 	m->m_type  = TYPE_CHAR;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_cgi   = "isCustomCrawl";
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_DIFFBOT;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_cgi   = "maxToCrawl";
 | |
| 	m->m_title = "max to crawl";
 | |
| 	m->m_xml   = "maxToCrawl";
 | |
| 	m->m_off   = (char *)&cr.m_maxToCrawl - x;
 | |
| 	m->m_type  = TYPE_LONG_LONG;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_def   = "100000";
 | |
| 	m->m_flags = PF_DIFFBOT;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_cgi   = "maxToProcess";
 | |
| 	m->m_title = "max to process";
 | |
| 	m->m_xml   = "maxToProcess";
 | |
| 	m->m_off   = (char *)&cr.m_maxToProcess - x;
 | |
| 	m->m_type  = TYPE_LONG_LONG;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_def   = "-1";
 | |
| 	m->m_flags = PF_DIFFBOT;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_cgi   = "maxRounds";
 | |
| 	m->m_title = "max crawl rounds";
 | |
| 	m->m_xml   = "maxCrawlRounds";
 | |
| 	m->m_off   = (char *)&cr.m_maxCrawlRounds - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_def   = "-1";
 | |
| 	m->m_flags = PF_DIFFBOT;
 | |
| 	m++;
 | |
| 
 | |
| 	/////////////////////
 | |
| 	//
 | |
| 	// new cmd parms
 | |
| 	//
 | |
| 	/////////////////////
 | |
| 
 | |
| 
 | |
| 	m->m_title = "insert parm row";
 | |
| 	m->m_desc  = "insert a row into a parm";
 | |
| 	m->m_cgi   = "insert";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_func  = CommandInsertUrlFiltersRow;
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_flags = PF_REBUILDURLFILTERS;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "remove parm row";
 | |
| 	m->m_desc  = "remove a row from a parm";
 | |
| 	m->m_cgi   = "remove";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_func  = CommandRemoveUrlFiltersRow;
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_flags = PF_REBUILDURLFILTERS;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "delete collection";
 | |
| 	m->m_desc  = "delete a collection";
 | |
| 	m->m_cgi   = "delete";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_func2 = CommandDeleteColl;
 | |
| 	m->m_cast  = 1;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "delete collection 2";
 | |
| 	m->m_desc  = "delete the specified collection";
 | |
| 	m->m_cgi   = "delColl";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_func2 = CommandDeleteColl2;
 | |
| 	m->m_cast  = 1;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "delete collection";
 | |
| 	m->m_desc  = "Delete the specified collection. You can specify "
 | |
| 		"multiple &delcoll= parms in a single request to delete "
 | |
| 		"multiple collections at once.";
 | |
| 	// lowercase as opposed to camelcase above
 | |
| 	m->m_cgi   = "delcoll";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_page  = PAGE_DELCOLL;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_func2 = CommandDeleteColl2;
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_flags = PF_API | PF_REQUIRED;
 | |
| 	m++;
 | |
| 
 | |
| 	// arg is the collection # to clone from
 | |
| 	m->m_title = "clone collection";
 | |
| 	m->m_desc  = "Clone collection settings FROM this collection.";
 | |
| 	m->m_cgi   = "clonecoll";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_page  = PAGE_CLONECOLL;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_func  = CommandCloneColl;
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_flags = PF_API | PF_REQUIRED;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "add collection";
 | |
| 	m->m_desc  = "add a new collection";
 | |
| 	// camelcase support
 | |
| 	m->m_cgi   = "addColl";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_func  = CommandAddColl0;
 | |
| 	m->m_cast  = 1;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "add collection";
 | |
| 	m->m_desc  = "Add a new collection with this name. No spaces "
 | |
| 		"allowed or strange characters allowed. Max of 64 characters.";
 | |
| 	// lower case support
 | |
| 	m->m_cgi   = "addcoll";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_page  = PAGE_ADDCOLL;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_func  = CommandAddColl0;
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_flags = PF_API | PF_REQUIRED;
 | |
| 	m++;
 | |
| 
 | |
| 	//
 | |
| 	// CLOUD SEARCH ENGINE SUPPORT
 | |
| 	//
 | |
| 	// used to prevent a guest ip adding more than one coll
 | |
| 	m->m_title = "user ip";
 | |
| 	m->m_desc  = "IP of user adding collection.";
 | |
| 	m->m_cgi   = "userip";
 | |
| 	m->m_xml   = "userIp";
 | |
| 	m->m_off   = (char *)&cr.m_userIp - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = 16;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN;// | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_ADDCOLL;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "add custom crawl";
 | |
| 	m->m_desc  = "add custom crawl";
 | |
| 	m->m_cgi   = "addCrawl";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_func  = CommandAddColl1;
 | |
| 	m->m_cast  = 1;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "add bulk job";
 | |
| 	m->m_desc  = "add bulk job";
 | |
| 	m->m_cgi   = "addBulk";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_func  = CommandAddColl2;
 | |
| 	m->m_cast  = 1;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "in sync";
 | |
| 	m->m_desc  = "signify in sync with host 0";
 | |
| 	m->m_cgi   = "insync";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_func  = CommandInSync;
 | |
| 	m->m_cast  = 1;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 
 | |
| 	///////////////////////////////////////////
 | |
| 	// SEARCH CONTROLS
 | |
| 	///////////////////////////////////////////
 | |
| 
 | |
| 
 | |
| 	//m->m_title = "allow RAID style list intersection";
 | |
| 	//m->m_desc  = "Allow using RAID style lookup for intersecting term "
 | |
| 	//	     "lists and getting docIds for queries.";
 | |
| 	//m->m_cgi   = "uraid";
 | |
| 	//m->m_off   = (char *)&cr.m_allowRaidLookup - x;
 | |
| 	//m->m_type  = TYPE_BOOL;
 | |
| 	//m->m_def   = "0";
 | |
| 	//m++;
 | |
| 
 | |
| 	//m->m_title = "allow RAIDed term list read";
 | |
| 	//m->m_desc  = "Allow splitting up the term list read for large lists "
 | |
| 	//	     "amongst twins.";
 | |
| 	//m->m_cgi   = "ulraid";
 | |
| 	//m->m_off   = (char *)&cr.m_allowRaidListRead - x;
 | |
| 	//m->m_type  = TYPE_BOOL;
 | |
| 	//m->m_def   = "0";
 | |
| 	//m->m_group = 0;
 | |
| 	//m++;
 | |
| 
 | |
| 	//m->m_title = "max RAID mercenaries";
 | |
| 	//m->m_desc  = "Max number of mercenaries to use in RAID lookup and "
 | |
| 	//	     "intersection.";
 | |
| 	//m->m_cgi   = "raidm";
 | |
| 	//m->m_off   = (char *)&cr.m_maxRaidMercenaries - x;
 | |
| 	//m->m_type  = TYPE_LONG;
 | |
| 	//m->m_def   = "2";
 | |
| 	//m->m_group = 0;
 | |
| 	//m++;
 | |
| 
 | |
| 	//m->m_title = "min term list size to RAID";
 | |
| 	//m->m_desc  = "Term list size to begin doing term list RAID";
 | |
| 	//m->m_cgi   = "raidsz";
 | |
| 	//m->m_off   = (char *)&cr.m_minRaidListSize - x;
 | |
| 	//m->m_type  = TYPE_LONG;
 | |
| 	//m->m_def   = "1000000";
 | |
| 	//m->m_group = 0;
 | |
| 	//m++;
 | |
| 
 | |
| 	m->m_title = "restrict indexdb for queries";
 | |
| 	m->m_desc  = "If this is true Gigablast will only search the root "
 | |
| 		"index file for docIds. Saves on disk seeks, "
 | |
| 		"but may use older versions of indexed web pages.";
 | |
| 	m->m_cgi   = "riq";
 | |
| 	m->m_off   = (char *)&cr.m_restrictIndexdbForQuery - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_def   = "0";
 | |
| 	//m->m_sparm = 1;
 | |
| 	//m->m_scgi  = "ri";
 | |
| 	//m->m_soff  = (char *)&si.m_restrictIndexdbForQuery - y;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "restrict indexdb for xml feed";
 | |
| 	m->m_desc  = "Like above, but specifically for XML feeds.";
 | |
| 	m->m_cgi   = "rix";
 | |
| 	m->m_off   = (char *)&cr.m_restrictIndexdbForXML - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	//m->m_title = "restrict indexdb for queries in xml feed";
 | |
| 	//m->m_desc  = "Same as above, but just for the XML feed.";
 | |
| 	//m->m_cgi   = "riqx";
 | |
| 	//m->m_off   = (char *)&cr.m_restrictIndexdbForQueryRaw - x;
 | |
| 	//m->m_type  = TYPE_BOOL;
 | |
| 	//m->m_def   = "1";
 | |
| 	//m->m_group = 0;
 | |
| 	//m++;
 | |
| 
 | |
| 	m->m_title = "read from cache by default";
 | |
| 	m->m_desc  = "Should we read search results from the cache? Set "
 | |
| 		"to false to fix dmoz bug.";
 | |
| 	m->m_cgi   = "rcd";
 | |
| 	m->m_off   = (char *)&cr.m_rcache - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
	// NOTE(review): m_off here points at si.m_query — the SAME offset used
	// by the "q" (query) parm registered just below, so a &fast=... value
	// would be written over the query string. This looks like a copy/paste
	// slip; presumably it should point at a dedicated SearchInput member
	// (e.g. a "fast" flag) — TODO confirm against SearchInput.h.
	m->m_title = "fast results";
	m->m_desc  = "Use &fast=1 to obtain search results from the much "
		"faster Gigablast index, although the results are not "
		"searched as thoroughly.";
	m->m_obj   = OBJ_SI;
	m->m_page  = PAGE_RESULTS;
	m->m_off   = (char *)&si.m_query - y; // same offset as "q" below!
	m->m_type  = TYPE_CHARPTR;//STRING;
	m->m_def   = "0";
	m->m_cgi   = "fast";
	//m->m_size  = MAX_QUERY_LEN;
	m->m_flags = PF_COOKIE | PF_WIDGET_PARM | PF_API;
	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "query";
 | |
| 	m->m_desc  = "The query to perform. See <a href=/help.html>help</a>. "
 | |
| 		"See the <a href=#qops>query operators</a> below for "
 | |
| 		"more info.";
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_off   = (char *)&si.m_query - y;
 | |
| 	m->m_type  = TYPE_CHARPTR;//STRING;
 | |
| 	m->m_cgi   = "q";
 | |
| 	//m->m_size  = MAX_QUERY_LEN;
 | |
| 	m->m_flags = PF_REQUIRED | PF_COOKIE | PF_WIDGET_PARM | PF_API;
 | |
| 	m++;
 | |
| 
 | |
| 	// m->m_title = "query2";
 | |
| 	// m->m_desc  = "The query on which to score inlinkers.";
 | |
| 	// m->m_obj   = OBJ_SI;
 | |
| 	// m->m_page  = PAGE_NONE;
 | |
| 	// m->m_off   = (char *)&si.m_query2 - y;
 | |
| 	// m->m_type  = TYPE_CHARPTR;//STRING;
 | |
| 	// m->m_cgi   = "qq";
 | |
| 	// m->m_size  = MAX_QUERY_LEN;
 | |
| 	// m->m_sprpg = 0; // do not store query, needs to be last so related 
 | |
|         // m->m_sprpp = 0; // topics can append to it
 | |
| 	// m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	// m++;
 | |
| 
 | |
| 	m->m_title = "collection";
 | |
| 	m->m_desc  = "Search this collection. Use multiple collection names "
 | |
| 		"separated by a whitespace to search multiple collections at "
 | |
| 		"once.";
 | |
| 	m->m_cgi   = "c";
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_type  = TYPE_CHARPTR;//SAFEBUF;
 | |
| 	m->m_def   = NULL;
 | |
| 	m->m_flags = PF_API | PF_REQUIRED;
 | |
| 	m->m_off   = (char *)&si.m_coll - y;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "number of results per query";
 | |
| 	m->m_desc  = "The number of results returned per page.";
 | |
| 	// make it 25 not 50 since we only have like 26 balloons
 | |
| 	m->m_def   = "10";
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_off   = (char *)&si.m_docsWanted - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_cgi   = "n";
 | |
| 	m->m_flags = PF_WIDGET_PARM | PF_API;
 | |
| 	m->m_smin  = 0;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "first result num";
 | |
| 	m->m_desc  = "Start displaying at search result #X. Starts at 0.";
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_off   = (char *)&si.m_firstResultNum - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_cgi   = "s";
 | |
| 	m->m_smin  = 0;
 | |
| 	m->m_sprpg = 0;
 | |
| 	m->m_sprpp = 0;
 | |
| 	m->m_flags = PF_REDBOX;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "show errors";
 | |
| 	m->m_desc  = "Show errors from generating search result summaries "
 | |
| 		"rather than just hide the docid. Useful for debugging.";
 | |
| 	m->m_cgi   = "showerrors";
 | |
| 	m->m_off   = (char *)&si.m_showErrors - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "site cluster";
 | |
| 	m->m_desc  = "Should search results be site clustered? This "
 | |
| 		"limits each site to appearing at most twice in the "
 | |
| 		"search results. Sites are subdomains for the most part, "
 | |
| 		"like abc.xyz.com.";
 | |
| 	m->m_cgi   = "sc";
 | |
| 	m->m_off   = (char *)&si.m_doSiteClustering - y;
 | |
| 	m->m_defOff= (char *)&cr.m_siteClusterByDefault - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "hide all clustered results";
 | |
| 	m->m_desc  = "Only display at most one result per site.";
 | |
| 	m->m_cgi   = "hacr";
 | |
| 	m->m_off   = (char *)&si.m_hideAllClustered - y;
 | |
| 	m->m_defOff= (char *)&cr.m_hideAllClustered - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "dedup results";
 | |
| 	m->m_desc  = "Should duplicate search results be removed? This is "
 | |
| 		"based on a content hash of the entire document. "
 | |
| 		"So documents must be exactly the same for the most part.";
 | |
| 	m->m_cgi   = "dr"; // dedupResultsByDefault";
 | |
| 	m->m_off   = (char *)&si.m_doDupContentRemoval - y;
 | |
| 	m->m_defOff= (char *)&cr.m_dedupResultsByDefault - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 1;
 | |
| 	m->m_cgi   = "dr";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "percent similar dedup summary";
 | |
| 	m->m_desc  = "If document summary (and title) are "
 | |
| 		"this percent similar "
 | |
| 		"to a document summary above it, then remove it from the "
 | |
| 		"search results. 100 means only to remove if exactly the "
 | |
| 		"same. 0 means no summary deduping. You must also supply "
 | |
| 		"dr=1 for this to work.";
 | |
| 	m->m_cgi   = "pss";
 | |
| 	m->m_off   = (char *)&si.m_percentSimilarSummary - y;
 | |
| 	m->m_defOff= (char *)&cr.m_percentSimilarSummary - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_group = 0;
 | |
| 	m->m_smin  = 0;
 | |
| 	m->m_smax  = 100;
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;       
 | |
| 
 | |
| 
 | |
| 	m->m_title = "dedup URLs";
 | |
| 	m->m_desc  = "Should we dedup URLs with case insensitivity? This is "
 | |
|                      "mainly to correct duplicate wiki pages.";
 | |
| 	m->m_cgi   = "ddu";
 | |
| 	m->m_off   = (char *)&si.m_dedupURL - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "do spell checking";
 | |
| 	m->m_desc  = "If enabled while using the XML feed, "
 | |
| 		"when Gigablast finds a spelling recommendation it will be "
 | |
| 		"included in the XML <spell> tag. Default is 0 if using an "
 | |
| 		"XML feed, 1 otherwise. Will be available again soon.";
 | |
| 	m->m_cgi   = "spell";
 | |
| 	m->m_off   = (char *)&si.m_spellCheck - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "stream search results";
 | |
| 	m->m_desc  = "Stream search results back on socket as they arrive. "
 | |
| 		"Useful when thousands/millions of search results are "
 | |
| 		"requested. Required when doing such things otherwise "
 | |
| 		"Gigablast could run out of memory. Only supported for "
 | |
| 		"JSON and XML formats, not HTML.";
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_off   = (char *)&si.m_streamResults - y;
 | |
| 	m->m_type  = TYPE_CHAR;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_cgi   = "stream";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_sprpg = 0; // propagate to next 10
 | |
| 	m->m_sprpp = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "seconds back";
 | |
| 	m->m_desc  = "Limit results to pages spidered this many seconds ago. "
 | |
| 		"Use 0 to disable.";
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_off   = (char *)&si.m_secsBack - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_cgi   = "secsback";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "sort by";
 | |
| 	m->m_desc  = "Use 0 to sort results by relevance, 1 to sort by "
 | |
| 		"most recent spider date down, and 2 to sort by oldest "
 | |
| 		"spidered results first.";
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_off   = (char *)&si.m_sortBy - y;
 | |
| 	m->m_type  = TYPE_CHAR;
 | |
| 	m->m_def   = "0"; // this means relevance
 | |
| 	m->m_cgi   = "sortby";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "filetype";
 | |
| 	m->m_desc  = "Restrict results to this filetype. Supported "
 | |
| 		"filetypes are pdf, doc, html xml, json, xls.";
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_off   = (char *)&si.m_filetype - y;
 | |
| 	m->m_type  = TYPE_CHARPTR;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_cgi   = "filetype";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "get scoring info";
 | |
| 	m->m_desc  = "Get scoring information for each result so you "
 | |
| 		"can see how each result is scored. You must explicitly "
 | |
| 		"request this using &scores=1 for the XML feed because it "
 | |
| 		"is not included by default.";
 | |
| 	m->m_cgi   = "scores"; // dedupResultsByDefault";
 | |
| 	m->m_off   = (char *)&si.m_getDocIdScoringInfo - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_def   = NULL;
 | |
| 	m->m_flags = PF_API;
 | |
| 	// get default from collectionrec item
 | |
| 	m->m_defOff= (char *)&cr.m_getDocIdScoringInfo - x;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 
 | |
| 	m->m_title = "do query expansion";
 | |
| 	m->m_desc  = "If enabled, query expansion will expand your query "
 | |
| 		"to include the various forms and "
 | |
| 		"synonyms of the query terms.";
 | |
| 	m->m_off   = (char *)&si.m_queryExpansion - y;
 | |
| 	m->m_defOff= (char *)&cr.m_queryExpansion - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_cgi  = "qe";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	// more general parameters
 | |
| 	m->m_title = "max search results";
 | |
| 	m->m_desc  = "What is the maximum total number "
 | |
| 		"of returned search results.";
 | |
| 	m->m_cgi   = "msr";
 | |
| 	m->m_off   = (char *)&cr.m_maxSearchResults - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "1000";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max search results per query";
 | |
| 	m->m_desc  = "What is the limit to the total number "
 | |
| 		"of returned search results per query?";
 | |
| 	m->m_cgi   = "msrpq";
 | |
| 	m->m_off   = (char *)&cr.m_maxSearchResultsPerQuery - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "100";
 | |
| 	m->m_flags = 0;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max search results for paying clients";
 | |
| 	m->m_desc  = "What is the limit to the total number "
 | |
| 		"of returned search results for clients.";
 | |
| 	m->m_cgi   = "msrfpc";
 | |
| 	m->m_off   = (char *)&cr.m_maxSearchResultsForClients - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "1000";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max search results per query for paying clients";
 | |
| 	m->m_desc  = "What is the limit to the total number "
 | |
| 		"of returned search results per query for paying clients? "
 | |
| 		"Auto ban must be enabled for this to work.";
 | |
| 	m->m_cgi   = "msrpqfc";
 | |
| 	m->m_off   = (char *)&cr.m_maxSearchResultsPerQueryForClients - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "1000";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "user ip";
 | |
| 	m->m_desc  = "The ip address of the searcher. We can pass back "
 | |
| 		"for use in the autoban technology which bans abusive IPs.";
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_off   = (char *)&si.m_userIpStr - y;
 | |
| 	m->m_type  = TYPE_CHARPTR;//STRING;
 | |
| 	m->m_cgi   = "uip";
 | |
| 	m->m_flags = PF_COOKIE | PF_WIDGET_PARM | PF_API;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 
 | |
| 	m->m_title = "use min ranking algo";
 | |
| 	m->m_desc  = "Should search results be ranked using this algo?";
 | |
| 	//m->m_cgi   = "uma";
 | |
| 	//m->m_off   = (char *)&cr.m_siteClusterByDefault - x;
 | |
| 	m->m_off   = (char *)&si.m_useMinAlgo - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	// seems, good, default it on
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_cgi  = "uma";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	// limit to this # of the top term pairs from inlink text whose
 | |
| 	// score is accumulated
 | |
| 	m->m_title = "real max top";
 | |
| 	m->m_desc  = "Only score up to this many inlink text term pairs";
 | |
| 	m->m_off   = (char *)&si.m_realMaxTop - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_def   = "10";
 | |
| 	m->m_cgi   = "rmt";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "use new ranking algo";
 | |
| 	m->m_desc  = "Should search results be ranked using this new algo?";
 | |
| 	m->m_off   = (char *)&si.m_useNewAlgo - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	// seems, good, default it on
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_cgi   = "una";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "do max score algo";
 | |
| 	m->m_desc  = "Quickly eliminated docids using max score algo";
 | |
| 	m->m_off   = (char *)&si.m_doMaxScoreAlgo - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_cgi   = "dmsa";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "use fast intersection algo";
 | |
| 	m->m_desc  = "Should we try to speed up search results generation?";
 | |
| 	m->m_off   = (char *)&si.m_fastIntersection - y;
 | |
| 	m->m_type  = TYPE_CHAR;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	// turn off until we debug
 | |
| 	m->m_def   = "-1";
 | |
| 	m->m_cgi   = "fi";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max number of facets to return";
 | |
| 	m->m_desc  = "Max number of facets to return";
 | |
| 	m->m_off   = (char *)&si.m_maxFacets - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "50";
 | |
| 	m->m_group = 1;
 | |
| 	m->m_cgi   = "nf";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	// m->m_title = "special query";
 | |
| 	// m->m_desc  = "List of docids to restrain results to.";
 | |
| 	// m->m_cgi   = "sq";
 | |
| 	// m->m_off   = (char *)&si.m_sq - y;
 | |
| 	// m->m_type  = TYPE_CHARPTR;
 | |
| 	// m->m_def   = NULL;
 | |
| 	// m->m_group = 0;
 | |
| 	// m++;
 | |
| 
 | |
| 	// m->m_title = "negative docids";
 | |
| 	// m->m_desc  = "List of docids to ignore.";
 | |
| 	// m->m_cgi   = "nodocids";
 | |
| 	// m->m_off   = (char *)&si.m_noDocIds - y;
 | |
| 	// m->m_type  = TYPE_CHARPTR;
 | |
| 	// m->m_def   = NULL;
 | |
| 	// m->m_group = 0;
 | |
| 	// m++;
 | |
| 
 | |
| 	// m->m_title = "negative siteids";
 | |
| 	// m->m_desc  = "Whitespace-separated list of 32-bit sitehashes "
 | |
| 	//"to ignore.";
 | |
| 	// m->m_cgi   = "nositeids";
 | |
| 	// m->m_off   = (char *)&si.m_noSiteIds - y;
 | |
| 	// m->m_type  = TYPE_CHARPTR;
 | |
| 	// m->m_def   = NULL;
 | |
| 	// m->m_group = 0;
 | |
| 	// m++;
 | |
| 
 | |
| 	m->m_title = "language weight";
 | |
| 	m->m_desc  = "Default language weight if document matches quer "
 | |
| 		"language. Use this to give results that match the specified "
 | |
| 		"the specified &qlang higher ranking, or docs whose language "
 | |
| 		"is unnknown. Can be override with "
 | |
| 		"&langw in the query url.";
 | |
| 	m->m_cgi   = "langweight";
 | |
| 	m->m_off   = (char *)&cr.m_sameLangWeight - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "20.000000";
 | |
| 	m->m_group = 1;
 | |
| 	m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "use language weights";
 | |
| 	m->m_desc  = "Use Language weights to sort query results. "
 | |
| 		"This will give results that match the specified &qlang "
 | |
| 		"higher ranking.";
 | |
| 	m->m_cgi   = "lsort";
 | |
| 	m->m_off   = (char *)&cr.m_enableLanguageSorting - x;
 | |
| 	//m->m_soff  = (char *)&si.m_enableLanguageSorting - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 1;
 | |
| 	//m->m_scgi  = "lsort";
 | |
| 	m->m_smin  = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "sort language preference";
 | |
| 	m->m_desc  = "Default language to use for ranking results. "
 | |
| 		//"This should only be used on limited collections. "
 | |
| 		"Value should be any language abbreviation, for example "
 | |
| 		"\"en\" for English. Use <i>xx</i> to give ranking "
 | |
| 		"boosts to no language in particular. See the language "
 | |
| 		"abbreviations at the bottom of the "
 | |
| 		"<a href=/admin/filters>url filters</a> page.";
 | |
| 	m->m_cgi   = "qlang";
 | |
| 	m->m_off   = (char *)&si.m_defaultSortLang - y;
 | |
| 	m->m_type  = TYPE_CHARPTR;
 | |
| 	//m->m_size  = 6; // up to 5 chars + NULL, e.g. "en_US"
 | |
| 	m->m_def   = "";//"xx";//_US";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "language weight";
 | |
| 	m->m_desc  = "Use this to override the default language weight "
 | |
| 		"for this collection. The default language weight can be "
 | |
| 		"set in the search controls and is usually something like "
 | |
| 		"20.0. Which means that we multiply a result's score by 20 "
 | |
| 		"if from the same language as the query or the language is "
 | |
| 		"unknown.";
 | |
| 	m->m_off   = (char *)&si.m_sameLangWeight - y;
 | |
| 	m->m_defOff= (char *)&cr.m_sameLangWeight - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_cgi  = "langw";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "sort country preference";
 | |
| 	m->m_desc  = "Default country to use for ranking results. "
 | |
| 		//"This should only be used on limited collections. "
 | |
| 		"Value should be any country code abbreviation, for example "
 | |
| 		"\"us\" for United States. This is currently not working.";
 | |
| 	m->m_cgi   = "qcountry";
 | |
| 	m->m_off   = (char *)&si.m_defaultSortCountry - y;
 | |
| 	m->m_type  = TYPE_CHARPTR;
 | |
| 	m->m_size  = 2+1;
 | |
| 	m->m_def   = "us";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_flags = PF_NOAPI;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "language method weights";
 | |
| 	m->m_desc  = "Language method weights for spider language "
 | |
| 		"detection. A string of ascii numerals that "
 | |
| 		"should default to 895768712";
 | |
| 	m->m_cgi   = "lmweights";
 | |
| 	m->m_off   = (char *)&cr.m_languageMethodWeights - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = 10; // up to 9 chars + NULL
 | |
| 	m->m_def   = "894767812";
 | |
| 	m->m_group = 0;
 | |
| 	// m->m_sparm = 1;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "language detection sensitivity";
 | |
| 	m->m_desc  = "Language detection sensitivity. Higher"
 | |
| 		" values mean higher hitrate, but lower accuracy."
 | |
| 		" Suggested values are from 2 to 20";
 | |
| 	m->m_cgi   = "lmbailout";
 | |
| 	m->m_off   = (char *)&cr.m_languageBailout - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "5";
 | |
| 	m->m_group = 0;
 | |
| 	// m->m_sparm = 1;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "language detection threshold";
 | |
| 	m->m_desc  = "Language detection threshold sensitivity."
 | |
| 		" Higher values mean better accuracy, but lower hitrate."
 | |
| 		" Suggested values are from 2 to 20";
 | |
| 	m->m_cgi   = "lmthreshold";
 | |
| 	m->m_off   = (char *)&cr.m_languageThreshold - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "3";
 | |
| 	m->m_group = 0;
 | |
| 	// m->m_sparm = 1;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "language detection samplesize";
 | |
| 	m->m_desc  = "Language detection size. Higher values"
 | |
| 		" mean more accuracy, but longer processing time."
 | |
| 		" Suggested values are 300-1000";
 | |
| 	m->m_cgi   = "lmsamples";
 | |
| 	m->m_off   = (char *)&cr.m_languageSamples - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "600";
 | |
| 	m->m_group = 0;
 | |
| 	// m->m_sparm = 1;
 | |
| 	m++;
 | |
| 
 | |
|  	m->m_title = "language detection spider samplesize";
 | |
|  	m->m_desc  = "Language detection page sample size. "
 | |
|  		"Higher values mean more accuracy, but longer "
 | |
|  		"spider time."
 | |
|  		" Suggested values are 3000-10000";
 | |
|  	m->m_cgi   = "lpsamples";
 | |
|  	m->m_off   = (char *)&cr.m_langPageLimit - x;
 | |
|  	m->m_type  = TYPE_LONG;
 | |
|  	m->m_def   = "6000";
 | |
|  	m->m_group = 0;
 | |
|  	// m->m_sparm = 1;
 | |
|  	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "docs to check for post query";
 | |
| 	m->m_desc  = "How many search results should we "
 | |
| 		"scan for post query demotion? "
 | |
| 		"0 disables all post query reranking. ";
 | |
| 	m->m_cgi   = "pqrds";
 | |
| 	m->m_off   = (char *)&si.m_docsToScanForReranking - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 1;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "demotion for foreign languages";
 | |
| 	m->m_desc  = "Demotion factor of non-relevant languages.  Score "
 | |
| 		"will be penalized by this factor as a percent if "
 | |
| 		"it's language is foreign. "
 | |
| 		"A safe value is probably anywhere from 0.5 to 1. ";
 | |
| 	m->m_cgi   = "pqrlang";
 | |
| 	m->m_off   = (char *)&cr.m_languageWeightFactor - x;
 | |
| 	//m->m_soff  = (char *)&si.m_languageWeightFactor - y;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "0.999";
 | |
| 	m->m_group = 0;
 | |
| 	//m->m_scgi  = "pqrlang";
 | |
| 	m->m_smin  = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "demotion for unknown languages";
 | |
| 	m->m_desc  = "Demotion factor for unknown languages. "
 | |
| 		"Page's score will be penalized by this factor as a percent "
 | |
| 		"if it's language is not known. "
 | |
| 		"A safe value is 0, as these pages will be reranked by "
 | |
| 		"country (see below). "
 | |
| 		"0 means no demotion.";
 | |
| 	m->m_cgi   = "pqrlangunk";
 | |
| 	m->m_off   = (char *)&cr.m_languageUnknownWeight- x;
 | |
| 	//m->m_soff  = (char *)&si.m_languageUnknownWeight- y;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "0.0";
 | |
| 	m->m_group = 0;
 | |
| 	//m->m_scgi  = "pqrlangunk";
 | |
| 	m->m_smin  = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "demotion for pages where the country of the page writes "
 | |
| 		"in the same language as the country of the query";
 | |
| 	m->m_desc  = "Demotion for pages where the country of the page writes "
 | |
| 		"in the same language as the country of the query. "
 | |
| 		"If query language is the same as the language of the page, "
 | |
| 		"then if a language written in the country of the page matches "
 | |
| 		"a language written by the country of the query, then page's "
 | |
| 		"score will be demoted by this factor as a percent. "
 | |
| 		"A safe range is between 0.5 and 1. ";
 | |
| 	m->m_cgi   = "pqrcntry";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_demFactCountry - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "0.98";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "demotion for query terms or gigabits in url";
 | |
| 	m->m_desc  = "Demotion factor for query terms or gigabits "
 | |
| 		"in a result's url. "
 | |
| 		"Score will be penalized by this factor times the number "
 | |
| 		"of query terms or gigabits in the url divided by "
 | |
| 		"the max value below such that fewer "
 | |
| 		"query terms or gigabits in the url causes the result "
 | |
| 		"to be demoted more heavily, depending on the factor. "
 | |
| 		"Higher factors demote more per query term or gigabit "
 | |
| 		"in the page's url. "
 | |
| 		"Generally, a page may not be demoted more than this "
 | |
| 		"factor as a percent. Also, how it is demoted is "
 | |
| 		"dependent on the max value. For example, "
 | |
| 		"a factor of 0.2 will demote the page 20% if it has no "
 | |
| 		"query terms or gigabits in its url. And if the max value is "
 | |
| 		"10, then a page with 5 query terms or gigabits in its "
 | |
| 		"url will be demoted 10%; and 10 or more query terms or "
 | |
| 		"gigabits in the url will not be demoted at all. "
 | |
| 		"0 means no demotion. "
 | |
| 		"A safe range is from 0 to 0.35. ";
 | |
| 	m->m_cgi   = "pqrqttiu";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_demFactQTTopicsInUrl - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max value for pages with query terms or gigabits "
 | |
| 		"in url";
 | |
| 	m->m_desc  = "Max number of query terms or gigabits in a url. "
 | |
| 		"Pages with a number of query terms or gigabits in their "
 | |
| 		"urls greater than or equal to this value will not be "
 | |
| 		"demoted. "
 | |
| 		"This controls the range of values expected to represent "
 | |
| 		"the number of query terms or gigabits in a url. It should "
 | |
| 		"be set to or near the estimated max number of query terms "
 | |
| 		"or topics that can be in a url. Setting to a lower value "
 | |
| 		"increases the penalty per query term or gigabit that is "
 | |
| 		"not in a url, but decreases the range of values that "
 | |
| 		"will be demoted.";
 | |
| 	m->m_cgi   = "pqrqttium";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_maxValQTTopicsInUrl - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "10";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "demotion for pages that are not high quality";
 | |
| 	m->m_desc  = "Demotion factor for pages that are not high quality. "
 | |
| 		"Score is penalized by this number as a percent times level "
 | |
| 		"of quality. A pqge will be demoted by the formula "
 | |
| 		"(max quality - page's quality) * this factor / the max "
 | |
| 		"value given below. Generally, a page will not be "
 | |
| 		"demoted more than this factor as a percent. "
 | |
| 		"0 means no demotion. "
 | |
| 		"A safe range is between 0 to 1. ";
 | |
| 	m->m_cgi   = "pqrqual";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_demFactQual - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max value for pages that are not high quality";
 | |
| 	m->m_desc  = "Max page quality. Pages with a quality level "
 | |
| 		"equal to or higher than this value "
 | |
| 		"will not be demoted. ";
 | |
| 	m->m_cgi   = "pqrqualm";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_maxValQual - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "100";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "demotion for pages that are not "
 | |
| 		"root or have many paths in the url";
 | |
| 	m->m_desc  = "Demotion factor each path in the url. "
 | |
| 		"Score will be demoted by this factor as a percent "
 | |
| 		"multiplied by the number of paths in the url divided "
 | |
| 		"by the max value below. "
 | |
| 		"Generally, the page will not be demoted more than this "
 | |
| 		"value as a percent. "
 | |
| 		"0 means no demotion. "
 | |
| 		"A safe range is from 0 to 0.75. ";
 | |
| 	m->m_cgi   = "pqrpaths";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_demFactPaths - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max value for pages that have many paths in the url";
 | |
| 	m->m_desc  = "Max number of paths in a url. "
 | |
| 		"This should be set to a value representing a very high "
 | |
| 		"number of paths for a url. Lower values increase the "
 | |
| 		"difference between how much each additional path demotes. ";
 | |
| 	m->m_cgi   = "pqrpathsm";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_maxValPaths - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "16";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "demotion for pages that do not have a catid";
 | |
| 	m->m_desc  = "Demotion factor for pages that do not have a catid. "
 | |
| 		"Score will be penalized by this factor as a percent. "
 | |
| 		"A safe range is from 0 to 0.2. ";
 | |
| 	m->m_cgi   = "pqrcatid";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_demFactNoCatId - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "demotion for pages where smallest "
 | |
| 		"catid has a lot of super topics";
 | |
| 	m->m_desc  = "Demotion factor for pages where smallest "
 | |
| 		"catid has a lot of super topics. "
 | |
| 		"Page will be penalized by the number of super topics "
 | |
| 		"multiplied by this factor divided by the max value given "
 | |
| 		"below. "
 | |
| 		"Generally, the page will not be demoted more than this "
 | |
| 		"factor as a percent. "
 | |
| 		"Note: pages with no catid are demoted by this factor as "
 | |
| 		"a percent so as not to penalize pages with a catid. "
 | |
| 		"0 means no demotion. "
 | |
| 		"A safe range is between 0 and 0.25. ";
 | |
| 	m->m_cgi   = "pqrsuper";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_demFactCatidHasSupers - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max value for pages where smallest catid has a lot "
 | |
| 		"of super topics";
 | |
| 	m->m_desc  = "Max number of super topics. "
 | |
| 		"Pages whose smallest catid that has more super "
 | |
| 		"topics than this will be demoted by the maximum amount "
 | |
| 		"given by the factor above as a percent. "
 | |
| 		"This should be set to a value representing a very high "
 | |
| 		"number of super topics for a category id. "
 | |
| 		"Lower values increase the difference between how much each "
 | |
| 		"additional path demotes. ";
 | |
| 	m->m_cgi   = "pqrsuperm";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_maxValCatidHasSupers - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "11";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "demotion for larger pages";
 | |
| 	m->m_desc  = "Demotion factor for larger pages. "
 | |
| 		"Page will be penalized by its size times this factor "
 | |
| 		"divided by the max page size below. "
 | |
| 		"Generally, a page will not be demoted more than this "
 | |
| 		"factor as a percent. "
 | |
| 		"0 means no demotion. "
 | |
| 		"A safe range is between 0 and 0.25. ";
 | |
| 	m->m_cgi   = "pqrpgsz";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_demFactPageSize - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max value for larger pages";
 | |
| 	m->m_desc  = "Max page size. "
 | |
| 		"Pages with a size greater than or equal to this will be "
 | |
| 		"demoted by the max amount (the factor above as a percent). ";
 | |
| 	m->m_cgi   = "pqrpgszm";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_maxValPageSize - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "524288";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "demotion for non-location specific queries "
 | |
| 		"with a location specific title";
 | |
| 	m->m_desc  = "Demotion factor for non-location specific queries "
 | |
| 		"with a location specific title. "
 | |
| 		"Pages which contain a location in their title which is "
 | |
| 		"not in the query or the gigabits will be demoted by their "
 | |
| 		"population multiplied by this factor divided by the max "
 | |
| 		"place population specified below. "
 | |
| 		"Generally, a page will not be demoted more than this "
 | |
| 		"value as a percent. "
 | |
| 		"0 means no demotion. ";
 | |
| 	m->m_cgi   = "pqrloct";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_demFactLocTitle - x;
 | |
| 	//m->m_scgi  = "pqrloct";
 | |
| 	//m->m_soff  = (char *)&si.m_pqr_demFactLocTitle - y;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "0.99";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "demotion for non-location specific queries "
 | |
| 		"with a location specific summary";
 | |
| 	m->m_desc  = "Demotion factor for non-location specific queries "
 | |
| 		"with a location specific summary. "
 | |
| 		"Pages which contain a location in their summary which is "
 | |
| 		"not in the query or the gigabits will be demoted by their "
 | |
| 		"population multiplied by this factor divided by the max "
 | |
| 		"place population specified below. "
 | |
| 		"Generally, a page will not be demoted more than this "
 | |
| 		"value as a percent. "
 | |
| 		"0 means no demotion. ";
 | |
| 	m->m_cgi   = "pqrlocs";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_demFactLocSummary - x;
 | |
| 	//m->m_scgi  = "pqrlocs";
 | |
| 	//m->m_soff  = (char *)&si.m_pqr_demFactLocSummary - y;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "0.95";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "demotion for non-location specific queries "
 | |
| 		"with a location specific dmoz category";
 | |
| 	m->m_desc  = "Demotion factor for non-location specific queries "
 | |
| 		"with a location specific dmoz regional category. "
 | |
| 		"Pages which contain a location in their dmoz which is "
 | |
| 		"not in the query or the gigabits will be demoted by their "
 | |
| 		"population multiplied by this factor divided by the max "
 | |
| 		"place population specified below. "
 | |
| 		"Generally, a page will not be demoted more than this "
 | |
| 		"value as a percent. "
 | |
| 		"0 means no demotion. ";
 | |
| 	m->m_cgi   = "pqrlocd";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_demFactLocDmoz - x;
 | |
| 	//m->m_scgi  = "pqrlocd";
 | |
| 	//m->m_soff  = (char *)&si.m_pqr_demFactLocDmoz - y;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "0.95";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "demote locations that appear in gigabits";
 | |
| 	m->m_desc  = "Demote locations that appear in gigabits.";
 | |
| 	m->m_cgi   = "pqrlocg";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_demInTopics - x;
 | |
| 	//m->m_scgi  = "pqrlocg";
 | |
| 	//m->m_soff  = (char *)&si.m_pqr_demInTopics - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max value for non-location specific queries "
 | |
| 		"with location specific results";
 | |
| 	m->m_desc  = "Max place population. "
 | |
| 		"Places with a population greater than or equal to this "
 | |
| 		"will be demoted to the maximum amount given by the "
 | |
| 		"factor above as a percent. ";
 | |
| 	m->m_cgi   = "pqrlocm";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_maxValLoc - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	// charlottesville was getting missed when this was 1M
 | |
| 	m->m_def   = "100000";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "demotion for non-html";
 | |
| 	m->m_desc  = "Demotion factor for content type that is non-html. "
 | |
| 		"Pages which do not have an html content type will be "
 | |
| 		"demoted by this factor as a percent. "
 | |
| 		"0 means no demotion. "
 | |
| 		"A safe range is between 0 and 0.35. ";
 | |
| 	m->m_cgi   = "pqrhtml";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_demFactNonHtml - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "demotion for xml";
 | |
| 	m->m_desc  = "Demotion factor for content type that is xml. "
 | |
| 		"Pages which have an xml content type will be "
 | |
| 		"demoted by this factor as a percent. "
 | |
| 		"0 means no demotion. "
 | |
| 		"Any value between 0 and 1 is safe if demotion for non-html "
 | |
| 		"is set to 0. Otherwise, 0 should probably be used. ";
 | |
| 	m->m_cgi   = "pqrxml";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_demFactXml - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "0.95";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "demotion for pages with other pages from same "
 | |
| 		"hostname";
 | |
| 	m->m_desc  = "Demotion factor for pages with fewer other pages from "
 | |
| 		"same hostname. "
 | |
| 		"Pages with results from the same host will be "
 | |
| 		"demoted by this factor times each fewer host than the max "
 | |
| 		"value given below, divided by the max value. "
 | |
| 		"Generally, a page will not be demoted more than this "
 | |
| 		"factor as a percent. "
 | |
| 		"0 means no demotion. "
 | |
| 		"A safe range is between 0 and 0.35. ";
 | |
| 	m->m_cgi   = "pqrfsd";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_demFactOthFromHost - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max value for pages with other pages from same "
 | |
| 		"domain";
 | |
| 	m->m_desc  = "Max number of pages from same domain. "
 | |
| 		"Pages which have this many or more pages from the same "
 | |
| 		"domain will not be demoted. "; 
 | |
| 	m->m_cgi   = "pqrfsdm";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_maxValOthFromHost - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "12";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "initial demotion for pages with common "
 | |
| 		"topics in dmoz as other results";
 | |
| 	m->m_desc  = "Initial demotion factor for pages with common "
 | |
| 		"topics in dmoz as other results. "
 | |
| 		"Pages will be penalized by the number of common topics "
 | |
| 		"in dmoz times this factor divided by the max value "
 | |
| 		"given below. "
 | |
| 		"Generally, a page will not be demoted by more than this "
 | |
| 		"factor as a percent. "
 | |
| 		"Note: this factor is decayed by the factor specified in "
 | |
| 		"the parm below, decay for pages with common topics in "
 | |
| 		"dmoz as other results, as the number of pages with "
 | |
| 		"common topics in dmoz increases. "
 | |
| 		"0 means no demotion. "
 | |
| 		"A safe range is between 0 and 0.35. ";
 | |
| 	m->m_cgi   = "pqrctid";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_demFactComTopicInDmoz - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "decay for pages with common topics in dmoz "
 | |
| 		"as other results";
 | |
| 	m->m_desc  = "Decay factor for pages with common topics in "
 | |
| 		"dmoz as other results. "
 | |
| 		"The initial demotion factor will be decayed by this factor "
 | |
| 		"as a percent as the number of common topics increase. "
 | |
| 		"0 means no decay. "
 | |
| 		"A safe range is between 0 and 0.25. ";
 | |
| 	m->m_cgi   = "pqrctidd";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_decFactComTopicInDmoz - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max value for pages with common topics in dmoz "
 | |
| 		"as other results";
 | |
| 	m->m_desc  = "Max number of common topics in dmoz as other results. "
 | |
| 		"Pages with a number of common topics equal to or greater "
 | |
| 		"than this value will be demoted to the maximum as given "
 | |
| 		"by the initial factor above as a percent. ";
 | |
| 	m->m_cgi   = "pqrctidm";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_maxValComTopicInDmoz - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "32"; 
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "demotion for pages where dmoz category names "
 | |
| 		"contain query terms or their synonyms";
 | |
| 	m->m_desc  = "Demotion factor for pages where dmoz category names "
 | |
| 		"contain fewer query terms or their synonyms. "
 | |
| 		"Pages will be penalized for each query term or synonym of "
 | |
| 		"a query term less than the max value given below multiplied "
 | |
| 		"by this factor, divided by the max value. "
 | |
| 		"Generally, a page will not be demoted more than this value "
 | |
| 		"as a percent. "
 | |
| 		"0 means no demotion. "
 | |
| 		"A safe range is between 0 and 0.3. ";
 | |
| 	m->m_cgi   = "pqrdcndcqt";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_demFactDmozCatNmNoQT - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 	
 | |
| 	m->m_title = "max value for pages where dmoz category names "
 | |
| 		"contain query terms or their synonyms";
 | |
| 	m->m_desc  = "Max number of query terms and their synonyms "
 | |
| 		"in a page's dmoz category name. "
 | |
| 		"Pages with a number of query terms or their synonyms in all "
 | |
| 		"dmoz category names greater than or equal to this value "
 | |
| 		"will not be demoted. ";
 | |
| 	m->m_cgi   = "pqrcndcqtm";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_maxValDmozCatNmNoQT - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "10"; 
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "demotion for pages where dmoz category names "
 | |
| 		"contain gigabits";
 | |
| 	m->m_desc  = "Demotion factor for pages where dmoz category "
 | |
| 		"names contain fewer gigabits. "
 | |
| 		"Pages will be penalized by the number of gigabits in all "
 | |
| 		"dmoz category names fewer than the max value given below "
 | |
| 		"divided by the max value. "
 | |
| 		"Generally, a page will not be demoted more than than this "
 | |
| 		"factor as a percent. "
 | |
| 		"0 means no demotion. "
 | |
| 		"A safe range is between 0 and 0.3. ";
 | |
| 	m->m_cgi   = "pqrdcndcgb";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_demFactDmozCatNmNoGigabits - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max value for pages where dmoz category names "
 | |
| 		"contain gigabits";
 | |
| 	m->m_desc  = "Max number of pages where dmoz category names "
 | |
| 		"contain a gigabit. "
 | |
| 		"Pages with a number of gigabits in all dmoz category names "
 | |
| 		"greater than or equal to this value will not be demoted. ";
 | |
| 	m->m_cgi   = "pqrdcndcgbm";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_maxValDmozCatNmNoGigabits - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "16"; 
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "demotion for pages based on datedb date";
 | |
| 	m->m_desc  = "Demotion factor for pages based on datedb date. "
 | |
| 		"Pages will be penalized for being published earlier than the "
 | |
| 		"max date given below. "
 | |
| 		"The older the page, the more it will be penalized based on "
 | |
| 		"the time difference between the page's date and the max date, "
 | |
| 		"divided by the max date. "
 | |
| 		"Generally, a page will not be demoted more than this "
 | |
| 		"value as a percent. "
 | |
| 		"0 means no demotion. "
 | |
| 		"A safe range is between 0 and 0.4. ";
 | |
| 	m->m_cgi   = "pqrdate";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_demFactDatedbDate - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 	
 | |
| 	m->m_title = "min value for demotion based on datedb date ";
 | |
| 	m->m_desc  = "Pages with a publish date equal to or earlier than "
 | |
| 		"this date will be demoted to the max (the factor above as "
 | |
| 		"a percent). "
 | |
| 		"Use this parm in conjunction with the max value below "
 | |
| 		"to specify the range of dates where demotion occurs. "
 | |
| 		"If you set this parm near the estimated earliest publish "
 | |
| 		"date that occurs somewhat frequently, this method can better "
 | |
| 		"control the additional demotion per publish day. "
 | |
| 		"This number is given as seconds since the epoch, January 1st, "
 | |
| 		"1970 divided by 1000. "
 | |
| 		"0 means use the epoch. ";
 | |
| 	m->m_cgi   = "pqrdatei";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_minValDatedbDate - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "631177"; // Jan 01, 1990 
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max value for demotion based on datedb date ";
 | |
| 	m->m_desc  = "Pages with a publish date greater than or equal to "
 | |
| 		"this value divided by 1000 will not be demoted. "
 | |
| 		"Use this parm in conjunction with the min value above "
 | |
| 		"to specify the range of dates where demotion occurs. "
 | |
| 		"This number is given as seconds before the current date "
 | |
| 		"and time taken from the system clock divided by 1000. "
 | |
| 		"0 means use the current time of the current day. ";
 | |
| 	m->m_cgi   = "pqrdatem";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_maxValDatedbDate - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "0"; 
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "demotion for pages based on proximity";
 | |
| 	m->m_desc  = "Demotion factor for proximity of query terms in "
 | |
| 		"a document.  The closer together terms occur in a "
 | |
| 		"document, the higher it will score."
 | |
| 		"0 means no demotion. ";
 | |
| 	m->m_cgi   = "pqrprox";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_demFactProximity - x;
 | |
| 	//m->m_scgi  = "pqrprox";
 | |
| 	//m->m_soff  = (char *)&si.m_pqr_demFactProximity - y;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "demotion for pages based on query terms section";
 | |
| 	m->m_desc  = "Demotion factor for where the query terms occur "
 | |
| 		"in the document.  If the terms only occur in a menu, "
 | |
| 		"a link, or a list, the document will be punished."
 | |
| 		"0 means no demotion. ";
 | |
| 	m->m_cgi   = "pqrinsec";
 | |
| 	//m->m_scgi  = "pqrinsec";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_demFactInSection - x;
 | |
| 	//m->m_soff  = (char *)&si.m_pqr_demFactInSection - y;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "weight of indexed score on pqr";
 | |
| 	m->m_desc  = "The proportion that the original score affects "
 | |
| 		"its rerank position. A factor of 1 will maintain "
 | |
| 		"the original score, 0 will only use the indexed "
 | |
| 		"score to break ties.";
 | |
| 	m->m_cgi   = "pqrorig";
 | |
| 	//m->m_scgi  = "pqrorig";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_demFactOrigScore - x;
 | |
| 	//m->m_soff  = (char *)&si.m_pqr_demFactOrigScore - y;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 
 | |
| 	m->m_title = "max value for demotion for pages based on proximity";
 | |
| 	m->m_desc  = "Max summary score where no more demotion occurs above. "
 | |
| 		"Pages with a summary score greater than or equal to this "
 | |
| 		"value will not be demoted. ";
 | |
| 	m->m_cgi   = "pqrproxm";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_maxValProximity - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "100000"; 
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "demotion for query being exclusivly in a subphrase";
 | |
| 	m->m_desc  = "Search result which contains the query terms only"
 | |
| 		" as a subphrase of a larger phrase will have its score "
 | |
| 		" reduced by this percent.";
 | |
| 	m->m_cgi   = "pqrspd";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_demFactSubPhrase - x;
 | |
| 	//m->m_soff  = (char *)&si.m_pqr_demFactSubPhrase - y;
 | |
| 	//m->m_scgi  = "pqrspd";
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "demotion based on common inlinks";
 | |
| 	m->m_desc  = "Based on the number of inlinks a search results has "
 | |
| 		"which are in common with another search result.";
 | |
| 	m->m_cgi   = "pqrcid";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_demFactCommonInlinks - x;
 | |
| 	//m->m_soff  = (char *)&si.m_pqr_demFactCommonInlinks - y;
 | |
| 	//m->m_scgi  = "pqrcid";
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = ".5";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "number of document calls multiplier";
 | |
| 	m->m_desc  = "Allows more results to be gathered in the case of "
 | |
| 		"an index having a high rate of duplicate results.  Generally"
 | |
| 		" expressed as 1.2";
 | |
| 	m->m_cgi   = "ndm";
 | |
| 	m->m_off   = (char *)&cr.m_numDocsMultiplier - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "1.2";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "max documents to compute per host";
 | |
| 	m->m_desc  = "Limit number of documents to search that do not provide"
 | |
| 		" the required results.";
 | |
| 	m->m_cgi   = "mdi";
 | |
| 	m->m_off   = (char *)&cr.m_maxDocIdsToCompute - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "1000";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "max real time inlinks";
 | |
| 	m->m_desc  = "Limit number of linksdb inlinks requested per result.";
 | |
| 	m->m_cgi   = "mrti";
 | |
| 	m->m_off   = (char *)&cr.m_maxRealTimeInlinks - x;
 | |
| 	//m->m_soff  = (char *)&si.m_maxRealTimeInlinks - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "10000";
 | |
| 	m->m_group = 0;
 | |
| 	//m->m_scgi  = "mrti";
 | |
| 	m->m_smin  = 0;
 | |
| 	m->m_smax  = 100000;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "percent topic similar default";
 | |
| 	m->m_desc  = "Like above, but used for deciding when to cluster "
 | |
| 		"results by topic for the news collection.";
 | |
| 	m->m_cgi   = "ptcd";
 | |
| 	m->m_off   = (char *)&cr.m_topicSimilarCutoffDefault - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "50";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;	
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max query terms";
 | |
| 	m->m_desc  = "Do not allow more than this many query terms. Helps "
 | |
| 		"prevent big queries from resource hogging.";
 | |
| 	m->m_cgi   = "mqt";
 | |
| 	m->m_off   = (char *)&cr.m_maxQueryTerms - x;
 | |
| 	//m->m_soff  = (char *)&si.m_maxQueryTerms - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "999999"; // now we got synonyms... etc
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE; 
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "dictionary site";
 | |
| 	m->m_desc  = "Where do we send requests for definitions of search "
 | |
| 		"terms. Set to the empty string to turn this feature off.";
 | |
| 	m->m_cgi   = "dictionarySite";
 | |
| 	m->m_off   = (char *)&cr.m_dictionarySite - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = SUMMARYHIGHLIGHTTAGMAXSIZE;
 | |
| 	m->m_def   = "http://www.answers.com/";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "allow links: searches";
 | |
| 	m->m_desc  = "Allows anyone access to perform links: searches on this "
 | |
| 		"collection.";
 | |
| 	m->m_cgi   = "als";
 | |
| 	m->m_off   = (char *)&cr.m_allowLinksSearch - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m++;
 | |
| 	*/
 | |
| 							    
 | |
| 	// REFERENCE PAGES CONTROLS
 | |
| 	m->m_title = "number of reference pages to generate";
 | |
| 	m->m_desc  = "What is the number of "
 | |
| 		"reference pages to generate per query? Set to 0 to save "
 | |
| 		"CPU time.";
 | |
| 	m->m_cgi   = "nrp";
 | |
| 	m->m_off   = (char *)&cr.m_refs_numToGenerate - x;
 | |
| 	//m->m_soff  = (char *)&si.m_refs_numToGenerate - y;
 | |
| 	m->m_smaxc = (char *)&cr.m_refs_numToGenerateCeiling - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 0;
 | |
| 	m->m_smin  = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "number of reference pages to generate";
 | |
| 	m->m_desc  = "What is the number of "
 | |
| 		"reference pages to generate per query? Set to 0 to save "
 | |
| 		"CPU time.";
 | |
| 	m->m_cgi   = "snrp";
 | |
| 	m->m_off  = (char *)&si.m_refs_numToGenerate - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_defOff =(char *)&cr.m_refs_numToGenerate - x;
 | |
| 	m->m_priv  = 0;
 | |
| 	m->m_smin  = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "number of reference pages to display";
 | |
| 	m->m_desc  = "What is the number of "
 | |
| 		"reference pages to display per query?";
 | |
| 	m->m_cgi   = "nrpdd";
 | |
| 	m->m_off   = (char *)&cr.m_refs_numToDisplay - x;
 | |
| 	//m->m_soff  = (char *)&si.m_refs_numToDisplay - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 0; // allow the (more) link
 | |
| 	m->m_sprpg = 0; // do not propagate
 | |
|         m->m_sprpp = 0; // do not propagate
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "docs to scan for reference pages";
 | |
| 	m->m_desc  = "How many search results should we "
 | |
| 		"scan for reference pages per query?";
 | |
| 	m->m_cgi   = "dsrp";
 | |
| 	m->m_off   = (char *)&cr.m_refs_docsToScan - x;
 | |
| 	//m->m_soff  = (char *)&si.m_refs_docsToScan - y;
 | |
| 	m->m_smaxc = (char *)&cr.m_refs_docsToScanCeiling - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "30";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 0;
 | |
| 	m->m_smin  = 0;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "min references quality";
 | |
| 	m->m_desc  = "References with page quality below this "
 | |
| 		"will be excluded.  (set to 101 to disable references while "
 | |
| 		"still generating related pages.";
 | |
| 	m->m_cgi   = "mrpq";
 | |
| 	m->m_off   = (char *)&cr.m_refs_minQuality - x;
 | |
| 	//m->m_soff  = (char *)&si.m_refs_minQuality - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "min links per references";
 | |
| 	m->m_desc  = "References need this many links to results to "
 | |
| 		"be included.";
 | |
| 	m->m_cgi   = "mlpr";
 | |
| 	m->m_off   = (char *)&cr.m_refs_minLinksPerReference - x;
 | |
| 	//m->m_soff  = (char *)&si.m_refs_minLinksPerReference - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "2";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max linkers to consider for references per page";
 | |
| 	m->m_desc  = "Stop processing referencing pages after hitting this "
 | |
| 		"limit.";
 | |
| 	m->m_cgi   = "mrpl";
 | |
| 	m->m_off   = (char *)&cr.m_refs_maxLinkers - x;
 | |
| 	//m->m_soff  = (char *)&si.m_refs_maxLinkers - y;
 | |
| 	m->m_smaxc = (char *)&cr.m_refs_maxLinkersCeiling - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "500";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_smin  = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "page fetch multiplier for references";
 | |
| 	m->m_desc  = "Use this multiplier to fetch more than the required "
 | |
|                 "number of reference pages.  fetches N * (this parm) "
 | |
| 		"references and displays the top scoring N.";
 | |
| 	m->m_cgi   = "ptrfr";
 | |
| 	m->m_off   = (char *)&cr.m_refs_additionalTRFetch - x;
 | |
| 	//m->m_soff  = (char *)&si.m_refs_additionalTRFetch - y;
 | |
| 	m->m_smaxc = (char *)&cr.m_refs_additionalTRFetchCeiling - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "1.5";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "number of links coefficient";
 | |
| 	m->m_desc  = "A in A * numLinks + B * quality + C * "
 | |
| 		"numLinks/totalLinks.";
 | |
|         m->m_cgi   = "nlc";
 | |
| 	m->m_off   = (char *)&cr.m_refs_numLinksCoefficient - x;
 | |
| 	//m->m_soff  = (char *)&si.m_refs_numLinksCoefficient - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "quality coefficient";
 | |
| 	m->m_desc  = "B in A * numLinks + B * quality + C * "
 | |
| 		"numLinks/totalLinks.";
 | |
| 	m->m_cgi   = "qc";
 | |
| 	m->m_off   = (char *)&cr.m_refs_qualityCoefficient - x;
 | |
| 	//m->m_soff  = (char *)&si.m_refs_qualityCoefficient - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "link density coefficient";
 | |
| 	m->m_desc  = "C in A * numLinks + B * quality + C * "
 | |
| 		"numLinks/totalLinks.";
 | |
| 	m->m_cgi   = "ldc";
 | |
| 	m->m_off   = (char *)&cr.m_refs_linkDensityCoefficient - x;
 | |
| 	//m->m_soff  = (char *)&si.m_refs_linkDensityCoefficient - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "1000";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	//m->m_sparm = 1;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "add or multiply quality times link density";
 | |
| 	m->m_desc  = "[+|*] in A * numLinks + B * quality [+|*]"
 | |
| 		" C * numLinks/totalLinks.";
 | |
| 	m->m_cgi   = "mrs";
 | |
| 	m->m_off   = (char *)&cr.m_refs_multiplyRefScore - x;
 | |
| 	//m->m_soff  = (char *)&si.m_refs_multiplyRefScore - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	// reference pages ceiling parameters
 | |
| 	m->m_title = "maximum allowed value for "
 | |
| 		"numReferences parameter";
 | |
| 	m->m_desc  = "maximum allowed value for "
 | |
| 		"numReferences parameter";
 | |
| 	m->m_cgi   = "nrpc";
 | |
| 	m->m_off   = (char *)&cr.m_refs_numToGenerateCeiling - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "100";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "maximum allowed value for "
 | |
| 		"docsToScanForReferences parameter";
 | |
| 	m->m_desc  = "maximum allowed value for "
 | |
| 		"docsToScanForReferences parameter";
 | |
| 	m->m_cgi   = "dsrpc";
 | |
| 	m->m_off   = (char *)&cr.m_refs_docsToScanCeiling - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "100";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "maximum allowed value for "
 | |
| 		"maxLinkers parameter";
 | |
| 	m->m_desc  = "maximum allowed value for "
 | |
| 		"maxLinkers parameter";
 | |
| 	m->m_cgi   = "mrplc";
 | |
| 	m->m_off   = (char *)&cr.m_refs_maxLinkersCeiling - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "5000";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "maximum allowed value for "
 | |
| 		"additionalTRFetch";
 | |
| 	m->m_desc  = "maximum allowed value for "
 | |
| 		"additionalTRFetch parameter";
 | |
| 	m->m_cgi   = "ptrfrc";
 | |
| 	m->m_off   = (char *)&cr.m_refs_additionalTRFetchCeiling - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "10";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	// related pages parameters
 | |
| 	m->m_title = "number of related pages to generate";
 | |
| 	m->m_desc  = "number of related pages to generate.";
 | |
| 	m->m_cgi   = "nrpg";
 | |
| 	m->m_off   = (char *)&cr.m_rp_numToGenerate - x;
 | |
| 	//m->m_soff  = (char *)&si.m_rp_numToGenerate - y;
 | |
| 	m->m_smaxc = (char *)&cr.m_rp_numToGenerateCeiling - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 0;
 | |
| 	m->m_smin  = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "number of related pages to display";
 | |
| 	m->m_desc  = "number of related pages to display.";
 | |
| 	m->m_cgi   = "nrpd";
 | |
| 	m->m_off   = (char *)&cr.m_rp_numToDisplay - x;
 | |
| 	//m->m_soff  = (char *)&si.m_rp_numToDisplay - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 0; // allow the (more) link
 | |
| 	m->m_sprpg = 0; // do not propagate
 | |
|         m->m_sprpp = 0; // do not propagate
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "number of links to scan for related pages";
 | |
| 	m->m_desc  = "number of links per reference page to scan for related "
 | |
| 		"pages.";
 | |
| 	m->m_cgi   = "nlpd";
 | |
| 	m->m_off   = (char *)&cr.m_rp_numLinksPerDoc - x;
 | |
| 	//m->m_soff  = (char *)&si.m_rp_numLinksPerDoc - y;
 | |
| 	m->m_smaxc = (char *)&cr.m_rp_numLinksPerDocCeiling - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "1024";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_smin  = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "min related page quality";
 | |
| 	m->m_desc  = "related pages with a quality lower than this will be "
 | |
| 		"ignored.";
 | |
| 	m->m_cgi   = "merpq";
 | |
| 	m->m_off   = (char *)&cr.m_rp_minQuality - x;
 | |
| 	//m->m_soff  = (char *)&si.m_rp_minQuality - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "30";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "min related page score";
 | |
| 	m->m_desc  = "related pages with an adjusted score lower than this "
 | |
| 		"will be ignored.";
 | |
| 	m->m_cgi   = "merps";
 | |
| 	m->m_off   = (char *)&cr.m_rp_minScore - x;
 | |
| 	//m->m_soff  = (char *)&si.m_rp_minScore - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "min related page links";
 | |
| 	m->m_desc  = "related pages with less than this number of links"
 | |
| 		" will be ignored.";
 | |
| 	m->m_cgi   = "merpl";
 | |
| 	m->m_off   = (char *)&cr.m_rp_minLinks - x;
 | |
| 	//m->m_soff  = (char *)&si.m_rp_minLinks - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "2";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "coefficient for number of links in related pages score "
 | |
| 		"calculation";
 | |
| 	m->m_desc  = "A in A * numLinks + B * avgLnkrQlty + C * PgQlty"
 | |
| 		" + D * numSRPLinks.";
 | |
| 	m->m_cgi   = "nrplc";
 | |
| 	m->m_off   = (char *)&cr.m_rp_numLinksCoeff - x;
 | |
| 	//m->m_soff  = (char *)&si.m_rp_numLinksCoeff - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "10";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "coefficient for average linker quality in related pages "
 | |
| 		"score calculation";
 | |
| 	m->m_desc  = "B in A * numLinks + B * avgLnkrQlty + C * PgQlty"
 | |
| 		" + D * numSRPLinks.";
 | |
| 	m->m_cgi   = "arplqc";
 | |
| 	m->m_off   = (char *)&cr.m_rp_avgLnkrQualCoeff - x;
 | |
| 	//m->m_soff  = (char *)&si.m_rp_avgLnkrQualCoeff - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "coefficient for page quality in related pages "
 | |
| 		"score calculation";
 | |
| 	m->m_desc  = "C in A * numLinks + B * avgLnkrQlty + C * PgQlty"
 | |
| 		" + D * numSRPLinks";
 | |
| 	m->m_cgi   = "qrpc";
 | |
| 	m->m_off   = (char *)&cr.m_rp_qualCoeff - x;
 | |
| 	//m->m_soff  = (char *)&si.m_rp_qualCoeff - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "coefficient for search result links in related pages "
 | |
| 		"score calculation";
 | |
| 	m->m_desc  = "D in A * numLinks + B * avgLnkrQlty + C * PgQlty"
 | |
| 		" + D * numSRPLinks.";
 | |
| 	m->m_cgi   = "srprpc";
 | |
| 	m->m_off   = (char *)&cr.m_rp_srpLinkCoeff - x;
 | |
| 	//m->m_soff  = (char *)&si.m_rp_srpLinkCoeff - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "number of related page summary excerpts";
 | |
| 	m->m_desc  = "What is the maximum number of "
 | |
| 		"excerpts displayed in the summary of a related page?";
 | |
| 	m->m_cgi   = "nrps";
 | |
| 	m->m_off   = (char *)&cr.m_rp_numSummaryLines - x;
 | |
| 	//m->m_soff  = (char *)&si.m_rp_numSummaryLines - y;
 | |
| 	m->m_smaxc = (char *)&cr.m_rp_numSummaryLinesCeiling - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_smin  = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "highlight query terms in related pages summary";
 | |
| 	m->m_desc  = "Highlight query terms in related pages summary.";
 | |
| 	m->m_cgi   = "hqtirps"; 
 | |
| 	m->m_off   = (char *)&cr.m_rp_doRelatedPageSumHighlight - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "number of characters to display in title before "
 | |
| 		"truncating";
 | |
| 	m->m_desc  = "Truncates a related page title after this many "
 | |
| 		"characters and adds ...";
 | |
| 	m->m_cgi   = "ttl";
 | |
| 	m->m_off   = (char *)&cr.m_rp_titleTruncateLimit - x;
 | |
| 	//m->m_soff  = (char *)&si.m_rp_titleTruncateLimit - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "50";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "use results pages as references";
 | |
| 	m->m_desc  = "Use the search results' links in order to generate "
 | |
| 		"related pages.";
 | |
| 	m->m_cgi   = "urar"; 
 | |
| 	m->m_off   = (char *)&cr.m_rp_useResultsAsReferences - x;
 | |
| 	//m->m_soff  = (char *)&si.m_rp_useResultsAsReferences - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "get related pages from other cluster";
 | |
| 	m->m_desc  = "Say yes here to make Gigablast check another Gigablast "
 | |
| 		"cluster for title rec for related pages. Gigablast will "
 | |
| 		"use the hosts2.conf file in the working directory to "
 | |
| 		"tell it what hosts belong to the other cluster.";
 | |
| 	m->m_cgi   = "erp"; // external related pages
 | |
| 	m->m_off   = (char *)&cr.m_rp_getExternalPages - x;
 | |
| 	//m->m_soff  = (char *)&si.m_rp_getExternalPages - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "collection for other related pages cluster";
 | |
| 	m->m_desc  = "Gigablast will fetch the related pages title record "
 | |
| 		"from this collection in the other cluster.";
 | |
| 	m->m_cgi   = "erpc"; // external related pages collection
 | |
| 	m->m_off   = (char *)&cr.m_rp_externalColl - x;
 | |
| 	//m->m_soff  = (char *)&si.m_rp_externalColl - y;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_COLL_LEN;
 | |
| 	m->m_def   = "main";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	// relate pages ceiling parameters
 | |
| 	m->m_title = "maximum allowed value for numToGenerate parameter";
 | |
| 	m->m_desc  = "maximum allowed value for numToGenerate parameter";
 | |
| 	m->m_cgi   = "nrpgc";
 | |
| 	m->m_off   = (char *)&cr.m_rp_numToGenerateCeiling - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "100";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "maximum allowed value for numRPLinksPerDoc parameter";
 | |
| 	m->m_desc  = "maximum allowed value for numRPLinksPerDoc parameter";
 | |
| 	m->m_cgi   = "nlpdc";
 | |
| 	m->m_off   = (char *)&cr.m_rp_numLinksPerDocCeiling - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "5000";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "maximum allowed value for numSummaryLines parameter";
 | |
| 	m->m_desc  = "maximum allowed value for numSummaryLines parameter";
 | |
| 	m->m_cgi   = "nrpsc";
 | |
| 	m->m_off   = (char *)&cr.m_rp_numSummaryLinesCeiling - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "10";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	// import search results controls
 | |
| 	m->m_title = "how many imported results should we insert";
 | |
| 	m->m_desc  = "Gigablast will import X search results from the "
 | |
| 		"external cluster given by hosts2.conf and merge those "
 | |
| 		"search results into the current set of search results. "
 | |
| 		"Set to 0 to disable.";
 | |
| 	m->m_cgi   = "imp";
 | |
| 	m->m_off   = (char *)&cr.m_numResultsToImport - x;
 | |
| 	//m->m_soff  = (char *)&si.m_numResultsToImport - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "imported score weight";
 | |
| 	m->m_desc  = "The score of all imported results will be multiplied "
 | |
| 		"by this number. Since results are mostly imported from "
 | |
| 		"a large collection they will usually have higher scores "
 | |
| 		"because of having more link texts or whatever, so tone it "
 | |
| 		"down a bit to put it on par with the integrating collection.";
 | |
| 	m->m_cgi   = "impw";
 | |
| 	m->m_off   = (char *)&cr.m_importWeight - x;
 | |
| 	//m->m_soff  = (char *)&si.m_importWeight - y;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = ".80";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "how many linkers must each imported result have";
 | |
| 	m->m_desc  = "The urls of imported search results must be linked to "
 | |
| 		"by at least this many documents in the primary collection.";
 | |
| 	m->m_cgi   = "impl";
 | |
| 	m->m_off   = (char *)&cr.m_minLinkersPerImportedResult - x;
 | |
| 	//m->m_soff  = (char *)&si.m_minLinkersPerImportedResult - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "3";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "num linkers weight";
 | |
| 	m->m_desc  = "The number of linkers an imported result has from "
 | |
| 		"the base collection is multiplied by this weight and then "
 | |
| 		"added to the final score. The higher this is the more an "
 | |
| 		"imported result with a lot of linkers will be boosted. "
 | |
| 		"Currently, 100 is the max number of linkers permitted.";
 | |
| 	m->m_cgi   = "impnlw";
 | |
| 	m->m_off   = (char *)&cr.m_numLinkerWeight - x;
 | |
| 	//m->m_soff  = (char *)&si.m_numLinkerWeight - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "50";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "the name of the collection to import from";
 | |
| 	m->m_desc  = "Gigablast will import X search results from this "
 | |
| 		"external collection and merge them into the current search "
 | |
| 		"results.";
 | |
| 	m->m_cgi   = "impc";
 | |
| 	m->m_off   = (char *)&cr.m_importColl - x;
 | |
| 	//m->m_soff  = (char *)&si.m_importColl - y;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_COLL_LEN;
 | |
| 	m->m_def   = "main";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max similar results for cluster by topic";
 | |
| 	m->m_desc  = "Max similar results to show when clustering by topic.";
 | |
| 	m->m_cgi   = "ncbt";
 | |
| 	m->m_off   = (char *)&cr.m_maxClusterByTopicResults - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "10";
 | |
| 	m->m_group = 0;
 | |
| 	//m->m_scgi  = "ncbt";
 | |
| 	//m->m_soff  = (char *)&si.m_maxClusterByTopicResults - y;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "number of extra results to get for cluster by topic";
 | |
| 	m->m_desc  = "number of extra results to get for cluster by topic";
 | |
| 	m->m_cgi   = "ntwo";
 | |
| 	m->m_off   = (char *)&cr.m_numExtraClusterByTopicResults - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "100";
 | |
| 	m->m_group = 0;
 | |
| 	//m->m_scgi  = "ntwo";
 | |
| 	//m->m_soff  = (char *)&si.m_numExtraClusterByTopicResults - y;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "Minimum number of in linkers required to consider getting"
 | |
| 		" the title from in linkers";
 | |
| 	m->m_desc  = "Minimum number of in linkers required to consider getting"
 | |
| 	       "	the title from in linkers";
 | |
| 	m->m_cgi   = "mininlinkers";
 | |
| 	m->m_off   = (char *)&cr.m_minTitleInLinkers - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "10";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "Max number of in linkers to consider";
 | |
| 	m->m_desc  = "Max number of in linkers to consider for getting in "
 | |
| 		"linkers titles.";
 | |
| 	m->m_cgi   = "maxinlinkers";
 | |
| 	m->m_off   = (char *)&cr.m_maxTitleInLinkers - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "128";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max title len";
 | |
| 	m->m_desc  = "What is the maximum number of "
 | |
| 		"characters allowed in titles displayed in the search "
 | |
| 		"results?";
 | |
| 	m->m_cgi   = "tml";
 | |
| 	m->m_defOff= (char *)&cr.m_titleMaxLen - x;
 | |
| 	m->m_off   = (char *)&si.m_titleMaxLen - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "use new summary generator";
 | |
| 	m->m_desc  = "Also used for gigabits and titles.";
 | |
| 	m->m_cgi   = "uns"; // external related pages
 | |
| 	m->m_off   = (char *)&cr.m_useNewSummaries - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_sparm = 1;
 | |
| 	m->m_scgi  = "uns";
 | |
| 	m->m_soff  = (char *)&si.m_useNewSummaries - y;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "summary mode";
 | |
| 	m->m_desc  = "0 = old compatibility mode, 1 = UTF-8 mode, "
 | |
| 		"2 = fast ASCII mode, "
 | |
| 		"3 = Ascii Proximity Summary, "
 | |
| 		"4 = Utf8 Proximity Summary, "
 | |
| 		"5 = Ascii Pre Proximity Summary, "
 | |
| 		"6 = Utf8 Pre Proximity Summary:";
 | |
| 	m->m_cgi   = "smd";
 | |
| 	m->m_off   = (char *)&cr.m_summaryMode - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "0";
 | |
| 	//m->m_scgi  = "smd";
 | |
| 	//m->m_soff  = (char*) &si.m_summaryMode - y;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "number of summary excerpts";
 | |
| 	m->m_desc  = "How many summary excerpts to display per search result?";
 | |
| 	m->m_cgi   = "ns";
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_defOff= (char *)&cr.m_summaryMaxNumLines - x;
 | |
| 	m->m_group = 0;
 | |
| 	m->m_off   = (char *)&si.m_numLinesInSummary - y;
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "max summary line width";
 | |
| 	m->m_desc  = "<br> tags are inserted to keep the number "
 | |
| 		"of chars in the summary per line at or below this width. "
 | |
| 		"Also affects title. "
 | |
| 		"Strings without spaces that exceed this "
 | |
| 		"width are not split. Has no affect on xml or json feed, "
 | |
| 		"only works on html.";
 | |
| 	m->m_cgi   = "sw";
 | |
| 	//m->m_off   = (char *)&cr.m_summaryMaxWidth - x;
 | |
| 	m->m_off   = (char *)&si.m_summaryMaxWidth - y;
 | |
| 	m->m_defOff= (char *)&cr.m_summaryMaxWidth - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "max summary excerpt length";
 | |
| 	m->m_desc = "What is the maximum number of "
 | |
| 		"characters allowed per summary excerpt?";
 | |
| 	m->m_cgi   = "smxcpl";
 | |
| 	m->m_off   = (char *)&si.m_summaryMaxNumCharsPerLine - y;
 | |
| 	m->m_defOff= (char *)&cr.m_summaryMaxNumCharsPerLine - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 	/*
 | |
| 	m->m_title = "enable page turk";
 | |
| 	m->m_desc  = "If enabled, search results shall feed the page turk "
 | |
| 		"is used to mechanically rank websites.";
 | |
| 	m->m_cgi   = "ept";
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_off   = (char *)&cr.m_pageTurkEnabled - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "results to scan for gigabits generation";
 | |
| 	m->m_desc  = "How many search results should we "
 | |
| 		"scan for gigabit (related topics) generation. Set this to "
 | |
| 		"zero to disable gigabits!";
 | |
| 	m->m_cgi   = "dsrt";
 | |
| 	m->m_off   = (char *)&si.m_docsToScanForTopics - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_defOff= (char *)&cr.m_docsToScanForTopics - x;
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "ip restriction for gigabits";
 | |
| 	m->m_desc  = "Should Gigablast only get one document per IP domain "
 | |
| 		"and per domain for gigabits (related topics) generation?";
 | |
| 	m->m_cgi   = "ipr";
 | |
| 	m->m_off   = (char *)&si.m_ipRestrictForTopics - y;
 | |
| 	m->m_defOff= (char *)&cr.m_ipRestrict - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 
 | |
| 	m->m_title = "number of gigabits to show";
 | |
| 	m->m_desc  = "What is the number of gigabits (related topics) "
 | |
| 		"displayed per query? Set to 0 to save a little CPU time.";
 | |
| 	m->m_cgi   = "nrt";
 | |
| 	m->m_defOff= (char *)&cr.m_numTopics - x;
 | |
| 	m->m_off   = (char *)&si.m_numTopicsToDisplay - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "11";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_sprpg = 0; // do not propagate
 | |
|         m->m_sprpp = 0; // do not propagate
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "min topics score";
 | |
| 	m->m_desc  = "Gigabits (related topics) with scores below this "
 | |
| 		"will be excluded. Scores range from 0% to over 100%.";
 | |
| 	m->m_cgi   = "mts";
 | |
| 	m->m_defOff= (char *)&cr.m_minTopicScore - x;
 | |
| 	m->m_off   = (char *)&si.m_minTopicScore - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 
 | |
| 	m->m_title = "min gigabit doc count by default";
 | |
| 	m->m_desc  = "How many documents must contain the gigabit "
 | |
| 		"(related topic) in order for it to be displayed.";
 | |
| 	m->m_cgi   = "mdc";
 | |
| 	m->m_defOff= (char *)&cr.m_minDocCount - x;
 | |
| 	m->m_off   = (char *)&si.m_minDocCount - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "2";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 
 | |
| 	m->m_title = "dedup doc percent for gigabits (related topics)";
 | |
| 	m->m_desc  = "If a document is this percent similar to another "
 | |
| 		"document with a higher score, then it will not contribute "
 | |
| 		"to the gigabit generation.";
 | |
| 	m->m_cgi   = "dsp";
 | |
| 	m->m_defOff= (char *)&cr.m_dedupSamplePercent - x;
 | |
| 	m->m_off   = (char *)&si.m_dedupSamplePercent - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "80";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	///////////////////////////////////////////
 | |
| 	//  SPIDER PROXY CONTROLS
 | |
| 	//  
 | |
| 	///////////////////////////////////////////
 | |
| 
 | |
| 	m->m_title = "always use spider proxies for all collections";
 | |
| 	m->m_desc  = "ALWAYS Use the spider proxies listed below for "
 | |
| 		"spidering. If none are "
 | |
| 		"listed then gb will not use any. Applies to all collections. "
 | |
| 		"If you want to regulate this on a per collection basis then "
 | |
| 		"set this to <b>NO</b> here and adjust the "
 | |
| 		"proxy controls on the "
 | |
| 		"<b>spider controls</b> page. If the list of proxy IPs below "
 | |
| 		"is empty, then of course, no proxies will be used.";
 | |
| 	m->m_cgi   = "useproxyips";
 | |
| 	m->m_xml   = "useSpiderProxies";
 | |
| 	m->m_off   = (char *)&g_conf.m_useProxyIps - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	// hide this for now. just make it a per collection parm.
 | |
| 	m->m_flags = PF_HIDDEN;
 | |
| 	m->m_page  = PAGE_SPIDERPROXIES;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "automatically use spider proxies for all collections";
 | |
| 	m->m_desc  = "AUTOMATICALLY use the spider proxies listed below for "
 | |
| 		"spidering. If none are "
 | |
| 		"listed then gb will not use any. Applies to all collections. "
 | |
| 		"If you want to regulate this on a per collection basis then "
 | |
| 		"set this to <b>NO</b> here and adjust the "
 | |
| 		"proxy controls on the "
 | |
| 		"<b>spider controls</b> page. If the list of proxy IPs below "
 | |
| 		"is empty, then of course, no proxies will be used.";
 | |
| 	m->m_cgi   = "autouseproxyips";
 | |
| 	m->m_xml   = "automaticallyUseSpiderProxies";
 | |
| 	m->m_off   = (char *)&g_conf.m_automaticallyUseProxyIps - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	// hide this for now. just make it a per collection parm.
 | |
| 	m->m_flags = PF_HIDDEN;
 | |
| 	m->m_page  = PAGE_SPIDERPROXIES;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "spider proxy ips";
 | |
| 	m->m_desc  = "List of white space-separated spider proxy IPs. Put "
 | |
| 		"in IP:port format. Example <i>1.2.3.4:80 4.5.6.7:99</i>. "
 | |
| 		"You can also use <i>username:password@1.2.3.4:80</i>. "
 | |
| 		"If a proxy itself times out when downloading through it "
 | |
| 		"it will be perceived as a normal download timeout and the "
 | |
| 		"page will be retried according to the url filters table, so "
 | |
| 		"you  might want to modify the url filters to retry network "
 | |
| 		"errors more aggressively. Search for 'private proxies' on "
 | |
| 		"google to find proxy providers. Try to ensure all your "
 | |
| 		"proxies are on different class C IPs if possible. "
 | |
| 		"That is, the first 3 numbers in the IP addresses are all "
 | |
| 		"different.";
 | |
| 	m->m_cgi   = "proxyips";
 | |
| 	m->m_xml   = "proxyIps";
 | |
| 	m->m_off   = (char *)&g_conf.m_proxyIps - g;
 | |
| 	m->m_type  = TYPE_SAFEBUF; // TYPE_IP;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_flags = PF_TEXTAREA | PF_REBUILDPROXYTABLE;
 | |
| 	m->m_page  = PAGE_SPIDERPROXIES;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "spider proxy test url";
 | |
| 	m->m_desc  = "Download this url every minute through each proxy "
 | |
| 		"listed above to ensure they are up. Typically you should "
 | |
| 		"make this a URL you own so you do not aggravate another "
 | |
| 		"webmaster.";
 | |
| 	m->m_xml   = "proxyTestUrl";
 | |
| 	m->m_cgi   = "proxytesturl";
 | |
| 	m->m_off   = (char *)&g_conf.m_proxyTestUrl - g;
 | |
| 	m->m_type  = TYPE_SAFEBUF;
 | |
| 	m->m_def   = "http://www.gigablast.com/";
 | |
| 	m->m_flags = 0;
 | |
| 	m->m_page  = PAGE_SPIDERPROXIES;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "reset proxy table";
 | |
| 	m->m_desc  = "Reset the proxy statistics in the table below. Makes "
 | |
| 		"all your proxies treated like new again.";
 | |
| 	m->m_cgi   = "resetproxytable";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_func  = CommandResetProxyTable;
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_page  = PAGE_SPIDERPROXIES;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "mix up user agents";
 | |
| 	m->m_desc  = "Use random user-agents when downloading through "
 | |
| 		"a spider proxy listed above to "
 | |
| 		"protecting gb's anonymity. The User-Agent used is a function "
 | |
| 		"of the proxy IP/port and IP of the url being downloaded. "
 | |
| 		"That way it is consistent when downloading the same website "
 | |
| 		"through the same proxy.";
 | |
| 	m->m_cgi   = "userandagents";
 | |
| 	m->m_xml   = "useRandAgents";
 | |
| 	m->m_off   = (char *)&g_conf.m_useRandAgents - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_flags = 0;
 | |
| 	m->m_page  = PAGE_SPIDERPROXIES;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "squid proxy authorized users";
 | |
| 	m->m_desc  = "Gigablast can also simulate a squid proxy, "
 | |
| 		"complete with "
 | |
| 		"caching. It will forward your request to the proxies you "
 | |
| 		"list above, if any. This list consists of space-separated "
 | |
| 		"<i>username:password</i> items. Leave this list empty "
 | |
| 		"to disable squid caching behaviour. The default cache "
 | |
| 		"size for this is 10MB per shard. Use item *:* to allow "
 | |
| 		"anyone access.";
 | |
| 	m->m_xml   = "proxyAuth";
 | |
| 	m->m_cgi   = "proxyAuth";
 | |
| 	m->m_off   = (char *)&g_conf.m_proxyAuth - g;
 | |
| 	m->m_type  = TYPE_SAFEBUF;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_flags = PF_TEXTAREA;
 | |
| 	m->m_page  = PAGE_SPIDERPROXIES;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 
 | |
| 	m->m_title = "max words per gigabit (related topic) by default";
 | |
| 	m->m_desc  = "Maximum number of words a gigabit (related topic) "
 | |
| 		"can have. Affects xml feeds, too.";
 | |
| 	m->m_cgi   = "mwpt";
 | |
| 	m->m_defOff= (char *)&cr.m_maxWordsPerTopic - x;
 | |
| 	m->m_off   = (char *)&si.m_maxWordsPerTopic - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "6";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "show images";
 | |
| 	m->m_desc  = "Should we return or show the thumbnail images in the "
 | |
| 		"search results?";
 | |
| 	m->m_cgi   = "showimages";
 | |
| 	m->m_off   = (char *)&si.m_showImages - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_sprpg = 0;
 | |
| 	m->m_sprpp = 0;
 | |
| 	m->m_flags = PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "use cache";
 | |
| 	m->m_desc  = "Use 0 if Gigablast should not read or write from "
 | |
| 		"any caches at any level.";
 | |
| 	m->m_def   = "-1";
 | |
| 	m->m_off   = (char *)&si.m_useCache - y;
 | |
| 	m->m_type  = TYPE_CHAR;
 | |
| 	m->m_cgi   = "usecache";
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "read from cache";
 | |
| 	m->m_desc  = "Should we read search results from the cache? Set "
 | |
| 		"to false to fix dmoz bug.";
 | |
| 	m->m_cgi   = "rcache";
 | |
| 	m->m_off   = (char *)&si.m_rcache - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_sprpg = 0;
 | |
| 	m->m_sprpp = 0;
 | |
| 	m->m_flags = PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "write to cache";
 | |
| 	m->m_desc  = "Use 0 if Gigablast should not write to "
 | |
| 		"any caches at any level.";
 | |
| 	m->m_def   = "-1";
 | |
| 	m->m_off   = (char *)&si.m_wcache - y;
 | |
| 	m->m_type  = TYPE_CHAR;
 | |
| 	m->m_cgi   = "wcache";
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max serp docid";
 | |
| 	m->m_desc  = "Start displaying results after this score/docid pair. "
 | |
| 		"Used by widget to append results to end when index is "
 | |
| 		"volatile.";
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_off   = (char *)&si.m_minSerpDocId - y;
 | |
| 	m->m_type  = TYPE_LONG_LONG;
 | |
| 	m->m_cgi   = "minserpdocid";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_smin  = 0;
 | |
| 	m->m_sprpg = 0;
 | |
| 	m->m_sprpp = 0;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max serp score";
 | |
| 	m->m_desc  = "Start displaying results after this score/docid pair. "
 | |
| 		"Used by widget to append results to end when index is "
 | |
| 		"volatile.";
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_off   = (char *)&si.m_maxSerpScore - y;
 | |
| 	m->m_type  = TYPE_DOUBLE;
 | |
| 	m->m_cgi   = "maxserpscore";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_smin  = 0;
 | |
| 	m->m_sprpg = 0;
 | |
| 	m->m_sprpp = 0;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "restrict search to this url";
 | |
| 	m->m_desc  = "Does a url: query.";
 | |
| 	m->m_off   = (char *)&si.m_url - y;
 | |
| 	m->m_type  = TYPE_CHARPTR;//STRING;
 | |
| 	//m->m_size  = MAX_URL_LEN;
 | |
| 	m->m_cgi   = "url";
 | |
| 	m->m_sprpg = 0;
 | |
| 	m->m_sprpp = 0;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_flags = PF_NOAPI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "restrict search to pages that link to this url";
 | |
| 	m->m_desc  = "The url which the pages must link to.";
 | |
| 	m->m_off   = (char *)&si.m_link - y;
 | |
| 	m->m_type  = TYPE_CHARPTR;//STRING;
 | |
| 	//m->m_size  = MAX_URL_LEN;
 | |
| 	m->m_cgi   = "link";
 | |
| 	m->m_sprpg = 0;
 | |
| 	m->m_sprpp = 0;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "search for this phrase quoted";
 | |
| 	m->m_desc  = "The phrase which will be quoted in the query. From the "
 | |
| 		"advanced search page, adv.html.";
 | |
| 	m->m_off   = (char *)&si.m_quote1 - y;
 | |
| 	m->m_type  = TYPE_CHARPTR;//STRING;
 | |
| 	//m->m_size  = 512;
 | |
| 	m->m_cgi   = "quotea";
 | |
| 	m->m_sprpg = 0;
 | |
| 	m->m_sprpp = 0;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_flags = PF_NOAPI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "search for this second phrase quoted";
 | |
| 	m->m_desc  = "The phrase which will be quoted in the query. From the "
 | |
| 		"advanced search page, adv.html.";
 | |
| 	m->m_off   = (char *)&si.m_quote2 - y;
 | |
| 	m->m_type  = TYPE_CHARPTR;//STRING;
 | |
| 	//m->m_size  = 512;
 | |
| 	m->m_cgi   = "quoteb";
 | |
| 	m->m_sprpg = 0;
 | |
| 	m->m_sprpp = 0;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_flags = PF_NOAPI;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "restrict results to this site";
 | |
| 	m->m_desc  = "Returned results will have URLs from this site, X.";
 | |
| 	m->m_off   = (char *)&si.m_site - y;
 | |
| 	m->m_type  = TYPE_CHARPTR;//STRING;
 | |
| 	m->m_cgi   = "site";
 | |
| 	m->m_size  = 1024; // MAX_SITE_LEN;
 | |
| 	m->m_sprpg = 1;
 | |
| 	m->m_sprpp = 1;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "restrict results to these sites";
 | |
| 	m->m_desc  = "Returned results will have URLs from these "
 | |
| 		"space-separated list of sites. Can have up to 200 sites. "
 | |
| 		"A site can include sub folders. This is allows you to build "
 | |
| 		"a <a href=\"/cts.html\">Custom Topic Search Engine</a>.";
 | |
| 	m->m_off   = (char *)&si.m_sites - y;
 | |
| 	m->m_type  = TYPE_CHARPTR;
 | |
| 	//m->m_size  = 32*1024; // MAX_SITES_LEN;
 | |
| 	m->m_cgi   = "sites";
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_sprpg = 1;
 | |
| 	m->m_sprpp = 1;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "require these query terms";
 | |
| 	m->m_desc  = "Returned results will have all the words in X. "
 | |
| 		"From the advanced search page, adv.html.";
 | |
| 	m->m_off   = (char *)&si.m_plus - y;
 | |
| 	m->m_def   = NULL;
 | |
| 	m->m_type  = TYPE_CHARPTR;//STRING;
 | |
| 	m->m_cgi   = "plus";
 | |
| 	//m->m_size  = 500;
 | |
| 	m->m_sprpg = 0;
 | |
| 	m->m_sprpp = 0;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_flags = PF_NOAPI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "avoid these query terms";
 | |
| 	m->m_desc  = "Returned results will NOT have any of the words in X. "
 | |
| 		"From the advanced search page, adv.html.";
 | |
| 	m->m_off   = (char *)&si.m_minus - y;
 | |
| 	m->m_type  = TYPE_CHARPTR;//STRING;
 | |
| 	m->m_cgi   = "minus";
 | |
| 	//m->m_size  = 500;
 | |
| 	m->m_sprpg = 0;
 | |
| 	m->m_sprpp = 0;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_flags = PF_NOAPI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "format of the returned search results";
 | |
| 	m->m_desc  = "Can be html, xml or json to get results back in that "
 | |
| 		"format.";
 | |
| 	m->m_def   = "html";
 | |
| 	m->m_off   = (char *)&si.m_formatStr - y;
 | |
| 	m->m_type  = TYPE_CHARPTR;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_cgi   = "format";
 | |
| 	m->m_flags = PF_NOAPI; // already in the api, so don't repeat
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "family filter";
 | |
| 	m->m_desc  = "Remove objectionable results if this is enabled.";
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_off   = (char *)&si.m_familyFilter - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_cgi   = "ff";
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "highlight query terms in summaries";
 | |
| 	m->m_desc  = "Use to disable or enable "
 | |
| 		"highlighting of the query terms in the summaries.";
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_off   = (char *)&si.m_doQueryHighlighting - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_cgi   = "qh";
 | |
| 	m->m_smin  = 0;
 | |
| 	m->m_smax  = 8;
 | |
| 	m->m_sprpg = 1; // turn off for now
 | |
| 	m->m_sprpp = 1;
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "cached page highlight query";
 | |
| 	m->m_desc  = "Highlight the terms in this query instead.";
 | |
| 	m->m_def   = NULL;
 | |
| 	m->m_off   = (char *)&si.m_highlightQuery - y;
 | |
| 	m->m_type  = TYPE_CHARPTR;//STRING;
 | |
| 	m->m_cgi   = "hq";
 | |
| 	//m->m_size  = 1000;
 | |
| 	m->m_sprpg = 0; // no need to propagate this one
 | |
| 	m->m_sprpp = 0;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "highlight event date in summaries.";
 | |
| 	m->m_desc  = "Can be 0 or 1 to respectively disable or enable "
 | |
| 		"highlighting of the event date terms in the summaries.";
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_off   = (char *)&si.m_doDateHighlighting - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_cgi   = "dh";
 | |
| 	m->m_smin  = 0;
 | |
| 	m->m_smax  = 8;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "limit search results to this ruleset";
 | |
| 	m->m_desc  = "limit search results to this ruleset";
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_off   = (char *)&si.m_ruleset - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_cgi   = "ruleset";
 | |
| 	m->m_smin  = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "Query match offsets";
 | |
| 	m->m_desc  = "Return a list of the offsets of each query word "
 | |
| 		"actually matched in the document.  1 means byte offset, "
 | |
| 		"and 2 means word offset.";
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_off   = (char *)&si.m_queryMatchOffsets - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_cgi   = "qmo";
 | |
| 	m->m_smin  = 0;
 | |
| 	m->m_smax  = 2;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_flags = PF_NOAPI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "boolean status";
 | |
| 	m->m_desc  = "Can be 0 or 1 or 2. 0 means the query is NOT boolean, "
 | |
| 		"1 means the query is boolean and 2 means to auto-detect.";
 | |
| 	m->m_def   = "2";
 | |
| 	m->m_off   = (char *)&si.m_boolFlag - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_cgi   = "bq";
 | |
| 	m->m_smin  = 0;
 | |
| 	m->m_smax  = 2;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "meta tags to display";
 | |
| 	m->m_desc  = "A space-separated string of <b>meta tag names</b>. "
 | |
| 		"Do not forget to url-encode the spaces to +'s or %%20's. "
 | |
| 		"Gigablast will extract the contents of these specified meta "
 | |
| 		"tags out of the pages listed in the search results and "
 | |
| 		"display that content after each summary. i.e. "
 | |
| 		"<i>&dt=description</i> will display the meta description of "
 | |
| 		"each search result. <i>&dt=description:32+keywords:64</i> "
 | |
| 		"will display the meta description and meta keywords of each "
 | |
| 		"search result and limit the fields to 32 and 64 characters "
 | |
| 		"respectively. When used in an XML feed the <i><display "
 | |
| 		"name=\"meta_tag_name\">meta_tag_content</></i> XML "
 | |
| 		"tag will be used to convey each requested meta tag's "
 | |
| 		"content.";
 | |
| 	m->m_off   = (char *)&si.m_displayMetas - y;
 | |
| 	m->m_type  = TYPE_CHARPTR;
 | |
| 	m->m_cgi   = "dt";
 | |
| 	//m->m_size  = 3000;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 	
 | |
| 	/*
 | |
| 	// . you can have multiple topics= parms in you query url...
 | |
| 	// . this is used to set the TopicGroups array in SearchInput
 | |
| 	m->m_title = "related topic parameters";
 | |
| 	m->m_desc  = 
 | |
| 		"X=<b>NUM+MAX+SCAN+MIN+MAXW+META+DEL+IDF+DEDUP</b>\n"
 | |
| 		"<br><br>\n"
 | |
| 		"<b>NUM</b> is how many <b>related topics</b> you want "
 | |
| 		"returned.\n"
 | |
| 		"<br><br>\n"
 | |
| 		"<b>MAX</b> is the maximum number of topics to generate "
 | |
| 		"and store in cache, so if TW is increased, but still below "
 | |
| 		"MT, it will result in a fast cache hit.\n"
 | |
| 		"<br><br>\n"
 | |
| 		"<b>SCAN</b> is how many documents to scan for related "
 | |
| 		"topics. If this is 30, for example, then Gigablast will "
 | |
| 		"scan the first 30 search results for related topics.\n"
 | |
| 		"<br><br>\n"
 | |
| 		"<b>MIN</b> is the minimum score of returned topics. Ranges "
 | |
| 		"from 0%% to over 100%%. 50%% is considered pretty good. "
 | |
| 		"BUG: This must be at least 1 to get any topics back.\n"
 | |
| 		"<br><br>\n"
 | |
| 		"<b>MAXW</b> is the maximum number of words per topic.\n"
 | |
| 		"<br><br>\n"
 | |
| 		"<b>META</b> is the meta tag name to which Gigablast will "
 | |
| 		"restrict the content used to generate the topics. Do not "
 | |
| 		"specify this field to restrict the content to the body of "
 | |
| 		"each document, that is the default.\n"
 | |
| 		"<br><br>\n"	
 | |
| 		"<b>DEL</b> is a single character delimeter which defines "
 | |
| 		"the topic candidates. All candidates must be separated from "
 | |
| 		"the other candidates with the delimeter. So <meta "
 | |
| 		"name=test content=\" cat dog ; pig rabbit horse\"> "
 | |
| 		"when using the ; as a delimeter would only have two topic "
 | |
| 		"candidates: \"cat dog\" and \"pig rabbit horse\". If no "
 | |
| 		"delimeter is provided, default funcationality is assumed.\n"
 | |
| 		"<br><br>\n"
 | |
| 		"<b>IDF</b> is 1, the default, if you want Gigablast to "
 | |
| 		"weight topic candidates by their idf, 0 otherwise."
 | |
| 		"<br><br>\n"
 | |
| 		"<b>DEDUP</b> is 1, the default, if the topics should be "
 | |
| 		"deduped. This involves removing topics that are substrings "
 | |
| 		"or superstrings of other higher-scoring topics."
 | |
| 		"<br><br>\n"
 | |
| 		"Example: topics=49+100+30+1+6+author+%%3B+0+0"
 | |
| 		"<br><br>\n"
 | |
| 		"The default values for those parameters with unspecifed "
 | |
| 		"defaults can be defined on the \"Search Controls\" page.  "
 | |
| 		"<br><br>\n"
 | |
| 		"XML feeds will contain the generated topics like: "
 | |
| 		"<topic><name><![CDATA[some topic]]><"
 | |
| 		"/name><score>13</score><from>"
 | |
| 		"metaTagName</from></topic>"
 | |
| 		"<br><br>\n"
 | |
| 		"Even though somewhat nonstandard, you can specify multiple "
 | |
| 		"<i>&topic=</i> parameters to get back multiple topic "
 | |
| 		"groups."
 | |
| 		"<br><br>\n"
 | |
| 		"Performance will decrease if you increase the MAX, SCAN or "
 | |
| 		"MAXW.";
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = 512;
 | |
| 	m->m_cgi   = "topics";
 | |
| 	m->m_size  = 100;
 | |
| 	// MDW: NO NO NO... was causing a write breach!!! -- take this all out
 | |
| 	m->m_off   = -2; // bogus offset
 | |
| 	//m->m_off   = (char *)&si.m_topics - y;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "niceness";
 | |
| 	m->m_desc  = "Can be 0 or 1. 0 is usually a faster, high-priority "
 | |
| 		"query, 1 is a slower, lower-priority query.";
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_off   = (char *)&si.m_niceness - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_cgi   = "niceness";
 | |
| 	m->m_smin  = 0;
 | |
| 	m->m_smax  = 1;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "debug flag";
 | |
| 	m->m_desc  = "Is 1 to log debug information, 0 otherwise.";
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_off   = (char *)&si.m_debug - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_cgi   = "debug";
 | |
| 	//m->m_priv  = 1;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "return number of docs per topic";
 | |
| 	m->m_desc  = "Use 1 if you want Gigablast to return the number of "
 | |
| 		"documents in the search results that contained each topic "
 | |
| 		"(gigabit).";
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_off   = (char *)&si.m_returnDocIdCount - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_cgi   = "rdc";
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "return docids per topic";
 | |
| 	m->m_desc  = "Use 1 if you want Gigablast to return the list of "
 | |
| 		"docIds from the search results that contained each topic "
 | |
| 		"(gigabit).";
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_off   = (char *)&si.m_returnDocIds - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_cgi   = "rd";
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "return popularity per topic";
 | |
| 	m->m_desc  = "Use 1 if you want Gigablast to return the popularity "
 | |
| 		"of each topic (gigabit).";
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_off   = (char *)&si.m_returnPops - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_cgi   = "rp";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	//m->m_title = "compound list max size";
 | |
| 	//m->m_desc  = "Is the max size in bytes of the compound termlist. "
 | |
| 	//	"Each document id is 6 bytes.";
 | |
| 	//m->m_def   = "-1";
 | |
| 	//m->m_off   = (char *)&si.m_compoundListMaxSize - y;
 | |
| 	//m->m_type  = TYPE_LONG;
 | |
| 	//m->m_cgi   = "clms";
 | |
| 	//m->m_smin  = 0;
 | |
| 	//m->m_priv  = 1;
 | |
| 	//m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "debug gigabits flag";
 | |
| 	m->m_desc  = "Is 1 to log gigabits debug information, 0 otherwise.";
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_off   = (char *)&si.m_debugGigabits - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_cgi   = "debuggigabits";
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "return docids only";
 | |
| 	m->m_desc  = "Is 1 to return only docids as query results.";
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_off   = (char *)&si.m_docIdsOnly - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_cgi   = "dio";
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "image url";
 | |
| 	m->m_desc  = "The url of an image to co-brand on the search "
 | |
| 		"results page.";
 | |
| 	m->m_off   = (char *)&si.m_imgUrl - y;
 | |
| 	m->m_type  = TYPE_CHARPTR;//STRING;
 | |
| 	m->m_def   = NULL;
 | |
| 	//m->m_size  = 512;
 | |
| 	m->m_cgi   = "iu";
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_flags = PF_NOAPI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "image link";
 | |
| 	m->m_desc  = "The hyperlink to use on the image to co-brand on "
 | |
| 		"the search results page.";
 | |
| 	m->m_off   = (char *)&si.m_imgLink - y;
 | |
| 	m->m_type  = TYPE_CHARPTR;//STRING;
 | |
| 	m->m_def   = NULL;
 | |
| 	//m->m_size  = 512;
 | |
| 	m->m_cgi   = "ix";
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_flags = PF_NOAPI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "image width";
 | |
| 	m->m_desc  = "The width of the image on the search results page.";
 | |
| 	m->m_off   = (char *)&si.m_imgWidth - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_cgi   = "iw";
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_def   = "200";
 | |
| 	m->m_flags = PF_NOAPI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "image height";
 | |
| 	m->m_desc  = "The height of the image on the search results "
 | |
| 		"page.";
 | |
| 	m->m_off   = (char *)&si.m_imgHeight - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_cgi   = "ih";
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_def   = "200";
 | |
| 	m->m_flags = PF_NOAPI;
 | |
| 	m++;
 | |
| 
 | |
| 	// m->m_title = "password";
 | |
| 	// m->m_desc  = "The password.";
 | |
| 	// m->m_off   = (char *)&si.m_pwd - y;
 | |
| 	// m->m_type  = TYPE_CHARPTR;//STRING;
 | |
| 	// m->m_cgi   = "pwd";
 | |
| 	// m->m_size  = 32;
 | |
| 	// m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	// m->m_page  = PAGE_RESULTS;
 | |
| 	// m->m_obj   = OBJ_SI;
 | |
| 	// m++;
 | |
| 
 | |
	// NOTE(review): m_isMasterAdmin defaults to "1" here and is mapped
	// from the user-supplied cgi parm "admin" on the results page, then
	// propagated on both GET and POST. Confirm that real admin rights are
	// gated elsewhere (master password / IP checks) and that this flag is
	// not trusted directly from the request -- TODO verify against callers.
	m->m_title = "admin override";
	m->m_desc  = "admin override";
	m->m_off   = (char *)&si.m_isMasterAdmin - y;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "1";
	m->m_cgi   = "admin";
	m->m_sprpg = 1; // propagate on GET request
        m->m_sprpp = 1; // propagate on POST request
	m->m_page  = PAGE_RESULTS;
	m->m_obj   = OBJ_SI;
	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "language";
 | |
| 	m->m_desc  = "Language code to restrict search. 0 = All. Uses "
 | |
| 		"Clusterdb to filter languages. This is being phased out "
 | |
| 		"please do not use much, use gblang instead.";
 | |
| 	m->m_off   = (char *)&si.m_languageCode - y;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = 5+1;
 | |
| 	m->m_def   = "none";
 | |
| 	// our google gadget gets &lang=en passed to it from google, so
 | |
| 	// change this!!
 | |
| 	m->m_cgi   = "clang";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/* 
 | |
| 	   this should be a hash on the lang abbr line gblang:en
 | |
| 	m->m_title = "GB language";
 | |
| 	m->m_desc  = "Language code to restrict search. 0 = All. Uses "
 | |
| 		"the gblang: keyword to filter languages.";
 | |
| 	m->m_off   = (char *)&si.m_gblang - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_cgi   = "gblang";
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	// prepend to query
 | |
| 	m->m_title = "prepend";
 | |
| 	m->m_desc  = "prepend this to the supplied query followed by a |.";
 | |
| 	m->m_off   = (char *)&si.m_prepend - y;
 | |
| 	m->m_type  = TYPE_CHARPTR;
 | |
| 	m->m_def   = NULL;
 | |
| 	m->m_cgi   = "prepend";
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "GB Country";
 | |
| 	m->m_desc  = "Country code to restrict search";
 | |
| 	m->m_off   = (char *)&si.m_gbcountry - y;
 | |
| 	m->m_type  = TYPE_CHARPTR;//STRING;
 | |
| 	//m->m_size  = 4+1;
 | |
| 	m->m_def   = NULL;
 | |
| 	//m->m_def   = "iso-8859-1";
 | |
| 	m->m_cgi   = "gbcountry";
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_flags = PF_NOAPI;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "rerank ruleset";
 | |
| 	m->m_desc  = "Use this ruleset to rerank the search results. Will "
 | |
|  		"rerank at least the first X results specified with &n=X. "
 | |
| 		"And be sure to say &recycle=0 to recompute the quality "
 | |
| 		"of each page in the search results.";
 | |
| 	m->m_off   = (char *)&si.m_rerankRuleset - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "-1";
 | |
| 	m->m_cgi   = "rerank";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "apply ruleset to roots";
 | |
| 	m->m_desc  = "Recompute the quality of the root urls of each "
 | |
| 		"search result in order to compute the quality of that "
 | |
| 		"search result, since it depends on its root quality. This "
 | |
| 		"can take a lot longer when enabled.";
 | |
| 	m->m_off   = (char *)&si.m_artr - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_cgi   = "artr";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "show banned pages";
 | |
| 	m->m_desc  = "show banned pages";
 | |
| 	m->m_off   = (char *)&si.m_showBanned - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_cgi   = "sb";
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "allow punctuation in query phrases";
 | |
| 	m->m_desc  = "allow punctuation in query phrases";
 | |
| 	m->m_off   = (char *)&si.m_allowPunctInPhrase - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_cgi   = "apip";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "use ad feed num";
 | |
| 	m->m_desc  = "use ad feed num";
 | |
| 	m->m_off   = (char *)&si.m_useAdFeedNum - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_cgi   = "uafn";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "do bot detection";
 | |
| 	m->m_desc  = "Passed in for raw feeds that want bot detection cgi "
 | |
| 		     "parameters passed back in the XML.";
 | |
| 	m->m_off   = (char *)&si.m_doBotDetection - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_cgi   = "bd";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "bot detection query";
 | |
| 	m->m_desc  = "Passed in for raw feeds that want bot detection cgi "
 | |
| 		     "parameters passed back in the XML. Use this variable "
 | |
| 		     "when an actual query against gigablast is not needed "
 | |
|                      "(i.e. - image/video/news searches).";
 | |
| 	m->m_off   = (char *)&si.m_botDetectionQuery - y;
 | |
| 	m->m_type  = TYPE_CHARPTR;//STRING;
 | |
| 	m->m_cgi   = "bdq";
 | |
|         m->m_def   = NULL;
 | |
| 	m->m_size  = MAX_QUERY_LEN;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "queryCharset";
 | |
| 	m->m_desc  = "Charset in which the query is encoded";
 | |
| 	m->m_off   = (char *)&si.m_queryCharset - y;
 | |
| 	m->m_type  = TYPE_CHARPTR;//STRING;
 | |
| 	//m->m_size  = 32+1;
 | |
| 	m->m_def   = "utf-8";
 | |
| 	//m->m_def   = "iso-8859-1";
 | |
| 	m->m_cgi   = "qcs";
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_flags = PF_NOAPI;
 | |
| 	m++;
 | |
| 
 | |
| 	// buzz
 | |
| 	m->m_title = "display inlinks";
 | |
| 	m->m_desc  = "Display all inlinks of each result.";
 | |
| 	m->m_off   = (char *)&si.m_displayInlinks - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_cgi   = "inlinks";
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_flags = PF_NOAPI;
 | |
| 	m++;
 | |
| 
 | |
| 	// buzz
 | |
| 	m->m_title = "display outlinks";
 | |
| 	m->m_desc  = "Display all outlinks of each result. outlinks=1 "
 | |
| 		"displays only external outlinks. outlinks=2 displays "
 | |
| 		"external and internal outlinks.";
 | |
| 	m->m_off   = (char *)&si.m_displayOutlinks - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_cgi   = "outlinks";
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_flags = PF_NOAPI;
 | |
| 	m++;
 | |
| 
 | |
| 	// buzz
 | |
| 	m->m_title = "display term frequencies";
 | |
| 	m->m_desc  = "Display Terms and Frequencies in results.";
 | |
| 	m->m_off   = (char *)&si.m_displayTermFreqs - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_cgi   = "tf";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	// buzz
 | |
| 	m->m_title = "spider results";
 | |
| 	m->m_desc  = "Results of this query will be forced into the spider "
 | |
| 		"queue for reindexing.";
 | |
| 	m->m_off   = (char *)&si.m_spiderResults - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_cgi   = "spiderresults";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	// buzz
 | |
| 	m->m_title = "spider result roots";
 | |
| 	m->m_desc  = "Root urls of the results of this query will be forced "
 | |
| 		"into the spider queue for reindexing.";
 | |
| 	m->m_off   = (char *)&si.m_spiderResultRoots - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_cgi   = "spiderresultroots";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	// buzz
 | |
| 	m->m_title = "just mark clusterlevels";
 | |
| 	m->m_desc  = "Check for deduping, but just mark the cluster levels "
 | |
| 		"and the doc deduped against, don't remove the result.";
 | |
| 	m->m_off   = (char *)&si.m_justMarkClusterLevels - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_cgi   = "jmcl";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "include cached copy of page";
 | |
| 	m->m_desc  = "Will cause a cached copy of content to be returned "
 | |
| 		"instead of summary.";
 | |
| 	m->m_off   = (char *)&si.m_includeCachedCopy - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_cgi   = "icc";
 | |
| 	m->m_page  = PAGE_RESULTS;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_flags = PF_API;
 | |
| 	m++;
 | |
| 
 | |
| 	// m->m_title = "get section voting info in json";
 | |
| 	// m->m_desc  = "Will cause section voting info to be returned.";
 | |
| 	// m->m_off   = (char *)&si.m_getSectionVotingInfo - y;
 | |
| 	// m->m_type  = TYPE_CHAR;
 | |
| 	// m->m_def   = "0";
 | |
| 	// m->m_cgi   = "sectionvotes";
 | |
| 	// m->m_page  = PAGE_RESULTS;
 | |
| 	// m->m_obj   = OBJ_SI;
 | |
| 	// m->m_flags = PF_API;
 | |
| 	// m++;
 | |
| 
 | |
| 	//////////////
 | |
| 	// END /search
 | |
| 	//////////////
 | |
| 
 | |
| 
 | |
| 	//////////
 | |
| 	// PAGE GET (cached web pages)
 | |
| 	///////////
 | |
| 	m->m_title = "docId";
 | |
| 	m->m_desc  = "The docid of the cached page to view.";
 | |
| 	m->m_off   = (char *)&gr.m_docId - (char *)&gr;
 | |
| 	m->m_type  = TYPE_LONG_LONG;
 | |
| 	m->m_page  = PAGE_GET;
 | |
| 	m->m_obj   = OBJ_GBREQUEST; // generic request class
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_cgi   = "d";
 | |
| 	m->m_flags = PF_API | PF_REQUIRED;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "url";
 | |
| 	m->m_desc  = "Instead of specifying a docid, you can get the "
 | |
| 		"cached webpage by url as well.";
 | |
| 	m->m_off   = (char *)&gr.m_url - (char *)&gr;
 | |
| 	m->m_type  = TYPE_CHARPTR; // reference into the HttpRequest
 | |
| 	m->m_page  = PAGE_GET;
 | |
| 	m->m_obj   = OBJ_GBREQUEST; // generic request class
 | |
| 	m->m_def   = NULL;
 | |
| 	m->m_cgi   = "url";
 | |
| 	m->m_flags = PF_API | PF_REQUIRED;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "collection";
 | |
| 	m->m_desc  = "Get the cached page from this collection.";
 | |
| 	m->m_cgi   = "c";
 | |
| 	m->m_page  = PAGE_GET;
 | |
| 	m->m_obj   = OBJ_GBREQUEST;
 | |
| 	m->m_off   = (char *)&gr.m_coll - (char *)&gr;
 | |
| 	m->m_type  = TYPE_CHARPTR;//SAFEBUF;
 | |
| 	m->m_def   = NULL;
 | |
| 	m->m_flags = PF_REQUIRED | PF_API;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "strip";
 | |
| 	m->m_desc  = "Is 1 or 2 two strip various tags from the "
 | |
| 		"cached content.";
 | |
| 	m->m_off   = (char *)&gr.m_strip - (char *)&gr;
 | |
| 	m->m_page  = PAGE_GET;
 | |
| 	m->m_obj   = OBJ_GBREQUEST;
 | |
| 	m->m_cgi   = "strip";
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_flags = PF_API;
 | |
| 	m++;
 | |
| 
 | |
	// Cached-page view (/get): whether the Gigablast banner is shown above
	// the cached copy of the page.
	m->m_title = "include header";
	m->m_desc  = "Is 1 to include the Gigablast header at the top of "
		"the cached page, 0 to exclude the header.";
	m->m_def   = "1";
	m->m_type  = TYPE_BOOL;
	m->m_page  = PAGE_GET;
	m->m_obj   = OBJ_GBREQUEST;
	// NOTE(review): cgi "ih" is also used by "image height" on
	// PAGE_RESULTS/OBJ_SI. They live on different pages and objects, so
	// this is presumably safe -- confirm that parm lookup is scoped by
	// page/object before relying on it.
	m->m_cgi   = "ih";
	m->m_off   = (char *)&gr.m_includeHeader - (char *)&gr;
	m->m_flags = PF_API;
	m++;
 | |
| 
 | |
| 	m->m_title = "query";
 | |
| 	m->m_desc  = "Highlight this query in the page.";
 | |
| 	m->m_def   = "";
 | |
| 	m->m_type  = TYPE_CHARPTR;
 | |
| 	m->m_page  = PAGE_GET;
 | |
| 	m->m_obj   = OBJ_GBREQUEST;
 | |
| 	m->m_cgi   = "q";
 | |
| 	m->m_off   = (char *)&gr.m_query - (char *)&gr;
 | |
| 	m->m_flags = PF_API;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	// for /get
 | |
| 	m->m_title = "query highlighting query";
 | |
| 	m->m_desc  = "Is 1 to highlight query terms in the cached page.";
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_cgi   = "qh";
 | |
| 	m->m_off   = (char *)&si.m_queryHighlighting - y;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	// for /addurl
 | |
| 	/*
 | |
| 	m->m_title = "url to add";
 | |
| 	m->m_desc  = "Used by add url page.";
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_URL_LEN;
 | |
| 	m->m_cgi   = "u";
 | |
| 	m->m_off   = (char *)&si.m_url2 - y;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	// Process.cpp calls Msg28::massConfig with &haspower=[0|1] to 
 | |
| 	// indicate power loss or coming back on from a power loss
 | |
| 	m->m_title = "power on status notification";
 | |
| 	m->m_desc  = "Indicates power is back on.";
 | |
| 	m->m_cgi   = "poweron";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_func  = CommandPowerOnNotice;
 | |
| 	m->m_cast  = 0;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "power off status notification";
 | |
| 	m->m_desc  = "Indicates power is off.";
 | |
| 	m->m_cgi   = "poweroff";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_func  = CommandPowerOffNotice;
 | |
| 	m->m_cast  = 0;
 | |
| 	m->m_page  = PAGE_NONE;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	//////////////
 | |
| 	// END PAGE_GET
 | |
| 	//////////////
 | |
| 
 | |
| 
 | |
| 	///////////////////////////////////////////
 | |
| 	// MASTER CONTROLS
 | |
| 	///////////////////////////////////////////
 | |
| 
 | |
| 	m->m_title = "spidering enabled";
 | |
| 	m->m_desc  = "Controls all spidering for all collections";
 | |
| 	m->m_cgi   = "se";
 | |
| 	m->m_off   = (char *)&g_conf.m_spideringEnabled - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	//m->m_cast  = 0;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "injections enabled";
 | |
| 	m->m_desc  = "Controls injecting for all collections";
 | |
| 	m->m_cgi   = "injen";
 | |
| 	m->m_off   = (char *)&g_conf.m_injectionsEnabled - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "querying enabled";
 | |
| 	m->m_desc  = "Controls querying for all collections";
 | |
| 	m->m_cgi   = "qryen";
 | |
| 	m->m_off   = (char *)&g_conf.m_queryingEnabled - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "return results even if a shard is down";
 | |
| 	m->m_desc  = "If you turn this off then Gigablast will return "
 | |
| 		"an error message if a shard was down and did not return "
 | |
| 		"results for a query. The XML and JSON feed let's you know "
 | |
| 		"when a shard is down and will give you the results back "
 | |
| 		"any way, but if you would rather have just and error message "
 | |
| 		"and no results, then set then set this to 'NO'.";
 | |
| 	m->m_cgi   = "rra";
 | |
| 	m->m_off   = (char *)&g_conf.m_returnResultsAnyway - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max mem";
 | |
| 	m->m_desc  = "Mem available to this process. May be exceeded due "
 | |
| 		"to fragmentation.";
 | |
| 	m->m_cgi   = "maxmem";
 | |
| 	m->m_off   = (char *)&g_conf.m_maxMem - g;
 | |
| 	m->m_def   = "8000000000";
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_page  = PAGE_MASTER; // PAGE_NONE;
 | |
| 	m->m_type  = TYPE_LONG_LONG;
 | |
| 	//m->m_flags = PF_NOAPI;
 | |
| 	m++;
 | |
| 
 | |
| 	
 | |
| 	m->m_title = "max total spiders";
 | |
| 	m->m_desc  = "What is the maximum number of web "
 | |
| 		"pages the spider is allowed to download "
 | |
| 		"simultaneously for ALL collections PER HOST? Caution: "
 | |
| 		"raising this too high could result in some Out of Memory "
 | |
| 		"(OOM) errors. The hard limit is currently 300. Each "
 | |
| 		"collection has its own limit in the <i>spider controls</i> "
 | |
| 		"that you may have to increase as well.";
 | |
| 	m->m_cgi   = "mtsp";
 | |
| 	m->m_off   = (char *)&g_conf.m_maxTotalSpiders - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "100";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "web spidering enabled";
 | |
| 	m->m_desc  = "Spiders events on web";
 | |
| 	m->m_cgi   = "wse";
 | |
| 	m->m_off   = (char *)&g_conf.m_webSpideringEnabled - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "add url enabled";
 | |
| 	m->m_desc  = "Can people use the add url interface to add urls "
 | |
| 		"to the index?";
 | |
| 	m->m_cgi   = "ae";
 | |
| 	m->m_off   = (char *)&g_conf.m_addUrlEnabled - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	//m->m_cast  = 0;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "use collection passwords";
 | |
| 	m->m_desc  = "Should collections have individual password settings "
 | |
| 		"so different users can administrer different collections? "
 | |
| 		"If not the only the master passwords and IPs will be able "
 | |
| 		"to administer any collection.";
 | |
| 	m->m_cgi   = "ucp";
 | |
| 	m->m_off   = (char *)&g_conf.m_useCollectionPasswords - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "allow cloud users";
 | |
| 	m->m_desc  = "Can guest users create and administer "
 | |
| 		"a collection? Limit: 1 "
 | |
| 		"collection per IP address. This is mainly for doing "
 | |
| 		"demos on the gigablast.com domain.";
 | |
| 	m->m_cgi   = "acu";
 | |
| 	m->m_off   = (char *)&g_conf.m_allowCloudUsers - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "auto save frequency";
 | |
| 	m->m_desc  = "Save data in memory to disk after this many minutes "
 | |
| 		"have passed without the data having been dumped or saved "
 | |
| 		"to disk. Use 0 to disable.";
 | |
| 	m->m_cgi   = "asf";
 | |
| 	m->m_off   = (char *)&g_conf.m_autoSaveFrequency - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "5";
 | |
| 	m->m_units = "mins";
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max http sockets";
 | |
| 	m->m_desc  = "Maximum sockets available to serve incoming HTTP "
 | |
| 		"requests. Too many outstanding requests will increase "
 | |
| 		"query latency. Excess requests will simply have their "
 | |
| 		"sockets closed.";
 | |
| 	m->m_cgi   = "ms";
 | |
| 	m->m_off   = (char *)&g_conf.m_httpMaxSockets - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	// up this some, am seeing sockets closed because of using gb
 | |
| 	// as a cache...
 | |
| 	m->m_def   = "300";
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max https sockets";
 | |
| 	m->m_desc  = "Maximum sockets available to serve incoming HTTPS "
 | |
| 		"requests. Like max http sockets, but for secure sockets.";
 | |
| 	m->m_cgi   = "mss";
 | |
| 	m->m_off   = (char *)&g_conf.m_httpsMaxSockets - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "100";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "spider user agent";
 | |
| 	m->m_desc  = "Identification seen by web servers when "
 | |
| 		"the Gigablast spider downloads their web pages. "
 | |
| 		"It is polite to insert a contact email address here so "
 | |
| 		"webmasters that experience problems from the Gigablast "
 | |
| 		"spider have somewhere to vent.";
 | |
| 	m->m_cgi   = "sua";
 | |
| 	m->m_off   = (char *)&g_conf.m_spiderUserAgent - g;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = USERAGENTMAXSIZE;
 | |
| 	m->m_def   = "GigablastOpenSource/1.0";
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
|         m->m_title = "use temporary cluster";
 | |
|         m->m_desc  = "Used by proxy to point to a temporary cluster while the "
 | |
| 		"original cluster is updated with a new binary. The "
 | |
| 		"temporary cluster is the same as the original cluster but "
 | |
| 		"the ports are all incremented by one from what is in "
 | |
| 		"the hosts.conf. This should ONLY be used for the proxy.";
 | |
|         m->m_cgi   = "aotp";
 | |
|         m->m_off   = (char *)&g_conf.m_useTmpCluster - g;
 | |
|         m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
|         m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "url injection enabled";
 | |
| 	m->m_desc  = "If enabled you can directly inject URLs into the index.";
 | |
| 	m->m_cgi   = "ie";
 | |
| 	m->m_off   = (char *)&g_conf.m_injectionEnabled - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_def   = "1";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "init QA tests";
 | |
| 	m->m_desc  = "If initiated gb performs some integrity tests "
 | |
| 		"to ensure injecting, spidering and searching works "
 | |
| 		"properly. Uses ./test/ subdirectory. Injects "
 | |
| 		"urls in ./test/inject.txt. Spiders urls "
 | |
| 		"in ./test/spider.txt. "
 | |
| 		"Each of those two files is essentially a simple format of "
 | |
| 		"a url followed by the http reply received from the server "
 | |
| 		"for that url. "
 | |
| 		// TODO: generate these files
 | |
| 		;
 | |
| 	m->m_cgi   = "qasptei";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_func  = CommandSpiderTestInit;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "init parser test run";
 | |
| 	m->m_desc  = "If enabled gb injects the urls in the "
 | |
| 		"./test-parser/urls.txt "
 | |
| 		"file and outputs ./test-parser/qa.html";
 | |
| 	m->m_cgi   = "qaptei";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_func  = CommandParserTestInit;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
	// NOTE(review): this registration reuses cgi "qasptei" AND the same
	// handler CommandSpiderTestInit that are already registered above
	// under the title "init QA tests". Two parms answering to one cgi
	// name looks like an unintentional copy-paste duplicate -- confirm
	// which registration should remain, or give this one a distinct cgi.
	m->m_title = "init spider test run";
	m->m_desc  = "If enabled gb injects the urls in "
		"./test-spider/spider.txt "
		"and spiders links.";
	m->m_cgi   = "qasptei";
	m->m_type  = TYPE_CMD;
	m->m_func  = CommandSpiderTestInit;
	m->m_def   = "1";
	m->m_cast  = 1;
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;
 | |
| 
 | |
| 	m->m_title = "continue spider test run";
 | |
| 	m->m_desc  = "Resumes the test.";
 | |
| 	m->m_cgi   = "qaspter";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_func  = CommandSpiderTestCont;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "do docid range splitting";
 | |
| 	m->m_desc  = "Split msg39 docids into ranges to save mem?";
 | |
| 	m->m_cgi   = "ddrs";
 | |
| 	m->m_off   = (char *)&g_conf.m_doDocIdRangeSplitting - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "qa search test enabled";
 | |
| 	m->m_desc  = "If enabled gb does the search queries in "
 | |
| 		"./test-search/queries.txt and compares to the last run and "
 | |
| 		"outputs the diffs for inspection and validation.";
 | |
| 	m->m_cgi   = "qasste";
 | |
| 	m->m_off   = (char *)&g_conf.m_testSearchEnabled - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	//m->m_cast  = 0;
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "just save";
 | |
| 	m->m_desc  = "Copies the data in memory to disk for just this host. "
 | |
| 		"Does Not exit.";
 | |
| 	m->m_cgi   = "js";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_func  = CommandJustSave;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_cast  = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "save";
 | |
| 	m->m_desc  = "Saves in-memory data for ALL hosts. Does Not exit.";
 | |
| 	m->m_cgi   = "js";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_func  = CommandJustSave;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "all spiders on";
 | |
| 	m->m_desc  = "Enable spidering on all hosts";
 | |
| 	m->m_cgi   = "ase";
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_off   = (char *)&g_conf.m_spideringEnabled - g;
 | |
| 	m->m_type  = TYPE_BOOL2; // no yes or no, just a link
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "all spiders off";
 | |
| 	m->m_desc  = "Disable spidering on all hosts";
 | |
| 	m->m_cgi   = "ase";
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_off   = (char *)&g_conf.m_spideringEnabled - g;
 | |
| 	m->m_type  = TYPE_BOOL2; // no yes or no, just a link
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "save & exit";
 | |
| 	m->m_desc  = "Copies the data in memory to disk for just this host "
 | |
| 		"and then shuts down the gb process.";
 | |
| 	m->m_cgi   = "save";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_func  = CommandSaveAndExit;
 | |
| 	m->m_cast  = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "urgent save & exit";
 | |
| 	m->m_desc  = "Copies the data in memory to disk for just this host "
 | |
| 		"and then shuts down the gb process.";
 | |
| 	m->m_cgi   = "usave";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_func  = CommandUrgentSaveAndExit;
 | |
| 	m->m_cast  = 0;
 | |
| 	m->m_priv  = 4;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "save & exit";
 | |
| 	m->m_desc  = "Saves the data and exits for ALL hosts.";
 | |
| 	m->m_cgi   = "save";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_func  = CommandSaveAndExit;
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "rebalance shards";
 | |
| 	m->m_desc  = "Tell all hosts to scan all records in all databases, "
 | |
| 		"and move "
 | |
| 		"records to the shard they belong to. You only need to run "
 | |
| 		"this if Gigablast tells you to, when you are changing "
 | |
| 		"hosts.conf to add or remove more nodes/hosts.";
 | |
| 	m->m_cgi   = "rebalance";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_func  = CommandRebalance;
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dump to disk";
 | |
| 	m->m_desc  = "Flushes all records in memory to the disk on all hosts.";
 | |
| 	m->m_cgi   = "dump";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_func  = CommandDiskDump;
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "force reclaim";
 | |
| 	m->m_desc  = "Force reclaim of doledb mem.";
 | |
| 	m->m_cgi   = "forceit";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_func  = CommandForceIt;
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "tight merge posdb";
 | |
| 	m->m_desc  = "Merges all outstanding posdb (index) files.";
 | |
| 	m->m_cgi   = "pmerge";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_func  = CommandMergePosdb;
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	//m->m_title = "tight merge sectiondb";
 | |
| 	//m->m_desc  = "Merges all outstanding sectiondb files.";
 | |
| 	//m->m_cgi   = "smerge";
 | |
| 	//m->m_type  = TYPE_CMD;
 | |
| 	//m->m_func  = CommandMergeSectiondb;
 | |
| 	//m->m_cast  = 1;
 | |
| 	//m++;
 | |
| 
 | |
| 	m->m_title = "tight merge titledb";
 | |
| 	m->m_desc  = "Merges all outstanding titledb (web page cache) files.";
 | |
| 	m->m_cgi   = "tmerge";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_func  = CommandMergeTitledb;
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "tight merge spiderdb";
 | |
| 	m->m_desc  = "Merges all outstanding spiderdb files.";
 | |
| 	m->m_cgi   = "spmerge";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_func  = CommandMergeSpiderdb;
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "clear kernel error message";
 | |
| 	m->m_desc  = "Clears the kernel error message. You must do this "
 | |
| 		"to stop getting email alerts for a kernel ring buffer "
 | |
| 		"error alert.";
 | |
| 	m->m_cgi   = "clrkrnerr";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_func  = CommandClearKernelError;
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "disk page cache off";
 | |
| 	m->m_desc  = "Disable all disk page caches to save mem for "
 | |
| 		"tmp cluster. Run "
 | |
| 		"gb cacheoff to do for all hosts.";
 | |
| 	m->m_cgi   = "dpco";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_func  = CommandDiskPageCacheOff;
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
	// NOTE(review): the two stanzas below are dead (commented-out) parm
	// registrations kept for reference; consider deleting them.
	//m->m_title = "http server enabled";
	//m->m_desc  = "Disable this if you do not want anyone hitting your "
	//	"http server. Admin and local IPs are still permitted, "
	//	"however.";
	//m->m_cgi   = "hse";
	//m->m_off   = (char *)&g_conf.m_httpServerEnabled - g;
	//m->m_type  = TYPE_BOOL;
	//m->m_def   = "1";
	//m++;

	/*
	m->m_title = "ad feed enabled";
	m->m_desc  = "Serves ads unless pure=1 is in cgi parms.";
	m->m_cgi   = "afe";
	m->m_off   = (char *)&g_conf.m_adFeedEnabled - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_scgi  = "ads";
	m->m_soff  = (char *)&si.m_adFeedEnabled - y;
	m->m_sparm = 1;
	m->m_priv  = 2;
	m++;
	*/

	// Proxy-only toggle: alternate which stripe of twins receives each
	// incoming query to spread query load. Stored in g_conf (m_off is the
	// byte offset of the member within Conf, relative to base g).
	m->m_title = "do stripe balancing";
	m->m_desc  = "Stripe #n contains twin #n from each group. Doing "
		"stripe balancing helps prevent too many query requests "
		"coming into one host. This parm is only for the proxy. "
		"Stripe balancing is done by default unless the parm is "
		"disabled on the proxy in which case it appends a "
		"&dsb=0 to the query url it sends to the host. The proxy "
		"alternates to which host it forwards the incoming query "
		"based on the stripe. It takes the number of query terms in "
		"the query into account to make a more even balance.";
	m->m_cgi   = "dsb";
	m->m_off   = (char *)&g_conf.m_doStripeBalancing - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "1";
	//m->m_scgi  = "dsb";
	//m->m_soff  = (char *)&si.m_doStripeBalancing - y;
	//m->m_sparm = 1;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;

	// Production-cluster flag: when true, startup verifies elvtune
	// (disk elevator) settings and refuses to start otherwise.
	m->m_title = "is live cluster";
	m->m_desc  = "Is this cluster part of a live production cluster? "
		"If this is true we make sure that elvtune is being "
		"set properly for best performance, otherwise, gb will "
		"not startup.";
	m->m_cgi   = "live";
	m->m_off   = (char *)&g_conf.m_isLive - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "1";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "is BuzzLogic";
 | |
| 	m->m_desc  = "Is this a BuzzLogic cluster?";
 | |
| 	m->m_cgi   = "isbuzz";
 | |
| 	m->m_off   = (char *)&g_conf.m_isBuzzLogic - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	// we use wikipedia cluster for quick categorization
 | |
| 	m->m_title = "is wikipedia cluster";
 | |
| 	m->m_desc  = "Is this cluster just used for indexing wikipedia pages?";
 | |
| 	m->m_cgi   = "iswiki";
 | |
| 	m->m_off   = (char *)&g_conf.m_isWikipedia - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
|         m->m_title = "ask for gzipped docs when downloading";
 | |
|         m->m_desc  = "If this is true, gb will send Accept-Encoding: gzip "
 | |
| 		"to web servers when doing http downloads. It does have "
 | |
| 		"a tendency to cause out-of-memory errors when you enable "
 | |
| 		"this, so until that is fixed better, it's probably a good "
 | |
| 		"idea to leave this disabled.";
 | |
|         m->m_cgi   = "afgdwd";
 | |
|         m->m_off   = (char *)&g_conf.m_gzipDownloads - g;
 | |
|         m->m_type  = TYPE_BOOL;
 | |
| 	// keep this default off because it seems some pages are huge
 | |
| 	// uncomressed causing OOM errors and possibly corrupting stuff?
 | |
| 	// not sure exactly, but i don't like going OOM. so maybe until
 | |
| 	// that is fixed leave this off.
 | |
|         m->m_def   = "0";
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
|         m++;
 | |
| 	
 | |
| 	m->m_title = "search results cache max age";
 | |
| 	m->m_desc = "How many seconds should we cache a search results "
 | |
| 		"page for?";
 | |
| 	m->m_cgi  = "srcma";
 | |
| 	m->m_off  = (char *)&g_conf.m_searchResultsMaxCacheAge - g;
 | |
| 	m->m_def  = "10800"; // 3 hrs
 | |
| 	m->m_type = TYPE_LONG;
 | |
| 	m->m_units = "seconds";
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "autoban IPs which violate the queries per day quotas";
 | |
| 	m->m_desc  = "Keep track of ips which do queries, disallow "
 | |
| 		"non-customers from hitting us too hard.";
 | |
| 	m->m_cgi   = "ab";
 | |
| 	m->m_off   = (char *)&g_conf.m_doAutoBan - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
	// Only register the yippy concurrency parm on yippy builds; note this
	// makes the parm table contents depend on g_isYippy at init time.
	if ( g_isYippy ) {
	m->m_title = "Max outstanding search requests out for yippy";
	m->m_desc  = "Max outstanding search requests out for yippy";
	m->m_cgi   = "ymo";
	m->m_off   = (char *)&g_conf.m_maxYippyOut - g;
	m->m_type  = TYPE_LONG;
	m->m_def   = "150";
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;
	}

	// Autoban quota: free queries per day for non-customers.
	m->m_title = "free queries per day ";
	m->m_desc  = "Non-customers get this many queries per day before"
		"being autobanned";
	m->m_cgi   = "nfqpd";
	m->m_off   = (char *)&g_conf.m_numFreeQueriesPerDay - g;
	m->m_type  = TYPE_LONG;
	m->m_def   = "1024";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;

	// Autoban quota: free queries per minute.
	// NOTE(review): TYPE_CHAR presumably stores this in a single signed
	// byte, capping the value at 127 -- confirm that is intentional.
	m->m_title = "free queries per minute ";
	m->m_desc  = "Non-customers get this many queries per minute before"
		"being autobanned";
	m->m_cgi   = "nfqpm";
	m->m_off   = (char *)&g_conf.m_numFreeQueriesPerMinute - g;
	m->m_type  = TYPE_CHAR;
	m->m_def   = "30";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;
 | |
| 
 | |
	// Watchdog: dump a core if the heartbeat thread is delayed this many
	// ms (helps capture where the CPU was stuck). 0 or less disables.
	m->m_title = "max heartbeat delay in milliseconds";
	m->m_desc  = "If a heartbeat is delayed this many milliseconds "
		"dump a core so we can see where the CPU was. "
		"Logs 'db: missed heartbeat by %" INT64 " ms'. "
		"Use 0 or less to disable.";
	m->m_cgi   = "mhdms";
	m->m_off   = (char *)&g_conf.m_maxHeartbeatDelay - g;
	m->m_type  = TYPE_LONG;
	m->m_def   = "0";
	m->m_flags = PF_CLONE; // PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;

	// Log udp callbacks/handlers that run longer than this many ms.
	// -1 or less disables the logging.
	m->m_title = "max delay before logging a callback or handler";
	m->m_desc  = "If a call to a message callback or message handler "
		"in the udp server takes more than this many milliseconds, "
		"then log it. "
		"Logs 'udp: Took %" INT64 " ms to call callback for msgType="
		"0x%hhx niceness=%" INT32 "'. "
		"Use -1 or less to disable the logging.";
	m->m_cgi   = "mdch";
	m->m_off   = (char *)&g_conf.m_maxCallbackDelay - g;
	m->m_type  = TYPE_LONG;
	m->m_def   = "-1";
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "sendmail IP";
 | |
| 	m->m_desc  = "We send crawlbot notification emails to this sendmail "
 | |
| 		"server which forwards them to the specified email address.";
 | |
| 		m->m_cgi   = "smip";
 | |
| 	m->m_off   = (char *)&g_conf.m_sendmailIp - g;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_def   = "10.5.54.47";
 | |
| 	m->m_size  = MAX_MX_LEN;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "send email alerts";
 | |
| 	m->m_desc  = "Sends emails to admin if a host goes down.";
 | |
| 	m->m_cgi   = "sea";
 | |
| 	m->m_off   = (char *)&g_conf.m_sendEmailAlerts - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "delay non critical email alerts";
 | |
| 	m->m_desc  = "Do not send email alerts about dead hosts to "
 | |
| 		"anyone except sysadmin@gigablast.com between the times "
 | |
| 		"given below unless all the twins of the dead host are "
 | |
| 		"also dead. Instead, wait till after if the host "
 | |
| 		"is still dead. ";
 | |
| 	m->m_cgi   = "dnca";
 | |
| 	m->m_off   = (char *)&g_conf.m_delayNonCriticalEmailAlerts - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
	// NOTE(review): everything below down to "cluster name" is dead,
	// commented-out parm registrations for per-person cellphone alerts
	// and an old "maximum hops" parm. Kept verbatim; consider deleting.
	//m->m_title = "send email alerts to matt at tmobile 450-3518";
	//m->m_desc  = "Sends to cellphone.";
	//m->m_cgi   = "seatmt";
	//m->m_off   = (char *)&g_conf.m_sendEmailAlertsToMattTmobile - g;
	//m->m_type  = TYPE_BOOL;
	//m->m_def   = "1";
	//m->m_priv  = 2;
	//m->m_group = 0;
	//m++;

	//m->m_title = "send email alerts to matt at alltel 362-6809";
	/*
	m->m_title = "send email alerts to matt at alltel 450-3518";
	m->m_desc  = "Sends to cellphone.";
	m->m_cgi   = "seatmv";
	m->m_off   = (char *)&g_conf.m_sendEmailAlertsToMattAlltell - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "1";
	m->m_priv  = 2;
	m->m_group = 0;
	m++;

	m->m_title = "send email alerts to javier";
	m->m_desc  = "Sends to cellphone.";
	m->m_cgi   = "seatj";
	m->m_off   = (char *)&g_conf.m_sendEmailAlertsToJavier - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 2;
	m->m_group = 0;
	m++;
	*/

// 	m->m_title = "send email alerts to melissa";
// 	m->m_desc  = "Sends to cell phone.";
// 	m->m_cgi   = "seatme";
// 	m->m_off   = (char *)&g_conf.m_sendEmailAlertsToMelissa - g;
// 	m->m_type  = TYPE_BOOL;
// 	m->m_def   = "0";
// 	m->m_priv  = 2;
// 	m->m_group = 0;
// 	m++;

	/*
	m->m_title = "send email alerts to partap";
	m->m_desc  = "Sends to cell phone.";
	m->m_cgi   = "seatp";
	m->m_off   = (char *)&g_conf.m_sendEmailAlertsToPartap - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 2;
	m->m_group = 0;
	m++;
	*/

// 	m->m_title = "send email alerts to cinco";
// 	m->m_desc  = "Sends to cell phone.";
// 	m->m_cgi   = "seatc";
// 	m->m_off   = (char *)&g_conf.m_sendEmailAlertsToCinco - g;
// 	m->m_type  = TYPE_BOOL;
// 	m->m_def   = "0";
// 	m->m_priv  = 2;
// 	m->m_group = 0;
// 	m++;

	/*	m->m_title = "maximum hops from parent page";
	m->m_desc  = "Only index pages that are within a particular number "
		"of hops from the parent page given in Page Add Url. -1 means "
		"that max hops is infinite.";
	m->m_cgi   = "mnh";
	m->m_off   = (char *)&cr.m_maxNumHops - x;
	m->m_type  = TYPE_CHAR2;
	m->m_def   = "-1";
	m->m_group = 0;
	m++;*/
 | |
| 
 | |
| 	m->m_title = "cluster name";
 | |
| 	m->m_desc  = "Email alerts will include the cluster name";
 | |
| 	m->m_cgi   = "cn";
 | |
| 	m->m_off   = (char *)&g_conf.m_clusterName - g;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = 32;
 | |
| 	m->m_def   = "unspecified";
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 
 | |
	// Per-collection (OBJ_COLL) spider-round parms; m_off is relative to
	// the CollectionRec base x. PF_REBUILDURLFILTERS triggers a url
	// filter rebuild when the value changes.
	m->m_title = "spider round start time";
	m->m_desc  = "When the next spider round starts. If you force this to "
		"zero it sets it to the current time. That way you can "
		"respider all the urls that were already spidered, and urls "
		"that were not yet spidered in the round will still be "
		"spidered.";
	m->m_cgi   = "spiderRoundStart";
	m->m_size  = 0;
	m->m_off   = (char *)&cr.m_spiderRoundStartTime - x;
	m->m_type  = TYPE_LONG;
	m->m_def   = "0";
	m->m_group = 0;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m->m_flags = PF_HIDDEN | PF_REBUILDURLFILTERS ;
	m++;

	// DIFFBOT:
	// this http parm actually adds the "forceround" parm to the parmlist
	// below with the appropriate args. m_func is NULL because the real
	// work happens in the "forceround" cmd parm that follows.
	m->m_title = "manually restart a spider round";
	m->m_desc  = "Updates round number and resets local processed "
		"and crawled counts to 0.";
	m->m_cgi   = "roundStart";
	m->m_type  = TYPE_CMD;
	m->m_func  = NULL;
	m->m_group = 0;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m->m_flags = PF_HIDDEN;
	m++;

	// DIFFBOT:
	// . this is sent to each shard by issuing a "&roundStart=1" cmd
	// . similar to the "addcoll" cmd we add args to it and make it
	//   the "forceround" cmd parm and add THAT to the parmlist.
	//   so "roundStart=1" is really an alias for us.
	m->m_title = "manually restart a spider round on shard";
	m->m_desc  = "Updates round number and resets local processed "
		"and crawled counts to 0.";
	m->m_cgi   = "forceround";
	//m->m_off   = (char *)&cr.m_spiderRoundStartTime - x;
	m->m_type  = TYPE_CMD;
	m->m_func  = CommandForceNextSpiderRound;
	m->m_group = 0;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m->m_flags = PF_HIDDEN | PF_REBUILDURLFILTERS ;
	m++;

	// Monotonic counter of completed spider rounds for the collection.
	m->m_title = "spider round num";
	m->m_desc  = "The spider round number.";
	m->m_cgi   = "spiderRoundNum";
	m->m_off   = (char *)&cr.m_spiderRoundNum - x;
	m->m_type  = TYPE_LONG;
	m->m_def   = "0";
	m->m_group = 0;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m->m_flags = PF_HIDDEN ;
	m++;
 | |
| 
 | |
	// Route alerts to sysadmin@gigablast.com (hidden, not saved).
	m->m_title = "send email alerts to sysadmin";
	m->m_desc  = "Sends to sysadmin@gigablast.com.";
	m->m_cgi   = "seatsa";
	m->m_off   = (char *)&g_conf.m_sendEmailAlertsToSysadmin - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_priv  = 2;
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;

	/*
	m->m_title = "send email alerts to zak";
	m->m_desc  = "Sends to zak@gigablast.com.";
	m->m_cgi   = "seatz";
	m->m_off   = (char *)&g_conf.m_sendEmailAlertsToZak - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 2;
	m->m_group = 0;
	m++;

	m->m_title = "send email alerts to sabino";
	m->m_desc  = "Sends to cell phone.";
	m->m_cgi   = "seatms";
	m->m_off   = (char *)&g_conf.m_sendEmailAlertsToSabino - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 2;
	m->m_group = 0;
	m++;
	*/
 | |
| 
 | |
| 	m->m_title = "dead host timeout";
 | |
| 	m->m_desc  = "Consider a host in the Gigablast network to be dead if "
 | |
| 		"it does not respond to successive pings for this number of "
 | |
| 		"seconds. Gigablast does not send requests to dead hosts. "
 | |
| 		"Outstanding requests may be re-routed to a twin.";
 | |
| 	m->m_cgi   = "dht";
 | |
| 	m->m_off   = (char *)&g_conf.m_deadHostTimeout - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "4000";
 | |
| 	m->m_units = "milliseconds";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
	// How long a host must be unresponsive before an alert email goes out.
	m->m_title = "send email timeout";
	m->m_desc  = "Send an email after a host has not responded to "
		"successive pings for this many milliseconds.";
	m->m_cgi   = "set";
	m->m_off   = (char *)&g_conf.m_sendEmailTimeout - g;
	m->m_type  = TYPE_LONG;
	m->m_def   = "62000";
	m->m_priv  = 2;
	m->m_units = "milliseconds";
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;

	// Interval between pings to successive hosts.
	m->m_title = "ping spacer";
	m->m_desc  = "Wait this many milliseconds before pinging the next "
		"host. Each host pings all other hosts in the network.";
	m->m_cgi   = "ps";
	m->m_off   = (char *)&g_conf.m_pingSpacer - g;
	m->m_min   = 50; // i've seen values of 0 hammer the cpu
	m->m_type  = TYPE_LONG;
	m->m_def   = "100";
	m->m_units = "milliseconds";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;
 | |
| 
 | |
	//m->m_title = "max query time";
	//m->m_desc  = "When computing the average query latency "
	//	"truncate query latency times to this so that "
	//	"a single insanely int32_t query latency time does "
	//	"not trigger the alarm. This is in seconds.";
	//m->m_cgi   = "mqlr";
	//m->m_off   = (char *)&g_conf.m_maxQueryTime - g;
	//m->m_type  = TYPE_FLOAT;
	//m->m_def   = "30.0";
	//m->m_priv  = 2;
	//m->m_group = 0;
	//m++;

	// Alert when the query success rate drops below this fraction (0..1).
	m->m_title = "query success rate threshold";
	m->m_desc  = "Send email alerts when query success rate goes below "
		"this threshold. (percent rate between 0.0 and 1.0)";
	m->m_cgi   = "qsrt";
	m->m_off   = (char *)&g_conf.m_querySuccessThreshold - g;
	m->m_type  = TYPE_FLOAT;
	m->m_def   = "0.850000";
	m->m_priv  = 2;
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;

	// Alert when the rolling average query latency exceeds this (seconds).
	m->m_title = "average query latency threshold";
	m->m_desc  = "Send email alerts when average query latency goes above "
		"this threshold. (in seconds)";
	m->m_cgi   = "aqpst";
	m->m_off   = (char *)&g_conf.m_avgQueryTimeThreshold - g;
	m->m_type  = TYPE_FLOAT;
	// a titlerec fetch times out after 2 seconds and is re-routed
	m->m_def   = "2.000000";
	m->m_priv  = 2;
	m->m_units = "seconds";
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;

	// Window size (number of samples) for the average-latency computation.
	m->m_title = "number of query times in average";
	m->m_desc  = "Record this number of query times before calculating "
		"average query latency.";
	m->m_cgi   = "nqt";
	m->m_off   = (char *)&g_conf.m_numQueryTimes - g;
	m->m_type  = TYPE_LONG;
	m->m_def   = "300";
	m->m_priv  = 2;
	m->m_group = 0;
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "max corrupt index lists";
 | |
| 	m->m_desc  = "If we reach this many corrupt index lists, send "
 | |
| 		"an admin email.  Set to -1 to disable.";
 | |
| 	m->m_cgi   = "mcil";
 | |
| 	m->m_off   = (char *)&g_conf.m_maxCorruptLists - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "5";
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max hard drive temperature";
 | |
| 	m->m_desc  = "At what temperature in Celsius should we send "
 | |
| 		"an email alert if a hard drive reaches it?";
 | |
| 	m->m_cgi   = "mhdt";
 | |
| 	m->m_off   = (char *)&g_conf.m_maxHardDriveTemp - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "45";
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "delay emails after";
 | |
| 	m->m_desc  = "If delay non critical email alerts is on, don't send "
 | |
| 		"emails after this time. Time is hh:mm. Time is take from "
 | |
| 		"host #0's system clock in UTC.";
 | |
| 	m->m_cgi   = "dea";
 | |
| 	m->m_off   = (char *)&g_conf.m_delayEmailsAfter - g;
 | |
| 	m->m_type  = TYPE_TIME; // time format -- very special
 | |
| 	m->m_def   = "00:00";
 | |
| 	m->m_priv  = 2;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "delay emails before";
 | |
| 	m->m_desc  = "If delay non critical email alerts is on, don't send "
 | |
| 		"emails before this time. Time is hh:mm Time is take from "
 | |
| 		"host #0's system clock in UTC.";
 | |
| 	m->m_cgi   = "deb";
 | |
| 	m->m_off   = (char *)&g_conf.m_delayEmailsBefore - g;
 | |
| 	m->m_type  = TYPE_TIME; // time format -- very special
 | |
| 	m->m_def   = "00:00";
 | |
| 	m->m_priv  = 2;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 
 | |
| 	/*
 | |
| 	  Disable this until it works.
 | |
| 	m->m_title = "use merge token";
 | |
| 	m->m_desc  = "If used, prevents twins, or hosts on the same ide "
 | |
| 		"channel, from merging simultaneously.";
 | |
| 	m->m_cgi   = "umt";
 | |
| 	m->m_off   = (char *)&g_conf.m_useMergeToken - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
	// Substrings to scan for in the kernel ring buffer; a match triggers
	// an email alert (used to catch flaky-drive symptoms).
	m->m_title = "error string 1";
	m->m_desc  = "Look for this string in the kernel buffer for sending "
		"email alert. Useful for detecting some strange "
		"hard drive failures that really slow performance.";
	m->m_cgi   = "errstrone";
	m->m_off   = (char *)&g_conf.m_errstr1 - g;
	m->m_type  = TYPE_STRING;
	m->m_def   = "I/O error";
	m->m_size  = MAX_URL_LEN;
	m->m_priv  = 2;
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "error string 2";
	m->m_desc  = "Look for this string in the kernel buffer for sending "
		"email alert. Useful for detecting some strange "
		"hard drive failures that really slow performance.";
	m->m_cgi   = "errstrtwo";
	m->m_off   = (char *)&g_conf.m_errstr2 - g;
	m->m_type  = TYPE_STRING;
	m->m_def   = "";
	m->m_size  = MAX_URL_LEN;
	m->m_priv  = 2;
	m->m_group = 0;
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "error string 3";
	m->m_desc  = "Look for this string in the kernel buffer for sending "
		"email alert. Useful for detecting some strange "
		"hard drive failures that really slow performance.";
	m->m_cgi   = "errstrthree";
	m->m_off   = (char *)&g_conf.m_errstr3 - g;
	m->m_type  = TYPE_STRING;
	m->m_def   = "";
	m->m_size  = MAX_URL_LEN;
	m->m_priv  = 2;
	m->m_group = 0;
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;
 | |
| 
 | |
	// --- Email target #1: enable flags, relay server, to/from addresses.
	m->m_title = "send email alerts to email 1";
	m->m_desc  = "Sends to email address 1 through email server 1.";
	m->m_cgi   = "seatone";
	m->m_off   = (char *)&g_conf.m_sendEmailAlertsToEmail1 - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 2;
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "send parm change email alerts to email 1";
	m->m_desc  = "Sends to email address 1 through email server 1 if "
		"any parm is changed.";
	m->m_cgi   = "seatonep";
	m->m_off   = (char *)&g_conf.m_sendParmChangeAlertsToEmail1 - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 2;
	m->m_group = 0;
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "email server 1";
	m->m_desc  = "Connects to this IP or hostname "
		"directly when sending email 1. "
		"Use <i>apt-get install sendmail</i> to install sendmail "
		"on that IP or hostname. Add <i>From:10.5 RELAY</i> to "
		"/etc/mail/access to allow sendmail to forward email it "
		"receives from gigablast if gigablast hosts are on the "
		"10.5.*.* IPs. Then run <i>/etc/init.d/sendmail restart</i> "
		"as root to pick up those changes so sendmail will forward "
		"Gigablast's email to the email address you give below.";
	m->m_cgi   = "esrvone";
	m->m_off   = (char *)&g_conf.m_email1MX - g;
	m->m_type  = TYPE_STRING;
	m->m_def   = "127.0.0.1";
	m->m_size  = MAX_MX_LEN;
	m->m_priv  = 2;
	m->m_group = 0;
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "email address 1";
	m->m_desc  = "Sends to this address when sending email 1 ";
	m->m_cgi   = "eaddrone";
	m->m_off   = (char *)&g_conf.m_email1Addr - g;
	m->m_type  = TYPE_STRING;
	m->m_def   = "4081234567@vtext.com";
	m->m_size  = MAX_EMAIL_LEN;
	m->m_priv  = 2;
	m->m_group = 0;
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "from email address 1";
	m->m_desc  = "The from field when sending email 1 ";
	m->m_cgi   = "efaddrone";
	m->m_off   = (char *)&g_conf.m_email1From - g;
	m->m_type  = TYPE_STRING;
	m->m_def   = "sysadmin@mydomain.com";
	m->m_size  = MAX_EMAIL_LEN;
	m->m_priv  = 2;
	m->m_group = 0;
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;
 | |
| 
 | |
	// --- Email target #2 (same shape as target #1).
	m->m_title = "send email alerts to email 2";
	m->m_desc  = "Sends to email address 2 through email server 2.";
	m->m_cgi   = "seattwo";
	m->m_off   = (char *)&g_conf.m_sendEmailAlertsToEmail2 - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 2;
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "send parm change email alerts to email 2";
	m->m_desc  = "Sends to email address 2 through email server 2 if "
		"any parm is changed.";
	m->m_cgi   = "seattwop";
	m->m_off   = (char *)&g_conf.m_sendParmChangeAlertsToEmail2 - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 2;
	m->m_group = 0;
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "email server 2";
	m->m_desc  = "Connects to this server directly when sending email 2 ";
	m->m_cgi   = "esrvtwo";
	m->m_off   = (char *)&g_conf.m_email2MX - g;
	m->m_type  = TYPE_STRING;
	m->m_def   = "mail.mydomain.com";
	m->m_size  = MAX_MX_LEN;
	m->m_priv  = 2;
	m->m_group = 0;
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "email address 2";
	m->m_desc  = "Sends to this address when sending email 2 ";
	m->m_cgi   = "eaddrtwo";
	m->m_off   = (char *)&g_conf.m_email2Addr - g;
	m->m_type  = TYPE_STRING;
	m->m_def   = "";
	m->m_size  = MAX_EMAIL_LEN;
	m->m_priv  = 2;
	m->m_group = 0;
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "from email address 2";
	m->m_desc  = "The from field when sending email 2 ";
	m->m_cgi   = "efaddrtwo";
	m->m_off   = (char *)&g_conf.m_email2From - g;
	m->m_type  = TYPE_STRING;
	m->m_def   = "sysadmin@mydomain.com";
	m->m_size  = MAX_EMAIL_LEN;
	m->m_priv  = 2;
	m->m_group = 0;
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;
 | |
| 
 | |
	// --- Email target #3 (same shape as target #1).
	m->m_title = "send email alerts to email 3";
	m->m_desc  = "Sends to email address 3 through email server 3.";
	m->m_cgi   = "seatthree";
	m->m_off   = (char *)&g_conf.m_sendEmailAlertsToEmail3 - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 2;
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "send parm change email alerts to email 3";
	m->m_desc  = "Sends to email address 3 through email server 3 if "
		"any parm is changed.";
	m->m_cgi   = "seatthreep";
	m->m_off   = (char *)&g_conf.m_sendParmChangeAlertsToEmail3 - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 2;
	m->m_group = 0;
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "email server 3";
	m->m_desc  = "Connects to this server directly when sending email 3 ";
	m->m_cgi   = "esrvthree";
	m->m_off   = (char *)&g_conf.m_email3MX - g;
	m->m_type  = TYPE_STRING;
	m->m_def   = "mail.mydomain.com";
	m->m_size  = MAX_MX_LEN;
	m->m_priv  = 2;
	m->m_group = 0;
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "email address 3";
	m->m_desc  = "Sends to this address when sending email 3 ";
	m->m_cgi   = "eaddrthree";
	m->m_off   = (char *)&g_conf.m_email3Addr - g;
	m->m_type  = TYPE_STRING;
	m->m_def   = "";
	m->m_size  = MAX_EMAIL_LEN;
	m->m_priv  = 2;
	m->m_group = 0;
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "from email address 3";
	m->m_desc  = "The from field when sending email 3 ";
	m->m_cgi   = "efaddrthree";
	m->m_off   = (char *)&g_conf.m_email3From - g;
	m->m_type  = TYPE_STRING;
	m->m_def   = "sysadmin@mydomain.com";
	m->m_size  = MAX_EMAIL_LEN;
	m->m_priv  = 2;
	m->m_group = 0;
	m->m_page  = PAGE_MASTER;
	m->m_obj   = OBJ_CONF;
	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "send email alerts to email 4";
 | |
| 	m->m_desc  = "Sends to email address 4 through email server 4.";
 | |
| 	m->m_cgi   = "seatfour";
 | |
| 	m->m_off   = (char *)&g_conf.m_sendEmailAlertsToEmail4 - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "send parm change email alerts to email 4";
 | |
| 	m->m_desc  = "Sends to email address 4 through email server 4 if "
 | |
| 		"any parm is changed.";
 | |
| 	m->m_cgi   = "seatfourp";
 | |
| 	m->m_off   = (char *)&g_conf.m_sendParmChangeAlertsToEmail4 - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "email server 4";
 | |
| 	m->m_desc  = "Connects to this server directly when sending email 4 ";
 | |
| 	m->m_cgi   = "esrvfour";
 | |
| 	m->m_off   = (char *)&g_conf.m_email4MX - g;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_def   = "mail.mydomain.com";
 | |
| 	m->m_size  = MAX_MX_LEN;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "email address 4";
 | |
| 	m->m_desc  = "Sends to this address when sending email 4 ";
 | |
| 	m->m_cgi   = "eaddrfour";
 | |
| 	m->m_off   = (char *)&g_conf.m_email4Addr - g;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_size  = MAX_EMAIL_LEN;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "from email address 4";
 | |
| 	m->m_desc  = "The from field when sending email 4 ";
 | |
| 	m->m_cgi   = "efaddrfour";
 | |
| 	m->m_off   = (char *)&g_conf.m_email4From - g;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_def   = "sysadmin@mydomain.com";
 | |
| 	m->m_size  = MAX_EMAIL_LEN;
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 
 | |
| 	m->m_title = "prefer local reads";
 | |
| 	m->m_desc  = "If you have scsi drives or a slow network, say yes here "
 | |
| 		"to minimize data fetches across the network.";
 | |
| 	m->m_cgi   = "plr";
 | |
| 	m->m_off   = (char *)&g_conf.m_preferLocalReads - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "use biased tfndb";
 | |
| 	m->m_desc  = "Should we always send titledb record lookup requests "
 | |
| 		"to a particular host in order to increase tfndb page cache "
 | |
| 		"hits? This bypasses load balancing and may result in "
 | |
| 		"slower hosts being more of a bottleneck. Keep this disabled "
 | |
| 		"unless you notice tfndb disk seeks slowing things down.";
 | |
| 	m->m_cgi   = "ubu";
 | |
| 	m->m_off   = (char *)&g_conf.m_useBiasedTfndb - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	// this is ifdef'd out in Msg3.cpp for performance reasons,
 | |
| 	// so do it here, too
 | |
| #ifdef GBSANITYCHECK
 | |
| 	m->m_title = "max corrupted read retries";
 | |
| 	m->m_desc  = "How many times to retry disk reads that had corrupted "
 | |
| 		"data before requesting the list from a twin, and, if that "
 | |
| 		"fails, removing the bad data.";
 | |
| 	m->m_cgi   = "crr";
 | |
| 	m->m_off   = (char *)&g_conf.m_corruptRetries - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "100";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| #endif
 | |
| 
 | |
| 	m->m_title = "do incremental updating";
 | |
| 	m->m_desc  = "When reindexing a document, do not re-add data "
 | |
| 		"that should already be in index or clusterdb "
 | |
| 		"since the last time the document was indexed. Otherwise, "
 | |
| 		"re-add the data regardless.";
 | |
| 	m->m_cgi   = "oic";
 | |
| 	//m->m_off   = (char *)&g_conf.m_onlyAddUnchangedTermIds - g;
 | |
| 	m->m_off   = (char *)&g_conf.m_doIncrementalUpdating - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	// you can really screw up the index if this is false, so 
 | |
| 	// comment it out for now
 | |
| 	/*
 | |
| 	m->m_title = "index deletes";
 | |
| 	m->m_desc  = "Should we allow indexdb recs to be deleted? This is "
 | |
| 		"always true, except in very rare indexdb rebuilds.";
 | |
| 	m->m_cgi   = "id";
 | |
| 	m->m_off   = (char *)&g_conf.m_indexDeletes - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "use etc hosts";
 | |
| 	m->m_desc  = "Use /etc/hosts file to resolve hostnames? the "
 | |
| 		"/etc/host file is reloaded every minute, so if you make "
 | |
| 		"a change to it you might have to wait one minute for the "
 | |
| 		"change to take affect.";
 | |
| 	m->m_cgi   = "ueh";
 | |
| 	m->m_off   = (char *)&g_conf.m_useEtcHosts - g;
 | |
| 	m->m_def   = "0"; 
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "twins are split";
 | |
| 	m->m_desc  = "If enabled, Gigablast assumes the first half of "
 | |
| 		"machines in hosts.conf "
 | |
| 		"are on a different network switch than the second half, "
 | |
| 		"and minimizes transmits between the switches.";
 | |
| 	m->m_cgi   = "stw";
 | |
| 	m->m_off   = (char *)&g_conf.m_splitTwins - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "do out of memory testing";
 | |
| 	m->m_desc  = "When enabled Gigablast will randomly fail at "
 | |
| 		"allocating memory. Used for testing stability.";
 | |
| 	m->m_cgi   = "dot";
 | |
| 	m->m_off   = (char *)&g_conf.m_testMem - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "do consistency testing";
 | |
| 	m->m_desc  = "When enabled Gigablast will make sure it reparses "
 | |
| 		"the document exactly the same way. It does this every "
 | |
| 		"1000th document anyway, but enabling this makes it do it "
 | |
| 		"for every document.";
 | |
| 	m->m_cgi   = "dct";
 | |
| 	m->m_off   = (char *)&g_conf.m_doConsistencyTesting - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "use shotgun";
 | |
| 	m->m_desc  = "If enabled, all servers must have two gigabit "
 | |
| 		"ethernet ports hooked up and Gigablast will round robin "
 | |
| 		"packets between both ethernet ports when sending to another "
 | |
| 		"host. Can speed up network transmissions as much as 2x.";
 | |
| 	m->m_cgi   = "usht";
 | |
| 	m->m_off   = (char *)&g_conf.m_useShotgun - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "use quickpoll";
 | |
| 	m->m_desc  = "If enabled, Gigablast will use quickpoll. Significantly "
 | |
| 		"improves performance. Only turn this off for testing.";
 | |
| 	m->m_cgi   = "uqp";
 | |
| 	m->m_off   = (char *)&g_conf.m_useQuickpoll - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| // 	m->m_title = "quickpoll core on error";
 | |
| // 	m->m_desc  = "If enabled, quickpoll will terminate the process and "
 | |
| // 		"generate a core file when callbacks are called with the "
 | |
| // 		"wrong niceness.";
 | |
| // 	m->m_cgi   = "qpoe";
 | |
| // 	m->m_off   = (char *)&g_conf.m_quickpollCoreOnError - g;
 | |
| // 	m->m_type  = TYPE_BOOL;
 | |
| // 	m->m_def   = "1";
 | |
| // 	m++;
 | |
| 
 | |
| 	
 | |
| 
 | |
| 	// . this will leak the shared mem if the process is Ctrl+C'd
 | |
| 	// . that is expected behavior
 | |
| 	// . you can clean up the leaks using 'gb freecache 20000000'
 | |
| 	//   and use 'ipcs -m' to see what leaks you got
 | |
| 	// . generally, only the main gb should use shared mem, so
 | |
| 	//   keep this off for teting
 | |
| 	m->m_title = "use shared mem";
 | |
| 	m->m_desc  = "If enabled, Gigablast will use shared memory. "
 | |
| 		"Should really only be used on the live cluster, "
 | |
| 		"keep this on the testing cluster since it can "
 | |
| 		"leak easily.";
 | |
| 	m->m_cgi   = "ushm";
 | |
| 	m->m_off   = (char *)&g_conf.m_useSHM - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	// disable disk caches... for testing really
 | |
| 	/*
 | |
| 	m->m_title = "use disk page cache for indexdb";
 | |
| 	m->m_desc  = "Use disk page cache?";
 | |
| 	m->m_cgi   = "udpci";
 | |
| 	m->m_off   = (char *)&g_conf.m_useDiskPageCacheIndexdb - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "posdb disk cache size";
 | |
| 	m->m_desc  = "How much file cache size to use in bytes? Posdb is "
 | |
| 		"the index.";
 | |
| 	m->m_cgi   = "dpcsp";
 | |
| 	m->m_off   = (char *)&g_conf.m_posdbFileCacheSize - g;
 | |
| 	m->m_type  = TYPE_LONG_LONG;
 | |
| 	m->m_def   = "30000000";
 | |
| 	m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "tagdb disk cache size";
 | |
| 	m->m_desc  = "How much file cache size to use in bytes? Tagdb is "
 | |
| 		"consulted at spider time and query time to determine "
 | |
| 		"if a url or outlink is banned or what its siterank is, etc.";
 | |
| 	m->m_cgi   = "dpcst";
 | |
| 	m->m_off   = (char *)&g_conf.m_tagdbFileCacheSize - g;
 | |
| 	m->m_type  = TYPE_LONG_LONG;
 | |
| 	m->m_def   = "30000000";
 | |
| 	m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "clusterdb disk cache size";
 | |
| 	m->m_desc  = "How much file cache size to use in bytes? "
 | |
| 	        "Gigablast does a "
 | |
| 		"lookup in clusterdb for each search result at query time to "
 | |
| 		"get its site information for site clustering. If you "
 | |
| 		"disable site clustering in the search controls then "
 | |
| 		"clusterdb will not be consulted.";
 | |
| 	m->m_cgi   = "dpcsc";
 | |
| 	m->m_off   = (char *)&g_conf.m_clusterdbFileCacheSize - g;
 | |
| 	m->m_type  = TYPE_LONG_LONG;
 | |
| 	m->m_def   = "30000000";
 | |
| 	m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "titledb disk cache size";
 | |
| 	m->m_desc  = "How much file cache size to use in bytes? Titledb "
 | |
| 		"holds the cached web pages, compressed. Gigablast consults "
 | |
| 		"it to generate a summary for a search result, or to see if "
 | |
| 		"a url Gigablast is spidering is already in the index.";
 | |
| 	m->m_cgi   = "dpcsx";
 | |
| 	m->m_off   = (char *)&g_conf.m_titledbFileCacheSize - g;
 | |
| 	m->m_type  = TYPE_LONG_LONG;
 | |
| 	m->m_def   = "30000000";
 | |
| 	m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "spiderdb disk cache size";
 | |
| 	m->m_desc  = "How much file cache size to use in bytes? Titledb "
 | |
| 		"holds the cached web pages, compressed. Gigablast consults "
 | |
| 		"it to generate a summary for a search result, or to see if "
 | |
| 		"a url Gigablast is spidering is already in the index.";
 | |
| 	m->m_cgi   = "dpcsy";
 | |
| 	m->m_off   = (char *)&g_conf.m_spiderdbFileCacheSize - g;
 | |
| 	m->m_type  = TYPE_LONG_LONG;
 | |
| 	m->m_def   = "30000000";
 | |
| 	m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "exclude link text";
 | |
| 	m->m_desc  = "Exclude search results that have one or more query "
 | |
| 		"that only appear in the incoming link text";
 | |
| 	m->m_cgi   = "exlt";
 | |
| 	m->m_off  = (char *)&g_conf.m_excludeLinkText - g;
 | |
| 	m->m_sparm = 1;
 | |
| 	m->m_soff  = (char *)&si.m_excludeLinkText - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_scgi  = "excludelinktext";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "exclude meta text";
 | |
| 	m->m_desc  = "Exclude search results that have one or more query "
 | |
| 		"that only appear in the meta text";
 | |
| 	m->m_cgi   = "exmt";
 | |
| 	m->m_off  = (char *)&g_conf.m_excludeMetaText - g;
 | |
| 	m->m_sparm = 1;
 | |
| 	m->m_soff  = (char *)&si.m_excludeMetaText - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_scgi  = "excludemetatext";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "scan all if not found";
 | |
| 	m->m_desc  = "Scan all titledb files if rec not found. You should "
 | |
| 		"keep this on to avoid corruption. Do not turn it off unless "
 | |
| 		"you are Matt Wells.";
 | |
| 	m->m_cgi   = "sainf";
 | |
| 	m->m_off   = (char *)&g_conf.m_scanAllIfNotFound - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "interface machine";
 | |
| 	m->m_desc  = "for specifying if this is an interface machine"
 | |
| 		     "messages are rerouted from this machine to the main"
 | |
| 		     "cluster set in the hosts.conf.";
 | |
| 	m->m_cgi   = "intmch";
 | |
| 	m->m_off   = (char *)&g_conf.m_interfaceMachine - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "generate vector at query time";
 | |
| 	m->m_desc  = "At query time, should Gigablast generate content "
 | |
| 		"vectors for title records lacking them? This is an "
 | |
| 		"expensive operation, so is really just for testing purposes.";
 | |
| 	m->m_cgi   = "gv";
 | |
| 	m->m_off   = (char *)&g_conf.m_generateVectorAtQueryTime - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 
 | |
|         m->m_title = "redirect non-raw traffic";
 | |
|         m->m_desc  = "If this is non empty, http traffic will be redirected "
 | |
| 		"to the specified address.";
 | |
|         m->m_cgi   = "redir";
 | |
|         m->m_off   = (char *)&g_conf.m_redirect - g;
 | |
|         m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_URL_LEN;
 | |
|         m->m_def   = "";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
|         m++;
 | |
| 
 | |
|         m->m_title = "send requests to compression proxy";
 | |
|         m->m_desc  = "If this is true, gb will route download requests for"
 | |
| 		" web pages to proxies in hosts.conf.  Proxies will"
 | |
| 		" download and compress docs before sending back. ";
 | |
|         m->m_cgi   = "srtcp";
 | |
|         m->m_off   = (char *)&g_conf.m_useCompressionProxy - g;
 | |
|         m->m_type  = TYPE_BOOL;
 | |
|         m->m_def   = "0";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
|         m++;
 | |
| 
 | |
|         m->m_title = "synchronize proxy to cluster time";
 | |
|         m->m_desc  = "Enable/disable the ability to synchronize time between "
 | |
|                 "the cluster and the proxy";
 | |
|         m->m_cgi   = "sptct";
 | |
|         m->m_off   = (char *)&g_conf.m_timeSyncProxy - g;
 | |
|         m->m_type  = TYPE_BOOL;
 | |
|         m->m_def   = "0";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
|         m++;
 | |
| 
 | |
| 
 | |
| 
 | |
| 		/*
 | |
|         m->m_title = "use data feed account server";
 | |
|         m->m_desc  = "Enable/disable the use of a remote account verification "
 | |
|                 "for Data Feed Customers. This should ONLY be used for the "
 | |
|                 "proxy.";
 | |
|         m->m_cgi   = "pdfuas";
 | |
|         m->m_off   = (char *)&g_conf.m_useDFAcctServer - g;
 | |
|         m->m_type  = TYPE_BOOL;
 | |
|         m->m_def   = "0";
 | |
|         m++;
 | |
| 
 | |
|         m->m_title = "data feed server ip";
 | |
|         m->m_desc  = "The ip address of the Gigablast data feed server to "
 | |
|                 "retrieve customer account information from. This should ONLY "
 | |
|                 "be used for the proxy.";
 | |
|         m->m_cgi   = "pdfip";
 | |
|         m->m_off   = (char *)&g_conf.m_dfAcctIp - g;
 | |
|         m->m_type  = TYPE_IP;
 | |
|         m->m_def   = "2130706433";
 | |
|         m->m_group = 0;
 | |
|         m++;
 | |
| 
 | |
|         m->m_title = "data feed server port";
 | |
|         m->m_desc  = "The port of the Gigablast data feed server to retrieve "
 | |
|                 "customer account information from. This should ONLY be used "
 | |
|                 "for the proxy";
 | |
|         m->m_cgi   = "pdfport";
 | |
|         m->m_off   = (char *)&g_conf.m_dfAcctPort - g;
 | |
|         m->m_type  = TYPE_LONG;
 | |
|         m->m_def   = "8040";
 | |
|         m->m_group = 0;
 | |
|         m++;
 | |
| 
 | |
|         m->m_title = "data feed server collection";
 | |
|         m->m_desc  = "The collection on the Gigablast data feed server to "
 | |
|                 "retrieve customer account information from. This should ONLY "
 | |
|                 "be used for the proxy.";
 | |
|         m->m_cgi   = "pdfcoll";
 | |
|         m->m_off   = (char *)&g_conf.m_dfAcctColl - g;
 | |
|         m->m_type  = TYPE_STRING;
 | |
|         m->m_size  = MAX_COLL_LEN;
 | |
|         m->m_def   = "customers";
 | |
|         m->m_group = 0;
 | |
|         m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "allow scaling of hosts";
 | |
| 	m->m_desc  = "Allows scaling up of hosts by deleting recs not in "
 | |
| 		"the correct group.  This should only happen why copying "
 | |
| 		"a set of servers to the new hosts. Otherwise corrupted "
 | |
| 		"data will cause a halt.";
 | |
| 	m->m_cgi   = "asoh";
 | |
| 	m->m_off   = (char *)&g_conf.m_allowScale - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "allow bypass of db validation";
 | |
| 	m->m_desc  = "Allows bypass of db validation so gigablast will not "
 | |
| 		"halt if a corrupt db is discovered during load.  Use this "
 | |
| 		"when attempting to load with a collection that has known "
 | |
| 		"corruption.";
 | |
| 	m->m_cgi   = "abov";
 | |
| 	m->m_off   = (char *)&g_conf.m_bypassValidation - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "reload language pages";
 | |
| 	m->m_desc  = "Reloads language specific pages.";
 | |
| 	m->m_cgi   = "rlpages";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_func  = CommandReloadLanguagePages;
 | |
| 	m->m_cast  = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "proxy port";
 | |
| 	m->m_desc  = "Retrieve pages from the proxy on "
 | |
| 		"this port.";
 | |
| 	m->m_cgi   = "proxyport";
 | |
| 	m->m_off   = (char *)&cr.m_proxyPort - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "0";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "all reload language pages";
 | |
| 	m->m_desc  = "Reloads language specific pages for all hosts.";
 | |
| 	m->m_cgi   = "rlpages";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	// do we need this any more?
 | |
| 	/*
 | |
| 	m->m_title = "give up on dead hosts";
 | |
| 	m->m_desc  = "Give up requests to dead hosts.  Only set this when you "
 | |
| 		"know a host is dead and will not come back online without "
 | |
| 		"a restarting all hosts.  Messages will timeout on the dead "
 | |
| 		"host but will not error, allowing outstanding spidering to "
 | |
| 		"finish to the twin.";
 | |
| 	m->m_cgi   = "gvup";
 | |
| 	m->m_off   = (char *)&g_conf.m_giveupOnDeadHosts - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "ask root name servers";
 | |
| 	m->m_desc  = "if enabled Gigablast will direct DNS requests to "
 | |
| 		"the root DNS servers, otherwise it will continue to "
 | |
| 		"send DNS queries to the bind9 servers defined in "
 | |
| 		"the Master Controls.";
 | |
| 	m->m_cgi   = "bdns";
 | |
| 	m->m_off   = (char *)&g_conf.m_askRootNameservers - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "do dig sanity checks";
 | |
| 	m->m_desc  = "call dig @nameServer hostname and on timedout lookups" 
 | |
| 			" and see if dig also timed out";
 | |
| 	m->m_cgi   = "dig";
 | |
| 	m->m_off   = (char *)&g_conf.m_useDig - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "dns root name server 1";
 | |
| 	m->m_desc  = "IP address of a DNS root server. Assumes UDP "
 | |
| 		"port 53.";
 | |
| 	m->m_cgi   = "rnsa";
 | |
| 	m->m_off   = (char *)&g_conf.m_rnsIps[0] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "192.228.79.201";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dns root name server 2";
 | |
| 	m->m_desc  = "IP address of a DNS root server. Assumes UDP "
 | |
| 		"port 53.";
 | |
| 	m->m_cgi   = "rnsb";
 | |
| 	m->m_off   = (char *)&g_conf.m_rnsIps[1] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "192.33.4.12";
 | |
| 	m++;
 | |
| 	
 | |
| 	m->m_title = "dns root name server 3";
 | |
| 	m->m_desc  = "IP address of a DNS root server. Assumes UDP "
 | |
| 		"port 53.";
 | |
| 	m->m_cgi   = "rnsc";
 | |
| 	m->m_off   = (char *)&g_conf.m_rnsIps[2] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "128.8.10.90";
 | |
| 	m++;
 | |
| 	
 | |
| 	m->m_title = "dns root name server 4";
 | |
| 	m->m_desc  = "IP address of a DNS root server. Assumes UDP "
 | |
| 		"port 53.";
 | |
| 	m->m_cgi   = "rnsd";
 | |
| 	m->m_off   = (char *)&g_conf.m_rnsIps[3] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "192.203.230.10";
 | |
| 	m++;
 | |
| 	
 | |
| 	m->m_title = "dns root name server 5";
 | |
| 	m->m_desc  = "IP address of a DNS root server. Assumes UDP "
 | |
| 		"port 53.";
 | |
| 	m->m_cgi   = "rnse";
 | |
| 	m->m_off   = (char *)&g_conf.m_rnsIps[4] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "192.5.5.241";
 | |
| 	m++;
 | |
| 	
 | |
| 	m->m_title = "dns root name server 6";
 | |
| 	m->m_desc  = "IP address of a DNS root server. Assumes UDP "
 | |
| 		"port 53.";
 | |
| 	m->m_cgi   = "rnsf";
 | |
| 	m->m_off   = (char *)&g_conf.m_rnsIps[5] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "192.112.36.4";
 | |
| 	m++;
 | |
| 	
 | |
| 	m->m_title = "dns root name server 7";
 | |
| 	m->m_desc  = "IP address of a DNS root server. Assumes UDP "
 | |
| 		"port 53.";
 | |
| 	m->m_cgi   = "rnsg";
 | |
| 	m->m_off   = (char *)&g_conf.m_rnsIps[6] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "128.63.2.53";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dns root name server 8";
 | |
| 	m->m_desc  = "IP address of a DNS root server. Assumes UDP "
 | |
| 		"port 53.";
 | |
| 	m->m_cgi   = "rnsh";
 | |
| 	m->m_off   = (char *)&g_conf.m_rnsIps[7] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "192.36.148.17";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dns root name server 9";
 | |
| 	m->m_desc  = "IP address of a DNS root server. Assumes UDP "
 | |
| 		"port 53.";
 | |
| 	m->m_cgi   = "rnsi";
 | |
| 	m->m_off   = (char *)&g_conf.m_rnsIps[8] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "192.58.128.30";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dns root name server 10";
 | |
| 	m->m_desc  = "IP address of a DNS root server. Assumes UDP "
 | |
| 		"port 53.";
 | |
| 	m->m_cgi   = "rnsj";
 | |
| 	m->m_off   = (char *)&g_conf.m_rnsIps[9] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "193.0.14.129";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dns root name server 11";
 | |
| 	m->m_desc  = "IP address of a DNS root server. Assumes UDP "
 | |
| 		"port 53.";
 | |
| 	m->m_cgi   = "rnsk";
 | |
| 	m->m_off   = (char *)&g_conf.m_rnsIps[10] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "198.32.64.12";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dns root name server 12";
 | |
| 	m->m_desc  = "IP address of a DNS root server. Assumes UDP "
 | |
| 		"port 53.";
 | |
| 	m->m_cgi   = "rnsl";
 | |
| 	m->m_off   = (char *)&g_conf.m_rnsIps[11] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "202.12.27.33";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dns root name server 13";
 | |
| 	m->m_desc  = "IP address of a DNS root server. Assumes UDP "
 | |
| 		"port 53.";
 | |
| 	m->m_cgi   = "rnsm";
 | |
| 	m->m_off   = (char *)&g_conf.m_rnsIps[12] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "198.41.0.4";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "dns 0";
 | |
| 	m->m_desc  = "IP address of the primary DNS server. Assumes UDP "
 | |
| 		"port 53. REQUIRED FOR SPIDERING! Use Google's "
 | |
| 		"public DNS 8.8.8.8 as default.";
 | |
| 	m->m_cgi   = "pdns";
 | |
| 	m->m_off   = (char *)&g_conf.m_dnsIps[0] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	// default to google public dns #1
 | |
| 	m->m_def   = "8.8.8.8";
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dns 1";
 | |
| 	m->m_desc  = "IP address of the secondary DNS server. Assumes UDP "
 | |
| 	"port 53. Will be accessed in conjunction with the primary "
 | |
| 	"dns, so make sure this is always up. An ip of 0 means "
 | |
| 	"disabled. Google's secondary public DNS is 8.8.4.4.";
 | |
| 	m->m_cgi   = "sdns";
 | |
| 	m->m_off   = (char *)&g_conf.m_dnsIps[1] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	// default to google public dns #2
 | |
| 	m->m_def   = "8.8.4.4";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dns 2";
 | |
| 	m->m_desc  = "All hosts send to these DNSes based on hash "
 | |
| 		"of the subdomain to try to split DNS load evenly.";
 | |
| 	m->m_cgi   = "sdnsa";
 | |
| 	m->m_off   = (char *)&g_conf.m_dnsIps[2] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "0.0.0.0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dns 3";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_cgi   = "sdnsb";
 | |
| 	m->m_off   = (char *)&g_conf.m_dnsIps[3] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "0.0.0.0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dns 4";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_cgi   = "sdnsc";
 | |
| 	m->m_off   = (char *)&g_conf.m_dnsIps[4] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "0.0.0.0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dns 5";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_cgi   = "sdnsd";
 | |
| 	m->m_off   = (char *)&g_conf.m_dnsIps[5] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "0.0.0.0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dns 6";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_cgi   = "sdnse";
 | |
| 	m->m_off   = (char *)&g_conf.m_dnsIps[6] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "0.0.0.0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dns 7";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_cgi   = "sdnsf";
 | |
| 	m->m_off   = (char *)&g_conf.m_dnsIps[7] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "0.0.0.0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dns 8";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_cgi   = "sdnsg";
 | |
| 	m->m_off   = (char *)&g_conf.m_dnsIps[8] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "0.0.0.0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dns 9";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_cgi   = "sdnsh";
 | |
| 	m->m_off   = (char *)&g_conf.m_dnsIps[9] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "0.0.0.0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dns 10";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_cgi   = "sdnsi";
 | |
| 	m->m_off   = (char *)&g_conf.m_dnsIps[10] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "0.0.0.0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dns 11";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_cgi   = "sdnsj";
 | |
| 	m->m_off   = (char *)&g_conf.m_dnsIps[11] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "0.0.0.0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dns 12";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_cgi   = "sdnsk";
 | |
| 	m->m_off   = (char *)&g_conf.m_dnsIps[12] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "0.0.0.0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dns 13";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_cgi   = "sdnsl";
 | |
| 	m->m_off   = (char *)&g_conf.m_dnsIps[13] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "0.0.0.0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dns 14";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_cgi   = "sdnsm";
 | |
| 	m->m_off   = (char *)&g_conf.m_dnsIps[14] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "0.0.0.0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dns 15";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_cgi   = "sdnsn";
 | |
| 	m->m_off   = (char *)&g_conf.m_dnsIps[15] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "0.0.0.0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "geocoder IP #1";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_cgi   = "gca";
 | |
| 	m->m_off   = (char *)&g_conf.m_geocoderIps[0] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "10.5.66.11"; // sp1
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "geocoder IP #2";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_cgi   = "gcb";
 | |
| 	m->m_off   = (char *)&g_conf.m_geocoderIps[1] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "0.0.0.0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "geocoder IP #3";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_cgi   = "gcc";
 | |
| 	m->m_off   = (char *)&g_conf.m_geocoderIps[2] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "0.0.0.0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "geocoder IP #4";
 | |
| 	m->m_desc  = "";
 | |
| 	m->m_cgi   = "gcd";
 | |
| 	m->m_off   = (char *)&g_conf.m_geocoderIps[3] - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "0.0.0.0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "wiki proxy ip";
 | |
| 	m->m_desc  = "Access the wiki coll through this proxy ip";
 | |
| 	m->m_cgi   = "wpi";
 | |
| 	m->m_off   = (char *)&g_conf.m_wikiProxyIp - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "wiki proxy port";
 | |
| 	m->m_desc  = "Access the wiki coll through this proxy port";
 | |
| 	m->m_cgi   = "wpp";
 | |
| 	m->m_off   = (char *)&g_conf.m_wikiProxyPort - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "default collection";
 | |
| 	m->m_desc  = "When no collection is explicitly specified, assume "
 | |
| 		"this collection name.";
 | |
| 	m->m_cgi   = "dcn";
 | |
| 	m->m_off   = (char *)&g_conf.m_defaultColl - g;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_COLL_LEN+1;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "directory collection";
 | |
| 	m->m_desc  = "Collection to be used for directory searching and "
 | |
| 		"display of directory topic pages.";
 | |
| 	m->m_cgi   = "dircn";
 | |
| 	m->m_off   = (char *)&g_conf.m_dirColl - g;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_COLL_LEN+1;
 | |
| 	m->m_def   = "main";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "directory hostname";
 | |
| 	m->m_desc  = "Hostname of the server providing the directory. "
 | |
| 		     "Leave empty to use this host.";
 | |
| 	m->m_cgi   = "dirhn";
 | |
| 	m->m_off   = (char *)&g_conf.m_dirHost - g;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_URL_LEN;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max incoming bandwidth for spider";
 | |
| 	m->m_desc  = "Total incoming bandwidth used by all spiders should "
 | |
| 		"not exceed this many kilobits per second. ";
 | |
| 	m->m_cgi   = "mkbps";
 | |
| 	m->m_off   = (char *)&g_conf.m_maxIncomingKbps - g;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "999999.0";
 | |
| 	m->m_units = "Kbps";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max 1-minute sliding-window loadavg";
 | |
| 	m->m_desc  = "Spiders will shed load when their host exceeds this "
 | |
| 		"value for the 1-minute load average in /proc/loadavg. "
 | |
| 		"The value 0.0 disables this feature.";
 | |
| 	m->m_cgi   = "mswl";
 | |
| 	m->m_off   = (char *)&g_conf.m_maxLoadAvg - g;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "0.0";
 | |
| 	m->m_units = "";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max pages per second";
 | |
| 	m->m_desc  = "Maximum number of pages to index or delete from index "
 | |
| 		"per second for all hosts combined.";
 | |
| 	m->m_cgi   = "mpps";
 | |
| 	m->m_off   = (char *)&g_conf.m_maxPagesPerSecond - g;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "999999.0";
 | |
| 	m->m_units = "pages/second";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "distributed spider balance";
 | |
| 	m->m_desc  = "Max number of ready domains a host can have distributed "
 | |
| 		"to it by all other host. This should be some multiple of the "
 | |
| 		"total number of hosts in the cluster.";
 | |
| 	m->m_cgi   = "dsb";
 | |
| 	m->m_off   = (char *)&g_conf.m_distributedSpiderBalance - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "1024";
 | |
| 	m->m_units = "domains";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "distributed same ip wait (hack)";
 | |
| 	m->m_desc  = "Amount of time to wait if this IP is already being "
 | |
| 		"downloaded by a host.  Works only in conjunction with "
 | |
| 		"distribute spider downloads by ip in Spider Controls.";
 | |
| 	m->m_cgi   = "dsiw";
 | |
| 	m->m_off   = (char *)&g_conf.m_distributedIpWait - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_units = "ms";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_min   = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "root quality max cache age base";
 | |
| 	m->m_desc  = "Maximum age to cache quality of a root url in seconds. "
 | |
| 		"Computing "
 | |
| 		"the quality of especially root urls can be expensive. "
 | |
| 		"This number is multiplied by (Q-30)/10 where Q is the cached "
 | |
| 		"quality of the root url. Therefore, higher quality and more "
 | |
| 		"stable root urls are updated less often, which is a good thing "
 | |
| 		"since they are more expensive to recompute.";
 | |
| 	m->m_cgi   = "rqmca";
 | |
| 	m->m_off   = (char *)&g_conf.m_siteQualityMaxCacheAge - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "7257600"; // 3 months (in seconds)
 | |
| 	m->m_units = "seconds";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 
 | |
| 
 | |
| 	m->m_title = "use threads";
 | |
| 	m->m_desc  = "If enabled, Gigablast will use threads.";
 | |
| 	m->m_cgi   = "ut";
 | |
| 	m->m_off   = (char *)&g_conf.m_useThreads - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	// turn off for now. after seeing how SLOOOOOW brian's merge op was
 | |
| 	// when all 16 shards on a 16-core machine were merging (even w/ SSDs)
 | |
| 	// i turned threads off and it was over 100x faster. so until we have
 | |
| 	// pooling or something turn these off
 | |
| 	m->m_title = "use threads for disk";
 | |
| 	m->m_desc  = "If enabled, Gigablast will use threads for disk ops. "
 | |
| 		"Now that Gigablast uses pthreads more effectively, "
 | |
| 		"leave this enabled for optimal performance in all cases.";
 | |
| 	//"Until pthreads is any good leave this off. If you have "
 | |
| 	//"SSDs performance can be as much as 100x better.";
 | |
| 	m->m_cgi   = "utfd";
 | |
| 	m->m_off   = (char *)&g_conf.m_useThreadsForDisk - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_flags = 0;//PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "use threads for intersects and merges";
 | |
| 	m->m_desc  = "If enabled, Gigablast will use threads for these ops. "
 | |
| 		"Default is now on in the event you have simultaneous queries "
 | |
| 		"so one query does not hold back the other. There seems "
 | |
| 		"to be a bug so leave this ON for now.";
 | |
| 	        //"Until pthreads is any good leave this off.";
 | |
| 	m->m_cgi   = "utfio";
 | |
| 	m->m_off   = (char *)&g_conf.m_useThreadsForIndexOps - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	// enable this in the event of multiple cores available and
 | |
| 	// large simultaneous queries coming in
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_flags = 0;//PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "use threads for system calls";
 | |
| 	m->m_desc  = "Gigablast does not make too many system calls so "
 | |
| 		"leave this on in case the system call is slow.";
 | |
| 	m->m_cgi   = "utfsc";
 | |
| 	m->m_off   = (char *)&g_conf.m_useThreadsForSystemCalls - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_flags = 0;//PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "max cpu threads";
 | |
| 	m->m_desc  = "Maximum number of threads to use per Gigablast process "
 | |
| 		"for intersecting docid lists.";
 | |
| 	m->m_cgi   = "mct";
 | |
| 	m->m_off   = (char *)&g_conf.m_maxCpuThreads - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	// make it 3 for new gb in case one query takes way longer 
 | |
| 	// than the others
 | |
| 	m->m_def   = "6"; // "2";
 | |
| 	m->m_units = "threads";
 | |
| 	m->m_min   = 1;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max cpu merge threads";
 | |
| 	m->m_desc  = "Maximum number of threads to use per Gigablast process "
 | |
| 		"for merging lists read from disk.";
 | |
| 	m->m_cgi   = "mcmt";
 | |
| 	m->m_off   = (char *)&g_conf.m_maxCpuMergeThreads - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "10";
 | |
| 	m->m_units = "threads";
 | |
| 	m->m_min   = 1;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max write threads";
 | |
| 	m->m_desc  = "Maximum number of threads to use per Gigablast process "
 | |
| 		"for writing data to the disk. "
 | |
| 		"Keep low to reduce file interlace effects and impact "
 | |
| 		"on query response time.";
 | |
| 	m->m_cgi   = "mwt";
 | |
| 	m->m_off   = (char *)&g_conf.m_maxWriteThreads - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_units = "threads";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "flush disk writes";
 | |
| 	m->m_desc  = "If enabled then all writes will be flushed to disk. "
 | |
| 		"If not enabled, then gb uses the Linux disk write cache.";
 | |
| 	m->m_cgi   = "fw";
 | |
| 	m->m_off   = (char *)&g_conf.m_flushWrites - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API;//PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "files group writable";
 | |
| 	m->m_desc  = "Make all created files group writable? If you have "
 | |
| 		"multiple user accounts starting Gigablast processes you "
 | |
| 		"will want the files to be group writable. You will "
 | |
| 		"need to make sure you run gigablast under the "
 | |
| 		"primary group you want to use for gigablast administration.";
 | |
| 	m->m_cgi   = "afgw";
 | |
| 	m->m_off   = (char *)&g_conf.m_makeAllFilesGroupWritable - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API;//PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "verify written lists";
 | |
| 	m->m_desc  = "Ensure lists being written to disk are not corrupt. "
 | |
| 		"That title recs appear valid, etc. Helps isolate sources "
 | |
| 		"of corruption. Used for debugging.";
 | |
| 	m->m_cgi   = "vwl";
 | |
| 	m->m_off   = (char *)&g_conf.m_verifyDumpedLists - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "verify disk writes";
 | |
| 	m->m_desc  = "Read what was written in a verification step. Decreases "
 | |
| 		"performance, but may help fight disk corruption mostly on "
 | |
| 		"Maxtors and Western Digitals.";
 | |
| 	m->m_cgi   = "vdw";
 | |
| 	m->m_off   = (char *)&g_conf.m_verifyWrites - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max spider read threads";
 | |
| 	m->m_desc  = "Maximum number of threads to use per Gigablast process "
 | |
| 		"for accessing the disk "
 | |
| 		"for index-building purposes. Keep low to reduce impact "
 | |
| 		"on query response time. Increase for fast disks or when "
 | |
| 		"preferring build speed over lower query latencies";
 | |
| 	m->m_cgi   = "smdt";
 | |
| 	m->m_off   = (char *)&g_conf.m_spiderMaxDiskThreads - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "20";
 | |
| 	m->m_units = "threads";
 | |
| 	m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "max spider big read threads";
 | |
| 	m->m_desc  = "This particular number applies to all disk "
 | |
| 		"reads above 1MB. "
 | |
| 		"The number of total threads is also "
 | |
| 		"limited to MAX_STACKS which is currently 20.";
 | |
| 	m->m_cgi   = "smbdt";
 | |
| 	m->m_off   = (char *)&g_conf.m_spiderMaxBigDiskThreads - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "2";
 | |
| 	m->m_units = "threads";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max spider medium read threads";
 | |
| 	m->m_desc  = "This particular number applies to all disk "
 | |
| 		"reads above 100K. "
 | |
| 		"The number of total threads is also "
 | |
| 		"limited to MAX_STACKS which is currently 20.";
 | |
| 	m->m_cgi   = "smmdt";
 | |
| 	m->m_off   = (char *)&g_conf.m_spiderMaxMedDiskThreads - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "4";
 | |
| 	m->m_units = "threads";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max spider small read threads";
 | |
| 	m->m_desc  = "This particular number applies to all disk "
 | |
| 		"reads above 1MB. "
 | |
| 		"The number of total threads is also "
 | |
| 		"limited to MAX_STACKS which is currently 20.";
 | |
| 	m->m_cgi   = "smsdt";
 | |
| 	m->m_off   = (char *)&g_conf.m_spiderMaxSmaDiskThreads - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "15";
 | |
| 	m->m_units = "threads";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "separate disk reads";
 | |
| 	m->m_desc  = "If enabled then we will not launch a low priority "
 | |
| 		"disk read or write while a high priority is outstanding. "
 | |
| 		"Help improve query response time at the expense of "
 | |
| 		"spider performance.";
 | |
| 	m->m_cgi   = "sdt";
 | |
| 	m->m_off   = (char *)&g_conf.m_separateDiskReads - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_flags = 0;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "max query read threads";
 | |
| 	m->m_desc  = "Maximum number of threads to use per Gigablast process "
 | |
| 		"for accessing the disk "
 | |
| 		"for querying purposes.";
 | |
| 	//IDE systems tend to be more "
 | |
| 	//	"responsive when this is low. Increase for SCSI or RAID "
 | |
| 	//	"systems.";
 | |
| 	m->m_cgi   = "qmdt";
 | |
| 	m->m_off   = (char *)&g_conf.m_queryMaxDiskThreads - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "100";
 | |
| 	m->m_units = "threads";
 | |
| 	m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "max query big read threads";
 | |
| 	m->m_desc  = "This particular number applies to all reads above 1MB. "
 | |
| 		"The number of total threads is also "
 | |
| 		"limited to MAX_STACKS which is currently 20.";
 | |
| 	m->m_cgi   = "qmbdt";
 | |
| 	m->m_off   = (char *)&g_conf.m_queryMaxBigDiskThreads - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "20"; // 1
 | |
| 	m->m_units = "threads";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max query medium read threads";
 | |
| 	m->m_desc  = "This particular number applies to all disk "
 | |
| 		"reads above 100K. "
 | |
| 		"The number of total threads is also "
 | |
| 		"limited to MAX_STACKS which is currently 20.";
 | |
| 	m->m_cgi   = "qmmdt";
 | |
| 	m->m_off   = (char *)&g_conf.m_queryMaxMedDiskThreads - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "20"; // 3
 | |
| 	m->m_units = "threads";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max query small read threads";
 | |
| 	m->m_desc  = "This particular number applies to all disk "
 | |
| 		"reads above 1MB. "
 | |
| 		"The number of total threads is also "
 | |
| 		"limited to MAX_STACKS which is currently 20.";
 | |
| 	m->m_cgi   = "qmsdt";
 | |
| 	m->m_off   = (char *)&g_conf.m_queryMaxSmaDiskThreads - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "20";
 | |
| 	m->m_units = "threads";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "min popularity for speller";
 | |
| 	m->m_desc  = "Word or phrase must be present in this percent "
 | |
| 		"of documents in order to qualify as a spelling "
 | |
| 		"recommendation.";
 | |
| 	m->m_cgi   = "mps";
 | |
| 	m->m_off   = (char *)&g_conf.m_minPopForSpeller - g;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = ".01";
 | |
| 	m->m_units = "%%";
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "phrase weight";
 | |
| 	m->m_desc  = "Percent to weight phrases in queries.";
 | |
| 	m->m_cgi   = "qp";
 | |
| 	m->m_off   = (char *)&g_conf.m_queryPhraseWeight - g;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	// was 350, but 'new mexico tourism' and 'boots uk'
 | |
| 	// emphasized the phrase terms too much!!
 | |
| 	m->m_def   = "100"; 
 | |
| 	m->m_units = "%%";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "weights.cpp slider parm (tmp)";
 | |
| 	m->m_desc  = "Percent of how much to use words to phrase ratio weights.";
 | |
| 	m->m_cgi   = "wsp";
 | |
| 	m->m_off   = (char *)&g_conf.m_sliderParm - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "90";
 | |
| 	m->m_units = "%%";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "indextable intersection algo to use";
 | |
| 	m->m_desc  = "0 means adds the term scores, 1 means average them "
 | |
| 		"and 2 means take the RMS.";
 | |
| 	m->m_cgi   = "iia";
 | |
| 	m->m_off   = (char *)&g_conf.m_indexTableIntersectionAlgo - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "2";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "max weight";
 | |
| 	m->m_desc  = "Maximum, relative query term weight. Set to 0 or less "
 | |
| 		"to indicate now max. 10.0 or 20.0 might be a good value.";
 | |
| 	m->m_cgi   = "qm";
 | |
| 	m->m_off   = (char *)&g_conf.m_queryMaxMultiplier - g;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "0.0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "query term exponent";
 | |
| 	m->m_desc  = "Raise the weights of the query "
 | |
| 		"terms to this power. The weight of a query term is "
 | |
| 		"basically the log of its term frequency. Increasing "
 | |
| 		"this will increase the effects of the term frequency "
 | |
| 		"related to each term in the query. Term frequency is "
 | |
| 		"also known as the term popularity. Very common words "
 | |
| 		"typically have lower weights tied to them, but the effects "
 | |
| 		"of such weighting will be increased if you increase this "
 | |
| 		"exponent.";
 | |
| 	m->m_cgi   = "qte";
 | |
| 	m->m_off   = (char *)&g_conf.m_queryExp - g;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "1.1";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "use dynamic phrase weighting";
 | |
| 	m->m_desc  = "A new algorithm which reduces the weight on a query "
 | |
| 		"word term if the query phrase terms it is in are of "
 | |
| 		"similar popularity (term frequency) to that of the word "
 | |
| 		"term.";
 | |
| 	m->m_cgi   = "udpw";
 | |
| 	m->m_off   = (char *)&g_conf.m_useDynamicPhraseWeighting - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "maximum serialized query size";
 | |
| 	m->m_desc  = "When passing queries around the network, send the raw "
 | |
| 		"string instead of the serialized query if the required "
 | |
| 		"buffer is bigger than this. Smaller values decrease network "
 | |
| 		"traffic for large queries at the expense of processing time.";
 | |
| 	m->m_cgi   = "msqs";
 | |
| 	m->m_off   = (char *)&g_conf.m_maxSerializedQuerySize - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "8192";
 | |
| 	m->m_units = "bytes";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "merge buf size";
 | |
| 	m->m_desc  = "Read and write this many bytes at a time when merging "
 | |
| 		"files.  Smaller values are kinder to query performance, "
 | |
| 		" but the merge takes longer. Use at least 1000000 for "
 | |
| 		"fast merging.";
 | |
| 	m->m_cgi   = "mbs";
 | |
| 	m->m_off   = (char *)&g_conf.m_mergeBufSize - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	// keep this way smaller than that 800k we had in here, 100k seems
 | |
| 	// to be way better performance for qps
 | |
| 	m->m_def   = "500000";
 | |
| 	m->m_units = "bytes";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "catdb minRecSizes";
 | |
| 	m->m_desc  = "minRecSizes for Catdb lookups";
 | |
| 	m->m_cgi   = "catmsr";
 | |
| 	m->m_off   = (char *)&g_conf.m_catdbMinRecSizes - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "100000000"; // 100 million
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "max http download sockets";
 | |
| 	m->m_desc  = "Maximum sockets available to spiders for downloading "
 | |
| 		"web pages.";
 | |
| 	m->m_cgi   = "mds";
 | |
| 	m->m_off   = (char *)&g_conf.m_httpMaxDownloadSockets - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "5000";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "doc count adjustment";
 | |
| 	m->m_desc  = "Add this number to the total document count in the "
 | |
| 		"index. Just used for displaying on the homepage.";
 | |
| 	m->m_cgi   = "dca";
 | |
| 	m->m_off   = (char *)&g_conf.m_docCountAdjustment - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dynamic performance graph";
 | |
| 	m->m_desc  = "Generates profiling data for callbacks on page "
 | |
| 		"performance";
 | |
| 	m->m_cgi   = "dpg";
 | |
| 	m->m_off   = (char *)&g_conf.m_dynamicPerfGraph - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "enable profiling";
 | |
| 	m->m_desc  = "Enable profiler to do accounting of time taken by "
 | |
| 		"functions. ";
 | |
| 	m->m_cgi   = "enp";
 | |
| 	m->m_off   = (char *)&g_conf.m_profilingEnabled - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "minimum profiling threshold";
 | |
| 	m->m_desc  = "Profiler will not show functions which take less "
 | |
| 		"than this many milliseconds "
 | |
| 		"in the log or  on the performance graph.";
 | |
| 	m->m_cgi   = "mpt";
 | |
| 	m->m_off   = (char *)&g_conf.m_minProfThreshold - g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "10";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "sequential profiling.";
 | |
| 	m->m_desc  = "Produce a LOG_TIMING log message for each "
 | |
| 		"callback called, along with the time it took.  "
 | |
| 		"Profiler must be enabled.";
 | |
| 	m->m_cgi   = "ensp";
 | |
| 	m->m_off   = (char *)&g_conf.m_sequentialProfiling - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 	
 | |
| 	m->m_title = "use statsdb";
 | |
| 	m->m_desc  = "Archive system statistics information in Statsdb.";
 | |
| 	m->m_cgi   = "usdb"; 
 | |
| 	m->m_off   = (char *)&g_conf.m_useStatsdb - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_MASTER;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "statsdb snapshots.";
 | |
| 	m->m_desc  = "Archive system statistics information in Statsdb. "
 | |
| 		     "Takes one snapshot every minute.";
 | |
| 	m->m_cgi   = "sdbss"; 
 | |
| 	m->m_off   = (char *)&g_conf.m_statsdbSnapshots - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	
 | |
| 	m->m_title = "statsdb web interface.";
 | |
| 	m->m_desc  = "Enable the Statsdb page for viewing stats history.";
 | |
| 	m->m_cgi   = "sdbwi"; 
 | |
| 	m->m_off   = (char *)&g_conf.m_statsdbPageEnabled - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 	/*
 | |
| 	m->m_title = "max synonyms";
 | |
| 	m->m_desc = "Maximum possible synonyms to expand a word to.";
 | |
| 	m->m_cgi   = "msyn";
 | |
| 	m->m_off   = (char *)&g_conf.m_maxSynonyms - g;
 | |
| 	m->m_def   = "5";
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "default affinity";
 | |
| 	m->m_desc = "spelling/number synonyms get this number as their "
 | |
| 		"affinity; negative values mean treat them as unknown, "
 | |
| 		"values higher than 1.0 get treated as 1.0";
 | |
| 	m->m_cgi   = "daff";
 | |
| 	m->m_off   = (char *)&g_conf.m_defaultAffinity - g;
 | |
| 	m->m_def   = "0.9";
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "frequency threshold";
 | |
| 	m->m_desc = "the minimum amount a synonym term has to be in relation "
 | |
| 		"to its master term in order to be considered as a synonym";
 | |
| 	m->m_cgi   = "fqth";
 | |
| 	m->m_off   = (char *)&g_conf.m_frequencyThreshold - g;
 | |
| 	m->m_def   = "0.25";
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "maximum affinity requests";
 | |
| 	m->m_desc = "Maximum number of outstanding requests the affinity "
 | |
| 		"builder can generate. Keep this number at 10 or lower for "
 | |
| 		"local servers, higher for internet servers or servers with "
 | |
| 		"high latency.";
 | |
| 	m->m_cgi   = "mar";
 | |
| 	m->m_off   = (char *)&g_conf.m_maxAffinityRequests - g;
 | |
| 	m->m_def   = "10";
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "maximum affinity errors";
 | |
| 	m->m_desc  = "Maximum number of times the affinity builder should "
 | |
| 		"encounter an error before giving up entirely.";
 | |
| 	m->m_cgi   = "mae";
 | |
| 	m->m_off   = (char *)&g_conf.m_maxAffinityErrors - g;
 | |
| 	m->m_def   = "100";
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "affinity timeout";
 | |
| 	m->m_desc = "Amount of time in milliseconds to wait for a response to "
 | |
| 		"an affinity query. You shouldn't have to touch this unless "
 | |
| 		"the network is slow or overloaded.";
 | |
| 	m->m_cgi   = "ato";
 | |
| 	m->m_off   = (char *)&g_conf.m_affinityTimeout - g;
 | |
| 	m->m_def   = "30000";
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "affinity rebuild server";
 | |
| 	m->m_desc = "Use this server:port to rebuild the affinity.";
 | |
| 	m->m_cgi   = "ars";
 | |
| 	m->m_off   = (char *)&g_conf.m_affinityServer - g;
 | |
| 	m->m_def   = "localhost:8000";
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_URL_LEN;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "additional affinity parameters";
 | |
| 	m->m_desc = "Additional parameters to pass in the query. Tweak these "
 | |
| 		"to get better/faster responses. Don't touch the raw parameter "
 | |
| 		"unless you know what you are doing.";
 | |
| 	m->m_cgi  = "aap";
 | |
| 	m->m_off  = (char *)&g_conf.m_affinityParms - g;
 | |
| 	m->m_def  = "&raw=5&dio=1&n=1000&code=gbmonitor";
 | |
| 	m->m_type = TYPE_STRING;
 | |
| 	m->m_size = MAX_URL_LEN;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	//////
 | |
| 	// END MASTER CONTROLS
 | |
| 	//////
 | |
| 
 | |
| 
 | |
| 	///////////////////////////////////////////
 | |
| 	// ACCESS CONTROLS
 | |
| 	///////////////////////////////////////////
 | |
| 
 | |
| 	/*
 | |
| 	// ARRAYS
 | |
| 	// each will have its own table, title will be in first row
 | |
| 	// of that table, 2nd row is description, then one row per
 | |
| 	// element in the array, then a final row for adding new elements
 | |
| 	// if not exceeding our m->m_max limit.
 | |
| 	m->m_title = "Passwords Required to Search this Collection";
 | |
| 	m->m_desc  ="Passwords allowed to perform searches on this collection."
 | |
| 		" If no passwords are specified, then anyone can search it.";
 | |
| 	m->m_cgi   = "searchpwd";
 | |
| 	m->m_xml   = "searchPassword";
 | |
| 	m->m_max   = MAX_SEARCH_PASSWORDS;
 | |
| 	m->m_off   = (char *)cr.m_searchPwds - x;
 | |
| 	m->m_type  = TYPE_STRINGNONEMPTY;
 | |
| 	m->m_size  = PASSWORD_MAX_LEN+1; // string size max
 | |
| 	m->m_page  = PAGE_ACCESS;
 | |
| 	m->m_def   = "";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "IPs Banned from Searching this Collection";
 | |
| 	m->m_desc  = "These IPs are not allowed to search this collection or "
 | |
| 		"use add url. Useful to keep out miscreants. Use zero for the "
 | |
| 		"last number of the IP to ban an entire IP domain.";
 | |
| 	m->m_cgi   = "bip";
 | |
| 	m->m_xml   = "bannedIp";
 | |
| 	m->m_max   = MAX_BANNED_IPS;
 | |
| 	m->m_off   = (char *)cr.m_banIps - x;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "Only These IPs can Search this Collection";
 | |
| 	m->m_desc  = "Only these IPs are allowed to search the collection and "
 | |
| 		"use the add url facilities. If you'd like to make your "
 | |
| 		"collection publicly searchable then do not add any IPs "
 | |
| 		"here.Use zero for the "
 | |
| 		"last number of the IP to restrict to an entire "
 | |
| 		"IP domain, i.e. 1.2.3.0.";
 | |
| 	m->m_cgi   = "searchip";
 | |
| 	m->m_xml   = "searchIp";
 | |
| 	m->m_max   = MAX_SEARCH_IPS;
 | |
| 	m->m_off   = (char *)cr.m_searchIps - x;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "Spam Assassin IPs";
 | |
| 	m->m_desc  = "Browsers coming from these IPs are deemed to be spam "
 | |
| 		  "assassins and have access to a subset of the controls to "
 | |
| 		"ban and remove domains and IPs from the index.";
 | |
| 	m->m_cgi   = "assip";
 | |
| 	m->m_xml   = "assassinIp";
 | |
| 	m->m_max   = MAX_SPAM_IPS;
 | |
| 	m->m_off   = (char *)cr.m_spamIps - x;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "Admin Passwords";
 | |
| 	m->m_desc  = "Passwords allowed to edit this collection record. "
 | |
| 		"First password can only be deleted by the master "
 | |
| 		"administrator. If no password of Admin IP is given at time "
 | |
| 		"of creation then the default password of 'footbar23' will "
 | |
| 		"be assigned.";
 | |
| 	m->m_cgi   = "apwd";
 | |
| 	m->m_xml   = "adminPassword";
 | |
| 	m->m_max   = MAX_ADMIN_PASSWORDS;
 | |
| 	m->m_off   = (char *)cr.m_adminPwds - x;
 | |
| 	m->m_type  = TYPE_STRINGNONEMPTY;
 | |
| 	m->m_size  = PASSWORD_MAX_LEN+1;
 | |
| 	m->m_def   = "";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "Admin IPs";
 | |
| 	m->m_desc  = "If someone connects from one of these IPs and provides "
 | |
| 		"a password from the table above then they will have full "
 | |
| 		"administrative privileges for this collection. If you "
 | |
| 		"specified no Admin Passwords above then they need only "
 | |
| 		"connect from an IP in this table to get the privledges. ";
 | |
| 	m->m_cgi   = "adminip";
 | |
| 	m->m_xml   = "adminIp";
 | |
| 	m->m_max   = MAX_ADMIN_IPS;
 | |
| 	m->m_off   = (char *)cr.m_adminIps - x;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	///////////////////////////////////////////
 | |
| 	// URL FILTERS
 | |
| 	///////////////////////////////////////////
 | |
| 
 | |
| 	//m->m_title = "Url Filters";
 | |
| 	// this is description just for the conf file.
 | |
| 	//m->m_cdesc = "See overview.html for a description of URL filters.";
 | |
| 	//m->m_type  = TYPE_COMMENT;
 | |
| 	//m++;
 | |
| 
 | |
| 	m->m_cgi   = "ufp";
 | |
| 	m->m_title = "url filters profile";
 | |
| 	m->m_xml   = "urlFiltersProfile";
 | |
| 	m->m_desc  = "Rather than editing the table below, you can select "
 | |
| 		"a predefined set of url instructions in this drop down menu "
 | |
| 		"that will update the table for you. Selecting <i>custom</i> "
 | |
| 		"allows you to make custom changes to the table. "
 | |
| 		"Selecting <i>web</i> configures the table for spidering "
 | |
| 		"the web in general. "
 | |
| 		"Selecting <i>news</i> configures the table for spidering "
 | |
| 		"new sites. "
 | |
| 		"Selecting <i>chinese</i> makes the spider prioritize the "
 | |
| 		"spidering of chinese pages, etc. "
 | |
| 		"Selecting <i>shallow</i> makes the spider go deep on "
 | |
| 		"all sites unless they are tagged <i>shallow</i> in the "
 | |
| 		"site list. "
 | |
| 		"Important: "
 | |
| 		"If you select a profile other than <i>custom</i> "
 | |
| 		"then your changes "
 | |
| 		"to the table will be lost.";
 | |
| 	m->m_off   = (char *)&cr.m_urlFiltersProfile - x;
 | |
| 	m->m_colspan = 3;
 | |
| 	m->m_type  = TYPE_SAFEBUF;//UFP;// 1 byte dropdown menu
 | |
| 	m->m_def   = "web"; // UFP_WEB
 | |
| 	m->m_flags = PF_REBUILDURLFILTERS | PF_CLONE;
 | |
| 	m->m_page  = PAGE_FILTERS;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "expression";
 | |
| 	m->m_desc  = "Before downloading the contents of a URL, Gigablast "
 | |
| 		"first chains down this "
 | |
| 		"list of "
 | |
| 		"expressions</a>, "
 | |
| 		"starting with expression #0.  "
 | |
| 		//"This table is also consulted "
 | |
| 		//"for every outlink added to spiderdb. "
 | |
| 		"The first expression it matches is the ONE AND ONLY "
 | |
| 		"matching row for that url. "
 | |
| 		"It then uses the "
 | |
| 		//"<a href=/overview.html#spiderfreq>"
 | |
| 		"respider frequency, "
 | |
| 		//"<a href=/overview.html#spiderpriority>"
 | |
| 		"spider priority, etc. on the MATCHING ROW when spidering "
 | |
| 		//"and <a href=/overview.html#ruleset>ruleset</a> to "
 | |
| 		"that URL. "
 | |
| 		"If you specify the <i>expression</i> as "
 | |
| 		"<i><b>default</b></i> then that MATCHES ALL URLs. "
 | |
| 		"URLs with high spider priorities take spidering "
 | |
| 		"precedence over "
 | |
| 		"URLs with lower spider priorities. "
 | |
| 		"The respider frequency dictates how often a URL will "
 | |
| 		"be respidered. "
 | |
| 
 | |
| 		"See the help table below for examples of all the supported "
 | |
| 		"expressions. "
 | |
| 		"Use the <i>&&</i> operator to string multiple expressions "
 | |
| 		"together in the same expression text box. "
 | |
| 		"If you check the <i>delete</i> checkbox then urls matching "
 | |
| 		"that row will be deleted if already indexed, otherwise, "
 | |
| 		"they just won't be indexed."
 | |
| 		//"A <i>spider priority</i> of "
 | |
| 		//"<i>FILTERED</i> or <i>BANNED</i> "
 | |
| 		// "<i>DELETE</i> "
 | |
| 		// "will cause the URL to not be spidered, "
 | |
| 		// "or if it has already "
 | |
| 		// "been indexed, it will be deleted when it is respidered."
 | |
| 		"<br><br>";
 | |
| 		
 | |
| 		/*
 | |
| 		"A URL is respidered according to the "
 | |
| 		"spider frequency. If this is blank then Gigablast will "
 | |
| 		"use the spider frequency explicitly dictated by the rule "
 | |
| 		"set. If the ruleset does not contain a <spiderFrequency> "
 | |
| 		"xml tag, then Gigablast will "
 | |
| 		"intelligently determine the best time to respider that "
 | |
| 		"URL.<br><br>"
 | |
| 		
 | |
| 		"If the "
 | |
| 		"<a href=/overview.html#spiderpriority>"
 | |
| 		"spider priority</a> of a URL is undefined then "
 | |
| 		"Gigablast will use the spider priority explicitly "
 | |
| 		"dictated by the ruleset. If the ruleset does not contain "
 | |
| 		"a <spiderPriority> xml tag, then Gigablast "
 | |
| 		"will spider that URL with a priority of its linking parent "
 | |
| 		"minus 1, "
 | |
| 		"resulting in breadth first spidering. A URL of spider "
 | |
| 		"priority X will be placed in spider priority queue #X. "
 | |
| 		"Many spider parameters can be configured on a per "
 | |
| 		"spider priority queue basis. For instance, spidering "
 | |
| 		"can be toggled on a per queue basis, as can link "
 | |
| 		"harvesting.<br><br>"
 | |
| 		
 | |
| 		"The <b>ruleset</b> you select corresponds to a file on "
 | |
| 		"disk named tagdb*.xml, where the '*' is a number. Each of "
 | |
| 		"these files is a set of rules in XML that dictate how to "
 | |
| 		"index and spider a document. "
 | |
| 		"You can add your own ruleset file to Gigablast's working "
 | |
| 		"directory and it will automatically be "
 | |
| 		"included in the ruleset drop down menu. Once a document "
 | |
| 		"has been indexed with a ruleset, then the corresponding "
 | |
| 		"ruleset file cannot be deleted without risk of corruption."
 | |
| 		"<br><br>"
 | |
| 		
 | |
| 		"You can have up to 32 regular expressions. "
 | |
| 		"Example: <b>^http://.*\\.uk/</b> would match all urls from "
 | |
| 		"the UK. See this "
 | |
| 		"<a href=/?redir="
 | |
| 		"http://www.phpbuilder.com/columns/dario19990616.php3>"
 | |
| 		"tutorial by example</a> for more information."
 | |
| 
 | |
| 		"<br><br>"
 | |
| 		"Gigablast also supports the following special \"regular "
 | |
| 		"expressions\": "
 | |
| 		"link:gigablast and doc:quality<X and doc:quality>X.";
 | |
| 		*/
 | |
| 
 | |
| 	m->m_cgi   = "fe";
 | |
| 	m->m_xml   = "filterExpression";
 | |
| 	m->m_max   = MAX_FILTERS;
 | |
| 	// array of safebufs i guess...
 | |
| 	m->m_off   = (char *)cr.m_regExs - x;
 | |
| 	// this is a safebuf, dynamically allocated string really
 | |
| 	m->m_type  = TYPE_SAFEBUF;//STRINGNONEMPTY
 | |
| 	// the size of each element in the array:
 | |
| 	m->m_size  = sizeof(SafeBuf);//MAX_REGEX_LEN+1;
 | |
| 	m->m_page  = PAGE_FILTERS;
 | |
| 	m->m_rowid = 1; // if we START a new row
 | |
| 	m->m_def   = "";
 | |
| 	m->m_flags = PF_REBUILDURLFILTERS | PF_CLONE;
 | |
| 	m->m_page  = PAGE_FILTERS;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
	// Per-row checkbox: harvest outlinks from pages matching this filter
	// row (same row group as "expression" via m_rowid = 1).
	m->m_title = "harvest links";
	m->m_cgi   = "hspl";
	m->m_xml   = "harvestLinks";
	m->m_max   = MAX_FILTERS;
	m->m_off   = (char *)cr.m_harvestLinks - x;
	m->m_type  = TYPE_CHECKBOX;
	m->m_def   = "1"; // harvest links by default
	m->m_page  = PAGE_FILTERS;
	m->m_rowid = 1;
	// changing this forces a rebuild of the url-filters table; cloned to
	// new collections
	m->m_flags = PF_REBUILDURLFILTERS | PF_CLONE;
	m->m_obj   = OBJ_COLL;
	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "spidering enabled";
 | |
| 	m->m_cgi   = "cspe";
 | |
| 	m->m_xml   = "spidersEnabled";
 | |
| 	m->m_max   = MAX_FILTERS;
 | |
| 	m->m_off   = (char *)cr.m_spidersEnabled - x;
 | |
| 	m->m_type  = TYPE_CHECKBOX;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_page  = PAGE_FILTERS;
 | |
| 	m->m_rowid = 1;
 | |
| 	m->m_flags = PF_REBUILDURLFILTERS;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
	// Per-row respider interval: how many days to wait before a url
	// matching this row is spidered again.
	m->m_title = "respider frequency (days)";
	m->m_cgi   = "fsf";
	m->m_xml   = "filterFrequency";
	m->m_max   = MAX_FILTERS;
	m->m_off   = (char *)cr.m_spiderFreqs - x;
	m->m_type  = TYPE_FLOAT;
	// why was this default 0 days?
	m->m_def   = "30.0"; // 0.0
	m->m_page  = PAGE_FILTERS;
	m->m_obj   = OBJ_COLL;
	m->m_units = "days";
	m->m_rowid = 1;
	m->m_flags = PF_REBUILDURLFILTERS | PF_CLONE;
	m++;
 | |
| 
 | |
	// Per-row cap on simultaneous outstanding spiders across ALL urls
	// matching this row.
	m->m_title = "max spiders";
	m->m_desc  = "Do not allow more than this many outstanding spiders "
		"for all urls in this priority."; // was "per rule"
	m->m_cgi   = "mspr";
	m->m_xml   = "maxSpidersPerRule";
	m->m_max   = MAX_FILTERS;
	m->m_off   = (char *)cr.m_maxSpidersPerRule - x;
	m->m_type  = TYPE_LONG;
	m->m_def   = "99"; // effectively uncapped by default
	m->m_page  = PAGE_FILTERS;
	m->m_obj   = OBJ_COLL;
	m->m_rowid = 1;
	m->m_flags = PF_REBUILDURLFILTERS | PF_CLONE;
	m++;
 | |
| 
 | |
	// Per-row cap on simultaneous spiders hitting the same IP address.
	m->m_title = "max spiders per ip";
	m->m_desc  = "Allow this many spiders per IP.";
	m->m_cgi   = "mspi";
	m->m_xml   = "maxSpidersPerIp";
	m->m_max   = MAX_FILTERS;
	m->m_off   = (char *)cr.m_spiderIpMaxSpiders - x;
	m->m_type  = TYPE_LONG;
	m->m_def   = "7";
	m->m_page  = PAGE_FILTERS;
	m->m_obj   = OBJ_COLL;
	m->m_rowid = 1;
	m->m_flags = PF_REBUILDURLFILTERS | PF_CLONE;
	m++;
 | |
| 
 | |
| 	m->m_title = "same ip wait (ms)";
 | |
| 	m->m_desc  = "Wait at least this int32_t before downloading urls from "
 | |
| 		"the same IP address.";
 | |
| 	m->m_cgi   = "xg";
 | |
| 	m->m_xml   = "spiderIpWait";
 | |
| 	m->m_max   = MAX_FILTERS;
 | |
| 	//m->m_fixed = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_off   = (char *)cr.m_spiderIpWaits - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "1000";
 | |
| 	m->m_page  = PAGE_FILTERS;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_units = "milliseconds";
 | |
| 	m->m_rowid = 1;
 | |
| 	m->m_flags = PF_REBUILDURLFILTERS | PF_CLONE;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "page quota";
 | |
| 	m->m_cgi   = "fsq";
 | |
| 	m->m_xml   = "filterQuota";
 | |
| 	m->m_max   = MAX_FILTERS;
 | |
| 	m->m_off   = (char *)cr.m_spiderQuotas - x;
 | |
| 	m->m_type  = TYPE_LONG_LONG;
 | |
| 	m->m_def   = "-1"; // -1 means no quota
 | |
| 	m->m_page  = PAGE_FILTERS;
 | |
| 	m->m_units = "pages";
 | |
| 	m->m_rowid = 1;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
	// Per-row checkbox: urls matching this row are deleted from the index
	// if already indexed, and are otherwise not indexed at all.
	m->m_title = "delete";
	m->m_cgi   = "fdu";
	m->m_xml   = "forceDeleteUrls";
	m->m_max   = MAX_FILTERS;
	m->m_off   = (char *)cr.m_forceDelete - x;
	m->m_type  = TYPE_CHECKBOX;
	m->m_def   = "0"; // off by default
	m->m_page  = PAGE_FILTERS;
	m->m_rowid = 1;
	m->m_flags = PF_REBUILDURLFILTERS | PF_CLONE;
	m->m_obj   = OBJ_COLL;
	m++;
 | |
| 
 | |
	// Per-row spider priority dropdown; higher priorities are spidered
	// before lower ones.
	m->m_title = "spider priority";
	m->m_cgi   = "fsp";
	m->m_xml   = "filterPriority";
	m->m_max   = MAX_FILTERS;
	m->m_off   = (char *)cr.m_spiderPriorities - x;
	m->m_type  = TYPE_PRIORITY2; // includes UNDEFINED priority in dropdown
	m->m_page  = PAGE_FILTERS;
	m->m_obj   = OBJ_COLL;
	m->m_rowid = 1;
	m->m_def   = "50"; // mid-range priority
	m->m_flags = PF_REBUILDURLFILTERS | PF_CLONE;
	m->m_addin = 1; // "insert" follows?
	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "diffbot api";
 | |
| 	m->m_cgi   = "dapi";
 | |
| 	m->m_xml   = "diffbotAPI";
 | |
| 	m->m_max   = MAX_FILTERS;
 | |
| 	m->m_off   = (char *)cr.m_spiderDiffbotApiUrl - x;
 | |
| 	// HACK: we print a dropdown for this but the value is a string
 | |
| 	// because the items in the drop down can change so we can't store
 | |
| 	// an item # here, it has to be a string, i.e. the diffbot api url.
 | |
| 	// john might add a new custom api to m_diffbotApiList at any time.
 | |
| 	// so we select the item in the drop down if it matches THIS string.
 | |
| 	m->m_type  = TYPE_SAFEBUF;//DIFFBOT_DROPDOWN;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_page  = PAGE_FILTERS;
 | |
| 	m->m_size  = sizeof(SafeBuf);
 | |
| 	m->m_rowid = 1;
 | |
| 	m->m_addin = 1; // "insert" follows?
 | |
| 	m->m_flags = PF_REBUILDURLFILTERS | PF_DIFFBOT;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	//m->m_title = "<a href=/overview.html#ruleset>ruleset</a>";
 | |
| 	//m->m_cgi   = "frs";
 | |
| 	//m->m_xml   = "filterRuleset";
 | |
| 	//m->m_max   = MAX_FILTERS;
 | |
| 	//m->m_off   = (char *)cr.m_rulesets - x;
 | |
| 	//m->m_type  = TYPE_RULESET; // int32_t with dropdown of rulesets
 | |
| 	//m->m_page  = PAGE_FILTERS;
 | |
| 	//m->m_rowid = 1;
 | |
| 	//m->m_addin = 1; // "insert" follows?
 | |
| 	//m->m_def   = "";
 | |
| 	//m++;
 | |
| 
 | |
| 	/*
 | |
| 	// default rule
 | |
| 	m->m_title = "<b>DEFAULT</b>";
 | |
| 	m->m_desc  = "Use the following values by default if no ruleset in "
 | |
| 		"tagdb matches the URL.";
 | |
| 	m->m_type  = TYPE_CONSTANT;
 | |
| 	m->m_page  = PAGE_FILTERS;
 | |
| 	m->m_rowid = 2;
 | |
| 	m->m_hdrs  = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	//m->m_cdesc  = "The default parameters if no reg exs above matched.";
 | |
| 	m->m_cgi   = "fsfd";
 | |
| 	m->m_xml   = "filterFrequencyDefault";
 | |
| 	m->m_off   = (char *)&cr.m_defaultSpiderFrequency - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "0.0";
 | |
| 	m->m_page  = PAGE_FILTERS;
 | |
| 	m->m_units = "days";
 | |
| 	m->m_rowid = 2;
 | |
| 	m->m_hdrs  = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_cgi   = "fsqd";
 | |
| 	m->m_xml   = "filterQuotaDefault";
 | |
| 	m->m_off   = (char *)&cr.m_defaultSpiderQuota - x;
 | |
| 	m->m_type  = TYPE_LONG_LONG;
 | |
| 	m->m_def   = "-1";
 | |
| 	m->m_page  = PAGE_FILTERS;
 | |
| 	m->m_units = "pages";
 | |
| 	m->m_rowid = 2;
 | |
| 	m->m_hdrs  = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_cgi   = "fspd";
 | |
| 	m->m_xml   = "filterPriorityDefault";
 | |
| 	m->m_off   = (char *)&cr.m_defaultSpiderPriority - x;
 | |
| 	m->m_type  = TYPE_PRIORITY2; // includes UNDEFINED priority in dropdown
 | |
| 	m->m_def   = "4";
 | |
| 	m->m_page  = PAGE_FILTERS;
 | |
| 	m->m_rowid = 2;
 | |
| 	m->m_hdrs  = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_cgi   = "frsd";
 | |
| 	m->m_xml   = "filterRulesetDefault";
 | |
| 	m->m_off   = (char *)&cr.m_defaultSiteFileNum - x;
 | |
| 	m->m_type  = TYPE_RULESET; // int32_t with dropdown of rulesets
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_page  = PAGE_FILTERS;
 | |
| 	m->m_rowid = 2;
 | |
| 	m->m_hdrs  = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 
 | |
| 	/*
 | |
| 	///////////////////////////////////////////
 | |
| 	// PRIORITY CONTROLS
 | |
| 	///////////////////////////////////////////
 | |
| 
 | |
| 	// . show the priority in this column
 | |
| 	// . a monotnic sequence repeating each number twice,
 | |
| 	//   basically, div 2 is what "D2" means
 | |
| 	// . so we get 0,0,1,1,2,2,3,3, ...
 | |
| 	m->m_title = "priority";
 | |
| 	//m->m_desc  = "What priority is this spdier queue?";
 | |
| 	m->m_max   = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_fixed = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_type  = TYPE_MONOD2;
 | |
| 	m->m_page  = PAGE_PRIORITIES;
 | |
| 	m->m_rowid = 3;
 | |
| 	m++;
 | |
| 
 | |
| 	// . show an alternating 0 and 1 in this column
 | |
| 	//   because it is type MONOM2, a monotonic sequence
 | |
| 	//   modulus 2.
 | |
| 	// . so we get 0,1,0,1,0,1,0,1,0,1, ...
 | |
| 	m->m_title = "is new";
 | |
| 	m->m_desc  = "Does this priority contain new (unindexed) urls?";
 | |
| 	m->m_max   = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_fixed = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_type  = TYPE_MONOM2;
 | |
| 	m->m_page  = PAGE_PRIORITIES;
 | |
| 	m->m_rowid = 3;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "spidering enabled";
 | |
| 	m->m_desc  = "Are spiders enabled for this priority?";
 | |
| 	m->m_cgi   = "xa";
 | |
| 	m->m_xml   = "spiderPrioritySpideringEnabled";
 | |
| 	m->m_max   = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_fixed = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_off   = (char *)cr.m_pq_spideringEnabled - x;
 | |
| 	m->m_type  = TYPE_CHECKBOX;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_page  = PAGE_PRIORITIES;
 | |
| 	m->m_rowid = 3;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "time slice weight";
 | |
| 	m->m_desc  = "What percentage of the time to draw urls from "
 | |
| 		"this priority?";
 | |
| 	m->m_cgi   = "xb";
 | |
| 	m->m_xml   = "spiderPriotiyTimeSlice";
 | |
| 	m->m_max   = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_fixed = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_off   = (char *)cr.m_pq_timeSlice - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_page  = PAGE_PRIORITIES;
 | |
| 	m->m_rowid = 3; // if we START a new row
 | |
| 	m->m_def   = "100.0";
 | |
| 	m->m_units = "%%";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "spidered";
 | |
| 	m->m_desc  = "How many urls we spidered so far last 5 minutes.";
 | |
| 	m->m_cgi   = "sps";
 | |
| 	m->m_xml   = "spiderPriotiySpidered";
 | |
| 	m->m_max   = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_fixed = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_off   = (char *)cr.m_pq_spidered - x;
 | |
| 	m->m_type  = TYPE_LONG_CONST;
 | |
| 	m->m_page  = PAGE_PRIORITIES;
 | |
| 	m->m_rowid = 3; // if we START a new row
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_sync  = false;  // do not sync this parm
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "spider links";
 | |
| 	m->m_desc  = "Harvest links from the content and add to spiderdb.";
 | |
| 	m->m_cgi   = "xc";
 | |
| 	m->m_xml   = "spiderPrioritySpiderLinks";
 | |
| 	m->m_max   = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_fixed = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_off   = (char *)cr.m_pq_spiderLinks - x;
 | |
| 	m->m_type  = TYPE_CHECKBOX;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_page  = PAGE_PRIORITIES;
 | |
| 	m->m_rowid = 3;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "spider same host outlinks only";
 | |
| 	m->m_desc  = "Harvest links to the same hostnames (www.xyz.com) "
 | |
| 		"and add to spiderdb.";
 | |
| 	m->m_cgi   = "xd";
 | |
| 	m->m_xml   = "spiderPrioritySpiderSameHostnameLinks";
 | |
| 	m->m_max   = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_fixed = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_off   = (char *)cr.m_pq_spiderSameHostnameLinks - x;
 | |
| 	m->m_type  = TYPE_CHECKBOX;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_page  = PAGE_PRIORITIES;
 | |
| 	m->m_rowid = 3;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "force links into queue";
 | |
| 	m->m_desc  = "If slated to be added to this queue, and link is "
 | |
| 		"already in a non-forced queue, force it into this queue. "
 | |
| 		"Keep a cache to reduce reptitious adds to this queue.";
 | |
| 	m->m_cgi   = "xdd";
 | |
| 	m->m_xml   = "spiderPriorityForceQueue";
 | |
| 	m->m_max   = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_fixed = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_off   = (char *)cr.m_pq_autoForceQueue - x;
 | |
| 	m->m_type  = TYPE_CHECKBOX;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_page  = PAGE_PRIORITIES;
 | |
| 	m->m_rowid = 3;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max spiders per ip";
 | |
| 	m->m_desc  = "Do not allow more than this many simultaneous "
 | |
| 		"downloads per IP address.";
 | |
| 	m->m_cgi   = "xe";
 | |
| 	m->m_xml   = "spiderPriorityMaxSpidersPerIp";
 | |
| 	m->m_max   = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_fixed = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_off   = (char *)cr.m_pq_maxSpidersPerIp - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_page  = PAGE_PRIORITIES;
 | |
| 	m->m_rowid = 3;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max spiders per domain";
 | |
| 	m->m_desc  = "Do not allow more than this many simultaneous "
 | |
| 		"downloads per domain.";
 | |
| 	m->m_cgi   = "xf";
 | |
| 	m->m_xml   = "spiderPriorityMaxSpidersPerDom";
 | |
| 	m->m_max   = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_fixed = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_off   = (char *)cr.m_pq_maxSpidersPerDom - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_page  = PAGE_PRIORITIES;
 | |
| 	m->m_rowid = 3;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max respider wait (days)";
 | |
| 	m->m_desc  = "Do not wait longer than this before attempting to "
 | |
| 		"respider.";
 | |
| 	m->m_cgi   = "xr";
 | |
| 	m->m_xml   = "spiderPriorityMaxRespiderWait";
 | |
| 	m->m_max   = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_fixed = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_off   = (char *)cr.m_pq_maxRespiderWait - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "180.0";
 | |
| 	m->m_page  = PAGE_PRIORITIES;
 | |
| 	m->m_rowid = 3;
 | |
| 	m->m_units = "days";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "first respider wait (days)";
 | |
| 	m->m_desc  = "Reschedule a new url for respidering this many days "
 | |
| 		"from the first time it is actually spidered.";
 | |
| 	m->m_cgi   = "xfrw";
 | |
| 	m->m_xml   = "spiderPriorityFirstRespiderWait";
 | |
| 	m->m_max   = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_fixed = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_off   = (char *)cr.m_pq_firstRespiderWait - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "60.0";
 | |
| 	m->m_page  = PAGE_PRIORITIES;
 | |
| 	m->m_rowid = 3;
 | |
| 	m->m_units = "days";
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "same ip wait (ms)";
 | |
| 	m->m_desc  = "Wait at least this int32_t before downloading urls from "
 | |
| 		"the same IP address.";
 | |
| 	m->m_cgi   = "xg";
 | |
| 	m->m_xml   = "spiderPrioritySameIpWait";
 | |
| 	m->m_max   = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_fixed = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_off   = (char *)cr.m_pq_sameIpWait - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "10000";
 | |
| 	m->m_page  = PAGE_PRIORITIES;
 | |
| 	m->m_rowid = 3;
 | |
| 	m->m_units = "milliseconds";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "same domain wait (ms)";
 | |
| 	m->m_desc  = "Wait at least this int32_t before downloading urls from "
 | |
| 		"the same domain.";
 | |
| 	m->m_cgi   = "xh";
 | |
| 	m->m_xml   = "spiderPrioritySameDomainWait";
 | |
| 	m->m_max   = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_fixed = MAX_PRIORITY_QUEUES;
 | |
| 	m->m_off   = (char *)cr.m_pq_sameDomainWait - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "10000";
 | |
| 	m->m_page  = PAGE_PRIORITIES;
 | |
| 	m->m_rowid = 3;
 | |
| 	m->m_units = "milliseconds";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	///////////////////////////////////////////
 | |
| 	// SITEDB FILTERS
 | |
| 	///////////////////////////////////////////
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "site expression";
 | |
| 	m->m_desc  = "The site of a url is a substring of that url, which "
 | |
| 		"defined a set of urls which are all primarily controlled "
 | |
| 		"by the same entity. The smallest such site of a url is "
 | |
| 		"returned, because a url can have multiple sites. Like "
 | |
| 		"fred.blogspot.com is a site and the blogspot.com site "
 | |
| 		"contains that site.";
 | |
| 	m->m_cgi   = "sdbfe";
 | |
| 	m->m_xml   = "siteExpression";
 | |
| 	m->m_max   = MAX_SITE_EXPRESSIONS;
 | |
| 	m->m_off   = (char *)cr.m_siteExpressions - x;
 | |
| 	m->m_type  = TYPE_STRINGNONEMPTY;
 | |
| 	m->m_size  = MAX_SITE_EXPRESSION_LEN+1;
 | |
| 	m->m_page  = PAGE_RULES;
 | |
| 	m->m_rowid = 1; // if we START a new row
 | |
| 	m->m_def   = "";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "site rule";
 | |
| 	m->m_cgi   = "sdbsrs";
 | |
| 	m->m_xml   = "siteRule";
 | |
| 	m->m_max   = MAX_SITE_EXPRESSIONS;
 | |
| 	m->m_off   = (char *)cr.m_siteRules - x;
 | |
| 	m->m_type  = TYPE_SITERULE;
 | |
| 	m->m_page  = PAGE_RULES;
 | |
| 	m->m_rowid = 1;
 | |
| 	m->m_def   = "0";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "siterec default ruleset";
 | |
| 	m->m_cgi   = "sdbfdr";
 | |
| 	m->m_xml   = "siterecDefaultRuleset";
 | |
| 	m->m_max   = MAX_SITEDB_FILTERS;
 | |
| 	m->m_off   = (char *)cr.m_sitedbFilterRulesets - x;
 | |
| 	m->m_type  = TYPE_RULESET;
 | |
| 	m->m_page  = PAGE_FILTERS2;
 | |
| 	m->m_rowid = 1;
 | |
| 	m->m_def   = "-1";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "ban subdomains";
 | |
| 	m->m_cgi   = "sdbbsd";
 | |
| 	m->m_xml   = "siterecBanSubdomains";
 | |
| 	m->m_max   = MAX_SITEDB_FILTERS;
 | |
| 	m->m_off   = (char *)cr.m_sitedbFilterBanSubdomains - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_FILTERS2;
 | |
| 	m->m_rowid = 1;
 | |
| 	m->m_addin = 1; // "insert" follows
 | |
| 	m->m_def   = "0";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| // 	///////////////////////////////////////////
 | |
| // 	//              SPAM CONTROLS            //
 | |
| // 	///////////////////////////////////////////
 | |
| 
 | |
| 
 | |
| // 	m->m_title = "char in url";
 | |
| // 	m->m_desc  = "url has - or _ or a digit in the domain, "
 | |
| // 		"has a plus in the cgi part.";
 | |
| // 	m->m_cgi   = "spamctrla";
 | |
| // 	m->m_off   = (char *)&cr.m_spamTests[CHAR_IN_URL] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "20";
 | |
| // 	//m->m_smaxc = (char *)&cr.m_spamMaxes[CHAR_IN_URL] - x;
 | |
| // 	m->m_group = 1;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "bad tld";
 | |
| // 	m->m_desc  = "tld is info or biz";
 | |
| // 	m->m_cgi   = "spamctrlb";
 | |
| // 	m->m_off   = (char *)&cr.m_spamTests[BAD_TLD] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "20";
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "good tld";
 | |
| // 	m->m_desc  = "tld is gov, edu or mil";
 | |
| // 	m->m_cgi   = "spamctrlc";
 | |
| // 	m->m_off   = (char *)&cr.m_spamTests[GOOD_TLD] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "-20";
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "title has spammy words";
 | |
| // 	m->m_desc  = "Title has spammy words, is all lower case, "
 | |
| // 		"or has > 200 chars.  ";
 | |
| // 	m->m_cgi   = "spamctrld";
 | |
| // 	m->m_off   = (char *)&cr.m_spamTests[WORD_IN_TITLE] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "20";
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "img src to other domains";
 | |
| // 	m->m_desc  = "Page has img src to other domains.  ";
 | |
| // 	m->m_cgi   = "spamctrle";
 | |
| // 	m->m_off   = (char *)&cr.m_spamTests[IMG_SRC_OTHER_DOMAIN] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "5";
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "page has spammy words";
 | |
| // 	m->m_desc  = "Page has spammy words.  ";
 | |
| // 	m->m_cgi   = "spamctrlf";
 | |
| // 	m->m_off   = (char *)&cr.m_spamTests[SPAMMY_WORDS] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "5";
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "consecutive link text";
 | |
| // 	m->m_desc  = "Three consecutive link texts "
 | |
| // 		"contain the same word.  ";
 | |
| // 	m->m_cgi   = "spamctrlg";
 | |
| // 	m->m_off   = (char *)&cr.m_spamTests[CONSECUTIVE_LINK_TEXT] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "10";
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "affiliate company links";
 | |
| // 	m->m_desc  = "links to amazon, allposters, or zappos.  ";
 | |
| // 	m->m_cgi   = "spamctrlh";
 | |
| // 	m->m_off   = (char *)&cr.m_spamTests[AFFILIATE_LINKS] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "10";
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "affiliate in links";
 | |
| // 	m->m_desc  = "Has string 'affiliate' in the links.  ";
 | |
| // 	m->m_cgi   = "spamctrli";
 | |
| // 	m->m_off   = (char *)&cr.m_spamTests[AFFILIATE_LINKS2] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "40";
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "Iframe to amazon";
 | |
| // 	m->m_desc  = "Has an iframe whose src is amazon.  ";
 | |
| // 	m->m_cgi   = "spamctrlj";
 | |
| // 	m->m_off   = (char *)&cr.m_spamTests[IFRAME_TO_AMAZON] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "30";
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "int32_t links";
 | |
| // 	m->m_desc  = "Links to urls which are > 128 chars.  ";
 | |
| // 	m->m_cgi   = "spamctrlk";
 | |
| // 	m->m_off   = (char *)&cr.m_spamTests[LINKS_OVER_128_CHARS] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "5";
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "links to queries";
 | |
| // 	m->m_desc  = "links have ?q= or &q= in them.  ";
 | |
| // 	m->m_cgi   = "spamctrll";
 | |
| // 	m->m_off   = (char *)&cr.m_spamTests[LINKS_HAVE_QUERIES] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "5";
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "google ad client";
 | |
| // 	m->m_desc  = "Page has a google ad client.  ";
 | |
| // 	m->m_cgi   = "spamctrlm";
 | |
| // 	m->m_off   = (char *)&cr.m_spamTests[GOOGLE_AD_CLIENT] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "20";
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "percent text in links";
 | |
| // 	m->m_desc  = "percent of text in links (over 50 percent).  ";
 | |
| // 	m->m_cgi   = "spamctrln";
 | |
| // 	m->m_off   = (char *)&cr.m_spamTests[PERCENT_IN_LINKS] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "15";
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "links to a url with a - or _ in the domain";
 | |
| // 	m->m_desc  = "Links to a url with a - or _ in the domain";
 | |
| // 	m->m_cgi   = "spamctrlo";
 | |
| // 	m->m_off   = (char *)&cr.m_spamTests[DASH_IN_LINK] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "2";
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "links to a url which is .info or .biz";
 | |
| // 	m->m_desc  = "Links to a url which is .info or .biz.";
 | |
| // 	m->m_cgi   = "spamctrlp";
 | |
| // 	m->m_off   = (char *)&cr.m_spamTests[LINK_TO_BADTLD] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "2";
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "links to a dmoz category";
 | |
| // 	m->m_desc  = "Links to a dmoz category.";
 | |
| // 	m->m_cgi   = "spamctrlq";
 | |
| // 	m->m_off   = (char *)&cr.m_spamTests[LINKS_ARE_DMOZ_CATS] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "4";
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "consecutive bold text";
 | |
| // 	m->m_desc  = "Three consecutive bold texts "
 | |
| // 		"contain the same word.  ";
 | |
| // 	m->m_cgi   = "spamctrlr";
 | |
| // 	m->m_off   = (char *)&cr.m_spamTests[CONSECUTIVE_BOLD_TEXT] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "10";
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "link text doesn't match domain";
 | |
| // 	m->m_desc  = "Link text looks like a domain, but the link doesn't go there";
 | |
| // 	m->m_cgi   = "spamctrls";
 | |
| // 	m->m_off   = (char *)&cr.m_spamTests[LINK_TEXT_NEQ_DOMAIN] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "10";
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "force multiplier";
 | |
| // 	m->m_desc  = "Multiply this by the number of spam categories "
 | |
| // 		"that have points times the total points, for the final"
 | |
| // 		" score.  Range between 0 and 1.";
 | |
| // 	m->m_cgi   = "frcmult";
 | |
| // 	m->m_off   = (char *)&cr.m_forceMultiplier - x;
 | |
| // 	m->m_type  = TYPE_FLOAT;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "0.01";
 | |
| // 	m->m_group = 1;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| 
 | |
| 
 | |
| // 	///////////////////////   MAXES FOR SPAM CONTROLS  ///////////////////////
 | |
| 
 | |
| // 	m->m_title = "max points for char in url";
 | |
| // 	m->m_desc  = "Max points for url has - or _ or a digit in the domain";
 | |
| // 	m->m_cgi   = "spammaxa";
 | |
| // 	m->m_off   = (char *)&cr.m_spamMaxes[CHAR_IN_URL] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "300";
 | |
| // 	m->m_group = 1;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "max points for bad tld";
 | |
| // 	m->m_desc  = "Max points for tld is info or biz";
 | |
| // 	m->m_cgi   = "spammaxb";
 | |
| // 	m->m_off   = (char *)&cr.m_spamMaxes[BAD_TLD] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_def   = "300";
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "max points for good tld";
 | |
| // 	m->m_desc  = "Max points for tld is gov, edu or mil";
 | |
| // 	m->m_cgi   = "spammaxc";
 | |
| // 	m->m_off   = (char *)&cr.m_spamMaxes[GOOD_TLD] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "300";
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "max points for title has spammy words";
 | |
| // 	m->m_desc  = "Max points for Title has spammy words.  ";
 | |
| // 	m->m_cgi   = "spammaxd";
 | |
| // 	m->m_off   = (char *)&cr.m_spamMaxes[WORD_IN_TITLE] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "300";
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "max points for img src to other domains";
 | |
| // 	m->m_desc  = "Max points for Page has img src to other domains.  ";
 | |
| // 	m->m_cgi   = "spammaxe";
 | |
| // 	m->m_off   = (char *)&cr.m_spamMaxes[IMG_SRC_OTHER_DOMAIN] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "300";
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "max points for page has spammy words";
 | |
| // 	m->m_desc  = "Max points for Page has spammy words.  ";
 | |
| // 	m->m_cgi   = "spammaxf";
 | |
| // 	m->m_off   = (char *)&cr.m_spamMaxes[SPAMMY_WORDS] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "300";
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "max points for consecutive link text";
 | |
| // 	m->m_desc  = "Max points for three consecutive link texts "
 | |
| // 		"contain the same word.  ";
 | |
| // 	m->m_cgi   = "spammaxg";
 | |
| // 	m->m_off   = (char *)&cr.m_spamMaxes[CONSECUTIVE_LINK_TEXT] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "300";
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "max points for affiliate company links";
 | |
| // 	m->m_desc  = "Max points for links to amazon, allposters, or zappos.  ";
 | |
| // 	m->m_cgi   = "spammaxh";
 | |
| // 	m->m_off   = (char *)&cr.m_spamMaxes[AFFILIATE_LINKS] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "300";
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "max points for affiliate in links";
 | |
| // 	m->m_desc  = "Max points for Has string 'affiliate' in the links.  ";
 | |
| // 	m->m_cgi   = "spammaxi";
 | |
| // 	m->m_off   = (char *)&cr.m_spamMaxes[AFFILIATE_LINKS2] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "300";
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "max points for Iframe to amazon";
 | |
| // 	m->m_desc  = "Max points for Has an iframe whose src is amazon.  ";
 | |
| // 	m->m_cgi   = "spammaxj";
 | |
| // 	m->m_off   = (char *)&cr.m_spamMaxes[IFRAME_TO_AMAZON] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "300";
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "max points for int32_t links";
 | |
| // 	m->m_desc  = "Max points for Links to urls which are > 128 chars.  ";
 | |
| // 	m->m_cgi   = "spammaxk";
 | |
| // 	m->m_off   = (char *)&cr.m_spamMaxes[LINKS_OVER_128_CHARS] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "300";
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "max points for links to queries";
 | |
| // 	m->m_desc  = "Max points for links have ?q= or &q= in them.  ";
 | |
| // 	m->m_cgi   = "spammaxl";
 | |
| // 	m->m_off   = (char *)&cr.m_spamMaxes[LINKS_HAVE_QUERIES] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "300";
 | |
| // 	m->m_group = 0;
 | |
| // 	m->m_sparm = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "max points for google ad client";
 | |
| // 	m->m_desc  = "Max points for Page has a google ad client.  ";
 | |
| // 	m->m_cgi   = "spammaxm";
 | |
| // 	m->m_off   = (char *)&cr.m_spamMaxes[GOOGLE_AD_CLIENT] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "300";
 | |
| // 	m->m_group = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "max points for percent text in links";
 | |
| // 	m->m_desc  = "Max points for percent of text in links (over 50 percent).  ";
 | |
| // 	m->m_cgi   = "spammaxn";
 | |
| // 	m->m_off   = (char *)&cr.m_spamMaxes[PERCENT_IN_LINKS] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "300";
 | |
| // 	m->m_group = 0;
 | |
| // 	m++;
 | |
| // 	m->m_title = "max points for links have - or _";
 | |
| // 	m->m_desc  = "Max points for links have - or _";
 | |
| // 	m->m_cgi   = "spammaxo";
 | |
| // 	m->m_off   = (char *)&cr.m_spamMaxes[DASH_IN_LINK] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "300";
 | |
| // 	m->m_group = 0;
 | |
| // 	m++;
 | |
| // 	m->m_title = "max points for links to .info or .biz";
 | |
| // 	m->m_desc  = "Max points for links to .info or .biz ";
 | |
| // 	m->m_cgi   = "spammaxp";
 | |
| // 	m->m_off   = (char *)&cr.m_spamMaxes[LINK_TO_BADTLD] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "300";
 | |
| // 	m->m_group = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "max points for links to a dmoz category";
 | |
| // 	m->m_desc  = "Max points for links to a dmoz category.";
 | |
| // 	m->m_cgi   = "spammaxq";
 | |
| // 	m->m_off   = (char *)&cr.m_spamMaxes[LINKS_ARE_DMOZ_CATS] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "300";
 | |
| // 	m->m_group = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "max points for consecutive bold text";
 | |
| // 	m->m_desc  = "Max points for three consecutive bold texts "
 | |
| // 		"contain the same word.  ";
 | |
| // 	m->m_cgi   = "spammaxr";
 | |
| // 	m->m_off   = (char *)&cr.m_spamMaxes[CONSECUTIVE_BOLD_TEXT] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "300";
 | |
| // 	m->m_group = 0;
 | |
| // 	m++;
 | |
| 
 | |
| // 	m->m_title = "max points for link text doesn't match domain";
 | |
| // 	m->m_desc  = "Max points for link text doesn't match domain";
 | |
| // 	m->m_cgi   = "spammaxs";
 | |
| // 	m->m_off   = (char *)&cr.m_spamMaxes[LINK_TEXT_NEQ_DOMAIN] - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_SPAM;
 | |
| // 	m->m_def   = "300";
 | |
| // 	m->m_group = 0;
 | |
| // 	m++;
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| // 	///////////////////////////////////////////
 | |
| // 	//          END SPAM CONTROLS            //
 | |
| // 	///////////////////////////////////////////
 | |
| 
 | |
| 
 | |
| 	///////////////////////////////////////////
 | |
| 	//  QUALITY AGENT CONTROLS
 | |
| 	///////////////////////////////////////////
 | |
| 
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "all agents on";
 | |
| 	m->m_desc  = "Enable quality agent on all hosts for this collection";
 | |
| 	m->m_cgi   = "aqae";
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_off   = (char *)&cr.m_qualityAgentEnabled - x;
 | |
| 	m->m_type  = TYPE_BOOL2; // no yes or no, just a link
 | |
| 	m->m_page  = PAGE_QAGENT;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "all agents off";
 | |
| 	m->m_desc  = "Disable quality agent on all hosts for this collection";
 | |
| 	m->m_cgi   = "aqad";
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_off   = (char *)&cr.m_qualityAgentEnabled - x;
 | |
| 	m->m_type  = TYPE_BOOL2; // no yes or no, just a link
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "quality agent enabled";
 | |
| 	m->m_desc  = "If enabled, the agent will find quality modifiers for "
 | |
| 		"all of the sites found in titledb.";
 | |
| 	m->m_cgi   = "qae";
 | |
| 	m->m_off   = (char *)&cr.m_qualityAgentEnabled - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_cast  = 0;
 | |
| 	m->m_page  = PAGE_QAGENT;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "quality agent continuous loop";
 | |
| 	m->m_desc  = "If enabled, the agent will loop when it reaches "
 | |
| 		"the end of titledb. Otherwise, it will disable itself.";
 | |
| 	m->m_cgi   = "qale";
 | |
| 	m->m_off   = (char *)&cr.m_qualityAgentLoop - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_page  = PAGE_QAGENT;
 | |
| 	m->m_cast  = 1;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "ban subsites";
 | |
| 	m->m_desc  = "If enabled, the agent will look at the paths of"
 | |
| 		" its titlerec sample, if the offending spam scores"
 | |
| 		" all come from the same subsite, we just ban that one."
 | |
| 		"  Good for banning hijacked forums or spammed archives.";
 | |
| 	m->m_cgi   = "qabs";
 | |
| 	m->m_off   = (char *)&cr.m_qualityAgentBanSubSites - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_page  = PAGE_QAGENT;
 | |
| 	m->m_cast  = 1;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "start document";
 | |
| 	m->m_desc  = "The agent will start at this docid when scanning "
 | |
| 		"titledb looking for sites.";
 | |
| 	m->m_cgi   = "qasd";
 | |
| 	m->m_off   = (char *)&cr.m_qualityAgentStartDoc - x;
 | |
| 	m->m_type  = TYPE_LONG_LONG;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_page  = PAGE_QAGENT;
 | |
| 	m->m_sync  = false;  // do not sync this parm
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "site quality refresh rate";
 | |
| 	m->m_desc  = "The quality agent will try to reexamine entries in "
 | |
| 		"tagdb which were added more than this many seconds ago";
 | |
| 	m->m_cgi   = "qasqrr";
 | |
| 	m->m_off   = (char *)&cr.m_tagdbRefreshRate - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_page  = PAGE_QAGENT;
 | |
| 	m->m_group = 1;
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_def   = "2592000";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "link samples to get";
 | |
| 	m->m_desc  = "Lookup the qualities of this many links in tagdb.";
 | |
| 	m->m_cgi   = "lstg";
 | |
| 	m->m_off   = (char *)&cr.m_linkSamplesToGet - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_page  = PAGE_QAGENT;
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_def   = "256";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "min pages to evaluate";
 | |
| 	m->m_desc  = "The quality agent will skip this site if there are"
 | |
| 		" less than this many pages to evaluate.";
 | |
| 	m->m_cgi   = "mpte";
 | |
| 	m->m_off   = (char *)&cr.m_minPagesToEvaluate - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_page  = PAGE_QAGENT;
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_def   = "1";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "link bonus divisor";
 | |
| 	m->m_desc  = "Decrease a page's spam score if it has a high "
 | |
| 		"link quality.  The bonus is computed by dividing the "
 | |
| 		"page's link quality by this parm.  LinkInfos older "
 | |
| 		"than 30 days are considered stale and are not used.";
 | |
| 	m->m_cgi   = "lbd";
 | |
| 	m->m_off   = (char *)&cr.m_linkBonusDivisor - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_page  = PAGE_QAGENT;
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_def   = "20";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "points per banned link";
 | |
| 	m->m_desc  = "Subtract x points per banned site that a site links to.";
 | |
| 	m->m_cgi   = "nppbl";
 | |
| 	m->m_off   = (char *)&cr.m_negPointsPerBannedLink - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_page  = PAGE_QAGENT;
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_def   = "3";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "points per link to different sites on the same IP";
 | |
| 	m->m_desc  = "Subtract x points per site linked to that is on the "
 | |
| 		"same IP as other links.  Good for catching domain parking "
 | |
| 		"lots and spammers in general, but looking up the IPs "
 | |
| 		"slows down the agent considerably.  (set to 0 to disable.)";
 | |
| 	m->m_cgi   = "pfltdssi";
 | |
| 	m->m_off   = (char *)&cr.m_penaltyForLinksToDifferentSiteSameIp - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_page  = PAGE_QAGENT;
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_def   = "0";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "number of sites on an ip to sample";
 | |
| 	m->m_desc  = "Examine this many sites on the same ip as this site";
 | |
| 	m->m_cgi   = "nsoits";
 | |
| 	m->m_off   = (char *)&cr.m_numSitesOnIpToSample - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_page  = PAGE_QAGENT;
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_def   = "100";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "points per banned site on ip";
 | |
| 	m->m_desc  = "Subtract x points from a site quality for each banned "
 | |
| 		"site on the ip";
 | |
| 	m->m_cgi   = "nppbsoi";
 | |
| 	m->m_off   = (char *)&cr.m_negPointsPerBannedSiteOnIp - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_page  = PAGE_QAGENT;
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_def   = "2";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max penalty from being on a bad IP";
 | |
| 	m->m_desc  = "The penalty for being on a bad IP will not"
 | |
| 		" exceed this value.";
 | |
| 	m->m_cgi   = "qampfboabi";
 | |
| 	m->m_off   = (char *)&cr.m_maxPenaltyFromIp - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_page  = PAGE_QAGENT;
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_def   = "-30";
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "max sites per second";
 | |
| 	m->m_desc  = "The agent will not process more than this many"
 | |
| 		" sites per second.  Can be less than 1.";
 | |
| 	m->m_cgi   = "msps";
 | |
| 	m->m_off   = (char *)&cr.m_maxSitesPerSecond - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_page  = PAGE_QAGENT;
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_def   = "99999.0";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "site agent banned ruleset";
 | |
| 	m->m_desc  = "Site agent will assign this ruleset to documents "
 | |
| 		" which are determined to be low quality.";
 | |
| 	m->m_cgi   = "";
 | |
| 	m->m_off   = (char *)&cr.m_qualityAgentBanRuleset - x;
 | |
| 	m->m_type  = TYPE_RULESET; // int32_t with dropdown of rulesets
 | |
| 	m->m_page  = PAGE_QAGENT;
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_def   = "30";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "ban quality threshold";
 | |
| 	m->m_desc  = "If the site has a spam score greater than this parm, it will"
 | |
| 		" be inserted into the above ruleset.";
 | |
| 	m->m_cgi   = "tttsb";
 | |
| 	m->m_off   = (char *)&cr.m_siteQualityBanThreshold - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_page  = PAGE_QAGENT;
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_def   = "-100";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "threshold to trigger site reindex";
 | |
| 	m->m_desc  = "If the site has a quality less than this parm, it will"
 | |
| 		" be added to the spider queue for reindexing";
 | |
| 	m->m_cgi   = "tttsr";
 | |
| 	m->m_off   = (char *)&cr.m_siteQualityReindexThreshold - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_page  = PAGE_QAGENT;
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_def   = "-100";
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 
 | |
| // 	m->m_title = "";
 | |
| // 	m->m_desc  = "";
 | |
| // 	m->m_cgi   = "";
 | |
| // 	m->m_off   = (char *)&cr.m_ - x;
 | |
| // 	m->m_type  = TYPE_LONG;
 | |
| // 	m->m_page  = PAGE_QAGENT;
 | |
| // 	m->m_def   = "";
 | |
| // 	m++;
 | |
| 
 | |
| 	*/
 | |
| 
 | |
| 	///////////////////////////////////////////
 | |
| 	//  END QUALITY AGENT CONTROLS
 | |
| 	///////////////////////////////////////////
 | |
| 
 | |
| 
 | |
| 	///////////////////////////////////////////
 | |
| 	//  AD FEED CONTROLS
 | |
| 	///////////////////////////////////////////
 | |
| 	/*
 | |
| 	m->m_title = "num ads in paid inclusion ad feed";
 | |
| 	m->m_desc  = "The number of ads we would like returned from the ad"
 | |
|                      " server. This applies to all paid inclusion ads below.";
 | |
| 	m->m_cgi   = "apin";
 | |
| 	m->m_off   = (char *)&cr.m_adPINumAds - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "2";
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "num ads in skyscraper ad feed";
 | |
| 	m->m_desc  = "The number of ads we would like returned from the ad"
 | |
|                      " server. This applies to all skyscraper ads below.";
 | |
| 	m->m_cgi   = "assn";
 | |
| 	m->m_off   = (char *)&cr.m_adSSNumAds - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "5";
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "skyscraper ad width";
 | |
| 	m->m_desc  = "The width of the skyscraper ad column in pixels";
 | |
| 	m->m_cgi   = "awd";
 | |
| 	m->m_off   = (char *)&cr.m_adWidth - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "300";
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "ad feed timeout";
 | |
| 	m->m_desc  = "The time (in milliseconds) to wait for an ad list to be "
 | |
| 		     "returned before timing out and displaying the results "
 | |
| 		     "without any ads. This applies to all ads below.";
 | |
| 	m->m_cgi   = "afto";
 | |
| 	m->m_off   = (char *)&cr.m_adFeedTimeOut - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "1000";
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
|         
 | |
|         m->m_title = "(1) paid inclusion ad enable";
 | |
| 	m->m_desc  = "Enable/Disable the paid inclusion ad.";
 | |
| 	m->m_cgi   = "apie";
 | |
|         m->m_off   = (char *)&cr.m_adPIEnable - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "1";
 | |
| 	m++;
 | |
| 
 | |
|         m->m_title = "(1) paid inclusion ad feed link";
 | |
| 	m->m_desc  = "Full link with address and parameters to retrieve an ad "
 | |
| 		     "feed.  To specify parameter input: %q for query, %n "
 | |
|                      "for num results, %p for page number, %i for query ip, "
 | |
|                      "and %% for %.";
 | |
| 	m->m_cgi   = "apicgi";
 | |
| 	m->m_off   = (char *)cr.m_adCGI[0] - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_CGI_URL;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "(1) paid inclusion ad feed xml result tag";
 | |
| 	m->m_desc  = "Specify the full xml path for a result.";
 | |
| 	m->m_cgi   = "apirx";
 | |
| 	m->m_off   = (char *)cr.m_adResultXml[0] - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_XML_LEN;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	
 | |
|         m->m_title = "(1) paid inclusion ad feed xml title tag";
 | |
| 	m->m_desc  = "Specify the full xml path for the results title.";
 | |
| 	m->m_cgi   = "apitx";
 | |
| 	m->m_off   = (char *)cr.m_adTitleXml[0] - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_XML_LEN;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
|         m->m_title = "(1) paid inclusion ad feed xml description tag";
 | |
| 	m->m_desc  = "Specify the full xml path for the results description.";
 | |
| 	m->m_cgi   = "apidx";
 | |
| 	m->m_off   = (char *)cr.m_adDescXml[0] - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_XML_LEN;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
|         m->m_title = "(1) paid inclusion ad feed xml link tag";
 | |
| 	m->m_desc  = "Specify the full xml path for the results link.  This "
 | |
| 		     "is the link that is shown as plain text, not an actual "
 | |
| 		     "link, below the ad description.";
 | |
| 	m->m_cgi   = "apilx";
 | |
| 	m->m_off   = (char *)cr.m_adLinkXml[0] - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_XML_LEN;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
|         m->m_title = "(1) paid inclusion ad feed xml url tag";
 | |
| 	m->m_desc  = "Specify the full xml path for the results url.  This is "
 | |
| 		     "the link associated with the title.";
 | |
| 	m->m_cgi   = "apiux";
 | |
| 	m->m_off   = (char *)cr.m_adUrlXml[0] - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_XML_LEN;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
|         m->m_title = "(1) paid inclusion backup ad feed link";
 | |
| 	m->m_desc  = "Full link with address and parameters to retrieve an ad "
 | |
| 		     "feed.  To specify parameter input: %q for query, %n "
 | |
|                      "for num results, %p for page number, %i for query ip, "
 | |
|                      "and %% for %.";
 | |
| 	m->m_cgi   = "apicgib";
 | |
| 	m->m_off   = (char *)cr.m_adCGI[1] - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_CGI_URL;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "(1) paid inclusion backup ad feed xml result tag";
 | |
| 	m->m_desc  = "Specify the full xml path for a result.";
 | |
| 	m->m_cgi   = "apirxb";
 | |
| 	m->m_off   = (char *)cr.m_adResultXml[1] - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_XML_LEN;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	
 | |
|         m->m_title = "(1) paid inclusion backup ad feed xml title tag";
 | |
| 	m->m_desc  = "Specify the full xml path for the results title.";
 | |
| 	m->m_cgi   = "apitxb";
 | |
| 	m->m_off   = (char *)cr.m_adTitleXml[1] - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_XML_LEN;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
|         m->m_title = "(1) paid inclusion backup ad feed xml description tag";
 | |
| 	m->m_desc  = "Specify the full xml path for the results description.";
 | |
| 	m->m_cgi   = "apidxb";
 | |
| 	m->m_off   = (char *)cr.m_adDescXml[1] - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_XML_LEN;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
|         m->m_title = "(1) paid inclusion backup ad feed xml link tag";
 | |
| 	m->m_desc  = "Specify the full xml path for the results link.  This "
 | |
| 		     "is the link that is shown as plain text, not an actual "
 | |
| 		     "link, below the ad description.";
 | |
| 	m->m_cgi   = "apilxb";
 | |
| 	m->m_off   = (char *)cr.m_adLinkXml[1] - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_XML_LEN;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
|         m->m_title = "(1) paid inclusion backup ad feed xml url tag";
 | |
| 	m->m_desc  = "Specify the full xml path for the results url.  This is "
 | |
| 		     "the link associated with the title.";
 | |
| 	m->m_cgi   = "apiuxb";
 | |
| 	m->m_off   = (char *)cr.m_adUrlXml[1] - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_XML_LEN;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
|         m->m_title = "(1) paid inclusion format text";
 | |
| 	m->m_desc  = "Specify the formatting text from the <div tag in";
 | |
| 	m->m_cgi   = "apift";
 | |
| 	m->m_off   = (char *)cr.m_adPIFormat - x;
 | |
| 	m->m_plen  = (char *)&cr.m_adPIFormatLen - x; // length of string
 | |
| 	m->m_type  = TYPE_STRINGBOX;
 | |
| 	m->m_size  = MAX_HTML_LEN + 1;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "style=\"padding: 3px;"
 | |
|                      "text-align: left; background-color: "
 | |
|                      "lightyellow;\"><span style=\"font-size: larger; "
 | |
|                      "font-weight: bold;\">Sponsored Results</span>\n"
 | |
|                      "<br><br>";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
|         m->m_title = "(1) skyscraper ad enable";
 | |
| 	m->m_desc  = "Enable/Disable the skyscraper ad.";
 | |
| 	m->m_cgi   = "asse";
 | |
|         m->m_off   = (char *)&cr.m_adSSEnable - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "1";
 | |
| 	m++;
 | |
| 
 | |
|         m->m_title = "(1) skyscraper ad feed same as paid inclusion";
 | |
| 	m->m_desc  = "Use the same feed CGI as used above for the paid "
 | |
|                      "inclusion.";
 | |
| 	m->m_cgi   = "asssap";
 | |
|         m->m_off   = (char *)&cr.m_adSSSameasPI - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
|         m->m_title = "(1) skyscraper ad feed link";
 | |
| 	m->m_desc  = "Full link with address and parameters to retrieve an ad "
 | |
| 		     "feed.  To specify parameter input: %q for query, %n "
 | |
|                      "for num results, %p for page number, %i for query ip, "
 | |
|                      "and %% for %.";
 | |
| 	m->m_cgi   = "asscgi";
 | |
| 	m->m_off   = (char *)cr.m_adCGI[2] - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_CGI_URL;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "(1) skyscraper ad feed xml result tag";
 | |
| 	m->m_desc  = "Specify the full xml path for a result.";
 | |
| 	m->m_cgi   = "assrx";
 | |
| 	m->m_off   = (char *)cr.m_adResultXml[2] - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_XML_LEN;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	
 | |
|         m->m_title = "(1) skyscraper ad feed xml title tag";
 | |
| 	m->m_desc  = "Specify the full xml path for the results title.";
 | |
| 	m->m_cgi   = "asstx";
 | |
| 	m->m_off   = (char *)cr.m_adTitleXml[2] - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_XML_LEN;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
|         m->m_title = "(1) skyscraper ad feed xml description tag";
 | |
| 	m->m_desc  = "Specify the full xml path for the results description.";
 | |
| 	m->m_cgi   = "assdx";
 | |
| 	m->m_off   = (char *)cr.m_adDescXml[2] - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_XML_LEN;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
|         m->m_title = "(1) skyscraper ad feed xml link tag";
 | |
| 	m->m_desc  = "Specify the full xml path for the results link.  This "
 | |
| 		     "is the link that is shown as plain text, not an actual "
 | |
| 		     "link, below the ad description.";
 | |
| 	m->m_cgi   = "asslx";
 | |
| 	m->m_off   = (char *)cr.m_adLinkXml[2] - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_XML_LEN;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
|         m->m_title = "(1) skyscraper ad feed xml url tag";
 | |
| 	m->m_desc  = "Specify the full xml path for the results url.  This is "
 | |
| 		     "the link associated with the title.";
 | |
| 	m->m_cgi   = "assux";
 | |
| 	m->m_off   = (char *)cr.m_adUrlXml[2] - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_XML_LEN;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
|         m->m_title = "(1) skyscraper backup ad feed same as paid inclusion";
 | |
| 	m->m_desc  = "Use the same feed CGI as used above for the backup paid "
 | |
|                      "inclusion.";
 | |
| 	m->m_cgi   = "asssapb";
 | |
|         m->m_off   = (char *)&cr.m_adBSSSameasBPI - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
|         m->m_title = "(1) skyscraper backup ad feed link";
 | |
| 	m->m_desc  = "Full link with address and parameters to retrieve an ad "
 | |
| 		     "feed.  To specify parameter input: %q for query, %n "
 | |
|                      "for num results, %p for page number, %i for query ip, "
 | |
|                      "and %% for %.";
 | |
| 	m->m_cgi   = "asscgib";
 | |
| 	m->m_off   = (char *)cr.m_adCGI[3] - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_CGI_URL;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "(1) skyscraper backup ad feed xml result tag";
 | |
| 	m->m_desc  = "Specify the full xml path for a result.";
 | |
| 	m->m_cgi   = "assrxb";
 | |
| 	m->m_off   = (char *)cr.m_adResultXml[3] - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_XML_LEN;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	
 | |
|         m->m_title = "(1) skyscraper backup ad feed xml title tag";
 | |
| 	m->m_desc  = "Specify the full xml path for the results title.";
 | |
| 	m->m_cgi   = "asstxb";
 | |
| 	m->m_off   = (char *)cr.m_adTitleXml[3] - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_XML_LEN;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
|         m->m_title = "(1) skyscraper backup ad feed xml description tag";
 | |
| 	m->m_desc  = "Specify the full xml path for the results description.";
 | |
| 	m->m_cgi   = "assdxb";
 | |
| 	m->m_off   = (char *)cr.m_adDescXml[3] - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_XML_LEN;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
|         m->m_title = "(1) skyscraper backup ad feed xml link tag";
 | |
| 	m->m_desc  = "Specify the full xml path for the results link.  This "
 | |
| 		     "is the link that is shown as plain text, not an actual "
 | |
| 		     "link, below the ad description.";
 | |
| 	m->m_cgi   = "asslxb";
 | |
| 	m->m_off   = (char *)cr.m_adLinkXml[3] - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_XML_LEN;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
|         m->m_title = "(1) skyscraper backup ad feed xml url tag";
 | |
| 	m->m_desc  = "Specify the full xml path for the results url.  This is "
 | |
| 		     "the link associated with the title.";
 | |
| 	m->m_cgi   = "assuxb";
 | |
| 	m->m_off   = (char *)cr.m_adUrlXml[3] - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_XML_LEN;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
|         
 | |
|         m->m_title = "(1) skyscraper format text";
 | |
| 	m->m_desc  = "Specify the formatting text from the <div tag in";
 | |
| 	m->m_cgi   = "assft";
 | |
| 	m->m_off   = (char *)cr.m_adSSFormat - x;
 | |
| 	m->m_plen  = (char *)&cr.m_adSSFormatLen - x; // length of string
 | |
| 	m->m_size  = MAX_HTML_LEN + 1;
 | |
| 	m->m_type  = TYPE_STRINGBOX;
 | |
| 	m->m_page  = PAGE_ADFEED;
 | |
| 	m->m_def   = "style=\"height: 100%; padding: 3px;"
 | |
|                      "text-align: center;background-color: "
 | |
|                      "lightyellow;\"><span style=\""
 | |
|                      "font-size: larger; font-weight: bold;\">"
 | |
|                      "Sponsored Results</span><br><br> ";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	///////////////////////////////////////////
 | |
| 	//  END AD FEED CONTROLS
 | |
| 	///////////////////////////////////////////
 | |
|         
 | |
| 
 | |
| 	///////////////////////////////////////////
 | |
| 	//  SEARCH URL CONTROLS
 | |
| 	//  these are only specified in the search url when doing a search
 | |
| 	///////////////////////////////////////////
 | |
| 
 | |
| 
 | |
| 	/////
 | |
| 	//
 | |
| 	// OLDER SEARCH INPUTS
 | |
| 	//
 | |
| 	////
 | |
| 
 | |
| 
 | |
| 	// when we do &qa=1 we do not show things like responseTime in
 | |
| 	// search results so we can verify serp checksum consistency for QA
 | |
| 	// in qa.cpp
 | |
| 	/*
 | |
| 	m->m_title = "quality assurance";
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_SI;
 | |
| 	m->m_desc  = "This is 1 if doing a QA test in qa.cpp";
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_soff  = (char *)&si.m_qa - y;
 | |
| 	m->m_type  = TYPE_CHAR;
 | |
| 	m->m_sparm = 1;
 | |
| 	m->m_scgi  = "qa";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	//m->m_title = "show turk forms";
 | |
| 	//m->m_desc  = "If enabled summaries in search results will be "
 | |
| 	//	"turkable input forms.";
 | |
| 	//m->m_def   = "0";
 | |
| 	//m->m_soff  = (char *)&si.m_getTurkForm - y;
 | |
| 	//m->m_type  = TYPE_BOOL;
 | |
| 	//m->m_sparm = 1;
 | |
| 	//m->m_scgi  = "turk";
 | |
| 	//m++;
 | |
| 
 | |
| 
 | |
| 	// IMPORT PARMS
 | |
| 	m->m_title = "enable document importation";
 | |
| 	m->m_desc  = "Import documents into this collection.";
 | |
| 	m->m_cgi   = "import";
 | |
| 	m->m_page  = PAGE_IMPORT;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_off   = (char *)&cr.m_importEnabled - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_API; 
 | |
| 	m++;
 | |
| 
 | |
| 	// m->m_title = "collection";
 | |
| 	// m->m_desc  = "Collection to import documents into.";
 | |
| 	// m->m_cgi   = "c";
 | |
| 	// m->m_page  = PAGE_IMPORT;
 | |
| 	// m->m_obj   = OBJ_GBREQUEST;
 | |
| 	// m->m_off   = (char *)&cr.m_imcoll - (char *)&gr;
 | |
| 	// m->m_type  = TYPE_CHARPTR;
 | |
| 	// m->m_def   = NULL;
 | |
| 	// // PF_COLLDEFAULT: so it gets set to default coll on html page
 | |
| 	// m->m_flags = PF_API|PF_REQUIRED|PF_NOHTML; 
 | |
| 	// m++;
 | |
| 
 | |
| 	m->m_title = "directory containing titledb files";
 | |
| 	m->m_desc  = "Import documents contained in titledb files in this "
 | |
| 		"directory. This is an ABSOLUTE directory path.";
 | |
| 	m->m_cgi   = "importdir";
 | |
| 	m->m_xml   = "importDir";
 | |
| 	m->m_page  = PAGE_IMPORT;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_off   = (char *)&cr.m_importDir - x;
 | |
| 	m->m_type  = TYPE_SAFEBUF;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "number of simultaneous injections";
 | |
| 	m->m_desc  = "Typically try one or two injections per host in "
 | |
| 		"your cluster.";
 | |
| 	m->m_cgi   = "numimportinjects";
 | |
| 	m->m_xml   = "numImportInjects";
 | |
| 	m->m_page  = PAGE_IMPORT;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_off   = (char *)&cr.m_numImportInjects - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "2";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 
 | |
| 	///////////
 | |
| 	//
 | |
| 	// ADD URL PARMS
 | |
| 	//
 | |
| 	///////////
 | |
| 
 | |
| 	m->m_title = "collection";
 | |
| 	m->m_desc  = "Add urls into this collection.";
 | |
| 	m->m_cgi   = "c";
 | |
| 	m->m_page  = PAGE_ADDURL2;
 | |
| 	m->m_obj   = OBJ_GBREQUEST;
 | |
| 	m->m_off   = (char *)&gr.m_coll - (char *)&gr;
 | |
| 	m->m_type  = TYPE_CHARPTR;
 | |
| 	m->m_def   = NULL;
 | |
| 	// PF_COLLDEFAULT: so it gets set to default coll on html page
 | |
| 	m->m_flags = PF_API|PF_REQUIRED|PF_NOHTML; 
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "urls to add";
 | |
| 	m->m_desc  = "List of urls to index. One per line or space separated. "
 | |
| 		"If your url does not index as you expect you "
 | |
| 		"can check it's spider history by doing a url: search on it. "
 | |
| 		"Added urls will have a "
 | |
| 		"<a href=/admin/filters#hopcount>hopcount</a> of 0. "
 | |
| 		"Added urls will match the <i><a href=/admin/filters#isaddurl>"
 | |
| 		"isaddurl</a></i> directive on "
 | |
| 		"the url filters page. "
 | |
| 		"The add url api is described on the "
 | |
| 		"<a href=/admin/api>api</a> page.";
 | |
| 	m->m_cgi   = "urls";
 | |
| 	m->m_page  = PAGE_ADDURL2;
 | |
| 	m->m_obj   = OBJ_GBREQUEST; // do not store in g_conf or collectionrec
 | |
| 	m->m_off   = (char *)&gr.m_urlsBuf - (char *)&gr;
 | |
| 	m->m_type  = TYPE_CHARPTR;
 | |
| 	m->m_def   = NULL;
 | |
| 	m->m_flags = PF_TEXTAREA | PF_NOSAVE | PF_API|PF_REQUIRED;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	// the new upload post submit button
 | |
| 	m->m_title = "upload urls";
 | |
| 	m->m_desc  = "Upload your file of urls.";
 | |
| 	m->m_cgi   = "urls";
 | |
| 	m->m_page  = PAGE_ADDURL2;
 | |
| 	m->m_obj   = OBJ_NONE;
 | |
| 	m->m_def   = NULL;
 | |
| 	m->m_type  = TYPE_FILEUPLOADBUTTON;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "strip sessionids";
 | |
| 	m->m_desc  = "Strip added urls of their session ids.";
 | |
| 	m->m_cgi   = "strip";
 | |
| 	m->m_page  = PAGE_ADDURL2;
 | |
| 	m->m_obj   = OBJ_GBREQUEST;
 | |
| 	m->m_off   = (char *)&gr.m_stripBox - (char *)&gr;
 | |
| 	m->m_type  = TYPE_CHECKBOX;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "harvest links";
 | |
| 	m->m_desc  = "Harvest links of added urls so we can spider them?.";
 | |
| 	m->m_cgi   = "spiderlinks";
 | |
| 	m->m_page  = PAGE_ADDURL2;
 | |
| 	m->m_obj   = OBJ_GBREQUEST;
 | |
| 	m->m_off   = (char *)&gr.m_harvestLinks - (char *)&gr;
 | |
| 	m->m_type  = TYPE_CHECKBOX;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "force respider";
 | |
| 	m->m_desc  = "Force an immediate respider even if the url "
 | |
| 		"is already indexed.";
 | |
| 	m->m_cgi   = "force";
 | |
| 	m->m_page  = PAGE_ADDURL2;
 | |
| 	m->m_obj   = OBJ_GBREQUEST;
 | |
| 	m->m_off   = (char *)&gr.m_forceRespiderBox - (char *)&gr;
 | |
| 	m->m_type  = TYPE_CHECKBOX;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 
 | |
| 
 | |
| 	////////
 | |
| 	//
 | |
| 	// now the new injection parms
 | |
| 	//
 | |
| 	////////
 | |
| 
 | |
| 	m->m_title = "url";
 | |
| 	m->m_desc  = "Specify the URL that will be immediately crawled "
 | |
| 		"and indexed in real time while you wait. The browser "
 | |
| 		"will return the "
 | |
| 		"final index status code. Alternatively, "
 | |
| 		"use the <a href=/admin/addurl>add url</a> page "
 | |
| 		"to add urls individually or in bulk "
 | |
| 		"without having to wait for the pages to be "
 | |
| 		"actually indexed in realtime. "
 | |
| 
 | |
| 		"By default, injected urls "
 | |
| 		"take precedence over the \"insitelist\" expression in the "
 | |
| 		"<a href=/admin/filters>url filters</a> "
 | |
| 		"so injected urls need not match the patterns in your "
 | |
| 		"<a href=/admin/sites>site list</a>. You can "
 | |
| 		"change that behavior in the <a href=/admin/filters>url "
 | |
| 		"filters</a> if you want. "
 | |
| 		"Injected urls will have a "
 | |
| 		"<a href=/admin/filters#hopcount>hopcount</a> of 0. "
 | |
| 		"The injection api is described on the "
 | |
| 		"<a href=/admin/api>api</a> page. "
 | |
| 		"Make up a fake url if you are injecting content that "
 | |
| 		"does not have one."
 | |
| 		"<br>"
 | |
| 		"<br>"
 | |
| 		"If the url ends in .warc or .arc or .warc.gz or .arc.gz "
 | |
| 		"Gigablast will index the contained documents as individual "
 | |
| 		"documents, using the appropriate dates and other meta "
 | |
| 		"information contained in the containing archive file."
 | |
| 		;
 | |
| 	m->m_cgi   = "url";
 | |
| 	//m->m_cgi2  = "u";
 | |
| 	//m->m_cgi3  = "seed"; // pagerawlbot
 | |
| 	//m->m_cgi4  = "injecturl";
 | |
| 	m->m_obj   = OBJ_IR;
 | |
| 	m->m_type  = TYPE_CHARPTR;
 | |
| 	m->m_def   = NULL;
 | |
| 	m->m_flags = PF_API | PF_REQUIRED;
 | |
| 	m->m_page  = PAGE_INJECT;
 | |
| 	m->m_off   = (char *)&ir.ptr_url - (char *)&ir;
 | |
| 	m++;
 | |
| 
 | |
| 	// alias #1
 | |
| 	m->m_title = "url";
 | |
| 	m->m_cgi   = "u";
 | |
| 	m->m_obj   = OBJ_IR;
 | |
| 	m->m_type  = TYPE_CHARPTR;
 | |
| 	m->m_def   = NULL;
 | |
| 	m->m_flags = PF_HIDDEN;
 | |
| 	m->m_page  = PAGE_INJECT;
 | |
| 	m->m_off   = (char *)&ir.ptr_url - (char *)&ir;
 | |
| 	m++;
 | |
| 
 | |
| 	// alias #2
 | |
| 	m->m_title = "url";
 | |
| 	m->m_cgi   = "seed";
 | |
| 	m->m_obj   = OBJ_IR;
 | |
| 	m->m_type  = TYPE_CHARPTR;
 | |
| 	m->m_def   = NULL;
 | |
| 	m->m_flags = PF_HIDDEN | PF_DIFFBOT;
 | |
| 	m->m_page  = PAGE_INJECT;
 | |
| 	m->m_off   = (char *)&ir.ptr_url - (char *)&ir;
 | |
| 	m++;
 | |
| 
 | |
| 	// alias #3
 | |
| 	m->m_title = "url";
 | |
| 	m->m_cgi   = "injecturl";
 | |
| 	m->m_obj   = OBJ_IR;
 | |
| 	m->m_type  = TYPE_CHARPTR;
 | |
| 	m->m_def   = NULL;
 | |
| 	m->m_flags = PF_HIDDEN;
 | |
| 	m->m_page  = PAGE_INJECT;
 | |
| 	m->m_off   = (char *)&ir.ptr_url - (char *)&ir;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "query to scrape";
 | |
| 	m->m_desc  = "Scrape popular search engines for this query "
 | |
| 		"and inject their links. You are not required to supply "
 | |
| 		"the <i>url</i> parm if you supply this parm.";
 | |
| 	m->m_cgi   = "qts";
 | |
| 	m->m_obj   = OBJ_IR;
 | |
| 	m->m_type  = TYPE_CHARPTR;
 | |
| 	m->m_def   = NULL;
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_INJECT;
 | |
| 	m->m_off   = (char *)&ir.ptr_queryToScrape - (char *)&ir;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "inject links";
 | |
| 	m->m_desc  = "Should we inject the links found in the injected "
 | |
| 		"content as well?";
 | |
| 	m->m_cgi   = "injectlinks";
 | |
| 	m->m_obj   = OBJ_IR;
 | |
| 	m->m_type  = TYPE_CHECKBOX;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_INJECT;
 | |
| 	m->m_off   = (char *)&ir.m_injectLinks - (char *)&ir;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "spider links";
 | |
| 	m->m_desc  = "Add the outlinks of the injected content into spiderdb "
 | |
| 		"for spidering?";
 | |
| 	m->m_cgi   = "spiderlinks";
 | |
| 	m->m_obj   = OBJ_IR;
 | |
| 	m->m_type  = TYPE_CHECKBOX;
 | |
| 	// leave off because could start spidering whole web unintentionally
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_INJECT;
 | |
| 	m->m_off   = (char *)&ir.m_spiderLinks - (char *)&ir;
 | |
| 	m++;
 | |
| 	
 | |
| 	m->m_title = "short reply";
 | |
| 	m->m_desc  = "Should the injection response be short and simple?";
 | |
| 	m->m_cgi   = "quick";
 | |
| 	m->m_obj   = OBJ_IR;
 | |
| 	m->m_type  = TYPE_CHECKBOX;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_HIDDEN;
 | |
| 	m->m_page  = PAGE_INJECT;
 | |
| 	m->m_off   = (char *)&ir.m_shortReply - (char *)&ir;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "only inject content if new";
 | |
| 	m->m_desc  = "If the specified url is already in the index then "
 | |
| 		"skip the injection.";
 | |
| 	m->m_cgi   = "newonly";
 | |
| 	m->m_obj   = OBJ_IR;
 | |
| 	m->m_type  = TYPE_CHECKBOX;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_INJECT;
 | |
| 	m->m_off   = (char *)&ir.m_newOnly - (char *)&ir;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "delete from index";
 | |
| 	m->m_desc  = "Delete the specified url from the index.";
 | |
| 	m->m_cgi   = "deleteurl";
 | |
| 	m->m_obj   = OBJ_IR;
 | |
| 	m->m_type  = TYPE_CHECKBOX;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_INJECT;
 | |
| 	m->m_off   = (char *)&ir.m_deleteUrl - (char *)&ir;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "recycle content";
 | |
| 	m->m_desc  = "If the url is already in the index, then do not "
 | |
| 		"re-download the content, just use the content that was "
 | |
| 		"stored in the cache from last time.";
 | |
| 	m->m_cgi   = "recycle";
 | |
| 	m->m_obj   = OBJ_IR;
 | |
| 	m->m_type  = TYPE_CHECKBOX;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_INJECT;
 | |
| 	m->m_off   = (char *)&ir.m_recycle - (char *)&ir;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dedup url";
 | |
| 	m->m_desc  = "Do not index the url if there is already another "
 | |
| 		"url in the index with the same content.";
 | |
| 	m->m_cgi   = "dedup";
 | |
| 	m->m_obj   = OBJ_IR;
 | |
| 	m->m_type  = TYPE_CHECKBOX;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_INJECT;
 | |
| 	m->m_off   = (char *)&ir.m_dedup - (char *)&ir;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "do consistency checking";
 | |
| 	m->m_desc  = "Turn this on for debugging.";
 | |
| 	m->m_cgi   = "consist";
 | |
| 	m->m_obj   = OBJ_IR;
 | |
| 	m->m_type  = TYPE_CHECKBOX;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_HIDDEN; // | PF_API
 | |
| 	m->m_page  = PAGE_INJECT;
 | |
| 	m->m_off   = (char *)&ir.m_doConsistencyTesting - (char *)&ir;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "hop count";
 | |
| 	m->m_desc  = "Use this hop count when injecting the page.";
 | |
| 	m->m_cgi   = "hopcount";
 | |
| 	m->m_obj   = OBJ_IR;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_HIDDEN; // | PF_API
 | |
| 	m->m_page  = PAGE_INJECT;
 | |
| 	m->m_off   = (char *)&ir.m_hopCount - (char *)&ir;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "url IP";
 | |
| 	m->m_desc  = "Use this IP when injecting the document. Do not use or "
 | |
| 		"set to 0.0.0.0, if unknown. If provided, it will save an IP "
 | |
| 		"lookup.";
 | |
| 	m->m_cgi   = "urlip";
 | |
| 	m->m_obj   = OBJ_IR;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m->m_def   = "0.0.0.0";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_INJECT;
 | |
| 	m->m_off   = (char *)&ir.m_injectDocIp - (char *)&ir;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "last spider time";
 | |
| 	m->m_desc  = "Override last time spidered";
 | |
| 	m->m_cgi   = "lastspidered";
 | |
| 	m->m_obj   = OBJ_IR;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_HIDDEN; // | PF_API
 | |
| 	m->m_page  = PAGE_INJECT;
 | |
| 	m->m_off   = (char *)&ir.m_lastSpidered - (char *)&ir;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "first indexed";
 | |
| 	m->m_desc  = "Override first indexed time";
 | |
| 	m->m_cgi   = "firstindexed";
 | |
| 	m->m_obj   = OBJ_IR;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_HIDDEN; // | PF_API
 | |
| 	m->m_page  = PAGE_INJECT;
 | |
| 	m->m_off   = (char *)&ir.m_firstIndexed - (char *)&ir;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "content has mime";
 | |
| 	m->m_desc  = "If the content of the url is provided below, does "
 | |
| 		"it begin with an HTTP mime header?";
 | |
| 	m->m_cgi   = "hasmime";
 | |
| 	m->m_obj   = OBJ_IR;
 | |
| 	m->m_type  = TYPE_CHECKBOX;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_INJECT;
 | |
| 	m->m_off   = (char *)&ir.m_hasMime - (char *)&ir;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "content delimeter";
 | |
| 	m->m_desc  = "If the content of the url is provided below, then "
 | |
| 		"it consist of multiple documents separated by this "
 | |
| 		"delimeter. Each such item will be injected as an "
 | |
| 		"independent document. Some possible delimiters: "
 | |
| 		"<i>========</i> or <i><doc></i>. If you set "
 | |
| 		"<i>hasmime</i> above to true then Gigablast will check "
 | |
| 		"for a url after the delimeter and use that url as the "
 | |
| 		"injected url. Otherwise it will append numbers to the "
 | |
| 		"url you provide above.";
 | |
| 	m->m_cgi   = "delim";
 | |
| 	m->m_obj   = OBJ_IR;
 | |
| 	m->m_type  = TYPE_CHARPTR;
 | |
| 	m->m_def   = NULL;
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_INJECT;
 | |
| 	m->m_off   = (char *)&ir.ptr_contentDelim - (char *)&ir;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "content type";
 | |
| 	m->m_desc  = "If you supply content in the text box below without "
 | |
| 		"an HTTP mime header, "
 | |
| 		"then you need to enter the content type. "
 | |
| 		"Possible values: <b>text/html text/plain text/xml "
 | |
| 		"application/json</b>";
 | |
| 	m->m_cgi   = "contenttype";
 | |
| 	m->m_obj   = OBJ_IR;
 | |
| 	m->m_type  = TYPE_CHARPTR; //text/html application/json application/xml
 | |
| 	m->m_def   = "text/html";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_INJECT;
 | |
| 	m->m_off   = (char *)&ir.ptr_contentTypeStr - (char *)&ir;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "content charset";
 | |
| 	m->m_desc  = "A number representing the charset of the content "
 | |
| 		"if provided below and no HTTP mime header "
 | |
| 		"is given. Defaults to utf8 "
 | |
| 		"which is 106. "
 | |
| 		"See iana_charset.h for the numeric values.";
 | |
| 	m->m_cgi   = "charset";
 | |
| 	m->m_obj   = OBJ_IR;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "106";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_INJECT;
 | |
| 	m->m_off   = (char *)&ir.m_charset - (char *)&ir;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "upload content file";
 | |
| 	m->m_desc  = "Instead of specifying the content to be injected in "
 | |
| 		"the text box below, upload this file for it.";
 | |
| 	m->m_cgi   = "file";
 | |
| 	m->m_obj   = OBJ_IR;
 | |
| 	m->m_type  = TYPE_FILEUPLOADBUTTON;
 | |
| 	m->m_def   = NULL;
 | |
| 	m->m_flags = PF_NOAPI;
 | |
| 	m->m_page  = PAGE_INJECT;
 | |
| 	m->m_off   = (char *)&ir.ptr_contentFile - (char *)&ir;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "content";
 | |
| 	m->m_desc = "If you want to supply the URL's content "
 | |
| 		"rather than have Gigablast download it, then "
 | |
| 		"enter the content here. "
 | |
| 		"Enter MIME header "
 | |
| 		"first if \"content has mime\" is set to true above. "
 | |
| 		"Separate MIME from actual content with two returns. "
 | |
| 		"At least put a single space in here if you want to "
 | |
| 		"inject empty content, otherwise the content will "
 | |
| 		"be downloaded from the url. This is because the "
 | |
| 		"page injection form always submits the content text area "
 | |
| 		"even if it is empty, which should signify that the "
 | |
| 		"content should be downloaded.";
 | |
| 	m->m_cgi   = "content";
 | |
| 	m->m_obj   = OBJ_IR;
 | |
| 	m->m_type  = TYPE_CHARPTR;
 | |
| 	m->m_def   = NULL;
 | |
| 	m->m_flags = PF_API|PF_TEXTAREA;
 | |
| 	m->m_page  = PAGE_INJECT;
 | |
| 	m->m_off   = (char *)&ir.ptr_content - (char *)&ir;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "metadata";
 | |
| 	m->m_desc = "Json encoded metadata to be indexed with the document.";
 | |
| 	m->m_cgi   = "metadata";
 | |
| 	m->m_obj   = OBJ_IR;
 | |
| 	m->m_type  = TYPE_CHARPTR;
 | |
| 	m->m_def   = NULL;
 | |
| 	m->m_flags = PF_API|PF_TEXTAREA;
 | |
| 	m->m_page  = PAGE_INJECT;
 | |
| 	m->m_off   = (char *)&ir.ptr_metadata - (char *)&ir;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "get sectiondb voting info";
 | |
| 	m->m_desc = "Return section information of injected content for "
 | |
| 		"the injected subdomain. ";
 | |
| 	m->m_cgi   = "sections";
 | |
| 	m->m_obj   = OBJ_IR;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_API|PF_NOHTML;
 | |
| 	m->m_page  = PAGE_INJECT;
 | |
| 	m->m_off   = (char *)&ir.m_getSections - (char *)&ir;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "diffbot reply";
 | |
| 	m->m_desc = "Used exclusively by diffbot. Do not use.";
 | |
| 	m->m_cgi   = "diffbotreply";
 | |
| 	m->m_obj   = OBJ_IR;
 | |
| 	m->m_type  = TYPE_CHARPTR;
 | |
| 	m->m_def   = NULL;
 | |
| 	m->m_flags = PF_API|PF_TEXTAREA|PF_NOHTML; // do not show in our api
 | |
| 	m->m_page  = PAGE_INJECT;
 | |
| 	m->m_off   = (char *)&ir.ptr_diffbotReply - (char *)&ir;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	///////////////////
 | |
| 	//
 | |
| 	// QUERY REINDEX
 | |
| 	//
 | |
| 	///////////////////
 | |
| 
 | |
| 	m->m_title = "collection";
 | |
| 	m->m_desc  = "query reindex in this collection.";
 | |
| 	m->m_cgi   = "c";
 | |
| 	m->m_obj   = OBJ_GBREQUEST;
 | |
| 	m->m_type  = TYPE_CHARPTR;
 | |
| 	m->m_def   = NULL;
 | |
| 	// PF_COLLDEFAULT: so it gets set to default coll on html page
 | |
| 	m->m_flags = PF_API|PF_REQUIRED|PF_NOHTML; 
 | |
| 	m->m_page  = PAGE_REINDEX;
 | |
| 	m->m_off   = (char *)&gr.m_coll - (char *)&gr;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "query to reindex or delete";
 | |
| 	m->m_desc  = "We either reindex or delete the search results of "
 | |
| 		"this query. Reindexing them will redownload them and "
 | |
| 		"possible update the siterank, which is based on the "
 | |
| 		"number of links to the site. This will add the url "
 | |
| 		"requests to "
 | |
| 		"the spider queue so ensure your spiders are enabled.";
 | |
| 	m->m_cgi   = "q";
 | |
| 	m->m_off   = (char *)&gr.m_query - (char *)&gr;
 | |
| 	m->m_type  = TYPE_CHARPTR;
 | |
| 	m->m_page  = PAGE_REINDEX;
 | |
| 	m->m_obj   = OBJ_GBREQUEST;
 | |
| 	m->m_def   = NULL;
 | |
| 	m->m_flags = PF_API |PF_REQUIRED;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "start result number";
 | |
| 	m->m_desc  = "Starting with this result #. Starts at 0.";
 | |
| 	m->m_cgi   = "srn";
 | |
| 	m->m_off   = (char *)&gr.m_srn - (char *)&gr;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_page  = PAGE_REINDEX;
 | |
| 	m->m_obj   = OBJ_GBREQUEST;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_API ;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "end result number";
 | |
| 	m->m_desc  = "Ending with this result #. 0 is the first result #.";
 | |
| 	m->m_cgi   = "ern";
 | |
| 	m->m_off   = (char *)&gr.m_ern - (char *)&gr;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_page  = PAGE_REINDEX;
 | |
| 	m->m_obj   = OBJ_GBREQUEST;
 | |
| 	m->m_def   = "99999999";
 | |
| 	m->m_flags = PF_API ;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "query language";
 | |
| 	m->m_desc  = "The language the query is in. Used to rank results. "
 | |
| 		"Just use xx to indicate no language in particular. But "
 | |
| 		"you should use the same qlang value you used for doing "
 | |
| 		"the query if you want consistency.";
 | |
| 	m->m_cgi   = "qlang";
 | |
| 	m->m_off   = (char *)&gr.m_qlang - (char *)&gr;
 | |
| 	m->m_type  = TYPE_CHARPTR;
 | |
| 	m->m_page  = PAGE_REINDEX;
 | |
| 	m->m_obj   = OBJ_GBREQUEST;
 | |
| 	m->m_def   = "en";
 | |
| 	m->m_flags = PF_API ;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "recycle content";
 | |
| 	m->m_desc  = "If you check this box then Gigablast will not "
 | |
| 		"re-download the content, but use the content that was "
 | |
| 		"stored in the cache from last time. Useful for rebuilding "
 | |
| 		"the index to pick up new inlink text or fresher "
 | |
| 		"sitenuminlinks counts which influence ranking.";
 | |
| 	m->m_cgi   = "qrecycle";
 | |
| 	m->m_obj   = OBJ_GBREQUEST;
 | |
| 	m->m_type  = TYPE_CHECKBOX;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_REINDEX;
 | |
| 	m->m_off   = (char *)&gr.m_recycleContent - (char *)&gr;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "FORCE DELETE";
 | |
| 	m->m_desc  = "Check this checkbox to delete the results, not just "
 | |
| 		"reindex them.";
 | |
| 	m->m_cgi   = "forcedel";
 | |
| 	m->m_off   = (char *)&gr.m_forceDel - (char *)&gr;
 | |
| 	m->m_type  = TYPE_CHECKBOX;
 | |
| 	m->m_page  = PAGE_REINDEX;
 | |
| 	m->m_obj   = OBJ_GBREQUEST;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_API ;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	///////////////////
 | |
| 	//
 | |
| 	// SEARCH CONTROLS
 | |
| 	//
 | |
| 	///////////////////
 | |
| 
 | |
| 
 | |
| 	m->m_title = "do spell checking by default";
 | |
| 	m->m_desc  = "If enabled while using the XML feed, "
 | |
| 		"when Gigablast finds a spelling recommendation it will be "
 | |
| 		"included in the XML <spell> tag. Default is 0 if using an "
 | |
| 		"XML feed, 1 otherwise.";
 | |
| 	m->m_cgi   = "spell";
 | |
| 	m->m_off   = (char *)&cr.m_spellCheck - x;
 | |
| 	//m->m_soff  = (char *)&si.m_spellCheck - y;
 | |
| 	//m->m_sparm = 1;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_flags = PF_API | PF_NOSAVE | PF_CLONE;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "get scoring info by default";
 | |
| 	m->m_desc  = "Get scoring information for each result so you "
 | |
| 		"can see how each result is scored. You must explicitly "
 | |
| 		"request this using &scores=1 for the XML feed because it "
 | |
| 		"is not included by default.";
 | |
| 	m->m_cgi   = "scores"; // dedupResultsByDefault";
 | |
| 	m->m_off   = (char *)&cr.m_getDocIdScoringInfo - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "do query expansion by default";
 | |
| 	m->m_desc  = "If enabled, query expansion will expand your query "
 | |
| 		"to include the various forms and "
 | |
| 		"synonyms of the query terms.";
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_off   = (char *)&cr.m_queryExpansion - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_cgi  = "qe";
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "highlight query terms in summaries by default";
 | |
| 	m->m_desc  = "Use to disable or enable "
 | |
| 		"highlighting of the query terms in the summaries.";
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_off   = (char *)&cr.m_doQueryHighlighting - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_cgi   = "qh";
 | |
| 	m->m_smin  = 0;
 | |
| 	m->m_smax  = 8;
 | |
| 	m->m_sprpg = 1; // turn off for now
 | |
| 	m->m_sprpp = 1;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max title len";
 | |
| 	m->m_desc  = "What is the maximum number of "
 | |
| 		"characters allowed in titles displayed in the search "
 | |
| 		"results?";
 | |
| 	m->m_cgi   = "tml";
 | |
| 	m->m_off   = (char *)&cr.m_titleMaxLen - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_def   = "80";
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "consider titles from body";
 | |
| 	m->m_desc = "Can Gigablast make titles from the document content? "
 | |
| 		"Used mostly for the news collection where the title tags "
 | |
| 		"are not very reliable.";
 | |
| 	m->m_cgi   = "gtfb";
 | |
| 	m->m_off   = (char *)&cr.m_considerTitlesFromBody - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	//m->m_soff  = (char *)&si.m_considerTitlesFromBody - y;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "site cluster by default";
 | |
| 	m->m_desc  = "Should search results be site clustered? This "
 | |
| 		"limits each site to appearing at most twice in the "
 | |
| 		"search results. Sites are subdomains for the most part, "
 | |
| 		"like abc.xyz.com.";
 | |
| 	m->m_cgi   = "scd";
 | |
| 	m->m_off   = (char *)&cr.m_siteClusterByDefault - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	// buzz
 | |
| 	m->m_title = "hide all clustered results";
 | |
| 	m->m_desc  = "Only display at most one result per site.";
 | |
| 	m->m_cgi   = "hacr";
 | |
| 	m->m_off   = (char *)&cr.m_hideAllClustered - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dedup results by default";
 | |
| 	m->m_desc  = "Should duplicate search results be removed? This is "
 | |
| 		"based on a content hash of the entire document. "
 | |
| 		"So documents must be exactly the same for the most part.";
 | |
| 	m->m_cgi   = "drd"; // dedupResultsByDefault";
 | |
| 	m->m_off   = (char *)&cr.m_dedupResultsByDefault - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 1;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "do tagdb lookups for queries";
 | |
| 	m->m_desc  = "For each search result a tagdb lookup is made, "
 | |
| 		"usually across the network on distributed clusters, to "
 | |
| 		"see if the URL's site has been manually banned in tagdb. "
 | |
| 		"If you don't manually ban sites then turn this off for "
 | |
| 		"extra speed.";
 | |
| 	m->m_cgi   = "stgdbl";
 | |
| 	m->m_off   = (char *)&cr.m_doTagdbLookups - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 1;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "percent similar dedup summary default value";
 | |
| 	m->m_desc  = "If document summary (and title) are "
 | |
| 		"this percent similar "
 | |
| 		"to a document summary above it, then remove it from the "
 | |
| 		"search results. 100 means only to remove if exactly the "
 | |
| 		"same. 0 means no summary deduping.";
 | |
| 	m->m_cgi   = "psds";
 | |
| 	m->m_off   = (char *)&cr.m_percentSimilarSummary - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "90";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_smin  = 0;
 | |
| 	m->m_smax  = 100;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;       
 | |
| 
 | |
| 	m->m_title = "number of lines to use in summary to dedup";
 | |
| 	m->m_desc  = "Sets the number of lines to generate for summary "
 | |
| 		"deduping. This is to help the deduping process not throw "
 | |
| 		"out valid summaries when normally displayed summaries are "
 | |
| 		"smaller values. Requires percent similar dedup summary to "
 | |
| 		"be non-zero.";
 | |
| 	m->m_cgi   = "msld";
 | |
| 	m->m_off   = (char *)&cr.m_summDedupNumLines - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "4";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;       
 | |
| 	
 | |
| 
 | |
| 	m->m_title = "dedup URLs by default";
 | |
| 	m->m_desc  = "Should we dedup URLs with case insensitivity? This is "
 | |
|                      "mainly to correct duplicate wiki pages.";
 | |
| 	m->m_cgi   = "ddu";
 | |
| 	m->m_off   = (char *)&cr.m_dedupURLDefault - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "use vhost language detection";
 | |
| 	m->m_desc  = "Use language specific pages for home, etc.";
 | |
| 	m->m_cgi   = "vhost";
 | |
| 	m->m_off   = (char *)&cr.m_useLanguagePages - x;
 | |
| 	//m->m_soff  = (char *)&si.m_useLanguagePages - y;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	//m->m_scgi  = "vhost";
 | |
| 	m->m_smin  = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "sort language preference default";
 | |
| 	m->m_desc  = "Default language to use for ranking results. "
 | |
| 		//"This should only be used on limited collections. "
 | |
| 		"Value should be any language abbreviation, for example "
 | |
| 		"\"en\" for English. Use <i>xx</i> to give ranking "
 | |
| 		"boosts to no language in particular. See the language "
 | |
| 		"abbreviations at the bottom of the "
 | |
| 		"<a href=/admin/filters>url filters</a> page.";
 | |
| 	m->m_cgi   = "defqlang";
 | |
| 	m->m_off   = (char *)&cr.m_defaultSortLanguage2 - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = 6; // up to 5 chars + NULL, e.g. "en_US"
 | |
| 	m->m_def   = "xx";//_US";
 | |
| 	//m->m_group = 0;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "sort country preference default";
 | |
| 	m->m_desc  = "Default country to use for ranking results. "
 | |
| 		//"This should only be used on limited collections. "
 | |
| 		"Value should be any country code abbreviation, for example "
 | |
| 		"\"us\" for United States. This is currently not working.";
 | |
| 	m->m_cgi   = "qcountry";
 | |
| 	m->m_off   = (char *)&cr.m_defaultSortCountry - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = 2+1;
 | |
| 	m->m_def   = "us";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	// for post query reranking 
 | |
| 	m->m_title = "docs to check for post query demotion by default";
 | |
| 	m->m_desc  = "How many search results should we "
 | |
| 		"scan for post query demotion? "
 | |
| 		"0 disables all post query reranking. ";
 | |
| 	m->m_cgi   = "pqrds";
 | |
| 	m->m_off   = (char *)&cr.m_pqr_docsToScan - x;
 | |
| 	//m->m_soff  = (char *)&si.m_docsToScanForReranking - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 1;
 | |
| 	//m->m_scgi  = "pqrds";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max summary len";
 | |
| 	m->m_desc  = "What is the maximum number of "
 | |
| 		"characters displayed in a summary for a search result?";
 | |
| 	m->m_cgi   = "sml";
 | |
| 	m->m_off   = (char *)&cr.m_summaryMaxLen - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "512";
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max summary excerpts";
 | |
| 	m->m_desc  = "What is the maximum number of "
 | |
| 		"excerpts displayed in the summary of a search result?";
 | |
| 	m->m_cgi   = "smnl";
 | |
| 	m->m_off   = (char *)&cr.m_summaryMaxNumLines - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "4";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max summary excerpt length";
 | |
| 	m->m_desc = "What is the maximum number of "
 | |
| 		"characters allowed per summary excerpt?";
 | |
| 	m->m_cgi   = "smxcpl";
 | |
| 	m->m_off   = (char *)&cr.m_summaryMaxNumCharsPerLine - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "90";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "default number of summary excerpts by default";
 | |
| 	m->m_desc  = "What is the default number of "
 | |
| 		"summary excerpts displayed per search result?";
 | |
| 	m->m_cgi   = "sdnl";
 | |
| 	m->m_off   = (char *)&cr.m_summaryDefaultNumLines - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "3";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "max summary line width by default";
 | |
| 	m->m_desc  = "<br> tags are inserted to keep the number "
 | |
| 		"of chars in the summary per line at or below this width. "
 | |
| 		"Also affects title. "
 | |
| 		"Strings without spaces that exceed this "
 | |
| 		"width are not split. Has no affect on xml or json feed, "
 | |
| 		"only works on html.";
 | |
| 	m->m_cgi   = "smw";
 | |
| 	m->m_off   = (char *)&cr.m_summaryMaxWidth - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "80";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "bytes of doc to scan for summary generation";
 | |
| 	m->m_desc  = "Truncating this will miss out on good summaries, but "
 | |
| 		"performance will increase.";
 | |
| 	m->m_cgi   = "clmfs";
 | |
| 	m->m_off   = (char *)&cr.m_contentLenMaxForSummary - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "70000";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;       
 | |
| 
 | |
| 	m->m_title = "Prox summary carver radius";
 | |
| 	m->m_desc  = "Maximum number of characters to allow in between "
 | |
| 		"search terms.";
 | |
| 	m->m_cgi   = "pscr";
 | |
| 	m->m_off   = (char *)&cr.m_proxCarveRadius - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "256";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "front highlight tag";
 | |
| 	m->m_desc  = "Front html tag used for highlightig query terms in the "
 | |
| 		"summaries displated in the search results.";
 | |
| 	m->m_cgi   = "sfht";
 | |
| 	m->m_off   = (char *)cr.m_summaryFrontHighlightTag - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = SUMMARYHIGHLIGHTTAGMAXSIZE ;
 | |
| 	m->m_def   = "<b style=\"color:black;background-color:#ffff66\">";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "back highlight tag";
 | |
| 	m->m_desc  = "Front html tag used for highlightig query terms in the "
 | |
| 		"summaries displated in the search results.";
 | |
| 	m->m_cgi   = "sbht";
 | |
| 	m->m_off   = (char *)cr.m_summaryBackHighlightTag - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = SUMMARYHIGHLIGHTTAGMAXSIZE ;
 | |
| 	m->m_def   = "</b>";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "results to scan for gigabits generation by default";
 | |
| 	m->m_desc  = "How many search results should we "
 | |
| 		"scan for gigabit (related topics) generation. Set this to "
 | |
| 		"zero to disable gigabits generation by default.";
 | |
| 	m->m_cgi   = "dsrt";
 | |
| 	m->m_off   = (char *)&cr.m_docsToScanForTopics - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "30";
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "ip restriction for gigabits by default";
 | |
| 	m->m_desc  = "Should Gigablast only get one document per IP domain "
 | |
| 		"and per domain for gigabits (related topics) generation?";
 | |
| 	m->m_cgi   = "ipr";
 | |
| 	m->m_off   = (char *)&cr.m_ipRestrict - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	// default to 0 since newspaperarchive only has docs from same IP dom
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "remove overlapping topics";
 | |
| 	m->m_desc  = "Should Gigablast remove overlapping topics (gigabits)?";
 | |
| 	m->m_cgi   = "rot";
 | |
| 	m->m_off   = (char *)&cr.m_topicRemoveOverlaps - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "number of gigabits to show by default";
 | |
| 	m->m_desc  = "What is the number of "
 | |
| 		"related topics (gigabits) "
 | |
| 		"displayed per query? Set to 0 to save "
 | |
| 		"CPU time.";
 | |
| 	m->m_cgi   = "nrt";
 | |
| 	m->m_off   = (char *)&cr.m_numTopics - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "11";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_sprpg = 0; // do not propagate
 | |
|         m->m_sprpp = 0; // do not propagate
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 
 | |
| 	m->m_title = "min gigabit score by default";
 | |
| 	m->m_desc  = "Gigabits (related topics) with scores below this "
 | |
| 		"will be excluded. Scores range from 0% to over 100%.";
 | |
| 	m->m_cgi   = "mts";
 | |
| 	m->m_off   = (char *)&cr.m_minTopicScore - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "5";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "min gigabit doc count by default";
 | |
| 	m->m_desc  = "How many documents must contain the gigabit "
 | |
| 		"(related topic) in order for it to be displayed.";
 | |
| 	m->m_cgi   = "mdc";
 | |
| 	m->m_off   = (char *)&cr.m_minDocCount - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "2";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "dedup doc percent for gigabits (related topics)";
 | |
| 	m->m_desc  = "If a document is this percent similar to another "
 | |
| 		"document with a higher score, then it will not contribute "
 | |
| 		"to the gigabit generation.";
 | |
| 	m->m_cgi   = "dsp";
 | |
| 	m->m_off   = (char *)&cr.m_dedupSamplePercent - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "80";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max words per gigabit (related topic) by default";
 | |
| 	m->m_desc  = "Maximum number of words a gigabit (related topic) "
 | |
| 		"can have. Affects xml feeds, too.";
 | |
| 	m->m_cgi   = "mwpt";
 | |
| 	m->m_off   = (char *)&cr.m_maxWordsPerTopic - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "6";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "gigabit max sample size";
 | |
| 	m->m_desc  = "Max chars to sample from each doc for gigabits "
 | |
| 		"(related topics).";
 | |
| 	m->m_cgi   = "tmss";
 | |
| 	m->m_off   = (char *)&cr.m_topicSampleSize - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "4096";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "gigabit max punct len";
 | |
| 	m->m_desc  = "Max sequential punct chars allowed in a gigabit "
 | |
| 		"(related topic). "
 | |
| 		" Set to 1 for speed, 5 or more for best topics but twice as "
 | |
| 		"slow.";
 | |
| 	m->m_cgi   = "tmpl";
 | |
| 	m->m_off   = (char *)&cr.m_topicMaxPunctLen - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "display dmoz categories in results";
 | |
| 	m->m_desc  = "If enabled, results in dmoz will display their "
 | |
| 		"categories on the results page.";
 | |
| 	m->m_cgi   = "ddc";
 | |
| 	m->m_off   = (char *)&cr.m_displayDmozCategories - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "display indirect dmoz categories in results";
 | |
| 	m->m_desc  = "If enabled, results in dmoz will display their "
 | |
| 		"indirect categories on the results page.";
 | |
| 	m->m_cgi   = "didc";
 | |
| 	m->m_off   = (char *)&cr.m_displayIndirectDmozCategories - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "display Search Category link to query category of result";
 | |
| 	m->m_desc  = "If enabled, a link will appear next to each category "
 | |
| 		"on each result allowing the user to perform their query "
 | |
| 		"on that entire category.";
 | |
| 	m->m_cgi   = "dscl";
 | |
| 	m->m_off   = (char *)&cr.m_displaySearchCategoryLink - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "use dmoz for untitled";
 | |
| 	m->m_desc  = "Yes to use DMOZ given title when a page is untitled but "
 | |
| 		     "is in DMOZ.";
 | |
| 	m->m_cgi   = "udfu";
 | |
| 	m->m_off   = (char *)&cr.m_useDmozForUntitled - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "show dmoz summaries";
 | |
| 	m->m_desc  = "Yes to always show DMOZ summaries with search results "
 | |
| 		     "that are in DMOZ.";
 | |
| 	m->m_cgi   = "udsm";
 | |
| 	m->m_off   = (char *)&cr.m_showDmozSummary - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "show adult category on top";
 | |
| 	m->m_desc  = "Yes to display the Adult category in the Top category";
 | |
| 	m->m_cgi   = "sacot";
 | |
| 	m->m_off   = (char *)&cr.m_showAdultCategoryOnTop - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_API | PF_CLONE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "show sensitive info in xml feed";
 | |
| 	m->m_desc  = "If enabled, we show certain tagb tags for each "
 | |
| 		"search result, allow &inlinks=1 cgi parms, show "
 | |
| 		"<docsInColl>, etc. in the xml feed. Created for buzzlogic.";
 | |
| 	m->m_cgi   = "sss";
 | |
| 	m->m_off   = (char *)&cr.m_showSensitiveStuff - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "display indexed date";
 | |
| 	m->m_desc  = "Display the indexed date along with results.";
 | |
| 	m->m_cgi   = "didt";
 | |
| 	m->m_off   = (char *)&cr.m_displayIndexedDate - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "display last modified date";
 | |
| 	m->m_desc  = "Display the last modified date along with results.";
 | |
| 	m->m_cgi   = "dlmdt";
 | |
| 	m->m_off   = (char *)&cr.m_displayLastModDate - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "display published date";
 | |
| 	m->m_desc  = "Display the published date along with results.";
 | |
| 	m->m_cgi   = "dipt";
 | |
| 	m->m_off   = (char *)&cr.m_displayPublishDate - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "enable click 'n' scroll";
 | |
| 	m->m_desc  = "The [cached] link on results pages loads click n "
 | |
| 		"scroll.";
 | |
| 	m->m_cgi   = "ecns";
 | |
| 	m->m_off   = (char *)&cr.m_clickNScrollEnabled - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
|         m->m_title = "use data feed account server";
 | |
|         m->m_desc  = "Enable/disable the use of a remote account verification "
 | |
|                 "for Data Feed Customers.";
 | |
|         m->m_cgi   = "dfuas";
 | |
|         m->m_off   = (char *)&cr.m_useDFAcctServer - x;
 | |
|         m->m_type  = TYPE_BOOL;
 | |
|         m->m_def   = "0";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
|         m++;
 | |
| 
 | |
|         m->m_title = "data feed server ip";
 | |
|         m->m_desc  = "The ip address of the Gigablast data feed server to "
 | |
|                 "retrieve customer account information from.";
 | |
|         m->m_cgi   = "dfip";
 | |
|         m->m_off   = (char *)&cr.m_dfAcctIp - x;
 | |
|         m->m_type  = TYPE_IP;
 | |
|         m->m_def   = "2130706433";
 | |
|         m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
|         m++;
 | |
| 
 | |
|         m->m_title = "data feed server port";
 | |
|         m->m_desc  = "The port of the Gigablast data feed server to retrieve "
 | |
|                 "customer account information from.";
 | |
|         m->m_cgi   = "dfport";
 | |
|         m->m_off   = (char *)&cr.m_dfAcctPort - x;
 | |
|         m->m_type  = TYPE_LONG;
 | |
|         m->m_def   = "8040";
 | |
|         m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
|         m++;
 | |
| 
 | |
| 	/*
 | |
|         m->m_title = "data feed server collection";
 | |
|         m->m_desc  = "The collection on the Gigablast data feed server to "
 | |
|                 "retrieve customer account information from.";
 | |
|         m->m_cgi   = "dfcoll";
 | |
|         m->m_off   = (char *)&cr.m_dfAcctColl - x;
 | |
|         m->m_type  = TYPE_STRING;
 | |
|         m->m_size  = MAX_COLL_LEN;
 | |
|         m->m_def   = "customers";
 | |
|         m->m_group = 0;
 | |
|         m++;
 | |
| 	*/
 | |
| 
 | |
| 	//
 | |
| 	// not sure cols=x goes here or not
 | |
| 	//
 | |
| 	/*
 | |
| 	m->m_title = "Number Of Columns(1-6)";
 | |
| 	m->m_desc  = "How many columns results should be shown in. (1-6)";
 | |
| 	m->m_cgi   = "cols";
 | |
| 	m->m_smin  = 1;
 | |
| 	m->m_smax  = 6;
 | |
| 	m->m_off   = (char *)&cr.m_numCols - x;
 | |
| 	m->m_soff  = (char *)&si.m_numCols - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_sparm = 1;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	//
 | |
| 	// Gets the screen width
 | |
| 	//
 | |
| 	/*
 | |
| 	m->m_title = "Screen Width";
 | |
| 	m->m_desc  = "screen size of browser window";
 | |
| 	m->m_cgi   = "ws";
 | |
| 	m->m_smin  = 600;
 | |
| 	m->m_off   = (char *)&cr.m_screenWidth - x;
 | |
| 	m->m_soff  = (char *)&si.m_screenWidth - y;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "1100";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_sparm = 1;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "collection hostname";
 | |
| 	m->m_desc  = "Hostname that will default to this collection. Blank"
 | |
| 		     " for none or default collection.";
 | |
| 	m->m_cgi   = "chstn";
 | |
| 	m->m_off   = (char *)cr.m_collectionHostname - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_URL_LEN;
 | |
| 	m->m_def   = "";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "collection hostname (1)";
 | |
| 	m->m_desc  = "Hostname that will default to this collection. Blank"
 | |
| 		     " for none or default collection.";
 | |
| 	m->m_cgi   = "chstna";
 | |
| 	m->m_off   = (char *)cr.m_collectionHostname1 - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_URL_LEN;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "collection hostname (2)";
 | |
| 	m->m_desc  = "Hostname that will default to this collection. Blank"
 | |
| 		     " for none or default collection.";
 | |
| 	m->m_cgi   = "chstnb";
 | |
| 	m->m_off   = (char *)cr.m_collectionHostname2 - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_URL_LEN;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "home page";
 | |
| 	static SafeBuf s_tmpBuf;
 | |
| 	s_tmpBuf.setLabel("stmpb1");
 | |
| 	s_tmpBuf.safePrintf (
 | |
| 			  "Html to display for the home page. "
 | |
| 			  "Leave empty for default home page. "
 | |
| 			  "Use %%N for total "
 | |
| 			  "number of pages indexed. Use %%n for number of "
 | |
| 			  "pages indexed "
 | |
| 			  "for the current collection. "
 | |
| 			  //"Use %%H so Gigablast knows where to insert "
 | |
| 			  //"the hidden form input tags, which must be there. "
 | |
| 			  "Use %%c to insert the current collection name. "
 | |
| 			  //"Use %T to display the standard footer. "
 | |
| 			  "Use %%q to display the query in "
 | |
| 			  "a text box. "
 | |
| 			  "Use %%t to display the directory TOP. "
 | |
| 			  "Example to paste into textbox: "
 | |
| 			  "<br><i>"
 | |
| 			  );
 | |
| 	s_tmpBuf.htmlEncode (
 | |
| 			      "<html>"
 | |
| 			      "<title>My Gigablast Search Engine</title>"
 | |
| 			      "<script>\n"
 | |
| 			      //"<!--"
 | |
| 			      "function x(){document.f.q.focus();}"
 | |
| 			      //"// -->"
 | |
| 			      "\n</script>"
 | |
| 			      "<body onload=\"x()\">"
 | |
| 			      "<br><br>"
 | |
| 			      "<center>"
 | |
| 			      "<a href=/>"
 | |
| 			      "<img border=0 width=500 height=122 "
 | |
| 			      "src=/logo-med.jpg></a>"
 | |
| 			      "<br><br>"
 | |
| 			      "<b>My Search Engine</b>"
 | |
| 			      "<br><br>"
 | |
| 			      // "<br><br><br>"
 | |
| 			      // "<b>web</b> "
 | |
| 			      // "     "
 | |
| 			      // "<a href=\"/Top\">directory</a> "
 | |
| 			      // "     "
 | |
| 			      // "<a href=/adv.html>advanced search</a> "
 | |
| 			      // "     "
 | |
| 			      // "<a href=/addurl "
 | |
| 			      // "title=\"Instantly add your url to "
 | |
| 			      //"the index\">"
 | |
| 			      // "add url</a>"
 | |
| 			      // "<br><br>"
 | |
| 			      "<form method=get action=/search name=f>"
 | |
| 			      "<input type=hidden name=c value=\"%c\">"
 | |
| 			      "<input name=q type=text size=60 value=\"\">"
 | |
| 			      " "
 | |
| 			      "<input type=\"submit\" value=\"Search\">"
 | |
| 			      "</form>"
 | |
| 			      "<br>"
 | |
| 			      "<center>"
 | |
| 			      "Searching the <b>%c</b> collection of %n "
 | |
| 			      "documents."
 | |
| 			      "</center>"
 | |
| 			      "<br>"
 | |
| 			      "</body></html>")  ;
 | |
| 	s_tmpBuf.safePrintf("</i>");
 | |
| 	m->m_desc = s_tmpBuf.getBufStart();
 | |
| 	m->m_xml  = "homePageHtml";
 | |
| 	m->m_cgi   = "hp";
 | |
| 	m->m_off   = (char *)&cr.m_htmlRoot - x;
 | |
| 	//m->m_plen  = (char *)&cr.m_htmlRootLen - x; // length of string
 | |
| 	m->m_type  = TYPE_SAFEBUF;//STRINGBOX;
 | |
| 	//m->m_size  = MAX_HTML_LEN + 1;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_TEXTAREA | PF_CLONE;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "html head";
 | |
|         static SafeBuf s_tmpBuf2;
 | |
| 	s_tmpBuf2.setLabel("stmpb2");
 | |
| 	s_tmpBuf2.safePrintf("Html to display before the search results. ");
 | |
| 	char *fff = "Leave empty for default. "
 | |
| 		"Convenient "
 | |
| 		"for changing colors and displaying logos. Use "
 | |
| 		"the variable, "
 | |
| 		"%q, to represent the query to display in a "
 | |
| 		"text box. "
 | |
| 		"Use %e to print the url encoded query.  "
 | |
| 		//"Use %e to print the page encoding. "
 | |
| 		// i guess this is out for now
 | |
| 		//"Use %D to "
 | |
| 		//"print a drop down "
 | |
| 		//"menu for the number of search results to return. "
 | |
| 		"Use %S "
 | |
| 		"to print sort by date or relevance link. Use "
 | |
| 		"%L to "
 | |
| 		"display the logo. Use %R to display radio "
 | |
| 		"buttons for site "
 | |
| 		"search. Use %F to begin the form. and use %H to "
 | |
| 		"insert "
 | |
| 		"hidden text "
 | |
| 		"boxes of parameters like the current search result "
 | |
| 		"page number. "
 | |
| 		"BOTH %F and %H are necessary for the html head, but do "
 | |
| 		"not duplicate them in the html tail. "
 | |
| 		"Use %f to display "
 | |
| 		"the family filter radio buttons. "
 | |
| 		// take this out for now
 | |
| 		//"Directory: Use %s to display the directory "
 | |
| 		//"search type options. "
 | |
| 		//"Use %l to specify the "
 | |
| 		//"location of "
 | |
| 		//"dir=rtl in the body tag for RTL pages. "
 | |
| 		//"Use %where and %when to substitute the where "
 | |
| 		//"and when of "
 | |
| 		//"the query. "
 | |
| 		//"These values may be set based on the cookie "
 | |
| 		//"if "
 | |
| 		//"none was explicitly given. "
 | |
| 		//"IMPORTANT: In the xml configuration file, "
 | |
| 		//"this html "
 | |
| 		//"must be encoded (less thans mapped to <, "
 | |
| 		//"etc.).";
 | |
| 		"Example to paste into textbox: <br><i>";
 | |
| 	s_tmpBuf2.safeStrcpy(fff);
 | |
| 	s_tmpBuf2.htmlEncode(
 | |
| 		"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 "
 | |
| 		"Transitional//EN\">\n"
 | |
| 		"<html>\n"
 | |
| 		"<head>\n"
 | |
| 		"<title>My Gigablast Search Results</title>\n"
 | |
| 		"<meta http-equiv=\"Content-Type\" "
 | |
| 		"content=\"text/html; charset=utf-8\">\n"
 | |
| 		"</head>\n"
 | |
| 		"<body%l>\n"
 | |
| 
 | |
| 		//"<form method=\"get\" action=\"/search\" name=\"f\">\n"
 | |
| 		// . %F prints the <form method=...> tag
 | |
| 		// . method will be GET or POST depending on the size of the
 | |
| 		//   input data. MSIE can't handle sending large GETs requests
 | |
| 		//   that are more than like 1k or so, which happens a lot with
 | |
| 		//   our CTS technology (the sites= cgi parm can be very large)
 | |
| 		"%F"
 | |
| 		"<table cellpadding=\"2\" cellspacing=\"0\" border=\"0\">\n"
 | |
| 		"<tr>\n"
 | |
| 		"<td valign=top>"
 | |
| 		// this prints the Logo
 | |
| 		"%L"
 | |
| 		//"<a href=\"/\">"
 | |
| 		//"<img src=\"logo2.gif\" alt=\"Gigablast Logo\" "
 | |
| 		//"width=\"210\" height=\"25\" border=\"0\" valign=\"top\">"
 | |
| 		//"</a>"
 | |
| 		"</td>\n"
 | |
| 
 | |
| 		"<td valign=top>\n"
 | |
| 		"<nobr>\n"
 | |
| 		"<input type=\"text\" name=\"q\" size=\"60\" value=\"\%q\"> " 
 | |
| 		// %D is the number of results drop down menu
 | |
| 		"\%D" 
 | |
| 		"<input type=\"submit\" value=\"Blast It!\" border=\"0\">\n"
 | |
| 		"</nobr>\n"
 | |
| 		// family filter
 | |
| 		// %R radio button for site(s) search
 | |
| 		"<br>%f %R\n"
 | |
| 		// directory search options
 | |
| 		// MDW: i guess this is out for now
 | |
| 		//"</td><td>%s</td>\n"
 | |
| 		"</tr>\n"
 | |
| 		"</table>\n"
 | |
| 		// %H prints the hidden for vars. Print them *after* the input 
 | |
| 		// text boxes, radio buttons, etc. so these hidden vars can be 
 | |
| 		// overridden as they should be.
 | |
| 		"%H"); 
 | |
| 	s_tmpBuf2.safePrintf("</i>");
 | |
| 	m->m_desc  = s_tmpBuf2.getBufStart();
 | |
| 	m->m_xml   = "htmlHead";
 | |
| 	m->m_cgi   = "hh";
 | |
| 	m->m_off   = (char *)&cr.m_htmlHead - x;
 | |
| 	m->m_type  = TYPE_SAFEBUF;//STRINGBOX;
 | |
| 	m->m_def   = "";
 | |
| 	//m->m_sparm = 1;
 | |
| 	//m->m_soff  = (char *)&si.m_htmlHead - y;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_TEXTAREA | PF_CLONE;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "html tail";
 | |
|         static SafeBuf s_tmpBuf3;
 | |
| 	s_tmpBuf3.setLabel("stmpb3");
 | |
| 	s_tmpBuf3.safePrintf("Html to display after the search results. ");
 | |
| 	s_tmpBuf3.safeStrcpy(fff);
 | |
| 	s_tmpBuf3.htmlEncode (
 | |
| 		"<br>\n"
 | |
| 		//"%F"
 | |
| 		"<table cellpadding=2 cellspacing=0 border=0>\n"
 | |
| 		"<tr><td></td>\n"
 | |
| 		//"<td valign=top align=center>\n"
 | |
| 		// this old query is overriding a newer query above so
 | |
| 		// i commented out. mfd 6/2014
 | |
| 		//"<nobr>"
 | |
| 		//"<input type=text name=q size=60 value=\"%q\"> %D\n"
 | |
| 		//"<input type=submit value=\"Blast It!\" border=0>\n"
 | |
| 		//"</nobr>"
 | |
| 		// family filter
 | |
| 		//"<br>%f %R\n"
 | |
| 		//"<br>"
 | |
| 		//"%R\n"
 | |
| 		//"</td>"
 | |
| 		"<td>%s</td>\n"
 | |
| 		"</tr>\n"
 | |
| 		"</table>\n"
 | |
| 		"Try your search on  \n"
 | |
| 		"<a href=http://www.google.com/search?q=%e>google</a>  \n"
 | |
| 		"<a href=http://search.yahoo.com/bin/search?p=%e>yahoo</a> "
 | |
| 		" \n"
 | |
| 		//"<a href=http://www.alltheweb.com/search?query=%e>alltheweb"
 | |
| 		//"</a>\n"
 | |
| 		"<a href=http://search.dmoz.org/cgi-bin/search?search=%e>"
 | |
| 		"dmoz</a>  \n"
 | |
| 		//"<a href=http://search01.altavista.com/web/results?q=%e>"
 | |
| 		//"alta vista</a>\n"
 | |
| 		//"<a href=http://s.teoma.com/search?q=%e>teoma</a>  \n"
 | |
| 		//"<a href=http://wisenut.com/search/query.dll?q=%e>wisenut"
 | |
| 		//"</a>\n"
 | |
| 		"</font></body>\n");
 | |
| 	s_tmpBuf3.safePrintf("</i>");
 | |
| 	m->m_desc  = s_tmpBuf3.getBufStart();
 | |
| 	m->m_xml   = "htmlTail";
 | |
| 	m->m_cgi   = "ht";
 | |
| 	m->m_off   = (char *)&cr.m_htmlTail - x;
 | |
| 	m->m_type  = TYPE_SAFEBUF;//STRINGBOX;
 | |
| 	m->m_def   = "";
 | |
| 	//m->m_sparm = 1;
 | |
| 	//m->m_soff  = (char *)&si.m_htmlHead - y;
 | |
| 	m->m_page  = PAGE_SEARCH;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_TEXTAREA | PF_CLONE;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	///////////////////////////////////////////
 | |
| 	// PAGE SPIDER CONTROLS
 | |
| 	///////////////////////////////////////////
 | |
| 
 | |
| 	// just a comment in the conf file
 | |
| 	m->m_desc  = 
 | |
| 		"All <, >, \" and # characters that are values for a field "
 | |
| 		"contained herein must be represented as "
 | |
| 		"<, >, " and # respectively.";
 | |
| 	m->m_type  = TYPE_COMMENT;
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "spidering enabled";
 | |
| 	m->m_desc  = "Controls just the spiders for this collection.";
 | |
| 	m->m_cgi   = "cse";
 | |
| 	m->m_off   = (char *)&cr.m_spideringEnabled - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	// this linked list of colls is in Spider.cpp and used to only
 | |
| 	// poll the active spider colls for spidering. so if coll
 | |
| 	// gets paused/unpaused we have to update it.
 | |
| 	m->m_flags = PF_CLONE | PF_REBUILDACTIVELIST;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "site list";
 | |
| 	m->m_xml   = "siteList";
 | |
| 	m->m_desc  = "List of sites to spider, one per line. "
 | |
| 		"See <a href=#examples>example site list</a> below. "
 | |
| 		"Gigablast uses the "
 | |
| 		"<a href=/admin/filters#insitelist>insitelist</a> "
 | |
| 		"directive on "
 | |
| 		"the <a href=/admin/filters>url filters</a> "
 | |
| 		"page to make sure that the spider only indexes urls "
 | |
| 		"that match the site patterns you specify here, other than "
 | |
| 		"urls you add individually via the add urls or inject url "
 | |
| 		"tools. "
 | |
| 		"Limit list to 300MB. If you have a lot of INDIVIDUAL urls "
 | |
| 		"to add then consider using the <a href=/admin/addurl>addurl"
 | |
| 		"</a> interface.";
 | |
| 	m->m_cgi   = "sitelist";
 | |
| 	m->m_off   = (char *)&cr.m_siteListBuf - x;
 | |
| 	m->m_page  = PAGE_SPIDER;// PAGE_SITES;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_type  = TYPE_SAFEBUF;
 | |
| 	m->m_func  = CommandUpdateSiteList;
 | |
| 	m->m_def   = "";
 | |
| 	// rebuild urlfilters now will nuke doledb and call updateSiteList()
 | |
| 	m->m_flags = PF_TEXTAREA | PF_REBUILDURLFILTERS | PF_CLONE;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "reset collection";
 | |
| 	m->m_desc  = "Remove all documents from the collection and turn "
 | |
| 		"spiders off.";
 | |
| 	m->m_cgi   = "reset";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_func2 = CommandResetColl;
 | |
| 	m->m_cast  = 1;
 | |
| 	m->m_flags = PF_HIDDEN;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "restart collection";
 | |
| 	m->m_desc  = "Remove all documents from the collection and re-add "
 | |
| 		"seed urls from site list.";
 | |
| 	m->m_cgi   = "restart";
 | |
| 	m->m_type  = TYPE_CMD;
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_func2 = CommandRestartColl;
 | |
| 	m->m_cast  = 1;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "new spidering enabled";
 | |
| 	m->m_desc  = "When enabled the spider adds NEW "
 | |
| 		"pages to your index. ";
 | |
| 	m->m_cgi  = "nse";
 | |
| 	m->m_off   = (char *)&cr.m_newSpideringEnabled - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "old spidering enabled";
 | |
| 	m->m_desc  = "When enabled the spider will re-visit "
 | |
| 		"and update pages that are already in your index.";
 | |
| 	m->m_cgi  = "ose";
 | |
| 	m->m_off   = (char *)&cr.m_oldSpideringEnabled - x;
 | |
| 	m->m_type  = TYPE_BOOL; 
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "new spider weight";
 | |
| 	m->m_desc  = "Weight time slices of new spiders in the priority "
 | |
| 		"page by this factor relative to the old spider queues.";
 | |
| 	m->m_cgi  = "nsw";
 | |
| 	m->m_off   = (char *)&cr.m_newSpiderWeight - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "1.0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "max spiders";
 | |
| 	m->m_desc  = "What is the maximum number of web "
 | |
| 		"pages the spider is allowed to download "
 | |
| 		"simultaneously PER HOST for THIS collection? The "
 | |
| 		"maximum number of spiders over all collections is "
 | |
| 		"controlled in the <i>master controls</i>.";
 | |
| 	m->m_cgi   = "mns";
 | |
| 	m->m_off   = (char *)&cr.m_maxNumSpiders - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	// make it the hard max so control is really in the master controls
 | |
| 	m->m_def   = "300";
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_CLONE;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "spider delay in milliseconds";
 | |
| 	m->m_desc  = "make each spider wait this many milliseconds before "
 | |
| 		"getting the ip and downloading the page.";
 | |
| 	m->m_cgi  = "sdms";
 | |
| 	m->m_off   = (char *)&cr.m_spiderDelayInMilliseconds - x; 
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_CLONE;
 | |
| 	m++; 
 | |
| 
 | |
| 	m->m_title = "obey robots.txt";
 | |
| 	m->m_xml   = "useRobotstxt";
 | |
| 	m->m_desc  = "If this is true Gigablast will respect "
 | |
| 		"the robots.txt convention and rel no follow meta tags.";
 | |
| 	m->m_cgi   = "obeyRobots";
 | |
| 	m->m_off   = (char *)&cr.m_useRobotsTxt - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_CLONE;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "obey rel no follow links";
 | |
| 	m->m_desc  = "If this is true Gigablast will respect "
 | |
| 		"the rel no follow link attribute.";
 | |
| 	m->m_cgi   = "obeyRelNoFollow";
 | |
| 	m->m_off   = (char *)&cr.m_obeyRelNoFollowLinks - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_CLONE;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max robots.txt cache age";
 | |
| 	m->m_desc  = "How many seconds to cache a robots.txt file for. "
 | |
| 		"86400 is 1 day. 0 means Gigablast will not read from the "
 | |
| 		"cache at all and will download the robots.txt before every "
 | |
| 		"page if robots.txt use is enabled above. However, if this is "
 | |
| 		"0 then Gigablast will still store robots.txt files in the "
 | |
| 		"cache.";
 | |
| 	m->m_cgi   = "mrca";
 | |
| 	m->m_off   = (char *)&cr.m_maxRobotsCacheAge - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "86400"; // 24*60*60 = 1day
 | |
| 	m->m_units = "seconds";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_CLONE;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "always use spider proxies";
 | |
| 	m->m_desc  = "If this is true Gigablast will ALWAYS use the proxies "
 | |
| 		"listed on the <a href=/admin/proxies>proxies</a> "
 | |
| 		"page for "
 | |
| 		"spidering for "
 | |
| 		"this collection."
 | |
| 		//"regardless whether the proxies are enabled "
 | |
| 		//"on the <a href=/admin/proxies>proxies</a> page."
 | |
| 		;
 | |
| 	m->m_cgi   = "useproxies";
 | |
| 	m->m_off   = (char *)&cr.m_forceUseFloaters - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_CLONE;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "automatically use spider proxies";
 | |
| 	m->m_desc  = "Use the spider proxies listed on the proxies page "
 | |
| 		"if gb detects that "
 | |
| 		"a webserver is throttling the spiders. This way we can "
 | |
| 		"learn the webserver's spidering policy so that our spiders "
 | |
| 		"can be more polite. If no proxies are listed on the "
 | |
| 		"proxies page then this parameter will have no effect.";
 | |
| 	m->m_cgi   = "automaticallyuseproxies";
 | |
| 	m->m_off   = (char *)&cr.m_automaticallyUseProxies - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_CLONE;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 
 | |
| 	m->m_title = "automatically back off";
 | |
| 	m->m_desc  = "Set the crawl delay to 5 seconds if gb detects "
 | |
| 		"that an IP is throttling or banning gigabot from crawling "
 | |
| 		"it. The crawl delay just applies to that IP. "
 | |
| 		"Such throttling will be logged.";
 | |
| 	m->m_cgi   = "automaticallybackoff";
 | |
| 	m->m_xml   = "automaticallyBackOff";
 | |
| 	m->m_off   = (char *)&cr.m_automaticallyBackOff - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	// a lot of pages have recaptcha links but they have valid content
 | |
| 	// so leave this off for now... they have it in a hidden div which
 | |
| 	// popups to email the article link or whatever to someone.
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_CLONE;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "use time axis";
 | |
| 	m->m_desc  = "If this is true Gigablast will index the same "
 | |
| 		"url multiple times if its content varies over time, "
 | |
| 		"rather than overwriting the older version in the index. "
 | |
| 		"Useful for archive web pages as they change over time.";
 | |
| 	m->m_cgi   = "usetimeaxis";
 | |
| 	m->m_off   = (char *)&cr.m_useTimeAxis - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_CLONE;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "index warc or arc files";
 | |
| 	m->m_desc  = "If this is true Gigablast will index .warc and .arc "
 | |
| 		"files by injecting the pages contained in them as if they "
 | |
| 		"were spidered with the content in the .warc or .arc file. "
 | |
| 		"The spidered time will be taken from the archive file "
 | |
| 		"as well.";
 | |
| 	m->m_cgi   = "indexwarcs";
 | |
| 	m->m_off   = (char *)&cr.m_indexWarcs - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_CLONE;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "add url enabled";
 | |
| 	m->m_desc  = "If this is enabled others can add "
 | |
| 		"web pages to your index via the add url page.";
 | |
| 	m->m_cgi   = "aue";
 | |
| 	m->m_off   = (char *)&cr.m_addUrlEnabled - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "daily merge time";
 | |
| 	m->m_desc  = "Do a tight merge on posdb and titledb at this time "
 | |
| 		"every day. This is expressed in MINUTES past midnight UTC. "
 | |
| 		"UTC is 5 hours ahead "
 | |
| 		"of EST and 7 hours ahead of MST. Leave this as -1 to "
 | |
| 		"NOT perform a daily merge. To merge at midnight EST use "
 | |
| 		"60*5=300 and midnight MST use 60*7=420.";
 | |
| 	m->m_cgi   = "dmt";
 | |
| 	m->m_off   = (char *)&cr.m_dailyMergeTrigger - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "-1";
 | |
| 	m->m_units = "minutes";
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_CLONE;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "daily merge days";
 | |
| 	m->m_desc  = "Comma separated list of days to merge on. Use "
 | |
| 		"0 for Sunday, 1 for Monday, ... 6 for Saturday. Leaving "
 | |
| 		"this parameter empty or without any numbers will make the "
 | |
| 		"daily merge happen every day";
 | |
| 	m->m_cgi   = "dmdl";
 | |
| 	m->m_off   = (char *)&cr.m_dailyMergeDOWList - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = 48;
 | |
| 	// make sunday the default
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_CLONE;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "daily merge last started";
 | |
| 	m->m_desc  = "When the daily merge was last kicked off. Expressed in "
 | |
| 		"UTC in seconds since the epoch.";
 | |
| 	m->m_cgi   = "dmls";
 | |
| 	m->m_off   = (char *)&cr.m_dailyMergeStarted - x;
 | |
| 	m->m_type  = TYPE_LONG_CONST;
 | |
| 	m->m_def   = "-1";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_NOAPI;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "use datedb";
 | |
| 	m->m_desc  = "Index documents for generating results sorted by date "
 | |
| 		"or constrained by date range. Only documents indexed while "
 | |
| 		"this is enabled will be returned for date-related searches.";
 | |
| 	m->m_cgi   = "ud";
 | |
| 	m->m_off   = (char *)&cr.m_useDatedb - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "age cutoff for datedb";
 | |
| 	m->m_desc  = "Do not index pubdates into datedb that are more "
 | |
| 		"than this many days old. Use -1 for no limit. A value "
 | |
| 		"of zero essentially turns off datedb. Pre-existing pubdates "
 | |
| 		"in datedb that fail to meet this constraint WILL BE "
 | |
| 		"COMPLETELY ERASED when datedb is merged.";
 | |
| 	m->m_cgi   = "dbc";
 | |
| 	m->m_off   = (char *)&cr.m_datedbCutoff - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "-1";
 | |
| 	m->m_units = "days";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "datedb default timezone";
 | |
| 	m->m_desc  = "Default timezone to use when none specified on parsed "
 | |
| 		"time.  Use offset from GMT, i.e 0400 (AMT) or -0700 (MST)";
 | |
| 	m->m_cgi   = "ddbdt";
 | |
| 	m->m_off   = (char *)&cr.m_datedbDefaultTimezone - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	//m->m_title = "days before now to index";
 | |
| 	//m->m_desc  = "Only index page if the datedb date was found to be "
 | |
| 	//	"within this many days of the current time.  Use 0 to index "
 | |
| 	//	"all dates.  Parm is float for fine control.";
 | |
| 	//m->m_cgi   = "ddbdbn";
 | |
| 	//m->m_off   = (char *)&cr.m_datedbDaysBeforeNow - x;
 | |
| 	//m->m_type  = TYPE_FLOAT;
 | |
| 	//m->m_def   = "0";
 | |
| 	//m->m_group = 0;
 | |
| 	//m++;
 | |
| 
 | |
| 	m->m_title = "turing test enabled";
 | |
| 	m->m_desc  = "If this is true, users will have to "
 | |
| 		"pass a simple Turing test to add a url. This prevents "
 | |
| 		"automated url submission.";
 | |
| 	m->m_cgi   = "dtt";
 | |
| 	m->m_off   = (char *)&cr.m_doTuringTest - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_CLONE;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max add urls";
 | |
| 	m->m_desc = "Maximum number of urls that can be "
 | |
| 		"submitted via the addurl interface, per IP domain, per "
 | |
| 		"24 hour period. A value less than or equal to zero "
 | |
| 		"implies no limit.";
 | |
| 	m->m_cgi = "mau";
 | |
| 	m->m_off = (char *)&cr.m_maxAddUrlsPerIpDomPerDay - x;
 | |
| 	m->m_type = TYPE_LONG;
 | |
| 	m->m_def = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_CLONE;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	// use url filters harvest links parm for this now
 | |
| 	/*
 | |
| 	m->m_title = "spider links";
 | |
| 	m->m_desc  = "If this is false, the spider will not "
 | |
| 		"harvest links from web pages it visits. Links that it does "
 | |
| 		"harvest will be attempted to be indexed at a later time. ";
 | |
| 	m->m_cgi   = "sl";
 | |
| 	m->m_off   = (char *)&cr.m_spiderLinks - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 
 | |
| 	  MDW: use the "onsite" directive in the url filters page now...
 | |
| 
 | |
| 	m->m_title = "only spider links from same host";
 | |
| 	m->m_desc  = "If this is true the spider will only harvest links "
 | |
| 		"to pages that are contained on the same host as the page "
 | |
| 		"that is being spidered. "
 | |
| 		"Example: When spidering a page from "
 | |
| 		"www.gigablast.com, only links to pages that are from "
 | |
| 		"www.gigablast.com would "
 | |
| 		"be harvested, if this switch were enabled. This allows you "
 | |
| 		"to seed the spider with URLs from a specific set of hosts "
 | |
| 		"and ensure that only links to pages that are from those "
 | |
| 		"hosts are harvested.";
 | |
| 	m->m_cgi   = "slsh";
 | |
| 	m->m_off   = (char *)&cr.m_sameHostLinks - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "do not re-add old outlinks more than this many days";
 | |
| 	m->m_desc  = "If less than this many days have elapsed since the "
 | |
| 		"last time we added the outlinks to spiderdb, do not re-add "
 | |
| 		"them to spiderdb. Saves resources.";
 | |
| 	m->m_cgi   = "slrf";
 | |
| 	m->m_off   = (char *)&cr.m_outlinksRecycleFrequencyDays - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "30";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "spider links by priority";
 | |
| 	m->m_desc   = "Specify priorities for which links should be spidered. "
 | |
| 		"If the <i>spider links</i> option above is "
 | |
| 		"disabled then these setting will have no effect.";
 | |
| 	m->m_cgi   = "slp";
 | |
| 	m->m_xml   = "spiderLinksByPriority";
 | |
| 	m->m_off   = (char *)&cr.m_spiderLinksByPriority - x;
 | |
| 	m->m_type  = TYPE_PRIORITY_BOXES; // array of numbered (0-(MAX_SPIDER_PRIORITIES-1)) checkboxes
 | |
| 	m->m_fixed = MAX_SPIDER_PRIORITIES;
 | |
| 	m->m_def   = "1"; // default for each one is on
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "min link priority";
 | |
| 	m->m_desc  = "Only add links to the spider "
 | |
| 		"queue if their spider priority is this or higher. "
 | |
| 		"This can make the spider process more efficient "
 | |
| 		"since a lot of disk seeks are used when adding "
 | |
| 		"links.";
 | |
| 	m->m_cgi   = "mlp";
 | |
| 	m->m_off   = (char *)&cr.m_minLinkPriority - x;
 | |
| 	m->m_type  = TYPE_PRIORITY;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*	m->m_title = "maximum hops from parent page";
 | |
| 	m->m_desc  = "Only index pages that are within a particular number "
 | |
| 		"of hops from the parent page given in Page Add Url. -1 means "
 | |
| 		"that max hops is infinite.";
 | |
| 	m->m_cgi   = "mnh";
 | |
| 	m->m_off   = (char *)&cr.m_maxNumHops - x;
 | |
| 	m->m_type  = TYPE_CHAR2;
 | |
| 	m->m_def   = "-1";
 | |
| 	m->m_group = 0;
 | |
| 	m++;*/
 | |
| 
 | |
| 	m->m_title = "scraping enabled procog";
 | |
| 	m->m_desc  = "Do searches for queries in this hosts part of the "
 | |
| 		"query log.";
 | |
| 	m->m_cgi   = "scrapepc";
 | |
| 	m->m_off   = (char *)&cr.m_scrapingEnabledProCog - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "scraping enabled web";
 | |
| 	m->m_desc  = "Perform random searches on googles news search engine "
 | |
| 		"to add sites with ingoogle tags into tagdb.";
 | |
| 	m->m_cgi   = "scrapeweb";
 | |
| 	m->m_off   = (char *)&cr.m_scrapingEnabledWeb - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "scraping enabled news";
 | |
| 	m->m_desc  = "Perform random searches on googles news search engine "
 | |
| 		"to add sites with news and goognews and ingoogle "
 | |
| 		"tags into tagdb.";
 | |
| 	m->m_cgi   = "scrapenews";
 | |
| 	m->m_off   = (char *)&cr.m_scrapingEnabledNews - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "scraping enabled blogs";
 | |
| 	m->m_desc  = "Perform random searches on googles news search engine "
 | |
| 		"to add sites with blogs and googblogs and ingoogle "
 | |
| 		"tags into tagdb.";
 | |
| 	m->m_cgi   = "scrapeblogs";
 | |
| 	m->m_off   = (char *)&cr.m_scrapingEnabledBlogs - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "subsite detection enabled";
 | |
| 	m->m_desc  = "Add the \"sitepathdepth\" to tagdb if a hostname "
 | |
| 		"is determined to have subsites at a particular depth.";
 | |
| 	m->m_cgi   = "ssd";
 | |
| 	m->m_off   = (char *)&cr.m_subsiteDetectionEnabled - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "deduping enabled";
 | |
| 	m->m_desc  = "When enabled, the spider will "
 | |
| 		"discard web pages which are identical to other web pages "
 | |
| 		"that are already in the index. "//AND that are from the same "
 | |
| 		//"hostname. 
 | |
| 		//"An example of a hostname is www1.ibm.com. "
 | |
| 		"However, root urls, urls that have no path, are never "
 | |
| 		"discarded. It most likely has to hit disk to do these "
 | |
| 		"checks so it does cause some slow down. Only use it if you "
 | |
| 		"need it.";
 | |
| 	m->m_cgi   = "de";
 | |
| 	m->m_off   = (char *)&cr.m_dedupingEnabled - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_CLONE;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "deduping enabled for www";
 | |
| 	m->m_desc  = "When enabled, the spider will "
 | |
| 		"discard web pages which, when a www is prepended to the "
 | |
| 		"page's url, result in a url already in the index.";
 | |
| 	m->m_cgi   = "dew";
 | |
| 	m->m_off   = (char *)&cr.m_dupCheckWWW - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_CLONE;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "detect custom error pages";
 | |
| 	m->m_desc  = "Detect and do not index pages which have a 200 status"
 | |
| 		" code, but are likely to be error pages.";
 | |
| 	m->m_cgi   = "dcep";
 | |
| 	m->m_off   = (char *)&cr.m_detectCustomErrorPages - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_CLONE;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "delete 404s";
 | |
| 	m->m_desc  = "Should pages be removed from the index if they are no "
 | |
| 		"longer accessible on the web?";
 | |
| 	m->m_cgi   = "dnf";
 | |
| 	m->m_off   = (char *)&cr.m_delete404s - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_HIDDEN;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "delete timed out docs";
 | |
| 	m->m_desc  = "Should documents be deleted from the index "
 | |
| 		"if they have been retried them enough times and the "
 | |
| 		"last received error is a time out? "
 | |
| 		"If your internet connection is flaky you may say "
 | |
| 		"no here to ensure you do not lose important docs.";
 | |
| 	m->m_cgi   = "dtod";
 | |
| 	m->m_off   = (char *)&cr.m_deleteTimeouts - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "use simplified redirects";
 | |
| 	m->m_desc  = "If this is true, the spider, when a url redirects "
 | |
| 		"to a \"simpler\" url, will add that simpler url into "
 | |
| 		"the spider queue and abandon the spidering of the current "
 | |
| 		"url.";
 | |
| 	m->m_cgi   = "usr";
 | |
| 	m->m_off   = (char *)&cr.m_useSimplifiedRedirects - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	// turn off for now. spider time deduping should help any issues
 | |
| 	// by disabling this.
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_CLONE;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "use canonical redirects";
 | |
| 	m->m_desc  = "If page has a <link canonical> on it then treat it "
 | |
| 		"as a redirect, add it to spiderdb for spidering "
 | |
| 		"and abandon the indexing of the current url.";
 | |
| 	m->m_cgi   = "ucr";
 | |
| 	m->m_off   = (char *)&cr.m_useCanonicalRedirects - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_CLONE;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "use ifModifiedSince";
 | |
| 	m->m_desc  = "If this is true, the spider, when "
 | |
| 		"updating a web page that is already in the index, will "
 | |
| 		"not even download the whole page if it hasn't been "
 | |
| 		"updated since the last time Gigablast spidered it. "
 | |
| 		"This is primarily a bandwidth saving feature. It relies on "
 | |
| 		"the remote webserver's returned Last-Modified-Since field "
 | |
| 		"being accurate.";
 | |
| 	m->m_cgi   = "uims";
 | |
| 	m->m_off   = (char *)&cr.m_useIfModifiedSince - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_CLONE;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "build similarity vector from content only";
 | |
| 	m->m_desc  = "If this is true, the spider, when checking the page "
 | |
| 		     "if it has changed enough to reindex or update the "
 | |
| 		     "published date, it will build the vector only from "
 | |
| 		     "the content located on that page.";
 | |
| 	m->m_cgi   = "bvfc";
 | |
| 	m->m_off   = (char *)&cr.m_buildVecFromCont - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "use content similarity to index publish date";
 | |
| 	m->m_desc  = "This requires build similarity from content only to be "
 | |
| 		     "on.  This indexes the publish date (only if the content "
 | |
| 		     "has changed enough) to be between the last two spider "
 | |
| 		     "dates.";
 | |
| 	m->m_cgi   = "uspd";
 | |
| 	m->m_off   = (char *)&cr.m_useSimilarityPublishDate - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max percentage similar to update publish date";
 | |
| 	m->m_desc  = "This requires build similarity from content only and "
 | |
| 		     "use content similarity to index publish date to be "
 | |
| 		     "on.  This percentage is the maximum similarity that can "
 | |
| 		     "exist between an old document and new before the publish "
 | |
| 		     "date will be updated.";
 | |
| 	m->m_cgi   = "mpspd";
 | |
| 	m->m_off   = (char *)&cr.m_maxPercentSimilarPublishDate - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "80";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	// use url filters for this. this is a crawlbot parm really.
 | |
| 	/*
 | |
| 	m->m_title = "restrict domain";
 | |
| 	m->m_desc  = "Keep crawler on same domain as seed urls?";
 | |
| 	m->m_cgi   = "restrictDomain";
 | |
| 	m->m_off   = (char *)&cr.m_restrictDomain - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	// we need to save this it is a diffbot parm
 | |
| 	m->m_flags = PF_HIDDEN | PF_DIFFBOT;// | PF_NOSAVE;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "do url sporn checking";
 | |
| 	m->m_desc  = "If this is true and the spider finds "
 | |
| 		"lewd words in the hostname of a url it will throw "
 | |
| 		"that url away. It will also throw away urls that have 5 or "
 | |
| 		"more hyphens in their hostname.";
 | |
| 	m->m_cgi   = "dusc";
 | |
| 	m->m_off   = (char *)&cr.m_doUrlSpamCheck - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "hours before adding unspiderable url to spiderdb";
 | |
| 	m->m_desc  = "Hours to wait after trying to add an unspiderable url "
 | |
| 		"to spiderdb again.";
 | |
| 	m->m_cgi   = "dwma";
 | |
| 	m->m_off   = (char *)&cr.m_deadWaitMaxAge - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "24";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	//m->m_title = "link text anomaly threshold";
 | |
| 	//m->m_desc  = "Prevent pages from link voting for "
 | |
| 	//	"another page if its link text has a "
 | |
| 	//	"word which doesn't occur in at least this "
 | |
| 	//	"many other link texts. (set to 1 to disable)";
 | |
| 	//m->m_cgi   = "ltat";
 | |
| 	//m->m_off   = (char *)&cr.m_linkTextAnomalyThresh - x;
 | |
| 	//m->m_type  = TYPE_LONG;
 | |
| 	//m->m_def   = "2";
 | |
| 	//m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "enforce domain quotas on new docs";
 | |
| 	m->m_desc  = "If this is true then new documents will be removed "
 | |
| 		"from the index if the quota for their domain "
 | |
| 		"has been breeched.";
 | |
| 	m->m_cgi   = "enq";
 | |
| 	m->m_off   = (char *)&cr.m_enforceNewQuotas - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "enforce domain quotas on indexed docs";
 | |
| 	m->m_desc  = "If this is true then indexed documents will be removed "
 | |
| 		"from the index if the quota for their domain has been "
 | |
| 		"breeched.";
 | |
| 	m->m_cgi   = "eoq";
 | |
| 	m->m_off   = (char *)&cr.m_enforceOldQuotas - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "use exact quotas";
 | |
| 	m->m_desc  = "Does not use approximations so will do more disk seeks "
 | |
| 		"and may impact indexing performance significantly.";
 | |
| 	m->m_cgi   = "ueq";
 | |
| 	m->m_off   = (char *)&cr.m_exactQuotas - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "restrict indexdb for spidering";
 | |
| 	m->m_desc  = "If this is true then only the root indexb file is "
 | |
| 		"searched for linkers. Saves on disk seeks, "
 | |
| 		"but may use older versions of indexed web pages.";
 | |
| 	m->m_cgi   = "ris";
 | |
| 	m->m_off   = (char *)&cr.m_restrictIndexdbForSpider - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "indexdb max total files to merge";
 | |
| 	m->m_desc  = "Do not merge more than this many files during a single "
 | |
| 		"merge operation. Merge does not scale well to numbers above "
 | |
| 		"50 or so.";
 | |
| 	m->m_cgi   = "mttftm";
 | |
| 	m->m_off   = (char *)&cr.m_indexdbMinTotalFilesToMerge - x;
 | |
| 	m->m_def   = "50"; 
 | |
| 	//m->m_max   = 100;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "indexdb min files needed to trigger merge";
 | |
| 	m->m_desc  = "Merge is triggered when this many indexdb data files "
 | |
| 		"are on disk.";
 | |
| 	m->m_cgi   = "miftm";
 | |
| 	m->m_off   = (char *)&cr.m_indexdbMinFilesToMerge - x;
 | |
| 	m->m_def   = "6"; // default to high query performance, not spider
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "datedb min files needed to trigger to merge";
 | |
| 	m->m_desc  = "Merge is triggered when this many datedb data files "
 | |
| 		"are on disk.";
 | |
| 	m->m_cgi   = "mdftm";
 | |
| 	m->m_off   = (char *)&cr.m_datedbMinFilesToMerge - x;
 | |
| 	m->m_def   = "5";
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "spiderdb min files needed to trigger to merge";
 | |
| 	m->m_desc  = "Merge is triggered when this many spiderdb data files "
 | |
| 		"are on disk.";
 | |
| 	m->m_cgi   = "msftm";
 | |
| 	m->m_off   = (char *)&cr.m_spiderdbMinFilesToMerge - x;
 | |
| 	m->m_def   = "2";
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "checksumdb min files needed to trigger to merge";
 | |
| 	m->m_desc  = "Merge is triggered when this many checksumdb data files "
 | |
| 		"are on disk.";
 | |
| 	m->m_cgi   = "mcftm";
 | |
| 	m->m_off   = (char *)&cr.m_checksumdbMinFilesToMerge - x;
 | |
| 	m->m_def   = "2";
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "clusterdb min files needed to trigger to merge";
 | |
| 	m->m_desc  = "Merge is triggered when this many clusterdb data files "
 | |
| 		"are on disk.";
 | |
| 	m->m_cgi   = "mclftm";
 | |
| 	m->m_off   = (char *)&cr.m_clusterdbMinFilesToMerge - x;
 | |
| 	m->m_def   = "2";
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "linkdb min files needed to trigger to merge";
 | |
| 	m->m_desc  = "Merge is triggered when this many linkdb data files "
 | |
| 		"are on disk. Raise this when initially growing an index "
 | |
| 		"in order to keep merging down.";
 | |
| 	m->m_cgi   = "mlkftm";
 | |
| 	m->m_off   = (char *)&cr.m_linkdbMinFilesToMerge - x;
 | |
| 	m->m_def   = "6"; 
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_CLONE;//PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "tagdb min files to merge";
 | |
| 	m->m_desc  = "Merge is triggered when this many linkdb data files "
 | |
| 		"are on disk.";
 | |
| 	m->m_cgi   = "mtftgm";
 | |
| 	m->m_off   = (char *)&cr.m_tagdbMinFilesToMerge - x;
 | |
| 	m->m_def   = "2"; 
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_CLONE;//PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
	// Parm: titledb merge trigger. Hidden and not saved to the conf file
	// (PF_HIDDEN | PF_NOSAVE), so the default effectively always applies.
	// this is overridden by collection
	m->m_title = "titledb min files needed to trigger to merge";
	m->m_desc  = "Merge is triggered when this many titledb data files "
		"are on disk.";
	m->m_cgi   = "mtftm";
	m->m_off   = (char *)&cr.m_titledbMinFilesToMerge - x;
	m->m_def   = "6"; 
	m->m_type  = TYPE_LONG;
	//m->m_save  = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m++;

	// Disabled parm: sectiondb merge trigger (kept for reference).
	//m->m_title = "sectiondb min files to merge";
	//m->m_desc  ="Merge is triggered when this many sectiondb data files "
	//	"are on disk.";
	//m->m_cgi   = "mscftm";
	//m->m_off   = (char *)&cr.m_sectiondbMinFilesToMerge - x;
	//m->m_def   = "4"; 
	//m->m_type  = TYPE_LONG;
	//m->m_group = 0;
	//m++;

	// Parm: posdb (the main index) merge trigger. Visible, cloneable.
	m->m_title = "posdb min files needed to trigger to merge";
	m->m_desc  = "Merge is triggered when this many posdb data files "
		"are on disk. Raise this while doing massive injections "
		"and not doing much querying. Then when done injecting "
		"keep this low to make queries fast.";
	m->m_cgi   = "mpftm";
	m->m_off   = (char *)&cr.m_posdbMinFilesToMerge - x;
	m->m_def   = "6"; 
	m->m_type  = TYPE_LONG;
	m->m_group = 0;
	m->m_flags = PF_CLONE;//PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m++;
 | |
| 
 | |
	// Parm: recycle content — reuse stored page content instead of
	// re-downloading when reindexing. Hidden / not saved (PF_HIDDEN |
	// PF_NOSAVE), default off.
	m->m_title = "recycle content";
	m->m_desc   = "Rather than downloading the content again when "
		"indexing old urls, use the stored content. Useful for "
		"reindexing documents under a different ruleset or for "
		"rebuilding an index. You usually "
		"should turn off the 'use robots.txt' switch. "
		"And turn on the 'use old ips' and "
		"'recycle link votes' switches for speed. If rebuilding an "
		"index then you should turn off the 'only index changes' "
		"switches.";
	m->m_cgi   = "rc";
	m->m_off   = (char *)&cr.m_recycleContent - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m++;
 | |
| 
 | |
| 	m->m_title = "enable link voting";
 | |
| 	m->m_desc  = "If this is true Gigablast will "
 | |
| 		"index hyper-link text and use hyper-link "
 | |
| 		"structures to boost the quality of indexed documents. "
 | |
| 		"You can disable this when doing a ton of injections to "
 | |
| 		"keep things fast. Then do a posdb (index) rebuild "
 | |
| 		"after re-enabling this when you are done injecting. Or "
 | |
| 		"if you simply do not want link voting this will speed up"
 | |
| 		"your injections and spidering a bit.";
 | |
| 	m->m_cgi   = "glt";
 | |
| 	m->m_off   = (char *)&cr.m_getLinkInfo - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_flags = PF_CLONE|PF_API;//PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "compute inlinks to sites";
 | |
| 	m->m_desc  = "If this is true Gigablast will "
 | |
| 		"compute the number of site inlinks for the sites it "
 | |
| 		"indexes. This is a measure of the sites popularity and is "
 | |
| 		"used for ranking and some times spidering prioritzation. "
 | |
| 		"It will cache the site information in tagdb. "
 | |
| 		"The greater the number of inlinks, the longer the cached "
 | |
| 		"time, because the site is considered more stable. If this "
 | |
| 		"is NOT true then Gigablast will use the included file, "
 | |
| 		"sitelinks.txt, which stores the site inlinks of millions "
 | |
| 		"of the most popular sites. This is the fastest way. If you "
 | |
| 		"notice a lot of <i>getting link info</i> requests in the "
 | |
| 		"<i>sockets table</i> you may want to disable this "
 | |
| 		"parm.";
 | |
| 	m->m_cgi   = "csni";
 | |
| 	m->m_off   = (char *)&cr.m_computeSiteNumInlinks - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_flags = PF_CLONE|PF_API;//PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
	// Parm: link-spam filtering — reject spammy inlinks from voting.
	m->m_title = "do link spam checking";
	m->m_desc  = "If this is true, do not allow spammy inlinks to vote. "
		"This check is "
		"too aggressive for some collections, i.e.  it "
		"does not allow pages with cgi in their urls to vote.";
	m->m_cgi   = "dlsc";
	m->m_off   = (char *)&cr.m_doLinkSpamCheck - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "1";
	m->m_group = 0;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;

	// Parm: one vote per IP /16 (m_oneVotePerIpDom) — limits link-vote
	// stuffing from a single network.
	m->m_title = "restrict link voting by ip";
	m->m_desc  = "If this is true Gigablast will "
		"only allow one vote per the top 2 significant bytes "
		"of the IP address. Otherwise, multiple pages "
		"from the same top IP can contribute to the link text and "
		"link-based quality ratings of a particular URL. "
		"Furthermore, no votes will be accepted from IPs that have "
		"the same top 2 significant bytes as the IP of the page "
		"being indexed.";
	m->m_cgi   = "ovpid";
	m->m_off   = (char *)&cr.m_oneVotePerIpDom - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "1";
	m->m_group = 0;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;

	// Parm: new link algorithm toggle — hidden / not saved, so it is
	// effectively always on (default "1").
	m->m_title = "use new link algo";
	m->m_desc  = "Use the links: termlists instead of link:. Also "
		"allows pages linking from the same domain or IP to all "
		"count as a single link from a different IP. This is also "
		"required for incorporating RSS and Atom feed information "
		"when indexing a document.";
	m->m_cgi   = "na";
	m->m_off   = (char *)&cr.m_newAlgo - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "1";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m++;
 | |
| 
 | |
	// Disabled parm: recycle link votes (kept for reference).
	/*
	m->m_title = "recycle link votes";
	m->m_desc   = "If this is true Gigablast will "
		"use the old links and link text when re-indexing old urls "
		"and not do any link voting when indexing new urls.";
	m->m_cgi   = "rv";
	m->m_off   = (char *)&cr.m_recycleVotes - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_group = 0;
	m++;
	*/

	// Parm: link-info refresh interval in days (float; 0 = recompute on
	// every content reindex). Default is 60 days.
	m->m_title = "update link info frequency";
	m->m_desc   = "How often should Gigablast recompute the "
		"link info for a url. "
		"Also applies to getting the quality of a site "
		"or root url, which is based on the link info. "
		"In days. Can use decimals. 0 means to update "
		"the link info every time the url's content is re-indexed. "
		"If the content is not reindexed because it is unchanged "
		"then the link info will not be updated. When getting the "
		"link info or quality of the root url from an "
		"external cluster, Gigablast will tell the external cluster "
		"to recompute it if its age is this or higher.";
	m->m_cgi   = "uvf";
	m->m_off   = (char *)&cr.m_updateVotesFreq - x;
	m->m_type  = TYPE_FLOAT;
	m->m_def   = "60.000000";
	m->m_group = 0;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;
 | |
| 
 | |
	// Disabled parm: recycle imported link info (kept for reference).
	/*
	m->m_title = "recycle imported link info";
	m->m_desc  = "If true, we ALWAYS recycle the imported link info and "
		"NEVER recompute it again. Otherwise, recompute it when we "
		"recompute the local link info.";
	m->m_cgi   = "rili";
	m->m_off   = (char *)&cr.m_recycleLinkInfo2 - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "1";
	m->m_group = 0;
	m++;
	*/

	// Disabled parm: use imported link info for quality.
	/*
	m->m_title = "use imported link info for quality";
	m->m_desc  = "If true, we will use the imported link info to "
		"help us determine the quality of the page we are indexing.";
	m->m_cgi   = "uifq";
	m->m_off   = (char *)&cr.m_useLinkInfo2ForQuality - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "1";
	m->m_group = 0;
	m++;
	*/

	// this can hurt us too much if mis-assigned, remove it
	/*
	m->m_title = "restrict link voting to roots";
	m->m_desc  = "If this is true Gigablast will "
		"not perform link analysis on urls that are not "
		"root urls.";
	m->m_cgi   = "rvr";
	m->m_off   = (char *)&cr.m_restrictVotesToRoots - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_group = 0;
	m++;
	*/

	// Disabled parm: index link text (client-specific feature).
	/*
	m->m_title = "index link text";
	m->m_desc  = "If this is true Gigablast will "
		"index both incoming and outgoing link text for the "
		"appropriate documents, depending on url filters and "
		"site rules, under the gbinlinktext: and gboutlinktext: "
		"fields. Generally, you want this disabled, it was for "
		"a client.";
	m->m_cgi   = "ilt";
	m->m_off   = (char *)&cr.m_indexLinkText - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_group = 0;
	m++;
	*/

	// Disabled parm: index incoming link text (same m_indexLinkText
	// offset as the stanza above).
	/*
	m->m_title = "index incoming link text";
	m->m_desc  = "If this is false no incoming link text is indexed.";
	m->m_cgi   = "iilt";
	m->m_off   = (char *)&cr.m_indexLinkText - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "1";
	m->m_group = 0;
	m++;
	*/

	// Parm: index inlink neighborhoods — hidden / not saved; default on.
	m->m_title = "index inlink neighborhoods";
	m->m_desc  = "If this is true Gigablast will "
		"index the plain text surrounding the hyper-link text. The "
		"score will be x times that of the hyper-link text, where x "
		"is the scalar below.";
	m->m_cgi   = "iin";
	m->m_off   = (char *)&cr.m_indexInlinkNeighborhoods - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "1";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m++;
 | |
| 
 | |
	// Disabled parm: neighborhood score scalar (now fixed in XmlNode.cpp).
	/*
	// this is now hard-coded in XmlNode.cpp, currently .8
	m->m_title = "inlink neighborhoods score scalar";
	m->m_desc  = "Gigablast can "
		"index the plain text surrounding the hyper-link text. The "
		"score will be x times that of the hyper-link text, where x "
		"is this number.";
	m->m_cgi   = "inss";
	m->m_off   = (char *)&cr.m_inlinkNeighborhoodsScoreScalar - x;
	m->m_type  = TYPE_FLOAT;
	m->m_def   = ".20";
	m->m_group = 0;
	m++;
	*/

	// Disabled parm: break web rings.
	/*
	m->m_title = "break web rings";
	m->m_desc  = "If this is true Gigablast will "
		"attempt to detect link spamming rings and decrease "
		"their influence on the link text for a URL.";
	m->m_cgi   = "bwr";
	m->m_off   = (char *)&cr.m_breakWebRings - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "1";
	m->m_group = 0;
	m++;
	*/

	// Disabled parm: break log spam.
	/*
	m->m_title = "break log spam";
	m->m_desc  = "If this is true Gigablast will attempt to detect "
		"dynamically generated pages and remove their voting power. "
		"Additionally, pages over 100k will not be have their "
		"outgoing links counted. Pages that have a form which POSTS "
		"to a cgi page will not be considered either.";
	m->m_cgi   = "bls";
	m->m_off   = (char *)&cr.m_breakLogSpam - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "1";
	m++;
	*/

	// Parm: borrow another collection's tagdb (string, empty = use own).
	// Hidden / not saved.
	m->m_title = "tagdb collection name";
	m->m_desc  = "Sometimes you want the spiders to use the tagdb of "
		"another collection, like the <i>main</i> collection. "
		"If this is empty it defaults to the current collection.";
	m->m_cgi   = "tdbc";
	m->m_off   = (char *)&cr.m_tagdbColl - x;
	m->m_type  = TYPE_STRING;
	m->m_size  = MAX_COLL_LEN+1;
	m->m_def   = "";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m++;
 | |
| 
 | |
	// Parm: catdb (DMOZ) lookups during spidering. All four catdb parms
	// below are hidden / not saved (PF_HIDDEN | PF_NOSAVE).
	m->m_title = "catdb lookups enabled";
	m->m_desc  = "Spiders will look to see if the current page is in "
		"catdb.  If it is, all Directory information for that page "
		"will be indexed with it.";
	m->m_cgi   = "cdbe";
	m->m_off   = (char *)&cr.m_catdbEnabled - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "1";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m++;

	// Parm: reuse cached DMOZ info instead of refetching.
	m->m_title = "recycle catdb info";
	m->m_desc   = "Rather than requesting new info from DMOZ, like "
		"titles and topic ids, grab it from old record. Increases "
		"performance if you are seeing a lot of "
		"\"getting catdb record\" entries in the spider queues.";
	m->m_cgi   = "rci";
	m->m_off   = (char *)&cr.m_recycleCatdb - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m++;

	// Parm: whether tagdb/url-filter bans apply to pages listed in catdb.
	m->m_title = "allow banning of pages in catdb";
	m->m_desc  = "If this is 'NO' then pages that are in catdb, "
		"but banned from tagdb or the url filters page, can not "
		"be banned.";
	m->m_cgi   = "abpc";
	m->m_off   = (char *)&cr.m_catdbPagesCanBeBanned - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m++;

	// Parm: ignore spider errors for DMOZ-listed sites.
	m->m_title = "override spider errors for catdb";
	m->m_desc  = "Ignore and skip spider errors if the spidered site"
		     " is found in Catdb (DMOZ).";
	m->m_cgi   = "catose";
	m->m_off   = (char *)&cr.m_overrideSpiderErrorsForCatdb - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "1";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m++;
 | |
| 
 | |
	// Disabled parm: only spider root urls.
	//m->m_title = "only spider root urls";
	//m->m_desc  = "Only spider urls that are roots.";
	//m->m_cgi   = "osru";
	//m->m_off   = (char *)&cr.m_onlySpiderRoots - x;
	//m->m_type  = TYPE_BOOL;
	//m->m_def   = "0";
	//m++;

	// Parm: allow gb2312-charset docs. All three "allow" parms below are
	// hidden / not saved, so their defaults ("1") always apply.
	m->m_title = "allow asian docs";
	m->m_desc  = "If this is disabled the spider "
		"will not allow any docs from the gb2312 charset "
		"into the index.";
	m->m_cgi   = "aad";
	m->m_off   = (char *)&cr.m_allowAsianDocs - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "1";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m++;

	// Parm: allow adult-content docs (overrides tagdb).
	m->m_title = "allow adult docs";
	m->m_desc  = "If this is disabled the spider "
		"will not allow any docs which contain adult content "
		"into the index (overrides tagdb).";
	m->m_cgi   = "aprnd";
	m->m_off   = (char *)&cr.m_allowAdultDocs - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "1";
	m->m_group =  0 ;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m++;

	// Parm: allow XML docs.
	m->m_title = "allow xml docs";
	m->m_desc  = "If this is disabled the spider "
		"will not allow any xml "
		"into the index.";
	m->m_cgi   = "axd";
	m->m_off   = (char *)&cr.m_allowXmlDocs - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "1";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m++;
 | |
| 
 | |
	// Parm: reject documents detected as search-result pages (serps).
	// Visible and cloneable; default on.
	m->m_title = "do serp detection";
	m->m_desc  = "If this is enabled the spider "
		"will not allow any docs which are determined to "
		"be serps.";
	m->m_cgi   = "dsd";
	m->m_off   = (char *)&cr.m_doSerpDetection - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "1";
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;

	// Parm: perform DNS lookups for spidered urls. Hidden / not saved.
	m->m_title = "do IP lookup";
	m->m_desc  = "If this is disabled and the proxy "
		"IP below is not zero then Gigablast will assume "
		"all spidered URLs have an IP address of 1.2.3.4.";
	m->m_cgi   = "dil";
	m->m_off   = (char *)&cr.m_doIpLookups - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "1";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m++;

	// Parm: reuse the stored IP of docs being reindexed. Hidden / not
	// saved; default off.
	m->m_title = "use old IPs";
	m->m_desc = "Should the stored IP "
		"of documents we are reindexing be used? Useful for "
		"pages banned by IP address and then reindexed with "
		"the reindexer tool.";
	m->m_cgi = "useOldIps";
	m->m_off = (char *)&cr.m_useOldIps - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m++;

	// Parm: remove tagdb/url-filter-banned pages from the index.
	// Hidden / not saved; default on.
	m->m_title = "remove banned pages";
	m->m_desc  = "Remove banned pages from the index. Pages can be "
		"banned using tagdb or the Url Filters table.";
	m->m_cgi   = "rbp";
	m->m_off   = (char *)&cr.m_removeBannedPages - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "1";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m++;
 | |
| 
 | |
	// Disabled parm: ban domains of urls banned by IP (obsolete).
	/*
	m->m_title = "ban domains of urls banned by IP";
	m->m_desc = "Most urls are banned by IP "
		"address. But owners often will keep the same "
		"domains and change their IP address. So when "
		"banning a url that was banned by IP, should its domain "
		"be banned too? (obsolete)";
	m->m_cgi = "banDomains";
	m->m_off = (char *)&cr.m_banDomains - x;
	m->m_type = TYPE_BOOL;
	m->m_def = "0";
	m++;
	*/

	// Parm: spider HTTPS pages over SSL. Hidden / not saved; default on.
	m->m_title = "allow HTTPS pages using SSL";
	m->m_desc  = "If this is true, spiders will read "
		     "HTTPS pages using SSL Protocols.";
	m->m_cgi   = "ahttps";
	m->m_off   = (char *)&cr.m_allowHttps - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "1";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m++;
 | |
| 
 | |
	// Disabled parm: require dollar sign (shopping-index experiment).
	/*
	m->m_title = "require dollar sign";
	m->m_desc  = "If this is YES, then do not allow document to be "
		"indexed if they do not contain a dollar sign ($), but the "
		"links will still be harvested. Used for building shopping "
		"index.";
	m->m_cgi   = "nds";
	m->m_off   = (char *)&cr.m_needDollarSign - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m++;
	*/

	// Disabled parms: require numbers in url / index news topics
	// (news-index experiments).
	/*
	m->m_title = "require numbers in url";
	m->m_desc  = "If this is YES, then do not allow document to be "
		"indexed if they do not have two back-to-back digits in the "
		"path of the url, but the links will still be harvested. Used "
		"to build a news index.";
	m->m_cgi   = "nniu";
	m->m_off   = (char *)&cr.m_needNumbersInUrl - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_group = 0;
	m++;

	m->m_title = "index news topics";
	m->m_desc  = "If this is YES, Gigablast will attempt to categorize "
		"every page as being in particular news categories like "
		"sports, business, etc. and will be searchable by doing a "
		"query like \"newstopic:sports.";
	m->m_cgi   = "int";
	m->m_off   = (char *)&cr.m_getNewsTopic - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m++;
	*/

	// Parm: follow RSS feed links instead of the linking page.
	// Hidden / not saved; default off.
	m->m_title = "follow RSS links";
	m->m_desc  = "If an item on a page has an RSS feed link, add the "
		"RSS link to the spider queue and index the RSS pages "
		"instead of the current page.";
	m->m_cgi   = "frss";
	m->m_off   = (char *)&cr.m_followRSSLinks - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m++;

	// Parm: restrict indexing to RSS-linked pages (depends on the
	// "follow RSS links" parm above). Hidden / not saved.
	m->m_title = "only index articles from RSS feeds";
	m->m_desc  = "Only index pages that were linked to by an RSS feed. "
		"Follow RSS Links must be enabled (above).";
	m->m_cgi   = "orss";
	m->m_off   = (char *)&cr.m_onlyIndexRSS - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m++;
 | |
| 
 | |
	// Parm: byte cap for HTML/text documents (-1 = unlimited).
	// API-visible and cloneable; default 1MB.
	m->m_title = "max text doc length";
	m->m_desc  = "Gigablast will not download, index or "
		"store more than this many bytes of an HTML or text "
		"document. XML is NOT considered to be HTML or text, use "
		"the rule below to control the maximum length of an XML "
		"document. "
		"Use -1 for no max.";
	m->m_cgi   = "mtdl";
	m->m_off   = (char *)&cr.m_maxTextDocLen - x;
	m->m_type  = TYPE_LONG;
	m->m_def   = "1048576"; // 1MB
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m->m_flags = PF_CLONE|PF_API;
	m++;

	// Parm: byte cap for all other document types, including XML.
	m->m_title = "max other doc length";
	m->m_desc  = "Gigablast will not download, index or "
		"store more than this many bytes of a non-html, non-text "
		"document. XML documents will be restricted to this "
		"length. "
		"Use -1 for no max.";
	m->m_cgi   = "modl";
	m->m_off   = (char *)&cr.m_maxOtherDocLen - x;
	m->m_type  = TYPE_LONG;
	m->m_def   = "1048576"; // 1MB
	m->m_group = 0;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m->m_flags = PF_CLONE|PF_API;
	m++;
 | |
| 
 | |
	// Disabled parm: indexdb truncation limit.
	//m->m_title = "indexdb truncation limit";
	//m->m_cgi   = "itl";
	//m->m_desc  = "How many documents per term? Keep this very high.";
	//m->m_off   = (char *)&cr.m_indexdbTruncationLimit - x;
	//m->m_def   = "50000000"; 
	//m->m_type  = TYPE_LONG;
	//m->m_min   = MIN_TRUNC; // from Indexdb.h
	//m++;

	// Parm: run the external content filter on html/text pages too
	// (by default it is only applied to other content types).
	m->m_title = "apply filter to text pages";
	m->m_desc  = "If this is false then the filter "
		"will not be used on html or text pages.";
	m->m_cgi   = "aft";
	m->m_off   = (char *)&cr.m_applyFilterToText - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;

	// Parm: external program spawned to filter spider HTTP replies
	// (empty string = no filter).
	m->m_title = "filter name";
	m->m_desc  = "Program to spawn to filter all HTTP "
		"replies the spider receives. Leave blank for none.";
	m->m_cgi   = "filter";
	m->m_def   = "";
	m->m_off   = (char *)&cr.m_filter - x;
	m->m_type  = TYPE_STRING;
	m->m_size  = MAX_FILTER_LEN+1;
	m->m_group = 0;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;

	// Parm: seconds before a stalled filter process is killed.
	m->m_title = "filter timeout";
	m->m_desc  = "Kill filter shell after this many seconds. Assume it "
		"stalled permanently.";
	m->m_cgi   = "fto";
	m->m_def   = "40";
	m->m_off   = (char *)&cr.m_filterTimeout - x;
	m->m_type  = TYPE_LONG;
	m->m_group = 0;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;
 | |
| 
 | |
	// Disabled parms: per-collection spider proxy ip/port.
	/*
	m->m_title = "proxy ip";
	m->m_desc  = "Retrieve pages from the proxy at this IP address.";
	m->m_cgi   = "proxyip";
	m->m_off   = (char *)&cr.m_proxyIp - x;
	m->m_type  = TYPE_IP;
	m->m_def   = "0.0.0.0";
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;

	m->m_title = "proxy port";
	m->m_desc  = "Retrieve pages from the proxy on "
		"this port.";
	m->m_cgi   = "proxyport";
	m->m_off   = (char *)&cr.m_proxyPort - x;
	m->m_type  = TYPE_LONG;
	m->m_def   = "0";
	m->m_group = 0;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;
	*/

	// Parm: generate thumbnail images for search results.
	m->m_title = "make image thumbnails";
	m->m_desc  = "Try to find the best image on each page and "
		"store it as a thumbnail for presenting in the search "
		"results.";
	m->m_cgi   = "mit";
	m->m_off   = (char *)&cr.m_makeImageThumbnails - x;
	m->m_type  = TYPE_BOOL;
	// default to off since it slows things down to do this
	m->m_def   = "0";
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;

	// Parm: thumbnail bounding dimension in pixels (longest side).
	m->m_title = "max thumbnail width or height";
	m->m_desc  = "This is in pixels and limits the size of the thumbnail. "
		"Gigablast tries to make at least the width or the height "
		"equal to this maximum, but, unless the thumbnail is square, "
		"one side will be longer than the other.";
	m->m_cgi   = "mtwh";
	m->m_off   = (char *)&cr.m_thumbnailMaxWidthHeight - x;
	m->m_type  = TYPE_LONG;
	m->m_def   = "250";
	m->m_group = 0;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;
 | |
| 
 | |
	// Parm: index a synthetic "spider status" document per spider
	// attempt; queryable via type:status etc. Default off (see the
	// history notes below on why it has been toggled over time).
	m->m_title = "index spider status documents";
	m->m_desc  = "Index a spider status \"document\" "
		"for every url the spider "
		"attempts to spider. Search for them using special "
		"query operators like type:status or gberrorstr:success or "
		"stats:gberrornum to get a histogram. "
		"See <a href=/syntax.html>syntax</a> page for more examples. "
		"They will not otherwise "
		"show up in the search results.";
	//      "This will not work for "
	// 	"diffbot crawlbot collections yet until it has proven "
	// 	"more stable.";
	m->m_cgi   = "isr";
	m->m_off   = (char *)&cr.m_indexSpiderReplies - x;
	m->m_type  = TYPE_BOOL;
	// default off for now until we fix it better. 5/26/14 mdw
	// turn back on 6/21 now that we do not index plain text terms
	// and we add gbdocspidertime and gbdocindextime terms so you
	// can use those to sort regular docs and not have spider reply
	// status docs in the serps.
	// back on 4/21/2015 seems pretty stable.
	// but it uses disk space so turn off for now again. 6/16/2015
	m->m_def   = "0";
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m->m_flags = PF_CLONE;
	m++;
 | |
| 
 | |
| 	// i put this in here so i can save disk space for my global
 | |
| 	// diffbot json index
 | |
| 	m->m_title = "index body";
 | |
| 	m->m_desc  = "Index the body of the documents so you can search it. "
 | |
| 		"Required for searching that. You wil pretty much always "
 | |
| 		"want to keep this enabled. Does not apply to JSON "
 | |
| 		"documents.";
 | |
| 	m->m_cgi   = "ib";
 | |
| 	m->m_off   = (char *)&cr.m_indexBody - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_flags = PF_CLONE ;//| PF_HIDDEN; 
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_cgi   = "apiUrl";
 | |
| 	m->m_desc  = "Send every spidered url to this url and index "
 | |
| 		"the reply in addition to the normal indexing process. "
 | |
| 		"Example: by specifying http://api.diffbot.com/v3/"
 | |
| 		"analyze?mode=high-precision&token=<yourDiffbotToken> here "
 | |
| 		"you can index the structured JSON replies from diffbot for "
 | |
| 		"every url that is spidered. "
 | |
| 		"Gigablast will automatically "
 | |
| 		"append a &url=<urlBeingSpidered> to this url "
 | |
| 		"before sending it to diffbot.";
 | |
| 	m->m_xml   = "diffbotApiUrl";
 | |
| 	m->m_title = "diffbot api url";
 | |
| 	m->m_off   = (char *)&cr.m_diffbotApiUrl - x;
 | |
| 	m->m_type  = TYPE_SAFEBUF;
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_flags = PF_REBUILDURLFILTERS | PF_CLONE;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_page  = PAGE_SPIDER;
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m++;
 | |
| 
 | |
	// Parm: "diffbot url process pattern" (cgi "urlProcessPatternTwo",
	// collection-level, TYPE_SAFEBUF, default empty). Restricts which urls
	// are forwarded to Diffbot by simple "||"-separated substring match.
	// Grouped with the api-url parm above (m_group = 0).
	m->m_cgi   = "urlProcessPatternTwo";
	m->m_desc  = "Only send urls that match this simple substring "
		"pattern to Diffbot. Separate substrings with two pipe "
		"operators, ||. Leave empty for no restrictions.";
	m->m_xml   = "diffbotUrlProcessPattern";
	m->m_title = "diffbot url process pattern";
	m->m_off   = (char *)&cr.m_diffbotUrlProcessPattern - x;
	m->m_type  = TYPE_SAFEBUF;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m->m_def   = "";
	m->m_group = 0;
	m->m_flags = PF_REBUILDURLFILTERS | PF_CLONE;
	m++;
 | |
| 
 | |
	// Parm: "diffbot url process regex" (cgi "urlProcessRegExTwo",
	// collection-level, TYPE_SAFEBUF, default empty). Regex counterpart
	// of the substring pattern above; empty means no restriction.
	m->m_cgi   = "urlProcessRegExTwo";
	m->m_desc  = "Only send urls that match this regular expression "
		"to Diffbot. "
		"Leave empty for no restrictions.";
	m->m_xml   = "diffbotUrlProcessRegEx";
	m->m_title = "diffbot url process regex";
	m->m_off   = (char *)&cr.m_diffbotUrlProcessRegEx - x;
	m->m_type  = TYPE_SAFEBUF;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m->m_def   = "";
	m->m_group = 0;
	m->m_flags = PF_REBUILDURLFILTERS | PF_CLONE;
	m++;
 | |
| 
 | |
	// Parm: "diffbot page process pattern" (cgi "pageProcessPatternTwo",
	// collection-level, TYPE_SAFEBUF, default empty). Like the url
	// pattern above, but matched against the page CONTENT rather than
	// the url.
	m->m_cgi   = "pageProcessPatternTwo";
	m->m_desc  = "Only send urls whose content matches this simple "
		"substring "
		"pattern to Diffbot. Separate substrings with two pipe "
		"operators, ||. "
		"Leave empty for no restrictions.";
	m->m_xml   = "diffbotPageProcessPattern";
	m->m_title = "diffbot page process pattern";
	m->m_off   = (char *)&cr.m_diffbotPageProcessPattern - x;
	m->m_type  = TYPE_SAFEBUF;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m->m_def   = "";
	m->m_group = 0;
	m->m_flags = PF_REBUILDURLFILTERS | PF_CLONE;
	m++;
 | |
| 
 | |
| 
 | |
| 
 | |
	// Three related hidden parms (PF_HIDDEN | PF_NOSAVE) that bound the
	// spider scheduling window: start time, end time, and a flag to use
	// the current time as the end of the window. All collection-level,
	// never persisted to the collection conf file.
	m->m_title = "spider start time";
	m->m_desc  = "Only spider URLs scheduled to be spidered "
		"at this time or after. In UTC.";
	m->m_cgi   = "sta";
	m->m_off   = (char *)&cr.m_spiderTimeMin - x;
	m->m_type  = TYPE_DATE; // date format -- very special
	m->m_def   = "01 Jan 1970";
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m++;

	m->m_title = "spider end time";
	m->m_desc  = "Only spider URLs scheduled to be spidered "
		"at this time or before. If \"use current time\" is true "
		"then the current local time is used for this value instead. "
		"in UTC.";
	m->m_cgi   = "stb";
	m->m_off   = (char *)&cr.m_spiderTimeMax - x;
	m->m_type  = TYPE_DATE2;
	m->m_def   = "01 Jan 2010";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m++;

	// When true, m_spiderTimeMax above is ignored in favor of "now".
	m->m_title = "use current time";
	m->m_desc  = "Use the current time as the spider end time?";
	m->m_cgi   = "uct";
	m->m_off   = (char *)&cr.m_useCurrentTime - x;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "1";
	m->m_group = 0;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_SPIDER;
	m->m_obj   = OBJ_COLL;
	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "default ruleset site file num";
 | |
| 	m->m_desc  = "Use this as the current Sitedb file num for Sitedb "
 | |
| 		"entries that always use the current default";
 | |
| 	m->m_cgi   = "dftsfn";
 | |
| 	m->m_off   = (char *)&cr.m_defaultSiteRec - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "16";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "RSS ruleset site file num";
 | |
| 	m->m_desc  = "Use this Sitedb file num ruleset for RSS feeds";
 | |
| 	m->m_cgi   = "rssrs";
 | |
| 	m->m_off   = (char *)&cr.m_rssSiteRec - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "25";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "TOC ruleset site file num";
 | |
| 	m->m_desc  = "Use this Sitedb file num ruleset "
 | |
| 		"for Table of Contents pages";
 | |
| 	m->m_cgi   = "tocrs";
 | |
| 	m->m_off   = (char *)&cr.m_tocSiteRec - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "29";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "store topics vector";
 | |
| 	m->m_desc  = "Should Gigablast compute and store a topics vector "
 | |
| 		"for every document indexed. This allows Gigablast to "
 | |
| 		"do topic clustering without having to compute this vector "
 | |
| 		"at query time. You can turn topic clustering on in the "
 | |
| 		"Search Controls page.";
 | |
| 	m->m_cgi   = "utv";
 | |
| 	m->m_off   = (char *)&cr.m_useGigabitVector - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "use gigabits for vector";
 | |
| 	m->m_desc  = "For news collection. "
 | |
| 		"Should Gigablast form the similarity vector using "
 | |
| 		"Gigabits, as opposed to a straight out random sample. "
 | |
| 		"This does clustering more "
 | |
| 		"by topic rather than by explicit content in common.";
 | |
| 	m->m_cgi   = "uct";
 | |
| 	m->m_off   = (char *)&cr.m_useGigabitVector - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max similarity to reindex";
 | |
| 	m->m_desc  = "If the url's content is over X% similar to what we "
 | |
| 		"already "
 | |
| 		"have indexed, then do not reindex it, and treat the content "
 | |
| 		"as if it were unchanged for intelligent spider scheduling "
 | |
| 		"purposes. Set to 100% to always reindex the document, "
 | |
| 		"regardless, although the use-ifModifiedSince check "
 | |
| 		"above may still be in affect, as well as the "
 | |
| 		"deduping-enabled check. This will also affect the re-spider "
 | |
| 		"time, because Gigablast spiders documents that change "
 | |
| 		"frequently faster.";
 | |
| 	m->m_cgi   = "msti";
 | |
| 	m->m_off   = (char *)&cr.m_maxSimilarityToIndex - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "100";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	// this is obsolete -- we can use the reg exp "isroot"
 | |
| 	/*
 | |
| 	m->m_title = "root url priority";
 | |
| 	m->m_desc  = "What spider priority should root urls "
 | |
| 		"be assigned?  Spider priorities range from 0 to 31. If no "
 | |
| 		"urls are scheduled to be spidered in the priority 31 "
 | |
| 		"bracket, the spider moves down to 30, etc., until it finds "
 | |
| 		"a url to spider. If this priority is undefined "
 | |
| 		"then that url's priority is determined based on the rules "
 | |
| 		"on the URL filters page. If the priority is still "
 | |
| 		"undefined then the priority is taken to be the priority of "
 | |
| 		"the parent minus one, which results in a breadth first "
 | |
| 		"spidering algorithm."; // html
 | |
| 	m->m_cgi   = "srup";
 | |
| 	m->m_off   = (char *)&cr.m_spiderdbRootUrlPriority - x;
 | |
| 	m->m_type  = TYPE_PRIORITY2;// 0-(MAX_SPIDER_PRIORITIES-1)dropdown menu
 | |
| 	m->m_def   = "15"; 
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	  -- mdw, now in urlfilters using "isaddurl" "reg exp"
 | |
| 	m->m_title = "add url priority";
 | |
| 	m->m_desc  = "What is the priority of a url which "
 | |
| 		"is added to the spider queue via the "
 | |
| 		"add url page?"; // html
 | |
| 	m->m_cgi   = "saup";
 | |
| 	m->m_off   = (char *)&cr.m_spiderdbAddUrlPriority - x;
 | |
| 	m->m_type  = TYPE_PRIORITY; // 0-(MAX_SPIDER_PRIORITIES-1)dropdown menu
 | |
| 	m->m_def   = "16";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "new spider by priority";
 | |
| 	m->m_desc   = "Specify priorities for which "
 | |
| 		"new urls not yet in the index should be spidered.";
 | |
| 	m->m_cgi   = "sn";
 | |
| 	m->m_xml   = "spiderNewBits";
 | |
| 	m->m_off   = (char *)&cr.m_spiderNewBits - x;
 | |
| 	m->m_type  = TYPE_PRIORITY_BOXES; // array of numbered (0-(MAX_SPIDER_PRIORITIES-1)) checkboxes
 | |
| 	m->m_fixed = MAX_SPIDER_PRIORITIES;
 | |
| 	m->m_def   = "1"; // default for each one is on
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "old spider by priority";
 | |
| 	m->m_desc  = "Specify priorities for which old "
 | |
| 		"urls already in the index should be spidered.";
 | |
| 	m->m_cgi   = "so";
 | |
| 	m->m_xml   = "spiderOldBits";
 | |
| 	m->m_off   = (char *)&cr.m_spiderOldBits - x;
 | |
| 	m->m_type  = TYPE_PRIORITY_BOXES; // array of numbered (0-(MAX_SPIDER_PRIORITIES-1)) checkboxes
 | |
| 	m->m_fixed = MAX_SPIDER_PRIORITIES;
 | |
| 	m->m_def   = "1"; // default for each one is on
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max spiders per domain";
 | |
| 	m->m_desc  = "How many pages should the spider "
 | |
| 		"download simultaneously from any one domain? This can "
 | |
| 		"prevents the spider from hitting one server too hard.";
 | |
| 	m->m_cgi   = "mspd";
 | |
| 	m->m_off   = (char *)&cr.m_maxSpidersPerDomain - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "1";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "same domain wait";
 | |
| 	m->m_desc = "How many milliseconds should Gigablast wait "
 | |
| 		"between spidering a second url from the same domain. "
 | |
| 		"This is used to prevent the spiders from hitting a "
 | |
| 		"website too hard.";
 | |
| 	m->m_cgi = "sdw";
 | |
| 	m->m_off = (char *)&cr.m_sameDomainWait - x;
 | |
| 	m->m_type = TYPE_LONG;
 | |
| 	m->m_def = "500";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "same ip wait";
 | |
| 	m->m_desc  = "How many milliseconds should Gigablast wait "
 | |
| 		"between spidering a second url from the same IP address. "
 | |
| 		"This is used to prevent the spiders from hitting a "
 | |
| 		"website too hard.";
 | |
| 	m->m_cgi   = "siw";
 | |
| 	m->m_off   = (char *)&cr.m_sameIpWait - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "10000";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "use distributed spider lock";
 | |
| 	m->m_desc  = "Enable distributed spider locking to strictly enforce "
 | |
| 		"same domain waits at a global level.";
 | |
| 	m->m_cgi   = "udsl";
 | |
| 	m->m_off   = (char *)&cr.m_useSpiderLocks - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "distribute spider download based on ip";
 | |
| 	m->m_desc  = "Distribute web downloads based on the ip of the host so "
 | |
| 		"only one spider ip hits the same hosting ip.  Helps "
 | |
| 		"webmaster's logs look nicer.";
 | |
| 	m->m_cgi   = "udsd";
 | |
| 	m->m_off   = (char*)&cr.m_distributeSpiderGet - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "percent of water mark to reload queues";
 | |
| 	m->m_desc  = "When a spider queue drops below this percent of its "
 | |
| 		"max level it will reload from disk.";
 | |
| 	m->m_cgi   = "rlqp";
 | |
| 	m->m_off   = (char*)&cr.m_reloadQueuePercent - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "25";
 | |
| 	m++;
 | |
| 	 */
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "min respider wait";
 | |
| 	m->m_desc  = "What is the minimum number of days "
 | |
| 		"the spider should wait before re-visiting a particular "
 | |
| 		"web page? "
 | |
| 		"The spiders attempts to determine the update cycle of "
 | |
| 		"each web page and it tries to visit them as needed, but it "
 | |
| 		"will not wait less than this number of days regardless.";
 | |
| 	m->m_cgi   = "mrw";
 | |
| 	m->m_off   = (char *)&cr.m_minRespiderWait - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "1.0";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max respider wait";
 | |
| 	m->m_desc  = "What is the maximum number of days "
 | |
| 		"the spider should wait before re-visiting a particular "
 | |
| 		"web page?";
 | |
| 	m->m_cgi   = "xrw";
 | |
| 	m->m_off   = (char *)&cr.m_maxRespiderWait - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "90.0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "first respider wait";
 | |
| 	m->m_desc  = "What is the number of days "
 | |
| 		"Gigablast should wait before spidering a particular web page "
 | |
| 		"for the second time? Tag in ruleset will override this value "
 | |
| 		"if it is present.";
 | |
| 	m->m_cgi   = "frw";
 | |
| 	m->m_off   = (char *)&cr.m_firstRespiderWait - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "30.0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "error respider wait";
 | |
| 	m->m_desc  = "If a spidered web page has a network "
 | |
| 		"error, such as a DNS not found error, or a time out error, "
 | |
| 		"how many days should Gigablast wait before reattempting "
 | |
| 		"to spider that web page?";
 | |
| 	m->m_cgi   = "erw";
 | |
| 	m->m_off   = (char *)&cr.m_errorRespiderWait - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "2.0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "doc not found error respider wait";
 | |
| 	m->m_desc  = "If a spidered web page has a http status "
 | |
| 		"error, such as a 404 page not found error, "
 | |
| 		"how many days should Gigablast wait before reattempting "
 | |
| 		"to spider that web page?";
 | |
| 	m->m_cgi   = "dnferw";
 | |
| 	m->m_off   = (char *)&cr.m_docNotFoundErrorRespiderWait - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "7.0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "spider max kbps";
 | |
| 	m->m_desc  = "The maximum kilobits per second "
 | |
| 		  "that the spider can download.";
 | |
| 	m->m_cgi   = "cmkbps";
 | |
| 	m->m_off   = (char *)&cr.m_maxKbps - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "999999.0";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "spider max pages per second";
 | |
| 	m->m_desc  = "The maximum number of pages per "
 | |
| 		"second that can be indexed or deleted from the index.";
 | |
| 	m->m_cgi   = "cmpps";
 | |
| 	m->m_off   = (char *)&cr.m_maxPagesPerSecond - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "999999.0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "spider new percent";
 | |
| 	m->m_desc  = "Approximate percentage of new vs. old docs to spider. "
 | |
| 		     "If set to a negative number, the old alternating "
 | |
| 		     "priority algorithm is used.";
 | |
| 	m->m_cgi   = "snp";
 | |
| 	m->m_off   = (char *)&cr.m_spiderNewPct - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "-1.0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "number retries per url";
 | |
| 	m->m_desc  = "How many times should the spider be "
 | |
| 		"allowed to fail to download a particular web page before "
 | |
| 		"it gives up? "
 | |
| 		"Failure may result from temporary loss of internet "
 | |
| 		"connectivity on the remote end, dns or routing problems.";
 | |
| 	m->m_cgi   = "nr";
 | |
| 	m->m_off   = (char *)&cr.m_numRetries - x;
 | |
| 	m->m_type  = TYPE_RETRIES; // dropdown from 0 to 3
 | |
| 	m->m_def   = "1";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "priority of urls being retried";
 | |
| 	m->m_desc  = "Keep this pretty high so that we get problem urls "
 | |
| 		"out of the index fast, otherwise, you might be waiting "
 | |
| 		"months for another retry. Use <i>undefined</i> to indicate "
 | |
| 		"no change in the priority of the url.";
 | |
| 	m->m_cgi   = "rtp";
 | |
| 	m->m_off   = (char *)&cr.m_retryPriority - x;
 | |
| 	m->m_type  = TYPE_PRIORITY2; // -1 to 31
 | |
| 	m->m_def   = "-1";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "max pages in index";
 | |
| 	m->m_desc  = "What is the maximum number of "
 | |
| 		"pages that are permitted for this collection?";
 | |
| 	m->m_cgi   = "mnp";
 | |
| 	m->m_off   = (char *)&cr.m_maxNumPages - x;
 | |
| 	m->m_type  = TYPE_LONG_LONG;
 | |
| 	m->m_def   = "10000000000"; // 10 billion
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "import link info"; //  from other cluster";
 | |
| 	m->m_desc  = "Say yes here to make Gigablast import "
 | |
| 		"link text from another collection into this one "
 | |
| 		"when spidering urls. Gigablast will "
 | |
| 		"use the hosts.conf file in the working directory to "
 | |
| 		"tell it what hosts belong to the cluster to import from. "
 | |
| 		"Gigablast "
 | |
| 		"will use the \"update link votes frequency\" parm above "
 | |
| 		"to determine if the info should be recomputed on the other "
 | |
| 		"cluster.";
 | |
| 	m->m_cgi   = "eli"; // external link info
 | |
| 	m->m_off   = (char *)&cr.m_getExternalLinkInfo - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 2;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "use hosts2.conf for import cluster";
 | |
| 	m->m_desc  = "Tell Gigablast to import from the cluster defined by "
 | |
| 		"hosts2.conf in the working directory, rather than "
 | |
| 		"hosts.conf";
 | |
| 	m->m_cgi   = "elib"; // external link info
 | |
| 	m->m_off   = (char *)&cr.m_importFromHosts2Conf - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_priv  = 2;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	//m->m_title = "get link info from other cluster in real-time";
 | |
| 	//m->m_desc  = "Say yes here to make Gigablast tell the other "
 | |
| 	//	"cluster to compute the link info, not just return a "
 | |
| 	//	"stale copy from the last time it computed it.";
 | |
| 	//m->m_cgi   = "elif"; // external link info fresh
 | |
| 	//m->m_off   = (char *)&cr.m_getExternalLinkInfoFresh - x;
 | |
| 	//m->m_type  = TYPE_BOOL;
 | |
| 	//m->m_def   = "0";
 | |
| 	//m->m_group = 0;
 | |
| 	//m->m_priv  = 2;
 | |
| 	//m++;
 | |
| 
 | |
| 	m->m_title = "collection to import from";
 | |
| 	m->m_desc  = "Gigablast will fetch the link info from this "
 | |
| 		"collection.";
 | |
| 	m->m_cgi   = "elic"; // external link info
 | |
| 	m->m_off   = (char *)&cr.m_externalColl - x;
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = MAX_COLL_LEN+1;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "turk tags to display";
 | |
| 	m->m_desc  = "Tell pageturk to display the tag questions "
 | |
| 	             "for the comma separated tag names."
 | |
| 		     " no space allowed.";
 | |
|         m->m_cgi   = "ttags";
 | |
| 	m->m_xml   = "turkTags";
 | |
| 	m->m_type  = TYPE_STRING;
 | |
| 	m->m_size  = 256;
 | |
| 	m->m_def   = "blog,spam,news";
 | |
| 	m->m_off   = (char *)&cr.m_turkTags - x;
 | |
| 	m->m_group = 0;
 | |
| 	m->m_priv  = 2;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	// now we store this in title recs, so we can change it on the fly
 | |
| 	m->m_title = "title weight";
 | |
| 	m->m_desc  = "Weight title this much more or less. This units are "
 | |
| 		"percentage. A 100 means to not give the title any special "
 | |
| 		"weight. Generally, though, you want to give it significantly "
 | |
| 		"more weight than that, so 2400 is the default.";
 | |
| 	m->m_cgi   = "tw"; 
 | |
| 
 | |
| 	m->m_off   = (char *)&cr.m_titleWeight - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "4600";
 | |
| 	m->m_min   = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	// now we store this in title recs, so we can change it on the fly
 | |
| 	m->m_title = "header weight";
 | |
| 	m->m_desc  = "Weight terms in header tags by this much more or less. "
 | |
| 		"This units are "
 | |
| 		"percentage. A 100 means to not give the header any special "
 | |
| 		"weight. Generally, though, you want to give it significantly "
 | |
| 		"more weight than that, so 600 is the default.";
 | |
| 	m->m_cgi   = "hw"; 
 | |
| 	m->m_off   = (char *)&cr.m_headerWeight - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "600";
 | |
| 	m->m_min   = 0;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	// now we store this in title recs, so we can change it on the fly
 | |
| 	m->m_title = "url path word weight";
 | |
| 	m->m_desc  = "Weight text in url path this much more. "
 | |
| 		"The units are "
 | |
| 		"percentage. A 100 means to not give any special "
 | |
| 		"weight. Generally, though, you want to give it significantly "
 | |
| 		"more weight than that, so 600 is the default.";
 | |
| 	m->m_cgi   = "upw"; 
 | |
| 	m->m_off   = (char *)&cr.m_urlPathWeight - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "1600";
 | |
| 	m->m_min   = 0;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	// now we store this in title recs, so we can change it on the fly
 | |
| 	m->m_title = "external link text weight";
 | |
| 	m->m_desc  = "Weight text in the incoming external link text this "
 | |
| 		"much more. The units are percentage. It already receives a "
 | |
| 		"decent amount of weight naturally.";
 | |
| 	m->m_cgi   = "eltw"; 
 | |
| 	m->m_off   = (char *)&cr.m_externalLinkTextWeight - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "600";
 | |
| 	m->m_min   = 0;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	// now we store this in title recs, so we can change it on the fly
 | |
| 	m->m_title = "internal link text weight";
 | |
| 	m->m_desc  = "Weight text in the incoming internal link text this "
 | |
| 		"much more. The units are percentage. It already receives a "
 | |
| 		"decent amount of weight naturally.";
 | |
| 	m->m_cgi   = "iltw"; 
 | |
| 	m->m_off   = (char *)&cr.m_internalLinkTextWeight - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "200";
 | |
| 	m->m_min   = 0;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	// now we store this in title recs, so we can change it on the fly
 | |
| 	m->m_title = "concept weight";
 | |
| 	m->m_desc  = "Weight concepts this much more. "
 | |
| 		"The units are "
 | |
| 		"percentage. It already receives a decent amount of weight "
 | |
| 		"naturally. AKA: surrounding text boost.";
 | |
| 	m->m_cgi   = "cw"; 
 | |
| 	m->m_off   = (char *)&cr.m_conceptWeight - x;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "50";
 | |
| 	m->m_min   = 0;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	// now we store this in title recs, so we can change it on the fly
 | |
| 	m->m_title = "site num inlinks boost base";
 | |
| 	m->m_desc  = "Boost the score of all terms in the document using "
 | |
| 		"this number. "
 | |
| 		"The boost itself is expressed as a percentage. "
 | |
| 		"The boost is B^X, where X is the number of good "
 | |
| 		"inlinks to the document's site "
 | |
| 		"and B is this is this boost base. "
 | |
| 		"The score of each term in the "
 | |
| 		"document is multiplied by the boost. That product "
 | |
| 		"becomes the new score of that term. "
 | |
| 		"For purposes of this calculation we limit X to 1000.";
 | |
| 	m->m_cgi   = "qbe"; 
 | |
| 	m->m_off   = (char *)&cr.m_siteNumInlinksBoostBase - x;
 | |
| 	m->m_type  = TYPE_FLOAT;
 | |
| 	m->m_def   = "1.005";
 | |
| 	m->m_min   = 0;
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	// use menu elimination technology?
 | |
| 	m->m_title = "only index article content";
 | |
| 	m->m_desc  = "If this is true gigablast will only index the "
 | |
| 		"article content on pages identifed as permalinks. It will "
 | |
| 		"NOT index any page content on non-permalink pages, and it "
 | |
| 		"will avoid indexing menu content on any page. It will not "
 | |
| 		"index meta tags on any page. It will only index incoming "
 | |
| 		"link text for permalink pages. Useful when "
 | |
| 		"indexing blog or news sites.";
 | |
| 	m->m_cgi   = "met";
 | |
| 	m->m_off   = (char *)&cr.m_eliminateMenus - x;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	// replace by lang== lang!= in url filters
 | |
| 	//m->m_title = "collection language";
 | |
| 	//m->m_desc  = "Only spider pages determined to be in "
 | |
| 	//	"this language (see Language.h)";
 | |
| 	//m->m_cgi   = "clang";
 | |
| 	//m->m_off   = (char *)&cr.m_language - x;
 | |
| 	//m->m_type  = TYPE_LONG;
 | |
| 	//m->m_def   = "0";
 | |
| 	//m++;
 | |
| 
 | |
| 	////////////////
 | |
| 	// END PAGE SPIDER CONTROLS
 | |
| 	////////////////
 | |
| 
 | |
| 
 | |
| 	///////////////////////////////////////////
 | |
| 	//  PAGE REPAIR CONTROLS
 | |
| 	///////////////////////////////////////////
 | |
| 
 | |
	// Parm: "rebuild mode enabled" (cgi "rme", global conf, TYPE_BOOL,
	// default off). Master switch for the rdb rebuild/repair machinery on
	// PAGE_REPAIR. Explicitly excluded from host-to-host parm syncing
	// (m_sync = false) so one host toggling repair does not propagate.
	m->m_title = "rebuild mode enabled";
	m->m_desc  = "If enabled, gigablast will rebuild the rdbs as "
		"specified by the parameters below. When a particular "
		"collection is in rebuild mode, it can not spider or merge "
		"titledb files.";
	m->m_cgi   = "rme";
	m->m_off   = (char *)&g_conf.m_repairingEnabled - g;
	m->m_type  = TYPE_BOOL;
	m->m_page  = PAGE_REPAIR;
	m->m_obj   = OBJ_CONF;
	m->m_def   = "0";
	m->m_sync  = false;  // do not sync this parm
	m++;
 | |
| 
 | |
	// Parm: "collection to rebuild" (cgi "rctr", global conf,
	// TYPE_SAFEBUF, default empty, PF_REQUIRED). Names the single
	// collection the repair/rebuild pass operates on. The commented-out
	// desc suggests multi-collection lists were once considered.
	m->m_title = "collection to rebuild";
	m->m_xml   = "collectionToRebuild";
	m->m_desc  = "Name of collection to rebuild.";
	// m->m_desc  = "Comma or space separated list of the collections "
	// 	"to rebuild.";
	m->m_cgi   = "rctr"; // repair collections to repair
	m->m_off   = (char *)&g_conf.m_collsToRepair - g;
	m->m_type  = TYPE_SAFEBUF;//STRING;
	//m->m_size  = 1024;
	m->m_def   = "";
	m->m_page  = PAGE_REPAIR;
	m->m_obj   = OBJ_CONF;
	m->m_group = 0;
	m->m_flags = PF_REQUIRED;// | PF_COLLDEFAULT;//| PF_NOHTML;
	m++;
 | |
| 
 | |
	// Parm: "rebuild ALL collections" (cgi "rac", global conf, TYPE_BOOL,
	// default off). Overrides the single-collection parm above and
	// rebuilds every collection.
	m->m_title = "rebuild ALL collections";
	m->m_desc  = "If enabled, gigablast will rebuild all collections.";
	m->m_cgi   = "rac";
	m->m_off   = (char *)&g_conf.m_rebuildAllCollections - g;
	m->m_type  = TYPE_BOOL;
	m->m_page  = PAGE_REPAIR;
	m->m_obj   = OBJ_CONF;
	m->m_def   = "0";
	m->m_group = 0;
	m++;
 | |
| 
 | |
| 
 | |
	// Parm: "memory to use for rebuild" (cgi "rmtu", global conf,
	// TYPE_LONG, units bytes, default 200MB). Memory budget for the
	// repair pass.
	m->m_title = "memory to use for rebuild";
	m->m_desc  = "In bytes.";
	m->m_cgi   = "rmtu"; // repair mem to use
	m->m_off   = (char *)&g_conf.m_repairMem - g;
	m->m_type  = TYPE_LONG;
	m->m_page  = PAGE_REPAIR;
	m->m_obj   = OBJ_CONF;
	m->m_def   = "200000000";
	m->m_units = "bytes";
	m->m_group = 0;
	m++;
 | |
| 
 | |
	// Parm: "max rebuild injections" (cgi "mrps", global conf, TYPE_LONG,
	// default 2). Caps concurrent reinjections during a rebuild; the
	// backing field is named m_maxRepairSpiders since the rebuild reuses
	// spider machinery.
	m->m_title = "max rebuild injections";
	m->m_desc  = "Maximum number of outstanding injections for "
		"rebuild.";
	m->m_cgi   = "mrps";
	m->m_off   = (char *)&g_conf.m_maxRepairSpiders - g;
	m->m_type  = TYPE_LONG;
	m->m_page  = PAGE_REPAIR;
	m->m_obj   = OBJ_CONF;
	m->m_def   = "2";
	m->m_group = 0;
	m++;
 | |
| 
 | |
| 	m->m_title = "full rebuild";
 | |
| 	m->m_desc  = "If enabled, gigablast will reinject the content of "
 | |
| 		"all title recs into a secondary rdb system. That will "
 | |
| 		"the primary rdb system when complete.";
 | |
| 	m->m_cgi   = "rfr"; // repair full rebuild
 | |
| 	m->m_off   = (char *)&g_conf.m_fullRebuild - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_REPAIR;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
	// Parm: "add spiderdb recs of non indexed urls" (cgi "rfrknsx",
	// global conf, TYPE_BOOL, default off). When off, only already
	// indexed urls get spiderdb records during rebuild, skipping IP
	// lookups for urls not yet in tagdb.
	m->m_title = "add spiderdb recs of non indexed urls";
	m->m_desc  = "If enabled, gigablast will add the spiderdb "
		"records of unindexed urls "
		"when doing the full rebuild or the spiderdb "
		"rebuild. Otherwise, only the indexed urls will get "
		"spiderdb records in spiderdb. This can be faster because "
		"Gigablast does not have to do an IP lookup on every url "
		"if its IP address is not in tagdb already.";
	m->m_cgi   = "rfrknsx";
	m->m_off   = (char *)&g_conf.m_rebuildAddOutlinks - g;
	m->m_type  = TYPE_BOOL;
	m->m_page  = PAGE_REPAIR;
	m->m_obj   = OBJ_CONF;
	m->m_def   = "0";
	m->m_group = 0;
	m++;
 | |
| 
 | |
	// Parm: "recycle link text" (cgi "rrli", global conf, TYPE_BOOL,
	// default on). When on, the titledb rebuild reuses existing link
	// text instead of recomputing it; set to NO (and rebuild titledb)
	// to pick up fresh link text.
	m->m_title = "recycle link text";
	m->m_desc  = "If enabled, gigablast will recycle the link text "
		"when rebuilding titledb. "
		"The siterank, which is determined by the "
		"number of inlinks to a site, is stored/cached in tagdb "
		"so that is a separate item. If you want to pick up new "
		"link text you will want to set this to <i>NO</i> and "
		"make sure to rebuild titledb, since that stores the "
		"link text.";
	m->m_cgi   = "rrli"; // repair full rebuild
	m->m_off   = (char *)&g_conf.m_rebuildRecycleLinkInfo - g;
	m->m_type  = TYPE_BOOL;
	m->m_page  = PAGE_REPAIR;
	m->m_obj   = OBJ_CONF;
	m->m_def   = "1";
	m->m_group = 0;
	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "recycle imported link info";
 | |
| 	m->m_desc  = "If enabled, gigablast will recycle the imported "
 | |
| 		"link info when rebuilding titledb.";
 | |
| 	m->m_cgi   = "rrlit"; // repair full rebuild
 | |
| 	m->m_off   = (char *)&g_conf.m_rebuildRecycleLinkInfo2 - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_REPAIR;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "remove bad pages";
 | |
| 	m->m_desc  = "If enabled, gigablast just scans the titledb recs "
 | |
| 		"in the given collection and removes those that are "
 | |
| 		"banned or filtered according to the url filters table. It "
 | |
| 		"will also lookup in tagdb.";
 | |
| 	m->m_cgi   = "rbadp";
 | |
| 	m->m_off   = (char *)&g_conf.m_removeBadPages - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_REPAIR;
 | |
| 	m->m_def   = "0";
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
	// Parm: "rebuild titledb" (cgi "rrt", global conf, TYPE_BOOL,
	// default off). Per-rdb selector for the repair pass.
	m->m_title = "rebuild titledb";
	m->m_desc  = "If enabled, gigablast will rebuild this rdb";
	m->m_cgi   = "rrt"; // repair rebuild titledb
	m->m_off   = (char *)&g_conf.m_rebuildTitledb - g;
	m->m_type  = TYPE_BOOL;
	m->m_page  = PAGE_REPAIR;
	m->m_obj   = OBJ_CONF;
	m->m_def   = "0";
	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "rebuild tfndb";
 | |
| 	m->m_desc  = "If enabled, gigablast will rebuild this rdb";
 | |
| 	m->m_cgi   = "rru"; // repair rebuild tfndb
 | |
| 	m->m_off   = (char *)&g_conf.m_rebuildTfndb - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_REPAIR;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "rebuild indexdb";
 | |
| 	m->m_desc  = "If enabled, gigablast will rebuild this rdb";
 | |
| 	m->m_cgi   = "rri";
 | |
| 	m->m_off   = (char *)&g_conf.m_rebuildIndexdb - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_REPAIR;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "rebuild posdb";
 | |
| 	m->m_desc  = "If enabled, gigablast will rebuild this rdb";
 | |
| 	m->m_cgi   = "rri";
 | |
| 	m->m_off   = (char *)&g_conf.m_rebuildPosdb - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_REPAIR;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "rebuild no splits";
 | |
| 	m->m_desc  = "If enabled, gigablast will just re-add the no split "
 | |
| 		"lists from all the current title recs back into indexdb.";
 | |
| 	m->m_cgi   = "rns";
 | |
| 	m->m_off   = (char *)&g_conf.m_rebuildNoSplits - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_REPAIR;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "rebuild datedb";
 | |
| 	m->m_desc  = "If enabled, gigablast will rebuild this rdb";
 | |
| 	m->m_cgi   = "rrd";
 | |
| 	m->m_off   = (char *)&g_conf.m_rebuildDatedb - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_REPAIR;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "rebuild checksumdb";
 | |
| 	m->m_desc  = "If enabled, gigablast will rebuild this rdb";
 | |
| 	m->m_cgi   = "rrch";
 | |
| 	m->m_off   = (char *)&g_conf.m_rebuildChecksumdb - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_REPAIR;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "rebuild clusterdb";
 | |
| 	m->m_desc  = "If enabled, gigablast will rebuild this rdb";
 | |
| 	m->m_cgi   = "rrcl";
 | |
| 	m->m_off   = (char *)&g_conf.m_rebuildClusterdb - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_REPAIR;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "rebuild spiderdb";
 | |
| 	m->m_desc  = "If enabled, gigablast will rebuild this rdb";
 | |
| 	m->m_cgi   = "rrsp";
 | |
| 	m->m_off   = (char *)&g_conf.m_rebuildSpiderdb - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_REPAIR;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "rebuild tagdb";
 | |
| 	m->m_desc  = "If enabled, gigablast will rebuild this rdb";
 | |
| 	m->m_cgi   = "rrsi";
 | |
| 	m->m_off   = (char *)&g_conf.m_rebuildSitedb - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_REPAIR;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "rebuild linkdb";
 | |
| 	m->m_desc  = "If enabled, gigablast will rebuild this rdb";
 | |
| 	m->m_cgi   = "rrld";
 | |
| 	m->m_off   = (char *)&g_conf.m_rebuildLinkdb - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_REPAIR;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "rebuild tagdb";
 | |
| 	m->m_desc  = "If enabled, gigablast will rebuild this rdb";
 | |
| 	m->m_cgi   = "rrtgld";
 | |
| 	m->m_off   = (char *)&g_conf.m_rebuildTagdb - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_REPAIR;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "rebuild placedb";
 | |
| 	m->m_desc  = "If enabled, gigablast will rebuild this rdb";
 | |
| 	m->m_cgi   = "rrpld";
 | |
| 	m->m_off   = (char *)&g_conf.m_rebuildPlacedb - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_REPAIR;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "rebuild timedb";
 | |
| 	m->m_desc  = "If enabled, gigablast will rebuild this rdb";
 | |
| 	m->m_cgi   = "rrtmd";
 | |
| 	m->m_off   = (char *)&g_conf.m_rebuildTimedb - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_REPAIR;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "rebuild sectiondb";
 | |
| 	m->m_desc  = "If enabled, gigablast will rebuild this rdb";
 | |
| 	m->m_cgi   = "rrsnd";
 | |
| 	m->m_off   = (char *)&g_conf.m_rebuildSectiondb - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_REPAIR;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "rebuild revdb";
 | |
| 	m->m_desc  = "If enabled, gigablast will rebuild this rdb";
 | |
| 	m->m_cgi   = "rrrvd";
 | |
| 	m->m_off   = (char *)&g_conf.m_rebuildRevdb - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_REPAIR;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "rebuild root urls";
 | |
| 	m->m_desc  = "If disabled, gigablast will skip root urls.";
 | |
| 	m->m_cgi   = "ruru";
 | |
| 	m->m_off   = (char *)&g_conf.m_rebuildRoots - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_REPAIR;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_def   = "1";
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "rebuild non-root urls";
 | |
| 	m->m_desc  = "If disabled, gigablast will skip non-root urls.";
 | |
| 	m->m_cgi   = "runru";
 | |
| 	m->m_off   = (char *)&g_conf.m_rebuildNonRoots - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_REPAIR;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_def   = "1";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "skip tagdb lookup";
 | |
| 	m->m_desc  = "When rebuilding spiderdb and scanning it for new "
 | |
| 		"spiderdb records, should a tagdb lookup be performed? "
 | |
| 		"Runs much much "
 | |
| 		"faster without it. Will also keep the original doc quality "
 | |
| 		"and "
 | |
| 		"spider priority in tact.";
 | |
| 	m->m_cgi   = "rssl";
 | |
| 	m->m_off   = (char *)&g_conf.m_rebuildSkipSitedbLookup - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_page  = PAGE_REPAIR;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_group = 0;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	///////////////////////////////////////////
 | |
| 	//          END PAGE REPAIR              //
 | |
| 	///////////////////////////////////////////
 | |
| 
 | |
| 
 | |
| 	///////////////////////////////////////////
 | |
| 	//  AUTOBAN CONTROLS
 | |
| 	//  
 | |
| 	///////////////////////////////////////////
 | |
| 
 | |
| 	m->m_title = "ban IPs";
 | |
| 	m->m_desc  = "add Ips here to bar them from accessing this "
 | |
| 		"gigablast server.";
 | |
| 	m->m_cgi   = "banIps";
 | |
| 	m->m_xml   = "banIps";
 | |
| 	m->m_off   = (char *)g_conf.m_banIps - g;
 | |
| 	m->m_type  = TYPE_STRINGBOX;
 | |
| 	m->m_page  = PAGE_AUTOBAN;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_size  = AUTOBAN_TEXT_SIZE;
 | |
| 	m->m_group = 1;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_plen  = (char *)&g_conf.m_banIpsLen - g; // length of string
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "allow IPs";
 | |
| 	m->m_desc  = "add Ips here to give them an infinite query quota.";
 | |
| 	m->m_cgi   = "allowIps";
 | |
| 	m->m_xml   = "allowIps";
 | |
| 	m->m_off   = (char *)g_conf.m_allowIps - g;
 | |
| 	m->m_type  = TYPE_STRINGBOX;
 | |
| 	m->m_page  = PAGE_AUTOBAN;
 | |
| 	m->m_size  = AUTOBAN_TEXT_SIZE;
 | |
| 	m->m_group = 1;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_plen  = (char *)&g_conf.m_allowIpsLen - g; // length of string
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "valid search codes";
 | |
| 	m->m_desc  = "Don't try to autoban queries that have one "
 | |
| 		"of these codes. Also, the code must be valid for us "
 | |
| 		"to use &uip=IPADDRESS as the IP address of the submitter "
 | |
| 		"for purposes of autoban AND purposes of addurl daily quotas.";
 | |
| 	m->m_cgi   = "validCodes";
 | |
| 	m->m_xml   = "validCodes";
 | |
| 	m->m_off   = (char *)g_conf.m_validCodes - g;
 | |
| 	m->m_type  = TYPE_STRINGBOX;
 | |
| 	m->m_page  = PAGE_AUTOBAN;
 | |
| 	m->m_size  = AUTOBAN_TEXT_SIZE;
 | |
| 	m->m_group = 1;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_plen  = (char *)&g_conf.m_validCodesLen - g; // length of string
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "Extra Parms";
 | |
| 	m->m_desc  = "Append extra default parms to queries that match "
 | |
| 		"certain substrings.  Format: text to match in url, "
 | |
| 		"followed by a space, then the list of extra parms as "
 | |
| 		"they would appear appended to the url.  "
 | |
| 		"One match per line.";
 | |
| 	m->m_cgi   = "extraParms";
 | |
| 	m->m_xml   = "extraParms";
 | |
| 	m->m_off   = (char *)g_conf.m_extraParms - g;
 | |
| 	m->m_type  = TYPE_STRINGBOX;
 | |
| 	m->m_page  = PAGE_AUTOBAN;
 | |
| 	m->m_size  = AUTOBAN_TEXT_SIZE;
 | |
| 	m->m_group = 1;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_plen  = (char *)&g_conf.m_extraParmsLen - g; // length of string
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "ban substrings";
 | |
| 	m->m_desc  = "ban any query that matches this list of "
 | |
| 		"substrings.  Must match all comma-separated strings "
 | |
| 		"on the same line.  ('\\n' = OR, ',' = AND)";
 | |
| 	m->m_cgi   = "banRegex";
 | |
| 	m->m_xml   = "banRegex";
 | |
| 	m->m_off   = (char *)g_conf.m_banRegex - g;
 | |
| 	m->m_type  = TYPE_STRINGBOX;
 | |
| 	m->m_page  = PAGE_AUTOBAN;
 | |
| 	m->m_size  = AUTOBAN_TEXT_SIZE;
 | |
| 	m->m_group = 1;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_plen  = (char *)&g_conf.m_banRegexLen - g; // length of string
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	/////////////
 | |
| 	// END AUTOBAN CONTROLS
 | |
| 	/////////////
 | |
| 
 | |
| 	///////////////////////////////////////////
 | |
| 	// ROOT PASSWORDS page
 | |
| 	///////////////////////////////////////////
 | |
| 
 | |
| 
 | |
| 	m->m_title = "Master Passwords";
 | |
| 	m->m_desc  = "Whitespace separated list of passwords. "
 | |
| 		"Any matching password will have administrative access "
 | |
| 		"to Gigablast and all collections.";
 | |
| 		//"If no Admin Password or Admin IP is specified then "
 | |
| 		//"Gigablast will only allow local IPs to connect to it "
 | |
| 		//"as the master admin.";
 | |
| 	m->m_cgi   = "masterpwds";
 | |
| 	m->m_xml   = "masterPasswords";
 | |
| 	m->m_def   = "";
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_off   = (char *)&g_conf.m_masterPwds - g;
 | |
| 	m->m_type  = TYPE_SAFEBUF; // STRINGNONEMPTY;
 | |
| 	m->m_page  = PAGE_MASTERPASSWORDS;
 | |
| 	//m->m_max   = MAX_MASTER_PASSWORDS;
 | |
| 	//m->m_size  = PASSWORD_MAX_LEN+1;
 | |
| 	//m->m_addin = 1; // "insert" follows?
 | |
| 	m->m_flags = PF_PRIVATE | PF_TEXTAREA | PF_SMALLTEXTAREA;
 | |
| 	m++;
 | |
| 
 | |
| 
 | |
| 	m->m_title = "Master IPs";
 | |
| 	//m->m_desc = "Allow UDP requests from this list of IPs. Any datagram "
 | |
| 	//	"received not coming from one of these IPs, or an IP in "
 | |
| 	//	"hosts.conf, is dropped. If another cluster is accessing this "
 | |
| 	//	"cluster for getting link text or whatever, you will need to "
 | |
| 	//	"list the IPs of the accessing machines here. These IPs are "
 | |
| 	//	"also used to allow access to the HTTP server even if it "
 | |
| 	//	"was disabled in the Master Controls. IPs that have 0 has "
 | |
| 	//	"their Least Significant Byte are treated as wildcards for "
 | |
| 	//	"IP blocks. That is, 1.2.3.0 means 1.2.3.*.";
 | |
| 	m->m_desc  = "Whitespace separated list of Ips. "
 | |
| 		"Any IPs in this list will have administrative access "
 | |
| 		"to Gigablast and all collections.";
 | |
| 	m->m_cgi   = "masterips";
 | |
| 	m->m_xml   = "masterIps";
 | |
| 	m->m_page  = PAGE_MASTERPASSWORDS;
 | |
| 	m->m_off   = (char *)&g_conf.m_connectIps - g;
 | |
| 	m->m_type  = TYPE_SAFEBUF;//IP;
 | |
| 	m->m_def   = "";
 | |
| 	//m->m_max   = MAX_CONNECT_IPS;
 | |
| 	//m->m_priv  = 2;
 | |
| 	//m->m_addin = 1; // "insert" follows?
 | |
| 	//m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_flags = PF_PRIVATE | PF_TEXTAREA | PF_SMALLTEXTAREA;
 | |
| 	m++;
 | |
| 
 | |
| 	// m->m_title = "remove connect ip";
 | |
| 	// m->m_desc  = "remove a connect ip";
 | |
| 	// m->m_cgi   = "removeip";
 | |
| 	// m->m_type  = TYPE_CMD;
 | |
| 	// m->m_page  = PAGE_NONE;
 | |
| 	// m->m_func  = CommandRemoveConnectIpRow;
 | |
| 	// m->m_cast  = 1;
 | |
| 	// m->m_obj   = OBJ_CONF;
 | |
| 	// m++;
 | |
| 
 | |
| 	// m->m_title = "remove a password";
 | |
| 	// m->m_desc  = "remove a password";
 | |
| 	// m->m_cgi   = "removepwd";
 | |
| 	// m->m_type  = TYPE_CMD;
 | |
| 	// m->m_page  = PAGE_NONE;
 | |
| 	// m->m_func  = CommandRemovePasswordRow;
 | |
| 	// m->m_cast  = 1;
 | |
| 	// m->m_obj   = OBJ_CONF;
 | |
| 	// m++;
 | |
| 
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "Super Turks";
 | |
| 	m->m_desc = "Add facebook user IDs here so those people can "
 | |
| 		"turk the results. Later we may limit each person to "
 | |
| 		"turking a geographic region.";
 | |
| 	m->m_cgi = "supterturks";
 | |
| 	m->m_xml = "supterturks";
 | |
| 	m->m_def = "";
 | |
| 	m->m_off = (char *)&g_conf.m_superTurks - g;
 | |
| 	m->m_type = TYPE_STRINGBOX;
 | |
| 	m->m_perms = PAGE_MASTER;
 | |
| 	m->m_size = USERS_TEXT_SIZE;
 | |
| 	m->m_plen = (char *)&g_conf.m_superTurksLen - g;
 | |
| 	m->m_page = PAGE_MASTERPASSWORDS;
 | |
| 	m->m_flags = PF_HIDDEN | PF_NOSAVE;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "Users";
 | |
| 	m->m_desc = "Add users here. The format is "
 | |
| 		"collection:ip:username:password:relogin:pages:tagnames"
 | |
| 		" Username and password cannot be blank."
 | |
| 		" You can specify "
 | |
| 		"* for collection to indicate all collections. "
 | |
| 		" * can be used in IP as wildcard. "
 | |
| 		" * in pages means user has access to all pages. Also"
 | |
| 		" you can specify individual pages. A \'-\' sign at the"
 | |
| 		" start of page means user is not allowed to access that"
 | |
| 		" page. Please refer the page reference table at the bottom "
 | |
| 		"of this page for available pages. If you want to just login "
 | |
| 		" once and avoid relogin for gb shutdowns then set relogin=1,"
 | |
| 		" else set it to 0. If relogin is 1 your login will never expire either."
 | |
| 		"<br>"
 | |
| 		" Ex: 1. master user -> *:*:master:master:1:*:english<br>"
 | |
| 		" 2. public user -> *:*:public:1234:0:index.html"
 | |
| 		",get,search,login,dir:english<br>"
 | |
| 		"3. turk user ->  66.28.58.122:main:turk:1234:0:pageturkhome,"
 | |
| 		"pageturk,pageturkget,get,login:english";
 | |
| 	m->m_cgi = "users";
 | |
| 	m->m_xml = "users";
 | |
| 	m->m_off = (char *)&g_conf.m_users - g;
 | |
| 	m->m_type = TYPE_STRINGBOX;
 | |
| 	m->m_perms = PAGE_MASTER;
 | |
| 	m->m_size = USERS_TEXT_SIZE;
 | |
| 	m->m_plen = (char *)&g_conf.m_usersLen - g;
 | |
| 	m->m_page = PAGE_MASTERPASSWORDS;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "Master IPs";
 | |
| 	m->m_desc  = "If someone connects from one of these IPs "
 | |
| 		"then they will have full "
 | |
| 		"master administrator privileges. "
 | |
| 		"If no IPs are specified, then master administrators can "
 | |
| 		"get access for any IP. "
 | |
| 		"Connecting from 127.0.0.1 always grants master privledges. "
 | |
| 		"If no Master Password or Master IP is specified then "
 | |
| 		"Gigablast will assign a default password of footbar23.";
 | |
| 	m->m_cgi   = "masterip";
 | |
| 	m->m_xml   = "masterIp";
 | |
| 	m->m_max   = MAX_MASTER_IPS;
 | |
| 	m->m_off   = (char *)g_conf.m_masterIps - g;
 | |
| 	m->m_type  = TYPE_IP;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 
 | |
| 	m->m_title = "Collection Passwords";
 | |
| 	m->m_desc  = "Whitespace separated list of passwords. "
 | |
| 		"Any matching password will have administrative access "
 | |
| 		"to the controls for just this collection. The master "
 | |
| 		"password and IPs are controlled through the "
 | |
| 		"<i>master passwords</i> link under the ADVANCED controls "
 | |
| 		"tab. The master passwords or IPs have administrative "
 | |
| 		"access to all collections.";
 | |
| 	m->m_cgi   = "collpwd";
 | |
| 	m->m_xml   = "collectionPasswords";
 | |
| 	m->m_obj   = OBJ_COLL;
 | |
| 	m->m_off   = (char *)&cr.m_collectionPasswords - x;
 | |
| 	m->m_def   = "";
 | |
| 	m->m_type  = TYPE_SAFEBUF; // STRINGNONEMPTY;
 | |
| 	m->m_page  = PAGE_COLLPASSWORDS;
 | |
| 	m->m_flags = PF_PRIVATE | PF_TEXTAREA | PF_SMALLTEXTAREA;
 | |
| 	m++;
 | |
| 
 | |
	// Per-collection admin IPs; same OBJ_COLL storage as the
	// collection passwords above.
	m->m_title = "Collection IPs";
	m->m_desc  = "Whitespace separated list of IPs. "
		"Any matching IP will have administrative access "
		"to the controls for just this collection.";
	m->m_cgi   = "collips";
	m->m_xml   = "collectionIps";
	m->m_obj   = OBJ_COLL;
	m->m_off   = (char *)&cr.m_collectionIps - x;
	m->m_def   = "";
	m->m_type  = TYPE_SAFEBUF; // STRINGNONEMPTY;
	m->m_page  = PAGE_COLLPASSWORDS;
	m->m_flags = PF_PRIVATE | PF_TEXTAREA | PF_SMALLTEXTAREA;
	m++;


	//////
	// END SECURITY CONTROLS
	//////


	///////////////////////////////////////////
	// LOG CONTROLS
	///////////////////////////////////////////
 | |
| 
 | |
	// Log page: toggle access logging of incoming HTTP requests.
	m->m_title = "log http requests";
	m->m_desc  = "Log GET and POST requests received from the "
		"http server?";
	m->m_cgi   = "hr";
	m->m_off   = (char *)&g_conf.m_logHttpRequests - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "1";
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	// Log page: toggle logging of queries rejected by autoban.
	m->m_title = "log autobanned queries";
	m->m_desc  = "Should we log queries that are autobanned? "
		"They can really fill up the log.";
	m->m_cgi   = "laq";
	m->m_off   = (char *)&g_conf.m_logAutobannedQueries - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "1";
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;
 | |
| 
 | |
| 	m->m_title = "log query time threshold";
 | |
| 	m->m_desc  = "If query took this many millliseconds or longer, then log the "
 | |
| 		"query and the time it took to process.";
 | |
| 	m->m_cgi   = "lqtt";
 | |
| 	m->m_off   = (char *)&g_conf.m_logQueryTimeThreshold- g;
 | |
| 	m->m_type  = TYPE_LONG;
 | |
| 	m->m_def   = "5000";
 | |
| 	m->m_page  = PAGE_LOG;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
	// Log page: dump slow-query replies on the proxy (uses the
	// threshold parm above).
	m->m_title = "log query reply";
	m->m_desc  = "Log query reply in proxy, but only for those queries "
		"above the time threshold above.";
	m->m_cgi   = "lqr";
	m->m_off   = (char *)&g_conf.m_logQueryReply - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_group = 0;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	// Log page: per-url status logging for spidered/injected docs.
	m->m_title = "log spidered urls";
	m->m_desc  = "Log status of spidered or injected urls?";
	m->m_cgi   = "lsu";
	m->m_off   = (char *)&g_conf.m_logSpideredUrls - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "1";
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	// Log page: warn on udp socket exhaustion (m_priv = admin-only).
	m->m_title = "log network congestion";
	m->m_desc  = "Log messages if Gigablast runs out of udp sockets?";
	m->m_cgi   = "lnc";
	m->m_off   = (char *)&g_conf.m_logNetCongestion - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	// Log page: general informational (non-error) log messages.
	m->m_title = "log informational messages";
	m->m_desc  = "Log messages not related to an error condition, "
		"but meant more to give an idea of the state of "
		"the gigablast process. These can be useful when "
		"diagnosing problems.";
	m->m_cgi   = "li";
	m->m_off   = (char *)&g_conf.m_logInfo - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "1";
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;
 | |
| 
 | |
| 	m->m_title = "log limit breeches";
 | |
| 	m->m_desc  = "Log it when document not added due to quota "
 | |
| 		"breech. Log it when url is too long and it gets "
 | |
| 		"truncated.";
 | |
| 	m->m_cgi   = "ll";
 | |
| 	m->m_off   = (char *)&g_conf.m_logLimits - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 1;
 | |
| 	m->m_page  = PAGE_LOG;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "log debug admin messages";
 | |
| 	m->m_desc  = "Log various debug messages.";
 | |
| 	m->m_cgi   = "lda";
 | |
| 	m->m_off   = (char *)&g_conf.m_logDebugAdmin - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 1;
 | |
| 	m->m_page  = PAGE_LOG;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "log debug build messages";
 | |
| 	m->m_cgi   = "ldb";
 | |
| 	m->m_off   = (char *)&g_conf.m_logDebugBuild - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 1;
 | |
| 	m->m_page  = PAGE_LOG;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "log debug build time messages";
 | |
| 	m->m_cgi   = "ldbt";
 | |
| 	m->m_off   = (char *)&g_conf.m_logDebugBuildTime - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 1;
 | |
| 	m->m_page  = PAGE_LOG;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "log debug database messages";
 | |
| 	m->m_cgi   = "ldd";
 | |
| 	m->m_off   = (char *)&g_conf.m_logDebugDb - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 1;
 | |
| 	m->m_page  = PAGE_LOG;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "log debug dirty messages";
 | |
| 	m->m_cgi   = "lddm";
 | |
| 	m->m_off   = (char *)&g_conf.m_logDebugDirty - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 1;
 | |
| 	m->m_page  = PAGE_LOG;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m->m_page  = PAGE_LOG;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
	// Log page: more debug toggles (same template as above).
	m->m_title = "log debug disk messages";
	m->m_cgi   = "lddi";
	m->m_off   = (char *)&g_conf.m_logDebugDisk - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug disk page cache";
	m->m_cgi   = "ldpc";
	m->m_off   = (char *)&g_conf.m_logDebugDiskPageCache - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug dns messages";
	m->m_cgi   = "lddns";
	m->m_off   = (char *)&g_conf.m_logDebugDns - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug http messages";
	m->m_cgi   = "ldh";
	m->m_off   = (char *)&g_conf.m_logDebugHttp - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug image messages";
	m->m_cgi   = "ldi";
	m->m_off   = (char *)&g_conf.m_logDebugImage - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug loop messages";
	m->m_cgi   = "ldl";
	m->m_off   = (char *)&g_conf.m_logDebugLoop - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug language detection messages";
	m->m_cgi   = "ldg";
	m->m_off   = (char *)&g_conf.m_logDebugLang - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;
 | |
| 
 | |
	// Log page: more debug toggles (same template as above).
	m->m_title = "log debug link info";
	m->m_cgi   = "ldli";
	m->m_off   = (char *)&g_conf.m_logDebugLinkInfo - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug mem messages";
	m->m_cgi   = "ldm";
	m->m_off   = (char *)&g_conf.m_logDebugMem - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug mem usage messages";
	m->m_cgi   = "ldmu";
	m->m_off   = (char *)&g_conf.m_logDebugMemUsage - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug net messages";
	m->m_cgi   = "ldn";
	m->m_off   = (char *)&g_conf.m_logDebugNet - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	// Hidden + unsaved: post-query-rerank debug is retired from the UI.
	m->m_title = "log debug post query rerank messages";
	m->m_cgi   = "ldpqr";
	m->m_off   = (char *)&g_conf.m_logDebugPQR - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_flags = PF_HIDDEN | PF_NOSAVE;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug query messages";
	m->m_cgi   = "ldq";
	m->m_off   = (char *)&g_conf.m_logDebugQuery - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug quota messages";
	m->m_cgi   = "ldqta";
	m->m_off   = (char *)&g_conf.m_logDebugQuota - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;
 | |
| 
 | |
| 	m->m_title = "log debug robots messages";
 | |
| 	m->m_cgi   = "ldr";
 | |
| 	m->m_off   = (char *)&g_conf.m_logDebugRobots - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 1;
 | |
| 	m->m_page  = PAGE_LOG;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "log debug spider cache messages";
 | |
| 	m->m_cgi   = "lds";
 | |
| 	m->m_off   = (char *)&g_conf.m_logDebugSpcache - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 1;
 | |
| 	m->m_page  = PAGE_LOG;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	/*
 | |
| 	m->m_title = "log debug spider wait messages";
 | |
| 	m->m_cgi   = "ldspw";
 | |
| 	m->m_off   = (char *)&g_conf.m_logDebugSpiderWait - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 1;
 | |
| 	m++;
 | |
| 	*/
 | |
| 
 | |
| 	m->m_title = "log debug speller messages";
 | |
| 	m->m_cgi   = "ldsp";
 | |
| 	m->m_off   = (char *)&g_conf.m_logDebugSpeller - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 1;
 | |
| 	m->m_page  = PAGE_LOG;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
	// Log page: more debug toggles (same template as above).
	m->m_title = "log debug sections messages";
	m->m_cgi   = "ldscc";
	m->m_off   = (char *)&g_conf.m_logDebugSections - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug seo insert messages";
	m->m_cgi   = "ldsi";
	m->m_off   = (char *)&g_conf.m_logDebugSEOInserts - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug seo messages";
	m->m_cgi   = "ldseo";
	m->m_off   = (char *)&g_conf.m_logDebugSEO - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug stats messages";
	m->m_cgi   = "ldst";
	m->m_off   = (char *)&g_conf.m_logDebugStats - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug summary messages";
	m->m_cgi   = "ldsu";
	m->m_off   = (char *)&g_conf.m_logDebugSummary - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug spider messages";
	m->m_cgi   = "ldspid";
	m->m_off   = (char *)&g_conf.m_logDebugSpider - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug msg13 messages";
	m->m_cgi   = "ldspmth";
	m->m_off   = (char *)&g_conf.m_logDebugMsg13 - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	// Workaround toggle; see m_diffbotMsg13Hack usage for semantics.
	m->m_title = "disable host0 for msg13 reception hack";
	m->m_cgi   = "dmth";
	m->m_off   = (char *)&g_conf.m_diffbotMsg13Hack - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug spider proxies";
	m->m_cgi   = "ldspr";
	m->m_off   = (char *)&g_conf.m_logDebugProxies - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug url attempts";
	m->m_cgi   = "ldspua";
	m->m_off   = (char *)&g_conf.m_logDebugUrlAttempts - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;
 | |
| 
 | |
	// Log page: more debug toggles (same template as above).
	m->m_title = "log debug spider downloads";
	m->m_cgi   = "ldsd";
	m->m_off   = (char *)&g_conf.m_logDebugDownloads - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug facebook";
	m->m_cgi   = "ldfb";
	m->m_off   = (char *)&g_conf.m_logDebugFacebook - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug tagdb messages";
	m->m_cgi   = "ldtm";
	m->m_off   = (char *)&g_conf.m_logDebugTagdb - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug tcp messages";
	m->m_cgi   = "ldt";
	m->m_off   = (char *)&g_conf.m_logDebugTcp - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug tcp buffer messages";
	m->m_cgi   = "ldtb";
	m->m_off   = (char *)&g_conf.m_logDebugTcpBuf - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug thread messages";
	m->m_cgi   = "ldth";
	m->m_off   = (char *)&g_conf.m_logDebugThread - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug title messages";
	m->m_cgi   = "ldti";
	m->m_off   = (char *)&g_conf.m_logDebugTitle - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug timedb messages";
	m->m_cgi   = "ldtim";
	m->m_off   = (char *)&g_conf.m_logDebugTimedb - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug topic messages";
	m->m_cgi   = "ldto";
	m->m_off   = (char *)&g_conf.m_logDebugTopics - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug topDoc messages";
	m->m_cgi   = "ldtopd";
	m->m_off   = (char *)&g_conf.m_logDebugTopDocs - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;

	m->m_title = "log debug udp messages";
	m->m_cgi   = "ldu";
	m->m_off   = (char *)&g_conf.m_logDebugUdp - g;
	m->m_type  = TYPE_BOOL;
	m->m_def   = "0";
	m->m_priv  = 1;
	m->m_page  = PAGE_LOG;
	m->m_obj   = OBJ_CONF;
	m++;
 | |
| 
 | |
| 	m->m_title = "log debug unicode messages";
 | |
| 	m->m_cgi   = "ldun";
 | |
| 	m->m_off   = (char *)&g_conf.m_logDebugUnicode - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 1;
 | |
| 	m->m_page  = PAGE_LOG;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "log debug repair messages";
 | |
| 	m->m_cgi   = "ldre";
 | |
| 	m->m_off   = (char *)&g_conf.m_logDebugRepair - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 1;
 | |
| 	m->m_page  = PAGE_LOG;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "log debug pub date extraction messages";
 | |
| 	m->m_cgi   = "ldpd";
 | |
| 	m->m_off   = (char *)&g_conf.m_logDebugDate - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 1;
 | |
| 	m->m_page  = PAGE_LOG;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "log timing messages for build";
 | |
| 	m->m_desc  = "Log various timing related messages.";
 | |
| 	m->m_cgi   = "ltb";
 | |
| 	m->m_off   = (char *)&g_conf.m_logTimingBuild - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 1;
 | |
| 	m->m_page  = PAGE_LOG;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "log timing messages for admin";
 | |
| 	m->m_desc  = "Log various timing related messages.";
 | |
| 	m->m_cgi   = "ltadm";
 | |
| 	m->m_off   = (char *)&g_conf.m_logTimingAdmin - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 1;
 | |
| 	m->m_page  = PAGE_LOG;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "log timing messages for database";
 | |
| 	m->m_cgi   = "ltd";
 | |
| 	m->m_off   = (char *)&g_conf.m_logTimingDb - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 1;
 | |
| 	m->m_page  = PAGE_LOG;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "log timing messages for network layer";
 | |
| 	m->m_cgi   = "ltn";
 | |
| 	m->m_off   = (char *)&g_conf.m_logTimingNet - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 1;
 | |
| 	m->m_page  = PAGE_LOG;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "log timing messages for query";
 | |
| 	m->m_cgi   = "ltq";
 | |
| 	m->m_off   = (char *)&g_conf.m_logTimingQuery - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 1;
 | |
| 	m->m_page  = PAGE_LOG;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "log timing messages for spcache";
 | |
| 	m->m_desc  = "Log various timing related messages.";
 | |
| 	m->m_cgi   = "ltspc";
 | |
| 	m->m_off   = (char *)&g_conf.m_logTimingSpcache - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 1;
 | |
| 	m->m_page  = PAGE_LOG;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "log timing messages for related topics";
 | |
| 	m->m_cgi   = "ltt";
 | |
| 	m->m_off   = (char *)&g_conf.m_logTimingTopics - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 1;
 | |
| 	m->m_page  = PAGE_LOG;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	m->m_title = "log reminder messages";
 | |
| 	m->m_desc  = "Log reminders to the programmer. You do not need this.";
 | |
| 	m->m_cgi   = "lr";
 | |
| 	m->m_off   = (char *)&g_conf.m_logReminders - g;
 | |
| 	m->m_type  = TYPE_BOOL;
 | |
| 	m->m_def   = "0";
 | |
| 	m->m_priv  = 1;
 | |
| 	m->m_page  = PAGE_LOG;
 | |
| 	m->m_obj   = OBJ_CONF;
 | |
| 	m++;
 | |
| 
 | |
| 	/////
 | |
| 	// END PAGE LOG CONTROLS
 | |
| 	/////
 | |
| 
 | |
| 
 | |
| 	// END PARMS PARM END PARMS END
 | |
| 
 | |
| 
 | |
| 	m_numParms = m - m_parms;
 | |
| 
 | |
| 	// sanity check
 | |
| 	if ( m_numParms >= MAX_PARMS ) {
 | |
| 		log("admin: Boost MAX_PARMS.");
 | |
| 		exit(-1);
 | |
| 	}
 | |
| 	
 | |
| 	// make xml tag names and store in here
 | |
| 	static char s_tbuf [ 18000 ];
 | |
| 	char *p    = s_tbuf;
 | |
| 	char *pend = s_tbuf + 18000;
 | |
| 	int32_t  size;
 | |
| 	char  t;
 | |
| 
 | |
| 	// . set hashes of title
 | |
| 	// . used by Statsdb.cpp for identifying a parm
 | |
| 	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
 | |
| 		if ( ! m_parms[i].m_title ) continue;
 | |
| 		m_parms[i].m_hash = hash32n ( m_parms[i].m_title );
 | |
| 	}
 | |
| 
 | |
| 	// cgi hashes
 | |
| 	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
 | |
| 		if ( ! m_parms[i].m_cgi ) continue;
 | |
| 		m_parms[i].m_cgiHash = hash32n ( m_parms[i].m_cgi );
 | |
| 	}
 | |
| 
 | |
| 	// sanity check: ensure all cgi parms are different
 | |
| 	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
 | |
| 	for ( int32_t j = 0 ; j < m_numParms ; j++ ) {
 | |
| 		if ( j == i             ) continue;
 | |
| 		if ( m_parms[i].m_type == TYPE_BOOL2 ) continue;
 | |
| 		if ( m_parms[j].m_type == TYPE_BOOL2 ) continue;
 | |
| 		if ( m_parms[i].m_type == TYPE_CMD   ) continue;
 | |
| 		if ( m_parms[j].m_type == TYPE_CMD   ) continue;
 | |
| 		if ( m_parms[i].m_type == TYPE_FILEUPLOADBUTTON ) continue;
 | |
| 		if ( m_parms[j].m_type == TYPE_FILEUPLOADBUTTON ) continue;
 | |
| 		if ( m_parms[i].m_obj == OBJ_NONE ) continue;
 | |
| 		if ( m_parms[j].m_obj == OBJ_NONE ) continue;
 | |
| 		if ( m_parms[i].m_flags & PF_DUP ) continue;
 | |
| 		if ( m_parms[j].m_flags & PF_DUP ) continue;
 | |
| 		// hack to allow "c" for search, inject, addurls
 | |
| 		if ( m_parms[j].m_page != m_parms[i].m_page &&
 | |
| 		     m_parms[i].m_obj != OBJ_COLL &&
 | |
| 		     m_parms[i].m_obj != OBJ_CONF ) 
 | |
| 			continue;
 | |
| 		if ( ! m_parms[i].m_cgi ) continue;
 | |
| 		if ( ! m_parms[j].m_cgi ) continue;
 | |
| 		// gotta be on same page now i guess
 | |
| 		int32_t obj1 = m_parms[i].m_obj;
 | |
| 		int32_t obj2 = m_parms[j].m_obj;
 | |
| 		if ( obj1 != OBJ_COLL && obj1 != OBJ_CONF ) continue;
 | |
| 		if ( obj2 != OBJ_COLL && obj2 != OBJ_CONF ) continue;
 | |
| 		//if ( m_parms[i].m_page != m_parms[j].m_page ) continue;
 | |
| 		// a different m_scmd means a different cgi parm really...
 | |
| 		//if ( m_parms[i].m_sparm && m_parms[j].m_sparm &&
 | |
| 		//     strcmp ( m_parms[i].m_scmd, m_parms[j].m_scmd) != 0 )
 | |
| 		//	continue;
 | |
| 		if ( strcmp ( m_parms[i].m_cgi , m_parms[j].m_cgi ) != 0 &&
 | |
| 		     // ensure cgi hashes are different as well!
 | |
| 		     m_parms[i].m_cgiHash != m_parms[j].m_cgiHash )
 | |
| 			continue;
 | |
| 		// upload file buttons are always dup of another parm
 | |
| 		if ( m_parms[j].m_type == TYPE_FILEUPLOADBUTTON )
 | |
| 			continue;
 | |
| 		log(LOG_LOGIC,"conf: Cgi parm for #%" INT32 " \"%s\" "
 | |
| 		    "matches #%" INT32 " \"%s\". Exiting.",
 | |
| 		    i,m_parms[i].m_cgi,j,m_parms[j].m_cgi);
 | |
| 		exit(-1);
 | |
| 	}
 | |
| 	}
 | |
| 
 | |
| 	int32_t mm = (int32_t)sizeof(CollectionRec);
 | |
| 	if ( (int32_t)sizeof(Conf)        > mm ) mm = (int32_t)sizeof(Conf);
 | |
| 	if ( (int32_t)sizeof(SearchInput) > mm ) mm = (int32_t)sizeof(SearchInput);
 | |
| 	// . set size of each parm based on its type
 | |
| 	// . also do page and obj inheritance
 | |
| 	// . also do sanity checking
 | |
| 	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
 | |
| 		// sanity check
 | |
| 		if ( m_parms[i].m_off   > mm ||
 | |
| 		     //m_parms[i].m_soff  > mm ||
 | |
| 		     m_parms[i].m_smaxc > mm   ) {
 | |
| 			log(LOG_LOGIC,"conf: Bad offset in parm #%" INT32 " %s."
 | |
| 			    " (%" INT32 ",%" INT32 ",%" INT32 "). Did you FORGET to include "
 | |
| 			    "an & before the cr.myVariable when setting "
 | |
| 			    "m_off for this parm? Or subtract  'x' instead "
 | |
| 			    "of 'g' or vice versa.",
 | |
| 			    i,m_parms[i].m_title,
 | |
| 			    mm,
 | |
| 			    m_parms[i].m_off,
 | |
| 			    //m_parms[i].m_soff,
 | |
| 			    m_parms[i].m_smaxc);
 | |
| 			exit(-1);
 | |
| 		}
 | |
| 		// do not allow numbers in cgi parms, they are used for
 | |
| 		// denoting array indices
 | |
| 		int32_t j = 0;
 | |
| 		for ( ; m_parms[i].m_cgi && m_parms[i].m_cgi[j] ; j++ ) {
 | |
| 			if ( is_digit ( m_parms[i].m_cgi[j] ) ) {
 | |
| 				log(LOG_LOGIC,"conf: Parm #%" INT32 " \"%s\" has "
 | |
| 				    "number in cgi name.", 
 | |
| 				    i,m_parms[i].m_title);
 | |
| 				exit(-1);
 | |
| 			}
 | |
| 		}
 | |
| 		// these inheriting cause too many problems when moving
 | |
| 		// parms around in the array
 | |
| 		// inherit page
 | |
| 		//if ( i > 0 && m_parms[i].m_page == -1 ) 
 | |
| 		//	m_parms[i].m_page = m_parms[i-1].m_page;
 | |
| 		// inherit obj
 | |
| 		//if ( i > 0 && m_parms[i].m_obj == -1 ) 
 | |
| 		//	m_parms[i].m_obj = m_parms[i-1].m_obj;
 | |
| 		// sanity now
 | |
| 		if ( m_parms[i].m_page == -1 ) { 
 | |
| 			log("parms: bad page \"%s\"",m_parms[i].m_title);
 | |
| 			char *xx=NULL;*xx=0; }
 | |
| 		if ( m_parms[i].m_obj == -1 ) { 
 | |
| 			log("parms: bad obj \"%s\"",m_parms[i].m_title);
 | |
| 			char *xx=NULL;*xx=0; }
 | |
| 
 | |
| 		// if its a fixed size then make sure m_size is not set
 | |
| 		if ( m_parms[i].m_fixed > 0 ) {
 | |
| 			if ( m_parms[i].m_size != 0 ) {
 | |
| 				log(LOG_LOGIC,"conf: Parm #%" INT32 " \"%s\" is "
 | |
| 				    "fixed but size is not 0.", 
 | |
| 				    i,m_parms[i].m_title);
 | |
| 				exit(-1);
 | |
| 			}
 | |
| 		}
 | |
| 		// string sizes should already be set!
 | |
| 		size = 0;
 | |
| 		t = m_parms[i].m_type;
 | |
| 		if ( t == -1 ) {
 | |
| 			log(LOG_LOGIC,"conf: Parm #%" INT32 " \"%s\" has no type.",
 | |
| 			    i,m_parms[i].m_title);
 | |
| 			exit(-1);
 | |
| 		}
 | |
| 		// skip if already set
 | |
| 		if ( m_parms[i].m_size ) goto skipSize;
 | |
| 		if ( t == TYPE_CHAR           ) size = 1;
 | |
| 		if ( t == TYPE_CHAR2          ) size = 1;
 | |
| 		if ( t == TYPE_BOOL           ) size = 1;
 | |
| 		if ( t == TYPE_BOOL2          ) size = 1;
 | |
| 		if ( t == TYPE_CHECKBOX       ) size = 1;
 | |
| 		if ( t == TYPE_PRIORITY       ) size = 1;
 | |
| 		if ( t == TYPE_PRIORITY2      ) size = 1;
 | |
| 		//if ( t ==TYPE_DIFFBOT_DROPDOWN) size = 1;
 | |
| 		if ( t == TYPE_UFP            ) size = 1;
 | |
| 		if ( t == TYPE_PRIORITY_BOXES ) size = 1;
 | |
| 		if ( t == TYPE_RETRIES        ) size = 1;
 | |
| 		if ( t == TYPE_TIME           ) size = 6;
 | |
| 		if ( t == TYPE_DATE2          ) size = 4;
 | |
| 		if ( t == TYPE_DATE           ) size = 4;
 | |
| 		if ( t == TYPE_FLOAT          ) size = 4;
 | |
| 		if ( t == TYPE_DOUBLE         ) size = 8;
 | |
| 		if ( t == TYPE_IP             ) size = 4;
 | |
| 		if ( t == TYPE_RULESET        ) size = 4;
 | |
| 		if ( t == TYPE_LONG           ) size = 4;
 | |
| 		if ( t == TYPE_LONG_CONST     ) size = 4;
 | |
| 		if ( t == TYPE_LONG_LONG      ) size = 8;
 | |
| 		if ( t == TYPE_STRING         ) size = m_parms[i].m_size;
 | |
| 		if ( t == TYPE_STRINGBOX      ) size = m_parms[i].m_size;
 | |
| 		if ( t == TYPE_STRINGNONEMPTY ) size = m_parms[i].m_size;
 | |
| 		if ( t == TYPE_SITERULE       ) size = 4;
 | |
| 
 | |
| 		// comments and commands do not control underlying variables
 | |
| 		if ( size == 0 && t != TYPE_COMMENT && t != TYPE_CMD &&
 | |
| 		     t != TYPE_SAFEBUF  &&
 | |
| 		     t != TYPE_FILEUPLOADBUTTON &&
 | |
| 		     t != TYPE_CONSTANT &&
 | |
| 		     t != TYPE_CHARPTR &&
 | |
| 		     t != TYPE_MONOD2   &&
 | |
| 		     t != TYPE_MONOM2     ) {
 | |
| 			log(LOG_LOGIC,"conf: Size of parm #%" INT32 " \"%s\" "
 | |
| 			    "not set.", i,m_parms[i].m_title);
 | |
| 			exit(-1);
 | |
| 		}
 | |
| 		m_parms[i].m_size = size;
 | |
| 	skipSize:
 | |
| 		// check offset
 | |
| 		if ( m_parms[i].m_obj == OBJ_NONE ) continue;
 | |
| 		if ( t == TYPE_COMMENT  ) continue;
 | |
| 		if ( t == TYPE_FILEUPLOADBUTTON ) continue;
 | |
| 		if ( t == TYPE_CMD      ) continue;
 | |
| 		if ( t == TYPE_CONSTANT ) continue;
 | |
| 		if ( t == TYPE_MONOD2   ) continue;
 | |
| 		if ( t == TYPE_MONOM2   ) continue;
 | |
| 		if ( t == TYPE_SAFEBUF  ) continue;
 | |
| 		// search parms do not need an offset
 | |
| 		if ( m_parms[i].m_off == -1 ){//&& m_parms[i].m_sparm == 0 ) {
 | |
| 			log(LOG_LOGIC,"conf: Parm #%" INT32 " \"%s\" has no offset.",
 | |
| 			    i,m_parms[i].m_title);
 | |
| 			exit(-1);
 | |
| 		}
 | |
| 		if ( m_parms[i].m_off < -1 ) {
 | |
| 			log(LOG_LOGIC,"conf: Parm #%" INT32 " \"%s\" has bad offset "
 | |
| 			    "of %" INT32 ".", i,m_parms[i].m_title,m_parms[i].m_off);
 | |
| 			exit(-1);
 | |
| 		}
 | |
| 		if ( m->m_obj == OBJ_CONF && m->m_off >= (int32_t)sizeof(Conf) ) {
 | |
| 			log("admin: Parm %s has bad m_off value.",m->m_title);
 | |
| 			char *xx = NULL; *xx = 0;
 | |
| 		}
 | |
| 		if (m->m_obj==OBJ_COLL&&m->m_off>=(int32_t)sizeof(CollectionRec)){
 | |
| 			log("admin: Parm %s has bad m_off value.",m->m_title);
 | |
| 			char *xx = NULL; *xx = 0;
 | |
| 		}
 | |
| 		if ( m->m_off >= 0 && 
 | |
| 		     m->m_obj == OBJ_SI && 
 | |
| 		     m->m_off >= (int32_t)sizeof(SearchInput)){
 | |
| 			log("admin: Parm %s has bad m_off value.",m->m_title);
 | |
| 			char *xx = NULL; *xx = 0;
 | |
| 		}
 | |
| 
 | |
| 		if ( m_parms[i].m_page == -1 ) {
 | |
| 			log(LOG_LOGIC,"conf: Parm #%" INT32 " \"%s\" has no page.",
 | |
| 			    i,m_parms[i].m_title);
 | |
| 			exit(-1);
 | |
| 		}
 | |
| 		if ( m_parms[i].m_obj == -1 ) {
 | |
| 			log(LOG_LOGIC,"conf: Parm #%" INT32 " \"%s\" has no object.",
 | |
| 			    i,m_parms[i].m_title);
 | |
| 			exit(-1);
 | |
| 		}
 | |
| 		//if ( ! m_parms[i].m_title[0] ) {
 | |
| 		//	log(LOG_LOGIC,"conf: Parm #%" INT32 " \"%s\" has no title.",
 | |
| 		//	    i,m_parms[i].m_cgi);
 | |
| 		//	exit(-1);
 | |
| 		//}
 | |
| 		// continue if already have the xml name
 | |
| 		if ( m_parms[i].m_xml ) continue;
 | |
| 		// set xml based on title
 | |
| 		char *tt = m_parms[i].m_title;
 | |
| 		if ( p + gbstrlen(tt) >= pend ) {
 | |
| 			log(LOG_LOGIC,"conf: Not enough room to store xml "
 | |
| 			    "tag name in buffer.");
 | |
| 			exit(-1);
 | |
| 		}
 | |
| 		m_parms[i].m_xml = p;
 | |
| 		for ( int32_t k = 0 ; tt[k] ; k++ ) {
 | |
| 			if ( ! is_alnum_a(tt[k]) ) continue;
 | |
| 			if ( k > 0 && tt[k-1]==' ') *p++ = to_upper_a(tt[k]);
 | |
| 			else                        *p++ = tt[k];
 | |
| 		}
 | |
| 		*p++ = '\0';
 | |
| 	}
 | |
| 
 | |
| 	// set m_searchParms
 | |
| 	int32_t n = 0;
 | |
| 	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
 | |
| 		//if ( ! m_parms[i].m_sparm ) continue;
 | |
| 		if ( m_parms[i].m_obj != OBJ_SI ) continue;
 | |
| 		m_searchParms[n++] = &m_parms[i];
 | |
| 		// sanity check
 | |
| 		if ( m_parms[i].m_off == -1 ) {
 | |
| 			log(LOG_LOGIC,"conf: SEARCH Parm #%" INT32 " \"%s\" has "
 | |
| 			    "m_off < 0 (offset into SearchInput).",
 | |
| 			    i,m_parms[i].m_title);
 | |
| 			exit(-1);
 | |
| 		}
 | |
| 	}
 | |
| 	m_numSearchParms = n;
 | |
| 
 | |
| 	// . sanity check
 | |
| 	// . we should have it all covered!
 | |
| 	si.test();
 | |
| 
 | |
| 	//
 | |
| 	// parm overlap detector
 | |
| 	//
 | |
| 	// . fill in each parm's buffer with byte #b
 | |
| 	// . inc b for each parm
 | |
| #ifndef _VALGRIND_
 | |
| 	overlapTest(+1);
 | |
| 	overlapTest(-1);
 | |
| #endif
 | |
| }
 | |
| 
 | |
// . startup sanity test: detect parms whose declared TYPE_*/m_size
//   overlap the bytes of a neighboring member in the underlying object
//   (Conf, CollectionRec, SearchInput, GigablastRequest, InjectionRequest)
// . phase 1 memsets each parm's member in fresh stack objects with a
//   per-parm pattern byte; phase 2 re-reads every member and verifies
//   the pattern survived intact
// . called twice from init, with step=+1 (forward) and step=-1
//   (backward), so an overlap is caught no matter which of the two
//   colliding parms was written last
// . logs diagnostics and exits the process on failure
void Parms::overlapTest ( char step ) {

	int32_t start = 0;
	if ( step == -1 ) start = m_numParms - 1;

	//log("conf: Using step=%" INT32 "",(int32_t)step);

	// scratch objects whose members get stamped with pattern bytes
	SearchInput   tmpsi;
	GigablastRequest tmpgr;
	InjectionRequest tmpir;
	CollectionRec tmpcr;
	Conf          tmpconf;
	char          b;
	char *p1 , *p2;
	int32_t i;
	// sanity check: ensure parms do not overlap
	for ( i = start ; i < m_numParms && i >= 0 ; i += step ) {

		// skip comments
		if ( m_parms[i].m_type == TYPE_COMMENT ) continue;
		if ( m_parms[i].m_type == TYPE_FILEUPLOADBUTTON ) continue;
		// skip if it is a broadcast switch, like "all spiders on"
		// because that modifies another parm, "spidering enabled"
		if ( m_parms[i].m_type == TYPE_BOOL2 ) continue;

		if ( m_parms[i].m_type == TYPE_SAFEBUF ) continue;

		// we use cr->m_spideringEnabled for PAGE_BASIC_SETTINGS too!
		if ( m_parms[i].m_flags & PF_DUP ) continue;

		// p1 -> this parm's member inside the matching scratch object
		p1 = NULL;
		if ( m_parms[i].m_obj == OBJ_COLL ) p1 = (char *)&tmpcr;
		if ( m_parms[i].m_obj == OBJ_CONF ) p1 = (char *)&tmpconf;
		if ( m_parms[i].m_obj == OBJ_SI   ) p1 = (char *)&tmpsi;
		if ( m_parms[i].m_obj == OBJ_GBREQUEST   ) p1 = (char *)&tmpgr;
		if ( m_parms[i].m_obj == OBJ_IR   ) p1 = (char *)&tmpir;
		if ( p1 ) p1 += m_parms[i].m_off;
		// NOTE(review): p2 is never assigned in this function, so
		// every p2-guarded memset/compare below is dead code --
		// presumably left over from a removed second buffer
		p2 = NULL;
		int32_t size = m_parms[i].m_size;
		// use i now
		// NOTE(review): the pattern byte is the low 8 bits of the
		// parm index, so it can repeat once m_numParms > 256 --
		// may mask an overlap between two far-apart parms
		b = (char)i;
		// string box type is a pointer!!
		if ( p1 ) memset ( p1 , b , size );
		//log("conf: setting %" INT32 " bytes for %s at 0x%" XINT32 " char=0x%hhx",
		//    size,m_parms[i].m_title,(int32_t)p1,b);
		// search input uses character ptrs!!
		if ( m_parms[i].m_type == TYPE_STRINGBOX ) size = 4;
		if ( m_parms[i].m_type == TYPE_STRING    ) size = 4;
		if ( m_parms[i].m_fixed > 0 ) size *= m_parms[i].m_fixed ;
		if ( p2 ) memset ( p2 , b , size );
		//log("conf: setting %" INT32 " bytes for %s at 0x%" XINT32 " char=0x%hhx "
		//    "i=%" INT32 "",   size,m_parms[i].m_title,(int32_t)p2,b,i);
	}

	//
	// now make sure they are the same
	//
	if ( step == -1 ) b--;
	else              b = 0;
	char *objStr = "none";
	int32_t  obj;
	char  infringerB;
	int32_t  j;
	int32_t savedi = -1;

	// phase 2: same skip logic as phase 1, but verify each member
	// still holds its own pattern byte
	for ( i = 0 ; i < m_numParms ; i++ ) {

		// skip comments
		if ( m_parms[i].m_type == TYPE_COMMENT ) continue;
		if ( m_parms[i].m_type == TYPE_FILEUPLOADBUTTON ) continue;
		// skip if it is a broadcast switch, like "all spiders on"
		// because that modifies another parm, "spidering enabled"
		if ( m_parms[i].m_type == TYPE_BOOL2 ) continue;

		if ( m_parms[i].m_type == TYPE_SAFEBUF ) continue;

		// we use cr->m_spideringEnabled for PAGE_BASIC_SETTINGS too!
		if ( m_parms[i].m_flags & PF_DUP ) continue;

		p1 = NULL;
		if ( m_parms[i].m_obj == OBJ_COLL ) p1 = (char *)&tmpcr;
		if ( m_parms[i].m_obj == OBJ_CONF ) p1 = (char *)&tmpconf;
		if ( m_parms[i].m_obj == OBJ_SI   ) p1 = (char *)&tmpsi;
		if ( m_parms[i].m_obj == OBJ_GBREQUEST ) p1 = (char *)&tmpgr;
		if ( m_parms[i].m_obj == OBJ_IR   ) p1 = (char *)&tmpir;
		if ( p1 ) p1 += m_parms[i].m_off;
		p2 = NULL;
		int32_t size = m_parms[i].m_size;
		b = (char) i;
		// save it
		obj = m_parms[i].m_obj;

		//log("conf: testing %" INT32 " bytes for %s at 0x%" XINT32 " char=0x%hhx "
		//    "i=%" INT32 "", size,m_parms[i].m_title,(int32_t)p1,b,i);

		// any byte not equal to our pattern means another parm
		// wrote over this member -> overlap
		for ( j = 0 ; p1 && j < size ; j++ ) {
			if ( p1[j] == b ) continue;
			// this has multiple parms pointing to it!
			//if ( m_parms[i].m_type == TYPE_BOOL2 ) continue;
			// or special cases...
			//if ( p1 == (char *)&tmpconf.m_spideringEnabled ) 
			//	continue;
			// set object type
			objStr = "??????";
			if ( m_parms[i].m_obj == OBJ_COLL )
				objStr = "CollectionRec.h";
			if ( m_parms[i].m_obj == OBJ_CONF )
				objStr = "Conf.h";
			if ( m_parms[i].m_obj == OBJ_SI )
				objStr = "SearchInput.h";
			if ( m_parms[i].m_obj == OBJ_GBREQUEST )
				objStr = "GigablastRequest/Parms.h";
			if ( m_parms[i].m_obj == OBJ_IR )
				objStr = "InjectionRequest/PageInject.h";
			// save it
			infringerB = p1[j];
			savedi = i;
			goto error;
		}
		// search input uses character ptrs!!
		if ( m_parms[i].m_type == TYPE_STRINGBOX ) size = 4;
		if ( m_parms[i].m_type == TYPE_STRING    ) size = 4;
		if ( m_parms[i].m_fixed > 0 ) size *= m_parms[i].m_fixed ;
		objStr = "SearchInput.h";

		//log("conf: testing %" INT32 " bytes for %s at 0x%" XINT32 " char=0x%hhx "
		//    "i=%" INT32 "",  size,m_parms[i].m_title,(int32_t)p2,b,i);

		// dead: p2 is always NULL here (see note in phase 1)
		for ( j = 0 ; p2 && j < size ; j++ ) {
			if ( p2[j] == b ) continue;
			// save it
			infringerB = p2[j];
			savedi = i;
			log("conf: got b=0x%hhx when it should have been "
			    "b=0x%hhx",p2[j],b);
			goto error;
		}
	}

	// all members held their pattern: no overlaps
	return;

 error:
	// report the victim parm, then scan for the likely culprit whose
	// pattern byte matches what we found in the victim's bytes
	log("conf: Had a parm value collision. Parm #%" INT32 " "
	    "\"%s\" (size=%" INT32 ") in %s has overlapped with another parm. "
	    "Your TYPE_* for this parm or a neighbor of it "
	    "does not agree with what you have declared it as in the *.h "
	    "file.",i,m_parms[i].m_title,m_parms[i].m_size,objStr);
	if ( step == -1 ) b--;
	else              b = 0;
	// show possible parms that could have overwritten it!
	for ( i = start ; i < m_numParms && i >= 0 ; i += step ) {
		//char *p1 = NULL;
		//if ( m_parms[i].m_obj == OBJ_COLL ) p1 = (char *)&tmpcr;
		//if ( m_parms[i].m_obj == OBJ_CONF ) p1 = (char *)&tmpconf;
		// skip if comment
		if ( m_parms[i].m_type == TYPE_COMMENT ) continue;
		if ( m_parms[i].m_type == TYPE_FILEUPLOADBUTTON ) continue;
		if ( m_parms[i].m_flags & PF_DUP ) continue;
		if ( m_parms[i].m_obj != m_parms[savedi].m_obj ) continue;
		// skip if no match
		//bool match = false;
		//if ( m_parms[i].m_obj == obj ) match = true;
		//if ( m_parms[i].m_sparm && 
		// NOTE: these need to be fixed!!!
		b = (char) i;
		if ( b == infringerB )
			log("conf: possible overlap with parm #%" INT32 " in %s "
			    "\"%s\" (size=%" INT32 ") "
			    "xml=%s "
			    "desc=\"%s\"",
			    i,objStr,m_parms[i].m_title,
			    m_parms[i].m_size,
			    m_parms[i].m_xml,
			    m_parms[i].m_desc);
	}

	log("conf: try including \"m->m_obj = OBJ_COLL;\" or "
	    "\"m->m_obj   = OBJ_CONF;\" in your parm definitions");
	log("conf: failed overlap test. exiting.");
	exit(-1);

}
 | |
| 
 | |
| 
 | |
| bool Parm::getValueAsBool ( SearchInput *si ) {
 | |
| 	if ( m_obj != OBJ_SI ) { char *xx=NULL;*xx=0; }
 | |
| 	char *p = (char *)si + m_off;
 | |
| 	return *(bool *)p;
 | |
| }
 | |
| 
 | |
| int32_t Parm::getValueAsLong ( SearchInput *si ) {
 | |
| 	if ( m_obj != OBJ_SI ) { char *xx=NULL;*xx=0; }
 | |
| 	char *p = (char *)si + m_off;
 | |
| 	return *(int32_t *)p;
 | |
| }
 | |
| 
 | |
| char *Parm::getValueAsString ( SearchInput *si ) {
 | |
| 	if ( m_obj != OBJ_SI ) { char *xx=NULL;*xx=0; }
 | |
| 	char *p = (char *)si + m_off;
 | |
| 	return *(char **)p;
 | |
| }
 | |
| 
 | |
| /////////
 | |
| //
 | |
| // new functions
 | |
| //
 | |
| /////////
 | |
| 
 | |
| bool Parms::addNewParmToList1 ( SafeBuf *parmList ,
 | |
| 				collnum_t collnum ,
 | |
| 				char *parmValString ,
 | |
| 				int32_t  occNum ,
 | |
| 				char *parmName ) {
 | |
| 	// get the parm descriptor
 | |
| 	Parm *m = getParmFast1 ( parmName , NULL );
 | |
| 	if ( ! m ) return log("parms: got bogus parm2 %s",parmName );
 | |
| 	return addNewParmToList2 ( parmList,collnum,parmValString,occNum,m );
 | |
| }
 | |
| 
 | |
| // . make a parm rec using the prodivded string
 | |
| // . used to convert http requests into a parmlist
 | |
| // . string could be a float or int32_t or int64_t in ascii, as well as a string
 | |
| // . returns false w/ g_errno set on error
 | |
| bool Parms::addNewParmToList2 ( SafeBuf *parmList ,
 | |
| 				collnum_t collnum , 
 | |
| 				char *parmValString ,
 | |
| 				int32_t occNum ,
 | |
| 				Parm *m ) {
 | |
| 	// get value
 | |
| 	char *val = NULL;
 | |
| 	int32_t valSize = 0;
 | |
| 
 | |
| 	//char buf[2+MAX_COLL_LEN];
 | |
| 
 | |
| 	int32_t val32;
 | |
| 	int64_t val64;
 | |
| 	char val8;
 | |
| 	float valf;
 | |
| 
 | |
| 	/*
 | |
| 	char *obj = NULL;
 | |
| 	// we might be adding a collnum if a collection that is being
 | |
| 	// added via the CommandAddColl0() "addColl" or "addCrawl" or
 | |
| 	// "addBulk" commands. they will reserve the collnum, so it might
 | |
| 	// not be ready yet.
 | |
| 	if ( collnum != -1 ) {
 | |
| 		CollectionRec *cr = g_collectiondb.getRec ( collnum );
 | |
| 		if ( cr ) obj = (char *)cr;
 | |
| 		//	log("parms: no coll rec for %" INT32 "",(int32_t)collnum);
 | |
| 		//	return false;
 | |
| 		//}
 | |
| 		//obj = (char *)cr;
 | |
| 	}
 | |
| 	else {
 | |
| 		obj = (char *)&g_conf;
 | |
| 	}
 | |
| 	*/
 | |
| 
 | |
| 
 | |
| 	if ( m->m_type == TYPE_STRING || 
 | |
| 	     m->m_type == TYPE_STRINGBOX || 
 | |
| 	     m->m_type == TYPE_SAFEBUF ||
 | |
| 	     m->m_type == TYPE_STRINGNONEMPTY ) {
 | |
| 		// point to string
 | |
| 		//val = obj + m->m_off;
 | |
| 		// Parm::m_size is the max string size
 | |
| 		//if ( occNum > 0 ) val += occNum * m->m_size;
 | |
| 		// stringlength + 1. no just make it the whole string in
 | |
| 		// case it does not use the \0 protocol
 | |
| 		//valSize = m->m_max;
 | |
| 		val = parmValString;
 | |
| 		// include \0
 | |
| 		valSize = gbstrlen(val)+1;
 | |
| 		// sanity
 | |
| 		if ( val[valSize-1] != '\0' ) { char *xx=NULL;*xx=0; }
 | |
| 	}
 | |
| 	else if ( m->m_type == TYPE_LONG ) {
 | |
| 		// watch out for unsigned 32-bit numbers, so use atoLL()
 | |
| 		val64 = atoll(parmValString);
 | |
| 		val = (char *)&val64;
 | |
| 		valSize = 4;
 | |
| 	}
 | |
| 	else if ( m->m_type == TYPE_FLOAT ) {
 | |
| 		valf = atof(parmValString);
 | |
| 		val = (char *)&valf;
 | |
| 		valSize = 4;
 | |
| 	}
 | |
| 	else if ( m->m_type == TYPE_LONG_LONG ) {
 | |
| 		val64 = atoll(parmValString);
 | |
| 		val = (char *)&val64;
 | |
| 		valSize = 8;
 | |
| 	}
 | |
| 	else if ( m->m_type == TYPE_BOOL ||
 | |
| 		  m->m_type == TYPE_BOOL2 ||
 | |
| 		  m->m_type == TYPE_CHECKBOX ||
 | |
| 		  m->m_type == TYPE_PRIORITY2 ||
 | |
| 		  m->m_type == TYPE_UFP ||
 | |
| 		  m->m_type == TYPE_CHAR ) {
 | |
| 		val8 = atol(parmValString);
 | |
| 		//if ( parmValString && to_lower_a(parmValString[0]) == 'y' )
 | |
| 		//	val8 = 1;
 | |
| 		//if ( parmValString && to_lower_a(parmValString[0]) == 'n' )
 | |
| 		//	val8 = 0;
 | |
| 		val = (char *)&val8;
 | |
| 		valSize = 1;
 | |
| 	}
 | |
| 	// for resetting or restarting a coll i think the ascii arg is
 | |
| 	// the NEW reserved collnum, but for other commands then parmValString
 | |
| 	// will be NULL
 | |
| 	else if ( m->m_type == TYPE_CMD ) {
 | |
| 		val = parmValString;
 | |
| 		if ( val ) valSize = gbstrlen(val)+1;
 | |
| 		// . addcoll collection can not be too long
 | |
| 		// . TODO: supply a Parm::m_checkValFunc to ensure val is
 | |
| 		//   legitimate, and set g_errno on error
 | |
| 		if ( strcmp(m->m_cgi,"addcoll") == 0 &&valSize-1>MAX_COLL_LEN){
 | |
| 			log("admin: addcoll coll too long");
 | |
| 			g_errno = ECOLLTOOBIG;
 | |
| 			return false;
 | |
| 		}
 | |
| 		// scan for holes if we hit the limit
 | |
| 		//if ( g_collectiondb.m_numRecs >= 1LL>>sizeof(collnum_t) )
 | |
| 	}
 | |
| 	else if ( m->m_type == TYPE_IP ) {
 | |
| 		// point to string
 | |
| 		//val = obj + m->m_off;
 | |
| 		// Parm::m_size is the max string size
 | |
| 		//if ( occNum > 0 ) val += occNum * m->m_size;
 | |
| 		// stringlength + 1. no just make it the whole string in
 | |
| 		// case it does not use the \0 protocol
 | |
| 		val32 = atoip(parmValString);
 | |
| 		// store ip in binary format
 | |
| 		val = (char *)&val32;
 | |
| 		valSize = 4;
 | |
| 	}
 | |
| 	else {
 | |
| 		log("parms: shit unsupported parm type");
 | |
| 		char *xx=NULL;*xx=0;
 | |
| 	}
 | |
| 
 | |
| 	key96_t key = makeParmKey ( collnum , m ,  occNum );
 | |
| 
 | |
| 	// then key
 | |
| 	if ( ! parmList->safeMemcpy ( &key , sizeof(key) ) )
 | |
| 		return false;
 | |
| 
 | |
| 	// datasize
 | |
| 	if ( ! parmList->pushLong ( valSize ) )
 | |
| 		return false;
 | |
| 
 | |
| 	// and data
 | |
| 	if ( val && valSize && ! parmList->safeMemcpy ( val , valSize ) )
 | |
| 		return false;
 | |
| 
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| // g_parms.addCurrentParmToList1 ( &parmList , cr , "spiderRoundNum" ); 
 | |
| bool Parms::addCurrentParmToList1 ( SafeBuf *parmList ,
 | |
| 				    CollectionRec *cr , 
 | |
| 				    char *parmName ) {
 | |
| 	collnum_t collnum = -1;
 | |
| 	if ( cr ) collnum = cr->m_collnum;
 | |
| 	// get the parm descriptor
 | |
| 	int32_t occNum;
 | |
| 	Parm *m = getParmFast1 ( parmName , &occNum );
 | |
| 	if ( ! m ) return log("parms: got bogus parm1 %s",parmName );
 | |
| 	return addCurrentParmToList2 ( parmList , collnum, -1 , m );
 | |
| }
 | |
| 
 | |
| // . use the current value of the parm to make this record
 | |
| // . parm class itself already helps us reference the binary parm value
 | |
| bool Parms::addCurrentParmToList2 ( SafeBuf *parmList ,
 | |
| 				    collnum_t collnum , 
 | |
| 				    int32_t occNum ,
 | |
| 				    Parm *m ) {
 | |
| 
 | |
| 	char *obj = NULL;
 | |
| 
 | |
| 	if ( collnum != -1 ) {
 | |
| 		CollectionRec *cr = g_collectiondb.getRec ( collnum );
 | |
| 		if ( ! cr ) return false;
 | |
| 		obj = (char *)cr;
 | |
| 	}
 | |
| 	else {
 | |
| 		obj = (char *)&g_conf;
 | |
| 	}
 | |
| 
 | |
| 	char *data = obj + m->m_off;
 | |
| 	// Parm::m_size is the max string size
 | |
| 	int32_t dataSize = m->m_size;
 | |
| 	if ( occNum > 0 ) data += occNum * m->m_size;
 | |
| 
 | |
| 	if ( m->m_type == TYPE_STRING || 
 | |
| 	     m->m_type == TYPE_STRINGBOX || 
 | |
| 	     m->m_type == TYPE_SAFEBUF ||
 | |
| 	     m->m_type == TYPE_STRINGNONEMPTY )
 | |
| 		// include \0 in string
 | |
| 		dataSize = gbstrlen(data) + 1;
 | |
| 
 | |
| 	// if a safebuf, point to the string within
 | |
| 	if ( m->m_type == TYPE_SAFEBUF ) {
 | |
| 		SafeBuf *sb = (SafeBuf *)data;
 | |
| 		data = sb->getBufStart();
 | |
| 		dataSize = sb->length();
 | |
| 		// sanity
 | |
| 		if ( dataSize > 0 && !data[dataSize-1]){char *xx=NULL;*xx=0;}
 | |
| 		// include the \0 since we do it for strings above
 | |
| 		if ( dataSize > 0 ) dataSize++;
 | |
| 		// empty? make it \0 then to be like strings i guess
 | |
| 		if ( dataSize == 0 ) {
 | |
| 			data = "\0";
 | |
| 			dataSize = 1;
 | |
| 		}
 | |
| 		// sanity check
 | |
| 		if ( dataSize > 0 && data[dataSize-1] ) {char *xx=NULL;*xx=0;}
 | |
| 		// if just a \0 then make it empty
 | |
| 		//if ( dataSize && !data[0] ) {
 | |
| 		//	data = NULL;
 | |
| 		//	dataSize = 0;
 | |
| 		//}
 | |
| 	}
 | |
| 
 | |
| 	//int32_t occNum = -1;
 | |
| 	key96_t key = makeParmKey ( collnum , m ,  occNum );
 | |
| 	/*
 | |
| 	// debug it
 | |
| 	log("parms: adding parm collnum=%i title=%s "
 | |
| 	    "key=%s datasize=%i data=%s hash=%"UINT32
 | |
| 	    ,(int)collnum
 | |
| 	    ,m->m_title
 | |
| 	    ,KEYSTR(&key,sizeof(key))
 | |
| 	    ,(int)dataSize
 | |
| 	    ,data
 | |
| 	    ,(uint32_t)hash32(data,dataSize));
 | |
| 	*/
 | |
| 	// then key
 | |
| 	if ( ! parmList->safeMemcpy ( &key , sizeof(key) ) )
 | |
| 		return false;
 | |
| 
 | |
| 	// size
 | |
| 	if ( ! parmList->pushLong ( dataSize ) )
 | |
| 		return false;
 | |
| 
 | |
| 	// and data
 | |
| 	if ( dataSize && ! parmList->safeMemcpy ( data , dataSize ) )
 | |
| 		return false;
 | |
| 
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
// returns false and sets g_errno on error
// . converts the cgi fields of an incoming HttpRequest into a serialized
//   parm list (a sequence of [key][dataSize][data] recs) suitable for
//   broadcastParmList()
// . two passes over the cgi fields: command parms (TYPE_CMD) first, then
//   value parms, so e.g. an "addcoll" command is applied before the parms
//   that configure the new collection
// . permission model: OBJ_CONF parms require master admin, OBJ_COLL parms
//   require coll admin (with special-case overrides computed below)
bool Parms::convertHttpRequestToParmList (HttpRequest *hr, SafeBuf *parmList,
					  int32_t page , TcpSocket *sock ) {

	// false = useDefaultRec?
	CollectionRec *cr = g_collectiondb.getRec ( hr , false );

	//if ( c ) {
	//	cr = g_collectiondb.getRec ( hr );
	//	if ( ! cr ) log("parms: coll not found");
	//}

	bool isMasterAdmin = g_conf.isMasterAdmin ( sock , hr );
	
	// does this user have permission to update the parms?
	bool isCollAdmin = g_conf.isCollAdmin ( sock , hr ) ;


	// might be g_conf specific, not coll specific
	//bool hasPerm = false;
	// just knowing the collection name of a custom crawl means you
	// know the token, so you have permission
	//if ( cr && cr->m_isCustomCrawl ) hasPerm = true;
	//if ( hr->isLocal() ) hasPerm = true;

	// fix jenkins "GET /v2/crawl?token=crawlbottesting" request
	char *name  = hr->getString("name");
	char *token = hr->getString("token");
	//if ( ! cr && token ) hasPerm = true;

	//if ( ! hasPerm ) {
	//	//log("parms: no permission to set parms");
	//	//g_errno = ENOPERM;
	//	//return false;
	//	// just leave the parm list empty and fail silently
	//	return true;
	//}

	// we set the parms in this collnum
	collnum_t parmCollnum = -1;
	if ( cr ) parmCollnum = cr->m_collnum;

	// turn the collnum into an ascii string for providing as args
	// when &reset=1 &restart=1 &delete=1 is given along with a
	// &c= or a &name=/&token= pair.
	char oldCollName[MAX_COLL_LEN+1];
	oldCollName[0] = '\0';
	if ( cr ) sprintf(oldCollName,"%" INT32 "",(int32_t)cr->m_collnum);


	////////
	//
	// HACK: if crawlbot user supplies a token, name, and seeds, and the
	// corresponding collection does not exist then assume it is an add
	//
	////////
	// customCrawl: 0 = regular coll, 1 = crawl job, 2 = bulk job,
	// derived purely from the request path
	char customCrawl = 0;
	char *path = hr->getPath();
	// i think /crawlbot is only used by me to see PageCrawlBot.cpp
	// so don't bother...
	if ( strncmp(path,"/crawlbot",9) == 0 ) customCrawl = 0;
	if ( strncmp(path,"/v2/crawl",9) == 0 ) customCrawl = 1;
	if ( strncmp(path,"/v2/bulk" ,8) == 0 ) customCrawl = 2;
	if ( strncmp(path,"/v3/crawl",9) == 0 ) customCrawl = 1;
	if ( strncmp(path,"/v3/bulk" ,8) == 0 ) customCrawl = 2;

        // throw error if collection record custom crawl type doesn't equal 
	// the crawl type of current request
	if (cr && customCrawl && customCrawl != cr->m_isCustomCrawl ) {
		g_errno = ECUSTOMCRAWLMISMATCH;
		return false;
        }

	bool hasAddCrawl = hr->hasField("addCrawl");
	bool hasAddBulk  = hr->hasField("addBulk");
	bool hasAddColl  = hr->hasField("addColl");
	// sometimes they try to delete a collection that is not there so do
	// not apply this logic in that case!
	bool hasDelete   = hr->hasField("delete");
	bool hasRestart  = hr->hasField("restart");
	bool hasReset    = hr->hasField("reset");
	bool hasSeeds    = hr->hasField("seeds");
	// check for bulk jobs as well
	if ( ! hasSeeds ) hasSeeds = hr->hasField("urls");
	// implicit add: token+name+seeds for a non-existent custom crawl
	// coll, with no explicit add/delete/reset/restart command present
	if ( ! cr          && 
	     token         && 
	     name          && 
	     customCrawl   &&
	     hasSeeds      &&
	     ! hasDelete   &&
	     ! hasRestart  &&
	     ! hasReset    &&
	     ! hasAddCrawl && 
	     ! hasAddBulk  && 
	     ! hasAddColl     ) {
		// reserve a new collnum for adding this crawl
		parmCollnum = g_collectiondb.reserveCollNum();
		// must be there!
		if ( parmCollnum == -1 ) {
			g_errno = EBADENGINEER;
			return false;
		}
		// log it for now
		log("parms: trying to add custom crawl (%" INT32 ")",
		    (int32_t)parmCollnum);
		// formulate name
		char newName[MAX_COLL_LEN+1];
		snprintf(newName,MAX_COLL_LEN,"%s-%s",token,name);
		char *cmdStr = "addCrawl";
		if ( customCrawl == 2 ) cmdStr = "addBulk";
		// add to parm list
		if ( ! addNewParmToList1 ( parmList ,
					   parmCollnum ,
					   newName ,
					   -1 , // occNum
					   cmdStr ) )
			return false;
	}

	// PASS 1: command parms (TYPE_CMD) only.
	// loop through cgi parms
	for ( int32_t i = 0 ; i < hr->getNumFields() ; i++ ) {
		// get cgi parm name
		char *field = hr->getField    ( i );
		// get value of the cgi field
		char *val  = hr->getValue   (i);
		// convert field to parm
		int32_t occNum;
		// parm names can be shared across pages, like "c"
		// for search, addurl, inject, etc.
		Parm *m = getParmFast1 ( field , &occNum );
		if ( ! m ) continue;

		// skip if not a command parm, like "addcoll"
		if ( m->m_type != TYPE_CMD ) continue;

		if ( m->m_obj != OBJ_CONF && m->m_obj != OBJ_COLL )
			continue;

		//
		// HACK
		//
		// if its a resetcoll/restartcoll/addcoll we have to
		// get the next available collnum and use that for setting
		// any additional parms. that is the coll it will act on.
		if ( strcmp(m->m_cgi,"addColl") == 0 ||
		     // lowercase support. camelcase is obsolete.
		     strcmp(m->m_cgi,"addcoll") == 0 ||
		     strcmp(m->m_cgi,"addCrawl") == 0 ||
		     strcmp(m->m_cgi,"addBulk" ) == 0 ||
		     strcmp(m->m_cgi,"reset" ) == 0 ||
		     strcmp(m->m_cgi,"restart" ) == 0 ) {
			// if we wanted to we could make the data the
			// new parmCollnum since we already store the old
			// collnum in the parm rec key
			parmCollnum = g_collectiondb.reserveCollNum();
			//
			//
			// NOTE: the old collnum is in the "val" already
			// like "&reset=462" or "&addColl=test"
			//
			//
			// sanity. if all are full! we hit our limit of
			// 32k collections. should increase collnum_t from
			// int16_t to int32_t...
			if ( parmCollnum == -1 ) {
				g_errno = EBADENGINEER;
				return false;
			}
		}

		// . DIFFBOT HACK: so ppl can manually restart a spider round
		// . val can be 0 or 1 or anything. i.e. roundStart=0 works.
		// . map this parm to another parm with the round start
		//   time (current time) and the new round # as the args.
		// . this will call CommandForceNextSpiderRound() function
		//   on every shard with these args, "tmpVal".
		if ( cr && strcmp(m->m_cgi,"roundStart") == 0 ) {
			// use the current time so anything spidered before
			// this time (the round start time) will be respidered
			//sprintf(tmp,"%" UINT32 "",getTimeGlobalNoCore());
			//val = tmp;
			char tmpVal[64];
			// use the same round start time for all shards
			sprintf(tmpVal,
				"%" UINT32 ",%" INT32 ""
				,(uint32_t)getTimeGlobalNoCore()
				,cr->m_spiderRoundNum+1
				);
			// . also add command to reset crawl/process counts
			//   so if you hit maxToProcess/maxToCrawl it will
			//   not stop the round from restarting
			// . CommandResetCrawlCounts()
			if ( ! addNewParmToList1 ( parmList ,
						   parmCollnum ,
						   tmpVal, // a string
						   0 , // occNum (for arrays)
						   "forceround" ) )
				return false;
			// don't bother going below
			continue;
		}

		// if a collection name was also provided, assume that is
		// the target of the reset/delete/restart. we still
		// need PageAddDelete.cpp to work...
		if ( cr &&
		     ( strcmp(m->m_cgi,"reset" ) == 0 ||
		       strcmp(m->m_cgi,"delete" ) == 0 ||
		       strcmp(m->m_cgi,"restart" ) == 0 ) ) 
			// the collnum to reset/restart/del
			// given as a string.
			val = oldCollName;

		//
		// CLOUD SEARCH ENGINE SUPPORT
		//

		//
		// if this is the "delcoll" parm then "c" may have been
		// excluded from http request, therefore isCollAdmin and
		// isMasterAdmin may be false, so see if they have permission
		// for the "val" collection for this one...
		bool hasPerm = false;
		if ( m->m_page == PAGE_DELCOLL &&
		     strcmp(m->m_cgi,"delcoll") == 0 ) {
			// permission override for /admin/delcoll cmd & parm
			hasPerm = g_conf.isCollAdminForColl (sock,hr,val);
		}

		// if this IP c-block as already added a collection then do not
		// allow it to add another.
		if ( m->m_page == PAGE_ADDCOLL &&
		     g_conf.m_allowCloudUsers &&
		     ! isMasterAdmin &&
		     strcmp(m->m_cgi,"addcoll")==0 ) {
			// see if user's c block has already added a collection
			// NOTE(review): numAdded is hard-coded to 0, so the
			// limit check below can never fire — the c-block
			// lookup appears to be unimplemented. confirm intent.
			int32_t numAdded = 0;
			if ( numAdded >= 1 ) {
				g_errno = ENOPERM;
				log("parms: already added a collection from "
				    "this cloud user's c-block.");
				return false;
			}
			hasPerm = true;
		}

		// master controls require root permission
		if ( m->m_obj == OBJ_CONF && ! isMasterAdmin ) {
			log("parms: could not run root parm \"%s\" no perm.",
			    m->m_title);
			continue;
		}

		// need to have permission for collection for collrec parms
		if ( m->m_obj == OBJ_COLL && ! isCollAdmin && ! hasPerm ) {
			log("parms: could not run coll parm \"%s\" no perm.",
			    m->m_title);
			continue;
		}

		// add the cmd parm
		if ( ! addNewParmToList2 ( parmList ,
					   // it might be a collection-less
					   // command like 'gb stop' which
					   // uses the "save=1" parm.
					   // this is the "new" collnum to
					   // create in the case of
					   // add/reset/restart, but in the
					   // case of delete it is -1 or old.
					   parmCollnum ,
					   // the argument to the function...
					   // in the case of delete, the 
					   // collnum to delete in ascii.
					   // in the case of add, the name
					   // of the new coll. in the case
					   // of reset/restart the OLD 
					   // collnum is ascii to delete.
					   val,
					   occNum ,
					   m ) )
			return false;
	}

	// if we are one page url filters, turn off all checkboxes!
	// html should really transmit them as =0 if they are unchecked!!
	// "fe" is a url filter expression for the first row.
	//if ( hr->hasField("fe") && page == PAGE_FILTERS && cr ) {
	//	for ( int32_t i = 0 ; i < cr->m_numRegExs ; i++ ) {
	//		//cr->m_harvestLinks  [i] = 0;
	//		//cr->m_spidersEnabled[i] = 0;
	//		if ( ! addNewParmToList2 ( parmList ,
	//					   cr->m_collnum,
	//					   "0",
	//					   i,
	//	}
	//}


	//
	// CLOUD SEARCH ENGINE SUPPORT
	//
	// provide userip so when adding a new collection we can
	// store it in the collection rec to ensure that the same
	// IP address cannot add more than one collection.
	//
	if ( sock && page == PAGE_ADDCOLL ) {
		char *ipStr = iptoa(sock->m_ip);
		int32_t occNum;
		Parm *um = getParmFast1 ( "userip" , &occNum); // NULL = occNum
		if ( ! addNewParmToList2 ( parmList ,
					   // HACK! operate on the to-be-added
					   // collrec, if there was an addcoll
					   // reset or restart coll cmd...
					   parmCollnum ,
					   ipStr, // val ,
					   occNum ,
					   um ) )
			return false;
	}


	//
	// PASS 2: now add the parms that are NOT commands
	//
	
	// loop through cgi parms
	for ( int32_t i = 0 ; i < hr->getNumFields() ; i++ ) {
		// get cgi parm name
		char *field = hr->getField    ( i );
		// get value of the cgi field
		char *val  = hr->getValue   (i);

		// get the occurrence # if its regex. this is the row #
		// in the url filters table, since those parms repeat names.
		// url filter expression.
		//if ( strcmp(field,"fe") == 0 ) occNum++;

		// convert field to parm
		int32_t occNum;
		Parm *m = getParmFast1 ( field , &occNum );

		//
		// map "pause" to spidering enabled; the boolean sense is
		// inverted (pause=1 means spidering disabled)
		//
		if ( strcmp(field,"pause"     ) == 0 ||
		     strcmp(field,"pauseCrawl") == 0 ) {
			m = getParmFast1 ( "cse",  &occNum);
			if      ( val && val[0] == '0' ) val = "1";
			else if ( val && val[0] == '1' ) val = "0";
			if ( ! m ) { char *xx=NULL;*xx=0; }
		}

		if ( ! m ) continue;

		// skip if IS a command parm, like "addcoll", we did that above
		if ( m->m_type == TYPE_CMD ) 
			continue;

		if ( m->m_obj != OBJ_CONF && m->m_obj != OBJ_COLL )
			continue;


		//
		// CLOUD SEARCH ENGINE SUPPORT
		//
		// master controls require root permission. otherwise, just
		// knowing the collection name is enough for a cloud user
		// to change settings.
		//
		// NOTE(review): unlike pass 1, hasPerm is never set true in
		// this loop, so coll parms here always require isCollAdmin.
		// confirm whether an override was intended.
		bool hasPerm = false;

		// master controls require root permission
		if ( m->m_obj == OBJ_CONF && ! isMasterAdmin ) {
			log("parms: could not set root parm \"%s\" no perm.",
			    m->m_title);
			continue;
		}

		// need to have permission for collection for collrec parms
		if ( m->m_obj == OBJ_COLL && ! isCollAdmin && ! hasPerm ) {
			log("parms: could not set coll parm \"%s\" no perm.",
			    m->m_title);
			continue;
		}

		// convert spiderRoundStartTime=0 (roundStart=0 roundStart=1) 
		// to spiderRoundStartTime=<currenttime>+30secs
		// so that will force the next spider round to kick in
		/*
		bool restartRound = false;
		char tmp[24];
		if ( strcmp(field,"roundStart")==0 && 
		     val && (val[0]=='0'||val[0]=='1') && val[1]==0 ) 
			sprintf(tmp,"%" UINT32 "",(int32_t)getTimeGlobalNoCore()+0);
			val = tmp;
		}
		*/

		// add it to a list now
		if ( ! addNewParmToList2 ( parmList ,
					   // HACK! operate on the to-be-added
					   // collrec, if there was an addcoll
					   // reset or restart coll cmd...
					   parmCollnum ,
					   val ,
					   occNum ,
					   m ) )
			return false;
	}


	return true;
}
 | |
| 
 | |
// Fast lookup of a Parm descriptor by the 32-bit hash of its cgi name.
// Lazily builds a static hash table (cgi-hash -> Parm*) over all conf/coll
// parms on first call; returns NULL if no parm has that hash.
// NOTE(review): the lazy static init is not guarded by a lock — assumes
// first call happens before any concurrent use; confirm threading model.
Parm *Parms::getParmFast2 ( int32_t cgiHash32 ) {
	static HashTableX s_pht;
	static char s_phtBuf[26700];
	static bool s_init = false;

	if ( ! s_init ) {
		// init hashtable (4-byte keys, Parm* values, fixed buffer
		// so no allocation is needed)
		s_pht.set ( 4,sizeof(char *),2048,s_phtBuf,26700,
			    false,0,"phttab" );
		// reduce hash collisions:
		s_pht.m_useKeyMagic = true;
		// parms not initialized yet? do it now, and crash if that
		// still leaves us with none (deliberate NULL write)
		if ( m_numParms <= 0 ) init();
		if ( m_numParms <= 0 ) { char *xx=NULL;*xx=0; }
		// fill up hashtable
		for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
			// get it
			Parm *parm = &m_parms[i];
			// skip parms that are not for conf or coll lest
			// it bitch that "c" is duplicated...
			if ( parm->m_obj != OBJ_CONF &&
			     parm->m_obj != OBJ_COLL )
				continue;
			// skip comments
			if ( parm->m_type == TYPE_COMMENT ) continue;
			if ( parm->m_type == TYPE_FILEUPLOADBUTTON ) continue;
			// skip if no cgi
			if ( ! parm->m_cgi ) continue;
			// get its hash of its cgi
			int32_t ph32 = parm->m_cgiHash;
			// sanity!
			if ( s_pht.isInTable ( &ph32 ) ) {
				// get the dup guy
				Parm *dup = *(Parm **)s_pht.getValue(&ph32);
				// same underlying parm?
				// like for "all spiders on" vs.
				// "all spiders off"?
				if ( dup->m_off == parm->m_off )
					continue;
				// otherwise bitch about it and drop core
				log("parms: dup parm h32=%" INT32 " "
				    "\"%s\" vs \"%s\"",
				    ph32, dup->m_title,parm->m_title);
				char *xx=NULL;*xx=0;
			}
			// add that to hash table
			s_pht.addKey ( &ph32 , &parm );
		}
		// do not do this again
		s_init = true;
	}

	// lookup: value stored is a Parm*, table returns a pointer to it
	Parm **pp = (Parm **)s_pht.getValue ( &cgiHash32 );
	if ( ! pp ) return NULL;
	return *pp;
}
 | |
| 
 | |
| 
 | |
| Parm *Parms::getParmFast1 ( char *cgi , int32_t *occNum ) {
 | |
| 	// strip off the %" INT32 " for things like 'fe3' for example
 | |
| 	// because that is the occurrence # for parm arrays.
 | |
| 	int32_t clen = gbstrlen(cgi);
 | |
| 
 | |
| 	char *d = NULL;
 | |
| 
 | |
| 	if ( clen > 1 ) {
 | |
| 		d = cgi + clen - 1;
 | |
| 		while ( is_digit(*d) ) d--;
 | |
| 		d++;
 | |
| 	}
 | |
| 
 | |
| 	int32_t h32;
 | |
| 
 | |
| 	// assume not an array
 | |
| 	if ( occNum ) *occNum = -1;
 | |
| 
 | |
| 	if ( d && *d ) {
 | |
| 		if ( occNum ) *occNum = atol(d);
 | |
| 		h32 = hash32 ( cgi , d - cgi );
 | |
| 	}
 | |
| 	else 
 | |
| 		h32 = hash32n ( cgi );
 | |
| 
 | |
| 	Parm *m = getParmFast2 ( h32 );
 | |
| 
 | |
| 	if ( ! m ) return NULL;
 | |
| 
 | |
| 	// the first element does not have a number after it
 | |
| 	if ( m->isArray() && occNum && *occNum == -1 )
 | |
| 		*occNum = 0;
 | |
| 
 | |
| 	return m;
 | |
| }
 | |
| 
 | |
| ////////////
 | |
| //
 | |
| // functions for distributing/syncing parms to/with all hosts
 | |
| //
 | |
| ////////////
 | |
| 
 | |
// One queued parm-update broadcast. Nodes are kept in a doubly-linked
// list (s_headNode/s_tailNode) in submission order; each node tracks how
// many hosts have been sent the update and how many have acked it.
class ParmNode {
public:
	SafeBuf m_parmList;	// serialized parm recs to broadcast
	int32_t m_numRequests;	// udp requests launched so far
	int32_t m_numReplies;	// replies received (good or errored)
	int32_t m_numGoodReplies;	// replies that completed without error
	int32_t m_numHostsTotal;	// hosts this update must reach
	class ParmNode *m_prevNode;	// linked-list neighbors
	class ParmNode *m_nextNode;
	int64_t m_parmId;	// monotonically increasing ticket #
	bool m_calledCallback;	// callback fired already?
	int32_t m_startTime;	// local time (secs) of submission
	void *m_state;	// opaque state passed to m_callback
	void (* m_callback)(void *state);	// may be NULL (fire-and-forget)
	bool m_sendToGrunts;	// include regular hosts?
	bool m_sendToProxies;	// include proxies?
	int32_t m_hostId; // -1 means send parm update to all hosts
	// . if not -1 then [m_hostId,m_hostId2] is a range
	// . used by main.cpp cmd line cmds like 'gb stop 3-5'
	int32_t m_hostId2; 
};
 | |
| 
 | |
// head/tail of the pending parm-broadcast queue, in submission order
static ParmNode *s_headNode = NULL;
static ParmNode *s_tailNode = NULL;
// last ticket # handed out; 0 is never a valid parm id
static int64_t s_parmId = 0LL;
 | |
| 
 | |
// . will send the parm update request to each host and retry forever, 
//   until dead hosts come back up
// . keeps parm update requests in order received
// . returns true and sets g_errno on error
// . returns false if blocked and will call your callback
bool Parms::broadcastParmList ( SafeBuf *parmList ,
				void    *state ,
				void   (* callback)(void *) ,
				bool sendToGrunts ,
				bool sendToProxies ,
				// this is -1 if sending to all hosts
				int32_t hostId ,
				// this is not -1 if its range [hostId,hostId2]
				int32_t hostId2 ) {

	// empty list? nothing to broadcast, done
	if ( parmList->getLength() <= 0 ) return true;

	// only us? no need for this then. we now do this...
	//if ( g_hostdb.m_numHosts <= 1 ) return true;

	// make a new parm transmit node
	ParmNode *pn = (ParmNode *)mmalloc ( sizeof(ParmNode) , "parmnode" );
	// NOTE(review): returns true here per the "true = error" contract
	// above; presumably mmalloc() set g_errno — confirm
	if ( ! pn ) return true;
	// raw mmalloc'd memory: construct the embedded SafeBuf by hand
	pn->m_parmList.constructor();

	// update the ticket #. we use this to keep things ordered too.
	// this should never be zero since it starts off at zero.
	s_parmId++;

	// set it; stealBuf transfers ownership of the caller's buffer
	pn->m_parmList.stealBuf ( parmList );
	pn->m_numRequests    = 0;
	pn->m_numReplies     = 0;
	pn->m_numGoodReplies = 0;
	pn->m_numHostsTotal  = 0;
	pn->m_prevNode       = NULL;
	pn->m_nextNode       = NULL;
	pn->m_parmId         = s_parmId; // take a ticket
	pn->m_calledCallback = false;
	pn->m_startTime      = getTimeLocal();
	pn->m_state          = state;
	pn->m_callback       = callback;
	pn->m_sendToGrunts   = sendToGrunts;
	pn->m_sendToProxies  = sendToProxies;
	pn->m_hostId         = hostId;
	pn->m_hostId2        = hostId2; // a range? then not -1 here.

	// store it ordered in our linked list of parm transmit nodes
	if ( ! s_tailNode ) {
		s_headNode = pn;
		s_tailNode = pn;
	}
	else {
		// link pn at end of tail
		s_tailNode->m_nextNode = pn;
		pn->m_prevNode = s_tailNode;
		// pn becomes the new tail
		s_tailNode = pn;
	}

	// compute how many hosts must ack before the callback can fire.
	// just the regular proxies, not compression proxies
	if ( pn->m_sendToProxies ) 
		pn->m_numHostsTotal += g_hostdb.getNumProxies();

	if ( pn->m_sendToGrunts )
		pn->m_numHostsTotal += g_hostdb.getNumGrunts();

	// targeted send: only one host needs to ack
	if ( hostId >= 0 )
		pn->m_numHostsTotal = 1;

	// pump the parms out to other hosts in the network
	doParmSendingLoop ( );

	// . if waiting for more replies to come in that should be in soon
	// . doParmSendingLoop() is called when a reply comes in so that
	//   the next requests can be sent out
	//if ( waitingForLiveHostsToReply() ) return false;

	// all done. how did this happen?
	//return true;

	// wait for replies
	return false;
}
 | |
| 
 | |
| void tryToCallCallbacks ( ) {
 | |
| 
 | |
| 	ParmNode *pn = s_headNode;
 | |
| 	int32_t now = getTimeLocal();
 | |
| 
 | |
| 	for ( ; pn ; pn = pn->m_nextNode ) {
 | |
| 		// skip if already called callback
 | |
| 		if ( pn->m_calledCallback ) continue;
 | |
| 		// should we call the callback?
 | |
| 		bool callIt = false;
 | |
| 		if ( pn->m_numReplies >= pn->m_numRequests ) callIt = true;
 | |
| 		// sometimes we don't launch any requests to update parms
 | |
| 		// because we are jammed up. same logic as we use for
 | |
| 		// freeing the pn below.
 | |
| 		if ( pn->m_numGoodReplies < pn->m_numHostsTotal )
 | |
| 			callIt = false;
 | |
| 
 | |
| 		// 8 seconds is enough to wait for all replies to come in.
 | |
| 		// a host might be dead, so we need this here lest the
 | |
| 		// underlying page handler (i.e. sendPageCrawlbot()) never
 | |
| 		// get called if a host is dead. if you are updating some
 | |
| 		// parms you want the page to return.
 | |
| 		if ( now - pn->m_startTime > 8 && 
 | |
| 		     ! callIt &&
 | |
| 		     g_hostdb.hasDeadHost() ) 
 | |
| 			callIt = true;
 | |
| 
 | |
| 		if ( ! callIt ) continue;
 | |
| 		// callback is NULL for updating parms like spiderRoundNum
 | |
| 		// in Spider.cpp
 | |
| 		if ( pn->m_callback ) pn->m_callback ( pn->m_state );
 | |
| 		pn->m_calledCallback = true;
 | |
| 	}
 | |
| }
 | |
| 
 | |
// UdpServer callback for a parm-update reply (msgtype 0x3f).
// "state" is the hostId the request was sent to. Updates that host's
// bookkeeping, fires any completed node callbacks, frees the node once
// every host has acked, and pumps the sending loop again.
void gotParmReplyWrapper ( void *state , UdpSlot *slot ) {

	// don't let upserver free the send buf! that's the ParmNode parmlist
	slot->m_sendBufAlloc = NULL;

	// in case host table is dynamically modified, go by #
	Host *h = g_hostdb.getHost((int32_t)(PTRTYPE)state);

	// NOTE(review): pn->m_parmId is int64_t but this local is int32_t —
	// possible truncation for large parm ids; confirm the Host field
	// widths (m_currentParmIdInProgress / m_lastParmIdCompleted)
	int32_t parmId = h->m_currentParmIdInProgress;

	// NOTE(review): assumed non-NULL here (set when the request was
	// launched); confirm no path clears it before the reply arrives
	ParmNode *pn = h->m_currentNodePtr;

	// inc this count
	pn->m_numReplies++;

	// nothing in progress now
	h->m_currentParmIdInProgress = 0;
	h->m_currentNodePtr = NULL;

	// this is usually timeout on a dead host i guess
	if ( g_errno ) {
		log("parms: got parm update reply from host #%" INT32 ": %s",
		    h->m_hostId,mstrerror(g_errno));
	}


	// . note it so we do not retry every 1ms!
	// . and only retry on time outs or no mem errors for now...
	// . it'll retry once every 10 seconds using the sleep
	//   wrapper below
	if ( g_errno != EUDPTIMEDOUT &&  g_errno != ENOMEM ) 
		g_errno = 0;

	if ( g_errno ) {
		// remember error info for retry
		h->m_lastTryError = g_errno;
		h->m_lastTryTime = getTimeLocal();
		// if a host timed out he could be dead, so try to call
		// the callback for this "pn" anyway. if the only hosts we
		// do not have replies for are dead, then we'll call the
		// callback, but still keep trying to send to them.
		tryToCallCallbacks ();
		// try to send more i guess? i think this is right otherwise
		// the callback might not ever get called
		g_parms.doParmSendingLoop();
		return;
	}

	// no error, otherwise
	h->m_lastTryError = 0;

	// successfully completed
	h->m_lastParmIdCompleted = parmId;

	// inc this count
	pn->m_numGoodReplies++;

	// . this will try to call any callback that can be called
	// . for instances, if the "pn" has recvd all the replies
	// . OR if the remaining hosts are "DEAD"
	// . the callback is in the "pn"
	tryToCallCallbacks ();

	// nuke it? only when every host acked and no requests outstanding
	if ( pn->m_numGoodReplies >= pn->m_numHostsTotal &&
	     pn->m_numReplies >= pn->m_numRequests ) {

		// . we must always be the head lest we send out of order.
		// . ParmNodes only destined to a specific hostid are ignored
		//   for this check, only look at those whose m_hostId is -1
		if(pn != s_headNode && pn->m_hostId==-1){
			log("parms: got parm request out of band. not head.");
		}

		// a new head
		if ( pn == s_headNode ) {
			// sanity
			if ( pn->m_prevNode ) { char *xx=NULL;*xx=0; }
			// the guy after us is the new head
			s_headNode = pn->m_nextNode;
		}

		// a new tail?
		if ( pn == s_tailNode ) {
			// sanity
			if ( pn->m_nextNode ) { char *xx=NULL;*xx=0; }
			// the guy before us is the new tail
			s_tailNode = pn->m_prevNode;
		}

		// empty?
		if ( ! s_headNode ) s_tailNode = NULL;

		// wtf? callback must have fired by the time we free
		if ( ! pn->m_calledCallback ) { char *xx=NULL;*xx=0; }

		// do callback first before freeing pn
		//if ( pn->m_callback ) pn->m_callback ( pn->m_state );

		// unlink from the doubly-linked list before freeing
		if ( pn->m_prevNode ) 
			pn->m_prevNode->m_nextNode = pn->m_nextNode;

		if ( pn->m_nextNode )
			pn->m_nextNode->m_prevNode = pn->m_prevNode;

		mfree ( pn , sizeof(ParmNode) , "pndfr");
	}

	// try to send more for him
	g_parms.doParmSendingLoop();
}
 | |
| 
 | |
| void parmLoop ( int fd , void *state ) {
 | |
| 	g_parms.doParmSendingLoop();
 | |
| }
 | |
| 
 | |
// one-time registration flag for the 2-second parmLoop sleep callback
static bool s_registeredSleep = false;
// reentrancy guard for doParmSendingLoop()
static bool s_inLoop = false;
 | |
| 
 | |
// . host #0 runs this to send out parms in the the parm queue (linked list)
//   to all other hosts.
// . he also sends to himself, if m_sendToGrunts is true
// . for each host: find the oldest ParmNode it has not completed yet and
//   send it via msgtype 0x3f; hosts a node does not target get a "free
//   pass" (marked completed without sending)
// . always returns true; progress is driven by replies and the 2-sec timer
bool Parms::doParmSendingLoop ( ) {

	// nothing queued
	if ( ! s_headNode ) return true;

	if ( g_isDumpingRdbFromMain ) return true;

	// reentrancy guard: replies can re-trigger this loop
	if ( s_inLoop ) return true;

	s_inLoop = true;

	// register the retry timer once, on first use
	if ( ! s_registeredSleep &&
	     ! g_loop.registerSleepCallback(2000,NULL,parmLoop,0) )
		log("parms: failed to reg parm loop");

	// do not re-register
	s_registeredSleep = true;

	int32_t now = getTimeLocal();

	// try to send a parm update request to each host
	for ( int32_t i = 0 ; i < g_hostdb.m_numHosts ; i++ ) {
		// get it
		Host *h = g_hostdb.getHost(i);
		// skip ourselves, host #0. we now send to ourselves
		// so updateParm() will be called on us...
		//if ( h->m_hostId == g_hostdb.m_myHostId ) continue;
		// . if in progress, gotta wait for that to complete
		// . 0 is not a legit parmid, it starts at 1
		if ( h->m_currentParmIdInProgress ) continue;
		// if his last completed parmid is the current he is up-to-date
		if ( h->m_lastParmIdCompleted == s_parmId ) continue;
		// if last try had an error, wait 10 secs i guess
		// (timeouts are exempt and retried immediately)
		if ( h->m_lastTryError &&
		     h->m_lastTryError != EUDPTIMEDOUT &&
		     now - h->m_lastTryTime < 10 )
			continue;
		// otherwise get him the next to send
		ParmNode *pn = s_headNode;
		for ( ; pn ; pn = pn->m_nextNode ) {
			// stop when we got a parmnode we have not sent to
			// him yet, we'll send it now
			if ( pn->m_parmId > h->m_lastParmIdCompleted ) break;
		}
		// nothing? strange. something is not right.
		// NOTE(review): the deliberate crash below is unreachable —
		// the break exits the host loop first. presumably the crash
		// was demoted to a log+break on purpose; confirm.
		if ( ! pn ) { 
			log("parms: pn is null");
			break;
			char *xx=NULL; *xx=0; 
		}

		// give him a free pass? some parm updates are directed to 
		// a single host, we use this for syncing parms at startup.
		if ( pn->m_hostId >= 0 && 
		     pn->m_hostId2 == -1 && // not a range
		     h->m_hostId != pn->m_hostId ) {
			// assume we sent it to him
			h->m_lastParmIdCompleted = pn->m_parmId;
			h->m_currentNodePtr = NULL;
			continue;
		}

		// range? if not in range, give free pass
		if ( pn->m_hostId >= 0 && 
		     pn->m_hostId2 >= 0 &&
		     ( h->m_hostId < pn->m_hostId ||
		       h->m_hostId > pn->m_hostId2 ) ) {
			// assume we sent it to him
			h->m_lastParmIdCompleted = pn->m_parmId;
			h->m_currentNodePtr = NULL;
			continue;
		}

			
		// force completion if we should NOT send to him
		if ( (h->isProxy() && ! pn->m_sendToProxies) ||
		     (h->isGrunt() && ! pn->m_sendToGrunts ) ) {
			h->m_lastParmIdCompleted = pn->m_parmId;
			h->m_currentNodePtr = NULL;
			continue;
		}

		// debug log
		log(LOG_INFO,"parms: sending parm request id %i "
		    "to hostid %" INT32 "",(int)pn->m_parmId,h->m_hostId);

		// count it
		pn->m_numRequests++;
		// ok, he's available. the send buf is the node's parm list;
		// gotParmReplyWrapper prevents the udp server from freeing it
		if ( ! g_udpServer.sendRequest ( pn->m_parmList.getBufStart(),
						 pn->m_parmList.length() ,
						 // a new msgtype
						 0x3f,
						 h->m_ip, // ip
						 h->m_port, // port
						 h->m_hostId ,
						 NULL, // retslot
						 (void *)(PTRTYPE)h->m_hostId , // state
						 gotParmReplyWrapper ,
						 30 , // timeout secs
						 -1 , // backoff
						 -1 , // maxwait
						 NULL , // replybuf
						 0 , // replybufmaxsize
						 0 ) ) { // niceness
			log("parms: failed to send: %s",mstrerror(g_errno));
			continue;
		}
		// flag this
		h->m_currentParmIdInProgress = pn->m_parmId;
		h->m_currentNodePtr = pn;
	}

	s_inLoop = false;

	return true;
}
 | |
| 
 | |
| void handleRequest3fLoop ( void *weArg ) ;
 | |
| 
 | |
// . udp reply-sent callback used by handleRequest3fLoop()
// . fires once the early "save" command reply has been sent and ack'd;
//   resumes processing the remaining parm recs. "state" is the WaitEntry.
void handleRequest3fLoop2 ( void *state , UdpSlot *slot ) {
	handleRequest3fLoop(state);
}
 | |
| 
 | |
// . sleep-callback wrapper that re-drives handleRequest3fLoop()
// . if a tree is saving while we are trying to delete a collnum (or reset)
//   then the call to updateParm() below returns false (blocked) and we must
//   re-call handleRequest3fLoop() from this sleep wrapper
// . "state" is the WaitEntry; we unregister ourselves first so the
//   callback fires only once per blocked update
void handleRequest3fLoop3 ( int fd , void *state ) {
	g_loop.unregisterSleepCallback(state,handleRequest3fLoop3);
	handleRequest3fLoop(state);
}
 | |
| 
 | |
| // . host #0 is requesting that we update some parms
 | |
| void handleRequest3fLoop ( void *weArg ) {
 | |
| 	WaitEntry *we = (WaitEntry *)weArg;
 | |
| 
 | |
| 	CollectionRec *cx = NULL;
 | |
| 
 | |
| 	// process them
 | |
| 	char *p = we->m_parmPtr;
 | |
| 	for ( ; p < we->m_parmEnd ; ) {
 | |
| 		// int16_tcut
 | |
| 		char *rec = p;
 | |
| 		// get size
 | |
| 		int32_t dataSize = *(int32_t *)(rec+sizeof(key96_t));
 | |
| 		int32_t recSize = sizeof(key96_t) + 4 + dataSize;
 | |
| 		// skip it
 | |
| 		p += recSize;
 | |
| 
 | |
| 		// get the actual parm
 | |
| 		Parm *parm = getParmFromParmRec ( rec );
 | |
| 
 | |
| 		if ( ! parm ) {
 | |
| 			int32_t h32 = getHashFromParmRec(rec);
 | |
| 			log("parms: unknown parm sent to us hash=%" INT32 "",h32);
 | |
| 			for ( int32_t i = 0 ; i < g_parms.m_numParms ; i++ ) {
 | |
| 				Parm *x = &g_parms.m_parms[i];
 | |
| 				if ( x->m_cgiHash != h32 ) continue;
 | |
| 				log("parms: unknown parm=%s",x->m_title);
 | |
| 				break;
 | |
| 			}
 | |
| 			continue;
 | |
| 		}
 | |
| 
 | |
| 		// if was the cmd to save & exit then first send a reply back
 | |
| 		if ( ! we->m_sentReply &&
 | |
| 		     parm->m_cgi && 
 | |
| 		     parm->m_cgi[0] == 's' &&
 | |
| 		     parm->m_cgi[1] == 'a' &&
 | |
| 		     parm->m_cgi[2] == 'v' &&
 | |
| 		     parm->m_cgi[3] == 'e' &&
 | |
| 		     parm->m_cgi[4] == '\0' ) {
 | |
| 			// do not re-do this
 | |
| 			we->m_sentReply = 1;
 | |
| 			// note it
 | |
| 			log("parms: sending early parm update reply");
 | |
| 			// wait for reply to be sent and ack'd
 | |
| 			g_udpServer.sendReply_ass ( NULL,0,
 | |
| 						    NULL,0,
 | |
| 						    we->m_slot,
 | |
| 						    8, // timeout in secs
 | |
| 						    // come back here when done
 | |
| 						    we ,
 | |
| 						    handleRequest3fLoop2 );
 | |
| 			return;
 | |
| 		}
 | |
| 
 | |
| 
 | |
| 		// . determine if it alters the url filters
 | |
| 		// . if those were changed we have to nuke doledb and
 | |
| 		//   waiting tree in Spider.cpp and rebuild them!
 | |
| 		if ( parm->m_flags & PF_REBUILDURLFILTERS )
 | |
| 			we->m_doRebuilds = true;
 | |
| 
 | |
| 		if ( parm->m_flags & PF_REBUILDPROXYTABLE )
 | |
| 			we->m_doProxyRebuild = true;
 | |
| 
 | |
| 		if ( parm->m_flags & PF_REBUILDACTIVELIST )
 | |
| 			we->m_rebuildActiveList = true;
 | |
| 
 | |
| 		// get collnum i guess
 | |
| 		if ( parm->m_type != TYPE_CMD )
 | |
| 			we->m_collnum = getCollnumFromParmRec ( rec );
 | |
| 
 | |
| 		// see if our spider round changes
 | |
| 		int32_t oldRound; 
 | |
| 		if ( we->m_collnum >= 0 && ! cx ) {
 | |
| 			cx = g_collectiondb.getRec ( we->m_collnum );
 | |
| 			// i guess coll might gotten deleted! so check cx
 | |
| 			if ( cx ) oldRound = cx->m_spiderRoundNum;
 | |
| 		}
 | |
| 
 | |
| 		// . this returns false if blocked, returns true and sets
 | |
| 		//   g_errno on error
 | |
| 		// . it'll block if trying to delete a coll when the tree
 | |
| 		//   is saving or something (CommandDeleteColl())
 | |
| 		if ( ! g_parms.updateParm ( rec , we ) ) {
 | |
| 			////////////
 | |
| 			//
 | |
| 			// . it blocked! it will call we->m_callback when done
 | |
| 			// . we must re-call
 | |
| 			// . try again in 100ms
 | |
| 			//
 | |
| 			////////////
 | |
| 			if(!g_loop.registerSleepCallback(100, 
 | |
| 							 we ,
 | |
| 							 handleRequest3fLoop3,
 | |
| 							 0 ) ){// niceness
 | |
| 				log("parms: failed to reg sleeper");
 | |
| 				return;
 | |
| 			}
 | |
| 			log("parms: updateParm blocked. waiting.");
 | |
| 			return;
 | |
| 		}
 | |
| 
 | |
| 		if ( cx && oldRound != cx->m_spiderRoundNum )
 | |
| 			we->m_updatedRound = true;
 | |
| 
 | |
| 		// do the next parm
 | |
| 		we->m_parmPtr = p;
 | |
| 
 | |
| 		// error?
 | |
| 		if ( ! g_errno ) continue;
 | |
| 		// this could mean failed to add coll b/c out of disk or
 | |
| 		// something else that is bad
 | |
| 		we->m_errno = g_errno;
 | |
| 	}
 | |
| 
 | |
| 	// one last thing... kinda hacky. if we change certain spidering parms
 | |
| 	// we have to do a couple rebuilds.
 | |
| 
 | |
| 	// reset page round counts
 | |
| 	if ( we->m_updatedRound && cx ) {
 | |
| 		// Spider.cpp will reset the *ThisRound page counts and
 | |
| 		// the sent notification flag
 | |
| 		spiderRoundIncremented ( cx );
 | |
| 	}
 | |
| 
 | |
| 	// basically resetting the spider here...
 | |
| 	if ( we->m_doRebuilds && cx ) {
 | |
| 		// . this tells Spider.cpp to rebuild the spider queues
 | |
| 		// . this is NULL if spider stuff never initialized yet,
 | |
| 		//   like if you just added the collection
 | |
| 		if ( cx->m_spiderColl )
 | |
| 			cx->m_spiderColl->m_waitingTreeNeedsRebuild = true;
 | |
| 		// . assume we have urls ready to spider too
 | |
| 		// . no, because if they change the filters and there are
 | |
| 		//   still no urls to spider i don't want to get another
 | |
| 		//   email alert!!
 | |
| 		//cr->m_localCrawlInfo .m_hasUrlsReadyToSpider = true;
 | |
| 		//cr->m_globalCrawlInfo.m_hasUrlsReadyToSpider = true;
 | |
| 		// . reconstruct the url filters if we were a custom crawl
 | |
| 		// . this is used to abstract away the complexity of url
 | |
| 		//   filters in favor of simple regular expressions and
 | |
| 		//   substring matching for diffbot
 | |
| 		cx->rebuildUrlFilters();
 | |
| 	}
 | |
| 
 | |
| 	if ( we->m_rebuildActiveList && cx ) 
 | |
| 		g_spiderLoop.m_activeListValid = false;
 | |
| 
 | |
| 	// if user changed the list of proxy ips rebuild the binary
 | |
| 	// array representation of the proxy ips we have
 | |
| 	if ( we->m_doProxyRebuild )
 | |
| 		buildProxyTable();
 | |
| 		
 | |
| 	// note it
 | |
| 	if ( ! we->m_sentReply )
 | |
| 		log("parms: sending parm update reply");
 | |
| 
 | |
| 	// send back reply now. empty reply for the most part
 | |
| 	if ( we->m_errno && ! we->m_sentReply )
 | |
| 		g_udpServer.sendErrorReply ( we->m_slot,we->m_errno,0 );
 | |
| 	else if ( ! we->m_sentReply )
 | |
| 		g_udpServer.sendReply_ass ( NULL,0,NULL,0,we->m_slot);
 | |
| 	// all done
 | |
| 	mfree ( we , sizeof(WaitEntry) , "weparm" );
 | |
| 	return;
 | |
| }
 | |
| 
 | |
| // . host #0 is requesting that we update some parms
 | |
| // . the readbuf in the request is the list of the parms
 | |
| void handleRequest3f ( UdpSlot *slot , int32_t niceness ) {
 | |
| 
 | |
| 	// sending to host #0 is not right...
 | |
| 	//if ( g_hostdb.m_hostId == 0 ) { char *xx=NULL;*xx=0; }
 | |
| 
 | |
| 	char *parmRecs = slot->m_readBuf;
 | |
| 	char *parmEnd  = parmRecs + slot->m_readBufSize;
 | |
| 
 | |
| 	log("parms: got parm update request. size=%" INT32 ".",
 | |
| 	    (int32_t)(parmEnd-parmRecs));
 | |
| 
 | |
| 	// make a new waiting entry
 | |
| 	WaitEntry *we ;
 | |
| 	we = (WaitEntry *) mmalloc ( sizeof(WaitEntry),"weparm");
 | |
| 	if ( ! we ) {
 | |
| 		g_udpServer.sendErrorReply(slot,g_errno,60);
 | |
| 		return;
 | |
| 	}
 | |
| 	we->m_slot = slot;
 | |
| 	we->m_callback = handleRequest3fLoop;
 | |
| 	we->m_parmPtr = parmRecs;
 | |
| 	we->m_parmEnd = parmEnd;
 | |
| 	we->m_errno = 0;
 | |
| 	we->m_doRebuilds = false;
 | |
| 	we->m_rebuildActiveList = false;
 | |
| 	we->m_updatedRound = false;
 | |
| 	we->m_doProxyRebuild = false;
 | |
| 	we->m_collnum = -1;
 | |
| 	we->m_sentReply = 0;
 | |
| 
 | |
| 	handleRequest3fLoop ( we );
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| ////
 | |
| //
 | |
| // functions for syncing parms with host #0
 | |
| //
 | |
| ////
 | |
| 
 | |
| // 1. we do not accept any recs into rdbs until in sync with host #0
 | |
| // 2. at startup we send the hash of all parms for each collrec and
 | |
| //    for g_conf (collnum -1) to host #0, then he will send us all the
 | |
| //    parms for a collrec (or g_conf) if we are out of sync.
 | |
| // 3. when host #0 changes a parm it lets everyone know via broadcastParmList()
 | |
| // 4. only host #0 may initiate parm changes. so don't let that go down!
 | |
| // 5. once in sync a host can drop recs for collnums that are invalid
 | |
| // 6. until in parm sync with host #0 reject adds to collnums we don't 
 | |
| //    have with ETRYAGAIN in Msg4.cpp
 | |
| 
 | |
| 
 | |
// . periodic sleep callback: keep trying to sync our parms with host #0
// . syncParmsWithHost0() is a no-op once m_triedToSync is set; on success
//   gotReplyFromHost0Wrapper() unregisters this callback
void tryToSyncWrapper ( int fd , void *state ) {
	g_parms.syncParmsWithHost0();
}
 | |
| 
 | |
| // host #0 just sends back an empty reply, but it will hit us with
 | |
| // 0x3f parmlist requests. that way it uses the same mechanism and can
 | |
| // guarantee ordering of the parm update requests
 | |
| void gotReplyFromHost0Wrapper ( void *state , UdpSlot *slot ) {
 | |
| 	// ignore his reply unless error?
 | |
| 	if ( g_errno ) {
 | |
| 		log("parms: got error syncing with host 0: %s. Retrying.",
 | |
| 		    mstrerror(g_errno));
 | |
| 		// re-try it!
 | |
| 		g_parms.m_triedToSync = false;
 | |
| 	}
 | |
| 	else {
 | |
| 		log("parms: synced with host #0");
 | |
| 		// do not re-call
 | |
| 		g_loop.unregisterSleepCallback(NULL,tryToSyncWrapper);
 | |
| 	}
 | |
| 
 | |
| 	g_errno = 0;
 | |
| }
 | |
| 
 | |
// . send host #0 a 0x3e request containing per-collection parmlist hashes
//   (built by makeSyncHashList()) so it can push us any parms we are
//   missing via 0x3f broadcasts
// . only runs once per process (guarded by m_triedToSync); the retry timer
//   (tryToSyncWrapper) re-arms it by clearing that flag on failure
// returns false and sets g_errno on error, true otherwise
bool Parms::syncParmsWithHost0 ( ) {

	if ( m_triedToSync ) return true;

	m_triedToSync = true;

	// stay out of sync until host #0 sends the "insync" command parm
	m_inSyncWithHost0 = false;

	// dont sync with ourselves
	if ( g_hostdb.m_hostId == 0 ) {
		m_inSyncWithHost0 = true;
		return true;
	}

	// only grunts for now can sync, not proxies, so stop if we are proxy
	if ( g_hostdb.m_myHost->m_type != HT_GRUNT ) {
		m_inSyncWithHost0 = true;
		return true;
	}


	SafeBuf hashList;
	
	if ( ! makeSyncHashList ( &hashList ) ) return false;

	// copy for sending
	SafeBuf sendBuf;
	if ( ! sendBuf.safeMemcpy ( &hashList ) ) return false;
	// sanity: the copy must be exact, capacity included, since the
	// whole buffer is handed to the udp server below
	if ( sendBuf.getCapacity() != hashList.length() ){char *xx=NULL;*xx=0;}
	if ( sendBuf.length() != hashList.length()  ){char *xx=NULL;*xx=0;}

	// allow udpserver to free it
	char *request = sendBuf.getBufStart();
	int32_t  requestLen = sendBuf.length();
	sendBuf.detachBuf();

	Host *h = g_hostdb.getHost(0);

	log("parms: trying to sync with host #0");

	// . send it off. use 3e i guess
	// . host #0 will reply using msg4 really
	// . msg4 guarantees ordering of requests
	// . there will be a record that is CMD_INSYNC so when we get
	//   that we set g_parms.m_inSyncWithHost0 to true
	// . NOTE(review): if sendRequest() fails here, "request" was already
	//   detached from sendBuf and appears to leak -- confirm whether
	//   UdpServer frees the buffer on the failure path
	if ( ! g_udpServer.sendRequest ( request ,//hashList.getBufStart() ,
					 requestLen, //hashList.length() ,
					 0x3e , // msgtype
					 h->m_ip, // ip
					 h->m_port, // port
					 h->m_hostId , // hostid , host #0!!!
					 NULL, // retslot
					 NULL , // state
					 gotReplyFromHost0Wrapper ,
					 99999999 ) ) { // timeout in secs
		log("parms: error syncing with host 0: %s",mstrerror(g_errno));
		return false;
	}

	// wait now
	return true;
}
 | |
| 
 | |
// . here host #0 is receiving a sync request from another host
// . the request (built by makeSyncHashList()) is a sequence of tuples:
//   collnum (sizeof(collnum_t) bytes), 32-bit collection-name hash,
//   64-bit hash of that collection's parmlist
// . host #0 scans this list of hashes to make sure the requesting host is
//   in sync
// . host #0 will broadcast parm updates by calling broadcastParmList() which
//   uses 0x3f, so this just returns an empty reply on success
// . sends CMD "addcoll" and "delcoll" cmd parms as well
// . include an "insync" command parm as last parm
void handleRequest3e ( UdpSlot *slot , int32_t niceness ) {

	// right now we must be host #0
	if ( g_hostdb.m_hostId != 0 ) { 
		g_errno = EBADENGINEER; 
	// shared error exit: later code jumps BACKWARD into this if-block
	// via "goto hadError". this is legal C++ here because the label
	// precedes the declarations of replyBuf/tmp below, so jumping to it
	// simply exits their scopes (running destructors)
	hadError:
		g_udpServer.sendErrorReply(slot,g_errno,60);
		return;
	}

	//
	// 0. scan our collections and clear a flag
	//
	for ( int32_t i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) {
		// skip if empty
		CollectionRec *cr = g_collectiondb.m_recs[i];
		if ( ! cr ) continue;
		// clear flag
		cr->m_hackFlag = 0;
	}

	// who is asking? NOTE(review): hostId stays -1 if the slot has no
	// host -- confirm broadcastParmList() below handles -1 sanely
	Host *host = slot->m_host;
	int32_t hostId = -1;
	if ( host ) hostId = host->m_hostId;

	SafeBuf replyBuf;

	//
	// 1. update parms on collections we both have
	// 2. tell him to delete collections we do not have but he does
	//
	SafeBuf tmp;
	char *p = slot->m_readBuf;
	char *pend = p + slot->m_readBufSize;
	for ( ; p < pend ; ) {
		// get collnum
		collnum_t c = *(collnum_t *)p;
		p += sizeof(collnum_t);
		// then coll NAME hash
		uint32_t collNameHash32 = *(int32_t *)p;
		p += 4;
		// sanity check. -1 means g_conf. i guess.
		if ( c < -1 ) { char *xx=NULL;*xx=0; }
		// and parm hash
		int64_t h64 = *(int64_t *)p;
		p += 8;
		// if we being host #0 do not have this collnum tell 
		// him to delete it!
		CollectionRec *cr = NULL;
		if ( c >= 0 ) cr = g_collectiondb.getRec ( c );

		// if collection names are different delete it
		if ( cr && collNameHash32 != hash32n ( cr->m_coll ) ) {
			log("sync: host had collnum %i but wrong name, "
			    "name not %s like it should be",(int)c,cr->m_coll);
			cr = NULL;
		}

		if ( c >= 0 && ! cr ) {
			// note in log
			logf(LOG_INFO,"sync: telling host #%" INT32 " to delete "
			     "collnum %" INT32 "", hostId,(int32_t)c);
			// add the parm rec as a parm cmd
			if (! g_parms.addNewParmToList1( &replyBuf,
							 c,
							 NULL,
							 -1,
							 "delete"))
				goto hadError;
			// ok, get next collection hash
			continue;
		}
		// set our hack flag so we know he has this collection
		if ( cr ) cr->m_hackFlag = 1;
		// get our parmlist for that collnum
		tmp.reset();
		// c is -1 for g_conf
		if ( ! g_parms.addAllParmsToList ( &tmp, c ) ) goto hadError;
		// get checksum of that
		int64_t m64 = hash64 ( tmp.getBufStart(),tmp.length() );
		// if match, keep chugging, that's in sync
		if ( h64 == m64 ) continue;
		// note in log
		logf(LOG_INFO,"sync: sending all parms for collnum %" INT32 " "
		     "to host #%" INT32 "", (int32_t)c, hostId);
		// otherwise, send him the list
		if ( ! replyBuf.safeMemcpy ( &tmp ) ) goto hadError;
	}

	//
	// 3. now if he's missing one of our collections tell him to add it
	//
	for ( int32_t i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) {
		// skip if empty
		CollectionRec *cr = g_collectiondb.m_recs[i];
		if ( ! cr ) continue;
		// skip if he already has it (hack flag set in step 1/2 above)
		if ( cr->m_hackFlag ) continue;
		//char *cmdStr = "addColl";
		// now use lowercase, not camelcase
		char *cmdStr = "addcoll";
		if ( cr->m_isCustomCrawl == 1 ) cmdStr = "addCrawl";
		if ( cr->m_isCustomCrawl == 2 ) cmdStr = "addBulk";
		// note in log
		logf(LOG_INFO,"sync: telling host #%" INT32 " to add "
		     "collnum %" INT32 " coll=%s", hostId,(int32_t)cr->m_collnum,
		     cr->m_coll);
		// add the parm rec as a parm cmd
		if ( ! g_parms.addNewParmToList1 ( &replyBuf,
						   (collnum_t)i,
						   cr->m_coll, // parm val
						   -1,
						   cmdStr ) )
			goto hadError;
		// and the parmlist for it
		if (!g_parms.addAllParmsToList (&replyBuf, i ) ) goto hadError;
	}

	// . final parm is the in sync stamp of approval which will set
	//   g_parms.m_inSyncWithHost0 to true. CommandInSync()
	// .  use -1 for collnum for this cmd
	if ( ! g_parms.addNewParmToList1 ( &replyBuf,-1,NULL,-1,"insync"))
		goto hadError;

	// this should at least have the in sync command
	log("parms: sending %" INT32 " bytes of parms to sync to host #%" INT32 "",
	    replyBuf.length(),hostId);

	// . use the broadcast call here so things keep their order!
	// . we do not need a callback when they have been completely
	//   broadcasted to all hosts so use NULL for that
	// . crap, we only want to send this to host #x ...
	g_parms.broadcastParmList ( &replyBuf , NULL , NULL , 
				    true , // sendToGrunts?
				    false ,  // sendToProxies?
				    hostId );

	// but do send back an empty reply to this 0x3e request
	g_udpServer.sendReply_ass ( NULL,0,NULL,0,slot);
	
	// send that back now
	//g_udpServer.sendReply_ass ( replyBuf.getBufStart() ,
	//			    replyBuf.length() ,
	//			    replyBuf.getBufStart() ,
	//			    replyBuf.getCapacity() ,
	//			    slot );
	// udpserver will free it
	//replyBuf.detachBuf();
}
 | |
| 
 | |
| 
 | |
// get the hash of every collection's parmlist
// . builds, for g_conf (collnum -1) and for each existing collection, a
//   tuple of: collnum, 32-bit collection-name hash (0 for g_conf), and a
//   64-bit hash of all that collection's parms
// . host #0 diffs these against its own in handleRequest3e() to decide
//   which parms/collections to push back to us
// . returns false on error, true otherwise
bool Parms::makeSyncHashList ( SafeBuf *hashList ) {
	SafeBuf tmp;

	// first do g_conf, collnum -1!
	for ( int32_t i = -1 ; i < g_collectiondb.m_numRecs ; i++ ) {
		// shortcut
		CollectionRec *cr = NULL;
		if ( i >= 0 ) cr = g_collectiondb.m_recs[i];
		// skip if empty
		if ( i >=0 && ! cr ) continue;
		// clear since last time
		tmp.reset();
		// g_conf? if i is -1 do g_conf
		if ( ! addAllParmsToList ( &tmp , i ) )
			return false;
		// . store collnum first
		// . NOTE(review): this copies only sizeof(collnum_t) (2)
		//   bytes of the int32 loop counter -- NOT 4 bytes as an
		//   older comment claimed -- so it relies on little-endian
		//   byte order to grab the low-order bytes; confirm before
		//   porting to a big-endian host
		if ( ! hashList->safeMemcpy ( &i , sizeof(collnum_t) ) )
			return false;
		// then store the collection name hash, 32 bit hash
		uint32_t collNameHash32 = 0;
		if ( cr ) collNameHash32 = hash32n ( cr->m_coll );
		if ( ! hashList->safeMemcpy ( &collNameHash32, 4 ) )
			return false;
		// hash the parms
		int64_t h64 = hash64 ( tmp.getBufStart(),tmp.length() );
		// and store it
		if ( ! hashList->pushLongLong ( h64 ) )
			return false;
	}
	return true;
}
 | |
| 
 | |
| int32_t Parm::getNumInArray ( collnum_t collnum ) {
 | |
| 	char *obj = (char *)&g_conf;
 | |
| 	if ( m_obj == OBJ_COLL ) {
 | |
| 		CollectionRec *cr = g_collectiondb.getRec ( collnum );
 | |
| 		if ( ! cr ) return -1;
 | |
| 		obj = (char *)cr;
 | |
| 	}
 | |
| 	// # in array is before it
 | |
| 	return *(int32_t *)(obj+m_off-4);
 | |
| }
 | |
| 
 | |
// . we use this for syncing parms between hosts
// . called by convertAllCollRecsToParmList
// . returns false and sets g_errno on error
// . "rec" can be CollectionRec or g_conf ptr
// . serializes every syncable parm of one collection (collnum >= 0) or of
//   g_conf (collnum == -1) into "parmList"; the resulting buffer is hashed
//   for sync checksums, so the set and order of parms added here must be
//   identical on every host
bool Parms::addAllParmsToList ( SafeBuf *parmList, collnum_t collnum ) {

	// loop over parms
	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
		// get it
		Parm *parm = &m_parms[i];
		// skip comments
		if ( parm->m_type == TYPE_COMMENT ) continue;
		if ( parm->m_type == TYPE_FILEUPLOADBUTTON ) continue;
		// cmds
		if ( parm->m_type == TYPE_CMD ) continue;
		if ( parm->m_type == TYPE_BOOL2 ) continue;

		// daily merge last started. do not sync this...
		if ( parm->m_type == TYPE_LONG_CONST ) continue;

		// only g_conf parms for collnum -1, only coll parms otherwise
		if ( collnum == -1 && parm->m_obj != OBJ_CONF ) continue;
		if ( collnum >=  0 && parm->m_obj != OBJ_COLL ) continue;
		if ( collnum < -1 ) { char *xx=NULL;*xx=0; }

		// like 'statsdb max cache mem' etc.
		if ( parm->m_flags & PF_NOSYNC ) continue;

		// sanity, need cgi hash to look up the parm on the 
		// receiving end
		if ( parm->m_cgiHash == 0 ) { 
			log("parms: no cgi for parm %s",parm->m_title);
			char *xx=NULL; *xx=0;
		}

		// . for a NON-array parm these defaults make the loop below
		//   run exactly once (occNum -1 < maxOccNum 0) with
		//   occNum == -1, which marks "not an array" in the rec
		// . for an array parm we emit one rec per element instead
		int32_t occNum = -1;
		int32_t maxOccNum = 0;

		if ( parm->isArray() ) {
			maxOccNum = parm->getNumInArray(collnum) ;
			occNum = 0;
		}

		for ( ; occNum < maxOccNum ; occNum ++ ) {
			// add each occ # to list
			if ( ! addCurrentParmToList2 ( parmList ,
						       collnum ,
						       occNum ,
						       parm ) )
				return false;
			/*
			  //
			  // use this to debug parm list checksums being off
			  //
			int64_t h64 ;
			h64 = hash64 ( parmList->getBufStart(),
				       parmList->length() );
			// note it for debugging hash
			SafeBuf xb;
			parm->printVal ( &xb ,collnum,occNum);
			log("parms: adding (h=%" XINT64 ") parm %s = %s",
			    h64,parm->m_title,xb.getBufStart());
			*/
		}

	}
	return true;
}	
 | |
| 
 | |
| void resetImportLoopFlag () ;
 | |
| 
 | |
| // . this adds the key if not a cmd key to parmdb rdbtree
 | |
| // . this executes cmds
 | |
| // . this updates the CollectionRec which may disappear later and be fully
 | |
| //   replaced by Parmdb, just an RdbTree really.
 | |
| // . returns false if blocked
 | |
| // . returns true and sets g_errno on error
 | |
| bool Parms::updateParm ( char *rec , WaitEntry *we ) {
 | |
| 
 | |
| 	collnum_t collnum = getCollnumFromParmRec ( rec );
 | |
| 
 | |
| 	g_errno = 0;
 | |
| 
 | |
| 	Parm *parm = getParmFromParmRec ( rec );
 | |
| 
 | |
| 	if ( ! parm ) {
 | |
| 		log("parmdb: could not find parm for rec");
 | |
| 		g_errno = EBADENGINEER;
 | |
| 		return true;
 | |
| 	}
 | |
| 
 | |
| 	// cmd to execute?
 | |
| 	if ( parm->m_type == TYPE_CMD ||
 | |
| 	     // sitelist is a safebuf but it requires special deduping
 | |
| 	     // logic to update it so it uses CommandUpdateSiteList() to
 | |
| 	     // do the updating
 | |
| 	     parm->m_func ) {
 | |
| 		// all parm rec data for TYPE_CMD should be ascii/utf8 chars
 | |
| 		// and should be \0 terminated
 | |
| 		char *data = getDataFromParmRec ( rec );
 | |
| 		int32_t dataSize = getDataSizeFromParmRec ( rec );
 | |
| 		if ( dataSize == 0 ) data = NULL;
 | |
| 		log("parmdb: running function for "
 | |
| 		    "parm \"%s\" (collnum=%" INT32 ") args=\"%s\""
 | |
| 		    , parm->m_title
 | |
| 		    , (int32_t)collnum
 | |
| 		    , data 
 | |
| 		    );
 | |
| 
 | |
| 		// sets g_errno on error
 | |
| 		if ( parm->m_func ) {
 | |
| 			parm->m_func ( rec );
 | |
| 			return true;
 | |
| 		}
 | |
| 
 | |
| 		// fix core from using "roundstart=1" on non-existent coll
 | |
| 		if ( ! parm->m_func2 ) {
 | |
| 			return true;
 | |
| 		}
 | |
| 
 | |
| 		// . returns true and sets g_errno on error
 | |
| 		// . returns false if blocked
 | |
| 		// . this is for CommandDeleteColl() and CommandResetColl()
 | |
| 		if ( parm->m_func2 ( rec , we ) ) return true;
 | |
| 
 | |
| 		// . it did not complete.
 | |
| 		// . we need to re-call it using sleep wrapper above
 | |
| 		return false;
 | |
| 	}
 | |
| 
 | |
| 	// "cr" will remain null when updating g_conf and collnum -1
 | |
| 	CollectionRec *cr = NULL;
 | |
| 	if ( collnum >= 0 ) {
 | |
| 		cr = g_collectiondb.getRec ( collnum );
 | |
| 		if ( ! cr ) {
 | |
| 			char *ps = "unknown parm";
 | |
| 			if ( parm ) ps = parm->m_title;
 | |
| 			log("parmdb: invalid collnum %" INT32 " for parm \"%s\"",
 | |
| 			    (int32_t)collnum,ps);
 | |
| 			g_errno = ENOCOLLREC;
 | |
| 			return true;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// what are we updating?
 | |
| 	void *base = NULL;
 | |
| 
 | |
| 	// we might have a collnum specified even if parm is global,
 | |
| 	// maybe there are some collection/local parms specified as well
 | |
| 	// that that collnum applies to
 | |
| 	if ( parm->m_obj == OBJ_COLL ) base = cr;
 | |
| 	else                           base = &g_conf;
 | |
| 
 | |
| 	if ( ! base ) {
 | |
| 		log("parms: no collrec (%" INT32 ") to change parm",(int32_t)collnum);
 | |
| 		g_errno = ENOCOLLREC;
 | |
| 		return true;
 | |
| 	}
 | |
| 		
 | |
| 	int32_t occNum = getOccNumFromParmRec ( rec );
 | |
| 
 | |
| 	// get data
 | |
| 	int32_t dataSize = *(int32_t *)(rec+sizeof(key96_t));
 | |
| 	char *data = rec+sizeof(key96_t)+4;
 | |
| 
 | |
| 	// point to where to copy the data into collrect
 | |
| 	char *dst = (char *)base + parm->m_off;
 | |
| 	// point to count in case it is an array
 | |
| 	int32_t *countPtr = NULL;
 | |
| 	// array?
 | |
| 	if ( parm->isArray() ) {
 | |
| 		if ( occNum < 0 ) {
 | |
| 			log("parms: bad occnum for %s",parm->m_title);
 | |
| 			return false;
 | |
| 		}
 | |
| 		// point to count in case it is an array
 | |
| 		countPtr = (int32_t *)(dst - 4);
 | |
| 		// now point "dst" to the occNum-th element
 | |
| 		dst += parm->m_size * occNum;
 | |
| 	}
 | |
| 
 | |
| 	//
 | |
| 	// compare parm to see if it changed value
 | |
| 	//
 | |
| 	SafeBuf val1;
 | |
| 	parm->printVal ( &val1 , collnum , occNum );
 | |
| 
 | |
| 	// if parm is a safebuf...
 | |
| 	if ( parm->m_type == TYPE_SAFEBUF ) {
 | |
| 		// point to it
 | |
| 		SafeBuf *sb = (SafeBuf *)dst;
 | |
| 		// nuke it
 | |
| 		sb->purge();
 | |
| 		// require that the \0 be part of the update i guess
 | |
| 		//if ( ! data || dataSize <= 0 ) { char *xx=NULL;*xx=0; }
 | |
| 		// check for \0
 | |
| 		if ( data && dataSize > 0 ) {
 | |
| 			if ( data[dataSize-1] != '\0') { char *xx=NULL;*xx=0;}
 | |
| 			// this means that we can not use string POINTERS as 
 | |
| 			// parms!! don't include \0 as part of length
 | |
| 			sb->safeStrcpy ( data ); // , dataSize );
 | |
| 			// ensure null terminated
 | |
| 			sb->nullTerm();
 | |
| 			sb->setLabel("parm2");
 | |
| 		}
 | |
| 		//return true;
 | |
| 		// sanity
 | |
| 		// we no longer include the \0 in the dataSize...so a dataSize
 | |
| 		// of 0 means empty string...
 | |
| 		//if ( data[dataSize-1] != '\0' ) { char *xx=NULL;*xx=0; }
 | |
| 	}
 | |
| 	else {
 | |
| 		// and copy the data into collrec or g_conf
 | |
| 		gbmemcpy ( dst , data , dataSize );
 | |
| 	}
 | |
| 
 | |
| 	SafeBuf val2;
 | |
| 	parm->printVal ( &val2 , collnum , occNum );
 | |
| 
 | |
| 	// did this parm change value?
 | |
| 	bool changed = true;
 | |
| 	if ( strcmp ( val1.getBufStart() , val2.getBufStart() ) == 0 )
 | |
| 		changed = false;
 | |
| 
 | |
| 	// . update array count if necessary
 | |
| 	// . parm might not have changed value based on what was in there
 | |
| 	//   by default, but for PAGE_FILTERS the default value in the row
 | |
| 	//   for this parm might have been zero! so we gotta update its
 | |
| 	//   "count" in that scenario even though the parm val was unchanged.
 | |
| 	if ( parm->isArray() ) {
 | |
| 		// the int32_t before the array is the # of elements
 | |
| 		int32_t currentCount = *countPtr;
 | |
| 		// update our # elements in our array if this is bigger
 | |
| 		int32_t newCount = occNum + 1;
 | |
| 		bool updateCount = false;
 | |
| 		if ( newCount > currentCount ) updateCount = true;
 | |
| 		// do not update counts if we are url filters
 | |
| 		// and we are currently >= the expression count. we have
 | |
| 		// to have a non-empty expression at the end in order to
 | |
| 		// add the expression. this prevents the empty line from
 | |
| 		// being added!
 | |
| 		if ( parm->m_page == PAGE_FILTERS &&
 | |
| 		     cr->m_regExs[occNum].getLength() == 0 )
 | |
| 			updateCount = false;
 | |
| 		// and for other pages, like master ips, skip if empty!
 | |
| 		// PAGE_PASSWORDS, PAGE_MASTERPASSWORDS, ...
 | |
| 		if ( parm->m_page != PAGE_FILTERS && ! changed )
 | |
| 			updateCount = false;
 | |
| 
 | |
| 		// ok, increment the array count of items in the array
 | |
| 		if ( updateCount )
 | |
| 			*countPtr = newCount;
 | |
| 	}
 | |
| 
 | |
| 	// all done if value was unchanged
 | |
| 	if ( ! changed )
 | |
| 		return true;
 | |
| 
 | |
| 	// show it
 | |
| 	log("parms: updating parm \"%s\" "
 | |
| 	    "(%s[%" INT32 "]) (collnum=%" INT32 ") from \"%s\" -> \"%s\"",
 | |
| 	    parm->m_title,
 | |
| 	    parm->m_cgi,
 | |
| 	    occNum,
 | |
| 	    (int32_t)collnum,
 | |
| 	    val1.getBufStart(),
 | |
| 	    val2.getBufStart());
 | |
| 
 | |
| 	if ( cr ) cr->m_needsSave = true;
 | |
| 
 | |
| 	// HACK #2
 | |
| 	if ( base == cr && dst == (char *)&cr->m_importEnabled )
 | |
| 		resetImportLoopFlag();
 | |
| 
 | |
| 	//
 | |
| 	// HACK
 | |
| 	//
 | |
| 	// special hack. if spidering re-enabled then reset last spider
 | |
| 	// attempt time to 0 to avoid the "has no more urls to spider"
 | |
| 	// msg followed by the reviving url msg.
 | |
| 	if ( base == cr && dst == (char *)&cr->m_spideringEnabled )
 | |
| 		cr->m_localCrawlInfo.m_lastSpiderAttempt = 0;
 | |
| 	if ( base == &g_conf && dst == (char *)&g_conf.m_spideringEnabled ){
 | |
| 		for(int32_t i = 0;i<g_collectiondb.m_numRecs;i++){
 | |
| 			CollectionRec *cr = g_collectiondb.m_recs[i];
 | |
| 			if ( ! cr ) continue;
 | |
| 			cr->m_localCrawlInfo.m_lastSpiderAttempt = 0;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	//
 | |
| 	// if user changed the crawl/process max then reset here so
 | |
| 	// spiders will resume
 | |
| 	// 
 | |
| 	if ( base == cr && 
 | |
| 	     dst == (char *)&cr->m_maxToCrawl &&
 | |
| 	     cr->m_spiderStatus == SP_MAXTOCRAWL ) {
 | |
| 		// reset this for rebuilding of active spider collections
 | |
| 		// so this collection can be in the linked list again
 | |
| 		cr->m_spiderStatus = SP_INPROGRESS;
 | |
| 		// rebuild list of active spider collections then
 | |
| 		g_spiderLoop.m_activeListValid = false;
 | |
| 	}
 | |
| 
 | |
| 	if ( base == cr && 
 | |
| 	     dst == (char *)&cr->m_maxToProcess &&
 | |
| 	     cr->m_spiderStatus == SP_MAXTOPROCESS ) {
 | |
| 		// reset this for rebuilding of active spider collections
 | |
| 		// so this collection can be in the linked list again
 | |
| 		cr->m_spiderStatus = SP_INPROGRESS;
 | |
| 		// rebuild list of active spider collections then
 | |
| 		g_spiderLoop.m_activeListValid = false;
 | |
| 	}
 | |
| 
 | |
| 	if ( base == cr && 
 | |
| 	     dst == (char *)&cr->m_maxCrawlRounds &&
 | |
| 	     cr->m_spiderStatus == SP_MAXROUNDS ) {
 | |
| 		// reset this for rebuilding of active spider collections
 | |
| 		// so this collection can be in the linked list again
 | |
| 		cr->m_spiderStatus = SP_INPROGRESS;
 | |
| 		// rebuild list of active spider collections then
 | |
| 		g_spiderLoop.m_activeListValid = false;
 | |
| 	}
 | |
| 
 | |
| 	//
 | |
| 	// END HACK
 | |
| 	//
 | |
| 
 | |
| 	// all done
 | |
| 	return true;
 | |
| }
 | |
| 
 | |
| bool Parm::printVal ( SafeBuf *sb , collnum_t collnum , int32_t occNum ) {
 | |
| 
 | |
| 	CollectionRec *cr = NULL;
 | |
| 	if ( collnum >= 0 ) cr = g_collectiondb.getRec ( collnum );
 | |
| 
 | |
| 	// no value if no storage record offset
 | |
| 	//if ( m_off < 0 ) return true;
 | |
| 
 | |
| 	char *base;
 | |
| 	if ( m_obj == OBJ_COLL ) base = (char *)cr;
 | |
| 	else                     base = (char *)&g_conf;
 | |
| 
 | |
| 	if ( ! base ) {
 | |
| 		log("parms: no collrec (%" INT32 ") to change parm",(int32_t)collnum);
 | |
| 		g_errno = ENOCOLLREC;
 | |
| 		return true;
 | |
| 	}
 | |
| 		
 | |
| 	// point to where to copy the data into collrect
 | |
| 	char *val = (char *)base + m_off;
 | |
| 
 | |
| 	if ( isArray() && occNum < 0 ) {
 | |
| 		log("parms: bad occnum for %s",m_title);
 | |
| 		return false;
 | |
| 	}
 | |
| 
 | |
| 	// add array index to ptr
 | |
| 	if ( isArray() ) val += m_size * occNum;
 | |
| 
 | |
| 
 | |
| 	if ( m_type == TYPE_SAFEBUF ) {
 | |
| 		// point to it
 | |
| 		SafeBuf *sb2 = (SafeBuf *)val;
 | |
| 		return sb->safePrintf("%s",sb2->getBufStart());
 | |
| 	}
 | |
| 
 | |
| 	if ( m_type == TYPE_STRING || 
 | |
| 	     m_type == TYPE_STRINGBOX || 
 | |
| 	     m_type == TYPE_SAFEBUF ||
 | |
| 	     m_type == TYPE_STRINGNONEMPTY )
 | |
| 		return sb->safePrintf("%s",val);
 | |
| 
 | |
| 	if ( m_type == TYPE_LONG || m_type == TYPE_LONG_CONST ) 
 | |
| 		return sb->safePrintf("%" INT32 "",*(int32_t *)val);
 | |
| 
 | |
| 	if ( m_type == TYPE_DATE ) 
 | |
| 		return sb->safePrintf("%" INT32 "",*(int32_t *)val);
 | |
| 
 | |
| 	if ( m_type == TYPE_DATE2 ) 
 | |
| 		return sb->safePrintf("%" INT32 "",*(int32_t *)val);
 | |
| 
 | |
| 	if ( m_type == TYPE_FLOAT ) 
 | |
| 		return sb->safePrintf("%f",*(float *)val);
 | |
| 
 | |
| 	if ( m_type == TYPE_LONG_LONG ) 
 | |
| 		return sb->safePrintf("%" INT64 "",*(int64_t *)val);
 | |
| 
 | |
| 	if ( m_type == TYPE_CHARPTR ) {
 | |
| 		if ( val ) return sb->safePrintf("%s",val);
 | |
| 		return true;
 | |
| 	}
 | |
| 
 | |
| 	if ( m_type == TYPE_BOOL ||
 | |
| 	     m_type == TYPE_BOOL2 ||
 | |
| 	     m_type == TYPE_CHECKBOX ||
 | |
| 	     m_type == TYPE_PRIORITY2 ||
 | |
| 	     m_type == TYPE_UFP ||
 | |
| 	     m_type == TYPE_CHAR )
 | |
| 		return sb->safePrintf("%hhx",*val);
 | |
| 
 | |
| 	if ( m_type == TYPE_CMD ) 
 | |
| 		return sb->safePrintf("CMD");
 | |
| 
 | |
| 	if ( m_type == TYPE_IP )
 | |
| 		// may print 0.0.0.0
 | |
| 		return sb->safePrintf("%s",iptoa(*(int32_t *)val) );
 | |
| 
 | |
| 	log("parms: missing parm type!!");
 | |
| 
 | |
| 	char *xx=NULL;*xx=0;
 | |
| 	return false;
 | |
| }
 | |
| 
 | |
| bool printUrlExpressionExamples ( SafeBuf *sb ) {
 | |
| 
 | |
| 		/*
 | |
| 		CollectionRec *cr = (CollectionRec *)THIS;
 | |
| 		// if testUrl is provided, find in the table
 | |
| 		char testUrl [ 1025 ];
 | |
| 		char *tt = r->getString ( "qatest123" , NULL );
 | |
| 		testUrl[0]='\0';
 | |
| 		if ( tt ) strncpy ( testUrl , tt , 1024 );
 | |
| 		char *tu = testUrl;
 | |
| 		if ( ! tu ) tu = "";
 | |
| 		char matchString[12];
 | |
| 		matchString[0] = '\0';
 | |
| 		if ( testUrl[0] ) {
 | |
| 			Url u;
 | |
| 			u.set ( testUrl , gbstrlen(testUrl) );
 | |
| 			//since we don't know the doc's quality, sfn, or
 | |
| 			//other stuff, just give default values
 | |
| 			int32_t n = cr->getRegExpNum ( &u    , 
 | |
| 						    false ,  // links2gb?
 | |
| 						    false ,  // searchboxToGB
 | |
| 						    false ,  // onsite?
 | |
| 						    -1    ,  // docQuality
 | |
| 						    -1    ,  // hopCount
 | |
| 						    false ,  // siteInDmoz?
 | |
| 						    //-1  ,  // ruleset #
 | |
| 						    -1    ,  // langId
 | |
| 						    -1    ,  // parent priority
 | |
| 						    0     ,  // niceness
 | |
| 						    NULL  ,  // tagRec
 | |
| 						    false ,  // isRSS?
 | |
| 						    false ,  // isPermalink?
 | |
| 						    false ,  // new outlink?
 | |
| 						    -1    , // age
 | |
| 						    NULL  , // LinkInfo
 | |
| 						    NULL  , // parentUrl
 | |
| 						    -1    , // priority
 | |
| 						    false , // isAddUrl
 | |
| 						    false , // parentRSS?
 | |
| 						    false , // parentIsNew?
 | |
| 						    false , // parentIsPermlnk
 | |
| 						    false );// isIndexed?
 | |
| 			if ( n == -1 ) sprintf ( matchString , "default" );
 | |
| 			else           sprintf ( matchString, "%" INT32 "", n+1 );
 | |
| 		}
 | |
| 		// test table
 | |
| 		sb.safePrintf (
 | |
| 			  //"</form><form method=get action=/cgi/14.cgi>"
 | |
| 			  //"<input type=hidden name="
 | |
| 			  "<table width=100%% cellpadding=4 border=1 "
 | |
| 			  "bgcolor=#%s>"
 | |
| 			  "<tr><td colspan=2 bgcolor=#%s><center>"
 | |
| 			  //"<font size=+1>"
 | |
| 			  "<b>"
 | |
| 			  "URL Filters Test</b>"
 | |
| 			  //"</font>"
 | |
| 			  "</td></tr>"
 | |
| 			  "<tr><td colspan=2>"
 | |
| 			  "<font size=1>"
 | |
| 			  "To test your URL filters simply enter a URL into "
 | |
| 			  "this box and submit it. The URL filter line number "
 | |
| 			  "that it matches will be displayed to the right."
 | |
| 			  "</font>"
 | |
| 			  "</td></tr>"
 | |
| 			  "<tr>"
 | |
| 			  "<td><b>Test URL</b></td>"
 | |
| 			  "<td><b>Matching Expression #</b></td>"
 | |
| 			  "</tr>"
 | |
| 			  "<tr>"
 | |
| 			  "<td><input type=text size=55 value=\"%s\" "
 | |
| 			  "name=test> "
 | |
| 			  "<input type=submit name=action value=test></td>"
 | |
| 			  "<td>%s</td></tr></table><br><br>\n" ,
 | |
| 			  LIGHT_BLUE , DARK_BLUE , testUrl , matchString );
 | |
| 		*/
 | |
| 
 | |
| 		sb->safePrintf(
 | |
| 			       "<style>"
 | |
| 			       ".poo { background-color:#%s;}\n"
 | |
| 			       "</style>\n" ,
 | |
| 			       LIGHT_BLUE );
 | |
| 
 | |
| 		sb->safePrintf (
 | |
| 			  "<table %s>"
 | |
| 			  "<tr><td colspan=2><center>"
 | |
| 			  "<b>"
 | |
| 			  "Supported Expressions</b>"
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>default</td>"
 | |
| 			  "<td>Matches every url."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>^http://whatever</td>"
 | |
| 			  "<td>Matches if the url begins with "
 | |
| 			  "<i>http://whatever</i>"
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>$.css</td>"
 | |
| 			  "<td>Matches if the url ends with \".css\"."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>foobar</td>"
 | |
| 			  "<td>Matches if the url CONTAINS <i>foobar</i>."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>tld==uk,jp</td>"
 | |
| 			  "<td>Matches if url's TLD ends in \"uk\" or \"jp\"."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  /*
 | |
| 			  "<tr class=poo><td>doc:quality<40</td>"
 | |
| 			  "<td>Matches if document quality is "
 | |
| 			  "less than 40. Can be used for assigning to spider "
 | |
| 			  "priority.</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>doc:quality<40 && tag:ruleset==22</td>"
 | |
| 			  "<td>Matches if document quality less than 40 and "
 | |
| 			  "belongs to ruleset 22. Only for assigning to "
 | |
| 			  "spider priority.</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td><nobr>"
 | |
| 			  "doc:quality<40 && tag:manualban==1</nobr></td>"
 | |
| 			  "<td>Matches if document quality less than 40 and "
 | |
| 			  "is has a value of \"1\" for its \"manualban\" "
 | |
| 			  "tag.</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>tag:ruleset==33 && doc:quality<40</td>"
 | |
| 			  "<td>Matches if document quality less than 40 and "
 | |
| 			  "belongs to ruleset 33. Only for assigning to "
 | |
| 			  "spider priority or a banned ruleset.</td></tr>"
 | |
| 			  */
 | |
| 
 | |
| 			  "<tr class=poo><td><a name=hopcount></a>"
 | |
| 			  "hopcount<4 && iswww</td>"
 | |
| 			  "<td>Matches if document has a hop count of 4, and "
 | |
| 			  "is a \"www\" url (or domain-only url).</td></tr>"
 | |
| 			  
 | |
| 			  "<tr class=poo><td>hopcount</td>"
 | |
| 			  "<td>All root urls, those that have only a single "
 | |
| 			  "slash for their path, and no cgi parms, have a "
 | |
| 			  "hop count of 0. Also, all RSS urls, ping "
 | |
| 			  "server urls and site roots (as defined in the "
 | |
| 			  "site rules table) have a hop count of 0. Their "
 | |
| 			  "outlinks have a hop count of 1, and the outlinks "
 | |
| 			  "of those outlinks a hop count of 2, etc."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>sitepages</td>"
 | |
| 			  "<td>The number of pages that are currently indexed "
 | |
| 			  "for the subdomain of the URL. "
 | |
| 			  "Used for doing quotas."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 
 | |
| 			  // MDW: 7/11/2014 take this out until it works.
 | |
| 			  // problem is that the quota table m_localTable
 | |
| 			  // in Spider.cpp gets reset for each firstIp scan,
 | |
| 			  // and we have a.walmart.com and b.walmart.com
 | |
| 			  // with different first ips even though on same 
 | |
| 			  // domain. perhaps we should use the domain as the
 | |
| 			  // key to getting the firstip for and subdomain.
 | |
| 			  // but out whole selection algo in spider.cpp is
 | |
| 			  // firstIp based, so it scans all the spiderrequests
 | |
| 			  // from a single firstip to get the winner for that
 | |
| 			  // firstip. 
 | |
| 			  
 | |
| 
 | |
| 			  // "<tr class=poo><td>domainpages</td>"
 | |
| 			  // "<td>The number of pages that are currently indexed "
 | |
| 			  // "for the domain of the URL. "
 | |
| 			  // "Used for doing quotas."
 | |
| 			  // "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>siteadds</td>"
 | |
| 			  "<td>The number URLs manually added to the "
 | |
| 			  "subdomain of the URL. Used to gauge a subdomain's "
 | |
| 			  "popularity."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  // taken out for the same reason as domainpages
 | |
| 			  // above was taken out. see expanation up there.
 | |
| 			  // "<tr class=poo><td>domainadds</td>"
 | |
| 			  // "<td>The number URLs manually added to the "
 | |
| 			  // "domain of the URL. Used to gauge a domain's "
 | |
| 			  // "popularity."
 | |
| 			  // "</td></tr>"
 | |
| 
 | |
| 
 | |
| 
 | |
| 			  "<tr class=poo><td>isrss | !isrss</td>"
 | |
| 			  "<td>Matches if document is an RSS feed. Will "
 | |
| 			  "only match this rule if the document has been "
 | |
| 			  "successfully spidered before, because it requires "
 | |
| 			  "downloading the document content to see if it "
 | |
| 			  "truly is an RSS feed.."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>isrssext | !isrssext</td>"
 | |
| 			  "<td>Matches if url ends in .xml .rss or .atom. "
 | |
| 			  "TODO: Or if the link was in an "
 | |
| 			  "alternative link tag."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  //"<tr class=poo><td>!isrss</td>"
 | |
| 			  //"<td>Matches if document is NOT an rss feed."
 | |
| 			  //"</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>ispermalink | !ispermalink</td>"
 | |
| 			  "<td>Matches if document is a permalink. "
 | |
| 			  "When harvesting outlinks we <i>guess</i> if they "
 | |
| 			  "are a permalink by looking at the structure "
 | |
| 			  "of the url.</td></tr>"
 | |
| 
 | |
| 			  //"<tr class=poo><td>!ispermalink</td>"
 | |
| 			  //"<td>Matches if document is NOT a permalink."
 | |
| 			  //"</td></tr>"
 | |
| 
 | |
| 			  /*
 | |
| 			  "<tr class=poo><td>outlink | !outlink</td>"
 | |
| 			  "<td>"
 | |
| 			  "<b>This is true if url being added to spiderdb "
 | |
| 			  "is an outlink from the page being spidered. "
 | |
| 			  "Otherwise, the url being added to spiderdb "
 | |
| 			  "directly represents the page being spidered. It "
 | |
| 			  "is often VERY useful to partition the Spiderdb "
 | |
| 			  "records based on this criteria."
 | |
| 			  "</td></tr>"
 | |
| 			  */
 | |
| 
 | |
| 			  "<tr class=poo><td><nobr>isnewoutlink | !isnewoutlink"
 | |
| 			  "</nobr></td>"
 | |
| 			  "<td>"
 | |
| 			  "This is true since the outlink was not there "
 | |
| 			  "the last time we spidered the page we harvested "
 | |
| 			  "it from."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>hasreply | !hasreply</td>"
 | |
| 			  "<td>"
 | |
| 			  "This is true if we have tried to spider "
 | |
| 			  "this url, even if we got an error while trying."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>isnew | !isnew</td>"
 | |
| 			  "<td>"
 | |
| 			  "This is the opposite of hasreply above. A url "
 | |
| 			  "is new if it has no spider reply, including "
 | |
| 			  "error replies. So once a url has been attempted to "
 | |
| 			  "be spidered then this will be false even if there "
 | |
| 			  "was any kind of error."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>lastspidertime >= "
 | |
| 			  "<b>{roundstart}</b></td>"
 | |
| 			  "<td>"
 | |
| 			  "This is true if the url's last spidered time "
 | |
| 			  "indicates it was spidered already for this "
 | |
| 			  "current round of spidering. When no more urls "
 | |
| 			  "are available for spidering, then gigablast "
 | |
| 			  "automatically sets {roundstart} to the current "
 | |
| 			  "time so all the urls can be spidered again. This "
 | |
| 			  "is how you do round-based spidering. "
 | |
| 			  "You have to use the respider frequency as well "
 | |
| 			  "to adjust how often you want things respidered."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>urlage</td>"
 | |
| 			  "<td>"
 | |
| 			  "This is the time, in seconds, since a url was first "
 | |
| 			  "added to spiderdb to be spidered. This is "
 | |
| 			  "its discovery date. "
 | |
| 			  "Can use <, >, <=, >=, ==, != comparison operators."
 | |
| 			  "</td></tr>"
 | |
| 			  
 | |
| 
 | |
| 			  //"<tr class=poo><td>!newoutlink</td>"
 | |
| 			  //"<td>Matches if document is NOT a new outlink."
 | |
| 			  //"</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>age</td>"
 | |
| 			  "<td>"
 | |
| 			  "How old is the document <b>in seconds</b>. "
 | |
| 			  "The age is based on the publication date of "
 | |
| 			  "the document, which could also be the "
 | |
| 			  "time that the document was last significantly "
 | |
| 			  "modified. If this date is unknown then the age "
 | |
| 			  "will be -1 and only match the expression "
 | |
| 			  "<i>age==-1</i>. "
 | |
| 			  "When harvesting links, we guess the publication "
 | |
| 			  "date of the oulink by detecting dates contained "
 | |
| 			  "in the url itself, which is popular among some "
 | |
| 			  "forms of permalinks. This allows us to put "
 | |
| 			  "older permalinks into a slower spider queue."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>spiderwaited < 3600</td>"
 | |
| 			  "<td>"
 | |
| 			  "<i>spiderwaited</i> is how many seconds have elapsed "
 | |
| 			  "since the last time "
 | |
| 			  "we tried to spider/download the url. "
 | |
| 			  "The constaint containing <i>spiderwaited</i> will "
 | |
| 			  "fail to be matched if the url has never been "
 | |
| 			  "attempted to be spidered/downloaded before. Therefore, "
 | |
| 			  "it will only ever match urls that have a spider reply "
 | |
| 			  "of some sort, so there is no need to add an additional "
 | |
| 			  "<i>hasreply</i>-based constraint."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 
 | |
| 			  "<tr class=poo><td>"
 | |
| 			  "<a name=insitelist>"
 | |
| 			  "insitelist | !insitelist"
 | |
| 			  "</a>"
 | |
| 			  "</td>"
 | |
| 			  "<td>"
 | |
| 			  "This is true if the url matches a pattern in "
 | |
| 			  "the list of sites on the <a href=/admin/sites>"
 | |
| 			  "site list</a> page. That site list is useful for "
 | |
| 			  "adding a large number of sites that can not be "
 | |
| 			  "accommodated by the url filters table. Plus "
 | |
| 			  "it is higher performance and easier to use, but "
 | |
| 			  "lacks the url filter table's "
 | |
| 			  "fine level of control."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>"
 | |
| 			  "<a name=isaddurl>"
 | |
| 			  "isaddurl | !isaddurl"
 | |
| 			  "</a>"
 | |
| 			  "</td>"
 | |
| 			  "<td>"
 | |
| 			  "This is true if the url was added from the add "
 | |
| 			  "url interface or API."
 | |
| 			  //"This replaces the add url priority "
 | |
| 			  //"parm."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>isinjected | !isinjected</td>"
 | |
| 			  "<td>"
 | |
| 			  "This is true if the url was directly "
 | |
| 			  "injected from the "
 | |
| 			  "<a href=/admin/inject>inject page</a> or API."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>isreindex | !isreindex</td>"
 | |
| 			  "<td>"
 | |
| 			  "This is true if the url was added from the "
 | |
| 			  "<a href=/admin/reindex>query reindex</a> "
 | |
| 			  "interface. The request does not contain "
 | |
| 			  "a url, but only a docid, that way we can add "
 | |
| 			  "millions of search results very quickly without "
 | |
| 			  "having to lookup each of their urls. You should "
 | |
| 			  "definitely have this if you use the reindexing "
 | |
| 			  "feature. "
 | |
| 			  "You can set max spiders to 0 "
 | |
| 			  "for non "
 | |
| 			  "isreindex requests while you reindex or delete "
 | |
| 			  "the results of a query for extra speed."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>ismanualadd | !ismanualadd</td>"
 | |
| 			  "<td>"
 | |
| 			  "This is true if the url was added manually. "
 | |
| 			  "Which means it matches isaddurl, isinjected, "
 | |
| 			  " or isreindex. as opposed to only "
 | |
| 			  "being discovered from the spider. "
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td><nobr>inpingserver | !inpingserver"
 | |
| 			  "</nobr></td>"
 | |
| 			  "<td>"
 | |
| 			  "This is true if the url has an inlink from "
 | |
| 			  "a recognized ping server. Ping server urls are "
 | |
| 			  "hard-coded in Url.cpp. <b><font color=red> "
 | |
| 			  "pingserver urls are assigned a hop count of 0"
 | |
| 			  "</font></b>"
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>isparentrss | !isparentrss</td>"
 | |
| 			  "<td>"
 | |
| 			  "If a parent of the URL was an RSS page "
 | |
| 			  "then this will be matched."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>isparentsitemap | "
 | |
| 			  "!isparentsitemap</td>"
 | |
| 			  "<td>"
 | |
| 			  "If a parent of the URL was a sitemap.xml page "
 | |
| 			  "then this will be matched."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  /*
 | |
| 			  "<tr class=poo><td>parentisnew | !parentisnew</td>"
 | |
| 			  "<td>"
 | |
| 			  "<b>Parent providing this outlink is not currently "
 | |
| 			  "in the index but is trying to be added right now. "
 | |
| 			  "</b>This is a special expression in that "
 | |
| 			  "it only applies to assigning spider priorities "
 | |
| 			  "to outlinks we are harvesting on a page.</b>" 
 | |
| 			  "</td></tr>"
 | |
| 			  */
 | |
| 
 | |
| 			  "<tr class=poo><td>isindexed | !isindexed</td>"
 | |
| 			  "<td>"
 | |
| 			  "This url matches this if in the index already. "
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>errorcount==1</td>"
 | |
| 			  "<td>"
 | |
| 			  "The number of times the url has failed to "
 | |
| 			  "be indexed. 1 means just the last time, two means "
 | |
| 			  "the last two times. etc. Any kind of error parsing "
 | |
| 			  "the document (bad utf8, bad charset, etc.) "
 | |
| 			  "or any HTTP status error, like 404 or "
 | |
| 			  "505 is included in this count, in addition to "
 | |
| 			  "\"temporary\" errors like DNS timeouts."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>errorcode==32880</td>"
 | |
| 			  "<td>"
 | |
| 			  "If the last time it was spidered it had this "
 | |
| 			  "numeric error code. See the error codes in "
 | |
| 			  "Errno.cpp. In this particular example 32880 is "
 | |
| 			  "for EBADURL."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>hastmperror</td>"
 | |
| 			  "<td>"
 | |
| 			  "This is true if the last spider attempt resulted "
 | |
| 			  "in an error like EDNSTIMEDOUT or a similar error, "
 | |
| 			  "usually indicative of a temporary internet "
 | |
| 			  "failure, or local resource failure, like out of "
 | |
| 			  "memory, and should be retried soon. "
 | |
| 			  "Currently: "
 | |
| 			  "dns timed out, "
 | |
| 			  "tcp timed out, "
 | |
| 			  "dns dead, "
 | |
| 			  "network unreachable, "
 | |
| 			  "host unreachable, "
 | |
| 			  "diffbot internal error, "
 | |
| 			  "out of memory."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>percentchangedperday<=5</td>"
 | |
| 			  "<td>"
 | |
| 			  "Looks at how much a url's page content has changed "
 | |
| 			  "between the last two times it was spidered, and "
 | |
| 			  "divides that percentage by the number of days. "
 | |
| 			  "So if a URL's last two downloads were 10 days "
 | |
| 			  "apart and its page content changed 30%% then "
 | |
| 			  "the <i>percentchangedperday</i> will be 3. "
 | |
| 			  "Can use <, >, <=, >=, ==, != comparison operators. "
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>sitenuminlinks>20</td>"
 | |
| 			  "<td>"
 | |
| 			  "How many inlinks does the URL's site have? "
 | |
| 			  "We only count non-spammy inlinks, and at most only "
 | |
| 			  "one inlink per IP address C-Class is counted "
 | |
| 			  "so that a webmaster who owns an entire C-Class "
 | |
| 			  "of IP addresses will only have his inlinks counted "
 | |
| 			  "once."
 | |
| 			  "Can use <, >, <=, >=, ==, != comparison operators. "
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 
 | |
| 			  "<tr class=poo><td>numinlinks>20</td>"
 | |
| 			  "<td>"
 | |
| 			  "How many inlinks does the URL itself have? "
 | |
| 			  "We only count one link per unique C-Class IP "
 | |
| 			  "address "
 | |
| 			  "so that a webmaster who owns an entire C-Class "
 | |
| 			  "of IP addresses will only have her inlinks counted "
 | |
| 			  "once."
 | |
| 			  "Can use <, >, <=, >=, ==, != comparison operators. "
 | |
| 			  "This is useful for spidering popular URLs quickly."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 
 | |
| 			  "<tr class=poo><td>httpstatus==404</td>"
 | |
| 			  "<td>"
 | |
| 			  "For matching the URL based on the http status "
 | |
| 			  "of its last download. Does not apply to URLs "
 | |
| 			  "that have not yet been successfully downloaded."
 | |
| 			  "Can use <, >, <=, >=, ==, != comparison operators. "
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  /*
 | |
| 			  "<tr class=poo><td>priority==30</td>"
 | |
| 			  "<td>"
 | |
| 			  "<b>If the current priority of the url is 30, then "
 | |
| 			  "it will match this expression. Does not apply "
 | |
| 			  "to outlinks, of course."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>parentpriority==30</td>"
 | |
| 			  "<td>"
 | |
| 			  "<b>This is a special expression in that "
 | |
| 			  "it only applies to assigning spider priorities "
 | |
| 			  "to outlinks we are harvesting on a page.</b> "
 | |
| 			  "Matches if the url being added to spider queue "
 | |
| 			  "is from a parent url in priority queue 30. "
 | |
| 			  "The parent's priority queue is the one it got "
 | |
| 			  "moved into while being spidered. So if it was "
 | |
| 			  "in priority 20, but ended up in 25, then 25 will "
 | |
| 			  "be used when scanning the URL Filters table for "
 | |
| 			  "each of its outlinks. Only applies "
 | |
| 			  "to the FIRST time the url is added to spiderdb. "
 | |
| 			  "Use <i>parentpriority==-3</i> to indicate the "
 | |
| 			  "parent was FILTERED and <i>-2</i> to indicate "
 | |
| 			  "the parent was BANNED. A parentpriority of "
 | |
| 			  "<i>-1</i>"
 | |
| 			  " means that the urls is not a link being added to "
 | |
| 			  "spiderdb but rather a url being spidered."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>inlink==...</td>"
 | |
| 			  "<td>"
 | |
| 			  "If the url has an inlinker which contains the "
 | |
| 			  "given substring, then this rule is matched. "
 | |
| 			  "We use this like <i>inlink=www.weblogs.com/"
 | |
| 			  "int16_tChanges.xml</i> to detect if a page is in "
 | |
| 			  "the ping server or not, and if it is, then we "
 | |
| 			  "assign it to a slower-spidering queue, because "
 | |
| 			  "we can reply on the ping server for updates. Saves "
 | |
| 			  "us from having to spider all the blogspot.com "
 | |
| 			  "subdomains a couple times a day each."
 | |
| 			  "</td></tr>"
 | |
| 			  */
 | |
| 
 | |
| 			  //"NOTE: Until we get the link info to get the doc "
 | |
| 			  //"quality before calling msg8 in Msg16.cpp, we "
 | |
| 			  //"can not involve doc:quality for purposes of "
 | |
| 			  //"assigning a ruleset, unless banning it.</td>"
 | |
| 
 | |
| 			  "<tr class=poo><td><nobr>tld!=com,org,edu"// && "
 | |
| 			  //"doc:quality<70"
 | |
| 			  "</nobr></td>"
 | |
| 			  "<td>Matches if the "
 | |
| 			  "url's TLD does NOT end in \"com\", \"org\" or "
 | |
| 			  "\"edu\". "
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td><nobr>lang==zh_cn,de"
 | |
| 			  "</nobr></td>"
 | |
| 			  "<td>Matches if "
 | |
| 			  "the url's content is in the language \"zh_cn\" or "
 | |
| 			  "\"de\". See table below for supported language "
 | |
| 			  "abbreviations. Used to only keep certain languages "
 | |
| 			  "in the index. This is hacky because the language "
 | |
| 			  "may not be known at spider time, so Gigablast "
 | |
| 			  "will check after downloading the document to "
 | |
| 			  "see if the language <i>spider priority</i> is "
 | |
| 			  "DELETE thereby discarding it.</td></tr>"
 | |
| 			  //"NOTE: Until we move the language "
 | |
| 			  //"detection up before any call to XmlDoc::set1() "
 | |
| 			  //"in Msg16.cpp, we can not use for purposes of "
 | |
| 			  //"assigning a ruleset, unless banning it.</td>"
 | |
| 			  //"</tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td><nobr>lang!=xx,en,de"
 | |
| 			  "</nobr></td>"
 | |
| 			  "<td>Matches if "
 | |
| 			  "the url's content is NOT in the language \"xx\" "
 | |
| 			  "(unknown), \"en\" or \"de\". "
 | |
| 			  "See table below for supported language "
 | |
| 			  "abbreviations.</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td><nobr>parentlang==zh_cn,zh_tw,xx"
 | |
| 			  "</nobr></td>"
 | |
| 			  "<td>Matches if "
 | |
| 			  "the url's referring parent url is primarily in "
 | |
| 			  "this language. Useful for prioritizing spidering "
 | |
| 			  "pages of a certain language."
 | |
| 			  "See table below for supported language "
 | |
| 			  "abbreviations."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  /*
 | |
| 			  "<tr class=poo><td>link:gigablast</td>"
 | |
| 			  "<td>Matches if the document links to gigablast."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>searchbox:gigablast</td>"
 | |
| 			  "<td>Matches if the document has a submit form "
 | |
| 			  "to gigablast."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>site:dmoz</td>"
 | |
| 			  "<td>Matches if the document is directly or "
 | |
| 			  "indirectly in the DMOZ directory."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>tag:spam>X</td>"
 | |
| 			  "<td>Matches if the document's tagdb record "
 | |
| 			  "has a score greater than X for the sitetype, "
 | |
| 			  "'spam' in this case. "
 | |
| 			  "Can use <, >, <=, >=, ==, != comparison operators. "
 | |
| 			  "Other sitetypes include: "
 | |
| 			  "..."
 | |
| 			  "</td></tr>"
 | |
| 			  */
 | |
| 
 | |
| 			  "<tr class=poo><td>iswww | !iswww</td>"
 | |
| 			  "<td>Matches if the url's hostname is www or domain "
 | |
| 			  "only. For example: <i>www.xyz.com</i> would match, "
 | |
| 			  "and so would <i>abc.com</i>, but "
 | |
| 			  "<i>foo.somesite.com</i> would NOT match."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 
 | |
| 			  "<tr class=poo><td>isroot | !isroot</td>"
 | |
| 			  "<td>Matches if the URL is a root URL. Like if "
 | |
| 			  "its path is just '/'. Example: http://www.abc.com "
 | |
| 			  "is a root ur but http://www.abc.com/foo is not. "
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 
 | |
| 			  "<tr class=poo><td>isonsamedomain | !isonsamedomain</td>"
 | |
| 			  "<td>"
 | |
| 			  "This is true if the url is from the same "
 | |
| 			  "DOMAIN as the page from which it was "
 | |
| 			  "harvested."
 | |
| 			  //"Only effective for links being added from a page "
 | |
| 			  //"being spidered, because this information is "
 | |
| 			  //"not preserved in the titleRec."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 
 | |
| 			  "<tr class=poo><td><nobr>"
 | |
| 			  "isonsamesubdomain | !isonsamesubdomain"
 | |
| 			  "</nobr></td>"
 | |
| 			  "<td>"
 | |
| 			  "This is true if the url is from the same "
 | |
| 			  "SUBDOMAIN as the page from which it was "
 | |
| 			  "harvested."
 | |
| 			  //"Only effective for links being added from a page "
 | |
| 			  //"being spidered, because this information is "
 | |
| 			  //"not preserved in the titleRec."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 			  "<tr class=poo><td>ismedia | !ismedia</td>"
 | |
| 			  "<td>"
 | |
| 			  "Does the url have a media or css related "
 | |
| 			  "extension. Like gif, jpg, mpeg, css, etc.? "
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 
 | |
| 			  "<tr class=poo><td>tag:<i>tagname</i></td>"
 | |
| 			  "<td>"
 | |
| 			  "This is true if the url is tagged with this "
 | |
| 			  "<i>tagname</i> in the site list. Read about tags "
 | |
| 			  "on the <a href=/admin/settings>"//#examples>"
 | |
| 			  "site list</a> "
 | |
| 			  "page."
 | |
| 			  "</td></tr>"
 | |
| 
 | |
| 
 | |
| 
 | |
| 			  "</td></tr></table><br><br>\n",
 | |
| 			  TABLE_STYLE );
 | |
| 
 | |
| 
 | |
| 		// show the languages you can use
 | |
| 		sb->safePrintf (
 | |
| 			  "<table %s>"
 | |
| 			  "<tr><td colspan=2><center>"
 | |
| 			  "<b>"
 | |
| 			  "Supported Language Abbreviations "
 | |
| 			  "for lang== Filter</b>"
 | |
| 			  "</td></tr>",
 | |
| 			  TABLE_STYLE );
 | |
| 		for ( int32_t i = 0 ; i < 256 ; i++ ) {
 | |
| 			char *lang1 = getLanguageAbbr   ( i );
 | |
| 			char *lang2 = getLanguageString ( i );
 | |
| 			if ( ! lang1 ) continue;
 | |
| 			sb->safePrintf("<tr class=poo>"
 | |
| 				       "<td>%s</td><td>%s</td></tr>\n",
 | |
| 				      lang1,lang2);
 | |
| 		}
 | |
| 		// wrap it up
 | |
| 		sb->safePrintf("</table><br><br>");
 | |
| 		return true;
 | |
| }
 | |
| 
 | |
| // . copy/clone parms from one collrec to another
 | |
| // . returns false and sets g_errno on error
 | |
| // . if doing this after creating a new collection on host #0 we have to call
 | |
| //   syncParmsWithHost0() to get all the shards in sync.
 | |
| bool Parms::cloneCollRec ( char *dstCR , char *srcCR ) {
 | |
| 
 | |
| 	// now set THIS based on the parameters in the xml file
 | |
| 	for ( int32_t i = 0 ; i < m_numParms ; i++ ) {
 | |
| 
 | |
| 		// get it
 | |
| 		Parm *m = &m_parms[i];
 | |
| 		if ( m->m_obj != OBJ_COLL ) continue;
 | |
| 
 | |
| 		//log(LOG_DEBUG, "Parms: %s: parm: %s", filename, m->m_xml);
 | |
| 		// . there are 2 object types, coll recs and g_conf, aka
 | |
| 		//   OBJ_COLL and OBJ_CONF.
 | |
| 
 | |
| 		// skip comments and command
 | |
| 		if ( !(m->m_flags & PF_CLONE) ) continue;
 | |
| 
 | |
| 		// get parm data ptr
 | |
| 		char *src = srcCR + m->m_off;
 | |
| 		char *dst = dstCR + m->m_off;
 | |
| 
 | |
| 		// if not an array use this
 | |
| 		if ( ! m->isArray() ) {
 | |
| 			if ( m->m_type == TYPE_SAFEBUF ) {
 | |
| 				SafeBuf *a = (SafeBuf *)src;
 | |
| 				SafeBuf *b = (SafeBuf *)dst;
 | |
| 				b->reset();
 | |
| 				b->safeMemcpy ( a );
 | |
| 				b->nullTerm();
 | |
| 			}
 | |
| 			else {
 | |
| 				// this should work for most types
 | |
| 				gbmemcpy ( dst , src , m->m_size );
 | |
| 			}
 | |
| 			continue;
 | |
| 		}
 | |
| 
 | |
| 		//
 | |
| 		// arrays only below here
 | |
| 		//
 | |
| 
 | |
| 		// for arrays only
 | |
| 		int32_t *srcNum = (int32_t *)(src-4);
 | |
| 		int32_t *dstNum = (int32_t *)(dst-4);
 | |
| 
 | |
| 		// array can have multiple values
 | |
| 		for ( int32_t j = 0 ; j < *srcNum ; j++ ) {
 | |
| 
 | |
| 			if ( m->m_type == TYPE_SAFEBUF ) {
 | |
| 				SafeBuf *a = (SafeBuf *)src;
 | |
| 				SafeBuf *b = (SafeBuf *)dst;
 | |
| 				b->reset();
 | |
| 				b->safeMemcpy ( a );
 | |
| 				b->nullTerm();
 | |
| 			}
 | |
| 			else {
 | |
| 				// this should work for most types
 | |
| 				gbmemcpy ( dst , src , m->m_size );
 | |
| 			}
 | |
| 
 | |
| 			src += m->m_size;
 | |
| 			dst += m->m_size;
 | |
| 
 | |
| 		}
 | |
| 
 | |
| 		// update # elements in array
 | |
| 		*dstNum = *srcNum;
 | |
| 
 | |
| 	}
 | |
| 	return true;
 | |
| }
 |