integrate diffbot from svn back into git.
This commit is contained in:
parent
9696c7936a
commit
5dc7bd2ab4
CollectionRec.cpp CollectionRec.h Collectiondb.cpp Conf.cpp Conf.h Errno.cpp Errno.h Hostdb.cpp Hostdb.h HttpMime.cpp HttpRequest.cpp HttpRequest.h HttpServer.cpp Makefile Mem.cpp Msg0.cpp PageResults.cpp Pages.cpp Parms.cpp Parms.h Process.cpp Rdb.cpp RdbTree.cpp RdbTree.h SafeBuf.cpp SafeBuf.h SearchInput.cpp SearchInput.h Spider.cpp Spider.h TcpServer.cpp TcpSocket.h Title.cpp XmlDoc.cpp XmlDoc.h main.cpp
@ -59,6 +59,10 @@ CollectionRec::CollectionRec() {
|
||||
// *(m_regExs[i]) = '\0';
|
||||
//}
|
||||
m_numRegExs = 0;
|
||||
|
||||
// for diffbot caching the global spider stats
|
||||
reset();
|
||||
|
||||
// add default reg ex if we do not have one
|
||||
fixRec();
|
||||
}
|
||||
@ -74,12 +78,22 @@ void CollectionRec::setToDefaults ( ) {
|
||||
fixRec ();
|
||||
}
|
||||
|
||||
// . clear the crawl bookkeeping that is not part of the parms list
// . zeroes the request/reply tallies, the global crawl info timestamp and
//   both the local and global CrawlInfo counters
void CollectionRec::reset() {
	// request/reply tallies
	m_replies  = 0;
	m_requests = 0;
	// last time the global crawl info was computed
	m_globalCrawlInfoUpdateTime = 0;
	// per-host and network-wide crawl counters
	m_globalCrawlInfo.reset();
	m_localCrawlInfo.reset();
}
|
||||
|
||||
// . load this data from a conf file
|
||||
// . values we do not explicitly have will be taken from "default",
|
||||
// collection config file. if it does not have them then we use
|
||||
// the value we received from call to setToDefaults()
|
||||
// . returns false and sets g_errno on load error
|
||||
bool CollectionRec::load ( char *coll , long i ) {
|
||||
// also reset some counts not included in parms list
|
||||
reset();
|
||||
// before we load, set to defaults in case some are not in xml file
|
||||
g_parms.setToDefault ( (char *)this );
|
||||
// get the filename with that id
|
||||
@ -111,6 +125,47 @@ bool CollectionRec::load ( char *coll , long i ) {
|
||||
// add default reg ex
|
||||
fixRec ();
|
||||
|
||||
//
|
||||
// LOAD the crawlinfo class in the collectionrec for diffbot
|
||||
//
|
||||
if ( g_conf.m_useDiffbot ) {
|
||||
// LOAD LOCAL
|
||||
sprintf ( tmp1 , "%scoll.%s.%li/localcrawlinfo.txt",
|
||||
g_hostdb.m_dir , m_coll , (long)m_collnum );
|
||||
log("coll: loading %s",tmp1);
|
||||
SafeBuf sb;
|
||||
// fillfromfile returns 0 if does not exist, -1 on read error
|
||||
if ( sb.fillFromFile ( tmp1 ) > 0 )
|
||||
sscanf ( sb.getBufStart() ,
|
||||
"indexAttempts:%lli\n"
|
||||
"processAttempts:%lli\n"
|
||||
"downloadAttempts:%lli\n"
|
||||
, &m_localCrawlInfo.m_pageIndexAttempts
|
||||
, &m_localCrawlInfo.m_pageProcessAttempts
|
||||
, &m_localCrawlInfo.m_pageDownloadAttempts
|
||||
);
|
||||
// LOAD GLOBAL
|
||||
sprintf ( tmp1 , "%scoll.%s.%li/globalcrawlinfo.txt",
|
||||
g_hostdb.m_dir , m_coll , (long)m_collnum );
|
||||
log("coll: loading %s",tmp1);
|
||||
sb.reset();
|
||||
if ( sb.fillFromFile ( tmp1 ) > 0 )
|
||||
sscanf ( sb.getBufStart() ,
|
||||
"indexAttempts:%lli\n"
|
||||
"processAttempts:%lli\n"
|
||||
"downloadAttempts:%lli\n"
|
||||
"lastupdate:%lu\n"
|
||||
, &m_globalCrawlInfo.m_pageIndexAttempts
|
||||
, &m_globalCrawlInfo.m_pageProcessAttempts
|
||||
, &m_globalCrawlInfo.m_pageDownloadAttempts
|
||||
, &m_globalCrawlInfoUpdateTime
|
||||
);
|
||||
// ignore errors i guess
|
||||
g_errno = 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// always turn on distributed spider locking because otherwise
|
||||
// we end up calling Msg50 which calls Msg25 for the same root url
|
||||
// at the same time, thereby wasting massive resources. it is also
|
||||
@ -242,6 +297,7 @@ void CollectionRec::fixRec ( ) {
|
||||
|
||||
//strcpy(m_regExs [n],"default");
|
||||
m_regExs[n].set("default");
|
||||
m_regExs[n].nullTerm();
|
||||
m_numRegExs++;
|
||||
|
||||
m_spiderFreqs [n] = 30; // 30 days default
|
||||
@ -281,6 +337,50 @@ bool CollectionRec::save ( ) {
|
||||
if ( ! g_parms.saveToXml ( (char *)this , tmp ) ) return false;
|
||||
// log msg
|
||||
log (LOG_INFO,"db: Saved %s.",tmp);//f.getFilename());
|
||||
|
||||
//
|
||||
// save the crawlinfo class in the collectionrec for diffbot
|
||||
//
|
||||
if ( g_conf.m_useDiffbot ) {
|
||||
// SAVE LOCAL
|
||||
sprintf ( tmp , "%scoll.%s.%li/localcrawlinfo.txt",
|
||||
g_hostdb.m_dir , m_coll , (long)m_collnum );
|
||||
log("coll: saving %s",tmp);
|
||||
SafeBuf sb;
|
||||
sb.safePrintf("indexAttempts:%lli\n"
|
||||
"processAttempts:%lli\n"
|
||||
"downloadAttempts:%lli\n"
|
||||
, m_localCrawlInfo.m_pageIndexAttempts
|
||||
, m_localCrawlInfo.m_pageProcessAttempts
|
||||
, m_localCrawlInfo.m_pageDownloadAttempts
|
||||
);
|
||||
if ( sb.dumpToFile ( tmp ) == -1 ) {
|
||||
log("coll: failed to save file %s : %s",
|
||||
tmp,mstrerror(g_errno));
|
||||
g_errno = 0;
|
||||
}
|
||||
// SAVE GLOBAL
|
||||
sprintf ( tmp , "%scoll.%s.%li/globalcrawlinfo.txt",
|
||||
g_hostdb.m_dir , m_coll , (long)m_collnum );
|
||||
log("coll: saving %s",tmp);
|
||||
sb.reset();
|
||||
sb.safePrintf("indexAttempts:%lli\n"
|
||||
"processAttempts:%lli\n"
|
||||
"downloadAttempts:%lli\n"
|
||||
"lastupdate:%lu\n"
|
||||
, m_globalCrawlInfo.m_pageIndexAttempts
|
||||
, m_globalCrawlInfo.m_pageProcessAttempts
|
||||
, m_globalCrawlInfo.m_pageDownloadAttempts
|
||||
, m_globalCrawlInfoUpdateTime
|
||||
);
|
||||
if ( sb.dumpToFile ( tmp ) == -1 ) {
|
||||
log("coll: failed to save file %s : %s",
|
||||
tmp,mstrerror(g_errno));
|
||||
g_errno = 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// do not need a save now
|
||||
m_needsSave = false;
|
||||
return true;
|
||||
|
@ -69,6 +69,15 @@
|
||||
#include "RdbList.h"
|
||||
#include "Rdb.h" // for RdbBase
|
||||
|
||||
// used by diffbot to control spidering per collection
|
||||
// Crawl counters kept per collection. Holds nothing but three
// long long tallies.
class CrawlInfo {
 public:
	// pages we attempted to index
	long long m_pageIndexAttempts;
	// pages we attempted to process
	long long m_pageProcessAttempts;
	// pages we attempted to download
	long long m_pageDownloadAttempts;
	// set every counter back to zero
	void reset() {
		m_pageIndexAttempts    = 0;
		m_pageProcessAttempts  = 0;
		m_pageDownloadAttempts = 0;
	}
};
|
||||
|
||||
|
||||
class CollectionRec {
|
||||
|
||||
@ -136,6 +145,7 @@ class CollectionRec {
|
||||
bool m_needsSave;
|
||||
|
||||
bool load ( char *coll , long collNum ) ;
|
||||
void reset();
|
||||
|
||||
void fixRec ( );
|
||||
|
||||
@ -355,6 +365,40 @@ class CollectionRec {
|
||||
// priority of urls being retried, usually higher than normal
|
||||
char m_retryPriority;
|
||||
|
||||
// new diffbot parms
|
||||
SafeBuf m_diffbotToken;
|
||||
SafeBuf m_diffbotSeed;
|
||||
SafeBuf m_diffbotApi;
|
||||
SafeBuf m_diffbotApiQueryString;
|
||||
SafeBuf m_diffbotUrlCrawlPattern;
|
||||
SafeBuf m_diffbotUrlProcessPattern;
|
||||
SafeBuf m_diffbotPageProcessPattern;
|
||||
SafeBuf m_diffbotClassify;
|
||||
// format of output. "csv" or "xml" or "json" or null
|
||||
SafeBuf m_diffbotFormat;
|
||||
// what fields to return in the json output: (api dependent)
|
||||
SafeBuf m_diffbotFields;
|
||||
long long m_diffbotMaxToCrawl;
|
||||
long long m_diffbotMaxToProcess;
|
||||
long long m_diffbotCrawlStartTime;
|
||||
long long m_diffbotCrawlEndTime;
|
||||
|
||||
// for testing their regexes etc...
|
||||
char m_isDiffbotTestCrawl;
|
||||
|
||||
// our local crawling stats
|
||||
CrawlInfo m_localCrawlInfo;
|
||||
// total crawling stats summed up from all hosts in network
|
||||
CrawlInfo m_globalCrawlInfo;
|
||||
// last time we computed global crawl info
|
||||
time_t m_globalCrawlInfoUpdateTime;
|
||||
// for counting replies
|
||||
long m_replies;
|
||||
long m_requests;
|
||||
// for storing callbacks waiting in line for freshest crawl info
|
||||
SafeBuf m_callbackQueue;
|
||||
|
||||
|
||||
// . now the url regular expressions
|
||||
// . we chain down the regular expressions
|
||||
// . if a url matches we use that tagdb rec #
|
||||
|
@ -396,28 +396,30 @@ bool Collectiondb::addRec ( char *coll , char *cpc , long cpclen , bool isNew ,
|
||||
// if we are doing a dump from the command line, skip this stuff
|
||||
if ( isDump ) return true;
|
||||
if(isNew) verify = false;
|
||||
|
||||
|
||||
// tell rdbs to add one, too
|
||||
//if ( ! g_indexdb.addColl ( coll, verify ) ) goto hadError;
|
||||
if ( ! g_posdb.addColl ( coll, verify ) ) goto hadError;
|
||||
//if ( ! g_datedb.addColl ( coll, verify ) ) goto hadError;
|
||||
|
||||
|
||||
if ( ! g_titledb.addColl ( coll, verify ) ) goto hadError;
|
||||
//if ( ! g_revdb.addColl ( coll, verify ) ) goto hadError;
|
||||
//if ( ! g_sectiondb.addColl ( coll, verify ) ) goto hadError;
|
||||
if ( ! g_tagdb.addColl ( coll, verify ) ) goto hadError;
|
||||
//if ( ! g_catdb.addColl ( coll, verify ) ) goto hadError;
|
||||
//if ( ! g_checksumdb.addColl ( coll, verify ) ) goto hadError;
|
||||
if ( ! g_spiderdb.addColl ( coll, verify ) ) goto hadError;
|
||||
if ( ! g_doledb.addColl ( coll, verify ) ) goto hadError;
|
||||
//if ( ! g_tfndb.addColl ( coll, verify ) ) goto hadError;
|
||||
if ( ! g_clusterdb.addColl ( coll, verify ) ) goto hadError;
|
||||
if ( ! g_linkdb.addColl ( coll, verify ) ) goto hadError;
|
||||
if ( ! g_spiderdb.addColl ( coll, verify ) ) goto hadError;
|
||||
if ( ! g_doledb.addColl ( coll, verify ) ) goto hadError;
|
||||
|
||||
|
||||
// if first time adding a collrec, initialize the collectionless
|
||||
// rdbs so they call Rdb::addColl() which makes a new RdbBase for them
|
||||
// and stores ptr to that base in CollectionRec::m_bases[]
|
||||
if ( m_numRecsUsed <= 1 ) {
|
||||
if ( m_numRecsUsed == 1 ) {
|
||||
g_statsdb.addColl ( NULL );
|
||||
g_cachedb.addColl ( NULL );
|
||||
g_serpdb.addColl ( NULL );
|
||||
@ -505,12 +507,12 @@ bool Collectiondb::deleteRec ( char *coll , bool deleteTurkdb ) {
|
||||
deleteTurkdb = true;
|
||||
// no spiders can be out. they may be referencing the CollectionRec
|
||||
// in XmlDoc.cpp... quite likely.
|
||||
if ( g_conf.m_spideringEnabled ||
|
||||
g_spiderLoop.m_numSpidersOut > 0 ) {
|
||||
log("admin: Can not delete collection while "
|
||||
"spiders are enabled or active.");
|
||||
return false;
|
||||
}
|
||||
//if ( g_conf.m_spideringEnabled ||
|
||||
// g_spiderLoop.m_numSpidersOut > 0 ) {
|
||||
// log("admin: Can not delete collection while "
|
||||
// "spiders are enabled or active.");
|
||||
// return false;
|
||||
//}
|
||||
// do not allow this if in repair mode
|
||||
if ( g_repairMode > 0 ) {
|
||||
log("admin: Can not delete collection while in repair mode.");
|
||||
@ -531,6 +533,16 @@ bool Collectiondb::deleteRec ( char *coll , bool deleteTurkdb ) {
|
||||
}
|
||||
CollectionRec *cr = m_recs [ collnum ];
|
||||
if ( ! cr ) return log("admin: Collection id problem. Delete failed.");
|
||||
// spiders off
|
||||
if ( cr->m_spiderColl &&
|
||||
cr->m_spiderColl->getTotalOutstandingSpiders() > 0 ) {
|
||||
log("admin: Can not delete collection while "
|
||||
"spiders are oustanding for collection. Turn off "
|
||||
"spiders and wait for them to exit.");
|
||||
return false;
|
||||
}
|
||||
// note it
|
||||
log("coll: deleting coll %s",cr->m_coll);
|
||||
// we need a save
|
||||
m_needsSave = true;
|
||||
// nuke it on disk
|
||||
|
14
Conf.cpp
14
Conf.cpp
@ -156,8 +156,12 @@ bool Conf::init ( char *dir ) { // , long hostId ) {
|
||||
if ( g_conf.m_isLive ) g_conf.m_doConsistencyTesting = false;
|
||||
// and this on
|
||||
g_conf.m_indexDeletes = true;
|
||||
|
||||
// leave it turned off for diffbot since it always needs to be crawling
|
||||
#ifndef DIFFBOT
|
||||
// these off
|
||||
g_conf.m_spideringEnabled = false;
|
||||
#endif
|
||||
// this off
|
||||
g_conf.m_repairingEnabled = false;
|
||||
// make this 1 day for now (in seconds)
|
||||
@ -203,7 +207,15 @@ bool Conf::init ( char *dir ) { // , long hostId ) {
|
||||
// and always keep a decent site quality cache of at least 3M
|
||||
if ( g_conf.m_siteQualityMaxCacheMem < 3000000 )
|
||||
g_conf.m_siteQualityMaxCacheMem = 3000000;
|
||||
|
||||
|
||||
|
||||
m_useDiffbot = false;
|
||||
|
||||
#ifdef DIFFBOT
|
||||
// make sure all collections index into a single unified collection
|
||||
m_useDiffbot = true;
|
||||
#endif
|
||||
|
||||
// HACK: set this now
|
||||
setRootIps();
|
||||
|
||||
|
4
Conf.h
4
Conf.h
@ -183,6 +183,10 @@ class Conf {
|
||||
long m_clusterdbMinFilesToMerge;
|
||||
bool m_clusterdbSaveCache;
|
||||
|
||||
// if this is true, all collections index into the "main" collection
|
||||
// but keep their own spiderdb in their collection.
|
||||
bool m_useDiffbot;
|
||||
|
||||
//bool m_indexEventsOnly;
|
||||
|
||||
// linkdb for storing linking relations
|
||||
|
@ -156,6 +156,9 @@ case EDOCIDCOLLISION : return "DocId collision in titledb";
|
||||
case ESSLERROR : return "SSL error of some kind";
|
||||
case EPERMDENIED : return "Permission denied";
|
||||
case ENOFUNDS : return "Not enough funds in account";
|
||||
case EDIFFBOTINTERNALERROR: return "Diffbot internal error";
|
||||
case EDIFFBOTMIMEERROR: return "Diffbot mime error";
|
||||
case EDIFFBOTBADHTTPSTATUS: return "Diffbot reply bad http status";
|
||||
}
|
||||
// if the remote error bit is clear it must be a regular errno
|
||||
//if ( ! ( errnum & REMOTE_ERROR_BIT ) ) return strerror ( errnum );
|
||||
|
5
Errno.h
5
Errno.h
@ -159,6 +159,9 @@ enum {
|
||||
EDOCIDCOLLISION ,
|
||||
ESSLERROR ,
|
||||
EPERMDENIED ,
|
||||
ENOFUNDS
|
||||
ENOFUNDS ,
|
||||
EDIFFBOTINTERNALERROR,
|
||||
EDIFFBOTMIMEERROR,
|
||||
EDIFFBOTBADHTTPSTATUS
|
||||
};
|
||||
#endif
|
||||
|
16
Hostdb.cpp
16
Hostdb.cpp
@ -1434,6 +1434,22 @@ unsigned long Hostdb::makeGroupMask ( long numGroups ) {
|
||||
return makeGroupId ( numGroups - 1 , numGroups );
|
||||
}
|
||||
|
||||
// return first alive host in a group/shard
|
||||
// . return the first host in group/shard "groupId" that isDead() does not
//   report as dead
// . if every host in the group is dead, fall back to the group's first host
Host *Hostdb::getLiveHostInGroup ( long groupId ) {
	// the array of hosts making up this group
	Host *group = getGroup ( groupId );
	for ( long i = 0 ; i < m_numHostsPerGroup ; i++ ) {
		// get it
		Host *h = &group[i];
		// skip if dead
		if ( isDead ( h->m_hostId ) ) continue;
		// first live one wins
		return h;
	}
	// return first one if all dead
	return &group[0];
}
|
||||
|
||||
// . get the Hosts in group with "groupId"
|
||||
Host *Hostdb::getGroup ( unsigned long groupId , long *numHosts ) {
|
||||
// set hosts per group
|
||||
|
2
Hostdb.h
2
Hostdb.h
@ -354,6 +354,8 @@ class Hostdb {
|
||||
|
||||
long long getNumGlobalEvents ( );
|
||||
|
||||
Host *getLiveHostInGroup ( long groupId );
|
||||
|
||||
// . returns false if blocks and will call your callback later
|
||||
// . returns true if doesn't block
|
||||
// . sets errno on error
|
||||
|
19
HttpMime.cpp
19
HttpMime.cpp
@ -746,14 +746,21 @@ void HttpMime::makeMime ( long totalContentLen ,
|
||||
//sprintf ( m_buf ,
|
||||
p += sprintf( p,
|
||||
"HTTP/1.0 %li%s\r\n"
|
||||
// make it at least 4 spaces so we can change
|
||||
// the length of the content should we insert
|
||||
// a login bar in Proxy::storeLoginBar()
|
||||
"Content-Length: %04li\r\n"
|
||||
, httpStatus , smsg );
|
||||
// if content length is not known, as in diffbot.cpp, then
|
||||
// do not print it into the mime
|
||||
if ( totalContentLen >= 0 )
|
||||
p += sprintf ( p ,
|
||||
// make it at least 4 spaces so we can
|
||||
// change the length of the content
|
||||
// should we insert a login bar in
|
||||
// Proxy::storeLoginBar()
|
||||
"Content-Length: %04li\r\n"
|
||||
, totalContentLen );
|
||||
p += sprintf ( p ,
|
||||
"%s"
|
||||
"Content-Type: %s",
|
||||
httpStatus , smsg ,
|
||||
totalContentLen , enc , contentType );
|
||||
enc , contentType );
|
||||
if ( charset ) p += sprintf ( p , "; charset=%s", charset );
|
||||
p += sprintf ( p , "\r\n");
|
||||
p += sprintf ( p ,
|
||||
|
@ -614,6 +614,11 @@ bool HttpRequest::set (char *url,long offset,long size,time_t ifModifiedSince,
|
||||
// procog's ip
|
||||
// if ( sock && strncmp(iptoa(sock->m_ip),"216.168.36.21",13) == 0)
|
||||
// m_isLocal = true;
|
||||
#ifdef DIFFBOT
|
||||
// diffbot comcast
|
||||
if ( sock && strncmp(iptoa(sock->m_ip),"50.168.3.61",11) == 0)
|
||||
m_isLocal = true;
|
||||
#endif
|
||||
|
||||
// roadrunner ip
|
||||
// if ( sock && strncmp(iptoa(sock->m_ip),"66.162.42.131",13) == 0)
|
||||
@ -1022,9 +1027,9 @@ long HttpRequest::getLong ( char *field , long defaultLong ) {
|
||||
if ( i >= len || !is_digit(value[i]) ) return defaultLong;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
long long HttpRequest::getLongLong ( char *field ,
|
||||
long long HttpRequest::getLongLong ( char *field ,
|
||||
long long defaultLongLong ) {
|
||||
long len;
|
||||
char *value = getValue ( field, &len, NULL );
|
||||
@ -1043,7 +1048,7 @@ long HttpRequest::getLong ( char *field , long defaultLong ) {
|
||||
if ( i >= len || !is_digit(value[i]) ) return defaultLongLong;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
float HttpRequest::getFloat ( char *field , double defaultFloat ) {
|
||||
long len;
|
||||
@ -1091,6 +1096,22 @@ double HttpRequest::getDouble ( char *field , double defaultDouble ) {
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
bool HttpRequest::hasField ( char *field ) {
|
||||
// how long is it?
|
||||
long fieldLen = gbstrlen ( field );
|
||||
// scan the field table directly
|
||||
long i = 0;
|
||||
for ( ; i < m_numFields ; i++ ) {
|
||||
if ( fieldLen != m_fieldLens[i] ) continue;
|
||||
if ( strncmp ( field, m_fields[i], fieldLen ) != 0 ) continue;
|
||||
// got a match return the true
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
char *HttpRequest::getValue ( char *field , long *len, long *next ) {
|
||||
// how long is it?
|
||||
long fieldLen = gbstrlen ( field );
|
||||
@ -1146,8 +1167,21 @@ void HttpRequest::parseFields ( char *s , long slen ) {
|
||||
m_fields [ n ] = s;
|
||||
// point to = sign
|
||||
char *equal = strchr ( s , '=' );
|
||||
// try next field if none here
|
||||
if ( ! equal ) { s += gbstrlen ( s ) + 1; continue; }
|
||||
// if no equal sign, maybe it is one of diffbot's valueless
|
||||
// fields, so support that now
|
||||
if ( ! equal ) {
|
||||
// just set value to NULL
|
||||
char *end = strchr(s,'&');
|
||||
long len = end - s;
|
||||
if ( ! end ) len = gbstrlen(s);
|
||||
m_fieldLens[n] = len;
|
||||
s[len] = '\0';
|
||||
m_fieldValues[n] = NULL;
|
||||
n++;
|
||||
// skip over the '&' too
|
||||
s += len + 1;
|
||||
continue;
|
||||
}
|
||||
// set field len
|
||||
m_fieldLens [ n ] = equal - s;
|
||||
// set = to \0 so getField() returns NULL terminated field name
|
||||
|
@ -118,6 +118,9 @@ class HttpRequest {
|
||||
char *defaultString = NULL ,
|
||||
long *next=NULL);
|
||||
|
||||
|
||||
bool hasField ( char *field );
|
||||
|
||||
// are we a redir? if so return non-NULL
|
||||
char *getRedir ( ) { return m_redir; };
|
||||
long getRedirLen ( ) { return m_redirLen; };
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include "XmlDoc.h" // gbzip
|
||||
#include "UdpServer.h"
|
||||
#include "Proxy.h"
|
||||
#include "Diffbot.h"
|
||||
|
||||
// a global class extern'd in .h file
|
||||
HttpServer g_httpServer;
|
||||
@ -128,6 +129,11 @@ bool HttpServer::getDoc ( char *url ,
|
||||
char *proto ,
|
||||
bool doPost ,
|
||||
char *cookie ) {
|
||||
// sanity
|
||||
if ( ip == -1 )
|
||||
log("http: you probably didn't mean to set ip=-1 did you? "
|
||||
"try setting to 0.");
|
||||
|
||||
//log(LOG_WARN, "http: get doc %s", url->getUrl());
|
||||
// use the HttpRequest class
|
||||
HttpRequest r;
|
||||
@ -886,6 +892,22 @@ bool HttpServer::sendReply ( TcpSocket *s , HttpRequest *r , bool isAdmin) {
|
||||
return sendErrorReply(s,404,"bad request");
|
||||
|
||||
|
||||
// . if we get a request for this then allow Diffbot.cpp to
|
||||
// handle it and send back the right stuff
|
||||
if ( strcmp ( path , "/dev/crawl" ) == 0 ||
|
||||
strcmp ( path , "/dev/crawl/" ) == 0 )
|
||||
// this will call g_httpServer.sendDynamicPage() to send
|
||||
// back the reply when it is done generating the reply.
|
||||
// this function is in Diffbot.cpp.
|
||||
return printCrawlBotPage ( s , r );
|
||||
|
||||
// . is it a diffbot api request, like "GET /api/*"
|
||||
// . ie "/api/startcrawl" or "/api/stopcrawl" etc.?
|
||||
if ( strncmp ( path , "/api/" , 5 ) == 0 )
|
||||
// this will call g_httpServer.sendDynamicPage() to send
|
||||
// back the reply when it is done generating the reply.
|
||||
// this function is in Diffbot.cpp.
|
||||
return handleDiffbotRequest ( s , r );
|
||||
|
||||
|
||||
// for adding to browser list of search engines
|
||||
|
14
Makefile
14
Makefile
@ -60,7 +60,7 @@ OBJS = Tfndb.o UdpSlot.o \
|
||||
Users.o Images.o Wiki.o Wiktionary.o Scraper.o \
|
||||
Dates.o Sections.o SiteGetter.o Syncdb.o \
|
||||
Placedb.o Address.o Test.o GeoIP.o GeoIPCity.o Synonyms.o \
|
||||
Cachedb.o Monitordb.o dlstubs.o
|
||||
Cachedb.o Monitordb.o dlstubs.o Diffbot.o
|
||||
|
||||
CHECKFORMATSTRING = -D_CHECK_FORMAT_STRING_
|
||||
|
||||
@ -71,6 +71,7 @@ HOST=$(shell hostname)
|
||||
#print_vars:
|
||||
# $(HOST)
|
||||
|
||||
|
||||
# force 32-bit mode using -m32 (apt-get install gcc-multilib to ensure works)
|
||||
# and -m32 should use /usr/lib32/ as the library path.
|
||||
# for old kernel 2.4 we don't use pthreads, just clone. so if compiling
|
||||
@ -88,6 +89,13 @@ LIBS= -L. ./libz.a ./libssl.a ./libcrypto.a ./libiconv.a ./libm.a ./libstdc++.a
|
||||
endif
|
||||
|
||||
|
||||
# special diffbot compiling case to default g_conf.m_useDiffbot to true
|
||||
ifeq ("neo","$(HOST)")
|
||||
CPPFLAGS = -m32 -g -Wall -pipe -Wno-write-strings -Wstrict-aliasing=0 -Wno-uninitialized -static -D_PTHREADS_ -Wno-unused-but-set-variable -DDIFFBOT
|
||||
LIBS= -L. ./libz.a ./libssl.a ./libcrypto.a ./libiconv.a ./libm.a ./libstdc++.a -lpthread
|
||||
endif
|
||||
|
||||
|
||||
# let's keep the libraries in the repo for easier bug reporting and debugging
|
||||
# in general if we can. the includes are still in /usr/include/ however...
|
||||
# which is kinda strange but seems to work so far.
|
||||
@ -285,8 +293,8 @@ RdbBuckets.o:
|
||||
Linkdb.o:
|
||||
$(CC) $(DEFS) $(CPPFLAGS) -O3 -c $*.cpp
|
||||
|
||||
XmlDoc.o:
|
||||
$(CC) $(DEFS) $(CPPFLAGS) -O3 -c $*.cpp
|
||||
#XmlDoc.o:
|
||||
# $(CC) $(DEFS) $(CPPFLAGS) -O3 -c $*.cpp
|
||||
|
||||
seo.o:
|
||||
$(CC) $(DEFS) $(CPPFLAGS) -O3 -c $*.cpp
|
||||
|
4
Mem.cpp
4
Mem.cpp
@ -1459,8 +1459,8 @@ void Mem::gbfree ( void *ptr , int size , const char *note ) {
|
||||
if ( slot < 0 ) {
|
||||
log(LOG_LOGIC,"mem: could not find slot (note=%s)",note);
|
||||
// return for now so procog does not core all the time!
|
||||
return;
|
||||
//char *xx = NULL; *xx = 0;
|
||||
//return;
|
||||
char *xx = NULL; *xx = 0;
|
||||
}
|
||||
|
||||
#ifdef _EFENCE_
|
||||
|
6
Msg0.cpp
6
Msg0.cpp
@ -198,8 +198,12 @@ bool Msg0::getList ( long long hostId , // host to ask (-1 if none)
|
||||
// . groupMask must turn on higher bits first (count downwards kinda)
|
||||
// . titledb and spiderdb use special masks to get groupId
|
||||
|
||||
// if diffbot.cpp is reading spiderdb from each shard we have to
|
||||
// get groupid from hostid here lest we core in getGroupId() below
|
||||
if ( hostId >= 0 && m_rdbId == RDB_SPIDERDB )
|
||||
m_groupId = 0;
|
||||
// did they force it? core until i figure out what this is
|
||||
if ( forceParitySplit >= 0 )
|
||||
else if ( forceParitySplit >= 0 )
|
||||
m_groupId = g_hostdb.getGroupId ( forceParitySplit );
|
||||
else
|
||||
m_groupId = getGroupId ( m_rdbId , startKey , ! noSplit );
|
||||
|
@ -285,6 +285,10 @@ bool sendPageResults ( TcpSocket *s , HttpRequest *hr ) {
|
||||
// show gigabits?
|
||||
long gb = hr->getLong("gigabits",0);
|
||||
if ( gb >= 1 ) sb.safePrintf("&gigabits=%li",gb);
|
||||
// propagate collection
|
||||
long clen;
|
||||
char *coll = hr->getString("c",&clen,"",NULL);
|
||||
if ( coll ) sb.safePrintf("&c=%s",coll);
|
||||
// provide hash of the query so clients can't just pass in
|
||||
// a bogus id to get search results from us
|
||||
unsigned long h32 = hash32n(qstr);
|
||||
@ -390,8 +394,10 @@ bool sendPageResults ( TcpSocket *s , HttpRequest *hr ) {
|
||||
);
|
||||
// contents of search box
|
||||
sb.htmlEncode ( qstr , qlen , false );
|
||||
sb.safePrintf ("\">"
|
||||
"<input type=submit value=\"Search\" border=0>"
|
||||
sb.safePrintf ("\">");
|
||||
// propagate collection on subsequent searches
|
||||
sb.safePrintf("<input name=c type=hidden value=\"%s\">",coll);
|
||||
sb.safePrintf("<input type=submit value=\"Search\" border=0>"
|
||||
"<br>"
|
||||
"<br>"
|
||||
"Try your search (not secure) on: "
|
||||
@ -1186,7 +1192,7 @@ bool gotResults ( void *state ) {
|
||||
// print the word
|
||||
char *t = qw->m_word;
|
||||
long tlen = qw->m_wordLen;
|
||||
sb.utf8Encode ( t , tlen );
|
||||
sb.utf8Encode2 ( t , tlen );
|
||||
sb.safePrintf (" ");
|
||||
}
|
||||
// print tail if we had ignored terms
|
||||
@ -1246,7 +1252,7 @@ bool gotResults ( void *state ) {
|
||||
qe2 );
|
||||
// close it up
|
||||
sb.safePrintf ("\"><i><b>");
|
||||
sb.utf8Encode(st->m_spell, len);
|
||||
sb.utf8Encode2(st->m_spell, len);
|
||||
// then finish it off
|
||||
sb.safePrintf ("</b></i></a></font>\n<br><br>\n");
|
||||
}
|
||||
@ -1830,13 +1836,13 @@ static int printResult ( SafeBuf &sb,
|
||||
backTag,
|
||||
0,
|
||||
0 ); // niceness
|
||||
//if (!sb.utf8Encode(tt, hlen)) return false;
|
||||
//if (!sb.utf8Encode2(tt, hlen)) return false;
|
||||
if ( ! sb.brify ( tt,hlen,0,cols) ) return false;
|
||||
}
|
||||
else if ( str && strLen ) {
|
||||
// determine if TiTle wraps, if it does add a <br> count for
|
||||
// each wrap
|
||||
//if (!sb.utf8Encode(str , strLen )) return false;
|
||||
//if (!sb.utf8Encode2(str , strLen )) return false;
|
||||
if ( ! sb.brify ( str,strLen,0,cols) ) return false;
|
||||
}
|
||||
// . use "UNTITLED" if no title
|
||||
|
75
Pages.cpp
75
Pages.cpp
@ -1624,6 +1624,81 @@ bool Pages::printAdminLinks ( SafeBuf *sb,
|
||||
*/
|
||||
}
|
||||
sb->safePrintf("</center><br/>" );
|
||||
|
||||
if ( top ) return status;
|
||||
|
||||
//
|
||||
// if diffbot give the crawlbot api here mostly for testing
|
||||
//
|
||||
char *hyphen = NULL;
|
||||
if ( g_conf.m_useDiffbot )
|
||||
hyphen = strchr ( coll , '-');
|
||||
|
||||
if ( g_conf.m_useDiffbot ) {
|
||||
sb->safePrintf("<br>"
|
||||
"<center>"
|
||||
"Diffbot API: " );
|
||||
// /api/startcrawl
|
||||
sb->safePrintf(" <a href=/dev/crawl>startcrawl</a>");
|
||||
}
|
||||
|
||||
if ( hyphen ) {
|
||||
|
||||
// /api/stopcrawl
|
||||
sb->safePrintf(" <a href=/api/stopcrawl?token=");
|
||||
sb->safeMemcpy ( coll, hyphen - coll );
|
||||
sb->safePrintf("&id=%s>stopcrawl</a>"
|
||||
,hyphen+1);
|
||||
|
||||
// /api/resumecrawl
|
||||
sb->safePrintf(" <a href=/api/resumecrawl?token=");
|
||||
sb->safeMemcpy ( coll, hyphen - coll );
|
||||
sb->safePrintf("&id=%s>resumecrawl</a>"
|
||||
,hyphen+1);
|
||||
|
||||
// crawls
|
||||
sb->safePrintf(" <a href=/api/crawls?token=");
|
||||
sb->safeMemcpy ( coll, hyphen - coll );
|
||||
sb->safePrintf(" title=\"show all crawl collections\">"
|
||||
"crawls</a>");
|
||||
|
||||
// activecrawls
|
||||
sb->safePrintf(" <a href=/api/activecrawls?id=%s ",
|
||||
hyphen+1);
|
||||
sb->safePrintf(" title=\"show stats on one crawl\">"
|
||||
"activecrawls</a>");
|
||||
|
||||
|
||||
// downloadurls
|
||||
sb->safePrintf(" <a href=/api/downloadurls?id=%s ",
|
||||
hyphen+1);
|
||||
sb->safePrintf(" title=\"download urls in a crawl's "
|
||||
"spiderdb\">downloadurls</a>");
|
||||
|
||||
// download crawl urls
|
||||
sb->safePrintf(" <a href=/api/downloadcrawl?id=%s ",
|
||||
hyphen+1);
|
||||
sb->safePrintf(" title=\"download urls from crawl\">"
|
||||
"downloadcrawl (urls)</a>");
|
||||
|
||||
|
||||
// download json objects
|
||||
sb->safePrintf(" <a href=/api/downloadcrawl?"
|
||||
"id=%s&format=json ",
|
||||
hyphen+1);
|
||||
sb->safePrintf(" title=\"download urls from crawl\">"
|
||||
"downloadcrawl (json)</a>");
|
||||
|
||||
}
|
||||
|
||||
if ( g_conf.m_useDiffbot ) {
|
||||
sb->safePrintf("</center>\n");
|
||||
sb->safePrintf("<br>");
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
//sprintf(p,"</font>\n" );
|
||||
//p += gbstrlen(p);
|
||||
return status;
|
||||
|
184
Parms.cpp
184
Parms.cpp
@ -1204,7 +1204,8 @@ bool Parms::printParms ( SafeBuf* sb , long page , char *username,//long user,
|
||||
status &=printParm ( sb, username,&m_parms[i],i,
|
||||
j, jend, (char *)THIS,
|
||||
coll,NULL,
|
||||
bg,nc,pd);
|
||||
bg,nc,pd,
|
||||
false);
|
||||
continue;
|
||||
}
|
||||
// if not first in a row, skip it, we printed it already
|
||||
@ -1222,7 +1223,7 @@ bool Parms::printParms ( SafeBuf* sb , long page , char *username,//long user,
|
||||
k++ )
|
||||
status &=printParm(sb,username,&m_parms[k],k,
|
||||
newj,jend,(char *)THIS,coll,NULL,bg,
|
||||
nc,pd);
|
||||
nc,pd, j==size-1);
|
||||
}
|
||||
// end array table
|
||||
//if ( m->m_max > 1 ) {
|
||||
@ -1656,7 +1657,8 @@ bool Parms::printParm ( SafeBuf* sb,
|
||||
char *pwd ,
|
||||
char *bg ,
|
||||
long nc ,
|
||||
long pd ) {
|
||||
long pd ,
|
||||
bool lastRow ) {
|
||||
bool status = true;
|
||||
// do not print if no permissions
|
||||
if ( m->m_perms != 0 && !g_users.hasPermission(username,m->m_perms) )
|
||||
@ -1864,8 +1866,14 @@ bool Parms::printParm ( SafeBuf* sb,
|
||||
else if ( t == TYPE_CHECKBOX ) {
|
||||
char *ddd = "";
|
||||
if ( *s ) ddd = " checked";
|
||||
sb->safePrintf("<center>"
|
||||
"<input type=checkbox ");
|
||||
// this is part of the "HACK" fix below. you have to
|
||||
// specify the cgi parm in the POST request, and unchecked
|
||||
// checkboxes are not included in the POST request.
|
||||
if ( lastRow && m->m_page == PAGE_FILTERS )
|
||||
sb->safePrintf("<center><input type=hidden ");
|
||||
else
|
||||
sb->safePrintf("<center>"
|
||||
"<input type=checkbox ");
|
||||
if ( m->m_page == PAGE_FILTERS)
|
||||
sb->safePrintf("id=id_%s ",cgi);
|
||||
|
||||
@ -1933,6 +1941,22 @@ bool Parms::printParm ( SafeBuf* sb,
|
||||
sb->dequote ( s , gbstrlen(s) );
|
||||
sb->safePrintf ("\">");
|
||||
}
|
||||
else if ( t == TYPE_SAFEBUF ) {
|
||||
long size = m->m_size;
|
||||
// give regular expression box on url filters page more room
|
||||
if ( m->m_page == PAGE_FILTERS ) {
|
||||
if ( size > REGEX_TXT_MAX ) size = REGEX_TXT_MAX;
|
||||
}
|
||||
else {
|
||||
if ( size > 20 ) size = 20;
|
||||
}
|
||||
sb->safePrintf ("<input type=text name=%s size=%li value=\"",
|
||||
cgi,size);
|
||||
//sb->dequote ( s , gbstrlen(s) );
|
||||
SafeBuf *sx = (SafeBuf *)s;
|
||||
sb->dequote ( sx->getBufStart() , sx->length() );
|
||||
sb->safePrintf ("\">");
|
||||
}
|
||||
else if ( t == TYPE_STRINGBOX ) {
|
||||
sb->safePrintf("<textarea rows=10 cols=64 name=%s>",cgi);
|
||||
//p += urlEncode ( p , pend - p , s , gbstrlen(s) );
|
||||
@ -2505,12 +2529,21 @@ void Parms::setParm ( char *THIS , Parm *m , long mm , long j , char *s ,
|
||||
}
|
||||
|
||||
// if we are setting a guy in an array AND he is NOT the first
|
||||
// in his row, ensure the guy before has a count of j+1 or more
|
||||
// in his row, ensure the guy before has a count of j+1 or more.
|
||||
//
|
||||
// crap, on the url filters page if you do not check "spidering
|
||||
// enabled" checkbox when adding a new rule at the bottom of the
|
||||
// table, then the spidering enabled parameter does not transmit so
|
||||
// the "respider frequency" ends up checking the "spider enabled"
|
||||
// array whose "count" was not incremented like it should have been.
|
||||
// HACK: make new line at bottom always have spidering enabled
|
||||
// checkbox set and make it impossible to unset.
|
||||
if ( m->m_max > 1 && m->m_rowid >= 0 && mm > 0 &&
|
||||
m_parms[mm-1].m_rowid == m->m_rowid ) {
|
||||
char *pos = (char *)THIS + m_parms[mm-1].m_off - 4 ;
|
||||
long maxcount = *(long *)pos;
|
||||
if ( j >= maxcount ) {
|
||||
log("admin: parm before \"m\" is limiting us");
|
||||
//log("admin: try nuking the url filters or whatever "
|
||||
// "and re-adding");
|
||||
return;
|
||||
@ -2609,15 +2642,19 @@ void Parms::setParm ( char *THIS , Parm *m , long mm , long j , char *s ,
|
||||
! isHtmlEncoded && oldLen == len &&
|
||||
memcmp ( sb->getBufStart() , s , len ) == 0 )
|
||||
return;
|
||||
// nuke it
|
||||
sb->purge();
|
||||
// this means that we can not use string POINTERS as parms!!
|
||||
if ( ! isHtmlEncoded ) sb->safeMemcpy ( s , len );
|
||||
else len = sb->htmlDecode (s,len,false,0);
|
||||
// ensure null terminated
|
||||
sb->nullTerm();
|
||||
// null term it all
|
||||
//dst[len] = '\0';
|
||||
sb->reserve ( 1 );
|
||||
//sb->reserve ( 1 );
|
||||
// null terminate but do not include as m_length so the
|
||||
// memcmp() above still works right
|
||||
sb->m_buf[sb->m_length] = '\0';
|
||||
//sb->m_buf[sb->m_length] = '\0';
|
||||
// . might have to set length
|
||||
// . used for CollectionRec::m_htmlHeadLen and m_htmlTailLen
|
||||
//if ( m->m_plen >= 0 )
|
||||
@ -2891,6 +2928,7 @@ bool Parms::setFromFile ( void *THIS ,
|
||||
// now, extricate from the <![CDATA[ ... ]]> tag if we need to
|
||||
if ( m->m_type == TYPE_STRING ||
|
||||
m->m_type == TYPE_STRINGBOX ||
|
||||
m->m_type == TYPE_SAFEBUF ||
|
||||
m->m_type == TYPE_STRINGNONEMPTY ) {
|
||||
char *oldv = v;
|
||||
long oldvlen = vlen;
|
||||
@ -3210,6 +3248,10 @@ skip2:
|
||||
}
|
||||
*/
|
||||
|
||||
// debug point
|
||||
//if ( m->m_type == TYPE_SAFEBUF )
|
||||
// log("hey");
|
||||
|
||||
// loop over all in this potential array
|
||||
for ( j = 0 ; j < count ; j++ ) {
|
||||
// the xml
|
||||
@ -3219,6 +3261,7 @@ skip2:
|
||||
// print CDATA if string
|
||||
if ( m->m_type == TYPE_STRING ||
|
||||
m->m_type == TYPE_STRINGBOX ||
|
||||
m->m_type == TYPE_SAFEBUF ||
|
||||
m->m_type == TYPE_STRINGNONEMPTY ) {
|
||||
sprintf ( p , "<![CDATA[" );
|
||||
p += gbstrlen ( p );
|
||||
@ -3233,6 +3276,7 @@ skip2:
|
||||
// print CDATA if string
|
||||
if ( m->m_type == TYPE_STRING ||
|
||||
m->m_type == TYPE_STRINGBOX ||
|
||||
m->m_type == TYPE_SAFEBUF ||
|
||||
m->m_type == TYPE_STRINGNONEMPTY ) {
|
||||
sprintf ( p , "]]>" );
|
||||
p += gbstrlen ( p );
|
||||
@ -3343,6 +3387,14 @@ char *Parms::getParmHtmlEncoded ( char *p , char *pend , Parm *m , char *s ) {
|
||||
sprintf (p,"%li",*(long *)s);
|
||||
else if ( t == TYPE_LONG_LONG )
|
||||
sprintf (p,"%lli",*(long long *)s);
|
||||
else if ( t == TYPE_SAFEBUF ) {
|
||||
SafeBuf *sb = (SafeBuf *)s;
|
||||
p = htmlEncode ( p ,
|
||||
pend ,
|
||||
sb->getBufStart(),
|
||||
sb->getBufStart() + sb->length(),
|
||||
true ); // #?*
|
||||
}
|
||||
else if ( t == TYPE_STRING ||
|
||||
t == TYPE_STRINGBOX ||
|
||||
t == TYPE_STRINGNONEMPTY ||
|
||||
@ -3434,6 +3486,7 @@ bool Parms::serialize( char *buf, long *bufSize ) {
|
||||
if ( m->m_type == TYPE_STRING ) size = m->m_size;
|
||||
if ( m->m_type == TYPE_STRINGBOX ) size = m->m_size;
|
||||
if ( m->m_type == TYPE_STRINGNONEMPTY ) size = m->m_size;
|
||||
if ( m->m_type == TYPE_SAFEBUF ) size = m->m_size;
|
||||
if ( m->m_type == TYPE_SITERULE ) size = 4;
|
||||
|
||||
// . set size to the total size of array
|
||||
@ -3573,6 +3626,7 @@ bool Parms::serializeConfParm( Parm *m, long i, char **p, char *end,
|
||||
return false;
|
||||
}
|
||||
|
||||
// TODO: add TYPE_SAFEBUF support
|
||||
bool Parms::serializeCollParm( CollectionRec *cr,
|
||||
Parm *m, long i, char **p, char *end,
|
||||
long size, long cnt,
|
||||
@ -5102,7 +5156,7 @@ void Parms::init ( ) {
|
||||
m->m_cgi = "seatonep";
|
||||
m->m_off = (char *)&g_conf.m_sendParmChangeAlertsToEmail1 - g;
|
||||
m->m_type = TYPE_BOOL;
|
||||
m->m_def = "1";
|
||||
m->m_def = "0";
|
||||
m->m_priv = 2;
|
||||
m->m_group = 0;
|
||||
m++;
|
||||
@ -5156,7 +5210,7 @@ void Parms::init ( ) {
|
||||
m->m_cgi = "seattwop";
|
||||
m->m_off = (char *)&g_conf.m_sendParmChangeAlertsToEmail2 - g;
|
||||
m->m_type = TYPE_BOOL;
|
||||
m->m_def = "1";
|
||||
m->m_def = "0";
|
||||
m->m_priv = 2;
|
||||
m->m_group = 0;
|
||||
m++;
|
||||
@ -5210,7 +5264,7 @@ void Parms::init ( ) {
|
||||
m->m_cgi = "seatthreep";
|
||||
m->m_off = (char *)&g_conf.m_sendParmChangeAlertsToEmail3 - g;
|
||||
m->m_type = TYPE_BOOL;
|
||||
m->m_def = "1";
|
||||
m->m_def = "0";
|
||||
m->m_priv = 2;
|
||||
m->m_group = 0;
|
||||
m++;
|
||||
@ -5265,7 +5319,7 @@ void Parms::init ( ) {
|
||||
m->m_cgi = "seatfourp";
|
||||
m->m_off = (char *)&g_conf.m_sendParmChangeAlertsToEmail4 - g;
|
||||
m->m_type = TYPE_BOOL;
|
||||
m->m_def = "1";
|
||||
m->m_def = "0";
|
||||
m->m_priv = 2;
|
||||
m->m_group = 0;
|
||||
m++;
|
||||
@ -7659,6 +7713,105 @@ void Parms::init ( ) {
|
||||
m++;
|
||||
*/
|
||||
|
||||
/////////////////////
|
||||
//
|
||||
// DIFFBOT CRAWLBOT PARMS
|
||||
//
|
||||
//////////////////////
|
||||
|
||||
m->m_cgi = "dbseed";
|
||||
m->m_xml = "diffbotSeed";
|
||||
m->m_off = (char *)&cr.m_diffbotSeed - x;
|
||||
m->m_type = TYPE_SAFEBUF;
|
||||
m->m_page = PAGE_NONE;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_cgi = "dbtoken";
|
||||
m->m_xml = "diffbotToken";
|
||||
m->m_off = (char *)&cr.m_diffbotToken - x;
|
||||
m->m_type = TYPE_SAFEBUF;
|
||||
m->m_page = PAGE_NONE;
|
||||
m++;
|
||||
|
||||
m->m_cgi = "dbapi";
|
||||
m->m_xml = "diffbotApi";
|
||||
m->m_off = (char *)&cr.m_diffbotApi - x;
|
||||
m->m_type = TYPE_SAFEBUF;
|
||||
m->m_page = PAGE_NONE;
|
||||
m++;
|
||||
|
||||
m->m_cgi = "dbapiqs";
|
||||
m->m_xml = "diffbotApiQueryString";
|
||||
m->m_off = (char *)&cr.m_diffbotApiQueryString - x;
|
||||
m->m_type = TYPE_SAFEBUF;
|
||||
m->m_page = PAGE_NONE;
|
||||
m++;
|
||||
|
||||
m->m_cgi = "dbucp";
|
||||
m->m_xml = "diffbotUrlCrawlPattern";
|
||||
m->m_off = (char *)&cr.m_diffbotUrlCrawlPattern - x;
|
||||
m->m_type = TYPE_SAFEBUF;
|
||||
m->m_page = PAGE_NONE;
|
||||
m++;
|
||||
|
||||
m->m_cgi = "dbupp";
|
||||
m->m_xml = "diffbotUrlProcessPattern";
|
||||
m->m_off = (char *)&cr.m_diffbotUrlProcessPattern - x;
|
||||
m->m_type = TYPE_SAFEBUF;
|
||||
m->m_page = PAGE_NONE;
|
||||
m++;
|
||||
|
||||
m->m_cgi = "dbppp";
|
||||
m->m_xml = "diffbotPageProcessPattern";
|
||||
m->m_off = (char *)&cr.m_diffbotPageProcessPattern - x;
|
||||
m->m_type = TYPE_SAFEBUF;
|
||||
m->m_page = PAGE_NONE;
|
||||
m++;
|
||||
|
||||
m->m_cgi = "dbclassify";
|
||||
m->m_xml = "diffbotClassify";
|
||||
m->m_off = (char *)&cr.m_diffbotClassify - x;
|
||||
m->m_type = TYPE_SAFEBUF;
|
||||
m->m_page = PAGE_NONE;
|
||||
m++;
|
||||
|
||||
m->m_cgi = "dbmaxtocrawl";
|
||||
m->m_xml = "diffbotMaxToCrawl";
|
||||
m->m_off = (char *)&cr.m_diffbotMaxToCrawl - x;
|
||||
m->m_type = TYPE_LONG_LONG;
|
||||
m->m_page = PAGE_NONE;
|
||||
m++;
|
||||
|
||||
m->m_cgi = "dbmaxtoprocess";
|
||||
m->m_xml = "diffbotMaxToProcess";
|
||||
m->m_off = (char *)&cr.m_diffbotMaxToProcess - x;
|
||||
m->m_type = TYPE_LONG_LONG;
|
||||
m->m_page = PAGE_NONE;
|
||||
m++;
|
||||
|
||||
m->m_cgi = "dbcrawlstarttime";
|
||||
m->m_xml = "diffbotCrawlStartTime";
|
||||
m->m_off = (char *)&cr.m_diffbotCrawlStartTime - x;
|
||||
m->m_type = TYPE_LONG_LONG;
|
||||
m->m_page = PAGE_NONE;
|
||||
m++;
|
||||
|
||||
m->m_cgi = "dbcrawlendtime";
|
||||
m->m_xml = "diffbotCrawlEndTime";
|
||||
m->m_off = (char *)&cr.m_diffbotCrawlEndTime - x;
|
||||
m->m_type = TYPE_LONG_LONG;
|
||||
m->m_page = PAGE_NONE;
|
||||
m++;
|
||||
|
||||
m->m_cgi = "isdbtestcrawl";
|
||||
m->m_xml = "isDiffbotTestCrawl";
|
||||
m->m_off = (char *)&cr.m_isDiffbotTestCrawl - x;
|
||||
m->m_type = TYPE_BOOL;
|
||||
m->m_page = PAGE_NONE;
|
||||
m++;
|
||||
|
||||
|
||||
///////////////////////////////////////////
|
||||
// SPIDER CONTROLS
|
||||
///////////////////////////////////////////
|
||||
@ -7678,7 +7831,7 @@ void Parms::init ( ) {
|
||||
m->m_cgi = "cse";
|
||||
m->m_off = (char *)&cr.m_spideringEnabled - x;
|
||||
m->m_type = TYPE_BOOL;
|
||||
m->m_def = "0";
|
||||
m->m_def = "1";
|
||||
m++;
|
||||
|
||||
/*
|
||||
@ -12040,7 +12193,8 @@ void Parms::init ( ) {
|
||||
m->m_off = (char *)cr.m_regExs - x;
|
||||
// this is a safebuf, dynamically allocated string really
|
||||
m->m_type = TYPE_SAFEBUF;//STRINGNONEMPTY
|
||||
m->m_size = MAX_REGEX_LEN+1;
|
||||
// the size of each element in the array:
|
||||
m->m_size = sizeof(SafeBuf);//MAX_REGEX_LEN+1;
|
||||
m->m_page = PAGE_FILTERS;
|
||||
m->m_rowid = 1; // if we START a new row
|
||||
m->m_def = "";
|
||||
@ -15096,6 +15250,8 @@ void Parms::overlapTest ( char step ) {
|
||||
m_parms[i].m_desc);
|
||||
}
|
||||
|
||||
log("conf: try including \"m->m_obj = OBJ_COLL;\" or "
|
||||
"\"m->m_obj = OBJ_CONF;\" in your parm definitions");
|
||||
log("conf: failed overlap test. exiting.");
|
||||
exit(-1);
|
||||
|
||||
|
24
Parms.h
24
Parms.h
@ -31,7 +31,7 @@ enum {
|
||||
TYPE_FLOAT ,
|
||||
TYPE_IP ,
|
||||
TYPE_LONG ,
|
||||
TYPE_LONG_LONG ,
|
||||
TYPE_LONG_LONG , // 10
|
||||
TYPE_NONE ,
|
||||
TYPE_PRIORITY ,
|
||||
TYPE_PRIORITY2 ,
|
||||
@ -41,7 +41,7 @@ enum {
|
||||
TYPE_STRINGBOX ,
|
||||
TYPE_STRINGNONEMPTY ,
|
||||
TYPE_TIME ,
|
||||
TYPE_DATE2 ,
|
||||
TYPE_DATE2 , // 20
|
||||
TYPE_DATE ,
|
||||
TYPE_RULESET ,
|
||||
TYPE_FILTER ,
|
||||
@ -50,7 +50,7 @@ enum {
|
||||
TYPE_MONOD2 ,
|
||||
TYPE_MONOM2 ,
|
||||
TYPE_LONG_CONST ,
|
||||
TYPE_SITERULE ,
|
||||
TYPE_SITERULE , // 29
|
||||
TYPE_SAFEBUF
|
||||
};
|
||||
|
||||
@ -147,7 +147,7 @@ class Parm {
|
||||
char * getValueAsString ( class SearchInput *si ) ;
|
||||
};
|
||||
|
||||
#define MAX_PARMS 840
|
||||
#define MAX_PARMS 940
|
||||
|
||||
#define MAX_XML_CONF (200*1024)
|
||||
|
||||
@ -171,15 +171,16 @@ class Parms {
|
||||
long page , char *coll , char *pwd ) ;
|
||||
|
||||
|
||||
char *printParms (char *p, char *pend, TcpSocket *s , HttpRequest *r );
|
||||
//char *printParms (char *p, char *pend, TcpSocket *s, HttpRequest *r);
|
||||
bool printParms (SafeBuf* sb, TcpSocket *s , HttpRequest *r );
|
||||
|
||||
char *printParms (char *p,char *pend,long page,char *username,
|
||||
void *THIS, char *coll , char *pwd ,
|
||||
long nc , long pd ) ;
|
||||
//char *printParms (char *p,char *pend,long page,char *username,
|
||||
// void *THIS, char *coll , char *pwd ,
|
||||
// long nc , long pd ) ;
|
||||
bool printParms (SafeBuf* sb, long page,char *username,void *THIS,
|
||||
char *coll , char *pwd , long nc , long pd ) ;
|
||||
char *coll , char *pwd , long nc , long pd );
|
||||
|
||||
/*
|
||||
char *printParm ( char *p ,
|
||||
char *pend ,
|
||||
//long user ,
|
||||
@ -194,6 +195,8 @@ class Parms {
|
||||
char *bg ,
|
||||
long nc ,
|
||||
long pd ) ;
|
||||
*/
|
||||
|
||||
bool printParm ( SafeBuf* sb,
|
||||
//long user ,
|
||||
char *username,
|
||||
@ -206,7 +209,8 @@ class Parms {
|
||||
char *pwd ,
|
||||
char *bg ,
|
||||
long nc ,
|
||||
long pd ) ;
|
||||
long pd ,
|
||||
bool lastRow ) ;
|
||||
|
||||
char *getTHIS ( HttpRequest *r , long page ) ;
|
||||
|
||||
|
@ -1396,7 +1396,7 @@ void Process::disableTreeWrites ( ) {
|
||||
}
|
||||
// disable all spider trees and tables
|
||||
for ( long i = 0 ; i < g_collectiondb.getNumRecs() ; i++ ) {
|
||||
SpiderColl *sc = g_spiderCache.getSpiderColl(i);
|
||||
SpiderColl *sc = g_spiderCache.getSpiderCollIffNonNull(i);
|
||||
if ( ! sc ) continue;
|
||||
sc->m_waitingTree .disableWrites();
|
||||
sc->m_waitingTable.disableWrites();
|
||||
@ -1413,7 +1413,7 @@ void Process::enableTreeWrites ( ) {
|
||||
}
|
||||
// enable all waiting trees
|
||||
for ( long i = 0 ; i < g_collectiondb.getNumRecs() ; i++ ) {
|
||||
SpiderColl *sc = g_spiderCache.getSpiderColl(i);
|
||||
SpiderColl *sc = g_spiderCache.getSpiderCollIffNonNull(i);
|
||||
if ( ! sc ) continue;
|
||||
sc->m_waitingTree .enableWrites();
|
||||
sc->m_waitingTable.enableWrites();
|
||||
|
13
Rdb.cpp
13
Rdb.cpp
@ -229,7 +229,9 @@ bool Rdb::init ( char *dir ,
|
||||
m_dbname ,
|
||||
m_ks ,
|
||||
// make useProtection true for debugging
|
||||
false ) ) // use protection?
|
||||
false , // use protection?
|
||||
false , // alowdups?
|
||||
m_rdbId ) )
|
||||
return false;
|
||||
}
|
||||
else {
|
||||
@ -244,7 +246,9 @@ bool Rdb::init ( char *dir ,
|
||||
m_dbname ,
|
||||
m_ks ,
|
||||
// make useProtection true for debugging
|
||||
false ); // use protection?
|
||||
false , // use protection?
|
||||
false , // alowdups?
|
||||
m_rdbId );
|
||||
}
|
||||
// set this then
|
||||
sprintf(m_treeName,"buckets-%s",m_dbname);
|
||||
@ -846,7 +850,8 @@ bool Rdb::loadTree ( ) {
|
||||
//log (0,"Rdb::loadTree: loading %s",filename);
|
||||
// set a BigFile to this filename
|
||||
BigFile file;
|
||||
file.set ( getDir() , filename , NULL ); // getStripeDir() );
|
||||
char *dir = getDir();
|
||||
file.set ( dir , filename , NULL ); // getStripeDir() );
|
||||
bool treeExists = file.doesExist() > 0;
|
||||
bool status = false ;
|
||||
if ( treeExists ) {
|
||||
@ -2163,7 +2168,7 @@ bool Rdb::addRecord ( collnum_t collnum,
|
||||
}
|
||||
else if ( (tn=m_tree.addNode ( collnum, key , data , dataSize ))>=0) {
|
||||
// if adding to spiderdb, add to cache, too
|
||||
if ( m_rdbId != RDB_SPIDERDB || m_rdbId != RDB_DOLEDB )
|
||||
if ( m_rdbId != RDB_SPIDERDB && m_rdbId != RDB_DOLEDB )
|
||||
return true;
|
||||
// or if negative key
|
||||
if ( KEYNEG(key) ) return true;
|
||||
|
11
RdbTree.cpp
11
RdbTree.cpp
@ -92,7 +92,8 @@ bool RdbTree::set ( long fixedDataSize ,
|
||||
char *dbname ,
|
||||
char keySize ,
|
||||
bool useProtection ,
|
||||
bool allowDups ) {
|
||||
bool allowDups ,
|
||||
char rdbId ) {
|
||||
reset();
|
||||
m_fixedDataSize = fixedDataSize;
|
||||
m_doBalancing = doBalancing;
|
||||
@ -120,9 +121,9 @@ bool RdbTree::set ( long fixedDataSize ,
|
||||
if ( dbname ) strncpy ( p , dbname , 8 ); p += 8;
|
||||
*p++ = '\0';
|
||||
// set rdbid
|
||||
m_rdbId = -1;
|
||||
m_rdbId = rdbId; // -1;
|
||||
// if its doledb, set it
|
||||
if ( dbname && strcmp(dbname,"doledb") == 0 ) m_rdbId = RDB_DOLEDB;
|
||||
//if ( dbname && strcmp(dbname,"doledb") == 0 ) m_rdbId = RDB_DOLEDB;
|
||||
// adjust m_maxMem to virtual infinity if it was -1
|
||||
if ( m_maxMem < 0 ) m_maxMem = 0x7fffffff;
|
||||
// . compute each node's memory overhead
|
||||
@ -2994,11 +2995,11 @@ void RdbTree::cleanTree ( ) { // char **bases ) {
|
||||
|
||||
long RdbTree::getNumNegativeKeys ( collnum_t collnum ) {
|
||||
return g_collectiondb.m_recs[collnum]->
|
||||
m_numNegKeysInTree[m_rdbId];
|
||||
m_numNegKeysInTree[(unsigned char)m_rdbId];
|
||||
}
|
||||
|
||||
long RdbTree::getNumPositiveKeys ( collnum_t collnum ) {
|
||||
return g_collectiondb.m_recs[collnum]->
|
||||
m_numPosKeysInTree[m_rdbId];
|
||||
m_numPosKeysInTree[(unsigned char)m_rdbId];
|
||||
}
|
||||
|
||||
|
@ -91,7 +91,8 @@ class RdbTree {
|
||||
bool dataInPtrs = false ,
|
||||
char *dbname = NULL , char keySize = 12 ,
|
||||
bool useProtection = false ,
|
||||
bool allowDups = false );
|
||||
bool allowDups = false ,
|
||||
char rdbId = -1 );
|
||||
|
||||
// . frees the used memory, etc.
|
||||
// . override so derivatives can free up extra header arrays
|
||||
|
179
SafeBuf.cpp
179
SafeBuf.cpp
@ -680,7 +680,7 @@ bool SafeBuf::setEncoding(short cs) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SafeBuf::utf8Encode(char *s, long len, bool encodeHTML,long niceness) {
|
||||
bool SafeBuf::utf8Encode2(char *s, long len, bool encodeHTML,long niceness) {
|
||||
long tmp = m_length;
|
||||
if ( m_encoding == csUTF8 ) {
|
||||
if (! safeMemcpy(s,len)) return false;
|
||||
@ -1786,6 +1786,7 @@ bool SafeBuf::htmlEncodeXmlTags ( char *s , long slen , long niceness ) {
|
||||
}
|
||||
|
||||
bool SafeBuf::safeStrcpy ( char *s ) {
|
||||
if ( ! s ) return true;
|
||||
long slen = gbstrlen(s);
|
||||
return safeMemcpy(s,slen);
|
||||
}
|
||||
@ -2491,6 +2492,182 @@ bool SafeBuf::decodeJSON ( long niceness ) {
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SafeBuf::decodeJSONToUtf8 ( long niceness ) {
|
||||
|
||||
//char *x = strstr(m_buf,"Chief European");
|
||||
//if ( x )
|
||||
// log("hey");
|
||||
|
||||
// count how many \u's we got
|
||||
long need = 0;
|
||||
char *p = m_buf;
|
||||
for ( ; *p ; p++ )
|
||||
// for the 'x' and the ';'
|
||||
if ( *p == '\\' && p[1] == 'u' ) need += 2;
|
||||
|
||||
// reserve a little extra if we need it
|
||||
SafeBuf dbuf;
|
||||
dbuf.reserve ( need + m_length + 1);
|
||||
|
||||
char *src = m_buf;
|
||||
char *dst = dbuf.m_buf;
|
||||
for ( ; *src ; ) {
|
||||
QUICKPOLL(niceness);
|
||||
if ( *src == '\\' ) {
|
||||
// \n? (from json.org homepage)
|
||||
if ( src[1] == 'n' ) {
|
||||
*dst++ = '\n';
|
||||
src += 2;
|
||||
continue;
|
||||
}
|
||||
if ( src[1] == 'r' ) {
|
||||
*dst++ = '\r';
|
||||
src += 2;
|
||||
continue;
|
||||
}
|
||||
if ( src[1] == 't' ) {
|
||||
*dst++ = '\t';
|
||||
src += 2;
|
||||
continue;
|
||||
}
|
||||
if ( src[1] == 'b' ) {
|
||||
*dst++ = '\b';
|
||||
src += 2;
|
||||
continue;
|
||||
}
|
||||
if ( src[1] == 'f' ) {
|
||||
*dst++ = '\f';
|
||||
src += 2;
|
||||
continue;
|
||||
}
|
||||
// a "\\" is an encoded backslash
|
||||
if ( src[1] == '\\' ) {
|
||||
*dst++ = '\\';
|
||||
src += 2;
|
||||
continue;
|
||||
}
|
||||
// a "\/" is an encoded forward slash
|
||||
if ( src[1] == '/' ) {
|
||||
*dst++ = '/';
|
||||
src += 2;
|
||||
continue;
|
||||
}
|
||||
// utf8? if not, just skip the slash
|
||||
if ( src[1] != 'u' ) { src++; continue; }
|
||||
// otherwise, decode. can do in place like this...
|
||||
char *p = src + 2;
|
||||
if ( ! is_hex(p[0]) ) continue;
|
||||
if ( ! is_hex(p[1]) ) continue;
|
||||
if ( ! is_hex(p[2]) ) continue;
|
||||
if ( ! is_hex(p[3]) ) continue;
|
||||
// TODO: support surrogate pairs in utf16?
|
||||
UChar32 uc = 0;
|
||||
// store the 16-bit number in lower 16 bits of uc...
|
||||
hexToBin ( p , 2 , ((char *)&uc)+1 );
|
||||
hexToBin ( p+2 , 2 , ((char *)&uc)+0 );
|
||||
//buf[2] = '\0';
|
||||
long size = ::utf8Encode ( (UChar32)uc , (char *)dst );
|
||||
// a quote??? not allowed in json!
|
||||
if ( size == 1 && dst[0] == '\"' ) {
|
||||
size = 2;
|
||||
dst[0] = '\\';
|
||||
dst[1] = '\"';
|
||||
}
|
||||
//short = ahextoshort ( p );
|
||||
dst += size;
|
||||
// skip over /u and 4 digits
|
||||
src += 6;
|
||||
continue;
|
||||
}
|
||||
*dst++ = *src++;
|
||||
}
|
||||
*dst = '\0';
|
||||
dbuf.m_length = dst - dbuf.m_buf;
|
||||
|
||||
// purge ourselves
|
||||
purge();
|
||||
|
||||
// and steal dbuf's m_buf
|
||||
m_buf = dbuf.m_buf;
|
||||
m_length = dbuf.m_length;
|
||||
m_capacity = dbuf.m_capacity;
|
||||
m_usingStack = dbuf.m_usingStack;
|
||||
|
||||
// detach from dbuf so he does not free it
|
||||
dbuf.detachBuf();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// . REALLY just a print vanity function. makes json output prettier
|
||||
//
|
||||
// . after converting JSON to utf8 above we sometimes want to go back.
|
||||
// . just print that out. encode \n's and \r's back to \\n \\r
|
||||
// and backslash to a \\ ... etc.
|
||||
// . but if they originally had a \u<backslash> encoding and we decoded
|
||||
// it to a backslash, here it will be re-encoded as (double backslash)
|
||||
// . likewise, if the input originally had a \u<quote> encoding we should
|
||||
// have decoded it as a \"!
|
||||
// . this does not need to be super fast because it will be used for
|
||||
// showing cached pages or dumping out the json objects from a crawl for
|
||||
// diffbot
|
||||
// . really we could leave the newlines decoded etc, but it is prettier
|
||||
// for printing
|
||||
bool SafeBuf::safeStrcpyPrettyJSON ( char *decodedJson ) {
|
||||
// how much space do we need?
|
||||
// each single byte \t char for instance will need 2 bytes
|
||||
long need = gbstrlen(decodedJson) * 2 + 1;
|
||||
if ( ! reserve ( need ) ) return false;
|
||||
// scan and copy
|
||||
char *src = decodedJson;
|
||||
// concatenate to what's already there
|
||||
char *dst = m_buf + m_length;
|
||||
for ( ; *src ; src++ ) {
|
||||
|
||||
if ( *src == '\t' ) {
|
||||
*dst++ = '\\';
|
||||
*dst++ = 't';
|
||||
continue;
|
||||
}
|
||||
if ( *src == '\n' ) {
|
||||
*dst++ = '\\';
|
||||
*dst++ = 'n';
|
||||
continue;
|
||||
}
|
||||
if ( *src == '\r' ) {
|
||||
*dst++ = '\\';
|
||||
*dst++ = 'r';
|
||||
continue;
|
||||
}
|
||||
if ( *src == '\f' ) {
|
||||
*dst++ = '\\';
|
||||
*dst++ = 'f';
|
||||
continue;
|
||||
}
|
||||
if ( *src == '\\' ) {
|
||||
*dst++ = '\\';
|
||||
*dst++ = '\\';
|
||||
continue;
|
||||
}
|
||||
//if ( *src == '\/' ) {
|
||||
// *dst++ = '\\';
|
||||
// *dst++ = '/';
|
||||
// continue;
|
||||
//}
|
||||
|
||||
*dst++ = *src;
|
||||
|
||||
}
|
||||
// null term
|
||||
*dst = '\0';
|
||||
|
||||
m_length = dst - m_buf;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
||||
bool SafeBuf::linkify ( long niceness , long startPos ) {
|
||||
|
||||
|
29
SafeBuf.h
29
SafeBuf.h
@ -57,6 +57,7 @@ struct SafeBuf {
|
||||
bool truncateLongWords ( char *src, long srcLen , long minmax );
|
||||
bool safeTruncateEllipsis ( char *src , long maxLen );
|
||||
bool convertJSONtoXML ( long niceness , long startConvertPos );
|
||||
bool decodeJSONToUtf8 ( long niceness );
|
||||
bool decodeJSON ( long niceness );
|
||||
bool linkify ( long niceness , long startPos );
|
||||
|
||||
@ -70,6 +71,13 @@ struct SafeBuf {
|
||||
return safeStrcpy ( str );
|
||||
};
|
||||
|
||||
void removeLastChar ( char lastChar ) {
	// . only chop when the buffer is non-empty and really ends in
	//   "lastChar", otherwise leave it alone
	bool endsWithIt = ( m_length > 0 &&
			    m_buf[m_length-1] == lastChar );
	if ( ! endsWithIt ) return;
	// shrink by one and put a \0 where that char used to be
	m_buf[--m_length] = '\0';
};
|
||||
|
||||
//MUTATORS
|
||||
#ifdef _CHECK_FORMAT_STRING_
|
||||
bool safePrintf(char *formatString, ...)
|
||||
@ -83,6 +91,7 @@ struct SafeBuf {
|
||||
bool safeMemcpy(SafeBuf *c){return safeMemcpy(c->m_buf,c->m_length);};
|
||||
bool safeMemcpy ( class Words *w , long a , long b ) ;
|
||||
bool safeStrcpy ( char *s ) ;
|
||||
bool safeStrcpyPrettyJSON ( char *decodedJson ) ;
|
||||
//bool pushLong ( long val ) { return safeMemcpy((char *)&val,4); }
|
||||
bool cat(SafeBuf& c);
|
||||
// . only cat the sections/tag that start with "tagFilter"
|
||||
@ -96,7 +105,11 @@ struct SafeBuf {
|
||||
bool reserve(long i, char *label=NULL);
|
||||
bool reserve2x(long i);
|
||||
bool inlineStyleTags();
|
||||
void incrementLength(long i) { m_length += i; }
|
||||
void incrementLength(long i) {
	// "i" may be negative, so never let the length drop below zero
	long newLength = m_length + i;
	m_length = ( newLength < 0 ) ? 0 : newLength;
};
|
||||
void setLength(long i) { m_length = i; };
|
||||
char *getNextLine ( char *p ) ;
|
||||
long catFile(char *filename) ;
|
||||
@ -172,9 +185,9 @@ struct SafeBuf {
|
||||
|
||||
//insert strings in their native encoding
|
||||
bool encode ( char *s , long len , long niceness=0) {
|
||||
return utf8Encode(s,len,false,niceness); };
|
||||
return utf8Encode2(s,len,false,niceness); };
|
||||
// htmlEncode default = false
|
||||
bool utf8Encode(char *s, long len, bool htmlEncode=false,
|
||||
bool utf8Encode2(char *s, long len, bool htmlEncode=false,
|
||||
long niceness=0);
|
||||
bool latin1Encode(char *s, long len, bool htmlEncode=false,
|
||||
long niceness=0);
|
||||
@ -230,6 +243,16 @@ struct SafeBuf {
|
||||
|
||||
bool cdataEncode ( char *s ) ;
|
||||
|
||||
// . append a \0 but do not inc m_length
|
||||
// . for null terminating strings
|
||||
bool nullTerm ( ) {
|
||||
if(m_length >= m_capacity && !reserve(m_capacity + 1) )
|
||||
return false;
|
||||
m_buf[m_length] = '\0';
|
||||
return true;
|
||||
};
|
||||
|
||||
|
||||
bool safeCdataMemcpy(char *s, long len);
|
||||
bool pushChar (char i) {
|
||||
if(m_length >= m_capacity)
|
||||
|
@ -196,6 +196,9 @@ class SearchInput *g_si = NULL;
|
||||
|
||||
bool SearchInput::set ( TcpSocket *sock , HttpRequest *r , Query *q ) {
|
||||
|
||||
// save it now
|
||||
m_socket = sock;
|
||||
|
||||
// get coll rec
|
||||
long collLen;
|
||||
char *coll = r->getString ( "c" , &collLen );
|
||||
|
@ -401,6 +401,8 @@ class SearchInput {
|
||||
// make a cookie from parms with m_flags of PF_COOKIE set
|
||||
SafeBuf m_cookieBuf;
|
||||
|
||||
TcpSocket *m_socket;
|
||||
|
||||
//char m_urlParms [ MAX_URLPARMS_LEN ];
|
||||
//char m_postParms [ MAX_URLPARMS_LEN ];
|
||||
|
||||
|
168
Spider.cpp
168
Spider.cpp
@ -747,7 +747,7 @@ void SpiderCache::save ( bool useThread ) {
|
||||
//m_isSaving = true;
|
||||
// loop over all SpiderColls and get the best
|
||||
for ( long i = 0 ; i < g_collectiondb.getNumRecs() ; i++ ) {
|
||||
SpiderColl *sc = getSpiderColl(i);//m_spiderColls[i];
|
||||
SpiderColl *sc = getSpiderCollIffNonNull(i);//m_spiderColls[i];
|
||||
if ( ! sc ) continue;
|
||||
RdbTree *tree = &sc->m_waitingTree;
|
||||
char *filename = "waitingtree";
|
||||
@ -797,7 +797,7 @@ void SpiderCache::save ( bool useThread ) {
|
||||
|
||||
bool SpiderCache::needsSave ( ) {
|
||||
for ( long i = 0 ; i < g_collectiondb.getNumRecs() ; i++ ) {
|
||||
SpiderColl *sc = getSpiderColl(i);//m_spiderColls[i];
|
||||
SpiderColl *sc = getSpiderCollIffNonNull(i);//m_spiderColls[i];
|
||||
if ( ! sc ) continue;
|
||||
if ( sc->m_waitingTree.m_needsSave ) return true;
|
||||
// also the doleIpTable
|
||||
@ -809,7 +809,7 @@ bool SpiderCache::needsSave ( ) {
|
||||
void SpiderCache::reset ( ) {
|
||||
// loop over all SpiderColls and get the best
|
||||
for ( long i = 0 ; i < g_collectiondb.getNumRecs() ; i++ ) {
|
||||
SpiderColl *sc = getSpiderColl(i);
|
||||
SpiderColl *sc = getSpiderCollIffNonNull(i);
|
||||
if ( ! sc ) continue;
|
||||
sc->reset();
|
||||
mdelete ( sc , sizeof(SpiderColl) , "SpiderCache" );
|
||||
@ -821,6 +821,13 @@ void SpiderCache::reset ( ) {
|
||||
//m_numSpiderColls = 0;
|
||||
}
|
||||
|
||||
// . return the SpiderColl already attached to this collection, or NULL
// . never creates one, so callers must check for NULL
SpiderColl *SpiderCache::getSpiderCollIffNonNull ( collnum_t collnum ) {
	// "collnum" must be valid
	if ( collnum < 0 ) return NULL;
	// shortcut
	CollectionRec *cr = g_collectiondb.m_recs[collnum];
	// . do not core on an empty slot, our callers loop over every
	//   collnum and just skip the ones we return NULL for
	// . presumably m_recs[] has NULL entries for collections that
	//   were deleted -- TODO confirm
	if ( ! cr ) return NULL;
	// return it if non-NULL
	return cr->m_spiderColl;
}
|
||||
|
||||
// get SpiderColl for a collection
|
||||
SpiderColl *SpiderCache::getSpiderColl ( collnum_t collnum ) {
|
||||
// return it if non-NULL
|
||||
@ -867,6 +874,8 @@ SpiderColl *SpiderCache::getSpiderColl ( collnum_t collnum ) {
|
||||
sc->m_cr = cr;
|
||||
// sanity check
|
||||
if ( ! cr ) { char *xx=NULL;*xx=0; }
|
||||
// note it!
|
||||
log("spider: adding new spider collection for %s",cr->m_coll);
|
||||
// that was it
|
||||
return sc;
|
||||
}
|
||||
@ -894,6 +903,13 @@ SpiderColl::SpiderColl () {
|
||||
memset ( m_outstandingSpiders , 0 , 4 * MAX_SPIDER_PRIORITIES );
|
||||
}
|
||||
|
||||
long SpiderColl::getTotalOutstandingSpiders ( ) {
|
||||
long sum = 0;
|
||||
for ( long i = 0 ; i < MAX_SPIDER_PRIORITIES ; i++ )
|
||||
sum += m_outstandingSpiders[i];
|
||||
return sum;
|
||||
}
|
||||
|
||||
// load the tables that we set when m_doInitialScan is true
|
||||
bool SpiderColl::load ( ) {
|
||||
// error?
|
||||
@ -937,6 +953,8 @@ bool SpiderColl::load ( ) {
|
||||
// . try going to 20M now since we hit it again...
|
||||
if (!m_waitingTree.set(0,-1,true,20000000,true,"waittree2",
|
||||
false,"waitingtree",sizeof(key_t)))return false;
|
||||
// prevent core with this
|
||||
m_waitingTree.m_rdbId = RDB_NONE;
|
||||
|
||||
// make dir
|
||||
char dir[500];
|
||||
@ -2326,7 +2344,8 @@ bool SpiderColl::scanSpiderdb ( bool needList ) {
|
||||
if ( sreq->m_url[0] != 'h' &&
|
||||
// might be a docid from a pagereindex.cpp
|
||||
! is_digit(sreq->m_url[0]) ) {
|
||||
log("spider: got corrupt 1 spiderRequest in scan");
|
||||
log("spider: got corrupt 1 spiderRequest in scan "
|
||||
"because url is %s",sreq->m_url);
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -7814,3 +7833,144 @@ void dedupSpiderdbList ( RdbList *list , long niceness , bool removeNegRecs ) {
|
||||
|
||||
//mfree ( oldbuf , oldSize, "oldspbuf");
|
||||
}
|
||||
|
||||
///////
|
||||
//
|
||||
// diffbot uses these for limiting crawls in a collection
|
||||
//
|
||||
///////
|
||||
|
||||
void gotCrawlInfoReply ( void *state , UdpSlot *slot);
|
||||
|
||||
// . one waiter in CollectionRec::m_callbackQueue
// . updateCrawlInfo() appends these and gotCrawlInfoReply() calls
//   m_callback(m_state) on each once all replies are in
class CallbackEntry2 {
 public:
	// signature of the function to call back
	typedef void (* callback_t ) ( void *state );
	// opaque pointer handed back to m_callback
	void       *m_state;
	callback_t  m_callback;
};
|
||||
|
||||
// . get total # of pages crawled in this collection over whole network
|
||||
// . returns false if blocked
|
||||
// . returns true and sets g_errno on error
|
||||
bool updateCrawlInfo ( CollectionRec *cr ,
		       void *state ,
		       void (* callback)(void *state) ,
		       bool useCache ) {

	// the cached global stats are good enough for 60 seconds
	long nowLocal = getTimeLocal();
	bool isFresh  = ( nowLocal - cr->m_globalCrawlInfoUpdateTime < 60 );
	if ( useCache && isFresh ) return true;

	// . get in line. gotCrawlInfoReply() calls everybody in the
	//   queue once the last reply is in
	// . if the queue can not grow we return true, same as the
	//   error case described for this function
	CallbackEntry2 entry;
	entry.m_callback = callback;
	entry.m_state    = state;
	if ( ! cr->m_callbackQueue.safeMemcpy ( &entry,
						sizeof(CallbackEntry2)) )
		return true;

	// somebody else already launched a round that is still in
	// progress, so just wait for its replies to come back
	if ( cr->m_requests > cr->m_replies ) return false;

	// we are the first, start a new round from scratch
	cr->m_globalCrawlInfo.reset();
	cr->m_requests = 0;
	cr->m_replies  = 0;

	// request is just the collnum
	long  requestSize = sizeof(collnum_t);
	char *request     = (char *)&cr->m_collnum;

	// ask every live host for its local stats
	for ( long n = 0 ; n < g_hostdb.m_numHosts ; n++ ) {
		Host *host = g_hostdb.getHost(n);
		// skip if dead
		if ( g_hostdb.isDead(n) ) continue;
		// count it as launched
		cr->m_requests++;
		bool sent = g_udpServer.sendRequest ( request,
						      requestSize,
						      0xc1 , // msgtype
						      host->m_ip ,
						      host->m_port ,
						      host->m_hostId ,
						      NULL, // retslot
						      cr , // state
						      gotCrawlInfoReply );
		if ( sent ) continue;
		// a failed send counts as a reply so we do not wait on it
		log("spider: error sending c1 request: %s",
		    mstrerror(g_errno));
		cr->m_replies++;
	}

	// nothing is outstanding, so finish the round ourselves and
	// report that we did not block
	if ( cr->m_replies >= cr->m_requests ) {
		gotCrawlInfoReply( cr , NULL );
		return true;
	}

	// we blocked awaiting replies
	return false;
}
|
||||
|
||||
void gotCrawlInfoReply ( void *state , UdpSlot *slot ) {
|
||||
// cast it
|
||||
CollectionRec *cr = (CollectionRec *)state;
|
||||
// inc it
|
||||
cr->m_replies++;
|
||||
|
||||
// the sendbuf should never be freed! it points into collrec
|
||||
slot->m_sendBufAlloc = NULL;
|
||||
|
||||
// add it in to the stats
|
||||
if ( slot ) {
|
||||
CrawlInfo *stats = (CrawlInfo *)(slot->m_readBuf);
|
||||
cr->m_globalCrawlInfo.m_pageIndexAttempts +=
|
||||
stats->m_pageIndexAttempts;
|
||||
cr->m_globalCrawlInfo.m_pageProcessAttempts +=
|
||||
stats->m_pageProcessAttempts;
|
||||
cr->m_globalCrawlInfo.m_pageDownloadAttempts +=
|
||||
stats->m_pageDownloadAttempts;
|
||||
}
|
||||
// return if still waiting on more to come in
|
||||
if ( cr->m_replies < cr->m_requests ) return;
|
||||
|
||||
// update cache time
|
||||
cr->m_globalCrawlInfoUpdateTime = getTime();
|
||||
|
||||
// make it save to disk i guess
|
||||
cr->m_needsSave = true;
|
||||
|
||||
// call all callbacks
|
||||
long nc = cr->m_callbackQueue.length() / sizeof(CallbackEntry2);
|
||||
char *p = cr->m_callbackQueue.getBufStart();
|
||||
for ( long i = 0 ; i < nc ; i++ ) {
|
||||
CallbackEntry2 *ce2 = (CallbackEntry2 *)p;
|
||||
p += sizeof(CallbackEntry2);
|
||||
// clear g_errno just in case
|
||||
g_errno = 0;
|
||||
// call that callback waiting in the queue
|
||||
ce2->m_callback ( ce2->m_state );
|
||||
}
|
||||
|
||||
// save the mem!
|
||||
cr->m_callbackQueue.purge();
|
||||
}
|
||||
|
||||
// . handle a 0xc1 request, which is just a single collnum
// . replies with a copy of that collection's local CrawlInfo
// . if we do not have that collection we reply with all-zero stats
void handleRequestc1 ( UdpSlot *slot , long niceness ) {
	char *request = slot->m_readBuf;
	// just a single collnum
	if ( slot->m_readBufSize != sizeof(collnum_t) ) { char *xx=NULL;*xx=0;}
	collnum_t collnum = *(collnum_t *)request;
	CollectionRec *cr = g_collectiondb.getRec(collnum);
	// the reply is built in the slot's own buffer
	char *reply = slot->m_tmpBuf;
	if ( TMPBUFSIZE < sizeof(CrawlInfo) ) { char *xx=NULL;*xx=0; }
	// . do not core if the collection is unknown on this host
	// . the requester just sums the counts it gets back, so zeros
	//   leave its totals unchanged
	// . presumably getRec() returns NULL for a collnum we do not
	//   have -- TODO confirm
	if ( cr ) memcpy ( reply , &cr->m_localCrawlInfo , sizeof(CrawlInfo) );
	else      memset ( reply , 0 , sizeof(CrawlInfo) );
	g_udpServer.sendReply_ass ( reply ,
				    sizeof(CrawlInfo) ,
				    reply , // alloc
				    sizeof(CrawlInfo) , //alloc size
				    slot );
}
|
||||
|
13
Spider.h
13
Spider.h
@ -24,6 +24,13 @@
|
||||
#include "Msg4.h"
|
||||
#include "hash.h"
|
||||
|
||||
// for diffbot, this is for xmldoc.cpp to update CollectionRec::m_crawlInfo
|
||||
// which has m_pagesCrawled and m_pagesProcessed.
|
||||
bool updateCrawlInfo ( CollectionRec *cr ,
|
||||
void *state ,
|
||||
void (* callback)(void *state) ,
|
||||
bool useCache = true ) ;
|
||||
|
||||
///////////////////////////////////////
|
||||
//
|
||||
// QUICK OVERVIEW
|
||||
@ -828,6 +835,8 @@ class SpiderColl {
|
||||
|
||||
bool load();
|
||||
|
||||
long getTotalOutstandingSpiders ( ) ;
|
||||
|
||||
key128_t m_firstKey;
|
||||
// spiderdb is now 128bit keys
|
||||
key128_t m_nextKey;
|
||||
@ -966,6 +975,8 @@ class SpiderCache {
|
||||
// what SpiderColl does a SpiderRec with this key belong?
|
||||
SpiderColl *getSpiderColl ( collnum_t collNum ) ;
|
||||
|
||||
SpiderColl *getSpiderCollIffNonNull ( collnum_t collNum ) ;
|
||||
|
||||
// called by main.cpp on exit to free memory
|
||||
void reset();
|
||||
|
||||
@ -1024,7 +1035,7 @@ class Msg12 {
|
||||
};
|
||||
|
||||
void handleRequest12 ( UdpSlot *udpSlot , long niceness ) ;
|
||||
|
||||
void handleRequestc1 ( UdpSlot *slot , long niceness ) ;
|
||||
|
||||
// . the spider loop
|
||||
// . it gets urls to spider from the SpiderCache global class, g_spiderCache
|
||||
|
@ -1425,6 +1425,9 @@ void writeSocketWrapper ( int sd , void *state ) {
|
||||
}
|
||||
// if socket has nothing to send yet cuz we're waiting, wait...
|
||||
if ( s->m_sendBufUsed == 0 ) return;
|
||||
|
||||
sendAgain:
|
||||
|
||||
// . writeSocket returns false if blocked, true otherwise
|
||||
// . it also sets g_errno on error
|
||||
// . don't call it if we have g_errno set, however
|
||||
@ -1435,8 +1438,16 @@ void writeSocketWrapper ( int sd , void *state ) {
|
||||
if ( status == 1 && ! s->m_readBuf ) return;
|
||||
// good?
|
||||
g_errno = 0;
|
||||
// otherwise, call callback on done reading or error
|
||||
// otherwise, call callback on done writing or error
|
||||
THIS->makeCallback ( s );
|
||||
|
||||
// if callback changed socket status to ST_SEND_AGAIN
|
||||
// then let's send the new buffer that it has. Diffbot.cpp uses this.
|
||||
if ( s->m_sockState == ST_SEND_AGAIN ) {
|
||||
s->m_sockState = ST_WRITING;
|
||||
goto sendAgain;
|
||||
}
|
||||
|
||||
// . destroy the socket on error, recycle on transaction completion
|
||||
// . this will also unregister all our callbacks for the socket
|
||||
if ( status == -1 ) THIS->destroySocket ( s );
|
||||
|
@ -26,6 +26,10 @@
|
||||
#define ST_CLOSE_CALLED 7
|
||||
#define ST_SSL_ACCEPT 8
|
||||
#define ST_SSL_SHUTDOWN 9
|
||||
// hack to repopulate the socket's send buf when it's done sending
|
||||
// its current sendbuf in order to transmit large amounts of data that
|
||||
// can't all fit in memory at the same time:
|
||||
#define ST_SEND_AGAIN 10
|
||||
|
||||
#define TCP_READ_BUF_SIZE 1024
|
||||
|
||||
|
29
Title.cpp
29
Title.cpp
@ -103,6 +103,35 @@ bool Title::setTitle ( XmlDoc *xd ,
|
||||
|
||||
long long startTime = gettimeofdayInMilliseconds();
|
||||
|
||||
// . reset so matches.cpp using this does not core
|
||||
// . assume no title tag
|
||||
m_titleTagStart = -1;
|
||||
m_titleTagEnd = -1;
|
||||
|
||||
// if we are a json object
|
||||
if ( ! xd->m_contentTypeValid ) { char *xx=NULL;*xx=0; }
|
||||
char *val = NULL;
|
||||
long vlen;
|
||||
// look for the "title:" field in json then use that
|
||||
if ( xd->m_contentType == CT_JSON )
|
||||
val = getJSONFieldValue ( xd->ptr_utf8Content,"title",&vlen);
|
||||
// if we had a title: field in the json...
|
||||
if ( val ) {
|
||||
char *dst = NULL;
|
||||
m_titleBytes = vlen;
|
||||
if ( m_titleBytes+1 < TITLE_LOCAL_SIZE )
|
||||
dst = m_localBuf;
|
||||
else {
|
||||
dst = (char *)mmalloc ( m_titleBytes+1,"titdst" );
|
||||
if ( ! dst ) return false;
|
||||
}
|
||||
m_title = dst;
|
||||
memcpy ( dst , val , m_titleBytes );
|
||||
dst[m_titleBytes] = '\0';
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool status = setTitle4 ( xd ,
|
||||
xml ,
|
||||
words ,
|
||||
|
840
XmlDoc.cpp
840
XmlDoc.cpp
File diff suppressed because it is too large
Load Diff
60
XmlDoc.h
60
XmlDoc.h
@ -91,8 +91,16 @@ bool setLangVec ( class Words *words ,
|
||||
class Sections *sections ,
|
||||
long niceness ) ;
|
||||
|
||||
char *getJSONFieldValue ( char *json, char *field , long *valueLen ) ;
|
||||
|
||||
bool logQueryLogs ( );
|
||||
|
||||
bool checkRegex ( SafeBuf *regex ,
|
||||
char *target ,
|
||||
bool *boolVal ,
|
||||
bool *boolValValid ,
|
||||
long *compileError = NULL ) ;
|
||||
|
||||
// Address.cpp calls this to make a vector from the "place name" for comparing
|
||||
// to other places in placedb using the computeSimilarity() function. if
|
||||
// we got a >75% similarity we set the AF_VERIFIED_PLACE_NAME bit in the
|
||||
@ -283,7 +291,13 @@ class XmlDoc {
|
||||
char m_reserved3b;
|
||||
uint16_t m_reserved4;//externalLinkTextWeight;
|
||||
uint16_t m_reserved5;//internalLinkTextWeight;
|
||||
uint16_t m_reserved6;//conceptWeight;
|
||||
|
||||
// a new parm from reserved6. need to know the count so we can
|
||||
// delete the json objects derived from this page if we want to
|
||||
// delete this page. or if this page is respidered then we get the
|
||||
// json objects for it, REject the old json object urls, and inject
|
||||
// the new ones i guess.
|
||||
uint16_t m_diffbotJSONCount;
|
||||
|
||||
// these do not include header/footer (dup) addresses
|
||||
//int16_t m_numAddresses;
|
||||
@ -311,7 +325,24 @@ class XmlDoc {
|
||||
uint16_t m_hasSiteVenue:1;
|
||||
uint16_t m_hasContactInfo:1;
|
||||
uint16_t m_isSiteRoot:1;
|
||||
uint16_t m_reserved8;
|
||||
|
||||
uint16_t m_isDiffbotJSONObject:1;
|
||||
uint16_t m_reserved802:1;
|
||||
uint16_t m_reserved803:1;
|
||||
uint16_t m_reserved804:1;
|
||||
uint16_t m_reserved805:1;
|
||||
uint16_t m_reserved806:1;
|
||||
uint16_t m_reserved807:1;
|
||||
uint16_t m_reserved808:1;
|
||||
uint16_t m_reserved809:1;
|
||||
uint16_t m_reserved810:1;
|
||||
uint16_t m_reserved811:1;
|
||||
uint16_t m_reserved812:1;
|
||||
uint16_t m_reserved813:1;
|
||||
uint16_t m_reserved814:1;
|
||||
uint16_t m_reserved815:1;
|
||||
uint16_t m_reserved816:1;
|
||||
|
||||
|
||||
char *ptr_firstUrl;
|
||||
char *ptr_redirUrl;
|
||||
@ -1205,6 +1236,11 @@ class XmlDoc {
|
||||
bool m_numOutlinksAddedValid;
|
||||
bool m_baseUrlValid;
|
||||
bool m_replyValid;
|
||||
bool m_diffbotReplyValid;
|
||||
bool m_diffbotUrlCrawlPatternMatchValid;
|
||||
bool m_diffbotUrlProcessPatternMatchValid;
|
||||
bool m_diffbotPageProcessPatternMatchValid;
|
||||
bool m_crawlInfoValid;
|
||||
bool m_isPageParserValid;
|
||||
bool m_imageUrlValid;
|
||||
bool m_matchOffsetsValid;
|
||||
@ -1416,6 +1452,7 @@ class XmlDoc {
|
||||
long m_siteHash32;
|
||||
char *m_httpReply;
|
||||
char m_downloadAttempted;
|
||||
char m_incrementedAttemptsCount;
|
||||
char m_redirectFlag;
|
||||
//char m_isScraping;
|
||||
//char m_throttleDownload;
|
||||
@ -1447,6 +1484,25 @@ class XmlDoc {
|
||||
//long *m_outlinkIpVector;
|
||||
Msge1 m_msge1;
|
||||
|
||||
//
|
||||
// diffbot parms for indexing diffbot's json output
|
||||
//
|
||||
XmlDoc *m_dx;
|
||||
char *m_diffbotObj;
|
||||
char *m_diffbotObjEnd;
|
||||
char m_diffbotSavedChar;
|
||||
SafeBuf m_diffbotReply;
|
||||
long m_diffbotReplyError;
|
||||
bool m_diffbotUrlCrawlPatternMatch;
|
||||
bool m_diffbotUrlProcessPatternMatch;
|
||||
bool m_diffbotPageProcessPatternMatch;
|
||||
|
||||
SafeBuf *getDiffbotReply ( ) ;
|
||||
bool doesUrlMatchDiffbotCrawlPattern() ;
|
||||
bool doesUrlMatchDiffbotProcessPattern() ;
|
||||
bool doesPageContentMatchDiffbotProcessPattern() ;
|
||||
char *hashJSON ( HashTableX *table );
|
||||
|
||||
|
||||
//
|
||||
// functions and vars for the seo query matching tool
|
||||
|
1
main.cpp
1
main.cpp
@ -4828,6 +4828,7 @@ bool registerMsgHandlers2(){
|
||||
//if ( ! MsgF ::registerHandler() ) return false;
|
||||
|
||||
//if(! g_udpServer.registerHandler(0x10,handleRequest10)) return false;
|
||||
if ( ! g_udpServer.registerHandler(0xc1,handleRequestc1)) return false;
|
||||
if ( ! g_udpServer.registerHandler(0x39,handleRequest39)) return false;
|
||||
if ( ! g_udpServer.registerHandler(0x2c,handleRequest2c)) return false;
|
||||
if ( ! g_udpServer.registerHandler(0x12,handleRequest12)) return false;
|
||||
|
Loading…
x
Reference in New Issue
Block a user