forked from Mirrors/privacore-open-source-search-engine
Merge branch 'master' into diffbot
Conflicts: PageResults.cpp
This commit is contained in:
7
LICENSE
7
LICENSE
@ -189,3 +189,10 @@ third-party archives.
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
|
||||
10. Exclusions
|
||||
|
||||
The SEO (Search Engine Optimization) functionality is provided in a separate file named "seo.cpp", and is not included in this license. If you would like to license that then you can arrange a licensing agreement with Matt Wells.
|
||||
|
||||
Likewise, the Event datamining logic is in Events.cpp and must be separately licensed as well.
|
||||
|
||||
|
2
Lang.cpp
2
Lang.cpp
@ -196,7 +196,7 @@ static char *s_langAbbr[] = {
|
||||
};
|
||||
|
||||
// fix bug:
|
||||
//#ifndef _PRIVATESTUFF_
|
||||
//#ifndef PRIVATESTUFF
|
||||
#define csISOLatin6 cslatin6
|
||||
//#endif
|
||||
|
||||
|
9
Loop.cpp
9
Loop.cpp
@ -833,9 +833,9 @@ void sigalrmHandler ( int x , siginfo_t *info , void *y ) {
|
||||
|
||||
// if we missed too many, then dump core
|
||||
if ( g_niceness == 1 && g_missedQuickPolls >= 4 ) {
|
||||
g_inSigHandler = true;
|
||||
//g_inSigHandler = true;
|
||||
log("loop: missed quickpoll");
|
||||
g_inSigHandler = false;
|
||||
//g_inSigHandler = false;
|
||||
// seems to core a lot in gbcompress() we need to
|
||||
// put a quickpoll into zlib deflate() or
|
||||
// deflate_slow() or longest_match() function
|
||||
@ -1594,7 +1594,10 @@ void sigHandlerRT ( int x , siginfo_t *info , void *v ) {
|
||||
}
|
||||
//fprintf (stderr,"in rt handler\n");
|
||||
// let everyone know it
|
||||
g_inSigHandler = true;
|
||||
// MDW: turn this off for now, how is it getting set? we dont use
|
||||
// real time signals any more. maybe a pthread is getting such
|
||||
// a signal?
|
||||
//g_inSigHandler = true;
|
||||
// debug msg
|
||||
//if ( g_conf.m_timingDebugEnabled )
|
||||
// log("sigHandlerRT entered");
|
||||
|
51
Make.depend
51
Make.depend
@ -376,7 +376,8 @@ fctypes.o: fctypes.cpp gb-include.h types.h fctypes.h Unicode.h \
|
||||
TcpSocket.h Collectiondb.h Entities.h UCWordIterator.h Timedb.h Rdb.h \
|
||||
RdbBase.h RdbScan.h BigFile.h RdbMap.h RdbList.h RdbDump.h RdbTree.h \
|
||||
RdbMem.h RdbBuckets.h RdbCache.h Msg5.h Msg3.h HashTableX.h RdbMerge.h \
|
||||
Dir.h Titledb.h DiskPageCache.h CollectionRec.h Parms.h HashTable.h
|
||||
Dir.h Titledb.h DiskPageCache.h CollectionRec.h Parms.h HashTable.h \
|
||||
Threads.h
|
||||
File.o: File.cpp gb-include.h types.h fctypes.h Unicode.h \
|
||||
UnicodeProperties.h UCPropTable.h iconv.h UCNormalizer.h hash.h Errno.h \
|
||||
Log.h File.h Mem.h Conf.h Xml.h XmlNode.h Lang.h Iso8859.h \
|
||||
@ -1536,19 +1537,19 @@ PageStats.o: PageStats.cpp gb-include.h types.h fctypes.h Unicode.h \
|
||||
Msge0.h Msge1.h Msg8b.h SiteGetter.h Title.h Address.h Placedb.h
|
||||
PageStatsdb.o: PageStatsdb.cpp gb-include.h types.h fctypes.h Unicode.h \
|
||||
UnicodeProperties.h UCPropTable.h iconv.h UCNormalizer.h hash.h Errno.h \
|
||||
Log.h plotter.h iostream.h streambuf.h CollectionRec.h Url.h ip.h \
|
||||
Parms.h Xml.h XmlNode.h Lang.h Iso8859.h iana_charset.h SafeBuf.h \
|
||||
HttpRequest.h Mem.h Conf.h File.h Loop.h Hostdb.h TcpSocket.h \
|
||||
Collectiondb.h HashTable.h HashTableX.h RdbList.h Pages.h HttpServer.h \
|
||||
TcpServer.h MsgC.h UdpServer.h UdpSlot.h UdpProtocol.h Dns.h \
|
||||
DnsProtocol.h RdbCache.h Multicast.h Threads.h Rdb.h RdbBase.h RdbScan.h \
|
||||
BigFile.h RdbMap.h RdbDump.h RdbTree.h RdbMem.h RdbBuckets.h Msg5.h \
|
||||
Msg3.h RdbMerge.h Dir.h HttpMime.h Statsdb.h zlib.h zconf.h Stats.h \
|
||||
IndexReadInfo.h Query.h IndexList.h Indexdb.h DiskPageCache.h Titledb.h \
|
||||
Msg1.h Msg0.h Clusterdb.h Linkdb.h Msg2.h Msg20.h Summary.h matches.h \
|
||||
Words.h StopWords.h Bits.h Pos.h Matches.h HashTableT.h Domains.h \
|
||||
CountryCode.h Tagdb.h Events.h Sections.h Dates.h Msg22.h CatRec.h \
|
||||
Categories.h Catdb.h Datedb.h Msg4.h Process.h Msg28.h SafeList.h
|
||||
Log.h CollectionRec.h Url.h ip.h Parms.h Xml.h XmlNode.h Lang.h \
|
||||
Iso8859.h iana_charset.h SafeBuf.h HttpRequest.h Mem.h Conf.h File.h \
|
||||
Loop.h Hostdb.h TcpSocket.h Collectiondb.h HashTable.h HashTableX.h \
|
||||
RdbList.h Pages.h HttpServer.h TcpServer.h MsgC.h UdpServer.h UdpSlot.h \
|
||||
UdpProtocol.h Dns.h DnsProtocol.h RdbCache.h Multicast.h Threads.h Rdb.h \
|
||||
RdbBase.h RdbScan.h BigFile.h RdbMap.h RdbDump.h RdbTree.h RdbMem.h \
|
||||
RdbBuckets.h Msg5.h Msg3.h RdbMerge.h Dir.h HttpMime.h Statsdb.h zlib.h \
|
||||
zconf.h Stats.h IndexReadInfo.h Query.h IndexList.h Indexdb.h \
|
||||
DiskPageCache.h Titledb.h Msg1.h Msg0.h Clusterdb.h Linkdb.h Msg2.h \
|
||||
Msg20.h Summary.h matches.h Words.h StopWords.h Bits.h Pos.h Matches.h \
|
||||
HashTableT.h Domains.h CountryCode.h Tagdb.h Events.h Sections.h Dates.h \
|
||||
Msg22.h CatRec.h Categories.h Catdb.h Datedb.h Msg4.h Process.h Msg28.h \
|
||||
SafeList.h
|
||||
PageSubmit.o: PageSubmit.cpp gb-include.h types.h fctypes.h Unicode.h \
|
||||
UnicodeProperties.h UCPropTable.h iconv.h UCNormalizer.h hash.h Errno.h \
|
||||
Log.h XmlDoc.h Lang.h Iso8859.h iana_charset.h Words.h Xml.h XmlNode.h \
|
||||
@ -2116,17 +2117,17 @@ Stats.o: Stats.cpp gb-include.h types.h fctypes.h Unicode.h \
|
||||
ip.h Hostdb.h HttpRequest.h Url.h TcpSocket.h Collectiondb.h IndexList.h \
|
||||
RdbList.h Indexdb.h Rdb.h RdbBase.h RdbScan.h BigFile.h RdbMap.h \
|
||||
RdbDump.h RdbTree.h RdbMem.h RdbBuckets.h RdbCache.h Msg5.h Msg3.h \
|
||||
HashTableX.h RdbMerge.h Dir.h DiskPageCache.h Titledb.h plotter.h \
|
||||
iostream.h streambuf.h PingServer.h Repair.h Msg1.h UdpServer.h \
|
||||
UdpSlot.h Multicast.h Threads.h Msg0.h Clusterdb.h Linkdb.h Msg2.h \
|
||||
Msg20.h Summary.h matches.h Words.h StopWords.h Bits.h Pos.h Matches.h \
|
||||
HashTableT.h Domains.h CountryCode.h Tagdb.h CollectionRec.h Parms.h \
|
||||
HashTable.h Events.h Sections.h Dates.h Msg22.h CatRec.h Categories.h \
|
||||
Catdb.h Datedb.h Msg4.h XmlDoc.h Phrases.h LangList.h Images.h Msg36.h \
|
||||
Msg13.h Msge0.h Msge1.h MsgC.h Dns.h DnsProtocol.h Msg8b.h SearchInput.h \
|
||||
Msg40.h Msg39.h Msg37.h Posdb.h TopTree.h IndexTable2.h Msg51.h Msg17.h \
|
||||
Msg2b.h Msg3a.h PostQueryRerank.h Sanity.h SiteGetter.h Title.h \
|
||||
Address.h zlib.h zconf.h Spider.h HttpMime.h
|
||||
HashTableX.h RdbMerge.h Dir.h DiskPageCache.h Titledb.h PingServer.h \
|
||||
Repair.h Msg1.h UdpServer.h UdpSlot.h Multicast.h Threads.h Msg0.h \
|
||||
Clusterdb.h Linkdb.h Msg2.h Msg20.h Summary.h matches.h Words.h \
|
||||
StopWords.h Bits.h Pos.h Matches.h HashTableT.h Domains.h CountryCode.h \
|
||||
Tagdb.h CollectionRec.h Parms.h HashTable.h Events.h Sections.h Dates.h \
|
||||
Msg22.h CatRec.h Categories.h Catdb.h Datedb.h Msg4.h XmlDoc.h Phrases.h \
|
||||
LangList.h Images.h Msg36.h Msg13.h Msge0.h Msge1.h MsgC.h Dns.h \
|
||||
DnsProtocol.h Msg8b.h SearchInput.h Msg40.h Msg39.h Msg37.h Posdb.h \
|
||||
TopTree.h IndexTable2.h Msg51.h Msg17.h Msg2b.h Msg3a.h \
|
||||
PostQueryRerank.h Sanity.h SiteGetter.h Title.h Address.h zlib.h zconf.h \
|
||||
Spider.h HttpMime.h
|
||||
Statsdb.o: Statsdb.cpp Conf.h Xml.h XmlNode.h gb-include.h types.h \
|
||||
fctypes.h Unicode.h UnicodeProperties.h UCPropTable.h iconv.h \
|
||||
UCNormalizer.h hash.h Errno.h Log.h Lang.h Iso8859.h iana_charset.h \
|
||||
|
2
Makefile
2
Makefile
@ -79,7 +79,7 @@ HOST=$(shell hostname)
|
||||
# we can only build a 32-bit binary, so we have to use the 32-bit libraries
|
||||
# provided for now.
|
||||
ifeq ("titan","$(HOST)")
|
||||
CPPFLAGS = -D_PRIVATESTUFF_ -m32 -g -Wall -pipe -Wno-write-strings -Wstrict-aliasing=0 -Wno-uninitialized -static
|
||||
CPPFLAGS = -DPRIVATESTUFF -m32 -g -Wall -pipe -Wno-write-strings -Wstrict-aliasing=0 -Wno-uninitialized -static
|
||||
LIBS = ./libz.a ./libssl.a ./libcrypto.a ./libiconv.a ./libm.a
|
||||
OBJS:=$(OBJS) seo.o
|
||||
$(shell rm seo.o)
|
||||
|
4
Mem.cpp
4
Mem.cpp
@ -163,12 +163,12 @@ static bool s_initialized = 0;
|
||||
|
||||
// our own memory manager
|
||||
//static MemPoolVar s_pool;
|
||||
void operator delete ( void *ptr ) {
|
||||
void operator delete (void *ptr) throw () {
|
||||
// now just call this
|
||||
g_mem.gbfree ( (char *)ptr , -1 , NULL );
|
||||
}
|
||||
|
||||
void operator delete [] ( void *ptr ) {
|
||||
void operator delete [] ( void *ptr ) throw () {
|
||||
// now just call this
|
||||
g_mem.gbfree ( ((char *)ptr-4) , -1 , NULL );
|
||||
}
|
||||
|
2
Mem.h
2
Mem.h
@ -210,7 +210,7 @@ inline bool relabel ( void *ptr , long size , const char *note ) {
|
||||
//#define delete(X) { delete X; g_mem.m_freed += sizeof(*X); }
|
||||
//#endif
|
||||
//#ifndef DMALLOC
|
||||
void operator delete ( void *p ) ;
|
||||
void operator delete ( void *p ) throw();
|
||||
void * operator new (size_t size) throw (std::bad_alloc);
|
||||
// you MUST call mmalloc, mcalloc and mrealloc!!
|
||||
#define malloc coreme
|
||||
|
@ -978,6 +978,10 @@ bool Msg3a::mergeLists ( ) {
|
||||
// or whatever, so let it through regardless
|
||||
ksPtr[maxj]->n0 != 0LL &&
|
||||
ksPtr[maxj]->n1 != 0 ) {
|
||||
// if family filter on and is adult...
|
||||
if ( m_r->m_familyFilter &&
|
||||
g_clusterdb.hasAdultContent((char *)ksPtr[maxj]) )
|
||||
goto skip;
|
||||
// get the hostname hash, a long long
|
||||
long sh = g_clusterdb.getSiteHash26 ((char *)ksPtr[maxj]);
|
||||
// do we have enough from this hostname already?
|
||||
|
@ -497,6 +497,7 @@ bool Msg40::getDocIds ( bool recall ) {
|
||||
m_r.m_debug = m_si->m_debug ;
|
||||
m_r.m_getDocIdScoringInfo = m_si->m_getDocIdScoringInfo;
|
||||
m_r.m_doSiteClustering = m_si->m_doSiteClustering ;
|
||||
m_r.m_familyFilter = m_si->m_familyFilter;
|
||||
m_r.m_useMinAlgo = m_si->m_useMinAlgo;
|
||||
m_r.m_useNewAlgo = m_si->m_useNewAlgo;
|
||||
m_r.m_doMaxScoreAlgo = m_si->m_doMaxScoreAlgo;
|
||||
|
@ -66,7 +66,7 @@ bool sendPageParser2 ( TcpSocket *s ,
|
||||
//addCheckboxSpan ( uh64 , divTag , addNum );
|
||||
// make basic reply
|
||||
char *reply;
|
||||
reply = "HTTP/1.1 200 OK\r\n"
|
||||
reply = "HTTP/1.0 200 OK\r\n"
|
||||
"Connection: Close\r\n";
|
||||
// that is it! send a basic reply ok
|
||||
bool status = g_httpServer.sendDynamicPage( s ,
|
||||
|
@ -289,6 +289,9 @@ bool sendPageResults ( TcpSocket *s , HttpRequest *hr ) {
|
||||
long clen;
|
||||
char *coll = hr->getString("c",&clen,"",NULL);
|
||||
if ( coll ) sb.safePrintf("&c=%s",coll);
|
||||
// forward the "ff" family filter as well
|
||||
long ff = hr->getLong("ff",0);
|
||||
if ( ff ) sb.safePrintf("&ff=%li",ff);
|
||||
// provide hash of the query so clients can't just pass in
|
||||
// a bogus id to get search results from us
|
||||
unsigned long h32 = hash32n(qstr);
|
||||
@ -343,7 +346,11 @@ bool sendPageResults ( TcpSocket *s , HttpRequest *hr ) {
|
||||
" "
|
||||
|
||||
/* SEO functionality not included yet - so redir to gigablast. */
|
||||
#ifdef PRIVATESTUFF
|
||||
"<a title=\"Rank higher in Google\" href='/seo'>"
|
||||
#else
|
||||
"<a title=\"Rank higher in Google\" href='https://www.gigablast.com/seo'>"
|
||||
#endif
|
||||
"seo"
|
||||
"</a>"
|
||||
|
||||
@ -356,6 +363,9 @@ bool sendPageResults ( TcpSocket *s , HttpRequest *hr ) {
|
||||
|
||||
" "
|
||||
|
||||
// i'm not sure why this was removed. perhaps
|
||||
// because it is not working yet because of
|
||||
// some bugs...
|
||||
"<!-- <a title=\"Advanced web search\" "
|
||||
"href=/adv.html>"
|
||||
"advanced"
|
||||
@ -2071,7 +2081,11 @@ static int printResult ( SafeBuf &sb,
|
||||
// "c=%s&\">scoring</a>",
|
||||
// coll );
|
||||
//sb.safePrintf(" - <a href=\"/print?c=%s&",coll);
|
||||
#ifdef PRIVATESTUFF
|
||||
sb.safePrintf(" - <a href=\"/seo?");//c=%s&",coll);
|
||||
#else
|
||||
sb.safePrintf(" - <a href=\"https://www.gigablast.com/seo?");//c=%s&",coll);
|
||||
#endif
|
||||
//sb.safePrintf("d=%lli",mr->m_docId);
|
||||
sb.safePrintf("u=");
|
||||
sb.urlEncode ( url , gbstrlen(url) , false );
|
||||
@ -3238,7 +3252,11 @@ bool printPairScore ( SafeBuf &sb , SearchInput *si , PairScore *ps ,
|
||||
sb.safePrintf("<td>"
|
||||
//"<a href=\"/print?d="
|
||||
//"&page=4&recycle=1&"
|
||||
#ifdef PRIVATESTUFF
|
||||
"<a href=\"/seo?d="
|
||||
#else
|
||||
"<a href=\"https://www.gigablast.com/seo?d="
|
||||
#endif
|
||||
"%lli"
|
||||
"&page=sections&"
|
||||
"hipos=%li&c=%s\">"
|
||||
@ -3316,7 +3334,11 @@ bool printPairScore ( SafeBuf &sb , SearchInput *si , PairScore *ps ,
|
||||
//"<a href=\"/print?d="
|
||||
//"%lli"
|
||||
//"&page=4&recycle=1&"
|
||||
#ifdef PRIVATESTUFF
|
||||
"<a href=\"/seo?d="
|
||||
#else
|
||||
"<a href=\"https://www.gigablast.com/seo?d="
|
||||
#endif
|
||||
"%lli"
|
||||
"&page=sections&"
|
||||
"hipos=%li&c=%s\">"
|
||||
|
14
PageRoot.cpp
14
PageRoot.cpp
@ -103,7 +103,12 @@ bool printWebHomePage ( SafeBuf &sb , HttpRequest *r ) {
|
||||
//sb.safePrintf("<body>\n");
|
||||
//g_proxy.insertLoginBarDirective ( &sb );
|
||||
sb.safePrintf("<br><br>\n");
|
||||
// try to avoid using https for images. it is like 10ms slower.
|
||||
#ifdef PRIVATESTUFF
|
||||
sb.safePrintf("<center><a href=/><img border=0 width=500 height=122 src=http://www.gigablast.com/logo-med.jpg></a>\n");
|
||||
#else
|
||||
sb.safePrintf("<center><a href=/><img border=0 width=500 height=122 src=/logo-med.jpg></a>\n");
|
||||
#endif
|
||||
sb.safePrintf("<br><br>\n");
|
||||
sb.safePrintf("<br><br><br>\n");
|
||||
sb.safePrintf("<b>web</b> <a href=/seo>seo</a> <a href=\"http://www.gigablast.com/?c=dmoz3\">directory</a> \n");
|
||||
@ -305,7 +310,11 @@ bool printAddUrlHomePage ( SafeBuf &sb , char *url , HttpRequest *r ) {
|
||||
|
||||
sb.safePrintf("\n");
|
||||
sb.safePrintf("<br><br>\n");
|
||||
#ifdef PRIVATESTUFF
|
||||
sb.safePrintf("<center><a href=/><img border=0 width=500 height=122 src=http://www.gigablast.com/logo-med.jpg></a>\n");
|
||||
#else
|
||||
sb.safePrintf("<center><a href=/><img border=0 width=500 height=122 src=/logo-med.jpg></a>\n");
|
||||
#endif
|
||||
sb.safePrintf("<br><br>\n");
|
||||
sb.safePrintf("<br><br><br>\n");
|
||||
sb.safePrintf("<a href=/>web</a> <a href=/seo>seo</a> <a href=\"http://www.gigablast.com/?c=dmoz3\">directory</a> \n");
|
||||
@ -434,7 +443,12 @@ bool printDirHomePage ( SafeBuf &sb , HttpRequest *r ) {
|
||||
sb.safePrintf("<body onload=\"x()\">\n");
|
||||
sb.safePrintf("<body>\n");
|
||||
sb.safePrintf("<br><br>\n");
|
||||
// try to avoid using https for images. it is like 10ms slower.
|
||||
#ifdef PRIVATESTUFF
|
||||
sb.safePrintf("<center><a href=/><img border=0 width=500 height=122 src=http://www.gigablast.com/logo-med.jpg></a>\n");
|
||||
#else
|
||||
sb.safePrintf("<center><a href=/><img border=0 width=500 height=122 src=/logo-med.jpg></a>\n");
|
||||
#endif
|
||||
sb.safePrintf("<br><br>\n");
|
||||
sb.safePrintf("<br><br><br>\n");
|
||||
sb.safePrintf("<a href=/>web</a> <a href=/seo>seo</a> <b>directory</b> \n");
|
||||
|
@ -1,7 +1,7 @@
|
||||
#include "gb-include.h"
|
||||
|
||||
#define X_DISPLAY_MISSING 1
|
||||
#include <plotter.h>
|
||||
//#include <plotter.h>
|
||||
//#include <fstream.h>
|
||||
#include <math.h>
|
||||
|
||||
@ -109,7 +109,7 @@ bool sendPageStatsdb ( TcpSocket *s, HttpRequest *r ) {
|
||||
// st->m_columns = DEF_COLUMNS;
|
||||
|
||||
if ( st->m_now )
|
||||
st->m_startDate = (time_t)getTimeGlobal();
|
||||
st->m_startDate = (time_t)getTimeGlobalNoCore();
|
||||
|
||||
st->m_startDateR = st->m_startDate;
|
||||
st->m_endDateR = st->m_endDate;
|
||||
|
@ -220,7 +220,7 @@ static WebPage s_pages[] = {
|
||||
// "get queries a url matches",
|
||||
// sendPageMatchingQueries , 2 } ,
|
||||
|
||||
#ifdef _PRIVATESTUFF_
|
||||
#ifdef PRIVATESTUFF
|
||||
{ PAGE_SEO, "seo",0,"seo" , 0 , 0 ,
|
||||
"SEO info",
|
||||
sendPageSEO , 2 } ,
|
||||
@ -415,7 +415,7 @@ bool Pages::sendDynamicReply ( TcpSocket *s , HttpRequest *r , long page ) {
|
||||
if ( page == PAGE_ROOT ) publicPage = true;
|
||||
// do not deny /NM/Albuquerque urls
|
||||
if ( page == PAGE_RESULTS ) publicPage = true;
|
||||
#ifdef _PRIVATESTUFF_
|
||||
#ifdef PRIVATESTUFF
|
||||
if ( page == PAGE_SEO ) publicPage = true;
|
||||
#endif
|
||||
if ( page == PAGE_ADDURL ) publicPage = true;
|
||||
|
2
Pages.h
2
Pages.h
@ -358,7 +358,7 @@ enum {
|
||||
PAGE_REINDEX ,
|
||||
PAGE_INJECT ,
|
||||
//PAGE_KEYWORDS ,
|
||||
#ifdef _PRIVATESTUFF_
|
||||
#ifdef PRIVATESTUFF
|
||||
PAGE_SEO ,
|
||||
#endif
|
||||
PAGE_ACCESS , //40
|
||||
|
@ -7995,6 +7995,10 @@ void Parms::init ( ) {
|
||||
m->m_def = "1";
|
||||
m++;
|
||||
|
||||
/*
|
||||
|
||||
MDW: use the "onsite" directive in the url filters page now...
|
||||
|
||||
m->m_title = "only spider links from same host";
|
||||
m->m_desc = "If this is true the spider will only harvest links "
|
||||
"to pages that are contained on the same host as the page "
|
||||
@ -8012,6 +8016,7 @@ void Parms::init ( ) {
|
||||
m->m_def = "0";
|
||||
m->m_group = 0;
|
||||
m++;
|
||||
*/
|
||||
|
||||
m->m_title = "do not re-add old outlinks more than this many days";
|
||||
m->m_desc = "If less than this many days have elapsed since the "
|
||||
|
@ -2712,7 +2712,7 @@ void checkKernelErrors( int fd, void *state ){
|
||||
void PingServer::sendEmailMsg ( long *lastTimeStamp , char *msg ) {
|
||||
// leave if we already sent an alert within 5 mins
|
||||
//static long s_lasttime = 0;
|
||||
long now = getTimeGlobal();
|
||||
long now = getTimeGlobalNoCore();
|
||||
if ( now - *lastTimeStamp < 5*60 ) return;
|
||||
// prepare msg to send
|
||||
//Host *h0 = g_hostdb.getHost ( 0 );
|
||||
|
@ -678,6 +678,8 @@ bool Proxy::handleRequest (TcpSocket *s){
|
||||
userId32b = ui->m_userId32;
|
||||
break;
|
||||
}
|
||||
// code is invalid if is not for an old client
|
||||
if ( userId32b == 0 ) code = NULL;
|
||||
}
|
||||
// if we have both a code and userid, check to see if it is correct
|
||||
if ( code ) {
|
||||
@ -3545,7 +3547,7 @@ bool Proxy::hitCreditCard ( StateUser *su ) {
|
||||
//
|
||||
// INSERT YOUR secret transaction/api key for authorize.net
|
||||
//
|
||||
#ifdef _PRIVATESTUFF_
|
||||
#ifdef PRIVATESTUFF
|
||||
url.safePrintf("&x_tran_key=%s",g_secret_tran_key);
|
||||
url.safePrintf("&x_login=%s",g_secret_api_key);
|
||||
#else
|
||||
|
17
Rdb.cpp
17
Rdb.cpp
@ -1715,6 +1715,23 @@ bool Rdb::needsDump ( ) {
|
||||
if ( m_useTree) {if(m_tree.is90PercentFull() ) return true;}
|
||||
else if(m_buckets.needsDump() ) return true;
|
||||
|
||||
// if adding to doledb and it has been > 1 day then force a dump
|
||||
// so that all the negative keys in the tree annihilate with the
|
||||
// keys on disk to make it easier to read a doledb list
|
||||
if ( m_rdbId != RDB_DOLEDB ) return false;
|
||||
|
||||
// set this if not valid
|
||||
//static long s_lastDumpTryTime = -1;
|
||||
//if ( s_lastDumpTryTime == -1 )
|
||||
// s_lastDumpTryTime = getTimeLocal();
|
||||
// try to dump doledb every 24 hrs
|
||||
//long now = getTimeLocal();
|
||||
//if ( now - s_lastDumpTryTime >= 3600*24 ) return true;
|
||||
|
||||
// or dump doledb if a ton of negative recs...
|
||||
if ( m_tree.getNumNegativeKeys() > 50000 ) return true;
|
||||
|
||||
// otherwise, no need to dump doledb just yet
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -797,6 +797,7 @@ m if (! cr->hasSearchPermission ( sock, encapIp ) ) {
|
||||
}
|
||||
|
||||
|
||||
m_familyFilter = r->getLong("ff",0);
|
||||
|
||||
long codeLen;
|
||||
char *code = r->getString ("code",&codeLen,NULL);
|
||||
|
21
Spider.cpp
21
Spider.cpp
@ -3323,6 +3323,7 @@ void SpiderLoop::spiderDoledUrls ( ) {
|
||||
// . if priority is -1 that means try next priority
|
||||
// . DO NOT reset the whole scan. that was what was happening
|
||||
// when we just had "goto loop;" here
|
||||
// . this means a reset above!!!
|
||||
if ( m_sc->m_pri == -1 ) return;
|
||||
// bail if waiting for lock reply, no point in reading more
|
||||
if ( m_msg12.m_gettingLocks ) return;
|
||||
@ -3377,13 +3378,25 @@ bool SpiderLoop::gotDoledbList2 ( ) {
|
||||
|
||||
// bail if list is empty
|
||||
if ( m_list.getListSize() <= 0 ) {
|
||||
if ( g_conf.m_logDebugSpider )
|
||||
log("spider: resetting doledb priority pri=%li",
|
||||
m_sc->m_pri);
|
||||
//if ( g_conf.m_logDebugSpider )
|
||||
// log("spider: resetting doledb priority pri=%li",
|
||||
// m_sc->m_pri);
|
||||
// trigger a reset
|
||||
m_sc->m_pri = -1;
|
||||
//m_sc->m_pri = -1;
|
||||
// . let the sleep timer init the loop again!
|
||||
// . no, just continue the loop
|
||||
//return true;
|
||||
// this priority is EMPTY, try next
|
||||
m_sc->m_pri = m_sc->m_pri - 1;
|
||||
// how can this happen?
|
||||
if ( m_sc->m_pri < -1 ) m_sc->m_pri = -1;
|
||||
// all done if priority is negative, it will start over
|
||||
// at the top most priority, we've completed a round
|
||||
if ( m_sc->m_pri < 0 ) return true;
|
||||
// set to next priority otherwise
|
||||
//m_sc->m_nextDoledbKey=g_doledb.makeFirstKey2 ( m_sc->m_pri );
|
||||
m_sc->m_nextDoledbKey = m_sc->m_nextKeys [m_sc->m_pri];
|
||||
// and load that list from doledb for that priority
|
||||
return true;
|
||||
}
|
||||
|
||||
|
25
Spider.h
25
Spider.h
@ -39,10 +39,29 @@ bool updateCrawlInfo ( CollectionRec *cr ,
|
||||
// There are SpiderRequests and SpiderReplies in Spiderdb. they now use
|
||||
// 16 byte keys (key128_t). when a new spiderdb rec is added to spiderdb
|
||||
// in Rdb.cpp we call addSpiderRequest() or addSpiderReply(). then
|
||||
// that rec might be added to the waiting tree. then the waiting tree
|
||||
// that rec might be added to the waiting tree. the waiting tree
|
||||
// is scanned for IPs that have a SpiderRequest whose spiderTime is
|
||||
// <= now and we grab ONE from spiderdb and add to doledb. any host
|
||||
// in our group can spider a request in doledb, but they must lock it
|
||||
// <= now and we grab ONE from spiderdb and add to doledb. we try to
|
||||
// store every IP (firstIp) we have in Spiderdb into the waiting tree,
|
||||
// but the IP is also paired up with a spider priority representing a
|
||||
// SpiderRequest in that priority from that IP. then the entries in
|
||||
// waiting tree are sorted by scheduled spider time. waiting tree does not
|
||||
// even store the SpiderRequest, but just the scheduling info for each
|
||||
// ip/priority pair. then we can quickly scan waiting tree to find
|
||||
// the next ip/priority ready for spidering, so we read the SpiderRequest
|
||||
// from spiderdb for the ip/priority/time we choose and add that
|
||||
// SpiderRequest to doledb to be spidered.
|
||||
//
|
||||
// The waiting tree is populated at
|
||||
// startup by scanning spiderdb, which might take a while to complete,
|
||||
// so it is running in the background while the gb server is up. it will
|
||||
// log "10836674298 spiderdb bytes scanned for waiting tree re-population"
|
||||
// periodically in the log as it tries to do a complete spiderdb scan
|
||||
// every 24 hours. it should not be necessary to scan spiderdb more than
|
||||
// once, but it seems we are leaking ips somehow so we do the follow-up
|
||||
// scans for now. (see populateWaitingTreeFromSpiderdb() in Spider.cpp)
|
||||
//
|
||||
// any host in our group can spider a request in doledb, but they must lock it
|
||||
// by calling getLocks() first and all hosts in the group must grant
|
||||
// them the lock for that url otherwise they remove all the locks and
|
||||
// try again on another spiderRequest in doledb.
|
||||
|
@ -3,7 +3,7 @@
|
||||
#include <errno.h>
|
||||
#include "Stats.h"
|
||||
#define X_DISPLAY_MISSING 1
|
||||
#include <plotter.h>
|
||||
//#include <plotter.h>
|
||||
#include <math.h>
|
||||
#include "Conf.h"
|
||||
#include "PingServer.h"
|
||||
|
@ -1025,7 +1025,11 @@ void UdpServer::process_ass ( long long now , long maxNiceness) {
|
||||
// bail if no main sock
|
||||
if ( m_sock < 0 ) return ;
|
||||
|
||||
long long startTimer = gettimeofdayInMillisecondsLocal();
|
||||
// if we call this while in the sighandler it crashes since
|
||||
// gettimeofdayInMillisecondsLocal() is not async safe
|
||||
long long startTimer;
|
||||
if ( ! g_inSigHandler )
|
||||
startTimer = gettimeofdayInMillisecondsLocal();
|
||||
bigloop:
|
||||
// . if we're real time, and not in a sig handler, turn 'em off
|
||||
// . readSock() and doSending() are not Async Signal Safe (ass)
|
||||
@ -1088,7 +1092,11 @@ void UdpServer::process_ass ( long long now , long maxNiceness) {
|
||||
}
|
||||
callBottom:
|
||||
if(maxNiceness < 1) return;
|
||||
long long elapsed = gettimeofdayInMillisecondsLocal() - startTimer;
|
||||
// if we call this while in the sighandler it crashes since
|
||||
// gettimeofdayInMillisecondsLocal() is not async safe
|
||||
long long elapsed = 0;
|
||||
if ( ! g_inSigHandler )
|
||||
elapsed = gettimeofdayInMillisecondsLocal() - startTimer;
|
||||
if(elapsed < 10) {
|
||||
// we did not call any, so resort to nice callbacks
|
||||
makeCallbacks_ass ( /*niceness level*/ 1 ) ;
|
||||
|
22
fctypes.cpp
22
fctypes.cpp
@ -1817,10 +1817,15 @@ long long gettimeofdayInMillisecondsGlobal() {
|
||||
return gettimeofdayInMillisecondsSynced();
|
||||
}
|
||||
|
||||
#include "Threads.h"
|
||||
|
||||
long long gettimeofdayInMillisecondsSynced() {
|
||||
// if in a sig handler then return g_now
|
||||
//if ( g_inSigHandler ) return g_nowGlobal;
|
||||
if ( g_inSigHandler ) { char *xx = NULL; *xx = 0; }
|
||||
// i find that a pthread can call this function even though
|
||||
// a signal handler is underway in the main thread!
|
||||
if ( g_inSigHandler && ! g_threads.amThread() ) {
|
||||
char *xx = NULL; *xx = 0; }
|
||||
// sanity check
|
||||
if ( ! isClockInSync() ) { char *xx = NULL; *xx = 0; }
|
||||
//if ( ! g_clockInSync )
|
||||
@ -1841,7 +1846,10 @@ long long gettimeofdayInMillisecondsSynced() {
|
||||
long long gettimeofdayInMillisecondsGlobalNoCore() {
|
||||
// if in a sig handler then return g_now
|
||||
//if ( g_inSigHandler ) return g_nowGlobal;
|
||||
if ( g_inSigHandler ) { char *xx = NULL; *xx = 0; }
|
||||
// i find that a pthread can call this function even though
|
||||
// a signal handler is underway in the main thread!
|
||||
if ( g_inSigHandler && ! g_threads.amThread() ) {
|
||||
char *xx = NULL; *xx = 0; }
|
||||
// sanity check
|
||||
//if ( ! g_clockInSync ) { char *xx = NULL; *xx = 0; }
|
||||
//if ( ! g_clockInSync )
|
||||
@ -1873,7 +1881,10 @@ uint64_t gettimeofdayInMicroseconds(void) {
|
||||
long long gettimeofdayInMilliseconds() {
|
||||
// if in a sig handler then return g_now
|
||||
//if ( g_inSigHandler ) return g_now;
|
||||
if ( g_inSigHandler ) { char *xx = NULL; *xx = 0; }
|
||||
// i find that a pthread can call this function even though
|
||||
// a signal handler is underway in the main thread!
|
||||
if ( g_inSigHandler && ! g_threads.amThread() ) {
|
||||
char *xx = NULL; *xx = 0; }
|
||||
// this isn't async signal safe...
|
||||
struct timeval tv;
|
||||
//g_loop.disableTimer();
|
||||
@ -1900,7 +1911,10 @@ time_t getTime () {
|
||||
time_t getTimeLocal () {
|
||||
// if in a sig handler then return g_now/1000
|
||||
//if ( g_inSigHandler ) return (time_t)(g_now / 1000);
|
||||
if ( g_inSigHandler ) { char *xx = NULL; *xx = 0; }
|
||||
// i find that a pthread can call this function even though
|
||||
// a signal handler is underway in the main thread!
|
||||
if ( g_inSigHandler && ! g_threads.amThread() ) {
|
||||
char *xx = NULL; *xx = 0; }
|
||||
// get time now
|
||||
unsigned long now = gettimeofdayInMilliseconds() / 1000;
|
||||
// and adjust it
|
||||
|
@ -141,6 +141,7 @@ void setS99Local () {
|
||||
// make raid and mount if not already mounted
|
||||
// a lot of times after reboot something fails!
|
||||
fprintf(fd,
|
||||
"raidstop /dev/md0\n"
|
||||
"mkraid -c /etc/raidtab --really-force /dev/md0\n"
|
||||
"mount /dev/md0\n"
|
||||
"\n"
|
||||
@ -725,7 +726,7 @@ void setEtcNetworkInterfaces() {
|
||||
return;
|
||||
}
|
||||
|
||||
fprintf("ip for %s is %s\n",g_name,ips);
|
||||
fprintf(stderr,"ip for %s is %s\n",g_name,ips);
|
||||
|
||||
fprintf ( fd ,
|
||||
"auto lo\n"
|
||||
|
20
main.cpp
20
main.cpp
@ -2781,7 +2781,7 @@ int main ( int argc , char *argv[] ) {
|
||||
}
|
||||
|
||||
// the query log split
|
||||
#ifdef _PRIVATESTUFF_
|
||||
#ifdef PRIVATESTUFF
|
||||
if ( ! loadQueryLog() ) return 1;
|
||||
#endif
|
||||
|
||||
@ -3105,7 +3105,7 @@ int main ( int argc , char *argv[] ) {
|
||||
log("db: Failed to init merge sleep callback.");
|
||||
|
||||
// SEO MODULE
|
||||
#ifdef _PRIVATESTUFF_
|
||||
#ifdef PRIVATESTUFF
|
||||
if ( ! g_loop.registerSleepCallback(2000,(void *)1,runSEOQueryLoop))
|
||||
log("db: Failed to register seo query loop");
|
||||
#endif
|
||||
@ -4802,7 +4802,7 @@ bool registerMsgHandlers1(){
|
||||
// to make things compile we need to declare this stuff since the seo
|
||||
// module is not in the open source version
|
||||
//
|
||||
#ifndef _PRIVATESTUFF_
|
||||
#ifndef PRIVATESTUFF
|
||||
SafeBuf g_qbuf;
|
||||
long g_qbufNeedSave = false;
|
||||
bool sendPageSEO(TcpSocket *, HttpRequest *) { return true;}
|
||||
@ -4835,7 +4835,7 @@ bool registerMsgHandlers2(){
|
||||
|
||||
if ( ! registerHandler4 () ) return false;
|
||||
|
||||
#ifdef _PRIVATESTUFF_
|
||||
#ifdef PRIVATESTUFF
|
||||
// seo module handlers
|
||||
if(! g_udpServer.registerHandler(0x8e,handleRequest8e)) return false;
|
||||
if(! g_udpServer.registerHandler(0x4f,handleRequest4f)) return false;
|
||||
@ -5851,6 +5851,18 @@ void dumpDoledb (char *coll,long startFileNum,long numFiles,bool includeTree){
|
||||
if ( (drec[0] & 0x01) == 0x00 ) {char *xx=NULL;*xx=0; }
|
||||
// get spider rec in it
|
||||
char *srec = drec + 12 + 4;
|
||||
// print doledb info first then spider request
|
||||
fprintf(stdout,"dolekey=%s (n1=%lu n0=%llu) "
|
||||
"pri=%li "
|
||||
"spidertime=%lu "
|
||||
"uh48=0x%llx\n",
|
||||
KEYSTR(&k,12),
|
||||
k.n1,
|
||||
k.n0,
|
||||
(long)g_doledb.getPriority(&k),
|
||||
g_doledb.getSpiderTime(&k),
|
||||
g_doledb.getUrlHash48(&k));
|
||||
fprintf(stdout,"spiderkey=");
|
||||
// print it
|
||||
g_spiderdb.print ( srec );
|
||||
// the \n
|
||||
|
BIN
pdftohtml
BIN
pdftohtml
Binary file not shown.
18
types.h
18
types.h
@ -944,15 +944,15 @@ inline char *KEYMIN() { return "\0\0\0\0"
|
||||
"\0\0\0\0"
|
||||
"\0\0\0\0"
|
||||
"\0\0\0\0"; };
|
||||
static char s_foo[] = { 0xff , 0xff , 0xff , 0xff ,
|
||||
0xff , 0xff , 0xff , 0xff ,
|
||||
0xff , 0xff , 0xff , 0xff ,
|
||||
0xff , 0xff , 0xff , 0xff ,
|
||||
0xff , 0xff , 0xff , 0xff ,
|
||||
0xff , 0xff , 0xff , 0xff ,
|
||||
0xff , 0xff , 0xff , 0xff ,
|
||||
0xff , 0xff , 0xff , 0xff };
|
||||
inline char *KEYMAX() { return s_foo; };
|
||||
static int s_foo[] = { 0xffffffff ,
|
||||
0xffffffff ,
|
||||
0xffffffff ,
|
||||
0xffffffff ,
|
||||
0xffffffff ,
|
||||
0xffffffff ,
|
||||
0xffffffff ,
|
||||
0xffffffff };
|
||||
inline char *KEYMAX() { return (char *)s_foo; };
|
||||
|
||||
|
||||
#endif
|
||||
|
Reference in New Issue
Block a user