Conflicts:
	MsgC.cpp
This commit is contained in:
Zak Betz 2015-09-14 00:53:40 -06:00
commit 78125c809b
14 changed files with 96 additions and 34 deletions

@ -35,6 +35,7 @@ BigFile::~BigFile () {
BigFile::BigFile () {
m_permissions = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH ;
m_flags = O_RDWR ; // | O_DIRECT;
m_usePartFiles = true;
// NULLify all ptrs to files
//for ( int32_t i = 0 ; i < MAX_PART_FILES ; i++ ) m_files[i] = NULL;
m_maxParts = 0;
@ -74,6 +75,8 @@ bool BigFile::set ( char *dir , char *baseFilename , char *stripeDir ) {
m_dir .setLabel("bfd");
m_baseFilename.setLabel("bfbf");
m_usePartFiles = true;
// use this 32 byte char buf to avoid a malloc if possible
m_baseFilename.setBuf (m_tmpBaseBuf,32,0,false);
@ -267,12 +270,12 @@ static int64_t s_vfd = 0;
// do not use part files for this open so we can open regular really >2GB
// sized files with it
bool BigFile::open2 ( int flags ,
void *pc ,
int64_t maxFileSize ,
int permissions ) {
return open ( flags , pc , maxFileSize , permissions , false );
}
// bool BigFile::open2 ( int flags ,
// void *pc ,
// int64_t maxFileSize ,
// int permissions ) {
// return open ( flags , pc , maxFileSize , permissions , false );
// }
// . override File::open so we can set m_numParts
// . set maxFileSize when opening a new file for writing and using
@ -282,15 +285,14 @@ bool BigFile::open ( int flags ,
//class DiskPageCache *pc ,
void *pc ,
int64_t maxFileSize ,
int permissions ,
bool usePartFiles ) {
int permissions ) {
m_flags = flags;
//m_pc = pc;
m_permissions = permissions;
m_isClosing = false;
// this is true except when parsing big warc files
m_usePartFiles = usePartFiles;
m_usePartFiles = true;//usePartFiles;
// . init the page cache for this vfd
// . this returns our "virtual fd", not the same as File::m_vfd
// . returns -1 and sets g_errno on failure
@ -1378,10 +1380,17 @@ bool readwrite_r ( FileState *fstate , ThreadEntry *t ) {
log("disk: Read of %"INT32" bytes at offset %"INT64" "
" failed because file is too short for that "
"offset? Our fd was probably stolen from us by another "
"thread. Will retry. error=%s.",
"thread. fd1=%i fd2=%i len=%i filenum=%i "
"localoffset=%i. usepart=%i error=%s.",
(int32_t)len,fstate->m_offset,
//fstate->m_this->getDir(),
//fstate->m_this->getFilename(),
fstate->m_fd1,
fstate->m_fd2,
len,
filenum,
localOffset,
fstate->m_usePartFiles,
mstrerror(errno));
errno = EBADENGINEER;
return false; // log("disk::read/write: offset too big");

@ -143,17 +143,17 @@ class BigFile {
void *pc = NULL ,
int64_t maxFileSize = -1 ,
int permissions =
S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH ,
bool usePartFiles = true );
S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH );
//bool usePartFiles = true );
// this will set usepartfiles to false! so use this to open large
// warc or arc files
bool open2 ( int flags ,
//class DiskPageCache *pc = NULL ,
void *pc = NULL ,
int64_t maxFileSize = -1 ,
int permissions =
S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH );
//bool open2 ( int flags ,
// //class DiskPageCache *pc = NULL ,
// void *pc = NULL ,
// int64_t maxFileSize = -1 ,
// int permissions =
// S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH );

@ -1700,13 +1700,24 @@ collnum_t Collectiondb::reserveCollNum ( ) {
return next;
}
// collnum_t is signed right now because we use -1 to indicate a
// bad collnum.
int32_t scanned = 0;
// search for an empty slot
for ( int32_t i = m_wrapped ; i < m_numRecs ; i++ ) {
for ( int32_t i = m_wrapped ; ; i++ ) {
// because collnum_t is 2 bytes, signed, limit this here
if ( i > 0x7fff ) i = 0;
// how can this happen?
if ( i < 0 ) i = 0;
// if we scanned the max # of recs we could have, we are done
if ( ++scanned >= m_numRecs ) break;
// skip if this is in use
if ( m_recs[i] ) continue;
// start after this one next time
m_wrapped = i+1;
// note it
log("colldb: returning wrapped collnum of %"INT32"",(int32_t)i);
log("colldb: returning wrapped collnum "
"of %"INT32"",(int32_t)i);
return (collnum_t)i;
}

1
Conf.h

@ -682,6 +682,7 @@ class Conf {
bool m_diffbotMsg13Hack ;
bool m_logDebugUrlAttempts ;
bool m_logDebugTcp ;
bool m_logDebugTcpBuf ;
bool m_logDebugThread ;
bool m_logDebugTimedb ;
bool m_logDebugTitle ;

@ -5,6 +5,7 @@
#include "sort.h"
#include "XmlDoc.h" // score32to8()
#include "Rebalance.h"
#include "Process.h"
Linkdb g_linkdb;
Linkdb g_linkdb2;
@ -1130,6 +1131,12 @@ bool Msg25::doReadLoop ( ) {
ms,m_site,m_url,m_docId,KEYSTR(&startKey,LDBKS));
}
if ( g_process.m_mode == EXIT_MODE ) {
log("linkdb: shutting down. exiting link text loop.");
g_errno = ESHUTTINGDOWN;
return false;
}
m_gettingList = true;
CollectionRec *cr = g_collectiondb.getRec ( m_collnum );

@ -88,8 +88,8 @@ bool MsgC::getIp(char *hostname , int32_t hostnameLen ,
if ( g_dns.isInCache ( key , ip ) ) {
if ( *ip == 3 ) { char *xx=NULL;*xx=0; }
// debug msg
// log(LOG_DEBUG, "dns::getIp: %s (key=%"UINT64") has ip=%s in cache!!!",
// tmp,key.n0,iptoa(*ip));
//log(LOG_DEBUG, "dns::getIp: %s (key=%"UINT64") has ip=%s in cache!!!",
// tmp,key.n0,iptoa(*ip));
return true;
}

@ -19750,6 +19750,16 @@ void Parms::init ( ) {
m->m_obj = OBJ_CONF;
m++;
m->m_title = "log debug tcp buffer messages";
m->m_cgi = "ldtb";
m->m_off = (char *)&g_conf.m_logDebugTcpBuf - g;
m->m_type = TYPE_BOOL;
m->m_def = "0";
m->m_priv = 1;
m->m_page = PAGE_LOG;
m->m_obj = OBJ_CONF;
m++;
m->m_title = "log debug thread messages";
m->m_cgi = "ldth";
m->m_off = (char *)&g_conf.m_logDebugThread - g;

@ -1865,7 +1865,7 @@ Profiler::printRealTimeInfo(SafeBuf *sb,
int fd = open ( filename , O_RDWR | O_CREAT , S_IRWXU );
if ( fd < 0 ) {
sb->safePrintf("FAILED TO OPEN %s for writing: %s"
,ff.getBufStart(),strerror(errno));
,ff.getBufStart(),mstrerror(errno));
return false;
}
for ( ; ip < ipEnd ; ip += sizeof(uint64_t) ) {
@ -1892,6 +1892,13 @@ Profiler::printRealTimeInfo(SafeBuf *sb,
// restrict to top 100 lines
char *x = out.getBufStart();
if ( ! x ) {
sb->safePrintf("FAILED TO READ trash/output.txt: %s"
,mstrerror(g_errno));
return false;
}
int lineCount = 0;
for ( ; *x ; x++ ) {
if ( *x != '\n' ) continue;

@ -148,6 +148,7 @@ bool RdbScan::setRead ( BigFile *file ,
// ensure we don't mess around
m_fstate.m_allocBuf = NULL;
m_fstate.m_buf = NULL;
//m_fstate.m_usePartFiles = true;
// debug msg
//log("diskOff=%"INT64" nb=%"INT32"",offset,bytesToRead);
//if ( offset == 16386 && bytesToRead == 16386 )

@ -13620,10 +13620,10 @@ void gotCrawlInfoReply ( void *state , UdpSlot *slot ) {
// just copy into the stats buf
if ( ! cr->m_crawlInfoBuf.getBufStart() ) {
int32_t need = sizeof(CrawlInfo) * g_hostdb.m_numHosts;
cr->m_crawlInfoBuf.setLabel("cibuf");
cr->m_crawlInfoBuf.reserve(need);
// in case one was udp server timed out or something
cr->m_crawlInfoBuf.zeroOut();
cr->m_crawlInfoBuf.setLabel("cibuf");
}
CrawlInfo *cia = (CrawlInfo *)cr->m_crawlInfoBuf.getBufStart();

@ -5,7 +5,10 @@
#include "HttpServer.h"
#include "SpiderProxy.h"
#define LOADPOINT_EXPIRE_MS (10*60*1000)
//#define LOADPOINT_EXPIRE_MS (10*60*1000)
// make it 15 seconds not 10 minutes otherwise it gets too full with dup
// keys and really clogs things up
#define LOADPOINT_EXPIRE_MS (15*1000)
//
// BASIC DETAILS
@ -927,12 +930,12 @@ void handleRequest54 ( UdpSlot *udpSlot , int32_t niceness ) {
// and the loadbucket id
//*(int32_t *)p = bb.m_id; p += 4;
int32_t sanityCount = s_loadTable.getNumSlots();
//int32_t sanityCount = 0;//s_loadTable.getNumSlots();
// top:
// now remove old entries from the load table. entries that
// have completed and have a download end time more than
// LOADPOINT_EXPIRE_MS ago
for ( int32_t i = 0 ; i < s_loadTable.getNumSlots() ; i++ ) {
if ( sanityCount-- < 0 ) break;
// skip if empty
if ( ! s_loadTable.m_flags[i] ) continue;
// get the bucket
@ -941,8 +944,12 @@ void handleRequest54 ( UdpSlot *udpSlot , int32_t niceness ) {
if ( pp->m_downloadEndTimeMS == 0LL ) continue;
// delta t
int64_t took = nowms - pp->m_downloadEndTimeMS;
// < 10 mins?
// < 10 mins? now it's < 15 seconds to prevent clogging.
if ( took < LOADPOINT_EXPIRE_MS ) continue;
// 100 at a time
//if ( sanityCount++ > 100 ) break;
// ok, its too old, nuke it to save memory
s_loadTable.removeSlot(i);
// the keys might have buried us but we really should not
@ -950,6 +957,7 @@ void handleRequest54 ( UdpSlot *udpSlot , int32_t niceness ) {
// should we? TODO: figure it out. if we miss a few it's not
// a big deal.
i--;
//goto top;
}
// send the proxy ip/port/LBid back to user
@ -1041,6 +1049,7 @@ bool initSpiderProxyStuff() {
128,
NULL,
0,
// this slows us down
true, // allow dups?
MAX_NICENESS,
"lbtab",

@ -598,7 +598,7 @@ bool TcpServer::sendMsg ( int32_t ip ,
// we think that they are using an sd used by a streaming socket,
// who closed, but then proceed to use TcpSocket class as if he
// had not closed it.
if ( 1==2 && g_hostdb.m_hostId == 0 ) {
if ( g_conf.m_logDebugTcpBuf ) {
SafeBuf sb;
sb.safePrintf("tcp: open newsd=%i sendbuf=",s->m_sd);
sb.safeTruncateEllipsis (sendBuf,sendBufSize,200);
@ -2276,7 +2276,7 @@ void TcpServer::destroySocket ( TcpSocket *s ) {
// 0 is the FD for stdin so i don't know how that is happening.
if ( sd != 0 ) cret = ::close ( sd );
if ( 1==2 && g_hostdb.m_hostId == 0 ) {
if ( g_conf.m_logDebugTcpBuf ) {
SafeBuf sb;
sb.safePrintf("tcp: closing sd=%i bytessent=%i "
"sendbufused=%i streaming=%i "
@ -2619,7 +2619,7 @@ TcpSocket *TcpServer::acceptSocket ( ) {
// we think that they are using an sd used by a streaming socket,
// who closed, but then proceed to use TcpSocket class as if he
// had not closed it.
if ( 1==2 && g_hostdb.m_hostId == 0 ) {
if ( g_conf.m_logDebugTcpBuf ) {
SafeBuf sb;
sb.safePrintf("tcp: accept newsd=%i incoming req",newsd);
//sb.safeTruncateEllipsis (sendBuf,sendBufSize,200);

@ -226,7 +226,7 @@ void XmlDoc::reset ( ) {
if ( ! msg7 ) continue;
if(msg7->m_inUse) {
log("build: archive: reseting xmldoc when msg7s are outstanding");
}
mdelete ( msg7 , sizeof(Msg7) , "xdmsg7" );
delete ( msg7 );
@ -3353,6 +3353,10 @@ void doneInjectingArchiveRec ( void *state ) {
xd->m_numInjectionsOut--;
log("build: archive: injection thread returned. %"INT32" out now.",
xd->m_numInjectionsOut);
// reset g_errno so it doesn't error out in ::indexDoc() when
// we are injecting a ton of these msg7s and then xmldoc ends up
// getting reset and when a msg7 reply comes back in, we core
g_errno = 0;
xd->m_masterLoop ( xd );
}
@ -19359,8 +19363,10 @@ BigFile *XmlDoc::getUtf8ContentInFile ( int64_t *fileSizeArg ) {
m_fileSize = m_file.getFileSize();
m_fileValid = true;
*fileSizeArg = m_fileSize;
// open2() has usepartfiles = false!!!
m_file.open2(O_RDONLY);
m_file.open(O_RDONLY);
// open() sets m_usePartFiles to true, so explicitly set it to
// false here afterwards. keeping true as the default makes it harder
// for it to accidentally end up false, because that messes things up
m_file.m_usePartFiles = false;
return &m_file;
}

@ -27,6 +27,7 @@ bool sendPageSEO(TcpSocket *s, HttpRequest *hr) {return true;}
//SafeBuf g_qbuf;
bool g_recoveryMode;
int32_t g_recoveryLevel;
int g_inMemcpy;