Fix file descriptor leak in Dir class.

Try to fix a core dump caused by a thread receiving SIGALRM.
Try to set RLIMIT_NOFILE to 1024 at startup in case
more are allowed.
This commit is contained in:
Matt Wells 2013-11-19 13:41:56 -08:00
parent 35d22bd9aa
commit c669f8c138
9 changed files with 55 additions and 9 deletions

@ -247,7 +247,12 @@ bool Collectiondb::addRec ( char *coll , char *cpc , long cpclen , bool isNew ,
// MDW: ensure not created on disk since time of last load
char dname[512];
sprintf(dname, "%scoll.%s.%li/",g_hostdb.m_dir,coll,i);
if ( isNew && opendir ( dname ) ) {
DIR *dir = NULL;
if ( isNew )
dir = opendir ( dname );
if ( dir )
closedir ( dir );
if ( isNew && dir ) {
g_errno = EEXIST;
return log("admin: Trying to create collection %s but "
"directory %s already exists on disk.",coll,dname);
@ -911,7 +916,10 @@ bool Collectiondb::resetColl ( char *coll , WaitEntry *we , bool purgeSeeds) {
g_hostdb.m_dir,
cr->m_coll,
(long)newCollnum);
if ( opendir ( dname ) ) {
DIR *dir = opendir ( dname );
if ( dir )
closedir ( dir );
if ( dir ) {
//g_errno = EEXIST;
log("admin: Trying to create collection %s but "
"directory %s already exists on disk.",coll,dname);

@ -5,6 +5,7 @@
// Default constructor: starts with no directory name, no DIR handle,
// and nothing pending to close.
Dir::Dir ( ) {
m_dirname = NULL;
m_dir = NULL;
// close() only calls closedir() when this flag is set
m_needsClose = false;
}
@ -40,7 +41,8 @@ bool Dir::set ( char *dirname ) {
}
bool Dir::close ( ) {
if ( m_dir ) closedir ( m_dir );
if ( m_dir && m_needsClose ) closedir ( m_dir );
m_needsClose = false;
return true;
}
@ -56,6 +58,7 @@ bool Dir::open ( ) {
if ( ! m_dir )
return log("disk: opendir(%s) : %s",
m_dirname,strerror( g_errno ) );
m_needsClose = true;
return true;
}

1
Dir.h

@ -49,6 +49,7 @@ class Dir {
char *m_dirname;
DIR *m_dir;
bool m_needsClose;
};
#endif

4
Rdb.h

@ -10,7 +10,7 @@
#include "RdbMem.h"
#include "RdbCache.h"
#include "RdbDump.h"
#include "Dir.h"
//#include "Dir.h"
#include "RdbBuckets.h"
// . each Rdb instance has an ID
@ -353,7 +353,7 @@ class Rdb {
bool m_dedup;
long m_fixedDataSize;
Dir m_dir;
//Dir m_dir;
char m_dbname [32];
long m_dbnameLen;

@ -610,6 +610,8 @@ bool RdbBase::setFiles ( ) {
return false;
}
m_dir.close();
if ( ! converting ) return true;
// now if we are converting old titledb names to new...

@ -1207,6 +1207,8 @@ bool SpiderColl::makeDoleIPTable ( ) {
return true;
}
// . now use collnum as top long.
// . it'd be nice to have a growTree() function
key_t makeWaitingTreeKey ( uint64_t spiderTimeMS , long firstIp ) {
// sanity
if ( ((long long)spiderTimeMS) < 0 ) { char *xx=NULL;*xx=0; }

@ -780,7 +780,10 @@ TcpSocket *TcpServer::getNewSocket ( ) {
log("tcp: using statically linked libc that only supports "
"an fd of up to %li, but got an fd = %li. fd_set is "
"only geared for 1024 bits of file descriptors for "
"doing poll() in Loop.cpp",
"doing poll() in Loop.cpp. Ensure 'ulimit -a' limits "
"open files to 1024. "
"Check open fds using ls /proc/<gb-pid>/fds/ and ensure "
"they are all BELOW 1024.",
(long)MAX_NUM_FDS,(long)sd);
char *xx=NULL;*xx=0;
}

@ -2025,9 +2025,24 @@ bool ThreadQueue::launchThread ( ThreadEntry *te ) {
// assume it does not go through
t->m_needsJoin = false;
// pthread inherits our sigmask, so don't let it handle sigalrm
// signals in Loop.cpp, it'll screw things up. that handler
// is only meant to be called by the main process. if we end up
// double calling it, this thread may think g_callback is non-null
// then it gets set to NULL, then the thread cores! seen it...
sigset_t sigs;
sigemptyset ( &sigs );
sigaddset ( &sigs , SIGALRM );
if ( sigprocmask ( SIG_BLOCK , &sigs , NULL ) < 0 )
log("threads: failed to block sig");
// this returns 0 on success, or the errno otherwise
g_errno = pthread_create ( &t->m_joinTid , &s_attr, startUp2 , t) ;
if ( sigprocmask ( SIG_UNBLOCK , &sigs , NULL ) < 0 )
log("threads: failed to unblock sig");
#endif
// we're back from pthread_create

@ -2453,9 +2453,21 @@ int main ( int argc , char *argv[] ) {
if ( setrlimit(RLIMIT_CORE,&lim) )
log("db: setrlimit: %s.", mstrerror(errno) );
// limit fds
//lim.rlim_cur = lim.rlim_max = 511;
//if ( setrlimit(RLIMIT_NOFILE,&lim))
// log("db: setrlimit2: %s.", mstrerror(errno) );
// try to prevent core from systems where it is above 1024
// because our FD_ISSET() libc function will core! (it's older)
long NOFILE = 1024;
lim.rlim_cur = lim.rlim_max = NOFILE;
if ( setrlimit(RLIMIT_NOFILE,&lim))
log("db: setrlimit RLIMIT_NOFILE %li: %s.",
NOFILE,mstrerror(errno) );
struct rlimit rlim;
getrlimit ( RLIMIT_NOFILE,&rlim);
if ( (long)rlim.rlim_max > NOFILE || (long)rlim.rlim_cur > NOFILE ) {
log("db: setrlimit RLIMIT_NOFILE failed!");
char *xx=NULL;*xx=0;
}
log("db: RLIMIT_NOFILE = %li",(long)rlim.rlim_max);
//exit(0);
// . disable o/s's and hard drive's read ahead
// . set multcount to 16 --> 1 interrupt for every 16 sectors read
// . multcount of 16 reduces OS overhead by 30%-50% (more throughput)