forked from Mirrors/privacore-open-source-search-engine
Merge branch 'ia-zak' into testing
Conflicts: main.cpp
This commit is contained in:
BigFile.cppBigFile.hCollectiondb.cppConf.cppConf.hFile.cppFile.hHashTableX.cppHostdb.cppHostdb.hImages.cppLanguage.cppLog.cppLoop.cppMsg13.cppMsg1f.cppMsg20.cppMsg4.cppMsg40.cppPageInject.cppPageStatsdb.cppParms.cppPosdb.cppProfiler.cppRdb.cppRdbBase.cppRdbBuckets.cppRdbCache.cppRdbDump.cppRdbList.cppRdbTree.cppSafeBuf.cppSpeller.cppSpider.cppStatsdb.cppTcpServer.cppThreads.cppUdpServer.cppUdpServer.hXmlDoc.cppfctypes.cppgbfilter.cppmain.cppqa.cpp
script
@ -33,7 +33,7 @@ BigFile::~BigFile () {
|
||||
//#define O_DIRECT 040000
|
||||
|
||||
BigFile::BigFile () {
|
||||
m_permissions = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH ;
|
||||
//m_permissions = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH ;
|
||||
m_flags = O_RDWR ; // | O_DIRECT;
|
||||
m_usePartFiles = true;
|
||||
// NULLify all ptrs to files
|
||||
@ -289,7 +289,7 @@ bool BigFile::open ( int flags ,
|
||||
|
||||
m_flags = flags;
|
||||
//m_pc = pc;
|
||||
m_permissions = permissions;
|
||||
//m_permissions = permissions;
|
||||
m_isClosing = false;
|
||||
// this is true except when parsing big warc files
|
||||
m_usePartFiles = true;//usePartFiles;
|
||||
@ -363,7 +363,7 @@ int BigFile::getfd ( int32_t n , bool forReading ) { // , int64_t *vfd ) {
|
||||
}
|
||||
// open it if not opened
|
||||
if ( ! f->calledOpen() ) {
|
||||
if ( ! f->open ( m_flags , m_permissions ) ) {
|
||||
if ( ! f->open ( m_flags , getFileCreationFlags() ) ) {
|
||||
log("disk: Failed to open file part #%"INT32".",n);
|
||||
return -1;
|
||||
}
|
||||
|
@ -353,7 +353,7 @@ class BigFile {
|
||||
SafeBuf m_newBaseFilenameDir ;//[256];
|
||||
|
||||
|
||||
int32_t m_permissions;
|
||||
//int32_t m_permissions;
|
||||
int32_t m_flags;
|
||||
|
||||
// determined in open() override
|
||||
|
@ -630,10 +630,11 @@ bool Collectiondb::addNewColl ( char *coll ,
|
||||
|
||||
// MDW: create the new directory
|
||||
retry22:
|
||||
if ( ::mkdir ( dname ,
|
||||
S_IRUSR | S_IWUSR | S_IXUSR |
|
||||
S_IRGRP | S_IWGRP | S_IXGRP |
|
||||
S_IROTH | S_IXOTH ) ) {
|
||||
if ( ::mkdir ( dname ,
|
||||
getDirCreationFlags() ) ) {
|
||||
// S_IRUSR | S_IWUSR | S_IXUSR |
|
||||
// S_IRGRP | S_IWGRP | S_IXGRP |
|
||||
// S_IROTH | S_IXOTH ) ) {
|
||||
// valgrind?
|
||||
if ( errno == EINTR ) goto retry22;
|
||||
g_errno = errno;
|
||||
@ -1398,10 +1399,11 @@ bool Collectiondb::resetColl2( collnum_t oldCollnum,
|
||||
log("admin: Trying to create collection %s but "
|
||||
"directory %s already exists on disk.",cr->m_coll,dname);
|
||||
}
|
||||
if ( ::mkdir ( dname ,
|
||||
S_IRUSR | S_IWUSR | S_IXUSR |
|
||||
S_IRGRP | S_IWGRP | S_IXGRP |
|
||||
S_IROTH | S_IXOTH ) ) {
|
||||
if ( ::mkdir ( dname ,
|
||||
getDirCreationFlags() ) ) {
|
||||
// S_IRUSR | S_IWUSR | S_IXUSR |
|
||||
// S_IRGRP | S_IWGRP | S_IXGRP |
|
||||
// S_IROTH | S_IXOTH ) ) {
|
||||
// valgrind?
|
||||
//if ( errno == EINTR ) goto retry22;
|
||||
//g_errno = errno;
|
||||
|
18
Conf.cpp
18
Conf.cpp
@ -9,6 +9,24 @@
|
||||
|
||||
Conf g_conf;
|
||||
|
||||
static bool s_setUmask = false;;
|
||||
|
||||
mode_t getFileCreationFlags() {
|
||||
if ( ! s_setUmask ) {
|
||||
s_setUmask = true;
|
||||
umask ( 0 );
|
||||
}
|
||||
return S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH ;
|
||||
}
|
||||
|
||||
mode_t getDirCreationFlags() {
|
||||
if ( ! s_setUmask ) {
|
||||
s_setUmask = true;
|
||||
umask ( 0 );
|
||||
}
|
||||
return S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH ;
|
||||
}
|
||||
|
||||
Conf::Conf ( ) {
|
||||
m_save = true;
|
||||
m_doingCommandLine = false;
|
||||
|
7
Conf.h
7
Conf.h
@ -43,6 +43,9 @@
|
||||
|
||||
#define MAX_GEOCODERS 4
|
||||
|
||||
mode_t getFileCreationFlags();
|
||||
mode_t getDirCreationFlags ();
|
||||
|
||||
class Conf {
|
||||
|
||||
public:
|
||||
@ -180,7 +183,9 @@ class Conf {
|
||||
//bool m_tagdbUseSeals;
|
||||
//int32_t m_tagdbMinFilesToMerge;
|
||||
//bool m_tagdbSaveCache;
|
||||
|
||||
|
||||
//bool m_makeAllFilesGroupWritable;
|
||||
|
||||
// catdb parameters
|
||||
int32_t m_catdbMaxTreeMem;
|
||||
//int32_t m_catdbMaxDiskPageCacheMem;
|
||||
|
9
File.cpp
9
File.cpp
@ -238,7 +238,10 @@ bool File::open ( int flags , int permissions ) {
|
||||
}
|
||||
// save these in case we need to reopen in getfd()
|
||||
m_flags = flags;
|
||||
m_permissions = permissions;
|
||||
//m_permissions = permissions;
|
||||
// just override and use system settings so we can get the group
|
||||
// writable/readable/executable bits if set that way in g_conf
|
||||
//m_permissions = getFileCreationFlags();
|
||||
m_calledOpen = true;
|
||||
// sanity check
|
||||
//int32_t ss = 0;
|
||||
@ -668,7 +671,7 @@ int File::getfd () {
|
||||
if ( fd == -1 ) {
|
||||
t1 = gettimeofdayInMilliseconds();
|
||||
retry7:
|
||||
fd = ::open ( getFilename() , m_flags , m_permissions );
|
||||
fd = ::open ( getFilename() , m_flags,getFileCreationFlags());
|
||||
// valgrind
|
||||
if ( fd == -1 && errno == EINTR ) goto retry7;
|
||||
// 0 means stdout, right? why am i seeing it get assigned???
|
||||
@ -676,7 +679,7 @@ int File::getfd () {
|
||||
log("disk: Got fd of 0 when opening %s.",
|
||||
getFilename());
|
||||
if ( fd == 0 )
|
||||
fd = ::open ( getFilename(), m_flags , m_permissions );
|
||||
fd=::open(getFilename(),m_flags,getFileCreationFlags());
|
||||
if ( fd == 0 )
|
||||
log("disk: Got fd of 0 when opening2 %s.",
|
||||
getFilename());
|
||||
|
2
File.h
2
File.h
@ -193,7 +193,7 @@ class File {
|
||||
|
||||
// save the permission and flag sets in case of re-opening
|
||||
int m_flags;
|
||||
int m_permissions;
|
||||
//int m_permissions;
|
||||
|
||||
char m_calledOpen;
|
||||
char m_calledSet;
|
||||
|
@ -623,8 +623,10 @@ bool HashTableX::save ( char *dir ,
|
||||
char s[1024];
|
||||
sprintf ( s , "%s/%s", dir , filename );
|
||||
int fd = ::open ( s ,
|
||||
O_RDWR | O_CREAT | O_TRUNC , S_IRUSR | S_IWUSR |
|
||||
S_IRGRP | S_IWGRP | S_IROTH);
|
||||
O_RDWR | O_CREAT | O_TRUNC ,
|
||||
getFileCreationFlags() );
|
||||
// S_IRUSR | S_IWUSR |
|
||||
// S_IRGRP | S_IWGRP | S_IROTH);
|
||||
if ( fd < 0 ) {
|
||||
//m_saveErrno = errno;
|
||||
return log("db: Could not open %s for writing: %s.",
|
||||
|
@ -2005,8 +2005,9 @@ bool Hostdb::saveHostsConf ( ) {
|
||||
sprintf ( filename, "%shosts.conf", m_dir );
|
||||
log ( LOG_INFO, "conf: Writing hosts.conf file to: %s",
|
||||
filename );
|
||||
int32_t fd = open ( filename, O_CREAT|O_WRONLY|O_TRUNC,
|
||||
S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH );
|
||||
int32_t fd = open ( filename, O_CREAT|O_WRONLY|O_TRUNC ,
|
||||
getFileCreationFlags() );
|
||||
// S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH );
|
||||
if ( !fd ) {
|
||||
log ( "conf: Failed to open %s for writing.", filename );
|
||||
return false;
|
||||
|
1
Hostdb.h
1
Hostdb.h
@ -211,6 +211,7 @@ class Host {
|
||||
int64_t m_lastPing;
|
||||
|
||||
char m_tmpBuf[4];
|
||||
int16_t m_tmpCount;
|
||||
|
||||
// . first time we sent an unanswered ping request to this host
|
||||
// . used so we can determine when to send an email alert
|
||||
|
@ -1007,7 +1007,10 @@ void Images::thumbStart_r ( bool amThread ) {
|
||||
|
||||
// Open/Create temporary file to store image to
|
||||
int fhndl;
|
||||
if( (fhndl = open( in, O_RDWR+O_CREAT, S_IWUSR+S_IRUSR )) < 0 ) {
|
||||
if( (fhndl = open( in, O_RDWR+O_CREAT ,
|
||||
getFileCreationFlags()
|
||||
// // S_IWUSR+S_IRUSR
|
||||
)) < 0 ) {
|
||||
log( "image: Could not open file, %s, for writing: %s - %d.",
|
||||
in, mstrerror( m_errno ), fhndl );
|
||||
m_imgDataSize = 0;
|
||||
|
26
Language.cpp
26
Language.cpp
@ -145,7 +145,7 @@ bool Language::convertLatin1DictToUTF8( char *infile ){
|
||||
// then open a new one for appending
|
||||
int fdw = open ( ff ,
|
||||
O_CREAT | O_RDWR | O_APPEND ,
|
||||
S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH);
|
||||
// S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH);
|
||||
if ( fdw < 0 ){
|
||||
return log("lang: Could not open for %s "
|
||||
"writing: %s.",ff, strerror(errno));
|
||||
@ -2763,7 +2763,7 @@ bool Language::makeWordFiles ( int32_t numWordsToDump , int32_t numWordsPerPhras
|
||||
// then open a new one for appending
|
||||
fds[i] = open ( ff ,
|
||||
O_CREAT | O_RDWR | O_APPEND ,
|
||||
S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH);
|
||||
// S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH);
|
||||
if ( fds[i] < 0 )
|
||||
return log("lang: Could not open %s for writing: "
|
||||
"%s.",ff, strerror(errno));
|
||||
@ -3146,7 +3146,7 @@ bool Language::makePopFiles ( int32_t numWordsToDump , int32_t numWordsPerPhrase
|
||||
// then open a new one for appending
|
||||
fds[i] = open ( ff ,
|
||||
O_CREAT | O_RDWR | O_APPEND ,
|
||||
S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH);
|
||||
// S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH);
|
||||
if ( fds[i] < 0 )
|
||||
return log("lang: Could not open %s for writing: "
|
||||
"%s.",ff, strerror(errno));
|
||||
@ -3683,7 +3683,7 @@ bool Language::makeQueryFiles ( ) {
|
||||
// then open a new one for appending
|
||||
int fdw = open ( ff ,
|
||||
O_CREAT | O_RDWR | O_APPEND ,
|
||||
S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH);
|
||||
// S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH);
|
||||
if ( fdw < 0 ){
|
||||
return log("lang: Could not open for %s "
|
||||
"writing: %s.",ff, strerror(errno));
|
||||
@ -3874,7 +3874,7 @@ bool Language::makeWikiFiles( ) {
|
||||
// then open a new one for appending
|
||||
int fdw = open ( ff ,
|
||||
O_CREAT | O_RDWR | O_APPEND ,
|
||||
S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH);
|
||||
// S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH);
|
||||
if ( fdw < 0 ){
|
||||
log("lang: Could not open for %s "
|
||||
"writing: %s.",ff, strerror(errno));
|
||||
@ -4250,7 +4250,7 @@ bool Language::gotTermFreqs( StateDict *st ){
|
||||
// then open a new one for appending
|
||||
fd = open ( ff ,
|
||||
O_CREAT | O_RDWR | O_APPEND ,
|
||||
S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH);
|
||||
// S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH);
|
||||
if ( fd < 0 ){
|
||||
log("lang: Could not open %s for writing: "
|
||||
"%s.",ff, strerror(errno));
|
||||
@ -4338,7 +4338,7 @@ bool StateAff::openAffinityFile( ){
|
||||
unlink ( ff );
|
||||
// then open a new one for appending
|
||||
m_fdw = open ( ff , O_CREAT | O_RDWR | O_APPEND ,
|
||||
S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH);
|
||||
// S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH);
|
||||
if ( m_fdw < 0 ){
|
||||
log("lang: Could not open for %s "
|
||||
"writing: %s.",ff, strerror(errno));
|
||||
@ -4537,7 +4537,7 @@ bool Language::cleanDictFile ( ) {
|
||||
// then open a new one for appending
|
||||
int fdw = open ( ff ,
|
||||
O_CREAT | O_RDWR | O_APPEND ,
|
||||
S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH);
|
||||
// S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH);
|
||||
if ( fdw < 0 ){
|
||||
return log("lang: Could not open for %s "
|
||||
"writing: %s.",ff, strerror(errno));
|
||||
@ -4590,7 +4590,7 @@ bool Language::makePhonet( char *infile){
|
||||
// then open a new one for appending
|
||||
fdw = open ( outfile ,
|
||||
O_CREAT | O_RDWR | O_APPEND ,
|
||||
S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH);
|
||||
// S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH);
|
||||
if ( fdw < 0 )
|
||||
return log("lang: Could not open %s for writing: "
|
||||
"%s.", outfile, strerror(errno));
|
||||
@ -4711,7 +4711,7 @@ bool Language::genTopPopFile ( char *infile ){
|
||||
// then open a new one for appending
|
||||
fdw = open ( outfile ,
|
||||
O_CREAT | O_RDWR | O_APPEND ,
|
||||
S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH);
|
||||
// S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH);
|
||||
if ( fdw < 0 )
|
||||
return log("lang: Could not open %s for writing: "
|
||||
"%s.", outfile, strerror(errno));
|
||||
@ -4761,7 +4761,8 @@ bool Language::genDistributedPopFile ( char *infile, uint32_t myHash ){
|
||||
// then open a new one for appending
|
||||
fdw = open ( outfile ,
|
||||
O_CREAT | O_RDWR | O_APPEND ,
|
||||
S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH);
|
||||
getFileCreationFlags() );
|
||||
// S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH);
|
||||
if ( fdw < 0 )
|
||||
return log("lang: Could not open %s for writing: "
|
||||
"%s.", outfile, strerror(errno));
|
||||
@ -4848,7 +4849,8 @@ int32_t Language::spellcheckDict(){
|
||||
// then open a new one for appending
|
||||
fdw = open ( outfile ,
|
||||
O_CREAT | O_RDWR | O_APPEND ,
|
||||
S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH);
|
||||
getFileCreationFlags() );
|
||||
// S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH);
|
||||
if ( fdw < 0 )
|
||||
return log("lang: Could not open %s for writing: "
|
||||
"%s.", outfile, strerror(errno));
|
||||
|
10
Log.cpp
10
Log.cpp
@ -132,8 +132,9 @@ bool Log::init ( char *filename ) {
|
||||
// open it for appending.
|
||||
// create with -rw-rw-r-- permissions if it's not there.
|
||||
m_fd = open ( m_filename ,
|
||||
O_APPEND | O_CREAT | O_RDWR ,
|
||||
S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH );
|
||||
O_APPEND | O_CREAT | O_RDWR ,
|
||||
getFileCreationFlags() );
|
||||
// S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH );
|
||||
if ( m_fd >= 0 ) return true;
|
||||
// bitch to stderr and return false on error
|
||||
fprintf(stderr,"could not open log file %s for appending\n",
|
||||
@ -422,8 +423,9 @@ bool Log::makeNewLogFile ( ) {
|
||||
// open it for appending.
|
||||
// create with -rw-rw-r-- permissions if it's not there.
|
||||
m_fd = open ( m_filename ,
|
||||
O_APPEND | O_CREAT | O_RDWR ,
|
||||
S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH );
|
||||
O_APPEND | O_CREAT | O_RDWR ,
|
||||
getFileCreationFlags() );
|
||||
// S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH );
|
||||
if ( m_fd >= 0 ) return true;
|
||||
// bitch to stderr and return false on error
|
||||
fprintf(stderr,"could not open new log file %s for appending\n",
|
||||
|
6
Loop.cpp
6
Loop.cpp
@ -1386,9 +1386,9 @@ bool Loop::runLoop ( ) {
|
||||
if ( m_shutdown == 2 ) {
|
||||
//log(0,"Thread is saving & shutting down urgently.");
|
||||
//while ( 1 == 1 ) sleep (50000);
|
||||
log("loop: Resuming despite thread crash.");
|
||||
m_shutdown = 0;
|
||||
goto BIGLOOP;
|
||||
//log("loop: Resuming despite thread crash.");
|
||||
//m_shutdown = 0;
|
||||
//goto BIGLOOP;
|
||||
}
|
||||
// otherwise, thread did not save, so we must do it
|
||||
log ( LOG_INIT ,"loop: Saving and shutting down urgently.");
|
||||
|
@ -2356,7 +2356,7 @@ bool getTestSpideredDate ( Url *u , int32_t *origSpideredDate , char *testDir )
|
||||
bool addTestSpideredDate ( Url *u , int32_t spideredTime , char *testDir ) {
|
||||
|
||||
// ensure dir exists
|
||||
::mkdir(testDir,S_IRWXU);
|
||||
::mkdir(testDir,getDirCreationFlags());
|
||||
|
||||
// set this
|
||||
int64_t uh64 = hash64(u->getUrl(),u->getUrlLen());
|
||||
|
@ -57,8 +57,9 @@ void handleRequest ( UdpSlot *slot , int32_t netnice ) {
|
||||
return;
|
||||
}
|
||||
|
||||
int32_t fd = open ( filename , O_RDONLY,
|
||||
S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH );
|
||||
int32_t fd = open ( filename , O_RDONLY ,
|
||||
getFileCreationFlags() );
|
||||
// S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH );
|
||||
if ( ! fd ) {
|
||||
log(LOG_DEBUG, "logviewer: Failed to open %s for reading: ",
|
||||
filename);
|
||||
|
@ -399,7 +399,9 @@ void handleRequest20 ( UdpSlot *slot , int32_t netnice ) {
|
||||
// sanity check, the size include the \0
|
||||
if ( req->m_collnum < 0 ) {
|
||||
log("query: Got empty collection in msg20 handler. FIX!");
|
||||
char *xx =NULL; *xx = 0;
|
||||
g_udpServer.sendErrorReply ( slot , ENOTFOUND );
|
||||
return;
|
||||
//char *xx =NULL; *xx = 0;
|
||||
}
|
||||
// if it's not stored locally that's an error
|
||||
if ( req->m_docId >= 0 && ! g_titledb.isLocal ( req->m_docId ) ) {
|
||||
|
5
Msg4.cpp
5
Msg4.cpp
@ -1434,8 +1434,9 @@ bool saveAddsInProgress ( char *prefix ) {
|
||||
sprintf ( filename , "%s%saddsinprogress.saving",
|
||||
g_hostdb.m_dir , prefix );
|
||||
|
||||
int32_t fd = open ( filename, O_RDWR | O_CREAT | O_TRUNC ,
|
||||
S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH );
|
||||
int32_t fd = open ( filename, O_RDWR | O_CREAT | O_TRUNC ,
|
||||
getFileCreationFlags() );
|
||||
// S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH );
|
||||
if ( fd < 0 ) {
|
||||
log ("build: Failed to open %s for writing: %s",
|
||||
filename,strerror(errno));
|
||||
|
@ -696,7 +696,7 @@ bool Msg40::federatedLoop ( ) {
|
||||
// and mult based on index size
|
||||
numDocIdSplits *= mult;
|
||||
// prevent going OOM for type:article AND html
|
||||
//if ( numDocIdSplits < 5 ) numDocIdSplits = 5;
|
||||
if ( numDocIdSplits < 5 ) numDocIdSplits = 5;
|
||||
//}
|
||||
|
||||
if ( cr ) mr.m_maxQueryTerms = cr->m_maxQueryTerms;
|
||||
@ -2437,6 +2437,9 @@ bool Msg40::gotSummary ( ) {
|
||||
for ( int32_t i = 0 ; dedupPercent && i < m_numReplies ; i++ ) {
|
||||
// skip if already invisible
|
||||
if ( m_msg3a.m_clusterLevels[i] != CR_OK ) continue;
|
||||
// Skip if invalid
|
||||
if ( m_msg20[i]->m_errno ) continue;
|
||||
|
||||
// start with the first docid we have not yet checked!
|
||||
//int32_t m = oldNumContiguous;
|
||||
// get it
|
||||
@ -2455,6 +2458,8 @@ bool Msg40::gotSummary ( ) {
|
||||
// skip if already invisible
|
||||
if ( *level != CR_OK ) continue;
|
||||
// get it
|
||||
if ( m_msg20[m]->m_errno ) continue;
|
||||
|
||||
Msg20Reply *mrm = m_msg20[m]->m_r;
|
||||
// do not dedup CT_STATUS results, those are
|
||||
// spider reply "documents" that indicate the last
|
||||
|
@ -75,6 +75,13 @@ void setInjectionRequestFromParms ( TcpSocket *sock ,
|
||||
int32_t def = atoll(m->m_def);
|
||||
*ii = (char)hr->getLong(m->m_cgi,def);
|
||||
}
|
||||
else if ( m->m_type == TYPE_IP ) {
|
||||
char *ii = (char *)((char *)ir + m->m_off);
|
||||
char *is = hr->getString(m->m_cgi,NULL);
|
||||
*(int32_t *)ii = 0; // default ip to 0
|
||||
// otherwise, set the ip
|
||||
if ( is ) *(int32_t *)ii = atoip(is);
|
||||
}
|
||||
// if unsupported let developer know
|
||||
else { char *xx=NULL;*xx=0; }
|
||||
}
|
||||
@ -124,6 +131,53 @@ Host *getHostToHandleInjection ( char *url ) {
|
||||
Host *group = g_hostdb.getShard ( shardNum );
|
||||
int32_t hostNum = docId % g_hostdb.m_numHostsPerShard;
|
||||
Host *host = &group[hostNum];
|
||||
|
||||
bool isWarcInjection = false;
|
||||
int32_t ulen = gbstrlen(url);
|
||||
if ( ulen > 10 && strcmp(url+ulen-8,".warc.gz") == 0 )
|
||||
isWarcInjection = true;
|
||||
if ( ulen > 10 && strcmp(url+ulen-5,".warc") == 0 )
|
||||
isWarcInjection = true;
|
||||
|
||||
if ( ! isWarcInjection ) return host;
|
||||
|
||||
// warc files end up calling XmlDoc::indexWarcOrArc() which spawns
|
||||
// a msg7 injection request for each doc in the warc/arc file
|
||||
// so let's do load balancing differently for them so one host
|
||||
// doesn't end up doing a bunch of wget/gunzips on warc files
|
||||
// thereby bottlenecking the cluster. get the first hostid that
|
||||
// we have not sent a msg7 injection request to that is still out
|
||||
for ( int32_t i = 0 ; i < g_hostdb.m_numHosts ; i++ ) {
|
||||
Host *h = g_hostdb.getHost(i);
|
||||
h->m_tmpCount = 0;
|
||||
}
|
||||
for ( UdpSlot *slot = g_udpServer.m_head2 ;
|
||||
slot ;
|
||||
slot = slot->m_next2 ) {
|
||||
// skip if not injection request
|
||||
if ( slot->m_msgType != 0x07 ) continue;
|
||||
//if ( ! slot->m_weInitiated ) continue;
|
||||
// if we did not initiate the injection request, i.e. if
|
||||
// it is to us, skip it
|
||||
if ( ! slot->m_callback ) continue;
|
||||
// who is it from?
|
||||
int32_t hostId = slot->m_hostId;
|
||||
if ( hostId < 0 ) continue;
|
||||
Host *h = g_hostdb.getHost ( hostId );
|
||||
if ( ! h ) continue;
|
||||
h->m_tmpCount++;
|
||||
}
|
||||
int32_t min = 999999;
|
||||
Host *minh = NULL;
|
||||
for ( int32_t i = 0 ; i < g_hostdb.m_numHosts ; i++ ) {
|
||||
Host *h = g_hostdb.getHost(i);
|
||||
if ( h->m_tmpCount == 0 ) return h;
|
||||
if ( h->m_tmpCount >= min ) continue;
|
||||
min = h->m_tmpCount;
|
||||
minh = h;
|
||||
}
|
||||
if ( minh ) return minh;
|
||||
// how can this happen?
|
||||
return host;
|
||||
}
|
||||
|
||||
@ -608,7 +662,7 @@ void sendUdpReply7 ( void *state ) {
|
||||
|
||||
uint32_t statColor = 0xccffcc;
|
||||
if(xd->m_indexCode) {
|
||||
statColor = 0x4e99e9;
|
||||
statColor = 0xaaddaa;//0x4e99e9;
|
||||
}
|
||||
g_stats.addStat_r ( xd->m_rawUtf8ContentSize,
|
||||
xd->m_injectStartTime,
|
||||
@ -645,7 +699,6 @@ void sendUdpReply7 ( void *state ) {
|
||||
|
||||
void handleRequest7 ( UdpSlot *slot , int32_t netnice ) {
|
||||
|
||||
|
||||
InjectionRequest *ir = (InjectionRequest *)slot->m_readBuf;
|
||||
|
||||
// now just supply the first guy's char ** and size ptr
|
||||
|
@ -49,9 +49,18 @@ class StateStatsdb {
|
||||
static time_t genDate( char *date, int32_t dateLen ) ;
|
||||
static void sendReply ( void *st ) ;
|
||||
|
||||
static bool s_graphInUse = false;
|
||||
|
||||
// . returns false if blocked, otherwise true
|
||||
// . sets g_errno on error
|
||||
bool sendPageGraph ( TcpSocket *s, HttpRequest *r ) {
|
||||
|
||||
if ( s_graphInUse ) {
|
||||
char *msg = "stats graph calculating for another user. "
|
||||
"Try again later.";
|
||||
g_httpServer.sendErrorReply(s,500,msg);
|
||||
return true;
|
||||
}
|
||||
|
||||
char *cgi;
|
||||
int32_t cgiLen;
|
||||
@ -121,7 +130,6 @@ bool sendPageGraph ( TcpSocket *s, HttpRequest *r ) {
|
||||
st->m_endDate = st->m_endDateR;
|
||||
}
|
||||
|
||||
g_statsdb.addDocsIndexed();
|
||||
//
|
||||
// this is no longer a gif, but an html graph in g_statsdb.m_sb
|
||||
//
|
||||
@ -130,8 +138,10 @@ bool sendPageGraph ( TcpSocket *s, HttpRequest *r ) {
|
||||
st->m_samples ,
|
||||
&st->m_sb2 ,
|
||||
st ,
|
||||
sendReply ) )
|
||||
sendReply ) ) {
|
||||
s_graphInUse = true;
|
||||
return false;
|
||||
}
|
||||
|
||||
// if we didn't block call it ourselves directly
|
||||
sendReply ( st );
|
||||
@ -186,6 +196,8 @@ void genStatsGraphTable(SafeBuf *buf, StateStatsdb *st) {
|
||||
|
||||
void sendReply ( void *state ) {
|
||||
|
||||
s_graphInUse = false;
|
||||
|
||||
StateStatsdb *st = (StateStatsdb *)state;
|
||||
|
||||
if ( g_errno ) {
|
||||
|
41
Parms.cpp
41
Parms.cpp
@ -11297,7 +11297,7 @@ void Parms::init ( ) {
|
||||
m->m_type = TYPE_BOOL;
|
||||
m->m_def = "0";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_MASTER;
|
||||
m->m_obj = OBJ_CONF;
|
||||
m++;
|
||||
@ -12415,12 +12415,30 @@ void Parms::init ( ) {
|
||||
m->m_type = TYPE_BOOL;
|
||||
m->m_def = "0";
|
||||
m->m_group = 0;
|
||||
m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_flags = PF_API;//PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_MASTER;
|
||||
m->m_obj = OBJ_CONF;
|
||||
m->m_group = 0;
|
||||
m++;
|
||||
|
||||
/*
|
||||
m->m_title = "files group writable";
|
||||
m->m_desc = "Make all created files group writable? If you have "
|
||||
"multiple user accounts starting Gigablast processes you "
|
||||
"will want the files to be group writable. You will "
|
||||
"need to make sure you run gigablast under the "
|
||||
"primary group you want to use for gigablast administration.";
|
||||
m->m_cgi = "afgw";
|
||||
m->m_off = (char *)&g_conf.m_makeAllFilesGroupWritable - g;
|
||||
m->m_type = TYPE_BOOL;
|
||||
m->m_def = "0";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_API;//PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_MASTER;
|
||||
m->m_obj = OBJ_CONF;
|
||||
m->m_group = 0;
|
||||
m++;
|
||||
*/
|
||||
|
||||
|
||||
m->m_title = "max spider read threads";
|
||||
@ -15107,6 +15125,19 @@ void Parms::init ( ) {
|
||||
m->m_off = (char *)&ir.m_hopCount - (char *)&ir;
|
||||
m++;
|
||||
|
||||
m->m_title = "url IP";
|
||||
m->m_desc = "Use this IP when injecting the document. Do not use or "
|
||||
"set to 0.0.0.0, if unknown. If provided, it will save an IP "
|
||||
"lookup.";
|
||||
m->m_cgi = "urlip";
|
||||
m->m_obj = OBJ_IR;
|
||||
m->m_type = TYPE_IP;
|
||||
m->m_def = "0.0.0.0";
|
||||
m->m_flags = PF_API;
|
||||
m->m_page = PAGE_INJECT;
|
||||
m->m_off = (char *)&ir.m_injectDocIp - (char *)&ir;
|
||||
m++;
|
||||
|
||||
m->m_title = "last spider time";
|
||||
m->m_desc = "Override last time spidered";
|
||||
m->m_cgi = "lastspidered";
|
||||
@ -15213,7 +15244,10 @@ void Parms::init ( ) {
|
||||
"Separate MIME from actual content with two returns. "
|
||||
"At least put a single space in here if you want to "
|
||||
"inject empty content, otherwise the content will "
|
||||
"be downloaded from the url.";
|
||||
"be downloaded from the url. This is because the "
|
||||
"page injection form always submits the content text area "
|
||||
"even if it is empty, which should signify that the "
|
||||
"content should be downloaded.";
|
||||
m->m_cgi = "content";
|
||||
m->m_obj = OBJ_IR;
|
||||
m->m_type = TYPE_CHARPTR;
|
||||
@ -22490,6 +22524,7 @@ bool Parm::printVal ( SafeBuf *sb , collnum_t collnum , int32_t occNum ) {
|
||||
return sb->safePrintf("CMD");
|
||||
|
||||
if ( m_type == TYPE_IP )
|
||||
// may print 0.0.0.0
|
||||
return sb->safePrintf("%s",iptoa(*(int32_t *)val) );
|
||||
|
||||
log("parms: missing parm type!!");
|
||||
|
@ -6775,6 +6775,8 @@ void PosdbTable::intersectLists10_r ( ) {
|
||||
nwp[mink] = NULL;
|
||||
// avoid breach of core below now
|
||||
if ( mptr < mptrEnd ) goto mergeMore;
|
||||
// wrap it up here since done merging
|
||||
miniMergedEnd[j] = mptr;
|
||||
}
|
||||
|
||||
// breach?
|
||||
|
@ -1862,7 +1862,7 @@ Profiler::printRealTimeInfo(SafeBuf *sb,
|
||||
ff.safePrintf("%strash/profile.txt",g_hostdb.m_dir);
|
||||
char *filename = ff.getBufStart();
|
||||
unlink ( filename );
|
||||
int fd = open ( filename , O_RDWR | O_CREAT , S_IRWXU );
|
||||
int fd = open ( filename , O_RDWR | O_CREAT , getFileCreationFlags() );
|
||||
if ( fd < 0 ) {
|
||||
sb->safePrintf("FAILED TO OPEN %s for writing: %s"
|
||||
,ff.getBufStart(),mstrerror(errno));
|
||||
@ -2090,7 +2090,7 @@ Profiler::printRealTimeInfo(SafeBuf *sb,
|
||||
ff.reset();
|
||||
ff.safePrintf("%strash/qp.txt",g_hostdb.m_dir);
|
||||
filename = ff.getBufStart();
|
||||
fd = open ( filename , O_RDWR | O_CREAT , S_IRWXU );
|
||||
//fd = open ( filename , O_RDWR | O_CREAT , S_IRWXU );
|
||||
if ( fd < 0 ) {
|
||||
sb->safePrintf("FAILED TO OPEN %s for writing: %s"
|
||||
,ff.getBufStart(),strerror(errno));
|
||||
|
24
Rdb.cpp
24
Rdb.cpp
@ -374,16 +374,16 @@ bool Rdb::updateToRebuildFiles ( Rdb *rdb2 , char *coll ) {
|
||||
char dstDir[256];
|
||||
// make the trash dir if not there
|
||||
sprintf ( dstDir , "%s/trash/" , g_hostdb.m_dir );
|
||||
int32_t status = ::mkdir ( dstDir ,
|
||||
S_IRUSR | S_IWUSR | S_IXUSR |
|
||||
S_IRGRP | S_IWGRP | S_IXGRP |
|
||||
S_IROTH | S_IXOTH ) ;
|
||||
int32_t status = ::mkdir ( dstDir , getDirCreationFlags() );
|
||||
// S_IRUSR | S_IWUSR | S_IXUSR |
|
||||
// S_IRGRP | S_IWGRP | S_IXGRP |
|
||||
// S_IROTH | S_IXOTH ) ;
|
||||
// we have to create it
|
||||
sprintf ( dstDir , "%s/trash/rebuilt%"UINT32"/" , g_hostdb.m_dir , t );
|
||||
status = ::mkdir ( dstDir ,
|
||||
S_IRUSR | S_IWUSR | S_IXUSR |
|
||||
S_IRGRP | S_IWGRP | S_IXGRP |
|
||||
S_IROTH | S_IXOTH ) ;
|
||||
status = ::mkdir ( dstDir , getDirCreationFlags() );
|
||||
// S_IRUSR | S_IWUSR | S_IXUSR |
|
||||
// S_IRGRP | S_IWGRP | S_IXGRP |
|
||||
// S_IROTH | S_IXOTH ) ;
|
||||
if ( status && errno != EEXIST ) {
|
||||
g_errno = errno;
|
||||
return log("repair: Could not mkdir(%s): %s",dstDir,
|
||||
@ -643,10 +643,10 @@ bool Rdb::deleteAllRecs ( collnum_t collnum ) {
|
||||
bool makeTrashDir() {
|
||||
char trash[1024];
|
||||
sprintf(trash, "%strash/",g_hostdb.m_dir);
|
||||
if ( ::mkdir ( trash,
|
||||
S_IRUSR | S_IWUSR | S_IXUSR |
|
||||
S_IRGRP | S_IWGRP | S_IXGRP |
|
||||
S_IROTH | S_IXOTH ) == -1 ) {
|
||||
if ( ::mkdir ( trash , getDirCreationFlags() ) ) {
|
||||
// S_IRUSR | S_IWUSR | S_IXUSR |
|
||||
// S_IRGRP | S_IWGRP | S_IXGRP |
|
||||
// S_IROTH | S_IXOTH ) == -1 ) {
|
||||
if ( errno != EEXIST ) {
|
||||
log("dir: mkdir %s had error: %s",
|
||||
trash,mstrerror(errno));
|
||||
|
32
RdbBase.cpp
32
RdbBase.cpp
@ -165,10 +165,10 @@ bool RdbBase::init ( char *dir ,
|
||||
}
|
||||
// make a special "cat" dir for it if we need to
|
||||
sprintf ( tmp , "%s%s" , dir , dbname );
|
||||
int32_t status = ::mkdir ( tmp ,
|
||||
S_IRUSR | S_IWUSR | S_IXUSR |
|
||||
S_IRGRP | S_IWGRP | S_IXGRP |
|
||||
S_IROTH | S_IXOTH );
|
||||
int32_t status = ::mkdir ( tmp , getDirCreationFlags() );
|
||||
// S_IRUSR | S_IWUSR | S_IXUSR |
|
||||
// S_IRGRP | S_IWGRP | S_IXGRP |
|
||||
// S_IROTH | S_IXOTH );
|
||||
if ( status == -1 && errno != EEXIST && errno )
|
||||
return log("db: Failed to make directory %s: %s.",
|
||||
tmp,mstrerror(errno));
|
||||
@ -186,9 +186,9 @@ bool RdbBase::init ( char *dir ,
|
||||
// make a special "cat" dir for it if we need to
|
||||
sprintf ( tmp , "%scat" , dir );
|
||||
if ( ::mkdir ( tmp ,
|
||||
S_IRUSR | S_IWUSR | S_IXUSR |
|
||||
S_IRGRP | S_IWGRP | S_IXGRP |
|
||||
S_IROTH | S_IXOTH ) == -1 && errno != EEXIST )
|
||||
// S_IRUSR | S_IWUSR | S_IXUSR |
|
||||
// S_IRGRP | S_IWGRP | S_IXGRP |
|
||||
// S_IROTH | S_IXOTH ) == -1 && errno != EEXIST )
|
||||
return log("db: Failed to make directory %s: %s.",
|
||||
tmp,mstrerror(errno));
|
||||
}
|
||||
@ -202,9 +202,9 @@ bool RdbBase::init ( char *dir ,
|
||||
// make a special "stats" dir for it if necessary
|
||||
sprintf ( tmp , "%sstats" , dir );
|
||||
if ( ::mkdir ( tmp ,
|
||||
S_IRUSR | S_IWUSR | S_IXUSR |
|
||||
S_IRGRP | S_IWGRP | S_IXGRP |
|
||||
S_IROTH | S_IXOTH ) == -1 && errno != EEXIST )
|
||||
// S_IRUSR | S_IWUSR | S_IXUSR |
|
||||
// S_IRGRP | S_IWGRP | S_IXGRP |
|
||||
// S_IROTH | S_IXOTH ) == -1 && errno != EEXIST )
|
||||
return log( "db: Failed to make directory %s: %s.",
|
||||
tmp, mstrerror( errno ) );
|
||||
}
|
||||
@ -218,9 +218,9 @@ bool RdbBase::init ( char *dir ,
|
||||
// make a special "stats" dir for it if necessary
|
||||
sprintf ( tmp , "%saccess" , dir );
|
||||
if ( ::mkdir ( tmp ,
|
||||
S_IRUSR | S_IWUSR | S_IXUSR |
|
||||
S_IRGRP | S_IWGRP | S_IXGRP |
|
||||
S_IROTH | S_IXOTH ) == -1 && errno != EEXIST )
|
||||
// S_IRUSR | S_IWUSR | S_IXUSR |
|
||||
// S_IRGRP | S_IWGRP | S_IXGRP |
|
||||
// S_IROTH | S_IXOTH ) == -1 && errno != EEXIST )
|
||||
return log( "db: Failed to make directory %s: %s.",
|
||||
tmp, mstrerror( errno ) );
|
||||
}
|
||||
@ -234,9 +234,9 @@ bool RdbBase::init ( char *dir ,
|
||||
// make a special "stats" dir for it if necessary
|
||||
sprintf ( tmp , "%ssyncdb" , dir );
|
||||
if ( ::mkdir ( tmp ,
|
||||
S_IRUSR | S_IWUSR | S_IXUSR |
|
||||
S_IRGRP | S_IWGRP | S_IXGRP |
|
||||
S_IROTH | S_IXOTH ) == -1 && errno != EEXIST )
|
||||
// S_IRUSR | S_IWUSR | S_IXUSR |
|
||||
// S_IRGRP | S_IWGRP | S_IXGRP |
|
||||
// S_IROTH | S_IXOTH ) == -1 && errno != EEXIST )
|
||||
return log( "db: Failed to make directory %s: %s.",
|
||||
tmp, mstrerror( errno ) );
|
||||
}
|
||||
|
@ -2060,8 +2060,10 @@ bool RdbBuckets::fastSave_r() {
|
||||
char s[1024];
|
||||
sprintf ( s , "%s/%s-buckets-saving.dat", m_dir , m_dbname );
|
||||
int fd = ::open ( s ,
|
||||
O_RDWR | O_CREAT | O_TRUNC , S_IRUSR | S_IWUSR |
|
||||
S_IRGRP | S_IWGRP | S_IROTH);
|
||||
O_RDWR | O_CREAT | O_TRUNC ,
|
||||
getFileCreationFlags() );
|
||||
// S_IRUSR | S_IWUSR |
|
||||
// S_IRGRP | S_IWGRP | S_IROTH);
|
||||
if ( fd < 0 ) {
|
||||
m_saveErrno = errno;
|
||||
return log("db: Could not open %s for writing: %s.",
|
||||
|
@ -1474,7 +1474,7 @@ bool RdbCache::save_r ( ) {
|
||||
//f.set ( g_hostdb.m_dir , filename );
|
||||
// open the file
|
||||
//if ( ! f.open ( O_RDWR | O_CREAT ) )
|
||||
int fd = open ( filename , O_RDWR | O_CREAT , S_IRWXU );
|
||||
int fd = open ( filename , O_RDWR | O_CREAT , getFileCreationFlags() );
|
||||
if ( fd < 0 )
|
||||
return log("db: Had opening file to save cache to: %s.",
|
||||
mstrerror(errno));
|
||||
|
31
RdbDump.cpp
31
RdbDump.cpp
@ -405,12 +405,15 @@ bool RdbDump::dumpTree ( bool recall ) {
|
||||
m_totalNegDumped += m_numNegRecs;
|
||||
// . check the list we got from the tree for problems
|
||||
// . ensures keys are ordered from lowest to highest as well
|
||||
#ifdef GBSANITYCHECK
|
||||
log("dump: verifying list before dumping");
|
||||
m_list->checkList_r ( false , // removeNegRecs?
|
||||
false , // sleep on problem?
|
||||
m_rdb->m_rdbId );
|
||||
#endif
|
||||
//#ifdef GBSANITYCHECK
|
||||
if ( g_conf.m_verifyWrites ) {
|
||||
char *s = "none";
|
||||
if ( m_rdb ) s = getDbnameFromId(m_rdb->m_rdbId);
|
||||
log("dump: verifying list before dumping (rdb=%s)",s);
|
||||
m_list->checkList_r ( false , // removeNegRecs?
|
||||
false , // sleep on problem?
|
||||
m_rdb->m_rdbId );
|
||||
}
|
||||
// if list is empty, we're done!
|
||||
if ( status && m_list->isEmpty() ) {
|
||||
// consider that a rollover?
|
||||
@ -486,15 +489,15 @@ bool RdbDump::dumpList ( RdbList *list , int32_t niceness , bool recall ) {
|
||||
if ( m_list->isEmpty() ) return true;
|
||||
// we're now in dump mode again
|
||||
m_isDumping = true;
|
||||
#ifdef GBSANITYCHECK
|
||||
//#ifdef GBSANITYCHECK
|
||||
// don't check list if we're dumping an unordered list from tree!
|
||||
if ( m_orderedDump ) {
|
||||
if ( g_conf.m_verifyWrites && m_orderedDump ) {
|
||||
m_list->checkList_r ( false /*removedNegRecs?*/ );
|
||||
// print list stats
|
||||
log("dump: sk=%s ",KEYSTR(m_list->m_startKey,m_ks));
|
||||
log("dump: ek=%s ",KEYSTR(m_list->m_endKey,m_ks));
|
||||
// log("dump: sk=%s ",KEYSTR(m_list->m_startKey,m_ks));
|
||||
// log("dump: ek=%s ",KEYSTR(m_list->m_endKey,m_ks));
|
||||
}
|
||||
#endif
|
||||
//#endif
|
||||
|
||||
// before calling RdbMap::addList(), always reset list ptr
|
||||
// since we no longer call this in RdbMap::addList() so we don't
|
||||
@ -525,8 +528,10 @@ bool RdbDump::dumpList ( RdbList *list , int32_t niceness , bool recall ) {
|
||||
}
|
||||
}
|
||||
|
||||
if ( m_ks==18 ) {
|
||||
m_list->checkList_r(false,false,RDB_POSDB);
|
||||
if ( g_conf.m_verifyWrites ) {
|
||||
char rdbId = 0;
|
||||
if ( m_rdb ) rdbId = m_rdb->m_rdbId;
|
||||
m_list->checkList_r(false,false,rdbId);//RDB_POSDB);
|
||||
m_list->resetListPtr();
|
||||
}
|
||||
|
||||
|
47
RdbList.cpp
47
RdbList.cpp
@ -693,9 +693,9 @@ bool RdbList::checkList_r ( bool removeNegRecs , bool sleepOnProblem ,
|
||||
return false;
|
||||
}
|
||||
|
||||
if ( m_useHalfKeys && m_ks == 12 ) // m_ks != 18 && m_ks != 24 )
|
||||
return checkIndexList_r ( removeNegRecs ,
|
||||
sleepOnProblem );
|
||||
// if ( m_useHalfKeys && m_ks == 12 ) // m_ks != 18 && m_ks != 24 )
|
||||
// return checkIndexList_r ( removeNegRecs ,
|
||||
// sleepOnProblem );
|
||||
|
||||
//log("m_list=%"INT32"",(int32_t)m_list);
|
||||
//key_t oldk;
|
||||
@ -721,6 +721,10 @@ bool RdbList::checkList_r ( bool removeNegRecs , bool sleepOnProblem ,
|
||||
if ( KEYCMP(acceptable,KEYMIN(),m_ks)==0 )
|
||||
KEYSET ( acceptable , m_endKey , m_ks );
|
||||
char k[MAX_KEY_BYTES];
|
||||
|
||||
static int32_t th = 0;
|
||||
if ( ! th ) th = hash64Lower_a ( "roottitles" , 10 );
|
||||
|
||||
while ( ! isExhausted() ) {
|
||||
//key_t k = getCurrentKey();
|
||||
getCurrentKey( k );
|
||||
@ -734,6 +738,43 @@ bool RdbList::checkList_r ( bool removeNegRecs , bool sleepOnProblem ,
|
||||
*(int32_t *)data > 100000000 ) ) {
|
||||
char *xx = NULL; *xx = 0; }
|
||||
}
|
||||
// tagrec?
|
||||
if ( rdbId == RDB_TAGDB && ! KEYNEG(k) ) {
|
||||
//TagRec *gr = (TagRec *)getCurrentRec();
|
||||
//Tag *tag = gr->getFirstTag ( );
|
||||
//for ( ; tag ; tag = gr->getNextTag ( tag ) ) {
|
||||
Tag *tag = (Tag *)getCurrentRec();
|
||||
if ( tag->m_type == th ) {
|
||||
char *tdata = tag->getTagData();
|
||||
int32_t tsize = tag->getTagDataSize();
|
||||
// core if tag val is not \0 terminated
|
||||
if ( tsize > 0 && tdata[tsize-1]!='\0' ) {
|
||||
log("db: bad root title tag");
|
||||
char *xx=NULL;*xx=0; }
|
||||
}
|
||||
}
|
||||
if ( rdbId == RDB_SPIDERDB && ! KEYNEG(k) &&
|
||||
getCurrentDataSize() > 0 ) {
|
||||
//char *data = getCurrentData();
|
||||
char *rec = getCurrentRec();
|
||||
// bad url in spider request?
|
||||
if ( g_spiderdb.isSpiderRequest ( (key128_t *)rec ) ){
|
||||
SpiderRequest *sr = (SpiderRequest *)rec;
|
||||
if ( strncmp(sr->m_url,"http",4) != 0 ) {
|
||||
log("db: spider req url");
|
||||
char *xx=NULL;*xx=0;
|
||||
}
|
||||
}
|
||||
}
|
||||
// title bad uncompress size?
|
||||
if ( rdbId == RDB_TITLEDB && ! KEYNEG(k) ) {
|
||||
char *rec = getCurrentRec();
|
||||
int32_t usize = *(int32_t *)(rec+12+4);
|
||||
if ( usize <= 0 ) {
|
||||
log("db: bad titlerec uncompress size");
|
||||
char *xx=NULL;*xx=0;
|
||||
}
|
||||
}
|
||||
// debug msg
|
||||
// pause if it's google
|
||||
//if ((((k.n0) >> 1) & 0x0000003fffffffffLL) == 70166155664)
|
||||
|
24
RdbTree.cpp
24
RdbTree.cpp
@ -2467,8 +2467,8 @@ void threadDoneWrapper ( void *state , ThreadEntry *t ) {
|
||||
THIS->m_dbname,mstrerror(g_errno));
|
||||
else
|
||||
// log it
|
||||
log("db: Done saving %s/%s-saved.dat",
|
||||
THIS->m_dir,THIS->m_dbname);
|
||||
log("db: Done saving %s/%s-saved.dat (wrote %"INT64" bytes)",
|
||||
THIS->m_dir,THIS->m_dbname,THIS->m_bytesWritten);
|
||||
// . call callback
|
||||
if ( THIS->m_callback ) THIS->m_callback ( THIS->m_state );
|
||||
}
|
||||
@ -2488,13 +2488,29 @@ bool RdbTree::fastSave_r() {
|
||||
char s[1024];
|
||||
sprintf ( s , "%s/%s-saving.dat", m_dir , m_dbname );
|
||||
int fd = ::open ( s ,
|
||||
O_RDWR | O_CREAT | O_TRUNC , S_IRUSR | S_IWUSR |
|
||||
S_IRGRP | S_IWGRP | S_IROTH);
|
||||
O_RDWR | O_CREAT | O_TRUNC ,
|
||||
getFileCreationFlags() );
|
||||
// S_IRUSR | S_IWUSR |
|
||||
// S_IRGRP | S_IWGRP | S_IROTH);
|
||||
if ( fd < 0 ) {
|
||||
m_saveErrno = errno;
|
||||
return log("db: Could not open %s for writing: %s.",
|
||||
s,mstrerror(errno));
|
||||
}
|
||||
|
||||
redo:
|
||||
// verify the tree
|
||||
if ( g_conf.m_verifyWrites ) {
|
||||
log("db: verify writes is enabled, checking tree before "
|
||||
"saving.");
|
||||
if ( ! checkTree( false , true ) ) {
|
||||
log("db: fixing tree and re-checking");
|
||||
fixTree ( );
|
||||
goto redo;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// clear our own errno
|
||||
errno = 0;
|
||||
// . save the header
|
||||
|
16
SafeBuf.cpp
16
SafeBuf.cpp
@ -431,7 +431,7 @@ bool SafeBuf::reserve(int32_t i , char *label, bool clearIt ) {
|
||||
//buffer size.
|
||||
bool SafeBuf::reserve2x(int32_t i, char *label) {
|
||||
//watch out for overflow!
|
||||
if((m_capacity << 1) + i < 0) return false;
|
||||
if((m_capacity << 1) + i < m_capacity) return false;
|
||||
if(i + m_length >= m_capacity)
|
||||
return reserve(m_capacity + i,label);
|
||||
else return true;
|
||||
@ -449,8 +449,9 @@ int32_t SafeBuf::save ( char *fullFilename ) {
|
||||
|
||||
int32_t SafeBuf::dumpToFile(char *filename ) {
|
||||
retry22:
|
||||
int32_t fd = open ( filename , O_CREAT | O_WRONLY | O_TRUNC,
|
||||
S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH );
|
||||
int32_t fd = open ( filename , O_CREAT | O_WRONLY | O_TRUNC ,
|
||||
getFileCreationFlags() );
|
||||
//S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH );
|
||||
if ( fd < 0 ) {
|
||||
// valgrind
|
||||
if ( errno == EINTR ) goto retry22;
|
||||
@ -484,8 +485,9 @@ int32_t SafeBuf::safeSave (char *filename ) {
|
||||
fn.safePrintf( "%s.saving",filename );
|
||||
|
||||
int32_t fd = open ( fn.getBufStart() ,
|
||||
O_CREAT | O_WRONLY | O_TRUNC,
|
||||
S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH );
|
||||
O_CREAT | O_WRONLY | O_TRUNC ,
|
||||
getFileCreationFlags() );
|
||||
// S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH );
|
||||
if ( fd < 0 ) {
|
||||
// valgrind
|
||||
if ( errno == EINTR ) goto retry22;
|
||||
@ -571,8 +573,8 @@ int32_t SafeBuf::fillFromFile(char *filename) {
|
||||
reserve(results.st_size+1);
|
||||
|
||||
retry:
|
||||
int32_t fd = open ( filename , O_RDONLY,
|
||||
S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH );
|
||||
int32_t fd = open ( filename , O_RDONLY , getFileCreationFlags() );
|
||||
// S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH );
|
||||
if ( ! fd ) {
|
||||
// valgrind
|
||||
if ( errno == EINTR ) goto retry;
|
||||
|
@ -1805,7 +1805,8 @@ bool Speller::createUnifiedDict (){
|
||||
// then open a new one for appending
|
||||
int fdw = open ( ff ,
|
||||
O_CREAT | O_RDWR | O_APPEND ,
|
||||
S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH);
|
||||
getFileCreationFlags());
|
||||
// S_IRUSR |S_IWUSR |S_IRGRP |S_IWGRP| S_IROTH);
|
||||
if ( fdw < 0 ){
|
||||
return log("lang: Could not open for %s "
|
||||
"writing: %s.",ff, strerror(errno));
|
||||
|
13
Spider.cpp
13
Spider.cpp
@ -4785,7 +4785,8 @@ bool SpiderColl::scanListForWinners ( ) {
|
||||
// firstip in the record!
|
||||
if ( sreq->m_firstIp != firstIp ) {
|
||||
log("spider: request %s firstip does not match "
|
||||
"firstip in key",sreq->m_url);
|
||||
"firstip in key collnum=%i",sreq->m_url,
|
||||
(int)m_collnum);
|
||||
log("spider: ip1=%s",iptoa(sreq->m_firstIp));
|
||||
log("spider: ip2=%s",iptoa(firstIp));
|
||||
continue;
|
||||
@ -11504,6 +11505,16 @@ int32_t getUrlFilterNum2 ( SpiderRequest *sreq ,
|
||||
// never repeated after that!
|
||||
errCode != EBADIP &&
|
||||
// assume diffbot is temporarily experiencing errs
|
||||
// but the crawl, if recurring, should retry these
|
||||
// at a later point
|
||||
errCode != EDIFFBOTUNABLETOAPPLYRULES &&
|
||||
errCode != EDIFFBOTCOULDNOTPARSE &&
|
||||
errCode != EDIFFBOTCOULDNOTDOWNLOAD &&
|
||||
errCode != EDIFFBOTINVALIDAPI &&
|
||||
errCode != EDIFFBOTVERSIONREQ &&
|
||||
errCode != EDIFFBOTURLPROCESSERROR &&
|
||||
errCode != EDIFFBOTTOKENEXPIRED &&
|
||||
errCode != EDIFFBOTUNKNOWNERROR &&
|
||||
errCode != EDIFFBOTINTERNALERROR &&
|
||||
// if diffbot received empty content when d'lding
|
||||
errCode != EDIFFBOTEMPTYCONTENT &&
|
||||
|
77
Statsdb.cpp
77
Statsdb.cpp
@ -65,21 +65,19 @@ static Label s_labels[] = {
|
||||
// . 300MB/s is max read rate regardless to stop graph shrinkage
|
||||
// . use 1KB as the min resolution per pixel
|
||||
// . stored in Bps so use 1/1000 as scalar to get into KBps
|
||||
{ GRAPH_QUANTITY,200,"disk_read",1,"%.0f MBps",1.0/(1000.0*1000.0),0x000000,
|
||||
"disk read"},
|
||||
{ GRAPH_QUANTITY,200,"disk_read",1,"%.0f MBps",1.0/(1000.0*1000.0),0x000000,"disk read"},
|
||||
|
||||
// . 300MB/s is max write rate regardless to stop graph shrinkage
|
||||
// . use 1KB as the min resolution per pixel
|
||||
// . stored in Bps so use 1/1000 as scalar to get into KBps
|
||||
{GRAPH_QUANTITY,200,"disk_write",1,"%.0f Mbps",1.0/(1000.0*1000.0), 0xff0000,
|
||||
"disk write"},
|
||||
{GRAPH_QUANTITY,200,"disk_write",1,"%.0f Mbps",1.0/(1000.0*1000.0), 0xff0000, "disk write"},
|
||||
|
||||
// . 20 is the max dps regardless to stop graph shrinkage
|
||||
// . use .03 qps as the min resolution per pixel
|
||||
{GRAPH_OPS,20,"parse_doc", .005,"%.1f dps" , 1.0 , 0x00fea915,"parsed doc" },
|
||||
|
||||
|
||||
{GRAPH_QUANTITY_PER_OP,1000,"docs_per_second", .005,"%.1f docs" , .001 , 0x1F2F5C,"docs per second" },
|
||||
{GRAPH_QUANTITY_PER_OP,-1,"docs_per_second", .1,"%.1f docs per second" , -1 , 0x1F2F5C,"*successfully* indexed docs per second" },
|
||||
|
||||
// . use .1 * 1000 docs as the min resolution per pixel
|
||||
// . max = -1, means dynamic size the ymax!
|
||||
@ -88,7 +86,7 @@ static Label s_labels[] = {
|
||||
// . make it 2M now not 50M. seems like it is per pixel and theres
|
||||
// like 1000 pixels vertically. but we need to autoscale it
|
||||
// eventually
|
||||
{GRAPH_QUANTITY,2000000.0,"docs_indexed", .1,"%.0fK docs" , .001 , 0x00cc0099,"docs indexed" }
|
||||
{GRAPH_QUANTITY,-1,"docs_indexed", .1,"%.0f docs" , -1, 0x00cc0099,"docs indexed" }
|
||||
|
||||
|
||||
//{ "termlist_intersect",0x0000ff00},
|
||||
@ -122,6 +120,7 @@ Label *Statsdb::getLabel ( int32_t labelHash ) {
|
||||
return *label;
|
||||
}
|
||||
|
||||
|
||||
Statsdb::Statsdb ( ) {
|
||||
m_init = false;
|
||||
m_disabled = true;
|
||||
@ -246,6 +245,8 @@ void flushStatsWrapper ( int fd , void *state ) {
|
||||
void Statsdb::addDocsIndexed ( ) {
|
||||
|
||||
if ( ! isClockInSync() ) return;
|
||||
if ( g_hostdb.hasDeadHost() ) return;
|
||||
|
||||
|
||||
// only host #0 needs this
|
||||
if ( g_hostdb.m_hostId != 0 ) return;
|
||||
@ -270,18 +271,23 @@ void Statsdb::addDocsIndexed ( ) {
|
||||
// divide by # of groups
|
||||
total /= g_hostdb.getNumHostsPerShard();
|
||||
// skip if no change
|
||||
|
||||
if ( total == s_lastTotal ) return;
|
||||
|
||||
int32_t docsIndexedInInterval = total - s_lastTotal;
|
||||
float docsPerSecond = docsIndexedInInterval / (float)interval;
|
||||
|
||||
s_lastTotal = total;
|
||||
log("build: total docs indexed: %f. docs per second %f %i %i", (float)total, docsPerSecond, docsIndexedInInterval, interval);
|
||||
|
||||
// add it if changed though
|
||||
int64_t nowms = gettimeofdayInMillisecondsGlobal();
|
||||
addStat ( MAX_NICENESS,"docs_indexed", nowms, nowms, (float)total );
|
||||
addStat ( MAX_NICENESS,"docs_per_second", nowms, nowms, docsPerSecond );
|
||||
// Prevent a datapoint which adds all of the docs indexed to date.
|
||||
if( s_lastTotal != 0 ) {
|
||||
addStat ( MAX_NICENESS,"docs_per_second", nowms, nowms, docsPerSecond );
|
||||
}
|
||||
|
||||
s_lastTotal = total;
|
||||
}
|
||||
|
||||
// . m_key bitmap in statsdb:
|
||||
@ -896,12 +902,13 @@ char *Statsdb::plotGraph ( char *pstart ,
|
||||
bool needMax = true;
|
||||
float ymin = 0.0;
|
||||
float ymax = 0.0;
|
||||
|
||||
float yscalar = label->m_yscalar;
|
||||
char *p = pstart;
|
||||
|
||||
for ( ; p < pend ; p += 12 ) {
|
||||
// breathe
|
||||
QUICKPOLL ( m_niceness );
|
||||
if ( m_gw.getLength() > 10000000 ) break;
|
||||
// get the y
|
||||
float y2 = *(float *)(p+4);
|
||||
// get color of this point
|
||||
@ -909,7 +916,8 @@ char *Statsdb::plotGraph ( char *pstart ,
|
||||
// stop if not us
|
||||
if ( gh != graphHash ) continue;
|
||||
// put into scaled space right away
|
||||
y2 = y2 * label->m_yscalar;
|
||||
if (label->m_yscalar >= 0)
|
||||
y2 = y2 * label->m_yscalar;
|
||||
// . limit y to absolute max
|
||||
// . these units should be scaled as well!
|
||||
if ( y2 > label->m_absYMax && label->m_absYMax > 0.0 )
|
||||
@ -922,13 +930,21 @@ char *Statsdb::plotGraph ( char *pstart ,
|
||||
}
|
||||
|
||||
// force to zero for now
|
||||
ymin = 0.0;
|
||||
//ymin = 0.0;
|
||||
// . and force to ymax for now as well
|
||||
// . -1 indicates dynamic though!
|
||||
if ( label->m_absYMax > 0.0 ) ymax = label->m_absYMax;
|
||||
// add a 20% ceiling
|
||||
else ymax *= 1.20;
|
||||
// else ymax *= 1.20;
|
||||
|
||||
|
||||
if( label->m_yscalar <= 0 ) {
|
||||
if(ymax == ymin) {
|
||||
yscalar = 0;
|
||||
} else {
|
||||
yscalar = (float)DY2 / (ymax - ymin);
|
||||
}
|
||||
}
|
||||
// return that!
|
||||
char *retp = p;
|
||||
|
||||
@ -951,7 +967,7 @@ char *Statsdb::plotGraph ( char *pstart ,
|
||||
|
||||
// . pad y range if total range is small
|
||||
// . only do this for certain types of stats, like qps and disk i/o
|
||||
if ( ourDiff < minDiff ) {
|
||||
if ( label->m_yscalar >=0 && ourDiff < minDiff ) {
|
||||
float pad = (minDiff - ourDiff) / 2;
|
||||
// pad it out
|
||||
ymin -= pad ;
|
||||
@ -981,16 +997,23 @@ char *Statsdb::plotGraph ( char *pstart ,
|
||||
for ( ; p < pend ; ) {
|
||||
// breathe
|
||||
QUICKPOLL ( m_niceness );
|
||||
if ( m_gw.getLength() > 10000000 ) break;
|
||||
// first is x pixel pos
|
||||
int32_t x2 = *(int32_t *)p; p += 4;
|
||||
// then y pos
|
||||
float y2 = *(float *)p; p += 4;
|
||||
|
||||
// scale it right away
|
||||
y2 *= label->m_yscalar;
|
||||
if(label->m_yscalar < 0) {
|
||||
y2 = (y2 - ymin) * yscalar;
|
||||
}
|
||||
else {
|
||||
y2 *= yscalar;
|
||||
|
||||
}
|
||||
// adjust
|
||||
if ( y2 > ymax ) y2 = ymax;
|
||||
if ( y2 < 0 ) y2 = 0;
|
||||
|
||||
// then graphHash
|
||||
int32_t gh = *(int32_t *)p; p += 4;
|
||||
@ -1003,8 +1026,10 @@ char *Statsdb::plotGraph ( char *pstart ,
|
||||
float y1 = lasty;
|
||||
|
||||
// normalize y into pixel space
|
||||
y2 = ((float)DY2 * (y2 - ymin)) / (ymax-ymin);
|
||||
|
||||
if(label->m_yscalar >= 0 && ymax != ymin) {
|
||||
y2 = ((float)DY2 * (y2 - ymin)) / (ymax-ymin);
|
||||
}
|
||||
|
||||
// set lasts for next iteration of this loop
|
||||
lastx = x2;
|
||||
lasty = y2;
|
||||
@ -1073,13 +1098,20 @@ char *Statsdb::plotGraph ( char *pstart ,
|
||||
}
|
||||
|
||||
|
||||
float lastZ = -1;
|
||||
for ( float z = ymin ; z < ymax ; z += deltaz ) {
|
||||
// breathe
|
||||
QUICKPOLL ( m_niceness );
|
||||
// draw it
|
||||
drawHR ( z , ymin , ymax , m_gw , label , zoff , color );
|
||||
if(z == lastZ) break;
|
||||
lastZ = z;
|
||||
//if ( m_gw.getLength() > 10000000 ) break;
|
||||
}
|
||||
|
||||
if ( m_gw.getLength() > 10000000 )
|
||||
log("statsdb: graph too big");
|
||||
|
||||
return retp;
|
||||
//#endif
|
||||
|
||||
@ -1158,7 +1190,7 @@ void Statsdb::drawHR ( float z ,
|
||||
"font-size:14px;"
|
||||
"min-height:20px;"
|
||||
"min-width:3px;\""
|
||||
" class=\"color-%"XINT32"\";"
|
||||
" class=\"color-%"XINT32"\""
|
||||
">%s</div>\n"
|
||||
, (int32_t)(m_bx)
|
||||
, (int32_t)z2 +m_by
|
||||
@ -1194,6 +1226,13 @@ bool Statsdb::processList ( ) {
|
||||
m_done = true;
|
||||
}
|
||||
|
||||
// HACK: the user can request all of the events, it can
|
||||
// become quite large. so limit to 100 mb right now.
|
||||
if( m_sb3.length() > 100000000) {
|
||||
log("statsdb: truncating statsdb results.");
|
||||
m_done = true;
|
||||
}
|
||||
|
||||
|
||||
//
|
||||
// all these points are accumulated into 1-second buckets
|
||||
@ -1590,7 +1629,7 @@ void Statsdb::drawLine3 ( SafeBuf &sb ,
|
||||
"z-index:-5;"
|
||||
"min-height:%"INT32"px;"
|
||||
"min-width:%"INT32"px;\""
|
||||
"class=\"color-%"XINT32"\"></div>\n"
|
||||
" class=\"color-%"XINT32"\"></div>\n"
|
||||
, x1 + m_bx
|
||||
, (fy1 - width/2) + m_by
|
||||
, color
|
||||
@ -1599,3 +1638,5 @@ void Statsdb::drawLine3 ( SafeBuf &sb ,
|
||||
, color
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
@ -2289,11 +2289,11 @@ void TcpServer::destroySocket ( TcpSocket *s ) {
|
||||
sb.safeTruncateEllipsis(s->m_sendBuf,
|
||||
s->m_sendBufSize,
|
||||
200);
|
||||
sb.safePrintf(" readbuf=");
|
||||
sb.safePrintf(" bytesread=%i readbuf=",(int)s->m_readOffset);
|
||||
if ( s->m_readBuf )
|
||||
sb.safeTruncateEllipsis(s->m_readBuf,
|
||||
s->m_readBufSize,
|
||||
200);
|
||||
s->m_readOffset,
|
||||
2000);
|
||||
log("%s",sb.getBufStart());
|
||||
}
|
||||
|
||||
|
@ -323,7 +323,7 @@ bool Threads::init ( ) {
|
||||
if ( ! g_threads.registerType ( INTERSECT_THREAD,max,200) )
|
||||
return log("thread: Failed to register thread type." );
|
||||
// filter thread spawned to call popen() to filter an http reply
|
||||
if ( ! g_threads.registerType ( FILTER_THREAD , 1/*maxThreads*/,300) )
|
||||
if ( ! g_threads.registerType ( FILTER_THREAD, 2/*maxThreads*/,300) )
|
||||
return log("thread: Failed to register thread type." );
|
||||
// RdbTree uses this to save itself
|
||||
if ( ! g_threads.registerType ( SAVETREE_THREAD,1/*maxThreads*/,100) )
|
||||
|
@ -286,6 +286,7 @@ bool UdpServer::init ( uint16_t port, UdpProtocol *proto, int32_t niceness,
|
||||
// no requests waiting yet
|
||||
m_requestsInWaiting = 0;
|
||||
// special count
|
||||
m_msg07sInWaiting = 0;
|
||||
m_msg10sInWaiting = 0;
|
||||
m_msgc1sInWaiting = 0;
|
||||
//m_msgDsInWaiting = 0;
|
||||
@ -1005,7 +1006,7 @@ UdpSlot *UdpServer::getBestSlotToSend ( int64_t now ) {
|
||||
UdpSlot *maxi = NULL;
|
||||
int32_t score;
|
||||
//UdpSlot *slot;
|
||||
// . we send dgrams with the lowest "score" first
|
||||
// . we send dgrams with the lowest "score" first
|
||||
// . the "score" is just number of ACKs you're waiting for
|
||||
// . that way transmissions that are the most caught up to their ACKs
|
||||
// are considered faster so we send to them first
|
||||
@ -1482,6 +1483,9 @@ int32_t UdpServer::readSock_ass ( UdpSlot **slotPtr , int64_t now ) {
|
||||
// rate, these are pretty lightweight. msg 0x10 reply gen times
|
||||
// are VERY low. MDW
|
||||
bool getSlot = true;
|
||||
if ( msgType == 0x07 && m_msg07sInWaiting >= 100 )
|
||||
getSlot = false;
|
||||
|
||||
if ( msgType == 0x10 && m_msg10sInWaiting >= 50 )
|
||||
getSlot = false;
|
||||
// crawl update info from Spider.cpp
|
||||
@ -1671,6 +1675,7 @@ int32_t UdpServer::readSock_ass ( UdpSlot **slotPtr , int64_t now ) {
|
||||
// if we connected to a request slot, count it
|
||||
m_requestsInWaiting++;
|
||||
// special count
|
||||
if ( msgType == 0x07 ) m_msg07sInWaiting++;
|
||||
if ( msgType == 0x10 ) m_msg10sInWaiting++;
|
||||
if ( msgType == 0xc1 ) m_msgc1sInWaiting++;
|
||||
//if ( msgType == 0xd ) m_msgDsInWaiting++;
|
||||
@ -3122,6 +3127,7 @@ void UdpServer::destroySlot ( UdpSlot *slot ) {
|
||||
// one less request in waiting
|
||||
m_requestsInWaiting--;
|
||||
// special count
|
||||
if ( slot->m_msgType == 0x07 ) m_msg07sInWaiting--;
|
||||
if ( slot->m_msgType == 0x10 ) m_msg10sInWaiting--;
|
||||
if ( slot->m_msgType == 0xc1 ) m_msgc1sInWaiting--;
|
||||
//if ( slot->m_msgType == 0xd ) m_msgDsInWaiting--;
|
||||
|
@ -390,6 +390,7 @@ class UdpServer {
|
||||
int32_t m_requestsInWaiting;
|
||||
|
||||
// like m_requestsInWaiting but requests which spawn other requests
|
||||
int32_t m_msg07sInWaiting;
|
||||
int32_t m_msg10sInWaiting;
|
||||
int32_t m_msgc1sInWaiting;
|
||||
//int32_t m_msgDsInWaiting;
|
||||
|
276
XmlDoc.cpp
276
XmlDoc.cpp
@ -1258,6 +1258,12 @@ bool XmlDoc::set4 ( SpiderRequest *sreq ,
|
||||
utf8Content = m_mime.getContent();
|
||||
}
|
||||
|
||||
// use this to avoid ip lookup if it is not zero
|
||||
if ( forcedIp ) {
|
||||
m_ip = forcedIp;
|
||||
m_ipValid = true;
|
||||
}
|
||||
|
||||
// sometimes they supply the content they want! like when zaks'
|
||||
// injects pages from PageInject.cpp
|
||||
if ( utf8Content ) {
|
||||
@ -1290,11 +1296,6 @@ bool XmlDoc::set4 ( SpiderRequest *sreq ,
|
||||
// use this ip as well for now to avoid ip lookup
|
||||
//m_ip = atoip("127.0.0.1");
|
||||
//m_ipValid = true;
|
||||
// use this to avoid ip lookup if it is not zero
|
||||
if ( forcedIp ) {
|
||||
m_ip = forcedIp;
|
||||
m_ipValid = true;
|
||||
}
|
||||
// do not need robots.txt then
|
||||
m_isAllowed = true;
|
||||
m_isAllowedValid = true;
|
||||
@ -12077,11 +12078,25 @@ XmlDoc **XmlDoc::getRootXmlDoc ( int32_t maxCacheAge ) {
|
||||
mnew ( m_rootDoc , sizeof(XmlDoc),"xmldoc3");
|
||||
// if we had the title rec, set from that
|
||||
if ( *rtr ) {
|
||||
m_rootDoc->set2 ( m_rootTitleRec ,
|
||||
m_rootTitleRecSize , // maxSize ,
|
||||
cr->m_coll ,
|
||||
NULL , // pbuf
|
||||
m_niceness );
|
||||
if ( ! m_rootDoc->set2 ( m_rootTitleRec ,
|
||||
m_rootTitleRecSize , // maxSize ,
|
||||
cr->m_coll ,
|
||||
NULL , // pbuf
|
||||
m_niceness ) ) {
|
||||
// it was corrupted... delete this
|
||||
// possibly printed
|
||||
// " uncompress uncompressed size=..." bad uncompress
|
||||
log("build: rootdoc set2 failed");
|
||||
mdelete ( m_rootDoc , sizeof(XmlDoc) , "xdnuke");
|
||||
delete ( m_rootDoc );
|
||||
// call it empty for now, we don't want to return
|
||||
// NULL with g_errno set because it could stop
|
||||
// the whole indexing pipeline
|
||||
m_rootDoc = NULL;
|
||||
m_rootDocValid = true;
|
||||
return &m_rootDoc;
|
||||
//return NULL;
|
||||
}
|
||||
}
|
||||
// . otherwise, set the url and download it on demand
|
||||
// . this junk copied from the contactDoc->* stuff below
|
||||
@ -16948,8 +16963,9 @@ char **XmlDoc::getHttpReply2 ( ) {
|
||||
bool isInjecting = getIsInjecting();
|
||||
if ( ! isInjecting && m_sreqValid && m_sreq.m_hopCount == 0 )
|
||||
r->m_isRootSeedUrl = 1;
|
||||
if ( ! isInjecting && m_hopCountValid && m_hopCount == 0 )
|
||||
r->m_isRootSeedUrl = 1;
|
||||
// only if it was a seed for now... so comment out
|
||||
// if ( ! isInjecting && m_hopCountValid && m_hopCount == 0 )
|
||||
// r->m_isRootSeedUrl = 1;
|
||||
|
||||
// sanity check
|
||||
if ( ! m_firstIpValid ) { char *xx=NULL;*xx=0; }
|
||||
@ -17814,6 +17830,91 @@ Url **XmlDoc::getCanonicalRedirUrl ( ) {
|
||||
return &m_canonicalRedirUrlPtr;
|
||||
}
|
||||
|
||||
// returns false if none found
|
||||
bool setMetaRedirUrlFromTag ( char *p , Url *metaRedirUrl , char niceness ,
|
||||
Url *cu ) {
|
||||
// limit scan
|
||||
char *limit = p + 30;
|
||||
// skip whitespace
|
||||
for ( ; *p && p < limit && is_wspace_a(*p) ; p++ );
|
||||
// must be a num
|
||||
if ( ! is_digit(*p) ) return false;
|
||||
// init delay
|
||||
int32_t delay = atol ( p );
|
||||
// ignore long delays
|
||||
if ( delay >= 10 ) return false;
|
||||
// now find the semicolon, if any
|
||||
for ( ; *p && p < limit && *p != ';' ; p++ );
|
||||
// must have semicolon
|
||||
if ( *p != ';' ) return false;
|
||||
// skip it
|
||||
p++;
|
||||
// skip whitespace some more
|
||||
for ( ; *p && p < limit && is_wspace_a(*p) ; p++ );
|
||||
// must have URL
|
||||
if ( strncasecmp(p,"URL",3) ) return false;
|
||||
// skip that
|
||||
p += 3;
|
||||
// skip white space
|
||||
for ( ; *p && p < limit && is_wspace_a(*p) ; p++ );
|
||||
// then an equal sign
|
||||
if ( *p != '=' ) return false;
|
||||
// skip equal sign
|
||||
p++;
|
||||
// them maybe more whitespace
|
||||
for ( ; *p && p < limit && is_wspace_a(*p) ; p++ );
|
||||
// an optional quote
|
||||
if ( *p == '\"' ) p++;
|
||||
// can also be a single quote!
|
||||
if ( *p == '\'' ) p++;
|
||||
// set the url start
|
||||
char *url = p;
|
||||
// now advance to next quote or space or >
|
||||
for ( ; *p && !is_wspace_a(*p) &&
|
||||
*p !='\'' &&
|
||||
*p !='\"' &&
|
||||
*p !='>' ;
|
||||
p++);
|
||||
// that is the end
|
||||
char *urlEnd = p;
|
||||
// get size
|
||||
int32_t usize = urlEnd - url;
|
||||
// skip if too big
|
||||
if ( usize > 1024 ) {
|
||||
log("build: meta redirurl of %"INT32" bytes too big",usize);
|
||||
return false;
|
||||
}
|
||||
// get our current utl
|
||||
//Url *cu = getCurrentUrl();
|
||||
// decode what we got
|
||||
char decoded[MAX_URL_LEN];
|
||||
// convert & to "&"
|
||||
int32_t decBytes = htmlDecode(decoded,url,usize,false,niceness);
|
||||
decoded[decBytes]='\0';
|
||||
// . then the url
|
||||
// . set the url to the one in the redirect tag
|
||||
// . but if the http-equiv meta redirect url starts with a '?'
|
||||
// then just replace our cgi with that one
|
||||
if ( *url == '?' ) {
|
||||
char foob[MAX_URL_LEN*2];
|
||||
char *pf = foob;
|
||||
int32_t cuBytes = cu->getPathEnd() - cu->getUrl();
|
||||
gbmemcpy(foob,cu->getUrl(),cuBytes);
|
||||
pf += cuBytes;
|
||||
gbmemcpy ( pf , decoded , decBytes );
|
||||
pf += decBytes;
|
||||
*pf = '\0';
|
||||
metaRedirUrl->set(foob);
|
||||
}
|
||||
// . otherwise, append it right on
|
||||
// . use "url" as the base Url
|
||||
// . it may be the original url or the one we redirected to
|
||||
// . redirUrl is set to the original at the top
|
||||
else
|
||||
// addWWW = false, stripSessId=true
|
||||
metaRedirUrl->set(cu,decoded,decBytes,false,true);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// scan document for <meta http-equiv="refresh" content="0;URL=xxx">
|
||||
@ -17840,6 +17941,14 @@ Url **XmlDoc::getMetaRedirUrl ( ) {
|
||||
if ( cr->m_recycleContent || m_recycleContent )
|
||||
return &m_metaRedirUrlPtr;
|
||||
|
||||
// will this work in here?
|
||||
//uint8_t *ct = getContentType();
|
||||
//if ( ! ct ) return NULL;
|
||||
|
||||
Url *cu = getCurrentUrl();
|
||||
|
||||
bool gotOne = false;
|
||||
|
||||
// advance a bit, we are initially looking for the 'v' char
|
||||
p += 10;
|
||||
// begin the string matching loop
|
||||
@ -17879,91 +17988,64 @@ Url **XmlDoc::getMetaRedirUrl ( ) {
|
||||
p += 8;
|
||||
// skip possible quote
|
||||
if ( *p == '\"' ) p++;
|
||||
// limit scan
|
||||
limit = p + 30;
|
||||
// skip whitespace
|
||||
for ( ; *p && p < limit && is_wspace_a(*p) ; p++ );
|
||||
// must be a num
|
||||
if ( ! is_digit(*p) ) continue;
|
||||
// init delay
|
||||
int32_t delay = atol ( p );
|
||||
// ignore int32_t delays
|
||||
if ( delay >= 10 ) continue;
|
||||
// now find the semicolon, if any
|
||||
for ( ; *p && p < limit && *p != ';' ; p++ );
|
||||
// must have semicolon
|
||||
if ( *p != ';' ) continue;
|
||||
// skip it
|
||||
p++;
|
||||
// skip whitespace some more
|
||||
for ( ; *p && p < limit && is_wspace_a(*p) ; p++ );
|
||||
// must have URL
|
||||
if ( strncasecmp(p,"URL",3) ) continue;
|
||||
// skip that
|
||||
p += 3;
|
||||
// skip white space
|
||||
for ( ; *p && p < limit && is_wspace_a(*p) ; p++ );
|
||||
// then an equal sign
|
||||
if ( *p != '=' ) continue;
|
||||
// skip equal sign
|
||||
p++;
|
||||
// them maybe more whitespace
|
||||
for ( ; *p && p < limit && is_wspace_a(*p) ; p++ );
|
||||
// an optional quote
|
||||
if ( *p == '\"' ) p++;
|
||||
// can also be a single quote!
|
||||
if ( *p == '\'' ) p++;
|
||||
// set the url start
|
||||
char *url = p;
|
||||
// now advance to next quote or space or >
|
||||
for ( ; *p && !is_wspace_a(*p) &&
|
||||
*p !='\'' &&
|
||||
*p !='\"' &&
|
||||
*p !='>' ;
|
||||
p++);
|
||||
// that is the end
|
||||
char *urlEnd = p;
|
||||
// get size
|
||||
int32_t usize = urlEnd - url;
|
||||
// skip if too big
|
||||
if ( usize > 1024 ) {
|
||||
log("build: meta redirurl of %"INT32" bytes too big",usize);
|
||||
// PARSE OUT THE URL
|
||||
Url dummy;
|
||||
if ( ! setMetaRedirUrlFromTag ( p , &dummy , m_niceness ,cu))
|
||||
continue;
|
||||
gotOne = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if ( ! gotOne )
|
||||
return &m_metaRedirUrlPtr;
|
||||
|
||||
// to fix issue with scripts containing
|
||||
// document.write('<meta http-equiv="Refresh" content="0;URL=http://ww
|
||||
// we have to get the Xml. we can't call getXml() because of
|
||||
// recursion bugs so just do it directly here
|
||||
|
||||
Xml xml;
|
||||
if ( ! xml.set ( m_httpReply ,
|
||||
m_httpReplySize - 1, // make it a length
|
||||
false , // ownData?
|
||||
0 , // allocSize
|
||||
false , // pure xml?
|
||||
m_version ,
|
||||
false , // setParentsArg?
|
||||
m_niceness ,
|
||||
// assume html since getContentType() is recursive
|
||||
// on us.
|
||||
CT_HTML ) ) // *ct ) )
|
||||
// return NULL on error with g_errno set
|
||||
return NULL;
|
||||
|
||||
XmlNode *nodes = xml.getNodes();
|
||||
int32_t n = xml.getNumNodes();
|
||||
// find the first meta summary node
|
||||
for ( int32_t i = 0 ; i < n ; i++ ) {
|
||||
// continue if not a meta tag
|
||||
if ( nodes[i].m_nodeId != 68 ) continue;
|
||||
// only get content for <meta http-equiv=..>
|
||||
int32_t tagLen;
|
||||
char *tag ;
|
||||
tag = xml.getString ( i , "http-equiv" , &tagLen );
|
||||
// skip if empty
|
||||
if ( ! tag || tagLen <= 0 ) continue;
|
||||
// if not a refresh, skip it
|
||||
if ( strncasecmp ( tag , "refresh", 7 ) ) continue;
|
||||
// get the content
|
||||
tag = xml.getString ( i ,"content", &tagLen );
|
||||
// skip if empty
|
||||
if ( ! tag || tagLen <= 0 ) continue;
|
||||
// PARSE OUT THE URL
|
||||
if (!setMetaRedirUrlFromTag(p,&m_metaRedirUrl,m_niceness,cu) )
|
||||
continue;
|
||||
}
|
||||
// get our current utl
|
||||
Url *cu = getCurrentUrl();
|
||||
// decode what we got
|
||||
char decoded[MAX_URL_LEN];
|
||||
// convert & to "&"
|
||||
int32_t decBytes = htmlDecode(decoded,url,usize,false,m_niceness);
|
||||
decoded[decBytes]='\0';
|
||||
// . then the url
|
||||
// . set the url to the one in the redirect tag
|
||||
// . but if the http-equiv meta redirect url starts with a '?'
|
||||
// then just replace our cgi with that one
|
||||
if ( *url == '?' ) {
|
||||
char foob[MAX_URL_LEN*2];
|
||||
char *pf = foob;
|
||||
int32_t cuBytes = cu->getPathEnd() - cu->getUrl();
|
||||
gbmemcpy(foob,cu->getUrl(),cuBytes);
|
||||
pf += cuBytes;
|
||||
gbmemcpy ( pf , decoded , decBytes );
|
||||
pf += decBytes;
|
||||
*pf = '\0';
|
||||
m_metaRedirUrl.set(foob);
|
||||
}
|
||||
// . otherwise, append it right on
|
||||
// . use "url" as the base Url
|
||||
// . it may be the original url or the one we redirected to
|
||||
// . redirUrl is set to the original at the top
|
||||
else
|
||||
// addWWW = false, stripSessId=true
|
||||
m_metaRedirUrl.set(cu,decoded,decBytes,false,true);
|
||||
// set it
|
||||
m_metaRedirUrlPtr = &m_metaRedirUrl;
|
||||
// return it
|
||||
break;
|
||||
return &m_metaRedirUrlPtr;
|
||||
}
|
||||
|
||||
// nothing found
|
||||
return &m_metaRedirUrlPtr;
|
||||
}
|
||||
@ -18566,7 +18648,7 @@ void XmlDoc::filterStart_r ( bool amThread ) {
|
||||
errno = 0;
|
||||
// open the input file
|
||||
retry11:
|
||||
int fd = open ( in , O_WRONLY | O_CREAT , S_IRWXU );
|
||||
int fd = open ( in , O_WRONLY | O_CREAT , getFileCreationFlags() );
|
||||
if ( fd < 0 ) {
|
||||
// valgrind
|
||||
if ( errno == EINTR ) goto retry11;
|
||||
@ -34730,7 +34812,7 @@ int gbcompress7 ( unsigned char *dest ,
|
||||
errno = 0;
|
||||
// open the input file
|
||||
retry11:
|
||||
int fd = open ( in , O_WRONLY | O_CREAT , S_IRWXU );
|
||||
int fd = open ( in , O_WRONLY | O_CREAT , getFileCreationFlags() );
|
||||
if ( fd < 0 ) {
|
||||
// valgrind
|
||||
if ( errno == EINTR ) goto retry11;
|
||||
@ -39371,6 +39453,12 @@ char **XmlDoc::getRootTitleBuf ( ) {
|
||||
// sanity check, must include the null ni the size
|
||||
if ( m_rootTitleBufSize > 0 &&
|
||||
m_rootTitleBuf [ m_rootTitleBufSize - 1 ] ) {
|
||||
log("build: bad root titlebuf size not end in null char for "
|
||||
"collnum=%i",(int)m_collnum);
|
||||
ptr_rootTitleBuf = NULL;
|
||||
size_rootTitleBuf = 0;
|
||||
m_rootTitleBufValid = true;
|
||||
return (char **)&m_rootTitleBuf;
|
||||
char *xx=NULL;*xx=0;
|
||||
//m_rootTitleBuf [ m_rootTitleBufSize - 1 ] = '\0';
|
||||
//m_rootTitleBufSize++;
|
||||
|
@ -2618,4 +2618,3 @@ bool verifyUtf8 ( char *txt ) {
|
||||
int32_t tlen = gbstrlen(txt);
|
||||
return verifyUtf8(txt,tlen);
|
||||
}
|
||||
|
||||
|
@ -236,7 +236,7 @@ int filterContent ( char *buf , int32_t n , int32_t mimeLen , char ctype , int32
|
||||
|
||||
//fprintf(stderr,"in=%s\n",in);
|
||||
|
||||
int fd = open ( in , O_CREAT | O_RDWR , S_IRWXU );
|
||||
int fd = open ( in , O_CREAT | O_RDWR , S_IRWXU | S_IRWXG );
|
||||
if ( fd < 0 ) {
|
||||
fprintf(stderr,"gbfilter: open: %s\n",strerror(errno));
|
||||
return -1;
|
||||
|
6
main.cpp
6
main.cpp
@ -5022,7 +5022,7 @@ int install ( install_flag_konst_t installFlag , int32_t hostId , char *dir ,
|
||||
if ( ! f.doesExist() ) target = "gb";
|
||||
|
||||
sprintf(tmp,
|
||||
"scp " // blowfish is faster
|
||||
"scp " // blowfish is faster
|
||||
"%s%s "
|
||||
"%s:%s/gb.installed%s",
|
||||
dir,
|
||||
@ -16849,7 +16849,7 @@ void dumpCachedRecs (char *coll,int32_t startFileNum,int32_t numFiles,bool inclu
|
||||
int32_t filenum = 0;
|
||||
char filename[64];
|
||||
sprintf(filename, "%s-%"INT32".ddmp", coll, filenum);
|
||||
int FD = open(filename, O_CREAT|O_WRONLY, S_IROTH);
|
||||
//int FD = open(filename, O_CREAT|O_WRONLY, S_IROTH);
|
||||
int32_t numDumped = 0;
|
||||
uint32_t bytesDumped = 0;
|
||||
loop:
|
||||
@ -17016,7 +17016,7 @@ void dumpCachedRecs (char *coll,int32_t startFileNum,int32_t numFiles,bool inclu
|
||||
filenum++;
|
||||
sprintf(filename, "%s-%"INT32".ddmp", coll, filenum);
|
||||
close(FD);
|
||||
FD = open(filename, O_CREAT|O_WRONLY, S_IROTH);
|
||||
//FD = open(filename, O_CREAT|O_WRONLY, S_IROTH);
|
||||
bytesDumped = 0;
|
||||
fprintf(stderr, "Started new file: %s. starts at docId: %"INT64".\n",filename, lastDocId);
|
||||
}
|
||||
|
8
qa.cpp
8
qa.cpp
@ -248,10 +248,10 @@ void makeQADir ( ) {
|
||||
char dir[1024];
|
||||
snprintf(dir,1000,"%sqa",g_hostdb.m_dir);
|
||||
log("mkdir mkdir %s",dir);
|
||||
int32_t status = ::mkdir ( dir ,
|
||||
S_IRUSR | S_IWUSR | S_IXUSR |
|
||||
S_IRGRP | S_IWGRP | S_IXGRP |
|
||||
S_IROTH | S_IXOTH );
|
||||
int32_t status = ::mkdir ( dir ,getDirCreationFlags() );
|
||||
// S_IRUSR | S_IWUSR | S_IXUSR |
|
||||
// S_IRGRP | S_IWGRP | S_IXGRP |
|
||||
// S_IROTH | S_IXOTH );
|
||||
if ( status == -1 && errno != EEXIST && errno )
|
||||
log("qa: Failed to make directory %s: %s.",
|
||||
dir,mstrerror(errno));
|
||||
|
@ -14,6 +14,8 @@ import time
|
||||
import flask
|
||||
import signal, os
|
||||
import random
|
||||
from itertools import repeat
|
||||
|
||||
|
||||
app = flask.Flask(__name__)
|
||||
app.secret_key = 'oaisj84alwsdkjhf9238u'
|
||||
@ -118,7 +120,8 @@ def injectItem(item, db, mode):
|
||||
postVars = {'url':'http://archive.org/download/%s/%s' %
|
||||
(item,ff['name']),
|
||||
'metadata':json.dumps(itemMetadata),
|
||||
'c':'ait'}
|
||||
'c':'ait',
|
||||
'spiderlinks':0}
|
||||
start = time.time()
|
||||
if mode == 'production':
|
||||
try:
|
||||
@ -315,21 +318,39 @@ def main():
|
||||
|
||||
if len(sys.argv) == 3:
|
||||
|
||||
if sys.argv[1] == 'force':
|
||||
itemName = sys.argv[2]
|
||||
db = getDb()
|
||||
injectItem(itemName, db, 'production')
|
||||
sys.exit(0)
|
||||
|
||||
if sys.argv[1] == 'forcefile':
|
||||
global staleTime
|
||||
staleTime = datetime.timedelta(0,0,0)
|
||||
from multiprocessing.pool import ThreadPool
|
||||
fileName = sys.argv[2]
|
||||
items = filter(lambda x: x, open(fileName, 'r').read().split('\n'))
|
||||
pool = ThreadPool(processes=len(items))
|
||||
#print zip(files, repeat(getDb(), len(files)), repeat('production', len(files)))
|
||||
def injectItemTupleWrapper(itemName):
|
||||
db = getDb()
|
||||
ret = injectItem(itemName, db, 'production')
|
||||
db.close()
|
||||
return ret
|
||||
|
||||
answer = pool.map(injectItemTupleWrapper, items)
|
||||
sys.exit(0)
|
||||
|
||||
|
||||
if sys.argv[1] == 'run':
|
||||
threads = int(sys.argv[2])
|
||||
runInjects(threads)
|
||||
|
||||
# else:
|
||||
# #getPage(3)
|
||||
# from multiprocessing.pool import ThreadPool
|
||||
# pool = ThreadPool(processes=150)
|
||||
# pool.map(getPage, xrange(1,1300))
|
||||
|
||||
def runInjects(threads, mode='production'):
|
||||
from multiprocessing.pool import ThreadPool
|
||||
pool = ThreadPool(processes=threads)
|
||||
try:
|
||||
from itertools import repeat
|
||||
maxPages = 1300
|
||||
answer = pool.map(getPage, zip(xrange(1,maxPages), repeat(mode, maxPages)))
|
||||
except (KeyboardInterrupt, SystemExit):
|
||||
|
@ -100,7 +100,7 @@ def getSplitTime():
|
||||
|
||||
|
||||
|
||||
def copyToTwins(fname):
|
||||
def copyToTwins(fname, backToFront=False):
|
||||
fh = open(fname, 'r')
|
||||
ret = {}
|
||||
hosts = []
|
||||
@ -117,23 +117,25 @@ def copyToTwins(fname):
|
||||
continue
|
||||
#print directory, ip1, note
|
||||
step = len(hosts)/2
|
||||
hostPlex = {}
|
||||
someIp = None
|
||||
cmds = []
|
||||
for hostId, dnsPort, httpsPort, httpPort, udbPort,ip1, ip2, directory, note in hosts[:step]:
|
||||
if ip1 not in hostPlex:
|
||||
hostPlex[ip1] = []
|
||||
someIp = ip1
|
||||
hostPlex[ip1].append('scp -r %s:%s* %s:%s. ' % (ip1, directory, (hosts[hostId + step][5]), (hosts[hostId + step][7])))
|
||||
backHostId, backDnsPort, backHttpsPort, backHttpPort, backUdbPort,backIp1, backIp2, backDirectory, backNote = hosts[hostId + step]
|
||||
|
||||
if note != directory:
|
||||
print 'oh looks like you overlooked host %s' % hostId
|
||||
if backNote != backDirectory:
|
||||
print 'oh looks like you overlooked host %s' % backHostId
|
||||
|
||||
if backToFront:
|
||||
cmd = 'scp -r %s:%s* %s:%s. &' % (backIp1, backDirectory, ip1, directory )
|
||||
else:
|
||||
cmd = 'scp -r %s:%s* %s:%s. &' % (ip1, directory, backIp1, backDirectory)
|
||||
cmds.append(cmd)
|
||||
#print 'scp -r %s:%s* %s:%s. &' % (ip1, directory, (hosts[hostId + step][5]), (hosts[hostId + step][7]))
|
||||
|
||||
while len(hostPlex[someIp]) > 0:
|
||||
cmd = []
|
||||
for cmd in cmds:
|
||||
print cmd
|
||||
|
||||
for ip in hostPlex.iterkeys():
|
||||
cmd.append(hostPlex[ip].pop())
|
||||
#print hostPlex[ip].pop()
|
||||
|
||||
print '&\n'.join(cmd), ';'
|
||||
|
||||
|
||||
def testDiskSpeed(host, directory):
|
||||
|
Binary file not shown.
Reference in New Issue
Block a user