Added more checks for corrupted titledb records

This commit is contained in:
Brian Rasmusson
2017-05-21 23:22:07 +02:00
parent 30bed51d1d
commit 6c30ebdc6a
5 changed files with 184 additions and 57 deletions

@ -10,10 +10,10 @@
#include "JobScheduler.h"
#include "ip.h"
#include "Mem.h"
#include "Titledb.h" // for Titledb::validateSerializedRecord
#include <sys/stat.h> //stat()
#include <fcntl.h>
#ifdef _VALGRIND_
#include <valgrind/memcheck.h>
#endif
@ -143,8 +143,8 @@ static void Msg4In::handleRequest4(UdpSlot *slot, int32_t /*netnice*/) {
// sanity check
if ( used != readBufSize ) {
logError("msg4: got corrupted request from hostid %" PRId32" used [%" PRId32"] != readBufSize [%" PRId32"]",
slot->m_host->m_hostId, used, readBufSize);
logError("msg4: got corrupted request from hostid %" PRId32" used [%" PRId32"] != readBufSize [%" PRId32"]. tid=%" PRId32 "",
slot->m_host->m_hostId, used, readBufSize, slot->getTransId());
loghex(LOG_ERROR, readBuf, (readBufSize < 160 ? readBufSize : 160), "readBuf (first max. 160 bytes)");
gbshutdownAbort(true);
@ -231,6 +231,11 @@ static bool Msg4In::addMetaList(const char *p, UdpSlot *slot) {
const char *rec = p;
// Sanity. Shut down if data sizes are wrong.
if( rdbId == RDB_TITLEDB ) {
Titledb::validateSerializedRecord( rec, recSize );
}
// . get the rdb to which it belongs, use Msg0::getRdb()
// . do not call this for every rec if we do not have to
if (rdbId != lastRdbId || !rdb) {

@ -16,12 +16,12 @@
#include "Mem.h"
#include "GbMutex.h"
#include "ScopedLock.h"
#include "Titledb.h" // for Titledb::validateSerializedRecord
#include <sys/stat.h> //stat()
#include <fcntl.h>
#include <algorithm>
#include <deque>
#ifdef _VALGRIND_
#include <valgrind/memcheck.h>
#endif
@ -576,6 +576,11 @@ static bool storeRec(collnum_t collnum, char rdbId, int32_t hostId, const char *
*(int32_t *)p = recSize; p += 4;
memcpy( p , rec , recSize ); p += recSize;
// Sanity. Shut down if data sizes are wrong.
if( rdbId == RDB_TITLEDB ) {
Titledb::validateSerializedRecord( rec, recSize );
}
// update buffer used
*(int32_t *)buf = used + (p - start);

@ -209,6 +209,39 @@ void Titledb::printKey(const char *k) {
KEYNEG(k));
}
void Titledb::validateSerializedRecord(const char *rec, int32_t recSize) {
char *debugp = (char*)rec;
key96_t debug_titleRecKey = *(key96_t *)debugp;
bool debug_keyneg = (debug_titleRecKey.n0 & 0x01) == 0x00;
int64_t debug_docId = Titledb::getDocIdFromKey(&debug_titleRecKey);
if ( debug_keyneg ) {
log(LOG_DEBUG, "TitleDB rec verified. Delete key for DocId=%" PRId64 "", debug_docId);
}
else {
debugp += sizeof(key96_t);
// the size of the data that follows
int32_t debug_dataSize = *(int32_t *) debugp;
if( debug_dataSize < 4 ) {
log(LOG_ERROR, "TITLEDB CORRUPTION. Record shows size of %" PRId32" which is too small. DocId=%" PRId64 "", debug_dataSize, debug_docId);
gbshutdownLogicError();
}
debugp += 4;
// what's the size of the uncompressed compressed stuff below here?
int32_t debug_ubufSize = *(int32_t *) debugp;
if( debug_ubufSize <= 0 ) {
log(LOG_ERROR, "TITLEDB CORRUPTION. Record shows uncompressed size of %" PRId32". DocId=%" PRId64 "", debug_ubufSize, debug_docId);
gbshutdownLogicError();
}
log(LOG_DEBUG, "TitleDB rec verified. recSize %" PRId32 ", uncompressed %" PRId32". DocId=%" PRId64 "", recSize, debug_ubufSize, debug_docId);
}
}
void filterTitledbList(RdbList *list) {
char *newList = list->getList();
char *dst = newList;

@ -128,6 +128,7 @@ public:
}
static void printKey(const char *key);
static void validateSerializedRecord(const char *rec, int32_t recSize);
// Rdb init variables
static inline int32_t getFixedDataSize() { return -1; }

@ -834,13 +834,15 @@ bool XmlDoc::set2 ( char *titleRec ,
// . should we free m_cbuf on our reset/destruction?
// . no because doCOnsistencyCheck calls XmlDoc::set2 with a titleRec
// that should not be freed, besides the alloc size is not known!
m_titleRecBuf.setBuf ( titleRec ,
titleRecSize , // bufmax
titleRecSize , // bytes in use
false); // ownData?
if( !m_titleRecBuf.setBuf( titleRec,
titleRecSize, // bufmax
titleRecSize, // bytes in use
false) ) { // ownData?
log(LOG_ERROR, "m_titleRecBuf.setBuf of size %" PRId32 " failed", titleRecSize);
gbshutdownLogicError();
}
m_titleRecBufValid = true;
//m_coll = coll;
m_pbuf = pbuf;
m_niceness = niceness;
@ -865,13 +867,15 @@ bool XmlDoc::set2 ( char *titleRec ,
//m_titleRecAllocSize = maxSize;
// get a parse ptr
char *p = titleRec ;
char *p = titleRec;
// . this is just like a serialized RdbList key/dataSize/data of 1 rec
// . first thing is the key
// . key should have docId embedded in it
m_titleRecKey = *(key96_t *) p ;
//m_titleRecKeyValid = true;
p += sizeof(key96_t);
// bail on error
if ( (m_titleRecKey.n0 & 0x01) == 0x00 ) {
g_errno = EBADTITLEREC;
@ -897,43 +901,58 @@ bool XmlDoc::set2 ( char *titleRec ,
// bail on error
if ( dataSize < 4 ) {
g_errno = EBADTITLEREC;
log(LOG_WARN, "db: Titledb record has size of %" PRId32" which is less then 4. Probable disk corruption in a "
"titledb file.", dataSize);
return false;
log(LOG_ERROR, "TITLEDB CORRUPTION. Record has size of %" PRId32" which is too small. Probable disk corruption in a titledb file. DocId=%" PRId64 "", dataSize, m_docId);
gbshutdownLogicError();
// return false;
}
// what is the size of cbuf/titleRec in bytes?
int32_t cbufSize = dataSize + 4 + sizeof(key96_t);
// . the actual data follows "dataSize"
// . what's the size of the uncompressed compressed stuff below here?
m_ubufSize = *(int32_t *) p ; p += 4;
// . because of disk/network data corruption this may be wrong!
// . we can now have absolutely huge titlerecs...
if ( m_ubufSize <= 0 ) { //m_ubufSize > 2*1024*1024 || m_ubufSize < 0 )
if ( m_ubufSize == 0 ) { //m_ubufSize > 2*1024*1024 || m_ubufSize < 0 )
g_errno = EBADTITLEREC;
log(LOG_WARN, "db: TitleRec::set: uncompress uncompressed size=%" PRId32".",m_ubufSize );
log(LOG_ERROR, "POSSIBLE TITLEDB CORRUPTION. Uncompressed size=%" PRId32", docId=%" PRId64 ", dataSize=%" PRId32 ", cbufSize=%" PRId32 "", m_ubufSize, m_docId, dataSize, cbufSize);
loghex(LOG_ERROR, titleRec, (cbufSize < 400 ? cbufSize : 400), "titleRec (first max. 400 bytes)");
return false;
//gbshutdownLogicError();
//return false;
}
if ( m_ubufSize < 0 ) { //m_ubufSize > 2*1024*1024 || m_ubufSize < 0 )
g_errno = EBADTITLEREC;
log(LOG_ERROR, "TITLEDB CORRUPTION. Uncompressed size=%" PRId32", docId=%" PRId64 ", dataSize=%" PRId32 ", cbufSize=%" PRId32 "", m_ubufSize, m_docId, dataSize, cbufSize);
loghex(LOG_ERROR, titleRec, (cbufSize < 400 ? cbufSize : 400), "titleRec (first max. 400 bytes)");
gbshutdownLogicError();
//return false;
}
// trying to uncompress corrupt titlerecs sometimes results in
// a seg fault... watch out
if ( m_ubufSize > 100*1024*1024 ) {
g_errno = EBADTITLEREC;
log(LOG_WARN, "db: TitleRec::set: uncompress uncompressed size=%" PRId32" > 100MB. unacceptable, probable "
"corruption.", m_ubufSize);
return false;
log(LOG_ERROR, "TITLEDB CORRUPTION. Uncompressed size=%" PRId32" > 100MB. unacceptable, probable corruption. docId=%" PRId64 "", m_ubufSize, m_docId);
loghex(LOG_ERROR, titleRec, (cbufSize < 400 ? cbufSize : 400), "titleRec (first max. 400 bytes)");
gbshutdownLogicError();
//return false;
}
// make buf space for holding the uncompressed stuff
m_ubufAlloc = m_ubufSize;
m_ubuf = (char *) mmalloc ( m_ubufAlloc ,"TitleRecu1");
// log("xmldoc: m_ubuf=%" PTRFMT" this=%" PTRFMT
// , (PTRTYPE) m_ubuf
// , (PTRTYPE) this
// );
if ( ! m_ubuf ) {
// we had bad ubufsizes on gb6, like > 1GB print out key
// so we can manually make a titledb.dat file to delete these
// bad keys
log("build: alloc failed ubufsize=%" PRId32" key.n1=%" PRIu32" "
"n0=%" PRIu64,
log("build: alloc failed ubufsize=%" PRId32" key.n1=%" PRIu32" n0=%" PRIu64,
m_ubufAlloc,m_titleRecKey.n1,m_titleRecKey.n0);
return false;
}
@ -953,23 +972,26 @@ bool XmlDoc::set2 ( char *titleRec ,
(uint32_t ) (dataSize - 4) );
// hmmmm...
if ( err == Z_BUF_ERROR ) {
log(LOG_WARN, "db: Buffer is too small to hold uncompressed document. Probable disk corruption in a titledb file.");
log(LOG_ERROR, "!!! Buffer is too small to hold uncompressed document. Probable disk corruption in a titledb file.");
g_errno = EUNCOMPRESSERROR;
return false;
}
// set g_errno and return false on error
if ( err != Z_OK ) {
g_errno = EUNCOMPRESSERROR;
log(LOG_WARN, "db: Uncompress of document failed. ZG_ERRNO=%i. cbufSize=%" PRId32" ubufsize=%" PRId32" realSize=%" PRId32,
log(LOG_ERROR, "!!! Uncompress of document failed. ZG_ERRNO=%i. cbufSize=%" PRId32" ubufsize=%" PRId32" realSize=%" PRId32,
err , cbufSize , m_ubufSize , realSize );
return false;
}
if ( realSize != m_ubufSize ) {
g_errno = EBADENGINEER;
log(LOG_WARN, "db: Uncompressed document size is not what we recorded it to be. Probable disk corruption in "
"a titledb file.");
return false;
log(LOG_ERROR,"CORRUPTED TITLEREC detected for docId %" PRId64 "", m_docId);
gbshutdownLogicError();
//g_errno = EBADENGINEER;
//log(LOG_WARN, "db: Uncompressed document size is not what we recorded it to be. Probable disk corruption in a titledb file.");
//return false;
}
// . add the stat
// . use white for the stat
g_stats.addStat_r(0, startTime, gettimeofdayInMilliseconds(), 0x00ffffff);
@ -980,9 +1002,10 @@ bool XmlDoc::set2 ( char *titleRec ,
int32_t shouldbe = (char *)&ptr_firstUrl - (char *)&m_headerSize;
if ( headerSize != shouldbe ) {
g_errno = ECORRUPTDATA;
log(LOG_WARN, "doc: bad header size in title rec");
return false;
log(LOG_ERROR,"CORRUPTED TITLEREC detected for docId %" PRId64 "", m_docId);
gbshutdownLogicError();
//g_errno = ECORRUPTDATA;
//return false;
}
// set our easy stuff
@ -990,7 +1013,6 @@ bool XmlDoc::set2 ( char *titleRec ,
// NOW set the XmlDoc::ptr_* and XmlDoc::size_* members
// like in Msg.cpp and Msg20Reply.cpp
if ( m_pbuf ) {
int32_t crc = hash32(m_ubuf,headerSize);
m_pbuf->safePrintf("crchdr=0x%" PRIx32" sizehdr=%" PRId32", ",
@ -1021,21 +1043,37 @@ bool XmlDoc::set2 ( char *titleRec ,
// make the mask
uint32_t mask = 1 << i ;
// do we have this member? skip if not.
if ( ! (m_internalFlags1 & mask) ) continue;
if ( ! (m_internalFlags1 & mask) ) {
continue;
}
// watch out for corruption
if ( up > upend ) {
g_errno = ECORRUPTDATA;
log(LOG_WARN, "doc: corrupt titlerec.");
return false;
log(LOG_ERROR,"CORRUPTED TITLEREC detected for docId %" PRId64 "", m_docId);
gbshutdownLogicError();
//g_errno = ECORRUPTDATA;
//return false;
}
// get the size
*ps = *(int32_t *)up;
// this should never be 0, otherwise, why was its flag set?
if ( *ps <= 0 ) { g_process.shutdownAbort(true); }
if ( *ps <= 0 ) {
log(LOG_ERROR,"CORRUPTED TITLEREC detected for docId %" PRId64 "", m_docId);
gbshutdownLogicError();
}
// skip over to point to data
up += 4;
// point to the data. could be 64-bit ptr.
*pd = up;//(int32_t)up;
// Sanity - bail if size set, but no data
if( *ps && !pd ) {
log(LOG_ERROR,"CORRUPTED TITLEREC detected for docId %" PRId64 "", m_docId);
gbshutdownLogicError();
}
// debug
if ( m_pbuf ) {
int32_t crc = hash32(up,*ps);
@ -1044,11 +1082,13 @@ bool XmlDoc::set2 ( char *titleRec ,
}
// skip over data
up += *ps;
// watch out for corruption
if ( up > upend ) {
g_errno = ECORRUPTDATA;
log(LOG_WARN, "doc: corrupt titlerec.");
return false;
log(LOG_ERROR,"CORRUPTED TITLEREC detected for docId %" PRId64 "", m_docId);
gbshutdownLogicError();
//g_errno = ECORRUPTDATA;
//return false;
}
}
// cap it
@ -2728,6 +2768,7 @@ char *XmlDoc::prepareToMakeTitleRec ( ) {
return (char *)1;
}
// . create and store the titlerec into "buf".
// . it is basically the header part of all the member vars in this XmlDoc.
// . it has a key,dataSize,compressedData so it can be a record in an Rdb
@ -2796,6 +2837,12 @@ bool XmlDoc::setTitleRecBuf ( SafeBuf *tbuf, int64_t docId, int64_t uh48 ){
// or empty string ptr
if ( ! *pd ) continue;
// Sanity
if( *ps < 0 ) {
log(LOG_ERROR,"DATA CORRUPTION AVOIDED in setTitleRec. Variable length data item %" PRId32 " has negative length: %" PRId32 "", i, *ps);
gbshutdownLogicError();
}
// store size first
*(int32_t *)p = *ps;
p += 4;
@ -2852,7 +2899,7 @@ bool XmlDoc::setTitleRecBuf ( SafeBuf *tbuf, int64_t docId, int64_t uh48 ){
if ( err == Z_OK && size > (need2 - hdrSize ) ) {
tbuf->purge();
g_errno = ECOMPRESSFAILED;
log("db: Failed to compress document of %" PRId32" bytes. "
log(LOG_ERROR, "!!! Failed to compress document of %" PRId32" bytes. "
"Provided buffer of %" PRId32" bytes.",
size, (need2 - hdrSize ) );
return false;
@ -2862,7 +2909,7 @@ bool XmlDoc::setTitleRecBuf ( SafeBuf *tbuf, int64_t docId, int64_t uh48 ){
if ( err != Z_OK ) {
tbuf->purge();
g_errno = ECOMPRESSFAILED;
log("db: Failed to compress document.");
log(LOG_ERROR,"!!! Failed to compress document.");
return false;
}
@ -2882,13 +2929,18 @@ bool XmlDoc::setTitleRecBuf ( SafeBuf *tbuf, int64_t docId, int64_t uh48 ){
p += 4;
// store uncompressed size in header
*(int32_t *) p = need1 ; p += 4;
*(int32_t *) p = need1;
p += 4;
// sanity check
if ( p != cbuf + hdrSize ) { g_process.shutdownAbort(true); }
if ( p != cbuf + hdrSize ) {
g_process.shutdownAbort(true);
}
// sanity check
if ( need1 <= 0 ) { g_process.shutdownAbort(true); }
if ( need1 <= 0 ) {
g_process.shutdownAbort(true);
}
// advance over data
p += size;
@ -2896,9 +2948,11 @@ bool XmlDoc::setTitleRecBuf ( SafeBuf *tbuf, int64_t docId, int64_t uh48 ){
// update safebuf::m_length so it is correct
tbuf->setLength ( p - cbuf );
logTrace( g_conf.m_logTraceXmlDoc, "dataSize=%" PRId32 ", uncompressed=%" PRId32 ", docId=%" PRId64 "", dataSize, need1, docId);
return true;
}
// . return NULL and sets g_errno on error
// . returns -1 if blocked
SafeBuf *XmlDoc::getTitleRecBuf ( ) {
@ -5952,9 +6006,12 @@ SafeBuf *XmlDoc::getTimeAxisUrl ( ) {
// . the twin brother of XmlDoc::getTitleRecBuf() which makes the title rec
// from scratch. this loads it from titledb.
// . NULL is a valid value (EDOCNOTFOUND) so return a char **
char **XmlDoc::getOldTitleRec ( ) {
char **XmlDoc::getOldTitleRec() {
// if valid return that
if ( m_oldTitleRecValid ) return &m_oldTitleRec;
if ( m_oldTitleRecValid ) {
return &m_oldTitleRec;
}
// update status msg
setStatus ( "getting old title rec");
// if we are set from a title rec, we are the old doc
@ -6001,7 +6058,9 @@ char **XmlDoc::getOldTitleRec ( ) {
return NULL;
}
CollectionRec *cr = getCollRec();
if ( ! cr ) return NULL;
if ( ! cr ) {
return NULL;
}
// if using time axis then append the timestamp to the end of
// the url. this way Msg22::getAvailDocId() will return a docid
@ -6026,9 +6085,11 @@ char **XmlDoc::getOldTitleRec ( ) {
m_masterState ,
m_masterLoop ,
m_niceness , // niceness
999999 )) // timeout seconds
999999 )) {// timeout seconds
// return -1 if we blocked
return (char **)-1;
}
// not really an error
if ( g_errno == ENOTFOUND ) {
g_errno = 0;
@ -6038,10 +6099,12 @@ char **XmlDoc::getOldTitleRec ( ) {
if ( g_errno ) {
return NULL;
}
// got it
return &m_oldTitleRec;
}
// . look up TitleRec using Msg22 if we need to
// . set our m_titleRec member from titledb
// . the twin brother of XmlDoc::getTitleRecBuf() which makes the title rec
@ -13407,6 +13470,14 @@ char *XmlDoc::getMetaList(bool forDelete) {
int32_t tsize = (forDelete) ? sizeof(key96_t) : m_titleRecBuf.length();
gbmemcpy ( m_p , m_titleRecBuf.getBufStart() , tsize );
// Sanity. Shut down if data sizes are wrong.
if( !forDelete) {
Titledb::validateSerializedRecord( m_p, tsize );
}
else {
logTrace(g_conf.m_logTraceXmlDoc, "Storing delete key for DocId=%" PRId64 "", m_docId);
}
m_p += tsize;
}
@ -15936,14 +16007,22 @@ Msg20Reply *XmlDoc::getMsg20ReplyStepwise() {
// . cache it for one hour
// . this will set our ptr_ and size_ member vars
char **otr = getOldTitleRec ( );
if ( ! otr || otr == (void *)-1 ) { checkPointerError(otr); return (Msg20Reply *)otr; }
char **otr = getOldTitleRec();
if ( ! otr || otr == (void *)-1 ) {
checkPointerError(otr);
return (Msg20Reply *)otr;
}
// must have a title rec in titledb
if ( ! *otr ) { g_errno = ENOTFOUND; return NULL; }
if ( ! *otr ) {
g_errno = ENOTFOUND;
return NULL;
}
// sanity
if ( *otr != m_oldTitleRec ) { g_process.shutdownAbort(true); }
if ( *otr != m_oldTitleRec ) {
g_process.shutdownAbort(true);
}
// . set our ptr_ and size_ member vars from it after uncompressing
// . returns false and sets g_errno on error
@ -15953,10 +16032,14 @@ Msg20Reply *XmlDoc::getMsg20ReplyStepwise() {
bool status = set2( *otr, 0, cr->m_coll, NULL, m_niceness);
// sanity check
if ( ! status && ! g_errno ) { g_process.shutdownAbort(true); }
if ( ! status && ! g_errno ) {
g_process.shutdownAbort(true);
}
// if there was an error, g_errno should be set.
if ( ! status ) return NULL;
if ( ! status ) {
return NULL;
}
m_setTr = true;
}