Make sure posdb keys in RdbBuckets are removed as well when document is respidered/deleted

This commit is contained in:
Ai Lin Chia
2016-09-28 11:42:29 +02:00
parent 321a41b6e6
commit 0c76a6a578
5 changed files with 25 additions and 30 deletions

@ -86,6 +86,8 @@
#define HASHGROUP_INMENU 10 // body implied
#define HASHGROUP_END 11
#define POSDB_DELETEDOC_TERMID 0
const char *getHashGroupString ( unsigned char hg );
float getTermFreqWeight ( int64_t termFreq , int64_t numDocsInColl );

16
Rdb.cpp

@ -930,7 +930,6 @@ bool Rdb::loadTree ( ) {
g_process.shutdownAbort(true);
}
if(treeExists) {
m_buckets.addTree( &m_tree );
if ( m_buckets.getNumKeys() - numKeys > 0 ) {
@ -1932,7 +1931,6 @@ bool Rdb::addRecord(collnum_t collnum, char *key, char *data, int32_t dataSize)
return true;
}
/// @todo ALC is this necessary? we remove delete keys when we dump to Rdb anyway for the first file
// if we have no files on disk for this db, don't bother preserving a negative rec, it just wastes tree space
if (KEYNEG(key)) {
// return if all data is in the tree
@ -1940,6 +1938,20 @@ bool Rdb::addRecord(collnum_t collnum, char *key, char *data, int32_t dataSize)
logTrace(g_conf.m_logTraceRdb, "END. %s: Negative key with all data in tree. Returning true", m_dbname);
return true;
}
// we should only store special delete keys (eg: posdb with termId 0)
// we will still have non-special keys here, to simplify the logic in XmlDoc::getMetaList, and because we can't
// really be sure whether the key we're adding is in RdbTree/RdbBuckets at that point in time (it could
// potentially be dumped after the check).
if (m_rdbId == RDB_POSDB || m_rdbId == RDB2_POSDB2) {
if (Posdb::getTermId(key) != POSDB_DELETEDOC_TERMID) {
logTrace(g_conf.m_logTraceRdb, "END. %s: Negative key with non-zero termId found. Returning true", m_dbname);
return true;
}
} else {
/// @todo ALC cater for other rdb types here
gbshutdownLogicError();
}
}
} else {
if (m_useTree) {

@ -13965,22 +13965,6 @@ skipNewAdd2:
g_process.shutdownAbort(true);
}
/// @todo ALC we're allocating too much here, we can only have 1 del key per doc when index is used
Rdb *rdb = getRdbFromId(rdbId);
if (rdb->isUseIndexFile()) {
// Do not store records in the hash table of old values when we're using index file.
// This makes sure that no delete records are stored in rdb for existing records,
// which is needed for the new no-merge feature.
/// @todo ALC verify that we need to cater for secondary rdb use for rebuild
if (rdbId == RDB_POSDB || rdbId == RDB2_POSDB2) {
continue;
}
/// @todo ALC we need to cater for other rdb file down below
gbshutdownLogicError();
}
if (!dt8.addKey(&hk, &rec)) {
logTrace(g_conf.m_logTraceXmlDoc, "addKey failed");
return NULL;
@ -14056,11 +14040,6 @@ skipNewAdd2:
// as a delete key below
dt8.removeSlot(slot);
if (rdbId == RDB_LINKDB) {
// all done for this key
continue;
}
// but do add like a titledb rec that has the
// same key, because its data is probably
// different...
@ -14069,7 +14048,11 @@ skipNewAdd2:
// geico got deleted but not the title rec!!
// MAKE SURE TITLEREC gets deleted then!!!
if (ds == 0 && g_conf.m_doIncrementalUpdating) {
continue;
// don't do incremental updating when using index file
Rdb *rdb = getRdbFromId(rdbId);
if (!rdb->isUseIndexFile()) {
continue;
}
}
}
@ -14162,7 +14145,7 @@ skipNewAdd2:
char key[MAX_KEY_BYTES];
// add posdb delete key
Posdb::makeStartKey(&key, 0, *od->getDocId());
Posdb::makeStartKey(&key, POSDB_DELETEDOC_TERMID, *od->getDocId());
*nptr++ = RDB_POSDB;
memcpy(nptr, &key, sizeof(posdbkey_t));
nptr += sizeof(posdbkey_t);

@ -22,6 +22,8 @@ static void deletePosdb() {
RdbBase *base = g_posdb.getRdb()->getBase(0);
for (int i = 0; i < base->getNumFiles(); ++i) {
base->getFile(i)->unlink();
base->getMap(i)->unlink();
base->getIndex(i)->unlink();
}
// delete posdb bucket file
@ -286,18 +288,14 @@ TEST_F(PosdbNoMergeTest, AddDeleteRecordMultiple) {
// second round
// doc contains 3 words (a, c, d)
addPosdbKey(1, docId, true);
addPosdbKey(2, docId, true);
addPosdbKey(3, docId, true);
addPosdbKey(1, docId);
addPosdbKey(3, docId);
addPosdbKey(4, docId);
// third round
// doc contains 4 words (a, d, e, f)
addPosdbKey(1, docId, true);
addPosdbKey(3, docId, true);
addPosdbKey(4, docId, true);
addPosdbKey(1, docId);
addPosdbKey(4, docId);

@ -128,4 +128,4 @@ TEST(RdbBucketsTest, PosdbAddDeleteNodeCheckEachDelete) {
}
EXPECT_TRUE(list.isExhausted());
}
}
}