forked from Mirrors/privacore-open-source-search-engine
Make sure posdb keys in RdbBuckets are removed as well when a document is respidered/deleted
This commit is contained in:
2
Posdb.h
2
Posdb.h
@ -86,6 +86,8 @@
|
||||
#define HASHGROUP_INMENU 10 // body implied
|
||||
#define HASHGROUP_END 11
|
||||
|
||||
// termId used for the special posdb "delete document" key: the key is built
// with Posdb::makeStartKey(&key, POSDB_DELETEDOC_TERMID, docId), and
// Rdb::addRecord only keeps negative posdb keys that carry this termId.
#define POSDB_DELETEDOC_TERMID 0
|
||||
|
||||
const char *getHashGroupString ( unsigned char hg );
|
||||
float getTermFreqWeight ( int64_t termFreq , int64_t numDocsInColl );
|
||||
|
||||
|
16
Rdb.cpp
16
Rdb.cpp
@ -930,7 +930,6 @@ bool Rdb::loadTree ( ) {
|
||||
g_process.shutdownAbort(true);
|
||||
}
|
||||
|
||||
|
||||
if(treeExists) {
|
||||
m_buckets.addTree( &m_tree );
|
||||
if ( m_buckets.getNumKeys() - numKeys > 0 ) {
|
||||
@ -1932,7 +1931,6 @@ bool Rdb::addRecord(collnum_t collnum, char *key, char *data, int32_t dataSize)
|
||||
return true;
|
||||
}
|
||||
|
||||
/// @todo ALC is this necessary? we remove delete keys when we dump to Rdb anyway for the first file
|
||||
// if we have no files on disk for this db, don't bother preserving a negative rec, it just wastes tree space
|
||||
if (KEYNEG(key)) {
|
||||
// return if all data is in the tree
|
||||
@ -1940,6 +1938,20 @@ bool Rdb::addRecord(collnum_t collnum, char *key, char *data, int32_t dataSize)
|
||||
logTrace(g_conf.m_logTraceRdb, "END. %s: Negative key with all data in tree. Returning true", m_dbname);
|
||||
return true;
|
||||
}
|
||||
|
||||
// we should only store special delete keys (eg: posdb with termId 0)
|
||||
// we will have non-special keys here to simplify logic in XmlDoc::getMetaList (and we can't really be sure
|
||||
// if the key we're adding is in RdbTree/RdbBuckets at that point of time. It could potentially be dumped
|
||||
// after the check).
|
||||
if (m_rdbId == RDB_POSDB || m_rdbId == RDB2_POSDB2) {
|
||||
if (Posdb::getTermId(key) != POSDB_DELETEDOC_TERMID) {
|
||||
logTrace(g_conf.m_logTraceRdb, "END. %s: Negative key with non-zero termId found. Returning true", m_dbname);
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
/// @todo ALC cater for other rdb types here
|
||||
gbshutdownLogicError();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (m_useTree) {
|
||||
|
29
XmlDoc.cpp
29
XmlDoc.cpp
@ -13965,22 +13965,6 @@ skipNewAdd2:
|
||||
g_process.shutdownAbort(true);
|
||||
}
|
||||
|
||||
/// @todo ALC we're allocating too much here, we can only have 1 del key per doc when index is used
|
||||
|
||||
Rdb *rdb = getRdbFromId(rdbId);
|
||||
if (rdb->isUseIndexFile()) {
|
||||
// Do not store records in the hash table of old values when we're using index file.
|
||||
// This makes sure that no delete records are stored in rdb for existing records,
|
||||
// which is needed for the new no-merge feature.
|
||||
/// @todo ALC verify that we need to cater for secondary rdb use for rebuild
|
||||
if (rdbId == RDB_POSDB || rdbId == RDB2_POSDB2) {
|
||||
continue;
|
||||
}
|
||||
|
||||
/// @todo ALC we need to cater for other rdb file down below
|
||||
gbshutdownLogicError();
|
||||
}
|
||||
|
||||
if (!dt8.addKey(&hk, &rec)) {
|
||||
logTrace(g_conf.m_logTraceXmlDoc, "addKey failed");
|
||||
return NULL;
|
||||
@ -14056,11 +14040,6 @@ skipNewAdd2:
|
||||
// as a delete key below
|
||||
dt8.removeSlot(slot);
|
||||
|
||||
if (rdbId == RDB_LINKDB) {
|
||||
// all done for this key
|
||||
continue;
|
||||
}
|
||||
|
||||
// but do add like a titledb rec that has the
|
||||
// same key, because its data is probably
|
||||
// different...
|
||||
@ -14069,7 +14048,11 @@ skipNewAdd2:
|
||||
// geico got deleted but not the title rec!!
|
||||
// MAKE SURE TITLEREC gets deleted then!!!
|
||||
if (ds == 0 && g_conf.m_doIncrementalUpdating) {
|
||||
continue;
|
||||
// don't do incremental updating when using index file
|
||||
Rdb *rdb = getRdbFromId(rdbId);
|
||||
if (!rdb->isUseIndexFile()) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -14162,7 +14145,7 @@ skipNewAdd2:
|
||||
char key[MAX_KEY_BYTES];
|
||||
|
||||
// add posdb delete key
|
||||
Posdb::makeStartKey(&key, 0, *od->getDocId());
|
||||
Posdb::makeStartKey(&key, POSDB_DELETEDOC_TERMID, *od->getDocId());
|
||||
*nptr++ = RDB_POSDB;
|
||||
memcpy(nptr, &key, sizeof(posdbkey_t));
|
||||
nptr += sizeof(posdbkey_t);
|
||||
|
@ -22,6 +22,8 @@ static void deletePosdb() {
|
||||
RdbBase *base = g_posdb.getRdb()->getBase(0);
|
||||
for (int i = 0; i < base->getNumFiles(); ++i) {
|
||||
base->getFile(i)->unlink();
|
||||
base->getMap(i)->unlink();
|
||||
base->getIndex(i)->unlink();
|
||||
}
|
||||
|
||||
// delete posdb bucket file
|
||||
@ -286,18 +288,14 @@ TEST_F(PosdbNoMergeTest, AddDeleteRecordMultiple) {
|
||||
|
||||
// second round
|
||||
// doc contains 3 words (a, c, d)
|
||||
addPosdbKey(1, docId, true);
|
||||
addPosdbKey(2, docId, true);
|
||||
addPosdbKey(3, docId, true);
|
||||
addPosdbKey(1, docId);
|
||||
addPosdbKey(3, docId);
|
||||
addPosdbKey(4, docId);
|
||||
|
||||
// third round
|
||||
// doc contains 4 words (a, d, e, f)
|
||||
addPosdbKey(1, docId, true);
|
||||
addPosdbKey(3, docId, true);
|
||||
addPosdbKey(4, docId, true);
|
||||
|
||||
addPosdbKey(1, docId);
|
||||
addPosdbKey(4, docId);
|
||||
|
@ -128,4 +128,4 @@ TEST(RdbBucketsTest, PosdbAddDeleteNodeCheckEachDelete) {
|
||||
}
|
||||
EXPECT_TRUE(list.isExhausted());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user