mirror of
https://github.com/privacore/open-source-search-engine.git
synced 2025-07-14 02:36:06 -04:00
Fix bug in posdbMerge. We can't assume that list number == rdb file idx.
This commit is contained in:
4
Msg5.cpp
4
Msg5.cpp
@ -1148,9 +1148,9 @@ void Msg5::mergeLists() {
|
||||
// one of them was PROBABLY in the dump queue and we decided in
|
||||
// Rdb::addRecord() NOT to do the annihilation, therefore it's good
|
||||
// to do the merge to do the annihilation
|
||||
m_list->merge_r(m_listPtrs, m_numListPtrs, m_startKey, m_minEndKey, m_minRecSizes, m_removeNegRecs, m_rdbId, m_collnum);
|
||||
m_list->merge_r(m_listPtrs, m_numListPtrs, m_startKey, m_minEndKey, m_minRecSizes, m_removeNegRecs, m_rdbId, m_collnum, m_startFileNum);
|
||||
m_list->resetListPtr(); //merge_r() doesn't rewind the list iterator/pointer (?)
|
||||
|
||||
|
||||
// maintain this info for truncation purposes
|
||||
if ( m_list->isLastKeyValid() )
|
||||
//m_prevKey = m_list->getLastKey();
|
||||
|
@ -1690,7 +1690,7 @@ bool RdbList::posdbConstrain(const char *startKey, char *endKey, int32_t minRecS
|
||||
// . CAUTION: you should call constrain() on all "lists" before calling this
|
||||
// so we don't have to do boundary checks on the keys here
|
||||
void RdbList::merge_r(RdbList **lists, int32_t numLists, const char *startKey, const char *endKey, int32_t minRecSizes,
|
||||
bool removeNegRecs, rdbid_t rdbId, collnum_t collNum) {
|
||||
bool removeNegRecs, rdbid_t rdbId, collnum_t collNum, int32_t startFileNum) {
|
||||
// sanity
|
||||
if (!m_ownData) {
|
||||
log(LOG_ERROR, "list: merge_r data not owned");
|
||||
@ -1760,7 +1760,7 @@ void RdbList::merge_r(RdbList **lists, int32_t numLists, const char *startKey, c
|
||||
|
||||
Rdb* rdb = getRdbFromId(rdbId);
|
||||
if (rdbId == RDB_POSDB) {
|
||||
posdbMerge_r(lists, numLists, startKey, endKey, m_mergeMinListSize, removeNegRecs, rdb->isUseIndexFile(), collNum);
|
||||
posdbMerge_r(lists, numLists, startKey, endKey, m_mergeMinListSize, removeNegRecs, rdb->isUseIndexFile(), collNum, startFileNum);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -2088,7 +2088,7 @@ skip:
|
||||
///////
|
||||
|
||||
bool RdbList::posdbMerge_r(RdbList **lists, int32_t numLists, const char *startKey, const char *endKey, int32_t minRecSizes,
|
||||
bool removeNegKeys, bool useIndexFile, collnum_t collNum) {
|
||||
bool removeNegKeys, bool useIndexFile, collnum_t collNum, int32_t startFileNum) {
|
||||
logTrace(g_conf.m_logTraceRdbList, "BEGIN");
|
||||
|
||||
// sanity
|
||||
@ -2269,8 +2269,7 @@ bool RdbList::posdbMerge_r(RdbList **lists, int32_t numLists, const char *startK
|
||||
|
||||
int32_t filePos = rdbIndexQuery.getFilePos(docId);
|
||||
|
||||
/// @todo ALC can we assume list number == rdb file idx?
|
||||
if (filePos > mini + listOffset) {
|
||||
if (filePos > (mini + listOffset) + startFileNum) {
|
||||
// docId is present in newer file
|
||||
logTrace(g_conf.m_logTraceRdbList, "docId in newer list. skip. filePos=%" PRId32" mini=%" PRId16, filePos, mini);
|
||||
goto skip;
|
||||
|
@ -189,7 +189,7 @@ public:
|
||||
// . set our startKey/endKey to "startKey"/"endKey"
|
||||
// . exclude any records from lists not in that range
|
||||
void merge_r(RdbList **lists, int32_t numLists, const char *startKey, const char *endKey, int32_t minRecSizes,
|
||||
bool removeNegRecs, rdbid_t rdbId, collnum_t collNum);
|
||||
bool removeNegRecs, rdbid_t rdbId, collnum_t collNum, int32_t startFileNum);
|
||||
|
||||
bool growList(int32_t newSize);
|
||||
|
||||
@ -262,7 +262,7 @@ private:
|
||||
int32_t hintOffset, const char *hintKey, const char *filename);
|
||||
|
||||
bool posdbMerge_r(RdbList **lists, int32_t numLists, const char *startKey, const char *endKey, int32_t minRecSizes,
|
||||
bool removeNegKeys, bool useIndexFile, collnum_t collNum);
|
||||
bool removeNegKeys, bool useIndexFile, collnum_t collNum, int32_t startFileNum);
|
||||
|
||||
// the unaltered raw list. keys may be outside of [m_startKey,m_endKey]
|
||||
char *m_list;
|
||||
|
@ -51,7 +51,7 @@ TEST_F(RdbListTest, MergeTestPosdbEmptyAll) {
|
||||
RdbList final1;
|
||||
final1.set(nullptr, 0, nullptr, 0, Posdb::getFixedDataSize(), true, Posdb::getUseHalfKeys(), Posdb::getKeySize());
|
||||
final1.prepareForMerge(lists1, lists1_size, -1);
|
||||
final1.merge_r(lists1, lists1_size, KEYMIN(), KEYMAX(), -1, true, RDB_POSDB, 0);
|
||||
final1.merge_r(lists1, lists1_size, KEYMIN(), KEYMAX(), -1, true, RDB_POSDB, 0, 0);
|
||||
|
||||
// verify merged list
|
||||
EXPECT_EQ(0, final1.getListSize());
|
||||
@ -82,7 +82,7 @@ TEST_F(RdbListTest, MergeTestPosdbEmptyOne) {
|
||||
RdbList final1;
|
||||
final1.set(nullptr, 0, nullptr, 0, Posdb::getFixedDataSize(), true, Posdb::getUseHalfKeys(), Posdb::getKeySize());
|
||||
final1.prepareForMerge(lists1, lists1_size, -1);
|
||||
final1.merge_r(lists1, lists1_size, KEYMIN(), KEYMAX(), -1, true, RDB_POSDB, 0);
|
||||
final1.merge_r(lists1, lists1_size, KEYMIN(), KEYMAX(), -1, true, RDB_POSDB, 0, 0);
|
||||
|
||||
// verify merged list
|
||||
EXPECT_EQ(list1.getListSize(), final1.getListSize());
|
||||
@ -101,7 +101,7 @@ TEST_F(RdbListTest, MergeTestPosdbEmptyOne) {
|
||||
RdbList final2;
|
||||
final2.set(nullptr, 0, nullptr, 0, Posdb::getFixedDataSize(), true, Posdb::getUseHalfKeys(), Posdb::getKeySize());
|
||||
final2.prepareForMerge(lists2, lists2_size, -1);
|
||||
final2.merge_r(lists2, lists2_size, KEYMIN(), KEYMAX(), -1, true, RDB_POSDB, 0);
|
||||
final2.merge_r(lists2, lists2_size, KEYMIN(), KEYMAX(), -1, true, RDB_POSDB, 0, 0);
|
||||
|
||||
// verify merged list
|
||||
EXPECT_EQ(list1.getListSize(), final2.getListSize());
|
||||
@ -142,7 +142,7 @@ TEST_F(RdbListTest, MergeTestPosdbVerifyListOrder) {
|
||||
RdbList final1;
|
||||
final1.set(nullptr, 0, nullptr, 0, Posdb::getFixedDataSize(), true, Posdb::getUseHalfKeys(), Posdb::getKeySize());
|
||||
final1.prepareForMerge(lists1, lists1_size, -1);
|
||||
final1.merge_r(lists1, lists1_size, KEYMIN(), KEYMAX(), -1, true, RDB_POSDB, 0);
|
||||
final1.merge_r(lists1, lists1_size, KEYMIN(), KEYMAX(), -1, true, RDB_POSDB, 0, 0);
|
||||
|
||||
// verify merged list
|
||||
EXPECT_EQ(0, final1.getListSize());
|
||||
@ -157,7 +157,7 @@ TEST_F(RdbListTest, MergeTestPosdbVerifyListOrder) {
|
||||
RdbList final2;
|
||||
final2.set(nullptr, 0, nullptr, 0, Posdb::getFixedDataSize(), true, Posdb::getUseHalfKeys(), Posdb::getKeySize());
|
||||
final2.prepareForMerge(lists2, lists2_size, -1);
|
||||
final2.merge_r(lists2, lists2_size, KEYMIN(), KEYMAX(), -1, true, RDB_POSDB, 0);
|
||||
final2.merge_r(lists2, lists2_size, KEYMIN(), KEYMAX(), -1, true, RDB_POSDB, 0, 0);
|
||||
|
||||
// verify merged list
|
||||
EXPECT_EQ(list1.getListSize(), final2.getListSize());
|
||||
@ -199,7 +199,7 @@ TEST_F(RdbListTest, MergeTestPosdbVerifyRemoveNegRecords) {
|
||||
RdbList final1;
|
||||
final1.set(nullptr, 0, nullptr, 0, Posdb::getFixedDataSize(), true, Posdb::getUseHalfKeys(), Posdb::getKeySize());
|
||||
final1.prepareForMerge(lists1, lists1_size, -1);
|
||||
final1.merge_r(lists1, lists1_size, KEYMIN(), KEYMAX(), -1, true, RDB_POSDB, 0);
|
||||
final1.merge_r(lists1, lists1_size, KEYMIN(), KEYMAX(), -1, true, RDB_POSDB, 0, 0);
|
||||
|
||||
// verify merged list
|
||||
EXPECT_EQ(0, final1.getListSize());
|
||||
@ -208,7 +208,7 @@ TEST_F(RdbListTest, MergeTestPosdbVerifyRemoveNegRecords) {
|
||||
RdbList final2;
|
||||
final2.set(nullptr, 0, nullptr, 0, Posdb::getFixedDataSize(), true, Posdb::getUseHalfKeys(), Posdb::getKeySize());
|
||||
final2.prepareForMerge(lists1, lists1_size, -1);
|
||||
final2.merge_r(lists1, lists1_size, KEYMIN(), KEYMAX(), -1, false, RDB_POSDB, 0);
|
||||
final2.merge_r(lists1, lists1_size, KEYMIN(), KEYMAX(), -1, false, RDB_POSDB, 0, 0);
|
||||
|
||||
// verify merged list
|
||||
EXPECT_EQ(list2.getListSize(), final2.getListSize());
|
||||
@ -302,7 +302,7 @@ TEST_F(RdbListNoMergeTest, MergeTestPosdbSingleDocSpiderSpiderSpider) {
|
||||
RdbList final1;
|
||||
final1.set(nullptr, 0, nullptr, 0, Posdb::getFixedDataSize(), true, Posdb::getUseHalfKeys(), Posdb::getKeySize());
|
||||
final1.prepareForMerge(lists1, lists1_size, -1);
|
||||
final1.merge_r(lists1, lists1_size, KEYMIN(), KEYMAX(), -1, false, RDB_POSDB, collNum);
|
||||
final1.merge_r(lists1, lists1_size, KEYMIN(), KEYMAX(), -1, false, RDB_POSDB, collNum, 0);
|
||||
|
||||
EXPECT_EQ(list3.getListSize(), final1.getListSize());
|
||||
for (list3.resetListPtr(), final1.resetListPtr(); !final1.isExhausted(); list3.skipCurrentRecord(), final1.skipCurrentRecord()) {
|
||||
@ -355,7 +355,7 @@ TEST_F(RdbListNoMergeTest, MergeTestPosdbSingleDocSpiderSpiderDelete) {
|
||||
RdbList final1;
|
||||
final1.set(nullptr, 0, nullptr, 0, Posdb::getFixedDataSize(), true, Posdb::getUseHalfKeys(), Posdb::getKeySize());
|
||||
final1.prepareForMerge(lists1, lists1_size, -1);
|
||||
final1.merge_r(lists1, lists1_size, KEYMIN(), KEYMAX(), -1, false, RDB_POSDB, collNum);
|
||||
final1.merge_r(lists1, lists1_size, KEYMIN(), KEYMAX(), -1, false, RDB_POSDB, collNum, 0);
|
||||
|
||||
EXPECT_EQ(list3.getListSize(), final1.getListSize());
|
||||
for (list3.resetListPtr(), final1.resetListPtr(); !final1.isExhausted(); list3.skipCurrentRecord(), final1.skipCurrentRecord()) {
|
||||
@ -408,7 +408,64 @@ TEST_F(RdbListNoMergeTest, MergeTestPosdbSingleDocSpiderDeleteSpider) {
|
||||
RdbList final1;
|
||||
final1.set(nullptr, 0, nullptr, 0, Posdb::getFixedDataSize(), true, Posdb::getUseHalfKeys(), Posdb::getKeySize());
|
||||
final1.prepareForMerge(lists1, lists1_size, -1);
|
||||
final1.merge_r(lists1, lists1_size, KEYMIN(), KEYMAX(), -1, false, RDB_POSDB, collNum);
|
||||
final1.merge_r(lists1, lists1_size, KEYMIN(), KEYMAX(), -1, false, RDB_POSDB, collNum, 0);
|
||||
|
||||
EXPECT_EQ(list3.getListSize(), final1.getListSize());
|
||||
for (list3.resetListPtr(), final1.resetListPtr(); !final1.isExhausted(); list3.skipCurrentRecord(), final1.skipCurrentRecord()) {
|
||||
EXPECT_EQ(list3.getCurrentRecSize(), final1.getCurrentRecSize());
|
||||
EXPECT_EQ(0, memcmp(list3.getCurrentRec(), final1.getCurrentRec(), list3.getCurrentRecSize()));
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(RdbListNoMergeTest, MergeTestPosdbSingleDocMergeStartSecondFile) {
|
||||
char key[MAX_KEY_BYTES];
|
||||
const rdbid_t rdbId = RDB_POSDB;
|
||||
const collnum_t collNum = 0;
|
||||
const int64_t docId = 1;
|
||||
|
||||
|
||||
// spider doc (a, b, c, d, e)
|
||||
RdbList list1;
|
||||
list1.set(nullptr, 0, nullptr, 0, Posdb::getFixedDataSize(), true, Posdb::getUseHalfKeys(), Posdb::getKeySize());
|
||||
list1.addRecord(makePosdbKey(key, 'a', docId, 0, false), 0, nullptr);
|
||||
list1.addRecord(makePosdbKey(key, 'b', docId, 1, false), 0, nullptr);
|
||||
list1.addRecord(makePosdbKey(key, 'c', docId, 2, false), 0, nullptr);
|
||||
list1.addRecord(makePosdbKey(key, 'd', docId, 3, false), 0, nullptr);
|
||||
list1.addRecord(makePosdbKey(key, 'e', docId, 4, false), 0, nullptr);
|
||||
addListToTree(rdbId, collNum, &list1);
|
||||
|
||||
// respider doc (a, c, b, e, d)
|
||||
RdbList list2;
|
||||
list2.set(nullptr, 0, nullptr, 0, Posdb::getFixedDataSize(), true, Posdb::getUseHalfKeys(), Posdb::getKeySize());
|
||||
list2.addRecord(makePosdbKey(key, 'a', docId, 0, false), 0, nullptr);
|
||||
list2.addRecord(makePosdbKey(key, 'c', docId, 1, false), 0, nullptr);
|
||||
list2.addRecord(makePosdbKey(key, 'b', docId, 2, false), 0, nullptr);
|
||||
list2.addRecord(makePosdbKey(key, 'e', docId, 3, false), 0, nullptr);
|
||||
list2.addRecord(makePosdbKey(key, 'd', docId, 4, false), 0, nullptr);
|
||||
addListToTree(rdbId, collNum, &list2);
|
||||
|
||||
// respider doc (r, s, t, l, n)
|
||||
RdbList list3;
|
||||
list3.set(nullptr, 0, nullptr, 0, Posdb::getFixedDataSize(), true, Posdb::getUseHalfKeys(), Posdb::getKeySize());
|
||||
list3.addRecord(makePosdbKey(key, 'r', docId, 0, false), 0, nullptr);
|
||||
list3.addRecord(makePosdbKey(key, 's', docId, 1, false), 0, nullptr);
|
||||
list3.addRecord(makePosdbKey(key, 't', docId, 2, false), 0, nullptr);
|
||||
list3.addRecord(makePosdbKey(key, 'l', docId, 3, false), 0, nullptr);
|
||||
list3.addRecord(makePosdbKey(key, 'n', docId, 4, false), 0, nullptr);
|
||||
addListToTree(rdbId, collNum, &list3);
|
||||
|
||||
// keys go from oldest to newest
|
||||
RdbList *lists1[2];
|
||||
lists1[0] = &list2;
|
||||
lists1[1] = &list3;
|
||||
|
||||
size_t lists1_size = sizeof_arr(lists1);
|
||||
|
||||
// merge
|
||||
RdbList final1;
|
||||
final1.set(nullptr, 0, nullptr, 0, Posdb::getFixedDataSize(), true, Posdb::getUseHalfKeys(), Posdb::getKeySize());
|
||||
final1.prepareForMerge(lists1, lists1_size, -1);
|
||||
final1.merge_r(lists1, lists1_size, KEYMIN(), KEYMAX(), -1, false, RDB_POSDB, collNum, 1);
|
||||
|
||||
EXPECT_EQ(list3.getListSize(), final1.getListSize());
|
||||
for (list3.resetListPtr(), final1.resetListPtr(); !final1.isExhausted(); list3.skipCurrentRecord(), final1.skipCurrentRecord()) {
|
||||
@ -471,7 +528,7 @@ TEST_F(RdbListNoMergeTest, MergeTestPosdbMultiDocS1S2N1S2S1N2) {
|
||||
RdbList final1;
|
||||
final1.set(nullptr, 0, nullptr, 0, Posdb::getFixedDataSize(), true, Posdb::getUseHalfKeys(), Posdb::getKeySize());
|
||||
final1.prepareForMerge(lists1, lists1_size, -1);
|
||||
final1.merge_r(lists1, lists1_size, KEYMIN(), KEYMAX(), -1, false, RDB_POSDB, collNum);
|
||||
final1.merge_r(lists1, lists1_size, KEYMIN(), KEYMAX(), -1, false, RDB_POSDB, collNum, 0);
|
||||
|
||||
EXPECT_EQ(list2.getListSize() + list3.getListSize(), final1.getListSize());
|
||||
|
||||
@ -539,7 +596,7 @@ TEST_F(RdbListNoMergeTest, MergeTestPosdbMultiDocS1N2N1S2S1N2) {
|
||||
RdbList final1;
|
||||
final1.set(nullptr, 0, nullptr, 0, Posdb::getFixedDataSize(), true, Posdb::getUseHalfKeys(), Posdb::getKeySize());
|
||||
final1.prepareForMerge(lists1, lists1_size, -1);
|
||||
final1.merge_r(lists1, lists1_size, KEYMIN(), KEYMAX(), -1, false, RDB_POSDB, collNum);
|
||||
final1.merge_r(lists1, lists1_size, KEYMIN(), KEYMAX(), -1, false, RDB_POSDB, collNum, 0);
|
||||
|
||||
EXPECT_EQ(list2.getListSize() + list3.getListSize(), final1.getListSize());
|
||||
|
||||
@ -610,7 +667,7 @@ TEST_F(RdbListNoMergeTest, MergeTestPosdbMultiDocS1S2D1S2S1N2) {
|
||||
RdbList final1;
|
||||
final1.set(nullptr, 0, nullptr, 0, Posdb::getFixedDataSize(), true, Posdb::getUseHalfKeys(), Posdb::getKeySize());
|
||||
final1.prepareForMerge(lists1, lists1_size, -1);
|
||||
final1.merge_r(lists1, lists1_size, KEYMIN(), KEYMAX(), -1, false, RDB_POSDB, collNum);
|
||||
final1.merge_r(lists1, lists1_size, KEYMIN(), KEYMAX(), -1, false, RDB_POSDB, collNum, 0);
|
||||
|
||||
// first record from list2 is not in output list
|
||||
list2.resetListPtr();
|
||||
@ -685,7 +742,7 @@ TEST_F(RdbListNoMergeTest, MergeTestPosdbMultiDocS1S2D1S2S1D2) {
|
||||
RdbList final1;
|
||||
final1.set(nullptr, 0, nullptr, 0, Posdb::getFixedDataSize(), true, Posdb::getUseHalfKeys(), Posdb::getKeySize());
|
||||
final1.prepareForMerge(lists1, lists1_size, -1);
|
||||
final1.merge_r(lists1, lists1_size, KEYMIN(), KEYMAX(), -1, false, RDB_POSDB, collNum);
|
||||
final1.merge_r(lists1, lists1_size, KEYMIN(), KEYMAX(), -1, false, RDB_POSDB, collNum, 0);
|
||||
|
||||
EXPECT_EQ(list3.getListSize(), final1.getListSize());
|
||||
for (list3.resetListPtr(), final1.resetListPtr(); !final1.isExhausted(); list3.skipCurrentRecord(), final1.skipCurrentRecord()) {
|
||||
@ -748,7 +805,7 @@ static void createPosdbList(collnum_t collNum, RdbList *list, int64_t termId, ui
|
||||
addListToTree(RDB_POSDB, collNum, list);
|
||||
}
|
||||
|
||||
static void mergePosdbLists(collnum_t collNum, RdbList *final1, RdbList *list1, RdbList *list2) {
|
||||
static void mergePosdbLists(collnum_t collNum, RdbList *final1, RdbList *list1, RdbList *list2, int32_t startFileNum = 0) {
|
||||
// keys go from oldest to newest
|
||||
RdbList *lists1[2];
|
||||
lists1[0] = list1;
|
||||
@ -759,7 +816,7 @@ static void mergePosdbLists(collnum_t collNum, RdbList *final1, RdbList *list1,
|
||||
// merge
|
||||
final1->set(nullptr, 0, nullptr, 0, Posdb::getFixedDataSize(), true, Posdb::getUseHalfKeys(), Posdb::getKeySize());
|
||||
final1->prepareForMerge(lists1, lists1_size, -1);
|
||||
final1->merge_r(lists1, lists1_size, KEYMIN(), KEYMAX(), -1, false, RDB_POSDB, collNum);
|
||||
final1->merge_r(lists1, lists1_size, KEYMIN(), KEYMAX(), -1, false, RDB_POSDB, collNum, startFileNum);
|
||||
}
|
||||
|
||||
static void expectEqualList(RdbList *list1, RdbList *list2) {
|
||||
@ -904,7 +961,7 @@ TEST(RdbListTest, DISABLED_MergeTest1) {
|
||||
logf(LOG_DEBUG,"starting merge");
|
||||
int64_t t = gettimeofdayInMilliseconds();
|
||||
// do it
|
||||
list.merge_r(lists, numToMerge, KEYMIN(), KEYMAX(), numKeysWanted, true, RDB_NONE, 0);
|
||||
list.merge_r(lists, numToMerge, KEYMIN(), KEYMAX(), numKeysWanted, true, RDB_NONE, 0, 0);
|
||||
|
||||
// completed
|
||||
int64_t now = gettimeofdayInMilliseconds();
|
||||
@ -980,7 +1037,7 @@ TEST(RdbListTest, DISABLED_MergeTest2) {
|
||||
logf(LOG_DEBUG, "-------list #3-------");
|
||||
list3.printList();
|
||||
|
||||
final.merge_r(lists, 3, KEYMIN(), KEYMAX(), min, true, RDB_NONE, 0);
|
||||
final.merge_r(lists, 3, KEYMIN(), KEYMAX(), min, true, RDB_NONE, 0, 0);
|
||||
|
||||
logf(LOG_DEBUG,"------list final------");
|
||||
final.printList();
|
||||
|
Reference in New Issue
Block a user