Files
privacore-open-source-searc…/test/unit/PosdbTest.cpp

542 lines
16 KiB
C++
Raw Normal View History

#include <gtest/gtest.h>
#include <Msg5.h>
2016-09-19 12:13:24 +02:00
#include "Posdb.h"
2016-10-11 12:38:41 +02:00
#include "GigablastTestUtils.h"
2016-11-14 17:32:16 +01:00
#include "Conf.h"
2016-09-27 13:45:20 +02:00
static void saveAndReloadPosdbBucket() {
2017-04-05 22:49:50 +02:00
Rdb *rdb = g_posdb.getRdb();
rdb->saveTree(false, NULL, NULL);
rdb->getBuckets()->clear();
rdb->loadTree();
if (g_posdb.getRdb()->isUseIndexFile()) {
g_posdb.getRdb()->getBase(0)->getTreeIndex()->writeIndex(false);
}
2016-09-27 13:45:20 +02:00
}
static void dumpPosdb() {
g_posdb.getRdb()->submitRdbDumpJob(true);
2017-05-08 14:10:10 +02:00
while (g_posdb.getRdb()->hasPendingRdbDumpJob()) {
usleep(100000); //sleep 100ms
}
g_posdb.getRdb()->getBase(0)->markNewFileReadable();
g_posdb.getRdb()->getBase(0)->generateGlobalIndex();
}
// Asks base 0 of posdb to attempt a merge, two times in a row.
// NOTE(review): the reason for the second attempt is not visible here --
// presumably the first call alone does not complete the merge; confirm.
static void mergePosdb() {
    for (int attempt = 0; attempt < 2; ++attempt) {
        g_posdb.getRdb()->getBase(0)->attemptMerge(0, true);
    }
}
2016-09-27 13:45:20 +02:00
class PosdbNoMergeTest : public ::testing::Test {
protected:
void SetUp() {
2016-10-11 12:38:41 +02:00
GbTest::initializeRdbs();
2016-10-11 13:43:21 +02:00
m_rdb = g_posdb.getRdb();
2016-09-27 13:45:20 +02:00
}
void TearDown() {
2016-10-11 12:38:41 +02:00
GbTest::resetRdbs();
2016-09-27 13:45:20 +02:00
}
2016-10-11 13:43:21 +02:00
Rdb *m_rdb;
2016-09-27 13:45:20 +02:00
};
// Adds one posdb key per termId (1..total_records) for a single document and
// checks that reading the buckets over the full key range yields exactly
// those records, in termId order.
TEST_F(PosdbNoMergeTest, AddRecord) {
    static const int total_records = 10;
    static const int64_t docId = 1;

    for (int i = 1; i <= total_records; i++) {
        GbTest::addPosdbKey(m_rdb, i, docId, 0);
    }

    // use extremes
    const char *startKey = KEYMIN();
    const char *endKey = KEYMAX();
    int32_t numPosRecs = 0;
    int32_t numNegRecs = 0;

    RdbBuckets *buckets = g_posdb.getRdb()->getBuckets();
    RdbList list;
    buckets->getList(0, startKey, endKey, -1, &list, &numPosRecs, &numNegRecs, Posdb::getUseHalfKeys());

    // verify that data returned is the same as data inserted above
    for (int i = 1; i <= total_records; ++i, list.skipCurrentRecord()) {
        // stop right away if the list is shorter than expected instead of
        // inspecting a record that is not there
        ASSERT_FALSE(list.isExhausted());

        const char *rec = list.getCurrentRec();
        EXPECT_EQ(i, Posdb::getTermId(rec));
        EXPECT_EQ(docId, Posdb::getDocId(rec));
    }

    // nothing beyond the inserted records may be returned
    EXPECT_TRUE(list.isExhausted());
}
// Adds keys for a document, then adds the matching delete keys, saving and
// reloading the buckets after each round. The buckets must end up empty.
TEST_F(PosdbNoMergeTest, AddDeleteRecord) {
    static const int total_records = 10;
    static const int64_t docId = 1;

    // round 1: document is added
    for (int termId = 1; termId <= total_records; ++termId) {
        GbTest::addPosdbKey(m_rdb, termId, docId, 0);
    }
    saveAndReloadPosdbBucket();

    // round 2: document is deleted (same keys, added as delete keys)
    for (int termId = 1; termId <= total_records; ++termId) {
        GbTest::addPosdbKey(m_rdb, termId, docId, 0, true);
    }
    saveAndReloadPosdbBucket();

    // read the whole key range back from the buckets
    const char *keyLo = KEYMIN();
    const char *keyHi = KEYMAX();
    int32_t posCount = 0;
    int32_t negCount = 0;
    RdbList list;
    RdbBuckets *posdbBuckets = g_posdb.getRdb()->getBuckets();
    posdbBuckets->getList(0, keyLo, keyHi, -1, &list, &posCount, &negCount, Posdb::getUseHalfKeys());

    EXPECT_TRUE(list.isExhausted());
}
// Checks the record the list currently points at and then advances the list
// by one record.
//
// list            list to inspect; must not be exhausted
// termId          expected term id of the current record
// docId           expected doc id of the current record
// isDel           expected result of KEYNEG() for the current record
// isShardByTermId expected result of Posdb::isShardedByTermId()
//
// The ASSERT on an exhausted list only leaves this helper (it cannot abort
// the calling test); in that case the list is not advanced.
static void expectRecord(RdbList *list, int64_t termId, int64_t docId, bool isDel = false, bool isShardByTermId = false) {
    ASSERT_FALSE(list->isExhausted());

    const char *rec = list->getCurrentRec();

    EXPECT_EQ(termId, Posdb::getTermId(rec));
    EXPECT_EQ(docId, Posdb::getDocId(rec));
    EXPECT_EQ(isDel, KEYNEG(rec));
    EXPECT_EQ(isShardByTermId, Posdb::isShardedByTermId(rec));

    list->skipCurrentRecord();
}
// Indexes the same document three times with a changing set of terms,
// adding delete keys for terms that disappear, and checks that the buckets
// hold only the terms of the last round.
TEST_F(PosdbNoMergeTest, AddDeleteRecordMultiple) {
    static const int64_t docId = 1;

    // round 1: terms a, b, c
    GbTest::addPosdbKey(m_rdb, 'a', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'b', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'c', docId, 0);

    // round 2: terms a, c, d (b goes away via a delete key)
    GbTest::addPosdbKey(m_rdb, 'b', docId, 0, true);
    GbTest::addPosdbKey(m_rdb, 'a', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'c', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'd', docId, 0);

    // round 3: terms a, d, e, f (c goes away via a delete key)
    GbTest::addPosdbKey(m_rdb, 'c', docId, 0, true);
    GbTest::addPosdbKey(m_rdb, 'a', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'd', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'e', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'f', docId, 0);

    // read the whole key range back from the buckets
    const char *keyLo = KEYMIN();
    const char *keyHi = KEYMAX();
    int32_t posCount = 0;
    int32_t negCount = 0;
    RdbList list;
    RdbBuckets *posdbBuckets = g_posdb.getRdb()->getBuckets();
    posdbBuckets->getList(0, keyLo, keyHi, -1, &list, &posCount, &negCount, Posdb::getUseHalfKeys());

    // only the terms of the last round remain
    expectRecord(&list, 'a', docId);
    expectRecord(&list, 'd', docId);
    expectRecord(&list, 'e', docId);
    expectRecord(&list, 'f', docId);
    EXPECT_TRUE(list.isExhausted());
}
// Spiders, deletes and respiders one document without ever dumping to a
// file; after every round the buckets are saved/reloaded and read back over
// the full key range.
TEST_F(PosdbNoMergeTest, AddRecordDeleteDocWithoutRdbFiles) {
    static const int total_records = 10;
    static const int64_t docId = 1;

    // use extremes
    const char *startKey = KEYMIN();
    const char *endKey = KEYMAX();
    int32_t numPosRecs = 0;
    int32_t numNegRecs = 0;

    RdbBuckets *buckets = g_posdb.getRdb()->getBuckets();
    RdbList list;

    // spider document
    for (int i = 1; i < total_records; ++i) {
        GbTest::addPosdbKey(m_rdb, i, docId, 0);
    }
    saveAndReloadPosdbBucket();

    // verify that data returned is the same as data inserted above
    buckets->getList(0, startKey, endKey, -1, &list, &numPosRecs, &numNegRecs, Posdb::getUseHalfKeys());
    for (int i = 1; i < total_records; ++i, list.skipCurrentRecord()) {
        // a list that is too short must fail here rather than have a
        // non-existent record inspected
        ASSERT_FALSE(list.isExhausted());

        const char *rec = list.getCurrentRec();
        EXPECT_EQ(i, Posdb::getTermId(rec));
        EXPECT_EQ(docId, Posdb::getDocId(rec));
        EXPECT_FALSE(KEYNEG(rec));
    }
    EXPECT_TRUE(list.isExhausted());

    // deleted document
    // NOTE(review): this loop (and the respider loop below) starts at 0 while
    // the initial spider starts at 1. termId 0 looks like
    // POSDB_DELETEDOC_TERMID -- confirm that the extra key is intentional.
    for (int i = 0; i < total_records; ++i) {
        GbTest::addPosdbKey(m_rdb, i, docId, 0, true);
    }
    saveAndReloadPosdbBucket();

    // nothing may be left after the delete
    buckets->getList(0, startKey, endKey, -1, &list, &numPosRecs, &numNegRecs, Posdb::getUseHalfKeys());
    EXPECT_TRUE(list.isExhausted());

    // respidered document
    for (int i = 0; i < total_records; ++i) {
        GbTest::addPosdbKey(m_rdb, i, docId, 0);
    }
    saveAndReloadPosdbBucket();

    // verify that data returned is the same as data inserted above
    buckets->getList(0, startKey, endKey, -1, &list, &numPosRecs, &numNegRecs, Posdb::getUseHalfKeys());
    for (int i = 1; i < total_records; ++i, list.skipCurrentRecord()) {
        // the list object is reused across rounds, so guard against reading
        // past the end of a list that came back too short
        ASSERT_FALSE(list.isExhausted());

        const char *rec = list.getCurrentRec();
        EXPECT_EQ(i, Posdb::getTermId(rec));
        EXPECT_EQ(docId, Posdb::getDocId(rec));
        EXPECT_FALSE(KEYNEG(rec));
    }
    EXPECT_TRUE(list.isExhausted());
}
// Indexes a document three times, dumping posdb after each round, then adds
// a delete key for POSDB_DELETEDOC_TERMID and checks what the buckets return.
TEST_F(PosdbNoMergeTest, AddRecordDeleteDocWithRdbFiles) {
    static const int64_t docId = 1;

    // round 1: terms a, b, c -> dumped
    GbTest::addPosdbKey(m_rdb, 'a', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'b', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'c', docId, 0);
    dumpPosdb();

    // round 2: terms a, c, d -> dumped
    GbTest::addPosdbKey(m_rdb, 'a', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'c', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'd', docId, 0);
    dumpPosdb();

    // round 3: terms a, d, e, f -> dumped
    GbTest::addPosdbKey(m_rdb, 'a', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'd', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'e', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'f', docId, 0);
    dumpPosdb();

    // finally add the delete key for the document-delete term
    GbTest::addPosdbKey(m_rdb, POSDB_DELETEDOC_TERMID, docId, 0, true);

    // read the whole key range back from the buckets
    const char *keyLo = KEYMIN();
    const char *keyHi = KEYMAX();
    int32_t posCount = 0;
    int32_t negCount = 0;
    RdbList list;
    RdbBuckets *posdbBuckets = g_posdb.getRdb()->getBuckets();
    posdbBuckets->getList(0, keyLo, keyHi, -1, &list, &posCount, &negCount, Posdb::getUseHalfKeys());

    // the first record in the buckets must be a delete key with termId 0
    expectRecord(&list, 0, docId, true);
}
// Spiders one document twice without dumping in between and reads the result
// back through Msg5.
TEST_F(PosdbNoMergeTest, SingleDocSpiderSpider) {
    static const int64_t docId = 1;

    // round 1: terms a, b, c, plus z added with the last argument set
    // (checked below via expectRecord's isShardByTermId)
    GbTest::addPosdbKey(m_rdb, 'a', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'b', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'c', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'z', docId, 0, false, true);

    // round 2: terms a, c, d; b and z get delete keys, y is new, and a
    // non-delete key for POSDB_DELETEDOC_TERMID is added
    GbTest::addPosdbKey(m_rdb, 'a', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'b', docId, 0, true);
    GbTest::addPosdbKey(m_rdb, 'c', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'd', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'z', docId, 0, true, true);
    GbTest::addPosdbKey(m_rdb, 'y', docId, 0, false, true);
    GbTest::addPosdbKey(m_rdb, POSDB_DELETEDOC_TERMID, docId, 0, false);
    saveAndReloadPosdbBucket();

    // fetch the full key range through Msg5
    const char *keyLo = KEYMIN();
    const char *keyHi = KEYMAX();
    RdbList list;
    Msg5 reader;
    ASSERT_TRUE(reader.getList(RDB_POSDB, 0, &list, keyLo, keyHi, -1, true, 0, -1, NULL, NULL, 0, false, 0, false));
    list.resetListPtr();

    // only the terms of the second round are expected
    expectRecord(&list, 'a', docId);
    expectRecord(&list, 'c', docId);
    expectRecord(&list, 'd', docId);
    expectRecord(&list, 'y', docId, false, true);
    EXPECT_TRUE(list.isExhausted());
}
// Spiders one document, dumps posdb, spiders the document again and reads
// the result back through Msg5.
TEST_F(PosdbNoMergeTest, SingleDocSpiderDumpSpider) {
    static const int64_t docId = 1;

    // round 1: terms a, b, c, plus z added with the last argument set
    GbTest::addPosdbKey(m_rdb, 'a', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'b', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'c', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'z', docId, 0, false, true);
    dumpPosdb();

    // round 2: terms a, c, d; b and z get delete keys, y is new, and a
    // non-delete key for POSDB_DELETEDOC_TERMID is added
    GbTest::addPosdbKey(m_rdb, 'a', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'b', docId, 0, true);
    GbTest::addPosdbKey(m_rdb, 'c', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'd', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'z', docId, 0, true, true);
    GbTest::addPosdbKey(m_rdb, 'y', docId, 0, false, true);
    GbTest::addPosdbKey(m_rdb, POSDB_DELETEDOC_TERMID, docId, 0, false);
    saveAndReloadPosdbBucket();

    // fetch the full key range through Msg5
    const char *keyLo = KEYMIN();
    const char *keyHi = KEYMAX();
    RdbList list;
    Msg5 reader;
    ASSERT_TRUE(reader.getList(RDB_POSDB, 0, &list, keyLo, keyHi, -1, true, 0, -1, NULL, NULL, 0, false, 0, false));
    list.resetListPtr();

    // only the terms of the second round are expected
    expectRecord(&list, 'a', docId);
    expectRecord(&list, 'c', docId);
    expectRecord(&list, 'd', docId);
    expectRecord(&list, 'y', docId, false, true);
    EXPECT_TRUE(list.isExhausted());
}
// Spiders one document and deletes it again without dumping in between;
// Msg5 must return nothing afterwards.
TEST_F(PosdbNoMergeTest, SingleDocSpiderDelete) {
    static const int64_t docId = 1;

    // round 1: terms a, b, c, plus z added with the last argument set
    GbTest::addPosdbKey(m_rdb, 'a', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'b', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'c', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'z', docId, 0, false, true);

    // round 2: document deleted -- delete keys for every term and for
    // POSDB_DELETEDOC_TERMID
    GbTest::addPosdbKey(m_rdb, 'a', docId, 0, true);
    GbTest::addPosdbKey(m_rdb, 'b', docId, 0, true);
    GbTest::addPosdbKey(m_rdb, 'c', docId, 0, true);
    GbTest::addPosdbKey(m_rdb, 'z', docId, 0, true, true);
    GbTest::addPosdbKey(m_rdb, POSDB_DELETEDOC_TERMID, docId, 0, true);
    saveAndReloadPosdbBucket();

    // fetch the full key range through Msg5
    const char *keyLo = KEYMIN();
    const char *keyHi = KEYMAX();
    RdbList list;
    Msg5 reader;
    ASSERT_TRUE(reader.getList(RDB_POSDB, 0, &list, keyLo, keyHi, -1, true, 0, -1, NULL, NULL, 0, false, 0, false));
    list.resetListPtr();

    // the deleted document must not leave any record behind
    EXPECT_TRUE(list.isExhausted());
}
// Spiders one document, dumps posdb, then deletes the document; Msg5 must
// return nothing afterwards.
TEST_F(PosdbNoMergeTest, SingleDocSpiderDumpDelete) {
    static const int64_t docId = 1;

    // round 1: terms a, b, c, plus z added with the last argument set
    GbTest::addPosdbKey(m_rdb, 'a', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'b', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'c', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'z', docId, 0, false, true);
    dumpPosdb();

    // round 2: document deleted -- delete keys for every term and for
    // POSDB_DELETEDOC_TERMID
    GbTest::addPosdbKey(m_rdb, 'a', docId, 0, true);
    GbTest::addPosdbKey(m_rdb, 'b', docId, 0, true);
    GbTest::addPosdbKey(m_rdb, 'c', docId, 0, true);
    GbTest::addPosdbKey(m_rdb, 'z', docId, 0, true, true);
    GbTest::addPosdbKey(m_rdb, POSDB_DELETEDOC_TERMID, docId, 0, true);
    saveAndReloadPosdbBucket();

    // fetch the full key range through Msg5
    const char *keyLo = KEYMIN();
    const char *keyHi = KEYMAX();
    RdbList list;
    Msg5 reader;
    ASSERT_TRUE(reader.getList(RDB_POSDB, 0, &list, keyLo, keyHi, -1, true, 0, -1, NULL, NULL, 0, false, 0, false));
    list.resetListPtr();

    // the deleted document must not leave any record behind
    EXPECT_TRUE(list.isExhausted());
}
// Spiders one document, dumps posdb, deletes the document and spiders it
// again with different terms; Msg5 must return only the latest terms.
TEST_F(PosdbNoMergeTest, SingleDocSpiderDumpDeleteSpider) {
    static const int64_t docId = 1;

    // round 1: terms a, b, c, plus z added with the last argument set
    GbTest::addPosdbKey(m_rdb, 'a', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'b', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'c', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'z', docId, 0, false, true);
    dumpPosdb();

    // round 2: document deleted -- delete keys for every term and for
    // POSDB_DELETEDOC_TERMID
    GbTest::addPosdbKey(m_rdb, 'a', docId, 0, true);
    GbTest::addPosdbKey(m_rdb, 'b', docId, 0, true);
    GbTest::addPosdbKey(m_rdb, 'c', docId, 0, true);
    GbTest::addPosdbKey(m_rdb, 'z', docId, 0, true, true);
    GbTest::addPosdbKey(m_rdb, POSDB_DELETEDOC_TERMID, docId, 0, true);

    // round 3: terms d, e, f, plus y with the last argument set and a
    // non-delete key for POSDB_DELETEDOC_TERMID
    GbTest::addPosdbKey(m_rdb, 'd', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'e', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'f', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'y', docId, 0, false, true);
    GbTest::addPosdbKey(m_rdb, POSDB_DELETEDOC_TERMID, docId, 0, false);
    saveAndReloadPosdbBucket();

    // fetch the full key range through Msg5
    const char *keyLo = KEYMIN();
    const char *keyHi = KEYMAX();
    RdbList list;
    Msg5 reader;
    ASSERT_TRUE(reader.getList(RDB_POSDB, 0, &list, keyLo, keyHi, -1, true, 0, -1, NULL, NULL, 0, false, 0, false));
    list.resetListPtr();

    // only the terms of the third round are expected
    expectRecord(&list, 'd', docId);
    expectRecord(&list, 'e', docId);
    expectRecord(&list, 'f', docId);
    expectRecord(&list, 'y', docId, false, true);
    EXPECT_TRUE(list.isExhausted());
}
// Spiders one document, dumps, deletes the document, dumps again and then
// spiders it with different terms; Msg5 must return only the latest terms.
TEST_F(PosdbNoMergeTest, SingleDocSpiderDumpDeleteDumpSpider) {
    static const int64_t docId = 1;

    // round 1: terms a, b, c, plus z added with the last argument set
    GbTest::addPosdbKey(m_rdb, 'a', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'b', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'c', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'z', docId, 0, false, true);
    dumpPosdb();

    // round 2: document deleted -- delete keys for every term and for
    // POSDB_DELETEDOC_TERMID
    GbTest::addPosdbKey(m_rdb, 'a', docId, 0, true);
    GbTest::addPosdbKey(m_rdb, 'b', docId, 0, true);
    GbTest::addPosdbKey(m_rdb, 'c', docId, 0, true);
    GbTest::addPosdbKey(m_rdb, 'z', docId, 0, true, true);
    GbTest::addPosdbKey(m_rdb, POSDB_DELETEDOC_TERMID, docId, 0, true);
    dumpPosdb();

    // round 3: terms d, e, f, plus y with the last argument set and a
    // non-delete key for POSDB_DELETEDOC_TERMID
    GbTest::addPosdbKey(m_rdb, 'd', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'e', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'f', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'y', docId, 0, false, true);
    GbTest::addPosdbKey(m_rdb, POSDB_DELETEDOC_TERMID, docId, 0, false);
    saveAndReloadPosdbBucket();

    // fetch the full key range through Msg5
    const char *keyLo = KEYMIN();
    const char *keyHi = KEYMAX();
    RdbList list;
    Msg5 reader;
    ASSERT_TRUE(reader.getList(RDB_POSDB, 0, &list, keyLo, keyHi, -1, true, 0, -1, NULL, NULL, 0, false, 0, false));
    list.resetListPtr();

    // only the terms of the third round are expected
    expectRecord(&list, 'd', docId);
    expectRecord(&list, 'e', docId);
    expectRecord(&list, 'f', docId);
    expectRecord(&list, 'y', docId, false, true);
    EXPECT_TRUE(list.isExhausted());
}
// Same sequence as SingleDocSpiderDumpDeleteDumpSpider, but a merge is
// attempted before the data is read back through Msg5.
TEST_F(PosdbNoMergeTest, SingleDocSpiderDumpDeleteDumpSpiderMerging) {
    static const int64_t docId = 1;

    // round 1: terms a, b, c, plus z added with the last argument set
    GbTest::addPosdbKey(m_rdb, 'a', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'b', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'c', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'z', docId, 0, false, true);
    dumpPosdb();

    // round 2: document deleted -- delete keys for every term and for
    // POSDB_DELETEDOC_TERMID
    GbTest::addPosdbKey(m_rdb, 'a', docId, 0, true);
    GbTest::addPosdbKey(m_rdb, 'b', docId, 0, true);
    GbTest::addPosdbKey(m_rdb, 'c', docId, 0, true);
    GbTest::addPosdbKey(m_rdb, 'z', docId, 0, true, true);
    GbTest::addPosdbKey(m_rdb, POSDB_DELETEDOC_TERMID, docId, 0, true);
    dumpPosdb();

    // round 3: terms d, e, f, plus y with the last argument set and a
    // non-delete key for POSDB_DELETEDOC_TERMID
    GbTest::addPosdbKey(m_rdb, 'd', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'e', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'f', docId, 0);
    GbTest::addPosdbKey(m_rdb, 'y', docId, 0, false, true);
    GbTest::addPosdbKey(m_rdb, POSDB_DELETEDOC_TERMID, docId, 0, false);
    saveAndReloadPosdbBucket();

    // attempt to merge the dumped files before reading
    mergePosdb();

    // fetch the full key range through Msg5
    const char *keyLo = KEYMIN();
    const char *keyHi = KEYMAX();
    RdbList list;
    Msg5 reader;
    ASSERT_TRUE(reader.getList(RDB_POSDB, 0, &list, keyLo, keyHi, -1, true, 0, -1, NULL, NULL, 0, false, 0, false));
    list.resetListPtr();

    // only the terms of the third round are expected
    expectRecord(&list, 'd', docId);
    expectRecord(&list, 'e', docId);
    expectRecord(&list, 'f', docId);
    expectRecord(&list, 'y', docId, false, true);
    EXPECT_TRUE(list.isExhausted());
}