privacore-open-source-searc.../HighFrequencyTermShortcuts.h
Ivan Skytte Jørgensen 8da6618685 If a word in a query is a high-freq-term then ignore it if possible
If a word is in the high-freq-term-cache then don't search for that word. Bigrams thereof are fine. Also, if it is the only word in the query then allow searching for it anyway.
2017-02-13 16:44:34 +01:00

44 lines
1.0 KiB
C++

#ifndef GB_HIGHFREQUENCYTERMSHORTCUTS_H
#define GB_HIGHFREQUENCYTERMSHORTCUTS_H
#include <inttypes.h>
#include <map>
#include <stddef.h>
// Table of PosDB shortcuts for high-frequency terms (aka stop words),
// keyed by 64-bit term id.
class HighFrequencyTermShortcuts {
private:
	// Per-term record stored in the table.
	struct TermEntry {
		const void *p;       // presumably the term's PosDB entry data - confirm in load()
		size_t bytes;        // presumably the size in bytes of the data at 'p' - confirm in load()
		char start_key[18];  // 18-byte start key
		char end_key[18];    // 18-byte end key
	};

	std::map<uint64_t,TermEntry> entries;  // term id -> record
	void *buffer;                          // NULL after construction; ownership is not visible in this header

	// Copying is disabled: both members are private and have no
	// definition in this header.
	HighFrequencyTermShortcuts(const HighFrequencyTermShortcuts&);
	HighFrequencyTermShortcuts& operator=(const HighFrequencyTermShortcuts&);

public:
	// Starts out with an empty table and no buffer.
	HighFrequencyTermShortcuts()
	  : entries(),
	    buffer(NULL)
	{
	}

	// Does nothing itself.
	// NOTE(review): 'buffer' is not released here - check whether callers
	// are expected to call unload() explicitly.
	~HighFrequencyTermShortcuts() {}

	// load()/unload() are implemented outside this header.
	bool load();
	void unload();

	// True when the table holds no terms.
	bool empty() const {
		return entries.empty();
	}

	// Implemented outside this header. Judging by the signature, the
	// pointer arguments are outputs filled in for 'term_id' and the return
	// value tells whether a shortcut exists - confirm against the
	// implementation.
	bool query_term_shortcut(uint64_t term_id,
	                         const void **posdb_entries, size_t *bytes,
	                         void *start_key, void *end_key);

	// Implemented outside this header; presumably reports whether
	// 'term_id' is present in the table - confirm against the implementation.
	bool is_registered_term(uint64_t term_id);
};
// Declaration of the global shortcut table instance; the object itself is defined outside this header.
extern HighFrequencyTermShortcuts g_hfts;
#endif // GB_HIGHFREQUENCYTERMSHORTCUTS_H