forked from Mirrors/privacore-open-source-search-engine
Cache document summaries
Implemented two caches for document summaries (Msg20Reply). The split is based on how stable the summary is, which depends on highlighting and on how the summary was generated. Max cache age works, but the memory limit is not supported yet.
This commit is contained in:
4
Conf.h
4
Conf.h
@ -353,6 +353,10 @@ class Conf {
|
||||
int64_t m_clusterdbFileCacheSize;
|
||||
int64_t m_titledbFileCacheSize;
|
||||
int64_t m_spiderdbFileCacheSize;
|
||||
int64_t m_stableSummaryCacheSize;
|
||||
int64_t m_stableSummaryCacheMaxAge;
|
||||
int64_t m_unstableSummaryCacheSize;
|
||||
int64_t m_unstableSummaryCacheMaxAge;
|
||||
|
||||
//bool m_quickpollCoreOnError;
|
||||
bool m_useShotgun;
|
||||
|
1
Makefile
1
Makefile
@ -67,6 +67,7 @@ OBJS = UdpSlot.o Rebalance.o \
|
||||
Punycode.o Version.o \
|
||||
HighFrequencyTermShortcuts.o \
|
||||
IPAddressChecks.o \
|
||||
SummaryCache.o \
|
||||
|
||||
# common flags
|
||||
DEFS = -D_REENTRANT_ -D_CHECK_FORMAT_STRING_ -I.
|
||||
|
79
Msg20.cpp
79
Msg20.cpp
@ -3,11 +3,16 @@
|
||||
#ifdef _VALGRIND_
|
||||
#include <valgrind/memcheck.h>
|
||||
#endif
|
||||
#include "SummaryCache.h"
|
||||
|
||||
static void gotReplyWrapper20 ( void *state , void *state20 ) ;
|
||||
static void handleRequest20 ( UdpSlot *slot , int32_t netnice );
|
||||
static bool gotReplyWrapperxd ( void *state ) ;
|
||||
|
||||
|
||||
static bool sendCachedReply ( Msg20Request *req, const void *cached_summary, size_t cached_summary_len, UdpSlot *slot );
|
||||
|
||||
|
||||
Msg20::Msg20 () { constructor(); }
|
||||
Msg20::~Msg20() { reset(); }
|
||||
|
||||
@ -356,6 +361,18 @@ void handleRequest20 ( UdpSlot *slot , int32_t netnice ) {
|
||||
return;
|
||||
}
|
||||
|
||||
int64_t cache_key = req->makeCacheKey();
|
||||
const void *cached_summary;
|
||||
size_t cached_summary_len;
|
||||
if(g_stable_summary_cache.lookup(cache_key, &cached_summary, &cached_summary_len) ||
|
||||
g_unstable_summary_cache.lookup(cache_key, &cached_summary, &cached_summary_len))
|
||||
{
|
||||
log(LOG_DEBUG, "Summary cache hit");
|
||||
sendCachedReply(req,cached_summary,cached_summary_len,slot);
|
||||
return;
|
||||
} else
|
||||
log(LOG_DEBUG, "Summary cache miss");
|
||||
|
||||
// if it's not stored locally that's an error
|
||||
if ( req->m_docId >= 0 && ! g_titledb.isLocal ( req->m_docId ) ) {
|
||||
log("query: Got msg20 request for non-local docId %"INT64"",
|
||||
@ -447,7 +464,7 @@ bool gotReplyWrapperxd ( void *state ) {
|
||||
}
|
||||
|
||||
// error?
|
||||
if ( g_errno ) { xd->m_reply.sendReply ( xd ); return true; }
|
||||
if ( g_errno ) { xd->m_reply.sendReply ( req, xd ); return true; }
|
||||
// this should not block now
|
||||
Msg20Reply *reply = xd->getMsg20Reply ( );
|
||||
// sanity check, should not block here now
|
||||
@ -455,7 +472,7 @@ bool gotReplyWrapperxd ( void *state ) {
|
||||
// NULL means error, -1 means blocked. on error g_errno should be set
|
||||
if ( ! reply && ! g_errno ) { char *xx=NULL;*xx=0;}
|
||||
// send it off. will send an error reply if g_errno is set
|
||||
return reply->sendReply ( xd );
|
||||
return reply->sendReply ( req, xd );
|
||||
}
|
||||
|
||||
Msg20Reply::Msg20Reply ( ) {
|
||||
@ -481,7 +498,7 @@ void Msg20Reply::destructor ( ) {
|
||||
|
||||
// . return ptr to the buffer we serialize into
|
||||
// . return NULL and set g_errno on error
|
||||
bool Msg20Reply::sendReply ( XmlDoc *xd ) {
|
||||
bool Msg20Reply::sendReply ( Msg20Request *req, XmlDoc *xd ) {
|
||||
|
||||
// get it
|
||||
UdpSlot *slot = (UdpSlot *)xd->m_slot;
|
||||
@ -531,6 +548,12 @@ bool Msg20Reply::sendReply ( XmlDoc *xd ) {
|
||||
color );
|
||||
|
||||
|
||||
//put the reply into the summary cache
|
||||
if(m_isDisplaySumSetFromTags && !req->m_highlightQueryTerms)
|
||||
g_stable_summary_cache.insert(req->makeCacheKey(), buf, need);
|
||||
else
|
||||
g_unstable_summary_cache.insert(req->makeCacheKey(), buf, need);
|
||||
|
||||
// . del the list at this point, we've copied all the data into reply
|
||||
// . this will free a non-null State20::m_ps (ParseState) for us
|
||||
mdelete ( xd , sizeof(XmlDoc) , "xd20" );
|
||||
@ -541,6 +564,23 @@ bool Msg20Reply::sendReply ( XmlDoc *xd ) {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// . answer a msg20 request directly from a summary-cache entry
// . "cached_summary"/"cached_summary_len" describe the serialized reply bytes
//   owned by the cache; they are duplicated here because the cache may free
//   or replace its own copy at any later insert/purge
// . "req" is currently unused
// . on allocation failure an error reply carrying g_errno is sent instead
// . always returns true
static bool sendCachedReply ( Msg20Request *req, const void *cached_summary, size_t cached_summary_len, UdpSlot *slot )
{
	// private copy of the cached bytes for the udp layer
	char *buf = (char *)mmalloc ( cached_summary_len , "Msg20Reply" );
	if ( ! buf ) {
		g_udpServer.sendErrorReply ( slot , g_errno ) ;
		return true;
	}
	memcpy ( buf , cached_summary , cached_summary_len );

	// pass the copy as both the message and the allocation so that it gets
	// released once the reply has been sent; passing NULL/0 here left the
	// copy without an owner.
	// NOTE(review): assumes the 3rd/4th arguments of sendReply_ass name the
	// buffer the udp slot frees when it is done - confirm in UdpServer.h
	g_udpServer.sendReply_ass ( buf , cached_summary_len , buf , cached_summary_len , slot );

	return true;
}
|
||||
|
||||
|
||||
// . this is destructive on the "buf". it converts offs to ptrs
|
||||
// . sets m_r to the modified "buf" when done
|
||||
// . sets g_errno and returns -1 on error, otherwise # of bytes deseril
|
||||
@ -622,6 +662,39 @@ int32_t Msg20Request::deserialize ( ) {
|
||||
return (int32_t)sizeof(Msg20Request) + (p - m_buf);
|
||||
}
|
||||
|
||||
|
||||
//make a cache key for a request
|
||||
int64_t Msg20Request::makeCacheKey() const
|
||||
{
|
||||
SafeBuf hash_buffer;
|
||||
hash_buffer.pushLong(m_version);
|
||||
hash_buffer.pushLong(m_numSummaryLines);
|
||||
hash_buffer.pushLong(m_getHeaderTag);
|
||||
hash_buffer.pushLongLong(m_docId);
|
||||
hash_buffer.pushLong(m_titleMaxLen);
|
||||
hash_buffer.pushLong(m_summaryMaxLen);
|
||||
hash_buffer.pushLong(m_summaryMaxNumCharsPerLine);
|
||||
hash_buffer.pushLong(m_collnum);
|
||||
hash_buffer.pushLong(m_highlightQueryTerms);
|
||||
hash_buffer.pushLong(m_getSummaryVector);
|
||||
hash_buffer.pushLong(m_showBanned);
|
||||
hash_buffer.pushLong(m_includeCachedCopy);
|
||||
hash_buffer.pushLong(m_doLinkSpamCheck);
|
||||
hash_buffer.pushLong(m_isLinkSpam);
|
||||
hash_buffer.pushLong(m_isSiteLinkInfo);
|
||||
hash_buffer.pushLong(m_getLinkInfo);
|
||||
hash_buffer.pushLong(m_onlyNeedGoodInlinks);
|
||||
hash_buffer.pushLong(m_getLinkText);
|
||||
if(m_highlightQueryTerms)
|
||||
hash_buffer.safeMemcpy(ptr_qbuf,size_qbuf);
|
||||
hash_buffer.safeMemcpy(ptr_ubuf,size_ubuf);
|
||||
hash_buffer.safeMemcpy(ptr_linkee,size_linkee);
|
||||
hash_buffer.safeMemcpy(ptr_displayMetas,size_displayMetas);
|
||||
int64_t h = hash64(hash_buffer.getBufStart(), hash_buffer.length());
|
||||
return h;
|
||||
}
|
||||
|
||||
|
||||
int32_t Msg20Reply::getStoredSize ( ) {
|
||||
int32_t size = (int32_t)sizeof(Msg20Reply);
|
||||
// add up string buffer sizes
|
||||
|
3
Msg20.h
3
Msg20.h
@ -39,6 +39,7 @@ class Msg20Request {
|
||||
int32_t getStoredSize ( );
|
||||
char *serialize ( int32_t *sizePtr );
|
||||
int32_t deserialize ( );
|
||||
int64_t makeCacheKey() const;
|
||||
|
||||
char m_version ; // non-zero default
|
||||
char m_numSummaryLines ; // non-zero default
|
||||
@ -121,7 +122,7 @@ public:
|
||||
int32_t serialize ( char *buf , int32_t bufSize );
|
||||
|
||||
|
||||
bool sendReply ( class XmlDoc *xd ) ;
|
||||
bool sendReply ( Msg20Request *req, class XmlDoc *xd ) ;
|
||||
|
||||
// after calling these, when serialize() is called again it will
|
||||
// exclude these strings which were "cleared". Used by Msg40 to
|
||||
|
57
Parms.cpp
57
Parms.cpp
@ -6408,6 +6408,62 @@ void Parms::init ( ) {
|
||||
m->m_group = 0;
|
||||
m++;
|
||||
|
||||
m->m_title = "stable-summary cache size";
|
||||
m->m_desc = "How much memory to use for stable summaries, viz. generated from meta tags and the same for all users and queries";
|
||||
m->m_cgi = "stablesumcachemem";
|
||||
m->m_xml = "StableSummaryCacheSize";
|
||||
m->m_off = (char *)&g_conf.m_stableSummaryCacheSize - g;
|
||||
m->m_type = TYPE_LONG_LONG;
|
||||
m->m_def = "30000000";
|
||||
m->m_units = "bytes";
|
||||
m->m_flags = 0;
|
||||
m->m_page = PAGE_MASTER;
|
||||
m->m_obj = OBJ_CONF;
|
||||
m->m_group = 0;
|
||||
m++;
|
||||
|
||||
m->m_title = "stable-summary cache max age";
|
||||
m->m_desc = "How long to cache stable summaries, in milliseconds";
|
||||
m->m_cgi = "stablesumcacheage";
|
||||
m->m_xml = "StableSummaryCacheAge";
|
||||
m->m_off = offsetof(Conf,m_stableSummaryCacheMaxAge);
|
||||
m->m_type = TYPE_LONG_LONG;
|
||||
m->m_def = "86400000";
|
||||
m->m_units = "milliseconds";
|
||||
m->m_flags = 0;
|
||||
m->m_page = PAGE_MASTER;
|
||||
m->m_obj = OBJ_CONF;
|
||||
m->m_group = 0;
|
||||
m++;
|
||||
|
||||
m->m_title = "unstable-summary cache size";
|
||||
m->m_desc = "How much memory to use for unstable summaries, viz. generated from content and depends on user and search terms";
|
||||
m->m_cgi = "unstablesumcachemem";
|
||||
m->m_xml = "UnstableSummaryCacheSize";
|
||||
m->m_off = offsetof(Conf,m_unstableSummaryCacheSize);
|
||||
m->m_type = TYPE_LONG_LONG;
|
||||
m->m_def = "30000000";
|
||||
m->m_units = "bytes";
|
||||
m->m_flags = 0;
|
||||
m->m_page = PAGE_MASTER;
|
||||
m->m_obj = OBJ_CONF;
|
||||
m->m_group = 0;
|
||||
m++;
|
||||
|
||||
m->m_title = "unstable-summary cache max age";
|
||||
m->m_desc = "How long to cache unstable summaries, in milliseconds";
|
||||
m->m_cgi = "unstablesumcacheage";
|
||||
m->m_xml = "UnstableSummaryCacheAge";
|
||||
m->m_off = offsetof(Conf,m_unstableSummaryCacheMaxAge);
|
||||
m->m_type = TYPE_LONG_LONG;
|
||||
m->m_def = "3600000";
|
||||
m->m_units = "milliseconds";
|
||||
m->m_flags = 0;
|
||||
m->m_page = PAGE_MASTER;
|
||||
m->m_obj = OBJ_CONF;
|
||||
m->m_group = 0;
|
||||
m++;
|
||||
|
||||
m->m_title = "scan all if not found";
|
||||
m->m_desc = "Scan all titledb files if rec not found. You should "
|
||||
"keep this on to avoid corruption. Do not turn it off unless "
|
||||
@ -6416,6 +6472,7 @@ void Parms::init ( ) {
|
||||
m->m_off = (char *)&g_conf.m_scanAllIfNotFound - g;
|
||||
m->m_type = TYPE_BOOL;
|
||||
m->m_def = "1";
|
||||
m->m_units = "";
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_MASTER;
|
||||
m->m_obj = OBJ_CONF;
|
||||
|
106
SummaryCache.cpp
Normal file
106
SummaryCache.cpp
Normal file
@ -0,0 +1,106 @@
|
||||
#include "SummaryCache.h"
|
||||
#include "Mem.h"
|
||||
#include "fctypes.h"
|
||||
|
||||
|
||||
SummaryCache g_stable_summary_cache;
|
||||
SummaryCache g_unstable_summary_cache;
|
||||
|
||||
|
||||
|
||||
static const char memory_note[] = "cached_summary";
|
||||
|
||||
|
||||
// Construct an empty cache with small built-in defaults. main2() calls
// configure() on both global caches to replace these with the values
// from g_conf.
SummaryCache::SummaryCache()
  : m(),
    purge_iter(),
    max_age(1000),         // milliseconds (1 second)
    max_memory(1000000),   // bytes (1 megabyte)
    memory_used(0)
{
	// the map is empty here, so this is the same position as m.end()
	purge_iter = m.begin();
}
|
||||
|
||||
|
||||
void SummaryCache::configure(int64_t max_age_, size_t max_memory_)
|
||||
{
|
||||
max_age = max_age_;
|
||||
max_memory = max_memory_;
|
||||
}
|
||||
|
||||
|
||||
void SummaryCache::clear()
|
||||
{
|
||||
for(std::map<int64_t,Item>::iterator iter = m.begin();
|
||||
iter!=m.end();
|
||||
++iter)
|
||||
mfree(iter->second.data,iter->second.datalen,memory_note);
|
||||
m.clear();
|
||||
purge_iter = m.begin();
|
||||
memory_used = 0;
|
||||
}
|
||||
|
||||
|
||||
void SummaryCache::insert(int64_t key, const void *data, size_t datalen)
|
||||
{
|
||||
purge_step();
|
||||
|
||||
std::map<int64_t,Item>::iterator iter = m.find(key);
|
||||
if(iter!=m.end()) {
|
||||
//remove the old entry first
|
||||
if(purge_iter==iter)
|
||||
++purge_iter;
|
||||
mfree(iter->second.data,iter->second.datalen,memory_note);
|
||||
memory_used -= iter->second.datalen;
|
||||
m.erase(iter);
|
||||
}
|
||||
|
||||
Item item;
|
||||
item.timestamp = 0; //temporarily, for exception+memoryleak reason
|
||||
item.data = 0;
|
||||
item.datalen = 0;
|
||||
|
||||
iter = m.insert(std::make_pair(key,item)).first;
|
||||
|
||||
void *datacopy = mmalloc(datalen, memory_note);
|
||||
if(!datacopy) {
|
||||
m.erase(iter);
|
||||
return;
|
||||
}
|
||||
memcpy(datacopy,data,datalen);
|
||||
|
||||
iter->second.data = datacopy;
|
||||
iter->second.datalen = datalen;
|
||||
iter->second.timestamp = gettimeofdayInMilliseconds();
|
||||
memory_used += datalen;
|
||||
}
|
||||
|
||||
|
||||
bool SummaryCache::lookup(int64_t key, const void **data, size_t *datalen)
|
||||
{
|
||||
purge_step();
|
||||
std::map<int64_t,Item>::iterator iter = m.find(key);
|
||||
if(iter!=m.end() && iter->second.timestamp+max_age>=gettimeofdayInMilliseconds()) {
|
||||
*data = iter->second.data;
|
||||
*datalen = iter->second.datalen;
|
||||
return true;
|
||||
} else
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
void SummaryCache::purge_step()
|
||||
{
|
||||
if(purge_iter==m.end())
|
||||
purge_iter = m.begin();
|
||||
else {
|
||||
int64_t now = gettimeofdayInMilliseconds();
|
||||
if(purge_iter->second.timestamp+max_age<now) {
|
||||
std::map<int64_t,Item>::iterator iter = purge_iter;
|
||||
++purge_iter;
|
||||
mfree(iter->second.data,iter->second.datalen,memory_note);
|
||||
memory_used -= iter->second.datalen;
|
||||
m.erase(iter);
|
||||
} else
|
||||
++purge_iter;
|
||||
}
|
||||
}
|
42
SummaryCache.h
Normal file
42
SummaryCache.h
Normal file
@ -0,0 +1,42 @@
|
||||
#ifndef SUMMARY_CACHE_
|
||||
#define SUMMARY_CACHE_
|
||||
|
||||
#include <inttypes.h>
|
||||
#include <stddef.h>
|
||||
#include <map>
|
||||
|
||||
// Cache of opaque byte blobs keyed by a 64-bit value, with age-based expiry.
// Each entry owns a private copy of the inserted bytes. Expired entries are
// removed incrementally: every insert()/lookup() runs one purge step.
class SummaryCache {
public:
	SummaryCache();
	~SummaryCache() { clear(); }

	// set the maximum entry age (milliseconds) and the memory budget (bytes)
	void configure(int64_t max_age, size_t max_memory);

	// release all entries
	void clear();

	// store a copy of data[0..datalen) under key, replacing any old entry
	void insert(int64_t key, const void *data, size_t datalen);

	// fetch the entry for key; returns false if missing or too old.
	// *data refers to memory owned by the cache.
	bool lookup(int64_t key, const void **data, size_t *datalen);

private:
	// not copyable: declared but intentionally left unimplemented
	SummaryCache(const SummaryCache&);
	SummaryCache& operator=(const SummaryCache&);

	// one cached blob
	struct Item {
		int64_t timestamp;   // insertion time, from gettimeofdayInMilliseconds()
		void *data;          // cache-owned copy of the bytes
		size_t datalen;      // number of bytes at "data"
	};

	std::map<int64_t,Item> m;                      // key -> entry
	std::map<int64_t,Item>::iterator purge_iter;   // cursor of the incremental purge
	int64_t max_age;                               // maximum entry age, milliseconds
	size_t max_memory;                             // memory budget, bytes
	size_t memory_used;                            // sum of datalen over all entries

	// examine (and possibly expire) a single entry
	void purge_step();
};
|
||||
|
||||
|
||||
extern SummaryCache g_stable_summary_cache; //for summaries based on tags and no highlighting
|
||||
extern SummaryCache g_unstable_summary_cache; //for summaries based on content or with highlighting
|
||||
|
||||
#endif
|
4
main.cpp
4
main.cpp
@ -45,6 +45,7 @@
|
||||
#include "Pos.h"
|
||||
#include "Title.h"
|
||||
#include "Speller.h"
|
||||
#include "SummaryCache.h"
|
||||
|
||||
// include all msgs that have request handlers, cuz we register them with g_udp
|
||||
#include "Msg0.h"
|
||||
@ -2724,6 +2725,9 @@ int main2 ( int argc , char *argv[] ) {
|
||||
//if ( ! g_dnsLocal.init( 0 , false ) ) {
|
||||
// log("db: Dns local client init failed." ); return 1; }
|
||||
|
||||
g_stable_summary_cache.configure(g_conf.m_stableSummaryCacheMaxAge, g_conf.m_stableSummaryCacheSize);
|
||||
g_unstable_summary_cache.configure(g_conf.m_unstableSummaryCacheMaxAge, g_conf.m_unstableSummaryCacheSize);
|
||||
|
||||
// . then webserver
|
||||
// . server should listen to a socket and register with g_loop
|
||||
if ( ! g_httpServer.init( h9->m_httpPort, h9->m_httpsPort ) ) {
|
||||
|
Reference in New Issue
Block a user