Add g_contentRetryProxyList & rename BlockList to MatchList
This commit is contained in:
parent
a990000fe4
commit
dcb7aa46ee
2
Conf.cpp
2
Conf.cpp
@ -233,7 +233,7 @@ Conf::Conf ( ) {
|
||||
m_logDebugUrlAttempts = false;
|
||||
m_logDebugVagus = false;
|
||||
m_logTraceBigFile = false;
|
||||
m_logTraceBlockList = false;
|
||||
m_logTraceMatchList = false;
|
||||
m_logTraceContentTypeBlockList = false;
|
||||
m_logTraceDocProcess = false;
|
||||
m_logTraceDns = false;
|
||||
|
2
Conf.h
2
Conf.h
@ -381,7 +381,7 @@ class Conf {
|
||||
bool m_logDebugVagus;
|
||||
|
||||
bool m_logTraceBigFile;
|
||||
bool m_logTraceBlockList;
|
||||
bool m_logTraceMatchList;
|
||||
bool m_logTraceContentTypeBlockList;
|
||||
bool m_logTraceDocProcess;
|
||||
bool m_logTraceDns;
|
||||
|
41
ContentMatchList.cpp
Normal file
41
ContentMatchList.cpp
Normal file
@ -0,0 +1,41 @@
|
||||
//
|
||||
// Copyright (C) 2017 Privacore ApS - https://www.privacore.com
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
// License TL;DR: If you change this file, you must publish your changes.
|
||||
//
|
||||
#include "ContentMatchList.h"
|
||||
#include "Log.h"
|
||||
#include "Conf.h"
|
||||
|
||||
ContentMatchList g_contentRetryProxyList;
|
||||
|
||||
static const char s_filename[] = "contentretryproxylist.txt";
|
||||
|
||||
// Construct the list on top of MatchList<std::string>, backed by the file
// named in s_filename ("contentretryproxylist.txt").
ContentMatchList::ContentMatchList() : MatchList<std::string>(s_filename) {
}
|
||||
|
||||
bool ContentMatchList::isContentMatched(const char *content, size_t contentLen) {
|
||||
auto contentMatchList = getMatchList();
|
||||
|
||||
for (auto const &contentMatch : *contentMatchList) {
|
||||
if (strncasestr(content, contentLen, contentMatch.c_str())) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
32
ContentMatchList.h
Normal file
32
ContentMatchList.h
Normal file
@ -0,0 +1,32 @@
|
||||
//
|
||||
// Copyright (C) 2017 Privacore ApS - https://www.privacore.com
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
// License TL;DR: If you change this file, you must publish your changes.
|
||||
//
|
||||
#ifndef FX_CONTENTMATCHLIST_H
|
||||
#define FX_CONTENTMATCHLIST_H
|
||||
|
||||
#include "MatchList.h"
|
||||
|
||||
// Match list whose entries are plain strings that are searched for inside
// content buffers. Loading of the backing file is inherited from
// MatchList<std::string>.
class ContentMatchList : public MatchList<std::string> {
public:
	ContentMatchList();

	// Returns true when at least one list entry is found in the first
	// contentLen bytes of content.
	bool isContentMatched(const char *content, size_t contentLen);
};
|
||||
|
||||
extern ContentMatchList g_contentRetryProxyList;
|
||||
|
||||
#endif // FX_CONTENTMATCHLIST_H
|
@ -29,7 +29,7 @@ static const char s_contenttype_filename[] = "contenttypeblocklist.txt";
|
||||
static const char s_contenttype_allowed_filename[] = "contenttypeallowed.txt";
|
||||
|
||||
// Bind the block list to s_contenttype_filename and set up the members that
// guard the separate "allowed" content type list.
ContentTypeBlockList::ContentTypeBlockList()
	: MatchList<std::string>(s_contenttype_filename),
	  m_contenttype_allowed(),
	  m_contenttype_allowed_mtx(PTHREAD_MUTEX_INITIALIZER) {
}
|
||||
@ -43,7 +43,7 @@ bool ContentTypeBlockList::init() {
|
||||
m_contenttype_allowed.push_back(line);
|
||||
}
|
||||
|
||||
return BlockList::init();
|
||||
return MatchList::init();
|
||||
}
|
||||
|
||||
void ContentTypeBlockList::addContentTypeAllowed(const char *contentType, size_t contentTypeLen) {
|
||||
@ -65,7 +65,7 @@ bool ContentTypeBlockList::isContentTypeBlocked(const char *contentType, size_t
|
||||
return false;
|
||||
}
|
||||
|
||||
auto contentTypeBlockList = getBlockList();
|
||||
auto contentTypeBlockList = getMatchList();
|
||||
|
||||
for (auto const &contentTypeBlock : *contentTypeBlockList) {
|
||||
if (contentTypeBlock.back() == '*') {
|
||||
|
@ -20,11 +20,11 @@
|
||||
#define FX_CONTENTTYPEBLOCKLIST_H
|
||||
|
||||
|
||||
#include "BlockList.h"
|
||||
#include "MatchList.h"
|
||||
#include <pthread.h>
|
||||
#include <vector>
|
||||
|
||||
class ContentTypeBlockList : public BlockList<std::string> {
|
||||
class ContentTypeBlockList : public MatchList<std::string> {
|
||||
public:
|
||||
ContentTypeBlockList();
|
||||
|
||||
|
@ -25,11 +25,11 @@ DnsBlockList g_dnsBlockList;
|
||||
static const char s_dns_filename[] = "dnsblocklist.txt";
|
||||
|
||||
DnsBlockList::DnsBlockList()
|
||||
: BlockList(s_dns_filename) {
|
||||
: MatchList(s_dns_filename) {
|
||||
}
|
||||
|
||||
bool DnsBlockList::isDnsBlocked(const char *dns) {
|
||||
auto dnsBlockList = getBlockList();
|
||||
auto dnsBlockList = getMatchList();
|
||||
|
||||
for (auto const &dnsBlock : *dnsBlockList) {
|
||||
if (dnsBlock.front() == '*') {
|
||||
|
@ -19,9 +19,9 @@
|
||||
#ifndef FX_DNSBLOCKLIST_H
|
||||
#define FX_DNSBLOCKLIST_H
|
||||
|
||||
#include "BlockList.h"
|
||||
#include "MatchList.h"
|
||||
|
||||
class DnsBlockList : public BlockList<std::string> {
|
||||
class DnsBlockList : public MatchList<std::string> {
|
||||
public:
|
||||
DnsBlockList();
|
||||
bool isDnsBlocked(const char *dns);
|
||||
|
@ -26,11 +26,11 @@ IpBlockList g_ipBlockList;
|
||||
static const char s_ip_filename[] = "ipblocklist.txt";
|
||||
|
||||
// Construct the IP block list on top of MatchList<uint32_t>, backed by the
// file named in s_ip_filename ("ipblocklist.txt").
IpBlockList::IpBlockList() : MatchList<uint32_t>(s_ip_filename) {
}
|
||||
|
||||
bool IpBlockList::isIpBlocked(uint32_t ip) {
|
||||
auto ipBlockList = getBlockList();
|
||||
auto ipBlockList = getMatchList();
|
||||
|
||||
for (auto const &ipBlock : *ipBlockList) {
|
||||
if (ipBlock == ip) {
|
||||
@ -42,7 +42,7 @@ bool IpBlockList::isIpBlocked(uint32_t ip) {
|
||||
return false;
|
||||
}
|
||||
|
||||
void IpBlockList::addToBlockList(blocklist_ptr_t<uint32_t> &blockList, const std::string &line) {
|
||||
void IpBlockList::addToBlockList(matchlist_ptr_t<uint32_t> &blockList, const std::string &line) {
|
||||
in_addr addr;
|
||||
|
||||
if (inet_pton(AF_INET, line.c_str(), &addr) != 1) {
|
||||
|
@ -19,15 +19,15 @@
|
||||
#ifndef FX_IPBLOCKLIST_H
|
||||
#define FX_IPBLOCKLIST_H
|
||||
|
||||
#include "BlockList.h"
|
||||
#include "MatchList.h"
|
||||
|
||||
// Block list whose entries are 32-bit IP values (uint32_t), loaded through
// MatchList<uint32_t>.
class IpBlockList : public MatchList<uint32_t> {
public:
	IpBlockList();

	// Returns true when ip equals one of the entries in the current list.
	bool isIpBlocked(uint32_t ip);

protected:
	// MatchList<T>::load() passes every line of the file to the virtual
	// addToMatchList(). The generic MatchList<T> version only calls
	// gbshutdownLogicError(), so this class must override the hook under
	// exactly this name; a method called addToBlockList() is not an override
	// and would never be reached by load(). Forward to the IP line parser.
	void addToMatchList(matchlist_ptr_t<uint32_t> &matchList, const std::string &line) override {
		addToBlockList(matchList, line);
	}

	// Parses one line of the file and adds the resulting address to blockList.
	// Kept under its historical name so the existing out-of-line definition
	// continues to link.
	void addToBlockList(matchlist_ptr_t<uint32_t> &blockList, const std::string &line);
};
|
||||
|
||||
|
4
Makefile
4
Makefile
@ -59,8 +59,8 @@ OBJS_O2 = \
|
||||
|
||||
|
||||
OBJS_O3 = \
|
||||
BlockList.o \
|
||||
ContentTypeBlockList.o \
|
||||
MatchList.o \
|
||||
ContentMatchList.o ContentTypeBlockList.o \
|
||||
DocDelete.o DocProcess.o DocRebuild.o DocReindex.o DnsBlockList.o \
|
||||
IPAddressChecks.o IpBlockList.o \
|
||||
LanguageResultOverride.o Linkdb.o \
|
||||
|
@ -16,7 +16,7 @@
|
||||
//
|
||||
// License TL;DR: If you change this file, you must publish your changes.
|
||||
//
|
||||
#include "BlockList.h"
|
||||
#include "MatchList.h"
|
||||
#include "Log.h"
|
||||
#include "Conf.h"
|
||||
#include "Loop.h"
|
||||
@ -26,31 +26,31 @@
|
||||
#include <atomic>
|
||||
|
||||
template <class T>
|
||||
BlockList<T>::BlockList(const char *filename)
|
||||
MatchList<T>::MatchList(const char *filename)
|
||||
: m_filename(filename)
|
||||
, m_loading(false)
|
||||
, m_blockList(new blocklist_t<T>)
|
||||
, m_matchList(new matchlist_t<T>)
|
||||
, m_lastModifiedTime(0) {
|
||||
}
|
||||
|
||||
template <class T>
|
||||
bool BlockList<T>::init() {
|
||||
log(LOG_INFO, "Initializing BlockList with %s", m_filename);
|
||||
bool MatchList<T>::init() {
|
||||
log(LOG_INFO, "Initializing MatchList with %s", m_filename);
|
||||
|
||||
if (!g_loop.registerSleepCallback(60000, this, &reload, "BlockList<T>::reload", 0)) {
|
||||
log(LOG_WARN, "BlockList<T>:: Failed to register callback.");
|
||||
if (!g_loop.registerSleepCallback(60000, this, &reload, "MatchList<T>::reload", 0)) {
|
||||
log(LOG_WARN, "MatchList<T>:: Failed to register callback.");
|
||||
return false;
|
||||
}
|
||||
|
||||
// we do a load here instead of using sleep callback with immediate set to true so
|
||||
// we don't rely on g_loop being up and running to use blocklist
|
||||
// we don't rely on g_loop being up and running to use matchlist
|
||||
load();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void BlockList<T>::reload(int /*fd*/, void *state) {
|
||||
void MatchList<T>::reload(int /*fd*/, void *state) {
|
||||
if (g_jobScheduler.submit(reload, nullptr, state, thread_type_config_load, 0)) {
|
||||
return;
|
||||
}
|
||||
@ -60,36 +60,36 @@ void BlockList<T>::reload(int /*fd*/, void *state) {
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void BlockList<T>::reload(void *state) {
|
||||
BlockList *blockList = static_cast<BlockList*>(state);
|
||||
void MatchList<T>::reload(void *state) {
|
||||
MatchList *matchList = static_cast<MatchList*>(state);
|
||||
|
||||
// don't load multiple times at the same time
|
||||
if (blockList->m_loading.exchange(true)) {
|
||||
if (matchList->m_loading.exchange(true)) {
|
||||
return;
|
||||
}
|
||||
|
||||
blockList->load();
|
||||
blockList->m_loading = false;
|
||||
matchList->load();
|
||||
matchList->m_loading = false;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
bool BlockList<T>::load() {
|
||||
logTrace(g_conf.m_logTraceBlockList, "Loading %s", m_filename);
|
||||
bool MatchList<T>::load() {
|
||||
logTrace(g_conf.m_logTraceMatchList, "Loading %s", m_filename);
|
||||
|
||||
struct stat st;
|
||||
if (stat(m_filename, &st) != 0) {
|
||||
// probably not found
|
||||
log(LOG_INFO, "BlockList<T>::load: Unable to stat %s", m_filename);
|
||||
log(LOG_INFO, "MatchList<T>::load: Unable to stat %s", m_filename);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (m_lastModifiedTime != 0 && m_lastModifiedTime == st.st_mtime) {
|
||||
// not modified. assume successful
|
||||
logTrace(g_conf.m_logTraceBlockList, "%s not modified", m_filename);
|
||||
logTrace(g_conf.m_logTraceMatchList, "%s not modified", m_filename);
|
||||
return true;
|
||||
}
|
||||
|
||||
blocklist_ptr_t<T> tmpBlockList(new blocklist_t<T>);
|
||||
matchlist_ptr_t<T> tmpMatchList(new matchlist_t<T>);
|
||||
|
||||
std::ifstream file(m_filename);
|
||||
std::string line;
|
||||
@ -99,37 +99,37 @@ bool BlockList<T>::load() {
|
||||
continue;
|
||||
}
|
||||
|
||||
addToBlockList(tmpBlockList, line);
|
||||
logTrace(g_conf.m_logTraceBlockList, "Adding criteria '%s' to list", line.c_str());
|
||||
addToMatchList(tmpMatchList, line);
|
||||
logTrace(g_conf.m_logTraceMatchList, "Adding criteria '%s' to list", line.c_str());
|
||||
}
|
||||
|
||||
swapBlockList(tmpBlockList);
|
||||
swapMatchList(tmpMatchList);
|
||||
m_lastModifiedTime = st.st_mtime;
|
||||
|
||||
logTrace(g_conf.m_logTraceBlockList, "Loaded %s", m_filename);
|
||||
logTrace(g_conf.m_logTraceMatchList, "Loaded %s", m_filename);
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void BlockList<T>::addToBlockList(blocklist_ptr_t<T> &blockList, const std::string &line) {
|
||||
void MatchList<T>::addToMatchList(matchlist_ptr_t<T> &matchList, const std::string &line) {
|
||||
gbshutdownLogicError();
|
||||
}
|
||||
|
||||
template <>
|
||||
void BlockList<std::string>::addToBlockList(blocklist_ptr_t<std::string> &blockList, const std::string &line) {
|
||||
blockList->emplace_back(line);
|
||||
void MatchList<std::string>::addToMatchList(matchlist_ptr_t<std::string> &matchList, const std::string &line) {
|
||||
matchList->emplace_back(line);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
blocklistconst_ptr_t<T> BlockList<T>::getBlockList() {
|
||||
return m_blockList;
|
||||
matchlistconst_ptr_t<T> MatchList<T>::getMatchList() {
|
||||
return m_matchList;
|
||||
}
|
||||
|
||||
template <class T>
|
||||
void BlockList<T>::swapBlockList(blocklistconst_ptr_t<T> blockList) {
|
||||
std::atomic_store(&m_blockList, blockList);
|
||||
void MatchList<T>::swapMatchList(matchlistconst_ptr_t<T> matchList) {
|
||||
std::atomic_store(&m_matchList, matchList);
|
||||
}
|
||||
|
||||
// explicit instantiations
|
||||
template class BlockList<std::string>;
|
||||
template class BlockList<uint32_t>;
|
||||
template class MatchList<std::string>;
|
||||
template class MatchList<uint32_t>;
|
@ -16,8 +16,8 @@
|
||||
//
|
||||
// License TL;DR: If you change this file, you must publish your changes.
|
||||
//
|
||||
#ifndef FX_BLOCKLIST_H
|
||||
#define FX_BLOCKLIST_H
|
||||
#ifndef FX_MATCHLIST_H
|
||||
#define FX_MATCHLIST_H
|
||||
|
||||
|
||||
#include <memory>
|
||||
@ -25,14 +25,14 @@
|
||||
#include <string>
|
||||
#include <atomic>
|
||||
|
||||
template <typename T> using blocklist_t = std::vector<T>;
|
||||
template <typename T> using blocklist_ptr_t = std::shared_ptr<std::vector<T>>;
|
||||
template <typename T> using blocklistconst_ptr_t = std::shared_ptr<const std::vector<T>>;
|
||||
template <typename T> using matchlist_t = std::vector<T>;
|
||||
template <typename T> using matchlist_ptr_t = std::shared_ptr<std::vector<T>>;
|
||||
template <typename T> using matchlistconst_ptr_t = std::shared_ptr<const std::vector<T>>;
|
||||
|
||||
template<class T> class BlockList {
|
||||
template<class T> class MatchList {
|
||||
public:
|
||||
explicit BlockList(const char *filename);
|
||||
virtual ~BlockList() = default;
|
||||
explicit MatchList(const char *filename);
|
||||
virtual ~MatchList() = default;
|
||||
|
||||
virtual bool init();
|
||||
|
||||
@ -42,18 +42,18 @@ public:
|
||||
protected:
|
||||
bool load();
|
||||
|
||||
virtual void addToBlockList(blocklist_ptr_t<T> &blockList, const std::string &line);
|
||||
blocklistconst_ptr_t<T> getBlockList();
|
||||
virtual void addToMatchList(matchlist_ptr_t<T> &matchList, const std::string &line);
|
||||
matchlistconst_ptr_t<T> getMatchList();
|
||||
|
||||
const char *m_filename;
|
||||
|
||||
private:
|
||||
void swapBlockList(blocklistconst_ptr_t<T> blockList);
|
||||
void swapMatchList(matchlistconst_ptr_t<T> matchList);
|
||||
|
||||
std::atomic_bool m_loading;
|
||||
blocklistconst_ptr_t<T> m_blockList;
|
||||
matchlistconst_ptr_t<T> m_matchList;
|
||||
|
||||
time_t m_lastModifiedTime;
|
||||
};
|
||||
|
||||
#endif //FX_BLOCKLIST_H
|
||||
#endif //FX_MATCHLIST_H
|
@ -19,6 +19,7 @@
|
||||
#include "Statistics.h"
|
||||
#include "Sanity.h"
|
||||
#include "UrlMatchList.h"
|
||||
#include "ContentMatchList.h"
|
||||
#include <string.h>
|
||||
|
||||
|
||||
@ -1066,9 +1067,10 @@ static bool retryProxy(TcpSocket *ts, const char **msg, Msg13Request *r) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// @todo ALC check content
|
||||
|
||||
return false;
|
||||
size_t pre_size = mime.getMimeLen(); //size of http response line, mime headers and empty line separator
|
||||
size_t haystack_size = ts->m_readOffset - pre_size;
|
||||
const char *haystack = ts->m_readBuf + pre_size;
|
||||
return g_contentRetryProxyList.isContentMatched(haystack, haystack_size);
|
||||
}
|
||||
|
||||
static void appendCrawlBan(const char *group, const char *url, int urlLen) {
|
||||
|
@ -9049,9 +9049,9 @@ void Parms::init ( ) {
|
||||
m->m_page = PAGE_LOG;
|
||||
m++;
|
||||
|
||||
m->m_title = "log trace info for BlockList";
|
||||
m->m_title = "log trace info for MatchList";
|
||||
m->m_cgi = "ltrc_bl";
|
||||
simple_m_set(Conf,m_logTraceBlockList);
|
||||
simple_m_set(Conf,m_logTraceMatchList);
|
||||
m->m_def = "0";
|
||||
m->m_page = PAGE_LOG;
|
||||
m++;
|
||||
|
2
main.cpp
2
main.cpp
@ -105,6 +105,7 @@
|
||||
#include "IpBlockList.h"
|
||||
#include "SpiderdbSqlite.h"
|
||||
#include "QueryLanguage.h"
|
||||
#include "ContentMatchList.h"
|
||||
|
||||
|
||||
#include <sys/stat.h> //umask()
|
||||
@ -1304,6 +1305,7 @@ int main2 ( int argc , char *argv[] ) {
|
||||
g_dnsBlockList.init();
|
||||
g_contentTypeBlockList.init();
|
||||
g_ipBlockList.init();
|
||||
g_contentRetryProxyList.init();
|
||||
|
||||
g_urlBlackList.init();
|
||||
g_urlWhiteList.init();
|
||||
|
Loading…
x
Reference in New Issue
Block a user