mirror of
https://github.com/privacore/open-source-search-engine.git
synced 2025-03-11 09:01:12 -04:00
Add ContentTypeBlockList to block by http content-type
This commit is contained in:
parent
09bf57e147
commit
9a3f87b56e
115
BlockList.cpp
Normal file
115
BlockList.cpp
Normal file
@ -0,0 +1,115 @@
|
||||
//
|
||||
// Copyright (C) 2017 Privacore ApS - https://www.privacore.com
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
// License TL;DR: If you change this file, you must publish your changes.
|
||||
//
|
||||
#include "BlockList.h"
|
||||
#include "Log.h"
|
||||
#include "Conf.h"
|
||||
#include "Loop.h"
|
||||
#include "JobScheduler.h"
|
||||
#include <fstream>
|
||||
#include <sys/stat.h>
|
||||
#include <atomic>
|
||||
|
||||
// Creates a block list backed by the text file `filename`.
// Only the pointer is stored (the string is not copied), so the caller must
// keep it alive for as long as this object is used. Nothing is read from disk
// here: the published list starts out empty and is filled by load().
BlockList::BlockList(const char *filename)
	: m_filename(filename)
	, m_loading(false)
	, m_blockList(std::make_shared<blocklist_t>())
	, m_lastModifiedTime(0)
{
}
|
||||
|
||||
bool BlockList::init() {
|
||||
log(LOG_INFO, "Initializing BlockList with %s", m_filename);
|
||||
|
||||
if (!g_loop.registerSleepCallback(60000, this, &reload, "BlockList::reload", 0)) {
|
||||
log(LOG_WARN, "BlockList:: Failed to register callback.");
|
||||
return false;
|
||||
}
|
||||
|
||||
// we do a load here instead of using sleep callback with immediate set to true so
|
||||
// we don't rely on g_loop being up and running to use blocklist
|
||||
load();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void BlockList::reload(int /*fd*/, void *state) {
|
||||
if (g_jobScheduler.submit(reload, nullptr, state, thread_type_config_load, 0)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// unable to submit job (load on main thread)
|
||||
reload(state);
|
||||
}
|
||||
|
||||
void BlockList::reload(void *state) {
|
||||
BlockList *blockList = static_cast<BlockList*>(state);
|
||||
|
||||
// don't load multiple times at the same time
|
||||
if (blockList->m_loading.exchange(true)) {
|
||||
return;
|
||||
}
|
||||
|
||||
blockList->load();
|
||||
blockList->m_loading = false;
|
||||
}
|
||||
|
||||
bool BlockList::load() {
|
||||
logTrace(g_conf.m_logTraceBlockList, "Loading %s", m_filename);
|
||||
|
||||
struct stat st;
|
||||
if (stat(m_filename, &st) != 0) {
|
||||
// probably not found
|
||||
log(LOG_INFO, "BlockList::load: Unable to stat %s", m_filename);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (m_lastModifiedTime != 0 && m_lastModifiedTime == st.st_mtime) {
|
||||
// not modified. assume successful
|
||||
logTrace(g_conf.m_logTraceBlockList, "%s not modified", m_filename);
|
||||
return true;
|
||||
}
|
||||
|
||||
blocklist_ptr_t tmpBlockList(new blocklist_t);
|
||||
|
||||
std::ifstream file(m_filename);
|
||||
std::string line;
|
||||
while (std::getline(file, line)) {
|
||||
// ignore comments & empty lines
|
||||
if (line.length() == 0 || line[0] == '#') {
|
||||
continue;
|
||||
}
|
||||
|
||||
tmpBlockList->emplace_back(line);
|
||||
logTrace(g_conf.m_logTraceBlockList, "Adding criteria '%s' to list", line.c_str());
|
||||
}
|
||||
|
||||
swapBlockList(tmpBlockList);
|
||||
m_lastModifiedTime = st.st_mtime;
|
||||
|
||||
logTrace(g_conf.m_logTraceBlockList, "Loaded %s", m_filename);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Returns a shared reference to the currently published block list.
//
// swapBlockList() replaces m_blockList with std::atomic_store, possibly from
// a job thread, so the read here must be the matching std::atomic_load; a
// plain copy of the shared_ptr would race with that store. The returned
// pointer keeps the list alive even if a newer list is published afterwards.
blocklistconst_ptr_t BlockList::getBlockList() {
	return std::atomic_load(&m_blockList);
}
|
||||
|
||||
// Publishes `blockList` as the current list, replacing the previous one.
// The store uses the shared_ptr atomic free function with sequentially
// consistent ordering (the ordering std::atomic_store uses implicitly).
void BlockList::swapBlockList(blocklistconst_ptr_t blockList) {
	std::atomic_store_explicit(&m_blockList, blockList, std::memory_order_seq_cst);
}
|
||||
|
58
BlockList.h
Normal file
58
BlockList.h
Normal file
@ -0,0 +1,58 @@
|
||||
//
|
||||
// Copyright (C) 2017 Privacore ApS - https://www.privacore.com
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
// License TL;DR: If you change this file, you must publish your changes.
|
||||
//
|
||||
#ifndef FX_BLOCKLIST_H
|
||||
#define FX_BLOCKLIST_H
|
||||
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <atomic>
|
||||
|
||||
// One criteria string per entry, in file order.
using blocklist_t = std::vector<std::string>;

// Mutable handle, used while a new list is being built.
using blocklist_ptr_t = std::shared_ptr<blocklist_t>;

// Read-only handle, used once a list has been published.
using blocklistconst_ptr_t = std::shared_ptr<const blocklist_t>;
|
||||
|
||||
class BlockList {
|
||||
public:
|
||||
BlockList(const char *filename);
|
||||
|
||||
bool init();
|
||||
|
||||
static void reload(int /*fd*/, void *state);
|
||||
static void reload(void *state);
|
||||
|
||||
protected:
|
||||
bool load();
|
||||
|
||||
const char *m_filename;
|
||||
|
||||
blocklistconst_ptr_t getBlockList();
|
||||
|
||||
private:
|
||||
void swapBlockList(blocklistconst_ptr_t blockList);
|
||||
|
||||
std::atomic_bool m_loading;
|
||||
blocklistconst_ptr_t m_blockList;
|
||||
|
||||
time_t m_lastModifiedTime;
|
||||
};
|
||||
|
||||
|
||||
#endif //FX_BLOCKLIST_H
|
2
Conf.cpp
2
Conf.cpp
@ -233,6 +233,8 @@ Conf::Conf ( ) {
|
||||
m_logDebugUrlAttempts = false;
|
||||
m_logDebugVagus = false;
|
||||
m_logTraceBigFile = false;
|
||||
m_logTraceBlockList = false;
|
||||
m_logTraceContentTypeBlockList = false;
|
||||
m_logTraceDocDelete = false;
|
||||
m_logTraceDns = false;
|
||||
m_logTraceDnsBlockList = false;
|
||||
|
2
Conf.h
2
Conf.h
@ -383,6 +383,8 @@ class Conf {
|
||||
bool m_logDebugVagus;
|
||||
|
||||
bool m_logTraceBigFile;
|
||||
bool m_logTraceBlockList;
|
||||
bool m_logTraceContentTypeBlockList;
|
||||
bool m_logTraceDocDelete;
|
||||
bool m_logTraceDns;
|
||||
bool m_logTraceDnsBlockList;
|
||||
|
79
ContentTypeBlockList.cpp
Normal file
79
ContentTypeBlockList.cpp
Normal file
@ -0,0 +1,79 @@
|
||||
//
|
||||
// Copyright (C) 2017 Privacore ApS - https://www.privacore.com
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
// License TL;DR: If you change this file, you must publish your changes.
|
||||
//
|
||||
#include "ContentTypeBlockList.h"
|
||||
#include "ScopedLock.h"
|
||||
#include "Log.h"
|
||||
#include "Conf.h"
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
|
||||
// File holding the content types to block, one criteria per line.
static const char s_contenttype_filename[] = "contenttypeblocklist.txt";

// File recording every content type that was checked and found not blocked.
static const char s_contenttype_allowed_filename[] = "contenttypeallowed.txt";

// Process-wide instance.
ContentTypeBlockList g_contentTypeBlockList;
|
||||
|
||||
// Sets up the block list on the default block-list file and preloads the
// in-memory "allowed" list from the allowed-content-types file, one entry per
// line. If that file cannot be opened the allowed list simply starts empty.
ContentTypeBlockList::ContentTypeBlockList()
	: BlockList(s_contenttype_filename)
	, m_contenttype_allowed()
	, m_contenttype_allowed_mtx(PTHREAD_MUTEX_INITIALIZER)
{
	std::ifstream allowedFile(s_contenttype_allowed_filename);

	ScopedLock sl(m_contenttype_allowed_mtx);
	for (std::string entry; std::getline(allowedFile, entry); ) {
		m_contenttype_allowed.push_back(entry);
	}
}
|
||||
|
||||
void ContentTypeBlockList::addContentTypeAllowed(const std::string &contentType) {
|
||||
ScopedLock sl(m_contenttype_allowed_mtx);
|
||||
if (std::find(m_contenttype_allowed.begin(), m_contenttype_allowed.end(), contentType) != m_contenttype_allowed.end()) {
|
||||
return;
|
||||
}
|
||||
|
||||
m_contenttype_allowed.push_back(contentType);
|
||||
std::ofstream file(s_contenttype_allowed_filename, (std::ios::out | std::ios::app));
|
||||
file << contentType << std::endl;
|
||||
}
|
||||
|
||||
bool ContentTypeBlockList::isContentTypeBlocked(const char *contentType, size_t contentTypeLen) {
|
||||
if (contentTypeLen == 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto contentTypeBlockList = getBlockList();
|
||||
|
||||
for (auto const &contentTypeBlock : *contentTypeBlockList) {
|
||||
if (contentTypeBlock.back() == '*') {
|
||||
// prefix
|
||||
if (contentTypeLen >= contentTypeBlock.size() - 1 && strncasecmp(contentTypeBlock.c_str(), contentType, contentTypeBlock.size() - 1) == 0) {
|
||||
logTrace(g_conf.m_logTraceContentTypeBlockList, "Content type block criteria %s matched contenttype '%.*s'", contentTypeBlock.c_str(), static_cast<int>(contentTypeLen), contentType);
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
if (contentTypeLen == contentTypeBlock.size() && strncasecmp(contentTypeBlock.c_str(), contentType, contentTypeLen) == 0) {
|
||||
logTrace(g_conf.m_logTraceContentTypeBlockList, "Content type block criteria %s matched contenttype '%.*s'", contentTypeBlock.c_str(), static_cast<int>(contentTypeLen), contentType);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
addContentTypeAllowed(std::string(contentType, contentTypeLen));
|
||||
return false;
|
||||
}
|
42
ContentTypeBlockList.h
Normal file
42
ContentTypeBlockList.h
Normal file
@ -0,0 +1,42 @@
|
||||
//
|
||||
// Copyright (C) 2017 Privacore ApS - https://www.privacore.com
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
// License TL;DR: If you change this file, you must publish your changes.
|
||||
//
|
||||
#ifndef FX_CONTENTTYPEBLOCKLIST_H
|
||||
#define FX_CONTENTTYPEBLOCKLIST_H
|
||||
|
||||
|
||||
#include "BlockList.h"
|
||||
#include <pthread.h>
|
||||
#include <vector>
|
||||
|
||||
// Block list keyed on the HTTP Content-Type value.
// Besides the blocked criteria inherited from BlockList it keeps a list of
// content types that were checked and found not blocked.
class ContentTypeBlockList : public BlockList {
public:
	ContentTypeBlockList();

	// Records a content type as allowed (no-op if already recorded).
	void addContentTypeAllowed(const std::string &contentType);

	// Returns true if the content type matches one of the block criteria.
	// `contentType` is given as pointer + length.
	bool isContentTypeBlocked(const char *contentType, size_t contentTypeLen);

private:
	// content types recorded as allowed
	std::vector<std::string> m_contenttype_allowed;
	// guards m_contenttype_allowed
	mutable pthread_mutex_t m_contenttype_allowed_mtx;
};
|
||||
|
||||
extern ContentTypeBlockList g_contentTypeBlockList;
|
||||
|
||||
|
||||
#endif //FX_CONTENTTYPEBLOCKLIST_H
|
111
DnsBlockList.cpp
111
DnsBlockList.cpp
@ -1,98 +1,35 @@
|
||||
//
|
||||
// Copyright (C) 2017 Privacore ApS - https://www.privacore.com
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
// License TL;DR: If you change this file, you must publish your changes.
|
||||
//
|
||||
#include "DnsBlockList.h"
|
||||
#include "Log.h"
|
||||
#include "Conf.h"
|
||||
#include "Loop.h"
|
||||
#include "JobScheduler.h"
|
||||
#include <fstream>
|
||||
#include <sys/stat.h>
|
||||
#include <atomic>
|
||||
|
||||
DnsBlockList g_dnsBlockList;
|
||||
|
||||
static const char s_dns_filename[] = "dnsblocklist.txt";
|
||||
|
||||
DnsBlockList::DnsBlockList()
|
||||
: m_filename(s_dns_filename)
|
||||
, m_loading(false)
|
||||
, m_dnsBlockList(new dnsblocklist_t)
|
||||
, m_lastModifiedTime(0) {
|
||||
}
|
||||
|
||||
bool DnsBlockList::init() {
|
||||
log(LOG_INFO, "Initializing DnsBlockList with %s", m_filename);
|
||||
|
||||
if (!g_loop.registerSleepCallback(60000, this, &reload, "DnsBlockList::reload", 0)) {
|
||||
log(LOG_WARN, "DnsBlockList:: Failed to register callback.");
|
||||
return false;
|
||||
}
|
||||
|
||||
// we do a load here instead of using sleep callback with immediate set to true so
|
||||
// we don't rely on g_loop being up and running to use dnsblocklist
|
||||
load();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void DnsBlockList::reload(int /*fd*/, void *state) {
|
||||
if (g_jobScheduler.submit(reload, nullptr, state, thread_type_config_load, 0)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// unable to submit job (load on main thread)
|
||||
reload(state);
|
||||
}
|
||||
|
||||
void DnsBlockList::reload(void *state) {
|
||||
DnsBlockList *dnsBlockList = static_cast<DnsBlockList*>(state);
|
||||
|
||||
// don't load multiple times at the same time
|
||||
if (dnsBlockList->m_loading.exchange(true)) {
|
||||
return;
|
||||
}
|
||||
|
||||
dnsBlockList->load();
|
||||
dnsBlockList->m_loading = false;
|
||||
}
|
||||
|
||||
bool DnsBlockList::load() {
|
||||
logTrace(g_conf.m_logTraceDnsBlockList, "Loading %s", m_filename);
|
||||
|
||||
struct stat st;
|
||||
if (stat(m_filename, &st) != 0) {
|
||||
// probably not found
|
||||
log(LOG_INFO, "DnsBlockList::load: Unable to stat %s", m_filename);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (m_lastModifiedTime != 0 && m_lastModifiedTime == st.st_mtime) {
|
||||
// not modified. assume successful
|
||||
logTrace(g_conf.m_logTraceDnsBlockList, "Not modified");
|
||||
return true;
|
||||
}
|
||||
|
||||
dnsblocklist_ptr_t tmpDnsBlockList(new dnsblocklist_t);
|
||||
|
||||
std::ifstream file(m_filename);
|
||||
std::string line;
|
||||
while (std::getline(file, line)) {
|
||||
// ignore comments & empty lines
|
||||
if (line.length() == 0 || line[0] == '#') {
|
||||
continue;
|
||||
}
|
||||
|
||||
tmpDnsBlockList->emplace_back(line);
|
||||
logTrace(g_conf.m_logTraceDnsBlockList, "Adding criteria '%s' to list", line.c_str());
|
||||
}
|
||||
|
||||
swapDnsBlockList(tmpDnsBlockList);
|
||||
m_lastModifiedTime = st.st_mtime;
|
||||
|
||||
logTrace(g_conf.m_logTraceDnsBlockList, "Loaded %s", m_filename);
|
||||
return true;
|
||||
: BlockList(s_dns_filename) {
|
||||
}
|
||||
|
||||
bool DnsBlockList::isDnsBlocked(const char *dns) {
|
||||
auto dnsBlockList = getDnsBlockList();
|
||||
auto dnsBlockList = getBlockList();
|
||||
|
||||
for (auto const &dnsBlock : *dnsBlockList) {
|
||||
if (dnsBlock.front() == '*') {
|
||||
@ -112,11 +49,3 @@ bool DnsBlockList::isDnsBlocked(const char *dns) {
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
dnsblocklistconst_ptr_t DnsBlockList::getDnsBlockList() {
|
||||
return m_dnsBlockList;
|
||||
}
|
||||
|
||||
void DnsBlockList::swapDnsBlockList(dnsblocklistconst_ptr_t dnsBlockList) {
|
||||
std::atomic_store(&m_dnsBlockList, dnsBlockList);
|
||||
}
|
||||
|
@ -1,41 +1,32 @@
|
||||
#ifndef GB_DNSBLOCKLIST_H
|
||||
#define GB_DNSBLOCKLIST_H
|
||||
//
|
||||
// Copyright (C) 2017 Privacore ApS - https://www.privacore.com
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as
|
||||
// published by the Free Software Foundation, either version 3 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
// License TL;DR: If you change this file, you must publish your changes.
|
||||
//
|
||||
#ifndef FX_DNSBLOCKLIST_H
|
||||
#define FX_DNSBLOCKLIST_H
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <atomic>
|
||||
#include "BlockList.h"
|
||||
|
||||
typedef std::vector<std::string> dnsblocklist_t;
|
||||
typedef std::shared_ptr<dnsblocklist_t> dnsblocklist_ptr_t;
|
||||
typedef std::shared_ptr<const dnsblocklist_t> dnsblocklistconst_ptr_t;
|
||||
|
||||
class DnsBlockList {
|
||||
class DnsBlockList : public BlockList {
|
||||
public:
|
||||
DnsBlockList();
|
||||
|
||||
bool init();
|
||||
|
||||
bool isDnsBlocked(const char *dns);
|
||||
|
||||
static void reload(int /*fd*/, void *state);
|
||||
static void reload(void *state);
|
||||
|
||||
protected:
|
||||
bool load();
|
||||
|
||||
const char *m_filename;
|
||||
|
||||
private:
|
||||
dnsblocklistconst_ptr_t getDnsBlockList();
|
||||
void swapDnsBlockList(dnsblocklistconst_ptr_t dnsBlockList);
|
||||
|
||||
std::atomic_bool m_loading;
|
||||
dnsblocklistconst_ptr_t m_dnsBlockList;
|
||||
|
||||
time_t m_lastModifiedTime;
|
||||
};
|
||||
|
||||
extern DnsBlockList g_dnsBlockList;
|
||||
|
||||
#endif //GB_DNSBLOCKLIST_H
|
||||
#endif //FX_DNSBLOCKLIST_H
|
||||
|
@ -74,6 +74,7 @@ void HttpMime::reset ( ) {
|
||||
m_contentEncodingPos = NULL;
|
||||
m_contentLengthPos = NULL;
|
||||
m_contentTypePos = NULL;
|
||||
m_contentTypeLen = 0;
|
||||
|
||||
m_cookies.clear();
|
||||
}
|
||||
@ -625,6 +626,7 @@ bool HttpMime::parseContentType(const char *field, size_t fieldLen) {
|
||||
|
||||
if (getValue(&value, &valueLen)) {
|
||||
m_contentTypePos = value;
|
||||
m_contentTypeLen = valueLen;
|
||||
m_contentType = getContentTypePrivate(value, valueLen);
|
||||
}
|
||||
|
||||
|
@ -129,6 +129,7 @@ public:
|
||||
const char *getContentEncodingPos() { return m_contentEncodingPos; }
|
||||
const char *getContentLengthPos() { return m_contentLengthPos; }
|
||||
const char *getContentTypePos() { return m_contentTypePos; }
|
||||
int32_t getContentTypeLen() const { return m_contentTypeLen; }
|
||||
|
||||
// convert a file extension like "gif" to "image/gif"
|
||||
static const char *getContentTypeFromExtension ( const char *ext ) ;
|
||||
@ -237,7 +238,9 @@ private:
|
||||
int32_t m_contentEncoding;
|
||||
const char *m_contentEncodingPos;
|
||||
const char *m_contentLengthPos;
|
||||
|
||||
const char *m_contentTypePos;
|
||||
size_t m_contentTypeLen;
|
||||
|
||||
// Content-Type: text/html;charset=euc-jp // japanese (euc-jp)
|
||||
// Content-Type: text/html;charset=gb2312 // chinese (gb2312)
|
||||
|
2
Makefile
2
Makefile
@ -58,6 +58,8 @@ OBJS_O2 = \
|
||||
|
||||
|
||||
OBJS_O3 = \
|
||||
BlockList.o \
|
||||
ContentTypeBlockList.o \
|
||||
DocDelete.o DnsBlockList.o \
|
||||
IPAddressChecks.o \
|
||||
LanguageResultOverride.o Linkdb.o \
|
||||
|
14
Parms.cpp
14
Parms.cpp
@ -8649,6 +8649,20 @@ void Parms::init ( ) {
|
||||
m->m_page = PAGE_LOG;
|
||||
m++;
|
||||
|
||||
m->m_title = "log trace info for BlockList";
|
||||
m->m_cgi = "ltrc_bl";
|
||||
simple_m_set(Conf,m_logTraceBlockList);
|
||||
m->m_def = "0";
|
||||
m->m_page = PAGE_LOG;
|
||||
m++;
|
||||
|
||||
m->m_title = "log trace info for ContentTypeBlockList";
|
||||
m->m_cgi = "ltrc_ctbl";
|
||||
simple_m_set(Conf,m_logTraceContentTypeBlockList);
|
||||
m->m_def = "0";
|
||||
m->m_page = PAGE_LOG;
|
||||
m++;
|
||||
|
||||
m->m_title = "log trace info for DocDelete";
|
||||
m->m_cgi = "ltrc_docdel";
|
||||
simple_m_set(Conf,m_logTraceDocDelete);
|
||||
|
10
XmlDoc.cpp
10
XmlDoc.cpp
@ -50,6 +50,7 @@
|
||||
#include "GbDns.h"
|
||||
#include "RobotsCheckList.h"
|
||||
#include "UrlResultOverride.h"
|
||||
#include "ContentTypeBlockList.h"
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
|
||||
@ -2280,6 +2281,13 @@ int32_t *XmlDoc::getIndexCode ( ) {
|
||||
return (int32_t *)mime;
|
||||
}
|
||||
|
||||
if (g_contentTypeBlockList.isContentTypeBlocked(mime->getContentTypePos(), mime->getContentTypeLen())) {
|
||||
m_indexCode = EDOCBADCONTENTTYPE;
|
||||
m_indexCodeValid = true;
|
||||
logTrace(g_conf.m_logTraceXmlDoc, "END, EDOCBADCONTENTTYPE");
|
||||
return &m_indexCode;
|
||||
}
|
||||
|
||||
// check redir url
|
||||
Url **redirp = getRedirUrl();
|
||||
if ( ! redirp || redirp == (void *)-1 ) {
|
||||
@ -9240,8 +9248,6 @@ char **XmlDoc::getFilteredContent ( ) {
|
||||
|
||||
if ( *ct == CT_TEXT ) return &m_filteredContent;
|
||||
if ( *ct == CT_XML ) return &m_filteredContent;
|
||||
// javascript - sometimes has address information in it, so keep it!
|
||||
if ( *ct == CT_JS ) return &m_filteredContent;
|
||||
if ( m_contentLen == 0 ) return &m_filteredContent;
|
||||
|
||||
// we now support JSON for diffbot
|
||||
|
2
main.cpp
2
main.cpp
@ -81,6 +81,7 @@
|
||||
#include "Dir.h"
|
||||
#include "File.h"
|
||||
#include "DnsBlockList.h"
|
||||
#include "ContentTypeBlockList.h"
|
||||
#include "UrlMatchList.h"
|
||||
#include "UrlBlockCheck.h"
|
||||
#include "DocDelete.h"
|
||||
@ -1666,6 +1667,7 @@ int main2 ( int argc , char *argv[] ) {
|
||||
|
||||
// load block lists
|
||||
g_dnsBlockList.init();
|
||||
g_contentTypeBlockList.init();
|
||||
|
||||
g_urlBlackList.init();
|
||||
g_urlWhiteList.init();
|
||||
|
29
test/unit/ContentTypeBlockListTest.cpp
Normal file
29
test/unit/ContentTypeBlockListTest.cpp
Normal file
@ -0,0 +1,29 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include "ContentTypeBlockList.h"
|
||||
|
||||
class TestContentTypeBlockList : public ContentTypeBlockList {
|
||||
public:
|
||||
TestContentTypeBlockList(const char *filename)
|
||||
: ContentTypeBlockList() {
|
||||
m_filename = filename;
|
||||
}
|
||||
|
||||
using ContentTypeBlockList::load;
|
||||
|
||||
bool isContentTypeBlocked(const char *str) {
|
||||
return ContentTypeBlockList::isContentTypeBlocked(str, strlen(str));
|
||||
}
|
||||
};
|
||||
|
||||
// The fixture holds one exact criteria ("application/font-woff") and one
// prefix criteria ("audio/*"); check both kinds of match and their near misses.
TEST(ContentTypeBlockListTest, BlockList) {
	TestContentTypeBlockList contentTypeBlockList("blocklist/contenttype.txt");
	contentTypeBlockList.load();

	static const struct {
		const char *contentType;
		bool blocked;
	} expectations[] = {
		// full match
		{ "application/font-woff",   true  },
		{ "application/font-woff-2", false },
		// prefix match
		{ "naudio/",                 false },
		{ "audio/",                  true  },
		{ "audio/CN",                true  },
		{ "audio/DAT12",             true  },
	};

	for (const auto &expectation : expectations) {
		EXPECT_EQ(expectation.blocked, contentTypeBlockList.isContentTypeBlocked(expectation.contentType)) << expectation.contentType;
	}
}
|
@ -6,6 +6,7 @@ BASE_DIR ?= ../..
|
||||
TARGET = GigablastTest
|
||||
OBJECTS = GigablastTest.o GigablastTestUtils.o \
|
||||
BitOperationsTest.o BigFileTest.o \
|
||||
ContentTypeBlockListTest.o \
|
||||
DirTest.o DnsBlockListTest.o \
|
||||
FctypesTest.o \
|
||||
GbCacheTest.o \
|
||||
|
2
test/unit/blocklist/contenttype.txt
Normal file
2
test/unit/blocklist/contenttype.txt
Normal file
@ -0,0 +1,2 @@
|
||||
application/font-woff
|
||||
audio/*
|
Loading…
Reference in New Issue
Block a user