Files
privacore-open-source-searc…/UrlBlock.cpp

154 lines
4.5 KiB
C++

#include "UrlBlock.h"
#include "Url.h"
#include "hash.h"
#include "GbUtil.h"
#include "Log.h"
#include "Conf.h"
#include <algorithm>
// Builds a TLD block entry.
// m_tldsStr keeps `tlds` exactly as passed in, while m_tlds holds whatever
// split(tlds, ',') returns (presumably the individual comma-separated entries).
urlblocktld_t::urlblocktld_t(const std::string &tlds)
	: m_tldsStr(tlds),
	  m_tlds(split(tlds, ','))
{
}
// Builds a domain block entry.
//   domain       - stored unchanged in m_domain
//   allow        - passed through split(allow, ',') and stored in m_allow
//                  (presumably a comma-separated list of subdomains)
//   pathcriteria - stored unchanged in m_pathcriteria
urlblockdomain_t::urlblockdomain_t(const std::string &domain, const std::string &allow, pathcriteria_t pathcriteria)
	: m_domain(domain),
	  m_allow(split(allow, ',')),
	  m_pathcriteria(pathcriteria)
{
}
// Builds a host block entry; both arguments are copied verbatim into
// m_host and m_path respectively.
urlblockhost_t::urlblockhost_t(const std::string &host, const std::string &path)
	: m_host(host),
	  m_path(path)
{
}
// Builds a path block entry; `path` is copied verbatim into m_path.
urlblockpath_t::urlblockpath_t(const std::string &path)
	: m_path(path)
{
}
// Builds a regex block entry.
//   regexStr - textual form of the expression, stored in m_regexStr
//   regex    - regex object, copied into m_regex
//   domain   - stored in m_domain
urlblockregex_t::urlblockregex_t(const std::string &regexStr, const GbRegex &regex, const std::string &domain)
	: m_regex(regex),
	  m_regexStr(regexStr),
	  m_domain(domain)
{
}
// Wraps a TLD entry: tags this object as url_block_tld and keeps a
// shared reference to the entry in m_tld.
UrlBlock::UrlBlock(const std::shared_ptr<urlblocktld_t> &urlblocktld)
	: m_type(url_block_tld),
	  m_tld(urlblocktld)
{
}
// Wraps a domain entry: tags this object as url_block_domain and keeps a
// shared reference to the entry in m_domain.
UrlBlock::UrlBlock(const std::shared_ptr<urlblockdomain_t> &urlblockdomain)
	: m_type(url_block_domain),
	  m_domain(urlblockdomain)
{
}
// Wraps a host entry: tags this object as url_block_host and keeps a
// shared reference to the entry in m_host.
UrlBlock::UrlBlock(const std::shared_ptr<urlblockhost_t> &urlblockhost)
	: m_type(url_block_host),
	  m_host(urlblockhost)
{
}
// Wraps a path entry: tags this object as url_block_path and keeps a
// shared reference to the entry in m_path.
UrlBlock::UrlBlock(const std::shared_ptr<urlblockpath_t> &urlblockpath)
	: m_type(url_block_path),
	  m_path(urlblockpath)
{
}
// Wraps a regex entry: tags this object as url_block_regex and keeps a
// shared reference to the entry in m_regex.
UrlBlock::UrlBlock(const std::shared_ptr<urlblockregex_t> &urlblockregex)
	: m_type(url_block_regex),
	  m_regex(urlblockregex)
{
}
// Tests whether `url` satisfies the criterion held by this entry.
//
// What is compared depends on m_type:
//  - url_block_domain: the URL's domain must equal m_domain->m_domain byte for
//    byte. If an allow list is configured and the URL's subdomain is on it,
//    m_pathcriteria decides the outcome; in every other case the URL matches.
//  - url_block_host:   the URL's host must equal m_host->m_host. With a
//    non-empty m_host->m_path the URL's path must additionally start with it.
//  - url_block_path:   the URL's path must start with m_path->m_path.
//  - url_block_regex:  if m_regex->m_domain is set, the URL's domain must equal
//    it; the regex is then run against url.getUrl().
//  - url_block_tld:    the TLD (reduced to the part after its first '.', if it
//    has one) must be one of m_tld->m_tlds.
//
// Returns true when the criterion is satisfied, false otherwise.
bool UrlBlock::match(const Url &url) const {
	switch (m_type) {
		case url_block_domain:
			if (m_domain->m_domain.length() == static_cast<size_t>(url.getDomainLen()) &&
			    memcmp(m_domain->m_domain.c_str(), url.getDomain(), url.getDomainLen()) == 0) {
				// check subdomain
				if (!m_domain->m_allow.empty()) {
					// The subdomain is the part of the host in front of the domain, minus the
					// single character that directly precedes the domain (presumably the '.').
					// It is empty when host and domain start at the same address.
					auto subDomainLen = (url.getDomain() == url.getHost()) ? 0 : url.getDomain() - url.getHost() - 1;
					const std::string subDomain(url.getHost(), subDomainLen);

					const bool subDomainAllowed =
						(std::find(m_domain->m_allow.cbegin(), m_domain->m_allow.cend(), subDomain) != m_domain->m_allow.cend());
					if (subDomainAllowed) {
						// check for pathcriteria
						switch (m_domain->m_pathcriteria) {
							case urlblockdomain_t::pathcriteria_allow_all:
								return false;
							case urlblockdomain_t::pathcriteria_allow_index_only:
								// any path longer than one character still matches
								return (url.getPathLen() > 1);
							case urlblockdomain_t::pathcriteria_allow_rootpages_only:
								// any path with a depth above zero still matches
								return (url.getPathDepth(false) > 0);
						}
						// an unhandled criteria value falls through and matches
					}
				}

				return true;
			}
			break;

		case url_block_host:
			if (m_host->m_host.length() == static_cast<size_t>(url.getHostLen()) &&
			    memcmp(m_host->m_host.c_str(), url.getHost(), url.getHostLen()) == 0) {
				// no path restriction: the host alone is enough
				if (m_host->m_path.empty()) {
					return true;
				}

				// prefix comparison of the configured path against the URL's path
				return (m_host->m_path.length() <= static_cast<size_t>(url.getPathLenWithCgi()) &&
				        memcmp(m_host->m_path.c_str(), url.getPath(), m_host->m_path.length()) == 0);
			}
			break;

		case url_block_path:
			// prefix comparison of the configured path against the URL's path
			return (m_path->m_path.length() <= static_cast<size_t>(url.getPathLenWithCgi()) &&
			        memcmp(m_path->m_path.c_str(), url.getPath(), m_path->m_path.length()) == 0);

		case url_block_regex:
			// An empty domain means the regex applies to every URL; otherwise the
			// URL's domain has to be identical before the regex is evaluated.
			if (m_regex->m_domain.empty() ||
			    (m_regex->m_domain.length() == static_cast<size_t>(url.getDomainLen()) &&
			     memcmp(m_regex->m_domain.c_str(), url.getDomain(), url.getDomainLen()) == 0)) {
				return m_regex->m_regex.match(url.getUrl());
			}
			break;

		case url_block_tld:
			if (!m_tld->m_tlds.empty()) {
				const char *tld = url.getTLD();
				const auto rawTldLen = url.getTLDLen();

				// Only touch the buffer when there is one: handing a null pointer to
				// memchr()/std::string, or converting a negative length to size_t, is
				// not safe. A URL without a usable TLD is looked up as an empty string.
				std::string tldSuffix;
				if (tld != nullptr && rawTldLen > 0) {
					size_t tldLen = static_cast<size_t>(rawTldLen);

					// keep only what follows the first '.', when the TLD contains one
					const char *dotPos = static_cast<const char *>(memchr(tld, '.', tldLen));
					if (dotPos) {
						tldLen -= (dotPos - tld + 1);
						tld = dotPos + 1;
					}

					tldSuffix.assign(tld, tldLen);
				}

				return (std::find(m_tld->m_tlds.cbegin(), m_tld->m_tlds.cend(), tldSuffix) != m_tld->m_tlds.cend());
			}
			break;
	}

	return false;
}
// Logs which criterion matched `url`.
//
// The criterion's kind ("domain", "host", "path", "regex" or "tld") and its
// configured value are formatted together with url.getUrl() and handed to
// logTrace, with g_conf.m_logTraceUrlBlockList as its first argument
// (presumably the switch that enables this trace output).
void UrlBlock::logMatch(const Url &url) const {
	// Start with printable fallbacks so that "%s" never receives a null
	// pointer should m_type ever hold a value not handled below.
	const char *type = "unknown";
	const char *value = "";

	switch (m_type) {
		case url_block_domain:
			type = "domain";
			value = m_domain->m_domain.c_str();
			break;
		case url_block_host:
			type = "host";
			value = m_host->m_host.c_str();
			break;
		case url_block_path:
			type = "path";
			value = m_path->m_path.c_str();
			break;
		case url_block_regex:
			type = "regex";
			value = m_regex->m_regexStr.c_str();
			break;
		case url_block_tld:
			type = "tld";
			value = m_tld->m_tldsStr.c_str();
			break;
	}

	logTrace(g_conf.m_logTraceUrlBlockList, "Url block criteria %s='%s' matched url '%s'", type, value, url.getUrl());
}