mirror of
https://github.com/privacore/open-source-search-engine.git
synced 2025-07-15 02:36:08 -04:00
Add block by file for UrlMatch
This commit is contained in:
18
UrlMatch.cpp
18
UrlMatch.cpp
@ -17,6 +17,12 @@ urlmatchdomain_t::urlmatchdomain_t(const std::string &domain, const std::string
|
||||
, m_pathcriteria(pathcriteria) {
|
||||
}
|
||||
|
||||
|
||||
urlmatchfile_t::urlmatchfile_t(const std::string &file)
|
||||
: m_file(file) {
|
||||
}
|
||||
|
||||
|
||||
urlmatchhost_t::urlmatchhost_t(const std::string &host, const std::string &path)
|
||||
: m_host(host)
|
||||
, m_path(path) {
|
||||
@ -42,6 +48,11 @@ UrlMatch::UrlMatch(const std::shared_ptr<urlmatchdomain_t> &urlmatchdomain)
|
||||
, m_domain(urlmatchdomain) {
|
||||
}
|
||||
|
||||
UrlMatch::UrlMatch(const std::shared_ptr<urlmatchfile_t> &urlmatchfile)
|
||||
: m_type(url_match_file)
|
||||
, m_file(urlmatchfile) {
|
||||
}
|
||||
|
||||
UrlMatch::UrlMatch(const std::shared_ptr<urlmatchhost_t> &urlmatchhost)
|
||||
: m_type(url_match_host)
|
||||
, m_host(urlmatchhost) {
|
||||
@ -84,6 +95,9 @@ bool UrlMatch::match(const Url &url) const {
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
case url_match_file:
|
||||
return (m_file->m_file.length() == static_cast<size_t>(url.getFilenameLen()) &&
|
||||
memcmp(m_file->m_file.c_str(), url.getFilename(), url.getFilenameLen()) == 0);
|
||||
case url_match_host:
|
||||
if (m_host->m_host.length() == static_cast<size_t>(url.getHostLen()) &&
|
||||
memcmp(m_host->m_host.c_str(), url.getHost(), url.getHostLen()) == 0) {
|
||||
@ -132,6 +146,10 @@ void UrlMatch::logMatch(const Url &url) const {
|
||||
type = "domain";
|
||||
value = m_domain->m_domain.c_str();
|
||||
break;
|
||||
case url_match_file:
|
||||
type = "file";
|
||||
value = m_file->m_file.c_str();
|
||||
break;
|
||||
case url_match_host:
|
||||
type = "host";
|
||||
value = m_host->m_host.c_str();
|
||||
|
@ -20,6 +20,12 @@ struct urlmatchdomain_t {
|
||||
pathcriteria_t m_pathcriteria;
|
||||
};
|
||||
|
||||
/// Match criterion that carries a single file name.
struct urlmatchfile_t {
	/// Stores @p file as the file name to match.
	/// Marked explicit so a std::string is never silently converted into a
	/// match criterion; construction must be spelled out at the call site.
	explicit urlmatchfile_t(const std::string &file);

	/// File name held by this criterion.
	std::string m_file;
};
|
||||
|
||||
struct urlmatchhost_t {
|
||||
urlmatchhost_t(const std::string &host, const std::string &path);
|
||||
|
||||
@ -53,6 +59,7 @@ class Url;
|
||||
class UrlMatch {
|
||||
public:
|
||||
UrlMatch(const std::shared_ptr<urlmatchdomain_t> &urlmatchdomain);
|
||||
UrlMatch(const std::shared_ptr<urlmatchfile_t> &urlmatchfile);
|
||||
UrlMatch(const std::shared_ptr<urlmatchhost_t> &urlmatchhost);
|
||||
UrlMatch(const std::shared_ptr<urlmatchpath_t> &urlmatchpath);
|
||||
UrlMatch(const std::shared_ptr<urlmatchregex_t> &urlmatchregex);
|
||||
@ -64,6 +71,7 @@ public:
|
||||
private:
|
||||
enum urlmatchtype_t {
|
||||
url_match_domain,
|
||||
url_match_file,
|
||||
url_match_host,
|
||||
url_match_path,
|
||||
url_match_regex,
|
||||
@ -73,6 +81,7 @@ private:
|
||||
urlmatchtype_t m_type;
|
||||
|
||||
std::shared_ptr<urlmatchdomain_t> m_domain;
|
||||
std::shared_ptr<urlmatchfile_t> m_file;
|
||||
std::shared_ptr<urlmatchhost_t> m_host;
|
||||
std::shared_ptr<urlmatchpath_t> m_path;
|
||||
std::shared_ptr<urlmatchregex_t> m_regex;
|
||||
|
@ -117,17 +117,26 @@ bool UrlMatchList::load() {
|
||||
switch (line[0]) {
|
||||
case 'd':
|
||||
// domain
|
||||
if (firstColEnd==5 && memcmp(line.data(), "domain", 5) != 0) {
|
||||
logTrace(g_conf.m_logTraceUrlMatchList, "");
|
||||
if (firstColEnd == 5 && memcmp(line.data(), "domain", 5) != 0) {
|
||||
logError("Invalid line found. Ignoring line='%s'", line.c_str());
|
||||
continue;
|
||||
}
|
||||
|
||||
parseDomain(&tmpUrlMatchList, col2, col3, col4);
|
||||
break;
|
||||
case 'f':
|
||||
// file
|
||||
if (firstColEnd == 4 && memcmp(line.data(), "file", 4) != 0) {
|
||||
logError("Invalid line found. Ignoring line='%s'", line.c_str());
|
||||
continue;
|
||||
}
|
||||
|
||||
tmpUrlMatchList->emplace_back(std::shared_ptr<urlmatchfile_t>(new urlmatchfile_t(col2)));
|
||||
break;
|
||||
case 'h':
|
||||
// host
|
||||
if (firstColEnd==4 && memcmp(line.data(), "host", 4) != 0) {
|
||||
logTrace(g_conf.m_logTraceUrlMatchList, "");
|
||||
if (firstColEnd == 4 && memcmp(line.data(), "host", 4) != 0) {
|
||||
logError("Invalid line found. Ignoring line='%s'", line.c_str());
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -135,8 +144,8 @@ bool UrlMatchList::load() {
|
||||
break;
|
||||
case 'p':
|
||||
// path
|
||||
if (firstColEnd==4 && memcmp(line.data(), "path", 4) != 0) {
|
||||
logTrace(g_conf.m_logTraceUrlMatchList, "");
|
||||
if (firstColEnd == 4 && memcmp(line.data(), "path", 4) != 0) {
|
||||
logError("Invalid line found. Ignoring line='%s'", line.c_str());
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -144,8 +153,8 @@ bool UrlMatchList::load() {
|
||||
break;
|
||||
case 'r':
|
||||
// regex
|
||||
if (firstColEnd==5 && memcmp(line.data(), "regex", 5) != 0) {
|
||||
logTrace(g_conf.m_logTraceUrlMatchList, "");
|
||||
if (firstColEnd == 5 && memcmp(line.data(), "regex", 5) != 0) {
|
||||
logError("Invalid line found. Ignoring line='%s'", line.c_str());
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -162,8 +171,8 @@ bool UrlMatchList::load() {
|
||||
tmpUrlMatchList->emplace_back(std::shared_ptr<urlmatchregex_t>(new urlmatchregex_t(col3, GbRegex(col3.c_str(), PCRE_NO_AUTO_CAPTURE, PCRE_STUDY_JIT_COMPILE), col2)));
|
||||
break;
|
||||
case 't':
|
||||
if (firstColEnd==3 && memcmp(line.data(), "tld", 3) != 0) {
|
||||
logTrace(g_conf.m_logTraceUrlMatchList, "");
|
||||
if (firstColEnd == 3 && memcmp(line.data(), "tld", 3) != 0) {
|
||||
logError("Invalid line found. Ignoring line='%s'", line.c_str());
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -140,6 +140,12 @@ TEST(UrlMatchListTest, Path) {
|
||||
EXPECT_TRUE(urlMatchList.isUrlMatched("http://www.example.com/wp-admin/"));
|
||||
EXPECT_FALSE(urlMatchList.isUrlMatched("http://www.example.com/tag/wp-admin/"));
|
||||
|
||||
EXPECT_TRUE(urlMatchList.isUrlMatched("http://www.example.com/blog/wp-login.php"));
|
||||
EXPECT_TRUE(urlMatchList.isUrlMatched("http://www.example.com/wp-login.php"));
|
||||
EXPECT_FALSE(urlMatchList.isUrlMatched("http://www.example.com/awp-login.php"));
|
||||
EXPECT_FALSE(urlMatchList.isUrlMatched("http://www.example.com/wp-login.php5"));
|
||||
EXPECT_TRUE(urlMatchList.isUrlMatched("http://www.example.com/blog/wp-login.php?param=value&param2=value2"));
|
||||
|
||||
EXPECT_TRUE(urlMatchList.isUrlMatched("https://www.host.com/file1.html"));
|
||||
EXPECT_TRUE(urlMatchList.isUrlMatched("https://www.example.com/file1.html"));
|
||||
}
|
||||
|
@ -1,2 +1,3 @@
|
||||
path /wp-admin/
|
||||
regex * https?://[^/]+/file1.html
|
||||
file wp-login.php
|
||||
regex * https?://[^/]+/file1.html
|
||||
|
Reference in New Issue
Block a user