mirror of
https://github.com/privacore/open-source-search-engine.git
synced 2025-07-14 02:36:06 -04:00
Merge branch 'master' into sqlite
This commit is contained in:
BlockList.cpp BlockList.h Collectiondb.cpp Collectiondb.h Conf.cpp Conf.h ContentTypeBlockList.cpp ContentTypeBlockList.h DnsBlockList.cpp DnsBlockList.h Errno.cpp Errno.h HttpMime.cpp HttpMime.h HttpServer.cpp Images.cpp Jenkinsfile Lang.cpp Lang.h Makefile Msg13.cpp Msg39.cpp Msg3a.cpp PageGet.cpp PageReindex.cpp PageReindex.h PageResults.cpp PageSpiderdbLookup.cpp Parms.cpp Proxy.cpp Query.cpp Query.h SafeBuf.cpp SafeBuf.h SearchInput.cpp SearchInput.h Speller.cpp Speller.h Statistics.cpp TcpServer.cpp TcpServer.h TcpSocket.h XmlDoc.cpp XmlDoc.h main.cpp
test/unit
tools
29
test/unit/ContentTypeBlockListTest.cpp
Normal file
29
test/unit/ContentTypeBlockListTest.cpp
Normal file
@ -0,0 +1,29 @@
|
||||
#include <gtest/gtest.h>
|
||||
#include "ContentTypeBlockList.h"
|
||||
|
||||
class TestContentTypeBlockList : public ContentTypeBlockList {
|
||||
public:
|
||||
TestContentTypeBlockList(const char *filename)
|
||||
: ContentTypeBlockList() {
|
||||
m_filename = filename;
|
||||
}
|
||||
|
||||
using ContentTypeBlockList::load;
|
||||
|
||||
bool isContentTypeBlocked(const char *str) {
|
||||
return ContentTypeBlockList::isContentTypeBlocked(str, strlen(str));
|
||||
}
|
||||
};
|
||||
|
||||
// Loads the fixture list from blocklist/contenttype.txt and checks which
// content types it reports as blocked.
TEST(ContentTypeBlockListTest, BlockList) {
	TestContentTypeBlockList blockList("blocklist/contenttype.txt");
	blockList.load();

	// "application/font-woff": blocked as given, not blocked with a suffix
	EXPECT_TRUE(blockList.isContentTypeBlocked("application/font-woff"));
	EXPECT_FALSE(blockList.isContentTypeBlocked("application/font-woff-2"));

	// "audio/" family: an extra leading character is not blocked, while the
	// bare prefix and anything following it are blocked
	EXPECT_FALSE(blockList.isContentTypeBlocked("naudio/"));
	EXPECT_TRUE(blockList.isContentTypeBlocked("audio/"));
	EXPECT_TRUE(blockList.isContentTypeBlocked("audio/CN"));
	EXPECT_TRUE(blockList.isContentTypeBlocked("audio/DAT12"));
}
|
@ -187,6 +187,117 @@ TEST(HttpMimeTest, GetNextLineMultiEnd) {
|
||||
expectLine(&httpMime);
|
||||
}
|
||||
|
||||
// Content-Type header without a charset parameter and with whitespace before
// the line end: the reported content type must be exactly "text/html".
TEST(HttpMimeTest, GetContentTypeNoCharset) {
	char response[] =
		"HTTP/1.1 200 OK\r\n"
		"Cache-Control: max-age=604800\r\n"
		"Content-Type: text/html \r\n"
		"Date: Fri, 17 Nov 2017 10:39:20 GMT\r\n"
		"Etag: \"359670651+ident\"\r\n"
		"Expires: Fri, 24 Nov 2017 10:39:20 GMT\r\n"
		"Last-Modified: Fri, 09 Aug 2013 23:54:35 GMT\r\n"
		"Server: ECS (dca/249E)\r\n"
		"Vary: Accept-Encoding\r\n"
		"X-Cache: HIT\r\n"
		"Content-Length: 1270\r\n"
		"\r\n";

	TestHttpMime mime(response, "http://www.example.com");

	// length first, so a wrong length is reported before the text compare
	EXPECT_EQ(strlen("text/html"), mime.getContentTypeLen());

	// copy out exactly getContentTypeLen() bytes before comparing as a C string
	const std::string parsedType(mime.getContentTypePos(), mime.getContentTypeLen());
	EXPECT_STREQ(parsedType.c_str(), "text/html");
}
|
||||
|
||||
// Content-Type header with no space after the colon and no charset
// parameter: the reported content type must still be exactly "text/html".
TEST(HttpMimeTest, GetContentTypeNoCharsetNoSpace) {
	char response[] =
		"HTTP/1.1 200 OK\r\n"
		"Cache-Control: max-age=604800\r\n"
		"Content-Type:text/html\r\n"
		"Date: Fri, 17 Nov 2017 10:39:20 GMT\r\n"
		"Etag: \"359670651+ident\"\r\n"
		"Expires: Fri, 24 Nov 2017 10:39:20 GMT\r\n"
		"Last-Modified: Fri, 09 Aug 2013 23:54:35 GMT\r\n"
		"Server: ECS (dca/249E)\r\n"
		"Vary: Accept-Encoding\r\n"
		"X-Cache: HIT\r\n"
		"Content-Length: 1270\r\n"
		"\r\n";

	TestHttpMime mime(response, "http://www.example.com");

	// length first, so a wrong length is reported before the text compare
	EXPECT_EQ(strlen("text/html"), mime.getContentTypeLen());

	// copy out exactly getContentTypeLen() bytes before comparing as a C string
	const std::string parsedType(mime.getContentTypePos(), mime.getContentTypeLen());
	EXPECT_STREQ(parsedType.c_str(), "text/html");
}
|
||||
|
||||
// Content-Type header carrying "charset=utf-8" with whitespace around the
// separator and before the line end: expects type "text/html" and charset
// "utf-8".
TEST(HttpMimeTest, GetContentTypeWithCharsetUTF8) {
	char response[] =
		"HTTP/1.1 200 OK\r\n"
		"Cache-Control: max-age=604800\r\n"
		"Content-Type: text/html ; charset=utf-8 \r\n"
		"Date: Fri, 17 Nov 2017 10:39:20 GMT\r\n"
		"Etag: \"359670651+ident\"\r\n"
		"Expires: Fri, 24 Nov 2017 10:39:20 GMT\r\n"
		"Last-Modified: Fri, 09 Aug 2013 23:54:35 GMT\r\n"
		"Server: ECS (dca/249E)\r\n"
		"Vary: Accept-Encoding\r\n"
		"X-Cache: HIT\r\n"
		"Content-Length: 1270\r\n"
		"\r\n";

	TestHttpMime mime(response, "http://www.example.com");

	// content type: length, then text
	EXPECT_EQ(strlen("text/html"), mime.getContentTypeLen());
	const std::string parsedType(mime.getContentTypePos(), mime.getContentTypeLen());
	EXPECT_STREQ(parsedType.c_str(), "text/html");

	// charset: copy out getCharsetLen() bytes and compare as a C string
	const std::string parsedCharset(mime.getCharset(), mime.getCharsetLen());
	EXPECT_STREQ(parsedCharset.c_str(), "utf-8");
}
|
||||
|
||||
// Content-Type header in the common "type; charset=value" layout: expects
// type "text/html" and charset "iso-8859-1".
TEST(HttpMimeTest, GetContentTypeWithCharsetISO88691) {
	char response[] =
		"HTTP/1.1 200 OK\r\n"
		"Cache-Control: max-age=604800\r\n"
		"Content-Type: text/html; charset=iso-8859-1\r\n"
		"Date: Fri, 17 Nov 2017 10:39:20 GMT\r\n"
		"Etag: \"359670651+ident\"\r\n"
		"Expires: Fri, 24 Nov 2017 10:39:20 GMT\r\n"
		"Last-Modified: Fri, 09 Aug 2013 23:54:35 GMT\r\n"
		"Server: ECS (dca/249E)\r\n"
		"Vary: Accept-Encoding\r\n"
		"X-Cache: HIT\r\n"
		"Content-Length: 1270\r\n"
		"\r\n";

	TestHttpMime mime(response, "http://www.example.com");

	// content type: length, then text
	EXPECT_EQ(strlen("text/html"), mime.getContentTypeLen());
	const std::string parsedType(mime.getContentTypePos(), mime.getContentTypeLen());
	EXPECT_STREQ(parsedType.c_str(), "text/html");

	// charset: copy out getCharsetLen() bytes and compare as a C string
	const std::string parsedCharset(mime.getCharset(), mime.getCharsetLen());
	EXPECT_STREQ(parsedCharset.c_str(), "iso-8859-1");
}
|
||||
|
||||
// Content-Type header with no space between ';' and the charset parameter:
// expects type "text/html" and charset "iso-8859-1".
TEST(HttpMimeTest, GetContentTypeWithCharsetNoSpace) {
	char response[] =
		"HTTP/1.1 200 OK\r\n"
		"Cache-Control: max-age=604800\r\n"
		"Content-Type: text/html;charset=iso-8859-1\r\n"
		"Date: Fri, 17 Nov 2017 10:39:20 GMT\r\n"
		"Etag: \"359670651+ident\"\r\n"
		"Expires: Fri, 24 Nov 2017 10:39:20 GMT\r\n"
		"Last-Modified: Fri, 09 Aug 2013 23:54:35 GMT\r\n"
		"Server: ECS (dca/249E)\r\n"
		"Vary: Accept-Encoding\r\n"
		"X-Cache: HIT\r\n"
		"Content-Length: 1270\r\n"
		"\r\n";

	TestHttpMime mime(response, "http://www.example.com");

	// content type: length, then text
	EXPECT_EQ(strlen("text/html"), mime.getContentTypeLen());
	const std::string parsedType(mime.getContentTypePos(), mime.getContentTypeLen());
	EXPECT_STREQ(parsedType.c_str(), "text/html");

	// charset: copy out getCharsetLen() bytes and compare as a C string
	const std::string parsedCharset(mime.getCharset(), mime.getCharsetLen());
	EXPECT_STREQ(parsedCharset.c_str(), "iso-8859-1");
}
|
||||
|
||||
static void verifyCookieDate(const char *cookieDateStr, time_t expectedCookieDate) {
|
||||
time_t cookieDate = 0;
|
||||
bool expectedResult = (expectedCookieDate != 0);
|
||||
|
@ -6,6 +6,7 @@ BASE_DIR ?= ../..
|
||||
TARGET = GigablastTest
|
||||
OBJECTS = GigablastTest.o GigablastTestUtils.o \
|
||||
BitOperationsTest.o BigFileTest.o \
|
||||
ContentTypeBlockListTest.o \
|
||||
DirTest.o DnsBlockListTest.o \
|
||||
FctypesTest.o \
|
||||
GbCacheTest.o \
|
||||
|
@ -35,7 +35,7 @@ static void generateSummary( Summary &summary, char *htmlInput, const char *quer
|
||||
ASSERT_TRUE(sections.set(&words, &bits, &url, "", CT_HTML));
|
||||
|
||||
Query query;
|
||||
ASSERT_TRUE(query.set2(queryStr, langEnglish, true, true, false));
|
||||
ASSERT_TRUE(query.set2(queryStr, langEnglish, true, true, false, ABS_MAX_QUERY_TERMS));
|
||||
|
||||
LinkInfo linkInfo;
|
||||
memset ( &linkInfo , 0 , sizeof(LinkInfo) );
|
||||
|
@ -128,6 +128,30 @@ TEST(XmlTest, GetCanonicalLink) {
|
||||
ASSERT_EQ(xml.getTagValue("rel", "canonical", "href", &value, &valueLen, true, TAG_LINK), (strlen(output) != 0));
|
||||
std::string valueStr(value, valueLen);
|
||||
|
||||
EXPECT_EQ(strlen(output), valueLen);
|
||||
EXPECT_STREQ(output, valueStr.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
TEST(XmlTest, GetRobotsTag) {
|
||||
std::vector<std::tuple<const char*, const char*>> testcases = {
|
||||
std::make_tuple("<html><head><meta name=\"ROBOTS\" content=\"NOINDEX\"></head><body></body></html>", "NOINDEX"),
|
||||
std::make_tuple("<html><head></head><body><gbframe><html><head><meta name=\"ROBOTS\" content=\"NOINDEX\"></head><body></body></html></gbframe></body></html>", "")
|
||||
};
|
||||
|
||||
for (const auto &testcase : testcases) {
|
||||
Xml xml;
|
||||
char input[MAX_BUF_SIZE];
|
||||
std::sprintf(input, "%s", std::get<0>(testcase));
|
||||
|
||||
ASSERT_TRUE(xml.set(input, strlen(input), 0, CT_HTML));
|
||||
|
||||
const char *output = std::get<1>(testcase);
|
||||
const char *value = NULL;
|
||||
int32_t valueLen = 0;
|
||||
ASSERT_EQ(xml.getTagValue("name", "robots", "content", &value, &valueLen, true, TAG_META), (strlen(output) != 0));
|
||||
std::string valueStr(value, valueLen);
|
||||
|
||||
EXPECT_EQ(strlen(output), valueLen);
|
||||
EXPECT_STREQ(output, valueStr.c_str());
|
||||
}
|
||||
|
2
test/unit/blocklist/contenttype.txt
Normal file
2
test/unit/blocklist/contenttype.txt
Normal file
@ -0,0 +1,2 @@
|
||||
application/font-woff
|
||||
audio/*
|
Reference in New Issue
Block a user