forked from Mirrors/privacore-open-source-search-engine
Make Robots::print public
This commit is contained in:
@ -21,8 +21,6 @@ Robots::Robots( const char* robotsTxt, int32_t robotsTxtLen, const char *userAge
|
||||
, m_defaultCrawlDelay( -1 ) {
|
||||
// parse robots.txt into what we need
|
||||
parse();
|
||||
|
||||
print();
|
||||
}
|
||||
|
||||
bool Robots::getNextLine() {
|
||||
|
4
Robots.h
4
Robots.h
@ -14,6 +14,8 @@ public:
|
||||
bool isAllowed( Url *url );
|
||||
int32_t getCrawlDelay();
|
||||
|
||||
void print() const;
|
||||
|
||||
static bool isAllowed( Url *url, const char *userAgent, const char *file, int32_t fileLen,
|
||||
bool *userAgentFound, bool substringMatch, int32_t *crawlDelay,
|
||||
bool *hadAllowOrDisallow );
|
||||
@ -57,8 +59,6 @@ private:
|
||||
bool parseAllow( const char *field, int32_t fieldLen, bool isUserAgent );
|
||||
bool parseDisallow( const char *field, int32_t fieldLen, bool isUserAgent );
|
||||
|
||||
void print() const;
|
||||
|
||||
const char *m_robotsTxt;
|
||||
int32_t m_robotsTxtLen;
|
||||
|
||||
|
@ -36,7 +36,7 @@ void expectRobotRule( const char *urlPath, const char *rulePath, bool expectedMa
|
||||
// Test cases roughly based on Matching Wildcards article on Dr. Dobb's
|
||||
// http://www.drdobbs.com/architecture-and-design/matching-wildcards-an-empirical-way-to-t/240169123#ListingOne
|
||||
//
|
||||
TEST( RobotRuleTest, Wildcard ) {
|
||||
TEST( RobotRuleTest, WildcardCharacterRepeat ) {
|
||||
// Cases with repeating character sequences.
|
||||
expectRobotRule("/abcccd", "*ccd", true);
|
||||
expectRobotRule("/mississipissippi", "*issip*ss*", true);
|
||||
@ -52,13 +52,17 @@ TEST( RobotRuleTest, Wildcard ) {
|
||||
expectRobotRule("/aaazz", "/a*zz*", true);
|
||||
expectRobotRule("/a12b12", "*12*23", false);
|
||||
expectRobotRule("/a12b12", "*12*12*", true);
|
||||
}
|
||||
|
||||
// Additional cases where the '*' char appears in the tame string.
|
||||
TEST( RobotRuleTest, WildcardCharacterHaystack ) {
|
||||
// Additional cases where the '*' char appears in haystack
|
||||
expectRobotRule("/*", "*", true);
|
||||
expectRobotRule("/a*abab", "/a*b", true);
|
||||
expectRobotRule("/a*r", "/a*", true);
|
||||
expectRobotRule("/a*ar", "/a*aar", false);
|
||||
}
|
||||
|
||||
TEST( RobotRuleTest, WildcardDouble ) {
|
||||
// More double wildcard scenarios.
|
||||
expectRobotRule("/XYXYXYZYXYz", "/XY*Z*XYz", true);
|
||||
expectRobotRule("/missisSIPpi", "*SIP*", true);
|
||||
@ -72,7 +76,9 @@ TEST( RobotRuleTest, Wildcard ) {
|
||||
expectRobotRule("/A12b12", "*12*23", false);
|
||||
expectRobotRule("/a12B12", "*12*12*", true);
|
||||
expectRobotRule("/oWn", "/*oWn*", true);
|
||||
}
|
||||
|
||||
TEST( RobotRuleTest, WildcardMultiple ) {
|
||||
// Many-wildcard scenarios.
|
||||
expectRobotRule("/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab", "/a*a*a*a*a*a*aa*aaa*a*a*b", true);
|
||||
expectRobotRule("/abababababababababababababababababababaacacacacacacacadaeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab", "/*a*b*ba*ca*a*aa*aaa*fa*ga*b*", true);
|
||||
@ -93,9 +99,11 @@ TEST( RobotRuleTest, Wildcard ) {
|
||||
|
||||
expectRobotRule( "/--------------------------------abc-def-", "/-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-" , true );
|
||||
expectRobotRule( "/---------------------------------abc-def", "/-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*" , true );
|
||||
|
||||
expectRobotRule( "/acgfhdhbbcfbhchacchigdhfibhcifabnhieahnaaibcafhigbihaihj", "/*a*b*c*d*e*f*g*h*i*j", true );
|
||||
}
|
||||
|
||||
TEST( RobotRuleTest, WildcardLineAnchor ) {
|
||||
TEST( RobotRuleTest, WildcardMultipleLineAnchor ) {
|
||||
expectRobotRule("/abc*abcd*abcd*abc*abcd", "/abc*abc*abc*abc*abc$", false);
|
||||
expectRobotRule( "/---------------------------------abc-def", "/-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-$" , false );
|
||||
expectRobotRule( "/---------------------------------abc-def", "/-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*f$" , true );
|
||||
|
@ -12,6 +12,7 @@ class TestRobots : public Robots {
|
||||
public:
|
||||
TestRobots( const char* robotsTxt, int32_t robotsTxtLen, const char *userAgent = "testbot" )
|
||||
: Robots (robotsTxt, robotsTxtLen, userAgent ) {
|
||||
print();
|
||||
}
|
||||
|
||||
using Robots::getNextLine;
|
||||
@ -44,7 +45,6 @@ static void expectRobotsNoNextLine( TestRobots *robots ) {
|
||||
}
|
||||
|
||||
static void expectRobots( TestRobots *robots, const char *expectedLine, const char *expectedField = "", const char *expectedValue = "" ) {
|
||||
logf(LOG_INFO, "expectLine='%s' expectField='%s' expectValue='%s'", expectedLine, expectedField, expectedValue);
|
||||
std::stringstream ss;
|
||||
ss << __func__ << ":"
|
||||
<< " expectedLine='" << expectedLine << "'"
|
||||
@ -147,11 +147,6 @@ TEST( RobotsTest, RobotsGetFieldValue ) {
|
||||
// helper method
|
||||
//
|
||||
|
||||
static void logRobotsTxt( const char *robotsTxt ) {
|
||||
logf (LOG_INFO, "===== robots.txt =====\n%s", robotsTxt);
|
||||
TestRobots robots( robotsTxt, strlen(robotsTxt) );
|
||||
}
|
||||
|
||||
static void generateRobotsTxt ( char *robotsTxt, size_t robotsTxtSize, int32_t *pos, const char *userAgent = "testbot", const char *allow = "", const char *disallow = "", bool reversed = false ) {
|
||||
if ( *pos != 0 ) {
|
||||
*pos += snprintf ( robotsTxt + *pos, robotsTxtSize - *pos, "\n" );
|
||||
@ -175,13 +170,11 @@ static void generateRobotsTxt ( char *robotsTxt, size_t robotsTxtSize, int32_t *
|
||||
static void generateTestRobotsTxt ( char *robotsTxt, size_t robotsTxtSize, const char *allow = "", const char *disallow = "" ) {
|
||||
int32_t pos = 0;
|
||||
generateRobotsTxt( robotsTxt, robotsTxtSize, &pos, "testbot", allow, disallow);
|
||||
logRobotsTxt( robotsTxt );
|
||||
}
|
||||
|
||||
static void generateTestReversedRobotsTxt ( char *robotsTxt, size_t robotsTxtSize, const char *allow = "", const char *disallow = "" ) {
|
||||
int32_t pos = 0;
|
||||
generateRobotsTxt( robotsTxt, robotsTxtSize, &pos, "testbot", allow, disallow, true);
|
||||
logRobotsTxt( robotsTxt );
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
@ -501,7 +494,7 @@ TEST( RobotsTest, UserAgentFieldCaseInsensitive ) {
|
||||
// //
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
TEST(RobotsTest, CommentsFullLine) {
|
||||
TEST( RobotsTest, CommentsFullLine ) {
|
||||
char robotsTxt[1024] = "user-agent: *\n"
|
||||
"#user-agent: testbot\n"
|
||||
"user-agent: defbot\n"
|
||||
@ -516,11 +509,10 @@ TEST(RobotsTest, CommentsFullLine) {
|
||||
EXPECT_EQ( 1000, robots.getCrawlDelay() );
|
||||
}
|
||||
|
||||
TEST(RobotsTest, CommentsAfterWithSpace) {
|
||||
TEST( RobotsTest, CommentsAfterWithSpace ) {
|
||||
int32_t pos = 0;
|
||||
char robotsTxt[1024];
|
||||
generateRobotsTxt( robotsTxt, 1024, &pos, "testbot #user-agent", "/test #allow", "/ #disallow");
|
||||
logRobotsTxt( robotsTxt );
|
||||
|
||||
TestRobots robots( robotsTxt, strlen(robotsTxt) );
|
||||
|
||||
@ -529,11 +521,10 @@ TEST(RobotsTest, CommentsAfterWithSpace) {
|
||||
EXPECT_TRUE( robots.isAllowed( "/test.html" ) );
|
||||
}
|
||||
|
||||
TEST(RobotsTest, CommentsAfterNoSpace) {
|
||||
TEST( RobotsTest, CommentsAfterNoSpace ) {
|
||||
int32_t pos = 0;
|
||||
char robotsTxt[1024];
|
||||
generateRobotsTxt( robotsTxt, 1024, &pos, "testbot#user-agent", "/test#allow", "/#disallow");
|
||||
logRobotsTxt( robotsTxt );
|
||||
|
||||
TestRobots robots( robotsTxt, strlen(robotsTxt) );
|
||||
|
||||
|
Reference in New Issue
Block a user