Make Robots::print public

This commit is contained in:
Ai Lin Chia
2016-03-31 15:16:55 +02:00
parent a8b5aa6d24
commit 1b8f66902b
4 changed files with 17 additions and 20 deletions

@ -21,8 +21,6 @@ Robots::Robots( const char* robotsTxt, int32_t robotsTxtLen, const char *userAge
, m_defaultCrawlDelay( -1 ) {
// parse robots.txt into what we need
parse();
print();
}
bool Robots::getNextLine() {

@ -14,6 +14,8 @@ public:
bool isAllowed( Url *url );
int32_t getCrawlDelay();
void print() const;
static bool isAllowed( Url *url, const char *userAgent, const char *file, int32_t fileLen,
bool *userAgentFound, bool substringMatch, int32_t *crawlDelay,
bool *hadAllowOrDisallow );
@ -57,8 +59,6 @@ private:
bool parseAllow( const char *field, int32_t fieldLen, bool isUserAgent );
bool parseDisallow( const char *field, int32_t fieldLen, bool isUserAgent );
void print() const;
const char *m_robotsTxt;
int32_t m_robotsTxtLen;

@ -36,7 +36,7 @@ void expectRobotRule( const char *urlPath, const char *rulePath, bool expectedMa
// Test cases roughly based on Matching Wildcards article on Dr. Dobb's
// http://www.drdobbs.com/architecture-and-design/matching-wildcards-an-empirical-way-to-t/240169123#ListingOne
//
TEST( RobotRuleTest, Wildcard ) {
TEST( RobotRuleTest, WildcardCharacterRepeat ) {
// Cases with repeating character sequences.
expectRobotRule("/abcccd", "*ccd", true);
expectRobotRule("/mississipissippi", "*issip*ss*", true);
@ -52,13 +52,17 @@ TEST( RobotRuleTest, Wildcard ) {
expectRobotRule("/aaazz", "/a*zz*", true);
expectRobotRule("/a12b12", "*12*23", false);
expectRobotRule("/a12b12", "*12*12*", true);
}
// Additional cases where the '*' char appears in the tame string.
// Rule-matching cases in which the URL path handed to expectRobotRule() (its
// first argument) itself contains a literal '*' character.
TEST( RobotRuleTest, WildcardCharacterHaystack ) {
// Additional cases where the '*' char appears in the haystack, i.e. in the
// URL path (first argument) rather than only in the rule path (second argument).
// The third argument is the expected outcome passed on to expectRobotRule().
expectRobotRule("/*", "*", true);
expectRobotRule("/a*abab", "/a*b", true);
expectRobotRule("/a*r", "/a*", true);
expectRobotRule("/a*ar", "/a*aar", false);
}
TEST( RobotRuleTest, WildcardDouble ) {
// More double wildcard scenarios.
expectRobotRule("/XYXYXYZYXYz", "/XY*Z*XYz", true);
expectRobotRule("/missisSIPpi", "*SIP*", true);
@ -72,7 +76,9 @@ TEST( RobotRuleTest, Wildcard ) {
expectRobotRule("/A12b12", "*12*23", false);
expectRobotRule("/a12B12", "*12*12*", true);
expectRobotRule("/oWn", "/*oWn*", true);
}
TEST( RobotRuleTest, WildcardMultiple ) {
// Many-wildcard scenarios.
expectRobotRule("/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab", "/a*a*a*a*a*a*aa*aaa*a*a*b", true);
expectRobotRule("/abababababababababababababababababababaacacacacacacacadaeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab", "/*a*b*ba*ca*a*aa*aaa*fa*ga*b*", true);
@ -93,9 +99,11 @@ TEST( RobotRuleTest, Wildcard ) {
expectRobotRule( "/--------------------------------abc-def-", "/-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-" , true );
expectRobotRule( "/---------------------------------abc-def", "/-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*" , true );
expectRobotRule( "/acgfhdhbbcfbhchacchigdhfibhcifabnhieahnaaibcafhigbihaihj", "/*a*b*c*d*e*f*g*h*i*j", true );
}
TEST( RobotRuleTest, WildcardLineAnchor ) {
TEST( RobotRuleTest, WildcardMultipleLineAnchor ) {
expectRobotRule("/abc*abcd*abcd*abc*abcd", "/abc*abc*abc*abc*abc$", false);
expectRobotRule( "/---------------------------------abc-def", "/-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-$" , false );
expectRobotRule( "/---------------------------------abc-def", "/-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*f$" , true );

@ -12,6 +12,7 @@ class TestRobots : public Robots {
public:
// Forwards robotsTxt, robotsTxtLen and userAgent unchanged to the Robots
// constructor, then calls print() on the newly constructed object.
// userAgent defaults to "testbot" when the caller omits it.
TestRobots( const char* robotsTxt, int32_t robotsTxtLen, const char *userAgent = "testbot" )
: Robots (robotsTxt, robotsTxtLen, userAgent ) {
// Runs on every construction, so each TestRobots instance is printed once.
print();
}
using Robots::getNextLine;
@ -44,7 +45,6 @@ static void expectRobotsNoNextLine( TestRobots *robots ) {
}
static void expectRobots( TestRobots *robots, const char *expectedLine, const char *expectedField = "", const char *expectedValue = "" ) {
logf(LOG_INFO, "expectLine='%s' expectField='%s' expectValue='%s'", expectedLine, expectedField, expectedValue);
std::stringstream ss;
ss << __func__ << ":"
<< " expectedLine='" << expectedLine << "'"
@ -147,11 +147,6 @@ TEST( RobotsTest, RobotsGetFieldValue ) {
// helper method
//
// Logs the given robots.txt text at LOG_INFO beneath a "===== robots.txt ====="
// banner, then constructs a local TestRobots over the same text.
// robotsTxt is passed to both a "%s" format and strlen(), so it must be a
// NUL-terminated string.
static void logRobotsTxt( const char *robotsTxt ) {
logf (LOG_INFO, "===== robots.txt =====\n%s", robotsTxt);
// NOTE(review): `robots` is never used after construction; presumably it is
// created only for whatever the constructor does as a side effect - confirm.
TestRobots robots( robotsTxt, strlen(robotsTxt) );
}
static void generateRobotsTxt ( char *robotsTxt, size_t robotsTxtSize, int32_t *pos, const char *userAgent = "testbot", const char *allow = "", const char *disallow = "", bool reversed = false ) {
if ( *pos != 0 ) {
*pos += snprintf ( robotsTxt + *pos, robotsTxtSize - *pos, "\n" );
@ -175,13 +170,11 @@ static void generateRobotsTxt ( char *robotsTxt, size_t robotsTxtSize, int32_t *
// Convenience wrapper around generateRobotsTxt() for the "testbot" user agent.
// Writes into robotsTxt (capacity robotsTxtSize) starting at offset 0 and
// forwards allow / disallow unchanged; both default to the empty string.
// The `reversed` argument of generateRobotsTxt() is left at its default (false).
static void generateTestRobotsTxt ( char *robotsTxt, size_t robotsTxtSize, const char *allow = "", const char *disallow = "" ) {
// Local write offset handed to generateRobotsTxt() by pointer; it is not read
// again after the call.
int32_t pos = 0;
generateRobotsTxt( robotsTxt, robotsTxtSize, &pos, "testbot", allow, disallow);
// Hands the generated buffer to logRobotsTxt().
logRobotsTxt( robotsTxt );
}
// Same as the non-reversed "testbot" wrapper, except that generateRobotsTxt()
// is called with reversed = true.
// NOTE(review): what exactly gets reversed is decided inside generateRobotsTxt();
// presumably the order in which the allow / disallow lines are emitted - confirm.
static void generateTestReversedRobotsTxt ( char *robotsTxt, size_t robotsTxtSize, const char *allow = "", const char *disallow = "" ) {
// Local write offset handed to generateRobotsTxt() by pointer; it is not read
// again after the call.
int32_t pos = 0;
generateRobotsTxt( robotsTxt, robotsTxtSize, &pos, "testbot", allow, disallow, true);
// Hands the generated buffer to logRobotsTxt().
logRobotsTxt( robotsTxt );
}
////////////////////////////////////////////////////////////////////////////////
@ -501,7 +494,7 @@ TEST( RobotsTest, UserAgentFieldCaseInsensitive ) {
// //
////////////////////////////////////////////////////////////////////////////////
TEST(RobotsTest, CommentsFullLine) {
TEST( RobotsTest, CommentsFullLine ) {
char robotsTxt[1024] = "user-agent: *\n"
"#user-agent: testbot\n"
"user-agent: defbot\n"
@ -516,11 +509,10 @@ TEST(RobotsTest, CommentsFullLine) {
EXPECT_EQ( 1000, robots.getCrawlDelay() );
}
TEST(RobotsTest, CommentsAfterWithSpace) {
TEST( RobotsTest, CommentsAfterWithSpace ) {
int32_t pos = 0;
char robotsTxt[1024];
generateRobotsTxt( robotsTxt, 1024, &pos, "testbot #user-agent", "/test #allow", "/ #disallow");
logRobotsTxt( robotsTxt );
TestRobots robots( robotsTxt, strlen(robotsTxt) );
@ -529,11 +521,10 @@ TEST(RobotsTest, CommentsAfterWithSpace) {
EXPECT_TRUE( robots.isAllowed( "/test.html" ) );
}
TEST(RobotsTest, CommentsAfterNoSpace) {
TEST( RobotsTest, CommentsAfterNoSpace ) {
int32_t pos = 0;
char robotsTxt[1024];
generateRobotsTxt( robotsTxt, 1024, &pos, "testbot#user-agent", "/test#allow", "/#disallow");
logRobotsTxt( robotsTxt );
TestRobots robots( robotsTxt, strlen(robotsTxt) );