// privacore-open-source-searc.../test/unit/RobotRuleTest.cpp
// 124 lines, 6.0 KiB, C++

#include <gtest/gtest.h>
#include "RobotRule.h"
#include "Url.h"
#include "Log.h"
#define TEST_DOMAIN "http://example.com"
//
// test class
//
/// Test shim around RobotRule that lets a rule be matched against a bare URL
/// path string instead of a caller-constructed Url object.
class TestRobotRule : public RobotRule {
public:
	/// Builds a rule from a NUL-terminated rule path.
	///
	/// The first RobotRule constructor argument is always passed as false
	/// (NOTE(review): presumably the allow/disallow flag - confirm against
	/// RobotRule.h). The constructor is explicit so that a plain string is
	/// never silently converted into a rule object.
	///
	/// @param rulePath rule pattern; its length is taken with strlen()
	explicit TestRobotRule( const char *rulePath )
		: RobotRule( false, rulePath, strlen( rulePath ) ) {
	}

	/// Matches this rule against the URL formed by TEST_DOMAIN followed by urlPath.
	///
	/// This overload hides the base class isMatching(); the base version is
	/// reached through the qualified call below.
	///
	/// @param urlPath path appended verbatim to TEST_DOMAIN
	/// @return whatever RobotRule::isMatching() returns for the resulting Url
	bool isMatching( const char *urlPath ) {
		char urlStr[1024];

		// Bound the write by the buffer itself so size and limit cannot drift
		// apart. snprintf() truncates (and still NUL-terminates) a URL that
		// does not fit, so paths must stay well below the buffer size.
		snprintf( urlStr, sizeof( urlStr ), TEST_DOMAIN "%s", urlPath );

		Url url;
		url.set( urlStr );

		return RobotRule::isMatching( &url );
	}
};
/// Checks that matching rulePath against urlPath yields expectedMatching.
///
/// The check is a non-fatal EXPECT_EQ, so the calling test keeps running
/// after a mismatch.
///
/// @param urlPath          path handed to TestRobotRule::isMatching()
/// @param rulePath         rule pattern used to construct the TestRobotRule
/// @param expectedMatching expected result of the match
void expectRobotRule( const char *urlPath, const char *rulePath, bool expectedMatching ) {
	TestRobotRule robotRule( rulePath );

	// Every case in this file goes through this single assertion, so the
	// reported source line is always the same; stream the inputs so a failure
	// identifies which url/rule pair broke.
	EXPECT_EQ( expectedMatching, robotRule.isMatching( urlPath ) )
		<< "urlPath='" << urlPath << "' rulePath='" << rulePath << "'";
}
//
// Test cases are roughly based on the "Matching Wildcards: An Empirical Way to Tame an Algorithm" article on Dr. Dobb's:
// http://www.drdobbs.com/architecture-and-design/matching-wildcards-an-empirical-way-to-t/240169123#ListingOne
//
// Smallest wildcard cases: a single '*' alone, trailing, and leading.
TEST( RobotRuleTest, WildcardCharacter ) {
	// Each row: url path, rule path, expected match result.
	const struct {
		const char *urlPath;
		const char *rulePath;
		bool matches;
	} cases[] = {
		{ "", "*", true },
		{ "/", "/*", true },
		{ "/a", "/a*", true },
		{ "/a", "/*a", true },
	};

	for ( const auto &c : cases ) {
		expectRobotRule( c.urlPath, c.rulePath, c.matches );
	}
}
// Wildcards combined with repeating character sequences in the url path.
TEST( RobotRuleTest, WildcardCharacterRepeat ) {
	// Each row: url path, rule path, expected match result.
	const struct {
		const char *urlPath;
		const char *rulePath;
		bool matches;
	} cases[] = {
		{ "/abcccd", "*ccd", true },
		{ "/mississipissippi", "*issip*ss*", true },
		{ "/xxxx*zzzzzzzzy*f", "/xxxx*zzy*fffff", false },
		{ "/xxxx*zzzzzzzzy*f", "/xxx*zzy*f", true },
		{ "/xxxxzzzzzzzzyf", "/xxxx*zzy*fffff", false },
		{ "/xxxxzzzzzzzzyf", "/xxxx*zzy*f", true },
		{ "/xyxyxyzyxyz", "/xy*z*xyz", true },
		{ "/mississippi", "*sip*", true },
		{ "/xyxyxyxyz", "/xy*xyz", true },
		{ "/mississippi", "/mi*sip*", true },
		{ "/ababac", "/*abac*", true },
		{ "/aaazz", "/a*zz*", true },
		{ "/a12b12", "*12*23", false },
		{ "/a12b12", "*12*12*", true },
	};

	for ( const auto &c : cases ) {
		expectRobotRule( c.urlPath, c.rulePath, c.matches );
	}
}
// Cases where a literal '*' character is part of the url path (the haystack).
TEST( RobotRuleTest, WildcardCharacterHaystack ) {
	// Each row: url path, rule path, expected match result.
	const struct {
		const char *urlPath;
		const char *rulePath;
		bool matches;
	} cases[] = {
		{ "/*", "*", true },
		{ "/a*abab", "/a*b", true },
		{ "/a*r", "/a*", true },
		{ "/a*ar", "/a*aar", false },
	};

	for ( const auto &c : cases ) {
		expectRobotRule( c.urlPath, c.rulePath, c.matches );
	}
}
// Further double-wildcard scenarios, using mixed-case url paths and rules.
TEST( RobotRuleTest, WildcardDouble ) {
	// Each row: url path, rule path, expected match result.
	const struct {
		const char *urlPath;
		const char *rulePath;
		bool matches;
	} cases[] = {
		{ "/XYXYXYZYXYz", "/XY*Z*XYz", true },
		{ "/missisSIPpi", "*SIP*", true },
		{ "/mississipPI", "*issip*PI", true },
		{ "/xyxyxyxyz", "/xy*xyz", true },
		{ "/miSsissippi", "/mi*sip*", true },
		{ "/miSsissippi", "/mi*Sip*", false },
		{ "/abAbac", "*Abac*", true },
		// Same inputs as the row above; the repetition is kept as-is.
		{ "/abAbac", "*Abac*", true },
		{ "/aAazz", "/a*zz*", true },
		{ "/A12b12", "*12*23", false },
		{ "/a12B12", "*12*12*", true },
		{ "/oWn", "/*oWn*", true },
	};

	for ( const auto &c : cases ) {
		expectRobotRule( c.urlPath, c.rulePath, c.matches );
	}
}
// Rules containing many wildcards, matched against long and repetitive url paths.
TEST( RobotRuleTest, WildcardMultiple ) {
	// Each row: url path, rule path, expected match result.
	const struct {
		const char *urlPath;
		const char *rulePath;
		bool matches;
	} cases[] = {
		{ "/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab", "/a*a*a*a*a*a*aa*aaa*a*a*b", true },
		{ "/abababababababababababababababababababaacacacacacacacadaeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab", "/*a*b*ba*ca*a*aa*aaa*fa*ga*b*", true },
		{ "/abababababababababababababababababababaacacacacacacacadaeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab", "/*a*b*ba*ca*a*x*aaa*fa*ga*b*", false },
		{ "/abababababababababababababababababababaacacacacacacacadaeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab", "/*a*b*ba*ca*aaaa*fa*ga*gggg*b*", false },
		{ "/abababababababababababababababababababaacacacacacacacadaeafagahaiajakalaaaaaaaaaaaaaaaaaffafagaagggagaaaaaaaab", "/*a*b*ba*ca*aaaa*fa*ga*ggg*b*", true },
		{ "/aaabbaabbaab", "*aabbaa*a*", true },
		{ "/a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*", "/a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*", true },
		{ "/aaaaaaaaaaaaaaaaa", "*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*", true },
		{ "/aaaaaaaaaaaaaaaa", "*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*a*", false },
		{ "/abc*abcd*abcde*abcdef*abcdefg*abcdefgh*abcdefghi*abcdefghij*abcdefghijk*abcdefghijkl*abcdefghijklm*abcdefghijklmn", "/abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*", false },
		{ "/abc*abcd*abcde*abcdef*abcdefg*abcdefgh*abcdefghi*abcdefghij*abcdefghijk*abcdefghijkl*abcdefghijklm*abcdefghijklmn", "/abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*", true },
		{ "/abc*abcd*abcd*abc*abcd*abcd*abc*abcd*abc*abc*abcd", "/abc*abc*abc*abc*abc*abc*abc*abc*abc*abc*abcd", true },
		{ "/abc", "********a********b********c********", true },
		{ "/********a********b********c********", "/abc", false },
		{ "/abc", "/********a********b********b********", false },
		{ "/*abc*", "/***a*b*c***", true },
		{ "/--------------------------------abc-def-", "/-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-", true },
		{ "/---------------------------------abc-def", "/-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*", true },
		{ "/acgfhdhbbcfbhchacchigdhfibhcifabnhieahnaaibcafhigbihaihj", "/*a*b*c*d*e*f*g*h*i*j", true },
		{ "/ababcbdccedfdgeheifjfkgghhiijjkkllmmnnop", "/*a*b*c*d*e*f*g*h*i*j*k*l*m*n*o*p", true },
	};

	for ( const auto &c : cases ) {
		expectRobotRule( c.urlPath, c.rulePath, c.matches );
	}
}
// Many-wildcard rules that all end in '$'.
// NOTE(review): '$' presumably anchors the rule to the end of the url path - confirm against RobotRule.
TEST( RobotRuleTest, WildcardMultipleLineAnchor ) {
	// Each row: url path, rule path, expected match result.
	const struct {
		const char *urlPath;
		const char *rulePath;
		bool matches;
	} cases[] = {
		{ "/abc*abcd*abcd*abc*abcd", "/abc*abc*abc*abc*abc$", false },
		{ "/---------------------------------abc-def", "/-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-$", false },
		{ "/---------------------------------abc-def", "/-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*f$", true },
		{ "/---------------------------------abc-def", "/-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*c-*f$", true },
		{ "/---------------------------------abc-def", "/-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-c-f$", false },
		{ "/---------------------------------abc-def", "/-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*z-f$", false },
		{ "/aa", "*$", true },
		{ "/abefcdgiescdfimde", "/ab*cd*i*de$", true },
	};

	for ( const auto &c : cases ) {
		expectRobotRule( c.urlPath, c.rulePath, c.matches );
	}
}