mirror of
https://github.com/privacore/open-source-search-engine.git
synced 2025-07-13 02:36:06 -04:00
Merge branch 'master' of github.com:privacore/open-source-search-engine
This commit is contained in:
73
Url.cpp
73
Url.cpp
@ -633,46 +633,27 @@ static void stripParameters( UrlParser *urlParser ) {
|
||||
UrlComponent *cUrlComponent = ( cQueryMatches.size() == 1 ) ? cQueryMatches[0] : NULL;
|
||||
UrlComponent *oUrlComponent = ( oQueryMatches.size() == 1 ) ? oQueryMatches[0] : NULL;
|
||||
|
||||
bool deleteC = false;
|
||||
bool deleteO = false;
|
||||
|
||||
if ( cUrlComponent ) {
|
||||
if ( cUrlComponent->getValueLen() == 0 ) {
|
||||
deleteC = true;
|
||||
urlParser->deleteComponent( cUrlComponent );
|
||||
} else if ( cUrlComponent->getValueLen() == 1 ) {
|
||||
char c = *( cUrlComponent->getValue() );
|
||||
if ( c == 'N' || c == 'M' || c == 'S' || c == 'D' ) {
|
||||
deleteC = true;
|
||||
urlParser->deleteComponent( cUrlComponent );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( oUrlComponent ) {
|
||||
if ( oUrlComponent->getValueLen() == 0 ) {
|
||||
deleteO = true;
|
||||
urlParser->deleteComponent( oUrlComponent );
|
||||
} else if ( oUrlComponent->getValueLen() == 1 ) {
|
||||
char o = *( oUrlComponent->getValue() );
|
||||
if ( o == 'A' || o == 'D' ) {
|
||||
deleteO = true;
|
||||
urlParser->deleteComponent( oUrlComponent );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( urlParser->getQueryParamCount() == 2 ) {
|
||||
if ( deleteC && deleteO ) {
|
||||
urlParser->deleteComponent( cUrlComponent );
|
||||
urlParser->deleteComponent( oUrlComponent );
|
||||
}
|
||||
} else {
|
||||
if ( deleteC ) {
|
||||
urlParser->deleteComponent( cUrlComponent );
|
||||
}
|
||||
|
||||
if ( deleteO ) {
|
||||
oUrlComponent->setDeleted();
|
||||
urlParser->deleteComponent( oUrlComponent );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// @todo ALC token?
|
||||
@ -774,31 +755,35 @@ static void stripParameters( UrlParser *urlParser ) {
|
||||
|
||||
/// @todo ALC cater for more affiliate links here
|
||||
|
||||
if ( strncmp( urlParser->getDomain(), "amazon.", 7 ) == 0 ) {
|
||||
// amazon
|
||||
// https://www.reddit.com/r/GameDeals/wiki/affiliate
|
||||
// only check domain specific logic when we have a domain
|
||||
if ( urlParser->getDomain() ) {
|
||||
if ( strncmp( urlParser->getDomain(), "amazon.", 7 ) == 0 ) {
|
||||
// amazon
|
||||
// https://www.reddit.com/r/GameDeals/wiki/affiliate
|
||||
|
||||
// affiliate
|
||||
urlParser->removeQueryParam( "tag" );
|
||||
// affiliate
|
||||
urlParser->removeQueryParam( "tag" );
|
||||
|
||||
// wishlist
|
||||
urlParser->removeQueryParam( "coliid" );
|
||||
urlParser->removeQueryParam( "colid" );
|
||||
// wishlist
|
||||
urlParser->removeQueryParam( "coliid" );
|
||||
urlParser->removeQueryParam( "colid" );
|
||||
|
||||
// reference
|
||||
urlParser->removeQueryParam( "ref" );
|
||||
urlParser->removePathParam( UrlComponent::Matcher( "ref" ), UrlComponent::Validator( 0, 0, false, ALLOW_ALL, MANDATORY_PUNCTUATION ) );
|
||||
} else if ( strncmp( urlParser->getDomain(), "ebay.", 5 ) == 0 ) {
|
||||
// ebay
|
||||
// http://www.ebaypartnernetworkblog.com/en/2009/05/new-link-generator-tool-additional-information/
|
||||
// reference
|
||||
urlParser->removeQueryParam( "ref" );
|
||||
urlParser->removePathParam( UrlComponent::Matcher( "ref" ),
|
||||
UrlComponent::Validator( 0, 0, false, ALLOW_ALL, MANDATORY_PUNCTUATION ) );
|
||||
} else if ( strncmp( urlParser->getDomain(), "ebay.", 5 ) == 0 ) {
|
||||
// ebay
|
||||
// http://www.ebaypartnernetworkblog.com/en/2009/05/new-link-generator-tool-additional-information/
|
||||
|
||||
urlParser->removeQueryParam( "icep_ff3" );
|
||||
urlParser->removeQueryParam( "pub" );
|
||||
urlParser->removeQueryParam( "toolid" );
|
||||
urlParser->removeQueryParam( "campid" );
|
||||
urlParser->removeQueryParam( "customid" );
|
||||
urlParser->removeQueryParam( "afepn" );
|
||||
urlParser->removeQueryParam( "pid" );
|
||||
urlParser->removeQueryParam( "icep_ff3" );
|
||||
urlParser->removeQueryParam( "pub" );
|
||||
urlParser->removeQueryParam( "toolid" );
|
||||
urlParser->removeQueryParam( "campid" );
|
||||
urlParser->removeQueryParam( "customid" );
|
||||
urlParser->removeQueryParam( "afepn" );
|
||||
urlParser->removeQueryParam( "pid" );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -97,7 +97,7 @@ void UrlParser::parse() {
|
||||
const char *userInfoPos = static_cast<const char *>( memchr( m_authority, '@', m_authorityLen ) );
|
||||
if ( userInfoPos != NULL ) {
|
||||
m_host = userInfoPos + 1;
|
||||
m_hostLen = m_authorityLen - ( userInfoPos - m_authority );
|
||||
m_hostLen = m_authorityLen - ( userInfoPos - m_authority ) - 1;
|
||||
} else {
|
||||
m_host = m_authority;
|
||||
m_hostLen = m_authorityLen;
|
||||
@ -119,14 +119,11 @@ void UrlParser::parse() {
|
||||
if ( m_domain ) {
|
||||
m_domain += 1;
|
||||
m_domainLen = m_hostLen - ( m_domain - m_host );
|
||||
} else {
|
||||
m_domain = m_host;
|
||||
m_domainLen = m_hostLen;
|
||||
}
|
||||
}
|
||||
|
||||
// defaults to host
|
||||
if ( !m_domain ) {
|
||||
m_domain = m_host;
|
||||
m_domainLen = m_hostLen;
|
||||
}
|
||||
}
|
||||
|
||||
const char *queryPos = static_cast<const char*>( memchr( currentPos, '?', urlEnd - currentPos ) );
|
||||
|
@ -30,6 +30,14 @@ TEST( UrlParserTest, ParseSchemeNone ) {
|
||||
}
|
||||
|
||||
// A URL carrying "user:password@" user info: the expected authority keeps
// the user-info prefix, while the expected domain is just "example.com".
TEST( UrlParserTest, ParseUserInfo ) {
	const std::string input = "http://username:password@www.example.com/param1=abc-123";
	UrlParser parser( input.c_str(), input.size() );

	// authority: everything between the scheme separator and the path
	checkResult( "username:password@www.example.com",
	             parser.getAuthority(), parser.getAuthorityLen() );

	// domain: expected without user info and without the "www" label
	checkResult( "example.com",
	             parser.getDomain(), parser.getDomainLen() );
}
|
||||
|
||||
TEST( UrlParserTest, ParseUserInfoPort ) {
|
||||
std::string url( "http://username:password@www.example.com:8080/param1=abc-123" );
|
||||
UrlParser urlParser( url.c_str(), url.size() );
|
||||
|
||||
@ -53,6 +61,22 @@ TEST( UrlParserTest, ParsePortSchemeNone ) {
|
||||
checkResult( "example.com", urlParser.getDomain(), urlParser.getDomainLen() );
|
||||
}
|
||||
|
||||
// A URL whose host is a dotted IPv4 address: the expected authority is the
// address itself and the expected domain is the empty string.
TEST( UrlParserTest, ParseIP ) {
	const std::string input = "http://127.0.0.1/param1=abc-123";
	UrlParser parser( input.c_str(), input.size() );

	checkResult( "127.0.0.1",
	             parser.getAuthority(), parser.getAuthorityLen() );

	// no domain is expected for an IP-address host
	checkResult( "",
	             parser.getDomain(), parser.getDomainLen() );
}
|
||||
|
||||
// A URL whose host is a dotted IPv4 address followed by a port: the expected
// authority includes the ":8080" suffix and the expected domain is empty.
TEST( UrlParserTest, ParseIPPort ) {
	const std::string input = "http://127.0.0.1:8080/param1=abc-123";
	UrlParser parser( input.c_str(), input.size() );

	checkResult( "127.0.0.1:8080",
	             parser.getAuthority(), parser.getAuthorityLen() );

	// no domain is expected for an IP-address host, with or without a port
	checkResult( "",
	             parser.getDomain(), parser.getDomainLen() );
}
|
||||
|
||||
TEST( UrlParserTest, ParseSubdomainNone ) {
|
||||
std::string url( "http://example.com/param1=abc-123" );
|
||||
UrlParser urlParser( url.c_str(), url.size() );
|
||||
@ -83,7 +107,7 @@ TEST( UrlParserTest, ParseTLDNone ) {
|
||||
UrlParser urlParser( url.c_str(), url.size() );
|
||||
|
||||
checkResult( "ok", urlParser.getAuthority(), urlParser.getAuthorityLen() );
|
||||
checkResult( "ok", urlParser.getDomain(), urlParser.getDomainLen() );
|
||||
checkResult( "", urlParser.getDomain(), urlParser.getDomainLen() );
|
||||
}
|
||||
|
||||
TEST( UrlParserTest, ParseSLD ) {
|
||||
|
@ -521,7 +521,7 @@ TEST( UrlTest, StripApacheDirSort ) {
|
||||
std::make_tuple( "http://www.3ddx.com/blog/wp-includes/SimplePie/Decode/HTML/?C=N;O=D",
|
||||
"http://www.3ddx.com/blog/wp-includes/SimplePie/Decode/HTML/" ),
|
||||
std::make_tuple( "http://macports.mirror.ac.za/release/ports/www/midori/?C=M&O=A",
|
||||
"http://macports.mirror.ac.za/release/ports/www/midori/" )
|
||||
"http://macports.mirror.ac.za/release/ports/www/midori/" )
|
||||
};
|
||||
|
||||
strip_param_tests( test_cases, 123 );
|
||||
|
Reference in New Issue
Block a user