mirror of
https://github.com/privacore/open-source-search-engine.git
synced 2025-04-18 14:48:48 -04:00
Make sure we strip session id even if query parameter is separated by '?'
This commit is contained in:
parent
606a89d852
commit
0ff7b161ed
@ -33,7 +33,10 @@
|
||||
//#define TITLEREC_CURRENT_VERSION 127
|
||||
|
||||
// explicit keywords field
|
||||
#define TITLEREC_CURRENT_VERSION 128
|
||||
//#define TITLEREC_CURRENT_VERSION 128
|
||||
|
||||
// make sure parameter is stripped even if query parameter is separated with '?'
|
||||
#define TITLEREC_CURRENT_VERSION 129
|
||||
|
||||
#define TITLEREC_CURRENT_VERSION_STR TO_STRING(TITLEREC_CURRENT_VERSION)
|
||||
|
||||
|
@ -41,11 +41,12 @@ void UrlComponent::normalize( std::string *component ) {
|
||||
}
|
||||
}
|
||||
|
||||
UrlComponent::UrlComponent( UrlComponent::Type type, const char *pos, size_t len, char separator )
|
||||
UrlComponent::UrlComponent( UrlComponent::Type type, const char *pos, size_t len, char separator, bool isFirst )
|
||||
: m_type ( type )
|
||||
, m_componentStr( pos, len )
|
||||
, m_separator( separator )
|
||||
, m_deleted( false) {
|
||||
, m_isFirst( isFirst )
|
||||
, m_deleted( false ) {
|
||||
// normalize string
|
||||
normalize( &m_componentStr );
|
||||
m_keyLen = m_componentStr.size();
|
||||
|
@ -10,7 +10,7 @@ public:
|
||||
TYPE_QUERY
|
||||
};
|
||||
|
||||
UrlComponent( Type type, const char *pos, size_t len, char separator );
|
||||
UrlComponent( Type type, const char *pos, size_t len, char separator, bool isFirst );
|
||||
|
||||
void print() const;
|
||||
|
||||
@ -54,6 +54,10 @@ public:
|
||||
return m_componentStr.size() - m_keyLen - 1;
|
||||
}
|
||||
|
||||
bool isFirst() const {
|
||||
return m_isFirst;
|
||||
}
|
||||
|
||||
bool isDeleted() const {
|
||||
return m_deleted;
|
||||
}
|
||||
@ -73,6 +77,7 @@ private:
|
||||
std::string m_componentStr;
|
||||
|
||||
char m_separator;
|
||||
bool m_isFirst;
|
||||
|
||||
std::string m_key;
|
||||
size_t m_keyLen;
|
||||
|
@ -171,6 +171,7 @@ void UrlParser::parse() {
|
||||
const char *queryEnd = fragmentPos ? fragmentPos : urlEnd;
|
||||
|
||||
// path
|
||||
bool isFirstComponent = true;
|
||||
bool updatePathEncChar = false;
|
||||
const char *prevPos = pathPos + 1;
|
||||
while (prevPos && (prevPos <= pathEnd)) {
|
||||
@ -180,7 +181,8 @@ void UrlParser::parse() {
|
||||
len = currentPos - prevPos;
|
||||
}
|
||||
|
||||
UrlComponent urlPart = UrlComponent(UrlComponent::TYPE_PATH, prevPos, len, *(prevPos - 1));
|
||||
UrlComponent urlPart = UrlComponent(UrlComponent::TYPE_PATH, prevPos, len, *(prevPos - 1), isFirstComponent);
|
||||
isFirstComponent = false;
|
||||
|
||||
// check for special cases before adding to m_paths
|
||||
if (len == 1 && memcmp(prevPos, ".", 1) == 0) {
|
||||
@ -227,15 +229,19 @@ void UrlParser::parse() {
|
||||
if (queryPos) {
|
||||
prevPos = queryPos + 1;
|
||||
|
||||
bool isFirstComponent = true;
|
||||
bool isPrevAmpersand = false;
|
||||
while (prevPos && (prevPos < queryEnd)) {
|
||||
const char *querySeparator = m_titledbVersion <= 128 ? "&;" : "&;?";
|
||||
size_t len = queryEnd - prevPos;
|
||||
currentPos = strnpbrk(prevPos, len, "&;");
|
||||
currentPos = strnpbrk(prevPos, len, querySeparator);
|
||||
if (currentPos) {
|
||||
len = currentPos - prevPos;
|
||||
}
|
||||
|
||||
UrlComponent urlPart = UrlComponent(UrlComponent::TYPE_QUERY, prevPos, len, *(prevPos - 1));
|
||||
UrlComponent urlPart = UrlComponent(UrlComponent::TYPE_QUERY, prevPos, len, *(prevPos - 1), isFirstComponent);
|
||||
isFirstComponent = false;
|
||||
|
||||
std::string key = urlPart.getKey();
|
||||
|
||||
// check previous urlPart
|
||||
@ -332,7 +338,9 @@ void UrlParser::unparse() {
|
||||
isFirst = false;
|
||||
m_urlParsed.append("?");
|
||||
} else {
|
||||
m_urlParsed += (query.getSeparator() == '?') ? '&' : query.getSeparator();
|
||||
// we should preserve '?' that is not the first separator
|
||||
// because '?' should not have any special meaning after query parameter starts
|
||||
m_urlParsed += (query.isFirst() && query.getSeparator() == '?') ? '&' : query.getSeparator();
|
||||
}
|
||||
|
||||
m_urlParsed.append(query.getString());
|
||||
|
@ -3,7 +3,7 @@
|
||||
#include "UrlComponent.h"
|
||||
|
||||
TEST(UrlComponentTest, ComponentParamSingle) {
|
||||
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "Param1=Value1", 13, '?');
|
||||
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "Param1=Value1", 13, '?', true);
|
||||
EXPECT_STREQ("param1", urlComponent.getKey().c_str());
|
||||
|
||||
EXPECT_EQ(6, urlComponent.getValueLen());
|
||||
@ -11,7 +11,7 @@ TEST(UrlComponentTest, ComponentParamSingle) {
|
||||
}
|
||||
|
||||
TEST(UrlComponentTest, ComponentParamDouble) {
|
||||
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "Param1=Value1&Param2=Value2", 13, '?');
|
||||
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "Param1=Value1&Param2=Value2", 13, '?', true);
|
||||
EXPECT_STREQ("param1", urlComponent.getKey().c_str());
|
||||
|
||||
EXPECT_EQ(6, urlComponent.getValueLen());
|
||||
@ -51,7 +51,7 @@ TEST(UrlComponentTest, ComponentNormalize) {
|
||||
}
|
||||
|
||||
TEST(UrlComponentTest, MatcherMatchDefault) {
|
||||
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?');
|
||||
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?', true);
|
||||
|
||||
UrlComponent::Matcher matcherLower("param1");
|
||||
EXPECT_TRUE(matcherLower.isMatching(urlComponent));
|
||||
@ -85,7 +85,7 @@ TEST(UrlComponentTest, MatcherMatchDefault) {
|
||||
}
|
||||
|
||||
TEST(UrlComponentTest, MatcherMatchCase) {
|
||||
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?');
|
||||
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?', true);
|
||||
|
||||
MatchCriteria matchCriteria = MATCH_CASE;
|
||||
|
||||
@ -122,7 +122,7 @@ TEST(UrlComponentTest, MatcherMatchCase) {
|
||||
|
||||
|
||||
TEST(UrlComponentTest, MatcherMatchPartial) {
|
||||
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?');
|
||||
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?', true);
|
||||
|
||||
MatchCriteria matchCriteria = MATCH_PARTIAL;
|
||||
|
||||
@ -158,7 +158,7 @@ TEST(UrlComponentTest, MatcherMatchPartial) {
|
||||
}
|
||||
|
||||
TEST(UrlComponentTest, MatcherMatchPrefix) {
|
||||
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?');
|
||||
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?', true);
|
||||
|
||||
MatchCriteria matchCriteria = MATCH_PREFIX;
|
||||
|
||||
@ -194,7 +194,7 @@ TEST(UrlComponentTest, MatcherMatchPrefix) {
|
||||
}
|
||||
|
||||
TEST(UrlComponentTest, MatcherMatchSuffix) {
|
||||
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?');
|
||||
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?', true);
|
||||
|
||||
MatchCriteria matchCriteria = MATCH_SUFFIX;
|
||||
|
||||
@ -230,7 +230,7 @@ TEST(UrlComponentTest, MatcherMatchSuffix) {
|
||||
}
|
||||
|
||||
TEST(UrlComponentTest, MatcherMatchCaseMatchPartial) {
|
||||
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?');
|
||||
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?', true);
|
||||
|
||||
MatchCriteria matchCriteria = (MATCH_CASE | MATCH_PARTIAL);
|
||||
|
||||
@ -266,7 +266,7 @@ TEST(UrlComponentTest, MatcherMatchCaseMatchPartial) {
|
||||
}
|
||||
|
||||
TEST(UrlComponentTest, MatcherMatchCaseMatchPrefix) {
|
||||
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?');
|
||||
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?', true);
|
||||
|
||||
MatchCriteria matchCriteria = (MATCH_CASE | MATCH_PREFIX);
|
||||
|
||||
@ -302,7 +302,7 @@ TEST(UrlComponentTest, MatcherMatchCaseMatchPrefix) {
|
||||
}
|
||||
|
||||
TEST(UrlComponentTest, MatcherMatchCaseMatchSuffix) {
|
||||
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?');
|
||||
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?', true);
|
||||
|
||||
MatchCriteria matchCriteria = (MATCH_CASE | MATCH_SUFFIX);
|
||||
|
||||
@ -338,7 +338,7 @@ TEST(UrlComponentTest, MatcherMatchCaseMatchSuffix) {
|
||||
}
|
||||
|
||||
UrlComponent createUrlComponent(UrlComponent::Type type, const char *component) {
|
||||
return UrlComponent(type, component, strlen(component), '?');
|
||||
return UrlComponent(type, component, strlen(component), '?', true);
|
||||
}
|
||||
|
||||
TEST(UrlComponentTest, ValidatorDefault) {
|
||||
|
@ -314,3 +314,14 @@ TEST(UrlParserTest, ParseIDN) {
|
||||
checkResult(version, "http://www.xn--relgeroskilde-5fb0y.dk/", urlParser.getUrlParsed(), urlParser.getUrlParsedLen());
|
||||
}
|
||||
}
|
||||
|
||||
TEST(UrlParserTest, ParseDoubleQuestionMark) {
|
||||
std::string url("http://www.yutaka.dk/info_pages.php?pages_id=28?osCsid=514e6c48fc4d0ac437b54977dd0b35d5");
|
||||
|
||||
for (int version = 123; version <= TITLEREC_CURRENT_VERSION; ++version) {
|
||||
UrlParser urlParser(url.c_str(), url.size(), version);
|
||||
|
||||
urlParser.unparse();
|
||||
checkResult(version, "http://www.yutaka.dk/info_pages.php?pages_id=28?osCsid=514e6c48fc4d0ac437b54977dd0b35d5", urlParser.getUrlParsed(), urlParser.getUrlParsedLen());
|
||||
}
|
||||
}
|
@ -150,6 +150,7 @@ static void strip_param_tests(const std::vector<std::tuple<const char *, const c
|
||||
Url url;
|
||||
url.set(input_url, strlen(input_url), false, true, titleDbVersion);
|
||||
|
||||
SCOPED_TRACE(testing::Message() << "version=" << version);
|
||||
EXPECT_STREQ(std::get<1>(*it), (const char *)url.getUrl());
|
||||
}
|
||||
}
|
||||
@ -249,6 +250,26 @@ TEST(UrlTest, StripParamsOsCommerce) {
|
||||
strip_param_tests(test_cases, 123);
|
||||
}
|
||||
|
||||
TEST(UrlTest, StripParamsOsCommerceV128) {
|
||||
std::vector<std::tuple<const char *, const char *>> test_cases = {
|
||||
// osCsid
|
||||
std::make_tuple("http://www.yutaka.dk/info_pages.php?pages_id=28?osCsid=514e6c48fc4d0ac437b54977dd0b35d5",
|
||||
"http://www.yutaka.dk/info_pages.php?pages_id=28?osCsid=514e6c48fc4d0ac437b54977dd0b35d5")
|
||||
};
|
||||
|
||||
strip_param_tests(test_cases, 128);
|
||||
}
|
||||
|
||||
TEST(UrlTest, StripParamsOsCommerceV129) {
|
||||
std::vector<std::tuple<const char *, const char *>> test_cases = {
|
||||
// osCsid
|
||||
std::make_tuple("http://www.yutaka.dk/info_pages.php?pages_id=28?osCsid=514e6c48fc4d0ac437b54977dd0b35d5",
|
||||
"http://www.yutaka.dk/info_pages.php?pages_id=28")
|
||||
};
|
||||
|
||||
strip_param_tests(test_cases, 129);
|
||||
}
|
||||
|
||||
TEST(UrlTest, StripParamsXTCommerce) {
|
||||
std::vector<std::tuple<const char *, const char *>> test_cases = {
|
||||
// XT-commerce
|
||||
|
Loading…
Reference in New Issue
Block a user