Make sure we strip session id even if query parameter is separated by '?'

This commit is contained in:
Ai Lin Chia 2018-04-12 15:24:55 +02:00
parent 606a89d852
commit 0ff7b161ed
7 changed files with 68 additions and 19 deletions

View File

@ -33,7 +33,10 @@
//#define TITLEREC_CURRENT_VERSION 127
// explicit keywords field
#define TITLEREC_CURRENT_VERSION 128
//#define TITLEREC_CURRENT_VERSION 128
// make sure parameter is stripped even if query parameter is separated with '?'
#define TITLEREC_CURRENT_VERSION 129
#define TITLEREC_CURRENT_VERSION_STR TO_STRING(TITLEREC_CURRENT_VERSION)

View File

@ -41,11 +41,12 @@ void UrlComponent::normalize( std::string *component ) {
}
}
UrlComponent::UrlComponent( UrlComponent::Type type, const char *pos, size_t len, char separator )
UrlComponent::UrlComponent( UrlComponent::Type type, const char *pos, size_t len, char separator, bool isFirst )
: m_type ( type )
, m_componentStr( pos, len )
, m_separator( separator )
, m_deleted( false) {
, m_isFirst( isFirst )
, m_deleted( false ) {
// normalize string
normalize( &m_componentStr );
m_keyLen = m_componentStr.size();

View File

@ -10,7 +10,7 @@ public:
TYPE_QUERY
};
UrlComponent( Type type, const char *pos, size_t len, char separator );
UrlComponent( Type type, const char *pos, size_t len, char separator, bool isFirst );
void print() const;
@ -54,6 +54,10 @@ public:
return m_componentStr.size() - m_keyLen - 1;
}
bool isFirst() const {
return m_isFirst;
}
bool isDeleted() const {
return m_deleted;
}
@ -73,6 +77,7 @@ private:
std::string m_componentStr;
char m_separator;
bool m_isFirst;
std::string m_key;
size_t m_keyLen;

View File

@ -171,6 +171,7 @@ void UrlParser::parse() {
const char *queryEnd = fragmentPos ? fragmentPos : urlEnd;
// path
bool isFirstComponent = true;
bool updatePathEncChar = false;
const char *prevPos = pathPos + 1;
while (prevPos && (prevPos <= pathEnd)) {
@ -180,7 +181,8 @@ void UrlParser::parse() {
len = currentPos - prevPos;
}
UrlComponent urlPart = UrlComponent(UrlComponent::TYPE_PATH, prevPos, len, *(prevPos - 1));
UrlComponent urlPart = UrlComponent(UrlComponent::TYPE_PATH, prevPos, len, *(prevPos - 1), isFirstComponent);
isFirstComponent = false;
// check for special cases before adding to m_paths
if (len == 1 && memcmp(prevPos, ".", 1) == 0) {
@ -227,15 +229,19 @@ void UrlParser::parse() {
if (queryPos) {
prevPos = queryPos + 1;
bool isFirstComponent = true;
bool isPrevAmpersand = false;
while (prevPos && (prevPos < queryEnd)) {
const char *querySeparator = m_titledbVersion <= 128 ? "&;" : "&;?";
size_t len = queryEnd - prevPos;
currentPos = strnpbrk(prevPos, len, "&;");
currentPos = strnpbrk(prevPos, len, querySeparator);
if (currentPos) {
len = currentPos - prevPos;
}
UrlComponent urlPart = UrlComponent(UrlComponent::TYPE_QUERY, prevPos, len, *(prevPos - 1));
UrlComponent urlPart = UrlComponent(UrlComponent::TYPE_QUERY, prevPos, len, *(prevPos - 1), isFirstComponent);
isFirstComponent = false;
std::string key = urlPart.getKey();
// check previous urlPart
@ -332,7 +338,9 @@ void UrlParser::unparse() {
isFirst = false;
m_urlParsed.append("?");
} else {
m_urlParsed += (query.getSeparator() == '?') ? '&' : query.getSeparator();
// we should preserve '?' that is not the first separator
// because '?' should not have any special meaning after query parameter starts
m_urlParsed += (query.isFirst() && query.getSeparator() == '?') ? '&' : query.getSeparator();
}
m_urlParsed.append(query.getString());

View File

@ -3,7 +3,7 @@
#include "UrlComponent.h"
TEST(UrlComponentTest, ComponentParamSingle) {
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "Param1=Value1", 13, '?');
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "Param1=Value1", 13, '?', true);
EXPECT_STREQ("param1", urlComponent.getKey().c_str());
EXPECT_EQ(6, urlComponent.getValueLen());
@ -11,7 +11,7 @@ TEST(UrlComponentTest, ComponentParamSingle) {
}
TEST(UrlComponentTest, ComponentParamDouble) {
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "Param1=Value1&Param2=Value2", 13, '?');
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "Param1=Value1&Param2=Value2", 13, '?', true);
EXPECT_STREQ("param1", urlComponent.getKey().c_str());
EXPECT_EQ(6, urlComponent.getValueLen());
@ -51,7 +51,7 @@ TEST(UrlComponentTest, ComponentNormalize) {
}
TEST(UrlComponentTest, MatcherMatchDefault) {
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?');
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?', true);
UrlComponent::Matcher matcherLower("param1");
EXPECT_TRUE(matcherLower.isMatching(urlComponent));
@ -85,7 +85,7 @@ TEST(UrlComponentTest, MatcherMatchDefault) {
}
TEST(UrlComponentTest, MatcherMatchCase) {
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?');
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?', true);
MatchCriteria matchCriteria = MATCH_CASE;
@ -122,7 +122,7 @@ TEST(UrlComponentTest, MatcherMatchCase) {
TEST(UrlComponentTest, MatcherMatchPartial) {
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?');
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?', true);
MatchCriteria matchCriteria = MATCH_PARTIAL;
@ -158,7 +158,7 @@ TEST(UrlComponentTest, MatcherMatchPartial) {
}
TEST(UrlComponentTest, MatcherMatchPrefix) {
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?');
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?', true);
MatchCriteria matchCriteria = MATCH_PREFIX;
@ -194,7 +194,7 @@ TEST(UrlComponentTest, MatcherMatchPrefix) {
}
TEST(UrlComponentTest, MatcherMatchSuffix) {
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?');
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?', true);
MatchCriteria matchCriteria = MATCH_SUFFIX;
@ -230,7 +230,7 @@ TEST(UrlComponentTest, MatcherMatchSuffix) {
}
TEST(UrlComponentTest, MatcherMatchCaseMatchPartial) {
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?');
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?', true);
MatchCriteria matchCriteria = (MATCH_CASE | MATCH_PARTIAL);
@ -266,7 +266,7 @@ TEST(UrlComponentTest, MatcherMatchCaseMatchPartial) {
}
TEST(UrlComponentTest, MatcherMatchCaseMatchPrefix) {
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?');
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?', true);
MatchCriteria matchCriteria = (MATCH_CASE | MATCH_PREFIX);
@ -302,7 +302,7 @@ TEST(UrlComponentTest, MatcherMatchCaseMatchPrefix) {
}
TEST(UrlComponentTest, MatcherMatchCaseMatchSuffix) {
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?');
UrlComponent urlComponent(UrlComponent::TYPE_QUERY, "PaRam1=Value1", 13, '?', true);
MatchCriteria matchCriteria = (MATCH_CASE | MATCH_SUFFIX);
@ -338,7 +338,7 @@ TEST(UrlComponentTest, MatcherMatchCaseMatchSuffix) {
}
UrlComponent createUrlComponent(UrlComponent::Type type, const char *component) {
return UrlComponent(type, component, strlen(component), '?');
return UrlComponent(type, component, strlen(component), '?', true);
}
TEST(UrlComponentTest, ValidatorDefault) {

View File

@ -314,3 +314,14 @@ TEST(UrlParserTest, ParseIDN) {
checkResult(version, "http://www.xn--relgeroskilde-5fb0y.dk/", urlParser.getUrlParsed(), urlParser.getUrlParsedLen());
}
}
TEST(UrlParserTest, ParseDoubleQuestionMark) {
std::string url("http://www.yutaka.dk/info_pages.php?pages_id=28?osCsid=514e6c48fc4d0ac437b54977dd0b35d5");
for (int version = 123; version <= TITLEREC_CURRENT_VERSION; ++version) {
UrlParser urlParser(url.c_str(), url.size(), version);
urlParser.unparse();
checkResult(version, "http://www.yutaka.dk/info_pages.php?pages_id=28?osCsid=514e6c48fc4d0ac437b54977dd0b35d5", urlParser.getUrlParsed(), urlParser.getUrlParsedLen());
}
}

View File

@ -150,6 +150,7 @@ static void strip_param_tests(const std::vector<std::tuple<const char *, const c
Url url;
url.set(input_url, strlen(input_url), false, true, titleDbVersion);
SCOPED_TRACE(testing::Message() << "version=" << version);
EXPECT_STREQ(std::get<1>(*it), (const char *)url.getUrl());
}
}
@ -249,6 +250,26 @@ TEST(UrlTest, StripParamsOsCommerce) {
strip_param_tests(test_cases, 123);
}
TEST(UrlTest, StripParamsOsCommerceV128) {
std::vector<std::tuple<const char *, const char *>> test_cases = {
// osCsid
std::make_tuple("http://www.yutaka.dk/info_pages.php?pages_id=28?osCsid=514e6c48fc4d0ac437b54977dd0b35d5",
"http://www.yutaka.dk/info_pages.php?pages_id=28?osCsid=514e6c48fc4d0ac437b54977dd0b35d5")
};
strip_param_tests(test_cases, 128);
}
TEST(UrlTest, StripParamsOsCommerceV129) {
std::vector<std::tuple<const char *, const char *>> test_cases = {
// osCsid
std::make_tuple("http://www.yutaka.dk/info_pages.php?pages_id=28?osCsid=514e6c48fc4d0ac437b54977dd0b35d5",
"http://www.yutaka.dk/info_pages.php?pages_id=28")
};
strip_param_tests(test_cases, 129);
}
TEST(UrlTest, StripParamsXTCommerce) {
std::vector<std::tuple<const char *, const char *>> test_cases = {
// XT-commerce