Skip to content

Commit

Permalink
Add AnyP::Uri::Decode() (squid-cache#1626)
Browse files Browse the repository at this point in the history
Implement percent-decoding according to RFC 3986. Currently unused.
TODO: Upgrade rfc1738_unescape() callers.
  • Loading branch information
kinkie authored and squid-anubis committed Jan 31, 2024
1 parent ff9d945 commit 26256f2
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 3 deletions.
23 changes: 23 additions & 0 deletions src/anyp/Uri.cc
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,29 @@ AnyP::Uri::Encode(const SBuf &buf, const CharacterSet &ignore)
return output;
}

/// Percent-decodes buf: every '%' followed by two hexadecimal digits is
/// replaced with the single byte those digits encode; all other characters
/// are copied to the result unchanged.
/// \throws TextException when a '%' is not followed by two hexadecimal digits
SBuf
AnyP::Uri::Decode(const SBuf &buf)
{
    // everything except '%' is copied to the result verbatim
    static const auto verbatimChars = CharacterSet("percent", "%").complement("unencoded");

    SBuf decoded;
    Parser::Tokenizer tok(buf);
    while (!tok.atEnd()) {
        SBuf verbatimRun;
        if (tok.prefix(verbatimRun, verbatimChars))
            decoded.append(verbatimRun);

        // the verbatim run (if any) stopped either at '%' or at end of input
        if (!tok.skip('%'))
            continue;

        // parse the two base-16 digits separately; the trailing 1 presumably
        // caps each parse at one input character -- confirm against Tokenizer
        int64_t highNibble = 0;
        int64_t lowNibble = 0;
        const bool validTriplet = tok.int64(highNibble, 16, false, 1) &&
                                  tok.int64(lowNibble, 16, false, 1);
        if (!validTriplet)
            throw TextException("invalid pct-encoded triplet", Here());

        decoded.append(static_cast<char>((highNibble << 4) | lowNibble));
    }
    return decoded;
}

const SBuf &
AnyP::Uri::Asterisk()
{
Expand Down
3 changes: 3 additions & 0 deletions src/anyp/Uri.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,9 @@ class Uri
/// the provided set of expected characters.
static SBuf Encode(const SBuf &, const CharacterSet &expected);

/// %-decode the given buffer: every '%' followed by two hexadecimal digits
/// is replaced with the byte those digits encode; other characters are kept
/// \throws TextException on an invalid pct-encoded triplet
static SBuf Decode(const SBuf &);

/**
* The authority-form URI for currently stored values.
*
Expand Down
8 changes: 8 additions & 0 deletions src/base/CharacterSet.cc
Original file line number Diff line number Diff line change
Expand Up @@ -160,3 +160,11 @@ CharacterSet::ETAGC("ETAGC", {{0x21,0x21},{0x23,0x7e},{0x80,0xff}}),
CharacterSet::TOKEN68C("TOKEN68C","-._~+/0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
;

/// Returns the set of RFC 3986 "unreserved" URI characters.
/// The set is built on first use and the same object is returned by
/// every call.
const CharacterSet &
CharacterSet::RFC3986_UNRESERVED()
{
    // RFC 3986: unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
    // Heap-allocated once and never deleted in this function.
    static const CharacterSet * const unreserved =
        new CharacterSet("RFC3986_UNRESERVED", "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ-._~");
    return *unreserved;
}

3 changes: 3 additions & 0 deletions src/base/CharacterSet.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,9 @@ class CharacterSet
// token68 (internal characters only, excludes '=' terminator)
static const CharacterSet TOKEN68C;

/// allowed URI characters that do not have a reserved purpose, RFC 3986:
/// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
static const CharacterSet &RFC3986_UNRESERVED();

private:
/** index of characters in this set
*
Expand Down
4 changes: 1 addition & 3 deletions src/http/one/RequestParser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,7 @@ UriValidCharacters()
CharacterSet("gen-delims", ":/?#[]@") +
CharacterSet("sub-delims", "!$&'()*+,;=") +
// RFC 3986 section 2.3 - unreserved characters
CharacterSet::ALPHA +
CharacterSet::DIGIT +
CharacterSet("unreserved", "-._~") +
CharacterSet::RFC3986_UNRESERVED() +
// RFC 3986 section 2.1 - percent encoding "%" HEXDIG
CharacterSet("pct-encoded", "%") +
CharacterSet::HEXDIG;
Expand Down
48 changes: 48 additions & 0 deletions src/tests/testURL.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,11 @@
#include "squid.h"

#include "anyp/Uri.h"
#include "base/CharacterSet.h"
#include "base/TextException.h"
#include "compat/cppunit.h"
#include "debug/Stream.h"
#include "sbuf/Stream.h"
#include "unitTestMain.h"

#include <cppunit/TestAssert.h>
Expand All @@ -25,11 +28,13 @@ class TestUri : public CPPUNIT_NS::TestFixture
CPPUNIT_TEST_SUITE(TestUri);
CPPUNIT_TEST(testConstructScheme);
CPPUNIT_TEST(testDefaultConstructor);
CPPUNIT_TEST(testEncoding);
CPPUNIT_TEST_SUITE_END();

protected:
void testConstructScheme();
void testDefaultConstructor();
void testEncoding();
};
CPPUNIT_TEST_SUITE_REGISTRATION(TestUri);

Expand Down Expand Up @@ -81,6 +86,49 @@ TestUri::testDefaultConstructor()
delete urlPointer;
}

void
TestUri::testEncoding()
{
const std::vector< std::pair<SBuf, SBuf> > basicTestCases = {
{SBuf(""), SBuf("")},
{SBuf("foo"), SBuf("foo")},
{SBuf("%"), SBuf("%25")},
{SBuf("%foo"), SBuf("%25foo")},
{SBuf("foo%"), SBuf("foo%25")},
{SBuf("fo%o"), SBuf("fo%25o")},
{SBuf("fo%%o"), SBuf("fo%25%25o")},
{SBuf("fo o"), SBuf("fo%20o")},
{SBuf("?1"), SBuf("%3F1")},
{SBuf("\377"), SBuf("%FF")},
{SBuf("fo\0o", 4), SBuf("fo%00o")},
};

for (const auto &testCase: basicTestCases) {
CPPUNIT_ASSERT_EQUAL(testCase.first, AnyP::Uri::Decode(testCase.second));
CPPUNIT_ASSERT_EQUAL(testCase.second, AnyP::Uri::Encode(testCase.first, CharacterSet::RFC3986_UNRESERVED()));
};

const auto invalidEncodings = {
SBuf("%"),
SBuf("%%"),
SBuf("%%%"),
SBuf("%1"),
SBuf("%1Z"),
SBuf("%1\000", 2),
SBuf("%1\377"),
SBuf("%\0002", 3),
SBuf("%\3772"),
};

for (const auto &invalidEncoding: invalidEncodings) {
// test various input positions of an invalid escape sequence
CPPUNIT_ASSERT_THROW(AnyP::Uri::Decode(invalidEncoding), TextException);
CPPUNIT_ASSERT_THROW(AnyP::Uri::Decode(ToSBuf("word", invalidEncoding)), TextException);
CPPUNIT_ASSERT_THROW(AnyP::Uri::Decode(ToSBuf(invalidEncoding, "word")), TextException);
CPPUNIT_ASSERT_THROW(AnyP::Uri::Decode(ToSBuf("word", invalidEncoding, "word")), TextException);
};
}

int
main(int argc, char *argv[])
{
Expand Down

0 comments on commit 26256f2

Please sign in to comment.