diff --git a/src/anyp/Uri.cc b/src/anyp/Uri.cc index 5263ba5752e..1ce47dd65df 100644 --- a/src/anyp/Uri.cc +++ b/src/anyp/Uri.cc @@ -69,6 +69,19 @@ PathChars() return pathValid; } +/// Characters which are valid within a URI query section +static const CharacterSet & +QueryChars() +{ + /* + * RFC 3986 section 3.4 + * + * query = *( pchar / "/" / "?" ) + */ + static const auto queryValid = CharacterSet("query", "/?") + PathChars(); + return queryValid; +} + /** * Governed by RFC 3986 section 2.1 */ @@ -208,6 +221,17 @@ AnyP::Uri::path() const return path_; } +const SBuf & +AnyP::Uri::query() const +{ + // FTP does not have a query section + static SBuf empty; + if (scheme_ == AnyP::PROTO_FTP) + return empty; + + return query_; +} + void urlInitialize(void) { @@ -310,6 +334,47 @@ urlAppendDomain(char *host) return true; } +/// \returns whether whitespace has 'chopped' the URL apart. +static bool +StripAnyWsp(char *buf) +{ + if (stringHasWhitespace(buf)) { + switch (Config.uri_whitespace) { + + case URI_WHITESPACE_DENY: + throw TextException("URI has whitespace", Here()); + + case URI_WHITESPACE_ALLOW: + break; + + case URI_WHITESPACE_ENCODE: { + auto *t = rfc1738_escape_unescaped(buf); + xstrncpy(buf, t, MAX_URL); + } + break; + + case URI_WHITESPACE_CHOP: + *(buf + strcspn(buf, w_space)) = '\0'; + return true; + + case URI_WHITESPACE_STRIP: + default: { + char *t = buf; + char *q = buf; + while (*t) { + if (!xisspace(*t)) { + *q = *t; + ++q; + } + ++t; + } + *q = '\0'; + } + } + } + return false; +} + /* * Parse a URI/URL. * @@ -330,14 +395,14 @@ AnyP::Uri::parse(const HttpRequestMethod& method, const SBuf &rawUrl) LOCAL_ARRAY(char, login, MAX_URL); LOCAL_ARRAY(char, foundHost, MAX_URL); LOCAL_ARRAY(char, urlpath, MAX_URL); + LOCAL_ARRAY(char, foundQuery, MAX_URL); char *t = nullptr; - char *q = nullptr; int foundPort; int l; int i; const char *src; char *dst; - foundHost[0] = urlpath[0] = login[0] = '\0'; + foundHost[0] = urlpath[0] = foundQuery[0] = login[0] = '\0'; if ((l = rawUrl.length()) + Config.appendDomainLen > (MAX_URL - 1)) { debugs(23, DBG_IMPORTANT, MYNAME << "URL too large (" << l << " bytes)"); @@ -411,23 +476,58 @@ AnyP::Uri::parse(const HttpRequestMethod& method, const SBuf &rawUrl) return false; *dst = '\0'; - // We are looking at path-abempty. - if (*src != '/') { - // path-empty, including the end of the `src` c-string cases - urlpath[0] = '/'; - dst = &urlpath[1]; - } else { + bool chopped = false; + if (*src == '/') { dst = urlpath; - } - /* Then everything from / (inclusive) until \r\n or \0 - that's urlpath */ - for (; i < l && *src != '\r' && *src != '\n' && *src != '\0'; ++i, ++src, ++dst) { - *dst = *src; + /* Then everything from / (inclusive) until '?', '#', '\r\n' or '\0' - that's urlpath */ + for (; i < l && *src != '?' && *src != '#' && *src != '\r' && *src != '\n' && *src != '\0'; ++i, ++src, ++dst) { + *dst = *src; + } + *dst = '\0'; + chopped = StripAnyWsp(urlpath); + rfc1738_unescape(urlpath); + } else { + // We should be looking at path-abempty. relative-path is not supported (yet). + urlpath[0] = '/'; + urlpath[1] = '\0'; } /* We -could- be at the end of the buffer here */ if (i > l) return false; - *dst = '\0'; + + if (*src == '?') { + dst = foundQuery; + ++src; // skip '?' delimiter + ++i; // keep track of bytes 'consumed' from src + /* Then everything from '?' (exclusive) until '#', '\r\n' or '\0' - that's query */ + for (; i < l && *src != '#' && *src != '\r' && *src != '\n' && *src != '\0'; ++i, ++src, ++dst) { + *dst = *src; + } + *dst = '\0'; + if (chopped) + *foundQuery = '\0'; + else + chopped = StripAnyWsp(foundQuery); + rfc1738_unescape(foundQuery); + } + if (i > l) + return false; + + if (*src == '#') { + ++src; // skip '#' delimiter + ++i; // keep track of bytes 'consumed' from src + /* Then everything from '#' (exclusive) until '\r\n' or '\0' - that's fragment */ + for (; i < l && *src != '\r' && *src != '\n' && *src != '\0'; ++i, ++src) { + ; // fragment component is not expected in network protocols. discard for now. + } + } + + if (i > l) + return false; + + if (*src != '\r' && *src != '\n' && *src != '\0') + throw TextException("invalid URL", Here()); // If the parsed scheme has no (known) default port, and there is no // explicit port, then we will reject the zero port during foundPort @@ -490,27 +590,20 @@ AnyP::Uri::parse(const HttpRequestMethod& method, const SBuf &rawUrl) *t = '\0'; ++t; foundPort = atoi(t); + // BUG: fails to detect non-DIGIT garbage like "http://localhost:80fubar" } } for (t = foundHost; *t; ++t) *t = xtolower(*t); - if (stringHasWhitespace(foundHost)) { - if (URI_WHITESPACE_STRIP == Config.uri_whitespace) { - t = q = foundHost; - while (*t) { - if (!xisspace(*t)) { - *q = *t; - ++q; - } - ++t; - } - *q = '\0'; - } + if (StripAnyWsp(foundHost)) { + urlpath[0] = '/'; + urlpath[1] = '\0'; + *foundQuery = '\0'; } - debugs(23, 3, "Split URL '" << rawUrl << "' into proto='" << scheme.image() << "', host='" << foundHost << "', port='" << foundPort << "', path='" << urlpath << "'"); + debugs(23, 3, "Split URL '" << rawUrl << "' into proto='" << scheme.image() << "', host='" << foundHost << "', port='" << foundPort << "', path='" << urlpath << "', query='" << foundQuery << "'"); if (Config.onoff.check_hostnames && strspn(foundHost, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(foundHost)) { @@ -536,41 +629,8 @@ AnyP::Uri::parse(const HttpRequestMethod& method, const SBuf &rawUrl) return false; } - if (stringHasWhitespace(urlpath)) { - debugs(23, 2, "URI has whitespace: {" << rawUrl << "}"); - - switch (Config.uri_whitespace) { - - case URI_WHITESPACE_DENY: - return false; - - case URI_WHITESPACE_ALLOW: - break; - - case URI_WHITESPACE_ENCODE: - t = rfc1738_escape_unescaped(urlpath); - xstrncpy(urlpath, t, MAX_URL); - break; - - case URI_WHITESPACE_CHOP: - *(urlpath + strcspn(urlpath, w_space)) = '\0'; - break; - - case URI_WHITESPACE_STRIP: - default: - t = q = urlpath; - while (*t) { - if (!xisspace(*t)) { - *q = *t; - ++q; - } - ++t; - } - *q = '\0'; - } - } - setScheme(scheme); + query(SBuf(foundQuery)); path(urlpath); host(foundHost); userInfo(SBuf(login)); @@ -775,8 +835,12 @@ SBuf & AnyP::Uri::absolutePath() const { if (absolutePath_.isEmpty()) { - // TODO: Encode each URI subcomponent in path_ as needed. - absolutePath_ = Encode(path(), PathChars()); + if (!path().isEmpty()) + absolutePath_ = Encode(path(), PathChars()); + if (!query().isEmpty()) { + absolutePath_.append("?"); + absolutePath_.append(Encode(query(), QueryChars())); + } } return absolutePath_; diff --git a/src/anyp/Uri.h b/src/anyp/Uri.h index 70ff6b74115..41fdab8eb41 100644 --- a/src/anyp/Uri.h +++ b/src/anyp/Uri.h @@ -97,6 +97,9 @@ class Uri void path(const SBuf &p) {path_=p; touch();} const SBuf &path() const; + void query(const SBuf &p) { query_=p; touch(); } + const SBuf &query() const; + /** * Merge a relative-path URL into the existing URI details. * Implements RFC 3986 section 5.2.3 @@ -191,8 +194,8 @@ class Uri Port port_; ///< authority port subcomponent - // XXX: for now includes query-string. - SBuf path_; ///< URI path segment + SBuf path_; ///< URI path component + SBuf query_; ///< URI query component // pre-assembled URI forms mutable SBuf authorityHttp_; ///< RFC 7230 section 5.3.3 authority, maybe without default-port diff --git a/src/cache_manager.cc b/src/cache_manager.cc index 334c4698331..fd6c6f0bd8d 100644 --- a/src/cache_manager.cc +++ b/src/cache_manager.cc @@ -125,17 +125,14 @@ CacheManager::WellKnownUrlPathPrefix() Mgr::Command::Pointer CacheManager::ParseUrl(const AnyP::Uri &uri) { - Parser::Tokenizer tok(uri.path()); - - Assure(tok.skip(WellKnownUrlPathPrefix())); + auto upath = uri.path(); + Assure(upath.startsWith(WellKnownUrlPathPrefix())); Mgr::Command::Pointer cmd = new Mgr::Command(); cmd->params.httpUri = SBufToString(uri.absolute()); - static const auto fieldChars = CharacterSet("mgr-field", "?#").complement(); - - SBuf action; - if (!tok.prefix(action, fieldChars)) { + SBuf action = upath.substr(WellKnownUrlPathPrefix().length()); // path without the prefix + if (action.isEmpty()) { static const SBuf indexReport("index"); action = indexReport; } @@ -150,16 +147,9 @@ CacheManager::ParseUrl(const AnyP::Uri &uri) throw TextException(ToSBuf("action '", action, "' is ", prot), Here()); cmd->profile = profile; - // TODO: fix when AnyP::Uri::parse() separates path?query#fragment - SBuf params; - if (tok.skip('?')) { - params = tok.remaining(); - Mgr::QueryParams::Parse(tok, cmd->params.queryParams); - } - - if (!tok.skip('#') && !tok.atEnd()) - throw TextException("invalid characters in URL", Here()); - // else ignore #fragment (if any) + SBuf params = uri.query(); + if (!params.isEmpty()) + Mgr::QueryParams::Parse(params, cmd->params.queryParams); debugs(16, 3, "MGR request: host=" << uri.host() << ", action=" << action << ", params=" << params); diff --git a/src/carp.cc b/src/carp.cc index b04ee7b8760..6d25ec77c9f 100644 --- a/src/carp.cc +++ b/src/carp.cc @@ -185,14 +185,11 @@ carpSelectParent(PeerSelector *ps) key.appendf(":%hu", request->url.port().value_or(0)); } if (tp->options.carp_key.path) { - // XXX: fix when path and query are separate - key.append(request->url.absolutePath().substr(0,request->url.absolutePath().find('?'))); // 0..N + key.append(request->url.path()); } if (tp->options.carp_key.params) { - // XXX: fix when path and query are separate - SBuf::size_type pos; - if ((pos=request->url.absolutePath().find('?')) != SBuf::npos) - key.append(request->url.absolutePath().substr(pos)); // N..npos + key.append('?'); + key.append(request->url.query()); } } // if the url-based key is empty, e.g. because the user is diff --git a/src/mgr/QueryParams.cc b/src/mgr/QueryParams.cc index cdab9f46751..1df58e3d792 100644 --- a/src/mgr/QueryParams.cc +++ b/src/mgr/QueryParams.cc @@ -109,18 +109,15 @@ ParseParamValue(const SBuf &rawValue) * value = *pchar | ( 1*DIGIT *( ',' 1*DIGIT ) ) */ void -Mgr::QueryParams::Parse(Parser::Tokenizer &tok, QueryParams &aParams) +Mgr::QueryParams::Parse(SBuf &buf, QueryParams &aParams) { static const CharacterSet nameChars = CharacterSet("param-name", "_") + CharacterSet::ALPHA + CharacterSet::DIGIT; static const CharacterSet valueChars = CharacterSet("param-value", "&= #").complement(); static const CharacterSet delimChars("param-delim", "&"); - while (!tok.atEnd()) { + Parser::Tokenizer tok(buf); - // TODO: remove '#' processing when AnyP::Uri splits 'query#fragment' properly - // #fragment handled by caller. Do not throw. - if (tok.remaining()[0] == '#') - return; + while (!tok.atEnd()) { if (tok.skipAll(delimChars)) continue; diff --git a/src/mgr/QueryParams.h b/src/mgr/QueryParams.h index 40c9d707ef3..dedfa3f3c69 100644 --- a/src/mgr/QueryParams.h +++ b/src/mgr/QueryParams.h @@ -13,7 +13,7 @@ #include "ipc/forward.h" #include "mgr/QueryParam.h" -#include "parser/forward.h" +#include "sbuf/forward.h" #include "SquidString.h" #include @@ -34,7 +34,7 @@ class QueryParams void pack(Ipc::TypedMsgHdr& msg) const; ///< store params into msg void unpack(const Ipc::TypedMsgHdr& msg); ///< load params from msg /// parses the query string parameters - static void Parse(Parser::Tokenizer &, QueryParams &); + static void Parse(SBuf &, QueryParams &); private: /// find query parameter by name diff --git a/src/tests/stub_libmgr.cc b/src/tests/stub_libmgr.cc index c8e9a829f0f..8a60cf8c475 100644 --- a/src/tests/stub_libmgr.cc +++ b/src/tests/stub_libmgr.cc @@ -176,7 +176,7 @@ void Mgr::IoAction::dump(StoreEntry*) STUB Mgr::QueryParam::Pointer Mgr::QueryParams::get(const String&) const STUB_RETVAL(Mgr::QueryParam::Pointer(nullptr)) void Mgr::QueryParams::pack(Ipc::TypedMsgHdr&) const STUB void Mgr::QueryParams::unpack(const Ipc::TypedMsgHdr&) STUB -void Mgr::QueryParams::Parse(Parser::Tokenizer &, QueryParams &) STUB +void Mgr::QueryParams::Parse(SBuf &, QueryParams &) STUB //private: //Params::const_iterator Mgr::QueryParams::find(const String&) const STUB_RETVAL(new Mgr::Params::const_iterator(*this)) Mgr::QueryParam::Pointer Mgr::QueryParams::CreateParam(QueryParam::Type) STUB_RETVAL(Mgr::QueryParam::Pointer(nullptr)) diff --git a/src/tests/testCacheManager.cc b/src/tests/testCacheManager.cc index d62043803fb..420f389e5fb 100644 --- a/src/tests/testCacheManager.cc +++ b/src/tests/testCacheManager.cc @@ -12,6 +12,7 @@ #include "compat/cppunit.h" #include "mgr/Action.h" #include "mgr/Registration.h" +#include "sbuf/Stream.h" #include "Store.h" #include "unitTestMain.h" @@ -145,57 +146,6 @@ TestCacheManager::testParseUrl() "INVALID" // any unregistered name }; - const std::vector validParams = { - "", - "?", - "?&", - "?&&&&&&&&&&&&", - "?foo=bar", - "?0123456789=bar", - "?foo=bar&", - "?foo=bar&&&&", - "?&foo=bar", - "?&&&&foo=bar", - "?&foo=bar&", - "?&&&&foo=bar&&&&", - "?foo=?_weird?~`:[]stuff&bar=okay&&&&&&", - "?intlist=1", - "?intlist=1,2,3,4,5", - "?string=1a", - "?string=1,2,3,4,z", - "?string=1,2,3,4,[0]", - "?intlist=1,2,3,4,5&string=1,2,3,4,y" - }; - - const std::vector invalidParams = { - "?/", - "?foo", - "?/foo", - "?foo/", - "?foo=", - "?foo=&", - "?=foo", - "? foo=bar", - "? &", - "?& ", - "?=&", - "?&=", - "? &&&", - "?& &&", - "?&& &", - "?=&&&", - "?&=&&", - "?&&=&" - }; - - const std::vector validFragments = { - "", - "#", - "##", - "#?a=b", - "#fragment" - }; - const auto &prefix = CacheManager::WellKnownUrlPathPrefix(); assert(prefix.length()); @@ -209,48 +159,32 @@ TestCacheManager::testParseUrl() // they violate CacheManager::ParseUrl()'s ForSomeCacheManager() // precondition. for (const auto *action : validActions) { - for (const auto *param : validParams) { - for (const auto *frag : validFragments) { - SBuf bits; - bits.append(insufficientPrefix); - bits.append(action); - bits.append(param); - bits.append(frag); - mgrUrl.path(bits); - mgr->testInvalidUrl(mgrUrl, "insufficient prefix"); - } - } + SBuf bits; + bits.append(insufficientPrefix); + bits.append(action); + mgrUrl.path(bits); + mgr->testInvalidUrl(mgrUrl, "insufficient prefix"); } // Check that the parser accepts valid URLs. for (const auto action: validActions) { - for (const auto param: validParams) { - for (const auto frag: validFragments) { - SBuf bits; - bits.append(prefix); - bits.append(action); - bits.append(param); - bits.append(frag); - mgrUrl.path(bits); - mgr->testValidUrl(mgrUrl); - } - } + SBuf bits; + bits.append(prefix); + bits.append(action); + mgrUrl.path(bits); + mgr->testValidUrl(mgrUrl); } - // Check that the parser rejects URLs with invalid parameters. - for (const auto action: validActions) { - for (const auto invalidParam: invalidParams) { - for (const auto frag: validFragments) { - SBuf bits; - bits.append(prefix); - bits.append(action); - bits.append(invalidParam); - bits.append(frag); - mgrUrl.path(bits); - mgr->testInvalidUrl(mgrUrl, invalidParam); - } - } + // Check that the parser rejects unknown actions + for (const auto *action : invalidActions) { + SBuf bits; + bits.append(prefix); + bits.append(action); + mgrUrl.path(bits); + auto msg = ToSBuf("action '", action, "' not found"); + mgr->testInvalidUrl(mgrUrl, msg.c_str()); } + } } diff --git a/src/tests/testURL.cc b/src/tests/testURL.cc index b53abe12e73..1f61c99b68d 100644 --- a/src/tests/testURL.cc +++ b/src/tests/testURL.cc @@ -13,7 +13,9 @@ #include "base/TextException.h" #include "compat/cppunit.h" #include "debug/Stream.h" +#include "http/RequestMethod.h" #include "sbuf/Stream.h" +#include "SquidConfig.h" #include "unitTestMain.h" #include @@ -29,12 +31,14 @@ class TestUri : public CPPUNIT_NS::TestFixture CPPUNIT_TEST(testConstructScheme); CPPUNIT_TEST(testDefaultConstructor); CPPUNIT_TEST(testEncoding); + CPPUNIT_TEST(testParse); CPPUNIT_TEST_SUITE_END(); protected: void testConstructScheme(); void testDefaultConstructor(); void testEncoding(); + void testParse(); }; CPPUNIT_TEST_SUITE_REGISTRATION(TestUri); @@ -133,6 +137,127 @@ TestUri::testEncoding() }; } +void +TestUri::testParse() +{ + // we are testing the strict parse (whitespace is invalid) + Config.uri_whitespace = URI_WHITESPACE_DENY; + + std::vector validSchemes = { + AnyP::PROTO_HTTP, + AnyP::PROTO_HTTPS, + AnyP::PROTO_FTP + }; + + // TODO: test variations of (in)valid authority as well + const SBuf authority("localhost:3128"); + + const std::vector validPaths = { + "", + "/", + "/abs", + "/./" + }; + + const std::vector invalidPaths = { + // XXX: Uri::parse sees all pre-'/' as as a "port" and fails to detect non-DIGIT garbage + // XXX: "relative-path", + "/white space" + }; + + const std::vector validQuery = { + "", + "?", + "?/", + "?foo", + "?/foo", + "?foo/", + "?foo=", + "?foo=&", + "?&", + "?&&&&&&&&&&&&", + "?foo=bar", + "?0123456789=bar", + "?foo=bar&", + "?foo=bar&&&&", + "?&foo=bar", + "?&&&&foo=bar", + "?&foo=bar&", + "?&&&&foo=bar&&&&", + "?foo=?_weird?~`:[]stuff&bar=okay&&&&&&", + "?intlist=1", + "?intlist=1,2,3,4,5", + "?string=1a", + "?string=1,2,3,4,z", + "?string=1,2,3,4,[0]", + "?intlist=1,2,3,4,5&string=1,2,3,4,y", + "?https://example.com:80/" + "?https://example.com:80/?foo=bar" + }; + + const std::vector validFragments = { + "", + "#", + "##", + "#?a=b", + "#fragment" + }; + + const auto method = HttpRequestMethod(Http::METHOD_GET); + + for (const auto &schemeType : validSchemes) { + AnyP::UriScheme scheme(schemeType); + // Check that the parser accepts valid URLs + for (const auto path: validPaths) { + for (const auto query: validQuery) { + for (const auto frag: validFragments) { + SBuf bits; + bits.append(scheme.image()); + bits.append("://", 3); + bits.append(authority); + bits.append(path); + bits.append(query); + bits.append(frag); + + AnyP::Uri url; + CPPUNIT_ASSERT(url.parse(method, bits)); + CPPUNIT_ASSERT_EQUAL(scheme, url.getScheme()); + CPPUNIT_ASSERT_EQUAL(SBuf(authority), url.authority()); + CPPUNIT_ASSERT_EQUAL(AnyP::KnownPort(3128), url.port().value_or(0)); + + if (path[0] == '\0') // path-empty + CPPUNIT_ASSERT_EQUAL(AnyP::Uri::SlashPath(), url.path()); + else + CPPUNIT_ASSERT_EQUAL(SBuf(path), url.path()); + + if (scheme == AnyP::PROTO_FTP) // no query for ftp:// + CPPUNIT_ASSERT(url.query().isEmpty()); + else + CPPUNIT_ASSERT_EQUAL(SBuf(query).substr(1), url.query()); + } + } + } + + // Check that the parser rejects URLs with invalid path + for (const auto path: invalidPaths) { + for (const auto query: validQuery) { + for (const auto frag: validFragments) { + SBuf bits; + bits.append(scheme.image()); + bits.append("://", 3); + bits.append(authority); + bits.append(path); + bits.append(query); + bits.append(frag); + + AnyP::Uri url; + CPPUNIT_ASSERT(!url.parse(method, bits)); + } + } + } + } +} + int main(int argc, char *argv[]) {