squid-cache · yadij · Aug 15, 2025 · Oct 30, 2025 · Oct 30, 2025 · rousskov
diff --git a/src/anyp/Uri.cc b/src/anyp/Uri.cc
@@ -69,6 +69,19 @@ PathChars()
     return pathValid;
 }
 
+/// Characters which are valid within a URI query section
+static const CharacterSet &
+QueryChars()
+{
+    /*
+     * RFC 3986 section 3.4
+     *
+     *  query         = *( pchar / "/" / "?" )
+     */
+    static const auto queryValid = CharacterSet("query", "/?") + PathChars();
+    return queryValid;
+}
+
 /**
  * Governed by RFC 3986 section 2.1
  */
@@ -208,6 +221,17 @@ AnyP::Uri::path() const
     return path_;
 }
 
+const SBuf &
+AnyP::Uri::query() const
+{
+    // FTP does not have a query section
+    static SBuf empty;
+    if (scheme_ == AnyP::PROTO_FTP)
+        return empty;
+
+    return query_;
+}
+
 void
 urlInitialize(void)
 {
@@ -310,6 +334,47 @@ urlAppendDomain(char *host)
     return true;
 }
 
+/// \returns whether whitespace has 'chopped' the URL apart.
+static bool
+StripAnyWsp(char *buf)
+{
+    if (stringHasWhitespace(buf)) {
+        switch (Config.uri_whitespace) {
+
+        case URI_WHITESPACE_DENY:
+            throw TextException("URI has whitespace", Here());
+
+        case URI_WHITESPACE_ALLOW:
+            break;
+
+        case URI_WHITESPACE_ENCODE: {
+            auto *t = rfc1738_escape_unescaped(buf);
+            xstrncpy(buf, t, MAX_URL);
+            }
+            break;
+
+        case URI_WHITESPACE_CHOP:
+            *(buf + strcspn(buf, w_space)) = '\0';
+            return true;
+
+        case URI_WHITESPACE_STRIP:
+        default: {
+            char *t = buf;
+            char *q = buf;
+            while (*t) {
+                if (!xisspace(*t)) {
+                    *q = *t;
+                    ++q;
+                }
+                ++t;
+            }
+            *q = '\0';
+            }
+        }
+    }
+    return false;
+}
+
 /*
  * Parse a URI/URL.
  *
@@ -330,14 +395,14 @@ AnyP::Uri::parse(const HttpRequestMethod& method, const SBuf &rawUrl)
         LOCAL_ARRAY(char, login, MAX_URL);
         LOCAL_ARRAY(char, foundHost, MAX_URL);
         LOCAL_ARRAY(char, urlpath, MAX_URL);
+        LOCAL_ARRAY(char, foundQuery, MAX_URL);
         char *t = nullptr;
-        char *q = nullptr;
         int foundPort;
         int l;
         int i;
         const char *src;
         char *dst;
-        foundHost[0] = urlpath[0] = login[0] = '\0';
+        foundHost[0] = urlpath[0] = foundQuery[0] = login[0] = '\0';
 
         if ((l = rawUrl.length()) + Config.appendDomainLen > (MAX_URL - 1)) {
             debugs(23, DBG_IMPORTANT, MYNAME << "URL too large (" << l << " bytes)");
@@ -411,23 +476,58 @@ AnyP::Uri::parse(const HttpRequestMethod& method, const SBuf &rawUrl)
                 return false;
             *dst = '\0';
 
-            // We are looking at path-abempty.
-            if (*src != '/') {
-                // path-empty, including the end of the `src` c-string cases
-                urlpath[0] = '/';
-                dst = &urlpath[1];
-            } else {
+            bool chopped = false;
+            if (*src == '/') {
                 dst = urlpath;
-            }
-            /* Then everything from / (inclusive) until \r\n or \0 - that's urlpath */
-            for (; i < l && *src != '\r' && *src != '\n' && *src != '\0'; ++i, ++src, ++dst) {
-                *dst = *src;
+                /* Then everything from / (inclusive) until '?', '#', '\r\n' or '\0' - that's urlpath */
+                for (; i < l && *src != '?' && *src != '#' && *src != '\r' && *src != '\n' && *src != '\0'; ++i, ++src, ++dst) {
+                    *dst = *src;
+                }
+                *dst = '\0';
+                chopped = StripAnyWsp(urlpath);
+                rfc1738_unescape(urlpath);
+            } else {
+                // We should be looking at path-abempty. relative-path is not supported (yet).
+                urlpath[0] = '/';
+                urlpath[1] = '\0';
             }
 
             /* We -could- be at the end of the buffer here */
             if (i > l)
                 return false;
-            *dst = '\0';
+
+            if (*src == '?') {
+                dst = foundQuery;
+                ++src; // skip '?' delimiter
+                ++i; // keep track of bytes 'consumed' from src
+                /* Then everything from '?' (exclusive) until '#', '\r\n' or '\0' - that's query */
+                for (; i < l && *src != '#' && *src != '\r' && *src != '\n' && *src != '\0'; ++i, ++src, ++dst) {
+                    *dst = *src;
+                }
+                *dst = '\0';
+                if (chopped)
+                    *foundQuery = '\0';
+                else
+                    chopped = StripAnyWsp(foundQuery);
+                rfc1738_unescape(foundQuery);
+            }
+            if (i > l)
+                return false;
+
+            if (*src == '#') {
+                ++src; // skip '#' delimiter
+                ++i; // keep track of bytes 'consumed' from src
+                /* Then everything from '#' (exclusive) until '\r\n' or '\0' - that's fragment */
+                for (; i < l && *src != '\r' && *src != '\n' && *src != '\0'; ++i, ++src) {
+                    ; // fragment component is not expected in network protocols. discard for now.
+                }
+            }
+
+            if (i > l)
+                return false;
+
+            if (*src != '\r' && *src != '\n' && *src != '\0')
+                throw TextException("invalid URL", Here());
 
             // If the parsed scheme has no (known) default port, and there is no
             // explicit port, then we will reject the zero port during foundPort
@@ -490,27 +590,20 @@ AnyP::Uri::parse(const HttpRequestMethod& method, const SBuf &rawUrl)
                 *t = '\0';
                 ++t;
                 foundPort = atoi(t);
+                // BUG: fails to detect non-DIGIT garbage like "http://localhost:80fubar"
             }
         }
 
         for (t = foundHost; *t; ++t)
             *t = xtolower(*t);
 
-        if (stringHasWhitespace(foundHost)) {
-            if (URI_WHITESPACE_STRIP == Config.uri_whitespace) {
-                t = q = foundHost;
-                while (*t) {
-                    if (!xisspace(*t)) {
-                        *q = *t;
-                        ++q;
-                    }
-                    ++t;
-                }
-                *q = '\0';
-            }
+        if (StripAnyWsp(foundHost)) {
+            urlpath[0] = '/';
+            urlpath[1] = '\0';
+            *foundQuery = '\0';
         }
 
-        debugs(23, 3, "Split URL '" << rawUrl << "' into proto='" << scheme.image() << "', host='" << foundHost << "', port='" << foundPort << "', path='" << urlpath << "'");
+        debugs(23, 3, "Split URL '" << rawUrl << "' into proto='" << scheme.image() << "', host='" << foundHost << "', port='" << foundPort << "', path='" << urlpath << "', query='" << foundQuery << "'");
 
         if (Config.onoff.check_hostnames &&
                 strspn(foundHost, Config.onoff.allow_underscore ? valid_hostname_chars_u : valid_hostname_chars) != strlen(foundHost)) {
@@ -536,41 +629,8 @@ AnyP::Uri::parse(const HttpRequestMethod& method, const SBuf &rawUrl)
             return false;
         }
 
-        if (stringHasWhitespace(urlpath)) {
-            debugs(23, 2, "URI has whitespace: {" << rawUrl << "}");
-
-            switch (Config.uri_whitespace) {
-
-            case URI_WHITESPACE_DENY:
-                return false;
-
-            case URI_WHITESPACE_ALLOW:
-                break;
-
-            case URI_WHITESPACE_ENCODE:
-                t = rfc1738_escape_unescaped(urlpath);
-                xstrncpy(urlpath, t, MAX_URL);
-                break;
-
-            case URI_WHITESPACE_CHOP:
-                *(urlpath + strcspn(urlpath, w_space)) = '\0';
-                break;
-
-            case URI_WHITESPACE_STRIP:
-            default:
-                t = q = urlpath;
-                while (*t) {
-                    if (!xisspace(*t)) {
-                        *q = *t;
-                        ++q;
-                    }
-                    ++t;
-                }
-                *q = '\0';
-            }
-        }
-
         setScheme(scheme);
+        query(SBuf(foundQuery));
         path(urlpath);
         host(foundHost);
         userInfo(SBuf(login));
@@ -775,8 +835,12 @@ SBuf &
 AnyP::Uri::absolutePath() const
 {
     if (absolutePath_.isEmpty()) {
-        // TODO: Encode each URI subcomponent in path_ as needed.
-        absolutePath_ = Encode(path(), PathChars());
+        if (!path().isEmpty())
+            absolutePath_ = Encode(path(), PathChars());
+        if (!query().isEmpty()) {
+            absolutePath_.append("?");
+            absolutePath_.append(Encode(query(), QueryChars()));
+        }
     }
 
     return absolutePath_;

diff --git a/src/anyp/Uri.h b/src/anyp/Uri.h
@@ -97,6 +97,9 @@ class Uri
     void path(const SBuf &p) {path_=p; touch();}
     const SBuf &path() const;
 
+    void query(const SBuf &p) { query_=p; touch(); }
+    const SBuf &query() const;
+
     /**
      * Merge a relative-path URL into the existing URI details.
      * Implements RFC 3986 section 5.2.3
@@ -191,8 +194,8 @@ class Uri
 
     Port port_; ///< authority port subcomponent
 
-    // XXX: for now includes query-string.
-    SBuf path_;     ///< URI path segment
+    SBuf path_; ///< URI path component
+    SBuf query_; ///< URI query component
 
     // pre-assembled URI forms
     mutable SBuf authorityHttp_;     ///< RFC 7230 section 5.3.3 authority, maybe without default-port

diff --git a/src/cache_manager.cc b/src/cache_manager.cc
@@ -125,17 +125,14 @@ CacheManager::WellKnownUrlPathPrefix()
 Mgr::Command::Pointer
 CacheManager::ParseUrl(const AnyP::Uri &uri)
 {
-    Parser::Tokenizer tok(uri.path());
-
-    Assure(tok.skip(WellKnownUrlPathPrefix()));
+    auto upath = uri.path();
+    Assure(upath.startsWith(WellKnownUrlPathPrefix()));
 
     Mgr::Command::Pointer cmd = new Mgr::Command();
     cmd->params.httpUri = SBufToString(uri.absolute());
 
-    static const auto fieldChars = CharacterSet("mgr-field", "?#").complement();
-
-    SBuf action;
-    if (!tok.prefix(action, fieldChars)) {
+    SBuf action = upath.substr(WellKnownUrlPathPrefix().length()); // path without the prefix
+    if (action.isEmpty()) {
         static const SBuf indexReport("index");
         action = indexReport;
     }
@@ -150,16 +147,9 @@ CacheManager::ParseUrl(const AnyP::Uri &uri)
         throw TextException(ToSBuf("action '", action, "' is ", prot), Here());
     cmd->profile = profile;
 
-    // TODO: fix when AnyP::Uri::parse() separates path?query#fragment
-    SBuf params;
-    if (tok.skip('?')) {
-        params = tok.remaining();
-        Mgr::QueryParams::Parse(tok, cmd->params.queryParams);
-    }
-
-    if (!tok.skip('#') && !tok.atEnd())
-        throw TextException("invalid characters in URL", Here());
-    // else ignore #fragment (if any)
+    SBuf params = uri.query();
+    if (!params.isEmpty())
+        Mgr::QueryParams::Parse(params, cmd->params.queryParams);
 
     debugs(16, 3, "MGR request: host=" << uri.host() << ", action=" << action << ", params=" << params);
 

diff --git a/src/carp.cc b/src/carp.cc
@@ -185,14 +185,11 @@ carpSelectParent(PeerSelector *ps)
                 key.appendf(":%hu", request->url.port().value_or(0));
             }
             if (tp->options.carp_key.path) {
-                // XXX: fix when path and query are separate
-                key.append(request->url.absolutePath().substr(0,request->url.absolutePath().find('?'))); // 0..N
+                key.append(request->url.path());
             }
             if (tp->options.carp_key.params) {
-                // XXX: fix when path and query are separate
-                SBuf::size_type pos;
-                if ((pos=request->url.absolutePath().find('?')) != SBuf::npos)
-                    key.append(request->url.absolutePath().substr(pos)); // N..npos
+                key.append('?');
+                key.append(request->url.query());
             }
         }
         // if the url-based key is empty, e.g. because the user is

diff --git a/src/mgr/QueryParams.cc b/src/mgr/QueryParams.cc
@@ -109,18 +109,15 @@ ParseParamValue(const SBuf &rawValue)
  *   value  = *pchar | ( 1*DIGIT *( ',' 1*DIGIT ) )
  */
 void
-Mgr::QueryParams::Parse(Parser::Tokenizer &tok, QueryParams &aParams)
+Mgr::QueryParams::Parse(SBuf &buf, QueryParams &aParams)
 {
     static const CharacterSet nameChars = CharacterSet("param-name", "_") + CharacterSet::ALPHA + CharacterSet::DIGIT;
     static const CharacterSet valueChars = CharacterSet("param-value", "&= #").complement();
     static const CharacterSet delimChars("param-delim", "&");
 
-    while (!tok.atEnd()) {
+    Parser::Tokenizer tok(buf);
 
-        // TODO: remove '#' processing when AnyP::Uri splits 'query#fragment' properly
-        // #fragment handled by caller. Do not throw.
-        if (tok.remaining()[0] == '#')
-            return;
+    while (!tok.atEnd()) {
 
         if (tok.skipAll(delimChars))
             continue;

diff --git a/src/mgr/QueryParams.h b/src/mgr/QueryParams.h
@@ -13,7 +13,7 @@
 
 #include "ipc/forward.h"
 #include "mgr/QueryParam.h"
-#include "parser/forward.h"
+#include "sbuf/forward.h"
 #include "SquidString.h"
 
 #include <utility>
@@ -34,7 +34,7 @@ class QueryParams
     void pack(Ipc::TypedMsgHdr& msg) const; ///< store params into msg
     void unpack(const Ipc::TypedMsgHdr& msg); ///< load params from msg
     /// parses the query string parameters
-    static void Parse(Parser::Tokenizer &, QueryParams &);
+    static void Parse(SBuf &, QueryParams &);
 
 private:
     /// find query parameter by name

diff --git a/src/tests/stub_libmgr.cc b/src/tests/stub_libmgr.cc
@@ -176,7 +176,7 @@ void Mgr::IoAction::dump(StoreEntry*) STUB
 Mgr::QueryParam::Pointer Mgr::QueryParams::get(const String&) const STUB_RETVAL(Mgr::QueryParam::Pointer(nullptr))
 void Mgr::QueryParams::pack(Ipc::TypedMsgHdr&) const STUB
 void Mgr::QueryParams::unpack(const Ipc::TypedMsgHdr&) STUB
-void Mgr::QueryParams::Parse(Parser::Tokenizer &, QueryParams &) STUB
+void Mgr::QueryParams::Parse(SBuf &, QueryParams &) STUB
 //private:
 //Params::const_iterator Mgr::QueryParams::find(const String&) const STUB_RETVAL(new Mgr::Params::const_iterator(*this))
 Mgr::QueryParam::Pointer Mgr::QueryParams::CreateParam(QueryParam::Type) STUB_RETVAL(Mgr::QueryParam::Pointer(nullptr))