MINOR: uri_normalizer: Add a `percent-decode-unreserved` normalizer
This normalizer decodes percent encoded characters within the RFC 3986
unreserved set.
See GitHub Issue #714.
diff --git a/doc/configuration.txt b/doc/configuration.txt
index 343f2b4..375eeda 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -6029,6 +6029,7 @@
http-request normalize-uri path-merge-slashes [ { if | unless } <condition> ]
http-request normalize-uri path-strip-dot [ { if | unless } <condition> ]
http-request normalize-uri path-strip-dotdot [ full ] [ { if | unless } <condition> ]
+http-request normalize-uri percent-decode-unreserved [ strict ] [ { if | unless } <condition> ]
http-request normalize-uri percent-to-uppercase [ strict ] [ { if | unless } <condition> ]
http-request normalize-uri query-sort-by-name [ { if | unless } <condition> ]
@@ -6048,11 +6049,25 @@
filesystem. However it might break routing of an API that expects a specific
number of segments in the path.
+ It is important to note that some normalizers might result in unsafe
+ transformations for broken URIs. It might also be possible that a combination
+ of normalizers that are safe by themselves results in unsafe transformations
+ when improperly combined.
+
+ As an example the "percent-decode-unreserved" normalizer might result in
+ unexpected results when a broken URI includes bare percent characters. One
+ such broken URI is "/%%36%36" which would be decoded to "/%66" which in
+ turn is equivalent to "/f". By specifying the "strict" option requests to
+ such a broken URI would safely be rejected.
+
The following normalizers are available:
- path-strip-dot: Removes "/./" segments within the "path" component
(RFC 3986#6.2.2.3).
+ Segments including percent encoded dots ("%2E") will not be detected. Use
+ the "percent-decode-unreserved" normalizer first if this is undesired.
+
Example:
- /. -> /
- /./bar/ -> /bar/
@@ -6063,8 +6078,13 @@
(RFC 3986#6.2.2.3).
This merges segments that attempt to access the parent directory with
- their preceding segment. Empty segments do not receive special treatment.
- Use the "path-merge-slashes" normalizer first if this is undesired.
+ their preceding segment.
+
+ Empty segments do not receive special treatment. Use the
+ "path-merge-slashes" normalizer first if this is undesired.
+
+ Segments including percent encoded dots ("%2E") will not be detected. Use
+ the "percent-decode-unreserved" normalizer first if this is undesired.
Example:
- /foo/../ -> /
@@ -6073,6 +6093,7 @@
- /../bar/ -> /../bar/
- /bar/../../ -> /../
- /foo//../ -> /foo/
+ - /foo/%2E%2E/ -> /foo/%2E%2E/
If the "full" option is specified then "../" at the beginning will be
removed as well:
@@ -6088,6 +6109,25 @@
- // -> /
- /foo//bar -> /foo/bar
+ - percent-decode-unreserved: Decodes unreserved percent encoded characters to
+ their representation as a regular character (RFC 3986#6.2.2.2).
+
+ The set of unreserved characters includes all letters, all digits, "-",
+ ".", "_", and "~".
+
+ Example:
+ - /%61dmin -> /admin
+ - /foo%3Fbar=baz -> /foo%3Fbar=baz (no change)
+ - /%%36%36 -> /%66 (unsafe)
+ - /%ZZ -> /%ZZ
+
+ If the "strict" option is specified then invalid sequences will result
+ in an HTTP 400 Bad Request being returned.
+
+ Example:
+ - /%%36%36 -> HTTP 400
+ - /%ZZ -> HTTP 400
+
- percent-to-uppercase: Uppercases letters within percent-encoded sequences
(RFC 3986#6.2.2.1).
diff --git a/include/haproxy/action-t.h b/include/haproxy/action-t.h
index 2e3edea..1d0b0d2 100644
--- a/include/haproxy/action-t.h
+++ b/include/haproxy/action-t.h
@@ -109,6 +109,8 @@
ACT_NORMALIZE_URI_QUERY_SORT_BY_NAME,
ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE,
ACT_NORMALIZE_URI_PERCENT_TO_UPPERCASE_STRICT,
+ ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED,
+ ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT,
};
/* NOTE: if <.action_ptr> is defined, the referenced function will always be
diff --git a/include/haproxy/uri_normalizer.h b/include/haproxy/uri_normalizer.h
index 81c7e00..06f237e 100644
--- a/include/haproxy/uri_normalizer.h
+++ b/include/haproxy/uri_normalizer.h
@@ -18,6 +18,7 @@
#include <haproxy/uri_normalizer-t.h>
+enum uri_normalizer_err uri_normalizer_percent_decode_unreserved(const struct ist input, int strict, struct ist *dst);
enum uri_normalizer_err uri_normalizer_percent_upper(const struct ist input, int strict, struct ist *dst);
enum uri_normalizer_err uri_normalizer_path_dot(const struct ist path, struct ist *dst);
enum uri_normalizer_err uri_normalizer_path_dotdot(const struct ist path, int full, struct ist *dst);
diff --git a/reg-tests/http-rules/normalize_uri.vtc b/reg-tests/http-rules/normalize_uri.vtc
index 9884b6c..cc88060 100644
--- a/reg-tests/http-rules/normalize_uri.vtc
+++ b/reg-tests/http-rules/normalize_uri.vtc
@@ -8,7 +8,7 @@
server s1 {
rxreq
txresp
-} -repeat 54 -start
+} -repeat 63 -start
haproxy h1 -conf {
defaults
@@ -94,6 +94,30 @@
default_backend be
+ frontend fe_percent_decode_unreserved
+ bind "fd@${fe_percent_decode_unreserved}"
+
+ http-request set-var(txn.before) url
+ http-request normalize-uri percent-decode-unreserved
+ http-request set-var(txn.after) url
+
+ http-response add-header before %[var(txn.before)]
+ http-response add-header after %[var(txn.after)]
+
+ default_backend be
+
+ frontend fe_percent_decode_unreserved_strict
+ bind "fd@${fe_percent_decode_unreserved_strict}"
+
+ http-request set-var(txn.before) url
+ http-request normalize-uri percent-decode-unreserved strict
+ http-request set-var(txn.after) url
+
+ http-response add-header before %[var(txn.before)]
+ http-response add-header after %[var(txn.after)]
+
+ default_backend be
+
backend be
server s1 ${s1_addr}:${s1_port}
@@ -391,3 +415,52 @@
expect resp.http.before == "/?a=/./"
expect resp.http.after == "/?a=/./"
} -run
+
+client c7 -connect ${h1_fe_percent_decode_unreserved_sock} {
+ txreq -url "/a?a=a"
+ rxresp
+ expect resp.http.before == "/a?a=a"
+ expect resp.http.after == "/a?a=a"
+
+ txreq -url "/%61?%61=%61"
+ rxresp
+ expect resp.http.before == "/%61?%61=%61"
+ expect resp.http.after == "/a?a=a"
+
+ txreq -url "/%3F?foo=bar"
+ rxresp
+ expect resp.http.before == "/%3F?foo=bar"
+ expect resp.http.after == "/%3F?foo=bar"
+
+ txreq -url "/%%36%36"
+ rxresp
+ expect resp.status == 200
+ expect resp.http.before == "/%%36%36"
+ expect resp.http.after == "/%66"
+
+ txreq -req OPTIONS -url "*"
+ rxresp
+ expect resp.http.before == "*"
+ expect resp.http.after == "*"
+} -run
+
+client c8 -connect ${h1_fe_percent_decode_unreserved_strict_sock} {
+ txreq -url "/a?a=a"
+ rxresp
+ expect resp.http.before == "/a?a=a"
+ expect resp.http.after == "/a?a=a"
+
+ txreq -url "/%61?%61=%61"
+ rxresp
+ expect resp.http.before == "/%61?%61=%61"
+ expect resp.http.after == "/a?a=a"
+
+ txreq -url "/%3F?foo=bar"
+ rxresp
+ expect resp.http.before == "/%3F?foo=bar"
+ expect resp.http.after == "/%3F?foo=bar"
+
+ txreq -url "/%%36%36"
+ rxresp
+ expect resp.status == 400
+} -run
diff --git a/src/http_act.c b/src/http_act.c
index df2bbe4..58e1bb8 100644
--- a/src/http_act.c
+++ b/src/http_act.c
@@ -302,6 +302,24 @@
break;
}
+ case ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED:
+ case ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT: {
+ const struct ist path = http_get_path(uri);
+ struct ist newpath = ist2(replace->area, replace->size);
+
+ if (!isttest(path))
+ goto leave;
+
+ err = uri_normalizer_percent_decode_unreserved(path, rule->action == ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT, &newpath);
+
+ if (err != URI_NORMALIZER_ERR_NONE)
+ break;
+
+ if (!http_replace_req_path(htx, newpath, 1))
+ goto fail_rewrite;
+
+ break;
+ }
}
switch (err) {
@@ -407,6 +425,21 @@
return ACT_RET_PRS_ERR;
}
}
+ else if (strcmp(args[cur_arg], "percent-decode-unreserved") == 0) {
+ cur_arg++;
+
+ if (strcmp(args[cur_arg], "strict") == 0) {
+ cur_arg++;
+ rule->action = ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED_STRICT;
+ }
+ else if (!*args[cur_arg]) {
+ rule->action = ACT_NORMALIZE_URI_PERCENT_DECODE_UNRESERVED;
+ }
+ else if (strcmp(args[cur_arg], "if") != 0 && strcmp(args[cur_arg], "unless") != 0) {
+ memprintf(err, "unknown argument '%s' for 'percent-decode-unreserved' normalizer", args[cur_arg]);
+ return ACT_RET_PRS_ERR;
+ }
+ }
else {
memprintf(err, "unknown normalizer '%s'", args[cur_arg]);
return ACT_RET_PRS_ERR;
diff --git a/src/uri_normalizer.c b/src/uri_normalizer.c
index 8d95936..4fd783d 100644
--- a/src/uri_normalizer.c
+++ b/src/uri_normalizer.c
@@ -18,6 +18,101 @@
#include <haproxy/tools.h>
#include <haproxy/uri_normalizer.h>
+/* Returns 1 if the given character is part of the 'unreserved' set in the
+ * RFC 3986 ABNF.
+ * Returns 0 if not.
+ */
+static int is_unreserved_character(unsigned char c)
+{
+ switch (c) {
+ case 'A'...'Z': /* ALPHA */
+ case 'a'...'z': /* ALPHA */
+ case '0'...'9': /* DIGIT */
+ case '-':
+ case '.':
+ case '_':
+ case '~':
+ return 1;
+ default:
+ return 0;
+ }
+}
+
+/* Decodes percent encoded characters that are part of the 'unreserved' set.
+ *
+ * RFC 3986, section 2.3:
+ * > URIs that differ in the replacement of an unreserved character with
+ * > its corresponding percent-encoded US-ASCII octet are equivalent [...]
+ * > when found in a URI, should be decoded to their corresponding unreserved
+ * > characters by URI normalizers.
+ *
+ * If `strict` is set to 0 then percent characters that are not followed by two
+ * hexadecimal digits are returned as-is without performing any decoding.
+ * If `strict` is set to 1 then `URI_NORMALIZER_ERR_INVALID_INPUT` is returned
+ * for invalid sequences.
+ */
+enum uri_normalizer_err uri_normalizer_percent_decode_unreserved(const struct ist input, int strict, struct ist *dst)
+{
+ enum uri_normalizer_err err;
+
+ const size_t size = istclear(dst);
+ struct ist output = *dst;
+
+ struct ist scanner = input;
+
+ /* The output will either be shortened or have the same length. */
+ if (size < istlen(input)) {
+ err = URI_NORMALIZER_ERR_ALLOC;
+ goto fail;
+ }
+
+ while (istlen(scanner)) {
+ const char current = istshift(&scanner);
+
+ if (current == '%') {
+ if (istlen(scanner) >= 2) {
+ if (ishex(istptr(scanner)[0]) && ishex(istptr(scanner)[1])) {
+ char hex1, hex2, c;
+
+ hex1 = istshift(&scanner);
+ hex2 = istshift(&scanner);
+ c = (hex2i(hex1) << 4) + hex2i(hex2);
+
+ if (is_unreserved_character(c)) {
+ output = __istappend(output, c);
+ }
+ else {
+ output = __istappend(output, current);
+ output = __istappend(output, hex1);
+ output = __istappend(output, hex2);
+ }
+
+ continue;
+ }
+ }
+
+ if (strict) {
+ err = URI_NORMALIZER_ERR_INVALID_INPUT;
+ goto fail;
+ }
+ else {
+ output = __istappend(output, current);
+ }
+ }
+ else {
+ output = __istappend(output, current);
+ }
+ }
+
+ *dst = output;
+
+ return URI_NORMALIZER_ERR_NONE;
+
+ fail:
+
+ return err;
+}
+
/* Uppercases letters used in percent encoding.
*
* If `strict` is set to 0 then percent characters that are not followed by a