BUG/MEDIUM: h1: Improve authority validation for CONNCET request

From time to time, users complain to get 400-Bad-request responses for
totally valid CONNECT requests. After analysis, it is due to the H1 parser
performs an exact match between the authority and the host header value. For
non-CONNECT requests, it is valid. But for CONNECT requests the authority
must contain a port while it is often omitted from the host header value
(for default ports).

So, to be sure to not reject valid CONNECT requests, a basic authority
validation is now performed during the message parsing. In addition, the
host header value is normalized. It means the default port is removed if
possible.

This patch should solve the issue #1761. It must be backported to 2.6 and
probably as far as 2.4.

(cherry picked from commit 3f5fbe940733bba84b5ee875af5b13aa3144aa41)
Signed-off-by: Christopher Faulet <cfaulet@haproxy.com>
(cherry picked from commit 766f9a5352e2aaac371828e2981deadb175f50ff)
Signed-off-by: Christopher Faulet <cfaulet@haproxy.com>
(cherry picked from commit 59e1f2ad2184e2d7b5614c23213ceb20c7f0e4cd)
Signed-off-by: Christopher Faulet <cfaulet@haproxy.com>
diff --git a/reg-tests/http-messaging/h1_host_normalization.vtc b/reg-tests/http-messaging/h1_host_normalization.vtc
new file mode 100644
index 0000000..6ea32d2
--- /dev/null
+++ b/reg-tests/http-messaging/h1_host_normalization.vtc
@@ -0,0 +1,276 @@
+varnishtest "H1 authority validation and host normalizarion based on the scheme (rfc3982 6.3.2) or the method (connect)"
+
+#REQUIRE_VERSION=2.4
+feature ignore_unknown_macro
+
+syslog S1 -level info {
+    # C1
+    recv
+    expect ~ "^.* uri: GET http://toto:poue@hostname/c1 HTTP/1.1; host: {hostname}$"
+
+    # C2
+    recv
+    expect ~ "^.* uri: GET http://hostname:8080/c2 HTTP/1.1; host: {hostname:8080}$"
+
+    # C3
+    recv
+    expect ~ "^.* uri: GET https://hostname/c3 HTTP/1.1; host: {hostname}$"
+
+    # C4
+    recv
+    expect ~ "^.* uri: GET https://hostname:80/c4 HTTP/1.1; host: {hostname:80}$"
+
+    # C5
+    recv
+    expect ~ "^.* uri: CONNECT hostname:80 HTTP/1.1; host: {hostname}$"
+    recv
+    expect ~ "^.* uri: CONNECT hostname:80 HTTP/1.1; host: {hostname}$"
+
+    # C6
+    recv
+    expect ~ "^.* uri: CONNECT hostname:443 HTTP/1.1; host: {hostname}$"
+    recv
+    expect ~ "^.* uri: CONNECT hostname:443 HTTP/1.1; host: {hostname}$"
+
+    recv
+    expect ~ "^.* uri: CONNECT hostname:8443 HTTP/1.1; host: {hostname:8443}$"
+} -start
+
+haproxy h1 -conf {
+    defaults
+        mode http
+        timeout connect 1s
+        timeout client  1s
+        timeout server  1s
+
+    frontend fe
+        bind "fd@${fe}"
+
+        http-request capture req.hdr(host) len 512
+        log-format "uri: %r; host: %hr"
+        log ${S1_addr}:${S1_port} len 2048 local0 debug err
+
+        http-request return status 200
+} -start
+
+# default port 80 with http scheme => should be normalized
+# Be sure userinfo are skipped
+client c1 -connect ${h1_fe_sock} {
+    txreq \
+      -req "GET" \
+      -url "http://toto:poue@hostname:80/c1" \
+      -hdr "host: hostname:80"
+
+    rxresp
+    expect resp.status == 200
+} -run
+
+# port 8080 with http scheme => no normalization
+client c2 -connect ${h1_fe_sock} {
+    txreq \
+      -req "GET" \
+      -url "http://hostname:8080/c2" \
+      -hdr "host: hostname:8080"
+
+    rxresp
+    expect resp.status == 200
+} -run
+
+# default port 443 with https scheme => should be normalized
+client c3 -connect ${h1_fe_sock} {
+    txreq \
+      -req "GET" \
+      -url "https://hostname:443/c3" \
+      -hdr "host: hostname:443"
+
+    rxresp
+    expect resp.status == 200
+} -run
+
+# port 80 with https scheme => no normalization
+client c4 -connect ${h1_fe_sock} {
+    txreq \
+      -req "GET" \
+      -url "https://hostname:80/c4" \
+      -hdr "host: hostname:80"
+
+    rxresp
+    expect resp.status == 200
+} -run
+
+# CONNECT on port 80 => should be normalized
+client c5 -connect ${h1_fe_sock} {
+    txreq \
+      -req "CONNECT" \
+      -url "hostname:80" \
+      -hdr "host: hostname:80"
+
+    rxresp
+    expect resp.status == 200
+} -run
+client c5 -connect ${h1_fe_sock} {
+
+    txreq \
+      -req "CONNECT" \
+      -url "hostname:80" \
+      -hdr "host: hostname"
+
+    rxresp
+    expect resp.status == 200
+} -run
+
+# CONNECT on port 443 => should be normalized
+client c6 -connect ${h1_fe_sock} {
+    txreq \
+      -req "CONNECT" \
+      -url "hostname:443" \
+      -hdr "host: hostname:443"
+
+    rxresp
+    expect resp.status == 200
+} -run
+client c6 -connect ${h1_fe_sock} {
+    txreq \
+      -req "CONNECT" \
+      -url "hostname:443" \
+      -hdr "host: hostname"
+
+    rxresp
+    expect resp.status == 200
+} -run
+
+# CONNECT on port non-default port => no normalization
+client c7 -connect ${h1_fe_sock} {
+    txreq \
+      -req "CONNECT" \
+      -url "hostname:8443" \
+      -hdr "host: hostname:8443"
+
+    rxresp
+    expect resp.status == 200
+} -run
+
+# host miss-match => error
+client c8 -connect ${h1_fe_sock} {
+    txreq \
+      -req "GET" \
+      -url "http://hostname1/" \
+      -hdr "host: hostname2"
+
+    rxresp
+    expect resp.status == 400
+} -run
+
+# port miss-match => error
+client c9 -connect ${h1_fe_sock} {
+    txreq \
+      -req "GET" \
+      -url "http://hostname:80/" \
+      -hdr "host: hostname:81"
+
+    rxresp
+    expect resp.status == 400
+} -run
+
+# no host port with a non-default port in abs-uri  => error
+client c10 -connect ${h1_fe_sock} {
+    txreq \
+      -req "GET" \
+      -url "http://hostname:8080/" \
+      -hdr "host: hostname"
+
+    rxresp
+    expect resp.status == 400
+} -run
+
+# non-default host port with a default in abs-uri  => error
+client c11 -connect ${h1_fe_sock} {
+    txreq \
+      -req "GET" \
+      -url "http://hostname/" \
+      -hdr "host: hostname:81"
+
+    rxresp
+    expect resp.status == 400
+} -run
+
+# miss-match between host headers => error
+client c12 -connect ${h1_fe_sock} {
+    txreq \
+      -req "GET" \
+      -url "http://hostname1/" \
+      -hdr "host: hostname1" \
+      -hdr "host: hostname2"
+
+    rxresp
+    expect resp.status == 400
+} -run
+
+# miss-match between host headers but with a normalization => error
+client c13 -connect ${h1_fe_sock} {
+    txreq \
+      -req "GET" \
+      -url "http://hostname1/" \
+      -hdr "host: hostname1:80" \
+      -hdr "host: hostname1"
+
+    rxresp
+    expect resp.status == 400
+} -run
+
+# CONNECT authoriy without port => error
+client c14 -connect ${h1_fe_sock} {
+    txreq \
+      -req "CONNECT" \
+      -url "hostname" \
+      -hdr "host: hostname"
+
+    rxresp
+    expect resp.status == 400
+} -run
+
+# host miss-match with CONNECT => error
+client c15 -connect ${h1_fe_sock} {
+    txreq \
+      -req "CONNECT" \
+      -url "hostname1:80" \
+      -hdr "host: hostname2:80"
+
+    rxresp
+    expect resp.status == 400
+} -run
+
+# port miss-match with CONNECT => error
+client c16 -connect ${h1_fe_sock} {
+    txreq \
+      -req "CONNECT" \
+      -url "hostname:80" \
+      -hdr "host: hostname:443"
+
+    rxresp
+    expect resp.status == 400
+} -run
+
+# no host port with non-default port in CONNECT authority => error
+client c17 -connect ${h1_fe_sock} {
+    txreq \
+      -req "CONNECT" \
+      -url "hostname:8080" \
+      -hdr "host: hostname"
+
+    rxresp
+    expect resp.status == 400
+} -run
+
+# no authority => error
+client c18 -connect ${h1_fe_sock} {
+    txreq \
+      -req "CONNECT" \
+      -url "/" \
+      -hdr "host: hostname"
+
+    rxresp
+    expect resp.status == 400
+} -run
+
+syslog S1 -wait
diff --git a/src/h1.c b/src/h1.c
index 3a6c1c3..1e5223b 100644
--- a/src/h1.c
+++ b/src/h1.c
@@ -130,6 +130,50 @@
 	}
 }
 
+/* Validate the authority and the host header value for CONNECT method. If there
+ * is hast header, its value is normalized. 0 is returned on success, -1 if the
+ * authority is invalid and -2 if the host is invalid.
+ */
+static int h1_validate_connect_authority(struct ist authority, struct ist *host_hdr)
+{
+	struct ist uri_host, uri_port, host, host_port;
+
+	if (!isttest(authority))
+		goto invalid_authority;
+	uri_host = authority;
+	uri_port = http_get_host_port(authority);
+	if (!isttest(uri_port))
+		goto invalid_authority;
+	uri_host.len -= (istlen(uri_port) + 1);
+
+	if (!host_hdr || !isttest(*host_hdr))
+		goto end;
+
+	/* Get the port of the host header value, if any */
+	host = *host_hdr;
+	host_port = http_get_host_port(*host_hdr);
+	if (isttest(host_port)) {
+		host.len -= (istlen(host_port) + 1);
+		if (!isteqi(host, uri_host) || !isteq(host_port, uri_port))
+			goto invalid_host;
+		if (http_is_default_port(IST_NULL, uri_port))
+			*host_hdr = host; /* normalize */
+	}
+	else {
+		if (!http_is_default_port(IST_NULL, uri_port) || !isteqi(host, uri_host))
+			goto invalid_host;
+	}
+
+  end:
+	return 0;
+
+  invalid_authority:
+	return -1;
+
+  invalid_host:
+	return -2;
+}
+
 /* Parse the Connection: header of an HTTP/1 request, looking for "close",
  * "keep-alive", and "upgrade" values, and updating h1m->flags according to
  * what was found there. Note that flags are only added, not removed, so the
@@ -868,22 +912,9 @@
 				else if (isteqi(n, ist("upgrade"))) {
 					h1_parse_upgrade_header(h1m, v);
 				}
-				else if (!(h1m->flags & (H1_MF_HDRS_ONLY|H1_MF_RESP)) && isteqi(n, ist("host"))) {
-					if (host_idx == -1) {
-						struct ist authority;
-
-						authority = http_get_authority(sl.rq.u, 1);
-						if (authority.len && !isteqi(v, authority)) {
-							if (h1m->err_pos < -1) {
-								state = H1_MSG_HDR_L2_LWS;
-								ptr = v.ptr; /* Set ptr on the error */
-								goto http_msg_invalid;
-							}
-							if (h1m->err_pos == -1) /* capture the error pointer */
-								h1m->err_pos = v.ptr - start + skip; /* >= 0 now */
-						}
+				else if (!(h1m->flags & H1_MF_RESP) && isteqi(n, ist("host"))) {
+					if (host_idx == -1)
 						host_idx = hdr_count;
-					}
 					else {
 						if (!isteqi(v, hdr[host_idx].v)) {
 							state = H1_MSG_HDR_L2_LWS;
@@ -934,6 +965,43 @@
 		if (restarting)
 			goto restart;
 
+
+		if (!(h1m->flags & (H1_MF_HDRS_ONLY|H1_MF_RESP))) {
+			struct ist authority;
+
+			authority = http_get_authority(sl.rq.u, 1);
+			if (sl.rq.meth == HTTP_METH_CONNECT) {
+				struct ist *host = ((host_idx != -1) ? &hdr[host_idx].v : NULL);
+				int ret;
+
+				ret = h1_validate_connect_authority(authority, host);
+				if (ret < 0) {
+					if (h1m->err_pos < -1) {
+						state = H1_MSG_LAST_LF;
+						ptr = ((ret == -1) ? sl.rq.u.ptr : host->ptr); /* Set ptr on the error */
+						goto http_msg_invalid;
+					}
+					if (h1m->err_pos == -1) /* capture the error pointer */
+						h1m->err_pos = ((ret == -1) ? sl.rq.u.ptr : host->ptr) - start + skip; /* >= 0 now */
+				}
+			}
+			else if (host_idx != -1 && istlen(authority)) {
+				struct ist host = hdr[host_idx].v;
+
+				/* For non-CONNECT method, the authority must match the host header value */
+				if (!isteqi(authority, host)) {
+					if (h1m->err_pos < -1) {
+						state = H1_MSG_LAST_LF;
+						ptr = host.ptr; /* Set ptr on the error */
+						goto http_msg_invalid;
+					}
+					if (h1m->err_pos == -1) /* capture the error pointer */
+						h1m->err_pos = v.ptr - start + skip; /* >= 0 now */
+				}
+
+			}
+		}
+
 		state = H1_MSG_DATA;
 		if (h1m->flags & H1_MF_XFER_ENC) {
 			if (h1m->flags & H1_MF_CLEN) {