BUG/MEDIUM: h1: Improve authority validation for CONNECT request

From time to time, users complain about getting 400-Bad-request responses for
totally valid CONNECT requests. After analysis, it is because the H1 parser
performs an exact match between the authority and the host header value. For
non-CONNECT requests, it is valid. But for CONNECT requests the authority
must contain a port while it is often omitted from the host header value
(for default ports).

So, to be sure to not reject valid CONNECT requests, a basic authority
validation is now performed during the message parsing. In addition, the
host header value is normalized. It means the default port is removed if
possible.

This patch should solve the issue #1761. It must be backported to 2.6 and
probably as far as 2.4.

(cherry picked from commit 3f5fbe940733bba84b5ee875af5b13aa3144aa41)
Signed-off-by: Christopher Faulet <cfaulet@haproxy.com>
(cherry picked from commit 766f9a5352e2aaac371828e2981deadb175f50ff)
Signed-off-by: Christopher Faulet <cfaulet@haproxy.com>
(cherry picked from commit 59e1f2ad2184e2d7b5614c23213ceb20c7f0e4cd)
Signed-off-by: Christopher Faulet <cfaulet@haproxy.com>
diff --git a/reg-tests/http-messaging/h1_host_normalization.vtc b/reg-tests/http-messaging/h1_host_normalization.vtc
new file mode 100644
index 0000000..6ea32d2
--- /dev/null
+++ b/reg-tests/http-messaging/h1_host_normalization.vtc
@@ -0,0 +1,276 @@
+varnishtest "H1 authority validation and host normalization based on the scheme (rfc3986 6.2.3) or the method (connect)"
+
+#REQUIRE_VERSION=2.4
+feature ignore_unknown_macro
+
+syslog S1 -level info {
+    # C1: default http port, with userinfo => port expected to be dropped from the URI and the host
+    recv
+    expect ~ "^.* uri: GET http://toto:poue@hostname/c1 HTTP/1.1; host: {hostname}$"
+
+    # C2: non-default port 8080 with http => port expected to be kept in the URI and the host
+    recv
+    expect ~ "^.* uri: GET http://hostname:8080/c2 HTTP/1.1; host: {hostname:8080}$"
+
+    # C3: default https port => port expected to be dropped from the URI and the host
+    recv
+    expect ~ "^.* uri: GET https://hostname/c3 HTTP/1.1; host: {hostname}$"
+
+    # C4: port 80 is not the https default => port expected to be kept in the URI and the host
+    recv
+    expect ~ "^.* uri: GET https://hostname:80/c4 HTTP/1.1; host: {hostname:80}$"
+
+    # C5: two CONNECT requests on port 80 => the URI keeps the port, the host loses it
+    recv
+    expect ~ "^.* uri: CONNECT hostname:80 HTTP/1.1; host: {hostname}$"
+    recv
+    expect ~ "^.* uri: CONNECT hostname:80 HTTP/1.1; host: {hostname}$"
+
+    # C6: two CONNECT requests on port 443 => the URI keeps the port, the host loses it
+    recv
+    expect ~ "^.* uri: CONNECT hostname:443 HTTP/1.1; host: {hostname}$"
+    recv
+    expect ~ "^.* uri: CONNECT hostname:443 HTTP/1.1; host: {hostname}$"
+    # C7: CONNECT on non-default port 8443 => the port is expected to be kept in the host
+    recv
+    expect ~ "^.* uri: CONNECT hostname:8443 HTTP/1.1; host: {hostname:8443}$"
+} -start
+
+haproxy h1 -conf {
+    defaults
+        mode http
+        timeout connect 1s
+        timeout client  1s
+        timeout server  1s
+
+    frontend fe
+        bind "fd@${fe}"
+        # Capture the host header and log it next to the request line so S1 can check both
+        http-request capture req.hdr(host) len 512
+        log-format "uri: %r; host: %hr"
+        log ${S1_addr}:${S1_port} len 2048 local0 debug err
+        # No backend is declared: requests reaching this rule are answered with a 200 here
+        http-request return status 200
+} -start
+
+# Default port 80 with the http scheme => the port should be normalized away.
+# Be sure the userinfo part of the URI (toto:poue@) is skipped
+client c1 -connect ${h1_fe_sock} {
+    txreq \
+      -req "GET" \
+      -url "http://toto:poue@hostname:80/c1" \
+      -hdr "host: hostname:80"
+
+    rxresp
+    expect resp.status == 200
+} -run
+
+# Port 8080 is not the default for the http scheme => no normalization
+client c2 -connect ${h1_fe_sock} {
+    txreq \
+      -req "GET" \
+      -url "http://hostname:8080/c2" \
+      -hdr "host: hostname:8080"
+
+    rxresp
+    expect resp.status == 200
+} -run
+
+# Default port 443 with the https scheme => the port should be normalized away
+client c3 -connect ${h1_fe_sock} {
+    txreq \
+      -req "GET" \
+      -url "https://hostname:443/c3" \
+      -hdr "host: hostname:443"
+
+    rxresp
+    expect resp.status == 200
+} -run
+
+# Port 80 is not the default for the https scheme => no normalization
+client c4 -connect ${h1_fe_sock} {
+    txreq \
+      -req "GET" \
+      -url "https://hostname:80/c4" \
+      -hdr "host: hostname:80"
+
+    rxresp
+    expect resp.status == 200
+} -run
+
+# CONNECT on port 80, host header sent with then without the port => host should be normalized
+client c5 -connect ${h1_fe_sock} {
+    txreq \
+      -req "CONNECT" \
+      -url "hostname:80" \
+      -hdr "host: hostname:80"
+
+    rxresp
+    expect resp.status == 200
+} -run
+client c5 -connect ${h1_fe_sock} {
+
+    txreq \
+      -req "CONNECT" \
+      -url "hostname:80" \
+      -hdr "host: hostname"
+
+    rxresp
+    expect resp.status == 200
+} -run
+
+# CONNECT on port 443, host header sent with then without the port => host should be normalized
+client c6 -connect ${h1_fe_sock} {
+    txreq \
+      -req "CONNECT" \
+      -url "hostname:443" \
+      -hdr "host: hostname:443"
+
+    rxresp
+    expect resp.status == 200
+} -run
+client c6 -connect ${h1_fe_sock} {
+    txreq \
+      -req "CONNECT" \
+      -url "hostname:443" \
+      -hdr "host: hostname"
+
+    rxresp
+    expect resp.status == 200
+} -run
+
+# CONNECT on a non-default port => no normalization
+client c7 -connect ${h1_fe_sock} {
+    txreq \
+      -req "CONNECT" \
+      -url "hostname:8443" \
+      -hdr "host: hostname:8443"
+
+    rxresp
+    expect resp.status == 200
+} -run
+
+# Host mismatch between the absolute URI and the host header => error
+client c8 -connect ${h1_fe_sock} {
+    txreq \
+      -req "GET" \
+      -url "http://hostname1/" \
+      -hdr "host: hostname2"
+
+    rxresp
+    expect resp.status == 400
+} -run
+
+# Port mismatch between the absolute URI and the host header => error
+client c9 -connect ${h1_fe_sock} {
+    txreq \
+      -req "GET" \
+      -url "http://hostname:80/" \
+      -hdr "host: hostname:81"
+
+    rxresp
+    expect resp.status == 400
+} -run
+
+# No port in the host header but a non-default port in the absolute URI => error
+client c10 -connect ${h1_fe_sock} {
+    txreq \
+      -req "GET" \
+      -url "http://hostname:8080/" \
+      -hdr "host: hostname"
+
+    rxresp
+    expect resp.status == 400
+} -run
+
+# Non-default port in the host header but the default one (omitted) in the absolute URI => error
+client c11 -connect ${h1_fe_sock} {
+    txreq \
+      -req "GET" \
+      -url "http://hostname/" \
+      -hdr "host: hostname:81"
+
+    rxresp
+    expect resp.status == 400
+} -run
+
+# Mismatch between two host headers => error
+client c12 -connect ${h1_fe_sock} {
+    txreq \
+      -req "GET" \
+      -url "http://hostname1/" \
+      -hdr "host: hostname1" \
+      -hdr "host: hostname2"
+
+    rxresp
+    expect resp.status == 400
+} -run
+
+# Mismatch between two host headers that only differ by the default port => still an error
+client c13 -connect ${h1_fe_sock} {
+    txreq \
+      -req "GET" \
+      -url "http://hostname1/" \
+      -hdr "host: hostname1:80" \
+      -hdr "host: hostname1"
+
+    rxresp
+    expect resp.status == 400
+} -run
+
+# CONNECT authority without a port => error
+client c14 -connect ${h1_fe_sock} {
+    txreq \
+      -req "CONNECT" \
+      -url "hostname" \
+      -hdr "host: hostname"
+
+    rxresp
+    expect resp.status == 400
+} -run
+
+# Host mismatch between the CONNECT authority and the host header => error
+client c15 -connect ${h1_fe_sock} {
+    txreq \
+      -req "CONNECT" \
+      -url "hostname1:80" \
+      -hdr "host: hostname2:80"
+
+    rxresp
+    expect resp.status == 400
+} -run
+
+# Port mismatch between the CONNECT authority and the host header => error
+client c16 -connect ${h1_fe_sock} {
+    txreq \
+      -req "CONNECT" \
+      -url "hostname:80" \
+      -hdr "host: hostname:443"
+
+    rxresp
+    expect resp.status == 400
+} -run
+
+# No port in the host header but a non-default port in the CONNECT authority => error
+client c17 -connect ${h1_fe_sock} {
+    txreq \
+      -req "CONNECT" \
+      -url "hostname:8080" \
+      -hdr "host: hostname"
+
+    rxresp
+    expect resp.status == 400
+} -run
+
+# CONNECT whose target is a path (/) instead of an authority => error
+client c18 -connect ${h1_fe_sock} {
+    txreq \
+      -req "CONNECT" \
+      -url "/" \
+      -hdr "host: hostname"
+
+    rxresp
+    expect resp.status == 400
+} -run
+# Wait for the syslog server S1 to be done with the log lines it expects
+syslog S1 -wait