MINOR: connection: add sample fetches to report per-connection glitches
Now with fc_glitches and bc_glitches we can retrieve the number of
detected glitches on a front or back connection. On the backend it
can indicate a bug in a server that may induce frequent reconnections,
hence extra CPU usage in TLS handshakes, and on the frontend it may
indicate an abusive client that may be trying to attack the stack
or to fingerprint it. Small non-zero values are definitely expected
and can be caused by network glitches for example, as well as rare
bugs in the other component (or maybe even in haproxy). These should
never be considered as alarming as long as they remain low (i.e.
much less than one per request). A reg-test is provided.
(cherry picked from commit 6e5aa1614557a2db4c11c7b640350bdaf8ffad65)
Signed-off-by: Willy Tarreau <w@1wt.eu>
(cherry picked from commit 23edca555567b74e16abdf5eb0f65d094e6c371a)
[wt: adj ctx in doc]
Signed-off-by: Willy Tarreau <w@1wt.eu>
diff --git a/doc/configuration.txt b/doc/configuration.txt
index 0f21aa8..2a0e0d5 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -19883,6 +19883,20 @@
"fc_err_str" fetch for a full list of error codes and their
corresponding error message.
+bc_glitches : integer
+ Returns the number of protocol glitches counted on the backend connection.
+ These generally cover protocol violations as well as small anomalies that
+ generally indicate a bogus or misbehaving server that may cause trouble in
+ the infrastructure (e.g. cause connections to be aborted early, inducing
+ frequent TLS renegotiations). These may also be caused by too large responses
+ that cannot fit into a single buffer, explaining HTTP 502 errors. Ideally
+ this number should remain zero, though it's generally fine if it remains very
+ low compared to the total number of requests. These values should normally
+ not be considered as alarming (especially small ones), though a sudden jump
+ may indicate an anomaly somewhere. Not all protocol multiplexers measure this
+ metric and the only way to get more details about the events is to enable
+ traces to capture all exchanges.
+
bc_http_major : integer
Returns the backend connection's HTTP major version encoding, which may be 1
for HTTP/0.9 to HTTP/1.1 or 2 for HTTP/2. Note, this is based on the on-wire
@@ -20068,6 +20082,23 @@
not TCP or if the operating system does not support TCP_INFO, for example
Linux kernels before 2.4, the sample fetch fails.
+fc_glitches : integer
+ Returns the number of protocol glitches counted on the frontend connection.
+ These generally cover protocol violations as well as small anomalies that
+ generally indicate a bogus or misbehaving client that may cause trouble in
+ the infrastructure, such as excess of errors in the logs, or many connections
+ being aborted early, inducing frequent TLS renegotiations. These may also be
+ caused by too large requests that cannot fit into a single buffer, explaining
+ HTTP 400 errors. Ideally this number should remain zero, though it may be
+ possible that some browsers playing with the protocol boundaries trigger it
+ once in a while. These values should normally not be considered as alarming
+ (especially small ones), though a sudden jump may indicate an anomaly
+ somewhere. Large values (i.e. hundreds to thousands per connection, or as
+ many as the requests) may indicate a purposely built client that is trying to
+ fingerprint or attack the protocol stack. Not all protocol multiplexers
+ measure this metric, and the only way to get more details about the events is
+ to enable traces to capture all exchanges.
+
fc_http_major : integer
Reports the front connection's HTTP major version encoding, which may be 1
for HTTP/0.9 to HTTP/1.1 or 2 for HTTP/2. Note, this is based on the on-wire
diff --git a/reg-tests/connection/h2_glitches.vtc b/reg-tests/connection/h2_glitches.vtc
new file mode 100644
index 0000000..39ec4d6
--- /dev/null
+++ b/reg-tests/connection/h2_glitches.vtc
@@ -0,0 +1,108 @@
+# This test verifies that H2 anomalies counted as glitches are properly detected
+# and reported by the fc_glitches sample fetch.
+
+varnishtest "h2 glitches"
+feature ignore_unknown_macro
+
+# haproxy frontend: H2 listener returning fc_glitches in the x-glitches header
+haproxy hap -conf {
+ defaults
+ mode http
+
+ listen fe1
+ bind "fd@${fe1}" proto h2
+ http-request return status 200 hdr x-glitches %[fc_glitches]
+} -start
+
+# valid requests: two well-formed streams, x-glitches must remain 0
+client c1 -connect ${hap_fe1_sock} {
+ txpri
+ stream 0 {
+ txsettings
+ rxsettings
+ txsettings -ack
+ rxsettings
+ expect settings.ack == true
+ } -run
+
+ stream 1 {
+ txreq \
+ -method "GET" \
+ -scheme "http" \
+ -url "/"
+ rxresp
+ expect resp.status == 200
+ expect resp.http.x-glitches == 0
+ } -run
+
+ stream 3 {
+ txreq \
+ -method "GET" \
+ -scheme "http" \
+ -url "/"
+ rxresp
+ expect resp.status == 200
+ expect resp.http.x-glitches == 0
+ } -run
+} -run
+
+# invalid path on stream 1 => req decoding error (RST) => glitch++, seen on stream 3
+client c2-path -connect ${hap_fe1_sock} {
+ txpri
+ stream 0 {
+ txsettings
+ rxsettings
+ txsettings -ack
+ rxsettings
+ expect settings.ack == true
+ } -run
+
+ stream 1 {
+ txreq \
+ -method "GET" \
+ -scheme "http" \
+ -url "hello-world"
+ rxrst
+ } -run
+
+ stream 3 {
+ txreq \
+ -method "GET" \
+ -scheme "http" \
+ -url "/"
+ rxresp
+ expect resp.status == 200
+ expect resp.http.x-glitches == 1
+ } -run
+} -run
+
+# invalid scheme on stream 1: blocked at HTX layer (400), not counted as a glitch
+client c3-scheme -connect ${hap_fe1_sock} {
+ txpri
+ stream 0 {
+ txsettings
+ rxsettings
+ txsettings -ack
+ rxsettings
+ expect settings.ack == true
+ } -run
+
+ stream 1 {
+ txreq \
+ -method "GET" \
+ -scheme "http://localhost/?" \
+ -url "/"
+ rxresp
+ expect resp.status == 400
+ } -run
+
+ stream 3 {
+ txreq \
+ -method "GET" \
+ -scheme "http" \
+ -url "/"
+ rxresp
+ expect resp.status == 200
+ expect resp.http.x-glitches == 0
+ } -run
+} -run
diff --git a/src/connection.c b/src/connection.c
index f8a0a42..282aac4 100644
--- a/src/connection.c
+++ b/src/connection.c
@@ -2096,6 +2096,40 @@
return buf->data - old_len;
}
+/* Sample fetch for "fc_glitches"/"bc_glitches": returns 1 with the mux's glitch count as an integer sample, or 0 if it cannot be retrieved. */
+static int
+smp_fetch_fc_glitches(const struct arg *args, struct sample *smp, const char *kw, void *private)
+{
+ struct connection *conn = NULL;
+ int ret; /* raw result of the mux's ctl() call */
+
+ if (obj_type(smp->sess->origin) == OBJ_TYPE_CHECK) /* check origin: only the 'b' keyword yields a connection */
+ conn = (kw[0] == 'b') ? sc_conn(__objt_check(smp->sess->origin)->sc) : NULL;
+ else /* front: the session's origin connection; back: the stream's scb connection, if there is a stream */
+ conn = (kw[0] != 'b') ? objt_conn(smp->sess->origin) :
+ smp->strm ? sc_conn(smp->strm->scb) : NULL;
+
+ /* No connection, or a connection whose mux has no ctl() callback: no sample */
+ if (!conn || (conn->mux && !conn->mux->ctl))
+ return 0;
+
+ /* Mux not installed yet: no sample for now, but this may change later */
+ if (!conn->mux) {
+ smp->flags |= SMP_F_MAY_CHANGE;
+ return 0;
+ }
+
+ ret = conn->mux->ctl(conn, MUX_CTL_GET_GLITCHES, NULL);
+ if (ret < 0) {
+ /* negative result: MUX_CTL_GET_GLITCHES is not supported by the mux */
+ return 0;
+ }
+
+ smp->data.type = SMP_T_SINT;
+ smp->data.u.sint = ret;
+ return 1;
+}
+
/* return the major HTTP version as 1 or 2 depending on how the request arrived
* before being processed.
*
@@ -2274,9 +2308,11 @@
static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
{ "bc_err", smp_fetch_fc_err, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV },
{ "bc_err_str", smp_fetch_fc_err_str, 0, NULL, SMP_T_STR, SMP_USE_L4SRV },
+ { "bc_glitches", smp_fetch_fc_glitches, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV },
{ "bc_http_major", smp_fetch_fc_http_major, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV },
{ "fc_err", smp_fetch_fc_err, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
{ "fc_err_str", smp_fetch_fc_err_str, 0, NULL, SMP_T_STR, SMP_USE_L4CLI },
+ { "fc_glitches", smp_fetch_fc_glitches, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
{ "fc_http_major", smp_fetch_fc_http_major, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
{ "fc_rcvd_proxy", smp_fetch_fc_rcvd_proxy, 0, NULL, SMP_T_BOOL, SMP_USE_L4CLI },
{ "fc_pp_authority", smp_fetch_fc_pp_authority, 0, NULL, SMP_T_STR, SMP_USE_L4CLI },