MEDIUM: checks: capture groups in expect regexes

Parse back-references in comments of tcp-check expect rules.  If references are
made, capture groups in the match and replace the references to them within the
comment when logging the error. Both text and binary regexes can capture groups
and reference them in the expect rule comment.

[Cf: I slightly updated the patch. exp_replace() function is used instead of a
custom one. And if the trash buffer is too small to contain the comment during
the substitution, the comment is ignored.]
diff --git a/include/types/checks.h b/include/types/checks.h
index d798ae2..fdf4874 100644
--- a/include/types/checks.h
+++ b/include/types/checks.h
@@ -228,6 +228,7 @@
 	struct tcpcheck_rule *head;     /* first expect of a chain. */
 	int length;                     /* Size in bytes of the pattern referenced by string / binary. */
 	int inverse;                    /* Match is inversed. */
+	int with_capture;               /* Match will store captured groups for back-reference in comment. */
 	int min_recv;                   /* Minimum amount of data before an expect can be applied. (default: -1, ignored) */
 };
 
diff --git a/reg-tests/checks/tcp-check_comment-with-capture.vtc b/reg-tests/checks/tcp-check_comment-with-capture.vtc
new file mode 100644
index 0000000..31f8820
--- /dev/null
+++ b/reg-tests/checks/tcp-check_comment-with-capture.vtc
@@ -0,0 +1,60 @@
+varnishtest "tcp-check expect rule with capture groups"
+#EXCLUDE_TARGETS=freebsd,osx,generic
+#REQUIRE_VERSION=2.2
+#REGTEST_TYPE=slow
+# This script tests expect rules matching a regex with capture groups and
+# defining a comment with backreferences. Text and binary regex are tested.
+feature ignore_unknown_macro
+
+syslog S1 -level notice {
+    recv
+    expect ~ "[^:\\[ ]\\[${h1_pid}\\]: Proxy be1 started."
+    recv
+    expect ~ "[^:\\[ ]\\[${h1_pid}\\]: Health check for server be1/srv1 failed.*code=400 reason=Bad Request"
+} -start
+
+syslog S2 -level notice {
+    recv
+    expect ~ "[^:\\[ ]\\[${h1_pid}\\]: Proxy be2 started."
+    recv
+    expect ~ "[^:\\[ ]\\[${h1_pid}\\]: Health check for server be2/srv1 failed.*code=400 reason=Bad Request"
+} -start
+
+server s1 {
+    rxreq
+    txresp -status 400 -reason "Bad Request"
+} -start
+
+server s2 {
+    rxreq
+    txresp -status 400 -reason "Bad Request"
+} -start
+
+haproxy h1 -conf {
+  defaults
+    timeout client 5s
+    timeout connect 5s
+    timeout server 5s
+    timeout check 5s
+
+  backend be1
+    log ${S1_addr}:${S1_port} len 2048 local0
+    option tcp-check
+    option log-health-checks
+    tcp-check connect
+    tcp-check send "GET / HTTP/1.1\r\n\r\n"
+    tcp-check expect !rstring "HTTP/1\\.1\s+([45][0-9]{2})\s+([^\r\n]*)" comment " Bad response: code=\\1 reason=\\2"
+    server srv1 ${s1_addr}:${s1_port} check inter 1000ms rise 1 fall 1
+
+  backend be2
+    log ${S2_addr}:${S2_port} len 2048 local0
+    option tcp-check
+    option log-health-checks
+    tcp-check connect
+    tcp-check send-binary "474554202f20485454502f312e31200d0a0d0a" # GET / HTTP/1.1\r\n\r\n
+    tcp-check expect !rbinary "485454502F312E3120(34[0-9]{4}|35[0-9]{4})20(([^0].)*)" comment " Bad response: code=\\1 reason=\\2"
+    server srv1 ${s2_addr}:${s2_port} check inter 1000ms rise 1 fall 1
+} -start
+
+syslog S1 -wait
+syslog S2 -wait
diff --git a/src/cfgparse-listen.c b/src/cfgparse-listen.c
index 13ce19c..06ed3e4 100644
--- a/src/cfgparse-listen.c
+++ b/src/cfgparse-listen.c
@@ -162,6 +162,55 @@
 		warnif_misplaced_tcp_sess(proxy, file, line, arg);
 }
 
+/* Scan the comment of an expect check rule for regex backreferences, i.e. a
+ * backslash immediately followed by decimal digits, and validate each one.
+ * <str> must be NUL-terminated. On error, <*err> is set to a static message
+ * which must not be freed; it is left untouched otherwise.
+ * returns:
+ *   0 if none found.
+ *   1 if at least one found and all are valid.
+ *  -1 if at least one found and at least one is invalid.
+ */
+static int find_and_check_backreferences(const char *str, char **err)
+{
+	static char *errors[] = {
+		"invalid backreference value",
+		"backreference is not within range [1, 9]",
+	};
+	char *backslash;
+	unsigned long int ref;
+	int found = 0;
+
+	while ((backslash = strchr(str, '\\'))) {
+		char *next, *end;
+
+		next = backslash + 1;
+		/* cast needed: passing a negative char to isdigit() is undefined */
+		if (!isdigit((unsigned char)*next)) {
+			str = next;
+			continue;
+		}
+
+		errno = 0;
+		ref = strtoul(next, &end, 10);
+		if (errno == EINVAL) {
+			*err = errors[0];
+			return -1;
+		}
+
+		/* a value overflowing strtoul() (ERANGE) is out of range as well */
+		if (errno == ERANGE || ref == 0 || ref > 9) {
+			*err = errors[1];
+			return -1;
+		}
+
+		found = 1;
+		str = end;
+	}
+
+	return found;
+}
+
 int cfg_parse_listen(const char *file, int linenum, char **args, int kwm)
 {
 	static struct proxy *curproxy = NULL;
@@ -3318,6 +3367,22 @@
 					goto out;
 				}
 				tcpcheck->comment = strdup(args[cur_arg + 1]);
+				rc = find_and_check_backreferences(tcpcheck->comment, &error);
+				if (rc > 0) {
+					if (!inverse) {
+						ha_warning("parsing [%s:%d] : "
+						           "using backreference in a positive expect comment is useless.\n",
+						           file, linenum);
+						err_code |= ERR_WARN;
+					}
+					expect->with_capture = 1;
+				}
+				else if (rc < 0) {
+					ha_alert("parsing [%s:%d] : %s.\n",
+						 file, linenum, error);
+					err_code |= ERR_ALERT | ERR_FATAL;
+					goto out;
+				}
 			}
 
 			/* All tcp-check expect points back to the first inverse expect rule
diff --git a/src/checks.c b/src/checks.c
index a3e72ee..b2fb34d 100644
--- a/src/checks.c
+++ b/src/checks.c
@@ -3187,13 +3187,21 @@
 				match = my_memmem(b_head(&check->bi), b_data(&check->bi), expect->string, expect->length) != NULL;
 				break;
 			case TCPCHK_EXPECT_REGEX:
-				match = regex_exec2(expect->regex, b_head(&check->bi), MIN(b_data(&check->bi), b_size(&check->bi)-1));
+				if (expect->with_capture)
+					match = regex_exec_match2(expect->regex, b_head(&check->bi), MIN(b_data(&check->bi), b_size(&check->bi)-1),
+								  MAX_MATCH, pmatch, 0);
+				else
+					match = regex_exec2(expect->regex, b_head(&check->bi), MIN(b_data(&check->bi), b_size(&check->bi)-1));
 				break;
 
 			case TCPCHK_EXPECT_REGEX_BINARY:
 				chunk_reset(&trash);
 				dump_binary(&trash, b_head(&check->bi), b_data(&check->bi));
-				match = regex_exec2(expect->regex, b_head(&trash), MIN(b_data(&trash), b_size(&trash)-1));
+				if (expect->with_capture)
+					match = regex_exec_match2(expect->regex, b_head(&trash), MIN(b_data(&trash), b_size(&trash)-1),
+								  MAX_MATCH, pmatch, 0);
+				else
+					match = regex_exec2(expect->regex, b_head(&trash), MIN(b_data(&trash), b_size(&trash)-1));
 				break;
 			case TCPCHK_EXPECT_UNDEF:
 				/* Should never happen. */
@@ -3248,6 +3256,19 @@
 			case TCPCHK_EXPECT_REGEX_BINARY:
 				chunk_printf(&trash, "TCPCHK %s (binary regex) at step %d",
 					     diag, step);
+
+				/* If references to the matched text were made,
+				 * divide the offsets by 2 to match offset of
+				 * the original response buffer.
+				 */
+				if (expect->with_capture) {
+					int i;
+
+					for (i = 1; i < MAX_MATCH && pmatch[i].rm_so != -1; i++) {
+						pmatch[i].rm_so /= 2; /* at first matched char. */
+						pmatch[i].rm_eo /= 2; /* at last matched char. */
+					}
+				}
 				break;
 			case TCPCHK_EXPECT_UNDEF:
 				/* Should never happen. */
@@ -3256,11 +3277,16 @@
 			}
 
 			comment = tcpcheck_get_step_comment(check);
-			if (comment)
-				chunk_appendf(&trash, " comment: '%s'", comment);
-			set_server_check_status(check, HCHK_STATUS_L7RSP,
-						trash.area);
-
+			if (comment) {
+				if (expect->with_capture) {
+					ret = exp_replace(b_tail(&trash), b_room(&trash), b_head(&check->bi), comment, pmatch);
+					if (ret > 0) /* ignore comment if too large */
+						trash.data += ret;
+				}
+				else
+					chunk_appendf(&trash, " comment: '%s'", comment);
+			}
+			set_server_check_status(check, HCHK_STATUS_L7RSP, trash.area);
 			goto out_end_tcpcheck;
 		} /* end expect */
 	} /* end loop over double chained step list */