MINOR: sample: Add converters to parse FIX messages

This patch implements a couple of converters to validate and extract tag value
from a FIX (Financial Information eXchange) message. The validation consists in
a few checks such as mandatory fields and checksum computation. The extraction
can get any tag value based on a tag string or tag id.

This patch requires the istend() function. Thus it depends on "MINOR: ist: Add
istend() function to return a pointer to the end of the string".

Reviewed and Fixed by Christopher Faulet <cfaulet@haproxy.com>
diff --git a/Makefile b/Makefile
index 27aac33..11c3ebf 100644
--- a/Makefile
+++ b/Makefile
@@ -843,7 +843,7 @@
         src/ebimtree.o src/uri_auth.o src/freq_ctr.o src/ebsttree.o            \
         src/ebistree.o src/auth.o src/wdt.o src/http_acl.o                     \
         src/hpack-enc.o src/hpack-huff.o src/ebtree.o src/base64.o             \
-        src/hash.o src/dgram.o src/version.o
+        src/hash.o src/dgram.o src/version.o src/fix.o
 
 ifneq ($(TRACE),)
 OBJS += src/calltrace.o
diff --git a/doc/configuration.txt b/doc/configuration.txt
index e85c525..a7dc4b6 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -15133,6 +15133,52 @@
       str(f1_f2_f3__f5),field(-2,_,3) # f2_f3_
       str(f1_f2_f3__f5),field(-3,_,0) # f1_f2_f3
 
+fix_is_valid
+  Parses a binary payload and performs sanity checks regarding FIX (Financial
+  Information eXchange):
+
+  - checks that all tag IDs and values are not empty and that the tag IDs are
+    numeric
+  - checks the BeginString tag is the first tag with a valid FIX version
+  - checks the BodyLength tag is the second one with the right body length
+  - checks the MsgType tag is the third tag
+  - checks that last tag in the message is the CheckSum tag with a valid
+    checksum
+
+  Due to current HAProxy design, only the first message sent by the client and
+  the server can be parsed.
+
+  This converter returns a boolean, true if the payload contains a valid FIX
+  message, false if not.
+
+  See also the fix_tag_value converter.
+
+  Example:
+      tcp-request inspect-delay 10s
+      tcp-request content reject unless { req.payload(0,0),fix_is_valid }
+
+fix_tag_value(<tag>)
+  Parses a FIX (Financial Information eXchange) message and extracts the value
+  from the tag <tag>. <tag> can be a string or an integer pointing to the
+  desired tag. Any integer value is accepted, but only the following strings
+  are translated into their integer equivalent: BeginString, BodyLength,
+  MsgType, SenderComID, TargetComID, CheckSum. More tag names can be easily
+  added.
+
+  Due to current HAProxy design, only the first message sent by the client and
+  the server can be parsed. No message validation is performed by this
+  converter. It is highly recommended to validate the message first using
+  fix_is_valid converter.
+
+  See also the fix_is_valid converter.
+
+  Example:
+      tcp-request inspect-delay 10s
+      tcp-request content reject unless { req.payload(0,0),fix_is_valid }
+      # MsgType tag ID is 35, so both lines below will return the same content
+      tcp-request content set-var(txn.foo) req.payload(0,0),fix_tag_value(35)
+      tcp-request content set-var(txn.bar) req.payload(0,0),fix_tag_value(MsgType)
+
 hex
   Converts a binary input sample to a hex string containing two hex digits per
   input byte. It is used to log or transfer hex dumps of some binary input data
diff --git a/include/haproxy/fix-t.h b/include/haproxy/fix-t.h
new file mode 100644
index 0000000..bba9bd6
--- /dev/null
+++ b/include/haproxy/fix-t.h
@@ -0,0 +1,70 @@
+/*
+ * include/haproxy/fix-t.h
+ * This file contains structure declarations for FIX protocol.
+ *
+ * Copyright 2020 Baptiste Assmann <bedis9@gmail.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _HAPROXY_FIX_T_H
+#define _HAPROXY_FIX_T_H
+
+#include <import/ist.h>
+
+/*
+ * FIX messages are composed of a list of Tag=Value pairs separated by a 'delimiter'
+ */
+#define FIX_DELIMITER 0x01
+
+/*
+ * Known FIX version strings
+ */
+#define FIX_4_0     (ist("FIX.4.0"))
+#define FIX_4_1     (ist("FIX.4.1"))
+#define FIX_4_2     (ist("FIX.4.2"))
+#define FIX_4_3     (ist("FIX.4.3"))
+#define FIX_4_4     (ist("FIX.4.4"))
+#define FIX_5_0     (ist("FIXT.1.1"))
+/* FIX_5_0SP1 and FIX_5_0SP2 have the same version string as FIX_5_0 */
+
+/*
+ * Supported FIX tag IDs
+ */
+#define FIX_TAG_BeginString    8
+#define FIX_TAG_BodyLength     9
+#define FIX_TAG_CheckSum       10
+#define FIX_TAG_MsgType        35
+#define FIX_TAG_SenderComID    49
+#define FIX_TAG_TargetComID    56
+
+
+#define FIX_MSG_MINSIZE        26 /* Minimal length for a FIX Message */
+#define FIX_CHKSUM_SIZE        7  /* Length of the CheckSum tag (10=NNN<delim>) */
+/*
+ * Return codes when parsing / validating FIX messages
+ */
+#define FIX_INVALID_MESSAGE   -1
+#define FIX_NEED_MORE_DATA     0
+#define FIX_VALID_MESSAGE      1
+
+#endif /* _HAPROXY_FIX_T_H */
+
+/*
+ * Local variables:
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ * End:
+ */
diff --git a/include/haproxy/fix.h b/include/haproxy/fix.h
new file mode 100644
index 0000000..1d242e7
--- /dev/null
+++ b/include/haproxy/fix.h
@@ -0,0 +1,97 @@
+/*
+ * include/haproxy/fix.h
+ * This file contains functions and macros declarations for FIX protocol decoding.
+ *
+ * Copyright 2020 Baptiste Assmann <bedis9@gmail.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _HAPROXY_FIX_H
+#define _HAPROXY_FIX_H
+
+#include <import/ist.h>
+
+#include <haproxy/fix-t.h>
+#include <haproxy/tools.h>
+
+unsigned int fix_check_id(const struct ist str, const struct ist version);
+int fix_validate_message(const struct ist msg);
+struct ist fix_tag_value(const struct ist msg, unsigned int tagid);
+
+/*
+ * Map <str> onto the matching known FIX version string (one of the FIX_X_Y
+ * macros). IST_NULL is returned when <str> matches none of them.
+ */
+static inline struct ist fix_version(const struct ist str)
+{
+	/* no known version string is shorter than 7 characters */
+	if (istlen(str) < 7)
+		return IST_NULL;
+
+	if (isteq(str, FIX_4_0))
+		return FIX_4_0;
+	if (isteq(str, FIX_4_1))
+		return FIX_4_1;
+	if (isteq(str, FIX_4_2))
+		return FIX_4_2;
+	if (isteq(str, FIX_4_3))
+		return FIX_4_3;
+	if (isteq(str, FIX_4_4))
+		return FIX_4_4;
+	if (isteq(str, FIX_5_0))
+		return FIX_5_0;
+
+	return IST_NULL;
+}
+
+/*
+ * Return the FIX tag ID matching <tag>, or 0 if <tag> is neither a strictly
+ * positive integer nor a known tag name (compared case-insensitively). The
+ * historical "SenderComID"/"TargetComID" spellings stay accepted next to the
+ * official "SenderCompID"/"TargetCompID" names. Full list of tag IDs:
+ *   https://www.onixs.biz/fix-dictionary/4.2/fields_by_tag.html
+ */
+static inline unsigned int fix_tagid(const struct ist tag)
+{
+	unsigned int id = fix_check_id(tag, IST_NULL);
+
+	if (id)
+		return id;
+
+	if (isteqi(tag, ist("MsgType")))
+		return FIX_TAG_MsgType;
+	else if (isteqi(tag, ist("CheckSum")))
+		return FIX_TAG_CheckSum;
+	else if (isteqi(tag, ist("BodyLength")))
+		return FIX_TAG_BodyLength;
+	else if (isteqi(tag, ist("TargetCompID")) || isteqi(tag, ist("TargetComID")))
+		return FIX_TAG_TargetComID;
+	else if (isteqi(tag, ist("BeginString")))
+		return FIX_TAG_BeginString;
+	else if (isteqi(tag, ist("SenderCompID")) || isteqi(tag, ist("SenderComID")))
+		return FIX_TAG_SenderComID;
+
+	return 0;
+}
+
+#endif /* _HAPROXY_FIX_H */
+
+/*
+ * Local variables:
+ *  c-indent-level: 8
+ *  c-basic-offset: 8
+ * End:
+ */
diff --git a/src/fix.c b/src/fix.c
new file mode 100644
index 0000000..82af1c0
--- /dev/null
+++ b/src/fix.c
@@ -0,0 +1,264 @@
+/*
+ * Financial Information eXchange Protocol
+ *
+ * Copyright 2020 Baptiste Assmann <bedis9@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/intops.h>
+#include <haproxy/fix.h>
+/*
+ * Return the corresponding numerical tag id if <str> looks like a valid FIX
+ * protocol tag ID. Otherwise, 0 is returned (0 is an invalid id).
+ *
+ * If <version> is given, it must hold one of the defined FIX version strings
+ * (see FIX_X_Y macros, compared by content) and the tag ID range is checked as
+ * well. If no <version> is provided, any strictly positive integer is valid.
+ *
+ * tag ID range depends on FIX protocol version:
+ *    - FIX.4.0:    1-140
+ *    - FIX.4.1:    1-211
+ *    - FIX.4.2:    1-446
+ *    - FIX.4.3:    1-659
+ *    - FIX.4.4:    1-956
+ *    - FIX.5.0:    1-1139
+ *    - FIX.5.0SP1: 1-1426
+ *    - FIX.5.0SP2: 1-1621
+ * range 10000 to 19999 is for "user defined tags"
+ */
+unsigned int fix_check_id(const struct ist str, const struct ist version) {
+	const char *s, *end;
+	unsigned int ret;
+
+	s = istptr(str);
+	end = istend(str);
+	ret = read_uint(&s, end);
+
+	/* we did not consume all characters from <str>, this is an error */
+	if (s != end)
+		return 0;
+
+	/* field ID can't be 0 */
+	if (ret == 0)
+		return 0;
+
+	/* we can leave now if version was not provided */
+	if (!isttest(version))
+		return ret;
+
+	/* we can leave now if this is a "user defined tag id" */
+	if (ret >= 10000 && ret <= 19999)
+		return ret;
+
+	/* per-version check; compare contents, literals may not share an address */
+	if (isteq(FIX_4_0, version) && (ret <= 140))
+		return ret;
+	else if (isteq(FIX_4_1, version) && (ret <= 211))
+		return ret;
+	else if (isteq(FIX_4_2, version) && (ret <= 446))
+		return ret;
+	else if (isteq(FIX_4_3, version) && (ret <= 659))
+		return ret;
+	else if (isteq(FIX_4_4, version) && (ret <= 956))
+		return ret;
+	/* version string is the same for all 5.0 versions, so we can only take
+	 * into consideration the biggest range
+	 */
+	else if (isteq(FIX_5_0, version) && (ret <= 1621))
+		return ret;
+
+	return 0;
+}
+
+/*
+ * Parse a FIX message <msg> and perform the following sanity checks:
+ *
+ *   - checks tag ids and values are not empty
+ *   - checks tag ids are numerical values
+ *   - checks the first tag is BeginString with a valid version
+ *   - checks the second tag is BodyLength with the right body length
+ *   - checks the third tag is MsgType
+ *   - checks the last tag is CheckSum with a valid checksum
+ *
+ * Returns:
+ *  FIX_INVALID_MESSAGE if the message is invalid
+ *  FIX_NEED_MORE_DATA  if we need more data to fully validate the message
+ *  FIX_VALID_MESSAGE   if the message looks valid
+ */
+int fix_validate_message(const struct ist msg)
+{
+	struct ist parser, version;
+	unsigned int tagnum, bodylen;
+	unsigned char checksum;
+	char *body;
+	int ret = FIX_INVALID_MESSAGE;
+
+	if (istlen(msg) < FIX_MSG_MINSIZE) {
+		ret = FIX_NEED_MORE_DATA;
+		goto end;
+	}
+
+	/* parsing the whole message to compute the checksum and check all tag
+	 * ids are properly set. Here we are sure to have the 2 first tags. Thus
+	 * the version and the body length can be checked.
+	 */
+	parser = msg;
+	version = IST_NULL;
+	checksum = tagnum = bodylen = 0;
+	body = NULL;
+	while (istlen(parser) > 0) {
+		struct ist tag, value;
+		unsigned int tagid;
+		const char *p, *end;
+
+		/* parse the tag ID and its value and perform first sanity checks */
+		value = iststop(istfind(parser, '='), FIX_DELIMITER);
+
+		/* end of value not found */
+		if (istend(value) == istend(parser)) {
+			ret = FIX_NEED_MORE_DATA;
+			goto end;
+		}
+		/* empty tag or empty value are forbidden (<value> still holds '=') */
+		if (istptr(parser) == istptr(value) || istlen(value) < 2)
+			goto end;
+
+		/* value points on '='. get the tag and skip '=' */
+		tag = ist2(istptr(parser), istptr(value) - istptr(parser));
+		value = istnext(value);
+
+		/* Check the tag id */
+		tagid = fix_check_id(tag, version);
+		if (!tagid)
+			goto end;
+		tagnum++;
+
+		if (tagnum == 1) {
+			/* the first tag must be BeginString */
+			if (tagid != FIX_TAG_BeginString)
+				goto end;
+
+			version = fix_version(value);
+			if (!isttest(version))
+				goto end;
+		}
+		else if (tagnum == 2) {
+			/* the second tag must be bodyLength */
+			if (tagid != FIX_TAG_BodyLength)
+				goto end;
+
+			p = istptr(value);
+			end = istend(value);
+			bodylen = read_uint(&p, end);
+
+			/* we did not consume all characters from <str> or no body, this is an error.
+			 * There is at least the message type in the body.
+			 */
+			if (p != end || !bodylen)
+				goto end;
+
+			body = istend(value) + 1;
+		}
+		else if (tagnum == 3) {
+			/* the third tag must be MsgType */
+			if (tagid != FIX_TAG_MsgType)
+				goto end;
+		}
+		else if (tagnum > 3 && tagid == FIX_TAG_CheckSum) {
+			/* CheckSum tag should be the last one and is not taken into account
+			 * to compute the checksum itself and the body length. The value is
+			 * a three-octet representation of the checksum decimal value.
+			 */
+			if (bodylen != istptr(parser) - body)
+				goto end;
+
+			if (istlen(value) != 3)
+				goto end;
+			if (checksum != strl2ui(istptr(value), istlen(value)))
+				goto end;
+
+			/* End of the message, exit from the loop */
+			ret = FIX_VALID_MESSAGE;
+			goto end;
+		}
+
+		/* compute checksum of tag=value<delim> */
+		for (p = istptr(tag) ; p < istend(tag) ; ++p)
+			checksum += *p;
+		checksum += '=';
+		for (p = istptr(value) ; p < istend(value) ; ++p)
+			checksum += *p;
+		checksum += FIX_DELIMITER;
+
+		/* move the parser after the value and its delimiter */
+		parser = istadv(parser, istlen(tag) + istlen(value) + 2);
+	}
+
+	if (body) {
+		/* We started to read the body but have not reached the checksum tag */
+		ret = FIX_NEED_MORE_DATA;
+	}
+
+  end:
+	return ret;
+}
+
+
+/*
+ * Iterate over a FIX message <msg> and return the value of <tagid>.
+ *
+ * Returns the corresponding value if <tagid> is found. If <tagid> is not found
+ * because more data are required, the message with a length set to 0 is
+ * returned. If <tagid> is not found in the message or if the message is
+ * invalid, IST_NULL is returned.
+ *
+ * Note: Only simple sanity checks are performed on tags and values (not empty).
+ *
+ * the tag looks like
+ *   <tagid>=<value>FIX_DELIMITER with <tag> and <value> not empty
+ */
+struct ist fix_tag_value(const struct ist msg, unsigned int tagid)
+{
+	struct ist parser, t, v;
+	unsigned int id;
+
+	parser = msg;
+	while (istlen(parser) > 0) {
+		v = iststop(istfind(parser, '='), FIX_DELIMITER);
+
+		/* delimiter not found, need more data */
+		if (istend(v) == istend(parser))
+			break;
+
+		/* empty tag or empty value, invalid (<v> still starts with '=') */
+		if (istptr(parser) == istptr(v) || istlen(v) < 2)
+			goto not_found_or_invalid;
+
+		t = ist2(istptr(parser), istptr(v) - istptr(parser));
+		v = istnext(v);
+
+		id = fix_check_id(t, IST_NULL);
+		if (!id)
+			goto not_found_or_invalid;
+		if (id == tagid) {
+			/* <tagid> found, return the corresponding value */
+			return v;
+		}
+
+		/* CheckSum tag is the last one, no <tagid> found */
+		if (id == FIX_TAG_CheckSum)
+			goto not_found_or_invalid;
+
+		parser = istadv(parser, istlen(t) + istlen(v) + 2);
+	}
+	/* not enough data to find <tagid> */
+	return ist2(istptr(msg), 0);
+
+  not_found_or_invalid:
+	return IST_NULL;
+}
diff --git a/src/sample.c b/src/sample.c
index a9c08ef..7c59517 100644
--- a/src/sample.c
+++ b/src/sample.c
@@ -26,9 +26,11 @@
 #include <haproxy/buf.h>
 #include <haproxy/chunk.h>
 #include <haproxy/errors.h>
+#include <haproxy/fix.h>
 #include <haproxy/global.h>
 #include <haproxy/hash.h>
 #include <haproxy/http.h>
+#include <haproxy/istbuf.h>
 #include <haproxy/net_helper.h>
 #include <haproxy/protobuf.h>
 #include <haproxy/proxy.h>
@@ -3214,6 +3216,86 @@
 	return 1;
 }
 
+/*
+ * Extract the tag value of an input binary sample. Takes a mandatory argument:
+ * the FIX protocol tag identifier. Return 1 if the tag was found, 0 if not; in
+ * the latter case SMP_F_MAY_CHANGE is set only when more data are needed.
+ */
+static int sample_conv_fix_tag_value(const struct arg *arg_p, struct sample *smp, void *private)
+{
+	struct ist value;
+
+	smp->flags &= ~SMP_F_MAY_CHANGE;
+	value = fix_tag_value(ist2(smp->data.u.str.area, smp->data.u.str.data),
+			      arg_p[0].data.sint);
+	if (!istlen(value)) {
+		if (isttest(value)) {
+			/* empty but value != IST_NULL, need more data */
+			smp->flags |= SMP_F_MAY_CHANGE;
+		}
+		return 0;
+	}
+
+	smp->data.u.str = ist2buf(value);
+	smp->flags |= SMP_F_CONST;
+
+	return 1;
+}
+
+/* Configuration-time check of the "fix_tag_value" converter argument: it must
+ * be a tag name known by HAProxy or a numeric tag ID. On success (1 returned)
+ * the string argument is released and replaced by the integer tag ID, as this
+ * is what FIX messages carry. Otherwise 0 is returned and <err> is filled.
+ */
+static int sample_conv_fix_value_check(struct arg *args, struct sample_conv *conv,
+				       const char *file, int line, char **err)
+{
+	unsigned int tagid;
+
+	/* resolve the tag name or the numeric string into a tag ID */
+	tagid = fix_tagid(ist2(args[0].data.str.area, args[0].data.str.data));
+	if (tagid) {
+		/* swap the string argument for its numeric ID */
+		chunk_destroy(&args[0].data.str);
+		args[0].type = ARGT_SINT;
+		args[0].data.sint = tagid;
+		return 1;
+	}
+
+	/* neither a known tag name nor a valid tag ID */
+	memprintf(err, "Unknown FIX tag name '%s'", args[0].data.str.area);
+	return 0;
+}
+
+/*
+ * Converter telling whether the input binary sample holds a valid FIX message.
+ *
+ * Return 1 with <smp> set to a boolean (true if valid, false if not). Return 0
+ * if no verdict can be given, with SMP_F_MAY_CHANGE set if data are missing.
+ */
+static int sample_conv_fix_is_valid(const struct arg *arg_p, struct sample *smp, void *private)
+{
+	int status;
+
+	smp->flags &= ~SMP_F_MAY_CHANGE;
+	status = fix_validate_message(ist2(smp->data.u.str.area,
+					   smp->data.u.str.data));
+
+	if (status == FIX_NEED_MORE_DATA) {
+		/* no verdict yet, it may change with more data */
+		smp->flags |= SMP_F_MAY_CHANGE;
+		return 0;
+	}
+	if (status != FIX_VALID_MESSAGE && status != FIX_INVALID_MESSAGE)
+		return 0;
+
+	/* the verdict is known, report it as a boolean */
+	smp->data.type = SMP_T_BOOL;
+	smp->data.u.sint = (status == FIX_VALID_MESSAGE);
+
+	return 1;
+}
+
 /* This function checks the "strcmp" converter's arguments and extracts the
  * variable name and its scope.
  */
@@ -3802,6 +3884,10 @@
 	{ "ungrpc", sample_conv_ungrpc,    ARG2(1,PBUF_FNUM,STR), sample_conv_protobuf_check, SMP_T_BIN, SMP_T_BIN  },
 	{ "protobuf", sample_conv_protobuf, ARG2(1,PBUF_FNUM,STR), sample_conv_protobuf_check, SMP_T_BIN, SMP_T_BIN  },
 
+	/* FIX converters */
+	{ "fix_is_valid",  sample_conv_fix_is_valid,  0,           NULL,                        SMP_T_BIN, SMP_T_BOOL  },
+	{ "fix_tag_value", sample_conv_fix_tag_value, ARG1(1,STR), sample_conv_fix_value_check, SMP_T_BIN, SMP_T_BIN  },
+
 	{ "iif", sample_conv_iif, ARG2(2, STR, STR), NULL, SMP_T_BOOL, SMP_T_STR },
 
 	{ "and",    sample_conv_binary_and, ARG1(1,STR), check_operator, SMP_T_SINT, SMP_T_SINT  },