MINOR: sample: Add converters to parse FIX messages
This patch implements a couple of converters to validate and extract tag value
from a FIX (Financial Information eXchange) message. The validation consists in
a few checks such as mandatory fields and checksum computation. The extraction
can get any tag value based on a tag string or tag id.
This patch requires the istend() function. Thus it depends on "MINOR: ist: Add
istend() function to return a pointer to the end of the string".
Reviewed and Fixed by Christopher Faulet <cfaulet@haproxy.com>
diff --git a/Makefile b/Makefile
index 27aac33..11c3ebf 100644
--- a/Makefile
+++ b/Makefile
@@ -843,7 +843,7 @@
src/ebimtree.o src/uri_auth.o src/freq_ctr.o src/ebsttree.o \
src/ebistree.o src/auth.o src/wdt.o src/http_acl.o \
src/hpack-enc.o src/hpack-huff.o src/ebtree.o src/base64.o \
- src/hash.o src/dgram.o src/version.o
+ src/hash.o src/dgram.o src/version.o src/fix.o
ifneq ($(TRACE),)
OBJS += src/calltrace.o
diff --git a/doc/configuration.txt b/doc/configuration.txt
index e85c525..a7dc4b6 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -15133,6 +15133,52 @@
str(f1_f2_f3__f5),field(-2,_,3) # f2_f3_
str(f1_f2_f3__f5),field(-3,_,0) # f1_f2_f3
+fix_is_valid
+ Parses a binary payload and performs sanity checks regarding FIX (Financial
+ Information eXchange):
+
+ - checks that all tag IDs and values are not empty and that the tag IDs are
+ numeric
+ - checks the BeginString tag is the first tag with a valid FIX version
+ - checks the BodyLength tag is the second one with the right body length
+ - checks the MsgType tag is the third tag
+ - checks that last tag in the message is the CheckSum tag with a valid
+ checksum
+
+ Due to current HAProxy design, only the first message sent by the client and
+ the server can be parsed.
+
+ This converter returns a boolean, true if the payload contains a valid FIX
+ message, false if not.
+
+ See also the fix_tag_value converter.
+
+ Example:
+ tcp-request inspect-delay 10s
+ tcp-request content reject unless { req.payload(0,0),fix_is_valid }
+
+fix_tag_value(<tag>)
+ Parses a FIX (Financial Information eXchange) message and extracts the value
+ from the tag <tag>. <tag> can be a string or an integer pointing to the
+ desired tag. Any integer value is accepted, but only the following strings
+ are translated into their integer equivalent: BeginString, BodyLength,
+ MsgType, SenderComID, TargetComID, CheckSum. More tag names can be easily
+ added.
+
+ Due to current HAProxy design, only the first message sent by the client and
+ the server can be parsed. No message validation is performed by this
+ converter. It is highly recommended to validate the message first using
+ fix_is_valid converter.
+
+ See also the fix_is_valid converter.
+
+ Example:
+ tcp-request inspect-delay 10s
+ tcp-request content reject unless { req.payload(0,0),fix_is_valid }
+ # MsgType tag ID is 35, so both lines below will return the same content
+ tcp-request content set-var(txn.foo) req.payload(0,0),fix_tag_value(35)
+ tcp-request content set-var(txn.bar) req.payload(0,0),fix_tag_value(MsgType)
+
hex
Converts a binary input sample to a hex string containing two hex digits per
input byte. It is used to log or transfer hex dumps of some binary input data
diff --git a/include/haproxy/fix-t.h b/include/haproxy/fix-t.h
new file mode 100644
index 0000000..bba9bd6
--- /dev/null
+++ b/include/haproxy/fix-t.h
@@ -0,0 +1,70 @@
+/*
+ * include/haproxy/fix-t.h
+ * This file contains structure declarations for FIX protocol.
+ *
+ * Copyright 2020 Baptiste Assmann <bedis9@gmail.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _HAPROXY_FIX_T_H
+#define _HAPROXY_FIX_T_H
+
+#include <import/ist.h>
+
+/*
+ * FIX messages are composed of a list of Tag=Value fields separated by a 'delimiter'
+ */
+#define FIX_DELIMITER 0x01
+
+/*
+ * known FIX version strings
+ */
+#define FIX_4_0 (ist("FIX.4.0"))
+#define FIX_4_1 (ist("FIX.4.1"))
+#define FIX_4_2 (ist("FIX.4.2"))
+#define FIX_4_3 (ist("FIX.4.3"))
+#define FIX_4_4 (ist("FIX.4.4"))
+#define FIX_5_0 (ist("FIXT.1.1"))
+/* FIX_5_0SP1 and FIX_5_0SP2 have the same version string as FIX_5_0 */
+
+/*
+ * Supported FIX tag IDs
+ */
+#define FIX_TAG_BeginString 8
+#define FIX_TAG_BodyLength 9
+#define FIX_TAG_CheckSum 10
+#define FIX_TAG_MsgType 35
+#define FIX_TAG_SenderComID 49
+#define FIX_TAG_TargetComID 56
+
+
+#define FIX_MSG_MINSIZE 26 /* Minimal length for a FIX Message */
+#define FIX_CHKSUM_SIZE 7 /* Length of the CheckSum tag (10=NNN<delim>) */
+/*
+ * return codes when parsing / validating FIX messages
+ */
+#define FIX_INVALID_MESSAGE -1
+#define FIX_NEED_MORE_DATA 0
+#define FIX_VALID_MESSAGE 1
+
+#endif /* _HAPROXY_FIX_T_H */
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/include/haproxy/fix.h b/include/haproxy/fix.h
new file mode 100644
index 0000000..1d242e7
--- /dev/null
+++ b/include/haproxy/fix.h
@@ -0,0 +1,97 @@
+/*
+ * include/haproxy/fix.h
+ * This file contains functions and macros declarations for FIX protocol decoding.
+ *
+ * Copyright 2020 Baptiste Assmann <bedis9@gmail.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef _HAPROXY_FIX_H
+#define _HAPROXY_FIX_H
+
+#include <import/ist.h>
+
+#include <haproxy/fix-t.h>
+#include <haproxy/tools.h>
+
+unsigned int fix_check_id(const struct ist str, const struct ist version);
+int fix_validate_message(const struct ist msg);
+struct ist fix_tag_value(const struct ist msg, unsigned int tagid);
+
+/*
+ * Look up <str> among the known FIX version strings. Returns the matching
+ * FIX_X_Y macro value, or IST_NULL when <str> is not one of them.
+ */
+static inline struct ist fix_version(const struct ist str)
+{
+	/* 7 is the minimal size for the FIX version string */
+	if (istlen(str) >= 7) {
+		if (isteq(FIX_4_0, str))
+			return FIX_4_0;
+		if (isteq(FIX_4_1, str))
+			return FIX_4_1;
+		if (isteq(FIX_4_2, str))
+			return FIX_4_2;
+		if (isteq(FIX_4_3, str))
+			return FIX_4_3;
+		if (isteq(FIX_4_4, str))
+			return FIX_4_4;
+		if (isteq(FIX_5_0, str))
+			return FIX_5_0;
+	}
+
+	/* too short or not a known version string */
+	return IST_NULL;
+}
+
+/*
+ * Translate <tag> into a FIX tag ID. <tag> is first tried as a numerical ID
+ * through fix_check_id(), then compared with isteqi() to the few tag names
+ * known here. Returns the tag ID, or 0 if <tag> matches neither.
+ *
+ * The full list of tag IDs, should more names be needed in the future:
+ * https://www.onixs.biz/fix-dictionary/4.2/fields_by_tag.html
+ */
+static inline unsigned int fix_tagid(const struct ist tag)
+{
+	const unsigned int numeric = fix_check_id(tag, IST_NULL);
+
+	if (numeric)
+		return numeric;
+	if (isteqi(tag, ist("MsgType")))
+		return FIX_TAG_MsgType;
+	if (isteqi(tag, ist("CheckSum")))
+		return FIX_TAG_CheckSum;
+	if (isteqi(tag, ist("BodyLength")))
+		return FIX_TAG_BodyLength;
+	if (isteqi(tag, ist("TargetComID")))
+		return FIX_TAG_TargetComID;
+	if (isteqi(tag, ist("BeginString")))
+		return FIX_TAG_BeginString;
+	if (isteqi(tag, ist("SenderComID")))
+		return FIX_TAG_SenderComID;
+
+	return 0;
+}
+
+#endif /* _HAPROXY_FIX_H */
+
+/*
+ * Local variables:
+ * c-indent-level: 8
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/src/fix.c b/src/fix.c
new file mode 100644
index 0000000..82af1c0
--- /dev/null
+++ b/src/fix.c
@@ -0,0 +1,264 @@
+/*
+ * Financial Information eXchange Protocol
+ *
+ * Copyright 2020 Baptiste Assmann <bedis9@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ */
+
+#include <haproxy/intops.h>
+#include <haproxy/fix.h>
+/*
+ * Parse <str> as a FIX tag ID and return its numerical value, or 0 if <str>
+ * does not look like a valid one (0 itself is an invalid ID).
+ *
+ * <str> must be entirely consumed by read_uint(). When <version> is set, it is
+ * matched against the FIX_X_Y macros with istissame() and the ID must also fit
+ * the range of that version. Without <version>, any strictly positive integer
+ * is valid.
+ *
+ * Tag ID ranges per FIX protocol version:
+ *   - FIX.4.0:    1-140
+ *   - FIX.4.1:    1-211
+ *   - FIX.4.2:    1-446
+ *   - FIX.4.3:    1-659
+ *   - FIX.4.4:    1-956
+ *   - FIX.5.0:    1-1139
+ *   - FIX.5.0SP1: 1-1426
+ *   - FIX.5.0SP2: 1-1621
+ * The range 10000 to 19999 is for "user defined tags".
+ */
+unsigned int fix_check_id(const struct ist str, const struct ist version)
+{
+	const char *cur = istptr(str);
+	const char *stop = istend(str);
+	unsigned int id, max;
+
+	id = read_uint(&cur, stop);
+
+	/* leftover characters in <str> or a null ID: both are errors */
+	if (cur != stop || id == 0)
+		return 0;
+
+	/* no version provided: no range to check */
+	if (!isttest(version))
+		return id;
+
+	/* "user defined tag ids" are accepted for any version */
+	if (id >= 10000 && id <= 19999)
+		return id;
+
+	/* find the highest tag ID defined by the FIX version */
+	if (istissame(FIX_4_0, version))
+		max = 140;
+	else if (istissame(FIX_4_1, version))
+		max = 211;
+	else if (istissame(FIX_4_2, version))
+		max = 446;
+	else if (istissame(FIX_4_3, version))
+		max = 659;
+	else if (istissame(FIX_4_4, version))
+		max = 956;
+	/* version string is the same for all 5.0 versions, so we can only take
+	 * into consideration the biggest range
+	 */
+	else if (istissame(FIX_5_0, version))
+		max = 1621;
+	else
+		return 0;
+
+	/* the ID must fit in the range of this version */
+	return (id <= max) ? id : 0;
+}
+
+/*
+ * Parse the FIX message <msg> and perform the following sanity checks:
+ *
+ *   - tag ids and values are not empty
+ *   - tag ids are numerical values
+ *   - the first tag is BeginString with a valid version
+ *   - the second tag is BodyLength with the right body length
+ *   - the third tag is MsgType
+ *   - the last tag is CheckSum with a valid checksum
+ *
+ * Returns:
+ *  FIX_INVALID_MESSAGE if the message is invalid
+ *  FIX_NEED_MORE_DATA  if we need more data to fully validate the message
+ *  FIX_VALID_MESSAGE   if the message looks valid
+ */
+int fix_validate_message(const struct ist msg)
+{
+	struct ist parser, version;
+	unsigned int tagnum, bodylen;
+	unsigned char checksum;
+	char *body;
+	int ret = FIX_INVALID_MESSAGE;
+
+	if (istlen(msg) < FIX_MSG_MINSIZE) {
+		ret = FIX_NEED_MORE_DATA;
+		goto end;
+	}
+
+	/* parsing the whole message to compute the checksum and check all tag
+	 * ids are properly set. Here we are sure to have the 2 first tags. Thus
+	 * the version and the body length can be checked.
+	 */
+	parser = msg;
+	version = IST_NULL;
+	checksum = tagnum = bodylen = 0;
+	body = NULL;
+	while (istlen(parser) > 0) {
+		struct ist tag, value;
+		unsigned int tagid;
+		const char *p, *end;
+
+		/* parse the tag ID and its value and perform first sanity checks */
+		value = iststop(istfind(parser, '='), FIX_DELIMITER);
+
+		/* end of value not found */
+		if (istend(value) == istend(parser)) {
+			ret = FIX_NEED_MORE_DATA;
+			goto end;
+		}
+		/* empty tag or empty value are forbidden (<value> still holds '=') */
+		if (istptr(parser) == istptr(value) || istlen(value) < 2)
+			goto end;
+
+		/* value points on '='. get the tag and skip '=' */
+		tag = ist2(istptr(parser), istptr(value) - istptr(parser));
+		value = istnext(value);
+
+		/* Check the tag id */
+		tagid = fix_check_id(tag, version);
+		if (!tagid)
+			goto end;
+		tagnum++;
+
+		if (tagnum == 1) {
+			/* the first tag must be BeginString */
+			if (tagid != FIX_TAG_BeginString)
+				goto end;
+
+			version = fix_version(value);
+			if (!isttest(version))
+				goto end;
+		}
+		else if (tagnum == 2) {
+			/* the second tag must be BodyLength */
+			if (tagid != FIX_TAG_BodyLength)
+				goto end;
+
+			p = istptr(value);
+			end = istend(value);
+			bodylen = read_uint(&p, end);
+
+			/* we did not consume all characters from <value> or no body, this is an error.
+			 * There is at least the message type in the body.
+			 */
+			if (p != end || !bodylen)
+				goto end;
+
+			body = istend(value) + 1;
+		}
+		else if (tagnum == 3) {
+			/* the third tag must be MsgType */
+			if (tagid != FIX_TAG_MsgType)
+				goto end;
+		}
+		else if (tagnum > 3 && tagid == FIX_TAG_CheckSum) {
+			/* CheckSum tag should be the last one and is not taken into account
+			 * to compute the checksum itself and the body length. The value must
+			 * be fully consumed by read_uint(), as done for BodyLength.
+			 */
+			if (bodylen != istptr(parser) - body)
+				goto end;
+
+			p = istptr(value);
+			end = istend(value);
+			if (istlen(value) != 3 || checksum != read_uint(&p, end) || p != end)
+				goto end;
+
+			/* End of the message, exit from the loop */
+			ret = FIX_VALID_MESSAGE;
+			goto end;
+		}
+
+		/* compute checksum of tag=value<delim> */
+		for (p = istptr(tag) ; p < istend(tag) ; ++p)
+			checksum += *p;
+		checksum += '=';
+		for (p = istptr(value) ; p < istend(value) ; ++p)
+			checksum += *p;
+		checksum += FIX_DELIMITER;
+
+		/* move the parser after the value and its delimiter */
+		parser = istadv(parser, istlen(tag) + istlen(value) + 2);
+	}
+
+	if (body) {
+		/* We started to read the body but did not reach the checksum tag */
+		ret = FIX_NEED_MORE_DATA;
+	}
+
+ end:
+	return ret;
+}
+
+
+/*
+ * Iterate over the FIX message <msg> and return the value of <tagid>.
+ *
+ * Returns the corresponding value if <tagid> is found. If <tagid> is not found
+ * because more data are required, the message with a length set to 0 is
+ * returned. If <tagid> is not found in the message or if the message is
+ * invalid, IST_NULL is returned.
+ *
+ * Note: Only simple sanity checks are performed on tags and values (not empty).
+ *
+ * A tag looks like
+ *   <tagid>=<value>FIX_DELIMITER with <tagid> and <value> not empty
+ */
+struct ist fix_tag_value(const struct ist msg, unsigned int tagid)
+{
+	struct ist parser, t, v;
+	unsigned int id;
+
+	parser = msg;
+	while (istlen(parser) > 0) {
+		v = iststop(istfind(parser, '='), FIX_DELIMITER);
+
+		/* delimiter not found, need more data */
+		if (istend(v) == istend(parser))
+			break;
+
+		/* empty tag or empty value (<v> still holds '='), invalid */
+		if (istptr(parser) == istptr(v) || istlen(v) < 2)
+			goto not_found_or_invalid;
+
+		t = ist2(istptr(parser), istptr(v) - istptr(parser));
+		v = istnext(v);
+
+		id = fix_check_id(t, IST_NULL);
+		if (!id)
+			goto not_found_or_invalid;
+		if (id == tagid) {
+			/* <tagid> found, return the corresponding value */
+			return v;
+		}
+
+		/* CheckSum tag is the last one, no <tagid> found */
+		if (id == FIX_TAG_CheckSum)
+			goto not_found_or_invalid;
+
+		parser = istadv(parser, istlen(t) + istlen(v) + 2);
+	}
+	/* not enough data to find <tagid> */
+	return ist2(istptr(msg), 0);
+
+ not_found_or_invalid:
+	return IST_NULL;
+}
diff --git a/src/sample.c b/src/sample.c
index a9c08ef..7c59517 100644
--- a/src/sample.c
+++ b/src/sample.c
@@ -26,9 +26,11 @@
#include <haproxy/buf.h>
#include <haproxy/chunk.h>
#include <haproxy/errors.h>
+#include <haproxy/fix.h>
#include <haproxy/global.h>
#include <haproxy/hash.h>
#include <haproxy/http.h>
+#include <haproxy/istbuf.h>
#include <haproxy/net_helper.h>
#include <haproxy/protobuf.h>
#include <haproxy/proxy.h>
@@ -3214,6 +3216,86 @@
return 1;
}
+/*
+ * Extract the value of a FIX tag from an input binary sample. Takes a mandatory
+ * argument: the FIX tag ID. Returns 1 if the tag was found, 0 if not. In the
+ * latter case, SMP_F_MAY_CHANGE is set when more data are needed to decide.
+ */
+static int sample_conv_fix_tag_value(const struct arg *arg_p, struct sample *smp, void *private)
+{
+	struct ist value;
+
+	smp->flags &= ~SMP_F_MAY_CHANGE;
+	value = fix_tag_value(ist2(smp->data.u.str.area, smp->data.u.str.data),
+			      arg_p[0].data.sint);
+	if (!istlen(value)) {
+		if (isttest(value)) {
+			/* empty but value != IST_NULL, need more data */
+			smp->flags |= SMP_F_MAY_CHANGE;
+		}
+		return 0;
+	}
+
+	smp->data.u.str = ist2buf(value);
+	smp->flags |= SMP_F_CONST;
+
+	return 1;
+}
+
+/* Configuration-time check of the "fix_tag_value" converter argument.
+ * The argument must be a FIX tag ID or one of the tag names known to
+ * fix_tagid(). Names are turned into their ID counterpart since this is the
+ * form sent over the wire. Returns 1 on success, 0 with <err> set otherwise.
+ */
+static int sample_conv_fix_value_check(struct arg *args, struct sample_conv *conv,
+				       const char *file, int line, char **err)
+{
+	const struct ist name = ist2(args[0].data.str.area, args[0].data.str.data);
+	const unsigned int tagid = fix_tagid(name);
+
+	if (tagid) {
+		/* swap the string argument for its numerical tag ID */
+		chunk_destroy(&args[0].data.str);
+		args[0].type = ARGT_SINT;
+		args[0].data.sint = tagid;
+
+		return 1;
+	}
+
+	/* neither a valid tag ID nor a known tag name */
+	memprintf(err, "Unknown FIX tag name '%s'", args[0].data.str.area);
+	return 0;
+}
+
+/*
+ * Tell whether the input sample holds a valid FIX message.
+ *
+ * Return 1 if the check could be run, in which case the verdict is stored in
+ * <smp> as a boolean. Return 0 if it could not, SMP_F_MAY_CHANGE being set when
+ * more data are needed.
+ */
+static int sample_conv_fix_is_valid(const struct arg *arg_p, struct sample *smp, void *private)
+{
+	int status;
+
+	smp->flags &= ~SMP_F_MAY_CHANGE;
+	status = fix_validate_message(ist2(smp->data.u.str.area, smp->data.u.str.data));
+
+	if (status == FIX_NEED_MORE_DATA) {
+		/* not enough data yet to decide */
+		smp->flags |= SMP_F_MAY_CHANGE;
+		return 0;
+	}
+	/* unexpected return code: nothing to report */
+	if (status != FIX_VALID_MESSAGE && status != FIX_INVALID_MESSAGE)
+		return 0;
+
+	/* the verdict is known: report it as a boolean */
+	smp->data.type = SMP_T_BOOL;
+	smp->data.u.sint = (status == FIX_VALID_MESSAGE);
+	return 1;
+}
+
/* This function checks the "strcmp" converter's arguments and extracts the
* variable name and its scope.
*/
@@ -3802,6 +3884,10 @@
{ "ungrpc", sample_conv_ungrpc, ARG2(1,PBUF_FNUM,STR), sample_conv_protobuf_check, SMP_T_BIN, SMP_T_BIN },
{ "protobuf", sample_conv_protobuf, ARG2(1,PBUF_FNUM,STR), sample_conv_protobuf_check, SMP_T_BIN, SMP_T_BIN },
+ /* FIX converters */
+ { "fix_is_valid", sample_conv_fix_is_valid, 0, NULL, SMP_T_BIN, SMP_T_BOOL },
+ { "fix_tag_value", sample_conv_fix_tag_value, ARG1(1,STR), sample_conv_fix_value_check, SMP_T_BIN, SMP_T_BIN },
+
{ "iif", sample_conv_iif, ARG2(2, STR, STR), NULL, SMP_T_BOOL, SMP_T_STR },
{ "and", sample_conv_binary_and, ARG1(1,STR), check_operator, SMP_T_SINT, SMP_T_SINT },