MEDIUM: arg: make make_arg_list() support quotes in arguments
Now it becomes possible to reuse the quotes within arguments, allowing
the parser to distinguish a ',' or ')' that is part of the value from
one which delimits the argument. In addition, ',' and ')' may be escaped
using a backslash. However, it is also important to keep in mind that
just like in shell, quotes are first resolved by the word tokenizer, so
in order to pass quotes that are visible to the argument parser, a second
level is needed, either by using backslash escaping or by using the other
type of quote.
For example, it's possible to write this to emit a value containing parentheses and a comma:
http-request add-header paren-comma-paren "%[str('(--,--)')]"
or this:
http-request add-header paren-comma-paren '%[str("(--,--)")]'
or this:
http-request add-header paren-comma-paren %[str(\'(--,--)\')]
or this:
http-request add-header paren-comma-paren %[str(\"(--,--)\")]
or this:
http-request add-header paren-comma-paren %[str(\"(\"--\',\'--\")\")]
Note that due to the wide use of '\' in front of parentheses in regexes,
the backslash character will purposely *not* escape parentheses, so that
'\)' placed in quotes is passed verbatim to a regex engine.
diff --git a/doc/configuration.txt b/doc/configuration.txt
index aa4826f..8bf0506 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -13752,14 +13752,16 @@
appended after the variable. It may also be omitted. Together, these elements
allow to concatenate variables with delimiters to an existing set of
variables. This can be used to build new variables made of a succession of
- other variables, such as colon-delimited values. Note that due to the config
- parser, it is not possible to use a comma nor a closing parenthesis as
- delimiters.
+ other variables, such as colon-delimited values. If commas or closing
+ parentheses are needed as delimiters, they must be protected by quotes or
+ backslashes, themselves protected so that they are not stripped by the first
+ level parser. See examples below.
Example:
tcp-request session set-var(sess.src) src
tcp-request session set-var(sess.dn) ssl_c_s_dn
tcp-request session set-var(txn.sig) str(),concat(<ip=,sess.ip,>),concat(<dn=,sess.dn,>)
+ tcp-request session set-var(txn.ipport) "str(),concat('addr=(',sess.ip),concat(',',sess.port,')')"
http-request set-header x-hap-sig %[var(txn.sig)]
cpl
@@ -14139,18 +14141,22 @@
regex case insensitive by adding the flag "i" in <flags>. Since <flags> is a
string, it is made up from the concatenation of all desired flags. Thus if
both "i" and "g" are desired, using "gi" or "ig" will have the same effect.
- It is important to note that due to the current limitations of the
- configuration parser, some characters such as closing parenthesis, closing
- square brackets or comma are not possible to use in the arguments. The first
- use of this converter is to replace certain characters or sequence of
- characters with other ones.
+ The first use of this converter is to replace certain characters or sequence
+ of characters with other ones.
+
+ It is highly recommended to enclose the regex part using protected quotes to
+ improve clarity and never have a closing parenthesis from the regex mixed up
+ with the parenthesis from the function. Just like in Bourne shell, the first
+ level of quotes is processed when delimiting word groups on the line, a
+ second level is usable for arguments. It is recommended to use single quotes
+ outside since these do not try to resolve backslashes nor dollar signs.
Example :
# de-duplicate "/" in header "x-path".
# input: x-path: /////a///b/c/xzxyz/
# output: x-path: /a/b/c/xzxyz/
- http-request set-header x-path %[hdr(x-path),regsub(/+,/,g)]
+ http-request set-header x-path "%[hdr(x-path),regsub('/+','/','g')]"
capture-req(<id>)
Capture the string entry in the request slot <id> and returns the entry as
diff --git a/src/arg.c b/src/arg.c
index 927aaa4..3843a9b 100644
--- a/src/arg.c
+++ b/src/arg.c
@@ -152,20 +152,68 @@
/* Note: empty arguments after a comma always exist. */
while (pos < nbarg) {
unsigned int uint;
+ int squote = 0, dquote = 0;
+ char *out;
- beg = in;
- while (len && *in != ',' && *in && *in != ')') {
- in++;
- len--;
- }
+ chunk_reset(&trash);
+ out = trash.area;
- /* we have a new argument between <beg> and <in> (not included).
- * For ease of handling, we copy it into a zero-terminated word.
- * By default, the output argument will be the same type of the
- * expected one.
- */
- if (!chunk_strncpy(&trash, beg, in - beg))
- goto buffer_err;
+ while (len && *in && trash.data < trash.size - 1) {
+ if (*in == '"' && !squote) { /* double quote outside single quotes */
+ if (dquote)
+ dquote = 0;
+ else
+ dquote = 1;
+ in++; len--;
+ continue;
+ }
+ else if (*in == '\'' && !dquote) { /* single quote outside double quotes */
+ if (squote)
+ squote = 0;
+ else
+ squote = 1;
+ in++; len--;
+ continue;
+ }
+ else if (*in == '\\' && !squote && len != 1) {
+ /* '\', ', ' ', '"' support being escaped by '\' */
+ if (len == 1 || in[1] == 0)
+ goto unquote_err;
+
+ if (in[1] == '\\' || in[1] == ' ' || in[1] == '"' || in[1] == '\'') {
+ in++; len--;
+ *out++ = *in;
+ }
+ else if (in[1] == 'r') {
+ in++; len--;
+ *out++ = '\r';
+ }
+ else if (in[1] == 'n') {
+ in++; len--;
+ *out++ = '\n';
+ }
+ else if (in[1] == 't') {
+ in++; len--;
+ *out++ = '\t';
+ }
+ else {
+ /* just a lone '\' */
+ *out++ = *in;
+ }
+ in++; len--;
+ }
+ else {
+ if (!squote && !dquote && (*in == ',' || *in == ')')) {
+ /* end of argument */
+ break;
+ }
+ /* verbatim copy */
+ *out++ = *in++;
+ len--;
+ }
+ trash.data = out - trash.area;
+ }
+ trash.area[trash.data] = 0;
arg->type = (mask >> (pos * ARGT_BITS)) & ARGT_MASK;
@@ -362,4 +410,14 @@
memprintf(err_msg, "too small buffer size to store decoded argument %d, increase bufsize ?",
pos + 1);
goto err;
+
+ unquote_err:
+ /* come here with the parsed part in <trash.area>:<trash.data> and the
+ * unparsable part in <in>.
+ */
+ trash.area[trash.data] = 0;
+ memprintf(err_msg, "failed to parse '%s' after '%s' as type '%s' at position %d",
+ in, trash.area, arg_type_names[(mask >> (pos * ARGT_BITS)) & ARGT_MASK], pos + 1);
+ goto err;
+
}