MEDIUM: sample: Extend functionality for field/word converters
Extend the field/word converters so that it is possible to extract
field(s)/word(s) counting from the beginning or from the end, and/or to
extract multiple fields/words (including separators), e.g.:
str(f1_f2_f3__f5),field(2,_,2) # f2_f3
str(f1_f2_f3__f5),field(2,_,0) # f2_f3__f5
str(f1_f2_f3__f5),field(-2,_,3) # f2_f3_
str(f1_f2_f3__f5),field(-3,_,0) # f1_f2_f3
str(w1_w2_w3___w4),word(3,_,2) # w3___w4
str(w1_w2_w3___w4),word(2,_,0) # w2_w3___w4
str(w1_w2_w3___w4),word(-2,_,3) # w1_w2_w3
str(w1_w2_w3___w4),word(-3,_,0) # w1_w2
This change is backward compatible: <count> is optional and defaults to 1.
diff --git a/doc/configuration.txt b/doc/configuration.txt
index 22cd736..c687722 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -12907,10 +12907,20 @@
Returns a boolean TRUE if the input value of type signed integer is even
otherwise returns FALSE. It is functionally equivalent to "not,and(1),bool".
-field(<index>,<delimiters>)
- Extracts the substring at the given index considering given delimiters from
- an input string. Indexes start at 1 and delimiters are a string formatted
- list of chars.
+field(<index>,<delimiters>[,<count>])
+ Extracts the substring at the given index, counting from the beginning
+ (positive index) or from the end (negative index), considering the given
+ delimiters in an input string. Indexes start at 1 or -1 and delimiters are a
+ string formatted list of chars. Optionally, <count> sets the number of fields
+ to extract (default: 1), separators included; with a negative index they are
+ taken towards the beginning. A value of 0 extracts all remaining fields.
+
+ Example :
+ str(f1_f2_f3__f5),field(5,_) # f5
+ str(f1_f2_f3__f5),field(2,_,0) # f2_f3__f5
+ str(f1_f2_f3__f5),field(2,_,2) # f2_f3
+ str(f1_f2_f3__f5),field(-2,_,3) # f2_f3_
+ str(f1_f2_f3__f5),field(-3,_,0) # f1_f2_f3
hex
Converts a binary input sample to a hex string containing two hex digits per
@@ -13440,9 +13450,19 @@
# e.g. 20140710162350 127.0.0.1:57325
log-format %[date,utime(%Y%m%d%H%M%S)]\ %ci:%cp
-word(<index>,<delimiters>)
- Extracts the nth word considering given delimiters from an input string.
- Indexes start at 1 and delimiters are a string formatted list of chars.
+word(<index>,<delimiters>[,<count>])
+ Extracts the nth word, counting from the beginning (positive index) or from
+ the end (negative index), considering the given delimiters in an input
+ string. Indexes start at 1 or -1 and delimiters are a string formatted list
+ of chars. Optionally, <count> sets the number of words to extract
+ (default: 1), separators included. A value of 0 extracts all remaining words.
+
+ Example :
+ str(f1_f2_f3__f5),word(4,_) # f5
+ str(f1_f2_f3__f5),word(2,_,0) # f2_f3__f5
+ str(f1_f2_f3__f5),word(3,_,2) # f3__f5
+ str(f1_f2_f3__f5),word(-2,_,3) # f1_f2_f3
+ str(f1_f2_f3__f5),word(-3,_,0) # f1_f2
wt6([<avalanche>])
Hashes a binary input sample into an unsigned 32-bit quantity using the WT6
diff --git a/src/sample.c b/src/sample.c
index 71ee59f..154beb5 100644
--- a/src/sample.c
+++ b/src/sample.c
@@ -1997,27 +1997,54 @@
*/
static int sample_conv_field(const struct arg *arg_p, struct sample *smp, void *private)
{
- unsigned int field;
+ int field;
char *start, *end;
int i;
+ int count = (arg_p[2].type == ARGT_SINT) ? arg_p[2].data.sint : 1;
if (!arg_p[0].data.sint)
return 0;
- field = 1;
- end = start = smp->data.u.str.str;
- while (end - smp->data.u.str.str < smp->data.u.str.len) {
-
- for (i = 0 ; i < arg_p[1].data.str.len ; i++) {
- if (*end == arg_p[1].data.str.str[i]) {
- if (field == arg_p[0].data.sint)
- goto found;
- start = end+1;
- field++;
- break;
+ if (arg_p[0].data.sint < 0) {
+ field = -1;
+ end = start = smp->data.u.str.str + smp->data.u.str.len;
+ while (start > smp->data.u.str.str) {
+ for (i = 0 ; i < arg_p[1].data.str.len ; i++) {
+ if (*(start-1) == arg_p[1].data.str.str[i]) {
+ if (field == arg_p[0].data.sint) {
+ if (count == 1)
+ goto found;
+ else if (count > 1)
+ count--;
+ } else {
+ end = start-1;
+ field--;
+ }
+ break;
+ }
}
+ start--;
}
- end++;
+ } else {
+ field = 1;
+ end = start = smp->data.u.str.str;
+ while (end - smp->data.u.str.str < smp->data.u.str.len) {
+ for (i = 0 ; i < arg_p[1].data.str.len ; i++) {
+ if (*end == arg_p[1].data.str.str[i]) {
+ if (field == arg_p[0].data.sint) {
+ if (count == 1)
+ goto found;
+ else if (count > 1)
+ count--;
+ } else {
+ start = end+1;
+ field++;
+ }
+ break;
+ }
+ }
+ end++;
+ }
}
/* Field not found */
@@ -2048,37 +2075,74 @@
*/
static int sample_conv_word(const struct arg *arg_p, struct sample *smp, void *private)
{
- unsigned int word;
+ int word;
char *start, *end;
int i, issep, inword;
+ int count = (arg_p[2].type == ARGT_SINT) ? arg_p[2].data.sint : 1;
if (!arg_p[0].data.sint)
return 0;
word = 0;
inword = 0;
- end = start = smp->data.u.str.str;
- while (end - smp->data.u.str.str < smp->data.u.str.len) {
- issep = 0;
- for (i = 0 ; i < arg_p[1].data.str.len ; i++) {
- if (*end == arg_p[1].data.str.str[i]) {
- issep = 1;
- break;
+ if (arg_p[0].data.sint < 0) {
+ end = start = smp->data.u.str.str + smp->data.u.str.len;
+ while (start > smp->data.u.str.str) {
+ issep = 0;
+ for (i = 0 ; i < arg_p[1].data.str.len ; i++) {
+ if (*(start-1) == arg_p[1].data.str.str[i]) {
+ issep = 1;
+ break;
+ }
}
- }
- if (!inword) {
- if (!issep) {
- word++;
- start = end;
- inword = 1;
+ if (!inword) {
+ if (!issep) {
+ if (word != arg_p[0].data.sint) {
+ word--;
+ end = start;
+ }
+ inword = 1;
+ }
}
+ else if (issep) {
+ if (word == arg_p[0].data.sint)
+ if (count == 1)
+ goto found;
+ else if (count > 1)
+ count--;
+ inword = 0;
+ }
+ start--;
}
- else if (issep) {
- if (word == arg_p[0].data.sint)
- goto found;
- inword = 0;
+ } else {
+ end = start = smp->data.u.str.str;
+ while (end - smp->data.u.str.str < smp->data.u.str.len) {
+ issep = 0;
+ for (i = 0 ; i < arg_p[1].data.str.len ; i++) {
+ if (*end == arg_p[1].data.str.str[i]) {
+ issep = 1;
+ break;
+ }
+ }
+ if (!inword) {
+ if (!issep) {
+ if (word != arg_p[0].data.sint) {
+ word++;
+ start = end;
+ }
+ inword = 1;
+ }
+ }
+ else if (issep) {
+ if (word == arg_p[0].data.sint)
+ if (count == 1)
+ goto found;
+ else if (count > 1)
+ count--;
+ inword = 0;
+ }
+ end++;
}
- end++;
}
/* Field not found */
@@ -2928,8 +2992,8 @@
{ "xxh64", sample_conv_xxh64, ARG1(0,SINT), NULL, SMP_T_BIN, SMP_T_SINT },
{ "json", sample_conv_json, ARG1(1,STR), sample_conv_json_check, SMP_T_STR, SMP_T_STR },
{ "bytes", sample_conv_bytes, ARG2(1,SINT,SINT), NULL, SMP_T_BIN, SMP_T_BIN },
- { "field", sample_conv_field, ARG2(2,SINT,STR), sample_conv_field_check, SMP_T_STR, SMP_T_STR },
- { "word", sample_conv_word, ARG2(2,SINT,STR), sample_conv_field_check, SMP_T_STR, SMP_T_STR },
+ { "field", sample_conv_field, ARG3(2,SINT,STR,SINT), sample_conv_field_check, SMP_T_STR, SMP_T_STR },
+ { "word", sample_conv_word, ARG3(2,SINT,STR,SINT), sample_conv_field_check, SMP_T_STR, SMP_T_STR },
{ "regsub", sample_conv_regsub, ARG3(2,REG,STR,STR), sample_conv_regsub_check, SMP_T_STR, SMP_T_STR },
{ "sha1", sample_conv_sha1, 0, NULL, SMP_T_BIN, SMP_T_BIN },
{ "concat", sample_conv_concat, ARG3(1,STR,STR,STR), smp_check_concat, SMP_T_STR, SMP_T_STR },