MINOR: map: Add regex matching replacement
This patch adds a new map converter, "map_regm", which returns the map's
value string with its back references (like "\1") replaced by the text
captured by the matching regex.
diff --git a/doc/configuration.txt b/doc/configuration.txt
index 7dd5744..25b94af 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -11815,13 +11815,19 @@
-----------+--------------+-----------------+-----------------+---------------
str | end | map_end | map_end_int | map_end_ip
-----------+--------------+-----------------+-----------------+---------------
- str | reg | map_reg | map_reg_int | map_reg_ip
+ | | map_reg | |
+ str | reg +-----------------+ map_reg_int | map_reg_ip
+ | | map_regm | |
-----------+--------------+-----------------+-----------------+---------------
int | int | map_int | map_int_int | map_int_ip
-----------+--------------+-----------------+-----------------+---------------
ip | ip | map_ip | map_ip_int | map_ip_ip
-----------+--------------+-----------------+-----------------+---------------
+ The special map called "map_regm" expects matching zones in the regular
+ expression and modifies the output, replacing each back reference (like "\1")
+ with the corresponding matched text.
+
The file contains one key + value per line. Lines which start with '#' are
ignored, just like empty lines. Leading tabs and spaces are stripped. The key
is then the first "word" (series of non-space/tabs characters), and the value
diff --git a/include/proto/pattern.h b/include/proto/pattern.h
index 0a132f2..9c93db9 100644
--- a/include/proto/pattern.h
+++ b/include/proto/pattern.h
@@ -67,6 +67,7 @@
int pat_idx_list_ptr(struct pattern_expr *expr, struct pattern *pat, char **err);
int pat_idx_list_str(struct pattern_expr *expr, struct pattern *pat, char **err);
int pat_idx_list_reg(struct pattern_expr *expr, struct pattern *pat, char **err);
+int pat_idx_list_regm(struct pattern_expr *expr, struct pattern *pat, char **err);
int pat_idx_tree_ip(struct pattern_expr *expr, struct pattern *pat, char **err);
int pat_idx_tree_str(struct pattern_expr *expr, struct pattern *pat, char **err);
int pat_idx_tree_pfx(struct pattern_expr *expr, struct pattern *pat, char **err);
@@ -174,6 +175,7 @@
* and restores the previous character when leaving.
*/
struct pattern *pat_match_reg(struct sample *smp, struct pattern_expr *expr, int fill);
+struct pattern *pat_match_regm(struct sample *smp, struct pattern_expr *expr, int fill);
/*
* pattern_ref manipulation.
diff --git a/include/types/pattern.h b/include/types/pattern.h
index a71c343..912e086 100644
--- a/include/types/pattern.h
+++ b/include/types/pattern.h
@@ -87,6 +87,7 @@
PAT_MATCH_DOM, /* domain-like sub-string (str) */
PAT_MATCH_END, /* end of string (str) */
PAT_MATCH_REG, /* regex (str -> reg) */
+ PAT_MATCH_REGM, /* regex (str -> reg) with match zones */
/* keep this one last */
PAT_MATCH_NUM
};
diff --git a/src/map.c b/src/map.c
index 35feea9..a28ceda 100644
--- a/src/map.c
+++ b/src/map.c
@@ -163,6 +163,7 @@
{
struct map_descriptor *desc;
struct pattern *pat;
+ struct chunk *str;
/* get config */
desc = arg_p[0].data.map;
@@ -172,8 +173,19 @@
/* Match case. */
if (pat) {
- /* Copy sample. */
if (pat->data) {
+ /* In the regm case, merge the sample with the input. */
+ if ((long)private == PAT_MATCH_REGM) {
+ str = get_trash_chunk();
+ str->len = exp_replace(str->str, str->size, smp->data.u.str.str,
+ pat->data->u.str.str,
+ (regmatch_t *)smp->ctx.a[0]);
+ if (str->len == -1)
+ return 0;
+ smp->data.u.str = *str;
+ return 1;
+ }
+ /* Copy sample. */
smp->data = *pat->data;
smp->flags |= SMP_F_CONST;
return 1;
@@ -242,6 +254,7 @@
{ "map_dom", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_DOM },
{ "map_end", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_END },
{ "map_reg", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_REG },
+ { "map_regm", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_STR, SMP_T_STR, (void *)PAT_MATCH_REGM},
{ "map_int", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_SINT, SMP_T_STR, (void *)PAT_MATCH_INT },
{ "map_ip", sample_conv_map, ARG2(1,STR,STR), sample_load_map, SMP_T_ADDR, SMP_T_STR, (void *)PAT_MATCH_IP },
diff --git a/src/pattern.c b/src/pattern.c
index 254c106..b4cb8e9 100644
--- a/src/pattern.c
+++ b/src/pattern.c
@@ -41,6 +41,7 @@
[PAT_MATCH_DOM] = "dom",
[PAT_MATCH_END] = "end",
[PAT_MATCH_REG] = "reg",
+ [PAT_MATCH_REGM] = "regm",
};
int (*pat_parse_fcts[PAT_MATCH_NUM])(const char *, struct pattern *, int, char **) = {
@@ -57,6 +58,7 @@
[PAT_MATCH_DOM] = pat_parse_str,
[PAT_MATCH_END] = pat_parse_str,
[PAT_MATCH_REG] = pat_parse_reg,
+ [PAT_MATCH_REGM] = pat_parse_reg,
};
int (*pat_index_fcts[PAT_MATCH_NUM])(struct pattern_expr *, struct pattern *, char **) = {
@@ -73,6 +75,7 @@
[PAT_MATCH_DOM] = pat_idx_list_str,
[PAT_MATCH_END] = pat_idx_list_str,
[PAT_MATCH_REG] = pat_idx_list_reg,
+ [PAT_MATCH_REGM] = pat_idx_list_regm,
};
void (*pat_delete_fcts[PAT_MATCH_NUM])(struct pattern_expr *, struct pat_ref_elt *) = {
@@ -89,6 +92,7 @@
[PAT_MATCH_DOM] = pat_del_list_ptr,
[PAT_MATCH_END] = pat_del_list_ptr,
[PAT_MATCH_REG] = pat_del_list_reg,
+ [PAT_MATCH_REGM] = pat_del_list_reg,
};
void (*pat_prune_fcts[PAT_MATCH_NUM])(struct pattern_expr *) = {
@@ -105,6 +109,7 @@
[PAT_MATCH_DOM] = pat_prune_ptr,
[PAT_MATCH_END] = pat_prune_ptr,
[PAT_MATCH_REG] = pat_prune_reg,
+ [PAT_MATCH_REGM] = pat_prune_reg,
};
struct pattern *(*pat_match_fcts[PAT_MATCH_NUM])(struct sample *, struct pattern_expr *, int) = {
@@ -121,6 +126,7 @@
[PAT_MATCH_DOM] = pat_match_dom,
[PAT_MATCH_END] = pat_match_end,
[PAT_MATCH_REG] = pat_match_reg,
+ [PAT_MATCH_REGM] = pat_match_regm,
};
/* Just used for checking configuration compatibility */
@@ -138,6 +144,7 @@
[PAT_MATCH_DOM] = SMP_T_STR,
[PAT_MATCH_END] = SMP_T_STR,
[PAT_MATCH_REG] = SMP_T_STR,
+ [PAT_MATCH_REGM] = SMP_T_STR,
};
/* this struct is used to return information */
@@ -540,6 +547,30 @@
}
/* Executes a regex. It temporarily changes the data to add a trailing zero,
+ * and restores the previous character when leaving. The first matching pattern
+ * is returned and its match array (pmatch) is published in smp->ctx.a[0].
+ */
+struct pattern *pat_match_regm(struct sample *smp, struct pattern_expr *expr, int fill) /* <fill> is not read anywhere in this body */
+{
+ struct pattern_list *lst;
+ struct pattern *pattern;
+ struct pattern *ret = NULL; /* stays NULL when no pattern of <expr> matches */
+
+ list_for_each_entry(lst, &expr->patterns, list) { /* patterns are tried in list order */
+ pattern = &lst->pat;
+
+ if (regex_exec_match2(pattern->ptr.reg, smp->data.u.str.str, smp->data.u.str.len,
+ MAX_MATCH, pmatch, 0)) { /* presumably fills pmatch with up to MAX_MATCH match zones - confirm against regex_exec_match2 */
+ ret = pattern;
+ smp->ctx.a[0] = pmatch; /* NOTE(review): pmatch is declared outside this function; it must outlive the reader of smp->ctx.a[0] */
+ break; /* first match wins, later patterns are not evaluated */
+ }
+ }
+
+ return ret;
+}
+
+/* Executes a regex. It temporarily changes the data to add a trailing zero,
* and restores the previous character when leaving.
*/
struct pattern *pat_match_reg(struct sample *smp, struct pattern_expr *expr, int fill)
@@ -1146,7 +1177,7 @@
return 1;
}
-int pat_idx_list_reg(struct pattern_expr *expr, struct pattern *pat, char **err)
+int pat_idx_list_reg_cap(struct pattern_expr *expr, struct pattern *pat, int cap, char **err)
{
struct pattern_list *patl;
@@ -1169,7 +1200,8 @@
}
/* compile regex */
- if (!regex_comp(pat->ptr.str, patl->pat.ptr.reg, !(expr->mflags & PAT_MF_IGNORE_CASE), 0, err)) {
+ if (!regex_comp(pat->ptr.str, patl->pat.ptr.reg,
+ !(expr->mflags & PAT_MF_IGNORE_CASE), cap, err)) {
free(patl->pat.ptr.reg);
free(patl);
return 0;
@@ -1183,6 +1215,16 @@
return 1;
}
+int pat_idx_list_reg(struct pattern_expr *expr, struct pattern *pat, char **err) /* indexer registered for PAT_MATCH_REG */
+{
+ return pat_idx_list_reg_cap(expr, pat, 0, err); /* cap=0: same value that was hard-coded in the regex_comp() call before <cap> existed */
+}
+
+int pat_idx_list_regm(struct pattern_expr *expr, struct pattern *pat, char **err) /* indexer registered for PAT_MATCH_REGM */
+{
+ return pat_idx_list_reg_cap(expr, pat, 1, err); /* cap=1 is forwarded to regex_comp(); presumably enables capture of match zones - confirm */
+}
+
int pat_idx_tree_ip(struct pattern_expr *expr, struct pattern *pat, char **err)
{
unsigned int mask;