MEDIUM: pattern: link all final elements from the reference
There is a data model issue in the current pattern design that makes
pattern deletion extremely expensive: there's no direct way from a
reference to access all indexed occurrences. As such, the only way
to remove all indexed entries corresponding to a reference update
is to scan all expressions's lists and trees to find a link to the
reference. While this was possibly OK when map removal was not
common and most maps were small, this is not conceivable anymore
with GeoIP maps containing 10M+ entries and del-map operations that
are triggered from http-request rulesets.
This patch introduces two list heads from the pattern reference, one
for the objects linked by lists and one for those linked by tree node.
Ideally a single list would be enough but the linked elements are too
much unrelated to be distinguished at the moment, so we'll need two
lists. However for the long term a single-linked list will suffice but
for now it's not possible due to the way elements are removed from
expressions. As such this patch adds 32 bytes of memory usage per
reference plus 16 per indexed entry, but both will be cut in half
later.
The links are not yet used for deletion, this patch only ensures the
list is always consistent.
diff --git a/include/haproxy/pattern-t.h b/include/haproxy/pattern-t.h
index a4d29d5..2f5f58c 100644
--- a/include/haproxy/pattern-t.h
+++ b/include/haproxy/pattern-t.h
@@ -111,12 +111,15 @@
__decl_thread(HA_SPINLOCK_T lock); /* Lock used to protect pat ref elements */
};
-/* This is a part of struct pat_ref. Each entry contain one
- * pattern and one associated value as original string.
+/* This is a part of struct pat_ref. Each entry contains one pattern and one
+ * associated value as original string. All derivative forms (via exprs) are
+ * accessed from list_head or tree_head.
*/
struct pat_ref_elt {
struct list list; /* Used to chain elements. */
struct list back_refs; /* list of users tracking this pat ref */
+ struct list list_head; /* all pattern_list derived from this reference */
+ struct list tree_head; /* all pattern_tree derived from this reference */
char *pattern;
char *sample;
int line;
@@ -126,6 +129,7 @@
* "sample" with a tree entry. It is used with maps.
*/
struct pattern_tree {
+ struct list from_ref; // pattern_tree linked from pat_ref_elt
struct sample_data *data;
struct pat_ref_elt *ref;
struct ebmb_node node;
@@ -170,6 +174,7 @@
/* This struct is just used for chaining patterns */
struct pattern_list {
+ struct list from_ref; // pattern_tree linked from pat_ref_elt
struct list list;
struct pattern pat;
};
diff --git a/src/pattern.c b/src/pattern.c
index faf23ef..3045c3c 100644
--- a/src/pattern.c
+++ b/src/pattern.c
@@ -1082,6 +1082,7 @@
next = eb_next(node);
eb_delete(node);
elt = container_of(node, struct pattern_tree, node);
+ LIST_DEL(&elt->from_ref);
free(elt->data);
free(elt);
node = next;
@@ -1094,6 +1095,7 @@
list_for_each_entry_safe(pat, tmp, &expr->patterns, list) {
LIST_DEL(&pat->list);
+ LIST_DEL(&pat->from_ref);
if (pat->pat.sflags & PAT_SF_REGFREE)
regex_free(pat->pat.ptr.ptr);
else
@@ -1130,6 +1132,8 @@
/* chain pattern in the expression */
LIST_ADDQ(&expr->patterns, &patl->list);
+ /* and from the reference */
+ LIST_ADDQ(&pat->ref->list_head, &patl->from_ref);
expr->ref->revision = rdtsc();
/* that's ok */
@@ -1159,6 +1163,8 @@
/* chain pattern in the expression */
LIST_ADDQ(&expr->patterns, &patl->list);
+ /* and from the reference */
+ LIST_ADDQ(&pat->ref->list_head, &patl->from_ref);
expr->ref->revision = rdtsc();
/* that's ok */
@@ -1189,6 +1195,8 @@
/* chain pattern in the expression */
LIST_ADDQ(&expr->patterns, &patl->list);
+ /* and from the reference */
+ LIST_ADDQ(&pat->ref->list_head, &patl->from_ref);
expr->ref->revision = rdtsc();
/* that's ok */
@@ -1219,6 +1227,8 @@
/* chain pattern in the expression */
LIST_ADDQ(&expr->patterns, &patl->list);
+ /* and from the reference */
+ LIST_ADDQ(&pat->ref->list_head, &patl->from_ref);
expr->ref->revision = rdtsc();
/* that's ok */
@@ -1268,6 +1278,7 @@
/* Insert the entry. */
ebmb_insert_prefix(&expr->pattern_tree, &node->node, 4);
+ LIST_ADDQ(&pat->ref->tree_head, &node->from_ref);
expr->ref->revision = rdtsc();
/* that's ok */
@@ -1296,6 +1307,7 @@
/* Insert the entry. */
ebmb_insert_prefix(&expr->pattern_tree_2, &node->node, 16);
+ LIST_ADDQ(&pat->ref->tree_head, &node->from_ref);
expr->ref->revision = rdtsc();
/* that's ok */
@@ -1340,6 +1352,7 @@
/* index the new node */
ebst_insert(&expr->pattern_tree, &node->node);
+ LIST_ADDQ(&pat->ref->tree_head, &node->from_ref);
expr->ref->revision = rdtsc();
/* that's ok */
@@ -1382,6 +1395,7 @@
/* index the new node */
ebmb_insert_prefix(&expr->pattern_tree, &node->node, len);
+ LIST_ADDQ(&pat->ref->tree_head, &node->from_ref);
expr->ref->revision = rdtsc();
/* that's ok */
@@ -1400,6 +1414,7 @@
/* Delete and free entry. */
LIST_DEL(&pat->list);
+ LIST_DEL(&pat->from_ref);
if (pat->pat.sflags & PAT_SF_REGFREE)
regex_free(pat->pat.ptr.reg);
else
@@ -1428,6 +1443,7 @@
/* Delete and free entry. */
ebmb_delete(node);
+ LIST_DEL(&elt->from_ref);
free(elt->data);
free(elt);
}
@@ -1448,6 +1464,7 @@
/* Delete and free entry. */
ebmb_delete(node);
+ LIST_DEL(&elt->from_ref);
free(elt->data);
free(elt);
}
@@ -1476,6 +1493,7 @@
/* Delete and free entry. */
ebmb_delete(node);
+ LIST_DEL(&elt->from_ref);
free(elt->data);
free(elt);
}
@@ -1858,6 +1876,8 @@
}
LIST_INIT(&elt->back_refs);
+ LIST_INIT(&elt->list_head);
+ LIST_INIT(&elt->tree_head);
LIST_ADDQ(&ref->head, &elt->list);
return elt;
fail:
@@ -1975,6 +1995,8 @@
bref->ref = NULL;
}
LIST_DEL(&elt->list);
+ LIST_DEL(&elt->list_head);
+ LIST_DEL(&elt->tree_head);
free(elt->pattern);
free(elt->sample);
free(elt);