/*
 * Elastic Binary Trees - macros and structures for Multi-Byte data nodes.
 * Version 6.0.6
 * (C) 2002-2011 - Willy Tarreau <w@1wt.eu>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, version 2.1
 * exclusively.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */

#ifndef _EBMBTREE_H
#define _EBMBTREE_H

#include <string.h>
#include "ebtree.h"

/* Return the structure of type <type> whose member <member> points to <ptr>.
 * This is a thin alias of container_of(), so <type> and <member> are passed
 * through verbatim (they are a type name and a member designator, not
 * expressions).
 */
#define ebmb_entry(ptr, type, member) container_of(ptr, type, member)

/*
 * Exported functions and macros.
 * Many of them are always inlined because they are extremely small, and
 * are generally called at most once or twice in a program.
 */

36/* Return leftmost node in the tree, or NULL if none */
37static forceinline struct ebmb_node *ebmb_first(struct eb_root *root)
38{
39 return ebmb_entry(eb_first(root), struct ebmb_node, node);
40}
41
42/* Return rightmost node in the tree, or NULL if none */
43static forceinline struct ebmb_node *ebmb_last(struct eb_root *root)
44{
45 return ebmb_entry(eb_last(root), struct ebmb_node, node);
46}
47
48/* Return next node in the tree, or NULL if none */
49static forceinline struct ebmb_node *ebmb_next(struct ebmb_node *ebmb)
50{
51 return ebmb_entry(eb_next(&ebmb->node), struct ebmb_node, node);
52}
53
54/* Return previous node in the tree, or NULL if none */
55static forceinline struct ebmb_node *ebmb_prev(struct ebmb_node *ebmb)
56{
57 return ebmb_entry(eb_prev(&ebmb->node), struct ebmb_node, node);
58}
59
Willy Tarreau2b570202013-05-07 15:58:28 +020060/* Return next leaf node within a duplicate sub-tree, or NULL if none. */
61static inline struct ebmb_node *ebmb_next_dup(struct ebmb_node *ebmb)
62{
63 return ebmb_entry(eb_next_dup(&ebmb->node), struct ebmb_node, node);
64}
65
66/* Return previous leaf node within a duplicate sub-tree, or NULL if none. */
67static inline struct ebmb_node *ebmb_prev_dup(struct ebmb_node *ebmb)
68{
69 return ebmb_entry(eb_prev_dup(&ebmb->node), struct ebmb_node, node);
70}
71
Willy Tarreauc2186022009-10-26 19:48:54 +010072/* Return next node in the tree, skipping duplicates, or NULL if none */
73static forceinline struct ebmb_node *ebmb_next_unique(struct ebmb_node *ebmb)
74{
75 return ebmb_entry(eb_next_unique(&ebmb->node), struct ebmb_node, node);
76}
77
78/* Return previous node in the tree, skipping duplicates, or NULL if none */
79static forceinline struct ebmb_node *ebmb_prev_unique(struct ebmb_node *ebmb)
80{
81 return ebmb_entry(eb_prev_unique(&ebmb->node), struct ebmb_node, node);
82}
83
84/* Delete node from the tree if it was linked in. Mark the node unused. Note
85 * that this function relies on a non-inlined generic function: eb_delete.
86 */
87static forceinline void ebmb_delete(struct ebmb_node *ebmb)
88{
89 eb_delete(&ebmb->node);
90}
91
/* The following functions are not inlined by default. They are implemented
 * in ebmbtree.c, which simply relies on their inline version (the
 * double-underscore variants of the same names).
 *
 * Units, as documented on the inline versions:
 *   - ebmb_lookup() / ebmb_insert() / ebmb_insert_prefix() : <len> in bytes
 *   - ebmb_lookup_prefix()                                 : <pfx> in bits
 */
struct ebmb_node *ebmb_lookup(struct eb_root *root, const void *x, unsigned int len);
struct ebmb_node *ebmb_insert(struct eb_root *root, struct ebmb_node *new, unsigned int len);
struct ebmb_node *ebmb_lookup_longest(struct eb_root *root, const void *x);
struct ebmb_node *ebmb_lookup_prefix(struct eb_root *root, const void *x, unsigned int pfx);
struct ebmb_node *ebmb_insert_prefix(struct eb_root *root, struct ebmb_node *new, unsigned int len);

Willy Tarreau81f3b802022-08-01 10:37:29 +0200101/* start from a valid leaf and find the next matching prefix that's either a
102 * duplicate, or immediately shorter than the node's current one and still
103 * matches it. The purpose is to permit a caller that is not satisfied with a
104 * result provided by ebmb_lookup_longest() to evaluate the next matching
105 * entry. Given that shorter keys are necessarily attached to nodes located
106 * above the current one, it's sufficient to restart from the current leaf and
107 * go up until we find a shorter prefix, or a non-matching one.
108 */
109static inline struct ebmb_node *ebmb_lookup_shorter(struct ebmb_node *start)
110{
111 eb_troot_t *t = start->node.leaf_p;
112 struct ebmb_node *node;
113
Ilya Shipitsin4a689da2022-10-29 09:34:32 +0500114 /* first, check for duplicates */
Willy Tarreau81f3b802022-08-01 10:37:29 +0200115 node = ebmb_next_dup(start);
116 if (node)
117 return node;
118
119 while (1) {
120 if (eb_gettag(t) == EB_LEFT) {
121 /* Walking up from left branch. We must ensure that we never
122 * walk beyond root.
123 */
124 if (unlikely(eb_clrtag((eb_untag(t, EB_LEFT))->b[EB_RGHT]) == NULL))
125 return NULL;
126 node = container_of(eb_root_to_node(eb_untag(t, EB_LEFT)), struct ebmb_node, node);
127 } else {
128 /* Walking up from right branch, so we cannot be below
129 * root. However, if we end up on a node with an even
130 * and positive bit, this is a cover node, which mandates
131 * that the left branch only contains cover values, so we
132 * must descend it.
133 */
134 node = container_of(eb_root_to_node(eb_untag(t, EB_RGHT)), struct ebmb_node, node);
135 if (node->node.bit > 0 && !(node->node.bit & 1))
136 return ebmb_entry(eb_walk_down(t, EB_LEFT), struct ebmb_node, node);
137 }
138
139 /* Note that <t> cannot be NULL at this stage */
140 t = node->node.node_p;
141
142 /* this is a node attached to a deeper (and possibly different)
143 * leaf, not interesting for us.
144 */
145 if (node->node.pfx >= start->node.pfx)
146 continue;
147
148 if (check_bits(start->key, node->key, 0, node->node.pfx) == 0)
149 break;
150 }
151 return node;
152}
153
/* The following functions are less likely to be used directly, because their
 * code is larger. The non-inlined version is preferred.
 */

158/* Delete node from the tree if it was linked in. Mark the node unused. */
159static forceinline void __ebmb_delete(struct ebmb_node *ebmb)
160{
161 __eb_delete(&ebmb->node);
162}
163
Joseph Herlant7c16c0e2018-11-13 19:55:57 -0800164/* Find the first occurrence of a key of a least <len> bytes matching <x> in the
Willy Tarreau414c4b22011-01-04 13:21:06 +0100165 * tree <root>. The caller is responsible for ensuring that <len> will not exceed
166 * the common parts between the tree's keys and <x>. In case of multiple matches,
167 * the leftmost node is returned. This means that this function can be used to
168 * lookup string keys by prefix if all keys in the tree are zero-terminated. If
169 * no match is found, NULL is returned. Returns first node if <len> is zero.
Willy Tarreauc2186022009-10-26 19:48:54 +0100170 */
171static forceinline struct ebmb_node *__ebmb_lookup(struct eb_root *root, const void *x, unsigned int len)
172{
173 struct ebmb_node *node;
174 eb_troot_t *troot;
Willy Tarreau3a932442010-05-09 19:29:23 +0200175 int pos, side;
176 int node_bit;
Willy Tarreauc2186022009-10-26 19:48:54 +0100177
178 troot = root->b[EB_LEFT];
179 if (unlikely(troot == NULL))
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100180 goto ret_null;
Willy Tarreauc2186022009-10-26 19:48:54 +0100181
Willy Tarreau414c4b22011-01-04 13:21:06 +0100182 if (unlikely(len == 0))
183 goto walk_down;
184
Willy Tarreau3a932442010-05-09 19:29:23 +0200185 pos = 0;
Willy Tarreauc2186022009-10-26 19:48:54 +0100186 while (1) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200187 if (eb_gettag(troot) == EB_LEAF) {
Willy Tarreauc2186022009-10-26 19:48:54 +0100188 node = container_of(eb_untag(troot, EB_LEAF),
189 struct ebmb_node, node.branches);
Willy Tarreau853926a2020-06-16 11:10:53 +0200190 if (eb_memcmp(node->key + pos, x, len) != 0)
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100191 goto ret_null;
Willy Tarreau3a932442010-05-09 19:29:23 +0200192 else
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100193 goto ret_node;
Willy Tarreauc2186022009-10-26 19:48:54 +0100194 }
195 node = container_of(eb_untag(troot, EB_NODE),
196 struct ebmb_node, node.branches);
197
Willy Tarreau3a932442010-05-09 19:29:23 +0200198 node_bit = node->node.bit;
199 if (node_bit < 0) {
Willy Tarreauc2186022009-10-26 19:48:54 +0100200 /* We have a dup tree now. Either it's for the same
201 * value, and we walk down left, or it's a different
202 * one and we don't have our key.
203 */
Willy Tarreau853926a2020-06-16 11:10:53 +0200204 if (eb_memcmp(node->key + pos, x, len) != 0)
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100205 goto ret_null;
206 else
207 goto walk_left;
Willy Tarreauc2186022009-10-26 19:48:54 +0100208 }
209
Willy Tarreau3a932442010-05-09 19:29:23 +0200210 /* OK, normal data node, let's walk down. We check if all full
211 * bytes are equal, and we start from the last one we did not
212 * completely check. We stop as soon as we reach the last byte,
213 * because we must decide to go left/right or abort.
214 */
215 node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
216 if (node_bit < 0) {
Joseph Herlant7c16c0e2018-11-13 19:55:57 -0800217 /* This surprising construction gives better performance
Willy Tarreau3a932442010-05-09 19:29:23 +0200218 * because gcc does not try to reorder the loop. Tested to
219 * be fine with 2.95 to 4.2.
220 */
221 while (1) {
Willy Tarreau414c4b22011-01-04 13:21:06 +0100222 if (node->key[pos++] ^ *(unsigned char*)(x++))
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100223 goto ret_null; /* more than one full byte is different */
Willy Tarreau414c4b22011-01-04 13:21:06 +0100224 if (--len == 0)
225 goto walk_left; /* return first node if all bytes matched */
Willy Tarreau3a932442010-05-09 19:29:23 +0200226 node_bit += 8;
227 if (node_bit >= 0)
228 break;
229 }
230 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100231
Willy Tarreau3a932442010-05-09 19:29:23 +0200232 /* here we know that only the last byte differs, so node_bit < 8.
233 * We have 2 possibilities :
234 * - more than the last bit differs => return NULL
235 * - walk down on side = (x[pos] >> node_bit) & 1
236 */
237 side = *(unsigned char *)x >> node_bit;
238 if (((node->key[pos] >> node_bit) ^ side) > 1)
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100239 goto ret_null;
Willy Tarreau3a932442010-05-09 19:29:23 +0200240 side &= 1;
241 troot = node->node.branches.b[side];
Willy Tarreauc2186022009-10-26 19:48:54 +0100242 }
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100243 walk_left:
244 troot = node->node.branches.b[EB_LEFT];
245 walk_down:
246 while (eb_gettag(troot) != EB_LEAF)
247 troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
248 node = container_of(eb_untag(troot, EB_LEAF),
249 struct ebmb_node, node.branches);
250 ret_node:
251 return node;
252 ret_null:
253 return NULL;
Willy Tarreauc2186022009-10-26 19:48:54 +0100254}
255
256/* Insert ebmb_node <new> into subtree starting at node root <root>.
257 * Only new->key needs be set with the key. The ebmb_node is returned.
258 * If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
Willy Tarreau414c4b22011-01-04 13:21:06 +0100259 * len is specified in bytes. It is absolutely mandatory that this length
260 * is the same for all keys in the tree. This function cannot be used to
261 * insert strings.
Willy Tarreauc2186022009-10-26 19:48:54 +0100262 */
263static forceinline struct ebmb_node *
264__ebmb_insert(struct eb_root *root, struct ebmb_node *new, unsigned int len)
265{
266 struct ebmb_node *old;
267 unsigned int side;
Willy Tarreau3a932442010-05-09 19:29:23 +0200268 eb_troot_t *troot, **up_ptr;
Willy Tarreau6258f7b2011-09-19 20:48:00 +0200269 eb_troot_t *root_right;
Willy Tarreauc2186022009-10-26 19:48:54 +0100270 int diff;
271 int bit;
Willy Tarreau3a932442010-05-09 19:29:23 +0200272 eb_troot_t *new_left, *new_rght;
273 eb_troot_t *new_leaf;
274 int old_node_bit;
Willy Tarreauc2186022009-10-26 19:48:54 +0100275
276 side = EB_LEFT;
277 troot = root->b[EB_LEFT];
278 root_right = root->b[EB_RGHT];
279 if (unlikely(troot == NULL)) {
280 /* Tree is empty, insert the leaf part below the left branch */
281 root->b[EB_LEFT] = eb_dotag(&new->node.branches, EB_LEAF);
282 new->node.leaf_p = eb_dotag(root, EB_LEFT);
283 new->node.node_p = NULL; /* node part unused */
284 return new;
285 }
286
Willy Tarreauc2186022009-10-26 19:48:54 +0100287 /* The tree descent is fairly easy :
288 * - first, check if we have reached a leaf node
289 * - second, check if we have gone too far
290 * - third, reiterate
291 * Everywhere, we use <new> for the node node we are inserting, <root>
292 * for the node we attach it to, and <old> for the node we are
293 * displacing below <new>. <troot> will always point to the future node
294 * (tagged with its type). <side> carries the side the node <new> is
295 * attached to below its parent, which is also where previous node
296 * was attached.
297 */
298
299 bit = 0;
300 while (1) {
301 if (unlikely(eb_gettag(troot) == EB_LEAF)) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200302 /* insert above a leaf */
Willy Tarreauc2186022009-10-26 19:48:54 +0100303 old = container_of(eb_untag(troot, EB_LEAF),
304 struct ebmb_node, node.branches);
Willy Tarreauc2186022009-10-26 19:48:54 +0100305 new->node.node_p = old->node.leaf_p;
Willy Tarreau3a932442010-05-09 19:29:23 +0200306 up_ptr = &old->node.leaf_p;
307 goto check_bit_and_break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100308 }
309
310 /* OK we're walking down this link */
311 old = container_of(eb_untag(troot, EB_NODE),
312 struct ebmb_node, node.branches);
Willy Tarreau3a932442010-05-09 19:29:23 +0200313 old_node_bit = old->node.bit;
314
315 if (unlikely(old->node.bit < 0)) {
316 /* We're above a duplicate tree, so we must compare the whole value */
317 new->node.node_p = old->node.node_p;
318 up_ptr = &old->node.node_p;
319 check_bit_and_break:
320 bit = equal_bits(new->key, old->key, bit, len << 3);
321 break;
322 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100323
324 /* Stop going down when we don't have common bits anymore. We
325 * also stop in front of a duplicates tree because it means we
326 * have to insert above. Note: we can compare more bits than
327 * the current node's because as long as they are identical, we
328 * know we descend along the correct side.
329 */
Willy Tarreau3a932442010-05-09 19:29:23 +0200330
331 bit = equal_bits(new->key, old->key, bit, old_node_bit);
332 if (unlikely(bit < old_node_bit)) {
333 /* The tree did not contain the key, so we insert <new> before the
334 * node <old>, and set ->bit to designate the lowest bit position in
335 * <new> which applies to ->branches.b[].
336 */
337 new->node.node_p = old->node.node_p;
338 up_ptr = &old->node.node_p;
339 break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100340 }
Willy Tarreau3a932442010-05-09 19:29:23 +0200341 /* we don't want to skip bits for further comparisons, so we must limit <bit>.
342 * However, since we're going down around <old_node_bit>, we know it will be
343 * properly matched, so we can skip this bit.
344 */
345 bit = old_node_bit + 1;
346
347 /* walk down */
348 root = &old->node.branches;
349 side = old_node_bit & 7;
350 side ^= 7;
351 side = (new->key[old_node_bit >> 3] >> side) & 1;
352 troot = root->b[side];
353 }
354
355 new_left = eb_dotag(&new->node.branches, EB_LEFT);
356 new_rght = eb_dotag(&new->node.branches, EB_RGHT);
357 new_leaf = eb_dotag(&new->node.branches, EB_LEAF);
358
Willy Tarreau3a932442010-05-09 19:29:23 +0200359 new->node.bit = bit;
Willy Tarreaua4a1cd12012-06-09 15:43:36 +0200360
361 /* Note: we can compare more bits than the current node's because as
362 * long as they are identical, we know we descend along the correct
363 * side. However we don't want to start to compare past the end.
364 */
365 diff = 0;
366 if (((unsigned)bit >> 3) < len)
367 diff = cmp_bits(new->key, old->key, bit);
368
Willy Tarreau3a932442010-05-09 19:29:23 +0200369 if (diff == 0) {
370 new->node.bit = -1; /* mark as new dup tree, just in case */
Willy Tarreauc2186022009-10-26 19:48:54 +0100371
Willy Tarreau3a932442010-05-09 19:29:23 +0200372 if (likely(eb_gettag(root_right))) {
373 /* we refuse to duplicate this key if the tree is
374 * tagged as containing only unique keys.
Willy Tarreauc2186022009-10-26 19:48:54 +0100375 */
Willy Tarreau3a932442010-05-09 19:29:23 +0200376 return old;
377 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100378
Willy Tarreau3a932442010-05-09 19:29:23 +0200379 if (eb_gettag(troot) != EB_LEAF) {
380 /* there was already a dup tree below */
381 struct eb_node *ret;
382 ret = eb_insert_dup(&old->node, &new->node);
383 return container_of(ret, struct ebmb_node, node);
384 }
385 /* otherwise fall through */
386 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100387
Willy Tarreau3a932442010-05-09 19:29:23 +0200388 if (diff >= 0) {
389 new->node.branches.b[EB_LEFT] = troot;
390 new->node.branches.b[EB_RGHT] = new_leaf;
391 new->node.leaf_p = new_rght;
392 *up_ptr = new_left;
393 }
Tim Düsterhusa8bfb4d2021-09-11 17:02:33 +0200394 else {
Willy Tarreau3a932442010-05-09 19:29:23 +0200395 new->node.branches.b[EB_LEFT] = new_leaf;
396 new->node.branches.b[EB_RGHT] = troot;
397 new->node.leaf_p = new_left;
398 *up_ptr = new_rght;
399 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100400
Willy Tarreau3a932442010-05-09 19:29:23 +0200401 /* Ok, now we are inserting <new> between <root> and <old>. <old>'s
402 * parent is already set to <new>, and the <root>'s branch is still in
403 * <side>. Update the root's leaf till we have it. Note that we can also
404 * find the side by checking the side of new->node.node_p.
405 */
406
407 root->b[side] = eb_dotag(&new->node.branches, EB_NODE);
408 return new;
409}
410
411
Joseph Herlant7c16c0e2018-11-13 19:55:57 -0800412/* Find the first occurrence of the longest prefix matching a key <x> in the
Willy Tarreau3a932442010-05-09 19:29:23 +0200413 * tree <root>. It's the caller's responsibility to ensure that key <x> is at
Willy Tarreau9f791932014-05-10 08:34:01 +0200414 * least as long as the keys in the tree. Note that this can be ensured by
415 * having a byte at the end of <x> which cannot be part of any prefix, typically
416 * the trailing zero for a string. If none can be found, return NULL.
Willy Tarreau3a932442010-05-09 19:29:23 +0200417 */
418static forceinline struct ebmb_node *__ebmb_lookup_longest(struct eb_root *root, const void *x)
419{
420 struct ebmb_node *node;
421 eb_troot_t *troot, *cover;
422 int pos, side;
423 int node_bit;
424
425 troot = root->b[EB_LEFT];
426 if (unlikely(troot == NULL))
427 return NULL;
428
429 cover = NULL;
430 pos = 0;
431 while (1) {
432 if ((eb_gettag(troot) == EB_LEAF)) {
433 node = container_of(eb_untag(troot, EB_LEAF),
434 struct ebmb_node, node.branches);
435 if (check_bits(x - pos, node->key, pos, node->node.pfx))
436 goto not_found;
437
438 return node;
439 }
440 node = container_of(eb_untag(troot, EB_NODE),
441 struct ebmb_node, node.branches);
442
443 node_bit = node->node.bit;
444 if (node_bit < 0) {
445 /* We have a dup tree now. Either it's for the same
446 * value, and we walk down left, or it's a different
447 * one and we don't have our key.
448 */
449 if (check_bits(x - pos, node->key, pos, node->node.pfx))
450 goto not_found;
451
452 troot = node->node.branches.b[EB_LEFT];
453 while (eb_gettag(troot) != EB_LEAF)
454 troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
455 node = container_of(eb_untag(troot, EB_LEAF),
456 struct ebmb_node, node.branches);
457 return node;
458 }
459
460 node_bit >>= 1; /* strip cover bit */
461 node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
462 if (node_bit < 0) {
463 /* This uncommon construction gives better performance
464 * because gcc does not try to reorder the loop. Tested to
465 * be fine with 2.95 to 4.2.
466 */
467 while (1) {
468 x++; pos++;
469 if (node->key[pos-1] ^ *(unsigned char*)(x-1))
470 goto not_found; /* more than one full byte is different */
471 node_bit += 8;
472 if (node_bit >= 0)
473 break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100474 }
Willy Tarreau3a932442010-05-09 19:29:23 +0200475 }
476
477 /* here we know that only the last byte differs, so 0 <= node_bit <= 7.
478 * We have 2 possibilities :
479 * - more than the last bit differs => data does not match
480 * - walk down on side = (x[pos] >> node_bit) & 1
481 */
482 side = *(unsigned char *)x >> node_bit;
483 if (((node->key[pos] >> node_bit) ^ side) > 1)
484 goto not_found;
485
486 if (!(node->node.bit & 1)) {
487 /* This is a cover node, let's keep a reference to it
488 * for later. The covering subtree is on the left, and
489 * the covered subtree is on the right, so we have to
490 * walk down right.
491 */
492 cover = node->node.branches.b[EB_LEFT];
493 troot = node->node.branches.b[EB_RGHT];
494 continue;
495 }
496 side &= 1;
497 troot = node->node.branches.b[side];
498 }
499
500 not_found:
Thayne McCombs8f0cc5c2021-01-07 21:35:52 -0700501 /* Walk down last cover tree if it exists. It does not matter if cover is NULL */
Willy Tarreau3a932442010-05-09 19:29:23 +0200502 return ebmb_entry(eb_walk_down(cover, EB_LEFT), struct ebmb_node, node);
503}
504
505
Joseph Herlant7c16c0e2018-11-13 19:55:57 -0800506/* Find the first occurrence of a prefix matching a key <x> of <pfx> BITS in the
Willy Tarreau414c4b22011-01-04 13:21:06 +0100507 * tree <root>. It's the caller's responsibility to ensure that key <x> is at
Willy Tarreau9f791932014-05-10 08:34:01 +0200508 * least as long as the keys in the tree. Note that this can be ensured by
509 * having a byte at the end of <x> which cannot be part of any prefix, typically
510 * the trailing zero for a string. If none can be found, return NULL.
Willy Tarreau3a932442010-05-09 19:29:23 +0200511 */
512static forceinline struct ebmb_node *__ebmb_lookup_prefix(struct eb_root *root, const void *x, unsigned int pfx)
513{
514 struct ebmb_node *node;
515 eb_troot_t *troot;
516 int pos, side;
517 int node_bit;
518
519 troot = root->b[EB_LEFT];
520 if (unlikely(troot == NULL))
521 return NULL;
522
523 pos = 0;
524 while (1) {
525 if ((eb_gettag(troot) == EB_LEAF)) {
526 node = container_of(eb_untag(troot, EB_LEAF),
527 struct ebmb_node, node.branches);
528 if (node->node.pfx != pfx)
529 return NULL;
530 if (check_bits(x - pos, node->key, pos, node->node.pfx))
531 return NULL;
532 return node;
533 }
534 node = container_of(eb_untag(troot, EB_NODE),
535 struct ebmb_node, node.branches);
536
537 node_bit = node->node.bit;
538 if (node_bit < 0) {
539 /* We have a dup tree now. Either it's for the same
540 * value, and we walk down left, or it's a different
541 * one and we don't have our key.
542 */
543 if (node->node.pfx != pfx)
544 return NULL;
545 if (check_bits(x - pos, node->key, pos, node->node.pfx))
546 return NULL;
547
548 troot = node->node.branches.b[EB_LEFT];
549 while (eb_gettag(troot) != EB_LEAF)
550 troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
551 node = container_of(eb_untag(troot, EB_LEAF),
552 struct ebmb_node, node.branches);
553 return node;
554 }
555
556 node_bit >>= 1; /* strip cover bit */
557 node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
558 if (node_bit < 0) {
559 /* This uncommon construction gives better performance
560 * because gcc does not try to reorder the loop. Tested to
561 * be fine with 2.95 to 4.2.
562 */
563 while (1) {
564 x++; pos++;
565 if (node->key[pos-1] ^ *(unsigned char*)(x-1))
566 return NULL; /* more than one full byte is different */
567 node_bit += 8;
568 if (node_bit >= 0)
569 break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100570 }
Willy Tarreau3a932442010-05-09 19:29:23 +0200571 }
572
573 /* here we know that only the last byte differs, so 0 <= node_bit <= 7.
574 * We have 2 possibilities :
575 * - more than the last bit differs => data does not match
576 * - walk down on side = (x[pos] >> node_bit) & 1
577 */
578 side = *(unsigned char *)x >> node_bit;
579 if (((node->key[pos] >> node_bit) ^ side) > 1)
580 return NULL;
581
582 if (!(node->node.bit & 1)) {
583 /* This is a cover node, it may be the entry we're
584 * looking for. We already know that it matches all the
585 * bits, let's compare prefixes and descend the cover
586 * subtree if they match.
587 */
Willy Tarreau22c0a932011-07-25 12:22:44 +0200588 if ((unsigned short)node->node.bit >> 1 == pfx)
Willy Tarreau3a932442010-05-09 19:29:23 +0200589 troot = node->node.branches.b[EB_LEFT];
590 else
591 troot = node->node.branches.b[EB_RGHT];
592 continue;
593 }
594 side &= 1;
595 troot = node->node.branches.b[side];
596 }
597}
598
599
600/* Insert ebmb_node <new> into a prefix subtree starting at node root <root>.
601 * Only new->key and new->pfx need be set with the key and its prefix length.
Ilya Shipitsin01881082021-08-07 14:41:56 +0500602 * Note that bits between <pfx> and <len> are theoretically ignored and should be
Willy Tarreau3a932442010-05-09 19:29:23 +0200603 * zero, as it is not certain yet that they will always be ignored everywhere
604 * (eg in bit compare functions).
605 * The ebmb_node is returned.
606 * If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
607 * len is specified in bytes.
608 */
609static forceinline struct ebmb_node *
610__ebmb_insert_prefix(struct eb_root *root, struct ebmb_node *new, unsigned int len)
611{
612 struct ebmb_node *old;
613 unsigned int side;
614 eb_troot_t *troot, **up_ptr;
Willy Tarreau6258f7b2011-09-19 20:48:00 +0200615 eb_troot_t *root_right;
Willy Tarreau3a932442010-05-09 19:29:23 +0200616 int diff;
617 int bit;
618 eb_troot_t *new_left, *new_rght;
619 eb_troot_t *new_leaf;
620 int old_node_bit;
Willy Tarreaubf13e532022-11-15 08:08:24 +0100621 unsigned int npfx = new->node.pfx;
622 unsigned int npfx1 = npfx << 1;
Willy Tarreau3a932442010-05-09 19:29:23 +0200623
624 side = EB_LEFT;
625 troot = root->b[EB_LEFT];
626 root_right = root->b[EB_RGHT];
627 if (unlikely(troot == NULL)) {
628 /* Tree is empty, insert the leaf part below the left branch */
629 root->b[EB_LEFT] = eb_dotag(&new->node.branches, EB_LEAF);
630 new->node.leaf_p = eb_dotag(root, EB_LEFT);
631 new->node.node_p = NULL; /* node part unused */
632 return new;
633 }
634
635 len <<= 3;
Willy Tarreaubf13e532022-11-15 08:08:24 +0100636 if (len > npfx)
637 len = npfx;
Willy Tarreau3a932442010-05-09 19:29:23 +0200638
639 /* The tree descent is fairly easy :
640 * - first, check if we have reached a leaf node
641 * - second, check if we have gone too far
642 * - third, reiterate
643 * Everywhere, we use <new> for the node node we are inserting, <root>
644 * for the node we attach it to, and <old> for the node we are
645 * displacing below <new>. <troot> will always point to the future node
646 * (tagged with its type). <side> carries the side the node <new> is
647 * attached to below its parent, which is also where previous node
648 * was attached.
649 */
650
651 bit = 0;
652 while (1) {
653 if (unlikely(eb_gettag(troot) == EB_LEAF)) {
654 /* Insert above a leaf. Note that this leaf could very
655 * well be part of a cover node.
656 */
657 old = container_of(eb_untag(troot, EB_LEAF),
658 struct ebmb_node, node.branches);
659 new->node.node_p = old->node.leaf_p;
660 up_ptr = &old->node.leaf_p;
661 goto check_bit_and_break;
662 }
663
664 /* OK we're walking down this link */
665 old = container_of(eb_untag(troot, EB_NODE),
666 struct ebmb_node, node.branches);
667 old_node_bit = old->node.bit;
668 /* Note that old_node_bit can be :
669 * < 0 : dup tree
670 * = 2N : cover node for N bits
671 * = 2N+1 : normal node at N bits
672 */
673
674 if (unlikely(old_node_bit < 0)) {
675 /* We're above a duplicate tree, so we must compare the whole value */
676 new->node.node_p = old->node.node_p;
677 up_ptr = &old->node.node_p;
678 check_bit_and_break:
679 /* No need to compare everything if the leaves are shorter than the new one. */
680 if (len > old->node.pfx)
681 len = old->node.pfx;
682 bit = equal_bits(new->key, old->key, bit, len);
Willy Tarreauc2186022009-10-26 19:48:54 +0100683 break;
684 }
685
Willy Tarreau3a932442010-05-09 19:29:23 +0200686 /* WARNING: for the two blocks below, <bit> is counted in half-bits */
687
688 bit = equal_bits(new->key, old->key, bit, old_node_bit >> 1);
689 bit = (bit << 1) + 1; // assume comparisons with normal nodes
Willy Tarreau3a932442010-05-09 19:29:23 +0200690
691 /* we must always check that our prefix is larger than the nodes
692 * we visit, otherwise we have to stop going down. The following
693 * test is able to stop before both normal and cover nodes.
694 */
Willy Tarreaubf13e532022-11-15 08:08:24 +0100695 if (bit >= npfx1 && npfx1 < old_node_bit) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200696 /* insert cover node here on the left */
697 new->node.node_p = old->node.node_p;
698 up_ptr = &old->node.node_p;
Willy Tarreaubf13e532022-11-15 08:08:24 +0100699 new->node.bit = npfx1;
Willy Tarreau3a932442010-05-09 19:29:23 +0200700 diff = -1;
Willy Tarreau3a932442010-05-09 19:29:23 +0200701 goto insert_above;
702 }
703
704 if (unlikely(bit < old_node_bit)) {
705 /* The tree did not contain the key, so we insert <new> before the
706 * node <old>, and set ->bit to designate the lowest bit position in
707 * <new> which applies to ->branches.b[]. We know that the bit is not
708 * greater than the prefix length thanks to the test above.
709 */
710 new->node.node_p = old->node.node_p;
711 up_ptr = &old->node.node_p;
712 new->node.bit = bit;
713 diff = cmp_bits(new->key, old->key, bit >> 1);
Willy Tarreau3a932442010-05-09 19:29:23 +0200714 goto insert_above;
715 }
716
717 if (!(old_node_bit & 1)) {
718 /* if we encounter a cover node with our exact prefix length, it's
719 * necessarily the same value, so we insert there as a duplicate on
720 * the left. For that, we go down on the left and the leaf detection
721 * code will finish the job.
722 */
Willy Tarreaubf13e532022-11-15 08:08:24 +0100723 if (npfx1 == old_node_bit) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200724 root = &old->node.branches;
725 side = EB_LEFT;
726 troot = root->b[side];
Willy Tarreau3a932442010-05-09 19:29:23 +0200727 continue;
728 }
729
730 /* cover nodes are always walked through on the right */
731 side = EB_RGHT;
732 bit = old_node_bit >> 1; /* recheck that bit */
733 root = &old->node.branches;
734 troot = root->b[side];
Willy Tarreau3a932442010-05-09 19:29:23 +0200735 continue;
736 }
737
738 /* we don't want to skip bits for further comparisons, so we must limit <bit>.
739 * However, since we're going down around <old_node_bit>, we know it will be
740 * properly matched, so we can skip this bit.
741 */
742 old_node_bit >>= 1;
743 bit = old_node_bit + 1;
744
Willy Tarreauc2186022009-10-26 19:48:54 +0100745 /* walk down */
746 root = &old->node.branches;
Willy Tarreau3a932442010-05-09 19:29:23 +0200747 side = old_node_bit & 7;
748 side ^= 7;
749 side = (new->key[old_node_bit >> 3] >> side) & 1;
Willy Tarreauc2186022009-10-26 19:48:54 +0100750 troot = root->b[side];
751 }
752
Willy Tarreau3a932442010-05-09 19:29:23 +0200753 /* Right here, we have 4 possibilities :
754 * - the tree does not contain any leaf matching the
755 * key, and we have new->key < old->key. We insert
756 * new above old, on the left ;
757 *
758 * - the tree does not contain any leaf matching the
759 * key, and we have new->key > old->key. We insert
760 * new above old, on the right ;
761 *
762 * - the tree does contain the key with the same prefix
763 * length. We add the new key next to it as a first
764 * duplicate (since it was alone).
765 *
766 * The last two cases can easily be partially merged.
767 *
768 * - the tree contains a leaf matching the key, we have
769 * to insert above it as a cover node. The leaf with
770 * the shortest prefix becomes the left subtree and
771 * the leaf with the longest prefix becomes the right
772 * one. The cover node gets the min of both prefixes
773 * as its new bit.
Willy Tarreauc2186022009-10-26 19:48:54 +0100774 */
775
Willy Tarreau3a932442010-05-09 19:29:23 +0200776 /* first we want to ensure that we compare the correct bit, which means
777 * the largest common to both nodes.
Willy Tarreauc2186022009-10-26 19:48:54 +0100778 */
Willy Tarreaubf13e532022-11-15 08:08:24 +0100779 if (bit > npfx)
780 bit = npfx;
Willy Tarreau3a932442010-05-09 19:29:23 +0200781 if (bit > old->node.pfx)
782 bit = old->node.pfx;
783
Willy Tarreau3a932442010-05-09 19:29:23 +0200784 new->node.bit = (bit << 1) + 1; /* assume normal node by default */
785
786 /* if one prefix is included in the second one, we don't compare bits
787 * because they won't necessarily match, we just proceed with a cover
788 * node insertion.
789 */
790 diff = 0;
Willy Tarreaubf13e532022-11-15 08:08:24 +0100791 if (bit < old->node.pfx && bit < npfx)
Willy Tarreau3a932442010-05-09 19:29:23 +0200792 diff = cmp_bits(new->key, old->key, bit);
793
794 if (diff == 0) {
795 /* Both keys match. Either it's a duplicate entry or we have to
796 * put the shortest prefix left and the largest one right below
797 * a new cover node. By default, diff==0 means we'll be inserted
798 * on the right.
799 */
800 new->node.bit--; /* anticipate cover node insertion */
Willy Tarreaubf13e532022-11-15 08:08:24 +0100801 if (npfx == old->node.pfx) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200802 new->node.bit = -1; /* mark as new dup tree, just in case */
803
804 if (unlikely(eb_gettag(root_right))) {
805 /* we refuse to duplicate this key if the tree is
806 * tagged as containing only unique keys.
807 */
808 return old;
809 }
810
811 if (eb_gettag(troot) != EB_LEAF) {
812 /* there was already a dup tree below */
813 struct eb_node *ret;
814 ret = eb_insert_dup(&old->node, &new->node);
815 return container_of(ret, struct ebmb_node, node);
816 }
817 /* otherwise fall through to insert first duplicate */
818 }
819 /* otherwise we just rely on the tests below to select the right side */
Willy Tarreaubf13e532022-11-15 08:08:24 +0100820 else if (npfx < old->node.pfx)
Willy Tarreau3a932442010-05-09 19:29:23 +0200821 diff = -1; /* force insertion to left side */
822 }
823
824 insert_above:
825 new_left = eb_dotag(&new->node.branches, EB_LEFT);
826 new_rght = eb_dotag(&new->node.branches, EB_RGHT);
827 new_leaf = eb_dotag(&new->node.branches, EB_LEAF);
828
829 if (diff >= 0) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200830 new->node.branches.b[EB_LEFT] = troot;
831 new->node.branches.b[EB_RGHT] = new_leaf;
832 new->node.leaf_p = new_rght;
833 *up_ptr = new_left;
834 }
835 else {
Willy Tarreau3a932442010-05-09 19:29:23 +0200836 new->node.branches.b[EB_LEFT] = new_leaf;
837 new->node.branches.b[EB_RGHT] = troot;
838 new->node.leaf_p = new_left;
839 *up_ptr = new_rght;
840 }
841
Willy Tarreauc2186022009-10-26 19:48:54 +0100842 root->b[side] = eb_dotag(&new->node.branches, EB_NODE);
843 return new;
844}
845
Willy Tarreau3a932442010-05-09 19:29:23 +0200846
847
Willy Tarreauead63a02009-11-02 14:41:23 +0100848#endif /* _EBMBTREE_H */
849