/*
 * Elastic Binary Trees - macros and structures for Multi-Byte data nodes.
 * Version 6.0.6
 * (C) 2002-2011 - Willy Tarreau <w@1wt.eu>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, version 2.1
 * exclusively.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */

Willy Tarreauead63a02009-11-02 14:41:23 +010021#ifndef _EBMBTREE_H
22#define _EBMBTREE_H
23
Willy Tarreauc2186022009-10-26 19:48:54 +010024#include <string.h>
25#include "ebtree.h"
26
/* Return the structure of type <type> whose member <member> points to <ptr> */
#define ebmb_entry(ptr, type, member) container_of(ptr, type, member)

/* Multi-byte trees reuse the generic ebtree root initializers as-is. */
#define EBMB_ROOT	EB_ROOT
#define EBMB_TREE_HEAD	EB_TREE_HEAD

33/* This structure carries a node, a leaf, and a key. It must start with the
34 * eb_node so that it can be cast into an eb_node. We could also have put some
35 * sort of transparent union here to reduce the indirection level, but the fact
36 * is, the end user is not meant to manipulate internals, so this is pointless.
37 * The 'node.bit' value here works differently from scalar types, as it contains
38 * the number of identical bits between the two branches.
Willy Tarreau41136de2020-02-22 15:55:33 +010039 * Note that we take a great care of making sure the key is located exactly at
40 * the end of the struct even if that involves holes before it, so that it
41 * always aliases any external key a user would append after. This is why the
42 * key uses the same alignment as the struct.
Willy Tarreauc2186022009-10-26 19:48:54 +010043 */
44struct ebmb_node {
45 struct eb_node node; /* the tree node, must be at the beginning */
Willy Tarreau41136de2020-02-22 15:55:33 +010046 ALWAYS_ALIGN(sizeof(void*));
Willy Tarreauc2186022009-10-26 19:48:54 +010047 unsigned char key[0]; /* the key, its size depends on the application */
Willy Tarreau41136de2020-02-22 15:55:33 +010048} ALIGNED(sizeof(void*));
Willy Tarreauc2186022009-10-26 19:48:54 +010049
/*
 * Exported functions and macros.
 * Many of them are always inlined because they are extremely small, and
 * are generally called at most once or twice in a program.
 */

56/* Return leftmost node in the tree, or NULL if none */
57static forceinline struct ebmb_node *ebmb_first(struct eb_root *root)
58{
59 return ebmb_entry(eb_first(root), struct ebmb_node, node);
60}
61
62/* Return rightmost node in the tree, or NULL if none */
63static forceinline struct ebmb_node *ebmb_last(struct eb_root *root)
64{
65 return ebmb_entry(eb_last(root), struct ebmb_node, node);
66}
67
68/* Return next node in the tree, or NULL if none */
69static forceinline struct ebmb_node *ebmb_next(struct ebmb_node *ebmb)
70{
71 return ebmb_entry(eb_next(&ebmb->node), struct ebmb_node, node);
72}
73
74/* Return previous node in the tree, or NULL if none */
75static forceinline struct ebmb_node *ebmb_prev(struct ebmb_node *ebmb)
76{
77 return ebmb_entry(eb_prev(&ebmb->node), struct ebmb_node, node);
78}
79
Willy Tarreau2b570202013-05-07 15:58:28 +020080/* Return next leaf node within a duplicate sub-tree, or NULL if none. */
81static inline struct ebmb_node *ebmb_next_dup(struct ebmb_node *ebmb)
82{
83 return ebmb_entry(eb_next_dup(&ebmb->node), struct ebmb_node, node);
84}
85
86/* Return previous leaf node within a duplicate sub-tree, or NULL if none. */
87static inline struct ebmb_node *ebmb_prev_dup(struct ebmb_node *ebmb)
88{
89 return ebmb_entry(eb_prev_dup(&ebmb->node), struct ebmb_node, node);
90}
91
Willy Tarreauc2186022009-10-26 19:48:54 +010092/* Return next node in the tree, skipping duplicates, or NULL if none */
93static forceinline struct ebmb_node *ebmb_next_unique(struct ebmb_node *ebmb)
94{
95 return ebmb_entry(eb_next_unique(&ebmb->node), struct ebmb_node, node);
96}
97
98/* Return previous node in the tree, skipping duplicates, or NULL if none */
99static forceinline struct ebmb_node *ebmb_prev_unique(struct ebmb_node *ebmb)
100{
101 return ebmb_entry(eb_prev_unique(&ebmb->node), struct ebmb_node, node);
102}
103
104/* Delete node from the tree if it was linked in. Mark the node unused. Note
105 * that this function relies on a non-inlined generic function: eb_delete.
106 */
107static forceinline void ebmb_delete(struct ebmb_node *ebmb)
108{
109 eb_delete(&ebmb->node);
110}
111
/* The following functions are not inlined by default. They are declared
 * in ebmbtree.c, which simply relies on their inline version.
 */
struct ebmb_node *ebmb_lookup(struct eb_root *root, const void *x, unsigned int len);
struct ebmb_node *ebmb_insert(struct eb_root *root, struct ebmb_node *new, unsigned int len);
struct ebmb_node *ebmb_lookup_longest(struct eb_root *root, const void *x);
struct ebmb_node *ebmb_lookup_prefix(struct eb_root *root, const void *x, unsigned int pfx);
struct ebmb_node *ebmb_insert_prefix(struct eb_root *root, struct ebmb_node *new, unsigned int len);

Willy Tarreauc6abc082022-08-01 10:37:29 +0200121/* start from a valid leaf and find the next matching prefix that's either a
122 * duplicate, or immediately shorter than the node's current one and still
123 * matches it. The purpose is to permit a caller that is not satisfied with a
124 * result provided by ebmb_lookup_longest() to evaluate the next matching
125 * entry. Given that shorter keys are necessarily attached to nodes located
126 * above the current one, it's sufficient to restart from the current leaf and
127 * go up until we find a shorter prefix, or a non-matching one.
128 */
129static inline struct ebmb_node *ebmb_lookup_shorter(struct ebmb_node *start)
130{
131 eb_troot_t *t = start->node.leaf_p;
132 struct ebmb_node *node;
133
134 /* first, chcek for duplicates */
135 node = ebmb_next_dup(start);
136 if (node)
137 return node;
138
139 while (1) {
140 if (eb_gettag(t) == EB_LEFT) {
141 /* Walking up from left branch. We must ensure that we never
142 * walk beyond root.
143 */
144 if (unlikely(eb_clrtag((eb_untag(t, EB_LEFT))->b[EB_RGHT]) == NULL))
145 return NULL;
146 node = container_of(eb_root_to_node(eb_untag(t, EB_LEFT)), struct ebmb_node, node);
147 } else {
148 /* Walking up from right branch, so we cannot be below
149 * root. However, if we end up on a node with an even
150 * and positive bit, this is a cover node, which mandates
151 * that the left branch only contains cover values, so we
152 * must descend it.
153 */
154 node = container_of(eb_root_to_node(eb_untag(t, EB_RGHT)), struct ebmb_node, node);
155 if (node->node.bit > 0 && !(node->node.bit & 1))
156 return ebmb_entry(eb_walk_down(t, EB_LEFT), struct ebmb_node, node);
157 }
158
159 /* Note that <t> cannot be NULL at this stage */
160 t = node->node.node_p;
161
162 /* this is a node attached to a deeper (and possibly different)
163 * leaf, not interesting for us.
164 */
165 if (node->node.pfx >= start->node.pfx)
166 continue;
167
168 if (check_bits(start->key, node->key, 0, node->node.pfx) == 0)
169 break;
170 }
171 return node;
172}
173
/* The following functions are less likely to be used directly, because their
 * code is larger. The non-inlined version is preferred.
 */

178/* Delete node from the tree if it was linked in. Mark the node unused. */
179static forceinline void __ebmb_delete(struct ebmb_node *ebmb)
180{
181 __eb_delete(&ebmb->node);
182}
183
Joseph Herlant7c16c0e2018-11-13 19:55:57 -0800184/* Find the first occurrence of a key of a least <len> bytes matching <x> in the
Willy Tarreau414c4b22011-01-04 13:21:06 +0100185 * tree <root>. The caller is responsible for ensuring that <len> will not exceed
186 * the common parts between the tree's keys and <x>. In case of multiple matches,
187 * the leftmost node is returned. This means that this function can be used to
188 * lookup string keys by prefix if all keys in the tree are zero-terminated. If
189 * no match is found, NULL is returned. Returns first node if <len> is zero.
Willy Tarreauc2186022009-10-26 19:48:54 +0100190 */
191static forceinline struct ebmb_node *__ebmb_lookup(struct eb_root *root, const void *x, unsigned int len)
192{
193 struct ebmb_node *node;
194 eb_troot_t *troot;
Willy Tarreau3a932442010-05-09 19:29:23 +0200195 int pos, side;
196 int node_bit;
Willy Tarreauc2186022009-10-26 19:48:54 +0100197
198 troot = root->b[EB_LEFT];
199 if (unlikely(troot == NULL))
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100200 goto ret_null;
Willy Tarreauc2186022009-10-26 19:48:54 +0100201
Willy Tarreau414c4b22011-01-04 13:21:06 +0100202 if (unlikely(len == 0))
203 goto walk_down;
204
Willy Tarreau3a932442010-05-09 19:29:23 +0200205 pos = 0;
Willy Tarreauc2186022009-10-26 19:48:54 +0100206 while (1) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200207 if (eb_gettag(troot) == EB_LEAF) {
Willy Tarreauc2186022009-10-26 19:48:54 +0100208 node = container_of(eb_untag(troot, EB_LEAF),
209 struct ebmb_node, node.branches);
Willy Tarreau853926a2020-06-16 11:10:53 +0200210 if (eb_memcmp(node->key + pos, x, len) != 0)
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100211 goto ret_null;
Willy Tarreau3a932442010-05-09 19:29:23 +0200212 else
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100213 goto ret_node;
Willy Tarreauc2186022009-10-26 19:48:54 +0100214 }
215 node = container_of(eb_untag(troot, EB_NODE),
216 struct ebmb_node, node.branches);
217
Willy Tarreau3a932442010-05-09 19:29:23 +0200218 node_bit = node->node.bit;
219 if (node_bit < 0) {
Willy Tarreauc2186022009-10-26 19:48:54 +0100220 /* We have a dup tree now. Either it's for the same
221 * value, and we walk down left, or it's a different
222 * one and we don't have our key.
223 */
Willy Tarreau853926a2020-06-16 11:10:53 +0200224 if (eb_memcmp(node->key + pos, x, len) != 0)
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100225 goto ret_null;
226 else
227 goto walk_left;
Willy Tarreauc2186022009-10-26 19:48:54 +0100228 }
229
Willy Tarreau3a932442010-05-09 19:29:23 +0200230 /* OK, normal data node, let's walk down. We check if all full
231 * bytes are equal, and we start from the last one we did not
232 * completely check. We stop as soon as we reach the last byte,
233 * because we must decide to go left/right or abort.
234 */
235 node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
236 if (node_bit < 0) {
Joseph Herlant7c16c0e2018-11-13 19:55:57 -0800237 /* This surprising construction gives better performance
Willy Tarreau3a932442010-05-09 19:29:23 +0200238 * because gcc does not try to reorder the loop. Tested to
239 * be fine with 2.95 to 4.2.
240 */
241 while (1) {
Willy Tarreau414c4b22011-01-04 13:21:06 +0100242 if (node->key[pos++] ^ *(unsigned char*)(x++))
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100243 goto ret_null; /* more than one full byte is different */
Willy Tarreau414c4b22011-01-04 13:21:06 +0100244 if (--len == 0)
245 goto walk_left; /* return first node if all bytes matched */
Willy Tarreau3a932442010-05-09 19:29:23 +0200246 node_bit += 8;
247 if (node_bit >= 0)
248 break;
249 }
250 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100251
Willy Tarreau3a932442010-05-09 19:29:23 +0200252 /* here we know that only the last byte differs, so node_bit < 8.
253 * We have 2 possibilities :
254 * - more than the last bit differs => return NULL
255 * - walk down on side = (x[pos] >> node_bit) & 1
256 */
257 side = *(unsigned char *)x >> node_bit;
258 if (((node->key[pos] >> node_bit) ^ side) > 1)
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100259 goto ret_null;
Willy Tarreau3a932442010-05-09 19:29:23 +0200260 side &= 1;
261 troot = node->node.branches.b[side];
Willy Tarreauc2186022009-10-26 19:48:54 +0100262 }
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100263 walk_left:
264 troot = node->node.branches.b[EB_LEFT];
265 walk_down:
266 while (eb_gettag(troot) != EB_LEAF)
267 troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
268 node = container_of(eb_untag(troot, EB_LEAF),
269 struct ebmb_node, node.branches);
270 ret_node:
271 return node;
272 ret_null:
273 return NULL;
Willy Tarreauc2186022009-10-26 19:48:54 +0100274}
275
276/* Insert ebmb_node <new> into subtree starting at node root <root>.
277 * Only new->key needs be set with the key. The ebmb_node is returned.
278 * If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
Willy Tarreau414c4b22011-01-04 13:21:06 +0100279 * len is specified in bytes. It is absolutely mandatory that this length
280 * is the same for all keys in the tree. This function cannot be used to
281 * insert strings.
Willy Tarreauc2186022009-10-26 19:48:54 +0100282 */
283static forceinline struct ebmb_node *
284__ebmb_insert(struct eb_root *root, struct ebmb_node *new, unsigned int len)
285{
286 struct ebmb_node *old;
287 unsigned int side;
Willy Tarreau3a932442010-05-09 19:29:23 +0200288 eb_troot_t *troot, **up_ptr;
Willy Tarreau6258f7b2011-09-19 20:48:00 +0200289 eb_troot_t *root_right;
Willy Tarreauc2186022009-10-26 19:48:54 +0100290 int diff;
291 int bit;
Willy Tarreau3a932442010-05-09 19:29:23 +0200292 eb_troot_t *new_left, *new_rght;
293 eb_troot_t *new_leaf;
294 int old_node_bit;
Willy Tarreauc2186022009-10-26 19:48:54 +0100295
296 side = EB_LEFT;
297 troot = root->b[EB_LEFT];
298 root_right = root->b[EB_RGHT];
299 if (unlikely(troot == NULL)) {
300 /* Tree is empty, insert the leaf part below the left branch */
301 root->b[EB_LEFT] = eb_dotag(&new->node.branches, EB_LEAF);
302 new->node.leaf_p = eb_dotag(root, EB_LEFT);
303 new->node.node_p = NULL; /* node part unused */
304 return new;
305 }
306
Willy Tarreauc2186022009-10-26 19:48:54 +0100307 /* The tree descent is fairly easy :
308 * - first, check if we have reached a leaf node
309 * - second, check if we have gone too far
310 * - third, reiterate
311 * Everywhere, we use <new> for the node node we are inserting, <root>
312 * for the node we attach it to, and <old> for the node we are
313 * displacing below <new>. <troot> will always point to the future node
314 * (tagged with its type). <side> carries the side the node <new> is
315 * attached to below its parent, which is also where previous node
316 * was attached.
317 */
318
319 bit = 0;
320 while (1) {
321 if (unlikely(eb_gettag(troot) == EB_LEAF)) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200322 /* insert above a leaf */
Willy Tarreauc2186022009-10-26 19:48:54 +0100323 old = container_of(eb_untag(troot, EB_LEAF),
324 struct ebmb_node, node.branches);
Willy Tarreauc2186022009-10-26 19:48:54 +0100325 new->node.node_p = old->node.leaf_p;
Willy Tarreau3a932442010-05-09 19:29:23 +0200326 up_ptr = &old->node.leaf_p;
327 goto check_bit_and_break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100328 }
329
330 /* OK we're walking down this link */
331 old = container_of(eb_untag(troot, EB_NODE),
332 struct ebmb_node, node.branches);
Willy Tarreau3a932442010-05-09 19:29:23 +0200333 old_node_bit = old->node.bit;
334
335 if (unlikely(old->node.bit < 0)) {
336 /* We're above a duplicate tree, so we must compare the whole value */
337 new->node.node_p = old->node.node_p;
338 up_ptr = &old->node.node_p;
339 check_bit_and_break:
340 bit = equal_bits(new->key, old->key, bit, len << 3);
341 break;
342 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100343
344 /* Stop going down when we don't have common bits anymore. We
345 * also stop in front of a duplicates tree because it means we
346 * have to insert above. Note: we can compare more bits than
347 * the current node's because as long as they are identical, we
348 * know we descend along the correct side.
349 */
Willy Tarreau3a932442010-05-09 19:29:23 +0200350
351 bit = equal_bits(new->key, old->key, bit, old_node_bit);
352 if (unlikely(bit < old_node_bit)) {
353 /* The tree did not contain the key, so we insert <new> before the
354 * node <old>, and set ->bit to designate the lowest bit position in
355 * <new> which applies to ->branches.b[].
356 */
357 new->node.node_p = old->node.node_p;
358 up_ptr = &old->node.node_p;
359 break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100360 }
Willy Tarreau3a932442010-05-09 19:29:23 +0200361 /* we don't want to skip bits for further comparisons, so we must limit <bit>.
362 * However, since we're going down around <old_node_bit>, we know it will be
363 * properly matched, so we can skip this bit.
364 */
365 bit = old_node_bit + 1;
366
367 /* walk down */
368 root = &old->node.branches;
369 side = old_node_bit & 7;
370 side ^= 7;
371 side = (new->key[old_node_bit >> 3] >> side) & 1;
372 troot = root->b[side];
373 }
374
375 new_left = eb_dotag(&new->node.branches, EB_LEFT);
376 new_rght = eb_dotag(&new->node.branches, EB_RGHT);
377 new_leaf = eb_dotag(&new->node.branches, EB_LEAF);
378
Willy Tarreau3a932442010-05-09 19:29:23 +0200379 new->node.bit = bit;
Willy Tarreaua4a1cd12012-06-09 15:43:36 +0200380
381 /* Note: we can compare more bits than the current node's because as
382 * long as they are identical, we know we descend along the correct
383 * side. However we don't want to start to compare past the end.
384 */
385 diff = 0;
386 if (((unsigned)bit >> 3) < len)
387 diff = cmp_bits(new->key, old->key, bit);
388
Willy Tarreau3a932442010-05-09 19:29:23 +0200389 if (diff == 0) {
390 new->node.bit = -1; /* mark as new dup tree, just in case */
Willy Tarreauc2186022009-10-26 19:48:54 +0100391
Willy Tarreau3a932442010-05-09 19:29:23 +0200392 if (likely(eb_gettag(root_right))) {
393 /* we refuse to duplicate this key if the tree is
394 * tagged as containing only unique keys.
Willy Tarreauc2186022009-10-26 19:48:54 +0100395 */
Willy Tarreau3a932442010-05-09 19:29:23 +0200396 return old;
397 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100398
Willy Tarreau3a932442010-05-09 19:29:23 +0200399 if (eb_gettag(troot) != EB_LEAF) {
400 /* there was already a dup tree below */
401 struct eb_node *ret;
402 ret = eb_insert_dup(&old->node, &new->node);
403 return container_of(ret, struct ebmb_node, node);
404 }
405 /* otherwise fall through */
406 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100407
Willy Tarreau3a932442010-05-09 19:29:23 +0200408 if (diff >= 0) {
409 new->node.branches.b[EB_LEFT] = troot;
410 new->node.branches.b[EB_RGHT] = new_leaf;
411 new->node.leaf_p = new_rght;
412 *up_ptr = new_left;
413 }
414 else if (diff < 0) {
415 new->node.branches.b[EB_LEFT] = new_leaf;
416 new->node.branches.b[EB_RGHT] = troot;
417 new->node.leaf_p = new_left;
418 *up_ptr = new_rght;
419 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100420
Willy Tarreau3a932442010-05-09 19:29:23 +0200421 /* Ok, now we are inserting <new> between <root> and <old>. <old>'s
422 * parent is already set to <new>, and the <root>'s branch is still in
423 * <side>. Update the root's leaf till we have it. Note that we can also
424 * find the side by checking the side of new->node.node_p.
425 */
426
427 root->b[side] = eb_dotag(&new->node.branches, EB_NODE);
428 return new;
429}
430
431
Joseph Herlant7c16c0e2018-11-13 19:55:57 -0800432/* Find the first occurrence of the longest prefix matching a key <x> in the
Willy Tarreau3a932442010-05-09 19:29:23 +0200433 * tree <root>. It's the caller's responsibility to ensure that key <x> is at
Willy Tarreau9f791932014-05-10 08:34:01 +0200434 * least as long as the keys in the tree. Note that this can be ensured by
435 * having a byte at the end of <x> which cannot be part of any prefix, typically
436 * the trailing zero for a string. If none can be found, return NULL.
Willy Tarreau3a932442010-05-09 19:29:23 +0200437 */
438static forceinline struct ebmb_node *__ebmb_lookup_longest(struct eb_root *root, const void *x)
439{
440 struct ebmb_node *node;
441 eb_troot_t *troot, *cover;
442 int pos, side;
443 int node_bit;
444
445 troot = root->b[EB_LEFT];
446 if (unlikely(troot == NULL))
447 return NULL;
448
449 cover = NULL;
450 pos = 0;
451 while (1) {
452 if ((eb_gettag(troot) == EB_LEAF)) {
453 node = container_of(eb_untag(troot, EB_LEAF),
454 struct ebmb_node, node.branches);
455 if (check_bits(x - pos, node->key, pos, node->node.pfx))
456 goto not_found;
457
458 return node;
459 }
460 node = container_of(eb_untag(troot, EB_NODE),
461 struct ebmb_node, node.branches);
462
463 node_bit = node->node.bit;
464 if (node_bit < 0) {
465 /* We have a dup tree now. Either it's for the same
466 * value, and we walk down left, or it's a different
467 * one and we don't have our key.
468 */
469 if (check_bits(x - pos, node->key, pos, node->node.pfx))
470 goto not_found;
471
472 troot = node->node.branches.b[EB_LEFT];
473 while (eb_gettag(troot) != EB_LEAF)
474 troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
475 node = container_of(eb_untag(troot, EB_LEAF),
476 struct ebmb_node, node.branches);
477 return node;
478 }
479
480 node_bit >>= 1; /* strip cover bit */
481 node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
482 if (node_bit < 0) {
483 /* This uncommon construction gives better performance
484 * because gcc does not try to reorder the loop. Tested to
485 * be fine with 2.95 to 4.2.
486 */
487 while (1) {
488 x++; pos++;
489 if (node->key[pos-1] ^ *(unsigned char*)(x-1))
490 goto not_found; /* more than one full byte is different */
491 node_bit += 8;
492 if (node_bit >= 0)
493 break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100494 }
Willy Tarreau3a932442010-05-09 19:29:23 +0200495 }
496
497 /* here we know that only the last byte differs, so 0 <= node_bit <= 7.
498 * We have 2 possibilities :
499 * - more than the last bit differs => data does not match
500 * - walk down on side = (x[pos] >> node_bit) & 1
501 */
502 side = *(unsigned char *)x >> node_bit;
503 if (((node->key[pos] >> node_bit) ^ side) > 1)
504 goto not_found;
505
506 if (!(node->node.bit & 1)) {
507 /* This is a cover node, let's keep a reference to it
508 * for later. The covering subtree is on the left, and
509 * the covered subtree is on the right, so we have to
510 * walk down right.
511 */
512 cover = node->node.branches.b[EB_LEFT];
513 troot = node->node.branches.b[EB_RGHT];
514 continue;
515 }
516 side &= 1;
517 troot = node->node.branches.b[side];
518 }
519
520 not_found:
Thayne McCombs8f0cc5c2021-01-07 21:35:52 -0700521 /* Walk down last cover tree if it exists. It does not matter if cover is NULL */
Willy Tarreau3a932442010-05-09 19:29:23 +0200522 return ebmb_entry(eb_walk_down(cover, EB_LEFT), struct ebmb_node, node);
523}
524
525
Joseph Herlant7c16c0e2018-11-13 19:55:57 -0800526/* Find the first occurrence of a prefix matching a key <x> of <pfx> BITS in the
Willy Tarreau414c4b22011-01-04 13:21:06 +0100527 * tree <root>. It's the caller's responsibility to ensure that key <x> is at
Willy Tarreau9f791932014-05-10 08:34:01 +0200528 * least as long as the keys in the tree. Note that this can be ensured by
529 * having a byte at the end of <x> which cannot be part of any prefix, typically
530 * the trailing zero for a string. If none can be found, return NULL.
Willy Tarreau3a932442010-05-09 19:29:23 +0200531 */
532static forceinline struct ebmb_node *__ebmb_lookup_prefix(struct eb_root *root, const void *x, unsigned int pfx)
533{
534 struct ebmb_node *node;
535 eb_troot_t *troot;
536 int pos, side;
537 int node_bit;
538
539 troot = root->b[EB_LEFT];
540 if (unlikely(troot == NULL))
541 return NULL;
542
543 pos = 0;
544 while (1) {
545 if ((eb_gettag(troot) == EB_LEAF)) {
546 node = container_of(eb_untag(troot, EB_LEAF),
547 struct ebmb_node, node.branches);
548 if (node->node.pfx != pfx)
549 return NULL;
550 if (check_bits(x - pos, node->key, pos, node->node.pfx))
551 return NULL;
552 return node;
553 }
554 node = container_of(eb_untag(troot, EB_NODE),
555 struct ebmb_node, node.branches);
556
557 node_bit = node->node.bit;
558 if (node_bit < 0) {
559 /* We have a dup tree now. Either it's for the same
560 * value, and we walk down left, or it's a different
561 * one and we don't have our key.
562 */
563 if (node->node.pfx != pfx)
564 return NULL;
565 if (check_bits(x - pos, node->key, pos, node->node.pfx))
566 return NULL;
567
568 troot = node->node.branches.b[EB_LEFT];
569 while (eb_gettag(troot) != EB_LEAF)
570 troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
571 node = container_of(eb_untag(troot, EB_LEAF),
572 struct ebmb_node, node.branches);
573 return node;
574 }
575
576 node_bit >>= 1; /* strip cover bit */
577 node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
578 if (node_bit < 0) {
579 /* This uncommon construction gives better performance
580 * because gcc does not try to reorder the loop. Tested to
581 * be fine with 2.95 to 4.2.
582 */
583 while (1) {
584 x++; pos++;
585 if (node->key[pos-1] ^ *(unsigned char*)(x-1))
586 return NULL; /* more than one full byte is different */
587 node_bit += 8;
588 if (node_bit >= 0)
589 break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100590 }
Willy Tarreau3a932442010-05-09 19:29:23 +0200591 }
592
593 /* here we know that only the last byte differs, so 0 <= node_bit <= 7.
594 * We have 2 possibilities :
595 * - more than the last bit differs => data does not match
596 * - walk down on side = (x[pos] >> node_bit) & 1
597 */
598 side = *(unsigned char *)x >> node_bit;
599 if (((node->key[pos] >> node_bit) ^ side) > 1)
600 return NULL;
601
602 if (!(node->node.bit & 1)) {
603 /* This is a cover node, it may be the entry we're
604 * looking for. We already know that it matches all the
605 * bits, let's compare prefixes and descend the cover
606 * subtree if they match.
607 */
Willy Tarreau22c0a932011-07-25 12:22:44 +0200608 if ((unsigned short)node->node.bit >> 1 == pfx)
Willy Tarreau3a932442010-05-09 19:29:23 +0200609 troot = node->node.branches.b[EB_LEFT];
610 else
611 troot = node->node.branches.b[EB_RGHT];
612 continue;
613 }
614 side &= 1;
615 troot = node->node.branches.b[side];
616 }
617}
618
619
620/* Insert ebmb_node <new> into a prefix subtree starting at node root <root>.
621 * Only new->key and new->pfx need be set with the key and its prefix length.
Ilya Shipitsinc6ecf562021-08-07 14:41:56 +0500622 * Note that bits between <pfx> and <len> are theoretically ignored and should be
Willy Tarreau3a932442010-05-09 19:29:23 +0200623 * zero, as it is not certain yet that they will always be ignored everywhere
624 * (eg in bit compare functions).
625 * The ebmb_node is returned.
626 * If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
627 * len is specified in bytes.
628 */
629static forceinline struct ebmb_node *
630__ebmb_insert_prefix(struct eb_root *root, struct ebmb_node *new, unsigned int len)
631{
632 struct ebmb_node *old;
633 unsigned int side;
634 eb_troot_t *troot, **up_ptr;
Willy Tarreau6258f7b2011-09-19 20:48:00 +0200635 eb_troot_t *root_right;
Willy Tarreau3a932442010-05-09 19:29:23 +0200636 int diff;
637 int bit;
638 eb_troot_t *new_left, *new_rght;
639 eb_troot_t *new_leaf;
640 int old_node_bit;
641
642 side = EB_LEFT;
643 troot = root->b[EB_LEFT];
644 root_right = root->b[EB_RGHT];
645 if (unlikely(troot == NULL)) {
646 /* Tree is empty, insert the leaf part below the left branch */
647 root->b[EB_LEFT] = eb_dotag(&new->node.branches, EB_LEAF);
648 new->node.leaf_p = eb_dotag(root, EB_LEFT);
649 new->node.node_p = NULL; /* node part unused */
650 return new;
651 }
652
653 len <<= 3;
654 if (len > new->node.pfx)
655 len = new->node.pfx;
656
657 /* The tree descent is fairly easy :
658 * - first, check if we have reached a leaf node
659 * - second, check if we have gone too far
660 * - third, reiterate
661 * Everywhere, we use <new> for the node node we are inserting, <root>
662 * for the node we attach it to, and <old> for the node we are
663 * displacing below <new>. <troot> will always point to the future node
664 * (tagged with its type). <side> carries the side the node <new> is
665 * attached to below its parent, which is also where previous node
666 * was attached.
667 */
668
669 bit = 0;
670 while (1) {
671 if (unlikely(eb_gettag(troot) == EB_LEAF)) {
672 /* Insert above a leaf. Note that this leaf could very
673 * well be part of a cover node.
674 */
675 old = container_of(eb_untag(troot, EB_LEAF),
676 struct ebmb_node, node.branches);
677 new->node.node_p = old->node.leaf_p;
678 up_ptr = &old->node.leaf_p;
679 goto check_bit_and_break;
680 }
681
682 /* OK we're walking down this link */
683 old = container_of(eb_untag(troot, EB_NODE),
684 struct ebmb_node, node.branches);
685 old_node_bit = old->node.bit;
686 /* Note that old_node_bit can be :
687 * < 0 : dup tree
688 * = 2N : cover node for N bits
689 * = 2N+1 : normal node at N bits
690 */
691
692 if (unlikely(old_node_bit < 0)) {
693 /* We're above a duplicate tree, so we must compare the whole value */
694 new->node.node_p = old->node.node_p;
695 up_ptr = &old->node.node_p;
696 check_bit_and_break:
697 /* No need to compare everything if the leaves are shorter than the new one. */
698 if (len > old->node.pfx)
699 len = old->node.pfx;
700 bit = equal_bits(new->key, old->key, bit, len);
Willy Tarreauc2186022009-10-26 19:48:54 +0100701 break;
702 }
703
Willy Tarreau3a932442010-05-09 19:29:23 +0200704 /* WARNING: for the two blocks below, <bit> is counted in half-bits */
705
706 bit = equal_bits(new->key, old->key, bit, old_node_bit >> 1);
707 bit = (bit << 1) + 1; // assume comparisons with normal nodes
Willy Tarreau3a932442010-05-09 19:29:23 +0200708
709 /* we must always check that our prefix is larger than the nodes
710 * we visit, otherwise we have to stop going down. The following
711 * test is able to stop before both normal and cover nodes.
712 */
713 if (bit >= (new->node.pfx << 1) && (new->node.pfx << 1) < old_node_bit) {
714 /* insert cover node here on the left */
715 new->node.node_p = old->node.node_p;
716 up_ptr = &old->node.node_p;
717 new->node.bit = new->node.pfx << 1;
718 diff = -1;
Willy Tarreau3a932442010-05-09 19:29:23 +0200719 goto insert_above;
720 }
721
722 if (unlikely(bit < old_node_bit)) {
723 /* The tree did not contain the key, so we insert <new> before the
724 * node <old>, and set ->bit to designate the lowest bit position in
725 * <new> which applies to ->branches.b[]. We know that the bit is not
726 * greater than the prefix length thanks to the test above.
727 */
728 new->node.node_p = old->node.node_p;
729 up_ptr = &old->node.node_p;
730 new->node.bit = bit;
731 diff = cmp_bits(new->key, old->key, bit >> 1);
Willy Tarreau3a932442010-05-09 19:29:23 +0200732 goto insert_above;
733 }
734
735 if (!(old_node_bit & 1)) {
736 /* if we encounter a cover node with our exact prefix length, it's
737 * necessarily the same value, so we insert there as a duplicate on
738 * the left. For that, we go down on the left and the leaf detection
739 * code will finish the job.
740 */
741 if ((new->node.pfx << 1) == old_node_bit) {
742 root = &old->node.branches;
743 side = EB_LEFT;
744 troot = root->b[side];
Willy Tarreau3a932442010-05-09 19:29:23 +0200745 continue;
746 }
747
748 /* cover nodes are always walked through on the right */
749 side = EB_RGHT;
750 bit = old_node_bit >> 1; /* recheck that bit */
751 root = &old->node.branches;
752 troot = root->b[side];
Willy Tarreau3a932442010-05-09 19:29:23 +0200753 continue;
754 }
755
756 /* we don't want to skip bits for further comparisons, so we must limit <bit>.
757 * However, since we're going down around <old_node_bit>, we know it will be
758 * properly matched, so we can skip this bit.
759 */
760 old_node_bit >>= 1;
761 bit = old_node_bit + 1;
762
Willy Tarreauc2186022009-10-26 19:48:54 +0100763 /* walk down */
764 root = &old->node.branches;
Willy Tarreau3a932442010-05-09 19:29:23 +0200765 side = old_node_bit & 7;
766 side ^= 7;
767 side = (new->key[old_node_bit >> 3] >> side) & 1;
Willy Tarreauc2186022009-10-26 19:48:54 +0100768 troot = root->b[side];
769 }
770
Willy Tarreau3a932442010-05-09 19:29:23 +0200771 /* Right here, we have 4 possibilities :
772 * - the tree does not contain any leaf matching the
773 * key, and we have new->key < old->key. We insert
774 * new above old, on the left ;
775 *
776 * - the tree does not contain any leaf matching the
777 * key, and we have new->key > old->key. We insert
778 * new above old, on the right ;
779 *
780 * - the tree does contain the key with the same prefix
781 * length. We add the new key next to it as a first
782 * duplicate (since it was alone).
783 *
784 * The two cases just above (insertion on the right and first
784 * duplicate) can easily be partially merged.
785 *
786 * - the tree contains a leaf matching the key, we have
787 * to insert above it as a cover node. The leaf with
788 * the shortest prefix becomes the left subtree and
789 * the leaf with the longest prefix becomes the right
790 * one. The cover node gets the min of both prefixes
791 * as its new bit.
Willy Tarreauc2186022009-10-26 19:48:54 +0100792 */
793
Willy Tarreau3a932442010-05-09 19:29:23 +0200794 /* first we want to ensure that we compare the correct bit, which means
795 * the largest bit position covered by both nodes' prefixes.
Willy Tarreauc2186022009-10-26 19:48:54 +0100796 */
Willy Tarreau3a932442010-05-09 19:29:23 +0200797 if (bit > new->node.pfx)
798 bit = new->node.pfx;
799 if (bit > old->node.pfx)
800 bit = old->node.pfx;
801
Willy Tarreau3a932442010-05-09 19:29:23 +0200802 new->node.bit = (bit << 1) + 1; /* assume normal node by default */
803
804 /* if one prefix is included in the second one, we don't compare bits
805 * because they won't necessarily match, we just proceed with a cover
806 * node insertion.
807 */
808 diff = 0;
809 if (bit < old->node.pfx && bit < new->node.pfx)
810 diff = cmp_bits(new->key, old->key, bit);
811
812 if (diff == 0) {
813 /* Both keys match. Either it's a duplicate entry or we have to
814 * put the shortest prefix left and the longest one right below
815 * a new cover node. By default, diff==0 means we'll be inserted
816 * on the right.
817 */
818 new->node.bit--; /* anticipate cover node insertion */
819 if (new->node.pfx == old->node.pfx) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200820 new->node.bit = -1; /* mark as new dup tree, just in case */
821
822 if (unlikely(eb_gettag(root_right))) {
823 /* we refuse to duplicate this key if the tree is
824 * tagged as containing only unique keys.
825 */
826 return old;
827 }
828
829 if (eb_gettag(troot) != EB_LEAF) {
830 /* there was already a dup tree below */
831 struct eb_node *ret;
832 ret = eb_insert_dup(&old->node, &new->node);
833 return container_of(ret, struct ebmb_node, node);
834 }
835 /* otherwise fall through to insert first duplicate */
836 }
837 /* otherwise we just rely on the tests below to select the right side */
838 else if (new->node.pfx < old->node.pfx)
839 diff = -1; /* force insertion to left side */
840 }
841
842 insert_above:
843 new_left = eb_dotag(&new->node.branches, EB_LEFT);
844 new_rght = eb_dotag(&new->node.branches, EB_RGHT);
845 new_leaf = eb_dotag(&new->node.branches, EB_LEAF);
846
847 if (diff >= 0) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200848 new->node.branches.b[EB_LEFT] = troot;
849 new->node.branches.b[EB_RGHT] = new_leaf;
850 new->node.leaf_p = new_rght;
851 *up_ptr = new_left;
852 }
853 else {
Willy Tarreau3a932442010-05-09 19:29:23 +0200854 new->node.branches.b[EB_LEFT] = new_leaf;
855 new->node.branches.b[EB_RGHT] = troot;
856 new->node.leaf_p = new_left;
857 *up_ptr = new_rght;
858 }
859
Willy Tarreauc2186022009-10-26 19:48:54 +0100860 root->b[side] = eb_dotag(&new->node.branches, EB_NODE);
861 return new;
862}
863
Willy Tarreau3a932442010-05-09 19:29:23 +0200864
865
Willy Tarreauead63a02009-11-02 14:41:23 +0100866#endif /* _EBMBTREE_H */
867