blob: ac7de5186c34ce60d102113b89ca07b46ae8dcad [file] [log] [blame]
Willy Tarreauc2186022009-10-26 19:48:54 +01001/*
2 * Elastic Binary Trees - macros and structures for Multi-Byte data nodes.
Willy Tarreauf3bfede2011-07-25 11:38:17 +02003 * Version 6.0.6
Willy Tarreau414c4b22011-01-04 13:21:06 +01004 * (C) 2002-2011 - Willy Tarreau <w@1wt.eu>
Willy Tarreauc2186022009-10-26 19:48:54 +01005 *
Willy Tarreauf3bfede2011-07-25 11:38:17 +02006 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation, version 2.1
9 * exclusively.
Willy Tarreauc2186022009-10-26 19:48:54 +010010 *
Willy Tarreauf3bfede2011-07-25 11:38:17 +020011 * This library is distributed in the hope that it will be useful,
Willy Tarreauc2186022009-10-26 19:48:54 +010012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
Willy Tarreauf3bfede2011-07-25 11:38:17 +020013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
Willy Tarreauc2186022009-10-26 19:48:54 +010015 *
Willy Tarreauf3bfede2011-07-25 11:38:17 +020016 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
Willy Tarreauc2186022009-10-26 19:48:54 +010019 */
20
Willy Tarreauead63a02009-11-02 14:41:23 +010021#ifndef _EBMBTREE_H
22#define _EBMBTREE_H
23
Willy Tarreauc2186022009-10-26 19:48:54 +010024#include <string.h>
25#include "ebtree.h"
26
27/* Return the structure of type <type> whose member <member> points to <ptr> */
28#define ebmb_entry(ptr, type, member) container_of(ptr, type, member)
29
Willy Tarreauc2186022009-10-26 19:48:54 +010030/*
31 * Exported functions and macros.
32 * Many of them are always inlined because they are extremely small, and
33 * are generally called at most once or twice in a program.
34 */
35
36/* Return leftmost node in the tree, or NULL if none */
37static forceinline struct ebmb_node *ebmb_first(struct eb_root *root)
38{
39 return ebmb_entry(eb_first(root), struct ebmb_node, node);
40}
41
42/* Return rightmost node in the tree, or NULL if none */
43static forceinline struct ebmb_node *ebmb_last(struct eb_root *root)
44{
45 return ebmb_entry(eb_last(root), struct ebmb_node, node);
46}
47
48/* Return next node in the tree, or NULL if none */
49static forceinline struct ebmb_node *ebmb_next(struct ebmb_node *ebmb)
50{
51 return ebmb_entry(eb_next(&ebmb->node), struct ebmb_node, node);
52}
53
54/* Return previous node in the tree, or NULL if none */
55static forceinline struct ebmb_node *ebmb_prev(struct ebmb_node *ebmb)
56{
57 return ebmb_entry(eb_prev(&ebmb->node), struct ebmb_node, node);
58}
59
Willy Tarreau2b570202013-05-07 15:58:28 +020060/* Return next leaf node within a duplicate sub-tree, or NULL if none. */
61static inline struct ebmb_node *ebmb_next_dup(struct ebmb_node *ebmb)
62{
63 return ebmb_entry(eb_next_dup(&ebmb->node), struct ebmb_node, node);
64}
65
66/* Return previous leaf node within a duplicate sub-tree, or NULL if none. */
67static inline struct ebmb_node *ebmb_prev_dup(struct ebmb_node *ebmb)
68{
69 return ebmb_entry(eb_prev_dup(&ebmb->node), struct ebmb_node, node);
70}
71
Willy Tarreauc2186022009-10-26 19:48:54 +010072/* Return next node in the tree, skipping duplicates, or NULL if none */
73static forceinline struct ebmb_node *ebmb_next_unique(struct ebmb_node *ebmb)
74{
75 return ebmb_entry(eb_next_unique(&ebmb->node), struct ebmb_node, node);
76}
77
78/* Return previous node in the tree, skipping duplicates, or NULL if none */
79static forceinline struct ebmb_node *ebmb_prev_unique(struct ebmb_node *ebmb)
80{
81 return ebmb_entry(eb_prev_unique(&ebmb->node), struct ebmb_node, node);
82}
83
84/* Delete node from the tree if it was linked in. Mark the node unused. Note
85 * that this function relies on a non-inlined generic function: eb_delete.
86 */
87static forceinline void ebmb_delete(struct ebmb_node *ebmb)
88{
89 eb_delete(&ebmb->node);
90}
91
92/* The following functions are not inlined by default. They are declared
93 * in ebmbtree.c, which simply relies on their inline version.
94 */
Willy Tarreau03e78532020-02-25 07:38:05 +010095struct ebmb_node *ebmb_lookup(struct eb_root *root, const void *x, unsigned int len);
96struct ebmb_node *ebmb_insert(struct eb_root *root, struct ebmb_node *new, unsigned int len);
97struct ebmb_node *ebmb_lookup_longest(struct eb_root *root, const void *x);
98struct ebmb_node *ebmb_lookup_prefix(struct eb_root *root, const void *x, unsigned int pfx);
99struct ebmb_node *ebmb_insert_prefix(struct eb_root *root, struct ebmb_node *new, unsigned int len);
Willy Tarreauc2186022009-10-26 19:48:54 +0100100
101/* The following functions are less likely to be used directly, because their
102 * code is larger. The non-inlined version is preferred.
103 */
104
105/* Delete node from the tree if it was linked in. Mark the node unused. */
106static forceinline void __ebmb_delete(struct ebmb_node *ebmb)
107{
108 __eb_delete(&ebmb->node);
109}
110
Joseph Herlant7c16c0e2018-11-13 19:55:57 -0800111/* Find the first occurrence of a key of a least <len> bytes matching <x> in the
Willy Tarreau414c4b22011-01-04 13:21:06 +0100112 * tree <root>. The caller is responsible for ensuring that <len> will not exceed
113 * the common parts between the tree's keys and <x>. In case of multiple matches,
114 * the leftmost node is returned. This means that this function can be used to
115 * lookup string keys by prefix if all keys in the tree are zero-terminated. If
116 * no match is found, NULL is returned. Returns first node if <len> is zero.
Willy Tarreauc2186022009-10-26 19:48:54 +0100117 */
118static forceinline struct ebmb_node *__ebmb_lookup(struct eb_root *root, const void *x, unsigned int len)
119{
120 struct ebmb_node *node;
121 eb_troot_t *troot;
Willy Tarreau3a932442010-05-09 19:29:23 +0200122 int pos, side;
123 int node_bit;
Willy Tarreauc2186022009-10-26 19:48:54 +0100124
125 troot = root->b[EB_LEFT];
126 if (unlikely(troot == NULL))
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100127 goto ret_null;
Willy Tarreauc2186022009-10-26 19:48:54 +0100128
Willy Tarreau414c4b22011-01-04 13:21:06 +0100129 if (unlikely(len == 0))
130 goto walk_down;
131
Willy Tarreau3a932442010-05-09 19:29:23 +0200132 pos = 0;
Willy Tarreauc2186022009-10-26 19:48:54 +0100133 while (1) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200134 if (eb_gettag(troot) == EB_LEAF) {
Willy Tarreauc2186022009-10-26 19:48:54 +0100135 node = container_of(eb_untag(troot, EB_LEAF),
136 struct ebmb_node, node.branches);
Willy Tarreau853926a2020-06-16 11:10:53 +0200137 if (eb_memcmp(node->key + pos, x, len) != 0)
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100138 goto ret_null;
Willy Tarreau3a932442010-05-09 19:29:23 +0200139 else
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100140 goto ret_node;
Willy Tarreauc2186022009-10-26 19:48:54 +0100141 }
142 node = container_of(eb_untag(troot, EB_NODE),
143 struct ebmb_node, node.branches);
144
Willy Tarreau3a932442010-05-09 19:29:23 +0200145 node_bit = node->node.bit;
146 if (node_bit < 0) {
Willy Tarreauc2186022009-10-26 19:48:54 +0100147 /* We have a dup tree now. Either it's for the same
148 * value, and we walk down left, or it's a different
149 * one and we don't have our key.
150 */
Willy Tarreau853926a2020-06-16 11:10:53 +0200151 if (eb_memcmp(node->key + pos, x, len) != 0)
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100152 goto ret_null;
153 else
154 goto walk_left;
Willy Tarreauc2186022009-10-26 19:48:54 +0100155 }
156
Willy Tarreau3a932442010-05-09 19:29:23 +0200157 /* OK, normal data node, let's walk down. We check if all full
158 * bytes are equal, and we start from the last one we did not
159 * completely check. We stop as soon as we reach the last byte,
160 * because we must decide to go left/right or abort.
161 */
162 node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
163 if (node_bit < 0) {
Joseph Herlant7c16c0e2018-11-13 19:55:57 -0800164 /* This surprising construction gives better performance
Willy Tarreau3a932442010-05-09 19:29:23 +0200165 * because gcc does not try to reorder the loop. Tested to
166 * be fine with 2.95 to 4.2.
167 */
168 while (1) {
Willy Tarreau414c4b22011-01-04 13:21:06 +0100169 if (node->key[pos++] ^ *(unsigned char*)(x++))
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100170 goto ret_null; /* more than one full byte is different */
Willy Tarreau414c4b22011-01-04 13:21:06 +0100171 if (--len == 0)
172 goto walk_left; /* return first node if all bytes matched */
Willy Tarreau3a932442010-05-09 19:29:23 +0200173 node_bit += 8;
174 if (node_bit >= 0)
175 break;
176 }
177 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100178
Willy Tarreau3a932442010-05-09 19:29:23 +0200179 /* here we know that only the last byte differs, so node_bit < 8.
180 * We have 2 possibilities :
181 * - more than the last bit differs => return NULL
182 * - walk down on side = (x[pos] >> node_bit) & 1
183 */
184 side = *(unsigned char *)x >> node_bit;
185 if (((node->key[pos] >> node_bit) ^ side) > 1)
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100186 goto ret_null;
Willy Tarreau3a932442010-05-09 19:29:23 +0200187 side &= 1;
188 troot = node->node.branches.b[side];
Willy Tarreauc2186022009-10-26 19:48:54 +0100189 }
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100190 walk_left:
191 troot = node->node.branches.b[EB_LEFT];
192 walk_down:
193 while (eb_gettag(troot) != EB_LEAF)
194 troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
195 node = container_of(eb_untag(troot, EB_LEAF),
196 struct ebmb_node, node.branches);
197 ret_node:
198 return node;
199 ret_null:
200 return NULL;
Willy Tarreauc2186022009-10-26 19:48:54 +0100201}
202
203/* Insert ebmb_node <new> into subtree starting at node root <root>.
204 * Only new->key needs be set with the key. The ebmb_node is returned.
205 * If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
Willy Tarreau414c4b22011-01-04 13:21:06 +0100206 * len is specified in bytes. It is absolutely mandatory that this length
207 * is the same for all keys in the tree. This function cannot be used to
208 * insert strings.
Willy Tarreauc2186022009-10-26 19:48:54 +0100209 */
210static forceinline struct ebmb_node *
211__ebmb_insert(struct eb_root *root, struct ebmb_node *new, unsigned int len)
212{
213 struct ebmb_node *old;
214 unsigned int side;
Willy Tarreau3a932442010-05-09 19:29:23 +0200215 eb_troot_t *troot, **up_ptr;
Willy Tarreau6258f7b2011-09-19 20:48:00 +0200216 eb_troot_t *root_right;
Willy Tarreauc2186022009-10-26 19:48:54 +0100217 int diff;
218 int bit;
Willy Tarreau3a932442010-05-09 19:29:23 +0200219 eb_troot_t *new_left, *new_rght;
220 eb_troot_t *new_leaf;
221 int old_node_bit;
Willy Tarreauc2186022009-10-26 19:48:54 +0100222
223 side = EB_LEFT;
224 troot = root->b[EB_LEFT];
225 root_right = root->b[EB_RGHT];
226 if (unlikely(troot == NULL)) {
227 /* Tree is empty, insert the leaf part below the left branch */
228 root->b[EB_LEFT] = eb_dotag(&new->node.branches, EB_LEAF);
229 new->node.leaf_p = eb_dotag(root, EB_LEFT);
230 new->node.node_p = NULL; /* node part unused */
231 return new;
232 }
233
Willy Tarreauc2186022009-10-26 19:48:54 +0100234 /* The tree descent is fairly easy :
235 * - first, check if we have reached a leaf node
236 * - second, check if we have gone too far
237 * - third, reiterate
238 * Everywhere, we use <new> for the node node we are inserting, <root>
239 * for the node we attach it to, and <old> for the node we are
240 * displacing below <new>. <troot> will always point to the future node
241 * (tagged with its type). <side> carries the side the node <new> is
242 * attached to below its parent, which is also where previous node
243 * was attached.
244 */
245
246 bit = 0;
247 while (1) {
248 if (unlikely(eb_gettag(troot) == EB_LEAF)) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200249 /* insert above a leaf */
Willy Tarreauc2186022009-10-26 19:48:54 +0100250 old = container_of(eb_untag(troot, EB_LEAF),
251 struct ebmb_node, node.branches);
Willy Tarreauc2186022009-10-26 19:48:54 +0100252 new->node.node_p = old->node.leaf_p;
Willy Tarreau3a932442010-05-09 19:29:23 +0200253 up_ptr = &old->node.leaf_p;
254 goto check_bit_and_break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100255 }
256
257 /* OK we're walking down this link */
258 old = container_of(eb_untag(troot, EB_NODE),
259 struct ebmb_node, node.branches);
Willy Tarreau3a932442010-05-09 19:29:23 +0200260 old_node_bit = old->node.bit;
261
262 if (unlikely(old->node.bit < 0)) {
263 /* We're above a duplicate tree, so we must compare the whole value */
264 new->node.node_p = old->node.node_p;
265 up_ptr = &old->node.node_p;
266 check_bit_and_break:
267 bit = equal_bits(new->key, old->key, bit, len << 3);
268 break;
269 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100270
271 /* Stop going down when we don't have common bits anymore. We
272 * also stop in front of a duplicates tree because it means we
273 * have to insert above. Note: we can compare more bits than
274 * the current node's because as long as they are identical, we
275 * know we descend along the correct side.
276 */
Willy Tarreau3a932442010-05-09 19:29:23 +0200277
278 bit = equal_bits(new->key, old->key, bit, old_node_bit);
279 if (unlikely(bit < old_node_bit)) {
280 /* The tree did not contain the key, so we insert <new> before the
281 * node <old>, and set ->bit to designate the lowest bit position in
282 * <new> which applies to ->branches.b[].
283 */
284 new->node.node_p = old->node.node_p;
285 up_ptr = &old->node.node_p;
286 break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100287 }
Willy Tarreau3a932442010-05-09 19:29:23 +0200288 /* we don't want to skip bits for further comparisons, so we must limit <bit>.
289 * However, since we're going down around <old_node_bit>, we know it will be
290 * properly matched, so we can skip this bit.
291 */
292 bit = old_node_bit + 1;
293
294 /* walk down */
295 root = &old->node.branches;
296 side = old_node_bit & 7;
297 side ^= 7;
298 side = (new->key[old_node_bit >> 3] >> side) & 1;
299 troot = root->b[side];
300 }
301
302 new_left = eb_dotag(&new->node.branches, EB_LEFT);
303 new_rght = eb_dotag(&new->node.branches, EB_RGHT);
304 new_leaf = eb_dotag(&new->node.branches, EB_LEAF);
305
Willy Tarreau3a932442010-05-09 19:29:23 +0200306 new->node.bit = bit;
Willy Tarreaua4a1cd12012-06-09 15:43:36 +0200307
308 /* Note: we can compare more bits than the current node's because as
309 * long as they are identical, we know we descend along the correct
310 * side. However we don't want to start to compare past the end.
311 */
312 diff = 0;
313 if (((unsigned)bit >> 3) < len)
314 diff = cmp_bits(new->key, old->key, bit);
315
Willy Tarreau3a932442010-05-09 19:29:23 +0200316 if (diff == 0) {
317 new->node.bit = -1; /* mark as new dup tree, just in case */
Willy Tarreauc2186022009-10-26 19:48:54 +0100318
Willy Tarreau3a932442010-05-09 19:29:23 +0200319 if (likely(eb_gettag(root_right))) {
320 /* we refuse to duplicate this key if the tree is
321 * tagged as containing only unique keys.
Willy Tarreauc2186022009-10-26 19:48:54 +0100322 */
Willy Tarreau3a932442010-05-09 19:29:23 +0200323 return old;
324 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100325
Willy Tarreau3a932442010-05-09 19:29:23 +0200326 if (eb_gettag(troot) != EB_LEAF) {
327 /* there was already a dup tree below */
328 struct eb_node *ret;
329 ret = eb_insert_dup(&old->node, &new->node);
330 return container_of(ret, struct ebmb_node, node);
331 }
332 /* otherwise fall through */
333 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100334
Willy Tarreau3a932442010-05-09 19:29:23 +0200335 if (diff >= 0) {
336 new->node.branches.b[EB_LEFT] = troot;
337 new->node.branches.b[EB_RGHT] = new_leaf;
338 new->node.leaf_p = new_rght;
339 *up_ptr = new_left;
340 }
Tim Düsterhusa8bfb4d2021-09-11 17:02:33 +0200341 else {
Willy Tarreau3a932442010-05-09 19:29:23 +0200342 new->node.branches.b[EB_LEFT] = new_leaf;
343 new->node.branches.b[EB_RGHT] = troot;
344 new->node.leaf_p = new_left;
345 *up_ptr = new_rght;
346 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100347
Willy Tarreau3a932442010-05-09 19:29:23 +0200348 /* Ok, now we are inserting <new> between <root> and <old>. <old>'s
349 * parent is already set to <new>, and the <root>'s branch is still in
350 * <side>. Update the root's leaf till we have it. Note that we can also
351 * find the side by checking the side of new->node.node_p.
352 */
353
354 root->b[side] = eb_dotag(&new->node.branches, EB_NODE);
355 return new;
356}
357
358
Joseph Herlant7c16c0e2018-11-13 19:55:57 -0800359/* Find the first occurrence of the longest prefix matching a key <x> in the
Willy Tarreau3a932442010-05-09 19:29:23 +0200360 * tree <root>. It's the caller's responsibility to ensure that key <x> is at
Willy Tarreau9f791932014-05-10 08:34:01 +0200361 * least as long as the keys in the tree. Note that this can be ensured by
362 * having a byte at the end of <x> which cannot be part of any prefix, typically
363 * the trailing zero for a string. If none can be found, return NULL.
Willy Tarreau3a932442010-05-09 19:29:23 +0200364 */
365static forceinline struct ebmb_node *__ebmb_lookup_longest(struct eb_root *root, const void *x)
366{
367 struct ebmb_node *node;
368 eb_troot_t *troot, *cover;
369 int pos, side;
370 int node_bit;
371
372 troot = root->b[EB_LEFT];
373 if (unlikely(troot == NULL))
374 return NULL;
375
376 cover = NULL;
377 pos = 0;
378 while (1) {
379 if ((eb_gettag(troot) == EB_LEAF)) {
380 node = container_of(eb_untag(troot, EB_LEAF),
381 struct ebmb_node, node.branches);
382 if (check_bits(x - pos, node->key, pos, node->node.pfx))
383 goto not_found;
384
385 return node;
386 }
387 node = container_of(eb_untag(troot, EB_NODE),
388 struct ebmb_node, node.branches);
389
390 node_bit = node->node.bit;
391 if (node_bit < 0) {
392 /* We have a dup tree now. Either it's for the same
393 * value, and we walk down left, or it's a different
394 * one and we don't have our key.
395 */
396 if (check_bits(x - pos, node->key, pos, node->node.pfx))
397 goto not_found;
398
399 troot = node->node.branches.b[EB_LEFT];
400 while (eb_gettag(troot) != EB_LEAF)
401 troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
402 node = container_of(eb_untag(troot, EB_LEAF),
403 struct ebmb_node, node.branches);
404 return node;
405 }
406
407 node_bit >>= 1; /* strip cover bit */
408 node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
409 if (node_bit < 0) {
410 /* This uncommon construction gives better performance
411 * because gcc does not try to reorder the loop. Tested to
412 * be fine with 2.95 to 4.2.
413 */
414 while (1) {
415 x++; pos++;
416 if (node->key[pos-1] ^ *(unsigned char*)(x-1))
417 goto not_found; /* more than one full byte is different */
418 node_bit += 8;
419 if (node_bit >= 0)
420 break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100421 }
Willy Tarreau3a932442010-05-09 19:29:23 +0200422 }
423
424 /* here we know that only the last byte differs, so 0 <= node_bit <= 7.
425 * We have 2 possibilities :
426 * - more than the last bit differs => data does not match
427 * - walk down on side = (x[pos] >> node_bit) & 1
428 */
429 side = *(unsigned char *)x >> node_bit;
430 if (((node->key[pos] >> node_bit) ^ side) > 1)
431 goto not_found;
432
433 if (!(node->node.bit & 1)) {
434 /* This is a cover node, let's keep a reference to it
435 * for later. The covering subtree is on the left, and
436 * the covered subtree is on the right, so we have to
437 * walk down right.
438 */
439 cover = node->node.branches.b[EB_LEFT];
440 troot = node->node.branches.b[EB_RGHT];
441 continue;
442 }
443 side &= 1;
444 troot = node->node.branches.b[side];
445 }
446
447 not_found:
Thayne McCombs8f0cc5c2021-01-07 21:35:52 -0700448 /* Walk down last cover tree if it exists. It does not matter if cover is NULL */
Willy Tarreau3a932442010-05-09 19:29:23 +0200449 return ebmb_entry(eb_walk_down(cover, EB_LEFT), struct ebmb_node, node);
450}
451
452
Joseph Herlant7c16c0e2018-11-13 19:55:57 -0800453/* Find the first occurrence of a prefix matching a key <x> of <pfx> BITS in the
Willy Tarreau414c4b22011-01-04 13:21:06 +0100454 * tree <root>. It's the caller's responsibility to ensure that key <x> is at
Willy Tarreau9f791932014-05-10 08:34:01 +0200455 * least as long as the keys in the tree. Note that this can be ensured by
456 * having a byte at the end of <x> which cannot be part of any prefix, typically
457 * the trailing zero for a string. If none can be found, return NULL.
Willy Tarreau3a932442010-05-09 19:29:23 +0200458 */
459static forceinline struct ebmb_node *__ebmb_lookup_prefix(struct eb_root *root, const void *x, unsigned int pfx)
460{
461 struct ebmb_node *node;
462 eb_troot_t *troot;
463 int pos, side;
464 int node_bit;
465
466 troot = root->b[EB_LEFT];
467 if (unlikely(troot == NULL))
468 return NULL;
469
470 pos = 0;
471 while (1) {
472 if ((eb_gettag(troot) == EB_LEAF)) {
473 node = container_of(eb_untag(troot, EB_LEAF),
474 struct ebmb_node, node.branches);
475 if (node->node.pfx != pfx)
476 return NULL;
477 if (check_bits(x - pos, node->key, pos, node->node.pfx))
478 return NULL;
479 return node;
480 }
481 node = container_of(eb_untag(troot, EB_NODE),
482 struct ebmb_node, node.branches);
483
484 node_bit = node->node.bit;
485 if (node_bit < 0) {
486 /* We have a dup tree now. Either it's for the same
487 * value, and we walk down left, or it's a different
488 * one and we don't have our key.
489 */
490 if (node->node.pfx != pfx)
491 return NULL;
492 if (check_bits(x - pos, node->key, pos, node->node.pfx))
493 return NULL;
494
495 troot = node->node.branches.b[EB_LEFT];
496 while (eb_gettag(troot) != EB_LEAF)
497 troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
498 node = container_of(eb_untag(troot, EB_LEAF),
499 struct ebmb_node, node.branches);
500 return node;
501 }
502
503 node_bit >>= 1; /* strip cover bit */
504 node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
505 if (node_bit < 0) {
506 /* This uncommon construction gives better performance
507 * because gcc does not try to reorder the loop. Tested to
508 * be fine with 2.95 to 4.2.
509 */
510 while (1) {
511 x++; pos++;
512 if (node->key[pos-1] ^ *(unsigned char*)(x-1))
513 return NULL; /* more than one full byte is different */
514 node_bit += 8;
515 if (node_bit >= 0)
516 break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100517 }
Willy Tarreau3a932442010-05-09 19:29:23 +0200518 }
519
520 /* here we know that only the last byte differs, so 0 <= node_bit <= 7.
521 * We have 2 possibilities :
522 * - more than the last bit differs => data does not match
523 * - walk down on side = (x[pos] >> node_bit) & 1
524 */
525 side = *(unsigned char *)x >> node_bit;
526 if (((node->key[pos] >> node_bit) ^ side) > 1)
527 return NULL;
528
529 if (!(node->node.bit & 1)) {
530 /* This is a cover node, it may be the entry we're
531 * looking for. We already know that it matches all the
532 * bits, let's compare prefixes and descend the cover
533 * subtree if they match.
534 */
Willy Tarreau22c0a932011-07-25 12:22:44 +0200535 if ((unsigned short)node->node.bit >> 1 == pfx)
Willy Tarreau3a932442010-05-09 19:29:23 +0200536 troot = node->node.branches.b[EB_LEFT];
537 else
538 troot = node->node.branches.b[EB_RGHT];
539 continue;
540 }
541 side &= 1;
542 troot = node->node.branches.b[side];
543 }
544}
545
546
547/* Insert ebmb_node <new> into a prefix subtree starting at node root <root>.
548 * Only new->key and new->pfx need be set with the key and its prefix length.
Ilya Shipitsin01881082021-08-07 14:41:56 +0500549 * Note that bits between <pfx> and <len> are theoretically ignored and should be
Willy Tarreau3a932442010-05-09 19:29:23 +0200550 * zero, as it is not certain yet that they will always be ignored everywhere
551 * (eg in bit compare functions).
552 * The ebmb_node is returned.
553 * If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
554 * len is specified in bytes.
555 */
556static forceinline struct ebmb_node *
557__ebmb_insert_prefix(struct eb_root *root, struct ebmb_node *new, unsigned int len)
558{
559 struct ebmb_node *old;
560 unsigned int side;
561 eb_troot_t *troot, **up_ptr;
Willy Tarreau6258f7b2011-09-19 20:48:00 +0200562 eb_troot_t *root_right;
Willy Tarreau3a932442010-05-09 19:29:23 +0200563 int diff;
564 int bit;
565 eb_troot_t *new_left, *new_rght;
566 eb_troot_t *new_leaf;
567 int old_node_bit;
568
569 side = EB_LEFT;
570 troot = root->b[EB_LEFT];
571 root_right = root->b[EB_RGHT];
572 if (unlikely(troot == NULL)) {
573 /* Tree is empty, insert the leaf part below the left branch */
574 root->b[EB_LEFT] = eb_dotag(&new->node.branches, EB_LEAF);
575 new->node.leaf_p = eb_dotag(root, EB_LEFT);
576 new->node.node_p = NULL; /* node part unused */
577 return new;
578 }
579
580 len <<= 3;
581 if (len > new->node.pfx)
582 len = new->node.pfx;
583
584 /* The tree descent is fairly easy :
585 * - first, check if we have reached a leaf node
586 * - second, check if we have gone too far
587 * - third, reiterate
588 * Everywhere, we use <new> for the node node we are inserting, <root>
589 * for the node we attach it to, and <old> for the node we are
590 * displacing below <new>. <troot> will always point to the future node
591 * (tagged with its type). <side> carries the side the node <new> is
592 * attached to below its parent, which is also where previous node
593 * was attached.
594 */
595
596 bit = 0;
597 while (1) {
598 if (unlikely(eb_gettag(troot) == EB_LEAF)) {
599 /* Insert above a leaf. Note that this leaf could very
600 * well be part of a cover node.
601 */
602 old = container_of(eb_untag(troot, EB_LEAF),
603 struct ebmb_node, node.branches);
604 new->node.node_p = old->node.leaf_p;
605 up_ptr = &old->node.leaf_p;
606 goto check_bit_and_break;
607 }
608
609 /* OK we're walking down this link */
610 old = container_of(eb_untag(troot, EB_NODE),
611 struct ebmb_node, node.branches);
612 old_node_bit = old->node.bit;
613 /* Note that old_node_bit can be :
614 * < 0 : dup tree
615 * = 2N : cover node for N bits
616 * = 2N+1 : normal node at N bits
617 */
618
619 if (unlikely(old_node_bit < 0)) {
620 /* We're above a duplicate tree, so we must compare the whole value */
621 new->node.node_p = old->node.node_p;
622 up_ptr = &old->node.node_p;
623 check_bit_and_break:
624 /* No need to compare everything if the leaves are shorter than the new one. */
625 if (len > old->node.pfx)
626 len = old->node.pfx;
627 bit = equal_bits(new->key, old->key, bit, len);
Willy Tarreauc2186022009-10-26 19:48:54 +0100628 break;
629 }
630
Willy Tarreau3a932442010-05-09 19:29:23 +0200631 /* WARNING: for the two blocks below, <bit> is counted in half-bits */
632
633 bit = equal_bits(new->key, old->key, bit, old_node_bit >> 1);
634 bit = (bit << 1) + 1; // assume comparisons with normal nodes
Willy Tarreau3a932442010-05-09 19:29:23 +0200635
636 /* we must always check that our prefix is larger than the nodes
637 * we visit, otherwise we have to stop going down. The following
638 * test is able to stop before both normal and cover nodes.
639 */
640 if (bit >= (new->node.pfx << 1) && (new->node.pfx << 1) < old_node_bit) {
641 /* insert cover node here on the left */
642 new->node.node_p = old->node.node_p;
643 up_ptr = &old->node.node_p;
644 new->node.bit = new->node.pfx << 1;
645 diff = -1;
Willy Tarreau3a932442010-05-09 19:29:23 +0200646 goto insert_above;
647 }
648
649 if (unlikely(bit < old_node_bit)) {
650 /* The tree did not contain the key, so we insert <new> before the
651 * node <old>, and set ->bit to designate the lowest bit position in
652 * <new> which applies to ->branches.b[]. We know that the bit is not
653 * greater than the prefix length thanks to the test above.
654 */
655 new->node.node_p = old->node.node_p;
656 up_ptr = &old->node.node_p;
657 new->node.bit = bit;
658 diff = cmp_bits(new->key, old->key, bit >> 1);
Willy Tarreau3a932442010-05-09 19:29:23 +0200659 goto insert_above;
660 }
661
662 if (!(old_node_bit & 1)) {
663 /* if we encounter a cover node with our exact prefix length, it's
664 * necessarily the same value, so we insert there as a duplicate on
665 * the left. For that, we go down on the left and the leaf detection
666 * code will finish the job.
667 */
668 if ((new->node.pfx << 1) == old_node_bit) {
669 root = &old->node.branches;
670 side = EB_LEFT;
671 troot = root->b[side];
Willy Tarreau3a932442010-05-09 19:29:23 +0200672 continue;
673 }
674
675 /* cover nodes are always walked through on the right */
676 side = EB_RGHT;
677 bit = old_node_bit >> 1; /* recheck that bit */
678 root = &old->node.branches;
679 troot = root->b[side];
Willy Tarreau3a932442010-05-09 19:29:23 +0200680 continue;
681 }
682
683 /* we don't want to skip bits for further comparisons, so we must limit <bit>.
684 * However, since we're going down around <old_node_bit>, we know it will be
685 * properly matched, so we can skip this bit.
686 */
687 old_node_bit >>= 1;
688 bit = old_node_bit + 1;
689
Willy Tarreauc2186022009-10-26 19:48:54 +0100690 /* walk down */
691 root = &old->node.branches;
Willy Tarreau3a932442010-05-09 19:29:23 +0200692 side = old_node_bit & 7;
693 side ^= 7;
694 side = (new->key[old_node_bit >> 3] >> side) & 1;
Willy Tarreauc2186022009-10-26 19:48:54 +0100695 troot = root->b[side];
696 }
697
Willy Tarreau3a932442010-05-09 19:29:23 +0200698 /* Right here, we have 4 possibilities :
699 * - the tree does not contain any leaf matching the
700 * key, and we have new->key < old->key. We insert
701 * new above old, on the left ;
702 *
703 * - the tree does not contain any leaf matching the
704 * key, and we have new->key > old->key. We insert
705 * new above old, on the right ;
706 *
707 * - the tree does contain the key with the same prefix
708 * length. We add the new key next to it as a first
709 * duplicate (since it was alone).
710 *
711 * The last two cases can easily be partially merged.
712 *
713 * - the tree contains a leaf matching the key, we have
714 * to insert above it as a cover node. The leaf with
715 * the shortest prefix becomes the left subtree and
716 * the leaf with the longest prefix becomes the right
717 * one. The cover node gets the min of both prefixes
718 * as its new bit.
Willy Tarreauc2186022009-10-26 19:48:54 +0100719 */
720
Willy Tarreau3a932442010-05-09 19:29:23 +0200721 /* first we want to ensure that we compare the correct bit, which means
722 * the largest common to both nodes.
Willy Tarreauc2186022009-10-26 19:48:54 +0100723 */
Willy Tarreau3a932442010-05-09 19:29:23 +0200724 if (bit > new->node.pfx)
725 bit = new->node.pfx;
726 if (bit > old->node.pfx)
727 bit = old->node.pfx;
728
Willy Tarreau3a932442010-05-09 19:29:23 +0200729 new->node.bit = (bit << 1) + 1; /* assume normal node by default */
730
731 /* if one prefix is included in the second one, we don't compare bits
732 * because they won't necessarily match, we just proceed with a cover
733 * node insertion.
734 */
735 diff = 0;
736 if (bit < old->node.pfx && bit < new->node.pfx)
737 diff = cmp_bits(new->key, old->key, bit);
738
739 if (diff == 0) {
740 /* Both keys match. Either it's a duplicate entry or we have to
741 * put the shortest prefix left and the largest one right below
742 * a new cover node. By default, diff==0 means we'll be inserted
743 * on the right.
744 */
745 new->node.bit--; /* anticipate cover node insertion */
746 if (new->node.pfx == old->node.pfx) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200747 new->node.bit = -1; /* mark as new dup tree, just in case */
748
749 if (unlikely(eb_gettag(root_right))) {
750 /* we refuse to duplicate this key if the tree is
751 * tagged as containing only unique keys.
752 */
753 return old;
754 }
755
756 if (eb_gettag(troot) != EB_LEAF) {
757 /* there was already a dup tree below */
758 struct eb_node *ret;
759 ret = eb_insert_dup(&old->node, &new->node);
760 return container_of(ret, struct ebmb_node, node);
761 }
762 /* otherwise fall through to insert first duplicate */
763 }
764 /* otherwise we just rely on the tests below to select the right side */
765 else if (new->node.pfx < old->node.pfx)
766 diff = -1; /* force insertion to left side */
767 }
768
769 insert_above:
770 new_left = eb_dotag(&new->node.branches, EB_LEFT);
771 new_rght = eb_dotag(&new->node.branches, EB_RGHT);
772 new_leaf = eb_dotag(&new->node.branches, EB_LEAF);
773
774 if (diff >= 0) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200775 new->node.branches.b[EB_LEFT] = troot;
776 new->node.branches.b[EB_RGHT] = new_leaf;
777 new->node.leaf_p = new_rght;
778 *up_ptr = new_left;
779 }
780 else {
Willy Tarreau3a932442010-05-09 19:29:23 +0200781 new->node.branches.b[EB_LEFT] = new_leaf;
782 new->node.branches.b[EB_RGHT] = troot;
783 new->node.leaf_p = new_left;
784 *up_ptr = new_rght;
785 }
786
Willy Tarreauc2186022009-10-26 19:48:54 +0100787 root->b[side] = eb_dotag(&new->node.branches, EB_NODE);
788 return new;
789}
790
Willy Tarreau3a932442010-05-09 19:29:23 +0200791
792
Willy Tarreauead63a02009-11-02 14:41:23 +0100793#endif /* _EBMBTREE_H */
794