/*
 * Elastic Binary Trees - macros and structures for Multi-Byte data nodes.
 * Version 6.0.6
 * (C) 2002-2011 - Willy Tarreau <w@1wt.eu>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, version 2.1
 * exclusively.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

Willy Tarreauead63a02009-11-02 14:41:23 +010021#ifndef _EBMBTREE_H
22#define _EBMBTREE_H
23
Willy Tarreauc2186022009-10-26 19:48:54 +010024#include <string.h>
25#include "ebtree.h"
26
/* Return the structure of type <type> whose member <member> points to <ptr> */
#define ebmb_entry(ptr, type, member) container_of(ptr, type, member)

/* Multi-byte trees reuse the generic root initializers as-is */
#define EBMB_ROOT	EB_ROOT
#define EBMB_TREE_HEAD	EB_TREE_HEAD

33/* This structure carries a node, a leaf, and a key. It must start with the
34 * eb_node so that it can be cast into an eb_node. We could also have put some
35 * sort of transparent union here to reduce the indirection level, but the fact
36 * is, the end user is not meant to manipulate internals, so this is pointless.
37 * The 'node.bit' value here works differently from scalar types, as it contains
38 * the number of identical bits between the two branches.
Willy Tarreau41136de2020-02-22 15:55:33 +010039 * Note that we take a great care of making sure the key is located exactly at
40 * the end of the struct even if that involves holes before it, so that it
41 * always aliases any external key a user would append after. This is why the
42 * key uses the same alignment as the struct.
Willy Tarreauc2186022009-10-26 19:48:54 +010043 */
44struct ebmb_node {
45 struct eb_node node; /* the tree node, must be at the beginning */
Willy Tarreau41136de2020-02-22 15:55:33 +010046 ALWAYS_ALIGN(sizeof(void*));
Willy Tarreauc2186022009-10-26 19:48:54 +010047 unsigned char key[0]; /* the key, its size depends on the application */
Willy Tarreau41136de2020-02-22 15:55:33 +010048} ALIGNED(sizeof(void*));
Willy Tarreauc2186022009-10-26 19:48:54 +010049
/*
 * Exported functions and macros.
 * Many of them are always inlined because they are extremely small, and
 * are generally called at most once or twice in a program.
 */

56/* Return leftmost node in the tree, or NULL if none */
57static forceinline struct ebmb_node *ebmb_first(struct eb_root *root)
58{
59 return ebmb_entry(eb_first(root), struct ebmb_node, node);
60}
61
62/* Return rightmost node in the tree, or NULL if none */
63static forceinline struct ebmb_node *ebmb_last(struct eb_root *root)
64{
65 return ebmb_entry(eb_last(root), struct ebmb_node, node);
66}
67
68/* Return next node in the tree, or NULL if none */
69static forceinline struct ebmb_node *ebmb_next(struct ebmb_node *ebmb)
70{
71 return ebmb_entry(eb_next(&ebmb->node), struct ebmb_node, node);
72}
73
74/* Return previous node in the tree, or NULL if none */
75static forceinline struct ebmb_node *ebmb_prev(struct ebmb_node *ebmb)
76{
77 return ebmb_entry(eb_prev(&ebmb->node), struct ebmb_node, node);
78}
79
Willy Tarreau2b570202013-05-07 15:58:28 +020080/* Return next leaf node within a duplicate sub-tree, or NULL if none. */
81static inline struct ebmb_node *ebmb_next_dup(struct ebmb_node *ebmb)
82{
83 return ebmb_entry(eb_next_dup(&ebmb->node), struct ebmb_node, node);
84}
85
86/* Return previous leaf node within a duplicate sub-tree, or NULL if none. */
87static inline struct ebmb_node *ebmb_prev_dup(struct ebmb_node *ebmb)
88{
89 return ebmb_entry(eb_prev_dup(&ebmb->node), struct ebmb_node, node);
90}
91
Willy Tarreauc2186022009-10-26 19:48:54 +010092/* Return next node in the tree, skipping duplicates, or NULL if none */
93static forceinline struct ebmb_node *ebmb_next_unique(struct ebmb_node *ebmb)
94{
95 return ebmb_entry(eb_next_unique(&ebmb->node), struct ebmb_node, node);
96}
97
98/* Return previous node in the tree, skipping duplicates, or NULL if none */
99static forceinline struct ebmb_node *ebmb_prev_unique(struct ebmb_node *ebmb)
100{
101 return ebmb_entry(eb_prev_unique(&ebmb->node), struct ebmb_node, node);
102}
103
104/* Delete node from the tree if it was linked in. Mark the node unused. Note
105 * that this function relies on a non-inlined generic function: eb_delete.
106 */
107static forceinline void ebmb_delete(struct ebmb_node *ebmb)
108{
109 eb_delete(&ebmb->node);
110}
111
/* The following functions are not inlined by default. They are declared
 * in ebmbtree.c, which simply relies on their inline version.
 */
struct ebmb_node *ebmb_lookup(struct eb_root *root, const void *x, unsigned int len);
struct ebmb_node *ebmb_insert(struct eb_root *root, struct ebmb_node *new, unsigned int len);
struct ebmb_node *ebmb_lookup_longest(struct eb_root *root, const void *x);
struct ebmb_node *ebmb_lookup_prefix(struct eb_root *root, const void *x, unsigned int pfx);
struct ebmb_node *ebmb_insert_prefix(struct eb_root *root, struct ebmb_node *new, unsigned int len);

/* The following functions are less likely to be used directly, because their
 * code is larger. The non-inlined version is preferred.
 */

125/* Delete node from the tree if it was linked in. Mark the node unused. */
126static forceinline void __ebmb_delete(struct ebmb_node *ebmb)
127{
128 __eb_delete(&ebmb->node);
129}
130
Joseph Herlant7c16c0e2018-11-13 19:55:57 -0800131/* Find the first occurrence of a key of a least <len> bytes matching <x> in the
Willy Tarreau414c4b22011-01-04 13:21:06 +0100132 * tree <root>. The caller is responsible for ensuring that <len> will not exceed
133 * the common parts between the tree's keys and <x>. In case of multiple matches,
134 * the leftmost node is returned. This means that this function can be used to
135 * lookup string keys by prefix if all keys in the tree are zero-terminated. If
136 * no match is found, NULL is returned. Returns first node if <len> is zero.
Willy Tarreauc2186022009-10-26 19:48:54 +0100137 */
138static forceinline struct ebmb_node *__ebmb_lookup(struct eb_root *root, const void *x, unsigned int len)
139{
140 struct ebmb_node *node;
141 eb_troot_t *troot;
Willy Tarreau3a932442010-05-09 19:29:23 +0200142 int pos, side;
143 int node_bit;
Willy Tarreauc2186022009-10-26 19:48:54 +0100144
145 troot = root->b[EB_LEFT];
146 if (unlikely(troot == NULL))
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100147 goto ret_null;
Willy Tarreauc2186022009-10-26 19:48:54 +0100148
Willy Tarreau414c4b22011-01-04 13:21:06 +0100149 if (unlikely(len == 0))
150 goto walk_down;
151
Willy Tarreau3a932442010-05-09 19:29:23 +0200152 pos = 0;
Willy Tarreauc2186022009-10-26 19:48:54 +0100153 while (1) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200154 if (eb_gettag(troot) == EB_LEAF) {
Willy Tarreauc2186022009-10-26 19:48:54 +0100155 node = container_of(eb_untag(troot, EB_LEAF),
156 struct ebmb_node, node.branches);
Willy Tarreau853926a2020-06-16 11:10:53 +0200157 if (eb_memcmp(node->key + pos, x, len) != 0)
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100158 goto ret_null;
Willy Tarreau3a932442010-05-09 19:29:23 +0200159 else
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100160 goto ret_node;
Willy Tarreauc2186022009-10-26 19:48:54 +0100161 }
162 node = container_of(eb_untag(troot, EB_NODE),
163 struct ebmb_node, node.branches);
164
Willy Tarreau3a932442010-05-09 19:29:23 +0200165 node_bit = node->node.bit;
166 if (node_bit < 0) {
Willy Tarreauc2186022009-10-26 19:48:54 +0100167 /* We have a dup tree now. Either it's for the same
168 * value, and we walk down left, or it's a different
169 * one and we don't have our key.
170 */
Willy Tarreau853926a2020-06-16 11:10:53 +0200171 if (eb_memcmp(node->key + pos, x, len) != 0)
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100172 goto ret_null;
173 else
174 goto walk_left;
Willy Tarreauc2186022009-10-26 19:48:54 +0100175 }
176
Willy Tarreau3a932442010-05-09 19:29:23 +0200177 /* OK, normal data node, let's walk down. We check if all full
178 * bytes are equal, and we start from the last one we did not
179 * completely check. We stop as soon as we reach the last byte,
180 * because we must decide to go left/right or abort.
181 */
182 node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
183 if (node_bit < 0) {
Joseph Herlant7c16c0e2018-11-13 19:55:57 -0800184 /* This surprising construction gives better performance
Willy Tarreau3a932442010-05-09 19:29:23 +0200185 * because gcc does not try to reorder the loop. Tested to
186 * be fine with 2.95 to 4.2.
187 */
188 while (1) {
Willy Tarreau414c4b22011-01-04 13:21:06 +0100189 if (node->key[pos++] ^ *(unsigned char*)(x++))
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100190 goto ret_null; /* more than one full byte is different */
Willy Tarreau414c4b22011-01-04 13:21:06 +0100191 if (--len == 0)
192 goto walk_left; /* return first node if all bytes matched */
Willy Tarreau3a932442010-05-09 19:29:23 +0200193 node_bit += 8;
194 if (node_bit >= 0)
195 break;
196 }
197 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100198
Willy Tarreau3a932442010-05-09 19:29:23 +0200199 /* here we know that only the last byte differs, so node_bit < 8.
200 * We have 2 possibilities :
201 * - more than the last bit differs => return NULL
202 * - walk down on side = (x[pos] >> node_bit) & 1
203 */
204 side = *(unsigned char *)x >> node_bit;
205 if (((node->key[pos] >> node_bit) ^ side) > 1)
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100206 goto ret_null;
Willy Tarreau3a932442010-05-09 19:29:23 +0200207 side &= 1;
208 troot = node->node.branches.b[side];
Willy Tarreauc2186022009-10-26 19:48:54 +0100209 }
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100210 walk_left:
211 troot = node->node.branches.b[EB_LEFT];
212 walk_down:
213 while (eb_gettag(troot) != EB_LEAF)
214 troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
215 node = container_of(eb_untag(troot, EB_LEAF),
216 struct ebmb_node, node.branches);
217 ret_node:
218 return node;
219 ret_null:
220 return NULL;
Willy Tarreauc2186022009-10-26 19:48:54 +0100221}
222
223/* Insert ebmb_node <new> into subtree starting at node root <root>.
224 * Only new->key needs be set with the key. The ebmb_node is returned.
225 * If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
Willy Tarreau414c4b22011-01-04 13:21:06 +0100226 * len is specified in bytes. It is absolutely mandatory that this length
227 * is the same for all keys in the tree. This function cannot be used to
228 * insert strings.
Willy Tarreauc2186022009-10-26 19:48:54 +0100229 */
230static forceinline struct ebmb_node *
231__ebmb_insert(struct eb_root *root, struct ebmb_node *new, unsigned int len)
232{
233 struct ebmb_node *old;
234 unsigned int side;
Willy Tarreau3a932442010-05-09 19:29:23 +0200235 eb_troot_t *troot, **up_ptr;
Willy Tarreau6258f7b2011-09-19 20:48:00 +0200236 eb_troot_t *root_right;
Willy Tarreauc2186022009-10-26 19:48:54 +0100237 int diff;
238 int bit;
Willy Tarreau3a932442010-05-09 19:29:23 +0200239 eb_troot_t *new_left, *new_rght;
240 eb_troot_t *new_leaf;
241 int old_node_bit;
Willy Tarreauc2186022009-10-26 19:48:54 +0100242
243 side = EB_LEFT;
244 troot = root->b[EB_LEFT];
245 root_right = root->b[EB_RGHT];
246 if (unlikely(troot == NULL)) {
247 /* Tree is empty, insert the leaf part below the left branch */
248 root->b[EB_LEFT] = eb_dotag(&new->node.branches, EB_LEAF);
249 new->node.leaf_p = eb_dotag(root, EB_LEFT);
250 new->node.node_p = NULL; /* node part unused */
251 return new;
252 }
253
Willy Tarreauc2186022009-10-26 19:48:54 +0100254 /* The tree descent is fairly easy :
255 * - first, check if we have reached a leaf node
256 * - second, check if we have gone too far
257 * - third, reiterate
258 * Everywhere, we use <new> for the node node we are inserting, <root>
259 * for the node we attach it to, and <old> for the node we are
260 * displacing below <new>. <troot> will always point to the future node
261 * (tagged with its type). <side> carries the side the node <new> is
262 * attached to below its parent, which is also where previous node
263 * was attached.
264 */
265
266 bit = 0;
267 while (1) {
268 if (unlikely(eb_gettag(troot) == EB_LEAF)) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200269 /* insert above a leaf */
Willy Tarreauc2186022009-10-26 19:48:54 +0100270 old = container_of(eb_untag(troot, EB_LEAF),
271 struct ebmb_node, node.branches);
Willy Tarreauc2186022009-10-26 19:48:54 +0100272 new->node.node_p = old->node.leaf_p;
Willy Tarreau3a932442010-05-09 19:29:23 +0200273 up_ptr = &old->node.leaf_p;
274 goto check_bit_and_break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100275 }
276
277 /* OK we're walking down this link */
278 old = container_of(eb_untag(troot, EB_NODE),
279 struct ebmb_node, node.branches);
Willy Tarreau3a932442010-05-09 19:29:23 +0200280 old_node_bit = old->node.bit;
281
282 if (unlikely(old->node.bit < 0)) {
283 /* We're above a duplicate tree, so we must compare the whole value */
284 new->node.node_p = old->node.node_p;
285 up_ptr = &old->node.node_p;
286 check_bit_and_break:
287 bit = equal_bits(new->key, old->key, bit, len << 3);
288 break;
289 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100290
291 /* Stop going down when we don't have common bits anymore. We
292 * also stop in front of a duplicates tree because it means we
293 * have to insert above. Note: we can compare more bits than
294 * the current node's because as long as they are identical, we
295 * know we descend along the correct side.
296 */
Willy Tarreau3a932442010-05-09 19:29:23 +0200297
298 bit = equal_bits(new->key, old->key, bit, old_node_bit);
299 if (unlikely(bit < old_node_bit)) {
300 /* The tree did not contain the key, so we insert <new> before the
301 * node <old>, and set ->bit to designate the lowest bit position in
302 * <new> which applies to ->branches.b[].
303 */
304 new->node.node_p = old->node.node_p;
305 up_ptr = &old->node.node_p;
306 break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100307 }
Willy Tarreau3a932442010-05-09 19:29:23 +0200308 /* we don't want to skip bits for further comparisons, so we must limit <bit>.
309 * However, since we're going down around <old_node_bit>, we know it will be
310 * properly matched, so we can skip this bit.
311 */
312 bit = old_node_bit + 1;
313
314 /* walk down */
315 root = &old->node.branches;
316 side = old_node_bit & 7;
317 side ^= 7;
318 side = (new->key[old_node_bit >> 3] >> side) & 1;
319 troot = root->b[side];
320 }
321
322 new_left = eb_dotag(&new->node.branches, EB_LEFT);
323 new_rght = eb_dotag(&new->node.branches, EB_RGHT);
324 new_leaf = eb_dotag(&new->node.branches, EB_LEAF);
325
Willy Tarreau3a932442010-05-09 19:29:23 +0200326 new->node.bit = bit;
Willy Tarreaua4a1cd12012-06-09 15:43:36 +0200327
328 /* Note: we can compare more bits than the current node's because as
329 * long as they are identical, we know we descend along the correct
330 * side. However we don't want to start to compare past the end.
331 */
332 diff = 0;
333 if (((unsigned)bit >> 3) < len)
334 diff = cmp_bits(new->key, old->key, bit);
335
Willy Tarreau3a932442010-05-09 19:29:23 +0200336 if (diff == 0) {
337 new->node.bit = -1; /* mark as new dup tree, just in case */
Willy Tarreauc2186022009-10-26 19:48:54 +0100338
Willy Tarreau3a932442010-05-09 19:29:23 +0200339 if (likely(eb_gettag(root_right))) {
340 /* we refuse to duplicate this key if the tree is
341 * tagged as containing only unique keys.
Willy Tarreauc2186022009-10-26 19:48:54 +0100342 */
Willy Tarreau3a932442010-05-09 19:29:23 +0200343 return old;
344 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100345
Willy Tarreau3a932442010-05-09 19:29:23 +0200346 if (eb_gettag(troot) != EB_LEAF) {
347 /* there was already a dup tree below */
348 struct eb_node *ret;
349 ret = eb_insert_dup(&old->node, &new->node);
350 return container_of(ret, struct ebmb_node, node);
351 }
352 /* otherwise fall through */
353 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100354
Willy Tarreau3a932442010-05-09 19:29:23 +0200355 if (diff >= 0) {
356 new->node.branches.b[EB_LEFT] = troot;
357 new->node.branches.b[EB_RGHT] = new_leaf;
358 new->node.leaf_p = new_rght;
359 *up_ptr = new_left;
360 }
361 else if (diff < 0) {
362 new->node.branches.b[EB_LEFT] = new_leaf;
363 new->node.branches.b[EB_RGHT] = troot;
364 new->node.leaf_p = new_left;
365 *up_ptr = new_rght;
366 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100367
Willy Tarreau3a932442010-05-09 19:29:23 +0200368 /* Ok, now we are inserting <new> between <root> and <old>. <old>'s
369 * parent is already set to <new>, and the <root>'s branch is still in
370 * <side>. Update the root's leaf till we have it. Note that we can also
371 * find the side by checking the side of new->node.node_p.
372 */
373
374 root->b[side] = eb_dotag(&new->node.branches, EB_NODE);
375 return new;
376}
377
378
Joseph Herlant7c16c0e2018-11-13 19:55:57 -0800379/* Find the first occurrence of the longest prefix matching a key <x> in the
Willy Tarreau3a932442010-05-09 19:29:23 +0200380 * tree <root>. It's the caller's responsibility to ensure that key <x> is at
Willy Tarreau9f791932014-05-10 08:34:01 +0200381 * least as long as the keys in the tree. Note that this can be ensured by
382 * having a byte at the end of <x> which cannot be part of any prefix, typically
383 * the trailing zero for a string. If none can be found, return NULL.
Willy Tarreau3a932442010-05-09 19:29:23 +0200384 */
385static forceinline struct ebmb_node *__ebmb_lookup_longest(struct eb_root *root, const void *x)
386{
387 struct ebmb_node *node;
388 eb_troot_t *troot, *cover;
389 int pos, side;
390 int node_bit;
391
392 troot = root->b[EB_LEFT];
393 if (unlikely(troot == NULL))
394 return NULL;
395
396 cover = NULL;
397 pos = 0;
398 while (1) {
399 if ((eb_gettag(troot) == EB_LEAF)) {
400 node = container_of(eb_untag(troot, EB_LEAF),
401 struct ebmb_node, node.branches);
402 if (check_bits(x - pos, node->key, pos, node->node.pfx))
403 goto not_found;
404
405 return node;
406 }
407 node = container_of(eb_untag(troot, EB_NODE),
408 struct ebmb_node, node.branches);
409
410 node_bit = node->node.bit;
411 if (node_bit < 0) {
412 /* We have a dup tree now. Either it's for the same
413 * value, and we walk down left, or it's a different
414 * one and we don't have our key.
415 */
416 if (check_bits(x - pos, node->key, pos, node->node.pfx))
417 goto not_found;
418
419 troot = node->node.branches.b[EB_LEFT];
420 while (eb_gettag(troot) != EB_LEAF)
421 troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
422 node = container_of(eb_untag(troot, EB_LEAF),
423 struct ebmb_node, node.branches);
424 return node;
425 }
426
427 node_bit >>= 1; /* strip cover bit */
428 node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
429 if (node_bit < 0) {
430 /* This uncommon construction gives better performance
431 * because gcc does not try to reorder the loop. Tested to
432 * be fine with 2.95 to 4.2.
433 */
434 while (1) {
435 x++; pos++;
436 if (node->key[pos-1] ^ *(unsigned char*)(x-1))
437 goto not_found; /* more than one full byte is different */
438 node_bit += 8;
439 if (node_bit >= 0)
440 break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100441 }
Willy Tarreau3a932442010-05-09 19:29:23 +0200442 }
443
444 /* here we know that only the last byte differs, so 0 <= node_bit <= 7.
445 * We have 2 possibilities :
446 * - more than the last bit differs => data does not match
447 * - walk down on side = (x[pos] >> node_bit) & 1
448 */
449 side = *(unsigned char *)x >> node_bit;
450 if (((node->key[pos] >> node_bit) ^ side) > 1)
451 goto not_found;
452
453 if (!(node->node.bit & 1)) {
454 /* This is a cover node, let's keep a reference to it
455 * for later. The covering subtree is on the left, and
456 * the covered subtree is on the right, so we have to
457 * walk down right.
458 */
459 cover = node->node.branches.b[EB_LEFT];
460 troot = node->node.branches.b[EB_RGHT];
461 continue;
462 }
463 side &= 1;
464 troot = node->node.branches.b[side];
465 }
466
467 not_found:
Thayne McCombs8f0cc5c2021-01-07 21:35:52 -0700468 /* Walk down last cover tree if it exists. It does not matter if cover is NULL */
Willy Tarreau3a932442010-05-09 19:29:23 +0200469 return ebmb_entry(eb_walk_down(cover, EB_LEFT), struct ebmb_node, node);
470}
471
472
Joseph Herlant7c16c0e2018-11-13 19:55:57 -0800473/* Find the first occurrence of a prefix matching a key <x> of <pfx> BITS in the
Willy Tarreau414c4b22011-01-04 13:21:06 +0100474 * tree <root>. It's the caller's responsibility to ensure that key <x> is at
Willy Tarreau9f791932014-05-10 08:34:01 +0200475 * least as long as the keys in the tree. Note that this can be ensured by
476 * having a byte at the end of <x> which cannot be part of any prefix, typically
477 * the trailing zero for a string. If none can be found, return NULL.
Willy Tarreau3a932442010-05-09 19:29:23 +0200478 */
479static forceinline struct ebmb_node *__ebmb_lookup_prefix(struct eb_root *root, const void *x, unsigned int pfx)
480{
481 struct ebmb_node *node;
482 eb_troot_t *troot;
483 int pos, side;
484 int node_bit;
485
486 troot = root->b[EB_LEFT];
487 if (unlikely(troot == NULL))
488 return NULL;
489
490 pos = 0;
491 while (1) {
492 if ((eb_gettag(troot) == EB_LEAF)) {
493 node = container_of(eb_untag(troot, EB_LEAF),
494 struct ebmb_node, node.branches);
495 if (node->node.pfx != pfx)
496 return NULL;
497 if (check_bits(x - pos, node->key, pos, node->node.pfx))
498 return NULL;
499 return node;
500 }
501 node = container_of(eb_untag(troot, EB_NODE),
502 struct ebmb_node, node.branches);
503
504 node_bit = node->node.bit;
505 if (node_bit < 0) {
506 /* We have a dup tree now. Either it's for the same
507 * value, and we walk down left, or it's a different
508 * one and we don't have our key.
509 */
510 if (node->node.pfx != pfx)
511 return NULL;
512 if (check_bits(x - pos, node->key, pos, node->node.pfx))
513 return NULL;
514
515 troot = node->node.branches.b[EB_LEFT];
516 while (eb_gettag(troot) != EB_LEAF)
517 troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
518 node = container_of(eb_untag(troot, EB_LEAF),
519 struct ebmb_node, node.branches);
520 return node;
521 }
522
523 node_bit >>= 1; /* strip cover bit */
524 node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
525 if (node_bit < 0) {
526 /* This uncommon construction gives better performance
527 * because gcc does not try to reorder the loop. Tested to
528 * be fine with 2.95 to 4.2.
529 */
530 while (1) {
531 x++; pos++;
532 if (node->key[pos-1] ^ *(unsigned char*)(x-1))
533 return NULL; /* more than one full byte is different */
534 node_bit += 8;
535 if (node_bit >= 0)
536 break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100537 }
Willy Tarreau3a932442010-05-09 19:29:23 +0200538 }
539
540 /* here we know that only the last byte differs, so 0 <= node_bit <= 7.
541 * We have 2 possibilities :
542 * - more than the last bit differs => data does not match
543 * - walk down on side = (x[pos] >> node_bit) & 1
544 */
545 side = *(unsigned char *)x >> node_bit;
546 if (((node->key[pos] >> node_bit) ^ side) > 1)
547 return NULL;
548
549 if (!(node->node.bit & 1)) {
550 /* This is a cover node, it may be the entry we're
551 * looking for. We already know that it matches all the
552 * bits, let's compare prefixes and descend the cover
553 * subtree if they match.
554 */
Willy Tarreau22c0a932011-07-25 12:22:44 +0200555 if ((unsigned short)node->node.bit >> 1 == pfx)
Willy Tarreau3a932442010-05-09 19:29:23 +0200556 troot = node->node.branches.b[EB_LEFT];
557 else
558 troot = node->node.branches.b[EB_RGHT];
559 continue;
560 }
561 side &= 1;
562 troot = node->node.branches.b[side];
563 }
564}
565
566
567/* Insert ebmb_node <new> into a prefix subtree starting at node root <root>.
568 * Only new->key and new->pfx need be set with the key and its prefix length.
Ilya Shipitsin01881082021-08-07 14:41:56 +0500569 * Note that bits between <pfx> and <len> are theoretically ignored and should be
Willy Tarreau3a932442010-05-09 19:29:23 +0200570 * zero, as it is not certain yet that they will always be ignored everywhere
571 * (eg in bit compare functions).
572 * The ebmb_node is returned.
573 * If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
574 * len is specified in bytes.
575 */
576static forceinline struct ebmb_node *
577__ebmb_insert_prefix(struct eb_root *root, struct ebmb_node *new, unsigned int len)
578{
579 struct ebmb_node *old;
580 unsigned int side;
581 eb_troot_t *troot, **up_ptr;
Willy Tarreau6258f7b2011-09-19 20:48:00 +0200582 eb_troot_t *root_right;
Willy Tarreau3a932442010-05-09 19:29:23 +0200583 int diff;
584 int bit;
585 eb_troot_t *new_left, *new_rght;
586 eb_troot_t *new_leaf;
587 int old_node_bit;
588
589 side = EB_LEFT;
590 troot = root->b[EB_LEFT];
591 root_right = root->b[EB_RGHT];
592 if (unlikely(troot == NULL)) {
593 /* Tree is empty, insert the leaf part below the left branch */
594 root->b[EB_LEFT] = eb_dotag(&new->node.branches, EB_LEAF);
595 new->node.leaf_p = eb_dotag(root, EB_LEFT);
596 new->node.node_p = NULL; /* node part unused */
597 return new;
598 }
599
600 len <<= 3;
601 if (len > new->node.pfx)
602 len = new->node.pfx;
603
604 /* The tree descent is fairly easy :
605 * - first, check if we have reached a leaf node
606 * - second, check if we have gone too far
607 * - third, reiterate
608 * Everywhere, we use <new> for the node node we are inserting, <root>
609 * for the node we attach it to, and <old> for the node we are
610 * displacing below <new>. <troot> will always point to the future node
611 * (tagged with its type). <side> carries the side the node <new> is
612 * attached to below its parent, which is also where previous node
613 * was attached.
614 */
615
616 bit = 0;
617 while (1) {
618 if (unlikely(eb_gettag(troot) == EB_LEAF)) {
619 /* Insert above a leaf. Note that this leaf could very
620 * well be part of a cover node.
621 */
622 old = container_of(eb_untag(troot, EB_LEAF),
623 struct ebmb_node, node.branches);
624 new->node.node_p = old->node.leaf_p;
625 up_ptr = &old->node.leaf_p;
626 goto check_bit_and_break;
627 }
628
629 /* OK we're walking down this link */
630 old = container_of(eb_untag(troot, EB_NODE),
631 struct ebmb_node, node.branches);
632 old_node_bit = old->node.bit;
633 /* Note that old_node_bit can be :
634 * < 0 : dup tree
635 * = 2N : cover node for N bits
636 * = 2N+1 : normal node at N bits
637 */
638
639 if (unlikely(old_node_bit < 0)) {
640 /* We're above a duplicate tree, so we must compare the whole value */
641 new->node.node_p = old->node.node_p;
642 up_ptr = &old->node.node_p;
643 check_bit_and_break:
644 /* No need to compare everything if the leaves are shorter than the new one. */
645 if (len > old->node.pfx)
646 len = old->node.pfx;
647 bit = equal_bits(new->key, old->key, bit, len);
Willy Tarreauc2186022009-10-26 19:48:54 +0100648 break;
649 }
650
Willy Tarreau3a932442010-05-09 19:29:23 +0200651 /* WARNING: for the two blocks below, <bit> is counted in half-bits */
652
653 bit = equal_bits(new->key, old->key, bit, old_node_bit >> 1);
654 bit = (bit << 1) + 1; // assume comparisons with normal nodes
Willy Tarreau3a932442010-05-09 19:29:23 +0200655
656 /* we must always check that our prefix is larger than the nodes
657 * we visit, otherwise we have to stop going down. The following
658 * test is able to stop before both normal and cover nodes.
659 */
660 if (bit >= (new->node.pfx << 1) && (new->node.pfx << 1) < old_node_bit) {
661 /* insert cover node here on the left */
662 new->node.node_p = old->node.node_p;
663 up_ptr = &old->node.node_p;
664 new->node.bit = new->node.pfx << 1;
665 diff = -1;
Willy Tarreau3a932442010-05-09 19:29:23 +0200666 goto insert_above;
667 }
668
669 if (unlikely(bit < old_node_bit)) {
670 /* The tree did not contain the key, so we insert <new> before the
671 * node <old>, and set ->bit to designate the lowest bit position in
672 * <new> which applies to ->branches.b[]. We know that the bit is not
673 * greater than the prefix length thanks to the test above.
674 */
675 new->node.node_p = old->node.node_p;
676 up_ptr = &old->node.node_p;
677 new->node.bit = bit;
678 diff = cmp_bits(new->key, old->key, bit >> 1);
Willy Tarreau3a932442010-05-09 19:29:23 +0200679 goto insert_above;
680 }
681
682 if (!(old_node_bit & 1)) {
683 /* if we encounter a cover node with our exact prefix length, it's
684 * necessarily the same value, so we insert there as a duplicate on
685 * the left. For that, we go down on the left and the leaf detection
686 * code will finish the job.
687 */
688 if ((new->node.pfx << 1) == old_node_bit) {
689 root = &old->node.branches;
690 side = EB_LEFT;
691 troot = root->b[side];
Willy Tarreau3a932442010-05-09 19:29:23 +0200692 continue;
693 }
694
695 /* cover nodes are always walked through on the right */
696 side = EB_RGHT;
697 bit = old_node_bit >> 1; /* recheck that bit */
698 root = &old->node.branches;
699 troot = root->b[side];
Willy Tarreau3a932442010-05-09 19:29:23 +0200700 continue;
701 }
702
703 /* we don't want to skip bits for further comparisons, so we must limit <bit>.
704 * However, since we're going down around <old_node_bit>, we know it will be
705 * properly matched, so we can skip this bit.
706 */
707 old_node_bit >>= 1;
708 bit = old_node_bit + 1;
709
Willy Tarreauc2186022009-10-26 19:48:54 +0100710 /* walk down */
711 root = &old->node.branches;
Willy Tarreau3a932442010-05-09 19:29:23 +0200712 side = old_node_bit & 7;
713 side ^= 7;
714 side = (new->key[old_node_bit >> 3] >> side) & 1;
Willy Tarreauc2186022009-10-26 19:48:54 +0100715 troot = root->b[side];
716 }
717
Willy Tarreau3a932442010-05-09 19:29:23 +0200718 /* Right here, we have 4 possibilities :
719 * - the tree does not contain any leaf matching the
720 * key, and we have new->key < old->key. We insert
721 * new above old, on the left ;
722 *
723 * - the tree does not contain any leaf matching the
724 * key, and we have new->key > old->key. We insert
725 * new above old, on the right ;
726 *
727 * - the tree does contain the key with the same prefix
728 * length. We add the new key next to it as a first
729 * duplicate (since it was alone).
730 *
731 * The last two cases can easily be partially merged.
732 *
733 * - the tree contains a leaf matching the key, we have
734 * to insert above it as a cover node. The leaf with
735 * the shortest prefix becomes the left subtree and
736 * the leaf with the longest prefix becomes the right
737 * one. The cover node gets the min of both prefixes
738 * as its new bit.
Willy Tarreauc2186022009-10-26 19:48:54 +0100739 */
740
Willy Tarreau3a932442010-05-09 19:29:23 +0200741 /* first we want to ensure that we compare the correct bit, which means
742 * the largest common to both nodes.
Willy Tarreauc2186022009-10-26 19:48:54 +0100743 */
Willy Tarreau3a932442010-05-09 19:29:23 +0200744 if (bit > new->node.pfx)
745 bit = new->node.pfx;
746 if (bit > old->node.pfx)
747 bit = old->node.pfx;
748
Willy Tarreau3a932442010-05-09 19:29:23 +0200749 new->node.bit = (bit << 1) + 1; /* assume normal node by default */
750
751 /* if one prefix is included in the second one, we don't compare bits
752 * because they won't necessarily match, we just proceed with a cover
753 * node insertion.
754 */
755 diff = 0;
756 if (bit < old->node.pfx && bit < new->node.pfx)
757 diff = cmp_bits(new->key, old->key, bit);
758
759 if (diff == 0) {
760 /* Both keys match. Either it's a duplicate entry or we have to
761 * put the shortest prefix left and the largest one right below
762 * a new cover node. By default, diff==0 means we'll be inserted
763 * on the right.
764 */
765 new->node.bit--; /* anticipate cover node insertion */
766 if (new->node.pfx == old->node.pfx) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200767 new->node.bit = -1; /* mark as new dup tree, just in case */
768
769 if (unlikely(eb_gettag(root_right))) {
770 /* we refuse to duplicate this key if the tree is
771 * tagged as containing only unique keys.
772 */
773 return old;
774 }
775
776 if (eb_gettag(troot) != EB_LEAF) {
777 /* there was already a dup tree below */
778 struct eb_node *ret;
779 ret = eb_insert_dup(&old->node, &new->node);
780 return container_of(ret, struct ebmb_node, node);
781 }
782 /* otherwise fall through to insert first duplicate */
783 }
784 /* otherwise we just rely on the tests below to select the right side */
785 else if (new->node.pfx < old->node.pfx)
786 diff = -1; /* force insertion to left side */
787 }
788
789 insert_above:
790 new_left = eb_dotag(&new->node.branches, EB_LEFT);
791 new_rght = eb_dotag(&new->node.branches, EB_RGHT);
792 new_leaf = eb_dotag(&new->node.branches, EB_LEAF);
793
794 if (diff >= 0) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200795 new->node.branches.b[EB_LEFT] = troot;
796 new->node.branches.b[EB_RGHT] = new_leaf;
797 new->node.leaf_p = new_rght;
798 *up_ptr = new_left;
799 }
800 else {
Willy Tarreau3a932442010-05-09 19:29:23 +0200801 new->node.branches.b[EB_LEFT] = new_leaf;
802 new->node.branches.b[EB_RGHT] = troot;
803 new->node.leaf_p = new_left;
804 *up_ptr = new_rght;
805 }
806
Willy Tarreauc2186022009-10-26 19:48:54 +0100807 root->b[side] = eb_dotag(&new->node.branches, EB_NODE);
808 return new;
809}
810
Willy Tarreau3a932442010-05-09 19:29:23 +0200811
812
Willy Tarreauead63a02009-11-02 14:41:23 +0100813#endif /* _EBMBTREE_H */
814