/*
 * Elastic Binary Trees - macros and structures for Multi-Byte data nodes.
 * Version 6.0.6
 * (C) 2002-2011 - Willy Tarreau <w@1wt.eu>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, version 2.1
 * exclusively.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */

Willy Tarreauead63a02009-11-02 14:41:23 +010021#ifndef _EBMBTREE_H
22#define _EBMBTREE_H
23
Willy Tarreauc2186022009-10-26 19:48:54 +010024#include <string.h>
25#include "ebtree.h"
26
27/* Return the structure of type <type> whose member <member> points to <ptr> */
28#define ebmb_entry(ptr, type, member) container_of(ptr, type, member)
29
30#define EBMB_ROOT EB_ROOT
31#define EBMB_TREE_HEAD EB_TREE_HEAD
32
33/* This structure carries a node, a leaf, and a key. It must start with the
34 * eb_node so that it can be cast into an eb_node. We could also have put some
35 * sort of transparent union here to reduce the indirection level, but the fact
36 * is, the end user is not meant to manipulate internals, so this is pointless.
37 * The 'node.bit' value here works differently from scalar types, as it contains
38 * the number of identical bits between the two branches.
39 */
40struct ebmb_node {
41 struct eb_node node; /* the tree node, must be at the beginning */
42 unsigned char key[0]; /* the key, its size depends on the application */
43};
44
/*
 * Exported functions and macros.
 * Many of them are always inlined because they are extremely small, and
 * are generally called at most once or twice in a program.
 */

51/* Return leftmost node in the tree, or NULL if none */
52static forceinline struct ebmb_node *ebmb_first(struct eb_root *root)
53{
54 return ebmb_entry(eb_first(root), struct ebmb_node, node);
55}
56
57/* Return rightmost node in the tree, or NULL if none */
58static forceinline struct ebmb_node *ebmb_last(struct eb_root *root)
59{
60 return ebmb_entry(eb_last(root), struct ebmb_node, node);
61}
62
63/* Return next node in the tree, or NULL if none */
64static forceinline struct ebmb_node *ebmb_next(struct ebmb_node *ebmb)
65{
66 return ebmb_entry(eb_next(&ebmb->node), struct ebmb_node, node);
67}
68
69/* Return previous node in the tree, or NULL if none */
70static forceinline struct ebmb_node *ebmb_prev(struct ebmb_node *ebmb)
71{
72 return ebmb_entry(eb_prev(&ebmb->node), struct ebmb_node, node);
73}
74
Willy Tarreau2b570202013-05-07 15:58:28 +020075/* Return next leaf node within a duplicate sub-tree, or NULL if none. */
76static inline struct ebmb_node *ebmb_next_dup(struct ebmb_node *ebmb)
77{
78 return ebmb_entry(eb_next_dup(&ebmb->node), struct ebmb_node, node);
79}
80
81/* Return previous leaf node within a duplicate sub-tree, or NULL if none. */
82static inline struct ebmb_node *ebmb_prev_dup(struct ebmb_node *ebmb)
83{
84 return ebmb_entry(eb_prev_dup(&ebmb->node), struct ebmb_node, node);
85}
86
Willy Tarreauc2186022009-10-26 19:48:54 +010087/* Return next node in the tree, skipping duplicates, or NULL if none */
88static forceinline struct ebmb_node *ebmb_next_unique(struct ebmb_node *ebmb)
89{
90 return ebmb_entry(eb_next_unique(&ebmb->node), struct ebmb_node, node);
91}
92
93/* Return previous node in the tree, skipping duplicates, or NULL if none */
94static forceinline struct ebmb_node *ebmb_prev_unique(struct ebmb_node *ebmb)
95{
96 return ebmb_entry(eb_prev_unique(&ebmb->node), struct ebmb_node, node);
97}
98
99/* Delete node from the tree if it was linked in. Mark the node unused. Note
100 * that this function relies on a non-inlined generic function: eb_delete.
101 */
102static forceinline void ebmb_delete(struct ebmb_node *ebmb)
103{
104 eb_delete(&ebmb->node);
105}
106
107/* The following functions are not inlined by default. They are declared
108 * in ebmbtree.c, which simply relies on their inline version.
109 */
110REGPRM3 struct ebmb_node *ebmb_lookup(struct eb_root *root, const void *x, unsigned int len);
111REGPRM3 struct ebmb_node *ebmb_insert(struct eb_root *root, struct ebmb_node *new, unsigned int len);
Willy Tarreau3a932442010-05-09 19:29:23 +0200112REGPRM2 struct ebmb_node *ebmb_lookup_longest(struct eb_root *root, const void *x);
113REGPRM3 struct ebmb_node *ebmb_lookup_prefix(struct eb_root *root, const void *x, unsigned int pfx);
114REGPRM3 struct ebmb_node *ebmb_insert_prefix(struct eb_root *root, struct ebmb_node *new, unsigned int len);
Willy Tarreauc2186022009-10-26 19:48:54 +0100115
/* The following functions are less likely to be used directly, because their
 * code is larger. The non-inlined version is preferred.
 */

120/* Delete node from the tree if it was linked in. Mark the node unused. */
121static forceinline void __ebmb_delete(struct ebmb_node *ebmb)
122{
123 __eb_delete(&ebmb->node);
124}
125
Joseph Herlant7c16c0e2018-11-13 19:55:57 -0800126/* Find the first occurrence of a key of a least <len> bytes matching <x> in the
Willy Tarreau414c4b22011-01-04 13:21:06 +0100127 * tree <root>. The caller is responsible for ensuring that <len> will not exceed
128 * the common parts between the tree's keys and <x>. In case of multiple matches,
129 * the leftmost node is returned. This means that this function can be used to
130 * lookup string keys by prefix if all keys in the tree are zero-terminated. If
131 * no match is found, NULL is returned. Returns first node if <len> is zero.
Willy Tarreauc2186022009-10-26 19:48:54 +0100132 */
133static forceinline struct ebmb_node *__ebmb_lookup(struct eb_root *root, const void *x, unsigned int len)
134{
135 struct ebmb_node *node;
136 eb_troot_t *troot;
Willy Tarreau3a932442010-05-09 19:29:23 +0200137 int pos, side;
138 int node_bit;
Willy Tarreauc2186022009-10-26 19:48:54 +0100139
140 troot = root->b[EB_LEFT];
141 if (unlikely(troot == NULL))
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100142 goto ret_null;
Willy Tarreauc2186022009-10-26 19:48:54 +0100143
Willy Tarreau414c4b22011-01-04 13:21:06 +0100144 if (unlikely(len == 0))
145 goto walk_down;
146
Willy Tarreau3a932442010-05-09 19:29:23 +0200147 pos = 0;
Willy Tarreauc2186022009-10-26 19:48:54 +0100148 while (1) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200149 if (eb_gettag(troot) == EB_LEAF) {
Willy Tarreauc2186022009-10-26 19:48:54 +0100150 node = container_of(eb_untag(troot, EB_LEAF),
151 struct ebmb_node, node.branches);
Willy Tarreau414c4b22011-01-04 13:21:06 +0100152 if (memcmp(node->key + pos, x, len) != 0)
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100153 goto ret_null;
Willy Tarreau3a932442010-05-09 19:29:23 +0200154 else
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100155 goto ret_node;
Willy Tarreauc2186022009-10-26 19:48:54 +0100156 }
157 node = container_of(eb_untag(troot, EB_NODE),
158 struct ebmb_node, node.branches);
159
Willy Tarreau3a932442010-05-09 19:29:23 +0200160 node_bit = node->node.bit;
161 if (node_bit < 0) {
Willy Tarreauc2186022009-10-26 19:48:54 +0100162 /* We have a dup tree now. Either it's for the same
163 * value, and we walk down left, or it's a different
164 * one and we don't have our key.
165 */
Willy Tarreau414c4b22011-01-04 13:21:06 +0100166 if (memcmp(node->key + pos, x, len) != 0)
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100167 goto ret_null;
168 else
169 goto walk_left;
Willy Tarreauc2186022009-10-26 19:48:54 +0100170 }
171
Willy Tarreau3a932442010-05-09 19:29:23 +0200172 /* OK, normal data node, let's walk down. We check if all full
173 * bytes are equal, and we start from the last one we did not
174 * completely check. We stop as soon as we reach the last byte,
175 * because we must decide to go left/right or abort.
176 */
177 node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
178 if (node_bit < 0) {
Joseph Herlant7c16c0e2018-11-13 19:55:57 -0800179 /* This surprising construction gives better performance
Willy Tarreau3a932442010-05-09 19:29:23 +0200180 * because gcc does not try to reorder the loop. Tested to
181 * be fine with 2.95 to 4.2.
182 */
183 while (1) {
Willy Tarreau414c4b22011-01-04 13:21:06 +0100184 if (node->key[pos++] ^ *(unsigned char*)(x++))
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100185 goto ret_null; /* more than one full byte is different */
Willy Tarreau414c4b22011-01-04 13:21:06 +0100186 if (--len == 0)
187 goto walk_left; /* return first node if all bytes matched */
Willy Tarreau3a932442010-05-09 19:29:23 +0200188 node_bit += 8;
189 if (node_bit >= 0)
190 break;
191 }
192 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100193
Willy Tarreau3a932442010-05-09 19:29:23 +0200194 /* here we know that only the last byte differs, so node_bit < 8.
195 * We have 2 possibilities :
196 * - more than the last bit differs => return NULL
197 * - walk down on side = (x[pos] >> node_bit) & 1
198 */
199 side = *(unsigned char *)x >> node_bit;
200 if (((node->key[pos] >> node_bit) ^ side) > 1)
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100201 goto ret_null;
Willy Tarreau3a932442010-05-09 19:29:23 +0200202 side &= 1;
203 troot = node->node.branches.b[side];
Willy Tarreauc2186022009-10-26 19:48:54 +0100204 }
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100205 walk_left:
206 troot = node->node.branches.b[EB_LEFT];
207 walk_down:
208 while (eb_gettag(troot) != EB_LEAF)
209 troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
210 node = container_of(eb_untag(troot, EB_LEAF),
211 struct ebmb_node, node.branches);
212 ret_node:
213 return node;
214 ret_null:
215 return NULL;
Willy Tarreauc2186022009-10-26 19:48:54 +0100216}
217
218/* Insert ebmb_node <new> into subtree starting at node root <root>.
219 * Only new->key needs be set with the key. The ebmb_node is returned.
220 * If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
Willy Tarreau414c4b22011-01-04 13:21:06 +0100221 * len is specified in bytes. It is absolutely mandatory that this length
222 * is the same for all keys in the tree. This function cannot be used to
223 * insert strings.
Willy Tarreauc2186022009-10-26 19:48:54 +0100224 */
225static forceinline struct ebmb_node *
226__ebmb_insert(struct eb_root *root, struct ebmb_node *new, unsigned int len)
227{
228 struct ebmb_node *old;
229 unsigned int side;
Willy Tarreau3a932442010-05-09 19:29:23 +0200230 eb_troot_t *troot, **up_ptr;
Willy Tarreau6258f7b2011-09-19 20:48:00 +0200231 eb_troot_t *root_right;
Willy Tarreauc2186022009-10-26 19:48:54 +0100232 int diff;
233 int bit;
Willy Tarreau3a932442010-05-09 19:29:23 +0200234 eb_troot_t *new_left, *new_rght;
235 eb_troot_t *new_leaf;
236 int old_node_bit;
Willy Tarreauc2186022009-10-26 19:48:54 +0100237
238 side = EB_LEFT;
239 troot = root->b[EB_LEFT];
240 root_right = root->b[EB_RGHT];
241 if (unlikely(troot == NULL)) {
242 /* Tree is empty, insert the leaf part below the left branch */
243 root->b[EB_LEFT] = eb_dotag(&new->node.branches, EB_LEAF);
244 new->node.leaf_p = eb_dotag(root, EB_LEFT);
245 new->node.node_p = NULL; /* node part unused */
246 return new;
247 }
248
Willy Tarreauc2186022009-10-26 19:48:54 +0100249 /* The tree descent is fairly easy :
250 * - first, check if we have reached a leaf node
251 * - second, check if we have gone too far
252 * - third, reiterate
253 * Everywhere, we use <new> for the node node we are inserting, <root>
254 * for the node we attach it to, and <old> for the node we are
255 * displacing below <new>. <troot> will always point to the future node
256 * (tagged with its type). <side> carries the side the node <new> is
257 * attached to below its parent, which is also where previous node
258 * was attached.
259 */
260
261 bit = 0;
262 while (1) {
263 if (unlikely(eb_gettag(troot) == EB_LEAF)) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200264 /* insert above a leaf */
Willy Tarreauc2186022009-10-26 19:48:54 +0100265 old = container_of(eb_untag(troot, EB_LEAF),
266 struct ebmb_node, node.branches);
Willy Tarreauc2186022009-10-26 19:48:54 +0100267 new->node.node_p = old->node.leaf_p;
Willy Tarreau3a932442010-05-09 19:29:23 +0200268 up_ptr = &old->node.leaf_p;
269 goto check_bit_and_break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100270 }
271
272 /* OK we're walking down this link */
273 old = container_of(eb_untag(troot, EB_NODE),
274 struct ebmb_node, node.branches);
Willy Tarreau3a932442010-05-09 19:29:23 +0200275 old_node_bit = old->node.bit;
276
277 if (unlikely(old->node.bit < 0)) {
278 /* We're above a duplicate tree, so we must compare the whole value */
279 new->node.node_p = old->node.node_p;
280 up_ptr = &old->node.node_p;
281 check_bit_and_break:
282 bit = equal_bits(new->key, old->key, bit, len << 3);
283 break;
284 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100285
286 /* Stop going down when we don't have common bits anymore. We
287 * also stop in front of a duplicates tree because it means we
288 * have to insert above. Note: we can compare more bits than
289 * the current node's because as long as they are identical, we
290 * know we descend along the correct side.
291 */
Willy Tarreau3a932442010-05-09 19:29:23 +0200292
293 bit = equal_bits(new->key, old->key, bit, old_node_bit);
294 if (unlikely(bit < old_node_bit)) {
295 /* The tree did not contain the key, so we insert <new> before the
296 * node <old>, and set ->bit to designate the lowest bit position in
297 * <new> which applies to ->branches.b[].
298 */
299 new->node.node_p = old->node.node_p;
300 up_ptr = &old->node.node_p;
301 break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100302 }
Willy Tarreau3a932442010-05-09 19:29:23 +0200303 /* we don't want to skip bits for further comparisons, so we must limit <bit>.
304 * However, since we're going down around <old_node_bit>, we know it will be
305 * properly matched, so we can skip this bit.
306 */
307 bit = old_node_bit + 1;
308
309 /* walk down */
310 root = &old->node.branches;
311 side = old_node_bit & 7;
312 side ^= 7;
313 side = (new->key[old_node_bit >> 3] >> side) & 1;
314 troot = root->b[side];
315 }
316
317 new_left = eb_dotag(&new->node.branches, EB_LEFT);
318 new_rght = eb_dotag(&new->node.branches, EB_RGHT);
319 new_leaf = eb_dotag(&new->node.branches, EB_LEAF);
320
Willy Tarreau3a932442010-05-09 19:29:23 +0200321 new->node.bit = bit;
Willy Tarreaua4a1cd12012-06-09 15:43:36 +0200322
323 /* Note: we can compare more bits than the current node's because as
324 * long as they are identical, we know we descend along the correct
325 * side. However we don't want to start to compare past the end.
326 */
327 diff = 0;
328 if (((unsigned)bit >> 3) < len)
329 diff = cmp_bits(new->key, old->key, bit);
330
Willy Tarreau3a932442010-05-09 19:29:23 +0200331 if (diff == 0) {
332 new->node.bit = -1; /* mark as new dup tree, just in case */
Willy Tarreauc2186022009-10-26 19:48:54 +0100333
Willy Tarreau3a932442010-05-09 19:29:23 +0200334 if (likely(eb_gettag(root_right))) {
335 /* we refuse to duplicate this key if the tree is
336 * tagged as containing only unique keys.
Willy Tarreauc2186022009-10-26 19:48:54 +0100337 */
Willy Tarreau3a932442010-05-09 19:29:23 +0200338 return old;
339 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100340
Willy Tarreau3a932442010-05-09 19:29:23 +0200341 if (eb_gettag(troot) != EB_LEAF) {
342 /* there was already a dup tree below */
343 struct eb_node *ret;
344 ret = eb_insert_dup(&old->node, &new->node);
345 return container_of(ret, struct ebmb_node, node);
346 }
347 /* otherwise fall through */
348 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100349
Willy Tarreau3a932442010-05-09 19:29:23 +0200350 if (diff >= 0) {
351 new->node.branches.b[EB_LEFT] = troot;
352 new->node.branches.b[EB_RGHT] = new_leaf;
353 new->node.leaf_p = new_rght;
354 *up_ptr = new_left;
355 }
356 else if (diff < 0) {
357 new->node.branches.b[EB_LEFT] = new_leaf;
358 new->node.branches.b[EB_RGHT] = troot;
359 new->node.leaf_p = new_left;
360 *up_ptr = new_rght;
361 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100362
Willy Tarreau3a932442010-05-09 19:29:23 +0200363 /* Ok, now we are inserting <new> between <root> and <old>. <old>'s
364 * parent is already set to <new>, and the <root>'s branch is still in
365 * <side>. Update the root's leaf till we have it. Note that we can also
366 * find the side by checking the side of new->node.node_p.
367 */
368
369 root->b[side] = eb_dotag(&new->node.branches, EB_NODE);
370 return new;
371}
372
373
Joseph Herlant7c16c0e2018-11-13 19:55:57 -0800374/* Find the first occurrence of the longest prefix matching a key <x> in the
Willy Tarreau3a932442010-05-09 19:29:23 +0200375 * tree <root>. It's the caller's responsibility to ensure that key <x> is at
Willy Tarreau9f791932014-05-10 08:34:01 +0200376 * least as long as the keys in the tree. Note that this can be ensured by
377 * having a byte at the end of <x> which cannot be part of any prefix, typically
378 * the trailing zero for a string. If none can be found, return NULL.
Willy Tarreau3a932442010-05-09 19:29:23 +0200379 */
380static forceinline struct ebmb_node *__ebmb_lookup_longest(struct eb_root *root, const void *x)
381{
382 struct ebmb_node *node;
383 eb_troot_t *troot, *cover;
384 int pos, side;
385 int node_bit;
386
387 troot = root->b[EB_LEFT];
388 if (unlikely(troot == NULL))
389 return NULL;
390
391 cover = NULL;
392 pos = 0;
393 while (1) {
394 if ((eb_gettag(troot) == EB_LEAF)) {
395 node = container_of(eb_untag(troot, EB_LEAF),
396 struct ebmb_node, node.branches);
397 if (check_bits(x - pos, node->key, pos, node->node.pfx))
398 goto not_found;
399
400 return node;
401 }
402 node = container_of(eb_untag(troot, EB_NODE),
403 struct ebmb_node, node.branches);
404
405 node_bit = node->node.bit;
406 if (node_bit < 0) {
407 /* We have a dup tree now. Either it's for the same
408 * value, and we walk down left, or it's a different
409 * one and we don't have our key.
410 */
411 if (check_bits(x - pos, node->key, pos, node->node.pfx))
412 goto not_found;
413
414 troot = node->node.branches.b[EB_LEFT];
415 while (eb_gettag(troot) != EB_LEAF)
416 troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
417 node = container_of(eb_untag(troot, EB_LEAF),
418 struct ebmb_node, node.branches);
419 return node;
420 }
421
422 node_bit >>= 1; /* strip cover bit */
423 node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
424 if (node_bit < 0) {
425 /* This uncommon construction gives better performance
426 * because gcc does not try to reorder the loop. Tested to
427 * be fine with 2.95 to 4.2.
428 */
429 while (1) {
430 x++; pos++;
431 if (node->key[pos-1] ^ *(unsigned char*)(x-1))
432 goto not_found; /* more than one full byte is different */
433 node_bit += 8;
434 if (node_bit >= 0)
435 break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100436 }
Willy Tarreau3a932442010-05-09 19:29:23 +0200437 }
438
439 /* here we know that only the last byte differs, so 0 <= node_bit <= 7.
440 * We have 2 possibilities :
441 * - more than the last bit differs => data does not match
442 * - walk down on side = (x[pos] >> node_bit) & 1
443 */
444 side = *(unsigned char *)x >> node_bit;
445 if (((node->key[pos] >> node_bit) ^ side) > 1)
446 goto not_found;
447
448 if (!(node->node.bit & 1)) {
449 /* This is a cover node, let's keep a reference to it
450 * for later. The covering subtree is on the left, and
451 * the covered subtree is on the right, so we have to
452 * walk down right.
453 */
454 cover = node->node.branches.b[EB_LEFT];
455 troot = node->node.branches.b[EB_RGHT];
456 continue;
457 }
458 side &= 1;
459 troot = node->node.branches.b[side];
460 }
461
462 not_found:
463 /* Walk down last cover tre if it exists. It does not matter if cover is NULL */
464 return ebmb_entry(eb_walk_down(cover, EB_LEFT), struct ebmb_node, node);
465}
466
467
Joseph Herlant7c16c0e2018-11-13 19:55:57 -0800468/* Find the first occurrence of a prefix matching a key <x> of <pfx> BITS in the
Willy Tarreau414c4b22011-01-04 13:21:06 +0100469 * tree <root>. It's the caller's responsibility to ensure that key <x> is at
Willy Tarreau9f791932014-05-10 08:34:01 +0200470 * least as long as the keys in the tree. Note that this can be ensured by
471 * having a byte at the end of <x> which cannot be part of any prefix, typically
472 * the trailing zero for a string. If none can be found, return NULL.
Willy Tarreau3a932442010-05-09 19:29:23 +0200473 */
474static forceinline struct ebmb_node *__ebmb_lookup_prefix(struct eb_root *root, const void *x, unsigned int pfx)
475{
476 struct ebmb_node *node;
477 eb_troot_t *troot;
478 int pos, side;
479 int node_bit;
480
481 troot = root->b[EB_LEFT];
482 if (unlikely(troot == NULL))
483 return NULL;
484
485 pos = 0;
486 while (1) {
487 if ((eb_gettag(troot) == EB_LEAF)) {
488 node = container_of(eb_untag(troot, EB_LEAF),
489 struct ebmb_node, node.branches);
490 if (node->node.pfx != pfx)
491 return NULL;
492 if (check_bits(x - pos, node->key, pos, node->node.pfx))
493 return NULL;
494 return node;
495 }
496 node = container_of(eb_untag(troot, EB_NODE),
497 struct ebmb_node, node.branches);
498
499 node_bit = node->node.bit;
500 if (node_bit < 0) {
501 /* We have a dup tree now. Either it's for the same
502 * value, and we walk down left, or it's a different
503 * one and we don't have our key.
504 */
505 if (node->node.pfx != pfx)
506 return NULL;
507 if (check_bits(x - pos, node->key, pos, node->node.pfx))
508 return NULL;
509
510 troot = node->node.branches.b[EB_LEFT];
511 while (eb_gettag(troot) != EB_LEAF)
512 troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
513 node = container_of(eb_untag(troot, EB_LEAF),
514 struct ebmb_node, node.branches);
515 return node;
516 }
517
518 node_bit >>= 1; /* strip cover bit */
519 node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
520 if (node_bit < 0) {
521 /* This uncommon construction gives better performance
522 * because gcc does not try to reorder the loop. Tested to
523 * be fine with 2.95 to 4.2.
524 */
525 while (1) {
526 x++; pos++;
527 if (node->key[pos-1] ^ *(unsigned char*)(x-1))
528 return NULL; /* more than one full byte is different */
529 node_bit += 8;
530 if (node_bit >= 0)
531 break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100532 }
Willy Tarreau3a932442010-05-09 19:29:23 +0200533 }
534
535 /* here we know that only the last byte differs, so 0 <= node_bit <= 7.
536 * We have 2 possibilities :
537 * - more than the last bit differs => data does not match
538 * - walk down on side = (x[pos] >> node_bit) & 1
539 */
540 side = *(unsigned char *)x >> node_bit;
541 if (((node->key[pos] >> node_bit) ^ side) > 1)
542 return NULL;
543
544 if (!(node->node.bit & 1)) {
545 /* This is a cover node, it may be the entry we're
546 * looking for. We already know that it matches all the
547 * bits, let's compare prefixes and descend the cover
548 * subtree if they match.
549 */
Willy Tarreau22c0a932011-07-25 12:22:44 +0200550 if ((unsigned short)node->node.bit >> 1 == pfx)
Willy Tarreau3a932442010-05-09 19:29:23 +0200551 troot = node->node.branches.b[EB_LEFT];
552 else
553 troot = node->node.branches.b[EB_RGHT];
554 continue;
555 }
556 side &= 1;
557 troot = node->node.branches.b[side];
558 }
559}
560
561
562/* Insert ebmb_node <new> into a prefix subtree starting at node root <root>.
563 * Only new->key and new->pfx need be set with the key and its prefix length.
564 * Note that bits between <pfx> and <len> are theorically ignored and should be
565 * zero, as it is not certain yet that they will always be ignored everywhere
566 * (eg in bit compare functions).
567 * The ebmb_node is returned.
568 * If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
569 * len is specified in bytes.
570 */
571static forceinline struct ebmb_node *
572__ebmb_insert_prefix(struct eb_root *root, struct ebmb_node *new, unsigned int len)
573{
574 struct ebmb_node *old;
575 unsigned int side;
576 eb_troot_t *troot, **up_ptr;
Willy Tarreau6258f7b2011-09-19 20:48:00 +0200577 eb_troot_t *root_right;
Willy Tarreau3a932442010-05-09 19:29:23 +0200578 int diff;
579 int bit;
580 eb_troot_t *new_left, *new_rght;
581 eb_troot_t *new_leaf;
582 int old_node_bit;
583
584 side = EB_LEFT;
585 troot = root->b[EB_LEFT];
586 root_right = root->b[EB_RGHT];
587 if (unlikely(troot == NULL)) {
588 /* Tree is empty, insert the leaf part below the left branch */
589 root->b[EB_LEFT] = eb_dotag(&new->node.branches, EB_LEAF);
590 new->node.leaf_p = eb_dotag(root, EB_LEFT);
591 new->node.node_p = NULL; /* node part unused */
592 return new;
593 }
594
595 len <<= 3;
596 if (len > new->node.pfx)
597 len = new->node.pfx;
598
599 /* The tree descent is fairly easy :
600 * - first, check if we have reached a leaf node
601 * - second, check if we have gone too far
602 * - third, reiterate
603 * Everywhere, we use <new> for the node node we are inserting, <root>
604 * for the node we attach it to, and <old> for the node we are
605 * displacing below <new>. <troot> will always point to the future node
606 * (tagged with its type). <side> carries the side the node <new> is
607 * attached to below its parent, which is also where previous node
608 * was attached.
609 */
610
611 bit = 0;
612 while (1) {
613 if (unlikely(eb_gettag(troot) == EB_LEAF)) {
614 /* Insert above a leaf. Note that this leaf could very
615 * well be part of a cover node.
616 */
617 old = container_of(eb_untag(troot, EB_LEAF),
618 struct ebmb_node, node.branches);
619 new->node.node_p = old->node.leaf_p;
620 up_ptr = &old->node.leaf_p;
621 goto check_bit_and_break;
622 }
623
624 /* OK we're walking down this link */
625 old = container_of(eb_untag(troot, EB_NODE),
626 struct ebmb_node, node.branches);
627 old_node_bit = old->node.bit;
628 /* Note that old_node_bit can be :
629 * < 0 : dup tree
630 * = 2N : cover node for N bits
631 * = 2N+1 : normal node at N bits
632 */
633
634 if (unlikely(old_node_bit < 0)) {
635 /* We're above a duplicate tree, so we must compare the whole value */
636 new->node.node_p = old->node.node_p;
637 up_ptr = &old->node.node_p;
638 check_bit_and_break:
639 /* No need to compare everything if the leaves are shorter than the new one. */
640 if (len > old->node.pfx)
641 len = old->node.pfx;
642 bit = equal_bits(new->key, old->key, bit, len);
Willy Tarreauc2186022009-10-26 19:48:54 +0100643 break;
644 }
645
Willy Tarreau3a932442010-05-09 19:29:23 +0200646 /* WARNING: for the two blocks below, <bit> is counted in half-bits */
647
648 bit = equal_bits(new->key, old->key, bit, old_node_bit >> 1);
649 bit = (bit << 1) + 1; // assume comparisons with normal nodes
Willy Tarreau3a932442010-05-09 19:29:23 +0200650
651 /* we must always check that our prefix is larger than the nodes
652 * we visit, otherwise we have to stop going down. The following
653 * test is able to stop before both normal and cover nodes.
654 */
655 if (bit >= (new->node.pfx << 1) && (new->node.pfx << 1) < old_node_bit) {
656 /* insert cover node here on the left */
657 new->node.node_p = old->node.node_p;
658 up_ptr = &old->node.node_p;
659 new->node.bit = new->node.pfx << 1;
660 diff = -1;
Willy Tarreau3a932442010-05-09 19:29:23 +0200661 goto insert_above;
662 }
663
664 if (unlikely(bit < old_node_bit)) {
665 /* The tree did not contain the key, so we insert <new> before the
666 * node <old>, and set ->bit to designate the lowest bit position in
667 * <new> which applies to ->branches.b[]. We know that the bit is not
668 * greater than the prefix length thanks to the test above.
669 */
670 new->node.node_p = old->node.node_p;
671 up_ptr = &old->node.node_p;
672 new->node.bit = bit;
673 diff = cmp_bits(new->key, old->key, bit >> 1);
Willy Tarreau3a932442010-05-09 19:29:23 +0200674 goto insert_above;
675 }
676
677 if (!(old_node_bit & 1)) {
678 /* if we encounter a cover node with our exact prefix length, it's
679 * necessarily the same value, so we insert there as a duplicate on
680 * the left. For that, we go down on the left and the leaf detection
681 * code will finish the job.
682 */
683 if ((new->node.pfx << 1) == old_node_bit) {
684 root = &old->node.branches;
685 side = EB_LEFT;
686 troot = root->b[side];
Willy Tarreau3a932442010-05-09 19:29:23 +0200687 continue;
688 }
689
690 /* cover nodes are always walked through on the right */
691 side = EB_RGHT;
692 bit = old_node_bit >> 1; /* recheck that bit */
693 root = &old->node.branches;
694 troot = root->b[side];
Willy Tarreau3a932442010-05-09 19:29:23 +0200695 continue;
696 }
697
698 /* we don't want to skip bits for further comparisons, so we must limit <bit>.
699 * However, since we're going down around <old_node_bit>, we know it will be
700 * properly matched, so we can skip this bit.
701 */
702 old_node_bit >>= 1;
703 bit = old_node_bit + 1;
704
Willy Tarreauc2186022009-10-26 19:48:54 +0100705 /* walk down */
706 root = &old->node.branches;
Willy Tarreau3a932442010-05-09 19:29:23 +0200707 side = old_node_bit & 7;
708 side ^= 7;
709 side = (new->key[old_node_bit >> 3] >> side) & 1;
Willy Tarreauc2186022009-10-26 19:48:54 +0100710 troot = root->b[side];
711 }
712
Willy Tarreau3a932442010-05-09 19:29:23 +0200713 /* Right here, we have 4 possibilities :
714 * - the tree does not contain any leaf matching the
715 * key, and we have new->key < old->key. We insert
716 * new above old, on the left ;
717 *
718 * - the tree does not contain any leaf matching the
719 * key, and we have new->key > old->key. We insert
720 * new above old, on the right ;
721 *
722 * - the tree does contain the key with the same prefix
723 * length. We add the new key next to it as a first
724 * duplicate (since it was alone).
725 *
726 * The last two cases can easily be partially merged.
727 *
728 * - the tree contains a leaf matching the key, we have
729 * to insert above it as a cover node. The leaf with
730 * the shortest prefix becomes the left subtree and
731 * the leaf with the longest prefix becomes the right
732 * one. The cover node gets the min of both prefixes
733 * as its new bit.
Willy Tarreauc2186022009-10-26 19:48:54 +0100734 */
735
Willy Tarreau3a932442010-05-09 19:29:23 +0200736 /* first we want to ensure that we compare the correct bit, which means
737 * the largest common to both nodes.
Willy Tarreauc2186022009-10-26 19:48:54 +0100738 */
Willy Tarreau3a932442010-05-09 19:29:23 +0200739 if (bit > new->node.pfx)
740 bit = new->node.pfx;
741 if (bit > old->node.pfx)
742 bit = old->node.pfx;
743
Willy Tarreau3a932442010-05-09 19:29:23 +0200744 new->node.bit = (bit << 1) + 1; /* assume normal node by default */
745
746 /* if one prefix is included in the second one, we don't compare bits
747 * because they won't necessarily match, we just proceed with a cover
748 * node insertion.
749 */
750 diff = 0;
751 if (bit < old->node.pfx && bit < new->node.pfx)
752 diff = cmp_bits(new->key, old->key, bit);
753
754 if (diff == 0) {
755 /* Both keys match. Either it's a duplicate entry or we have to
756 * put the shortest prefix left and the largest one right below
757 * a new cover node. By default, diff==0 means we'll be inserted
758 * on the right.
759 */
760 new->node.bit--; /* anticipate cover node insertion */
761 if (new->node.pfx == old->node.pfx) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200762 new->node.bit = -1; /* mark as new dup tree, just in case */
763
764 if (unlikely(eb_gettag(root_right))) {
765 /* we refuse to duplicate this key if the tree is
766 * tagged as containing only unique keys.
767 */
768 return old;
769 }
770
771 if (eb_gettag(troot) != EB_LEAF) {
772 /* there was already a dup tree below */
773 struct eb_node *ret;
774 ret = eb_insert_dup(&old->node, &new->node);
775 return container_of(ret, struct ebmb_node, node);
776 }
777 /* otherwise fall through to insert first duplicate */
778 }
779 /* otherwise we just rely on the tests below to select the right side */
780 else if (new->node.pfx < old->node.pfx)
781 diff = -1; /* force insertion to left side */
782 }
783
784 insert_above:
785 new_left = eb_dotag(&new->node.branches, EB_LEFT);
786 new_rght = eb_dotag(&new->node.branches, EB_RGHT);
787 new_leaf = eb_dotag(&new->node.branches, EB_LEAF);
788
789 if (diff >= 0) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200790 new->node.branches.b[EB_LEFT] = troot;
791 new->node.branches.b[EB_RGHT] = new_leaf;
792 new->node.leaf_p = new_rght;
793 *up_ptr = new_left;
794 }
795 else {
Willy Tarreau3a932442010-05-09 19:29:23 +0200796 new->node.branches.b[EB_LEFT] = new_leaf;
797 new->node.branches.b[EB_RGHT] = troot;
798 new->node.leaf_p = new_left;
799 *up_ptr = new_rght;
800 }
801
Willy Tarreauc2186022009-10-26 19:48:54 +0100802 root->b[side] = eb_dotag(&new->node.branches, EB_NODE);
803 return new;
804}
805
Willy Tarreau3a932442010-05-09 19:29:23 +0200806
807
Willy Tarreauead63a02009-11-02 14:41:23 +0100808#endif /* _EBMBTREE_H */
809