/*
 * Elastic Binary Trees - macros and structures for Multi-Byte data nodes.
 * Version 6.0
 * (C) 2002-2010 - Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */

#define dprintf(x,...) do { } while(0)

#ifndef _EBMBTREE_H
#define _EBMBTREE_H

#include <string.h>
#include "ebtree.h"

/* Return the structure of type <type> whose member <member> points to <ptr> */
#define ebmb_entry(ptr, type, member) container_of(ptr, type, member)

#define EBMB_ROOT	EB_ROOT
#define EBMB_TREE_HEAD	EB_TREE_HEAD

/* This structure carries a node, a leaf, and a key. It must start with the
 * eb_node so that it can be cast into an eb_node. We could also have put some
 * sort of transparent union here to reduce the indirection level, but the fact
 * is, the end user is not meant to manipulate internals, so this is pointless.
 * The 'node.bit' value here works differently from scalar types, as it contains
 * the number of identical bits between the two branches.
 */
struct ebmb_node {
	struct eb_node node; /* the tree node, must be at the beginning */
	unsigned char key[0]; /* the key, its size depends on the application */
};
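
/* Example (illustrative sketch, not part of this header's API): since key[]
 * has no storage of its own, an ebmb_node is typically placed as the last
 * member of a caller structure which is allocated with extra room for the
 * key (malloc() from <stdlib.h>). The struct and helper names below are
 * hypothetical.
 *
 *	struct my_entry {
 *		int value;              // user data
 *		struct ebmb_node node;  // must be last: key[] extends past it
 *	};
 *
 *	static struct my_entry *new_entry(const void *key, size_t len)
 *	{
 *		struct my_entry *e = malloc(sizeof(*e) + len);
 *
 *		if (e)
 *			memcpy(e->node.key, key, len);
 *		return e;
 *	}
 */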

/*
 * Exported functions and macros.
 * Many of them are always inlined because they are extremely small, and
 * are generally called at most once or twice in a program.
 */

/* Return leftmost node in the tree, or NULL if none */
static forceinline struct ebmb_node *ebmb_first(struct eb_root *root)
{
	return ebmb_entry(eb_first(root), struct ebmb_node, node);
}

/* Return rightmost node in the tree, or NULL if none */
static forceinline struct ebmb_node *ebmb_last(struct eb_root *root)
{
	return ebmb_entry(eb_last(root), struct ebmb_node, node);
}

/* Return next node in the tree, or NULL if none */
static forceinline struct ebmb_node *ebmb_next(struct ebmb_node *ebmb)
{
	return ebmb_entry(eb_next(&ebmb->node), struct ebmb_node, node);
}

/* Return previous node in the tree, or NULL if none */
static forceinline struct ebmb_node *ebmb_prev(struct ebmb_node *ebmb)
{
	return ebmb_entry(eb_prev(&ebmb->node), struct ebmb_node, node);
}

/* Return next node in the tree, skipping duplicates, or NULL if none */
static forceinline struct ebmb_node *ebmb_next_unique(struct ebmb_node *ebmb)
{
	return ebmb_entry(eb_next_unique(&ebmb->node), struct ebmb_node, node);
}

/* Return previous node in the tree, skipping duplicates, or NULL if none */
static forceinline struct ebmb_node *ebmb_prev_unique(struct ebmb_node *ebmb)
{
	return ebmb_entry(eb_prev_unique(&ebmb->node), struct ebmb_node, node);
}

/* Delete node from the tree if it was linked in. Mark the node unused. Note
 * that this function relies on a non-inlined generic function: eb_delete.
 */
static forceinline void ebmb_delete(struct ebmb_node *ebmb)
{
	eb_delete(&ebmb->node);
}
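
/* Example (illustrative sketch): walking the whole tree and releasing every
 * entry. ebmb_delete() only unlinks a node, so the next node must be fetched
 * before deleting, and freeing the container remains the caller's job. The
 * 'my_entry' type is the hypothetical one from the sketch above.
 *
 *	void flush_tree(struct eb_root *root)
 *	{
 *		struct ebmb_node *node = ebmb_first(root);
 *
 *		while (node) {
 *			struct ebmb_node *next = ebmb_next(node);
 *			struct my_entry *e = ebmb_entry(node, struct my_entry, node);
 *
 *			ebmb_delete(node);
 *			free(e);
 *			node = next;
 *		}
 *	}
 */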

/* The following functions are not inlined by default. They are declared
 * in ebmbtree.c, which simply relies on their inline version.
 */
REGPRM3 struct ebmb_node *ebmb_lookup(struct eb_root *root, const void *x, unsigned int len);
REGPRM3 struct ebmb_node *ebmb_insert(struct eb_root *root, struct ebmb_node *new, unsigned int len);
REGPRM2 struct ebmb_node *ebmb_lookup_longest(struct eb_root *root, const void *x);
REGPRM3 struct ebmb_node *ebmb_lookup_prefix(struct eb_root *root, const void *x, unsigned int pfx);
REGPRM3 struct ebmb_node *ebmb_insert_prefix(struct eb_root *root, struct ebmb_node *new, unsigned int len);
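
/* Example (illustrative sketch): exact-match insertion and lookup with
 * fixed-size 4-byte keys (e.g. IPv4 addresses in network byte order), using
 * the hypothetical new_entry() helper from the sketch above. EB_ROOT accepts
 * duplicate keys; error handling is omitted for brevity.
 *
 *	struct eb_root addr_tree = EB_ROOT;
 *
 *	void add_addr(const unsigned char addr[4])
 *	{
 *		struct my_entry *e = new_entry(addr, 4);
 *
 *		if (e)
 *			ebmb_insert(&addr_tree, &e->node, 4);
 *	}
 *
 *	struct my_entry *find_addr(const unsigned char addr[4])
 *	{
 *		struct ebmb_node *n = ebmb_lookup(&addr_tree, addr, 4);
 *
 *		return n ? ebmb_entry(n, struct my_entry, node) : NULL;
 *	}
 */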

/* The following functions are less likely to be used directly, because their
 * code is larger. The non-inlined version is preferred.
 */

/* Delete node from the tree if it was linked in. Mark the node unused. */
static forceinline void __ebmb_delete(struct ebmb_node *ebmb)
{
	__eb_delete(&ebmb->node);
}

/* Find the first occurrence of a key of <len> bytes in the tree <root>.
 * If none can be found, return NULL.
 */
static forceinline struct ebmb_node *__ebmb_lookup(struct eb_root *root, const void *x, unsigned int len)
{
	struct ebmb_node *node;
	eb_troot_t *troot;
	int pos, side;
	int node_bit;

	troot = root->b[EB_LEFT];
	if (unlikely(troot == NULL))
		return NULL;

	pos = 0;
	while (1) {
		if (eb_gettag(troot) == EB_LEAF) {
			node = container_of(eb_untag(troot, EB_LEAF),
					    struct ebmb_node, node.branches);
			if (memcmp(node->key + pos, x, len - pos) != 0)
				return NULL;
			else
				return node;
		}
		node = container_of(eb_untag(troot, EB_NODE),
				    struct ebmb_node, node.branches);

		node_bit = node->node.bit;
		if (node_bit < 0) {
			/* We have a dup tree now. Either it's for the same
			 * value, and we walk down left, or it's a different
			 * one and we don't have our key.
			 */
			if (memcmp(node->key + pos, x, len - pos) != 0)
				return NULL;

			troot = node->node.branches.b[EB_LEFT];
			while (eb_gettag(troot) != EB_LEAF)
				troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
			node = container_of(eb_untag(troot, EB_LEAF),
					    struct ebmb_node, node.branches);
			return node;
		}

		/* OK, normal data node, let's walk down. We check if all full
		 * bytes are equal, and we start from the last one we did not
		 * completely check. We stop as soon as we reach the last byte,
		 * because we must decide to go left/right or abort.
		 */
		node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
		if (node_bit < 0) {
			/* This surprising construction gives better performance
			 * because gcc does not try to reorder the loop. Tested to
			 * be fine with 2.95 to 4.2.
			 */
			while (1) {
				x++; pos++;
				if (node->key[pos-1] ^ *(unsigned char*)(x-1))
					return NULL; /* more than one full byte is different */
				node_bit += 8;
				if (node_bit >= 0)
					break;
			}
		}

		/* here we know that only the last byte differs, so node_bit < 8.
		 * We have 2 possibilities:
		 * - more than the last bit differs => return NULL
		 * - walk down on side = (x[pos] >> node_bit) & 1
		 */
		side = *(unsigned char *)x >> node_bit;
		if (((node->key[pos] >> node_bit) ^ side) > 1)
			return NULL;
		side &= 1;
		troot = node->node.branches.b[side];
	}
}

/* Insert ebmb_node <new> into subtree starting at node root <root>.
 * Only new->key needs to be set with the key. The ebmb_node is returned.
 * If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
 * len is specified in bytes.
 */
static forceinline struct ebmb_node *
__ebmb_insert(struct eb_root *root, struct ebmb_node *new, unsigned int len)
{
	struct ebmb_node *old;
	unsigned int side;
	eb_troot_t *troot, **up_ptr;
	eb_troot_t *root_right = root;
	int diff;
	int bit;
	eb_troot_t *new_left, *new_rght;
	eb_troot_t *new_leaf;
	int old_node_bit;

	side = EB_LEFT;
	troot = root->b[EB_LEFT];
	root_right = root->b[EB_RGHT];
	if (unlikely(troot == NULL)) {
		/* Tree is empty, insert the leaf part below the left branch */
		root->b[EB_LEFT] = eb_dotag(&new->node.branches, EB_LEAF);
		new->node.leaf_p = eb_dotag(root, EB_LEFT);
		new->node.node_p = NULL; /* node part unused */
		return new;
	}

	/* The tree descent is fairly easy:
	 * - first, check if we have reached a leaf node
	 * - second, check if we have gone too far
	 * - third, reiterate
	 * Everywhere, we use <new> for the node we are inserting, <root>
	 * for the node we attach it to, and <old> for the node we are
	 * displacing below <new>. <troot> will always point to the future node
	 * (tagged with its type). <side> carries the side the node <new> is
	 * attached to below its parent, which is also where the previous node
	 * was attached.
	 */

	bit = 0;
	while (1) {
		if (unlikely(eb_gettag(troot) == EB_LEAF)) {
			/* insert above a leaf */
			old = container_of(eb_untag(troot, EB_LEAF),
					    struct ebmb_node, node.branches);
			new->node.node_p = old->node.leaf_p;
			up_ptr = &old->node.leaf_p;
			goto check_bit_and_break;
		}

		/* OK we're walking down this link */
		old = container_of(eb_untag(troot, EB_NODE),
				    struct ebmb_node, node.branches);
		old_node_bit = old->node.bit;

		if (unlikely(old->node.bit < 0)) {
			/* We're above a duplicate tree, so we must compare the whole value */
			new->node.node_p = old->node.node_p;
			up_ptr = &old->node.node_p;
		check_bit_and_break:
			bit = equal_bits(new->key, old->key, bit, len << 3);
			break;
		}

		/* Stop going down when we don't have common bits anymore. We
		 * also stop in front of a duplicates tree because it means we
		 * have to insert above. Note: we can compare more bits than
		 * the current node's because as long as they are identical, we
		 * know we descend along the correct side.
		 */

		bit = equal_bits(new->key, old->key, bit, old_node_bit);
		if (unlikely(bit < old_node_bit)) {
			/* The tree did not contain the key, so we insert <new> before the
			 * node <old>, and set ->bit to designate the lowest bit position in
			 * <new> which applies to ->branches.b[].
			 */
			new->node.node_p = old->node.node_p;
			up_ptr = &old->node.node_p;
			break;
		}
		/* we don't want to skip bits for further comparisons, so we must limit <bit>.
		 * However, since we're going down around <old_node_bit>, we know it will be
		 * properly matched, so we can skip this bit.
		 */
		bit = old_node_bit + 1;

		/* walk down */
		root = &old->node.branches;
		side = old_node_bit & 7;
		side ^= 7;
		side = (new->key[old_node_bit >> 3] >> side) & 1;
		troot = root->b[side];
	}

	new_left = eb_dotag(&new->node.branches, EB_LEFT);
	new_rght = eb_dotag(&new->node.branches, EB_RGHT);
	new_leaf = eb_dotag(&new->node.branches, EB_LEAF);

	/* Note: we can compare more bits than
	 * the current node's because as long as they are identical, we
	 * know we descend along the correct side.
	 */
	new->node.bit = bit;
	diff = cmp_bits(new->key, old->key, bit);
	if (diff == 0) {
		new->node.bit = -1; /* mark as new dup tree, just in case */

		if (likely(eb_gettag(root_right))) {
			/* we refuse to duplicate this key if the tree is
			 * tagged as containing only unique keys.
			 */
			return old;
		}

		if (eb_gettag(troot) != EB_LEAF) {
			/* there was already a dup tree below */
			struct eb_node *ret;
			ret = eb_insert_dup(&old->node, &new->node);
			return container_of(ret, struct ebmb_node, node);
		}
		/* otherwise fall through */
	}

	if (diff >= 0) {
		new->node.branches.b[EB_LEFT] = troot;
		new->node.branches.b[EB_RGHT] = new_leaf;
		new->node.leaf_p = new_rght;
		*up_ptr = new_left;
	}
	else if (diff < 0) {
		new->node.branches.b[EB_LEFT] = new_leaf;
		new->node.branches.b[EB_RGHT] = troot;
		new->node.leaf_p = new_left;
		*up_ptr = new_rght;
	}

	/* Ok, now we are inserting <new> between <root> and <old>. <old>'s
	 * parent is already set to <new>, and the <root>'s branch is still in
	 * <side>, so update that branch to point to <new>. Note that we can
	 * also find the side by checking the side of new->node.node_p.
	 */

	root->b[side] = eb_dotag(&new->node.branches, EB_NODE);
	return new;
}
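
/* Example (illustrative sketch): enforcing unique keys. When the root's right
 * branch is set to 1, as done by the EB_ROOT_UNIQUE initializer in ebtree.h,
 * inserting an already present key returns the existing node instead of
 * linking the new one, which the caller can test. Names are hypothetical.
 *
 *	struct eb_root uniq_tree = EB_ROOT_UNIQUE;
 *
 *	int add_unique(struct my_entry *e, unsigned int len)
 *	{
 *		struct ebmb_node *ret = ebmb_insert(&uniq_tree, &e->node, len);
 *
 *		if (ret != &e->node)
 *			return -1; // key already present, <e> was not inserted
 *		return 0;
 *	}
 */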


/* Find the first occurrence of the longest prefix matching a key <x> in the
 * tree <root>. It's the caller's responsibility to ensure that key <x> is at
 * least as long as the keys in the tree. If none can be found, return NULL.
 */
static forceinline struct ebmb_node *__ebmb_lookup_longest(struct eb_root *root, const void *x)
{
	struct ebmb_node *node;
	eb_troot_t *troot, *cover;
	int pos, side;
	int node_bit;

	troot = root->b[EB_LEFT];
	if (unlikely(troot == NULL))
		return NULL;

	cover = NULL;
	pos = 0;
	while (1) {
		if ((eb_gettag(troot) == EB_LEAF)) {
			node = container_of(eb_untag(troot, EB_LEAF),
					    struct ebmb_node, node.branches);
			if (check_bits(x - pos, node->key, pos, node->node.pfx))
				goto not_found;

			return node;
		}
		node = container_of(eb_untag(troot, EB_NODE),
				    struct ebmb_node, node.branches);

		node_bit = node->node.bit;
		if (node_bit < 0) {
			/* We have a dup tree now. Either it's for the same
			 * value, and we walk down left, or it's a different
			 * one and we don't have our key.
			 */
			if (check_bits(x - pos, node->key, pos, node->node.pfx))
				goto not_found;

			troot = node->node.branches.b[EB_LEFT];
			while (eb_gettag(troot) != EB_LEAF)
				troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
			node = container_of(eb_untag(troot, EB_LEAF),
					    struct ebmb_node, node.branches);
			return node;
		}

		node_bit >>= 1; /* strip cover bit */
		node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
		if (node_bit < 0) {
			/* This uncommon construction gives better performance
			 * because gcc does not try to reorder the loop. Tested to
			 * be fine with 2.95 to 4.2.
			 */
			while (1) {
				x++; pos++;
				if (node->key[pos-1] ^ *(unsigned char*)(x-1))
					goto not_found; /* more than one full byte is different */
				node_bit += 8;
				if (node_bit >= 0)
					break;
			}
		}

		/* here we know that only the last byte differs, so 0 <= node_bit <= 7.
		 * We have 2 possibilities:
		 * - more than the last bit differs => data does not match
		 * - walk down on side = (x[pos] >> node_bit) & 1
		 */
		side = *(unsigned char *)x >> node_bit;
		if (((node->key[pos] >> node_bit) ^ side) > 1)
			goto not_found;

		if (!(node->node.bit & 1)) {
			/* This is a cover node, let's keep a reference to it
			 * for later. The covering subtree is on the left, and
			 * the covered subtree is on the right, so we have to
			 * walk down right.
			 */
			cover = node->node.branches.b[EB_LEFT];
			troot = node->node.branches.b[EB_RGHT];
			continue;
		}
		side &= 1;
		troot = node->node.branches.b[side];
	}

 not_found:
	/* Walk down the last cover tree if it exists. It does not matter if cover is NULL */
	return ebmb_entry(eb_walk_down(cover, EB_LEFT), struct ebmb_node, node);
}
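
/* Example (illustrative sketch): a small longest-prefix-match table for IPv4
 * routes, built on ebmb_insert_prefix() and ebmb_lookup_longest(). Keys are
 * 4-byte addresses in network byte order, <plen> is the prefix length in
 * bits, and bits past the prefix are expected to be zero (see the comment on
 * __ebmb_insert_prefix further below). The helper names are hypothetical.
 *
 *	struct eb_root routes = EB_ROOT;
 *
 *	void add_route(const unsigned char addr[4], unsigned int plen)
 *	{
 *		struct my_entry *e = new_entry(addr, 4);
 *
 *		if (!e)
 *			return;
 *		e->node.node.pfx = plen;
 *		ebmb_insert_prefix(&routes, &e->node, 4);
 *	}
 *
 *	struct my_entry *best_match(const unsigned char addr[4])
 *	{
 *		struct ebmb_node *n = ebmb_lookup_longest(&routes, addr);
 *
 *		return n ? ebmb_entry(n, struct my_entry, node) : NULL;
 *	}
 */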


/* Find the first occurrence of a prefix matching a key <x> of <pfx> BITS in the
 * tree <root>. If none can be found, return NULL.
 */
static forceinline struct ebmb_node *__ebmb_lookup_prefix(struct eb_root *root, const void *x, unsigned int pfx)
{
	struct ebmb_node *node;
	eb_troot_t *troot;
	int pos, side;
	int node_bit;

	troot = root->b[EB_LEFT];
	if (unlikely(troot == NULL))
		return NULL;

	pos = 0;
	while (1) {
		if ((eb_gettag(troot) == EB_LEAF)) {
			node = container_of(eb_untag(troot, EB_LEAF),
					    struct ebmb_node, node.branches);
			if (node->node.pfx != pfx)
				return NULL;
			if (check_bits(x - pos, node->key, pos, node->node.pfx))
				return NULL;
			return node;
		}
		node = container_of(eb_untag(troot, EB_NODE),
				    struct ebmb_node, node.branches);

		node_bit = node->node.bit;
		if (node_bit < 0) {
			/* We have a dup tree now. Either it's for the same
			 * value, and we walk down left, or it's a different
			 * one and we don't have our key.
			 */
			if (node->node.pfx != pfx)
				return NULL;
			if (check_bits(x - pos, node->key, pos, node->node.pfx))
				return NULL;

			troot = node->node.branches.b[EB_LEFT];
			while (eb_gettag(troot) != EB_LEAF)
				troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
			node = container_of(eb_untag(troot, EB_LEAF),
					    struct ebmb_node, node.branches);
			return node;
		}

		node_bit >>= 1; /* strip cover bit */
		node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
		if (node_bit < 0) {
			/* This uncommon construction gives better performance
			 * because gcc does not try to reorder the loop. Tested to
			 * be fine with 2.95 to 4.2.
			 */
			while (1) {
				x++; pos++;
				if (node->key[pos-1] ^ *(unsigned char*)(x-1))
					return NULL; /* more than one full byte is different */
				node_bit += 8;
				if (node_bit >= 0)
					break;
			}
		}

		/* here we know that only the last byte differs, so 0 <= node_bit <= 7.
		 * We have 2 possibilities:
		 * - more than the last bit differs => data does not match
		 * - walk down on side = (x[pos] >> node_bit) & 1
		 */
		side = *(unsigned char *)x >> node_bit;
		if (((node->key[pos] >> node_bit) ^ side) > 1)
			return NULL;

		if (!(node->node.bit & 1)) {
			/* This is a cover node, it may be the entry we're
			 * looking for. We already know that it matches all the
			 * bits, let's compare prefixes and descend the cover
			 * subtree if they match.
			 */
			if (node->node.bit >> 1 == pfx)
				troot = node->node.branches.b[EB_LEFT];
			else
				troot = node->node.branches.b[EB_RGHT];
			continue;
		}
		side &= 1;
		troot = node->node.branches.b[side];
	}
}


/* Insert ebmb_node <new> into a prefix subtree starting at node root <root>.
 * Only new->key and new->pfx need to be set with the key and its prefix length.
 * Note that bits between <pfx> and <len> are theoretically ignored and should be
 * zero, as it is not certain yet that they will always be ignored everywhere
 * (e.g. in bit compare functions).
 * The ebmb_node is returned.
 * If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
 * len is specified in bytes.
 */
static forceinline struct ebmb_node *
__ebmb_insert_prefix(struct eb_root *root, struct ebmb_node *new, unsigned int len)
{
	struct ebmb_node *old;
	unsigned int side;
	eb_troot_t *troot, **up_ptr;
	eb_troot_t *root_right = root;
	int diff;
	int bit;
	eb_troot_t *new_left, *new_rght;
	eb_troot_t *new_leaf;
	int old_node_bit;

	side = EB_LEFT;
	troot = root->b[EB_LEFT];
	root_right = root->b[EB_RGHT];
	if (unlikely(troot == NULL)) {
		/* Tree is empty, insert the leaf part below the left branch */
		root->b[EB_LEFT] = eb_dotag(&new->node.branches, EB_LEAF);
		new->node.leaf_p = eb_dotag(root, EB_LEFT);
		new->node.node_p = NULL; /* node part unused */
		return new;
	}

	len <<= 3;
	if (len > new->node.pfx)
		len = new->node.pfx;

	/* The tree descent is fairly easy:
	 * - first, check if we have reached a leaf node
	 * - second, check if we have gone too far
	 * - third, reiterate
	 * Everywhere, we use <new> for the node we are inserting, <root>
	 * for the node we attach it to, and <old> for the node we are
	 * displacing below <new>. <troot> will always point to the future node
	 * (tagged with its type). <side> carries the side the node <new> is
	 * attached to below its parent, which is also where the previous node
	 * was attached.
	 */

	bit = 0;
	while (1) {
		if (unlikely(eb_gettag(troot) == EB_LEAF)) {
			/* Insert above a leaf. Note that this leaf could very
			 * well be part of a cover node.
			 */
			old = container_of(eb_untag(troot, EB_LEAF),
					    struct ebmb_node, node.branches);
			new->node.node_p = old->node.leaf_p;
			up_ptr = &old->node.leaf_p;
			goto check_bit_and_break;
		}

		/* OK we're walking down this link */
		old = container_of(eb_untag(troot, EB_NODE),
				    struct ebmb_node, node.branches);
		old_node_bit = old->node.bit;
		/* Note that old_node_bit can be:
		 *   < 0    : dup tree
		 *   = 2N   : cover node for N bits
		 *   = 2N+1 : normal node at N bits
		 */

		if (unlikely(old_node_bit < 0)) {
			/* We're above a duplicate tree, so we must compare the whole value */
			new->node.node_p = old->node.node_p;
			up_ptr = &old->node.node_p;
		check_bit_and_break:
			/* No need to compare everything if the leaves are shorter than the new one. */
			if (len > old->node.pfx)
				len = old->node.pfx;
			bit = equal_bits(new->key, old->key, bit, len);
			dprintf(" [new=%p, old=%p] obit=%d, eqbit=%d\n", new, old, old->node.bit, bit);
			break;
		}

		/* WARNING: for the two blocks below, <bit> is counted in half-bits */

		bit = equal_bits(new->key, old->key, bit, old_node_bit >> 1);
		bit = (bit << 1) + 1; // assume comparisons with normal nodes
		dprintf(" [old=%p, new=%p] bit=%d/2, old_bit=%d/2\n", old, new, bit, old_node_bit);

		/* we must always check that our prefix is larger than the nodes
		 * we visit, otherwise we have to stop going down. The following
		 * test is able to stop before both normal and cover nodes.
		 */
		if (bit >= (new->node.pfx << 1) && (new->node.pfx << 1) < old_node_bit) {
			/* insert cover node here on the left */
			new->node.node_p = old->node.node_p;
			up_ptr = &old->node.node_p;
			new->node.bit = new->node.pfx << 1;
			diff = -1;
			dprintf(" [new=%p, old=%p] obit=%d, nbit=%d (1)\n", new, old, old->node.bit, new->node.bit);
			goto insert_above;
		}

		if (unlikely(bit < old_node_bit)) {
			/* The tree did not contain the key, so we insert <new> before the
			 * node <old>, and set ->bit to designate the lowest bit position in
			 * <new> which applies to ->branches.b[]. We know that the bit is not
			 * greater than the prefix length thanks to the test above.
			 */
			new->node.node_p = old->node.node_p;
			up_ptr = &old->node.node_p;
			new->node.bit = bit;
			diff = cmp_bits(new->key, old->key, bit >> 1);
			dprintf(" --> diff=%d, node.bit=%d/2\n", diff, new->node.bit);
			goto insert_above;
		}

		if (!(old_node_bit & 1)) {
			/* if we encounter a cover node with our exact prefix length, it's
			 * necessarily the same value, so we insert there as a duplicate on
			 * the left. For that, we go down on the left and the leaf detection
			 * code will finish the job.
			 */
			if ((new->node.pfx << 1) == old_node_bit) {
				root = &old->node.branches;
				side = EB_LEFT;
				troot = root->b[side];
				dprintf(" --> going down cover by left\n");
				continue;
			}

			/* cover nodes are always walked through on the right */
			side = EB_RGHT;
			bit = old_node_bit >> 1; /* recheck that bit */
			root = &old->node.branches;
			troot = root->b[side];
			dprintf(" --> going down cover by right\n");
			continue;
		}

		/* we don't want to skip bits for further comparisons, so we must limit <bit>.
		 * However, since we're going down around <old_node_bit>, we know it will be
		 * properly matched, so we can skip this bit.
		 */
		old_node_bit >>= 1;
		bit = old_node_bit + 1;

		/* walk down */
		root = &old->node.branches;
		side = old_node_bit & 7;
		side ^= 7;
		side = (new->key[old_node_bit >> 3] >> side) & 1;
		troot = root->b[side];
	}

	/* Right here, we have 4 possibilities:
	 * - the tree does not contain any leaf matching the
	 *   key, and we have new->key < old->key. We insert
	 *   new above old, on the left;
	 *
	 * - the tree does not contain any leaf matching the
	 *   key, and we have new->key > old->key. We insert
	 *   new above old, on the right;
	 *
	 * - the tree does contain the key with the same prefix
	 *   length. We add the new key next to it as a first
	 *   duplicate (since it was alone).
	 *
	 * The last two cases can easily be partially merged.
	 *
	 * - the tree contains a leaf matching the key, we have
	 *   to insert above it as a cover node. The leaf with
	 *   the shortest prefix becomes the left subtree and
	 *   the leaf with the longest prefix becomes the right
	 *   one. The cover node gets the min of both prefixes
	 *   as its new bit.
	 */

	/* first we want to ensure that we compare the correct bit, which means
	 * the largest common to both nodes.
	 */
	if (bit > new->node.pfx)
		bit = new->node.pfx;
	if (bit > old->node.pfx)
		bit = old->node.pfx;

	dprintf(" [old=%p, new=%p] bit2=%d\n", old, new, bit);
	new->node.bit = (bit << 1) + 1; /* assume normal node by default */

	/* if one prefix is included in the second one, we don't compare bits
	 * because they won't necessarily match, we just proceed with a cover
	 * node insertion.
	 */
	diff = 0;
	if (bit < old->node.pfx && bit < new->node.pfx)
		diff = cmp_bits(new->key, old->key, bit);

	if (diff == 0) {
		/* Both keys match. Either it's a duplicate entry or we have to
		 * put the shortest prefix left and the largest one right below
		 * a new cover node. By default, diff==0 means we'll be inserted
		 * on the right.
		 */
		new->node.bit--; /* anticipate cover node insertion */
		if (new->node.pfx == old->node.pfx) {
			dprintf(" [inserting dup %p->%p]\n", old, new);
			new->node.bit = -1; /* mark as new dup tree, just in case */

			if (unlikely(eb_gettag(root_right))) {
				/* we refuse to duplicate this key if the tree is
				 * tagged as containing only unique keys.
				 */
				return old;
			}

			if (eb_gettag(troot) != EB_LEAF) {
				/* there was already a dup tree below */
				struct eb_node *ret;
				ret = eb_insert_dup(&old->node, &new->node);
				return container_of(ret, struct ebmb_node, node);
			}
			/* otherwise fall through to insert first duplicate */
		}
		/* otherwise we just rely on the tests below to select the right side */
		else if (new->node.pfx < old->node.pfx)
			diff = -1; /* force insertion to left side */
	}

 insert_above:
	new_left = eb_dotag(&new->node.branches, EB_LEFT);
	new_rght = eb_dotag(&new->node.branches, EB_RGHT);
	new_leaf = eb_dotag(&new->node.branches, EB_LEAF);

	if (diff >= 0) {
		dprintf(" [old=%p, new=%p] inserting right, obit=%d/2, nbit=%d/2\n", old, new, old->node.bit, new->node.bit);
		new->node.branches.b[EB_LEFT] = troot;
		new->node.branches.b[EB_RGHT] = new_leaf;
		new->node.leaf_p = new_rght;
		*up_ptr = new_left;
	}
	else {
		dprintf(" [old=%p, new=%p] inserting left, obit=%d/2, nbit=%d/2\n", old, new, old->node.bit, new->node.bit);
		new->node.branches.b[EB_LEFT] = new_leaf;
		new->node.branches.b[EB_RGHT] = troot;
		new->node.leaf_p = new_left;
		*up_ptr = new_rght;
	}

	root->b[side] = eb_dotag(&new->node.branches, EB_NODE);
	return new;
}
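
/* Example (illustrative sketch): as noted above, bits past <pfx> should be
 * cleared before inserting a prefix. A small helper doing that for a key of
 * <len> bytes might look like this (hypothetical, not part of this header):
 *
 *	static void mask_key(unsigned char *key, unsigned int len, unsigned int pfx)
 *	{
 *		unsigned int byte = pfx >> 3;
 *
 *		if (byte >= len)
 *			return; // the prefix covers the whole key
 *		if (pfx & 7)
 *			key[byte++] &= 0xff << (8 - (pfx & 7)); // keep the top (pfx & 7) bits
 *		while (byte < len)
 *			key[byte++] = 0; // clear the remaining bytes
 *	}
 */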


#endif /* _EBMBTREE_H */