/*
 * Elastic Binary Trees - macros and structures for Multi-Byte data nodes.
 * Version 6.0.1
 * (C) 2002-2010 - Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

Willy Tarreauead63a02009-11-02 14:41:23 +010021#ifndef _EBMBTREE_H
22#define _EBMBTREE_H
23
Willy Tarreauc2186022009-10-26 19:48:54 +010024#include <string.h>
25#include "ebtree.h"
26
27/* Return the structure of type <type> whose member <member> points to <ptr> */
28#define ebmb_entry(ptr, type, member) container_of(ptr, type, member)
29
30#define EBMB_ROOT EB_ROOT
31#define EBMB_TREE_HEAD EB_TREE_HEAD
32
33/* This structure carries a node, a leaf, and a key. It must start with the
34 * eb_node so that it can be cast into an eb_node. We could also have put some
35 * sort of transparent union here to reduce the indirection level, but the fact
36 * is, the end user is not meant to manipulate internals, so this is pointless.
37 * The 'node.bit' value here works differently from scalar types, as it contains
38 * the number of identical bits between the two branches.
39 */
40struct ebmb_node {
41 struct eb_node node; /* the tree node, must be at the beginning */
42 unsigned char key[0]; /* the key, its size depends on the application */
43};
44
/*
 * Exported functions and macros.
 * Many of them are always inlined because they are extremely small, and
 * are generally called at most once or twice in a program.
 */

51/* Return leftmost node in the tree, or NULL if none */
52static forceinline struct ebmb_node *ebmb_first(struct eb_root *root)
53{
54 return ebmb_entry(eb_first(root), struct ebmb_node, node);
55}
56
57/* Return rightmost node in the tree, or NULL if none */
58static forceinline struct ebmb_node *ebmb_last(struct eb_root *root)
59{
60 return ebmb_entry(eb_last(root), struct ebmb_node, node);
61}
62
63/* Return next node in the tree, or NULL if none */
64static forceinline struct ebmb_node *ebmb_next(struct ebmb_node *ebmb)
65{
66 return ebmb_entry(eb_next(&ebmb->node), struct ebmb_node, node);
67}
68
69/* Return previous node in the tree, or NULL if none */
70static forceinline struct ebmb_node *ebmb_prev(struct ebmb_node *ebmb)
71{
72 return ebmb_entry(eb_prev(&ebmb->node), struct ebmb_node, node);
73}
74
75/* Return next node in the tree, skipping duplicates, or NULL if none */
76static forceinline struct ebmb_node *ebmb_next_unique(struct ebmb_node *ebmb)
77{
78 return ebmb_entry(eb_next_unique(&ebmb->node), struct ebmb_node, node);
79}
80
81/* Return previous node in the tree, skipping duplicates, or NULL if none */
82static forceinline struct ebmb_node *ebmb_prev_unique(struct ebmb_node *ebmb)
83{
84 return ebmb_entry(eb_prev_unique(&ebmb->node), struct ebmb_node, node);
85}
86
87/* Delete node from the tree if it was linked in. Mark the node unused. Note
88 * that this function relies on a non-inlined generic function: eb_delete.
89 */
90static forceinline void ebmb_delete(struct ebmb_node *ebmb)
91{
92 eb_delete(&ebmb->node);
93}
94
95/* The following functions are not inlined by default. They are declared
96 * in ebmbtree.c, which simply relies on their inline version.
97 */
98REGPRM3 struct ebmb_node *ebmb_lookup(struct eb_root *root, const void *x, unsigned int len);
99REGPRM3 struct ebmb_node *ebmb_insert(struct eb_root *root, struct ebmb_node *new, unsigned int len);
Willy Tarreau3a932442010-05-09 19:29:23 +0200100REGPRM2 struct ebmb_node *ebmb_lookup_longest(struct eb_root *root, const void *x);
101REGPRM3 struct ebmb_node *ebmb_lookup_prefix(struct eb_root *root, const void *x, unsigned int pfx);
102REGPRM3 struct ebmb_node *ebmb_insert_prefix(struct eb_root *root, struct ebmb_node *new, unsigned int len);
Willy Tarreauc2186022009-10-26 19:48:54 +0100103
/* The following functions are less likely to be used directly, because their
 * code is larger. The non-inlined version is preferred.
 */

108/* Delete node from the tree if it was linked in. Mark the node unused. */
109static forceinline void __ebmb_delete(struct ebmb_node *ebmb)
110{
111 __eb_delete(&ebmb->node);
112}
113
114/* Find the first occurence of a key of <len> bytes in the tree <root>.
115 * If none can be found, return NULL.
116 */
117static forceinline struct ebmb_node *__ebmb_lookup(struct eb_root *root, const void *x, unsigned int len)
118{
119 struct ebmb_node *node;
120 eb_troot_t *troot;
Willy Tarreau3a932442010-05-09 19:29:23 +0200121 int pos, side;
122 int node_bit;
Willy Tarreauc2186022009-10-26 19:48:54 +0100123
124 troot = root->b[EB_LEFT];
125 if (unlikely(troot == NULL))
126 return NULL;
127
Willy Tarreau3a932442010-05-09 19:29:23 +0200128 pos = 0;
Willy Tarreauc2186022009-10-26 19:48:54 +0100129 while (1) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200130 if (eb_gettag(troot) == EB_LEAF) {
Willy Tarreauc2186022009-10-26 19:48:54 +0100131 node = container_of(eb_untag(troot, EB_LEAF),
132 struct ebmb_node, node.branches);
Willy Tarreau3a932442010-05-09 19:29:23 +0200133 if (memcmp(node->key + pos, x, len - pos) != 0)
Willy Tarreauc2186022009-10-26 19:48:54 +0100134 return NULL;
Willy Tarreau3a932442010-05-09 19:29:23 +0200135 else
136 return node;
Willy Tarreauc2186022009-10-26 19:48:54 +0100137 }
138 node = container_of(eb_untag(troot, EB_NODE),
139 struct ebmb_node, node.branches);
140
Willy Tarreau3a932442010-05-09 19:29:23 +0200141 node_bit = node->node.bit;
142 if (node_bit < 0) {
Willy Tarreauc2186022009-10-26 19:48:54 +0100143 /* We have a dup tree now. Either it's for the same
144 * value, and we walk down left, or it's a different
145 * one and we don't have our key.
146 */
Willy Tarreau3a932442010-05-09 19:29:23 +0200147 if (memcmp(node->key + pos, x, len - pos) != 0)
Willy Tarreauc2186022009-10-26 19:48:54 +0100148 return NULL;
149
150 troot = node->node.branches.b[EB_LEFT];
151 while (eb_gettag(troot) != EB_LEAF)
152 troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
153 node = container_of(eb_untag(troot, EB_LEAF),
154 struct ebmb_node, node.branches);
155 return node;
156 }
157
Willy Tarreau3a932442010-05-09 19:29:23 +0200158 /* OK, normal data node, let's walk down. We check if all full
159 * bytes are equal, and we start from the last one we did not
160 * completely check. We stop as soon as we reach the last byte,
161 * because we must decide to go left/right or abort.
162 */
163 node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
164 if (node_bit < 0) {
165 /* This surprizing construction gives better performance
166 * because gcc does not try to reorder the loop. Tested to
167 * be fine with 2.95 to 4.2.
168 */
169 while (1) {
170 x++; pos++;
171 if (node->key[pos-1] ^ *(unsigned char*)(x-1))
172 return NULL; /* more than one full byte is different */
173 node_bit += 8;
174 if (node_bit >= 0)
175 break;
176 }
177 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100178
Willy Tarreau3a932442010-05-09 19:29:23 +0200179 /* here we know that only the last byte differs, so node_bit < 8.
180 * We have 2 possibilities :
181 * - more than the last bit differs => return NULL
182 * - walk down on side = (x[pos] >> node_bit) & 1
183 */
184 side = *(unsigned char *)x >> node_bit;
185 if (((node->key[pos] >> node_bit) ^ side) > 1)
186 return NULL;
187 side &= 1;
188 troot = node->node.branches.b[side];
Willy Tarreauc2186022009-10-26 19:48:54 +0100189 }
190}
191
192/* Insert ebmb_node <new> into subtree starting at node root <root>.
193 * Only new->key needs be set with the key. The ebmb_node is returned.
194 * If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
195 * len is specified in bytes.
196 */
197static forceinline struct ebmb_node *
198__ebmb_insert(struct eb_root *root, struct ebmb_node *new, unsigned int len)
199{
200 struct ebmb_node *old;
201 unsigned int side;
Willy Tarreau3a932442010-05-09 19:29:23 +0200202 eb_troot_t *troot, **up_ptr;
Willy Tarreauc2186022009-10-26 19:48:54 +0100203 eb_troot_t *root_right = root;
204 int diff;
205 int bit;
Willy Tarreau3a932442010-05-09 19:29:23 +0200206 eb_troot_t *new_left, *new_rght;
207 eb_troot_t *new_leaf;
208 int old_node_bit;
Willy Tarreauc2186022009-10-26 19:48:54 +0100209
210 side = EB_LEFT;
211 troot = root->b[EB_LEFT];
212 root_right = root->b[EB_RGHT];
213 if (unlikely(troot == NULL)) {
214 /* Tree is empty, insert the leaf part below the left branch */
215 root->b[EB_LEFT] = eb_dotag(&new->node.branches, EB_LEAF);
216 new->node.leaf_p = eb_dotag(root, EB_LEFT);
217 new->node.node_p = NULL; /* node part unused */
218 return new;
219 }
220
Willy Tarreauc2186022009-10-26 19:48:54 +0100221 /* The tree descent is fairly easy :
222 * - first, check if we have reached a leaf node
223 * - second, check if we have gone too far
224 * - third, reiterate
225 * Everywhere, we use <new> for the node node we are inserting, <root>
226 * for the node we attach it to, and <old> for the node we are
227 * displacing below <new>. <troot> will always point to the future node
228 * (tagged with its type). <side> carries the side the node <new> is
229 * attached to below its parent, which is also where previous node
230 * was attached.
231 */
232
233 bit = 0;
234 while (1) {
235 if (unlikely(eb_gettag(troot) == EB_LEAF)) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200236 /* insert above a leaf */
Willy Tarreauc2186022009-10-26 19:48:54 +0100237 old = container_of(eb_untag(troot, EB_LEAF),
238 struct ebmb_node, node.branches);
Willy Tarreauc2186022009-10-26 19:48:54 +0100239 new->node.node_p = old->node.leaf_p;
Willy Tarreau3a932442010-05-09 19:29:23 +0200240 up_ptr = &old->node.leaf_p;
241 goto check_bit_and_break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100242 }
243
244 /* OK we're walking down this link */
245 old = container_of(eb_untag(troot, EB_NODE),
246 struct ebmb_node, node.branches);
Willy Tarreau3a932442010-05-09 19:29:23 +0200247 old_node_bit = old->node.bit;
248
249 if (unlikely(old->node.bit < 0)) {
250 /* We're above a duplicate tree, so we must compare the whole value */
251 new->node.node_p = old->node.node_p;
252 up_ptr = &old->node.node_p;
253 check_bit_and_break:
254 bit = equal_bits(new->key, old->key, bit, len << 3);
255 break;
256 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100257
258 /* Stop going down when we don't have common bits anymore. We
259 * also stop in front of a duplicates tree because it means we
260 * have to insert above. Note: we can compare more bits than
261 * the current node's because as long as they are identical, we
262 * know we descend along the correct side.
263 */
Willy Tarreau3a932442010-05-09 19:29:23 +0200264
265 bit = equal_bits(new->key, old->key, bit, old_node_bit);
266 if (unlikely(bit < old_node_bit)) {
267 /* The tree did not contain the key, so we insert <new> before the
268 * node <old>, and set ->bit to designate the lowest bit position in
269 * <new> which applies to ->branches.b[].
270 */
271 new->node.node_p = old->node.node_p;
272 up_ptr = &old->node.node_p;
273 break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100274 }
Willy Tarreau3a932442010-05-09 19:29:23 +0200275 /* we don't want to skip bits for further comparisons, so we must limit <bit>.
276 * However, since we're going down around <old_node_bit>, we know it will be
277 * properly matched, so we can skip this bit.
278 */
279 bit = old_node_bit + 1;
280
281 /* walk down */
282 root = &old->node.branches;
283 side = old_node_bit & 7;
284 side ^= 7;
285 side = (new->key[old_node_bit >> 3] >> side) & 1;
286 troot = root->b[side];
287 }
288
289 new_left = eb_dotag(&new->node.branches, EB_LEFT);
290 new_rght = eb_dotag(&new->node.branches, EB_RGHT);
291 new_leaf = eb_dotag(&new->node.branches, EB_LEAF);
292
293 /* Note: we can compare more bits than
294 * the current node's because as long as they are identical, we
295 * know we descend along the correct side.
296 */
297 new->node.bit = bit;
298 diff = cmp_bits(new->key, old->key, bit);
299 if (diff == 0) {
300 new->node.bit = -1; /* mark as new dup tree, just in case */
Willy Tarreauc2186022009-10-26 19:48:54 +0100301
Willy Tarreau3a932442010-05-09 19:29:23 +0200302 if (likely(eb_gettag(root_right))) {
303 /* we refuse to duplicate this key if the tree is
304 * tagged as containing only unique keys.
Willy Tarreauc2186022009-10-26 19:48:54 +0100305 */
Willy Tarreau3a932442010-05-09 19:29:23 +0200306 return old;
307 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100308
Willy Tarreau3a932442010-05-09 19:29:23 +0200309 if (eb_gettag(troot) != EB_LEAF) {
310 /* there was already a dup tree below */
311 struct eb_node *ret;
312 ret = eb_insert_dup(&old->node, &new->node);
313 return container_of(ret, struct ebmb_node, node);
314 }
315 /* otherwise fall through */
316 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100317
Willy Tarreau3a932442010-05-09 19:29:23 +0200318 if (diff >= 0) {
319 new->node.branches.b[EB_LEFT] = troot;
320 new->node.branches.b[EB_RGHT] = new_leaf;
321 new->node.leaf_p = new_rght;
322 *up_ptr = new_left;
323 }
324 else if (diff < 0) {
325 new->node.branches.b[EB_LEFT] = new_leaf;
326 new->node.branches.b[EB_RGHT] = troot;
327 new->node.leaf_p = new_left;
328 *up_ptr = new_rght;
329 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100330
Willy Tarreau3a932442010-05-09 19:29:23 +0200331 /* Ok, now we are inserting <new> between <root> and <old>. <old>'s
332 * parent is already set to <new>, and the <root>'s branch is still in
333 * <side>. Update the root's leaf till we have it. Note that we can also
334 * find the side by checking the side of new->node.node_p.
335 */
336
337 root->b[side] = eb_dotag(&new->node.branches, EB_NODE);
338 return new;
339}
340
341
342/* Find the first occurence of the longest prefix matching a key <x> in the
343 * tree <root>. It's the caller's responsibility to ensure that key <x> is at
344 * least as long as the keys in the tree. If none can be found, return NULL.
345 */
346static forceinline struct ebmb_node *__ebmb_lookup_longest(struct eb_root *root, const void *x)
347{
348 struct ebmb_node *node;
349 eb_troot_t *troot, *cover;
350 int pos, side;
351 int node_bit;
352
353 troot = root->b[EB_LEFT];
354 if (unlikely(troot == NULL))
355 return NULL;
356
357 cover = NULL;
358 pos = 0;
359 while (1) {
360 if ((eb_gettag(troot) == EB_LEAF)) {
361 node = container_of(eb_untag(troot, EB_LEAF),
362 struct ebmb_node, node.branches);
363 if (check_bits(x - pos, node->key, pos, node->node.pfx))
364 goto not_found;
365
366 return node;
367 }
368 node = container_of(eb_untag(troot, EB_NODE),
369 struct ebmb_node, node.branches);
370
371 node_bit = node->node.bit;
372 if (node_bit < 0) {
373 /* We have a dup tree now. Either it's for the same
374 * value, and we walk down left, or it's a different
375 * one and we don't have our key.
376 */
377 if (check_bits(x - pos, node->key, pos, node->node.pfx))
378 goto not_found;
379
380 troot = node->node.branches.b[EB_LEFT];
381 while (eb_gettag(troot) != EB_LEAF)
382 troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
383 node = container_of(eb_untag(troot, EB_LEAF),
384 struct ebmb_node, node.branches);
385 return node;
386 }
387
388 node_bit >>= 1; /* strip cover bit */
389 node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
390 if (node_bit < 0) {
391 /* This uncommon construction gives better performance
392 * because gcc does not try to reorder the loop. Tested to
393 * be fine with 2.95 to 4.2.
394 */
395 while (1) {
396 x++; pos++;
397 if (node->key[pos-1] ^ *(unsigned char*)(x-1))
398 goto not_found; /* more than one full byte is different */
399 node_bit += 8;
400 if (node_bit >= 0)
401 break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100402 }
Willy Tarreau3a932442010-05-09 19:29:23 +0200403 }
404
405 /* here we know that only the last byte differs, so 0 <= node_bit <= 7.
406 * We have 2 possibilities :
407 * - more than the last bit differs => data does not match
408 * - walk down on side = (x[pos] >> node_bit) & 1
409 */
410 side = *(unsigned char *)x >> node_bit;
411 if (((node->key[pos] >> node_bit) ^ side) > 1)
412 goto not_found;
413
414 if (!(node->node.bit & 1)) {
415 /* This is a cover node, let's keep a reference to it
416 * for later. The covering subtree is on the left, and
417 * the covered subtree is on the right, so we have to
418 * walk down right.
419 */
420 cover = node->node.branches.b[EB_LEFT];
421 troot = node->node.branches.b[EB_RGHT];
422 continue;
423 }
424 side &= 1;
425 troot = node->node.branches.b[side];
426 }
427
428 not_found:
429 /* Walk down last cover tre if it exists. It does not matter if cover is NULL */
430 return ebmb_entry(eb_walk_down(cover, EB_LEFT), struct ebmb_node, node);
431}
432
433
434/* Find the first occurence of a prefix matching a key <x> of <pfx> BITS in the
435 * tree <root>. If none can be found, return NULL.
436 */
437static forceinline struct ebmb_node *__ebmb_lookup_prefix(struct eb_root *root, const void *x, unsigned int pfx)
438{
439 struct ebmb_node *node;
440 eb_troot_t *troot;
441 int pos, side;
442 int node_bit;
443
444 troot = root->b[EB_LEFT];
445 if (unlikely(troot == NULL))
446 return NULL;
447
448 pos = 0;
449 while (1) {
450 if ((eb_gettag(troot) == EB_LEAF)) {
451 node = container_of(eb_untag(troot, EB_LEAF),
452 struct ebmb_node, node.branches);
453 if (node->node.pfx != pfx)
454 return NULL;
455 if (check_bits(x - pos, node->key, pos, node->node.pfx))
456 return NULL;
457 return node;
458 }
459 node = container_of(eb_untag(troot, EB_NODE),
460 struct ebmb_node, node.branches);
461
462 node_bit = node->node.bit;
463 if (node_bit < 0) {
464 /* We have a dup tree now. Either it's for the same
465 * value, and we walk down left, or it's a different
466 * one and we don't have our key.
467 */
468 if (node->node.pfx != pfx)
469 return NULL;
470 if (check_bits(x - pos, node->key, pos, node->node.pfx))
471 return NULL;
472
473 troot = node->node.branches.b[EB_LEFT];
474 while (eb_gettag(troot) != EB_LEAF)
475 troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
476 node = container_of(eb_untag(troot, EB_LEAF),
477 struct ebmb_node, node.branches);
478 return node;
479 }
480
481 node_bit >>= 1; /* strip cover bit */
482 node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
483 if (node_bit < 0) {
484 /* This uncommon construction gives better performance
485 * because gcc does not try to reorder the loop. Tested to
486 * be fine with 2.95 to 4.2.
487 */
488 while (1) {
489 x++; pos++;
490 if (node->key[pos-1] ^ *(unsigned char*)(x-1))
491 return NULL; /* more than one full byte is different */
492 node_bit += 8;
493 if (node_bit >= 0)
494 break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100495 }
Willy Tarreau3a932442010-05-09 19:29:23 +0200496 }
497
498 /* here we know that only the last byte differs, so 0 <= node_bit <= 7.
499 * We have 2 possibilities :
500 * - more than the last bit differs => data does not match
501 * - walk down on side = (x[pos] >> node_bit) & 1
502 */
503 side = *(unsigned char *)x >> node_bit;
504 if (((node->key[pos] >> node_bit) ^ side) > 1)
505 return NULL;
506
507 if (!(node->node.bit & 1)) {
508 /* This is a cover node, it may be the entry we're
509 * looking for. We already know that it matches all the
510 * bits, let's compare prefixes and descend the cover
511 * subtree if they match.
512 */
513 if (node->node.bit >> 1 == pfx)
514 troot = node->node.branches.b[EB_LEFT];
515 else
516 troot = node->node.branches.b[EB_RGHT];
517 continue;
518 }
519 side &= 1;
520 troot = node->node.branches.b[side];
521 }
522}
523
524
525/* Insert ebmb_node <new> into a prefix subtree starting at node root <root>.
526 * Only new->key and new->pfx need be set with the key and its prefix length.
527 * Note that bits between <pfx> and <len> are theorically ignored and should be
528 * zero, as it is not certain yet that they will always be ignored everywhere
529 * (eg in bit compare functions).
530 * The ebmb_node is returned.
531 * If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
532 * len is specified in bytes.
533 */
534static forceinline struct ebmb_node *
535__ebmb_insert_prefix(struct eb_root *root, struct ebmb_node *new, unsigned int len)
536{
537 struct ebmb_node *old;
538 unsigned int side;
539 eb_troot_t *troot, **up_ptr;
540 eb_troot_t *root_right = root;
541 int diff;
542 int bit;
543 eb_troot_t *new_left, *new_rght;
544 eb_troot_t *new_leaf;
545 int old_node_bit;
546
547 side = EB_LEFT;
548 troot = root->b[EB_LEFT];
549 root_right = root->b[EB_RGHT];
550 if (unlikely(troot == NULL)) {
551 /* Tree is empty, insert the leaf part below the left branch */
552 root->b[EB_LEFT] = eb_dotag(&new->node.branches, EB_LEAF);
553 new->node.leaf_p = eb_dotag(root, EB_LEFT);
554 new->node.node_p = NULL; /* node part unused */
555 return new;
556 }
557
558 len <<= 3;
559 if (len > new->node.pfx)
560 len = new->node.pfx;
561
562 /* The tree descent is fairly easy :
563 * - first, check if we have reached a leaf node
564 * - second, check if we have gone too far
565 * - third, reiterate
566 * Everywhere, we use <new> for the node node we are inserting, <root>
567 * for the node we attach it to, and <old> for the node we are
568 * displacing below <new>. <troot> will always point to the future node
569 * (tagged with its type). <side> carries the side the node <new> is
570 * attached to below its parent, which is also where previous node
571 * was attached.
572 */
573
574 bit = 0;
575 while (1) {
576 if (unlikely(eb_gettag(troot) == EB_LEAF)) {
577 /* Insert above a leaf. Note that this leaf could very
578 * well be part of a cover node.
579 */
580 old = container_of(eb_untag(troot, EB_LEAF),
581 struct ebmb_node, node.branches);
582 new->node.node_p = old->node.leaf_p;
583 up_ptr = &old->node.leaf_p;
584 goto check_bit_and_break;
585 }
586
587 /* OK we're walking down this link */
588 old = container_of(eb_untag(troot, EB_NODE),
589 struct ebmb_node, node.branches);
590 old_node_bit = old->node.bit;
591 /* Note that old_node_bit can be :
592 * < 0 : dup tree
593 * = 2N : cover node for N bits
594 * = 2N+1 : normal node at N bits
595 */
596
597 if (unlikely(old_node_bit < 0)) {
598 /* We're above a duplicate tree, so we must compare the whole value */
599 new->node.node_p = old->node.node_p;
600 up_ptr = &old->node.node_p;
601 check_bit_and_break:
602 /* No need to compare everything if the leaves are shorter than the new one. */
603 if (len > old->node.pfx)
604 len = old->node.pfx;
605 bit = equal_bits(new->key, old->key, bit, len);
Willy Tarreauc2186022009-10-26 19:48:54 +0100606 break;
607 }
608
Willy Tarreau3a932442010-05-09 19:29:23 +0200609 /* WARNING: for the two blocks below, <bit> is counted in half-bits */
610
611 bit = equal_bits(new->key, old->key, bit, old_node_bit >> 1);
612 bit = (bit << 1) + 1; // assume comparisons with normal nodes
Willy Tarreau3a932442010-05-09 19:29:23 +0200613
614 /* we must always check that our prefix is larger than the nodes
615 * we visit, otherwise we have to stop going down. The following
616 * test is able to stop before both normal and cover nodes.
617 */
618 if (bit >= (new->node.pfx << 1) && (new->node.pfx << 1) < old_node_bit) {
619 /* insert cover node here on the left */
620 new->node.node_p = old->node.node_p;
621 up_ptr = &old->node.node_p;
622 new->node.bit = new->node.pfx << 1;
623 diff = -1;
Willy Tarreau3a932442010-05-09 19:29:23 +0200624 goto insert_above;
625 }
626
627 if (unlikely(bit < old_node_bit)) {
628 /* The tree did not contain the key, so we insert <new> before the
629 * node <old>, and set ->bit to designate the lowest bit position in
630 * <new> which applies to ->branches.b[]. We know that the bit is not
631 * greater than the prefix length thanks to the test above.
632 */
633 new->node.node_p = old->node.node_p;
634 up_ptr = &old->node.node_p;
635 new->node.bit = bit;
636 diff = cmp_bits(new->key, old->key, bit >> 1);
Willy Tarreau3a932442010-05-09 19:29:23 +0200637 goto insert_above;
638 }
639
640 if (!(old_node_bit & 1)) {
641 /* if we encounter a cover node with our exact prefix length, it's
642 * necessarily the same value, so we insert there as a duplicate on
643 * the left. For that, we go down on the left and the leaf detection
644 * code will finish the job.
645 */
646 if ((new->node.pfx << 1) == old_node_bit) {
647 root = &old->node.branches;
648 side = EB_LEFT;
649 troot = root->b[side];
Willy Tarreau3a932442010-05-09 19:29:23 +0200650 continue;
651 }
652
653 /* cover nodes are always walked through on the right */
654 side = EB_RGHT;
655 bit = old_node_bit >> 1; /* recheck that bit */
656 root = &old->node.branches;
657 troot = root->b[side];
Willy Tarreau3a932442010-05-09 19:29:23 +0200658 continue;
659 }
660
661 /* we don't want to skip bits for further comparisons, so we must limit <bit>.
662 * However, since we're going down around <old_node_bit>, we know it will be
663 * properly matched, so we can skip this bit.
664 */
665 old_node_bit >>= 1;
666 bit = old_node_bit + 1;
667
Willy Tarreauc2186022009-10-26 19:48:54 +0100668 /* walk down */
669 root = &old->node.branches;
Willy Tarreau3a932442010-05-09 19:29:23 +0200670 side = old_node_bit & 7;
671 side ^= 7;
672 side = (new->key[old_node_bit >> 3] >> side) & 1;
Willy Tarreauc2186022009-10-26 19:48:54 +0100673 troot = root->b[side];
674 }
675
Willy Tarreau3a932442010-05-09 19:29:23 +0200676 /* Right here, we have 4 possibilities :
677 * - the tree does not contain any leaf matching the
678 * key, and we have new->key < old->key. We insert
679 * new above old, on the left ;
680 *
681 * - the tree does not contain any leaf matching the
682 * key, and we have new->key > old->key. We insert
683 * new above old, on the right ;
684 *
685 * - the tree does contain the key with the same prefix
686 * length. We add the new key next to it as a first
687 * duplicate (since it was alone).
688 *
689 * The last two cases can easily be partially merged.
690 *
691 * - the tree contains a leaf matching the key, we have
692 * to insert above it as a cover node. The leaf with
693 * the shortest prefix becomes the left subtree and
694 * the leaf with the longest prefix becomes the right
695 * one. The cover node gets the min of both prefixes
696 * as its new bit.
Willy Tarreauc2186022009-10-26 19:48:54 +0100697 */
698
Willy Tarreau3a932442010-05-09 19:29:23 +0200699 /* first we want to ensure that we compare the correct bit, which means
700 * the largest common to both nodes.
Willy Tarreauc2186022009-10-26 19:48:54 +0100701 */
Willy Tarreau3a932442010-05-09 19:29:23 +0200702 if (bit > new->node.pfx)
703 bit = new->node.pfx;
704 if (bit > old->node.pfx)
705 bit = old->node.pfx;
706
Willy Tarreau3a932442010-05-09 19:29:23 +0200707 new->node.bit = (bit << 1) + 1; /* assume normal node by default */
708
709 /* if one prefix is included in the second one, we don't compare bits
710 * because they won't necessarily match, we just proceed with a cover
711 * node insertion.
712 */
713 diff = 0;
714 if (bit < old->node.pfx && bit < new->node.pfx)
715 diff = cmp_bits(new->key, old->key, bit);
716
717 if (diff == 0) {
718 /* Both keys match. Either it's a duplicate entry or we have to
719 * put the shortest prefix left and the largest one right below
720 * a new cover node. By default, diff==0 means we'll be inserted
721 * on the right.
722 */
723 new->node.bit--; /* anticipate cover node insertion */
724 if (new->node.pfx == old->node.pfx) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200725 new->node.bit = -1; /* mark as new dup tree, just in case */
726
727 if (unlikely(eb_gettag(root_right))) {
728 /* we refuse to duplicate this key if the tree is
729 * tagged as containing only unique keys.
730 */
731 return old;
732 }
733
734 if (eb_gettag(troot) != EB_LEAF) {
735 /* there was already a dup tree below */
736 struct eb_node *ret;
737 ret = eb_insert_dup(&old->node, &new->node);
738 return container_of(ret, struct ebmb_node, node);
739 }
740 /* otherwise fall through to insert first duplicate */
741 }
742 /* otherwise we just rely on the tests below to select the right side */
743 else if (new->node.pfx < old->node.pfx)
744 diff = -1; /* force insertion to left side */
745 }
746
747 insert_above:
748 new_left = eb_dotag(&new->node.branches, EB_LEFT);
749 new_rght = eb_dotag(&new->node.branches, EB_RGHT);
750 new_leaf = eb_dotag(&new->node.branches, EB_LEAF);
751
752 if (diff >= 0) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200753 new->node.branches.b[EB_LEFT] = troot;
754 new->node.branches.b[EB_RGHT] = new_leaf;
755 new->node.leaf_p = new_rght;
756 *up_ptr = new_left;
757 }
758 else {
Willy Tarreau3a932442010-05-09 19:29:23 +0200759 new->node.branches.b[EB_LEFT] = new_leaf;
760 new->node.branches.b[EB_RGHT] = troot;
761 new->node.leaf_p = new_left;
762 *up_ptr = new_rght;
763 }
764
Willy Tarreauc2186022009-10-26 19:48:54 +0100765 root->b[side] = eb_dotag(&new->node.branches, EB_NODE);
766 return new;
767}
768


#endif /* _EBMBTREE_H */
