/*
 * Elastic Binary Trees - macros and structures for Multi-Byte data nodes.
 * Version 6.0.5
 * (C) 2002-2011 - Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

Willy Tarreauead63a02009-11-02 14:41:23 +010021#ifndef _EBMBTREE_H
22#define _EBMBTREE_H
23
Willy Tarreauc2186022009-10-26 19:48:54 +010024#include <string.h>
25#include "ebtree.h"
26
/* Given <ptr>, a pointer to the member named <member> inside a structure of
 * type <type>, return a pointer to that enclosing structure.
 */
#define ebmb_entry(ptr, type, member) container_of(ptr, type, member)

/* Multi-byte trees use the generic root initializers under their own names. */
#define EBMB_ROOT	EB_ROOT
#define EBMB_TREE_HEAD	EB_TREE_HEAD

33/* This structure carries a node, a leaf, and a key. It must start with the
34 * eb_node so that it can be cast into an eb_node. We could also have put some
35 * sort of transparent union here to reduce the indirection level, but the fact
36 * is, the end user is not meant to manipulate internals, so this is pointless.
37 * The 'node.bit' value here works differently from scalar types, as it contains
38 * the number of identical bits between the two branches.
39 */
40struct ebmb_node {
41 struct eb_node node; /* the tree node, must be at the beginning */
42 unsigned char key[0]; /* the key, its size depends on the application */
43};
44
/*
 * Exported functions and macros.
 * Many of them are always inlined because they are extremely small, and
 * are generally called at most once or twice in a program.
 */

51/* Return leftmost node in the tree, or NULL if none */
52static forceinline struct ebmb_node *ebmb_first(struct eb_root *root)
53{
54 return ebmb_entry(eb_first(root), struct ebmb_node, node);
55}
56
57/* Return rightmost node in the tree, or NULL if none */
58static forceinline struct ebmb_node *ebmb_last(struct eb_root *root)
59{
60 return ebmb_entry(eb_last(root), struct ebmb_node, node);
61}
62
63/* Return next node in the tree, or NULL if none */
64static forceinline struct ebmb_node *ebmb_next(struct ebmb_node *ebmb)
65{
66 return ebmb_entry(eb_next(&ebmb->node), struct ebmb_node, node);
67}
68
69/* Return previous node in the tree, or NULL if none */
70static forceinline struct ebmb_node *ebmb_prev(struct ebmb_node *ebmb)
71{
72 return ebmb_entry(eb_prev(&ebmb->node), struct ebmb_node, node);
73}
74
75/* Return next node in the tree, skipping duplicates, or NULL if none */
76static forceinline struct ebmb_node *ebmb_next_unique(struct ebmb_node *ebmb)
77{
78 return ebmb_entry(eb_next_unique(&ebmb->node), struct ebmb_node, node);
79}
80
81/* Return previous node in the tree, skipping duplicates, or NULL if none */
82static forceinline struct ebmb_node *ebmb_prev_unique(struct ebmb_node *ebmb)
83{
84 return ebmb_entry(eb_prev_unique(&ebmb->node), struct ebmb_node, node);
85}
86
87/* Delete node from the tree if it was linked in. Mark the node unused. Note
88 * that this function relies on a non-inlined generic function: eb_delete.
89 */
90static forceinline void ebmb_delete(struct ebmb_node *ebmb)
91{
92 eb_delete(&ebmb->node);
93}
94
95/* The following functions are not inlined by default. They are declared
96 * in ebmbtree.c, which simply relies on their inline version.
97 */
98REGPRM3 struct ebmb_node *ebmb_lookup(struct eb_root *root, const void *x, unsigned int len);
99REGPRM3 struct ebmb_node *ebmb_insert(struct eb_root *root, struct ebmb_node *new, unsigned int len);
Willy Tarreau3a932442010-05-09 19:29:23 +0200100REGPRM2 struct ebmb_node *ebmb_lookup_longest(struct eb_root *root, const void *x);
101REGPRM3 struct ebmb_node *ebmb_lookup_prefix(struct eb_root *root, const void *x, unsigned int pfx);
102REGPRM3 struct ebmb_node *ebmb_insert_prefix(struct eb_root *root, struct ebmb_node *new, unsigned int len);
Willy Tarreauc2186022009-10-26 19:48:54 +0100103
/* The following functions are less likely to be used directly, because their
 * code is larger. The non-inlined version is preferred.
 */

108/* Delete node from the tree if it was linked in. Mark the node unused. */
109static forceinline void __ebmb_delete(struct ebmb_node *ebmb)
110{
111 __eb_delete(&ebmb->node);
112}
113
Willy Tarreau414c4b22011-01-04 13:21:06 +0100114/* Find the first occurence of a key of a least <len> bytes matching <x> in the
115 * tree <root>. The caller is responsible for ensuring that <len> will not exceed
116 * the common parts between the tree's keys and <x>. In case of multiple matches,
117 * the leftmost node is returned. This means that this function can be used to
118 * lookup string keys by prefix if all keys in the tree are zero-terminated. If
119 * no match is found, NULL is returned. Returns first node if <len> is zero.
Willy Tarreauc2186022009-10-26 19:48:54 +0100120 */
121static forceinline struct ebmb_node *__ebmb_lookup(struct eb_root *root, const void *x, unsigned int len)
122{
123 struct ebmb_node *node;
124 eb_troot_t *troot;
Willy Tarreau3a932442010-05-09 19:29:23 +0200125 int pos, side;
126 int node_bit;
Willy Tarreauc2186022009-10-26 19:48:54 +0100127
128 troot = root->b[EB_LEFT];
129 if (unlikely(troot == NULL))
130 return NULL;
131
Willy Tarreau414c4b22011-01-04 13:21:06 +0100132 if (unlikely(len == 0))
133 goto walk_down;
134
Willy Tarreau3a932442010-05-09 19:29:23 +0200135 pos = 0;
Willy Tarreauc2186022009-10-26 19:48:54 +0100136 while (1) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200137 if (eb_gettag(troot) == EB_LEAF) {
Willy Tarreauc2186022009-10-26 19:48:54 +0100138 node = container_of(eb_untag(troot, EB_LEAF),
139 struct ebmb_node, node.branches);
Willy Tarreau414c4b22011-01-04 13:21:06 +0100140 if (memcmp(node->key + pos, x, len) != 0)
Willy Tarreauc2186022009-10-26 19:48:54 +0100141 return NULL;
Willy Tarreau3a932442010-05-09 19:29:23 +0200142 else
143 return node;
Willy Tarreauc2186022009-10-26 19:48:54 +0100144 }
145 node = container_of(eb_untag(troot, EB_NODE),
146 struct ebmb_node, node.branches);
147
Willy Tarreau3a932442010-05-09 19:29:23 +0200148 node_bit = node->node.bit;
149 if (node_bit < 0) {
Willy Tarreauc2186022009-10-26 19:48:54 +0100150 /* We have a dup tree now. Either it's for the same
151 * value, and we walk down left, or it's a different
152 * one and we don't have our key.
153 */
Willy Tarreau414c4b22011-01-04 13:21:06 +0100154 if (memcmp(node->key + pos, x, len) != 0)
Willy Tarreauc2186022009-10-26 19:48:54 +0100155 return NULL;
Willy Tarreau414c4b22011-01-04 13:21:06 +0100156 walk_left:
Willy Tarreauc2186022009-10-26 19:48:54 +0100157 troot = node->node.branches.b[EB_LEFT];
Willy Tarreau414c4b22011-01-04 13:21:06 +0100158 walk_down:
Willy Tarreauc2186022009-10-26 19:48:54 +0100159 while (eb_gettag(troot) != EB_LEAF)
160 troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
161 node = container_of(eb_untag(troot, EB_LEAF),
162 struct ebmb_node, node.branches);
163 return node;
164 }
165
Willy Tarreau3a932442010-05-09 19:29:23 +0200166 /* OK, normal data node, let's walk down. We check if all full
167 * bytes are equal, and we start from the last one we did not
168 * completely check. We stop as soon as we reach the last byte,
169 * because we must decide to go left/right or abort.
170 */
171 node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
172 if (node_bit < 0) {
173 /* This surprizing construction gives better performance
174 * because gcc does not try to reorder the loop. Tested to
175 * be fine with 2.95 to 4.2.
176 */
177 while (1) {
Willy Tarreau414c4b22011-01-04 13:21:06 +0100178 if (node->key[pos++] ^ *(unsigned char*)(x++))
Willy Tarreau3a932442010-05-09 19:29:23 +0200179 return NULL; /* more than one full byte is different */
Willy Tarreau414c4b22011-01-04 13:21:06 +0100180 if (--len == 0)
181 goto walk_left; /* return first node if all bytes matched */
Willy Tarreau3a932442010-05-09 19:29:23 +0200182 node_bit += 8;
183 if (node_bit >= 0)
184 break;
185 }
186 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100187
Willy Tarreau3a932442010-05-09 19:29:23 +0200188 /* here we know that only the last byte differs, so node_bit < 8.
189 * We have 2 possibilities :
190 * - more than the last bit differs => return NULL
191 * - walk down on side = (x[pos] >> node_bit) & 1
192 */
193 side = *(unsigned char *)x >> node_bit;
194 if (((node->key[pos] >> node_bit) ^ side) > 1)
195 return NULL;
196 side &= 1;
197 troot = node->node.branches.b[side];
Willy Tarreauc2186022009-10-26 19:48:54 +0100198 }
199}
200
201/* Insert ebmb_node <new> into subtree starting at node root <root>.
202 * Only new->key needs be set with the key. The ebmb_node is returned.
203 * If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
Willy Tarreau414c4b22011-01-04 13:21:06 +0100204 * len is specified in bytes. It is absolutely mandatory that this length
205 * is the same for all keys in the tree. This function cannot be used to
206 * insert strings.
Willy Tarreauc2186022009-10-26 19:48:54 +0100207 */
208static forceinline struct ebmb_node *
209__ebmb_insert(struct eb_root *root, struct ebmb_node *new, unsigned int len)
210{
211 struct ebmb_node *old;
212 unsigned int side;
Willy Tarreau3a932442010-05-09 19:29:23 +0200213 eb_troot_t *troot, **up_ptr;
Willy Tarreauc2186022009-10-26 19:48:54 +0100214 eb_troot_t *root_right = root;
215 int diff;
216 int bit;
Willy Tarreau3a932442010-05-09 19:29:23 +0200217 eb_troot_t *new_left, *new_rght;
218 eb_troot_t *new_leaf;
219 int old_node_bit;
Willy Tarreauc2186022009-10-26 19:48:54 +0100220
221 side = EB_LEFT;
222 troot = root->b[EB_LEFT];
223 root_right = root->b[EB_RGHT];
224 if (unlikely(troot == NULL)) {
225 /* Tree is empty, insert the leaf part below the left branch */
226 root->b[EB_LEFT] = eb_dotag(&new->node.branches, EB_LEAF);
227 new->node.leaf_p = eb_dotag(root, EB_LEFT);
228 new->node.node_p = NULL; /* node part unused */
229 return new;
230 }
231
Willy Tarreauc2186022009-10-26 19:48:54 +0100232 /* The tree descent is fairly easy :
233 * - first, check if we have reached a leaf node
234 * - second, check if we have gone too far
235 * - third, reiterate
236 * Everywhere, we use <new> for the node node we are inserting, <root>
237 * for the node we attach it to, and <old> for the node we are
238 * displacing below <new>. <troot> will always point to the future node
239 * (tagged with its type). <side> carries the side the node <new> is
240 * attached to below its parent, which is also where previous node
241 * was attached.
242 */
243
244 bit = 0;
245 while (1) {
246 if (unlikely(eb_gettag(troot) == EB_LEAF)) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200247 /* insert above a leaf */
Willy Tarreauc2186022009-10-26 19:48:54 +0100248 old = container_of(eb_untag(troot, EB_LEAF),
249 struct ebmb_node, node.branches);
Willy Tarreauc2186022009-10-26 19:48:54 +0100250 new->node.node_p = old->node.leaf_p;
Willy Tarreau3a932442010-05-09 19:29:23 +0200251 up_ptr = &old->node.leaf_p;
252 goto check_bit_and_break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100253 }
254
255 /* OK we're walking down this link */
256 old = container_of(eb_untag(troot, EB_NODE),
257 struct ebmb_node, node.branches);
Willy Tarreau3a932442010-05-09 19:29:23 +0200258 old_node_bit = old->node.bit;
259
260 if (unlikely(old->node.bit < 0)) {
261 /* We're above a duplicate tree, so we must compare the whole value */
262 new->node.node_p = old->node.node_p;
263 up_ptr = &old->node.node_p;
264 check_bit_and_break:
265 bit = equal_bits(new->key, old->key, bit, len << 3);
266 break;
267 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100268
269 /* Stop going down when we don't have common bits anymore. We
270 * also stop in front of a duplicates tree because it means we
271 * have to insert above. Note: we can compare more bits than
272 * the current node's because as long as they are identical, we
273 * know we descend along the correct side.
274 */
Willy Tarreau3a932442010-05-09 19:29:23 +0200275
276 bit = equal_bits(new->key, old->key, bit, old_node_bit);
277 if (unlikely(bit < old_node_bit)) {
278 /* The tree did not contain the key, so we insert <new> before the
279 * node <old>, and set ->bit to designate the lowest bit position in
280 * <new> which applies to ->branches.b[].
281 */
282 new->node.node_p = old->node.node_p;
283 up_ptr = &old->node.node_p;
284 break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100285 }
Willy Tarreau3a932442010-05-09 19:29:23 +0200286 /* we don't want to skip bits for further comparisons, so we must limit <bit>.
287 * However, since we're going down around <old_node_bit>, we know it will be
288 * properly matched, so we can skip this bit.
289 */
290 bit = old_node_bit + 1;
291
292 /* walk down */
293 root = &old->node.branches;
294 side = old_node_bit & 7;
295 side ^= 7;
296 side = (new->key[old_node_bit >> 3] >> side) & 1;
297 troot = root->b[side];
298 }
299
300 new_left = eb_dotag(&new->node.branches, EB_LEFT);
301 new_rght = eb_dotag(&new->node.branches, EB_RGHT);
302 new_leaf = eb_dotag(&new->node.branches, EB_LEAF);
303
304 /* Note: we can compare more bits than
305 * the current node's because as long as they are identical, we
306 * know we descend along the correct side.
307 */
308 new->node.bit = bit;
309 diff = cmp_bits(new->key, old->key, bit);
310 if (diff == 0) {
311 new->node.bit = -1; /* mark as new dup tree, just in case */
Willy Tarreauc2186022009-10-26 19:48:54 +0100312
Willy Tarreau3a932442010-05-09 19:29:23 +0200313 if (likely(eb_gettag(root_right))) {
314 /* we refuse to duplicate this key if the tree is
315 * tagged as containing only unique keys.
Willy Tarreauc2186022009-10-26 19:48:54 +0100316 */
Willy Tarreau3a932442010-05-09 19:29:23 +0200317 return old;
318 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100319
Willy Tarreau3a932442010-05-09 19:29:23 +0200320 if (eb_gettag(troot) != EB_LEAF) {
321 /* there was already a dup tree below */
322 struct eb_node *ret;
323 ret = eb_insert_dup(&old->node, &new->node);
324 return container_of(ret, struct ebmb_node, node);
325 }
326 /* otherwise fall through */
327 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100328
Willy Tarreau3a932442010-05-09 19:29:23 +0200329 if (diff >= 0) {
330 new->node.branches.b[EB_LEFT] = troot;
331 new->node.branches.b[EB_RGHT] = new_leaf;
332 new->node.leaf_p = new_rght;
333 *up_ptr = new_left;
334 }
335 else if (diff < 0) {
336 new->node.branches.b[EB_LEFT] = new_leaf;
337 new->node.branches.b[EB_RGHT] = troot;
338 new->node.leaf_p = new_left;
339 *up_ptr = new_rght;
340 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100341
Willy Tarreau3a932442010-05-09 19:29:23 +0200342 /* Ok, now we are inserting <new> between <root> and <old>. <old>'s
343 * parent is already set to <new>, and the <root>'s branch is still in
344 * <side>. Update the root's leaf till we have it. Note that we can also
345 * find the side by checking the side of new->node.node_p.
346 */
347
348 root->b[side] = eb_dotag(&new->node.branches, EB_NODE);
349 return new;
350}
351
352
353/* Find the first occurence of the longest prefix matching a key <x> in the
354 * tree <root>. It's the caller's responsibility to ensure that key <x> is at
355 * least as long as the keys in the tree. If none can be found, return NULL.
356 */
357static forceinline struct ebmb_node *__ebmb_lookup_longest(struct eb_root *root, const void *x)
358{
359 struct ebmb_node *node;
360 eb_troot_t *troot, *cover;
361 int pos, side;
362 int node_bit;
363
364 troot = root->b[EB_LEFT];
365 if (unlikely(troot == NULL))
366 return NULL;
367
368 cover = NULL;
369 pos = 0;
370 while (1) {
371 if ((eb_gettag(troot) == EB_LEAF)) {
372 node = container_of(eb_untag(troot, EB_LEAF),
373 struct ebmb_node, node.branches);
374 if (check_bits(x - pos, node->key, pos, node->node.pfx))
375 goto not_found;
376
377 return node;
378 }
379 node = container_of(eb_untag(troot, EB_NODE),
380 struct ebmb_node, node.branches);
381
382 node_bit = node->node.bit;
383 if (node_bit < 0) {
384 /* We have a dup tree now. Either it's for the same
385 * value, and we walk down left, or it's a different
386 * one and we don't have our key.
387 */
388 if (check_bits(x - pos, node->key, pos, node->node.pfx))
389 goto not_found;
390
391 troot = node->node.branches.b[EB_LEFT];
392 while (eb_gettag(troot) != EB_LEAF)
393 troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
394 node = container_of(eb_untag(troot, EB_LEAF),
395 struct ebmb_node, node.branches);
396 return node;
397 }
398
399 node_bit >>= 1; /* strip cover bit */
400 node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
401 if (node_bit < 0) {
402 /* This uncommon construction gives better performance
403 * because gcc does not try to reorder the loop. Tested to
404 * be fine with 2.95 to 4.2.
405 */
406 while (1) {
407 x++; pos++;
408 if (node->key[pos-1] ^ *(unsigned char*)(x-1))
409 goto not_found; /* more than one full byte is different */
410 node_bit += 8;
411 if (node_bit >= 0)
412 break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100413 }
Willy Tarreau3a932442010-05-09 19:29:23 +0200414 }
415
416 /* here we know that only the last byte differs, so 0 <= node_bit <= 7.
417 * We have 2 possibilities :
418 * - more than the last bit differs => data does not match
419 * - walk down on side = (x[pos] >> node_bit) & 1
420 */
421 side = *(unsigned char *)x >> node_bit;
422 if (((node->key[pos] >> node_bit) ^ side) > 1)
423 goto not_found;
424
425 if (!(node->node.bit & 1)) {
426 /* This is a cover node, let's keep a reference to it
427 * for later. The covering subtree is on the left, and
428 * the covered subtree is on the right, so we have to
429 * walk down right.
430 */
431 cover = node->node.branches.b[EB_LEFT];
432 troot = node->node.branches.b[EB_RGHT];
433 continue;
434 }
435 side &= 1;
436 troot = node->node.branches.b[side];
437 }
438
439 not_found:
440 /* Walk down last cover tre if it exists. It does not matter if cover is NULL */
441 return ebmb_entry(eb_walk_down(cover, EB_LEFT), struct ebmb_node, node);
442}
443
444
445/* Find the first occurence of a prefix matching a key <x> of <pfx> BITS in the
Willy Tarreau414c4b22011-01-04 13:21:06 +0100446 * tree <root>. It's the caller's responsibility to ensure that key <x> is at
447 * least as long as the keys in the tree. If none can be found, return NULL.
Willy Tarreau3a932442010-05-09 19:29:23 +0200448 */
449static forceinline struct ebmb_node *__ebmb_lookup_prefix(struct eb_root *root, const void *x, unsigned int pfx)
450{
451 struct ebmb_node *node;
452 eb_troot_t *troot;
453 int pos, side;
454 int node_bit;
455
456 troot = root->b[EB_LEFT];
457 if (unlikely(troot == NULL))
458 return NULL;
459
460 pos = 0;
461 while (1) {
462 if ((eb_gettag(troot) == EB_LEAF)) {
463 node = container_of(eb_untag(troot, EB_LEAF),
464 struct ebmb_node, node.branches);
465 if (node->node.pfx != pfx)
466 return NULL;
467 if (check_bits(x - pos, node->key, pos, node->node.pfx))
468 return NULL;
469 return node;
470 }
471 node = container_of(eb_untag(troot, EB_NODE),
472 struct ebmb_node, node.branches);
473
474 node_bit = node->node.bit;
475 if (node_bit < 0) {
476 /* We have a dup tree now. Either it's for the same
477 * value, and we walk down left, or it's a different
478 * one and we don't have our key.
479 */
480 if (node->node.pfx != pfx)
481 return NULL;
482 if (check_bits(x - pos, node->key, pos, node->node.pfx))
483 return NULL;
484
485 troot = node->node.branches.b[EB_LEFT];
486 while (eb_gettag(troot) != EB_LEAF)
487 troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
488 node = container_of(eb_untag(troot, EB_LEAF),
489 struct ebmb_node, node.branches);
490 return node;
491 }
492
493 node_bit >>= 1; /* strip cover bit */
494 node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
495 if (node_bit < 0) {
496 /* This uncommon construction gives better performance
497 * because gcc does not try to reorder the loop. Tested to
498 * be fine with 2.95 to 4.2.
499 */
500 while (1) {
501 x++; pos++;
502 if (node->key[pos-1] ^ *(unsigned char*)(x-1))
503 return NULL; /* more than one full byte is different */
504 node_bit += 8;
505 if (node_bit >= 0)
506 break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100507 }
Willy Tarreau3a932442010-05-09 19:29:23 +0200508 }
509
510 /* here we know that only the last byte differs, so 0 <= node_bit <= 7.
511 * We have 2 possibilities :
512 * - more than the last bit differs => data does not match
513 * - walk down on side = (x[pos] >> node_bit) & 1
514 */
515 side = *(unsigned char *)x >> node_bit;
516 if (((node->key[pos] >> node_bit) ^ side) > 1)
517 return NULL;
518
519 if (!(node->node.bit & 1)) {
520 /* This is a cover node, it may be the entry we're
521 * looking for. We already know that it matches all the
522 * bits, let's compare prefixes and descend the cover
523 * subtree if they match.
524 */
525 if (node->node.bit >> 1 == pfx)
526 troot = node->node.branches.b[EB_LEFT];
527 else
528 troot = node->node.branches.b[EB_RGHT];
529 continue;
530 }
531 side &= 1;
532 troot = node->node.branches.b[side];
533 }
534}
535
536
537/* Insert ebmb_node <new> into a prefix subtree starting at node root <root>.
538 * Only new->key and new->pfx need be set with the key and its prefix length.
539 * Note that bits between <pfx> and <len> are theorically ignored and should be
540 * zero, as it is not certain yet that they will always be ignored everywhere
541 * (eg in bit compare functions).
542 * The ebmb_node is returned.
543 * If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
544 * len is specified in bytes.
545 */
546static forceinline struct ebmb_node *
547__ebmb_insert_prefix(struct eb_root *root, struct ebmb_node *new, unsigned int len)
548{
549 struct ebmb_node *old;
550 unsigned int side;
551 eb_troot_t *troot, **up_ptr;
552 eb_troot_t *root_right = root;
553 int diff;
554 int bit;
555 eb_troot_t *new_left, *new_rght;
556 eb_troot_t *new_leaf;
557 int old_node_bit;
558
559 side = EB_LEFT;
560 troot = root->b[EB_LEFT];
561 root_right = root->b[EB_RGHT];
562 if (unlikely(troot == NULL)) {
563 /* Tree is empty, insert the leaf part below the left branch */
564 root->b[EB_LEFT] = eb_dotag(&new->node.branches, EB_LEAF);
565 new->node.leaf_p = eb_dotag(root, EB_LEFT);
566 new->node.node_p = NULL; /* node part unused */
567 return new;
568 }
569
570 len <<= 3;
571 if (len > new->node.pfx)
572 len = new->node.pfx;
573
574 /* The tree descent is fairly easy :
575 * - first, check if we have reached a leaf node
576 * - second, check if we have gone too far
577 * - third, reiterate
578 * Everywhere, we use <new> for the node node we are inserting, <root>
579 * for the node we attach it to, and <old> for the node we are
580 * displacing below <new>. <troot> will always point to the future node
581 * (tagged with its type). <side> carries the side the node <new> is
582 * attached to below its parent, which is also where previous node
583 * was attached.
584 */
585
586 bit = 0;
587 while (1) {
588 if (unlikely(eb_gettag(troot) == EB_LEAF)) {
589 /* Insert above a leaf. Note that this leaf could very
590 * well be part of a cover node.
591 */
592 old = container_of(eb_untag(troot, EB_LEAF),
593 struct ebmb_node, node.branches);
594 new->node.node_p = old->node.leaf_p;
595 up_ptr = &old->node.leaf_p;
596 goto check_bit_and_break;
597 }
598
599 /* OK we're walking down this link */
600 old = container_of(eb_untag(troot, EB_NODE),
601 struct ebmb_node, node.branches);
602 old_node_bit = old->node.bit;
603 /* Note that old_node_bit can be :
604 * < 0 : dup tree
605 * = 2N : cover node for N bits
606 * = 2N+1 : normal node at N bits
607 */
608
609 if (unlikely(old_node_bit < 0)) {
610 /* We're above a duplicate tree, so we must compare the whole value */
611 new->node.node_p = old->node.node_p;
612 up_ptr = &old->node.node_p;
613 check_bit_and_break:
614 /* No need to compare everything if the leaves are shorter than the new one. */
615 if (len > old->node.pfx)
616 len = old->node.pfx;
617 bit = equal_bits(new->key, old->key, bit, len);
Willy Tarreauc2186022009-10-26 19:48:54 +0100618 break;
619 }
620
Willy Tarreau3a932442010-05-09 19:29:23 +0200621 /* WARNING: for the two blocks below, <bit> is counted in half-bits */
622
623 bit = equal_bits(new->key, old->key, bit, old_node_bit >> 1);
624 bit = (bit << 1) + 1; // assume comparisons with normal nodes
Willy Tarreau3a932442010-05-09 19:29:23 +0200625
626 /* we must always check that our prefix is larger than the nodes
627 * we visit, otherwise we have to stop going down. The following
628 * test is able to stop before both normal and cover nodes.
629 */
630 if (bit >= (new->node.pfx << 1) && (new->node.pfx << 1) < old_node_bit) {
631 /* insert cover node here on the left */
632 new->node.node_p = old->node.node_p;
633 up_ptr = &old->node.node_p;
634 new->node.bit = new->node.pfx << 1;
635 diff = -1;
Willy Tarreau3a932442010-05-09 19:29:23 +0200636 goto insert_above;
637 }
638
639 if (unlikely(bit < old_node_bit)) {
640 /* The tree did not contain the key, so we insert <new> before the
641 * node <old>, and set ->bit to designate the lowest bit position in
642 * <new> which applies to ->branches.b[]. We know that the bit is not
643 * greater than the prefix length thanks to the test above.
644 */
645 new->node.node_p = old->node.node_p;
646 up_ptr = &old->node.node_p;
647 new->node.bit = bit;
648 diff = cmp_bits(new->key, old->key, bit >> 1);
Willy Tarreau3a932442010-05-09 19:29:23 +0200649 goto insert_above;
650 }
651
652 if (!(old_node_bit & 1)) {
653 /* if we encounter a cover node with our exact prefix length, it's
654 * necessarily the same value, so we insert there as a duplicate on
655 * the left. For that, we go down on the left and the leaf detection
656 * code will finish the job.
657 */
658 if ((new->node.pfx << 1) == old_node_bit) {
659 root = &old->node.branches;
660 side = EB_LEFT;
661 troot = root->b[side];
Willy Tarreau3a932442010-05-09 19:29:23 +0200662 continue;
663 }
664
665 /* cover nodes are always walked through on the right */
666 side = EB_RGHT;
667 bit = old_node_bit >> 1; /* recheck that bit */
668 root = &old->node.branches;
669 troot = root->b[side];
Willy Tarreau3a932442010-05-09 19:29:23 +0200670 continue;
671 }
672
673 /* we don't want to skip bits for further comparisons, so we must limit <bit>.
674 * However, since we're going down around <old_node_bit>, we know it will be
675 * properly matched, so we can skip this bit.
676 */
677 old_node_bit >>= 1;
678 bit = old_node_bit + 1;
679
Willy Tarreauc2186022009-10-26 19:48:54 +0100680 /* walk down */
681 root = &old->node.branches;
Willy Tarreau3a932442010-05-09 19:29:23 +0200682 side = old_node_bit & 7;
683 side ^= 7;
684 side = (new->key[old_node_bit >> 3] >> side) & 1;
Willy Tarreauc2186022009-10-26 19:48:54 +0100685 troot = root->b[side];
686 }
687
Willy Tarreau3a932442010-05-09 19:29:23 +0200688 /* Right here, we have 4 possibilities :
689 * - the tree does not contain any leaf matching the
690 * key, and we have new->key < old->key. We insert
691 * new above old, on the left ;
692 *
693 * - the tree does not contain any leaf matching the
694 * key, and we have new->key > old->key. We insert
695 * new above old, on the right ;
696 *
697 * - the tree does contain the key with the same prefix
698 * length. We add the new key next to it as a first
699 * duplicate (since it was alone).
700 *
701 * The last two cases can easily be partially merged.
702 *
703 * - the tree contains a leaf matching the key, we have
704 * to insert above it as a cover node. The leaf with
705 * the shortest prefix becomes the left subtree and
706 * the leaf with the longest prefix becomes the right
707 * one. The cover node gets the min of both prefixes
708 * as its new bit.
Willy Tarreauc2186022009-10-26 19:48:54 +0100709 */
710
Willy Tarreau3a932442010-05-09 19:29:23 +0200711 /* first we want to ensure that we compare the correct bit, which means
712 * the largest common to both nodes.
Willy Tarreauc2186022009-10-26 19:48:54 +0100713 */
Willy Tarreau3a932442010-05-09 19:29:23 +0200714 if (bit > new->node.pfx)
715 bit = new->node.pfx;
716 if (bit > old->node.pfx)
717 bit = old->node.pfx;
718
Willy Tarreau3a932442010-05-09 19:29:23 +0200719 new->node.bit = (bit << 1) + 1; /* assume normal node by default */
720
721 /* if one prefix is included in the second one, we don't compare bits
722 * because they won't necessarily match, we just proceed with a cover
723 * node insertion.
724 */
725 diff = 0;
726 if (bit < old->node.pfx && bit < new->node.pfx)
727 diff = cmp_bits(new->key, old->key, bit);
728
729 if (diff == 0) {
730 /* Both keys match. Either it's a duplicate entry or we have to
731 * put the shortest prefix left and the largest one right below
732 * a new cover node. By default, diff==0 means we'll be inserted
733 * on the right.
734 */
735 new->node.bit--; /* anticipate cover node insertion */
736 if (new->node.pfx == old->node.pfx) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200737 new->node.bit = -1; /* mark as new dup tree, just in case */
738
739 if (unlikely(eb_gettag(root_right))) {
740 /* we refuse to duplicate this key if the tree is
741 * tagged as containing only unique keys.
742 */
743 return old;
744 }
745
746 if (eb_gettag(troot) != EB_LEAF) {
747 /* there was already a dup tree below */
748 struct eb_node *ret;
749 ret = eb_insert_dup(&old->node, &new->node);
750 return container_of(ret, struct ebmb_node, node);
751 }
752 /* otherwise fall through to insert first duplicate */
753 }
754 /* otherwise we just rely on the tests below to select the right side */
755 else if (new->node.pfx < old->node.pfx)
756 diff = -1; /* force insertion to left side */
757 }
758
759 insert_above:
760 new_left = eb_dotag(&new->node.branches, EB_LEFT);
761 new_rght = eb_dotag(&new->node.branches, EB_RGHT);
762 new_leaf = eb_dotag(&new->node.branches, EB_LEAF);
763
764 if (diff >= 0) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200765 new->node.branches.b[EB_LEFT] = troot;
766 new->node.branches.b[EB_RGHT] = new_leaf;
767 new->node.leaf_p = new_rght;
768 *up_ptr = new_left;
769 }
770 else {
Willy Tarreau3a932442010-05-09 19:29:23 +0200771 new->node.branches.b[EB_LEFT] = new_leaf;
772 new->node.branches.b[EB_RGHT] = troot;
773 new->node.leaf_p = new_left;
774 *up_ptr = new_rght;
775 }
776
Willy Tarreauc2186022009-10-26 19:48:54 +0100777 root->b[side] = eb_dotag(&new->node.branches, EB_NODE);
778 return new;
779}
780
Willy Tarreau3a932442010-05-09 19:29:23 +0200781
782
Willy Tarreauead63a02009-11-02 14:41:23 +0100783#endif /* _EBMBTREE_H */
784