/*
 * Elastic Binary Trees - macros and structures for Multi-Byte data nodes.
 * Version 6.0.6
 * (C) 2002-2011 - Willy Tarreau <w@1wt.eu>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, version 2.1
 * exclusively.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef _EBMBTREE_H
#define _EBMBTREE_H

#include <string.h>
#include "ebtree.h"

/* Return the structure of type <type> whose member <member> points to <ptr> */
#define ebmb_entry(ptr, type, member) container_of(ptr, type, member)

#define EBMB_ROOT	EB_ROOT
#define EBMB_TREE_HEAD	EB_TREE_HEAD

/* This structure carries a node, a leaf, and a key. It must start with the
 * eb_node so that it can be cast into an eb_node. We could also have put some
 * sort of transparent union here to reduce the indirection level, but the fact
 * is, the end user is not meant to manipulate internals, so this is pointless.
 * The 'node.bit' value here works differently from scalar types, as it contains
 * the number of identical bits between the two branches.
 */
struct ebmb_node {
	struct eb_node node; /* the tree node, must be at the beginning */
	unsigned char key[0]; /* the key, its size depends on the application */
};
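
/* Usage sketch (illustrative only, not part of the API): the node is usually
 * placed at the end of the user structure and the key bytes are stored right
 * behind it by over-allocating the structure by the key length. The names
 * below (struct blk, blk_new) are made up for the example and assume
 * <stdlib.h> for malloc().
 *
 *     struct blk {
 *         int data;                   // any user payload
 *         struct ebmb_node node;      // last member: node.key[] extends past it
 *     };
 *
 *     // allocate a block able to carry a <len>-byte key and copy the key in
 *     static struct blk *blk_new(const void *key, size_t len)
 *     {
 *         struct blk *b = malloc(sizeof(*b) + len);
 *
 *         if (b)
 *             memcpy(b->node.key, key, len);
 *         return b;
 *     }
 */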

/*
 * Exported functions and macros.
 * Many of them are always inlined because they are extremely small, and
 * are generally called at most once or twice in a program.
 */

/* Return leftmost node in the tree, or NULL if none */
static forceinline struct ebmb_node *ebmb_first(struct eb_root *root)
{
	return ebmb_entry(eb_first(root), struct ebmb_node, node);
}

/* Return rightmost node in the tree, or NULL if none */
static forceinline struct ebmb_node *ebmb_last(struct eb_root *root)
{
	return ebmb_entry(eb_last(root), struct ebmb_node, node);
}

/* Return next node in the tree, or NULL if none */
static forceinline struct ebmb_node *ebmb_next(struct ebmb_node *ebmb)
{
	return ebmb_entry(eb_next(&ebmb->node), struct ebmb_node, node);
}

/* Return previous node in the tree, or NULL if none */
static forceinline struct ebmb_node *ebmb_prev(struct ebmb_node *ebmb)
{
	return ebmb_entry(eb_prev(&ebmb->node), struct ebmb_node, node);
}

/* Return next node in the tree, skipping duplicates, or NULL if none */
static forceinline struct ebmb_node *ebmb_next_unique(struct ebmb_node *ebmb)
{
	return ebmb_entry(eb_next_unique(&ebmb->node), struct ebmb_node, node);
}

/* Return previous node in the tree, skipping duplicates, or NULL if none */
static forceinline struct ebmb_node *ebmb_prev_unique(struct ebmb_node *ebmb)
{
	return ebmb_entry(eb_prev_unique(&ebmb->node), struct ebmb_node, node);
}

/* Delete node from the tree if it was linked in. Mark the node unused. Note
 * that this function relies on a non-inlined generic function: eb_delete.
 */
static forceinline void ebmb_delete(struct ebmb_node *ebmb)
{
	eb_delete(&ebmb->node);
}
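
/* Illustrative sketch of a full-tree walk combining the primitives above.
 * <head> is assumed to be a tree head declared as "struct eb_root head =
 * EB_ROOT;" and filled with struct blk entries as in the earlier sketch
 * (malloc/free from <stdlib.h> assumed). The next node is fetched before
 * deleting so that the iteration remains valid while nodes are unlinked.
 *
 *     struct ebmb_node *node, *next;
 *
 *     for (node = ebmb_first(&head); node; node = next) {
 *         next = ebmb_next(node);                    // fetch before unlinking
 *         ebmb_delete(node);                         // node is now unused
 *         free(ebmb_entry(node, struct blk, node));
 *     }
 */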
94
95/* The following functions are not inlined by default. They are declared
96 * in ebmbtree.c, which simply relies on their inline version.
97 */
98REGPRM3 struct ebmb_node *ebmb_lookup(struct eb_root *root, const void *x, unsigned int len);
99REGPRM3 struct ebmb_node *ebmb_insert(struct eb_root *root, struct ebmb_node *new, unsigned int len);
Willy Tarreau3a932442010-05-09 19:29:23 +0200100REGPRM2 struct ebmb_node *ebmb_lookup_longest(struct eb_root *root, const void *x);
101REGPRM3 struct ebmb_node *ebmb_lookup_prefix(struct eb_root *root, const void *x, unsigned int pfx);
102REGPRM3 struct ebmb_node *ebmb_insert_prefix(struct eb_root *root, struct ebmb_node *new, unsigned int len);
Willy Tarreauc2186022009-10-26 19:48:54 +0100103
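/* Minimal usage sketch for the functions above (illustrative only). It assumes
 * fixed 4-byte keys, e.g. IPv4 addresses in network byte order, and reuses the
 * hypothetical struct blk / blk_new() from the sketch further up. All keys
 * inserted into a given tree must use the same <len>. Declaring the root as
 * EB_ROOT_UNIQUE (from ebtree.h) instead of EB_ROOT makes the tree refuse
 * duplicate keys.
 *
 *     struct eb_root root = EB_ROOT;      // or EB_ROOT_UNIQUE
 *     struct ebmb_node *found;
 *     struct blk *b = blk_new("\x0a\x01\x02\x03", 4);   // key 10.1.2.3
 *
 *     ebmb_insert(&root, &b->node, 4);
 *
 *     found = ebmb_lookup(&root, "\x0a\x01\x02\x03", 4);
 *     if (found)
 *         b = ebmb_entry(found, struct blk, node);
 */
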
/* The following functions are less likely to be used directly, because their
 * code is larger. The non-inlined version is preferred.
 */

/* Delete node from the tree if it was linked in. Mark the node unused. */
static forceinline void __ebmb_delete(struct ebmb_node *ebmb)
{
	__eb_delete(&ebmb->node);
}

/* Find the first occurrence of a key of at least <len> bytes matching <x> in
 * the tree <root>. The caller is responsible for ensuring that <len> will not
 * exceed the common parts between the tree's keys and <x>. In case of multiple
 * matches, the leftmost node is returned. This means that this function can be
 * used to look up string keys by prefix if all keys in the tree are
 * zero-terminated. If no match is found, NULL is returned. Returns first node
 * if <len> is zero.
 */
static forceinline struct ebmb_node *__ebmb_lookup(struct eb_root *root, const void *x, unsigned int len)
{
	struct ebmb_node *node;
	eb_troot_t *troot;
	int pos, side;
	int node_bit;

	troot = root->b[EB_LEFT];
	if (unlikely(troot == NULL))
		goto ret_null;

	if (unlikely(len == 0))
		goto walk_down;

	pos = 0;
	while (1) {
		if (eb_gettag(troot) == EB_LEAF) {
			node = container_of(eb_untag(troot, EB_LEAF),
					    struct ebmb_node, node.branches);
			if (memcmp(node->key + pos, x, len) != 0)
				goto ret_null;
			else
				goto ret_node;
		}
		node = container_of(eb_untag(troot, EB_NODE),
				    struct ebmb_node, node.branches);

		node_bit = node->node.bit;
		if (node_bit < 0) {
			/* We have a dup tree now. Either it's for the same
			 * value, and we walk down left, or it's a different
			 * one and we don't have our key.
			 */
			if (memcmp(node->key + pos, x, len) != 0)
				goto ret_null;
			else
				goto walk_left;
		}

		/* OK, normal data node, let's walk down. We check if all full
		 * bytes are equal, and we start from the last one we did not
		 * completely check. We stop as soon as we reach the last byte,
		 * because we must decide to go left/right or abort.
		 */
		node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
		if (node_bit < 0) {
			/* This surprising construction gives better performance
			 * because gcc does not try to reorder the loop. Tested to
			 * be fine with 2.95 to 4.2.
			 */
			while (1) {
				if (node->key[pos++] ^ *(unsigned char*)(x++))
					goto ret_null; /* more than one full byte is different */
				if (--len == 0)
					goto walk_left; /* return first node if all bytes matched */
				node_bit += 8;
				if (node_bit >= 0)
					break;
			}
		}

		/* here we know that only the last byte differs, so node_bit < 8.
		 * We have 2 possibilities :
		 *   - more than the last bit differs => return NULL
		 *   - walk down on side = (x[pos] >> node_bit) & 1
		 */
		side = *(unsigned char *)x >> node_bit;
		if (((node->key[pos] >> node_bit) ^ side) > 1)
			goto ret_null;
		side &= 1;
		troot = node->node.branches.b[side];
	}
 walk_left:
	troot = node->node.branches.b[EB_LEFT];
 walk_down:
	while (eb_gettag(troot) != EB_LEAF)
		troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
	node = container_of(eb_untag(troot, EB_LEAF),
			    struct ebmb_node, node.branches);
 ret_node:
	return node;
 ret_null:
	return NULL;
}
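
/* Example of the leftmost-match property documented above (illustrative
 * sketch): when all keys in the tree are zero-terminated strings, e.g.
 * inserted with ebst_insert() (an assumption about the companion string API
 * in ebsttree.h), passing only the prefix length to ebmb_lookup() returns the
 * first key starting with that prefix, and the following ones can be walked
 * with ebmb_next(). <root> is the tree head as in the earlier sketches.
 *
 *     struct ebmb_node *node = ebmb_lookup(&root, "abc", 3);
 *
 *     while (node && memcmp(node->key, "abc", 3) == 0) {
 *         // process ebmb_entry(node, struct blk, node) here
 *         node = ebmb_next(node);
 *     }
 */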

/* Insert ebmb_node <new> into the subtree starting at node root <root>.
 * Only new->key needs to be set with the key. The ebmb_node is returned.
 * If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
 * length <len> is specified in bytes. It is absolutely mandatory that this
 * length is the same for all keys in the tree. This function cannot be used
 * to insert strings.
 */
static forceinline struct ebmb_node *
__ebmb_insert(struct eb_root *root, struct ebmb_node *new, unsigned int len)
{
	struct ebmb_node *old;
	unsigned int side;
	eb_troot_t *troot, **up_ptr;
	eb_troot_t *root_right;
	int diff;
	int bit;
	eb_troot_t *new_left, *new_rght;
	eb_troot_t *new_leaf;
	int old_node_bit;

	side = EB_LEFT;
	troot = root->b[EB_LEFT];
	root_right = root->b[EB_RGHT];
	if (unlikely(troot == NULL)) {
		/* Tree is empty, insert the leaf part below the left branch */
		root->b[EB_LEFT] = eb_dotag(&new->node.branches, EB_LEAF);
		new->node.leaf_p = eb_dotag(root, EB_LEFT);
		new->node.node_p = NULL; /* node part unused */
		return new;
	}

	/* The tree descent is fairly easy :
	 *  - first, check if we have reached a leaf node
	 *  - second, check if we have gone too far
	 *  - third, reiterate
	 * Everywhere, we use <new> for the new node we are inserting, <root>
	 * for the node we attach it to, and <old> for the node we are
	 * displacing below <new>. <troot> will always point to the future node
	 * (tagged with its type). <side> carries the side the node <new> is
	 * attached to below its parent, which is also where the previous node
	 * was attached.
	 */

	bit = 0;
	while (1) {
		if (unlikely(eb_gettag(troot) == EB_LEAF)) {
			/* insert above a leaf */
			old = container_of(eb_untag(troot, EB_LEAF),
					    struct ebmb_node, node.branches);
			new->node.node_p = old->node.leaf_p;
			up_ptr = &old->node.leaf_p;
			goto check_bit_and_break;
		}

		/* OK we're walking down this link */
		old = container_of(eb_untag(troot, EB_NODE),
				    struct ebmb_node, node.branches);
		old_node_bit = old->node.bit;

		if (unlikely(old->node.bit < 0)) {
			/* We're above a duplicate tree, so we must compare the whole value */
			new->node.node_p = old->node.node_p;
			up_ptr = &old->node.node_p;
		check_bit_and_break:
			bit = equal_bits(new->key, old->key, bit, len << 3);
			break;
		}

		/* Stop going down when we don't have common bits anymore. We
		 * also stop in front of a duplicates tree because it means we
		 * have to insert above. Note: we can compare more bits than
		 * the current node's because as long as they are identical, we
		 * know we descend along the correct side.
		 */

		bit = equal_bits(new->key, old->key, bit, old_node_bit);
		if (unlikely(bit < old_node_bit)) {
			/* The tree did not contain the key, so we insert <new> before the
			 * node <old>, and set ->bit to designate the lowest bit position in
			 * <new> which applies to ->branches.b[].
			 */
			new->node.node_p = old->node.node_p;
			up_ptr = &old->node.node_p;
			break;
		}
		/* we don't want to skip bits for further comparisons, so we must limit <bit>.
		 * However, since we're going down around <old_node_bit>, we know it will be
		 * properly matched, so we can skip this bit.
		 */
		bit = old_node_bit + 1;

		/* walk down */
		root = &old->node.branches;
		side = old_node_bit & 7;
		side ^= 7;
		side = (new->key[old_node_bit >> 3] >> side) & 1;
		troot = root->b[side];
	}

	new_left = eb_dotag(&new->node.branches, EB_LEFT);
	new_rght = eb_dotag(&new->node.branches, EB_RGHT);
	new_leaf = eb_dotag(&new->node.branches, EB_LEAF);

	new->node.bit = bit;

	/* Note: we can compare more bits than the current node's because as
	 * long as they are identical, we know we descend along the correct
	 * side. However we don't want to start to compare past the end.
	 */
	diff = 0;
	if (((unsigned)bit >> 3) < len)
		diff = cmp_bits(new->key, old->key, bit);

	if (diff == 0) {
		new->node.bit = -1; /* mark as new dup tree, just in case */

		if (likely(eb_gettag(root_right))) {
			/* we refuse to duplicate this key if the tree is
			 * tagged as containing only unique keys.
			 */
			return old;
		}

		if (eb_gettag(troot) != EB_LEAF) {
			/* there was already a dup tree below */
			struct eb_node *ret;
			ret = eb_insert_dup(&old->node, &new->node);
			return container_of(ret, struct ebmb_node, node);
		}
		/* otherwise fall through */
	}

	if (diff >= 0) {
		new->node.branches.b[EB_LEFT] = troot;
		new->node.branches.b[EB_RGHT] = new_leaf;
		new->node.leaf_p = new_rght;
		*up_ptr = new_left;
	}
	else if (diff < 0) {
		new->node.branches.b[EB_LEFT] = new_leaf;
		new->node.branches.b[EB_RGHT] = troot;
		new->node.leaf_p = new_left;
		*up_ptr = new_rght;
	}

	/* Ok, now we are inserting <new> between <root> and <old>. <old>'s
	 * parent is already set to <new>, and the <root>'s branch is still in
	 * <side>. We now update the root's branch to point to <new>. Note that
	 * we can also find the side by checking the side of new->node.node_p.
	 */

	root->b[side] = eb_dotag(&new->node.branches, EB_NODE);
	return new;
}
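
/* Duplicate handling sketch (illustrative): on a tree whose root was
 * initialized with EB_ROOT_UNIQUE, the function above returns the already
 * present node instead of <new>, which the caller can test to detect
 * duplicate keys. <b> reuses the hypothetical struct blk from earlier.
 *
 *     struct eb_root root = EB_ROOT_UNIQUE;
 *     struct ebmb_node *ret;
 *
 *     ret = ebmb_insert(&root, &b->node, 4);
 *     if (ret != &b->node) {
 *         // key already present: <ret> is the existing node, <b> was not inserted
 *     }
 */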


/* Find the first occurrence of the longest prefix matching a key <x> in the
 * tree <root>. It's the caller's responsibility to ensure that key <x> is at
 * least as long as the keys in the tree. If none can be found, return NULL.
 */
static forceinline struct ebmb_node *__ebmb_lookup_longest(struct eb_root *root, const void *x)
{
	struct ebmb_node *node;
	eb_troot_t *troot, *cover;
	int pos, side;
	int node_bit;

	troot = root->b[EB_LEFT];
	if (unlikely(troot == NULL))
		return NULL;

	cover = NULL;
	pos = 0;
	while (1) {
		if ((eb_gettag(troot) == EB_LEAF)) {
			node = container_of(eb_untag(troot, EB_LEAF),
					    struct ebmb_node, node.branches);
			if (check_bits(x - pos, node->key, pos, node->node.pfx))
				goto not_found;

			return node;
		}
		node = container_of(eb_untag(troot, EB_NODE),
				    struct ebmb_node, node.branches);

		node_bit = node->node.bit;
		if (node_bit < 0) {
			/* We have a dup tree now. Either it's for the same
			 * value, and we walk down left, or it's a different
			 * one and we don't have our key.
			 */
			if (check_bits(x - pos, node->key, pos, node->node.pfx))
				goto not_found;

			troot = node->node.branches.b[EB_LEFT];
			while (eb_gettag(troot) != EB_LEAF)
				troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
			node = container_of(eb_untag(troot, EB_LEAF),
					    struct ebmb_node, node.branches);
			return node;
		}

		node_bit >>= 1; /* strip cover bit */
		node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
		if (node_bit < 0) {
			/* This uncommon construction gives better performance
			 * because gcc does not try to reorder the loop. Tested to
			 * be fine with 2.95 to 4.2.
			 */
			while (1) {
				x++; pos++;
				if (node->key[pos-1] ^ *(unsigned char*)(x-1))
					goto not_found; /* more than one full byte is different */
				node_bit += 8;
				if (node_bit >= 0)
					break;
			}
		}

		/* here we know that only the last byte differs, so 0 <= node_bit <= 7.
		 * We have 2 possibilities :
		 *   - more than the last bit differs => data does not match
		 *   - walk down on side = (x[pos] >> node_bit) & 1
		 */
		side = *(unsigned char *)x >> node_bit;
		if (((node->key[pos] >> node_bit) ^ side) > 1)
			goto not_found;

		if (!(node->node.bit & 1)) {
			/* This is a cover node, let's keep a reference to it
			 * for later. The covering subtree is on the left, and
			 * the covered subtree is on the right, so we have to
			 * walk down right.
			 */
			cover = node->node.branches.b[EB_LEFT];
			troot = node->node.branches.b[EB_RGHT];
			continue;
		}
		side &= 1;
		troot = node->node.branches.b[side];
	}

 not_found:
	/* Walk down the last cover tree if it exists. It does not matter if cover is NULL */
	return ebmb_entry(eb_walk_down(cover, EB_LEFT), struct ebmb_node, node);
}
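
/* Longest-prefix-match sketch (illustrative): the prefix variants are
 * typically used for routing-table style lookups. The struct and field names
 * below are made up; keys are 4-byte IPv4 addresses so <len> is 4, and the
 * prefix length is stored in node.node.pfx before insertion (malloc from
 * <stdlib.h> assumed).
 *
 *     struct route4 {
 *         int ifindex;                 // any user payload
 *         struct ebmb_node node;       // last member: 4 key bytes follow
 *     };
 *
 *     // insert the network 10.1.0.0/16
 *     struct route4 *r = malloc(sizeof(*r) + 4);
 *     memcpy(r->node.key, "\x0a\x01\x00\x00", 4);
 *     r->node.node.pfx = 16;
 *     ebmb_insert_prefix(&root, &r->node, 4);
 *
 *     // find the most specific route covering 10.1.2.3
 *     struct ebmb_node *best = ebmb_lookup_longest(&root, "\x0a\x01\x02\x03");
 *     if (best)
 *         r = ebmb_entry(best, struct route4, node);
 */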


/* Find the first occurrence of a prefix matching a key <x> of <pfx> BITS in the
 * tree <root>. It's the caller's responsibility to ensure that key <x> is at
 * least as long as the keys in the tree. If none can be found, return NULL.
 */
static forceinline struct ebmb_node *__ebmb_lookup_prefix(struct eb_root *root, const void *x, unsigned int pfx)
{
	struct ebmb_node *node;
	eb_troot_t *troot;
	int pos, side;
	int node_bit;

	troot = root->b[EB_LEFT];
	if (unlikely(troot == NULL))
		return NULL;

	pos = 0;
	while (1) {
		if ((eb_gettag(troot) == EB_LEAF)) {
			node = container_of(eb_untag(troot, EB_LEAF),
					    struct ebmb_node, node.branches);
			if (node->node.pfx != pfx)
				return NULL;
			if (check_bits(x - pos, node->key, pos, node->node.pfx))
				return NULL;
			return node;
		}
		node = container_of(eb_untag(troot, EB_NODE),
				    struct ebmb_node, node.branches);

		node_bit = node->node.bit;
		if (node_bit < 0) {
			/* We have a dup tree now. Either it's for the same
			 * value, and we walk down left, or it's a different
			 * one and we don't have our key.
			 */
			if (node->node.pfx != pfx)
				return NULL;
			if (check_bits(x - pos, node->key, pos, node->node.pfx))
				return NULL;

			troot = node->node.branches.b[EB_LEFT];
			while (eb_gettag(troot) != EB_LEAF)
				troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
			node = container_of(eb_untag(troot, EB_LEAF),
					    struct ebmb_node, node.branches);
			return node;
		}

		node_bit >>= 1; /* strip cover bit */
		node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
		if (node_bit < 0) {
			/* This uncommon construction gives better performance
			 * because gcc does not try to reorder the loop. Tested to
			 * be fine with 2.95 to 4.2.
			 */
			while (1) {
				x++; pos++;
				if (node->key[pos-1] ^ *(unsigned char*)(x-1))
					return NULL; /* more than one full byte is different */
				node_bit += 8;
				if (node_bit >= 0)
					break;
			}
		}

		/* here we know that only the last byte differs, so 0 <= node_bit <= 7.
		 * We have 2 possibilities :
		 *   - more than the last bit differs => data does not match
		 *   - walk down on side = (x[pos] >> node_bit) & 1
		 */
		side = *(unsigned char *)x >> node_bit;
		if (((node->key[pos] >> node_bit) ^ side) > 1)
			return NULL;

		if (!(node->node.bit & 1)) {
			/* This is a cover node, it may be the entry we're
			 * looking for. We already know that it matches all the
			 * bits, let's compare prefixes and descend the cover
			 * subtree if they match.
			 */
			if ((unsigned short)node->node.bit >> 1 == pfx)
				troot = node->node.branches.b[EB_LEFT];
			else
				troot = node->node.branches.b[EB_RGHT];
			continue;
		}
		side &= 1;
		troot = node->node.branches.b[side];
	}
}
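
/* Exact-prefix lookup sketch (illustrative): unlike the longest-match variant
 * above, this one only returns an entry whose stored prefix length is exactly
 * <pfx>, here the /16 network inserted in the earlier routing sketch.
 *
 *     struct ebmb_node *node = ebmb_lookup_prefix(&root, "\x0a\x01\x00\x00", 16);
 *     if (node)
 *         r = ebmb_entry(node, struct route4, node);   // 10.1.0.0/16 is present
 */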


/* Insert ebmb_node <new> into a prefix subtree starting at node root <root>.
 * Only new->key and new->pfx need to be set with the key and its prefix length.
 * Note that bits between <pfx> and <len> are theoretically ignored and should
 * be zero, as it is not certain yet that they will always be ignored everywhere
 * (e.g. in bit compare functions).
 * The ebmb_node is returned.
 * If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
 * len is specified in bytes.
 */
static forceinline struct ebmb_node *
__ebmb_insert_prefix(struct eb_root *root, struct ebmb_node *new, unsigned int len)
{
	struct ebmb_node *old;
	unsigned int side;
	eb_troot_t *troot, **up_ptr;
	eb_troot_t *root_right;
	int diff;
	int bit;
	eb_troot_t *new_left, *new_rght;
	eb_troot_t *new_leaf;
	int old_node_bit;

	side = EB_LEFT;
	troot = root->b[EB_LEFT];
	root_right = root->b[EB_RGHT];
	if (unlikely(troot == NULL)) {
		/* Tree is empty, insert the leaf part below the left branch */
		root->b[EB_LEFT] = eb_dotag(&new->node.branches, EB_LEAF);
		new->node.leaf_p = eb_dotag(root, EB_LEFT);
		new->node.node_p = NULL; /* node part unused */
		return new;
	}

	len <<= 3;
	if (len > new->node.pfx)
		len = new->node.pfx;

	/* The tree descent is fairly easy :
	 *  - first, check if we have reached a leaf node
	 *  - second, check if we have gone too far
	 *  - third, reiterate
	 * Everywhere, we use <new> for the new node we are inserting, <root>
	 * for the node we attach it to, and <old> for the node we are
	 * displacing below <new>. <troot> will always point to the future node
	 * (tagged with its type). <side> carries the side the node <new> is
	 * attached to below its parent, which is also where the previous node
	 * was attached.
	 */

	bit = 0;
	while (1) {
		if (unlikely(eb_gettag(troot) == EB_LEAF)) {
			/* Insert above a leaf. Note that this leaf could very
			 * well be part of a cover node.
			 */
			old = container_of(eb_untag(troot, EB_LEAF),
					    struct ebmb_node, node.branches);
			new->node.node_p = old->node.leaf_p;
			up_ptr = &old->node.leaf_p;
			goto check_bit_and_break;
		}

		/* OK we're walking down this link */
		old = container_of(eb_untag(troot, EB_NODE),
				    struct ebmb_node, node.branches);
		old_node_bit = old->node.bit;
		/* Note that old_node_bit can be :
		 *   < 0    : dup tree
		 *   = 2N   : cover node for N bits
		 *   = 2N+1 : normal node at N bits
		 */

		if (unlikely(old_node_bit < 0)) {
			/* We're above a duplicate tree, so we must compare the whole value */
			new->node.node_p = old->node.node_p;
			up_ptr = &old->node.node_p;
		check_bit_and_break:
			/* No need to compare everything if the leaves are shorter than the new one. */
			if (len > old->node.pfx)
				len = old->node.pfx;
			bit = equal_bits(new->key, old->key, bit, len);
			break;
		}

		/* WARNING: for the two blocks below, <bit> is counted in half-bits */

		bit = equal_bits(new->key, old->key, bit, old_node_bit >> 1);
		bit = (bit << 1) + 1; // assume comparisons with normal nodes

		/* we must always check that our prefix is larger than the nodes
		 * we visit, otherwise we have to stop going down. The following
		 * test is able to stop before both normal and cover nodes.
		 */
		if (bit >= (new->node.pfx << 1) && (new->node.pfx << 1) < old_node_bit) {
			/* insert cover node here on the left */
			new->node.node_p = old->node.node_p;
			up_ptr = &old->node.node_p;
			new->node.bit = new->node.pfx << 1;
			diff = -1;
			goto insert_above;
		}

		if (unlikely(bit < old_node_bit)) {
			/* The tree did not contain the key, so we insert <new> before the
			 * node <old>, and set ->bit to designate the lowest bit position in
			 * <new> which applies to ->branches.b[]. We know that the bit is not
			 * greater than the prefix length thanks to the test above.
			 */
			new->node.node_p = old->node.node_p;
			up_ptr = &old->node.node_p;
			new->node.bit = bit;
			diff = cmp_bits(new->key, old->key, bit >> 1);
			goto insert_above;
		}

		if (!(old_node_bit & 1)) {
			/* if we encounter a cover node with our exact prefix length, it's
			 * necessarily the same value, so we insert there as a duplicate on
			 * the left. For that, we go down on the left and the leaf detection
			 * code will finish the job.
			 */
			if ((new->node.pfx << 1) == old_node_bit) {
				root = &old->node.branches;
				side = EB_LEFT;
				troot = root->b[side];
				continue;
			}

			/* cover nodes are always walked through on the right */
			side = EB_RGHT;
			bit = old_node_bit >> 1; /* recheck that bit */
			root = &old->node.branches;
			troot = root->b[side];
			continue;
		}

		/* we don't want to skip bits for further comparisons, so we must limit <bit>.
		 * However, since we're going down around <old_node_bit>, we know it will be
		 * properly matched, so we can skip this bit.
		 */
		old_node_bit >>= 1;
		bit = old_node_bit + 1;

		/* walk down */
		root = &old->node.branches;
		side = old_node_bit & 7;
		side ^= 7;
		side = (new->key[old_node_bit >> 3] >> side) & 1;
		troot = root->b[side];
	}

	/* Right here, we have 4 possibilities :
	 * - the tree does not contain any leaf matching the
	 *   key, and we have new->key < old->key. We insert
	 *   new above old, on the left ;
	 *
	 * - the tree does not contain any leaf matching the
	 *   key, and we have new->key > old->key. We insert
	 *   new above old, on the right ;
	 *
	 * - the tree does contain the key with the same prefix
	 *   length. We add the new key next to it as a first
	 *   duplicate (since it was alone).
	 *
	 * The last two cases can easily be partially merged.
	 *
	 * - the tree contains a leaf matching the key, we have
	 *   to insert above it as a cover node. The leaf with
	 *   the shortest prefix becomes the left subtree and
	 *   the leaf with the longest prefix becomes the right
	 *   one. The cover node gets the min of both prefixes
	 *   as its new bit.
	 */

	/* first we want to ensure that we compare the correct bit, which means
	 * the largest common to both nodes.
	 */
	if (bit > new->node.pfx)
		bit = new->node.pfx;
	if (bit > old->node.pfx)
		bit = old->node.pfx;

	new->node.bit = (bit << 1) + 1; /* assume normal node by default */

	/* if one prefix is included in the second one, we don't compare bits
	 * because they won't necessarily match, we just proceed with a cover
	 * node insertion.
	 */
	diff = 0;
	if (bit < old->node.pfx && bit < new->node.pfx)
		diff = cmp_bits(new->key, old->key, bit);

	if (diff == 0) {
		/* Both keys match. Either it's a duplicate entry or we have to
		 * put the shortest prefix left and the largest one right below
		 * a new cover node. By default, diff==0 means we'll be inserted
		 * on the right.
		 */
		new->node.bit--; /* anticipate cover node insertion */
		if (new->node.pfx == old->node.pfx) {
			new->node.bit = -1; /* mark as new dup tree, just in case */

			if (unlikely(eb_gettag(root_right))) {
				/* we refuse to duplicate this key if the tree is
				 * tagged as containing only unique keys.
				 */
				return old;
			}

			if (eb_gettag(troot) != EB_LEAF) {
				/* there was already a dup tree below */
				struct eb_node *ret;
				ret = eb_insert_dup(&old->node, &new->node);
				return container_of(ret, struct ebmb_node, node);
			}
			/* otherwise fall through to insert first duplicate */
		}
		/* otherwise we just rely on the tests below to select the right side */
		else if (new->node.pfx < old->node.pfx)
			diff = -1; /* force insertion to left side */
	}

 insert_above:
	new_left = eb_dotag(&new->node.branches, EB_LEFT);
	new_rght = eb_dotag(&new->node.branches, EB_RGHT);
	new_leaf = eb_dotag(&new->node.branches, EB_LEAF);

	if (diff >= 0) {
		new->node.branches.b[EB_LEFT] = troot;
		new->node.branches.b[EB_RGHT] = new_leaf;
		new->node.leaf_p = new_rght;
		*up_ptr = new_left;
	}
	else {
		new->node.branches.b[EB_LEFT] = new_leaf;
		new->node.branches.b[EB_RGHT] = troot;
		new->node.leaf_p = new_left;
		*up_ptr = new_rght;
	}

	root->b[side] = eb_dotag(&new->node.branches, EB_NODE);
	return new;
}
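
/* Key-padding sketch for the comment above (illustrative): bytes between the
 * prefix length and the key length are kept at zero before insertion. The
 * helper below is made up for the example and assumes 4-byte IPv4 keys, with
 * <r> and <root> as in the earlier routing sketch.
 *
 *     static void route4_set_key(struct ebmb_node *node, unsigned int addr, unsigned int pfx)
 *     {
 *         unsigned int mask = pfx ? ~0U << (32 - pfx) : 0;
 *
 *         addr &= mask;                             // clear the bits past the prefix
 *         node->key[0] = (unsigned char)(addr >> 24);
 *         node->key[1] = (unsigned char)(addr >> 16);
 *         node->key[2] = (unsigned char)(addr >> 8);
 *         node->key[3] = (unsigned char)addr;
 *         node->node.pfx = pfx;
 *     }
 *
 *     route4_set_key(&r->node, 0x0a010000, 16);     // 10.1.0.0/16
 *     ebmb_insert_prefix(&root, &r->node, 4);
 */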



#endif /* _EBMBTREE_H */