/*
 * Elastic Binary Trees - macros and structures for Multi-Byte data nodes.
 * Version 6.0.6
 * (C) 2002-2011 - Willy Tarreau <w@1wt.eu>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation, version 2.1
 * exclusively.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */

Willy Tarreauead63a02009-11-02 14:41:23 +010021#ifndef _EBMBTREE_H
22#define _EBMBTREE_H
23
Willy Tarreauc2186022009-10-26 19:48:54 +010024#include <string.h>
25#include "ebtree.h"
26
27/* Return the structure of type <type> whose member <member> points to <ptr> */
28#define ebmb_entry(ptr, type, member) container_of(ptr, type, member)
29
30#define EBMB_ROOT EB_ROOT
31#define EBMB_TREE_HEAD EB_TREE_HEAD
32
33/* This structure carries a node, a leaf, and a key. It must start with the
34 * eb_node so that it can be cast into an eb_node. We could also have put some
35 * sort of transparent union here to reduce the indirection level, but the fact
36 * is, the end user is not meant to manipulate internals, so this is pointless.
37 * The 'node.bit' value here works differently from scalar types, as it contains
38 * the number of identical bits between the two branches.
39 */
40struct ebmb_node {
41 struct eb_node node; /* the tree node, must be at the beginning */
42 unsigned char key[0]; /* the key, its size depends on the application */
43};
44
/*
 * Exported functions and macros.
 * Many of them are always inlined because they are extremely small, and
 * are generally called at most once or twice in a program.
 */

51/* Return leftmost node in the tree, or NULL if none */
52static forceinline struct ebmb_node *ebmb_first(struct eb_root *root)
53{
54 return ebmb_entry(eb_first(root), struct ebmb_node, node);
55}
56
57/* Return rightmost node in the tree, or NULL if none */
58static forceinline struct ebmb_node *ebmb_last(struct eb_root *root)
59{
60 return ebmb_entry(eb_last(root), struct ebmb_node, node);
61}
62
63/* Return next node in the tree, or NULL if none */
64static forceinline struct ebmb_node *ebmb_next(struct ebmb_node *ebmb)
65{
66 return ebmb_entry(eb_next(&ebmb->node), struct ebmb_node, node);
67}
68
69/* Return previous node in the tree, or NULL if none */
70static forceinline struct ebmb_node *ebmb_prev(struct ebmb_node *ebmb)
71{
72 return ebmb_entry(eb_prev(&ebmb->node), struct ebmb_node, node);
73}
74
Willy Tarreau2b570202013-05-07 15:58:28 +020075/* Return next leaf node within a duplicate sub-tree, or NULL if none. */
76static inline struct ebmb_node *ebmb_next_dup(struct ebmb_node *ebmb)
77{
78 return ebmb_entry(eb_next_dup(&ebmb->node), struct ebmb_node, node);
79}
80
81/* Return previous leaf node within a duplicate sub-tree, or NULL if none. */
82static inline struct ebmb_node *ebmb_prev_dup(struct ebmb_node *ebmb)
83{
84 return ebmb_entry(eb_prev_dup(&ebmb->node), struct ebmb_node, node);
85}
86
Willy Tarreauc2186022009-10-26 19:48:54 +010087/* Return next node in the tree, skipping duplicates, or NULL if none */
88static forceinline struct ebmb_node *ebmb_next_unique(struct ebmb_node *ebmb)
89{
90 return ebmb_entry(eb_next_unique(&ebmb->node), struct ebmb_node, node);
91}
92
93/* Return previous node in the tree, skipping duplicates, or NULL if none */
94static forceinline struct ebmb_node *ebmb_prev_unique(struct ebmb_node *ebmb)
95{
96 return ebmb_entry(eb_prev_unique(&ebmb->node), struct ebmb_node, node);
97}
98
99/* Delete node from the tree if it was linked in. Mark the node unused. Note
100 * that this function relies on a non-inlined generic function: eb_delete.
101 */
102static forceinline void ebmb_delete(struct ebmb_node *ebmb)
103{
104 eb_delete(&ebmb->node);
105}
106
107/* The following functions are not inlined by default. They are declared
108 * in ebmbtree.c, which simply relies on their inline version.
109 */
110REGPRM3 struct ebmb_node *ebmb_lookup(struct eb_root *root, const void *x, unsigned int len);
111REGPRM3 struct ebmb_node *ebmb_insert(struct eb_root *root, struct ebmb_node *new, unsigned int len);
Willy Tarreau3a932442010-05-09 19:29:23 +0200112REGPRM2 struct ebmb_node *ebmb_lookup_longest(struct eb_root *root, const void *x);
113REGPRM3 struct ebmb_node *ebmb_lookup_prefix(struct eb_root *root, const void *x, unsigned int pfx);
114REGPRM3 struct ebmb_node *ebmb_insert_prefix(struct eb_root *root, struct ebmb_node *new, unsigned int len);
Willy Tarreauc2186022009-10-26 19:48:54 +0100115
/* The following functions are less likely to be used directly, because their
 * code is larger. The non-inlined version is preferred.
 */

120/* Delete node from the tree if it was linked in. Mark the node unused. */
121static forceinline void __ebmb_delete(struct ebmb_node *ebmb)
122{
123 __eb_delete(&ebmb->node);
124}
125
Willy Tarreau414c4b22011-01-04 13:21:06 +0100126/* Find the first occurence of a key of a least <len> bytes matching <x> in the
127 * tree <root>. The caller is responsible for ensuring that <len> will not exceed
128 * the common parts between the tree's keys and <x>. In case of multiple matches,
129 * the leftmost node is returned. This means that this function can be used to
130 * lookup string keys by prefix if all keys in the tree are zero-terminated. If
131 * no match is found, NULL is returned. Returns first node if <len> is zero.
Willy Tarreauc2186022009-10-26 19:48:54 +0100132 */
133static forceinline struct ebmb_node *__ebmb_lookup(struct eb_root *root, const void *x, unsigned int len)
134{
135 struct ebmb_node *node;
136 eb_troot_t *troot;
Willy Tarreau3a932442010-05-09 19:29:23 +0200137 int pos, side;
138 int node_bit;
Willy Tarreauc2186022009-10-26 19:48:54 +0100139
140 troot = root->b[EB_LEFT];
141 if (unlikely(troot == NULL))
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100142 goto ret_null;
Willy Tarreauc2186022009-10-26 19:48:54 +0100143
Willy Tarreau414c4b22011-01-04 13:21:06 +0100144 if (unlikely(len == 0))
145 goto walk_down;
146
Willy Tarreau3a932442010-05-09 19:29:23 +0200147 pos = 0;
Willy Tarreauc2186022009-10-26 19:48:54 +0100148 while (1) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200149 if (eb_gettag(troot) == EB_LEAF) {
Willy Tarreauc2186022009-10-26 19:48:54 +0100150 node = container_of(eb_untag(troot, EB_LEAF),
151 struct ebmb_node, node.branches);
Willy Tarreau414c4b22011-01-04 13:21:06 +0100152 if (memcmp(node->key + pos, x, len) != 0)
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100153 goto ret_null;
Willy Tarreau3a932442010-05-09 19:29:23 +0200154 else
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100155 goto ret_node;
Willy Tarreauc2186022009-10-26 19:48:54 +0100156 }
157 node = container_of(eb_untag(troot, EB_NODE),
158 struct ebmb_node, node.branches);
159
Willy Tarreau3a932442010-05-09 19:29:23 +0200160 node_bit = node->node.bit;
161 if (node_bit < 0) {
Willy Tarreauc2186022009-10-26 19:48:54 +0100162 /* We have a dup tree now. Either it's for the same
163 * value, and we walk down left, or it's a different
164 * one and we don't have our key.
165 */
Willy Tarreau414c4b22011-01-04 13:21:06 +0100166 if (memcmp(node->key + pos, x, len) != 0)
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100167 goto ret_null;
168 else
169 goto walk_left;
Willy Tarreauc2186022009-10-26 19:48:54 +0100170 }
171
Willy Tarreau3a932442010-05-09 19:29:23 +0200172 /* OK, normal data node, let's walk down. We check if all full
173 * bytes are equal, and we start from the last one we did not
174 * completely check. We stop as soon as we reach the last byte,
175 * because we must decide to go left/right or abort.
176 */
177 node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
178 if (node_bit < 0) {
179 /* This surprizing construction gives better performance
180 * because gcc does not try to reorder the loop. Tested to
181 * be fine with 2.95 to 4.2.
182 */
183 while (1) {
Willy Tarreau414c4b22011-01-04 13:21:06 +0100184 if (node->key[pos++] ^ *(unsigned char*)(x++))
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100185 goto ret_null; /* more than one full byte is different */
Willy Tarreau414c4b22011-01-04 13:21:06 +0100186 if (--len == 0)
187 goto walk_left; /* return first node if all bytes matched */
Willy Tarreau3a932442010-05-09 19:29:23 +0200188 node_bit += 8;
189 if (node_bit >= 0)
190 break;
191 }
192 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100193
Willy Tarreau3a932442010-05-09 19:29:23 +0200194 /* here we know that only the last byte differs, so node_bit < 8.
195 * We have 2 possibilities :
196 * - more than the last bit differs => return NULL
197 * - walk down on side = (x[pos] >> node_bit) & 1
198 */
199 side = *(unsigned char *)x >> node_bit;
200 if (((node->key[pos] >> node_bit) ^ side) > 1)
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100201 goto ret_null;
Willy Tarreau3a932442010-05-09 19:29:23 +0200202 side &= 1;
203 troot = node->node.branches.b[side];
Willy Tarreauc2186022009-10-26 19:48:54 +0100204 }
Willy Tarreauce3d44a2011-01-04 14:07:36 +0100205 walk_left:
206 troot = node->node.branches.b[EB_LEFT];
207 walk_down:
208 while (eb_gettag(troot) != EB_LEAF)
209 troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
210 node = container_of(eb_untag(troot, EB_LEAF),
211 struct ebmb_node, node.branches);
212 ret_node:
213 return node;
214 ret_null:
215 return NULL;
Willy Tarreauc2186022009-10-26 19:48:54 +0100216}
217
218/* Insert ebmb_node <new> into subtree starting at node root <root>.
219 * Only new->key needs be set with the key. The ebmb_node is returned.
220 * If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
Willy Tarreau414c4b22011-01-04 13:21:06 +0100221 * len is specified in bytes. It is absolutely mandatory that this length
222 * is the same for all keys in the tree. This function cannot be used to
223 * insert strings.
Willy Tarreauc2186022009-10-26 19:48:54 +0100224 */
225static forceinline struct ebmb_node *
226__ebmb_insert(struct eb_root *root, struct ebmb_node *new, unsigned int len)
227{
228 struct ebmb_node *old;
229 unsigned int side;
Willy Tarreau3a932442010-05-09 19:29:23 +0200230 eb_troot_t *troot, **up_ptr;
Willy Tarreau6258f7b2011-09-19 20:48:00 +0200231 eb_troot_t *root_right;
Willy Tarreauc2186022009-10-26 19:48:54 +0100232 int diff;
233 int bit;
Willy Tarreau3a932442010-05-09 19:29:23 +0200234 eb_troot_t *new_left, *new_rght;
235 eb_troot_t *new_leaf;
236 int old_node_bit;
Willy Tarreauc2186022009-10-26 19:48:54 +0100237
238 side = EB_LEFT;
239 troot = root->b[EB_LEFT];
240 root_right = root->b[EB_RGHT];
241 if (unlikely(troot == NULL)) {
242 /* Tree is empty, insert the leaf part below the left branch */
243 root->b[EB_LEFT] = eb_dotag(&new->node.branches, EB_LEAF);
244 new->node.leaf_p = eb_dotag(root, EB_LEFT);
245 new->node.node_p = NULL; /* node part unused */
246 return new;
247 }
248
Willy Tarreauc2186022009-10-26 19:48:54 +0100249 /* The tree descent is fairly easy :
250 * - first, check if we have reached a leaf node
251 * - second, check if we have gone too far
252 * - third, reiterate
253 * Everywhere, we use <new> for the node node we are inserting, <root>
254 * for the node we attach it to, and <old> for the node we are
255 * displacing below <new>. <troot> will always point to the future node
256 * (tagged with its type). <side> carries the side the node <new> is
257 * attached to below its parent, which is also where previous node
258 * was attached.
259 */
260
261 bit = 0;
262 while (1) {
263 if (unlikely(eb_gettag(troot) == EB_LEAF)) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200264 /* insert above a leaf */
Willy Tarreauc2186022009-10-26 19:48:54 +0100265 old = container_of(eb_untag(troot, EB_LEAF),
266 struct ebmb_node, node.branches);
Willy Tarreauc2186022009-10-26 19:48:54 +0100267 new->node.node_p = old->node.leaf_p;
Willy Tarreau3a932442010-05-09 19:29:23 +0200268 up_ptr = &old->node.leaf_p;
269 goto check_bit_and_break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100270 }
271
272 /* OK we're walking down this link */
273 old = container_of(eb_untag(troot, EB_NODE),
274 struct ebmb_node, node.branches);
Willy Tarreau3a932442010-05-09 19:29:23 +0200275 old_node_bit = old->node.bit;
276
277 if (unlikely(old->node.bit < 0)) {
278 /* We're above a duplicate tree, so we must compare the whole value */
279 new->node.node_p = old->node.node_p;
280 up_ptr = &old->node.node_p;
281 check_bit_and_break:
282 bit = equal_bits(new->key, old->key, bit, len << 3);
283 break;
284 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100285
286 /* Stop going down when we don't have common bits anymore. We
287 * also stop in front of a duplicates tree because it means we
288 * have to insert above. Note: we can compare more bits than
289 * the current node's because as long as they are identical, we
290 * know we descend along the correct side.
291 */
Willy Tarreau3a932442010-05-09 19:29:23 +0200292
293 bit = equal_bits(new->key, old->key, bit, old_node_bit);
294 if (unlikely(bit < old_node_bit)) {
295 /* The tree did not contain the key, so we insert <new> before the
296 * node <old>, and set ->bit to designate the lowest bit position in
297 * <new> which applies to ->branches.b[].
298 */
299 new->node.node_p = old->node.node_p;
300 up_ptr = &old->node.node_p;
301 break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100302 }
Willy Tarreau3a932442010-05-09 19:29:23 +0200303 /* we don't want to skip bits for further comparisons, so we must limit <bit>.
304 * However, since we're going down around <old_node_bit>, we know it will be
305 * properly matched, so we can skip this bit.
306 */
307 bit = old_node_bit + 1;
308
309 /* walk down */
310 root = &old->node.branches;
311 side = old_node_bit & 7;
312 side ^= 7;
313 side = (new->key[old_node_bit >> 3] >> side) & 1;
314 troot = root->b[side];
315 }
316
317 new_left = eb_dotag(&new->node.branches, EB_LEFT);
318 new_rght = eb_dotag(&new->node.branches, EB_RGHT);
319 new_leaf = eb_dotag(&new->node.branches, EB_LEAF);
320
Willy Tarreau3a932442010-05-09 19:29:23 +0200321 new->node.bit = bit;
Willy Tarreaua4a1cd12012-06-09 15:43:36 +0200322
323 /* Note: we can compare more bits than the current node's because as
324 * long as they are identical, we know we descend along the correct
325 * side. However we don't want to start to compare past the end.
326 */
327 diff = 0;
328 if (((unsigned)bit >> 3) < len)
329 diff = cmp_bits(new->key, old->key, bit);
330
Willy Tarreau3a932442010-05-09 19:29:23 +0200331 if (diff == 0) {
332 new->node.bit = -1; /* mark as new dup tree, just in case */
Willy Tarreauc2186022009-10-26 19:48:54 +0100333
Willy Tarreau3a932442010-05-09 19:29:23 +0200334 if (likely(eb_gettag(root_right))) {
335 /* we refuse to duplicate this key if the tree is
336 * tagged as containing only unique keys.
Willy Tarreauc2186022009-10-26 19:48:54 +0100337 */
Willy Tarreau3a932442010-05-09 19:29:23 +0200338 return old;
339 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100340
Willy Tarreau3a932442010-05-09 19:29:23 +0200341 if (eb_gettag(troot) != EB_LEAF) {
342 /* there was already a dup tree below */
343 struct eb_node *ret;
344 ret = eb_insert_dup(&old->node, &new->node);
345 return container_of(ret, struct ebmb_node, node);
346 }
347 /* otherwise fall through */
348 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100349
Willy Tarreau3a932442010-05-09 19:29:23 +0200350 if (diff >= 0) {
351 new->node.branches.b[EB_LEFT] = troot;
352 new->node.branches.b[EB_RGHT] = new_leaf;
353 new->node.leaf_p = new_rght;
354 *up_ptr = new_left;
355 }
356 else if (diff < 0) {
357 new->node.branches.b[EB_LEFT] = new_leaf;
358 new->node.branches.b[EB_RGHT] = troot;
359 new->node.leaf_p = new_left;
360 *up_ptr = new_rght;
361 }
Willy Tarreauc2186022009-10-26 19:48:54 +0100362
Willy Tarreau3a932442010-05-09 19:29:23 +0200363 /* Ok, now we are inserting <new> between <root> and <old>. <old>'s
364 * parent is already set to <new>, and the <root>'s branch is still in
365 * <side>. Update the root's leaf till we have it. Note that we can also
366 * find the side by checking the side of new->node.node_p.
367 */
368
369 root->b[side] = eb_dotag(&new->node.branches, EB_NODE);
370 return new;
371}
372
373
374/* Find the first occurence of the longest prefix matching a key <x> in the
375 * tree <root>. It's the caller's responsibility to ensure that key <x> is at
376 * least as long as the keys in the tree. If none can be found, return NULL.
377 */
378static forceinline struct ebmb_node *__ebmb_lookup_longest(struct eb_root *root, const void *x)
379{
380 struct ebmb_node *node;
381 eb_troot_t *troot, *cover;
382 int pos, side;
383 int node_bit;
384
385 troot = root->b[EB_LEFT];
386 if (unlikely(troot == NULL))
387 return NULL;
388
389 cover = NULL;
390 pos = 0;
391 while (1) {
392 if ((eb_gettag(troot) == EB_LEAF)) {
393 node = container_of(eb_untag(troot, EB_LEAF),
394 struct ebmb_node, node.branches);
395 if (check_bits(x - pos, node->key, pos, node->node.pfx))
396 goto not_found;
397
398 return node;
399 }
400 node = container_of(eb_untag(troot, EB_NODE),
401 struct ebmb_node, node.branches);
402
403 node_bit = node->node.bit;
404 if (node_bit < 0) {
405 /* We have a dup tree now. Either it's for the same
406 * value, and we walk down left, or it's a different
407 * one and we don't have our key.
408 */
409 if (check_bits(x - pos, node->key, pos, node->node.pfx))
410 goto not_found;
411
412 troot = node->node.branches.b[EB_LEFT];
413 while (eb_gettag(troot) != EB_LEAF)
414 troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
415 node = container_of(eb_untag(troot, EB_LEAF),
416 struct ebmb_node, node.branches);
417 return node;
418 }
419
420 node_bit >>= 1; /* strip cover bit */
421 node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
422 if (node_bit < 0) {
423 /* This uncommon construction gives better performance
424 * because gcc does not try to reorder the loop. Tested to
425 * be fine with 2.95 to 4.2.
426 */
427 while (1) {
428 x++; pos++;
429 if (node->key[pos-1] ^ *(unsigned char*)(x-1))
430 goto not_found; /* more than one full byte is different */
431 node_bit += 8;
432 if (node_bit >= 0)
433 break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100434 }
Willy Tarreau3a932442010-05-09 19:29:23 +0200435 }
436
437 /* here we know that only the last byte differs, so 0 <= node_bit <= 7.
438 * We have 2 possibilities :
439 * - more than the last bit differs => data does not match
440 * - walk down on side = (x[pos] >> node_bit) & 1
441 */
442 side = *(unsigned char *)x >> node_bit;
443 if (((node->key[pos] >> node_bit) ^ side) > 1)
444 goto not_found;
445
446 if (!(node->node.bit & 1)) {
447 /* This is a cover node, let's keep a reference to it
448 * for later. The covering subtree is on the left, and
449 * the covered subtree is on the right, so we have to
450 * walk down right.
451 */
452 cover = node->node.branches.b[EB_LEFT];
453 troot = node->node.branches.b[EB_RGHT];
454 continue;
455 }
456 side &= 1;
457 troot = node->node.branches.b[side];
458 }
459
460 not_found:
461 /* Walk down last cover tre if it exists. It does not matter if cover is NULL */
462 return ebmb_entry(eb_walk_down(cover, EB_LEFT), struct ebmb_node, node);
463}
464
465
466/* Find the first occurence of a prefix matching a key <x> of <pfx> BITS in the
Willy Tarreau414c4b22011-01-04 13:21:06 +0100467 * tree <root>. It's the caller's responsibility to ensure that key <x> is at
468 * least as long as the keys in the tree. If none can be found, return NULL.
Willy Tarreau3a932442010-05-09 19:29:23 +0200469 */
470static forceinline struct ebmb_node *__ebmb_lookup_prefix(struct eb_root *root, const void *x, unsigned int pfx)
471{
472 struct ebmb_node *node;
473 eb_troot_t *troot;
474 int pos, side;
475 int node_bit;
476
477 troot = root->b[EB_LEFT];
478 if (unlikely(troot == NULL))
479 return NULL;
480
481 pos = 0;
482 while (1) {
483 if ((eb_gettag(troot) == EB_LEAF)) {
484 node = container_of(eb_untag(troot, EB_LEAF),
485 struct ebmb_node, node.branches);
486 if (node->node.pfx != pfx)
487 return NULL;
488 if (check_bits(x - pos, node->key, pos, node->node.pfx))
489 return NULL;
490 return node;
491 }
492 node = container_of(eb_untag(troot, EB_NODE),
493 struct ebmb_node, node.branches);
494
495 node_bit = node->node.bit;
496 if (node_bit < 0) {
497 /* We have a dup tree now. Either it's for the same
498 * value, and we walk down left, or it's a different
499 * one and we don't have our key.
500 */
501 if (node->node.pfx != pfx)
502 return NULL;
503 if (check_bits(x - pos, node->key, pos, node->node.pfx))
504 return NULL;
505
506 troot = node->node.branches.b[EB_LEFT];
507 while (eb_gettag(troot) != EB_LEAF)
508 troot = (eb_untag(troot, EB_NODE))->b[EB_LEFT];
509 node = container_of(eb_untag(troot, EB_LEAF),
510 struct ebmb_node, node.branches);
511 return node;
512 }
513
514 node_bit >>= 1; /* strip cover bit */
515 node_bit = ~node_bit + (pos << 3) + 8; // = (pos<<3) + (7 - node_bit)
516 if (node_bit < 0) {
517 /* This uncommon construction gives better performance
518 * because gcc does not try to reorder the loop. Tested to
519 * be fine with 2.95 to 4.2.
520 */
521 while (1) {
522 x++; pos++;
523 if (node->key[pos-1] ^ *(unsigned char*)(x-1))
524 return NULL; /* more than one full byte is different */
525 node_bit += 8;
526 if (node_bit >= 0)
527 break;
Willy Tarreauc2186022009-10-26 19:48:54 +0100528 }
Willy Tarreau3a932442010-05-09 19:29:23 +0200529 }
530
531 /* here we know that only the last byte differs, so 0 <= node_bit <= 7.
532 * We have 2 possibilities :
533 * - more than the last bit differs => data does not match
534 * - walk down on side = (x[pos] >> node_bit) & 1
535 */
536 side = *(unsigned char *)x >> node_bit;
537 if (((node->key[pos] >> node_bit) ^ side) > 1)
538 return NULL;
539
540 if (!(node->node.bit & 1)) {
541 /* This is a cover node, it may be the entry we're
542 * looking for. We already know that it matches all the
543 * bits, let's compare prefixes and descend the cover
544 * subtree if they match.
545 */
Willy Tarreau22c0a932011-07-25 12:22:44 +0200546 if ((unsigned short)node->node.bit >> 1 == pfx)
Willy Tarreau3a932442010-05-09 19:29:23 +0200547 troot = node->node.branches.b[EB_LEFT];
548 else
549 troot = node->node.branches.b[EB_RGHT];
550 continue;
551 }
552 side &= 1;
553 troot = node->node.branches.b[side];
554 }
555}
556
557
558/* Insert ebmb_node <new> into a prefix subtree starting at node root <root>.
559 * Only new->key and new->pfx need be set with the key and its prefix length.
560 * Note that bits between <pfx> and <len> are theorically ignored and should be
561 * zero, as it is not certain yet that they will always be ignored everywhere
562 * (eg in bit compare functions).
563 * The ebmb_node is returned.
564 * If root->b[EB_RGHT]==1, the tree may only contain unique keys. The
565 * len is specified in bytes.
566 */
567static forceinline struct ebmb_node *
568__ebmb_insert_prefix(struct eb_root *root, struct ebmb_node *new, unsigned int len)
569{
570 struct ebmb_node *old;
571 unsigned int side;
572 eb_troot_t *troot, **up_ptr;
Willy Tarreau6258f7b2011-09-19 20:48:00 +0200573 eb_troot_t *root_right;
Willy Tarreau3a932442010-05-09 19:29:23 +0200574 int diff;
575 int bit;
576 eb_troot_t *new_left, *new_rght;
577 eb_troot_t *new_leaf;
578 int old_node_bit;
579
580 side = EB_LEFT;
581 troot = root->b[EB_LEFT];
582 root_right = root->b[EB_RGHT];
583 if (unlikely(troot == NULL)) {
584 /* Tree is empty, insert the leaf part below the left branch */
585 root->b[EB_LEFT] = eb_dotag(&new->node.branches, EB_LEAF);
586 new->node.leaf_p = eb_dotag(root, EB_LEFT);
587 new->node.node_p = NULL; /* node part unused */
588 return new;
589 }
590
591 len <<= 3;
592 if (len > new->node.pfx)
593 len = new->node.pfx;
594
595 /* The tree descent is fairly easy :
596 * - first, check if we have reached a leaf node
597 * - second, check if we have gone too far
598 * - third, reiterate
599 * Everywhere, we use <new> for the node node we are inserting, <root>
600 * for the node we attach it to, and <old> for the node we are
601 * displacing below <new>. <troot> will always point to the future node
602 * (tagged with its type). <side> carries the side the node <new> is
603 * attached to below its parent, which is also where previous node
604 * was attached.
605 */
606
607 bit = 0;
608 while (1) {
609 if (unlikely(eb_gettag(troot) == EB_LEAF)) {
610 /* Insert above a leaf. Note that this leaf could very
611 * well be part of a cover node.
612 */
613 old = container_of(eb_untag(troot, EB_LEAF),
614 struct ebmb_node, node.branches);
615 new->node.node_p = old->node.leaf_p;
616 up_ptr = &old->node.leaf_p;
617 goto check_bit_and_break;
618 }
619
620 /* OK we're walking down this link */
621 old = container_of(eb_untag(troot, EB_NODE),
622 struct ebmb_node, node.branches);
623 old_node_bit = old->node.bit;
624 /* Note that old_node_bit can be :
625 * < 0 : dup tree
626 * = 2N : cover node for N bits
627 * = 2N+1 : normal node at N bits
628 */
629
630 if (unlikely(old_node_bit < 0)) {
631 /* We're above a duplicate tree, so we must compare the whole value */
632 new->node.node_p = old->node.node_p;
633 up_ptr = &old->node.node_p;
634 check_bit_and_break:
635 /* No need to compare everything if the leaves are shorter than the new one. */
636 if (len > old->node.pfx)
637 len = old->node.pfx;
638 bit = equal_bits(new->key, old->key, bit, len);
Willy Tarreauc2186022009-10-26 19:48:54 +0100639 break;
640 }
641
Willy Tarreau3a932442010-05-09 19:29:23 +0200642 /* WARNING: for the two blocks below, <bit> is counted in half-bits */
643
644 bit = equal_bits(new->key, old->key, bit, old_node_bit >> 1);
645 bit = (bit << 1) + 1; // assume comparisons with normal nodes
Willy Tarreau3a932442010-05-09 19:29:23 +0200646
647 /* we must always check that our prefix is larger than the nodes
648 * we visit, otherwise we have to stop going down. The following
649 * test is able to stop before both normal and cover nodes.
650 */
651 if (bit >= (new->node.pfx << 1) && (new->node.pfx << 1) < old_node_bit) {
652 /* insert cover node here on the left */
653 new->node.node_p = old->node.node_p;
654 up_ptr = &old->node.node_p;
655 new->node.bit = new->node.pfx << 1;
656 diff = -1;
Willy Tarreau3a932442010-05-09 19:29:23 +0200657 goto insert_above;
658 }
659
660 if (unlikely(bit < old_node_bit)) {
661 /* The tree did not contain the key, so we insert <new> before the
662 * node <old>, and set ->bit to designate the lowest bit position in
663 * <new> which applies to ->branches.b[]. We know that the bit is not
664 * greater than the prefix length thanks to the test above.
665 */
666 new->node.node_p = old->node.node_p;
667 up_ptr = &old->node.node_p;
668 new->node.bit = bit;
669 diff = cmp_bits(new->key, old->key, bit >> 1);
Willy Tarreau3a932442010-05-09 19:29:23 +0200670 goto insert_above;
671 }
672
673 if (!(old_node_bit & 1)) {
674 /* if we encounter a cover node with our exact prefix length, it's
675 * necessarily the same value, so we insert there as a duplicate on
676 * the left. For that, we go down on the left and the leaf detection
677 * code will finish the job.
678 */
679 if ((new->node.pfx << 1) == old_node_bit) {
680 root = &old->node.branches;
681 side = EB_LEFT;
682 troot = root->b[side];
Willy Tarreau3a932442010-05-09 19:29:23 +0200683 continue;
684 }
685
686 /* cover nodes are always walked through on the right */
687 side = EB_RGHT;
688 bit = old_node_bit >> 1; /* recheck that bit */
689 root = &old->node.branches;
690 troot = root->b[side];
Willy Tarreau3a932442010-05-09 19:29:23 +0200691 continue;
692 }
693
694 /* we don't want to skip bits for further comparisons, so we must limit <bit>.
695 * However, since we're going down around <old_node_bit>, we know it will be
696 * properly matched, so we can skip this bit.
697 */
698 old_node_bit >>= 1;
699 bit = old_node_bit + 1;
700
Willy Tarreauc2186022009-10-26 19:48:54 +0100701 /* walk down */
702 root = &old->node.branches;
Willy Tarreau3a932442010-05-09 19:29:23 +0200703 side = old_node_bit & 7;
704 side ^= 7;
705 side = (new->key[old_node_bit >> 3] >> side) & 1;
Willy Tarreauc2186022009-10-26 19:48:54 +0100706 troot = root->b[side];
707 }
708
Willy Tarreau3a932442010-05-09 19:29:23 +0200709 /* Right here, we have 4 possibilities :
710 * - the tree does not contain any leaf matching the
711 * key, and we have new->key < old->key. We insert
712 * new above old, on the left ;
713 *
714 * - the tree does not contain any leaf matching the
715 * key, and we have new->key > old->key. We insert
716 * new above old, on the right ;
717 *
718 * - the tree does contain the key with the same prefix
719 * length. We add the new key next to it as a first
720 * duplicate (since it was alone).
721 *
722 * The last two cases can easily be partially merged.
723 *
724 * - the tree contains a leaf matching the key, we have
725 * to insert above it as a cover node. The leaf with
726 * the shortest prefix becomes the left subtree and
727 * the leaf with the longest prefix becomes the right
728 * one. The cover node gets the min of both prefixes
729 * as its new bit.
Willy Tarreauc2186022009-10-26 19:48:54 +0100730 */
731
Willy Tarreau3a932442010-05-09 19:29:23 +0200732 /* first we want to ensure that we compare the correct bit, which means
733 * the largest common to both nodes.
Willy Tarreauc2186022009-10-26 19:48:54 +0100734 */
Willy Tarreau3a932442010-05-09 19:29:23 +0200735 if (bit > new->node.pfx)
736 bit = new->node.pfx;
737 if (bit > old->node.pfx)
738 bit = old->node.pfx;
739
Willy Tarreau3a932442010-05-09 19:29:23 +0200740 new->node.bit = (bit << 1) + 1; /* assume normal node by default */
741
742 /* if one prefix is included in the second one, we don't compare bits
743 * because they won't necessarily match, we just proceed with a cover
744 * node insertion.
745 */
746 diff = 0;
747 if (bit < old->node.pfx && bit < new->node.pfx)
748 diff = cmp_bits(new->key, old->key, bit);
749
750 if (diff == 0) {
751 /* Both keys match. Either it's a duplicate entry or we have to
752 * put the shortest prefix left and the largest one right below
753 * a new cover node. By default, diff==0 means we'll be inserted
754 * on the right.
755 */
756 new->node.bit--; /* anticipate cover node insertion */
757 if (new->node.pfx == old->node.pfx) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200758 new->node.bit = -1; /* mark as new dup tree, just in case */
759
760 if (unlikely(eb_gettag(root_right))) {
761 /* we refuse to duplicate this key if the tree is
762 * tagged as containing only unique keys.
763 */
764 return old;
765 }
766
767 if (eb_gettag(troot) != EB_LEAF) {
768 /* there was already a dup tree below */
769 struct eb_node *ret;
770 ret = eb_insert_dup(&old->node, &new->node);
771 return container_of(ret, struct ebmb_node, node);
772 }
773 /* otherwise fall through to insert first duplicate */
774 }
775 /* otherwise we just rely on the tests below to select the right side */
776 else if (new->node.pfx < old->node.pfx)
777 diff = -1; /* force insertion to left side */
778 }
779
780 insert_above:
781 new_left = eb_dotag(&new->node.branches, EB_LEFT);
782 new_rght = eb_dotag(&new->node.branches, EB_RGHT);
783 new_leaf = eb_dotag(&new->node.branches, EB_LEAF);
784
785 if (diff >= 0) {
Willy Tarreau3a932442010-05-09 19:29:23 +0200786 new->node.branches.b[EB_LEFT] = troot;
787 new->node.branches.b[EB_RGHT] = new_leaf;
788 new->node.leaf_p = new_rght;
789 *up_ptr = new_left;
790 }
791 else {
Willy Tarreau3a932442010-05-09 19:29:23 +0200792 new->node.branches.b[EB_LEFT] = new_leaf;
793 new->node.branches.b[EB_RGHT] = troot;
794 new->node.leaf_p = new_left;
795 *up_ptr = new_rght;
796 }
797
Willy Tarreauc2186022009-10-26 19:48:54 +0100798 root->b[side] = eb_dotag(&new->node.branches, EB_NODE);
799 return new;
800}
801
Willy Tarreau3a932442010-05-09 19:29:23 +0200802
803
Willy Tarreauead63a02009-11-02 14:41:23 +0100804#endif /* _EBMBTREE_H */
805