OPTIM: ebtree: make ebmb_insert_prefix() keep a copy the new node's pfx
looking at a perf profile while loading a conf with a huge map, it
appeared that there was a hot spot on the access to the new node's
prefix, which is unexpectedly being reloaded for each visited node
during the tree descent. Better keep a copy of it because with large
trees that don't fit into the L3 cache the memory bandwidth is scarce.
Doing so reduces the load time from 8.0 to 7.5 seconds.
diff --git a/include/import/ebmbtree.h b/include/import/ebmbtree.h
index b2dd144..fc6a500 100644
--- a/include/import/ebmbtree.h
+++ b/include/import/ebmbtree.h
@@ -618,6 +618,8 @@
eb_troot_t *new_left, *new_rght;
eb_troot_t *new_leaf;
int old_node_bit;
+ unsigned int npfx = new->node.pfx;
+ unsigned int npfx1 = npfx << 1;
side = EB_LEFT;
troot = root->b[EB_LEFT];
@@ -631,8 +633,8 @@
}
len <<= 3;
- if (len > new->node.pfx)
- len = new->node.pfx;
+ if (len > npfx)
+ len = npfx;
/* The tree descent is fairly easy :
* - first, check if we have reached a leaf node
@@ -690,11 +692,11 @@
* we visit, otherwise we have to stop going down. The following
* test is able to stop before both normal and cover nodes.
*/
- if (bit >= (new->node.pfx << 1) && (new->node.pfx << 1) < old_node_bit) {
+ if (bit >= npfx1 && npfx1 < old_node_bit) {
/* insert cover node here on the left */
new->node.node_p = old->node.node_p;
up_ptr = &old->node.node_p;
- new->node.bit = new->node.pfx << 1;
+ new->node.bit = npfx1;
diff = -1;
goto insert_above;
}
@@ -718,7 +720,7 @@
* the left. For that, we go down on the left and the leaf detection
* code will finish the job.
*/
- if ((new->node.pfx << 1) == old_node_bit) {
+ if (npfx1 == old_node_bit) {
root = &old->node.branches;
side = EB_LEFT;
troot = root->b[side];
@@ -774,8 +776,8 @@
/* first we want to ensure that we compare the correct bit, which means
* the largest common to both nodes.
*/
- if (bit > new->node.pfx)
- bit = new->node.pfx;
+ if (bit > npfx)
+ bit = npfx;
if (bit > old->node.pfx)
bit = old->node.pfx;
@@ -786,7 +788,7 @@
* node insertion.
*/
diff = 0;
- if (bit < old->node.pfx && bit < new->node.pfx)
+ if (bit < old->node.pfx && bit < npfx)
diff = cmp_bits(new->key, old->key, bit);
if (diff == 0) {
@@ -796,7 +798,7 @@
* on the right.
*/
new->node.bit--; /* anticipate cover node insertion */
- if (new->node.pfx == old->node.pfx) {
+ if (npfx == old->node.pfx) {
new->node.bit = -1; /* mark as new dup tree, just in case */
if (unlikely(eb_gettag(root_right))) {
@@ -815,7 +817,7 @@
/* otherwise fall through to insert first duplicate */
}
/* otherwise we just rely on the tests below to select the right side */
- else if (new->node.pfx < old->node.pfx)
+ else if (npfx < old->node.pfx)
diff = -1; /* force insertion to left side */
}