OPTIM: halog: make use of memchr() on platforms which provide a fast one
glibc-2.11 on x86_64 provides a machine-specific memchr() which is faster
than the generic C implementation by around 40%, so let's make it possible
to use it instead of the hand-coded version.
diff --git a/contrib/halog/Makefile b/contrib/halog/Makefile
index 26758e8..5e687c0 100644
--- a/contrib/halog/Makefile
+++ b/contrib/halog/Makefile
@@ -6,10 +6,17 @@
# note: it is recommended to also add -fomit-frame-pointer on i386
OPTIMIZE = -O3
+# most recent glibc versions provide platform-specific optimizations that make
+# memchr faster than the generic C implementation (eg: SSE and prefetch
+# on x86_64). Try with and without. In general, on x86_64 it's better to
+# use memchr using the define below.
+# DEFINE = -DUSE_MEMCHR
+DEFINE =
+
OBJS = halog
halog: halog.c fgets2.c
- $(CC) $(OPTIMIZE) -o $@ $(INCLUDE) $(EBTREE_DIR)/ebtree.c $(EBTREE_DIR)/eb32tree.c $(EBTREE_DIR)/eb64tree.c $(EBTREE_DIR)/ebmbtree.c $(EBTREE_DIR)/ebsttree.c $(EBTREE_DIR)/ebistree.c $(EBTREE_DIR)/ebimtree.c $^
+ $(CC) $(OPTIMIZE) $(DEFINE) -o $@ $(INCLUDE) $(EBTREE_DIR)/ebtree.c $(EBTREE_DIR)/eb32tree.c $(EBTREE_DIR)/eb64tree.c $(EBTREE_DIR)/ebmbtree.c $(EBTREE_DIR)/ebsttree.c $(EBTREE_DIR)/ebistree.c $(EBTREE_DIR)/ebimtree.c $^
clean:
rm -f $(OBJS) *.[oas]
diff --git a/contrib/halog/fgets2.c b/contrib/halog/fgets2.c
index 88c4d5c..3db762c 100644
--- a/contrib/halog/fgets2.c
+++ b/contrib/halog/fgets2.c
@@ -86,115 +86,134 @@
return (sizeof(x) == 8) ? has_zero64(x) : has_zero32(x);
}
-const char *fgets2(FILE *stream)
+/* find a '\n' between <next> and <end>. Warning: may read slightly past <end>.
+ * If no '\n' is found, <end> is returned.
+ */
+static char *find_lf(char *next, char *end)
{
- static char buffer[FGETS2_BUFSIZE + 68]; /* Note: +32 is enough on 32-bit systems */
- static char *end = buffer;
- static char *line = buffer;
- char *next;
- int ret;
-
- next = line;
+#if defined USE_MEMCHR
+ /* some recent libc use platform-specific optimizations to provide more
+ * efficient byte search than below (eg: glibc 2.11 on x86_64).
+ */
+ next = memchr(next, '\n', end - next);
+ if (!next)
+ next = end;
+#else
+ if (sizeof(long) == 4) { /* 32-bit system */
+ /* this is a speed-up, we read 32 bits at once and check for an
+ * LF character there. We stop if found then continue one at a
+ * time.
+ */
+ while (next < end && (((unsigned long)next) & 3) && *next != '\n')
+ next++;
- while (1) {
- if (sizeof(long) == 4) { /* 32-bit system */
- /* this is a speed-up, we read 32 bits at once and check for an
- * LF character there. We stop if found then continue one at a
- * time.
- */
- while (next < end && (((unsigned long)next) & 3) && *next != '\n')
+ /* Now next is multiple of 4 or equal to end. We know we can safely
+ * read up to 32 bytes past end if needed because they're allocated.
+ */
+ while (next < end) {
+ if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
+ break;
+ next += 4;
+ if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
+ break;
+ next += 4;
+ if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
+ break;
+ next += 4;
+ if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
+ break;
+ next += 4;
+ if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
+ break;
+ next += 4;
+ if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
+ break;
+ next += 4;
+ if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
+ break;
+ next += 4;
+ if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
+ break;
+ next += 4;
+ }
+ }
+ else { /* 64-bit system */
+ /* this is a speed-up, we read 64 bits at once and check for an
+ * LF character there. We stop if found then continue one at a
+ * time.
+ */
+ if (next <= end) {
+ /* max 3 bytes tested here */
+ while ((((unsigned long)next) & 3) && *next != '\n')
next++;
- /* Now next is multiple of 4 or equal to end. We know we can safely
- * read up to 32 bytes past end if needed because they're allocated.
- */
- while (next < end) {
- if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
- break;
- next += 4;
- if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
- break;
- next += 4;
- if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
- break;
- next += 4;
- if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
- break;
- next += 4;
- if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
- break;
+ /* maybe we can skip 4 more bytes */
+ if ((((unsigned long)next) & 4) && !has_zero32(*(unsigned int *)next ^ 0x0A0A0A0AU))
next += 4;
- if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
- break;
- next += 4;
- if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
- break;
- next += 4;
- if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
- break;
- next += 4;
- }
}
- else { /* 64-bit system */
- /* this is a speed-up, we read 64 bits at once and check for an
- * LF character there. We stop if found then continue one at a
- * time.
- */
- if (next <= end) {
- /* max 3 bytes tested here */
- while ((((unsigned long)next) & 3) && *next != '\n')
- next++;
- /* maybe we have can skip 4 more bytes */
- if ((((unsigned long)next) & 4) && !has_zero32(*(unsigned int *)next ^ 0x0A0A0A0AU))
- next += 4;
- }
+ /* now next is multiple of 8 or equal to end */
+ while (next <= (end-68)) {
+ if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
+ break;
+ next += 8;
+ if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
+ break;
+ next += 8;
+ if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
+ break;
+ next += 8;
+ if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
+ break;
+ next += 8;
+ if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
+ break;
+ next += 8;
+ if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
+ break;
+ next += 8;
+ if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
+ break;
+ next += 8;
+ if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
+ break;
+ next += 8;
+ }
- /* now next is multiple of 8 or equal to end */
- while (next <= (end-68)) {
- if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
- break;
- next += 8;
- if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
- break;
- next += 8;
- if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
- break;
- next += 8;
- if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
- break;
- next += 8;
- if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
- break;
- next += 8;
- if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
- break;
- next += 8;
- if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
- break;
- next += 8;
- if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
- break;
- next += 8;
- }
+ /* maybe we can skip 4 more bytes */
+ if (!has_zero32(*(unsigned int *)next ^ 0x0A0A0A0AU))
+ next += 4;
+ }
- /* maybe we can skip 4 more bytes */
- if (!has_zero32(*(unsigned int *)next ^ 0x0A0A0A0AU))
- next += 4;
- }
+ /* We finish if needed : if <next> is below <end>, it means we
+ * found an LF in one of the 4 following bytes.
+ */
+ while (next < end) {
+ if (*next == '\n')
+ break;
+ next++;
+ }
+#endif
+ return next;
+}
- /* We finish if needed : if <next> is below <end>, it means we
- * found an LF in one of the 4 following bytes.
- */
- while (next < end) {
- if (*next == '\n') {
- const char *start = line;
+const char *fgets2(FILE *stream)
+{
+ static char buffer[FGETS2_BUFSIZE + 68]; /* Note: +32 is enough on 32-bit systems */
+ static char *end = buffer;
+ static char *line = buffer;
+ char *next;
+ int ret;
- *next = '\0';
- line = next + 1;
- return start;
- }
- next++;
+ next = line;
+
+ while (1) {
+ next = find_lf(next, end);
+ if (next < end) {
+ const char *start = line;
+ *next = '\0';
+ line = next + 1;
+ return start;
}
/* we found an incomplete line. First, let's move the