OPTIM: halog: make use of memchr() on platforms which provide a fast one

glibc-2.11 on x86_64 provides a machine-specific memchr() which is faster than the generic C implementation by around 40%, so let's make it possible to use it instead of the hand-coded version.

commit: 419a598eae03f9e074e3ba61c5e6579d73f92c34 [log] [tgz]
author: Willy Tarreau <w@1wt.eu> Tue Jun 12 08:52:22 2012 +0200
committer: Willy Tarreau <w@1wt.eu> Tue Jun 12 08:52:22 2012 +0200
tree: f12f936aa521fb1c429630db1fe0e1a731a3cb64
parent: 8ad4193100aafa19f04929670371bf823dbe11d0 [diff]
diff --git a/contrib/halog/Makefile b/contrib/halog/Makefile
index 26758e8..5e687c0 100644
--- a/contrib/halog/Makefile
+++ b/contrib/halog/Makefile

@@ -6,10 +6,17 @@
 # note: it is recommended to also add -fomit-frame-pointer on i386
 OPTIMIZE = -O3
 
+# most recent glibc provide platform-specific optimizations that make
+# memchr faster than the generic C implementation (eg: SSE and prefetch
+# on x86_64). Try with an without. In general, on x86_64 it's better to
+# use memchr using the define below.
+# DEFINE   = -DUSE_MEMCHR
+DEFINE   =
+
 OBJS     = halog
 
 halog: halog.c fgets2.c
-	$(CC) $(OPTIMIZE) -o $@ $(INCLUDE) $(EBTREE_DIR)/ebtree.c $(EBTREE_DIR)/eb32tree.c $(EBTREE_DIR)/eb64tree.c $(EBTREE_DIR)/ebmbtree.c $(EBTREE_DIR)/ebsttree.c $(EBTREE_DIR)/ebistree.c $(EBTREE_DIR)/ebimtree.c $^
+	$(CC) $(OPTIMIZE) $(DEFINE) -o $@ $(INCLUDE) $(EBTREE_DIR)/ebtree.c $(EBTREE_DIR)/eb32tree.c $(EBTREE_DIR)/eb64tree.c $(EBTREE_DIR)/ebmbtree.c $(EBTREE_DIR)/ebsttree.c $(EBTREE_DIR)/ebistree.c $(EBTREE_DIR)/ebimtree.c $^
 
 clean:
 	rm -f $(OBJS) *.[oas]

diff --git a/contrib/halog/fgets2.c b/contrib/halog/fgets2.c
index 88c4d5c..3db762c 100644
--- a/contrib/halog/fgets2.c
+++ b/contrib/halog/fgets2.c

@@ -86,115 +86,134 @@
 	return (sizeof(x) == 8) ? has_zero64(x) : has_zero32(x);
 }
 
-const char *fgets2(FILE *stream)
+/* find a '\n' between <next> and <end>. Warning: may read slightly past <end>.
+ * If no '\n' is found, <end> is returned.
+ */
+static char *find_lf(char *next, char *end)
 {
-	static char buffer[FGETS2_BUFSIZE + 68]; /* Note: +32 is enough on 32-bit systems */
-	static char *end = buffer;
-	static char *line = buffer;
-	char *next;
-	int ret;
-
-	next = line;
+#if defined USE_MEMCHR
+	/* some recent libc use platform-specific optimizations to provide more
+	 * efficient byte search than below (eg: glibc 2.11 on x86_64).
+	 */
+	next = memchr(next, '\n', end - next);
+	if (!next)
+		next = end;
+#else
+	if (sizeof(long) == 4) {  /* 32-bit system */
+		/* this is a speed-up, we read 32 bits at once and check for an
+		 * LF character there. We stop if found then continue one at a
+		 * time.
+		 */
+		while (next < end && (((unsigned long)next) & 3) && *next != '\n')
+			next++;
 
-	while (1) {
-		if (sizeof(long) == 4) {  /* 32-bit system */
-			/* this is a speed-up, we read 32 bits at once and check for an
-			 * LF character there. We stop if found then continue one at a
-			 * time.
-			 */
-			while (next < end && (((unsigned long)next) & 3) && *next != '\n')
+		/* Now next is multiple of 4 or equal to end. We know we can safely
+		 * read up to 32 bytes past end if needed because they're allocated.
+		 */
+		while (next < end) {
+			if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
+				break;
+			next += 4;
+			if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
+				break;
+			next += 4;
+			if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
+				break;
+			next += 4;
+			if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
+				break;
+			next += 4;
+			if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
+				break;
+			next += 4;
+			if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
+				break;
+			next += 4;
+			if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
+				break;
+			next += 4;
+			if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
+				break;
+			next += 4;
+		}
+	}
+	else {  /* 64-bit system */
+		/* this is a speed-up, we read 64 bits at once and check for an
+		 * LF character there. We stop if found then continue one at a
+		 * time.
+		 */
+		if (next <= end) {
+			/* max 3 bytes tested here */
+			while ((((unsigned long)next) & 3) && *next != '\n')
 				next++;
 
-			/* Now next is multiple of 4 or equal to end. We know we can safely
-			 * read up to 32 bytes past end if needed because they're allocated.
-			 */
-			while (next < end) {
-				if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
-					break;
-				next += 4;
-				if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
-					break;
-				next += 4;
-				if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
-					break;
-				next += 4;
-				if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
-					break;
-				next += 4;
-				if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
-					break;
+			/* maybe we have can skip 4 more bytes */
+			if ((((unsigned long)next) & 4) && !has_zero32(*(unsigned int *)next ^ 0x0A0A0A0AU))
 				next += 4;
-				if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
-					break;
-				next += 4;
-				if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
-					break;
-				next += 4;
-				if (has_zero32(*(unsigned int *)next ^ 0x0A0A0A0A))
-					break;
-				next += 4;
-			}
 		}
-		else {  /* 64-bit system */
-			/* this is a speed-up, we read 64 bits at once and check for an
-			 * LF character there. We stop if found then continue one at a
-			 * time.
-			 */
-			if (next <= end) {
-				/* max 3 bytes tested here */
-				while ((((unsigned long)next) & 3) && *next != '\n')
-					next++;
 
-				/* maybe we have can skip 4 more bytes */
-				if ((((unsigned long)next) & 4) && !has_zero32(*(unsigned int *)next ^ 0x0A0A0A0AU))
-					next += 4;
-			}
+		/* now next is multiple of 8 or equal to end */
+		while (next <= (end-68)) {
+			if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
+				break;
+			next += 8;
+			if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
+				break;
+			next += 8;
+			if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
+				break;
+			next += 8;
+			if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
+				break;
+			next += 8;
+			if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
+				break;
+			next += 8;
+			if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
+				break;
+			next += 8;
+			if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
+				break;
+			next += 8;
+			if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
+				break;
+			next += 8;
+		}
 
-			/* now next is multiple of 8 or equal to end */
-			while (next <= (end-68)) {
-				if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
-					break;
-				next += 8;
-				if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
-					break;
-				next += 8;
-				if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
-					break;
-				next += 8;
-				if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
-					break;
-				next += 8;
-				if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
-					break;
-				next += 8;
-				if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
-					break;
-				next += 8;
-				if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
-					break;
-				next += 8;
-				if (has_zero64(*(unsigned long long *)next ^ 0x0A0A0A0A0A0A0A0AULL))
-					break;
-				next += 8;
-			}
+		/* maybe we can skip 4 more bytes */
+		if (!has_zero32(*(unsigned int *)next ^ 0x0A0A0A0AU))
+			next += 4;
+	}
 
-			/* maybe we can skip 4 more bytes */
-			if (!has_zero32(*(unsigned int *)next ^ 0x0A0A0A0AU))
-				next += 4;
-		}
+	/* We finish if needed : if <next> is below <end>, it means we
+	 * found an LF in one of the 4 following bytes.
+	 */
+	while (next < end) {
+		if (*next == '\n')
+			break;
+		next++;
+	}
+#endif
+	return next;
+}
 
-		/* We finish if needed : if <next> is below <end>, it means we
-		 * found an LF in one of the 4 following bytes.
-		 */
-		while (next < end) {
-			if (*next == '\n') {
-				const char *start = line;
+const char *fgets2(FILE *stream)
+{
+	static char buffer[FGETS2_BUFSIZE + 68]; /* Note: +32 is enough on 32-bit systems */
+	static char *end = buffer;
+	static char *line = buffer;
+	char *next;
+	int ret;
 
-				*next = '\0';
-				line = next + 1;
-				return start;
-			}
-			next++;
+	next = line;
+
+	while (1) {
+		next = find_lf(next, end);
+		if (next < end) {
+			const char *start = line;
+			*next = '\0';
+			line = next + 1;
+			return start;
 		}
 
 		/* we found an incomplete line. First, let's move the
commit	419a598eae03f9e074e3ba61c5e6579d73f92c34	[log] [tgz]
author	Willy Tarreau <w@1wt.eu>	Tue Jun 12 08:52:22 2012 +0200
committer	Willy Tarreau <w@1wt.eu>	Tue Jun 12 08:52:22 2012 +0200
tree	f12f936aa521fb1c429630db1fe0e1a731a3cb64
parent	8ad4193100aafa19f04929670371bf823dbe11d0 [diff]