[OPTIM] halog: make fgets2 parse more bytes in blocks

By adding a "landing area" at the end of the buffer (spare bytes past the
end of the data, starting with an LF sentinel), it becomes safe to parse
more bytes at once. On 32-bit this makes fgets2() run about 4% faster, but it brings no gain on 64-bit.
diff --git a/contrib/halog/fgets2-64.c b/contrib/halog/fgets2-64.c
index 7556a31..1be6f22 100644
--- a/contrib/halog/fgets2-64.c
+++ b/contrib/halog/fgets2-64.c
@@ -58,7 +58,7 @@
 #define FGETS2_BUFSIZE		(256*1024)
 const char *fgets2(FILE *stream)
 {
-	static char buffer[FGETS2_BUFSIZE + 9]; // +9 to have zeroes past the end
+	static char buffer[FGETS2_BUFSIZE + 68];
 	static char *end = buffer;
 	static char *line = buffer;
 
@@ -73,7 +73,7 @@
 		 * time.
 		 */
 
-		if (next <= (end-12)) {
+		if (next <= end) {
 			/* max 3 bytes tested here */
 			while ((((unsigned long)next) & 3) && *next != '\n')
 				next++;
@@ -115,8 +115,8 @@
 		if (!has_zero(*(unsigned int *)next ^ 0x0A0A0A0AU))
 			next += 4;
 
-		/* we finish if needed. Note that next might be slightly higher
-		 * than end here because we might have gone past it above.
+		/* We finish if needed : if <next> is below <end>, it means we
+		 * found an LF in one of the 4 following bytes.
 		 */
 		while (next < end) {
 			if (*next == '\n') {
@@ -131,7 +131,8 @@
 
 		/* we found an incomplete line. First, let's move the
 		 * remaining part of the buffer to the beginning, then
-		 * try to complete the buffer with a new read.
+		 * try to complete the buffer with a new read. We can't
+		 * rely on <next> anymore because it went past <end>.
 		 */
 		if (line > buffer) {
 			if (end != line)
@@ -156,6 +157,7 @@
 		}
 
 		end += ret;
+		*end = '\n'; /* make parser stop ASAP */
 		/* search for '\n' again */
 	}
 }
diff --git a/contrib/halog/fgets2.c b/contrib/halog/fgets2.c
index 11baa23..1fd19d7 100644
--- a/contrib/halog/fgets2.c
+++ b/contrib/halog/fgets2.c
@@ -47,7 +47,7 @@
 #define FGETS2_BUFSIZE		(256*1024)
 const char *fgets2(FILE *stream)
 {
-	static char buffer[FGETS2_BUFSIZE + 5];
+	static char buffer[FGETS2_BUFSIZE + 32];
 	static char *end = buffer;
 	static char *line = buffer;
 
@@ -64,8 +64,10 @@
 		while (next < end && (((unsigned long)next) & 3) && *next != '\n')
 			next++;
 
-		/* now next is multiple of 4 or equal to end */
-		while (next <= (end-32)) {
+		/* Now next is multiple of 4 or equal to end. We know we can safely
+		 * read up to 32 bytes past end if needed because they're allocated.
+		 */
+		while (next < end) {
 			if (has_zero(*(unsigned int *)next ^ 0x0A0A0A0A))
 				break;
 			next += 4;
@@ -92,8 +94,8 @@
 			next += 4;
 		}
 
-		/* we finish if needed. Note that next might be slightly higher
-		 * than end here because we might have gone past it above.
+		/* We finish if needed : if <next> is below <end>, it means we
+		 * found an LF in one of the 4 following bytes.
 		 */
 		while (next < end) {
 			if (*next == '\n') {
@@ -108,7 +110,8 @@
 
 		/* we found an incomplete line. First, let's move the
 		 * remaining part of the buffer to the beginning, then
-		 * try to complete the buffer with a new read.
+		 * try to complete the buffer with a new read. We can't
+		 * rely on <next> anymore because it went past <end>.
 		 */
 		if (line > buffer) {
 			if (end != line)
@@ -133,6 +136,7 @@
 		}
 
 		end += ret;
+		*end = '\n';  /* make parser stop ASAP */
 		/* search for '\n' again */
 	}
 }