[MEDIUM] http: revert to use a swap buffer for realignment

The bounce realign function was algorithmically good but as expected
it was not cache-friendly. Using it with large requests caused so much
cache thrashing that the function itself could drain 70% of the total
CPU time for only 0.5% of the calls !

Revert back to a standard memcpy() using a specially allocated swap
buffer. We're now back to 2M req/s on pipelined requests.
diff --git a/include/types/global.h b/include/types/global.h
index 463bd38..9c62461 100644
--- a/include/types/global.h
+++ b/include/types/global.h
@@ -1,23 +1,23 @@
 /*
-  include/types/global.h
-  Global variables.
-
-  Copyright (C) 2000-2007 Willy Tarreau - w@1wt.eu
-  
-  This library is free software; you can redistribute it and/or
-  modify it under the terms of the GNU Lesser General Public
-  License as published by the Free Software Foundation, version 2.1
-  exclusively.
-
-  This library is distributed in the hope that it will be useful,
-  but WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  Lesser General Public License for more details.
-
-  You should have received a copy of the GNU Lesser General Public
-  License along with this library; if not, write to the Free Software
-  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
-*/
+ * include/types/global.h
+ * Global variables.
+ *
+ * Copyright (C) 2000-2010 Willy Tarreau - w@1wt.eu
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation, version 2.1
+ * exclusively.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ */
 
 #ifndef _TYPES_GLOBAL_H
 #define _TYPES_GLOBAL_H
@@ -103,6 +103,7 @@
 extern int  actconn;            /* # of active sessions */
 extern int listeners;
 extern char trash[BUFSIZE];
+extern char *swap_buffer;
 extern const int zero;
 extern const int one;
 extern const struct linger nolinger;
diff --git a/src/haproxy.c b/src/haproxy.c
index d8c584c..e82ab56 100644
--- a/src/haproxy.c
+++ b/src/haproxy.c
@@ -138,6 +138,11 @@
 /* this is used to drain data, and as a temporary buffer for sprintf()... */
 char trash[BUFSIZE];
 
+/* this buffer is always the same size as standard buffers and is used for
+ * swapping data inside a buffer.
+ */
+char *swap_buffer = NULL;
+
 const int zero = 0;
 const int one = 1;
 const struct linger nolinger = { .l_onoff = 1, .l_linger = 0 };
@@ -653,6 +658,8 @@
 	if (global.nbproc < 1)
 		global.nbproc = 1;
 
+	swap_buffer = (char *)calloc(1, global.tune.bufsize);
+
 	fdinfo = (struct fdinfo *)calloc(1,
 				       sizeof(struct fdinfo) * (global.maxsock));
 	fdtab = (struct fdtab *)calloc(1,
diff --git a/src/proto_http.c b/src/proto_http.c
index cb51205..aff5ca3 100644
--- a/src/proto_http.c
+++ b/src/proto_http.c
@@ -2259,14 +2259,21 @@
 
 	/* two possible cases :
 	 *   - the buffer is in one contiguous block, we move it in-place
-	 *   - the buffer is in two blocks, we move it via the trash
+	 *   - the buffer is in two blocks, we move it via the swap_buffer
 	 */
 	if (buf->l) {
-		if (buf->r <= buf->w)
+		int block1 = buf->l;
+		int block2 = 0;
+		if (buf->r <= buf->w) {
 			/* non-contiguous block */
-			buffer_bounce_realign(buf);
-		else
-			memmove(buf->data, buf->w, buf->l);
+			block1 = buf->data + buf->size - buf->w;
+			block2 = buf->r - buf->data;
+		}
+		if (block2)
+			memcpy(swap_buffer, buf->data, block2);
+		memmove(buf->data, buf->w, block1);
+		if (block2)
+			memcpy(buf->data + block1, swap_buffer, block2);
 	}
 
 	/* adjust all known pointers */