[MEDIUM] http: add support for "http-no-delay"

There are some very rare server-to-server applications that abuse the HTTP
protocol and expect the payload phase to be highly interactive, with many
interleaved data chunks in both directions within a single request. This is
absolutely not supported by the HTTP specification and will not work across
most proxies or servers. When such applications attempt to do this through
haproxy, it works but they will experience high delays due to the network
optimizations which favor performance by instructing the system to wait for
enough data to be available in order to only send full packets. Typical
delays are around 200 ms per round trip. Note that this only happens with
abnormal uses. Normal uses such as CONNECT requests or WebSockets are not
affected.

When "option http-no-delay" is present in either the frontend or the backend
used by a connection, all such optimizations will be disabled in order to
make the exchanges as fast as possible. Of course this offers no guarantee on
the functionality, as it may break at any other place. But if it works via
HAProxy, it will work as fast as possible. This option should never be used
by default, and should never be used at all unless such a buggy application
is discovered. The impact of using this option is an increase of bandwidth
usage and CPU usage, which may significantly lower performance in high
latency environments.

This change should be backported to 1.4 since the first report of such a
misuse was in 1.4. Next patch will also be needed.
diff --git a/doc/configuration.txt b/doc/configuration.txt
index 00ee8f0..5493b8c 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -984,6 +984,7 @@
 option forceclose                    (*)  X          X         X         X
 -- keyword -------------------------- defaults - frontend - listen -- backend -
 option forwardfor                         X          X         X         X
+option http-no-delay                 (*)  X          X         X         X
 option http-pretend-keepalive        (*)  X          X         X         X
 option http-server-close             (*)  X          X         X         X
 option http-use-proxy-header         (*)  X          X         X         -
@@ -3120,6 +3121,38 @@
   See also : "option httpclose"
 
 
+option http-no-delay
+no option http-no-delay
+  Instruct the system to favor low interactive delays over performance in HTTP
+  May be used in sections :   defaults | frontend | listen | backend
+                                 yes   |    yes   |   yes  |   yes
+  Arguments : none
+
+  In HTTP, each payload is unidirectional and has no notion of interactivity.
+  Any agent is expected to queue data somewhat for a reasonably low delay.
+  There are some very rare server-to-server applications that abuse the HTTP
+  protocol and expect the payload phase to be highly interactive, with many
+  interleaved data chunks in both directions within a single request. This is
+  absolutely not supported by the HTTP specification and will not work across
+  most proxies or servers. When such applications attempt to do this through
+  haproxy, it works but they will experience high delays due to the network
+  optimizations which favor performance by instructing the system to wait for
+  enough data to be available in order to only send full packets. Typical
+  delays are around 200 ms per round trip. Note that this only happens with
+  abnormal uses. Normal uses such as CONNECT requests or WebSockets are not
+  affected.
+
+  When "option http-no-delay" is present in either the frontend or the backend
+  used by a connection, all such optimizations will be disabled in order to
+  make the exchanges as fast as possible. Of course this offers no guarantee on
+  the functionality, as it may break at any other place. But if it works via
+  HAProxy, it will work as fast as possible. This option should never be used
+  by default, and should never be used at all unless such a buggy application
+  is discovered. The impact of using this option is an increase of bandwidth
+  usage and CPU usage, which may significantly lower performance in high
+  latency environments.
+
+
 option http-pretend-keepalive
 no option http-pretend-keepalive
   Define whether haproxy will announce keepalive to the server or not
diff --git a/include/types/buffers.h b/include/types/buffers.h
index 2669f05..39a1192 100644
--- a/include/types/buffers.h
+++ b/include/types/buffers.h
@@ -114,8 +114,9 @@
 #define BF_DONT_READ     0x1000000  /* disable reading for now */
 #define BF_EXPECT_MORE   0x2000000  /* more data expected to be sent very soon (one-shoot) */
 #define BF_SEND_DONTWAIT 0x4000000  /* don't wait for sending data (one-shoot) */
+#define BF_NEVER_WAIT    0x8000000  /* never wait for sending data (permanent) */
 
-#define BF_WAKE_ONCE     0x8000000  /* pretend there is activity on this buffer (one-shoot) */
+#define BF_WAKE_ONCE    0x10000000  /* pretend there is activity on this buffer (one-shoot) */
 
 /* Use these masks to clear the flags before going back to lower layers */
 #define BF_CLEAR_READ     (~(BF_READ_NULL|BF_READ_PARTIAL|BF_READ_ERROR|BF_READ_ATTACHED))
diff --git a/include/types/proxy.h b/include/types/proxy.h
index e9532ed..fea8a5e 100644
--- a/include/types/proxy.h
+++ b/include/types/proxy.h
@@ -152,6 +152,7 @@
 
 #define PR_O2_PGSQL_CHK 0x10000000      /* use PGSQL check for server health */
 #define PR_O2_DISPATCH  0x20000000      /* use dispatch mode */
+#define PR_O2_NODELAY   0x40000000      /* fully interactive mode, never delay outgoing data */
 /* end of proxy->options2 */
 
 /* bits for sticking rules */
diff --git a/src/cfgparse.c b/src/cfgparse.c
index 14c5131..dedd5c7 100644
--- a/src/cfgparse.c
+++ b/src/cfgparse.c
@@ -160,6 +160,7 @@
 	{ "independant-streams",          PR_O2_INDEPSTR,  PR_CAP_FE|PR_CAP_BE, 0, 0 },
 	{ "http-use-proxy-header",        PR_O2_USE_PXHDR, PR_CAP_FE, 0, PR_MODE_HTTP },
 	{ "http-pretend-keepalive",       PR_O2_FAKE_KA,   PR_CAP_FE|PR_CAP_BE, 0, PR_MODE_HTTP },
+	{ "http-no-delay",                PR_O2_NODELAY,   PR_CAP_FE|PR_CAP_BE, 0, PR_MODE_HTTP },
 	{ NULL, 0, 0, 0 }
 };
 
diff --git a/src/proto_http.c b/src/proto_http.c
index 07faf1f..391d630 100644
--- a/src/proto_http.c
+++ b/src/proto_http.c
@@ -3987,8 +3987,8 @@
 	s->req->cons->err_loc   = NULL;
 	s->req->cons->exp       = TICK_ETERNITY;
 	s->req->cons->flags     = SI_FL_NONE;
-	s->req->flags &= ~(BF_SHUTW|BF_SHUTW_NOW|BF_AUTO_CONNECT|BF_WRITE_ERROR|BF_STREAMER|BF_STREAMER_FAST);
-	s->rep->flags &= ~(BF_SHUTR|BF_SHUTR_NOW|BF_READ_ATTACHED|BF_READ_ERROR|BF_READ_NOEXP|BF_STREAMER|BF_STREAMER_FAST|BF_WRITE_PARTIAL);
+	s->req->flags &= ~(BF_SHUTW|BF_SHUTW_NOW|BF_AUTO_CONNECT|BF_WRITE_ERROR|BF_STREAMER|BF_STREAMER_FAST|BF_NEVER_WAIT);
+	s->rep->flags &= ~(BF_SHUTR|BF_SHUTR_NOW|BF_READ_ATTACHED|BF_READ_ERROR|BF_READ_NOEXP|BF_STREAMER|BF_STREAMER_FAST|BF_WRITE_PARTIAL|BF_NEVER_WAIT);
 	s->flags &= ~(SN_DIRECT|SN_ASSIGNED|SN_ADDR_SET|SN_BE_ASSIGNED|SN_FORCE_PRST|SN_IGNORE_PRST);
 	s->flags &= ~(SN_CURR_SESS|SN_REDIRECTABLE);
 	s->txn.meth = 0;
@@ -3997,6 +3997,11 @@
 	if (s->fe->options2 & PR_O2_INDEPSTR)
 		s->req->cons->flags |= SI_FL_INDEP_STR;
 
+	if (s->fe->options2 & PR_O2_NODELAY) {
+		s->req->flags |= BF_NEVER_WAIT;
+		s->rep->flags |= BF_NEVER_WAIT;
+	}
+
 	/* if the request buffer is not empty, it means we're
 	 * about to process another request, so send pending
 	 * data with MSG_MORE to merge TCP packets when possible.
diff --git a/src/proxy.c b/src/proxy.c
index 0129b12..02e1ee5 100644
--- a/src/proxy.c
+++ b/src/proxy.c
@@ -793,6 +793,11 @@
 		hdr_idx_init(&s->txn.hdr_idx);
 	}
 
+	if (be->options2 & PR_O2_NODELAY) {
+		s->req->flags |= BF_NEVER_WAIT;
+		s->rep->flags |= BF_NEVER_WAIT;
+	}
+
 	/* We want to enable the backend-specific analysers except those which
 	 * were already run as part of the frontend/listener. Note that it would
 	 * be more reliable to store the list of analysers that have been run,
diff --git a/src/session.c b/src/session.c
index 27f4fbd..2c1d359 100644
--- a/src/session.c
+++ b/src/session.c
@@ -257,6 +257,11 @@
 	s->si[0].ob = s->si[1].ib = s->rep;
 	s->rep->analysers = 0;
 
+	if (s->fe->options2 & PR_O2_NODELAY) {
+		s->req->flags |= BF_NEVER_WAIT;
+		s->rep->flags |= BF_NEVER_WAIT;
+	}
+
 	s->rep->rto = TICK_ETERNITY;
 	s->rep->wto = TICK_ETERNITY;
 	s->rep->rex = TICK_ETERNITY;
diff --git a/src/stream_sock.c b/src/stream_sock.c
index 3641b4e..b08134a 100644
--- a/src/stream_sock.c
+++ b/src/stream_sock.c
@@ -643,8 +643,9 @@
 		if (MSG_NOSIGNAL && MSG_MORE) {
 			unsigned int send_flag = MSG_DONTWAIT | MSG_NOSIGNAL;
 
-			if ((b->to_forward && b->to_forward != BUF_INFINITE_FORWARD) ||
-			    (b->flags & BF_EXPECT_MORE) ||
+			if ((!(b->flags & BF_NEVER_WAIT) &&
+			    ((b->to_forward && b->to_forward != BUF_INFINITE_FORWARD) ||
+			     (b->flags & BF_EXPECT_MORE))) ||
 			    ((b->flags & (BF_SHUTW|BF_SHUTW_NOW|BF_HIJACK)) == BF_SHUTW_NOW && (max == b->send_max)) ||
 			    (max != b->l && max != b->send_max)) {
 				send_flag |= MSG_MORE;