BUG/MEDIUM: stream-int: do not rely on the connection error once established
Historically the stream-interface code used to check for connection
errors by itself. Later this was partially deferred to muxes, but
only once the mux is installed or the connection is at least in the
established state. But probably as a safety practice the connection
error tests remained.
The problem is that they are causing trouble on when a response received
from a mux is mixed with an error report. The typical case is an upload
that is interrupted by the server sending an error or redirect without
draining all data, causing an RST to be queued just after the data. In
this case the mux has the data, the CO_FL_ERROR flag is present on the
connection, and unfortunately the stream-interface refuses to retrieve
the data due to this flag, and return an error to the client.
It's about time to only rely on CS_FL_ERROR which is set by the mux, but
the stream-interface is still responsible for the connection during its
setup. However everywhere the CO_FL_ERROR is checked, CS_FL_ERROR is
also checked.
This commit addresses this by:
- adding a new function si_is_conn_error() that checks the SI state
and only reports the status of CO_FL_ERROR for states before
SI_ST_EST.
- eliminating all checks for CO_FL_ERORR in places where CS_FL_ERROR
is already checked and either the presence of a mux was already
validated or the stream-int's state was already checked as being
SI_ST_EST or higher.
CO_FL_ERROR tests on the send() direction are also inappropriate as they
may cause the loss of pending data. Now this doesn't happen anymore and
such events are only converted to CS_FL_ERROR by the mux once notified of
the problem. As such, this must not cause the loss of any error event.
Now an early error reported on a backend mux doesn't prevent the queued
response from being read and forwarded to the client (the list of syscalls
below was trimmed and epoll_ctl is not represented):
recvfrom(10, "POST / HTTP/1.1\r\nConnection: clo"..., 16320, 0, NULL, NULL) = 66
sendto(11, "POST / HTTP/1.1\r\ntransfer-encodi"..., 47, MSG_DONTWAIT|MSG_NOSIGNAL, NULL, 0) = 47
epoll_wait(3, [{events=EPOLLIN|EPOLLERR|EPOLLHUP|EPOLLRDHUP, data={u32=11, u64=11}}], 200, 15001) = 1
recvfrom(11, "HTTP/1.1 200 OK\r\ncontent-length:"..., 16320, 0, NULL, NULL) = 57
sendto(10, "HTTP/1.1 200 OK\r\ncontent-length:"..., 57, MSG_DONTWAIT|MSG_NOSIGNAL, NULL, 0) = 57
epoll_wait(3, [{events=EPOLLIN|EPOLLERR|EPOLLHUP|EPOLLRDHUP, data={u32=11, u64=11}}], 200, 13001) = 1
epoll_wait(3, [{events=EPOLLIN, data={u32=10, u64=10}}], 200, 13001) = 1
recvfrom(10, "A\n0123456789\r\n0\r\n\r\n", 16320, 0, NULL, NULL) = 19
shutdown(10, SHUT_WR) = 0
close(11) = 0
close(10) = 0
Above the server is an haproxy configured with the following:
listen blah
bind :8002
mode http
timeout connect 5s
timeout client 5s
timeout server 5s
option httpclose
option nolinger
http-request return status 200 hdr connection close
And the client takes care of sending requests and data in two distinct
parts:
while :; do
./dev/tcploop/tcploop 8001 C T S:"POST / HTTP/1.1\r\nConnection: close\r\nTransfer-encoding: chunked\r\n\r\n" P1 S:"A\n0123456789\r\n0\r\n\r\n" P R F;
done
With this, a small percentage of the requests will reproduce the behavior
above. Note that this fix requires the following patch to be applied for
the test above to work:
BUG/MEDIUM: mux-h1: only turn CO_FL_ERROR to CS_FL_ERROR with empty ibuf
This should be backported with after a few weeks of observation, and
likely one version at a time. During the backports, the patch might
need to be adjusted at each check of CO_FL_ERORR to follow the
principles explained above.
(cherry picked from commit d1480cc8a44395c27c715c0dd431d3e37a83a4c8)
Signed-off-by: Christopher Faulet <cfaulet@haproxy.com>
(cherry picked from commit ba25116c1e0f5c37d5141cf38cc1ede26de21d98)
Signed-off-by: Christopher Faulet <cfaulet@haproxy.com>
diff --git a/src/stream_interface.c b/src/stream_interface.c
index afcf3c0..e4f8bd3 100644
--- a/src/stream_interface.c
+++ b/src/stream_interface.c
@@ -554,6 +554,23 @@
ic->flags &= ~CF_READ_DONTWAIT;
}
+/* The stream interface is only responsible for the connection during the early
+ * states, before plugging a mux. Thus it should only care about CO_FL_ERROR
+ * before SI_ST_EST, and after that it must absolutely ignore it since the mux
+ * may hold pending data. This function returns true if such an error was
+ * reported. Both the CS and the CONN must be valid.
+ */
+static inline int si_is_conn_error(const struct stream_interface *si)
+{
+ struct connection *conn;
+
+ if (si->state >= SI_ST_EST)
+ return 0;
+
+ conn = __objt_cs(si->end)->conn;
+ BUG_ON(!conn);
+ return !!(conn->flags & CO_FL_ERROR);
+}
/* Called by I/O handlers after completion.. It propagates
* connection flags to the stream interface, updates the stream (which may or
@@ -584,9 +601,11 @@
* wake callback. Otherwise si_cs_recv()/si_cs_send() already take
* care of it.
*/
- if (si->state >= SI_ST_CON &&
- (conn->flags & CO_FL_ERROR || cs->flags & CS_FL_ERROR))
- si->flags |= SI_FL_ERR;
+
+ if (si->state >= SI_ST_CON) {
+ if ((cs->flags & CS_FL_ERROR) || si_is_conn_error(si))
+ si->flags |= SI_FL_ERR;
+ }
/* If we had early data, and the handshake ended, then
* we can remove the flag, and attempt to wake the task up,
@@ -655,7 +674,7 @@
int ret;
int did_send = 0;
- if (conn->flags & CO_FL_ERROR || cs->flags & (CS_FL_ERROR|CS_FL_ERR_PENDING)) {
+ if (cs->flags & (CS_FL_ERROR|CS_FL_ERR_PENDING) || si_is_conn_error(si)) {
/* We're probably there because the tasklet was woken up,
* but process_stream() ran before, detected there were an
* error and put the si back to SI_ST_TAR. There's still
@@ -778,7 +797,7 @@
si_rx_room_rdy(si_opposite(si));
}
- if (conn->flags & CO_FL_ERROR || cs->flags & (CS_FL_ERROR|CS_FL_ERR_PENDING)) {
+ if (cs->flags & (CS_FL_ERROR|CS_FL_ERR_PENDING)) {
si->flags |= SI_FL_ERR;
return 1;
}
@@ -1133,7 +1152,7 @@
if (!(si->wait_event.events & SUB_RETRY_SEND) && !channel_is_empty(si_oc(si)))
si_cs_send(cs);
- if (cs->flags & (CS_FL_ERROR|CS_FL_ERR_PENDING) || cs->conn->flags & CO_FL_ERROR) {
+ if (cs->flags & (CS_FL_ERROR|CS_FL_ERR_PENDING) || si_is_conn_error(si)) {
/* Write error on the file descriptor */
if (si->state >= SI_ST_CON)
si->flags |= SI_FL_ERR;
@@ -1248,7 +1267,7 @@
if (!(cs->flags & CS_FL_RCV_MORE)) {
if (!conn_xprt_ready(conn))
return 0;
- if (conn->flags & CO_FL_ERROR || cs->flags & CS_FL_ERROR)
+ if (cs->flags & CS_FL_ERROR)
goto end_recv;
}
@@ -1305,7 +1324,7 @@
ic->flags |= CF_READ_PARTIAL;
}
- if (conn->flags & CO_FL_ERROR || cs->flags & (CS_FL_EOS|CS_FL_ERROR))
+ if (cs->flags & (CS_FL_EOS|CS_FL_ERROR))
goto end_recv;
if (conn->flags & CO_FL_WAIT_ROOM) {
@@ -1361,7 +1380,7 @@
* recv().
*/
while ((cs->flags & CS_FL_RCV_MORE) ||
- (!(conn->flags & (CO_FL_ERROR | CO_FL_HANDSHAKE)) &&
+ (!(conn->flags & CO_FL_HANDSHAKE) &&
(!(cs->flags & (CS_FL_ERROR|CS_FL_EOS))) && !(ic->flags & CF_SHUTR))) {
int cur_flags = flags;
@@ -1507,8 +1526,7 @@
ret = 1;
}
- if (conn->flags & CO_FL_ERROR || cs->flags & CS_FL_ERROR) {
- cs->flags |= CS_FL_ERROR;
+ if (cs->flags & CS_FL_ERROR) {
si->flags |= SI_FL_ERR;
ret = 1;
}