[BUG] fix truncated responses with sepoll

Due to the way Linux delivers EPOLLIN and EPOLLHUP, a connection close
received after some server data sometimes results in truncated
responses if the client disconnects before the server starts to
respond. The reason is that the EPOLLHUP flag is processed as an
indication of end of transfer while some data may still remain in the
system's socket buffers.

This problem could only be triggered with sepoll, although nothing should
prevent it from happening with normal epoll. In fact, the work factoring
performed by sepoll increases the risk that this bug appears.

The fix consists in making FD_POLL_HUP and FD_POLL_ERR sticky and in
checking them only when FD_POLL_IN is not set, meaning that we have
read all pending data.

That way, the problem is definitely fixed and sepoll still remains about
17% faster than epoll since it can take into account all information
returned by the kernel.
diff --git a/src/checks.c b/src/checks.c
index 20ff8db..ff02d01 100644
--- a/src/checks.c
+++ b/src/checks.c
@@ -240,12 +240,12 @@
 	task_wakeup(t);
  out_nowake:
 	EV_FD_CLR(fd, DIR_WR);   /* nothing more to write */
-	fdtab[fd].ev &= ~FD_POLL_WR;
+	fdtab[fd].ev &= ~FD_POLL_OUT;
 	return 1;
  out_poll:
 	/* The connection is still pending. We'll have to poll it
 	 * before attempting to go further. */
-	fdtab[fd].ev &= ~FD_POLL_WR;
+	fdtab[fd].ev &= ~FD_POLL_OUT;
 	return 0;
  out_error:
 	s->result |= SRV_CHK_ERROR;
@@ -296,7 +296,7 @@
 #endif
 	if (unlikely(len < 0 && errno == EAGAIN)) {
 		/* we want some polling to happen first */
-		fdtab[fd].ev &= ~FD_POLL_RD;
+		fdtab[fd].ev &= ~FD_POLL_IN;
 		return 0;
 	}
 
@@ -346,7 +346,7 @@
 
 	EV_FD_CLR(fd, DIR_RD);
 	task_wakeup(t);
-	fdtab[fd].ev &= ~FD_POLL_RD;
+	fdtab[fd].ev &= ~FD_POLL_IN;
 	return 1;
 }
 
@@ -476,7 +476,6 @@
 						fdtab[fd].peeraddr = (struct sockaddr *)&sa;
 						fdtab[fd].peerlen = sizeof(sa);
 						fdtab[fd].state = FD_STCONN; /* connection in progress */
-						fdtab[fd].ev = 0;
 						EV_FD_SET(fd, DIR_WR);  /* for connect status */
 #ifdef DEBUG_FULL
 						assert (!EV_FD_ISSET(fd, DIR_RD));
diff --git a/src/client.c b/src/client.c
index d1d763d..bff5cd9 100644
--- a/src/client.c
+++ b/src/client.c
@@ -366,7 +366,6 @@
 		fdtab[cfd].cb[DIR_WR].b = s->rep;
 		fdtab[cfd].peeraddr = (struct sockaddr *)&s->cli_addr;
 		fdtab[cfd].peerlen = sizeof(s->cli_addr);
-		fdtab[cfd].ev = 0;
 
 		if ((p->mode == PR_MODE_HTTP && (s->flags & SN_MONITOR)) ||
 		    (p->mode == PR_MODE_HEALTH && (p->options & PR_O_HTTP_CHK))) {
diff --git a/src/ev_sepoll.c b/src/ev_sepoll.c
index c58bf53..61f1c6e 100644
--- a/src/ev_sepoll.c
+++ b/src/ev_sepoll.c
@@ -16,6 +16,7 @@
 
 #include <common/compat.h>
 #include <common/config.h>
+#include <common/debug.h>
 #include <common/standard.h>
 #include <common/time.h>
 #include <common/tools.h>
@@ -285,7 +286,7 @@
 		 * the WAIT status.
 		 */
 
-		fdtab[fd].ev = 0;
+		fdtab[fd].ev &= FD_POLL_STICKY;
 		if ((eo & FD_EV_MASK_R) == FD_EV_SPEC_R) {
 			/* The owner is interested in reading from this FD */
 			if (fdtab[fd].state != FD_STCLOSE && fdtab[fd].state != FD_STERROR) {
@@ -412,7 +413,12 @@
 		/* it looks complicated but gcc can optimize it away when constants
 		 * have same values.
 		 */
-		fdtab[fd].ev = 
+		DPRINTF(stderr, "%s:%d: fd=%d, ev=0x%08x, e=0x%08x\n",
+			__FUNCTION__, __LINE__,
+			fd, fdtab[fd].ev, e);
+
+		fdtab[fd].ev &= FD_POLL_STICKY;
+		fdtab[fd].ev |= 
 			((e & EPOLLIN ) ? FD_POLL_IN  : 0) |
 			((e & EPOLLPRI) ? FD_POLL_PRI : 0) |
 			((e & EPOLLOUT) ? FD_POLL_OUT : 0) |
@@ -422,14 +428,14 @@
 		if ((fd_list[fd].e & FD_EV_MASK_R) == FD_EV_WAIT_R) {
 			if (fdtab[fd].state == FD_STCLOSE || fdtab[fd].state == FD_STERROR)
 				continue;
-			if (fdtab[fd].ev & (FD_POLL_RD|FD_POLL_HUP|FD_POLL_ERR))
+			if (fdtab[fd].ev & (FD_POLL_IN|FD_POLL_HUP|FD_POLL_ERR))
 				fdtab[fd].cb[DIR_RD].f(fd);
 		}
 
 		if ((fd_list[fd].e & FD_EV_MASK_W) == FD_EV_WAIT_W) {
 			if (fdtab[fd].state == FD_STCLOSE || fdtab[fd].state == FD_STERROR)
 				continue;
-			if (fdtab[fd].ev & (FD_POLL_WR|FD_POLL_ERR))
+			if (fdtab[fd].ev & (FD_POLL_OUT|FD_POLL_ERR))
 				fdtab[fd].cb[DIR_WR].f(fd);
 		}
 	}
diff --git a/src/proto_tcp.c b/src/proto_tcp.c
index 0891faa..d122a03 100644
--- a/src/proto_tcp.c
+++ b/src/proto_tcp.c
@@ -271,7 +271,6 @@
 	fdtab[fd].peeraddr = NULL;
 	fdtab[fd].peerlen = 0;
 	fdtab[fd].listener = NULL;
-	fdtab[fd].ev = 0;
  tcp_return:
 	if (msg && errlen)
 		strlcpy2(errmsg, msg, errlen);
diff --git a/src/proto_uxst.c b/src/proto_uxst.c
index d367e2b..9054722 100644
--- a/src/proto_uxst.c
+++ b/src/proto_uxst.c
@@ -276,7 +276,6 @@
 	fdtab[fd].peeraddr = NULL;
 	fdtab[fd].peerlen = 0;
 	fdtab[fd].listener = NULL;
-	fdtab[fd].ev = 0;
 	return ERR_NONE;
 }
 
@@ -499,8 +498,6 @@
 		fdtab[cfd].cb[DIR_WR].b = s->rep;
 		fdtab[cfd].peeraddr = (struct sockaddr *)&s->cli_addr;
 		fdtab[cfd].peerlen = sizeof(s->cli_addr);
-		fdtab[cfd].ev = 0;
-
 
 		tv_eternity(&s->req->rex);
 		tv_eternity(&s->req->wex);
diff --git a/src/stream_sock.c b/src/stream_sock.c
index 8c47d31..08cc65b 100644
--- a/src/stream_sock.c
+++ b/src/stream_sock.c
@@ -21,6 +21,7 @@
 
 #include <common/compat.h>
 #include <common/config.h>
+#include <common/debug.h>
 #include <common/standard.h>
 #include <common/time.h>
 
@@ -41,25 +42,24 @@
  * otherwise.
  */
 int stream_sock_read(int fd) {
-	__label__ out_eternity, out_wakeup, out_error;
+	__label__ out_eternity, out_wakeup, out_shutdown_r, out_error;
 	struct buffer *b = fdtab[fd].cb[DIR_RD].b;
 	int ret, max, retval;
 	int read_poll = MAX_READ_POLL_LOOPS;
 
 #ifdef DEBUG_FULL
-	fprintf(stderr,"stream_sock_read : fd=%d, owner=%p\n", fd, fdtab[fd].owner);
+	fprintf(stderr,"stream_sock_read : fd=%d, ev=0x%02x, owner=%p\n", fd, fdtab[fd].ev, fdtab[fd].owner);
 #endif
 
 	retval = 1;
 
-	if (unlikely(fdtab[fd].ev & FD_POLL_HUP)) {
-		/* connection closed */
-		b->flags |= BF_READ_NULL;
-		goto out_eternity;
-	}
-	else if (unlikely(fdtab[fd].state == FD_STERROR || (fdtab[fd].ev & FD_POLL_ERR))) {
+	/* stop immediately on errors */
+	if (fdtab[fd].state == FD_STERROR || (fdtab[fd].ev & FD_POLL_ERR))
 		goto out_error;
-	}
+
+	/* stop here if we reached the end of data */
+	if ((fdtab[fd].ev & (FD_POLL_IN|FD_POLL_HUP)) == FD_POLL_HUP)
+		goto out_shutdown_r;
 
 	while (1) {
 		/*
@@ -128,10 +128,15 @@
 
 			/* if too many bytes were missing from last read, it means that
 			 * it's pointless trying to read again because the system does
-			 * not have them in buffers.
+			 * not have them in buffers. BTW, if FD_POLL_HUP was present,
+			 * it means that we have reached the end and that the connection
+			 * is closed.
 			 */
-			if (ret < max)
+			if (ret < max) {
+				if (fdtab[fd].ev & FD_POLL_HUP)
+					goto out_shutdown_r;
 				break;
+			}
 
 			/* generally if we read something smaller than 1 or 2 MSS,
 			 * it means that it's not worth trying to read again. It may
@@ -147,8 +152,7 @@
 		}
 		else if (ret == 0) {
 			/* connection closed */
-			b->flags |= BF_READ_NULL;
-			goto out_eternity;
+			goto out_shutdown_r;
 		}
 		else if (errno == EAGAIN) {
 			/* Ignore EAGAIN but inform the poller that there is
@@ -180,14 +184,20 @@
  out_wakeup:
 	if (b->flags & BF_READ_STATUS)
 		task_wakeup(fdtab[fd].owner);
-	fdtab[fd].ev &= ~FD_POLL_RD;
+	fdtab[fd].ev &= ~FD_POLL_IN;
 	return retval;
 
+ out_shutdown_r:
+	fdtab[fd].ev &= ~FD_POLL_HUP;
+	b->flags |= BF_READ_NULL;
+	goto out_eternity;
+
  out_error:
 	/* There was an error. we must wakeup the task. No need to clear
 	 * the events, the task will do it.
 	 */
 	fdtab[fd].state = FD_STERROR;
+	fdtab[fd].ev &= ~FD_POLL_STICKY;
 	b->flags |= BF_READ_ERROR;
 	goto out_eternity;
 }
@@ -210,7 +220,7 @@
 #endif
 
 	retval = 1;
-	if (unlikely(fdtab[fd].state == FD_STERROR || (fdtab[fd].ev & FD_POLL_ERR)))
+	if (fdtab[fd].state == FD_STERROR || (fdtab[fd].ev & FD_POLL_ERR))
 		goto out_error;
 
 	while (1) {
@@ -337,7 +347,7 @@
  out_wakeup:
 	if (b->flags & BF_WRITE_STATUS)
 		task_wakeup(fdtab[fd].owner);
-	fdtab[fd].ev &= ~FD_POLL_WR;
+	fdtab[fd].ev &= ~FD_POLL_OUT;
 	return retval;
 
  out_error:
@@ -345,6 +355,7 @@
 	 * the events, the task will do it.
 	 */
 	fdtab[fd].state = FD_STERROR;
+	fdtab[fd].ev &= ~FD_POLL_STICKY;
 	b->flags |= BF_WRITE_ERROR;
 	goto out_eternity;