blob: 52860eaee5a825c66662383f8e983f09869053e2 [file] [log] [blame]
/*
 * Functions operating on SOCK_STREAM and buffers.
 *
 * Copyright 2000-2008 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
13#include <errno.h>
14#include <fcntl.h>
15#include <stdio.h>
16#include <stdlib.h>
17
18#include <sys/socket.h>
19#include <sys/stat.h>
20#include <sys/types.h>
21
Willy Tarreau2dd0d472006-06-29 17:53:05 +020022#include <common/compat.h>
Willy Tarreaue3ba5f02006-06-29 18:54:54 +020023#include <common/config.h>
Willy Tarreaud6f087e2008-01-18 17:20:13 +010024#include <common/debug.h>
Willy Tarreau83749182007-04-15 20:56:27 +020025#include <common/standard.h>
Willy Tarreau0c303ee2008-07-07 00:09:58 +020026#include <common/ticks.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020027#include <common/time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020028
Willy Tarreau2d212792008-08-27 21:41:35 +020029#include <proto/buffers.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020030#include <proto/client.h>
31#include <proto/fd.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020032#include <proto/stream_sock.h>
33#include <proto/task.h>
34
35
36/*
Willy Tarreaud7971282006-07-29 18:36:34 +020037 * this function is called on a read event from a stream socket.
Willy Tarreau83749182007-04-15 20:56:27 +020038 * It returns 0 if we have a high confidence that we will not be
39 * able to read more data without polling first. Returns non-zero
40 * otherwise.
Willy Tarreaubaaee002006-06-26 02:48:02 +020041 */
Willy Tarreaud7971282006-07-29 18:36:34 +020042int stream_sock_read(int fd) {
Willy Tarreau0c303ee2008-07-07 00:09:58 +020043 __label__ out_wakeup, out_shutdown_r, out_error;
Willy Tarreau54469402006-07-29 16:59:06 +020044 struct buffer *b = fdtab[fd].cb[DIR_RD].b;
Willy Tarreau8a7af602008-05-03 23:07:14 +020045 int ret, max, retval, cur_read;
Willy Tarreaub8949f12007-03-23 22:39:59 +010046 int read_poll = MAX_READ_POLL_LOOPS;
Willy Tarreaubaaee002006-06-26 02:48:02 +020047
48#ifdef DEBUG_FULL
Willy Tarreaud6f087e2008-01-18 17:20:13 +010049 fprintf(stderr,"stream_sock_read : fd=%d, ev=0x%02x, owner=%p\n", fd, fdtab[fd].ev, fdtab[fd].owner);
Willy Tarreaubaaee002006-06-26 02:48:02 +020050#endif
51
Willy Tarreau83749182007-04-15 20:56:27 +020052 retval = 1;
53
Willy Tarreaud6f087e2008-01-18 17:20:13 +010054 /* stop immediately on errors */
55 if (fdtab[fd].state == FD_STERROR || (fdtab[fd].ev & FD_POLL_ERR))
Willy Tarreau6996e152007-04-30 14:37:43 +020056 goto out_error;
Willy Tarreaud6f087e2008-01-18 17:20:13 +010057
58 /* stop here if we reached the end of data */
59 if ((fdtab[fd].ev & (FD_POLL_IN|FD_POLL_HUP)) == FD_POLL_HUP)
60 goto out_shutdown_r;
Willy Tarreau83749182007-04-15 20:56:27 +020061
Willy Tarreau8a7af602008-05-03 23:07:14 +020062 cur_read = 0;
Willy Tarreau6996e152007-04-30 14:37:43 +020063 while (1) {
64 /*
65 * 1. compute the maximum block size we can read at once.
66 */
Willy Tarreau83749182007-04-15 20:56:27 +020067 if (b->l == 0) { /* let's realign the buffer to optimize I/O */
68 b->r = b->w = b->lr = b->data;
69 max = b->rlim - b->data;
70 }
71 else if (b->r > b->w) {
72 max = b->rlim - b->r;
73 }
74 else {
75 max = b->w - b->r;
76 /* FIXME: theorically, if w>0, we shouldn't have rlim < data+size anymore
77 * since it means that the rewrite protection has been removed. This
78 * implies that the if statement can be removed.
79 */
80 if (max > b->rlim - b->data)
Willy Tarreaubaaee002006-06-26 02:48:02 +020081 max = b->rlim - b->data;
Willy Tarreau83749182007-04-15 20:56:27 +020082 }
Willy Tarreaubaaee002006-06-26 02:48:02 +020083
Willy Tarreau6996e152007-04-30 14:37:43 +020084 if (unlikely(max == 0)) {
85 /* Not anymore room to store data. This should theorically
86 * never happen, but better safe than sorry !
87 */
Willy Tarreaue393fe22008-08-16 22:18:07 +020088 b->flags |= BF_FULL;
Willy Tarreau83749182007-04-15 20:56:27 +020089 EV_FD_CLR(fd, DIR_RD);
Willy Tarreau0c303ee2008-07-07 00:09:58 +020090 b->rex = TICK_ETERNITY;
91 goto out_wakeup;
Willy Tarreau83749182007-04-15 20:56:27 +020092 }
Willy Tarreaubaaee002006-06-26 02:48:02 +020093
Willy Tarreau6996e152007-04-30 14:37:43 +020094 /*
95 * 2. read the largest possible block
96 */
Willy Tarreaubaaee002006-06-26 02:48:02 +020097#ifndef MSG_NOSIGNAL
Willy Tarreau83749182007-04-15 20:56:27 +020098 {
99 int skerr;
100 socklen_t lskerr = sizeof(skerr);
101
102 ret = getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr);
103 if (ret == -1 || skerr)
104 ret = -1;
105 else
106 ret = recv(fd, b->r, max, 0);
107 }
Willy Tarreaubaaee002006-06-26 02:48:02 +0200108#else
Willy Tarreau83749182007-04-15 20:56:27 +0200109 ret = recv(fd, b->r, max, MSG_NOSIGNAL);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200110#endif
Willy Tarreau83749182007-04-15 20:56:27 +0200111 if (ret > 0) {
112 b->r += ret;
113 b->l += ret;
Willy Tarreau8a7af602008-05-03 23:07:14 +0200114 cur_read += ret;
Willy Tarreau3da77c52008-08-29 09:58:42 +0200115 b->flags |= BF_READ_PARTIAL;
Willy Tarreaue393fe22008-08-16 22:18:07 +0200116 b->flags &= ~BF_EMPTY;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200117
Willy Tarreau83749182007-04-15 20:56:27 +0200118 if (b->r == b->data + BUFSIZE) {
119 b->r = b->data; /* wrap around the buffer */
120 }
Willy Tarreau9641e8f2007-03-23 23:02:09 +0100121
Willy Tarreau83749182007-04-15 20:56:27 +0200122 b->total += ret;
Willy Tarreau9641e8f2007-03-23 23:02:09 +0100123
Willy Tarreaue393fe22008-08-16 22:18:07 +0200124 if (b->l >= b->rlim - b->data) {
Willy Tarreau6996e152007-04-30 14:37:43 +0200125 /* The buffer is now full, there's no point in going through
126 * the loop again.
127 */
Willy Tarreau8a7af602008-05-03 23:07:14 +0200128 if (!(b->flags & BF_STREAMER_FAST) && (cur_read == b->l)) {
129 b->xfer_small = 0;
130 b->xfer_large++;
131 if (b->xfer_large >= 3) {
132 /* we call this buffer a fast streamer if it manages
133 * to be filled in one call 3 consecutive times.
134 */
135 b->flags |= (BF_STREAMER | BF_STREAMER_FAST);
136 //fputc('+', stderr);
137 }
138 }
139 else if ((b->flags & (BF_STREAMER | BF_STREAMER_FAST)) &&
140 (cur_read <= BUFSIZE / 2)) {
141 b->xfer_large = 0;
142 b->xfer_small++;
143 if (b->xfer_small >= 2) {
144 /* if the buffer has been at least half full twice,
145 * we receive faster than we send, so at least it
146 * is not a "fast streamer".
147 */
148 b->flags &= ~BF_STREAMER_FAST;
149 //fputc('-', stderr);
150 }
151 }
152 else {
153 b->xfer_small = 0;
154 b->xfer_large = 0;
155 }
156
Willy Tarreaue393fe22008-08-16 22:18:07 +0200157 b->flags |= BF_FULL;
Willy Tarreau6996e152007-04-30 14:37:43 +0200158 EV_FD_CLR(fd, DIR_RD);
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200159 b->rex = TICK_ETERNITY;
160 goto out_wakeup;
Willy Tarreau6996e152007-04-30 14:37:43 +0200161 }
162
Willy Tarreauab3e1d32007-06-03 14:10:36 +0200163 /* if too many bytes were missing from last read, it means that
164 * it's pointless trying to read again because the system does
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100165 * not have them in buffers. BTW, if FD_POLL_HUP was present,
166 * it means that we have reached the end and that the connection
167 * is closed.
Willy Tarreauab3e1d32007-06-03 14:10:36 +0200168 */
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100169 if (ret < max) {
Willy Tarreau8a7af602008-05-03 23:07:14 +0200170 if ((b->flags & (BF_STREAMER | BF_STREAMER_FAST)) &&
171 (cur_read <= BUFSIZE / 2)) {
172 b->xfer_large = 0;
173 b->xfer_small++;
174 if (b->xfer_small >= 3) {
175 /* we have read less than half of the buffer in
176 * one pass, and this happened at least 3 times.
177 * This is definitely not a streamer.
178 */
179 b->flags &= ~(BF_STREAMER | BF_STREAMER_FAST);
180 //fputc('!', stderr);
181 }
182 }
Willy Tarreau2bea3a12008-08-28 09:47:43 +0200183 /* unfortunately, on level-triggered events, POLL_HUP
184 * is generally delivered AFTER the system buffer is
185 * empty, so this one might never match.
186 */
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100187 if (fdtab[fd].ev & FD_POLL_HUP)
188 goto out_shutdown_r;
Willy Tarreau2bea3a12008-08-28 09:47:43 +0200189
190 /* if a streamer has read few data, it may be because we
191 * have exhausted system buffers. It's not worth trying
192 * again.
193 */
194 if (b->flags & BF_STREAMER)
195 break;
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100196 }
Willy Tarreauab3e1d32007-06-03 14:10:36 +0200197
198 /* generally if we read something smaller than 1 or 2 MSS,
Willy Tarreau2bea3a12008-08-28 09:47:43 +0200199 * it means that either we have exhausted the system's
200 * buffers (streamer or question-response protocol) or that
201 * the connection will be closed. Streamers are easily
202 * detected so we return early. For other cases, it's still
203 * better to perform a last read to be sure, because it may
204 * save one complete poll/read/wakeup cycle in case of shutdown.
Willy Tarreau83749182007-04-15 20:56:27 +0200205 */
Willy Tarreau2bea3a12008-08-28 09:47:43 +0200206 if (ret < MIN_RET_FOR_READ_LOOP && b->flags & BF_STREAMER)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200207 break;
Willy Tarreau83749182007-04-15 20:56:27 +0200208
Willy Tarreau6996e152007-04-30 14:37:43 +0200209 if (--read_poll <= 0)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200210 break;
Willy Tarreau83749182007-04-15 20:56:27 +0200211 }
212 else if (ret == 0) {
Willy Tarreau6996e152007-04-30 14:37:43 +0200213 /* connection closed */
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100214 goto out_shutdown_r;
Willy Tarreau83749182007-04-15 20:56:27 +0200215 }
Willy Tarreau9f195292007-04-15 21:26:58 +0200216 else if (errno == EAGAIN) {
217 /* Ignore EAGAIN but inform the poller that there is
Willy Tarreau6996e152007-04-30 14:37:43 +0200218 * nothing to read left. But we may have done some work
219 * justifying to notify the task.
Willy Tarreau9f195292007-04-15 21:26:58 +0200220 */
Willy Tarreau83749182007-04-15 20:56:27 +0200221 retval = 0;
222 break;
223 }
224 else {
Willy Tarreau6996e152007-04-30 14:37:43 +0200225 goto out_error;
Willy Tarreau83749182007-04-15 20:56:27 +0200226 }
Willy Tarreau6996e152007-04-30 14:37:43 +0200227 } /* while (1) */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200228
Willy Tarreau6996e152007-04-30 14:37:43 +0200229 /*
230 * The only way to get out of this loop is to have stopped reading
231 * without any error nor close, either by limiting the number of
232 * loops, or because of an EAGAIN. We only rearm the timer if we
233 * have at least read something.
234 */
235
Willy Tarreau3da77c52008-08-29 09:58:42 +0200236 if (tick_isset(b->rex) && b->flags & BF_READ_PARTIAL)
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200237 b->rex = tick_add_ifset(now_ms, b->rto);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200238
Willy Tarreau3da77c52008-08-29 09:58:42 +0200239 if (!(b->flags & BF_READ_ACTIVITY))
Willy Tarreaufa7e1022008-10-19 07:30:41 +0200240 goto out_skip_wakeup;
Willy Tarreau6996e152007-04-30 14:37:43 +0200241 out_wakeup:
Willy Tarreaufdccded2008-08-29 18:19:04 +0200242 task_wakeup(fdtab[fd].owner, TASK_WOKEN_IO);
Willy Tarreaufa7e1022008-10-19 07:30:41 +0200243
244 out_skip_wakeup:
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100245 fdtab[fd].ev &= ~FD_POLL_IN;
Willy Tarreau83749182007-04-15 20:56:27 +0200246 return retval;
Willy Tarreau6996e152007-04-30 14:37:43 +0200247
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100248 out_shutdown_r:
249 fdtab[fd].ev &= ~FD_POLL_HUP;
250 b->flags |= BF_READ_NULL;
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200251 b->rex = TICK_ETERNITY;
252 goto out_wakeup;
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100253
Willy Tarreau6996e152007-04-30 14:37:43 +0200254 out_error:
255 /* There was an error. we must wakeup the task. No need to clear
256 * the events, the task will do it.
257 */
258 fdtab[fd].state = FD_STERROR;
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100259 fdtab[fd].ev &= ~FD_POLL_STICKY;
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200260 b->rex = TICK_ETERNITY;
261 goto out_wakeup;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200262}
263
264
265/*
Willy Tarreauf8306d52006-07-29 19:01:31 +0200266 * this function is called on a write event from a stream socket.
Willy Tarreau83749182007-04-15 20:56:27 +0200267 * It returns 0 if we have a high confidence that we will not be
268 * able to write more data without polling first. Returns non-zero
269 * otherwise.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200270 */
Willy Tarreauf8306d52006-07-29 19:01:31 +0200271int stream_sock_write(int fd) {
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200272 __label__ out_wakeup, out_error;
Willy Tarreau54469402006-07-29 16:59:06 +0200273 struct buffer *b = fdtab[fd].cb[DIR_WR].b;
Willy Tarreau83749182007-04-15 20:56:27 +0200274 int ret, max, retval;
275 int write_poll = MAX_WRITE_POLL_LOOPS;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200276
277#ifdef DEBUG_FULL
Willy Tarreauf8306d52006-07-29 19:01:31 +0200278 fprintf(stderr,"stream_sock_write : fd=%d, owner=%p\n", fd, fdtab[fd].owner);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200279#endif
280
Willy Tarreau83749182007-04-15 20:56:27 +0200281 retval = 1;
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100282 if (fdtab[fd].state == FD_STERROR || (fdtab[fd].ev & FD_POLL_ERR))
Willy Tarreau6996e152007-04-30 14:37:43 +0200283 goto out_error;
Willy Tarreau83749182007-04-15 20:56:27 +0200284
Willy Tarreau6996e152007-04-30 14:37:43 +0200285 while (1) {
Willy Tarreau83749182007-04-15 20:56:27 +0200286 if (b->l == 0) { /* let's realign the buffer to optimize I/O */
287 b->r = b->w = b->lr = b->data;
288 max = 0;
289 }
290 else if (b->r > b->w) {
291 max = b->r - b->w;
292 }
293 else {
294 max = b->data + BUFSIZE - b->w;
295 }
296
Willy Tarreaubaaee002006-06-26 02:48:02 +0200297 if (max == 0) {
Willy Tarreauf8306d52006-07-29 19:01:31 +0200298 /* may be we have received a connection acknowledgement in TCP mode without data */
Willy Tarreau6996e152007-04-30 14:37:43 +0200299 if (likely(fdtab[fd].state == FD_STCONN)) {
Willy Tarreau6996e152007-04-30 14:37:43 +0200300 /* We have no data to send to check the connection, and
301 * getsockopt() will not inform us whether the connection
302 * is still pending. So we'll reuse connect() to check the
303 * state of the socket. This has the advantage of givig us
304 * the following info :
305 * - error
306 * - connecting (EALREADY, EINPROGRESS)
307 * - connected (EISCONN, 0)
308 */
Willy Tarreaue94ebd02007-10-09 17:14:37 +0200309 if ((connect(fd, fdtab[fd].peeraddr, fdtab[fd].peerlen) == 0))
Willy Tarreau6996e152007-04-30 14:37:43 +0200310 errno = 0;
311
312 if (errno == EALREADY || errno == EINPROGRESS) {
313 retval = 0;
Willy Tarreaufa7e1022008-10-19 07:30:41 +0200314 goto out_may_wakeup;
Willy Tarreauf8306d52006-07-29 19:01:31 +0200315 }
Willy Tarreau6996e152007-04-30 14:37:43 +0200316
317 if (errno && errno != EISCONN)
318 goto out_error;
319
320 /* OK we just need to indicate that we got a connection
321 * and that we wrote nothing.
322 */
323 b->flags |= BF_WRITE_NULL;
324 fdtab[fd].state = FD_STREADY;
Willy Tarreauf8306d52006-07-29 19:01:31 +0200325 }
326
Willy Tarreau6996e152007-04-30 14:37:43 +0200327 /* Funny, we were called to write something but there wasn't
328 * anything. Theorically we cannot get there, but just in case,
329 * let's disable the write event and pretend we never came there.
330 */
Willy Tarreauf161a342007-04-08 16:59:42 +0200331 EV_FD_CLR(fd, DIR_WR);
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200332 b->wex = TICK_ETERNITY;
333 goto out_wakeup;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200334 }
335
336#ifndef MSG_NOSIGNAL
337 {
338 int skerr;
339 socklen_t lskerr = sizeof(skerr);
340
Willy Tarreauc6423482006-10-15 14:59:03 +0200341 ret = getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr);
342 if (ret == -1 || skerr)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200343 ret = -1;
344 else
345 ret = send(fd, b->w, max, MSG_DONTWAIT);
346 }
347#else
348 ret = send(fd, b->w, max, MSG_DONTWAIT | MSG_NOSIGNAL);
349#endif
350
351 if (ret > 0) {
352 b->l -= ret;
353 b->w += ret;
354
Willy Tarreau3da77c52008-08-29 09:58:42 +0200355 b->flags |= BF_WRITE_PARTIAL;
Willy Tarreaue393fe22008-08-16 22:18:07 +0200356
357 if (b->l < b->rlim - b->data)
358 b->flags &= ~BF_FULL;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200359
360 if (b->w == b->data + BUFSIZE) {
361 b->w = b->data; /* wrap around the buffer */
362 }
Willy Tarreau83749182007-04-15 20:56:27 +0200363
Willy Tarreau6996e152007-04-30 14:37:43 +0200364 if (!b->l) {
Willy Tarreaue393fe22008-08-16 22:18:07 +0200365 b->flags |= BF_EMPTY;
Willy Tarreau6996e152007-04-30 14:37:43 +0200366 EV_FD_CLR(fd, DIR_WR);
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200367 b->wex = TICK_ETERNITY;
368 goto out_wakeup;
Willy Tarreau6996e152007-04-30 14:37:43 +0200369 }
Willy Tarreau83749182007-04-15 20:56:27 +0200370
Willy Tarreauab3e1d32007-06-03 14:10:36 +0200371 /* if the system buffer is full, don't insist */
372 if (ret < max)
373 break;
374
Willy Tarreau6996e152007-04-30 14:37:43 +0200375 if (--write_poll <= 0)
376 break;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200377 }
Willy Tarreau6996e152007-04-30 14:37:43 +0200378 else if (ret == 0 || errno == EAGAIN) {
379 /* nothing written, just pretend we were never called
380 * and wait for the socket to be ready. But we may have
381 * done some work justifying to notify the task.
382 */
Willy Tarreau83749182007-04-15 20:56:27 +0200383 retval = 0;
384 break;
385 }
Willy Tarreaubaaee002006-06-26 02:48:02 +0200386 else {
Willy Tarreau6996e152007-04-30 14:37:43 +0200387 goto out_error;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200388 }
Willy Tarreau6996e152007-04-30 14:37:43 +0200389 } /* while (1) */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200390
Willy Tarreau6996e152007-04-30 14:37:43 +0200391 /*
392 * The only way to get out of this loop is to have stopped writing
393 * without any error, either by limiting the number of loops, or
394 * because of an EAGAIN. We only rearm the timer if we have at least
395 * written something.
396 */
397
Willy Tarreau3da77c52008-08-29 09:58:42 +0200398 if (tick_isset(b->wex) && b->flags & BF_WRITE_PARTIAL) {
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200399 b->wex = tick_add_ifset(now_ms, b->wto);
Willy Tarreauadfb8562008-08-11 15:24:42 +0200400 if (tick_isset(b->wex)) {
Willy Tarreau83749182007-04-15 20:56:27 +0200401 /* FIXME: to prevent the client from expiring read timeouts during writes,
402 * we refresh it. A solution would be to merge read+write timeouts into a
403 * unique one, although that needs some study particularly on full-duplex
404 * TCP connections. */
Willy Tarreauba392ce2008-08-16 21:13:23 +0200405 if (tick_isset(b->rex) && !(b->flags & BF_SHUTR))
Willy Tarreaufa645582007-06-03 15:59:52 +0200406 b->rex = b->wex;
Willy Tarreau83749182007-04-15 20:56:27 +0200407 }
Willy Tarreaubaaee002006-06-26 02:48:02 +0200408 }
Willy Tarreaubaaee002006-06-26 02:48:02 +0200409
Willy Tarreaufa7e1022008-10-19 07:30:41 +0200410 out_may_wakeup:
Willy Tarreau3da77c52008-08-29 09:58:42 +0200411 if (!(b->flags & BF_WRITE_ACTIVITY))
Willy Tarreaufa7e1022008-10-19 07:30:41 +0200412 goto out_skip_wakeup;
Willy Tarreau6996e152007-04-30 14:37:43 +0200413 out_wakeup:
Willy Tarreaufdccded2008-08-29 18:19:04 +0200414 task_wakeup(fdtab[fd].owner, TASK_WOKEN_IO);
Willy Tarreaufa7e1022008-10-19 07:30:41 +0200415
416 out_skip_wakeup:
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100417 fdtab[fd].ev &= ~FD_POLL_OUT;
Willy Tarreau83749182007-04-15 20:56:27 +0200418 return retval;
Willy Tarreau6996e152007-04-30 14:37:43 +0200419
420 out_error:
421 /* There was an error. we must wakeup the task. No need to clear
422 * the events, the task will do it.
423 */
424 fdtab[fd].state = FD_STERROR;
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100425 fdtab[fd].ev &= ~FD_POLL_STICKY;
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200426 b->wex = TICK_ETERNITY;
427 goto out_wakeup;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200428}
429
Willy Tarreaubaaee002006-06-26 02:48:02 +0200430
Willy Tarreau2d212792008-08-27 21:41:35 +0200431/*
Willy Tarreau3a16b2c2008-08-28 08:54:27 +0200432 * This function only has to be called once after a wakeup event during a data
433 * phase. It controls the file descriptor's status, as well as read and write
434 * timeouts.
Willy Tarreau2d212792008-08-27 21:41:35 +0200435 */
Willy Tarreau3a16b2c2008-08-28 08:54:27 +0200436int stream_sock_data_check_errors(int fd)
Willy Tarreau2d212792008-08-27 21:41:35 +0200437{
438 struct buffer *ib = fdtab[fd].cb[DIR_RD].b;
439 struct buffer *ob = fdtab[fd].cb[DIR_WR].b;
440
441 DPRINTF(stderr,"[%u] %s: fd=%d owner=%p ib=%p, ob=%p, exp(r,w)=%u,%u ibf=%08x obf=%08x ibl=%d obl=%d\n",
442 now_ms, __FUNCTION__,
443 fd, fdtab[fd].owner,
444 ib, ob,
445 ib->rex, ob->wex,
446 ib->flags, ob->flags,
447 ib->l, ob->l);
448
449 /* Read or write error on the file descriptor */
Willy Tarreau3a16b2c2008-08-28 08:54:27 +0200450 if (unlikely(fdtab[fd].state == FD_STERROR)) {
Willy Tarreau2d212792008-08-27 21:41:35 +0200451 //trace_term(t, TT_HTTP_SRV_6);
452 if (!ob->cons->err_type) {
453 //ob->cons->err_loc = t->srv;
454 ob->cons->err_type = SI_ET_DATA_ERR;
455 }
456 buffer_shutw(ob);
457 ob->flags |= BF_WRITE_ERROR;
458 buffer_shutr(ib);
459 ib->flags |= BF_READ_ERROR;
460
461 do_close_and_return:
462 fd_delete(fd);
463 ob->cons->state = SI_ST_CLO;
464 return 0;
465 }
466
Willy Tarreau3a16b2c2008-08-28 08:54:27 +0200467 /* Read timeout */
468 if (unlikely(!(ib->flags & (BF_SHUTR|BF_READ_TIMEOUT)) && tick_is_expired(ib->rex, now_ms))) {
469 //trace_term(t, TT_HTTP_SRV_12);
470 ib->flags |= BF_READ_TIMEOUT;
471 if (!ob->cons->err_type) {
472 //ob->cons->err_loc = t->srv;
473 ob->cons->err_type = SI_ET_DATA_TO;
474 }
475 buffer_shutr(ib);
476 if (ob->flags & BF_SHUTW)
477 goto do_close_and_return;
478 EV_FD_CLR(fd, DIR_RD);
479 }
480
481 /* Write timeout */
482 if (unlikely(!(ob->flags & (BF_SHUTW|BF_WRITE_TIMEOUT)) && tick_is_expired(ob->wex, now_ms))) {
483 //trace_term(t, TT_HTTP_SRV_13);
484 ob->flags |= BF_WRITE_TIMEOUT;
485 if (!ob->cons->err_type) {
486 //ob->cons->err_loc = t->srv;
487 ob->cons->err_type = SI_ET_DATA_TO;
488 }
489 buffer_shutw(ob);
490 if (ib->flags & BF_SHUTR)
491 goto do_close_and_return;
492
493 EV_FD_CLR(fd, DIR_WR);
494 shutdown(fd, SHUT_WR);
495 }
496 return 0;
497}
498
499/*
500 * Manages a stream_sock connection during its data phase. The buffers are
501 * examined for various cases of shutdown, then file descriptor and buffers'
502 * flags are updated accordingly.
503 */
504int stream_sock_data_update(int fd)
505{
506 struct buffer *ib = fdtab[fd].cb[DIR_RD].b;
507 struct buffer *ob = fdtab[fd].cb[DIR_WR].b;
508
509 DPRINTF(stderr,"[%u] %s: fd=%d owner=%p ib=%p, ob=%p, exp(r,w)=%u,%u ibf=%08x obf=%08x ibl=%d obl=%d\n",
510 now_ms, __FUNCTION__,
511 fd, fdtab[fd].owner,
512 ib, ob,
513 ib->rex, ob->wex,
514 ib->flags, ob->flags,
515 ib->l, ob->l);
516
Willy Tarreau2d212792008-08-27 21:41:35 +0200517 /* Check if we need to close the read side */
518 if (!(ib->flags & BF_SHUTR)) {
519 /* Last read, forced read-shutdown, or other end closed */
520 if (ib->flags & (BF_READ_NULL|BF_SHUTR_NOW|BF_SHUTW)) {
521 //trace_term(t, TT_HTTP_SRV_10);
Willy Tarreau2d212792008-08-27 21:41:35 +0200522 buffer_shutr(ib);
Willy Tarreau3a16b2c2008-08-28 08:54:27 +0200523 if (ob->flags & BF_SHUTW) {
524 fd_delete(fd);
525 ob->cons->state = SI_ST_CLO;
526 return 0;
527 }
Willy Tarreau2d212792008-08-27 21:41:35 +0200528 EV_FD_CLR(fd, DIR_RD);
529 }
Willy Tarreau3a16b2c2008-08-28 08:54:27 +0200530 }
531
532 /* Check if we need to close the write side */
533 if (!(ob->flags & BF_SHUTW)) {
534 /* Forced write-shutdown or other end closed with empty buffer. */
535 if ((ob->flags & BF_SHUTW_NOW) ||
Willy Tarreau3da77c52008-08-29 09:58:42 +0200536 (ob->flags & (BF_EMPTY|BF_HIJACK|BF_WRITE_ENA|BF_SHUTR)) == (BF_EMPTY|BF_WRITE_ENA|BF_SHUTR)) {
Willy Tarreau3a16b2c2008-08-28 08:54:27 +0200537 //trace_term(t, TT_HTTP_SRV_11);
538 buffer_shutw(ob);
539 if (ib->flags & BF_SHUTR) {
540 fd_delete(fd);
541 ob->cons->state = SI_ST_CLO;
542 return 0;
Willy Tarreau2d212792008-08-27 21:41:35 +0200543 }
Willy Tarreau3a16b2c2008-08-28 08:54:27 +0200544 EV_FD_CLR(fd, DIR_WR);
545 shutdown(fd, SHUT_WR);
Willy Tarreau2d212792008-08-27 21:41:35 +0200546 }
Willy Tarreau3a16b2c2008-08-28 08:54:27 +0200547 }
548 return 0; /* other cases change nothing */
549}
550
551
552/*
553 * Updates a connected stream_sock file descriptor status and timeouts
554 * according to the buffers' flags. It should only be called once after the
555 * buffer flags have settled down, and before they are cleared. It doesn't
556 * harm to call it as often as desired (it just slightly hurts performance).
557 */
558int stream_sock_data_finish(int fd)
559{
560 struct buffer *ib = fdtab[fd].cb[DIR_RD].b;
561 struct buffer *ob = fdtab[fd].cb[DIR_WR].b;
562
563 DPRINTF(stderr,"[%u] %s: fd=%d owner=%p ib=%p, ob=%p, exp(r,w)=%u,%u ibf=%08x obf=%08x ibl=%d obl=%d\n",
564 now_ms, __FUNCTION__,
565 fd, fdtab[fd].owner,
566 ib, ob,
567 ib->rex, ob->wex,
568 ib->flags, ob->flags,
569 ib->l, ob->l);
570
571 /* Check if we need to close the read side */
572 if (!(ib->flags & BF_SHUTR)) {
Willy Tarreau2d212792008-08-27 21:41:35 +0200573 /* Read not closed, update FD status and timeout for reads */
Willy Tarreau3a16b2c2008-08-28 08:54:27 +0200574 if (ib->flags & (BF_FULL|BF_HIJACK)) {
Willy Tarreau2d212792008-08-27 21:41:35 +0200575 /* stop reading */
576 EV_FD_COND_C(fd, DIR_RD);
577 ib->rex = TICK_ETERNITY;
578 }
579 else {
580 /* (re)start reading and update timeout. Note: we don't recompute the timeout
581 * everytime we get here, otherwise it would risk never to expire. We only
582 * update it if is was not yet set, or if we already got some read status.
583 */
584 EV_FD_COND_S(fd, DIR_RD);
Willy Tarreau3da77c52008-08-29 09:58:42 +0200585 if (!tick_isset(ib->rex) || ib->flags & BF_READ_ACTIVITY)
Willy Tarreau2d212792008-08-27 21:41:35 +0200586 ib->rex = tick_add_ifset(now_ms, ib->rto);
587 }
588 }
589
590 /* Check if we need to close the write side */
591 if (!(ob->flags & BF_SHUTW)) {
Willy Tarreau2d212792008-08-27 21:41:35 +0200592 /* Write not closed, update FD status and timeout for writes */
Willy Tarreau3a16b2c2008-08-28 08:54:27 +0200593 if ((ob->flags & BF_EMPTY) ||
Willy Tarreau3da77c52008-08-29 09:58:42 +0200594 (ob->flags & (BF_HIJACK|BF_WRITE_ENA)) == 0) {
Willy Tarreau2d212792008-08-27 21:41:35 +0200595 /* stop writing */
596 EV_FD_COND_C(fd, DIR_WR);
597 ob->wex = TICK_ETERNITY;
598 }
599 else {
600 /* (re)start writing and update timeout. Note: we don't recompute the timeout
601 * everytime we get here, otherwise it would risk never to expire. We only
602 * update it if is was not yet set, or if we already got some write status.
603 */
604 EV_FD_COND_S(fd, DIR_WR);
Willy Tarreau3da77c52008-08-29 09:58:42 +0200605 if (!tick_isset(ob->wex) || ob->flags & BF_WRITE_ACTIVITY) {
Willy Tarreau2d212792008-08-27 21:41:35 +0200606 ob->wex = tick_add_ifset(now_ms, ob->wto);
Willy Tarreau21e1be82008-08-29 11:30:14 +0200607 if (tick_isset(ob->wex) && tick_isset(ib->rex)) {
Willy Tarreau2d212792008-08-27 21:41:35 +0200608 /* Note: depending on the protocol, we don't know if we're waiting
609 * for incoming data or not. So in order to prevent the socket from
610 * expiring read timeouts during writes, we refresh the read timeout,
611 * except if it was already infinite.
612 */
613 ib->rex = ob->wex;
614 }
615 }
616 }
617 }
Willy Tarreau3a16b2c2008-08-28 08:54:27 +0200618 return 0;
Willy Tarreau2d212792008-08-27 21:41:35 +0200619}
620
Willy Tarreaubaaee002006-06-26 02:48:02 +0200621
/*
 * Local variables:
 *  c-indent-level: 8
 *  c-basic-offset: 8
 * End:
 */