blob: 47fd8fa698a55b2b1c26cd512de40affdc2aec66 [file] [log] [blame]
Willy Tarreaubaaee002006-06-26 02:48:02 +02001/*
2 * Functions operating on SOCK_STREAM and buffers.
3 *
Willy Tarreau0c303ee2008-07-07 00:09:58 +02004 * Copyright 2000-2008 Willy Tarreau <w@1wt.eu>
Willy Tarreaubaaee002006-06-26 02:48:02 +02005 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Willy Tarreau6b4aad42009-01-18 21:59:13 +010013#define _GNU_SOURCE
Willy Tarreaubaaee002006-06-26 02:48:02 +020014#include <errno.h>
15#include <fcntl.h>
16#include <stdio.h>
17#include <stdlib.h>
18
19#include <sys/socket.h>
20#include <sys/stat.h>
21#include <sys/types.h>
22
Willy Tarreau2dd0d472006-06-29 17:53:05 +020023#include <common/compat.h>
Willy Tarreaue3ba5f02006-06-29 18:54:54 +020024#include <common/config.h>
Willy Tarreaud6f087e2008-01-18 17:20:13 +010025#include <common/debug.h>
Willy Tarreau83749182007-04-15 20:56:27 +020026#include <common/standard.h>
Willy Tarreau0c303ee2008-07-07 00:09:58 +020027#include <common/ticks.h>
Willy Tarreau2dd0d472006-06-29 17:53:05 +020028#include <common/time.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020029
Willy Tarreau2d212792008-08-27 21:41:35 +020030#include <proto/buffers.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020031#include <proto/client.h>
32#include <proto/fd.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020033#include <proto/stream_sock.h>
34#include <proto/task.h>
35
Willy Tarreau5bd8c372009-01-19 00:32:22 +010036#include <types/global.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020037
Willy Tarreau6b4aad42009-01-18 21:59:13 +010038/* On recent Linux kernels, the splice() syscall may be used for faster data copy.
39 * But it's not always defined on some OS versions, and it even happens that some
40 * definitions are wrong with some glibc due to an offset bug in syscall().
41 */
42
43#if defined(CONFIG_HAP_LINUX_SPLICE)
#include <unistd.h>
#include <sys/syscall.h>

/* Default values for the splice() flags, only used when the system headers
 * did not already provide them.
 */
#if !defined(SPLICE_F_MOVE)
#define SPLICE_F_MOVE 0x1
#endif

#if !defined(SPLICE_F_NONBLOCK)
#define SPLICE_F_NONBLOCK 0x2
#endif

#if !defined(SPLICE_F_MORE)
#define SPLICE_F_MORE 0x4
#endif

/* When the headers do not expose the splice syscall number, select it from
 * the target architecture and declare the splice() wrapper ourselves.
 */
#if !defined(__NR_splice)
#if defined(__powerpc__) || defined(__powerpc64__)
#define __NR_splice 283
#elif defined(__sparc__) || defined(__sparc64__)
#define __NR_splice 232
#elif defined(__x86_64__)
#define __NR_splice 275
#elif defined(__alpha__)
#define __NR_splice 468
#elif defined (__i386__)
#define __NR_splice 313
#else
#warning unsupported architecture, guessing __NR_splice=313 like x86...
#define __NR_splice 313
#endif /* $arch */

_syscall6(int, splice, int, fdin, loff_t *, off_in, int, fdout, loff_t *, off_out, size_t, len, unsigned long, flags)

#endif /* __NR_splice */
Willy Tarreau5bd8c372009-01-19 00:32:22 +010078
/* A pipe contains 16 segments max, and it's common to see segments of 1448 bytes
 * because of timestamps. Use this as a hint for not looping on splice().
 * The expansion is parenthesized so that it always behaves as a single
 * operand, whatever operators surround the macro at the point of use.
 */
#define SPLICE_FULL_HINT (16*1448)
83
84/* Returns :
85 * -1 if splice is not possible or not possible anymore and we must switch to
86 * user-land copy (eg: to_forward reached)
87 * 0 when we know that polling is required to get more data (EAGAIN)
88 * 1 for all other cases (we can safely try again, or if an activity has been
89 * detected (DATA/NULL/ERR))
90 * Sets :
91 * BF_READ_NULL
92 * BF_READ_PARTIAL
93 * BF_WRITE_PARTIAL (during copy)
94 * BF_EMPTY (during copy)
95 * SI_FL_ERR
96 * SI_FL_WAIT_ROOM
97 * (SI_FL_WAIT_RECV)
98 */
99static int stream_sock_splice_in(struct buffer *b, struct stream_interface *si)
100{
101 int fd = si->fd;
102 int ret, max, total = 0;
103 int retval = 1;
104
105 if (!b->to_forward)
106 return -1;
107
108 if (!(b->flags & BF_KERN_SPLICING))
109 return -1;
110
111 if (b->l) {
112 /* We're embarrassed, there are already data pending in
113 * the buffer and we don't want to have them at two
114 * locations at a time. Let's indicate we need some
115 * place and ask the consumer to hurry.
116 */
117 si->flags |= SI_FL_WAIT_ROOM;
118 EV_FD_CLR(fd, DIR_RD);
119 b->rex = TICK_ETERNITY;
120 b->cons->chk_snd(b->cons);
121 return 1;
122 }
123
124 if (unlikely(b->splice.prod == -1)) {
125 int pipefd[2];
126 if (usedpipes >= global.maxpipes || pipe(pipefd) < 0) {
127 b->flags &= ~BF_KERN_SPLICING;
128 return -1;
129 }
130 usedpipes++;
131 b->splice.prod = pipefd[1];
132 b->splice.cons = pipefd[0];
133 }
134
135 while (1) {
136 max = b->to_forward;
137 if (max <= 0) {
138 /* It looks like the buffer + the pipe already contain
139 * the maximum amount of data to be transferred. Try to
140 * send those data immediately on the other side if it
141 * is currently waiting.
142 */
143 retval = -1; /* end of forwarding */
144 break;
145 }
146
147 ret = splice(fd, NULL, b->splice.prod, NULL, max,
148 SPLICE_F_MOVE|SPLICE_F_NONBLOCK);
149
150 if (ret <= 0) {
Willy Tarreau5bd8c372009-01-19 00:32:22 +0100151 if (ret == 0) {
Willy Tarreau98b306b2009-01-25 11:11:32 +0100152 /* connection closed. This is only detected by
153 * recent kernels (>= 2.6.27.13).
Willy Tarreau5bd8c372009-01-19 00:32:22 +0100154 */
Willy Tarreau5bd8c372009-01-19 00:32:22 +0100155 b->flags |= BF_READ_NULL;
Willy Tarreau5bd8c372009-01-19 00:32:22 +0100156 retval = 1; /* no need for further polling */
157 break;
158 }
159
160 if (errno == EAGAIN) {
161 /* there are two reasons for EAGAIN :
162 * - nothing in the socket buffer (standard)
163 * - pipe is full
Willy Tarreau98b306b2009-01-25 11:11:32 +0100164 * - the connection is closed (kernel < 2.6.27.13)
Willy Tarreau5bd8c372009-01-19 00:32:22 +0100165 * Since we don't know if pipe is full, we'll
166 * stop if the pipe is not empty. Anyway, we
167 * will almost always fill/empty the pipe.
168 */
169
170 if (b->splice_len > 0) {
171 si->flags |= SI_FL_WAIT_ROOM;
172 retval = 1;
173 break;
174 }
175
Willy Tarreau98b306b2009-01-25 11:11:32 +0100176 /* We don't know if the connection was closed.
177 * But if we're called upon POLLIN with an empty
178 * pipe and get EAGAIN, it is suspect enought to
179 * try to fall back to the normal recv scheme
180 * which will be able to deal with the situation.
181 */
182 retval = -1;
Willy Tarreau5bd8c372009-01-19 00:32:22 +0100183 break;
184 }
Willy Tarreau5bd8c372009-01-19 00:32:22 +0100185 /* here we have another error */
186 si->flags |= SI_FL_ERR;
187 retval = 1;
188 break;
189 } /* ret <= 0 */
190
191 b->to_forward -= ret;
192 total += ret;
193 b->total += ret;
194 b->splice_len += ret;
195 b->flags |= BF_READ_PARTIAL;
196 b->flags &= ~BF_EMPTY; /* to prevent shutdowns */
197
198 if (b->splice_len >= SPLICE_FULL_HINT) {
199 /* We've read enough of it for this time. */
200 retval = 1;
201 break;
202 }
203 } /* while */
204
205 return retval;
206}
207
Willy Tarreau6b4aad42009-01-18 21:59:13 +0100208#endif /* CONFIG_HAP_LINUX_SPLICE */
209
210
Willy Tarreaubaaee002006-06-26 02:48:02 +0200211/*
Willy Tarreaud7971282006-07-29 18:36:34 +0200212 * this function is called on a read event from a stream socket.
Willy Tarreau83749182007-04-15 20:56:27 +0200213 * It returns 0 if we have a high confidence that we will not be
214 * able to read more data without polling first. Returns non-zero
215 * otherwise.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200216 */
Willy Tarreaud7971282006-07-29 18:36:34 +0200217int stream_sock_read(int fd) {
Willy Tarreaue5ed4062008-08-30 03:17:31 +0200218 struct stream_interface *si = fdtab[fd].owner;
Willy Tarreau48adac52008-08-30 04:58:38 +0200219 struct buffer *b = si->ib;
Willy Tarreau8a7af602008-05-03 23:07:14 +0200220 int ret, max, retval, cur_read;
Willy Tarreaub8949f12007-03-23 22:39:59 +0100221 int read_poll = MAX_READ_POLL_LOOPS;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200222
223#ifdef DEBUG_FULL
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100224 fprintf(stderr,"stream_sock_read : fd=%d, ev=0x%02x, owner=%p\n", fd, fdtab[fd].ev, fdtab[fd].owner);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200225#endif
226
Willy Tarreau83749182007-04-15 20:56:27 +0200227 retval = 1;
228
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100229 /* stop immediately on errors */
230 if (fdtab[fd].state == FD_STERROR || (fdtab[fd].ev & FD_POLL_ERR))
Willy Tarreau6996e152007-04-30 14:37:43 +0200231 goto out_error;
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100232
233 /* stop here if we reached the end of data */
234 if ((fdtab[fd].ev & (FD_POLL_IN|FD_POLL_HUP)) == FD_POLL_HUP)
235 goto out_shutdown_r;
Willy Tarreau83749182007-04-15 20:56:27 +0200236
Willy Tarreau5bd8c372009-01-19 00:32:22 +0100237#if defined(CONFIG_HAP_LINUX_SPLICE)
238 if (b->to_forward && b->flags & BF_KERN_SPLICING) {
Willy Tarreau98b306b2009-01-25 11:11:32 +0100239
240 /* Under Linux, if FD_POLL_HUP is set, we have reached the end.
241 * Since older splice() implementations were buggy and returned
242 * EAGAIN on end of read, let's bypass the call to splice() now.
243 */
244 if (fdtab[fd].ev & FD_POLL_HUP)
245 goto out_shutdown_r;
246
Willy Tarreau5bd8c372009-01-19 00:32:22 +0100247 retval = stream_sock_splice_in(b, si);
248
249 if (retval >= 0) {
250 if (si->flags & SI_FL_ERR)
251 goto out_error;
252 if (b->flags & BF_READ_NULL)
253 goto out_shutdown_r;
254 goto out_wakeup;
255 }
256 /* splice not possible (anymore), let's go on on standard copy */
257 }
258#endif
Willy Tarreau8a7af602008-05-03 23:07:14 +0200259 cur_read = 0;
Willy Tarreau6996e152007-04-30 14:37:43 +0200260 while (1) {
261 /*
262 * 1. compute the maximum block size we can read at once.
263 */
Willy Tarreau03d60bb2009-01-09 11:13:00 +0100264 if (b->l == 0) {
265 /* let's realign the buffer to optimize I/O */
266 b->r = b->w = b->lr = b->data;
267 max = b->max_len;
Willy Tarreau83749182007-04-15 20:56:27 +0200268 }
269 else if (b->r > b->w) {
Willy Tarreau03d60bb2009-01-09 11:13:00 +0100270 max = b->data + b->max_len - b->r;
Willy Tarreau83749182007-04-15 20:56:27 +0200271 }
272 else {
273 max = b->w - b->r;
Willy Tarreau03d60bb2009-01-09 11:13:00 +0100274 if (max > b->max_len)
275 max = b->max_len;
Willy Tarreau83749182007-04-15 20:56:27 +0200276 }
Willy Tarreau74ab2ac2008-11-23 17:23:07 +0100277
Willy Tarreau9c0fe592009-01-18 16:25:31 +0100278 if (max == 0) {
279 b->flags |= BF_FULL;
Willy Tarreaua456f2a2009-01-18 17:38:44 +0100280 si->flags |= SI_FL_WAIT_ROOM;
Willy Tarreau9c0fe592009-01-18 16:25:31 +0100281 break;
282 }
Willy Tarreaubaaee002006-06-26 02:48:02 +0200283
Willy Tarreau6996e152007-04-30 14:37:43 +0200284 /*
285 * 2. read the largest possible block
286 */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200287#ifndef MSG_NOSIGNAL
Willy Tarreau83749182007-04-15 20:56:27 +0200288 {
289 int skerr;
290 socklen_t lskerr = sizeof(skerr);
291
292 ret = getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr);
293 if (ret == -1 || skerr)
294 ret = -1;
295 else
296 ret = recv(fd, b->r, max, 0);
297 }
Willy Tarreaubaaee002006-06-26 02:48:02 +0200298#else
Willy Tarreau83749182007-04-15 20:56:27 +0200299 ret = recv(fd, b->r, max, MSG_NOSIGNAL);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200300#endif
Willy Tarreau83749182007-04-15 20:56:27 +0200301 if (ret > 0) {
302 b->r += ret;
303 b->l += ret;
Willy Tarreau8a7af602008-05-03 23:07:14 +0200304 cur_read += ret;
Willy Tarreaub38903c2008-11-23 21:33:29 +0100305
Willy Tarreau0abebcc2009-01-08 00:09:41 +0100306 /* if we're allowed to directly forward data, we must update send_max */
307 if (b->to_forward > 0) {
308 int fwd = MIN(b->to_forward, ret);
309 b->send_max += fwd;
310 b->to_forward -= fwd;
311 }
Willy Tarreauf890dc92008-12-13 21:12:26 +0100312
Willy Tarreaub38903c2008-11-23 21:33:29 +0100313 if (fdtab[fd].state == FD_STCONN)
314 fdtab[fd].state = FD_STREADY;
315
Willy Tarreau3da77c52008-08-29 09:58:42 +0200316 b->flags |= BF_READ_PARTIAL;
Willy Tarreaue393fe22008-08-16 22:18:07 +0200317 b->flags &= ~BF_EMPTY;
Willy Tarreau74ab2ac2008-11-23 17:23:07 +0100318
Willy Tarreau83749182007-04-15 20:56:27 +0200319 if (b->r == b->data + BUFSIZE) {
320 b->r = b->data; /* wrap around the buffer */
321 }
Willy Tarreau9641e8f2007-03-23 23:02:09 +0100322
Willy Tarreau83749182007-04-15 20:56:27 +0200323 b->total += ret;
Willy Tarreau9641e8f2007-03-23 23:02:09 +0100324
Willy Tarreau03d60bb2009-01-09 11:13:00 +0100325 if (b->l >= b->max_len) {
Willy Tarreau6996e152007-04-30 14:37:43 +0200326 /* The buffer is now full, there's no point in going through
327 * the loop again.
328 */
Willy Tarreau8a7af602008-05-03 23:07:14 +0200329 if (!(b->flags & BF_STREAMER_FAST) && (cur_read == b->l)) {
330 b->xfer_small = 0;
331 b->xfer_large++;
332 if (b->xfer_large >= 3) {
333 /* we call this buffer a fast streamer if it manages
334 * to be filled in one call 3 consecutive times.
335 */
336 b->flags |= (BF_STREAMER | BF_STREAMER_FAST);
337 //fputc('+', stderr);
338 }
339 }
340 else if ((b->flags & (BF_STREAMER | BF_STREAMER_FAST)) &&
341 (cur_read <= BUFSIZE / 2)) {
342 b->xfer_large = 0;
343 b->xfer_small++;
344 if (b->xfer_small >= 2) {
345 /* if the buffer has been at least half full twice,
346 * we receive faster than we send, so at least it
347 * is not a "fast streamer".
348 */
349 b->flags &= ~BF_STREAMER_FAST;
350 //fputc('-', stderr);
351 }
352 }
353 else {
354 b->xfer_small = 0;
355 b->xfer_large = 0;
356 }
Willy Tarreau9c0fe592009-01-18 16:25:31 +0100357
358 b->flags |= BF_FULL;
Willy Tarreaua456f2a2009-01-18 17:38:44 +0100359 si->flags |= SI_FL_WAIT_ROOM;
Willy Tarreau9c0fe592009-01-18 16:25:31 +0100360 break;
Willy Tarreau6996e152007-04-30 14:37:43 +0200361 }
362
Willy Tarreauab3e1d32007-06-03 14:10:36 +0200363 /* if too many bytes were missing from last read, it means that
364 * it's pointless trying to read again because the system does
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100365 * not have them in buffers. BTW, if FD_POLL_HUP was present,
366 * it means that we have reached the end and that the connection
367 * is closed.
Willy Tarreauab3e1d32007-06-03 14:10:36 +0200368 */
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100369 if (ret < max) {
Willy Tarreau8a7af602008-05-03 23:07:14 +0200370 if ((b->flags & (BF_STREAMER | BF_STREAMER_FAST)) &&
371 (cur_read <= BUFSIZE / 2)) {
372 b->xfer_large = 0;
373 b->xfer_small++;
374 if (b->xfer_small >= 3) {
375 /* we have read less than half of the buffer in
376 * one pass, and this happened at least 3 times.
377 * This is definitely not a streamer.
378 */
379 b->flags &= ~(BF_STREAMER | BF_STREAMER_FAST);
380 //fputc('!', stderr);
381 }
382 }
Willy Tarreau2bea3a12008-08-28 09:47:43 +0200383 /* unfortunately, on level-triggered events, POLL_HUP
384 * is generally delivered AFTER the system buffer is
385 * empty, so this one might never match.
386 */
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100387 if (fdtab[fd].ev & FD_POLL_HUP)
388 goto out_shutdown_r;
Willy Tarreau2bea3a12008-08-28 09:47:43 +0200389
390 /* if a streamer has read few data, it may be because we
391 * have exhausted system buffers. It's not worth trying
392 * again.
393 */
394 if (b->flags & BF_STREAMER)
395 break;
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100396 }
Willy Tarreauab3e1d32007-06-03 14:10:36 +0200397
398 /* generally if we read something smaller than 1 or 2 MSS,
Willy Tarreau2bea3a12008-08-28 09:47:43 +0200399 * it means that either we have exhausted the system's
400 * buffers (streamer or question-response protocol) or that
401 * the connection will be closed. Streamers are easily
402 * detected so we return early. For other cases, it's still
403 * better to perform a last read to be sure, because it may
404 * save one complete poll/read/wakeup cycle in case of shutdown.
Willy Tarreau83749182007-04-15 20:56:27 +0200405 */
Willy Tarreau2bea3a12008-08-28 09:47:43 +0200406 if (ret < MIN_RET_FOR_READ_LOOP && b->flags & BF_STREAMER)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200407 break;
Willy Tarreau83749182007-04-15 20:56:27 +0200408
Willy Tarreau6996e152007-04-30 14:37:43 +0200409 if (--read_poll <= 0)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200410 break;
Willy Tarreau83749182007-04-15 20:56:27 +0200411 }
412 else if (ret == 0) {
Willy Tarreau6996e152007-04-30 14:37:43 +0200413 /* connection closed */
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100414 goto out_shutdown_r;
Willy Tarreau83749182007-04-15 20:56:27 +0200415 }
Willy Tarreau9f195292007-04-15 21:26:58 +0200416 else if (errno == EAGAIN) {
417 /* Ignore EAGAIN but inform the poller that there is
Willy Tarreauaf78d0f2009-01-08 10:09:08 +0100418 * nothing to read left if we did not read much, ie
419 * less than what we were still expecting to read.
420 * But we may have done some work justifying to notify
421 * the task.
Willy Tarreau9f195292007-04-15 21:26:58 +0200422 */
Willy Tarreauaf78d0f2009-01-08 10:09:08 +0100423 if (cur_read < MIN_RET_FOR_READ_LOOP)
424 retval = 0;
Willy Tarreau83749182007-04-15 20:56:27 +0200425 break;
426 }
427 else {
Willy Tarreau6996e152007-04-30 14:37:43 +0200428 goto out_error;
Willy Tarreau83749182007-04-15 20:56:27 +0200429 }
Willy Tarreau6996e152007-04-30 14:37:43 +0200430 } /* while (1) */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200431
Willy Tarreau6996e152007-04-30 14:37:43 +0200432 out_wakeup:
Willy Tarreau9c0fe592009-01-18 16:25:31 +0100433 /* We might have some data the consumer is waiting for */
Willy Tarreaua456f2a2009-01-18 17:38:44 +0100434 if ((b->send_max || b->splice_len) && (b->cons->flags & SI_FL_WAIT_DATA)) {
435 int last_len = b->splice_len;
436
Willy Tarreau3ffeba12008-12-14 14:42:35 +0100437 b->cons->chk_snd(b->cons);
438
Willy Tarreaua456f2a2009-01-18 17:38:44 +0100439 /* check if the consumer has freed some space */
440 if (!(b->flags & BF_FULL) && (!last_len || b->splice_len < last_len))
441 si->flags &= ~SI_FL_WAIT_ROOM;
442 }
443
444 if (si->flags & SI_FL_WAIT_ROOM) {
Willy Tarreau9c0fe592009-01-18 16:25:31 +0100445 EV_FD_CLR(fd, DIR_RD);
446 b->rex = TICK_ETERNITY;
447 }
Willy Tarreaua456f2a2009-01-18 17:38:44 +0100448 else if ((b->flags & (BF_READ_PARTIAL|BF_FULL|BF_READ_NOEXP)) == BF_READ_PARTIAL)
449 b->rex = tick_add_ifset(now_ms, b->rto);
Willy Tarreau9c0fe592009-01-18 16:25:31 +0100450
Willy Tarreau6b66f3e2008-12-14 17:31:54 +0100451 /* we have to wake up if there is a special event or if we don't have
452 * any more data to forward.
453 */
Willy Tarreaua456f2a2009-01-18 17:38:44 +0100454 if ((b->flags & (BF_READ_NULL|BF_READ_ERROR|BF_SHUTR)) ||
455 !b->to_forward ||
456 si->state != SI_ST_EST ||
457 b->cons->state != SI_ST_EST ||
458 (si->flags & SI_FL_ERR))
Willy Tarreau6b66f3e2008-12-14 17:31:54 +0100459 task_wakeup(si->owner, TASK_WOKEN_IO);
Willy Tarreaua456f2a2009-01-18 17:38:44 +0100460
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100461 fdtab[fd].ev &= ~FD_POLL_IN;
Willy Tarreau83749182007-04-15 20:56:27 +0200462 return retval;
Willy Tarreau6996e152007-04-30 14:37:43 +0200463
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100464 out_shutdown_r:
Willy Tarreaue5ed4062008-08-30 03:17:31 +0200465 /* we received a shutdown */
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100466 fdtab[fd].ev &= ~FD_POLL_HUP;
467 b->flags |= BF_READ_NULL;
Willy Tarreau99126c32008-11-27 10:30:51 +0100468 stream_sock_shutr(si);
Willy Tarreau0c303ee2008-07-07 00:09:58 +0200469 goto out_wakeup;
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100470
Willy Tarreau6996e152007-04-30 14:37:43 +0200471 out_error:
Willy Tarreaucff64112008-11-03 06:26:53 +0100472 /* Read error on the file descriptor. We mark the FD as STERROR so
473 * that we don't use it anymore. The error is reported to the stream
474 * interface which will take proper action. We must not perturbate the
475 * buffer because the stream interface wants to ensure transparent
476 * connection retries.
Willy Tarreau6996e152007-04-30 14:37:43 +0200477 */
Willy Tarreaucff64112008-11-03 06:26:53 +0100478
Willy Tarreau6996e152007-04-30 14:37:43 +0200479 fdtab[fd].state = FD_STERROR;
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100480 fdtab[fd].ev &= ~FD_POLL_STICKY;
Willy Tarreaucff64112008-11-03 06:26:53 +0100481 si->flags |= SI_FL_ERR;
Willy Tarreau9c0fe592009-01-18 16:25:31 +0100482 retval = 1;
483 goto out_wakeup;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200484}
485
486
487/*
Willy Tarreau0c2fc1f2009-01-18 15:30:37 +0100488 * This function is called to send buffer data to a stream socket.
489 * It returns -1 in case of unrecoverable error, 0 if the caller needs to poll
490 * before calling it again, otherwise 1.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200491 */
Willy Tarreaua456f2a2009-01-18 17:38:44 +0100492static int stream_sock_write_loop(struct stream_interface *si, struct buffer *b)
Willy Tarreau0c2fc1f2009-01-18 15:30:37 +0100493{
Willy Tarreau83749182007-04-15 20:56:27 +0200494 int write_poll = MAX_WRITE_POLL_LOOPS;
Willy Tarreau0c2fc1f2009-01-18 15:30:37 +0100495 int retval = 1;
496 int ret, max;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200497
Willy Tarreau5bd8c372009-01-19 00:32:22 +0100498#if defined(CONFIG_HAP_LINUX_SPLICE)
499 while (b->splice_len) {
500 ret = splice(b->splice.cons, NULL, si->fd, NULL, b->splice_len,
501 SPLICE_F_MOVE|SPLICE_F_NONBLOCK);
502 if (ret <= 0) {
503 if (ret == 0 || errno == EAGAIN) {
504 retval = 0;
505 return retval;
506 }
507 /* here we have another error */
508 retval = -1;
509 return retval;
510 }
511
512 b->flags |= BF_WRITE_PARTIAL;
513 b->splice_len -= ret;
514
515 if (!b->splice_len)
516 break;
517
518 if (--write_poll <= 0)
519 return retval;
520 }
521
522 /* At this point, the pipe is empty, but we may still have data pending
523 * in the normal buffer.
524 */
525 if (!b->l) {
526 b->flags |= BF_EMPTY;
527 return retval;
528 }
529#endif
Willy Tarreaud2def0f2009-01-18 17:37:33 +0100530 if (!b->send_max)
Willy Tarreau0c2fc1f2009-01-18 15:30:37 +0100531 return retval;
Willy Tarreau83749182007-04-15 20:56:27 +0200532
Willy Tarreau0c2fc1f2009-01-18 15:30:37 +0100533 /* when we're in this loop, we already know that there is no spliced
534 * data left, and that there are sendable buffered data.
535 */
Willy Tarreau6996e152007-04-30 14:37:43 +0200536 while (1) {
Willy Tarreau0c2fc1f2009-01-18 15:30:37 +0100537 if (b->r > b->w)
Willy Tarreau83749182007-04-15 20:56:27 +0200538 max = b->r - b->w;
Willy Tarreau0c2fc1f2009-01-18 15:30:37 +0100539 else
Willy Tarreau83749182007-04-15 20:56:27 +0200540 max = b->data + BUFSIZE - b->w;
Willy Tarreau83749182007-04-15 20:56:27 +0200541
Willy Tarreauf890dc92008-12-13 21:12:26 +0100542 /* limit the amount of outgoing data if required */
543 if (max > b->send_max)
544 max = b->send_max;
545
Willy Tarreaubaaee002006-06-26 02:48:02 +0200546#ifndef MSG_NOSIGNAL
547 {
548 int skerr;
549 socklen_t lskerr = sizeof(skerr);
550
Willy Tarreauc6423482006-10-15 14:59:03 +0200551 ret = getsockopt(fd, SOL_SOCKET, SO_ERROR, &skerr, &lskerr);
552 if (ret == -1 || skerr)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200553 ret = -1;
554 else
555 ret = send(fd, b->w, max, MSG_DONTWAIT);
556 }
557#else
Willy Tarreau0c2fc1f2009-01-18 15:30:37 +0100558 ret = send(si->fd, b->w, max, MSG_DONTWAIT | MSG_NOSIGNAL);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200559#endif
560
561 if (ret > 0) {
Willy Tarreau0c2fc1f2009-01-18 15:30:37 +0100562 if (fdtab[si->fd].state == FD_STCONN)
563 fdtab[si->fd].state = FD_STREADY;
Willy Tarreaub38903c2008-11-23 21:33:29 +0100564
Willy Tarreau3da77c52008-08-29 09:58:42 +0200565 b->flags |= BF_WRITE_PARTIAL;
Willy Tarreaue393fe22008-08-16 22:18:07 +0200566
Willy Tarreau0c2fc1f2009-01-18 15:30:37 +0100567 b->w += ret;
568 if (b->w == b->data + BUFSIZE)
569 b->w = b->data; /* wrap around the buffer */
570
571 b->l -= ret;
572 if (likely(b->l < b->max_len))
Willy Tarreaue393fe22008-08-16 22:18:07 +0200573 b->flags &= ~BF_FULL;
Willy Tarreau74ab2ac2008-11-23 17:23:07 +0100574
Willy Tarreau0c2fc1f2009-01-18 15:30:37 +0100575 if (likely(!b->l)) {
576 /* optimize data alignment in the buffer */
577 b->r = b->w = b->lr = b->data;
578 if (likely(!b->splice_len))
579 b->flags |= BF_EMPTY;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200580 }
Willy Tarreau83749182007-04-15 20:56:27 +0200581
Willy Tarreau0c2fc1f2009-01-18 15:30:37 +0100582 b->send_max -= ret;
583 if (!b->send_max || !b->l)
584 break;
Willy Tarreau83749182007-04-15 20:56:27 +0200585
Willy Tarreauab3e1d32007-06-03 14:10:36 +0200586 /* if the system buffer is full, don't insist */
587 if (ret < max)
588 break;
589
Willy Tarreau6996e152007-04-30 14:37:43 +0200590 if (--write_poll <= 0)
591 break;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200592 }
Willy Tarreau6996e152007-04-30 14:37:43 +0200593 else if (ret == 0 || errno == EAGAIN) {
Willy Tarreau0c2fc1f2009-01-18 15:30:37 +0100594 /* nothing written, we need to poll for write first */
Willy Tarreau83749182007-04-15 20:56:27 +0200595 retval = 0;
596 break;
597 }
Willy Tarreaubaaee002006-06-26 02:48:02 +0200598 else {
Willy Tarreau0c2fc1f2009-01-18 15:30:37 +0100599 /* bad, we got an error */
600 retval = -1;
601 break;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200602 }
Willy Tarreau6996e152007-04-30 14:37:43 +0200603 } /* while (1) */
Willy Tarreaubaaee002006-06-26 02:48:02 +0200604
Willy Tarreau0c2fc1f2009-01-18 15:30:37 +0100605 return retval;
606}
Willy Tarreau6996e152007-04-30 14:37:43 +0200607
Willy Tarreau0c2fc1f2009-01-18 15:30:37 +0100608
609/*
610 * This function is called on a write event from a stream socket.
611 * It returns 0 if the caller needs to poll before calling it again, otherwise
612 * non-zero.
613 */
614int stream_sock_write(int fd)
615{
616 struct stream_interface *si = fdtab[fd].owner;
617 struct buffer *b = si->ob;
618 int retval = 1;
619
620#ifdef DEBUG_FULL
621 fprintf(stderr,"stream_sock_write : fd=%d, owner=%p\n", fd, fdtab[fd].owner);
622#endif
623
624 retval = 1;
625 if (fdtab[fd].state == FD_STERROR || (fdtab[fd].ev & FD_POLL_ERR))
626 goto out_error;
627
628 if (likely(!(b->flags & BF_EMPTY))) {
629 /* OK there are data waiting to be sent */
630 retval = stream_sock_write_loop(si, b);
631 if (retval < 0)
632 goto out_error;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200633 }
Willy Tarreau0c2fc1f2009-01-18 15:30:37 +0100634 else {
635 /* may be we have received a connection acknowledgement in TCP mode without data */
636 if (likely(fdtab[fd].state == FD_STCONN)) {
637 /* We have no data to send to check the connection, and
638 * getsockopt() will not inform us whether the connection
639 * is still pending. So we'll reuse connect() to check the
640 * state of the socket. This has the advantage of givig us
641 * the following info :
642 * - error
643 * - connecting (EALREADY, EINPROGRESS)
644 * - connected (EISCONN, 0)
645 */
646 if ((connect(fd, fdtab[fd].peeraddr, fdtab[fd].peerlen) == 0))
647 errno = 0;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200648
Willy Tarreau0c2fc1f2009-01-18 15:30:37 +0100649 if (errno == EALREADY || errno == EINPROGRESS) {
650 retval = 0;
651 goto out_may_wakeup;
652 }
Willy Tarreau3ffeba12008-12-14 14:42:35 +0100653
Willy Tarreau0c2fc1f2009-01-18 15:30:37 +0100654 if (errno && errno != EISCONN)
655 goto out_error;
Willy Tarreaufa7e1022008-10-19 07:30:41 +0200656
Willy Tarreau0c2fc1f2009-01-18 15:30:37 +0100657 /* OK we just need to indicate that we got a connection
658 * and that we wrote nothing.
659 */
660 b->flags |= BF_WRITE_NULL;
661 fdtab[fd].state = FD_STREADY;
662 }
Willy Tarreau6996e152007-04-30 14:37:43 +0200663
Willy Tarreau0c2fc1f2009-01-18 15:30:37 +0100664 /* Funny, we were called to write something but there wasn't
665 * anything. We can get there, for example if we were woken up
666 * on a write event to finish the splice, but the send_max is 0
667 * so we cannot write anything from the buffer. Let's disable
668 * the write event and pretend we never came there.
669 */
670 }
671
Willy Tarreaud2def0f2009-01-18 17:37:33 +0100672 if (!b->splice_len && !b->send_max) {
Willy Tarreau0c2fc1f2009-01-18 15:30:37 +0100673 /* the connection is established but we can't write. Either the
674 * buffer is empty, or we just refrain from sending because the
675 * send_max limit was reached. Maybe we just wrote the last
676 * chunk and need to close.
677 */
678 if (((b->flags & (BF_SHUTW|BF_EMPTY|BF_HIJACK|BF_WRITE_ENA|BF_SHUTR)) ==
679 (BF_EMPTY|BF_WRITE_ENA|BF_SHUTR)) &&
680 (si->state == SI_ST_EST)) {
681 stream_sock_shutw(si);
682 goto out_wakeup;
683 }
684
685 if (b->flags & BF_EMPTY)
Willy Tarreauac128fe2009-01-09 13:05:19 +0100686 si->flags |= SI_FL_WAIT_DATA;
Willy Tarreau0c2fc1f2009-01-18 15:30:37 +0100687
Willy Tarreauac128fe2009-01-09 13:05:19 +0100688 EV_FD_CLR(fd, DIR_WR);
Willy Tarreau0c2fc1f2009-01-18 15:30:37 +0100689 b->wex = TICK_ETERNITY;
Willy Tarreauac128fe2009-01-09 13:05:19 +0100690 }
Willy Tarreau0c2fc1f2009-01-18 15:30:37 +0100691
692 out_may_wakeup:
693 if (b->flags & BF_WRITE_ACTIVITY) {
694 /* update timeout if we have written something */
Willy Tarreaud2def0f2009-01-18 17:37:33 +0100695 if ((b->send_max || b->splice_len) &&
696 (b->flags & (BF_SHUTW|BF_WRITE_PARTIAL)) == BF_WRITE_PARTIAL)
Willy Tarreau0c2fc1f2009-01-18 15:30:37 +0100697 b->wex = tick_add_ifset(now_ms, b->wto);
698
699 out_wakeup:
700 if (tick_isset(si->ib->rex)) {
701 /* Note: to prevent the client from expiring read timeouts
702 * during writes, we refresh it. A better solution would be
703 * to merge read+write timeouts into a unique one, although
704 * that needs some study particularly on full-duplex TCP
705 * connections.
706 */
707 si->ib->rex = tick_add_ifset(now_ms, si->ib->rto);
708 }
709
710 /* the producer might be waiting for more room to store data */
711 if (likely((b->flags & (BF_WRITE_PARTIAL|BF_FULL)) == BF_WRITE_PARTIAL &&
712 (b->prod->flags & SI_FL_WAIT_ROOM)))
713 b->prod->chk_rcv(b->prod);
714
715 /* we have to wake up if there is a special event or if we don't have
716 * any more data to forward and it's not planned to send any more.
717 */
718 if (likely((b->flags & (BF_WRITE_NULL|BF_WRITE_ERROR|BF_SHUTW)) ||
719 (!b->to_forward && !b->send_max && !b->splice_len) ||
720 si->state != SI_ST_EST ||
721 b->prod->state != SI_ST_EST))
722 task_wakeup(si->owner, TASK_WOKEN_IO);
723 }
724
725 fdtab[fd].ev &= ~FD_POLL_OUT;
726 return retval;
Willy Tarreauac128fe2009-01-09 13:05:19 +0100727
Willy Tarreau6996e152007-04-30 14:37:43 +0200728 out_error:
Willy Tarreaucff64112008-11-03 06:26:53 +0100729 /* Write error on the file descriptor. We mark the FD as STERROR so
730 * that we don't use it anymore. The error is reported to the stream
731 * interface which will take proper action. We must not perturbate the
732 * buffer because the stream interface wants to ensure transparent
733 * connection retries.
Willy Tarreau6996e152007-04-30 14:37:43 +0200734 */
Willy Tarreaucff64112008-11-03 06:26:53 +0100735
Willy Tarreau6996e152007-04-30 14:37:43 +0200736 fdtab[fd].state = FD_STERROR;
Willy Tarreaud6f087e2008-01-18 17:20:13 +0100737 fdtab[fd].ev &= ~FD_POLL_STICKY;
Willy Tarreaucff64112008-11-03 06:26:53 +0100738 si->flags |= SI_FL_ERR;
Willy Tarreaue5ed4062008-08-30 03:17:31 +0200739 task_wakeup(si->owner, TASK_WOKEN_IO);
740 return 1;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200741}
742
Willy Tarreau48adac52008-08-30 04:58:38 +0200743/*
Willy Tarreau3c6ab2e2008-09-04 11:19:41 +0200744 * This function performs a shutdown-write on a stream interface in a connected or
745 * init state (it does nothing for other states). It either shuts the write side
Willy Tarreau99126c32008-11-27 10:30:51 +0100746 * or closes the file descriptor and marks itself as closed. The buffer flags are
747 * updated to reflect the new state.
Willy Tarreau48adac52008-08-30 04:58:38 +0200748 */
Willy Tarreau0a5d5dd2008-11-23 19:31:35 +0100749void stream_sock_shutw(struct stream_interface *si)
Willy Tarreau48adac52008-08-30 04:58:38 +0200750{
Willy Tarreau99126c32008-11-27 10:30:51 +0100751 if (si->ob->flags & BF_SHUTW)
752 return;
753 si->ob->flags |= BF_SHUTW;
754 si->ob->wex = TICK_ETERNITY;
Willy Tarreaub0ef7352008-12-14 13:26:20 +0100755 si->flags &= ~SI_FL_WAIT_DATA;
Willy Tarreau99126c32008-11-27 10:30:51 +0100756
Willy Tarreaub38903c2008-11-23 21:33:29 +0100757 switch (si->state) {
Willy Tarreaub38903c2008-11-23 21:33:29 +0100758 case SI_ST_EST:
759 if (!(si->ib->flags & BF_SHUTR)) {
760 EV_FD_CLR(si->fd, DIR_WR);
761 shutdown(si->fd, SHUT_WR);
762 return;
763 }
764 /* fall through */
765 case SI_ST_CON:
Willy Tarreaub0ef7352008-12-14 13:26:20 +0100766 si->flags &= ~SI_FL_WAIT_ROOM;
Willy Tarreau8bfa4262008-11-27 09:25:45 +0100767 /* we may have to close a pending connection, and mark the
768 * response buffer as shutr
769 */
Willy Tarreau48adac52008-08-30 04:58:38 +0200770 fd_delete(si->fd);
Willy Tarreaufe3718a2008-11-30 18:14:12 +0100771 /* fall through */
772 case SI_ST_CER:
Willy Tarreau7f006512008-12-07 14:04:04 +0100773 si->state = SI_ST_DIS;
774 default:
Willy Tarreau99126c32008-11-27 10:30:51 +0100775 si->ib->flags |= BF_SHUTR;
Willy Tarreaufe3718a2008-11-30 18:14:12 +0100776 si->ib->rex = TICK_ETERNITY;
Willy Tarreau0a5d5dd2008-11-23 19:31:35 +0100777 return;
Willy Tarreau48adac52008-08-30 04:58:38 +0200778 }
Willy Tarreau48adac52008-08-30 04:58:38 +0200779}
Willy Tarreaubaaee002006-06-26 02:48:02 +0200780
Willy Tarreau2d212792008-08-27 21:41:35 +0200781/*
Willy Tarreau3c6ab2e2008-09-04 11:19:41 +0200782 * This function performs a shutdown-read on a stream interface in a connected or
Willy Tarreau0a5d5dd2008-11-23 19:31:35 +0100783 * init state (it does nothing for other states). It either shuts the read side
Willy Tarreau99126c32008-11-27 10:30:51 +0100784 * or closes the file descriptor and marks itself as closed. The buffer flags are
785 * updated to reflect the new state.
Willy Tarreau3c6ab2e2008-09-04 11:19:41 +0200786 */
Willy Tarreau0a5d5dd2008-11-23 19:31:35 +0100787void stream_sock_shutr(struct stream_interface *si)
Willy Tarreau3c6ab2e2008-09-04 11:19:41 +0200788{
Willy Tarreau99126c32008-11-27 10:30:51 +0100789 if (si->ib->flags & BF_SHUTR)
790 return;
791 si->ib->flags |= BF_SHUTR;
792 si->ib->rex = TICK_ETERNITY;
Willy Tarreaub0ef7352008-12-14 13:26:20 +0100793 si->flags &= ~SI_FL_WAIT_ROOM;
Willy Tarreau99126c32008-11-27 10:30:51 +0100794
Willy Tarreau8bfa4262008-11-27 09:25:45 +0100795 if (si->state != SI_ST_EST && si->state != SI_ST_CON)
Willy Tarreau0a5d5dd2008-11-23 19:31:35 +0100796 return;
Willy Tarreau3c6ab2e2008-09-04 11:19:41 +0200797
Willy Tarreaucff64112008-11-03 06:26:53 +0100798 if (si->ob->flags & BF_SHUTW) {
Willy Tarreau3c6ab2e2008-09-04 11:19:41 +0200799 fd_delete(si->fd);
Willy Tarreau74ab2ac2008-11-23 17:23:07 +0100800 si->state = SI_ST_DIS;
Willy Tarreau0a5d5dd2008-11-23 19:31:35 +0100801 return;
Willy Tarreau3c6ab2e2008-09-04 11:19:41 +0200802 }
803 EV_FD_CLR(si->fd, DIR_RD);
Willy Tarreau0a5d5dd2008-11-23 19:31:35 +0100804 return;
Willy Tarreau3c6ab2e2008-09-04 11:19:41 +0200805}
806
807/*
Willy Tarreau3a16b2c2008-08-28 08:54:27 +0200808 * Updates a connected stream_sock file descriptor status and timeouts
809 * according to the buffers' flags. It should only be called once after the
810 * buffer flags have settled down, and before they are cleared. It doesn't
811 * harm to call it as often as desired (it just slightly hurts performance).
812 */
Willy Tarreaub0253252008-11-30 21:37:12 +0100813void stream_sock_data_finish(struct stream_interface *si)
Willy Tarreau3a16b2c2008-08-28 08:54:27 +0200814{
Willy Tarreaub0253252008-11-30 21:37:12 +0100815 struct buffer *ib = si->ib;
816 struct buffer *ob = si->ob;
817 int fd = si->fd;
Willy Tarreau3a16b2c2008-08-28 08:54:27 +0200818
Willy Tarreaue5ed4062008-08-30 03:17:31 +0200819 DPRINTF(stderr,"[%u] %s: fd=%d owner=%p ib=%p, ob=%p, exp(r,w)=%u,%u ibf=%08x obf=%08x ibl=%d obl=%d si=%d\n",
Willy Tarreau3a16b2c2008-08-28 08:54:27 +0200820 now_ms, __FUNCTION__,
821 fd, fdtab[fd].owner,
822 ib, ob,
823 ib->rex, ob->wex,
824 ib->flags, ob->flags,
Willy Tarreaub0253252008-11-30 21:37:12 +0100825 ib->l, ob->l, si->state);
Willy Tarreau3a16b2c2008-08-28 08:54:27 +0200826
827 /* Check if we need to close the read side */
828 if (!(ib->flags & BF_SHUTR)) {
Willy Tarreau2d212792008-08-27 21:41:35 +0200829 /* Read not closed, update FD status and timeout for reads */
Willy Tarreau3a16b2c2008-08-28 08:54:27 +0200830 if (ib->flags & (BF_FULL|BF_HIJACK)) {
Willy Tarreau2d212792008-08-27 21:41:35 +0200831 /* stop reading */
Willy Tarreaub0ef7352008-12-14 13:26:20 +0100832 if ((ib->flags & (BF_FULL|BF_HIJACK)) == BF_FULL)
833 si->flags |= SI_FL_WAIT_ROOM;
Willy Tarreau2d212792008-08-27 21:41:35 +0200834 EV_FD_COND_C(fd, DIR_RD);
835 ib->rex = TICK_ETERNITY;
836 }
837 else {
838 /* (re)start reading and update timeout. Note: we don't recompute the timeout
839 * everytime we get here, otherwise it would risk never to expire. We only
840 * update it if is was not yet set, or if we already got some read status.
841 */
Willy Tarreaub0ef7352008-12-14 13:26:20 +0100842 si->flags &= ~SI_FL_WAIT_ROOM;
Willy Tarreau2d212792008-08-27 21:41:35 +0200843 EV_FD_COND_S(fd, DIR_RD);
Willy Tarreau86491c32008-12-14 09:04:47 +0100844 if (!(ib->flags & BF_READ_NOEXP) &&
845 (!tick_isset(ib->rex) || ib->flags & BF_READ_ACTIVITY))
Willy Tarreau2d212792008-08-27 21:41:35 +0200846 ib->rex = tick_add_ifset(now_ms, ib->rto);
847 }
848 }
849
850 /* Check if we need to close the write side */
851 if (!(ob->flags & BF_SHUTW)) {
Willy Tarreau2d212792008-08-27 21:41:35 +0200852 /* Write not closed, update FD status and timeout for writes */
Willy Tarreaudcef33f2009-01-07 19:33:39 +0100853 if ((ob->send_max == 0 && ob->splice_len == 0) ||
Willy Tarreau3ffeba12008-12-14 14:42:35 +0100854 (ob->flags & BF_EMPTY) ||
Willy Tarreau3da77c52008-08-29 09:58:42 +0200855 (ob->flags & (BF_HIJACK|BF_WRITE_ENA)) == 0) {
Willy Tarreau2d212792008-08-27 21:41:35 +0200856 /* stop writing */
Willy Tarreaub0ef7352008-12-14 13:26:20 +0100857 if ((ob->flags & (BF_EMPTY|BF_HIJACK|BF_WRITE_ENA)) == (BF_EMPTY|BF_WRITE_ENA))
858 si->flags |= SI_FL_WAIT_DATA;
Willy Tarreau2d212792008-08-27 21:41:35 +0200859 EV_FD_COND_C(fd, DIR_WR);
860 ob->wex = TICK_ETERNITY;
861 }
862 else {
863 /* (re)start writing and update timeout. Note: we don't recompute the timeout
864 * everytime we get here, otherwise it would risk never to expire. We only
865 * update it if is was not yet set, or if we already got some write status.
866 */
Willy Tarreaub0ef7352008-12-14 13:26:20 +0100867 si->flags &= ~SI_FL_WAIT_DATA;
Willy Tarreau2d212792008-08-27 21:41:35 +0200868 EV_FD_COND_S(fd, DIR_WR);
Willy Tarreau3da77c52008-08-29 09:58:42 +0200869 if (!tick_isset(ob->wex) || ob->flags & BF_WRITE_ACTIVITY) {
Willy Tarreau2d212792008-08-27 21:41:35 +0200870 ob->wex = tick_add_ifset(now_ms, ob->wto);
Willy Tarreau21e1be82008-08-29 11:30:14 +0200871 if (tick_isset(ob->wex) && tick_isset(ib->rex)) {
Willy Tarreau2d212792008-08-27 21:41:35 +0200872 /* Note: depending on the protocol, we don't know if we're waiting
873 * for incoming data or not. So in order to prevent the socket from
874 * expiring read timeouts during writes, we refresh the read timeout,
875 * except if it was already infinite.
876 */
877 ib->rex = ob->wex;
878 }
879 }
880 }
881 }
Willy Tarreau2d212792008-08-27 21:41:35 +0200882}
883
Willy Tarreau3ffeba12008-12-14 14:42:35 +0100884/* This function is used for inter-stream-interface calls. It is called by the
885 * consumer to inform the producer side that it may be interested in checking
886 * for free space in the buffer. Note that it intentionally does not update
887 * timeouts, so that we can still check them later at wake-up.
888 */
889void stream_sock_chk_rcv(struct stream_interface *si)
890{
891 struct buffer *ib = si->ib;
892
893 DPRINTF(stderr,"[%u] %s: fd=%d owner=%p ib=%p, ob=%p, exp(r,w)=%u,%u ibf=%08x obf=%08x ibl=%d obl=%d si=%d\n",
894 now_ms, __FUNCTION__,
895 fd, fdtab[fd].owner,
896 ib, ob,
897 ib->rex, ob->wex,
898 ib->flags, ob->flags,
899 ib->l, ob->l, si->state);
900
901 if (unlikely(si->state != SI_ST_EST || (ib->flags & BF_SHUTR)))
902 return;
903
904 if (ib->flags & (BF_FULL|BF_HIJACK)) {
905 /* stop reading */
906 if ((ib->flags & (BF_FULL|BF_HIJACK)) == BF_FULL)
907 si->flags |= SI_FL_WAIT_ROOM;
908 EV_FD_COND_C(si->fd, DIR_RD);
909 }
910 else {
911 /* (re)start reading */
912 si->flags &= ~SI_FL_WAIT_ROOM;
913 EV_FD_COND_S(si->fd, DIR_RD);
914 }
915}
916
917
918/* This function is used for inter-stream-interface calls. It is called by the
919 * producer to inform the consumer side that it may be interested in checking
920 * for data in the buffer. Note that it intentionally does not update timeouts,
921 * so that we can still check them later at wake-up.
922 */
923void stream_sock_chk_snd(struct stream_interface *si)
924{
925 struct buffer *ob = si->ob;
Willy Tarreaua456f2a2009-01-18 17:38:44 +0100926 int retval;
Willy Tarreau3ffeba12008-12-14 14:42:35 +0100927
928 DPRINTF(stderr,"[%u] %s: fd=%d owner=%p ib=%p, ob=%p, exp(r,w)=%u,%u ibf=%08x obf=%08x ibl=%d obl=%d si=%d\n",
929 now_ms, __FUNCTION__,
930 fd, fdtab[fd].owner,
931 ib, ob,
932 ib->rex, ob->wex,
933 ib->flags, ob->flags,
934 ib->l, ob->l, si->state);
935
936 if (unlikely(si->state != SI_ST_EST || (ob->flags & BF_SHUTW)))
937 return;
938
Willy Tarreaua456f2a2009-01-18 17:38:44 +0100939 if (!(si->flags & SI_FL_WAIT_DATA) || /* not waiting for data */
940 (fdtab[si->fd].ev & FD_POLL_OUT) || /* we'll be called anyway */
941 !(ob->send_max || ob->splice_len) || /* called with nothing to send ! */
942 !(ob->flags & (BF_HIJACK|BF_WRITE_ENA))) /* we may not write */
943 return;
944
945 retval = stream_sock_write_loop(si, ob);
946 if (retval < 0) {
947 /* Write error on the file descriptor. We mark the FD as STERROR so
948 * that we don't use it anymore and we notify the task.
949 */
950 fdtab[si->fd].state = FD_STERROR;
951 fdtab[si->fd].ev &= ~FD_POLL_STICKY;
952 si->flags |= SI_FL_ERR;
953 goto out_wakeup;
954 }
955
956 if (retval > 0 || (ob->send_max == 0 && ob->splice_len == 0)) {
957 /* the connection is established but we can't write. Either the
958 * buffer is empty, or we just refrain from sending because the
959 * send_max limit was reached. Maybe we just wrote the last
960 * chunk and need to close.
961 */
962 if (((ob->flags & (BF_SHUTW|BF_EMPTY|BF_HIJACK|BF_WRITE_ENA|BF_SHUTR)) ==
963 (BF_EMPTY|BF_WRITE_ENA|BF_SHUTR)) &&
964 (si->state == SI_ST_EST)) {
965 stream_sock_shutw(si);
966 goto out_wakeup;
967 }
968
Willy Tarreau3ffeba12008-12-14 14:42:35 +0100969 if ((ob->flags & (BF_EMPTY|BF_HIJACK|BF_WRITE_ENA)) == (BF_EMPTY|BF_WRITE_ENA))
970 si->flags |= SI_FL_WAIT_DATA;
Willy Tarreaua456f2a2009-01-18 17:38:44 +0100971 ob->wex = TICK_ETERNITY;
Willy Tarreau3ffeba12008-12-14 14:42:35 +0100972 }
973 else {
974 /* (re)start writing. */
975 si->flags &= ~SI_FL_WAIT_DATA;
976 EV_FD_COND_S(si->fd, DIR_WR);
977 }
Willy Tarreaua456f2a2009-01-18 17:38:44 +0100978
979 /* in case of special condition (error, shutdown, end of write...), we
980 * have to notify the task.
981 */
982 if (likely((ob->flags & (BF_WRITE_NULL|BF_WRITE_ERROR|BF_SHUTW)) ||
983 (!ob->to_forward && !ob->send_max && !ob->splice_len) ||
984 si->state != SI_ST_EST)) {
985 out_wakeup:
986 task_wakeup(si->owner, TASK_WOKEN_IO);
987 }
Willy Tarreau3ffeba12008-12-14 14:42:35 +0100988}
989
Willy Tarreaubaaee002006-06-26 02:48:02 +0200990
991/*
992 * Local variables:
993 * c-indent-level: 8
994 * c-basic-offset: 8
995 * End:
996 */