blob: 111071a98635076afaa0e057ea37673377b13dfe [file] [log] [blame]
/*
 * Functions managing stream_interface structures
 *
 * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
13#include <errno.h>
14#include <fcntl.h>
15#include <stdio.h>
16#include <stdlib.h>
17
18#include <sys/socket.h>
19#include <sys/stat.h>
20#include <sys/types.h>
21
Willy Tarreaubf883e02014-11-25 21:10:35 +010022#include <common/buffer.h>
Willy Tarreaucff64112008-11-03 06:26:53 +010023#include <common/compat.h>
24#include <common/config.h>
25#include <common/debug.h>
26#include <common/standard.h>
27#include <common/ticks.h>
28#include <common/time.h>
29
Willy Tarreau8a8d83b2015-04-13 13:24:54 +020030#include <proto/applet.h>
Willy Tarreauc7e42382012-08-24 19:22:53 +020031#include <proto/channel.h>
Willy Tarreau8b117082012-08-06 15:06:49 +020032#include <proto/connection.h>
Willy Tarreau96199b12012-08-24 00:46:52 +020033#include <proto/pipe.h>
Willy Tarreau87b09662015-04-03 00:22:06 +020034#include <proto/stream.h>
Willy Tarreau269358d2009-09-20 20:14:49 +020035#include <proto/stream_interface.h>
Willy Tarreaucff64112008-11-03 06:26:53 +010036#include <proto/task.h>
37
Willy Tarreaufd31e532012-07-23 18:24:25 +020038#include <types/pipe.h>
39
/* Forward declarations of the file-local handlers which are referenced by the
 * operation and callback tables defined right after them.
 */

/* handlers used when a stream interface runs as an embedded task */
static void stream_int_shutr(struct stream_interface *si);
static void stream_int_shutw(struct stream_interface *si);
static void stream_int_chk_rcv(struct stream_interface *si);
static void stream_int_chk_snd(struct stream_interface *si);

/* handlers used when a stream interface is attached to a connection */
static void stream_int_shutr_conn(struct stream_interface *si);
static void stream_int_shutw_conn(struct stream_interface *si);
static void stream_int_chk_rcv_conn(struct stream_interface *si);
static void stream_int_chk_snd_conn(struct stream_interface *si);

/* handlers used when a stream interface is attached to an applet */
static void stream_int_shutr_applet(struct stream_interface *si);
static void stream_int_shutw_applet(struct stream_interface *si);
static void stream_int_chk_rcv_applet(struct stream_interface *si);
static void stream_int_chk_snd_applet(struct stream_interface *si);

/* data-layer callbacks installed on connections */
static void si_conn_recv_cb(struct connection *conn);
static void si_conn_send_cb(struct connection *conn);
static int si_conn_wake_cb(struct connection *conn);
static int si_idle_conn_wake_cb(struct connection *conn);
static void si_idle_conn_null_cb(struct connection *conn);
Willy Tarreauf873d752012-05-11 17:47:17 +020058
Willy Tarreauc5788912012-08-24 18:12:41 +020059/* stream-interface operations for embedded tasks */
60struct si_ops si_embedded_ops = {
Willy Tarreau5c979a92012-05-07 17:15:39 +020061 .chk_rcv = stream_int_chk_rcv,
62 .chk_snd = stream_int_chk_snd,
Willy Tarreau8b3d7df2013-09-29 14:51:58 +020063 .shutr = stream_int_shutr,
64 .shutw = stream_int_shutw,
Willy Tarreau5c979a92012-05-07 17:15:39 +020065};
66
Willy Tarreauc5788912012-08-24 18:12:41 +020067/* stream-interface operations for connections */
68struct si_ops si_conn_ops = {
69 .update = stream_int_update_conn,
70 .chk_rcv = stream_int_chk_rcv_conn,
71 .chk_snd = stream_int_chk_snd_conn,
Willy Tarreau8b3d7df2013-09-29 14:51:58 +020072 .shutr = stream_int_shutr_conn,
73 .shutw = stream_int_shutw_conn,
Willy Tarreauc5788912012-08-24 18:12:41 +020074};
75
Willy Tarreaud45b9f82015-04-13 16:30:14 +020076/* stream-interface operations for connections */
77struct si_ops si_applet_ops = {
78 .update = stream_int_update_applet,
79 .chk_rcv = stream_int_chk_rcv_applet,
80 .chk_snd = stream_int_chk_snd_applet,
81 .shutr = stream_int_shutr_applet,
82 .shutw = stream_int_shutw_applet,
83};
84
Willy Tarreau74beec32012-10-03 00:41:04 +020085struct data_cb si_conn_cb = {
Willy Tarreauc5788912012-08-24 18:12:41 +020086 .recv = si_conn_recv_cb,
87 .send = si_conn_send_cb,
Willy Tarreau4aa36832012-10-02 20:07:22 +020088 .wake = si_conn_wake_cb,
Willy Tarreauc5788912012-08-24 18:12:41 +020089};
90
Willy Tarreau27375622013-12-17 00:00:28 +010091struct data_cb si_idle_conn_cb = {
92 .recv = si_idle_conn_null_cb,
93 .send = si_idle_conn_null_cb,
94 .wake = si_idle_conn_wake_cb,
95};
96
Willy Tarreaucff64112008-11-03 06:26:53 +010097/*
98 * This function only has to be called once after a wakeup event in case of
99 * suspected timeout. It controls the stream interface timeouts and sets
100 * si->flags accordingly. It does NOT close anything, as this timeout may
101 * be used for any purpose. It returns 1 if the timeout fired, otherwise
102 * zero.
103 */
104int stream_int_check_timeouts(struct stream_interface *si)
105{
106 if (tick_is_expired(si->exp, now_ms)) {
107 si->flags |= SI_FL_EXP;
108 return 1;
109 }
110 return 0;
111}
112
Willy Tarreaufe3718a2008-11-30 18:14:12 +0100113/* to be called only when in SI_ST_DIS with SI_FL_ERR */
Willy Tarreaucff64112008-11-03 06:26:53 +0100114void stream_int_report_error(struct stream_interface *si)
115{
116 if (!si->err_type)
117 si->err_type = SI_ET_DATA_ERR;
118
Willy Tarreau2bb4a962014-11-28 11:11:05 +0100119 si_oc(si)->flags |= CF_WRITE_ERROR;
120 si_ic(si)->flags |= CF_READ_ERROR;
Willy Tarreaucff64112008-11-03 06:26:53 +0100121}
122
123/*
Willy Tarreaudded32d2008-11-30 19:48:07 +0100124 * Returns a message to the client ; the connection is shut down for read,
125 * and the request is cleared so that no server connection can be initiated.
126 * The buffer is marked for read shutdown on the other side to protect the
127 * message, and the buffer write is enabled. The message is contained in a
Willy Tarreau148d0992010-01-10 10:21:21 +0100128 * "chunk". If it is null, then an empty message is used. The reply buffer does
129 * not need to be empty before this, and its contents will not be overwritten.
130 * The primary goal of this function is to return error messages to a client.
Willy Tarreaudded32d2008-11-30 19:48:07 +0100131 */
132void stream_int_retnclose(struct stream_interface *si, const struct chunk *msg)
133{
Willy Tarreauafc8a222014-11-28 15:46:27 +0100134 struct channel *ic = si_ic(si);
135 struct channel *oc = si_oc(si);
136
137 channel_auto_read(ic);
138 channel_abort(ic);
139 channel_auto_close(ic);
140 channel_erase(ic);
141 channel_truncate(oc);
Willy Tarreau798e1282010-12-12 13:06:00 +0100142
Willy Tarreau148d0992010-01-10 10:21:21 +0100143 if (likely(msg && msg->len))
Willy Tarreauafc8a222014-11-28 15:46:27 +0100144 bo_inject(oc, msg->str, msg->len);
Willy Tarreaudded32d2008-11-30 19:48:07 +0100145
Willy Tarreauafc8a222014-11-28 15:46:27 +0100146 oc->wex = tick_add_ifset(now_ms, oc->wto);
147 channel_auto_read(oc);
148 channel_auto_close(oc);
149 channel_shutr_now(oc);
Willy Tarreau5d881d02009-12-27 22:51:06 +0100150}
151
Willy Tarreau4a36b562012-08-06 19:31:45 +0200152/*
Willy Tarreaud45b9f82015-04-13 16:30:14 +0200153 * This function performs a shutdown-read on a detached stream interface in a
154 * connected or init state (it does nothing for other states). It either shuts
155 * the read side or marks itself as closed. The buffer flags are updated to
156 * reflect the new state. If the stream interface has SI_FL_NOHALF, we also
157 * forward the close to the write side. The owner task is woken up if it exists.
Willy Tarreau4a36b562012-08-06 19:31:45 +0200158 */
Willy Tarreau6fe15412013-09-29 15:16:03 +0200159static void stream_int_shutr(struct stream_interface *si)
Willy Tarreaufb90d942009-09-05 20:57:35 +0200160{
Willy Tarreauafc8a222014-11-28 15:46:27 +0100161 struct channel *ic = si_ic(si);
162
163 ic->flags &= ~CF_SHUTR_NOW;
164 if (ic->flags & CF_SHUTR)
Willy Tarreau6fe15412013-09-29 15:16:03 +0200165 return;
Willy Tarreauafc8a222014-11-28 15:46:27 +0100166 ic->flags |= CF_SHUTR;
167 ic->rex = TICK_ETERNITY;
Willy Tarreaufb90d942009-09-05 20:57:35 +0200168 si->flags &= ~SI_FL_WAIT_ROOM;
169
170 if (si->state != SI_ST_EST && si->state != SI_ST_CON)
Willy Tarreau6fe15412013-09-29 15:16:03 +0200171 return;
Willy Tarreaufb90d942009-09-05 20:57:35 +0200172
Willy Tarreau2bb4a962014-11-28 11:11:05 +0100173 if (si_oc(si)->flags & CF_SHUTW) {
Willy Tarreaufb90d942009-09-05 20:57:35 +0200174 si->state = SI_ST_DIS;
175 si->exp = TICK_ETERNITY;
Willy Tarreaud8ccffe2010-09-07 16:16:50 +0200176 }
Willy Tarreau4a36b562012-08-06 19:31:45 +0200177 else if (si->flags & SI_FL_NOHALF) {
178 /* we want to immediately forward this close to the write side */
179 return stream_int_shutw(si);
180 }
Willy Tarreau0bd05ea2010-07-02 11:18:03 +0200181
Willy Tarreau4a36b562012-08-06 19:31:45 +0200182 /* note that if the task exists, it must unregister itself once it runs */
Willy Tarreau07373b82014-11-28 12:08:47 +0100183 if (!(si->flags & SI_FL_DONT_WAKE))
184 task_wakeup(si_task(si), TASK_WOKEN_IO);
Willy Tarreaufb90d942009-09-05 20:57:35 +0200185}
186
Willy Tarreau4a36b562012-08-06 19:31:45 +0200187/*
Willy Tarreaud45b9f82015-04-13 16:30:14 +0200188 * This function performs a shutdown-write on a detached stream interface in a
189 * connected or init state (it does nothing for other states). It either shuts
190 * the write side or marks itself as closed. The buffer flags are updated to
191 * reflect the new state. It does also close everything if the SI was marked as
192 * being in error state. The owner task is woken up if it exists.
Willy Tarreau4a36b562012-08-06 19:31:45 +0200193 */
Willy Tarreau6fe15412013-09-29 15:16:03 +0200194static void stream_int_shutw(struct stream_interface *si)
Willy Tarreaufb90d942009-09-05 20:57:35 +0200195{
Willy Tarreauafc8a222014-11-28 15:46:27 +0100196 struct channel *ic = si_ic(si);
197 struct channel *oc = si_oc(si);
198
199 oc->flags &= ~CF_SHUTW_NOW;
200 if (oc->flags & CF_SHUTW)
Willy Tarreau6fe15412013-09-29 15:16:03 +0200201 return;
Willy Tarreauafc8a222014-11-28 15:46:27 +0100202 oc->flags |= CF_SHUTW;
203 oc->wex = TICK_ETERNITY;
Willy Tarreaufb90d942009-09-05 20:57:35 +0200204 si->flags &= ~SI_FL_WAIT_DATA;
205
206 switch (si->state) {
207 case SI_ST_EST:
Willy Tarreau4a36b562012-08-06 19:31:45 +0200208 /* we have to shut before closing, otherwise some short messages
209 * may never leave the system, especially when there are remaining
210 * unread data in the socket input buffer, or when nolinger is set.
211 * However, if SI_FL_NOLINGER is explicitly set, we know there is
212 * no risk so we close both sides immediately.
213 */
Willy Tarreau8b3d7df2013-09-29 14:51:58 +0200214 if (!(si->flags & (SI_FL_ERR | SI_FL_NOLINGER)) &&
Willy Tarreauafc8a222014-11-28 15:46:27 +0100215 !(ic->flags & (CF_SHUTR|CF_DONT_READ)))
Willy Tarreau6fe15412013-09-29 15:16:03 +0200216 return;
Willy Tarreaufb90d942009-09-05 20:57:35 +0200217
218 /* fall through */
219 case SI_ST_CON:
220 case SI_ST_CER:
Willy Tarreau32d3ee92010-12-29 14:03:02 +0100221 case SI_ST_QUE:
222 case SI_ST_TAR:
Willy Tarreau8b3d7df2013-09-29 14:51:58 +0200223 /* Note that none of these states may happen with applets */
Willy Tarreaufb90d942009-09-05 20:57:35 +0200224 si->state = SI_ST_DIS;
Willy Tarreaufb90d942009-09-05 20:57:35 +0200225 default:
Willy Tarreau8b3d7df2013-09-29 14:51:58 +0200226 si->flags &= ~(SI_FL_WAIT_ROOM | SI_FL_NOLINGER);
Willy Tarreauafc8a222014-11-28 15:46:27 +0100227 ic->flags &= ~CF_SHUTR_NOW;
228 ic->flags |= CF_SHUTR;
229 ic->rex = TICK_ETERNITY;
Willy Tarreaufb90d942009-09-05 20:57:35 +0200230 si->exp = TICK_ETERNITY;
231 }
232
Willy Tarreau4a36b562012-08-06 19:31:45 +0200233 /* note that if the task exists, it must unregister itself once it runs */
Willy Tarreau07373b82014-11-28 12:08:47 +0100234 if (!(si->flags & SI_FL_DONT_WAKE))
235 task_wakeup(si_task(si), TASK_WOKEN_IO);
Willy Tarreaufb90d942009-09-05 20:57:35 +0200236}
237
238/* default chk_rcv function for scheduled tasks */
Willy Tarreauf873d752012-05-11 17:47:17 +0200239static void stream_int_chk_rcv(struct stream_interface *si)
Willy Tarreaufb90d942009-09-05 20:57:35 +0200240{
Willy Tarreauafc8a222014-11-28 15:46:27 +0100241 struct channel *ic = si_ic(si);
Willy Tarreaufb90d942009-09-05 20:57:35 +0200242
Willy Tarreauafc8a222014-11-28 15:46:27 +0100243 DPRINTF(stderr, "%s: si=%p, si->state=%d ic->flags=%08x oc->flags=%08x\n",
Willy Tarreaufb90d942009-09-05 20:57:35 +0200244 __FUNCTION__,
Willy Tarreauafc8a222014-11-28 15:46:27 +0100245 si, si->state, ic->flags, si_oc(si)->flags);
Willy Tarreaufb90d942009-09-05 20:57:35 +0200246
Willy Tarreauafc8a222014-11-28 15:46:27 +0100247 if (unlikely(si->state != SI_ST_EST || (ic->flags & (CF_SHUTR|CF_DONT_READ))))
Willy Tarreaufb90d942009-09-05 20:57:35 +0200248 return;
249
Willy Tarreauafc8a222014-11-28 15:46:27 +0100250 if (!channel_may_recv(ic) || ic->pipe) {
Willy Tarreaufb90d942009-09-05 20:57:35 +0200251 /* stop reading */
Willy Tarreau3bf1b2b2012-08-27 20:46:07 +0200252 si->flags |= SI_FL_WAIT_ROOM;
Willy Tarreaufb90d942009-09-05 20:57:35 +0200253 }
254 else {
255 /* (re)start reading */
256 si->flags &= ~SI_FL_WAIT_ROOM;
Willy Tarreau07373b82014-11-28 12:08:47 +0100257 if (!(si->flags & SI_FL_DONT_WAKE))
258 task_wakeup(si_task(si), TASK_WOKEN_IO);
Willy Tarreaufb90d942009-09-05 20:57:35 +0200259 }
260}
261
262/* default chk_snd function for scheduled tasks */
Willy Tarreauf873d752012-05-11 17:47:17 +0200263static void stream_int_chk_snd(struct stream_interface *si)
Willy Tarreaufb90d942009-09-05 20:57:35 +0200264{
Willy Tarreauafc8a222014-11-28 15:46:27 +0100265 struct channel *oc = si_oc(si);
Willy Tarreaufb90d942009-09-05 20:57:35 +0200266
Willy Tarreauafc8a222014-11-28 15:46:27 +0100267 DPRINTF(stderr, "%s: si=%p, si->state=%d ic->flags=%08x oc->flags=%08x\n",
Willy Tarreaufb90d942009-09-05 20:57:35 +0200268 __FUNCTION__,
Willy Tarreauafc8a222014-11-28 15:46:27 +0100269 si, si->state, si_ic(si)->flags, oc->flags);
Willy Tarreaufb90d942009-09-05 20:57:35 +0200270
Willy Tarreauafc8a222014-11-28 15:46:27 +0100271 if (unlikely(si->state != SI_ST_EST || (oc->flags & CF_SHUTW)))
Willy Tarreaufb90d942009-09-05 20:57:35 +0200272 return;
273
274 if (!(si->flags & SI_FL_WAIT_DATA) || /* not waiting for data */
Willy Tarreauafc8a222014-11-28 15:46:27 +0100275 channel_is_empty(oc)) /* called with nothing to send ! */
Willy Tarreaufb90d942009-09-05 20:57:35 +0200276 return;
277
278 /* Otherwise there are remaining data to be sent in the buffer,
279 * so we tell the handler.
280 */
281 si->flags &= ~SI_FL_WAIT_DATA;
Willy Tarreauafc8a222014-11-28 15:46:27 +0100282 if (!tick_isset(oc->wex))
283 oc->wex = tick_add_ifset(now_ms, oc->wto);
Willy Tarreaufb90d942009-09-05 20:57:35 +0200284
Willy Tarreau07373b82014-11-28 12:08:47 +0100285 if (!(si->flags & SI_FL_DONT_WAKE))
286 task_wakeup(si_task(si), TASK_WOKEN_IO);
Willy Tarreaufb90d942009-09-05 20:57:35 +0200287}
288
/* Registers applet <app> to handle stream interface <si> as a new appctx,
 * allocated through si_alloc_appctx(). On success, si_applet_cant_get() is
 * called on the stream interface, the new appctx is woken up, and the appctx
 * attached to <si> is returned. NULL is returned when the appctx could not be
 * allocated. The appctx must be deleted by the task handler using
 * si_release_endpoint(), possibly from within the function itself.
 */
struct appctx *stream_int_register_handler(struct stream_interface *si, struct applet *app)
{
	struct appctx *ctx;

	DPRINTF(stderr, "registering handler %p for si %p (was %p)\n", app, si, si_task(si));

	ctx = si_alloc_appctx(si, app);
	if (ctx == NULL)
		return NULL;

	si_applet_cant_get(si);
	appctx_wakeup(ctx);

	return si_appctx(si);
}
309
Willy Tarreau2c6be842012-07-06 17:12:34 +0200310/* This callback is used to send a valid PROXY protocol line to a socket being
Willy Tarreauafad0e02012-08-09 14:45:22 +0200311 * established. It returns 0 if it fails in a fatal way or needs to poll to go
312 * further, otherwise it returns non-zero and removes itself from the connection's
Willy Tarreaua1a74742012-08-24 12:14:49 +0200313 * flags (the bit is provided in <flag> by the caller). It is designed to be
314 * called by the connection handler and relies on it to commit polling changes.
Willy Tarreau57cd3e42013-10-24 22:01:26 +0200315 * Note that it can emit a PROXY line by relying on the other end's address
316 * when the connection is attached to a stream interface, or by resolving the
317 * local address otherwise (also called a LOCAL line).
Willy Tarreau2c6be842012-07-06 17:12:34 +0200318 */
319int conn_si_send_proxy(struct connection *conn, unsigned int flag)
320{
Willy Tarreau2c6be842012-07-06 17:12:34 +0200321 /* we might have been called just after an asynchronous shutw */
Willy Tarreaua1a74742012-08-24 12:14:49 +0200322 if (conn->flags & CO_FL_SOCK_WR_SH)
Willy Tarreau2c6be842012-07-06 17:12:34 +0200323 goto out_error;
324
Willy Tarreaud02cdd22013-12-15 10:23:20 +0100325 if (!conn_ctrl_ready(conn))
Willy Tarreauf79c8172013-10-21 16:30:56 +0200326 goto out_error;
327
Willy Tarreau2c6be842012-07-06 17:12:34 +0200328 /* If we have a PROXY line to send, we'll use this to validate the
329 * connection, in which case the connection is validated only once
330 * we've sent the whole proxy line. Otherwise we use connect().
331 */
Willy Tarreaub8020ce2013-10-24 21:10:08 +0200332 while (conn->send_proxy_ofs) {
Willy Tarreau2c6be842012-07-06 17:12:34 +0200333 int ret;
334
335 /* The target server expects a PROXY line to be sent first.
336 * If the send_proxy_ofs is negative, it corresponds to the
337 * offset to start sending from then end of the proxy string
338 * (which is recomputed every time since it's constant). If
339 * it is positive, it means we have to send from the start.
Willy Tarreau57cd3e42013-10-24 22:01:26 +0200340 * We can only send a "normal" PROXY line when the connection
341 * is attached to a stream interface. Otherwise we can only
342 * send a LOCAL line (eg: for use with health checks).
Willy Tarreau2c6be842012-07-06 17:12:34 +0200343 */
Willy Tarreau57cd3e42013-10-24 22:01:26 +0200344 if (conn->data == &si_conn_cb) {
345 struct stream_interface *si = conn->owner;
Willy Tarreau50fe03b2014-11-28 13:59:31 +0100346 struct connection *remote = objt_conn(si_opposite(si)->end);
David Safb76832014-05-08 23:42:08 -0400347 ret = make_proxy_line(trash.str, trash.size, objt_server(conn->target), remote);
Willy Tarreau57cd3e42013-10-24 22:01:26 +0200348 }
349 else {
350 /* The target server expects a LOCAL line to be sent first. Retrieving
351 * local or remote addresses may fail until the connection is established.
352 */
353 conn_get_from_addr(conn);
354 if (!(conn->flags & CO_FL_ADDR_FROM_SET))
355 goto out_wait;
356
357 conn_get_to_addr(conn);
358 if (!(conn->flags & CO_FL_ADDR_TO_SET))
359 goto out_wait;
360
David Safb76832014-05-08 23:42:08 -0400361 ret = make_proxy_line(trash.str, trash.size, objt_server(conn->target), conn);
Willy Tarreau57cd3e42013-10-24 22:01:26 +0200362 }
Willy Tarreaub363a1f2013-10-01 10:45:07 +0200363
Willy Tarreau2c6be842012-07-06 17:12:34 +0200364 if (!ret)
365 goto out_error;
366
Willy Tarreaub8020ce2013-10-24 21:10:08 +0200367 if (conn->send_proxy_ofs > 0)
368 conn->send_proxy_ofs = -ret; /* first call */
Willy Tarreau2c6be842012-07-06 17:12:34 +0200369
Willy Tarreaua1a74742012-08-24 12:14:49 +0200370 /* we have to send trash from (ret+sp for -sp bytes). If the
371 * data layer has a pending write, we'll also set MSG_MORE.
372 */
Willy Tarreau0a03c0f2015-03-13 00:05:28 +0100373 ret = conn_sock_send(conn, trash.str + ret + conn->send_proxy_ofs, -conn->send_proxy_ofs,
374 (conn->flags & CO_FL_DATA_WR_ENA) ? MSG_MORE : 0);
Willy Tarreau2c6be842012-07-06 17:12:34 +0200375
Willy Tarreau0a03c0f2015-03-13 00:05:28 +0100376 if (ret < 0)
Willy Tarreau2c6be842012-07-06 17:12:34 +0200377 goto out_error;
Willy Tarreau2c6be842012-07-06 17:12:34 +0200378
Willy Tarreaub8020ce2013-10-24 21:10:08 +0200379 conn->send_proxy_ofs += ret; /* becomes zero once complete */
380 if (conn->send_proxy_ofs != 0)
Willy Tarreau2c6be842012-07-06 17:12:34 +0200381 goto out_wait;
382
383 /* OK we've sent the whole line, we're connected */
Willy Tarreau7fe45692013-12-04 23:37:56 +0100384 break;
Willy Tarreau2c6be842012-07-06 17:12:34 +0200385 }
386
Willy Tarreaua1a74742012-08-24 12:14:49 +0200387 /* The connection is ready now, simply return and let the connection
388 * handler notify upper layers if needed.
Willy Tarreau2c6be842012-07-06 17:12:34 +0200389 */
390 if (conn->flags & CO_FL_WAIT_L4_CONN)
391 conn->flags &= ~CO_FL_WAIT_L4_CONN;
Willy Tarreau2c6be842012-07-06 17:12:34 +0200392 conn->flags &= ~flag;
Willy Tarreauafad0e02012-08-09 14:45:22 +0200393 return 1;
Willy Tarreau2c6be842012-07-06 17:12:34 +0200394
395 out_error:
Willy Tarreauafad0e02012-08-09 14:45:22 +0200396 /* Write error on the file descriptor */
Willy Tarreau2c6be842012-07-06 17:12:34 +0200397 conn->flags |= CO_FL_ERROR;
Willy Tarreauafad0e02012-08-09 14:45:22 +0200398 return 0;
Willy Tarreau2c6be842012-07-06 17:12:34 +0200399
400 out_wait:
Willy Tarreaua1a74742012-08-24 12:14:49 +0200401 __conn_sock_stop_recv(conn);
Willy Tarreauafad0e02012-08-09 14:45:22 +0200402 return 0;
Willy Tarreau2c6be842012-07-06 17:12:34 +0200403}
404
Willy Tarreau27375622013-12-17 00:00:28 +0100405
/* Tiny I/O callback called on recv/send I/O events on idle connections. It
 * only drains the socket through conn_sock_drain() and ignores its result.
 * NOTE(review): presumably the drain is what lets a pending shutdown show up
 * as CO_FL_SOCK_RD_SH so that si_idle_conn_wake_cb() can kill the connection;
 * confirm against conn_sock_drain().
 */
static void si_idle_conn_null_cb(struct connection *conn)
{
	(void)conn_sock_drain(conn);
}
414
415/* Callback to be used by connection I/O handlers when some activity is detected
416 * on an idle server connection. Its main purpose is to kill the connection once
417 * a close was detected on it. It returns 0 if it did nothing serious, or -1 if
418 * it killed the connection.
419 */
420static int si_idle_conn_wake_cb(struct connection *conn)
421{
422 struct stream_interface *si = conn->owner;
423
424 if (!conn_ctrl_ready(conn))
425 return 0;
426
427 if (conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH)) {
428 /* warning, we can't do anything on <conn> after this call ! */
Willy Tarreauc4b56e42015-09-23 17:56:02 +0200429 si_release_endpoint(si);
Willy Tarreau27375622013-12-17 00:00:28 +0100430 return -1;
431 }
432 return 0;
433}
434
Willy Tarreau615f28b2015-09-23 18:40:09 +0200435/* This function is the equivalent to stream_int_update() except that it's
436 * designed to be called from outside the stream handlers, typically the lower
437 * layers (applets, connections) after I/O completion. After updating the stream
438 * interface and timeouts, it will try to forward what can be forwarded, then to
439 * wake the associated task up if an important event requires special handling.
440 * It should not be called from within the stream itself, stream_int_update()
441 * is designed for this.
442 */
443void stream_int_notify(struct stream_interface *si)
444{
445 struct channel *ic = si_ic(si);
446 struct channel *oc = si_oc(si);
447
448 /* process consumer side */
449 if (channel_is_empty(oc)) {
450 if (((oc->flags & (CF_SHUTW|CF_SHUTW_NOW)) == CF_SHUTW_NOW) &&
451 (si->state == SI_ST_EST))
452 si_shutw(si);
453 oc->wex = TICK_ETERNITY;
454 }
455
456 /* indicate that we may be waiting for data from the output channel */
457 if ((oc->flags & (CF_SHUTW|CF_SHUTW_NOW)) == 0 && channel_may_recv(oc))
458 si->flags |= SI_FL_WAIT_DATA;
459
460 /* update OC timeouts and wake the other side up if it's waiting for room */
461 if (oc->flags & CF_WRITE_ACTIVITY) {
462 if ((oc->flags & (CF_SHUTW|CF_WRITE_PARTIAL)) == CF_WRITE_PARTIAL &&
463 !channel_is_empty(oc))
464 if (tick_isset(oc->wex))
465 oc->wex = tick_add_ifset(now_ms, oc->wto);
466
467 if (!(si->flags & SI_FL_INDEP_STR))
468 if (tick_isset(ic->rex))
469 ic->rex = tick_add_ifset(now_ms, ic->rto);
470
471 if (likely((oc->flags & (CF_SHUTW|CF_WRITE_PARTIAL|CF_DONT_READ)) == CF_WRITE_PARTIAL &&
472 channel_may_recv(oc) &&
473 (si_opposite(si)->flags & SI_FL_WAIT_ROOM)))
474 si_chk_rcv(si_opposite(si));
475 }
476
477 /* Notify the other side when we've injected data into the IC that
478 * needs to be forwarded. We can do fast-forwarding as soon as there
479 * are output data, but we avoid doing this if some of the data are
480 * not yet scheduled for being forwarded, because it is very likely
481 * that it will be done again immediately afterwards once the following
482 * data are parsed (eg: HTTP chunking). We only SI_FL_WAIT_ROOM once
483 * we've emptied *some* of the output buffer, and not just when there
484 * is available room, because applets are often forced to stop before
485 * the buffer is full. We must not stop based on input data alone because
486 * an HTTP parser might need more data to complete the parsing.
487 */
488 if (!channel_is_empty(ic) &&
489 (si_opposite(si)->flags & SI_FL_WAIT_DATA) &&
490 (ic->buf->i == 0 || ic->pipe)) {
491 int new_len, last_len;
492
493 last_len = ic->buf->o;
494 if (ic->pipe)
495 last_len += ic->pipe->data;
496
497 si_chk_snd(si_opposite(si));
498
499 new_len = ic->buf->o;
500 if (ic->pipe)
501 new_len += ic->pipe->data;
502
503 /* check if the consumer has freed some space either in the
504 * buffer or in the pipe.
505 */
506 if (channel_may_recv(ic) && new_len < last_len)
507 si->flags &= ~SI_FL_WAIT_ROOM;
508 }
509
510 if (si->flags & SI_FL_WAIT_ROOM) {
511 ic->rex = TICK_ETERNITY;
512 }
513 else if ((ic->flags & (CF_SHUTR|CF_READ_PARTIAL|CF_DONT_READ)) == CF_READ_PARTIAL &&
514 channel_may_recv(ic)) {
515 /* we must re-enable reading if si_chk_snd() has freed some space */
516 if (!(ic->flags & CF_READ_NOEXP) && tick_isset(ic->rex))
517 ic->rex = tick_add_ifset(now_ms, ic->rto);
518 }
519
520 /* wake the task up only when needed */
521 if (/* changes on the production side */
522 (ic->flags & (CF_READ_NULL|CF_READ_ERROR)) ||
523 si->state != SI_ST_EST ||
524 (si->flags & SI_FL_ERR) ||
525 ((ic->flags & CF_READ_PARTIAL) &&
526 (!ic->to_forward || si_opposite(si)->state != SI_ST_EST)) ||
527
528 /* changes on the consumption side */
529 (oc->flags & (CF_WRITE_NULL|CF_WRITE_ERROR)) ||
530 ((oc->flags & CF_WRITE_ACTIVITY) &&
531 ((oc->flags & CF_SHUTW) ||
532 ((oc->flags & CF_WAKE_WRITE) &&
533 (si_opposite(si)->state != SI_ST_EST ||
534 (channel_is_empty(oc) && !oc->to_forward)))))) {
535 task_wakeup(si_task(si), TASK_WOKEN_IO);
536 }
537 if (ic->flags & CF_READ_ACTIVITY)
538 ic->flags &= ~CF_READ_DONTWAIT;
539
540 stream_release_buffers(si_strm(si));
541}
542
543
Willy Tarreau651e1822015-09-23 20:06:13 +0200544/* Callback to be used by connection I/O handlers upon completion. It propagates
545 * connection flags to the stream interface, updates the stream (which may or
546 * may not take this opportunity to try to forward data), then update the
547 * connection's polling based on the channels and stream interface's final
548 * states. The function always returns 0.
Willy Tarreau100c4672012-08-20 12:06:26 +0200549 */
Willy Tarreau2396c1c2012-10-03 21:12:16 +0200550static int si_conn_wake_cb(struct connection *conn)
Willy Tarreaufd31e532012-07-23 18:24:25 +0200551{
Willy Tarreaue603e692012-09-27 22:20:41 +0200552 struct stream_interface *si = conn->owner;
Willy Tarreauafc8a222014-11-28 15:46:27 +0100553 struct channel *ic = si_ic(si);
554 struct channel *oc = si_oc(si);
Willy Tarreaufd31e532012-07-23 18:24:25 +0200555
Willy Tarreau651e1822015-09-23 20:06:13 +0200556 /* First step, report to the stream-int what was detected at the
557 * connection layer : errors and connection establishment.
558 */
Willy Tarreau3c55ec22012-07-23 19:19:51 +0200559 if (conn->flags & CO_FL_ERROR)
560 si->flags |= SI_FL_ERR;
561
Willy Tarreauc76ae332012-07-12 15:32:13 +0200562 if (unlikely(!(conn->flags & (CO_FL_WAIT_L4_CONN | CO_FL_WAIT_L6_CONN | CO_FL_CONNECTED)))) {
Willy Tarreau8f8c92f2012-07-23 19:45:44 +0200563 si->exp = TICK_ETERNITY;
Willy Tarreauafc8a222014-11-28 15:46:27 +0100564 oc->flags |= CF_WRITE_NULL;
Willy Tarreau8f8c92f2012-07-23 19:45:44 +0200565 }
566
Willy Tarreau651e1822015-09-23 20:06:13 +0200567 /* Second step : update the stream-int and channels, try to forward any
568 * pending data, then possibly wake the stream up based on the new
569 * stream-int status.
Willy Tarreau44b5dc62012-08-24 12:12:53 +0200570 */
Willy Tarreau651e1822015-09-23 20:06:13 +0200571 stream_int_notify(si);
Willy Tarreauea3cc482015-09-23 19:37:00 +0200572
Willy Tarreau651e1822015-09-23 20:06:13 +0200573 /* Third step : update the connection's polling status based on what
574 * was done above (eg: maybe some buffers got emptied).
575 */
576 if (channel_is_empty(oc))
577 __conn_data_stop_send(conn);
Willy Tarreauea3cc482015-09-23 19:37:00 +0200578
Willy Tarreaufd31e532012-07-23 18:24:25 +0200579
Willy Tarreau44b5dc62012-08-24 12:12:53 +0200580 if (si->flags & SI_FL_WAIT_ROOM) {
Willy Tarreauf16723e2012-08-24 12:52:22 +0200581 __conn_data_stop_recv(conn);
Willy Tarreau44b5dc62012-08-24 12:12:53 +0200582 }
Willy Tarreauafc8a222014-11-28 15:46:27 +0100583 else if ((ic->flags & (CF_SHUTR|CF_READ_PARTIAL|CF_DONT_READ)) == CF_READ_PARTIAL &&
584 channel_may_recv(ic)) {
Willy Tarreau9f7c6a12012-11-19 16:43:14 +0100585 __conn_data_want_recv(conn);
Willy Tarreaufd31e532012-07-23 18:24:25 +0200586 }
Willy Tarreau2396c1c2012-10-03 21:12:16 +0200587 return 0;
Willy Tarreaufd31e532012-07-23 18:24:25 +0200588}
Willy Tarreau2c6be842012-07-06 17:12:34 +0200589
Willy Tarreau5368d802012-08-21 18:22:06 +0200590/*
591 * This function is called to send buffer data to a stream socket.
Godbache68e02d2013-10-11 15:48:29 +0800592 * It calls the transport layer's snd_buf function. It relies on the
Godbach4f489902013-12-04 17:24:06 +0800593 * caller to commit polling changes. The caller should check conn->flags
594 * for errors.
Willy Tarreau5368d802012-08-21 18:22:06 +0200595 */
Godbach4f489902013-12-04 17:24:06 +0800596static void si_conn_send(struct connection *conn)
Willy Tarreau5368d802012-08-21 18:22:06 +0200597{
Willy Tarreaue603e692012-09-27 22:20:41 +0200598 struct stream_interface *si = conn->owner;
Willy Tarreauafc8a222014-11-28 15:46:27 +0100599 struct channel *oc = si_oc(si);
Willy Tarreau5368d802012-08-21 18:22:06 +0200600 int ret;
601
Willy Tarreauafc8a222014-11-28 15:46:27 +0100602 if (oc->pipe && conn->xprt->snd_pipe) {
603 ret = conn->xprt->snd_pipe(conn, oc->pipe);
Willy Tarreau96199b12012-08-24 00:46:52 +0200604 if (ret > 0)
Willy Tarreauafc8a222014-11-28 15:46:27 +0100605 oc->flags |= CF_WRITE_PARTIAL | CF_WROTE_DATA;
Willy Tarreau5368d802012-08-21 18:22:06 +0200606
Willy Tarreauafc8a222014-11-28 15:46:27 +0100607 if (!oc->pipe->data) {
608 put_pipe(oc->pipe);
609 oc->pipe = NULL;
Willy Tarreau5368d802012-08-21 18:22:06 +0200610 }
611
Willy Tarreau96199b12012-08-24 00:46:52 +0200612 if (conn->flags & CO_FL_ERROR)
Godbach4f489902013-12-04 17:24:06 +0800613 return;
Willy Tarreau5368d802012-08-21 18:22:06 +0200614 }
615
616 /* At this point, the pipe is empty, but we may still have data pending
617 * in the normal buffer.
618 */
Willy Tarreauafc8a222014-11-28 15:46:27 +0100619 if (!oc->buf->o)
Godbach4f489902013-12-04 17:24:06 +0800620 return;
Willy Tarreau5368d802012-08-21 18:22:06 +0200621
Godbache68e02d2013-10-11 15:48:29 +0800622 /* when we're here, we already know that there is no spliced
Willy Tarreau5368d802012-08-21 18:22:06 +0200623 * data left, and that there are sendable buffered data.
624 */
Willy Tarreau310987a2014-01-22 19:46:33 +0100625 if (!(conn->flags & (CO_FL_ERROR | CO_FL_SOCK_WR_SH | CO_FL_DATA_WR_SH | CO_FL_WAIT_DATA | CO_FL_HANDSHAKE))) {
Willy Tarreau5368d802012-08-21 18:22:06 +0200626 /* check if we want to inform the kernel that we're interested in
627 * sending more data after this call. We want this if :
628 * - we're about to close after this last send and want to merge
629 * the ongoing FIN with the last segment.
630 * - we know we can't send everything at once and must get back
631 * here because of unaligned data
632 * - there is still a finite amount of data to forward
633 * The test is arranged so that the most common case does only 2
634 * tests.
635 */
Willy Tarreau1049b1f2014-02-02 01:51:17 +0100636 unsigned int send_flag = 0;
Willy Tarreau5368d802012-08-21 18:22:06 +0200637
Willy Tarreauafc8a222014-11-28 15:46:27 +0100638 if ((!(oc->flags & (CF_NEVER_WAIT|CF_SEND_DONTWAIT)) &&
639 ((oc->to_forward && oc->to_forward != CHN_INFINITE_FORWARD) ||
640 (oc->flags & CF_EXPECT_MORE))) ||
641 ((oc->flags & (CF_SHUTW|CF_SHUTW_NOW)) == CF_SHUTW_NOW))
Willy Tarreau1049b1f2014-02-02 01:51:17 +0100642 send_flag |= CO_SFL_MSG_MORE;
Willy Tarreau5368d802012-08-21 18:22:06 +0200643
Willy Tarreauafc8a222014-11-28 15:46:27 +0100644 if (oc->flags & CF_STREAMER)
Willy Tarreau7bed9452014-02-02 02:00:24 +0100645 send_flag |= CO_SFL_STREAMER;
646
Willy Tarreauafc8a222014-11-28 15:46:27 +0100647 ret = conn->xprt->snd_buf(conn, oc->buf, send_flag);
Godbache68e02d2013-10-11 15:48:29 +0800648 if (ret > 0) {
Willy Tarreauafc8a222014-11-28 15:46:27 +0100649 oc->flags |= CF_WRITE_PARTIAL | CF_WROTE_DATA;
Willy Tarreau5368d802012-08-21 18:22:06 +0200650
Willy Tarreauafc8a222014-11-28 15:46:27 +0100651 if (!oc->buf->o) {
Godbache68e02d2013-10-11 15:48:29 +0800652 /* Always clear both flags once everything has been sent, they're one-shot */
Willy Tarreauafc8a222014-11-28 15:46:27 +0100653 oc->flags &= ~(CF_EXPECT_MORE | CF_SEND_DONTWAIT);
Godbache68e02d2013-10-11 15:48:29 +0800654 }
Willy Tarreau5368d802012-08-21 18:22:06 +0200655
Godbache68e02d2013-10-11 15:48:29 +0800656 /* if some data remain in the buffer, it's only because the
657 * system buffers are full, we will try next time.
658 */
Willy Tarreau5368d802012-08-21 18:22:06 +0200659 }
Godbache68e02d2013-10-11 15:48:29 +0800660 }
Willy Tarreau5368d802012-08-21 18:22:06 +0200661}
662
Willy Tarreau25f13102015-09-24 11:32:22 +0200663/* This function is designed to be called from within the stream handler to
664 * update the channels' expiration timers and the stream interface's flags
665 * based on the channels' flags. It needs to be called only once after the
666 * channels' flags have settled down, and before they are cleared, though it
667 * doesn't harm to call it as often as desired (it just slightly hurts
668 * performance). It must not be called from outside of the stream handler,
669 * as what it does will be used to compute the stream task's expiration.
670 */
671void stream_int_update(struct stream_interface *si)
672{
673 struct channel *ic = si_ic(si);
674 struct channel *oc = si_oc(si);
675
676 if (!(ic->flags & CF_SHUTR)) {
677 /* Read not closed, update FD status and timeout for reads */
678 if ((ic->flags & CF_DONT_READ) || !channel_may_recv(ic)) {
679 /* stop reading */
680 if (!(si->flags & SI_FL_WAIT_ROOM)) {
681 if (!(ic->flags & CF_DONT_READ)) /* full */
682 si->flags |= SI_FL_WAIT_ROOM;
683 ic->rex = TICK_ETERNITY;
684 }
685 }
686 else {
687 /* (re)start reading and update timeout. Note: we don't recompute the timeout
688 * everytime we get here, otherwise it would risk never to expire. We only
689 * update it if is was not yet set. The stream socket handler will already
690 * have updated it if there has been a completed I/O.
691 */
692 si->flags &= ~SI_FL_WAIT_ROOM;
693 if (!(ic->flags & (CF_READ_NOEXP|CF_DONT_READ)) && !tick_isset(ic->rex))
694 ic->rex = tick_add_ifset(now_ms, ic->rto);
695 }
696 }
697
698 if (!(oc->flags & CF_SHUTW)) {
699 /* Write not closed, update FD status and timeout for writes */
700 if (channel_is_empty(oc)) {
701 /* stop writing */
702 if (!(si->flags & SI_FL_WAIT_DATA)) {
703 if ((oc->flags & CF_SHUTW_NOW) == 0)
704 si->flags |= SI_FL_WAIT_DATA;
705 oc->wex = TICK_ETERNITY;
706 }
707 }
708 else {
709 /* (re)start writing and update timeout. Note: we don't recompute the timeout
710 * everytime we get here, otherwise it would risk never to expire. We only
711 * update it if is was not yet set. The stream socket handler will already
712 * have updated it if there has been a completed I/O.
713 */
714 si->flags &= ~SI_FL_WAIT_DATA;
715 if (!tick_isset(oc->wex)) {
716 oc->wex = tick_add_ifset(now_ms, oc->wto);
717 if (tick_isset(ic->rex) && !(si->flags & SI_FL_INDEP_STR)) {
718 /* Note: depending on the protocol, we don't know if we're waiting
719 * for incoming data or not. So in order to prevent the socket from
720 * expiring read timeouts during writes, we refresh the read timeout,
721 * except if it was already infinite or if we have explicitly setup
722 * independent streams.
723 */
724 ic->rex = tick_add_ifset(now_ms, ic->rto);
725 }
726 }
727 }
728 }
729}
730
Willy Tarreau452c7d52015-09-25 10:39:16 +0200731/* Updates the polling status of a connection outside of the connection handler
732 * based on the channel's flags and the stream interface's flags. It needs to be
733 * called once after the channels' flags have settled down and the stream has
734 * been updated. It is not designed to be called from within the connection
735 * handler itself.
Willy Tarreau100c4672012-08-20 12:06:26 +0200736 */
737void stream_int_update_conn(struct stream_interface *si)
738{
Willy Tarreauafc8a222014-11-28 15:46:27 +0100739 struct channel *ic = si_ic(si);
740 struct channel *oc = si_oc(si);
Willy Tarreaub363a1f2013-10-01 10:45:07 +0200741 struct connection *conn = __objt_conn(si->end);
Willy Tarreau100c4672012-08-20 12:06:26 +0200742
Willy Tarreau2f4e7022015-09-25 10:50:59 +0200743 if (!(ic->flags & CF_SHUTR)) {
744 /* Read not closed */
745 if ((ic->flags & CF_DONT_READ) || !channel_may_recv(ic))
746 __conn_data_stop_recv(conn);
747 else
748 __conn_data_want_recv(conn);
749 }
750
751 if (!(oc->flags & CF_SHUTW)) {
752 /* Write not closed */
753 if (channel_is_empty(oc))
754 __conn_data_stop_send(conn);
755 else
756 __conn_data_want_send(conn);
757 }
758
759 conn_cond_update_data_polling(conn);
Willy Tarreau8b3d7df2013-09-29 14:51:58 +0200760}
761
762/*
763 * This function performs a shutdown-read on a stream interface attached to
764 * a connection in a connected or init state (it does nothing for other
765 * states). It either shuts the read side or marks itself as closed. The buffer
766 * flags are updated to reflect the new state. If the stream interface has
767 * SI_FL_NOHALF, we also forward the close to the write side. If a control
768 * layer is defined, then it is supposed to be a socket layer and file
Willy Tarreau6fe15412013-09-29 15:16:03 +0200769 * descriptors are then shutdown or closed accordingly. The function
770 * automatically disables polling if needed.
Willy Tarreau8b3d7df2013-09-29 14:51:58 +0200771 */
Willy Tarreau6fe15412013-09-29 15:16:03 +0200772static void stream_int_shutr_conn(struct stream_interface *si)
Willy Tarreau8b3d7df2013-09-29 14:51:58 +0200773{
Willy Tarreaub363a1f2013-10-01 10:45:07 +0200774 struct connection *conn = __objt_conn(si->end);
Willy Tarreauafc8a222014-11-28 15:46:27 +0100775 struct channel *ic = si_ic(si);
Willy Tarreau8b3d7df2013-09-29 14:51:58 +0200776
Willy Tarreauafc8a222014-11-28 15:46:27 +0100777 ic->flags &= ~CF_SHUTR_NOW;
778 if (ic->flags & CF_SHUTR)
Willy Tarreau6fe15412013-09-29 15:16:03 +0200779 return;
Willy Tarreauafc8a222014-11-28 15:46:27 +0100780 ic->flags |= CF_SHUTR;
781 ic->rex = TICK_ETERNITY;
Willy Tarreau8b3d7df2013-09-29 14:51:58 +0200782 si->flags &= ~SI_FL_WAIT_ROOM;
783
784 if (si->state != SI_ST_EST && si->state != SI_ST_CON)
Willy Tarreau6fe15412013-09-29 15:16:03 +0200785 return;
Willy Tarreau8b3d7df2013-09-29 14:51:58 +0200786
Willy Tarreau2bb4a962014-11-28 11:11:05 +0100787 if (si_oc(si)->flags & CF_SHUTW) {
Willy Tarreau6fe15412013-09-29 15:16:03 +0200788 conn_full_close(conn);
Willy Tarreau8b3d7df2013-09-29 14:51:58 +0200789 si->state = SI_ST_DIS;
790 si->exp = TICK_ETERNITY;
Willy Tarreau8b3d7df2013-09-29 14:51:58 +0200791 }
792 else if (si->flags & SI_FL_NOHALF) {
793 /* we want to immediately forward this close to the write side */
794 return stream_int_shutw_conn(si);
795 }
796 else if (conn->ctrl) {
797 /* we want the caller to disable polling on this FD */
Willy Tarreau6fe15412013-09-29 15:16:03 +0200798 conn_data_stop_recv(conn);
Willy Tarreau8b3d7df2013-09-29 14:51:58 +0200799 }
Willy Tarreau8b3d7df2013-09-29 14:51:58 +0200800}
801
802/*
803 * This function performs a shutdown-write on a stream interface attached to
804 * a connection in a connected or init state (it does nothing for other
805 * states). It either shuts the write side or marks itself as closed. The
806 * buffer flags are updated to reflect the new state. It does also close
807 * everything if the SI was marked as being in error state. If there is a
Willy Tarreau1398aa12015-03-12 23:04:07 +0100808 * data-layer shutdown, it is called.
Willy Tarreau8b3d7df2013-09-29 14:51:58 +0200809 */
Willy Tarreau6fe15412013-09-29 15:16:03 +0200810static void stream_int_shutw_conn(struct stream_interface *si)
Willy Tarreau8b3d7df2013-09-29 14:51:58 +0200811{
Willy Tarreaub363a1f2013-10-01 10:45:07 +0200812 struct connection *conn = __objt_conn(si->end);
Willy Tarreauafc8a222014-11-28 15:46:27 +0100813 struct channel *ic = si_ic(si);
814 struct channel *oc = si_oc(si);
Willy Tarreau8b3d7df2013-09-29 14:51:58 +0200815
Willy Tarreauafc8a222014-11-28 15:46:27 +0100816 oc->flags &= ~CF_SHUTW_NOW;
817 if (oc->flags & CF_SHUTW)
Willy Tarreau6fe15412013-09-29 15:16:03 +0200818 return;
Willy Tarreauafc8a222014-11-28 15:46:27 +0100819 oc->flags |= CF_SHUTW;
820 oc->wex = TICK_ETERNITY;
Willy Tarreau8b3d7df2013-09-29 14:51:58 +0200821 si->flags &= ~SI_FL_WAIT_DATA;
822
823 switch (si->state) {
824 case SI_ST_EST:
825 /* we have to shut before closing, otherwise some short messages
826 * may never leave the system, especially when there are remaining
827 * unread data in the socket input buffer, or when nolinger is set.
828 * However, if SI_FL_NOLINGER is explicitly set, we know there is
829 * no risk so we close both sides immediately.
830 */
831 if (si->flags & SI_FL_ERR) {
832 /* quick close, the socket is alredy shut anyway */
833 }
834 else if (si->flags & SI_FL_NOLINGER) {
Willy Tarreau8b3d7df2013-09-29 14:51:58 +0200835 /* unclean data-layer shutdown */
Willy Tarreau1398aa12015-03-12 23:04:07 +0100836 conn_data_shutw_hard(conn);
Willy Tarreau8b3d7df2013-09-29 14:51:58 +0200837 }
838 else {
839 /* clean data-layer shutdown */
Willy Tarreau1398aa12015-03-12 23:04:07 +0100840 conn_data_shutw(conn);
Willy Tarreau8b3d7df2013-09-29 14:51:58 +0200841
842 /* If the stream interface is configured to disable half-open
843 * connections, we'll skip the shutdown(), but only if the
844 * read size is already closed. Otherwise we can't support
845 * closed write with pending read (eg: abortonclose while
846 * waiting for the server).
847 */
Willy Tarreauafc8a222014-11-28 15:46:27 +0100848 if (!(si->flags & SI_FL_NOHALF) || !(ic->flags & (CF_SHUTR|CF_DONT_READ))) {
Willy Tarreau8b3d7df2013-09-29 14:51:58 +0200849 /* We shutdown transport layer */
Willy Tarreau4dfd54f2015-03-12 22:44:53 +0100850 conn_sock_shutw(conn);
Willy Tarreau8b3d7df2013-09-29 14:51:58 +0200851
Willy Tarreauafc8a222014-11-28 15:46:27 +0100852 if (!(ic->flags & (CF_SHUTR|CF_DONT_READ))) {
Willy Tarreau8b3d7df2013-09-29 14:51:58 +0200853 /* OK just a shutw, but we want the caller
854 * to disable polling on this FD if exists.
855 */
Willy Tarreau1398aa12015-03-12 23:04:07 +0100856 conn_cond_update_polling(conn);
Willy Tarreau6fe15412013-09-29 15:16:03 +0200857 return;
Willy Tarreau8b3d7df2013-09-29 14:51:58 +0200858 }
859 }
860 }
861
862 /* fall through */
863 case SI_ST_CON:
864 /* we may have to close a pending connection, and mark the
865 * response buffer as shutr
866 */
Willy Tarreau6fe15412013-09-29 15:16:03 +0200867 conn_full_close(conn);
Willy Tarreau8b3d7df2013-09-29 14:51:58 +0200868 /* fall through */
869 case SI_ST_CER:
870 case SI_ST_QUE:
871 case SI_ST_TAR:
872 si->state = SI_ST_DIS;
Willy Tarreau4a59f2f2013-10-24 20:10:45 +0200873 /* fall through */
Willy Tarreau8b3d7df2013-09-29 14:51:58 +0200874 default:
875 si->flags &= ~(SI_FL_WAIT_ROOM | SI_FL_NOLINGER);
Willy Tarreauafc8a222014-11-28 15:46:27 +0100876 ic->flags &= ~CF_SHUTR_NOW;
877 ic->flags |= CF_SHUTR;
878 ic->rex = TICK_ETERNITY;
Willy Tarreau8b3d7df2013-09-29 14:51:58 +0200879 si->exp = TICK_ETERNITY;
Willy Tarreau100c4672012-08-20 12:06:26 +0200880 }
881}
882
Willy Tarreau46a8d922012-08-20 12:38:36 +0200883/* This function is used for inter-stream-interface calls. It is called by the
884 * consumer to inform the producer side that it may be interested in checking
885 * for free space in the buffer. Note that it intentionally does not update
886 * timeouts, so that we can still check them later at wake-up. This function is
887 * dedicated to connection-based stream interfaces.
888 */
Willy Tarreauc5788912012-08-24 18:12:41 +0200889static void stream_int_chk_rcv_conn(struct stream_interface *si)
Willy Tarreau46a8d922012-08-20 12:38:36 +0200890{
Willy Tarreauafc8a222014-11-28 15:46:27 +0100891 struct channel *ic = si_ic(si);
Willy Tarreaub363a1f2013-10-01 10:45:07 +0200892 struct connection *conn = __objt_conn(si->end);
Willy Tarreau46a8d922012-08-20 12:38:36 +0200893
Willy Tarreauafc8a222014-11-28 15:46:27 +0100894 if (unlikely(si->state > SI_ST_EST || (ic->flags & CF_SHUTR)))
Willy Tarreau46a8d922012-08-20 12:38:36 +0200895 return;
896
Willy Tarreaub363a1f2013-10-01 10:45:07 +0200897 conn_refresh_polling_flags(conn);
Willy Tarreau7d281492012-12-16 19:19:13 +0100898
Willy Tarreauafc8a222014-11-28 15:46:27 +0100899 if ((ic->flags & CF_DONT_READ) || !channel_may_recv(ic)) {
Willy Tarreau46a8d922012-08-20 12:38:36 +0200900 /* stop reading */
Willy Tarreauafc8a222014-11-28 15:46:27 +0100901 if (!(ic->flags & CF_DONT_READ)) /* full */
Willy Tarreau46a8d922012-08-20 12:38:36 +0200902 si->flags |= SI_FL_WAIT_ROOM;
Willy Tarreaub363a1f2013-10-01 10:45:07 +0200903 __conn_data_stop_recv(conn);
Willy Tarreau46a8d922012-08-20 12:38:36 +0200904 }
905 else {
906 /* (re)start reading */
907 si->flags &= ~SI_FL_WAIT_ROOM;
Willy Tarreaub363a1f2013-10-01 10:45:07 +0200908 __conn_data_want_recv(conn);
Willy Tarreau46a8d922012-08-20 12:38:36 +0200909 }
Willy Tarreaub363a1f2013-10-01 10:45:07 +0200910 conn_cond_update_data_polling(conn);
Willy Tarreau46a8d922012-08-20 12:38:36 +0200911}
912
913
Willy Tarreaude5722c2012-08-20 15:01:10 +0200914/* This function is used for inter-stream-interface calls. It is called by the
915 * producer to inform the consumer side that it may be interested in checking
916 * for data in the buffer. Note that it intentionally does not update timeouts,
917 * so that we can still check them later at wake-up.
918 */
Willy Tarreauc5788912012-08-24 18:12:41 +0200919static void stream_int_chk_snd_conn(struct stream_interface *si)
Willy Tarreaude5722c2012-08-20 15:01:10 +0200920{
Willy Tarreauafc8a222014-11-28 15:46:27 +0100921 struct channel *oc = si_oc(si);
Willy Tarreaub363a1f2013-10-01 10:45:07 +0200922 struct connection *conn = __objt_conn(si->end);
Willy Tarreaude5722c2012-08-20 15:01:10 +0200923
Willy Tarreauafc8a222014-11-28 15:46:27 +0100924 if (unlikely(si->state > SI_ST_EST || (oc->flags & CF_SHUTW)))
Willy Tarreaude5722c2012-08-20 15:01:10 +0200925 return;
Willy Tarreaude5722c2012-08-20 15:01:10 +0200926
Willy Tarreauafc8a222014-11-28 15:46:27 +0100927 if (unlikely(channel_is_empty(oc))) /* called with nothing to send ! */
Willy Tarreaude5722c2012-08-20 15:01:10 +0200928 return;
929
Willy Tarreauafc8a222014-11-28 15:46:27 +0100930 if (!oc->pipe && /* spliced data wants to be forwarded ASAP */
Willy Tarreaub0165872012-12-15 10:12:39 +0100931 !(si->flags & SI_FL_WAIT_DATA)) /* not waiting for data */
Willy Tarreaude5722c2012-08-20 15:01:10 +0200932 return;
933
Willy Tarreaub363a1f2013-10-01 10:45:07 +0200934 if (conn->flags & (CO_FL_DATA_WR_ENA|CO_FL_CURR_WR_ENA)) {
Willy Tarreau5007d2a2013-07-18 22:09:48 +0200935 /* already subscribed to write notifications, will be called
936 * anyway, so let's avoid calling it especially if the reader
937 * is not ready.
938 */
939 return;
940 }
941
Willy Tarreau708e7172014-01-21 10:27:49 +0100942 /* Before calling the data-level operations, we have to prepare
943 * the polling flags to ensure we properly detect changes.
944 */
945 conn_refresh_polling_flags(conn);
946 __conn_data_want_send(conn);
Willy Tarreaud29a0662012-12-10 16:33:38 +0100947
Willy Tarreau708e7172014-01-21 10:27:49 +0100948 if (!(conn->flags & (CO_FL_HANDSHAKE|CO_FL_WAIT_L4_CONN|CO_FL_WAIT_L6_CONN))) {
Willy Tarreaub363a1f2013-10-01 10:45:07 +0200949 si_conn_send(conn);
Willy Tarreau798c3c92014-01-21 10:30:08 +0100950 if (conn->flags & CO_FL_ERROR) {
Willy Tarreaud29a0662012-12-10 16:33:38 +0100951 /* Write error on the file descriptor */
Willy Tarreaub363a1f2013-10-01 10:45:07 +0200952 __conn_data_stop_both(conn);
Willy Tarreaud29a0662012-12-10 16:33:38 +0100953 si->flags |= SI_FL_ERR;
Willy Tarreaud29a0662012-12-10 16:33:38 +0100954 goto out_wakeup;
955 }
Willy Tarreaude5722c2012-08-20 15:01:10 +0200956 }
957
958 /* OK, so now we know that some data might have been sent, and that we may
959 * have to poll first. We have to do that too if the buffer is not empty.
960 */
Willy Tarreauafc8a222014-11-28 15:46:27 +0100961 if (channel_is_empty(oc)) {
Willy Tarreaude5722c2012-08-20 15:01:10 +0200962 /* the connection is established but we can't write. Either the
963 * buffer is empty, or we just refrain from sending because the
964 * ->o limit was reached. Maybe we just wrote the last
965 * chunk and need to close.
966 */
Willy Tarreaub363a1f2013-10-01 10:45:07 +0200967 __conn_data_stop_send(conn);
Willy Tarreauafc8a222014-11-28 15:46:27 +0100968 if (((oc->flags & (CF_SHUTW|CF_AUTO_CLOSE|CF_SHUTW_NOW)) ==
Willy Tarreau03cdb7c2012-08-27 23:14:58 +0200969 (CF_AUTO_CLOSE|CF_SHUTW_NOW)) &&
Willy Tarreaude5722c2012-08-20 15:01:10 +0200970 (si->state == SI_ST_EST)) {
971 si_shutw(si);
972 goto out_wakeup;
973 }
974
Willy Tarreauafc8a222014-11-28 15:46:27 +0100975 if ((oc->flags & (CF_SHUTW|CF_SHUTW_NOW)) == 0)
Willy Tarreaude5722c2012-08-20 15:01:10 +0200976 si->flags |= SI_FL_WAIT_DATA;
Willy Tarreauafc8a222014-11-28 15:46:27 +0100977 oc->wex = TICK_ETERNITY;
Willy Tarreaude5722c2012-08-20 15:01:10 +0200978 }
979 else {
980 /* Otherwise there are remaining data to be sent in the buffer,
981 * which means we have to poll before doing so.
982 */
Willy Tarreaub363a1f2013-10-01 10:45:07 +0200983 __conn_data_want_send(conn);
Willy Tarreaude5722c2012-08-20 15:01:10 +0200984 si->flags &= ~SI_FL_WAIT_DATA;
Willy Tarreauafc8a222014-11-28 15:46:27 +0100985 if (!tick_isset(oc->wex))
986 oc->wex = tick_add_ifset(now_ms, oc->wto);
Willy Tarreaude5722c2012-08-20 15:01:10 +0200987 }
988
Willy Tarreauafc8a222014-11-28 15:46:27 +0100989 if (likely(oc->flags & CF_WRITE_ACTIVITY)) {
990 struct channel *ic = si_ic(si);
991
Willy Tarreaude5722c2012-08-20 15:01:10 +0200992 /* update timeout if we have written something */
Willy Tarreauafc8a222014-11-28 15:46:27 +0100993 if ((oc->flags & (CF_SHUTW|CF_WRITE_PARTIAL)) == CF_WRITE_PARTIAL &&
994 !channel_is_empty(oc))
995 oc->wex = tick_add_ifset(now_ms, oc->wto);
Willy Tarreaude5722c2012-08-20 15:01:10 +0200996
Willy Tarreauafc8a222014-11-28 15:46:27 +0100997 if (tick_isset(ic->rex) && !(si->flags & SI_FL_INDEP_STR)) {
Willy Tarreaude5722c2012-08-20 15:01:10 +0200998 /* Note: to prevent the client from expiring read timeouts
999 * during writes, we refresh it. We only do this if the
1000 * interface is not configured for "independent streams",
1001 * because for some applications it's better not to do this,
1002 * for instance when continuously exchanging small amounts
1003 * of data which can full the socket buffers long before a
1004 * write timeout is detected.
1005 */
Willy Tarreauafc8a222014-11-28 15:46:27 +01001006 ic->rex = tick_add_ifset(now_ms, ic->rto);
Willy Tarreaude5722c2012-08-20 15:01:10 +02001007 }
1008 }
1009
1010 /* in case of special condition (error, shutdown, end of write...), we
1011 * have to notify the task.
1012 */
Willy Tarreauafc8a222014-11-28 15:46:27 +01001013 if (likely((oc->flags & (CF_WRITE_NULL|CF_WRITE_ERROR|CF_SHUTW)) ||
1014 ((oc->flags & CF_WAKE_WRITE) &&
1015 ((channel_is_empty(oc) && !oc->to_forward) ||
Willy Tarreaue6300be2014-01-25 02:33:21 +01001016 si->state != SI_ST_EST)))) {
Willy Tarreaude5722c2012-08-20 15:01:10 +02001017 out_wakeup:
Willy Tarreau07373b82014-11-28 12:08:47 +01001018 if (!(si->flags & SI_FL_DONT_WAKE))
1019 task_wakeup(si_task(si), TASK_WOKEN_IO);
Willy Tarreaude5722c2012-08-20 15:01:10 +02001020 }
Willy Tarreauf16723e2012-08-24 12:52:22 +02001021
1022 /* commit possible polling changes */
Willy Tarreaub363a1f2013-10-01 10:45:07 +02001023 conn_cond_update_polling(conn);
Willy Tarreaude5722c2012-08-20 15:01:10 +02001024}
1025
Willy Tarreaueecf6ca2012-08-20 15:09:53 +02001026/*
Willy Tarreauce323de2012-08-20 21:41:06 +02001027 * This is the callback which is called by the connection layer to receive data
Willy Tarreauf7bc57c2012-10-03 00:19:48 +02001028 * into the buffer from the connection. It iterates over the transport layer's
1029 * rcv_buf function.
Willy Tarreauce323de2012-08-20 21:41:06 +02001030 */
Willy Tarreau4aa36832012-10-02 20:07:22 +02001031static void si_conn_recv_cb(struct connection *conn)
Willy Tarreauce323de2012-08-20 21:41:06 +02001032{
Willy Tarreaue603e692012-09-27 22:20:41 +02001033 struct stream_interface *si = conn->owner;
Willy Tarreauafc8a222014-11-28 15:46:27 +01001034 struct channel *ic = si_ic(si);
Willy Tarreauce323de2012-08-20 21:41:06 +02001035 int ret, max, cur_read;
1036 int read_poll = MAX_READ_POLL_LOOPS;
1037
1038 /* stop immediately on errors. Note that we DON'T want to stop on
1039 * POLL_ERR, as the poller might report a write error while there
1040 * are still data available in the recv buffer. This typically
1041 * happens when we send too large a request to a backend server
1042 * which rejects it before reading it all.
1043 */
1044 if (conn->flags & CO_FL_ERROR)
Godbach4f489902013-12-04 17:24:06 +08001045 return;
Willy Tarreauce323de2012-08-20 21:41:06 +02001046
1047 /* stop here if we reached the end of data */
1048 if (conn_data_read0_pending(conn))
1049 goto out_shutdown_r;
1050
1051 /* maybe we were called immediately after an asynchronous shutr */
Willy Tarreauafc8a222014-11-28 15:46:27 +01001052 if (ic->flags & CF_SHUTR)
Willy Tarreauce323de2012-08-20 21:41:06 +02001053 return;
1054
Willy Tarreau96199b12012-08-24 00:46:52 +02001055 cur_read = 0;
Willy Tarreau96199b12012-08-24 00:46:52 +02001056
Willy Tarreauafc8a222014-11-28 15:46:27 +01001057 if ((ic->flags & (CF_STREAMER | CF_STREAMER_FAST)) && !ic->buf->o &&
Willy Tarreau7e312732014-02-12 16:35:14 +01001058 global.tune.idle_timer &&
Willy Tarreauafc8a222014-11-28 15:46:27 +01001059 (unsigned short)(now_ms - ic->last_read) >= global.tune.idle_timer) {
Willy Tarreauc5890e62014-02-09 17:47:01 +01001060 /* The buffer was empty and nothing was transferred for more
1061 * than one second. This was caused by a pause and not by
1062 * congestion. Reset any streaming mode to reduce latency.
1063 */
Willy Tarreauafc8a222014-11-28 15:46:27 +01001064 ic->xfer_small = 0;
1065 ic->xfer_large = 0;
1066 ic->flags &= ~(CF_STREAMER | CF_STREAMER_FAST);
Willy Tarreauc5890e62014-02-09 17:47:01 +01001067 }
1068
Willy Tarreau96199b12012-08-24 00:46:52 +02001069 /* First, let's see if we may splice data across the channel without
1070 * using a buffer.
1071 */
Willy Tarreauf7bc57c2012-10-03 00:19:48 +02001072 if (conn->xprt->rcv_pipe &&
Willy Tarreauafc8a222014-11-28 15:46:27 +01001073 (ic->pipe || ic->to_forward >= MIN_SPLICE_FORWARD) &&
1074 ic->flags & CF_KERN_SPLICING) {
1075 if (buffer_not_empty(ic->buf)) {
Willy Tarreau96199b12012-08-24 00:46:52 +02001076 /* We're embarrassed, there are already data pending in
1077 * the buffer and we don't want to have them at two
1078 * locations at a time. Let's indicate we need some
1079 * place and ask the consumer to hurry.
1080 */
1081 goto abort_splice;
1082 }
Willy Tarreauce323de2012-08-20 21:41:06 +02001083
Willy Tarreauafc8a222014-11-28 15:46:27 +01001084 if (unlikely(ic->pipe == NULL)) {
1085 if (pipes_used >= global.maxpipes || !(ic->pipe = get_pipe())) {
1086 ic->flags &= ~CF_KERN_SPLICING;
Willy Tarreau96199b12012-08-24 00:46:52 +02001087 goto abort_splice;
1088 }
1089 }
1090
Willy Tarreauafc8a222014-11-28 15:46:27 +01001091 ret = conn->xprt->rcv_pipe(conn, ic->pipe, ic->to_forward);
Willy Tarreau96199b12012-08-24 00:46:52 +02001092 if (ret < 0) {
1093 /* splice not supported on this end, let's disable it */
Willy Tarreauafc8a222014-11-28 15:46:27 +01001094 ic->flags &= ~CF_KERN_SPLICING;
Willy Tarreau96199b12012-08-24 00:46:52 +02001095 goto abort_splice;
1096 }
Willy Tarreauce323de2012-08-20 21:41:06 +02001097
Willy Tarreau96199b12012-08-24 00:46:52 +02001098 if (ret > 0) {
Willy Tarreauafc8a222014-11-28 15:46:27 +01001099 if (ic->to_forward != CHN_INFINITE_FORWARD)
1100 ic->to_forward -= ret;
1101 ic->total += ret;
Willy Tarreau96199b12012-08-24 00:46:52 +02001102 cur_read += ret;
Willy Tarreauafc8a222014-11-28 15:46:27 +01001103 ic->flags |= CF_READ_PARTIAL;
Willy Tarreauce323de2012-08-20 21:41:06 +02001104 }
Willy Tarreau96199b12012-08-24 00:46:52 +02001105
1106 if (conn_data_read0_pending(conn))
1107 goto out_shutdown_r;
1108
1109 if (conn->flags & CO_FL_ERROR)
Godbach4f489902013-12-04 17:24:06 +08001110 return;
Willy Tarreau96199b12012-08-24 00:46:52 +02001111
Willy Tarreau61d39a02013-07-18 21:49:32 +02001112 if (conn->flags & CO_FL_WAIT_ROOM) {
1113 /* the pipe is full or we have read enough data that it
1114 * could soon be full. Let's stop before needing to poll.
1115 */
Willy Tarreau56a77e52012-09-02 18:34:44 +02001116 si->flags |= SI_FL_WAIT_ROOM;
Willy Tarreau61d39a02013-07-18 21:49:32 +02001117 __conn_data_stop_recv(conn);
1118 }
Willy Tarreau56a77e52012-09-02 18:34:44 +02001119
Willy Tarreauce323de2012-08-20 21:41:06 +02001120 /* splice not possible (anymore), let's go on on standard copy */
1121 }
Willy Tarreau96199b12012-08-24 00:46:52 +02001122
1123 abort_splice:
Willy Tarreauafc8a222014-11-28 15:46:27 +01001124 if (ic->pipe && unlikely(!ic->pipe->data)) {
1125 put_pipe(ic->pipe);
1126 ic->pipe = NULL;
Willy Tarreau96199b12012-08-24 00:46:52 +02001127 }
1128
Willy Tarreau10fc09e2014-11-25 19:46:36 +01001129 /* now we'll need a buffer */
Willy Tarreau87b09662015-04-03 00:22:06 +02001130 if (!stream_alloc_recv_buffer(ic)) {
Willy Tarreau10fc09e2014-11-25 19:46:36 +01001131 si->flags |= SI_FL_WAIT_ROOM;
1132 goto end_recv;
1133 }
1134
Willy Tarreau61d39a02013-07-18 21:49:32 +02001135 /* Important note : if we're called with POLL_IN|POLL_HUP, it means the read polling
1136 * was enabled, which implies that the recv buffer was not full. So we have a guarantee
1137 * that if such an event is not handled above in splice, it will be handled here by
1138 * recv().
1139 */
Willy Tarreau310987a2014-01-22 19:46:33 +01001140 while (!(conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_DATA_RD_SH | CO_FL_WAIT_ROOM | CO_FL_HANDSHAKE))) {
Willy Tarreauafc8a222014-11-28 15:46:27 +01001141 max = channel_recv_max(ic);
Willy Tarreauce323de2012-08-20 21:41:06 +02001142
1143 if (!max) {
Willy Tarreau56a77e52012-09-02 18:34:44 +02001144 si->flags |= SI_FL_WAIT_ROOM;
Willy Tarreauce323de2012-08-20 21:41:06 +02001145 break;
1146 }
1147
Willy Tarreauafc8a222014-11-28 15:46:27 +01001148 ret = conn->xprt->rcv_buf(conn, ic->buf, max);
Willy Tarreauce323de2012-08-20 21:41:06 +02001149 if (ret <= 0)
1150 break;
1151
1152 cur_read += ret;
1153
1154 /* if we're allowed to directly forward data, we must update ->o */
Willy Tarreauafc8a222014-11-28 15:46:27 +01001155 if (ic->to_forward && !(ic->flags & (CF_SHUTW|CF_SHUTW_NOW))) {
Willy Tarreauce323de2012-08-20 21:41:06 +02001156 unsigned long fwd = ret;
Willy Tarreauafc8a222014-11-28 15:46:27 +01001157 if (ic->to_forward != CHN_INFINITE_FORWARD) {
1158 if (fwd > ic->to_forward)
1159 fwd = ic->to_forward;
1160 ic->to_forward -= fwd;
Willy Tarreauce323de2012-08-20 21:41:06 +02001161 }
Willy Tarreauafc8a222014-11-28 15:46:27 +01001162 b_adv(ic->buf, fwd);
Willy Tarreauce323de2012-08-20 21:41:06 +02001163 }
1164
Willy Tarreauafc8a222014-11-28 15:46:27 +01001165 ic->flags |= CF_READ_PARTIAL;
1166 ic->total += ret;
Willy Tarreauce323de2012-08-20 21:41:06 +02001167
Willy Tarreauafc8a222014-11-28 15:46:27 +01001168 if (!channel_may_recv(ic)) {
Willy Tarreauce323de2012-08-20 21:41:06 +02001169 si->flags |= SI_FL_WAIT_ROOM;
1170 break;
1171 }
1172
Willy Tarreauafc8a222014-11-28 15:46:27 +01001173 if ((ic->flags & CF_READ_DONTWAIT) || --read_poll <= 0) {
Willy Tarreau34ac5662012-12-19 18:01:02 +01001174 si->flags |= SI_FL_WAIT_ROOM;
Willy Tarreaud486ef52012-12-10 17:03:52 +01001175 __conn_data_stop_recv(conn);
Willy Tarreauce323de2012-08-20 21:41:06 +02001176 break;
Willy Tarreau5fddab02012-11-09 18:27:26 +01001177 }
Willy Tarreauce323de2012-08-20 21:41:06 +02001178
1179 /* if too many bytes were missing from last read, it means that
1180 * it's pointless trying to read again because the system does
1181 * not have them in buffers.
1182 */
1183 if (ret < max) {
Willy Tarreauce323de2012-08-20 21:41:06 +02001184 /* if a streamer has read few data, it may be because we
1185 * have exhausted system buffers. It's not worth trying
1186 * again.
1187 */
Willy Tarreauafc8a222014-11-28 15:46:27 +01001188 if (ic->flags & CF_STREAMER)
Willy Tarreauce323de2012-08-20 21:41:06 +02001189 break;
1190
1191 /* if we read a large block smaller than what we requested,
1192 * it's almost certain we'll never get anything more.
1193 */
1194 if (ret >= global.tune.recv_enough)
1195 break;
1196 }
1197 } /* while !flags */
1198
Willy Tarreauc5890e62014-02-09 17:47:01 +01001199 if (cur_read) {
Willy Tarreauafc8a222014-11-28 15:46:27 +01001200 if ((ic->flags & (CF_STREAMER | CF_STREAMER_FAST)) &&
1201 (cur_read <= ic->buf->size / 2)) {
1202 ic->xfer_large = 0;
1203 ic->xfer_small++;
1204 if (ic->xfer_small >= 3) {
Willy Tarreauc5890e62014-02-09 17:47:01 +01001205 /* we have read less than half of the buffer in
1206 * one pass, and this happened at least 3 times.
1207 * This is definitely not a streamer.
1208 */
Willy Tarreauafc8a222014-11-28 15:46:27 +01001209 ic->flags &= ~(CF_STREAMER | CF_STREAMER_FAST);
Willy Tarreauc5890e62014-02-09 17:47:01 +01001210 }
Willy Tarreauafc8a222014-11-28 15:46:27 +01001211 else if (ic->xfer_small >= 2) {
Willy Tarreauc5890e62014-02-09 17:47:01 +01001212 /* if the buffer has been at least half full twice,
1213 * we receive faster than we send, so at least it
1214 * is not a "fast streamer".
1215 */
Willy Tarreauafc8a222014-11-28 15:46:27 +01001216 ic->flags &= ~CF_STREAMER_FAST;
Willy Tarreauc5890e62014-02-09 17:47:01 +01001217 }
1218 }
Willy Tarreauafc8a222014-11-28 15:46:27 +01001219 else if (!(ic->flags & CF_STREAMER_FAST) &&
1220 (cur_read >= ic->buf->size - global.tune.maxrewrite)) {
Willy Tarreauc5890e62014-02-09 17:47:01 +01001221 /* we read a full buffer at once */
Willy Tarreauafc8a222014-11-28 15:46:27 +01001222 ic->xfer_small = 0;
1223 ic->xfer_large++;
1224 if (ic->xfer_large >= 3) {
Willy Tarreauc5890e62014-02-09 17:47:01 +01001225 /* we call this buffer a fast streamer if it manages
1226 * to be filled in one call 3 consecutive times.
1227 */
Willy Tarreauafc8a222014-11-28 15:46:27 +01001228 ic->flags |= (CF_STREAMER | CF_STREAMER_FAST);
Willy Tarreauc5890e62014-02-09 17:47:01 +01001229 }
1230 }
1231 else {
Willy Tarreauafc8a222014-11-28 15:46:27 +01001232 ic->xfer_small = 0;
1233 ic->xfer_large = 0;
Willy Tarreauc5890e62014-02-09 17:47:01 +01001234 }
Willy Tarreauafc8a222014-11-28 15:46:27 +01001235 ic->last_read = now_ms;
Willy Tarreauc5890e62014-02-09 17:47:01 +01001236 }
1237
Willy Tarreau10fc09e2014-11-25 19:46:36 +01001238 end_recv:
1239 if (conn->flags & CO_FL_ERROR)
1240 return;
1241
Willy Tarreauce323de2012-08-20 21:41:06 +02001242 if (conn_data_read0_pending(conn))
1243 /* connection closed */
1244 goto out_shutdown_r;
1245
1246 return;
1247
1248 out_shutdown_r:
1249 /* we received a shutdown */
Willy Tarreauafc8a222014-11-28 15:46:27 +01001250 ic->flags |= CF_READ_NULL;
1251 if (ic->flags & CF_AUTO_CLOSE)
1252 channel_shutw_now(ic);
Willy Tarreauce323de2012-08-20 21:41:06 +02001253 stream_sock_read0(si);
1254 conn_data_read0(conn);
1255 return;
Willy Tarreauce323de2012-08-20 21:41:06 +02001256}
1257
1258/*
Willy Tarreaueecf6ca2012-08-20 15:09:53 +02001259 * This is the callback which is called by the connection layer to send data
Willy Tarreauf7bc57c2012-10-03 00:19:48 +02001260 * from the buffer to the connection. It iterates over the transport layer's
1261 * snd_buf function.
Willy Tarreaueecf6ca2012-08-20 15:09:53 +02001262 */
Willy Tarreau4aa36832012-10-02 20:07:22 +02001263static void si_conn_send_cb(struct connection *conn)
Willy Tarreaueecf6ca2012-08-20 15:09:53 +02001264{
Willy Tarreaue603e692012-09-27 22:20:41 +02001265 struct stream_interface *si = conn->owner;
Willy Tarreaueecf6ca2012-08-20 15:09:53 +02001266
1267 if (conn->flags & CO_FL_ERROR)
Godbach4f489902013-12-04 17:24:06 +08001268 return;
Willy Tarreaueecf6ca2012-08-20 15:09:53 +02001269
Willy Tarreaub363a1f2013-10-01 10:45:07 +02001270 if (conn->flags & CO_FL_HANDSHAKE)
Willy Tarreaueecf6ca2012-08-20 15:09:53 +02001271 /* a handshake was requested */
1272 return;
1273
1274 /* we might have been called just after an asynchronous shutw */
Willy Tarreauafc8a222014-11-28 15:46:27 +01001275 if (si_oc(si)->flags & CF_SHUTW)
Willy Tarreaueecf6ca2012-08-20 15:09:53 +02001276 return;
1277
1278 /* OK there are data waiting to be sent */
Godbach4f489902013-12-04 17:24:06 +08001279 si_conn_send(conn);
Willy Tarreaueecf6ca2012-08-20 15:09:53 +02001280
1281 /* OK all done */
1282 return;
Willy Tarreaueecf6ca2012-08-20 15:09:53 +02001283}
1284
Willy Tarreau9bf9c142012-08-20 15:38:41 +02001285/*
1286 * This function propagates a null read received on a socket-based connection.
1287 * It updates the stream interface. If the stream interface has SI_FL_NOHALF,
Willy Tarreau11405122015-03-12 22:32:27 +01001288 * the close is also forwarded to the write side as an abort.
Willy Tarreau9bf9c142012-08-20 15:38:41 +02001289 */
1290void stream_sock_read0(struct stream_interface *si)
1291{
Willy Tarreaub363a1f2013-10-01 10:45:07 +02001292 struct connection *conn = __objt_conn(si->end);
Willy Tarreauafc8a222014-11-28 15:46:27 +01001293 struct channel *ic = si_ic(si);
1294 struct channel *oc = si_oc(si);
Willy Tarreaub363a1f2013-10-01 10:45:07 +02001295
Willy Tarreauafc8a222014-11-28 15:46:27 +01001296 ic->flags &= ~CF_SHUTR_NOW;
1297 if (ic->flags & CF_SHUTR)
Willy Tarreau9bf9c142012-08-20 15:38:41 +02001298 return;
Willy Tarreauafc8a222014-11-28 15:46:27 +01001299 ic->flags |= CF_SHUTR;
1300 ic->rex = TICK_ETERNITY;
Willy Tarreau9bf9c142012-08-20 15:38:41 +02001301 si->flags &= ~SI_FL_WAIT_ROOM;
1302
1303 if (si->state != SI_ST_EST && si->state != SI_ST_CON)
1304 return;
1305
Willy Tarreauafc8a222014-11-28 15:46:27 +01001306 if (oc->flags & CF_SHUTW)
Willy Tarreau9bf9c142012-08-20 15:38:41 +02001307 goto do_close;
1308
1309 if (si->flags & SI_FL_NOHALF) {
1310 /* we want to immediately forward this close to the write side */
Willy Tarreau87b09662015-04-03 00:22:06 +02001311 /* force flag on ssl to keep stream in cache */
Willy Tarreau1398aa12015-03-12 23:04:07 +01001312 conn_data_shutw_hard(conn);
Willy Tarreau9bf9c142012-08-20 15:38:41 +02001313 goto do_close;
1314 }
1315
1316 /* otherwise that's just a normal read shutdown */
Willy Tarreaub363a1f2013-10-01 10:45:07 +02001317 __conn_data_stop_recv(conn);
Willy Tarreau9bf9c142012-08-20 15:38:41 +02001318 return;
1319
1320 do_close:
Willy Tarreauf9fbfe82012-11-21 21:51:53 +01001321 /* OK we completely close the socket here just as if we went through si_shut[rw]() */
Willy Tarreaub363a1f2013-10-01 10:45:07 +02001322 conn_full_close(conn);
Willy Tarreauf9fbfe82012-11-21 21:51:53 +01001323
Willy Tarreauafc8a222014-11-28 15:46:27 +01001324 ic->flags &= ~CF_SHUTR_NOW;
1325 ic->flags |= CF_SHUTR;
1326 ic->rex = TICK_ETERNITY;
Willy Tarreauf9fbfe82012-11-21 21:51:53 +01001327
Willy Tarreauafc8a222014-11-28 15:46:27 +01001328 oc->flags &= ~CF_SHUTW_NOW;
1329 oc->flags |= CF_SHUTW;
1330 oc->wex = TICK_ETERNITY;
Willy Tarreauf9fbfe82012-11-21 21:51:53 +01001331
1332 si->flags &= ~(SI_FL_WAIT_DATA | SI_FL_WAIT_ROOM);
1333
Willy Tarreau9bf9c142012-08-20 15:38:41 +02001334 si->state = SI_ST_DIS;
1335 si->exp = TICK_ETERNITY;
Willy Tarreau9bf9c142012-08-20 15:38:41 +02001336 return;
1337}
1338
Willy Tarreau651e1822015-09-23 20:06:13 +02001339/* Callback to be used by applet handlers upon completion. It updates the stream
1340 * (which may or may not take this opportunity to try to forward data), then
1341 * may disable the applet's based on the channels and stream interface's final
1342 * states.
1343 */
Willy Tarreaue5f86492015-04-19 15:16:35 +02001344void si_applet_done(struct stream_interface *si)
1345{
Willy Tarreau651e1822015-09-23 20:06:13 +02001346 /* update the stream-int, channels, and possibly wake the stream up */
1347 stream_int_notify(si);
Willy Tarreau388a2382015-09-23 19:55:42 +02001348
1349 /* Get away from the active list if we can't work anymore.
1350 * We also do that if the main task has already scheduled, because it
1351 * saves a useless wakeup/pause/wakeup cycle causing one useless call
1352 * per session on average.
1353 */
1354 if (task_in_rq(si_task(si)) ||
1355 (((si->flags & (SI_FL_WANT_PUT|SI_FL_WAIT_ROOM)) != SI_FL_WANT_PUT) &&
1356 ((si->flags & (SI_FL_WANT_GET|SI_FL_WAIT_DATA)) != SI_FL_WANT_GET)))
1357 appctx_pause(si_appctx(si));
Willy Tarreaue5f86492015-04-19 15:16:35 +02001358}
1359
Willy Tarreau452c7d52015-09-25 10:39:16 +02001360
1361/* Updates the activity status of an applet outside of the applet handler based
1362 * on the channel's flags and the stream interface's flags. It needs to be
1363 * called once after the channels' flags have settled down and the stream has
1364 * been updated. It is not designed to be called from within the applet handler
1365 * itself.
Willy Tarreau563cc372015-04-19 18:13:56 +02001366 */
1367void stream_int_update_applet(struct stream_interface *si)
Willy Tarreaud45b9f82015-04-13 16:30:14 +02001368{
Willy Tarreaufe127932015-04-21 19:23:39 +02001369 if (((si->flags & (SI_FL_WANT_PUT|SI_FL_WAIT_ROOM)) == SI_FL_WANT_PUT) ||
1370 ((si->flags & (SI_FL_WANT_GET|SI_FL_WAIT_DATA)) == SI_FL_WANT_GET))
Willy Tarreau563cc372015-04-19 18:13:56 +02001371 appctx_wakeup(si_appctx(si));
Willy Tarreaufe127932015-04-21 19:23:39 +02001372 else
1373 appctx_pause(si_appctx(si));
Willy Tarreaud45b9f82015-04-13 16:30:14 +02001374}
1375
1376/*
1377 * This function performs a shutdown-read on a stream interface attached to an
1378 * applet in a connected or init state (it does nothing for other states). It
1379 * either shuts the read side or marks itself as closed. The buffer flags are
1380 * updated to reflect the new state. If the stream interface has SI_FL_NOHALF,
1381 * we also forward the close to the write side. The owner task is woken up if
1382 * it exists.
1383 */
1384static void stream_int_shutr_applet(struct stream_interface *si)
1385{
1386 struct channel *ic = si_ic(si);
1387
1388 ic->flags &= ~CF_SHUTR_NOW;
1389 if (ic->flags & CF_SHUTR)
1390 return;
1391 ic->flags |= CF_SHUTR;
1392 ic->rex = TICK_ETERNITY;
1393 si->flags &= ~SI_FL_WAIT_ROOM;
1394
Willy Tarreau828824a2015-04-19 17:20:03 +02001395 /* Note: on shutr, we don't call the applet */
1396
Willy Tarreaud45b9f82015-04-13 16:30:14 +02001397 if (si->state != SI_ST_EST && si->state != SI_ST_CON)
1398 return;
1399
1400 if (si_oc(si)->flags & CF_SHUTW) {
1401 si->state = SI_ST_DIS;
1402 si->exp = TICK_ETERNITY;
1403 si_applet_release(si);
1404 }
1405 else if (si->flags & SI_FL_NOHALF) {
1406 /* we want to immediately forward this close to the write side */
1407 return stream_int_shutw_applet(si);
1408 }
Willy Tarreaud45b9f82015-04-13 16:30:14 +02001409}
1410
1411/*
1412 * This function performs a shutdown-write on a stream interface attached to an
1413 * applet in a connected or init state (it does nothing for other states). It
1414 * either shuts the write side or marks itself as closed. The buffer flags are
1415 * updated to reflect the new state. It does also close everything if the SI
1416 * was marked as being in error state. The owner task is woken up if it exists.
1417 */
1418static void stream_int_shutw_applet(struct stream_interface *si)
1419{
1420 struct channel *ic = si_ic(si);
1421 struct channel *oc = si_oc(si);
1422
1423 oc->flags &= ~CF_SHUTW_NOW;
1424 if (oc->flags & CF_SHUTW)
1425 return;
1426 oc->flags |= CF_SHUTW;
1427 oc->wex = TICK_ETERNITY;
1428 si->flags &= ~SI_FL_WAIT_DATA;
1429
Willy Tarreau828824a2015-04-19 17:20:03 +02001430 /* on shutw we always wake the applet up */
1431 appctx_wakeup(si_appctx(si));
1432
Willy Tarreaud45b9f82015-04-13 16:30:14 +02001433 switch (si->state) {
1434 case SI_ST_EST:
1435 /* we have to shut before closing, otherwise some short messages
1436 * may never leave the system, especially when there are remaining
1437 * unread data in the socket input buffer, or when nolinger is set.
1438 * However, if SI_FL_NOLINGER is explicitly set, we know there is
1439 * no risk so we close both sides immediately.
1440 */
1441 if (!(si->flags & (SI_FL_ERR | SI_FL_NOLINGER)) &&
1442 !(ic->flags & (CF_SHUTR|CF_DONT_READ)))
1443 return;
1444
1445 /* fall through */
1446 case SI_ST_CON:
1447 case SI_ST_CER:
1448 case SI_ST_QUE:
1449 case SI_ST_TAR:
1450 /* Note that none of these states may happen with applets */
1451 si->state = SI_ST_DIS;
1452 si_applet_release(si);
1453 default:
1454 si->flags &= ~(SI_FL_WAIT_ROOM | SI_FL_NOLINGER);
1455 ic->flags &= ~CF_SHUTR_NOW;
1456 ic->flags |= CF_SHUTR;
1457 ic->rex = TICK_ETERNITY;
1458 si->exp = TICK_ETERNITY;
1459 }
Willy Tarreaud45b9f82015-04-13 16:30:14 +02001460}
1461
1462/* chk_rcv function for applets */
1463static void stream_int_chk_rcv_applet(struct stream_interface *si)
1464{
1465 struct channel *ic = si_ic(si);
1466
1467 DPRINTF(stderr, "%s: si=%p, si->state=%d ic->flags=%08x oc->flags=%08x\n",
1468 __FUNCTION__,
1469 si, si->state, ic->flags, si_oc(si)->flags);
1470
1471 if (unlikely(si->state != SI_ST_EST || (ic->flags & (CF_SHUTR|CF_DONT_READ))))
1472 return;
Willy Tarreau828824a2015-04-19 17:20:03 +02001473 /* here we only wake the applet up if it was waiting for some room */
1474 if (!(si->flags & SI_FL_WAIT_ROOM))
1475 return;
Willy Tarreaud45b9f82015-04-13 16:30:14 +02001476
Willy Tarreau828824a2015-04-19 17:20:03 +02001477 if (channel_may_recv(ic) && !ic->pipe) {
Willy Tarreaud45b9f82015-04-13 16:30:14 +02001478 /* (re)start reading */
Willy Tarreau828824a2015-04-19 17:20:03 +02001479 appctx_wakeup(si_appctx(si));
Thierry FOURNIER5bc2cbf2015-09-04 18:40:36 +02001480 }
Willy Tarreaud45b9f82015-04-13 16:30:14 +02001481}
1482
1483/* chk_snd function for applets */
1484static void stream_int_chk_snd_applet(struct stream_interface *si)
1485{
1486 struct channel *oc = si_oc(si);
1487
1488 DPRINTF(stderr, "%s: si=%p, si->state=%d ic->flags=%08x oc->flags=%08x\n",
1489 __FUNCTION__,
1490 si, si->state, si_ic(si)->flags, oc->flags);
1491
1492 if (unlikely(si->state != SI_ST_EST || (oc->flags & CF_SHUTW)))
1493 return;
1494
Willy Tarreau828824a2015-04-19 17:20:03 +02001495 /* we only wake the applet up if it was waiting for some data */
1496
1497 if (!(si->flags & SI_FL_WAIT_DATA))
Willy Tarreaud45b9f82015-04-13 16:30:14 +02001498 return;
1499
Willy Tarreaud45b9f82015-04-13 16:30:14 +02001500 if (!tick_isset(oc->wex))
1501 oc->wex = tick_add_ifset(now_ms, oc->wto);
1502
Willy Tarreau828824a2015-04-19 17:20:03 +02001503 if (!channel_is_empty(oc)) {
1504 /* (re)start sending */
1505 appctx_wakeup(si_appctx(si));
1506 }
Willy Tarreaud45b9f82015-04-13 16:30:14 +02001507}
1508
Willy Tarreaudded32d2008-11-30 19:48:07 +01001509/*
Willy Tarreaucff64112008-11-03 06:26:53 +01001510 * Local variables:
1511 * c-indent-level: 8
1512 * c-basic-offset: 8
1513 * End:
1514 */