blob: 7e35630f47c404478289e533c52d4fd93c43cb7c [file] [log] [blame]
Willy Tarreau59f98392012-07-06 14:13:49 +02001/*
2 * Connection management functions
3 *
4 * Copyright 2000-2012 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Willy Tarreaue1e4a612012-10-05 00:10:55 +020013#include <errno.h>
14
Willy Tarreau59f98392012-07-06 14:13:49 +020015#include <common/compat.h>
16#include <common/config.h>
Willy Tarreau0108d902018-11-25 19:14:37 +010017#include <common/initcall.h>
KOVACS Krisztianb3e54fe2014-11-17 15:11:45 +010018#include <common/namespace.h>
Emmanuel Hocdet4399c752018-02-05 15:26:43 +010019#include <common/hash.h>
20#include <common/net_helper.h>
Willy Tarreau59f98392012-07-06 14:13:49 +020021
Willy Tarreauc5788912012-08-24 18:12:41 +020022#include <proto/connection.h>
Willy Tarreaudd2f85e2012-09-02 22:34:23 +020023#include <proto/fd.h>
Willy Tarreau5f1504f2012-10-04 23:55:57 +020024#include <proto/frontend.h>
Willy Tarreau2da156f2012-07-23 15:07:23 +020025#include <proto/proto_tcp.h>
Willy Tarreau2c6be842012-07-06 17:12:34 +020026#include <proto/stream_interface.h>
Emeric Brun4f603012017-01-05 15:11:44 +010027#include <proto/sample.h>
Emeric Brun46591952012-05-18 15:47:34 +020028#include <proto/ssl_sock.h>
Emeric Brun46591952012-05-18 15:47:34 +020029
Alexander Liu2a54bb72019-05-22 19:44:48 +080030#include <common/debug.h>
31
Willy Tarreau8ceae722018-11-26 11:58:30 +010032DECLARE_POOL(pool_head_connection, "connection", sizeof(struct connection));
33DECLARE_POOL(pool_head_connstream, "conn_stream", sizeof(struct conn_stream));
Willy Tarreauff5d57b2019-07-17 18:37:02 +020034DECLARE_POOL(pool_head_sockaddr, "sockaddr", sizeof(struct sockaddr_storage));
Geoff Simmons7185b782019-08-27 18:31:16 +020035DECLARE_POOL(pool_head_authority, "authority", PP2_AUTHORITY_MAX);
Willy Tarreau8ceae722018-11-26 11:58:30 +010036
Willy Tarreau13e14102016-12-22 20:25:26 +010037struct xprt_ops *registered_xprt[XPRT_ENTRIES] = { NULL, };
Willy Tarreauf2943dc2012-10-26 20:10:28 +020038
Christopher Faulet32f61c02018-04-10 14:33:41 +020039/* List head of all known muxes for PROTO */
40struct mux_proto_list mux_proto_list = {
41 .list = LIST_HEAD_INIT(mux_proto_list.list)
Willy Tarreau2386be62017-09-21 19:40:52 +020042};
43
Olivier Houchard477902b2020-01-22 18:08:48 +010044int conn_create_mux(struct connection *conn)
45{
Olivier Houchard477902b2020-01-22 18:08:48 +010046 if (conn_is_back(conn)) {
47 struct server *srv;
48 struct conn_stream *cs = conn->ctx;
49
50 if (conn->flags & CO_FL_ERROR)
51 goto fail;
Olivier Houcharda8a415d2020-01-23 13:15:14 +010052 /* Verify if the connection just established. */
53 if (unlikely(!(conn->flags & (CO_FL_WAIT_L4_CONN | CO_FL_WAIT_L6_CONN | CO_FL_CONNECTED))))
54 conn->flags |= CO_FL_CONNECTED;
55
Olivier Houchard477902b2020-01-22 18:08:48 +010056 if (conn_install_mux_be(conn, conn->ctx, conn->owner) < 0)
57 goto fail;
58 srv = objt_server(conn->target);
59 if (srv && ((srv->proxy->options & PR_O_REUSE_MASK) != PR_O_REUSE_NEVR) &&
60 conn->mux->avail_streams(conn) > 0)
61 LIST_ADD(&srv->idle_conns[tid], &conn->list);
62 return 0;
63fail:
64 /* let the upper layer know the connection failed */
65 cs->data_cb->wake(cs);
66 return -1;
67 } else
68 return conn_complete_session(conn);
69
70}
71
Willy Tarreau59f98392012-07-06 14:13:49 +020072/* I/O callback for fd-based connections. It calls the read/write handlers
Willy Tarreau7a798e52016-04-14 11:13:20 +020073 * provided by the connection's sock_ops, which must be valid.
Willy Tarreau59f98392012-07-06 14:13:49 +020074 */
Willy Tarreau7a798e52016-04-14 11:13:20 +020075void conn_fd_handler(int fd)
Willy Tarreau59f98392012-07-06 14:13:49 +020076{
Willy Tarreau80184712012-07-06 14:54:49 +020077 struct connection *conn = fdtab[fd].owner;
Willy Tarreau9e272bf2012-10-03 21:04:48 +020078 unsigned int flags;
Olivier Houchardaf4021e2018-08-09 13:06:55 +020079 int io_available = 0;
Willy Tarreau59f98392012-07-06 14:13:49 +020080
Willy Tarreaud80cb4e2018-01-20 19:30:13 +010081 if (unlikely(!conn)) {
82 activity[tid].conn_dead++;
Willy Tarreau7a798e52016-04-14 11:13:20 +020083 return;
Willy Tarreaud80cb4e2018-01-20 19:30:13 +010084 }
Willy Tarreau59f98392012-07-06 14:13:49 +020085
Willy Tarreau916e12d2017-10-25 09:22:43 +020086 conn->flags |= CO_FL_WILL_UPDATE;
87
Willy Tarreau7d281492012-12-16 19:19:13 +010088 flags = conn->flags & ~CO_FL_ERROR; /* ensure to call the wake handler upon error */
Willy Tarreaud29a0662012-12-10 16:33:38 +010089
Willy Tarreaub2a7ab02019-12-27 10:54:22 +010090 if (unlikely(conn->flags & CO_FL_WAIT_L4_CONN) &&
91 ((fd_send_ready(fd) && fd_send_active(fd)) ||
92 (fd_recv_ready(fd) && fd_recv_active(fd)))) {
93 /* Still waiting for a connection to establish and nothing was
94 * attempted yet to probe the connection. this will clear the
95 * CO_FL_WAIT_L4_CONN flag on success.
96 */
97 if (!conn_fd_check(conn))
98 goto leave;
99 }
100
Willy Tarreau8081abe2019-11-28 18:08:49 +0100101 if (fd_send_ready(fd) && fd_send_active(fd)) {
Willy Tarreau3c0cc492017-03-19 07:54:28 +0100102 /* force reporting of activity by clearing the previous flags :
103 * we'll have at least ERROR or CONNECTED at the end of an I/O,
104 * both of which will be detected below.
Willy Tarreau9e272bf2012-10-03 21:04:48 +0200105 */
Willy Tarreau3c0cc492017-03-19 07:54:28 +0100106 flags = 0;
Willy Tarreau7872d1f2020-01-10 07:06:05 +0100107 if (conn->subs && conn->subs->events & SUB_RETRY_SEND) {
108 tasklet_wakeup(conn->subs->tasklet);
109 conn->subs->events &= ~SUB_RETRY_SEND;
110 if (!conn->subs->events)
111 conn->subs = NULL;
Olivier Houchardfa8aa862018-10-10 18:25:41 +0200112 } else
113 io_available = 1;
Olivier Houchard53216e72018-10-10 15:46:36 +0200114 __conn_xprt_stop_send(conn);
Willy Tarreau9e272bf2012-10-03 21:04:48 +0200115 }
Willy Tarreau59f98392012-07-06 14:13:49 +0200116
Willy Tarreau57ec32f2017-04-11 19:59:33 +0200117 /* The data transfer starts here and stops on error and handshakes. Note
118 * that we must absolutely test conn->xprt at each step in case it suddenly
119 * changes due to a quick unexpected close().
120 */
Willy Tarreau8081abe2019-11-28 18:08:49 +0100121 if (fd_recv_ready(fd) && fd_recv_active(fd)) {
Willy Tarreau3c0cc492017-03-19 07:54:28 +0100122 /* force reporting of activity by clearing the previous flags :
123 * we'll have at least ERROR or CONNECTED at the end of an I/O,
124 * both of which will be detected below.
Willy Tarreau9e272bf2012-10-03 21:04:48 +0200125 */
Willy Tarreau3c0cc492017-03-19 07:54:28 +0100126 flags = 0;
Willy Tarreau7872d1f2020-01-10 07:06:05 +0100127 if (conn->subs && conn->subs->events & SUB_RETRY_RECV) {
128 tasklet_wakeup(conn->subs->tasklet);
129 conn->subs->events &= ~SUB_RETRY_RECV;
130 if (!conn->subs->events)
131 conn->subs = NULL;
Olivier Houchardfa8aa862018-10-10 18:25:41 +0200132 } else
133 io_available = 1;
Olivier Houchard53216e72018-10-10 15:46:36 +0200134 __conn_xprt_stop_recv(conn);
Willy Tarreau9e272bf2012-10-03 21:04:48 +0200135 }
Willy Tarreau2da156f2012-07-23 15:07:23 +0200136
Willy Tarreau2c6be842012-07-06 17:12:34 +0200137 leave:
Willy Tarreaucbcf77e2019-12-27 14:49:19 +0100138 /* Verify if the connection just established. */
139 if (unlikely(!(conn->flags & (CO_FL_WAIT_L4_CONN | CO_FL_WAIT_L6_CONN | CO_FL_CONNECTED))))
140 conn->flags |= CO_FL_CONNECTED;
141
Olivier Houchard477902b2020-01-22 18:08:48 +0100142 /* If we don't yet have a mux, that means we were waiting for
143 * informations to create one, typically from the ALPN. If we're
144 * done with the handshake, attempt to create one.
Willy Tarreau8e3c6ce2017-08-28 15:46:01 +0200145 */
Olivier Houchard477902b2020-01-22 18:08:48 +0100146 if (unlikely(!conn->mux) && !(conn->flags & CO_FL_HANDSHAKE))
147 if (conn_create_mux(conn) < 0)
148 return;
Willy Tarreau8e3c6ce2017-08-28 15:46:01 +0200149
Willy Tarreau3c0cc492017-03-19 07:54:28 +0100150 /* The wake callback is normally used to notify the data layer about
151 * data layer activity (successful send/recv), connection establishment,
152 * shutdown and fatal errors. We need to consider the following
153 * situations to wake up the data layer :
Willy Tarreau0fbc3182019-12-27 14:57:45 +0100154 * - change among the CO_FL_NOTIFY_DONE flags :
155 * SOCK_{RD,WR}_SH, ERROR,
Willy Tarreau3c0cc492017-03-19 07:54:28 +0100156 * - absence of any of {L4,L6}_CONN and CONNECTED, indicating the
157 * end of handshake and transition to CONNECTED
158 * - raise of CONNECTED with HANDSHAKE down
159 * - end of HANDSHAKE with CONNECTED set
160 * - regular data layer activity
161 *
162 * Note that the wake callback is allowed to release the connection and
163 * the fd (and return < 0 in this case).
Willy Tarreau2396c1c2012-10-03 21:12:16 +0200164 */
Willy Tarreau0fbc3182019-12-27 14:57:45 +0100165 if ((io_available || (((conn->flags ^ flags) & CO_FL_NOTIFY_DONE) ||
Willy Tarreau3c0cc492017-03-19 07:54:28 +0100166 ((flags & (CO_FL_CONNECTED|CO_FL_HANDSHAKE)) != CO_FL_CONNECTED &&
Olivier Houchard910b2bc2018-07-17 18:49:38 +0200167 (conn->flags & (CO_FL_CONNECTED|CO_FL_HANDSHAKE)) == CO_FL_CONNECTED))) &&
Olivier Houchardfe50bfb2019-05-27 12:09:19 +0200168 conn->mux && conn->mux->wake && conn->mux->wake(conn) < 0)
Willy Tarreau7a798e52016-04-14 11:13:20 +0200169 return;
Willy Tarreaufd31e532012-07-23 18:24:25 +0200170
Willy Tarreauf9dabec2012-08-17 17:33:53 +0200171 /* commit polling changes */
Willy Tarreau916e12d2017-10-25 09:22:43 +0200172 conn->flags &= ~CO_FL_WILL_UPDATE;
Willy Tarreauf9dabec2012-08-17 17:33:53 +0200173 conn_cond_update_polling(conn);
Willy Tarreau7a798e52016-04-14 11:13:20 +0200174 return;
Willy Tarreau59f98392012-07-06 14:13:49 +0200175}
Willy Tarreaub5e2cbd2012-08-17 11:55:04 +0200176
Willy Tarreaue9dfa792012-09-01 17:26:16 +0200177/* Update polling on connection <c>'s file descriptor depending on its current
Willy Tarreau3381bf82020-01-17 17:39:35 +0100178 * state as reported in the connection's CO_FL_XPRT_* flags. The connection
179 * flags are updated with the new flags at the end of the operation. Polling
180 * is totally disabled if an error was reported.
Willy Tarreaub5e2cbd2012-08-17 11:55:04 +0200181 */
Olivier Houchard1a0545f2017-09-13 18:30:23 +0200182void conn_update_xprt_polling(struct connection *c)
Willy Tarreaub5e2cbd2012-08-17 11:55:04 +0200183{
Willy Tarreaue9dfa792012-09-01 17:26:16 +0200184 unsigned int f = c->flags;
Willy Tarreaub5e2cbd2012-08-17 11:55:04 +0200185
Willy Tarreau3c728722014-01-23 13:50:42 +0100186 if (!conn_ctrl_ready(c))
Willy Tarreauf79c8172013-10-21 16:30:56 +0200187 return;
188
Willy Tarreaub5e2cbd2012-08-17 11:55:04 +0200189 /* update read status if needed */
Willy Tarreau3381bf82020-01-17 17:39:35 +0100190 if (f & CO_FL_XPRT_RD_ENA)
Willy Tarreau585744b2017-08-24 14:31:19 +0200191 fd_want_recv(c->handle.fd);
Willy Tarreau3381bf82020-01-17 17:39:35 +0100192 else
Willy Tarreau585744b2017-08-24 14:31:19 +0200193 fd_stop_recv(c->handle.fd);
Willy Tarreaub5e2cbd2012-08-17 11:55:04 +0200194
195 /* update write status if needed */
Willy Tarreau3381bf82020-01-17 17:39:35 +0100196 if (f & CO_FL_XPRT_WR_ENA)
Willy Tarreau585744b2017-08-24 14:31:19 +0200197 fd_want_send(c->handle.fd);
Willy Tarreau3381bf82020-01-17 17:39:35 +0100198 else
Willy Tarreau585744b2017-08-24 14:31:19 +0200199 fd_stop_send(c->handle.fd);
Willy Tarreaue9dfa792012-09-01 17:26:16 +0200200}
201
Willy Tarreau4970e5a2019-12-27 10:40:21 +0100202/* This is the callback which is set when a connection establishment is pending
203 * and we have nothing to send. It may update the FD polling status to indicate
204 * !READY. It returns 0 if it fails in a fatal way or needs to poll to go
205 * further, otherwise it returns non-zero and removes the CO_FL_WAIT_L4_CONN
206 * flag from the connection's flags. In case of error, it sets CO_FL_ERROR and
207 * leaves the error code in errno.
208 */
209int conn_fd_check(struct connection *conn)
210{
211 struct sockaddr_storage *addr;
212 int fd = conn->handle.fd;
213
214 if (conn->flags & CO_FL_ERROR)
215 return 0;
216
217 if (!conn_ctrl_ready(conn))
218 return 0;
219
220 if (!(conn->flags & CO_FL_WAIT_L4_CONN))
221 return 1; /* strange we were called while ready */
222
223 if (!fd_send_ready(fd))
224 return 0;
225
226 /* Here we have 2 cases :
227 * - modern pollers, able to report ERR/HUP. If these ones return any
228 * of these flags then it's likely a failure, otherwise it possibly
229 * is a success (i.e. there may have been data received just before
230 * the error was reported).
231 * - select, which doesn't report these and with which it's always
232 * necessary either to try connect() again or to check for SO_ERROR.
233 * In order to simplify everything, we double-check using connect() as
234 * soon as we meet either of these delicate situations. Note that
235 * SO_ERROR would clear the error after reporting it!
236 */
237 if (cur_poller.flags & HAP_POLL_F_ERRHUP) {
238 /* modern poller, able to report ERR/HUP */
239 if ((fdtab[fd].ev & (FD_POLL_IN|FD_POLL_ERR|FD_POLL_HUP)) == FD_POLL_IN)
240 goto done;
241 if ((fdtab[fd].ev & (FD_POLL_OUT|FD_POLL_ERR|FD_POLL_HUP)) == FD_POLL_OUT)
242 goto done;
243 if (!(fdtab[fd].ev & (FD_POLL_ERR|FD_POLL_HUP)))
244 goto wait;
245 /* error present, fall through common error check path */
246 }
247
248 /* Use connect() to check the state of the socket. This has the double
249 * advantage of *not* clearing the error (so that health checks can
250 * still use getsockopt(SO_ERROR)) and giving us the following info :
251 * - error
252 * - connecting (EALREADY, EINPROGRESS)
253 * - connected (EISCONN, 0)
254 */
255 addr = conn->dst;
256 if ((conn->flags & CO_FL_SOCKS4) && obj_type(conn->target) == OBJ_TYPE_SERVER)
257 addr = &objt_server(conn->target)->socks4_addr;
258
259 if (connect(fd, (const struct sockaddr *)addr, get_addr_len(addr)) == -1) {
260 if (errno == EALREADY || errno == EINPROGRESS)
261 goto wait;
262
263 if (errno && errno != EISCONN)
264 goto out_error;
265 }
266
267 done:
268 /* The FD is ready now, we'll mark the connection as complete and
269 * forward the event to the transport layer which will notify the
270 * data layer.
271 */
272 conn->flags &= ~CO_FL_WAIT_L4_CONN;
273 fd_may_send(fd);
274 fd_cond_recv(fd);
275 errno = 0; // make health checks happy
276 return 1;
277
278 out_error:
279 /* Write error on the file descriptor. Report it to the connection
280 * and disable polling on this FD.
281 */
282 fdtab[fd].linger_risk = 0;
283 conn->flags |= CO_FL_ERROR | CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
284 __conn_xprt_stop_both(conn);
285 return 0;
286
287 wait:
288 __conn_xprt_want_send(conn);
289 fd_cant_send(fd);
290 return 0;
291}
292
Willy Tarreauff3e6482015-03-12 23:56:52 +0100293/* Send a message over an established connection. It makes use of send() and
294 * returns the same return code and errno. If the socket layer is not ready yet
295 * then -1 is returned and ENOTSOCK is set into errno. If the fd is not marked
296 * as ready, or if EAGAIN or ENOTCONN is returned, then we return 0. It returns
297 * EMSGSIZE if called with a zero length message. The purpose is to simplify
298 * some rare attempts to directly write on the socket from above the connection
299 * (typically send_proxy). In case of EAGAIN, the fd is marked as "cant_send".
300 * It automatically retries on EINTR. Other errors cause the connection to be
301 * marked as in error state. It takes similar arguments as send() except the
302 * first one which is the connection instead of the file descriptor. Note,
303 * MSG_DONTWAIT and MSG_NOSIGNAL are forced on the flags.
304 */
305int conn_sock_send(struct connection *conn, const void *buf, int len, int flags)
306{
307 int ret;
308
309 ret = -1;
310 errno = ENOTSOCK;
311
312 if (conn->flags & CO_FL_SOCK_WR_SH)
313 goto fail;
314
315 if (!conn_ctrl_ready(conn))
316 goto fail;
317
318 errno = EMSGSIZE;
319 if (!len)
320 goto fail;
321
Willy Tarreau585744b2017-08-24 14:31:19 +0200322 if (!fd_send_ready(conn->handle.fd))
Willy Tarreauff3e6482015-03-12 23:56:52 +0100323 goto wait;
324
325 do {
Willy Tarreau585744b2017-08-24 14:31:19 +0200326 ret = send(conn->handle.fd, buf, len, flags | MSG_DONTWAIT | MSG_NOSIGNAL);
Willy Tarreauff3e6482015-03-12 23:56:52 +0100327 } while (ret < 0 && errno == EINTR);
328
329
Willy Tarreauccf3f6d2019-09-05 17:05:05 +0200330 if (ret > 0) {
331 if (conn->flags & CO_FL_WAIT_L4_CONN) {
332 conn->flags &= ~CO_FL_WAIT_L4_CONN;
333 fd_may_send(conn->handle.fd);
334 fd_cond_recv(conn->handle.fd);
335 }
Willy Tarreauff3e6482015-03-12 23:56:52 +0100336 return ret;
Willy Tarreauccf3f6d2019-09-05 17:05:05 +0200337 }
Willy Tarreauff3e6482015-03-12 23:56:52 +0100338
339 if (ret == 0 || errno == EAGAIN || errno == ENOTCONN) {
340 wait:
Willy Tarreau585744b2017-08-24 14:31:19 +0200341 fd_cant_send(conn->handle.fd);
Willy Tarreauff3e6482015-03-12 23:56:52 +0100342 return 0;
343 }
344 fail:
345 conn->flags |= CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH | CO_FL_ERROR;
346 return ret;
347}
348
Willy Tarreauee1a6fc2020-01-17 07:52:13 +0100349/* Called from the upper layer, to subscribe <es> to events <event_type>. The
350 * event subscriber <es> is not allowed to change from a previous call as long
351 * as at least one event is still subscribed. The <event_type> must only be a
352 * combination of SUB_RETRY_RECV and SUB_RETRY_SEND. It always returns 0.
353 */
354int conn_unsubscribe(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es)
Olivier Houchard83a0cd82018-09-28 17:57:58 +0200355{
Willy Tarreau7872d1f2020-01-10 07:06:05 +0100356 BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
Willy Tarreauee1a6fc2020-01-17 07:52:13 +0100357 BUG_ON(conn->subs && conn->subs != es);
Willy Tarreau7872d1f2020-01-10 07:06:05 +0100358
Willy Tarreauee1a6fc2020-01-17 07:52:13 +0100359 es->events &= ~event_type;
360 if (!es->events)
Willy Tarreau7872d1f2020-01-10 07:06:05 +0100361 conn->subs = NULL;
362
363 if (event_type & SUB_RETRY_RECV)
Olivier Houchard53216e72018-10-10 15:46:36 +0200364 __conn_xprt_stop_recv(conn);
Willy Tarreau7872d1f2020-01-10 07:06:05 +0100365
366 if (event_type & SUB_RETRY_SEND)
Olivier Houchard53216e72018-10-10 15:46:36 +0200367 __conn_xprt_stop_send(conn);
Willy Tarreau7872d1f2020-01-10 07:06:05 +0100368
Olivier Houchard53216e72018-10-10 15:46:36 +0200369 conn_update_xprt_polling(conn);
Olivier Houchard83a0cd82018-09-28 17:57:58 +0200370 return 0;
371}
372
Willy Tarreauee1a6fc2020-01-17 07:52:13 +0100373/* Called from the upper layer, to unsubscribe <es> from events <event_type>
374 * (undo fcgi_subscribe). The <es> struct is not allowed to differ from the one
375 * passed to the subscribe() call. It always returns zero.
376 */
377int conn_subscribe(struct connection *conn, void *xprt_ctx, int event_type, struct wait_event *es)
Olivier Houchard6ff20392018-07-17 18:46:31 +0200378{
Willy Tarreau7872d1f2020-01-10 07:06:05 +0100379 BUG_ON(event_type & ~(SUB_RETRY_SEND|SUB_RETRY_RECV));
380 BUG_ON(conn->subs && conn->subs->events & event_type);
Willy Tarreauee1a6fc2020-01-17 07:52:13 +0100381 BUG_ON(conn->subs && conn->subs != es);
Willy Tarreau7872d1f2020-01-10 07:06:05 +0100382
Willy Tarreauee1a6fc2020-01-17 07:52:13 +0100383 conn->subs = es;
384 es->events |= event_type;
Willy Tarreau7872d1f2020-01-10 07:06:05 +0100385
386 if (event_type & SUB_RETRY_RECV)
Olivier Houchard53216e72018-10-10 15:46:36 +0200387 __conn_xprt_want_recv(conn);
Willy Tarreau7872d1f2020-01-10 07:06:05 +0100388
389 if (event_type & SUB_RETRY_SEND)
Olivier Houchard53216e72018-10-10 15:46:36 +0200390 __conn_xprt_want_send(conn);
Willy Tarreau7872d1f2020-01-10 07:06:05 +0100391
Olivier Houchard53216e72018-10-10 15:46:36 +0200392 conn_update_xprt_polling(conn);
Olivier Houchard83a0cd82018-09-28 17:57:58 +0200393 return 0;
Olivier Houchard6ff20392018-07-17 18:46:31 +0200394}
395
Willy Tarreaud85c4852015-03-13 00:40:28 +0100396/* Drains possibly pending incoming data on the file descriptor attached to the
397 * connection and update the connection's flags accordingly. This is used to
398 * know whether we need to disable lingering on close. Returns non-zero if it
399 * is safe to close without disabling lingering, otherwise zero. The SOCK_RD_SH
400 * flag may also be updated if the incoming shutdown was reported by the drain()
401 * function.
402 */
403int conn_sock_drain(struct connection *conn)
404{
Willy Tarreaue215bba2018-08-24 14:31:53 +0200405 int turns = 2;
406 int len;
407
Willy Tarreaud85c4852015-03-13 00:40:28 +0100408 if (!conn_ctrl_ready(conn))
409 return 1;
410
411 if (conn->flags & (CO_FL_ERROR | CO_FL_SOCK_RD_SH))
412 return 1;
413
Willy Tarreaue215bba2018-08-24 14:31:53 +0200414 if (fdtab[conn->handle.fd].ev & (FD_POLL_ERR|FD_POLL_HUP))
415 goto shut;
Willy Tarreaud85c4852015-03-13 00:40:28 +0100416
Willy Tarreaue215bba2018-08-24 14:31:53 +0200417 if (!fd_recv_ready(conn->handle.fd))
418 return 0;
Willy Tarreaud85c4852015-03-13 00:40:28 +0100419
Willy Tarreaue215bba2018-08-24 14:31:53 +0200420 if (conn->ctrl->drain) {
Willy Tarreau585744b2017-08-24 14:31:19 +0200421 if (conn->ctrl->drain(conn->handle.fd) <= 0)
Willy Tarreaud85c4852015-03-13 00:40:28 +0100422 return 0;
Willy Tarreaue215bba2018-08-24 14:31:53 +0200423 goto shut;
424 }
425
426 /* no drain function defined, use the generic one */
427
428 while (turns) {
429#ifdef MSG_TRUNC_CLEARS_INPUT
430 len = recv(conn->handle.fd, NULL, INT_MAX, MSG_DONTWAIT | MSG_NOSIGNAL | MSG_TRUNC);
431 if (len == -1 && errno == EFAULT)
432#endif
433 len = recv(conn->handle.fd, trash.area, trash.size,
434 MSG_DONTWAIT | MSG_NOSIGNAL);
435
436 if (len == 0)
437 goto shut;
438
439 if (len < 0) {
440 if (errno == EAGAIN) {
441 /* connection not closed yet */
442 fd_cant_recv(conn->handle.fd);
443 break;
444 }
445 if (errno == EINTR) /* oops, try again */
446 continue;
447 /* other errors indicate a dead connection, fine. */
448 goto shut;
449 }
450 /* OK we read some data, let's try again once */
451 turns--;
Willy Tarreaud85c4852015-03-13 00:40:28 +0100452 }
453
Willy Tarreaue215bba2018-08-24 14:31:53 +0200454 /* some data are still present, give up */
455 return 0;
456
457 shut:
458 /* we're certain the connection was shut down */
459 fdtab[conn->handle.fd].linger_risk = 0;
Willy Tarreaud85c4852015-03-13 00:40:28 +0100460 conn->flags |= CO_FL_SOCK_RD_SH;
461 return 1;
462}
463
KOVACS Krisztianb3e54fe2014-11-17 15:11:45 +0100464/*
465 * Get data length from tlv
466 */
467static int get_tlv_length(const struct tlv *src)
468{
469 return (src->length_hi << 8) | src->length_lo;
470}
471
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200472/* This handshake handler waits a PROXY protocol header at the beginning of the
473 * raw data stream. The header looks like this :
474 *
475 * "PROXY" <SP> PROTO <SP> SRC3 <SP> DST3 <SP> SRC4 <SP> <DST4> "\r\n"
476 *
477 * There must be exactly one space between each field. Fields are :
478 * - PROTO : layer 4 protocol, which must be "TCP4" or "TCP6".
479 * - SRC3 : layer 3 (eg: IP) source address in standard text form
480 * - DST3 : layer 3 (eg: IP) destination address in standard text form
481 * - SRC4 : layer 4 (eg: TCP port) source address in standard text form
482 * - DST4 : layer 4 (eg: TCP port) destination address in standard text form
483 *
484 * This line MUST be at the beginning of the buffer and MUST NOT wrap.
485 *
486 * The header line is small and in all cases smaller than the smallest normal
487 * TCP MSS. So it MUST always be delivered as one segment, which ensures we
488 * can safely use MSG_PEEK and avoid buffering.
489 *
490 * Once the data is fetched, the values are set in the connection's address
491 * fields, and data are removed from the socket's buffer. The function returns
492 * zero if it needs to wait for more data or if it fails, or 1 if it completed
493 * and removed itself.
494 */
495int conn_recv_proxy(struct connection *conn, int flag)
496{
497 char *line, *end;
Willy Tarreau77992672014-06-14 11:06:17 +0200498 struct proxy_hdr_v2 *hdr_v2;
499 const char v2sig[] = PP2_SIGNATURE;
KOVACS Krisztianb3e54fe2014-11-17 15:11:45 +0100500 int tlv_length = 0;
KOVACS Krisztian7209c202015-07-03 14:09:10 +0200501 int tlv_offset = 0;
Willy Tarreaub406b872018-08-22 05:20:32 +0200502 int ret;
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200503
504 /* we might have been called just after an asynchronous shutr */
505 if (conn->flags & CO_FL_SOCK_RD_SH)
506 goto fail;
507
Willy Tarreau3c728722014-01-23 13:50:42 +0100508 if (!conn_ctrl_ready(conn))
Willy Tarreauf79c8172013-10-21 16:30:56 +0200509 goto fail;
510
Willy Tarreauca79f592019-07-17 19:04:47 +0200511 if (!sockaddr_alloc(&conn->src) || !sockaddr_alloc(&conn->dst))
512 goto fail;
513
Willy Tarreau585744b2017-08-24 14:31:19 +0200514 if (!fd_recv_ready(conn->handle.fd))
Willy Tarreau6499b9d2019-06-03 08:17:30 +0200515 goto not_ready;
Willy Tarreaufd803bb2014-01-20 15:13:07 +0100516
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200517 do {
Willy Tarreaub406b872018-08-22 05:20:32 +0200518 ret = recv(conn->handle.fd, trash.area, trash.size, MSG_PEEK);
519 if (ret < 0) {
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200520 if (errno == EINTR)
521 continue;
522 if (errno == EAGAIN) {
Willy Tarreau585744b2017-08-24 14:31:19 +0200523 fd_cant_recv(conn->handle.fd);
Willy Tarreau6499b9d2019-06-03 08:17:30 +0200524 goto not_ready;
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200525 }
Willy Tarreau8e3bf692012-12-03 15:41:18 +0100526 goto recv_abort;
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200527 }
Willy Tarreaub406b872018-08-22 05:20:32 +0200528 trash.data = ret;
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200529 } while (0);
530
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200531 if (!trash.data) {
Willy Tarreau8e3bf692012-12-03 15:41:18 +0100532 /* client shutdown */
533 conn->err_code = CO_ER_PRX_EMPTY;
534 goto fail;
535 }
536
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200537 if (trash.data < 6)
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200538 goto missing;
539
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200540 line = trash.area;
541 end = trash.area + trash.data;
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200542
543 /* Decode a possible proxy request, fail early if it does not match */
Willy Tarreau77992672014-06-14 11:06:17 +0200544 if (strncmp(line, "PROXY ", 6) != 0)
545 goto not_v1;
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200546
547 line += 6;
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200548 if (trash.data < 9) /* shortest possible line */
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200549 goto missing;
550
David CARLIER42ff05e2016-03-24 09:22:36 +0000551 if (memcmp(line, "TCP4 ", 5) == 0) {
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200552 u32 src3, dst3, sport, dport;
553
554 line += 5;
555
556 src3 = inetaddr_host_lim_ret(line, end, &line);
557 if (line == end)
558 goto missing;
559 if (*line++ != ' ')
Willy Tarreau8e3bf692012-12-03 15:41:18 +0100560 goto bad_header;
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200561
562 dst3 = inetaddr_host_lim_ret(line, end, &line);
563 if (line == end)
564 goto missing;
565 if (*line++ != ' ')
Willy Tarreau8e3bf692012-12-03 15:41:18 +0100566 goto bad_header;
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200567
568 sport = read_uint((const char **)&line, end);
569 if (line == end)
570 goto missing;
571 if (*line++ != ' ')
Willy Tarreau8e3bf692012-12-03 15:41:18 +0100572 goto bad_header;
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200573
574 dport = read_uint((const char **)&line, end);
575 if (line > end - 2)
576 goto missing;
577 if (*line++ != '\r')
Willy Tarreau8e3bf692012-12-03 15:41:18 +0100578 goto bad_header;
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200579 if (*line++ != '\n')
Willy Tarreau8e3bf692012-12-03 15:41:18 +0100580 goto bad_header;
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200581
582 /* update the session's addresses and mark them set */
Willy Tarreau226572f2019-07-17 14:46:00 +0200583 ((struct sockaddr_in *)conn->src)->sin_family = AF_INET;
584 ((struct sockaddr_in *)conn->src)->sin_addr.s_addr = htonl(src3);
585 ((struct sockaddr_in *)conn->src)->sin_port = htons(sport);
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200586
Willy Tarreau226572f2019-07-17 14:46:00 +0200587 ((struct sockaddr_in *)conn->dst)->sin_family = AF_INET;
588 ((struct sockaddr_in *)conn->dst)->sin_addr.s_addr = htonl(dst3);
589 ((struct sockaddr_in *)conn->dst)->sin_port = htons(dport);
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200590 conn->flags |= CO_FL_ADDR_FROM_SET | CO_FL_ADDR_TO_SET;
591 }
David CARLIER42ff05e2016-03-24 09:22:36 +0000592 else if (memcmp(line, "TCP6 ", 5) == 0) {
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200593 u32 sport, dport;
594 char *src_s;
595 char *dst_s, *sport_s, *dport_s;
596 struct in6_addr src3, dst3;
597
598 line += 5;
599
600 src_s = line;
601 dst_s = sport_s = dport_s = NULL;
602 while (1) {
603 if (line > end - 2) {
604 goto missing;
605 }
606 else if (*line == '\r') {
607 *line = 0;
608 line++;
609 if (*line++ != '\n')
Willy Tarreau8e3bf692012-12-03 15:41:18 +0100610 goto bad_header;
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200611 break;
612 }
613
614 if (*line == ' ') {
615 *line = 0;
616 if (!dst_s)
617 dst_s = line + 1;
618 else if (!sport_s)
619 sport_s = line + 1;
620 else if (!dport_s)
621 dport_s = line + 1;
622 }
623 line++;
624 }
625
626 if (!dst_s || !sport_s || !dport_s)
Willy Tarreau8e3bf692012-12-03 15:41:18 +0100627 goto bad_header;
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200628
629 sport = read_uint((const char **)&sport_s,dport_s - 1);
630 if (*sport_s != 0)
Willy Tarreau8e3bf692012-12-03 15:41:18 +0100631 goto bad_header;
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200632
633 dport = read_uint((const char **)&dport_s,line - 2);
634 if (*dport_s != 0)
Willy Tarreau8e3bf692012-12-03 15:41:18 +0100635 goto bad_header;
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200636
637 if (inet_pton(AF_INET6, src_s, (void *)&src3) != 1)
Willy Tarreau8e3bf692012-12-03 15:41:18 +0100638 goto bad_header;
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200639
640 if (inet_pton(AF_INET6, dst_s, (void *)&dst3) != 1)
Willy Tarreau8e3bf692012-12-03 15:41:18 +0100641 goto bad_header;
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200642
643 /* update the session's addresses and mark them set */
Willy Tarreau226572f2019-07-17 14:46:00 +0200644 ((struct sockaddr_in6 *)conn->src)->sin6_family = AF_INET6;
645 memcpy(&((struct sockaddr_in6 *)conn->src)->sin6_addr, &src3, sizeof(struct in6_addr));
646 ((struct sockaddr_in6 *)conn->src)->sin6_port = htons(sport);
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200647
Willy Tarreau226572f2019-07-17 14:46:00 +0200648 ((struct sockaddr_in6 *)conn->dst)->sin6_family = AF_INET6;
649 memcpy(&((struct sockaddr_in6 *)conn->dst)->sin6_addr, &dst3, sizeof(struct in6_addr));
650 ((struct sockaddr_in6 *)conn->dst)->sin6_port = htons(dport);
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200651 conn->flags |= CO_FL_ADDR_FROM_SET | CO_FL_ADDR_TO_SET;
652 }
Willy Tarreau4c20d292014-06-14 11:41:36 +0200653 else if (memcmp(line, "UNKNOWN\r\n", 9) == 0) {
654 /* This can be a UNIX socket forwarded by an haproxy upstream */
655 line += 9;
656 }
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200657 else {
Willy Tarreau4c20d292014-06-14 11:41:36 +0200658 /* The protocol does not match something known (TCP4/TCP6/UNKNOWN) */
Willy Tarreau8e3bf692012-12-03 15:41:18 +0100659 conn->err_code = CO_ER_PRX_BAD_PROTO;
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200660 goto fail;
661 }
662
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200663 trash.data = line - trash.area;
Willy Tarreau77992672014-06-14 11:06:17 +0200664 goto eat_header;
665
666 not_v1:
667 /* try PPv2 */
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200668 if (trash.data < PP2_HEADER_LEN)
Willy Tarreau77992672014-06-14 11:06:17 +0200669 goto missing;
670
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200671 hdr_v2 = (struct proxy_hdr_v2 *) trash.area;
Willy Tarreau77992672014-06-14 11:06:17 +0200672
673 if (memcmp(hdr_v2->sig, v2sig, PP2_SIGNATURE_LEN) != 0 ||
674 (hdr_v2->ver_cmd & PP2_VERSION_MASK) != PP2_VERSION) {
675 conn->err_code = CO_ER_PRX_NOT_HDR;
676 goto fail;
677 }
678
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200679 if (trash.data < PP2_HEADER_LEN + ntohs(hdr_v2->len))
Willy Tarreau77992672014-06-14 11:06:17 +0200680 goto missing;
681
682 switch (hdr_v2->ver_cmd & PP2_CMD_MASK) {
683 case 0x01: /* PROXY command */
684 switch (hdr_v2->fam) {
685 case 0x11: /* TCPv4 */
KOVACS Krisztianefd3aa92014-11-19 10:53:20 +0100686 if (ntohs(hdr_v2->len) < PP2_ADDR_LEN_INET)
687 goto bad_header;
688
Willy Tarreau226572f2019-07-17 14:46:00 +0200689 ((struct sockaddr_in *)conn->src)->sin_family = AF_INET;
690 ((struct sockaddr_in *)conn->src)->sin_addr.s_addr = hdr_v2->addr.ip4.src_addr;
691 ((struct sockaddr_in *)conn->src)->sin_port = hdr_v2->addr.ip4.src_port;
692 ((struct sockaddr_in *)conn->dst)->sin_family = AF_INET;
693 ((struct sockaddr_in *)conn->dst)->sin_addr.s_addr = hdr_v2->addr.ip4.dst_addr;
694 ((struct sockaddr_in *)conn->dst)->sin_port = hdr_v2->addr.ip4.dst_port;
Willy Tarreau77992672014-06-14 11:06:17 +0200695 conn->flags |= CO_FL_ADDR_FROM_SET | CO_FL_ADDR_TO_SET;
KOVACS Krisztian7209c202015-07-03 14:09:10 +0200696 tlv_offset = PP2_HEADER_LEN + PP2_ADDR_LEN_INET;
KOVACS Krisztianb3e54fe2014-11-17 15:11:45 +0100697 tlv_length = ntohs(hdr_v2->len) - PP2_ADDR_LEN_INET;
Willy Tarreau77992672014-06-14 11:06:17 +0200698 break;
699 case 0x21: /* TCPv6 */
KOVACS Krisztianefd3aa92014-11-19 10:53:20 +0100700 if (ntohs(hdr_v2->len) < PP2_ADDR_LEN_INET6)
701 goto bad_header;
702
Willy Tarreau226572f2019-07-17 14:46:00 +0200703 ((struct sockaddr_in6 *)conn->src)->sin6_family = AF_INET6;
704 memcpy(&((struct sockaddr_in6 *)conn->src)->sin6_addr, hdr_v2->addr.ip6.src_addr, 16);
705 ((struct sockaddr_in6 *)conn->src)->sin6_port = hdr_v2->addr.ip6.src_port;
706 ((struct sockaddr_in6 *)conn->dst)->sin6_family = AF_INET6;
707 memcpy(&((struct sockaddr_in6 *)conn->dst)->sin6_addr, hdr_v2->addr.ip6.dst_addr, 16);
708 ((struct sockaddr_in6 *)conn->dst)->sin6_port = hdr_v2->addr.ip6.dst_port;
Willy Tarreau77992672014-06-14 11:06:17 +0200709 conn->flags |= CO_FL_ADDR_FROM_SET | CO_FL_ADDR_TO_SET;
KOVACS Krisztian7209c202015-07-03 14:09:10 +0200710 tlv_offset = PP2_HEADER_LEN + PP2_ADDR_LEN_INET6;
KOVACS Krisztianb3e54fe2014-11-17 15:11:45 +0100711 tlv_length = ntohs(hdr_v2->len) - PP2_ADDR_LEN_INET6;
Willy Tarreau77992672014-06-14 11:06:17 +0200712 break;
713 }
KOVACS Krisztianb3e54fe2014-11-17 15:11:45 +0100714
715 /* TLV parsing */
716 if (tlv_length > 0) {
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200717 while (tlv_offset + TLV_HEADER_SIZE <= trash.data) {
718 const struct tlv *tlv_packet = (struct tlv *) &trash.area[tlv_offset];
KOVACS Krisztianb3e54fe2014-11-17 15:11:45 +0100719 const int tlv_len = get_tlv_length(tlv_packet);
720 tlv_offset += tlv_len + TLV_HEADER_SIZE;
721
722 switch (tlv_packet->type) {
Emmanuel Hocdet115df3e2018-02-05 16:23:23 +0100723 case PP2_TYPE_CRC32C: {
724 void *tlv_crc32c_p = (void *)tlv_packet->value;
725 uint32_t n_crc32c = ntohl(read_u32(tlv_crc32c_p));
726 write_u32(tlv_crc32c_p, 0);
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200727 if (hash_crc32c(trash.area, PP2_HEADER_LEN + ntohs(hdr_v2->len)) != n_crc32c)
Emmanuel Hocdet115df3e2018-02-05 16:23:23 +0100728 goto bad_header;
729 break;
730 }
Willy Tarreaue5733232019-05-22 19:24:06 +0200731#ifdef USE_NS
KOVACS Krisztianb3e54fe2014-11-17 15:11:45 +0100732 case PP2_TYPE_NETNS: {
733 const struct netns_entry *ns;
734 ns = netns_store_lookup((char*)tlv_packet->value, tlv_len);
735 if (ns)
736 conn->proxy_netns = ns;
737 break;
738 }
739#endif
Geoff Simmons7185b782019-08-27 18:31:16 +0200740 case PP2_TYPE_AUTHORITY: {
741 if (tlv_len > PP2_AUTHORITY_MAX)
742 goto bad_header;
743 conn->proxy_authority = pool_alloc(pool_head_authority);
744 if (conn->proxy_authority == NULL)
745 goto fail;
746 memcpy(conn->proxy_authority, (const char *)tlv_packet->value, tlv_len);
747 conn->proxy_authority_len = tlv_len;
748 break;
749 }
KOVACS Krisztianb3e54fe2014-11-17 15:11:45 +0100750 default:
751 break;
752 }
753 }
754 }
755
Willy Tarreau77992672014-06-14 11:06:17 +0200756 /* unsupported protocol, keep local connection address */
757 break;
758 case 0x00: /* LOCAL command */
759 /* keep local connection address for LOCAL */
760 break;
761 default:
762 goto bad_header; /* not a supported command */
763 }
764
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200765 trash.data = PP2_HEADER_LEN + ntohs(hdr_v2->len);
Willy Tarreau77992672014-06-14 11:06:17 +0200766 goto eat_header;
767
768 eat_header:
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200769 /* remove the PROXY line from the request. For this we re-read the
770 * exact line at once. If we don't get the exact same result, we
771 * fail.
772 */
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200773 do {
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200774 int len2 = recv(conn->handle.fd, trash.area, trash.data, 0);
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200775 if (len2 < 0 && errno == EINTR)
776 continue;
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200777 if (len2 != trash.data)
Willy Tarreau8e3bf692012-12-03 15:41:18 +0100778 goto recv_abort;
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200779 } while (0);
780
781 conn->flags &= ~flag;
Emeric Brun4f603012017-01-05 15:11:44 +0100782 conn->flags |= CO_FL_RCVD_PROXY;
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200783 return 1;
784
Willy Tarreau6499b9d2019-06-03 08:17:30 +0200785 not_ready:
Willy Tarreau6499b9d2019-06-03 08:17:30 +0200786 return 0;
787
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200788 missing:
789 /* Missing data. Since we're using MSG_PEEK, we can only poll again if
790 * we have not read anything. Otherwise we need to fail because we won't
791 * be able to poll anymore.
792 */
Willy Tarreau8e3bf692012-12-03 15:41:18 +0100793 conn->err_code = CO_ER_PRX_TRUNCATED;
794 goto fail;
795
796 bad_header:
797 /* This is not a valid proxy protocol header */
798 conn->err_code = CO_ER_PRX_BAD_HDR;
799 goto fail;
800
801 recv_abort:
802 conn->err_code = CO_ER_PRX_ABORT;
Willy Tarreau26f4a042013-12-04 23:44:10 +0100803 conn->flags |= CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
Willy Tarreau8e3bf692012-12-03 15:41:18 +0100804 goto fail;
805
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200806 fail:
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200807 conn->flags |= CO_FL_ERROR;
Willy Tarreaue1e4a612012-10-05 00:10:55 +0200808 return 0;
809}
810
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100811/* This handshake handler waits a NetScaler Client IP insertion header
Bertrand Jacquin72fa1ec2017-12-12 01:17:23 +0000812 * at the beginning of the raw data stream. The header format is
813 * described in doc/netscaler-client-ip-insertion-protocol.txt
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100814 *
815 * This line MUST be at the beginning of the buffer and MUST NOT be
816 * fragmented.
817 *
818 * The header line is small and in all cases smaller than the smallest normal
819 * TCP MSS. So it MUST always be delivered as one segment, which ensures we
820 * can safely use MSG_PEEK and avoid buffering.
821 *
822 * Once the data is fetched, the values are set in the connection's address
823 * fields, and data are removed from the socket's buffer. The function returns
824 * zero if it needs to wait for more data or if it fails, or 1 if it completed
825 * and removed itself.
826 */
827int conn_recv_netscaler_cip(struct connection *conn, int flag)
828{
829 char *line;
Bertrand Jacquin7d668f92017-12-13 01:23:39 +0000830 uint32_t hdr_len;
Willy Tarreau0ca24aa2019-03-29 17:35:32 +0100831 uint8_t ip_ver;
Willy Tarreaub406b872018-08-22 05:20:32 +0200832 int ret;
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100833
834 /* we might have been called just after an asynchronous shutr */
835 if (conn->flags & CO_FL_SOCK_RD_SH)
836 goto fail;
837
838 if (!conn_ctrl_ready(conn))
839 goto fail;
840
Olivier Houchard1a9dbe52020-01-22 15:31:09 +0100841 if (!sockaddr_alloc(&conn->src) || !sockaddr_alloc(&conn->dst))
842 goto fail;
843
Willy Tarreau585744b2017-08-24 14:31:19 +0200844 if (!fd_recv_ready(conn->handle.fd))
Willy Tarreau6499b9d2019-06-03 08:17:30 +0200845 goto not_ready;
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100846
847 do {
Willy Tarreaub406b872018-08-22 05:20:32 +0200848 ret = recv(conn->handle.fd, trash.area, trash.size, MSG_PEEK);
849 if (ret < 0) {
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100850 if (errno == EINTR)
851 continue;
852 if (errno == EAGAIN) {
Willy Tarreau585744b2017-08-24 14:31:19 +0200853 fd_cant_recv(conn->handle.fd);
Willy Tarreau6499b9d2019-06-03 08:17:30 +0200854 goto not_ready;
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100855 }
856 goto recv_abort;
857 }
Willy Tarreaub406b872018-08-22 05:20:32 +0200858 trash.data = ret;
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100859 } while (0);
860
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200861 if (!trash.data) {
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100862 /* client shutdown */
863 conn->err_code = CO_ER_CIP_EMPTY;
864 goto fail;
865 }
866
867 /* Fail if buffer length is not large enough to contain
Bertrand Jacquin72fa1ec2017-12-12 01:17:23 +0000868 * CIP magic, header length or
869 * CIP magic, CIP length, CIP type, header length */
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200870 if (trash.data < 12)
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100871 goto missing;
872
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200873 line = trash.area;
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100874
875 /* Decode a possible NetScaler Client IP request, fail early if
876 * it does not match */
Willy Tarreau55e0da62018-09-20 11:26:52 +0200877 if (ntohl(*(uint32_t *)line) != __objt_listener(conn->target)->bind_conf->ns_cip_magic)
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100878 goto bad_magic;
879
Bertrand Jacquin72fa1ec2017-12-12 01:17:23 +0000880 /* Legacy CIP protocol */
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200881 if ((trash.area[8] & 0xD0) == 0x40) {
Bertrand Jacquin72fa1ec2017-12-12 01:17:23 +0000882 hdr_len = ntohl(*(uint32_t *)(line+4));
883 line += 8;
884 }
885 /* Standard CIP protocol */
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200886 else if (trash.area[8] == 0x00) {
Bertrand Jacquin72fa1ec2017-12-12 01:17:23 +0000887 hdr_len = ntohs(*(uint32_t *)(line+10));
888 line += 12;
889 }
890 /* Unknown CIP protocol */
891 else {
892 conn->err_code = CO_ER_CIP_BAD_PROTO;
893 goto fail;
894 }
895
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100896 /* Fail if buffer length is not large enough to contain
Bertrand Jacquin72fa1ec2017-12-12 01:17:23 +0000897 * a minimal IP header */
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200898 if (trash.data < 20)
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100899 goto missing;
900
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100901 /* Get IP version from the first four bits */
Willy Tarreau0ca24aa2019-03-29 17:35:32 +0100902 ip_ver = (*line & 0xf0) >> 4;
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100903
Willy Tarreau0ca24aa2019-03-29 17:35:32 +0100904 if (ip_ver == 4) {
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100905 struct ip *hdr_ip4;
David Carlier3015a2e2016-07-04 22:51:33 +0100906 struct my_tcphdr *hdr_tcp;
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100907
908 hdr_ip4 = (struct ip *)line;
909
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200910 if (trash.data < 40 || trash.data < hdr_len) {
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100911 /* Fail if buffer length is not large enough to contain
Bertrand Jacquin67de5a22017-12-13 01:15:05 +0000912 * IPv4 header, TCP header */
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100913 goto missing;
Bertrand Jacquinb3875912017-12-13 00:58:51 +0000914 }
915 else if (hdr_ip4->ip_p != IPPROTO_TCP) {
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100916 /* The protocol does not include a TCP header */
917 conn->err_code = CO_ER_CIP_BAD_PROTO;
918 goto fail;
Bertrand Jacquinb3875912017-12-13 00:58:51 +0000919 }
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100920
David Carlier3015a2e2016-07-04 22:51:33 +0100921 hdr_tcp = (struct my_tcphdr *)(line + (hdr_ip4->ip_hl * 4));
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100922
923 /* update the session's addresses and mark them set */
Willy Tarreau226572f2019-07-17 14:46:00 +0200924 ((struct sockaddr_in *)conn->src)->sin_family = AF_INET;
925 ((struct sockaddr_in *)conn->src)->sin_addr.s_addr = hdr_ip4->ip_src.s_addr;
926 ((struct sockaddr_in *)conn->src)->sin_port = hdr_tcp->source;
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100927
Willy Tarreau226572f2019-07-17 14:46:00 +0200928 ((struct sockaddr_in *)conn->dst)->sin_family = AF_INET;
929 ((struct sockaddr_in *)conn->dst)->sin_addr.s_addr = hdr_ip4->ip_dst.s_addr;
930 ((struct sockaddr_in *)conn->dst)->sin_port = hdr_tcp->dest;
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100931
932 conn->flags |= CO_FL_ADDR_FROM_SET | CO_FL_ADDR_TO_SET;
933 }
Willy Tarreau0ca24aa2019-03-29 17:35:32 +0100934 else if (ip_ver == 6) {
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100935 struct ip6_hdr *hdr_ip6;
David Carlier3015a2e2016-07-04 22:51:33 +0100936 struct my_tcphdr *hdr_tcp;
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100937
938 hdr_ip6 = (struct ip6_hdr *)line;
939
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200940 if (trash.data < 60 || trash.data < hdr_len) {
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100941 /* Fail if buffer length is not large enough to contain
Bertrand Jacquin67de5a22017-12-13 01:15:05 +0000942 * IPv6 header, TCP header */
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100943 goto missing;
Bertrand Jacquinb3875912017-12-13 00:58:51 +0000944 }
945 else if (hdr_ip6->ip6_nxt != IPPROTO_TCP) {
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100946 /* The protocol does not include a TCP header */
947 conn->err_code = CO_ER_CIP_BAD_PROTO;
948 goto fail;
Bertrand Jacquinb3875912017-12-13 00:58:51 +0000949 }
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100950
David Carlier3015a2e2016-07-04 22:51:33 +0100951 hdr_tcp = (struct my_tcphdr *)(line + sizeof(struct ip6_hdr));
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100952
953 /* update the session's addresses and mark them set */
Willy Tarreau226572f2019-07-17 14:46:00 +0200954 ((struct sockaddr_in6 *)conn->src)->sin6_family = AF_INET6;
955 ((struct sockaddr_in6 *)conn->src)->sin6_addr = hdr_ip6->ip6_src;
956 ((struct sockaddr_in6 *)conn->src)->sin6_port = hdr_tcp->source;
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100957
Willy Tarreau226572f2019-07-17 14:46:00 +0200958 ((struct sockaddr_in6 *)conn->dst)->sin6_family = AF_INET6;
959 ((struct sockaddr_in6 *)conn->dst)->sin6_addr = hdr_ip6->ip6_dst;
960 ((struct sockaddr_in6 *)conn->dst)->sin6_port = hdr_tcp->dest;
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100961
962 conn->flags |= CO_FL_ADDR_FROM_SET | CO_FL_ADDR_TO_SET;
963 }
964 else {
965 /* The protocol does not match something known (IPv4/IPv6) */
966 conn->err_code = CO_ER_CIP_BAD_PROTO;
967 goto fail;
968 }
969
Bertrand Jacquin7d668f92017-12-13 01:23:39 +0000970 line += hdr_len;
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200971 trash.data = line - trash.area;
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100972
973 /* remove the NetScaler Client IP header from the request. For this
974 * we re-read the exact line at once. If we don't get the exact same
975 * result, we fail.
976 */
977 do {
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200978 int len2 = recv(conn->handle.fd, trash.area, trash.data, 0);
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100979 if (len2 < 0 && errno == EINTR)
980 continue;
Willy Tarreau843b7cb2018-07-13 10:54:26 +0200981 if (len2 != trash.data)
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100982 goto recv_abort;
983 } while (0);
984
985 conn->flags &= ~flag;
986 return 1;
987
Willy Tarreau6499b9d2019-06-03 08:17:30 +0200988 not_ready:
Willy Tarreau6499b9d2019-06-03 08:17:30 +0200989 return 0;
990
Bertrand Jacquin93b227d2016-06-04 15:11:10 +0100991 missing:
992 /* Missing data. Since we're using MSG_PEEK, we can only poll again if
993 * we have not read anything. Otherwise we need to fail because we won't
994 * be able to poll anymore.
995 */
996 conn->err_code = CO_ER_CIP_TRUNCATED;
997 goto fail;
998
999 bad_magic:
1000 conn->err_code = CO_ER_CIP_BAD_MAGIC;
1001 goto fail;
1002
1003 recv_abort:
1004 conn->err_code = CO_ER_CIP_ABORT;
1005 conn->flags |= CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH;
1006 goto fail;
1007
1008 fail:
Bertrand Jacquin93b227d2016-06-04 15:11:10 +01001009 conn->flags |= CO_FL_ERROR;
1010 return 0;
1011}
1012
Alexander Liu2a54bb72019-05-22 19:44:48 +08001013
1014int conn_send_socks4_proxy_request(struct connection *conn)
1015{
1016 struct socks4_request req_line;
1017
1018 /* we might have been called just after an asynchronous shutw */
1019 if (conn->flags & CO_FL_SOCK_WR_SH)
1020 goto out_error;
1021
1022 if (!conn_ctrl_ready(conn))
1023 goto out_error;
1024
Willy Tarreau226572f2019-07-17 14:46:00 +02001025 if (!conn_get_dst(conn))
1026 goto out_error;
1027
Alexander Liu2a54bb72019-05-22 19:44:48 +08001028 req_line.version = 0x04;
1029 req_line.command = 0x01;
Willy Tarreau226572f2019-07-17 14:46:00 +02001030 req_line.port = get_net_port(conn->dst);
1031 req_line.ip = is_inet_addr(conn->dst);
Alexander Liu2a54bb72019-05-22 19:44:48 +08001032 memcpy(req_line.user_id, "HAProxy\0", 8);
1033
1034 if (conn->send_proxy_ofs > 0) {
1035 /*
1036 * This is the first call to send the request
1037 */
1038 conn->send_proxy_ofs = -(int)sizeof(req_line);
1039 }
1040
1041 if (conn->send_proxy_ofs < 0) {
1042 int ret = 0;
1043
1044 /* we are sending the socks4_req_line here. If the data layer
1045 * has a pending write, we'll also set MSG_MORE.
1046 */
1047 ret = conn_sock_send(
1048 conn,
1049 ((char *)(&req_line)) + (sizeof(req_line)+conn->send_proxy_ofs),
1050 -conn->send_proxy_ofs,
1051 (conn->flags & CO_FL_XPRT_WR_ENA) ? MSG_MORE : 0);
1052
1053 DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: Before send remain is [%d], sent [%d]\n",
1054 conn->handle.fd, -conn->send_proxy_ofs, ret);
1055
1056 if (ret < 0) {
1057 goto out_error;
1058 }
1059
1060 conn->send_proxy_ofs += ret; /* becomes zero once complete */
1061 if (conn->send_proxy_ofs != 0) {
1062 goto out_wait;
1063 }
1064 }
1065
1066 /* OK we've the whole request sent */
1067 conn->flags &= ~CO_FL_SOCKS4_SEND;
Alexander Liu2a54bb72019-05-22 19:44:48 +08001068
1069 /* The connection is ready now, simply return and let the connection
1070 * handler notify upper layers if needed.
1071 */
1072 if (conn->flags & CO_FL_WAIT_L4_CONN)
1073 conn->flags &= ~CO_FL_WAIT_L4_CONN;
1074
1075 if (conn->flags & CO_FL_SEND_PROXY) {
1076 /*
1077 * Get the send_proxy_ofs ready for the send_proxy due to we are
1078 * reusing the "send_proxy_ofs", and SOCKS4 handshake should be done
1079 * before sending PROXY Protocol.
1080 */
1081 conn->send_proxy_ofs = 1;
1082 }
1083 return 1;
1084
1085 out_error:
1086 /* Write error on the file descriptor */
1087 conn->flags |= CO_FL_ERROR;
1088 if (conn->err_code == CO_ER_NONE) {
1089 conn->err_code = CO_ER_SOCKS4_SEND;
1090 }
1091 return 0;
1092
1093 out_wait:
Alexander Liu2a54bb72019-05-22 19:44:48 +08001094 return 0;
1095}
1096
1097int conn_recv_socks4_proxy_response(struct connection *conn)
1098{
1099 char line[SOCKS4_HS_RSP_LEN];
1100 int ret;
1101
1102 /* we might have been called just after an asynchronous shutr */
1103 if (conn->flags & CO_FL_SOCK_RD_SH)
1104 goto fail;
1105
1106 if (!conn_ctrl_ready(conn))
1107 goto fail;
1108
1109 if (!fd_recv_ready(conn->handle.fd))
Willy Tarreau6499b9d2019-06-03 08:17:30 +02001110 goto not_ready;
Alexander Liu2a54bb72019-05-22 19:44:48 +08001111
1112 do {
1113 /* SOCKS4 Proxy will response with 8 bytes, 0x00 | 0x5A | 0x00 0x00 | 0x00 0x00 0x00 0x00
1114 * Try to peek into it, before all 8 bytes ready.
1115 */
1116 ret = recv(conn->handle.fd, line, SOCKS4_HS_RSP_LEN, MSG_PEEK);
1117
1118 if (ret == 0) {
1119 /* the socket has been closed or shutdown for send */
1120 DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: Received ret[%d], errno[%d], looks like the socket has been closed or shutdown for send\n",
1121 conn->handle.fd, ret, errno);
1122 if (conn->err_code == CO_ER_NONE) {
1123 conn->err_code = CO_ER_SOCKS4_RECV;
1124 }
1125 goto fail;
1126 }
1127
1128 if (ret > 0) {
1129 if (ret == SOCKS4_HS_RSP_LEN) {
1130 DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: Received 8 bytes, the response is [%02X|%02X|%02X %02X|%02X %02X %02X %02X]\n",
1131 conn->handle.fd, line[0], line[1], line[2], line[3], line[4], line[5], line[6], line[7]);
1132 }else{
1133 DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: Received ret[%d], first byte is [%02X], last bye is [%02X]\n", conn->handle.fd, ret, line[0], line[ret-1]);
1134 }
1135 } else {
1136 DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: Received ret[%d], errno[%d]\n", conn->handle.fd, ret, errno);
1137 }
1138
1139 if (ret < 0) {
1140 if (errno == EINTR) {
1141 continue;
1142 }
1143 if (errno == EAGAIN) {
1144 fd_cant_recv(conn->handle.fd);
Willy Tarreau6499b9d2019-06-03 08:17:30 +02001145 goto not_ready;
Alexander Liu2a54bb72019-05-22 19:44:48 +08001146 }
1147 goto recv_abort;
1148 }
1149 } while (0);
1150
1151 if (ret < SOCKS4_HS_RSP_LEN) {
1152 /* Missing data. Since we're using MSG_PEEK, we can only poll again if
1153 * we are not able to read enough data.
1154 */
Willy Tarreau6499b9d2019-06-03 08:17:30 +02001155 goto not_ready;
Alexander Liu2a54bb72019-05-22 19:44:48 +08001156 }
1157
1158 /*
1159 * Base on the SOCSK4 protocol:
1160 *
1161 * +----+----+----+----+----+----+----+----+
1162 * | VN | CD | DSTPORT | DSTIP |
1163 * +----+----+----+----+----+----+----+----+
1164 * # of bytes: 1 1 2 4
1165 * VN is the version of the reply code and should be 0. CD is the result
1166 * code with one of the following values:
1167 * 90: request granted
1168 * 91: request rejected or failed
1169 * 92: request rejected becasue SOCKS server cannot connect to identd on the client
1170 * 93: request rejected because the client program and identd report different user-ids
1171 * The remaining fields are ignored.
1172 */
1173 if (line[1] != 90) {
1174 conn->flags &= ~CO_FL_SOCKS4_RECV;
1175
1176 DPRINTF(stderr, "SOCKS PROXY HS FD[%04X]: FAIL, the response is [%02X|%02X|%02X %02X|%02X %02X %02X %02X]\n",
1177 conn->handle.fd, line[0], line[1], line[2], line[3], line[4], line[5], line[6], line[7]);
1178 if (conn->err_code == CO_ER_NONE) {
1179 conn->err_code = CO_ER_SOCKS4_DENY;
1180 }
1181 goto fail;
1182 }
1183
1184 /* remove the 8 bytes response from the stream */
1185 do {
1186 ret = recv(conn->handle.fd, line, SOCKS4_HS_RSP_LEN, 0);
1187 if (ret < 0 && errno == EINTR) {
1188 continue;
1189 }
1190 if (ret != SOCKS4_HS_RSP_LEN) {
1191 if (conn->err_code == CO_ER_NONE) {
1192 conn->err_code = CO_ER_SOCKS4_RECV;
1193 }
1194 goto fail;
1195 }
1196 } while (0);
1197
1198 conn->flags &= ~CO_FL_SOCKS4_RECV;
1199 return 1;
1200
Willy Tarreau6499b9d2019-06-03 08:17:30 +02001201 not_ready:
Willy Tarreau6499b9d2019-06-03 08:17:30 +02001202 return 0;
1203
Alexander Liu2a54bb72019-05-22 19:44:48 +08001204 recv_abort:
1205 if (conn->err_code == CO_ER_NONE) {
1206 conn->err_code = CO_ER_SOCKS4_ABORT;
1207 }
1208 conn->flags |= (CO_FL_SOCK_RD_SH | CO_FL_SOCK_WR_SH);
1209 goto fail;
1210
1211 fail:
Alexander Liu2a54bb72019-05-22 19:44:48 +08001212 conn->flags |= CO_FL_ERROR;
1213 return 0;
1214}
1215
Ilya Shipitsinca56fce2018-09-15 00:50:05 +05001216/* Note: <remote> is explicitly allowed to be NULL */
David Safb76832014-05-08 23:42:08 -04001217int make_proxy_line(char *buf, int buf_len, struct server *srv, struct connection *remote)
1218{
1219 int ret = 0;
1220
1221 if (srv && (srv->pp_opts & SRV_PP_V2)) {
1222 ret = make_proxy_line_v2(buf, buf_len, srv, remote);
1223 }
1224 else {
Willy Tarreau226572f2019-07-17 14:46:00 +02001225 if (remote && conn_get_src(remote) && conn_get_dst(remote))
1226 ret = make_proxy_line_v1(buf, buf_len, remote->src, remote->dst);
David Safb76832014-05-08 23:42:08 -04001227 else
1228 ret = make_proxy_line_v1(buf, buf_len, NULL, NULL);
1229 }
1230
1231 return ret;
1232}
1233
/* Helper for make_proxy_line_v1(): writes the textual form of the address in
 * <ss> into <out> (<out_len> bytes) and its port, still in network byte
 * order, into <port>. When <want_v6> is zero, <ss> is handled as an AF_INET
 * address and printed as IPv4. Otherwise the address is printed as IPv6, an
 * AF_INET address being first converted using v4tov6(). Returns non-zero on
 * success, zero if inet_ntop() failed.
 */
static int pp1_addr_to_text(struct sockaddr_storage *ss, int want_v6,
                            char *out, size_t out_len, in_port_t *port)
{
	struct in6_addr addr6;

	if (!want_v6) {
		struct sockaddr_in *sin = (struct sockaddr_in *)ss;

		if (!inet_ntop(AF_INET, &sin->sin_addr, out, out_len))
			return 0;
		*port = sin->sin_port;
		return 1;
	}

	if (ss->ss_family == AF_INET) {
		/* Convert the IPv4 address to IPv6 */
		v4tov6(&addr6, &((struct sockaddr_in *)ss)->sin_addr);
		*port = ((struct sockaddr_in *)ss)->sin_port;
	}
	else {
		addr6 = ((struct sockaddr_in6 *)ss)->sin6_addr;
		*port = ((struct sockaddr_in6 *)ss)->sin6_port;
	}

	return inet_ntop(AF_INET6, &addr6, out, out_len) != NULL;
}

/* Makes a PROXY protocol line from the two addresses. The output is sent to
 * buffer <buf> for a maximum size of <buf_len> (including the trailing zero).
 * It returns the number of bytes composing this line (including the trailing
 * LF), or zero in case of failure (eg: not enough space). It supports TCP4,
 * TCP6 and "UNKNOWN" formats. If any of <src> or <dst> is null, or is neither
 * AF_INET nor AF_INET6, UNKNOWN is emitted. TCP4 is only used when both
 * addresses are AF_INET; as soon as one of them is AF_INET6, both are emitted
 * in IPv6 form with TCP6.
 */
int make_proxy_line_v1(char *buf, int buf_len, struct sockaddr_storage *src, struct sockaddr_storage *dst)
{
	char src_str[MAX(INET_ADDRSTRLEN, INET6_ADDRSTRLEN)];
	char dst_str[MAX(INET_ADDRSTRLEN, INET6_ADDRSTRLEN)];
	in_port_t src_port;
	in_port_t dst_port;
	int known;
	int use_v6;
	int len;

	known = src && dst &&
	        (src->ss_family == AF_INET || src->ss_family == AF_INET6) &&
	        (dst->ss_family == AF_INET || dst->ss_family == AF_INET6);

	if (!known) {
		/* unknown family combination */
		len = snprintf(buf, buf_len, "PROXY UNKNOWN\r\n");
		return (len >= buf_len) ? 0 : len;
	}

	/* IPv4 only when both sides are IPv4, IPv6 otherwise */
	use_v6 = !(src->ss_family == AF_INET && dst->ss_family == AF_INET);

	if (!pp1_addr_to_text(src, use_v6, src_str, sizeof(src_str), &src_port))
		return 0;

	if (!pp1_addr_to_text(dst, use_v6, dst_str, sizeof(dst_str), &dst_port))
		return 0;

	len = snprintf(buf, buf_len, "PROXY %s %s %s %u %u\r\n",
	               use_v6 ? "TCP6" : "TCP4", src_str, dst_str,
	               ntohs(src_port), ntohs(dst_port));

	/* a truncated output is a failure */
	return (len >= buf_len) ? 0 : len;
}
David Safb76832014-05-08 23:42:08 -04001311
KOVACS Krisztianb3e54fe2014-11-17 15:11:45 +01001312static int make_tlv(char *dest, int dest_len, char type, uint16_t length, const char *value)
David Safb76832014-05-08 23:42:08 -04001313{
1314 struct tlv *tlv;
1315
1316 if (!dest || (length + sizeof(*tlv) > dest_len))
1317 return 0;
1318
1319 tlv = (struct tlv *)dest;
1320
1321 tlv->type = type;
1322 tlv->length_hi = length >> 8;
1323 tlv->length_lo = length & 0x00ff;
1324 memcpy(tlv->value, value, length);
1325 return length + sizeof(*tlv);
1326}
David Safb76832014-05-08 23:42:08 -04001327
Ilya Shipitsinca56fce2018-09-15 00:50:05 +05001328/* Note: <remote> is explicitly allowed to be NULL */
David Safb76832014-05-08 23:42:08 -04001329int make_proxy_line_v2(char *buf, int buf_len, struct server *srv, struct connection *remote)
1330{
Willy Tarreau8fccfa22014-06-14 08:28:06 +02001331 const char pp2_signature[] = PP2_SIGNATURE;
Emmanuel Hocdet4399c752018-02-05 15:26:43 +01001332 void *tlv_crc32c_p = NULL;
David Safb76832014-05-08 23:42:08 -04001333 int ret = 0;
Willy Tarreau8fccfa22014-06-14 08:28:06 +02001334 struct proxy_hdr_v2 *hdr = (struct proxy_hdr_v2 *)buf;
Vincent Bernat6e615892016-05-18 16:17:44 +02001335 struct sockaddr_storage null_addr = { .ss_family = 0 };
David Safb76832014-05-08 23:42:08 -04001336 struct sockaddr_storage *src = &null_addr;
1337 struct sockaddr_storage *dst = &null_addr;
Emmanuel Hocdet404d9782017-10-24 10:55:14 +02001338 const char *value;
1339 int value_len;
David Safb76832014-05-08 23:42:08 -04001340
1341 if (buf_len < PP2_HEADER_LEN)
1342 return 0;
Willy Tarreau8fccfa22014-06-14 08:28:06 +02001343 memcpy(hdr->sig, pp2_signature, PP2_SIGNATURE_LEN);
David Safb76832014-05-08 23:42:08 -04001344
Willy Tarreau226572f2019-07-17 14:46:00 +02001345 if (remote && conn_get_src(remote) && conn_get_dst(remote)) {
1346 src = remote->src;
1347 dst = remote->dst;
David Safb76832014-05-08 23:42:08 -04001348 }
KOVACS Krisztianb3e54fe2014-11-17 15:11:45 +01001349
Tim Duesterhus7fec0212018-07-27 18:46:13 +02001350 /* At least one of src or dst is not of AF_INET or AF_INET6 */
1351 if ( !src
1352 || !dst
1353 || (src->ss_family != AF_INET && src->ss_family != AF_INET6)
1354 || (dst->ss_family != AF_INET && dst->ss_family != AF_INET6)) {
David Safb76832014-05-08 23:42:08 -04001355 if (buf_len < PP2_HDR_LEN_UNSPEC)
1356 return 0;
Willy Tarreau8fccfa22014-06-14 08:28:06 +02001357 hdr->ver_cmd = PP2_VERSION | PP2_CMD_LOCAL;
1358 hdr->fam = PP2_FAM_UNSPEC | PP2_TRANS_UNSPEC;
David Safb76832014-05-08 23:42:08 -04001359 ret = PP2_HDR_LEN_UNSPEC;
1360 }
Tim Duesterhus7fec0212018-07-27 18:46:13 +02001361 else {
1362 /* IPv4 for both src and dst */
1363 if (src->ss_family == AF_INET && dst->ss_family == AF_INET) {
1364 if (buf_len < PP2_HDR_LEN_INET)
1365 return 0;
1366 hdr->ver_cmd = PP2_VERSION | PP2_CMD_PROXY;
1367 hdr->fam = PP2_FAM_INET | PP2_TRANS_STREAM;
1368 hdr->addr.ip4.src_addr = ((struct sockaddr_in *)src)->sin_addr.s_addr;
1369 hdr->addr.ip4.src_port = ((struct sockaddr_in *)src)->sin_port;
1370 hdr->addr.ip4.dst_addr = ((struct sockaddr_in *)dst)->sin_addr.s_addr;
1371 hdr->addr.ip4.dst_port = ((struct sockaddr_in *)dst)->sin_port;
1372 ret = PP2_HDR_LEN_INET;
1373 }
1374 /* IPv6 for at least one of src and dst */
1375 else {
1376 struct in6_addr tmp;
1377
1378 if (buf_len < PP2_HDR_LEN_INET6)
1379 return 0;
1380 hdr->ver_cmd = PP2_VERSION | PP2_CMD_PROXY;
1381 hdr->fam = PP2_FAM_INET6 | PP2_TRANS_STREAM;
1382 if (src->ss_family == AF_INET) {
1383 v4tov6(&tmp, &((struct sockaddr_in *)src)->sin_addr);
1384 memcpy(hdr->addr.ip6.src_addr, &tmp, 16);
1385 hdr->addr.ip6.src_port = ((struct sockaddr_in *)src)->sin_port;
1386 }
1387 else {
1388 memcpy(hdr->addr.ip6.src_addr, &((struct sockaddr_in6 *)src)->sin6_addr, 16);
1389 hdr->addr.ip6.src_port = ((struct sockaddr_in6 *)src)->sin6_port;
1390 }
1391 if (dst->ss_family == AF_INET) {
1392 v4tov6(&tmp, &((struct sockaddr_in *)dst)->sin_addr);
1393 memcpy(hdr->addr.ip6.dst_addr, &tmp, 16);
1394 hdr->addr.ip6.src_port = ((struct sockaddr_in *)src)->sin_port;
1395 }
1396 else {
1397 memcpy(hdr->addr.ip6.dst_addr, &((struct sockaddr_in6 *)dst)->sin6_addr, 16);
1398 hdr->addr.ip6.dst_port = ((struct sockaddr_in6 *)dst)->sin6_port;
1399 }
1400
1401 ret = PP2_HDR_LEN_INET6;
1402 }
1403 }
David Safb76832014-05-08 23:42:08 -04001404
Emmanuel Hocdet4399c752018-02-05 15:26:43 +01001405 if (srv->pp_opts & SRV_PP_V2_CRC32C) {
1406 uint32_t zero_crc32c = 0;
1407 if ((buf_len - ret) < sizeof(struct tlv))
1408 return 0;
1409 tlv_crc32c_p = (void *)((struct tlv *)&buf[ret])->value;
1410 ret += make_tlv(&buf[ret], (buf_len - ret), PP2_TYPE_CRC32C, sizeof(zero_crc32c), (const char *)&zero_crc32c);
1411 }
1412
Ilya Shipitsinca56fce2018-09-15 00:50:05 +05001413 if (remote && conn_get_alpn(remote, &value, &value_len)) {
Emmanuel Hocdet404d9782017-10-24 10:55:14 +02001414 if ((buf_len - ret) < sizeof(struct tlv))
1415 return 0;
Emmanuel Hocdet571c7ac2017-10-31 18:24:05 +01001416 ret += make_tlv(&buf[ret], (buf_len - ret), PP2_TYPE_ALPN, value_len, value);
Emmanuel Hocdet404d9782017-10-24 10:55:14 +02001417 }
1418
Emmanuel Hocdet253c3b72018-02-01 18:29:59 +01001419 if (srv->pp_opts & SRV_PP_V2_AUTHORITY) {
Emmanuel Hocdet8a4ffa02019-08-29 11:54:51 +02001420 value = NULL;
1421 if (remote && remote->proxy_authority) {
1422 value = remote->proxy_authority;
1423 value_len = remote->proxy_authority_len;
1424 }
1425#ifdef USE_OPENSSL
1426 else {
Jerome Magnin78891c72019-09-02 09:53:41 +02001427 if ((value = ssl_sock_get_sni(remote)))
Emmanuel Hocdet8a4ffa02019-08-29 11:54:51 +02001428 value_len = strlen(value);
1429 }
1430#endif
Emmanuel Hocdet253c3b72018-02-01 18:29:59 +01001431 if (value) {
1432 if ((buf_len - ret) < sizeof(struct tlv))
1433 return 0;
Emmanuel Hocdet8a4ffa02019-08-29 11:54:51 +02001434 ret += make_tlv(&buf[ret], (buf_len - ret), PP2_TYPE_AUTHORITY, value_len, value);
Emmanuel Hocdet253c3b72018-02-01 18:29:59 +01001435 }
1436 }
1437
Emmanuel Hocdet8a4ffa02019-08-29 11:54:51 +02001438#ifdef USE_OPENSSL
David Safb76832014-05-08 23:42:08 -04001439 if (srv->pp_opts & SRV_PP_V2_SSL) {
Emmanuel Hocdet404d9782017-10-24 10:55:14 +02001440 struct tlv_ssl *tlv;
1441 int ssl_tlv_len = 0;
David Safb76832014-05-08 23:42:08 -04001442 if ((buf_len - ret) < sizeof(struct tlv_ssl))
1443 return 0;
1444 tlv = (struct tlv_ssl *)&buf[ret];
1445 memset(tlv, 0, sizeof(struct tlv_ssl));
1446 ssl_tlv_len += sizeof(struct tlv_ssl);
1447 tlv->tlv.type = PP2_TYPE_SSL;
1448 if (ssl_sock_is_ssl(remote)) {
1449 tlv->client |= PP2_CLIENT_SSL;
Emmanuel Hocdet01da5712017-10-13 16:59:49 +02001450 value = ssl_sock_get_proto_version(remote);
David Safb76832014-05-08 23:42:08 -04001451 if (value) {
Emmanuel Hocdet8c0c34b2018-02-28 12:02:14 +01001452 ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len-ret-ssl_tlv_len), PP2_SUBTYPE_SSL_VERSION, strlen(value), value);
David Safb76832014-05-08 23:42:08 -04001453 }
Dave McCowan328fb582014-07-30 10:39:13 -04001454 if (ssl_sock_get_cert_used_sess(remote)) {
1455 tlv->client |= PP2_CLIENT_CERT_SESS;
David Safb76832014-05-08 23:42:08 -04001456 tlv->verify = htonl(ssl_sock_get_verify_result(remote));
Dave McCowan328fb582014-07-30 10:39:13 -04001457 if (ssl_sock_get_cert_used_conn(remote))
1458 tlv->client |= PP2_CLIENT_CERT_CONN;
David Safb76832014-05-08 23:42:08 -04001459 }
1460 if (srv->pp_opts & SRV_PP_V2_SSL_CN) {
Willy Tarreau83061a82018-07-13 11:56:34 +02001461 struct buffer *cn_trash = get_trash_chunk();
Willy Tarreau3b9a0c92014-07-19 06:37:33 +02001462 if (ssl_sock_get_remote_common_name(remote, cn_trash) > 0) {
Willy Tarreau843b7cb2018-07-13 10:54:26 +02001463 ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len - ret - ssl_tlv_len), PP2_SUBTYPE_SSL_CN,
1464 cn_trash->data,
1465 cn_trash->area);
David Safb76832014-05-08 23:42:08 -04001466 }
1467 }
Emmanuel Hocdetfa8d0f12018-02-01 15:53:52 +01001468 if (srv->pp_opts & SRV_PP_V2_SSL_KEY_ALG) {
Willy Tarreau83061a82018-07-13 11:56:34 +02001469 struct buffer *pkey_trash = get_trash_chunk();
Emmanuel Hocdetfa8d0f12018-02-01 15:53:52 +01001470 if (ssl_sock_get_pkey_algo(remote, pkey_trash) > 0) {
Willy Tarreau843b7cb2018-07-13 10:54:26 +02001471 ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len - ret - ssl_tlv_len), PP2_SUBTYPE_SSL_KEY_ALG,
1472 pkey_trash->data,
1473 pkey_trash->area);
Emmanuel Hocdetfa8d0f12018-02-01 15:53:52 +01001474 }
1475 }
1476 if (srv->pp_opts & SRV_PP_V2_SSL_SIG_ALG) {
1477 value = ssl_sock_get_cert_sig(remote);
1478 if (value) {
1479 ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len - ret - ssl_tlv_len), PP2_SUBTYPE_SSL_SIG_ALG, strlen(value), value);
1480 }
1481 }
1482 if (srv->pp_opts & SRV_PP_V2_SSL_CIPHER) {
1483 value = ssl_sock_get_cipher_name(remote);
1484 if (value) {
1485 ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len - ret - ssl_tlv_len), PP2_SUBTYPE_SSL_CIPHER, strlen(value), value);
1486 }
1487 }
David Safb76832014-05-08 23:42:08 -04001488 }
1489 tlv->tlv.length_hi = (uint16_t)(ssl_tlv_len - sizeof(struct tlv)) >> 8;
1490 tlv->tlv.length_lo = (uint16_t)(ssl_tlv_len - sizeof(struct tlv)) & 0x00ff;
1491 ret += ssl_tlv_len;
1492 }
1493#endif
1494
Willy Tarreaue5733232019-05-22 19:24:06 +02001495#ifdef USE_NS
KOVACS Krisztianb3e54fe2014-11-17 15:11:45 +01001496 if (remote && (remote->proxy_netns)) {
1497 if ((buf_len - ret) < sizeof(struct tlv))
1498 return 0;
Emmanuel Hocdet571c7ac2017-10-31 18:24:05 +01001499 ret += make_tlv(&buf[ret], (buf_len - ret), PP2_TYPE_NETNS, remote->proxy_netns->name_len, remote->proxy_netns->node.key);
KOVACS Krisztianb3e54fe2014-11-17 15:11:45 +01001500 }
1501#endif
1502
Willy Tarreau8fccfa22014-06-14 08:28:06 +02001503 hdr->len = htons((uint16_t)(ret - PP2_HEADER_LEN));
David Safb76832014-05-08 23:42:08 -04001504
Emmanuel Hocdet4399c752018-02-05 15:26:43 +01001505 if (tlv_crc32c_p) {
1506 write_u32(tlv_crc32c_p, htonl(hash_crc32c(buf, ret)));
1507 }
1508
David Safb76832014-05-08 23:42:08 -04001509 return ret;
1510}
Emeric Brun4f603012017-01-05 15:11:44 +01001511
Willy Tarreau60ca10a2017-08-18 15:26:54 +02001512/* return the major HTTP version as 1 or 2 depending on how the request arrived
1513 * before being processed.
1514 */
1515static int
1516smp_fetch_fc_http_major(const struct arg *args, struct sample *smp, const char *kw, void *private)
1517{
Jérôme Magnin86577422018-12-07 09:03:11 +01001518 struct connection *conn = (kw[0] != 'b') ? objt_conn(smp->sess->origin) :
1519 smp->strm ? cs_conn(objt_cs(smp->strm->si[1].end)) : NULL;
Willy Tarreau60ca10a2017-08-18 15:26:54 +02001520
1521 smp->data.type = SMP_T_SINT;
1522 smp->data.u.sint = (conn && strcmp(conn_get_mux_name(conn), "H2") == 0) ? 2 : 1;
1523 return 1;
1524}
1525
Emeric Brun4f603012017-01-05 15:11:44 +01001526/* fetch if the received connection used a PROXY protocol header */
1527int smp_fetch_fc_rcvd_proxy(const struct arg *args, struct sample *smp, const char *kw, void *private)
1528{
1529 struct connection *conn;
1530
1531 conn = objt_conn(smp->sess->origin);
1532 if (!conn)
1533 return 0;
1534
1535 if (!(conn->flags & CO_FL_CONNECTED)) {
1536 smp->flags |= SMP_F_MAY_CHANGE;
1537 return 0;
1538 }
1539
1540 smp->flags = 0;
1541 smp->data.type = SMP_T_BOOL;
1542 smp->data.u.sint = (conn->flags & CO_FL_RCVD_PROXY) ? 1 : 0;
1543
1544 return 1;
1545}
1546
Geoff Simmons7185b782019-08-27 18:31:16 +02001547/* fetch the authority TLV from a PROXY protocol header */
1548int smp_fetch_fc_pp_authority(const struct arg *args, struct sample *smp, const char *kw, void *private)
1549{
1550 struct connection *conn;
1551
1552 conn = objt_conn(smp->sess->origin);
1553 if (!conn)
1554 return 0;
1555
1556 if (!(conn->flags & CO_FL_CONNECTED)) {
1557 smp->flags |= SMP_F_MAY_CHANGE;
1558 return 0;
1559 }
1560
1561 if (conn->proxy_authority == NULL)
1562 return 0;
1563
1564 smp->flags = 0;
1565 smp->data.type = SMP_T_STR;
1566 smp->data.u.str.area = conn->proxy_authority;
1567 smp->data.u.str.data = conn->proxy_authority_len;
1568
1569 return 1;
1570}
1571
Emeric Brun4f603012017-01-05 15:11:44 +01001572/* Note: must not be declared <const> as its list will be overwritten.
1573 * Note: fetches that may return multiple types must be declared as the lowest
1574 * common denominator, the type that can be casted into all other ones. For
1575 * instance v4/v6 must be declared v4.
1576 */
1577static struct sample_fetch_kw_list sample_fetch_keywords = {ILH, {
Willy Tarreau60ca10a2017-08-18 15:26:54 +02001578 { "fc_http_major", smp_fetch_fc_http_major, 0, NULL, SMP_T_SINT, SMP_USE_L4CLI },
Jérôme Magnin86577422018-12-07 09:03:11 +01001579 { "bc_http_major", smp_fetch_fc_http_major, 0, NULL, SMP_T_SINT, SMP_USE_L4SRV },
Emeric Brun4f603012017-01-05 15:11:44 +01001580 { "fc_rcvd_proxy", smp_fetch_fc_rcvd_proxy, 0, NULL, SMP_T_BOOL, SMP_USE_L4CLI },
Geoff Simmons7185b782019-08-27 18:31:16 +02001581 { "fc_pp_authority", smp_fetch_fc_pp_authority, 0, NULL, SMP_T_STR, SMP_USE_L4CLI },
Emeric Brun4f603012017-01-05 15:11:44 +01001582 { /* END */ },
1583}};
1584
Willy Tarreau0108d902018-11-25 19:14:37 +01001585INITCALL1(STG_REGISTER, sample_register_fetches, &sample_fetch_keywords);