blob: f85582cfdea9330e3a6feed4149346d7ee876e89 [file] [log] [blame]
/*
 * AF_INET/AF_INET6 socket management
 *
 * Copyright 2000-2020 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
Willy Tarreauf2cda102020-09-17 14:02:01 +020013#include <errno.h>
Willy Tarreau0d06df62020-08-28 15:10:11 +020014#include <string.h>
Willy Tarreaud69ce1f2020-09-01 14:18:04 +020015#include <unistd.h>
Willy Tarreau0d06df62020-08-28 15:10:11 +020016
17#include <sys/param.h>
18#include <sys/socket.h>
19#include <sys/types.h>
20
21#include <netinet/tcp.h>
22#include <netinet/in.h>
23
24#include <haproxy/api.h>
Willy Tarreaud69ce1f2020-09-01 14:18:04 +020025#include <haproxy/errors.h>
26#include <haproxy/fd.h>
Willy Tarreau37bafdc2020-08-28 17:23:40 +020027#include <haproxy/global.h>
Willy Tarreaud69ce1f2020-09-01 14:18:04 +020028#include <haproxy/namespace.h>
29#include <haproxy/receiver-t.h>
30#include <haproxy/sock.h>
Willy Tarreau0d06df62020-08-28 15:10:11 +020031#include <haproxy/sock_inet.h>
32#include <haproxy/tools.h>
33
Willy Tarreaub0254cb2020-09-04 08:07:11 +020034struct proto_fam proto_fam_inet4 = {
35 .name = "inet4",
36 .sock_domain = PF_INET,
37 .sock_family = AF_INET,
38 .sock_addrlen = sizeof(struct sockaddr_in),
39 .l3_addrlen = 32/8,
40 .addrcmp = sock_inet4_addrcmp,
41 .bind = sock_inet_bind_receiver,
42 .get_src = sock_get_src,
43 .get_dst = sock_inet_get_dst,
Willy Tarreau73bed9f2020-12-04 14:43:36 +010044 .set_port = sock_inet_set_port,
Willy Tarreaub0254cb2020-09-04 08:07:11 +020045};
46
47struct proto_fam proto_fam_inet6 = {
48 .name = "inet6",
49 .sock_domain = PF_INET6,
50 .sock_family = AF_INET6,
51 .sock_addrlen = sizeof(struct sockaddr_in6),
52 .l3_addrlen = 128/8,
53 .addrcmp = sock_inet6_addrcmp,
54 .bind = sock_inet_bind_receiver,
55 .get_src = sock_get_src,
56 .get_dst = sock_get_dst,
Willy Tarreau73bed9f2020-12-04 14:43:36 +010057 .set_port = sock_inet_set_port,
Willy Tarreaub0254cb2020-09-04 08:07:11 +020058};
Willy Tarreau0d06df62020-08-28 15:10:11 +020059
/* PLEASE NOTE for the functions below:
 *   - sock_inet4_* is solely for AF_INET (IPv4)
 *   - sock_inet6_* is solely for AF_INET6 (IPv6)
 *   - sock_inet_*  is for either
 *
 * The address family SHOULD always be checked. In some cases a function will
 * be used in a situation where the address family is guaranteed (e.g. protocol
 * definitions), so the test may be avoided. This special case must then be
 * mentioned in the comment before the function definition.
 */
70
/* Indicates whether the operating system uses IPV6_V6ONLY by default on IPv6
 * sockets: 0=no, 1=yes. It also remains 0 if IPv6 is not enabled/configured
 * (i.e. when no AF_INET6 socket could be created to probe the option).
 */
int sock_inet6_v6only_default = 0;
Willy Tarreau0d06df62020-08-28 15:10:11 +020075
/* Default TCPv4/TCPv6 MSS settings as reported by the OS through TCP_MAXSEG.
 * -1=unknown (option not available, or the probe socket could not be created
 * or queried).
 */
int sock_inet_tcp_maxseg_default = -1;
int sock_inet6_tcp_maxseg_default = -1;
79
/* Compares two AF_INET sockaddr addresses. Returns 0 if they match or non-zero
 * if they do not match. Both the port and the IPv4 address take part in the
 * comparison. A mismatch is also reported when the two families differ or when
 * they are not AF_INET, so the result is only meaningful as equal/not-equal
 * and must not be used for ordering.
 */
int sock_inet4_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b)
{
	const struct sockaddr_in *a4 = (const struct sockaddr_in *)a;
	const struct sockaddr_in *b4 = (const struct sockaddr_in *)b;

	if (a->ss_family != b->ss_family)
		return -1;

	if (a->ss_family != AF_INET)
		return -1;

	if (a4->sin_port != b4->sin_port)
		return -1;

	return memcmp(&a4->sin_addr, &b4->sin_addr, sizeof(a4->sin_addr));
}
99
/* Compares two AF_INET6 sockaddr addresses. Returns 0 if they match or
 * non-zero if they do not match. Both the port and the 128-bit IPv6 address
 * take part in the comparison. A mismatch is also reported when the two
 * families differ or when they are not AF_INET6, so the result is only
 * meaningful as equal/not-equal and must not be used for ordering.
 */
int sock_inet6_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b)
{
	const struct sockaddr_in6 *a6 = (const struct sockaddr_in6 *)a;
	const struct sockaddr_in6 *b6 = (const struct sockaddr_in6 *)b;

	if (a->ss_family != b->ss_family)
		return -1;

	if (a->ss_family != AF_INET6)
		return -1;

	if (a6->sin6_port != b6->sin6_port)
		return -1;

	return memcmp(&a6->sin6_addr, &b6->sin6_addr, sizeof(a6->sin6_addr));
}
Willy Tarreauc5a94c92020-08-28 15:19:45 +0200119
/* Sets the port <port> on IPv4 or IPv6 address <addr>. The address family is
 * determined from the sockaddr_storage's address family. Nothing is done for
 * other families. <port> is given in host byte order and is stored in network
 * byte order.
 */
void sock_inet_set_port(struct sockaddr_storage *addr, int port)
{
	if (addr->ss_family == AF_INET)
		((struct sockaddr_in *)addr)->sin_port = htons(port);
	else if (addr->ss_family == AF_INET6)
		((struct sockaddr_in6 *)addr)->sin6_port = htons(port);
}
131
/*
 * Retrieves the original destination address for the socket <fd> which must be
 * of family AF_INET (not AF_INET6), with <dir> indicating if we're a listener
 * (=0) or an initiator (!=0). In the case of a listener, if the original
 * destination address was translated, the original address is retrieved. It
 * returns 0 in case of success, -1 in case of error. The socket's destination
 * address is stored in <sa> for <salen> bytes.
 */
int sock_inet_get_dst(int fd, struct sockaddr *sa, socklen_t salen, int dir)
{
	if (dir)
		return getpeername(fd, sa, &salen);
	else {
		int ret = getsockname(fd, sa, &salen);

		if (ret < 0)
			return ret;

#if defined(USE_TPROXY) && defined(SO_ORIGINAL_DST)
		/* For TPROXY and Netfilter's NAT, we can retrieve the original
		 * IPv4 address before DNAT/REDIRECT. We must not do that with
		 * other families because v6-mapped IPv4 addresses are still
		 * reported as v4.
		 */
		if (getsockopt(fd, IPPROTO_IP, SO_ORIGINAL_DST, sa, &salen) == 0)
			return 0;
#endif
		/* no translation info available: keep the local address
		 * reported by getsockname().
		 */
		return ret;
	}
}
Willy Tarreau25140cc2020-08-28 15:40:33 +0200162
Willy Tarreau3fd3bdc2020-09-01 15:12:08 +0200163/* Returns true if the passed FD corresponds to a socket bound with RX_O_FOREIGN
Willy Tarreau25140cc2020-08-28 15:40:33 +0200164 * according to the various supported socket options. The socket's address family
165 * must be passed in <family>.
166 */
167int sock_inet_is_foreign(int fd, sa_family_t family)
168{
169 int val __maybe_unused;
170 socklen_t len __maybe_unused;
171
172 switch (family) {
173 case AF_INET:
174#if defined(IP_TRANSPARENT)
175 val = 0; len = sizeof(val);
Willy Tarreau4bfc6632021-03-31 08:45:47 +0200176 if (getsockopt(fd, IPPROTO_IP, IP_TRANSPARENT, &val, &len) == 0 && val)
Willy Tarreau25140cc2020-08-28 15:40:33 +0200177 return 1;
178#endif
179#if defined(IP_FREEBIND)
180 val = 0; len = sizeof(val);
Willy Tarreau4bfc6632021-03-31 08:45:47 +0200181 if (getsockopt(fd, IPPROTO_IP, IP_FREEBIND, &val, &len) == 0 && val)
Willy Tarreau25140cc2020-08-28 15:40:33 +0200182 return 1;
183#endif
184#if defined(IP_BINDANY)
185 val = 0; len = sizeof(val);
186 if (getsockopt(fd, IPPROTO_IP, IP_BINDANY, &val, &len) == 0 && val)
187 return 1;
188#endif
189#if defined(SO_BINDANY)
190 val = 0; len = sizeof(val);
191 if (getsockopt(fd, SOL_SOCKET, SO_BINDANY, &val, &len) == 0 && val)
192 return 1;
193#endif
194 break;
195
196 case AF_INET6:
Willy Tarreau4bfc6632021-03-31 08:45:47 +0200197#if defined(IPV6_TRANSPARENT)
Willy Tarreau25140cc2020-08-28 15:40:33 +0200198 val = 0; len = sizeof(val);
Willy Tarreau4bfc6632021-03-31 08:45:47 +0200199 if (getsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, &val, &len) == 0 && val)
Willy Tarreau25140cc2020-08-28 15:40:33 +0200200 return 1;
201#endif
202#if defined(IP_FREEBIND)
203 val = 0; len = sizeof(val);
Willy Tarreau4bfc6632021-03-31 08:45:47 +0200204 if (getsockopt(fd, IPPROTO_IP, IP_FREEBIND, &val, &len) == 0 && val)
Willy Tarreau25140cc2020-08-28 15:40:33 +0200205 return 1;
206#endif
207#if defined(IPV6_BINDANY)
208 val = 0; len = sizeof(val);
209 if (getsockopt(fd, IPPROTO_IPV6, IPV6_BINDANY, &val, &len) == 0 && val)
210 return 1;
211#endif
212#if defined(SO_BINDANY)
213 val = 0; len = sizeof(val);
214 if (getsockopt(fd, SOL_SOCKET, SO_BINDANY, &val, &len) == 0 && val)
215 return 1;
216#endif
217 break;
218 }
219 return 0;
220}
Willy Tarreaud88e8c02020-08-28 16:06:01 +0200221
Willy Tarreau37bafdc2020-08-28 17:23:40 +0200222/* Attempt all known socket options to prepare an AF_INET4 socket to be bound
223 * to a foreign address. The socket must already exist and must not be bound.
224 * 1 is returned on success, 0 on failure. The caller must check the address
225 * family before calling this function.
226 */
227int sock_inet4_make_foreign(int fd)
228{
229 return
230#if defined(IP_TRANSPARENT)
Willy Tarreau4bfc6632021-03-31 08:45:47 +0200231 setsockopt(fd, IPPROTO_IP, IP_TRANSPARENT, &one, sizeof(one)) == 0 ||
Willy Tarreau37bafdc2020-08-28 17:23:40 +0200232#endif
233#if defined(IP_FREEBIND)
Willy Tarreau4bfc6632021-03-31 08:45:47 +0200234 setsockopt(fd, IPPROTO_IP, IP_FREEBIND, &one, sizeof(one)) == 0 ||
Willy Tarreau37bafdc2020-08-28 17:23:40 +0200235#endif
236#if defined(IP_BINDANY)
237 setsockopt(fd, IPPROTO_IP, IP_BINDANY, &one, sizeof(one)) == 0 ||
238#endif
239#if defined(SO_BINDANY)
240 setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0 ||
241#endif
242 0;
243}
244
245/* Attempt all known socket options to prepare an AF_INET6 socket to be bound
246 * to a foreign address. The socket must already exist and must not be bound.
247 * 1 is returned on success, 0 on failure. The caller must check the address
248 * family before calling this function.
249 */
250int sock_inet6_make_foreign(int fd)
251{
252 return
Willy Tarreau4bfc6632021-03-31 08:45:47 +0200253#if defined(IPV6_TRANSPARENT)
254 setsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, &one, sizeof(one)) == 0 ||
Willy Tarreau37bafdc2020-08-28 17:23:40 +0200255#endif
256#if defined(IP_FREEBIND)
Willy Tarreau4bfc6632021-03-31 08:45:47 +0200257 setsockopt(fd, IPPROTO_IP, IP_FREEBIND, &one, sizeof(one)) == 0 ||
Willy Tarreau37bafdc2020-08-28 17:23:40 +0200258#endif
259#if defined(IPV6_BINDANY)
260 setsockopt(fd, IPPROTO_IPV6, IPV6_BINDANY, &one, sizeof(one)) == 0 ||
261#endif
262#if defined(SO_BINDANY)
263 setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0 ||
264#endif
265 0;
266}
267
Willy Tarreau233ad282020-10-15 21:45:15 +0200268/* Binds receiver <rx>, and assigns rx->iocb and rx->owner as the callback and
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200269 * context, respectively. Returns and error code made of ERR_* bits on failure
270 * or ERR_NONE on success. On failure, an error message may be passed into
271 * <errmsg>.
272 */
Willy Tarreau233ad282020-10-15 21:45:15 +0200273int sock_inet_bind_receiver(struct receiver *rx, char **errmsg)
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200274{
275 int fd, err, ext;
276 /* copy listener addr because sometimes we need to switch family */
277 struct sockaddr_storage addr_inet = rx->addr;
278
279 /* force to classic sock family, not AF_CUST_* */
Willy Tarreauf1f66092020-09-04 08:15:31 +0200280 addr_inet.ss_family = rx->proto->fam->sock_family;
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200281
282 /* ensure we never return garbage */
283 if (errmsg)
284 *errmsg = 0;
285
286 err = ERR_NONE;
287
288 if (rx->flags & RX_F_BOUND)
289 return ERR_NONE;
290
291 /* if no FD was assigned yet, we'll have to either find a compatible
292 * one or create a new one.
293 */
294 if (rx->fd == -1)
295 rx->fd = sock_find_compatible_fd(rx);
296
297 /* if the receiver now has an fd assigned, then we were offered the fd
298 * by an external process (most likely the parent), and we don't want
299 * to create a new socket. However we still want to set a few flags on
300 * the socket.
301 */
302 fd = rx->fd;
303 ext = (fd >= 0);
304
305 if (!ext) {
Willy Tarreauf1f66092020-09-04 08:15:31 +0200306 fd = my_socketat(rx->settings->netns, rx->proto->fam->sock_domain,
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200307 rx->proto->sock_type, rx->proto->sock_prot);
308 if (fd == -1) {
309 err |= ERR_RETRYABLE | ERR_ALERT;
Willy Tarreau36722d22020-09-17 08:32:17 +0200310 memprintf(errmsg, "cannot create receiving socket (%s)", strerror(errno));
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200311 goto bind_return;
312 }
313 }
314
Willy Tarreau145b17f2023-01-11 10:59:52 +0100315 if (ext && fd < global.maxsock && fdtab[fd].owner) {
316 /* This FD was already bound so this means that it was already
317 * known and registered before parsing, hence it's an inherited
318 * FD. The only reason why it's already known here is that it
319 * has been registered multiple times (multiple listeners on the
320 * same, or a "shards" directive on the line). There cannot be
321 * multiple listeners on one FD but at least we can create a
322 * new one from the original one. We won't reconfigure it,
323 * however, as this was already done for the first one.
324 */
325 fd = dup(fd);
326 if (fd == -1) {
327 err |= ERR_RETRYABLE | ERR_ALERT;
328 memprintf(errmsg, "cannot dup() receiving socket (%s)", strerror(errno));
329 goto bind_return;
330 }
331 }
332
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200333 if (fd >= global.maxsock) {
334 err |= ERR_FATAL | ERR_ABORT | ERR_ALERT;
335 memprintf(errmsg, "not enough free sockets (raise '-n' parameter)");
336 goto bind_close_return;
337 }
338
Willy Tarreau38247432022-04-26 10:24:14 +0200339 if (fd_set_nonblock(fd) == -1) {
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200340 err |= ERR_FATAL | ERR_ALERT;
341 memprintf(errmsg, "cannot make socket non-blocking");
342 goto bind_close_return;
343 }
344
345 if (!ext && setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) == -1) {
346 /* not fatal but should be reported */
347 memprintf(errmsg, "cannot do so_reuseaddr");
348 err |= ERR_ALERT;
349 }
350
351#ifdef SO_REUSEPORT
352 /* OpenBSD and Linux 3.9 support this. As it's present in old libc versions of
353 * Linux, it might return an error that we will silently ignore.
354 */
355 if (!ext && (global.tune.options & GTUNE_USE_REUSEPORT))
356 setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
357#endif
358
359 if (!ext && (rx->settings->options & RX_O_FOREIGN)) {
360 switch (addr_inet.ss_family) {
361 case AF_INET:
362 if (!sock_inet4_make_foreign(fd)) {
363 memprintf(errmsg, "cannot make receiving socket transparent");
364 err |= ERR_ALERT;
365 }
366 break;
367 case AF_INET6:
368 if (!sock_inet6_make_foreign(fd)) {
369 memprintf(errmsg, "cannot make receiving socket transparent");
370 err |= ERR_ALERT;
371 }
372 break;
373 }
374 }
375
376#ifdef SO_BINDTODEVICE
377 /* Note: this might fail if not CAP_NET_RAW */
378 if (!ext && rx->settings->interface) {
379 if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
380 rx->settings->interface,
381 strlen(rx->settings->interface) + 1) == -1) {
Willy Tarreauf78b52e2021-10-14 11:41:19 +0200382 memprintf(errmsg, "cannot bind receiver to device '%s' (%s)", rx->settings->interface, strerror(errno));
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200383 err |= ERR_WARN;
384 }
385 }
386#endif
387
388#if defined(IPV6_V6ONLY)
389 if (addr_inet.ss_family == AF_INET6 && !ext) {
390 /* Prepare to match the v6only option against what we really want. Note
391 * that sadly the two options are not exclusive to each other and that
392 * v6only is stronger than v4v6.
393 */
394 if ((rx->settings->options & RX_O_V6ONLY) ||
395 (sock_inet6_v6only_default && !(rx->settings->options & RX_O_V4V6)))
396 setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one));
397 else
398 setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &zero, sizeof(zero));
399 }
400#endif
401
Willy Tarreauf1f66092020-09-04 08:15:31 +0200402 if (!ext && bind(fd, (struct sockaddr *)&addr_inet, rx->proto->fam->sock_addrlen) == -1) {
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200403 err |= ERR_RETRYABLE | ERR_ALERT;
Willy Tarreau36722d22020-09-17 08:32:17 +0200404 memprintf(errmsg, "cannot bind socket (%s)", strerror(errno));
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200405 goto bind_close_return;
406 }
407
408 rx->fd = fd;
409 rx->flags |= RX_F_BOUND;
410
Willy Tarreau9464bb12022-07-05 05:16:13 +0200411 fd_insert(fd, rx->owner, rx->iocb, rx->bind_tgroup, rx->bind_thread);
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200412
413 /* for now, all regularly bound TCP listeners are exportable */
414 if (!(rx->flags & RX_F_INHERITED))
Willy Tarreau9063a662021-04-06 18:09:06 +0200415 HA_ATOMIC_OR(&fdtab[fd].state, FD_EXPORTED);
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200416
417 bind_return:
418 if (errmsg && *errmsg) {
419 char pn[INET6_ADDRSTRLEN];
420
421 addr_to_str(&addr_inet, pn, sizeof(pn));
Willy Tarreau6823a3a2021-10-14 11:59:15 +0200422 memprintf(errmsg, "%s for [%s:%d]", *errmsg, pn, get_host_port(&addr_inet));
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200423 }
424 return err;
425
426 bind_close_return:
427 close(fd);
428 goto bind_return;
429}
430
Willy Tarreaud88e8c02020-08-28 16:06:01 +0200431static void sock_inet_prepare()
432{
433 int fd, val;
434 socklen_t len;
435
Willy Tarreaue5bdc512020-08-28 18:03:10 +0200436 fd = socket(AF_INET, SOCK_STREAM, 0);
437 if (fd >= 0) {
438#ifdef TCP_MAXSEG
439 /* retrieve the OS' default mss for TCPv4 */
440 len = sizeof(val);
441 if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &val, &len) == 0)
442 sock_inet_tcp_maxseg_default = val;
443#endif
444 close(fd);
445 }
446
Willy Tarreaud88e8c02020-08-28 16:06:01 +0200447 fd = socket(AF_INET6, SOCK_STREAM, 0);
448 if (fd >= 0) {
449#if defined(IPV6_V6ONLY)
450 /* retrieve the OS' bindv6only value */
451 len = sizeof(val);
452 if (getsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &val, &len) == 0 && val > 0)
453 sock_inet6_v6only_default = 1;
454#endif
Willy Tarreaue5bdc512020-08-28 18:03:10 +0200455
456#ifdef TCP_MAXSEG
457 /* retrieve the OS' default mss for TCPv6 */
458 len = sizeof(val);
459 if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &val, &len) == 0)
460 sock_inet6_tcp_maxseg_default = val;
461#endif
Willy Tarreaud88e8c02020-08-28 16:06:01 +0200462 close(fd);
463 }
464}
465
466INITCALL0(STG_PREPARE, sock_inet_prepare);
Willy Tarreau37bafdc2020-08-28 17:23:40 +0200467
468
469REGISTER_BUILD_OPTS("Built with transparent proxy support using:"
470#if defined(IP_TRANSPARENT)
471 " IP_TRANSPARENT"
472#endif
473#if defined(IPV6_TRANSPARENT)
474 " IPV6_TRANSPARENT"
475#endif
476#if defined(IP_FREEBIND)
477 " IP_FREEBIND"
478#endif
479#if defined(IP_BINDANY)
480 " IP_BINDANY"
481#endif
482#if defined(IPV6_BINDANY)
483 " IPV6_BINDANY"
484#endif
485#if defined(SO_BINDANY)
486 " SO_BINDANY"
487#endif
488 "");