blob: 7523617b29fcbdae2244d2979daaba46c08d7661 [file] [log] [blame]
Willy Tarreau0d06df62020-08-28 15:10:11 +02001/*
2 * AF_INET/AF_INET6 socket management
3 *
4 * Copyright 2000-2020 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Willy Tarreauf2cda102020-09-17 14:02:01 +020013#include <errno.h>
Willy Tarreaud69ce1f2020-09-01 14:18:04 +020014#include <fcntl.h>
Willy Tarreau0d06df62020-08-28 15:10:11 +020015#include <string.h>
Willy Tarreaud69ce1f2020-09-01 14:18:04 +020016#include <unistd.h>
Willy Tarreau0d06df62020-08-28 15:10:11 +020017
18#include <sys/param.h>
19#include <sys/socket.h>
20#include <sys/types.h>
21
22#include <netinet/tcp.h>
23#include <netinet/in.h>
24
25#include <haproxy/api.h>
Willy Tarreaud69ce1f2020-09-01 14:18:04 +020026#include <haproxy/errors.h>
27#include <haproxy/fd.h>
Willy Tarreau37bafdc2020-08-28 17:23:40 +020028#include <haproxy/global.h>
Willy Tarreaud69ce1f2020-09-01 14:18:04 +020029#include <haproxy/namespace.h>
30#include <haproxy/receiver-t.h>
31#include <haproxy/sock.h>
Willy Tarreau0d06df62020-08-28 15:10:11 +020032#include <haproxy/sock_inet.h>
33#include <haproxy/tools.h>
34
Willy Tarreaub0254cb2020-09-04 08:07:11 +020035struct proto_fam proto_fam_inet4 = {
36 .name = "inet4",
37 .sock_domain = PF_INET,
38 .sock_family = AF_INET,
39 .sock_addrlen = sizeof(struct sockaddr_in),
40 .l3_addrlen = 32/8,
41 .addrcmp = sock_inet4_addrcmp,
42 .bind = sock_inet_bind_receiver,
43 .get_src = sock_get_src,
44 .get_dst = sock_inet_get_dst,
Willy Tarreau73bed9f2020-12-04 14:43:36 +010045 .set_port = sock_inet_set_port,
Willy Tarreaub0254cb2020-09-04 08:07:11 +020046};
47
48struct proto_fam proto_fam_inet6 = {
49 .name = "inet6",
50 .sock_domain = PF_INET6,
51 .sock_family = AF_INET6,
52 .sock_addrlen = sizeof(struct sockaddr_in6),
53 .l3_addrlen = 128/8,
54 .addrcmp = sock_inet6_addrcmp,
55 .bind = sock_inet_bind_receiver,
56 .get_src = sock_get_src,
57 .get_dst = sock_get_dst,
Willy Tarreau73bed9f2020-12-04 14:43:36 +010058 .set_port = sock_inet_set_port,
Willy Tarreaub0254cb2020-09-04 08:07:11 +020059};
Willy Tarreau0d06df62020-08-28 15:10:11 +020060
/* PLEASE NOTE for the functions below:
 *   - sock_inet4_* is solely for AF_INET (IPv4)
 *   - sock_inet6_* is solely for AF_INET6 (IPv6)
 *   - sock_inet_*  is for either
 *
 * The address family SHOULD always be checked. In some cases a function will
 * be used in a situation where the address family is guaranteed (e.g. protocol
 * definitions), so the test may be avoided. This special case must then be
 * mentioned in the comment before the function definition.
 */
71
/* Tells whether the operating system enables IPV6_V6ONLY by default on IPv6
 * sockets: 0=no, 1=yes. It also stays at 0 if IPv6 is not enabled/configured.
 */
int sock_inet6_v6only_default = 0;
Willy Tarreau0d06df62020-08-28 15:10:11 +020076
/* Default MSS settings for TCP over IPv4 and TCP over IPv6 respectively.
 * -1 means the value is unknown.
 */
int sock_inet_tcp_maxseg_default = -1;
int sock_inet6_tcp_maxseg_default = -1;
80
/* Compares two AF_INET sockaddr addresses <a> and <b>, port included. Returns
 * 0 if they match or non-zero if they do not match. Two addresses never match
 * if either of them is not of family AF_INET.
 */
int sock_inet4_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b)
{
	const struct sockaddr_in *lhs, *rhs;

	/* both sides must be AF_INET, anything else is a mismatch */
	if (a->ss_family != AF_INET || b->ss_family != a->ss_family)
		return -1;

	lhs = (const struct sockaddr_in *)a;
	rhs = (const struct sockaddr_in *)b;

	if (lhs->sin_port != rhs->sin_port)
		return -1;

	return memcmp(&lhs->sin_addr, &rhs->sin_addr, sizeof(lhs->sin_addr));
}
100
/* Compares two AF_INET6 sockaddr addresses <a> and <b>, port included. Returns
 * 0 if they match or non-zero if they do not match. Two addresses never match
 * if either of them is not of family AF_INET6.
 */
int sock_inet6_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b)
{
	const struct sockaddr_in6 *lhs, *rhs;

	/* both sides must be AF_INET6, anything else is a mismatch */
	if (a->ss_family != AF_INET6 || b->ss_family != a->ss_family)
		return -1;

	lhs = (const struct sockaddr_in6 *)a;
	rhs = (const struct sockaddr_in6 *)b;

	if (lhs->sin6_port != rhs->sin6_port)
		return -1;

	return memcmp(&lhs->sin6_addr, &rhs->sin6_addr, sizeof(lhs->sin6_addr));
}
Willy Tarreauc5a94c92020-08-28 15:19:45 +0200120
/* Sets the port <port> on IPv4 or IPv6 address <addr>, stored in network byte
 * order. The address family is taken from the sockaddr_storage itself, and
 * nothing is done for families other than AF_INET and AF_INET6.
 */
void sock_inet_set_port(struct sockaddr_storage *addr, int port)
{
	switch (addr->ss_family) {
	case AF_INET: {
		struct sockaddr_in *sin = (struct sockaddr_in *)addr;

		sin->sin_port = htons(port);
		break;
	}
	case AF_INET6: {
		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)addr;

		sin6->sin6_port = htons(port);
		break;
	}
	default:
		/* not an inet address, leave it untouched */
		break;
	}
}
132
/*
 * Retrieves the original destination address for the socket <fd> which must be
 * of family AF_INET (not AF_INET6), with <dir> indicating if we're a listener
 * (=0) or an initiator (!=0). An initiator gets the peer's address. A listener
 * gets the socket's local address or, when the original destination address
 * was translated and this can be looked up, the address before translation.
 * The address is stored in <sa> for <salen> bytes. It returns 0 in case of
 * success, -1 in case of error.
 */
int sock_inet_get_dst(int fd, struct sockaddr *sa, socklen_t salen, int dir)
{
	int ret;

	/* initiator: the destination is the address we are connected to */
	if (dir)
		return getpeername(fd, sa, &salen);

	/* listener: start from the address the socket is bound to */
	ret = getsockname(fd, sa, &salen);
	if (ret < 0)
		return ret;

#if defined(USE_TPROXY) && defined(SO_ORIGINAL_DST)
	/* For TPROXY and Netfilter's NAT, we can retrieve the original
	 * IPv4 address before DNAT/REDIRECT. We must not do that with
	 * other families because v6-mapped IPv4 addresses are still
	 * reported as v4. A failure here is not an error, the address
	 * from getsockname() is kept.
	 */
	if (getsockopt(fd, IPPROTO_IP, SO_ORIGINAL_DST, sa, &salen) == 0)
		return 0;
#endif
	return ret;
}
Willy Tarreau25140cc2020-08-28 15:40:33 +0200163
/* Returns non-zero if the integer socket option <optname> at level <level>
 * could be read from socket <fd> and holds a non-zero value, otherwise zero.
 */
static inline int sock_inet_opt_is_set(int fd, int level, int optname)
{
	int val = 0;
	socklen_t len = sizeof(val);

	return getsockopt(fd, level, optname, &val, &len) == 0 && val;
}

/* Returns true if the passed FD corresponds to a socket bound with RX_O_FOREIGN
 * according to the various supported socket options, that is, if at least one
 * of the options known at build time for this family is enabled on the socket.
 * The socket's address family must be passed in <family>; families other than
 * AF_INET and AF_INET6 always report false.
 */
int sock_inet_is_foreign(int fd, sa_family_t family)
{
	if (family == AF_INET) {
#if defined(IP_TRANSPARENT)
		if (sock_inet_opt_is_set(fd, IPPROTO_IP, IP_TRANSPARENT))
			return 1;
#endif
#if defined(IP_FREEBIND)
		if (sock_inet_opt_is_set(fd, IPPROTO_IP, IP_FREEBIND))
			return 1;
#endif
#if defined(IP_BINDANY)
		if (sock_inet_opt_is_set(fd, IPPROTO_IP, IP_BINDANY))
			return 1;
#endif
#if defined(SO_BINDANY)
		if (sock_inet_opt_is_set(fd, SOL_SOCKET, SO_BINDANY))
			return 1;
#endif
	}
	else if (family == AF_INET6) {
#if defined(IPV6_TRANSPARENT)
		if (sock_inet_opt_is_set(fd, IPPROTO_IPV6, IPV6_TRANSPARENT))
			return 1;
#endif
#if defined(IP_FREEBIND)
		if (sock_inet_opt_is_set(fd, IPPROTO_IP, IP_FREEBIND))
			return 1;
#endif
#if defined(IPV6_BINDANY)
		if (sock_inet_opt_is_set(fd, IPPROTO_IPV6, IPV6_BINDANY))
			return 1;
#endif
#if defined(SO_BINDANY)
		if (sock_inet_opt_is_set(fd, SOL_SOCKET, SO_BINDANY))
			return 1;
#endif
	}
	return 0;
}
Willy Tarreaud88e8c02020-08-28 16:06:01 +0200222
Willy Tarreau37bafdc2020-08-28 17:23:40 +0200223/* Attempt all known socket options to prepare an AF_INET4 socket to be bound
224 * to a foreign address. The socket must already exist and must not be bound.
225 * 1 is returned on success, 0 on failure. The caller must check the address
226 * family before calling this function.
227 */
228int sock_inet4_make_foreign(int fd)
229{
230 return
231#if defined(IP_TRANSPARENT)
Willy Tarreau4bfc6632021-03-31 08:45:47 +0200232 setsockopt(fd, IPPROTO_IP, IP_TRANSPARENT, &one, sizeof(one)) == 0 ||
Willy Tarreau37bafdc2020-08-28 17:23:40 +0200233#endif
234#if defined(IP_FREEBIND)
Willy Tarreau4bfc6632021-03-31 08:45:47 +0200235 setsockopt(fd, IPPROTO_IP, IP_FREEBIND, &one, sizeof(one)) == 0 ||
Willy Tarreau37bafdc2020-08-28 17:23:40 +0200236#endif
237#if defined(IP_BINDANY)
238 setsockopt(fd, IPPROTO_IP, IP_BINDANY, &one, sizeof(one)) == 0 ||
239#endif
240#if defined(SO_BINDANY)
241 setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0 ||
242#endif
243 0;
244}
245
246/* Attempt all known socket options to prepare an AF_INET6 socket to be bound
247 * to a foreign address. The socket must already exist and must not be bound.
248 * 1 is returned on success, 0 on failure. The caller must check the address
249 * family before calling this function.
250 */
251int sock_inet6_make_foreign(int fd)
252{
253 return
Willy Tarreau4bfc6632021-03-31 08:45:47 +0200254#if defined(IPV6_TRANSPARENT)
255 setsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, &one, sizeof(one)) == 0 ||
Willy Tarreau37bafdc2020-08-28 17:23:40 +0200256#endif
257#if defined(IP_FREEBIND)
Willy Tarreau4bfc6632021-03-31 08:45:47 +0200258 setsockopt(fd, IPPROTO_IP, IP_FREEBIND, &one, sizeof(one)) == 0 ||
Willy Tarreau37bafdc2020-08-28 17:23:40 +0200259#endif
260#if defined(IPV6_BINDANY)
261 setsockopt(fd, IPPROTO_IPV6, IPV6_BINDANY, &one, sizeof(one)) == 0 ||
262#endif
263#if defined(SO_BINDANY)
264 setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0 ||
265#endif
266 0;
267}
268
Willy Tarreau233ad282020-10-15 21:45:15 +0200269/* Binds receiver <rx>, and assigns rx->iocb and rx->owner as the callback and
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200270 * context, respectively. Returns and error code made of ERR_* bits on failure
271 * or ERR_NONE on success. On failure, an error message may be passed into
272 * <errmsg>.
273 */
Willy Tarreau233ad282020-10-15 21:45:15 +0200274int sock_inet_bind_receiver(struct receiver *rx, char **errmsg)
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200275{
276 int fd, err, ext;
277 /* copy listener addr because sometimes we need to switch family */
278 struct sockaddr_storage addr_inet = rx->addr;
279
280 /* force to classic sock family, not AF_CUST_* */
Willy Tarreauf1f66092020-09-04 08:15:31 +0200281 addr_inet.ss_family = rx->proto->fam->sock_family;
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200282
283 /* ensure we never return garbage */
284 if (errmsg)
285 *errmsg = 0;
286
287 err = ERR_NONE;
288
289 if (rx->flags & RX_F_BOUND)
290 return ERR_NONE;
291
292 /* if no FD was assigned yet, we'll have to either find a compatible
293 * one or create a new one.
294 */
295 if (rx->fd == -1)
296 rx->fd = sock_find_compatible_fd(rx);
297
298 /* if the receiver now has an fd assigned, then we were offered the fd
299 * by an external process (most likely the parent), and we don't want
300 * to create a new socket. However we still want to set a few flags on
301 * the socket.
302 */
303 fd = rx->fd;
304 ext = (fd >= 0);
305
306 if (!ext) {
Willy Tarreauf1f66092020-09-04 08:15:31 +0200307 fd = my_socketat(rx->settings->netns, rx->proto->fam->sock_domain,
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200308 rx->proto->sock_type, rx->proto->sock_prot);
309 if (fd == -1) {
310 err |= ERR_RETRYABLE | ERR_ALERT;
Willy Tarreau36722d22020-09-17 08:32:17 +0200311 memprintf(errmsg, "cannot create receiving socket (%s)", strerror(errno));
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200312 goto bind_return;
313 }
314 }
315
Willy Tarreau6789f192023-01-11 10:59:52 +0100316 if (ext && fd < global.maxsock && fdtab[fd].owner) {
317 /* This FD was already bound so this means that it was already
318 * known and registered before parsing, hence it's an inherited
319 * FD. The only reason why it's already known here is that it
320 * has been registered multiple times (multiple listeners on the
321 * same, or a "shards" directive on the line). There cannot be
322 * multiple listeners on one FD but at least we can create a
323 * new one from the original one. We won't reconfigure it,
324 * however, as this was already done for the first one.
325 */
326 fd = dup(fd);
327 if (fd == -1) {
328 err |= ERR_RETRYABLE | ERR_ALERT;
329 memprintf(errmsg, "cannot dup() receiving socket (%s)", strerror(errno));
330 goto bind_return;
331 }
332 }
333
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200334 if (fd >= global.maxsock) {
335 err |= ERR_FATAL | ERR_ABORT | ERR_ALERT;
336 memprintf(errmsg, "not enough free sockets (raise '-n' parameter)");
337 goto bind_close_return;
338 }
339
340 if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
341 err |= ERR_FATAL | ERR_ALERT;
342 memprintf(errmsg, "cannot make socket non-blocking");
343 goto bind_close_return;
344 }
345
346 if (!ext && setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) == -1) {
347 /* not fatal but should be reported */
348 memprintf(errmsg, "cannot do so_reuseaddr");
349 err |= ERR_ALERT;
350 }
351
352#ifdef SO_REUSEPORT
353 /* OpenBSD and Linux 3.9 support this. As it's present in old libc versions of
354 * Linux, it might return an error that we will silently ignore.
355 */
356 if (!ext && (global.tune.options & GTUNE_USE_REUSEPORT))
357 setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
358#endif
359
360 if (!ext && (rx->settings->options & RX_O_FOREIGN)) {
361 switch (addr_inet.ss_family) {
362 case AF_INET:
363 if (!sock_inet4_make_foreign(fd)) {
364 memprintf(errmsg, "cannot make receiving socket transparent");
365 err |= ERR_ALERT;
366 }
367 break;
368 case AF_INET6:
369 if (!sock_inet6_make_foreign(fd)) {
370 memprintf(errmsg, "cannot make receiving socket transparent");
371 err |= ERR_ALERT;
372 }
373 break;
374 }
375 }
376
377#ifdef SO_BINDTODEVICE
378 /* Note: this might fail if not CAP_NET_RAW */
379 if (!ext && rx->settings->interface) {
380 if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
381 rx->settings->interface,
382 strlen(rx->settings->interface) + 1) == -1) {
Willy Tarreau36722d22020-09-17 08:32:17 +0200383 memprintf(errmsg, "cannot bind receiver to device (%s)", strerror(errno));
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200384 err |= ERR_WARN;
385 }
386 }
387#endif
388
389#if defined(IPV6_V6ONLY)
390 if (addr_inet.ss_family == AF_INET6 && !ext) {
391 /* Prepare to match the v6only option against what we really want. Note
392 * that sadly the two options are not exclusive to each other and that
393 * v6only is stronger than v4v6.
394 */
395 if ((rx->settings->options & RX_O_V6ONLY) ||
396 (sock_inet6_v6only_default && !(rx->settings->options & RX_O_V4V6)))
397 setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one));
398 else
399 setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &zero, sizeof(zero));
400 }
401#endif
402
Willy Tarreauf1f66092020-09-04 08:15:31 +0200403 if (!ext && bind(fd, (struct sockaddr *)&addr_inet, rx->proto->fam->sock_addrlen) == -1) {
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200404 err |= ERR_RETRYABLE | ERR_ALERT;
Willy Tarreau36722d22020-09-17 08:32:17 +0200405 memprintf(errmsg, "cannot bind socket (%s)", strerror(errno));
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200406 goto bind_close_return;
407 }
408
409 rx->fd = fd;
410 rx->flags |= RX_F_BOUND;
411
Willy Tarreau233ad282020-10-15 21:45:15 +0200412 fd_insert(fd, rx->owner, rx->iocb, thread_mask(rx->settings->bind_thread) & all_threads_mask);
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200413
414 /* for now, all regularly bound TCP listeners are exportable */
415 if (!(rx->flags & RX_F_INHERITED))
Willy Tarreau9063a662021-04-06 18:09:06 +0200416 HA_ATOMIC_OR(&fdtab[fd].state, FD_EXPORTED);
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200417
418 bind_return:
419 if (errmsg && *errmsg) {
420 char pn[INET6_ADDRSTRLEN];
421
422 addr_to_str(&addr_inet, pn, sizeof(pn));
423 memprintf(errmsg, "%s [%s:%d]", *errmsg, pn, get_host_port(&addr_inet));
424 }
425 return err;
426
427 bind_close_return:
428 close(fd);
429 goto bind_return;
430}
431
Willy Tarreaud88e8c02020-08-28 16:06:01 +0200432static void sock_inet_prepare()
433{
434 int fd, val;
435 socklen_t len;
436
Willy Tarreaue5bdc512020-08-28 18:03:10 +0200437 fd = socket(AF_INET, SOCK_STREAM, 0);
438 if (fd >= 0) {
439#ifdef TCP_MAXSEG
440 /* retrieve the OS' default mss for TCPv4 */
441 len = sizeof(val);
442 if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &val, &len) == 0)
443 sock_inet_tcp_maxseg_default = val;
444#endif
445 close(fd);
446 }
447
Willy Tarreaud88e8c02020-08-28 16:06:01 +0200448 fd = socket(AF_INET6, SOCK_STREAM, 0);
449 if (fd >= 0) {
450#if defined(IPV6_V6ONLY)
451 /* retrieve the OS' bindv6only value */
452 len = sizeof(val);
453 if (getsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &val, &len) == 0 && val > 0)
454 sock_inet6_v6only_default = 1;
455#endif
Willy Tarreaue5bdc512020-08-28 18:03:10 +0200456
457#ifdef TCP_MAXSEG
458 /* retrieve the OS' default mss for TCPv6 */
459 len = sizeof(val);
460 if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &val, &len) == 0)
461 sock_inet6_tcp_maxseg_default = val;
462#endif
Willy Tarreaud88e8c02020-08-28 16:06:01 +0200463 close(fd);
464 }
465}
466
467INITCALL0(STG_PREPARE, sock_inet_prepare);
Willy Tarreau37bafdc2020-08-28 17:23:40 +0200468
469
470REGISTER_BUILD_OPTS("Built with transparent proxy support using:"
471#if defined(IP_TRANSPARENT)
472 " IP_TRANSPARENT"
473#endif
474#if defined(IPV6_TRANSPARENT)
475 " IPV6_TRANSPARENT"
476#endif
477#if defined(IP_FREEBIND)
478 " IP_FREEBIND"
479#endif
480#if defined(IP_BINDANY)
481 " IP_BINDANY"
482#endif
483#if defined(IPV6_BINDANY)
484 " IPV6_BINDANY"
485#endif
486#if defined(SO_BINDANY)
487 " SO_BINDANY"
488#endif
489 "");