/*
 * AF_INET/AF_INET6 socket management
 *
 * Copyright 2000-2020 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
Willy Tarreauf2cda102020-09-17 14:02:01 +020013#include <errno.h>
Willy Tarreaud69ce1f2020-09-01 14:18:04 +020014#include <fcntl.h>
Willy Tarreau0d06df62020-08-28 15:10:11 +020015#include <string.h>
Willy Tarreaud69ce1f2020-09-01 14:18:04 +020016#include <unistd.h>
Willy Tarreau0d06df62020-08-28 15:10:11 +020017
18#include <sys/param.h>
19#include <sys/socket.h>
20#include <sys/types.h>
21
22#include <netinet/tcp.h>
23#include <netinet/in.h>
24
25#include <haproxy/api.h>
Willy Tarreaud69ce1f2020-09-01 14:18:04 +020026#include <haproxy/errors.h>
27#include <haproxy/fd.h>
Willy Tarreau37bafdc2020-08-28 17:23:40 +020028#include <haproxy/global.h>
Willy Tarreaud69ce1f2020-09-01 14:18:04 +020029#include <haproxy/namespace.h>
30#include <haproxy/receiver-t.h>
31#include <haproxy/sock.h>
Willy Tarreau0d06df62020-08-28 15:10:11 +020032#include <haproxy/sock_inet.h>
33#include <haproxy/tools.h>
34
Willy Tarreaub0254cb2020-09-04 08:07:11 +020035struct proto_fam proto_fam_inet4 = {
36 .name = "inet4",
37 .sock_domain = PF_INET,
38 .sock_family = AF_INET,
39 .sock_addrlen = sizeof(struct sockaddr_in),
40 .l3_addrlen = 32/8,
41 .addrcmp = sock_inet4_addrcmp,
42 .bind = sock_inet_bind_receiver,
43 .get_src = sock_get_src,
44 .get_dst = sock_inet_get_dst,
Willy Tarreau73bed9f2020-12-04 14:43:36 +010045 .set_port = sock_inet_set_port,
Willy Tarreaub0254cb2020-09-04 08:07:11 +020046};
47
48struct proto_fam proto_fam_inet6 = {
49 .name = "inet6",
50 .sock_domain = PF_INET6,
51 .sock_family = AF_INET6,
52 .sock_addrlen = sizeof(struct sockaddr_in6),
53 .l3_addrlen = 128/8,
54 .addrcmp = sock_inet6_addrcmp,
55 .bind = sock_inet_bind_receiver,
56 .get_src = sock_get_src,
57 .get_dst = sock_get_dst,
Willy Tarreau73bed9f2020-12-04 14:43:36 +010058 .set_port = sock_inet_set_port,
Willy Tarreaub0254cb2020-09-04 08:07:11 +020059};
Willy Tarreau0d06df62020-08-28 15:10:11 +020060
/* PLEASE NOTE for functions below:
 *   - sock_inet4_* is solely for AF_INET (IPv4)
 *   - sock_inet6_* is solely for AF_INET6 (IPv6)
 *   - sock_inet_*  is for either
 *
 * The address family SHOULD always be checked. In some cases a function will
 * be used in a situation where the address family is guaranteed (e.g. protocol
 * definitions), so the test may be avoided. This special case must then be
 * mentioned in the comment before the function definition.
 */
71
/* Indicates whether the operating system uses IPV6_V6ONLY by default. 0=no,
 * 1=yes. It also remains 0 if IPv6 is not enabled/configured, since it is only
 * set to 1 once an AF_INET6 socket could be created and reported the option as
 * enabled.
 */
int sock_inet6_v6only_default = 0;
Willy Tarreau0d06df62020-08-28 15:10:11 +020076
/* Default TCP MSS settings for IPv4 and IPv6 respectively. -1 means unknown,
 * which is the value they keep until the OS default could be retrieved.
 */
int sock_inet_tcp_maxseg_default = -1;
int sock_inet6_tcp_maxseg_default = -1;
80
/* Compares two AF_INET sockaddr addresses <a> and <b>. Returns 0 if they match
 * or non-zero if they do not match. Both the port and the IPv4 address take
 * part in the comparison. Two addresses of different families, or of a family
 * other than AF_INET, are always reported as not matching.
 */
int sock_inet4_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b)
{
	const struct sockaddr_in *a4 = (const struct sockaddr_in *)a;
	const struct sockaddr_in *b4 = (const struct sockaddr_in *)b;

	if (a->ss_family != b->ss_family)
		return -1;

	/* both families are equal here, so checking one of them is enough */
	if (a->ss_family != AF_INET)
		return -1;

	if (a4->sin_port != b4->sin_port)
		return -1;

	return memcmp(&a4->sin_addr, &b4->sin_addr, sizeof(a4->sin_addr));
}
100
/* Compares two AF_INET6 sockaddr addresses <a> and <b>. Returns 0 if they
 * match or non-zero if they do not match. Both the port and the IPv6 address
 * take part in the comparison. Two addresses of different families, or of a
 * family other than AF_INET6, are always reported as not matching.
 */
int sock_inet6_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b)
{
	const struct sockaddr_in6 *a6 = (const struct sockaddr_in6 *)a;
	const struct sockaddr_in6 *b6 = (const struct sockaddr_in6 *)b;

	if (a->ss_family != b->ss_family)
		return -1;

	/* both families are equal here, so checking one of them is enough */
	if (a->ss_family != AF_INET6)
		return -1;

	if (a6->sin6_port != b6->sin6_port)
		return -1;

	return memcmp(&a6->sin6_addr, &b6->sin6_addr, sizeof(a6->sin6_addr));
}
Willy Tarreauc5a94c92020-08-28 15:19:45 +0200120
/* Sets the port <port> on IPv4 or IPv6 address <addr>. The address family is
 * determined from the sockaddr_storage's address family. Nothing is done for
 * other families. <port> is given in host byte order and is stored in network
 * byte order.
 */
void sock_inet_set_port(struct sockaddr_storage *addr, int port)
{
	if (addr->ss_family == AF_INET)
		((struct sockaddr_in *)addr)->sin_port = htons(port);
	else if (addr->ss_family == AF_INET6)
		((struct sockaddr_in6 *)addr)->sin6_port = htons(port);
}
132
/*
 * Retrieves the original destination address for the socket <fd> which must be
 * of family AF_INET (not AF_INET6), with <dir> indicating if we're a listener
 * (=0) or an initiator (!=0). For an initiator, the destination is the peer's
 * address. For a listener, it is the local address, and if the original
 * destination address was translated, the original address is retrieved
 * instead when the build supports it. It returns 0 in case of success, -1 in
 * case of error. The socket's destination address is stored in <sa> for
 * <salen> bytes.
 */
int sock_inet_get_dst(int fd, struct sockaddr *sa, socklen_t salen, int dir)
{
	if (dir)
		return getpeername(fd, sa, &salen);
	else {
		int ret = getsockname(fd, sa, &salen);

		if (ret < 0)
			return ret;

#if defined(USE_TPROXY) && defined(SO_ORIGINAL_DST)
		/* For TPROXY and Netfilter's NAT, we can retrieve the original
		 * IPv4 address before DNAT/REDIRECT. We must not do that with
		 * other families because v6-mapped IPv4 addresses are still
		 * reported as v4. A failure here is not an error: the address
		 * returned by getsockname() above is kept.
		 */
		if (getsockopt(fd, IPPROTO_IP, SO_ORIGINAL_DST, sa, &salen) == 0)
			return 0;
#endif
		return ret;
	}
}
Willy Tarreau25140cc2020-08-28 15:40:33 +0200163
Willy Tarreau3fd3bdc2020-09-01 15:12:08 +0200164/* Returns true if the passed FD corresponds to a socket bound with RX_O_FOREIGN
Willy Tarreau25140cc2020-08-28 15:40:33 +0200165 * according to the various supported socket options. The socket's address family
166 * must be passed in <family>.
167 */
168int sock_inet_is_foreign(int fd, sa_family_t family)
169{
170 int val __maybe_unused;
171 socklen_t len __maybe_unused;
172
173 switch (family) {
174 case AF_INET:
175#if defined(IP_TRANSPARENT)
176 val = 0; len = sizeof(val);
Willy Tarreau4bfc6632021-03-31 08:45:47 +0200177 if (getsockopt(fd, IPPROTO_IP, IP_TRANSPARENT, &val, &len) == 0 && val)
Willy Tarreau25140cc2020-08-28 15:40:33 +0200178 return 1;
179#endif
180#if defined(IP_FREEBIND)
181 val = 0; len = sizeof(val);
Willy Tarreau4bfc6632021-03-31 08:45:47 +0200182 if (getsockopt(fd, IPPROTO_IP, IP_FREEBIND, &val, &len) == 0 && val)
Willy Tarreau25140cc2020-08-28 15:40:33 +0200183 return 1;
184#endif
185#if defined(IP_BINDANY)
186 val = 0; len = sizeof(val);
187 if (getsockopt(fd, IPPROTO_IP, IP_BINDANY, &val, &len) == 0 && val)
188 return 1;
189#endif
190#if defined(SO_BINDANY)
191 val = 0; len = sizeof(val);
192 if (getsockopt(fd, SOL_SOCKET, SO_BINDANY, &val, &len) == 0 && val)
193 return 1;
194#endif
195 break;
196
197 case AF_INET6:
Willy Tarreau4bfc6632021-03-31 08:45:47 +0200198#if defined(IPV6_TRANSPARENT)
Willy Tarreau25140cc2020-08-28 15:40:33 +0200199 val = 0; len = sizeof(val);
Willy Tarreau4bfc6632021-03-31 08:45:47 +0200200 if (getsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, &val, &len) == 0 && val)
Willy Tarreau25140cc2020-08-28 15:40:33 +0200201 return 1;
202#endif
203#if defined(IP_FREEBIND)
204 val = 0; len = sizeof(val);
Willy Tarreau4bfc6632021-03-31 08:45:47 +0200205 if (getsockopt(fd, IPPROTO_IP, IP_FREEBIND, &val, &len) == 0 && val)
Willy Tarreau25140cc2020-08-28 15:40:33 +0200206 return 1;
207#endif
208#if defined(IPV6_BINDANY)
209 val = 0; len = sizeof(val);
210 if (getsockopt(fd, IPPROTO_IPV6, IPV6_BINDANY, &val, &len) == 0 && val)
211 return 1;
212#endif
213#if defined(SO_BINDANY)
214 val = 0; len = sizeof(val);
215 if (getsockopt(fd, SOL_SOCKET, SO_BINDANY, &val, &len) == 0 && val)
216 return 1;
217#endif
218 break;
219 }
220 return 0;
221}
Willy Tarreaud88e8c02020-08-28 16:06:01 +0200222
Willy Tarreau37bafdc2020-08-28 17:23:40 +0200223/* Attempt all known socket options to prepare an AF_INET4 socket to be bound
224 * to a foreign address. The socket must already exist and must not be bound.
225 * 1 is returned on success, 0 on failure. The caller must check the address
226 * family before calling this function.
227 */
228int sock_inet4_make_foreign(int fd)
229{
230 return
231#if defined(IP_TRANSPARENT)
Willy Tarreau4bfc6632021-03-31 08:45:47 +0200232 setsockopt(fd, IPPROTO_IP, IP_TRANSPARENT, &one, sizeof(one)) == 0 ||
Willy Tarreau37bafdc2020-08-28 17:23:40 +0200233#endif
234#if defined(IP_FREEBIND)
Willy Tarreau4bfc6632021-03-31 08:45:47 +0200235 setsockopt(fd, IPPROTO_IP, IP_FREEBIND, &one, sizeof(one)) == 0 ||
Willy Tarreau37bafdc2020-08-28 17:23:40 +0200236#endif
237#if defined(IP_BINDANY)
238 setsockopt(fd, IPPROTO_IP, IP_BINDANY, &one, sizeof(one)) == 0 ||
239#endif
240#if defined(SO_BINDANY)
241 setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0 ||
242#endif
243 0;
244}
245
246/* Attempt all known socket options to prepare an AF_INET6 socket to be bound
247 * to a foreign address. The socket must already exist and must not be bound.
248 * 1 is returned on success, 0 on failure. The caller must check the address
249 * family before calling this function.
250 */
251int sock_inet6_make_foreign(int fd)
252{
253 return
Willy Tarreau4bfc6632021-03-31 08:45:47 +0200254#if defined(IPV6_TRANSPARENT)
255 setsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, &one, sizeof(one)) == 0 ||
Willy Tarreau37bafdc2020-08-28 17:23:40 +0200256#endif
257#if defined(IP_FREEBIND)
Willy Tarreau4bfc6632021-03-31 08:45:47 +0200258 setsockopt(fd, IPPROTO_IP, IP_FREEBIND, &one, sizeof(one)) == 0 ||
Willy Tarreau37bafdc2020-08-28 17:23:40 +0200259#endif
260#if defined(IPV6_BINDANY)
261 setsockopt(fd, IPPROTO_IPV6, IPV6_BINDANY, &one, sizeof(one)) == 0 ||
262#endif
263#if defined(SO_BINDANY)
264 setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0 ||
265#endif
266 0;
267}
268
Willy Tarreau233ad282020-10-15 21:45:15 +0200269/* Binds receiver <rx>, and assigns rx->iocb and rx->owner as the callback and
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200270 * context, respectively. Returns and error code made of ERR_* bits on failure
271 * or ERR_NONE on success. On failure, an error message may be passed into
272 * <errmsg>.
273 */
Willy Tarreau233ad282020-10-15 21:45:15 +0200274int sock_inet_bind_receiver(struct receiver *rx, char **errmsg)
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200275{
276 int fd, err, ext;
277 /* copy listener addr because sometimes we need to switch family */
278 struct sockaddr_storage addr_inet = rx->addr;
279
280 /* force to classic sock family, not AF_CUST_* */
Willy Tarreauf1f66092020-09-04 08:15:31 +0200281 addr_inet.ss_family = rx->proto->fam->sock_family;
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200282
283 /* ensure we never return garbage */
284 if (errmsg)
285 *errmsg = 0;
286
287 err = ERR_NONE;
288
289 if (rx->flags & RX_F_BOUND)
290 return ERR_NONE;
291
292 /* if no FD was assigned yet, we'll have to either find a compatible
293 * one or create a new one.
294 */
295 if (rx->fd == -1)
296 rx->fd = sock_find_compatible_fd(rx);
297
298 /* if the receiver now has an fd assigned, then we were offered the fd
299 * by an external process (most likely the parent), and we don't want
300 * to create a new socket. However we still want to set a few flags on
301 * the socket.
302 */
303 fd = rx->fd;
304 ext = (fd >= 0);
305
306 if (!ext) {
Willy Tarreauf1f66092020-09-04 08:15:31 +0200307 fd = my_socketat(rx->settings->netns, rx->proto->fam->sock_domain,
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200308 rx->proto->sock_type, rx->proto->sock_prot);
309 if (fd == -1) {
310 err |= ERR_RETRYABLE | ERR_ALERT;
Willy Tarreau36722d22020-09-17 08:32:17 +0200311 memprintf(errmsg, "cannot create receiving socket (%s)", strerror(errno));
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200312 goto bind_return;
313 }
314 }
315
316 if (fd >= global.maxsock) {
317 err |= ERR_FATAL | ERR_ABORT | ERR_ALERT;
318 memprintf(errmsg, "not enough free sockets (raise '-n' parameter)");
319 goto bind_close_return;
320 }
321
322 if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
323 err |= ERR_FATAL | ERR_ALERT;
324 memprintf(errmsg, "cannot make socket non-blocking");
325 goto bind_close_return;
326 }
327
328 if (!ext && setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) == -1) {
329 /* not fatal but should be reported */
330 memprintf(errmsg, "cannot do so_reuseaddr");
331 err |= ERR_ALERT;
332 }
333
334#ifdef SO_REUSEPORT
335 /* OpenBSD and Linux 3.9 support this. As it's present in old libc versions of
336 * Linux, it might return an error that we will silently ignore.
337 */
338 if (!ext && (global.tune.options & GTUNE_USE_REUSEPORT))
339 setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
340#endif
341
342 if (!ext && (rx->settings->options & RX_O_FOREIGN)) {
343 switch (addr_inet.ss_family) {
344 case AF_INET:
345 if (!sock_inet4_make_foreign(fd)) {
346 memprintf(errmsg, "cannot make receiving socket transparent");
347 err |= ERR_ALERT;
348 }
349 break;
350 case AF_INET6:
351 if (!sock_inet6_make_foreign(fd)) {
352 memprintf(errmsg, "cannot make receiving socket transparent");
353 err |= ERR_ALERT;
354 }
355 break;
356 }
357 }
358
359#ifdef SO_BINDTODEVICE
360 /* Note: this might fail if not CAP_NET_RAW */
361 if (!ext && rx->settings->interface) {
362 if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
363 rx->settings->interface,
364 strlen(rx->settings->interface) + 1) == -1) {
Willy Tarreau36722d22020-09-17 08:32:17 +0200365 memprintf(errmsg, "cannot bind receiver to device (%s)", strerror(errno));
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200366 err |= ERR_WARN;
367 }
368 }
369#endif
370
371#if defined(IPV6_V6ONLY)
372 if (addr_inet.ss_family == AF_INET6 && !ext) {
373 /* Prepare to match the v6only option against what we really want. Note
374 * that sadly the two options are not exclusive to each other and that
375 * v6only is stronger than v4v6.
376 */
377 if ((rx->settings->options & RX_O_V6ONLY) ||
378 (sock_inet6_v6only_default && !(rx->settings->options & RX_O_V4V6)))
379 setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one));
380 else
381 setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &zero, sizeof(zero));
382 }
383#endif
384
Willy Tarreauf1f66092020-09-04 08:15:31 +0200385 if (!ext && bind(fd, (struct sockaddr *)&addr_inet, rx->proto->fam->sock_addrlen) == -1) {
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200386 err |= ERR_RETRYABLE | ERR_ALERT;
Willy Tarreau36722d22020-09-17 08:32:17 +0200387 memprintf(errmsg, "cannot bind socket (%s)", strerror(errno));
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200388 goto bind_close_return;
389 }
390
391 rx->fd = fd;
392 rx->flags |= RX_F_BOUND;
393
Willy Tarreau233ad282020-10-15 21:45:15 +0200394 fd_insert(fd, rx->owner, rx->iocb, thread_mask(rx->settings->bind_thread) & all_threads_mask);
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200395
396 /* for now, all regularly bound TCP listeners are exportable */
397 if (!(rx->flags & RX_F_INHERITED))
398 fdtab[fd].exported = 1;
399
400 bind_return:
401 if (errmsg && *errmsg) {
402 char pn[INET6_ADDRSTRLEN];
403
404 addr_to_str(&addr_inet, pn, sizeof(pn));
405 memprintf(errmsg, "%s [%s:%d]", *errmsg, pn, get_host_port(&addr_inet));
406 }
407 return err;
408
409 bind_close_return:
410 close(fd);
411 goto bind_return;
412}
413
Willy Tarreaud88e8c02020-08-28 16:06:01 +0200414static void sock_inet_prepare()
415{
416 int fd, val;
417 socklen_t len;
418
Willy Tarreaue5bdc512020-08-28 18:03:10 +0200419 fd = socket(AF_INET, SOCK_STREAM, 0);
420 if (fd >= 0) {
421#ifdef TCP_MAXSEG
422 /* retrieve the OS' default mss for TCPv4 */
423 len = sizeof(val);
424 if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &val, &len) == 0)
425 sock_inet_tcp_maxseg_default = val;
426#endif
427 close(fd);
428 }
429
Willy Tarreaud88e8c02020-08-28 16:06:01 +0200430 fd = socket(AF_INET6, SOCK_STREAM, 0);
431 if (fd >= 0) {
432#if defined(IPV6_V6ONLY)
433 /* retrieve the OS' bindv6only value */
434 len = sizeof(val);
435 if (getsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &val, &len) == 0 && val > 0)
436 sock_inet6_v6only_default = 1;
437#endif
Willy Tarreaue5bdc512020-08-28 18:03:10 +0200438
439#ifdef TCP_MAXSEG
440 /* retrieve the OS' default mss for TCPv6 */
441 len = sizeof(val);
442 if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &val, &len) == 0)
443 sock_inet6_tcp_maxseg_default = val;
444#endif
Willy Tarreaud88e8c02020-08-28 16:06:01 +0200445 close(fd);
446 }
447}
448
/* registers sock_inet_prepare() as an init call for the STG_PREPARE stage */
INITCALL0(STG_PREPARE, sock_inet_prepare);
Willy Tarreau37bafdc2020-08-28 17:23:40 +0200450
451
452REGISTER_BUILD_OPTS("Built with transparent proxy support using:"
453#if defined(IP_TRANSPARENT)
454 " IP_TRANSPARENT"
455#endif
456#if defined(IPV6_TRANSPARENT)
457 " IPV6_TRANSPARENT"
458#endif
459#if defined(IP_FREEBIND)
460 " IP_FREEBIND"
461#endif
462#if defined(IP_BINDANY)
463 " IP_BINDANY"
464#endif
465#if defined(IPV6_BINDANY)
466 " IPV6_BINDANY"
467#endif
468#if defined(SO_BINDANY)
469 " SO_BINDANY"
470#endif
471 "");