/*
 * AF_INET/AF_INET6 socket management
 *
 * Copyright 2000-2020 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

#include <sys/param.h>
#include <sys/socket.h>
#include <sys/types.h>

#include <netinet/tcp.h>
#include <netinet/in.h>

#include <haproxy/api.h>
#include <haproxy/errors.h>
#include <haproxy/fd.h>
#include <haproxy/global.h>
#include <haproxy/namespace.h>
#include <haproxy/receiver-t.h>
#include <haproxy/sock.h>
#include <haproxy/sock_inet.h>
#include <haproxy/tools.h>
33
Willy Tarreaub0254cb2020-09-04 08:07:11 +020034struct proto_fam proto_fam_inet4 = {
35 .name = "inet4",
36 .sock_domain = PF_INET,
37 .sock_family = AF_INET,
38 .sock_addrlen = sizeof(struct sockaddr_in),
39 .l3_addrlen = 32/8,
40 .addrcmp = sock_inet4_addrcmp,
41 .bind = sock_inet_bind_receiver,
42 .get_src = sock_get_src,
43 .get_dst = sock_inet_get_dst,
44};
45
46struct proto_fam proto_fam_inet6 = {
47 .name = "inet6",
48 .sock_domain = PF_INET6,
49 .sock_family = AF_INET6,
50 .sock_addrlen = sizeof(struct sockaddr_in6),
51 .l3_addrlen = 128/8,
52 .addrcmp = sock_inet6_addrcmp,
53 .bind = sock_inet_bind_receiver,
54 .get_src = sock_get_src,
55 .get_dst = sock_get_dst,
56};
Willy Tarreau0d06df62020-08-28 15:10:11 +020057
/* PLEASE NOTE for functions below:
 *   - sock_inet4_* is solely for AF_INET (IPv4)
 *   - sock_inet6_* is solely for AF_INET6 (IPv6)
 *   - sock_inet_*  is for either
 *
 * The address family SHOULD always be checked. In some cases a function will
 * be used in a situation where the address family is guaranteed (e.g. protocol
 * definitions), so the test may be avoided. This special case must then be
 * mentioned in the comment before the function definition.
 */
68
/* Tells whether the operating system enables IPV6_V6ONLY by default: 0=no,
 * 1=yes. It is updated by sock_inet_prepare() and also remains 0 if IPv6 is
 * not enabled/configured.
 */
int sock_inet6_v6only_default = 0;

/* Default TCPv4/TCPv6 MSS settings. -1=unknown. */
int sock_inet_tcp_maxseg_default = -1;
int sock_inet6_tcp_maxseg_default = -1;
77
/* Compares two AF_INET sockaddr addresses <a> and <b>. Returns 0 if they match
 * or non-zero if they do not match. Both the port and the IPv4 address are
 * compared. Two addresses whose families differ, or whose family is not
 * AF_INET, never match (-1 is returned).
 */
int sock_inet4_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b)
{
	const struct sockaddr_in *a4 = (const struct sockaddr_in *)a;
	const struct sockaddr_in *b4 = (const struct sockaddr_in *)b;

	/* the family is checked first so that the sockaddr_in fields are only
	 * interpreted once both storages are known to hold AF_INET addresses.
	 */
	if (a->ss_family != b->ss_family)
		return -1;

	if (a->ss_family != AF_INET)
		return -1;

	if (a4->sin_port != b4->sin_port)
		return -1;

	return memcmp(&a4->sin_addr, &b4->sin_addr, sizeof(a4->sin_addr));
}
97
/* Compares two AF_INET6 sockaddr addresses <a> and <b>. Returns 0 if they
 * match or non-zero if they do not match. Only the port and the IPv6 address
 * are compared; the other sockaddr_in6 fields are not looked at. Two addresses
 * whose families differ, or whose family is not AF_INET6, never match (-1 is
 * returned).
 */
int sock_inet6_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b)
{
	const struct sockaddr_in6 *a6 = (const struct sockaddr_in6 *)a;
	const struct sockaddr_in6 *b6 = (const struct sockaddr_in6 *)b;

	/* the family is checked first so that the sockaddr_in6 fields are only
	 * interpreted once both storages are known to hold AF_INET6 addresses.
	 */
	if (a->ss_family != b->ss_family)
		return -1;

	if (a->ss_family != AF_INET6)
		return -1;

	if (a6->sin6_port != b6->sin6_port)
		return -1;

	return memcmp(&a6->sin6_addr, &b6->sin6_addr, sizeof(a6->sin6_addr));
}
Willy Tarreauc5a94c92020-08-28 15:19:45 +0200117
/*
 * Retrieves the original destination address for the socket <fd> which must be
 * of family AF_INET (not AF_INET6), with <dir> indicating if we're a listener
 * (=0) or an initiator (!=0). In the case of a listener, if the original
 * destination address was translated, the original address is retrieved. It
 * returns 0 in case of success, -1 in case of error. The socket's destination
 * address is stored in <sa> for <salen> bytes.
 */
int sock_inet_get_dst(int fd, struct sockaddr *sa, socklen_t salen, int dir)
{
	int ret;

	/* initiator: the destination is simply the peer we are connected to */
	if (dir)
		return getpeername(fd, sa, &salen);

	/* listener: the destination is the local address of the socket */
	ret = getsockname(fd, sa, &salen);
	if (ret < 0)
		return ret;

#if defined(USE_TPROXY) && defined(SO_ORIGINAL_DST)
	/* For TPROXY and Netfilter's NAT, we can retrieve the original
	 * IPv4 address before DNAT/REDIRECT. We must not do that with
	 * other families because v6-mapped IPv4 addresses are still
	 * reported as v4. A failure here is not an error: the address
	 * already stored by getsockname() is then kept.
	 */
	if (getsockopt(fd, SOL_IP, SO_ORIGINAL_DST, sa, &salen) == 0)
		return 0;
#endif
	return ret;
}
Willy Tarreau25140cc2020-08-28 15:40:33 +0200148
Willy Tarreau3fd3bdc2020-09-01 15:12:08 +0200149/* Returns true if the passed FD corresponds to a socket bound with RX_O_FOREIGN
Willy Tarreau25140cc2020-08-28 15:40:33 +0200150 * according to the various supported socket options. The socket's address family
151 * must be passed in <family>.
152 */
153int sock_inet_is_foreign(int fd, sa_family_t family)
154{
155 int val __maybe_unused;
156 socklen_t len __maybe_unused;
157
158 switch (family) {
159 case AF_INET:
160#if defined(IP_TRANSPARENT)
161 val = 0; len = sizeof(val);
162 if (getsockopt(fd, SOL_IP, IP_TRANSPARENT, &val, &len) == 0 && val)
163 return 1;
164#endif
165#if defined(IP_FREEBIND)
166 val = 0; len = sizeof(val);
167 if (getsockopt(fd, SOL_IP, IP_FREEBIND, &val, &len) == 0 && val)
168 return 1;
169#endif
170#if defined(IP_BINDANY)
171 val = 0; len = sizeof(val);
172 if (getsockopt(fd, IPPROTO_IP, IP_BINDANY, &val, &len) == 0 && val)
173 return 1;
174#endif
175#if defined(SO_BINDANY)
176 val = 0; len = sizeof(val);
177 if (getsockopt(fd, SOL_SOCKET, SO_BINDANY, &val, &len) == 0 && val)
178 return 1;
179#endif
180 break;
181
182 case AF_INET6:
183#if defined(IPV6_TRANSPARENT) && defined(SOL_IPV6)
184 val = 0; len = sizeof(val);
185 if (getsockopt(fd, SOL_IPV6, IPV6_TRANSPARENT, &val, &len) == 0 && val)
186 return 1;
187#endif
188#if defined(IP_FREEBIND)
189 val = 0; len = sizeof(val);
190 if (getsockopt(fd, SOL_IP, IP_FREEBIND, &val, &len) == 0 && val)
191 return 1;
192#endif
193#if defined(IPV6_BINDANY)
194 val = 0; len = sizeof(val);
195 if (getsockopt(fd, IPPROTO_IPV6, IPV6_BINDANY, &val, &len) == 0 && val)
196 return 1;
197#endif
198#if defined(SO_BINDANY)
199 val = 0; len = sizeof(val);
200 if (getsockopt(fd, SOL_SOCKET, SO_BINDANY, &val, &len) == 0 && val)
201 return 1;
202#endif
203 break;
204 }
205 return 0;
206}
Willy Tarreaud88e8c02020-08-28 16:06:01 +0200207
Willy Tarreau37bafdc2020-08-28 17:23:40 +0200208/* Attempt all known socket options to prepare an AF_INET4 socket to be bound
209 * to a foreign address. The socket must already exist and must not be bound.
210 * 1 is returned on success, 0 on failure. The caller must check the address
211 * family before calling this function.
212 */
213int sock_inet4_make_foreign(int fd)
214{
215 return
216#if defined(IP_TRANSPARENT)
217 setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one)) == 0 ||
218#endif
219#if defined(IP_FREEBIND)
220 setsockopt(fd, SOL_IP, IP_FREEBIND, &one, sizeof(one)) == 0 ||
221#endif
222#if defined(IP_BINDANY)
223 setsockopt(fd, IPPROTO_IP, IP_BINDANY, &one, sizeof(one)) == 0 ||
224#endif
225#if defined(SO_BINDANY)
226 setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0 ||
227#endif
228 0;
229}
230
231/* Attempt all known socket options to prepare an AF_INET6 socket to be bound
232 * to a foreign address. The socket must already exist and must not be bound.
233 * 1 is returned on success, 0 on failure. The caller must check the address
234 * family before calling this function.
235 */
236int sock_inet6_make_foreign(int fd)
237{
238 return
239#if defined(IPV6_TRANSPARENT) && defined(SOL_IPV6)
240 setsockopt(fd, SOL_IPV6, IPV6_TRANSPARENT, &one, sizeof(one)) == 0 ||
241#endif
242#if defined(IP_FREEBIND)
243 setsockopt(fd, SOL_IP, IP_FREEBIND, &one, sizeof(one)) == 0 ||
244#endif
245#if defined(IPV6_BINDANY)
246 setsockopt(fd, IPPROTO_IPV6, IPV6_BINDANY, &one, sizeof(one)) == 0 ||
247#endif
248#if defined(SO_BINDANY)
249 setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0 ||
250#endif
251 0;
252}
253
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200254/* Binds receiver <rx>, and assigns <handler> and rx->owner as the callback and
255 * context, respectively. Returns and error code made of ERR_* bits on failure
256 * or ERR_NONE on success. On failure, an error message may be passed into
257 * <errmsg>.
258 */
259int sock_inet_bind_receiver(struct receiver *rx, void (*handler)(int fd), char **errmsg)
260{
261 int fd, err, ext;
262 /* copy listener addr because sometimes we need to switch family */
263 struct sockaddr_storage addr_inet = rx->addr;
264
265 /* force to classic sock family, not AF_CUST_* */
Willy Tarreauf1f66092020-09-04 08:15:31 +0200266 addr_inet.ss_family = rx->proto->fam->sock_family;
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200267
268 /* ensure we never return garbage */
269 if (errmsg)
270 *errmsg = 0;
271
272 err = ERR_NONE;
273
274 if (rx->flags & RX_F_BOUND)
275 return ERR_NONE;
276
277 /* if no FD was assigned yet, we'll have to either find a compatible
278 * one or create a new one.
279 */
280 if (rx->fd == -1)
281 rx->fd = sock_find_compatible_fd(rx);
282
283 /* if the receiver now has an fd assigned, then we were offered the fd
284 * by an external process (most likely the parent), and we don't want
285 * to create a new socket. However we still want to set a few flags on
286 * the socket.
287 */
288 fd = rx->fd;
289 ext = (fd >= 0);
290
291 if (!ext) {
Willy Tarreauf1f66092020-09-04 08:15:31 +0200292 fd = my_socketat(rx->settings->netns, rx->proto->fam->sock_domain,
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200293 rx->proto->sock_type, rx->proto->sock_prot);
294 if (fd == -1) {
295 err |= ERR_RETRYABLE | ERR_ALERT;
296 memprintf(errmsg, "cannot create receiving socket");
297 goto bind_return;
298 }
299 }
300
301 if (fd >= global.maxsock) {
302 err |= ERR_FATAL | ERR_ABORT | ERR_ALERT;
303 memprintf(errmsg, "not enough free sockets (raise '-n' parameter)");
304 goto bind_close_return;
305 }
306
307 if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
308 err |= ERR_FATAL | ERR_ALERT;
309 memprintf(errmsg, "cannot make socket non-blocking");
310 goto bind_close_return;
311 }
312
313 if (!ext && setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) == -1) {
314 /* not fatal but should be reported */
315 memprintf(errmsg, "cannot do so_reuseaddr");
316 err |= ERR_ALERT;
317 }
318
319#ifdef SO_REUSEPORT
320 /* OpenBSD and Linux 3.9 support this. As it's present in old libc versions of
321 * Linux, it might return an error that we will silently ignore.
322 */
323 if (!ext && (global.tune.options & GTUNE_USE_REUSEPORT))
324 setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
325#endif
326
327 if (!ext && (rx->settings->options & RX_O_FOREIGN)) {
328 switch (addr_inet.ss_family) {
329 case AF_INET:
330 if (!sock_inet4_make_foreign(fd)) {
331 memprintf(errmsg, "cannot make receiving socket transparent");
332 err |= ERR_ALERT;
333 }
334 break;
335 case AF_INET6:
336 if (!sock_inet6_make_foreign(fd)) {
337 memprintf(errmsg, "cannot make receiving socket transparent");
338 err |= ERR_ALERT;
339 }
340 break;
341 }
342 }
343
344#ifdef SO_BINDTODEVICE
345 /* Note: this might fail if not CAP_NET_RAW */
346 if (!ext && rx->settings->interface) {
347 if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
348 rx->settings->interface,
349 strlen(rx->settings->interface) + 1) == -1) {
350 memprintf(errmsg, "cannot bind receiver to device");
351 err |= ERR_WARN;
352 }
353 }
354#endif
355
356#if defined(IPV6_V6ONLY)
357 if (addr_inet.ss_family == AF_INET6 && !ext) {
358 /* Prepare to match the v6only option against what we really want. Note
359 * that sadly the two options are not exclusive to each other and that
360 * v6only is stronger than v4v6.
361 */
362 if ((rx->settings->options & RX_O_V6ONLY) ||
363 (sock_inet6_v6only_default && !(rx->settings->options & RX_O_V4V6)))
364 setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one));
365 else
366 setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &zero, sizeof(zero));
367 }
368#endif
369
Willy Tarreauf1f66092020-09-04 08:15:31 +0200370 if (!ext && bind(fd, (struct sockaddr *)&addr_inet, rx->proto->fam->sock_addrlen) == -1) {
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200371 err |= ERR_RETRYABLE | ERR_ALERT;
372 memprintf(errmsg, "cannot bind socket");
373 goto bind_close_return;
374 }
375
376 rx->fd = fd;
377 rx->flags |= RX_F_BOUND;
378
379 fd_insert(fd, rx->owner, handler, thread_mask(rx->settings->bind_thread) & all_threads_mask);
380
381 /* for now, all regularly bound TCP listeners are exportable */
382 if (!(rx->flags & RX_F_INHERITED))
383 fdtab[fd].exported = 1;
384
385 bind_return:
386 if (errmsg && *errmsg) {
387 char pn[INET6_ADDRSTRLEN];
388
389 addr_to_str(&addr_inet, pn, sizeof(pn));
390 memprintf(errmsg, "%s [%s:%d]", *errmsg, pn, get_host_port(&addr_inet));
391 }
392 return err;
393
394 bind_close_return:
395 close(fd);
396 goto bind_return;
397}
398
Willy Tarreaud88e8c02020-08-28 16:06:01 +0200399static void sock_inet_prepare()
400{
401 int fd, val;
402 socklen_t len;
403
Willy Tarreaue5bdc512020-08-28 18:03:10 +0200404 fd = socket(AF_INET, SOCK_STREAM, 0);
405 if (fd >= 0) {
406#ifdef TCP_MAXSEG
407 /* retrieve the OS' default mss for TCPv4 */
408 len = sizeof(val);
409 if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &val, &len) == 0)
410 sock_inet_tcp_maxseg_default = val;
411#endif
412 close(fd);
413 }
414
Willy Tarreaud88e8c02020-08-28 16:06:01 +0200415 fd = socket(AF_INET6, SOCK_STREAM, 0);
416 if (fd >= 0) {
417#if defined(IPV6_V6ONLY)
418 /* retrieve the OS' bindv6only value */
419 len = sizeof(val);
420 if (getsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &val, &len) == 0 && val > 0)
421 sock_inet6_v6only_default = 1;
422#endif
Willy Tarreaue5bdc512020-08-28 18:03:10 +0200423
424#ifdef TCP_MAXSEG
425 /* retrieve the OS' default mss for TCPv6 */
426 len = sizeof(val);
427 if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &val, &len) == 0)
428 sock_inet6_tcp_maxseg_default = val;
429#endif
Willy Tarreaud88e8c02020-08-28 16:06:01 +0200430 close(fd);
431 }
432}
433
434INITCALL0(STG_PREPARE, sock_inet_prepare);
Willy Tarreau37bafdc2020-08-28 17:23:40 +0200435
436
437REGISTER_BUILD_OPTS("Built with transparent proxy support using:"
438#if defined(IP_TRANSPARENT)
439 " IP_TRANSPARENT"
440#endif
441#if defined(IPV6_TRANSPARENT)
442 " IPV6_TRANSPARENT"
443#endif
444#if defined(IP_FREEBIND)
445 " IP_FREEBIND"
446#endif
447#if defined(IP_BINDANY)
448 " IP_BINDANY"
449#endif
450#if defined(IPV6_BINDANY)
451 " IPV6_BINDANY"
452#endif
453#if defined(SO_BINDANY)
454 " SO_BINDANY"
455#endif
456 "");