blob: 6675be583326692834a8adbaa4386ba76cd422e8 [file] [log] [blame]
Willy Tarreau0d06df62020-08-28 15:10:11 +02001/*
2 * AF_INET/AF_INET6 socket management
3 *
4 * Copyright 2000-2020 Willy Tarreau <w@1wt.eu>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
Willy Tarreauf2cda102020-09-17 14:02:01 +020013#include <errno.h>
Willy Tarreaud69ce1f2020-09-01 14:18:04 +020014#include <fcntl.h>
Willy Tarreau0d06df62020-08-28 15:10:11 +020015#include <string.h>
Willy Tarreaud69ce1f2020-09-01 14:18:04 +020016#include <unistd.h>
Willy Tarreau0d06df62020-08-28 15:10:11 +020017
18#include <sys/param.h>
19#include <sys/socket.h>
20#include <sys/types.h>
21
22#include <netinet/tcp.h>
23#include <netinet/in.h>
24
25#include <haproxy/api.h>
Willy Tarreaud69ce1f2020-09-01 14:18:04 +020026#include <haproxy/errors.h>
27#include <haproxy/fd.h>
Willy Tarreau37bafdc2020-08-28 17:23:40 +020028#include <haproxy/global.h>
Willy Tarreaud69ce1f2020-09-01 14:18:04 +020029#include <haproxy/namespace.h>
30#include <haproxy/receiver-t.h>
31#include <haproxy/sock.h>
Willy Tarreau0d06df62020-08-28 15:10:11 +020032#include <haproxy/sock_inet.h>
33#include <haproxy/tools.h>
34
Willy Tarreaub0254cb2020-09-04 08:07:11 +020035struct proto_fam proto_fam_inet4 = {
36 .name = "inet4",
37 .sock_domain = PF_INET,
38 .sock_family = AF_INET,
39 .sock_addrlen = sizeof(struct sockaddr_in),
40 .l3_addrlen = 32/8,
41 .addrcmp = sock_inet4_addrcmp,
42 .bind = sock_inet_bind_receiver,
43 .get_src = sock_get_src,
44 .get_dst = sock_inet_get_dst,
45};
46
47struct proto_fam proto_fam_inet6 = {
48 .name = "inet6",
49 .sock_domain = PF_INET6,
50 .sock_family = AF_INET6,
51 .sock_addrlen = sizeof(struct sockaddr_in6),
52 .l3_addrlen = 128/8,
53 .addrcmp = sock_inet6_addrcmp,
54 .bind = sock_inet_bind_receiver,
55 .get_src = sock_get_src,
56 .get_dst = sock_get_dst,
57};
Willy Tarreau0d06df62020-08-28 15:10:11 +020058
/* PLEASE NOTE for the functions below:
 *   - sock_inet4_* is solely for AF_INET (IPv4)
 *   - sock_inet6_* is solely for AF_INET6 (IPv6)
 *   - sock_inet_*  is for either
 *
 * The address family SHOULD always be checked. In some cases a function will
 * be used in a situation where the address family is guaranteed (e.g. protocol
 * definitions), so the test may be avoided. This special case must then be
 * mentioned in the comment before the function definition.
 */
69
/* Tells whether the operating system enables IPV6_V6ONLY by default on new
 * IPv6 sockets: 0=no, 1=yes. It also remains 0 if IPv6 is not
 * enabled/configured. The value is probed by sock_inet_prepare().
 */
int sock_inet6_v6only_default = 0;

/* Default TCPv4/TCPv6 MSS settings as reported by the OS. -1=unknown. They
 * are probed by sock_inet_prepare().
 */
int sock_inet_tcp_maxseg_default = -1;
int sock_inet6_tcp_maxseg_default = -1;
78
/* Compares two AF_INET socket addresses <a> and <b> (port and IPv4 address).
 * Returns 0 if they match, or non-zero if they differ. Both addresses must be
 * of family AF_INET, otherwise they are reported as different (-1).
 */
int sock_inet4_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b)
{
	const struct sockaddr_in *lhs;
	const struct sockaddr_in *rhs;

	/* only two IPv4 addresses can be compared here */
	if (a->ss_family != AF_INET || b->ss_family != AF_INET)
		return -1;

	lhs = (const struct sockaddr_in *)a;
	rhs = (const struct sockaddr_in *)b;

	if (lhs->sin_port != rhs->sin_port)
		return -1;

	return memcmp(&lhs->sin_addr, &rhs->sin_addr, sizeof(lhs->sin_addr));
}
98
/* Compares two AF_INET6 socket addresses <a> and <b> (port and IPv6 address).
 * Returns 0 if they match, or non-zero if they differ. Both addresses must be
 * of family AF_INET6, otherwise they are reported as different (-1).
 */
int sock_inet6_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b)
{
	const struct sockaddr_in6 *lhs;
	const struct sockaddr_in6 *rhs;

	/* only two IPv6 addresses can be compared here */
	if (a->ss_family != AF_INET6 || b->ss_family != AF_INET6)
		return -1;

	lhs = (const struct sockaddr_in6 *)a;
	rhs = (const struct sockaddr_in6 *)b;

	if (lhs->sin6_port != rhs->sin6_port)
		return -1;

	return memcmp(&lhs->sin6_addr, &rhs->sin6_addr, sizeof(lhs->sin6_addr));
}
Willy Tarreauc5a94c92020-08-28 15:19:45 +0200118
/*
 * Retrieves the destination address of socket <fd>, which must be of family
 * AF_INET (not AF_INET6). <dir> tells whether we are a listener (=0) or an
 * initiator (!=0): an initiator's destination is the peer address, while a
 * listener's destination is the local address. For a listener, when built
 * with USE_TPROXY and SO_ORIGINAL_DST is available, the address seen before
 * translation is looked up as well and preferred if that lookup succeeds.
 * The address is stored into <sa> for at most <salen> bytes. Returns 0 on
 * success, -1 on error.
 */
int sock_inet_get_dst(int fd, struct sockaddr *sa, socklen_t salen, int dir)
{
	int rc;

	/* initiator: the destination is simply the peer */
	if (dir)
		return getpeername(fd, sa, &salen);

	/* listener: start from the local address */
	rc = getsockname(fd, sa, &salen);
	if (rc < 0)
		return rc;

#if defined(USE_TPROXY) && defined(SO_ORIGINAL_DST)
	/* For TPROXY and Netfilter's NAT, we can retrieve the original
	 * IPv4 address before DNAT/REDIRECT. We must not do that with
	 * other families because v6-mapped IPv4 addresses are still
	 * reported as v4.
	 */
	if (getsockopt(fd, SOL_IP, SO_ORIGINAL_DST, sa, &salen) == 0)
		return 0;
#endif
	/* no translation info available: keep what getsockname() gave us */
	return rc;
}
Willy Tarreau25140cc2020-08-28 15:40:33 +0200149
/* Returns 1 if the integer socket option <opt> at level <level> can be read
 * from <fd> and holds a non-zero value, otherwise 0. A failing getsockopt()
 * (unsupported option, bad descriptor, ...) is reported as "not set".
 */
static inline int sock_inet_opt_is_set(int fd, int level, int opt)
{
	int val = 0;
	socklen_t len = sizeof(val);

	return getsockopt(fd, level, opt, &val, &len) == 0 && val;
}

/* Returns true if the passed FD corresponds to a socket bound with RX_O_FOREIGN
 * according to the various supported socket options. The socket's address
 * family must be passed in <family>. Every option known at build time for
 * that family is checked in turn and the first one found enabled wins. Any
 * family other than AF_INET/AF_INET6 yields 0.
 */
int sock_inet_is_foreign(int fd, sa_family_t family)
{
	switch (family) {
	case AF_INET:
#if defined(IP_TRANSPARENT)
		if (sock_inet_opt_is_set(fd, SOL_IP, IP_TRANSPARENT))
			return 1;
#endif
#if defined(IP_FREEBIND)
		if (sock_inet_opt_is_set(fd, SOL_IP, IP_FREEBIND))
			return 1;
#endif
#if defined(IP_BINDANY)
		if (sock_inet_opt_is_set(fd, IPPROTO_IP, IP_BINDANY))
			return 1;
#endif
#if defined(SO_BINDANY)
		if (sock_inet_opt_is_set(fd, SOL_SOCKET, SO_BINDANY))
			return 1;
#endif
		break;

	case AF_INET6:
#if defined(IPV6_TRANSPARENT) && defined(SOL_IPV6)
		if (sock_inet_opt_is_set(fd, SOL_IPV6, IPV6_TRANSPARENT))
			return 1;
#endif
#if defined(IP_FREEBIND)
		if (sock_inet_opt_is_set(fd, SOL_IP, IP_FREEBIND))
			return 1;
#endif
#if defined(IPV6_BINDANY)
		if (sock_inet_opt_is_set(fd, IPPROTO_IPV6, IPV6_BINDANY))
			return 1;
#endif
#if defined(SO_BINDANY)
		if (sock_inet_opt_is_set(fd, SOL_SOCKET, SO_BINDANY))
			return 1;
#endif
		break;

	default:
		/* not an inet family: never foreign */
		break;
	}
	return 0;
}
Willy Tarreaud88e8c02020-08-28 16:06:01 +0200208
Willy Tarreau37bafdc2020-08-28 17:23:40 +0200209/* Attempt all known socket options to prepare an AF_INET4 socket to be bound
210 * to a foreign address. The socket must already exist and must not be bound.
211 * 1 is returned on success, 0 on failure. The caller must check the address
212 * family before calling this function.
213 */
214int sock_inet4_make_foreign(int fd)
215{
216 return
217#if defined(IP_TRANSPARENT)
218 setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one)) == 0 ||
219#endif
220#if defined(IP_FREEBIND)
221 setsockopt(fd, SOL_IP, IP_FREEBIND, &one, sizeof(one)) == 0 ||
222#endif
223#if defined(IP_BINDANY)
224 setsockopt(fd, IPPROTO_IP, IP_BINDANY, &one, sizeof(one)) == 0 ||
225#endif
226#if defined(SO_BINDANY)
227 setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0 ||
228#endif
229 0;
230}
231
232/* Attempt all known socket options to prepare an AF_INET6 socket to be bound
233 * to a foreign address. The socket must already exist and must not be bound.
234 * 1 is returned on success, 0 on failure. The caller must check the address
235 * family before calling this function.
236 */
237int sock_inet6_make_foreign(int fd)
238{
239 return
240#if defined(IPV6_TRANSPARENT) && defined(SOL_IPV6)
241 setsockopt(fd, SOL_IPV6, IPV6_TRANSPARENT, &one, sizeof(one)) == 0 ||
242#endif
243#if defined(IP_FREEBIND)
244 setsockopt(fd, SOL_IP, IP_FREEBIND, &one, sizeof(one)) == 0 ||
245#endif
246#if defined(IPV6_BINDANY)
247 setsockopt(fd, IPPROTO_IPV6, IPV6_BINDANY, &one, sizeof(one)) == 0 ||
248#endif
249#if defined(SO_BINDANY)
250 setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0 ||
251#endif
252 0;
253}
254
Willy Tarreau233ad282020-10-15 21:45:15 +0200255/* Binds receiver <rx>, and assigns rx->iocb and rx->owner as the callback and
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200256 * context, respectively. Returns and error code made of ERR_* bits on failure
257 * or ERR_NONE on success. On failure, an error message may be passed into
258 * <errmsg>.
259 */
Willy Tarreau233ad282020-10-15 21:45:15 +0200260int sock_inet_bind_receiver(struct receiver *rx, char **errmsg)
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200261{
262 int fd, err, ext;
263 /* copy listener addr because sometimes we need to switch family */
264 struct sockaddr_storage addr_inet = rx->addr;
265
266 /* force to classic sock family, not AF_CUST_* */
Willy Tarreauf1f66092020-09-04 08:15:31 +0200267 addr_inet.ss_family = rx->proto->fam->sock_family;
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200268
269 /* ensure we never return garbage */
270 if (errmsg)
271 *errmsg = 0;
272
273 err = ERR_NONE;
274
275 if (rx->flags & RX_F_BOUND)
276 return ERR_NONE;
277
278 /* if no FD was assigned yet, we'll have to either find a compatible
279 * one or create a new one.
280 */
281 if (rx->fd == -1)
282 rx->fd = sock_find_compatible_fd(rx);
283
284 /* if the receiver now has an fd assigned, then we were offered the fd
285 * by an external process (most likely the parent), and we don't want
286 * to create a new socket. However we still want to set a few flags on
287 * the socket.
288 */
289 fd = rx->fd;
290 ext = (fd >= 0);
291
292 if (!ext) {
Willy Tarreauf1f66092020-09-04 08:15:31 +0200293 fd = my_socketat(rx->settings->netns, rx->proto->fam->sock_domain,
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200294 rx->proto->sock_type, rx->proto->sock_prot);
295 if (fd == -1) {
296 err |= ERR_RETRYABLE | ERR_ALERT;
Willy Tarreau36722d22020-09-17 08:32:17 +0200297 memprintf(errmsg, "cannot create receiving socket (%s)", strerror(errno));
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200298 goto bind_return;
299 }
300 }
301
302 if (fd >= global.maxsock) {
303 err |= ERR_FATAL | ERR_ABORT | ERR_ALERT;
304 memprintf(errmsg, "not enough free sockets (raise '-n' parameter)");
305 goto bind_close_return;
306 }
307
308 if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
309 err |= ERR_FATAL | ERR_ALERT;
310 memprintf(errmsg, "cannot make socket non-blocking");
311 goto bind_close_return;
312 }
313
314 if (!ext && setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) == -1) {
315 /* not fatal but should be reported */
316 memprintf(errmsg, "cannot do so_reuseaddr");
317 err |= ERR_ALERT;
318 }
319
320#ifdef SO_REUSEPORT
321 /* OpenBSD and Linux 3.9 support this. As it's present in old libc versions of
322 * Linux, it might return an error that we will silently ignore.
323 */
324 if (!ext && (global.tune.options & GTUNE_USE_REUSEPORT))
325 setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
326#endif
327
328 if (!ext && (rx->settings->options & RX_O_FOREIGN)) {
329 switch (addr_inet.ss_family) {
330 case AF_INET:
331 if (!sock_inet4_make_foreign(fd)) {
332 memprintf(errmsg, "cannot make receiving socket transparent");
333 err |= ERR_ALERT;
334 }
335 break;
336 case AF_INET6:
337 if (!sock_inet6_make_foreign(fd)) {
338 memprintf(errmsg, "cannot make receiving socket transparent");
339 err |= ERR_ALERT;
340 }
341 break;
342 }
343 }
344
345#ifdef SO_BINDTODEVICE
346 /* Note: this might fail if not CAP_NET_RAW */
347 if (!ext && rx->settings->interface) {
348 if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
349 rx->settings->interface,
350 strlen(rx->settings->interface) + 1) == -1) {
Willy Tarreau36722d22020-09-17 08:32:17 +0200351 memprintf(errmsg, "cannot bind receiver to device (%s)", strerror(errno));
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200352 err |= ERR_WARN;
353 }
354 }
355#endif
356
357#if defined(IPV6_V6ONLY)
358 if (addr_inet.ss_family == AF_INET6 && !ext) {
359 /* Prepare to match the v6only option against what we really want. Note
360 * that sadly the two options are not exclusive to each other and that
361 * v6only is stronger than v4v6.
362 */
363 if ((rx->settings->options & RX_O_V6ONLY) ||
364 (sock_inet6_v6only_default && !(rx->settings->options & RX_O_V4V6)))
365 setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one));
366 else
367 setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &zero, sizeof(zero));
368 }
369#endif
370
Willy Tarreauf1f66092020-09-04 08:15:31 +0200371 if (!ext && bind(fd, (struct sockaddr *)&addr_inet, rx->proto->fam->sock_addrlen) == -1) {
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200372 err |= ERR_RETRYABLE | ERR_ALERT;
Willy Tarreau36722d22020-09-17 08:32:17 +0200373 memprintf(errmsg, "cannot bind socket (%s)", strerror(errno));
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200374 goto bind_close_return;
375 }
376
377 rx->fd = fd;
378 rx->flags |= RX_F_BOUND;
379
Willy Tarreau233ad282020-10-15 21:45:15 +0200380 fd_insert(fd, rx->owner, rx->iocb, thread_mask(rx->settings->bind_thread) & all_threads_mask);
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200381
382 /* for now, all regularly bound TCP listeners are exportable */
383 if (!(rx->flags & RX_F_INHERITED))
384 fdtab[fd].exported = 1;
385
386 bind_return:
387 if (errmsg && *errmsg) {
388 char pn[INET6_ADDRSTRLEN];
389
390 addr_to_str(&addr_inet, pn, sizeof(pn));
391 memprintf(errmsg, "%s [%s:%d]", *errmsg, pn, get_host_port(&addr_inet));
392 }
393 return err;
394
395 bind_close_return:
396 close(fd);
397 goto bind_return;
398}
399
Willy Tarreaud88e8c02020-08-28 16:06:01 +0200400static void sock_inet_prepare()
401{
402 int fd, val;
403 socklen_t len;
404
Willy Tarreaue5bdc512020-08-28 18:03:10 +0200405 fd = socket(AF_INET, SOCK_STREAM, 0);
406 if (fd >= 0) {
407#ifdef TCP_MAXSEG
408 /* retrieve the OS' default mss for TCPv4 */
409 len = sizeof(val);
410 if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &val, &len) == 0)
411 sock_inet_tcp_maxseg_default = val;
412#endif
413 close(fd);
414 }
415
Willy Tarreaud88e8c02020-08-28 16:06:01 +0200416 fd = socket(AF_INET6, SOCK_STREAM, 0);
417 if (fd >= 0) {
418#if defined(IPV6_V6ONLY)
419 /* retrieve the OS' bindv6only value */
420 len = sizeof(val);
421 if (getsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &val, &len) == 0 && val > 0)
422 sock_inet6_v6only_default = 1;
423#endif
Willy Tarreaue5bdc512020-08-28 18:03:10 +0200424
425#ifdef TCP_MAXSEG
426 /* retrieve the OS' default mss for TCPv6 */
427 len = sizeof(val);
428 if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &val, &len) == 0)
429 sock_inet6_tcp_maxseg_default = val;
430#endif
Willy Tarreaud88e8c02020-08-28 16:06:01 +0200431 close(fd);
432 }
433}
434
435INITCALL0(STG_PREPARE, sock_inet_prepare);
Willy Tarreau37bafdc2020-08-28 17:23:40 +0200436
437
438REGISTER_BUILD_OPTS("Built with transparent proxy support using:"
439#if defined(IP_TRANSPARENT)
440 " IP_TRANSPARENT"
441#endif
442#if defined(IPV6_TRANSPARENT)
443 " IPV6_TRANSPARENT"
444#endif
445#if defined(IP_FREEBIND)
446 " IP_FREEBIND"
447#endif
448#if defined(IP_BINDANY)
449 " IP_BINDANY"
450#endif
451#if defined(IPV6_BINDANY)
452 " IPV6_BINDANY"
453#endif
454#if defined(SO_BINDANY)
455 " SO_BINDANY"
456#endif
457 "");