blob: 3bb4f34bfa0b9586616bff1307f45ef1dad59a36 [file] [log] [blame]
/*
 * AF_INET/AF_INET6 socket management
 *
 * Copyright 2000-2020 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
Willy Tarreaud69ce1f2020-09-01 14:18:04 +020013#include <fcntl.h>
Willy Tarreau0d06df62020-08-28 15:10:11 +020014#include <string.h>
Willy Tarreaud69ce1f2020-09-01 14:18:04 +020015#include <unistd.h>
Willy Tarreau0d06df62020-08-28 15:10:11 +020016
17#include <sys/param.h>
18#include <sys/socket.h>
19#include <sys/types.h>
20
21#include <netinet/tcp.h>
22#include <netinet/in.h>
23
24#include <haproxy/api.h>
Willy Tarreaud69ce1f2020-09-01 14:18:04 +020025#include <haproxy/errors.h>
26#include <haproxy/fd.h>
Willy Tarreau37bafdc2020-08-28 17:23:40 +020027#include <haproxy/global.h>
Willy Tarreaud69ce1f2020-09-01 14:18:04 +020028#include <haproxy/namespace.h>
29#include <haproxy/receiver-t.h>
30#include <haproxy/sock.h>
Willy Tarreau0d06df62020-08-28 15:10:11 +020031#include <haproxy/sock_inet.h>
32#include <haproxy/tools.h>
33
34
/* PLEASE NOTE for the functions below:
 *   - sock_inet4_* is solely for AF_INET (IPv4)
 *   - sock_inet6_* is solely for AF_INET6 (IPv6)
 *   - sock_inet_*  is for either
 *
 * The address family SHOULD always be checked. In some cases a function will
 * be used in a situation where the address family is guaranteed (e.g. protocol
 * definitions), so the test may be avoided. This special case must then be
 * mentioned in the comment before the function definition.
 */
45
/* Tells whether the operating system uses IPV6_V6ONLY by default on AF_INET6
 * sockets: 0=no, 1=yes. It also remains 0 if IPv6 is not enabled/configured.
 */
int sock_inet6_v6only_default = 0;

/* Default TCPv4/TCPv6 MSS settings. -1=unknown. */
int sock_inet_tcp_maxseg_default = -1;
int sock_inet6_tcp_maxseg_default = -1;
54
/* Compares two AF_INET sockaddr addresses <a> and <b>. Returns 0 if they match
 * or non-zero if they do not match. Two addresses match when both are of
 * family AF_INET and they carry the same port and the same IPv4 address. Any
 * other family on either side is reported as a mismatch.
 */
int sock_inet4_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b)
{
	const struct sockaddr_in *a4 = (const struct sockaddr_in *)a;
	const struct sockaddr_in *b4 = (const struct sockaddr_in *)b;

	/* both sides must be AF_INET, otherwise the fields below are meaningless */
	if (a->ss_family != AF_INET || b->ss_family != AF_INET)
		return -1;

	if (a4->sin_port != b4->sin_port)
		return -1;

	return memcmp(&a4->sin_addr, &b4->sin_addr, sizeof(a4->sin_addr));
}
74
/* Compares two AF_INET6 sockaddr addresses <a> and <b>. Returns 0 if they
 * match or non-zero if they do not match. Two addresses match when both are of
 * family AF_INET6 and they carry the same port and the same IPv6 address. Any
 * other family on either side is reported as a mismatch.
 */
int sock_inet6_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b)
{
	const struct sockaddr_in6 *a6 = (const struct sockaddr_in6 *)a;
	const struct sockaddr_in6 *b6 = (const struct sockaddr_in6 *)b;

	/* both sides must be AF_INET6, otherwise the fields below are meaningless */
	if (a->ss_family != AF_INET6 || b->ss_family != AF_INET6)
		return -1;

	if (a6->sin6_port != b6->sin6_port)
		return -1;

	return memcmp(&a6->sin6_addr, &b6->sin6_addr, sizeof(a6->sin6_addr));
}
Willy Tarreauc5a94c92020-08-28 15:19:45 +020094
/*
 * Retrieves the original destination address for the socket <fd> which must be
 * of family AF_INET (not AF_INET6), with <dir> indicating if we're a listener
 * (=0) or an initiator (!=0). For an initiator, the destination is the peer's
 * address. For a listener, it is the local address of the socket, and if the
 * original destination address was translated, the original address is
 * retrieved instead when the build supports it. It returns 0 in case of
 * success, -1 in case of error. The socket's destination address is stored in
 * <sa> for <salen> bytes.
 */
int sock_inet_get_dst(int fd, struct sockaddr *sa, socklen_t salen, int dir)
{
	if (dir)
		return getpeername(fd, sa, &salen);
	else {
		int ret = getsockname(fd, sa, &salen);

		if (ret < 0)
			return ret;

#if defined(USE_TPROXY) && defined(SO_ORIGINAL_DST)
		/* For TPROXY and Netfilter's NAT, we can retrieve the original
		 * IPv4 address before DNAT/REDIRECT. We must not do that with
		 * other families because v6-mapped IPv4 addresses are still
		 * reported as v4. A failure here is not an error: the address
		 * already stored by getsockname() above is kept.
		 */
		if (getsockopt(fd, SOL_IP, SO_ORIGINAL_DST, sa, &salen) == 0)
			return 0;
#endif
		return ret;
	}
}
Willy Tarreau25140cc2020-08-28 15:40:33 +0200125
/* Returns non-zero if the integer socket option <optname> at level <level> can
 * be read from socket <fd> and is set to a non-zero value, otherwise zero
 * (including when getsockopt() fails).
 */
static inline int sock_inet_opt_is_set(int fd, int level, int optname)
{
	int val = 0;
	socklen_t len = sizeof(val);

	return getsockopt(fd, level, optname, &val, &len) == 0 && val;
}

/* Returns true if the passed FD corresponds to a socket bound with RX_O_FOREIGN
 * according to the various supported socket options. The socket's address family
 * must be passed in <family>. Each option known at build time for this family
 * is probed in turn and 1 is returned as soon as one of them is found enabled.
 * 0 is returned if none is enabled, if none is supported, or if <family> is
 * neither AF_INET nor AF_INET6.
 */
int sock_inet_is_foreign(int fd, sa_family_t family)
{
	switch (family) {
	case AF_INET:
#if defined(IP_TRANSPARENT)
		if (sock_inet_opt_is_set(fd, SOL_IP, IP_TRANSPARENT))
			return 1;
#endif
#if defined(IP_FREEBIND)
		if (sock_inet_opt_is_set(fd, SOL_IP, IP_FREEBIND))
			return 1;
#endif
#if defined(IP_BINDANY)
		if (sock_inet_opt_is_set(fd, IPPROTO_IP, IP_BINDANY))
			return 1;
#endif
#if defined(SO_BINDANY)
		if (sock_inet_opt_is_set(fd, SOL_SOCKET, SO_BINDANY))
			return 1;
#endif
		break;

	case AF_INET6:
#if defined(IPV6_TRANSPARENT) && defined(SOL_IPV6)
		if (sock_inet_opt_is_set(fd, SOL_IPV6, IPV6_TRANSPARENT))
			return 1;
#endif
#if defined(IP_FREEBIND)
		if (sock_inet_opt_is_set(fd, SOL_IP, IP_FREEBIND))
			return 1;
#endif
#if defined(IPV6_BINDANY)
		if (sock_inet_opt_is_set(fd, IPPROTO_IPV6, IPV6_BINDANY))
			return 1;
#endif
#if defined(SO_BINDANY)
		if (sock_inet_opt_is_set(fd, SOL_SOCKET, SO_BINDANY))
			return 1;
#endif
		break;

	default:
		break;
	}
	return 0;
}
Willy Tarreaud88e8c02020-08-28 16:06:01 +0200184
Willy Tarreau37bafdc2020-08-28 17:23:40 +0200185/* Attempt all known socket options to prepare an AF_INET4 socket to be bound
186 * to a foreign address. The socket must already exist and must not be bound.
187 * 1 is returned on success, 0 on failure. The caller must check the address
188 * family before calling this function.
189 */
190int sock_inet4_make_foreign(int fd)
191{
192 return
193#if defined(IP_TRANSPARENT)
194 setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one)) == 0 ||
195#endif
196#if defined(IP_FREEBIND)
197 setsockopt(fd, SOL_IP, IP_FREEBIND, &one, sizeof(one)) == 0 ||
198#endif
199#if defined(IP_BINDANY)
200 setsockopt(fd, IPPROTO_IP, IP_BINDANY, &one, sizeof(one)) == 0 ||
201#endif
202#if defined(SO_BINDANY)
203 setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0 ||
204#endif
205 0;
206}
207
208/* Attempt all known socket options to prepare an AF_INET6 socket to be bound
209 * to a foreign address. The socket must already exist and must not be bound.
210 * 1 is returned on success, 0 on failure. The caller must check the address
211 * family before calling this function.
212 */
213int sock_inet6_make_foreign(int fd)
214{
215 return
216#if defined(IPV6_TRANSPARENT) && defined(SOL_IPV6)
217 setsockopt(fd, SOL_IPV6, IPV6_TRANSPARENT, &one, sizeof(one)) == 0 ||
218#endif
219#if defined(IP_FREEBIND)
220 setsockopt(fd, SOL_IP, IP_FREEBIND, &one, sizeof(one)) == 0 ||
221#endif
222#if defined(IPV6_BINDANY)
223 setsockopt(fd, IPPROTO_IPV6, IPV6_BINDANY, &one, sizeof(one)) == 0 ||
224#endif
225#if defined(SO_BINDANY)
226 setsockopt(fd, SOL_SOCKET, SO_BINDANY, &one, sizeof(one)) == 0 ||
227#endif
228 0;
229}
230
Willy Tarreaud69ce1f2020-09-01 14:18:04 +0200231/* Binds receiver <rx>, and assigns <handler> and rx->owner as the callback and
232 * context, respectively. Returns and error code made of ERR_* bits on failure
233 * or ERR_NONE on success. On failure, an error message may be passed into
234 * <errmsg>.
235 */
236int sock_inet_bind_receiver(struct receiver *rx, void (*handler)(int fd), char **errmsg)
237{
238 int fd, err, ext;
239 /* copy listener addr because sometimes we need to switch family */
240 struct sockaddr_storage addr_inet = rx->addr;
241
242 /* force to classic sock family, not AF_CUST_* */
243 addr_inet.ss_family = rx->proto->sock_family;
244
245 /* ensure we never return garbage */
246 if (errmsg)
247 *errmsg = 0;
248
249 err = ERR_NONE;
250
251 if (rx->flags & RX_F_BOUND)
252 return ERR_NONE;
253
254 /* if no FD was assigned yet, we'll have to either find a compatible
255 * one or create a new one.
256 */
257 if (rx->fd == -1)
258 rx->fd = sock_find_compatible_fd(rx);
259
260 /* if the receiver now has an fd assigned, then we were offered the fd
261 * by an external process (most likely the parent), and we don't want
262 * to create a new socket. However we still want to set a few flags on
263 * the socket.
264 */
265 fd = rx->fd;
266 ext = (fd >= 0);
267
268 if (!ext) {
269 fd = my_socketat(rx->settings->netns, rx->proto->sock_family,
270 rx->proto->sock_type, rx->proto->sock_prot);
271 if (fd == -1) {
272 err |= ERR_RETRYABLE | ERR_ALERT;
273 memprintf(errmsg, "cannot create receiving socket");
274 goto bind_return;
275 }
276 }
277
278 if (fd >= global.maxsock) {
279 err |= ERR_FATAL | ERR_ABORT | ERR_ALERT;
280 memprintf(errmsg, "not enough free sockets (raise '-n' parameter)");
281 goto bind_close_return;
282 }
283
284 if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
285 err |= ERR_FATAL | ERR_ALERT;
286 memprintf(errmsg, "cannot make socket non-blocking");
287 goto bind_close_return;
288 }
289
290 if (!ext && setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) == -1) {
291 /* not fatal but should be reported */
292 memprintf(errmsg, "cannot do so_reuseaddr");
293 err |= ERR_ALERT;
294 }
295
296#ifdef SO_REUSEPORT
297 /* OpenBSD and Linux 3.9 support this. As it's present in old libc versions of
298 * Linux, it might return an error that we will silently ignore.
299 */
300 if (!ext && (global.tune.options & GTUNE_USE_REUSEPORT))
301 setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
302#endif
303
304 if (!ext && (rx->settings->options & RX_O_FOREIGN)) {
305 switch (addr_inet.ss_family) {
306 case AF_INET:
307 if (!sock_inet4_make_foreign(fd)) {
308 memprintf(errmsg, "cannot make receiving socket transparent");
309 err |= ERR_ALERT;
310 }
311 break;
312 case AF_INET6:
313 if (!sock_inet6_make_foreign(fd)) {
314 memprintf(errmsg, "cannot make receiving socket transparent");
315 err |= ERR_ALERT;
316 }
317 break;
318 }
319 }
320
321#ifdef SO_BINDTODEVICE
322 /* Note: this might fail if not CAP_NET_RAW */
323 if (!ext && rx->settings->interface) {
324 if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
325 rx->settings->interface,
326 strlen(rx->settings->interface) + 1) == -1) {
327 memprintf(errmsg, "cannot bind receiver to device");
328 err |= ERR_WARN;
329 }
330 }
331#endif
332
333#if defined(IPV6_V6ONLY)
334 if (addr_inet.ss_family == AF_INET6 && !ext) {
335 /* Prepare to match the v6only option against what we really want. Note
336 * that sadly the two options are not exclusive to each other and that
337 * v6only is stronger than v4v6.
338 */
339 if ((rx->settings->options & RX_O_V6ONLY) ||
340 (sock_inet6_v6only_default && !(rx->settings->options & RX_O_V4V6)))
341 setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one));
342 else
343 setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &zero, sizeof(zero));
344 }
345#endif
346
347 if (!ext && bind(fd, (struct sockaddr *)&addr_inet, rx->proto->sock_addrlen) == -1) {
348 err |= ERR_RETRYABLE | ERR_ALERT;
349 memprintf(errmsg, "cannot bind socket");
350 goto bind_close_return;
351 }
352
353 rx->fd = fd;
354 rx->flags |= RX_F_BOUND;
355
356 fd_insert(fd, rx->owner, handler, thread_mask(rx->settings->bind_thread) & all_threads_mask);
357
358 /* for now, all regularly bound TCP listeners are exportable */
359 if (!(rx->flags & RX_F_INHERITED))
360 fdtab[fd].exported = 1;
361
362 bind_return:
363 if (errmsg && *errmsg) {
364 char pn[INET6_ADDRSTRLEN];
365
366 addr_to_str(&addr_inet, pn, sizeof(pn));
367 memprintf(errmsg, "%s [%s:%d]", *errmsg, pn, get_host_port(&addr_inet));
368 }
369 return err;
370
371 bind_close_return:
372 close(fd);
373 goto bind_return;
374}
375
Willy Tarreaud88e8c02020-08-28 16:06:01 +0200376static void sock_inet_prepare()
377{
378 int fd, val;
379 socklen_t len;
380
Willy Tarreaue5bdc512020-08-28 18:03:10 +0200381 fd = socket(AF_INET, SOCK_STREAM, 0);
382 if (fd >= 0) {
383#ifdef TCP_MAXSEG
384 /* retrieve the OS' default mss for TCPv4 */
385 len = sizeof(val);
386 if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &val, &len) == 0)
387 sock_inet_tcp_maxseg_default = val;
388#endif
389 close(fd);
390 }
391
Willy Tarreaud88e8c02020-08-28 16:06:01 +0200392 fd = socket(AF_INET6, SOCK_STREAM, 0);
393 if (fd >= 0) {
394#if defined(IPV6_V6ONLY)
395 /* retrieve the OS' bindv6only value */
396 len = sizeof(val);
397 if (getsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &val, &len) == 0 && val > 0)
398 sock_inet6_v6only_default = 1;
399#endif
Willy Tarreaue5bdc512020-08-28 18:03:10 +0200400
401#ifdef TCP_MAXSEG
402 /* retrieve the OS' default mss for TCPv6 */
403 len = sizeof(val);
404 if (getsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &val, &len) == 0)
405 sock_inet6_tcp_maxseg_default = val;
406#endif
Willy Tarreaud88e8c02020-08-28 16:06:01 +0200407 close(fd);
408 }
409}
410
411INITCALL0(STG_PREPARE, sock_inet_prepare);
Willy Tarreau37bafdc2020-08-28 17:23:40 +0200412
413
414REGISTER_BUILD_OPTS("Built with transparent proxy support using:"
415#if defined(IP_TRANSPARENT)
416 " IP_TRANSPARENT"
417#endif
418#if defined(IPV6_TRANSPARENT)
419 " IPV6_TRANSPARENT"
420#endif
421#if defined(IP_FREEBIND)
422 " IP_FREEBIND"
423#endif
424#if defined(IP_BINDANY)
425 " IP_BINDANY"
426#endif
427#if defined(IPV6_BINDANY)
428 " IPV6_BINDANY"
429#endif
430#if defined(SO_BINDANY)
431 " SO_BINDANY"
432#endif
433 "");