/*
 * SOCK_UNIX socket management
 *
 * Copyright 2000-2020 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */

#include <ctype.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#include <sys/param.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/un.h>

#include <haproxy/api.h>
#include <haproxy/errors.h>
#include <haproxy/fd.h>
#include <haproxy/global.h>
#include <haproxy/listener.h>
#include <haproxy/namespace.h>
#include <haproxy/receiver-t.h>
#include <haproxy/sock.h>
#include <haproxy/sock_unix.h>
#include <haproxy/tools.h>


Willy Tarreaub0254cb2020-09-04 08:07:11 +020039struct proto_fam proto_fam_unix = {
40 .name = "unix",
41 .sock_domain = PF_UNIX,
42 .sock_family = AF_UNIX,
43 .sock_addrlen = sizeof(struct sockaddr_un),
44 .l3_addrlen = sizeof(((struct sockaddr_un*)0)->sun_path),
45 .addrcmp = sock_unix_addrcmp,
46 .bind = sock_unix_bind_receiver,
47 .get_src = sock_get_src,
48 .get_dst = sock_get_dst,
49};
50
/* PLEASE NOTE for functions below:
 *
 * The address family SHOULD always be checked. In some cases a function will
 * be used in a situation where the address family is guaranteed (e.g. protocol
 * definitions), so the test may be avoided. This special case must then be
 * mentioned in the comment before the function definition.
 */


/* Tells whether what follows the dot located at offset <dot> in the
 * zero-terminated string <path> is a temporary socket suffix, that is a
 * (possibly empty) run of decimal digits immediately followed by ".tmp" and
 * the end of the string. Returns non-zero if it is, zero otherwise.
 */
static int sock_unix_has_tmp_suffix(const char *path, int dot)
{
	int pos = dot + 1;

	/* the loop stops at the terminating zero at the latest */
	while (isdigit((unsigned char)path[pos]))
		pos++;

	return strcmp(path + pos, ".tmp") == 0;
}

/* Compares two AF_UNIX sockaddr addresses. Returns 0 if they match or non-zero
 * if they do not match (including when either family is not AF_UNIX).
 *
 * ABNS socket addresses (those starting with \0) are compared over the whole
 * sun_path array. For regular UNIX sockets, names ending exactly with
 * ".<digits>.tmp" are those of newly bound sockets about to be renamed to their
 * final name; this suffix is ignored so that a temporary name matches both the
 * final name and another temporary name of the same socket:
 *
 *     /tmp/test           matches /tmp/test.1822.tmp
 *     /tmp/test.1822.tmp  matches /tmp/test.XXXX.tmp
 *
 * Note that our UNIX socket paths are always zero-terminated, which this
 * function relies on.
 */
int sock_unix_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b)
{
	const struct sockaddr_un *au = (const struct sockaddr_un *)a;
	const struct sockaddr_un *bu = (const struct sockaddr_un *)b;
	int idx, dot;

	if (a->ss_family != b->ss_family)
		return -1;

	if (a->ss_family != AF_UNIX)
		return -1;

	if (au->sun_path[0] != bu->sun_path[0])
		return -1;

	/* abstract namespace: every byte of the path is significant */
	if (au->sun_path[0] == 0)
		return memcmp(au->sun_path, bu->sun_path, sizeof(au->sun_path));

	/* walk the common prefix, remembering the position of its last dot
	 * (0 means "no dot seen", the first byte is never considered).
	 */
	idx = 1;
	dot = 0;
	while (au->sun_path[idx] == bu->sun_path[idx]) {
		if (au->sun_path[idx] == 0)
			return 0;
		if (au->sun_path[idx] == '.')
			dot = idx;
		idx++;
	}

	/* Now we have a difference. It's OK if they are within or after a
	 * sequence of digits following a dot, and are followed by ".tmp".
	 *
	 * When one of the strings ends here it is in "final" format (does not
	 * end with ".XXXX.tmp"), so the other one must start its temporary
	 * suffix exactly at this position.
	 */
	if (au->sun_path[idx] == 0 || bu->sun_path[idx] == 0) {
		if (au->sun_path[idx] == '.' || bu->sun_path[idx] == '.')
			dot = idx; /* try to match against temp path */
		else
			return -1; /* invalid temp path */
	}

	if (!dot)
		return -1;

	/* each path that continues past the difference must end with a valid
	 * temporary suffix starting at <dot>.
	 */
	if (au->sun_path[idx] != 0 && !sock_unix_has_tmp_suffix(au->sun_path, dot))
		return -1;

	if (bu->sun_path[idx] != 0 && !sock_unix_has_tmp_suffix(bu->sun_path, dot))
		return -1;

	/* OK that's a match */
	return 0;
}

Willy Tarreau233ad282020-10-15 21:45:15 +0200134/* Binds receiver <rx>, and assigns rx->iocb and rx->owner as the callback and
135 * context, respectively, with ->bind_thread as the thread mask. Returns an
136 * error code made of ERR_* bits on failure or ERR_NONE on success. On failure,
137 * an error message may be passed into <errmsg>.
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200138 */
Willy Tarreau233ad282020-10-15 21:45:15 +0200139int sock_unix_bind_receiver(struct receiver *rx, char **errmsg)
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200140{
141 char tempname[MAXPATHLEN];
142 char backname[MAXPATHLEN];
143 struct sockaddr_un addr;
144 const char *path;
145 int maxpathlen;
146 int fd, err, ext, ret;
147
148 /* ensure we never return garbage */
149 if (errmsg)
150 *errmsg = 0;
151
152 err = ERR_NONE;
153
154 if (rx->flags & RX_F_BOUND)
155 return ERR_NONE;
156
Willy Tarreau0e1aaf42023-02-27 16:39:32 +0100157 if (rx->flags & RX_F_MUST_DUP) {
158 /* this is a secondary receiver that is an exact copy of a
159 * reference which must already be bound (or has failed).
160 * We'll try to dup() the other one's FD and take it. We
161 * try hard not to reconfigure the socket since it's shared.
162 */
163 BUG_ON(!rx->shard_info);
164 if (!(rx->shard_info->ref->flags & RX_F_BOUND)) {
165 /* it's assumed that the first one has already reported
166 * the error, let's not spam with another one, and do
167 * not set ERR_ALERT.
168 */
169 err |= ERR_RETRYABLE;
170 goto bind_ret_err;
171 }
172 /* taking the other one's FD will result in it being marked
173 * extern and being dup()ed. Let's mark the receiver as
174 * inherited so that it properly bypasses all second-stage
175 * setup and avoids being passed to new processes.
176 */
177 rx->flags |= RX_F_INHERITED;
178 rx->fd = rx->shard_info->ref->fd;
179 }
180
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200181 /* if no FD was assigned yet, we'll have to either find a compatible
182 * one or create a new one.
183 */
184 if (rx->fd == -1)
185 rx->fd = sock_find_compatible_fd(rx);
186
187 path = ((struct sockaddr_un *)&rx->addr)->sun_path;
188 maxpathlen = MIN(MAXPATHLEN, sizeof(addr.sun_path));
189
190 /* if the listener already has an fd assigned, then we were offered the
191 * fd by an external process (most likely the parent), and we don't want
192 * to create a new socket. However we still want to set a few flags on
193 * the socket.
194 */
195 fd = rx->fd;
196 ext = (fd >= 0);
197 if (ext)
198 goto fd_ready;
199
200 if (path[0]) {
201 ret = snprintf(tempname, maxpathlen, "%s.%d.tmp", path, pid);
202 if (ret < 0 || ret >= sizeof(addr.sun_path)) {
203 err |= ERR_FATAL | ERR_ALERT;
204 memprintf(errmsg, "name too long for UNIX socket (limit usually 97)");
205 goto bind_return;
206 }
207
208 ret = snprintf(backname, maxpathlen, "%s.%d.bak", path, pid);
209 if (ret < 0 || ret >= maxpathlen) {
210 err |= ERR_FATAL | ERR_ALERT;
211 memprintf(errmsg, "name too long for UNIX socket (limit usually 97)");
212 goto bind_return;
213 }
214
215 /* 2. clean existing orphaned entries */
216 if (unlink(tempname) < 0 && errno != ENOENT) {
217 err |= ERR_FATAL | ERR_ALERT;
Willy Tarreau3cd58bf2020-09-17 08:35:38 +0200218 memprintf(errmsg, "error when trying to unlink previous UNIX socket (%s)", strerror(errno));
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200219 goto bind_return;
220 }
221
222 if (unlink(backname) < 0 && errno != ENOENT) {
223 err |= ERR_FATAL | ERR_ALERT;
Willy Tarreau3cd58bf2020-09-17 08:35:38 +0200224 memprintf(errmsg, "error when trying to unlink previous UNIX socket (%s)", strerror(errno));
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200225 goto bind_return;
226 }
227
228 /* 3. backup existing socket */
229 if (link(path, backname) < 0 && errno != ENOENT) {
230 err |= ERR_FATAL | ERR_ALERT;
Willy Tarreau3cd58bf2020-09-17 08:35:38 +0200231 memprintf(errmsg, "error when trying to preserve previous UNIX socket (%s)", strerror(errno));
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200232 goto bind_return;
233 }
234
235 /* Note: this test is redundant with the snprintf one above and
236 * will never trigger, it's just added as the only way to shut
237 * gcc's painfully dumb warning about possibly truncated output
238 * during strncpy(). Don't move it above or smart gcc will not
239 * see it!
240 */
241 if (strlen(tempname) >= sizeof(addr.sun_path)) {
242 err |= ERR_FATAL | ERR_ALERT;
243 memprintf(errmsg, "name too long for UNIX socket (limit usually 97)");
244 goto bind_return;
245 }
246
247 strncpy(addr.sun_path, tempname, sizeof(addr.sun_path) - 1);
248 addr.sun_path[sizeof(addr.sun_path) - 1] = 0;
249 }
250 else {
251 /* first char is zero, it's an abstract socket whose address
252 * is defined by all the bytes past this zero.
253 */
254 memcpy(addr.sun_path, path, sizeof(addr.sun_path));
255 }
256 addr.sun_family = AF_UNIX;
257
258 /* WT: shouldn't we use my_socketat(rx->netns) here instead ? */
Willy Tarreauf1f66092020-09-04 08:15:31 +0200259 fd = socket(rx->proto->fam->sock_domain, rx->proto->sock_type, rx->proto->sock_prot);
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200260 if (fd < 0) {
261 err |= ERR_FATAL | ERR_ALERT;
Willy Tarreau3cd58bf2020-09-17 08:35:38 +0200262 memprintf(errmsg, "cannot create receiving socket (%s)", strerror(errno));
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200263 goto bind_return;
264 }
265
266 fd_ready:
Willy Tarreau145b17f2023-01-11 10:59:52 +0100267 if (ext && fd < global.maxsock && fdtab[fd].owner) {
268 /* This FD was already bound so this means that it was already
269 * known and registered before parsing, hence it's an inherited
270 * FD. The only reason why it's already known here is that it
271 * has been registered multiple times (multiple listeners on the
272 * same, or a "shards" directive on the line). There cannot be
273 * multiple listeners on one FD but at least we can create a
274 * new one from the original one. We won't reconfigure it,
275 * however, as this was already done for the first one.
276 */
277 fd = dup(fd);
278 if (fd == -1) {
279 err |= ERR_RETRYABLE | ERR_ALERT;
280 memprintf(errmsg, "cannot dup() receiving socket (%s)", strerror(errno));
281 goto bind_return;
282 }
283 }
284
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200285 if (fd >= global.maxsock) {
286 err |= ERR_FATAL | ERR_ABORT | ERR_ALERT;
287 memprintf(errmsg, "not enough free sockets (raise '-n' parameter)");
288 goto bind_close_return;
289 }
290
Willy Tarreau38247432022-04-26 10:24:14 +0200291 if (fd_set_nonblock(fd) == -1) {
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200292 err |= ERR_FATAL | ERR_ALERT;
293 memprintf(errmsg, "cannot make socket non-blocking");
294 goto bind_close_return;
295 }
296
297 if (!ext && bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
298 /* note that bind() creates the socket <tempname> on the file system */
299 if (errno == EADDRINUSE) {
300 /* the old process might still own it, let's retry */
301 err |= ERR_RETRYABLE | ERR_ALERT;
302 memprintf(errmsg, "cannot bind UNIX socket (already in use)");
303 goto bind_close_return;
304 }
305 else {
306 err |= ERR_FATAL | ERR_ALERT;
Willy Tarreau3cd58bf2020-09-17 08:35:38 +0200307 memprintf(errmsg, "cannot bind UNIX socket (%s)", strerror(errno));
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200308 goto bind_close_return;
309 }
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200310 }
311
312 /* <uid> and <gid> different of -1 will be used to change the socket owner.
313 * If <mode> is not 0, it will be used to restrict access to the socket.
314 * While it is known not to be portable on every OS, it's still useful
315 * where it works. We also don't change permissions on abstract sockets.
316 */
317 if (!ext && path[0] &&
318 (((rx->settings->ux.uid != -1 || rx->settings->ux.gid != -1) &&
319 (chown(tempname, rx->settings->ux.uid, rx->settings->ux.gid) == -1)) ||
320 (rx->settings->ux.mode != 0 && chmod(tempname, rx->settings->ux.mode) == -1))) {
321 err |= ERR_FATAL | ERR_ALERT;
Willy Tarreau3cd58bf2020-09-17 08:35:38 +0200322 memprintf(errmsg, "cannot change UNIX socket ownership (%s)", strerror(errno));
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200323 goto err_unlink_temp;
324 }
325
326 /* Point of no return: we are ready, we'll switch the sockets. We don't
327 * fear losing the socket <path> because we have a copy of it in
328 * backname. Abstract sockets are not renamed.
329 */
330 if (!ext && path[0] && rename(tempname, path) < 0) {
331 err |= ERR_FATAL | ERR_ALERT;
Willy Tarreau3cd58bf2020-09-17 08:35:38 +0200332 memprintf(errmsg, "cannot switch final and temporary UNIX sockets (%s)", strerror(errno));
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200333 goto err_rename;
334 }
335
336 /* Cleanup: only unlink if we didn't inherit the fd from the parent */
337 if (!ext && path[0])
338 unlink(backname);
339
340 rx->fd = fd;
341 rx->flags |= RX_F_BOUND;
342
Willy Tarreau3ecb3412023-11-20 10:44:21 +0100343 if (!path[0]) {
344 /* ABNS sockets do not support suspend, and they conflict with
345 * other ones (no reuseport), so they must always be unbound.
346 */
347 rx->flags |= RX_F_NON_SUSPENDABLE;
348 }
349
Willy Tarreau9464bb12022-07-05 05:16:13 +0200350 fd_insert(fd, rx->owner, rx->iocb, rx->bind_tgroup, rx->bind_thread);
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200351
352 /* for now, all regularly bound TCP listeners are exportable */
353 if (!(rx->flags & RX_F_INHERITED))
Willy Tarreau9063a662021-04-06 18:09:06 +0200354 HA_ATOMIC_OR(&fdtab[fd].state, FD_EXPORTED);
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200355
356 return err;
357
358 err_rename:
359 ret = rename(backname, path);
360 if (ret < 0 && errno == ENOENT)
361 unlink(path);
362 err_unlink_temp:
363 if (!ext && path[0])
364 unlink(tempname);
365 close(fd);
366 err_unlink_back:
367 if (!ext && path[0])
368 unlink(backname);
369 bind_return:
370 if (errmsg && *errmsg) {
Aurelien DARRAGONde63efb2023-02-06 19:23:40 +0100371 if (!ext) {
372 char *path_str;
373
374 path_str = sa2str((struct sockaddr_storage *)&rx->addr, 0, 0);
375 memprintf(errmsg, "%s [%s]", *errmsg, ((path_str) ? path_str : ""));
376 ha_free(&path_str);
377 }
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200378 else
379 memprintf(errmsg, "%s [fd %d]", *errmsg, fd);
380 }
Willy Tarreau0e1aaf42023-02-27 16:39:32 +0100381 bind_ret_err:
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200382 return err;
383
384 bind_close_return:
385 close(fd);
386 goto bind_return;
387}