blob: cd6d4f977dce13fa95bb962042a2538d8ee2851c [file] [log] [blame]
/*
 * SOCK_UNIX socket management
 *
 * Copyright 2000-2020 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
13#include <ctype.h>
Willy Tarreaueb8cfe62020-09-16 22:15:40 +020014#include <errno.h>
15#include <fcntl.h>
Willy Tarreau0d06df62020-08-28 15:10:11 +020016#include <string.h>
Willy Tarreau1e0a8602020-09-02 17:14:29 +020017#include <unistd.h>
Willy Tarreau0d06df62020-08-28 15:10:11 +020018
19#include <sys/param.h>
20#include <sys/socket.h>
21#include <sys/types.h>
22
23#include <sys/socket.h>
24#include <sys/stat.h>
25#include <sys/types.h>
26#include <sys/un.h>
27
28#include <haproxy/api.h>
Willy Tarreau1e0a8602020-09-02 17:14:29 +020029#include <haproxy/errors.h>
30#include <haproxy/fd.h>
31#include <haproxy/global.h>
Willy Tarreau0d06df62020-08-28 15:10:11 +020032#include <haproxy/listener.h>
Willy Tarreau1e0a8602020-09-02 17:14:29 +020033#include <haproxy/receiver-t.h>
Willy Tarreau0d06df62020-08-28 15:10:11 +020034#include <haproxy/namespace.h>
Willy Tarreau1e0a8602020-09-02 17:14:29 +020035#include <haproxy/sock.h>
Willy Tarreau0d06df62020-08-28 15:10:11 +020036#include <haproxy/sock_unix.h>
37#include <haproxy/tools.h>
38
39
Willy Tarreaub0254cb2020-09-04 08:07:11 +020040struct proto_fam proto_fam_unix = {
41 .name = "unix",
42 .sock_domain = PF_UNIX,
43 .sock_family = AF_UNIX,
44 .sock_addrlen = sizeof(struct sockaddr_un),
45 .l3_addrlen = sizeof(((struct sockaddr_un*)0)->sun_path),
46 .addrcmp = sock_unix_addrcmp,
47 .bind = sock_unix_bind_receiver,
48 .get_src = sock_get_src,
49 .get_dst = sock_get_dst,
50};
51
/* PLEASE NOTE for functions below:
 *
 * The address family SHOULD always be checked. In some cases a function will
 * be used in a situation where the address family is guaranteed (e.g. protocol
 * definitions), so the test may be avoided. This special case must then be
 * mentioned in the comment before the function definition.
 */
59
60
/* Returns non-zero when the text located right after the dot found at offset
 * <dot> in <path> is a (possibly empty) run of decimal digits immediately
 * followed by the terminating ".tmp" suffix, otherwise zero. <path> must be
 * zero-terminated.
 */
static int sock_unix_has_temp_tail(const char *path, int dot)
{
	const char *p = path + dot + 1;

	while (isdigit((unsigned char)*p))
		p++;

	return strcmp(p, ".tmp") == 0;
}

/* Compares two AF_UNIX sockaddr addresses <a> and <b>. Returns 0 if they match
 * or non-zero if they do not (including when either one is not AF_UNIX).
 *
 * ABNS addresses (those whose path starts with \0) are compared over the whole
 * sun_path area. For regular paths, a name ending exactly with ".XXXXX.tmp"
 * (a dot, digits, then ".tmp") designates a freshly bound socket about to
 * replace the final one, so this suffix is ignored on either side:
 *
 *     /tmp/test           matches /tmp/test.1822.tmp
 *     /tmp/test.1822.tmp  matches /tmp/test.XXXX.tmp
 *
 * Regular UNIX socket paths are expected to always be zero-terminated.
 */
int sock_unix_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b)
{
	const struct sockaddr_un *una = (const struct sockaddr_un *)a;
	const struct sockaddr_un *unb = (const struct sockaddr_un *)b;
	const char *pa = una->sun_path;
	const char *pb = unb->sun_path;
	int last_dot = 0;
	int pos;

	/* both sides must be AF_UNIX */
	if (a->ss_family != AF_UNIX || b->ss_family != AF_UNIX)
		return -1;

	/* a regular path never matches an abstract one */
	if (pa[0] != pb[0])
		return -1;

	/* abstract sockets: every byte of the path area is significant */
	if (pa[0] == '\0')
		return memcmp(pa, pb, sizeof(una->sun_path));

	/* walk the common prefix, remembering the last dot seen in it */
	for (pos = 1; pa[pos] == pb[pos]; pos++) {
		if (pa[pos] == '\0')
			return 0; /* strictly identical */
		if (pa[pos] == '.')
			last_dot = pos;
	}

	/* The paths differ at <pos>. If one of them ends here, it is in its
	 * "final" form and the other one may only continue with the dot that
	 * opens the temporary suffix.
	 */
	if (pa[pos] == '\0' || pb[pos] == '\0') {
		if (pa[pos] != '.' && pb[pos] != '.')
			return -1; /* invalid temp path */
		last_dot = pos; /* try to match against temp path */
	}

	if (last_dot == 0)
		return -1;

	/* Each path that still has characters left must end with the
	 * "<digits>.tmp" tail right after the retained dot.
	 */
	if (pa[pos] != '\0' && !sock_unix_has_temp_tail(pa, last_dot))
		return -1;

	if (pb[pos] != '\0' && !sock_unix_has_temp_tail(pb, last_dot))
		return -1;

	/* OK that's a match */
	return 0;
}
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200134
Willy Tarreau233ad282020-10-15 21:45:15 +0200135/* Binds receiver <rx>, and assigns rx->iocb and rx->owner as the callback and
136 * context, respectively, with ->bind_thread as the thread mask. Returns an
137 * error code made of ERR_* bits on failure or ERR_NONE on success. On failure,
138 * an error message may be passed into <errmsg>.
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200139 */
Willy Tarreau233ad282020-10-15 21:45:15 +0200140int sock_unix_bind_receiver(struct receiver *rx, char **errmsg)
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200141{
142 char tempname[MAXPATHLEN];
143 char backname[MAXPATHLEN];
144 struct sockaddr_un addr;
145 const char *path;
146 int maxpathlen;
147 int fd, err, ext, ret;
148
149 /* ensure we never return garbage */
150 if (errmsg)
151 *errmsg = 0;
152
153 err = ERR_NONE;
154
155 if (rx->flags & RX_F_BOUND)
156 return ERR_NONE;
157
158 /* if no FD was assigned yet, we'll have to either find a compatible
159 * one or create a new one.
160 */
161 if (rx->fd == -1)
162 rx->fd = sock_find_compatible_fd(rx);
163
164 path = ((struct sockaddr_un *)&rx->addr)->sun_path;
165 maxpathlen = MIN(MAXPATHLEN, sizeof(addr.sun_path));
166
167 /* if the listener already has an fd assigned, then we were offered the
168 * fd by an external process (most likely the parent), and we don't want
169 * to create a new socket. However we still want to set a few flags on
170 * the socket.
171 */
172 fd = rx->fd;
173 ext = (fd >= 0);
174 if (ext)
175 goto fd_ready;
176
177 if (path[0]) {
178 ret = snprintf(tempname, maxpathlen, "%s.%d.tmp", path, pid);
179 if (ret < 0 || ret >= sizeof(addr.sun_path)) {
180 err |= ERR_FATAL | ERR_ALERT;
181 memprintf(errmsg, "name too long for UNIX socket (limit usually 97)");
182 goto bind_return;
183 }
184
185 ret = snprintf(backname, maxpathlen, "%s.%d.bak", path, pid);
186 if (ret < 0 || ret >= maxpathlen) {
187 err |= ERR_FATAL | ERR_ALERT;
188 memprintf(errmsg, "name too long for UNIX socket (limit usually 97)");
189 goto bind_return;
190 }
191
192 /* 2. clean existing orphaned entries */
193 if (unlink(tempname) < 0 && errno != ENOENT) {
194 err |= ERR_FATAL | ERR_ALERT;
Willy Tarreau3cd58bf2020-09-17 08:35:38 +0200195 memprintf(errmsg, "error when trying to unlink previous UNIX socket (%s)", strerror(errno));
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200196 goto bind_return;
197 }
198
199 if (unlink(backname) < 0 && errno != ENOENT) {
200 err |= ERR_FATAL | ERR_ALERT;
Willy Tarreau3cd58bf2020-09-17 08:35:38 +0200201 memprintf(errmsg, "error when trying to unlink previous UNIX socket (%s)", strerror(errno));
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200202 goto bind_return;
203 }
204
205 /* 3. backup existing socket */
206 if (link(path, backname) < 0 && errno != ENOENT) {
207 err |= ERR_FATAL | ERR_ALERT;
Willy Tarreau3cd58bf2020-09-17 08:35:38 +0200208 memprintf(errmsg, "error when trying to preserve previous UNIX socket (%s)", strerror(errno));
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200209 goto bind_return;
210 }
211
212 /* Note: this test is redundant with the snprintf one above and
213 * will never trigger, it's just added as the only way to shut
214 * gcc's painfully dumb warning about possibly truncated output
215 * during strncpy(). Don't move it above or smart gcc will not
216 * see it!
217 */
218 if (strlen(tempname) >= sizeof(addr.sun_path)) {
219 err |= ERR_FATAL | ERR_ALERT;
220 memprintf(errmsg, "name too long for UNIX socket (limit usually 97)");
221 goto bind_return;
222 }
223
224 strncpy(addr.sun_path, tempname, sizeof(addr.sun_path) - 1);
225 addr.sun_path[sizeof(addr.sun_path) - 1] = 0;
226 }
227 else {
228 /* first char is zero, it's an abstract socket whose address
229 * is defined by all the bytes past this zero.
230 */
231 memcpy(addr.sun_path, path, sizeof(addr.sun_path));
232 }
233 addr.sun_family = AF_UNIX;
234
235 /* WT: shouldn't we use my_socketat(rx->netns) here instead ? */
Willy Tarreauf1f66092020-09-04 08:15:31 +0200236 fd = socket(rx->proto->fam->sock_domain, rx->proto->sock_type, rx->proto->sock_prot);
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200237 if (fd < 0) {
238 err |= ERR_FATAL | ERR_ALERT;
Willy Tarreau3cd58bf2020-09-17 08:35:38 +0200239 memprintf(errmsg, "cannot create receiving socket (%s)", strerror(errno));
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200240 goto bind_return;
241 }
242
243 fd_ready:
Willy Tarreau6789f192023-01-11 10:59:52 +0100244 if (ext && fd < global.maxsock && fdtab[fd].owner) {
245 /* This FD was already bound so this means that it was already
246 * known and registered before parsing, hence it's an inherited
247 * FD. The only reason why it's already known here is that it
248 * has been registered multiple times (multiple listeners on the
249 * same, or a "shards" directive on the line). There cannot be
250 * multiple listeners on one FD but at least we can create a
251 * new one from the original one. We won't reconfigure it,
252 * however, as this was already done for the first one.
253 */
254 fd = dup(fd);
255 if (fd == -1) {
256 err |= ERR_RETRYABLE | ERR_ALERT;
257 memprintf(errmsg, "cannot dup() receiving socket (%s)", strerror(errno));
258 goto bind_return;
259 }
260 }
261
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200262 if (fd >= global.maxsock) {
263 err |= ERR_FATAL | ERR_ABORT | ERR_ALERT;
264 memprintf(errmsg, "not enough free sockets (raise '-n' parameter)");
265 goto bind_close_return;
266 }
267
268 if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
269 err |= ERR_FATAL | ERR_ALERT;
270 memprintf(errmsg, "cannot make socket non-blocking");
271 goto bind_close_return;
272 }
273
274 if (!ext && bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
275 /* note that bind() creates the socket <tempname> on the file system */
276 if (errno == EADDRINUSE) {
277 /* the old process might still own it, let's retry */
278 err |= ERR_RETRYABLE | ERR_ALERT;
279 memprintf(errmsg, "cannot bind UNIX socket (already in use)");
280 goto bind_close_return;
281 }
282 else {
283 err |= ERR_FATAL | ERR_ALERT;
Willy Tarreau3cd58bf2020-09-17 08:35:38 +0200284 memprintf(errmsg, "cannot bind UNIX socket (%s)", strerror(errno));
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200285 goto bind_close_return;
286 }
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200287 }
288
289 /* <uid> and <gid> different of -1 will be used to change the socket owner.
290 * If <mode> is not 0, it will be used to restrict access to the socket.
291 * While it is known not to be portable on every OS, it's still useful
292 * where it works. We also don't change permissions on abstract sockets.
293 */
294 if (!ext && path[0] &&
295 (((rx->settings->ux.uid != -1 || rx->settings->ux.gid != -1) &&
296 (chown(tempname, rx->settings->ux.uid, rx->settings->ux.gid) == -1)) ||
297 (rx->settings->ux.mode != 0 && chmod(tempname, rx->settings->ux.mode) == -1))) {
298 err |= ERR_FATAL | ERR_ALERT;
Willy Tarreau3cd58bf2020-09-17 08:35:38 +0200299 memprintf(errmsg, "cannot change UNIX socket ownership (%s)", strerror(errno));
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200300 goto err_unlink_temp;
301 }
302
303 /* Point of no return: we are ready, we'll switch the sockets. We don't
304 * fear losing the socket <path> because we have a copy of it in
305 * backname. Abstract sockets are not renamed.
306 */
307 if (!ext && path[0] && rename(tempname, path) < 0) {
308 err |= ERR_FATAL | ERR_ALERT;
Willy Tarreau3cd58bf2020-09-17 08:35:38 +0200309 memprintf(errmsg, "cannot switch final and temporary UNIX sockets (%s)", strerror(errno));
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200310 goto err_rename;
311 }
312
313 /* Cleanup: only unlink if we didn't inherit the fd from the parent */
314 if (!ext && path[0])
315 unlink(backname);
316
317 rx->fd = fd;
318 rx->flags |= RX_F_BOUND;
319
Willy Tarreau89673e22023-11-20 10:44:21 +0100320 if (!path[0]) {
321 /* ABNS sockets do not support suspend, and they conflict with
322 * other ones (no reuseport), so they must always be unbound.
323 */
324 rx->flags |= RX_F_NON_SUSPENDABLE;
325 }
326
Willy Tarreau233ad282020-10-15 21:45:15 +0200327 fd_insert(fd, rx->owner, rx->iocb, thread_mask(rx->settings->bind_thread) & all_threads_mask);
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200328
329 /* for now, all regularly bound TCP listeners are exportable */
330 if (!(rx->flags & RX_F_INHERITED))
Willy Tarreau9063a662021-04-06 18:09:06 +0200331 HA_ATOMIC_OR(&fdtab[fd].state, FD_EXPORTED);
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200332
333 return err;
334
335 err_rename:
336 ret = rename(backname, path);
337 if (ret < 0 && errno == ENOENT)
338 unlink(path);
339 err_unlink_temp:
340 if (!ext && path[0])
341 unlink(tempname);
342 close(fd);
343 err_unlink_back:
344 if (!ext && path[0])
345 unlink(backname);
346 bind_return:
347 if (errmsg && *errmsg) {
Aurelien DARRAGONa802e142023-02-06 19:23:40 +0100348 if (!ext) {
349 char *path_str;
350
351 path_str = sa2str((struct sockaddr_storage *)&rx->addr, 0, 0);
352 memprintf(errmsg, "%s [%s]", *errmsg, ((path_str) ? path_str : ""));
353 ha_free(&path_str);
354 }
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200355 else
356 memprintf(errmsg, "%s [fd %d]", *errmsg, fd);
357 }
358 return err;
359
360 bind_close_return:
361 close(fd);
362 goto bind_return;
363}