blob: 8bdfed9be88e52df7c7f104ff5c686d0ab538a71 [file] [log] [blame]
/*
 * SOCK_UNIX socket management
 *
 * Copyright 2000-2020 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
Willy Tarreau1e0a8602020-09-02 17:14:29 +020013#include <fcntl.h>
Willy Tarreau0d06df62020-08-28 15:10:11 +020014#include <ctype.h>
15#include <string.h>
Willy Tarreau1e0a8602020-09-02 17:14:29 +020016#include <unistd.h>
Willy Tarreau0d06df62020-08-28 15:10:11 +020017
18#include <sys/param.h>
19#include <sys/socket.h>
20#include <sys/types.h>
21
22#include <sys/socket.h>
23#include <sys/stat.h>
24#include <sys/types.h>
25#include <sys/un.h>
26
27#include <haproxy/api.h>
Willy Tarreau1e0a8602020-09-02 17:14:29 +020028#include <haproxy/errors.h>
29#include <haproxy/fd.h>
30#include <haproxy/global.h>
Willy Tarreau0d06df62020-08-28 15:10:11 +020031#include <haproxy/listener.h>
Willy Tarreau1e0a8602020-09-02 17:14:29 +020032#include <haproxy/receiver-t.h>
Willy Tarreau0d06df62020-08-28 15:10:11 +020033#include <haproxy/namespace.h>
Willy Tarreau1e0a8602020-09-02 17:14:29 +020034#include <haproxy/sock.h>
Willy Tarreau0d06df62020-08-28 15:10:11 +020035#include <haproxy/sock_unix.h>
36#include <haproxy/tools.h>
37
38
Willy Tarreaub0254cb2020-09-04 08:07:11 +020039struct proto_fam proto_fam_unix = {
40 .name = "unix",
41 .sock_domain = PF_UNIX,
42 .sock_family = AF_UNIX,
43 .sock_addrlen = sizeof(struct sockaddr_un),
44 .l3_addrlen = sizeof(((struct sockaddr_un*)0)->sun_path),
45 .addrcmp = sock_unix_addrcmp,
46 .bind = sock_unix_bind_receiver,
47 .get_src = sock_get_src,
48 .get_dst = sock_get_dst,
49};
50
/* PLEASE NOTE for functions below:
 *
 * The address family SHOULD always be checked. In some cases a function will
 * be used in a situation where the address family is guaranteed (e.g. protocol
 * definitions), so the test may be avoided. This special case must then be
 * mentioned in the comment before the function definition.
 */
58
59
/* Compares two AF_UNIX sockaddr addresses. Returns 0 if they match or non-zero
 * if they do not match (including when either address is not AF_UNIX). It also
 * supports ABNS socket addresses (those starting with \0), which are compared
 * over the whole sun_path array. For regular UNIX sockets, names ending
 * exactly with .XXXXX.tmp (a dot, optional digits, then ".tmp") designate
 * newly bound sockets about to be replaced; this suffix is ignored, so that a
 * final name also matches its own temporary name. Note that our UNIX socket
 * paths are always zero-terminated.
 *
 * Examples:
 *     /tmp/test          matches        /tmp/test.1822.tmp
 *     /tmp/test.1822.tmp matches        /tmp/test.4567.tmp
 *     /tmp/test          does not match /tmp/test2
 */
int sock_unix_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b)
{
	const struct sockaddr_un *au = (const struct sockaddr_un *)a;
	const struct sockaddr_un *bu = (const struct sockaddr_un *)b;
	int idx, dot, idx2;

	if (a->ss_family != b->ss_family)
		return -1;

	if (a->ss_family != AF_UNIX)
		return -1;

	if (au->sun_path[0] != bu->sun_path[0])
		return -1;

	/* abstract sockets: every byte of the path is significant */
	if (au->sun_path[0] == 0)
		return memcmp(au->sun_path, bu->sun_path, sizeof(au->sun_path));

	/* walk the common prefix, remembering the position of its last dot */
	idx = 1; dot = 0;
	while (au->sun_path[idx] == bu->sun_path[idx]) {
		if (au->sun_path[idx] == 0)
			return 0;
		if (au->sun_path[idx] == '.')
			dot = idx;
		idx++;
	}

	/* Now we have a difference. It's OK if they are within or after a
	 * sequence of digits following a dot, and are followed by ".tmp".
	 *
	 * When one of the paths ends right here, it is in its "final" form
	 * (no ".XXXXX.tmp" suffix), so the other one must start its temporary
	 * suffix at this exact position. The dot was not part of the common
	 * prefix and thus could not be recorded by the loop above.
	 */
	if (au->sun_path[idx] == 0 || bu->sun_path[idx] == 0) {
		if (au->sun_path[idx] == '.' || bu->sun_path[idx] == '.')
			dot = idx; /* try to match against the temporary path */
		else
			return -1; /* not a valid temporary suffix */
	}

	if (!dot)
		return -1;

	/* First, check in path "a" */
	if (au->sun_path[idx] != 0) {
		idx2 = dot + 1;
		while (isdigit((unsigned char)au->sun_path[idx2]))
			idx2++;
		if (strcmp(au->sun_path + idx2, ".tmp") != 0)
			return -1;
	}

	/* Then check in path "b" */
	if (bu->sun_path[idx] != 0) {
		idx2 = dot + 1;
		while (isdigit((unsigned char)bu->sun_path[idx2]))
			idx2++;
		if (strcmp(bu->sun_path + idx2, ".tmp") != 0)
			return -1;
	}

	/* OK that's a match */
	return 0;
}
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200119
120/* Binds receiver <rx>, and assigns <handler> and rx-> as the callback and
121 * context, respectively, with <tm> as the thread mask. Returns and error code
122 * made of ERR_* bits on failure or ERR_NONE on success. On failure, an error
123 * message may be passed into <errmsg>.
124 */
125int sock_unix_bind_receiver(struct receiver *rx, void (*handler)(int fd), char **errmsg)
126{
127 char tempname[MAXPATHLEN];
128 char backname[MAXPATHLEN];
129 struct sockaddr_un addr;
130 const char *path;
131 int maxpathlen;
132 int fd, err, ext, ret;
133
134 /* ensure we never return garbage */
135 if (errmsg)
136 *errmsg = 0;
137
138 err = ERR_NONE;
139
140 if (rx->flags & RX_F_BOUND)
141 return ERR_NONE;
142
143 /* if no FD was assigned yet, we'll have to either find a compatible
144 * one or create a new one.
145 */
146 if (rx->fd == -1)
147 rx->fd = sock_find_compatible_fd(rx);
148
149 path = ((struct sockaddr_un *)&rx->addr)->sun_path;
150 maxpathlen = MIN(MAXPATHLEN, sizeof(addr.sun_path));
151
152 /* if the listener already has an fd assigned, then we were offered the
153 * fd by an external process (most likely the parent), and we don't want
154 * to create a new socket. However we still want to set a few flags on
155 * the socket.
156 */
157 fd = rx->fd;
158 ext = (fd >= 0);
159 if (ext)
160 goto fd_ready;
161
162 if (path[0]) {
163 ret = snprintf(tempname, maxpathlen, "%s.%d.tmp", path, pid);
164 if (ret < 0 || ret >= sizeof(addr.sun_path)) {
165 err |= ERR_FATAL | ERR_ALERT;
166 memprintf(errmsg, "name too long for UNIX socket (limit usually 97)");
167 goto bind_return;
168 }
169
170 ret = snprintf(backname, maxpathlen, "%s.%d.bak", path, pid);
171 if (ret < 0 || ret >= maxpathlen) {
172 err |= ERR_FATAL | ERR_ALERT;
173 memprintf(errmsg, "name too long for UNIX socket (limit usually 97)");
174 goto bind_return;
175 }
176
177 /* 2. clean existing orphaned entries */
178 if (unlink(tempname) < 0 && errno != ENOENT) {
179 err |= ERR_FATAL | ERR_ALERT;
180 memprintf(errmsg, "error when trying to unlink previous UNIX socket");
181 goto bind_return;
182 }
183
184 if (unlink(backname) < 0 && errno != ENOENT) {
185 err |= ERR_FATAL | ERR_ALERT;
186 memprintf(errmsg, "error when trying to unlink previous UNIX socket");
187 goto bind_return;
188 }
189
190 /* 3. backup existing socket */
191 if (link(path, backname) < 0 && errno != ENOENT) {
192 err |= ERR_FATAL | ERR_ALERT;
193 memprintf(errmsg, "error when trying to preserve previous UNIX socket");
194 goto bind_return;
195 }
196
197 /* Note: this test is redundant with the snprintf one above and
198 * will never trigger, it's just added as the only way to shut
199 * gcc's painfully dumb warning about possibly truncated output
200 * during strncpy(). Don't move it above or smart gcc will not
201 * see it!
202 */
203 if (strlen(tempname) >= sizeof(addr.sun_path)) {
204 err |= ERR_FATAL | ERR_ALERT;
205 memprintf(errmsg, "name too long for UNIX socket (limit usually 97)");
206 goto bind_return;
207 }
208
209 strncpy(addr.sun_path, tempname, sizeof(addr.sun_path) - 1);
210 addr.sun_path[sizeof(addr.sun_path) - 1] = 0;
211 }
212 else {
213 /* first char is zero, it's an abstract socket whose address
214 * is defined by all the bytes past this zero.
215 */
216 memcpy(addr.sun_path, path, sizeof(addr.sun_path));
217 }
218 addr.sun_family = AF_UNIX;
219
220 /* WT: shouldn't we use my_socketat(rx->netns) here instead ? */
Willy Tarreauf1f66092020-09-04 08:15:31 +0200221 fd = socket(rx->proto->fam->sock_domain, rx->proto->sock_type, rx->proto->sock_prot);
Willy Tarreau1e0a8602020-09-02 17:14:29 +0200222 if (fd < 0) {
223 err |= ERR_FATAL | ERR_ALERT;
224 memprintf(errmsg, "cannot create receiving socket");
225 goto bind_return;
226 }
227
228 fd_ready:
229 if (fd >= global.maxsock) {
230 err |= ERR_FATAL | ERR_ABORT | ERR_ALERT;
231 memprintf(errmsg, "not enough free sockets (raise '-n' parameter)");
232 goto bind_close_return;
233 }
234
235 if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
236 err |= ERR_FATAL | ERR_ALERT;
237 memprintf(errmsg, "cannot make socket non-blocking");
238 goto bind_close_return;
239 }
240
241 if (!ext && bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
242 /* note that bind() creates the socket <tempname> on the file system */
243 if (errno == EADDRINUSE) {
244 /* the old process might still own it, let's retry */
245 err |= ERR_RETRYABLE | ERR_ALERT;
246 memprintf(errmsg, "cannot bind UNIX socket (already in use)");
247 goto bind_close_return;
248 }
249 else {
250 err |= ERR_FATAL | ERR_ALERT;
251 memprintf(errmsg, "cannot bind UNIX socket");
252 goto bind_close_return;
253 }
254 goto err_unlink_temp;
255 }
256
257 /* <uid> and <gid> different of -1 will be used to change the socket owner.
258 * If <mode> is not 0, it will be used to restrict access to the socket.
259 * While it is known not to be portable on every OS, it's still useful
260 * where it works. We also don't change permissions on abstract sockets.
261 */
262 if (!ext && path[0] &&
263 (((rx->settings->ux.uid != -1 || rx->settings->ux.gid != -1) &&
264 (chown(tempname, rx->settings->ux.uid, rx->settings->ux.gid) == -1)) ||
265 (rx->settings->ux.mode != 0 && chmod(tempname, rx->settings->ux.mode) == -1))) {
266 err |= ERR_FATAL | ERR_ALERT;
267 memprintf(errmsg, "cannot change UNIX socket ownership");
268 goto err_unlink_temp;
269 }
270
271 /* Point of no return: we are ready, we'll switch the sockets. We don't
272 * fear losing the socket <path> because we have a copy of it in
273 * backname. Abstract sockets are not renamed.
274 */
275 if (!ext && path[0] && rename(tempname, path) < 0) {
276 err |= ERR_FATAL | ERR_ALERT;
277 memprintf(errmsg, "cannot switch final and temporary UNIX sockets");
278 goto err_rename;
279 }
280
281 /* Cleanup: only unlink if we didn't inherit the fd from the parent */
282 if (!ext && path[0])
283 unlink(backname);
284
285 rx->fd = fd;
286 rx->flags |= RX_F_BOUND;
287
288 fd_insert(fd, rx->owner, handler, thread_mask(rx->settings->bind_thread) & all_threads_mask);
289
290 /* for now, all regularly bound TCP listeners are exportable */
291 if (!(rx->flags & RX_F_INHERITED))
292 fdtab[fd].exported = 1;
293
294 return err;
295
296 err_rename:
297 ret = rename(backname, path);
298 if (ret < 0 && errno == ENOENT)
299 unlink(path);
300 err_unlink_temp:
301 if (!ext && path[0])
302 unlink(tempname);
303 close(fd);
304 err_unlink_back:
305 if (!ext && path[0])
306 unlink(backname);
307 bind_return:
308 if (errmsg && *errmsg) {
309 if (!ext)
310 memprintf(errmsg, "%s [%s]", *errmsg, path);
311 else
312 memprintf(errmsg, "%s [fd %d]", *errmsg, fd);
313 }
314 return err;
315
316 bind_close_return:
317 close(fd);
318 goto bind_return;
319}