Willy Tarreau | 0d06df6 | 2020-08-28 15:10:11 +0200 | [diff] [blame] | 1 | /* |
| 2 | * SOCK_UNIX socket management |
| 3 | * |
| 4 | * Copyright 2000-2020 Willy Tarreau <w@1wt.eu> |
| 5 | * |
| 6 | * This program is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU General Public License |
| 8 | * as published by the Free Software Foundation; either version |
| 9 | * 2 of the License, or (at your option) any later version. |
| 10 | * |
| 11 | */ |
| 12 | |
| 13 | #include <ctype.h> |
Willy Tarreau | eb8cfe6 | 2020-09-16 22:15:40 +0200 | [diff] [blame] | 14 | #include <errno.h> |
| 15 | #include <fcntl.h> |
Willy Tarreau | 0d06df6 | 2020-08-28 15:10:11 +0200 | [diff] [blame] | 16 | #include <string.h> |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 17 | #include <unistd.h> |
Willy Tarreau | 0d06df6 | 2020-08-28 15:10:11 +0200 | [diff] [blame] | 18 | |
| 19 | #include <sys/param.h> |
| 20 | #include <sys/socket.h> |
| 21 | #include <sys/types.h> |
| 22 | |
| 23 | #include <sys/socket.h> |
| 24 | #include <sys/stat.h> |
| 25 | #include <sys/types.h> |
| 26 | #include <sys/un.h> |
| 27 | |
| 28 | #include <haproxy/api.h> |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 29 | #include <haproxy/errors.h> |
| 30 | #include <haproxy/fd.h> |
| 31 | #include <haproxy/global.h> |
Willy Tarreau | 0d06df6 | 2020-08-28 15:10:11 +0200 | [diff] [blame] | 32 | #include <haproxy/listener.h> |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 33 | #include <haproxy/receiver-t.h> |
Willy Tarreau | 0d06df6 | 2020-08-28 15:10:11 +0200 | [diff] [blame] | 34 | #include <haproxy/namespace.h> |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 35 | #include <haproxy/sock.h> |
Willy Tarreau | 0d06df6 | 2020-08-28 15:10:11 +0200 | [diff] [blame] | 36 | #include <haproxy/sock_unix.h> |
| 37 | #include <haproxy/tools.h> |
| 38 | |
| 39 | |
Willy Tarreau | b0254cb | 2020-09-04 08:07:11 +0200 | [diff] [blame] | 40 | struct proto_fam proto_fam_unix = { |
| 41 | .name = "unix", |
| 42 | .sock_domain = PF_UNIX, |
| 43 | .sock_family = AF_UNIX, |
| 44 | .sock_addrlen = sizeof(struct sockaddr_un), |
| 45 | .l3_addrlen = sizeof(((struct sockaddr_un*)0)->sun_path), |
| 46 | .addrcmp = sock_unix_addrcmp, |
| 47 | .bind = sock_unix_bind_receiver, |
| 48 | .get_src = sock_get_src, |
| 49 | .get_dst = sock_get_dst, |
| 50 | }; |
| 51 | |
Willy Tarreau | 0d06df6 | 2020-08-28 15:10:11 +0200 | [diff] [blame] | 52 | /* PLEASE NOTE for functions below: |
| 53 | * |
| 54 | * The address family SHOULD always be checked. In some cases a function will |
| 55 | * be used in a situation where the address family is guaranteed (e.g. protocol |
| 56 | * definitions), so the test may be avoided. This special case must then be |
| 57 | * mentioned in the comment before the function definition. |
| 58 | */ |
| 59 | |
| 60 | |
/* Tells whether <s> is made of an optional run of decimal digits immediately
 * followed by the exact string ".tmp" and nothing else. Returns non-zero if
 * so, otherwise zero.
 */
static int sock_unix_is_tmp_tail(const char *s)
{
	while (isdigit((unsigned char)*s))
		s++;
	return strcmp(s, ".tmp") == 0;
}

/* Compares two AF_UNIX sockaddr addresses <a> and <b>. Returns 0 if they are
 * considered identical, or non-zero otherwise (including when either address
 * is not AF_UNIX).
 *
 * ABNS addresses (those whose path starts with \0) are compared over the whole
 * sun_path array. Regular paths are compared as zero-terminated strings, with
 * one tolerance: a trailing ".<digits>.tmp" suffix, as carried by a freshly
 * bound socket that is about to be renamed to its final name, is ignored. As
 * such, "/tmp/test" matches "/tmp/test.1822.tmp", and "/tmp/test.1822.tmp"
 * matches "/tmp/test.77.tmp". Regular paths are expected to always be
 * zero-terminated.
 */
int sock_unix_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b)
{
	const struct sockaddr_un *ua = (const struct sockaddr_un *)a;
	const struct sockaddr_un *ub = (const struct sockaddr_un *)b;
	const char *pa = ua->sun_path;
	const char *pb = ub->sun_path;
	int last_dot = 0;
	int pos;

	/* both addresses must be AF_UNIX */
	if (a->ss_family != AF_UNIX || b->ss_family != AF_UNIX)
		return -1;

	/* a regular path never matches an abstract one */
	if (pa[0] != pb[0])
		return -1;

	/* abstract sockets: every byte of the path is significant */
	if (pa[0] == '\0')
		return memcmp(pa, pb, sizeof(ua->sun_path));

	/* walk the common prefix, remembering where the last dot was seen */
	for (pos = 1; pa[pos] == pb[pos]; pos++) {
		if (pa[pos] == '\0')
			return 0; /* strictly identical paths */
		if (pa[pos] == '.')
			last_dot = pos;
	}

	/* The paths differ at <pos>. If one of them ends here, it is in its
	 * "final" form and the other one may only continue with the dot that
	 * starts a temporary suffix.
	 */
	if (pa[pos] == '\0' || pb[pos] == '\0') {
		if (pa[pos] != '.' && pb[pos] != '.')
			return -1; /* invalid temp path */
		last_dot = pos; /* try to match against temp path */
	}

	/* without any dot there cannot be any temporary suffix */
	if (!last_dot)
		return -1;

	/* each path that does not end at <pos> must consist of digits then
	 * ".tmp" past the last dot.
	 */
	if (pa[pos] != '\0' && !sock_unix_is_tmp_tail(pa + last_dot + 1))
		return -1;

	if (pb[pos] != '\0' && !sock_unix_is_tmp_tail(pb + last_dot + 1))
		return -1;

	/* OK that's a match */
	return 0;
}
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 134 | |
Willy Tarreau | 233ad28 | 2020-10-15 21:45:15 +0200 | [diff] [blame] | 135 | /* Binds receiver <rx>, and assigns rx->iocb and rx->owner as the callback and |
| 136 | * context, respectively, with ->bind_thread as the thread mask. Returns an |
| 137 | * error code made of ERR_* bits on failure or ERR_NONE on success. On failure, |
| 138 | * an error message may be passed into <errmsg>. |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 139 | */ |
Willy Tarreau | 233ad28 | 2020-10-15 21:45:15 +0200 | [diff] [blame] | 140 | int sock_unix_bind_receiver(struct receiver *rx, char **errmsg) |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 141 | { |
| 142 | char tempname[MAXPATHLEN]; |
| 143 | char backname[MAXPATHLEN]; |
| 144 | struct sockaddr_un addr; |
| 145 | const char *path; |
| 146 | int maxpathlen; |
| 147 | int fd, err, ext, ret; |
| 148 | |
| 149 | /* ensure we never return garbage */ |
| 150 | if (errmsg) |
| 151 | *errmsg = 0; |
| 152 | |
| 153 | err = ERR_NONE; |
| 154 | |
| 155 | if (rx->flags & RX_F_BOUND) |
| 156 | return ERR_NONE; |
| 157 | |
| 158 | /* if no FD was assigned yet, we'll have to either find a compatible |
| 159 | * one or create a new one. |
| 160 | */ |
| 161 | if (rx->fd == -1) |
| 162 | rx->fd = sock_find_compatible_fd(rx); |
| 163 | |
| 164 | path = ((struct sockaddr_un *)&rx->addr)->sun_path; |
| 165 | maxpathlen = MIN(MAXPATHLEN, sizeof(addr.sun_path)); |
| 166 | |
| 167 | /* if the listener already has an fd assigned, then we were offered the |
| 168 | * fd by an external process (most likely the parent), and we don't want |
| 169 | * to create a new socket. However we still want to set a few flags on |
| 170 | * the socket. |
| 171 | */ |
| 172 | fd = rx->fd; |
| 173 | ext = (fd >= 0); |
| 174 | if (ext) |
| 175 | goto fd_ready; |
| 176 | |
| 177 | if (path[0]) { |
| 178 | ret = snprintf(tempname, maxpathlen, "%s.%d.tmp", path, pid); |
| 179 | if (ret < 0 || ret >= sizeof(addr.sun_path)) { |
| 180 | err |= ERR_FATAL | ERR_ALERT; |
| 181 | memprintf(errmsg, "name too long for UNIX socket (limit usually 97)"); |
| 182 | goto bind_return; |
| 183 | } |
| 184 | |
| 185 | ret = snprintf(backname, maxpathlen, "%s.%d.bak", path, pid); |
| 186 | if (ret < 0 || ret >= maxpathlen) { |
| 187 | err |= ERR_FATAL | ERR_ALERT; |
| 188 | memprintf(errmsg, "name too long for UNIX socket (limit usually 97)"); |
| 189 | goto bind_return; |
| 190 | } |
| 191 | |
| 192 | /* 2. clean existing orphaned entries */ |
| 193 | if (unlink(tempname) < 0 && errno != ENOENT) { |
| 194 | err |= ERR_FATAL | ERR_ALERT; |
Willy Tarreau | 3cd58bf | 2020-09-17 08:35:38 +0200 | [diff] [blame] | 195 | memprintf(errmsg, "error when trying to unlink previous UNIX socket (%s)", strerror(errno)); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 196 | goto bind_return; |
| 197 | } |
| 198 | |
| 199 | if (unlink(backname) < 0 && errno != ENOENT) { |
| 200 | err |= ERR_FATAL | ERR_ALERT; |
Willy Tarreau | 3cd58bf | 2020-09-17 08:35:38 +0200 | [diff] [blame] | 201 | memprintf(errmsg, "error when trying to unlink previous UNIX socket (%s)", strerror(errno)); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 202 | goto bind_return; |
| 203 | } |
| 204 | |
| 205 | /* 3. backup existing socket */ |
| 206 | if (link(path, backname) < 0 && errno != ENOENT) { |
| 207 | err |= ERR_FATAL | ERR_ALERT; |
Willy Tarreau | 3cd58bf | 2020-09-17 08:35:38 +0200 | [diff] [blame] | 208 | memprintf(errmsg, "error when trying to preserve previous UNIX socket (%s)", strerror(errno)); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 209 | goto bind_return; |
| 210 | } |
| 211 | |
| 212 | /* Note: this test is redundant with the snprintf one above and |
| 213 | * will never trigger, it's just added as the only way to shut |
| 214 | * gcc's painfully dumb warning about possibly truncated output |
| 215 | * during strncpy(). Don't move it above or smart gcc will not |
| 216 | * see it! |
| 217 | */ |
| 218 | if (strlen(tempname) >= sizeof(addr.sun_path)) { |
| 219 | err |= ERR_FATAL | ERR_ALERT; |
| 220 | memprintf(errmsg, "name too long for UNIX socket (limit usually 97)"); |
| 221 | goto bind_return; |
| 222 | } |
| 223 | |
| 224 | strncpy(addr.sun_path, tempname, sizeof(addr.sun_path) - 1); |
| 225 | addr.sun_path[sizeof(addr.sun_path) - 1] = 0; |
| 226 | } |
| 227 | else { |
| 228 | /* first char is zero, it's an abstract socket whose address |
| 229 | * is defined by all the bytes past this zero. |
| 230 | */ |
| 231 | memcpy(addr.sun_path, path, sizeof(addr.sun_path)); |
| 232 | } |
| 233 | addr.sun_family = AF_UNIX; |
| 234 | |
| 235 | /* WT: shouldn't we use my_socketat(rx->netns) here instead ? */ |
Willy Tarreau | f1f6609 | 2020-09-04 08:15:31 +0200 | [diff] [blame] | 236 | fd = socket(rx->proto->fam->sock_domain, rx->proto->sock_type, rx->proto->sock_prot); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 237 | if (fd < 0) { |
| 238 | err |= ERR_FATAL | ERR_ALERT; |
Willy Tarreau | 3cd58bf | 2020-09-17 08:35:38 +0200 | [diff] [blame] | 239 | memprintf(errmsg, "cannot create receiving socket (%s)", strerror(errno)); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 240 | goto bind_return; |
| 241 | } |
| 242 | |
| 243 | fd_ready: |
Willy Tarreau | 6789f19 | 2023-01-11 10:59:52 +0100 | [diff] [blame] | 244 | if (ext && fd < global.maxsock && fdtab[fd].owner) { |
| 245 | /* This FD was already bound so this means that it was already |
| 246 | * known and registered before parsing, hence it's an inherited |
| 247 | * FD. The only reason why it's already known here is that it |
| 248 | * has been registered multiple times (multiple listeners on the |
| 249 | * same, or a "shards" directive on the line). There cannot be |
| 250 | * multiple listeners on one FD but at least we can create a |
| 251 | * new one from the original one. We won't reconfigure it, |
| 252 | * however, as this was already done for the first one. |
| 253 | */ |
| 254 | fd = dup(fd); |
| 255 | if (fd == -1) { |
| 256 | err |= ERR_RETRYABLE | ERR_ALERT; |
| 257 | memprintf(errmsg, "cannot dup() receiving socket (%s)", strerror(errno)); |
| 258 | goto bind_return; |
| 259 | } |
| 260 | } |
| 261 | |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 262 | if (fd >= global.maxsock) { |
| 263 | err |= ERR_FATAL | ERR_ABORT | ERR_ALERT; |
| 264 | memprintf(errmsg, "not enough free sockets (raise '-n' parameter)"); |
| 265 | goto bind_close_return; |
| 266 | } |
| 267 | |
| 268 | if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) { |
| 269 | err |= ERR_FATAL | ERR_ALERT; |
| 270 | memprintf(errmsg, "cannot make socket non-blocking"); |
| 271 | goto bind_close_return; |
| 272 | } |
| 273 | |
| 274 | if (!ext && bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) { |
| 275 | /* note that bind() creates the socket <tempname> on the file system */ |
| 276 | if (errno == EADDRINUSE) { |
| 277 | /* the old process might still own it, let's retry */ |
| 278 | err |= ERR_RETRYABLE | ERR_ALERT; |
| 279 | memprintf(errmsg, "cannot bind UNIX socket (already in use)"); |
| 280 | goto bind_close_return; |
| 281 | } |
| 282 | else { |
| 283 | err |= ERR_FATAL | ERR_ALERT; |
Willy Tarreau | 3cd58bf | 2020-09-17 08:35:38 +0200 | [diff] [blame] | 284 | memprintf(errmsg, "cannot bind UNIX socket (%s)", strerror(errno)); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 285 | goto bind_close_return; |
| 286 | } |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 287 | } |
| 288 | |
| 289 | /* <uid> and <gid> different of -1 will be used to change the socket owner. |
| 290 | * If <mode> is not 0, it will be used to restrict access to the socket. |
| 291 | * While it is known not to be portable on every OS, it's still useful |
| 292 | * where it works. We also don't change permissions on abstract sockets. |
| 293 | */ |
| 294 | if (!ext && path[0] && |
| 295 | (((rx->settings->ux.uid != -1 || rx->settings->ux.gid != -1) && |
| 296 | (chown(tempname, rx->settings->ux.uid, rx->settings->ux.gid) == -1)) || |
| 297 | (rx->settings->ux.mode != 0 && chmod(tempname, rx->settings->ux.mode) == -1))) { |
| 298 | err |= ERR_FATAL | ERR_ALERT; |
Willy Tarreau | 3cd58bf | 2020-09-17 08:35:38 +0200 | [diff] [blame] | 299 | memprintf(errmsg, "cannot change UNIX socket ownership (%s)", strerror(errno)); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 300 | goto err_unlink_temp; |
| 301 | } |
| 302 | |
| 303 | /* Point of no return: we are ready, we'll switch the sockets. We don't |
| 304 | * fear losing the socket <path> because we have a copy of it in |
| 305 | * backname. Abstract sockets are not renamed. |
| 306 | */ |
| 307 | if (!ext && path[0] && rename(tempname, path) < 0) { |
| 308 | err |= ERR_FATAL | ERR_ALERT; |
Willy Tarreau | 3cd58bf | 2020-09-17 08:35:38 +0200 | [diff] [blame] | 309 | memprintf(errmsg, "cannot switch final and temporary UNIX sockets (%s)", strerror(errno)); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 310 | goto err_rename; |
| 311 | } |
| 312 | |
| 313 | /* Cleanup: only unlink if we didn't inherit the fd from the parent */ |
| 314 | if (!ext && path[0]) |
| 315 | unlink(backname); |
| 316 | |
| 317 | rx->fd = fd; |
| 318 | rx->flags |= RX_F_BOUND; |
| 319 | |
Willy Tarreau | 89673e2 | 2023-11-20 10:44:21 +0100 | [diff] [blame] | 320 | if (!path[0]) { |
| 321 | /* ABNS sockets do not support suspend, and they conflict with |
| 322 | * other ones (no reuseport), so they must always be unbound. |
| 323 | */ |
| 324 | rx->flags |= RX_F_NON_SUSPENDABLE; |
| 325 | } |
| 326 | |
Willy Tarreau | 233ad28 | 2020-10-15 21:45:15 +0200 | [diff] [blame] | 327 | fd_insert(fd, rx->owner, rx->iocb, thread_mask(rx->settings->bind_thread) & all_threads_mask); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 328 | |
| 329 | /* for now, all regularly bound TCP listeners are exportable */ |
| 330 | if (!(rx->flags & RX_F_INHERITED)) |
Willy Tarreau | 9063a66 | 2021-04-06 18:09:06 +0200 | [diff] [blame] | 331 | HA_ATOMIC_OR(&fdtab[fd].state, FD_EXPORTED); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 332 | |
| 333 | return err; |
| 334 | |
| 335 | err_rename: |
| 336 | ret = rename(backname, path); |
| 337 | if (ret < 0 && errno == ENOENT) |
| 338 | unlink(path); |
| 339 | err_unlink_temp: |
| 340 | if (!ext && path[0]) |
| 341 | unlink(tempname); |
| 342 | close(fd); |
| 343 | err_unlink_back: |
| 344 | if (!ext && path[0]) |
| 345 | unlink(backname); |
| 346 | bind_return: |
| 347 | if (errmsg && *errmsg) { |
Aurelien DARRAGON | a802e14 | 2023-02-06 19:23:40 +0100 | [diff] [blame] | 348 | if (!ext) { |
| 349 | char *path_str; |
| 350 | |
| 351 | path_str = sa2str((struct sockaddr_storage *)&rx->addr, 0, 0); |
| 352 | memprintf(errmsg, "%s [%s]", *errmsg, ((path_str) ? path_str : "")); |
| 353 | ha_free(&path_str); |
| 354 | } |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 355 | else |
| 356 | memprintf(errmsg, "%s [fd %d]", *errmsg, fd); |
| 357 | } |
| 358 | return err; |
| 359 | |
| 360 | bind_close_return: |
| 361 | close(fd); |
| 362 | goto bind_return; |
| 363 | } |