Willy Tarreau | 0d06df6 | 2020-08-28 15:10:11 +0200 | [diff] [blame] | 1 | /* |
| 2 | * SOCK_UNIX socket management |
| 3 | * |
| 4 | * Copyright 2000-2020 Willy Tarreau <w@1wt.eu> |
| 5 | * |
| 6 | * This program is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU General Public License |
| 8 | * as published by the Free Software Foundation; either version |
| 9 | * 2 of the License, or (at your option) any later version. |
| 10 | * |
| 11 | */ |
| 12 | |
| 13 | #include <ctype.h> |
Willy Tarreau | eb8cfe6 | 2020-09-16 22:15:40 +0200 | [diff] [blame] | 14 | #include <errno.h> |
Willy Tarreau | 0d06df6 | 2020-08-28 15:10:11 +0200 | [diff] [blame] | 15 | #include <string.h> |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 16 | #include <unistd.h> |
Willy Tarreau | 0d06df6 | 2020-08-28 15:10:11 +0200 | [diff] [blame] | 17 | |
| 18 | #include <sys/param.h> |
| 19 | #include <sys/socket.h> |
| 20 | #include <sys/types.h> |
| 21 | |
| 22 | #include <sys/socket.h> |
| 23 | #include <sys/stat.h> |
| 24 | #include <sys/types.h> |
| 25 | #include <sys/un.h> |
| 26 | |
| 27 | #include <haproxy/api.h> |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 28 | #include <haproxy/errors.h> |
| 29 | #include <haproxy/fd.h> |
| 30 | #include <haproxy/global.h> |
Willy Tarreau | 0d06df6 | 2020-08-28 15:10:11 +0200 | [diff] [blame] | 31 | #include <haproxy/listener.h> |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 32 | #include <haproxy/receiver-t.h> |
Willy Tarreau | 0d06df6 | 2020-08-28 15:10:11 +0200 | [diff] [blame] | 33 | #include <haproxy/namespace.h> |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 34 | #include <haproxy/sock.h> |
Willy Tarreau | 0d06df6 | 2020-08-28 15:10:11 +0200 | [diff] [blame] | 35 | #include <haproxy/sock_unix.h> |
| 36 | #include <haproxy/tools.h> |
| 37 | |
| 38 | |
Willy Tarreau | b0254cb | 2020-09-04 08:07:11 +0200 | [diff] [blame] | 39 | struct proto_fam proto_fam_unix = { |
| 40 | .name = "unix", |
| 41 | .sock_domain = PF_UNIX, |
| 42 | .sock_family = AF_UNIX, |
| 43 | .sock_addrlen = sizeof(struct sockaddr_un), |
| 44 | .l3_addrlen = sizeof(((struct sockaddr_un*)0)->sun_path), |
| 45 | .addrcmp = sock_unix_addrcmp, |
| 46 | .bind = sock_unix_bind_receiver, |
| 47 | .get_src = sock_get_src, |
| 48 | .get_dst = sock_get_dst, |
| 49 | }; |
| 50 | |
Willy Tarreau | 0d06df6 | 2020-08-28 15:10:11 +0200 | [diff] [blame] | 51 | /* PLEASE NOTE for functions below: |
| 52 | * |
| 53 | * The address family SHOULD always be checked. In some cases a function will |
| 54 | * be used in a situation where the address family is guaranteed (e.g. protocol |
| 55 | * definitions), so the test may be avoided. This special case must then be |
| 56 | * mentioned in the comment before the function definition. |
| 57 | */ |
| 58 | |
| 59 | |
/* Checks that what follows the dot located at offset <dot> in <path> is made
 * of an optional sequence of digits immediately followed by ".tmp" and the end
 * of the string. Returns non-zero if so, otherwise zero.
 */
static int sock_unix_has_tmp_suffix(const char *path, int dot)
{
	int pos = dot + 1;

	while (pos && isdigit((unsigned char)path[pos]))
		pos++;

	return strcmp(path + pos, ".tmp") == 0;
}

/* Tells whether two AF_UNIX socket addresses designate the same socket.
 * Returns 0 when <a> and <b> match, non-zero when they do not. Both addresses
 * must be of family AF_UNIX, otherwise they are reported as different.
 *
 * ABNS addresses (first path byte is \0) are compared over the whole sun_path
 * area. Regular paths are compared as zero-terminated strings, except that a
 * trailing ".XXXXX.tmp" (digits between the dots) is ignored on either side,
 * since it designates a freshly bound socket about to replace the final one:
 *    /tmp/test           matches /tmp/test.1822.tmp
 *    /tmp/test.1822.tmp  matches /tmp/test.XXXX.tmp
 */
int sock_unix_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b)
{
	const char *pa = ((const struct sockaddr_un *)a)->sun_path;
	const char *pb = ((const struct sockaddr_un *)b)->sun_path;
	int last_dot = 0;
	int pos;

	if (a->ss_family != b->ss_family || a->ss_family != AF_UNIX)
		return -1;

	if (pa[0] != pb[0])
		return -1;

	/* abstract namespace: every byte of the path area is significant */
	if (pa[0] == '\0')
		return memcmp(pa, pb, sizeof(((const struct sockaddr_un *)0)->sun_path));

	/* skip the common prefix while remembering the last dot met in it */
	for (pos = 1; pa[pos] == pb[pos]; pos++) {
		if (pa[pos] == '\0')
			return 0;
		if (pa[pos] == '.')
			last_dot = pos;
	}

	/* The paths differ at <pos>. When one of them stops here, it is in
	 * its "final" form and the other one may only continue with a dot
	 * opening the temporary suffix.
	 */
	if (pa[pos] == '\0' || pb[pos] == '\0') {
		if (pa[pos] != '.' && pb[pos] != '.')
			return -1; /* invalid temp path */
		last_dot = pos;
	}

	if (last_dot == 0)
		return -1;

	/* each path which continues past <pos> must end with .<digits>.tmp */
	if (pa[pos] != '\0' && !sock_unix_has_tmp_suffix(pa, last_dot))
		return -1;

	if (pb[pos] != '\0' && !sock_unix_has_tmp_suffix(pb, last_dot))
		return -1;

	return 0;
}
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 133 | |
Willy Tarreau | 233ad28 | 2020-10-15 21:45:15 +0200 | [diff] [blame] | 134 | /* Binds receiver <rx>, and assigns rx->iocb and rx->owner as the callback and |
| 135 | * context, respectively, with ->bind_thread as the thread mask. Returns an |
| 136 | * error code made of ERR_* bits on failure or ERR_NONE on success. On failure, |
| 137 | * an error message may be passed into <errmsg>. |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 138 | */ |
Willy Tarreau | 233ad28 | 2020-10-15 21:45:15 +0200 | [diff] [blame] | 139 | int sock_unix_bind_receiver(struct receiver *rx, char **errmsg) |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 140 | { |
| 141 | char tempname[MAXPATHLEN]; |
| 142 | char backname[MAXPATHLEN]; |
| 143 | struct sockaddr_un addr; |
| 144 | const char *path; |
| 145 | int maxpathlen; |
| 146 | int fd, err, ext, ret; |
| 147 | |
| 148 | /* ensure we never return garbage */ |
| 149 | if (errmsg) |
| 150 | *errmsg = 0; |
| 151 | |
| 152 | err = ERR_NONE; |
| 153 | |
| 154 | if (rx->flags & RX_F_BOUND) |
| 155 | return ERR_NONE; |
| 156 | |
Willy Tarreau | 0e1aaf4 | 2023-02-27 16:39:32 +0100 | [diff] [blame] | 157 | if (rx->flags & RX_F_MUST_DUP) { |
| 158 | /* this is a secondary receiver that is an exact copy of a |
| 159 | * reference which must already be bound (or has failed). |
| 160 | * We'll try to dup() the other one's FD and take it. We |
| 161 | * try hard not to reconfigure the socket since it's shared. |
| 162 | */ |
| 163 | BUG_ON(!rx->shard_info); |
| 164 | if (!(rx->shard_info->ref->flags & RX_F_BOUND)) { |
| 165 | /* it's assumed that the first one has already reported |
| 166 | * the error, let's not spam with another one, and do |
| 167 | * not set ERR_ALERT. |
| 168 | */ |
| 169 | err |= ERR_RETRYABLE; |
| 170 | goto bind_ret_err; |
| 171 | } |
| 172 | /* taking the other one's FD will result in it being marked |
| 173 | * extern and being dup()ed. Let's mark the receiver as |
| 174 | * inherited so that it properly bypasses all second-stage |
| 175 | * setup and avoids being passed to new processes. |
| 176 | */ |
| 177 | rx->flags |= RX_F_INHERITED; |
| 178 | rx->fd = rx->shard_info->ref->fd; |
| 179 | } |
| 180 | |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 181 | /* if no FD was assigned yet, we'll have to either find a compatible |
| 182 | * one or create a new one. |
| 183 | */ |
| 184 | if (rx->fd == -1) |
| 185 | rx->fd = sock_find_compatible_fd(rx); |
| 186 | |
| 187 | path = ((struct sockaddr_un *)&rx->addr)->sun_path; |
| 188 | maxpathlen = MIN(MAXPATHLEN, sizeof(addr.sun_path)); |
| 189 | |
| 190 | /* if the listener already has an fd assigned, then we were offered the |
| 191 | * fd by an external process (most likely the parent), and we don't want |
| 192 | * to create a new socket. However we still want to set a few flags on |
| 193 | * the socket. |
| 194 | */ |
| 195 | fd = rx->fd; |
| 196 | ext = (fd >= 0); |
| 197 | if (ext) |
| 198 | goto fd_ready; |
| 199 | |
| 200 | if (path[0]) { |
| 201 | ret = snprintf(tempname, maxpathlen, "%s.%d.tmp", path, pid); |
| 202 | if (ret < 0 || ret >= sizeof(addr.sun_path)) { |
| 203 | err |= ERR_FATAL | ERR_ALERT; |
| 204 | memprintf(errmsg, "name too long for UNIX socket (limit usually 97)"); |
| 205 | goto bind_return; |
| 206 | } |
| 207 | |
| 208 | ret = snprintf(backname, maxpathlen, "%s.%d.bak", path, pid); |
| 209 | if (ret < 0 || ret >= maxpathlen) { |
| 210 | err |= ERR_FATAL | ERR_ALERT; |
| 211 | memprintf(errmsg, "name too long for UNIX socket (limit usually 97)"); |
| 212 | goto bind_return; |
| 213 | } |
| 214 | |
| 215 | /* 2. clean existing orphaned entries */ |
| 216 | if (unlink(tempname) < 0 && errno != ENOENT) { |
| 217 | err |= ERR_FATAL | ERR_ALERT; |
Willy Tarreau | 3cd58bf | 2020-09-17 08:35:38 +0200 | [diff] [blame] | 218 | memprintf(errmsg, "error when trying to unlink previous UNIX socket (%s)", strerror(errno)); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 219 | goto bind_return; |
| 220 | } |
| 221 | |
| 222 | if (unlink(backname) < 0 && errno != ENOENT) { |
| 223 | err |= ERR_FATAL | ERR_ALERT; |
Willy Tarreau | 3cd58bf | 2020-09-17 08:35:38 +0200 | [diff] [blame] | 224 | memprintf(errmsg, "error when trying to unlink previous UNIX socket (%s)", strerror(errno)); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 225 | goto bind_return; |
| 226 | } |
| 227 | |
| 228 | /* 3. backup existing socket */ |
| 229 | if (link(path, backname) < 0 && errno != ENOENT) { |
| 230 | err |= ERR_FATAL | ERR_ALERT; |
Willy Tarreau | 3cd58bf | 2020-09-17 08:35:38 +0200 | [diff] [blame] | 231 | memprintf(errmsg, "error when trying to preserve previous UNIX socket (%s)", strerror(errno)); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 232 | goto bind_return; |
| 233 | } |
| 234 | |
| 235 | /* Note: this test is redundant with the snprintf one above and |
| 236 | * will never trigger, it's just added as the only way to shut |
| 237 | * gcc's painfully dumb warning about possibly truncated output |
| 238 | * during strncpy(). Don't move it above or smart gcc will not |
| 239 | * see it! |
| 240 | */ |
| 241 | if (strlen(tempname) >= sizeof(addr.sun_path)) { |
| 242 | err |= ERR_FATAL | ERR_ALERT; |
| 243 | memprintf(errmsg, "name too long for UNIX socket (limit usually 97)"); |
| 244 | goto bind_return; |
| 245 | } |
| 246 | |
| 247 | strncpy(addr.sun_path, tempname, sizeof(addr.sun_path) - 1); |
| 248 | addr.sun_path[sizeof(addr.sun_path) - 1] = 0; |
| 249 | } |
| 250 | else { |
| 251 | /* first char is zero, it's an abstract socket whose address |
| 252 | * is defined by all the bytes past this zero. |
| 253 | */ |
| 254 | memcpy(addr.sun_path, path, sizeof(addr.sun_path)); |
| 255 | } |
| 256 | addr.sun_family = AF_UNIX; |
| 257 | |
| 258 | /* WT: shouldn't we use my_socketat(rx->netns) here instead ? */ |
Willy Tarreau | f1f6609 | 2020-09-04 08:15:31 +0200 | [diff] [blame] | 259 | fd = socket(rx->proto->fam->sock_domain, rx->proto->sock_type, rx->proto->sock_prot); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 260 | if (fd < 0) { |
| 261 | err |= ERR_FATAL | ERR_ALERT; |
Willy Tarreau | 3cd58bf | 2020-09-17 08:35:38 +0200 | [diff] [blame] | 262 | memprintf(errmsg, "cannot create receiving socket (%s)", strerror(errno)); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 263 | goto bind_return; |
| 264 | } |
| 265 | |
| 266 | fd_ready: |
Willy Tarreau | 145b17f | 2023-01-11 10:59:52 +0100 | [diff] [blame] | 267 | if (ext && fd < global.maxsock && fdtab[fd].owner) { |
| 268 | /* This FD was already bound so this means that it was already |
| 269 | * known and registered before parsing, hence it's an inherited |
| 270 | * FD. The only reason why it's already known here is that it |
| 271 | * has been registered multiple times (multiple listeners on the |
| 272 | * same, or a "shards" directive on the line). There cannot be |
| 273 | * multiple listeners on one FD but at least we can create a |
| 274 | * new one from the original one. We won't reconfigure it, |
| 275 | * however, as this was already done for the first one. |
| 276 | */ |
| 277 | fd = dup(fd); |
| 278 | if (fd == -1) { |
| 279 | err |= ERR_RETRYABLE | ERR_ALERT; |
| 280 | memprintf(errmsg, "cannot dup() receiving socket (%s)", strerror(errno)); |
| 281 | goto bind_return; |
| 282 | } |
| 283 | } |
| 284 | |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 285 | if (fd >= global.maxsock) { |
| 286 | err |= ERR_FATAL | ERR_ABORT | ERR_ALERT; |
| 287 | memprintf(errmsg, "not enough free sockets (raise '-n' parameter)"); |
| 288 | goto bind_close_return; |
| 289 | } |
| 290 | |
Willy Tarreau | 3824743 | 2022-04-26 10:24:14 +0200 | [diff] [blame] | 291 | if (fd_set_nonblock(fd) == -1) { |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 292 | err |= ERR_FATAL | ERR_ALERT; |
| 293 | memprintf(errmsg, "cannot make socket non-blocking"); |
| 294 | goto bind_close_return; |
| 295 | } |
| 296 | |
| 297 | if (!ext && bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) { |
| 298 | /* note that bind() creates the socket <tempname> on the file system */ |
| 299 | if (errno == EADDRINUSE) { |
| 300 | /* the old process might still own it, let's retry */ |
| 301 | err |= ERR_RETRYABLE | ERR_ALERT; |
| 302 | memprintf(errmsg, "cannot bind UNIX socket (already in use)"); |
| 303 | goto bind_close_return; |
| 304 | } |
| 305 | else { |
| 306 | err |= ERR_FATAL | ERR_ALERT; |
Willy Tarreau | 3cd58bf | 2020-09-17 08:35:38 +0200 | [diff] [blame] | 307 | memprintf(errmsg, "cannot bind UNIX socket (%s)", strerror(errno)); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 308 | goto bind_close_return; |
| 309 | } |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 310 | } |
| 311 | |
| 312 | /* <uid> and <gid> different of -1 will be used to change the socket owner. |
| 313 | * If <mode> is not 0, it will be used to restrict access to the socket. |
| 314 | * While it is known not to be portable on every OS, it's still useful |
| 315 | * where it works. We also don't change permissions on abstract sockets. |
| 316 | */ |
| 317 | if (!ext && path[0] && |
| 318 | (((rx->settings->ux.uid != -1 || rx->settings->ux.gid != -1) && |
| 319 | (chown(tempname, rx->settings->ux.uid, rx->settings->ux.gid) == -1)) || |
| 320 | (rx->settings->ux.mode != 0 && chmod(tempname, rx->settings->ux.mode) == -1))) { |
| 321 | err |= ERR_FATAL | ERR_ALERT; |
Willy Tarreau | 3cd58bf | 2020-09-17 08:35:38 +0200 | [diff] [blame] | 322 | memprintf(errmsg, "cannot change UNIX socket ownership (%s)", strerror(errno)); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 323 | goto err_unlink_temp; |
| 324 | } |
| 325 | |
| 326 | /* Point of no return: we are ready, we'll switch the sockets. We don't |
| 327 | * fear losing the socket <path> because we have a copy of it in |
| 328 | * backname. Abstract sockets are not renamed. |
| 329 | */ |
| 330 | if (!ext && path[0] && rename(tempname, path) < 0) { |
| 331 | err |= ERR_FATAL | ERR_ALERT; |
Willy Tarreau | 3cd58bf | 2020-09-17 08:35:38 +0200 | [diff] [blame] | 332 | memprintf(errmsg, "cannot switch final and temporary UNIX sockets (%s)", strerror(errno)); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 333 | goto err_rename; |
| 334 | } |
| 335 | |
| 336 | /* Cleanup: only unlink if we didn't inherit the fd from the parent */ |
| 337 | if (!ext && path[0]) |
| 338 | unlink(backname); |
| 339 | |
| 340 | rx->fd = fd; |
| 341 | rx->flags |= RX_F_BOUND; |
| 342 | |
Willy Tarreau | 9464bb1 | 2022-07-05 05:16:13 +0200 | [diff] [blame] | 343 | fd_insert(fd, rx->owner, rx->iocb, rx->bind_tgroup, rx->bind_thread); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 344 | |
| 345 | /* for now, all regularly bound TCP listeners are exportable */ |
| 346 | if (!(rx->flags & RX_F_INHERITED)) |
Willy Tarreau | 9063a66 | 2021-04-06 18:09:06 +0200 | [diff] [blame] | 347 | HA_ATOMIC_OR(&fdtab[fd].state, FD_EXPORTED); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 348 | |
| 349 | return err; |
| 350 | |
| 351 | err_rename: |
| 352 | ret = rename(backname, path); |
| 353 | if (ret < 0 && errno == ENOENT) |
| 354 | unlink(path); |
| 355 | err_unlink_temp: |
| 356 | if (!ext && path[0]) |
| 357 | unlink(tempname); |
| 358 | close(fd); |
| 359 | err_unlink_back: |
| 360 | if (!ext && path[0]) |
| 361 | unlink(backname); |
| 362 | bind_return: |
| 363 | if (errmsg && *errmsg) { |
Aurelien DARRAGON | de63efb | 2023-02-06 19:23:40 +0100 | [diff] [blame] | 364 | if (!ext) { |
| 365 | char *path_str; |
| 366 | |
| 367 | path_str = sa2str((struct sockaddr_storage *)&rx->addr, 0, 0); |
| 368 | memprintf(errmsg, "%s [%s]", *errmsg, ((path_str) ? path_str : "")); |
| 369 | ha_free(&path_str); |
| 370 | } |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 371 | else |
| 372 | memprintf(errmsg, "%s [fd %d]", *errmsg, fd); |
| 373 | } |
Willy Tarreau | 0e1aaf4 | 2023-02-27 16:39:32 +0100 | [diff] [blame] | 374 | bind_ret_err: |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 375 | return err; |
| 376 | |
| 377 | bind_close_return: |
| 378 | close(fd); |
| 379 | goto bind_return; |
| 380 | } |