Willy Tarreau | 0d06df6 | 2020-08-28 15:10:11 +0200 | [diff] [blame] | 1 | /* |
| 2 | * SOCK_UNIX socket management |
| 3 | * |
| 4 | * Copyright 2000-2020 Willy Tarreau <w@1wt.eu> |
| 5 | * |
| 6 | * This program is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU General Public License |
| 8 | * as published by the Free Software Foundation; either version |
| 9 | * 2 of the License, or (at your option) any later version. |
| 10 | * |
| 11 | */ |
| 12 | |
| 13 | #include <ctype.h> |
Willy Tarreau | eb8cfe6 | 2020-09-16 22:15:40 +0200 | [diff] [blame] | 14 | #include <errno.h> |
Willy Tarreau | 0d06df6 | 2020-08-28 15:10:11 +0200 | [diff] [blame] | 15 | #include <string.h> |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 16 | #include <unistd.h> |
Willy Tarreau | 0d06df6 | 2020-08-28 15:10:11 +0200 | [diff] [blame] | 17 | |
| 18 | #include <sys/param.h> |
| 19 | #include <sys/socket.h> |
| 20 | #include <sys/types.h> |
| 21 | |
| 22 | #include <sys/socket.h> |
| 23 | #include <sys/stat.h> |
| 24 | #include <sys/types.h> |
| 25 | #include <sys/un.h> |
| 26 | |
| 27 | #include <haproxy/api.h> |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 28 | #include <haproxy/errors.h> |
| 29 | #include <haproxy/fd.h> |
| 30 | #include <haproxy/global.h> |
Willy Tarreau | 0d06df6 | 2020-08-28 15:10:11 +0200 | [diff] [blame] | 31 | #include <haproxy/listener.h> |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 32 | #include <haproxy/receiver-t.h> |
Willy Tarreau | 0d06df6 | 2020-08-28 15:10:11 +0200 | [diff] [blame] | 33 | #include <haproxy/namespace.h> |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 34 | #include <haproxy/sock.h> |
Willy Tarreau | 0d06df6 | 2020-08-28 15:10:11 +0200 | [diff] [blame] | 35 | #include <haproxy/sock_unix.h> |
| 36 | #include <haproxy/tools.h> |
| 37 | |
| 38 | |
Willy Tarreau | b0254cb | 2020-09-04 08:07:11 +0200 | [diff] [blame] | 39 | struct proto_fam proto_fam_unix = { |
| 40 | .name = "unix", |
| 41 | .sock_domain = PF_UNIX, |
| 42 | .sock_family = AF_UNIX, |
| 43 | .sock_addrlen = sizeof(struct sockaddr_un), |
| 44 | .l3_addrlen = sizeof(((struct sockaddr_un*)0)->sun_path), |
| 45 | .addrcmp = sock_unix_addrcmp, |
| 46 | .bind = sock_unix_bind_receiver, |
| 47 | .get_src = sock_get_src, |
| 48 | .get_dst = sock_get_dst, |
| 49 | }; |
| 50 | |
Willy Tarreau | 0d06df6 | 2020-08-28 15:10:11 +0200 | [diff] [blame] | 51 | /* PLEASE NOTE for functions below: |
| 52 | * |
| 53 | * The address family SHOULD always be checked. In some cases a function will |
| 54 | * be used in a situation where the address family is guaranteed (e.g. protocol |
| 55 | * definitions), so the test may be avoided. This special case must then be |
| 56 | * mentioned in the comment before the function definition. |
| 57 | */ |
| 58 | |
| 59 | |
/* Compares two AF_UNIX sockaddr addresses. Returns 0 if they match or non-zero
 * if they do not match. Addresses whose families differ, or which are not of
 * family AF_UNIX, are reported as not matching. It also supports ABNS socket
 * addresses (those starting with \0), which are compared over the whole
 * sun_path. For regular UNIX sockets however, this does explicitly support
 * matching names ending exactly with .XXXXX.tmp which are newly bound sockets
 * about to be replaced; this suffix is then ignored, whether it appears on one
 * side only or on both sides. Note that our UNIX socket paths are always
 * zero-terminated.
 *
 * Examples:
 *     /tmp/test          matches         /tmp/test.1822.tmp
 *     /tmp/test.1822.tmp matches         /tmp/test.33.tmp
 *     /tmp/test          does not match  /tmp/test2
 */
int sock_unix_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b)
{
	const struct sockaddr_un *au = (const struct sockaddr_un *)a;
	const struct sockaddr_un *bu = (const struct sockaddr_un *)b;
	int idx, dot, idx2;

	if (a->ss_family != b->ss_family)
		return -1;

	if (a->ss_family != AF_UNIX)
		return -1;

	if (au->sun_path[0] != bu->sun_path[0])
		return -1;

	/* abstract sockets: every byte of the path is significant */
	if (au->sun_path[0] == 0)
		return memcmp(au->sun_path, bu->sun_path, sizeof(au->sun_path));

	/* walk the common prefix and remember the position of its last dot
	 * (0 means "no dot seen", the first character was already compared).
	 */
	idx = 1; dot = 0;
	while (au->sun_path[idx] == bu->sun_path[idx]) {
		if (au->sun_path[idx] == 0)
			return 0;
		if (au->sun_path[idx] == '.')
			dot = idx;
		idx++;
	}

	/* Now we have a difference. It's OK if they are within or after a
	 * sequence of digits following a dot, and are followed by ".tmp".
	 *
	 * When one of the paths ends right here, it is in its "final" format
	 * (no ".XXXXX.tmp" suffix) and the other one may only match if its
	 * temporary suffix starts at this exact position. The dot to consider
	 * is then the one found here and not the last one of the common
	 * prefix, otherwise a final name would never match its own temporary
	 * name.
	 */
	if (au->sun_path[idx] == 0 || bu->sun_path[idx] == 0) {
		if (au->sun_path[idx] == '.' || bu->sun_path[idx] == '.')
			dot = idx; /* try to match against the temporary path */
		else
			return -1; /* not a temporary suffix */
	}

	if (!dot)
		return -1;

	/* First, check in path "a". Note: <idx2> is always non-zero here, the
	 * test on it in the loop condition is kept as it was originally.
	 */
	if (au->sun_path[idx] != 0) {
		for (idx2 = dot + 1; idx2 && isdigit((unsigned char)au->sun_path[idx2]); idx2++)
			;
		if (strcmp(au->sun_path + idx2, ".tmp") != 0)
			return -1;
	}

	/* Then check in path "b" */
	if (bu->sun_path[idx] != 0) {
		for (idx2 = dot + 1; idx2 && isdigit((unsigned char)bu->sun_path[idx2]); idx2++)
			;
		if (strcmp(bu->sun_path + idx2, ".tmp") != 0)
			return -1;
	}

	/* OK that's a match */
	return 0;
}
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 119 | |
Willy Tarreau | 233ad28 | 2020-10-15 21:45:15 +0200 | [diff] [blame] | 120 | /* Binds receiver <rx>, and assigns rx->iocb and rx->owner as the callback and |
| 121 | * context, respectively, with ->bind_thread as the thread mask. Returns an |
| 122 | * error code made of ERR_* bits on failure or ERR_NONE on success. On failure, |
| 123 | * an error message may be passed into <errmsg>. |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 124 | */ |
Willy Tarreau | 233ad28 | 2020-10-15 21:45:15 +0200 | [diff] [blame] | 125 | int sock_unix_bind_receiver(struct receiver *rx, char **errmsg) |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 126 | { |
| 127 | char tempname[MAXPATHLEN]; |
| 128 | char backname[MAXPATHLEN]; |
| 129 | struct sockaddr_un addr; |
| 130 | const char *path; |
| 131 | int maxpathlen; |
| 132 | int fd, err, ext, ret; |
| 133 | |
| 134 | /* ensure we never return garbage */ |
| 135 | if (errmsg) |
| 136 | *errmsg = 0; |
| 137 | |
| 138 | err = ERR_NONE; |
| 139 | |
| 140 | if (rx->flags & RX_F_BOUND) |
| 141 | return ERR_NONE; |
| 142 | |
| 143 | /* if no FD was assigned yet, we'll have to either find a compatible |
| 144 | * one or create a new one. |
| 145 | */ |
| 146 | if (rx->fd == -1) |
| 147 | rx->fd = sock_find_compatible_fd(rx); |
| 148 | |
| 149 | path = ((struct sockaddr_un *)&rx->addr)->sun_path; |
| 150 | maxpathlen = MIN(MAXPATHLEN, sizeof(addr.sun_path)); |
| 151 | |
| 152 | /* if the listener already has an fd assigned, then we were offered the |
| 153 | * fd by an external process (most likely the parent), and we don't want |
| 154 | * to create a new socket. However we still want to set a few flags on |
| 155 | * the socket. |
| 156 | */ |
| 157 | fd = rx->fd; |
| 158 | ext = (fd >= 0); |
| 159 | if (ext) |
| 160 | goto fd_ready; |
| 161 | |
| 162 | if (path[0]) { |
| 163 | ret = snprintf(tempname, maxpathlen, "%s.%d.tmp", path, pid); |
| 164 | if (ret < 0 || ret >= sizeof(addr.sun_path)) { |
| 165 | err |= ERR_FATAL | ERR_ALERT; |
| 166 | memprintf(errmsg, "name too long for UNIX socket (limit usually 97)"); |
| 167 | goto bind_return; |
| 168 | } |
| 169 | |
| 170 | ret = snprintf(backname, maxpathlen, "%s.%d.bak", path, pid); |
| 171 | if (ret < 0 || ret >= maxpathlen) { |
| 172 | err |= ERR_FATAL | ERR_ALERT; |
| 173 | memprintf(errmsg, "name too long for UNIX socket (limit usually 97)"); |
| 174 | goto bind_return; |
| 175 | } |
| 176 | |
| 177 | /* 2. clean existing orphaned entries */ |
| 178 | if (unlink(tempname) < 0 && errno != ENOENT) { |
| 179 | err |= ERR_FATAL | ERR_ALERT; |
Willy Tarreau | 3cd58bf | 2020-09-17 08:35:38 +0200 | [diff] [blame] | 180 | memprintf(errmsg, "error when trying to unlink previous UNIX socket (%s)", strerror(errno)); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 181 | goto bind_return; |
| 182 | } |
| 183 | |
| 184 | if (unlink(backname) < 0 && errno != ENOENT) { |
| 185 | err |= ERR_FATAL | ERR_ALERT; |
Willy Tarreau | 3cd58bf | 2020-09-17 08:35:38 +0200 | [diff] [blame] | 186 | memprintf(errmsg, "error when trying to unlink previous UNIX socket (%s)", strerror(errno)); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 187 | goto bind_return; |
| 188 | } |
| 189 | |
| 190 | /* 3. backup existing socket */ |
| 191 | if (link(path, backname) < 0 && errno != ENOENT) { |
| 192 | err |= ERR_FATAL | ERR_ALERT; |
Willy Tarreau | 3cd58bf | 2020-09-17 08:35:38 +0200 | [diff] [blame] | 193 | memprintf(errmsg, "error when trying to preserve previous UNIX socket (%s)", strerror(errno)); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 194 | goto bind_return; |
| 195 | } |
| 196 | |
| 197 | /* Note: this test is redundant with the snprintf one above and |
| 198 | * will never trigger, it's just added as the only way to shut |
| 199 | * gcc's painfully dumb warning about possibly truncated output |
| 200 | * during strncpy(). Don't move it above or smart gcc will not |
| 201 | * see it! |
| 202 | */ |
| 203 | if (strlen(tempname) >= sizeof(addr.sun_path)) { |
| 204 | err |= ERR_FATAL | ERR_ALERT; |
| 205 | memprintf(errmsg, "name too long for UNIX socket (limit usually 97)"); |
| 206 | goto bind_return; |
| 207 | } |
| 208 | |
| 209 | strncpy(addr.sun_path, tempname, sizeof(addr.sun_path) - 1); |
| 210 | addr.sun_path[sizeof(addr.sun_path) - 1] = 0; |
| 211 | } |
| 212 | else { |
| 213 | /* first char is zero, it's an abstract socket whose address |
| 214 | * is defined by all the bytes past this zero. |
| 215 | */ |
| 216 | memcpy(addr.sun_path, path, sizeof(addr.sun_path)); |
| 217 | } |
| 218 | addr.sun_family = AF_UNIX; |
| 219 | |
| 220 | /* WT: shouldn't we use my_socketat(rx->netns) here instead ? */ |
Willy Tarreau | f1f6609 | 2020-09-04 08:15:31 +0200 | [diff] [blame] | 221 | fd = socket(rx->proto->fam->sock_domain, rx->proto->sock_type, rx->proto->sock_prot); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 222 | if (fd < 0) { |
| 223 | err |= ERR_FATAL | ERR_ALERT; |
Willy Tarreau | 3cd58bf | 2020-09-17 08:35:38 +0200 | [diff] [blame] | 224 | memprintf(errmsg, "cannot create receiving socket (%s)", strerror(errno)); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 225 | goto bind_return; |
| 226 | } |
| 227 | |
| 228 | fd_ready: |
Willy Tarreau | 145b17f | 2023-01-11 10:59:52 +0100 | [diff] [blame] | 229 | if (ext && fd < global.maxsock && fdtab[fd].owner) { |
| 230 | /* This FD was already bound so this means that it was already |
| 231 | * known and registered before parsing, hence it's an inherited |
| 232 | * FD. The only reason why it's already known here is that it |
| 233 | * has been registered multiple times (multiple listeners on the |
| 234 | * same, or a "shards" directive on the line). There cannot be |
| 235 | * multiple listeners on one FD but at least we can create a |
| 236 | * new one from the original one. We won't reconfigure it, |
| 237 | * however, as this was already done for the first one. |
| 238 | */ |
| 239 | fd = dup(fd); |
| 240 | if (fd == -1) { |
| 241 | err |= ERR_RETRYABLE | ERR_ALERT; |
| 242 | memprintf(errmsg, "cannot dup() receiving socket (%s)", strerror(errno)); |
| 243 | goto bind_return; |
| 244 | } |
| 245 | } |
| 246 | |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 247 | if (fd >= global.maxsock) { |
| 248 | err |= ERR_FATAL | ERR_ABORT | ERR_ALERT; |
| 249 | memprintf(errmsg, "not enough free sockets (raise '-n' parameter)"); |
| 250 | goto bind_close_return; |
| 251 | } |
| 252 | |
Willy Tarreau | 3824743 | 2022-04-26 10:24:14 +0200 | [diff] [blame] | 253 | if (fd_set_nonblock(fd) == -1) { |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 254 | err |= ERR_FATAL | ERR_ALERT; |
| 255 | memprintf(errmsg, "cannot make socket non-blocking"); |
| 256 | goto bind_close_return; |
| 257 | } |
| 258 | |
| 259 | if (!ext && bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) { |
| 260 | /* note that bind() creates the socket <tempname> on the file system */ |
| 261 | if (errno == EADDRINUSE) { |
| 262 | /* the old process might still own it, let's retry */ |
| 263 | err |= ERR_RETRYABLE | ERR_ALERT; |
| 264 | memprintf(errmsg, "cannot bind UNIX socket (already in use)"); |
| 265 | goto bind_close_return; |
| 266 | } |
| 267 | else { |
| 268 | err |= ERR_FATAL | ERR_ALERT; |
Willy Tarreau | 3cd58bf | 2020-09-17 08:35:38 +0200 | [diff] [blame] | 269 | memprintf(errmsg, "cannot bind UNIX socket (%s)", strerror(errno)); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 270 | goto bind_close_return; |
| 271 | } |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 272 | } |
| 273 | |
| 274 | /* <uid> and <gid> different of -1 will be used to change the socket owner. |
| 275 | * If <mode> is not 0, it will be used to restrict access to the socket. |
| 276 | * While it is known not to be portable on every OS, it's still useful |
| 277 | * where it works. We also don't change permissions on abstract sockets. |
| 278 | */ |
| 279 | if (!ext && path[0] && |
| 280 | (((rx->settings->ux.uid != -1 || rx->settings->ux.gid != -1) && |
| 281 | (chown(tempname, rx->settings->ux.uid, rx->settings->ux.gid) == -1)) || |
| 282 | (rx->settings->ux.mode != 0 && chmod(tempname, rx->settings->ux.mode) == -1))) { |
| 283 | err |= ERR_FATAL | ERR_ALERT; |
Willy Tarreau | 3cd58bf | 2020-09-17 08:35:38 +0200 | [diff] [blame] | 284 | memprintf(errmsg, "cannot change UNIX socket ownership (%s)", strerror(errno)); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 285 | goto err_unlink_temp; |
| 286 | } |
| 287 | |
| 288 | /* Point of no return: we are ready, we'll switch the sockets. We don't |
| 289 | * fear losing the socket <path> because we have a copy of it in |
| 290 | * backname. Abstract sockets are not renamed. |
| 291 | */ |
| 292 | if (!ext && path[0] && rename(tempname, path) < 0) { |
| 293 | err |= ERR_FATAL | ERR_ALERT; |
Willy Tarreau | 3cd58bf | 2020-09-17 08:35:38 +0200 | [diff] [blame] | 294 | memprintf(errmsg, "cannot switch final and temporary UNIX sockets (%s)", strerror(errno)); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 295 | goto err_rename; |
| 296 | } |
| 297 | |
| 298 | /* Cleanup: only unlink if we didn't inherit the fd from the parent */ |
| 299 | if (!ext && path[0]) |
| 300 | unlink(backname); |
| 301 | |
| 302 | rx->fd = fd; |
| 303 | rx->flags |= RX_F_BOUND; |
| 304 | |
Willy Tarreau | 9464bb1 | 2022-07-05 05:16:13 +0200 | [diff] [blame] | 305 | fd_insert(fd, rx->owner, rx->iocb, rx->bind_tgroup, rx->bind_thread); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 306 | |
| 307 | /* for now, all regularly bound TCP listeners are exportable */ |
| 308 | if (!(rx->flags & RX_F_INHERITED)) |
Willy Tarreau | 9063a66 | 2021-04-06 18:09:06 +0200 | [diff] [blame] | 309 | HA_ATOMIC_OR(&fdtab[fd].state, FD_EXPORTED); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 310 | |
| 311 | return err; |
| 312 | |
| 313 | err_rename: |
| 314 | ret = rename(backname, path); |
| 315 | if (ret < 0 && errno == ENOENT) |
| 316 | unlink(path); |
| 317 | err_unlink_temp: |
| 318 | if (!ext && path[0]) |
| 319 | unlink(tempname); |
| 320 | close(fd); |
| 321 | err_unlink_back: |
| 322 | if (!ext && path[0]) |
| 323 | unlink(backname); |
| 324 | bind_return: |
| 325 | if (errmsg && *errmsg) { |
| 326 | if (!ext) |
| 327 | memprintf(errmsg, "%s [%s]", *errmsg, path); |
| 328 | else |
| 329 | memprintf(errmsg, "%s [fd %d]", *errmsg, fd); |
| 330 | } |
| 331 | return err; |
| 332 | |
| 333 | bind_close_return: |
| 334 | close(fd); |
| 335 | goto bind_return; |
| 336 | } |