Willy Tarreau | 0d06df6 | 2020-08-28 15:10:11 +0200 | [diff] [blame] | 1 | /* |
| 2 | * SOCK_UNIX socket management |
| 3 | * |
| 4 | * Copyright 2000-2020 Willy Tarreau <w@1wt.eu> |
| 5 | * |
| 6 | * This program is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU General Public License |
| 8 | * as published by the Free Software Foundation; either version |
| 9 | * 2 of the License, or (at your option) any later version. |
| 10 | * |
| 11 | */ |
| 12 | |
| 13 | #include <ctype.h> |
Willy Tarreau | eb8cfe6 | 2020-09-16 22:15:40 +0200 | [diff] [blame] | 14 | #include <errno.h> |
| 15 | #include <fcntl.h> |
Willy Tarreau | 0d06df6 | 2020-08-28 15:10:11 +0200 | [diff] [blame] | 16 | #include <string.h> |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 17 | #include <unistd.h> |
Willy Tarreau | 0d06df6 | 2020-08-28 15:10:11 +0200 | [diff] [blame] | 18 | |
| 19 | #include <sys/param.h> |
| 20 | #include <sys/socket.h> |
| 21 | #include <sys/types.h> |
| 22 | |
| 23 | #include <sys/socket.h> |
| 24 | #include <sys/stat.h> |
| 25 | #include <sys/types.h> |
| 26 | #include <sys/un.h> |
| 27 | |
| 28 | #include <haproxy/api.h> |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 29 | #include <haproxy/errors.h> |
| 30 | #include <haproxy/fd.h> |
| 31 | #include <haproxy/global.h> |
Willy Tarreau | 0d06df6 | 2020-08-28 15:10:11 +0200 | [diff] [blame] | 32 | #include <haproxy/listener.h> |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 33 | #include <haproxy/receiver-t.h> |
Willy Tarreau | 0d06df6 | 2020-08-28 15:10:11 +0200 | [diff] [blame] | 34 | #include <haproxy/namespace.h> |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 35 | #include <haproxy/sock.h> |
Willy Tarreau | 0d06df6 | 2020-08-28 15:10:11 +0200 | [diff] [blame] | 36 | #include <haproxy/sock_unix.h> |
| 37 | #include <haproxy/tools.h> |
| 38 | |
| 39 | |
Willy Tarreau | b0254cb | 2020-09-04 08:07:11 +0200 | [diff] [blame] | 40 | struct proto_fam proto_fam_unix = { |
| 41 | .name = "unix", |
| 42 | .sock_domain = PF_UNIX, |
| 43 | .sock_family = AF_UNIX, |
| 44 | .sock_addrlen = sizeof(struct sockaddr_un), |
| 45 | .l3_addrlen = sizeof(((struct sockaddr_un*)0)->sun_path), |
| 46 | .addrcmp = sock_unix_addrcmp, |
| 47 | .bind = sock_unix_bind_receiver, |
| 48 | .get_src = sock_get_src, |
| 49 | .get_dst = sock_get_dst, |
| 50 | }; |
| 51 | |
Willy Tarreau | 0d06df6 | 2020-08-28 15:10:11 +0200 | [diff] [blame] | 52 | /* PLEASE NOTE for functions below: |
| 53 | * |
| 54 | * The address family SHOULD always be checked. In some cases a function will |
| 55 | * be used in a situation where the address family is guaranteed (e.g. protocol |
| 56 | * definitions), so the test may be avoided. This special case must then be |
| 57 | * mentioned in the comment before the function definition. |
| 58 | */ |
| 59 | |
| 60 | |
/* Compares two AF_UNIX sockaddr addresses. Returns 0 if they match or non-zero
 * if they do not match (including when the families differ or are not
 * AF_UNIX). It also supports ABNS socket addresses (those starting with \0),
 * which are compared over the whole sun_path array. For regular UNIX sockets
 * however, this does explicitly support matching names ending exactly with
 * .XXXXX.tmp which are newly bound sockets about to be replaced; this suffix
 * is then ignored, so that a final name matches its temporary name and two
 * temporary names of the same final name match each other. Note that our UNIX
 * socket paths are always zero-terminated, which is what bounds the scans
 * below.
 *
 * Examples:
 *     /tmp/test           matches         /tmp/test.1822.tmp
 *     /tmp/test.1822.tmp  matches         /tmp/test.4096.tmp
 *     /tmp/test           does not match  /tmp/test2
 */
int sock_unix_addrcmp(const struct sockaddr_storage *a, const struct sockaddr_storage *b)
{
	const struct sockaddr_un *au = (const struct sockaddr_un *)a;
	const struct sockaddr_un *bu = (const struct sockaddr_un *)b;
	int idx, dot, idx2;

	if (a->ss_family != b->ss_family)
		return -1;

	if (a->ss_family != AF_UNIX)
		return -1;

	if (au->sun_path[0] != bu->sun_path[0])
		return -1;

	/* abstract sockets: every byte of the path is significant */
	if (au->sun_path[0] == 0)
		return memcmp(au->sun_path, bu->sun_path, sizeof(au->sun_path));

	/* skip the common prefix, remembering the last dot seen inside it */
	idx = 1;
	dot = 0;
	while (au->sun_path[idx] == bu->sun_path[idx]) {
		if (au->sun_path[idx] == 0)
			return 0;
		if (au->sun_path[idx] == '.')
			dot = idx;
		idx++;
	}

	/* Now we have a difference. It's OK if they are within or after a
	 * sequence of digits following a dot, and are followed by ".tmp".
	 *
	 * If one of the paths ends right here, it is in "final" format and
	 * the other one may only match if what follows is a whole
	 * ".XXXXX.tmp" suffix, which then has to start with a dot at this
	 * very position. That dot is not part of the common prefix so it was
	 * not recorded by the loop above.
	 */
	if (au->sun_path[idx] == 0 || bu->sun_path[idx] == 0) {
		if (au->sun_path[idx] == '.' || bu->sun_path[idx] == '.')
			dot = idx; /* try to match against the temp path */
		else
			return -1; /* not a temp path suffix */
	}

	if (!dot)
		return -1;

	/* First, check in path "a" */
	if (au->sun_path[idx] != 0) {
		for (idx2 = dot + 1; isdigit((unsigned char)au->sun_path[idx2]); idx2++)
			;
		if (strcmp(au->sun_path + idx2, ".tmp") != 0)
			return -1;
	}

	/* Then check in path "b" */
	if (bu->sun_path[idx] != 0) {
		for (idx2 = dot + 1; isdigit((unsigned char)bu->sun_path[idx2]); idx2++)
			;
		if (strcmp(bu->sun_path + idx2, ".tmp") != 0)
			return -1;
	}

	/* OK that's a match */
	return 0;
}
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 120 | |
Willy Tarreau | 233ad28 | 2020-10-15 21:45:15 +0200 | [diff] [blame] | 121 | /* Binds receiver <rx>, and assigns rx->iocb and rx->owner as the callback and |
| 122 | * context, respectively, with ->bind_thread as the thread mask. Returns an |
| 123 | * error code made of ERR_* bits on failure or ERR_NONE on success. On failure, |
| 124 | * an error message may be passed into <errmsg>. |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 125 | */ |
Willy Tarreau | 233ad28 | 2020-10-15 21:45:15 +0200 | [diff] [blame] | 126 | int sock_unix_bind_receiver(struct receiver *rx, char **errmsg) |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 127 | { |
| 128 | char tempname[MAXPATHLEN]; |
| 129 | char backname[MAXPATHLEN]; |
| 130 | struct sockaddr_un addr; |
| 131 | const char *path; |
| 132 | int maxpathlen; |
| 133 | int fd, err, ext, ret; |
| 134 | |
| 135 | /* ensure we never return garbage */ |
| 136 | if (errmsg) |
| 137 | *errmsg = 0; |
| 138 | |
| 139 | err = ERR_NONE; |
| 140 | |
| 141 | if (rx->flags & RX_F_BOUND) |
| 142 | return ERR_NONE; |
| 143 | |
| 144 | /* if no FD was assigned yet, we'll have to either find a compatible |
| 145 | * one or create a new one. |
| 146 | */ |
| 147 | if (rx->fd == -1) |
| 148 | rx->fd = sock_find_compatible_fd(rx); |
| 149 | |
| 150 | path = ((struct sockaddr_un *)&rx->addr)->sun_path; |
| 151 | maxpathlen = MIN(MAXPATHLEN, sizeof(addr.sun_path)); |
| 152 | |
| 153 | /* if the listener already has an fd assigned, then we were offered the |
| 154 | * fd by an external process (most likely the parent), and we don't want |
| 155 | * to create a new socket. However we still want to set a few flags on |
| 156 | * the socket. |
| 157 | */ |
| 158 | fd = rx->fd; |
| 159 | ext = (fd >= 0); |
| 160 | if (ext) |
| 161 | goto fd_ready; |
| 162 | |
| 163 | if (path[0]) { |
| 164 | ret = snprintf(tempname, maxpathlen, "%s.%d.tmp", path, pid); |
| 165 | if (ret < 0 || ret >= sizeof(addr.sun_path)) { |
| 166 | err |= ERR_FATAL | ERR_ALERT; |
| 167 | memprintf(errmsg, "name too long for UNIX socket (limit usually 97)"); |
| 168 | goto bind_return; |
| 169 | } |
| 170 | |
| 171 | ret = snprintf(backname, maxpathlen, "%s.%d.bak", path, pid); |
| 172 | if (ret < 0 || ret >= maxpathlen) { |
| 173 | err |= ERR_FATAL | ERR_ALERT; |
| 174 | memprintf(errmsg, "name too long for UNIX socket (limit usually 97)"); |
| 175 | goto bind_return; |
| 176 | } |
| 177 | |
| 178 | /* 2. clean existing orphaned entries */ |
| 179 | if (unlink(tempname) < 0 && errno != ENOENT) { |
| 180 | err |= ERR_FATAL | ERR_ALERT; |
Willy Tarreau | 3cd58bf | 2020-09-17 08:35:38 +0200 | [diff] [blame] | 181 | memprintf(errmsg, "error when trying to unlink previous UNIX socket (%s)", strerror(errno)); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 182 | goto bind_return; |
| 183 | } |
| 184 | |
| 185 | if (unlink(backname) < 0 && errno != ENOENT) { |
| 186 | err |= ERR_FATAL | ERR_ALERT; |
Willy Tarreau | 3cd58bf | 2020-09-17 08:35:38 +0200 | [diff] [blame] | 187 | memprintf(errmsg, "error when trying to unlink previous UNIX socket (%s)", strerror(errno)); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 188 | goto bind_return; |
| 189 | } |
| 190 | |
| 191 | /* 3. backup existing socket */ |
| 192 | if (link(path, backname) < 0 && errno != ENOENT) { |
| 193 | err |= ERR_FATAL | ERR_ALERT; |
Willy Tarreau | 3cd58bf | 2020-09-17 08:35:38 +0200 | [diff] [blame] | 194 | memprintf(errmsg, "error when trying to preserve previous UNIX socket (%s)", strerror(errno)); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 195 | goto bind_return; |
| 196 | } |
| 197 | |
| 198 | /* Note: this test is redundant with the snprintf one above and |
| 199 | * will never trigger, it's just added as the only way to shut |
| 200 | * gcc's painfully dumb warning about possibly truncated output |
| 201 | * during strncpy(). Don't move it above or smart gcc will not |
| 202 | * see it! |
| 203 | */ |
| 204 | if (strlen(tempname) >= sizeof(addr.sun_path)) { |
| 205 | err |= ERR_FATAL | ERR_ALERT; |
| 206 | memprintf(errmsg, "name too long for UNIX socket (limit usually 97)"); |
| 207 | goto bind_return; |
| 208 | } |
| 209 | |
| 210 | strncpy(addr.sun_path, tempname, sizeof(addr.sun_path) - 1); |
| 211 | addr.sun_path[sizeof(addr.sun_path) - 1] = 0; |
| 212 | } |
| 213 | else { |
| 214 | /* first char is zero, it's an abstract socket whose address |
| 215 | * is defined by all the bytes past this zero. |
| 216 | */ |
| 217 | memcpy(addr.sun_path, path, sizeof(addr.sun_path)); |
| 218 | } |
| 219 | addr.sun_family = AF_UNIX; |
| 220 | |
| 221 | /* WT: shouldn't we use my_socketat(rx->netns) here instead ? */ |
Willy Tarreau | f1f6609 | 2020-09-04 08:15:31 +0200 | [diff] [blame] | 222 | fd = socket(rx->proto->fam->sock_domain, rx->proto->sock_type, rx->proto->sock_prot); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 223 | if (fd < 0) { |
| 224 | err |= ERR_FATAL | ERR_ALERT; |
Willy Tarreau | 3cd58bf | 2020-09-17 08:35:38 +0200 | [diff] [blame] | 225 | memprintf(errmsg, "cannot create receiving socket (%s)", strerror(errno)); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 226 | goto bind_return; |
| 227 | } |
| 228 | |
| 229 | fd_ready: |
| 230 | if (fd >= global.maxsock) { |
| 231 | err |= ERR_FATAL | ERR_ABORT | ERR_ALERT; |
| 232 | memprintf(errmsg, "not enough free sockets (raise '-n' parameter)"); |
| 233 | goto bind_close_return; |
| 234 | } |
| 235 | |
| 236 | if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) { |
| 237 | err |= ERR_FATAL | ERR_ALERT; |
| 238 | memprintf(errmsg, "cannot make socket non-blocking"); |
| 239 | goto bind_close_return; |
| 240 | } |
| 241 | |
| 242 | if (!ext && bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) { |
| 243 | /* note that bind() creates the socket <tempname> on the file system */ |
| 244 | if (errno == EADDRINUSE) { |
| 245 | /* the old process might still own it, let's retry */ |
| 246 | err |= ERR_RETRYABLE | ERR_ALERT; |
| 247 | memprintf(errmsg, "cannot bind UNIX socket (already in use)"); |
| 248 | goto bind_close_return; |
| 249 | } |
| 250 | else { |
| 251 | err |= ERR_FATAL | ERR_ALERT; |
Willy Tarreau | 3cd58bf | 2020-09-17 08:35:38 +0200 | [diff] [blame] | 252 | memprintf(errmsg, "cannot bind UNIX socket (%s)", strerror(errno)); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 253 | goto bind_close_return; |
| 254 | } |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 255 | } |
| 256 | |
| 257 | /* <uid> and <gid> different of -1 will be used to change the socket owner. |
| 258 | * If <mode> is not 0, it will be used to restrict access to the socket. |
| 259 | * While it is known not to be portable on every OS, it's still useful |
| 260 | * where it works. We also don't change permissions on abstract sockets. |
| 261 | */ |
| 262 | if (!ext && path[0] && |
| 263 | (((rx->settings->ux.uid != -1 || rx->settings->ux.gid != -1) && |
| 264 | (chown(tempname, rx->settings->ux.uid, rx->settings->ux.gid) == -1)) || |
| 265 | (rx->settings->ux.mode != 0 && chmod(tempname, rx->settings->ux.mode) == -1))) { |
| 266 | err |= ERR_FATAL | ERR_ALERT; |
Willy Tarreau | 3cd58bf | 2020-09-17 08:35:38 +0200 | [diff] [blame] | 267 | memprintf(errmsg, "cannot change UNIX socket ownership (%s)", strerror(errno)); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 268 | goto err_unlink_temp; |
| 269 | } |
| 270 | |
| 271 | /* Point of no return: we are ready, we'll switch the sockets. We don't |
| 272 | * fear losing the socket <path> because we have a copy of it in |
| 273 | * backname. Abstract sockets are not renamed. |
| 274 | */ |
| 275 | if (!ext && path[0] && rename(tempname, path) < 0) { |
| 276 | err |= ERR_FATAL | ERR_ALERT; |
Willy Tarreau | 3cd58bf | 2020-09-17 08:35:38 +0200 | [diff] [blame] | 277 | memprintf(errmsg, "cannot switch final and temporary UNIX sockets (%s)", strerror(errno)); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 278 | goto err_rename; |
| 279 | } |
| 280 | |
| 281 | /* Cleanup: only unlink if we didn't inherit the fd from the parent */ |
| 282 | if (!ext && path[0]) |
| 283 | unlink(backname); |
| 284 | |
| 285 | rx->fd = fd; |
| 286 | rx->flags |= RX_F_BOUND; |
| 287 | |
Willy Tarreau | 01cac3f | 2021-10-12 08:47:54 +0200 | [diff] [blame] | 288 | fd_insert(fd, rx->owner, rx->iocb, thread_mask(rx->bind_thread) & all_threads_mask); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 289 | |
| 290 | /* for now, all regularly bound TCP listeners are exportable */ |
| 291 | if (!(rx->flags & RX_F_INHERITED)) |
Willy Tarreau | 9063a66 | 2021-04-06 18:09:06 +0200 | [diff] [blame] | 292 | HA_ATOMIC_OR(&fdtab[fd].state, FD_EXPORTED); |
Willy Tarreau | 1e0a860 | 2020-09-02 17:14:29 +0200 | [diff] [blame] | 293 | |
| 294 | return err; |
| 295 | |
| 296 | err_rename: |
| 297 | ret = rename(backname, path); |
| 298 | if (ret < 0 && errno == ENOENT) |
| 299 | unlink(path); |
| 300 | err_unlink_temp: |
| 301 | if (!ext && path[0]) |
| 302 | unlink(tempname); |
| 303 | close(fd); |
| 304 | err_unlink_back: |
| 305 | if (!ext && path[0]) |
| 306 | unlink(backname); |
| 307 | bind_return: |
| 308 | if (errmsg && *errmsg) { |
| 309 | if (!ext) |
| 310 | memprintf(errmsg, "%s [%s]", *errmsg, path); |
| 311 | else |
| 312 | memprintf(errmsg, "%s [fd %d]", *errmsg, fd); |
| 313 | } |
| 314 | return err; |
| 315 | |
| 316 | bind_close_return: |
| 317 | close(fd); |
| 318 | goto bind_return; |
| 319 | } |