Blame - src/sock_unix.c - haproxy

blob: 9b60a6157e7c13aedd5626f68f5f7eabc114be86 [file] [log] [blame]

Willy Tarreau	0d06df6	2020-08-28 15:10:11 +0200	[diff] [blame]	1	/*
				2	* SOCK_UNIX socket management
				3	*
				4	* Copyright 2000-2020 Willy Tarreau <w@1wt.eu>
				5	*
				6	* This program is free software; you can redistribute it and/or
				7	* modify it under the terms of the GNU General Public License
				8	* as published by the Free Software Foundation; either version
				9	* 2 of the License, or (at your option) any later version.
				10	*
				11	*/
				12
				13	#include <ctype.h>
Willy Tarreau	eb8cfe6	2020-09-16 22:15:40 +0200	[diff] [blame]	14	#include <errno.h>
				15	#include <fcntl.h>
Willy Tarreau	0d06df6	2020-08-28 15:10:11 +0200	[diff] [blame]	16	#include <string.h>
Willy Tarreau	1e0a860	2020-09-02 17:14:29 +0200	[diff] [blame]	17	#include <unistd.h>
Willy Tarreau	0d06df6	2020-08-28 15:10:11 +0200	[diff] [blame]	18
				19	#include <sys/param.h>
				20	#include <sys/socket.h>
				21	#include <sys/types.h>
				22
				23	#include <sys/socket.h>
				24	#include <sys/stat.h>
				25	#include <sys/types.h>
				26	#include <sys/un.h>
				27
				28	#include <haproxy/api.h>
Willy Tarreau	1e0a860	2020-09-02 17:14:29 +0200	[diff] [blame]	29	#include <haproxy/errors.h>
				30	#include <haproxy/fd.h>
				31	#include <haproxy/global.h>
Willy Tarreau	0d06df6	2020-08-28 15:10:11 +0200	[diff] [blame]	32	#include <haproxy/listener.h>
Willy Tarreau	1e0a860	2020-09-02 17:14:29 +0200	[diff] [blame]	33	#include <haproxy/receiver-t.h>
Willy Tarreau	0d06df6	2020-08-28 15:10:11 +0200	[diff] [blame]	34	#include <haproxy/namespace.h>
Willy Tarreau	1e0a860	2020-09-02 17:14:29 +0200	[diff] [blame]	35	#include <haproxy/sock.h>
Willy Tarreau	0d06df6	2020-08-28 15:10:11 +0200	[diff] [blame]	36	#include <haproxy/sock_unix.h>
				37	#include <haproxy/tools.h>
				38
				39
Willy Tarreau	b0254cb	2020-09-04 08:07:11 +0200	[diff] [blame]	40	struct proto_fam proto_fam_unix = {
				41	.name = "unix",
				42	.sock_domain = PF_UNIX,
				43	.sock_family = AF_UNIX,
				44	.sock_addrlen = sizeof(struct sockaddr_un),
				45	.l3_addrlen = sizeof(((struct sockaddr_un*)0)->sun_path),
				46	.addrcmp = sock_unix_addrcmp,
				47	.bind = sock_unix_bind_receiver,
				48	.get_src = sock_get_src,
				49	.get_dst = sock_get_dst,
				50	};
				51
Willy Tarreau	0d06df6	2020-08-28 15:10:11 +0200	[diff] [blame]	52	/* PLEASE NOTE for functions below:
				53	*
				54	* The address family SHOULD always be checked. In some cases a function will
				55	* be used in a situation where the address family is guaranteed (e.g. protocol
				56	* definitions), so the test may be avoided. This special case must then be
				57	* mentioned in the comment before the function definition.
				58	*/
				59
				60
				61	/* Compares two AF_UNIX sockaddr addresses. Returns 0 if they match or non-zero
				62	* if they do not match. It also supports ABNS socket addresses (those starting
				63	* with \0). For regular UNIX sockets however, this does explicitly support
				64	* matching names ending exactly with .XXXXX.tmp which are newly bound sockets
				65	* about to be replaced; this suffix is then ignored. Note that our UNIX socket
				66	* paths are always zero-terminated.
				67	*/
				68	int sock_unix_addrcmp(const struct sockaddr_storage a, const struct sockaddr_storage b)
				69	{
				70	const struct sockaddr_un au = (const struct sockaddr_un )a;
				71	const struct sockaddr_un bu = (const struct sockaddr_un )b;
				72	int idx, dot, idx2;
				73
				74	if (a->ss_family != b->ss_family)
				75	return -1;
				76
				77	if (a->ss_family != AF_UNIX)
				78	return -1;
				79
				80	if (au->sun_path[0] != bu->sun_path[0])
				81	return -1;
				82
				83	if (au->sun_path[0] == 0)
				84	return memcmp(au->sun_path, bu->sun_path, sizeof(au->sun_path));
				85
				86	idx = 1; dot = 0;
				87	while (au->sun_path[idx] == bu->sun_path[idx]) {
				88	if (au->sun_path[idx] == 0)
				89	return 0;
				90	if (au->sun_path[idx] == '.')
				91	dot = idx;
				92	idx++;
				93	}
				94
				95	/* Now we have a difference. It's OK if they are within or after a
				96	* sequence of digits following a dot, and are followed by ".tmp".
Aurelien DARRAGON	7aaf88c	2023-02-21 17:33:50 +0100	[diff] [blame]	97	*
				98	* make sure to perform the check against tempname if the compared
				99	* string is in "final" format (does not end with ".XXXX.tmp").
				100	*
				101	* Examples:
				102	* /tmp/test matches with /tmp/test.1822.tmp
				103	* /tmp/test.1822.tmp matches with /tmp/test.XXXX.tmp
Willy Tarreau	0d06df6	2020-08-28 15:10:11 +0200	[diff] [blame]	104	*/
Aurelien DARRAGON	7aaf88c	2023-02-21 17:33:50 +0100	[diff] [blame]	105	if (au->sun_path[idx] == 0 \|\| bu->sun_path[idx] == 0) {
				106	if (au->sun_path[idx] == '.' \|\| bu->sun_path[idx] == '.')
				107	dot = idx; /* try to match against temp path */
				108	else
				109	return -1; /* invalid temp path */
				110	}
				111
Willy Tarreau	0d06df6	2020-08-28 15:10:11 +0200	[diff] [blame]	112	if (!dot)
				113	return -1;
				114
				115	/* First, check in path "a" */
				116	if (au->sun_path[idx] != 0) {
Willy Tarreau	1c34b88	2020-08-29 06:44:37 +0200	[diff] [blame]	117	for (idx2 = dot + 1; idx2 && isdigit((unsigned char)au->sun_path[idx2]);)
Willy Tarreau	0d06df6	2020-08-28 15:10:11 +0200	[diff] [blame]	118	idx2++;
				119	if (strcmp(au->sun_path + idx2, ".tmp") != 0)
				120	return -1;
				121	}
				122
				123	/* Then check in path "b" */
				124	if (bu->sun_path[idx] != 0) {
Willy Tarreau	1c34b88	2020-08-29 06:44:37 +0200	[diff] [blame]	125	for (idx2 = dot + 1; idx2 && isdigit((unsigned char)bu->sun_path[idx2]); idx2++)
Willy Tarreau	0d06df6	2020-08-28 15:10:11 +0200	[diff] [blame]	126	;
				127	if (strcmp(bu->sun_path + idx2, ".tmp") != 0)
				128	return -1;
				129	}
				130
				131	/* OK that's a match */
				132	return 0;
				133	}
Willy Tarreau	1e0a860	2020-09-02 17:14:29 +0200	[diff] [blame]	134
Willy Tarreau	233ad28	2020-10-15 21:45:15 +0200	[diff] [blame]	135	/* Binds receiver <rx>, and assigns rx->iocb and rx->owner as the callback and
				136	* context, respectively, with ->bind_thread as the thread mask. Returns an
				137	* error code made of ERR_* bits on failure or ERR_NONE on success. On failure,
				138	* an error message may be passed into <errmsg>.
Willy Tarreau	1e0a860	2020-09-02 17:14:29 +0200	[diff] [blame]	139	*/
Willy Tarreau	233ad28	2020-10-15 21:45:15 +0200	[diff] [blame]	140	int sock_unix_bind_receiver(struct receiver rx, char *errmsg)
Willy Tarreau	1e0a860	2020-09-02 17:14:29 +0200	[diff] [blame]	141	{
				142	char tempname[MAXPATHLEN];
				143	char backname[MAXPATHLEN];
				144	struct sockaddr_un addr;
				145	const char *path;
				146	int maxpathlen;
				147	int fd, err, ext, ret;
				148
				149	/* ensure we never return garbage */
				150	if (errmsg)
				151	*errmsg = 0;
				152
				153	err = ERR_NONE;
				154
				155	if (rx->flags & RX_F_BOUND)
				156	return ERR_NONE;
				157
				158	/* if no FD was assigned yet, we'll have to either find a compatible
				159	* one or create a new one.
				160	*/
				161	if (rx->fd == -1)
				162	rx->fd = sock_find_compatible_fd(rx);
				163
				164	path = ((struct sockaddr_un *)&rx->addr)->sun_path;
				165	maxpathlen = MIN(MAXPATHLEN, sizeof(addr.sun_path));
				166
				167	/* if the listener already has an fd assigned, then we were offered the
				168	* fd by an external process (most likely the parent), and we don't want
				169	* to create a new socket. However we still want to set a few flags on
				170	* the socket.
				171	*/
				172	fd = rx->fd;
				173	ext = (fd >= 0);
				174	if (ext)
				175	goto fd_ready;
				176
				177	if (path[0]) {
				178	ret = snprintf(tempname, maxpathlen, "%s.%d.tmp", path, pid);
				179	if (ret < 0 \|\| ret >= sizeof(addr.sun_path)) {
				180	err \|= ERR_FATAL \| ERR_ALERT;
				181	memprintf(errmsg, "name too long for UNIX socket (limit usually 97)");
				182	goto bind_return;
				183	}
				184
				185	ret = snprintf(backname, maxpathlen, "%s.%d.bak", path, pid);
				186	if (ret < 0 \|\| ret >= maxpathlen) {
				187	err \|= ERR_FATAL \| ERR_ALERT;
				188	memprintf(errmsg, "name too long for UNIX socket (limit usually 97)");
				189	goto bind_return;
				190	}
				191
				192	/* 2. clean existing orphaned entries */
				193	if (unlink(tempname) < 0 && errno != ENOENT) {
				194	err \|= ERR_FATAL \| ERR_ALERT;
Willy Tarreau	3cd58bf	2020-09-17 08:35:38 +0200	[diff] [blame]	195	memprintf(errmsg, "error when trying to unlink previous UNIX socket (%s)", strerror(errno));
Willy Tarreau	1e0a860	2020-09-02 17:14:29 +0200	[diff] [blame]	196	goto bind_return;
				197	}
				198
				199	if (unlink(backname) < 0 && errno != ENOENT) {
				200	err \|= ERR_FATAL \| ERR_ALERT;
Willy Tarreau	3cd58bf	2020-09-17 08:35:38 +0200	[diff] [blame]	201	memprintf(errmsg, "error when trying to unlink previous UNIX socket (%s)", strerror(errno));
Willy Tarreau	1e0a860	2020-09-02 17:14:29 +0200	[diff] [blame]	202	goto bind_return;
				203	}
				204
				205	/* 3. backup existing socket */
				206	if (link(path, backname) < 0 && errno != ENOENT) {
				207	err \|= ERR_FATAL \| ERR_ALERT;
Willy Tarreau	3cd58bf	2020-09-17 08:35:38 +0200	[diff] [blame]	208	memprintf(errmsg, "error when trying to preserve previous UNIX socket (%s)", strerror(errno));
Willy Tarreau	1e0a860	2020-09-02 17:14:29 +0200	[diff] [blame]	209	goto bind_return;
				210	}
				211
				212	/* Note: this test is redundant with the snprintf one above and
				213	* will never trigger, it's just added as the only way to shut
				214	* gcc's painfully dumb warning about possibly truncated output
				215	* during strncpy(). Don't move it above or smart gcc will not
				216	* see it!
				217	*/
				218	if (strlen(tempname) >= sizeof(addr.sun_path)) {
				219	err \|= ERR_FATAL \| ERR_ALERT;
				220	memprintf(errmsg, "name too long for UNIX socket (limit usually 97)");
				221	goto bind_return;
				222	}
				223
				224	strncpy(addr.sun_path, tempname, sizeof(addr.sun_path) - 1);
				225	addr.sun_path[sizeof(addr.sun_path) - 1] = 0;
				226	}
				227	else {
				228	/* first char is zero, it's an abstract socket whose address
				229	* is defined by all the bytes past this zero.
				230	*/
				231	memcpy(addr.sun_path, path, sizeof(addr.sun_path));
				232	}
				233	addr.sun_family = AF_UNIX;
				234
				235	/* WT: shouldn't we use my_socketat(rx->netns) here instead ? */
Willy Tarreau	f1f6609	2020-09-04 08:15:31 +0200	[diff] [blame]	236	fd = socket(rx->proto->fam->sock_domain, rx->proto->sock_type, rx->proto->sock_prot);
Willy Tarreau	1e0a860	2020-09-02 17:14:29 +0200	[diff] [blame]	237	if (fd < 0) {
				238	err \|= ERR_FATAL \| ERR_ALERT;
Willy Tarreau	3cd58bf	2020-09-17 08:35:38 +0200	[diff] [blame]	239	memprintf(errmsg, "cannot create receiving socket (%s)", strerror(errno));
Willy Tarreau	1e0a860	2020-09-02 17:14:29 +0200	[diff] [blame]	240	goto bind_return;
				241	}
				242
				243	fd_ready:
Willy Tarreau	6789f19	2023-01-11 10:59:52 +0100	[diff] [blame]	244	if (ext && fd < global.maxsock && fdtab[fd].owner) {
				245	/* This FD was already bound so this means that it was already
				246	* known and registered before parsing, hence it's an inherited
				247	* FD. The only reason why it's already known here is that it
				248	* has been registered multiple times (multiple listeners on the
				249	* same, or a "shards" directive on the line). There cannot be
				250	* multiple listeners on one FD but at least we can create a
				251	* new one from the original one. We won't reconfigure it,
				252	* however, as this was already done for the first one.
				253	*/
				254	fd = dup(fd);
				255	if (fd == -1) {
				256	err \|= ERR_RETRYABLE \| ERR_ALERT;
				257	memprintf(errmsg, "cannot dup() receiving socket (%s)", strerror(errno));
				258	goto bind_return;
				259	}
				260	}
				261
Willy Tarreau	1e0a860	2020-09-02 17:14:29 +0200	[diff] [blame]	262	if (fd >= global.maxsock) {
				263	err \|= ERR_FATAL \| ERR_ABORT \| ERR_ALERT;
				264	memprintf(errmsg, "not enough free sockets (raise '-n' parameter)");
				265	goto bind_close_return;
				266	}
				267
				268	if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
				269	err \|= ERR_FATAL \| ERR_ALERT;
				270	memprintf(errmsg, "cannot make socket non-blocking");
				271	goto bind_close_return;
				272	}
				273
				274	if (!ext && bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
				275	/* note that bind() creates the socket <tempname> on the file system */
				276	if (errno == EADDRINUSE) {
				277	/* the old process might still own it, let's retry */
				278	err \|= ERR_RETRYABLE \| ERR_ALERT;
				279	memprintf(errmsg, "cannot bind UNIX socket (already in use)");
				280	goto bind_close_return;
				281	}
				282	else {
				283	err \|= ERR_FATAL \| ERR_ALERT;
Willy Tarreau	3cd58bf	2020-09-17 08:35:38 +0200	[diff] [blame]	284	memprintf(errmsg, "cannot bind UNIX socket (%s)", strerror(errno));
Willy Tarreau	1e0a860	2020-09-02 17:14:29 +0200	[diff] [blame]	285	goto bind_close_return;
				286	}
Willy Tarreau	1e0a860	2020-09-02 17:14:29 +0200	[diff] [blame]	287	}
				288
				289	/* <uid> and <gid> different of -1 will be used to change the socket owner.
				290	* If <mode> is not 0, it will be used to restrict access to the socket.
				291	* While it is known not to be portable on every OS, it's still useful
				292	* where it works. We also don't change permissions on abstract sockets.
				293	*/
				294	if (!ext && path[0] &&
				295	(((rx->settings->ux.uid != -1 \|\| rx->settings->ux.gid != -1) &&
				296	(chown(tempname, rx->settings->ux.uid, rx->settings->ux.gid) == -1)) \|\|
				297	(rx->settings->ux.mode != 0 && chmod(tempname, rx->settings->ux.mode) == -1))) {
				298	err \|= ERR_FATAL \| ERR_ALERT;
Willy Tarreau	3cd58bf	2020-09-17 08:35:38 +0200	[diff] [blame]	299	memprintf(errmsg, "cannot change UNIX socket ownership (%s)", strerror(errno));
Willy Tarreau	1e0a860	2020-09-02 17:14:29 +0200	[diff] [blame]	300	goto err_unlink_temp;
				301	}
				302
				303	/* Point of no return: we are ready, we'll switch the sockets. We don't
				304	* fear losing the socket <path> because we have a copy of it in
				305	* backname. Abstract sockets are not renamed.
				306	*/
				307	if (!ext && path[0] && rename(tempname, path) < 0) {
				308	err \|= ERR_FATAL \| ERR_ALERT;
Willy Tarreau	3cd58bf	2020-09-17 08:35:38 +0200	[diff] [blame]	309	memprintf(errmsg, "cannot switch final and temporary UNIX sockets (%s)", strerror(errno));
Willy Tarreau	1e0a860	2020-09-02 17:14:29 +0200	[diff] [blame]	310	goto err_rename;
				311	}
				312
				313	/* Cleanup: only unlink if we didn't inherit the fd from the parent */
				314	if (!ext && path[0])
				315	unlink(backname);
				316
				317	rx->fd = fd;
				318	rx->flags \|= RX_F_BOUND;
				319
Willy Tarreau	233ad28	2020-10-15 21:45:15 +0200	[diff] [blame]	320	fd_insert(fd, rx->owner, rx->iocb, thread_mask(rx->settings->bind_thread) & all_threads_mask);
Willy Tarreau	1e0a860	2020-09-02 17:14:29 +0200	[diff] [blame]	321
				322	/* for now, all regularly bound TCP listeners are exportable */
				323	if (!(rx->flags & RX_F_INHERITED))
Willy Tarreau	9063a66	2021-04-06 18:09:06 +0200	[diff] [blame]	324	HA_ATOMIC_OR(&fdtab[fd].state, FD_EXPORTED);
Willy Tarreau	1e0a860	2020-09-02 17:14:29 +0200	[diff] [blame]	325
				326	return err;
				327
				328	err_rename:
				329	ret = rename(backname, path);
				330	if (ret < 0 && errno == ENOENT)
				331	unlink(path);
				332	err_unlink_temp:
				333	if (!ext && path[0])
				334	unlink(tempname);
				335	close(fd);
				336	err_unlink_back:
				337	if (!ext && path[0])
				338	unlink(backname);
				339	bind_return:
				340	if (errmsg && *errmsg) {
				341	if (!ext)
				342	memprintf(errmsg, "%s [%s]", *errmsg, path);
				343	else
				344	memprintf(errmsg, "%s [fd %d]", *errmsg, fd);
				345	}
				346	return err;
				347
				348	bind_close_return:
				349	close(fd);
				350	goto bind_return;
				351	}