MAJOR: listener: support inheriting a listening fd from the parent

Using the address syntax "fd@<num>", a listener may inherit a file
descriptor that the caller process has already bound and passed as
this number. The fd's socket family is detected using getsockname(),
and the usual initialization is performed through the existing code
for that family, but the socket creation is skipped.

It does not matter whether the parent has already called listen() on the
fd: this is detected using SO_ACCEPTCONN and listen() is only called if needed.

For UNIX sockets bound to an inherited fd, we clear the socket path as
soon as the listener is set up, so that the path is never removed if a
late error during startup causes an abort.
diff --git a/doc/configuration.txt b/doc/configuration.txt
index 4d57585..86d88a9 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -1616,6 +1616,9 @@
                     - 'ipv4@'  -> address is always IPv4
                     - 'ipv6@'  -> address is always IPv6
                     - 'unix@'  -> address is a path to a local unix socket
+                    - 'fd@<n>' -> use file descriptor <n> inherited from the
+                      parent. The fd must be bound and may or may not already
+                      be listening.
 
     <port_range>  is either a unique TCP port, or a port range for which the
                   proxy will accept connections for the IP address specified
diff --git a/src/cfgparse.c b/src/cfgparse.c
index 3821edb..2c2faf5 100644
--- a/src/cfgparse.c
+++ b/src/cfgparse.c
@@ -207,6 +207,7 @@
 
 	while (next && *next) {
 		struct sockaddr_storage ss, *ss2;
+		int fd = -1;
 
 		str = next;
 		/* 1) look for the end of the first address */
@@ -240,6 +241,24 @@
 				goto fail;
 			}
 		}
+		else if (ss2->ss_family == AF_UNSPEC) {
+			socklen_t addr_len;
+
+			/* We want to attach to an already bound fd whose number
+			 * is in the addr part of ss2 when cast to sockaddr_in.
+			 * Note that by definition there is a single listener.
+			 * We still have to determine the address family to
+			 * register the correct protocol.
+			 */
+			fd = ((struct sockaddr_in *)ss2)->sin_addr.s_addr;
+			addr_len = sizeof(*ss2);
+			if (getsockname(fd, (struct sockaddr *)ss2, &addr_len) == -1) {
+				memprintf(err, "cannot use file descriptor '%d' : %s.\n", fd, strerror(errno));
+				goto fail;
+			}
+
+			port = end = get_host_port(ss2);
+		}
 
 		/* OK the address looks correct */
 		ss = *ss2;
@@ -252,7 +271,7 @@
 			l->frontend = curproxy;
 			l->bind_conf = bind_conf;
 
-			l->fd = -1;
+			l->fd = fd;
 			l->addr = ss;
 			l->xprt = &raw_sock;
 			l->state = LI_INIT;
diff --git a/src/proto_tcp.c b/src/proto_tcp.c
index 6bd5761..38b9bdc 100644
--- a/src/proto_tcp.c
+++ b/src/proto_tcp.c
@@ -566,6 +566,8 @@
 {
 	__label__ tcp_return, tcp_close_return;
 	int fd, err;
+	int ext, ready;
+	socklen_t ready_len;
 	const char *msg = NULL;
 
 	/* ensure we never return garbage */
@@ -577,7 +579,15 @@
 
 	err = ERR_NONE;
 
-	if ((fd = socket(listener->addr.ss_family, SOCK_STREAM, IPPROTO_TCP)) == -1) {
+	/* if the listener already has an fd assigned, then we were offered the
+	 * fd by an external process (most likely the parent), and we don't want
+	 * to create a new socket. However we still want to set a few flags on
+	 * the socket.
+	 */
+	fd = listener->fd;
+	ext = (fd >= 0);
+
+	if (!ext && (fd = socket(listener->addr.ss_family, SOCK_STREAM, IPPROTO_TCP)) == -1) {
 		err |= ERR_RETRYABLE | ERR_ALERT;
 		msg = "cannot create listening socket";
 		goto tcp_return;
@@ -595,7 +605,7 @@
 		goto tcp_close_return;
 	}
 
-	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) == -1) {
+	if (!ext && setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)) == -1) {
 		/* not fatal but should be reported */
 		msg = "cannot do so_reuseaddr";
 		err |= ERR_ALERT;
@@ -608,10 +618,11 @@
 	/* OpenBSD supports this. As it's present in old libc versions of Linux,
 	 * it might return an error that we will silently ignore.
 	 */
-	setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+	if (!ext)
+		setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
 #endif
 #ifdef CONFIG_HAP_LINUX_TPROXY
-	if (listener->options & LI_O_FOREIGN) {
+	if (!ext && (listener->options & LI_O_FOREIGN)) {
 		switch (listener->addr.ss_family) {
 		case AF_INET:
 			if ((setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one)) == -1)
@@ -631,7 +642,7 @@
 #endif
 #ifdef SO_BINDTODEVICE
 	/* Note: this might fail if not CAP_NET_RAW */
-	if (listener->interface) {
+	if (!ext && listener->interface) {
 		if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
 			       listener->interface, strlen(listener->interface) + 1) == -1) {
 			msg = "cannot bind listener to device";
@@ -675,13 +686,19 @@
                 setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &zero, sizeof(zero));
 #endif
 
-	if (bind(fd, (struct sockaddr *)&listener->addr, listener->proto->sock_addrlen) == -1) {
+	if (!ext && bind(fd, (struct sockaddr *)&listener->addr, listener->proto->sock_addrlen) == -1) {
 		err |= ERR_RETRYABLE | ERR_ALERT;
 		msg = "cannot bind socket";
 		goto tcp_close_return;
 	}
 
-	if (listen(fd, listener->backlog ? listener->backlog : listener->maxconn) == -1) {
+	ready = 0;
+	ready_len = sizeof(ready);
+	if (getsockopt(fd, SOL_SOCKET, SO_ACCEPTCONN, &ready, &ready_len) == -1)
+		ready = 0;
+
+	if (!(ext && ready) && /* only listen if not already done by external process */
+	    listen(fd, listener->backlog ? listener->backlog : listener->maxconn) == -1) {
 		err |= ERR_RETRYABLE | ERR_ALERT;
 		msg = "cannot listen to socket";
 		goto tcp_close_return;
diff --git a/src/proto_uxst.c b/src/proto_uxst.c
index 3263ea4..85eb3ee 100644
--- a/src/proto_uxst.c
+++ b/src/proto_uxst.c
@@ -111,6 +111,10 @@
 	struct sockaddr_un addr;
 	int sock, ret;
 
+	/* if the path was cleared, we do nothing */
+	if (!*path)
+		return;
+
 	/* We might have been chrooted, so we may not be able to access the
 	 * socket. In order to avoid bothering the other end, we connect with a
 	 * wrong protocol, namely SOCK_DGRAM. The return code from connect()
@@ -157,6 +161,8 @@
 	struct sockaddr_un addr;
 	const char *msg = NULL;
 	const char *path;
+	int ext, ready;
+	socklen_t ready_len;
 
 	int ret;
 
@@ -169,6 +175,16 @@
 		
 	path = ((struct sockaddr_un *)&listener->addr)->sun_path;
 
+	/* if the listener already has an fd assigned, then we were offered the
+	 * fd by an external process (most likely the parent), and we don't want
+	 * to create a new socket. However we still want to set a few flags on
+	 * the socket.
+	 */
+	fd = listener->fd;
+	ext = (fd >= 0);
+	if (ext)
+		goto fd_ready;
+
 	/* 1. create socket names */
 	if (!path[0]) {
 		msg = "Invalid empty name for a UNIX socket";
@@ -215,6 +231,7 @@
 		goto err_unlink_back;
 	}
 
+ fd_ready:
 	if (fd >= global.maxsock) {
 		msg = "socket(): not enough free sockets, raise -n argument";
 		goto err_unlink_temp;
@@ -225,7 +242,7 @@
 		goto err_unlink_temp;
 	}
 	
-	if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+	if (!ext && bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
 		/* note that bind() creates the socket <tempname> on the file system */
 		msg = "cannot bind UNIX socket";
 		goto err_unlink_temp;
@@ -236,14 +253,21 @@
 	 * While it is known not to be portable on every OS, it's still useful
 	 * where it works.
 	 */
-	if (((listener->bind_conf->ux.uid != -1 || listener->bind_conf->ux.gid != -1) &&
-	     (chown(tempname, listener->bind_conf->ux.uid, listener->bind_conf->ux.gid) == -1)) ||
-	    (listener->bind_conf->ux.mode != 0 && chmod(tempname, listener->bind_conf->ux.mode) == -1)) {
+	if (!ext &&
+	    (((listener->bind_conf->ux.uid != -1 || listener->bind_conf->ux.gid != -1) &&
+	      (chown(tempname, listener->bind_conf->ux.uid, listener->bind_conf->ux.gid) == -1)) ||
+	     (listener->bind_conf->ux.mode != 0 && chmod(tempname, listener->bind_conf->ux.mode) == -1))) {
 		msg = "cannot change UNIX socket ownership";
 		goto err_unlink_temp;
 	}
 
-	if (listen(fd, listener->backlog ? listener->backlog : listener->maxconn) < 0) {
+	ready = 0;
+	ready_len = sizeof(ready);
+	if (getsockopt(fd, SOL_SOCKET, SO_ACCEPTCONN, &ready, &ready_len) == -1)
+		ready = 0;
+
+	if (!(ext && ready) && /* only listen if not already done by external process */
+	    listen(fd, listener->backlog ? listener->backlog : listener->maxconn) < 0) {
 		msg = "cannot listen to UNIX socket";
 		goto err_unlink_temp;
 	}
@@ -253,13 +277,19 @@
 	 * fear loosing the socket <path> because we have a copy of it in
 	 * backname.
 	 */
-	if (rename(tempname, path) < 0) {
+	if (!ext && rename(tempname, path) < 0) {
 		msg = "cannot switch final and temporary UNIX sockets";
 		goto err_rename;
 	}
 
-	/* 6. cleanup */
-	unlink(backname); /* no need to keep this one either */
+	/* 6. cleanup. If we're bound to an fd inherited from the parent, we
+	 * want to ensure that destroy_uxst_socket() will never remove the
+	 * path, and for this we simply clear the path to the socket.
+	 */
+	if (!ext)
+		unlink(backname);
+	else
+		((struct sockaddr_un *)&listener->addr)->sun_path[0] = 0;
 
 	/* the socket is now listening */
 	listener->fd = fd;
@@ -275,13 +305,19 @@
 	if (ret < 0 && errno == ENOENT)
 		unlink(path);
  err_unlink_temp:
-	unlink(tempname);
+	if (!ext)
+		unlink(tempname);
 	close(fd);
  err_unlink_back:
-	unlink(backname);
+	if (!ext)
+		unlink(backname);
  err_return:
-	if (msg && errlen)
-		snprintf(errmsg, errlen, "%s [%s]", msg, path);
+	if (msg && errlen) {
+		if (!ext)
+			snprintf(errmsg, errlen, "%s [%s]", msg, path);
+		else
+			snprintf(errmsg, errlen, "%s [fd %d]", msg, fd);
+	}
 	return ERR_FATAL | ERR_ALERT;
 }
 
diff --git a/src/standard.c b/src/standard.c
index c670be0..cc22ba7 100644
--- a/src/standard.c
+++ b/src/standard.c
@@ -632,6 +632,7 @@
  *    - "ipv6@"  => force address to resolve as IPv6 and fail if not possible.
  *    - "unix@"  => force address to be a path to a UNIX socket even if the
  *                  path does not start with a '/'
+ *    - "fd@"    => an integer must follow, and is a file descriptor number.
  *
  * Also note that in order to avoid any ambiguity with IPv6 addresses, the ':'
  * is mandatory after the IP address even when no port is specified. NULL is
@@ -640,6 +641,9 @@
  *
  * If <pfx> is non-null, it is used as a string prefix before any path-based
  * address (typically the path to a unix socket).
+ *
+ * When a file descriptor is passed, its value is stored in the s_addr field of
+ * the address (cast to sockaddr_in) and the returned family is AF_UNSPEC.
  */
 struct sockaddr_storage *str2sa_range(const char *str, int *low, int *high, char **err, const char *pfx)
 {
@@ -677,7 +681,21 @@
 	else
 		ss.ss_family = AF_UNSPEC;
 
+	if (ss.ss_family == AF_UNSPEC && strncmp(str2, "fd@", 3) == 0) {
+		char *endptr;
+
+		str2 += 3;
+		((struct sockaddr_in *)&ss)->sin_addr.s_addr = strtol(str2, &endptr, 10);
+
-	if (ss.ss_family == AF_UNIX) {
+		if (!*str2 || *endptr) {
+			memprintf(err, "file descriptor '%s' is not a valid integer\n", str2);
+			goto out;
+		}
+
+		/* we return AF_UNSPEC if we use a file descriptor number */
+		ss.ss_family = AF_UNSPEC;
+	}
+	else if (ss.ss_family == AF_UNIX) {
 		int prefix_path_len;
 		int max_path_len;