MAJOR: namespace: add Linux network namespace support

This patch makes it possible to create binds and servers in separate
namespaces.  This can be used to proxy between multiple completely independent
virtual networks (with possibly overlapping IP addresses) and a
non-namespace-aware proxy implementation that supports the proxy protocol (v2).

The setup is something like this:

net1 on VLAN 1 (namespace 1) -\
net2 on VLAN 2 (namespace 2) -- haproxy ==== proxy (namespace 0)
net3 on VLAN 3 (namespace 3) -/

The proxy is configured to make server connections through haproxy and to send
the expected source/target addresses to haproxy using the proxy protocol.

The network namespace setup on the haproxy node is something like this:

= 8< =
$ cat setup.sh
ip netns add 1
ip link add link eth1 type vlan id 1
ip link set eth1.1 netns 1
ip netns exec 1 ip addr add 192.168.91.2/24 dev eth1.1
ip netns exec 1 ip link set eth1.1 up
...
= 8< =

= 8< =
$ cat haproxy.cfg
frontend clients
  bind 127.0.0.1:50022 namespace 1 transparent
  default_backend server

backend server
  mode tcp
  server server1 192.168.122.4:2222 namespace 2 send-proxy-v2
= 8< =

A bind line creates the listener in the specified namespace, and connections
originating from that listener also have their network namespace set to
that of the listener.

A server line either forces the connection to be made in a specified
namespace or may use the namespace from the client-side connection if that
was set.

For more documentation please read the documentation included in the patch
itself.

Signed-off-by: KOVACS Tamas <ktamas@balabit.com>
Signed-off-by: Sarkozi Laszlo <laszlo.sarkozi@balabit.com>
Signed-off-by: KOVACS Krisztian <hidden@balabit.com>
diff --git a/src/backend.c b/src/backend.c
index 6fe03f9..e222160 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -26,6 +26,7 @@
 #include <common/hash.h>
 #include <common/ticks.h>
 #include <common/time.h>
+#include <common/namespace.h>
 
 #include <types/global.h>
 
@@ -720,7 +721,6 @@
 	return err;
 }
 
-
 /*
  * This function assigns a server address to a session, and sets SN_ADDR_SET.
  * The address is taken from the currently assigned server, or from the
@@ -803,11 +803,13 @@
 		return SRV_STATUS_INTERNAL;
 	}
 
+	/* Copy network namespace from client connection */
+	srv_conn->proxy_netns = cli_conn->proxy_netns;
+
 	s->flags |= SN_ADDR_SET;
 	return SRV_STATUS_OK;
 }
 
-
 /* This function assigns a server to session <s> if required, and can add the
  * connection to either the assigned server's queue or to the proxy's queue.
  * If ->srv_conn is set, the session is first released from the server.
diff --git a/src/cfgparse.c b/src/cfgparse.c
index a6a0051..c8b1546 100644
--- a/src/cfgparse.c
+++ b/src/cfgparse.c
@@ -41,6 +41,7 @@
 #include <common/standard.h>
 #include <common/time.h>
 #include <common/uri_auth.h>
+#include <common/namespace.h>
 
 #include <types/capture.h>
 #include <types/compression.h>
@@ -5645,6 +5646,48 @@
 }
 
 int
+cfg_parse_netns(const char *file, int linenum, char **args, int kwm)
+{
+	/* Handles one line of a "namespace_list" section: each name following a
+	 * "namespace" keyword is validated, then opened and registered. Returns 0,
+	 * or ERR_ALERT | ERR_FATAL on an invalid, duplicate or unopenable name. */
+#ifdef CONFIG_HAP_NS
+	const char *err;
+	const char *item = args[0];
+
+	if (!strcmp(item, "namespace_list"))
+		return 0; /* the section keyword itself: nothing to register */
+	else if (!strcmp(item, "namespace")) {
+		size_t idx = 1;
+		const char *current;
+		while (*(current = args[idx++])) {
+			err = invalid_char(current);
+			if (err) {
+				Alert("parsing [%s:%d]: character '%c' is not permitted in '%s' name '%s'.\n",
+				      file, linenum, *err, item, current);
+				return ERR_ALERT | ERR_FATAL;
+			}
+			if (netns_store_lookup(current, strlen(current))) {
+				Alert("parsing [%s:%d]: Namespace '%s' is already added.\n",
+				      file, linenum, current);
+				return ERR_ALERT | ERR_FATAL;
+			}
+			if (!netns_store_insert(current)) {
+				Alert("parsing [%s:%d]: Cannot open namespace '%s'.\n",
+				      file, linenum, current);
+				return ERR_ALERT | ERR_FATAL;
+			}
+		}
+	}
+
+	return 0;
+#else
+	Alert("parsing [%s:%d]: namespace support is not compiled in.\n", file, linenum);
+	return ERR_ALERT | ERR_FATAL;
+#endif
+}
+
+int
 cfg_parse_users(const char *file, int linenum, char **args, int kwm)
 {
 
@@ -5856,7 +5899,8 @@
 	    !cfg_register_section("defaults", cfg_parse_listen) ||
 	    !cfg_register_section("global",   cfg_parse_global) ||
 	    !cfg_register_section("userlist", cfg_parse_users)  ||
-	    !cfg_register_section("peers",    cfg_parse_peers))
+	    !cfg_register_section("peers",    cfg_parse_peers)  ||
+	    !cfg_register_section("namespace_list",    cfg_parse_netns))
 		return -1;
 
 	if ((f=fopen(file,"r")) == NULL)
diff --git a/src/connection.c b/src/connection.c
index b9f5c42..c21c98b 100644
--- a/src/connection.c
+++ b/src/connection.c
@@ -14,6 +14,7 @@
 
 #include <common/compat.h>
 #include <common/config.h>
+#include <common/namespace.h>
 
 #include <proto/connection.h>
 #include <proto/fd.h>
@@ -217,6 +218,14 @@
 	c->flags = f;
 }
 
+/* Returns the value length of TLV <src>: <length_hi> supplies the upper
+ * byte and <length_lo> the lower byte of the 16-bit length. */
+static int get_tlv_length(const struct tlv *src)
+{
+	const int upper = src->length_hi << 8;
+	return upper | src->length_lo;
+}
+
 /* This handshake handler waits a PROXY protocol header at the beginning of the
  * raw data stream. The header looks like this :
  *
@@ -245,6 +254,7 @@
 	char *line, *end;
 	struct proxy_hdr_v2 *hdr_v2;
 	const char v2sig[] = PP2_SIGNATURE;
+	int tlv_length = 0;
 
 	/* we might have been called just after an asynchronous shutr */
 	if (conn->flags & CO_FL_SOCK_RD_SH)
@@ -434,6 +444,7 @@
 			((struct sockaddr_in *)&conn->addr.to)->sin_addr.s_addr = hdr_v2->addr.ip4.dst_addr;
 			((struct sockaddr_in *)&conn->addr.to)->sin_port = hdr_v2->addr.ip4.dst_port;
 			conn->flags |= CO_FL_ADDR_FROM_SET | CO_FL_ADDR_TO_SET;
+			tlv_length = ntohs(hdr_v2->len) - PP2_ADDR_LEN_INET;
 			break;
 		case 0x21:  /* TCPv6 */
 			if (ntohs(hdr_v2->len) < PP2_ADDR_LEN_INET6)
@@ -446,8 +457,35 @@
 			memcpy(&((struct sockaddr_in6 *)&conn->addr.to)->sin6_addr, hdr_v2->addr.ip6.dst_addr, 16);
 			((struct sockaddr_in6 *)&conn->addr.to)->sin6_port = hdr_v2->addr.ip6.dst_port;
 			conn->flags |= CO_FL_ADDR_FROM_SET | CO_FL_ADDR_TO_SET;
+			tlv_length = ntohs(hdr_v2->len) - PP2_ADDR_LEN_INET6;
 			break;
 		}
+
+		/* TLV parsing: walk the TLVs located in the last <tlv_length> bytes of trash */
+		if (tlv_length > 0) {
+			int tlv_offset = trash.len - tlv_length;
+
+			while (tlv_offset + TLV_HEADER_SIZE <= trash.len) {
+				const struct tlv *tlv_packet = (struct tlv *) &trash.str[tlv_offset];
+				const int tlv_len = get_tlv_length(tlv_packet);
+				tlv_offset += tlv_len + TLV_HEADER_SIZE;
+				if (tlv_offset > trash.len)
+					break; /* announced value would end beyond trash.len: don't read it */
+				switch (tlv_packet->type) {
+#ifdef CONFIG_HAP_NS
+				case PP2_TYPE_NETNS: {
+					const struct netns_entry *ns = netns_store_lookup((char*)tlv_packet->value, tlv_len);
+					if (ns)
+						conn->proxy_netns = ns;
+					break;
+				}
+#endif
+				default:
+					break;
+				}
+			}
+		}
+
 		/* unsupported protocol, keep local connection address */
 		break;
 	case 0x00: /* LOCAL command */
@@ -597,8 +635,8 @@
 	return ret;
 }
 
-#ifdef USE_OPENSSL
-static int make_tlv(char *dest, int dest_len, char type, uint16_t length, char *value)
+#if defined(USE_OPENSSL) || defined(CONFIG_HAP_NS)
+static int make_tlv(char *dest, int dest_len, char type, uint16_t length, const char *value)
 {
 	struct tlv *tlv;
 
@@ -623,8 +661,8 @@
 	struct sockaddr_storage null_addr = {0};
 	struct sockaddr_storage *src = &null_addr;
 	struct sockaddr_storage *dst = &null_addr;
+
 #ifdef USE_OPENSSL
-	int tlv_len = 0;
 	char *value = NULL;
 	struct tlv_ssl *tlv;
 	int ssl_tlv_len = 0;
@@ -639,6 +677,7 @@
 		src = &remote->addr.from;
 		dst = &remote->addr.to;
 	}
+
 	if (src && dst && src->ss_family == dst->ss_family && src->ss_family == AF_INET) {
 		if (buf_len < PP2_HDR_LEN_INET)
 			return 0;
@@ -681,8 +720,7 @@
 			tlv->client |= PP2_CLIENT_SSL;
 			value = ssl_sock_get_version(remote);
 			if (value) {
-				tlv_len = make_tlv(&buf[ret+ssl_tlv_len], (buf_len-ret-ssl_tlv_len), PP2_TYPE_SSL_VERSION, strlen(value), value);
-				ssl_tlv_len += tlv_len;
+				ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len-ret-ssl_tlv_len), PP2_TYPE_SSL_VERSION, strlen(value), value);
 			}
 			if (ssl_sock_get_cert_used_sess(remote)) {
 				tlv->client |= PP2_CLIENT_CERT_SESS;
@@ -693,8 +731,7 @@
 			if (srv->pp_opts & SRV_PP_V2_SSL_CN) {
 				cn_trash = get_trash_chunk();
 				if (ssl_sock_get_remote_common_name(remote, cn_trash) > 0) {
-					tlv_len = make_tlv(&buf[ret+ssl_tlv_len], (buf_len - ret - ssl_tlv_len), PP2_TYPE_SSL_CN, cn_trash->len, cn_trash->str);
-					ssl_tlv_len += tlv_len;
+					ssl_tlv_len += make_tlv(&buf[ret+ssl_tlv_len], (buf_len - ret - ssl_tlv_len), PP2_TYPE_SSL_CN, cn_trash->len, cn_trash->str);
 				}
 			}
 		}
@@ -704,6 +741,14 @@
 	}
 #endif
 
+#ifdef CONFIG_HAP_NS
+	if (remote && (remote->proxy_netns)) { /* append the namespace name as a TLV */
+		if ((buf_len - ret) < sizeof(struct tlv))
+			return 0;
+		ret += make_tlv(&buf[ret], (buf_len - ret), PP2_TYPE_NETNS, remote->proxy_netns->name_len, remote->proxy_netns->node.key);
+	}
+#endif
+
 	hdr->len = htons((uint16_t)(ret - PP2_HEADER_LEN));
 
 	return ret;
diff --git a/src/haproxy.c b/src/haproxy.c
index 13c3d26..a306952 100644
--- a/src/haproxy.c
+++ b/src/haproxy.c
@@ -66,6 +66,7 @@
 #include <common/errors.h>
 #include <common/memory.h>
 #include <common/mini-clist.h>
+#include <common/namespace.h>
 #include <common/regex.h>
 #include <common/standard.h>
 #include <common/time.h>
@@ -354,6 +355,10 @@
 #endif
 	       "\n");
 #endif
+
+#if defined(CONFIG_HAP_NS)
+	printf("Built with network namespace support\n");
+#endif
 	putchar('\n');
 
 	list_pollers(stdout);
@@ -721,6 +726,14 @@
 		exit(1);
 	}
 
+#ifdef CONFIG_HAP_NS
+        err_code |= netns_init();
+        if (err_code & (ERR_ABORT|ERR_FATAL)) {
+                Alert("Failed to initialize namespace support.\n");
+                exit(1);
+        }
+#endif
+
 	if (global.mode & MODE_CHECK) {
 		struct peers *pr;
 		struct proxy *px;
diff --git a/src/namespace.c b/src/namespace.c
new file mode 100644
index 0000000..a22f1a5
--- /dev/null
+++ b/src/namespace.c
@@ -0,0 +1,114 @@
+#define _GNU_SOURCE
+
+#include <common/namespace.h>
+#include <common/compiler.h>
+#include <common/hash.h>
+#include <common/errors.h>
+#include <proto/log.h>
+#include <types/global.h>
+
+#include <sched.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/socket.h>
+
+#include <string.h>
+#ifdef CONFIG_HAP_NS
+
+/* Formats the path "/var/run/netns/<ns_name>" into the <trash> chunk and opens
+ * it read-only. Returns the FD, or -1 when chunk_printf() reports a negative
+ * result or open() fails (check errno). */
+static int open_named_namespace(const char *ns_name)
+{
+	const int len = chunk_printf(&trash, "/var/run/netns/%s", ns_name);
+
+	return (len < 0) ? -1 : open(trash.str, O_RDONLY);
+}
+
+static int default_namespace = -1; /* FD of our own namespace, -1 until opened */
+
+/* Opens /proc/<pid>/ns/net into <default_namespace>; returns the FD or -1 */
+static int init_default_namespace(void)
+{
+	if (chunk_printf(&trash, "/proc/%d/ns/net", getpid()) < 0)
+		return -1;
+	return default_namespace = open(trash.str, O_RDONLY);
+}
+
+/* Tree of the registered namespaces, keyed by name */
+static struct eb_root namespace_tree_root = EB_ROOT;
+
+/* Opens the default namespace if at least one namespace has been
+ * registered. Returns 0 on success, or ERR_ALERT | ERR_FATAL when
+ * the default namespace cannot be opened. */
+int netns_init(void)
+{
+	/* With no namespace defined in the config, my_socketat() is
+	 * never called with a valid namespace structure, so there is
+	 * no need to be able to switch back to the default one.
+	 */
+	if (eb_is_empty(&namespace_tree_root))
+		return 0;
+	if (init_default_namespace() >= 0)
+		return 0;
+
+	Alert("Failed to open the default namespace.\n");
+	return ERR_ALERT | ERR_FATAL;
+}
+
+/* Opens namespace <ns_name> and stores it in the tree; returns NULL on failure */
+struct netns_entry* netns_store_insert(const char *ns_name)
+{
+	struct netns_entry *entry = calloc(1, sizeof(*entry));
+
+	if (!entry || !(entry->node.key = strdup(ns_name)) ||
+	    (entry->fd = open_named_namespace(ns_name)) == -1) {
+		/* nothing was stored and no FD is open: drop the partial entry */
+		if (entry)
+			free(entry->node.key);
+		free(entry);
+		return NULL;
+	}
+	entry->name_len = strlen(ns_name);
+	ebis_insert(&namespace_tree_root, &entry->node);
+	return entry;
+}
+
+/* Searches the namespace tree for the name <ns_name> of length
+ * <ns_name_len>. Returns the matching entry, or NULL when the lookup
+ * finds nothing.
+ */
+const struct netns_entry* netns_store_lookup(const char *ns_name, size_t ns_name_len)
+{
+	struct ebpt_node *found = ebis_lookup_len(&namespace_tree_root, ns_name, ns_name_len);
+
+	return found ? ebpt_entry(found, struct netns_entry, node) : NULL;
+}
+#endif
+
+/* Opens a socket with <domain>, <type> and <protocol> in namespace <ns>, or in the
+ * current namespace when <ns> is NULL (no namespace switch is attempted then).
+ * Returns the FD or -1 in case of error (check errno). */
+int my_socketat(const struct netns_entry *ns, int domain, int type, int protocol)
+{
+	int sock;
+
+#ifdef CONFIG_HAP_NS
+	/* only enter <ns> when the default namespace is open to switch back to */
+	if (ns && (default_namespace < 0 || setns(ns->fd, CLONE_NEWNET) == -1))
+		return -1;
+#endif
+	sock = socket(domain, type, protocol);
+
+#ifdef CONFIG_HAP_NS
+	if (ns && setns(default_namespace, CLONE_NEWNET) == -1) {
+		close(sock);
+		return -1;
+	}
+#endif
+
+	return sock;
+}
diff --git a/src/proto_tcp.c b/src/proto_tcp.c
index cfa62f7..afb9e15 100644
--- a/src/proto_tcp.c
+++ b/src/proto_tcp.c
@@ -33,6 +33,7 @@
 #include <common/errors.h>
 #include <common/mini-clist.h>
 #include <common/standard.h>
+#include <common/namespace.h>
 
 #include <types/global.h>
 #include <types/capture.h>
@@ -247,6 +248,15 @@
 	return 0;
 }
 
+/* Opens the socket for <conn>, in the namespace selected by its server if any */
+static int create_server_socket(struct connection *conn)
+{
+	const struct server *srv = objt_server(conn->target);
+	const struct netns_entry *ns = NULL;
+	if (srv) /* assumes objt_server() gives NULL for a non-server target */
+		ns = (srv->flags & SRV_F_USE_NS_FROM_PP) ? conn->proxy_netns : srv->netns;
+	return my_socketat(ns, conn->addr.to.ss_family, SOCK_STREAM, IPPROTO_TCP);
+}
 
 /*
  * This function initiates a TCP connection establishment to the target assigned
@@ -301,7 +311,9 @@
 		return SN_ERR_INTERNAL;
 	}
 
-	if ((fd = conn->t.sock.fd = socket(conn->addr.to.ss_family, SOCK_STREAM, IPPROTO_TCP)) == -1) {
+	fd = conn->t.sock.fd = create_server_socket(conn);
+
+	if (fd == -1) {
 		qfprintf(stderr, "Cannot get a server socket.\n");
 
 		if (errno == ENFILE) {
@@ -741,10 +753,14 @@
 	fd = listener->fd;
 	ext = (fd >= 0);
 
-	if (!ext && (fd = socket(listener->addr.ss_family, SOCK_STREAM, IPPROTO_TCP)) == -1) {
-		err |= ERR_RETRYABLE | ERR_ALERT;
-		msg = "cannot create listening socket";
-		goto tcp_return;
+	if (!ext) {
+		fd = my_socketat(listener->netns, listener->addr.ss_family, SOCK_STREAM, IPPROTO_TCP);
+
+		if (fd == -1) {
+			err |= ERR_RETRYABLE | ERR_ALERT;
+			msg = "cannot create listening socket";
+			goto tcp_return;
+		}
 	}
 
 	if (fd >= global.maxsock) {
@@ -2007,6 +2023,34 @@
 }
 #endif
 
+#ifdef CONFIG_HAP_NS
+/* Parses the "namespace" bind keyword: sets the namespace named by the next
+ * argument on every listener of <conf>, storing it first when it is unknown.
+ * Returns 0, or ERR_ALERT | ERR_FATAL if the name is missing or can't be opened. */
+static int bind_parse_namespace(char **args, int cur_arg, struct proxy *px, struct bind_conf *conf, char **err)
+{
+	char *ns_name = args[cur_arg + 1];
+	struct listener *l;
+
+	if (!*ns_name) {
+		memprintf(err, "'%s' : missing namespace id", args[cur_arg]);
+		return ERR_ALERT | ERR_FATAL;
+	}
+
+	list_for_each_entry(l, &conf->listeners, by_bind) {
+		l->netns = netns_store_lookup(ns_name, strlen(ns_name));
+		if (!l->netns)
+			l->netns = netns_store_insert(ns_name);
+		if (l->netns)
+			continue;
+
+		Alert("Cannot open namespace '%s'.\n", ns_name);
+		return ERR_ALERT | ERR_FATAL;
+	}
+	return 0;
+}
+#endif
+
 static struct cfg_kw_list cfg_kws = {ILH, {
 	{ CFG_LISTEN, "tcp-request",  tcp_parse_tcp_req },
 	{ CFG_LISTEN, "tcp-response", tcp_parse_tcp_rep },
@@ -2066,6 +2110,9 @@
 	{ "v4v6",          bind_parse_v4v6,         0 }, /* force socket to bind to IPv4+IPv6 */
 	{ "v6only",        bind_parse_v6only,       0 }, /* force socket to bind to IPv6 only */
 #endif
+#ifdef CONFIG_HAP_NS
+	{ "namespace",     bind_parse_namespace,    1 },
+#endif
 	/* the versions with the NULL parse function*/
 	{ "defer-accept",  NULL,  0 },
 	{ "interface",     NULL,  1 },
diff --git a/src/server.c b/src/server.c
index 94a31b6..4f9fad8 100644
--- a/src/server.c
+++ b/src/server.c
@@ -16,6 +16,7 @@
 #include <common/cfgparse.h>
 #include <common/config.h>
 #include <common/errors.h>
+#include <common/namespace.h>
 #include <common/time.h>
 
 #include <types/global.h>
@@ -1501,6 +1502,31 @@
 				err_code |= ERR_ALERT | ERR_FATAL;
 				goto out;
 			}
+			else if (!defsrv && !strcmp(args[cur_arg], "namespace")) {
+#ifdef CONFIG_HAP_NS
+				char *arg = args[cur_arg + 1];
+				if (!strcmp(arg, "*")) {
+					newsrv->flags |= SRV_F_USE_NS_FROM_PP;
+				} else {
+					newsrv->netns = netns_store_lookup(arg, strlen(arg));
+
+					if (newsrv->netns == NULL)
+						newsrv->netns = netns_store_insert(arg);
+
+					if (newsrv->netns == NULL) {
+						Alert("Cannot open namespace '%s'.\n", args[cur_arg + 1]);
+						err_code |= ERR_ALERT | ERR_FATAL;
+						goto out;
+					}
+				}
+#else
+				Alert("parsing [%s:%d] : '%s' : '%s' option not implemented.\n",
+				      file, linenum, args[0], args[cur_arg]);
+				err_code |= ERR_ALERT | ERR_FATAL;
+				goto out;
+#endif
+				cur_arg += 2;
+			}
 			else {
 				static int srv_dumped;
 				struct srv_kw *kw;
diff --git a/src/session.c b/src/session.c
index 675f26d..0ceb031 100644
--- a/src/session.c
+++ b/src/session.c
@@ -90,6 +90,7 @@
 	cli_conn->addr.from = *addr;
 	cli_conn->flags |= CO_FL_ADDR_FROM_SET;
 	cli_conn->target = &l->obj_type;
+	cli_conn->proxy_netns = l->netns;
 
 	if (unlikely((s = pool_alloc2(pool2_session)) == NULL))
 		goto out_free_conn;