[MEDIUM] add support for binding to source port ranges during connect
Some users are already hitting the 64k source port limit when
connecting to servers. The system usually maintains a list of
unused source ports, regardless of the source IP they're bound
to. So in order to go beyond 64k concurrent connections, we
have to manage the source ip:port lists ourselves.
The solution consists in assigning a source port range to each
server and using a free port in that range when connecting to that
server, either for a proxied connection or for a health check.
The port must then be put back into the server's range when the
connection is closed.
This mechanism is used only when a port range is specified on
a server. It makes it possible to reach 64k connections per
server, possibly all from the same IP address. Right now it
should be more than enough even for huge deployments.
diff --git a/src/backend.c b/src/backend.c
index 321c8a8..b830bdb 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -33,6 +33,7 @@
#include <proto/fd.h>
#include <proto/httperr.h>
#include <proto/log.h>
+#include <proto/port_range.h>
#include <proto/proto_http.h>
#include <proto/proto_tcp.h>
#include <proto/queue.h>
@@ -1812,9 +1813,44 @@
if (s->srv->iface_name)
setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, s->srv->iface_name, s->srv->iface_len + 1);
#endif
- ret = tcpv4_bind_socket(fd, flags, &s->srv->source_addr, remote);
+
+ if (s->srv->sport_range) {
+ int attempts = 10; /* should be more than enough to find a spare port */
+ struct sockaddr_in src;
+
+ ret = 1;
+ src = s->srv->source_addr;
+
+ do {
+ /* note: in case of retry, we may have to release a previously
+ * allocated port, hence this loop's construct.
+ */
+ port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
+ fdtab[fd].port_range = NULL;
+
+ if (!attempts)
+ break;
+ attempts--;
+
+ fdtab[fd].local_port = port_range_alloc_port(s->srv->sport_range);
+ if (!fdtab[fd].local_port)
+ break;
+
+ fdtab[fd].port_range = s->srv->sport_range;
+ src.sin_port = htons(fdtab[fd].local_port);
+
+ ret = tcpv4_bind_socket(fd, flags, &src, remote);
+ } while (ret != 0); /* binding NOK */
+ }
+ else {
+ ret = tcpv4_bind_socket(fd, flags, &s->srv->source_addr, remote);
+ }
+
if (ret) {
+ port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
+ fdtab[fd].port_range = NULL;
close(fd);
+
if (ret == 1) {
Alert("Cannot bind to source address before connect() for server %s/%s. Aborting.\n",
s->be->id, s->srv->id);
@@ -1887,6 +1923,8 @@
msg = "local address already in use";
qfprintf(stderr,"Cannot connect: %s.\n",msg);
+ port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
+ fdtab[fd].port_range = NULL;
close(fd);
send_log(s->be, LOG_EMERG,
"Connect() failed for server %s/%s: %s.\n",
@@ -1894,11 +1932,15 @@
return SN_ERR_RESOURCE;
} else if (errno == ETIMEDOUT) {
//qfprintf(stderr,"Connect(): ETIMEDOUT");
+ port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
+ fdtab[fd].port_range = NULL;
close(fd);
return SN_ERR_SRVTO;
} else {
// (errno == ECONNREFUSED || errno == ENETUNREACH || errno == EACCES || errno == EPERM)
//qfprintf(stderr,"Connect(): %d", errno);
+ port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
+ fdtab[fd].port_range = NULL;
close(fd);
return SN_ERR_SRVCL;
}
diff --git a/src/cfgparse.c b/src/cfgparse.c
index bfcccf2..b268d59 100644
--- a/src/cfgparse.c
+++ b/src/cfgparse.c
@@ -40,6 +40,7 @@
#include <proto/dumpstats.h>
#include <proto/httperr.h>
#include <proto/log.h>
+#include <proto/port_range.h>
#include <proto/protocols.h>
#include <proto/proto_tcp.h>
#include <proto/proto_http.h>
@@ -2179,18 +2180,34 @@
cur_arg += 1;
}
else if (!strcmp(args[cur_arg], "source")) { /* address to which we bind when connecting */
+ int port_low, port_high;
if (!*args[cur_arg + 1]) {
#if defined(CONFIG_HAP_CTTPROXY) || defined(CONFIG_HAP_LINUX_TPROXY)
- Alert("parsing [%s:%d] : '%s' expects <addr>[:<port>], and optional '%s' <addr> as argument.\n",
+ Alert("parsing [%s:%d] : '%s' expects <addr>[:<port>[-<port>]], and optional '%s' <addr> as argument.\n",
file, linenum, "source", "usesrc");
#else
- Alert("parsing [%s:%d] : '%s' expects <addr>[:<port>] as argument.\n",
+ Alert("parsing [%s:%d] : '%s' expects <addr>[:<port>[-<port>]] as argument.\n",
file, linenum, "source");
#endif
return -1;
}
newsrv->state |= SRV_BIND_SRC;
- newsrv->source_addr = *str2sa(args[cur_arg + 1]);
+ newsrv->source_addr = *str2sa_range(args[cur_arg + 1], &port_low, &port_high);
+
+ if (port_low != port_high) {
+ int i;
+ if (port_low <= 0 || port_low > 65535 ||
+ port_high <= 0 || port_high > 65535 ||
+ port_low > port_high) {
+ Alert("parsing [%s:%d] : invalid source port range %d-%d.\n",
+ file, linenum, port_low, port_high);
+ return -1;
+ }
+ newsrv->sport_range = port_range_alloc_range(port_high - port_low + 1);
+ for (i = 0; i < newsrv->sport_range->size; i++)
+ newsrv->sport_range->ports[i] = port_low + i;
+ }
+
cur_arg += 2;
while (*(args[cur_arg])) {
if (!strcmp(args[cur_arg], "usesrc")) { /* address to use outside */
diff --git a/src/checks.c b/src/checks.c
index 216d2cb..b4e9857 100644
--- a/src/checks.c
+++ b/src/checks.c
@@ -36,6 +36,7 @@
#include <proto/fd.h>
#include <proto/log.h>
#include <proto/queue.h>
+#include <proto/port_range.h>
#include <proto/proto_http.h>
#include <proto/proto_tcp.h>
#include <proto/proxy.h>
@@ -597,7 +598,38 @@
setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
s->iface_name, s->iface_len + 1);
#endif
- ret = tcpv4_bind_socket(fd, flags, &s->source_addr, remote);
+ if (s->sport_range) {
+ int bind_attempts = 10; /* should be more than enough to find a spare port */
+ struct sockaddr_in src;
+
+ ret = 1;
+ src = s->source_addr;
+
+ do {
+ /* note: in case of retry, we may have to release a previously
+ * allocated port, hence this loop's construct.
+ */
+ port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
+ fdtab[fd].port_range = NULL;
+
+ if (!bind_attempts)
+ break;
+ bind_attempts--;
+
+ fdtab[fd].local_port = port_range_alloc_port(s->sport_range);
+ if (!fdtab[fd].local_port)
+ break;
+
+ fdtab[fd].port_range = s->sport_range;
+ src.sin_port = htons(fdtab[fd].local_port);
+
+ ret = tcpv4_bind_socket(fd, flags, &src, remote);
+ } while (ret != 0); /* binding NOK */
+ }
+ else {
+ ret = tcpv4_bind_socket(fd, flags, &s->source_addr, remote);
+ }
+
if (ret) {
s->result |= SRV_CHK_ERROR;
switch (ret) {
@@ -682,6 +714,8 @@
}
}
}
+ port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
+ fdtab[fd].port_range = NULL;
close(fd); /* socket creation error */
}
diff --git a/src/fd.c b/src/fd.c
index 3d7071c..9dd365a 100644
--- a/src/fd.c
+++ b/src/fd.c
@@ -19,6 +19,7 @@
#include <common/config.h>
#include <proto/fd.h>
+#include <proto/port_range.h>
struct fdtab *fdtab = NULL; /* array of all the file descriptors */
int maxfd; /* # of the highest fd + 1 */
@@ -36,6 +37,8 @@
void fd_delete(int fd)
{
EV_FD_CLO(fd);
+ port_range_release_port(fdtab[fd].port_range, fdtab[fd].local_port);
+ fdtab[fd].port_range = NULL;
close(fd);
fdtab[fd].state = FD_STCLOSE;
diff --git a/src/standard.c b/src/standard.c
index de5b664..9dc56af 100644
--- a/src/standard.c
+++ b/src/standard.c
@@ -1,7 +1,7 @@
/*
* General purpose functions.
*
- * Copyright 2000-2007 Willy Tarreau <w@1wt.eu>
+ * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -241,6 +241,65 @@
}
/*
+ * Converts <str> to a pointer to a static struct sockaddr_in (overwritten on
+ * each call) and a port range stored in <low> and <high>. Both are set to the
+ * same value for a single port, or to (0,0) if no port is given; they are left
+ * untouched if strdup() fails. The low port is set in the sockaddr_in. Thus, it
+ * is enough to compare the two ends of the range to know if a port array must
+ * be allocated. The format is "addr[:port[-port]]", where "addr" can be a
+ * dotted IPv4 address, a host name, or empty or "*" to indicate INADDR_ANY.
+ */
+struct sockaddr_in *str2sa_range(char *str, int *low, int *high)
+{
+ static struct sockaddr_in sa;
+ char *c;
+ int portl, porth;
+
+ memset(&sa, 0, sizeof(sa)); /* reset the shared result before filling it */
+ str = strdup(str); /* work on a private copy, it is cut in place below */
+ if (str == NULL)
+ goto out_nofree; /* note: *low and *high are not written on this path */
+
+ if ((c = strrchr(str,':')) != NULL) { /* the last ':' starts the port part */
+ char *sep;
+ *c++ = '\0'; /* terminate the address, <c> now points to the port(s) */
+ sep = strchr(c, '-');
+ if (sep)
+ *sep++ = '\0'; /* split "low-high", <sep> points to the high end */
+ else
+ sep = c; /* single port: the high end equals the low end */
+ portl = atol(c); /* no syntax check here, the atol() result is used as-is */
+ porth = atol(sep);
+ }
+ else {
+ portl = 0;
+ porth = 0;
+ }
+
+ if (*str == '*' || *str == '\0') { /* INADDR_ANY */
+ sa.sin_addr.s_addr = INADDR_ANY;
+ }
+ else if (!inet_pton(AF_INET, str, &sa.sin_addr)) { /* not dotted IPv4: try a host name */
+ struct hostent *he;
+
+ if ((he = gethostbyname(str)) == NULL) {
+ Alert("Invalid server name: '%s'\n", str); /* only reported, no error is returned */
+ }
+ else
+ sa.sin_addr = *(struct in_addr *) *(he->h_addr_list); /* first address of the list */
+ }
+ sa.sin_port = htons(portl); /* low end of the range, in network byte order */
+ sa.sin_family = AF_INET;
+
+ *low = portl;
+ *high = porth;
+
+ free(str); /* release the private copy */
+ out_nofree:
+ return &sa;
+}
+
+/*
* converts <str> to two struct in_addr* which must be pre-allocated.
* The format is "addr[/mask]", where "addr" cannot be empty, and mask
* is optionnal and either in the dotted or CIDR notation.