BUG/MEDIUM: resolvers: Add a task on servers to check SRV resolution status
When a server relies on a SRV resolution, a task is created to clean it up
(fqdn/port and address) when the SRV resolution is considered as outdated
(based on the resolvers 'timeout' value). It is only possible if the server
inherits outdated info from a state file and is no longer selected to be
attached to a SRV item. Note that most of time, a server is attached to a
SRV item. Thus when the item becomes obsolete, the server is cleaned
up.
It is important to have such task to be sure the server will be free again
to have a chance to be resolved again with fresh information. Of course,
this patch is a workaround to solve a design issue. But there is no other
obvious way to fix it without rewritting all the resolvers part. And it must
be backportable.
This patch relies on following commits:
* MINOR: resolvers: Clean server in a dedicated function when removing a SRV item
* MINOR: resolvers: Remove server from named_servers tree when removing a SRV item
All the series must be backported as far as 2.2 after some observation
period. Backports to 2.0 and 1.8 must be evaluated.
(cherry picked from commit dcac41806239d4f3b2de5e4d1bacf9c03ca99efb)
Signed-off-by: Christopher Faulet <cfaulet@haproxy.com>
diff --git a/include/haproxy/server-t.h b/include/haproxy/server-t.h
index e866efa..ef91ee9 100644
--- a/include/haproxy/server-t.h
+++ b/include/haproxy/server-t.h
@@ -368,6 +368,7 @@
struct list srv_rec_item; /* to attach server to a srv record item */
struct list ip_rec_item; /* to attach server to a A or AAAA record item */
struct ebpt_node host_dn; /* hostdn store for srvrq and state file matching*/
+ struct task *srvrq_check; /* Task testing SRV record expiration date for this server */
struct {
const char *file; /* file where the section appears */
struct eb32_node id; /* place in the tree of used IDs */
diff --git a/src/resolvers.c b/src/resolvers.c
index 402f03f..12ce70f 100644
--- a/src/resolvers.c
+++ b/src/resolvers.c
@@ -595,8 +595,28 @@
HA_SPIN_UNLOCK(SERVER_LOCK, &srv->lock);
LIST_DELETE(&srv->srv_rec_item);
LIST_APPEND(&srv->srvrq->attached_servers, &srv->srv_rec_item);
+
+ srv->srvrq_check->expire = TICK_ETERNITY;
}
+/* Takes care to cleanup a server resolution when it is outdated. This only
+ * happens for a server relying on a SRV record.
+ */
+static struct task *resolv_srvrq_expire_task(struct task *t, void *context, unsigned int state)
+{
+ struct server *srv = context;
+
+ if (!tick_is_expired(t->expire, now_ms))
+ goto end;
+
+ HA_SPIN_LOCK(DNS_LOCK, &srv->srvrq->resolvers);
+ resolv_srvrq_cleanup_srv(srv);
+ HA_SPIN_UNLOCK(DNS_LOCK, &srv->srvrq->resolvers);
+
+ end:
+ return t;
+}
+
/* Checks for any obsolete record, also identify any SRV request, and try to
* find a corresponding server.
*/
@@ -727,6 +747,7 @@
if (srv) {
/* re-enable DNS resolution for this server by default */
srv->flags &= ~SRV_F_NO_RESOLUTION;
+ srv->srvrq_check->expire = TICK_ETERNITY;
/* Check if an Additional Record is associated to this SRV record.
* Perform some sanity checks too to ensure the record can be used.
@@ -2428,17 +2449,30 @@
continue;
}
srv->resolvers = resolvers;
+ srv->srvrq_check = NULL;
+ if (srv->srvrq) {
+ if (!srv->srvrq->resolvers) {
+ srv->srvrq->resolvers = srv->resolvers;
+ if (resolv_link_resolution(srv->srvrq, OBJ_TYPE_SRVRQ, 0) == -1) {
+ ha_alert("config : %s '%s' : unable to set DNS resolution for server '%s'.\n",
+ proxy_type_str(px), px->id, srv->id);
+ err_code |= (ERR_ALERT|ERR_ABORT);
+ continue;
+ }
+ }
- if (srv->srvrq && !srv->srvrq->resolvers) {
- srv->srvrq->resolvers = srv->resolvers;
- if (resolv_link_resolution(srv->srvrq, OBJ_TYPE_SRVRQ, 0) == -1) {
- ha_alert("config : %s '%s' : unable to set DNS resolution for server '%s'.\n",
+ srv->srvrq_check = task_new(MAX_THREADS_MASK);
+ if (!srv->srvrq_check) {
+ ha_alert("config: %s '%s' : unable to create SRVRQ task for server '%s'.\n",
proxy_type_str(px), px->id, srv->id);
err_code |= (ERR_ALERT|ERR_ABORT);
- continue;
+ goto err;
}
+ srv->srvrq_check->process = resolv_srvrq_expire_task;
+ srv->srvrq_check->context = srv;
+ srv->srvrq_check->expire = TICK_ETERNITY;
}
- if (!srv->srvrq && resolv_link_resolution(srv, OBJ_TYPE_SERVER, 0) == -1) {
+ else if (resolv_link_resolution(srv, OBJ_TYPE_SERVER, 0) == -1) {
ha_alert("config : %s '%s', unable to set DNS resolution for server '%s'.\n",
proxy_type_str(px), px->id, srv->id);
err_code |= (ERR_ALERT|ERR_ABORT);
diff --git a/src/server.c b/src/server.c
index dac8627..843953c 100644
--- a/src/server.c
+++ b/src/server.c
@@ -2205,6 +2205,7 @@
void free_server(struct server *srv)
{
task_destroy(srv->warmup);
+ task_destroy(srv->srvrq_check);
free(srv->id);
free(srv->cookie);
diff --git a/src/server_state.c b/src/server_state.c
index 7a478a2..18d9cd6 100644
--- a/src/server_state.c
+++ b/src/server_state.c
@@ -425,8 +425,9 @@
for (i = 0; tmp[i]; i++)
tmp[i] = tolower(tmp[i]);
- /* insert in tree */
+ /* insert in tree and set the srvrq expiration date */
ebis_insert(&srv->srvrq->named_servers, &srv->host_dn);
+ task_schedule(srv->srvrq_check, tick_add(now_ms, srv->srvrq->resolvers->hold.timeout));
/* Unset SRV_F_MAPPORTS for SRV records.
* SRV_F_MAPPORTS is unfortunately set by parse_server()