BUG/MEDIUM: event_hdl: clean soft-stop handling

soft-stop was not explicitly handled in event_hdl API.

Because of this, event_hdl was causing some leaks on deinit paths.
Moreover, a task responsible for handling events could require some
additional cleanups (ie: advanced async task), and as the task was not
protected against abort when soft-stopping, such cleanup could not be
performed unless the task itself implements the required protections,
which is not optimal.

Consider this new approach:
 'jobs' global variable is incremented whenever an async subscription is
 created to prevent the related task from being aborted before the task
 acknowledges the final END event.

 Once the END event is acknowledged and freed by the task, the 'jobs'
 variable is decremented, and the deinit process may continue (including
 the abortion of remaining tasks not guarded by the 'jobs' variable).

To do this, a new global mt_list is required: known_event_hdl_sub_list
This list tracks the known (initialized) subscription lists within the
process.

sub_lists are automatically added to the "known" list when calling
event_hdl_sub_list_init(), and are removed from the list with
event_hdl_sub_list_destroy().

This allows us to implement a global thread-safe event_hdl deinit()
function that is automatically called on soft-stop thanks to signal(0).
When event_hdl deinit() is initiated, we simply iterate against the known
subscription lists to destroy them.

event_hdl_subscribe_ptr() was slightly modified to make sure that a sub_list
may not accept new subscriptions once it is destroyed (removed from the
known list)
This can occur between the time the soft-stop is initiated (signal(0)) and
haproxy actually enters in the deinit() function (once tasks are either
finished or aborted and other threads already joined).

It is safe to destroy() the subscription list multiple times as long
as the pointer is still valid (ie: first on soft-stop when handling
the '0' signal, then from regular deinit() path): the function does
nothing if the subscription list is already removed.

We partially reverted "BUG/MINOR: event_hdl: make event_hdl_subscribe thread-safe"
since we can use parent mt_list locking instead of a dedicated lock to make
the check gainst duplicate subscription ID.
(insert_lock is not useful anymore)

The check in itself is not changed, only the locking method.

sizeof(event_hdl_sub_list) slightly increases: from 24 bits to 32bits due
to the additional mt_list struct within it.

With that said, having thread-safe list to store known subscription lists
is a good thing: it could help to implement additional management
logic for subcription lists and could be useful to add some stats or
debugging tools in the future.

If 68e692da0 ("MINOR: event_hdl: add event handler base api")
is being backported, then this commit should be backported with it.
diff --git a/include/haproxy/event_hdl-t.h b/include/haproxy/event_hdl-t.h
index 50a2016..e1966fd 100644
--- a/include/haproxy/event_hdl-t.h
+++ b/include/haproxy/event_hdl-t.h
@@ -25,7 +25,6 @@
 #include <stdint.h>
 
 #include <haproxy/api-t.h>
-#include <haproxy/thread-t.h>
 
 /* event data struct are defined as followed */
 struct event_hdl_cb_data_template {
@@ -75,7 +74,7 @@
 
 struct event_hdl_sub_list_head {
 	struct mt_list head;
-	__decl_thread(HA_SPINLOCK_T insert_lock);
+	struct mt_list known; /* api uses this to track known subscription lists */
 };
 
 /* event_hdl_sub_list is an alias (please use this for portability) */
diff --git a/include/haproxy/thread.h b/include/haproxy/thread.h
index 658f6d6..67ceba3 100644
--- a/include/haproxy/thread.h
+++ b/include/haproxy/thread.h
@@ -425,7 +425,6 @@
 	IDLE_CONNS_LOCK,
 	QUIC_LOCK,
 	OCSP_LOCK,
-	EHDL_LOCK,
 	OTHER_LOCK,
 	/* WT: make sure never to use these ones outside of development,
 	 * we need them for lock profiling!
diff --git a/src/event_hdl.c b/src/event_hdl.c
index c3bacfe..9760746 100644
--- a/src/event_hdl.c
+++ b/src/event_hdl.c
@@ -16,6 +16,7 @@
 #include <haproxy/task.h>
 #include <haproxy/tools.h>
 #include <haproxy/errors.h>
+#include <haproxy/signal.h>
 #include <haproxy/xxhash.h>
 
 /* event types changes in event_hdl-t.h file should be reflected in the
@@ -51,10 +52,31 @@
 /* global subscription list (implicit where NULL is used as sublist argument) */
 static event_hdl_sub_list global_event_hdl_sub_list;
 
+/* every known subscription lists are tracked in this list (including the global one) */
+static struct mt_list known_event_hdl_sub_list = MT_LIST_HEAD_INIT(known_event_hdl_sub_list);
+
+static void _event_hdl_sub_list_destroy(event_hdl_sub_list *sub_list);
+
+static void event_hdl_deinit(struct sig_handler *sh)
+{
+	event_hdl_sub_list *cur_list;
+	struct mt_list *elt1, elt2;
+
+	/* destroy all known subscription lists */
+	mt_list_for_each_entry_safe(cur_list, &known_event_hdl_sub_list, known, elt1, elt2) {
+		/* remove cur elem from list */
+		MT_LIST_DELETE_SAFE(elt1);
+		/* then destroy it */
+		_event_hdl_sub_list_destroy(cur_list);
+	}
+}
+
 static void event_hdl_init(void)
 {
 	/* initialize global subscription list */
 	event_hdl_sub_list_init(&global_event_hdl_sub_list);
+	/* register the deinit function, will be called on soft-stop */
+	signal_register_fct(0, event_hdl_deinit, 0);
 }
 
 /* general purpose hashing function when you want to compute
@@ -159,6 +181,7 @@
 		 * (it is already removed from mt_list, no race can occur)
 		 */
 		event_hdl_drop(e->sub_mgmt.this);
+		HA_ATOMIC_DEC(&jobs);
 	}
 	else if (e->_data &&
 	         HA_ATOMIC_SUB_FETCH(&e->_data->refcount, 1) == 0) {
@@ -381,6 +404,7 @@
 	struct event_hdl_sub *new_sub = NULL;
 	struct mt_list *elt1, elt2;
 	struct event_hdl_async_task_default_ctx	*task_ctx = NULL;
+	struct mt_list lock;
 
 	if (!sub_list)
 		sub_list = &global_event_hdl_sub_list; /* fall back to global list */
@@ -453,12 +477,13 @@
 	/* ready for registration */
 	MT_LIST_INIT(&new_sub->mt_list);
 
+	lock = MT_LIST_LOCK_ELT(&sub_list->known);
+
 	/* check if such identified hdl is not already registered */
 	if (hdl.id) {
 		struct event_hdl_sub *cur_sub;
 		uint8_t found = 0;
 
-		HA_SPIN_LOCK(EHDL_LOCK, &sub_list->insert_lock);
 		mt_list_for_each_entry_safe(cur_sub, &sub_list->head, mt_list, elt1, elt2) {
 			if (hdl.id == cur_sub->hdl.id) {
 				/* we found matching registered hdl */
@@ -468,17 +493,35 @@
 		}
 		if (found) {
 			/* error already registered */
-			HA_SPIN_UNLOCK(EHDL_LOCK, &sub_list->insert_lock);
+			MT_LIST_UNLOCK_ELT(&sub_list->known, lock);
 			event_hdl_report_hdl_state(ha_alert, &hdl, "SUB", "could not subscribe: subscription with this id already exists");
 			goto cleanup;
 		}
 	}
 
+	if (lock.next == &sub_list->known) {
+		/* this is an expected corner case on de-init path, a subscribe attempt
+		 * was made but the subscription list is already destroyed, we pretend
+		 * it is a memory/IO error since it should not be long before haproxy
+		 * enters the deinit() function anyway
+		 */
+		MT_LIST_UNLOCK_ELT(&sub_list->known, lock);
+		goto cleanup;
+	}
+
 	/* Append in list (global or user specified list).
 	 * For now, append when sync mode, and insert when async mode
 	 * so that async handlers are executed first
 	 */
 	if (hdl.async) {
+		/* Prevent the task from being aborted on soft-stop: let's wait
+		 * until the END event is acknowledged by the task.
+		 * (decrease is performed in event_hdl_async_free_event())
+		 *
+		 * If we don't do this, event_hdl API will leak and we won't give
+		 * a chance to the event-handling task to perform cleanup
+		 */
+		HA_ATOMIC_INC(&jobs);
 		/* async mode, insert at the beginning of the list */
 		MT_LIST_INSERT(&sub_list->head, &new_sub->mt_list);
 	} else {
@@ -486,8 +529,7 @@
 		MT_LIST_APPEND(&sub_list->head, &new_sub->mt_list);
 	}
 
-	if (hdl.id)
-		HA_SPIN_UNLOCK(EHDL_LOCK, &sub_list->insert_lock);
+	MT_LIST_UNLOCK_ELT(&sub_list->known, lock);
 
 	return new_sub;
 
@@ -763,26 +805,33 @@
 {
 	BUG_ON(!sub_list); /* unexpected, global sublist is managed internally */
 	MT_LIST_INIT(&sub_list->head);
-	HA_SPIN_INIT(&sub_list->insert_lock);
+	MT_LIST_APPEND(&known_event_hdl_sub_list, &sub_list->known);
 }
 
-/* when a subscription list is no longer used, call this
- * to do the cleanup and make sure all related subscriptions are
- * safely ended according to their types
- */
-void event_hdl_sub_list_destroy(event_hdl_sub_list *sub_list)
+/* internal function, assumes that sub_list ptr is always valid */
+static void _event_hdl_sub_list_destroy(event_hdl_sub_list *sub_list)
 {
 	struct event_hdl_sub *cur_sub;
 	struct mt_list *elt1, elt2;
 
-	BUG_ON(!sub_list); /* unexpected, global sublist is managed internally */
 	mt_list_for_each_entry_safe(cur_sub, &sub_list->head, mt_list, elt1, elt2) {
 		/* remove cur elem from list */
 		MT_LIST_DELETE_SAFE(elt1);
 		/* then free it */
 		_event_hdl_unsubscribe(cur_sub);
 	}
-	HA_SPIN_DESTROY(&sub_list->insert_lock);
+}
+
+/* when a subscription list is no longer used, call this
+ * to do the cleanup and make sure all related subscriptions are
+ * safely ended according to their types
+ */
+void event_hdl_sub_list_destroy(event_hdl_sub_list *sub_list)
+{
+	BUG_ON(!sub_list); /* unexpected, global sublist is managed internally */
+	if (!MT_LIST_DELETE(&sub_list->known))
+		return; /* already destroyed */
+	_event_hdl_sub_list_destroy(sub_list);
 }
 
 INITCALL0(STG_INIT, event_hdl_init);
diff --git a/src/thread.c b/src/thread.c
index 9d7c004..d11df8d 100644
--- a/src/thread.c
+++ b/src/thread.c
@@ -444,7 +444,6 @@
 	case IDLE_CONNS_LOCK:      return "IDLE_CONNS";
 	case QUIC_LOCK:            return "QUIC";
 	case OCSP_LOCK:            return "OCSP";
-	case EHDL_LOCK:            return "EVENT_HDL";
 	case OTHER_LOCK:           return "OTHER";
 	case DEBUG1_LOCK:          return "DEBUG1";
 	case DEBUG2_LOCK:          return "DEBUG2";