BUG/MAJOR: pools: fix incomplete backport of lockless pool fix

Commit bc76411e0 ("BUG/MAJOR: pools: fix possible race with free() in
the lockless variant") wasn't complete. The __pool_free() function also
needed a part of the lockless variant of the code that was in
pool_put_to_shared_cache() in 2.4. Without it, a __pool_free() during
a contended pool_alloc() may catch free_list == POOL_BUSY and copy it
into the updated free list, so that the next call to __pool_get_first()
loops forever and gets killed by the watchdog.

Sadly it requires load under contention on a non-glibc system to notice
it :-/

This fix is only for 2.3 and 2.2 since 2.2 now also contains the faulty
backport of the patch above.
diff --git a/include/haproxy/pool.h b/include/haproxy/pool.h
index 1ca7130..f2e1f47 100644
--- a/include/haproxy/pool.h
+++ b/include/haproxy/pool.h
@@ -219,7 +219,7 @@
  */
 static inline void __pool_free(struct pool_head *pool, void *ptr)
 {
-	void **free_list = pool->free_list;
+	void **free_list;
 
 	_HA_ATOMIC_SUB(&pool->used, 1);
 
@@ -227,9 +227,14 @@
 		pool_free_area(ptr, pool->size + POOL_EXTRA);
 		_HA_ATOMIC_SUB(&pool->allocated, 1);
 	} else {
+		free_list = _HA_ATOMIC_LOAD(&pool->free_list);
 		do {
-			*POOL_LINK(pool, ptr) = (void *)free_list;
-			__ha_barrier_store();
+                       while (unlikely(free_list == POOL_BUSY)) {
+                               __ha_cpu_relax();
+                               free_list = _HA_ATOMIC_LOAD(&pool->free_list);
+                       }
+                       _HA_ATOMIC_STORE(POOL_LINK(pool, ptr), (void *)free_list);
+                       __ha_barrier_atomic_store();
 		} while (!_HA_ATOMIC_CAS(&pool->free_list, &free_list, ptr));
 		__ha_barrier_atomic_store();
 	}