MEDIUM: stick-table: make stksess_kill_if_expired() avoid the exclusive lock

stream_store_counters() calls stksess_kill_if_expired() for each active
counter. And this one takes an exclusive lock on the table before
checking if it has any work to do (hint: it almost never has since it
only wants to delete expired entries). However a lock is still needed for
now to protect the ref_cnt, but we can do it atomically under the read
lock.

Let's change the mechanism. Now what we do is to check outside the lock
if the entry is expired. If it is, we take the write lock, expire it,
and decrement the refcount. Otherwise we just decrement the refcount
under a read lock. With this change alone, the config based on 3
trackers without the previous patches saw a 2.6x improvement, but here
it doesn't yet change anything because some heavy contention remains
on the lookup part.
diff --git a/include/haproxy/stick_table.h b/include/haproxy/stick_table.h
index e1b6d06..c5cd3c8 100644
--- a/include/haproxy/stick_table.h
+++ b/include/haproxy/stick_table.h
@@ -198,15 +198,22 @@
 
 static inline void stksess_kill_if_expired(struct stktable *t, struct stksess *ts, int decrefcnt)
 {
-	HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock);
 
-	if (decrefcnt)
-		ts->ref_cnt--;
+	if (t->expire != TICK_ETERNITY && tick_is_expired(ts->expire, now_ms)) {
+		HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock);
+		if (decrefcnt)
+			ts->ref_cnt--;
 
-	if (t->expire != TICK_ETERNITY && tick_is_expired(ts->expire, now_ms))
 		__stksess_kill_if_expired(t, ts);
-
-	HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock);
+		HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock);
+	}
+	else {
+		if (decrefcnt) {
+			HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->lock);
+			HA_ATOMIC_DEC(&ts->ref_cnt);
+			HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->lock);
+		}
+	}
 }
 
 /* sets the stick counter's entry pointer */
diff --git a/src/stick_table.c b/src/stick_table.c
index b5e04d8..92f662d 100644
--- a/src/stick_table.c
+++ b/src/stick_table.c
@@ -103,9 +103,9 @@
 		dict_entry_unref(&server_key_dict, stktable_data_cast(data, std_t_dict));
 		stktable_data_cast(data, std_t_dict) = NULL;
 	}
-	HA_RWLOCK_WRLOCK(STK_TABLE_LOCK, &t->lock);
+	HA_RWLOCK_RDLOCK(STK_TABLE_LOCK, &t->lock);
 	__stksess_free(t, ts);
-	HA_RWLOCK_WRUNLOCK(STK_TABLE_LOCK, &t->lock);
+	HA_RWLOCK_RDUNLOCK(STK_TABLE_LOCK, &t->lock);
 }
 
 /*