CLEANUP: atomic: add an explicit _FETCH variant for add/sub/and/or

Currently our atomic ops return a value, but it is never specified
whether the fetch is performed before or after the operation, which
causes confusion whenever the returned value is needed. Let's create
explicit variants of these operations, suffixed with _FETCH, to make
it clear that the fetch occurs after the operation, and use them at
the few call places that rely on the returned value.
diff --git a/include/haproxy/atomic.h b/include/haproxy/atomic.h
index 8608b12..7a357da 100644
--- a/include/haproxy/atomic.h
+++ b/include/haproxy/atomic.h
@@ -77,6 +77,12 @@
 #define HA_ATOMIC_OR(val, flags)     ({*(val) |= (flags);})
 #define HA_ATOMIC_ADD(val, i)        ({*(val) += (i);})
 #define HA_ATOMIC_SUB(val, i)        ({*(val) -= (i);})
+
+#define HA_ATOMIC_AND_FETCH(val, flags) ({ *(val) &= (flags); })
+#define HA_ATOMIC_OR_FETCH(val, flags)  ({ *(val) |= (flags); })
+#define HA_ATOMIC_ADD_FETCH(val, i)     ({ *(val) += (i); })
+#define HA_ATOMIC_SUB_FETCH(val, i)     ({ *(val) -= (i); })
+
 #define HA_ATOMIC_XADD(val, i)						\
 	({								\
 		typeof((val)) __p_xadd = (val);				\
@@ -196,6 +202,12 @@
 #define HA_ATOMIC_OR(val, flags)     __sync_or_and_fetch(val,  flags)
 #define HA_ATOMIC_ADD(val, i)        __sync_add_and_fetch(val, i)
 #define HA_ATOMIC_SUB(val, i)        __sync_sub_and_fetch(val, i)
+
+#define HA_ATOMIC_AND_FETCH(val, flags) __sync_and_and_fetch(val, flags)
+#define HA_ATOMIC_OR_FETCH(val, flags)  __sync_or_and_fetch(val,  flags)
+#define HA_ATOMIC_ADD_FETCH(val, i)     __sync_add_and_fetch(val, i)
+#define HA_ATOMIC_SUB_FETCH(val, i)     __sync_sub_and_fetch(val, i)
+
 #define HA_ATOMIC_XADD(val, i)       __sync_fetch_and_add(val, i)
 
 #define HA_ATOMIC_BTS(val, bit)						\
@@ -271,6 +283,12 @@
 #define HA_ATOMIC_OR(val, flags)     __atomic_or_fetch(val,  flags, __ATOMIC_SEQ_CST)
 #define HA_ATOMIC_ADD(val, i)        __atomic_add_fetch(val, i, __ATOMIC_SEQ_CST)
 #define HA_ATOMIC_SUB(val, i)        __atomic_sub_fetch(val, i, __ATOMIC_SEQ_CST)
+
+#define HA_ATOMIC_AND_FETCH(val, flags) __atomic_and_fetch(val, flags, __ATOMIC_SEQ_CST)
+#define HA_ATOMIC_OR_FETCH(val, flags)  __atomic_or_fetch(val,  flags, __ATOMIC_SEQ_CST)
+#define HA_ATOMIC_ADD_FETCH(val, i)     __atomic_add_fetch(val, i, __ATOMIC_SEQ_CST)
+#define HA_ATOMIC_SUB_FETCH(val, i)     __atomic_sub_fetch(val, i, __ATOMIC_SEQ_CST)
+
 #define HA_ATOMIC_XADD(val, i)       __atomic_fetch_add(val, i, __ATOMIC_SEQ_CST)
 
 #define HA_ATOMIC_BTS(val, bit)						\
@@ -326,6 +344,12 @@
 #define _HA_ATOMIC_OR(val, flags)     __atomic_or_fetch(val,  flags, __ATOMIC_RELAXED)
 #define _HA_ATOMIC_ADD(val, i)        __atomic_add_fetch(val, i, __ATOMIC_RELAXED)
 #define _HA_ATOMIC_SUB(val, i)        __atomic_sub_fetch(val, i, __ATOMIC_RELAXED)
+
+#define _HA_ATOMIC_AND_FETCH(val, flags) __atomic_and_fetch(val, flags, __ATOMIC_RELAXED)
+#define _HA_ATOMIC_OR_FETCH(val, flags)  __atomic_or_fetch(val,  flags, __ATOMIC_RELAXED)
+#define _HA_ATOMIC_ADD_FETCH(val, i)     __atomic_add_fetch(val, i, __ATOMIC_RELAXED)
+#define _HA_ATOMIC_SUB_FETCH(val, i)     __atomic_sub_fetch(val, i, __ATOMIC_RELAXED)
+
 #define _HA_ATOMIC_XADD(val, i)       __atomic_fetch_add(val, i, __ATOMIC_RELAXED)
 #define _HA_ATOMIC_CAS(val, old, new) __atomic_compare_exchange_n(val, old, new, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED)
 /* warning, n is a pointer to the double value for dwcas */
@@ -634,6 +658,10 @@
 #define _HA_ATOMIC_ADD HA_ATOMIC_ADD
 #endif /* !_HA_ATOMIC_ADD */
 
+#ifndef _HA_ATOMIC_ADD_FETCH
+#define _HA_ATOMIC_ADD_FETCH HA_ATOMIC_ADD_FETCH
+#endif /* !_HA_ATOMIC_ADD_FETCH */
+
 #ifndef _HA_ATOMIC_XADD
 #define _HA_ATOMIC_XADD HA_ATOMIC_XADD
 #endif /* !_HA_ATOMIC_SUB */
@@ -642,14 +670,26 @@
 #define _HA_ATOMIC_SUB HA_ATOMIC_SUB
 #endif /* !_HA_ATOMIC_SUB */
 
+#ifndef _HA_ATOMIC_SUB_FETCH
+#define _HA_ATOMIC_SUB_FETCH HA_ATOMIC_SUB_FETCH
+#endif /* !_HA_ATOMIC_SUB_FETCH */
+
 #ifndef _HA_ATOMIC_AND
 #define _HA_ATOMIC_AND HA_ATOMIC_AND
 #endif /* !_HA_ATOMIC_AND */
 
+#ifndef _HA_ATOMIC_AND_FETCH
+#define _HA_ATOMIC_AND_FETCH HA_ATOMIC_AND_FETCH
+#endif /* !_HA_ATOMIC_AND_FETCH */
+
 #ifndef _HA_ATOMIC_OR
 #define _HA_ATOMIC_OR HA_ATOMIC_OR
 #endif /* !_HA_ATOMIC_OR */
 
+#ifndef _HA_ATOMIC_OR_FETCH
+#define _HA_ATOMIC_OR_FETCH HA_ATOMIC_OR_FETCH
+#endif /* !_HA_ATOMIC_OR_FETCH */
+
 #ifndef _HA_ATOMIC_XCHG
 #define _HA_ATOMIC_XCHG HA_ATOMIC_XCHG
 #endif /* !_HA_ATOMIC_XCHG */
diff --git a/include/haproxy/fd.h b/include/haproxy/fd.h
index 37c9ac5..cb49e25 100644
--- a/include/haproxy/fd.h
+++ b/include/haproxy/fd.h
@@ -126,7 +126,7 @@
 {
 	unsigned long update_mask;
 
-	update_mask = _HA_ATOMIC_AND(&fdtab[fd].update_mask, ~tid_bit);
+	update_mask = _HA_ATOMIC_AND_FETCH(&fdtab[fd].update_mask, ~tid_bit);
 	while ((update_mask & all_threads_mask)== 0) {
 		/* If we were the last one that had to update that entry, remove it from the list */
 		fd_rm_from_fd_list(&update_list, fd, offsetof(struct fdtab, update));
@@ -346,7 +346,7 @@
  */
 static inline long fd_clr_running(int fd)
 {
-	return _HA_ATOMIC_AND(&fdtab[fd].running_mask, ~tid_bit);
+	return _HA_ATOMIC_AND_FETCH(&fdtab[fd].running_mask, ~tid_bit);
 }
 
 /* Update events seen for FD <fd> and its state if needed. This should be
diff --git a/include/haproxy/freq_ctr.h b/include/haproxy/freq_ctr.h
index b1db43e..8792625 100644
--- a/include/haproxy/freq_ctr.h
+++ b/include/haproxy/freq_ctr.h
@@ -51,7 +51,7 @@
 	do {
 		now_tmp = global_now >> 32;
 		if (curr_sec == (now_tmp & 0x7fffffff))
-			return _HA_ATOMIC_ADD(&ctr->curr_ctr, inc);
+			return _HA_ATOMIC_ADD_FETCH(&ctr->curr_ctr, inc);
 
 		/* remove the bit, used for the lock */
 		curr_sec &= 0x7fffffff;
@@ -72,7 +72,7 @@
 	/* release the lock and update the time in case of rotate. */
 	_HA_ATOMIC_STORE(&ctr->curr_sec, curr_sec & 0x7fffffff);
 
-	return _HA_ATOMIC_ADD(&ctr->curr_ctr, inc);
+	return _HA_ATOMIC_ADD_FETCH(&ctr->curr_ctr, inc);
 }
 
 /* Update a frequency counter by <inc> incremental units. It is automatically
@@ -90,7 +90,7 @@
 	do {
 		now_ms_tmp = global_now_ms;
 		if (now_ms_tmp - curr_tick < period)
-			return _HA_ATOMIC_ADD(&ctr->curr_ctr, inc);
+			return _HA_ATOMIC_ADD_FETCH(&ctr->curr_ctr, inc);
 
 		/* remove the bit, used for the lock */
 		curr_tick &= ~1;
@@ -112,7 +112,7 @@
 	/* release the lock and update the time in case of rotate. */
 	_HA_ATOMIC_STORE(&ctr->curr_tick, curr_tick);
 
-	return _HA_ATOMIC_ADD(&ctr->curr_ctr, inc);
+	return _HA_ATOMIC_ADD_FETCH(&ctr->curr_ctr, inc);
 }
 
 /* Read a frequency counter taking history into account for missing time in
diff --git a/include/haproxy/pattern.h b/include/haproxy/pattern.h
index 3f489a9..5b64c0d 100644
--- a/include/haproxy/pattern.h
+++ b/include/haproxy/pattern.h
@@ -204,7 +204,7 @@
  */
 static inline unsigned int pat_ref_newgen(struct pat_ref *ref)
 {
-	return HA_ATOMIC_ADD(&ref->next_gen, 1);
+	return HA_ATOMIC_ADD_FETCH(&ref->next_gen, 1);
 }
 
 /* Give up a previously assigned generation number. By doing this the caller
diff --git a/include/haproxy/server.h b/include/haproxy/server.h
index d3dc1d6..e4450a8 100644
--- a/include/haproxy/server.h
+++ b/include/haproxy/server.h
@@ -252,7 +252,7 @@
 {
 	unsigned int curr;
 
-	curr = _HA_ATOMIC_ADD(&srv->curr_used_conns, 1);
+	curr = _HA_ATOMIC_ADD_FETCH(&srv->curr_used_conns, 1);
 
 	/* It's ok not to do that atomically, we don't need an
 	 * exact max.
@@ -318,7 +318,7 @@
 	    !conn->mux->used_streams(conn) && conn->mux->avail_streams(conn)) {
 		int retadd;
 
-		retadd = _HA_ATOMIC_ADD(&srv->curr_idle_conns, 1);
+		retadd = _HA_ATOMIC_ADD_FETCH(&srv->curr_idle_conns, 1);
 		if (retadd > srv->max_idle_conns) {
 			_HA_ATOMIC_SUB(&srv->curr_idle_conns, 1);
 			return 0;
diff --git a/include/haproxy/task.h b/include/haproxy/task.h
index f692a0a..3276c7e 100644
--- a/include/haproxy/task.h
+++ b/include/haproxy/task.h
@@ -207,7 +207,7 @@
 {
 	unsigned int state;
 
-	state = _HA_ATOMIC_OR(&t->state, f);
+	state = _HA_ATOMIC_OR_FETCH(&t->state, f);
 	while (!(state & (TASK_RUNNING | TASK_QUEUED))) {
 		if (_HA_ATOMIC_CAS(&t->state, &state, state | TASK_QUEUED)) {
 #ifdef DEBUG_TASK
diff --git a/src/backend.c b/src/backend.c
index 62be510..1b9c704 100644
--- a/src/backend.c
+++ b/src/backend.c
@@ -1703,7 +1703,7 @@
 		int count;
 
 		s->flags |= SF_CURR_SESS;
-		count = _HA_ATOMIC_ADD(&srv->cur_sess, 1);
+		count = _HA_ATOMIC_ADD_FETCH(&srv->cur_sess, 1);
 		HA_ATOMIC_UPDATE_MAX(&srv->counters.cur_sess_max, count);
 		if (s->be->lbprm.server_take_conn)
 			s->be->lbprm.server_take_conn(srv, 0);
diff --git a/src/dict.c b/src/dict.c
index f3c2a73..ba076d0 100644
--- a/src/dict.c
+++ b/src/dict.c
@@ -116,7 +116,7 @@
 	if (!de)
 		return;
 
-	if (HA_ATOMIC_SUB(&de->refcount, 1) != 0)
+	if (HA_ATOMIC_SUB_FETCH(&de->refcount, 1) != 0)
 		return;
 
 	HA_RWLOCK_WRLOCK(DICT_LOCK, &d->rwlock);
diff --git a/src/fd.c b/src/fd.c
index 8d671bd..588271f 100644
--- a/src/fd.c
+++ b/src/fd.c
@@ -374,7 +374,7 @@
 	int ret = -1;
 
 #ifndef HA_HAVE_CAS_DW
-	if (_HA_ATOMIC_OR(&fdtab[fd].running_mask, tid_bit) == tid_bit) {
+	if (_HA_ATOMIC_OR_FETCH(&fdtab[fd].running_mask, tid_bit) == tid_bit) {
 		HA_RWLOCK_WRLOCK(OTHER_LOCK, &fd_mig_lock);
 		if (fdtab[fd].owner == expected_owner) {
 			fdtab[fd].thread_mask = tid_bit;
@@ -388,7 +388,7 @@
 
 	new_masks[0] = new_masks[1] = tid_bit;
 
-	old_masks[0] = _HA_ATOMIC_OR(&fdtab[fd].running_mask, tid_bit);
+	old_masks[0] = _HA_ATOMIC_OR_FETCH(&fdtab[fd].running_mask, tid_bit);
 	old_masks[1] = fdtab[fd].thread_mask;
 
 	/* protect ourself against a delete then an insert for the same fd,
diff --git a/src/haproxy.c b/src/haproxy.c
index e7d30b0..a8f2274 100644
--- a/src/haproxy.c
+++ b/src/haproxy.c
@@ -2385,7 +2385,7 @@
 			int i;
 
 			if (stopping) {
-				if (_HA_ATOMIC_OR(&stopping_thread_mask, tid_bit) == tid_bit) {
+				if (_HA_ATOMIC_OR_FETCH(&stopping_thread_mask, tid_bit) == tid_bit) {
 					/* notify all threads that stopping was just set */
 					for (i = 0; i < global.nbthread; i++)
 						if (((all_threads_mask & ~stopping_thread_mask) >> i) & 1)
diff --git a/src/proxy.c b/src/proxy.c
index fb60bf4..1b2cd2e 100644
--- a/src/proxy.c
+++ b/src/proxy.c
@@ -2126,7 +2126,7 @@
 
 	s->be = be;
 	HA_ATOMIC_UPDATE_MAX(&be->be_counters.conn_max,
-			     HA_ATOMIC_ADD(&be->beconn, 1));
+			     HA_ATOMIC_ADD_FETCH(&be->beconn, 1));
 	proxy_inc_be_ctr(be);
 
 	/* assign new parameters to the stream from the new backend */
diff --git a/src/queue.c b/src/queue.c
index e5ab5db..58f9875 100644
--- a/src/queue.c
+++ b/src/queue.c
@@ -400,7 +400,7 @@
 	if (srv) {
 		unsigned int old_max, new_max;
 
-		new_max = _HA_ATOMIC_ADD(&srv->nbpend, 1);
+		new_max = _HA_ATOMIC_ADD_FETCH(&srv->nbpend, 1);
 		old_max = srv->counters.nbpend_max;
 		while (new_max > old_max) {
 			if (likely(_HA_ATOMIC_CAS(&srv->counters.nbpend_max, &old_max, new_max)))
@@ -416,7 +416,7 @@
 	else {
 		unsigned int old_max, new_max;
 
-		new_max = _HA_ATOMIC_ADD(&px->nbpend, 1);
+		new_max = _HA_ATOMIC_ADD_FETCH(&px->nbpend, 1);
 		old_max = px->be_counters.nbpend_max;
 		while (new_max > old_max) {
 			if (likely(_HA_ATOMIC_CAS(&px->be_counters.nbpend_max, &old_max, new_max)))
diff --git a/src/ssl_sock.c b/src/ssl_sock.c
index b06c2ae..525d02f 100644
--- a/src/ssl_sock.c
+++ b/src/ssl_sock.c
@@ -1943,7 +1943,7 @@
 	 * number */
 	if (X509_set_version(newcrt, 2L) != 1)
 		goto mkcert_error;
-	ASN1_INTEGER_set(X509_get_serialNumber(newcrt), _HA_ATOMIC_ADD(&ssl_ctx_serial, 1));
+	ASN1_INTEGER_set(X509_get_serialNumber(newcrt), _HA_ATOMIC_ADD_FETCH(&ssl_ctx_serial, 1));
 
 	/* Set duration for the certificate */
 	if (!X509_gmtime_adj(X509_getm_notBefore(newcrt), (long)-60*60*24) ||
diff --git a/src/task.c b/src/task.c
index be3262a..59f6ff7 100644
--- a/src/task.c
+++ b/src/task.c
@@ -585,7 +585,7 @@
 				HA_ATOMIC_ADD(&profile_entry->cpu_time, cpu);
 			}
 
-			state = _HA_ATOMIC_AND(&t->state, ~TASK_RUNNING);
+			state = _HA_ATOMIC_AND_FETCH(&t->state, ~TASK_RUNNING);
 			if (unlikely(state & TASK_KILLED)) {
 				task_unlink_wq(t);
 				__task_free(t);