CLEANUP: shctx: remove the different inter-process locking techniques

With a single process, we no longer need USE_PRIVATE_CACHE, USE_FUTEX
or USE_PTHREAD_PSHARED. Let's only keep the basic spinlock to lock
between threads.
diff --git a/INSTALL b/INSTALL
index 16d449d..447b9a0 100644
--- a/INSTALL
+++ b/INSTALL
@@ -379,11 +379,8 @@
   - clock_gettime() not found
     => your system needs USE_RT=1
 
-  - __sync_sub_and_fetch undefined in cache.o
-    => your system needs either USE_PTHREAD_PSHARED=1 or USE_PRIVATE_CACHE=1
-
   - many __sync_<something> errors in many files
-    => your gcc is too old, build without threads and with private cache.
+    => your gcc is too old, build without threads.
 
   - many openssl errors
     => your OpenSSL version really is too old, do not enable OpenSSL
@@ -505,44 +502,6 @@
 recommended way to build when developing, and it is expected that contributed
 patches were tested with ERR=1.
 
-The SSL stack supports session cache synchronization between all running
-processes. This involves some atomic operations and synchronization operations
-which come in multiple flavors depending on the system and architecture :
-
-  Atomic operations :
-    - internal assembler versions for x86/x86_64 architectures
-
-    - gcc builtins for other architectures. Some architectures might not
-      be fully supported or might require a more recent version of gcc.
-      If your architecture is not supported, you willy have to either use
-      pthread if supported, or to disable the shared cache.
-
-    - pthread (posix threads). Pthreads are very common but inter-process
-      support is not that common, and some older operating systems did not
-      report an error when enabling multi-process mode, so they used to
-      silently fail, possibly causing crashes. Linux's implementation is
-      fine. OpenBSD doesn't support them and doesn't build. FreeBSD 9 builds
-      and reports an error at runtime, while certain older versions might
-      silently fail. Pthreads are enabled using USE_PTHREAD_PSHARED=1.
-
-  Synchronization operations :
-    - internal spinlock : this mode is OS-independent, light but will not
-      scale well to many processes. However, accesses to the session cache
-      are rare enough that this mode could certainly always be used. This
-      is the default mode.
-
-    - Futexes, which are Linux-specific highly scalable light weight mutexes
-      implemented in user-space with some limited assistance from the kernel.
-      This is the default on Linux 2.6 and above and is enabled by passing
-      USE_FUTEX=1
-
-    - pthread (posix threads). See above.
-
-If none of these mechanisms is supported by your platform, you may need to
-build with USE_PRIVATE_CACHE=1 to totally disable SSL cache sharing. Then it
-is better not to run SSL on multiple processes. Note that you don't need these
-features if you only intend to use multi-threading and never multi-process.
-
 If you need to pass other defines, includes, libraries, etc... then please
 check the Makefile to see which ones will be available in your case, and
 use/override the USE_* variables from the Makefile.
diff --git a/Makefile b/Makefile
index e3a6211..b25c9ad 100644
--- a/Makefile
+++ b/Makefile
@@ -22,9 +22,7 @@
 #   USE_PCRE2            : enable use of libpcre2 for regex.
 #   USE_PCRE2_JIT        : enable JIT for faster regex on libpcre2
 #   USE_POLL             : enable poll(). Automatic.
-#   USE_PRIVATE_CACHE    : disable shared memory cache of ssl sessions.
 #   USE_THREAD           : enable threads support.
-#   USE_PTHREAD_PSHARED  : enable pthread process shared mutex on sslcache.
 #   USE_STATIC_PCRE      : enable static libpcre. Recommended.
 #   USE_STATIC_PCRE2     : enable static libpcre2.
 #   USE_TPROXY           : enable transparent proxy. Automatic.
@@ -35,7 +33,6 @@
 #   USE_GETADDRINFO      : use getaddrinfo() to resolve IPv6 host names.
 #   USE_OPENSSL          : enable use of OpenSSL. Recommended, but see below.
 #   USE_LUA              : enable Lua support.
-#   USE_FUTEX            : enable use of futex on kernel 2.6. Automatic.
 #   USE_ACCEPT4          : enable use of accept4() on linux. Automatic.
 #   USE_CLOSEFROM        : enable use of closefrom() on *bsd, solaris. Automatic.
 #   USE_PRCTL            : enable use of prctl(). Automatic.
@@ -308,10 +305,10 @@
 # the reported build options.
 use_opts = USE_EPOLL USE_KQUEUE USE_NETFILTER                                 \
            USE_PCRE USE_PCRE_JIT USE_PCRE2 USE_PCRE2_JIT USE_POLL             \
-           USE_PRIVATE_CACHE USE_THREAD USE_PTHREAD_PSHARED USE_BACKTRACE     \
+           USE_THREAD USE_BACKTRACE                                           \
            USE_STATIC_PCRE USE_STATIC_PCRE2 USE_TPROXY USE_LINUX_TPROXY       \
            USE_LINUX_SPLICE USE_LIBCRYPT USE_CRYPT_H                          \
-           USE_GETADDRINFO USE_OPENSSL USE_LUA USE_FUTEX USE_ACCEPT4          \
+           USE_GETADDRINFO USE_OPENSSL USE_LUA USE_ACCEPT4                    \
            USE_CLOSEFROM USE_ZLIB USE_SLZ USE_CPU_AFFINITY USE_TFO USE_NS     \
            USE_DL USE_RT USE_DEVICEATLAS USE_51DEGREES USE_WURFL USE_SYSTEMD  \
            USE_OBSOLETE_LINKER USE_PRCTL USE_THREAD_DUMP USE_EVPORTS USE_OT   \
@@ -353,7 +350,7 @@
 ifeq ($(TARGET),linux-glibc)
   set_target_defaults = $(call default_opts, \
     USE_POLL USE_TPROXY USE_LIBCRYPT USE_DL USE_RT USE_CRYPT_H USE_NETFILTER  \
-    USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_FUTEX USE_LINUX_TPROXY          \
+    USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_LINUX_TPROXY                    \
     USE_ACCEPT4 USE_LINUX_SPLICE USE_PRCTL USE_THREAD_DUMP USE_NS USE_TFO     \
     USE_GETADDRINFO USE_BACKTRACE)
 ifneq ($(shell echo __arm__/__aarch64__ | $(CC) -E -xc - | grep '^[^\#]'),__arm__/__aarch64__)
@@ -365,7 +362,7 @@
 ifeq ($(TARGET),linux-glibc-legacy)
   set_target_defaults = $(call default_opts, \
     USE_POLL USE_TPROXY USE_LIBCRYPT USE_DL USE_RT USE_CRYPT_H USE_NETFILTER  \
-    USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_FUTEX USE_LINUX_TPROXY          \
+    USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_LINUX_TPROXY                    \
     USE_ACCEPT4 USE_LINUX_SPLICE USE_PRCTL USE_THREAD_DUMP USE_GETADDRINFO)
 endif
 
@@ -373,7 +370,7 @@
 ifeq ($(TARGET),linux-musl)
   set_target_defaults = $(call default_opts, \
     USE_POLL USE_TPROXY USE_LIBCRYPT USE_DL USE_RT USE_CRYPT_H USE_NETFILTER  \
-    USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_FUTEX USE_LINUX_TPROXY          \
+    USE_CPU_AFFINITY USE_THREAD USE_EPOLL USE_LINUX_TPROXY                    \
     USE_ACCEPT4 USE_LINUX_SPLICE USE_PRCTL USE_THREAD_DUMP USE_NS USE_TFO     \
     USE_GETADDRINFO)
 ifneq ($(shell echo __arm__/__aarch64__ | $(CC) -E -xc - | grep '^[^\#]'),__arm__/__aarch64__)
@@ -429,7 +426,7 @@
 # AIX 5.1 only
 ifeq ($(TARGET),aix51)
   set_target_defaults = $(call default_opts, \
-    USE_POLL USE_LIBCRYPT USE_OBSOLETE_LINKER USE_PRIVATE_CACHE)
+    USE_POLL USE_LIBCRYPT USE_OBSOLETE_LINKER)
   TARGET_CFLAGS   = -Dss_family=__ss_family -Dip6_hdr=ip6hdr -DSTEVENS_API -D_LINUX_SOURCE_COMPAT -Dunsetenv=my_unsetenv
   DEBUG_CFLAGS    =
 endif
@@ -593,13 +590,6 @@
                 src/quic_frame.o src/quic_cc.o src/quic_cc_newreno.o
 endif
 
-# The private cache option affect the way the shctx is built
-ifeq ($(USE_PRIVATE_CACHE),)
-ifneq ($(USE_PTHREAD_PSHARED),)
-OPTIONS_LDFLAGS += -lpthread
-endif
-endif
-
 ifneq ($(USE_LUA),)
 check_lua_lib = $(shell echo "int main(){}" | $(CC) -o /dev/null -x c - $(2) -l$(1) 2>/dev/null && echo $(1))
 check_lua_inc = $(shell if [ -d $(2)$(1) ]; then echo $(2)$(1); fi;)
diff --git a/include/haproxy/shctx-t.h b/include/haproxy/shctx-t.h
index 533536b..1cd968f 100644
--- a/include/haproxy/shctx-t.h
+++ b/include/haproxy/shctx-t.h
@@ -14,9 +14,6 @@
 #ifndef __HAPROXY_SHCTX_T_H
 #define __HAPROXY_SHCTX_T_H
 
-#if !defined (USE_PRIVATE_CACHE) && defined(USE_PTHREAD_PSHARED)
-#include <pthread.h>
-#endif
 #include <haproxy/api-t.h>
 #include <haproxy/thread-t.h>
 
@@ -49,15 +46,7 @@
 };
 
 struct shared_context {
-#ifndef USE_PRIVATE_CACHE
-#ifdef USE_PTHREAD_PSHARED
-	pthread_mutex_t mutex;
-#else
-	unsigned int waiters;
-#endif
-#else
-	__decl_thread(HA_SPINLOCK_T lock);  // used when USE_PRIVATE_CACHE=1
-#endif
+	__decl_thread(HA_SPINLOCK_T lock);
 	struct list avail;  /* list for active and free blocks */
 	struct list hot;     /* list for locked blocks */
 	unsigned int nbav;  /* number of available blocks */
diff --git a/include/haproxy/shctx.h b/include/haproxy/shctx.h
index 4f8c23f..c9715aa 100644
--- a/include/haproxy/shctx.h
+++ b/include/haproxy/shctx.h
@@ -17,20 +17,7 @@
 #include <haproxy/api.h>
 #include <haproxy/list.h>
 #include <haproxy/shctx-t.h>
-
-#ifndef USE_PRIVATE_CACHE
-#ifdef USE_PTHREAD_PSHARED
-#include <pthread.h>
-#else
-#ifdef USE_SYSCALL_FUTEX
-#include <unistd.h>
-#include <linux/futex.h>
-#include <sys/syscall.h>
-#endif
-#endif
-#else
 #include <haproxy/thread.h>
-#endif
 
 int shctx_init(struct shared_context **orig_shctx,
                int maxblocks, int blocksize, unsigned int maxobjsz,
@@ -48,143 +35,11 @@
 
 /* Lock functions */
 
-#if defined (USE_PRIVATE_CACHE)
 extern int use_shared_mem;
 
 #define shctx_lock(shctx)   if (use_shared_mem) HA_SPIN_LOCK(SHCTX_LOCK, &shctx->lock)
 #define shctx_unlock(shctx) if (use_shared_mem) HA_SPIN_UNLOCK(SHCTX_LOCK, &shctx->lock)
 
-#elif defined (USE_PTHREAD_PSHARED)
-extern int use_shared_mem;
-
-#define shctx_lock(shctx)   if (use_shared_mem) pthread_mutex_lock(&shctx->mutex)
-#define shctx_unlock(shctx) if (use_shared_mem) pthread_mutex_unlock(&shctx->mutex)
-
-#else
-extern int use_shared_mem;
-
-#ifdef USE_SYSCALL_FUTEX
-static inline void _shctx_wait4lock(unsigned int *count, unsigned int *uaddr, int value)
-{
-	syscall(SYS_futex, uaddr, FUTEX_WAIT, value, NULL, 0, 0);
-}
-
-static inline void _shctx_awakelocker(unsigned int *uaddr)
-{
-	syscall(SYS_futex, uaddr, FUTEX_WAKE, 1, NULL, 0, 0);
-}
-
-#else /* internal spin lock */
-
-#if defined (__i486__) || defined (__i586__) || defined (__i686__) || defined (__x86_64__)
-static inline void relax()
-{
-	__asm volatile("rep;nop\n" ::: "memory");
-}
-#else /* if no x86_64 or i586 arch: use less optimized but generic asm */
-static inline void relax()
-{
-	__asm volatile("" ::: "memory");
-}
-#endif
-
-static inline void _shctx_wait4lock(unsigned int *count, unsigned int *uaddr, int value)
-{
-        int i;
-
-        for (i = 0; i < *count; i++) {
-                relax();
-                relax();
-		if (*uaddr != value)
-			return;
-        }
-        *count = (unsigned char)((*count << 1) + 1);
-}
-
-#define _shctx_awakelocker(a)
-
-#endif
-
-#if defined (__i486__) || defined (__i586__) || defined (__i686__) || defined (__x86_64__)
-static inline unsigned int xchg(unsigned int *ptr, unsigned int x)
-{
-	__asm volatile("lock xchgl %0,%1"
-		     : "=r" (x), "+m" (*ptr)
-		     : "0" (x)
-		     : "memory");
-	return x;
-}
-
-static inline unsigned int cmpxchg(unsigned int *ptr, unsigned int old, unsigned int new)
-{
-	unsigned int ret;
-
-	__asm volatile("lock cmpxchgl %2,%1"
-		     : "=a" (ret), "+m" (*ptr)
-		     : "r" (new), "0" (old)
-		     : "memory");
-	return ret;
-}
-
-static inline unsigned char atomic_dec(unsigned int *ptr)
-{
-	unsigned char ret;
-	__asm volatile("lock decl %0\n"
-		     "setne %1\n"
-		     : "+m" (*ptr), "=qm" (ret)
-		     :
-		     : "memory");
-	return ret;
-}
-
-#else /* if no x86_64 or i586 arch: use less optimized gcc >= 4.1 built-ins */
-static inline unsigned int xchg(unsigned int *ptr, unsigned int x)
-{
-	return __sync_lock_test_and_set(ptr, x);
-}
-
-static inline unsigned int cmpxchg(unsigned int *ptr, unsigned int old, unsigned int new)
-{
-	return __sync_val_compare_and_swap(ptr, old, new);
-}
-
-static inline unsigned char atomic_dec(unsigned int *ptr)
-{
-	return __sync_sub_and_fetch(ptr, 1) ? 1 : 0;
-}
-
-#endif
-
-static inline void _shctx_lock(struct shared_context *shctx)
-{
-	unsigned int x;
-	unsigned int count = 3;
-
-	x = cmpxchg(&shctx->waiters, 0, 1);
-	if (x) {
-		if (x != 2)
-			x = xchg(&shctx->waiters, 2);
-
-		while (x) {
-			_shctx_wait4lock(&count, &shctx->waiters, 2);
-			x = xchg(&shctx->waiters, 2);
-		}
-	}
-}
-
-static inline void _shctx_unlock(struct shared_context *shctx)
-{
-	if (atomic_dec(&shctx->waiters)) {
-		shctx->waiters = 0;
-		_shctx_awakelocker(&shctx->waiters);
-	}
-}
-
-#define shctx_lock(shctx)   if (use_shared_mem) _shctx_lock(shctx)
-
-#define shctx_unlock(shctx) if (use_shared_mem) _shctx_unlock(shctx)
-
-#endif
 
 /* List Macros */
 
diff --git a/src/haproxy.c b/src/haproxy.c
index f8fdcc7..c05f18d 100644
--- a/src/haproxy.c
+++ b/src/haproxy.c
@@ -1998,21 +1998,10 @@
 		exit(1);
 	}
 
-	/* recompute the amount of per-process memory depending on
-	 * the shared SSL cache size
-	 */
-	if (global.rlimit_memmax_all) {
-#if defined (USE_OPENSSL) && !defined(USE_PRIVATE_CACHE)
-		int64_t ssl_cache_bytes = global.tune.sslcachesize * 200LL;
-
-		global.rlimit_memmax =
-			((((int64_t)global.rlimit_memmax_all * 1048576LL) - ssl_cache_bytes) +
-			 ssl_cache_bytes + 1048575LL) / 1048576LL;
-#else
-		global.rlimit_memmax = global.rlimit_memmax_all;
-#endif
-	}
+	/* propagate the global memory limit to rlimit_memmax */
+	if (global.rlimit_memmax_all)
+		global.rlimit_memmax = global.rlimit_memmax_all;
 
 #ifdef USE_NS
         err_code |= netns_init();
         if (err_code & (ERR_ABORT|ERR_FATAL)) {
diff --git a/src/shctx.c b/src/shctx.c
index 0ac3d5e..7745403 100644
--- a/src/shctx.c
+++ b/src/shctx.c
@@ -292,9 +292,6 @@
 	int i;
 	struct shared_context *shctx;
 	int ret;
-#ifdef USE_PTHREAD_PSHARED
-	pthread_mutexattr_t attr;
-#endif
 	void *cur;
 	int maptype = MAP_PRIVATE;
 
@@ -305,8 +302,10 @@
 	blocksize = (blocksize + sizeof(void *) - 1) & -sizeof(void *);
 	extra     = (extra     + sizeof(void *) - 1) & -sizeof(void *);
 
-	if (shared)
+	if (shared) {
 		maptype = MAP_SHARED;
+		use_shared_mem = 1;
+	}
 
 	shctx = (struct shared_context *)mmap(NULL, sizeof(struct shared_context) + extra + (maxblocks * (sizeof(struct shared_block) + blocksize)),
 	                                      PROT_READ | PROT_WRITE, maptype | MAP_ANON, -1, 0);
@@ -316,42 +315,9 @@
 		goto err;
 	}
 
+	HA_SPIN_INIT(&shctx->lock);
 	shctx->nbav = 0;
 
-	if (maptype == MAP_SHARED) {
-#ifndef USE_PRIVATE_CACHE
-#ifdef USE_PTHREAD_PSHARED
-		if (pthread_mutexattr_init(&attr)) {
-			munmap(shctx, sizeof(struct shared_context) + extra + (maxblocks * (sizeof(struct shared_block) + blocksize)));
-			shctx = NULL;
-			ret = SHCTX_E_INIT_LOCK;
-			goto err;
-		}
-
-		if (pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED)) {
-			pthread_mutexattr_destroy(&attr);
-			munmap(shctx, sizeof(struct shared_context) + extra + (maxblocks * (sizeof(struct shared_block) + blocksize)));
-			shctx = NULL;
-			ret = SHCTX_E_INIT_LOCK;
-			goto err;
-		}
-
-		if (pthread_mutex_init(&shctx->mutex, &attr)) {
-			pthread_mutexattr_destroy(&attr);
-			munmap(shctx, sizeof(struct shared_context) + extra + (maxblocks * (sizeof(struct shared_block) + blocksize)));
-			shctx = NULL;
-			ret = SHCTX_E_INIT_LOCK;
-			goto err;
-		}
-#else
-		shctx->waiters = 0;
-#endif
-#else
-		HA_SPIN_INIT(&shctx->lock);
-#endif
-		use_shared_mem = 1;
-	}
-
 	LIST_INIT(&shctx->avail);
 	LIST_INIT(&shctx->hot);