MINOR: threads: flatten the per-thread cpu-map

When we initially experimented with thread and process support, we
needed to implement a per-process array of threads for cpu-map, but
this is no longer needed since we support either threads or processes.
Let's simply index the thread-based cpu-map by thread only, instead of
by both process and thread, since the per-process dimension is not used
anymore. Doing so reduces the global struct from 33kB to 1.5kB.
diff --git a/include/types/global.h b/include/types/global.h
index ba3738b..b2e7969 100644
--- a/include/types/global.h
+++ b/include/types/global.h
@@ -179,8 +179,8 @@
 	struct vars   vars;         /* list of variables for the process scope. */
 #ifdef USE_CPU_AFFINITY
 	struct {
-		unsigned long proc[MAX_PROCS];             /* list of CPU masks for the 32/64 first processes */
-		unsigned long thread[MAX_PROCS][MAX_THREADS]; /* list of CPU masks for the 32/64 first threads per process */
+		unsigned long proc[MAX_PROCS];      /* list of CPU masks for the 32/64 first processes */
+		unsigned long thread[MAX_THREADS];  /* list of CPU masks for the 32/64 first threads */
 	} cpu_map;
 #endif
 };
diff --git a/src/cfgparse-global.c b/src/cfgparse-global.c
index 1633700..f4037c2 100644
--- a/src/cfgparse-global.c
+++ b/src/cfgparse-global.c
@@ -1009,33 +1009,34 @@
 			goto out;
 		}
 
-		for (i = n = 0; i < MAX_PROCS; i++) {
-			/* No mapping for this process */
-			if (!(proc & (1UL << i)))
-				continue;
-
+		if (atleast2(proc)) {
 			/* Mapping at the process level */
-			if (!thread) {
+			for (i = n = 0; i < MAX_PROCS; i++) {
+				/* No mapping for this process */
+				if (!(proc & (1UL << i)))
+					continue;
+
 				if (!autoinc)
 					global.cpu_map.proc[i] = cpus;
 				else {
 					n += my_ffsl(cpus >> n);
 					global.cpu_map.proc[i] = (1UL << (n-1));
 				}
-				continue;
 			}
+		}
 
+		if (atleast2(thread)) {
 			/* Mapping at the thread level */
-			for (j = 0; j < MAX_THREADS; j++) {
-				/* Np mapping for this thread */
+			for (j = n = 0; j < MAX_THREADS; j++) {
+				/* No mapping for this thread */
 				if (!(thread & (1UL << j)))
 					continue;
 
 				if (!autoinc)
-					global.cpu_map.thread[i][j] = cpus;
+					global.cpu_map.thread[j] = cpus;
 				else {
 					n += my_ffsl(cpus >> n);
-					global.cpu_map.thread[i][j] = (1UL << (n-1));
+					global.cpu_map.thread[j] = (1UL << (n-1));
 				}
 			}
 		}
diff --git a/src/haproxy.c b/src/haproxy.c
index 603f084..1aeae99 100644
--- a/src/haproxy.c
+++ b/src/haproxy.c
@@ -3152,17 +3152,17 @@
 		/* Now the CPU affinity for all threads */
 		for (i = 0; i < global.nbthread; i++) {
 			if (global.cpu_map.proc[relative_pid-1])
-				global.cpu_map.thread[relative_pid-1][i] &= global.cpu_map.proc[relative_pid-1];
+				global.cpu_map.thread[i] &= global.cpu_map.proc[relative_pid-1];
 
 			if (i < MAX_THREADS &&       /* only the first 32/64 threads may be pinned */
-			    global.cpu_map.thread[relative_pid-1][i]) {/* only do this if the thread has a THREAD map */
+			    global.cpu_map.thread[i]) {/* only do this if the thread has a THREAD map */
 #if defined(__FreeBSD__) || defined(__NetBSD__)
 				cpuset_t cpuset;
 #else
 				cpu_set_t cpuset;
 #endif
 				int j;
-				unsigned long cpu_map = global.cpu_map.thread[relative_pid-1][i];
+				unsigned long cpu_map = global.cpu_map.thread[i];
 
 				CPU_ZERO(&cpuset);