MINOR: config: Add the threads support in cpu-map directive

It is now possible to bind CPUs at the thread level instead of the process
level by defining a thread set in "cpu-map" directives. The format is now:

  cpu-map [auto:]<process-set>[/<thread-set>] <cpu-set>...

where <process-set> and <thread-set> must follow the format:

  all | odd | even | number[-[number]]

Specifying both a process range and a thread range at the same time with the
"auto:" prefix is not supported: only one of them may be a range, the other one
must be a fixed number. Both may be ranges when there is no "auto:" prefix.

Because it is possible to define a mapping for a process and another one for a
thread of this process, threads will be bound to the intersection of their own
mapping and that of the process to which they are attached. If the
intersection is empty, no specific binding will be set for the threads.
diff --git a/src/cfgparse.c b/src/cfgparse.c
index a37a533..1a75686 100644
--- a/src/cfgparse.c
+++ b/src/cfgparse.c
@@ -617,7 +617,7 @@
 		unsigned int low, high;
 
 		if (!isdigit((int)*arg)) {
-			memprintf(err, "'%s' is not a valid PROC number.\n", arg);
+			memprintf(err, "'%s' is not a valid number.\n", arg);
 			return -1;
 		}
 
@@ -632,8 +632,8 @@
 		}
 
 		if (low < 1 || low > LONGBITS || high > LONGBITS) {
-			memprintf(err, "'%s' is not a valid PROC number/range."
-				  " It supports PROC numbers from 1 to %d.\n",
+			memprintf(err, "'%s' is not a valid number/range."
+				  " It supports numbers from 1 to %d.\n",
 				  arg, LONGBITS);
 			return 1;
 		}
@@ -1706,8 +1706,9 @@
 	else if (strcmp(args[0], "cpu-map") == 0) {
 		/* map a process list to a CPU set */
 #ifdef USE_CPU_AFFINITY
-		unsigned long proc = 0, cpus;
-		int i, n, autoinc;
+		char *slash;
+		unsigned long proc = 0, thread = 0, cpus;
+		int i, j, n, autoinc;
 
 		if (!*args[1] || !*args[2]) {
 			Alert("parsing [%s:%d] : %s expects a process number "
@@ -1718,32 +1719,76 @@
 			goto out;
 		}
 
+		if ((slash = strchr(args[1], '/')) != NULL)
+			*slash = 0;
+
 		if (parse_process_number(args[1], &proc, &autoinc, &errmsg)) {
 			Alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
 			err_code |= ERR_ALERT | ERR_FATAL;
 			goto out;
 		}
 
+		if (slash) {
+			if (parse_process_number(slash+1, &thread, NULL, &errmsg)) {
+				Alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
+				err_code |= ERR_ALERT | ERR_FATAL;
+				goto out;
+			}
+			*slash = '/';
+
+			if (autoinc && my_popcountl(proc) != 1 && my_popcountl(thread) != 1) {
+				Alert("parsing [%s:%d] : %s : '%s' : unable to automatically bind "
+				      "a process range _AND_ a thread range\n",
+				      file, linenum, args[0], args[1]);
+				err_code |= ERR_ALERT | ERR_FATAL;
+				goto out;
+			}
+		}
+
 		if (parse_cpu_set((const char **)args+2, &cpus, &errmsg)) {
 			Alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
 			err_code |= ERR_ALERT | ERR_FATAL;
 			goto out;
 		}
 
-		if (autoinc && my_popcountl(proc) != my_popcountl(cpus)) {
-			Alert("parsing [%s:%d] : %s : PROC range and CPU sets must have the same size to be auto-assigned\n",
+		if (autoinc &&
+		    my_popcountl(proc)  != my_popcountl(cpus) &&
+		    my_popcountl(thread) != my_popcountl(cpus)) {
+			Alert("parsing [%s:%d] : %s : PROC/THREAD range and CPU sets "
+			      "must have the same size to be automatically bound\n",
 			      file, linenum, args[0]);
 			err_code |= ERR_ALERT | ERR_FATAL;
 			goto out;
 		}
+
 		for (i = n = 0; i < LONGBITS; i++) {
-			if (proc & (1UL << i)) {
-				if (autoinc) {
+			/* No mapping for this process */
+			if (!(proc & (1UL << i)))
+				continue;
+
+			/* Mapping at the process level */
+			if (!thread) {
+				if (!autoinc)
+					global.cpu_map.proc[i] = cpus;
+				else {
+					n += my_ffsl(cpus >> n);
+					global.cpu_map.proc[i] = (1UL << (n-1));
+				}
+				continue;
+			}
+
+			/* Mapping at the thread level */
+			for (j = 0; j < LONGBITS; j++) {
+				/* No mapping for this thread */
+				if (!(thread & (1UL << j)))
+					continue;
+
+				if (!autoinc)
+					global.cpu_map.thread[i][j] = cpus;
+				else {
 					n += my_ffsl(cpus >> n);
-					global.cpu_map[i] = (1UL << (n-1));
+					global.cpu_map.thread[i][j] = (1UL << (n-1));
 				}
-				else
-					global.cpu_map[i] = cpus;
 			}
 		}
 #else
diff --git a/src/haproxy.c b/src/haproxy.c
index dca2a33..381d5a9 100644
--- a/src/haproxy.c
+++ b/src/haproxy.c
@@ -2726,12 +2726,12 @@
 #ifdef USE_CPU_AFFINITY
 		if (proc < global.nbproc &&  /* child */
 		    proc < LONGBITS &&       /* only the first 32/64 processes may be pinned */
-		    global.cpu_map[proc])    /* only do this if the process has a CPU map */
+		    global.cpu_map.proc[proc])    /* only do this if the process has a CPU map */
 #ifdef __FreeBSD__
 		{
 			cpuset_t cpuset;
 			int i;
-			unsigned long cpu_map = global.cpu_map[proc];
+			unsigned long cpu_map = global.cpu_map.proc[proc];
 
 			CPU_ZERO(&cpuset);
 			while ((i = ffsl(cpu_map)) > 0) {
@@ -2741,7 +2741,7 @@
 			ret = cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1, sizeof(cpuset), &cpuset);
 		}
 #else
-			sched_setaffinity(0, sizeof(unsigned long), (void *)&global.cpu_map[proc]);
+			sched_setaffinity(0, sizeof(unsigned long), (void *)&global.cpu_map.proc[proc]);
 #endif
 #endif
 		/* close the pidfile both in children and father */
@@ -2895,13 +2895,14 @@
 #ifdef USE_CPU_AFFINITY
 		/* Now the CPU affinity for all threads */
 		for (i = 0; i < global.nbthread; i++) {
-			if (global.cpu_map[relative_pid-1])
-				global.thread_map[relative_pid-1][i] &= global.cpu_map[relative_pid-1];
+			if (global.cpu_map.proc[relative_pid-1])
+				global.cpu_map.thread[relative_pid-1][i] &= global.cpu_map.proc[relative_pid-1];
 
 			if (i < LONGBITS &&       /* only the first 32/64 threads may be pinned */
-			    global.thread_map[relative_pid-1][i]) /* only do this if the thread has a THREAD map */
+			    global.cpu_map.thread[relative_pid-1][i]) /* only do this if the thread has a THREAD map */
 				pthread_setaffinity_np(threads[i],
-						       sizeof(unsigned long), (void *)&global.thread_map[relative_pid-1][i]);
+						       sizeof(unsigned long),
+						       (void *)&global.cpu_map.thread[relative_pid-1][i]);
 		}
 #endif /* !USE_CPU_AFFINITY */