MINOR: threads: Add thread-map config parameter in the global section

By default, no affinity is set for threads. To bind threads to CPUs, you must
define a "thread-map" in the global section. The format is the same as for the
"cpu-map" parameter, with one small difference: the process number must also
be given, in the same format as for cpu-map ("all", "even", "odd" or a number
between 1 and 32/64), before the thread number.

A thread will be bound to the intersection of its mapping and that of the
process to which it is attached. If the intersection is empty, no specific
binding will be set for the thread.
diff --git a/doc/configuration.txt b/doc/configuration.txt
index 6f7a99f..786e5c3 100644
--- a/doc/configuration.txt
+++ b/doc/configuration.txt
@@ -558,6 +558,7 @@
    - ssl-default-server-options
    - ssl-dh-param-file
    - ssl-server-verify
+   - thread-map
    - unix-bind
    - unsetenv
    - 51degrees-data-file
@@ -943,6 +944,15 @@
   servers certificates are not verified. The default is 'required' except if
   forced using cmdline option '-dV'.
 
+
+thread-map <"all"|"odd"|"even"|process_num> <"all"|"odd"|"even"|thread_num> <cpu-set>...
+  This setting is only available when support for both threads and CPU affinity
+  was built in. It binds a thread to a specific CPU set. The process must be
+  specified to allow different mappings for different processes. For details
+  about the arguments, see the "cpu-map" directive. A thread will be bound to
+  the intersection of its mapping and that of the process to which it is
+  attached. If the intersection is empty, no binding is set for the thread.
+
 stats socket [<address:port>|<path>] [param*]
   Binds a UNIX socket to <path> or a TCPv4/v6 address to <address:port>.
   Connections to this socket will return various statistics outputs and even
diff --git a/include/types/global.h b/include/types/global.h
index 817c09b..b3aa29c 100644
--- a/include/types/global.h
+++ b/include/types/global.h
@@ -162,7 +162,12 @@
 		} ux;
 	} unix_bind;
 #ifdef USE_CPU_AFFINITY
-	unsigned long cpu_map[LONGBITS];  /* list of CPU masks for the 32/64 first processes */
+	unsigned long cpu_map[LONGBITS];              /* list of CPU masks for the 32/64 first processes */
+
+#ifdef USE_THREAD
+	unsigned long thread_map[LONGBITS][LONGBITS]; /* list of CPU masks for the 32/64 first threads per process */
+#endif
+
 #endif
 	struct proxy *stats_fe;     /* the frontend holding the stats settings */
 	struct vars   vars;         /* list of variables for the process scope. */
diff --git a/src/cfgparse.c b/src/cfgparse.c
index d693bfb..4414f59 100644
--- a/src/cfgparse.c
+++ b/src/cfgparse.c
@@ -586,6 +586,72 @@
 	return ERR_WARN;
 }
 
+/* Parse a string representing a process number or a set of processes. It must
+ * be "all", "odd", "even" or a number between 1 and <LONGBITS>. It returns a
+ * mask where bits are set for corresponding processes or 0 if an error occurred
+ * (including a number outside of the 1..<LONGBITS> range).
+ * Note: this function can also be used to parse a thread number or a set of
+ * threads.
+ */
+static unsigned long parse_process_number(const char *arg)
+{
+	unsigned long proc = 0;
+
+	if (strcmp(arg, "all") == 0)
+		proc = ~0UL;
+	else if (strcmp(arg, "odd") == 0)
+		proc = ~0UL/3UL; /* 0x555....555 */
+	else if (strcmp(arg, "even") == 0)
+		proc = (~0UL/3UL) << 1; /* 0xAAA...AAA */
+	else {
+		long num = atol(arg); /* kept signed so that negative values are rejected */
+		if (num >= 1 && num <= LONGBITS)
+			proc = 1UL << (num - 1);
+	}
+	return proc;
+}
+
+/* Parse cpu sets. Each CPU set is either a unique number between 0 and
+ * <LONGBITS>-1 or a range with two such numbers delimited by a dash ('-').
+ * Multiple CPU numbers or ranges may be specified in <args>, which ends at the
+ * first empty string. The resulting mask is stored in <cpu_set>. It returns 0
+ * on success, otherwise 1 with an error message (no trailing LF) in <err>.
+ */
+static unsigned long parse_cpu_set(const char **args, unsigned long *cpu_set, char **err)
+{
+	int cur_arg = 0;
+
+	*cpu_set = 0;
+	while (*args[cur_arg]) {
+		char        *dash;
+		unsigned int low, high;
+
+		if (!isdigit((unsigned char)*args[cur_arg])) {
+			memprintf(err, "'%s' is not a CPU range.", args[cur_arg]);
+			return 1;
+		}
+
+		low = high = str2uic(args[cur_arg]);
+		if ((dash = strchr(args[cur_arg], '-')) != NULL)
+			high = str2uic(dash + 1);
+
+		if (high < low) {
+			unsigned int swap = low;
+			low = high;
+			high = swap;
+		}
+
+		if (high >= LONGBITS) {
+			memprintf(err, "supports CPU numbers from 0 to %d.", LONGBITS - 1);
+			return 1;
+		}
+
+		while (low <= high)
+			*cpu_set |= 1UL << low++;
+		cur_arg++;
+	}
+	return 0;
+}
 /*
  * parse a line in a <global> section. Returns the error code, 0 if OK, or
  * any combination of :
@@ -1602,74 +1668,77 @@
 			err_code |= ERR_ALERT | ERR_FATAL;
 		}
 	}
-	else if (strcmp(args[0], "cpu-map") == 0) {  /* map a process list to a CPU set */
+	else if (strcmp(args[0], "cpu-map") == 0) {
+		/* map a process list to a CPU set */
 #ifdef USE_CPU_AFFINITY
-		int cur_arg, i;
-		unsigned long proc = 0;
-		unsigned long cpus = 0;
-
-		if (strcmp(args[1], "all") == 0)
-			proc = ~0UL;
-		else if (strcmp(args[1], "odd") == 0)
-			proc = ~0UL/3UL; /* 0x555....555 */
-		else if (strcmp(args[1], "even") == 0)
-			proc = (~0UL/3UL) << 1; /* 0xAAA...AAA */
-		else {
-			proc = atol(args[1]);
-			if (proc >= 1 && proc <= LONGBITS)
-				proc = 1UL << (proc - 1);
-		}
+		unsigned long proc, cpus;
+		int i;
 
+		proc = parse_process_number(args[1]);
 		if (!proc || !*args[2]) {
-			Alert("parsing [%s:%d]: %s expects a process number including 'all', 'odd', 'even', or a number from 1 to %d, followed by a list of CPU ranges with numbers from 0 to %d.\n",
+			Alert("parsing [%s:%d]: %s expects a process number "
+			      " ('all', 'odd', 'even', or a number from 1 to %d), "
+			      " followed by a list of CPU ranges with numbers from 0 to %d.\n",
 			      file, linenum, args[0], LONGBITS, LONGBITS - 1);
 			err_code |= ERR_ALERT | ERR_FATAL;
 			goto out;
 		}
-
-		cur_arg = 2;
-		while (*args[cur_arg]) {
-			unsigned int low, high;
-
-			if (isdigit((int)*args[cur_arg])) {
-				char *dash = strchr(args[cur_arg], '-');
-
-				low = high = str2uic(args[cur_arg]);
-				if (dash)
-					high = str2uic(dash + 1);
-
-				if (high < low) {
-					unsigned int swap = low;
-					low = high;
-					high = swap;
-				}
-
-				if (high >= LONGBITS) {
-					Alert("parsing [%s:%d]: %s supports CPU numbers from 0 to %d.\n",
-					      file, linenum, args[0], LONGBITS - 1);
-					err_code |= ERR_ALERT | ERR_FATAL;
-					goto out;
-				}
-
-				while (low <= high)
-					cpus |= 1UL << low++;
-			}
-			else {
-				Alert("parsing [%s:%d]: %s : '%s' is not a CPU range.\n",
-				      file, linenum, args[0], args[cur_arg]);
-				err_code |= ERR_ALERT | ERR_FATAL;
-				goto out;
-			}
-			cur_arg++;
+		if (parse_cpu_set((const char **)args+2, &cpus, &errmsg)) {
+			Alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
+			err_code |= ERR_ALERT | ERR_FATAL;
+			goto out;
 		}
 		for (i = 0; i < LONGBITS; i++)
 			if (proc & (1UL << i))
 				global.cpu_map[i] = cpus;
 #else
-		Alert("parsing [%s:%d] : '%s' is not enabled, please check build options for USE_CPU_AFFINITY.\n", file, linenum, args[0]);
+		Alert("parsing [%s:%d] : '%s' is not enabled, please check build options for USE_CPU_AFFINITY.\n",
+		      file, linenum, args[0]);
 		err_code |= ERR_ALERT | ERR_FATAL;
 		goto out;
-#endif
+#endif /* ! USE_CPU_AFFINITY */
+	}
+	else if (strcmp(args[0], "thread-map") == 0) {
+		/* map a thread list to a CPU set */
+#ifdef USE_CPU_AFFINITY
+#ifdef USE_THREAD
+		unsigned long proc, thread, cpus;
+		int i, j;
+
+		proc    = parse_process_number(args[1]);
+		thread  = parse_process_number(args[2]);
+		if (!proc || !thread || !*args[3]) {
+			Alert("parsing [%s:%d]: %s expects a process number "
+			      "('all', 'odd', 'even', or a number from 1 to %d), "
+			      " followed by a thread number using the same format, "
+			      " followed by a list of CPU ranges with numbers from 0 to %d.\n",
+			      file, linenum, args[0], LONGBITS, LONGBITS - 1);
+			err_code |= ERR_ALERT | ERR_FATAL;
+			goto out;
+		}
+		if (parse_cpu_set((const char **)args+3, &cpus, &errmsg)) {
+			Alert("parsing [%s:%d] : %s : %s\n", file, linenum, args[0], errmsg);
+			err_code |= ERR_ALERT | ERR_FATAL;
+			goto out;
+		}
+		for (i = 0; i < LONGBITS; i++)
+			if (proc & (1UL << i)) {
+				for (j = 0; j < LONGBITS; j++)
+					if (thread & (1UL << j))
+						global.thread_map[i][j] = cpus;
+			}
+#else
+		Alert("parsing [%s:%d] : '%s' is not enabled, please check build options for USE_THREAD.\n",
+		      file, linenum, args[0]);
+		err_code |= ERR_ALERT | ERR_FATAL;
+		goto out;
+#endif /* ! USE_THREAD*/
+#else
+		Alert("parsing [%s:%d] : '%s' is not enabled, please check build options for USE_CPU_AFFINITY.\n",
+		      file, linenum, args[0]);
+		err_code |= ERR_ALERT | ERR_FATAL;
+		goto out;
+#endif /* ! USE_CPU_AFFINITY */
 	}
 	else if (strcmp(args[0], "setenv") == 0 || strcmp(args[0], "presetenv") == 0) {
 		if (alertif_too_many_args(3, file, linenum, args, &err_code))
diff --git a/src/haproxy.c b/src/haproxy.c
index 30ac157..f4353e1 100644
--- a/src/haproxy.c
+++ b/src/haproxy.c
@@ -2814,6 +2814,15 @@
 		for (i = 0; i < global.nbthread; i++) {
 			tids[i] = i;
 			pthread_create(&threads[i], NULL, &run_thread_poll_loop, &tids[i]);
+#ifdef USE_CPU_AFFINITY
+			if (i < LONGBITS) { /* thread_map has LONGBITS slots per process: never index past the first 32/64 threads */
+				if (global.cpu_map[relative_pid-1]) /* restrict the thread's set to its process' CPU set */
+					global.thread_map[relative_pid-1][i] &= global.cpu_map[relative_pid-1];
+				if (global.thread_map[relative_pid-1][i]) /* only do this if the thread has a THREAD map */
+					pthread_setaffinity_np(threads[i],
+							       sizeof(unsigned long), (void *)&global.thread_map[relative_pid-1][i]);
+			}
+#endif
 		}
 		for (i = 0; i < global.nbthread; i++)
 			pthread_join(threads[i], NULL);
@@ -2830,6 +2839,17 @@
 	else {
 		tid = 0;
 
+#ifdef USE_THREAD
+#ifdef USE_CPU_AFFINITY
+		if (global.cpu_map[relative_pid-1])
+			global.thread_map[relative_pid-1][tid] &= global.cpu_map[relative_pid-1];
+
+		if (global.thread_map[relative_pid-1][tid]) /* only do this if the thread has a THREAD map */
+			pthread_setaffinity_np(pthread_self(),
+					       sizeof(unsigned long), (void *)&global.thread_map[relative_pid-1][tid]);
+#endif
+#endif
+
 		if (global.mode & MODE_MWORKER)
 			mworker_pipe_register(mworker_pipe);