MINOR: task: move profiling bit to per-thread
Instead of having a global mask of all the profiled threads, let's have
one flag per thread, in each thread's flags. These flags are never
accessed more than one at a time, and they are better located inside the
threads' contexts for both performance and scalability.
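As a minimal standalone sketch (not HAProxy code: my_thread_ctx, ctx[]
and tid are illustrative assumptions), the difference between the two
models looks like this:

#include <stdatomic.h>

#define MAX_THREADS           64
#define TH_FL_TASK_PROFILING  0x00000002U

/* old model: one global word, all threads' bits share one cache line */
static _Atomic unsigned long task_profiling_mask;

/* new model: each thread tests and updates its own flags word */
struct my_thread_ctx {
	_Atomic unsigned int flags;
};
static struct my_thread_ctx ctx[MAX_THREADS];

static _Thread_local int tid; /* this thread's index */

static inline int profiling_enabled_old(void)
{
	return !!(atomic_load(&task_profiling_mask) & (1UL << tid));
}

static inline int profiling_enabled_new(void)
{
	/* purely thread-local read: no cache line bouncing between threads */
	return !!(atomic_load(&ctx[tid].flags) & TH_FL_TASK_PROFILING);
}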
diff --git a/doc/design-thoughts/thread-group.txt b/doc/design-thoughts/thread-group.txt
index 551a551..a3b2b61 100644
--- a/doc/design-thoughts/thread-group.txt
+++ b/doc/design-thoughts/thread-group.txt
@@ -60,10 +60,11 @@
mask, or MAX_THREADS_MASK. How to create a task able to run anywhere
(checks, Lua, ...) ?
-Profiling
+Profiling -> completed
---------
There should be one task_profiling_mask per thread group. Enabling or
disabling profiling should be made per group (possibly by iterating).
+-> not needed anymore, one flag per thread in each thread's context.
Thread isolation
----------------
@@ -405,7 +406,7 @@
- lock debugging must reproduce tgid
- - task profiling must be made per-group (annoying), unless we want to add a
+ * task profiling must be made per-group (annoying), unless we want to add a
per-thread TH_FL_* flag and have the rare places where the bit is changed
iterate over all threads if needed. Sounds preferable overall.
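The "iterate over all threads" approach the note above settles on could
look like the following hedged sketch, reusing the hypothetical ctx[]
and TH_FL_TASK_PROFILING from the first example: the rare writer pays
the iteration while readers stay purely local.

/* toggling profiling globally walks every thread's private flag */
static void set_profiling_all(int nbthread, int on)
{
	int t;

	for (t = 0; t < nbthread; t++) {
		if (on)
			atomic_fetch_or(&ctx[t].flags, TH_FL_TASK_PROFILING);
		else
			atomic_fetch_and(&ctx[t].flags, ~TH_FL_TASK_PROFILING);
	}
}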
diff --git a/include/haproxy/activity.h b/include/haproxy/activity.h
index f0ca4ee..0c5727b 100644
--- a/include/haproxy/activity.h
+++ b/include/haproxy/activity.h
@@ -26,7 +26,6 @@
#include <haproxy/api.h>
extern unsigned int profiling;
-extern unsigned long task_profiling_mask;
extern struct activity activity[MAX_THREADS];
extern struct sched_activity sched_activity[256];
diff --git a/include/haproxy/task.h b/include/haproxy/task.h
index b3e0544..0ad4a1c 100644
--- a/include/haproxy/task.h
+++ b/include/haproxy/task.h
@@ -401,7 +401,7 @@
tl->debug.caller_idx = !tl->debug.caller_idx;
tl->debug.caller_file[tl->debug.caller_idx] = file;
tl->debug.caller_line[tl->debug.caller_idx] = line;
- if (task_profiling_mask & tid_bit)
+ if (th_ctx->flags & TH_FL_TASK_PROFILING)
tl->call_date = now_mono_time();
#endif
__tasklet_wakeup_on(tl, thr);
@@ -452,7 +452,7 @@
tl->debug.caller_idx = !tl->debug.caller_idx;
tl->debug.caller_file[tl->debug.caller_idx] = file;
tl->debug.caller_line[tl->debug.caller_idx] = line;
- if (task_profiling_mask & tid_bit)
+ if (th_ctx->flags & TH_FL_TASK_PROFILING)
tl->call_date = now_mono_time();
#endif
__tasklet_wakeup_on(tl, thr);
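Both wakeup hunks above only stamp tl->call_date when the waking thread
has the profiling flag set, so the hot path avoids the monotonic clock
read when profiling is off. A simplified sketch of what such a stamp
allows later (mono_ns() stands in for now_mono_time(); the types are
assumptions):

#include <stdint.h>
#include <time.h>

static inline uint64_t mono_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

struct my_tasklet {
	uint64_t call_date; /* wakeup time, 0 when profiling was off */
};

/* at execution time, the queuing latency can be derived from the stamp */
static inline uint64_t queue_latency_ns(const struct my_tasklet *tl)
{
	return tl->call_date ? mono_ns() - tl->call_date : 0;
}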
diff --git a/include/haproxy/tinfo-t.h b/include/haproxy/tinfo-t.h
index 2c01480..4f3b9f6 100644
--- a/include/haproxy/tinfo-t.h
+++ b/include/haproxy/tinfo-t.h
@@ -37,6 +37,7 @@
/* thread_ctx flags, for ha_thread_ctx[].flags */
#define TH_FL_STUCK 0x00000001
+#define TH_FL_TASK_PROFILING 0x00000002 /* task profiling is active on this thread */
/* Thread group information. This defines a base and a count of global thread
* IDs which belong to it, and which can be looked up into thread_info/ctx. It
diff --git a/src/activity.c b/src/activity.c
index 10ee65d..eb49169 100644
--- a/src/activity.c
+++ b/src/activity.c
@@ -42,7 +42,6 @@
/* bit field of profiling options. Beware, may be modified at runtime! */
unsigned int profiling __read_mostly = HA_PROF_TASKS_AOFF;
-unsigned long task_profiling_mask __read_mostly = 0;
/* One struct per thread containing all collected measurements */
struct activity activity[MAX_THREADS] __attribute__((aligned(64))) = { };
@@ -384,16 +383,16 @@
* profiling to "on" when automatic, and going back below the "down"
* threshold switches to off. The forced modes don't check the load.
*/
- if (!(task_profiling_mask & tid_bit)) {
+ if (!(th_ctx->flags & TH_FL_TASK_PROFILING)) {
if (unlikely((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_ON ||
((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_AON &&
swrate_avg(run_time, TIME_STATS_SAMPLES) >= up)))
- _HA_ATOMIC_OR(&task_profiling_mask, tid_bit);
+ _HA_ATOMIC_OR(&th_ctx->flags, TH_FL_TASK_PROFILING);
} else {
if (unlikely((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_OFF ||
((profiling & HA_PROF_TASKS_MASK) == HA_PROF_TASKS_AOFF &&
swrate_avg(run_time, TIME_STATS_SAMPLES) <= down)))
- _HA_ATOMIC_AND(&task_profiling_mask, ~tid_bit);
+ _HA_ATOMIC_AND(&th_ctx->flags, ~TH_FL_TASK_PROFILING);
}
}
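Note that the automatic mode is a hysteresis: profiling turns on once
the average run time reaches the "up" threshold and only turns back off
once it drops below the lower "down" threshold, which avoids flapping
around a single boundary. Standalone, the logic amounts to the sketch
below (the thresholds and the averaged input are assumptions, reusing
my_thread_ctx from the first sketch):

static void update_auto_profiling(struct my_thread_ctx *c,
                                  uint64_t avg_run_ns,
                                  uint64_t up, uint64_t down)
{
	if (!(atomic_load(&c->flags) & TH_FL_TASK_PROFILING)) {
		if (avg_run_ns >= up)
			atomic_fetch_or(&c->flags, TH_FL_TASK_PROFILING);
	} else {
		if (avg_run_ns <= down)
			atomic_fetch_and(&c->flags, ~TH_FL_TASK_PROFILING);
	}
}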
diff --git a/src/debug.c b/src/debug.c
index 9a5c294..9c23d8a 100644
--- a/src/debug.c
+++ b/src/debug.c
@@ -179,12 +179,12 @@
ha_thread_ctx[thr].rq_total,
ha_thread_info[thr].tg->tgid, ha_thread_info[thr].ltid + 1,
stuck,
- !!(task_profiling_mask & thr_bit));
+ !!(ha_thread_ctx[thr].flags & TH_FL_TASK_PROFILING));
chunk_appendf(buf,
" harmless=%d wantrdv=%d",
!!(threads_harmless_mask & thr_bit),
!!(threads_want_rdv_mask & thr_bit));
chunk_appendf(buf, "\n");
chunk_appendf(buf, " cpu_ns: poll=%llu now=%llu diff=%llu\n", p, n, n-p);
diff --git a/src/task.c b/src/task.c
index 05a5e3c..09b8e72 100644
--- a/src/task.c
+++ b/src/task.c
@@ -218,7 +218,7 @@
t->rq.key += offset;
}
- if (task_profiling_mask & tid_bit)
+ if (th_ctx->flags & TH_FL_TASK_PROFILING)
t->call_date = now_mono_time();
eb32sc_insert(root, &t->rq, t->thread_mask);
@@ -562,7 +562,7 @@
LIST_DEL_INIT(&((struct tasklet *)t)->list);
__ha_barrier_store();
- if (unlikely(task_profiling_mask & tid_bit)) {
+ if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING)) {
profile_entry = sched_activity_entry(sched_activity, t->process);
before = now_mono_time();
#ifdef DEBUG_TASK
@@ -587,7 +587,7 @@
continue;
}
- if (unlikely(task_profiling_mask & tid_bit)) {
+ if (unlikely(th_ctx->flags & TH_FL_TASK_PROFILING)) {
HA_ATOMIC_INC(&profile_entry->calls);
HA_ATOMIC_ADD(&profile_entry->cpu_time, now_mono_time() - before);
}
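The scheduler hunks above bracket each profiled call with two monotonic
clock reads and accumulate per-handler statistics. A reduced sketch of
that accounting pattern (my_profile_entry and run_profiled are
assumptions, reusing mono_ns() from the earlier sketch):

struct my_profile_entry {
	_Atomic uint64_t calls;    /* number of invocations */
	_Atomic uint64_t cpu_time; /* accumulated ns spent in the handler */
};

static void run_profiled(struct my_profile_entry *pe,
                         void (*process)(void *), void *arg,
                         int profiling_on)
{
	uint64_t before = profiling_on ? mono_ns() : 0;

	process(arg);

	if (profiling_on) {
		atomic_fetch_add(&pe->calls, 1);
		atomic_fetch_add(&pe->cpu_time, mono_ns() - before);
	}
}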