MINOR: tasks: do not keep cpu and latency times in struct task
It was a mistake to put these two fields in the struct task. This
was added in 1.9 via commit 9efd7456e ("MEDIUM: tasks: collect per-task
CPU time and latency"). These fields are used solely by streams in
order to report the measurements via the lat_ns* and cpu_ns* sample
fetch functions when task profiling is enabled. For the rest of the
tasks, this is pure CPU waste when profiling is enabled, and memory
waste 100% of the time, since these latencies and usages are already
measured and accumulated in the profiling array.
Let's move the fields to the stream instead, and have process_stream()
retrieve the relevant info from the thread's context.
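For illustration only, the intended accounting works roughly like the
standalone C sketch below. The type and function names here are made up
for the example (the real code uses th_ctx->sched_wake_date,
th_ctx->sched_call_date and the new stream fields shown in the patch):

#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* simplified stand-ins for the thread context and the stream */
struct sched_ctx_sketch {
	uint64_t sched_wake_date; /* set at wakeup when profiling, else 0 */
	uint64_t sched_call_date; /* set by the scheduler when calling the task */
};

struct stream_sketch {
	uint64_t lat_time; /* total latency time experienced */
	uint64_t cpu_time; /* total CPU time consumed */
};

static uint64_t mono_now_ns(void)
{
	struct timespec ts;
	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

/* on entry to the stream handler: queue latency = call date - wake date */
static void sketch_update_latency(struct stream_sketch *s, const struct sched_ctx_sketch *sc)
{
	if (!sc->sched_wake_date) /* not profiled on this wakeup */
		return;
	s->lat_time += sc->sched_call_date - sc->sched_wake_date;
}

/* just before logging: CPU time = now - call date */
static void sketch_update_cpu(struct stream_sketch *s, const struct sched_ctx_sketch *sc)
{
	if (!sc->sched_wake_date)
		return;
	s->cpu_time += mono_now_ns() - sc->sched_call_date;
}

int main(void)
{
	struct sched_ctx_sketch sc = { .sched_wake_date = mono_now_ns() };
	struct stream_sketch s = { 0 };

	sc.sched_call_date = mono_now_ns(); /* the scheduler does this when it calls the task */
	sketch_update_latency(&s, &sc);     /* process_stream() does this on entry */
	sketch_update_cpu(&s, &sc);         /* the stream does this before emitting its log */

	printf("lat=%llu ns cpu=%llu ns\n",
	       (unsigned long long)s.lat_time, (unsigned long long)s.cpu_time);
	return 0;
}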
The struct task is now back to 120 bytes, i.e. almost two cache lines,
with 32 bits still available.
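As a rough standalone illustration of the size effect (assuming 64-byte
cache lines; the 120-byte figure comes from the paragraph above and the
filler field below is obviously not the real task layout):

#include <stdint.h>
#include <stdio.h>

#define ASSUMED_CACHE_LINE 64

/* illustrative layouts only: dropping two uint64_t saves 16 bytes per object */
struct task_like_before {
	char other[120];   /* stand-in for all remaining fields */
	uint64_t lat_time;
	uint64_t cpu_time;
};

struct task_like_after {
	char other[120];
};

int main(void)
{
	printf("before: %zu bytes -> %zu cache line(s)\n",
	       sizeof(struct task_like_before),
	       (sizeof(struct task_like_before) + ASSUMED_CACHE_LINE - 1) / ASSUMED_CACHE_LINE);
	printf("after:  %zu bytes -> %zu cache line(s)\n",
	       sizeof(struct task_like_after),
	       (sizeof(struct task_like_after) + ASSUMED_CACHE_LINE - 1) / ASSUMED_CACHE_LINE);
	return 0;
}

Shaving those 16 bytes is what keeps the structure within two cache
lines instead of spilling into a third one.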
diff --git a/include/haproxy/stream-t.h b/include/haproxy/stream-t.h
index a09e541..a2928e5 100644
--- a/include/haproxy/stream-t.h
+++ b/include/haproxy/stream-t.h
@@ -168,6 +168,8 @@
struct list back_refs; /* list of users tracking this stream */
struct buffer_wait buffer_wait; /* position in the list of objects waiting for a buffer */
+ uint64_t lat_time; /* total latency time experienced */
+ uint64_t cpu_time; /* total CPU time consumed */
struct freq_ctr call_rate; /* stream task call rate without making progress */
short store_count;
diff --git a/include/haproxy/task-t.h b/include/haproxy/task-t.h
index 33a7f07..373ac29 100644
--- a/include/haproxy/task-t.h
+++ b/include/haproxy/task-t.h
@@ -113,8 +113,6 @@
short nice; /* task prio from -1024 to +1024 */
short tid; /* TID where it's allowed to run, <0 if anywhere */
uint32_t wake_date; /* date of the last task wakeup */
- uint64_t lat_time; /* total latency time experienced */
- uint64_t cpu_time; /* total CPU time consumed */
};
/* lightweight tasks, without priority, mainly used for I/Os */
diff --git a/include/haproxy/task.h b/include/haproxy/task.h
index 33be63f..a711c27 100644
--- a/include/haproxy/task.h
+++ b/include/haproxy/task.h
@@ -498,8 +498,6 @@
t->nice = 0;
t->calls = 0;
t->wake_date = 0;
- t->cpu_time = 0;
- t->lat_time = 0;
t->expire = TICK_ETERNITY;
#ifdef DEBUG_TASK
t->debug.caller_idx = 0;
diff --git a/src/sample.c b/src/sample.c
index 852cd04..cd9fccf 100644
--- a/src/sample.c
+++ b/src/sample.c
@@ -4136,7 +4136,7 @@
return 0;
smp->data.type = SMP_T_SINT;
- smp->data.u.sint = smp->strm->task->calls ? smp->strm->task->cpu_time / smp->strm->task->calls : 0;
+ smp->data.u.sint = smp->strm->task->calls ? smp->strm->cpu_time / smp->strm->task->calls : 0;
return 1;
}
@@ -4148,7 +4148,7 @@
return 0;
smp->data.type = SMP_T_SINT;
- smp->data.u.sint = smp->strm->task->cpu_time;
+ smp->data.u.sint = smp->strm->cpu_time;
return 1;
}
@@ -4160,7 +4160,7 @@
return 0;
smp->data.type = SMP_T_SINT;
- smp->data.u.sint = smp->strm->task->calls ? smp->strm->task->lat_time / smp->strm->task->calls : 0;
+ smp->data.u.sint = smp->strm->task->calls ? smp->strm->lat_time / smp->strm->task->calls : 0;
return 1;
}
@@ -4172,7 +4172,7 @@
return 0;
smp->data.type = SMP_T_SINT;
- smp->data.u.sint = smp->strm->task->lat_time;
+ smp->data.u.sint = smp->strm->lat_time;
return 1;
}
diff --git a/src/stream.c b/src/stream.c
index 8d62754..25a50e7 100644
--- a/src/stream.c
+++ b/src/stream.c
@@ -407,6 +407,7 @@
s->buffer_wait.target = s;
s->buffer_wait.wakeup_cb = stream_buf_available;
+ s->lat_time = s->cpu_time = 0;
s->call_rate.curr_tick = s->call_rate.curr_ctr = s->call_rate.prev_ctr = 0;
s->pcli_next_pid = 0;
s->pcli_flags = 0;
@@ -1557,6 +1558,17 @@
}
/* if the current task's wake_date was set, it's being profiled, thus we may
+ * report latencies and CPU usages in logs, so it's desirable to update the
+ * latency when entering process_stream().
+ */
+static void stream_cond_update_cpu_latency(struct stream *s)
+{
+ uint32_t lat = th_ctx->sched_call_date - th_ctx->sched_wake_date;
+
+ s->lat_time += lat;
+}
+
+/* if the current task's wake_date was set, it's being profiled, thus we may
* report latencies and CPU usages in logs, so it's desirable to do that before
* logging in order to report accurate CPU usage. In this case we count that
* final part and reset the wake date so that the scheduler doesn't do it a
@@ -1573,11 +1585,23 @@
return;
cpu = (uint32_t)now_mono_time() - th_ctx->sched_call_date;
- s->task->cpu_time += cpu;
+ s->cpu_time += cpu;
HA_ATOMIC_ADD(&th_ctx->sched_profile_entry->cpu_time, cpu);
th_ctx->sched_wake_date = 0;
}
+/* this function is called directly by the scheduler for tasks whose
+ * ->process points to process_stream(), and is used to keep latencies
+ * and CPU usage measurements accurate.
+ */
+void stream_update_timings(struct task *t, uint64_t lat, uint64_t cpu)
+{
+ struct stream *s = t->context;
+ s->lat_time += lat;
+ s->cpu_time += cpu;
+}
+
+
/* This macro is very specific to the function below. See the comments in
* process_stream() below to understand the logic and the tests.
*/
@@ -1649,6 +1673,7 @@
DBG_TRACE_ENTER(STRM_EV_STRM_PROC, s);
activity[tid].stream_calls++;
+ stream_cond_update_cpu_latency(s);
req = &s->req;
res = &s->res;
@@ -3376,14 +3401,13 @@
}
else if ((tmpctx = sc_appctx(scf)) != NULL) {
chunk_appendf(&trash,
- " app0=%p st0=%d st1=%d applet=%s tid=%d nice=%d calls=%u rate=%u cpu=%llu lat=%llu\n",
+ " app0=%p st0=%d st1=%d applet=%s tid=%d nice=%d calls=%u rate=%u\n",
tmpctx,
tmpctx->st0,
tmpctx->st1,
tmpctx->applet->name,
tmpctx->t->tid,
- tmpctx->t->nice, tmpctx->t->calls, read_freq_ctr(&tmpctx->call_rate),
- (unsigned long long)tmpctx->t->cpu_time, (unsigned long long)tmpctx->t->lat_time);
+ tmpctx->t->nice, tmpctx->t->calls, read_freq_ctr(&tmpctx->call_rate));
}
scb = strm->scb;
@@ -3419,14 +3443,13 @@
}
else if ((tmpctx = sc_appctx(scb)) != NULL) {
chunk_appendf(&trash,
- " app1=%p st0=%d st1=%d applet=%s tid=%d nice=%d calls=%u rate=%u cpu=%llu lat=%llu\n",
+ " app1=%p st0=%d st1=%d applet=%s tid=%d nice=%d calls=%u rate=%u\n",
tmpctx,
tmpctx->st0,
tmpctx->st1,
tmpctx->applet->name,
tmpctx->t->tid,
- tmpctx->t->nice, tmpctx->t->calls, read_freq_ctr(&tmpctx->call_rate),
- (unsigned long long)tmpctx->t->cpu_time, (unsigned long long)tmpctx->t->lat_time);
+ tmpctx->t->nice, tmpctx->t->calls, read_freq_ctr(&tmpctx->call_rate));
}
chunk_appendf(&trash,
@@ -3682,7 +3705,7 @@
curr_strm->task->state, curr_strm->stream_epoch,
human_time(now.tv_sec - curr_strm->logs.tv_accept.tv_sec, 1),
curr_strm->task->calls, read_freq_ctr(&curr_strm->call_rate),
- (unsigned long long)curr_strm->task->cpu_time, (unsigned long long)curr_strm->task->lat_time);
+ (unsigned long long)curr_strm->cpu_time, (unsigned long long)curr_strm->lat_time);
chunk_appendf(&trash,
" rq[f=%06xh,i=%u,an=%02xh,rx=%s",
diff --git a/src/task.c b/src/task.c
index 3d78680..bb25d50 100644
--- a/src/task.c
+++ b/src/task.c
@@ -25,6 +25,7 @@
#include <haproxy/tools.h>
extern struct task *process_stream(struct task *t, void *context, unsigned int state);
+extern void stream_update_timings(struct task *t, uint64_t lat, uint64_t cpu);
DECLARE_POOL(pool_head_task, "task", sizeof(struct task));
DECLARE_POOL(pool_head_tasklet, "tasklet", sizeof(struct tasklet));
@@ -611,7 +612,6 @@
uint32_t now_ns = now_mono_time();
uint32_t lat = now_ns - t->wake_date;
- t->lat_time += lat;
t->wake_date = 0;
th_ctx->sched_call_date = now_ns;
profile_entry = sched_activity_entry(sched_activity, t->process);
@@ -669,8 +669,6 @@
if (unlikely(th_ctx->sched_wake_date)) {
uint32_t cpu = (uint32_t)now_mono_time() - th_ctx->sched_call_date;
- if (t)
- t->cpu_time += cpu;
HA_ATOMIC_ADD(&profile_entry->cpu_time, cpu);
}