MINOR: spoe: Add metrics in to know time spent in the SPOE
Following metrics are added for each event or group of messages processed in the
SPOE:
* processing time: the delay to process the event or the group. From the
stream point of view, it is the latency added by the SPOE
processing.
* request time : It is the encoding time. It includes ACLs processing, if
any. For fragmented frames, it is the sum of all fragments.
* queue time : the delay before the request gets out the sending queue. For
fragmented frames, it is the sum of all fragments.
* waiting time: the delay before the reponse is received. No fragmentation
supported here.
* response time: the delay to process the response. No fragmentation supported
here.
* total time: (unused for now). It is the sum of all events or groups
processed by the SPOE for a specific threads.
Log messages has been updated. Before, only errors was logged (status_code !=
0). Now every processing is logged, following this format:
SPOE: [AGENT] <TYPE:NAME> sid=STREAM-ID st=STATUC-CODE reqT/qT/wT/resT/pT
where:
AGENT is the agent name
TYPE is EVENT of GROUP
NAME is the event or the group name
STREAM-ID is an integer, the unique id of the stream
STATUS_CODE is the processing's status code
reqT/qT/wT/resT/pT are delays descrive above
For all these delays, -1 means the processing was interrupted before the end. So
-1 for the queue time means the request was never dequeued. For fragmented
frames it is harder to know when the interruption happened.
For now, messages are logged using the same logger than the backend of the
stream which initiated the request.
diff --git a/doc/SPOE.txt b/doc/SPOE.txt
index cde3b1d..8cbf684 100644
--- a/doc/SPOE.txt
+++ b/doc/SPOE.txt
@@ -33,6 +33,7 @@
3.3. Events & messages
3.4. Actions
3.5. Errors & timeouts
+ 4. Logging
0. Terms
@@ -132,10 +133,10 @@
...
spoe-message msg2
...
- spoe-group grp1
+ spoe-group grp1
...
- spoe-group grp2
- ...
+ spoe-group grp2
+ ...
[my-second-engine]
...
@@ -1113,6 +1114,42 @@
connection will be closed by HAProxy. The same is true for hello timeout. You
should choose a lower value than the connect timeout.
+4. Logging
+-----------
+
+Activity of an SPOE is logged using HAProxy's logger. The messages are logged
+in the context of the streams that handle the client and the server
+connections. A message is emitted for each event or group handled by an
+SPOE. Depending on the status code, the log level will be different. In the
+normal case, when no error occurred, the message is logged with the level
+LOG_NOTICE. Otherwise, the message is logged with the level LOG_WARNING.
+
+The messages are logged using the stream's logger and use the following format:
+
+ SPOE: [AGENT] <TYPE:NAME> sid=STREAM-ID st=STATUC-CODE reqT/qT/wT/resT/pT
+
+ AGENT is the agent name
+ TYPE is EVENT of GROUP
+ NAME is the event or the group name
+ STREAM-ID is an integer, the unique id of the stream
+ STATUS_CODE is the processing's status code
+ reqT/qT/wT/resT/pT are the following time events:
+
+ * reqT : the encoding time. It includes ACLs processing, if any. For
+ fragmented frames, it is the sum of all fragments.
+ * qT : the delay before the request gets out the sending queue. For
+ fragmented frames, it is the sum of all fragments.
+ * wT : the delay before the reponse is received. No fragmentation
+ supported here.
+ * resT : the delay to process the response. No fragmentation supported
+ here.
+ * pT : the delay to process the event or the group. From the stream
+ point of view, it is the latency added by the SPOE processing.
+ It is more or less the sum of values above.
+
+For all these time events, -1 means the processing was interrupted before the
+end. So -1 for the queue time means the request was never dequeued. For
+fragmented frames it is harder to know when the interruption happened.
/*
* Local variables:
diff --git a/include/types/spoe.h b/include/types/spoe.h
index 32d2311..b5f8ce3 100644
--- a/include/types/spoe.h
+++ b/include/types/spoe.h
@@ -22,6 +22,8 @@
#ifndef _TYPES_SPOE_H
#define _TYPES_SPOE_H
+#include <sys/time.h>
+
#include <common/buffer.h>
#include <common/mini-clist.h>
#include <common/hathreads.h>
@@ -316,6 +318,20 @@
unsigned int curoff; /* offset in <curarg> from which to resume encoding */
unsigned int flags; /* SPOE_FRM_FL_* */
} frag_ctx; /* Info about fragmented frames, valid on if SPOE_CTX_FL_FRAGMENTED is set */
+
+ struct {
+ struct timeval tv_start; /* start date of the current event/group */
+ struct timeval tv_request; /* date the frame processing starts (reset for each frag) */
+ struct timeval tv_queue; /* date the frame is queued (reset for each frag) */
+ struct timeval tv_wait; /* date the stream starts waiting for a response */
+ struct timeval tv_response; /* date the response processing starts */
+ long t_request; /* delay to encode and push the frame in queue (cumulative for frags) */
+ long t_queue; /* delay before the frame gets out the sending queue (cumulative for frags) */
+ long t_waiting; /* delay before the response is reveived */
+ long t_response; /* delay to process the response (from the stream pov) */
+ long t_process; /* processing time of the last event/group */
+ unsigned long t_total; /* cumulative processing time */
+ } stats; /* Stats for this stream */
};
/* SPOE context inside a appctx */
diff --git a/src/flt_spoe.c b/src/flt_spoe.c
index 5767dfe..7d5eb3d 100644
--- a/src/flt_spoe.c
+++ b/src/flt_spoe.c
@@ -270,6 +270,17 @@
return uuid;
}
+
+static inline void
+spoe_update_stat_time(struct timeval *tv, long *t)
+{
+ if (*t == -1)
+ *t = tv_ms_elapsed(tv, &now);
+ else
+ *t += tv_ms_elapsed(tv, &now);
+ tv_zero(tv);
+}
+
/********************************************************************
* Functions that encode/decode SPOE frames
********************************************************************/
@@ -1246,6 +1257,7 @@
list_for_each_entry_safe(ctx, back, &spoe_appctx->waiting_queue, list) {
LIST_DEL(&ctx->list);
LIST_INIT(&ctx->list);
+ spoe_update_stat_time(&ctx->stats.tv_wait, &ctx->stats.t_waiting);
ctx->state = SPOE_CTX_ST_ERROR;
ctx->status_code = (spoe_appctx->status_code + 0x100);
task_wakeup(ctx->strm->task, TASK_WOKEN_MSG);
@@ -1273,6 +1285,7 @@
list_for_each_entry_safe(ctx, back, &agent->rt[tid].sending_queue, list) {
LIST_DEL(&ctx->list);
LIST_INIT(&ctx->list);
+ spoe_update_stat_time(&ctx->stats.tv_queue, &ctx->stats.t_queue);
ctx->state = SPOE_CTX_ST_ERROR;
ctx->status_code = (spoe_appctx->status_code + 0x100);
task_wakeup(ctx->strm->task, TASK_WOKEN_MSG);
@@ -1280,6 +1293,7 @@
list_for_each_entry_safe(ctx, back, &agent->rt[tid].waiting_queue, list) {
LIST_DEL(&ctx->list);
LIST_INIT(&ctx->list);
+ spoe_update_stat_time(&ctx->stats.tv_wait, &ctx->stats.t_waiting);
ctx->state = SPOE_CTX_ST_ERROR;
ctx->status_code = (spoe_appctx->status_code + 0x100);
task_wakeup(ctx->strm->task, TASK_WOKEN_MSG);
@@ -1472,6 +1486,7 @@
spoe_release_buffer(&ctx->buffer, &ctx->buffer_wait);
LIST_DEL(&ctx->list);
LIST_INIT(&ctx->list);
+ spoe_update_stat_time(&ctx->stats.tv_queue, &ctx->stats.t_queue);
ctx->spoe_appctx = NULL;
ctx->state = SPOE_CTX_ST_ERROR;
ctx->status_code = (SPOE_APPCTX(appctx)->status_code + 0x100);
@@ -1490,6 +1505,7 @@
spoe_release_buffer(&ctx->buffer, &ctx->buffer_wait);
LIST_DEL(&ctx->list);
LIST_INIT(&ctx->list);
+ spoe_update_stat_time(&ctx->stats.tv_queue, &ctx->stats.t_queue);
ctx->spoe_appctx = SPOE_APPCTX(appctx);
if (!(ctx->flags & SPOE_CTX_FL_FRAGMENTED) ||
(ctx->frag_ctx.flags & SPOE_FRM_FL_FIN))
@@ -1523,6 +1539,7 @@
*skip = 1;
LIST_ADDQ(&SPOE_APPCTX(appctx)->waiting_queue, &ctx->list);
}
+ ctx->stats.tv_wait = now;
SPOE_APPCTX(appctx)->frag_ctx.ctx = NULL;
SPOE_APPCTX(appctx)->frag_ctx.cursid = 0;
SPOE_APPCTX(appctx)->frag_ctx.curfid = 0;
@@ -1576,6 +1593,8 @@
default:
LIST_DEL(&ctx->list);
LIST_INIT(&ctx->list);
+ spoe_update_stat_time(&ctx->stats.tv_wait, &ctx->stats.t_waiting);
+ ctx->stats.tv_response = now;
if (ctx->spoe_appctx) {
ctx->spoe_appctx->cur_fpa--;
ctx->spoe_appctx = NULL;
@@ -1589,7 +1608,6 @@
}
else if (appctx->st0 == SPOE_APPCTX_ST_WAITING_SYNC_ACK)
appctx->st0 = SPOE_APPCTX_ST_PROCESSING;
-
task_wakeup(ctx->strm->task, TASK_WOKEN_MSG);
break;
}
@@ -2045,6 +2063,8 @@
/* Add the SPOE context in the sending queue */
LIST_ADDQ(&agent->rt[tid].sending_queue, &ctx->list);
+ spoe_update_stat_time(&ctx->stats.tv_request, &ctx->stats.t_request);
+ ctx->stats.tv_queue = now;
SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
" - Add stream in sending queue"
@@ -2439,6 +2459,15 @@
return 0;
agent->rt[tid].processing++;
+ ctx->stats.tv_start = now;
+ ctx->stats.tv_request = now;
+ ctx->stats.t_request = -1;
+ ctx->stats.t_queue = -1;
+ ctx->stats.t_waiting = -1;
+ ctx->stats.t_response = -1;
+ ctx->stats.t_process = -1;
+
+ ctx->status_code = 0;
/* Set the right flag to prevent request and response processing
* in same time. */
@@ -2469,8 +2498,6 @@
agent->rt[tid].processing--;
ctx->flags &= ~(SPOE_CTX_FL_PROCESS|SPOE_CTX_FL_FRAGMENTED);
- ctx->status_code = 0;
-
/* Reset processing timer */
ctx->process_exp = TICK_ETERNITY;
@@ -2489,6 +2516,20 @@
}
static void
+spoe_update_stats(struct stream *s, struct spoe_agent *agent,
+ struct spoe_context *ctx, int dir)
+{
+ if (!tv_iszero(&ctx->stats.tv_start)) {
+ spoe_update_stat_time(&ctx->stats.tv_start, &ctx->stats.t_process);
+ ctx->stats.t_total += ctx->stats.t_process;
+ tv_zero(&ctx->stats.tv_request);
+ tv_zero(&ctx->stats.tv_queue);
+ tv_zero(&ctx->stats.tv_wait);
+ tv_zero(&ctx->stats.tv_response);
+ }
+}
+
+static void
spoe_handle_processing_error(struct stream *s, struct spoe_agent *agent,
struct spoe_context *ctx, int dir)
{
@@ -2506,13 +2547,6 @@
spoe_set_var(ctx, "txn", agent->var_on_error,
strlen(agent->var_on_error), &smp);
}
- SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
- " - failed to process messages: code=%u\n",
- (int)now.tv_sec, (int)now.tv_usec, agent->id,
- __FUNCTION__, s, ctx->status_code);
- send_log(ctx->strm->be, LOG_WARNING,
- "SPOE: [%s] failed to process messages: code=%u\n",
- agent->id, ctx->status_code);
ctx->state = ((agent->flags & SPOE_FL_CONT_ON_ERR)
? SPOE_CTX_ST_READY
@@ -2569,6 +2603,8 @@
}
if (ctx->state == SPOE_CTX_ST_ENCODING_MSGS) {
+ if (!tv_iszero(&ctx->stats.tv_request))
+ ctx->stats.tv_request = now;
if (!spoe_acquire_buffer(&ctx->buffer, &ctx->buffer_wait))
goto out;
ret = spoe_encode_messages(s, ctx, messages, dir, type);
@@ -2598,6 +2634,7 @@
ret = 1;
ctx->frame_id++;
ctx->state = SPOE_CTX_ST_READY;
+ spoe_update_stat_time(&ctx->stats.tv_response, &ctx->stats.t_response);
goto end;
}
@@ -2610,10 +2647,12 @@
goto end;
skip:
+ tv_zero(&ctx->stats.tv_start);
ctx->state = SPOE_CTX_ST_READY;
ret = 1;
end:
+ spoe_update_stats(s, agent, ctx, dir);
spoe_stop_processing(agent, ctx);
return ret;
}
@@ -2624,12 +2663,13 @@
spoe_process_group(struct stream *s, struct spoe_context *ctx,
struct spoe_group *group, int dir)
{
+ struct spoe_config *conf = FLT_CONF(ctx->filter);
+ struct spoe_agent *agent = conf->agent;
int ret;
SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
" - ctx-state=%s - Process messages for group=%s\n",
- (int)now.tv_sec, (int)now.tv_usec,
- ((struct spoe_config *)FLT_CONF(ctx->filter))->agent->id,
+ (int)now.tv_sec, (int)now.tv_usec, agent->id,
__FUNCTION__, s, spoe_ctx_state_str[ctx->state],
group->id);
@@ -2637,6 +2677,19 @@
return 1;
ret = spoe_process_messages(s, ctx, &group->messages, dir, SPOE_MSGS_BY_GROUP);
+ if (ret && ctx->stats.t_process != -1) {
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - <GROUP:%s> sid=%u st=%u %ld/%ld/%ld/%ld/%ld\n",
+ (int)now.tv_sec, (int)now.tv_usec, agent->id,
+ __FUNCTION__, s, s->uniq_id, ctx->status_code,
+ ctx->stats.t_request, ctx->stats.t_queue, ctx->stats.t_waiting,
+ ctx->stats.t_response, ctx->stats.t_process);
+ send_log(s->be, (!ctx->status_code ? LOG_NOTICE : LOG_WARNING),
+ "SPOE: [%s] <GROUP:%s> sid=%u st=%u %ld/%ld/%ld/%ld/%ld\n",
+ agent->id, group->id, s->uniq_id, ctx->status_code,
+ ctx->stats.t_request, ctx->stats.t_queue, ctx->stats.t_waiting,
+ ctx->stats.t_response, ctx->stats.t_process);
+ }
return ret;
}
@@ -2646,12 +2699,13 @@
spoe_process_event(struct stream *s, struct spoe_context *ctx,
enum spoe_event ev)
{
+ struct spoe_config *conf = FLT_CONF(ctx->filter);
+ struct spoe_agent *agent = conf->agent;
int dir, ret;
SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
" - ctx-state=%s - Process messages for event=%s\n",
- (int)now.tv_sec, (int)now.tv_usec,
- ((struct spoe_config *)FLT_CONF(ctx->filter))->agent->id,
+ (int)now.tv_sec, (int)now.tv_usec, agent->id,
__FUNCTION__, s, spoe_ctx_state_str[ctx->state],
spoe_event_str[ev]);
@@ -2661,6 +2715,19 @@
return 1;
ret = spoe_process_messages(s, ctx, &(ctx->events[ev]), dir, SPOE_MSGS_BY_EVENT);
+ if (ret && ctx->stats.t_process != -1) {
+ SPOE_PRINTF(stderr, "%d.%06d [SPOE/%-15s] %s: stream=%p"
+ " - <EVENT:%s> sid=%u st=%u %ld/%ld/%ld/%ld/%ld\n",
+ (int)now.tv_sec, (int)now.tv_usec, agent->id,
+ __FUNCTION__, s, spoe_event_str[ev], s->uniq_id, ctx->status_code,
+ ctx->stats.t_request, ctx->stats.t_queue, ctx->stats.t_waiting,
+ ctx->stats.t_response, ctx->stats.t_process);
+ send_log(s->be, (!ctx->status_code ? LOG_NOTICE : LOG_WARNING),
+ "SPOE: [%s] <EVENT:%s> sid=%u st=%u %ld/%ld/%ld/%ld/%ld\n",
+ agent->id, spoe_event_str[ev], s->uniq_id, ctx->status_code,
+ ctx->stats.t_request, ctx->stats.t_queue, ctx->stats.t_waiting,
+ ctx->stats.t_response, ctx->stats.t_process);
+ }
return ret;
}
@@ -2741,6 +2808,18 @@
ctx->frame_id = 1;
ctx->process_exp = TICK_ETERNITY;
+ tv_zero(&ctx->stats.tv_start);
+ tv_zero(&ctx->stats.tv_request);
+ tv_zero(&ctx->stats.tv_queue);
+ tv_zero(&ctx->stats.tv_wait);
+ tv_zero(&ctx->stats.tv_response);
+ ctx->stats.t_request = -1;
+ ctx->stats.t_queue = -1;
+ ctx->stats.t_waiting = -1;
+ ctx->stats.t_response = -1;
+ ctx->stats.t_process = -1;
+ ctx->stats.t_total = 0;
+
ctx->strm = s;
ctx->state = SPOE_CTX_ST_READY;
filter->ctx = ctx;
@@ -2767,6 +2846,18 @@
{
ctx->state = SPOE_CTX_ST_READY;
ctx->flags &= ~(SPOE_CTX_FL_PROCESS|SPOE_CTX_FL_FRAGMENTED);
+
+ tv_zero(&ctx->stats.tv_start);
+ tv_zero(&ctx->stats.tv_request);
+ tv_zero(&ctx->stats.tv_queue);
+ tv_zero(&ctx->stats.tv_wait);
+ tv_zero(&ctx->stats.tv_response);
+ ctx->stats.t_request = -1;
+ ctx->stats.t_queue = -1;
+ ctx->stats.t_waiting = -1;
+ ctx->stats.t_response = -1;
+ ctx->stats.t_process = -1;
+ ctx->stats.t_total = 0;
}