MINOR: contrib/prometheus-exporter: Add the last healthcheck duration metric
ST_F_CHECK_DURATION is now part of exported server metrics, named
haproxy_server_check_duration_seconds and expressed in seconds. For a given
server, this value is exported only if the healthcheck is finished (the status
is greater than or equal to HCHK_STATUS_CHECKED).
This patch fixes the issue #519. It may be backported as far as 2.0.
diff --git a/contrib/prometheus-exporter/README b/contrib/prometheus-exporter/README
index a0df4b2..a631020 100644
--- a/contrib/prometheus-exporter/README
+++ b/contrib/prometheus-exporter/README
@@ -272,6 +272,7 @@
| haproxy_server_weight | Service weight. |
| haproxy_server_check_status | Status of last health check, if enabled. (see below for the mapping) |
| haproxy_server_check_code | layer5-7 code, if available of the last health check. |
+| haproxy_server_check_duration_seconds | Total duration of the latest server health check, in seconds. |
| haproxy_server_check_failures_total | Total number of failed check (Only when the server is up). |
| haproxy_server_check_up_down_total | Total number of UP->DOWN transitions. |
| haproxy_server_downtime_seconds_total | Total downtime (in seconds) for the service. |
diff --git a/contrib/prometheus-exporter/service-prometheus.c b/contrib/prometheus-exporter/service-prometheus.c
index fa80147..6e7eca0 100644
--- a/contrib/prometheus-exporter/service-prometheus.c
+++ b/contrib/prometheus-exporter/service-prometheus.c
@@ -388,8 +388,8 @@
[ST_F_RATE_LIM] = 0,
[ST_F_RATE_MAX] = ST_F_LASTSESS,
[ST_F_CHECK_STATUS] = ST_F_CHECK_CODE,
- [ST_F_CHECK_CODE] = ST_F_CHKFAIL,
- [ST_F_CHECK_DURATION] = 0,
+ [ST_F_CHECK_CODE] = ST_F_CHECK_DURATION,
+ [ST_F_CHECK_DURATION] = ST_F_CHKFAIL,
[ST_F_HRSP_1XX] = ST_F_HRSP_2XX,
[ST_F_HRSP_2XX] = ST_F_HRSP_3XX,
[ST_F_HRSP_3XX] = ST_F_HRSP_4XX,
@@ -552,7 +552,7 @@
[ST_F_RATE_MAX] = IST("max_session_rate"),
[ST_F_CHECK_STATUS] = IST("check_status"),
[ST_F_CHECK_CODE] = IST("check_code"),
- [ST_F_CHECK_DURATION] = IST("check_duration_milliseconds"),
+ [ST_F_CHECK_DURATION] = IST("check_duration_seconds"),
[ST_F_HRSP_1XX] = IST("http_responses_total"),
[ST_F_HRSP_2XX] = IST("http_responses_total"),
[ST_F_HRSP_3XX] = IST("http_responses_total"),
@@ -715,7 +715,7 @@
[ST_F_RATE_MAX] = IST("Maximum observed number of sessions per second."),
[ST_F_CHECK_STATUS] = IST("Status of last health check (HCHK_STATUS_* values)."),
[ST_F_CHECK_CODE] = IST("layer5-7 code, if available of the last health check."),
- [ST_F_CHECK_DURATION] = IST("Time in ms took to finish last health check."),
+ [ST_F_CHECK_DURATION] = IST("Total duration of the latest server health check, in seconds."),
[ST_F_HRSP_1XX] = IST("Total number of HTTP responses."),
[ST_F_HRSP_2XX] = IST("Total number of HTTP responses."),
[ST_F_HRSP_3XX] = IST("Total number of HTTP responses."),
@@ -2037,6 +2037,12 @@
goto next_sv;
metric = mkf_u32(FN_OUTPUT, (sv->check.status < HCHK_STATUS_L57DATA) ? 0 : sv->check.code);
break;
+ case ST_F_CHECK_DURATION:
+ if (sv->check.status < HCHK_STATUS_CHECKED)
+ goto next_sv;
+ secs = (double)sv->check.duration / 1000.0;
+ metric = mkf_flt(FN_DURATION, secs);
+ break;
case ST_F_CHKFAIL:
metric = mkf_u64(FN_COUNTER, sv->counters.failed_checks);
break;