MEDIUM: hlua: reliable timeout detection

For non-yieldable lua handlers (converters, fetches or
yield-incompatible lua functions), the current timeout detection relies
on the now_ms thread-local variable.

But within non-yieldable contexts, now_ms won't be updated unless we do
it ourselves (since we're momentarily stuck in lua context, we won't
re-enter the polling loop, which is responsible for clock updates).

To circumvent this, clock_update_date(0, 1) was manually called right
before now_ms was read for the timeout checks.

But this fails to work consistently: if no other concurrent thread
periodically runs clock_update_global_date(), which does happen when
we're the only active thread (nbthread=1 or low traffic), our
clock_update_date() call won't reliably update our local now_ms variable.

Moreover, clock_update_date() is not the right tool for this anyway, as
it was initially meant to be used from the polling context.
Using it could have a negative impact on other threads relying on now_ms
to be stable (because clock_update_date() performs a global clock update
from time to time).

-> Introducing the hlua multipurpose timer, which is internally based on
now_cpu_time_fast(), a function providing per-thread consistent clock
readings.

Thanks to this new hlua timer API, hlua timeout logic is less error-prone
and more robust.

This allows the timeout detection to work as expected for both yieldable
and non-yieldable lua handlers.

This patch depends on commit "MINOR: clock: add now_cpu_time_fast() function"

While this could theoretically be backported to all stable versions,
it is advisable to avoid backports unless we're confident enough,
since it could cause slight behavior changes (timing related) in
existing setups.
diff --git a/include/haproxy/hlua-t.h b/include/haproxy/hlua-t.h
index 7b1ed11..22de659 100644
--- a/include/haproxy/hlua-t.h
+++ b/include/haproxy/hlua-t.h
@@ -26,6 +26,7 @@
 
 #include <lua.h>
 #include <lauxlib.h>
+#include <stdint.h>
 
 #include <import/ebtree-t.h>
 
@@ -99,6 +100,13 @@
 	HLUA_E_ERR,    /* LUA stack execution failed without error message. */
 };
 
+struct hlua_timer {
+	uint32_t start;      /* cpu time in ms when the timer was started */
+	uint32_t burst;      /* execution time for the current call in ms */
+	uint32_t cumulative; /* cumulative execution time for the coroutine in ms */
+	uint32_t max;        /* max (cumulative) execution time for the coroutine in ms */
+};
+
 struct hlua {
 	lua_State *T; /* The LUA stack. */
 	int state_id; /* contains the lua state id. 0 is common state, 1 to n are per-thread states.*/
@@ -109,9 +117,7 @@
 	int nargs; /* The number of arguments in the stack at the start of execution. */
 	unsigned int flags; /* The current execution flags. */
 	int wake_time; /* The lua wants to be waked at this time, or before. */
-	unsigned int max_time; /* The max amount of execution time for an Lua process, in ms. */
-	unsigned int start_time; /* The ms time when the Lua starts the last execution. */
-	unsigned int run_time; /* Lua total execution time in ms. */
+	struct hlua_timer timer; /* lua multipurpose timer */
 	struct task *task; /* The task associated with the lua stack execution.
 	                      We must wake this task to continue the task execution */
 	struct list com; /* The list head of the signals attached to this task. */
diff --git a/src/hlua.c b/src/hlua.c
index a00a114..492ee7e 100644
--- a/src/hlua.c
+++ b/src/hlua.c
@@ -345,14 +345,96 @@
 static int class_applet_http_ref;
 static int class_txn_reply_ref;
 
-/* Global Lua execution timeout. By default Lua, execution linked
- * with stream (actions, sample-fetches and converters) have a
- * short timeout. Lua linked with tasks doesn't have a timeout
- * because a task may remain alive during all the haproxy execution.
+/* Lua max execution timeouts. By default, stream-related
+ * lua coroutines (e.g.: actions) have a short timeout.
+ * On the other hand tasks coroutines don't have a timeout because
+ * a task may remain alive during all the haproxy execution.
+ *
+ * Timeouts are expressed in milliseconds, they are meant to be used
+ * with hlua timer's API exclusively.
+ * 0 means no timeout
+ */
+static uint32_t hlua_timeout_session = 4000; /* session timeout. */
+static uint32_t hlua_timeout_task = 0; /* task timeout. */
+static uint32_t hlua_timeout_applet = 4000; /* applet timeout. */
+
+/* hlua multipurpose timer:
+ *  used to compute burst lua time (within a single hlua_ctx_resume())
+ *  and cumulative lua time for a given coroutine, and to check
+ *  the lua coroutine against the configured timeouts
+ */
+
+/* fetch per-thread cpu_time with ms precision (may wrap) */
+static inline uint32_t _hlua_time_ms()
+{
+	/* We're interested in the current cpu time in ms, which will be returned
+	 * as a uint32_t to save some space.
+	 * We must take the following into account:
+	 *
+	 * - now_cpu_time_fast() which returns the time in nanoseconds as a uint64_t
+	 *   will wrap every 585 years.
+	 * - uint32_t may only contain 4294967295ms (~=49.7 days), so _hlua_time_ms()
+	 *   itself will also wrap every 49.7 days.
+	 *
+	 * While we can safely ignore the now_cpu_time_fast() wrap, we must
+	 * take care of the uint32_t wrap by making sure to exclusively
+	 * manipulate the time using uint32_t everywhere _hlua_time_ms()
+	 * is involved.
+	 */
+	return (uint32_t)(now_cpu_time_fast() / 1000000ULL);
+}
+
+/* computes time spent in a single lua execution (in ms) */
+static inline uint32_t _hlua_time_burst(const struct hlua_timer *timer)
+{
+	uint32_t burst_ms;
+
+	/* wrapping is expected and properly
+	 * handled thanks to _hlua_time_ms() and burst_ms
+	 * being of the same type
+	 */
+	burst_ms = _hlua_time_ms() - timer->start;
+	return burst_ms;
+}
+
+static inline void hlua_timer_init(struct hlua_timer *timer, unsigned int max)
+{
+	timer->cumulative = 0;
+	timer->burst = 0;
+	timer->max = max;
+}
+
+/* reset the timer ctx between 2 yields */
+static inline void hlua_timer_reset(struct hlua_timer *timer)
+{
+	timer->cumulative += timer->burst;
+	timer->burst = 0;
+}
+
+/* start the timer right before a new execution */
+static inline void hlua_timer_start(struct hlua_timer *timer)
+{
+	timer->start = _hlua_time_ms();
+}
+
+/* update hlua timer when finishing an execution */
+static inline void hlua_timer_stop(struct hlua_timer *timer)
+{
+	timer->burst += _hlua_time_burst(timer);
+}
+
+/* check the timers for current hlua context
+ * Returns 1 if the check succeeded and 0 if it failed
+ * (ie: timeout exceeded)
  */
-static unsigned int hlua_timeout_session = 4000; /* session timeout. */
-static unsigned int hlua_timeout_task = TICK_ETERNITY; /* task timeout. */
-static unsigned int hlua_timeout_applet = 4000; /* applet timeout. */
+static inline int hlua_timer_check(const struct hlua_timer *timer)
+{
+	uint32_t pburst = _hlua_time_burst(timer); /* pending burst time in ms */
+
+	if (timer->max && (timer->cumulative + timer->burst + pburst) > timer->max)
+		return 0; /* cumulative timeout exceeded */
+	return 1; /* ok */
+}
 
 /* Interrupts the Lua processing each "hlua_nb_instruction" instructions.
  * it is used for preventing infinite loops.
@@ -1323,6 +1405,7 @@
 	lua->gc_count = 0;
 	lua->wake_time = TICK_ETERNITY;
 	lua->state_id = state_id;
+	hlua_timer_init(&lua->timer, 0); /* default value, no timeout */
 	LIST_INIT(&lua->com);
 	MT_LIST_INIT(&lua->hc_list);
 	if (!SET_SAFE_LJMP_PARENT(lua)) {
@@ -1505,17 +1588,12 @@
 		return;
 	}
 
-	/* If we cannot yield, update the clock and check the timeout. */
-	clock_update_date(0, 1);
-	hlua->run_time += now_ms - hlua->start_time;
-	if (hlua->max_time && hlua->run_time >= hlua->max_time) {
+	/* If we cannot yield, check the timeout. */
+	if (!hlua_timer_check(&hlua->timer)) {
 		lua_pushfstring(L, "execution timeout");
 		WILL_LJMP(lua_error(L));
 	}
 
-	/* Update the start time. */
-	hlua->start_time = now_ms;
-
 	/* Try to interrupt the process at the end of the current
 	 * unyieldable function.
 	 */
@@ -1546,15 +1624,17 @@
 	const char *msg;
 	const char *trace;
 
-	/* Initialise run time counter. */
-	if (!HLUA_IS_RUNNING(lua))
-		lua->run_time = 0;
-
 	/* Lock the whole Lua execution. This lock must be before the
 	 * label "resume_execution".
 	 */
 	hlua_lock(lua);
 
+	/* reset the timer as we might be re-entering the function to
+	 * resume the coroutine after a successful yield
+	 * (cumulative time will be updated)
+	 */
+	hlua_timer_reset(&lua->timer);
+
 resume_execution:
 
 	/* This hook interrupts the Lua processing each 'hlua_nb_instruction'
@@ -1571,16 +1651,22 @@
 	if (!yield_allowed)
 		HLUA_SET_NOYIELD(lua);
 
-	/* Update the start time and reset wake_time. */
-	lua->start_time = now_ms;
+	/* reset wake_time. */
 	lua->wake_time = TICK_ETERNITY;
 
+	/* start the timer as we're about to start lua processing */
+	hlua_timer_start(&lua->timer);
+
 	/* Call the function. */
 #if defined(LUA_VERSION_NUM) && LUA_VERSION_NUM >= 504
 	ret = lua_resume(lua->T, hlua_states[lua->state_id], lua->nargs, &nres);
 #else
 	ret = lua_resume(lua->T, hlua_states[lua->state_id], lua->nargs);
 #endif
+
+	/* out of lua processing, stop the timer */
+	hlua_timer_stop(&lua->timer);
+
 	switch (ret) {
 
 	case LUA_OK:
@@ -1588,12 +1674,10 @@
 		break;
 
 	case LUA_YIELD:
-		/* Check if the execution timeout is expired. It it is the case, we
+		/* Check if the execution timeout is expired. If it is the case, we
 		 * break the Lua execution.
 		 */
-		clock_update_date(0, 1);
-		lua->run_time += now_ms - lua->start_time;
-		if (lua->max_time && lua->run_time > lua->max_time) {
+		if (!hlua_timer_check(&lua->timer)) {
 			lua_settop(lua->T, 0); /* Empty the stack. */
 			ret = HLUA_E_ETMOUT;
 			break;
@@ -8626,7 +8710,7 @@
 	 * execution timeouts.
 	 */
 	if (!HLUA_IS_RUNNING(hlua))
-		hlua->max_time = hlua_timeout_task;
+		hlua_timer_init(&hlua->timer, hlua_timeout_task);
 
 	/* Execute the Lua code. */
 	status = hlua_ctx_resume(hlua, 1);
@@ -8849,9 +8933,8 @@
 	/* If it is the first call to the task, we must initialize the
 	 * execution timeouts.
 	 */
-	if (!HLUA_IS_RUNNING(hlua)) {
-		hlua->max_time = hlua_timeout_task;
-	}
+	if (!HLUA_IS_RUNNING(hlua))
+		hlua_timer_init(&hlua->timer, hlua_timeout_task);
 
 	/* make sure to reset the task expiry before each hlua_ctx_resume()
 	 * since the task is re-used for multiple cb function calls
@@ -9353,7 +9436,7 @@
 		}
 
 		/* We must initialize the execution timeouts. */
-		stream->hlua->max_time = hlua_timeout_session;
+		hlua_timer_init(&stream->hlua->timer, hlua_timeout_session);
 
 		/* At this point the execution is safe. */
 		RESET_SAFE_LJMP(stream->hlua);
@@ -9488,7 +9571,7 @@
 		}
 
 		/* We must initialize the execution timeouts. */
-		stream->hlua->max_time = hlua_timeout_session;
+		hlua_timer_init(&stream->hlua->timer, hlua_timeout_session);
 
 		/* At this point the execution is safe. */
 		RESET_SAFE_LJMP(stream->hlua);
@@ -9837,7 +9920,7 @@
 		RESET_SAFE_LJMP(s->hlua);
 
 		/* We must initialize the execution timeouts. */
-		s->hlua->max_time = hlua_timeout_session;
+		hlua_timer_init(&s->hlua->timer, hlua_timeout_session);
 	}
 
 	/* Execute the function. */
@@ -9974,7 +10057,7 @@
 	}
 
 	/* Set timeout according with the applet configuration. */
-	hlua->max_time = ctx->applet->timeout;
+	hlua_timer_init(&hlua->timer, ctx->applet->timeout);
 
 	/* The following Lua calls can fail. */
 	if (!SET_SAFE_LJMP(hlua)) {
@@ -10165,7 +10248,7 @@
 	}
 
 	/* Set timeout according with the applet configuration. */
-	hlua->max_time = ctx->applet->timeout;
+	hlua_timer_init(&hlua->timer, ctx->applet->timeout);
 
 	/* The following Lua calls can fail. */
 	if (!SET_SAFE_LJMP(hlua)) {
@@ -10842,7 +10925,7 @@
 	}
 
 	/* We must initialize the execution timeouts. */
-	hlua->max_time = hlua_timeout_session;
+	hlua_timer_init(&hlua->timer, hlua_timeout_session);
 
 	/* At this point the execution is safe. */
 	RESET_SAFE_LJMP(hlua);
@@ -11308,7 +11391,7 @@
 		s->hlua->nargs = 1;
 
 		/* We must initialize the execution timeouts. */
-		s->hlua->max_time = hlua_timeout_session;
+		hlua_timer_init(&s->hlua->timer, hlua_timeout_session);
 
 		/* At this point the execution is safe. */
 		RESET_SAFE_LJMP(s->hlua);
@@ -11478,7 +11561,7 @@
 		}
 
 		/* We must initialize the execution timeouts. */
-		flt_hlua->max_time = hlua_timeout_session;
+		hlua_timer_init(&flt_hlua->timer, hlua_timeout_session);
 
 		/* At this point the execution is safe. */
 		RESET_SAFE_LJMP(flt_hlua);