Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 1 | /* |
| 2 | include/proto/task.h |
| 3 | Functions for task management. |
| 4 | |
Willy Tarreau | 4726f53 | 2009-03-07 17:25:21 +0100 | [diff] [blame] | 5 | Copyright (C) 2000-2009 Willy Tarreau - w@1wt.eu |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 6 | |
| 7 | This library is free software; you can redistribute it and/or |
| 8 | modify it under the terms of the GNU Lesser General Public |
| 9 | License as published by the Free Software Foundation, version 2.1 |
| 10 | exclusively. |
| 11 | |
| 12 | This library is distributed in the hope that it will be useful, |
| 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 15 | Lesser General Public License for more details. |
| 16 | |
| 17 | You should have received a copy of the GNU Lesser General Public |
| 18 | License along with this library; if not, write to the Free Software |
| 19 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
| 20 | */ |
| 21 | |
| 22 | #ifndef _PROTO_TASK_H |
| 23 | #define _PROTO_TASK_H |
| 24 | |
| 25 | |
| 26 | #include <sys/time.h> |
Willy Tarreau | e3ba5f0 | 2006-06-29 18:54:54 +0200 | [diff] [blame] | 27 | |
| 28 | #include <common/config.h> |
Willy Tarreau | 2dd0d47 | 2006-06-29 17:53:05 +0200 | [diff] [blame] | 29 | #include <common/memory.h> |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 30 | #include <common/mini-clist.h> |
| 31 | #include <common/standard.h> |
Willy Tarreau | d0a201b | 2009-03-08 15:53:06 +0100 | [diff] [blame] | 32 | #include <common/ticks.h> |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 33 | |
Willy Tarreau | e3ba5f0 | 2006-06-29 18:54:54 +0200 | [diff] [blame] | 34 | #include <types/task.h> |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 35 | |
Willy Tarreau | d0a201b | 2009-03-08 15:53:06 +0100 | [diff] [blame] | 36 | /* Principle of the wait queue. |
| 37 | * |
| 38 | * We want to be able to tell whether an expiration date is before of after the |
| 39 | * current time <now>. We KNOW that expiration dates are never too far apart, |
| 40 | * because they are measured in ticks (milliseconds). We also know that almost |
| 41 | * all dates will be in the future, and that a very small part of them will be |
| 42 | * in the past, they are the ones which have expired since last time we checked |
| 43 | * them. Using ticks, we know if a date is in the future or in the past, but we |
| 44 | * cannot use that to store sorted information because that reference changes |
| 45 | * all the time. |
| 46 | * |
| 47 | * So we cut the time in 3 ranges, only one of which <now> can be. The base of |
| 48 | * the range holding <now> serves as a reference for as long as <now> remains |
| 49 | * in this range : |
| 50 | * - previous : those ones are expired by definition (all before <now>) |
| 51 | * - current : some are expired, some are not (holds <now>) |
| 52 | * - next : none are expired (all after <now>) |
| 53 | * |
| 54 | * We use the higher two bits of the timers expressed in ticks to determine |
| 55 | * which range a timer is in, compared to <now> : |
| 56 | * |
| 57 | * now previous current next0 next1 |
| 58 | * [31:30] [31:30] [31:30] [31:30] [31:30] |
| 59 | * 00 11 00 01 10 |
| 60 | * 01 00 01 10 11 |
| 61 | * 10 01 10 11 00 |
| 62 | * 11 10 11 00 01 |
| 63 | * |
| 64 | * By definition, <current> is the range containing <now> as well as all timers |
| 65 | * which have the same 2 high bits as <now>, <previous> is the range just |
| 66 | * before, which contains all timers whose high bits equal those of <now> minus |
| 67 | * 1. Last, <next> is composed of the two remaining ranges. |
| 68 | * |
| 69 | * For ease of implementation, the timers will then be stored into 4 queues 0-3 |
| 70 | * determined by the 2 higher bits of the timer. The expiration algorithm is |
| 71 | * very simple : |
| 72 | * - expire everything in <previous>=queue[((now>>30)-1)&3] |
| 73 | * - expire from <current>=queue[(now>>30)&3] everything where timer >= now |
| 74 | * |
| 75 | * With this algorithm, it's possible to queue tasks meant to expire 24.8 days |
| 76 | * in the future, and still be able to detect events remaining unprocessed for |
| 77 | * the last 12.4 days! Note that the principle might be extended to any number |
| 78 | * of higher bits as long as there is only one range for expired tasks. For |
| 79 | * instance, using the 8 higher bits to index the range, we would have one past |
| 80 | * range of 4.6 hours (24 bits in ms), and 254 ranges in the future totalizing |
| 81 | * 49.3 days. This would eat more memory for very little added benefit though. |
| 82 | * |
| 83 | * Also, in order to maintain the ability to perform time comparisons, it is |
| 84 | * preferable to avoid using the <next1> range above, as values in this range |
| 85 | * may not easily be compared to <now> outside of these functions as it is the |
| 86 | * opposite of the <current> range, and <timer>-<now> may randomly be positive |
| 87 | * or negative. That means we're left with +/- 12.4 days timers. |
| 88 | * |
| 89 | * To keep timers ordered, we use 4 ebtrees [0..3]. We could have used instead |
| 90 | * of ticks, (seconds*1024)+milliseconds, as well as 1024th of seconds, but |
| 91 | * that makes comparisons with ticks more difficult, so in the end it's better |
| 92 | * to stick to the ticks. |
| 93 | * |
| 94 | * Another nice optimisation is to allow a timer to stay at an old place in the |
| 95 | * queue as long as it's not further than the real expiration date. That way, |
| 96 | * we use the tree as a place holder for a minorant of the real expiration |
| 97 | * date. Since we have a very low chance of hitting a timeout anyway, we can |
| 98 | * bounce the nodes to their right place when we scan the tree if we encounter |
| 99 | * a misplaced node once in a while. This even allows us not to remove the |
| 100 | * infinite timers from the wait queue. |
| 101 | * |
| 102 | * So, to summarize, we have : |
| 103 | * - node->key always defines current position in the wait queue |
| 104 | * - timer is the real expiration date (possibly infinite) |
| 105 | * - node->key <= timer |
| 106 | * |
| 107 | * The run queue works similarly to the wait queue except that the current date |
| 108 | * is replaced by an insertion counter which can also wrap without any problem. |
| 109 | */ |
| 110 | |
| 111 | /* the timers are stored as 32-bit values in the queues */ |
| 112 | #define TIMER_TICK_BITS 32 |
| 113 | #define TIMER_TREE_BITS 2 |
| 114 | #define TIMER_TREES (1 << TIMER_TREE_BITS) |
| 115 | #define TIMER_TREE_SHIFT (TIMER_TICK_BITS - TIMER_TREE_BITS) |
| 116 | #define TIMER_TREE_MASK (TIMER_TREES - 1) |
| 117 | #define TIMER_TICK_MASK ((1U << (TIMER_TICK_BITS-1)) * 2 - 1) |
| 118 | #define TIMER_SIGN_BIT (1 << (TIMER_TICK_BITS - 1)) |
| 119 | |
| 120 | /* a few exported variables */ |
Willy Tarreau | 91e9993 | 2008-06-30 07:51:00 +0200 | [diff] [blame] | 121 | extern unsigned int run_queue; /* run queue size */ |
| 122 | extern unsigned int niced_tasks; /* number of niced tasks in the run queue */ |
Willy Tarreau | c6ca1a0 | 2007-05-13 19:43:47 +0200 | [diff] [blame] | 123 | extern struct pool_head *pool2_task; |
Willy Tarreau | ce44f12 | 2008-07-05 18:16:19 +0200 | [diff] [blame] | 124 | extern struct task *last_timer; /* optimization: last queued timer */ |
Willy Tarreau | c6ca1a0 | 2007-05-13 19:43:47 +0200 | [diff] [blame] | 125 | |
Willy Tarreau | d0a201b | 2009-03-08 15:53:06 +0100 | [diff] [blame] | 126 | /* Convert ticks to timers. Must not be called with TICK_ETERNITY, which is not |
| 127 | * a problem inside tree scanning functions. Note that ticks are signed while |
| 128 | * timers are not. |
| 129 | */ |
| 130 | static inline unsigned int tick_to_timer(int tick) |
| 131 | { |
| 132 | return tick & TIMER_TICK_MASK; |
| 133 | } |
| 134 | |
| 135 | /* Convert timer to ticks. This operation will be correct only as long as |
| 136 | * timers are stored on a minimum of 32-bit. We take care of not returning zero |
| 137 | * which would mean "eternity" for a tick. Also note that ticks are signed and |
| 138 | * timers are not. |
| 139 | */ |
| 140 | static inline int timer_to_tick(unsigned int timer) |
| 141 | { |
| 142 | return timer ? timer : 1; |
| 143 | } |
| 144 | |
| 145 | /* returns a tree number based on a ticks value */ |
| 146 | static inline unsigned int timer_to_tree(unsigned int timer) |
| 147 | { |
| 148 | return (timer >> TIMER_TREE_SHIFT) & TIMER_TREE_MASK; |
| 149 | } |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 150 | |
Willy Tarreau | 4726f53 | 2009-03-07 17:25:21 +0100 | [diff] [blame] | 151 | /* return 0 if task is in run queue, otherwise non-zero */ |
| 152 | static inline int task_in_rq(struct task *t) |
| 153 | { |
| 154 | return t->rq.node.leaf_p != NULL; |
| 155 | } |
| 156 | |
| 157 | /* return 0 if task is in wait queue, otherwise non-zero */ |
| 158 | static inline int task_in_wq(struct task *t) |
| 159 | { |
| 160 | return t->wq.node.leaf_p != NULL; |
| 161 | } |
| 162 | |
Willy Tarreau | fdccded | 2008-08-29 18:19:04 +0200 | [diff] [blame] | 163 | /* puts the task <t> in run queue with reason flags <f>, and returns <t> */ |
Willy Tarreau | 4df8206 | 2008-08-29 15:26:14 +0200 | [diff] [blame] | 164 | struct task *__task_wakeup(struct task *t); |
Willy Tarreau | fdccded | 2008-08-29 18:19:04 +0200 | [diff] [blame] | 165 | static inline struct task *task_wakeup(struct task *t, unsigned int f) |
Willy Tarreau | 4df8206 | 2008-08-29 15:26:14 +0200 | [diff] [blame] | 166 | { |
Willy Tarreau | 4726f53 | 2009-03-07 17:25:21 +0100 | [diff] [blame] | 167 | if (likely(!task_in_rq(t))) |
Willy Tarreau | fdccded | 2008-08-29 18:19:04 +0200 | [diff] [blame] | 168 | __task_wakeup(t); |
| 169 | t->state |= f; |
| 170 | return t; |
Willy Tarreau | 4df8206 | 2008-08-29 15:26:14 +0200 | [diff] [blame] | 171 | } |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 172 | |
Willy Tarreau | 4726f53 | 2009-03-07 17:25:21 +0100 | [diff] [blame] | 173 | /* |
| 174 | * Unlink the task from the wait queue, and possibly update the last_timer |
| 175 | * pointer. A pointer to the task itself is returned. The task *must* already |
| 176 | * be in the wait queue before calling this function. If unsure, use the safer |
| 177 | * task_unlink_wq() function. |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 178 | */ |
Willy Tarreau | 4726f53 | 2009-03-07 17:25:21 +0100 | [diff] [blame] | 179 | static inline struct task *__task_unlink_wq(struct task *t) |
| 180 | { |
| 181 | eb32_delete(&t->wq); |
| 182 | if (last_timer == t) |
| 183 | last_timer = NULL; |
| 184 | return t; |
| 185 | } |
| 186 | |
| 187 | static inline struct task *task_unlink_wq(struct task *t) |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 188 | { |
Willy Tarreau | 4726f53 | 2009-03-07 17:25:21 +0100 | [diff] [blame] | 189 | if (likely(task_in_wq(t))) |
| 190 | __task_unlink_wq(t); |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 191 | return t; |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 192 | } |
| 193 | |
| 194 | /* |
Willy Tarreau | 4726f53 | 2009-03-07 17:25:21 +0100 | [diff] [blame] | 195 | * Unlink the task from the run queue. The run_queue size and number of niced |
| 196 | * tasks are updated too. A pointer to the task itself is returned. The task |
| 197 | * *must* already be in the wait queue before calling this function. If unsure, |
| 198 | * use the safer task_unlink_rq() function. |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 199 | */ |
Willy Tarreau | 4726f53 | 2009-03-07 17:25:21 +0100 | [diff] [blame] | 200 | static inline struct task *__task_unlink_rq(struct task *t) |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 201 | { |
Willy Tarreau | 4726f53 | 2009-03-07 17:25:21 +0100 | [diff] [blame] | 202 | eb32_delete(&t->rq); |
| 203 | run_queue--; |
| 204 | if (likely(t->nice)) |
| 205 | niced_tasks--; |
Willy Tarreau | ce44f12 | 2008-07-05 18:16:19 +0200 | [diff] [blame] | 206 | return t; |
| 207 | } |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 208 | |
Willy Tarreau | 4726f53 | 2009-03-07 17:25:21 +0100 | [diff] [blame] | 209 | static inline struct task *task_unlink_rq(struct task *t) |
| 210 | { |
| 211 | if (likely(task_in_rq(t))) |
| 212 | __task_unlink_rq(t); |
| 213 | return t; |
| 214 | } |
| 215 | |
Willy Tarreau | ce44f12 | 2008-07-05 18:16:19 +0200 | [diff] [blame] | 216 | /* |
| 217 | * Unlinks the task and adjusts run queue stats. |
| 218 | * A pointer to the task itself is returned. |
| 219 | */ |
| 220 | static inline struct task *task_delete(struct task *t) |
| 221 | { |
Willy Tarreau | 4726f53 | 2009-03-07 17:25:21 +0100 | [diff] [blame] | 222 | task_unlink_wq(t); |
| 223 | task_unlink_rq(t); |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 224 | return t; |
| 225 | } |
| 226 | |
| 227 | /* |
| 228 | * Initialize a new task. The bare minimum is performed (queue pointers and state). |
| 229 | * The task is returned. |
| 230 | */ |
| 231 | static inline struct task *task_init(struct task *t) |
| 232 | { |
Willy Tarreau | 4726f53 | 2009-03-07 17:25:21 +0100 | [diff] [blame] | 233 | t->wq.node.leaf_p = NULL; |
| 234 | t->rq.node.leaf_p = NULL; |
Willy Tarreau | fdccded | 2008-08-29 18:19:04 +0200 | [diff] [blame] | 235 | t->state = TASK_SLEEPING; |
Willy Tarreau | 91e9993 | 2008-06-30 07:51:00 +0200 | [diff] [blame] | 236 | t->nice = 0; |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 237 | return t; |
| 238 | } |
| 239 | |
| 240 | /* |
| 241 | * frees a task. Its context must have been freed since it will be lost. |
| 242 | */ |
| 243 | static inline void task_free(struct task *t) |
| 244 | { |
Willy Tarreau | c6ca1a0 | 2007-05-13 19:43:47 +0200 | [diff] [blame] | 245 | pool_free2(pool2_task, t); |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 246 | } |
| 247 | |
Willy Tarreau | 4726f53 | 2009-03-07 17:25:21 +0100 | [diff] [blame] | 248 | /* Place <task> into the wait queue, where it may already be. If the expiration |
Willy Tarreau | 531cf0c | 2009-03-08 16:35:27 +0100 | [diff] [blame] | 249 | * timer is infinite, do nothing and rely on wake_expired_task to clean up. |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 250 | */ |
Willy Tarreau | 531cf0c | 2009-03-08 16:35:27 +0100 | [diff] [blame] | 251 | void __task_queue(struct task *task); |
| 252 | static inline void task_queue(struct task *task) |
| 253 | { |
| 254 | /* If we already have a place in the wait queue no later than the |
| 255 | * timeout we're trying to set, we'll stay there, because it is very |
| 256 | * unlikely that we will reach the timeout anyway. If the timeout |
| 257 | * has been disabled, it's useless to leave the queue as well. We'll |
| 258 | * rely on wake_expired_tasks() to catch the node and move it to the |
| 259 | * proper place should it ever happen. Finally we only add the task |
| 260 | * to the queue if it was not there or if it was further than what |
| 261 | * we want. |
| 262 | */ |
| 263 | if (!tick_isset(task->expire)) |
| 264 | return; |
| 265 | |
| 266 | if (((tick_to_timer(task->expire) - task->wq.key) & TIMER_SIGN_BIT) |
| 267 | || !task_in_wq(task)) |
| 268 | __task_queue(task); |
| 269 | } |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 270 | |
| 271 | /* |
| 272 | * This does 4 things : |
| 273 | * - wake up all expired tasks |
| 274 | * - call all runnable tasks |
| 275 | * - call maintain_proxies() to enable/disable the listeners |
Willy Tarreau | d825eef | 2007-05-12 22:35:00 +0200 | [diff] [blame] | 276 | * - return the date of next event in <next> or eternity. |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 277 | */ |
| 278 | |
Willy Tarreau | 0c303ee | 2008-07-07 00:09:58 +0200 | [diff] [blame] | 279 | void process_runnable_tasks(int *next); |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 280 | |
Willy Tarreau | 58b458d | 2008-06-29 22:40:23 +0200 | [diff] [blame] | 281 | /* |
| 282 | * Extract all expired timers from the timer queue, and wakes up all |
| 283 | * associated tasks. Returns the date of next event (or eternity). |
| 284 | */ |
Willy Tarreau | 0c303ee | 2008-07-07 00:09:58 +0200 | [diff] [blame] | 285 | void wake_expired_tasks(int *next); |
Willy Tarreau | 58b458d | 2008-06-29 22:40:23 +0200 | [diff] [blame] | 286 | |
Willy Tarreau | d0a201b | 2009-03-08 15:53:06 +0100 | [diff] [blame] | 287 | /* Perform minimal initializations, report 0 in case of error, 1 if OK. */ |
| 288 | int init_task(); |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 289 | |
| 290 | #endif /* _PROTO_TASK_H */ |
| 291 | |
| 292 | /* |
| 293 | * Local variables: |
| 294 | * c-indent-level: 8 |
| 295 | * c-basic-offset: 8 |
| 296 | * End: |
| 297 | */ |