Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 1 | /* |
| 2 | * Task management functions. |
| 3 | * |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 4 | * Copyright 2000-2008 Willy Tarreau <w@1wt.eu> |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 5 | * |
| 6 | * This program is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU General Public License |
| 8 | * as published by the Free Software Foundation; either version |
| 9 | * 2 of the License, or (at your option) any later version. |
| 10 | * |
| 11 | */ |
| 12 | |
Willy Tarreau | 2dd0d47 | 2006-06-29 17:53:05 +0200 | [diff] [blame] | 13 | #include <common/config.h> |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 14 | #include <common/eb32tree.h> |
Willy Tarreau | c6ca1a0 | 2007-05-13 19:43:47 +0200 | [diff] [blame] | 15 | #include <common/memory.h> |
Willy Tarreau | 2dd0d47 | 2006-06-29 17:53:05 +0200 | [diff] [blame] | 16 | #include <common/mini-clist.h> |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 17 | #include <common/standard.h> |
Willy Tarreau | a6a6a93 | 2007-04-28 22:40:08 +0200 | [diff] [blame] | 18 | #include <common/time.h> |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 19 | |
Willy Tarreau | d825eef | 2007-05-12 22:35:00 +0200 | [diff] [blame] | 20 | #include <proto/proxy.h> |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 21 | #include <proto/task.h> |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 22 | #include <types/task.h> |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 23 | |
/* memory pool used to allocate struct task; created by init_task() */
struct pool_head *pool2_task;

/* number of tasks currently linked in the run queue: incremented by
 * task_wakeup(), decremented by process_runnable_tasks().
 */
unsigned int run_queue = 0;
Willy Tarreau | 964c936 | 2007-01-07 00:38:00 +0100 | [diff] [blame] | 27 | |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 28 | /* Principle of the wait queue. |
| 29 | * |
| 30 | * We want to be able to tell whether an expiration date is before or after the |
| 31 | * current time <now>. We KNOW that expiration dates are never too far apart, |
| 32 | * because they are already computed by adding integer numbers of milliseconds |
| 33 | * to the current date. |
| 34 | * We also know that almost all dates will be in the future, and that a very |
| 35 | * small part of them will be in the past, they are the ones which have expired |
| 36 | * since last time we checked them. |
| 37 | * |
| 38 | * The current implementation uses a wrapping time cut into 3 ranges : |
| 39 | * - previous : those ones are expired by definition |
| 40 | * - current : some are expired, some are not |
| 41 | * - next : none are expired |
| 42 | * |
| 43 | * We use the higher two bits of the timers expressed in ticks (milliseconds) |
| 44 | * to determine which range a timer is in, compared to <now> : |
| 45 | * |
| 46 | * now previous current next0 next1 |
| 47 | * [31:30] [31:30] [31:30] [31:30] [31:30] |
| 48 | * 00 11 00 01 10 |
| 49 | * 01 00 01 10 11 |
| 50 | * 10 01 10 11 00 |
| 51 | * 11 10 11 00 01 |
| 52 | * |
| 53 | * By definition, <current> is the range containing <now> as well as all timers |
| 54 | * which have the same 2 high bits as <now>, <previous> is the range just |
| 55 | * before, which contains all timers whose high bits equal those of <now> minus |
| 56 | * 1. Last, <next> is composed of the two remaining ranges. |
| 57 | * |
| 58 | * For ease of implementation, the timers will then be stored into 4 queues 0-3 |
| 59 | * determined by the 2 higher bits of the timer. The expiration algorithm is |
| 60 | * very simple : |
| 61 | * - expire everything in <previous>=queue[((now>>30)-1)&3] |
| 62 | * - expire from <current>=queue[(now>>30)&3] everything where timer <= now |
| 63 | * |
| 64 | * With this algorithm, it's possible to queue tasks meant to expire 24.8 days |
| 65 | * in the future, and still be able to detect events remaining unprocessed for |
| 66 | * the last 12.4 days! Note that the principle might be extended to any number |
| 67 | * of higher bits as long as there is only one range for expired tasks. For |
| 68 | * instance, using the 8 higher bits to index the range, we would have one past |
| 69 | * range of 4.6 hours (24 bits in ms), and 254 ranges in the future totalizing |
| 70 | * 49.3 days. This would eat more memory for a very little added benefit. |
| 71 | * |
| 72 | * Also, in order to maintain the ability to perform time comparisons, it is |
| 73 | * recommended to avoid using the <next1> range above, as values in this range |
| 74 | * may not easily be compared to <now> outside of these functions as it is the |
| 75 | * opposite of the <current> range, and <timer>-<now> may randomly be positive |
| 76 | * or negative. That means we're left with +/- 12 days timers. |
| 77 | * |
| 78 | * To keep timers ordered, we use 4 ebtrees [0..3]. To keep computation low, we |
| 79 | * may use (seconds*1024)+milliseconds, which preserves ordering even though we |
| 80 | * can't do real computations on it. Future evolutions could make use of 1024th |
| 81 | * of seconds instead of milliseconds, with the special value 0 avoided (and |
| 82 | * replaced with 1), so that zero indicates the timer is not set. |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 83 | */ |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 84 | |
/* Geometry of the timer trees.
 *
 *  TIMER_TICK_BITS  : width of a timer key, in bits
 *  TIMER_TREE_BITS  : number of upper key bits used to select a tree
 *  TIMER_TREES      : number of trees (one per value of the upper bits)
 *  TIMER_TREE_SHIFT : right shift which brings the tree bits down to bit 0
 *  TIMER_TREE_MASK  : mask applied to a tree index to make it wrap
 *  TIMER_TICK_MASK  : mask covering all TIMER_TICK_BITS bits of a key
 *  TIMER_SIGN_BIT   : highest bit of a key; set in (a - b) when <a> is
 *                     before <b> in wrapping arithmetic
 *
 * TIMER_SIGN_BIT is built from an unsigned constant: shifting a signed 1 into
 * bit 31 of a 32-bit int is undefined behaviour, and the resulting negative
 * value would sign-extend if it were ever combined with a wider type.
 */
#define TIMER_TICK_BITS       32
#define TIMER_TREE_BITS       2
#define TIMER_TREES           (1 << TIMER_TREE_BITS)
#define TIMER_TREE_SHIFT      (TIMER_TICK_BITS - TIMER_TREE_BITS)
#define TIMER_TREE_MASK       (TIMER_TREES - 1)
#define TIMER_TICK_MASK       ((1U << (TIMER_TICK_BITS-1)) * 2 - 1)
#define TIMER_SIGN_BIT        (1U << (TIMER_TICK_BITS - 1))
Willy Tarreau | c6ca1a0 | 2007-05-13 19:43:47 +0200 | [diff] [blame] | 92 | |
/* Wait queue and run queue. Both are arrays of TIMER_TREES ebtrees indexed by
 * ticks_to_tree() of the node's key: the expiration date in ticks for
 * <timers>, the value of <rqueue_ticks> at insertion time for <rqueue>.
 */
static struct eb_root timers[TIMER_TREES]; /* trees with MSB 00, 01, 10 and 11 */
static struct eb_root rqueue[TIMER_TREES]; /* trees constituting the run queue */
static unsigned int rqueue_ticks; /* insertion count */
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 96 | |
| 97 | /* returns an ordered key based on an expiration date. */ |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 98 | static inline unsigned int timeval_to_ticks(const struct timeval *t) |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 99 | { |
| 100 | unsigned int key; |
| 101 | |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 102 | key = ((unsigned int)t->tv_sec * 1000) + ((unsigned int)t->tv_usec / 1000); |
| 103 | key &= TIMER_TICK_MASK; |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 104 | return key; |
| 105 | } |
| 106 | |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 107 | /* returns a tree number based on a ticks value */ |
| 108 | static inline unsigned int ticks_to_tree(unsigned int ticks) |
| 109 | { |
| 110 | return (ticks >> TIMER_TREE_SHIFT) & TIMER_TREE_MASK; |
| 111 | } |
| 112 | |
/* Returns the index of the tree the expiration date <t> belongs to : the date
 * is first converted to ticks, then the ticks to a tree number.
 */
static inline unsigned int timeval_to_tree(const struct timeval *t)
{
	unsigned int ticks = timeval_to_ticks(t);

	return ticks_to_tree(ticks);
}
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 118 | |
Willy Tarreau | c6ca1a0 | 2007-05-13 19:43:47 +0200 | [diff] [blame] | 119 | /* perform minimal intializations, report 0 in case of error, 1 if OK. */ |
| 120 | int init_task() |
| 121 | { |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 122 | memset(&timers, 0, sizeof(timers)); |
Willy Tarreau | 58b458d | 2008-06-29 22:40:23 +0200 | [diff] [blame^] | 123 | memset(&rqueue, 0, sizeof(rqueue)); |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 124 | pool2_task = create_pool("task", sizeof(struct task), MEM_F_SHARED); |
| 125 | return pool2_task != NULL; |
Willy Tarreau | 964c936 | 2007-01-07 00:38:00 +0100 | [diff] [blame] | 126 | } |
| 127 | |
Willy Tarreau | 58b458d | 2008-06-29 22:40:23 +0200 | [diff] [blame^] | 128 | /* puts the task <t> in run queue <q>, and returns <t> */ |
| 129 | struct task *task_wakeup(struct task *t) |
Willy Tarreau | e33aece | 2007-04-30 13:15:14 +0200 | [diff] [blame] | 130 | { |
Willy Tarreau | 58b458d | 2008-06-29 22:40:23 +0200 | [diff] [blame^] | 131 | if (t->state == TASK_RUNNING) |
| 132 | return t; |
| 133 | |
| 134 | if (likely(t->eb.node.leaf_p)) |
| 135 | eb32_delete(&t->eb); |
| 136 | |
| 137 | run_queue++; |
| 138 | t->eb.key = ++rqueue_ticks; |
| 139 | t->state = TASK_RUNNING; |
| 140 | |
| 141 | eb32_insert(&rqueue[ticks_to_tree(t->eb.key)], &t->eb); |
| 142 | return t; |
Willy Tarreau | e33aece | 2007-04-30 13:15:14 +0200 | [diff] [blame] | 143 | } |
Willy Tarreau | d825eef | 2007-05-12 22:35:00 +0200 | [diff] [blame] | 144 | |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 145 | /* |
| 146 | * task_queue() |
| 147 | * |
| 148 | * Inserts a task into the wait queue at the position given by its expiration |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 149 | * date. Note that the task must *not* already be in the wait queue nor in the |
| 150 | * run queue, otherwise unpredictable results may happen. Tasks queued with an |
| 151 | * eternity expiration date are simply returned. Last, tasks must not be queued |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 152 | * further than the end of the next tree, which is between <now_ms> and |
| 153 | * <now_ms> + TIMER_SIGN_BIT ms (now+12days..24days in 32bit). |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 154 | */ |
| 155 | struct task *task_queue(struct task *task) |
Willy Tarreau | 964c936 | 2007-01-07 00:38:00 +0100 | [diff] [blame] | 156 | { |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 157 | if (unlikely(tv_iseternity(&task->expire))) |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 158 | return task; |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 159 | |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 160 | task->eb.key = timeval_to_ticks(&task->expire); |
| 161 | #ifdef DEBUG_CHECK_INVALID_EXPIRATION_DATES |
| 162 | if ((task->eb.key - now_ms) & TIMER_SIGN_BIT) |
| 163 | /* we're queuing too far away or in the past (most likely) */ |
| 164 | return task; |
| 165 | #endif |
| 166 | eb32_insert(&timers[ticks_to_tree(task->eb.key)], &task->eb); |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 167 | return task; |
Willy Tarreau | 964c936 | 2007-01-07 00:38:00 +0100 | [diff] [blame] | 168 | } |
| 169 | |
| 170 | |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 171 | /* |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 172 | * Extract all expired timers from the timer queue, and wakes up all |
Willy Tarreau | d825eef | 2007-05-12 22:35:00 +0200 | [diff] [blame] | 173 | * associated tasks. Returns the date of next event (or eternity). |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 174 | */ |
Willy Tarreau | d825eef | 2007-05-12 22:35:00 +0200 | [diff] [blame] | 175 | void wake_expired_tasks(struct timeval *next) |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 176 | { |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 177 | struct task *task; |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 178 | struct eb32_node *eb; |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 179 | unsigned int now_tree; |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 180 | unsigned int tree; |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 181 | |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 182 | /* In theory, we should : |
| 183 | * - wake all tasks from the <previous> tree |
| 184 | * - wake all expired tasks from the <current> tree |
| 185 | * - scan <next> trees for next expiration date if not found earlier. |
| 186 | * But we can do all this more easily : we scan all 3 trees before we |
| 187 | * wrap, and wake everything expired from there, then stop on the first |
| 188 | * non-expired entry. |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 189 | */ |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 190 | |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 191 | now_tree = ticks_to_tree(now_ms); |
| 192 | tree = (now_tree - 1) & TIMER_TREE_MASK; |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 193 | do { |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 194 | eb = eb32_first(&timers[tree]); |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 195 | while (eb) { |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 196 | task = eb32_entry(eb, struct task, eb); |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 197 | if ((now_ms - eb->key) & TIMER_SIGN_BIT) { |
| 198 | /* note that we don't need this check for the <previous> |
| 199 | * tree, but it's cheaper than duplicating the code. |
| 200 | */ |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 201 | *next = task->expire; |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 202 | return; |
| 203 | } |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 204 | |
Willy Tarreau | af754fc | 2008-06-29 19:25:52 +0200 | [diff] [blame] | 205 | /* detach the task from the queue and add the task to the run queue */ |
| 206 | eb = eb32_next(eb); |
Willy Tarreau | 58b458d | 2008-06-29 22:40:23 +0200 | [diff] [blame^] | 207 | task_wakeup(task); |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 208 | } |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 209 | tree = (tree + 1) & TIMER_TREE_MASK; |
| 210 | } while (((tree - now_tree) & TIMER_TREE_MASK) < TIMER_TREES/2); |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 211 | |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 212 | /* We have found no task to expire in any tree */ |
| 213 | tv_eternity(next); |
| 214 | return; |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 215 | } |
| 216 | |
Willy Tarreau | 58b458d | 2008-06-29 22:40:23 +0200 | [diff] [blame^] | 217 | /* The run queue is chronologically sorted in a tree. An insertion counter is |
| 218 | * used to assign a position to each task. This counter may be combined with |
| 219 | * other variables (eg: nice value) to set the final position in the tree. The |
| 220 | * counter may wrap without a problem, of course. We then limit the number of |
| 221 | * tasks processed at once to 1/8 of the number of tasks in the queue, so that |
| 222 | * general latency remains low and so that task positions have a chance to be |
| 223 | * considered. It also reduces the number of trees to be evaluated when no task |
| 224 | * remains. |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 225 | * |
Willy Tarreau | 58b458d | 2008-06-29 22:40:23 +0200 | [diff] [blame^] | 226 | * Just like with timers, we start with tree[(current - 1)], which holds past |
| 227 | * values, and stop when we reach the middle of the list. In practise, we visit |
| 228 | * 3 out of 4 trees. |
| 229 | * |
| 230 | * The function adjusts <next> if a new event is closer. |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 231 | */ |
Willy Tarreau | d825eef | 2007-05-12 22:35:00 +0200 | [diff] [blame] | 232 | void process_runnable_tasks(struct timeval *next) |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 233 | { |
Willy Tarreau | d825eef | 2007-05-12 22:35:00 +0200 | [diff] [blame] | 234 | struct timeval temp; |
Willy Tarreau | 964c936 | 2007-01-07 00:38:00 +0100 | [diff] [blame] | 235 | struct task *t; |
Willy Tarreau | 58b458d | 2008-06-29 22:40:23 +0200 | [diff] [blame^] | 236 | struct eb32_node *eb; |
| 237 | unsigned int tree, stop; |
| 238 | unsigned int max_processed; |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 239 | |
Willy Tarreau | 58b458d | 2008-06-29 22:40:23 +0200 | [diff] [blame^] | 240 | if (!run_queue) |
| 241 | return; |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 242 | |
Willy Tarreau | 58b458d | 2008-06-29 22:40:23 +0200 | [diff] [blame^] | 243 | max_processed = (run_queue + 7) / 8; |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 244 | |
Willy Tarreau | 58b458d | 2008-06-29 22:40:23 +0200 | [diff] [blame^] | 245 | tree = ticks_to_tree(rqueue_ticks); |
| 246 | stop = (tree + TIMER_TREES / 2) & TIMER_TREE_MASK; |
| 247 | tree = (tree - 1) & TIMER_TREE_MASK; |
Willy Tarreau | 964c936 | 2007-01-07 00:38:00 +0100 | [diff] [blame] | 248 | |
Willy Tarreau | 58b458d | 2008-06-29 22:40:23 +0200 | [diff] [blame^] | 249 | do { |
| 250 | eb = eb32_first(&rqueue[tree]); |
| 251 | while (eb) { |
| 252 | t = eb32_entry(eb, struct task, eb); |
| 253 | |
| 254 | /* detach the task from the queue and add the task to the run queue */ |
| 255 | eb = eb32_next(eb); |
| 256 | |
| 257 | run_queue--; |
| 258 | t->state = TASK_IDLE; |
| 259 | eb32_delete(&t->eb); |
| 260 | |
| 261 | t->process(t, &temp); |
| 262 | tv_bound(next, &temp); |
| 263 | |
| 264 | if (!--max_processed) |
| 265 | return; |
| 266 | } |
| 267 | tree = (tree + 1) & TIMER_TREE_MASK; |
| 268 | } while (tree != stop); |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 269 | } |
| 270 | |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 271 | /* |
| 272 | * Local variables: |
| 273 | * c-indent-level: 8 |
| 274 | * c-basic-offset: 8 |
| 275 | * End: |
| 276 | */ |