Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 1 | /* |
| 2 | * Task management functions. |
| 3 | * |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 4 | * Copyright 2000-2008 Willy Tarreau <w@1wt.eu> |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 5 | * |
| 6 | * This program is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU General Public License |
| 8 | * as published by the Free Software Foundation; either version |
| 9 | * 2 of the License, or (at your option) any later version. |
| 10 | * |
| 11 | */ |
| 12 | |
Willy Tarreau | 2dd0d47 | 2006-06-29 17:53:05 +0200 | [diff] [blame] | 13 | #include <common/config.h> |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 14 | #include <common/eb32tree.h> |
Willy Tarreau | c6ca1a0 | 2007-05-13 19:43:47 +0200 | [diff] [blame] | 15 | #include <common/memory.h> |
Willy Tarreau | 2dd0d47 | 2006-06-29 17:53:05 +0200 | [diff] [blame] | 16 | #include <common/mini-clist.h> |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 17 | #include <common/standard.h> |
Willy Tarreau | a6a6a93 | 2007-04-28 22:40:08 +0200 | [diff] [blame] | 18 | #include <common/time.h> |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 19 | |
Willy Tarreau | d825eef | 2007-05-12 22:35:00 +0200 | [diff] [blame] | 20 | #include <proto/proxy.h> |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 21 | #include <proto/task.h> |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 22 | #include <types/task.h> |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 23 | |
/* Pool sized for struct task objects; set up by init_task(). */
struct pool_head *pool2_task;

/* Head of the run queue. wake_expired_tasks() links expired tasks onto it and
 * process_runnable_tasks() walks it. It starts out empty (NULL).
 */
void *run_queue = NULL;
Willy Tarreau | 964c936 | 2007-01-07 00:38:00 +0100 | [diff] [blame] | 27 | |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 28 | /* Principle of the wait queue. |
| 29 | * |
| 30 | * We want to be able to tell whether an expiration date is before or after the |
| 31 | * current time <now>. We KNOW that expiration dates are never too far apart, |
| 32 | * because they are already computed by adding integer numbers of milliseconds |
| 33 | * to the current date. |
| 34 | * We also know that almost all dates will be in the future, and that a very |
| 35 | * small part of them will be in the past, they are the ones which have expired |
| 36 | * since last time we checked them. |
| 37 | * |
| 38 | * The current implementation uses a wrapping time cut into 3 ranges : |
| 39 | * - previous : those ones are expired by definition |
| 40 | * - current : some are expired, some are not |
| 41 | * - next : none are expired |
| 42 | * |
| 43 | * We use the higher two bits of the timers expressed in ticks (milliseconds) |
| 44 | * to determine which range a timer is in, compared to <now> : |
| 45 | * |
| 46 | * now previous current next0 next1 |
| 47 | * [31:30] [31:30] [31:30] [31:30] [31:30] |
| 48 | * 00 11 00 01 10 |
| 49 | * 01 00 01 10 11 |
| 50 | * 10 01 10 11 00 |
| 51 | * 11 10 11 00 01 |
| 52 | * |
| 53 | * By definition, <current> is the range containing <now> as well as all timers |
| 54 | * which have the same 2 high bits as <now>, <previous> is the range just |
| 55 | * before, which contains all timers whose high bits equal those of <now> minus |
| 56 | * 1. Last, <next> is composed of the two remaining ranges. |
| 57 | * |
| 58 | * For ease of implementation, the timers will then be stored into 4 queues 0-3 |
| 59 | * determined by the 2 higher bits of the timer. The expiration algorithm is |
| 60 | * very simple : |
| 61 | * - expire everything in <previous>=queue[((now>>30)-1)&3] |
| 62 | * - expire from <current>=queue[(now>>30)&3] everything where timer <= now |
| 63 | * |
| 64 | * With this algorithm, it's possible to queue tasks meant to expire 24.8 days |
| 65 | * in the future, and still be able to detect events remaining unprocessed for |
| 66 | * the last 12.4 days! Note that the principle might be extended to any number |
| 67 | * of higher bits as long as there is only one range for expired tasks. For |
| 68 | * instance, using the 8 higher bits to index the range, we would have one past |
| 69 | * range of 4.6 hours (24 bits in ms), and 254 ranges in the future totalizing |
| 70 | * 49.3 days. This would eat more memory for a very little added benefit. |
| 71 | * |
| 72 | * Also, in order to maintain the ability to perform time comparisons, it is |
| 73 | * recommended to avoid using the <next1> range above, as values in this range |
| 74 | * may not easily be compared to <now> outside of these functions as it is the |
| 75 | * opposite of the <current> range, and <timer>-<now> may randomly be positive |
| 76 | * or negative. That means we're left with +/- 12 days timers. |
| 77 | * |
| 78 | * To keep timers ordered, we use 4 ebtrees [0..3]. To keep computation low, we |
| 79 | * may use (seconds*1024)+milliseconds, which preserves ordering even though we |
| 80 | * can't do real computations on it. Future evolutions could make use of 1024th |
| 81 | * of seconds instead of milliseconds, with the special value 0 avoided (and |
| 82 | * replaced with 1), so that zero indicates the timer is not set. |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 83 | */ |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 84 | |
/* Geometry of the timer trees. Tick keys are TIMER_TICK_BITS wide and their
 * top TIMER_TREE_BITS bits select one of the TIMER_TREES trees.
 */
#define TIMER_TICK_BITS       32
#define TIMER_TREE_BITS       2
#define TIMER_TREES           (1 << TIMER_TREE_BITS)
#define TIMER_TREE_SHIFT      (TIMER_TICK_BITS - TIMER_TREE_BITS)
#define TIMER_TREE_MASK       (TIMER_TREES - 1)
/* All-ones mask over TIMER_TICK_BITS bits, written as (2^(bits-1))*2-1 so that
 * no shift by the full width of the type is ever performed.
 */
#define TIMER_TICK_MASK       ((1U << (TIMER_TICK_BITS-1)) * 2 - 1)
/* Most significant bit of a tick value. The constant must be unsigned:
 * shifting a signed 1 into bit 31 is undefined behaviour and would produce a
 * negative value, whereas this mask is only ever applied to unsigned
 * differences between two tick values.
 */
#define TIMER_SIGN_BIT        (1U << (TIMER_TICK_BITS - 1))
Willy Tarreau | c6ca1a0 | 2007-05-13 19:43:47 +0200 | [diff] [blame] | 92 | |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 93 | static struct eb_root timers[TIMER_TREES]; /* trees with MSB 00, 01, 10 and 11 */ |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 94 | |
| 95 | /* returns an ordered key based on an expiration date. */ |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 96 | static inline unsigned int timeval_to_ticks(const struct timeval *t) |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 97 | { |
| 98 | unsigned int key; |
| 99 | |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 100 | key = ((unsigned int)t->tv_sec * 1000) + ((unsigned int)t->tv_usec / 1000); |
| 101 | key &= TIMER_TICK_MASK; |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 102 | return key; |
| 103 | } |
| 104 | |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 105 | /* returns a tree number based on a ticks value */ |
| 106 | static inline unsigned int ticks_to_tree(unsigned int ticks) |
| 107 | { |
| 108 | return (ticks >> TIMER_TREE_SHIFT) & TIMER_TREE_MASK; |
| 109 | } |
| 110 | |
/* Maps the expiration date <t> to the index of the tree holding it, by
 * converting it to ticks first and then taking the tree number of that value.
 */
static inline unsigned int timeval_to_tree(const struct timeval *t)
{
	unsigned int ticks = timeval_to_ticks(t);

	return ticks_to_tree(ticks);
}
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 116 | |
Willy Tarreau | c6ca1a0 | 2007-05-13 19:43:47 +0200 | [diff] [blame] | 117 | /* perform minimal intializations, report 0 in case of error, 1 if OK. */ |
| 118 | int init_task() |
| 119 | { |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 120 | memset(&timers, 0, sizeof(timers)); |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 121 | pool2_task = create_pool("task", sizeof(struct task), MEM_F_SHARED); |
| 122 | return pool2_task != NULL; |
Willy Tarreau | 964c936 | 2007-01-07 00:38:00 +0100 | [diff] [blame] | 123 | } |
| 124 | |
/* Thin wrapper: passes <t> to __task_wakeup() (not defined in this file) and
 * returns its result unchanged.
 */
struct task *_task_wakeup(struct task *t)
{
	struct task *woken = __task_wakeup(t);

	return woken;
}
Willy Tarreau | d825eef | 2007-05-12 22:35:00 +0200 | [diff] [blame] | 129 | |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 130 | /* |
| 131 | * task_queue() |
| 132 | * |
| 133 | * Inserts a task into the wait queue at the position given by its expiration |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 134 | * date. Note that the task must *not* already be in the wait queue nor in the |
| 135 | * run queue, otherwise unpredictable results may happen. Tasks queued with an |
| 136 | * eternity expiration date are simply returned. Last, tasks must not be queued |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 137 | * further than the end of the next tree, which is between <now_ms> and |
| 138 | * <now_ms> + TIMER_SIGN_BIT ms (now+12days..24days in 32bit). |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 139 | */ |
| 140 | struct task *task_queue(struct task *task) |
Willy Tarreau | 964c936 | 2007-01-07 00:38:00 +0100 | [diff] [blame] | 141 | { |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 142 | if (unlikely(tv_iseternity(&task->expire))) |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 143 | return task; |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 144 | |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 145 | task->eb.key = timeval_to_ticks(&task->expire); |
| 146 | #ifdef DEBUG_CHECK_INVALID_EXPIRATION_DATES |
| 147 | if ((task->eb.key - now_ms) & TIMER_SIGN_BIT) |
| 148 | /* we're queuing too far away or in the past (most likely) */ |
| 149 | return task; |
| 150 | #endif |
| 151 | eb32_insert(&timers[ticks_to_tree(task->eb.key)], &task->eb); |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 152 | return task; |
Willy Tarreau | 964c936 | 2007-01-07 00:38:00 +0100 | [diff] [blame] | 153 | } |
| 154 | |
| 155 | |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 156 | /* |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 157 | * Extract all expired timers from the timer queue, and wakes up all |
Willy Tarreau | d825eef | 2007-05-12 22:35:00 +0200 | [diff] [blame] | 158 | * associated tasks. Returns the date of next event (or eternity). |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 159 | * |
| 160 | */ |
Willy Tarreau | d825eef | 2007-05-12 22:35:00 +0200 | [diff] [blame] | 161 | void wake_expired_tasks(struct timeval *next) |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 162 | { |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 163 | struct task *task; |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 164 | struct eb32_node *eb; |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 165 | unsigned int now_tree; |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 166 | unsigned int tree; |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 167 | |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 168 | /* In theory, we should : |
| 169 | * - wake all tasks from the <previous> tree |
| 170 | * - wake all expired tasks from the <current> tree |
| 171 | * - scan <next> trees for next expiration date if not found earlier. |
| 172 | * But we can do all this more easily : we scan all 3 trees before we |
| 173 | * wrap, and wake everything expired from there, then stop on the first |
| 174 | * non-expired entry. |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 175 | */ |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 176 | |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 177 | now_tree = ticks_to_tree(now_ms); |
| 178 | tree = (now_tree - 1) & TIMER_TREE_MASK; |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 179 | do { |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 180 | eb = eb32_first(&timers[tree]); |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 181 | while (eb) { |
| 182 | struct eb32_node *next_eb; |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 183 | |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 184 | task = eb32_entry(eb, struct task, eb); |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 185 | if ((now_ms - eb->key) & TIMER_SIGN_BIT) { |
| 186 | /* note that we don't need this check for the <previous> |
| 187 | * tree, but it's cheaper than duplicating the code. |
| 188 | */ |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 189 | *next = task->expire; |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 190 | return; |
| 191 | } |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 192 | |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 193 | /* detach the task from the queue */ |
| 194 | next_eb = eb32_next(eb); |
| 195 | eb32_delete(eb); |
| 196 | eb = next_eb; |
| 197 | |
| 198 | /* and add the task to the run queue */ |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 199 | DLIST_ADD(run_queue, &task->qlist); |
| 200 | task->state = TASK_RUNNING; |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 201 | } |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 202 | tree = (tree + 1) & TIMER_TREE_MASK; |
| 203 | } while (((tree - now_tree) & TIMER_TREE_MASK) < TIMER_TREES/2); |
Willy Tarreau | 9789f7b | 2008-06-24 08:17:16 +0200 | [diff] [blame] | 204 | |
Willy Tarreau | 28c41a4 | 2008-06-29 17:00:59 +0200 | [diff] [blame] | 205 | /* We have found no task to expire in any tree */ |
| 206 | tv_eternity(next); |
| 207 | return; |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 208 | } |
| 209 | |
| 210 | /* |
| 211 | * This does 4 things : |
| 212 | * - wake up all expired tasks |
| 213 | * - call all runnable tasks |
| 214 | * - call maintain_proxies() to enable/disable the listeners |
Willy Tarreau | d825eef | 2007-05-12 22:35:00 +0200 | [diff] [blame] | 215 | * - return the date of next event in <next> or eternity. |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 216 | * |
| 217 | */ |
Willy Tarreau | d825eef | 2007-05-12 22:35:00 +0200 | [diff] [blame] | 218 | void process_runnable_tasks(struct timeval *next) |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 219 | { |
Willy Tarreau | d825eef | 2007-05-12 22:35:00 +0200 | [diff] [blame] | 220 | struct timeval temp; |
Willy Tarreau | 964c936 | 2007-01-07 00:38:00 +0100 | [diff] [blame] | 221 | struct task *t; |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 222 | void *queue; |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 223 | |
Willy Tarreau | d825eef | 2007-05-12 22:35:00 +0200 | [diff] [blame] | 224 | wake_expired_tasks(next); |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 225 | /* process each task in the run queue now. Each task may be deleted |
| 226 | * since we only use the run queue's head. Note that any task can be |
| 227 | * woken up by any other task and it will be processed immediately |
Willy Tarreau | 964c936 | 2007-01-07 00:38:00 +0100 | [diff] [blame] | 228 | * after as it will be queued on the run queue's head ! |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 229 | */ |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 230 | |
| 231 | queue = run_queue; |
| 232 | foreach_dlist_item(t, queue, struct task *, qlist) { |
Willy Tarreau | 96bcfd7 | 2007-04-29 10:41:56 +0200 | [diff] [blame] | 233 | DLIST_DEL(&t->qlist); |
| 234 | t->qlist.p = NULL; |
| 235 | |
| 236 | t->state = TASK_IDLE; |
Willy Tarreau | d825eef | 2007-05-12 22:35:00 +0200 | [diff] [blame] | 237 | t->process(t, &temp); |
| 238 | tv_bound(next, &temp); |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 239 | } |
Willy Tarreau | 964c936 | 2007-01-07 00:38:00 +0100 | [diff] [blame] | 240 | |
| 241 | /* maintain all proxies in a consistent state. This should quickly |
| 242 | * become a task because it becomes expensive when there are huge |
| 243 | * numbers of proxies. */ |
Willy Tarreau | d825eef | 2007-05-12 22:35:00 +0200 | [diff] [blame] | 244 | maintain_proxies(&temp); |
| 245 | tv_bound(next, &temp); |
| 246 | return; |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 247 | } |
| 248 | |
Willy Tarreau | baaee00 | 2006-06-26 02:48:02 +0200 | [diff] [blame] | 249 | /* |
| 250 | * Local variables: |
| 251 | * c-indent-level: 8 |
| 252 | * c-basic-offset: 8 |
| 253 | * End: |
| 254 | */ |