blob: 6d3aa9a12bcd6078d1b5a76969da4104a6adb1bd [file] [log] [blame]
/*
 * Queue management functions.
 *
 * Copyright 2000-2009 Willy Tarreau <w@1wt.eu>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 *
 */
12
/* Short explanation on the locking, which is far from being trivial : a
 * pendconn is a list element which necessarily is associated with an existing
 * stream. It has pendconn->strm always valid. A pendconn may only be in one of
 * these three states :
 *   - unlinked : in this case it is an empty list head ;
 *   - linked into the server's queue ;
 *   - linked into the proxy's queue.
 *
 * A stream does not necessarily have such a pendconn. Thus the pendconn is
 * designated by the stream->pend_pos pointer. This results in some properties :
 *   - pendconn->strm->pend_pos is never NULL for any valid pendconn
 *   - if p->node.node.leaf_p is NULL, the element is unlinked,
 *     otherwise it necessarily belongs to one of the other lists ; this may
 *     not be atomically checked under threads though ;
 *   - pendconn->px is never NULL if pendconn->list is not empty
 *   - pendconn->srv is never NULL if pendconn->list is in the server's queue,
 *     and is always NULL if pendconn->list is in the backend's queue or empty.
 *   - pendconn->target is NULL while the element is queued, and points to the
 *     assigned server when the pendconn is picked.
 *
 * Threads complicate the design a little bit but rules remain simple :
 *   - the server's queue lock must be held at least when manipulating the
 *     server's queue, which is when adding a pendconn to the queue and when
 *     removing a pendconn from the queue. It protects the queue's integrity.
 *
 *   - the proxy's queue lock must be held at least when manipulating the
 *     proxy's queue, which is when adding a pendconn to the queue and when
 *     removing a pendconn from the queue. It protects the queue's integrity.
 *
 *   - both locks are compatible and may be held at the same time.
 *
 *   - a pendconn_add() is only performed by the stream which will own the
 *     pendconn ; the pendconn is allocated at this moment and returned ; it is
 *     added to either the server or the proxy's queue while holding this
 *     queue's lock.
 *
 *   - the pendconn is then met by a thread walking over the proxy or server's
 *     queue with the respective lock held. This lock is exclusive and the
 *     pendconn can only appear in one queue so by definition a single thread
 *     may find this pendconn at a time.
 *
 *   - the pendconn is unlinked either by its own stream upon success/abort/
 *     free, or by another one offering it its server slot. This is achieved by
 *     pendconn_process_next_strm() under either the server or proxy's lock,
 *     pendconn_redistribute() under the server's lock, pendconn_grab_from_px()
 *     under the proxy's lock, or pendconn_unlink() under either the proxy's or
 *     the server's lock depending on the queue the pendconn is attached to.
 *
 *   - no single operation except the pendconn initialisation prior to the
 *     insertion is performed without either a queue lock held or the element
 *     being unlinked and visible exclusively to its stream.
 *
 *   - pendconn_grab_from_px() and pendconn_process_next_strm() assign ->target
 *     so that the stream knows what server to work with (via
 *     pendconn_dequeue() which sets it on strm->target).
 *
 *   - a pendconn doesn't switch between queues, it stays where it is.
 */
71
Willy Tarreaudfd3de82020-06-04 23:46:14 +020072#include <import/eb32tree.h>
Willy Tarreau4c7e4b72020-05-27 12:58:42 +020073#include <haproxy/api.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020074#include <haproxy/backend.h>
Willy Tarreauc761f842020-06-04 11:40:28 +020075#include <haproxy/http_rules.h>
Willy Tarreaud0ef4392020-06-02 09:38:52 +020076#include <haproxy/pool.h>
Willy Tarreaua55c4542020-06-04 22:59:39 +020077#include <haproxy/queue.h>
Willy Tarreaue6ce10b2020-06-04 15:33:47 +020078#include <haproxy/sample.h>
Willy Tarreau1e56f922020-06-04 23:20:13 +020079#include <haproxy/server-t.h>
Willy Tarreaudfd3de82020-06-04 23:46:14 +020080#include <haproxy/stream.h>
Willy Tarreau5e539c92020-06-04 20:45:39 +020081#include <haproxy/stream_interface.h>
Willy Tarreaucea0e1b2020-06-04 17:25:40 +020082#include <haproxy/task.h>
Willy Tarreau8b550af2020-06-04 17:42:48 +020083#include <haproxy/tcp_rules.h>
Willy Tarreau3f567e42020-05-28 15:29:19 +020084#include <haproxy/thread.h>
Willy Tarreaub2551052020-06-09 09:07:15 +020085#include <haproxy/time.h>
Willy Tarreauc1a689f2021-05-08 13:59:05 +020086#include <haproxy/tools.h>
Willy Tarreaubaaee002006-06-26 02:48:02 +020087
88
/* Queue keys are 32-bit values made of a 12-bit class in the upper bits and a
 * 20-bit millisecond time offset in the lower bits. All macro arguments are
 * parenthesized so that passing an expression (e.g. "a | b" or "c ? x : y")
 * expands with the intended precedence.
 *
 * NOW_OFFSET_BOUNDARY()          : 20-bit time offset below which a key's
 *                                  offset is treated as having wrapped (i.e.
 *                                  as being in the future), derived from
 *                                  now_ms and TIMER_LOOK_BACK.
 * KEY_CLASS(key)                 : class part of <key> (upper 12 bits, kept in
 *                                  place).
 * KEY_OFFSET(key)                : time offset part of <key> (lower 20 bits).
 * KEY_CLASS_OFFSET_BOUNDARY(key) : key made of <key>'s class and the current
 *                                  offset boundary.
 * MAKE_KEY(class, offset)        : builds a key from <class> biased by 0x7ff
 *                                  and from now_ms + <offset> truncated to 20
 *                                  bits.
 */
#define NOW_OFFSET_BOUNDARY()          ((now_ms - (TIMER_LOOK_BACK >> 12)) & 0xfffff)
#define KEY_CLASS(key)                 ((u32)(key) & 0xfff00000)
#define KEY_OFFSET(key)                ((u32)(key) & 0x000fffff)
#define KEY_CLASS_OFFSET_BOUNDARY(key) (KEY_CLASS(key) | NOW_OFFSET_BOUNDARY())
#define MAKE_KEY(class, offset)        (((u32)((class) + 0x7ff) << 20) | ((u32)(now_ms + (offset)) & 0xfffff))
94
Willy Tarreau8ceae722018-11-26 11:58:30 +010095DECLARE_POOL(pool_head_pendconn, "pendconn", sizeof(struct pendconn));
Willy Tarreaubaaee002006-06-26 02:48:02 +020096
97/* returns the effective dynamic maxconn for a server, considering the minconn
Willy Tarreau86034312006-12-29 00:10:33 +010098 * and the proxy's usage relative to its dynamic connections limit. It is
Willy Tarreau9909fc12007-11-30 17:42:05 +010099 * expected that 0 < s->minconn <= s->maxconn when this is called. If the
100 * server is currently warming up, the slowstart is also applied to the
101 * resulting value, which can be lower than minconn in this case, but never
102 * less than 1.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200103 */
Willy Tarreaub17916e2006-10-15 15:17:57 +0200104unsigned int srv_dynamic_maxconn(const struct server *s)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200105{
Willy Tarreau9909fc12007-11-30 17:42:05 +0100106 unsigned int max;
107
Willy Tarreau86034312006-12-29 00:10:33 +0100108 if (s->proxy->beconn >= s->proxy->fullconn)
109 /* no fullconn or proxy is full */
Willy Tarreau9909fc12007-11-30 17:42:05 +0100110 max = s->maxconn;
111 else if (s->minconn == s->maxconn)
Willy Tarreau86034312006-12-29 00:10:33 +0100112 /* static limit */
Willy Tarreau9909fc12007-11-30 17:42:05 +0100113 max = s->maxconn;
114 else max = MAX(s->minconn,
115 s->proxy->beconn * s->maxconn / s->proxy->fullconn);
Willy Tarreau86034312006-12-29 00:10:33 +0100116
Emeric Brun52a91d32017-08-31 14:41:55 +0200117 if ((s->cur_state == SRV_ST_STARTING) &&
Willy Tarreau9909fc12007-11-30 17:42:05 +0100118 now.tv_sec < s->last_change + s->slowstart &&
119 now.tv_sec >= s->last_change) {
120 unsigned int ratio;
Willy Tarreau28a9e522008-09-14 17:43:27 +0200121 ratio = 100 * (now.tv_sec - s->last_change) / s->slowstart;
122 max = MAX(1, max * ratio / 100);
Willy Tarreau9909fc12007-11-30 17:42:05 +0100123 }
124 return max;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200125}
126
Willy Tarreau3e3ae252020-10-21 11:20:07 +0200127/* Remove the pendconn from the server's queue. At this stage, the connection
Willy Tarreau96bca332020-10-21 12:01:28 +0200128 * is not really dequeued. It will be done during the process_stream. It is
129 * up to the caller to atomically decrement the pending counts.
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100130 *
Willy Tarreau3e3ae252020-10-21 11:20:07 +0200131 * The caller must own the lock on the server queue. The pendconn must still be
132 * queued (p->node.leaf_p != NULL) and must be in a server (p->srv != NULL).
Christopher Fauletf3a55db2017-06-09 14:26:38 +0200133 */
Willy Tarreau3e3ae252020-10-21 11:20:07 +0200134static void __pendconn_unlink_srv(struct pendconn *p)
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100135{
Willy Tarreau51c63f02021-06-23 16:43:45 +0200136 p->strm->logs.srv_queue_pos += _HA_ATOMIC_LOAD(&p->queue->idx) - p->queue_idx;
Willy Tarreau3e3ae252020-10-21 11:20:07 +0200137 eb32_delete(&p->node);
138}
139
140/* Remove the pendconn from the proxy's queue. At this stage, the connection
Willy Tarreau96bca332020-10-21 12:01:28 +0200141 * is not really dequeued. It will be done during the process_stream. It is
142 * up to the caller to atomically decrement the pending counts.
Willy Tarreau3e3ae252020-10-21 11:20:07 +0200143 *
144 * The caller must own the lock on the proxy queue. The pendconn must still be
145 * queued (p->node.leaf_p != NULL) and must be in the proxy (p->srv == NULL).
146 */
147static void __pendconn_unlink_prx(struct pendconn *p)
148{
Willy Tarreau51c63f02021-06-23 16:43:45 +0200149 p->strm->logs.prx_queue_pos += _HA_ATOMIC_LOAD(&p->queue->idx) - p->queue_idx;
Patrick Hemmer0355dab2018-05-11 12:52:31 -0400150 eb32_delete(&p->node);
Christopher Fauletf3a55db2017-06-09 14:26:38 +0200151}
152
Willy Tarreau7c6f8a22018-07-26 08:03:14 +0200153/* Locks the queue the pendconn element belongs to. This relies on both p->px
154 * and p->srv to be properly initialized (which is always the case once the
155 * element has been added).
156 */
157static inline void pendconn_queue_lock(struct pendconn *p)
158{
Willy Tarreau51c63f02021-06-23 16:43:45 +0200159 HA_SPIN_LOCK(QUEUE_LOCK, &p->queue->lock);
Willy Tarreau7c6f8a22018-07-26 08:03:14 +0200160}
161
162/* Unlocks the queue the pendconn element belongs to. This relies on both p->px
163 * and p->srv to be properly initialized (which is always the case once the
164 * element has been added).
165 */
166static inline void pendconn_queue_unlock(struct pendconn *p)
167{
Willy Tarreau51c63f02021-06-23 16:43:45 +0200168 HA_SPIN_UNLOCK(QUEUE_LOCK, &p->queue->lock);
Willy Tarreau7c6f8a22018-07-26 08:03:14 +0200169}
170
Willy Tarreau9624fae2018-07-25 08:04:20 +0200171/* Removes the pendconn from the server/proxy queue. At this stage, the
172 * connection is not really dequeued. It will be done during process_stream().
Willy Tarreau9ada0302019-11-14 14:58:39 +0100173 * This function takes all the required locks for the operation. The pendconn
174 * must be valid, though it doesn't matter if it was already unlinked. Prefer
Willy Tarreaud03adce2021-06-23 16:54:16 +0200175 * pendconn_cond_unlink() to first check <p>.
Willy Tarreau9624fae2018-07-25 08:04:20 +0200176 */
177void pendconn_unlink(struct pendconn *p)
178{
Willy Tarreau51c63f02021-06-23 16:43:45 +0200179 struct queue *q = p->queue;
180 struct proxy *px = q->px;
181 struct server *sv = q->sv;
Willy Tarreaud03adce2021-06-23 16:54:16 +0200182 uint oldidx;
183 int done = 0;
Willy Tarreau96bca332020-10-21 12:01:28 +0200184
Willy Tarreaud03adce2021-06-23 16:54:16 +0200185 oldidx = _HA_ATOMIC_LOAD(&p->queue->idx);
186 HA_SPIN_LOCK(QUEUE_LOCK, &q->lock);
187 if (p->node.node.leaf_p) {
188 eb32_delete(&p->node);
189 done = 1;
Willy Tarreau3e3ae252020-10-21 11:20:07 +0200190 }
Willy Tarreaud03adce2021-06-23 16:54:16 +0200191 HA_SPIN_UNLOCK(QUEUE_LOCK, &q->lock);
192
193 if (done) {
194 oldidx -= p->queue_idx;
195 if (sv)
196 p->strm->logs.srv_queue_pos += oldidx;
197 else
198 p->strm->logs.prx_queue_pos += oldidx;
199
200 _HA_ATOMIC_DEC(&q->length);
201 _HA_ATOMIC_DEC(&px->totpend);
Willy Tarreau3e3ae252020-10-21 11:20:07 +0200202 }
Willy Tarreau9624fae2018-07-25 08:04:20 +0200203}
204
Willy Tarreau2bf3f2c2021-06-24 07:20:26 +0200205/* Retrieve the first pendconn from tree <pendconns>. Classes are always
206 * considered first, then the time offset. The time does wrap, so the
207 * lookup is performed twice, one to retrieve the first class and a second
208 * time to retrieve the earliest time in this class.
Patrick Hemmer248cb4c2018-05-11 12:52:31 -0400209 */
Willy Tarreau2bf3f2c2021-06-24 07:20:26 +0200210static struct pendconn *pendconn_first(struct eb_root *pendconns)
Patrick Hemmer248cb4c2018-05-11 12:52:31 -0400211{
212 struct eb32_node *node, *node2 = NULL;
213 u32 key;
214
Willy Tarreau2bf3f2c2021-06-24 07:20:26 +0200215 node = eb32_first(pendconns);
216 if (!node)
Patrick Hemmer248cb4c2018-05-11 12:52:31 -0400217 return NULL;
218
219 key = KEY_CLASS_OFFSET_BOUNDARY(node->key);
Willy Tarreau2bf3f2c2021-06-24 07:20:26 +0200220 node2 = eb32_lookup_ge(pendconns, key);
Patrick Hemmer248cb4c2018-05-11 12:52:31 -0400221
222 if (!node2 ||
223 KEY_CLASS(node2->key) != KEY_CLASS(node->key)) {
224 /* no other key in the tree, or in this class */
225 return eb32_entry(node, struct pendconn, node);
226 }
227
228 /* found a better key */
229 return eb32_entry(node2, struct pendconn, node);
230}
231
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100232/* Process the next pending connection from either a server or a proxy, and
Christopher Fauletfd83f0b2018-03-19 15:22:09 +0100233 * returns a strictly positive value on success (see below). If no pending
234 * connection is found, 0 is returned. Note that neither <srv> nor <px> may be
235 * NULL. Priority is given to the oldest request in the queue if both <srv> and
236 * <px> have pending requests. This ensures that no request will be left
237 * unserved. The <px> queue is not considered if the server (or a tracked
238 * server) is not RUNNING, is disabled, or has a null weight (server going
239 * down). The <srv> queue is still considered in this case, because if some
240 * connections remain there, it means that some requests have been forced there
241 * after it was seen down (eg: due to option persist). The stream is
242 * immediately marked as "assigned", and both its <srv> and <srv_conn> are set
243 * to <srv>.
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100244 *
Willy Tarreaua0e9c552021-06-18 19:45:17 +0200245 * The proxy's queue will be consulted only if px_ok is non-zero.
246 *
Willy Tarreau5343d8e2021-06-24 07:22:03 +0200247 * This function must only be called if the server queue _AND_ the proxy queue
Willy Tarreaua0e9c552021-06-18 19:45:17 +0200248 * are locked (if px_ok is set). Today it is only called by process_srv_queue.
249 * When a pending connection is dequeued, this function returns 1 if a pendconn
250 * is dequeued, otherwise 0.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200251 */
Willy Tarreaua0e9c552021-06-18 19:45:17 +0200252static int pendconn_process_next_strm(struct server *srv, struct proxy *px, int px_ok)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200253{
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100254 struct pendconn *p = NULL;
Patrick Hemmerda282f42018-05-11 12:52:31 -0400255 struct pendconn *pp = NULL;
Patrick Hemmer248cb4c2018-05-11 12:52:31 -0400256 u32 pkey, ppkey;
Willy Tarreaud132f742010-08-06 10:08:23 +0200257
Willy Tarreau2bf3f2c2021-06-24 07:20:26 +0200258 p = NULL;
Willy Tarreau90a160a2021-06-24 07:21:59 +0200259 if (srv->queue.length)
Willy Tarreau2bf3f2c2021-06-24 07:20:26 +0200260 p = pendconn_first(&srv->queue.head);
Willy Tarreau2bf3f2c2021-06-24 07:20:26 +0200261
262 pp = NULL;
Willy Tarreau49667c12021-06-24 08:04:24 +0200263 if (px_ok && px->queue.length) {
264 /* the lock only remains held as long as the pp is
265 * in the proxy's queue.
266 */
Willy Tarreau47ee44f2021-06-24 16:00:18 +0200267 HA_SPIN_LOCK(QUEUE_LOCK, &px->queue.lock);
Willy Tarreau2bf3f2c2021-06-24 07:20:26 +0200268 pp = pendconn_first(&px->queue.head);
Willy Tarreau49667c12021-06-24 08:04:24 +0200269 if (!pp)
Willy Tarreau47ee44f2021-06-24 16:00:18 +0200270 HA_SPIN_UNLOCK(QUEUE_LOCK, &px->queue.lock);
Willy Tarreau49667c12021-06-24 08:04:24 +0200271 }
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100272
Willy Tarreau5343d8e2021-06-24 07:22:03 +0200273 if (!p && !pp)
Willy Tarreaua48905b2021-06-24 07:27:01 +0200274 return 0;
Christopher Fauletcd7126b2021-02-11 11:13:33 +0100275 else if (!pp)
276 goto use_p; /* p != NULL */
277 else if (!p)
278 goto use_pp; /* pp != NULL */
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100279
Christopher Fauletcd7126b2021-02-11 11:13:33 +0100280 /* p != NULL && pp != NULL*/
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100281
Patrick Hemmer248cb4c2018-05-11 12:52:31 -0400282 if (KEY_CLASS(p->node.key) < KEY_CLASS(pp->node.key))
283 goto use_p;
284
285 if (KEY_CLASS(pp->node.key) < KEY_CLASS(p->node.key))
286 goto use_pp;
287
288 pkey = KEY_OFFSET(p->node.key);
289 ppkey = KEY_OFFSET(pp->node.key);
290
291 if (pkey < NOW_OFFSET_BOUNDARY())
292 pkey += 0x100000; // key in the future
293
294 if (ppkey < NOW_OFFSET_BOUNDARY())
295 ppkey += 0x100000; // key in the future
296
297 if (pkey <= ppkey)
298 goto use_p;
299
300 use_pp:
301 /* Let's switch from the server pendconn to the proxy pendconn */
Willy Tarreau3e3ae252020-10-21 11:20:07 +0200302 __pendconn_unlink_prx(pp);
Willy Tarreau47ee44f2021-06-24 16:00:18 +0200303 HA_SPIN_UNLOCK(QUEUE_LOCK, &px->queue.lock);
Willy Tarreau7f3c1df2021-06-18 09:22:21 +0200304 _HA_ATOMIC_DEC(&px->queue.length);
Willy Tarreau98c89102021-06-18 10:51:58 +0200305 _HA_ATOMIC_INC(&px->queue.idx);
Patrick Hemmer248cb4c2018-05-11 12:52:31 -0400306 p = pp;
Willy Tarreau3e3ae252020-10-21 11:20:07 +0200307 goto unlinked;
Patrick Hemmer248cb4c2018-05-11 12:52:31 -0400308 use_p:
Willy Tarreau49667c12021-06-24 08:04:24 +0200309 if (pp)
Willy Tarreau47ee44f2021-06-24 16:00:18 +0200310 HA_SPIN_UNLOCK(QUEUE_LOCK, &px->queue.lock);
Willy Tarreau3e3ae252020-10-21 11:20:07 +0200311 __pendconn_unlink_srv(p);
Willy Tarreaua0570452021-06-18 09:30:30 +0200312 _HA_ATOMIC_DEC(&srv->queue.length);
Willy Tarreau98c89102021-06-18 10:51:58 +0200313 _HA_ATOMIC_INC(&srv->queue.idx);
Willy Tarreau3e3ae252020-10-21 11:20:07 +0200314 unlinked:
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100315 p->strm_flags |= SF_ASSIGNED;
Willy Tarreau88930dd2018-07-26 07:38:54 +0200316 p->target = srv;
Willy Tarreaua48905b2021-06-24 07:27:01 +0200317
Willy Tarreaua48905b2021-06-24 07:27:01 +0200318 stream_add_srv_conn(p->strm, srv);
319
320 task_wakeup(p->strm->task, TASK_WOKEN_RES);
321
322 return 1;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200323}
324
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100325/* Manages a server's connection queue. This function will try to dequeue as
Willy Tarreau9ab78292021-06-22 18:47:51 +0200326 * many pending streams as possible, and wake them up.
Christopher Faulet87566c92017-06-06 10:34:51 +0200327 */
Willy Tarreau9ab78292021-06-22 18:47:51 +0200328void process_srv_queue(struct server *s)
Christopher Faulet87566c92017-06-06 10:34:51 +0200329{
Willy Tarreaua0e9c552021-06-18 19:45:17 +0200330 struct server *ref = s->track ? s->track : s;
Christopher Faulet87566c92017-06-06 10:34:51 +0200331 struct proxy *p = s->proxy;
Olivier Houchardecfe6732018-07-26 18:47:27 +0200332 int maxconn;
Willy Tarreau19c55812021-06-24 15:51:12 +0200333 int stop = 0;
Willy Tarreau9cef43a2021-06-24 07:47:08 +0200334 int done = 0;
Willy Tarreaua0e9c552021-06-18 19:45:17 +0200335 int px_ok;
336
337 /* if a server is not usable or backup and must not be used
338 * to dequeue backend requests.
339 */
340 px_ok = srv_currently_usable(ref) &&
341 (!(s->flags & SRV_F_BACKUP) ||
342 (!p->srv_act &&
343 (s == p->lbprm.fbck || (p->options & PR_O_USE_ALL_BK))));
Christopher Faulet87566c92017-06-06 10:34:51 +0200344
Willy Tarreauae0b12e2021-06-24 08:30:07 +0200345 /* let's repeat that under the lock on each round. Threads competing
346 * for the same server will give up, knowing that at least one of
347 * them will check the conditions again before quitting.
348 */
Willy Tarreau19c55812021-06-24 15:51:12 +0200349 while (!stop && s->served < (maxconn = srv_dynamic_maxconn(s))) {
Willy Tarreau47ee44f2021-06-24 16:00:18 +0200350 if (HA_SPIN_TRYLOCK(QUEUE_LOCK, &s->queue.lock) != 0)
Christopher Faulet87566c92017-06-06 10:34:51 +0200351 break;
Willy Tarreauae0b12e2021-06-24 08:30:07 +0200352
353 while (s->served < maxconn) {
Willy Tarreau19c55812021-06-24 15:51:12 +0200354 stop = !pendconn_process_next_strm(s, p, px_ok);
355 if (stop)
Willy Tarreauae0b12e2021-06-24 08:30:07 +0200356 break;
357 _HA_ATOMIC_INC(&s->served);
358 done++;
359 }
Willy Tarreau47ee44f2021-06-24 16:00:18 +0200360 HA_SPIN_UNLOCK(QUEUE_LOCK, &s->queue.lock);
Christopher Faulet87566c92017-06-06 10:34:51 +0200361 }
Willy Tarreau9cef43a2021-06-24 07:47:08 +0200362
363 if (done) {
364 _HA_ATOMIC_SUB(&p->totpend, done);
365 _HA_ATOMIC_ADD(&p->served, done);
366 __ha_barrier_atomic_store();
367 if (p->lbprm.server_take_conn)
368 p->lbprm.server_take_conn(s);
369 }
Christopher Faulet87566c92017-06-06 10:34:51 +0200370}
371
Patrick Hemmer248cb4c2018-05-11 12:52:31 -0400372/* Adds the stream <strm> to the pending connection queue of server <strm>->srv
Willy Tarreau87b09662015-04-03 00:22:06 +0200373 * or to the one of <strm>->proxy if srv is NULL. All counters and back pointers
Willy Tarreaubaaee002006-06-26 02:48:02 +0200374 * are updated accordingly. Returns NULL if no memory is available, otherwise the
Willy Tarreau87b09662015-04-03 00:22:06 +0200375 * pendconn itself. If the stream was already marked as served, its flag is
376 * cleared. It is illegal to call this function with a non-NULL strm->srv_conn.
Patrick Hemmerda282f42018-05-11 12:52:31 -0400377 * The stream's queue position is counted with an offset of -1 because we want
378 * to make sure that being at the first position in the queue reports 1.
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100379 *
Patrick Hemmer248cb4c2018-05-11 12:52:31 -0400380 * The queue is sorted by the composition of the priority_class, and the current
381 * timestamp offset by strm->priority_offset. The timestamp is in milliseconds
382 * and truncated to 20 bits, so will wrap every 17m28s575ms.
383 * The offset can be positive or negative, and an offset of 0 puts it in the
384 * middle of this range (~ 8 min). Note that this also means if the adjusted
385 * timestamp wraps around, the request will be misinterpreted as being of
Joseph Herlantd8499ec2018-11-25 11:26:48 -0800386 * the highest priority for that priority class.
Patrick Hemmer248cb4c2018-05-11 12:52:31 -0400387 *
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100388 * This function must be called by the stream itself, so in the context of
389 * process_stream.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200390 */
Willy Tarreau87b09662015-04-03 00:22:06 +0200391struct pendconn *pendconn_add(struct stream *strm)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200392{
393 struct pendconn *p;
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100394 struct proxy *px;
395 struct server *srv;
Willy Tarreau12529c02021-06-18 10:21:20 +0200396 struct queue *q;
397 unsigned int *max_ptr;
398 unsigned int old_max, new_max;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200399
Willy Tarreaubafbe012017-11-24 17:34:44 +0100400 p = pool_alloc(pool_head_pendconn);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200401 if (!p)
402 return NULL;
403
Willy Tarreau88930dd2018-07-26 07:38:54 +0200404 p->target = NULL;
Patrick Hemmer248cb4c2018-05-11 12:52:31 -0400405 p->node.key = MAKE_KEY(strm->priority_class, strm->priority_offset);
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100406 p->strm = strm;
407 p->strm_flags = strm->flags;
Willy Tarreau901972e2021-06-18 10:33:47 +0200408 strm->pend_pos = p;
Willy Tarreau7c669d72008-06-20 15:04:11 +0200409
Willy Tarreau51c63f02021-06-23 16:43:45 +0200410 px = strm->be;
411 if (strm->flags & SF_ASSIGNED)
412 srv = objt_server(strm->target);
413 else
414 srv = NULL;
415
Willy Tarreau7c6f8a22018-07-26 08:03:14 +0200416 if (srv) {
Willy Tarreau12529c02021-06-18 10:21:20 +0200417 q = &srv->queue;
418 max_ptr = &srv->counters.nbpend_max;
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100419 }
420 else {
Willy Tarreau12529c02021-06-18 10:21:20 +0200421 q = &px->queue;
422 max_ptr = &px->be_counters.nbpend_max;
423 }
Willy Tarreau3eecdb62021-06-18 10:21:20 +0200424
Willy Tarreau84290972021-06-23 16:33:52 +0200425 p->queue = q;
Willy Tarreau98c89102021-06-18 10:51:58 +0200426 p->queue_idx = _HA_ATOMIC_LOAD(&q->idx) - 1; // for logging only
Willy Tarreau12529c02021-06-18 10:21:20 +0200427 new_max = _HA_ATOMIC_ADD_FETCH(&q->length, 1);
428 old_max = _HA_ATOMIC_LOAD(max_ptr);
429 while (new_max > old_max) {
430 if (likely(_HA_ATOMIC_CAS(max_ptr, &old_max, new_max)))
431 break;
Willy Tarreau58f4dfb2021-06-24 07:22:15 +0200432 }
Willy Tarreau12529c02021-06-18 10:21:20 +0200433 __ha_barrier_atomic_store();
434
435 HA_SPIN_LOCK(QUEUE_LOCK, &q->lock);
Willy Tarreau12529c02021-06-18 10:21:20 +0200436 eb32_insert(&q->head, &p->node);
437 HA_SPIN_UNLOCK(QUEUE_LOCK, &q->lock);
Willy Tarreau7c6f8a22018-07-26 08:03:14 +0200438
Willy Tarreau4781b152021-04-06 13:53:36 +0200439 _HA_ATOMIC_INC(&px->totpend);
Willy Tarreaubaaee002006-06-26 02:48:02 +0200440 return p;
441}
442
Willy Tarreau4aac7db2014-05-16 11:48:10 +0200443/* Redistribute pending connections when a server goes down. The number of
Willy Tarreau16fbdda2021-06-18 09:45:27 +0200444 * connections redistributed is returned. It will take the server queue lock
445 * and does not use nor depend on other locks.
Willy Tarreau4aac7db2014-05-16 11:48:10 +0200446 */
447int pendconn_redistribute(struct server *s)
448{
Patrick Hemmer0355dab2018-05-11 12:52:31 -0400449 struct pendconn *p;
Willy Tarreaubff005a2019-05-27 08:10:11 +0200450 struct eb32_node *node, *nodeb;
Willy Tarreau4aac7db2014-05-16 11:48:10 +0200451 int xferred = 0;
452
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100453 /* The REDISP option was specified. We will ignore cookie and force to
454 * balance or use the dispatcher. */
455 if ((s->proxy->options & (PR_O_REDISP|PR_O_PERSIST)) != PR_O_REDISP)
456 return 0;
457
Willy Tarreau47ee44f2021-06-24 16:00:18 +0200458 HA_SPIN_LOCK(QUEUE_LOCK, &s->queue.lock);
Willy Tarreaua0570452021-06-18 09:30:30 +0200459 for (node = eb32_first(&s->queue.head); node; node = nodeb) {
Willy Tarreaubff005a2019-05-27 08:10:11 +0200460 nodeb = eb32_next(node);
461
462 p = eb32_entry(node, struct pendconn, node);
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100463 if (p->strm_flags & SF_FORCE_PRST)
464 continue;
Willy Tarreau4aac7db2014-05-16 11:48:10 +0200465
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100466 /* it's left to the dispatcher to choose a server */
Willy Tarreau3e3ae252020-10-21 11:20:07 +0200467 __pendconn_unlink_srv(p);
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100468 p->strm_flags &= ~(SF_DIRECT | SF_ASSIGNED | SF_ADDR_SET);
Willy Tarreau4aac7db2014-05-16 11:48:10 +0200469
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100470 task_wakeup(p->strm->task, TASK_WOKEN_RES);
Willy Tarreauef71f012020-10-21 11:54:38 +0200471 xferred++;
Willy Tarreau4aac7db2014-05-16 11:48:10 +0200472 }
Willy Tarreau47ee44f2021-06-24 16:00:18 +0200473 HA_SPIN_UNLOCK(QUEUE_LOCK, &s->queue.lock);
Willy Tarreau16fbdda2021-06-18 09:45:27 +0200474
Willy Tarreau96bca332020-10-21 12:01:28 +0200475 if (xferred) {
Willy Tarreaua0570452021-06-18 09:30:30 +0200476 _HA_ATOMIC_SUB(&s->queue.length, xferred);
Willy Tarreau5472aa52020-10-24 12:57:41 +0200477 _HA_ATOMIC_SUB(&s->proxy->totpend, xferred);
Willy Tarreau96bca332020-10-21 12:01:28 +0200478 }
Willy Tarreau4aac7db2014-05-16 11:48:10 +0200479 return xferred;
480}
481
482/* Check for pending connections at the backend, and assign some of them to
483 * the server coming up. The server's weight is checked before being assigned
484 * connections it may not be able to handle. The total number of transferred
Willy Tarreau16fbdda2021-06-18 09:45:27 +0200485 * connections is returned. It will take the proxy's queue lock and will not
486 * use nor depend on other locks.
Willy Tarreau4aac7db2014-05-16 11:48:10 +0200487 */
488int pendconn_grab_from_px(struct server *s)
489{
Patrick Hemmer0355dab2018-05-11 12:52:31 -0400490 struct pendconn *p;
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100491 int maxconn, xferred = 0;
Willy Tarreau4aac7db2014-05-16 11:48:10 +0200492
Emeric Brun52a91d32017-08-31 14:41:55 +0200493 if (!srv_currently_usable(s))
Willy Tarreau4aac7db2014-05-16 11:48:10 +0200494 return 0;
495
Willy Tarreaua8694652018-08-07 10:44:58 +0200496 /* if this is a backup server and there are active servers or at
497 * least another backup server was elected, then this one must
498 * not dequeue requests from the proxy.
499 */
500 if ((s->flags & SRV_F_BACKUP) &&
501 (s->proxy->srv_act ||
502 ((s != s->proxy->lbprm.fbck) && !(s->proxy->options & PR_O_USE_ALL_BK))))
503 return 0;
504
Willy Tarreau16fbdda2021-06-18 09:45:27 +0200505 HA_SPIN_LOCK(QUEUE_LOCK, &s->proxy->queue.lock);
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100506 maxconn = srv_dynamic_maxconn(s);
Willy Tarreau2bf3f2c2021-06-24 07:20:26 +0200507 while ((p = pendconn_first(&s->proxy->queue.head))) {
508 if (s->maxconn && s->served + xferred >= maxconn)
509 break;
Willy Tarreau772e9682021-06-18 20:32:50 +0200510
Willy Tarreau2bf3f2c2021-06-24 07:20:26 +0200511 __pendconn_unlink_prx(p);
Willy Tarreau88930dd2018-07-26 07:38:54 +0200512 p->target = s;
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100513
514 task_wakeup(p->strm->task, TASK_WOKEN_RES);
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100515 xferred++;
Willy Tarreau4aac7db2014-05-16 11:48:10 +0200516 }
Willy Tarreau16fbdda2021-06-18 09:45:27 +0200517 HA_SPIN_UNLOCK(QUEUE_LOCK, &s->proxy->queue.lock);
Willy Tarreau96bca332020-10-21 12:01:28 +0200518 if (xferred) {
Willy Tarreau7f3c1df2021-06-18 09:22:21 +0200519 _HA_ATOMIC_SUB(&s->proxy->queue.length, xferred);
Willy Tarreau5472aa52020-10-24 12:57:41 +0200520 _HA_ATOMIC_SUB(&s->proxy->totpend, xferred);
Willy Tarreau96bca332020-10-21 12:01:28 +0200521 }
Willy Tarreau4aac7db2014-05-16 11:48:10 +0200522 return xferred;
523}
524
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100525/* Try to dequeue pending connection attached to the stream <strm>. It must
526 * always exists here. If the pendconn is still linked to the server or the
527 * proxy queue, nothing is done and the function returns 1. Otherwise,
528 * <strm>->flags and <strm>->target are updated, the pendconn is released and 0
529 * is returned.
530 *
531 * This function must be called by the stream itself, so in the context of
532 * process_stream.
Willy Tarreaubaaee002006-06-26 02:48:02 +0200533 */
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100534int pendconn_dequeue(struct stream *strm)
Willy Tarreaubaaee002006-06-26 02:48:02 +0200535{
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100536 struct pendconn *p;
Willy Tarreau3201e4e2018-07-26 08:23:24 +0200537 int is_unlinked;
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100538
539 if (unlikely(!strm->pend_pos)) {
540 /* unexpected case because it is called by the stream itself and
541 * only the stream can release a pendconn. So it is only
542 * possible if a pendconn is released by someone else or if the
543 * stream is supposed to be queued but without its associated
544 * pendconn. In both cases it is a bug! */
545 abort();
Christopher Faulet8ba59142017-06-27 15:43:53 +0200546 }
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100547 p = strm->pend_pos;
Willy Tarreau3201e4e2018-07-26 08:23:24 +0200548
549 /* note below : we need to grab the queue's lock to check for emptiness
550 * because we don't want a partial _grab_from_px() or _redistribute()
551 * to be called in parallel and show an empty list without having the
552 * time to finish. With this we know that if we see the element
553 * unlinked, these functions were completely done.
554 */
555 pendconn_queue_lock(p);
Patrick Hemmer0355dab2018-05-11 12:52:31 -0400556 is_unlinked = !p->node.node.leaf_p;
Willy Tarreau3201e4e2018-07-26 08:23:24 +0200557 pendconn_queue_unlock(p);
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100558
Willy Tarreau3201e4e2018-07-26 08:23:24 +0200559 if (!is_unlinked)
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100560 return 1;
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100561
Willy Tarreau3201e4e2018-07-26 08:23:24 +0200562 /* the pendconn is not queued anymore and will not be so we're safe
563 * to proceed.
564 */
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100565 strm->flags &= ~(SF_DIRECT | SF_ASSIGNED | SF_ADDR_SET);
566 strm->flags |= p->strm_flags & (SF_DIRECT | SF_ASSIGNED | SF_ADDR_SET);
Willy Tarreau7867ceb2021-06-16 08:42:23 +0200567
568 if (p->target) {
569 /* a server picked this pendconn, it must skip LB */
570 strm->target = &p->target->obj_type;
571 strm->flags |= SF_ASSIGNED;
572 }
573
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100574 strm->pend_pos = NULL;
Willy Tarreaubafbe012017-11-24 17:34:44 +0100575 pool_free(pool_head_pendconn, p);
Christopher Faulet5cd4bbd2018-03-14 16:18:06 +0100576 return 0;
Willy Tarreaubaaee002006-06-26 02:48:02 +0200577}
578
Patrick Hemmer268a7072018-05-11 12:52:31 -0400579static enum act_return action_set_priority_class(struct act_rule *rule, struct proxy *px,
580 struct session *sess, struct stream *s, int flags)
581{
582 struct sample *smp;
583
584 smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_SINT);
585 if (!smp)
586 return ACT_RET_CONT;
587
588 s->priority_class = queue_limit_class(smp->data.u.sint);
589 return ACT_RET_CONT;
590}
591
592static enum act_return action_set_priority_offset(struct act_rule *rule, struct proxy *px,
593 struct session *sess, struct stream *s, int flags)
594{
595 struct sample *smp;
596
597 smp = sample_fetch_as_type(px, sess, s, SMP_OPT_DIR_REQ|SMP_OPT_FINAL, rule->arg.expr, SMP_T_SINT);
598 if (!smp)
599 return ACT_RET_CONT;
600
601 s->priority_offset = queue_limit_offset(smp->data.u.sint);
602
603 return ACT_RET_CONT;
604}
605
606static enum act_parse_ret parse_set_priority_class(const char **args, int *arg, struct proxy *px,
607 struct act_rule *rule, char **err)
608{
609 unsigned int where = 0;
610
611 rule->arg.expr = sample_parse_expr((char **)args, arg, px->conf.args.file,
Willy Tarreaue3b57bf2020-02-14 16:50:14 +0100612 px->conf.args.line, err, &px->conf.args, NULL);
Patrick Hemmer268a7072018-05-11 12:52:31 -0400613 if (!rule->arg.expr)
614 return ACT_RET_PRS_ERR;
615
616 if (px->cap & PR_CAP_FE)
617 where |= SMP_VAL_FE_HRQ_HDR;
618 if (px->cap & PR_CAP_BE)
619 where |= SMP_VAL_BE_HRQ_HDR;
620
621 if (!(rule->arg.expr->fetch->val & where)) {
622 memprintf(err,
623 "fetch method '%s' extracts information from '%s', none of which is available here",
624 args[0], sample_src_names(rule->arg.expr->fetch->use));
625 free(rule->arg.expr);
626 return ACT_RET_PRS_ERR;
627 }
628
629 rule->action = ACT_CUSTOM;
630 rule->action_ptr = action_set_priority_class;
631 return ACT_RET_PRS_OK;
632}
633
634static enum act_parse_ret parse_set_priority_offset(const char **args, int *arg, struct proxy *px,
635 struct act_rule *rule, char **err)
636{
637 unsigned int where = 0;
638
639 rule->arg.expr = sample_parse_expr((char **)args, arg, px->conf.args.file,
Willy Tarreaue3b57bf2020-02-14 16:50:14 +0100640 px->conf.args.line, err, &px->conf.args, NULL);
Patrick Hemmer268a7072018-05-11 12:52:31 -0400641 if (!rule->arg.expr)
642 return ACT_RET_PRS_ERR;
643
644 if (px->cap & PR_CAP_FE)
645 where |= SMP_VAL_FE_HRQ_HDR;
646 if (px->cap & PR_CAP_BE)
647 where |= SMP_VAL_BE_HRQ_HDR;
648
649 if (!(rule->arg.expr->fetch->val & where)) {
650 memprintf(err,
651 "fetch method '%s' extracts information from '%s', none of which is available here",
652 args[0], sample_src_names(rule->arg.expr->fetch->use));
653 free(rule->arg.expr);
654 return ACT_RET_PRS_ERR;
655 }
656
657 rule->action = ACT_CUSTOM;
658 rule->action_ptr = action_set_priority_offset;
659 return ACT_RET_PRS_OK;
660}
661
662static struct action_kw_list tcp_cont_kws = {ILH, {
663 { "set-priority-class", parse_set_priority_class },
664 { "set-priority-offset", parse_set_priority_offset },
665 { /* END */ }
666}};
667
Willy Tarreau0108d902018-11-25 19:14:37 +0100668INITCALL1(STG_REGISTER, tcp_req_cont_keywords_register, &tcp_cont_kws);
669
Patrick Hemmer268a7072018-05-11 12:52:31 -0400670static struct action_kw_list http_req_kws = {ILH, {
671 { "set-priority-class", parse_set_priority_class },
672 { "set-priority-offset", parse_set_priority_offset },
673 { /* END */ }
674}};
675
Willy Tarreau0108d902018-11-25 19:14:37 +0100676INITCALL1(STG_REGISTER, http_req_keywords_register, &http_req_kws);
677
Patrick Hemmer268a7072018-05-11 12:52:31 -0400678static int
679smp_fetch_priority_class(const struct arg *args, struct sample *smp, const char *kw, void *private)
680{
681 if (!smp->strm)
682 return 0;
683
684 smp->data.type = SMP_T_SINT;
685 smp->data.u.sint = smp->strm->priority_class;
686
687 return 1;
688}
689
690static int
691smp_fetch_priority_offset(const struct arg *args, struct sample *smp, const char *kw, void *private)
692{
693 if (!smp->strm)
694 return 0;
695
696 smp->data.type = SMP_T_SINT;
697 smp->data.u.sint = smp->strm->priority_offset;
698
699 return 1;
700}
701
702
703static struct sample_fetch_kw_list smp_kws = {ILH, {
704 { "prio_class", smp_fetch_priority_class, 0, NULL, SMP_T_SINT, SMP_USE_INTRN, },
705 { "prio_offset", smp_fetch_priority_offset, 0, NULL, SMP_T_SINT, SMP_USE_INTRN, },
706 { /* END */},
707}};
708
Willy Tarreau0108d902018-11-25 19:14:37 +0100709INITCALL1(STG_REGISTER, sample_register_fetches, &smp_kws);
Patrick Hemmer268a7072018-05-11 12:52:31 -0400710
/*
 * Local variables:
 *  c-indent-level: 8
 *  c-basic-offset: 8
 * End:
 */