#include <haproxy/quic_cc.h>
#include <haproxy/ticks.h>
#include <haproxy/trace.h>

/* IMPORTANT NOTE about the units defined by RFC 9438
 * (CUBIC for Fast and Long-Distance Networks):
 *
 * RFC 9438 4.1. Definitions:
 * The unit of all window sizes in this document is segments of the SMSS, and
 * the unit of all times is seconds. Implementations can use bytes to express
 * window sizes, which would require factoring in the SMSS wherever necessary
 * and replacing segments_acked (Figure 4) with the number of acknowledged
 * bytes.
 */

/* This is the reason why, in this implementation, each time a number of
 * segments is used (typically a congestion window value), its value is
 * multiplied by the MTU value.
 */
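
/* For example, an RFC 9438 window increment of (target - cwnd) segments is
 * applied in quic_cubic_update() below as path->mtu * (target - path->cwnd)
 * bytes.
 */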

/* This source file is highly inspired by the Linux kernel source file
 * implementing TCP CUBIC. In fact, we have no choice if we want to avoid
 * any floating point operations and remain fast!
 * (See net/ipv4/tcp_cubic.c)
 */
#define TRACE_SOURCE &trace_quic

/* Constant definitions:
 * CUBIC_BETA_SCALED refers to the scaled value of the RFC 9438 beta_cubic variable.
 * CUBIC_C_SCALED refers to the scaled value of the RFC 9438 C variable.
 */

/* The right shift to apply to scaled values to get their real values. */
#define CUBIC_SCALE_FACTOR_SHIFT 10

/* CUBIC multiplicative decrease factor as described in RFC 9438 section 4.6 */
#define CUBIC_BETA_SCALED 717 /* beta_cubic = 0.7 (constant) */

/* CUBIC C constant that determines the aggressiveness of CUBIC in competing
 * with other congestion control algorithms in high-BDP networks.
 */
#define CUBIC_C_SCALED 410 /* RFC 9438 C = 0.4 segments/s^3
                            * or 410 mB/s^3 in this implementation.
                            */

/* The scaled value of 1 */
#define CUBIC_ONE_SCALED (1 << CUBIC_SCALE_FACTOR_SHIFT)
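
/* Note: a scaled constant represents its real value multiplied by 2^10 = 1024:
 * 717 / 1024 ~= 0.7 (beta_cubic) and 410 / 1024 ~= 0.4 (C). To apply such a
 * factor, multiply first, then shift right, e.g.:
 * (CUBIC_BETA_SCALED * cwnd) >> CUBIC_SCALE_FACTOR_SHIFT ~= 0.7 * cwnd.
 */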

/* The maximum time value which may be cubed and multiplied by CUBIC_C_SCALED */
#define CUBIC_TIME_LIMIT 355535ULL /* ms */
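/* Indeed, CUBIC_C_SCALED * CUBIC_TIME_LIMIT^3 = 410 * 355535^3 ~= 1.84e19,
 * just below UINT64_MAX ~= 1.845e19, so this product still fits in a uint64_t.
 */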

/* Per-connection CUBIC algorithm state. Note that the current congestion window
 * value is not stored in this structure.
 */
struct cubic {
	/* QUIC_CC_ST_* state values. */
	uint32_t state;
	/* Slow start threshold (in bytes) */
	uint32_t ssthresh;
	/* Remaining number of acknowledged bytes between two ACKs for the CUBIC
	 * congestion control window (in bytes).
	 */
	uint32_t remaining_inc;
	/* Time at which the current congestion avoidance stage started (in ms). */
	uint32_t t_epoch;
	/* The window to reach for each recovery period during a concave region (in bytes). */
	uint32_t W_target;
	/* The time period to reach W_target during a concave region (in ms). */
	uint32_t K;
	/* The last window maximum reached (in bytes). */
	uint32_t last_w_max;
	/* Estimated value of the Reno congestion window in the TCP-friendly region (in bytes). */
	uint32_t W_est;
	/* Remaining number of acknowledged bytes between two ACKs for the estimated
	 * TCP-Reno congestion control window (in bytes).
	 */
	uint32_t remaining_W_est_inc;
	/* Start time of the recovery period (used to avoid re-entering this state, if
	 * already in a recovery period) (in ms).
	 */
	uint32_t recovery_start_time;
};

static void quic_cc_cubic_reset(struct quic_cc *cc)
{
	struct cubic *c = quic_cc_priv(cc);

	TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
	c->state = QUIC_CC_ST_SS;
	c->ssthresh = QUIC_CC_INFINITE_SSTHESH;
	c->remaining_inc = 0;
	c->remaining_W_est_inc = 0;
	c->t_epoch = 0;
	c->W_target = 0;
	c->K = 0;
	c->last_w_max = 0;
	c->W_est = 0;
	c->recovery_start_time = 0;
	TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
}

static int quic_cc_cubic_init(struct quic_cc *cc)
{
	quic_cc_cubic_reset(cc);
	return 1;
}

/* Cubic root.
 * Highly inspired from Linux kernel sources.
 * See net/ipv4/tcp_cubic.c
 */
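/* A quick sanity check of the arithmetic below: the lookup table yields a
 * rough estimate which a single Newton-Raphson iteration, x = (2*x + val/x^2)/3
 * (approximated here with val/(x*(x-1)), the division by 3 being implemented
 * as a multiplication by 341/1024 ~= 1/3), then refines:
 * cubic_root(8) = 2, cubic_root(64) = 4, cubic_root(1000000) = 100.
 */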
static uint32_t cubic_root(uint64_t val)
{
	uint32_t x, b, shift;

	static const uint8_t v[] = {
		  0,  54,  54,  54, 118, 118, 118, 118,
		123, 129, 134, 138, 143, 147, 151, 156,
		157, 161, 164, 168, 170, 173, 176, 179,
		181, 185, 187, 190, 192, 194, 197, 199,
		200, 202, 204, 206, 209, 211, 213, 215,
		217, 219, 221, 222, 224, 225, 227, 229,
		231, 232, 234, 236, 237, 239, 240, 242,
		244, 245, 246, 248, 250, 251, 252, 254,
	};

	if (!val || (b = my_flsl(val)) < 7) {
		/* val in [0..63] */
		return ((uint32_t)v[(uint32_t)val] + 35) >> 6;
	}

	b = ((b * 84) >> 8) - 1;
	shift = (val >> (b * 3));

	x = ((uint32_t)(((uint32_t)v[shift] + 10) << b)) >> 6;

	x = 2 * x + (uint32_t)(val / ((uint64_t)x * (uint64_t)(x - 1)));
	x = ((x * 341) >> 10);

	return x;
}

/*
 * RFC 9438 3.1. Principle 1 for the CUBIC Increase Function
 *
 * For better network utilization and stability, CUBIC [HRX08] uses a cubic
 * window increase function in terms of the elapsed time from the last
 * congestion event. While most congestion control algorithms that provide
 * alternatives to Reno increase the congestion window using convex functions,
 * CUBIC uses both the concave and convex profiles of a cubic function for
 * window growth.
 *
 * After a window reduction in response to a congestion event detected by
 * duplicate acknowledgments (ACKs), Explicit Congestion Notification-Echo
 * (ECN-Echo (ECE)) ACKs [RFC3168], RACK-TLP for TCP [RFC8985], or QUIC loss
 * detection [RFC9002], CUBIC remembers the congestion window size at which it
 * received the congestion event and performs a multiplicative decrease of the
 * congestion window. When CUBIC enters into congestion avoidance, it starts to
 * increase the congestion window using the concave profile of the cubic
 * function. The cubic function is set to have its plateau at the remembered
 * congestion window size, so that the concave window increase continues until
 * then. After that, the cubic function turns into a convex profile and the
 * convex window increase begins.
 *
 *        W_cubic(time) (bytes)
 *           ^                                             convex region
 *           |                                       <---------------------->
 *           |                                       .                    +
 *           |                                       .                 +
 *           |                                       .              +
 *           |                                       .           +
 *           |                                       .        +        ^
 *           |                                       .      +          | W_cubic_t
 *           |                                       .    +            |
 *           |                                       .  +              |
 *  W_target |------------+--------------------------+-----------------+
 *   (W_max) |                                     + .
 *           |                                 +     .
 *           |                            +          .
 *           |                       +               .
 *           |                  +                    .
 *           |              +                        .
 *           |            +                          .
 *           +------------+--------------------------+-----------------> time (s)
 *           0         t_epoch                (t_epoch + K)
 *           .            <-------------------------->
 *           .                  concave region
 *      congestion
 *         event
 *
 * RFC 9438 4.2. Window Increase Function:
 *
 *    W_cubic(t) = C*(t-K)^3 + W_max            (Figure 1)
 *    K = cubic_root((W_max - cwnd_epoch)/C)    (Figure 2)
 *
 * +---------------------------------+--------------------------------------+
 * |      RFC 9438 definitions       |            Code variables            |
 * +---------------------------------+--------------------------------------+
 * | C (segments/s^3)                | CUBIC_C_SCALED (mB/s^3)              |
 * +---------------------------------+--------------------------------------+
 * | W_max - cwnd_epoch (segments)   | c->last_w_max - path->cwnd (bytes)   |
 * +---------------------------------+--------------------------------------+
 * | K (s)                           | c->K (ms)                            |
 * +---------------------------------+--------------------------------------+
 * | beta_cubic (constant)           | CUBIC_BETA_SCALED (constant)         |
 * +---------------------------------+--------------------------------------+
 */
static inline void quic_cubic_update(struct quic_cc *cc, uint32_t acked)
{
	struct cubic *c = quic_cc_priv(cc);
	struct quic_path *path = container_of(cc, struct quic_path, cc);
	/* The elapsed time since the start of the current congestion avoidance
	 * epoch (in ms).
	 */
	uint32_t elapsed_time;
	/* Target value of the congestion window. */
	uint32_t target;
	/* The time at which the congestion window will be computed based
	 * on the cubic increase function.
	 */
	uint64_t t;
	/* The computed value of the congestion window at time t based on the cubic
	 * increase function.
	 */
	uint64_t W_cubic_t;
	uint32_t inc, inc_diff;

	TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
	if (!c->t_epoch) {
		c->t_epoch = now_ms;
		if (c->last_w_max <= path->cwnd) {
			c->K = 0;
			c->W_target = path->cwnd;
		}
		else {
			uint64_t wnd_diff;

			/* K value computing (in seconds):
			 * K = cubic_root((W_max - cwnd_epoch)/C) (Figure 2)
			 * Note that K is stored in milliseconds and that
			 * 8000 * 125000 = 1000^3.
			 *
			 * Supporting 2^40 windows, shifted by 10, leaves ~13 bits of unused
			 * precision. We exploit this precision for our ms conversion by
			 * multiplying by 8000 without overflowing, then later by 125000
			 * after the divide so that we limit the precision loss to the
			 * minimum before the cubic_root() call.
			 */
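			/* For instance, assuming c->last_w_max - path->cwnd = 30000 bytes
			 * and path->mtu = 1500: wnd_diff ends up at about 5.0e10 below, and
			 * c->K = cubic_root(5.0e10) ~= 3684 ms, which matches the RFC formula:
			 * K = ((30000/1500 segments) / 0.4)^(1/3) = 50^(1/3) ~= 3.68 s.
			 */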
			wnd_diff = (c->last_w_max - path->cwnd) << CUBIC_SCALE_FACTOR_SHIFT;
			wnd_diff *= 8000ULL;
			wnd_diff /= CUBIC_C_SCALED * path->mtu;
			wnd_diff *= 125000ULL;
			c->K = cubic_root(wnd_diff);
			c->W_target = c->last_w_max;
		}

		c->W_est = path->cwnd;
		c->remaining_inc = 0;
		c->remaining_W_est_inc = 0;
	}

	elapsed_time = now_ms + path->loss.rtt_min - c->t_epoch;
	if (elapsed_time < c->K) {
		t = c->K - elapsed_time;
	}
	else {
		t = elapsed_time - c->K;
	}
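	/* Note: t holds |elapsed_time - K|. (t - K)^3 is computed below from this
	 * absolute value, the sign being reapplied when choosing between
	 * W_target - W_cubic_t (concave part) and W_target + W_cubic_t (convex part).
	 */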

	if (t > CUBIC_TIME_LIMIT) {
		/* TODO : should not happen if we handle the case
		 * of very late ACK receipt. This must be handled as a congestion
		 * control event: a very late ack should trigger a congestion
		 * control algorithm reset.
		 */
		quic_cc_cubic_reset(cc);
		goto leave;
	}

	/* Compute W_cubic_t at time t. */
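	/* t is in ms, so each (W_cubic_t * t) / 1000 step below multiplies by t
	 * expressed in seconds while keeping intermediate products within 64 bits.
	 * After the three steps, W_cubic_t = CUBIC_C_SCALED * mtu * (t/1000)^3,
	 * and the final right shift removes the 2^10 scaling of CUBIC_C_SCALED.
	 */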
	W_cubic_t = CUBIC_C_SCALED * path->mtu;
	W_cubic_t = (W_cubic_t * t) / 1000;
	W_cubic_t = (W_cubic_t * t) / 1000;
	W_cubic_t = (W_cubic_t * t) / 1000;
	W_cubic_t >>= CUBIC_SCALE_FACTOR_SHIFT;
	if (elapsed_time < c->K)
		target = c->W_target - W_cubic_t;
	else
		target = c->W_target + W_cubic_t;

	if (target > path->cwnd) {
		/* Concave region */

		/* RFC 9438 4.4. Concave Region
		 *
		 * When receiving a new ACK in congestion avoidance, if CUBIC is not in
		 * the Reno-friendly region and cwnd is less than Wmax, then CUBIC is
		 * in the concave region. In this region, cwnd MUST be incremented by
		 * (target - cwnd) / cwnd.
		 */
		inc_diff = c->remaining_inc + path->mtu * (target - path->cwnd);
		c->remaining_inc = inc_diff % path->cwnd;
		inc = inc_diff / path->cwnd;
	}
	else {
		/* Convex region: very small increment */

		/* RFC 9438 4.5. Convex Region
		 *
		 * When receiving a new ACK in congestion avoidance, if CUBIC is not in
		 * the Reno-friendly region and cwnd is larger than or equal to Wmax,
		 * then CUBIC is in the convex region. The convex region indicates that
		 * the network conditions might have changed since the last congestion
		 * event, possibly implying more available bandwidth after some flow
		 * departures. Since the Internet is highly asynchronous, some amount
		 * of perturbation is always possible without causing a major change in
		 * available bandwidth. Unless the cwnd is overridden by the AIMD window
		 * increase, CUBIC will behave cautiously when operating in this region.
		 * The convex profile aims to increase the window very slowly at the
		 * beginning when cwnd is around Wmax and then gradually increases its
		 * rate of increase. This region is also called the "maximum probing
		 * phase", since CUBIC is searching for a new Wmax. In this region,
		 * cwnd MUST be incremented by (target - cwnd) / cwnd for each received
		 * new ACK, where target is calculated as described in Section 4.2.
		 */
		inc_diff = c->remaining_inc + path->mtu;
		c->remaining_inc = inc_diff % (100 * path->cwnd);
		inc = inc_diff / (100 * path->cwnd);
	}

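	/* Update the estimated Reno window (RFC 9438 Figure 4 with alpha_cubic == 1):
	 * W_est is increased by acked/cwnd segments, i.e. by path->mtu * acked / cwnd
	 * bytes, the remainder being carried over to the next ACK.
	 */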
	inc_diff = c->remaining_W_est_inc + path->mtu * acked;
	c->W_est += inc_diff / path->cwnd;
	c->remaining_W_est_inc = inc_diff % path->cwnd;

	/* TCP friendliness:
	 * RFC 9438 4.3. Reno-Friendly Region
	 *
	 * Reno performs well in certain types of networks -- for example, under
	 * short RTTs and small bandwidths (or small BDPs). In these networks,
	 * CUBIC remains in the Reno-friendly region to achieve at least the same
	 * throughput as Reno.
	 *
	 * When receiving a new ACK in congestion avoidance (where cwnd could be
	 * greater than or less than W_max), CUBIC checks whether W_cubic(t) is less
	 * than W_est. If so, CUBIC is in the Reno-friendly region and cwnd SHOULD
	 * be set to W_est at each reception of a new ACK.
	 *
	 * W_est is set equal to cwnd_epoch at the start of the congestion avoidance
	 * stage. After that, on every new ACK, W_est is updated using Figure 4.
	 * Note that this equation uses segments_acked and cwnd is measured in
	 * segments. An implementation that measures cwnd in bytes should adjust the
	 * equation accordingly using the number of acknowledged bytes and the SMSS.
	 * Also note that this equation works for connections with enabled or
	 * disabled delayed ACKs [RFC5681], as segments_acked will be different based
	 * on the segments actually acknowledged by a new ACK.
	 *
	 * Figure 4: W_est = W_est + alpha_cubic * (segments_acked / cwnd)
	 *
	 * Once W_est has grown to reach the cwnd at the time of most recently
	 * setting ssthresh -- that is, W_est >= cwnd_prior -- the sender SHOULD set
	 * alpha_cubic to 1 to ensure that it can achieve the same congestion window
	 * increment rate as Reno, which uses AIMD(1, 0.5).
	 */
	if (c->W_est > path->cwnd) {
		uint32_t W_est_inc = path->mtu * (c->W_est - path->cwnd) / path->cwnd;
		if (W_est_inc > inc)
			inc = W_est_inc;
	}

	path->cwnd += inc;
	path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
 leave:
	TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
}

static void quic_cc_cubic_slow_start(struct quic_cc *cc)
{
	TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
	quic_cc_cubic_reset(cc);
	TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
}

static void quic_enter_recovery(struct quic_cc *cc)
{
	struct quic_path *path = container_of(cc, struct quic_path, cc);
	struct cubic *c = quic_cc_priv(cc);

	TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
	c->t_epoch = 0;
	c->recovery_start_time = now_ms;

	/* RFC 9438 4.7. Fast Convergence
	 *
	 * To improve convergence speed, CUBIC uses a heuristic. When a new flow
	 * joins the network, existing flows need to give up some of their bandwidth
	 * to allow the new flow some room for growth if the existing flows have
	 * been using all the network bandwidth. To speed up this bandwidth release
	 * by existing flows, the following fast convergence mechanism SHOULD be
	 * implemented. With fast convergence, when a congestion event occurs, Wmax
	 * is updated as follows, before the window reduction described in Section
	 * 4.6.
	 *
	 * if cwnd < Wmax and fast convergence enabled, further reduce Wmax:
	 *     Wmax = cwnd * (1 + beta_cubic) / 2
	 * otherwise, remember cwnd before reduction:
	 *     Wmax = cwnd
	 */
	if (path->cwnd < c->last_w_max) {
		/* (1 + beta_cubic) * path->cwnd / 2 */
		c->last_w_max = (path->cwnd * (CUBIC_ONE_SCALED + CUBIC_BETA_SCALED) / 2) >> CUBIC_SCALE_FACTOR_SHIFT;
	}
	else {
		c->last_w_max = path->cwnd;
	}

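	/* For instance, with path->cwnd = 100000 bytes, fast convergence sets
	 * last_w_max to (100000 * 1741 / 2) >> 10 = 85009 bytes (~0.85 * cwnd),
	 * and ssthresh below becomes (717 * 100000) >> 10 = 70019 bytes (~0.7 * cwnd).
	 */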
	c->ssthresh = (CUBIC_BETA_SCALED * path->cwnd) >> CUBIC_SCALE_FACTOR_SHIFT;
	path->cwnd = QUIC_MAX(c->ssthresh, (uint32_t)path->min_cwnd);
	c->state = QUIC_CC_ST_RP;
	TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc, NULL, cc);
}

/* Congestion slow-start callback. */
static void quic_cc_cubic_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev)
{
	struct quic_path *path = container_of(cc, struct quic_path, cc);
	struct cubic *c = quic_cc_priv(cc);

	TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
	TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev);
	switch (ev->type) {
	case QUIC_CC_EVT_ACK:
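		/* RFC 9002 7.3.1. Slow start: grow cwnd by the number of newly
		 * acknowledged bytes (doubling it every RTT), guarding against
		 * overflowing the infinite threshold value.
		 */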
		if (path->cwnd < QUIC_CC_INFINITE_SSTHESH - ev->ack.acked)
			path->cwnd += ev->ack.acked;
		/* Exit to congestion avoidance if slow start threshold is reached. */
		if (path->cwnd >= c->ssthresh)
			c->state = QUIC_CC_ST_CA;
		path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
		break;

	case QUIC_CC_EVT_LOSS:
		quic_enter_recovery(cc);
		break;

	case QUIC_CC_EVT_ECN_CE:
		/* TODO */
		break;
	}

 out:
	TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, NULL, cc);
	TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
}

/* Congestion avoidance callback. */
static void quic_cc_cubic_ca_cb(struct quic_cc *cc, struct quic_cc_event *ev)
{
	TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
	TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev);
	switch (ev->type) {
	case QUIC_CC_EVT_ACK:
		quic_cubic_update(cc, ev->ack.acked);
		break;
	case QUIC_CC_EVT_LOSS:
		quic_enter_recovery(cc);
		break;
	case QUIC_CC_EVT_ECN_CE:
		/* TODO */
		break;
	}

 out:
	TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, NULL, cc);
	TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
}

/* Recovery period callback */
static void quic_cc_cubic_rp_cb(struct quic_cc *cc, struct quic_cc_event *ev)
{
	struct cubic *c = quic_cc_priv(cc);

	TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc, ev);
	TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev, cc);

	switch (ev->type) {
	case QUIC_CC_EVT_ACK:
		/* RFC 9002 7.3.2. Recovery
		 * A recovery period ends and the sender enters congestion avoidance when a
		 * packet sent during the recovery period is acknowledged.
		 */
		if (tick_is_le(ev->ack.time_sent, c->recovery_start_time)) {
			TRACE_PROTO("CC cubic (still in recov. period)", QUIC_EV_CONN_CC, cc->qc);
			goto leave;
		}

		c->state = QUIC_CC_ST_CA;
		c->recovery_start_time = TICK_ETERNITY;
		break;
	case QUIC_CC_EVT_LOSS:
		break;
	case QUIC_CC_EVT_ECN_CE:
		/* TODO */
		break;
	}

 leave:
	TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, NULL, cc);
	TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc, NULL, cc);
}

static void (*quic_cc_cubic_state_cbs[])(struct quic_cc *cc,
                                         struct quic_cc_event *ev) = {
	[QUIC_CC_ST_SS] = quic_cc_cubic_ss_cb,
	[QUIC_CC_ST_CA] = quic_cc_cubic_ca_cb,
	[QUIC_CC_ST_RP] = quic_cc_cubic_rp_cb,
};

static void quic_cc_cubic_event(struct quic_cc *cc, struct quic_cc_event *ev)
{
	struct cubic *c = quic_cc_priv(cc);

	return quic_cc_cubic_state_cbs[c->state](cc, ev);
}

static void quic_cc_cubic_state_trace(struct buffer *buf, const struct quic_cc *cc)
{
	struct quic_path *path;
	struct cubic *c = quic_cc_priv(cc);

	path = container_of(cc, struct quic_path, cc);
	chunk_appendf(buf, " state=%s cwnd=%llu mcwnd=%llu ssthresh=%d rpst=%dms",
	              quic_cc_state_str(c->state),
	              (unsigned long long)path->cwnd,
	              (unsigned long long)path->mcwnd,
	              (int)c->ssthresh,
	              !tick_isset(c->recovery_start_time) ? -1 :
	              TICKS_TO_MS(tick_remain(c->recovery_start_time, now_ms)));
}

struct quic_cc_algo quic_cc_algo_cubic = {
	.type        = QUIC_CC_ALGO_TP_CUBIC,
	.init        = quic_cc_cubic_init,
	.event       = quic_cc_cubic_event,
	.slow_start  = quic_cc_cubic_slow_start,
	.state_trace = quic_cc_cubic_state_trace,
};