Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 1 | #include <haproxy/quic_cc.h> |
Frédéric Lécaille | 8f99194 | 2023-03-24 15:14:45 +0100 | [diff] [blame] | 2 | #include <haproxy/ticks.h> |
| 3 | #include <haproxy/trace.h> |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 4 | |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame^] | 5 | /* IMPORTANT NOTE about the units defined by the RFC 9438 |
| 6 | * (CUBIC for Fast and Long-Distance Networks): |
| 7 | * |
| 8 | * RFC 9438 4.1. Definitions: |
| 9 | * The unit of all window sizes in this document is segments of the SMSS, and |
| 10 | * the unit of all times is seconds. Implementations can use bytes to express |
| 11 | * window sizes, which would require factoring in the SMSS wherever necessary |
| 12 | * and replacing segments_acked (Figure 4) with the number of acknowledged |
| 13 | * bytes. |
| 14 | */ |
| 15 | |
| 16 | /* So, this is the reason why here in this implementation each time a number |
| 17 | * of segments is used (typically a congestion window value), its value is |
| 18 | * multiplied by the MTU value. |
| 19 | */ |
| 20 | |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 21 | /* This source file is highly inspired from Linux kernel source file |
| 22 | * implementation for TCP Cubic. In fact, we have no choice if we do |
| 23 | * not want to use any floating point operations to be fast! |
| 24 | * (See net/ipv4/tcp_cubic.c) |
| 25 | */ |
| 26 | #define TRACE_SOURCE &trace_quic |
| 27 | |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame^] | 28 | /* Constants definitions: |
| 29 | * CUBIC_BETA_SCALED refers to the scaled value of RFC 9438 beta_cubic variable. |
| 30 | * CUBIC_C_SCALED refers to the scaled value of RFC 9438 C variable. |
| 31 | */ |
| 32 | |
| 33 | /* The right shifting value to apply to scaled values to get its real value. */ |
| 34 | #define CUBIC_SCALE_FACTOR_SHIFT 10 |
| 35 | |
| 36 | /* CUBIC multiplicative decrease factor as described in RFC 9438 section 4.6 */ |
| 37 | #define CUBIC_BETA_SCALED 717 /* beta_cubic = 0.7 (constant) */ |
| 38 | |
| 39 | /* CUBIC C constant that determines the aggressiveness of CUBIC in competing |
| 40 | * with other congestion control algorithms in high-BDP networks. |
| 41 | */ |
| 42 | #define CUBIC_C_SCALED 410 /* RFC 9438 C = 0.4 segment/seconds^3 |
| 43 | * or 410 mB/s^3 in this implementation. |
| 44 | */ |
| 45 | |
| 46 | /* The scaled value of 1 */ |
| 47 | #define CUBIC_ONE_SCALED (1 << CUBIC_SCALE_FACTOR_SHIFT) |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 48 | |
/* The left bit shifting to apply to convert a value expressed in seconds to
 * its scaled time unit (1024ths of a second, i.e. approximately milliseconds).
 */
| 50 | #define TIME_SCALE_FACTOR_SHIFT 10 |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 51 | |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame^] | 52 | /* The maximum time value which may be cubed and multiplied by CUBIC_C_SCALED */ |
| 53 | #define CUBIC_TIME_LIMIT 355535ULL /* ms */ |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 54 | |
/* Per-connection CUBIC algorithm state. Note that the current congestion window
 * value is not stored in this structure (the code below reads/writes it through
 * the enclosing quic_path as path->cwnd).
 */
struct cubic {
	/* Current state of the state machine: one of the QUIC_CC_ST_* values
	 * (slow start, congestion avoidance, recovery period).
	 */
	uint32_t state;
	/* Slow start threshold (in bytes) */
	uint32_t ssthresh;
	/* Remaining number of acknowledged bytes between two ACKs for CUBIC congestion
	 * control window (in bytes). Carries the division remainder over to the
	 * next update so no increment is lost to integer truncation.
	 */
	uint32_t remaining_inc;
	/* Start time of the current congestion avoidance stage (in ms).
	 * Zero means "not started yet" (see quic_cubic_update()).
	 */
	uint32_t t_epoch;
	/* The window to reach for each recovery period during a concave region (in bytes). */
	uint32_t W_target;
	/* The time period to reach W_target during a concave region (in ms). */
	uint32_t K;
	/* The last window maximum reached (in bytes). */
	uint32_t last_w_max;
	/* Estimated value of the Reno congestion window in the TCP-friendly region (in bytes). */
	uint32_t W_est;
	/* Remaining number of acknowledged bytes between two ACKs for estimated
	 * TCP-Reno congestion control window (in bytes).
	 */
	uint32_t remaining_W_est_inc;
	/* Start time of recovery period (used to avoid re-entering this state, if already
	 * in recovery period) (in ms).
	 */
	uint32_t recovery_start_time;
};
| 86 | |
| 87 | static void quic_cc_cubic_reset(struct quic_cc *cc) |
| 88 | { |
| 89 | struct cubic *c = quic_cc_priv(cc); |
| 90 | |
Frédéric Lécaille | de2ba86 | 2023-04-02 10:49:35 +0200 | [diff] [blame] | 91 | TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc); |
Frédéric Lécaille | 7d6270a | 2023-04-02 12:43:22 +0200 | [diff] [blame] | 92 | c->state = QUIC_CC_ST_SS; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 93 | c->ssthresh = QUIC_CC_INFINITE_SSTHESH; |
| 94 | c->remaining_inc = 0; |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame^] | 95 | c->remaining_W_est_inc = 0; |
| 96 | c->t_epoch = 0; |
| 97 | c->W_target = 0; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 98 | c->K = 0; |
| 99 | c->last_w_max = 0; |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame^] | 100 | c->W_est = 0; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 101 | c->recovery_start_time = 0; |
Frédéric Lécaille | de2ba86 | 2023-04-02 10:49:35 +0200 | [diff] [blame] | 102 | TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc); |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 103 | } |
| 104 | |
/* Initialization callback for the CUBIC algorithm: reset the per-connection
 * state of <cc>. Always returns 1 (success).
 */
static int quic_cc_cubic_init(struct quic_cc *cc)
{
	quic_cc_cubic_reset(cc);
	return 1;
}
| 110 | |
/* Cubic root (integer approximation of cbrt(val)).
 * Highly inspired from Linux kernel sources.
 * See net/ipv4/tcp_cubic.c
 */
static uint32_t cubic_root(uint64_t val)
{
	uint32_t x, b, shift;

	/* Lookup table: v[n] is a rounded estimate of 64 * cbrt(n) for
	 * n in [0..63], used both as the final answer for small inputs and
	 * as the initial guess for the Newton iteration below.
	 */
	static const uint8_t v[] = {
		  0,   54,   54,   54,  118,  118,  118,  118,
		123,  129,  134,  138,  143,  147,  151,  156,
		157,  161,  164,  168,  170,  173,  176,  179,
		181,  185,  187,  190,  192,  194,  197,  199,
		200,  202,  204,  206,  209,  211,  213,  215,
		217,  219,  221,  222,  224,  225,  227,  229,
		231,  232,  234,  236,  237,  239,  240,  242,
		244,  245,  246,  248,  250,  251,  252,  254,
	};

	if (!val || (b = my_flsl(val)) < 7) {
		/* val in [0..63]: direct table lookup, +35 to round when
		 * dividing back by 64.
		 */
		return ((uint32_t)v[(uint32_t)val] + 35) >> 6;
	}

	/* b = number of significant bits of val; b*84/256 approximates b/3,
	 * giving the magnitude (power of 2) of the cube root.
	 */
	b = ((b * 84) >> 8) - 1;
	shift = (val >> (b * 3));

	/* Initial estimate: table value for the top bits, scaled back up. */
	x = ((uint32_t)(((uint32_t)v[shift] + 10) << b)) >> 6;

	/* One Newton-Raphson iteration for f(x) = x^3 - val:
	 * x = (2*x + val/x^2) / 3, with x*(x-1) standing in for x^2 and
	 * 341/1024 approximating 1/3.
	 */
	x = 2 * x + (uint32_t)(val / ((uint64_t)x * (uint64_t)(x - 1)));
	x = ((x * 341) >> 10);

	return x;
}
| 145 | |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame^] | 146 | /* |
| 147 | * RFC 9438 3.1. Principle 1 for the CUBIC Increase Function |
| 148 | * |
| 149 | * For better network utilization and stability, CUBIC [HRX08] uses a cubic |
| 150 | * window increase function in terms of the elapsed time from the last |
| 151 | * congestion event. While most congestion control algorithms that provide |
| 152 | * alternatives to Reno increase the congestion window using convex functions, |
| 153 | * CUBIC uses both the concave and convex profiles of a cubic function for |
| 154 | * window growth. |
| 155 | * |
| 156 | * After a window reduction in response to a congestion event detected by |
| 157 | * duplicate acknowledgments (ACKs), Explicit Congestion Notification-Echo |
| 158 | * (ECN-Echo (ECE)) ACKs [RFC3168], RACK-TLP for TCP [RFC8985], or QUIC loss |
| 159 | * detection [RFC9002], CUBIC remembers the congestion window size at which it |
| 160 | * received the congestion event and performs a multiplicative decrease of the |
| 161 | * congestion window. When CUBIC enters into congestion avoidance, it starts to |
| 162 | * increase the congestion window using the concave profile of the cubic |
| 163 | * function. The cubic function is set to have its plateau at the remembered |
| 164 | * congestion window size, so that the concave window increase continues until |
| 165 | * then. After that, the cubic function turns into a convex profile and the |
| 166 | * convex window increase begins. |
| 167 | * |
| 168 | * W_cubic(time) (bytes) |
| 169 | * ^ convex region |
| 170 | * | <-------------------------> |
| 171 | * | . + |
| 172 | * | . + |
| 173 | * | . + |
| 174 | * | . + |
| 175 | * | . + ^ |
| 176 | * | . + | W_cubic_t |
| 177 | * | . + | |
| 178 | * | . + | |
| 179 | * W_target |-----------+--------------------------+------------------------+ |
| 180 | * (W_max) | +. + . t |
| 181 | * | + . + . |
| 182 | * | + . + . |
| 183 | * | + . + . |
| 184 | * | + . + . |
| 185 | * | .+ . |
| 186 | * | + . |
| 187 | * | + . |
| 188 | * | + . |
| 189 | * | . . |
| 190 | * | . . |
| 191 | * | . . |
| 192 | * +-----------+--------------------------+-+------------------------> time (s) |
| 193 | * 0 t_epoch (t_epoch + K) |
| 194 | * <--------------------------> |
| 195 | * . concave region |
| 196 | * . |
| 197 | * congestion |
| 198 | * event |
| 199 | * |
| 200 | * RFC 9438 4.2. Window Increase Function: |
| 201 | * |
| 202 | * W_cubic(t) = C*(t-K)^3 + W_max (Figure 1) |
| 203 | * K = cubic_root((W_max - cwnd_epoch)/C) (Figure 2) |
| 204 | * |
| 205 | * +--------------------------------------------------------------------+ |
| 206 | * | RFC 9438 definitions | Code variables | |
| 207 | * +--------------------------------------------------------------------+ |
| 208 | * | C (segments/s^3) | CUBIC_C_SCALED (mB/s^3) | |
| 209 | * +--------------------------------------------------------------------+ |
| 210 | * | W_max (segments) | c->last_w_max - path->cwnd (bytes) | |
| 211 | * +--------------------------------------------------------------------+ |
| 212 | * | K (s) | c->K (ms) | |
| 213 | * +--------------------------------------------------------------------+ |
| 214 | * | beta_cubic (constant) | CUBIC_BETA_SCALED (constant) | |
| 215 | * +--------------------------------------------------------------------+ |
| 216 | */ |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 217 | static inline void quic_cubic_update(struct quic_cc *cc, uint32_t acked) |
| 218 | { |
| 219 | struct cubic *c = quic_cc_priv(cc); |
| 220 | struct quic_path *path = container_of(cc, struct quic_path, cc); |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame^] | 221 | /* The elapsed time since the start of the congestion event. */ |
| 222 | uint32_t elapsed_time; |
| 223 | /* Target value of the congestion window. */ |
| 224 | uint32_t target; |
| 225 | /* The time at which the congestion window will be computed based |
| 226 | * on the cubic increase function. |
| 227 | */ |
| 228 | uint64_t t; |
| 229 | /* The computed value of the congestion window at time t based on the cubic |
| 230 | * increase function. |
| 231 | */ |
| 232 | uint64_t W_cubic_t; |
| 233 | uint32_t inc, inc_diff; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 234 | |
Frédéric Lécaille | de2ba86 | 2023-04-02 10:49:35 +0200 | [diff] [blame] | 235 | TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc); |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame^] | 236 | if (!c->t_epoch) { |
| 237 | c->t_epoch = now_ms; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 238 | if (c->last_w_max <= path->cwnd) { |
| 239 | c->K = 0; |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame^] | 240 | c->W_target = path->cwnd; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 241 | } |
| 242 | else { |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame^] | 243 | /* K value computing (in seconds): |
| 244 | * K = cubic_root((W_max - cwnd_epoch)/C) (Figure 2) |
| 245 | * Note that K is stored in 1024th of a second. |
| 246 | */ |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 247 | c->K = cubic_root((c->last_w_max - path->cwnd) * |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame^] | 248 | (CUBIC_ONE_SCALED - CUBIC_BETA_SCALED) / CUBIC_C_SCALED / path->mtu) << TIME_SCALE_FACTOR_SHIFT; |
| 249 | c->W_target = c->last_w_max; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 250 | } |
| 251 | |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame^] | 252 | c->W_est = path->cwnd; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 253 | c->remaining_inc = 0; |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame^] | 254 | c->remaining_W_est_inc = 0; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 255 | } |
| 256 | |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame^] | 257 | elapsed_time = now_ms + path->loss.rtt_min - c->t_epoch; |
| 258 | if (elapsed_time < c->K) { |
| 259 | t = c->K - elapsed_time; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 260 | } |
| 261 | else { |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame^] | 262 | t = elapsed_time - c->K; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 263 | } |
| 264 | |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame^] | 265 | if (t > CUBIC_TIME_LIMIT) { |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 266 | /* TODO : should not happen if we handle the case |
| 267 | * of very late acks receipt. This must be handled as a congestion |
| 268 | * control event: a very late ack should trigger a congestion |
| 269 | * control algorithm reset. |
| 270 | */ |
| 271 | quic_cc_cubic_reset(cc); |
Frédéric Lécaille | de2ba86 | 2023-04-02 10:49:35 +0200 | [diff] [blame] | 272 | goto leave; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 273 | } |
| 274 | |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame^] | 275 | /* Compute W_cubic_t at t time. */ |
| 276 | W_cubic_t = path->mtu * ((CUBIC_C_SCALED * t * t * t) >> (CUBIC_SCALE_FACTOR_SHIFT + 3 * TIME_SCALE_FACTOR_SHIFT)); |
| 277 | if (elapsed_time < c->K) |
| 278 | target = c->W_target - W_cubic_t; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 279 | else |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame^] | 280 | target = c->W_target + W_cubic_t; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 281 | |
| 282 | if (target > path->cwnd) { |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame^] | 283 | /* Concave region */ |
| 284 | |
| 285 | /* RFC 9438 4.4. Concave Region |
| 286 | * |
| 287 | * When receiving a new ACK in congestion avoidance, if CUBIC is not in |
| 288 | * the Reno-friendly region and cwnd is less than Wmax, then CUBIC is |
| 289 | * in the concave region. In this region, cwnd MUST be incremented by |
| 290 | * (target - cwnd) / cwnd. |
| 291 | */ |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 292 | inc_diff = c->remaining_inc + path->mtu * (target - path->cwnd); |
| 293 | c->remaining_inc = inc_diff % path->cwnd; |
| 294 | inc = inc_diff / path->cwnd; |
| 295 | } |
| 296 | else { |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame^] | 297 | /* Convex region: very small increment */ |
| 298 | |
| 299 | /* RFC 9438 4.5. Convex Region |
| 300 | * |
| 301 | * When receiving a new ACK in congestion avoidance, if CUBIC is not in |
| 302 | * the Reno-friendly region and cwnd is larger than or equal to Wmax, |
| 303 | * then CUBIC is in the convex region.The convex region indicates that |
| 304 | * the network conditions might have changed since the last congestion |
| 305 | * event, possibly implying more available bandwidth after some flow |
| 306 | * departures. Since the Internet is highly asynchronous, some amount |
| 307 | * of perturbation is always possible without causing a major change in |
| 308 | * available bandwidth.Unless the cwnd is overridden by the AIMD window |
| 309 | * increase, CUBIC will behave cautiously when operating in this region. |
| 310 | * The convex profile aims to increase the window very slowly at the |
| 311 | * beginning when cwnd is around Wmax and then gradually increases its |
| 312 | * rate of increase. This region is also called the "maximum probing |
| 313 | * phase", since CUBIC is searching for a new Wmax. In this region, |
| 314 | * cwnd MUST be incremented by (target - cwnd) / cwnd) for each received |
| 315 | * new ACK, where target is calculated as described in Section 4.2. |
| 316 | */ |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 317 | inc_diff = c->remaining_inc + path->mtu; |
| 318 | c->remaining_inc = inc_diff % (100 * path->cwnd); |
| 319 | inc = inc_diff / (100 * path->cwnd); |
| 320 | } |
| 321 | |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame^] | 322 | inc_diff = c->remaining_W_est_inc + path->mtu * acked; |
| 323 | c->W_est += inc_diff / path->cwnd; |
| 324 | c->remaining_W_est_inc = inc_diff % path->cwnd; |
| 325 | |
| 326 | /* TCP friendliness : |
| 327 | * RFC 9438 4.3. Reno-Friendly Region |
| 328 | * |
| 329 | * Reno performs well in certain types of networks -- for example, under |
| 330 | * short RTTs and small bandwidths (or small BDPs). In these networks, |
| 331 | * CUBIC remains in the Reno-friendly region to achieve at least the same |
| 332 | * throughput as Reno. |
| 333 | * |
| 334 | * When receiving a new ACK in congestion avoidance (where cwnd could be |
| 335 | * greater than or less than Wmax), CUBIC checks whether Wcubic(t) is less |
| 336 | * than West. If so, CUBIC is in the Reno-friendly region and cwnd SHOULD |
| 337 | * be set to West at each reception of a new ACK. |
| 338 | * |
| 339 | * West is set equal to cwnd_epoch at the start of the congestion avoidance |
| 340 | * stage. After that, on every new ACK, West is updated using Figure 4. |
| 341 | * Note that this equation uses segments_acked and cwnd is measured in |
| 342 | * segments. An implementation that measures cwnd in bytes should adjust the |
| 343 | * equation accordingly using the number of acknowledged bytes and the SMSS. |
| 344 | * Also note that this equation works for connections with enabled or |
| 345 | * disabled delayed ACKs [RFC5681], as segments_acked will be different based |
| 346 | * on the segments actually acknowledged by a new ACK. |
| 347 | * |
| 348 | * Figure 4 : West = West + alpha_cubic * (segments_acked / cwnd) |
| 349 | * |
| 350 | * Once West has grown to reach the cwnd at the time of most recently |
| 351 | * setting ssthresh -- that is, West >= cwndprior -- the sender SHOULD set |
| 352 | * alpha_cubic to 1 to ensure that it can achieve the same congestion window |
| 353 | * increment rate as Reno, which uses AIMD(1, 0.5). |
| 354 | */ |
| 355 | if (c->W_est > path->cwnd) { |
| 356 | uint32_t W_est_inc = path->mtu * (c->W_est - path->cwnd) / path->cwnd; |
| 357 | if (W_est_inc > inc) |
| 358 | inc = W_est_inc; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 359 | } |
| 360 | |
| 361 | path->cwnd += inc; |
Frédéric Lécaille | fad0e6c | 2023-04-06 10:19:17 +0200 | [diff] [blame] | 362 | path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd); |
Frédéric Lécaille | de2ba86 | 2023-04-02 10:49:35 +0200 | [diff] [blame] | 363 | leave: |
| 364 | TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc); |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 365 | } |
| 366 | |
/* Slow start callback: make the connection restart from slow start by
 * fully resetting its CUBIC state.
 */
static void quic_cc_cubic_slow_start(struct quic_cc *cc)
{
	TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
	quic_cc_cubic_reset(cc);
	TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
}
| 373 | |
| 374 | static void quic_enter_recovery(struct quic_cc *cc) |
| 375 | { |
| 376 | struct quic_path *path = container_of(cc, struct quic_path, cc); |
| 377 | struct cubic *c = quic_cc_priv(cc); |
| 378 | /* Current cwnd as number of packets */ |
| 379 | |
Frédéric Lécaille | de2ba86 | 2023-04-02 10:49:35 +0200 | [diff] [blame] | 380 | TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc); |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame^] | 381 | c->t_epoch = 0; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 382 | c->recovery_start_time = now_ms; |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame^] | 383 | |
| 384 | /* RFC 9438 4.7. Fast Convergence |
| 385 | * |
| 386 | * To improve convergence speed, CUBIC uses a heuristic. When a new flow |
| 387 | * joins the network, existing flows need to give up some of their bandwidth |
| 388 | * to allow the new flow some room for growth if the existing flows have |
| 389 | * been using all the network bandwidth. To speed up this bandwidth release |
| 390 | * by existing flows, the following fast convergence mechanism SHOULD be |
| 391 | * implemented.With fast convergence, when a congestion event occurs, Wmax |
| 392 | * is updated as follows, before the window reduction described in Section |
| 393 | * 4.6. |
| 394 | * |
| 395 | * if cwnd < Wmax and fast convergence enabled, further reduce Wax: |
| 396 | * Wmax = cwnd * (1 + beta_cubic) |
| 397 | * otherwise, remember cwn before reduction: |
| 398 | * Wmax = cwnd |
| 399 | */ |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 400 | if (path->cwnd < c->last_w_max) { |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame^] | 401 | /* (1 + beta_cubic) * path->cwnd / 2 */ |
| 402 | c->last_w_max = (path->cwnd * (CUBIC_ONE_SCALED + CUBIC_BETA_SCALED) / 2) >> CUBIC_SCALE_FACTOR_SHIFT; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 403 | } |
| 404 | else { |
| 405 | c->last_w_max = path->cwnd; |
| 406 | } |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame^] | 407 | |
| 408 | c->ssthresh = (CUBIC_BETA_SCALED * path->cwnd) >> CUBIC_SCALE_FACTOR_SHIFT; |
Frédéric Lécaille | 595251f | 2023-04-13 10:46:42 +0200 | [diff] [blame] | 409 | path->cwnd = QUIC_MAX(c->ssthresh, (uint32_t)path->min_cwnd); |
Frédéric Lécaille | 7d6270a | 2023-04-02 12:43:22 +0200 | [diff] [blame] | 410 | c->state = QUIC_CC_ST_RP; |
Frédéric Lécaille | de2ba86 | 2023-04-02 10:49:35 +0200 | [diff] [blame] | 411 | TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc, NULL, cc); |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 412 | } |
| 413 | |
| 414 | /* Congestion slow-start callback. */ |
| 415 | static void quic_cc_cubic_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev) |
| 416 | { |
| 417 | struct quic_path *path = container_of(cc, struct quic_path, cc); |
| 418 | struct cubic *c = quic_cc_priv(cc); |
| 419 | |
Frédéric Lécaille | 8f99194 | 2023-03-24 15:14:45 +0100 | [diff] [blame] | 420 | TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc); |
| 421 | TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev); |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 422 | switch (ev->type) { |
| 423 | case QUIC_CC_EVT_ACK: |
Frédéric Lécaille | db54847 | 2023-04-02 10:07:48 +0200 | [diff] [blame] | 424 | if (path->cwnd < QUIC_CC_INFINITE_SSTHESH - ev->ack.acked) |
| 425 | path->cwnd += ev->ack.acked; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 426 | /* Exit to congestion avoidance if slow start threshold is reached. */ |
| 427 | if (path->cwnd >= c->ssthresh) |
Frédéric Lécaille | 7d6270a | 2023-04-02 12:43:22 +0200 | [diff] [blame] | 428 | c->state = QUIC_CC_ST_CA; |
Frédéric Lécaille | fad0e6c | 2023-04-06 10:19:17 +0200 | [diff] [blame] | 429 | path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd); |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 430 | break; |
| 431 | |
| 432 | case QUIC_CC_EVT_LOSS: |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 433 | quic_enter_recovery(cc); |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 434 | break; |
| 435 | |
| 436 | case QUIC_CC_EVT_ECN_CE: |
| 437 | /* TODO */ |
| 438 | break; |
| 439 | } |
| 440 | |
| 441 | out: |
Frédéric Lécaille | 8f99194 | 2023-03-24 15:14:45 +0100 | [diff] [blame] | 442 | TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, NULL, cc); |
| 443 | TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc); |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 444 | } |
| 445 | |
| 446 | /* Congestion avoidance callback. */ |
| 447 | static void quic_cc_cubic_ca_cb(struct quic_cc *cc, struct quic_cc_event *ev) |
| 448 | { |
Frédéric Lécaille | 8f99194 | 2023-03-24 15:14:45 +0100 | [diff] [blame] | 449 | TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc); |
| 450 | TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev); |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 451 | switch (ev->type) { |
| 452 | case QUIC_CC_EVT_ACK: |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 453 | quic_cubic_update(cc, ev->ack.acked); |
| 454 | break; |
| 455 | case QUIC_CC_EVT_LOSS: |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 456 | quic_enter_recovery(cc); |
| 457 | break; |
| 458 | case QUIC_CC_EVT_ECN_CE: |
| 459 | /* TODO */ |
| 460 | break; |
| 461 | } |
| 462 | |
| 463 | out: |
Frédéric Lécaille | 8f99194 | 2023-03-24 15:14:45 +0100 | [diff] [blame] | 464 | TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, NULL, cc); |
Frédéric Lécaille | 23b8eef | 2023-03-31 20:18:44 +0200 | [diff] [blame] | 465 | TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc); |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 466 | } |
| 467 | |
Frédéric Lécaille | d724331 | 2023-03-21 11:54:02 +0100 | [diff] [blame] | 468 | /* Recovery period callback */ |
| 469 | static void quic_cc_cubic_rp_cb(struct quic_cc *cc, struct quic_cc_event *ev) |
| 470 | { |
| 471 | struct cubic *c = quic_cc_priv(cc); |
| 472 | |
| 473 | TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc, ev); |
Frédéric Lécaille | de2ba86 | 2023-04-02 10:49:35 +0200 | [diff] [blame] | 474 | TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev, cc); |
Frédéric Lécaille | d724331 | 2023-03-21 11:54:02 +0100 | [diff] [blame] | 475 | |
Frédéric Lécaille | d724331 | 2023-03-21 11:54:02 +0100 | [diff] [blame] | 476 | switch (ev->type) { |
| 477 | case QUIC_CC_EVT_ACK: |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame^] | 478 | /* RFC 9002 7.3.2. Recovery |
Frédéric Lécaille | d724331 | 2023-03-21 11:54:02 +0100 | [diff] [blame] | 479 | * A recovery period ends and the sender enters congestion avoidance when a |
| 480 | * packet sent during the recovery period is acknowledged. |
| 481 | */ |
Frédéric Lécaille | 8f99194 | 2023-03-24 15:14:45 +0100 | [diff] [blame] | 482 | if (tick_is_le(ev->ack.time_sent, c->recovery_start_time)) { |
| 483 | TRACE_PROTO("CC cubic (still in recov. period)", QUIC_EV_CONN_CC, cc->qc); |
Frédéric Lécaille | d724331 | 2023-03-21 11:54:02 +0100 | [diff] [blame] | 484 | goto leave; |
Frédéric Lécaille | 8f99194 | 2023-03-24 15:14:45 +0100 | [diff] [blame] | 485 | } |
Frédéric Lécaille | d724331 | 2023-03-21 11:54:02 +0100 | [diff] [blame] | 486 | |
Frédéric Lécaille | 7d6270a | 2023-04-02 12:43:22 +0200 | [diff] [blame] | 487 | c->state = QUIC_CC_ST_CA; |
Frédéric Lécaille | d724331 | 2023-03-21 11:54:02 +0100 | [diff] [blame] | 488 | c->recovery_start_time = TICK_ETERNITY; |
| 489 | break; |
| 490 | case QUIC_CC_EVT_LOSS: |
| 491 | break; |
| 492 | case QUIC_CC_EVT_ECN_CE: |
| 493 | /* TODO */ |
| 494 | break; |
| 495 | } |
| 496 | |
| 497 | leave: |
Frédéric Lécaille | 8f99194 | 2023-03-24 15:14:45 +0100 | [diff] [blame] | 498 | TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, NULL, cc); |
Frédéric Lécaille | d724331 | 2023-03-21 11:54:02 +0100 | [diff] [blame] | 499 | TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc, NULL, cc); |
| 500 | } |
| 501 | |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 502 | static void (*quic_cc_cubic_state_cbs[])(struct quic_cc *cc, |
| 503 | struct quic_cc_event *ev) = { |
| 504 | [QUIC_CC_ST_SS] = quic_cc_cubic_ss_cb, |
| 505 | [QUIC_CC_ST_CA] = quic_cc_cubic_ca_cb, |
Frédéric Lécaille | d724331 | 2023-03-21 11:54:02 +0100 | [diff] [blame] | 506 | [QUIC_CC_ST_RP] = quic_cc_cubic_rp_cb, |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 507 | }; |
| 508 | |
| 509 | static void quic_cc_cubic_event(struct quic_cc *cc, struct quic_cc_event *ev) |
| 510 | { |
Frédéric Lécaille | 7d6270a | 2023-04-02 12:43:22 +0200 | [diff] [blame] | 511 | struct cubic *c = quic_cc_priv(cc); |
| 512 | |
| 513 | return quic_cc_cubic_state_cbs[c->state](cc, ev); |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 514 | } |
| 515 | |
| 516 | static void quic_cc_cubic_state_trace(struct buffer *buf, const struct quic_cc *cc) |
| 517 | { |
Frédéric Lécaille | 01314b8 | 2023-03-24 14:09:55 +0100 | [diff] [blame] | 518 | struct quic_path *path; |
| 519 | struct cubic *c = quic_cc_priv(cc); |
| 520 | |
| 521 | path = container_of(cc, struct quic_path, cc); |
Frédéric Lécaille | fad0e6c | 2023-04-06 10:19:17 +0200 | [diff] [blame] | 522 | chunk_appendf(buf, " state=%s cwnd=%llu mcwnd=%llu ssthresh=%d rpst=%dms", |
Frédéric Lécaille | 7d6270a | 2023-04-02 12:43:22 +0200 | [diff] [blame] | 523 | quic_cc_state_str(c->state), |
Frédéric Lécaille | 01314b8 | 2023-03-24 14:09:55 +0100 | [diff] [blame] | 524 | (unsigned long long)path->cwnd, |
Frédéric Lécaille | fad0e6c | 2023-04-06 10:19:17 +0200 | [diff] [blame] | 525 | (unsigned long long)path->mcwnd, |
Frédéric Lécaille | 01314b8 | 2023-03-24 14:09:55 +0100 | [diff] [blame] | 526 | (int)c->ssthresh, |
| 527 | !tick_isset(c->recovery_start_time) ? -1 : |
| 528 | TICKS_TO_MS(tick_remain(c->recovery_start_time, now_ms))); |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 529 | } |
| 530 | |
/* CUBIC congestion control algorithm descriptor (exported). */
struct quic_cc_algo quic_cc_algo_cubic = {
	.type        = QUIC_CC_ALGO_TP_CUBIC,
	.init        = quic_cc_cubic_init,
	.event       = quic_cc_cubic_event,
	.slow_start  = quic_cc_cubic_slow_start,
	.state_trace = quic_cc_cubic_state_trace,
};