Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 1 | #include <haproxy/quic_cc.h> |
Frédéric Lécaille | 8f99194 | 2023-03-24 15:14:45 +0100 | [diff] [blame] | 2 | #include <haproxy/ticks.h> |
| 3 | #include <haproxy/trace.h> |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 4 | |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame] | 5 | /* IMPORTANT NOTE about the units defined by the RFC 9438 |
| 6 | * (CUBIC for Fast and Long-Distance Networks): |
| 7 | * |
| 8 | * RFC 9438 4.1. Definitions: |
| 9 | * The unit of all window sizes in this document is segments of the SMSS, and |
| 10 | * the unit of all times is seconds. Implementations can use bytes to express |
| 11 | * window sizes, which would require factoring in the SMSS wherever necessary |
| 12 | * and replacing segments_acked (Figure 4) with the number of acknowledged |
| 13 | * bytes. |
| 14 | */ |
| 15 | |
| 16 | /* So, this is the reason why here in this implementation each time a number |
| 17 | * of segments is used (typically a congestion window value), its value is |
| 18 | * multiplied by the MTU value. |
| 19 | */ |
| 20 | |
/* This source file is highly inspired by the Linux kernel source file
 * implementing TCP Cubic (see net/ipv4/tcp_cubic.c). Indeed, there is
 * little choice but to follow it if we want to remain fast while
 * avoiding all floating point operations.
 */
#define TRACE_SOURCE &trace_quic

/* Constants definitions:
 * CUBIC_BETA_SCALED refers to the scaled value of RFC 9438 beta_cubic variable.
 * CUBIC_C_SCALED refers to the scaled value of RFC 9438 C variable.
 */

/* The right shifting value to apply to scaled values to get their real value. */
#define CUBIC_SCALE_FACTOR_SHIFT 10

/* CUBIC multiplicative decrease factor as described in RFC 9438 section 4.6 */
#define CUBIC_BETA_SCALED 717 /* beta_cubic = 0.7 (constant), scaled by 2^10 */

/* CUBIC C constant that determines the aggressiveness of CUBIC in competing
 * with other congestion control algorithms in high-BDP networks.
 */
#define CUBIC_C_SCALED 410 /* RFC 9438 C = 0.4 segment/seconds^3
                            * or 410 mB/s^3 in this implementation
                            * (0.4 scaled by 2^10).
                            */

/* The scaled value of 1 */
#define CUBIC_ONE_SCALED (1 << CUBIC_SCALE_FACTOR_SHIFT)

/* The maximum time value which may be cubed and multiplied by CUBIC_C_SCALED */
#define CUBIC_TIME_LIMIT 355535ULL /* ms */
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 51 | |
/* Per-connection CUBIC algorithm state. Note that the current congestion
 * window value is not stored in this structure: it lives in the quic_path
 * embedding the quic_cc this state is attached to.
 */
struct cubic {
	/* QUIC_CC_ST_* state values. */
	uint32_t state;
	/* Slow start threshold (in bytes) */
	uint32_t ssthresh;
	/* Remaining number of acknowledged bytes between two ACKs for CUBIC
	 * congestion control window (in bytes). Carries the division remainder
	 * across updates so no increment is lost to integer truncation.
	 */
	uint32_t remaining_inc;
	/* Start time at which the current congestion avoidance stage started (in ms). */
	uint32_t t_epoch;
	/* The window to reach for each recovery period during a concave region (in bytes). */
	uint32_t W_target;
	/* The time period to reach W_target during a concave region (in ms). */
	uint32_t K;
	/* The last window maximum reached (in bytes). */
	uint32_t last_w_max;
	/* Estimated value of the Reno congestion window in the TCP-friendly region (in bytes). */
	uint32_t W_est;
	/* Remaining number of acknowledged bytes between two ACKs for estimated
	 * TCP-Reno congestion control window (in bytes).
	 */
	uint32_t remaining_W_est_inc;
	/* Start time of recovery period (used to avoid re-entering this state, if already
	 * in recovery period) (in ms).
	 */
	uint32_t recovery_start_time;
};
| 83 | |
| 84 | static void quic_cc_cubic_reset(struct quic_cc *cc) |
| 85 | { |
| 86 | struct cubic *c = quic_cc_priv(cc); |
| 87 | |
Frédéric Lécaille | de2ba86 | 2023-04-02 10:49:35 +0200 | [diff] [blame] | 88 | TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc); |
Frédéric Lécaille | 7d6270a | 2023-04-02 12:43:22 +0200 | [diff] [blame] | 89 | c->state = QUIC_CC_ST_SS; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 90 | c->ssthresh = QUIC_CC_INFINITE_SSTHESH; |
| 91 | c->remaining_inc = 0; |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame] | 92 | c->remaining_W_est_inc = 0; |
| 93 | c->t_epoch = 0; |
| 94 | c->W_target = 0; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 95 | c->K = 0; |
| 96 | c->last_w_max = 0; |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame] | 97 | c->W_est = 0; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 98 | c->recovery_start_time = 0; |
Frédéric Lécaille | de2ba86 | 2023-04-02 10:49:35 +0200 | [diff] [blame] | 99 | TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc); |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 100 | } |
| 101 | |
/* CUBIC initialization callback: start <cc> from a pristine slow-start
 * state. Always returns 1 (success).
 */
static int quic_cc_cubic_init(struct quic_cc *cc)
{
	quic_cc_cubic_reset(cc);

	return 1;
}
| 107 | |
/* Cubic root of <val>, in fixed-point integer arithmetic.
 * Highly inspired from Linux kernel sources.
 * See net/ipv4/tcp_cubic.c
 */
static uint32_t cubic_root(uint64_t val)
{
	uint32_t x, b, shift;

	/* Lookup table for small inputs: v[k] is a precomputed approximation
	 * of 64 * cbrt(k) for k in [0..63] (taken from the Linux kernel).
	 */
	static const uint8_t v[] = {
		  0,   54,   54,   54,  118,  118,  118,  118,
		123,  129,  134,  138,  143,  147,  151,  156,
		157,  161,  164,  168,  170,  173,  176,  179,
		181,  185,  187,  190,  192,  194,  197,  199,
		200,  202,  204,  206,  209,  211,  213,  215,
		217,  219,  221,  222,  224,  225,  227,  229,
		231,  232,  234,  236,  237,  239,  240,  242,
		244,  245,  246,  248,  250,  251,  252,  254,
	};

	/* b = position of the highest set bit of <val> (1-based). */
	if (!val || (b = my_flsl(val)) < 7) {
		/* val in [0..63]: direct table lookup, +35 rounds to nearest
		 * before the >>6 descale.
		 */
		return ((uint32_t)v[(uint32_t)val] + 35) >> 6;
	}

	/* Rough initial estimate from the table entry indexed by the top bits:
	 * b ~= (bit length) / 3 - 1, so that shift fits in the table range.
	 */
	b = ((b * 84) >> 8) - 1;
	shift = (val >> (b * 3));

	x = ((uint32_t)(((uint32_t)v[shift] + 10) << b)) >> 6;

	/* One Newton-Raphson iteration to refine the estimate:
	 * x_new = (2 * x + val / x^2) / 3, using x * (x - 1) instead of x^2
	 * to compensate for integer truncation (as in the kernel), and
	 * 341/1024 ~= 1/3 for the final scaled division.
	 */
	x = 2 * x + (uint32_t)(val / ((uint64_t)x * (uint64_t)(x - 1)));
	x = ((x * 341) >> 10);

	return x;
}
| 142 | |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame] | 143 | /* |
| 144 | * RFC 9438 3.1. Principle 1 for the CUBIC Increase Function |
| 145 | * |
| 146 | * For better network utilization and stability, CUBIC [HRX08] uses a cubic |
| 147 | * window increase function in terms of the elapsed time from the last |
| 148 | * congestion event. While most congestion control algorithms that provide |
| 149 | * alternatives to Reno increase the congestion window using convex functions, |
| 150 | * CUBIC uses both the concave and convex profiles of a cubic function for |
| 151 | * window growth. |
| 152 | * |
| 153 | * After a window reduction in response to a congestion event detected by |
| 154 | * duplicate acknowledgments (ACKs), Explicit Congestion Notification-Echo |
| 155 | * (ECN-Echo (ECE)) ACKs [RFC3168], RACK-TLP for TCP [RFC8985], or QUIC loss |
| 156 | * detection [RFC9002], CUBIC remembers the congestion window size at which it |
| 157 | * received the congestion event and performs a multiplicative decrease of the |
| 158 | * congestion window. When CUBIC enters into congestion avoidance, it starts to |
| 159 | * increase the congestion window using the concave profile of the cubic |
| 160 | * function. The cubic function is set to have its plateau at the remembered |
| 161 | * congestion window size, so that the concave window increase continues until |
| 162 | * then. After that, the cubic function turns into a convex profile and the |
| 163 | * convex window increase begins. |
| 164 | * |
| 165 | * W_cubic(time) (bytes) |
| 166 | * ^ convex region |
| 167 | * | <-------------------------> |
| 168 | * | . + |
| 169 | * | . + |
| 170 | * | . + |
| 171 | * | . + |
| 172 | * | . + ^ |
| 173 | * | . + | W_cubic_t |
| 174 | * | . + | |
| 175 | * | . + | |
| 176 | * W_target |-----------+--------------------------+------------------------+ |
| 177 | * (W_max) | +. + . t |
| 178 | * | + . + . |
| 179 | * | + . + . |
| 180 | * | + . + . |
| 181 | * | + . + . |
| 182 | * | .+ . |
| 183 | * | + . |
| 184 | * | + . |
| 185 | * | + . |
| 186 | * | . . |
| 187 | * | . . |
| 188 | * | . . |
| 189 | * +-----------+--------------------------+-+------------------------> time (s) |
| 190 | * 0 t_epoch (t_epoch + K) |
| 191 | * <--------------------------> |
| 192 | * . concave region |
| 193 | * . |
| 194 | * congestion |
| 195 | * event |
| 196 | * |
| 197 | * RFC 9438 4.2. Window Increase Function: |
| 198 | * |
| 199 | * W_cubic(t) = C*(t-K)^3 + W_max (Figure 1) |
| 200 | * K = cubic_root((W_max - cwnd_epoch)/C) (Figure 2) |
| 201 | * |
| 202 | * +--------------------------------------------------------------------+ |
| 203 | * | RFC 9438 definitions | Code variables | |
| 204 | * +--------------------------------------------------------------------+ |
| 205 | * | C (segments/s^3) | CUBIC_C_SCALED (mB/s^3) | |
| 206 | * +--------------------------------------------------------------------+ |
| 207 | * | W_max (segments) | c->last_w_max - path->cwnd (bytes) | |
| 208 | * +--------------------------------------------------------------------+ |
| 209 | * | K (s) | c->K (ms) | |
| 210 | * +--------------------------------------------------------------------+ |
| 211 | * | beta_cubic (constant) | CUBIC_BETA_SCALED (constant) | |
| 212 | * +--------------------------------------------------------------------+ |
| 213 | */ |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 214 | static inline void quic_cubic_update(struct quic_cc *cc, uint32_t acked) |
| 215 | { |
| 216 | struct cubic *c = quic_cc_priv(cc); |
| 217 | struct quic_path *path = container_of(cc, struct quic_path, cc); |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame] | 218 | /* The elapsed time since the start of the congestion event. */ |
| 219 | uint32_t elapsed_time; |
| 220 | /* Target value of the congestion window. */ |
| 221 | uint32_t target; |
| 222 | /* The time at which the congestion window will be computed based |
| 223 | * on the cubic increase function. |
| 224 | */ |
| 225 | uint64_t t; |
| 226 | /* The computed value of the congestion window at time t based on the cubic |
| 227 | * increase function. |
| 228 | */ |
| 229 | uint64_t W_cubic_t; |
| 230 | uint32_t inc, inc_diff; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 231 | |
Frédéric Lécaille | de2ba86 | 2023-04-02 10:49:35 +0200 | [diff] [blame] | 232 | TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc); |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame] | 233 | if (!c->t_epoch) { |
| 234 | c->t_epoch = now_ms; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 235 | if (c->last_w_max <= path->cwnd) { |
| 236 | c->K = 0; |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame] | 237 | c->W_target = path->cwnd; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 238 | } |
| 239 | else { |
Frederic Lecaille | 33a6f33 | 2024-07-24 16:16:26 +0200 | [diff] [blame] | 240 | uint64_t wnd_diff; |
| 241 | |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame] | 242 | /* K value computing (in seconds): |
| 243 | * K = cubic_root((W_max - cwnd_epoch)/C) (Figure 2) |
Frederic Lecaille | 33a6f33 | 2024-07-24 16:16:26 +0200 | [diff] [blame] | 244 | * Note that K is stored in milliseconds and that |
| 245 | * 8000 * 125000 = 1000^3. |
| 246 | * |
| 247 | * Supporting 2^40 windows, shifted by 10, leaves ~13 bits of unused |
| 248 | * precision. We exploit this precision for our NS conversion by |
| 249 | * multiplying by 8000 without overflowing, then later by 125000 |
| 250 | * after the divide so that we limit the precision loss to the minimum |
| 251 | * before the cubic_root() call." |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame] | 252 | */ |
Frederic Lecaille | 33a6f33 | 2024-07-24 16:16:26 +0200 | [diff] [blame] | 253 | wnd_diff = (c->last_w_max - path->cwnd) << CUBIC_SCALE_FACTOR_SHIFT; |
| 254 | wnd_diff *= 8000ULL; |
| 255 | wnd_diff /= CUBIC_C_SCALED * path->mtu; |
| 256 | wnd_diff *= 125000ULL; |
| 257 | c->K = cubic_root(wnd_diff); |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame] | 258 | c->W_target = c->last_w_max; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 259 | } |
| 260 | |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame] | 261 | c->W_est = path->cwnd; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 262 | c->remaining_inc = 0; |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame] | 263 | c->remaining_W_est_inc = 0; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 264 | } |
| 265 | |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame] | 266 | elapsed_time = now_ms + path->loss.rtt_min - c->t_epoch; |
| 267 | if (elapsed_time < c->K) { |
| 268 | t = c->K - elapsed_time; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 269 | } |
| 270 | else { |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame] | 271 | t = elapsed_time - c->K; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 272 | } |
| 273 | |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame] | 274 | if (t > CUBIC_TIME_LIMIT) { |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 275 | /* TODO : should not happen if we handle the case |
| 276 | * of very late acks receipt. This must be handled as a congestion |
| 277 | * control event: a very late ack should trigger a congestion |
| 278 | * control algorithm reset. |
| 279 | */ |
| 280 | quic_cc_cubic_reset(cc); |
Frédéric Lécaille | de2ba86 | 2023-04-02 10:49:35 +0200 | [diff] [blame] | 281 | goto leave; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 282 | } |
| 283 | |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame] | 284 | /* Compute W_cubic_t at t time. */ |
Frederic Lecaille | ea349c7 | 2024-01-31 18:21:34 +0100 | [diff] [blame] | 285 | W_cubic_t = CUBIC_C_SCALED * path->mtu; |
Frederic Lecaille | c34e99a | 2024-02-06 18:30:08 +0100 | [diff] [blame] | 286 | W_cubic_t = (W_cubic_t * t) / 1000; |
| 287 | W_cubic_t = (W_cubic_t * t) / 1000; |
| 288 | W_cubic_t = (W_cubic_t * t) / 1000; |
Frederic Lecaille | ea349c7 | 2024-01-31 18:21:34 +0100 | [diff] [blame] | 289 | W_cubic_t >>= CUBIC_SCALE_FACTOR_SHIFT; |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame] | 290 | if (elapsed_time < c->K) |
| 291 | target = c->W_target - W_cubic_t; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 292 | else |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame] | 293 | target = c->W_target + W_cubic_t; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 294 | |
| 295 | if (target > path->cwnd) { |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame] | 296 | /* Concave region */ |
| 297 | |
| 298 | /* RFC 9438 4.4. Concave Region |
| 299 | * |
| 300 | * When receiving a new ACK in congestion avoidance, if CUBIC is not in |
| 301 | * the Reno-friendly region and cwnd is less than Wmax, then CUBIC is |
| 302 | * in the concave region. In this region, cwnd MUST be incremented by |
| 303 | * (target - cwnd) / cwnd. |
| 304 | */ |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 305 | inc_diff = c->remaining_inc + path->mtu * (target - path->cwnd); |
| 306 | c->remaining_inc = inc_diff % path->cwnd; |
| 307 | inc = inc_diff / path->cwnd; |
| 308 | } |
| 309 | else { |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame] | 310 | /* Convex region: very small increment */ |
| 311 | |
| 312 | /* RFC 9438 4.5. Convex Region |
| 313 | * |
| 314 | * When receiving a new ACK in congestion avoidance, if CUBIC is not in |
| 315 | * the Reno-friendly region and cwnd is larger than or equal to Wmax, |
| 316 | * then CUBIC is in the convex region.The convex region indicates that |
| 317 | * the network conditions might have changed since the last congestion |
| 318 | * event, possibly implying more available bandwidth after some flow |
| 319 | * departures. Since the Internet is highly asynchronous, some amount |
| 320 | * of perturbation is always possible without causing a major change in |
| 321 | * available bandwidth.Unless the cwnd is overridden by the AIMD window |
| 322 | * increase, CUBIC will behave cautiously when operating in this region. |
| 323 | * The convex profile aims to increase the window very slowly at the |
| 324 | * beginning when cwnd is around Wmax and then gradually increases its |
| 325 | * rate of increase. This region is also called the "maximum probing |
| 326 | * phase", since CUBIC is searching for a new Wmax. In this region, |
| 327 | * cwnd MUST be incremented by (target - cwnd) / cwnd) for each received |
| 328 | * new ACK, where target is calculated as described in Section 4.2. |
| 329 | */ |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 330 | inc_diff = c->remaining_inc + path->mtu; |
| 331 | c->remaining_inc = inc_diff % (100 * path->cwnd); |
| 332 | inc = inc_diff / (100 * path->cwnd); |
| 333 | } |
| 334 | |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame] | 335 | inc_diff = c->remaining_W_est_inc + path->mtu * acked; |
| 336 | c->W_est += inc_diff / path->cwnd; |
| 337 | c->remaining_W_est_inc = inc_diff % path->cwnd; |
| 338 | |
| 339 | /* TCP friendliness : |
| 340 | * RFC 9438 4.3. Reno-Friendly Region |
| 341 | * |
| 342 | * Reno performs well in certain types of networks -- for example, under |
| 343 | * short RTTs and small bandwidths (or small BDPs). In these networks, |
| 344 | * CUBIC remains in the Reno-friendly region to achieve at least the same |
| 345 | * throughput as Reno. |
| 346 | * |
| 347 | * When receiving a new ACK in congestion avoidance (where cwnd could be |
| 348 | * greater than or less than Wmax), CUBIC checks whether Wcubic(t) is less |
| 349 | * than West. If so, CUBIC is in the Reno-friendly region and cwnd SHOULD |
| 350 | * be set to West at each reception of a new ACK. |
| 351 | * |
| 352 | * West is set equal to cwnd_epoch at the start of the congestion avoidance |
| 353 | * stage. After that, on every new ACK, West is updated using Figure 4. |
| 354 | * Note that this equation uses segments_acked and cwnd is measured in |
| 355 | * segments. An implementation that measures cwnd in bytes should adjust the |
| 356 | * equation accordingly using the number of acknowledged bytes and the SMSS. |
| 357 | * Also note that this equation works for connections with enabled or |
| 358 | * disabled delayed ACKs [RFC5681], as segments_acked will be different based |
| 359 | * on the segments actually acknowledged by a new ACK. |
| 360 | * |
| 361 | * Figure 4 : West = West + alpha_cubic * (segments_acked / cwnd) |
| 362 | * |
| 363 | * Once West has grown to reach the cwnd at the time of most recently |
| 364 | * setting ssthresh -- that is, West >= cwndprior -- the sender SHOULD set |
| 365 | * alpha_cubic to 1 to ensure that it can achieve the same congestion window |
| 366 | * increment rate as Reno, which uses AIMD(1, 0.5). |
| 367 | */ |
| 368 | if (c->W_est > path->cwnd) { |
| 369 | uint32_t W_est_inc = path->mtu * (c->W_est - path->cwnd) / path->cwnd; |
| 370 | if (W_est_inc > inc) |
| 371 | inc = W_est_inc; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 372 | } |
| 373 | |
| 374 | path->cwnd += inc; |
Frédéric Lécaille | fad0e6c | 2023-04-06 10:19:17 +0200 | [diff] [blame] | 375 | path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd); |
Frédéric Lécaille | de2ba86 | 2023-04-02 10:49:35 +0200 | [diff] [blame] | 376 | leave: |
| 377 | TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc); |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 378 | } |
| 379 | |
| 380 | static void quic_cc_cubic_slow_start(struct quic_cc *cc) |
| 381 | { |
Frédéric Lécaille | de2ba86 | 2023-04-02 10:49:35 +0200 | [diff] [blame] | 382 | TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc); |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 383 | quic_cc_cubic_reset(cc); |
Frédéric Lécaille | de2ba86 | 2023-04-02 10:49:35 +0200 | [diff] [blame] | 384 | TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc); |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 385 | } |
| 386 | |
| 387 | static void quic_enter_recovery(struct quic_cc *cc) |
| 388 | { |
| 389 | struct quic_path *path = container_of(cc, struct quic_path, cc); |
| 390 | struct cubic *c = quic_cc_priv(cc); |
| 391 | /* Current cwnd as number of packets */ |
| 392 | |
Frédéric Lécaille | de2ba86 | 2023-04-02 10:49:35 +0200 | [diff] [blame] | 393 | TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc); |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame] | 394 | c->t_epoch = 0; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 395 | c->recovery_start_time = now_ms; |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame] | 396 | |
| 397 | /* RFC 9438 4.7. Fast Convergence |
| 398 | * |
| 399 | * To improve convergence speed, CUBIC uses a heuristic. When a new flow |
| 400 | * joins the network, existing flows need to give up some of their bandwidth |
| 401 | * to allow the new flow some room for growth if the existing flows have |
| 402 | * been using all the network bandwidth. To speed up this bandwidth release |
| 403 | * by existing flows, the following fast convergence mechanism SHOULD be |
| 404 | * implemented.With fast convergence, when a congestion event occurs, Wmax |
| 405 | * is updated as follows, before the window reduction described in Section |
| 406 | * 4.6. |
| 407 | * |
| 408 | * if cwnd < Wmax and fast convergence enabled, further reduce Wax: |
| 409 | * Wmax = cwnd * (1 + beta_cubic) |
| 410 | * otherwise, remember cwn before reduction: |
| 411 | * Wmax = cwnd |
| 412 | */ |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 413 | if (path->cwnd < c->last_w_max) { |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame] | 414 | /* (1 + beta_cubic) * path->cwnd / 2 */ |
| 415 | c->last_w_max = (path->cwnd * (CUBIC_ONE_SCALED + CUBIC_BETA_SCALED) / 2) >> CUBIC_SCALE_FACTOR_SHIFT; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 416 | } |
| 417 | else { |
| 418 | c->last_w_max = path->cwnd; |
| 419 | } |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame] | 420 | |
| 421 | c->ssthresh = (CUBIC_BETA_SCALED * path->cwnd) >> CUBIC_SCALE_FACTOR_SHIFT; |
Frédéric Lécaille | 595251f | 2023-04-13 10:46:42 +0200 | [diff] [blame] | 422 | path->cwnd = QUIC_MAX(c->ssthresh, (uint32_t)path->min_cwnd); |
Frédéric Lécaille | 7d6270a | 2023-04-02 12:43:22 +0200 | [diff] [blame] | 423 | c->state = QUIC_CC_ST_RP; |
Frédéric Lécaille | de2ba86 | 2023-04-02 10:49:35 +0200 | [diff] [blame] | 424 | TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc, NULL, cc); |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 425 | } |
| 426 | |
| 427 | /* Congestion slow-start callback. */ |
| 428 | static void quic_cc_cubic_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev) |
| 429 | { |
| 430 | struct quic_path *path = container_of(cc, struct quic_path, cc); |
| 431 | struct cubic *c = quic_cc_priv(cc); |
| 432 | |
Frédéric Lécaille | 8f99194 | 2023-03-24 15:14:45 +0100 | [diff] [blame] | 433 | TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc); |
| 434 | TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev); |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 435 | switch (ev->type) { |
| 436 | case QUIC_CC_EVT_ACK: |
Frédéric Lécaille | db54847 | 2023-04-02 10:07:48 +0200 | [diff] [blame] | 437 | if (path->cwnd < QUIC_CC_INFINITE_SSTHESH - ev->ack.acked) |
| 438 | path->cwnd += ev->ack.acked; |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 439 | /* Exit to congestion avoidance if slow start threshold is reached. */ |
| 440 | if (path->cwnd >= c->ssthresh) |
Frédéric Lécaille | 7d6270a | 2023-04-02 12:43:22 +0200 | [diff] [blame] | 441 | c->state = QUIC_CC_ST_CA; |
Frédéric Lécaille | fad0e6c | 2023-04-06 10:19:17 +0200 | [diff] [blame] | 442 | path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd); |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 443 | break; |
| 444 | |
| 445 | case QUIC_CC_EVT_LOSS: |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 446 | quic_enter_recovery(cc); |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 447 | break; |
| 448 | |
| 449 | case QUIC_CC_EVT_ECN_CE: |
| 450 | /* TODO */ |
| 451 | break; |
| 452 | } |
| 453 | |
| 454 | out: |
Frédéric Lécaille | 8f99194 | 2023-03-24 15:14:45 +0100 | [diff] [blame] | 455 | TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, NULL, cc); |
| 456 | TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc); |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 457 | } |
| 458 | |
| 459 | /* Congestion avoidance callback. */ |
| 460 | static void quic_cc_cubic_ca_cb(struct quic_cc *cc, struct quic_cc_event *ev) |
| 461 | { |
Frédéric Lécaille | 8f99194 | 2023-03-24 15:14:45 +0100 | [diff] [blame] | 462 | TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc); |
| 463 | TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev); |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 464 | switch (ev->type) { |
| 465 | case QUIC_CC_EVT_ACK: |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 466 | quic_cubic_update(cc, ev->ack.acked); |
| 467 | break; |
| 468 | case QUIC_CC_EVT_LOSS: |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 469 | quic_enter_recovery(cc); |
| 470 | break; |
| 471 | case QUIC_CC_EVT_ECN_CE: |
| 472 | /* TODO */ |
| 473 | break; |
| 474 | } |
| 475 | |
| 476 | out: |
Frédéric Lécaille | 8f99194 | 2023-03-24 15:14:45 +0100 | [diff] [blame] | 477 | TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, NULL, cc); |
Frédéric Lécaille | 23b8eef | 2023-03-31 20:18:44 +0200 | [diff] [blame] | 478 | TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc); |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 479 | } |
| 480 | |
Frédéric Lécaille | d724331 | 2023-03-21 11:54:02 +0100 | [diff] [blame] | 481 | /* Recovery period callback */ |
| 482 | static void quic_cc_cubic_rp_cb(struct quic_cc *cc, struct quic_cc_event *ev) |
| 483 | { |
| 484 | struct cubic *c = quic_cc_priv(cc); |
| 485 | |
| 486 | TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc, ev); |
Frédéric Lécaille | de2ba86 | 2023-04-02 10:49:35 +0200 | [diff] [blame] | 487 | TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev, cc); |
Frédéric Lécaille | d724331 | 2023-03-21 11:54:02 +0100 | [diff] [blame] | 488 | |
Frédéric Lécaille | d724331 | 2023-03-21 11:54:02 +0100 | [diff] [blame] | 489 | switch (ev->type) { |
| 490 | case QUIC_CC_EVT_ACK: |
Frederic Lecaille | 569a025 | 2024-01-30 11:40:41 +0100 | [diff] [blame] | 491 | /* RFC 9002 7.3.2. Recovery |
Frédéric Lécaille | d724331 | 2023-03-21 11:54:02 +0100 | [diff] [blame] | 492 | * A recovery period ends and the sender enters congestion avoidance when a |
| 493 | * packet sent during the recovery period is acknowledged. |
| 494 | */ |
Frédéric Lécaille | 8f99194 | 2023-03-24 15:14:45 +0100 | [diff] [blame] | 495 | if (tick_is_le(ev->ack.time_sent, c->recovery_start_time)) { |
| 496 | TRACE_PROTO("CC cubic (still in recov. period)", QUIC_EV_CONN_CC, cc->qc); |
Frédéric Lécaille | d724331 | 2023-03-21 11:54:02 +0100 | [diff] [blame] | 497 | goto leave; |
Frédéric Lécaille | 8f99194 | 2023-03-24 15:14:45 +0100 | [diff] [blame] | 498 | } |
Frédéric Lécaille | d724331 | 2023-03-21 11:54:02 +0100 | [diff] [blame] | 499 | |
Frédéric Lécaille | 7d6270a | 2023-04-02 12:43:22 +0200 | [diff] [blame] | 500 | c->state = QUIC_CC_ST_CA; |
Frédéric Lécaille | d724331 | 2023-03-21 11:54:02 +0100 | [diff] [blame] | 501 | c->recovery_start_time = TICK_ETERNITY; |
| 502 | break; |
| 503 | case QUIC_CC_EVT_LOSS: |
| 504 | break; |
| 505 | case QUIC_CC_EVT_ECN_CE: |
| 506 | /* TODO */ |
| 507 | break; |
| 508 | } |
| 509 | |
| 510 | leave: |
Frédéric Lécaille | 8f99194 | 2023-03-24 15:14:45 +0100 | [diff] [blame] | 511 | TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, NULL, cc); |
Frédéric Lécaille | d724331 | 2023-03-21 11:54:02 +0100 | [diff] [blame] | 512 | TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc, NULL, cc); |
| 513 | } |
| 514 | |
/* Per-state event handlers, indexed by the QUIC_CC_ST_* state values
 * stored in struct cubic <state> field.
 */
static void (*quic_cc_cubic_state_cbs[])(struct quic_cc *cc,
                                         struct quic_cc_event *ev) = {
	[QUIC_CC_ST_SS] = quic_cc_cubic_ss_cb,
	[QUIC_CC_ST_CA] = quic_cc_cubic_ca_cb,
	[QUIC_CC_ST_RP] = quic_cc_cubic_rp_cb,
};
| 521 | |
| 522 | static void quic_cc_cubic_event(struct quic_cc *cc, struct quic_cc_event *ev) |
| 523 | { |
Frédéric Lécaille | 7d6270a | 2023-04-02 12:43:22 +0200 | [diff] [blame] | 524 | struct cubic *c = quic_cc_priv(cc); |
| 525 | |
| 526 | return quic_cc_cubic_state_cbs[c->state](cc, ev); |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 527 | } |
| 528 | |
| 529 | static void quic_cc_cubic_state_trace(struct buffer *buf, const struct quic_cc *cc) |
| 530 | { |
Frédéric Lécaille | 01314b8 | 2023-03-24 14:09:55 +0100 | [diff] [blame] | 531 | struct quic_path *path; |
| 532 | struct cubic *c = quic_cc_priv(cc); |
| 533 | |
| 534 | path = container_of(cc, struct quic_path, cc); |
Frédéric Lécaille | fad0e6c | 2023-04-06 10:19:17 +0200 | [diff] [blame] | 535 | chunk_appendf(buf, " state=%s cwnd=%llu mcwnd=%llu ssthresh=%d rpst=%dms", |
Frédéric Lécaille | 7d6270a | 2023-04-02 12:43:22 +0200 | [diff] [blame] | 536 | quic_cc_state_str(c->state), |
Frédéric Lécaille | 01314b8 | 2023-03-24 14:09:55 +0100 | [diff] [blame] | 537 | (unsigned long long)path->cwnd, |
Frédéric Lécaille | fad0e6c | 2023-04-06 10:19:17 +0200 | [diff] [blame] | 538 | (unsigned long long)path->mcwnd, |
Frédéric Lécaille | 01314b8 | 2023-03-24 14:09:55 +0100 | [diff] [blame] | 539 | (int)c->ssthresh, |
| 540 | !tick_isset(c->recovery_start_time) ? -1 : |
| 541 | TICKS_TO_MS(tick_remain(c->recovery_start_time, now_ms))); |
Frédéric Lécaille | 1c9c2f6 | 2022-06-01 15:06:58 +0200 | [diff] [blame] | 542 | } |
| 543 | |
/* CUBIC congestion control algorithm descriptor, registered under the
 * QUIC_CC_ALGO_TP_CUBIC transport parameter value.
 */
struct quic_cc_algo quic_cc_algo_cubic = {
	.type        = QUIC_CC_ALGO_TP_CUBIC,
	.init        = quic_cc_cubic_init,
	.event       = quic_cc_cubic_event,
	.slow_start  = quic_cc_cubic_slow_start,
	.state_trace = quic_cc_cubic_state_trace,
};