#include <haproxy/trace.h>
#include <haproxy/quic_cc.h>

/* This source file is largely inspired by the Linux kernel implementation
 * of TCP CUBIC (see net/ipv4/tcp_cubic.c): avoiding floating point
 * operations for the sake of speed leaves little choice but to reuse the
 * same fixed-point arithmetic tricks.
 */
#define TRACE_SOURCE &trace_quic

#define CUBIC_BETA_SCALE 1024
#define CUBIC_BETA_SCALE_SHIFT 10
/* beta = 0.7 ; C = 0.4 */
#define CUBIC_BETA 717 /* CUBIC_BETA / CUBIC_BETA_SCALE = 0.7 */
#define CUBIC_C 410 /* CUBIC_C / CUBIC_BETA_SCALE = 0.4 */
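/* These constants are fixed-point representations: a real coefficient r is
 * stored as round(r * CUBIC_BETA_SCALE) and applied with a multiply followed
 * by a right shift. For instance, scaling a window by beta = 0.7 is done as
 * (cwnd * CUBIC_BETA) >> CUBIC_BETA_SCALE_SHIFT, i.e. cwnd * 717 / 1024.
 */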

#define CUBIC_BETA_SCALE_FACTOR_SHIFT (3 * CUBIC_BETA_SCALE_SHIFT)
#define TIME_SCALE_FACTOR_SHIFT 10

/* The maximum value which may be cubed and multiplied by CUBIC_C. */
#define CUBIC_DIFF_TIME_LIMIT 355535ULL /* ms */
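/* Note: 355535 is roughly the cube root of 2^64 / CUBIC_C, so that
 * CUBIC_C * diff^3 computed below presumably cannot overflow a 64-bit
 * integer for diff values up to this limit.
 */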

/* K cube factor: (1 - beta) / c */
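/* Presumably, the members below map onto the usual CUBIC variables
 * (RFC 8312): last_w_max is W_max, K is the time offset of the cubic
 * curve origin, origin_point the window at that origin, tcp_wnd the
 * TCP-friendly (Reno-like) window estimate, while the remaining_* fields
 * accumulate sub-MTU increments between updates.
 */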
struct cubic {
        uint32_t ssthresh;
        uint32_t remaining_inc;
        uint32_t remaining_tcp_inc;
        uint32_t epoch_start;
        uint32_t origin_point;
        uint32_t K;
        uint32_t last_w_max;
        uint32_t tcp_wnd;
        uint32_t recovery_start_time;
};

static void quic_cc_cubic_reset(struct quic_cc *cc)
{
        struct cubic *c = quic_cc_priv(cc);

        cc->algo->state = QUIC_CC_ST_SS;

        c->ssthresh = QUIC_CC_INFINITE_SSTHESH;
        c->remaining_inc = 0;
        c->remaining_tcp_inc = 0;
        c->epoch_start = 0;
        c->origin_point = 0;
        c->K = 0;
        c->last_w_max = 0;
        c->tcp_wnd = 0;
        c->recovery_start_time = 0;
}

static int quic_cc_cubic_init(struct quic_cc *cc)
{
        quic_cc_cubic_reset(cc);
        return 1;
}

/* Cube root approximation, largely inspired by the Linux kernel sources.
 * See net/ipv4/tcp_cubic.c
 */
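/* The method, as in the Linux implementation, is a table lookup on the most
 * significant bits of <val> to get an initial estimate, followed by one
 * Newton-Raphson iteration: x = (2 * x + val / x^2) / 3, with x^2
 * approximated by x * (x - 1) and the division by 3 by a multiply by 341
 * and a shift by 10 (341 / 1024 being close to 1/3).
 */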
static uint32_t cubic_root(uint64_t val)
{
        uint32_t x, b, shift;

        static const uint8_t v[] = {
                0, 54, 54, 54, 118, 118, 118, 118,
                123, 129, 134, 138, 143, 147, 151, 156,
                157, 161, 164, 168, 170, 173, 176, 179,
                181, 185, 187, 190, 192, 194, 197, 199,
                200, 202, 204, 206, 209, 211, 213, 215,
                217, 219, 221, 222, 224, 225, 227, 229,
                231, 232, 234, 236, 237, 239, 240, 242,
                244, 245, 246, 248, 250, 251, 252, 254,
        };

        if (!val || (b = my_flsl(val)) < 7) {
                /* val in [0..63] */
                return ((uint32_t)v[(uint32_t)val] + 35) >> 6;
        }

        b = ((b * 84) >> 8) - 1;
        shift = (val >> (b * 3));

        x = ((uint32_t)(((uint32_t)v[shift] + 10) << b)) >> 6;

        x = 2 * x + (uint32_t)(val / ((uint64_t)x * (uint64_t)(x - 1)));
        x = ((x * 341) >> 10);

        return x;
}

static inline void quic_cubic_update(struct quic_cc *cc, uint32_t acked)
{
        struct cubic *c = quic_cc_priv(cc);
        struct quic_path *path = container_of(cc, struct quic_path, cc);
        uint32_t t, target, inc, inc_diff;
        uint64_t delta, diff;

        if (!c->epoch_start) {
                c->epoch_start = now_ms;
                if (c->last_w_max <= path->cwnd) {
                        c->K = 0;
                        c->origin_point = path->cwnd;
                }
                else {
                        /* K = cubic_root((1 - beta) * W_max / C) */
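                        /* As in the Linux implementation, the cube root is
                         * taken on (last_w_max - cwnd) rather than on W_max
                         * alone, the window difference being converted from
                         * bytes to packets (divided by path->mtu), and the
                         * result is left-shifted by TIME_SCALE_FACTOR_SHIFT,
                         * presumably to keep fractional precision in the
                         * fixed-point computation of K.
                         */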
                        c->K = cubic_root((c->last_w_max - path->cwnd) *
                                          (CUBIC_BETA_SCALE - CUBIC_BETA) / CUBIC_C / path->mtu) << TIME_SCALE_FACTOR_SHIFT;
                        c->origin_point = c->last_w_max;
                }

                c->tcp_wnd = path->cwnd;
                c->remaining_inc = 0;
                c->remaining_tcp_inc = 0;
        }

        t = now_ms + path->loss.rtt_min - c->epoch_start;
        if (t < c->K) {
                diff = c->K - t;
        }
        else {
                diff = t - c->K;
        }

        if (diff > CUBIC_DIFF_TIME_LIMIT) {
                /* TODO: this should not happen once the receipt of very late
                 * ACKs is handled. Such a situation must be treated as a
                 * congestion control event: a very late ACK should trigger a
                 * congestion control algorithm reset.
                 */
                quic_cc_cubic_reset(cc);
                return;
        }

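        /* This follows the CUBIC window function W_cubic(t) = C * (t - K)^3 + W_max
         * (RFC 8312, section 4.1), in fixed point: delta is the magnitude of
         * the cubic term in bytes, subtracted from the origin point while
         * t < K (concave region) and added to it afterwards (convex region).
         */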
        delta = path->mtu * ((CUBIC_C * diff * diff * diff) >> (10 + 3 * TIME_SCALE_FACTOR_SHIFT));
        if (t < c->K)
                target = c->origin_point - delta;
        else
                target = c->origin_point + delta;

        if (target > path->cwnd) {
                inc_diff = c->remaining_inc + path->mtu * (target - path->cwnd);
                c->remaining_inc = inc_diff % path->cwnd;
                inc = inc_diff / path->cwnd;
        }
        else {
                /* small increment */
                inc_diff = c->remaining_inc + path->mtu;
                c->remaining_inc = inc_diff % (100 * path->cwnd);
                inc = inc_diff / (100 * path->cwnd);
        }

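        /* TCP-friendly region (RFC 8312, section 4.2): tcp_wnd estimates the
         * window a standard AIMD (Reno-like) flow would have reached, growing
         * by roughly one MTU per congestion window of acknowledged bytes. If
         * this estimate exceeds the cubic target, the larger increment is
         * presumably taken below so that CUBIC is never less aggressive than
         * standard TCP.
         */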
        inc_diff = c->remaining_tcp_inc + path->mtu * acked;
        c->tcp_wnd += inc_diff / path->cwnd;
        c->remaining_tcp_inc = inc_diff % path->cwnd;
        /* TCP friendliness */
        if (c->tcp_wnd > path->cwnd) {
                uint32_t tcp_inc = path->mtu * (c->tcp_wnd - path->cwnd) / path->cwnd;
                if (tcp_inc > inc)
                        inc = tcp_inc;
        }

        path->cwnd += inc;
}

static void quic_cc_cubic_slow_start(struct quic_cc *cc)
{
        quic_cc_cubic_reset(cc);
}

static void quic_enter_recovery(struct quic_cc *cc)
{
        struct quic_path *path = container_of(cc, struct quic_path, cc);
        struct cubic *c = quic_cc_priv(cc);

        c->epoch_start = 0;
        c->recovery_start_time = now_ms;
        /* Fast convergence */
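        /* Presumably the fast convergence heuristic of RFC 8312 (section 4.6):
         * when a loss occurs before cwnd regained the previous W_max, the flow
         * is likely competing with new flows, so an even smaller W_max is
         * remembered ((1 + beta) / 2, i.e. about 0.85 of the current cwnd) to
         * release bandwidth faster.
         */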
        if (path->cwnd < c->last_w_max) {
                /* (1 + beta) * path->cwnd / 2 */
                c->last_w_max = (path->cwnd * (CUBIC_BETA_SCALE + CUBIC_BETA) / 2) >> CUBIC_BETA_SCALE_SHIFT;
        }
        else {
                c->last_w_max = path->cwnd;
        }
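        /* Multiplicative decrease: cwnd = beta * cwnd, computed in fixed point
         * as (cwnd * 717) >> 10, i.e. roughly 0.7 * cwnd; the slow start
         * threshold is then set to this new window, floored at the minimum
         * window.
         */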
        path->cwnd = (CUBIC_BETA * path->cwnd) >> CUBIC_BETA_SCALE_SHIFT;
        c->ssthresh = QUIC_MAX(path->cwnd, path->min_cwnd);
}

/* Congestion slow-start callback. */
static void quic_cc_cubic_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev)
{
        struct quic_path *path = container_of(cc, struct quic_path, cc);
        struct cubic *c = quic_cc_priv(cc);

        TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc, ev);
        switch (ev->type) {
        case QUIC_CC_EVT_ACK:
                /* Do not increase the congestion window in recovery period. */
                if (ev->ack.time_sent <= c->recovery_start_time)
                        goto out;

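                /* Presumably byte-counting slow start: the congestion window
                 * grows by the number of newly acknowledged bytes until
                 * ssthresh is reached.
                 */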
                path->cwnd += ev->ack.acked;
                /* Exit to congestion avoidance if slow start threshold is reached. */
                if (path->cwnd >= c->ssthresh)
                        cc->algo->state = QUIC_CC_ST_CA;
                break;

        case QUIC_CC_EVT_LOSS:
                /* Do not decrease the congestion window when already in recovery period. */
                if (ev->loss.time_sent <= c->recovery_start_time)
                        goto out;

                quic_enter_recovery(cc);
                /* Exit to congestion avoidance. */
                cc->algo->state = QUIC_CC_ST_CA;
                break;

        case QUIC_CC_EVT_ECN_CE:
                /* TODO */
                break;
        }

 out:
        TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc, NULL, cc);
}

/* Congestion avoidance callback. */
static void quic_cc_cubic_ca_cb(struct quic_cc *cc, struct quic_cc_event *ev)
{
        struct cubic *c = quic_cc_priv(cc);

        TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc, ev);
        switch (ev->type) {
        case QUIC_CC_EVT_ACK:
                /* Do not increase the congestion window when already in recovery period. */
                if (ev->ack.time_sent <= c->recovery_start_time)
                        goto out;

                quic_cubic_update(cc, ev->ack.acked);
                break;
        case QUIC_CC_EVT_LOSS:
                /* Do not decrease the congestion window when already in recovery period. */
                if (ev->loss.time_sent <= c->recovery_start_time)
                        goto out;

                quic_enter_recovery(cc);
                break;
        case QUIC_CC_EVT_ECN_CE:
                /* TODO */
                break;
        }

 out:
        TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc, NULL, cc);
}

static void (*quic_cc_cubic_state_cbs[])(struct quic_cc *cc,
                                         struct quic_cc_event *ev) = {
        [QUIC_CC_ST_SS] = quic_cc_cubic_ss_cb,
        [QUIC_CC_ST_CA] = quic_cc_cubic_ca_cb,
};

static void quic_cc_cubic_event(struct quic_cc *cc, struct quic_cc_event *ev)
{
        return quic_cc_cubic_state_cbs[cc->algo->state](cc, ev);
}

static void quic_cc_cubic_state_trace(struct buffer *buf, const struct quic_cc *cc)
{
}

struct quic_cc_algo quic_cc_algo_cubic = {
        .type = QUIC_CC_ALGO_TP_CUBIC,
        .init = quic_cc_cubic_init,
        .event = quic_cc_cubic_event,
        .slow_start = quic_cc_cubic_slow_start,
        .state_trace = quic_cc_cubic_state_trace,
};