#include <haproxy/quic_cc.h>
#include <haproxy/ticks.h>
#include <haproxy/trace.h>

/* This source file is largely inspired by the Linux kernel implementation
 * of TCP Cubic (see net/ipv4/tcp_cubic.c). Indeed, there is no other choice
 * if floating point operations must be avoided to remain fast: everything
 * here is computed with fixed-point integer arithmetic.
 */
#define TRACE_SOURCE &trace_quic

#define CUBIC_BETA_SCALE 1024
#define CUBIC_BETA_SCALE_SHIFT 10
/* beta = 0.7 ; C = 0.4 */
#define CUBIC_BETA 717 /* CUBIC_BETA / CUBIC_BETA_SCALE = 0.7 */
#define CUBIC_C 410 /* CUBIC_C / CUBIC_BETA_SCALE = 0.4 */

#define CUBIC_BETA_SCALE_FACTOR_SHIFT (3 * CUBIC_BETA_SCALE_SHIFT)
#define TIME_SCALE_FACTOR_SHIFT 10
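
/* Note on the time scale: with TIME_SCALE_FACTOR_SHIFT == 10, durations in
 * milliseconds are treated as fixed-point numbers of seconds scaled by 1024
 * (1 s ~= 1024 ms, in the same spirit as BICTCP_HZ in the Linux kernel).
 * cubic_root() below is fed a value homogeneous to seconds, and its result
 * is brought back to this millisecond-like scale with a left shift by
 * TIME_SCALE_FACTOR_SHIFT.
 */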

/* The maximum time value which may be cubed and multiplied by CUBIC_C
 * without overflowing a 64-bit integer.
 */
#define CUBIC_DIFF_TIME_LIMIT 355535ULL /* ms */
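/* Indeed, as a quick check of the limit above:
 *   355535^3 ~= 4.494e16, and 4.494e16 * CUBIC_C = 4.494e16 * 410 ~= 1.843e19,
 * which is just below 2^64 ~= 1.845e19, so the CUBIC_C * diff^3 product
 * computed in quic_cubic_update() cannot overflow.
 */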

struct cubic {
        uint32_t state;
        uint32_t ssthresh;
        /* Remainders of the cubic and TCP-friendly window increases (bytes). */
        uint32_t remaining_inc;
        uint32_t remaining_tcp_inc;
        /* Start time of the current congestion avoidance epoch (ms). */
        uint32_t epoch_start;
        /* Window value the cubic curve converges back to (bytes). */
        uint32_t origin_point;
        /* Time offset at which the curve reaches <origin_point>, on the
         * 1024-per-second time scale; K = cubic_root(W_max * (1 - beta) / C).
         */
        uint32_t K;
        /* Window value upon the last window reduction (bytes), possibly
         * scaled down for fast convergence.
         */
        uint32_t last_w_max;
        /* TCP-friendly (Reno-like) window estimation (bytes). */
        uint32_t tcp_wnd;
        /* Start time of the current recovery period (ms). */
        uint32_t recovery_start_time;
};

static void quic_cc_cubic_reset(struct quic_cc *cc)
{
        struct cubic *c = quic_cc_priv(cc);

        TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
        c->state = QUIC_CC_ST_SS;
        c->ssthresh = QUIC_CC_INFINITE_SSTHESH;
        c->remaining_inc = 0;
        c->remaining_tcp_inc = 0;
        c->epoch_start = 0;
        c->origin_point = 0;
        c->K = 0;
        c->last_w_max = 0;
        c->tcp_wnd = 0;
        c->recovery_start_time = 0;
        TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
}

static int quic_cc_cubic_init(struct quic_cc *cc)
{
        quic_cc_cubic_reset(cc);
        return 1;
}

/* Return the cubic root of <val>, without using any floating point
 * operation. Highly inspired by the Linux kernel sources
 * (see net/ipv4/tcp_cubic.c).
 */
static uint32_t cubic_root(uint64_t val)
{
        uint32_t x, b, shift;

        /* Precomputed helper values taken from the Linux kernel, such that
         * cubic_root(n) ~= (v[n] + 35) >> 6 for n in [0..63].
         */
        static const uint8_t v[] = {
                  0,  54,  54,  54, 118, 118, 118, 118,
                123, 129, 134, 138, 143, 147, 151, 156,
                157, 161, 164, 168, 170, 173, 176, 179,
                181, 185, 187, 190, 192, 194, 197, 199,
                200, 202, 204, 206, 209, 211, 213, 215,
                217, 219, 221, 222, 224, 225, 227, 229,
                231, 232, 234, 236, 237, 239, 240, 242,
                244, 245, 246, 248, 250, 251, 252, 254,
        };

        if (!val || (b = my_flsl(val)) < 7) {
                /* val in [0..63] */
                return ((uint32_t)v[(uint32_t)val] + 35) >> 6;
        }

        /* Larger values: scale <val> down into the range of the table to
         * derive a first estimate.
         */
        b = ((b * 84) >> 8) - 1;
        shift = (val >> (b * 3));

        x = ((uint32_t)(((uint32_t)v[shift] + 10) << b)) >> 6;

        /* One Newton-Raphson iteration to refine the estimate:
         *   x_{k+1} = (2 * x_k + val / x_k^2) / 3
         * where x^2 is approximated by x * (x - 1), and the final
         * multiplication by 341 >> 10 approximates the division by 3.
         */
        x = 2 * x + (uint32_t)(val / ((uint64_t)x * (uint64_t)(x - 1)));
        x = ((x * 341) >> 10);

        return x;
}
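
/* A couple of hedged examples of what the helper above computes, for
 * illustration only: cubic_root(27) returns 3 via the direct table lookup
 * ((v[27] + 35) >> 6 = (190 + 35) >> 6 = 3), and cubic_root(74) returns 4
 * (exact value: ~4.198), which is the kind of precision needed for the K
 * constant computed below.
 */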

static inline void quic_cubic_update(struct quic_cc *cc, uint32_t acked)
{
        struct cubic *c = quic_cc_priv(cc);
        struct quic_path *path = container_of(cc, struct quic_path, cc);
        uint32_t t, target, inc, inc_diff;
        uint64_t delta, diff;

        TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
        if (!c->epoch_start) {
                /* Start a new congestion avoidance epoch. */
                c->epoch_start = now_ms;
                if (c->last_w_max <= path->cwnd) {
                        /* The window has already grown past the previous
                         * maximum: the curve starts from the current window.
                         */
                        c->K = 0;
                        c->origin_point = path->cwnd;
                }
                else {
                        /* K = cubic_root(W_max * (1 - beta) / C) (RFC 8312).
                         * (last_w_max - cwnd) is the number of bytes the window
                         * was reduced by, i.e. W_max * (1 - beta) when leaving
                         * recovery, so it only remains to divide it by C, i.e.
                         * to multiply it by CUBIC_BETA_SCALE / CUBIC_C in fixed
                         * point, and to convert it from bytes to packets.
                         */
                        c->K = cubic_root((c->last_w_max - path->cwnd) *
                                          CUBIC_BETA_SCALE / CUBIC_C / path->mtu) << TIME_SCALE_FACTOR_SHIFT;
                        c->origin_point = c->last_w_max;
                }

                c->tcp_wnd = path->cwnd;
                c->remaining_inc = 0;
                c->remaining_tcp_inc = 0;
        }
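
        /* A hedged example of the computation above, in mtu-sized units (not
         * from the original code): if <last_w_max> corresponds to 100 packets
         * and <cwnd> to 70 packets, the window was reduced by 30 packets,
         * hence K = cubic_root(30 * 1024 / 410) = cubic_root(~74) = 4, i.e.
         * about four seconds once shifted to the 1024-per-second time scale
         * (K == 4096).
         */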

        /* Elapsed time since the start of the epoch, to which the minimal RTT
         * is added so that <target> below is computed as W_cubic(t + RTT), as
         * recommended by RFC 8312.
         */
        t = now_ms + path->loss.rtt_min - c->epoch_start;
        if (t < c->K) {
                diff = c->K - t;
        }
        else {
                diff = t - c->K;
        }

        if (diff > CUBIC_DIFF_TIME_LIMIT) {
                /* TODO: this should not happen if the receipt of very late
                 * acks was handled. Such an event must be treated as a
                 * congestion control one: a very late ack should trigger a
                 * congestion control algorithm reset.
                 */
                quic_cc_cubic_reset(cc);
                goto leave;
        }
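
        /* W_cubic(t) = C * (t - K)^3 + W_max (RFC 8312 Eq. 1), in fixed point:
         * <diff> is |t - K| on the 1024-per-second time scale, so diff^3 is
         * scaled by 2^30 and CUBIC_C by 2^10, hence the right shift by
         * (10 + 3 * TIME_SCALE_FACTOR_SHIFT). This yields a window delta in
         * packets, converted to bytes with <mtu>.
         */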
        delta = path->mtu * ((CUBIC_C * diff * diff * diff) >> (10 + 3 * TIME_SCALE_FACTOR_SHIFT));
        if (t < c->K)
                target = c->origin_point - delta;
        else
                target = c->origin_point + delta;

        if (target > path->cwnd) {
                /* Window increase spread over the acks of the next RTT. */
                inc_diff = c->remaining_inc + path->mtu * (target - path->cwnd);
                c->remaining_inc = inc_diff % path->cwnd;
                inc = inc_diff / path->cwnd;
        }
        else {
                /* Small increment: about one packet per hundred RTTs. */
                inc_diff = c->remaining_inc + path->mtu;
                c->remaining_inc = inc_diff % (100 * path->cwnd);
                inc = inc_diff / (100 * path->cwnd);
        }
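
        /* Note on the division by <cwnd> above: assuming this function is
         * called for each full-sized acknowledged packet, i.e. about
         * cwnd / mtu times per RTT, adding mtu * (target - cwnd) / cwnd bytes
         * each time sums to about (target - cwnd) bytes over one RTT, so the
         * window converges to <target> in roughly one round trip.
         */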

        inc_diff = c->remaining_tcp_inc + path->mtu * acked;
        c->tcp_wnd += inc_diff / path->cwnd;
        c->remaining_tcp_inc = inc_diff % path->cwnd;
        /* TCP friendliness (RFC 8312 4.2): <tcp_wnd> estimates the window a
         * Reno-like flow would reach, growing by about one packet per window
         * of acknowledged bytes. If the cubic curve is slower than this
         * estimation, use the latter instead.
         */
        if (c->tcp_wnd > path->cwnd) {
                uint32_t tcp_inc = path->mtu * (c->tcp_wnd - path->cwnd) / path->cwnd;
                if (tcp_inc > inc)
                        inc = tcp_inc;
        }
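
        /* The final increase applied below is the largest of the cubic and
         * TCP-friendly increases computed above; <mcwnd> keeps track of the
         * maximum window value reached so far (for observability purposes).
         */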
        path->cwnd += inc;
        path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
 leave:
        TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
}

static void quic_cc_cubic_slow_start(struct quic_cc *cc)
{
        TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
        quic_cc_cubic_reset(cc);
        TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
}

static void quic_enter_recovery(struct quic_cc *cc)
{
        struct quic_path *path = container_of(cc, struct quic_path, cc);
        struct cubic *c = quic_cc_priv(cc);

        TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
        c->epoch_start = 0;
        c->recovery_start_time = now_ms;
        /* Fast convergence: if the new maximum is lower than the previous
         * one, the flow is likely competing with others, so release more
         * bandwidth by lowering <last_w_max> to (1 + beta) * cwnd / 2
         * instead of cwnd.
         */
        if (path->cwnd < c->last_w_max) {
                c->last_w_max = (path->cwnd * (CUBIC_BETA_SCALE + CUBIC_BETA) / 2) >> CUBIC_BETA_SCALE_SHIFT;
        }
        else {
                c->last_w_max = path->cwnd;
        }
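
        /* Multiplicative decrease. As a rough example with beta = 0.7: a
         * window of 100 mtu-sized packets gives
         * ssthresh = (717 * cwnd) >> 10, i.e. about 70 packets.
         */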
        c->ssthresh = (CUBIC_BETA * path->cwnd) >> CUBIC_BETA_SCALE_SHIFT;
        path->cwnd = QUIC_MAX(c->ssthresh, (uint32_t)path->min_cwnd);
        c->state = QUIC_CC_ST_RP;
        TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc, NULL, cc);
}

/* Congestion slow-start callback. */
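/* While in slow start, the window is increased by the number of acknowledged
 * bytes, which roughly doubles it every RTT, until it reaches ssthresh
 * (switch to congestion avoidance) or a loss occurs (switch to recovery).
 */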
static void quic_cc_cubic_ss_cb(struct quic_cc *cc, struct quic_cc_event *ev)
{
        struct quic_path *path = container_of(cc, struct quic_path, cc);
        struct cubic *c = quic_cc_priv(cc);

        TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
        TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev);
        switch (ev->type) {
        case QUIC_CC_EVT_ACK:
                if (path->cwnd < QUIC_CC_INFINITE_SSTHESH - ev->ack.acked)
                        path->cwnd += ev->ack.acked;
                /* Exit to congestion avoidance if slow start threshold is reached. */
                if (path->cwnd >= c->ssthresh)
                        c->state = QUIC_CC_ST_CA;
                path->mcwnd = QUIC_MAX(path->cwnd, path->mcwnd);
                break;

        case QUIC_CC_EVT_LOSS:
                quic_enter_recovery(cc);
                break;

        case QUIC_CC_EVT_ECN_CE:
                /* TODO */
                break;
        }

        TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, NULL, cc);
        TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
}

/* Congestion avoidance callback: on each ACK, make the window progress along
 * the cubic curve computed by quic_cubic_update().
 */
static void quic_cc_cubic_ca_cb(struct quic_cc *cc, struct quic_cc_event *ev)
{
        TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc);
        TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev);
        switch (ev->type) {
        case QUIC_CC_EVT_ACK:
                quic_cubic_update(cc, ev->ack.acked);
                break;
        case QUIC_CC_EVT_LOSS:
                quic_enter_recovery(cc);
                break;
        case QUIC_CC_EVT_ECN_CE:
                /* TODO */
                break;
        }

        TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, NULL, cc);
        TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc);
}

/* Recovery period callback. */
static void quic_cc_cubic_rp_cb(struct quic_cc *cc, struct quic_cc_event *ev)
{
        struct cubic *c = quic_cc_priv(cc);

        TRACE_ENTER(QUIC_EV_CONN_CC, cc->qc, ev);
        TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, ev, cc);

        switch (ev->type) {
        case QUIC_CC_EVT_ACK:
                /* RFC 9002 7.3.2. Recovery
                 * A recovery period ends and the sender enters congestion
                 * avoidance when a packet sent during the recovery period
                 * is acknowledged.
                 */
                if (tick_is_le(ev->ack.time_sent, c->recovery_start_time)) {
                        TRACE_PROTO("CC cubic (still in recov. period)", QUIC_EV_CONN_CC, cc->qc);
                        goto leave;
                }

                c->state = QUIC_CC_ST_CA;
                c->recovery_start_time = TICK_ETERNITY;
                break;
        case QUIC_CC_EVT_LOSS:
                /* Do nothing. */
                break;
        case QUIC_CC_EVT_ECN_CE:
                /* TODO */
                break;
        }

 leave:
        TRACE_PROTO("CC cubic", QUIC_EV_CONN_CC, cc->qc, NULL, cc);
        TRACE_LEAVE(QUIC_EV_CONN_CC, cc->qc, NULL, cc);
}

static void (*quic_cc_cubic_state_cbs[])(struct quic_cc *cc,
                                         struct quic_cc_event *ev) = {
        [QUIC_CC_ST_SS] = quic_cc_cubic_ss_cb,
        [QUIC_CC_ST_CA] = quic_cc_cubic_ca_cb,
        [QUIC_CC_ST_RP] = quic_cc_cubic_rp_cb,
};

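/* Generic callback: dispatch <ev> to the handler above matching the current
 * state of the algorithm.
 */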
static void quic_cc_cubic_event(struct quic_cc *cc, struct quic_cc_event *ev)
{
        struct cubic *c = quic_cc_priv(cc);

        quic_cc_cubic_state_cbs[c->state](cc, ev);
}

static void quic_cc_cubic_state_trace(struct buffer *buf, const struct quic_cc *cc)
{
        struct quic_path *path;
        struct cubic *c = quic_cc_priv(cc);

        path = container_of(cc, struct quic_path, cc);
        chunk_appendf(buf, " state=%s cwnd=%llu mcwnd=%llu ssthresh=%d rpst=%dms",
                      quic_cc_state_str(c->state),
                      (unsigned long long)path->cwnd,
                      (unsigned long long)path->mcwnd,
                      (int)c->ssthresh,
                      !tick_isset(c->recovery_start_time) ? -1 :
                      TICKS_TO_MS(tick_remain(c->recovery_start_time, now_ms)));
}

struct quic_cc_algo quic_cc_algo_cubic = {
        .type        = QUIC_CC_ALGO_TP_CUBIC,
        .init        = quic_cc_cubic_init,
        .event       = quic_cc_cubic_event,
        .slow_start  = quic_cc_cubic_slow_start,
        .state_trace = quic_cc_cubic_state_trace,
};