Willy Tarreau | 11dc26f | 2013-11-20 15:02:38 +0100 | [diff] [blame] | 1 | SYN cookie analysis on 3.10 |
| 2 | |
| 3 | include/net/request_sock.h: |
| 4 | |
| 5 | static inline int reqsk_queue_is_full(const struct request_sock_queue *queue) |
| 6 | { |
| 7 | return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log; |
| 8 | } |
| 9 | |
| 10 | include/net/inet_connection_sock.h: |
| 11 | |
| 12 | static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk) |
| 13 | { |
| 14 | return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue); |
| 15 | } |
| 16 | |
| 17 | max_qlen_log is computed to equal log2(min(min(listen_backlog,somaxconn), sysctl_max_syn_backlog)), |
| 18 | and it is computed along the following call path : |
| 19 | |
| 20 | socket.c:listen(fd, backlog) : |
| 21 | |
| 22 | backlog = min(backlog, somaxconn) |
| 23 | => af_inet.c:inet_listen(sock, backlog) |
| 24 | |
| 25 | => inet_connection_sock.c:inet_csk_listen_start(sk, backlog) |
| 26 | |
| 27 | sk_max_ack_backlog = backlog |
| 28 | => request_sock.c:reqsk_queue_alloc(sk, backlog (=nr_table_entries)) |
| 29 | |
| 30 | nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog); |
| 31 | nr_table_entries = max_t(u32, nr_table_entries, 8); |
| 32 | nr_table_entries = roundup_pow_of_two(nr_table_entries + 1); |
| 33 | for (lopt->max_qlen_log = 3; |
| 34 | (1 << lopt->max_qlen_log) < nr_table_entries; |
| 35 | lopt->max_qlen_log++); |
| 36 | |
| 37 | |
| 38 | tcp_ipv4.c:tcp_v4_conn_request() |
| 39 | - inet_csk_reqsk_queue_is_full() returns true when the listening socket's |
| 40 | qlen is at least 1 << max_qlen_log, so basically qlen >= min(backlog,max_backlog) |
| 41 | |
| 42 | - tcp_syn_flood_action() returns true when sysctl_tcp_syncookies is set. It |
| 43 | also emits a warning once per listening socket when activating the feature. |
| 44 | |
| 45 | if (inet_csk_reqsk_queue_is_full(sk) && !isn) { |
| 46 | want_cookie = tcp_syn_flood_action(sk, skb, "TCP"); |
| 47 | if (!want_cookie) |
| 48 | goto drop; |
| 49 | } |
| 50 | |
| 51 | => when the socket's current backlog is >= min(backlog,max_backlog), |
| 52 | either sysctl_tcp_syncookies is set so we set want_cookie to 1, or we drop. |
| 53 | |
| 54 | |
| 55 | /* Accept backlog is full. If we have already queued enough |
| 56 | * of warm entries in syn queue, drop request. It is better than |
| 57 | * clogging syn queue with openreqs with exponentially increasing |
| 58 | * timeout. |
| 59 | */ |
| 60 | |
| 61 | sock.h:sk_acceptq_is_full() = sk_ack_backlog > sk_max_ack_backlog |
| 62 | = sk_ack_backlog > min(somaxconn, listen_backlog) |
| 63 | |
| 64 | if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) { |
| 65 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); |
| 66 | goto drop; |
| 67 | } |
| 68 | |
| 69 | ====> the checks appear in the code in the reverse order, but the resulting |
| 70 | algorithm applies them with these priorities : |
| 71 | |
| 72 | 1) IF socket's accept queue > min(somaxconn, listen_backlog) THEN drop |
| 73 | |
| 74 | 2) IF socket's SYN backlog < min(somaxconn, listen_backlog, tcp_max_syn_backlog) THEN accept |
| 75 | |
| 76 | 3) IF sysctl_tcp_syncookies THEN send_syn_cookie |
| 77 | |
| 78 | 4) otherwise drop |
| 79 | |
| 80 | ====> the problem is the accept queue being filled, but it's supposed to be |
| 81 | filled only with validated client requests (step 1). |
| 82 | |
| 83 | |
| 84 | |
| 85 | req = inet_reqsk_alloc(&tcp_request_sock_ops); |
| 86 | if (!req) |
| 87 | goto drop; |
| 88 | |
| 89 | ... |
| 90 | if (!sysctl_tcp_syncookies && |
| 91 | (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) < |
| 92 | (sysctl_max_syn_backlog >> 2)) && |
| 93 | !tcp_peer_is_proven(req, dst, false)) { |
| 94 | /* Without syncookies last quarter of |
| 95 | * backlog is filled with destinations, |
| 96 | * proven to be alive. |
| 97 | * It means that we continue to communicate |
| 98 | * to destinations, already remembered |
| 99 | * to the moment of synflood. |
| 100 | */ |
| 101 | LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("drop open request from %pI4/%u\n"), |
| 102 | &saddr, ntohs(tcp_hdr(skb)->source)); |
| 103 | goto drop_and_release; |
| 104 | } |
| 105 | |
| 106 | |