developer | 4f3e9a2 | 2022-05-30 17:33:30 +0800 | [diff] [blame] | 1 | From git@z Thu Jan 1 00:00:00 1970 |
| 2 | Subject: [PATCH v2] napi: fix race inside napi_enable |
| 3 | From: Xuan Zhuo <xuanzhuo@linux.alibaba.com> |
| 4 | Date: Sat, 18 Sep 2021 16:52:32 +0800 |
| 5 | Message-Id: <20210918085232.71436-1-xuanzhuo@linux.alibaba.com> |
| 6 | To: netdev@vger.kernel.org, linyunsheng@huawei.com |
| 7 | Cc: "David S. Miller" <davem@davemloft.net>, Jakub Kicinski <kuba@kernel.org>, Eric Dumazet <edumazet@google.com>, Daniel Borkmann <daniel@iogearbox.net>, Antoine Tenart <atenart@kernel.org>, Alexander Lobakin <alobakin@pm.me>, Wei Wang <weiwan@google.com>, Taehee Yoo <ap420073@gmail.com>,Björn Töpel <bjorn@kernel.org>, Arnd Bergmann <arnd@arndb.de>, Kumar Kartikeya Dwivedi <memxor@gmail.com>, Neil Horman <nhorman@redhat.com>, Dust Li <dust.li@linux.alibaba.com> |
| 8 | List-Id: <netdev.vger.kernel.org> |
| 9 | MIME-Version: 1.0 |
| 10 | Content-Type: text/plain; charset="utf-8" |
| 11 | Content-Transfer-Encoding: 7bit |
| 12 | |
| 13 | The sequence shown below leaves NAPI_STATE_SCHED set in napi.state while |
| 14 | the napi is not in the poll_list, which causes napi_disable() to get stuck. |
| 15 | |
| 16 | The prefix "NAPI_STATE_" is removed in the figure below, and |
| 17 | NAPI_STATE_HASHED is ignored in napi.state. |
| 18 | |
| 19 | CPU0 | CPU1 | napi.state |
| 20 | =============================================================================== |
| 21 | napi_disable() | | SCHED | NPSVC |
| 22 | napi_enable() | | |
| 23 | { | | |
| 24 | smp_mb__before_atomic(); | | |
| 25 | clear_bit(SCHED, &n->state); | | NPSVC |
| 26 | | napi_schedule_prep() | SCHED | NPSVC |
| 27 | | napi_poll() | |
| 28 | | napi_complete_done() | |
| 29 | | { | |
| 30 | | if (n->state & (NPSVC | | (1) |
| 31 | | _BUSY_POLL))) | |
| 32 | | return false; | |
| 33 | | ................ | |
| 34 | | } | SCHED | NPSVC |
| 35 | | | |
| 36 | clear_bit(NPSVC, &n->state); | | SCHED |
| 37 | } | | |
| 38 | | | |
| 39 | napi_schedule_prep() | | SCHED | MISSED (2) |
| 40 | |
| 41 | (1) Returns directly here, because NAPI_STATE_NPSVC is set. |
| 42 | (2) NAPI_STATE_SCHED is already set, so napi.poll_list is not added to sd->poll_list. |
| 43 | |
| 44 | Since NAPI_STATE_SCHED is already set and napi is not in the |
| 45 | sd->poll_list queue, NAPI_STATE_SCHED can never be cleared and will |
| 46 | stay set forever. |
| 47 | |
| 48 | 1. This will cause this queue to no longer receive packets. |
| 49 | 2. If napi_disable() is then called under the protection of rtnl_lock, it |
| 50 | never returns, so rtnl_lock stays held indefinitely, affecting the overall |
| 51 | system. |
| 52 | |
| 53 | This patch uses cmpxchg to implement napi_enable(), which ensures that |
| 54 | there is no race caused by clearing the two bits separately. |
| 55 | |
| 56 | Fixes: 2d8bff12699abc ("netpoll: Close race condition between poll_one_napi and napi_disable") |
| 57 | Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com> |
| 58 | Reviewed-by: Dust Li <dust.li@linux.alibaba.com> |
| 59 | --- |
| 60 | net/core/dev.c | 16 ++++++++++------ |
| 61 | 1 file changed, 10 insertions(+), 6 deletions(-) |
| 62 | |
| 63 | diff --git a/net/core/dev.c b/net/core/dev.c |
| 64 | index 74fd402d26dd..7ee9fecd3aff 100644 |
| 65 | --- a/net/core/dev.c |
| 66 | +++ b/net/core/dev.c |
| 67 | @@ -6923,12 +6923,16 @@ EXPORT_SYMBOL(napi_disable); |
| 68 | */ |
| 69 | void napi_enable(struct napi_struct *n) |
| 70 | { |
| 71 | - BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); |
| 72 | - smp_mb__before_atomic(); |
| 73 | - clear_bit(NAPI_STATE_SCHED, &n->state); |
| 74 | - clear_bit(NAPI_STATE_NPSVC, &n->state); |
| 75 | - if (n->dev->threaded && n->thread) |
| 76 | - set_bit(NAPI_STATE_THREADED, &n->state); |
| 77 | + unsigned long val, new; |
| 78 | + |
| 79 | + do { |
| 80 | + val = READ_ONCE(n->state); |
| 81 | + BUG_ON(!test_bit(NAPI_STATE_SCHED, &val)); |
| 82 | + |
| 83 | + new = val & ~(NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC); |
| 84 | + if (n->dev->threaded && n->thread) |
| 85 | + new |= NAPIF_STATE_THREADED; |
| 86 | + } while (cmpxchg(&n->state, val, new) != val); |
| 87 | } |
| 88 | EXPORT_SYMBOL(napi_enable); |
| 89 | |
| 90 | |
| 91 | -- |
| 92 | 2.31.0 |
| 93 | |
| 94 | |