From b80c745d2b90b30558e4f5b12060af956ae8e76d Mon Sep 17 00:00:00 2001
From: Bo Jiao <Bo.Jiao@mediatek.com>
Date: Mon, 18 Sep 2023 10:52:27 +0800
Subject: [PATCH 02/22] mt7622 backport nf hw offload framework and upstream
 hnat plus xt-FLOWOFFLOAD update v2

---
 drivers/net/ethernet/mediatek/Makefile | 3 +-
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 25 +-
 drivers/net/ethernet/mediatek/mtk_eth_soc.h | 19 +-
 drivers/net/ethernet/mediatek/mtk_ppe.c | 510 +++++++
 drivers/net/ethernet/mediatek/mtk_ppe.h | 288 ++++
 .../net/ethernet/mediatek/mtk_ppe_debugfs.c | 214 +++
 .../net/ethernet/mediatek/mtk_ppe_offload.c | 535 ++++++++
 drivers/net/ethernet/mediatek/mtk_ppe_regs.h | 144 ++
 drivers/net/ppp/ppp_generic.c | 22 +
 drivers/net/ppp/pppoe.c | 24 +
 include/linux/netdevice.h | 60 +
 include/linux/ppp_channel.h | 3 +
 include/net/flow_offload.h | 4 +
 include/net/ip6_route.h | 5 +-
 .../net/netfilter/ipv6/nf_conntrack_ipv6.h | 3 -
 include/net/netfilter/nf_conntrack.h | 12 +
 include/net/netfilter/nf_conntrack_acct.h | 11 +
 include/net/netfilter/nf_flow_table.h | 266 +++-
 include/net/netns/conntrack.h | 6 +
 .../linux/netfilter/nf_conntrack_common.h | 9 +-
 include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h | 17 +
 net/8021q/vlan_dev.c | 21 +
 net/bridge/br_device.c | 49 +
 net/bridge/br_private.h | 20 +
 net/bridge/br_vlan.c | 55 +
 net/core/dev.c | 46 +
 net/dsa/slave.c | 36 +-
 net/ipv4/netfilter/Kconfig | 4 +-
 net/ipv6/ip6_output.c | 2 +-
 net/ipv6/netfilter/Kconfig | 3 +-
 net/ipv6/route.c | 22 +-
 net/netfilter/Kconfig | 14 +-
 net/netfilter/Makefile | 4 +-
 net/netfilter/nf_conntrack_core.c | 20 +-
 net/netfilter/nf_conntrack_proto_tcp.c | 4 +
 net/netfilter/nf_conntrack_proto_udp.c | 4 +
 net/netfilter/nf_conntrack_standalone.c | 34 +-
 net/netfilter/nf_flow_table_core.c | 462 ++++---
 net/netfilter/nf_flow_table_ip.c | 447 +++---
 net/netfilter/nf_flow_table_offload.c | 1199 +++++++++++++++++
 net/netfilter/xt_FLOWOFFLOAD.c | 794 +++++++++++
 41 files changed, 4985 insertions(+), 435 deletions(-)
 mode change 100644 => 100755 drivers/net/ethernet/mediatek/Makefile
 mode change 100644 => 100755 drivers/net/ethernet/mediatek/mtk_eth_soc.c
 mode change 100644 => 100755 drivers/net/ethernet/mediatek/mtk_eth_soc.h
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.c
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.h
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_offload.c
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_regs.h
 create mode 100644 include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
 create mode 100644 net/netfilter/nf_flow_table_offload.c
 create mode 100644 net/netfilter/xt_FLOWOFFLOAD.c

diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile
old mode 100644
new mode 100755
index 634640d..5f342f4
--- a/drivers/net/ethernet/mediatek/Makefile
+++ b/drivers/net/ethernet/mediatek/Makefile
@@ -4,5 +4,6 @@
 #
 
 obj-$(CONFIG_NET_MEDIATEK_SOC) += mtk_eth.o
-mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_usxgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o
+mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_usxgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o \
+ mtk_ppe.o mtk_ppe_debugfs.o mtk_ppe_offload.o
 obj-$(CONFIG_NET_MEDIATEK_HNAT) += mtk_hnat/
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
old mode 100644
new mode 100755
index c4bea4d..9c85e16
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -3573,6 +3573,7 @@ static int mtk_open(struct net_device *dev)
 u32 id = mtk_mac2xgmii_id(eth, mac->id);
 int err, i;
 struct device_node *phy_node;
+ u32 gdm_config = MTK_GDMA_TO_PDMA;

 err = phylink_of_phy_connect(mac->phylink, mac->of_node, 0);
 if (err) {
@@ -3650,7 +3651,10 @@ static int mtk_open(struct net_device *dev)
 regmap_write(eth->sgmii->pcs[id].regmap,
 SGMSYS_QPHY_PWR_STATE_CTRL, 0);

- mtk_gdm_config(eth, mac->id, MTK_GDMA_TO_PDMA);
+ if (eth->soc->offload_version && mtk_ppe_start(&eth->ppe) == 0)
+ gdm_config = MTK_GDMA_TO_PPE;
+
+ mtk_gdm_config(eth, mac->id, gdm_config);

 return 0;
 }
@@ -3730,6 +3734,9 @@ static int mtk_stop(struct net_device *dev)

 mtk_dma_free(eth);

+ if (eth->soc->offload_version)
+ mtk_ppe_stop(&eth->ppe);
+
 return 0;
 }

@@ -4576,6 +4583,7 @@ static const struct net_device_ops mtk_netdev_ops = {
 #ifdef CONFIG_NET_POLL_CONTROLLER
 .ndo_poll_controller = mtk_poll_controller,
 #endif
+ .ndo_setup_tc = mtk_eth_setup_tc,
 };

 static void mux_poll(struct work_struct *work)
@@ -5161,6 +5169,17 @@ static int mtk_probe(struct platform_device *pdev)
 goto err_free_dev;
 }

+ if (eth->soc->offload_version) {
+ err = mtk_ppe_init(&eth->ppe, eth->dev,
+ eth->base + MTK_ETH_PPE_BASE, 2);
+ if (err)
+ goto err_free_dev;
+
+ err = mtk_eth_offload_init(eth);
+ if (err)
+ goto err_free_dev;
+ }
+
 for (i = 0; i < MTK_MAX_DEVS; i++) {
 if (!eth->netdev[i])
 continue;
@@ -5254,6 +5273,7 @@ static const struct mtk_soc_data mt2701_data = {
 .required_clks = MT7623_CLKS_BITMAP,
 .required_pctl = true,
 .has_sram = false,
+ .offload_version = 2,
 .rss_num = 0,
 .txrx = {
 .txd_size = sizeof(struct mtk_tx_dma),
@@ -5271,6 +5291,7 @@ static const struct mtk_soc_data mt7621_data = {
 .required_clks = MT7621_CLKS_BITMAP,
 .required_pctl = false,
 .has_sram = false,
+ .offload_version = 2,
 .rss_num = 0,
 .txrx = {
 .txd_size = sizeof(struct mtk_tx_dma),
@@ -5289,6 +5310,7 @@ static const struct mtk_soc_data mt7622_data = {
 .required_clks = MT7622_CLKS_BITMAP,
 .required_pctl = false,
 .has_sram = false,
+ .offload_version = 2,
 .rss_num = 0,
 .txrx = {
 .txd_size = sizeof(struct mtk_tx_dma),
@@ -5306,6 +5328,7 @@ static const struct mtk_soc_data mt7623_data = {
 .required_clks = MT7623_CLKS_BITMAP,
 .required_pctl = true,
 .has_sram = false,
+ .offload_version = 2,
 .rss_num = 0,
 .txrx = {
 .txd_size = sizeof(struct mtk_tx_dma),
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
old mode 100644
new mode 100755
index 8a9b615..a87e46d
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -15,6 +15,8 @@
 #include <linux/u64_stats_sync.h>
 #include <linux/refcount.h>
 #include <linux/phylink.h>
+#include <linux/rhashtable.h>
+#include "mtk_ppe.h"

 #define MTK_QDMA_PAGE_SIZE 2048
 #define MTK_MAX_RX_LENGTH 1536
@@ -44,7 +46,8 @@
 NETIF_F_HW_VLAN_CTAG_TX | \
 NETIF_F_SG | NETIF_F_TSO | \
 NETIF_F_TSO6 | \
- NETIF_F_IPV6_CSUM)
+ NETIF_F_IPV6_CSUM |\
+ NETIF_F_HW_TC)
 #define MTK_SET_FEATURES (NETIF_F_LRO | \
 NETIF_F_HW_VLAN_CTAG_RX)
 #define MTK_HW_FEATURES_MT7628 (NETIF_F_SG | NETIF_F_RXCSUM)
@@ -127,6 +130,7 @@
 #define MTK_GDMA_UCS_EN BIT(20)
 #define MTK_GDMA_STRP_CRC BIT(16)
 #define MTK_GDMA_TO_PDMA 0x0
+#define MTK_GDMA_TO_PPE 0x4444
 #define MTK_GDMA_DROP_ALL 0x7777

 /* GDM Egress Control Register */
@@ -617,6 +621,12 @@
 #define RX_DMA_TCI(_x) ((_x) & (VLAN_PRIO_MASK | VLAN_VID_MASK))
 #define RX_DMA_VPID(_x) (((_x) >> 16) & 0xffff)

+/* QDMA descriptor rxd4 */
+#define MTK_RXD4_FOE_ENTRY GENMASK(13, 0)
+#define MTK_RXD4_PPE_CPU_REASON GENMASK(18, 14)
+#define MTK_RXD4_SRC_PORT GENMASK(21, 19)
+#define MTK_RXD4_ALG GENMASK(31, 22)
+
 /* QDMA descriptor rxd4 */
 #define RX_DMA_L4_VALID BIT(24)
 #define RX_DMA_L4_VALID_PDMA BIT(30) /* when PDMA is used */
@@ -1651,6 +1661,7 @@ struct mtk_soc_data {
 u64 caps;
 u64 required_clks;
 bool required_pctl;
+ u8 offload_version;
 netdev_features_t hw_features;
 bool has_sram;
 struct {
@@ -1847,6 +1858,9 @@ struct mtk_eth {
 int ip_align;
 spinlock_t syscfg0_lock;
 struct timer_list mtk_dma_monitor_timer;
+
+ struct mtk_ppe ppe;
+ struct rhashtable flow_table;
 };

 /* struct mtk_mac - the structure that holds the info about the MACs of the
@@ -1927,6 +1941,9 @@ int mtk_toprgu_init(struct mtk_eth *eth, struct device_node *r);
 int mtk_dump_usxgmii(struct regmap *pmap, char *name, u32 offset, u32 range);
 void mtk_usxgmii_link_poll(struct work_struct *work);

+int mtk_eth_offload_init(struct mtk_eth *eth);
+int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data);
 void mtk_eth_set_dma_device(struct mtk_eth *eth, struct device *dma_dev);
 u32 mtk_rss_indr_table(struct mtk_rss_params *rss_params, int index);
 #endif /* MTK_ETH_H */
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
new file mode 100644
index 0000000..27b5be5
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
@@ -0,0 +1,514 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
+
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/etherdevice.h>
+#include <linux/platform_device.h>
+#include "mtk_ppe.h"
+#include "mtk_ppe_regs.h"
+
+static void ppe_w32(struct mtk_ppe *ppe, u32 reg, u32 val)
+{
+ writel(val, ppe->base + reg);
+}
+
+static u32 ppe_r32(struct mtk_ppe *ppe, u32 reg)
+{
+ return readl(ppe->base + reg);
+}
+
+static u32 ppe_m32(struct mtk_ppe *ppe, u32 reg, u32 mask, u32 set)
+{
+ u32 val;
+
+ val = ppe_r32(ppe, reg);
+ val &= ~mask;
+ val |= set;
+ ppe_w32(ppe, reg, val);
+
+ return val;
+}
+
+static u32 ppe_set(struct mtk_ppe *ppe, u32 reg, u32 val)
+{
+ return ppe_m32(ppe, reg, 0, val);
+}
+
+static u32 ppe_clear(struct mtk_ppe *ppe, u32 reg, u32 val)
+{
+ return ppe_m32(ppe, reg, val, 0);
+}
+
+static int mtk_ppe_wait_busy(struct mtk_ppe *ppe)
+{
+ int ret;
+ u32 val;
+
+ ret = readl_poll_timeout(ppe->base + MTK_PPE_GLO_CFG, val,
+ !(val & MTK_PPE_GLO_CFG_BUSY),
+ 20, MTK_PPE_WAIT_TIMEOUT_US);
+
+ if (ret)
+ dev_err(ppe->dev, "PPE table busy");
+
+ return ret;
+}
+
+static void mtk_ppe_cache_clear(struct mtk_ppe *ppe)
+{
+ ppe_set(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
+ ppe_clear(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
+}
+
+static void mtk_ppe_cache_enable(struct mtk_ppe *ppe, bool enable)
+{
+ mtk_ppe_cache_clear(ppe);
+
+ ppe_m32(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_EN,
+ enable * MTK_PPE_CACHE_CTL_EN);
+}
+
+static u32 mtk_ppe_hash_entry(struct mtk_foe_entry *e)
+{
+ u32 hv1, hv2, hv3;
+ u32 hash;
+
+ switch (FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, e->ib1)) {
+ case MTK_PPE_PKT_TYPE_BRIDGE:
+ hv1 = e->bridge.src_mac_lo;
+ hv1 ^= ((e->bridge.src_mac_hi & 0xffff) << 16);
+ hv2 = e->bridge.src_mac_hi >> 16;
+ hv2 ^= e->bridge.dest_mac_lo;
+ hv3 = e->bridge.dest_mac_hi;
+ break;
+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
+ hv1 = e->ipv4.orig.ports;
+ hv2 = e->ipv4.orig.dest_ip;
+ hv3 = e->ipv4.orig.src_ip;
+ break;
+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
+ hv1 = e->ipv6.src_ip[3] ^ e->ipv6.dest_ip[3];
+ hv1 ^= e->ipv6.ports;
+
+ hv2 = e->ipv6.src_ip[2] ^ e->ipv6.dest_ip[2];
+ hv2 ^= e->ipv6.dest_ip[0];
+
+ hv3 = e->ipv6.src_ip[1] ^ e->ipv6.dest_ip[1];
+ hv3 ^= e->ipv6.src_ip[0];
+ break;
+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
+ default:
+ WARN_ON_ONCE(1);
+ return MTK_PPE_HASH_MASK;
+ }
+
+ hash = (hv1 & hv2) | ((~hv1) & hv3);
+ hash = (hash >> 24) | ((hash & 0xffffff) << 8);
+ hash ^= hv1 ^ hv2 ^ hv3;
+ hash ^= hash >> 16;
+ hash <<= 1;
+ hash &= MTK_PPE_ENTRIES - 1;
+
+ return hash;
+}
+
+static inline struct mtk_foe_mac_info *
+mtk_foe_entry_l2(struct mtk_foe_entry *entry)
+{
+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+
+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
+ return &entry->ipv6.l2;
+
+ return &entry->ipv4.l2;
+}
+
+static inline u32 *
+mtk_foe_entry_ib2(struct mtk_foe_entry *entry)
+{
+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+
+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
+ return &entry->ipv6.ib2;
+
+ return &entry->ipv4.ib2;
+}
+
+int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
+ u8 pse_port, u8 *src_mac, u8 *dest_mac)
+{
+ struct mtk_foe_mac_info *l2;
+ u32 ports_pad, val;
+
+ memset(entry, 0, sizeof(*entry));
+
+ val = FIELD_PREP(MTK_FOE_IB1_STATE, MTK_FOE_STATE_BIND) |
+ FIELD_PREP(MTK_FOE_IB1_PACKET_TYPE, type) |
+ FIELD_PREP(MTK_FOE_IB1_UDP, l4proto == IPPROTO_UDP) |
+ MTK_FOE_IB1_BIND_TTL |
+ MTK_FOE_IB1_BIND_CACHE;
+ entry->ib1 = val;
+
+ val = FIELD_PREP(MTK_FOE_IB2_PORT_MG, 0x3f) |
+ FIELD_PREP(MTK_FOE_IB2_PORT_AG, 0x1f) |
+ FIELD_PREP(MTK_FOE_IB2_DEST_PORT, pse_port);
+
+ if (is_multicast_ether_addr(dest_mac))
+ val |= MTK_FOE_IB2_MULTICAST;
+
+ ports_pad = 0xa5a5a500 | (l4proto & 0xff);
+ if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
+ entry->ipv4.orig.ports = ports_pad;
+ if (type == MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
+ entry->ipv6.ports = ports_pad;
+
+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
+ entry->ipv6.ib2 = val;
+ l2 = &entry->ipv6.l2;
+ } else {
+ entry->ipv4.ib2 = val;
+ l2 = &entry->ipv4.l2;
+ }
+
+ l2->dest_mac_hi = get_unaligned_be32(dest_mac);
+ l2->dest_mac_lo = get_unaligned_be16(dest_mac + 4);
+ l2->src_mac_hi = get_unaligned_be32(src_mac);
+ l2->src_mac_lo = get_unaligned_be16(src_mac + 4);
+
+ if (type >= MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
+ l2->etype = ETH_P_IPV6;
+ else
+ l2->etype = ETH_P_IP;
+
+ return 0;
+}
+
+int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port)
+{
+ u32 *ib2 = mtk_foe_entry_ib2(entry);
+ u32 val;
+
+ val = *ib2;
+ val &= ~MTK_FOE_IB2_DEST_PORT;
+ val |= FIELD_PREP(MTK_FOE_IB2_DEST_PORT, port);
+ *ib2 = val;
+
+ return 0;
+}
+
+int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool egress,
+ __be32 src_addr, __be16 src_port,
+ __be32 dest_addr, __be16 dest_port)
+{
+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+ struct mtk_ipv4_tuple *t;
+
+ switch (type) {
+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
+ if (egress) {
+ t = &entry->ipv4.new;
+ break;
+ }
+ fallthrough;
+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
+ t = &entry->ipv4.orig;
+ break;
+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
+ entry->ipv6_6rd.tunnel_src_ip = be32_to_cpu(src_addr);
+ entry->ipv6_6rd.tunnel_dest_ip = be32_to_cpu(dest_addr);
+ return 0;
+ default:
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+ }
+
+ t->src_ip = be32_to_cpu(src_addr);
+ t->dest_ip = be32_to_cpu(dest_addr);
+
+ if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
+ return 0;
+
+ t->src_port = be16_to_cpu(src_port);
+ t->dest_port = be16_to_cpu(dest_port);
+
+ return 0;
+}
+
+int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
+ __be32 *src_addr, __be16 src_port,
+ __be32 *dest_addr, __be16 dest_port)
+{
+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+ u32 *src, *dest;
+ int i;
+
+ switch (type) {
+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
+ src = entry->dslite.tunnel_src_ip;
+ dest = entry->dslite.tunnel_dest_ip;
+ break;
+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
+ entry->ipv6.src_port = be16_to_cpu(src_port);
+ entry->ipv6.dest_port = be16_to_cpu(dest_port);
+ fallthrough;
+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
+ src = entry->ipv6.src_ip;
+ dest = entry->ipv6.dest_ip;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < 4; i++)
+ src[i] = be32_to_cpu(src_addr[i]);
+ for (i = 0; i < 4; i++)
+ dest[i] = be32_to_cpu(dest_addr[i]);
+
+ return 0;
+}
+
+int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port)
+{
+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
+
+ l2->etype = BIT(port);
+
+ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER))
+ entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
+ else
+ l2->etype |= BIT(8);
+
+ entry->ib1 &= ~MTK_FOE_IB1_BIND_VLAN_TAG;
+
+ return 0;
+}
+
+int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid)
+{
+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
+
+ switch (FIELD_GET(MTK_FOE_IB1_BIND_VLAN_LAYER, entry->ib1)) {
+ case 0:
+ entry->ib1 |= MTK_FOE_IB1_BIND_VLAN_TAG |
+ FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
+ l2->vlan1 = vid;
+ return 0;
+ case 1:
+ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG)) {
+ l2->vlan1 = vid;
+ l2->etype |= BIT(8);
+ } else {
+ l2->vlan2 = vid;
+ entry->ib1 += FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
+ }
+ return 0;
+ default:
+ return -ENOSPC;
+ }
+}
+
+int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid)
+{
+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
+
+ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER) ||
+ (entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG))
+ l2->etype = ETH_P_PPP_SES;
+
+ entry->ib1 |= MTK_FOE_IB1_BIND_PPPOE;
+ l2->pppoe_id = sid;
+
+ return 0;
+}
+
+static inline bool mtk_foe_entry_usable(struct mtk_foe_entry *entry)
+{
+ return !(entry->ib1 & MTK_FOE_IB1_STATIC) &&
+ FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1) != MTK_FOE_STATE_BIND;
+}
+
+int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
+ u16 timestamp)
+{
+ struct mtk_foe_entry *hwe;
+ u32 hash;
+
+ timestamp &= MTK_FOE_IB1_BIND_TIMESTAMP;
+ entry->ib1 &= ~MTK_FOE_IB1_BIND_TIMESTAMP;
+ entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_TIMESTAMP, timestamp);
+
+ hash = mtk_ppe_hash_entry(entry);
+ hwe = &ppe->foe_table[hash];
+ if (!mtk_foe_entry_usable(hwe)) {
+ hwe++;
+ hash++;
+
+ if (!mtk_foe_entry_usable(hwe))
+ return -ENOSPC;
+ }
+
+ memcpy(&hwe->data, &entry->data, sizeof(hwe->data));
+ wmb();
+ hwe->ib1 = entry->ib1;
+
+ dma_wmb();
+
+ mtk_ppe_cache_clear(ppe);
+
+ return hash;
+}
+
+int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
+ int version)
+{
+ struct mtk_foe_entry *foe;
+
+ /* need to allocate a separate device, since it PPE DMA access is
+ * not coherent.
+ */
+ ppe->base = base;
+ ppe->dev = dev;
+ ppe->version = version;
+
+ foe = dmam_alloc_coherent(ppe->dev, MTK_PPE_ENTRIES * sizeof(*foe),
+ &ppe->foe_phys, GFP_KERNEL);
+ if (!foe)
+ return -ENOMEM;
+
+ ppe->foe_table = foe;
+
+ mtk_ppe_debugfs_init(ppe);
+
+ return 0;
+}
+
+static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe)
+{
+ static const u8 skip[] = { 12, 25, 38, 51, 76, 89, 102 };
+ int i, k;
+
+ memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(*ppe->foe_table));
+
+ if (!IS_ENABLED(CONFIG_SOC_MT7621))
+ return;
+
+ /* skip all entries that cross the 1024 byte boundary */
+ for (i = 0; i < MTK_PPE_ENTRIES; i += 128)
+ for (k = 0; k < ARRAY_SIZE(skip); k++)
+ ppe->foe_table[i + skip[k]].ib1 |= MTK_FOE_IB1_STATIC;
+}
+
+int mtk_ppe_start(struct mtk_ppe *ppe)
+{
+ u32 val;
+
+ mtk_ppe_init_foe_table(ppe);
+ ppe_w32(ppe, MTK_PPE_TB_BASE, ppe->foe_phys);
+
+ val = MTK_PPE_TB_CFG_ENTRY_80B |
+ MTK_PPE_TB_CFG_AGE_NON_L4 |
+ MTK_PPE_TB_CFG_AGE_UNBIND |
+ MTK_PPE_TB_CFG_AGE_TCP |
+ MTK_PPE_TB_CFG_AGE_UDP |
+ MTK_PPE_TB_CFG_AGE_TCP_FIN |
+ FIELD_PREP(MTK_PPE_TB_CFG_SEARCH_MISS,
+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD) |
+ FIELD_PREP(MTK_PPE_TB_CFG_KEEPALIVE,
+ MTK_PPE_KEEPALIVE_DISABLE) |
+ FIELD_PREP(MTK_PPE_TB_CFG_HASH_MODE, 1) |
+ FIELD_PREP(MTK_PPE_TB_CFG_SCAN_MODE,
+ MTK_PPE_SCAN_MODE_CHECK_AGE) |
+ FIELD_PREP(MTK_PPE_TB_CFG_ENTRY_NUM,
+ MTK_PPE_ENTRIES_SHIFT);
+ ppe_w32(ppe, MTK_PPE_TB_CFG, val);
+
+ ppe_w32(ppe, MTK_PPE_IP_PROTO_CHK,
+ MTK_PPE_IP_PROTO_CHK_IPV4 | MTK_PPE_IP_PROTO_CHK_IPV6);
+
+ mtk_ppe_cache_enable(ppe, true);
+
+ val = MTK_PPE_FLOW_CFG_IP4_TCP_FRAG |
+ MTK_PPE_FLOW_CFG_IP4_UDP_FRAG |
+ MTK_PPE_FLOW_CFG_IP6_3T_ROUTE |
+ MTK_PPE_FLOW_CFG_IP6_5T_ROUTE |
+ MTK_PPE_FLOW_CFG_IP6_6RD |
+ MTK_PPE_FLOW_CFG_IP4_NAT |
+ MTK_PPE_FLOW_CFG_IP4_NAPT |
+ MTK_PPE_FLOW_CFG_IP4_DSLITE |
+ MTK_PPE_FLOW_CFG_L2_BRIDGE |
+ MTK_PPE_FLOW_CFG_IP4_NAT_FRAG;
+ ppe_w32(ppe, MTK_PPE_FLOW_CFG, val);
+
+ val = FIELD_PREP(MTK_PPE_UNBIND_AGE_MIN_PACKETS, 1000) |
+ FIELD_PREP(MTK_PPE_UNBIND_AGE_DELTA, 3);
+ ppe_w32(ppe, MTK_PPE_UNBIND_AGE, val);
+
+ val = FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_UDP, 30) |
+ FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_NON_L4, 1);
+ ppe_w32(ppe, MTK_PPE_BIND_AGE0, val);
+
+ val = FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP_FIN, 1) |
+ FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP, 30);
+ ppe_w32(ppe, MTK_PPE_BIND_AGE1, val);
+
+ val = MTK_PPE_BIND_LIMIT0_QUARTER | MTK_PPE_BIND_LIMIT0_HALF;
+ ppe_w32(ppe, MTK_PPE_BIND_LIMIT0, val);
+
+ val = MTK_PPE_BIND_LIMIT1_FULL |
+ FIELD_PREP(MTK_PPE_BIND_LIMIT1_NON_L4, 1);
+ ppe_w32(ppe, MTK_PPE_BIND_LIMIT1, val);
+
+ val = FIELD_PREP(MTK_PPE_BIND_RATE_BIND, 30) |
+ FIELD_PREP(MTK_PPE_BIND_RATE_PREBIND, 1);
+ ppe_w32(ppe, MTK_PPE_BIND_RATE, val);
+
+ /* enable PPE */
+ val = MTK_PPE_GLO_CFG_EN |
+ MTK_PPE_GLO_CFG_IP4_L4_CS_DROP |
+ MTK_PPE_GLO_CFG_IP4_CS_DROP |
+ MTK_PPE_GLO_CFG_MCAST_TB_EN |
+ MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE;
+ ppe_w32(ppe, MTK_PPE_GLO_CFG, val);
+
+ ppe_w32(ppe, MTK_PPE_DEFAULT_CPU_PORT, 0);
+
+ return 0;
+}
+
+int mtk_ppe_stop(struct mtk_ppe *ppe)
+{
+ u32 val;
+ int i;
+
+ for (i = 0; i < MTK_PPE_ENTRIES; i++)
+ ppe->foe_table[i].ib1 = FIELD_PREP(MTK_FOE_IB1_STATE,
+ MTK_FOE_STATE_INVALID);
+
+ mtk_ppe_cache_enable(ppe, false);
+
+ /* disable aging */
+ val = MTK_PPE_TB_CFG_AGE_NON_L4 |
+ MTK_PPE_TB_CFG_AGE_UNBIND |
+ MTK_PPE_TB_CFG_AGE_TCP |
+ MTK_PPE_TB_CFG_AGE_UDP |
+ MTK_PPE_TB_CFG_AGE_TCP_FIN |
+ MTK_PPE_TB_CFG_SCAN_MODE;
+ ppe_clear(ppe, MTK_PPE_TB_CFG, val);
+
+ if (mtk_ppe_wait_busy(ppe))
+ return -ETIMEDOUT;
+
+ /* disable offload engine */
+ ppe_clear(ppe, MTK_PPE_GLO_CFG, MTK_PPE_GLO_CFG_EN);
+ ppe_w32(ppe, MTK_PPE_FLOW_CFG, 0);
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h
new file mode 100644
index 0000000..242fb8f
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
+
+#ifndef __MTK_PPE_H
+#define __MTK_PPE_H
+
+#include <linux/kernel.h>
+#include <linux/bitfield.h>
+
+#define MTK_ETH_PPE_BASE 0xc00
+
+#define MTK_PPE_ENTRIES_SHIFT 3
+#define MTK_PPE_ENTRIES (1024 << MTK_PPE_ENTRIES_SHIFT)
+#define MTK_PPE_HASH_MASK (MTK_PPE_ENTRIES - 1)
+#define MTK_PPE_WAIT_TIMEOUT_US 1000000
+
+#define MTK_FOE_IB1_UNBIND_TIMESTAMP GENMASK(7, 0)
+#define MTK_FOE_IB1_UNBIND_PACKETS GENMASK(23, 8)
+#define MTK_FOE_IB1_UNBIND_PREBIND BIT(24)
+
+#define MTK_FOE_IB1_BIND_TIMESTAMP GENMASK(14, 0)
+#define MTK_FOE_IB1_BIND_KEEPALIVE BIT(15)
+#define MTK_FOE_IB1_BIND_VLAN_LAYER GENMASK(18, 16)
+#define MTK_FOE_IB1_BIND_PPPOE BIT(19)
+#define MTK_FOE_IB1_BIND_VLAN_TAG BIT(20)
+#define MTK_FOE_IB1_BIND_PKT_SAMPLE BIT(21)
+#define MTK_FOE_IB1_BIND_CACHE BIT(22)
+#define MTK_FOE_IB1_BIND_TUNNEL_DECAP BIT(23)
+#define MTK_FOE_IB1_BIND_TTL BIT(24)
+
+#define MTK_FOE_IB1_PACKET_TYPE GENMASK(27, 25)
+#define MTK_FOE_IB1_STATE GENMASK(29, 28)
+#define MTK_FOE_IB1_UDP BIT(30)
+#define MTK_FOE_IB1_STATIC BIT(31)
+
+enum {
+ MTK_PPE_PKT_TYPE_IPV4_HNAPT = 0,
+ MTK_PPE_PKT_TYPE_IPV4_ROUTE = 1,
+ MTK_PPE_PKT_TYPE_BRIDGE = 2,
+ MTK_PPE_PKT_TYPE_IPV4_DSLITE = 3,
+ MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T = 4,
+ MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T = 5,
+ MTK_PPE_PKT_TYPE_IPV6_6RD = 7,
+};
+
+#define MTK_FOE_IB2_QID GENMASK(3, 0)
+#define MTK_FOE_IB2_PSE_QOS BIT(4)
+#define MTK_FOE_IB2_DEST_PORT GENMASK(7, 5)
+#define MTK_FOE_IB2_MULTICAST BIT(8)
+
+#define MTK_FOE_IB2_WHNAT_QID2 GENMASK(13, 12)
+#define MTK_FOE_IB2_WHNAT_DEVIDX BIT(16)
+#define MTK_FOE_IB2_WHNAT_NAT BIT(17)
+
+#define MTK_FOE_IB2_PORT_MG GENMASK(17, 12)
+
+#define MTK_FOE_IB2_PORT_AG GENMASK(23, 18)
+
+#define MTK_FOE_IB2_DSCP GENMASK(31, 24)
+
+#define MTK_FOE_VLAN2_WHNAT_BSS GEMMASK(5, 0)
+#define MTK_FOE_VLAN2_WHNAT_WCID GENMASK(13, 6)
+#define MTK_FOE_VLAN2_WHNAT_RING GENMASK(15, 14)
+
+enum {
+ MTK_FOE_STATE_INVALID,
+ MTK_FOE_STATE_UNBIND,
+ MTK_FOE_STATE_BIND,
+ MTK_FOE_STATE_FIN
+};
+
+struct mtk_foe_mac_info {
+ u16 vlan1;
+ u16 etype;
+
+ u32 dest_mac_hi;
+
+ u16 vlan2;
+ u16 dest_mac_lo;
+
+ u32 src_mac_hi;
+
+ u16 pppoe_id;
+ u16 src_mac_lo;
+};
+
+struct mtk_foe_bridge {
+ u32 dest_mac_hi;
+
+ u16 src_mac_lo;
+ u16 dest_mac_lo;
+
+ u32 src_mac_hi;
+
+ u32 ib2;
+
+ u32 _rsv[5];
+
+ u32 udf_tsid;
+ struct mtk_foe_mac_info l2;
+};
+
+struct mtk_ipv4_tuple {
+ u32 src_ip;
+ u32 dest_ip;
+ union {
+ struct {
+ u16 dest_port;
+ u16 src_port;
+ };
+ struct {
+ u8 protocol;
+ u8 _pad[3]; /* fill with 0xa5a5a5 */
+ };
+ u32 ports;
+ };
+};
+
+struct mtk_foe_ipv4 {
+ struct mtk_ipv4_tuple orig;
+
+ u32 ib2;
+
+ struct mtk_ipv4_tuple new;
+
+ u16 timestamp;
+ u16 _rsv0[3];
+
+ u32 udf_tsid;
+
+ struct mtk_foe_mac_info l2;
+};
+
+struct mtk_foe_ipv4_dslite {
+ struct mtk_ipv4_tuple ip4;
+
+ u32 tunnel_src_ip[4];
+ u32 tunnel_dest_ip[4];
+
+ u8 flow_label[3];
+ u8 priority;
+
+ u32 udf_tsid;
+
+ u32 ib2;
+
+ struct mtk_foe_mac_info l2;
+};
+
+struct mtk_foe_ipv6 {
+ u32 src_ip[4];
+ u32 dest_ip[4];
+
+ union {
+ struct {
+ u8 protocol;
+ u8 _pad[3]; /* fill with 0xa5a5a5 */
+ }; /* 3-tuple */
+ struct {
+ u16 dest_port;
+ u16 src_port;
+ }; /* 5-tuple */
+ u32 ports;
+ };
+
+ u32 _rsv[3];
+
+ u32 udf;
+
+ u32 ib2;
+ struct mtk_foe_mac_info l2;
+};
+
+struct mtk_foe_ipv6_6rd {
+ u32 src_ip[4];
+ u32 dest_ip[4];
+ u16 dest_port;
+ u16 src_port;
+
+ u32 tunnel_src_ip;
+ u32 tunnel_dest_ip;
+
+ u16 hdr_csum;
+ u8 dscp;
+ u8 ttl;
+
+ u8 flag;
+ u8 pad;
+ u8 per_flow_6rd_id;
+ u8 pad2;
+
+ u32 ib2;
+ struct mtk_foe_mac_info l2;
+};
+
+struct mtk_foe_entry {
+ u32 ib1;
+
+ union {
+ struct mtk_foe_bridge bridge;
+ struct mtk_foe_ipv4 ipv4;
+ struct mtk_foe_ipv4_dslite dslite;
+ struct mtk_foe_ipv6 ipv6;
+ struct mtk_foe_ipv6_6rd ipv6_6rd;
+ u32 data[19];
+ };
+};
+
+enum {
+ MTK_PPE_CPU_REASON_TTL_EXCEEDED = 0x02,
+ MTK_PPE_CPU_REASON_OPTION_HEADER = 0x03,
+ MTK_PPE_CPU_REASON_NO_FLOW = 0x07,
+ MTK_PPE_CPU_REASON_IPV4_FRAG = 0x08,
+ MTK_PPE_CPU_REASON_IPV4_DSLITE_FRAG = 0x09,
+ MTK_PPE_CPU_REASON_IPV4_DSLITE_NO_TCP_UDP = 0x0a,
+ MTK_PPE_CPU_REASON_IPV6_6RD_NO_TCP_UDP = 0x0b,
+ MTK_PPE_CPU_REASON_TCP_FIN_SYN_RST = 0x0c,
+ MTK_PPE_CPU_REASON_UN_HIT = 0x0d,
+ MTK_PPE_CPU_REASON_HIT_UNBIND = 0x0e,
+ MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED = 0x0f,
+ MTK_PPE_CPU_REASON_HIT_BIND_TCP_FIN = 0x10,
+ MTK_PPE_CPU_REASON_HIT_TTL_1 = 0x11,
+ MTK_PPE_CPU_REASON_HIT_BIND_VLAN_VIOLATION = 0x12,
+ MTK_PPE_CPU_REASON_KEEPALIVE_UC_OLD_HDR = 0x13,
+ MTK_PPE_CPU_REASON_KEEPALIVE_MC_NEW_HDR = 0x14,
+ MTK_PPE_CPU_REASON_KEEPALIVE_DUP_OLD_HDR = 0x15,
+ MTK_PPE_CPU_REASON_HIT_BIND_FORCE_CPU = 0x16,
+ MTK_PPE_CPU_REASON_TUNNEL_OPTION_HEADER = 0x17,
+ MTK_PPE_CPU_REASON_MULTICAST_TO_CPU = 0x18,
+ MTK_PPE_CPU_REASON_MULTICAST_TO_GMAC1_CPU = 0x19,
+ MTK_PPE_CPU_REASON_HIT_PRE_BIND = 0x1a,
+ MTK_PPE_CPU_REASON_PACKET_SAMPLING = 0x1b,
+ MTK_PPE_CPU_REASON_EXCEED_MTU = 0x1c,
+ MTK_PPE_CPU_REASON_PPE_BYPASS = 0x1e,
+ MTK_PPE_CPU_REASON_INVALID = 0x1f,
+};
+
+struct mtk_ppe {
+ struct device *dev;
+ void __iomem *base;
+ int version;
+
+ struct mtk_foe_entry *foe_table;
+ dma_addr_t foe_phys;
+
+ void *acct_table;
+};
+
+int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
+ int version);
+int mtk_ppe_start(struct mtk_ppe *ppe);
+int mtk_ppe_stop(struct mtk_ppe *ppe);
+
+static inline void
+mtk_foe_entry_clear(struct mtk_ppe *ppe, u16 hash)
+{
+ ppe->foe_table[hash].ib1 = 0;
+ dma_wmb();
+}
+
+static inline int
+mtk_foe_entry_timestamp(struct mtk_ppe *ppe, u16 hash)
+{
+ u32 ib1 = READ_ONCE(ppe->foe_table[hash].ib1);
+
+ if (FIELD_GET(MTK_FOE_IB1_STATE, ib1) != MTK_FOE_STATE_BIND)
+ return -1;
+
+ return FIELD_GET(MTK_FOE_IB1_BIND_TIMESTAMP, ib1);
+}
+
+int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
+ u8 pse_port, u8 *src_mac, u8 *dest_mac);
+int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port);
+int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool orig,
+ __be32 src_addr, __be16 src_port,
+ __be32 dest_addr, __be16 dest_port);
+int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
+ __be32 *src_addr, __be16 src_port,
+ __be32 *dest_addr, __be16 dest_port);
+int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port);
+int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid);
+int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid);
+int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
+ u16 timestamp);
+int mtk_ppe_debugfs_init(struct mtk_ppe *ppe);
+
+#endif
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
new file mode 100644
index 0000000..d4b4823
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
+
+#include <linux/kernel.h>
+#include <linux/debugfs.h>
+#include "mtk_eth_soc.h"
+
+struct mtk_flow_addr_info
+{
+ void *src, *dest;
+ u16 *src_port, *dest_port;
+ bool ipv6;
+};
+
+static const char *mtk_foe_entry_state_str(int state)
+{
+ static const char * const state_str[] = {
+ [MTK_FOE_STATE_INVALID] = "INV",
+ [MTK_FOE_STATE_UNBIND] = "UNB",
+ [MTK_FOE_STATE_BIND] = "BND",
+ [MTK_FOE_STATE_FIN] = "FIN",
+ };
+
+ if (state >= ARRAY_SIZE(state_str) || !state_str[state])
+ return "UNK";
+
+ return state_str[state];
+}
+
+static const char *mtk_foe_pkt_type_str(int type)
+{
+ static const char * const type_str[] = {
+ [MTK_PPE_PKT_TYPE_IPV4_HNAPT] = "IPv4 5T",
+ [MTK_PPE_PKT_TYPE_IPV4_ROUTE] = "IPv4 3T",
+ [MTK_PPE_PKT_TYPE_BRIDGE] = "L2",
+ [MTK_PPE_PKT_TYPE_IPV4_DSLITE] = "DS-LITE",
+ [MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T] = "IPv6 3T",
+ [MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T] = "IPv6 5T",
+ [MTK_PPE_PKT_TYPE_IPV6_6RD] = "6RD",
+ };
+
+ if (type >= ARRAY_SIZE(type_str) || !type_str[type])
+ return "UNKNOWN";
+
+ return type_str[type];
+}
+
+static void
+mtk_print_addr(struct seq_file *m, u32 *addr, bool ipv6)
+{
+ u32 n_addr[4];
+ int i;
+
+ if (!ipv6) {
+ seq_printf(m, "%pI4h", addr);
+ return;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(n_addr); i++)
+ n_addr[i] = htonl(addr[i]);
+ seq_printf(m, "%pI6", n_addr);
+}
+
+static void
+mtk_print_addr_info(struct seq_file *m, struct mtk_flow_addr_info *ai)
+{
+ mtk_print_addr(m, ai->src, ai->ipv6);
+ if (ai->src_port)
+ seq_printf(m, ":%d", *ai->src_port);
+ seq_printf(m, "->");
+ mtk_print_addr(m, ai->dest, ai->ipv6);
+ if (ai->dest_port)
+ seq_printf(m, ":%d", *ai->dest_port);
+}
+
+static int
+mtk_ppe_debugfs_foe_show(struct seq_file *m, void *private, bool bind)
+{
+ struct mtk_ppe *ppe = m->private;
+ int i;
+
+ for (i = 0; i < MTK_PPE_ENTRIES; i++) {
+ struct mtk_foe_entry *entry = &ppe->foe_table[i];
+ struct mtk_foe_mac_info *l2;
+ struct mtk_flow_addr_info ai = {};
+ unsigned char h_source[ETH_ALEN];
+ unsigned char h_dest[ETH_ALEN];
+ int type, state;
+ u32 ib2;
+
+
+ state = FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1);
+ if (!state)
+ continue;
+
+ if (bind && state != MTK_FOE_STATE_BIND)
+ continue;
+
+ type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+ seq_printf(m, "%05x %s %7s", i,
+ mtk_foe_entry_state_str(state),
+ mtk_foe_pkt_type_str(type));
+
+ switch (type) {
+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
+ ai.src_port = &entry->ipv4.orig.src_port;
+ ai.dest_port = &entry->ipv4.orig.dest_port;
+ fallthrough;
+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
+ ai.src = &entry->ipv4.orig.src_ip;
+ ai.dest = &entry->ipv4.orig.dest_ip;
+ break;
+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
+ ai.src_port = &entry->ipv6.src_port;
+ ai.dest_port = &entry->ipv6.dest_port;
+ fallthrough;
+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
+ ai.src = &entry->ipv6.src_ip;
+ ai.dest = &entry->ipv6.dest_ip;
+ ai.ipv6 = true;
+ break;
+ }
+
+ seq_printf(m, " orig=");
+ mtk_print_addr_info(m, &ai);
+
+ switch (type) {
+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
+ ai.src_port = &entry->ipv4.new.src_port;
+ ai.dest_port = &entry->ipv4.new.dest_port;
+ fallthrough;
+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
+ ai.src = &entry->ipv4.new.src_ip;
+ ai.dest = &entry->ipv4.new.dest_ip;
+ seq_printf(m, " new=");
+ mtk_print_addr_info(m, &ai);
+ break;
+ }
+
+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
+ l2 = &entry->ipv6.l2;
+ ib2 = entry->ipv6.ib2;
+ } else {
+ l2 = &entry->ipv4.l2;
+ ib2 = entry->ipv4.ib2;
+ }
+
+ *((__be32 *)h_source) = htonl(l2->src_mac_hi);
+ *((__be16 *)&h_source[4]) = htons(l2->src_mac_lo);
+ *((__be32 *)h_dest) = htonl(l2->dest_mac_hi);
+ *((__be16 *)&h_dest[4]) = htons(l2->dest_mac_lo);
+
+ seq_printf(m, " eth=%pM->%pM etype=%04x"
+ " vlan=%d,%d ib1=%08x ib2=%08x\n",
+ h_source, h_dest, ntohs(l2->etype),
+ l2->vlan1, l2->vlan2, entry->ib1, ib2);
+ }
+
+ return 0;
+}
+
+static int
+mtk_ppe_debugfs_foe_show_all(struct seq_file *m, void *private)
+{
+ return mtk_ppe_debugfs_foe_show(m, private, false);
+}
+
+static int
+mtk_ppe_debugfs_foe_show_bind(struct seq_file *m, void *private)
+{
+ return mtk_ppe_debugfs_foe_show(m, private, true);
+}
+
+static int
+mtk_ppe_debugfs_foe_open_all(struct inode *inode, struct file *file)
+{
+ return single_open(file, mtk_ppe_debugfs_foe_show_all,
+ inode->i_private);
+}
+
+static int
+mtk_ppe_debugfs_foe_open_bind(struct inode *inode, struct file *file)
+{
+ return single_open(file, mtk_ppe_debugfs_foe_show_bind,
+ inode->i_private);
+}
+
+int mtk_ppe_debugfs_init(struct mtk_ppe *ppe)
+{
+ static const struct file_operations fops_all = {
+ .open = mtk_ppe_debugfs_foe_open_all,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ };
+
+ static const struct file_operations fops_bind = {
+ .open = mtk_ppe_debugfs_foe_open_bind,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ };
+
+ struct dentry *root;
+
+ root = debugfs_create_dir("mtk_ppe", NULL);
+ debugfs_create_file("entries", S_IRUGO, root, ppe, &fops_all);
+ debugfs_create_file("bind", S_IRUGO, root, ppe, &fops_bind);
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
new file mode 100644
index 0000000..1380ef0
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
@@ -0,0 +1,535 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Felix Fietkau <nbd@nbd.name>
+ */
+
+#include <linux/if_ether.h>
+#include <linux/rhashtable.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/flow_offload.h>
+#include <net/pkt_cls.h>
+#include <net/dsa.h>
+#include "mtk_eth_soc.h"
+
+struct mtk_flow_data {
+ struct ethhdr eth;
+
+ union {
+ struct {
+ __be32 src_addr;
+ __be32 dst_addr;
+ } v4;
+
+ struct {
+ struct in6_addr src_addr;
+ struct in6_addr dst_addr;
+ } v6;
+ };
+
+ __be16 src_port;
+ __be16 dst_port;
+
+ struct {
+ u16 id;
+ __be16 proto;
+ u8 num;
+ } vlan;
+ struct {
+ u16 sid;
+ u8 num;
+ } pppoe;
+};
+
+struct mtk_flow_entry {
+ struct rhash_head node;
+ unsigned long cookie;
+ u16 hash;
+};
+
+static const struct rhashtable_params mtk_flow_ht_params = {
+ .head_offset = offsetof(struct mtk_flow_entry, node),
+ .key_offset = offsetof(struct mtk_flow_entry, cookie),
+ .key_len = sizeof(unsigned long),
+ .automatic_shrinking = true,
+};
+
+static u32
+mtk_eth_timestamp(struct mtk_eth *eth)
+{
+ return mtk_r32(eth, 0x0010) & MTK_FOE_IB1_BIND_TIMESTAMP;
+}
+
+static int
+mtk_flow_set_ipv4_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data,
+ bool egress)
+{
+ return mtk_foe_entry_set_ipv4_tuple(foe, egress,
+ data->v4.src_addr, data->src_port,
+ data->v4.dst_addr, data->dst_port);
+}
+
+static int
+mtk_flow_set_ipv6_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data)
+{
+ return mtk_foe_entry_set_ipv6_tuple(foe,
+ data->v6.src_addr.s6_addr32, data->src_port,
+ data->v6.dst_addr.s6_addr32, data->dst_port);
+}
+
+static void
+mtk_flow_offload_mangle_eth(const struct flow_action_entry *act, void *eth)
+{
+ void *dest = eth + act->mangle.offset;
+ const void *src = &act->mangle.val;
+
+ if (act->mangle.offset > 8)
+ return;
+
+ if (act->mangle.mask == 0xffff) {
+ src += 2;
+ dest += 2;
+ }
+
+ memcpy(dest, src, act->mangle.mask ? 2 : 4);
+}
+
+
+static int
+mtk_flow_mangle_ports(const struct flow_action_entry *act,
+ struct mtk_flow_data *data)
+{
+ u32 val = ntohl(act->mangle.val);
+
+ switch (act->mangle.offset) {
+ case 0:
+ if (act->mangle.mask == ~htonl(0xffff))
+ data->dst_port = cpu_to_be16(val);
+ else
+ data->src_port = cpu_to_be16(val >> 16);
+ break;
+ case 2:
+ data->dst_port = cpu_to_be16(val);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+mtk_flow_mangle_ipv4(const struct flow_action_entry *act,
+ struct mtk_flow_data *data)
+{
+ __be32 *dest;
+
+ switch (act->mangle.offset) {
+ case offsetof(struct iphdr, saddr):
+ dest = &data->v4.src_addr;
+ break;
+ case offsetof(struct iphdr, daddr):
+ dest = &data->v4.dst_addr;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ memcpy(dest, &act->mangle.val, sizeof(u32));
+
+ return 0;
+}
+
+static int
+mtk_flow_get_dsa_port(struct net_device **dev)
+{
+#if IS_ENABLED(CONFIG_NET_DSA)
+ struct dsa_port *dp;
+
+ dp = dsa_port_from_netdev(*dev);
+ if (IS_ERR(dp))
+ return -ENODEV;
+
+ if (dp->cpu_dp->tag_ops->proto != DSA_TAG_PROTO_MTK)
+ return -ENODEV;
+
+ *dev = dp->cpu_dp->master;
+
+ return dp->index;
+#else
+ return -ENODEV;
+#endif
+}
+
+static int
+mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
+ struct net_device *dev)
+{
+ int pse_port, dsa_port;
+
+ dsa_port = mtk_flow_get_dsa_port(&dev);
+ if (dsa_port >= 0)
+ mtk_foe_entry_set_dsa(foe, dsa_port);
+
+ if (dev == eth->netdev[0])
+ pse_port = PSE_GDM1_PORT;
+ else if (dev == eth->netdev[1])
+ pse_port = PSE_GDM2_PORT;
+ else
+ return -EOPNOTSUPP;
+
+ mtk_foe_entry_set_pse_port(foe, pse_port);
+
+ return 0;
+}
+
+static int
+mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct flow_action_entry *act;
+ struct mtk_flow_data data = {};
+ struct mtk_foe_entry foe;
+ struct net_device *odev = NULL;
+ struct mtk_flow_entry *entry;
+ int offload_type = 0;
+ u16 addr_type = 0;
+ u32 timestamp;
+ u8 l4proto = 0;
+ int err = 0;
+ int hash;
+ int i;
+
+ if (rhashtable_lookup(&eth->flow_table, &f->cookie, mtk_flow_ht_params))
+ return -EEXIST;
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
+ struct flow_match_meta match;
+
+ flow_rule_match_meta(rule, &match);
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+ struct flow_match_control match;
+
+ flow_rule_match_control(rule, &match);
+ addr_type = match.key->addr_type;
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_match_basic match;
+
+ flow_rule_match_basic(rule, &match);
+ l4proto = match.key->ip_proto;
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ flow_action_for_each(i, act, &rule->action) {
+ switch (act->id) {
+ case FLOW_ACTION_MANGLE:
+ if (act->mangle.htype == FLOW_ACT_MANGLE_HDR_TYPE_ETH)
+ mtk_flow_offload_mangle_eth(act, &data.eth);
+ break;
+ case FLOW_ACTION_REDIRECT:
+ odev = act->dev;
+ break;
+ case FLOW_ACTION_CSUM:
+ break;
+ case FLOW_ACTION_VLAN_PUSH:
+ if (data.vlan.num == 1 ||
+ act->vlan.proto != htons(ETH_P_8021Q))
+ return -EOPNOTSUPP;
+
+ data.vlan.id = act->vlan.vid;
+ data.vlan.proto = act->vlan.proto;
+ data.vlan.num++;
+ break;
+ case FLOW_ACTION_VLAN_POP:
+ break;
+ case FLOW_ACTION_PPPOE_PUSH:
+ if (data.pppoe.num == 1)
+ return -EOPNOTSUPP;
+
+ data.pppoe.sid = act->pppoe.sid;
+ data.pppoe.num++;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ }
+
+ switch (addr_type) {
+ case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+ offload_type = MTK_PPE_PKT_TYPE_IPV4_HNAPT;
+ break;
+ case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
+ offload_type = MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (!is_valid_ether_addr(data.eth.h_source) ||
+ !is_valid_ether_addr(data.eth.h_dest))
+ return -EINVAL;
+
+ err = mtk_foe_entry_prepare(&foe, offload_type, l4proto, 0,
+ data.eth.h_source,
+ data.eth.h_dest);
+ if (err)
+ return err;
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+ struct flow_match_ports ports;
+
+ flow_rule_match_ports(rule, &ports);
+ data.src_port = ports.key->src;
+ data.dst_port = ports.key->dst;
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_match_ipv4_addrs addrs;
+
+ flow_rule_match_ipv4_addrs(rule, &addrs);
+
+ data.v4.src_addr = addrs.key->src;
+ data.v4.dst_addr = addrs.key->dst;
+
+ mtk_flow_set_ipv4_addr(&foe, &data, false);
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_match_ipv6_addrs addrs;
+
+ flow_rule_match_ipv6_addrs(rule, &addrs);
+
+ data.v6.src_addr = addrs.key->src;
+ data.v6.dst_addr = addrs.key->dst;
+
+ mtk_flow_set_ipv6_addr(&foe, &data);
+ }
+
+ flow_action_for_each(i, act, &rule->action) {
+ if (act->id != FLOW_ACTION_MANGLE)
+ continue;
+
+ switch (act->mangle.htype) {
+ case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
+ case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
+ err = mtk_flow_mangle_ports(act, &data);
+ break;
+ case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
+ err = mtk_flow_mangle_ipv4(act, &data);
+ break;
+ case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
+ /* handled earlier */
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (err)
+ return err;
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ err = mtk_flow_set_ipv4_addr(&foe, &data, true);
+ if (err)
+ return err;
+ }
+
+ if (data.vlan.num == 1) {
+ if (data.vlan.proto != htons(ETH_P_8021Q))
+ return -EOPNOTSUPP;
+
+ mtk_foe_entry_set_vlan(&foe, data.vlan.id);
+ }
+ if (data.pppoe.num == 1)
+ mtk_foe_entry_set_pppoe(&foe, data.pppoe.sid);
developer8cb3ac72022-07-04 10:55:14 +08001639+
developeree39bcf2023-06-16 08:03:30 +08001640+ err = mtk_flow_set_output_device(eth, &foe, odev);
developer8cb3ac72022-07-04 10:55:14 +08001641+ if (err)
1642+ return err;
1643+
1644+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1645+ if (!entry)
1646+ return -ENOMEM;
1647+
1648+ entry->cookie = f->cookie;
developeree39bcf2023-06-16 08:03:30 +08001649+ timestamp = mtk_eth_timestamp(eth);
1650+ hash = mtk_foe_entry_commit(&eth->ppe, &foe, timestamp);
1651+ if (hash < 0) {
1652+ err = hash;
developer8cb3ac72022-07-04 10:55:14 +08001653+ goto free;
developeree39bcf2023-06-16 08:03:30 +08001654+ }
developer8cb3ac72022-07-04 10:55:14 +08001655+
developeree39bcf2023-06-16 08:03:30 +08001656+ entry->hash = hash;
developer8cb3ac72022-07-04 10:55:14 +08001657+ err = rhashtable_insert_fast(&eth->flow_table, &entry->node,
1658+ mtk_flow_ht_params);
1659+ if (err < 0)
developeree39bcf2023-06-16 08:03:30 +08001660+ goto clear_flow;
developer8cb3ac72022-07-04 10:55:14 +08001661+
1662+ return 0;
developeree39bcf2023-06-16 08:03:30 +08001663+clear_flow:
1664+ mtk_foe_entry_clear(&eth->ppe, hash);
developer8cb3ac72022-07-04 10:55:14 +08001665+free:
1666+ kfree(entry);
1667+ return err;
1668+}
1669+
1670+static int
1671+mtk_flow_offload_destroy(struct mtk_eth *eth, struct flow_cls_offload *f)
1672+{
1673+ struct mtk_flow_entry *entry;
1674+
1675+ entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
1676+ mtk_flow_ht_params);
1677+ if (!entry)
1678+ return -ENOENT;
1679+
developeree39bcf2023-06-16 08:03:30 +08001680+ mtk_foe_entry_clear(&eth->ppe, entry->hash);
developer8cb3ac72022-07-04 10:55:14 +08001681+ rhashtable_remove_fast(&eth->flow_table, &entry->node,
1682+ mtk_flow_ht_params);
1683+ kfree(entry);
1684+
1685+ return 0;
1686+}
1687+
1688+static int
1689+mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f)
1690+{
1691+ struct mtk_flow_entry *entry;
developeree39bcf2023-06-16 08:03:30 +08001692+ int timestamp;
1693+ u32 idle;
developer8cb3ac72022-07-04 10:55:14 +08001694+
1695+ entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
1696+ mtk_flow_ht_params);
1697+ if (!entry)
1698+ return -ENOENT;
1699+
developeree39bcf2023-06-16 08:03:30 +08001700+ timestamp = mtk_foe_entry_timestamp(&eth->ppe, entry->hash);
1701+ if (timestamp < 0)
1702+ return -ETIMEDOUT;
1703+
1704+ idle = mtk_eth_timestamp(eth) - timestamp;
developer8cb3ac72022-07-04 10:55:14 +08001705+ f->stats.lastused = jiffies - idle * HZ;
1706+
1707+ return 0;
1708+}
1709+
1710+static DEFINE_MUTEX(mtk_flow_offload_mutex);
1711+
developeree39bcf2023-06-16 08:03:30 +08001712+static int
1713+mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
developer8cb3ac72022-07-04 10:55:14 +08001714+{
developeree39bcf2023-06-16 08:03:30 +08001715+ struct flow_cls_offload *cls = type_data;
1716+ struct net_device *dev = cb_priv;
1717+ struct mtk_mac *mac = netdev_priv(dev);
1718+ struct mtk_eth *eth = mac->hw;
developer8cb3ac72022-07-04 10:55:14 +08001719+ int err;
1720+
developeree39bcf2023-06-16 08:03:30 +08001721+ if (!tc_can_offload(dev))
1722+ return -EOPNOTSUPP;
1723+
1724+ if (type != TC_SETUP_CLSFLOWER)
1725+ return -EOPNOTSUPP;
1726+
developer8cb3ac72022-07-04 10:55:14 +08001727+ mutex_lock(&mtk_flow_offload_mutex);
1728+ switch (cls->command) {
1729+ case FLOW_CLS_REPLACE:
developeree39bcf2023-06-16 08:03:30 +08001730+ err = mtk_flow_offload_replace(eth, cls);
developer8cb3ac72022-07-04 10:55:14 +08001731+ break;
1732+ case FLOW_CLS_DESTROY:
1733+ err = mtk_flow_offload_destroy(eth, cls);
1734+ break;
1735+ case FLOW_CLS_STATS:
1736+ err = mtk_flow_offload_stats(eth, cls);
1737+ break;
1738+ default:
1739+ err = -EOPNOTSUPP;
1740+ break;
1741+ }
1742+ mutex_unlock(&mtk_flow_offload_mutex);
1743+
1744+ return err;
1745+}
1746+
1747+static int
1748+mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f)
1749+{
1750+ struct mtk_mac *mac = netdev_priv(dev);
1751+ struct mtk_eth *eth = mac->hw;
1752+ static LIST_HEAD(block_cb_list);
1753+ struct flow_block_cb *block_cb;
1754+ flow_setup_cb_t *cb;
developeree39bcf2023-06-16 08:03:30 +08001755+ int err = 0;
developer207b39d2022-10-07 15:57:16 +08001756+
developeree39bcf2023-06-16 08:03:30 +08001757+ if (!eth->ppe.foe_table)
developer8cb3ac72022-07-04 10:55:14 +08001758+ return -EOPNOTSUPP;
1759+
1760+ if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
1761+ return -EOPNOTSUPP;
1762+
1763+ cb = mtk_eth_setup_tc_block_cb;
1764+ f->driver_block_list = &block_cb_list;
1765+
1766+ switch (f->command) {
1767+ case FLOW_BLOCK_BIND:
1768+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
1769+ if (block_cb) {
1770+ flow_block_cb_incref(block_cb);
developeree39bcf2023-06-16 08:03:30 +08001771+ goto unlock;
developer8cb3ac72022-07-04 10:55:14 +08001772+ }
1773+ block_cb = flow_block_cb_alloc(cb, dev, dev, NULL);
developeree39bcf2023-06-16 08:03:30 +08001774+ if (IS_ERR(block_cb)) {
1775+ err = PTR_ERR(block_cb);
1776+ goto unlock;
1777+ }
developer8cb3ac72022-07-04 10:55:14 +08001778+
1779+ flow_block_cb_add(block_cb, f);
1780+ list_add_tail(&block_cb->driver_list, &block_cb_list);
developeree39bcf2023-06-16 08:03:30 +08001781+ break;
developer8cb3ac72022-07-04 10:55:14 +08001782+ case FLOW_BLOCK_UNBIND:
1783+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
developeree39bcf2023-06-16 08:03:30 +08001784+ if (!block_cb) {
1785+ err = -ENOENT;
1786+ goto unlock;
1787+ }
developer8cb3ac72022-07-04 10:55:14 +08001788+
developeree39bcf2023-06-16 08:03:30 +08001789+ if (flow_block_cb_decref(block_cb)) {
developer8cb3ac72022-07-04 10:55:14 +08001790+ flow_block_cb_remove(block_cb, f);
1791+ list_del(&block_cb->driver_list);
1792+ }
developeree39bcf2023-06-16 08:03:30 +08001793+ break;
developer8cb3ac72022-07-04 10:55:14 +08001794+ default:
developeree39bcf2023-06-16 08:03:30 +08001795+ err = -EOPNOTSUPP;
1796+ break;
developer8cb3ac72022-07-04 10:55:14 +08001797+ }
developeree39bcf2023-06-16 08:03:30 +08001798+
1799+unlock:
1800+ return err;
developer8cb3ac72022-07-04 10:55:14 +08001801+}
1802+
1803+int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
1804+ void *type_data)
1805+{
developeree39bcf2023-06-16 08:03:30 +08001806+ if (type == TC_SETUP_FT)
developer8cb3ac72022-07-04 10:55:14 +08001807+ return mtk_eth_setup_tc_block(dev, type_data);
developeree39bcf2023-06-16 08:03:30 +08001808+
1809+ return -EOPNOTSUPP;
developer8cb3ac72022-07-04 10:55:14 +08001810+}
1811+
1812+int mtk_eth_offload_init(struct mtk_eth *eth)
1813+{
developeree39bcf2023-06-16 08:03:30 +08001814+ if (!eth->ppe.foe_table)
1815+ return 0;
1816+
developer8cb3ac72022-07-04 10:55:14 +08001817+ return rhashtable_init(&eth->flow_table, &mtk_flow_ht_params);
1818+}
1819diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_regs.h b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
1820new file mode 100644
developer58aa0682023-09-18 14:02:26 +08001821index 0000000..0c45ea0
developer8cb3ac72022-07-04 10:55:14 +08001822--- /dev/null
1823+++ b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
developeree39bcf2023-06-16 08:03:30 +08001824@@ -0,0 +1,144 @@
developer8cb3ac72022-07-04 10:55:14 +08001825+// SPDX-License-Identifier: GPL-2.0-only
1826+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
1827+
1828+#ifndef __MTK_PPE_REGS_H
1829+#define __MTK_PPE_REGS_H
1830+
1831+#define MTK_PPE_GLO_CFG 0x200
1832+#define MTK_PPE_GLO_CFG_EN BIT(0)
1833+#define MTK_PPE_GLO_CFG_TSID_EN BIT(1)
1834+#define MTK_PPE_GLO_CFG_IP4_L4_CS_DROP BIT(2)
1835+#define MTK_PPE_GLO_CFG_IP4_CS_DROP BIT(3)
1836+#define MTK_PPE_GLO_CFG_TTL0_DROP BIT(4)
1837+#define MTK_PPE_GLO_CFG_PPE_BSWAP BIT(5)
1838+#define MTK_PPE_GLO_CFG_PSE_HASH_OFS BIT(6)
1839+#define MTK_PPE_GLO_CFG_MCAST_TB_EN BIT(7)
1840+#define MTK_PPE_GLO_CFG_FLOW_DROP_KA BIT(8)
1841+#define MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE BIT(9)
1842+#define MTK_PPE_GLO_CFG_UDP_LITE_EN BIT(10)
1843+#define MTK_PPE_GLO_CFG_UDP_LEN_DROP BIT(11)
1844+#define MTK_PPE_GLO_CFG_MCAST_ENTRIES GENMASK(13, 12)
1845+#define MTK_PPE_GLO_CFG_BUSY BIT(31)
1846+
1847+#define MTK_PPE_FLOW_CFG 0x204
1848+#define MTK_PPE_FLOW_CFG_IP4_TCP_FRAG BIT(6)
1849+#define MTK_PPE_FLOW_CFG_IP4_UDP_FRAG BIT(7)
1850+#define MTK_PPE_FLOW_CFG_IP6_3T_ROUTE BIT(8)
1851+#define MTK_PPE_FLOW_CFG_IP6_5T_ROUTE BIT(9)
1852+#define MTK_PPE_FLOW_CFG_IP6_6RD BIT(10)
1853+#define MTK_PPE_FLOW_CFG_IP4_NAT BIT(12)
1854+#define MTK_PPE_FLOW_CFG_IP4_NAPT BIT(13)
1855+#define MTK_PPE_FLOW_CFG_IP4_DSLITE BIT(14)
1856+#define MTK_PPE_FLOW_CFG_L2_BRIDGE BIT(15)
1857+#define MTK_PPE_FLOW_CFG_IP_PROTO_BLACKLIST BIT(16)
1858+#define MTK_PPE_FLOW_CFG_IP4_NAT_FRAG BIT(17)
1859+#define MTK_PPE_FLOW_CFG_IP4_HASH_FLOW_LABEL BIT(18)
1860+#define MTK_PPE_FLOW_CFG_IP4_HASH_GRE_KEY BIT(19)
1861+#define MTK_PPE_FLOW_CFG_IP6_HASH_GRE_KEY BIT(20)
1862+
1863+#define MTK_PPE_IP_PROTO_CHK 0x208
1864+#define MTK_PPE_IP_PROTO_CHK_IPV4 GENMASK(15, 0)
1865+#define MTK_PPE_IP_PROTO_CHK_IPV6 GENMASK(31, 16)
1866+
1867+#define MTK_PPE_TB_CFG 0x21c
1868+#define MTK_PPE_TB_CFG_ENTRY_NUM GENMASK(2, 0)
1869+#define MTK_PPE_TB_CFG_ENTRY_80B BIT(3)
1870+#define MTK_PPE_TB_CFG_SEARCH_MISS GENMASK(5, 4)
1871+#define MTK_PPE_TB_CFG_AGE_PREBIND BIT(6)
1872+#define MTK_PPE_TB_CFG_AGE_NON_L4 BIT(7)
1873+#define MTK_PPE_TB_CFG_AGE_UNBIND BIT(8)
1874+#define MTK_PPE_TB_CFG_AGE_TCP BIT(9)
1875+#define MTK_PPE_TB_CFG_AGE_UDP BIT(10)
1876+#define MTK_PPE_TB_CFG_AGE_TCP_FIN BIT(11)
1877+#define MTK_PPE_TB_CFG_KEEPALIVE GENMASK(13, 12)
1878+#define MTK_PPE_TB_CFG_HASH_MODE GENMASK(15, 14)
1879+#define MTK_PPE_TB_CFG_SCAN_MODE GENMASK(17, 16)
1880+#define MTK_PPE_TB_CFG_HASH_DEBUG GENMASK(19, 18)
1881+
1882+enum {
1883+ MTK_PPE_SCAN_MODE_DISABLED,
1884+ MTK_PPE_SCAN_MODE_CHECK_AGE,
1885+ MTK_PPE_SCAN_MODE_KEEPALIVE_AGE,
1886+};
1887+
1888+enum {
1889+ MTK_PPE_KEEPALIVE_DISABLE,
1890+ MTK_PPE_KEEPALIVE_UNICAST_CPU,
1891+ MTK_PPE_KEEPALIVE_DUP_CPU = 3,
1892+};
1893+
1894+enum {
1895+ MTK_PPE_SEARCH_MISS_ACTION_DROP,
1896+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD = 2,
1897+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD = 3,
1898+};
1899+
1900+#define MTK_PPE_TB_BASE 0x220
1901+
1902+#define MTK_PPE_TB_USED 0x224
1903+#define MTK_PPE_TB_USED_NUM GENMASK(13, 0)
1904+
1905+#define MTK_PPE_BIND_RATE 0x228
1906+#define MTK_PPE_BIND_RATE_BIND GENMASK(15, 0)
1907+#define MTK_PPE_BIND_RATE_PREBIND GENMASK(31, 16)
1908+
1909+#define MTK_PPE_BIND_LIMIT0 0x22c
1910+#define MTK_PPE_BIND_LIMIT0_QUARTER GENMASK(13, 0)
1911+#define MTK_PPE_BIND_LIMIT0_HALF GENMASK(29, 16)
1912+
1913+#define MTK_PPE_BIND_LIMIT1 0x230
1914+#define MTK_PPE_BIND_LIMIT1_FULL GENMASK(13, 0)
1915+#define MTK_PPE_BIND_LIMIT1_NON_L4 GENMASK(23, 16)
1916+
1917+#define MTK_PPE_KEEPALIVE 0x234
1918+#define MTK_PPE_KEEPALIVE_TIME GENMASK(15, 0)
1919+#define MTK_PPE_KEEPALIVE_TIME_TCP GENMASK(23, 16)
1920+#define MTK_PPE_KEEPALIVE_TIME_UDP GENMASK(31, 24)
1921+
1922+#define MTK_PPE_UNBIND_AGE 0x238
1923+#define MTK_PPE_UNBIND_AGE_MIN_PACKETS GENMASK(31, 16)
1924+#define MTK_PPE_UNBIND_AGE_DELTA GENMASK(7, 0)
1925+
1926+#define MTK_PPE_BIND_AGE0 0x23c
1927+#define MTK_PPE_BIND_AGE0_DELTA_NON_L4 GENMASK(30, 16)
1928+#define MTK_PPE_BIND_AGE0_DELTA_UDP GENMASK(14, 0)
1929+
1930+#define MTK_PPE_BIND_AGE1 0x240
1931+#define MTK_PPE_BIND_AGE1_DELTA_TCP_FIN GENMASK(30, 16)
1932+#define MTK_PPE_BIND_AGE1_DELTA_TCP GENMASK(14, 0)
1933+
1934+#define MTK_PPE_HASH_SEED 0x244
1935+
1936+#define MTK_PPE_DEFAULT_CPU_PORT 0x248
1937+#define MTK_PPE_DEFAULT_CPU_PORT_MASK(_n) (GENMASK(2, 0) << ((_n) * 4))
1938+
1939+#define MTK_PPE_MTU_DROP 0x308
1940+
1941+#define MTK_PPE_VLAN_MTU0 0x30c
1942+#define MTK_PPE_VLAN_MTU0_NONE GENMASK(13, 0)
1943+#define MTK_PPE_VLAN_MTU0_1TAG GENMASK(29, 16)
1944+
1945+#define MTK_PPE_VLAN_MTU1 0x310
1946+#define MTK_PPE_VLAN_MTU1_2TAG GENMASK(13, 0)
1947+#define MTK_PPE_VLAN_MTU1_3TAG GENMASK(29, 16)
1948+
1949+#define MTK_PPE_VPM_TPID 0x318
1950+
1951+#define MTK_PPE_CACHE_CTL 0x320
1952+#define MTK_PPE_CACHE_CTL_EN BIT(0)
1953+#define MTK_PPE_CACHE_CTL_LOCK_CLR BIT(4)
1954+#define MTK_PPE_CACHE_CTL_REQ BIT(8)
1955+#define MTK_PPE_CACHE_CTL_CLEAR BIT(9)
1956+#define MTK_PPE_CACHE_CTL_CMD GENMASK(13, 12)
1957+
1958+#define MTK_PPE_MIB_CFG 0x334
1959+#define MTK_PPE_MIB_CFG_EN BIT(0)
1960+#define MTK_PPE_MIB_CFG_RD_CLR BIT(1)
1961+
1962+#define MTK_PPE_MIB_TB_BASE 0x338
1963+
1964+#define MTK_PPE_MIB_CACHE_CTL 0x350
1965+#define MTK_PPE_MIB_CACHE_CTL_EN BIT(0)
1966+#define MTK_PPE_MIB_CACHE_CTL_FLUSH BIT(2)
1967+
1968+#endif
1969diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
developer58aa0682023-09-18 14:02:26 +08001970index 078c0f4..f8a98d8 100644
developer8cb3ac72022-07-04 10:55:14 +08001971--- a/drivers/net/ppp/ppp_generic.c
1972+++ b/drivers/net/ppp/ppp_generic.c
1973@@ -1378,12 +1378,34 @@ static void ppp_dev_priv_destructor(struct net_device *dev)
1974 ppp_destroy_interface(ppp);
1975 }
1976
1977+static int ppp_fill_forward_path(struct net_device_path_ctx *ctx,
1978+ struct net_device_path *path)
1979+{
1980+ struct ppp *ppp = netdev_priv(ctx->dev);
1981+ struct ppp_channel *chan;
1982+ struct channel *pch;
1983+
1984+ if (ppp->flags & SC_MULTILINK)
1985+ return -EOPNOTSUPP;
1986+
1987+ if (list_empty(&ppp->channels))
1988+ return -ENODEV;
1989+
1990+ pch = list_first_entry(&ppp->channels, struct channel, clist);
1991+ chan = pch->chan;
1992+ if (!chan->ops->fill_forward_path)
1993+ return -EOPNOTSUPP;
1994+
1995+ return chan->ops->fill_forward_path(ctx, path, chan);
1996+}
1997+
1998 static const struct net_device_ops ppp_netdev_ops = {
1999 .ndo_init = ppp_dev_init,
2000 .ndo_uninit = ppp_dev_uninit,
2001 .ndo_start_xmit = ppp_start_xmit,
2002 .ndo_do_ioctl = ppp_net_ioctl,
2003 .ndo_get_stats64 = ppp_get_stats64,
2004+ .ndo_fill_forward_path = ppp_fill_forward_path,
2005 };
2006
2007 static struct device_type ppp_type = {
2008diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
developer58aa0682023-09-18 14:02:26 +08002009index 087b016..7a8c246 100644
developer8cb3ac72022-07-04 10:55:14 +08002010--- a/drivers/net/ppp/pppoe.c
2011+++ b/drivers/net/ppp/pppoe.c
2012@@ -974,8 +974,32 @@ static int pppoe_xmit(struct ppp_channel *chan, struct sk_buff *skb)
2013 return __pppoe_xmit(sk, skb);
2014 }
2015
2016+static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx,
2017+ struct net_device_path *path,
2018+ const struct ppp_channel *chan)
2019+{
2020+ struct sock *sk = (struct sock *)chan->private;
2021+ struct pppox_sock *po = pppox_sk(sk);
2022+ struct net_device *dev = po->pppoe_dev;
2023+
2024+ if (sock_flag(sk, SOCK_DEAD) ||
2025+ !(sk->sk_state & PPPOX_CONNECTED) || !dev)
2026+ return -1;
2027+
2028+ path->type = DEV_PATH_PPPOE;
2029+ path->encap.proto = htons(ETH_P_PPP_SES);
2030+ path->encap.id = be16_to_cpu(po->num);
2031+ memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN);
2032+ memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN);
2033+ path->dev = ctx->dev;
2034+ ctx->dev = dev;
2035+
2036+ return 0;
2037+}
2038+
2039 static const struct ppp_channel_ops pppoe_chan_ops = {
2040 .start_xmit = pppoe_xmit,
2041+ .fill_forward_path = pppoe_fill_forward_path,
2042 };
2043
2044 static int pppoe_recvmsg(struct socket *sock, struct msghdr *m,
2045diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
developer58aa0682023-09-18 14:02:26 +08002046index 631d158..ef44d9a 100644
developer8cb3ac72022-07-04 10:55:14 +08002047--- a/include/linux/netdevice.h
2048+++ b/include/linux/netdevice.h
developer58aa0682023-09-18 14:02:26 +08002049@@ -838,6 +838,59 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
developer8cb3ac72022-07-04 10:55:14 +08002050 struct sk_buff *skb,
2051 struct net_device *sb_dev);
2052
2053+enum net_device_path_type {
2054+ DEV_PATH_ETHERNET = 0,
2055+ DEV_PATH_VLAN,
2056+ DEV_PATH_BRIDGE,
2057+ DEV_PATH_PPPOE,
2058+ DEV_PATH_DSA,
2059+};
2060+
2061+struct net_device_path {
2062+ enum net_device_path_type type;
2063+ const struct net_device *dev;
2064+ union {
2065+ struct {
2066+ u16 id;
2067+ __be16 proto;
2068+ u8 h_dest[ETH_ALEN];
2069+ } encap;
2070+ struct {
2071+ enum {
2072+ DEV_PATH_BR_VLAN_KEEP,
2073+ DEV_PATH_BR_VLAN_TAG,
2074+ DEV_PATH_BR_VLAN_UNTAG,
2075+ DEV_PATH_BR_VLAN_UNTAG_HW,
2076+ } vlan_mode;
2077+ u16 vlan_id;
2078+ __be16 vlan_proto;
2079+ } bridge;
2080+ struct {
2081+ int port;
2082+ u16 proto;
2083+ } dsa;
2084+ };
2085+};
2086+
2087+#define NET_DEVICE_PATH_STACK_MAX 5
2088+#define NET_DEVICE_PATH_VLAN_MAX 2
2089+
2090+struct net_device_path_stack {
2091+ int num_paths;
2092+ struct net_device_path path[NET_DEVICE_PATH_STACK_MAX];
2093+};
2094+
2095+struct net_device_path_ctx {
2096+ const struct net_device *dev;
2097+ u8 daddr[ETH_ALEN];
2098+
2099+ int num_vlans;
2100+ struct {
2101+ u16 id;
2102+ __be16 proto;
2103+ } vlan[NET_DEVICE_PATH_VLAN_MAX];
2104+};
2105+
2106 enum tc_setup_type {
2107 TC_SETUP_QDISC_MQPRIO,
2108 TC_SETUP_CLSU32,
developer58aa0682023-09-18 14:02:26 +08002109@@ -853,6 +906,7 @@ enum tc_setup_type {
developer8cb3ac72022-07-04 10:55:14 +08002110 TC_SETUP_ROOT_QDISC,
2111 TC_SETUP_QDISC_GRED,
2112 TC_SETUP_QDISC_TAPRIO,
2113+ TC_SETUP_FT,
2114 };
2115
2116 /* These structures hold the attributes of bpf state that are being passed
developer58aa0682023-09-18 14:02:26 +08002117@@ -1248,6 +1302,8 @@ struct tlsdev_ops;
developer8cb3ac72022-07-04 10:55:14 +08002118 * Get devlink port instance associated with a given netdev.
2119 * Called with a reference on the netdevice and devlink locks only,
2120 * rtnl_lock is not held.
2121+ * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path);
2122+ * Get the forwarding path to reach the real device from the HW destination address
2123 */
2124 struct net_device_ops {
2125 int (*ndo_init)(struct net_device *dev);
developer58aa0682023-09-18 14:02:26 +08002126@@ -1445,6 +1501,8 @@ struct net_device_ops {
developer8cb3ac72022-07-04 10:55:14 +08002127 int (*ndo_xsk_wakeup)(struct net_device *dev,
2128 u32 queue_id, u32 flags);
2129 struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev);
2130+ int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx,
2131+ struct net_device_path *path);
2132 };
2133
2134 /**
developer58aa0682023-09-18 14:02:26 +08002135@@ -2670,6 +2728,8 @@ void dev_remove_offload(struct packet_offload *po);
developer8cb3ac72022-07-04 10:55:14 +08002136
2137 int dev_get_iflink(const struct net_device *dev);
2138 int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
2139+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
2140+ struct net_device_path_stack *stack);
2141 struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
2142 unsigned short mask);
2143 struct net_device *dev_get_by_name(struct net *net, const char *name);
2144diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h
developer58aa0682023-09-18 14:02:26 +08002145index 9896606..91f9a92 100644
developer8cb3ac72022-07-04 10:55:14 +08002146--- a/include/linux/ppp_channel.h
2147+++ b/include/linux/ppp_channel.h
2148@@ -28,6 +28,9 @@ struct ppp_channel_ops {
2149 int (*start_xmit)(struct ppp_channel *, struct sk_buff *);
2150 /* Handle an ioctl call that has come in via /dev/ppp. */
2151 int (*ioctl)(struct ppp_channel *, unsigned int, unsigned long);
2152+ int (*fill_forward_path)(struct net_device_path_ctx *,
2153+ struct net_device_path *,
2154+ const struct ppp_channel *);
2155 };
2156
2157 struct ppp_channel {
developer8cb3ac72022-07-04 10:55:14 +08002158diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
developer58aa0682023-09-18 14:02:26 +08002159index c6f7bd2..59b8736 100644
developer8cb3ac72022-07-04 10:55:14 +08002160--- a/include/net/flow_offload.h
2161+++ b/include/net/flow_offload.h
2162@@ -138,6 +138,7 @@ enum flow_action_id {
2163 FLOW_ACTION_MPLS_PUSH,
2164 FLOW_ACTION_MPLS_POP,
2165 FLOW_ACTION_MPLS_MANGLE,
2166+ FLOW_ACTION_PPPOE_PUSH,
2167 NUM_FLOW_ACTIONS,
2168 };
2169
2170@@ -213,6 +214,9 @@ struct flow_action_entry {
2171 u8 bos;
2172 u8 ttl;
2173 } mpls_mangle;
2174+ struct { /* FLOW_ACTION_PPPOE_PUSH */
2175+ u16 sid;
2176+ } pppoe;
2177 };
2178 };
2179
2180diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
developer58aa0682023-09-18 14:02:26 +08002181index 2c739fc..89ab8f1 100644
developer8cb3ac72022-07-04 10:55:14 +08002182--- a/include/net/ip6_route.h
2183+++ b/include/net/ip6_route.h
2184@@ -314,12 +314,13 @@ static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *
2185 !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws);
2186 }
2187
2188-static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
2189+static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst,
2190+ bool forwarding)
2191 {
2192 struct inet6_dev *idev;
2193 unsigned int mtu;
2194
2195- if (dst_metric_locked(dst, RTAX_MTU)) {
2196+ if (!forwarding || dst_metric_locked(dst, RTAX_MTU)) {
2197 mtu = dst_metric_raw(dst, RTAX_MTU);
2198 if (mtu)
2199 goto out;
2200diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
developer58aa0682023-09-18 14:02:26 +08002201index 7b3c873..e954831 100644
developer8cb3ac72022-07-04 10:55:14 +08002202--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
2203+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
2204@@ -4,7 +4,4 @@
2205
2206 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
2207
2208-#include <linux/sysctl.h>
2209-extern struct ctl_table nf_ct_ipv6_sysctl_table[];
2210-
2211 #endif /* _NF_CONNTRACK_IPV6_H*/
2212diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
developer58aa0682023-09-18 14:02:26 +08002213index 90690e3..ce0bc3e 100644
developer8cb3ac72022-07-04 10:55:14 +08002214--- a/include/net/netfilter/nf_conntrack.h
2215+++ b/include/net/netfilter/nf_conntrack.h
2216@@ -279,6 +279,18 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
2217 !nf_ct_is_dying(ct);
2218 }
2219
2220+#define NF_CT_DAY (86400 * HZ)
2221+
2222+/* Set an arbitrary timeout large enough not to ever expire, this saves
2223+ * us a check for the IPS_OFFLOAD_BIT from the packet path via
2224+ * nf_ct_is_expired().
2225+ */
2226+static inline void nf_ct_offload_timeout(struct nf_conn *ct)
2227+{
2228+ if (nf_ct_expires(ct) < NF_CT_DAY / 2)
2229+ WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
2230+}
2231+
2232 struct kernel_param;
2233
2234 int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp);
2235diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
developer58aa0682023-09-18 14:02:26 +08002236index f7a060c..7f44a77 100644
developer8cb3ac72022-07-04 10:55:14 +08002237--- a/include/net/netfilter/nf_conntrack_acct.h
2238+++ b/include/net/netfilter/nf_conntrack_acct.h
2239@@ -65,6 +65,17 @@ static inline void nf_ct_set_acct(struct net *net, bool enable)
2240 #endif
2241 }
2242
2243+void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
2244+ unsigned int bytes);
2245+
2246+static inline void nf_ct_acct_update(struct nf_conn *ct, u32 dir,
2247+ unsigned int bytes)
2248+{
2249+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
2250+ nf_ct_acct_add(ct, dir, 1, bytes);
2251+#endif
2252+}
2253+
2254 void nf_conntrack_acct_pernet_init(struct net *net);
2255
2256 int nf_conntrack_acct_init(void);
2257diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
developer58aa0682023-09-18 14:02:26 +08002258index 68d7fc9..feac793 100644
developer8cb3ac72022-07-04 10:55:14 +08002259--- a/include/net/netfilter/nf_flow_table.h
2260+++ b/include/net/netfilter/nf_flow_table.h
2261@@ -8,31 +8,99 @@
2262 #include <linux/rcupdate.h>
2263 #include <linux/netfilter.h>
2264 #include <linux/netfilter/nf_conntrack_tuple_common.h>
2265+#include <net/flow_offload.h>
2266 #include <net/dst.h>
2267+#include <linux/if_pppox.h>
2268+#include <linux/ppp_defs.h>
2269
2270 struct nf_flowtable;
2271+struct nf_flow_rule;
2272+struct flow_offload;
2273+enum flow_offload_tuple_dir;
2274+
2275+struct nf_flow_key {
2276+ struct flow_dissector_key_meta meta;
2277+ struct flow_dissector_key_control control;
2278+ struct flow_dissector_key_control enc_control;
2279+ struct flow_dissector_key_basic basic;
2280+ struct flow_dissector_key_vlan vlan;
2281+ struct flow_dissector_key_vlan cvlan;
2282+ union {
2283+ struct flow_dissector_key_ipv4_addrs ipv4;
2284+ struct flow_dissector_key_ipv6_addrs ipv6;
2285+ };
2286+ struct flow_dissector_key_keyid enc_key_id;
2287+ union {
2288+ struct flow_dissector_key_ipv4_addrs enc_ipv4;
2289+ struct flow_dissector_key_ipv6_addrs enc_ipv6;
2290+ };
2291+ struct flow_dissector_key_tcp tcp;
2292+ struct flow_dissector_key_ports tp;
2293+} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
2294+
2295+struct nf_flow_match {
2296+ struct flow_dissector dissector;
2297+ struct nf_flow_key key;
2298+ struct nf_flow_key mask;
2299+};
2300+
2301+struct nf_flow_rule {
2302+ struct nf_flow_match match;
2303+ struct flow_rule *rule;
2304+};
2305
2306 struct nf_flowtable_type {
2307 struct list_head list;
2308 int family;
2309 int (*init)(struct nf_flowtable *ft);
2310+ int (*setup)(struct nf_flowtable *ft,
2311+ struct net_device *dev,
2312+ enum flow_block_command cmd);
2313+ int (*action)(struct net *net,
2314+ const struct flow_offload *flow,
2315+ enum flow_offload_tuple_dir dir,
2316+ struct nf_flow_rule *flow_rule);
2317 void (*free)(struct nf_flowtable *ft);
2318 nf_hookfn *hook;
2319 struct module *owner;
2320 };
2321
2322+enum nf_flowtable_flags {
2323+ NF_FLOWTABLE_HW_OFFLOAD = 0x1, /* NFT_FLOWTABLE_HW_OFFLOAD */
2324+ NF_FLOWTABLE_COUNTER = 0x2, /* NFT_FLOWTABLE_COUNTER */
2325+};
2326+
2327 struct nf_flowtable {
2328 struct list_head list;
2329 struct rhashtable rhashtable;
2330+ int priority;
2331 const struct nf_flowtable_type *type;
2332 struct delayed_work gc_work;
2333+ unsigned int flags;
2334+ struct flow_block flow_block;
2335+ struct rw_semaphore flow_block_lock; /* Guards flow_block */
2336+ possible_net_t net;
2337 };
2338
2339+static inline bool nf_flowtable_hw_offload(struct nf_flowtable *flowtable)
2340+{
2341+ return flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD;
2342+}
2343+
2344 enum flow_offload_tuple_dir {
2345 FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
2346 FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
2347- FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
2348 };
2349+#define FLOW_OFFLOAD_DIR_MAX IP_CT_DIR_MAX
2350+
2351+enum flow_offload_xmit_type {
2352+ FLOW_OFFLOAD_XMIT_UNSPEC = 0,
2353+ FLOW_OFFLOAD_XMIT_NEIGH,
2354+ FLOW_OFFLOAD_XMIT_XFRM,
2355+ FLOW_OFFLOAD_XMIT_DIRECT,
2356+};
2357+
2358+#define NF_FLOW_TABLE_ENCAP_MAX 2
2359
2360 struct flow_offload_tuple {
2361 union {
developerb7c46752022-07-04 19:51:38 +08002362@@ -52,13 +120,30 @@ struct flow_offload_tuple {
developer8cb3ac72022-07-04 10:55:14 +08002363
2364 u8 l3proto;
2365 u8 l4proto;
2366- u8 dir;
2367+ struct {
2368+ u16 id;
2369+ __be16 proto;
2370+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
2371
2372- u16 mtu;
2373+ /* All members above are keys for lookups, see flow_offload_hash(). */
2374+ struct { } __hash;
2375
developerb7c46752022-07-04 19:51:38 +08002376- struct {
2377- struct dst_entry *dst_cache;
2378- u32 dst_cookie;
developer8cb3ac72022-07-04 10:55:14 +08002379+ u8 dir:2,
2380+ xmit_type:2,
2381+ encap_num:2,
2382+ in_vlan_ingress:2;
2383+ u16 mtu;
2384+ union {
2385+ struct {
2386+ struct dst_entry *dst_cache;
2387+ u32 dst_cookie;
2388+ };
2389+ struct {
2390+ u32 ifidx;
2391+ u32 hw_ifidx;
2392+ u8 h_source[ETH_ALEN];
2393+ u8 h_dest[ETH_ALEN];
2394+ } out;
developerb7c46752022-07-04 19:51:38 +08002395 };
developer8cb3ac72022-07-04 10:55:14 +08002396 };
2397
developeree39bcf2023-06-16 08:03:30 +08002398@@ -67,52 +152,140 @@ struct flow_offload_tuple_rhash {
developer8cb3ac72022-07-04 10:55:14 +08002399 struct flow_offload_tuple tuple;
2400 };
2401
2402-#define FLOW_OFFLOAD_SNAT 0x1
2403-#define FLOW_OFFLOAD_DNAT 0x2
2404-#define FLOW_OFFLOAD_DYING 0x4
2405-#define FLOW_OFFLOAD_TEARDOWN 0x8
2406+enum nf_flow_flags {
2407+ NF_FLOW_SNAT,
2408+ NF_FLOW_DNAT,
2409+ NF_FLOW_TEARDOWN,
2410+ NF_FLOW_HW,
developeree39bcf2023-06-16 08:03:30 +08002411+ NF_FLOW_HW_ACCT_DYING,
developer8cb3ac72022-07-04 10:55:14 +08002412+ NF_FLOW_HW_DYING,
2413+ NF_FLOW_HW_DEAD,
2414+ NF_FLOW_HW_PENDING,
2415+};
2416+
2417+enum flow_offload_type {
2418+ NF_FLOW_OFFLOAD_UNSPEC = 0,
2419+ NF_FLOW_OFFLOAD_ROUTE,
2420+};
2421
2422 struct flow_offload {
2423 struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
2424- u32 flags;
2425- union {
2426- /* Your private driver data here. */
2427- u32 timeout;
2428- };
2429+ struct nf_conn *ct;
2430+ unsigned long flags;
2431+ u16 type;
2432+ u32 timeout;
2433+ struct rcu_head rcu_head;
2434 };
2435
2436 #define NF_FLOW_TIMEOUT (30 * HZ)
2437+#define nf_flowtable_time_stamp (u32)jiffies
2438+
2439+unsigned long flow_offload_get_timeout(struct flow_offload *flow);
2440+
2441+static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
2442+{
2443+ return (__s32)(timeout - nf_flowtable_time_stamp);
2444+}
2445
2446 struct nf_flow_route {
2447 struct {
2448- struct dst_entry *dst;
2449+ struct dst_entry *dst;
2450+ struct {
2451+ u32 ifindex;
2452+ struct {
2453+ u16 id;
2454+ __be16 proto;
2455+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
2456+ u8 num_encaps:2,
2457+ ingress_vlans:2;
2458+ } in;
2459+ struct {
2460+ u32 ifindex;
2461+ u32 hw_ifindex;
2462+ u8 h_source[ETH_ALEN];
2463+ u8 h_dest[ETH_ALEN];
2464+ } out;
2465+ enum flow_offload_xmit_type xmit_type;
2466 } tuple[FLOW_OFFLOAD_DIR_MAX];
2467 };
2468
2469-struct flow_offload *flow_offload_alloc(struct nf_conn *ct,
2470- struct nf_flow_route *route);
2471+struct flow_offload *flow_offload_alloc(struct nf_conn *ct);
2472 void flow_offload_free(struct flow_offload *flow);
2473
2474+static inline int
2475+nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table,
2476+ flow_setup_cb_t *cb, void *cb_priv)
2477+{
2478+ struct flow_block *block = &flow_table->flow_block;
2479+ struct flow_block_cb *block_cb;
2480+ int err = 0;
2481+
2482+ down_write(&flow_table->flow_block_lock);
2483+ block_cb = flow_block_cb_lookup(block, cb, cb_priv);
2484+ if (block_cb) {
2485+ err = -EEXIST;
2486+ goto unlock;
2487+ }
2488+
2489+ block_cb = flow_block_cb_alloc(cb, cb_priv, cb_priv, NULL);
2490+ if (IS_ERR(block_cb)) {
2491+ err = PTR_ERR(block_cb);
2492+ goto unlock;
2493+ }
2494+
2495+ list_add_tail(&block_cb->list, &block->cb_list);
2496+
2497+unlock:
2498+ up_write(&flow_table->flow_block_lock);
2499+ return err;
2500+}
2501+
2502+static inline void
2503+nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
2504+ flow_setup_cb_t *cb, void *cb_priv)
2505+{
2506+ struct flow_block *block = &flow_table->flow_block;
2507+ struct flow_block_cb *block_cb;
2508+
2509+ down_write(&flow_table->flow_block_lock);
2510+ block_cb = flow_block_cb_lookup(block, cb, cb_priv);
2511+ if (block_cb) {
2512+ list_del(&block_cb->list);
2513+ flow_block_cb_free(block_cb);
2514+ } else {
2515+ WARN_ON(true);
2516+ }
2517+ up_write(&flow_table->flow_block_lock);
2518+}
2519+
2520+int flow_offload_route_init(struct flow_offload *flow,
2521+ const struct nf_flow_route *route);
2522+
2523 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
2524+void flow_offload_refresh(struct nf_flowtable *flow_table,
2525+ struct flow_offload *flow);
2526+
2527 struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
2528 struct flow_offload_tuple *tuple);
2529+void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
2530+ struct net_device *dev);
2531 void nf_flow_table_cleanup(struct net_device *dev);
2532
2533 int nf_flow_table_init(struct nf_flowtable *flow_table);
2534 void nf_flow_table_free(struct nf_flowtable *flow_table);
2535
2536 void flow_offload_teardown(struct flow_offload *flow);
2537-static inline void flow_offload_dead(struct flow_offload *flow)
2538-{
2539- flow->flags |= FLOW_OFFLOAD_DYING;
2540-}
2541
2542-int nf_flow_snat_port(const struct flow_offload *flow,
2543- struct sk_buff *skb, unsigned int thoff,
2544- u8 protocol, enum flow_offload_tuple_dir dir);
2545-int nf_flow_dnat_port(const struct flow_offload *flow,
2546- struct sk_buff *skb, unsigned int thoff,
2547- u8 protocol, enum flow_offload_tuple_dir dir);
2548+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
2549+ void (*iter)(struct flow_offload *flow, void *data),
2550+ void *data);
2551+
2552+void nf_flow_snat_port(const struct flow_offload *flow,
2553+ struct sk_buff *skb, unsigned int thoff,
2554+ u8 protocol, enum flow_offload_tuple_dir dir);
2555+void nf_flow_dnat_port(const struct flow_offload *flow,
2556+ struct sk_buff *skb, unsigned int thoff,
2557+ u8 protocol, enum flow_offload_tuple_dir dir);
2558
2559 struct flow_ports {
2560 __be16 source, dest;
developer58aa0682023-09-18 14:02:26 +08002561@@ -126,4 +299,41 @@ unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08002562 #define MODULE_ALIAS_NF_FLOWTABLE(family) \
2563 MODULE_ALIAS("nf-flowtable-" __stringify(family))
2564
2565+void nf_flow_offload_add(struct nf_flowtable *flowtable,
2566+ struct flow_offload *flow);
2567+void nf_flow_offload_del(struct nf_flowtable *flowtable,
2568+ struct flow_offload *flow);
2569+void nf_flow_offload_stats(struct nf_flowtable *flowtable,
developeree39bcf2023-06-16 08:03:30 +08002570+ struct flow_offload *flow, bool force);
developer8cb3ac72022-07-04 10:55:14 +08002571+
2572+void nf_flow_table_offload_flush(struct nf_flowtable *flowtable);
2573+int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
2574+ struct net_device *dev,
2575+ enum flow_block_command cmd);
2576+int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
2577+ enum flow_offload_tuple_dir dir,
2578+ struct nf_flow_rule *flow_rule);
2579+int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
2580+ enum flow_offload_tuple_dir dir,
2581+ struct nf_flow_rule *flow_rule);
2582+
2583+int nf_flow_table_offload_init(void);
2584+void nf_flow_table_offload_exit(void);
2585+
2586+static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
2587+{
2588+ __be16 proto;
2589+
2590+ proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
2591+ sizeof(struct pppoe_hdr)));
2592+ switch (proto) {
2593+ case htons(PPP_IP):
2594+ return htons(ETH_P_IP);
2595+ case htons(PPP_IPV6):
2596+ return htons(ETH_P_IPV6);
2597+ }
2598+
2599+ return 0;
2600+}
2601+
2602 #endif /* _NF_FLOW_TABLE_H */
2603diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
developer58aa0682023-09-18 14:02:26 +08002604index 806454e..9e3963c 100644
developer8cb3ac72022-07-04 10:55:14 +08002605--- a/include/net/netns/conntrack.h
2606+++ b/include/net/netns/conntrack.h
2607@@ -27,6 +27,9 @@ struct nf_tcp_net {
2608 int tcp_loose;
2609 int tcp_be_liberal;
2610 int tcp_max_retrans;
2611+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
2612+ unsigned int offload_timeout;
2613+#endif
2614 };
2615
2616 enum udp_conntrack {
2617@@ -37,6 +40,9 @@ enum udp_conntrack {
2618
2619 struct nf_udp_net {
2620 unsigned int timeouts[UDP_CT_MAX];
2621+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
2622+ unsigned int offload_timeout;
2623+#endif
2624 };
2625
2626 struct nf_icmp_net {
2627diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
developer58aa0682023-09-18 14:02:26 +08002628index 336014b..ae698d1 100644
developer8cb3ac72022-07-04 10:55:14 +08002629--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
2630+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
2631@@ -105,14 +105,19 @@ enum ip_conntrack_status {
2632 IPS_OFFLOAD_BIT = 14,
2633 IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT),
2634
2635+ /* Conntrack has been offloaded to hardware. */
2636+ IPS_HW_OFFLOAD_BIT = 15,
2637+ IPS_HW_OFFLOAD = (1 << IPS_HW_OFFLOAD_BIT),
2638+
2639 /* Be careful here, modifying these bits can make things messy,
2640 * so don't let users modify them directly.
2641 */
2642 IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
2643 IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
2644- IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD),
2645+ IPS_SEQ_ADJUST | IPS_TEMPLATE |
2646+ IPS_OFFLOAD | IPS_HW_OFFLOAD),
2647
2648- __IPS_MAX_BIT = 15,
2649+ __IPS_MAX_BIT = 16,
2650 };
2651
2652 /* Connection tracking event types */
2653diff --git a/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
2654new file mode 100644
developer58aa0682023-09-18 14:02:26 +08002655index 0000000..5841bbe
developer8cb3ac72022-07-04 10:55:14 +08002656--- /dev/null
2657+++ b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
2658@@ -0,0 +1,17 @@
2659+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2660+#ifndef _XT_FLOWOFFLOAD_H
2661+#define _XT_FLOWOFFLOAD_H
2662+
2663+#include <linux/types.h>
2664+
2665+enum {
2666+ XT_FLOWOFFLOAD_HW = 1 << 0,
2667+
2668+ XT_FLOWOFFLOAD_MASK = XT_FLOWOFFLOAD_HW
2669+};
2670+
2671+struct xt_flowoffload_target_info {
2672+ __u32 flags;
2673+};
2674+
2675+#endif /* _XT_FLOWOFFLOAD_H */
2676diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
developer58aa0682023-09-18 14:02:26 +08002677index 0a3a167..6112266 100644
developer8cb3ac72022-07-04 10:55:14 +08002678--- a/net/8021q/vlan_dev.c
2679+++ b/net/8021q/vlan_dev.c
2680@@ -747,6 +747,26 @@ static int vlan_dev_get_iflink(const struct net_device *dev)
2681 return real_dev->ifindex;
2682 }
2683
2684+static int vlan_dev_fill_forward_path(struct net_device_path_ctx *ctx,
2685+ struct net_device_path *path)
2686+{
2687+ struct vlan_dev_priv *vlan = vlan_dev_priv(ctx->dev);
2688+
2689+ path->type = DEV_PATH_VLAN;
2690+ path->encap.id = vlan->vlan_id;
2691+ path->encap.proto = vlan->vlan_proto;
2692+ path->dev = ctx->dev;
2693+ ctx->dev = vlan->real_dev;
2694+ if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
2695+ return -ENOSPC;
2696+
2697+ ctx->vlan[ctx->num_vlans].id = vlan->vlan_id;
2698+ ctx->vlan[ctx->num_vlans].proto = vlan->vlan_proto;
2699+ ctx->num_vlans++;
2700+
2701+ return 0;
2702+}
2703+
2704 static const struct ethtool_ops vlan_ethtool_ops = {
2705 .get_link_ksettings = vlan_ethtool_get_link_ksettings,
2706 .get_drvinfo = vlan_ethtool_get_drvinfo,
2707@@ -785,6 +805,7 @@ static const struct net_device_ops vlan_netdev_ops = {
2708 #endif
2709 .ndo_fix_features = vlan_dev_fix_features,
2710 .ndo_get_iflink = vlan_dev_get_iflink,
2711+ .ndo_fill_forward_path = vlan_dev_fill_forward_path,
2712 };
2713
2714 static void vlan_dev_free(struct net_device *dev)
2715diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
developer58aa0682023-09-18 14:02:26 +08002716index 501f77f..0940b44 100644
developer8cb3ac72022-07-04 10:55:14 +08002717--- a/net/bridge/br_device.c
2718+++ b/net/bridge/br_device.c
2719@@ -377,6 +377,54 @@ static int br_del_slave(struct net_device *dev, struct net_device *slave_dev)
2720 return br_del_if(br, slave_dev);
2721 }
2722
2723+static int br_fill_forward_path(struct net_device_path_ctx *ctx,
2724+ struct net_device_path *path)
2725+{
2726+ struct net_bridge_fdb_entry *f;
2727+ struct net_bridge_port *dst;
2728+ struct net_bridge *br;
2729+
2730+ if (netif_is_bridge_port(ctx->dev))
2731+ return -1;
2732+
2733+ br = netdev_priv(ctx->dev);
2734+
2735+ br_vlan_fill_forward_path_pvid(br, ctx, path);
2736+
2737+ f = br_fdb_find_rcu(br, ctx->daddr, path->bridge.vlan_id);
2738+ if (!f || !f->dst)
2739+ return -1;
2740+
2741+ dst = READ_ONCE(f->dst);
2742+ if (!dst)
2743+ return -1;
2744+
2745+ if (br_vlan_fill_forward_path_mode(br, dst, path))
2746+ return -1;
2747+
2748+ path->type = DEV_PATH_BRIDGE;
2749+ path->dev = dst->br->dev;
2750+ ctx->dev = dst->dev;
2751+
2752+ switch (path->bridge.vlan_mode) {
2753+ case DEV_PATH_BR_VLAN_TAG:
2754+ if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
2755+ return -ENOSPC;
2756+ ctx->vlan[ctx->num_vlans].id = path->bridge.vlan_id;
2757+ ctx->vlan[ctx->num_vlans].proto = path->bridge.vlan_proto;
2758+ ctx->num_vlans++;
2759+ break;
2760+ case DEV_PATH_BR_VLAN_UNTAG_HW:
2761+ case DEV_PATH_BR_VLAN_UNTAG:
2762+ ctx->num_vlans--;
2763+ break;
2764+ case DEV_PATH_BR_VLAN_KEEP:
2765+ break;
2766+ }
2767+
2768+ return 0;
2769+}
2770+
2771 static const struct ethtool_ops br_ethtool_ops = {
2772 .get_drvinfo = br_getinfo,
2773 .get_link = ethtool_op_get_link,
2774@@ -410,6 +458,7 @@ static const struct net_device_ops br_netdev_ops = {
2775 .ndo_bridge_setlink = br_setlink,
2776 .ndo_bridge_dellink = br_dellink,
2777 .ndo_features_check = passthru_features_check,
2778+ .ndo_fill_forward_path = br_fill_forward_path,
2779 };
2780
2781 static struct device_type br_type = {
2782diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
developer58aa0682023-09-18 14:02:26 +08002783index a736be8..4bd9e9b 100644
developer8cb3ac72022-07-04 10:55:14 +08002784--- a/net/bridge/br_private.h
2785+++ b/net/bridge/br_private.h
2786@@ -912,6 +912,13 @@ void br_vlan_port_event(struct net_bridge_port *p, unsigned long event);
2787 int br_vlan_bridge_event(struct net_device *dev, unsigned long event,
2788 void *ptr);
2789
2790+void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
2791+ struct net_device_path_ctx *ctx,
2792+ struct net_device_path *path);
2793+int br_vlan_fill_forward_path_mode(struct net_bridge *br,
2794+ struct net_bridge_port *dst,
2795+ struct net_device_path *path);
2796+
2797 static inline struct net_bridge_vlan_group *br_vlan_group(
2798 const struct net_bridge *br)
2799 {
2800@@ -1066,6 +1073,19 @@ static inline int nbp_get_num_vlan_infos(struct net_bridge_port *p,
2801 return 0;
2802 }
2803
2804+static inline void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
2805+ struct net_device_path_ctx *ctx,
2806+ struct net_device_path *path)
2807+{
2808+}
2809+
2810+static inline int br_vlan_fill_forward_path_mode(struct net_bridge *br,
2811+ struct net_bridge_port *dst,
2812+ struct net_device_path *path)
2813+{
2814+ return 0;
2815+}
2816+
2817 static inline struct net_bridge_vlan_group *br_vlan_group(
2818 const struct net_bridge *br)
2819 {
2820diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
developer58aa0682023-09-18 14:02:26 +08002821index 9257292..bcfd169 100644
developer8cb3ac72022-07-04 10:55:14 +08002822--- a/net/bridge/br_vlan.c
2823+++ b/net/bridge/br_vlan.c
2824@@ -1268,6 +1268,61 @@ int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid)
2825 }
2826 EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu);
2827
2828+void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
2829+ struct net_device_path_ctx *ctx,
2830+ struct net_device_path *path)
2831+{
2832+ struct net_bridge_vlan_group *vg;
2833+ int idx = ctx->num_vlans - 1;
2834+ u16 vid;
2835+
2836+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
2837+
2838+ if (!br_opt_get(br, BROPT_VLAN_ENABLED))
2839+ return;
2840+
2841+ vg = br_vlan_group(br);
2842+
2843+ if (idx >= 0 &&
2844+ ctx->vlan[idx].proto == br->vlan_proto) {
2845+ vid = ctx->vlan[idx].id;
2846+ } else {
2847+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_TAG;
2848+ vid = br_get_pvid(vg);
2849+ }
2850+
2851+ path->bridge.vlan_id = vid;
2852+ path->bridge.vlan_proto = br->vlan_proto;
2853+}
2854+
2855+int br_vlan_fill_forward_path_mode(struct net_bridge *br,
2856+ struct net_bridge_port *dst,
2857+ struct net_device_path *path)
2858+{
2859+ struct net_bridge_vlan_group *vg;
2860+ struct net_bridge_vlan *v;
2861+
2862+ if (!br_opt_get(br, BROPT_VLAN_ENABLED))
2863+ return 0;
2864+
2865+ vg = nbp_vlan_group_rcu(dst);
2866+ v = br_vlan_find(vg, path->bridge.vlan_id);
2867+ if (!v || !br_vlan_should_use(v))
2868+ return -EINVAL;
2869+
2870+ if (!(v->flags & BRIDGE_VLAN_INFO_UNTAGGED))
2871+ return 0;
2872+
2873+ if (path->bridge.vlan_mode == DEV_PATH_BR_VLAN_TAG)
2874+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
2875+ else if (v->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
2876+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG_HW;
2877+ else
2878+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG;
2879+
2880+ return 0;
2881+}
2882+
2883 int br_vlan_get_info(const struct net_device *dev, u16 vid,
2884 struct bridge_vlan_info *p_vinfo)
2885 {
2886diff --git a/net/core/dev.c b/net/core/dev.c
developer58aa0682023-09-18 14:02:26 +08002887index 54cc544..a117bd0 100644
developer8cb3ac72022-07-04 10:55:14 +08002888--- a/net/core/dev.c
2889+++ b/net/core/dev.c
2890@@ -639,6 +639,52 @@ int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
2891 }
2892 EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
2893
2894+static struct net_device_path *dev_fwd_path(struct net_device_path_stack *stack)
2895+{
2896+ int k = stack->num_paths++;
2897+
2898+ if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX))
2899+ return NULL;
2900+
2901+ return &stack->path[k];
2902+}
2903+
2904+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
2905+ struct net_device_path_stack *stack)
2906+{
2907+ const struct net_device *last_dev;
2908+ struct net_device_path_ctx ctx = {
2909+ .dev = dev,
2910+ };
2911+ struct net_device_path *path;
2912+ int ret = 0;
2913+
2914+ memcpy(ctx.daddr, daddr, sizeof(ctx.daddr));
2915+ stack->num_paths = 0;
2916+ while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
2917+ last_dev = ctx.dev;
2918+ path = dev_fwd_path(stack);
2919+ if (!path)
2920+ return -1;
2921+
2922+ memset(path, 0, sizeof(struct net_device_path));
2923+ ret = ctx.dev->netdev_ops->ndo_fill_forward_path(&ctx, path);
2924+ if (ret < 0)
2925+ return -1;
2926+
2927+ if (WARN_ON_ONCE(last_dev == ctx.dev))
2928+ return -1;
2929+ }
2930+ path = dev_fwd_path(stack);
2931+ if (!path)
2932+ return -1;
2933+ path->type = DEV_PATH_ETHERNET;
2934+ path->dev = ctx.dev;
2935+
2936+ return ret;
2937+}
2938+EXPORT_SYMBOL_GPL(dev_fill_forward_path);
2939+
2940 /**
2941 * __dev_get_by_name - find a device by its name
2942 * @net: the applicable net namespace
developer8cb3ac72022-07-04 10:55:14 +08002943diff --git a/net/dsa/slave.c b/net/dsa/slave.c
developer58aa0682023-09-18 14:02:26 +08002944index e2b91b3..2dfaa1e 100644
developer8cb3ac72022-07-04 10:55:14 +08002945--- a/net/dsa/slave.c
2946+++ b/net/dsa/slave.c
developer58aa0682023-09-18 14:02:26 +08002947@@ -1031,14 +1031,32 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
developer8cb3ac72022-07-04 10:55:14 +08002948 }
2949 }
2950
2951+static int dsa_slave_setup_ft_block(struct dsa_switch *ds, int port,
2952+ void *type_data)
2953+{
2954+ struct dsa_port *cpu_dp = dsa_to_port(ds, port)->cpu_dp;
2955+ struct net_device *master = cpu_dp->master;
2956+
2957+ if (!master->netdev_ops->ndo_setup_tc)
2958+ return -EOPNOTSUPP;
2959+
2960+ return master->netdev_ops->ndo_setup_tc(master, TC_SETUP_FT, type_data);
2961+}
2962+
2963 static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
2964 void *type_data)
2965 {
2966 struct dsa_port *dp = dsa_slave_to_port(dev);
2967 struct dsa_switch *ds = dp->ds;
2968
2969- if (type == TC_SETUP_BLOCK)
2970+ switch (type) {
2971+ case TC_SETUP_BLOCK:
2972 return dsa_slave_setup_tc_block(dev, type_data);
2973+ case TC_SETUP_FT:
2974+ return dsa_slave_setup_ft_block(ds, dp->index, type_data);
2975+ default:
2976+ break;
2977+ }
2978
2979 if (!ds->ops->port_setup_tc)
2980 return -EOPNOTSUPP;
developer58aa0682023-09-18 14:02:26 +08002981@@ -1224,6 +1242,21 @@ static struct devlink_port *dsa_slave_get_devlink_port(struct net_device *dev)
developer8cb3ac72022-07-04 10:55:14 +08002982 return dp->ds->devlink ? &dp->devlink_port : NULL;
2983 }
2984
2985+static int dsa_slave_fill_forward_path(struct net_device_path_ctx *ctx,
2986+ struct net_device_path *path)
2987+{
2988+ struct dsa_port *dp = dsa_slave_to_port(ctx->dev);
2989+ struct dsa_port *cpu_dp = dp->cpu_dp;
2990+
2991+ path->dev = ctx->dev;
2992+ path->type = DEV_PATH_DSA;
2993+ path->dsa.proto = cpu_dp->tag_ops->proto;
2994+ path->dsa.port = dp->index;
2995+ ctx->dev = cpu_dp->master;
2996+
2997+ return 0;
2998+}
2999+
3000 static const struct net_device_ops dsa_slave_netdev_ops = {
3001 .ndo_open = dsa_slave_open,
3002 .ndo_stop = dsa_slave_close,
developer58aa0682023-09-18 14:02:26 +08003003@@ -1248,6 +1281,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
developer8cb3ac72022-07-04 10:55:14 +08003004 .ndo_vlan_rx_add_vid = dsa_slave_vlan_rx_add_vid,
3005 .ndo_vlan_rx_kill_vid = dsa_slave_vlan_rx_kill_vid,
3006 .ndo_get_devlink_port = dsa_slave_get_devlink_port,
3007+ .ndo_fill_forward_path = dsa_slave_fill_forward_path,
3008 };
3009
3010 static struct device_type dsa_type = {
developer8cb3ac72022-07-04 10:55:14 +08003011diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
developer58aa0682023-09-18 14:02:26 +08003012index f17b402..803b92e 100644
developer8cb3ac72022-07-04 10:55:14 +08003013--- a/net/ipv4/netfilter/Kconfig
3014+++ b/net/ipv4/netfilter/Kconfig
3015@@ -56,8 +56,6 @@ config NF_TABLES_ARP
3016 help
3017 This option enables the ARP support for nf_tables.
3018
3019-endif # NF_TABLES
3020-
3021 config NF_FLOW_TABLE_IPV4
3022 tristate "Netfilter flow table IPv4 module"
3023 depends on NF_FLOW_TABLE
3024@@ -66,6 +64,8 @@ config NF_FLOW_TABLE_IPV4
3025
3026 To compile it as a module, choose M here.
3027
3028+endif # NF_TABLES
3029+
3030 config NF_DUP_IPV4
3031 tristate "Netfilter IPv4 packet duplication to alternate destination"
3032 depends on !NF_CONNTRACK || NF_CONNTRACK
3033diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
developer58aa0682023-09-18 14:02:26 +08003034index 8231a7a..7176d7f 100644
developer8cb3ac72022-07-04 10:55:14 +08003035--- a/net/ipv6/ip6_output.c
3036+++ b/net/ipv6/ip6_output.c
3037@@ -607,7 +607,7 @@ int ip6_forward(struct sk_buff *skb)
3038 }
3039 }
3040
3041- mtu = ip6_dst_mtu_forward(dst);
3042+ mtu = ip6_dst_mtu_maybe_forward(dst, true);
3043 if (mtu < IPV6_MIN_MTU)
3044 mtu = IPV6_MIN_MTU;
3045
3046diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
developer58aa0682023-09-18 14:02:26 +08003047index 69443e9..0b481d2 100644
developer8cb3ac72022-07-04 10:55:14 +08003048--- a/net/ipv6/netfilter/Kconfig
3049+++ b/net/ipv6/netfilter/Kconfig
3050@@ -45,7 +45,6 @@ config NFT_FIB_IPV6
3051 multicast or blackhole.
3052
3053 endif # NF_TABLES_IPV6
3054-endif # NF_TABLES
3055
3056 config NF_FLOW_TABLE_IPV6
3057 tristate "Netfilter flow table IPv6 module"
3058@@ -55,6 +54,8 @@ config NF_FLOW_TABLE_IPV6
3059
3060 To compile it as a module, choose M here.
3061
3062+endif # NF_TABLES
3063+
3064 config NF_DUP_IPV6
3065 tristate "Netfilter IPv6 packet duplication to alternate destination"
3066 depends on !NF_CONNTRACK || NF_CONNTRACK
3067diff --git a/net/ipv6/route.c b/net/ipv6/route.c
developer58aa0682023-09-18 14:02:26 +08003068index 43d185c..82a752c 100644
developer8cb3ac72022-07-04 10:55:14 +08003069--- a/net/ipv6/route.c
3070+++ b/net/ipv6/route.c
3071@@ -83,7 +83,7 @@ enum rt6_nud_state {
3072
3073 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
3074 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
3075-static unsigned int ip6_mtu(const struct dst_entry *dst);
3076+static unsigned int ip6_mtu(const struct dst_entry *dst);
3077 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
3078 static void ip6_dst_destroy(struct dst_entry *);
3079 static void ip6_dst_ifdown(struct dst_entry *,
3080@@ -3125,25 +3125,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
3081
3082 static unsigned int ip6_mtu(const struct dst_entry *dst)
3083 {
3084- struct inet6_dev *idev;
3085- unsigned int mtu;
3086-
3087- mtu = dst_metric_raw(dst, RTAX_MTU);
3088- if (mtu)
3089- goto out;
3090-
3091- mtu = IPV6_MIN_MTU;
3092-
3093- rcu_read_lock();
3094- idev = __in6_dev_get(dst->dev);
3095- if (idev)
3096- mtu = idev->cnf.mtu6;
3097- rcu_read_unlock();
3098-
3099-out:
3100- mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
3101-
3102- return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
3103+ return ip6_dst_mtu_maybe_forward(dst, false);
3104 }
3105
3106 /* MTU selection:
3107diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
developer58aa0682023-09-18 14:02:26 +08003108index b6e0a62..5d690ab 100644
developer8cb3ac72022-07-04 10:55:14 +08003109--- a/net/netfilter/Kconfig
3110+++ b/net/netfilter/Kconfig
developer58aa0682023-09-18 14:02:26 +08003111@@ -689,8 +689,6 @@ config NFT_FIB_NETDEV
developer8cb3ac72022-07-04 10:55:14 +08003112
3113 endif # NF_TABLES_NETDEV
3114
3115-endif # NF_TABLES
3116-
3117 config NF_FLOW_TABLE_INET
3118 tristate "Netfilter flow table mixed IPv4/IPv6 module"
3119 depends on NF_FLOW_TABLE
developer58aa0682023-09-18 14:02:26 +08003120@@ -699,11 +697,12 @@ config NF_FLOW_TABLE_INET
developer8cb3ac72022-07-04 10:55:14 +08003121
3122 To compile it as a module, choose M here.
3123
3124+endif # NF_TABLES
3125+
3126 config NF_FLOW_TABLE
3127 tristate "Netfilter flow table module"
3128 depends on NETFILTER_INGRESS
3129 depends on NF_CONNTRACK
3130- depends on NF_TABLES
3131 help
3132 This option adds the flow table core infrastructure.
3133
developer58aa0682023-09-18 14:02:26 +08003134@@ -983,6 +982,15 @@ config NETFILTER_XT_TARGET_NOTRACK
developer8cb3ac72022-07-04 10:55:14 +08003135 depends on NETFILTER_ADVANCED
3136 select NETFILTER_XT_TARGET_CT
3137
3138+config NETFILTER_XT_TARGET_FLOWOFFLOAD
3139+ tristate '"FLOWOFFLOAD" target support'
3140+ depends on NF_FLOW_TABLE
3141+ depends on NETFILTER_INGRESS
3142+ help
3143+ This option adds a `FLOWOFFLOAD' target, which uses the nf_flow_offload
3144+ module to speed up processing of packets by bypassing the usual
3145+	  netfilter chains.
3146+
3147 config NETFILTER_XT_TARGET_RATEEST
3148 tristate '"RATEEST" target support'
3149 depends on NETFILTER_ADVANCED
3150diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
developer58aa0682023-09-18 14:02:26 +08003151index 4fc075b..d93a121 100644
developer8cb3ac72022-07-04 10:55:14 +08003152--- a/net/netfilter/Makefile
3153+++ b/net/netfilter/Makefile
3154@@ -120,7 +120,8 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
3155
3156 # flow table infrastructure
3157 obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
3158-nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
3159+nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \
3160+ nf_flow_table_offload.o
3161
3162 obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
3163
3164@@ -140,6 +141,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
3165 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
3166 obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
3167 obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
3168+obj-$(CONFIG_NETFILTER_XT_TARGET_FLOWOFFLOAD) += xt_FLOWOFFLOAD.o
3169 obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
3170 obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
3171 obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
3172diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
developer58aa0682023-09-18 14:02:26 +08003173index f6ab6f4..f689e19 100644
developer8cb3ac72022-07-04 10:55:14 +08003174--- a/net/netfilter/nf_conntrack_core.c
3175+++ b/net/netfilter/nf_conntrack_core.c
developer58aa0682023-09-18 14:02:26 +08003176@@ -864,9 +864,8 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
developer8cb3ac72022-07-04 10:55:14 +08003177 }
3178 EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
3179
3180-static inline void nf_ct_acct_update(struct nf_conn *ct,
3181- enum ip_conntrack_info ctinfo,
3182- unsigned int len)
3183+void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
3184+ unsigned int bytes)
3185 {
3186 struct nf_conn_acct *acct;
3187
3188@@ -874,10 +873,11 @@ static inline void nf_ct_acct_update(struct nf_conn *ct,
3189 if (acct) {
3190 struct nf_conn_counter *counter = acct->counter;
3191
3192- atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
3193- atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes);
3194+ atomic64_add(packets, &counter[dir].packets);
3195+ atomic64_add(bytes, &counter[dir].bytes);
3196 }
3197 }
3198+EXPORT_SYMBOL_GPL(nf_ct_acct_add);
3199
3200 static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
3201 const struct nf_conn *loser_ct)
3202@@ -891,7 +891,7 @@ static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
3203
3204 /* u32 should be fine since we must have seen one packet. */
3205 bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes);
3206- nf_ct_acct_update(ct, ctinfo, bytes);
3207+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), bytes);
3208 }
3209 }
3210
3211@@ -1238,8 +1238,10 @@ static void gc_worker(struct work_struct *work)
3212
3213 tmp = nf_ct_tuplehash_to_ctrack(h);
3214
3215- if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
3216+ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
3217+ nf_ct_offload_timeout(tmp);
3218 continue;
3219+ }
3220
3221 if (nf_ct_is_expired(tmp)) {
3222 nf_ct_gc_expired(tmp);
3223@@ -1763,7 +1765,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
3224 WRITE_ONCE(ct->timeout, extra_jiffies);
3225 acct:
3226 if (do_acct)
3227- nf_ct_acct_update(ct, ctinfo, skb->len);
3228+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
3229 }
3230 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
3231
3232@@ -1771,7 +1773,7 @@ bool nf_ct_kill_acct(struct nf_conn *ct,
3233 enum ip_conntrack_info ctinfo,
3234 const struct sk_buff *skb)
3235 {
3236- nf_ct_acct_update(ct, ctinfo, skb->len);
3237+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
3238
3239 return nf_ct_delete(ct, 0, 0);
3240 }
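The accounting helper is renamed and exported here, and callers now pass an explicit direction. The old nf_ct_acct_update() name survives as an inline wrapper in include/net/netfilter/nf_conntrack_acct.h (touched elsewhere in this patch, +11 lines in the diffstat); a sketch of that wrapper, assuming it matches the upstream nf_ct_acct_add() change:

	/* nf_conntrack_acct.h sketch: count one packet of the given length
	 * in the requested direction via the new exported helper. */
	void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
			    unsigned int bytes);

	static inline void nf_ct_acct_update(struct nf_conn *ct, u32 dir,
					     unsigned int bytes)
	{
		nf_ct_acct_add(ct, dir, 1, bytes);
	}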
3241diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
developer58aa0682023-09-18 14:02:26 +08003242index e219b6f..5cdc627 100644
developer8cb3ac72022-07-04 10:55:14 +08003243--- a/net/netfilter/nf_conntrack_proto_tcp.c
3244+++ b/net/netfilter/nf_conntrack_proto_tcp.c
developer58aa0682023-09-18 14:02:26 +08003245@@ -1463,6 +1463,10 @@ void nf_conntrack_tcp_init_net(struct net *net)
developer8cb3ac72022-07-04 10:55:14 +08003246 tn->tcp_loose = nf_ct_tcp_loose;
3247 tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
3248 tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
3249+
3250+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3251+ tn->offload_timeout = 30 * HZ;
3252+#endif
3253 }
3254
3255 const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
3256diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
developer58aa0682023-09-18 14:02:26 +08003257index e3a2d01..a1579d6 100644
developer8cb3ac72022-07-04 10:55:14 +08003258--- a/net/netfilter/nf_conntrack_proto_udp.c
3259+++ b/net/netfilter/nf_conntrack_proto_udp.c
3260@@ -267,6 +267,10 @@ void nf_conntrack_udp_init_net(struct net *net)
3261
3262 for (i = 0; i < UDP_CT_MAX; i++)
3263 un->timeouts[i] = udp_timeouts[i];
3264+
3265+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3266+ un->offload_timeout = 30 * HZ;
3267+#endif
3268 }
3269
3270 const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp =
3271diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
developer58aa0682023-09-18 14:02:26 +08003272index 0b600b4..a2cfafa 100644
developer8cb3ac72022-07-04 10:55:14 +08003273--- a/net/netfilter/nf_conntrack_standalone.c
3274+++ b/net/netfilter/nf_conntrack_standalone.c
3275@@ -353,7 +353,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
3276 if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
3277 goto release;
3278
3279- if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
3280+ if (test_bit(IPS_HW_OFFLOAD_BIT, &ct->status))
3281+ seq_puts(s, "[HW_OFFLOAD] ");
3282+ else if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
3283 seq_puts(s, "[OFFLOAD] ");
3284 else if (test_bit(IPS_ASSURED_BIT, &ct->status))
3285 seq_puts(s, "[ASSURED] ");
3286@@ -620,11 +622,17 @@ enum nf_ct_sysctl_index {
3287 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE,
3288 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS,
3289 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
3290+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3291+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
3292+#endif
3293 NF_SYSCTL_CT_PROTO_TCP_LOOSE,
3294 NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
3295 NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS,
3296 NF_SYSCTL_CT_PROTO_TIMEOUT_UDP,
3297 NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
3298+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3299+ NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
3300+#endif
3301 NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
3302 NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
3303 #ifdef CONFIG_NF_CT_PROTO_SCTP
developer58aa0682023-09-18 14:02:26 +08003304@@ -811,6 +819,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
developer8cb3ac72022-07-04 10:55:14 +08003305 .mode = 0644,
3306 .proc_handler = proc_dointvec_jiffies,
3307 },
3308+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3309+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD] = {
3310+ .procname = "nf_flowtable_tcp_timeout",
3311+ .maxlen = sizeof(unsigned int),
3312+ .mode = 0644,
3313+ .proc_handler = proc_dointvec_jiffies,
3314+ },
3315+#endif
3316 [NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
3317 .procname = "nf_conntrack_tcp_loose",
3318 .maxlen = sizeof(int),
developer58aa0682023-09-18 14:02:26 +08003319@@ -845,6 +861,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
developer8cb3ac72022-07-04 10:55:14 +08003320 .mode = 0644,
3321 .proc_handler = proc_dointvec_jiffies,
3322 },
3323+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3324+ [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = {
3325+ .procname = "nf_flowtable_udp_timeout",
3326+ .maxlen = sizeof(unsigned int),
3327+ .mode = 0644,
3328+ .proc_handler = proc_dointvec_jiffies,
3329+ },
3330+#endif
3331 [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
3332 .procname = "nf_conntrack_icmp_timeout",
3333 .maxlen = sizeof(unsigned int),
developer58aa0682023-09-18 14:02:26 +08003334@@ -1021,6 +1045,11 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
developer8cb3ac72022-07-04 10:55:14 +08003335 XASSIGN(LIBERAL, &tn->tcp_be_liberal);
3336 XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans);
3337 #undef XASSIGN
3338+
3339+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3340+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout;
3341+#endif
3342+
3343 }
3344
3345 static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net,
developer58aa0682023-09-18 14:02:26 +08003346@@ -1107,6 +1136,9 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
developer8cb3ac72022-07-04 10:55:14 +08003347 table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6].data = &nf_icmpv6_pernet(net)->timeout;
3348 table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP].data = &un->timeouts[UDP_CT_UNREPLIED];
3349 table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
3350+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3351+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
3352+#endif
3353
3354 nf_conntrack_standalone_init_tcp_sysctl(net, table);
3355 nf_conntrack_standalone_init_sctp_sysctl(net, table);
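With these hunks the offload pickup timeouts become tunable per network namespace as /proc/sys/net/netfilter/nf_flowtable_tcp_timeout and /proc/sys/net/netfilter/nf_flowtable_udp_timeout (values in seconds via proc_dointvec_jiffies), defaulting to the 30 * HZ set in the TCP/UDP init hunks above; e.g. sysctl -w net.netfilter.nf_flowtable_udp_timeout=60 lengthens the UDP offload window.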
3356diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
developer58aa0682023-09-18 14:02:26 +08003357index f212cec..c3054af 100644
developer8cb3ac72022-07-04 10:55:14 +08003358--- a/net/netfilter/nf_flow_table_core.c
3359+++ b/net/netfilter/nf_flow_table_core.c
developerb7c46752022-07-04 19:51:38 +08003360@@ -7,43 +7,21 @@
developer8cb3ac72022-07-04 10:55:14 +08003361 #include <linux/netdevice.h>
3362 #include <net/ip.h>
3363 #include <net/ip6_route.h>
3364-#include <net/netfilter/nf_tables.h>
3365 #include <net/netfilter/nf_flow_table.h>
3366 #include <net/netfilter/nf_conntrack.h>
3367 #include <net/netfilter/nf_conntrack_core.h>
3368 #include <net/netfilter/nf_conntrack_l4proto.h>
3369 #include <net/netfilter/nf_conntrack_tuple.h>
3370
3371-struct flow_offload_entry {
3372- struct flow_offload flow;
3373- struct nf_conn *ct;
3374- struct rcu_head rcu_head;
3375-};
3376-
3377 static DEFINE_MUTEX(flowtable_lock);
3378 static LIST_HEAD(flowtables);
3379
developerb7c46752022-07-04 19:51:38 +08003380-static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
3381-{
3382- const struct rt6_info *rt;
3383-
3384- if (flow_tuple->l3proto == NFPROTO_IPV6) {
3385- rt = (const struct rt6_info *)flow_tuple->dst_cache;
3386- return rt6_get_cookie(rt);
3387- }
3388-
3389- return 0;
3390-}
3391-
developer8cb3ac72022-07-04 10:55:14 +08003392 static void
3393-flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
3394- struct nf_flow_route *route,
3395+flow_offload_fill_dir(struct flow_offload *flow,
3396 enum flow_offload_tuple_dir dir)
3397 {
3398 struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
3399- struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
3400- struct dst_entry *other_dst = route->tuple[!dir].dst;
3401- struct dst_entry *dst = route->tuple[dir].dst;
3402+ struct nf_conntrack_tuple *ctt = &flow->ct->tuplehash[dir].tuple;
3403
3404 ft->dir = dir;
3405
developerb7c46752022-07-04 19:51:38 +08003406@@ -51,12 +29,10 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
developer8cb3ac72022-07-04 10:55:14 +08003407 case NFPROTO_IPV4:
3408 ft->src_v4 = ctt->src.u3.in;
3409 ft->dst_v4 = ctt->dst.u3.in;
3410- ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
3411 break;
3412 case NFPROTO_IPV6:
3413 ft->src_v6 = ctt->src.u3.in6;
3414 ft->dst_v6 = ctt->dst.u3.in6;
3415- ft->mtu = ip6_dst_mtu_forward(dst);
3416 break;
3417 }
3418
developerb7c46752022-07-04 19:51:38 +08003419@@ -64,50 +40,32 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
developer8cb3ac72022-07-04 10:55:14 +08003420 ft->l4proto = ctt->dst.protonum;
3421 ft->src_port = ctt->src.u.tcp.port;
3422 ft->dst_port = ctt->dst.u.tcp.port;
3423-
3424- ft->iifidx = other_dst->dev->ifindex;
3425- ft->dst_cache = dst;
developerb7c46752022-07-04 19:51:38 +08003426- ft->dst_cookie = flow_offload_dst_cookie(ft);
developer8cb3ac72022-07-04 10:55:14 +08003427 }
3428
3429-struct flow_offload *
3430-flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
3431+struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
3432 {
3433- struct flow_offload_entry *entry;
3434 struct flow_offload *flow;
3435
3436 if (unlikely(nf_ct_is_dying(ct) ||
3437 !atomic_inc_not_zero(&ct->ct_general.use)))
3438 return NULL;
3439
3440- entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
3441- if (!entry)
3442+ flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
3443+ if (!flow)
3444 goto err_ct_refcnt;
3445
3446- flow = &entry->flow;
developerb7c46752022-07-04 19:51:38 +08003447-
developer8cb3ac72022-07-04 10:55:14 +08003448- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
3449- goto err_dst_cache_original;
developeree39bcf2023-06-16 08:03:30 +08003450-
developer7eb15dc2023-06-14 17:44:03 +08003451- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
3452- goto err_dst_cache_reply;
developeree39bcf2023-06-16 08:03:30 +08003453+ flow->ct = ct;
3454
developer8cb3ac72022-07-04 10:55:14 +08003455- entry->ct = ct;
3456-
3457- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
3458- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
3459+ flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
3460+ flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_REPLY);
3461
3462 if (ct->status & IPS_SRC_NAT)
3463- flow->flags |= FLOW_OFFLOAD_SNAT;
3464+ __set_bit(NF_FLOW_SNAT, &flow->flags);
3465 if (ct->status & IPS_DST_NAT)
3466- flow->flags |= FLOW_OFFLOAD_DNAT;
3467+ __set_bit(NF_FLOW_DNAT, &flow->flags);
3468
3469 return flow;
3470
3471-err_dst_cache_reply:
3472- dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
3473-err_dst_cache_original:
3474- kfree(entry);
3475 err_ct_refcnt:
3476 nf_ct_put(ct);
3477
developeree39bcf2023-06-16 08:03:30 +08003478@@ -115,40 +73,135 @@ flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
developer8cb3ac72022-07-04 10:55:14 +08003479 }
3480 EXPORT_SYMBOL_GPL(flow_offload_alloc);
3481
3482-static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
3483+static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
3484 {
3485- tcp->state = TCP_CONNTRACK_ESTABLISHED;
3486- tcp->seen[0].td_maxwin = 0;
3487- tcp->seen[1].td_maxwin = 0;
3488+ const struct rt6_info *rt;
3489+
3490+ if (flow_tuple->l3proto == NFPROTO_IPV6) {
3491+ rt = (const struct rt6_info *)flow_tuple->dst_cache;
3492+ return rt6_get_cookie(rt);
3493+ }
3494+
3495+ return 0;
3496 }
3497
3498-#define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ)
3499-#define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ)
3500+static int flow_offload_fill_route(struct flow_offload *flow,
3501+ const struct nf_flow_route *route,
3502+ enum flow_offload_tuple_dir dir)
3503+{
3504+ struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
3505+ struct dst_entry *dst = route->tuple[dir].dst;
3506+ int i, j = 0;
developeree39bcf2023-06-16 08:03:30 +08003507+
developer8cb3ac72022-07-04 10:55:14 +08003508+ switch (flow_tuple->l3proto) {
3509+ case NFPROTO_IPV4:
3510+ flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
3511+ break;
3512+ case NFPROTO_IPV6:
3513+ flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true);
3514+ break;
3515+ }
3516+
3517+ flow_tuple->iifidx = route->tuple[dir].in.ifindex;
3518+ for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {
3519+ flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
3520+ flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
3521+ if (route->tuple[dir].in.ingress_vlans & BIT(i))
3522+ flow_tuple->in_vlan_ingress |= BIT(j);
3523+ j++;
3524+ }
3525+ flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
3526+
3527+ switch (route->tuple[dir].xmit_type) {
3528+ case FLOW_OFFLOAD_XMIT_DIRECT:
3529+ memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
3530+ ETH_ALEN);
3531+ memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
3532+ ETH_ALEN);
3533+ flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
3534+ flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
3535+ break;
3536+ case FLOW_OFFLOAD_XMIT_XFRM:
3537+ case FLOW_OFFLOAD_XMIT_NEIGH:
3538+ if (!dst_hold_safe(route->tuple[dir].dst))
3539+ return -1;
3540+
3541+ flow_tuple->dst_cache = dst;
3542+ flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
3543+ break;
3544+ default:
3545+ WARN_ON_ONCE(1);
3546+ break;
3547+ }
3548+ flow_tuple->xmit_type = route->tuple[dir].xmit_type;
developerb7c46752022-07-04 19:51:38 +08003549+
developer8cb3ac72022-07-04 10:55:14 +08003550+ return 0;
3551+}
3552+
3553+static void nft_flow_dst_release(struct flow_offload *flow,
3554+ enum flow_offload_tuple_dir dir)
developeree39bcf2023-06-16 08:03:30 +08003555+{
developer8cb3ac72022-07-04 10:55:14 +08003556+ if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
3557+ flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
3558+ dst_release(flow->tuplehash[dir].tuple.dst_cache);
developeree39bcf2023-06-16 08:03:30 +08003559+}
3560+
developer8cb3ac72022-07-04 10:55:14 +08003561+int flow_offload_route_init(struct flow_offload *flow,
3562+ const struct nf_flow_route *route)
developeree39bcf2023-06-16 08:03:30 +08003563+{
developer8cb3ac72022-07-04 10:55:14 +08003564+ int err;
developeree39bcf2023-06-16 08:03:30 +08003565+
developer8cb3ac72022-07-04 10:55:14 +08003566+ err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
3567+ if (err < 0)
3568+ return err;
developeree39bcf2023-06-16 08:03:30 +08003569+
developer8cb3ac72022-07-04 10:55:14 +08003570+ err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
3571+ if (err < 0)
3572+ goto err_route_reply;
3573+
3574+ flow->type = NF_FLOW_OFFLOAD_ROUTE;
developeree39bcf2023-06-16 08:03:30 +08003575+
developer8cb3ac72022-07-04 10:55:14 +08003576+ return 0;
3577+
3578+err_route_reply:
3579+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
3580+
3581+ return err;
developeree39bcf2023-06-16 08:03:30 +08003582+}
developer8cb3ac72022-07-04 10:55:14 +08003583+EXPORT_SYMBOL_GPL(flow_offload_route_init);
developerb7c46752022-07-04 19:51:38 +08003584
developeree39bcf2023-06-16 08:03:30 +08003585-static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
developer8cb3ac72022-07-04 10:55:14 +08003586+static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
3587 {
developeree39bcf2023-06-16 08:03:30 +08003588- return (__s32)(timeout - (u32)jiffies);
3589+ tcp->state = TCP_CONNTRACK_ESTABLISHED;
developer8cb3ac72022-07-04 10:55:14 +08003590+ tcp->seen[0].td_maxwin = 0;
3591+ tcp->seen[1].td_maxwin = 0;
3592 }
3593
developeree39bcf2023-06-16 08:03:30 +08003594 static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
developer8cb3ac72022-07-04 10:55:14 +08003595 {
developeree39bcf2023-06-16 08:03:30 +08003596- const struct nf_conntrack_l4proto *l4proto;
developer8cb3ac72022-07-04 10:55:14 +08003597+ struct net *net = nf_ct_net(ct);
developeree39bcf2023-06-16 08:03:30 +08003598 int l4num = nf_ct_protonum(ct);
3599- unsigned int timeout;
developer8cb3ac72022-07-04 10:55:14 +08003600+ s32 timeout;
developeree39bcf2023-06-16 08:03:30 +08003601
3602- l4proto = nf_ct_l4proto_find(l4num);
3603- if (!l4proto)
3604- return;
developer8cb3ac72022-07-04 10:55:14 +08003605+ if (l4num == IPPROTO_TCP) {
3606+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
developeree39bcf2023-06-16 08:03:30 +08003607
3608- if (l4num == IPPROTO_TCP)
3609- timeout = NF_FLOWTABLE_TCP_PICKUP_TIMEOUT;
3610- else if (l4num == IPPROTO_UDP)
3611- timeout = NF_FLOWTABLE_UDP_PICKUP_TIMEOUT;
3612- else
3613+ timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
developer8cb3ac72022-07-04 10:55:14 +08003614+ timeout -= tn->offload_timeout;
3615+ } else if (l4num == IPPROTO_UDP) {
3616+ struct nf_udp_net *tn = nf_udp_pernet(net);
3617+
3618+ timeout = tn->timeouts[UDP_CT_REPLIED];
3619+ timeout -= tn->offload_timeout;
3620+ } else {
developeree39bcf2023-06-16 08:03:30 +08003621 return;
developer8cb3ac72022-07-04 10:55:14 +08003622+ }
3623+
3624+ if (timeout < 0)
3625+ timeout = 0;
developeree39bcf2023-06-16 08:03:30 +08003626
3627- if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
3628- ct->timeout = nfct_time_stamp + timeout;
developer8cb3ac72022-07-04 10:55:14 +08003629+ if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
3630+ WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
3631 }
3632
developeree39bcf2023-06-16 08:03:30 +08003633 static void flow_offload_fixup_ct_state(struct nf_conn *ct)
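After this rework a flow entry is built in two steps, flow_offload_alloc(ct) followed by flow_offload_route_init(flow, &route), instead of the old single-call flow_offload_alloc(ct, route). A caller-side sketch (illustrative only; example_flow_setup() is not part of the patch, it merely mirrors how the xt/nft offload code is expected to use the new API):

	static int example_flow_setup(struct nf_flowtable *flow_table,
				      struct nf_conn *ct,
				      struct nf_flow_route *route)
	{
		struct flow_offload *flow;
		int err;

		flow = flow_offload_alloc(ct);		/* takes a ct reference */
		if (!flow)
			return -ENOMEM;

		/* fills both tuples, grabs dst refs for NEIGH/XFRM xmit paths */
		err = flow_offload_route_init(flow, route);
		if (err < 0)
			goto err_free;

		/* hashes both directions and, if supported, arms hw offload */
		err = flow_offload_add(flow_table, flow);
		if (err < 0)
			goto err_free;

		return 0;

	err_free:
		flow_offload_free(flow);	/* drops the ct ref and any dst refs */
		return err;
	}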
3634@@ -163,17 +216,23 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
3635 flow_offload_fixup_ct_timeout(ct);
3636 }
3637
developer8cb3ac72022-07-04 10:55:14 +08003638-void flow_offload_free(struct flow_offload *flow)
3639+static void flow_offload_route_release(struct flow_offload *flow)
3640 {
3641- struct flow_offload_entry *e;
3642+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
3643+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY);
3644+}
3645
3646- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
3647- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
3648- e = container_of(flow, struct flow_offload_entry, flow);
3649- if (flow->flags & FLOW_OFFLOAD_DYING)
3650- nf_ct_delete(e->ct, 0, 0);
3651- nf_ct_put(e->ct);
3652- kfree_rcu(e, rcu_head);
3653+void flow_offload_free(struct flow_offload *flow)
3654+{
3655+ switch (flow->type) {
3656+ case NF_FLOW_OFFLOAD_ROUTE:
3657+ flow_offload_route_release(flow);
3658+ break;
3659+ default:
3660+ break;
3661+ }
3662+ nf_ct_put(flow->ct);
3663+ kfree_rcu(flow, rcu_head);
3664 }
3665 EXPORT_SYMBOL_GPL(flow_offload_free);
3666
developeree39bcf2023-06-16 08:03:30 +08003667@@ -181,14 +240,14 @@ static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
developer8cb3ac72022-07-04 10:55:14 +08003668 {
3669 const struct flow_offload_tuple *tuple = data;
3670
3671- return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
3672+ return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed);
3673 }
3674
3675 static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
3676 {
3677 const struct flow_offload_tuple_rhash *tuplehash = data;
3678
3679- return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
3680+ return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed);
3681 }
3682
3683 static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
developeree39bcf2023-06-16 08:03:30 +08003684@@ -197,7 +256,7 @@ static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
developer8cb3ac72022-07-04 10:55:14 +08003685 const struct flow_offload_tuple *tuple = arg->key;
3686 const struct flow_offload_tuple_rhash *x = ptr;
3687
3688- if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
3689+ if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash)))
3690 return 1;
3691
3692 return 0;
developeree39bcf2023-06-16 08:03:30 +08003693@@ -211,30 +270,30 @@ static const struct rhashtable_params nf_flow_offload_rhash_params = {
developer8cb3ac72022-07-04 10:55:14 +08003694 .automatic_shrinking = true,
3695 };
3696
3697-#define DAY (86400 * HZ)
3698-
3699-/* Set an arbitrary timeout large enough not to ever expire, this save
3700- * us a check for the IPS_OFFLOAD_BIT from the packet path via
3701- * nf_ct_is_expired().
3702- */
3703-static void nf_ct_offload_timeout(struct flow_offload *flow)
3704+unsigned long flow_offload_get_timeout(struct flow_offload *flow)
3705 {
3706- struct flow_offload_entry *entry;
3707- struct nf_conn *ct;
3708+ unsigned long timeout = NF_FLOW_TIMEOUT;
3709+ struct net *net = nf_ct_net(flow->ct);
3710+ int l4num = nf_ct_protonum(flow->ct);
developeree39bcf2023-06-16 08:03:30 +08003711
3712- entry = container_of(flow, struct flow_offload_entry, flow);
3713- ct = entry->ct;
developerb7c46752022-07-04 19:51:38 +08003714+ if (l4num == IPPROTO_TCP) {
3715+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
developeree39bcf2023-06-16 08:03:30 +08003716
3717- if (nf_ct_expires(ct) < DAY / 2)
3718- ct->timeout = nfct_time_stamp + DAY;
developer8cb3ac72022-07-04 10:55:14 +08003719+ timeout = tn->offload_timeout;
3720+ } else if (l4num == IPPROTO_UDP) {
3721+ struct nf_udp_net *tn = nf_udp_pernet(net);
developeree39bcf2023-06-16 08:03:30 +08003722+
developer8cb3ac72022-07-04 10:55:14 +08003723+ timeout = tn->offload_timeout;
3724+ }
developeree39bcf2023-06-16 08:03:30 +08003725+
developer8cb3ac72022-07-04 10:55:14 +08003726+ return timeout;
3727 }
3728
3729 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
3730 {
3731 int err;
3732
3733- nf_ct_offload_timeout(flow);
3734- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
3735+ flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
3736
3737 err = rhashtable_insert_fast(&flow_table->rhashtable,
3738 &flow->tuplehash[0].node,
developeree39bcf2023-06-16 08:03:30 +08003739@@ -252,10 +311,35 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
developer8cb3ac72022-07-04 10:55:14 +08003740 return err;
3741 }
3742
3743+ nf_ct_offload_timeout(flow->ct);
3744+
3745+ if (nf_flowtable_hw_offload(flow_table)) {
3746+ __set_bit(NF_FLOW_HW, &flow->flags);
3747+ nf_flow_offload_add(flow_table, flow);
3748+ }
3749+
3750 return 0;
3751 }
3752 EXPORT_SYMBOL_GPL(flow_offload_add);
3753
3754+void flow_offload_refresh(struct nf_flowtable *flow_table,
3755+ struct flow_offload *flow)
3756+{
3757+ u32 timeout;
3758+
3759+ timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
3760+ if (timeout - READ_ONCE(flow->timeout) > HZ)
3761+ WRITE_ONCE(flow->timeout, timeout);
3762+ else
3763+ return;
3764+
3765+ if (likely(!nf_flowtable_hw_offload(flow_table)))
3766+ return;
3767+
3768+ nf_flow_offload_add(flow_table, flow);
3769+}
3770+EXPORT_SYMBOL_GPL(flow_offload_refresh);
3771+
3772 static inline bool nf_flow_has_expired(const struct flow_offload *flow)
3773 {
3774 return nf_flow_timeout_delta(flow->timeout) <= 0;
developeree39bcf2023-06-16 08:03:30 +08003775@@ -264,8 +348,6 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
developer8cb3ac72022-07-04 10:55:14 +08003776 static void flow_offload_del(struct nf_flowtable *flow_table,
3777 struct flow_offload *flow)
3778 {
3779- struct flow_offload_entry *e;
3780-
3781 rhashtable_remove_fast(&flow_table->rhashtable,
3782 &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
3783 nf_flow_offload_rhash_params);
developeree39bcf2023-06-16 08:03:30 +08003784@@ -273,28 +355,21 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003785 &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
3786 nf_flow_offload_rhash_params);
developeree39bcf2023-06-16 08:03:30 +08003787
developer8cb3ac72022-07-04 10:55:14 +08003788- e = container_of(flow, struct flow_offload_entry, flow);
3789- clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
developeree39bcf2023-06-16 08:03:30 +08003790+ clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
3791
3792 if (nf_flow_has_expired(flow))
developer8cb3ac72022-07-04 10:55:14 +08003793- flow_offload_fixup_ct(e->ct);
3794- else if (flow->flags & FLOW_OFFLOAD_TEARDOWN)
3795- flow_offload_fixup_ct_timeout(e->ct);
3796-
3797- if (!(flow->flags & FLOW_OFFLOAD_TEARDOWN))
3798- flow_offload_fixup_ct_state(e->ct);
developeree39bcf2023-06-16 08:03:30 +08003799+ flow_offload_fixup_ct(flow->ct);
3800+ else
3801+ flow_offload_fixup_ct_timeout(flow->ct);
3802
developer8cb3ac72022-07-04 10:55:14 +08003803 flow_offload_free(flow);
3804 }
3805
3806 void flow_offload_teardown(struct flow_offload *flow)
3807 {
3808- struct flow_offload_entry *e;
developerb7c46752022-07-04 19:51:38 +08003809-
3810- flow->flags |= FLOW_OFFLOAD_TEARDOWN;
developeree39bcf2023-06-16 08:03:30 +08003811+ set_bit(NF_FLOW_TEARDOWN, &flow->flags);
3812
developer8cb3ac72022-07-04 10:55:14 +08003813- e = container_of(flow, struct flow_offload_entry, flow);
3814- flow_offload_fixup_ct_state(e->ct);
developeree39bcf2023-06-16 08:03:30 +08003815+ flow_offload_fixup_ct_state(flow->ct);
developer8cb3ac72022-07-04 10:55:14 +08003816 }
3817 EXPORT_SYMBOL_GPL(flow_offload_teardown);
3818
developeree39bcf2023-06-16 08:03:30 +08003819@@ -304,7 +379,6 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003820 {
3821 struct flow_offload_tuple_rhash *tuplehash;
3822 struct flow_offload *flow;
3823- struct flow_offload_entry *e;
3824 int dir;
3825
3826 tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
developeree39bcf2023-06-16 08:03:30 +08003827@@ -314,19 +388,17 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003828
3829 dir = tuplehash->tuple.dir;
3830 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
3831- if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
3832+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags))
3833 return NULL;
3834
3835- e = container_of(flow, struct flow_offload_entry, flow);
3836- if (unlikely(nf_ct_is_dying(e->ct)))
3837+ if (unlikely(nf_ct_is_dying(flow->ct)))
3838 return NULL;
3839
3840 return tuplehash;
3841 }
3842 EXPORT_SYMBOL_GPL(flow_offload_lookup);
3843
3844-static int
3845-nf_flow_table_iterate(struct nf_flowtable *flow_table,
3846+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
3847 void (*iter)(struct flow_offload *flow, void *data),
3848 void *data)
3849 {
developeree39bcf2023-06-16 08:03:30 +08003850@@ -339,7 +411,6 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003851 rhashtable_walk_start(&hti);
3852
3853 while ((tuplehash = rhashtable_walk_next(&hti))) {
3854-
3855 if (IS_ERR(tuplehash)) {
3856 if (PTR_ERR(tuplehash) != -EAGAIN) {
3857 err = PTR_ERR(tuplehash);
developeree39bcf2023-06-16 08:03:30 +08003858@@ -359,23 +430,52 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003859
3860 return err;
3861 }
3862+EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
3863
developeree39bcf2023-06-16 08:03:30 +08003864-static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
3865+static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple)
developer8cb3ac72022-07-04 10:55:14 +08003866 {
developeree39bcf2023-06-16 08:03:30 +08003867- struct nf_flowtable *flow_table = data;
developer8cb3ac72022-07-04 10:55:14 +08003868- struct flow_offload_entry *e;
3869- bool teardown;
developeree39bcf2023-06-16 08:03:30 +08003870+ struct dst_entry *dst;
developer8cb3ac72022-07-04 10:55:14 +08003871
3872- e = container_of(flow, struct flow_offload_entry, flow);
developeree39bcf2023-06-16 08:03:30 +08003873+ if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
3874+ tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
3875+ dst = tuple->dst_cache;
3876+ if (!dst_check(dst, tuple->dst_cookie))
3877+ return true;
3878+ }
3879
developer8cb3ac72022-07-04 10:55:14 +08003880- teardown = flow->flags & (FLOW_OFFLOAD_DYING |
3881- FLOW_OFFLOAD_TEARDOWN);
developeree39bcf2023-06-16 08:03:30 +08003882+ return false;
3883+}
3884
developer8cb3ac72022-07-04 10:55:14 +08003885- if (!teardown)
3886- nf_ct_offload_timeout(flow);
developeree39bcf2023-06-16 08:03:30 +08003887+static bool nf_flow_has_stale_dst(struct flow_offload *flow)
3888+{
3889+ return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) ||
3890+ flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple);
3891+}
developer8cb3ac72022-07-04 10:55:14 +08003892
3893- if (nf_flow_has_expired(flow) || teardown)
3894- flow_offload_del(flow_table, flow);
developeree39bcf2023-06-16 08:03:30 +08003895+static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
3896+{
3897+ struct nf_flowtable *flow_table = data;
3898+
3899+ if (nf_flow_has_expired(flow) ||
3900+ nf_ct_is_dying(flow->ct) ||
3901+ nf_flow_has_stale_dst(flow))
3902+ set_bit(NF_FLOW_TEARDOWN, &flow->flags);
3903+
developer8cb3ac72022-07-04 10:55:14 +08003904+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
3905+ if (test_bit(NF_FLOW_HW, &flow->flags)) {
developeree39bcf2023-06-16 08:03:30 +08003906+ if (!test_and_set_bit(NF_FLOW_HW_ACCT_DYING, &flow->flags))
3907+ nf_flow_offload_stats(flow_table, flow, true);
3908+
developer8cb3ac72022-07-04 10:55:14 +08003909+ if (!test_bit(NF_FLOW_HW_DYING, &flow->flags))
3910+ nf_flow_offload_del(flow_table, flow);
3911+ else if (test_bit(NF_FLOW_HW_DEAD, &flow->flags))
3912+ flow_offload_del(flow_table, flow);
3913+ } else {
3914+ flow_offload_del(flow_table, flow);
3915+ }
3916+ } else if (test_bit(NF_FLOW_HW, &flow->flags)) {
developeree39bcf2023-06-16 08:03:30 +08003917+ nf_flow_offload_stats(flow_table, flow, false);
developer8cb3ac72022-07-04 10:55:14 +08003918+ }
3919 }
3920
3921 static void nf_flow_offload_work_gc(struct work_struct *work)
developer58aa0682023-09-18 14:02:26 +08003922@@ -387,30 +487,20 @@ static void nf_flow_offload_work_gc(struct work_struct *work)
developer8cb3ac72022-07-04 10:55:14 +08003923 queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
3924 }
3925
3926-static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
3927- __be16 port, __be16 new_port)
3928+static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
3929+ __be16 port, __be16 new_port)
3930 {
3931 struct tcphdr *tcph;
3932
3933- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
3934- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
3935- return -1;
3936-
3937 tcph = (void *)(skb_network_header(skb) + thoff);
3938 inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
3939-
3940- return 0;
3941 }
3942
3943-static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
3944- __be16 port, __be16 new_port)
3945+static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
3946+ __be16 port, __be16 new_port)
3947 {
3948 struct udphdr *udph;
3949
3950- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
3951- skb_try_make_writable(skb, thoff + sizeof(*udph)))
3952- return -1;
3953-
3954 udph = (void *)(skb_network_header(skb) + thoff);
3955 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
3956 inet_proto_csum_replace2(&udph->check, skb, port,
developer58aa0682023-09-18 14:02:26 +08003957@@ -418,38 +508,28 @@ static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
developer8cb3ac72022-07-04 10:55:14 +08003958 if (!udph->check)
3959 udph->check = CSUM_MANGLED_0;
3960 }
3961-
3962- return 0;
3963 }
3964
3965-static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
3966- u8 protocol, __be16 port, __be16 new_port)
3967+static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
3968+ u8 protocol, __be16 port, __be16 new_port)
3969 {
3970 switch (protocol) {
3971 case IPPROTO_TCP:
3972- if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
3973- return NF_DROP;
3974+ nf_flow_nat_port_tcp(skb, thoff, port, new_port);
3975 break;
3976 case IPPROTO_UDP:
3977- if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
3978- return NF_DROP;
3979+ nf_flow_nat_port_udp(skb, thoff, port, new_port);
3980 break;
3981 }
3982-
3983- return 0;
3984 }
3985
3986-int nf_flow_snat_port(const struct flow_offload *flow,
3987- struct sk_buff *skb, unsigned int thoff,
3988- u8 protocol, enum flow_offload_tuple_dir dir)
3989+void nf_flow_snat_port(const struct flow_offload *flow,
3990+ struct sk_buff *skb, unsigned int thoff,
3991+ u8 protocol, enum flow_offload_tuple_dir dir)
3992 {
3993 struct flow_ports *hdr;
3994 __be16 port, new_port;
3995
3996- if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
3997- skb_try_make_writable(skb, thoff + sizeof(*hdr)))
3998- return -1;
3999-
4000 hdr = (void *)(skb_network_header(skb) + thoff);
4001
4002 switch (dir) {
developer58aa0682023-09-18 14:02:26 +08004003@@ -463,25 +543,19 @@ int nf_flow_snat_port(const struct flow_offload *flow,
developer8cb3ac72022-07-04 10:55:14 +08004004 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
4005 hdr->dest = new_port;
4006 break;
4007- default:
4008- return -1;
4009 }
4010
4011- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4012+ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4013 }
4014 EXPORT_SYMBOL_GPL(nf_flow_snat_port);
4015
4016-int nf_flow_dnat_port(const struct flow_offload *flow,
4017- struct sk_buff *skb, unsigned int thoff,
4018- u8 protocol, enum flow_offload_tuple_dir dir)
4019+void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb,
4020+ unsigned int thoff, u8 protocol,
4021+ enum flow_offload_tuple_dir dir)
4022 {
4023 struct flow_ports *hdr;
4024 __be16 port, new_port;
4025
4026- if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
4027- skb_try_make_writable(skb, thoff + sizeof(*hdr)))
4028- return -1;
4029-
4030 hdr = (void *)(skb_network_header(skb) + thoff);
4031
4032 switch (dir) {
developer58aa0682023-09-18 14:02:26 +08004033@@ -495,11 +569,9 @@ int nf_flow_dnat_port(const struct flow_offload *flow,
developer8cb3ac72022-07-04 10:55:14 +08004034 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
4035 hdr->source = new_port;
4036 break;
4037- default:
4038- return -1;
4039 }
4040
4041- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4042+ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4043 }
4044 EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
4045
developer58aa0682023-09-18 14:02:26 +08004046@@ -507,7 +579,9 @@ int nf_flow_table_init(struct nf_flowtable *flowtable)
developer8cb3ac72022-07-04 10:55:14 +08004047 {
4048 int err;
4049
4050- INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
4051+ INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
4052+ flow_block_init(&flowtable->flow_block);
4053+ init_rwsem(&flowtable->flow_block_lock);
4054
4055 err = rhashtable_init(&flowtable->rhashtable,
4056 &nf_flow_offload_rhash_params);
developer58aa0682023-09-18 14:02:26 +08004057@@ -528,25 +602,24 @@ EXPORT_SYMBOL_GPL(nf_flow_table_init);
developer8cb3ac72022-07-04 10:55:14 +08004058 static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
4059 {
4060 struct net_device *dev = data;
4061- struct flow_offload_entry *e;
4062-
4063- e = container_of(flow, struct flow_offload_entry, flow);
4064
4065 if (!dev) {
4066 flow_offload_teardown(flow);
4067 return;
4068 }
4069- if (net_eq(nf_ct_net(e->ct), dev_net(dev)) &&
4070+
4071+ if (net_eq(nf_ct_net(flow->ct), dev_net(dev)) &&
4072 (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
4073 flow->tuplehash[1].tuple.iifidx == dev->ifindex))
4074- flow_offload_dead(flow);
4075+ flow_offload_teardown(flow);
4076 }
4077
4078-static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
4079- struct net_device *dev)
4080+void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
4081+ struct net_device *dev)
4082 {
4083 nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
4084 flush_delayed_work(&flowtable->gc_work);
4085+ nf_flow_table_offload_flush(flowtable);
4086 }
4087
4088 void nf_flow_table_cleanup(struct net_device *dev)
developer58aa0682023-09-18 14:02:26 +08004089@@ -555,7 +628,7 @@ void nf_flow_table_cleanup(struct net_device *dev)
developer8cb3ac72022-07-04 10:55:14 +08004090
4091 mutex_lock(&flowtable_lock);
4092 list_for_each_entry(flowtable, &flowtables, list)
4093- nf_flow_table_iterate_cleanup(flowtable, dev);
4094+ nf_flow_table_gc_cleanup(flowtable, dev);
4095 mutex_unlock(&flowtable_lock);
4096 }
4097 EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
developer58aa0682023-09-18 14:02:26 +08004098@@ -565,9 +638,14 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
developer8cb3ac72022-07-04 10:55:14 +08004099 mutex_lock(&flowtable_lock);
4100 list_del(&flow_table->list);
4101 mutex_unlock(&flowtable_lock);
4102+
4103 cancel_delayed_work_sync(&flow_table->gc_work);
4104 nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
4105 nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
4106+ nf_flow_table_offload_flush(flow_table);
4107+ if (nf_flowtable_hw_offload(flow_table))
4108+ nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step,
4109+ flow_table);
4110 rhashtable_destroy(&flow_table->rhashtable);
4111 }
4112 EXPORT_SYMBOL_GPL(nf_flow_table_free);
developer58aa0682023-09-18 14:02:26 +08004113@@ -591,12 +669,23 @@ static struct notifier_block flow_offload_netdev_notifier = {
developer8cb3ac72022-07-04 10:55:14 +08004114
4115 static int __init nf_flow_table_module_init(void)
4116 {
4117- return register_netdevice_notifier(&flow_offload_netdev_notifier);
4118+ int ret;
4119+
4120+ ret = nf_flow_table_offload_init();
4121+ if (ret)
4122+ return ret;
4123+
4124+ ret = register_netdevice_notifier(&flow_offload_netdev_notifier);
4125+ if (ret)
4126+ nf_flow_table_offload_exit();
4127+
4128+ return ret;
4129 }
4130
4131 static void __exit nf_flow_table_module_exit(void)
4132 {
4133 unregister_netdevice_notifier(&flow_offload_netdev_notifier);
4134+ nf_flow_table_offload_exit();
4135 }
4136
4137 module_init(nf_flow_table_module_init);
developer58aa0682023-09-18 14:02:26 +08004138@@ -604,3 +693,4 @@ module_exit(nf_flow_table_module_exit);
developer8cb3ac72022-07-04 10:55:14 +08004139
4140 MODULE_LICENSE("GPL");
4141 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
4142+MODULE_DESCRIPTION("Netfilter flow table module");
4143diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
developer58aa0682023-09-18 14:02:26 +08004144index 397129b..6257d87 100644
developer8cb3ac72022-07-04 10:55:14 +08004145--- a/net/netfilter/nf_flow_table_ip.c
4146+++ b/net/netfilter/nf_flow_table_ip.c
4147@@ -7,11 +7,13 @@
4148 #include <linux/ip.h>
4149 #include <linux/ipv6.h>
4150 #include <linux/netdevice.h>
4151+#include <linux/if_ether.h>
4152 #include <net/ip.h>
4153 #include <net/ipv6.h>
4154 #include <net/ip6_route.h>
4155 #include <net/neighbour.h>
4156 #include <net/netfilter/nf_flow_table.h>
4157+#include <net/netfilter/nf_conntrack_acct.h>
4158 /* For layer 4 checksum field offset. */
4159 #include <linux/tcp.h>
4160 #include <linux/udp.h>
4161@@ -24,9 +26,6 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
4162 if (proto != IPPROTO_TCP)
4163 return 0;
4164
4165- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
4166- return -1;
4167-
4168 tcph = (void *)(skb_network_header(skb) + thoff);
4169 if (unlikely(tcph->fin || tcph->rst)) {
4170 flow_offload_teardown(flow);
4171@@ -36,30 +35,20 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
4172 return 0;
4173 }
4174
4175-static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
4176- __be32 addr, __be32 new_addr)
4177+static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
4178+ __be32 addr, __be32 new_addr)
4179 {
4180 struct tcphdr *tcph;
4181
4182- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
4183- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
4184- return -1;
4185-
4186 tcph = (void *)(skb_network_header(skb) + thoff);
4187 inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
4188-
4189- return 0;
4190 }
4191
4192-static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
4193- __be32 addr, __be32 new_addr)
4194+static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
4195+ __be32 addr, __be32 new_addr)
4196 {
4197 struct udphdr *udph;
4198
4199- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
4200- skb_try_make_writable(skb, thoff + sizeof(*udph)))
4201- return -1;
4202-
4203 udph = (void *)(skb_network_header(skb) + thoff);
4204 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
4205 inet_proto_csum_replace4(&udph->check, skb, addr,
4206@@ -67,31 +56,25 @@ static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
4207 if (!udph->check)
4208 udph->check = CSUM_MANGLED_0;
4209 }
4210-
4211- return 0;
4212 }
4213
4214-static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
4215- unsigned int thoff, __be32 addr,
4216- __be32 new_addr)
4217+static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
4218+ unsigned int thoff, __be32 addr,
4219+ __be32 new_addr)
4220 {
4221 switch (iph->protocol) {
4222 case IPPROTO_TCP:
4223- if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
4224- return NF_DROP;
4225+ nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr);
4226 break;
4227 case IPPROTO_UDP:
4228- if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
4229- return NF_DROP;
4230+ nf_flow_nat_ip_udp(skb, thoff, addr, new_addr);
4231 break;
4232 }
4233-
4234- return 0;
4235 }
4236
4237-static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4238- struct iphdr *iph, unsigned int thoff,
4239- enum flow_offload_tuple_dir dir)
4240+static void nf_flow_snat_ip(const struct flow_offload *flow,
4241+ struct sk_buff *skb, struct iphdr *iph,
4242+ unsigned int thoff, enum flow_offload_tuple_dir dir)
4243 {
4244 __be32 addr, new_addr;
4245
4246@@ -106,17 +89,15 @@ static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4247 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
4248 iph->daddr = new_addr;
4249 break;
4250- default:
4251- return -1;
4252 }
4253 csum_replace4(&iph->check, addr, new_addr);
4254
4255- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4256+ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4257 }
4258
4259-static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4260- struct iphdr *iph, unsigned int thoff,
4261- enum flow_offload_tuple_dir dir)
4262+static void nf_flow_dnat_ip(const struct flow_offload *flow,
4263+ struct sk_buff *skb, struct iphdr *iph,
4264+ unsigned int thoff, enum flow_offload_tuple_dir dir)
4265 {
4266 __be32 addr, new_addr;
4267
4268@@ -131,29 +112,24 @@ static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4269 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
4270 iph->saddr = new_addr;
4271 break;
4272- default:
4273- return -1;
4274 }
4275 csum_replace4(&iph->check, addr, new_addr);
4276
4277- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4278+ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4279 }
4280
4281-static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4282- unsigned int thoff, enum flow_offload_tuple_dir dir)
4283+static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4284+ unsigned int thoff, enum flow_offload_tuple_dir dir,
4285+ struct iphdr *iph)
4286 {
4287- struct iphdr *iph = ip_hdr(skb);
4288-
4289- if (flow->flags & FLOW_OFFLOAD_SNAT &&
4290- (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
4291- nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
4292- return -1;
4293- if (flow->flags & FLOW_OFFLOAD_DNAT &&
4294- (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
4295- nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
4296- return -1;
4297-
4298- return 0;
4299+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
4300+ nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir);
4301+ nf_flow_snat_ip(flow, skb, iph, thoff, dir);
4302+ }
4303+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
4304+ nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir);
4305+ nf_flow_dnat_ip(flow, skb, iph, thoff, dir);
4306+ }
4307 }
4308
4309 static bool ip_has_options(unsigned int thoff)
4310@@ -161,35 +137,70 @@ static bool ip_has_options(unsigned int thoff)
4311 return thoff != sizeof(struct iphdr);
4312 }
4313
4314+static void nf_flow_tuple_encap(struct sk_buff *skb,
4315+ struct flow_offload_tuple *tuple)
4316+{
4317+ struct vlan_ethhdr *veth;
4318+ struct pppoe_hdr *phdr;
4319+ int i = 0;
4320+
4321+ if (skb_vlan_tag_present(skb)) {
4322+ tuple->encap[i].id = skb_vlan_tag_get(skb);
4323+ tuple->encap[i].proto = skb->vlan_proto;
4324+ i++;
4325+ }
4326+ switch (skb->protocol) {
4327+ case htons(ETH_P_8021Q):
4328+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
4329+ tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
4330+ tuple->encap[i].proto = skb->protocol;
4331+ break;
4332+ case htons(ETH_P_PPP_SES):
4333+ phdr = (struct pppoe_hdr *)skb_mac_header(skb);
4334+ tuple->encap[i].id = ntohs(phdr->sid);
4335+ tuple->encap[i].proto = skb->protocol;
4336+ break;
4337+ }
4338+}
4339+
4340 static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
4341- struct flow_offload_tuple *tuple)
4342+ struct flow_offload_tuple *tuple, u32 *hdrsize,
4343+ u32 offset)
4344 {
4345 struct flow_ports *ports;
4346 unsigned int thoff;
4347 struct iphdr *iph;
4348
4349- if (!pskb_may_pull(skb, sizeof(*iph)))
4350+ if (!pskb_may_pull(skb, sizeof(*iph) + offset))
4351 return -1;
4352
4353- iph = ip_hdr(skb);
4354- thoff = iph->ihl * 4;
4355+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
4356+ thoff = (iph->ihl * 4);
4357
4358 if (ip_is_fragment(iph) ||
4359 unlikely(ip_has_options(thoff)))
4360 return -1;
4361
4362- if (iph->protocol != IPPROTO_TCP &&
4363- iph->protocol != IPPROTO_UDP)
4364+ thoff += offset;
4365+
4366+ switch (iph->protocol) {
4367+ case IPPROTO_TCP:
4368+ *hdrsize = sizeof(struct tcphdr);
4369+ break;
4370+ case IPPROTO_UDP:
4371+ *hdrsize = sizeof(struct udphdr);
4372+ break;
4373+ default:
4374 return -1;
4375+ }
4376
4377 if (iph->ttl <= 1)
4378 return -1;
4379
4380- thoff = iph->ihl * 4;
4381- if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
4382+ if (!pskb_may_pull(skb, thoff + *hdrsize))
4383 return -1;
4384
4385- iph = ip_hdr(skb);
4386+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
4387 ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
4388
4389 tuple->src_v4.s_addr = iph->saddr;
4390@@ -199,6 +210,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
4391 tuple->l3proto = AF_INET;
4392 tuple->l4proto = iph->protocol;
4393 tuple->iifidx = dev->ifindex;
4394+ nf_flow_tuple_encap(skb, tuple);
4395
4396 return 0;
4397 }
developeree39bcf2023-06-16 08:03:30 +08004398@@ -225,6 +237,75 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08004399 return NF_STOLEN;
4400 }
4401
4402+static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
4403+ u32 *offset)
4404+{
4405+ struct vlan_ethhdr *veth;
4406+
4407+ switch (skb->protocol) {
4408+ case htons(ETH_P_8021Q):
4409+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
4410+ if (veth->h_vlan_encapsulated_proto == proto) {
4411+ *offset += VLAN_HLEN;
4412+ return true;
4413+ }
4414+ break;
4415+ case htons(ETH_P_PPP_SES):
4416+ if (nf_flow_pppoe_proto(skb) == proto) {
4417+ *offset += PPPOE_SES_HLEN;
4418+ return true;
4419+ }
4420+ break;
4421+ }
4422+
4423+ return false;
4424+}
4425+
4426+static void nf_flow_encap_pop(struct sk_buff *skb,
4427+ struct flow_offload_tuple_rhash *tuplehash)
4428+{
4429+ struct vlan_hdr *vlan_hdr;
4430+ int i;
4431+
4432+ for (i = 0; i < tuplehash->tuple.encap_num; i++) {
4433+ if (skb_vlan_tag_present(skb)) {
4434+ __vlan_hwaccel_clear_tag(skb);
4435+ continue;
4436+ }
4437+ switch (skb->protocol) {
4438+ case htons(ETH_P_8021Q):
4439+ vlan_hdr = (struct vlan_hdr *)skb->data;
4440+ __skb_pull(skb, VLAN_HLEN);
4441+ vlan_set_encap_proto(skb, vlan_hdr);
4442+ skb_reset_network_header(skb);
4443+ break;
4444+ case htons(ETH_P_PPP_SES):
4445+ skb->protocol = nf_flow_pppoe_proto(skb);
4446+ skb_pull(skb, PPPOE_SES_HLEN);
4447+ skb_reset_network_header(skb);
4448+ break;
4449+ }
4450+ }
4451+}
4452+
4453+static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
4454+ const struct flow_offload_tuple_rhash *tuplehash,
4455+ unsigned short type)
4456+{
4457+ struct net_device *outdev;
4458+
4459+ outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
4460+ if (!outdev)
4461+ return NF_DROP;
4462+
4463+ skb->dev = outdev;
4464+ dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
4465+ tuplehash->tuple.out.h_source, skb->len);
4466+ dev_queue_xmit(skb);
4467+
4468+ return NF_STOLEN;
4469+}
4470+
4471 unsigned int
4472 nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
4473 const struct nf_hook_state *state)
developeree39bcf2023-06-16 08:03:30 +08004474@@ -235,15 +316,18 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08004475 enum flow_offload_tuple_dir dir;
4476 struct flow_offload *flow;
4477 struct net_device *outdev;
4478+ u32 hdrsize, offset = 0;
4479+ unsigned int thoff, mtu;
4480 struct rtable *rt;
4481- unsigned int thoff;
4482 struct iphdr *iph;
4483 __be32 nexthop;
4484+ int ret;
4485
4486- if (skb->protocol != htons(ETH_P_IP))
4487+ if (skb->protocol != htons(ETH_P_IP) &&
4488+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset))
4489 return NF_ACCEPT;
4490
4491- if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
4492+ if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0)
4493 return NF_ACCEPT;
4494
4495 tuplehash = flow_offload_lookup(flow_table, &tuple);
developeree39bcf2023-06-16 08:03:30 +08004496@@ -252,75 +336,80 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08004497
4498 dir = tuplehash->tuple.dir;
4499 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
4500- rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
4501- outdev = rt->dst.dev;
developeree39bcf2023-06-16 08:03:30 +08004502-
developer8cb3ac72022-07-04 10:55:14 +08004503- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
developeree39bcf2023-06-16 08:03:30 +08004504- return NF_ACCEPT;
developerb7c46752022-07-04 19:51:38 +08004505
developer8cb3ac72022-07-04 10:55:14 +08004506- if (skb_try_make_writable(skb, sizeof(*iph)))
4507- return NF_DROP;
developerb7c46752022-07-04 19:51:38 +08004508-
developer8cb3ac72022-07-04 10:55:14 +08004509- thoff = ip_hdr(skb)->ihl * 4;
4510- if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
developeree39bcf2023-06-16 08:03:30 +08004511+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
4512+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
developer8cb3ac72022-07-04 10:55:14 +08004513 return NF_ACCEPT;
developer7eb15dc2023-06-14 17:44:03 +08004514
4515- if (!dst_check(&rt->dst, 0)) {
developeree39bcf2023-06-16 08:03:30 +08004516- flow_offload_teardown(flow);
4517+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
4518+ thoff = (iph->ihl * 4) + offset;
4519+ if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
developer7eb15dc2023-06-14 17:44:03 +08004520 return NF_ACCEPT;
developeree39bcf2023-06-16 08:03:30 +08004521- }
developer8cb3ac72022-07-04 10:55:14 +08004522
4523- if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
4524+ if (skb_try_make_writable(skb, thoff + hdrsize))
4525 return NF_DROP;
4526
4527- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
4528+ flow_offload_refresh(flow_table, flow);
4529+
4530+ nf_flow_encap_pop(skb, tuplehash);
4531+ thoff -= offset;
4532+
4533 iph = ip_hdr(skb);
4534+ nf_flow_nat_ip(flow, skb, thoff, dir, iph);
4535+
4536 ip_decrease_ttl(iph);
4537 skb->tstamp = 0;
4538
4539- if (unlikely(dst_xfrm(&rt->dst))) {
4540+ if (flow_table->flags & NF_FLOWTABLE_COUNTER)
4541+ nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
4542+
4543+ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
4544+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
4545 memset(skb->cb, 0, sizeof(struct inet_skb_parm));
4546 IPCB(skb)->iif = skb->dev->ifindex;
4547 IPCB(skb)->flags = IPSKB_FORWARDED;
4548 return nf_flow_xmit_xfrm(skb, state, &rt->dst);
4549 }
4550
4551- skb->dev = outdev;
4552- nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
4553- skb_dst_set_noref(skb, &rt->dst);
4554- neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
4555+ switch (tuplehash->tuple.xmit_type) {
4556+ case FLOW_OFFLOAD_XMIT_NEIGH:
4557+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
4558+ outdev = rt->dst.dev;
4559+ skb->dev = outdev;
4560+ nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
4561+ skb_dst_set_noref(skb, &rt->dst);
4562+ neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
4563+ ret = NF_STOLEN;
4564+ break;
4565+ case FLOW_OFFLOAD_XMIT_DIRECT:
4566+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
4567+ if (ret == NF_DROP)
4568+ flow_offload_teardown(flow);
4569+ break;
4570+ }
4571
4572- return NF_STOLEN;
4573+ return ret;
4574 }
4575 EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
4576
4577-static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
4578- struct in6_addr *addr,
4579- struct in6_addr *new_addr)
4580+static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
4581+ struct in6_addr *addr,
4582+ struct in6_addr *new_addr,
4583+ struct ipv6hdr *ip6h)
4584 {
4585 struct tcphdr *tcph;
4586
4587- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
4588- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
4589- return -1;
4590-
4591 tcph = (void *)(skb_network_header(skb) + thoff);
4592 inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
4593 new_addr->s6_addr32, true);
4594-
4595- return 0;
4596 }
4597
4598-static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
4599- struct in6_addr *addr,
4600- struct in6_addr *new_addr)
4601+static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
4602+ struct in6_addr *addr,
4603+ struct in6_addr *new_addr)
4604 {
4605 struct udphdr *udph;
4606
4607- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
4608- skb_try_make_writable(skb, thoff + sizeof(*udph)))
4609- return -1;
4610-
4611 udph = (void *)(skb_network_header(skb) + thoff);
4612 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
4613 inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
developeree39bcf2023-06-16 08:03:30 +08004614@@ -328,32 +417,26 @@ static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
developer8cb3ac72022-07-04 10:55:14 +08004615 if (!udph->check)
4616 udph->check = CSUM_MANGLED_0;
4617 }
4618-
4619- return 0;
4620 }
4621
4622-static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
4623- unsigned int thoff, struct in6_addr *addr,
4624- struct in6_addr *new_addr)
4625+static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
4626+ unsigned int thoff, struct in6_addr *addr,
4627+ struct in6_addr *new_addr)
4628 {
4629 switch (ip6h->nexthdr) {
4630 case IPPROTO_TCP:
4631- if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
4632- return NF_DROP;
4633+ nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h);
4634 break;
4635 case IPPROTO_UDP:
4636- if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
4637- return NF_DROP;
4638+ nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr);
4639 break;
4640 }
4641-
4642- return 0;
4643 }
4644
4645-static int nf_flow_snat_ipv6(const struct flow_offload *flow,
4646- struct sk_buff *skb, struct ipv6hdr *ip6h,
4647- unsigned int thoff,
4648- enum flow_offload_tuple_dir dir)
4649+static void nf_flow_snat_ipv6(const struct flow_offload *flow,
4650+ struct sk_buff *skb, struct ipv6hdr *ip6h,
4651+ unsigned int thoff,
4652+ enum flow_offload_tuple_dir dir)
4653 {
4654 struct in6_addr addr, new_addr;
4655
developeree39bcf2023-06-16 08:03:30 +08004656@@ -368,17 +451,15 @@ static int nf_flow_snat_ipv6(const struct flow_offload *flow,
developer8cb3ac72022-07-04 10:55:14 +08004657 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
4658 ip6h->daddr = new_addr;
4659 break;
4660- default:
4661- return -1;
4662 }
4663
4664- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4665+ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4666 }
4667
4668-static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
4669- struct sk_buff *skb, struct ipv6hdr *ip6h,
4670- unsigned int thoff,
4671- enum flow_offload_tuple_dir dir)
4672+static void nf_flow_dnat_ipv6(const struct flow_offload *flow,
4673+ struct sk_buff *skb, struct ipv6hdr *ip6h,
4674+ unsigned int thoff,
4675+ enum flow_offload_tuple_dir dir)
4676 {
4677 struct in6_addr addr, new_addr;
4678
developeree39bcf2023-06-16 08:03:30 +08004679@@ -393,56 +474,60 @@ static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
developer8cb3ac72022-07-04 10:55:14 +08004680 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
4681 ip6h->saddr = new_addr;
4682 break;
4683- default:
4684- return -1;
4685 }
4686
4687- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4688+ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4689 }
4690
4691-static int nf_flow_nat_ipv6(const struct flow_offload *flow,
4692- struct sk_buff *skb,
4693- enum flow_offload_tuple_dir dir)
4694+static void nf_flow_nat_ipv6(const struct flow_offload *flow,
4695+ struct sk_buff *skb,
4696+ enum flow_offload_tuple_dir dir,
4697+ struct ipv6hdr *ip6h)
4698 {
4699- struct ipv6hdr *ip6h = ipv6_hdr(skb);
4700 unsigned int thoff = sizeof(*ip6h);
4701
4702- if (flow->flags & FLOW_OFFLOAD_SNAT &&
4703- (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
4704- nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
4705- return -1;
4706- if (flow->flags & FLOW_OFFLOAD_DNAT &&
4707- (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
4708- nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
4709- return -1;
4710-
4711- return 0;
4712+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
4713+ nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir);
4714+ nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir);
4715+ }
4716+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
4717+ nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir);
4718+ nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir);
4719+ }
4720 }
4721
4722 static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
4723- struct flow_offload_tuple *tuple)
4724+ struct flow_offload_tuple *tuple, u32 *hdrsize,
4725+ u32 offset)
4726 {
4727 struct flow_ports *ports;
4728 struct ipv6hdr *ip6h;
4729 unsigned int thoff;
4730
4731- if (!pskb_may_pull(skb, sizeof(*ip6h)))
4732+ thoff = sizeof(*ip6h) + offset;
4733+ if (!pskb_may_pull(skb, thoff))
4734 return -1;
4735
4736- ip6h = ipv6_hdr(skb);
4737+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
4738
4739- if (ip6h->nexthdr != IPPROTO_TCP &&
4740- ip6h->nexthdr != IPPROTO_UDP)
4741+ switch (ip6h->nexthdr) {
4742+ case IPPROTO_TCP:
4743+ *hdrsize = sizeof(struct tcphdr);
4744+ break;
4745+ case IPPROTO_UDP:
4746+ *hdrsize = sizeof(struct udphdr);
4747+ break;
4748+ default:
4749 return -1;
4750+ }
4751
4752 if (ip6h->hop_limit <= 1)
4753 return -1;
4754
4755- thoff = sizeof(*ip6h);
4756- if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
4757+ if (!pskb_may_pull(skb, thoff + *hdrsize))
4758 return -1;
4759
4760- ip6h = ipv6_hdr(skb);
4761+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
4762 ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
4763
4764 tuple->src_v6 = ip6h->saddr;
developeree39bcf2023-06-16 08:03:30 +08004765@@ -452,6 +537,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
developer8cb3ac72022-07-04 10:55:14 +08004766 tuple->l3proto = AF_INET6;
4767 tuple->l4proto = ip6h->nexthdr;
4768 tuple->iifidx = dev->ifindex;
4769+ nf_flow_tuple_encap(skb, tuple);
4770
4771 return 0;
4772 }
developeree39bcf2023-06-16 08:03:30 +08004773@@ -467,13 +553,17 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08004774 const struct in6_addr *nexthop;
4775 struct flow_offload *flow;
4776 struct net_device *outdev;
4777+ unsigned int thoff, mtu;
4778+ u32 hdrsize, offset = 0;
4779 struct ipv6hdr *ip6h;
4780 struct rt6_info *rt;
4781+ int ret;
4782
4783- if (skb->protocol != htons(ETH_P_IPV6))
4784+ if (skb->protocol != htons(ETH_P_IPV6) &&
4785+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset))
4786 return NF_ACCEPT;
4787
4788- if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
4789+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0)
4790 return NF_ACCEPT;
4791
4792 tuplehash = flow_offload_lookup(flow_table, &tuple);
developeree39bcf2023-06-16 08:03:30 +08004793@@ -482,44 +572,57 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08004794
4795 dir = tuplehash->tuple.dir;
4796 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
4797- rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
4798- outdev = rt->dst.dev;
developer8cb3ac72022-07-04 10:55:14 +08004799
developerb7c46752022-07-04 19:51:38 +08004800- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
developer8cb3ac72022-07-04 10:55:14 +08004801+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
4802+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
4803 return NF_ACCEPT;
4804
developerb7c46752022-07-04 19:51:38 +08004805- if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
4806- sizeof(*ip6h)))
developer8cb3ac72022-07-04 10:55:14 +08004807+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
4808+ thoff = sizeof(*ip6h) + offset;
4809+ if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
4810 return NF_ACCEPT;
developer8cb3ac72022-07-04 10:55:14 +08004811
developerb7c46752022-07-04 19:51:38 +08004812- if (!dst_check(&rt->dst, tuplehash->tuple.dst_cookie)) {
developeree39bcf2023-06-16 08:03:30 +08004813- flow_offload_teardown(flow);
4814- return NF_ACCEPT;
4815- }
4816-
developer8cb3ac72022-07-04 10:55:14 +08004817- if (skb_try_make_writable(skb, sizeof(*ip6h)))
4818+ if (skb_try_make_writable(skb, thoff + hdrsize))
4819 return NF_DROP;
4820
4821- if (nf_flow_nat_ipv6(flow, skb, dir) < 0)
4822- return NF_DROP;
4823+ flow_offload_refresh(flow_table, flow);
4824+
4825+ nf_flow_encap_pop(skb, tuplehash);
4826
4827- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
4828 ip6h = ipv6_hdr(skb);
4829+ nf_flow_nat_ipv6(flow, skb, dir, ip6h);
4830+
4831 ip6h->hop_limit--;
4832 skb->tstamp = 0;
4833
4834- if (unlikely(dst_xfrm(&rt->dst))) {
4835+ if (flow_table->flags & NF_FLOWTABLE_COUNTER)
4836+ nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
4837+
4838+ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
4839+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
4840 memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
4841 IP6CB(skb)->iif = skb->dev->ifindex;
4842 IP6CB(skb)->flags = IP6SKB_FORWARDED;
4843 return nf_flow_xmit_xfrm(skb, state, &rt->dst);
4844 }
4845
4846- skb->dev = outdev;
4847- nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
4848- skb_dst_set_noref(skb, &rt->dst);
4849- neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
4850+ switch (tuplehash->tuple.xmit_type) {
4851+ case FLOW_OFFLOAD_XMIT_NEIGH:
4852+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
4853+ outdev = rt->dst.dev;
4854+ skb->dev = outdev;
4855+ nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
4856+ skb_dst_set_noref(skb, &rt->dst);
4857+ neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
4858+ ret = NF_STOLEN;
4859+ break;
4860+ case FLOW_OFFLOAD_XMIT_DIRECT:
4861+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
4862+ if (ret == NF_DROP)
4863+ flow_offload_teardown(flow);
4864+ break;
4865+ }
4866
4867- return NF_STOLEN;
4868+ return ret;
4869 }
4870 EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
4871diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
4872new file mode 100644
developer58aa0682023-09-18 14:02:26 +08004873index 0000000..50f2f2e
developer8cb3ac72022-07-04 10:55:14 +08004874--- /dev/null
4875+++ b/net/netfilter/nf_flow_table_offload.c
developeree39bcf2023-06-16 08:03:30 +08004876@@ -0,0 +1,1199 @@
developer8cb3ac72022-07-04 10:55:14 +08004877+#include <linux/kernel.h>
4878+#include <linux/init.h>
4879+#include <linux/module.h>
4880+#include <linux/netfilter.h>
4881+#include <linux/rhashtable.h>
4882+#include <linux/netdevice.h>
4883+#include <linux/tc_act/tc_csum.h>
4884+#include <net/flow_offload.h>
4885+#include <net/netfilter/nf_flow_table.h>
4886+#include <net/netfilter/nf_tables.h>
4887+#include <net/netfilter/nf_conntrack.h>
4888+#include <net/netfilter/nf_conntrack_acct.h>
4889+#include <net/netfilter/nf_conntrack_core.h>
4890+#include <net/netfilter/nf_conntrack_tuple.h>
4891+
4892+static struct workqueue_struct *nf_flow_offload_add_wq;
4893+static struct workqueue_struct *nf_flow_offload_del_wq;
4894+static struct workqueue_struct *nf_flow_offload_stats_wq;
4895+
4896+struct flow_offload_work {
4897+ struct list_head list;
4898+ enum flow_cls_command cmd;
4899+ int priority;
4900+ struct nf_flowtable *flowtable;
4901+ struct flow_offload *flow;
4902+ struct work_struct work;
4903+};
4904+
4905+#define NF_FLOW_DISSECTOR(__match, __type, __field) \
4906+ (__match)->dissector.offset[__type] = \
4907+ offsetof(struct nf_flow_key, __field)
4908+
4909+static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
4910+ struct ip_tunnel_info *tun_info)
4911+{
4912+ struct nf_flow_key *mask = &match->mask;
4913+ struct nf_flow_key *key = &match->key;
4914+ unsigned int enc_keys;
4915+
4916+ if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
4917+ return;
4918+
4919+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control);
4920+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
4921+ key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
4922+ mask->enc_key_id.keyid = 0xffffffff;
4923+ enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
4924+ BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);
4925+
4926+ if (ip_tunnel_info_af(tun_info) == AF_INET) {
4927+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
4928+ enc_ipv4);
4929+ key->enc_ipv4.src = tun_info->key.u.ipv4.dst;
4930+ key->enc_ipv4.dst = tun_info->key.u.ipv4.src;
4931+ if (key->enc_ipv4.src)
4932+ mask->enc_ipv4.src = 0xffffffff;
4933+ if (key->enc_ipv4.dst)
4934+ mask->enc_ipv4.dst = 0xffffffff;
4935+ enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
4936+ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
4937+ } else {
4938+ memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
4939+ sizeof(struct in6_addr));
4940+ memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src,
4941+ sizeof(struct in6_addr));
4942+ if (memcmp(&key->enc_ipv6.src, &in6addr_any,
4943+ sizeof(struct in6_addr)))
4944+ memset(&mask->enc_ipv6.src, 0xff,
4945+ sizeof(struct in6_addr));
4946+ if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
4947+ sizeof(struct in6_addr)))
4948+ memset(&mask->enc_ipv6.dst, 0xff,
4949+ sizeof(struct in6_addr));
4950+ enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
4951+ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
4952+ }
4953+
4954+ match->dissector.used_keys |= enc_keys;
4955+}
4956+
4957+static void nf_flow_rule_vlan_match(struct flow_dissector_key_vlan *key,
4958+ struct flow_dissector_key_vlan *mask,
4959+ u16 vlan_id, __be16 proto)
4960+{
4961+ key->vlan_id = vlan_id;
4962+ mask->vlan_id = VLAN_VID_MASK;
4963+ key->vlan_tpid = proto;
4964+ mask->vlan_tpid = 0xffff;
4965+}
4966+
4967+static int nf_flow_rule_match(struct nf_flow_match *match,
4968+ const struct flow_offload_tuple *tuple,
4969+ struct dst_entry *other_dst)
4970+{
4971+ struct nf_flow_key *mask = &match->mask;
4972+ struct nf_flow_key *key = &match->key;
4973+ struct ip_tunnel_info *tun_info;
4974+ bool vlan_encap = false;
4975+
4976+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
4977+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
4978+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
4979+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
4980+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
4981+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
4982+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);
4983+
4984+ if (other_dst && other_dst->lwtstate) {
4985+ tun_info = lwt_tun_info(other_dst->lwtstate);
4986+ nf_flow_rule_lwt_match(match, tun_info);
4987+ }
4988+
4989+ key->meta.ingress_ifindex = tuple->iifidx;
4990+ mask->meta.ingress_ifindex = 0xffffffff;
4991+
4992+ if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
4993+ tuple->encap[0].proto == htons(ETH_P_8021Q)) {
4994+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, vlan);
4995+ nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
4996+ tuple->encap[0].id,
4997+ tuple->encap[0].proto);
4998+ vlan_encap = true;
4999+ }
5000+
5001+ if (tuple->encap_num > 1 && !(tuple->in_vlan_ingress & BIT(1)) &&
5002+ tuple->encap[1].proto == htons(ETH_P_8021Q)) {
5003+ if (vlan_encap) {
5004+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CVLAN,
5005+ cvlan);
5006+ nf_flow_rule_vlan_match(&key->cvlan, &mask->cvlan,
5007+ tuple->encap[1].id,
5008+ tuple->encap[1].proto);
5009+ } else {
5010+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN,
5011+ vlan);
5012+ nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
5013+ tuple->encap[1].id,
5014+ tuple->encap[1].proto);
5015+ }
5016+ }
5017+
5018+ switch (tuple->l3proto) {
5019+ case AF_INET:
5020+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
5021+ key->basic.n_proto = htons(ETH_P_IP);
5022+ key->ipv4.src = tuple->src_v4.s_addr;
5023+ mask->ipv4.src = 0xffffffff;
5024+ key->ipv4.dst = tuple->dst_v4.s_addr;
5025+ mask->ipv4.dst = 0xffffffff;
5026+ break;
5027+ case AF_INET6:
5028+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
5029+ key->basic.n_proto = htons(ETH_P_IPV6);
5030+ key->ipv6.src = tuple->src_v6;
5031+ memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
5032+ key->ipv6.dst = tuple->dst_v6;
5033+ memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
5034+ break;
5035+ default:
5036+ return -EOPNOTSUPP;
5037+ }
5038+ mask->control.addr_type = 0xffff;
5039+ match->dissector.used_keys |= BIT(key->control.addr_type);
5040+ mask->basic.n_proto = 0xffff;
5041+
5042+ switch (tuple->l4proto) {
5043+ case IPPROTO_TCP:
5044+ key->tcp.flags = 0;
5045+ mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
5046+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
5047+ break;
5048+ case IPPROTO_UDP:
5049+ break;
5050+ default:
5051+ return -EOPNOTSUPP;
5052+ }
5053+
5054+ key->basic.ip_proto = tuple->l4proto;
5055+ mask->basic.ip_proto = 0xff;
5056+
5057+ key->tp.src = tuple->src_port;
5058+ mask->tp.src = 0xffff;
5059+ key->tp.dst = tuple->dst_port;
5060+ mask->tp.dst = 0xffff;
5061+
5062+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
5063+ BIT(FLOW_DISSECTOR_KEY_CONTROL) |
5064+ BIT(FLOW_DISSECTOR_KEY_BASIC) |
5065+ BIT(FLOW_DISSECTOR_KEY_PORTS);
5066+ return 0;
5067+}
5068+
5069+static void flow_offload_mangle(struct flow_action_entry *entry,
5070+ enum flow_action_mangle_base htype, u32 offset,
5071+ const __be32 *value, const __be32 *mask)
5072+{
5073+ entry->id = FLOW_ACTION_MANGLE;
5074+ entry->mangle.htype = htype;
5075+ entry->mangle.offset = offset;
5076+ memcpy(&entry->mangle.mask, mask, sizeof(u32));
5077+ memcpy(&entry->mangle.val, value, sizeof(u32));
5078+}
5079+
5080+static inline struct flow_action_entry *
5081+flow_action_entry_next(struct nf_flow_rule *flow_rule)
5082+{
5083+ int i = flow_rule->rule->action.num_entries++;
5084+
5085+ return &flow_rule->rule->action.entries[i];
5086+}
5087+
5088+static int flow_offload_eth_src(struct net *net,
5089+ const struct flow_offload *flow,
5090+ enum flow_offload_tuple_dir dir,
5091+ struct nf_flow_rule *flow_rule)
5092+{
5093+ struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
5094+ struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
5095+ const struct flow_offload_tuple *other_tuple, *this_tuple;
5096+ struct net_device *dev = NULL;
5097+ const unsigned char *addr;
5098+ u32 mask, val;
5099+ u16 val16;
5100+
5101+ this_tuple = &flow->tuplehash[dir].tuple;
5102+
5103+ switch (this_tuple->xmit_type) {
5104+ case FLOW_OFFLOAD_XMIT_DIRECT:
5105+ addr = this_tuple->out.h_source;
5106+ break;
5107+ case FLOW_OFFLOAD_XMIT_NEIGH:
5108+ other_tuple = &flow->tuplehash[!dir].tuple;
5109+ dev = dev_get_by_index(net, other_tuple->iifidx);
5110+ if (!dev)
5111+ return -ENOENT;
5112+
5113+ addr = dev->dev_addr;
5114+ break;
5115+ default:
5116+ return -EOPNOTSUPP;
5117+ }
5118+
5119+ mask = ~0xffff0000;
5120+ memcpy(&val16, addr, 2);
5121+ val = val16 << 16;
5122+ flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
5123+ &val, &mask);
5124+
5125+ mask = ~0xffffffff;
5126+ memcpy(&val, addr + 2, 4);
5127+ flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
5128+ &val, &mask);
5129+
developeree39bcf2023-06-16 08:03:30 +08005130+ if (dev)
5131+ dev_put(dev);
developer8cb3ac72022-07-04 10:55:14 +08005132+
5133+ return 0;
5134+}
5135+
5136+static int flow_offload_eth_dst(struct net *net,
5137+ const struct flow_offload *flow,
5138+ enum flow_offload_tuple_dir dir,
5139+ struct nf_flow_rule *flow_rule)
5140+{
5141+ struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
5142+ struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
5143+ const struct flow_offload_tuple *other_tuple, *this_tuple;
5144+ const struct dst_entry *dst_cache;
5145+ unsigned char ha[ETH_ALEN];
5146+ struct neighbour *n;
5147+ const void *daddr;
5148+ u32 mask, val;
5149+ u8 nud_state;
5150+ u16 val16;
5151+
5152+ this_tuple = &flow->tuplehash[dir].tuple;
5153+
5154+ switch (this_tuple->xmit_type) {
5155+ case FLOW_OFFLOAD_XMIT_DIRECT:
5156+ ether_addr_copy(ha, this_tuple->out.h_dest);
5157+ break;
5158+ case FLOW_OFFLOAD_XMIT_NEIGH:
5159+ other_tuple = &flow->tuplehash[!dir].tuple;
5160+ daddr = &other_tuple->src_v4;
5161+ dst_cache = this_tuple->dst_cache;
5162+ n = dst_neigh_lookup(dst_cache, daddr);
5163+ if (!n)
5164+ return -ENOENT;
5165+
5166+ read_lock_bh(&n->lock);
5167+ nud_state = n->nud_state;
5168+ ether_addr_copy(ha, n->ha);
5169+ read_unlock_bh(&n->lock);
5170+ neigh_release(n);
5171+
5172+ if (!(nud_state & NUD_VALID))
5173+ return -ENOENT;
5174+ break;
5175+ default:
5176+ return -EOPNOTSUPP;
5177+ }
5178+
5179+ mask = ~0xffffffff;
5180+ memcpy(&val, ha, 4);
5181+ flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
5182+ &val, &mask);
5183+
5184+ mask = ~0x0000ffff;
5185+ memcpy(&val16, ha + 4, 2);
5186+ val = val16;
5187+ flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
5188+ &val, &mask);
5189+
5190+ return 0;
5191+}
5192+
5193+static void flow_offload_ipv4_snat(struct net *net,
5194+ const struct flow_offload *flow,
5195+ enum flow_offload_tuple_dir dir,
5196+ struct nf_flow_rule *flow_rule)
5197+{
5198+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5199+ u32 mask = ~htonl(0xffffffff);
5200+ __be32 addr;
5201+ u32 offset;
5202+
5203+ switch (dir) {
5204+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5205+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
5206+ offset = offsetof(struct iphdr, saddr);
5207+ break;
5208+ case FLOW_OFFLOAD_DIR_REPLY:
5209+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
5210+ offset = offsetof(struct iphdr, daddr);
5211+ break;
5212+ default:
5213+ return;
5214+ }
5215+
5216+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
5217+ &addr, &mask);
5218+}
5219+
5220+static void flow_offload_ipv4_dnat(struct net *net,
5221+ const struct flow_offload *flow,
5222+ enum flow_offload_tuple_dir dir,
5223+ struct nf_flow_rule *flow_rule)
5224+{
5225+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5226+ u32 mask = ~htonl(0xffffffff);
5227+ __be32 addr;
5228+ u32 offset;
5229+
5230+ switch (dir) {
5231+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5232+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
5233+ offset = offsetof(struct iphdr, daddr);
5234+ break;
5235+ case FLOW_OFFLOAD_DIR_REPLY:
5236+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
5237+ offset = offsetof(struct iphdr, saddr);
5238+ break;
5239+ default:
5240+ return;
5241+ }
5242+
5243+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
5244+ &addr, &mask);
5245+}
5246+
5247+static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
5248+ unsigned int offset,
5249+ const __be32 *addr, const __be32 *mask)
5250+{
5251+ struct flow_action_entry *entry;
developeree39bcf2023-06-16 08:03:30 +08005252+ int i, j;
developer8cb3ac72022-07-04 10:55:14 +08005253+
developeree39bcf2023-06-16 08:03:30 +08005254+ for (i = 0, j = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32), j++) {
developer8cb3ac72022-07-04 10:55:14 +08005255+ entry = flow_action_entry_next(flow_rule);
5256+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
developeree39bcf2023-06-16 08:03:30 +08005257+ offset + i, &addr[j], mask);
developer8cb3ac72022-07-04 10:55:14 +08005258+ }
5259+}
5260+
5261+static void flow_offload_ipv6_snat(struct net *net,
5262+ const struct flow_offload *flow,
5263+ enum flow_offload_tuple_dir dir,
5264+ struct nf_flow_rule *flow_rule)
5265+{
5266+ u32 mask = ~htonl(0xffffffff);
5267+ const __be32 *addr;
5268+ u32 offset;
5269+
5270+ switch (dir) {
5271+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5272+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
5273+ offset = offsetof(struct ipv6hdr, saddr);
5274+ break;
5275+ case FLOW_OFFLOAD_DIR_REPLY:
5276+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
5277+ offset = offsetof(struct ipv6hdr, daddr);
5278+ break;
5279+ default:
5280+ return;
5281+ }
5282+
5283+ flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
5284+}
5285+
5286+static void flow_offload_ipv6_dnat(struct net *net,
5287+ const struct flow_offload *flow,
5288+ enum flow_offload_tuple_dir dir,
5289+ struct nf_flow_rule *flow_rule)
5290+{
5291+ u32 mask = ~htonl(0xffffffff);
5292+ const __be32 *addr;
5293+ u32 offset;
5294+
5295+ switch (dir) {
5296+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5297+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
5298+ offset = offsetof(struct ipv6hdr, daddr);
5299+ break;
5300+ case FLOW_OFFLOAD_DIR_REPLY:
5301+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
5302+ offset = offsetof(struct ipv6hdr, saddr);
5303+ break;
5304+ default:
5305+ return;
5306+ }
5307+
5308+ flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
5309+}
5310+
5311+static int flow_offload_l4proto(const struct flow_offload *flow)
5312+{
5313+ u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
5314+ u8 type = 0;
5315+
5316+ switch (protonum) {
5317+ case IPPROTO_TCP:
5318+ type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
5319+ break;
5320+ case IPPROTO_UDP:
5321+ type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
5322+ break;
5323+ default:
5324+ break;
5325+ }
5326+
5327+ return type;
5328+}
5329+
5330+static void flow_offload_port_snat(struct net *net,
5331+ const struct flow_offload *flow,
5332+ enum flow_offload_tuple_dir dir,
5333+ struct nf_flow_rule *flow_rule)
5334+{
5335+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5336+ u32 mask, port;
5337+ u32 offset;
5338+
5339+ switch (dir) {
5340+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5341+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
5342+ offset = 0; /* offsetof(struct tcphdr, source); */
5343+ port = htonl(port << 16);
5344+ mask = ~htonl(0xffff0000);
5345+ break;
5346+ case FLOW_OFFLOAD_DIR_REPLY:
5347+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
5348+ offset = 0; /* offsetof(struct tcphdr, dest); */
5349+ port = htonl(port);
5350+ mask = ~htonl(0xffff);
5351+ break;
5352+ default:
5353+ return;
5354+ }
5355+
5356+ flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
5357+ &port, &mask);
5358+}
5359+
5360+static void flow_offload_port_dnat(struct net *net,
5361+ const struct flow_offload *flow,
5362+ enum flow_offload_tuple_dir dir,
5363+ struct nf_flow_rule *flow_rule)
5364+{
5365+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5366+ u32 mask, port;
5367+ u32 offset;
5368+
5369+ switch (dir) {
5370+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5371+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
5372+ offset = 0; /* offsetof(struct tcphdr, dest); */
5373+ port = htonl(port);
5374+ mask = ~htonl(0xffff);
5375+ break;
5376+ case FLOW_OFFLOAD_DIR_REPLY:
5377+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
5378+ offset = 0; /* offsetof(struct tcphdr, source); */
5379+ port = htonl(port << 16);
5380+ mask = ~htonl(0xffff0000);
5381+ break;
5382+ default:
5383+ return;
5384+ }
5385+
5386+ flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
5387+ &port, &mask);
5388+}
5389+
5390+static void flow_offload_ipv4_checksum(struct net *net,
5391+ const struct flow_offload *flow,
5392+ struct nf_flow_rule *flow_rule)
5393+{
5394+ u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
5395+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5396+
5397+ entry->id = FLOW_ACTION_CSUM;
5398+ entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;
5399+
5400+ switch (protonum) {
5401+ case IPPROTO_TCP:
5402+ entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
5403+ break;
5404+ case IPPROTO_UDP:
5405+ entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
5406+ break;
5407+ }
5408+}
5409+
5410+static void flow_offload_redirect(struct net *net,
5411+ const struct flow_offload *flow,
5412+ enum flow_offload_tuple_dir dir,
5413+ struct nf_flow_rule *flow_rule)
5414+{
5415+ const struct flow_offload_tuple *this_tuple, *other_tuple;
5416+ struct flow_action_entry *entry;
5417+ struct net_device *dev;
5418+ int ifindex;
5419+
5420+ this_tuple = &flow->tuplehash[dir].tuple;
5421+ switch (this_tuple->xmit_type) {
5422+ case FLOW_OFFLOAD_XMIT_DIRECT:
5423+ this_tuple = &flow->tuplehash[dir].tuple;
5424+ ifindex = this_tuple->out.hw_ifidx;
5425+ break;
5426+ case FLOW_OFFLOAD_XMIT_NEIGH:
5427+ other_tuple = &flow->tuplehash[!dir].tuple;
5428+ ifindex = other_tuple->iifidx;
5429+ break;
5430+ default:
5431+ return;
5432+ }
5433+
5434+ dev = dev_get_by_index(net, ifindex);
5435+ if (!dev)
5436+ return;
5437+
5438+ entry = flow_action_entry_next(flow_rule);
5439+ entry->id = FLOW_ACTION_REDIRECT;
5440+ entry->dev = dev;
5441+}
5442+
5443+static void flow_offload_encap_tunnel(const struct flow_offload *flow,
5444+ enum flow_offload_tuple_dir dir,
5445+ struct nf_flow_rule *flow_rule)
5446+{
5447+ const struct flow_offload_tuple *this_tuple;
5448+ struct flow_action_entry *entry;
5449+ struct dst_entry *dst;
5450+
5451+ this_tuple = &flow->tuplehash[dir].tuple;
5452+ if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
5453+ return;
5454+
5455+ dst = this_tuple->dst_cache;
5456+ if (dst && dst->lwtstate) {
5457+ struct ip_tunnel_info *tun_info;
5458+
5459+ tun_info = lwt_tun_info(dst->lwtstate);
5460+ if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
5461+ entry = flow_action_entry_next(flow_rule);
5462+ entry->id = FLOW_ACTION_TUNNEL_ENCAP;
5463+ entry->tunnel = tun_info;
5464+ }
5465+ }
5466+}
5467+
5468+static void flow_offload_decap_tunnel(const struct flow_offload *flow,
5469+ enum flow_offload_tuple_dir dir,
5470+ struct nf_flow_rule *flow_rule)
5471+{
5472+ const struct flow_offload_tuple *other_tuple;
5473+ struct flow_action_entry *entry;
5474+ struct dst_entry *dst;
5475+
5476+ other_tuple = &flow->tuplehash[!dir].tuple;
5477+ if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
5478+ return;
5479+
5480+ dst = other_tuple->dst_cache;
5481+ if (dst && dst->lwtstate) {
5482+ struct ip_tunnel_info *tun_info;
5483+
5484+ tun_info = lwt_tun_info(dst->lwtstate);
5485+ if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
5486+ entry = flow_action_entry_next(flow_rule);
5487+ entry->id = FLOW_ACTION_TUNNEL_DECAP;
5488+ }
5489+ }
5490+}
5491+
5492+static int
5493+nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
5494+ enum flow_offload_tuple_dir dir,
5495+ struct nf_flow_rule *flow_rule)
5496+{
5497+ const struct flow_offload_tuple *other_tuple;
5498+ const struct flow_offload_tuple *tuple;
5499+ int i;
5500+
5501+ flow_offload_decap_tunnel(flow, dir, flow_rule);
5502+ flow_offload_encap_tunnel(flow, dir, flow_rule);
5503+
5504+ if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
5505+ flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
5506+ return -1;
5507+
5508+ tuple = &flow->tuplehash[dir].tuple;
5509+
5510+ for (i = 0; i < tuple->encap_num; i++) {
5511+ struct flow_action_entry *entry;
5512+
5513+ if (tuple->in_vlan_ingress & BIT(i))
5514+ continue;
5515+
5516+ if (tuple->encap[i].proto == htons(ETH_P_8021Q)) {
5517+ entry = flow_action_entry_next(flow_rule);
5518+ entry->id = FLOW_ACTION_VLAN_POP;
5519+ }
5520+ }
5521+
5522+ other_tuple = &flow->tuplehash[!dir].tuple;
5523+
5524+ for (i = 0; i < other_tuple->encap_num; i++) {
5525+ struct flow_action_entry *entry;
5526+
5527+ if (other_tuple->in_vlan_ingress & BIT(i))
5528+ continue;
5529+
5530+ entry = flow_action_entry_next(flow_rule);
5531+
5532+ switch (other_tuple->encap[i].proto) {
5533+ case htons(ETH_P_PPP_SES):
5534+ entry->id = FLOW_ACTION_PPPOE_PUSH;
5535+ entry->pppoe.sid = other_tuple->encap[i].id;
5536+ break;
5537+ case htons(ETH_P_8021Q):
5538+ entry->id = FLOW_ACTION_VLAN_PUSH;
5539+ entry->vlan.vid = other_tuple->encap[i].id;
5540+ entry->vlan.proto = other_tuple->encap[i].proto;
5541+ break;
5542+ }
5543+ }
5544+
5545+ return 0;
5546+}
5547+
5548+int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
5549+ enum flow_offload_tuple_dir dir,
5550+ struct nf_flow_rule *flow_rule)
5551+{
5552+ if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
5553+ return -1;
5554+
5555+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
5556+ flow_offload_ipv4_snat(net, flow, dir, flow_rule);
5557+ flow_offload_port_snat(net, flow, dir, flow_rule);
5558+ }
5559+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
5560+ flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
5561+ flow_offload_port_dnat(net, flow, dir, flow_rule);
5562+ }
5563+ if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
5564+ test_bit(NF_FLOW_DNAT, &flow->flags))
5565+ flow_offload_ipv4_checksum(net, flow, flow_rule);
5566+
5567+ flow_offload_redirect(net, flow, dir, flow_rule);
5568+
5569+ return 0;
5570+}
5571+EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);
5572+
5573+int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
5574+ enum flow_offload_tuple_dir dir,
5575+ struct nf_flow_rule *flow_rule)
5576+{
5577+ if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
5578+ return -1;
5579+
5580+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
5581+ flow_offload_ipv6_snat(net, flow, dir, flow_rule);
5582+ flow_offload_port_snat(net, flow, dir, flow_rule);
5583+ }
5584+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
5585+ flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
5586+ flow_offload_port_dnat(net, flow, dir, flow_rule);
5587+ }
5588+
5589+ flow_offload_redirect(net, flow, dir, flow_rule);
5590+
5591+ return 0;
5592+}
5593+EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);
5594+
5595+#define NF_FLOW_RULE_ACTION_MAX 16
5596+
5597+static struct nf_flow_rule *
5598+nf_flow_offload_rule_alloc(struct net *net,
5599+ const struct flow_offload_work *offload,
5600+ enum flow_offload_tuple_dir dir)
5601+{
5602+ const struct nf_flowtable *flowtable = offload->flowtable;
5603+ const struct flow_offload_tuple *tuple, *other_tuple;
5604+ const struct flow_offload *flow = offload->flow;
5605+ struct dst_entry *other_dst = NULL;
5606+ struct nf_flow_rule *flow_rule;
5607+ int err = -ENOMEM;
5608+
5609+ flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
5610+ if (!flow_rule)
5611+ goto err_flow;
5612+
5613+ flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
5614+ if (!flow_rule->rule)
5615+ goto err_flow_rule;
5616+
5617+ flow_rule->rule->match.dissector = &flow_rule->match.dissector;
5618+ flow_rule->rule->match.mask = &flow_rule->match.mask;
5619+ flow_rule->rule->match.key = &flow_rule->match.key;
5620+
5621+ tuple = &flow->tuplehash[dir].tuple;
5622+ other_tuple = &flow->tuplehash[!dir].tuple;
5623+ if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
5624+ other_dst = other_tuple->dst_cache;
5625+
5626+ err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
5627+ if (err < 0)
5628+ goto err_flow_match;
5629+
5630+ flow_rule->rule->action.num_entries = 0;
5631+ if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
5632+ goto err_flow_match;
5633+
5634+ return flow_rule;
5635+
5636+err_flow_match:
5637+ kfree(flow_rule->rule);
5638+err_flow_rule:
5639+ kfree(flow_rule);
5640+err_flow:
5641+ return NULL;
5642+}
5643+
5644+static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
5645+{
5646+ struct flow_action_entry *entry;
5647+ int i;
5648+
5649+ for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
5650+ entry = &flow_rule->rule->action.entries[i];
5651+ if (entry->id != FLOW_ACTION_REDIRECT)
5652+ continue;
5653+
5654+ dev_put(entry->dev);
5655+ }
5656+ kfree(flow_rule->rule);
5657+ kfree(flow_rule);
5658+}
5659+
5660+static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
5661+{
5662+ int i;
5663+
5664+ for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
5665+ __nf_flow_offload_destroy(flow_rule[i]);
5666+}
5667+
5668+static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
5669+ struct nf_flow_rule *flow_rule[])
5670+{
5671+ struct net *net = read_pnet(&offload->flowtable->net);
5672+
5673+ flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
5674+ FLOW_OFFLOAD_DIR_ORIGINAL);
5675+ if (!flow_rule[0])
5676+ return -ENOMEM;
5677+
5678+ flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
5679+ FLOW_OFFLOAD_DIR_REPLY);
5680+ if (!flow_rule[1]) {
5681+ __nf_flow_offload_destroy(flow_rule[0]);
5682+ return -ENOMEM;
5683+ }
5684+
5685+ return 0;
5686+}
5687+
5688+static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
5689+ __be16 proto, int priority,
5690+ enum flow_cls_command cmd,
5691+ const struct flow_offload_tuple *tuple,
5692+ struct netlink_ext_ack *extack)
5693+{
5694+ cls_flow->common.protocol = proto;
5695+ cls_flow->common.prio = priority;
5696+ cls_flow->common.extack = extack;
5697+ cls_flow->command = cmd;
5698+ cls_flow->cookie = (unsigned long)tuple;
5699+}
5700+
5701+static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
5702+ struct flow_offload *flow,
5703+ struct nf_flow_rule *flow_rule,
5704+ enum flow_offload_tuple_dir dir,
5705+ int priority, int cmd,
5706+ struct flow_stats *stats,
5707+ struct list_head *block_cb_list)
5708+{
5709+ struct flow_cls_offload cls_flow = {};
5710+ struct flow_block_cb *block_cb;
5711+ struct netlink_ext_ack extack;
5712+ __be16 proto = ETH_P_ALL;
5713+ int err, i = 0;
5714+
5715+ nf_flow_offload_init(&cls_flow, proto, priority, cmd,
5716+ &flow->tuplehash[dir].tuple, &extack);
5717+ if (cmd == FLOW_CLS_REPLACE)
5718+ cls_flow.rule = flow_rule->rule;
5719+
developer0cc0d732023-06-07 13:52:41 +08005720+ down_read(&flowtable->flow_block_lock);
developer8cb3ac72022-07-04 10:55:14 +08005721+ list_for_each_entry(block_cb, block_cb_list, list) {
5722+ err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
5723+ block_cb->cb_priv);
5724+ if (err < 0)
5725+ continue;
5726+
5727+ i++;
5728+ }
developer0cc0d732023-06-07 13:52:41 +08005729+ up_read(&flowtable->flow_block_lock);
developer8cb3ac72022-07-04 10:55:14 +08005730+
5731+ if (cmd == FLOW_CLS_STATS)
5732+ memcpy(stats, &cls_flow.stats, sizeof(*stats));
5733+
5734+ return i;
5735+}
5736+
5737+static int flow_offload_tuple_add(struct flow_offload_work *offload,
5738+ struct nf_flow_rule *flow_rule,
5739+ enum flow_offload_tuple_dir dir)
5740+{
5741+ return nf_flow_offload_tuple(offload->flowtable, offload->flow,
5742+ flow_rule, dir, offload->priority,
5743+ FLOW_CLS_REPLACE, NULL,
5744+ &offload->flowtable->flow_block.cb_list);
5745+}
5746+
5747+static void flow_offload_tuple_del(struct flow_offload_work *offload,
5748+ enum flow_offload_tuple_dir dir)
5749+{
5750+ nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
5751+ offload->priority, FLOW_CLS_DESTROY, NULL,
5752+ &offload->flowtable->flow_block.cb_list);
5753+}
5754+
5755+static int flow_offload_rule_add(struct flow_offload_work *offload,
5756+ struct nf_flow_rule *flow_rule[])
5757+{
5758+ int ok_count = 0;
5759+
5760+ ok_count += flow_offload_tuple_add(offload, flow_rule[0],
5761+ FLOW_OFFLOAD_DIR_ORIGINAL);
5762+ ok_count += flow_offload_tuple_add(offload, flow_rule[1],
5763+ FLOW_OFFLOAD_DIR_REPLY);
5764+ if (ok_count == 0)
5765+ return -ENOENT;
5766+
5767+ return 0;
5768+}
5769+
5770+static void flow_offload_work_add(struct flow_offload_work *offload)
5771+{
5772+ struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
5773+ int err;
5774+
5775+ err = nf_flow_offload_alloc(offload, flow_rule);
5776+ if (err < 0)
5777+ return;
5778+
5779+ err = flow_offload_rule_add(offload, flow_rule);
5780+ if (err < 0)
5781+ goto out;
5782+
5783+ set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
5784+
5785+out:
5786+ nf_flow_offload_destroy(flow_rule);
5787+}
5788+
5789+static void flow_offload_work_del(struct flow_offload_work *offload)
5790+{
5791+ clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
5792+ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
5793+ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
5794+ set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
5795+}
5796+
5797+static void flow_offload_tuple_stats(struct flow_offload_work *offload,
5798+ enum flow_offload_tuple_dir dir,
5799+ struct flow_stats *stats)
5800+{
5801+ nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
5802+ offload->priority, FLOW_CLS_STATS, stats,
5803+ &offload->flowtable->flow_block.cb_list);
5804+}
5805+
5806+static void flow_offload_work_stats(struct flow_offload_work *offload)
5807+{
5808+ struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
5809+ u64 lastused;
5810+
5811+ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
5812+ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);
5813+
5814+ lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
5815+ offload->flow->timeout = max_t(u64, offload->flow->timeout,
5816+ lastused + flow_offload_get_timeout(offload->flow));
5817+
5818+ if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
5819+ if (stats[0].pkts)
5820+ nf_ct_acct_add(offload->flow->ct,
5821+ FLOW_OFFLOAD_DIR_ORIGINAL,
5822+ stats[0].pkts, stats[0].bytes);
5823+ if (stats[1].pkts)
5824+ nf_ct_acct_add(offload->flow->ct,
5825+ FLOW_OFFLOAD_DIR_REPLY,
5826+ stats[1].pkts, stats[1].bytes);
5827+ }
5828+}
5829+
5830+static void flow_offload_work_handler(struct work_struct *work)
5831+{
5832+ struct flow_offload_work *offload;
5833+
5834+ offload = container_of(work, struct flow_offload_work, work);
5835+ switch (offload->cmd) {
5836+ case FLOW_CLS_REPLACE:
5837+ flow_offload_work_add(offload);
5838+ break;
5839+ case FLOW_CLS_DESTROY:
5840+ flow_offload_work_del(offload);
5841+ break;
5842+ case FLOW_CLS_STATS:
5843+ flow_offload_work_stats(offload);
5844+ break;
5845+ default:
5846+ WARN_ON_ONCE(1);
5847+ }
5848+
5849+ clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags);
5850+ kfree(offload);
5851+}
5852+
5853+static void flow_offload_queue_work(struct flow_offload_work *offload)
5854+{
5855+ if (offload->cmd == FLOW_CLS_REPLACE)
5856+ queue_work(nf_flow_offload_add_wq, &offload->work);
5857+ else if (offload->cmd == FLOW_CLS_DESTROY)
5858+ queue_work(nf_flow_offload_del_wq, &offload->work);
5859+ else
5860+ queue_work(nf_flow_offload_stats_wq, &offload->work);
5861+}
5862+
5863+static struct flow_offload_work *
5864+nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
5865+ struct flow_offload *flow, unsigned int cmd)
5866+{
5867+ struct flow_offload_work *offload;
5868+
5869+ if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags))
5870+ return NULL;
5871+
5872+ offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
5873+ if (!offload) {
5874+ clear_bit(NF_FLOW_HW_PENDING, &flow->flags);
5875+ return NULL;
5876+ }
5877+
5878+ offload->cmd = cmd;
5879+ offload->flow = flow;
5880+ offload->priority = flowtable->priority;
5881+ offload->flowtable = flowtable;
5882+ INIT_WORK(&offload->work, flow_offload_work_handler);
5883+
5884+ return offload;
5885+}
5886+
5887+
5888+void nf_flow_offload_add(struct nf_flowtable *flowtable,
5889+ struct flow_offload *flow)
5890+{
5891+ struct flow_offload_work *offload;
5892+
5893+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
5894+ if (!offload)
5895+ return;
5896+
5897+ flow_offload_queue_work(offload);
5898+}
5899+
5900+void nf_flow_offload_del(struct nf_flowtable *flowtable,
5901+ struct flow_offload *flow)
5902+{
5903+ struct flow_offload_work *offload;
5904+
5905+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY);
5906+ if (!offload)
5907+ return;
5908+
5909+ set_bit(NF_FLOW_HW_DYING, &flow->flags);
5910+ flow_offload_queue_work(offload);
5911+}
5912+
5913+void nf_flow_offload_stats(struct nf_flowtable *flowtable,
developeree39bcf2023-06-16 08:03:30 +08005914+ struct flow_offload *flow, bool force)
developer8cb3ac72022-07-04 10:55:14 +08005915+{
5916+ struct flow_offload_work *offload;
5917+ __s32 delta;
5918+
developeree39bcf2023-06-16 08:03:30 +08005919+ if (!force) {
5920+ delta = nf_flow_timeout_delta(flow->timeout);
5921+ if ((delta >= (9 * flow_offload_get_timeout(flow)) / 10))
5922+ return;
5923+ }
developer8cb3ac72022-07-04 10:55:14 +08005924+
5925+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
5926+ if (!offload)
5927+ return;
5928+
5929+ flow_offload_queue_work(offload);
5930+}
5931+
5932+void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
5933+{
5934+ if (nf_flowtable_hw_offload(flowtable)) {
5935+ flush_workqueue(nf_flow_offload_add_wq);
5936+ flush_workqueue(nf_flow_offload_del_wq);
5937+ flush_workqueue(nf_flow_offload_stats_wq);
5938+ }
5939+}
5940+
5941+static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
5942+ struct flow_block_offload *bo,
5943+ enum flow_block_command cmd)
5944+{
5945+ struct flow_block_cb *block_cb, *next;
5946+ int err = 0;
5947+
developer0cc0d732023-06-07 13:52:41 +08005948+ down_write(&flowtable->flow_block_lock);
developeree39bcf2023-06-16 08:03:30 +08005949+
developer8cb3ac72022-07-04 10:55:14 +08005950+ switch (cmd) {
5951+ case FLOW_BLOCK_BIND:
5952+ list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
5953+ break;
5954+ case FLOW_BLOCK_UNBIND:
5955+ list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
5956+ list_del(&block_cb->list);
5957+ flow_block_cb_free(block_cb);
5958+ }
5959+ break;
5960+ default:
5961+ WARN_ON_ONCE(1);
5962+ err = -EOPNOTSUPP;
5963+ }
developeree39bcf2023-06-16 08:03:30 +08005964+
developer0cc0d732023-06-07 13:52:41 +08005965+ up_write(&flowtable->flow_block_lock);
developera54478c2022-10-01 16:41:46 +08005966+
developer8cb3ac72022-07-04 10:55:14 +08005967+ return err;
5968+}
5969+
5970+static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
5971+ struct net *net,
5972+ enum flow_block_command cmd,
5973+ struct nf_flowtable *flowtable,
5974+ struct netlink_ext_ack *extack)
5975+{
5976+ memset(bo, 0, sizeof(*bo));
5977+ bo->net = net;
5978+ bo->block = &flowtable->flow_block;
5979+ bo->command = cmd;
5980+ bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
5981+ bo->extack = extack;
5982+ INIT_LIST_HEAD(&bo->cb_list);
5983+}
5984+
5985+static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
5986+ struct nf_flowtable *flowtable,
5987+ struct net_device *dev,
5988+ enum flow_block_command cmd,
5989+ struct netlink_ext_ack *extack)
5990+{
5991+ nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
5992+ extack);
5993+ flow_indr_block_call(dev, bo, cmd);
5994+
5995+ if (list_empty(&bo->cb_list))
5996+ return -EOPNOTSUPP;
5997+
5998+ return 0;
5999+}
6000+
6001+static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
6002+ struct nf_flowtable *flowtable,
6003+ struct net_device *dev,
6004+ enum flow_block_command cmd,
6005+ struct netlink_ext_ack *extack)
6006+{
6007+ int err;
6008+
6009+ nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
6010+ extack);
developer0cc0d732023-06-07 13:52:41 +08006011+ down_write(&flowtable->flow_block_lock);
developer8cb3ac72022-07-04 10:55:14 +08006012+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
developer0cc0d732023-06-07 13:52:41 +08006013+ up_write(&flowtable->flow_block_lock);
developer8cb3ac72022-07-04 10:55:14 +08006014+ if (err < 0)
6015+ return err;
6016+
6017+ return 0;
6018+}
6019+
6020+int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
6021+ struct net_device *dev,
6022+ enum flow_block_command cmd)
6023+{
6024+ struct netlink_ext_ack extack = {};
6025+ struct flow_block_offload bo;
6026+ int err;
6027+
6028+ if (!nf_flowtable_hw_offload(flowtable))
6029+ return 0;
6030+
6031+ if (dev->netdev_ops->ndo_setup_tc)
6032+ err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
6033+ &extack);
6034+ else
6035+ err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
6036+ &extack);
6037+ if (err < 0)
6038+ return err;
6039+
6040+ return nf_flow_table_block_setup(flowtable, &bo, cmd);
6041+}
6042+EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);
6043+
6044+int nf_flow_table_offload_init(void)
6045+{
6046+ nf_flow_offload_add_wq = alloc_workqueue("nf_ft_offload_add",
6047+ WQ_UNBOUND | WQ_SYSFS, 0);
6048+ if (!nf_flow_offload_add_wq)
6049+ return -ENOMEM;
6050+
6051+ nf_flow_offload_del_wq = alloc_workqueue("nf_ft_offload_del",
6052+ WQ_UNBOUND | WQ_SYSFS, 0);
6053+ if (!nf_flow_offload_del_wq)
6054+ goto err_del_wq;
6055+
6056+ nf_flow_offload_stats_wq = alloc_workqueue("nf_ft_offload_stats",
6057+ WQ_UNBOUND | WQ_SYSFS, 0);
6058+ if (!nf_flow_offload_stats_wq)
6059+ goto err_stats_wq;
6060+
6061+ return 0;
6062+
6063+err_stats_wq:
6064+ destroy_workqueue(nf_flow_offload_del_wq);
6065+err_del_wq:
6066+ destroy_workqueue(nf_flow_offload_add_wq);
6067+ return -ENOMEM;
6068+}
6069+
6070+void nf_flow_table_offload_exit(void)
6071+{
6072+ destroy_workqueue(nf_flow_offload_add_wq);
6073+ destroy_workqueue(nf_flow_offload_del_wq);
6074+ destroy_workqueue(nf_flow_offload_stats_wq);
6075+}
6076diff --git a/net/netfilter/xt_FLOWOFFLOAD.c b/net/netfilter/xt_FLOWOFFLOAD.c
6077new file mode 100644
developer58aa0682023-09-18 14:02:26 +08006078index 0000000..2cab008
developer8cb3ac72022-07-04 10:55:14 +08006079--- /dev/null
6080+++ b/net/netfilter/xt_FLOWOFFLOAD.c
developer494fbb62023-12-14 23:11:24 +08006081@@ -0,0 +1,799 @@
developer8cb3ac72022-07-04 10:55:14 +08006082+/*
6083+ * Copyright (C) 2018-2021 Felix Fietkau <nbd@nbd.name>
6084+ *
6085+ * This program is free software; you can redistribute it and/or modify
6086+ * it under the terms of the GNU General Public License version 2 as
6087+ * published by the Free Software Foundation.
6088+ */
6089+#include <linux/module.h>
6090+#include <linux/init.h>
6091+#include <linux/netfilter.h>
6092+#include <linux/netfilter/xt_FLOWOFFLOAD.h>
6093+#include <linux/if_vlan.h>
6094+#include <net/ip.h>
6095+#include <net/netfilter/nf_conntrack.h>
6096+#include <net/netfilter/nf_conntrack_extend.h>
6097+#include <net/netfilter/nf_conntrack_helper.h>
6098+#include <net/netfilter/nf_flow_table.h>
6099+
6100+struct xt_flowoffload_hook {
6101+ struct hlist_node list;
6102+ struct nf_hook_ops ops;
6103+ struct net *net;
6104+ bool registered;
6105+ bool used;
6106+};
6107+
6108+struct xt_flowoffload_table {
6109+ struct nf_flowtable ft;
6110+ struct hlist_head hooks;
6111+ struct delayed_work work;
6112+};
6113+
6114+struct nf_forward_info {
6115+ const struct net_device *indev;
6116+ const struct net_device *outdev;
6117+ const struct net_device *hw_outdev;
6118+ struct id {
6119+ __u16 id;
6120+ __be16 proto;
6121+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
6122+ u8 num_encaps;
6123+ u8 ingress_vlans;
6124+ u8 h_source[ETH_ALEN];
6125+ u8 h_dest[ETH_ALEN];
6126+ enum flow_offload_xmit_type xmit_type;
6127+};
6128+
6129+static DEFINE_SPINLOCK(hooks_lock);
6130+
6131+struct xt_flowoffload_table flowtable[2];
6132+
6133+static unsigned int
6134+xt_flowoffload_net_hook(void *priv, struct sk_buff *skb,
6135+ const struct nf_hook_state *state)
6136+{
6137+ struct vlan_ethhdr *veth;
6138+ __be16 proto;
6139+
6140+ switch (skb->protocol) {
6141+ case htons(ETH_P_8021Q):
6142+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
6143+ proto = veth->h_vlan_encapsulated_proto;
6144+ break;
6145+ case htons(ETH_P_PPP_SES):
6146+ proto = nf_flow_pppoe_proto(skb);
6147+ break;
6148+ default:
6149+ proto = skb->protocol;
6150+ break;
6151+ }
6152+
6153+ switch (proto) {
6154+ case htons(ETH_P_IP):
6155+ return nf_flow_offload_ip_hook(priv, skb, state);
6156+ case htons(ETH_P_IPV6):
6157+ return nf_flow_offload_ipv6_hook(priv, skb, state);
6158+ }
6159+
6160+ return NF_ACCEPT;
6161+}
6162+
6163+static int
6164+xt_flowoffload_create_hook(struct xt_flowoffload_table *table,
6165+ struct net_device *dev)
6166+{
6167+ struct xt_flowoffload_hook *hook;
6168+ struct nf_hook_ops *ops;
6169+
6170+ hook = kzalloc(sizeof(*hook), GFP_ATOMIC);
6171+ if (!hook)
6172+ return -ENOMEM;
6173+
6174+ ops = &hook->ops;
6175+ ops->pf = NFPROTO_NETDEV;
6176+ ops->hooknum = NF_NETDEV_INGRESS;
6177+ ops->priority = 10;
6178+ ops->priv = &table->ft;
6179+ ops->hook = xt_flowoffload_net_hook;
6180+ ops->dev = dev;
6181+
6182+ hlist_add_head(&hook->list, &table->hooks);
6183+ mod_delayed_work(system_power_efficient_wq, &table->work, 0);
6184+
6185+ return 0;
6186+}
6187+
6188+static struct xt_flowoffload_hook *
6189+flow_offload_lookup_hook(struct xt_flowoffload_table *table,
6190+ struct net_device *dev)
6191+{
6192+ struct xt_flowoffload_hook *hook;
6193+
6194+ hlist_for_each_entry(hook, &table->hooks, list) {
6195+ if (hook->ops.dev == dev)
6196+ return hook;
6197+ }
6198+
6199+ return NULL;
6200+}
6201+
6202+static void
6203+xt_flowoffload_check_device(struct xt_flowoffload_table *table,
6204+ struct net_device *dev)
6205+{
6206+ struct xt_flowoffload_hook *hook;
6207+
6208+ if (!dev)
6209+ return;
6210+
6211+ spin_lock_bh(&hooks_lock);
6212+ hook = flow_offload_lookup_hook(table, dev);
6213+ if (hook)
6214+ hook->used = true;
6215+ else
6216+ xt_flowoffload_create_hook(table, dev);
6217+ spin_unlock_bh(&hooks_lock);
6218+}
6219+
6220+static void
6221+xt_flowoffload_register_hooks(struct xt_flowoffload_table *table)
6222+{
6223+ struct xt_flowoffload_hook *hook;
6224+
6225+restart:
6226+ hlist_for_each_entry(hook, &table->hooks, list) {
6227+ if (hook->registered)
6228+ continue;
6229+
6230+ hook->registered = true;
6231+ hook->net = dev_net(hook->ops.dev);
6232+ spin_unlock_bh(&hooks_lock);
6233+ nf_register_net_hook(hook->net, &hook->ops);
6234+ if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
6235+ table->ft.type->setup(&table->ft, hook->ops.dev,
6236+ FLOW_BLOCK_BIND);
6237+ spin_lock_bh(&hooks_lock);
6238+ goto restart;
6239+ }
6240+
6241+}
6242+
6243+static bool
6244+xt_flowoffload_cleanup_hooks(struct xt_flowoffload_table *table)
6245+{
6246+ struct xt_flowoffload_hook *hook;
6247+ bool active = false;
6248+
6249+restart:
6250+ spin_lock_bh(&hooks_lock);
6251+ hlist_for_each_entry(hook, &table->hooks, list) {
6252+ if (hook->used || !hook->registered) {
6253+ active = true;
6254+ continue;
6255+ }
6256+
6257+ hlist_del(&hook->list);
6258+ spin_unlock_bh(&hooks_lock);
6259+ if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
6260+ table->ft.type->setup(&table->ft, hook->ops.dev,
6261+ FLOW_BLOCK_UNBIND);
6262+ nf_unregister_net_hook(hook->net, &hook->ops);
6263+ kfree(hook);
6264+ goto restart;
6265+ }
6266+ spin_unlock_bh(&hooks_lock);
6267+
6268+ return active;
6269+}
6270+
6271+static void
6272+xt_flowoffload_check_hook(struct flow_offload *flow, void *data)
6273+{
6274+ struct xt_flowoffload_table *table = data;
6275+ struct flow_offload_tuple *tuple0 = &flow->tuplehash[0].tuple;
6276+ struct flow_offload_tuple *tuple1 = &flow->tuplehash[1].tuple;
6277+ struct xt_flowoffload_hook *hook;
6278+
6279+ spin_lock_bh(&hooks_lock);
6280+ hlist_for_each_entry(hook, &table->hooks, list) {
6281+ if (hook->ops.dev->ifindex != tuple0->iifidx &&
6282+ hook->ops.dev->ifindex != tuple1->iifidx)
6283+ continue;
6284+
6285+ hook->used = true;
6286+ }
6287+ spin_unlock_bh(&hooks_lock);
6288+}
6289+
6290+static void
6291+xt_flowoffload_hook_work(struct work_struct *work)
6292+{
6293+ struct xt_flowoffload_table *table;
6294+ struct xt_flowoffload_hook *hook;
6295+ int err;
6296+
6297+ table = container_of(work, struct xt_flowoffload_table, work.work);
6298+
6299+ spin_lock_bh(&hooks_lock);
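+ /* Register pending hooks, clear the per-hook used flags, re-mark hooks
+ * still referenced by active flows and prune the rest; reschedule while
+ * any hook remains active.
+ */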
6300+ xt_flowoffload_register_hooks(table);
6301+ hlist_for_each_entry(hook, &table->hooks, list)
6302+ hook->used = false;
6303+ spin_unlock_bh(&hooks_lock);
6304+
6305+ err = nf_flow_table_iterate(&table->ft, xt_flowoffload_check_hook,
6306+ table);
6307+ if (err && err != -EAGAIN)
6308+ goto out;
6309+
6310+ if (!xt_flowoffload_cleanup_hooks(table))
6311+ return;
6312+
6313+out:
6314+ queue_delayed_work(system_power_efficient_wq, &table->work, HZ);
6315+}
6316+
6317+static bool
6318+xt_flowoffload_skip(struct sk_buff *skb, int family)
6319+{
6320+ if (skb_sec_path(skb))
6321+ return true;
6322+
6323+ if (family == NFPROTO_IPV4) {
6324+ const struct ip_options *opt = &(IPCB(skb)->opt);
6325+
6326+ if (unlikely(opt->optlen))
6327+ return true;
6328+ }
6329+
6330+ return false;
6331+}
6332+
6333+static enum flow_offload_xmit_type nf_xmit_type(struct dst_entry *dst)
6334+{
6335+ if (dst_xfrm(dst))
6336+ return FLOW_OFFLOAD_XMIT_XFRM;
6337+
6338+ return FLOW_OFFLOAD_XMIT_NEIGH;
6339+}
6340+
6341+static void nf_default_forward_path(struct nf_flow_route *route,
6342+ struct dst_entry *dst_cache,
6343+ enum ip_conntrack_dir dir,
6344+ struct net_device **dev)
6345+{
6346+ route->tuple[!dir].in.ifindex = dst_cache->dev->ifindex;
6347+ route->tuple[dir].dst = dst_cache;
6348+ route->tuple[dir].xmit_type = nf_xmit_type(dst_cache);
6349+}
6350+
6351+static bool nf_is_valid_ether_device(const struct net_device *dev)
6352+{
6353+ if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
6354+ dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
6355+ return false;
6356+
6357+ return true;
6358+}
6359+
6360+static void nf_dev_path_info(const struct net_device_path_stack *stack,
6361+ struct nf_forward_info *info,
6362+ unsigned char *ha)
6363+{
6364+ const struct net_device_path *path;
6365+ int i;
6366+
6367+ memcpy(info->h_dest, ha, ETH_ALEN);
6368+
6369+ for (i = 0; i < stack->num_paths; i++) {
6370+ path = &stack->path[i];
6371+
6372+ info->indev = path->dev;
6373+
6374+ switch (path->type) {
6375+ case DEV_PATH_ETHERNET:
6376+ case DEV_PATH_DSA:
6377+ case DEV_PATH_VLAN:
6378+ case DEV_PATH_PPPOE:
6379+ if (is_zero_ether_addr(info->h_source))
6380+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
6381+
6382+ if (path->type == DEV_PATH_ETHERNET)
6383+ break;
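+ /* Stop the device-path walk at the DSA device. */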
6384+ if (path->type == DEV_PATH_DSA) {
6385+ i = stack->num_paths;
6386+ break;
6387+ }
6388+
6389+ /* DEV_PATH_VLAN and DEV_PATH_PPPOE */
6390+ if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
6391+ info->indev = NULL;
6392+ break;
6393+ }
6394+ if (!info->outdev)
6395+ info->outdev = path->dev;
6396+ info->encap[info->num_encaps].id = path->encap.id;
6397+ info->encap[info->num_encaps].proto = path->encap.proto;
6398+ info->num_encaps++;
6399+ if (path->type == DEV_PATH_PPPOE)
6400+ memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
6401+ break;
6402+ case DEV_PATH_BRIDGE:
6403+ if (is_zero_ether_addr(info->h_source))
6404+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
6405+
6406+ switch (path->bridge.vlan_mode) {
6407+ case DEV_PATH_BR_VLAN_UNTAG_HW:
6408+ info->ingress_vlans |= BIT(info->num_encaps - 1);
6409+ break;
6410+ case DEV_PATH_BR_VLAN_TAG:
6411+ info->encap[info->num_encaps].id = path->bridge.vlan_id;
6412+ info->encap[info->num_encaps].proto = path->bridge.vlan_proto;
6413+ info->num_encaps++;
6414+ break;
6415+ case DEV_PATH_BR_VLAN_UNTAG:
6416+ info->num_encaps--;
6417+ break;
6418+ case DEV_PATH_BR_VLAN_KEEP:
6419+ break;
6420+ }
6421+ break;
6422+ default:
6423+ break;
6424+ }
6425+ }
6426+ if (!info->outdev)
6427+ info->outdev = info->indev;
6428+
6429+ info->hw_outdev = info->indev;
6430+
6431+ if (nf_is_valid_ether_device(info->indev))
6432+ info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
6433+}
6434+
6435+static int nf_dev_fill_forward_path(const struct nf_flow_route *route,
6436+ const struct dst_entry *dst_cache,
6437+ const struct nf_conn *ct,
6438+ enum ip_conntrack_dir dir, u8 *ha,
developer494fbb62023-12-14 23:11:24 +08006439+ struct net_device_path_stack *stack,
6440+ bool is_bridge)
developer8cb3ac72022-07-04 10:55:14 +08006441+{
6442+ const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
6443+ struct net_device *dev = dst_cache->dev;
6444+ struct neighbour *n;
6445+ u8 nud_state;
6446+
6447+ if (!nf_is_valid_ether_device(dev))
6448+ goto out;
6449+
developer494fbb62023-12-14 23:11:24 +08006450+ if (!is_bridge) {
developer9fdc0e82023-05-12 14:21:17 +08006451+ n = dst_neigh_lookup(dst_cache, daddr);
6452+ if (!n)
6453+ return -1;
developer8cb3ac72022-07-04 10:55:14 +08006454+
developer9fdc0e82023-05-12 14:21:17 +08006455+ read_lock_bh(&n->lock);
6456+ nud_state = n->nud_state;
6457+ ether_addr_copy(ha, n->ha);
6458+ read_unlock_bh(&n->lock);
6459+ neigh_release(n);
developer8cb3ac72022-07-04 10:55:14 +08006460+
developer9fdc0e82023-05-12 14:21:17 +08006461+ if (!(nud_state & NUD_VALID))
6462+ return -1;
6463+ }
developer64db8532023-04-28 13:56:00 +08006464+
developer8cb3ac72022-07-04 10:55:14 +08006465+out:
6466+ return dev_fill_forward_path(dev, ha, stack);
6467+}
6468+
developer9fdc0e82023-05-12 14:21:17 +08006469+static int nf_dev_forward_path(struct sk_buff *skb,
6470+ struct nf_flow_route *route,
developer8cb3ac72022-07-04 10:55:14 +08006471+ const struct nf_conn *ct,
6472+ enum ip_conntrack_dir dir,
6473+ struct net_device **devs)
6474+{
6475+ const struct dst_entry *dst = route->tuple[dir].dst;
developer9fdc0e82023-05-12 14:21:17 +08006476+ struct ethhdr *eth;
6477+ enum ip_conntrack_dir skb_dir;
developer8cb3ac72022-07-04 10:55:14 +08006478+ struct net_device_path_stack stack;
6479+ struct nf_forward_info info = {};
6480+ unsigned char ha[ETH_ALEN];
developer494fbb62023-12-14 23:11:24 +08006481+ bool is_bridge = false;
developer8cb3ac72022-07-04 10:55:14 +08006482+ int i;
6483+
developer494fbb62023-12-14 23:11:24 +08006484+ if (devs[dir] == devs[!dir])
6485+ is_bridge = true;
6486+
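+ /* For bridged flows, take the MAC addresses from the skb's Ethernet
+ * header instead of a neighbour lookup, swapping them when the skb
+ * travels in the reply direction.
+ */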
6487+ if (is_bridge && skb_mac_header_was_set(skb)) {
developer9fdc0e82023-05-12 14:21:17 +08006488+ eth = eth_hdr(skb);
6489+ skb_dir = CTINFO2DIR(skb_get_nfct(skb) & NFCT_INFOMASK);
6490+
6491+ if (skb_dir != dir) {
6492+ memcpy(ha, eth->h_source, ETH_ALEN);
6493+ memcpy(info.h_source, eth->h_dest, ETH_ALEN);
6494+ } else {
6495+ memcpy(ha, eth->h_dest, ETH_ALEN);
6496+ memcpy(info.h_source, eth->h_source, ETH_ALEN);
6497+ }
6498+ }
6499+
developer494fbb62023-12-14 23:11:24 +08006500+ if (nf_dev_fill_forward_path(route, dst, ct, dir, ha, &stack, is_bridge) >= 0)
developer8cb3ac72022-07-04 10:55:14 +08006501+ nf_dev_path_info(&stack, &info, ha);
6502+
6503+ devs[!dir] = (struct net_device *)info.indev;
6504+ if (!info.indev)
6505+ return -1;
6506+
6507+ route->tuple[!dir].in.ifindex = info.indev->ifindex;
6508+ for (i = 0; i < info.num_encaps; i++) {
6509+ route->tuple[!dir].in.encap[i].id = info.encap[i].id;
6510+ route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
6511+ }
6512+ route->tuple[!dir].in.num_encaps = info.num_encaps;
6513+ route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
6514+
6515+ if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
6516+ memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
6517+ memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
6518+ route->tuple[dir].out.ifindex = info.outdev->ifindex;
6519+ route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex;
6520+ route->tuple[dir].xmit_type = info.xmit_type;
6521+ }
6522+
6523+ return 0;
6524+}
6525+
6526+static int
6527+xt_flowoffload_route_dir(struct nf_flow_route *route, const struct nf_conn *ct,
6528+ enum ip_conntrack_dir dir,
6529+ const struct xt_action_param *par, int ifindex,
6530+ struct net_device **devs)
6531+{
6532+ struct dst_entry *dst = NULL;
6533+ struct flowi fl;
6534+
6535+ memset(&fl, 0, sizeof(fl));
6536+ switch (xt_family(par)) {
6537+ case NFPROTO_IPV4:
6538+ fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.src.u3.ip;
6539+ fl.u.ip4.flowi4_oif = ifindex;
6540+ break;
6541+ case NFPROTO_IPV6:
6542+ fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
6543+ fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.src.u3.in6;
6544+ fl.u.ip6.flowi6_oif = ifindex;
6545+ break;
6546+ }
6547+
6548+ nf_route(xt_net(par), &dst, &fl, false, xt_family(par));
6549+ if (!dst)
6550+ return -ENOENT;
6551+
6552+ nf_default_forward_path(route, dst, dir, devs);
6553+
6554+ return 0;
6555+}
6556+
6557+static int
developer480c5d52022-12-28 14:48:14 +08006558+xt_flowoffload_route_nat(struct sk_buff *skb, const struct nf_conn *ct,
6559+ const struct xt_action_param *par,
6560+ struct nf_flow_route *route, enum ip_conntrack_dir dir,
6561+ struct net_device **devs)
6562+{
6563+ struct dst_entry *this_dst = skb_dst(skb);
6564+ struct dst_entry *other_dst = NULL;
6565+ struct flowi fl;
6566+
6567+ memset(&fl, 0, sizeof(fl));
6568+ switch (xt_family(par)) {
6569+ case NFPROTO_IPV4:
6570+ fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
6571+ fl.u.ip4.flowi4_oif = xt_in(par)->ifindex;
6572+ break;
6573+ case NFPROTO_IPV6:
6574+ fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
6575+ fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
6576+ fl.u.ip6.flowi6_oif = xt_in(par)->ifindex;
6577+ break;
6578+ }
6579+
6580+ nf_route(xt_net(par), &other_dst, &fl, false, xt_family(par));
6581+ if (!other_dst)
6582+ return -ENOENT;
6583+
6584+ nf_default_forward_path(route, this_dst, dir, devs);
6585+ nf_default_forward_path(route, other_dst, !dir, devs);
6586+
developer7e533772023-04-27 05:59:30 +08006587+ if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH &&
developer480c5d52022-12-28 14:48:14 +08006588+ route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
developer9fdc0e82023-05-12 14:21:17 +08006589+ if (nf_dev_forward_path(skb, route, ct, dir, devs))
developer480c5d52022-12-28 14:48:14 +08006590+ return -1;
developer9fdc0e82023-05-12 14:21:17 +08006591+ if (nf_dev_forward_path(skb, route, ct, !dir, devs))
developer480c5d52022-12-28 14:48:14 +08006592+ return -1;
6593+ }
6594+
6595+ return 0;
6596+}
6597+
6598+static int
6599+xt_flowoffload_route_bridge(struct sk_buff *skb, const struct nf_conn *ct,
6600+ const struct xt_action_param *par,
6601+ struct nf_flow_route *route, enum ip_conntrack_dir dir,
6602+ struct net_device **devs)
developer8cb3ac72022-07-04 10:55:14 +08006603+{
6604+ int ret;
6605+
6606+ ret = xt_flowoffload_route_dir(route, ct, dir, par,
6607+ devs[dir]->ifindex,
6608+ devs);
6609+ if (ret)
6610+ return ret;
6611+
6612+ ret = xt_flowoffload_route_dir(route, ct, !dir, par,
6613+ devs[!dir]->ifindex,
6614+ devs);
6615+ if (ret)
developer67bbcc02022-07-08 09:04:01 +08006616+ goto err_route_dir1;
developer8cb3ac72022-07-04 10:55:14 +08006617+
developer7e533772023-04-27 05:59:30 +08006618+ if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH &&
developer8cb3ac72022-07-04 10:55:14 +08006619+ route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
developer9fdc0e82023-05-12 14:21:17 +08006620+ if (nf_dev_forward_path(skb, route, ct, dir, devs) ||
6621+ nf_dev_forward_path(skb, route, ct, !dir, devs)) {
developer67bbcc02022-07-08 09:04:01 +08006622+ ret = -1;
6623+ goto err_route_dir2;
6624+ }
developer8cb3ac72022-07-04 10:55:14 +08006625+ }
6626+
6627+ return 0;
developer67bbcc02022-07-08 09:04:01 +08006628+
6629+err_route_dir2:
6630+ dst_release(route->tuple[!dir].dst);
6631+err_route_dir1:
6632+ dst_release(route->tuple[dir].dst);
6633+ return ret;
developer8cb3ac72022-07-04 10:55:14 +08006634+}
6635+
6636+static unsigned int
6637+flowoffload_tg(struct sk_buff *skb, const struct xt_action_param *par)
6638+{
6639+ struct xt_flowoffload_table *table;
6640+ const struct xt_flowoffload_target_info *info = par->targinfo;
6641+ struct tcphdr _tcph, *tcph = NULL;
6642+ enum ip_conntrack_info ctinfo;
6643+ enum ip_conntrack_dir dir;
6644+ struct nf_flow_route route = {};
6645+ struct flow_offload *flow = NULL;
6646+ struct net_device *devs[2] = {};
6647+ struct nf_conn *ct;
6648+ struct net *net;
6649+
6650+ if (xt_flowoffload_skip(skb, xt_family(par)))
6651+ return XT_CONTINUE;
6652+
6653+ ct = nf_ct_get(skb, &ctinfo);
6654+ if (ct == NULL)
6655+ return XT_CONTINUE;
6656+
6657+ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
6658+ case IPPROTO_TCP:
6659+ if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
6660+ return XT_CONTINUE;
6661+
6662+ tcph = skb_header_pointer(skb, par->thoff,
6663+ sizeof(_tcph), &_tcph);
6664+ if (unlikely(!tcph || tcph->fin || tcph->rst))
6665+ return XT_CONTINUE;
6666+ break;
6667+ case IPPROTO_UDP:
6668+ break;
6669+ default:
6670+ return XT_CONTINUE;
6671+ }
6672+
6673+ if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
6674+ ct->status & IPS_SEQ_ADJUST)
6675+ return XT_CONTINUE;
6676+
6677+ if (!nf_ct_is_confirmed(ct))
6678+ return XT_CONTINUE;
6679+
6680+ dir = CTINFO2DIR(ctinfo);
6681+
6682+ devs[dir] = xt_out(par);
6683+ devs[!dir] = xt_in(par);
6684+
6685+ if (!devs[dir] || !devs[!dir])
6686+ return XT_CONTINUE;
6687+
6688+ if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
6689+ return XT_CONTINUE;
6690+
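+ /* NATed connections reuse the skb's dst for this direction and look up
+ * only the reverse route; other connections look up both directions.
+ */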
developer480c5d52022-12-28 14:48:14 +08006691+ if (ct->status & IPS_NAT_MASK) {
6692+ if (xt_flowoffload_route_nat(skb, ct, par, &route, dir, devs) < 0)
6693+ goto err_flow_route;
6694+ } else {
6695+ if (xt_flowoffload_route_bridge(skb, ct, par, &route, dir, devs) < 0)
6696+ goto err_flow_route;
6697+ }
developer8cb3ac72022-07-04 10:55:14 +08006698+
6699+ flow = flow_offload_alloc(ct);
6700+ if (!flow)
6701+ goto err_flow_alloc;
6702+
6703+ if (flow_offload_route_init(flow, &route) < 0)
6704+ goto err_flow_add;
6705+
6706+ if (tcph) {
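+ /* Conntrack stops seeing every packet once the flow is offloaded, so
+ * be liberal about TCP window tracking on this connection.
+ */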
6707+ ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
6708+ ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
6709+ }
6710+
6711+ table = &flowtable[!!(info->flags & XT_FLOWOFFLOAD_HW)];
6712+
6713+ net = read_pnet(&table->ft.net);
6714+ if (!net)
6715+ write_pnet(&table->ft.net, xt_net(par));
6716+
6717+ if (flow_offload_add(&table->ft, flow) < 0)
6718+ goto err_flow_add;
6719+
6720+ xt_flowoffload_check_device(table, devs[0]);
6721+ xt_flowoffload_check_device(table, devs[1]);
6722+
developer480c5d52022-12-28 14:48:14 +08006723+ if (!(ct->status & IPS_NAT_MASK))
6724+ dst_release(route.tuple[dir].dst);
developer8cb3ac72022-07-04 10:55:14 +08006725+ dst_release(route.tuple[!dir].dst);
6726+
6727+ return XT_CONTINUE;
6728+
6729+err_flow_add:
6730+ flow_offload_free(flow);
6731+err_flow_alloc:
developer480c5d52022-12-28 14:48:14 +08006732+ if (!(ct->status & IPS_NAT_MASK))
6733+ dst_release(route.tuple[dir].dst);
developer8cb3ac72022-07-04 10:55:14 +08006734+ dst_release(route.tuple[!dir].dst);
6735+err_flow_route:
6736+ clear_bit(IPS_OFFLOAD_BIT, &ct->status);
6737+
6738+ return XT_CONTINUE;
6739+}
6740+
6741+static int flowoffload_chk(const struct xt_tgchk_param *par)
6742+{
6743+ struct xt_flowoffload_target_info *info = par->targinfo;
6744+
6745+ if (info->flags & ~XT_FLOWOFFLOAD_MASK)
6746+ return -EINVAL;
6747+
6748+ return 0;
6749+}
6750+
6751+static struct xt_target offload_tg_reg __read_mostly = {
6752+ .family = NFPROTO_UNSPEC,
6753+ .name = "FLOWOFFLOAD",
6754+ .revision = 0,
6755+ .targetsize = sizeof(struct xt_flowoffload_target_info),
6756+ .usersize = sizeof(struct xt_flowoffload_target_info),
6757+ .checkentry = flowoffload_chk,
6758+ .target = flowoffload_tg,
6759+ .me = THIS_MODULE,
6760+};
6761+
6762+static int flow_offload_netdev_event(struct notifier_block *this,
6763+ unsigned long event, void *ptr)
6764+{
6765+ struct xt_flowoffload_hook *hook0, *hook1;
6766+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
6767+
6768+ if (event != NETDEV_UNREGISTER)
6769+ return NOTIFY_DONE;
6770+
6771+ spin_lock_bh(&hooks_lock);
6772+ hook0 = flow_offload_lookup_hook(&flowtable[0], dev);
6773+ if (hook0)
6774+ hlist_del(&hook0->list);
6775+
6776+ hook1 = flow_offload_lookup_hook(&flowtable[1], dev);
6777+ if (hook1)
6778+ hlist_del(&hook1->list);
6779+ spin_unlock_bh(&hooks_lock);
6780+
6781+ if (hook0) {
6782+ nf_unregister_net_hook(hook0->net, &hook0->ops);
6783+ kfree(hook0);
6784+ }
6785+
6786+ if (hook1) {
6787+ nf_unregister_net_hook(hook1->net, &hook1->ops);
6788+ kfree(hook1);
6789+ }
6790+
6791+ nf_flow_table_cleanup(dev);
6792+
6793+ return NOTIFY_DONE;
6794+}
6795+
6796+static struct notifier_block flow_offload_netdev_notifier = {
6797+ .notifier_call = flow_offload_netdev_event,
6798+};
6799+
6800+static int nf_flow_rule_route_inet(struct net *net,
6801+ const struct flow_offload *flow,
6802+ enum flow_offload_tuple_dir dir,
6803+ struct nf_flow_rule *flow_rule)
6804+{
6805+ const struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
6806+ int err;
6807+
6808+ switch (flow_tuple->l3proto) {
6809+ case NFPROTO_IPV4:
6810+ err = nf_flow_rule_route_ipv4(net, flow, dir, flow_rule);
6811+ break;
6812+ case NFPROTO_IPV6:
6813+ err = nf_flow_rule_route_ipv6(net, flow, dir, flow_rule);
6814+ break;
6815+ default:
6816+ err = -1;
6817+ break;
6818+ }
6819+
6820+ return err;
6821+}
6822+
6823+static struct nf_flowtable_type flowtable_inet = {
6824+ .family = NFPROTO_INET,
6825+ .init = nf_flow_table_init,
6826+ .setup = nf_flow_table_offload_setup,
6827+ .action = nf_flow_rule_route_inet,
6828+ .free = nf_flow_table_free,
6829+ .hook = xt_flowoffload_net_hook,
6830+ .owner = THIS_MODULE,
6831+};
6832+
6833+static int init_flowtable(struct xt_flowoffload_table *tbl)
6834+{
6835+ INIT_DELAYED_WORK(&tbl->work, xt_flowoffload_hook_work);
6836+ tbl->ft.type = &flowtable_inet;
6837+
6838+ return nf_flow_table_init(&tbl->ft);
6839+}
6840+
6841+static int __init xt_flowoffload_tg_init(void)
6842+{
6843+ int ret;
6844+
6845+ register_netdevice_notifier(&flow_offload_netdev_notifier);
6846+
6847+ ret = init_flowtable(&flowtable[0]);
6848+ if (ret)
6849+ return ret;
6850+
6851+ ret = init_flowtable(&flowtable[1]);
6852+ if (ret)
6853+ goto cleanup;
6854+
developeree39bcf2023-06-16 08:03:30 +08006855+ flowtable[1].ft.flags = NF_FLOWTABLE_HW_OFFLOAD;
developer8cb3ac72022-07-04 10:55:14 +08006856+
6857+ ret = xt_register_target(&offload_tg_reg);
6858+ if (ret)
6859+ goto cleanup2;
6860+
6861+ return 0;
6862+
6863+cleanup2:
6864+ nf_flow_table_free(&flowtable[1].ft);
6865+cleanup:
6866+ nf_flow_table_free(&flowtable[0].ft);
6867+ return ret;
6868+}
6869+
6870+static void __exit xt_flowoffload_tg_exit(void)
6871+{
6872+ xt_unregister_target(&offload_tg_reg);
6873+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
6874+ nf_flow_table_free(&flowtable[0].ft);
6875+ nf_flow_table_free(&flowtable[1].ft);
6876+}
6877+
6878+MODULE_LICENSE("GPL");
6879+module_init(xt_flowoffload_tg_init);
6880+module_exit(xt_flowoffload_tg_exit);
6881--
68822.18.0
6883