From b80c745d2b90b30558e4f5b12060af956ae8e76d Mon Sep 17 00:00:00 2001
From: Bo Jiao <Bo.Jiao@mediatek.com>
Date: Mon, 18 Sep 2023 10:52:27 +0800
Subject: [PATCH 02/22] mt7622 backport nf hw offload framework and upstream
 hnat plus xt-FLOWOFFLOAD update v2

---
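Example usage, as a sketch only: with this series applied and the new
FLOWOFFLOAD target built (NETFILTER_XT_TARGET_FLOWOFFLOAD), established
forwarded connections can be handed to the PPE from a firewall rule,
assuming the companion libxt_FLOWOFFLOAD userspace extension is installed:

    iptables -I FORWARD -m conntrack --ctstate RELATED,ESTABLISHED -j FLOWOFFLOAD --hw

The --hw flag requests hardware offload through the mtk_ppe_offload path
(the flag bit is defined in the new xt_FLOWOFFLOAD.h uapi header); without
it, flows are only software-offloaded by xt_FLOWOFFLOAD.c.
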
 drivers/net/ethernet/mediatek/Makefile | 3 +-
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 25 +-
 drivers/net/ethernet/mediatek/mtk_eth_soc.h | 19 +-
 drivers/net/ethernet/mediatek/mtk_ppe.c | 510 +++++++
 drivers/net/ethernet/mediatek/mtk_ppe.h | 288 ++++
 .../net/ethernet/mediatek/mtk_ppe_debugfs.c | 214 +++
 .../net/ethernet/mediatek/mtk_ppe_offload.c | 535 ++++++++
 drivers/net/ethernet/mediatek/mtk_ppe_regs.h | 144 ++
 drivers/net/ppp/ppp_generic.c | 22 +
 drivers/net/ppp/pppoe.c | 24 +
 include/linux/netdevice.h | 60 +
 include/linux/ppp_channel.h | 3 +
 include/net/dsa.h | 10 +
 include/net/flow_offload.h | 4 +
 include/net/ip6_route.h | 5 +-
 .../net/netfilter/ipv6/nf_conntrack_ipv6.h | 3 -
 include/net/netfilter/nf_conntrack.h | 12 +
 include/net/netfilter/nf_conntrack_acct.h | 11 +
 include/net/netfilter/nf_flow_table.h | 266 +++-
 include/net/netns/conntrack.h | 6 +
 .../linux/netfilter/nf_conntrack_common.h | 9 +-
 include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h | 17 +
 net/8021q/vlan_dev.c | 21 +
 net/bridge/br_device.c | 49 +
 net/bridge/br_private.h | 20 +
 net/bridge/br_vlan.c | 55 +
 net/core/dev.c | 46 +
 net/dsa/dsa.c | 9 +
 net/dsa/slave.c | 37 +-
 net/ipv4/netfilter/Kconfig | 4 +-
 net/ipv6/ip6_output.c | 2 +-
 net/ipv6/netfilter/Kconfig | 3 +-
 net/ipv6/route.c | 22 +-
 net/netfilter/Kconfig | 14 +-
 net/netfilter/Makefile | 4 +-
 net/netfilter/nf_conntrack_core.c | 20 +-
 net/netfilter/nf_conntrack_proto_tcp.c | 4 +
 net/netfilter/nf_conntrack_proto_udp.c | 4 +
 net/netfilter/nf_conntrack_standalone.c | 34 +-
 net/netfilter/nf_flow_table_core.c | 462 ++++---
 net/netfilter/nf_flow_table_ip.c | 447 +++---
 net/netfilter/nf_flow_table_offload.c | 1199 +++++++++++++++++
 net/netfilter/xt_FLOWOFFLOAD.c | 794 +++++++++++
 43 files changed, 5005 insertions(+), 435 deletions(-)
 mode change 100644 => 100755 drivers/net/ethernet/mediatek/Makefile
 mode change 100644 => 100755 drivers/net/ethernet/mediatek/mtk_eth_soc.c
 mode change 100644 => 100755 drivers/net/ethernet/mediatek/mtk_eth_soc.h
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.c
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.h
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_offload.c
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_regs.h
 create mode 100644 include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
 create mode 100644 net/netfilter/nf_flow_table_offload.c
 create mode 100644 net/netfilter/xt_FLOWOFFLOAD.c

64diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile
developer58aa0682023-09-18 14:02:26 +080065old mode 100644
66new mode 100755
67index 634640d..5f342f4
developer8cb3ac72022-07-04 10:55:14 +080068--- a/drivers/net/ethernet/mediatek/Makefile
69+++ b/drivers/net/ethernet/mediatek/Makefile
developeree39bcf2023-06-16 08:03:30 +080070@@ -4,5 +4,6 @@
developer8cb3ac72022-07-04 10:55:14 +080071 #
72
73 obj-$(CONFIG_NET_MEDIATEK_SOC) += mtk_eth.o
developer68838542022-10-03 23:42:21 +080074-mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_usxgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o
75+mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_usxgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o \
developer8cb3ac72022-07-04 10:55:14 +080076+ mtk_ppe.o mtk_ppe_debugfs.o mtk_ppe_offload.o
77 obj-$(CONFIG_NET_MEDIATEK_HNAT) += mtk_hnat/
78diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
developer58aa0682023-09-18 14:02:26 +080079old mode 100644
80new mode 100755
81index c4bea4d..9c85e16
developer8cb3ac72022-07-04 10:55:14 +080082--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
83+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
developer58aa0682023-09-18 14:02:26 +080084@@ -3573,6 +3573,7 @@ static int mtk_open(struct net_device *dev)
85 u32 id = mtk_mac2xgmii_id(eth, mac->id);
developerdca0fde2022-12-14 11:40:35 +080086 int err, i;
87 struct device_node *phy_node;
developeree39bcf2023-06-16 08:03:30 +080088+ u32 gdm_config = MTK_GDMA_TO_PDMA;
developer8cb3ac72022-07-04 10:55:14 +080089
developeree39bcf2023-06-16 08:03:30 +080090 err = phylink_of_phy_connect(mac->phylink, mac->of_node, 0);
91 if (err) {
developer58aa0682023-09-18 14:02:26 +080092@@ -3650,7 +3651,10 @@ static int mtk_open(struct net_device *dev)
93 regmap_write(eth->sgmii->pcs[id].regmap,
94 SGMSYS_QPHY_PWR_STATE_CTRL, 0);
developer8cb3ac72022-07-04 10:55:14 +080095
developerdca0fde2022-12-14 11:40:35 +080096- mtk_gdm_config(eth, mac->id, MTK_GDMA_TO_PDMA);
developeree39bcf2023-06-16 08:03:30 +080097+ if (eth->soc->offload_version && mtk_ppe_start(&eth->ppe) == 0)
98+ gdm_config = MTK_GDMA_TO_PPE;
developer8cb3ac72022-07-04 10:55:14 +080099+
developerdca0fde2022-12-14 11:40:35 +0800100+ mtk_gdm_config(eth, mac->id, gdm_config);
developer8cb3ac72022-07-04 10:55:14 +0800101
developerdca0fde2022-12-14 11:40:35 +0800102 return 0;
103 }
developer58aa0682023-09-18 14:02:26 +0800104@@ -3730,6 +3734,9 @@ static int mtk_stop(struct net_device *dev)
developer8cb3ac72022-07-04 10:55:14 +0800105
106 mtk_dma_free(eth);
107
developeree39bcf2023-06-16 08:03:30 +0800108+ if (eth->soc->offload_version)
109+ mtk_ppe_stop(&eth->ppe);
developer8cb3ac72022-07-04 10:55:14 +0800110+
111 return 0;
112 }
113
developer58aa0682023-09-18 14:02:26 +0800114@@ -4576,6 +4583,7 @@ static const struct net_device_ops mtk_netdev_ops = {
developer8cb3ac72022-07-04 10:55:14 +0800115 #ifdef CONFIG_NET_POLL_CONTROLLER
116 .ndo_poll_controller = mtk_poll_controller,
117 #endif
118+ .ndo_setup_tc = mtk_eth_setup_tc,
119 };
120
developer58aa0682023-09-18 14:02:26 +0800121 static void mux_poll(struct work_struct *work)
122@@ -5161,6 +5169,17 @@ static int mtk_probe(struct platform_device *pdev)
developer8cb3ac72022-07-04 10:55:14 +0800123 goto err_free_dev;
124 }
125
126+ if (eth->soc->offload_version) {
developeree39bcf2023-06-16 08:03:30 +0800127+ err = mtk_ppe_init(&eth->ppe, eth->dev,
128+ eth->base + MTK_ETH_PPE_BASE, 2);
129+ if (err)
130+ goto err_free_dev;
developer8cb3ac72022-07-04 10:55:14 +0800131+
132+ err = mtk_eth_offload_init(eth);
133+ if (err)
134+ goto err_free_dev;
135+ }
136+
137 for (i = 0; i < MTK_MAX_DEVS; i++) {
138 if (!eth->netdev[i])
139 continue;
developer58aa0682023-09-18 14:02:26 +0800140@@ -5254,6 +5273,7 @@ static const struct mtk_soc_data mt2701_data = {
developer8cb3ac72022-07-04 10:55:14 +0800141 .required_clks = MT7623_CLKS_BITMAP,
142 .required_pctl = true,
143 .has_sram = false,
developeree39bcf2023-06-16 08:03:30 +0800144+ .offload_version = 2,
developer58aa0682023-09-18 14:02:26 +0800145 .rss_num = 0,
developerdca0fde2022-12-14 11:40:35 +0800146 .txrx = {
147 .txd_size = sizeof(struct mtk_tx_dma),
developer58aa0682023-09-18 14:02:26 +0800148@@ -5271,6 +5291,7 @@ static const struct mtk_soc_data mt7621_data = {
developer8cb3ac72022-07-04 10:55:14 +0800149 .required_clks = MT7621_CLKS_BITMAP,
150 .required_pctl = false,
151 .has_sram = false,
developeree39bcf2023-06-16 08:03:30 +0800152+ .offload_version = 2,
developer58aa0682023-09-18 14:02:26 +0800153 .rss_num = 0,
developerdca0fde2022-12-14 11:40:35 +0800154 .txrx = {
155 .txd_size = sizeof(struct mtk_tx_dma),
developer58aa0682023-09-18 14:02:26 +0800156@@ -5289,6 +5310,7 @@ static const struct mtk_soc_data mt7622_data = {
developer8cb3ac72022-07-04 10:55:14 +0800157 .required_clks = MT7622_CLKS_BITMAP,
158 .required_pctl = false,
159 .has_sram = false,
160+ .offload_version = 2,
developer58aa0682023-09-18 14:02:26 +0800161 .rss_num = 0,
developerdca0fde2022-12-14 11:40:35 +0800162 .txrx = {
163 .txd_size = sizeof(struct mtk_tx_dma),
developer58aa0682023-09-18 14:02:26 +0800164@@ -5306,6 +5328,7 @@ static const struct mtk_soc_data mt7623_data = {
developer8cb3ac72022-07-04 10:55:14 +0800165 .required_clks = MT7623_CLKS_BITMAP,
166 .required_pctl = true,
167 .has_sram = false,
developer7eb15dc2023-06-14 17:44:03 +0800168+ .offload_version = 2,
developer58aa0682023-09-18 14:02:26 +0800169 .rss_num = 0,
developer7eb15dc2023-06-14 17:44:03 +0800170 .txrx = {
171 .txd_size = sizeof(struct mtk_tx_dma),
developer8cb3ac72022-07-04 10:55:14 +0800172diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
developer58aa0682023-09-18 14:02:26 +0800173old mode 100644
174new mode 100755
175index 8a9b615..a87e46d
developer8cb3ac72022-07-04 10:55:14 +0800176--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
177+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
178@@ -15,6 +15,8 @@
179 #include <linux/u64_stats_sync.h>
180 #include <linux/refcount.h>
181 #include <linux/phylink.h>
182+#include <linux/rhashtable.h>
183+#include "mtk_ppe.h"
184
185 #define MTK_QDMA_PAGE_SIZE 2048
186 #define MTK_MAX_RX_LENGTH 1536
developer58aa0682023-09-18 14:02:26 +0800187@@ -44,7 +46,8 @@
developer8cb3ac72022-07-04 10:55:14 +0800188 NETIF_F_HW_VLAN_CTAG_TX | \
189 NETIF_F_SG | NETIF_F_TSO | \
190 NETIF_F_TSO6 | \
191- NETIF_F_IPV6_CSUM)
192+ NETIF_F_IPV6_CSUM |\
193+ NETIF_F_HW_TC)
194 #define MTK_SET_FEATURES (NETIF_F_LRO | \
195 NETIF_F_HW_VLAN_CTAG_RX)
196 #define MTK_HW_FEATURES_MT7628 (NETIF_F_SG | NETIF_F_RXCSUM)
developer58aa0682023-09-18 14:02:26 +0800197@@ -127,6 +130,7 @@
developer8cb3ac72022-07-04 10:55:14 +0800198 #define MTK_GDMA_UCS_EN BIT(20)
developer58aa0682023-09-18 14:02:26 +0800199 #define MTK_GDMA_STRP_CRC BIT(16)
developer8cb3ac72022-07-04 10:55:14 +0800200 #define MTK_GDMA_TO_PDMA 0x0
201+#define MTK_GDMA_TO_PPE 0x4444
202 #define MTK_GDMA_DROP_ALL 0x7777
203
developer58aa0682023-09-18 14:02:26 +0800204 /* GDM Egress Control Register */
205@@ -617,6 +621,12 @@
developer8cb3ac72022-07-04 10:55:14 +0800206 #define RX_DMA_TCI(_x) ((_x) & (VLAN_PRIO_MASK | VLAN_VID_MASK))
207 #define RX_DMA_VPID(_x) (((_x) >> 16) & 0xffff)
208
209+/* QDMA descriptor rxd4 */
210+#define MTK_RXD4_FOE_ENTRY GENMASK(13, 0)
211+#define MTK_RXD4_PPE_CPU_REASON GENMASK(18, 14)
212+#define MTK_RXD4_SRC_PORT GENMASK(21, 19)
213+#define MTK_RXD4_ALG GENMASK(31, 22)
214+
215 /* QDMA descriptor rxd4 */
216 #define RX_DMA_L4_VALID BIT(24)
217 #define RX_DMA_L4_VALID_PDMA BIT(30) /* when PDMA is used */
developer58aa0682023-09-18 14:02:26 +0800218@@ -1651,6 +1661,7 @@ struct mtk_soc_data {
219 u64 caps;
220 u64 required_clks;
developer8cb3ac72022-07-04 10:55:14 +0800221 bool required_pctl;
222+ u8 offload_version;
223 netdev_features_t hw_features;
224 bool has_sram;
developer58aa0682023-09-18 14:02:26 +0800225 struct {
226@@ -1847,6 +1858,9 @@ struct mtk_eth {
developer8cb3ac72022-07-04 10:55:14 +0800227 int ip_align;
228 spinlock_t syscfg0_lock;
229 struct timer_list mtk_dma_monitor_timer;
230+
developeree39bcf2023-06-16 08:03:30 +0800231+ struct mtk_ppe ppe;
developer8cb3ac72022-07-04 10:55:14 +0800232+ struct rhashtable flow_table;
233 };
234
235 /* struct mtk_mac - the structure that holds the info about the MACs of the
developer58aa0682023-09-18 14:02:26 +0800236@@ -1927,6 +1941,9 @@ int mtk_toprgu_init(struct mtk_eth *eth, struct device_node *r);
developer1fb19c92023-03-07 23:45:23 +0800237 int mtk_dump_usxgmii(struct regmap *pmap, char *name, u32 offset, u32 range);
developer58aa0682023-09-18 14:02:26 +0800238 void mtk_usxgmii_link_poll(struct work_struct *work);
developer8cb3ac72022-07-04 10:55:14 +0800239
240+int mtk_eth_offload_init(struct mtk_eth *eth);
241+int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
242+ void *type_data);
developer1fb19c92023-03-07 23:45:23 +0800243 void mtk_eth_set_dma_device(struct mtk_eth *eth, struct device *dma_dev);
developer58aa0682023-09-18 14:02:26 +0800244 u32 mtk_rss_indr_table(struct mtk_rss_params *rss_params, int index);
245 #endif /* MTK_ETH_H */
developer8cb3ac72022-07-04 10:55:14 +0800246diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
247new file mode 100644
developer58aa0682023-09-18 14:02:26 +0800248index 0000000..27b5be5
developer8cb3ac72022-07-04 10:55:14 +0800249--- /dev/null
250+++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
developerb40da332023-10-20 11:13:59 +0800251@@ -0,0 +1,514 @@
developer8cb3ac72022-07-04 10:55:14 +0800252+// SPDX-License-Identifier: GPL-2.0-only
253+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
254+
255+#include <linux/kernel.h>
256+#include <linux/io.h>
257+#include <linux/iopoll.h>
258+#include <linux/etherdevice.h>
259+#include <linux/platform_device.h>
260+#include "mtk_ppe.h"
261+#include "mtk_ppe_regs.h"
262+
263+static void ppe_w32(struct mtk_ppe *ppe, u32 reg, u32 val)
264+{
265+ writel(val, ppe->base + reg);
266+}
267+
268+static u32 ppe_r32(struct mtk_ppe *ppe, u32 reg)
269+{
270+ return readl(ppe->base + reg);
271+}
272+
273+static u32 ppe_m32(struct mtk_ppe *ppe, u32 reg, u32 mask, u32 set)
274+{
275+ u32 val;
276+
277+ val = ppe_r32(ppe, reg);
278+ val &= ~mask;
279+ val |= set;
280+ ppe_w32(ppe, reg, val);
281+
282+ return val;
283+}
284+
285+static u32 ppe_set(struct mtk_ppe *ppe, u32 reg, u32 val)
286+{
287+ return ppe_m32(ppe, reg, 0, val);
288+}
289+
290+static u32 ppe_clear(struct mtk_ppe *ppe, u32 reg, u32 val)
291+{
292+ return ppe_m32(ppe, reg, val, 0);
293+}
294+
295+static int mtk_ppe_wait_busy(struct mtk_ppe *ppe)
296+{
297+ int ret;
298+ u32 val;
299+
300+ ret = readl_poll_timeout(ppe->base + MTK_PPE_GLO_CFG, val,
301+ !(val & MTK_PPE_GLO_CFG_BUSY),
302+ 20, MTK_PPE_WAIT_TIMEOUT_US);
303+
304+ if (ret)
305+ dev_err(ppe->dev, "PPE table busy");
306+
307+ return ret;
308+}
309+
310+static void mtk_ppe_cache_clear(struct mtk_ppe *ppe)
311+{
312+ ppe_set(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
313+ ppe_clear(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
314+}
315+
316+static void mtk_ppe_cache_enable(struct mtk_ppe *ppe, bool enable)
317+{
318+ mtk_ppe_cache_clear(ppe);
319+
320+ ppe_m32(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_EN,
321+ enable * MTK_PPE_CACHE_CTL_EN);
322+}
323+
developeree39bcf2023-06-16 08:03:30 +0800324+static u32 mtk_ppe_hash_entry(struct mtk_foe_entry *e)
developer8cb3ac72022-07-04 10:55:14 +0800325+{
326+ u32 hv1, hv2, hv3;
327+ u32 hash;
328+
developeree39bcf2023-06-16 08:03:30 +0800329+ switch (FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, e->ib1)) {
330+ case MTK_PPE_PKT_TYPE_BRIDGE:
331+ hv1 = e->bridge.src_mac_lo;
332+ hv1 ^= ((e->bridge.src_mac_hi & 0xffff) << 16);
333+ hv2 = e->bridge.src_mac_hi >> 16;
334+ hv2 ^= e->bridge.dest_mac_lo;
335+ hv3 = e->bridge.dest_mac_hi;
336+ break;
developer8cb3ac72022-07-04 10:55:14 +0800337+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
338+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
339+ hv1 = e->ipv4.orig.ports;
340+ hv2 = e->ipv4.orig.dest_ip;
341+ hv3 = e->ipv4.orig.src_ip;
342+ break;
343+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
344+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
345+ hv1 = e->ipv6.src_ip[3] ^ e->ipv6.dest_ip[3];
346+ hv1 ^= e->ipv6.ports;
347+
348+ hv2 = e->ipv6.src_ip[2] ^ e->ipv6.dest_ip[2];
349+ hv2 ^= e->ipv6.dest_ip[0];
350+
351+ hv3 = e->ipv6.src_ip[1] ^ e->ipv6.dest_ip[1];
352+ hv3 ^= e->ipv6.src_ip[0];
353+ break;
354+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
355+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
356+ default:
357+ WARN_ON_ONCE(1);
358+ return MTK_PPE_HASH_MASK;
359+ }
360+
361+ hash = (hv1 & hv2) | ((~hv1) & hv3);
362+ hash = (hash >> 24) | ((hash & 0xffffff) << 8);
363+ hash ^= hv1 ^ hv2 ^ hv3;
364+ hash ^= hash >> 16;
developeree39bcf2023-06-16 08:03:30 +0800365+ hash <<= 1;
developer8cb3ac72022-07-04 10:55:14 +0800366+ hash &= MTK_PPE_ENTRIES - 1;
367+
368+ return hash;
369+}
370+
371+static inline struct mtk_foe_mac_info *
developeree39bcf2023-06-16 08:03:30 +0800372+mtk_foe_entry_l2(struct mtk_foe_entry *entry)
developer8cb3ac72022-07-04 10:55:14 +0800373+{
developeree39bcf2023-06-16 08:03:30 +0800374+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
developer8cb3ac72022-07-04 10:55:14 +0800375+
376+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
377+ return &entry->ipv6.l2;
378+
379+ return &entry->ipv4.l2;
380+}
381+
382+static inline u32 *
developeree39bcf2023-06-16 08:03:30 +0800383+mtk_foe_entry_ib2(struct mtk_foe_entry *entry)
developer8cb3ac72022-07-04 10:55:14 +0800384+{
developeree39bcf2023-06-16 08:03:30 +0800385+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
developer8cb3ac72022-07-04 10:55:14 +0800386+
387+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
388+ return &entry->ipv6.ib2;
389+
390+ return &entry->ipv4.ib2;
391+}
392+
developeree39bcf2023-06-16 08:03:30 +0800393+int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
394+ u8 pse_port, u8 *src_mac, u8 *dest_mac)
developer8cb3ac72022-07-04 10:55:14 +0800395+{
396+ struct mtk_foe_mac_info *l2;
397+ u32 ports_pad, val;
398+
399+ memset(entry, 0, sizeof(*entry));
400+
developeree39bcf2023-06-16 08:03:30 +0800401+ val = FIELD_PREP(MTK_FOE_IB1_STATE, MTK_FOE_STATE_BIND) |
402+ FIELD_PREP(MTK_FOE_IB1_PACKET_TYPE, type) |
403+ FIELD_PREP(MTK_FOE_IB1_UDP, l4proto == IPPROTO_UDP) |
404+ MTK_FOE_IB1_BIND_TTL |
405+ MTK_FOE_IB1_BIND_CACHE;
406+ entry->ib1 = val;
developer8cb3ac72022-07-04 10:55:14 +0800407+
developeree39bcf2023-06-16 08:03:30 +0800408+ val = FIELD_PREP(MTK_FOE_IB2_PORT_MG, 0x3f) |
409+ FIELD_PREP(MTK_FOE_IB2_PORT_AG, 0x1f) |
410+ FIELD_PREP(MTK_FOE_IB2_DEST_PORT, pse_port);
developer8cb3ac72022-07-04 10:55:14 +0800411+
412+ if (is_multicast_ether_addr(dest_mac))
developeree39bcf2023-06-16 08:03:30 +0800413+ val |= MTK_FOE_IB2_MULTICAST;
developer8cb3ac72022-07-04 10:55:14 +0800414+
415+ ports_pad = 0xa5a5a500 | (l4proto & 0xff);
416+ if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
417+ entry->ipv4.orig.ports = ports_pad;
418+ if (type == MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
419+ entry->ipv6.ports = ports_pad;
420+
developeree39bcf2023-06-16 08:03:30 +0800421+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
developer8cb3ac72022-07-04 10:55:14 +0800422+ entry->ipv6.ib2 = val;
423+ l2 = &entry->ipv6.l2;
424+ } else {
425+ entry->ipv4.ib2 = val;
426+ l2 = &entry->ipv4.l2;
427+ }
428+
429+ l2->dest_mac_hi = get_unaligned_be32(dest_mac);
430+ l2->dest_mac_lo = get_unaligned_be16(dest_mac + 4);
431+ l2->src_mac_hi = get_unaligned_be32(src_mac);
432+ l2->src_mac_lo = get_unaligned_be16(src_mac + 4);
433+
434+ if (type >= MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
435+ l2->etype = ETH_P_IPV6;
436+ else
437+ l2->etype = ETH_P_IP;
438+
439+ return 0;
440+}
441+
developeree39bcf2023-06-16 08:03:30 +0800442+int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port)
developer8cb3ac72022-07-04 10:55:14 +0800443+{
developeree39bcf2023-06-16 08:03:30 +0800444+ u32 *ib2 = mtk_foe_entry_ib2(entry);
445+ u32 val;
developer8cb3ac72022-07-04 10:55:14 +0800446+
developeree39bcf2023-06-16 08:03:30 +0800447+ val = *ib2;
448+ val &= ~MTK_FOE_IB2_DEST_PORT;
449+ val |= FIELD_PREP(MTK_FOE_IB2_DEST_PORT, port);
developer8cb3ac72022-07-04 10:55:14 +0800450+ *ib2 = val;
451+
452+ return 0;
453+}
454+
developeree39bcf2023-06-16 08:03:30 +0800455+int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool egress,
developer8cb3ac72022-07-04 10:55:14 +0800456+ __be32 src_addr, __be16 src_port,
457+ __be32 dest_addr, __be16 dest_port)
458+{
developeree39bcf2023-06-16 08:03:30 +0800459+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
developer8cb3ac72022-07-04 10:55:14 +0800460+ struct mtk_ipv4_tuple *t;
461+
462+ switch (type) {
463+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
464+ if (egress) {
465+ t = &entry->ipv4.new;
466+ break;
467+ }
468+ fallthrough;
469+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
470+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
471+ t = &entry->ipv4.orig;
472+ break;
473+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
474+ entry->ipv6_6rd.tunnel_src_ip = be32_to_cpu(src_addr);
475+ entry->ipv6_6rd.tunnel_dest_ip = be32_to_cpu(dest_addr);
476+ return 0;
477+ default:
478+ WARN_ON_ONCE(1);
479+ return -EINVAL;
480+ }
481+
482+ t->src_ip = be32_to_cpu(src_addr);
483+ t->dest_ip = be32_to_cpu(dest_addr);
484+
485+ if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
486+ return 0;
487+
488+ t->src_port = be16_to_cpu(src_port);
489+ t->dest_port = be16_to_cpu(dest_port);
490+
491+ return 0;
492+}
493+
developeree39bcf2023-06-16 08:03:30 +0800494+int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
developer8cb3ac72022-07-04 10:55:14 +0800495+ __be32 *src_addr, __be16 src_port,
496+ __be32 *dest_addr, __be16 dest_port)
497+{
developeree39bcf2023-06-16 08:03:30 +0800498+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
developer8cb3ac72022-07-04 10:55:14 +0800499+ u32 *src, *dest;
500+ int i;
501+
502+ switch (type) {
503+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
504+ src = entry->dslite.tunnel_src_ip;
505+ dest = entry->dslite.tunnel_dest_ip;
506+ break;
507+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
508+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
509+ entry->ipv6.src_port = be16_to_cpu(src_port);
510+ entry->ipv6.dest_port = be16_to_cpu(dest_port);
511+ fallthrough;
512+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
513+ src = entry->ipv6.src_ip;
514+ dest = entry->ipv6.dest_ip;
515+ break;
516+ default:
517+ WARN_ON_ONCE(1);
518+ return -EINVAL;
519+ }
520+
521+ for (i = 0; i < 4; i++)
522+ src[i] = be32_to_cpu(src_addr[i]);
523+ for (i = 0; i < 4; i++)
524+ dest[i] = be32_to_cpu(dest_addr[i]);
525+
526+ return 0;
527+}
528+
developeree39bcf2023-06-16 08:03:30 +0800529+int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port)
developer8cb3ac72022-07-04 10:55:14 +0800530+{
developeree39bcf2023-06-16 08:03:30 +0800531+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
developer8cb3ac72022-07-04 10:55:14 +0800532+
533+ l2->etype = BIT(port);
534+
developeree39bcf2023-06-16 08:03:30 +0800535+ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER))
536+ entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
developer8cb3ac72022-07-04 10:55:14 +0800537+ else
538+ l2->etype |= BIT(8);
539+
developeree39bcf2023-06-16 08:03:30 +0800540+ entry->ib1 &= ~MTK_FOE_IB1_BIND_VLAN_TAG;
developer8cb3ac72022-07-04 10:55:14 +0800541+
542+ return 0;
543+}
544+
developeree39bcf2023-06-16 08:03:30 +0800545+int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid)
developer8cb3ac72022-07-04 10:55:14 +0800546+{
developeree39bcf2023-06-16 08:03:30 +0800547+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
developer8cb3ac72022-07-04 10:55:14 +0800548+
developeree39bcf2023-06-16 08:03:30 +0800549+ switch (FIELD_GET(MTK_FOE_IB1_BIND_VLAN_LAYER, entry->ib1)) {
developer8cb3ac72022-07-04 10:55:14 +0800550+ case 0:
developeree39bcf2023-06-16 08:03:30 +0800551+ entry->ib1 |= MTK_FOE_IB1_BIND_VLAN_TAG |
552+ FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
developer8cb3ac72022-07-04 10:55:14 +0800553+ l2->vlan1 = vid;
554+ return 0;
555+ case 1:
developeree39bcf2023-06-16 08:03:30 +0800556+ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG)) {
developer8cb3ac72022-07-04 10:55:14 +0800557+ l2->vlan1 = vid;
558+ l2->etype |= BIT(8);
559+ } else {
560+ l2->vlan2 = vid;
developeree39bcf2023-06-16 08:03:30 +0800561+ entry->ib1 += FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
developer8cb3ac72022-07-04 10:55:14 +0800562+ }
563+ return 0;
564+ default:
565+ return -ENOSPC;
566+ }
567+}
568+
developeree39bcf2023-06-16 08:03:30 +0800569+int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid)
developer8cb3ac72022-07-04 10:55:14 +0800570+{
developeree39bcf2023-06-16 08:03:30 +0800571+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
developer8cb3ac72022-07-04 10:55:14 +0800572+
developeree39bcf2023-06-16 08:03:30 +0800573+ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER) ||
574+ (entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG))
developer8cb3ac72022-07-04 10:55:14 +0800575+ l2->etype = ETH_P_PPP_SES;
576+
developeree39bcf2023-06-16 08:03:30 +0800577+ entry->ib1 |= MTK_FOE_IB1_BIND_PPPOE;
developer8cb3ac72022-07-04 10:55:14 +0800578+ l2->pppoe_id = sid;
579+
580+ return 0;
581+}
582+
583+static inline bool mtk_foe_entry_usable(struct mtk_foe_entry *entry)
584+{
585+ return !(entry->ib1 & MTK_FOE_IB1_STATIC) &&
586+ FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1) != MTK_FOE_STATE_BIND;
587+}
588+
developeree39bcf2023-06-16 08:03:30 +0800589+int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
590+ u16 timestamp)
developer7eb15dc2023-06-14 17:44:03 +0800591+{
developer8cb3ac72022-07-04 10:55:14 +0800592+ struct mtk_foe_entry *hwe;
developeree39bcf2023-06-16 08:03:30 +0800593+ u32 hash;
developer7eb15dc2023-06-14 17:44:03 +0800594+
developeree39bcf2023-06-16 08:03:30 +0800595+ timestamp &= MTK_FOE_IB1_BIND_TIMESTAMP;
596+ entry->ib1 &= ~MTK_FOE_IB1_BIND_TIMESTAMP;
597+ entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_TIMESTAMP, timestamp);
developer7eb15dc2023-06-14 17:44:03 +0800598+
developeree39bcf2023-06-16 08:03:30 +0800599+ hash = mtk_ppe_hash_entry(entry);
600+ hwe = &ppe->foe_table[hash];
601+ if (!mtk_foe_entry_usable(hwe)) {
602+ hwe++;
603+ hash++;
developer7eb15dc2023-06-14 17:44:03 +0800604+
developeree39bcf2023-06-16 08:03:30 +0800605+ if (!mtk_foe_entry_usable(hwe))
606+ return -ENOSPC;
developer7eb15dc2023-06-14 17:44:03 +0800607+ }
608+
developeree39bcf2023-06-16 08:03:30 +0800609+ memcpy(&hwe->data, &entry->data, sizeof(hwe->data));
developer8cb3ac72022-07-04 10:55:14 +0800610+ wmb();
611+ hwe->ib1 = entry->ib1;
612+
613+ dma_wmb();
614+
615+ mtk_ppe_cache_clear(ppe);
developer7eb15dc2023-06-14 17:44:03 +0800616+
developeree39bcf2023-06-16 08:03:30 +0800617+ return hash;
developer7eb15dc2023-06-14 17:44:03 +0800618+}
619+
developeree39bcf2023-06-16 08:03:30 +0800620+int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
621+ int version)
developer7eb15dc2023-06-14 17:44:03 +0800622+{
developeree39bcf2023-06-16 08:03:30 +0800623+ struct mtk_foe_entry *foe;
developer8cb3ac72022-07-04 10:55:14 +0800624+
625+ /* need to allocate a separate device, since it PPE DMA access is
626+ * not coherent.
627+ */
628+ ppe->base = base;
629+ ppe->dev = dev;
developeree39bcf2023-06-16 08:03:30 +0800630+ ppe->version = version;
developer8cb3ac72022-07-04 10:55:14 +0800631+
developeree39bcf2023-06-16 08:03:30 +0800632+ foe = dmam_alloc_coherent(ppe->dev, MTK_PPE_ENTRIES * sizeof(*foe),
developer8cb3ac72022-07-04 10:55:14 +0800633+ &ppe->foe_phys, GFP_KERNEL);
634+ if (!foe)
developeree39bcf2023-06-16 08:03:30 +0800635+ return -ENOMEM;
developer8cb3ac72022-07-04 10:55:14 +0800636+
637+ ppe->foe_table = foe;
638+
developeree39bcf2023-06-16 08:03:30 +0800639+ mtk_ppe_debugfs_init(ppe);
developer7eb15dc2023-06-14 17:44:03 +0800640+
developeree39bcf2023-06-16 08:03:30 +0800641+ return 0;
developer8cb3ac72022-07-04 10:55:14 +0800642+}
643+
644+static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe)
645+{
646+ static const u8 skip[] = { 12, 25, 38, 51, 76, 89, 102 };
647+ int i, k;
648+
developeree39bcf2023-06-16 08:03:30 +0800649+ memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(*ppe->foe_table));
developer8cb3ac72022-07-04 10:55:14 +0800650+
651+ if (!IS_ENABLED(CONFIG_SOC_MT7621))
652+ return;
653+
654+ /* skip all entries that cross the 1024 byte boundary */
developeree39bcf2023-06-16 08:03:30 +0800655+ for (i = 0; i < MTK_PPE_ENTRIES; i += 128)
656+ for (k = 0; k < ARRAY_SIZE(skip); k++)
657+ ppe->foe_table[i + skip[k]].ib1 |= MTK_FOE_IB1_STATIC;
developer8cb3ac72022-07-04 10:55:14 +0800658+}
659+
developeree39bcf2023-06-16 08:03:30 +0800660+int mtk_ppe_start(struct mtk_ppe *ppe)
developer8cb3ac72022-07-04 10:55:14 +0800661+{
662+ u32 val;
663+
664+ mtk_ppe_init_foe_table(ppe);
665+ ppe_w32(ppe, MTK_PPE_TB_BASE, ppe->foe_phys);
666+
667+ val = MTK_PPE_TB_CFG_ENTRY_80B |
668+ MTK_PPE_TB_CFG_AGE_NON_L4 |
669+ MTK_PPE_TB_CFG_AGE_UNBIND |
670+ MTK_PPE_TB_CFG_AGE_TCP |
671+ MTK_PPE_TB_CFG_AGE_UDP |
672+ MTK_PPE_TB_CFG_AGE_TCP_FIN |
673+ FIELD_PREP(MTK_PPE_TB_CFG_SEARCH_MISS,
674+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD) |
675+ FIELD_PREP(MTK_PPE_TB_CFG_KEEPALIVE,
676+ MTK_PPE_KEEPALIVE_DISABLE) |
677+ FIELD_PREP(MTK_PPE_TB_CFG_HASH_MODE, 1) |
678+ FIELD_PREP(MTK_PPE_TB_CFG_SCAN_MODE,
developerb40da332023-10-20 11:13:59 +0800679+ MTK_PPE_SCAN_MODE_CHECK_AGE) |
developer8cb3ac72022-07-04 10:55:14 +0800680+ FIELD_PREP(MTK_PPE_TB_CFG_ENTRY_NUM,
681+ MTK_PPE_ENTRIES_SHIFT);
682+ ppe_w32(ppe, MTK_PPE_TB_CFG, val);
683+
684+ ppe_w32(ppe, MTK_PPE_IP_PROTO_CHK,
685+ MTK_PPE_IP_PROTO_CHK_IPV4 | MTK_PPE_IP_PROTO_CHK_IPV6);
686+
687+ mtk_ppe_cache_enable(ppe, true);
688+
developeree39bcf2023-06-16 08:03:30 +0800689+ val = MTK_PPE_FLOW_CFG_IP4_TCP_FRAG |
690+ MTK_PPE_FLOW_CFG_IP4_UDP_FRAG |
691+ MTK_PPE_FLOW_CFG_IP6_3T_ROUTE |
developer8cb3ac72022-07-04 10:55:14 +0800692+ MTK_PPE_FLOW_CFG_IP6_5T_ROUTE |
693+ MTK_PPE_FLOW_CFG_IP6_6RD |
694+ MTK_PPE_FLOW_CFG_IP4_NAT |
695+ MTK_PPE_FLOW_CFG_IP4_NAPT |
696+ MTK_PPE_FLOW_CFG_IP4_DSLITE |
developeree39bcf2023-06-16 08:03:30 +0800697+ MTK_PPE_FLOW_CFG_L2_BRIDGE |
developer8cb3ac72022-07-04 10:55:14 +0800698+ MTK_PPE_FLOW_CFG_IP4_NAT_FRAG;
699+ ppe_w32(ppe, MTK_PPE_FLOW_CFG, val);
700+
701+ val = FIELD_PREP(MTK_PPE_UNBIND_AGE_MIN_PACKETS, 1000) |
702+ FIELD_PREP(MTK_PPE_UNBIND_AGE_DELTA, 3);
703+ ppe_w32(ppe, MTK_PPE_UNBIND_AGE, val);
704+
developeree39bcf2023-06-16 08:03:30 +0800705+ val = FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_UDP, 30) |
developer8cb3ac72022-07-04 10:55:14 +0800706+ FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_NON_L4, 1);
707+ ppe_w32(ppe, MTK_PPE_BIND_AGE0, val);
708+
709+ val = FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP_FIN, 1) |
developeree39bcf2023-06-16 08:03:30 +0800710+ FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP, 30);
developer8cb3ac72022-07-04 10:55:14 +0800711+ ppe_w32(ppe, MTK_PPE_BIND_AGE1, val);
712+
713+ val = MTK_PPE_BIND_LIMIT0_QUARTER | MTK_PPE_BIND_LIMIT0_HALF;
714+ ppe_w32(ppe, MTK_PPE_BIND_LIMIT0, val);
715+
716+ val = MTK_PPE_BIND_LIMIT1_FULL |
717+ FIELD_PREP(MTK_PPE_BIND_LIMIT1_NON_L4, 1);
718+ ppe_w32(ppe, MTK_PPE_BIND_LIMIT1, val);
719+
720+ val = FIELD_PREP(MTK_PPE_BIND_RATE_BIND, 30) |
721+ FIELD_PREP(MTK_PPE_BIND_RATE_PREBIND, 1);
722+ ppe_w32(ppe, MTK_PPE_BIND_RATE, val);
723+
724+ /* enable PPE */
725+ val = MTK_PPE_GLO_CFG_EN |
726+ MTK_PPE_GLO_CFG_IP4_L4_CS_DROP |
727+ MTK_PPE_GLO_CFG_IP4_CS_DROP |
developercbbf1b02023-09-06 10:24:04 +0800728+ MTK_PPE_GLO_CFG_MCAST_TB_EN |
developer8cb3ac72022-07-04 10:55:14 +0800729+ MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE;
730+ ppe_w32(ppe, MTK_PPE_GLO_CFG, val);
731+
732+ ppe_w32(ppe, MTK_PPE_DEFAULT_CPU_PORT, 0);
733+
developeree39bcf2023-06-16 08:03:30 +0800734+ return 0;
developer8cb3ac72022-07-04 10:55:14 +0800735+}
736+
737+int mtk_ppe_stop(struct mtk_ppe *ppe)
738+{
739+ u32 val;
740+ int i;
741+
developeree39bcf2023-06-16 08:03:30 +0800742+ for (i = 0; i < MTK_PPE_ENTRIES; i++)
743+ ppe->foe_table[i].ib1 = FIELD_PREP(MTK_FOE_IB1_STATE,
744+ MTK_FOE_STATE_INVALID);
developer8cb3ac72022-07-04 10:55:14 +0800745+
746+ mtk_ppe_cache_enable(ppe, false);
747+
developer8cb3ac72022-07-04 10:55:14 +0800748+ /* disable aging */
749+ val = MTK_PPE_TB_CFG_AGE_NON_L4 |
750+ MTK_PPE_TB_CFG_AGE_UNBIND |
751+ MTK_PPE_TB_CFG_AGE_TCP |
752+ MTK_PPE_TB_CFG_AGE_UDP |
developerb40da332023-10-20 11:13:59 +0800753+ MTK_PPE_TB_CFG_AGE_TCP_FIN |
754+ MTK_PPE_TB_CFG_SCAN_MODE;
developer8cb3ac72022-07-04 10:55:14 +0800755+ ppe_clear(ppe, MTK_PPE_TB_CFG, val);
756+
developerb40da332023-10-20 11:13:59 +0800757+ if (mtk_ppe_wait_busy(ppe))
758+ return -ETIMEDOUT;
759+
760+ /* disable offload engine */
761+ ppe_clear(ppe, MTK_PPE_GLO_CFG, MTK_PPE_GLO_CFG_EN);
762+ ppe_w32(ppe, MTK_PPE_FLOW_CFG, 0);
763+
764+ return 0;
developer8cb3ac72022-07-04 10:55:14 +0800765+}
766diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h
767new file mode 100644
developer58aa0682023-09-18 14:02:26 +0800768index 0000000..242fb8f
developer8cb3ac72022-07-04 10:55:14 +0800769--- /dev/null
770+++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
developeree39bcf2023-06-16 08:03:30 +0800771@@ -0,0 +1,288 @@
developer8cb3ac72022-07-04 10:55:14 +0800772+// SPDX-License-Identifier: GPL-2.0-only
773+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
774+
775+#ifndef __MTK_PPE_H
776+#define __MTK_PPE_H
777+
778+#include <linux/kernel.h>
779+#include <linux/bitfield.h>
developeree39bcf2023-06-16 08:03:30 +0800780+
781+#define MTK_ETH_PPE_BASE 0xc00
developer8cb3ac72022-07-04 10:55:14 +0800782+
783+#define MTK_PPE_ENTRIES_SHIFT 3
784+#define MTK_PPE_ENTRIES (1024 << MTK_PPE_ENTRIES_SHIFT)
785+#define MTK_PPE_HASH_MASK (MTK_PPE_ENTRIES - 1)
786+#define MTK_PPE_WAIT_TIMEOUT_US 1000000
787+
788+#define MTK_FOE_IB1_UNBIND_TIMESTAMP GENMASK(7, 0)
789+#define MTK_FOE_IB1_UNBIND_PACKETS GENMASK(23, 8)
790+#define MTK_FOE_IB1_UNBIND_PREBIND BIT(24)
791+
792+#define MTK_FOE_IB1_BIND_TIMESTAMP GENMASK(14, 0)
793+#define MTK_FOE_IB1_BIND_KEEPALIVE BIT(15)
794+#define MTK_FOE_IB1_BIND_VLAN_LAYER GENMASK(18, 16)
795+#define MTK_FOE_IB1_BIND_PPPOE BIT(19)
796+#define MTK_FOE_IB1_BIND_VLAN_TAG BIT(20)
797+#define MTK_FOE_IB1_BIND_PKT_SAMPLE BIT(21)
798+#define MTK_FOE_IB1_BIND_CACHE BIT(22)
799+#define MTK_FOE_IB1_BIND_TUNNEL_DECAP BIT(23)
800+#define MTK_FOE_IB1_BIND_TTL BIT(24)
801+
802+#define MTK_FOE_IB1_PACKET_TYPE GENMASK(27, 25)
803+#define MTK_FOE_IB1_STATE GENMASK(29, 28)
804+#define MTK_FOE_IB1_UDP BIT(30)
805+#define MTK_FOE_IB1_STATIC BIT(31)
806+
807+enum {
808+ MTK_PPE_PKT_TYPE_IPV4_HNAPT = 0,
809+ MTK_PPE_PKT_TYPE_IPV4_ROUTE = 1,
810+ MTK_PPE_PKT_TYPE_BRIDGE = 2,
811+ MTK_PPE_PKT_TYPE_IPV4_DSLITE = 3,
812+ MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T = 4,
813+ MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T = 5,
814+ MTK_PPE_PKT_TYPE_IPV6_6RD = 7,
815+};
816+
817+#define MTK_FOE_IB2_QID GENMASK(3, 0)
818+#define MTK_FOE_IB2_PSE_QOS BIT(4)
819+#define MTK_FOE_IB2_DEST_PORT GENMASK(7, 5)
820+#define MTK_FOE_IB2_MULTICAST BIT(8)
821+
developeree39bcf2023-06-16 08:03:30 +0800822+#define MTK_FOE_IB2_WHNAT_QID2 GENMASK(13, 12)
823+#define MTK_FOE_IB2_WHNAT_DEVIDX BIT(16)
824+#define MTK_FOE_IB2_WHNAT_NAT BIT(17)
developer8cb3ac72022-07-04 10:55:14 +0800825+
826+#define MTK_FOE_IB2_PORT_MG GENMASK(17, 12)
827+
828+#define MTK_FOE_IB2_PORT_AG GENMASK(23, 18)
829+
830+#define MTK_FOE_IB2_DSCP GENMASK(31, 24)
831+
+#define MTK_FOE_VLAN2_WHNAT_BSS GENMASK(5, 0)
833+#define MTK_FOE_VLAN2_WHNAT_WCID GENMASK(13, 6)
834+#define MTK_FOE_VLAN2_WHNAT_RING GENMASK(15, 14)
developer8cb3ac72022-07-04 10:55:14 +0800835+
836+enum {
837+ MTK_FOE_STATE_INVALID,
838+ MTK_FOE_STATE_UNBIND,
839+ MTK_FOE_STATE_BIND,
840+ MTK_FOE_STATE_FIN
841+};
842+
843+struct mtk_foe_mac_info {
844+ u16 vlan1;
845+ u16 etype;
846+
847+ u32 dest_mac_hi;
848+
849+ u16 vlan2;
850+ u16 dest_mac_lo;
851+
852+ u32 src_mac_hi;
853+
854+ u16 pppoe_id;
855+ u16 src_mac_lo;
856+};
857+
858+struct mtk_foe_bridge {
developeree39bcf2023-06-16 08:03:30 +0800859+ u32 dest_mac_hi;
860+
861+ u16 src_mac_lo;
862+ u16 dest_mac_lo;
developer8cb3ac72022-07-04 10:55:14 +0800863+
developeree39bcf2023-06-16 08:03:30 +0800864+ u32 src_mac_hi;
developer8cb3ac72022-07-04 10:55:14 +0800865+
866+ u32 ib2;
867+
developeree39bcf2023-06-16 08:03:30 +0800868+ u32 _rsv[5];
869+
870+ u32 udf_tsid;
developer8cb3ac72022-07-04 10:55:14 +0800871+ struct mtk_foe_mac_info l2;
872+};
873+
874+struct mtk_ipv4_tuple {
875+ u32 src_ip;
876+ u32 dest_ip;
877+ union {
878+ struct {
879+ u16 dest_port;
880+ u16 src_port;
881+ };
882+ struct {
883+ u8 protocol;
884+ u8 _pad[3]; /* fill with 0xa5a5a5 */
885+ };
886+ u32 ports;
887+ };
888+};
889+
890+struct mtk_foe_ipv4 {
891+ struct mtk_ipv4_tuple orig;
892+
893+ u32 ib2;
894+
895+ struct mtk_ipv4_tuple new;
896+
897+ u16 timestamp;
898+ u16 _rsv0[3];
899+
900+ u32 udf_tsid;
901+
902+ struct mtk_foe_mac_info l2;
903+};
904+
905+struct mtk_foe_ipv4_dslite {
906+ struct mtk_ipv4_tuple ip4;
907+
908+ u32 tunnel_src_ip[4];
909+ u32 tunnel_dest_ip[4];
910+
911+ u8 flow_label[3];
912+ u8 priority;
913+
914+ u32 udf_tsid;
915+
916+ u32 ib2;
917+
918+ struct mtk_foe_mac_info l2;
919+};
920+
921+struct mtk_foe_ipv6 {
922+ u32 src_ip[4];
923+ u32 dest_ip[4];
924+
925+ union {
926+ struct {
927+ u8 protocol;
928+ u8 _pad[3]; /* fill with 0xa5a5a5 */
929+ }; /* 3-tuple */
930+ struct {
931+ u16 dest_port;
932+ u16 src_port;
933+ }; /* 5-tuple */
934+ u32 ports;
935+ };
936+
937+ u32 _rsv[3];
938+
939+ u32 udf;
940+
941+ u32 ib2;
942+ struct mtk_foe_mac_info l2;
943+};
944+
945+struct mtk_foe_ipv6_6rd {
946+ u32 src_ip[4];
947+ u32 dest_ip[4];
948+ u16 dest_port;
949+ u16 src_port;
950+
951+ u32 tunnel_src_ip;
952+ u32 tunnel_dest_ip;
953+
954+ u16 hdr_csum;
955+ u8 dscp;
956+ u8 ttl;
957+
958+ u8 flag;
959+ u8 pad;
960+ u8 per_flow_6rd_id;
961+ u8 pad2;
962+
963+ u32 ib2;
964+ struct mtk_foe_mac_info l2;
965+};
966+
967+struct mtk_foe_entry {
968+ u32 ib1;
969+
970+ union {
971+ struct mtk_foe_bridge bridge;
972+ struct mtk_foe_ipv4 ipv4;
973+ struct mtk_foe_ipv4_dslite dslite;
974+ struct mtk_foe_ipv6 ipv6;
975+ struct mtk_foe_ipv6_6rd ipv6_6rd;
developeree39bcf2023-06-16 08:03:30 +0800976+ u32 data[19];
developer8cb3ac72022-07-04 10:55:14 +0800977+ };
978+};
979+
980+enum {
981+ MTK_PPE_CPU_REASON_TTL_EXCEEDED = 0x02,
982+ MTK_PPE_CPU_REASON_OPTION_HEADER = 0x03,
983+ MTK_PPE_CPU_REASON_NO_FLOW = 0x07,
984+ MTK_PPE_CPU_REASON_IPV4_FRAG = 0x08,
985+ MTK_PPE_CPU_REASON_IPV4_DSLITE_FRAG = 0x09,
986+ MTK_PPE_CPU_REASON_IPV4_DSLITE_NO_TCP_UDP = 0x0a,
987+ MTK_PPE_CPU_REASON_IPV6_6RD_NO_TCP_UDP = 0x0b,
988+ MTK_PPE_CPU_REASON_TCP_FIN_SYN_RST = 0x0c,
989+ MTK_PPE_CPU_REASON_UN_HIT = 0x0d,
990+ MTK_PPE_CPU_REASON_HIT_UNBIND = 0x0e,
991+ MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED = 0x0f,
992+ MTK_PPE_CPU_REASON_HIT_BIND_TCP_FIN = 0x10,
993+ MTK_PPE_CPU_REASON_HIT_TTL_1 = 0x11,
994+ MTK_PPE_CPU_REASON_HIT_BIND_VLAN_VIOLATION = 0x12,
995+ MTK_PPE_CPU_REASON_KEEPALIVE_UC_OLD_HDR = 0x13,
996+ MTK_PPE_CPU_REASON_KEEPALIVE_MC_NEW_HDR = 0x14,
997+ MTK_PPE_CPU_REASON_KEEPALIVE_DUP_OLD_HDR = 0x15,
998+ MTK_PPE_CPU_REASON_HIT_BIND_FORCE_CPU = 0x16,
999+ MTK_PPE_CPU_REASON_TUNNEL_OPTION_HEADER = 0x17,
1000+ MTK_PPE_CPU_REASON_MULTICAST_TO_CPU = 0x18,
1001+ MTK_PPE_CPU_REASON_MULTICAST_TO_GMAC1_CPU = 0x19,
1002+ MTK_PPE_CPU_REASON_HIT_PRE_BIND = 0x1a,
1003+ MTK_PPE_CPU_REASON_PACKET_SAMPLING = 0x1b,
1004+ MTK_PPE_CPU_REASON_EXCEED_MTU = 0x1c,
1005+ MTK_PPE_CPU_REASON_PPE_BYPASS = 0x1e,
1006+ MTK_PPE_CPU_REASON_INVALID = 0x1f,
1007+};
1008+
1009+struct mtk_ppe {
1010+ struct device *dev;
1011+ void __iomem *base;
1012+ int version;
1013+
developeree39bcf2023-06-16 08:03:30 +08001014+ struct mtk_foe_entry *foe_table;
developer8cb3ac72022-07-04 10:55:14 +08001015+ dma_addr_t foe_phys;
1016+
1017+ void *acct_table;
1018+};
1019+
developeree39bcf2023-06-16 08:03:30 +08001020+int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
1021+ int version);
1022+int mtk_ppe_start(struct mtk_ppe *ppe);
developer8cb3ac72022-07-04 10:55:14 +08001023+int mtk_ppe_stop(struct mtk_ppe *ppe);
1024+
1025+static inline void
developeree39bcf2023-06-16 08:03:30 +08001026+mtk_foe_entry_clear(struct mtk_ppe *ppe, u16 hash)
developer8cb3ac72022-07-04 10:55:14 +08001027+{
developeree39bcf2023-06-16 08:03:30 +08001028+ ppe->foe_table[hash].ib1 = 0;
1029+ dma_wmb();
1030+}
developer8cb3ac72022-07-04 10:55:14 +08001031+
developeree39bcf2023-06-16 08:03:30 +08001032+static inline int
1033+mtk_foe_entry_timestamp(struct mtk_ppe *ppe, u16 hash)
1034+{
1035+ u32 ib1 = READ_ONCE(ppe->foe_table[hash].ib1);
developer8cb3ac72022-07-04 10:55:14 +08001036+
developeree39bcf2023-06-16 08:03:30 +08001037+ if (FIELD_GET(MTK_FOE_IB1_STATE, ib1) != MTK_FOE_STATE_BIND)
1038+ return -1;
developer7eb15dc2023-06-14 17:44:03 +08001039+
developeree39bcf2023-06-16 08:03:30 +08001040+ return FIELD_GET(MTK_FOE_IB1_BIND_TIMESTAMP, ib1);
developer8cb3ac72022-07-04 10:55:14 +08001041+}
1042+
developeree39bcf2023-06-16 08:03:30 +08001043+int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
1044+ u8 pse_port, u8 *src_mac, u8 *dest_mac);
1045+int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port);
1046+int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool orig,
developer8cb3ac72022-07-04 10:55:14 +08001047+ __be32 src_addr, __be16 src_port,
1048+ __be32 dest_addr, __be16 dest_port);
developeree39bcf2023-06-16 08:03:30 +08001049+int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
developer8cb3ac72022-07-04 10:55:14 +08001050+ __be32 *src_addr, __be16 src_port,
1051+ __be32 *dest_addr, __be16 dest_port);
developeree39bcf2023-06-16 08:03:30 +08001052+int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port);
1053+int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid);
1054+int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid);
1055+int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
1056+ u16 timestamp);
1057+int mtk_ppe_debugfs_init(struct mtk_ppe *ppe);
developer8cb3ac72022-07-04 10:55:14 +08001058+
1059+#endif
1060diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
1061new file mode 100644
developer58aa0682023-09-18 14:02:26 +08001062index 0000000..d4b4823
developer8cb3ac72022-07-04 10:55:14 +08001063--- /dev/null
1064+++ b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
developeree39bcf2023-06-16 08:03:30 +08001065@@ -0,0 +1,214 @@
developer8cb3ac72022-07-04 10:55:14 +08001066+// SPDX-License-Identifier: GPL-2.0-only
1067+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
1068+
1069+#include <linux/kernel.h>
1070+#include <linux/debugfs.h>
1071+#include "mtk_eth_soc.h"
1072+
1073+struct mtk_flow_addr_info
1074+{
1075+ void *src, *dest;
1076+ u16 *src_port, *dest_port;
1077+ bool ipv6;
1078+};
1079+
1080+static const char *mtk_foe_entry_state_str(int state)
1081+{
1082+ static const char * const state_str[] = {
1083+ [MTK_FOE_STATE_INVALID] = "INV",
1084+ [MTK_FOE_STATE_UNBIND] = "UNB",
1085+ [MTK_FOE_STATE_BIND] = "BND",
1086+ [MTK_FOE_STATE_FIN] = "FIN",
1087+ };
1088+
1089+ if (state >= ARRAY_SIZE(state_str) || !state_str[state])
1090+ return "UNK";
1091+
1092+ return state_str[state];
1093+}
1094+
1095+static const char *mtk_foe_pkt_type_str(int type)
1096+{
1097+ static const char * const type_str[] = {
1098+ [MTK_PPE_PKT_TYPE_IPV4_HNAPT] = "IPv4 5T",
1099+ [MTK_PPE_PKT_TYPE_IPV4_ROUTE] = "IPv4 3T",
developeree39bcf2023-06-16 08:03:30 +08001100+ [MTK_PPE_PKT_TYPE_BRIDGE] = "L2",
developer8cb3ac72022-07-04 10:55:14 +08001101+ [MTK_PPE_PKT_TYPE_IPV4_DSLITE] = "DS-LITE",
1102+ [MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T] = "IPv6 3T",
1103+ [MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T] = "IPv6 5T",
1104+ [MTK_PPE_PKT_TYPE_IPV6_6RD] = "6RD",
1105+ };
1106+
1107+ if (type >= ARRAY_SIZE(type_str) || !type_str[type])
1108+ return "UNKNOWN";
1109+
1110+ return type_str[type];
1111+}
1112+
1113+static void
1114+mtk_print_addr(struct seq_file *m, u32 *addr, bool ipv6)
1115+{
1116+ u32 n_addr[4];
1117+ int i;
1118+
1119+ if (!ipv6) {
1120+ seq_printf(m, "%pI4h", addr);
1121+ return;
1122+ }
1123+
1124+ for (i = 0; i < ARRAY_SIZE(n_addr); i++)
1125+ n_addr[i] = htonl(addr[i]);
1126+ seq_printf(m, "%pI6", n_addr);
1127+}
1128+
1129+static void
1130+mtk_print_addr_info(struct seq_file *m, struct mtk_flow_addr_info *ai)
1131+{
1132+ mtk_print_addr(m, ai->src, ai->ipv6);
1133+ if (ai->src_port)
1134+ seq_printf(m, ":%d", *ai->src_port);
1135+ seq_printf(m, "->");
1136+ mtk_print_addr(m, ai->dest, ai->ipv6);
1137+ if (ai->dest_port)
1138+ seq_printf(m, ":%d", *ai->dest_port);
1139+}
1140+
1141+static int
1142+mtk_ppe_debugfs_foe_show(struct seq_file *m, void *private, bool bind)
1143+{
1144+ struct mtk_ppe *ppe = m->private;
1145+ int i;
1146+
1147+ for (i = 0; i < MTK_PPE_ENTRIES; i++) {
developeree39bcf2023-06-16 08:03:30 +08001148+ struct mtk_foe_entry *entry = &ppe->foe_table[i];
developer8cb3ac72022-07-04 10:55:14 +08001149+ struct mtk_foe_mac_info *l2;
1150+ struct mtk_flow_addr_info ai = {};
1151+ unsigned char h_source[ETH_ALEN];
1152+ unsigned char h_dest[ETH_ALEN];
1153+ int type, state;
1154+ u32 ib2;
1155+
1156+
1157+ state = FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1);
1158+ if (!state)
1159+ continue;
1160+
1161+ if (bind && state != MTK_FOE_STATE_BIND)
1162+ continue;
1163+
1164+ type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
1165+ seq_printf(m, "%05x %s %7s", i,
1166+ mtk_foe_entry_state_str(state),
1167+ mtk_foe_pkt_type_str(type));
1168+
1169+ switch (type) {
1170+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
1171+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
1172+ ai.src_port = &entry->ipv4.orig.src_port;
1173+ ai.dest_port = &entry->ipv4.orig.dest_port;
1174+ fallthrough;
1175+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
1176+ ai.src = &entry->ipv4.orig.src_ip;
1177+ ai.dest = &entry->ipv4.orig.dest_ip;
1178+ break;
1179+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
1180+ ai.src_port = &entry->ipv6.src_port;
1181+ ai.dest_port = &entry->ipv6.dest_port;
1182+ fallthrough;
1183+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
1184+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
1185+ ai.src = &entry->ipv6.src_ip;
1186+ ai.dest = &entry->ipv6.dest_ip;
1187+ ai.ipv6 = true;
1188+ break;
1189+ }
1190+
1191+ seq_printf(m, " orig=");
1192+ mtk_print_addr_info(m, &ai);
1193+
1194+ switch (type) {
1195+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
1196+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
1197+ ai.src_port = &entry->ipv4.new.src_port;
1198+ ai.dest_port = &entry->ipv4.new.dest_port;
1199+ fallthrough;
1200+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
1201+ ai.src = &entry->ipv4.new.src_ip;
1202+ ai.dest = &entry->ipv4.new.dest_ip;
1203+ seq_printf(m, " new=");
1204+ mtk_print_addr_info(m, &ai);
1205+ break;
1206+ }
1207+
1208+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
1209+ l2 = &entry->ipv6.l2;
1210+ ib2 = entry->ipv6.ib2;
1211+ } else {
1212+ l2 = &entry->ipv4.l2;
1213+ ib2 = entry->ipv4.ib2;
1214+ }
1215+
1216+ *((__be32 *)h_source) = htonl(l2->src_mac_hi);
1217+ *((__be16 *)&h_source[4]) = htons(l2->src_mac_lo);
1218+ *((__be32 *)h_dest) = htonl(l2->dest_mac_hi);
1219+ *((__be16 *)&h_dest[4]) = htons(l2->dest_mac_lo);
1220+
1221+ seq_printf(m, " eth=%pM->%pM etype=%04x"
developeree39bcf2023-06-16 08:03:30 +08001222+ " vlan=%d,%d ib1=%08x ib2=%08x\n",
developer8cb3ac72022-07-04 10:55:14 +08001223+ h_source, h_dest, ntohs(l2->etype),
developeree39bcf2023-06-16 08:03:30 +08001224+ l2->vlan1, l2->vlan2, entry->ib1, ib2);
developer8cb3ac72022-07-04 10:55:14 +08001225+ }
1226+
1227+ return 0;
1228+}
1229+
1230+static int
1231+mtk_ppe_debugfs_foe_show_all(struct seq_file *m, void *private)
1232+{
1233+ return mtk_ppe_debugfs_foe_show(m, private, false);
1234+}
1235+
1236+static int
1237+mtk_ppe_debugfs_foe_show_bind(struct seq_file *m, void *private)
1238+{
1239+ return mtk_ppe_debugfs_foe_show(m, private, true);
1240+}
1241+
1242+static int
1243+mtk_ppe_debugfs_foe_open_all(struct inode *inode, struct file *file)
1244+{
1245+ return single_open(file, mtk_ppe_debugfs_foe_show_all,
1246+ inode->i_private);
1247+}
1248+
1249+static int
1250+mtk_ppe_debugfs_foe_open_bind(struct inode *inode, struct file *file)
1251+{
1252+ return single_open(file, mtk_ppe_debugfs_foe_show_bind,
1253+ inode->i_private);
1254+}
1255+
developeree39bcf2023-06-16 08:03:30 +08001256+int mtk_ppe_debugfs_init(struct mtk_ppe *ppe)
developer8cb3ac72022-07-04 10:55:14 +08001257+{
1258+ static const struct file_operations fops_all = {
1259+ .open = mtk_ppe_debugfs_foe_open_all,
1260+ .read = seq_read,
1261+ .llseek = seq_lseek,
1262+ .release = single_release,
1263+ };
developeree39bcf2023-06-16 08:03:30 +08001264+
developer8cb3ac72022-07-04 10:55:14 +08001265+ static const struct file_operations fops_bind = {
1266+ .open = mtk_ppe_debugfs_foe_open_bind,
1267+ .read = seq_read,
1268+ .llseek = seq_lseek,
1269+ .release = single_release,
1270+ };
developer7eb15dc2023-06-14 17:44:03 +08001271+
developeree39bcf2023-06-16 08:03:30 +08001272+ struct dentry *root;
developer7eb15dc2023-06-14 17:44:03 +08001273+
developeree39bcf2023-06-16 08:03:30 +08001274+ root = debugfs_create_dir("mtk_ppe", NULL);
developer8cb3ac72022-07-04 10:55:14 +08001275+ debugfs_create_file("entries", S_IRUGO, root, ppe, &fops_all);
1276+ debugfs_create_file("bind", S_IRUGO, root, ppe, &fops_bind);
1277+
1278+ return 0;
1279+}
1280diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
1281new file mode 100644
developer58aa0682023-09-18 14:02:26 +08001282index 0000000..1380ef0
developer8cb3ac72022-07-04 10:55:14 +08001283--- /dev/null
1284+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
developeree39bcf2023-06-16 08:03:30 +08001285@@ -0,0 +1,535 @@
developer8cb3ac72022-07-04 10:55:14 +08001286+// SPDX-License-Identifier: GPL-2.0-only
1287+/*
1288+ * Copyright (C) 2020 Felix Fietkau <nbd@nbd.name>
1289+ */
1290+
1291+#include <linux/if_ether.h>
1292+#include <linux/rhashtable.h>
1293+#include <linux/ip.h>
1294+#include <linux/ipv6.h>
1295+#include <net/flow_offload.h>
1296+#include <net/pkt_cls.h>
1297+#include <net/dsa.h>
1298+#include "mtk_eth_soc.h"
1299+
1300+struct mtk_flow_data {
1301+ struct ethhdr eth;
1302+
1303+ union {
1304+ struct {
1305+ __be32 src_addr;
1306+ __be32 dst_addr;
1307+ } v4;
1308+
1309+ struct {
1310+ struct in6_addr src_addr;
1311+ struct in6_addr dst_addr;
1312+ } v6;
1313+ };
1314+
1315+ __be16 src_port;
1316+ __be16 dst_port;
1317+
1318+ struct {
1319+ u16 id;
1320+ __be16 proto;
1321+ u8 num;
1322+ } vlan;
1323+ struct {
1324+ u16 sid;
1325+ u8 num;
1326+ } pppoe;
1327+};
1328+
developeree39bcf2023-06-16 08:03:30 +08001329+struct mtk_flow_entry {
1330+ struct rhash_head node;
1331+ unsigned long cookie;
1332+ u16 hash;
1333+};
1334+
developer8cb3ac72022-07-04 10:55:14 +08001335+static const struct rhashtable_params mtk_flow_ht_params = {
1336+ .head_offset = offsetof(struct mtk_flow_entry, node),
1337+ .key_offset = offsetof(struct mtk_flow_entry, cookie),
1338+ .key_len = sizeof(unsigned long),
1339+ .automatic_shrinking = true,
1340+};
1341+
developeree39bcf2023-06-16 08:03:30 +08001342+static u32
1343+mtk_eth_timestamp(struct mtk_eth *eth)
1344+{
1345+ return mtk_r32(eth, 0x0010) & MTK_FOE_IB1_BIND_TIMESTAMP;
1346+}
1347+
developer8cb3ac72022-07-04 10:55:14 +08001348+static int
developeree39bcf2023-06-16 08:03:30 +08001349+mtk_flow_set_ipv4_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data,
1350+ bool egress)
developer8cb3ac72022-07-04 10:55:14 +08001351+{
developeree39bcf2023-06-16 08:03:30 +08001352+ return mtk_foe_entry_set_ipv4_tuple(foe, egress,
developer8cb3ac72022-07-04 10:55:14 +08001353+ data->v4.src_addr, data->src_port,
1354+ data->v4.dst_addr, data->dst_port);
1355+}
1356+
1357+static int
developeree39bcf2023-06-16 08:03:30 +08001358+mtk_flow_set_ipv6_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data)
developer8cb3ac72022-07-04 10:55:14 +08001359+{
developeree39bcf2023-06-16 08:03:30 +08001360+ return mtk_foe_entry_set_ipv6_tuple(foe,
developer8cb3ac72022-07-04 10:55:14 +08001361+ data->v6.src_addr.s6_addr32, data->src_port,
1362+ data->v6.dst_addr.s6_addr32, data->dst_port);
1363+}
1364+
1365+static void
1366+mtk_flow_offload_mangle_eth(const struct flow_action_entry *act, void *eth)
1367+{
1368+ void *dest = eth + act->mangle.offset;
1369+ const void *src = &act->mangle.val;
1370+
1371+ if (act->mangle.offset > 8)
1372+ return;
1373+
1374+ if (act->mangle.mask == 0xffff) {
1375+ src += 2;
1376+ dest += 2;
1377+ }
1378+
1379+ memcpy(dest, src, act->mangle.mask ? 2 : 4);
1380+}
1381+
developeree39bcf2023-06-16 08:03:30 +08001382+
developer8cb3ac72022-07-04 10:55:14 +08001383+static int
1384+mtk_flow_mangle_ports(const struct flow_action_entry *act,
1385+ struct mtk_flow_data *data)
1386+{
1387+ u32 val = ntohl(act->mangle.val);
1388+
1389+ switch (act->mangle.offset) {
1390+ case 0:
1391+ if (act->mangle.mask == ~htonl(0xffff))
1392+ data->dst_port = cpu_to_be16(val);
1393+ else
1394+ data->src_port = cpu_to_be16(val >> 16);
1395+ break;
1396+ case 2:
1397+ data->dst_port = cpu_to_be16(val);
1398+ break;
1399+ default:
1400+ return -EINVAL;
1401+ }
1402+
1403+ return 0;
1404+}
1405+
1406+static int
1407+mtk_flow_mangle_ipv4(const struct flow_action_entry *act,
1408+ struct mtk_flow_data *data)
1409+{
1410+ __be32 *dest;
1411+
1412+ switch (act->mangle.offset) {
1413+ case offsetof(struct iphdr, saddr):
1414+ dest = &data->v4.src_addr;
1415+ break;
1416+ case offsetof(struct iphdr, daddr):
1417+ dest = &data->v4.dst_addr;
1418+ break;
1419+ default:
1420+ return -EINVAL;
1421+ }
1422+
1423+ memcpy(dest, &act->mangle.val, sizeof(u32));
1424+
1425+ return 0;
1426+}
1427+
1428+static int
1429+mtk_flow_get_dsa_port(struct net_device **dev)
1430+{
1431+#if IS_ENABLED(CONFIG_NET_DSA)
1432+ struct dsa_port *dp;
1433+
1434+ dp = dsa_port_from_netdev(*dev);
1435+ if (IS_ERR(dp))
1436+ return -ENODEV;
1437+
1438+ if (dp->cpu_dp->tag_ops->proto != DSA_TAG_PROTO_MTK)
1439+ return -ENODEV;
1440+
1441+ *dev = dp->cpu_dp->master;
1442+
1443+ return dp->index;
1444+#else
1445+ return -ENODEV;
1446+#endif
1447+}
1448+
1449+static int
1450+mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
developeree39bcf2023-06-16 08:03:30 +08001451+ struct net_device *dev)
developer8cb3ac72022-07-04 10:55:14 +08001452+{
developeree39bcf2023-06-16 08:03:30 +08001453+ int pse_port, dsa_port;
developer8cb3ac72022-07-04 10:55:14 +08001454+
1455+ dsa_port = mtk_flow_get_dsa_port(&dev);
developeree39bcf2023-06-16 08:03:30 +08001456+ if (dsa_port >= 0)
1457+ mtk_foe_entry_set_dsa(foe, dsa_port);
developer8cb3ac72022-07-04 10:55:14 +08001458+
1459+ if (dev == eth->netdev[0])
developeree39bcf2023-06-16 08:03:30 +08001460+ pse_port = PSE_GDM1_PORT;
developer8cb3ac72022-07-04 10:55:14 +08001461+ else if (dev == eth->netdev[1])
developeree39bcf2023-06-16 08:03:30 +08001462+ pse_port = PSE_GDM2_PORT;
1463+ else
1464+ return -EOPNOTSUPP;
developer7eb15dc2023-06-14 17:44:03 +08001465+
developeree39bcf2023-06-16 08:03:30 +08001466+ mtk_foe_entry_set_pse_port(foe, pse_port);
developer8cb3ac72022-07-04 10:55:14 +08001467+
1468+ return 0;
1469+}
1470+
1471+static int
developeree39bcf2023-06-16 08:03:30 +08001472+mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
developer8cb3ac72022-07-04 10:55:14 +08001473+{
1474+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1475+ struct flow_action_entry *act;
1476+ struct mtk_flow_data data = {};
1477+ struct mtk_foe_entry foe;
1478+ struct net_device *odev = NULL;
1479+ struct mtk_flow_entry *entry;
1480+ int offload_type = 0;
1481+ u16 addr_type = 0;
developeree39bcf2023-06-16 08:03:30 +08001482+ u32 timestamp;
developer8cb3ac72022-07-04 10:55:14 +08001483+ u8 l4proto = 0;
1484+ int err = 0;
developeree39bcf2023-06-16 08:03:30 +08001485+ int hash;
developer8cb3ac72022-07-04 10:55:14 +08001486+ int i;
1487+
1488+ if (rhashtable_lookup(&eth->flow_table, &f->cookie, mtk_flow_ht_params))
1489+ return -EEXIST;
1490+
1491+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
1492+ struct flow_match_meta match;
1493+
1494+ flow_rule_match_meta(rule, &match);
1495+ } else {
1496+ return -EOPNOTSUPP;
1497+ }
1498+
1499+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
1500+ struct flow_match_control match;
1501+
1502+ flow_rule_match_control(rule, &match);
1503+ addr_type = match.key->addr_type;
1504+ } else {
1505+ return -EOPNOTSUPP;
1506+ }
1507+
1508+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
1509+ struct flow_match_basic match;
1510+
1511+ flow_rule_match_basic(rule, &match);
1512+ l4proto = match.key->ip_proto;
1513+ } else {
1514+ return -EOPNOTSUPP;
1515+ }
1516+
1517+ flow_action_for_each(i, act, &rule->action) {
1518+ switch (act->id) {
1519+ case FLOW_ACTION_MANGLE:
1520+ if (act->mangle.htype == FLOW_ACT_MANGLE_HDR_TYPE_ETH)
1521+ mtk_flow_offload_mangle_eth(act, &data.eth);
1522+ break;
1523+ case FLOW_ACTION_REDIRECT:
1524+ odev = act->dev;
1525+ break;
1526+ case FLOW_ACTION_CSUM:
1527+ break;
1528+ case FLOW_ACTION_VLAN_PUSH:
1529+ if (data.vlan.num == 1 ||
1530+ act->vlan.proto != htons(ETH_P_8021Q))
1531+ return -EOPNOTSUPP;
1532+
1533+ data.vlan.id = act->vlan.vid;
1534+ data.vlan.proto = act->vlan.proto;
1535+ data.vlan.num++;
1536+ break;
1537+ case FLOW_ACTION_VLAN_POP:
1538+ break;
1539+ case FLOW_ACTION_PPPOE_PUSH:
1540+ if (data.pppoe.num == 1)
1541+ return -EOPNOTSUPP;
1542+
1543+ data.pppoe.sid = act->pppoe.sid;
1544+ data.pppoe.num++;
1545+ break;
1546+ default:
1547+ return -EOPNOTSUPP;
1548+ }
1549+ }
1550+
developeree39bcf2023-06-16 08:03:30 +08001551+ switch (addr_type) {
1552+ case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
1553+ offload_type = MTK_PPE_PKT_TYPE_IPV4_HNAPT;
1554+ break;
1555+ case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
1556+ offload_type = MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T;
1557+ break;
1558+ default:
1559+ return -EOPNOTSUPP;
1560+ }
1561+
developer8cb3ac72022-07-04 10:55:14 +08001562+ if (!is_valid_ether_addr(data.eth.h_source) ||
1563+ !is_valid_ether_addr(data.eth.h_dest))
1564+ return -EINVAL;
1565+
developeree39bcf2023-06-16 08:03:30 +08001566+ err = mtk_foe_entry_prepare(&foe, offload_type, l4proto, 0,
1567+ data.eth.h_source,
1568+ data.eth.h_dest);
developer8cb3ac72022-07-04 10:55:14 +08001569+ if (err)
1570+ return err;
1571+
1572+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
1573+ struct flow_match_ports ports;
1574+
1575+ flow_rule_match_ports(rule, &ports);
1576+ data.src_port = ports.key->src;
1577+ data.dst_port = ports.key->dst;
developeree39bcf2023-06-16 08:03:30 +08001578+ } else {
developer8cb3ac72022-07-04 10:55:14 +08001579+ return -EOPNOTSUPP;
1580+ }
1581+
1582+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
1583+ struct flow_match_ipv4_addrs addrs;
1584+
1585+ flow_rule_match_ipv4_addrs(rule, &addrs);
1586+
1587+ data.v4.src_addr = addrs.key->src;
1588+ data.v4.dst_addr = addrs.key->dst;
1589+
developeree39bcf2023-06-16 08:03:30 +08001590+ mtk_flow_set_ipv4_addr(&foe, &data, false);
developer8cb3ac72022-07-04 10:55:14 +08001591+ }
1592+
1593+ if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
1594+ struct flow_match_ipv6_addrs addrs;
1595+
1596+ flow_rule_match_ipv6_addrs(rule, &addrs);
1597+
1598+ data.v6.src_addr = addrs.key->src;
1599+ data.v6.dst_addr = addrs.key->dst;
1600+
developeree39bcf2023-06-16 08:03:30 +08001601+ mtk_flow_set_ipv6_addr(&foe, &data);
developer8cb3ac72022-07-04 10:55:14 +08001602+ }
1603+
1604+ flow_action_for_each(i, act, &rule->action) {
1605+ if (act->id != FLOW_ACTION_MANGLE)
1606+ continue;
1607+
1608+ switch (act->mangle.htype) {
1609+ case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
1610+ case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
1611+ err = mtk_flow_mangle_ports(act, &data);
1612+ break;
1613+ case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
1614+ err = mtk_flow_mangle_ipv4(act, &data);
1615+ break;
1616+ case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
1617+ /* handled earlier */
1618+ break;
1619+ default:
1620+ return -EOPNOTSUPP;
1621+ }
1622+
1623+ if (err)
1624+ return err;
1625+ }
1626+
1627+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
developeree39bcf2023-06-16 08:03:30 +08001628+ err = mtk_flow_set_ipv4_addr(&foe, &data, true);
developer8cb3ac72022-07-04 10:55:14 +08001629+ if (err)
1630+ return err;
1631+ }
1632+
1633+ if (data.vlan.num == 1) {
1634+ if (data.vlan.proto != htons(ETH_P_8021Q))
1635+ return -EOPNOTSUPP;
1636+
developeree39bcf2023-06-16 08:03:30 +08001637+ mtk_foe_entry_set_vlan(&foe, data.vlan.id);
developer8cb3ac72022-07-04 10:55:14 +08001638+ }
1639+ if (data.pppoe.num == 1)
developeree39bcf2023-06-16 08:03:30 +08001640+ mtk_foe_entry_set_pppoe(&foe, data.pppoe.sid);
developer8cb3ac72022-07-04 10:55:14 +08001641+
developeree39bcf2023-06-16 08:03:30 +08001642+ err = mtk_flow_set_output_device(eth, &foe, odev);
developer8cb3ac72022-07-04 10:55:14 +08001643+ if (err)
1644+ return err;
1645+
1646+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1647+ if (!entry)
1648+ return -ENOMEM;
1649+
1650+ entry->cookie = f->cookie;
developeree39bcf2023-06-16 08:03:30 +08001651+ timestamp = mtk_eth_timestamp(eth);
1652+ hash = mtk_foe_entry_commit(&eth->ppe, &foe, timestamp);
1653+ if (hash < 0) {
1654+ err = hash;
developer8cb3ac72022-07-04 10:55:14 +08001655+ goto free;
developeree39bcf2023-06-16 08:03:30 +08001656+ }
developer8cb3ac72022-07-04 10:55:14 +08001657+
developeree39bcf2023-06-16 08:03:30 +08001658+ entry->hash = hash;
developer8cb3ac72022-07-04 10:55:14 +08001659+ err = rhashtable_insert_fast(&eth->flow_table, &entry->node,
1660+ mtk_flow_ht_params);
1661+ if (err < 0)
developeree39bcf2023-06-16 08:03:30 +08001662+ goto clear_flow;
developer8cb3ac72022-07-04 10:55:14 +08001663+
1664+ return 0;
developeree39bcf2023-06-16 08:03:30 +08001665+clear_flow:
1666+ mtk_foe_entry_clear(&eth->ppe, hash);
developer8cb3ac72022-07-04 10:55:14 +08001667+free:
1668+ kfree(entry);
1669+ return err;
1670+}
1671+
1672+static int
1673+mtk_flow_offload_destroy(struct mtk_eth *eth, struct flow_cls_offload *f)
1674+{
1675+ struct mtk_flow_entry *entry;
1676+
1677+ entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
1678+ mtk_flow_ht_params);
1679+ if (!entry)
1680+ return -ENOENT;
1681+
developeree39bcf2023-06-16 08:03:30 +08001682+ mtk_foe_entry_clear(&eth->ppe, entry->hash);
developer8cb3ac72022-07-04 10:55:14 +08001683+ rhashtable_remove_fast(&eth->flow_table, &entry->node,
1684+ mtk_flow_ht_params);
1685+ kfree(entry);
1686+
1687+ return 0;
1688+}
1689+
1690+static int
1691+mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f)
1692+{
1693+ struct mtk_flow_entry *entry;
developeree39bcf2023-06-16 08:03:30 +08001694+ int timestamp;
1695+ u32 idle;
developer8cb3ac72022-07-04 10:55:14 +08001696+
1697+ entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
1698+ mtk_flow_ht_params);
1699+ if (!entry)
1700+ return -ENOENT;
1701+
developeree39bcf2023-06-16 08:03:30 +08001702+ timestamp = mtk_foe_entry_timestamp(&eth->ppe, entry->hash);
1703+ if (timestamp < 0)
1704+ return -ETIMEDOUT;
1705+
1706+ idle = mtk_eth_timestamp(eth) - timestamp;
developer8cb3ac72022-07-04 10:55:14 +08001707+ f->stats.lastused = jiffies - idle * HZ;
1708+
1709+ return 0;
1710+}
1711+
1712+static DEFINE_MUTEX(mtk_flow_offload_mutex);
1713+
developeree39bcf2023-06-16 08:03:30 +08001714+static int
1715+mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
developer8cb3ac72022-07-04 10:55:14 +08001716+{
developeree39bcf2023-06-16 08:03:30 +08001717+ struct flow_cls_offload *cls = type_data;
1718+ struct net_device *dev = cb_priv;
1719+ struct mtk_mac *mac = netdev_priv(dev);
1720+ struct mtk_eth *eth = mac->hw;
developer8cb3ac72022-07-04 10:55:14 +08001721+ int err;
1722+
developeree39bcf2023-06-16 08:03:30 +08001723+ if (!tc_can_offload(dev))
1724+ return -EOPNOTSUPP;
1725+
1726+ if (type != TC_SETUP_CLSFLOWER)
1727+ return -EOPNOTSUPP;
1728+
developer8cb3ac72022-07-04 10:55:14 +08001729+ mutex_lock(&mtk_flow_offload_mutex);
1730+ switch (cls->command) {
1731+ case FLOW_CLS_REPLACE:
developeree39bcf2023-06-16 08:03:30 +08001732+ err = mtk_flow_offload_replace(eth, cls);
developer8cb3ac72022-07-04 10:55:14 +08001733+ break;
1734+ case FLOW_CLS_DESTROY:
1735+ err = mtk_flow_offload_destroy(eth, cls);
1736+ break;
1737+ case FLOW_CLS_STATS:
1738+ err = mtk_flow_offload_stats(eth, cls);
1739+ break;
1740+ default:
1741+ err = -EOPNOTSUPP;
1742+ break;
1743+ }
1744+ mutex_unlock(&mtk_flow_offload_mutex);
1745+
1746+ return err;
1747+}
1748+
1749+static int
1750+mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f)
1751+{
1752+ struct mtk_mac *mac = netdev_priv(dev);
1753+ struct mtk_eth *eth = mac->hw;
1754+ static LIST_HEAD(block_cb_list);
1755+ struct flow_block_cb *block_cb;
1756+ flow_setup_cb_t *cb;
developeree39bcf2023-06-16 08:03:30 +08001757+ int err = 0;
developer207b39d2022-10-07 15:57:16 +08001758+
developeree39bcf2023-06-16 08:03:30 +08001759+ if (!eth->ppe.foe_table)
developer8cb3ac72022-07-04 10:55:14 +08001760+ return -EOPNOTSUPP;
1761+
1762+ if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
1763+ return -EOPNOTSUPP;
1764+
1765+ cb = mtk_eth_setup_tc_block_cb;
1766+ f->driver_block_list = &block_cb_list;
1767+
1768+ switch (f->command) {
1769+ case FLOW_BLOCK_BIND:
1770+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
1771+ if (block_cb) {
1772+ flow_block_cb_incref(block_cb);
developeree39bcf2023-06-16 08:03:30 +08001773+ goto unlock;
developer8cb3ac72022-07-04 10:55:14 +08001774+ }
1775+ block_cb = flow_block_cb_alloc(cb, dev, dev, NULL);
developeree39bcf2023-06-16 08:03:30 +08001776+ if (IS_ERR(block_cb)) {
1777+ err = PTR_ERR(block_cb);
1778+ goto unlock;
1779+ }
developer8cb3ac72022-07-04 10:55:14 +08001780+
1781+ flow_block_cb_add(block_cb, f);
1782+ list_add_tail(&block_cb->driver_list, &block_cb_list);
developeree39bcf2023-06-16 08:03:30 +08001783+ break;
developer8cb3ac72022-07-04 10:55:14 +08001784+ case FLOW_BLOCK_UNBIND:
1785+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
developeree39bcf2023-06-16 08:03:30 +08001786+ if (!block_cb) {
1787+ err = -ENOENT;
1788+ goto unlock;
1789+ }
developer8cb3ac72022-07-04 10:55:14 +08001790+
developeree39bcf2023-06-16 08:03:30 +08001791+ if (flow_block_cb_decref(block_cb)) {
developer8cb3ac72022-07-04 10:55:14 +08001792+ flow_block_cb_remove(block_cb, f);
1793+ list_del(&block_cb->driver_list);
1794+ }
developeree39bcf2023-06-16 08:03:30 +08001795+ break;
developer8cb3ac72022-07-04 10:55:14 +08001796+ default:
developeree39bcf2023-06-16 08:03:30 +08001797+ err = -EOPNOTSUPP;
1798+ break;
developer8cb3ac72022-07-04 10:55:14 +08001799+ }
developeree39bcf2023-06-16 08:03:30 +08001800+
1801+unlock:
1802+ return err;
developer8cb3ac72022-07-04 10:55:14 +08001803+}
1804+
1805+int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
1806+ void *type_data)
1807+{
developeree39bcf2023-06-16 08:03:30 +08001808+ if (type == TC_SETUP_FT)
developer8cb3ac72022-07-04 10:55:14 +08001809+ return mtk_eth_setup_tc_block(dev, type_data);
developeree39bcf2023-06-16 08:03:30 +08001810+
1811+ return -EOPNOTSUPP;
developer8cb3ac72022-07-04 10:55:14 +08001812+}
1813+
1814+int mtk_eth_offload_init(struct mtk_eth *eth)
1815+{
developeree39bcf2023-06-16 08:03:30 +08001816+ if (!eth->ppe.foe_table)
1817+ return 0;
1818+
developer8cb3ac72022-07-04 10:55:14 +08001819+ return rhashtable_init(&eth->flow_table, &mtk_flow_ht_params);
1820+}
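The replace/destroy/stats handlers above all key their lookups on f->cookie via eth->flow_table. A minimal sketch of the entry layout and hashtable parameters they rely on, assumed to mirror the definitions earlier in mtk_ppe_offload.c (outside this excerpt):

/* Sketch only; assumed to match the definitions earlier in this file. */
#include <linux/rhashtable.h>

struct mtk_flow_entry {
        struct rhash_head node;
        unsigned long cookie;   /* flow_cls_offload cookie, the lookup key */
        u16 hash;               /* PPE FOE index returned by mtk_foe_entry_commit() */
};

static const struct rhashtable_params mtk_flow_ht_params = {
        .head_offset = offsetof(struct mtk_flow_entry, node),
        .key_offset = offsetof(struct mtk_flow_entry, cookie),
        .key_len = sizeof(unsigned long),
        .automatic_shrinking = true,
};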
1821diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_regs.h b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
1822new file mode 100644
developer58aa0682023-09-18 14:02:26 +08001823index 0000000..0c45ea0
developer8cb3ac72022-07-04 10:55:14 +08001824--- /dev/null
1825+++ b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
developeree39bcf2023-06-16 08:03:30 +08001826@@ -0,0 +1,144 @@
developer8cb3ac72022-07-04 10:55:14 +08001827+// SPDX-License-Identifier: GPL-2.0-only
1828+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
1829+
1830+#ifndef __MTK_PPE_REGS_H
1831+#define __MTK_PPE_REGS_H
1832+
1833+#define MTK_PPE_GLO_CFG 0x200
1834+#define MTK_PPE_GLO_CFG_EN BIT(0)
1835+#define MTK_PPE_GLO_CFG_TSID_EN BIT(1)
1836+#define MTK_PPE_GLO_CFG_IP4_L4_CS_DROP BIT(2)
1837+#define MTK_PPE_GLO_CFG_IP4_CS_DROP BIT(3)
1838+#define MTK_PPE_GLO_CFG_TTL0_DROP BIT(4)
1839+#define MTK_PPE_GLO_CFG_PPE_BSWAP BIT(5)
1840+#define MTK_PPE_GLO_CFG_PSE_HASH_OFS BIT(6)
1841+#define MTK_PPE_GLO_CFG_MCAST_TB_EN BIT(7)
1842+#define MTK_PPE_GLO_CFG_FLOW_DROP_KA BIT(8)
1843+#define MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE BIT(9)
1844+#define MTK_PPE_GLO_CFG_UDP_LITE_EN BIT(10)
1845+#define MTK_PPE_GLO_CFG_UDP_LEN_DROP BIT(11)
1846+#define MTK_PPE_GLO_CFG_MCAST_ENTRIES GENMASK(13, 12)
1847+#define MTK_PPE_GLO_CFG_BUSY BIT(31)
1848+
1849+#define MTK_PPE_FLOW_CFG 0x204
1850+#define MTK_PPE_FLOW_CFG_IP4_TCP_FRAG BIT(6)
1851+#define MTK_PPE_FLOW_CFG_IP4_UDP_FRAG BIT(7)
1852+#define MTK_PPE_FLOW_CFG_IP6_3T_ROUTE BIT(8)
1853+#define MTK_PPE_FLOW_CFG_IP6_5T_ROUTE BIT(9)
1854+#define MTK_PPE_FLOW_CFG_IP6_6RD BIT(10)
1855+#define MTK_PPE_FLOW_CFG_IP4_NAT BIT(12)
1856+#define MTK_PPE_FLOW_CFG_IP4_NAPT BIT(13)
1857+#define MTK_PPE_FLOW_CFG_IP4_DSLITE BIT(14)
1858+#define MTK_PPE_FLOW_CFG_L2_BRIDGE BIT(15)
1859+#define MTK_PPE_FLOW_CFG_IP_PROTO_BLACKLIST BIT(16)
1860+#define MTK_PPE_FLOW_CFG_IP4_NAT_FRAG BIT(17)
1861+#define MTK_PPE_FLOW_CFG_IP4_HASH_FLOW_LABEL BIT(18)
1862+#define MTK_PPE_FLOW_CFG_IP4_HASH_GRE_KEY BIT(19)
1863+#define MTK_PPE_FLOW_CFG_IP6_HASH_GRE_KEY BIT(20)
1864+
1865+#define MTK_PPE_IP_PROTO_CHK 0x208
1866+#define MTK_PPE_IP_PROTO_CHK_IPV4 GENMASK(15, 0)
1867+#define MTK_PPE_IP_PROTO_CHK_IPV6 GENMASK(31, 16)
1868+
1869+#define MTK_PPE_TB_CFG 0x21c
1870+#define MTK_PPE_TB_CFG_ENTRY_NUM GENMASK(2, 0)
1871+#define MTK_PPE_TB_CFG_ENTRY_80B BIT(3)
1872+#define MTK_PPE_TB_CFG_SEARCH_MISS GENMASK(5, 4)
1873+#define MTK_PPE_TB_CFG_AGE_PREBIND BIT(6)
1874+#define MTK_PPE_TB_CFG_AGE_NON_L4 BIT(7)
1875+#define MTK_PPE_TB_CFG_AGE_UNBIND BIT(8)
1876+#define MTK_PPE_TB_CFG_AGE_TCP BIT(9)
1877+#define MTK_PPE_TB_CFG_AGE_UDP BIT(10)
1878+#define MTK_PPE_TB_CFG_AGE_TCP_FIN BIT(11)
1879+#define MTK_PPE_TB_CFG_KEEPALIVE GENMASK(13, 12)
1880+#define MTK_PPE_TB_CFG_HASH_MODE GENMASK(15, 14)
1881+#define MTK_PPE_TB_CFG_SCAN_MODE GENMASK(17, 16)
1882+#define MTK_PPE_TB_CFG_HASH_DEBUG GENMASK(19, 18)
1883+
1884+enum {
1885+ MTK_PPE_SCAN_MODE_DISABLED,
1886+ MTK_PPE_SCAN_MODE_CHECK_AGE,
1887+ MTK_PPE_SCAN_MODE_KEEPALIVE_AGE,
1888+};
1889+
1890+enum {
1891+ MTK_PPE_KEEPALIVE_DISABLE,
1892+ MTK_PPE_KEEPALIVE_UNICAST_CPU,
1893+ MTK_PPE_KEEPALIVE_DUP_CPU = 3,
1894+};
1895+
1896+enum {
1897+ MTK_PPE_SEARCH_MISS_ACTION_DROP,
1898+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD = 2,
1899+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD = 3,
1900+};
1901+
1902+#define MTK_PPE_TB_BASE 0x220
1903+
1904+#define MTK_PPE_TB_USED 0x224
1905+#define MTK_PPE_TB_USED_NUM GENMASK(13, 0)
1906+
1907+#define MTK_PPE_BIND_RATE 0x228
1908+#define MTK_PPE_BIND_RATE_BIND GENMASK(15, 0)
1909+#define MTK_PPE_BIND_RATE_PREBIND GENMASK(31, 16)
1910+
1911+#define MTK_PPE_BIND_LIMIT0 0x22c
1912+#define MTK_PPE_BIND_LIMIT0_QUARTER GENMASK(13, 0)
1913+#define MTK_PPE_BIND_LIMIT0_HALF GENMASK(29, 16)
1914+
1915+#define MTK_PPE_BIND_LIMIT1 0x230
1916+#define MTK_PPE_BIND_LIMIT1_FULL GENMASK(13, 0)
1917+#define MTK_PPE_BIND_LIMIT1_NON_L4 GENMASK(23, 16)
1918+
1919+#define MTK_PPE_KEEPALIVE 0x234
1920+#define MTK_PPE_KEEPALIVE_TIME GENMASK(15, 0)
1921+#define MTK_PPE_KEEPALIVE_TIME_TCP GENMASK(23, 16)
1922+#define MTK_PPE_KEEPALIVE_TIME_UDP GENMASK(31, 24)
1923+
1924+#define MTK_PPE_UNBIND_AGE 0x238
1925+#define MTK_PPE_UNBIND_AGE_MIN_PACKETS GENMASK(31, 16)
1926+#define MTK_PPE_UNBIND_AGE_DELTA GENMASK(7, 0)
1927+
1928+#define MTK_PPE_BIND_AGE0 0x23c
1929+#define MTK_PPE_BIND_AGE0_DELTA_NON_L4 GENMASK(30, 16)
1930+#define MTK_PPE_BIND_AGE0_DELTA_UDP GENMASK(14, 0)
1931+
1932+#define MTK_PPE_BIND_AGE1 0x240
1933+#define MTK_PPE_BIND_AGE1_DELTA_TCP_FIN GENMASK(30, 16)
1934+#define MTK_PPE_BIND_AGE1_DELTA_TCP GENMASK(14, 0)
1935+
1936+#define MTK_PPE_HASH_SEED 0x244
1937+
1938+#define MTK_PPE_DEFAULT_CPU_PORT 0x248
1939+#define MTK_PPE_DEFAULT_CPU_PORT_MASK(_n) (GENMASK(2, 0) << ((_n) * 4))
1940+
1941+#define MTK_PPE_MTU_DROP 0x308
1942+
1943+#define MTK_PPE_VLAN_MTU0 0x30c
1944+#define MTK_PPE_VLAN_MTU0_NONE GENMASK(13, 0)
1945+#define MTK_PPE_VLAN_MTU0_1TAG GENMASK(29, 16)
1946+
1947+#define MTK_PPE_VLAN_MTU1 0x310
1948+#define MTK_PPE_VLAN_MTU1_2TAG GENMASK(13, 0)
1949+#define MTK_PPE_VLAN_MTU1_3TAG GENMASK(29, 16)
1950+
1951+#define MTK_PPE_VPM_TPID 0x318
1952+
1953+#define MTK_PPE_CACHE_CTL 0x320
1954+#define MTK_PPE_CACHE_CTL_EN BIT(0)
1955+#define MTK_PPE_CACHE_CTL_LOCK_CLR BIT(4)
1956+#define MTK_PPE_CACHE_CTL_REQ BIT(8)
1957+#define MTK_PPE_CACHE_CTL_CLEAR BIT(9)
1958+#define MTK_PPE_CACHE_CTL_CMD GENMASK(13, 12)
1959+
1960+#define MTK_PPE_MIB_CFG 0x334
1961+#define MTK_PPE_MIB_CFG_EN BIT(0)
1962+#define MTK_PPE_MIB_CFG_RD_CLR BIT(1)
1963+
1964+#define MTK_PPE_MIB_TB_BASE 0x338
1965+
1966+#define MTK_PPE_MIB_CACHE_CTL 0x350
1967+#define MTK_PPE_MIB_CACHE_CTL_EN BIT(0)
1968+#define MTK_PPE_MIB_CACHE_CTL_FLUSH BIT(2)
1969+
1970+#endif
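The field masks above are meant to be combined with FIELD_PREP() from <linux/bitfield.h> when mtk_ppe.c programs the PPE. A hedged illustration for MTK_PPE_TB_CFG (the values are illustrative, not the driver's actual configuration):

/* Illustrative sketch, not taken from the driver. */
#include <linux/bitfield.h>

static u32 example_ppe_tb_cfg(void)
{
        return FIELD_PREP(MTK_PPE_TB_CFG_ENTRY_NUM, 7) |
               FIELD_PREP(MTK_PPE_TB_CFG_KEEPALIVE,
                          MTK_PPE_KEEPALIVE_DISABLE) |
               FIELD_PREP(MTK_PPE_TB_CFG_SCAN_MODE,
                          MTK_PPE_SCAN_MODE_KEEPALIVE_AGE) |
               MTK_PPE_TB_CFG_AGE_UNBIND | MTK_PPE_TB_CFG_AGE_NON_L4 |
               MTK_PPE_TB_CFG_AGE_TCP | MTK_PPE_TB_CFG_AGE_UDP |
               MTK_PPE_TB_CFG_AGE_TCP_FIN;
}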
1971diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
developer58aa0682023-09-18 14:02:26 +08001972index 078c0f4..f8a98d8 100644
developer8cb3ac72022-07-04 10:55:14 +08001973--- a/drivers/net/ppp/ppp_generic.c
1974+++ b/drivers/net/ppp/ppp_generic.c
1975@@ -1378,12 +1378,34 @@ static void ppp_dev_priv_destructor(struct net_device *dev)
1976 ppp_destroy_interface(ppp);
1977 }
1978
1979+static int ppp_fill_forward_path(struct net_device_path_ctx *ctx,
1980+ struct net_device_path *path)
1981+{
1982+ struct ppp *ppp = netdev_priv(ctx->dev);
1983+ struct ppp_channel *chan;
1984+ struct channel *pch;
1985+
1986+ if (ppp->flags & SC_MULTILINK)
1987+ return -EOPNOTSUPP;
1988+
1989+ if (list_empty(&ppp->channels))
1990+ return -ENODEV;
1991+
1992+ pch = list_first_entry(&ppp->channels, struct channel, clist);
1993+ chan = pch->chan;
1994+ if (!chan->ops->fill_forward_path)
1995+ return -EOPNOTSUPP;
1996+
1997+ return chan->ops->fill_forward_path(ctx, path, chan);
1998+}
1999+
2000 static const struct net_device_ops ppp_netdev_ops = {
2001 .ndo_init = ppp_dev_init,
2002 .ndo_uninit = ppp_dev_uninit,
2003 .ndo_start_xmit = ppp_start_xmit,
2004 .ndo_do_ioctl = ppp_net_ioctl,
2005 .ndo_get_stats64 = ppp_get_stats64,
2006+ .ndo_fill_forward_path = ppp_fill_forward_path,
2007 };
2008
2009 static struct device_type ppp_type = {
2010diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
developer58aa0682023-09-18 14:02:26 +08002011index 087b016..7a8c246 100644
developer8cb3ac72022-07-04 10:55:14 +08002012--- a/drivers/net/ppp/pppoe.c
2013+++ b/drivers/net/ppp/pppoe.c
2014@@ -974,8 +974,32 @@ static int pppoe_xmit(struct ppp_channel *chan, struct sk_buff *skb)
2015 return __pppoe_xmit(sk, skb);
2016 }
2017
2018+static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx,
2019+ struct net_device_path *path,
2020+ const struct ppp_channel *chan)
2021+{
2022+ struct sock *sk = (struct sock *)chan->private;
2023+ struct pppox_sock *po = pppox_sk(sk);
2024+ struct net_device *dev = po->pppoe_dev;
2025+
2026+ if (sock_flag(sk, SOCK_DEAD) ||
2027+ !(sk->sk_state & PPPOX_CONNECTED) || !dev)
2028+ return -1;
2029+
2030+ path->type = DEV_PATH_PPPOE;
2031+ path->encap.proto = htons(ETH_P_PPP_SES);
2032+ path->encap.id = be16_to_cpu(po->num);
2033+ memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN);
2034+ memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN);
2035+ path->dev = ctx->dev;
2036+ ctx->dev = dev;
2037+
2038+ return 0;
2039+}
2040+
2041 static const struct ppp_channel_ops pppoe_chan_ops = {
2042 .start_xmit = pppoe_xmit,
2043+ .fill_forward_path = pppoe_fill_forward_path,
2044 };
2045
2046 static int pppoe_recvmsg(struct socket *sock, struct msghdr *m,
2047diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
developer58aa0682023-09-18 14:02:26 +08002048index 631d158..ef44d9a 100644
developer8cb3ac72022-07-04 10:55:14 +08002049--- a/include/linux/netdevice.h
2050+++ b/include/linux/netdevice.h
developer58aa0682023-09-18 14:02:26 +08002051@@ -838,6 +838,59 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
developer8cb3ac72022-07-04 10:55:14 +08002052 struct sk_buff *skb,
2053 struct net_device *sb_dev);
2054
2055+enum net_device_path_type {
2056+ DEV_PATH_ETHERNET = 0,
2057+ DEV_PATH_VLAN,
2058+ DEV_PATH_BRIDGE,
2059+ DEV_PATH_PPPOE,
2060+ DEV_PATH_DSA,
2061+};
2062+
2063+struct net_device_path {
2064+ enum net_device_path_type type;
2065+ const struct net_device *dev;
2066+ union {
2067+ struct {
2068+ u16 id;
2069+ __be16 proto;
2070+ u8 h_dest[ETH_ALEN];
2071+ } encap;
2072+ struct {
2073+ enum {
2074+ DEV_PATH_BR_VLAN_KEEP,
2075+ DEV_PATH_BR_VLAN_TAG,
2076+ DEV_PATH_BR_VLAN_UNTAG,
2077+ DEV_PATH_BR_VLAN_UNTAG_HW,
2078+ } vlan_mode;
2079+ u16 vlan_id;
2080+ __be16 vlan_proto;
2081+ } bridge;
2082+ struct {
2083+ int port;
2084+ u16 proto;
2085+ } dsa;
2086+ };
2087+};
2088+
2089+#define NET_DEVICE_PATH_STACK_MAX 5
2090+#define NET_DEVICE_PATH_VLAN_MAX 2
2091+
2092+struct net_device_path_stack {
2093+ int num_paths;
2094+ struct net_device_path path[NET_DEVICE_PATH_STACK_MAX];
2095+};
2096+
2097+struct net_device_path_ctx {
2098+ const struct net_device *dev;
2099+ u8 daddr[ETH_ALEN];
2100+
2101+ int num_vlans;
2102+ struct {
2103+ u16 id;
2104+ __be16 proto;
2105+ } vlan[NET_DEVICE_PATH_VLAN_MAX];
2106+};
2107+
2108 enum tc_setup_type {
2109 TC_SETUP_QDISC_MQPRIO,
2110 TC_SETUP_CLSU32,
developer58aa0682023-09-18 14:02:26 +08002111@@ -853,6 +906,7 @@ enum tc_setup_type {
developer8cb3ac72022-07-04 10:55:14 +08002112 TC_SETUP_ROOT_QDISC,
2113 TC_SETUP_QDISC_GRED,
2114 TC_SETUP_QDISC_TAPRIO,
2115+ TC_SETUP_FT,
2116 };
2117
2118 /* These structures hold the attributes of bpf state that are being passed
developer58aa0682023-09-18 14:02:26 +08002119@@ -1248,6 +1302,8 @@ struct tlsdev_ops;
developer8cb3ac72022-07-04 10:55:14 +08002120 * Get devlink port instance associated with a given netdev.
2121 * Called with a reference on the netdevice and devlink locks only,
2122 * rtnl_lock is not held.
2123+ * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path);
2124+ * Get the forwarding path to reach the real device from the HW destination address
2125 */
2126 struct net_device_ops {
2127 int (*ndo_init)(struct net_device *dev);
developer58aa0682023-09-18 14:02:26 +08002128@@ -1445,6 +1501,8 @@ struct net_device_ops {
developer8cb3ac72022-07-04 10:55:14 +08002129 int (*ndo_xsk_wakeup)(struct net_device *dev,
2130 u32 queue_id, u32 flags);
2131 struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev);
2132+ int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx,
2133+ struct net_device_path *path);
2134 };
2135
2136 /**
developer58aa0682023-09-18 14:02:26 +08002137@@ -2670,6 +2728,8 @@ void dev_remove_offload(struct packet_offload *po);
developer8cb3ac72022-07-04 10:55:14 +08002138
2139 int dev_get_iflink(const struct net_device *dev);
2140 int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
2141+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
2142+ struct net_device_path_stack *stack);
2143 struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
2144 unsigned short mask);
2145 struct net_device *dev_get_by_name(struct net *net, const char *name);
2146diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h
developer58aa0682023-09-18 14:02:26 +08002147index 9896606..91f9a92 100644
developer8cb3ac72022-07-04 10:55:14 +08002148--- a/include/linux/ppp_channel.h
2149+++ b/include/linux/ppp_channel.h
2150@@ -28,6 +28,9 @@ struct ppp_channel_ops {
2151 int (*start_xmit)(struct ppp_channel *, struct sk_buff *);
2152 /* Handle an ioctl call that has come in via /dev/ppp. */
2153 int (*ioctl)(struct ppp_channel *, unsigned int, unsigned long);
2154+ int (*fill_forward_path)(struct net_device_path_ctx *,
2155+ struct net_device_path *,
2156+ const struct ppp_channel *);
2157 };
2158
2159 struct ppp_channel {
2160diff --git a/include/net/dsa.h b/include/net/dsa.h
developer58aa0682023-09-18 14:02:26 +08002161index d29ee9e..43f65cb 100644
developer8cb3ac72022-07-04 10:55:14 +08002162--- a/include/net/dsa.h
2163+++ b/include/net/dsa.h
developer58aa0682023-09-18 14:02:26 +08002164@@ -562,6 +562,8 @@ struct dsa_switch_ops {
developer8cb3ac72022-07-04 10:55:14 +08002165 struct sk_buff *skb);
2166 };
2167
2168+struct dsa_port *dsa_port_from_netdev(struct net_device *netdev);
2169+
2170 struct dsa_switch_driver {
2171 struct list_head list;
2172 const struct dsa_switch_ops *ops;
developer58aa0682023-09-18 14:02:26 +08002173@@ -654,6 +656,14 @@ static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev,
developer8cb3ac72022-07-04 10:55:14 +08002174 #define BRCM_TAG_GET_PORT(v) ((v) >> 8)
2175 #define BRCM_TAG_GET_QUEUE(v) ((v) & 0xff)
2176
2177+#if IS_ENABLED(CONFIG_NET_DSA)
2178+bool dsa_slave_dev_check(const struct net_device *dev);
2179+#else
2180+static inline bool dsa_slave_dev_check(const struct net_device *dev)
2181+{
2182+ return false;
2183+}
2184+#endif
2185
2186 netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev);
2187 int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data);
2188diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
developer58aa0682023-09-18 14:02:26 +08002189index c6f7bd2..59b8736 100644
developer8cb3ac72022-07-04 10:55:14 +08002190--- a/include/net/flow_offload.h
2191+++ b/include/net/flow_offload.h
2192@@ -138,6 +138,7 @@ enum flow_action_id {
2193 FLOW_ACTION_MPLS_PUSH,
2194 FLOW_ACTION_MPLS_POP,
2195 FLOW_ACTION_MPLS_MANGLE,
2196+ FLOW_ACTION_PPPOE_PUSH,
2197 NUM_FLOW_ACTIONS,
2198 };
2199
2200@@ -213,6 +214,9 @@ struct flow_action_entry {
2201 u8 bos;
2202 u8 ttl;
2203 } mpls_mangle;
2204+ struct { /* FLOW_ACTION_PPPOE_PUSH */
2205+ u16 sid;
2206+ } pppoe;
2207 };
2208 };
2209
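A hedged sketch of how a rule builder is expected to emit the new FLOW_ACTION_PPPOE_PUSH action; the helper name is hypothetical, and it assumes the flow_rule was allocated with room for one more action entry:

/* Hypothetical helper: append a PPPoE push action carrying the session id. */
static void example_append_pppoe_push(struct flow_rule *rule, u16 sid)
{
        struct flow_action_entry *entry;

        entry = &rule->action.entries[rule->action.num_entries++];
        entry->id = FLOW_ACTION_PPPOE_PUSH;
        entry->pppoe.sid = sid;
}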
2210diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
developer58aa0682023-09-18 14:02:26 +08002211index 2c739fc..89ab8f1 100644
developer8cb3ac72022-07-04 10:55:14 +08002212--- a/include/net/ip6_route.h
2213+++ b/include/net/ip6_route.h
2214@@ -314,12 +314,13 @@ static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *
2215 !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws);
2216 }
2217
2218-static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
2219+static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst,
2220+ bool forwarding)
2221 {
2222 struct inet6_dev *idev;
2223 unsigned int mtu;
2224
2225- if (dst_metric_locked(dst, RTAX_MTU)) {
2226+ if (!forwarding || dst_metric_locked(dst, RTAX_MTU)) {
2227 mtu = dst_metric_raw(dst, RTAX_MTU);
2228 if (mtu)
2229 goto out;
2230diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
developer58aa0682023-09-18 14:02:26 +08002231index 7b3c873..e954831 100644
developer8cb3ac72022-07-04 10:55:14 +08002232--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
2233+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
2234@@ -4,7 +4,4 @@
2235
2236 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
2237
2238-#include <linux/sysctl.h>
2239-extern struct ctl_table nf_ct_ipv6_sysctl_table[];
2240-
2241 #endif /* _NF_CONNTRACK_IPV6_H*/
2242diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
developer58aa0682023-09-18 14:02:26 +08002243index 90690e3..ce0bc3e 100644
developer8cb3ac72022-07-04 10:55:14 +08002244--- a/include/net/netfilter/nf_conntrack.h
2245+++ b/include/net/netfilter/nf_conntrack.h
2246@@ -279,6 +279,18 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
2247 !nf_ct_is_dying(ct);
2248 }
2249
2250+#define NF_CT_DAY (86400 * HZ)
2251+
2252+/* Set an arbitrary timeout large enough not to ever expire; this saves
2253+ * us a check for the IPS_OFFLOAD_BIT from the packet path via
2254+ * nf_ct_is_expired().
2255+ */
2256+static inline void nf_ct_offload_timeout(struct nf_conn *ct)
2257+{
2258+ if (nf_ct_expires(ct) < NF_CT_DAY / 2)
2259+ WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
2260+}
2261+
2262 struct kernel_param;
2263
2264 int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp);
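nf_ct_offload_timeout() only rewrites ct->timeout when less than half of NF_CT_DAY remains, so it can be called on every garbage-collection pass cheaply. A hedged usage sketch (the helper is illustrative; flow_offload and NF_FLOW_HW are introduced later in this patch):

/* Illustrative only: keep the conntrack of an offloaded flow from expiring. */
static void example_keep_ct_alive(struct flow_offload *flow)
{
        if (test_bit(NF_FLOW_HW, &flow->flags))
                nf_ct_offload_timeout(flow->ct);
}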
2265diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
developer58aa0682023-09-18 14:02:26 +08002266index f7a060c..7f44a77 100644
developer8cb3ac72022-07-04 10:55:14 +08002267--- a/include/net/netfilter/nf_conntrack_acct.h
2268+++ b/include/net/netfilter/nf_conntrack_acct.h
2269@@ -65,6 +65,17 @@ static inline void nf_ct_set_acct(struct net *net, bool enable)
2270 #endif
2271 }
2272
2273+void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
2274+ unsigned int bytes);
2275+
2276+static inline void nf_ct_acct_update(struct nf_conn *ct, u32 dir,
2277+ unsigned int bytes)
2278+{
2279+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
2280+ nf_ct_acct_add(ct, dir, 1, bytes);
2281+#endif
2282+}
2283+
2284 void nf_conntrack_acct_pernet_init(struct net *net);
2285
2286 int nf_conntrack_acct_init(void);
2287diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
developer58aa0682023-09-18 14:02:26 +08002288index 68d7fc9..feac793 100644
developer8cb3ac72022-07-04 10:55:14 +08002289--- a/include/net/netfilter/nf_flow_table.h
2290+++ b/include/net/netfilter/nf_flow_table.h
2291@@ -8,31 +8,99 @@
2292 #include <linux/rcupdate.h>
2293 #include <linux/netfilter.h>
2294 #include <linux/netfilter/nf_conntrack_tuple_common.h>
2295+#include <net/flow_offload.h>
2296 #include <net/dst.h>
2297+#include <linux/if_pppox.h>
2298+#include <linux/ppp_defs.h>
2299
2300 struct nf_flowtable;
2301+struct nf_flow_rule;
2302+struct flow_offload;
2303+enum flow_offload_tuple_dir;
2304+
2305+struct nf_flow_key {
2306+ struct flow_dissector_key_meta meta;
2307+ struct flow_dissector_key_control control;
2308+ struct flow_dissector_key_control enc_control;
2309+ struct flow_dissector_key_basic basic;
2310+ struct flow_dissector_key_vlan vlan;
2311+ struct flow_dissector_key_vlan cvlan;
2312+ union {
2313+ struct flow_dissector_key_ipv4_addrs ipv4;
2314+ struct flow_dissector_key_ipv6_addrs ipv6;
2315+ };
2316+ struct flow_dissector_key_keyid enc_key_id;
2317+ union {
2318+ struct flow_dissector_key_ipv4_addrs enc_ipv4;
2319+ struct flow_dissector_key_ipv6_addrs enc_ipv6;
2320+ };
2321+ struct flow_dissector_key_tcp tcp;
2322+ struct flow_dissector_key_ports tp;
2323+} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
2324+
2325+struct nf_flow_match {
2326+ struct flow_dissector dissector;
2327+ struct nf_flow_key key;
2328+ struct nf_flow_key mask;
2329+};
2330+
2331+struct nf_flow_rule {
2332+ struct nf_flow_match match;
2333+ struct flow_rule *rule;
2334+};
2335
2336 struct nf_flowtable_type {
2337 struct list_head list;
2338 int family;
2339 int (*init)(struct nf_flowtable *ft);
2340+ int (*setup)(struct nf_flowtable *ft,
2341+ struct net_device *dev,
2342+ enum flow_block_command cmd);
2343+ int (*action)(struct net *net,
2344+ const struct flow_offload *flow,
2345+ enum flow_offload_tuple_dir dir,
2346+ struct nf_flow_rule *flow_rule);
2347 void (*free)(struct nf_flowtable *ft);
2348 nf_hookfn *hook;
2349 struct module *owner;
2350 };
2351
2352+enum nf_flowtable_flags {
2353+ NF_FLOWTABLE_HW_OFFLOAD = 0x1, /* NFT_FLOWTABLE_HW_OFFLOAD */
2354+ NF_FLOWTABLE_COUNTER = 0x2, /* NFT_FLOWTABLE_COUNTER */
2355+};
2356+
2357 struct nf_flowtable {
2358 struct list_head list;
2359 struct rhashtable rhashtable;
2360+ int priority;
2361 const struct nf_flowtable_type *type;
2362 struct delayed_work gc_work;
2363+ unsigned int flags;
2364+ struct flow_block flow_block;
2365+ struct rw_semaphore flow_block_lock; /* Guards flow_block */
2366+ possible_net_t net;
2367 };
2368
2369+static inline bool nf_flowtable_hw_offload(struct nf_flowtable *flowtable)
2370+{
2371+ return flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD;
2372+}
2373+
2374 enum flow_offload_tuple_dir {
2375 FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
2376 FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
2377- FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
2378 };
2379+#define FLOW_OFFLOAD_DIR_MAX IP_CT_DIR_MAX
2380+
2381+enum flow_offload_xmit_type {
2382+ FLOW_OFFLOAD_XMIT_UNSPEC = 0,
2383+ FLOW_OFFLOAD_XMIT_NEIGH,
2384+ FLOW_OFFLOAD_XMIT_XFRM,
2385+ FLOW_OFFLOAD_XMIT_DIRECT,
2386+};
2387+
2388+#define NF_FLOW_TABLE_ENCAP_MAX 2
2389
2390 struct flow_offload_tuple {
2391 union {
developerb7c46752022-07-04 19:51:38 +08002392@@ -52,13 +120,30 @@ struct flow_offload_tuple {
developer8cb3ac72022-07-04 10:55:14 +08002393
2394 u8 l3proto;
2395 u8 l4proto;
2396- u8 dir;
2397+ struct {
2398+ u16 id;
2399+ __be16 proto;
2400+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
2401
2402- u16 mtu;
2403+ /* All members above are keys for lookups, see flow_offload_hash(). */
2404+ struct { } __hash;
2405
developerb7c46752022-07-04 19:51:38 +08002406- struct {
2407- struct dst_entry *dst_cache;
2408- u32 dst_cookie;
developer8cb3ac72022-07-04 10:55:14 +08002409+ u8 dir:2,
2410+ xmit_type:2,
2411+ encap_num:2,
2412+ in_vlan_ingress:2;
2413+ u16 mtu;
2414+ union {
2415+ struct {
2416+ struct dst_entry *dst_cache;
2417+ u32 dst_cookie;
2418+ };
2419+ struct {
2420+ u32 ifidx;
2421+ u32 hw_ifidx;
2422+ u8 h_source[ETH_ALEN];
2423+ u8 h_dest[ETH_ALEN];
2424+ } out;
developerb7c46752022-07-04 19:51:38 +08002425 };
developer8cb3ac72022-07-04 10:55:14 +08002426 };
2427
developeree39bcf2023-06-16 08:03:30 +08002428@@ -67,52 +152,140 @@ struct flow_offload_tuple_rhash {
developer8cb3ac72022-07-04 10:55:14 +08002429 struct flow_offload_tuple tuple;
2430 };
2431
2432-#define FLOW_OFFLOAD_SNAT 0x1
2433-#define FLOW_OFFLOAD_DNAT 0x2
2434-#define FLOW_OFFLOAD_DYING 0x4
2435-#define FLOW_OFFLOAD_TEARDOWN 0x8
2436+enum nf_flow_flags {
2437+ NF_FLOW_SNAT,
2438+ NF_FLOW_DNAT,
2439+ NF_FLOW_TEARDOWN,
2440+ NF_FLOW_HW,
developeree39bcf2023-06-16 08:03:30 +08002441+ NF_FLOW_HW_ACCT_DYING,
developer8cb3ac72022-07-04 10:55:14 +08002442+ NF_FLOW_HW_DYING,
2443+ NF_FLOW_HW_DEAD,
2444+ NF_FLOW_HW_PENDING,
2445+};
2446+
2447+enum flow_offload_type {
2448+ NF_FLOW_OFFLOAD_UNSPEC = 0,
2449+ NF_FLOW_OFFLOAD_ROUTE,
2450+};
2451
2452 struct flow_offload {
2453 struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
2454- u32 flags;
2455- union {
2456- /* Your private driver data here. */
2457- u32 timeout;
2458- };
2459+ struct nf_conn *ct;
2460+ unsigned long flags;
2461+ u16 type;
2462+ u32 timeout;
2463+ struct rcu_head rcu_head;
2464 };
2465
2466 #define NF_FLOW_TIMEOUT (30 * HZ)
2467+#define nf_flowtable_time_stamp (u32)jiffies
2468+
2469+unsigned long flow_offload_get_timeout(struct flow_offload *flow);
2470+
2471+static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
2472+{
2473+ return (__s32)(timeout - nf_flowtable_time_stamp);
2474+}
2475
2476 struct nf_flow_route {
2477 struct {
2478- struct dst_entry *dst;
2479+ struct dst_entry *dst;
2480+ struct {
2481+ u32 ifindex;
2482+ struct {
2483+ u16 id;
2484+ __be16 proto;
2485+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
2486+ u8 num_encaps:2,
2487+ ingress_vlans:2;
2488+ } in;
2489+ struct {
2490+ u32 ifindex;
2491+ u32 hw_ifindex;
2492+ u8 h_source[ETH_ALEN];
2493+ u8 h_dest[ETH_ALEN];
2494+ } out;
2495+ enum flow_offload_xmit_type xmit_type;
2496 } tuple[FLOW_OFFLOAD_DIR_MAX];
2497 };
2498
2499-struct flow_offload *flow_offload_alloc(struct nf_conn *ct,
2500- struct nf_flow_route *route);
2501+struct flow_offload *flow_offload_alloc(struct nf_conn *ct);
2502 void flow_offload_free(struct flow_offload *flow);
2503
2504+static inline int
2505+nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table,
2506+ flow_setup_cb_t *cb, void *cb_priv)
2507+{
2508+ struct flow_block *block = &flow_table->flow_block;
2509+ struct flow_block_cb *block_cb;
2510+ int err = 0;
2511+
2512+ down_write(&flow_table->flow_block_lock);
2513+ block_cb = flow_block_cb_lookup(block, cb, cb_priv);
2514+ if (block_cb) {
2515+ err = -EEXIST;
2516+ goto unlock;
2517+ }
2518+
2519+ block_cb = flow_block_cb_alloc(cb, cb_priv, cb_priv, NULL);
2520+ if (IS_ERR(block_cb)) {
2521+ err = PTR_ERR(block_cb);
2522+ goto unlock;
2523+ }
2524+
2525+ list_add_tail(&block_cb->list, &block->cb_list);
2526+
2527+unlock:
2528+ up_write(&flow_table->flow_block_lock);
2529+ return err;
2530+}
2531+
2532+static inline void
2533+nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
2534+ flow_setup_cb_t *cb, void *cb_priv)
2535+{
2536+ struct flow_block *block = &flow_table->flow_block;
2537+ struct flow_block_cb *block_cb;
2538+
2539+ down_write(&flow_table->flow_block_lock);
2540+ block_cb = flow_block_cb_lookup(block, cb, cb_priv);
2541+ if (block_cb) {
2542+ list_del(&block_cb->list);
2543+ flow_block_cb_free(block_cb);
2544+ } else {
2545+ WARN_ON(true);
2546+ }
2547+ up_write(&flow_table->flow_block_lock);
2548+}
2549+
2550+int flow_offload_route_init(struct flow_offload *flow,
2551+ const struct nf_flow_route *route);
2552+
2553 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
2554+void flow_offload_refresh(struct nf_flowtable *flow_table,
2555+ struct flow_offload *flow);
2556+
2557 struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
2558 struct flow_offload_tuple *tuple);
2559+void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
2560+ struct net_device *dev);
2561 void nf_flow_table_cleanup(struct net_device *dev);
2562
2563 int nf_flow_table_init(struct nf_flowtable *flow_table);
2564 void nf_flow_table_free(struct nf_flowtable *flow_table);
2565
2566 void flow_offload_teardown(struct flow_offload *flow);
2567-static inline void flow_offload_dead(struct flow_offload *flow)
2568-{
2569- flow->flags |= FLOW_OFFLOAD_DYING;
2570-}
2571
2572-int nf_flow_snat_port(const struct flow_offload *flow,
2573- struct sk_buff *skb, unsigned int thoff,
2574- u8 protocol, enum flow_offload_tuple_dir dir);
2575-int nf_flow_dnat_port(const struct flow_offload *flow,
2576- struct sk_buff *skb, unsigned int thoff,
2577- u8 protocol, enum flow_offload_tuple_dir dir);
2578+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
2579+ void (*iter)(struct flow_offload *flow, void *data),
2580+ void *data);
2581+
2582+void nf_flow_snat_port(const struct flow_offload *flow,
2583+ struct sk_buff *skb, unsigned int thoff,
2584+ u8 protocol, enum flow_offload_tuple_dir dir);
2585+void nf_flow_dnat_port(const struct flow_offload *flow,
2586+ struct sk_buff *skb, unsigned int thoff,
2587+ u8 protocol, enum flow_offload_tuple_dir dir);
2588
2589 struct flow_ports {
2590 __be16 source, dest;
developer58aa0682023-09-18 14:02:26 +08002591@@ -126,4 +299,41 @@ unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08002592 #define MODULE_ALIAS_NF_FLOWTABLE(family) \
2593 MODULE_ALIAS("nf-flowtable-" __stringify(family))
2594
2595+void nf_flow_offload_add(struct nf_flowtable *flowtable,
2596+ struct flow_offload *flow);
2597+void nf_flow_offload_del(struct nf_flowtable *flowtable,
2598+ struct flow_offload *flow);
2599+void nf_flow_offload_stats(struct nf_flowtable *flowtable,
developeree39bcf2023-06-16 08:03:30 +08002600+ struct flow_offload *flow, bool force);
developer8cb3ac72022-07-04 10:55:14 +08002601+
2602+void nf_flow_table_offload_flush(struct nf_flowtable *flowtable);
2603+int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
2604+ struct net_device *dev,
2605+ enum flow_block_command cmd);
2606+int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
2607+ enum flow_offload_tuple_dir dir,
2608+ struct nf_flow_rule *flow_rule);
2609+int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
2610+ enum flow_offload_tuple_dir dir,
2611+ struct nf_flow_rule *flow_rule);
2612+
2613+int nf_flow_table_offload_init(void);
2614+void nf_flow_table_offload_exit(void);
2615+
2616+static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
2617+{
2618+ __be16 proto;
2619+
2620+ proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
2621+ sizeof(struct pppoe_hdr)));
2622+ switch (proto) {
2623+ case htons(PPP_IP):
2624+ return htons(ETH_P_IP);
2625+ case htons(PPP_IPV6):
2626+ return htons(ETH_P_IPV6);
2627+ }
2628+
2629+ return 0;
2630+}
2631+
2632 #endif /* _NF_FLOW_TABLE_H */
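A hedged sketch of how nf_flow_pppoe_proto() can be used on ingress to recover the inner protocol of a PPPoE session frame before the IPv4/IPv6 flowtable lookup; the helper name is hypothetical, and skb_mac_header() must already be valid:

/* Illustrative helper, not part of the patch. */
static bool example_pppoe_inner_proto(const struct sk_buff *skb, __be16 *proto)
{
        if (skb->protocol != htons(ETH_P_PPP_SES))
                return false;

        *proto = nf_flow_pppoe_proto(skb);
        return *proto == htons(ETH_P_IP) || *proto == htons(ETH_P_IPV6);
}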
2633diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
developer58aa0682023-09-18 14:02:26 +08002634index 806454e..9e3963c 100644
developer8cb3ac72022-07-04 10:55:14 +08002635--- a/include/net/netns/conntrack.h
2636+++ b/include/net/netns/conntrack.h
2637@@ -27,6 +27,9 @@ struct nf_tcp_net {
2638 int tcp_loose;
2639 int tcp_be_liberal;
2640 int tcp_max_retrans;
2641+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
2642+ unsigned int offload_timeout;
2643+#endif
2644 };
2645
2646 enum udp_conntrack {
2647@@ -37,6 +40,9 @@ enum udp_conntrack {
2648
2649 struct nf_udp_net {
2650 unsigned int timeouts[UDP_CT_MAX];
2651+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
2652+ unsigned int offload_timeout;
2653+#endif
2654 };
2655
2656 struct nf_icmp_net {
2657diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
developer58aa0682023-09-18 14:02:26 +08002658index 336014b..ae698d1 100644
developer8cb3ac72022-07-04 10:55:14 +08002659--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
2660+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
2661@@ -105,14 +105,19 @@ enum ip_conntrack_status {
2662 IPS_OFFLOAD_BIT = 14,
2663 IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT),
2664
2665+ /* Conntrack has been offloaded to hardware. */
2666+ IPS_HW_OFFLOAD_BIT = 15,
2667+ IPS_HW_OFFLOAD = (1 << IPS_HW_OFFLOAD_BIT),
2668+
2669 /* Be careful here, modifying these bits can make things messy,
2670 * so don't let users modify them directly.
2671 */
2672 IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
2673 IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
2674- IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD),
2675+ IPS_SEQ_ADJUST | IPS_TEMPLATE |
2676+ IPS_OFFLOAD | IPS_HW_OFFLOAD),
2677
2678- __IPS_MAX_BIT = 15,
2679+ __IPS_MAX_BIT = 16,
2680 };
2681
2682 /* Connection tracking event types */
2683diff --git a/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
2684new file mode 100644
developer58aa0682023-09-18 14:02:26 +08002685index 0000000..5841bbe
developer8cb3ac72022-07-04 10:55:14 +08002686--- /dev/null
2687+++ b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
2688@@ -0,0 +1,17 @@
2689+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2690+#ifndef _XT_FLOWOFFLOAD_H
2691+#define _XT_FLOWOFFLOAD_H
2692+
2693+#include <linux/types.h>
2694+
2695+enum {
2696+ XT_FLOWOFFLOAD_HW = 1 << 0,
2697+
2698+ XT_FLOWOFFLOAD_MASK = XT_FLOWOFFLOAD_HW
2699+};
2700+
2701+struct xt_flowoffload_target_info {
2702+ __u32 flags;
2703+};
2704+
2705+#endif /* _XT_FLOWOFFLOAD_H */
2706diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
developer58aa0682023-09-18 14:02:26 +08002707index 0a3a167..6112266 100644
developer8cb3ac72022-07-04 10:55:14 +08002708--- a/net/8021q/vlan_dev.c
2709+++ b/net/8021q/vlan_dev.c
2710@@ -747,6 +747,26 @@ static int vlan_dev_get_iflink(const struct net_device *dev)
2711 return real_dev->ifindex;
2712 }
2713
2714+static int vlan_dev_fill_forward_path(struct net_device_path_ctx *ctx,
2715+ struct net_device_path *path)
2716+{
2717+ struct vlan_dev_priv *vlan = vlan_dev_priv(ctx->dev);
2718+
2719+ path->type = DEV_PATH_VLAN;
2720+ path->encap.id = vlan->vlan_id;
2721+ path->encap.proto = vlan->vlan_proto;
2722+ path->dev = ctx->dev;
2723+ ctx->dev = vlan->real_dev;
2724+ if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
2725+ return -ENOSPC;
2726+
2727+ ctx->vlan[ctx->num_vlans].id = vlan->vlan_id;
2728+ ctx->vlan[ctx->num_vlans].proto = vlan->vlan_proto;
2729+ ctx->num_vlans++;
2730+
2731+ return 0;
2732+}
2733+
2734 static const struct ethtool_ops vlan_ethtool_ops = {
2735 .get_link_ksettings = vlan_ethtool_get_link_ksettings,
2736 .get_drvinfo = vlan_ethtool_get_drvinfo,
2737@@ -785,6 +805,7 @@ static const struct net_device_ops vlan_netdev_ops = {
2738 #endif
2739 .ndo_fix_features = vlan_dev_fix_features,
2740 .ndo_get_iflink = vlan_dev_get_iflink,
2741+ .ndo_fill_forward_path = vlan_dev_fill_forward_path,
2742 };
2743
2744 static void vlan_dev_free(struct net_device *dev)
2745diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
developer58aa0682023-09-18 14:02:26 +08002746index 501f77f..0940b44 100644
developer8cb3ac72022-07-04 10:55:14 +08002747--- a/net/bridge/br_device.c
2748+++ b/net/bridge/br_device.c
2749@@ -377,6 +377,54 @@ static int br_del_slave(struct net_device *dev, struct net_device *slave_dev)
2750 return br_del_if(br, slave_dev);
2751 }
2752
2753+static int br_fill_forward_path(struct net_device_path_ctx *ctx,
2754+ struct net_device_path *path)
2755+{
2756+ struct net_bridge_fdb_entry *f;
2757+ struct net_bridge_port *dst;
2758+ struct net_bridge *br;
2759+
2760+ if (netif_is_bridge_port(ctx->dev))
2761+ return -1;
2762+
2763+ br = netdev_priv(ctx->dev);
2764+
2765+ br_vlan_fill_forward_path_pvid(br, ctx, path);
2766+
2767+ f = br_fdb_find_rcu(br, ctx->daddr, path->bridge.vlan_id);
2768+ if (!f || !f->dst)
2769+ return -1;
2770+
2771+ dst = READ_ONCE(f->dst);
2772+ if (!dst)
2773+ return -1;
2774+
2775+ if (br_vlan_fill_forward_path_mode(br, dst, path))
2776+ return -1;
2777+
2778+ path->type = DEV_PATH_BRIDGE;
2779+ path->dev = dst->br->dev;
2780+ ctx->dev = dst->dev;
2781+
2782+ switch (path->bridge.vlan_mode) {
2783+ case DEV_PATH_BR_VLAN_TAG:
2784+ if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
2785+ return -ENOSPC;
2786+ ctx->vlan[ctx->num_vlans].id = path->bridge.vlan_id;
2787+ ctx->vlan[ctx->num_vlans].proto = path->bridge.vlan_proto;
2788+ ctx->num_vlans++;
2789+ break;
2790+ case DEV_PATH_BR_VLAN_UNTAG_HW:
2791+ case DEV_PATH_BR_VLAN_UNTAG:
2792+ ctx->num_vlans--;
2793+ break;
2794+ case DEV_PATH_BR_VLAN_KEEP:
2795+ break;
2796+ }
2797+
2798+ return 0;
2799+}
2800+
2801 static const struct ethtool_ops br_ethtool_ops = {
2802 .get_drvinfo = br_getinfo,
2803 .get_link = ethtool_op_get_link,
2804@@ -410,6 +458,7 @@ static const struct net_device_ops br_netdev_ops = {
2805 .ndo_bridge_setlink = br_setlink,
2806 .ndo_bridge_dellink = br_dellink,
2807 .ndo_features_check = passthru_features_check,
2808+ .ndo_fill_forward_path = br_fill_forward_path,
2809 };
2810
2811 static struct device_type br_type = {
2812diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
developer58aa0682023-09-18 14:02:26 +08002813index a736be8..4bd9e9b 100644
developer8cb3ac72022-07-04 10:55:14 +08002814--- a/net/bridge/br_private.h
2815+++ b/net/bridge/br_private.h
2816@@ -912,6 +912,13 @@ void br_vlan_port_event(struct net_bridge_port *p, unsigned long event);
2817 int br_vlan_bridge_event(struct net_device *dev, unsigned long event,
2818 void *ptr);
2819
2820+void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
2821+ struct net_device_path_ctx *ctx,
2822+ struct net_device_path *path);
2823+int br_vlan_fill_forward_path_mode(struct net_bridge *br,
2824+ struct net_bridge_port *dst,
2825+ struct net_device_path *path);
2826+
2827 static inline struct net_bridge_vlan_group *br_vlan_group(
2828 const struct net_bridge *br)
2829 {
2830@@ -1066,6 +1073,19 @@ static inline int nbp_get_num_vlan_infos(struct net_bridge_port *p,
2831 return 0;
2832 }
2833
2834+static inline void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
2835+ struct net_device_path_ctx *ctx,
2836+ struct net_device_path *path)
2837+{
2838+}
2839+
2840+static inline int br_vlan_fill_forward_path_mode(struct net_bridge *br,
2841+ struct net_bridge_port *dst,
2842+ struct net_device_path *path)
2843+{
2844+ return 0;
2845+}
2846+
2847 static inline struct net_bridge_vlan_group *br_vlan_group(
2848 const struct net_bridge *br)
2849 {
2850diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
developer58aa0682023-09-18 14:02:26 +08002851index 9257292..bcfd169 100644
developer8cb3ac72022-07-04 10:55:14 +08002852--- a/net/bridge/br_vlan.c
2853+++ b/net/bridge/br_vlan.c
2854@@ -1268,6 +1268,61 @@ int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid)
2855 }
2856 EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu);
2857
2858+void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
2859+ struct net_device_path_ctx *ctx,
2860+ struct net_device_path *path)
2861+{
2862+ struct net_bridge_vlan_group *vg;
2863+ int idx = ctx->num_vlans - 1;
2864+ u16 vid;
2865+
2866+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
2867+
2868+ if (!br_opt_get(br, BROPT_VLAN_ENABLED))
2869+ return;
2870+
2871+ vg = br_vlan_group(br);
2872+
2873+ if (idx >= 0 &&
2874+ ctx->vlan[idx].proto == br->vlan_proto) {
2875+ vid = ctx->vlan[idx].id;
2876+ } else {
2877+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_TAG;
2878+ vid = br_get_pvid(vg);
2879+ }
2880+
2881+ path->bridge.vlan_id = vid;
2882+ path->bridge.vlan_proto = br->vlan_proto;
2883+}
2884+
2885+int br_vlan_fill_forward_path_mode(struct net_bridge *br,
2886+ struct net_bridge_port *dst,
2887+ struct net_device_path *path)
2888+{
2889+ struct net_bridge_vlan_group *vg;
2890+ struct net_bridge_vlan *v;
2891+
2892+ if (!br_opt_get(br, BROPT_VLAN_ENABLED))
2893+ return 0;
2894+
2895+ vg = nbp_vlan_group_rcu(dst);
2896+ v = br_vlan_find(vg, path->bridge.vlan_id);
2897+ if (!v || !br_vlan_should_use(v))
2898+ return -EINVAL;
2899+
2900+ if (!(v->flags & BRIDGE_VLAN_INFO_UNTAGGED))
2901+ return 0;
2902+
2903+ if (path->bridge.vlan_mode == DEV_PATH_BR_VLAN_TAG)
2904+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
2905+ else if (v->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
2906+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG_HW;
2907+ else
2908+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG;
2909+
2910+ return 0;
2911+}
2912+
2913 int br_vlan_get_info(const struct net_device *dev, u16 vid,
2914 struct bridge_vlan_info *p_vinfo)
2915 {
2916diff --git a/net/core/dev.c b/net/core/dev.c
developer58aa0682023-09-18 14:02:26 +08002917index 54cc544..a117bd0 100644
developer8cb3ac72022-07-04 10:55:14 +08002918--- a/net/core/dev.c
2919+++ b/net/core/dev.c
2920@@ -639,6 +639,52 @@ int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
2921 }
2922 EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
2923
2924+static struct net_device_path *dev_fwd_path(struct net_device_path_stack *stack)
2925+{
2926+ int k = stack->num_paths++;
2927+
2928+ if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX))
2929+ return NULL;
2930+
2931+ return &stack->path[k];
2932+}
2933+
2934+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
2935+ struct net_device_path_stack *stack)
2936+{
2937+ const struct net_device *last_dev;
2938+ struct net_device_path_ctx ctx = {
2939+ .dev = dev,
2940+ };
2941+ struct net_device_path *path;
2942+ int ret = 0;
2943+
2944+ memcpy(ctx.daddr, daddr, sizeof(ctx.daddr));
2945+ stack->num_paths = 0;
2946+ while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
2947+ last_dev = ctx.dev;
2948+ path = dev_fwd_path(stack);
2949+ if (!path)
2950+ return -1;
2951+
2952+ memset(path, 0, sizeof(struct net_device_path));
2953+ ret = ctx.dev->netdev_ops->ndo_fill_forward_path(&ctx, path);
2954+ if (ret < 0)
2955+ return -1;
2956+
2957+ if (WARN_ON_ONCE(last_dev == ctx.dev))
2958+ return -1;
2959+ }
2960+ path = dev_fwd_path(stack);
2961+ if (!path)
2962+ return -1;
2963+ path->type = DEV_PATH_ETHERNET;
2964+ path->dev = ctx.dev;
2965+
2966+ return ret;
2967+}
2968+EXPORT_SYMBOL_GPL(dev_fill_forward_path);
2969+
2970 /**
2971 * __dev_get_by_name - find a device by its name
2972 * @net: the applicable net namespace
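A usage sketch for dev_fill_forward_path() (the helper name is hypothetical): the path stack is walked from the upper device down through VLAN/bridge/PPPoE/DSA layers, and the final entry is always DEV_PATH_ETHERNET for the real transmit device:

/* Illustrative helper: resolve the real lower device for a destination MAC. */
static const struct net_device *
example_resolve_real_dev(const struct net_device *dev, const u8 *daddr)
{
        struct net_device_path_stack stack;

        if (dev_fill_forward_path(dev, daddr, &stack) < 0 ||
            !stack.num_paths)
                return NULL;

        return stack.path[stack.num_paths - 1].dev;
}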
2973diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
developer58aa0682023-09-18 14:02:26 +08002974index ca80f86..35a1249 100644
developer8cb3ac72022-07-04 10:55:14 +08002975--- a/net/dsa/dsa.c
2976+++ b/net/dsa/dsa.c
2977@@ -329,6 +329,15 @@ int call_dsa_notifiers(unsigned long val, struct net_device *dev,
2978 }
2979 EXPORT_SYMBOL_GPL(call_dsa_notifiers);
2980
2981+struct dsa_port *dsa_port_from_netdev(struct net_device *netdev)
2982+{
2983+ if (!netdev || !dsa_slave_dev_check(netdev))
2984+ return ERR_PTR(-ENODEV);
2985+
2986+ return dsa_slave_to_port(netdev);
2987+}
2988+EXPORT_SYMBOL_GPL(dsa_port_from_netdev);
2989+
2990 static int __init dsa_init_module(void)
2991 {
2992 int rc;
2993diff --git a/net/dsa/slave.c b/net/dsa/slave.c
developer58aa0682023-09-18 14:02:26 +08002994index e2b91b3..2dfaa1e 100644
developer8cb3ac72022-07-04 10:55:14 +08002995--- a/net/dsa/slave.c
2996+++ b/net/dsa/slave.c
developer58aa0682023-09-18 14:02:26 +08002997@@ -1031,14 +1031,32 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
developer8cb3ac72022-07-04 10:55:14 +08002998 }
2999 }
3000
3001+static int dsa_slave_setup_ft_block(struct dsa_switch *ds, int port,
3002+ void *type_data)
3003+{
3004+ struct dsa_port *cpu_dp = dsa_to_port(ds, port)->cpu_dp;
3005+ struct net_device *master = cpu_dp->master;
3006+
3007+ if (!master->netdev_ops->ndo_setup_tc)
3008+ return -EOPNOTSUPP;
3009+
3010+ return master->netdev_ops->ndo_setup_tc(master, TC_SETUP_FT, type_data);
3011+}
3012+
3013 static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
3014 void *type_data)
3015 {
3016 struct dsa_port *dp = dsa_slave_to_port(dev);
3017 struct dsa_switch *ds = dp->ds;
3018
3019- if (type == TC_SETUP_BLOCK)
3020+ switch (type) {
3021+ case TC_SETUP_BLOCK:
3022 return dsa_slave_setup_tc_block(dev, type_data);
3023+ case TC_SETUP_FT:
3024+ return dsa_slave_setup_ft_block(ds, dp->index, type_data);
3025+ default:
3026+ break;
3027+ }
3028
3029 if (!ds->ops->port_setup_tc)
3030 return -EOPNOTSUPP;
developer58aa0682023-09-18 14:02:26 +08003031@@ -1224,6 +1242,21 @@ static struct devlink_port *dsa_slave_get_devlink_port(struct net_device *dev)
developer8cb3ac72022-07-04 10:55:14 +08003032 return dp->ds->devlink ? &dp->devlink_port : NULL;
3033 }
3034
3035+static int dsa_slave_fill_forward_path(struct net_device_path_ctx *ctx,
3036+ struct net_device_path *path)
3037+{
3038+ struct dsa_port *dp = dsa_slave_to_port(ctx->dev);
3039+ struct dsa_port *cpu_dp = dp->cpu_dp;
3040+
3041+ path->dev = ctx->dev;
3042+ path->type = DEV_PATH_DSA;
3043+ path->dsa.proto = cpu_dp->tag_ops->proto;
3044+ path->dsa.port = dp->index;
3045+ ctx->dev = cpu_dp->master;
3046+
3047+ return 0;
3048+}
3049+
3050 static const struct net_device_ops dsa_slave_netdev_ops = {
3051 .ndo_open = dsa_slave_open,
3052 .ndo_stop = dsa_slave_close,
developer58aa0682023-09-18 14:02:26 +08003053@@ -1248,6 +1281,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
developer8cb3ac72022-07-04 10:55:14 +08003054 .ndo_vlan_rx_add_vid = dsa_slave_vlan_rx_add_vid,
3055 .ndo_vlan_rx_kill_vid = dsa_slave_vlan_rx_kill_vid,
3056 .ndo_get_devlink_port = dsa_slave_get_devlink_port,
3057+ .ndo_fill_forward_path = dsa_slave_fill_forward_path,
3058 };
3059
3060 static struct device_type dsa_type = {
developer58aa0682023-09-18 14:02:26 +08003061@@ -1499,6 +1533,7 @@ bool dsa_slave_dev_check(const struct net_device *dev)
developer8cb3ac72022-07-04 10:55:14 +08003062 {
3063 return dev->netdev_ops == &dsa_slave_netdev_ops;
3064 }
3065+EXPORT_SYMBOL_GPL(dsa_slave_dev_check);
3066
3067 static int dsa_slave_changeupper(struct net_device *dev,
3068 struct netdev_notifier_changeupper_info *info)
3069diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
developer58aa0682023-09-18 14:02:26 +08003070index f17b402..803b92e 100644
developer8cb3ac72022-07-04 10:55:14 +08003071--- a/net/ipv4/netfilter/Kconfig
3072+++ b/net/ipv4/netfilter/Kconfig
3073@@ -56,8 +56,6 @@ config NF_TABLES_ARP
3074 help
3075 This option enables the ARP support for nf_tables.
3076
3077-endif # NF_TABLES
3078-
3079 config NF_FLOW_TABLE_IPV4
3080 tristate "Netfilter flow table IPv4 module"
3081 depends on NF_FLOW_TABLE
3082@@ -66,6 +64,8 @@ config NF_FLOW_TABLE_IPV4
3083
3084 To compile it as a module, choose M here.
3085
3086+endif # NF_TABLES
3087+
3088 config NF_DUP_IPV4
3089 tristate "Netfilter IPv4 packet duplication to alternate destination"
3090 depends on !NF_CONNTRACK || NF_CONNTRACK
3091diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
developer58aa0682023-09-18 14:02:26 +08003092index 8231a7a..7176d7f 100644
developer8cb3ac72022-07-04 10:55:14 +08003093--- a/net/ipv6/ip6_output.c
3094+++ b/net/ipv6/ip6_output.c
3095@@ -607,7 +607,7 @@ int ip6_forward(struct sk_buff *skb)
3096 }
3097 }
3098
3099- mtu = ip6_dst_mtu_forward(dst);
3100+ mtu = ip6_dst_mtu_maybe_forward(dst, true);
3101 if (mtu < IPV6_MIN_MTU)
3102 mtu = IPV6_MIN_MTU;
3103
3104diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
developer58aa0682023-09-18 14:02:26 +08003105index 69443e9..0b481d2 100644
developer8cb3ac72022-07-04 10:55:14 +08003106--- a/net/ipv6/netfilter/Kconfig
3107+++ b/net/ipv6/netfilter/Kconfig
3108@@ -45,7 +45,6 @@ config NFT_FIB_IPV6
3109 multicast or blackhole.
3110
3111 endif # NF_TABLES_IPV6
3112-endif # NF_TABLES
3113
3114 config NF_FLOW_TABLE_IPV6
3115 tristate "Netfilter flow table IPv6 module"
3116@@ -55,6 +54,8 @@ config NF_FLOW_TABLE_IPV6
3117
3118 To compile it as a module, choose M here.
3119
3120+endif # NF_TABLES
3121+
3122 config NF_DUP_IPV6
3123 tristate "Netfilter IPv6 packet duplication to alternate destination"
3124 depends on !NF_CONNTRACK || NF_CONNTRACK
3125diff --git a/net/ipv6/route.c b/net/ipv6/route.c
developer58aa0682023-09-18 14:02:26 +08003126index 43d185c..82a752c 100644
developer8cb3ac72022-07-04 10:55:14 +08003127--- a/net/ipv6/route.c
3128+++ b/net/ipv6/route.c
3129@@ -83,7 +83,7 @@ enum rt6_nud_state {
3130
3131 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
3132 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
3133-static unsigned int ip6_mtu(const struct dst_entry *dst);
3134+static unsigned int ip6_mtu(const struct dst_entry *dst);
3135 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
3136 static void ip6_dst_destroy(struct dst_entry *);
3137 static void ip6_dst_ifdown(struct dst_entry *,
3138@@ -3125,25 +3125,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
3139
3140 static unsigned int ip6_mtu(const struct dst_entry *dst)
3141 {
3142- struct inet6_dev *idev;
3143- unsigned int mtu;
3144-
3145- mtu = dst_metric_raw(dst, RTAX_MTU);
3146- if (mtu)
3147- goto out;
3148-
3149- mtu = IPV6_MIN_MTU;
3150-
3151- rcu_read_lock();
3152- idev = __in6_dev_get(dst->dev);
3153- if (idev)
3154- mtu = idev->cnf.mtu6;
3155- rcu_read_unlock();
3156-
3157-out:
3158- mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
3159-
3160- return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
3161+ return ip6_dst_mtu_maybe_forward(dst, false);
3162 }
3163
3164 /* MTU selection:
3165diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
developer58aa0682023-09-18 14:02:26 +08003166index b6e0a62..5d690ab 100644
developer8cb3ac72022-07-04 10:55:14 +08003167--- a/net/netfilter/Kconfig
3168+++ b/net/netfilter/Kconfig
developer58aa0682023-09-18 14:02:26 +08003169@@ -689,8 +689,6 @@ config NFT_FIB_NETDEV
developer8cb3ac72022-07-04 10:55:14 +08003170
3171 endif # NF_TABLES_NETDEV
3172
3173-endif # NF_TABLES
3174-
3175 config NF_FLOW_TABLE_INET
3176 tristate "Netfilter flow table mixed IPv4/IPv6 module"
3177 depends on NF_FLOW_TABLE
developer58aa0682023-09-18 14:02:26 +08003178@@ -699,11 +697,12 @@ config NF_FLOW_TABLE_INET
developer8cb3ac72022-07-04 10:55:14 +08003179
3180 To compile it as a module, choose M here.
3181
3182+endif # NF_TABLES
3183+
3184 config NF_FLOW_TABLE
3185 tristate "Netfilter flow table module"
3186 depends on NETFILTER_INGRESS
3187 depends on NF_CONNTRACK
3188- depends on NF_TABLES
3189 help
3190 This option adds the flow table core infrastructure.
3191
developer58aa0682023-09-18 14:02:26 +08003192@@ -983,6 +982,15 @@ config NETFILTER_XT_TARGET_NOTRACK
developer8cb3ac72022-07-04 10:55:14 +08003193 depends on NETFILTER_ADVANCED
3194 select NETFILTER_XT_TARGET_CT
3195
3196+config NETFILTER_XT_TARGET_FLOWOFFLOAD
3197+ tristate '"FLOWOFFLOAD" target support'
3198+ depends on NF_FLOW_TABLE
3199+ depends on NETFILTER_INGRESS
3200+ help
3201+ This option adds a `FLOWOFFLOAD' target, which uses the nf_flow_offload
3202+ module to speed up processing of packets by bypassing the usual
3203+	  netfilter chains.
3204+
3205 config NETFILTER_XT_TARGET_RATEEST
3206 tristate '"RATEEST" target support'
3207 depends on NETFILTER_ADVANCED
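A usage sketch for the new target, assuming the companion userspace xtables extension is installed (it is not part of this kernel patch): a plain -j FLOWOFFLOAD rule enables the software fast path, while the optional --hw flag additionally requests the hardware path handled via the NF_FLOW_HW flag below.

  iptables -I FORWARD -m conntrack --ctstate ESTABLISHED,RELATED -j FLOWOFFLOAD --hw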
3208diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
developer58aa0682023-09-18 14:02:26 +08003209index 4fc075b..d93a121 100644
developer8cb3ac72022-07-04 10:55:14 +08003210--- a/net/netfilter/Makefile
3211+++ b/net/netfilter/Makefile
3212@@ -120,7 +120,8 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
3213
3214 # flow table infrastructure
3215 obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
3216-nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
3217+nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \
3218+ nf_flow_table_offload.o
3219
3220 obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
3221
3222@@ -140,6 +141,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
3223 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
3224 obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
3225 obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
3226+obj-$(CONFIG_NETFILTER_XT_TARGET_FLOWOFFLOAD) += xt_FLOWOFFLOAD.o
3227 obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
3228 obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
3229 obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
3230diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
developer58aa0682023-09-18 14:02:26 +08003231index f6ab6f4..f689e19 100644
developer8cb3ac72022-07-04 10:55:14 +08003232--- a/net/netfilter/nf_conntrack_core.c
3233+++ b/net/netfilter/nf_conntrack_core.c
developer58aa0682023-09-18 14:02:26 +08003234@@ -864,9 +864,8 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
developer8cb3ac72022-07-04 10:55:14 +08003235 }
3236 EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
3237
3238-static inline void nf_ct_acct_update(struct nf_conn *ct,
3239- enum ip_conntrack_info ctinfo,
3240- unsigned int len)
3241+void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
3242+ unsigned int bytes)
3243 {
3244 struct nf_conn_acct *acct;
3245
3246@@ -874,10 +873,11 @@ static inline void nf_ct_acct_update(struct nf_conn *ct,
3247 if (acct) {
3248 struct nf_conn_counter *counter = acct->counter;
3249
3250- atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
3251- atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes);
3252+ atomic64_add(packets, &counter[dir].packets);
3253+ atomic64_add(bytes, &counter[dir].bytes);
3254 }
3255 }
3256+EXPORT_SYMBOL_GPL(nf_ct_acct_add);
3257
3258 static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
3259 const struct nf_conn *loser_ct)
3260@@ -891,7 +891,7 @@ static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
3261
3262 /* u32 should be fine since we must have seen one packet. */
3263 bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes);
3264- nf_ct_acct_update(ct, ctinfo, bytes);
3265+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), bytes);
3266 }
3267 }
3268
3269@@ -1238,8 +1238,10 @@ static void gc_worker(struct work_struct *work)
3270
3271 tmp = nf_ct_tuplehash_to_ctrack(h);
3272
3273- if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
3274+ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
3275+ nf_ct_offload_timeout(tmp);
3276 continue;
3277+ }
3278
3279 if (nf_ct_is_expired(tmp)) {
3280 nf_ct_gc_expired(tmp);
3281@@ -1763,7 +1765,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
3282 WRITE_ONCE(ct->timeout, extra_jiffies);
3283 acct:
3284 if (do_acct)
3285- nf_ct_acct_update(ct, ctinfo, skb->len);
3286+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
3287 }
3288 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
3289
3290@@ -1771,7 +1773,7 @@ bool nf_ct_kill_acct(struct nf_conn *ct,
3291 enum ip_conntrack_info ctinfo,
3292 const struct sk_buff *skb)
3293 {
3294- nf_ct_acct_update(ct, ctinfo, skb->len);
3295+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
3296
3297 return nf_ct_delete(ct, 0, 0);
3298 }
3299diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
developer58aa0682023-09-18 14:02:26 +08003300index e219b6f..5cdc627 100644
developer8cb3ac72022-07-04 10:55:14 +08003301--- a/net/netfilter/nf_conntrack_proto_tcp.c
3302+++ b/net/netfilter/nf_conntrack_proto_tcp.c
developer58aa0682023-09-18 14:02:26 +08003303@@ -1463,6 +1463,10 @@ void nf_conntrack_tcp_init_net(struct net *net)
developer8cb3ac72022-07-04 10:55:14 +08003304 tn->tcp_loose = nf_ct_tcp_loose;
3305 tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
3306 tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
3307+
3308+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3309+ tn->offload_timeout = 30 * HZ;
3310+#endif
3311 }
3312
3313 const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
3314diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
developer58aa0682023-09-18 14:02:26 +08003315index e3a2d01..a1579d6 100644
developer8cb3ac72022-07-04 10:55:14 +08003316--- a/net/netfilter/nf_conntrack_proto_udp.c
3317+++ b/net/netfilter/nf_conntrack_proto_udp.c
3318@@ -267,6 +267,10 @@ void nf_conntrack_udp_init_net(struct net *net)
3319
3320 for (i = 0; i < UDP_CT_MAX; i++)
3321 un->timeouts[i] = udp_timeouts[i];
3322+
3323+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3324+ un->offload_timeout = 30 * HZ;
3325+#endif
3326 }
3327
3328 const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp =
3329diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
developer58aa0682023-09-18 14:02:26 +08003330index 0b600b4..a2cfafa 100644
developer8cb3ac72022-07-04 10:55:14 +08003331--- a/net/netfilter/nf_conntrack_standalone.c
3332+++ b/net/netfilter/nf_conntrack_standalone.c
3333@@ -353,7 +353,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
3334 if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
3335 goto release;
3336
3337- if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
3338+ if (test_bit(IPS_HW_OFFLOAD_BIT, &ct->status))
3339+ seq_puts(s, "[HW_OFFLOAD] ");
3340+ else if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
3341 seq_puts(s, "[OFFLOAD] ");
3342 else if (test_bit(IPS_ASSURED_BIT, &ct->status))
3343 seq_puts(s, "[ASSURED] ");
3344@@ -620,11 +622,17 @@ enum nf_ct_sysctl_index {
3345 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE,
3346 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS,
3347 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
3348+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3349+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
3350+#endif
3351 NF_SYSCTL_CT_PROTO_TCP_LOOSE,
3352 NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
3353 NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS,
3354 NF_SYSCTL_CT_PROTO_TIMEOUT_UDP,
3355 NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
3356+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3357+ NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
3358+#endif
3359 NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
3360 NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
3361 #ifdef CONFIG_NF_CT_PROTO_SCTP
developer58aa0682023-09-18 14:02:26 +08003362@@ -811,6 +819,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
developer8cb3ac72022-07-04 10:55:14 +08003363 .mode = 0644,
3364 .proc_handler = proc_dointvec_jiffies,
3365 },
3366+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3367+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD] = {
3368+ .procname = "nf_flowtable_tcp_timeout",
3369+ .maxlen = sizeof(unsigned int),
3370+ .mode = 0644,
3371+ .proc_handler = proc_dointvec_jiffies,
3372+ },
3373+#endif
3374 [NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
3375 .procname = "nf_conntrack_tcp_loose",
3376 .maxlen = sizeof(int),
developer58aa0682023-09-18 14:02:26 +08003377@@ -845,6 +861,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
developer8cb3ac72022-07-04 10:55:14 +08003378 .mode = 0644,
3379 .proc_handler = proc_dointvec_jiffies,
3380 },
3381+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3382+ [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = {
3383+ .procname = "nf_flowtable_udp_timeout",
3384+ .maxlen = sizeof(unsigned int),
3385+ .mode = 0644,
3386+ .proc_handler = proc_dointvec_jiffies,
3387+ },
3388+#endif
3389 [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
3390 .procname = "nf_conntrack_icmp_timeout",
3391 .maxlen = sizeof(unsigned int),
developer58aa0682023-09-18 14:02:26 +08003392@@ -1021,6 +1045,11 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
developer8cb3ac72022-07-04 10:55:14 +08003393 XASSIGN(LIBERAL, &tn->tcp_be_liberal);
3394 XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans);
3395 #undef XASSIGN
3396+
3397+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3398+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout;
3399+#endif
3400+
3401 }
3402
3403 static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net,
developer58aa0682023-09-18 14:02:26 +08003404@@ -1107,6 +1136,9 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
developer8cb3ac72022-07-04 10:55:14 +08003405 table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6].data = &nf_icmpv6_pernet(net)->timeout;
3406 table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP].data = &un->timeouts[UDP_CT_UNREPLIED];
3407 table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
3408+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3409+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
3410+#endif
3411
3412 nf_conntrack_standalone_init_tcp_sysctl(net, table);
3413 nf_conntrack_standalone_init_sctp_sysctl(net, table);
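The two entries added above expose the flow table pickup timeouts as per-netns sysctls under /proc/sys/net/netfilter/, with the 30-second default coming from the offload_timeout initialization in the TCP/UDP tracker hunks earlier in this patch. A quick check/tune sketch (values are in seconds, via proc_dointvec_jiffies):

  sysctl net.netfilter.nf_flowtable_tcp_timeout
  sysctl -w net.netfilter.nf_flowtable_udp_timeout=60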
3414diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
developer58aa0682023-09-18 14:02:26 +08003415index f212cec..c3054af 100644
developer8cb3ac72022-07-04 10:55:14 +08003416--- a/net/netfilter/nf_flow_table_core.c
3417+++ b/net/netfilter/nf_flow_table_core.c
developerb7c46752022-07-04 19:51:38 +08003418@@ -7,43 +7,21 @@
developer8cb3ac72022-07-04 10:55:14 +08003419 #include <linux/netdevice.h>
3420 #include <net/ip.h>
3421 #include <net/ip6_route.h>
3422-#include <net/netfilter/nf_tables.h>
3423 #include <net/netfilter/nf_flow_table.h>
3424 #include <net/netfilter/nf_conntrack.h>
3425 #include <net/netfilter/nf_conntrack_core.h>
3426 #include <net/netfilter/nf_conntrack_l4proto.h>
3427 #include <net/netfilter/nf_conntrack_tuple.h>
3428
3429-struct flow_offload_entry {
3430- struct flow_offload flow;
3431- struct nf_conn *ct;
3432- struct rcu_head rcu_head;
3433-};
3434-
3435 static DEFINE_MUTEX(flowtable_lock);
3436 static LIST_HEAD(flowtables);
3437
developerb7c46752022-07-04 19:51:38 +08003438-static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
3439-{
3440- const struct rt6_info *rt;
3441-
3442- if (flow_tuple->l3proto == NFPROTO_IPV6) {
3443- rt = (const struct rt6_info *)flow_tuple->dst_cache;
3444- return rt6_get_cookie(rt);
3445- }
3446-
3447- return 0;
3448-}
3449-
developer8cb3ac72022-07-04 10:55:14 +08003450 static void
3451-flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
3452- struct nf_flow_route *route,
3453+flow_offload_fill_dir(struct flow_offload *flow,
3454 enum flow_offload_tuple_dir dir)
3455 {
3456 struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
3457- struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
3458- struct dst_entry *other_dst = route->tuple[!dir].dst;
3459- struct dst_entry *dst = route->tuple[dir].dst;
3460+ struct nf_conntrack_tuple *ctt = &flow->ct->tuplehash[dir].tuple;
3461
3462 ft->dir = dir;
3463
developerb7c46752022-07-04 19:51:38 +08003464@@ -51,12 +29,10 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
developer8cb3ac72022-07-04 10:55:14 +08003465 case NFPROTO_IPV4:
3466 ft->src_v4 = ctt->src.u3.in;
3467 ft->dst_v4 = ctt->dst.u3.in;
3468- ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
3469 break;
3470 case NFPROTO_IPV6:
3471 ft->src_v6 = ctt->src.u3.in6;
3472 ft->dst_v6 = ctt->dst.u3.in6;
3473- ft->mtu = ip6_dst_mtu_forward(dst);
3474 break;
3475 }
3476
developerb7c46752022-07-04 19:51:38 +08003477@@ -64,50 +40,32 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
developer8cb3ac72022-07-04 10:55:14 +08003478 ft->l4proto = ctt->dst.protonum;
3479 ft->src_port = ctt->src.u.tcp.port;
3480 ft->dst_port = ctt->dst.u.tcp.port;
3481-
3482- ft->iifidx = other_dst->dev->ifindex;
3483- ft->dst_cache = dst;
developerb7c46752022-07-04 19:51:38 +08003484- ft->dst_cookie = flow_offload_dst_cookie(ft);
developer8cb3ac72022-07-04 10:55:14 +08003485 }
3486
3487-struct flow_offload *
3488-flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
3489+struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
3490 {
3491- struct flow_offload_entry *entry;
3492 struct flow_offload *flow;
3493
3494 if (unlikely(nf_ct_is_dying(ct) ||
3495 !atomic_inc_not_zero(&ct->ct_general.use)))
3496 return NULL;
3497
3498- entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
3499- if (!entry)
3500+ flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
3501+ if (!flow)
3502 goto err_ct_refcnt;
3503
3504- flow = &entry->flow;
developerb7c46752022-07-04 19:51:38 +08003505-
developer8cb3ac72022-07-04 10:55:14 +08003506- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
3507- goto err_dst_cache_original;
developeree39bcf2023-06-16 08:03:30 +08003508-
developer7eb15dc2023-06-14 17:44:03 +08003509- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
3510- goto err_dst_cache_reply;
developeree39bcf2023-06-16 08:03:30 +08003511+ flow->ct = ct;
3512
developer8cb3ac72022-07-04 10:55:14 +08003513- entry->ct = ct;
3514-
3515- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
3516- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
3517+ flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
3518+ flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_REPLY);
3519
3520 if (ct->status & IPS_SRC_NAT)
3521- flow->flags |= FLOW_OFFLOAD_SNAT;
3522+ __set_bit(NF_FLOW_SNAT, &flow->flags);
3523 if (ct->status & IPS_DST_NAT)
3524- flow->flags |= FLOW_OFFLOAD_DNAT;
3525+ __set_bit(NF_FLOW_DNAT, &flow->flags);
3526
3527 return flow;
3528
3529-err_dst_cache_reply:
3530- dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
3531-err_dst_cache_original:
3532- kfree(entry);
3533 err_ct_refcnt:
3534 nf_ct_put(ct);
3535
developeree39bcf2023-06-16 08:03:30 +08003536@@ -115,40 +73,135 @@ flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
developer8cb3ac72022-07-04 10:55:14 +08003537 }
3538 EXPORT_SYMBOL_GPL(flow_offload_alloc);
3539
3540-static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
3541+static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
3542 {
3543- tcp->state = TCP_CONNTRACK_ESTABLISHED;
3544- tcp->seen[0].td_maxwin = 0;
3545- tcp->seen[1].td_maxwin = 0;
3546+ const struct rt6_info *rt;
3547+
3548+ if (flow_tuple->l3proto == NFPROTO_IPV6) {
3549+ rt = (const struct rt6_info *)flow_tuple->dst_cache;
3550+ return rt6_get_cookie(rt);
3551+ }
3552+
3553+ return 0;
3554 }
3555
3556-#define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ)
3557-#define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ)
3558+static int flow_offload_fill_route(struct flow_offload *flow,
3559+ const struct nf_flow_route *route,
3560+ enum flow_offload_tuple_dir dir)
3561+{
3562+ struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
3563+ struct dst_entry *dst = route->tuple[dir].dst;
3564+ int i, j = 0;
developeree39bcf2023-06-16 08:03:30 +08003565+
developer8cb3ac72022-07-04 10:55:14 +08003566+ switch (flow_tuple->l3proto) {
3567+ case NFPROTO_IPV4:
3568+ flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
3569+ break;
3570+ case NFPROTO_IPV6:
3571+ flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true);
3572+ break;
3573+ }
3574+
3575+ flow_tuple->iifidx = route->tuple[dir].in.ifindex;
3576+ for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {
3577+ flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
3578+ flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
3579+ if (route->tuple[dir].in.ingress_vlans & BIT(i))
3580+ flow_tuple->in_vlan_ingress |= BIT(j);
3581+ j++;
3582+ }
3583+ flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
3584+
3585+ switch (route->tuple[dir].xmit_type) {
3586+ case FLOW_OFFLOAD_XMIT_DIRECT:
3587+ memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
3588+ ETH_ALEN);
3589+ memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
3590+ ETH_ALEN);
3591+ flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
3592+ flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
3593+ break;
3594+ case FLOW_OFFLOAD_XMIT_XFRM:
3595+ case FLOW_OFFLOAD_XMIT_NEIGH:
3596+ if (!dst_hold_safe(route->tuple[dir].dst))
3597+ return -1;
3598+
3599+ flow_tuple->dst_cache = dst;
3600+ flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
3601+ break;
3602+ default:
3603+ WARN_ON_ONCE(1);
3604+ break;
3605+ }
3606+ flow_tuple->xmit_type = route->tuple[dir].xmit_type;
developerb7c46752022-07-04 19:51:38 +08003607+
developer8cb3ac72022-07-04 10:55:14 +08003608+ return 0;
3609+}
3610+
3611+static void nft_flow_dst_release(struct flow_offload *flow,
3612+ enum flow_offload_tuple_dir dir)
developeree39bcf2023-06-16 08:03:30 +08003613+{
developer8cb3ac72022-07-04 10:55:14 +08003614+ if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
3615+ flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
3616+ dst_release(flow->tuplehash[dir].tuple.dst_cache);
developeree39bcf2023-06-16 08:03:30 +08003617+}
3618+
developer8cb3ac72022-07-04 10:55:14 +08003619+int flow_offload_route_init(struct flow_offload *flow,
3620+ const struct nf_flow_route *route)
developeree39bcf2023-06-16 08:03:30 +08003621+{
developer8cb3ac72022-07-04 10:55:14 +08003622+ int err;
developeree39bcf2023-06-16 08:03:30 +08003623+
developer8cb3ac72022-07-04 10:55:14 +08003624+ err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
3625+ if (err < 0)
3626+ return err;
developeree39bcf2023-06-16 08:03:30 +08003627+
developer8cb3ac72022-07-04 10:55:14 +08003628+ err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
3629+ if (err < 0)
3630+ goto err_route_reply;
3631+
3632+ flow->type = NF_FLOW_OFFLOAD_ROUTE;
developeree39bcf2023-06-16 08:03:30 +08003633+
developer8cb3ac72022-07-04 10:55:14 +08003634+ return 0;
3635+
3636+err_route_reply:
3637+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
3638+
3639+ return err;
developeree39bcf2023-06-16 08:03:30 +08003640+}
developer8cb3ac72022-07-04 10:55:14 +08003641+EXPORT_SYMBOL_GPL(flow_offload_route_init);
developerb7c46752022-07-04 19:51:38 +08003642
developeree39bcf2023-06-16 08:03:30 +08003643-static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
developer8cb3ac72022-07-04 10:55:14 +08003644+static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
3645 {
developeree39bcf2023-06-16 08:03:30 +08003646- return (__s32)(timeout - (u32)jiffies);
3647+ tcp->state = TCP_CONNTRACK_ESTABLISHED;
developer8cb3ac72022-07-04 10:55:14 +08003648+ tcp->seen[0].td_maxwin = 0;
3649+ tcp->seen[1].td_maxwin = 0;
3650 }
3651
developeree39bcf2023-06-16 08:03:30 +08003652 static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
developer8cb3ac72022-07-04 10:55:14 +08003653 {
developeree39bcf2023-06-16 08:03:30 +08003654- const struct nf_conntrack_l4proto *l4proto;
developer8cb3ac72022-07-04 10:55:14 +08003655+ struct net *net = nf_ct_net(ct);
developeree39bcf2023-06-16 08:03:30 +08003656 int l4num = nf_ct_protonum(ct);
3657- unsigned int timeout;
developer8cb3ac72022-07-04 10:55:14 +08003658+ s32 timeout;
developeree39bcf2023-06-16 08:03:30 +08003659
3660- l4proto = nf_ct_l4proto_find(l4num);
3661- if (!l4proto)
3662- return;
developer8cb3ac72022-07-04 10:55:14 +08003663+ if (l4num == IPPROTO_TCP) {
3664+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
developeree39bcf2023-06-16 08:03:30 +08003665
3666- if (l4num == IPPROTO_TCP)
3667- timeout = NF_FLOWTABLE_TCP_PICKUP_TIMEOUT;
3668- else if (l4num == IPPROTO_UDP)
3669- timeout = NF_FLOWTABLE_UDP_PICKUP_TIMEOUT;
3670- else
3671+ timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
developer8cb3ac72022-07-04 10:55:14 +08003672+ timeout -= tn->offload_timeout;
3673+ } else if (l4num == IPPROTO_UDP) {
3674+ struct nf_udp_net *tn = nf_udp_pernet(net);
3675+
3676+ timeout = tn->timeouts[UDP_CT_REPLIED];
3677+ timeout -= tn->offload_timeout;
3678+ } else {
developeree39bcf2023-06-16 08:03:30 +08003679 return;
developer8cb3ac72022-07-04 10:55:14 +08003680+ }
3681+
3682+ if (timeout < 0)
3683+ timeout = 0;
developeree39bcf2023-06-16 08:03:30 +08003684
3685- if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
3686- ct->timeout = nfct_time_stamp + timeout;
developer8cb3ac72022-07-04 10:55:14 +08003687+ if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
3688+ WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
3689 }
3690
developeree39bcf2023-06-16 08:03:30 +08003691 static void flow_offload_fixup_ct_state(struct nf_conn *ct)
3692@@ -163,17 +216,23 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
3693 flow_offload_fixup_ct_timeout(ct);
3694 }
3695
developer8cb3ac72022-07-04 10:55:14 +08003696-void flow_offload_free(struct flow_offload *flow)
3697+static void flow_offload_route_release(struct flow_offload *flow)
3698 {
3699- struct flow_offload_entry *e;
3700+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
3701+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY);
3702+}
3703
3704- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
3705- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
3706- e = container_of(flow, struct flow_offload_entry, flow);
3707- if (flow->flags & FLOW_OFFLOAD_DYING)
3708- nf_ct_delete(e->ct, 0, 0);
3709- nf_ct_put(e->ct);
3710- kfree_rcu(e, rcu_head);
3711+void flow_offload_free(struct flow_offload *flow)
3712+{
3713+ switch (flow->type) {
3714+ case NF_FLOW_OFFLOAD_ROUTE:
3715+ flow_offload_route_release(flow);
3716+ break;
3717+ default:
3718+ break;
3719+ }
3720+ nf_ct_put(flow->ct);
3721+ kfree_rcu(flow, rcu_head);
3722 }
3723 EXPORT_SYMBOL_GPL(flow_offload_free);
3724
developeree39bcf2023-06-16 08:03:30 +08003725@@ -181,14 +240,14 @@ static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
developer8cb3ac72022-07-04 10:55:14 +08003726 {
3727 const struct flow_offload_tuple *tuple = data;
3728
3729- return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
3730+ return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed);
3731 }
3732
3733 static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
3734 {
3735 const struct flow_offload_tuple_rhash *tuplehash = data;
3736
3737- return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
3738+ return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed);
3739 }
3740
3741 static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
developeree39bcf2023-06-16 08:03:30 +08003742@@ -197,7 +256,7 @@ static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
developer8cb3ac72022-07-04 10:55:14 +08003743 const struct flow_offload_tuple *tuple = arg->key;
3744 const struct flow_offload_tuple_rhash *x = ptr;
3745
3746- if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
3747+ if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash)))
3748 return 1;
3749
3750 return 0;
developeree39bcf2023-06-16 08:03:30 +08003751@@ -211,30 +270,30 @@ static const struct rhashtable_params nf_flow_offload_rhash_params = {
developer8cb3ac72022-07-04 10:55:14 +08003752 .automatic_shrinking = true,
3753 };
3754
3755-#define DAY (86400 * HZ)
3756-
3757-/* Set an arbitrary timeout large enough not to ever expire, this save
3758- * us a check for the IPS_OFFLOAD_BIT from the packet path via
3759- * nf_ct_is_expired().
3760- */
3761-static void nf_ct_offload_timeout(struct flow_offload *flow)
3762+unsigned long flow_offload_get_timeout(struct flow_offload *flow)
3763 {
3764- struct flow_offload_entry *entry;
3765- struct nf_conn *ct;
3766+ unsigned long timeout = NF_FLOW_TIMEOUT;
3767+ struct net *net = nf_ct_net(flow->ct);
3768+ int l4num = nf_ct_protonum(flow->ct);
developeree39bcf2023-06-16 08:03:30 +08003769
3770- entry = container_of(flow, struct flow_offload_entry, flow);
3771- ct = entry->ct;
developerb7c46752022-07-04 19:51:38 +08003772+ if (l4num == IPPROTO_TCP) {
3773+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
developeree39bcf2023-06-16 08:03:30 +08003774
3775- if (nf_ct_expires(ct) < DAY / 2)
3776- ct->timeout = nfct_time_stamp + DAY;
developer8cb3ac72022-07-04 10:55:14 +08003777+ timeout = tn->offload_timeout;
3778+ } else if (l4num == IPPROTO_UDP) {
3779+ struct nf_udp_net *tn = nf_udp_pernet(net);
developeree39bcf2023-06-16 08:03:30 +08003780+
developer8cb3ac72022-07-04 10:55:14 +08003781+ timeout = tn->offload_timeout;
3782+ }
developeree39bcf2023-06-16 08:03:30 +08003783+
developer8cb3ac72022-07-04 10:55:14 +08003784+ return timeout;
3785 }
3786
3787 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
3788 {
3789 int err;
3790
3791- nf_ct_offload_timeout(flow);
3792- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
3793+ flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
3794
3795 err = rhashtable_insert_fast(&flow_table->rhashtable,
3796 &flow->tuplehash[0].node,
developeree39bcf2023-06-16 08:03:30 +08003797@@ -252,10 +311,35 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
developer8cb3ac72022-07-04 10:55:14 +08003798 return err;
3799 }
3800
3801+ nf_ct_offload_timeout(flow->ct);
3802+
3803+ if (nf_flowtable_hw_offload(flow_table)) {
3804+ __set_bit(NF_FLOW_HW, &flow->flags);
3805+ nf_flow_offload_add(flow_table, flow);
3806+ }
3807+
3808 return 0;
3809 }
3810 EXPORT_SYMBOL_GPL(flow_offload_add);
3811
3812+void flow_offload_refresh(struct nf_flowtable *flow_table,
3813+ struct flow_offload *flow)
3814+{
3815+ u32 timeout;
3816+
3817+ timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
3818+ if (timeout - READ_ONCE(flow->timeout) > HZ)
3819+ WRITE_ONCE(flow->timeout, timeout);
3820+ else
3821+ return;
3822+
3823+ if (likely(!nf_flowtable_hw_offload(flow_table)))
3824+ return;
3825+
3826+ nf_flow_offload_add(flow_table, flow);
3827+}
3828+EXPORT_SYMBOL_GPL(flow_offload_refresh);
3829+
3830 static inline bool nf_flow_has_expired(const struct flow_offload *flow)
3831 {
3832 return nf_flow_timeout_delta(flow->timeout) <= 0;
developeree39bcf2023-06-16 08:03:30 +08003833@@ -264,8 +348,6 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
developer8cb3ac72022-07-04 10:55:14 +08003834 static void flow_offload_del(struct nf_flowtable *flow_table,
3835 struct flow_offload *flow)
3836 {
3837- struct flow_offload_entry *e;
3838-
3839 rhashtable_remove_fast(&flow_table->rhashtable,
3840 &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
3841 nf_flow_offload_rhash_params);
developeree39bcf2023-06-16 08:03:30 +08003842@@ -273,28 +355,21 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003843 &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
3844 nf_flow_offload_rhash_params);
developeree39bcf2023-06-16 08:03:30 +08003845
developer8cb3ac72022-07-04 10:55:14 +08003846- e = container_of(flow, struct flow_offload_entry, flow);
3847- clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
developeree39bcf2023-06-16 08:03:30 +08003848+ clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
3849
3850 if (nf_flow_has_expired(flow))
developer8cb3ac72022-07-04 10:55:14 +08003851- flow_offload_fixup_ct(e->ct);
3852- else if (flow->flags & FLOW_OFFLOAD_TEARDOWN)
3853- flow_offload_fixup_ct_timeout(e->ct);
3854-
3855- if (!(flow->flags & FLOW_OFFLOAD_TEARDOWN))
3856- flow_offload_fixup_ct_state(e->ct);
developeree39bcf2023-06-16 08:03:30 +08003857+ flow_offload_fixup_ct(flow->ct);
3858+ else
3859+ flow_offload_fixup_ct_timeout(flow->ct);
3860
developer8cb3ac72022-07-04 10:55:14 +08003861 flow_offload_free(flow);
3862 }
3863
3864 void flow_offload_teardown(struct flow_offload *flow)
3865 {
3866- struct flow_offload_entry *e;
developerb7c46752022-07-04 19:51:38 +08003867-
3868- flow->flags |= FLOW_OFFLOAD_TEARDOWN;
developeree39bcf2023-06-16 08:03:30 +08003869+ set_bit(NF_FLOW_TEARDOWN, &flow->flags);
3870
developer8cb3ac72022-07-04 10:55:14 +08003871- e = container_of(flow, struct flow_offload_entry, flow);
3872- flow_offload_fixup_ct_state(e->ct);
developeree39bcf2023-06-16 08:03:30 +08003873+ flow_offload_fixup_ct_state(flow->ct);
developer8cb3ac72022-07-04 10:55:14 +08003874 }
3875 EXPORT_SYMBOL_GPL(flow_offload_teardown);
3876
developeree39bcf2023-06-16 08:03:30 +08003877@@ -304,7 +379,6 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003878 {
3879 struct flow_offload_tuple_rhash *tuplehash;
3880 struct flow_offload *flow;
3881- struct flow_offload_entry *e;
3882 int dir;
3883
3884 tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
developeree39bcf2023-06-16 08:03:30 +08003885@@ -314,19 +388,17 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003886
3887 dir = tuplehash->tuple.dir;
3888 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
3889- if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
3890+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags))
3891 return NULL;
3892
3893- e = container_of(flow, struct flow_offload_entry, flow);
3894- if (unlikely(nf_ct_is_dying(e->ct)))
3895+ if (unlikely(nf_ct_is_dying(flow->ct)))
3896 return NULL;
3897
3898 return tuplehash;
3899 }
3900 EXPORT_SYMBOL_GPL(flow_offload_lookup);
3901
3902-static int
3903-nf_flow_table_iterate(struct nf_flowtable *flow_table,
3904+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
3905 void (*iter)(struct flow_offload *flow, void *data),
3906 void *data)
3907 {
developeree39bcf2023-06-16 08:03:30 +08003908@@ -339,7 +411,6 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003909 rhashtable_walk_start(&hti);
3910
3911 while ((tuplehash = rhashtable_walk_next(&hti))) {
3912-
3913 if (IS_ERR(tuplehash)) {
3914 if (PTR_ERR(tuplehash) != -EAGAIN) {
3915 err = PTR_ERR(tuplehash);
developeree39bcf2023-06-16 08:03:30 +08003916@@ -359,23 +430,52 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003917
3918 return err;
3919 }
3920+EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
3921
developeree39bcf2023-06-16 08:03:30 +08003922-static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
3923+static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple)
developer8cb3ac72022-07-04 10:55:14 +08003924 {
developeree39bcf2023-06-16 08:03:30 +08003925- struct nf_flowtable *flow_table = data;
developer8cb3ac72022-07-04 10:55:14 +08003926- struct flow_offload_entry *e;
3927- bool teardown;
developeree39bcf2023-06-16 08:03:30 +08003928+ struct dst_entry *dst;
developer8cb3ac72022-07-04 10:55:14 +08003929
3930- e = container_of(flow, struct flow_offload_entry, flow);
developeree39bcf2023-06-16 08:03:30 +08003931+ if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
3932+ tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
3933+ dst = tuple->dst_cache;
3934+ if (!dst_check(dst, tuple->dst_cookie))
3935+ return true;
3936+ }
3937
developer8cb3ac72022-07-04 10:55:14 +08003938- teardown = flow->flags & (FLOW_OFFLOAD_DYING |
3939- FLOW_OFFLOAD_TEARDOWN);
developeree39bcf2023-06-16 08:03:30 +08003940+ return false;
3941+}
3942
developer8cb3ac72022-07-04 10:55:14 +08003943- if (!teardown)
3944- nf_ct_offload_timeout(flow);
developeree39bcf2023-06-16 08:03:30 +08003945+static bool nf_flow_has_stale_dst(struct flow_offload *flow)
3946+{
3947+ return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) ||
3948+ flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple);
3949+}
developer8cb3ac72022-07-04 10:55:14 +08003950
3951- if (nf_flow_has_expired(flow) || teardown)
3952- flow_offload_del(flow_table, flow);
developeree39bcf2023-06-16 08:03:30 +08003953+static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
3954+{
3955+ struct nf_flowtable *flow_table = data;
3956+
3957+ if (nf_flow_has_expired(flow) ||
3958+ nf_ct_is_dying(flow->ct) ||
3959+ nf_flow_has_stale_dst(flow))
3960+ set_bit(NF_FLOW_TEARDOWN, &flow->flags);
3961+
developer8cb3ac72022-07-04 10:55:14 +08003962+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
3963+ if (test_bit(NF_FLOW_HW, &flow->flags)) {
developeree39bcf2023-06-16 08:03:30 +08003964+ if (!test_and_set_bit(NF_FLOW_HW_ACCT_DYING, &flow->flags))
3965+ nf_flow_offload_stats(flow_table, flow, true);
3966+
developer8cb3ac72022-07-04 10:55:14 +08003967+ if (!test_bit(NF_FLOW_HW_DYING, &flow->flags))
3968+ nf_flow_offload_del(flow_table, flow);
3969+ else if (test_bit(NF_FLOW_HW_DEAD, &flow->flags))
3970+ flow_offload_del(flow_table, flow);
3971+ } else {
3972+ flow_offload_del(flow_table, flow);
3973+ }
3974+ } else if (test_bit(NF_FLOW_HW, &flow->flags)) {
developeree39bcf2023-06-16 08:03:30 +08003975+ nf_flow_offload_stats(flow_table, flow, false);
developer8cb3ac72022-07-04 10:55:14 +08003976+ }
3977 }
3978
3979 static void nf_flow_offload_work_gc(struct work_struct *work)
developer58aa0682023-09-18 14:02:26 +08003980@@ -387,30 +487,20 @@ static void nf_flow_offload_work_gc(struct work_struct *work)
developer8cb3ac72022-07-04 10:55:14 +08003981 queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
3982 }
3983
3984-static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
3985- __be16 port, __be16 new_port)
3986+static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
3987+ __be16 port, __be16 new_port)
3988 {
3989 struct tcphdr *tcph;
3990
3991- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
3992- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
3993- return -1;
3994-
3995 tcph = (void *)(skb_network_header(skb) + thoff);
3996 inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
3997-
3998- return 0;
3999 }
4000
4001-static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
4002- __be16 port, __be16 new_port)
4003+static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
4004+ __be16 port, __be16 new_port)
4005 {
4006 struct udphdr *udph;
4007
4008- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
4009- skb_try_make_writable(skb, thoff + sizeof(*udph)))
4010- return -1;
4011-
4012 udph = (void *)(skb_network_header(skb) + thoff);
4013 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
4014 inet_proto_csum_replace2(&udph->check, skb, port,
developer58aa0682023-09-18 14:02:26 +08004015@@ -418,38 +508,28 @@ static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
developer8cb3ac72022-07-04 10:55:14 +08004016 if (!udph->check)
4017 udph->check = CSUM_MANGLED_0;
4018 }
4019-
4020- return 0;
4021 }
4022
4023-static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
4024- u8 protocol, __be16 port, __be16 new_port)
4025+static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
4026+ u8 protocol, __be16 port, __be16 new_port)
4027 {
4028 switch (protocol) {
4029 case IPPROTO_TCP:
4030- if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
4031- return NF_DROP;
4032+ nf_flow_nat_port_tcp(skb, thoff, port, new_port);
4033 break;
4034 case IPPROTO_UDP:
4035- if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
4036- return NF_DROP;
4037+ nf_flow_nat_port_udp(skb, thoff, port, new_port);
4038 break;
4039 }
4040-
4041- return 0;
4042 }
4043
4044-int nf_flow_snat_port(const struct flow_offload *flow,
4045- struct sk_buff *skb, unsigned int thoff,
4046- u8 protocol, enum flow_offload_tuple_dir dir)
4047+void nf_flow_snat_port(const struct flow_offload *flow,
4048+ struct sk_buff *skb, unsigned int thoff,
4049+ u8 protocol, enum flow_offload_tuple_dir dir)
4050 {
4051 struct flow_ports *hdr;
4052 __be16 port, new_port;
4053
4054- if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
4055- skb_try_make_writable(skb, thoff + sizeof(*hdr)))
4056- return -1;
4057-
4058 hdr = (void *)(skb_network_header(skb) + thoff);
4059
4060 switch (dir) {
developer58aa0682023-09-18 14:02:26 +08004061@@ -463,25 +543,19 @@ int nf_flow_snat_port(const struct flow_offload *flow,
developer8cb3ac72022-07-04 10:55:14 +08004062 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
4063 hdr->dest = new_port;
4064 break;
4065- default:
4066- return -1;
4067 }
4068
4069- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4070+ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4071 }
4072 EXPORT_SYMBOL_GPL(nf_flow_snat_port);
4073
4074-int nf_flow_dnat_port(const struct flow_offload *flow,
4075- struct sk_buff *skb, unsigned int thoff,
4076- u8 protocol, enum flow_offload_tuple_dir dir)
4077+void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb,
4078+ unsigned int thoff, u8 protocol,
4079+ enum flow_offload_tuple_dir dir)
4080 {
4081 struct flow_ports *hdr;
4082 __be16 port, new_port;
4083
4084- if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
4085- skb_try_make_writable(skb, thoff + sizeof(*hdr)))
4086- return -1;
4087-
4088 hdr = (void *)(skb_network_header(skb) + thoff);
4089
4090 switch (dir) {
developer58aa0682023-09-18 14:02:26 +08004091@@ -495,11 +569,9 @@ int nf_flow_dnat_port(const struct flow_offload *flow,
developer8cb3ac72022-07-04 10:55:14 +08004092 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
4093 hdr->source = new_port;
4094 break;
4095- default:
4096- return -1;
4097 }
4098
4099- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4100+ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4101 }
4102 EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
4103
developer58aa0682023-09-18 14:02:26 +08004104@@ -507,7 +579,9 @@ int nf_flow_table_init(struct nf_flowtable *flowtable)
developer8cb3ac72022-07-04 10:55:14 +08004105 {
4106 int err;
4107
4108- INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
4109+ INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
4110+ flow_block_init(&flowtable->flow_block);
4111+ init_rwsem(&flowtable->flow_block_lock);
4112
4113 err = rhashtable_init(&flowtable->rhashtable,
4114 &nf_flow_offload_rhash_params);
developer58aa0682023-09-18 14:02:26 +08004115@@ -528,25 +602,24 @@ EXPORT_SYMBOL_GPL(nf_flow_table_init);
developer8cb3ac72022-07-04 10:55:14 +08004116 static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
4117 {
4118 struct net_device *dev = data;
4119- struct flow_offload_entry *e;
4120-
4121- e = container_of(flow, struct flow_offload_entry, flow);
4122
4123 if (!dev) {
4124 flow_offload_teardown(flow);
4125 return;
4126 }
4127- if (net_eq(nf_ct_net(e->ct), dev_net(dev)) &&
4128+
4129+ if (net_eq(nf_ct_net(flow->ct), dev_net(dev)) &&
4130 (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
4131 flow->tuplehash[1].tuple.iifidx == dev->ifindex))
4132- flow_offload_dead(flow);
4133+ flow_offload_teardown(flow);
4134 }
4135
4136-static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
4137- struct net_device *dev)
4138+void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
4139+ struct net_device *dev)
4140 {
4141 nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
4142 flush_delayed_work(&flowtable->gc_work);
4143+ nf_flow_table_offload_flush(flowtable);
4144 }
4145
4146 void nf_flow_table_cleanup(struct net_device *dev)
developer58aa0682023-09-18 14:02:26 +08004147@@ -555,7 +628,7 @@ void nf_flow_table_cleanup(struct net_device *dev)
developer8cb3ac72022-07-04 10:55:14 +08004148
4149 mutex_lock(&flowtable_lock);
4150 list_for_each_entry(flowtable, &flowtables, list)
4151- nf_flow_table_iterate_cleanup(flowtable, dev);
4152+ nf_flow_table_gc_cleanup(flowtable, dev);
4153 mutex_unlock(&flowtable_lock);
4154 }
4155 EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
developer58aa0682023-09-18 14:02:26 +08004156@@ -565,9 +638,14 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
developer8cb3ac72022-07-04 10:55:14 +08004157 mutex_lock(&flowtable_lock);
4158 list_del(&flow_table->list);
4159 mutex_unlock(&flowtable_lock);
4160+
4161 cancel_delayed_work_sync(&flow_table->gc_work);
4162 nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
4163 nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
4164+ nf_flow_table_offload_flush(flow_table);
4165+ if (nf_flowtable_hw_offload(flow_table))
4166+ nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step,
4167+ flow_table);
4168 rhashtable_destroy(&flow_table->rhashtable);
4169 }
4170 EXPORT_SYMBOL_GPL(nf_flow_table_free);
developer58aa0682023-09-18 14:02:26 +08004171@@ -591,12 +669,23 @@ static struct notifier_block flow_offload_netdev_notifier = {
developer8cb3ac72022-07-04 10:55:14 +08004172
4173 static int __init nf_flow_table_module_init(void)
4174 {
4175- return register_netdevice_notifier(&flow_offload_netdev_notifier);
4176+ int ret;
4177+
4178+ ret = nf_flow_table_offload_init();
4179+ if (ret)
4180+ return ret;
4181+
4182+ ret = register_netdevice_notifier(&flow_offload_netdev_notifier);
4183+ if (ret)
4184+ nf_flow_table_offload_exit();
4185+
4186+ return ret;
4187 }
4188
4189 static void __exit nf_flow_table_module_exit(void)
4190 {
4191 unregister_netdevice_notifier(&flow_offload_netdev_notifier);
4192+ nf_flow_table_offload_exit();
4193 }
4194
4195 module_init(nf_flow_table_module_init);
developer58aa0682023-09-18 14:02:26 +08004196@@ -604,3 +693,4 @@ module_exit(nf_flow_table_module_exit);
developer8cb3ac72022-07-04 10:55:14 +08004197
4198 MODULE_LICENSE("GPL");
4199 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
4200+MODULE_DESCRIPTION("Netfilter flow table module");
4201diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
developer58aa0682023-09-18 14:02:26 +08004202index 397129b..6257d87 100644
developer8cb3ac72022-07-04 10:55:14 +08004203--- a/net/netfilter/nf_flow_table_ip.c
4204+++ b/net/netfilter/nf_flow_table_ip.c
4205@@ -7,11 +7,13 @@
4206 #include <linux/ip.h>
4207 #include <linux/ipv6.h>
4208 #include <linux/netdevice.h>
4209+#include <linux/if_ether.h>
4210 #include <net/ip.h>
4211 #include <net/ipv6.h>
4212 #include <net/ip6_route.h>
4213 #include <net/neighbour.h>
4214 #include <net/netfilter/nf_flow_table.h>
4215+#include <net/netfilter/nf_conntrack_acct.h>
4216 /* For layer 4 checksum field offset. */
4217 #include <linux/tcp.h>
4218 #include <linux/udp.h>
4219@@ -24,9 +26,6 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
4220 if (proto != IPPROTO_TCP)
4221 return 0;
4222
4223- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
4224- return -1;
4225-
4226 tcph = (void *)(skb_network_header(skb) + thoff);
4227 if (unlikely(tcph->fin || tcph->rst)) {
4228 flow_offload_teardown(flow);
4229@@ -36,30 +35,20 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
4230 return 0;
4231 }
4232
4233-static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
4234- __be32 addr, __be32 new_addr)
4235+static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
4236+ __be32 addr, __be32 new_addr)
4237 {
4238 struct tcphdr *tcph;
4239
4240- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
4241- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
4242- return -1;
4243-
4244 tcph = (void *)(skb_network_header(skb) + thoff);
4245 inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
4246-
4247- return 0;
4248 }
4249
4250-static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
4251- __be32 addr, __be32 new_addr)
4252+static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
4253+ __be32 addr, __be32 new_addr)
4254 {
4255 struct udphdr *udph;
4256
4257- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
4258- skb_try_make_writable(skb, thoff + sizeof(*udph)))
4259- return -1;
4260-
4261 udph = (void *)(skb_network_header(skb) + thoff);
4262 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
4263 inet_proto_csum_replace4(&udph->check, skb, addr,
4264@@ -67,31 +56,25 @@ static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
4265 if (!udph->check)
4266 udph->check = CSUM_MANGLED_0;
4267 }
4268-
4269- return 0;
4270 }
4271
4272-static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
4273- unsigned int thoff, __be32 addr,
4274- __be32 new_addr)
4275+static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
4276+ unsigned int thoff, __be32 addr,
4277+ __be32 new_addr)
4278 {
4279 switch (iph->protocol) {
4280 case IPPROTO_TCP:
4281- if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
4282- return NF_DROP;
4283+ nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr);
4284 break;
4285 case IPPROTO_UDP:
4286- if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
4287- return NF_DROP;
4288+ nf_flow_nat_ip_udp(skb, thoff, addr, new_addr);
4289 break;
4290 }
4291-
4292- return 0;
4293 }
4294
4295-static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4296- struct iphdr *iph, unsigned int thoff,
4297- enum flow_offload_tuple_dir dir)
4298+static void nf_flow_snat_ip(const struct flow_offload *flow,
4299+ struct sk_buff *skb, struct iphdr *iph,
4300+ unsigned int thoff, enum flow_offload_tuple_dir dir)
4301 {
4302 __be32 addr, new_addr;
4303
4304@@ -106,17 +89,15 @@ static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4305 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
4306 iph->daddr = new_addr;
4307 break;
4308- default:
4309- return -1;
4310 }
4311 csum_replace4(&iph->check, addr, new_addr);
4312
4313- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4314+ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4315 }
4316
4317-static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4318- struct iphdr *iph, unsigned int thoff,
4319- enum flow_offload_tuple_dir dir)
4320+static void nf_flow_dnat_ip(const struct flow_offload *flow,
4321+ struct sk_buff *skb, struct iphdr *iph,
4322+ unsigned int thoff, enum flow_offload_tuple_dir dir)
4323 {
4324 __be32 addr, new_addr;
4325
4326@@ -131,29 +112,24 @@ static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4327 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
4328 iph->saddr = new_addr;
4329 break;
4330- default:
4331- return -1;
4332 }
4333 csum_replace4(&iph->check, addr, new_addr);
4334
4335- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4336+ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4337 }
4338
4339-static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4340- unsigned int thoff, enum flow_offload_tuple_dir dir)
4341+static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4342+ unsigned int thoff, enum flow_offload_tuple_dir dir,
4343+ struct iphdr *iph)
4344 {
4345- struct iphdr *iph = ip_hdr(skb);
4346-
4347- if (flow->flags & FLOW_OFFLOAD_SNAT &&
4348- (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
4349- nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
4350- return -1;
4351- if (flow->flags & FLOW_OFFLOAD_DNAT &&
4352- (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
4353- nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
4354- return -1;
4355-
4356- return 0;
4357+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
4358+ nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir);
4359+ nf_flow_snat_ip(flow, skb, iph, thoff, dir);
4360+ }
4361+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
4362+ nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir);
4363+ nf_flow_dnat_ip(flow, skb, iph, thoff, dir);
4364+ }
4365 }
4366
4367 static bool ip_has_options(unsigned int thoff)
4368@@ -161,35 +137,70 @@ static bool ip_has_options(unsigned int thoff)
4369 return thoff != sizeof(struct iphdr);
4370 }
4371
4372+static void nf_flow_tuple_encap(struct sk_buff *skb,
4373+ struct flow_offload_tuple *tuple)
4374+{
4375+ struct vlan_ethhdr *veth;
4376+ struct pppoe_hdr *phdr;
4377+ int i = 0;
4378+
4379+ if (skb_vlan_tag_present(skb)) {
4380+ tuple->encap[i].id = skb_vlan_tag_get(skb);
4381+ tuple->encap[i].proto = skb->vlan_proto;
4382+ i++;
4383+ }
4384+ switch (skb->protocol) {
4385+ case htons(ETH_P_8021Q):
4386+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
4387+ tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
4388+ tuple->encap[i].proto = skb->protocol;
4389+ break;
4390+ case htons(ETH_P_PPP_SES):
4391+ phdr = (struct pppoe_hdr *)skb_mac_header(skb);
4392+ tuple->encap[i].id = ntohs(phdr->sid);
4393+ tuple->encap[i].proto = skb->protocol;
4394+ break;
4395+ }
4396+}
4397+
4398 static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
4399- struct flow_offload_tuple *tuple)
4400+ struct flow_offload_tuple *tuple, u32 *hdrsize,
4401+ u32 offset)
4402 {
4403 struct flow_ports *ports;
4404 unsigned int thoff;
4405 struct iphdr *iph;
4406
4407- if (!pskb_may_pull(skb, sizeof(*iph)))
4408+ if (!pskb_may_pull(skb, sizeof(*iph) + offset))
4409 return -1;
4410
4411- iph = ip_hdr(skb);
4412- thoff = iph->ihl * 4;
4413+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
4414+ thoff = (iph->ihl * 4);
4415
4416 if (ip_is_fragment(iph) ||
4417 unlikely(ip_has_options(thoff)))
4418 return -1;
4419
4420- if (iph->protocol != IPPROTO_TCP &&
4421- iph->protocol != IPPROTO_UDP)
4422+ thoff += offset;
4423+
4424+ switch (iph->protocol) {
4425+ case IPPROTO_TCP:
4426+ *hdrsize = sizeof(struct tcphdr);
4427+ break;
4428+ case IPPROTO_UDP:
4429+ *hdrsize = sizeof(struct udphdr);
4430+ break;
4431+ default:
4432 return -1;
4433+ }
4434
4435 if (iph->ttl <= 1)
4436 return -1;
4437
4438- thoff = iph->ihl * 4;
4439- if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
4440+ if (!pskb_may_pull(skb, thoff + *hdrsize))
4441 return -1;
4442
4443- iph = ip_hdr(skb);
4444+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
4445 ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
4446
4447 tuple->src_v4.s_addr = iph->saddr;
4448@@ -199,6 +210,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
4449 tuple->l3proto = AF_INET;
4450 tuple->l4proto = iph->protocol;
4451 tuple->iifidx = dev->ifindex;
4452+ nf_flow_tuple_encap(skb, tuple);
4453
4454 return 0;
4455 }
developeree39bcf2023-06-16 08:03:30 +08004456@@ -225,6 +237,75 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08004457 return NF_STOLEN;
4458 }
4459
4460+static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
4461+ u32 *offset)
4462+{
4463+ struct vlan_ethhdr *veth;
4464+
4465+ switch (skb->protocol) {
4466+ case htons(ETH_P_8021Q):
4467+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
4468+ if (veth->h_vlan_encapsulated_proto == proto) {
4469+ *offset += VLAN_HLEN;
4470+ return true;
4471+ }
4472+ break;
4473+ case htons(ETH_P_PPP_SES):
4474+ if (nf_flow_pppoe_proto(skb) == proto) {
4475+ *offset += PPPOE_SES_HLEN;
4476+ return true;
4477+ }
4478+ break;
4479+ }
4480+
4481+ return false;
4482+}
4483+
4484+static void nf_flow_encap_pop(struct sk_buff *skb,
4485+ struct flow_offload_tuple_rhash *tuplehash)
4486+{
4487+ struct vlan_hdr *vlan_hdr;
4488+ int i;
4489+
4490+ for (i = 0; i < tuplehash->tuple.encap_num; i++) {
4491+ if (skb_vlan_tag_present(skb)) {
4492+ __vlan_hwaccel_clear_tag(skb);
4493+ continue;
4494+ }
4495+ switch (skb->protocol) {
4496+ case htons(ETH_P_8021Q):
4497+ vlan_hdr = (struct vlan_hdr *)skb->data;
4498+ __skb_pull(skb, VLAN_HLEN);
4499+ vlan_set_encap_proto(skb, vlan_hdr);
4500+ skb_reset_network_header(skb);
4501+ break;
4502+ case htons(ETH_P_PPP_SES):
4503+ skb->protocol = nf_flow_pppoe_proto(skb);
4504+ skb_pull(skb, PPPOE_SES_HLEN);
4505+ skb_reset_network_header(skb);
4506+ break;
4507+ }
4508+ }
4509+}
4510+
4511+static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
4512+ const struct flow_offload_tuple_rhash *tuplehash,
4513+ unsigned short type)
4514+{
4515+ struct net_device *outdev;
4516+
4517+ outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
4518+ if (!outdev)
4519+ return NF_DROP;
4520+
4521+ skb->dev = outdev;
4522+ dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
4523+ tuplehash->tuple.out.h_source, skb->len);
4524+ dev_queue_xmit(skb);
4525+
4526+ return NF_STOLEN;
4527+}
4528+
4529 unsigned int
4530 nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
4531 const struct nf_hook_state *state)
developeree39bcf2023-06-16 08:03:30 +08004532@@ -235,15 +316,18 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08004533 enum flow_offload_tuple_dir dir;
4534 struct flow_offload *flow;
4535 struct net_device *outdev;
4536+ u32 hdrsize, offset = 0;
4537+ unsigned int thoff, mtu;
4538 struct rtable *rt;
4539- unsigned int thoff;
4540 struct iphdr *iph;
4541 __be32 nexthop;
4542+ int ret;
4543
4544- if (skb->protocol != htons(ETH_P_IP))
4545+ if (skb->protocol != htons(ETH_P_IP) &&
4546+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset))
4547 return NF_ACCEPT;
4548
4549- if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
4550+ if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0)
4551 return NF_ACCEPT;
4552
4553 tuplehash = flow_offload_lookup(flow_table, &tuple);
developeree39bcf2023-06-16 08:03:30 +08004554@@ -252,75 +336,80 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08004555
4556 dir = tuplehash->tuple.dir;
4557 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
4558- rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
4559- outdev = rt->dst.dev;
developeree39bcf2023-06-16 08:03:30 +08004560-
developer8cb3ac72022-07-04 10:55:14 +08004561- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
developeree39bcf2023-06-16 08:03:30 +08004562- return NF_ACCEPT;
developerb7c46752022-07-04 19:51:38 +08004563
developer8cb3ac72022-07-04 10:55:14 +08004564- if (skb_try_make_writable(skb, sizeof(*iph)))
4565- return NF_DROP;
developerb7c46752022-07-04 19:51:38 +08004566-
developer8cb3ac72022-07-04 10:55:14 +08004567- thoff = ip_hdr(skb)->ihl * 4;
4568- if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
developeree39bcf2023-06-16 08:03:30 +08004569+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
4570+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
developer8cb3ac72022-07-04 10:55:14 +08004571 return NF_ACCEPT;
developer7eb15dc2023-06-14 17:44:03 +08004572
4573- if (!dst_check(&rt->dst, 0)) {
developeree39bcf2023-06-16 08:03:30 +08004574- flow_offload_teardown(flow);
4575+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
4576+ thoff = (iph->ihl * 4) + offset;
4577+ if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
developer7eb15dc2023-06-14 17:44:03 +08004578 return NF_ACCEPT;
developeree39bcf2023-06-16 08:03:30 +08004579- }
developer8cb3ac72022-07-04 10:55:14 +08004580
4581- if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
4582+ if (skb_try_make_writable(skb, thoff + hdrsize))
4583 return NF_DROP;
4584
4585- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
4586+ flow_offload_refresh(flow_table, flow);
4587+
4588+ nf_flow_encap_pop(skb, tuplehash);
4589+ thoff -= offset;
4590+
4591 iph = ip_hdr(skb);
4592+ nf_flow_nat_ip(flow, skb, thoff, dir, iph);
4593+
4594 ip_decrease_ttl(iph);
4595 skb->tstamp = 0;
4596
4597- if (unlikely(dst_xfrm(&rt->dst))) {
4598+ if (flow_table->flags & NF_FLOWTABLE_COUNTER)
4599+ nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
4600+
4601+ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
4602+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
4603 memset(skb->cb, 0, sizeof(struct inet_skb_parm));
4604 IPCB(skb)->iif = skb->dev->ifindex;
4605 IPCB(skb)->flags = IPSKB_FORWARDED;
4606 return nf_flow_xmit_xfrm(skb, state, &rt->dst);
4607 }
4608
4609- skb->dev = outdev;
4610- nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
4611- skb_dst_set_noref(skb, &rt->dst);
4612- neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
4613+ switch (tuplehash->tuple.xmit_type) {
4614+ case FLOW_OFFLOAD_XMIT_NEIGH:
4615+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
4616+ outdev = rt->dst.dev;
4617+ skb->dev = outdev;
4618+ nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
4619+ skb_dst_set_noref(skb, &rt->dst);
4620+ neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
4621+ ret = NF_STOLEN;
4622+ break;
4623+ case FLOW_OFFLOAD_XMIT_DIRECT:
4624+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
4625+ if (ret == NF_DROP)
4626+ flow_offload_teardown(flow);
4627+ break;
4628+ }
4629
4630- return NF_STOLEN;
4631+ return ret;
4632 }
4633 EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
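Editor's note: two details of the rewritten IPv4 fast path are easy to miss. The MTU check adds the encapsulation offset back on, because the tuple MTU was recorded for the bare IP packet while skb->len still includes the VLAN/PPPoE bytes. And ip_decrease_ttl() patches the header checksum incrementally rather than recomputing it: TTL shares a 16-bit word with the protocol field, so dropping TTL by one lowers that word by 0x0100 and the checksum is fixed by adding 0x0100 with an end-around carry. A small userspace sketch of that fix-up, mirroring the kernel helper with made-up values:

    #include <stdio.h>
    #include <stdint.h>
    #include <arpa/inet.h>   /* htons() */

    /* Incremental checksum fix-up used when only the TTL byte changes. */
    static uint16_t decrement_ttl(uint8_t *ttl, uint16_t check)
    {
        uint32_t c = check;

        c += htons(0x0100);                     /* TTL is the high byte of its word */
        check = (uint16_t)(c + (c >= 0xffff));  /* end-around carry */
        (*ttl)--;
        return check;
    }

    int main(void)
    {
        uint8_t ttl = 64;
        uint16_t check = 0xb1e6;                /* arbitrary example checksum */

        check = decrement_ttl(&ttl, check);
        printf("ttl=%u check=0x%04x\n", ttl, check);
        return 0;
    }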
4634
4635-static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
4636- struct in6_addr *addr,
4637- struct in6_addr *new_addr)
4638+static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
4639+ struct in6_addr *addr,
4640+ struct in6_addr *new_addr,
4641+ struct ipv6hdr *ip6h)
4642 {
4643 struct tcphdr *tcph;
4644
4645- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
4646- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
4647- return -1;
4648-
4649 tcph = (void *)(skb_network_header(skb) + thoff);
4650 inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
4651 new_addr->s6_addr32, true);
4652-
4653- return 0;
4654 }
4655
4656-static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
4657- struct in6_addr *addr,
4658- struct in6_addr *new_addr)
4659+static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
4660+ struct in6_addr *addr,
4661+ struct in6_addr *new_addr)
4662 {
4663 struct udphdr *udph;
4664
4665- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
4666- skb_try_make_writable(skb, thoff + sizeof(*udph)))
4667- return -1;
4668-
4669 udph = (void *)(skb_network_header(skb) + thoff);
4670 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
4671 inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
developeree39bcf2023-06-16 08:03:30 +08004672@@ -328,32 +417,26 @@ static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
developer8cb3ac72022-07-04 10:55:14 +08004673 if (!udph->check)
4674 udph->check = CSUM_MANGLED_0;
4675 }
4676-
4677- return 0;
4678 }
4679
4680-static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
4681- unsigned int thoff, struct in6_addr *addr,
4682- struct in6_addr *new_addr)
4683+static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
4684+ unsigned int thoff, struct in6_addr *addr,
4685+ struct in6_addr *new_addr)
4686 {
4687 switch (ip6h->nexthdr) {
4688 case IPPROTO_TCP:
4689- if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
4690- return NF_DROP;
4691+ nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h);
4692 break;
4693 case IPPROTO_UDP:
4694- if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
4695- return NF_DROP;
4696+ nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr);
4697 break;
4698 }
4699-
4700- return 0;
4701 }
4702
4703-static int nf_flow_snat_ipv6(const struct flow_offload *flow,
4704- struct sk_buff *skb, struct ipv6hdr *ip6h,
4705- unsigned int thoff,
4706- enum flow_offload_tuple_dir dir)
4707+static void nf_flow_snat_ipv6(const struct flow_offload *flow,
4708+ struct sk_buff *skb, struct ipv6hdr *ip6h,
4709+ unsigned int thoff,
4710+ enum flow_offload_tuple_dir dir)
4711 {
4712 struct in6_addr addr, new_addr;
4713
developeree39bcf2023-06-16 08:03:30 +08004714@@ -368,17 +451,15 @@ static int nf_flow_snat_ipv6(const struct flow_offload *flow,
developer8cb3ac72022-07-04 10:55:14 +08004715 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
4716 ip6h->daddr = new_addr;
4717 break;
4718- default:
4719- return -1;
4720 }
4721
4722- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4723+ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4724 }
4725
4726-static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
4727- struct sk_buff *skb, struct ipv6hdr *ip6h,
4728- unsigned int thoff,
4729- enum flow_offload_tuple_dir dir)
4730+static void nf_flow_dnat_ipv6(const struct flow_offload *flow,
4731+ struct sk_buff *skb, struct ipv6hdr *ip6h,
4732+ unsigned int thoff,
4733+ enum flow_offload_tuple_dir dir)
4734 {
4735 struct in6_addr addr, new_addr;
4736
developeree39bcf2023-06-16 08:03:30 +08004737@@ -393,56 +474,60 @@ static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
developer8cb3ac72022-07-04 10:55:14 +08004738 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
4739 ip6h->saddr = new_addr;
4740 break;
4741- default:
4742- return -1;
4743 }
4744
4745- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4746+ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4747 }
4748
4749-static int nf_flow_nat_ipv6(const struct flow_offload *flow,
4750- struct sk_buff *skb,
4751- enum flow_offload_tuple_dir dir)
4752+static void nf_flow_nat_ipv6(const struct flow_offload *flow,
4753+ struct sk_buff *skb,
4754+ enum flow_offload_tuple_dir dir,
4755+ struct ipv6hdr *ip6h)
4756 {
4757- struct ipv6hdr *ip6h = ipv6_hdr(skb);
4758 unsigned int thoff = sizeof(*ip6h);
4759
4760- if (flow->flags & FLOW_OFFLOAD_SNAT &&
4761- (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
4762- nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
4763- return -1;
4764- if (flow->flags & FLOW_OFFLOAD_DNAT &&
4765- (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
4766- nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
4767- return -1;
4768-
4769- return 0;
4770+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
4771+ nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir);
4772+ nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir);
4773+ }
4774+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
4775+ nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir);
4776+ nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir);
4777+ }
4778 }
4779
4780 static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
4781- struct flow_offload_tuple *tuple)
4782+ struct flow_offload_tuple *tuple, u32 *hdrsize,
4783+ u32 offset)
4784 {
4785 struct flow_ports *ports;
4786 struct ipv6hdr *ip6h;
4787 unsigned int thoff;
4788
4789- if (!pskb_may_pull(skb, sizeof(*ip6h)))
4790+ thoff = sizeof(*ip6h) + offset;
4791+ if (!pskb_may_pull(skb, thoff))
4792 return -1;
4793
4794- ip6h = ipv6_hdr(skb);
4795+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
4796
4797- if (ip6h->nexthdr != IPPROTO_TCP &&
4798- ip6h->nexthdr != IPPROTO_UDP)
4799+ switch (ip6h->nexthdr) {
4800+ case IPPROTO_TCP:
4801+ *hdrsize = sizeof(struct tcphdr);
4802+ break;
4803+ case IPPROTO_UDP:
4804+ *hdrsize = sizeof(struct udphdr);
4805+ break;
4806+ default:
4807 return -1;
4808+ }
4809
4810 if (ip6h->hop_limit <= 1)
4811 return -1;
4812
4813- thoff = sizeof(*ip6h);
4814- if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
4815+ if (!pskb_may_pull(skb, thoff + *hdrsize))
4816 return -1;
4817
4818- ip6h = ipv6_hdr(skb);
4819+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
4820 ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
4821
4822 tuple->src_v6 = ip6h->saddr;
developeree39bcf2023-06-16 08:03:30 +08004823@@ -452,6 +537,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
developer8cb3ac72022-07-04 10:55:14 +08004824 tuple->l3proto = AF_INET6;
4825 tuple->l4proto = ip6h->nexthdr;
4826 tuple->iifidx = dev->ifindex;
4827+ nf_flow_tuple_encap(skb, tuple);
4828
4829 return 0;
4830 }
developeree39bcf2023-06-16 08:03:30 +08004831@@ -467,13 +553,17 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08004832 const struct in6_addr *nexthop;
4833 struct flow_offload *flow;
4834 struct net_device *outdev;
4835+ unsigned int thoff, mtu;
4836+ u32 hdrsize, offset = 0;
4837 struct ipv6hdr *ip6h;
4838 struct rt6_info *rt;
4839+ int ret;
4840
4841- if (skb->protocol != htons(ETH_P_IPV6))
4842+ if (skb->protocol != htons(ETH_P_IPV6) &&
4843+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset))
4844 return NF_ACCEPT;
4845
4846- if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
4847+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0)
4848 return NF_ACCEPT;
4849
4850 tuplehash = flow_offload_lookup(flow_table, &tuple);
developeree39bcf2023-06-16 08:03:30 +08004851@@ -482,44 +572,57 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08004852
4853 dir = tuplehash->tuple.dir;
4854 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
4855- rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
4856- outdev = rt->dst.dev;
developer8cb3ac72022-07-04 10:55:14 +08004857
developerb7c46752022-07-04 19:51:38 +08004858- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
developer8cb3ac72022-07-04 10:55:14 +08004859+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
4860+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
4861 return NF_ACCEPT;
4862
developerb7c46752022-07-04 19:51:38 +08004863- if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
4864- sizeof(*ip6h)))
developer8cb3ac72022-07-04 10:55:14 +08004865+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
4866+ thoff = sizeof(*ip6h) + offset;
4867+ if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
4868 return NF_ACCEPT;
developer8cb3ac72022-07-04 10:55:14 +08004869
developerb7c46752022-07-04 19:51:38 +08004870- if (!dst_check(&rt->dst, tuplehash->tuple.dst_cookie)) {
developeree39bcf2023-06-16 08:03:30 +08004871- flow_offload_teardown(flow);
4872- return NF_ACCEPT;
4873- }
4874-
developer8cb3ac72022-07-04 10:55:14 +08004875- if (skb_try_make_writable(skb, sizeof(*ip6h)))
4876+ if (skb_try_make_writable(skb, thoff + hdrsize))
4877 return NF_DROP;
4878
4879- if (nf_flow_nat_ipv6(flow, skb, dir) < 0)
4880- return NF_DROP;
4881+ flow_offload_refresh(flow_table, flow);
4882+
4883+ nf_flow_encap_pop(skb, tuplehash);
4884
4885- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
4886 ip6h = ipv6_hdr(skb);
4887+ nf_flow_nat_ipv6(flow, skb, dir, ip6h);
4888+
4889 ip6h->hop_limit--;
4890 skb->tstamp = 0;
4891
4892- if (unlikely(dst_xfrm(&rt->dst))) {
4893+ if (flow_table->flags & NF_FLOWTABLE_COUNTER)
4894+ nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
4895+
4896+ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
4897+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
4898 memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
4899 IP6CB(skb)->iif = skb->dev->ifindex;
4900 IP6CB(skb)->flags = IP6SKB_FORWARDED;
4901 return nf_flow_xmit_xfrm(skb, state, &rt->dst);
4902 }
4903
4904- skb->dev = outdev;
4905- nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
4906- skb_dst_set_noref(skb, &rt->dst);
4907- neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
4908+ switch (tuplehash->tuple.xmit_type) {
4909+ case FLOW_OFFLOAD_XMIT_NEIGH:
4910+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
4911+ outdev = rt->dst.dev;
4912+ skb->dev = outdev;
4913+ nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
4914+ skb_dst_set_noref(skb, &rt->dst);
4915+ neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
4916+ ret = NF_STOLEN;
4917+ break;
4918+ case FLOW_OFFLOAD_XMIT_DIRECT:
4919+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
4920+ if (ret == NF_DROP)
4921+ flow_offload_teardown(flow);
4922+ break;
4923+ }
4924
4925- return NF_STOLEN;
4926+ return ret;
4927 }
4928 EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
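Editor's note: both the IPv4 and IPv6 hooks now end in the same three-way dispatch on tuple.xmit_type: XFRM flows keep the old dst-based path, NEIGH flows resolve the next hop through the cached route, and DIRECT flows reuse the precomputed L2 header; a DIRECT transmit that returns NF_DROP also tears the flow down. A compact sketch of that decision, with hypothetical wrapper names:

    #include <stdio.h>

    enum xmit_type { XMIT_NEIGH, XMIT_DIRECT, XMIT_XFRM };

    static const char *forward(enum xmit_type t)
    {
        switch (t) {
        case XMIT_XFRM:   return "hand back to the IPsec output path";
        case XMIT_NEIGH:  return "neigh_xmit() via the cached route";
        case XMIT_DIRECT: return "dev_queue_xmit() with the cached L2 header";
        }
        return "unreachable";
    }

    int main(void)
    {
        printf("%s\n", forward(XMIT_DIRECT));
        return 0;
    }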
4929diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
4930new file mode 100644
developer58aa0682023-09-18 14:02:26 +08004931index 0000000..50f2f2e
developer8cb3ac72022-07-04 10:55:14 +08004932--- /dev/null
4933+++ b/net/netfilter/nf_flow_table_offload.c
developeree39bcf2023-06-16 08:03:30 +08004934@@ -0,0 +1,1199 @@
developer8cb3ac72022-07-04 10:55:14 +08004935+#include <linux/kernel.h>
4936+#include <linux/init.h>
4937+#include <linux/module.h>
4938+#include <linux/netfilter.h>
4939+#include <linux/rhashtable.h>
4940+#include <linux/netdevice.h>
4941+#include <linux/tc_act/tc_csum.h>
4942+#include <net/flow_offload.h>
4943+#include <net/netfilter/nf_flow_table.h>
4944+#include <net/netfilter/nf_tables.h>
4945+#include <net/netfilter/nf_conntrack.h>
4946+#include <net/netfilter/nf_conntrack_acct.h>
4947+#include <net/netfilter/nf_conntrack_core.h>
4948+#include <net/netfilter/nf_conntrack_tuple.h>
4949+
4950+static struct workqueue_struct *nf_flow_offload_add_wq;
4951+static struct workqueue_struct *nf_flow_offload_del_wq;
4952+static struct workqueue_struct *nf_flow_offload_stats_wq;
4953+
4954+struct flow_offload_work {
4955+ struct list_head list;
4956+ enum flow_cls_command cmd;
4957+ int priority;
4958+ struct nf_flowtable *flowtable;
4959+ struct flow_offload *flow;
4960+ struct work_struct work;
4961+};
4962+
4963+#define NF_FLOW_DISSECTOR(__match, __type, __field) \
4964+ (__match)->dissector.offset[__type] = \
4965+ offsetof(struct nf_flow_key, __field)
4966+
4967+static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
4968+ struct ip_tunnel_info *tun_info)
4969+{
4970+ struct nf_flow_key *mask = &match->mask;
4971+ struct nf_flow_key *key = &match->key;
4972+ unsigned int enc_keys;
4973+
4974+ if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
4975+ return;
4976+
4977+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control);
4978+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
4979+ key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
4980+ mask->enc_key_id.keyid = 0xffffffff;
4981+ enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
4982+ BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);
4983+
4984+ if (ip_tunnel_info_af(tun_info) == AF_INET) {
4985+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
4986+ enc_ipv4);
4987+ key->enc_ipv4.src = tun_info->key.u.ipv4.dst;
4988+ key->enc_ipv4.dst = tun_info->key.u.ipv4.src;
4989+ if (key->enc_ipv4.src)
4990+ mask->enc_ipv4.src = 0xffffffff;
4991+ if (key->enc_ipv4.dst)
4992+ mask->enc_ipv4.dst = 0xffffffff;
4993+ enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
4994+ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
4995+ } else {
4996+ memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
4997+ sizeof(struct in6_addr));
4998+ memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src,
4999+ sizeof(struct in6_addr));
5000+ if (memcmp(&key->enc_ipv6.src, &in6addr_any,
5001+ sizeof(struct in6_addr)))
5002+ memset(&mask->enc_ipv6.src, 0xff,
5003+ sizeof(struct in6_addr));
5004+ if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
5005+ sizeof(struct in6_addr)))
5006+ memset(&mask->enc_ipv6.dst, 0xff,
5007+ sizeof(struct in6_addr));
5008+ enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
5009+ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
5010+ }
5011+
5012+ match->dissector.used_keys |= enc_keys;
5013+}
5014+
5015+static void nf_flow_rule_vlan_match(struct flow_dissector_key_vlan *key,
5016+ struct flow_dissector_key_vlan *mask,
5017+ u16 vlan_id, __be16 proto)
5018+{
5019+ key->vlan_id = vlan_id;
5020+ mask->vlan_id = VLAN_VID_MASK;
5021+ key->vlan_tpid = proto;
5022+ mask->vlan_tpid = 0xffff;
5023+}
5024+
5025+static int nf_flow_rule_match(struct nf_flow_match *match,
5026+ const struct flow_offload_tuple *tuple,
5027+ struct dst_entry *other_dst)
5028+{
5029+ struct nf_flow_key *mask = &match->mask;
5030+ struct nf_flow_key *key = &match->key;
5031+ struct ip_tunnel_info *tun_info;
5032+ bool vlan_encap = false;
5033+
5034+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
5035+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
5036+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
5037+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
5038+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
5039+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
5040+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);
5041+
5042+ if (other_dst && other_dst->lwtstate) {
5043+ tun_info = lwt_tun_info(other_dst->lwtstate);
5044+ nf_flow_rule_lwt_match(match, tun_info);
5045+ }
5046+
5047+ key->meta.ingress_ifindex = tuple->iifidx;
5048+ mask->meta.ingress_ifindex = 0xffffffff;
5049+
5050+ if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
5051+ tuple->encap[0].proto == htons(ETH_P_8021Q)) {
5052+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, vlan);
5053+ nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
5054+ tuple->encap[0].id,
5055+ tuple->encap[0].proto);
5056+ vlan_encap = true;
5057+ }
5058+
5059+ if (tuple->encap_num > 1 && !(tuple->in_vlan_ingress & BIT(1)) &&
5060+ tuple->encap[1].proto == htons(ETH_P_8021Q)) {
5061+ if (vlan_encap) {
5062+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CVLAN,
5063+ cvlan);
5064+ nf_flow_rule_vlan_match(&key->cvlan, &mask->cvlan,
5065+ tuple->encap[1].id,
5066+ tuple->encap[1].proto);
5067+ } else {
5068+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN,
5069+ vlan);
5070+ nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
5071+ tuple->encap[1].id,
5072+ tuple->encap[1].proto);
5073+ }
5074+ }
5075+
5076+ switch (tuple->l3proto) {
5077+ case AF_INET:
5078+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
5079+ key->basic.n_proto = htons(ETH_P_IP);
5080+ key->ipv4.src = tuple->src_v4.s_addr;
5081+ mask->ipv4.src = 0xffffffff;
5082+ key->ipv4.dst = tuple->dst_v4.s_addr;
5083+ mask->ipv4.dst = 0xffffffff;
5084+ break;
5085+ case AF_INET6:
5086+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
5087+ key->basic.n_proto = htons(ETH_P_IPV6);
5088+ key->ipv6.src = tuple->src_v6;
5089+ memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
5090+ key->ipv6.dst = tuple->dst_v6;
5091+ memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
5092+ break;
5093+ default:
5094+ return -EOPNOTSUPP;
5095+ }
5096+ mask->control.addr_type = 0xffff;
5097+ match->dissector.used_keys |= BIT(key->control.addr_type);
5098+ mask->basic.n_proto = 0xffff;
5099+
5100+ switch (tuple->l4proto) {
5101+ case IPPROTO_TCP:
5102+ key->tcp.flags = 0;
5103+ mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
5104+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
5105+ break;
5106+ case IPPROTO_UDP:
5107+ break;
5108+ default:
5109+ return -EOPNOTSUPP;
5110+ }
5111+
5112+ key->basic.ip_proto = tuple->l4proto;
5113+ mask->basic.ip_proto = 0xff;
5114+
5115+ key->tp.src = tuple->src_port;
5116+ mask->tp.src = 0xffff;
5117+ key->tp.dst = tuple->dst_port;
5118+ mask->tp.dst = 0xffff;
5119+
5120+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
5121+ BIT(FLOW_DISSECTOR_KEY_CONTROL) |
5122+ BIT(FLOW_DISSECTOR_KEY_BASIC) |
5123+ BIT(FLOW_DISSECTOR_KEY_PORTS);
5124+ return 0;
5125+}
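Editor's note: nf_flow_rule_match() translates one direction of the flow into the key/mask pair that flow_cls_offload consumers expect. Every field the rule cares about gets an all-ones mask, everything else stays zero, and dissector.used_keys records which key groups were populated. In effect a driver then matches "packet field AND mask == key". A minimal userspace sketch of that contract, using a made-up three-field key much smaller than struct nf_flow_key:

    #include <stdio.h>
    #include <stdint.h>
    #include <stdbool.h>

    struct demo_key {              /* hypothetical, not a kernel type */
        uint32_t saddr, daddr;
        uint16_t dport;
    };

    static bool matches(const struct demo_key *pkt,
                        const struct demo_key *key, const struct demo_key *mask)
    {
        return (pkt->saddr & mask->saddr) == key->saddr &&
               (pkt->daddr & mask->daddr) == key->daddr &&
               (pkt->dport & mask->dport) == key->dport;
    }

    int main(void)
    {
        struct demo_key key  = { 0xc0a80001, 0x08080808, 443 };
        struct demo_key mask = { 0xffffffff, 0xffffffff, 0xffff }; /* exact match */
        struct demo_key pkt  = { 0xc0a80001, 0x08080808, 443 };

        printf("match: %s\n", matches(&pkt, &key, &mask) ? "yes" : "no");
        return 0;
    }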
5126+
5127+static void flow_offload_mangle(struct flow_action_entry *entry,
5128+ enum flow_action_mangle_base htype, u32 offset,
5129+ const __be32 *value, const __be32 *mask)
5130+{
5131+ entry->id = FLOW_ACTION_MANGLE;
5132+ entry->mangle.htype = htype;
5133+ entry->mangle.offset = offset;
5134+ memcpy(&entry->mangle.mask, mask, sizeof(u32));
5135+ memcpy(&entry->mangle.val, value, sizeof(u32));
5136+}
5137+
5138+static inline struct flow_action_entry *
5139+flow_action_entry_next(struct nf_flow_rule *flow_rule)
5140+{
5141+ int i = flow_rule->rule->action.num_entries++;
5142+
5143+ return &flow_rule->rule->action.entries[i];
5144+}
5145+
5146+static int flow_offload_eth_src(struct net *net,
5147+ const struct flow_offload *flow,
5148+ enum flow_offload_tuple_dir dir,
5149+ struct nf_flow_rule *flow_rule)
5150+{
5151+ struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
5152+ struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
5153+ const struct flow_offload_tuple *other_tuple, *this_tuple;
5154+ struct net_device *dev = NULL;
5155+ const unsigned char *addr;
5156+ u32 mask, val;
5157+ u16 val16;
5158+
5159+ this_tuple = &flow->tuplehash[dir].tuple;
5160+
5161+ switch (this_tuple->xmit_type) {
5162+ case FLOW_OFFLOAD_XMIT_DIRECT:
5163+ addr = this_tuple->out.h_source;
5164+ break;
5165+ case FLOW_OFFLOAD_XMIT_NEIGH:
5166+ other_tuple = &flow->tuplehash[!dir].tuple;
5167+ dev = dev_get_by_index(net, other_tuple->iifidx);
5168+ if (!dev)
5169+ return -ENOENT;
5170+
5171+ addr = dev->dev_addr;
5172+ break;
5173+ default:
5174+ return -EOPNOTSUPP;
5175+ }
5176+
5177+ mask = ~0xffff0000;
5178+ memcpy(&val16, addr, 2);
5179+ val = val16 << 16;
5180+ flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
5181+ &val, &mask);
5182+
5183+ mask = ~0xffffffff;
5184+ memcpy(&val, addr + 2, 4);
5185+ flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
5186+ &val, &mask);
5187+
developeree39bcf2023-06-16 08:03:30 +08005188+ if (dev)
5189+ dev_put(dev);
developer8cb3ac72022-07-04 10:55:14 +08005190+
5191+ return 0;
5192+}
5193+
5194+static int flow_offload_eth_dst(struct net *net,
5195+ const struct flow_offload *flow,
5196+ enum flow_offload_tuple_dir dir,
5197+ struct nf_flow_rule *flow_rule)
5198+{
5199+ struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
5200+ struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
5201+ const struct flow_offload_tuple *other_tuple, *this_tuple;
5202+ const struct dst_entry *dst_cache;
5203+ unsigned char ha[ETH_ALEN];
5204+ struct neighbour *n;
5205+ const void *daddr;
5206+ u32 mask, val;
5207+ u8 nud_state;
5208+ u16 val16;
5209+
5210+ this_tuple = &flow->tuplehash[dir].tuple;
5211+
5212+ switch (this_tuple->xmit_type) {
5213+ case FLOW_OFFLOAD_XMIT_DIRECT:
5214+ ether_addr_copy(ha, this_tuple->out.h_dest);
5215+ break;
5216+ case FLOW_OFFLOAD_XMIT_NEIGH:
5217+ other_tuple = &flow->tuplehash[!dir].tuple;
5218+ daddr = &other_tuple->src_v4;
5219+ dst_cache = this_tuple->dst_cache;
5220+ n = dst_neigh_lookup(dst_cache, daddr);
5221+ if (!n)
5222+ return -ENOENT;
5223+
5224+ read_lock_bh(&n->lock);
5225+ nud_state = n->nud_state;
5226+ ether_addr_copy(ha, n->ha);
5227+ read_unlock_bh(&n->lock);
5228+ neigh_release(n);
5229+
5230+ if (!(nud_state & NUD_VALID))
5231+ return -ENOENT;
5232+ break;
5233+ default:
5234+ return -EOPNOTSUPP;
5235+ }
5236+
5237+ mask = ~0xffffffff;
5238+ memcpy(&val, ha, 4);
5239+ flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
5240+ &val, &mask);
5241+
5242+ mask = ~0x0000ffff;
5243+ memcpy(&val16, ha + 4, 2);
5244+ val = val16;
5245+ flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
5246+ &val, &mask);
5247+
5248+ return 0;
5249+}
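Editor's note: the eth_src/eth_dst helpers encode a 6-byte MAC rewrite as 32-bit mangle actions, effectively the tc pedit convention where the word at mangle.offset becomes (old & mask) | val. The destination MAC is written at offsets 0 and 4, the source MAC at offsets 4 and 8; the masks preserve the untouched half of the shared word at offset 4. A hedged little-endian userspace sketch of applying such entries to a raw header; real drivers handle byte order more carefully.

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    struct mangle { uint32_t offset, mask, val; };

    /* word at 'offset' inside the Ethernet header := (word & mask) | val */
    static void apply(uint8_t *eth, const struct mangle *m)
    {
        uint32_t word;

        memcpy(&word, eth + m->offset, 4);
        word = (word & m->mask) | m->val;
        memcpy(eth + m->offset, &word, 4);
    }

    int main(void)
    {
        uint8_t eth[14] = { 0 };            /* dst[6] | src[6] | ethertype[2] */
        uint8_t mac[6]  = { 0xde, 0xad, 0xbe, 0xef, 0x00, 0x01 };
        uint32_t lo, hi16 = 0;

        /* Rewrite the destination MAC the way flow_offload_eth_dst() encodes it. */
        memcpy(&lo, mac, 4);
        memcpy(&hi16, mac + 4, 2);
        struct mangle e0 = { 0, ~0xffffffffu, lo };     /* bytes 0..3 */
        struct mangle e1 = { 4, ~0x0000ffffu, hi16 };   /* bytes 4..5 */

        apply(eth, &e0);
        apply(eth, &e1);
        printf("dst mac: %02x:%02x:%02x:%02x:%02x:%02x\n",
               eth[0], eth[1], eth[2], eth[3], eth[4], eth[5]);
        return 0;
    }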
5250+
5251+static void flow_offload_ipv4_snat(struct net *net,
5252+ const struct flow_offload *flow,
5253+ enum flow_offload_tuple_dir dir,
5254+ struct nf_flow_rule *flow_rule)
5255+{
5256+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5257+ u32 mask = ~htonl(0xffffffff);
5258+ __be32 addr;
5259+ u32 offset;
5260+
5261+ switch (dir) {
5262+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5263+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
5264+ offset = offsetof(struct iphdr, saddr);
5265+ break;
5266+ case FLOW_OFFLOAD_DIR_REPLY:
5267+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
5268+ offset = offsetof(struct iphdr, daddr);
5269+ break;
5270+ default:
5271+ return;
5272+ }
5273+
5274+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
5275+ &addr, &mask);
5276+}
5277+
5278+static void flow_offload_ipv4_dnat(struct net *net,
5279+ const struct flow_offload *flow,
5280+ enum flow_offload_tuple_dir dir,
5281+ struct nf_flow_rule *flow_rule)
5282+{
5283+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5284+ u32 mask = ~htonl(0xffffffff);
5285+ __be32 addr;
5286+ u32 offset;
5287+
5288+ switch (dir) {
5289+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5290+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
5291+ offset = offsetof(struct iphdr, daddr);
5292+ break;
5293+ case FLOW_OFFLOAD_DIR_REPLY:
5294+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
5295+ offset = offsetof(struct iphdr, saddr);
5296+ break;
5297+ default:
5298+ return;
5299+ }
5300+
5301+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
5302+ &addr, &mask);
5303+}
5304+
5305+static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
5306+ unsigned int offset,
5307+ const __be32 *addr, const __be32 *mask)
5308+{
5309+ struct flow_action_entry *entry;
developeree39bcf2023-06-16 08:03:30 +08005310+ int i, j;
developer8cb3ac72022-07-04 10:55:14 +08005311+
developeree39bcf2023-06-16 08:03:30 +08005312+ for (i = 0, j = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32), j++) {
developer8cb3ac72022-07-04 10:55:14 +08005313+ entry = flow_action_entry_next(flow_rule);
5314+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
developeree39bcf2023-06-16 08:03:30 +08005315+ offset + i, &addr[j], mask);
developer8cb3ac72022-07-04 10:55:14 +08005316+ }
5317+}
5318+
5319+static void flow_offload_ipv6_snat(struct net *net,
5320+ const struct flow_offload *flow,
5321+ enum flow_offload_tuple_dir dir,
5322+ struct nf_flow_rule *flow_rule)
5323+{
5324+ u32 mask = ~htonl(0xffffffff);
5325+ const __be32 *addr;
5326+ u32 offset;
5327+
5328+ switch (dir) {
5329+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5330+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
5331+ offset = offsetof(struct ipv6hdr, saddr);
5332+ break;
5333+ case FLOW_OFFLOAD_DIR_REPLY:
5334+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
5335+ offset = offsetof(struct ipv6hdr, daddr);
5336+ break;
5337+ default:
5338+ return;
5339+ }
5340+
5341+ flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
5342+}
5343+
5344+static void flow_offload_ipv6_dnat(struct net *net,
5345+ const struct flow_offload *flow,
5346+ enum flow_offload_tuple_dir dir,
5347+ struct nf_flow_rule *flow_rule)
5348+{
5349+ u32 mask = ~htonl(0xffffffff);
5350+ const __be32 *addr;
5351+ u32 offset;
5352+
5353+ switch (dir) {
5354+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5355+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
5356+ offset = offsetof(struct ipv6hdr, daddr);
5357+ break;
5358+ case FLOW_OFFLOAD_DIR_REPLY:
5359+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
5360+ offset = offsetof(struct ipv6hdr, saddr);
5361+ break;
5362+ default:
5363+ return;
5364+ }
5365+
5366+ flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
5367+}
5368+
5369+static int flow_offload_l4proto(const struct flow_offload *flow)
5370+{
5371+ u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
5372+ u8 type = 0;
5373+
5374+ switch (protonum) {
5375+ case IPPROTO_TCP:
5376+ type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
5377+ break;
5378+ case IPPROTO_UDP:
5379+ type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
5380+ break;
5381+ default:
5382+ break;
5383+ }
5384+
5385+ return type;
5386+}
5387+
5388+static void flow_offload_port_snat(struct net *net,
5389+ const struct flow_offload *flow,
5390+ enum flow_offload_tuple_dir dir,
5391+ struct nf_flow_rule *flow_rule)
5392+{
5393+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5394+ u32 mask, port;
5395+ u32 offset;
5396+
5397+ switch (dir) {
5398+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5399+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
5400+ offset = 0; /* offsetof(struct tcphdr, source); */
5401+ port = htonl(port << 16);
5402+ mask = ~htonl(0xffff0000);
5403+ break;
5404+ case FLOW_OFFLOAD_DIR_REPLY:
5405+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
5406+ offset = 0; /* offsetof(struct tcphdr, dest); */
5407+ port = htonl(port);
5408+ mask = ~htonl(0xffff);
5409+ break;
5410+ default:
5411+ return;
5412+ }
5413+
5414+ flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
5415+ &port, &mask);
5416+}
5417+
5418+static void flow_offload_port_dnat(struct net *net,
5419+ const struct flow_offload *flow,
5420+ enum flow_offload_tuple_dir dir,
5421+ struct nf_flow_rule *flow_rule)
5422+{
5423+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5424+ u32 mask, port;
5425+ u32 offset;
5426+
5427+ switch (dir) {
5428+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5429+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
5430+ offset = 0; /* offsetof(struct tcphdr, dest); */
5431+ port = htonl(port);
5432+ mask = ~htonl(0xffff);
5433+ break;
5434+ case FLOW_OFFLOAD_DIR_REPLY:
5435+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
5436+ offset = 0; /* offsetof(struct tcphdr, source); */
5437+ port = htonl(port << 16);
5438+ mask = ~htonl(0xffff0000);
5439+ break;
5440+ default:
5441+ return;
5442+ }
5443+
5444+ flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
5445+ &port, &mask);
5446+}
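Editor's note: the port rewrites use the same mangle convention on the first 32-bit word of the transport header, which holds the source port in its upper 16 bits and the destination port in its lower 16 bits (network byte order). Rewriting the source port therefore uses val = htonl(port << 16) with mask ~htonl(0xffff0000); rewriting the destination port uses val = htonl(port) with mask ~htonl(0xffff). A short userspace sketch of what that does to the ports word; the port numbers are arbitrary examples.

    #include <stdio.h>
    #include <stdint.h>
    #include <arpa/inet.h>

    /* First 4 bytes of a TCP/UDP header: source port then destination port,
     * both big-endian.  Mangle rule: word = (word & mask) | val. */
    static uint32_t mangle(uint32_t word, uint32_t mask, uint32_t val)
    {
        return (word & mask) | val;
    }

    int main(void)
    {
        uint16_t sport = 1024, dport = 80;
        uint32_t word = htonl((uint32_t)sport << 16 | dport);

        /* DNAT-style rewrite of the destination port to 8080, encoded the
         * same way as the FLOW_OFFLOAD_DIR_ORIGINAL case above. */
        uint16_t new_dport = 8080;
        word = mangle(word, ~htonl(0xffff), htonl(new_dport));

        word = ntohl(word);
        printf("sport=%u dport=%u\n", word >> 16, word & 0xffff);
        return 0;
    }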
5447+
5448+static void flow_offload_ipv4_checksum(struct net *net,
5449+ const struct flow_offload *flow,
5450+ struct nf_flow_rule *flow_rule)
5451+{
5452+ u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
5453+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5454+
5455+ entry->id = FLOW_ACTION_CSUM;
5456+ entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;
5457+
5458+ switch (protonum) {
5459+ case IPPROTO_TCP:
5460+ entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
5461+ break;
5462+ case IPPROTO_UDP:
5463+ entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
5464+ break;
5465+ }
5466+}
5467+
5468+static void flow_offload_redirect(struct net *net,
5469+ const struct flow_offload *flow,
5470+ enum flow_offload_tuple_dir dir,
5471+ struct nf_flow_rule *flow_rule)
5472+{
5473+ const struct flow_offload_tuple *this_tuple, *other_tuple;
5474+ struct flow_action_entry *entry;
5475+ struct net_device *dev;
5476+ int ifindex;
5477+
5478+ this_tuple = &flow->tuplehash[dir].tuple;
5479+ switch (this_tuple->xmit_type) {
5480+ case FLOW_OFFLOAD_XMIT_DIRECT:
5481+ this_tuple = &flow->tuplehash[dir].tuple;
5482+ ifindex = this_tuple->out.hw_ifidx;
5483+ break;
5484+ case FLOW_OFFLOAD_XMIT_NEIGH:
5485+ other_tuple = &flow->tuplehash[!dir].tuple;
5486+ ifindex = other_tuple->iifidx;
5487+ break;
5488+ default:
5489+ return;
5490+ }
5491+
5492+ dev = dev_get_by_index(net, ifindex);
5493+ if (!dev)
5494+ return;
5495+
5496+ entry = flow_action_entry_next(flow_rule);
5497+ entry->id = FLOW_ACTION_REDIRECT;
5498+ entry->dev = dev;
5499+}
5500+
5501+static void flow_offload_encap_tunnel(const struct flow_offload *flow,
5502+ enum flow_offload_tuple_dir dir,
5503+ struct nf_flow_rule *flow_rule)
5504+{
5505+ const struct flow_offload_tuple *this_tuple;
5506+ struct flow_action_entry *entry;
5507+ struct dst_entry *dst;
5508+
5509+ this_tuple = &flow->tuplehash[dir].tuple;
5510+ if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
5511+ return;
5512+
5513+ dst = this_tuple->dst_cache;
5514+ if (dst && dst->lwtstate) {
5515+ struct ip_tunnel_info *tun_info;
5516+
5517+ tun_info = lwt_tun_info(dst->lwtstate);
5518+ if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
5519+ entry = flow_action_entry_next(flow_rule);
5520+ entry->id = FLOW_ACTION_TUNNEL_ENCAP;
5521+ entry->tunnel = tun_info;
5522+ }
5523+ }
5524+}
5525+
5526+static void flow_offload_decap_tunnel(const struct flow_offload *flow,
5527+ enum flow_offload_tuple_dir dir,
5528+ struct nf_flow_rule *flow_rule)
5529+{
5530+ const struct flow_offload_tuple *other_tuple;
5531+ struct flow_action_entry *entry;
5532+ struct dst_entry *dst;
5533+
5534+ other_tuple = &flow->tuplehash[!dir].tuple;
5535+ if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
5536+ return;
5537+
5538+ dst = other_tuple->dst_cache;
5539+ if (dst && dst->lwtstate) {
5540+ struct ip_tunnel_info *tun_info;
5541+
5542+ tun_info = lwt_tun_info(dst->lwtstate);
5543+ if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
5544+ entry = flow_action_entry_next(flow_rule);
5545+ entry->id = FLOW_ACTION_TUNNEL_DECAP;
5546+ }
5547+ }
5548+}
5549+
5550+static int
5551+nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
5552+ enum flow_offload_tuple_dir dir,
5553+ struct nf_flow_rule *flow_rule)
5554+{
5555+ const struct flow_offload_tuple *other_tuple;
5556+ const struct flow_offload_tuple *tuple;
5557+ int i;
5558+
5559+ flow_offload_decap_tunnel(flow, dir, flow_rule);
5560+ flow_offload_encap_tunnel(flow, dir, flow_rule);
5561+
5562+ if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
5563+ flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
5564+ return -1;
5565+
5566+ tuple = &flow->tuplehash[dir].tuple;
5567+
5568+ for (i = 0; i < tuple->encap_num; i++) {
5569+ struct flow_action_entry *entry;
5570+
5571+ if (tuple->in_vlan_ingress & BIT(i))
5572+ continue;
5573+
5574+ if (tuple->encap[i].proto == htons(ETH_P_8021Q)) {
5575+ entry = flow_action_entry_next(flow_rule);
5576+ entry->id = FLOW_ACTION_VLAN_POP;
5577+ }
5578+ }
5579+
5580+ other_tuple = &flow->tuplehash[!dir].tuple;
5581+
5582+ for (i = 0; i < other_tuple->encap_num; i++) {
5583+ struct flow_action_entry *entry;
5584+
5585+ if (other_tuple->in_vlan_ingress & BIT(i))
5586+ continue;
5587+
5588+ entry = flow_action_entry_next(flow_rule);
5589+
5590+ switch (other_tuple->encap[i].proto) {
5591+ case htons(ETH_P_PPP_SES):
5592+ entry->id = FLOW_ACTION_PPPOE_PUSH;
5593+ entry->pppoe.sid = other_tuple->encap[i].id;
5594+ break;
5595+ case htons(ETH_P_8021Q):
5596+ entry->id = FLOW_ACTION_VLAN_PUSH;
5597+ entry->vlan.vid = other_tuple->encap[i].id;
5598+ entry->vlan.proto = other_tuple->encap[i].proto;
5599+ break;
5600+ }
5601+ }
5602+
5603+ return 0;
5604+}
5605+
5606+int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
5607+ enum flow_offload_tuple_dir dir,
5608+ struct nf_flow_rule *flow_rule)
5609+{
5610+ if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
5611+ return -1;
5612+
5613+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
5614+ flow_offload_ipv4_snat(net, flow, dir, flow_rule);
5615+ flow_offload_port_snat(net, flow, dir, flow_rule);
5616+ }
5617+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
5618+ flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
5619+ flow_offload_port_dnat(net, flow, dir, flow_rule);
5620+ }
5621+ if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
5622+ test_bit(NF_FLOW_DNAT, &flow->flags))
5623+ flow_offload_ipv4_checksum(net, flow, flow_rule);
5624+
5625+ flow_offload_redirect(net, flow, dir, flow_rule);
5626+
5627+ return 0;
5628+}
5629+EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);
5630+
5631+int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
5632+ enum flow_offload_tuple_dir dir,
5633+ struct nf_flow_rule *flow_rule)
5634+{
5635+ if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
5636+ return -1;
5637+
5638+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
5639+ flow_offload_ipv6_snat(net, flow, dir, flow_rule);
5640+ flow_offload_port_snat(net, flow, dir, flow_rule);
5641+ }
5642+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
5643+ flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
5644+ flow_offload_port_dnat(net, flow, dir, flow_rule);
5645+ }
5646+
5647+ flow_offload_redirect(net, flow, dir, flow_rule);
5648+
5649+ return 0;
5650+}
5651+EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);
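Editor's note: nf_flow_rule_route_ipv4()/_ipv6() assemble the per-direction action list in a fixed order: tunnel decap/encap, Ethernet source and destination rewrite, VLAN pops for this direction's ingress encapsulation, VLAN/PPPoE pushes for the egress side, the SNAT/DNAT address and port mangles, an IPv4 checksum action when NAT was applied, and finally the redirect to the egress device. The list itself is just an array that flow_action_entry_next() appends to. A minimal userspace sketch of that append-only builder; the entry names are illustrative, not kernel identifiers.

    #include <stdio.h>

    #define MAX_ACTIONS 16   /* mirrors NF_FLOW_RULE_ACTION_MAX */

    struct action { const char *id; };

    struct rule {
        struct action entries[MAX_ACTIONS];
        int num_entries;
    };

    /* Same idea as flow_action_entry_next(): hand out the next free slot. */
    static struct action *next_entry(struct rule *r)
    {
        return &r->entries[r->num_entries++];
    }

    int main(void)
    {
        struct rule r = { .num_entries = 0 };
        const char *seq[] = { "eth_src", "eth_dst", "snat_addr", "snat_port",
                              "csum", "redirect" };

        for (unsigned i = 0; i < sizeof(seq) / sizeof(seq[0]); i++)
            next_entry(&r)->id = seq[i];

        for (int i = 0; i < r.num_entries; i++)
            printf("%d: %s\n", i, r.entries[i].id);
        return 0;
    }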
5652+
5653+#define NF_FLOW_RULE_ACTION_MAX 16
5654+
5655+static struct nf_flow_rule *
5656+nf_flow_offload_rule_alloc(struct net *net,
5657+ const struct flow_offload_work *offload,
5658+ enum flow_offload_tuple_dir dir)
5659+{
5660+ const struct nf_flowtable *flowtable = offload->flowtable;
5661+ const struct flow_offload_tuple *tuple, *other_tuple;
5662+ const struct flow_offload *flow = offload->flow;
5663+ struct dst_entry *other_dst = NULL;
5664+ struct nf_flow_rule *flow_rule;
5665+ int err = -ENOMEM;
5666+
5667+ flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
5668+ if (!flow_rule)
5669+ goto err_flow;
5670+
5671+ flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
5672+ if (!flow_rule->rule)
5673+ goto err_flow_rule;
5674+
5675+ flow_rule->rule->match.dissector = &flow_rule->match.dissector;
5676+ flow_rule->rule->match.mask = &flow_rule->match.mask;
5677+ flow_rule->rule->match.key = &flow_rule->match.key;
5678+
5679+ tuple = &flow->tuplehash[dir].tuple;
5680+ other_tuple = &flow->tuplehash[!dir].tuple;
5681+ if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
5682+ other_dst = other_tuple->dst_cache;
5683+
5684+ err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
5685+ if (err < 0)
5686+ goto err_flow_match;
5687+
5688+ flow_rule->rule->action.num_entries = 0;
5689+ if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
5690+ goto err_flow_match;
5691+
5692+ return flow_rule;
5693+
5694+err_flow_match:
5695+ kfree(flow_rule->rule);
5696+err_flow_rule:
5697+ kfree(flow_rule);
5698+err_flow:
5699+ return NULL;
5700+}
5701+
5702+static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
5703+{
5704+ struct flow_action_entry *entry;
5705+ int i;
5706+
5707+ for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
5708+ entry = &flow_rule->rule->action.entries[i];
5709+ if (entry->id != FLOW_ACTION_REDIRECT)
5710+ continue;
5711+
5712+ dev_put(entry->dev);
5713+ }
5714+ kfree(flow_rule->rule);
5715+ kfree(flow_rule);
5716+}
5717+
5718+static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
5719+{
5720+ int i;
5721+
5722+ for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
5723+ __nf_flow_offload_destroy(flow_rule[i]);
5724+}
5725+
5726+static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
5727+ struct nf_flow_rule *flow_rule[])
5728+{
5729+ struct net *net = read_pnet(&offload->flowtable->net);
5730+
5731+ flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
5732+ FLOW_OFFLOAD_DIR_ORIGINAL);
5733+ if (!flow_rule[0])
5734+ return -ENOMEM;
5735+
5736+ flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
5737+ FLOW_OFFLOAD_DIR_REPLY);
5738+ if (!flow_rule[1]) {
5739+ __nf_flow_offload_destroy(flow_rule[0]);
5740+ return -ENOMEM;
5741+ }
5742+
5743+ return 0;
5744+}
5745+
5746+static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
5747+ __be16 proto, int priority,
5748+ enum flow_cls_command cmd,
5749+ const struct flow_offload_tuple *tuple,
5750+ struct netlink_ext_ack *extack)
5751+{
5752+ cls_flow->common.protocol = proto;
5753+ cls_flow->common.prio = priority;
5754+ cls_flow->common.extack = extack;
5755+ cls_flow->command = cmd;
5756+ cls_flow->cookie = (unsigned long)tuple;
5757+}
5758+
5759+static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
5760+ struct flow_offload *flow,
5761+ struct nf_flow_rule *flow_rule,
5762+ enum flow_offload_tuple_dir dir,
5763+ int priority, int cmd,
5764+ struct flow_stats *stats,
5765+ struct list_head *block_cb_list)
5766+{
5767+ struct flow_cls_offload cls_flow = {};
5768+ struct flow_block_cb *block_cb;
5769+ struct netlink_ext_ack extack;
5770+ __be16 proto = ETH_P_ALL;
5771+ int err, i = 0;
5772+
5773+ nf_flow_offload_init(&cls_flow, proto, priority, cmd,
5774+ &flow->tuplehash[dir].tuple, &extack);
5775+ if (cmd == FLOW_CLS_REPLACE)
5776+ cls_flow.rule = flow_rule->rule;
5777+
developer0cc0d732023-06-07 13:52:41 +08005778+ down_read(&flowtable->flow_block_lock);
developer8cb3ac72022-07-04 10:55:14 +08005779+ list_for_each_entry(block_cb, block_cb_list, list) {
5780+ err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
5781+ block_cb->cb_priv);
5782+ if (err < 0)
5783+ continue;
5784+
5785+ i++;
5786+ }
developer0cc0d732023-06-07 13:52:41 +08005787+ up_read(&flowtable->flow_block_lock);
developer8cb3ac72022-07-04 10:55:14 +08005788+
5789+ if (cmd == FLOW_CLS_STATS)
5790+ memcpy(stats, &cls_flow.stats, sizeof(*stats));
5791+
5792+ return i;
5793+}
5794+
5795+static int flow_offload_tuple_add(struct flow_offload_work *offload,
5796+ struct nf_flow_rule *flow_rule,
5797+ enum flow_offload_tuple_dir dir)
5798+{
5799+ return nf_flow_offload_tuple(offload->flowtable, offload->flow,
5800+ flow_rule, dir, offload->priority,
5801+ FLOW_CLS_REPLACE, NULL,
5802+ &offload->flowtable->flow_block.cb_list);
5803+}
5804+
5805+static void flow_offload_tuple_del(struct flow_offload_work *offload,
5806+ enum flow_offload_tuple_dir dir)
5807+{
5808+ nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
5809+ offload->priority, FLOW_CLS_DESTROY, NULL,
5810+ &offload->flowtable->flow_block.cb_list);
5811+}
5812+
5813+static int flow_offload_rule_add(struct flow_offload_work *offload,
5814+ struct nf_flow_rule *flow_rule[])
5815+{
5816+ int ok_count = 0;
5817+
5818+ ok_count += flow_offload_tuple_add(offload, flow_rule[0],
5819+ FLOW_OFFLOAD_DIR_ORIGINAL);
5820+ ok_count += flow_offload_tuple_add(offload, flow_rule[1],
5821+ FLOW_OFFLOAD_DIR_REPLY);
5822+ if (ok_count == 0)
5823+ return -ENOENT;
5824+
5825+ return 0;
5826+}
5827+
5828+static void flow_offload_work_add(struct flow_offload_work *offload)
5829+{
5830+ struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
5831+ int err;
5832+
5833+ err = nf_flow_offload_alloc(offload, flow_rule);
5834+ if (err < 0)
5835+ return;
5836+
5837+ err = flow_offload_rule_add(offload, flow_rule);
5838+ if (err < 0)
5839+ goto out;
5840+
5841+ set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
5842+
5843+out:
5844+ nf_flow_offload_destroy(flow_rule);
5845+}
5846+
5847+static void flow_offload_work_del(struct flow_offload_work *offload)
5848+{
5849+ clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
5850+ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
5851+ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
5852+ set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
5853+}
5854+
5855+static void flow_offload_tuple_stats(struct flow_offload_work *offload,
5856+ enum flow_offload_tuple_dir dir,
5857+ struct flow_stats *stats)
5858+{
5859+ nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
5860+ offload->priority, FLOW_CLS_STATS, stats,
5861+ &offload->flowtable->flow_block.cb_list);
5862+}
5863+
5864+static void flow_offload_work_stats(struct flow_offload_work *offload)
5865+{
5866+ struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
5867+ u64 lastused;
5868+
5869+ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
5870+ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);
5871+
5872+ lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
5873+ offload->flow->timeout = max_t(u64, offload->flow->timeout,
5874+ lastused + flow_offload_get_timeout(offload->flow));
5875+
5876+ if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
5877+ if (stats[0].pkts)
5878+ nf_ct_acct_add(offload->flow->ct,
5879+ FLOW_OFFLOAD_DIR_ORIGINAL,
5880+ stats[0].pkts, stats[0].bytes);
5881+ if (stats[1].pkts)
5882+ nf_ct_acct_add(offload->flow->ct,
5883+ FLOW_OFFLOAD_DIR_REPLY,
5884+ stats[1].pkts, stats[1].bytes);
5885+ }
5886+}
5887+
5888+static void flow_offload_work_handler(struct work_struct *work)
5889+{
5890+ struct flow_offload_work *offload;
5891+
5892+ offload = container_of(work, struct flow_offload_work, work);
5893+ switch (offload->cmd) {
5894+ case FLOW_CLS_REPLACE:
5895+ flow_offload_work_add(offload);
5896+ break;
5897+ case FLOW_CLS_DESTROY:
5898+ flow_offload_work_del(offload);
5899+ break;
5900+ case FLOW_CLS_STATS:
5901+ flow_offload_work_stats(offload);
5902+ break;
5903+ default:
5904+ WARN_ON_ONCE(1);
5905+ }
5906+
5907+ clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags);
5908+ kfree(offload);
5909+}
5910+
5911+static void flow_offload_queue_work(struct flow_offload_work *offload)
5912+{
5913+ if (offload->cmd == FLOW_CLS_REPLACE)
5914+ queue_work(nf_flow_offload_add_wq, &offload->work);
5915+ else if (offload->cmd == FLOW_CLS_DESTROY)
5916+ queue_work(nf_flow_offload_del_wq, &offload->work);
5917+ else
5918+ queue_work(nf_flow_offload_stats_wq, &offload->work);
5919+}
5920+
5921+static struct flow_offload_work *
5922+nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
5923+ struct flow_offload *flow, unsigned int cmd)
5924+{
5925+ struct flow_offload_work *offload;
5926+
5927+ if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags))
5928+ return NULL;
5929+
5930+ offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
5931+ if (!offload) {
5932+ clear_bit(NF_FLOW_HW_PENDING, &flow->flags);
5933+ return NULL;
5934+ }
5935+
5936+ offload->cmd = cmd;
5937+ offload->flow = flow;
5938+ offload->priority = flowtable->priority;
5939+ offload->flowtable = flowtable;
5940+ INIT_WORK(&offload->work, flow_offload_work_handler);
5941+
5942+ return offload;
5943+}
5944+
5945+
5946+void nf_flow_offload_add(struct nf_flowtable *flowtable,
5947+ struct flow_offload *flow)
5948+{
5949+ struct flow_offload_work *offload;
5950+
5951+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
5952+ if (!offload)
5953+ return;
5954+
5955+ flow_offload_queue_work(offload);
5956+}
5957+
5958+void nf_flow_offload_del(struct nf_flowtable *flowtable,
5959+ struct flow_offload *flow)
5960+{
5961+ struct flow_offload_work *offload;
5962+
5963+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY);
5964+ if (!offload)
5965+ return;
5966+
5967+ set_bit(NF_FLOW_HW_DYING, &flow->flags);
5968+ flow_offload_queue_work(offload);
5969+}
5970+
5971+void nf_flow_offload_stats(struct nf_flowtable *flowtable,
developeree39bcf2023-06-16 08:03:30 +08005972+ struct flow_offload *flow, bool force)
developer8cb3ac72022-07-04 10:55:14 +08005973+{
5974+ struct flow_offload_work *offload;
5975+ __s32 delta;
5976+
developeree39bcf2023-06-16 08:03:30 +08005977+ if (!force) {
5978+ delta = nf_flow_timeout_delta(flow->timeout);
5979+ if ((delta >= (9 * flow_offload_get_timeout(flow)) / 10))
5980+ return;
5981+ }
developer8cb3ac72022-07-04 10:55:14 +08005982+
5983+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
5984+ if (!offload)
5985+ return;
5986+
5987+ flow_offload_queue_work(offload);
5988+}
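Editor's note: nf_flow_offload_stats() only schedules a FLOW_CLS_STATS query when the flow is getting close to expiry. Unless force is set, it skips the work while at least 90% of the timeout window is still left, which keeps the stats workqueue quiet for flows that were refreshed recently. The same check in plain arithmetic, with hypothetical numbers and the timeout expressed in jiffies:

    #include <stdio.h>
    #include <stdbool.h>

    /* Skip the stats refresh while the remaining lifetime is still at least
     * 90% of the full timeout, mirroring the delta check above. */
    static bool needs_refresh(long remaining, long full_timeout)
    {
        return remaining < (9 * full_timeout) / 10;
    }

    int main(void)
    {
        long timeout = 30 * 100;                /* 30 s at HZ=100 */

        printf("just refreshed: %d\n", needs_refresh(timeout, timeout));   /* 0 */
        printf("25 s left:      %d\n", needs_refresh(25 * 100, timeout));  /* 1 */
        return 0;
    }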
5989+
5990+void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
5991+{
5992+ if (nf_flowtable_hw_offload(flowtable)) {
5993+ flush_workqueue(nf_flow_offload_add_wq);
5994+ flush_workqueue(nf_flow_offload_del_wq);
5995+ flush_workqueue(nf_flow_offload_stats_wq);
5996+ }
5997+}
5998+
5999+static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
6000+ struct flow_block_offload *bo,
6001+ enum flow_block_command cmd)
6002+{
6003+ struct flow_block_cb *block_cb, *next;
6004+ int err = 0;
6005+
developer0cc0d732023-06-07 13:52:41 +08006006+ down_write(&flowtable->flow_block_lock);
developeree39bcf2023-06-16 08:03:30 +08006007+
developer8cb3ac72022-07-04 10:55:14 +08006008+ switch (cmd) {
6009+ case FLOW_BLOCK_BIND:
6010+ list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
6011+ break;
6012+ case FLOW_BLOCK_UNBIND:
6013+ list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
6014+ list_del(&block_cb->list);
6015+ flow_block_cb_free(block_cb);
6016+ }
6017+ break;
6018+ default:
6019+ WARN_ON_ONCE(1);
6020+ err = -EOPNOTSUPP;
6021+ }
developeree39bcf2023-06-16 08:03:30 +08006022+
developer0cc0d732023-06-07 13:52:41 +08006023+ up_write(&flowtable->flow_block_lock);
developera54478c2022-10-01 16:41:46 +08006024+
developer8cb3ac72022-07-04 10:55:14 +08006025+ return err;
6026+}
6027+
6028+static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
6029+ struct net *net,
6030+ enum flow_block_command cmd,
6031+ struct nf_flowtable *flowtable,
6032+ struct netlink_ext_ack *extack)
6033+{
6034+ memset(bo, 0, sizeof(*bo));
6035+ bo->net = net;
6036+ bo->block = &flowtable->flow_block;
6037+ bo->command = cmd;
6038+ bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
6039+ bo->extack = extack;
6040+ INIT_LIST_HEAD(&bo->cb_list);
6041+}
6042+
6043+static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
6044+ struct nf_flowtable *flowtable,
6045+ struct net_device *dev,
6046+ enum flow_block_command cmd,
6047+ struct netlink_ext_ack *extack)
6048+{
6049+ nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
6050+ extack);
6051+ flow_indr_block_call(dev, bo, cmd);
6052+
6053+ if (list_empty(&bo->cb_list))
6054+ return -EOPNOTSUPP;
6055+
6056+ return 0;
6057+}
6058+
6059+static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
6060+ struct nf_flowtable *flowtable,
6061+ struct net_device *dev,
6062+ enum flow_block_command cmd,
6063+ struct netlink_ext_ack *extack)
6064+{
6065+ int err;
6066+
6067+ nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
6068+ extack);
developer0cc0d732023-06-07 13:52:41 +08006069+ down_write(&flowtable->flow_block_lock);
developer8cb3ac72022-07-04 10:55:14 +08006070+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
developer0cc0d732023-06-07 13:52:41 +08006071+ up_write(&flowtable->flow_block_lock);
developer8cb3ac72022-07-04 10:55:14 +08006072+ if (err < 0)
6073+ return err;
6074+
6075+ return 0;
6076+}
6077+
6078+int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
6079+ struct net_device *dev,
6080+ enum flow_block_command cmd)
6081+{
6082+ struct netlink_ext_ack extack = {};
6083+ struct flow_block_offload bo;
6084+ int err;
6085+
6086+ if (!nf_flowtable_hw_offload(flowtable))
6087+ return 0;
6088+
6089+ if (dev->netdev_ops->ndo_setup_tc)
6090+ err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
6091+ &extack);
6092+ else
6093+ err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
6094+ &extack);
6095+ if (err < 0)
6096+ return err;
6097+
6098+ return nf_flow_table_block_setup(flowtable, &bo, cmd);
6099+}
6100+EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);
6101+
6102+int nf_flow_table_offload_init(void)
6103+{
6104+ nf_flow_offload_add_wq = alloc_workqueue("nf_ft_offload_add",
6105+ WQ_UNBOUND | WQ_SYSFS, 0);
6106+ if (!nf_flow_offload_add_wq)
6107+ return -ENOMEM;
6108+
6109+ nf_flow_offload_del_wq = alloc_workqueue("nf_ft_offload_del",
6110+ WQ_UNBOUND | WQ_SYSFS, 0);
6111+ if (!nf_flow_offload_del_wq)
6112+ goto err_del_wq;
6113+
6114+ nf_flow_offload_stats_wq = alloc_workqueue("nf_ft_offload_stats",
6115+ WQ_UNBOUND | WQ_SYSFS, 0);
6116+ if (!nf_flow_offload_stats_wq)
6117+ goto err_stats_wq;
6118+
6119+ return 0;
6120+
6121+err_stats_wq:
6122+ destroy_workqueue(nf_flow_offload_del_wq);
6123+err_del_wq:
6124+ destroy_workqueue(nf_flow_offload_add_wq);
6125+ return -ENOMEM;
6126+}
6127+
6128+void nf_flow_table_offload_exit(void)
6129+{
6130+ destroy_workqueue(nf_flow_offload_add_wq);
6131+ destroy_workqueue(nf_flow_offload_del_wq);
6132+ destroy_workqueue(nf_flow_offload_stats_wq);
6133+}
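Editor's note: nf_flow_table_offload_init() allocates the three unbound workqueues (add, del, stats) and unwinds in reverse order through gotos when a later allocation fails, so a partial init never leaks a queue. The same idiom as a compilable userspace sketch; the resource names and allocator are stand-ins.

    #include <stdio.h>
    #include <stdlib.h>

    static void *alloc_res(const char *name) { (void)name; return malloc(1); }
    static void  free_res(void *r)           { free(r); }

    static void *wq_add, *wq_del, *wq_stats;

    /* Acquire three resources; on failure release the ones already acquired. */
    static int init_all(void)
    {
        wq_add = alloc_res("add");
        if (!wq_add)
            return -1;

        wq_del = alloc_res("del");
        if (!wq_del)
            goto err_del;

        wq_stats = alloc_res("stats");
        if (!wq_stats)
            goto err_stats;

        return 0;

    err_stats:
        free_res(wq_del);
    err_del:
        free_res(wq_add);
        return -1;
    }

    int main(void)
    {
        printf("init: %d\n", init_all());
        return 0;
    }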
6134diff --git a/net/netfilter/xt_FLOWOFFLOAD.c b/net/netfilter/xt_FLOWOFFLOAD.c
6135new file mode 100644
developer58aa0682023-09-18 14:02:26 +08006136index 0000000..2cab008
developer8cb3ac72022-07-04 10:55:14 +08006137--- /dev/null
6138+++ b/net/netfilter/xt_FLOWOFFLOAD.c
developeree39bcf2023-06-16 08:03:30 +08006139@@ -0,0 +1,794 @@
developer8cb3ac72022-07-04 10:55:14 +08006140+/*
6141+ * Copyright (C) 2018-2021 Felix Fietkau <nbd@nbd.name>
6142+ *
6143+ * This program is free software; you can redistribute it and/or modify
6144+ * it under the terms of the GNU General Public License version 2 as
6145+ * published by the Free Software Foundation.
6146+ */
6147+#include <linux/module.h>
6148+#include <linux/init.h>
6149+#include <linux/netfilter.h>
6150+#include <linux/netfilter/xt_FLOWOFFLOAD.h>
6151+#include <linux/if_vlan.h>
6152+#include <net/ip.h>
6153+#include <net/netfilter/nf_conntrack.h>
6154+#include <net/netfilter/nf_conntrack_extend.h>
6155+#include <net/netfilter/nf_conntrack_helper.h>
6156+#include <net/netfilter/nf_flow_table.h>
6157+
6158+struct xt_flowoffload_hook {
6159+ struct hlist_node list;
6160+ struct nf_hook_ops ops;
6161+ struct net *net;
6162+ bool registered;
6163+ bool used;
6164+};
6165+
6166+struct xt_flowoffload_table {
6167+ struct nf_flowtable ft;
6168+ struct hlist_head hooks;
6169+ struct delayed_work work;
6170+};
6171+
6172+struct nf_forward_info {
6173+ const struct net_device *indev;
6174+ const struct net_device *outdev;
6175+ const struct net_device *hw_outdev;
6176+ struct id {
6177+ __u16 id;
6178+ __be16 proto;
6179+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
6180+ u8 num_encaps;
6181+ u8 ingress_vlans;
6182+ u8 h_source[ETH_ALEN];
6183+ u8 h_dest[ETH_ALEN];
6184+ enum flow_offload_xmit_type xmit_type;
6185+};
6186+
6187+static DEFINE_SPINLOCK(hooks_lock);
6188+
6189+struct xt_flowoffload_table flowtable[2];
6190+
6191+static unsigned int
6192+xt_flowoffload_net_hook(void *priv, struct sk_buff *skb,
6193+ const struct nf_hook_state *state)
6194+{
6195+ struct vlan_ethhdr *veth;
6196+ __be16 proto;
6197+
6198+ switch (skb->protocol) {
6199+ case htons(ETH_P_8021Q):
6200+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
6201+ proto = veth->h_vlan_encapsulated_proto;
6202+ break;
6203+ case htons(ETH_P_PPP_SES):
6204+ proto = nf_flow_pppoe_proto(skb);
6205+ break;
6206+ default:
6207+ proto = skb->protocol;
6208+ break;
6209+ }
6210+
6211+ switch (proto) {
6212+ case htons(ETH_P_IP):
6213+ return nf_flow_offload_ip_hook(priv, skb, state);
6214+ case htons(ETH_P_IPV6):
6215+ return nf_flow_offload_ipv6_hook(priv, skb, state);
6216+ }
6217+
6218+ return NF_ACCEPT;
6219+}
6220+
6221+static int
6222+xt_flowoffload_create_hook(struct xt_flowoffload_table *table,
6223+ struct net_device *dev)
6224+{
6225+ struct xt_flowoffload_hook *hook;
6226+ struct nf_hook_ops *ops;
6227+
6228+ hook = kzalloc(sizeof(*hook), GFP_ATOMIC);
6229+ if (!hook)
6230+ return -ENOMEM;
6231+
6232+ ops = &hook->ops;
6233+ ops->pf = NFPROTO_NETDEV;
6234+ ops->hooknum = NF_NETDEV_INGRESS;
6235+ ops->priority = 10;
6236+ ops->priv = &table->ft;
6237+ ops->hook = xt_flowoffload_net_hook;
6238+ ops->dev = dev;
6239+
6240+ hlist_add_head(&hook->list, &table->hooks);
6241+ mod_delayed_work(system_power_efficient_wq, &table->work, 0);
6242+
6243+ return 0;
6244+}
6245+
6246+static struct xt_flowoffload_hook *
6247+flow_offload_lookup_hook(struct xt_flowoffload_table *table,
6248+ struct net_device *dev)
6249+{
6250+ struct xt_flowoffload_hook *hook;
6251+
6252+ hlist_for_each_entry(hook, &table->hooks, list) {
6253+ if (hook->ops.dev == dev)
6254+ return hook;
6255+ }
6256+
6257+ return NULL;
6258+}
6259+
6260+static void
6261+xt_flowoffload_check_device(struct xt_flowoffload_table *table,
6262+ struct net_device *dev)
6263+{
6264+ struct xt_flowoffload_hook *hook;
6265+
6266+ if (!dev)
6267+ return;
6268+
6269+ spin_lock_bh(&hooks_lock);
6270+ hook = flow_offload_lookup_hook(table, dev);
6271+ if (hook)
6272+ hook->used = true;
6273+ else
6274+ xt_flowoffload_create_hook(table, dev);
6275+ spin_unlock_bh(&hooks_lock);
6276+}
6277+
6278+static void
6279+xt_flowoffload_register_hooks(struct xt_flowoffload_table *table)
6280+{
6281+ struct xt_flowoffload_hook *hook;
6282+
6283+restart:
6284+ hlist_for_each_entry(hook, &table->hooks, list) {
6285+ if (hook->registered)
6286+ continue;
6287+
6288+ hook->registered = true;
6289+ hook->net = dev_net(hook->ops.dev);
6290+ spin_unlock_bh(&hooks_lock);
6291+ nf_register_net_hook(hook->net, &hook->ops);
6292+ if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
6293+ table->ft.type->setup(&table->ft, hook->ops.dev,
6294+ FLOW_BLOCK_BIND);
6295+ spin_lock_bh(&hooks_lock);
6296+ goto restart;
6297+ }
6298+
6299+}
6300+
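+/*
+ * Unregister and free hooks whose devices no longer carry any offloaded
+ * flows. Returns true while a hook is still in use or waiting to be
+ * registered, so the caller keeps the maintenance work scheduled.
+ */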
6301+static bool
6302+xt_flowoffload_cleanup_hooks(struct xt_flowoffload_table *table)
6303+{
6304+ struct xt_flowoffload_hook *hook;
6305+ bool active = false;
6306+
6307+restart:
6308+ spin_lock_bh(&hooks_lock);
6309+ hlist_for_each_entry(hook, &table->hooks, list) {
6310+ if (hook->used || !hook->registered) {
6311+ active = true;
6312+ continue;
6313+ }
6314+
6315+ hlist_del(&hook->list);
6316+ spin_unlock_bh(&hooks_lock);
6317+ if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
6318+ table->ft.type->setup(&table->ft, hook->ops.dev,
6319+ FLOW_BLOCK_UNBIND);
6320+ nf_unregister_net_hook(hook->net, &hook->ops);
6321+ kfree(hook);
6322+ goto restart;
6323+ }
6324+ spin_unlock_bh(&hooks_lock);
6325+
6326+ return active;
6327+}
6328+
6329+static void
6330+xt_flowoffload_check_hook(struct flow_offload *flow, void *data)
6331+{
6332+ struct xt_flowoffload_table *table = data;
6333+ struct flow_offload_tuple *tuple0 = &flow->tuplehash[0].tuple;
6334+ struct flow_offload_tuple *tuple1 = &flow->tuplehash[1].tuple;
6335+ struct xt_flowoffload_hook *hook;
6336+
6337+ spin_lock_bh(&hooks_lock);
6338+ hlist_for_each_entry(hook, &table->hooks, list) {
6339+ if (hook->ops.dev->ifindex != tuple0->iifidx &&
6340+ hook->ops.dev->ifindex != tuple1->iifidx)
6341+ continue;
6342+
6343+ hook->used = true;
6344+ }
6345+ spin_unlock_bh(&hooks_lock);
6346+}
6347+
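+/*
+ * Periodic maintenance: register pending hooks, mark the hooks still
+ * referenced by flow entries via nf_flow_table_iterate(), then tear down the
+ * idle ones. Re-arms itself every second while work remains.
+ */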
6348+static void
6349+xt_flowoffload_hook_work(struct work_struct *work)
6350+{
6351+ struct xt_flowoffload_table *table;
6352+ struct xt_flowoffload_hook *hook;
6353+ int err;
6354+
6355+ table = container_of(work, struct xt_flowoffload_table, work.work);
6356+
6357+ spin_lock_bh(&hooks_lock);
6358+ xt_flowoffload_register_hooks(table);
6359+ hlist_for_each_entry(hook, &table->hooks, list)
6360+ hook->used = false;
6361+ spin_unlock_bh(&hooks_lock);
6362+
6363+ err = nf_flow_table_iterate(&table->ft, xt_flowoffload_check_hook,
6364+ table);
6365+ if (err && err != -EAGAIN)
6366+ goto out;
6367+
6368+ if (!xt_flowoffload_cleanup_hooks(table))
6369+ return;
6370+
6371+out:
6372+ queue_delayed_work(system_power_efficient_wq, &table->work, HZ);
6373+}
6374+
6375+static bool
6376+xt_flowoffload_skip(struct sk_buff *skb, int family)
6377+{
6378+ if (skb_sec_path(skb))
6379+ return true;
6380+
6381+ if (family == NFPROTO_IPV4) {
6382+ const struct ip_options *opt = &(IPCB(skb)->opt);
6383+
6384+ if (unlikely(opt->optlen))
6385+ return true;
6386+ }
6387+
6388+ return false;
6389+}
6390+
6391+static enum flow_offload_xmit_type nf_xmit_type(struct dst_entry *dst)
6392+{
6393+ if (dst_xfrm(dst))
6394+ return FLOW_OFFLOAD_XMIT_XFRM;
6395+
6396+ return FLOW_OFFLOAD_XMIT_NEIGH;
6397+}
6398+
6399+static void nf_default_forward_path(struct nf_flow_route *route,
6400+ struct dst_entry *dst_cache,
6401+ enum ip_conntrack_dir dir,
6402+ struct net_device **dev)
6403+{
6404+ route->tuple[!dir].in.ifindex = dst_cache->dev->ifindex;
6405+ route->tuple[dir].dst = dst_cache;
6406+ route->tuple[dir].xmit_type = nf_xmit_type(dst_cache);
6407+}
6408+
6409+static bool nf_is_valid_ether_device(const struct net_device *dev)
6410+{
6411+ if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
6412+ dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
6413+ return false;
6414+
6415+ return true;
6416+}
6417+
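+/*
+ * Walk the path stack returned by dev_fill_forward_path() and record the
+ * real ingress device, the VLAN/PPPoE encapsulations and the MAC addresses
+ * for the flow. A DSA hop ends the walk; if the resulting ingress device is
+ * a usable Ethernet device, the flow can use FLOW_OFFLOAD_XMIT_DIRECT.
+ */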
6418+static void nf_dev_path_info(const struct net_device_path_stack *stack,
6419+ struct nf_forward_info *info,
6420+ unsigned char *ha)
6421+{
6422+ const struct net_device_path *path;
6423+ int i;
6424+
6425+ memcpy(info->h_dest, ha, ETH_ALEN);
6426+
6427+ for (i = 0; i < stack->num_paths; i++) {
6428+ path = &stack->path[i];
6429+
6430+ info->indev = path->dev;
6431+
6432+ switch (path->type) {
6433+ case DEV_PATH_ETHERNET:
6434+ case DEV_PATH_DSA:
6435+ case DEV_PATH_VLAN:
6436+ case DEV_PATH_PPPOE:
6437+ if (is_zero_ether_addr(info->h_source))
6438+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
6439+
6440+ if (path->type == DEV_PATH_ETHERNET)
6441+ break;
6442+ if (path->type == DEV_PATH_DSA) {
6443+ i = stack->num_paths;
6444+ break;
6445+ }
6446+
6447+ /* DEV_PATH_VLAN and DEV_PATH_PPPOE */
6448+ if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
6449+ info->indev = NULL;
6450+ break;
6451+ }
6452+ if (!info->outdev)
6453+ info->outdev = path->dev;
6454+ info->encap[info->num_encaps].id = path->encap.id;
6455+ info->encap[info->num_encaps].proto = path->encap.proto;
6456+ info->num_encaps++;
6457+ if (path->type == DEV_PATH_PPPOE)
6458+ memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
6459+ break;
6460+ case DEV_PATH_BRIDGE:
6461+ if (is_zero_ether_addr(info->h_source))
6462+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
6463+
6464+ switch (path->bridge.vlan_mode) {
6465+ case DEV_PATH_BR_VLAN_UNTAG_HW:
6466+ info->ingress_vlans |= BIT(info->num_encaps - 1);
6467+ break;
6468+ case DEV_PATH_BR_VLAN_TAG:
6469+ info->encap[info->num_encaps].id = path->bridge.vlan_id;
6470+ info->encap[info->num_encaps].proto = path->bridge.vlan_proto;
6471+ info->num_encaps++;
6472+ break;
6473+ case DEV_PATH_BR_VLAN_UNTAG:
6474+ info->num_encaps--;
6475+ break;
6476+ case DEV_PATH_BR_VLAN_KEEP:
6477+ break;
6478+ }
6479+ break;
6480+ default:
6481+ break;
6482+ }
6483+ }
6484+ if (!info->outdev)
6485+ info->outdev = info->indev;
6486+
6487+ info->hw_outdev = info->indev;
6488+
6489+ if (nf_is_valid_ether_device(info->indev))
6490+ info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
6491+}
6492+
6493+static int nf_dev_fill_forward_path(const struct nf_flow_route *route,
6494+ const struct dst_entry *dst_cache,
6495+ const struct nf_conn *ct,
6496+ enum ip_conntrack_dir dir, u8 *ha,
6497+ struct net_device_path_stack *stack)
6498+{
6499+ const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
6500+ struct net_device *dev = dst_cache->dev;
6501+ struct neighbour *n;
6502+ u8 nud_state;
6503+
6504+ if (!nf_is_valid_ether_device(dev))
6505+ goto out;
6506+
developer9fdc0e82023-05-12 14:21:17 +08006507+ if (ct->status & IPS_NAT_MASK) {
6508+ n = dst_neigh_lookup(dst_cache, daddr);
6509+ if (!n)
6510+ return -1;
developer8cb3ac72022-07-04 10:55:14 +08006511+
developer9fdc0e82023-05-12 14:21:17 +08006512+ read_lock_bh(&n->lock);
6513+ nud_state = n->nud_state;
6514+ ether_addr_copy(ha, n->ha);
6515+ read_unlock_bh(&n->lock);
6516+ neigh_release(n);
developer8cb3ac72022-07-04 10:55:14 +08006517+
developer9fdc0e82023-05-12 14:21:17 +08006518+ if (!(nud_state & NUD_VALID))
6519+ return -1;
6520+ }
developer64db8532023-04-28 13:56:00 +08006521+
developer8cb3ac72022-07-04 10:55:14 +08006522+out:
6523+ return dev_fill_forward_path(dev, ha, stack);
6524+}
6525+
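+/*
+ * Resolve the forward path for one direction of the flow. For connections
+ * without NAT the MAC addresses are taken from the skb itself, since no
+ * neighbour lookup is performed in that case.
+ */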
developer9fdc0e82023-05-12 14:21:17 +08006526+static int nf_dev_forward_path(struct sk_buff *skb,
6527+ struct nf_flow_route *route,
developer8cb3ac72022-07-04 10:55:14 +08006528+ const struct nf_conn *ct,
6529+ enum ip_conntrack_dir dir,
6530+ struct net_device **devs)
6531+{
6532+ const struct dst_entry *dst = route->tuple[dir].dst;
developer9fdc0e82023-05-12 14:21:17 +08006533+ struct ethhdr *eth;
6534+ enum ip_conntrack_dir skb_dir;
developer8cb3ac72022-07-04 10:55:14 +08006535+ struct net_device_path_stack stack;
6536+ struct nf_forward_info info = {};
6537+ unsigned char ha[ETH_ALEN];
6538+ int i;
6539+
developer9fdc0e82023-05-12 14:21:17 +08006540+ if (!(ct->status & IPS_NAT_MASK) && skb_mac_header_was_set(skb)) {
6541+ eth = eth_hdr(skb);
6542+ skb_dir = CTINFO2DIR(skb_get_nfct(skb) & NFCT_INFOMASK);
6543+
6544+ if (skb_dir != dir) {
6545+ memcpy(ha, eth->h_source, ETH_ALEN);
6546+ memcpy(info.h_source, eth->h_dest, ETH_ALEN);
6547+ } else {
6548+ memcpy(ha, eth->h_dest, ETH_ALEN);
6549+ memcpy(info.h_source, eth->h_source, ETH_ALEN);
6550+ }
6551+ }
6552+
developer7e533772023-04-27 05:59:30 +08006553+ if (nf_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
developer8cb3ac72022-07-04 10:55:14 +08006554+ nf_dev_path_info(&stack, &info, ha);
6555+
6556+ devs[!dir] = (struct net_device *)info.indev;
6557+ if (!info.indev)
6558+ return -1;
6559+
6560+ route->tuple[!dir].in.ifindex = info.indev->ifindex;
6561+ for (i = 0; i < info.num_encaps; i++) {
6562+ route->tuple[!dir].in.encap[i].id = info.encap[i].id;
6563+ route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
6564+ }
6565+ route->tuple[!dir].in.num_encaps = info.num_encaps;
6566+ route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
6567+
6568+ if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
6569+ memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
6570+ memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
6571+ route->tuple[dir].out.ifindex = info.outdev->ifindex;
6572+ route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex;
6573+ route->tuple[dir].xmit_type = info.xmit_type;
6574+ }
6575+
6576+ return 0;
6577+}
6578+
6579+static int
6580+xt_flowoffload_route_dir(struct nf_flow_route *route, const struct nf_conn *ct,
6581+ enum ip_conntrack_dir dir,
6582+ const struct xt_action_param *par, int ifindex,
6583+ struct net_device **devs)
6584+{
6585+ struct dst_entry *dst = NULL;
6586+ struct flowi fl;
6587+
6588+ memset(&fl, 0, sizeof(fl));
6589+ switch (xt_family(par)) {
6590+ case NFPROTO_IPV4:
6591+ fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.src.u3.ip;
6592+ fl.u.ip4.flowi4_oif = ifindex;
6593+ break;
6594+ case NFPROTO_IPV6:
6595+ fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
6596+ fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.src.u3.in6;
6597+ fl.u.ip6.flowi6_oif = ifindex;
6598+ break;
6599+ }
6600+
6601+ nf_route(xt_net(par), &dst, &fl, false, xt_family(par));
6602+ if (!dst)
6603+ return -ENOENT;
6604+
6605+ nf_default_forward_path(route, dst, dir, devs);
6606+
6607+ return 0;
6608+}
6609+
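+/*
+ * Build the route for NATed connections: the packet's own direction reuses
+ * skb_dst(), the opposite direction is resolved with nf_route(), and both
+ * directions get a device forward-path walk when transmitted via a
+ * neighbour.
+ */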
6610+static int
developer480c5d52022-12-28 14:48:14 +08006611+xt_flowoffload_route_nat(struct sk_buff *skb, const struct nf_conn *ct,
6612+ const struct xt_action_param *par,
6613+ struct nf_flow_route *route, enum ip_conntrack_dir dir,
6614+ struct net_device **devs)
6615+{
6616+ struct dst_entry *this_dst = skb_dst(skb);
6617+ struct dst_entry *other_dst = NULL;
6618+ struct flowi fl;
6619+
6620+ memset(&fl, 0, sizeof(fl));
6621+ switch (xt_family(par)) {
6622+ case NFPROTO_IPV4:
6623+ fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
6624+ fl.u.ip4.flowi4_oif = xt_in(par)->ifindex;
6625+ break;
6626+ case NFPROTO_IPV6:
6627+ fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
6628+ fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
6629+ fl.u.ip6.flowi6_oif = xt_in(par)->ifindex;
6630+ break;
6631+ }
6632+
6633+ nf_route(xt_net(par), &other_dst, &fl, false, xt_family(par));
6634+ if (!other_dst)
6635+ return -ENOENT;
6636+
6637+ nf_default_forward_path(route, this_dst, dir, devs);
6638+ nf_default_forward_path(route, other_dst, !dir, devs);
6639+
developer7e533772023-04-27 05:59:30 +08006640+ if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH &&
developer480c5d52022-12-28 14:48:14 +08006641+ route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
developer9fdc0e82023-05-12 14:21:17 +08006642+ if (nf_dev_forward_path(skb, route, ct, dir, devs))
developer480c5d52022-12-28 14:48:14 +08006643+ return -1;
developer9fdc0e82023-05-12 14:21:17 +08006644+ if (nf_dev_forward_path(skb, route, ct, !dir, devs))
developer480c5d52022-12-28 14:48:14 +08006645+ return -1;
6646+ }
6647+
6648+ return 0;
6649+}
6650+
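+/*
+ * Build the route for connections without NAT: both directions are resolved
+ * with nf_route() against the corresponding device, then completed with a
+ * device forward-path walk when transmitted via a neighbour.
+ */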
6651+static int
6652+xt_flowoffload_route_bridge(struct sk_buff *skb, const struct nf_conn *ct,
6653+ const struct xt_action_param *par,
6654+ struct nf_flow_route *route, enum ip_conntrack_dir dir,
6655+ struct net_device **devs)
developer8cb3ac72022-07-04 10:55:14 +08006656+{
6657+ int ret;
6658+
6659+ ret = xt_flowoffload_route_dir(route, ct, dir, par,
6660+ devs[dir]->ifindex,
6661+ devs);
6662+ if (ret)
6663+ return ret;
6664+
6665+ ret = xt_flowoffload_route_dir(route, ct, !dir, par,
6666+ devs[!dir]->ifindex,
6667+ devs);
6668+ if (ret)
developer67bbcc02022-07-08 09:04:01 +08006669+ goto err_route_dir1;
developer8cb3ac72022-07-04 10:55:14 +08006670+
developer7e533772023-04-27 05:59:30 +08006671+ if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH &&
developer8cb3ac72022-07-04 10:55:14 +08006672+ route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
developer9fdc0e82023-05-12 14:21:17 +08006673+ if (nf_dev_forward_path(skb, route, ct, dir, devs) ||
6674+ nf_dev_forward_path(skb, route, ct, !dir, devs)) {
developer67bbcc02022-07-08 09:04:01 +08006675+ ret = -1;
6676+ goto err_route_dir2;
6677+ }
developer8cb3ac72022-07-04 10:55:14 +08006678+ }
6679+
6680+ return 0;
developer67bbcc02022-07-08 09:04:01 +08006681+
6682+err_route_dir2:
6683+ dst_release(route->tuple[!dir].dst);
6684+err_route_dir1:
6685+ dst_release(route->tuple[dir].dst);
6686+ return ret;
developer8cb3ac72022-07-04 10:55:14 +08006687+}
6688+
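+/*
+ * Target entry point: offload confirmed, established TCP and UDP conntrack
+ * entries that use neither a helper nor sequence adjustment. A flow entry is
+ * added to the software or hardware flowtable depending on
+ * XT_FLOWOFFLOAD_HW, and ingress hooks are ensured on both devices.
+ */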
6689+static unsigned int
6690+flowoffload_tg(struct sk_buff *skb, const struct xt_action_param *par)
6691+{
6692+ struct xt_flowoffload_table *table;
6693+ const struct xt_flowoffload_target_info *info = par->targinfo;
6694+ struct tcphdr _tcph, *tcph = NULL;
6695+ enum ip_conntrack_info ctinfo;
6696+ enum ip_conntrack_dir dir;
6697+ struct nf_flow_route route = {};
6698+ struct flow_offload *flow = NULL;
6699+ struct net_device *devs[2] = {};
6700+ struct nf_conn *ct;
6701+ struct net *net;
6702+
6703+ if (xt_flowoffload_skip(skb, xt_family(par)))
6704+ return XT_CONTINUE;
6705+
6706+ ct = nf_ct_get(skb, &ctinfo);
6707+ if (ct == NULL)
6708+ return XT_CONTINUE;
6709+
6710+ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
6711+ case IPPROTO_TCP:
6712+ if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
6713+ return XT_CONTINUE;
6714+
6715+ tcph = skb_header_pointer(skb, par->thoff,
6716+ sizeof(_tcph), &_tcph);
6717+ if (unlikely(!tcph || tcph->fin || tcph->rst))
6718+ return XT_CONTINUE;
6719+ break;
6720+ case IPPROTO_UDP:
6721+ break;
6722+ default:
6723+ return XT_CONTINUE;
6724+ }
6725+
6726+ if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
6727+ ct->status & IPS_SEQ_ADJUST)
6728+ return XT_CONTINUE;
6729+
6730+ if (!nf_ct_is_confirmed(ct))
6731+ return XT_CONTINUE;
6732+
6734 6733+ dir = CTINFO2DIR(ctinfo);
6735 6734+
6736 6735+ devs[dir] = xt_out(par);
6737 6736+ devs[!dir] = xt_in(par);
6738 6737+
6739 6738+ if (!devs[dir] || !devs[!dir])
6740 6739+ return XT_CONTINUE;
6741 6740+
6742 6741+ if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
6743 6742+ return XT_CONTINUE;
6743+
developer480c5d52022-12-28 14:48:14 +08006744+ if (ct->status & IPS_NAT_MASK) {
6745+ if (xt_flowoffload_route_nat(skb, ct, par, &route, dir, devs) < 0)
6746+ goto err_flow_route;
6747+ } else {
6748+ if (xt_flowoffload_route_bridge(skb, ct, par, &route, dir, devs) < 0)
6749+ goto err_flow_route;
6750+ }
developer8cb3ac72022-07-04 10:55:14 +08006751+
6752+ flow = flow_offload_alloc(ct);
6753+ if (!flow)
6754+ goto err_flow_alloc;
6755+
6756+ if (flow_offload_route_init(flow, &route) < 0)
6757+ goto err_flow_add;
6758+
6759+ if (tcph) {
6760+ ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
6761+ ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
6762+ }
6763+
6764+ table = &flowtable[!!(info->flags & XT_FLOWOFFLOAD_HW)];
6765+
6766+ net = read_pnet(&table->ft.net);
6767+ if (!net)
6768+ write_pnet(&table->ft.net, xt_net(par));
6769+
6770+ if (flow_offload_add(&table->ft, flow) < 0)
6771+ goto err_flow_add;
6772+
6773+ xt_flowoffload_check_device(table, devs[0]);
6774+ xt_flowoffload_check_device(table, devs[1]);
6775+
developer480c5d52022-12-28 14:48:14 +08006776+ if (!(ct->status & IPS_NAT_MASK))
6777+ dst_release(route.tuple[dir].dst);
developer8cb3ac72022-07-04 10:55:14 +08006778+ dst_release(route.tuple[!dir].dst);
6779+
6780+ return XT_CONTINUE;
6781+
6782+err_flow_add:
6783+ flow_offload_free(flow);
6784+err_flow_alloc:
developer480c5d52022-12-28 14:48:14 +08006785+ if (!(ct->status & IPS_NAT_MASK))
6786+ dst_release(route.tuple[dir].dst);
developer8cb3ac72022-07-04 10:55:14 +08006787+ dst_release(route.tuple[!dir].dst);
6788+err_flow_route:
6789+ clear_bit(IPS_OFFLOAD_BIT, &ct->status);
6790+
6791+ return XT_CONTINUE;
6792+}
6793+
6794+static int flowoffload_chk(const struct xt_tgchk_param *par)
6795+{
6796+ struct xt_flowoffload_target_info *info = par->targinfo;
6797+
6798+ if (info->flags & ~XT_FLOWOFFLOAD_MASK)
6799+ return -EINVAL;
6800+
6801+ return 0;
6802+}
6803+
6804+static struct xt_target offload_tg_reg __read_mostly = {
6805+ .family = NFPROTO_UNSPEC,
6806+ .name = "FLOWOFFLOAD",
6807+ .revision = 0,
6808+ .targetsize = sizeof(struct xt_flowoffload_target_info),
6809+ .usersize = sizeof(struct xt_flowoffload_target_info),
6810+ .checkentry = flowoffload_chk,
6811+ .target = flowoffload_tg,
6812+ .me = THIS_MODULE,
6813+};
6814+
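+/*
+ * On NETDEV_UNREGISTER, remove any hooks attached to the device from both
+ * flowtables and flush its flow entries.
+ */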
6815+static int flow_offload_netdev_event(struct notifier_block *this,
6816+ unsigned long event, void *ptr)
6817+{
6818+ struct xt_flowoffload_hook *hook0, *hook1;
6819+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
6820+
6821+ if (event != NETDEV_UNREGISTER)
6822+ return NOTIFY_DONE;
6823+
6824+ spin_lock_bh(&hooks_lock);
6825+ hook0 = flow_offload_lookup_hook(&flowtable[0], dev);
6826+ if (hook0)
6827+ hlist_del(&hook0->list);
6828+
6829+ hook1 = flow_offload_lookup_hook(&flowtable[1], dev);
6830+ if (hook1)
6831+ hlist_del(&hook1->list);
6832+ spin_unlock_bh(&hooks_lock);
6833+
6834+ if (hook0) {
6835+ nf_unregister_net_hook(hook0->net, &hook0->ops);
6836+ kfree(hook0);
6837+ }
6838+
6839+ if (hook1) {
6840+ nf_unregister_net_hook(hook1->net, &hook1->ops);
6841+ kfree(hook1);
6842+ }
6843+
6844+ nf_flow_table_cleanup(dev);
6845+
6846+ return NOTIFY_DONE;
6847+}
6848+
6849+static struct notifier_block flow_offload_netdev_notifier = {
6850+ .notifier_call = flow_offload_netdev_event,
6851+};
6852+
6853+static int nf_flow_rule_route_inet(struct net *net,
6854+ const struct flow_offload *flow,
6855+ enum flow_offload_tuple_dir dir,
6856+ struct nf_flow_rule *flow_rule)
6857+{
6858+ const struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
6859+ int err;
6860+
6861+ switch (flow_tuple->l3proto) {
6862+ case NFPROTO_IPV4:
6863+ err = nf_flow_rule_route_ipv4(net, flow, dir, flow_rule);
6864+ break;
6865+ case NFPROTO_IPV6:
6866+ err = nf_flow_rule_route_ipv6(net, flow, dir, flow_rule);
6867+ break;
6868+ default:
6869+ err = -1;
6870+ break;
6871+ }
6872+
6873+ return err;
6874+}
6875+
6876+static struct nf_flowtable_type flowtable_inet = {
6877+ .family = NFPROTO_INET,
6878+ .init = nf_flow_table_init,
6879+ .setup = nf_flow_table_offload_setup,
6880+ .action = nf_flow_rule_route_inet,
6881+ .free = nf_flow_table_free,
6882+ .hook = xt_flowoffload_net_hook,
6883+ .owner = THIS_MODULE,
6884+};
6885+
6886+static int init_flowtable(struct xt_flowoffload_table *tbl)
6887+{
6888+ INIT_DELAYED_WORK(&tbl->work, xt_flowoffload_hook_work);
6889+ tbl->ft.type = &flowtable_inet;
6890+
6891+ return nf_flow_table_init(&tbl->ft);
6892+}
6893+
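+/*
+ * flowtable[0] handles software-only offload; flowtable[1] is additionally
+ * flagged NF_FLOWTABLE_HW_OFFLOAD.
+ */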
6894+static int __init xt_flowoffload_tg_init(void)
6895+{
6896+ int ret;
6897+
6898+ register_netdevice_notifier(&flow_offload_netdev_notifier);
6899+
6900+ ret = init_flowtable(&flowtable[0]);
6901+ if (ret)
6902+ return ret;
6903+
6904+ ret = init_flowtable(&flowtable[1]);
6905+ if (ret)
6906+ goto cleanup;
6907+
developeree39bcf2023-06-16 08:03:30 +08006908+ flowtable[1].ft.flags = NF_FLOWTABLE_HW_OFFLOAD;
developer8cb3ac72022-07-04 10:55:14 +08006909+
6910+ ret = xt_register_target(&offload_tg_reg);
6911+ if (ret)
6912+ goto cleanup2;
6913+
6914+ return 0;
6915+
6916+cleanup2:
6917+ nf_flow_table_free(&flowtable[1].ft);
6918+cleanup:
6919+ nf_flow_table_free(&flowtable[0].ft);
6920+ return ret;
6921+}
6922+
6923+static void __exit xt_flowoffload_tg_exit(void)
6924+{
6925+ xt_unregister_target(&offload_tg_reg);
6926+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
6927+ nf_flow_table_free(&flowtable[0].ft);
6928+ nf_flow_table_free(&flowtable[1].ft);
6929+}
6930+
6931+MODULE_LICENSE("GPL");
6932+module_init(xt_flowoffload_tg_init);
6933+module_exit(xt_flowoffload_tg_exit);
6934--
69352.18.0
6936