From b80c745d2b90b30558e4f5b12060af956ae8e76d Mon Sep 17 00:00:00 2001
From: Bo Jiao <Bo.Jiao@mediatek.com>
Date: Mon, 18 Sep 2023 10:52:27 +0800
Subject: [PATCH 02/22] mt7622 backport nf hw offload framework and upstream
 hnat plus xt-FLOWOFFLOAD update v2

---
 drivers/net/ethernet/mediatek/Makefile | 3 +-
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 25 +-
 drivers/net/ethernet/mediatek/mtk_eth_soc.h | 19 +-
 drivers/net/ethernet/mediatek/mtk_ppe.c | 510 +++++++
 drivers/net/ethernet/mediatek/mtk_ppe.h | 288 ++++
 .../net/ethernet/mediatek/mtk_ppe_debugfs.c | 214 +++
 .../net/ethernet/mediatek/mtk_ppe_offload.c | 535 ++++++++
 drivers/net/ethernet/mediatek/mtk_ppe_regs.h | 144 ++
 drivers/net/ppp/ppp_generic.c | 22 +
 drivers/net/ppp/pppoe.c | 24 +
 include/linux/netdevice.h | 60 +
 include/linux/ppp_channel.h | 3 +
 include/net/dsa.h | 10 +
 include/net/flow_offload.h | 4 +
 include/net/ip6_route.h | 5 +-
 .../net/netfilter/ipv6/nf_conntrack_ipv6.h | 3 -
 include/net/netfilter/nf_conntrack.h | 12 +
 include/net/netfilter/nf_conntrack_acct.h | 11 +
 include/net/netfilter/nf_flow_table.h | 266 +++-
 include/net/netns/conntrack.h | 6 +
 .../linux/netfilter/nf_conntrack_common.h | 9 +-
 include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h | 17 +
 net/8021q/vlan_dev.c | 21 +
 net/bridge/br_device.c | 49 +
 net/bridge/br_private.h | 20 +
 net/bridge/br_vlan.c | 55 +
 net/core/dev.c | 46 +
 net/dsa/dsa.c | 9 +
 net/dsa/slave.c | 37 +-
 net/ipv4/netfilter/Kconfig | 4 +-
 net/ipv6/ip6_output.c | 2 +-
 net/ipv6/netfilter/Kconfig | 3 +-
 net/ipv6/route.c | 22 +-
 net/netfilter/Kconfig | 14 +-
 net/netfilter/Makefile | 4 +-
 net/netfilter/nf_conntrack_core.c | 20 +-
 net/netfilter/nf_conntrack_proto_tcp.c | 4 +
 net/netfilter/nf_conntrack_proto_udp.c | 4 +
 net/netfilter/nf_conntrack_standalone.c | 34 +-
 net/netfilter/nf_flow_table_core.c | 462 ++++---
 net/netfilter/nf_flow_table_ip.c | 447 +++---
 net/netfilter/nf_flow_table_offload.c | 1199 +++++++++++++++++
 net/netfilter/xt_FLOWOFFLOAD.c | 794 +++++++++++
 43 files changed, 5005 insertions(+), 435 deletions(-)
 mode change 100644 => 100755 drivers/net/ethernet/mediatek/Makefile
 mode change 100644 => 100755 drivers/net/ethernet/mediatek/mtk_eth_soc.c
 mode change 100644 => 100755 drivers/net/ethernet/mediatek/mtk_eth_soc.h
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.c
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.h
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_offload.c
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_regs.h
 create mode 100644 include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
 create mode 100644 net/netfilter/nf_flow_table_offload.c
 create mode 100644 net/netfilter/xt_FLOWOFFLOAD.c

diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile
old mode 100644
new mode 100755
index 634640d..5f342f4
--- a/drivers/net/ethernet/mediatek/Makefile
+++ b/drivers/net/ethernet/mediatek/Makefile
@@ -4,5 +4,6 @@
 #
 
 obj-$(CONFIG_NET_MEDIATEK_SOC) += mtk_eth.o
-mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_usxgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o
+mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_usxgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o \
+ mtk_ppe.o mtk_ppe_debugfs.o mtk_ppe_offload.o
 obj-$(CONFIG_NET_MEDIATEK_HNAT) += mtk_hnat/
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
old mode 100644
new mode 100755
index c4bea4d..9c85e16
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -3573,6 +3573,7 @@ static int mtk_open(struct net_device *dev)
 u32 id = mtk_mac2xgmii_id(eth, mac->id);
 int err, i;
 struct device_node *phy_node;
+ u32 gdm_config = MTK_GDMA_TO_PDMA;
 
 err = phylink_of_phy_connect(mac->phylink, mac->of_node, 0);
 if (err) {
@@ -3650,7 +3651,10 @@ static int mtk_open(struct net_device *dev)
 regmap_write(eth->sgmii->pcs[id].regmap,
 SGMSYS_QPHY_PWR_STATE_CTRL, 0);
 
- mtk_gdm_config(eth, mac->id, MTK_GDMA_TO_PDMA);
+ if (eth->soc->offload_version && mtk_ppe_start(&eth->ppe) == 0)
+ gdm_config = MTK_GDMA_TO_PPE;
+
+ mtk_gdm_config(eth, mac->id, gdm_config);
 
 return 0;
 }
@@ -3730,6 +3734,9 @@ static int mtk_stop(struct net_device *dev)
 
 mtk_dma_free(eth);
 
+ if (eth->soc->offload_version)
+ mtk_ppe_stop(&eth->ppe);
+
 return 0;
 }
 
@@ -4576,6 +4583,7 @@ static const struct net_device_ops mtk_netdev_ops = {
 #ifdef CONFIG_NET_POLL_CONTROLLER
 .ndo_poll_controller = mtk_poll_controller,
 #endif
+ .ndo_setup_tc = mtk_eth_setup_tc,
 };
 
 static void mux_poll(struct work_struct *work)
@@ -5161,6 +5169,17 @@ static int mtk_probe(struct platform_device *pdev)
 goto err_free_dev;
 }
 
+ if (eth->soc->offload_version) {
+ err = mtk_ppe_init(&eth->ppe, eth->dev,
+ eth->base + MTK_ETH_PPE_BASE, 2);
+ if (err)
+ goto err_free_dev;
+
+ err = mtk_eth_offload_init(eth);
+ if (err)
+ goto err_free_dev;
+ }
+
 for (i = 0; i < MTK_MAX_DEVS; i++) {
 if (!eth->netdev[i])
 continue;
@@ -5254,6 +5273,7 @@ static const struct mtk_soc_data mt2701_data = {
 .required_clks = MT7623_CLKS_BITMAP,
 .required_pctl = true,
 .has_sram = false,
+ .offload_version = 2,
 .rss_num = 0,
 .txrx = {
 .txd_size = sizeof(struct mtk_tx_dma),
@@ -5271,6 +5291,7 @@ static const struct mtk_soc_data mt7621_data = {
 .required_clks = MT7621_CLKS_BITMAP,
 .required_pctl = false,
 .has_sram = false,
+ .offload_version = 2,
 .rss_num = 0,
 .txrx = {
 .txd_size = sizeof(struct mtk_tx_dma),
@@ -5289,6 +5310,7 @@ static const struct mtk_soc_data mt7622_data = {
 .required_clks = MT7622_CLKS_BITMAP,
 .required_pctl = false,
 .has_sram = false,
+ .offload_version = 2,
 .rss_num = 0,
 .txrx = {
 .txd_size = sizeof(struct mtk_tx_dma),
@@ -5306,6 +5328,7 @@ static const struct mtk_soc_data mt7623_data = {
 .required_clks = MT7623_CLKS_BITMAP,
 .required_pctl = true,
 .has_sram = false,
+ .offload_version = 2,
 .rss_num = 0,
 .txrx = {
 .txd_size = sizeof(struct mtk_tx_dma),
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
old mode 100644
new mode 100755
index 8a9b615..a87e46d
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -15,6 +15,8 @@
 #include <linux/u64_stats_sync.h>
 #include <linux/refcount.h>
 #include <linux/phylink.h>
+#include <linux/rhashtable.h>
+#include "mtk_ppe.h"
 
 #define MTK_QDMA_PAGE_SIZE 2048
 #define MTK_MAX_RX_LENGTH 1536
@@ -44,7 +46,8 @@
 NETIF_F_HW_VLAN_CTAG_TX | \
 NETIF_F_SG | NETIF_F_TSO | \
 NETIF_F_TSO6 | \
- NETIF_F_IPV6_CSUM)
+ NETIF_F_IPV6_CSUM |\
+ NETIF_F_HW_TC)
 #define MTK_SET_FEATURES (NETIF_F_LRO | \
 NETIF_F_HW_VLAN_CTAG_RX)
 #define MTK_HW_FEATURES_MT7628 (NETIF_F_SG | NETIF_F_RXCSUM)
@@ -127,6 +130,7 @@
 #define MTK_GDMA_UCS_EN BIT(20)
 #define MTK_GDMA_STRP_CRC BIT(16)
 #define MTK_GDMA_TO_PDMA 0x0
+#define MTK_GDMA_TO_PPE 0x4444
 #define MTK_GDMA_DROP_ALL 0x7777
 
 /* GDM Egress Control Register */
@@ -617,6 +621,12 @@
 #define RX_DMA_TCI(_x) ((_x) & (VLAN_PRIO_MASK | VLAN_VID_MASK))
 #define RX_DMA_VPID(_x) (((_x) >> 16) & 0xffff)
 
+/* QDMA descriptor rxd4 */
+#define MTK_RXD4_FOE_ENTRY GENMASK(13, 0)
+#define MTK_RXD4_PPE_CPU_REASON GENMASK(18, 14)
+#define MTK_RXD4_SRC_PORT GENMASK(21, 19)
+#define MTK_RXD4_ALG GENMASK(31, 22)
+
 /* QDMA descriptor rxd4 */
 #define RX_DMA_L4_VALID BIT(24)
 #define RX_DMA_L4_VALID_PDMA BIT(30) /* when PDMA is used */
@@ -1651,6 +1661,7 @@ struct mtk_soc_data {
 u64 caps;
 u64 required_clks;
 bool required_pctl;
+ u8 offload_version;
 netdev_features_t hw_features;
 bool has_sram;
 struct {
@@ -1847,6 +1858,9 @@ struct mtk_eth {
 int ip_align;
 spinlock_t syscfg0_lock;
 struct timer_list mtk_dma_monitor_timer;
+
+ struct mtk_ppe ppe;
+ struct rhashtable flow_table;
 };
 
 /* struct mtk_mac - the structure that holds the info about the MACs of the
@@ -1927,6 +1941,9 @@ int mtk_toprgu_init(struct mtk_eth *eth, struct device_node *r);
 int mtk_dump_usxgmii(struct regmap *pmap, char *name, u32 offset, u32 range);
 void mtk_usxgmii_link_poll(struct work_struct *work);
 
+int mtk_eth_offload_init(struct mtk_eth *eth);
+int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
+ void *type_data);
 void mtk_eth_set_dma_device(struct mtk_eth *eth, struct device *dma_dev);
 u32 mtk_rss_indr_table(struct mtk_rss_params *rss_params, int index);
 #endif /* MTK_ETH_H */
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
new file mode 100644
index 0000000..27b5be5
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
@@ -0,0 +1,510 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
+
+#include <linux/kernel.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/etherdevice.h>
+#include <linux/platform_device.h>
+#include "mtk_ppe.h"
+#include "mtk_ppe_regs.h"
+
+static void ppe_w32(struct mtk_ppe *ppe, u32 reg, u32 val)
+{
+ writel(val, ppe->base + reg);
+}
+
+static u32 ppe_r32(struct mtk_ppe *ppe, u32 reg)
+{
+ return readl(ppe->base + reg);
+}
+
+static u32 ppe_m32(struct mtk_ppe *ppe, u32 reg, u32 mask, u32 set)
+{
+ u32 val;
+
+ val = ppe_r32(ppe, reg);
+ val &= ~mask;
+ val |= set;
+ ppe_w32(ppe, reg, val);
+
+ return val;
+}
+
+static u32 ppe_set(struct mtk_ppe *ppe, u32 reg, u32 val)
+{
+ return ppe_m32(ppe, reg, 0, val);
+}
+
+static u32 ppe_clear(struct mtk_ppe *ppe, u32 reg, u32 val)
+{
+ return ppe_m32(ppe, reg, val, 0);
+}
+
+static int mtk_ppe_wait_busy(struct mtk_ppe *ppe)
+{
+ int ret;
+ u32 val;
+
+ ret = readl_poll_timeout(ppe->base + MTK_PPE_GLO_CFG, val,
+ !(val & MTK_PPE_GLO_CFG_BUSY),
+ 20, MTK_PPE_WAIT_TIMEOUT_US);
+
+ if (ret)
+ dev_err(ppe->dev, "PPE table busy");
+
+ return ret;
+}
+
+static void mtk_ppe_cache_clear(struct mtk_ppe *ppe)
+{
+ ppe_set(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
+ ppe_clear(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
+}
+
+static void mtk_ppe_cache_enable(struct mtk_ppe *ppe, bool enable)
+{
+ mtk_ppe_cache_clear(ppe);
+
+ ppe_m32(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_EN,
+ enable * MTK_PPE_CACHE_CTL_EN);
+}
+
+static u32 mtk_ppe_hash_entry(struct mtk_foe_entry *e)
+{
+ u32 hv1, hv2, hv3;
+ u32 hash;
+
+ switch (FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, e->ib1)) {
+ case MTK_PPE_PKT_TYPE_BRIDGE:
+ hv1 = e->bridge.src_mac_lo;
+ hv1 ^= ((e->bridge.src_mac_hi & 0xffff) << 16);
+ hv2 = e->bridge.src_mac_hi >> 16;
+ hv2 ^= e->bridge.dest_mac_lo;
+ hv3 = e->bridge.dest_mac_hi;
+ break;
+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
+ hv1 = e->ipv4.orig.ports;
+ hv2 = e->ipv4.orig.dest_ip;
+ hv3 = e->ipv4.orig.src_ip;
+ break;
+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
+ hv1 = e->ipv6.src_ip[3] ^ e->ipv6.dest_ip[3];
+ hv1 ^= e->ipv6.ports;
+
+ hv2 = e->ipv6.src_ip[2] ^ e->ipv6.dest_ip[2];
+ hv2 ^= e->ipv6.dest_ip[0];
+
+ hv3 = e->ipv6.src_ip[1] ^ e->ipv6.dest_ip[1];
+ hv3 ^= e->ipv6.src_ip[0];
+ break;
+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
+ default:
+ WARN_ON_ONCE(1);
+ return MTK_PPE_HASH_MASK;
+ }
+
+ hash = (hv1 & hv2) | ((~hv1) & hv3);
+ hash = (hash >> 24) | ((hash & 0xffffff) << 8);
+ hash ^= hv1 ^ hv2 ^ hv3;
+ hash ^= hash >> 16;
+ hash <<= 1;
+ hash &= MTK_PPE_ENTRIES - 1;
+
+ return hash;
+}
+
+static inline struct mtk_foe_mac_info *
+mtk_foe_entry_l2(struct mtk_foe_entry *entry)
+{
+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+
+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
+ return &entry->ipv6.l2;
+
+ return &entry->ipv4.l2;
+}
+
+static inline u32 *
+mtk_foe_entry_ib2(struct mtk_foe_entry *entry)
+{
+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+
+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
+ return &entry->ipv6.ib2;
+
+ return &entry->ipv4.ib2;
+}
+
+int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
+ u8 pse_port, u8 *src_mac, u8 *dest_mac)
+{
+ struct mtk_foe_mac_info *l2;
+ u32 ports_pad, val;
+
+ memset(entry, 0, sizeof(*entry));
+
+ val = FIELD_PREP(MTK_FOE_IB1_STATE, MTK_FOE_STATE_BIND) |
+ FIELD_PREP(MTK_FOE_IB1_PACKET_TYPE, type) |
+ FIELD_PREP(MTK_FOE_IB1_UDP, l4proto == IPPROTO_UDP) |
+ MTK_FOE_IB1_BIND_TTL |
+ MTK_FOE_IB1_BIND_CACHE;
+ entry->ib1 = val;
+
+ val = FIELD_PREP(MTK_FOE_IB2_PORT_MG, 0x3f) |
+ FIELD_PREP(MTK_FOE_IB2_PORT_AG, 0x1f) |
+ FIELD_PREP(MTK_FOE_IB2_DEST_PORT, pse_port);
+
+ if (is_multicast_ether_addr(dest_mac))
+ val |= MTK_FOE_IB2_MULTICAST;
+
+ ports_pad = 0xa5a5a500 | (l4proto & 0xff);
+ if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
+ entry->ipv4.orig.ports = ports_pad;
+ if (type == MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
+ entry->ipv6.ports = ports_pad;
+
+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
+ entry->ipv6.ib2 = val;
+ l2 = &entry->ipv6.l2;
+ } else {
+ entry->ipv4.ib2 = val;
+ l2 = &entry->ipv4.l2;
+ }
+
+ l2->dest_mac_hi = get_unaligned_be32(dest_mac);
+ l2->dest_mac_lo = get_unaligned_be16(dest_mac + 4);
+ l2->src_mac_hi = get_unaligned_be32(src_mac);
+ l2->src_mac_lo = get_unaligned_be16(src_mac + 4);
+
+ if (type >= MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
+ l2->etype = ETH_P_IPV6;
+ else
+ l2->etype = ETH_P_IP;
+
+ return 0;
+}
+
+int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port)
+{
+ u32 *ib2 = mtk_foe_entry_ib2(entry);
+ u32 val;
+
+ val = *ib2;
+ val &= ~MTK_FOE_IB2_DEST_PORT;
+ val |= FIELD_PREP(MTK_FOE_IB2_DEST_PORT, port);
+ *ib2 = val;
+
+ return 0;
+}
+
+int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool egress,
+ __be32 src_addr, __be16 src_port,
+ __be32 dest_addr, __be16 dest_port)
+{
+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+ struct mtk_ipv4_tuple *t;
+
+ switch (type) {
+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
+ if (egress) {
+ t = &entry->ipv4.new;
+ break;
+ }
+ fallthrough;
+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
+ t = &entry->ipv4.orig;
+ break;
+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
+ entry->ipv6_6rd.tunnel_src_ip = be32_to_cpu(src_addr);
+ entry->ipv6_6rd.tunnel_dest_ip = be32_to_cpu(dest_addr);
+ return 0;
+ default:
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+ }
+
+ t->src_ip = be32_to_cpu(src_addr);
+ t->dest_ip = be32_to_cpu(dest_addr);
+
+ if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
+ return 0;
+
+ t->src_port = be16_to_cpu(src_port);
+ t->dest_port = be16_to_cpu(dest_port);
+
+ return 0;
+}
+
+int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
+ __be32 *src_addr, __be16 src_port,
+ __be32 *dest_addr, __be16 dest_port)
+{
+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+ u32 *src, *dest;
+ int i;
+
+ switch (type) {
+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
+ src = entry->dslite.tunnel_src_ip;
+ dest = entry->dslite.tunnel_dest_ip;
+ break;
+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
+ entry->ipv6.src_port = be16_to_cpu(src_port);
+ entry->ipv6.dest_port = be16_to_cpu(dest_port);
+ fallthrough;
+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
+ src = entry->ipv6.src_ip;
+ dest = entry->ipv6.dest_ip;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < 4; i++)
+ src[i] = be32_to_cpu(src_addr[i]);
+ for (i = 0; i < 4; i++)
+ dest[i] = be32_to_cpu(dest_addr[i]);
+
+ return 0;
+}
+
+int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port)
+{
+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
+
+ l2->etype = BIT(port);
+
+ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER))
+ entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
+ else
+ l2->etype |= BIT(8);
+
+ entry->ib1 &= ~MTK_FOE_IB1_BIND_VLAN_TAG;
+
+ return 0;
+}
+
+int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid)
+{
+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
+
+ switch (FIELD_GET(MTK_FOE_IB1_BIND_VLAN_LAYER, entry->ib1)) {
+ case 0:
+ entry->ib1 |= MTK_FOE_IB1_BIND_VLAN_TAG |
+ FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
+ l2->vlan1 = vid;
+ return 0;
+ case 1:
+ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG)) {
+ l2->vlan1 = vid;
+ l2->etype |= BIT(8);
+ } else {
+ l2->vlan2 = vid;
+ entry->ib1 += FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
+ }
+ return 0;
+ default:
+ return -ENOSPC;
+ }
+}
+
+int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid)
+{
+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
+
+ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER) ||
+ (entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG))
+ l2->etype = ETH_P_PPP_SES;
+
+ entry->ib1 |= MTK_FOE_IB1_BIND_PPPOE;
+ l2->pppoe_id = sid;
+
+ return 0;
+}
+
+static inline bool mtk_foe_entry_usable(struct mtk_foe_entry *entry)
+{
+ return !(entry->ib1 & MTK_FOE_IB1_STATIC) &&
+ FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1) != MTK_FOE_STATE_BIND;
+}
+
+int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
+ u16 timestamp)
+{
+ struct mtk_foe_entry *hwe;
+ u32 hash;
+
+ timestamp &= MTK_FOE_IB1_BIND_TIMESTAMP;
+ entry->ib1 &= ~MTK_FOE_IB1_BIND_TIMESTAMP;
+ entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_TIMESTAMP, timestamp);
+
+ hash = mtk_ppe_hash_entry(entry);
+ hwe = &ppe->foe_table[hash];
+ if (!mtk_foe_entry_usable(hwe)) {
+ hwe++;
+ hash++;
+
+ if (!mtk_foe_entry_usable(hwe))
+ return -ENOSPC;
+ }
+
+ memcpy(&hwe->data, &entry->data, sizeof(hwe->data));
+ wmb();
+ hwe->ib1 = entry->ib1;
+
+ dma_wmb();
+
+ mtk_ppe_cache_clear(ppe);
+
+ return hash;
+}
+
+int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
+ int version)
+{
+ struct mtk_foe_entry *foe;
+
+ /* need to allocate a separate device, since it PPE DMA access is
+ * not coherent.
+ */
+ ppe->base = base;
+ ppe->dev = dev;
+ ppe->version = version;
+
+ foe = dmam_alloc_coherent(ppe->dev, MTK_PPE_ENTRIES * sizeof(*foe),
+ &ppe->foe_phys, GFP_KERNEL);
+ if (!foe)
+ return -ENOMEM;
+
+ ppe->foe_table = foe;
+
+ mtk_ppe_debugfs_init(ppe);
+
+ return 0;
+}
+
+static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe)
+{
+ static const u8 skip[] = { 12, 25, 38, 51, 76, 89, 102 };
+ int i, k;
+
+ memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(*ppe->foe_table));
+
+ if (!IS_ENABLED(CONFIG_SOC_MT7621))
+ return;
+
+ /* skip all entries that cross the 1024 byte boundary */
+ for (i = 0; i < MTK_PPE_ENTRIES; i += 128)
+ for (k = 0; k < ARRAY_SIZE(skip); k++)
+ ppe->foe_table[i + skip[k]].ib1 |= MTK_FOE_IB1_STATIC;
+}
+
+int mtk_ppe_start(struct mtk_ppe *ppe)
+{
+ u32 val;
+
+ mtk_ppe_init_foe_table(ppe);
+ ppe_w32(ppe, MTK_PPE_TB_BASE, ppe->foe_phys);
+
+ val = MTK_PPE_TB_CFG_ENTRY_80B |
+ MTK_PPE_TB_CFG_AGE_NON_L4 |
+ MTK_PPE_TB_CFG_AGE_UNBIND |
+ MTK_PPE_TB_CFG_AGE_TCP |
+ MTK_PPE_TB_CFG_AGE_UDP |
+ MTK_PPE_TB_CFG_AGE_TCP_FIN |
+ FIELD_PREP(MTK_PPE_TB_CFG_SEARCH_MISS,
+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD) |
+ FIELD_PREP(MTK_PPE_TB_CFG_KEEPALIVE,
+ MTK_PPE_KEEPALIVE_DISABLE) |
+ FIELD_PREP(MTK_PPE_TB_CFG_HASH_MODE, 1) |
+ FIELD_PREP(MTK_PPE_TB_CFG_SCAN_MODE,
+ MTK_PPE_SCAN_MODE_KEEPALIVE_AGE) |
+ FIELD_PREP(MTK_PPE_TB_CFG_ENTRY_NUM,
+ MTK_PPE_ENTRIES_SHIFT);
+ ppe_w32(ppe, MTK_PPE_TB_CFG, val);
+
+ ppe_w32(ppe, MTK_PPE_IP_PROTO_CHK,
+ MTK_PPE_IP_PROTO_CHK_IPV4 | MTK_PPE_IP_PROTO_CHK_IPV6);
+
+ mtk_ppe_cache_enable(ppe, true);
+
+ val = MTK_PPE_FLOW_CFG_IP4_TCP_FRAG |
+ MTK_PPE_FLOW_CFG_IP4_UDP_FRAG |
+ MTK_PPE_FLOW_CFG_IP6_3T_ROUTE |
+ MTK_PPE_FLOW_CFG_IP6_5T_ROUTE |
+ MTK_PPE_FLOW_CFG_IP6_6RD |
+ MTK_PPE_FLOW_CFG_IP4_NAT |
+ MTK_PPE_FLOW_CFG_IP4_NAPT |
+ MTK_PPE_FLOW_CFG_IP4_DSLITE |
+ MTK_PPE_FLOW_CFG_L2_BRIDGE |
+ MTK_PPE_FLOW_CFG_IP4_NAT_FRAG;
+ ppe_w32(ppe, MTK_PPE_FLOW_CFG, val);
+
+ val = FIELD_PREP(MTK_PPE_UNBIND_AGE_MIN_PACKETS, 1000) |
+ FIELD_PREP(MTK_PPE_UNBIND_AGE_DELTA, 3);
+ ppe_w32(ppe, MTK_PPE_UNBIND_AGE, val);
+
+ val = FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_UDP, 30) |
+ FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_NON_L4, 1);
+ ppe_w32(ppe, MTK_PPE_BIND_AGE0, val);
+
+ val = FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP_FIN, 1) |
+ FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP, 30);
+ ppe_w32(ppe, MTK_PPE_BIND_AGE1, val);
+
+ val = MTK_PPE_BIND_LIMIT0_QUARTER | MTK_PPE_BIND_LIMIT0_HALF;
+ ppe_w32(ppe, MTK_PPE_BIND_LIMIT0, val);
+
+ val = MTK_PPE_BIND_LIMIT1_FULL |
+ FIELD_PREP(MTK_PPE_BIND_LIMIT1_NON_L4, 1);
+ ppe_w32(ppe, MTK_PPE_BIND_LIMIT1, val);
+
+ val = FIELD_PREP(MTK_PPE_BIND_RATE_BIND, 30) |
+ FIELD_PREP(MTK_PPE_BIND_RATE_PREBIND, 1);
+ ppe_w32(ppe, MTK_PPE_BIND_RATE, val);
+
+ /* enable PPE */
+ val = MTK_PPE_GLO_CFG_EN |
+ MTK_PPE_GLO_CFG_IP4_L4_CS_DROP |
+ MTK_PPE_GLO_CFG_IP4_CS_DROP |
+ MTK_PPE_GLO_CFG_MCAST_TB_EN |
+ MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE;
+ ppe_w32(ppe, MTK_PPE_GLO_CFG, val);
+
+ ppe_w32(ppe, MTK_PPE_DEFAULT_CPU_PORT, 0);
+
+ return 0;
+}
+
+int mtk_ppe_stop(struct mtk_ppe *ppe)
+{
+ u32 val;
+ int i;
+
+ for (i = 0; i < MTK_PPE_ENTRIES; i++)
+ ppe->foe_table[i].ib1 = FIELD_PREP(MTK_FOE_IB1_STATE,
+ MTK_FOE_STATE_INVALID);
+
+ mtk_ppe_cache_enable(ppe, false);
+
+ /* disable offload engine */
+ ppe_clear(ppe, MTK_PPE_GLO_CFG, MTK_PPE_GLO_CFG_EN);
+ ppe_w32(ppe, MTK_PPE_FLOW_CFG, 0);
+
+ /* disable aging */
+ val = MTK_PPE_TB_CFG_AGE_NON_L4 |
+ MTK_PPE_TB_CFG_AGE_UNBIND |
+ MTK_PPE_TB_CFG_AGE_TCP |
+ MTK_PPE_TB_CFG_AGE_UDP |
+ MTK_PPE_TB_CFG_AGE_TCP_FIN;
+ ppe_clear(ppe, MTK_PPE_TB_CFG, val);
+
+ return mtk_ppe_wait_busy(ppe);
+}
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h
new file mode 100644
index 0000000..242fb8f
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
+
+#ifndef __MTK_PPE_H
+#define __MTK_PPE_H
+
+#include <linux/kernel.h>
+#include <linux/bitfield.h>
+
+#define MTK_ETH_PPE_BASE 0xc00
+
+#define MTK_PPE_ENTRIES_SHIFT 3
+#define MTK_PPE_ENTRIES (1024 << MTK_PPE_ENTRIES_SHIFT)
+#define MTK_PPE_HASH_MASK (MTK_PPE_ENTRIES - 1)
+#define MTK_PPE_WAIT_TIMEOUT_US 1000000
+
+#define MTK_FOE_IB1_UNBIND_TIMESTAMP GENMASK(7, 0)
+#define MTK_FOE_IB1_UNBIND_PACKETS GENMASK(23, 8)
+#define MTK_FOE_IB1_UNBIND_PREBIND BIT(24)
+
+#define MTK_FOE_IB1_BIND_TIMESTAMP GENMASK(14, 0)
+#define MTK_FOE_IB1_BIND_KEEPALIVE BIT(15)
+#define MTK_FOE_IB1_BIND_VLAN_LAYER GENMASK(18, 16)
+#define MTK_FOE_IB1_BIND_PPPOE BIT(19)
+#define MTK_FOE_IB1_BIND_VLAN_TAG BIT(20)
+#define MTK_FOE_IB1_BIND_PKT_SAMPLE BIT(21)
+#define MTK_FOE_IB1_BIND_CACHE BIT(22)
+#define MTK_FOE_IB1_BIND_TUNNEL_DECAP BIT(23)
+#define MTK_FOE_IB1_BIND_TTL BIT(24)
+
+#define MTK_FOE_IB1_PACKET_TYPE GENMASK(27, 25)
+#define MTK_FOE_IB1_STATE GENMASK(29, 28)
+#define MTK_FOE_IB1_UDP BIT(30)
+#define MTK_FOE_IB1_STATIC BIT(31)
+
+enum {
+ MTK_PPE_PKT_TYPE_IPV4_HNAPT = 0,
+ MTK_PPE_PKT_TYPE_IPV4_ROUTE = 1,
+ MTK_PPE_PKT_TYPE_BRIDGE = 2,
+ MTK_PPE_PKT_TYPE_IPV4_DSLITE = 3,
+ MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T = 4,
+ MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T = 5,
+ MTK_PPE_PKT_TYPE_IPV6_6RD = 7,
+};
+
+#define MTK_FOE_IB2_QID GENMASK(3, 0)
+#define MTK_FOE_IB2_PSE_QOS BIT(4)
+#define MTK_FOE_IB2_DEST_PORT GENMASK(7, 5)
+#define MTK_FOE_IB2_MULTICAST BIT(8)
+
+#define MTK_FOE_IB2_WHNAT_QID2 GENMASK(13, 12)
+#define MTK_FOE_IB2_WHNAT_DEVIDX BIT(16)
+#define MTK_FOE_IB2_WHNAT_NAT BIT(17)
+
+#define MTK_FOE_IB2_PORT_MG GENMASK(17, 12)
+
+#define MTK_FOE_IB2_PORT_AG GENMASK(23, 18)
+
+#define MTK_FOE_IB2_DSCP GENMASK(31, 24)
+
+#define MTK_FOE_VLAN2_WHNAT_BSS GEMMASK(5, 0)
+#define MTK_FOE_VLAN2_WHNAT_WCID GENMASK(13, 6)
+#define MTK_FOE_VLAN2_WHNAT_RING GENMASK(15, 14)
+
+enum {
+ MTK_FOE_STATE_INVALID,
+ MTK_FOE_STATE_UNBIND,
+ MTK_FOE_STATE_BIND,
+ MTK_FOE_STATE_FIN
+};
+
+struct mtk_foe_mac_info {
+ u16 vlan1;
+ u16 etype;
+
+ u32 dest_mac_hi;
+
+ u16 vlan2;
+ u16 dest_mac_lo;
+
+ u32 src_mac_hi;
+
+ u16 pppoe_id;
+ u16 src_mac_lo;
+};
+
+struct mtk_foe_bridge {
+ u32 dest_mac_hi;
+
+ u16 src_mac_lo;
+ u16 dest_mac_lo;
+
+ u32 src_mac_hi;
+
+ u32 ib2;
+
+ u32 _rsv[5];
+
+ u32 udf_tsid;
+ struct mtk_foe_mac_info l2;
+};
+
+struct mtk_ipv4_tuple {
+ u32 src_ip;
+ u32 dest_ip;
+ union {
+ struct {
+ u16 dest_port;
+ u16 src_port;
+ };
+ struct {
+ u8 protocol;
+ u8 _pad[3]; /* fill with 0xa5a5a5 */
+ };
+ u32 ports;
+ };
+};
+
+struct mtk_foe_ipv4 {
+ struct mtk_ipv4_tuple orig;
+
+ u32 ib2;
+
+ struct mtk_ipv4_tuple new;
+
+ u16 timestamp;
+ u16 _rsv0[3];
+
+ u32 udf_tsid;
+
+ struct mtk_foe_mac_info l2;
+};
+
+struct mtk_foe_ipv4_dslite {
+ struct mtk_ipv4_tuple ip4;
+
+ u32 tunnel_src_ip[4];
+ u32 tunnel_dest_ip[4];
+
+ u8 flow_label[3];
+ u8 priority;
+
+ u32 udf_tsid;
+
+ u32 ib2;
+
+ struct mtk_foe_mac_info l2;
+};
+
+struct mtk_foe_ipv6 {
+ u32 src_ip[4];
+ u32 dest_ip[4];
+
+ union {
+ struct {
+ u8 protocol;
+ u8 _pad[3]; /* fill with 0xa5a5a5 */
+ }; /* 3-tuple */
+ struct {
+ u16 dest_port;
+ u16 src_port;
+ }; /* 5-tuple */
+ u32 ports;
+ };
+
+ u32 _rsv[3];
+
+ u32 udf;
+
+ u32 ib2;
+ struct mtk_foe_mac_info l2;
+};
+
+struct mtk_foe_ipv6_6rd {
+ u32 src_ip[4];
+ u32 dest_ip[4];
+ u16 dest_port;
+ u16 src_port;
+
+ u32 tunnel_src_ip;
+ u32 tunnel_dest_ip;
+
+ u16 hdr_csum;
+ u8 dscp;
+ u8 ttl;
+
+ u8 flag;
+ u8 pad;
+ u8 per_flow_6rd_id;
+ u8 pad2;
+
+ u32 ib2;
+ struct mtk_foe_mac_info l2;
+};
+
+struct mtk_foe_entry {
+ u32 ib1;
+
+ union {
+ struct mtk_foe_bridge bridge;
+ struct mtk_foe_ipv4 ipv4;
+ struct mtk_foe_ipv4_dslite dslite;
+ struct mtk_foe_ipv6 ipv6;
+ struct mtk_foe_ipv6_6rd ipv6_6rd;
+ u32 data[19];
+ };
+};
+
+enum {
+ MTK_PPE_CPU_REASON_TTL_EXCEEDED = 0x02,
+ MTK_PPE_CPU_REASON_OPTION_HEADER = 0x03,
+ MTK_PPE_CPU_REASON_NO_FLOW = 0x07,
+ MTK_PPE_CPU_REASON_IPV4_FRAG = 0x08,
+ MTK_PPE_CPU_REASON_IPV4_DSLITE_FRAG = 0x09,
+ MTK_PPE_CPU_REASON_IPV4_DSLITE_NO_TCP_UDP = 0x0a,
+ MTK_PPE_CPU_REASON_IPV6_6RD_NO_TCP_UDP = 0x0b,
+ MTK_PPE_CPU_REASON_TCP_FIN_SYN_RST = 0x0c,
+ MTK_PPE_CPU_REASON_UN_HIT = 0x0d,
+ MTK_PPE_CPU_REASON_HIT_UNBIND = 0x0e,
+ MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED = 0x0f,
+ MTK_PPE_CPU_REASON_HIT_BIND_TCP_FIN = 0x10,
+ MTK_PPE_CPU_REASON_HIT_TTL_1 = 0x11,
+ MTK_PPE_CPU_REASON_HIT_BIND_VLAN_VIOLATION = 0x12,
+ MTK_PPE_CPU_REASON_KEEPALIVE_UC_OLD_HDR = 0x13,
+ MTK_PPE_CPU_REASON_KEEPALIVE_MC_NEW_HDR = 0x14,
+ MTK_PPE_CPU_REASON_KEEPALIVE_DUP_OLD_HDR = 0x15,
+ MTK_PPE_CPU_REASON_HIT_BIND_FORCE_CPU = 0x16,
+ MTK_PPE_CPU_REASON_TUNNEL_OPTION_HEADER = 0x17,
+ MTK_PPE_CPU_REASON_MULTICAST_TO_CPU = 0x18,
+ MTK_PPE_CPU_REASON_MULTICAST_TO_GMAC1_CPU = 0x19,
+ MTK_PPE_CPU_REASON_HIT_PRE_BIND = 0x1a,
+ MTK_PPE_CPU_REASON_PACKET_SAMPLING = 0x1b,
+ MTK_PPE_CPU_REASON_EXCEED_MTU = 0x1c,
+ MTK_PPE_CPU_REASON_PPE_BYPASS = 0x1e,
+ MTK_PPE_CPU_REASON_INVALID = 0x1f,
+};
+
+struct mtk_ppe {
+ struct device *dev;
+ void __iomem *base;
+ int version;
+
+ struct mtk_foe_entry *foe_table;
+ dma_addr_t foe_phys;
+
+ void *acct_table;
+};
+
+int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
+ int version);
+int mtk_ppe_start(struct mtk_ppe *ppe);
+int mtk_ppe_stop(struct mtk_ppe *ppe);
+
+static inline void
+mtk_foe_entry_clear(struct mtk_ppe *ppe, u16 hash)
+{
+ ppe->foe_table[hash].ib1 = 0;
+ dma_wmb();
+}
+
+static inline int
+mtk_foe_entry_timestamp(struct mtk_ppe *ppe, u16 hash)
+{
+ u32 ib1 = READ_ONCE(ppe->foe_table[hash].ib1);
+
+ if (FIELD_GET(MTK_FOE_IB1_STATE, ib1) != MTK_FOE_STATE_BIND)
+ return -1;
+
+ return FIELD_GET(MTK_FOE_IB1_BIND_TIMESTAMP, ib1);
+}
+
+int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
+ u8 pse_port, u8 *src_mac, u8 *dest_mac);
+int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port);
+int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool orig,
+ __be32 src_addr, __be16 src_port,
+ __be32 dest_addr, __be16 dest_port);
+int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
+ __be32 *src_addr, __be16 src_port,
+ __be32 *dest_addr, __be16 dest_port);
+int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port);
+int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid);
+int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid);
+int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
+ u16 timestamp);
+int mtk_ppe_debugfs_init(struct mtk_ppe *ppe);
+
+#endif
diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
new file mode 100644
index 0000000..d4b4823
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
+
+#include <linux/kernel.h>
+#include <linux/debugfs.h>
+#include "mtk_eth_soc.h"
+
+struct mtk_flow_addr_info
+{
+ void *src, *dest;
+ u16 *src_port, *dest_port;
+ bool ipv6;
+};
+
+static const char *mtk_foe_entry_state_str(int state)
+{
+ static const char * const state_str[] = {
+ [MTK_FOE_STATE_INVALID] = "INV",
+ [MTK_FOE_STATE_UNBIND] = "UNB",
+ [MTK_FOE_STATE_BIND] = "BND",
+ [MTK_FOE_STATE_FIN] = "FIN",
+ };
+
+ if (state >= ARRAY_SIZE(state_str) || !state_str[state])
+ return "UNK";
+
+ return state_str[state];
+}
+
+static const char *mtk_foe_pkt_type_str(int type)
+{
+ static const char * const type_str[] = {
+ [MTK_PPE_PKT_TYPE_IPV4_HNAPT] = "IPv4 5T",
+ [MTK_PPE_PKT_TYPE_IPV4_ROUTE] = "IPv4 3T",
+ [MTK_PPE_PKT_TYPE_BRIDGE] = "L2",
+ [MTK_PPE_PKT_TYPE_IPV4_DSLITE] = "DS-LITE",
+ [MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T] = "IPv6 3T",
+ [MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T] = "IPv6 5T",
+ [MTK_PPE_PKT_TYPE_IPV6_6RD] = "6RD",
+ };
+
+ if (type >= ARRAY_SIZE(type_str) || !type_str[type])
+ return "UNKNOWN";
+
+ return type_str[type];
+}
+
+static void
+mtk_print_addr(struct seq_file *m, u32 *addr, bool ipv6)
+{
+ u32 n_addr[4];
+ int i;
+
+ if (!ipv6) {
+ seq_printf(m, "%pI4h", addr);
+ return;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(n_addr); i++)
+ n_addr[i] = htonl(addr[i]);
+ seq_printf(m, "%pI6", n_addr);
+}
+
+static void
+mtk_print_addr_info(struct seq_file *m, struct mtk_flow_addr_info *ai)
+{
+ mtk_print_addr(m, ai->src, ai->ipv6);
+ if (ai->src_port)
+ seq_printf(m, ":%d", *ai->src_port);
+ seq_printf(m, "->");
+ mtk_print_addr(m, ai->dest, ai->ipv6);
+ if (ai->dest_port)
+ seq_printf(m, ":%d", *ai->dest_port);
+}
+
+static int
+mtk_ppe_debugfs_foe_show(struct seq_file *m, void *private, bool bind)
+{
+ struct mtk_ppe *ppe = m->private;
+ int i;
+
+ for (i = 0; i < MTK_PPE_ENTRIES; i++) {
+ struct mtk_foe_entry *entry = &ppe->foe_table[i];
+ struct mtk_foe_mac_info *l2;
+ struct mtk_flow_addr_info ai = {};
+ unsigned char h_source[ETH_ALEN];
+ unsigned char h_dest[ETH_ALEN];
+ int type, state;
+ u32 ib2;
+
+
+ state = FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1);
+ if (!state)
+ continue;
+
+ if (bind && state != MTK_FOE_STATE_BIND)
+ continue;
+
+ type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
+ seq_printf(m, "%05x %s %7s", i,
+ mtk_foe_entry_state_str(state),
+ mtk_foe_pkt_type_str(type));
+
+ switch (type) {
+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
+ ai.src_port = &entry->ipv4.orig.src_port;
+ ai.dest_port = &entry->ipv4.orig.dest_port;
+ fallthrough;
+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
+ ai.src = &entry->ipv4.orig.src_ip;
+ ai.dest = &entry->ipv4.orig.dest_ip;
+ break;
+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
+ ai.src_port = &entry->ipv6.src_port;
+ ai.dest_port = &entry->ipv6.dest_port;
+ fallthrough;
+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
+ ai.src = &entry->ipv6.src_ip;
+ ai.dest = &entry->ipv6.dest_ip;
+ ai.ipv6 = true;
+ break;
+ }
+
+ seq_printf(m, " orig=");
+ mtk_print_addr_info(m, &ai);
+
+ switch (type) {
+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
+ ai.src_port = &entry->ipv4.new.src_port;
+ ai.dest_port = &entry->ipv4.new.dest_port;
+ fallthrough;
+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
+ ai.src = &entry->ipv4.new.src_ip;
+ ai.dest = &entry->ipv4.new.dest_ip;
+ seq_printf(m, " new=");
+ mtk_print_addr_info(m, &ai);
+ break;
+ }
+
+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
+ l2 = &entry->ipv6.l2;
+ ib2 = entry->ipv6.ib2;
+ } else {
+ l2 = &entry->ipv4.l2;
+ ib2 = entry->ipv4.ib2;
+ }
+
+ *((__be32 *)h_source) = htonl(l2->src_mac_hi);
+ *((__be16 *)&h_source[4]) = htons(l2->src_mac_lo);
+ *((__be32 *)h_dest) = htonl(l2->dest_mac_hi);
+ *((__be16 *)&h_dest[4]) = htons(l2->dest_mac_lo);
+
+ seq_printf(m, " eth=%pM->%pM etype=%04x"
+ " vlan=%d,%d ib1=%08x ib2=%08x\n",
+ h_source, h_dest, ntohs(l2->etype),
+ l2->vlan1, l2->vlan2, entry->ib1, ib2);
+ }
+
+ return 0;
+}
+
+static int
+mtk_ppe_debugfs_foe_show_all(struct seq_file *m, void *private)
+{
+ return mtk_ppe_debugfs_foe_show(m, private, false);
+}
+
+static int
+mtk_ppe_debugfs_foe_show_bind(struct seq_file *m, void *private)
+{
+ return mtk_ppe_debugfs_foe_show(m, private, true);
+}
+
+static int
+mtk_ppe_debugfs_foe_open_all(struct inode *inode, struct file *file)
+{
+ return single_open(file, mtk_ppe_debugfs_foe_show_all,
+ inode->i_private);
+}
+
+static int
+mtk_ppe_debugfs_foe_open_bind(struct inode *inode, struct file *file)
+{
+ return single_open(file, mtk_ppe_debugfs_foe_show_bind,
+ inode->i_private);
+}
+
+int mtk_ppe_debugfs_init(struct mtk_ppe *ppe)
+{
+ static const struct file_operations fops_all = {
+ .open = mtk_ppe_debugfs_foe_open_all,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ };
+
+ static const struct file_operations fops_bind = {
+ .open = mtk_ppe_debugfs_foe_open_bind,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+ };
+
+ struct dentry *root;
+
+ root = debugfs_create_dir("mtk_ppe", NULL);
+ debugfs_create_file("entries", S_IRUGO, root, ppe, &fops_all);
+ debugfs_create_file("bind", S_IRUGO, root, ppe, &fops_bind);
+
+ return 0;
+}
1276diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
1277new file mode 100644
developer58aa0682023-09-18 14:02:26 +08001278index 0000000..1380ef0
developer8cb3ac72022-07-04 10:55:14 +08001279--- /dev/null
1280+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
developeree39bcf2023-06-16 08:03:30 +08001281@@ -0,0 +1,535 @@
developer8cb3ac72022-07-04 10:55:14 +08001282+// SPDX-License-Identifier: GPL-2.0-only
1283+/*
1284+ * Copyright (C) 2020 Felix Fietkau <nbd@nbd.name>
1285+ */
1286+
1287+#include <linux/if_ether.h>
1288+#include <linux/rhashtable.h>
1289+#include <linux/ip.h>
1290+#include <linux/ipv6.h>
1291+#include <net/flow_offload.h>
1292+#include <net/pkt_cls.h>
1293+#include <net/dsa.h>
1294+#include "mtk_eth_soc.h"
1295+
1296+struct mtk_flow_data {
1297+ struct ethhdr eth;
1298+
1299+ union {
1300+ struct {
1301+ __be32 src_addr;
1302+ __be32 dst_addr;
1303+ } v4;
1304+
1305+ struct {
1306+ struct in6_addr src_addr;
1307+ struct in6_addr dst_addr;
1308+ } v6;
1309+ };
1310+
1311+ __be16 src_port;
1312+ __be16 dst_port;
1313+
1314+ struct {
1315+ u16 id;
1316+ __be16 proto;
1317+ u8 num;
1318+ } vlan;
1319+ struct {
1320+ u16 sid;
1321+ u8 num;
1322+ } pppoe;
1323+};
1324+
developeree39bcf2023-06-16 08:03:30 +08001325+struct mtk_flow_entry {
1326+ struct rhash_head node;
1327+ unsigned long cookie;
1328+ u16 hash;
1329+};
1330+
developer8cb3ac72022-07-04 10:55:14 +08001331+static const struct rhashtable_params mtk_flow_ht_params = {
1332+ .head_offset = offsetof(struct mtk_flow_entry, node),
1333+ .key_offset = offsetof(struct mtk_flow_entry, cookie),
1334+ .key_len = sizeof(unsigned long),
1335+ .automatic_shrinking = true,
1336+};
1337+
developeree39bcf2023-06-16 08:03:30 +08001338+static u32
1339+mtk_eth_timestamp(struct mtk_eth *eth)
1340+{
1341+ return mtk_r32(eth, 0x0010) & MTK_FOE_IB1_BIND_TIMESTAMP;
1342+}
1343+
developer8cb3ac72022-07-04 10:55:14 +08001344+static int
developeree39bcf2023-06-16 08:03:30 +08001345+mtk_flow_set_ipv4_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data,
1346+ bool egress)
developer8cb3ac72022-07-04 10:55:14 +08001347+{
developeree39bcf2023-06-16 08:03:30 +08001348+ return mtk_foe_entry_set_ipv4_tuple(foe, egress,
developer8cb3ac72022-07-04 10:55:14 +08001349+ data->v4.src_addr, data->src_port,
1350+ data->v4.dst_addr, data->dst_port);
1351+}
1352+
1353+static int
developeree39bcf2023-06-16 08:03:30 +08001354+mtk_flow_set_ipv6_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data)
developer8cb3ac72022-07-04 10:55:14 +08001355+{
developeree39bcf2023-06-16 08:03:30 +08001356+ return mtk_foe_entry_set_ipv6_tuple(foe,
developer8cb3ac72022-07-04 10:55:14 +08001357+ data->v6.src_addr.s6_addr32, data->src_port,
1358+ data->v6.dst_addr.s6_addr32, data->dst_port);
1359+}
1360+
1361+static void
1362+mtk_flow_offload_mangle_eth(const struct flow_action_entry *act, void *eth)
1363+{
1364+ void *dest = eth + act->mangle.offset;
1365+ const void *src = &act->mangle.val;
1366+
1367+ if (act->mangle.offset > 8)
1368+ return;
1369+
1370+ if (act->mangle.mask == 0xffff) {
1371+ src += 2;
1372+ dest += 2;
1373+ }
1374+
1375+ memcpy(dest, src, act->mangle.mask ? 2 : 4);
1376+}
1377+
developeree39bcf2023-06-16 08:03:30 +08001378+
developer8cb3ac72022-07-04 10:55:14 +08001379+static int
1380+mtk_flow_mangle_ports(const struct flow_action_entry *act,
1381+ struct mtk_flow_data *data)
1382+{
1383+ u32 val = ntohl(act->mangle.val);
1384+
1385+ switch (act->mangle.offset) {
1386+ case 0:
1387+ if (act->mangle.mask == ~htonl(0xffff))
1388+ data->dst_port = cpu_to_be16(val);
1389+ else
1390+ data->src_port = cpu_to_be16(val >> 16);
1391+ break;
1392+ case 2:
1393+ data->dst_port = cpu_to_be16(val);
1394+ break;
1395+ default:
1396+ return -EINVAL;
1397+ }
1398+
1399+ return 0;
1400+}
1401+
1402+static int
1403+mtk_flow_mangle_ipv4(const struct flow_action_entry *act,
1404+ struct mtk_flow_data *data)
1405+{
1406+ __be32 *dest;
1407+
1408+ switch (act->mangle.offset) {
1409+ case offsetof(struct iphdr, saddr):
1410+ dest = &data->v4.src_addr;
1411+ break;
1412+ case offsetof(struct iphdr, daddr):
1413+ dest = &data->v4.dst_addr;
1414+ break;
1415+ default:
1416+ return -EINVAL;
1417+ }
1418+
1419+ memcpy(dest, &act->mangle.val, sizeof(u32));
1420+
1421+ return 0;
1422+}
1423+
1424+static int
1425+mtk_flow_get_dsa_port(struct net_device **dev)
1426+{
1427+#if IS_ENABLED(CONFIG_NET_DSA)
1428+ struct dsa_port *dp;
1429+
1430+ dp = dsa_port_from_netdev(*dev);
1431+ if (IS_ERR(dp))
1432+ return -ENODEV;
1433+
1434+ if (dp->cpu_dp->tag_ops->proto != DSA_TAG_PROTO_MTK)
1435+ return -ENODEV;
1436+
1437+ *dev = dp->cpu_dp->master;
1438+
1439+ return dp->index;
1440+#else
1441+ return -ENODEV;
1442+#endif
1443+}
1444+
1445+static int
1446+mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
developeree39bcf2023-06-16 08:03:30 +08001447+ struct net_device *dev)
developer8cb3ac72022-07-04 10:55:14 +08001448+{
developeree39bcf2023-06-16 08:03:30 +08001449+ int pse_port, dsa_port;
developer8cb3ac72022-07-04 10:55:14 +08001450+
1451+ dsa_port = mtk_flow_get_dsa_port(&dev);
developeree39bcf2023-06-16 08:03:30 +08001452+ if (dsa_port >= 0)
1453+ mtk_foe_entry_set_dsa(foe, dsa_port);
developer8cb3ac72022-07-04 10:55:14 +08001454+
1455+ if (dev == eth->netdev[0])
developeree39bcf2023-06-16 08:03:30 +08001456+ pse_port = PSE_GDM1_PORT;
developer8cb3ac72022-07-04 10:55:14 +08001457+ else if (dev == eth->netdev[1])
developeree39bcf2023-06-16 08:03:30 +08001458+ pse_port = PSE_GDM2_PORT;
1459+ else
1460+ return -EOPNOTSUPP;
developer7eb15dc2023-06-14 17:44:03 +08001461+
developeree39bcf2023-06-16 08:03:30 +08001462+ mtk_foe_entry_set_pse_port(foe, pse_port);
developer8cb3ac72022-07-04 10:55:14 +08001463+
1464+ return 0;
1465+}
1466+
+static int
+mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
+{
+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+ struct flow_action_entry *act;
+ struct mtk_flow_data data = {};
+ struct mtk_foe_entry foe;
+ struct net_device *odev = NULL;
+ struct mtk_flow_entry *entry;
+ int offload_type = 0;
+ u16 addr_type = 0;
+ u32 timestamp;
+ u8 l4proto = 0;
+ int err = 0;
+ int hash;
+ int i;
+
+ if (rhashtable_lookup(&eth->flow_table, &f->cookie, mtk_flow_ht_params))
+ return -EEXIST;
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
+ struct flow_match_meta match;
+
+ flow_rule_match_meta(rule, &match);
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
+ struct flow_match_control match;
+
+ flow_rule_match_control(rule, &match);
+ addr_type = match.key->addr_type;
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+ struct flow_match_basic match;
+
+ flow_rule_match_basic(rule, &match);
+ l4proto = match.key->ip_proto;
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ flow_action_for_each(i, act, &rule->action) {
+ switch (act->id) {
+ case FLOW_ACTION_MANGLE:
+ if (act->mangle.htype == FLOW_ACT_MANGLE_HDR_TYPE_ETH)
+ mtk_flow_offload_mangle_eth(act, &data.eth);
+ break;
+ case FLOW_ACTION_REDIRECT:
+ odev = act->dev;
+ break;
+ case FLOW_ACTION_CSUM:
+ break;
+ case FLOW_ACTION_VLAN_PUSH:
+ if (data.vlan.num == 1 ||
+ act->vlan.proto != htons(ETH_P_8021Q))
+ return -EOPNOTSUPP;
+
+ data.vlan.id = act->vlan.vid;
+ data.vlan.proto = act->vlan.proto;
+ data.vlan.num++;
+ break;
+ case FLOW_ACTION_VLAN_POP:
+ break;
+ case FLOW_ACTION_PPPOE_PUSH:
+ if (data.pppoe.num == 1)
+ return -EOPNOTSUPP;
+
+ data.pppoe.sid = act->pppoe.sid;
+ data.pppoe.num++;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ }
+
+ switch (addr_type) {
+ case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+ offload_type = MTK_PPE_PKT_TYPE_IPV4_HNAPT;
+ break;
+ case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
+ offload_type = MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (!is_valid_ether_addr(data.eth.h_source) ||
+ !is_valid_ether_addr(data.eth.h_dest))
+ return -EINVAL;
+
+ err = mtk_foe_entry_prepare(&foe, offload_type, l4proto, 0,
+ data.eth.h_source,
+ data.eth.h_dest);
+ if (err)
+ return err;
+
+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
+ struct flow_match_ports ports;
+
+ flow_rule_match_ports(rule, &ports);
+ data.src_port = ports.key->src;
+ data.dst_port = ports.key->dst;
+ } else {
+ return -EOPNOTSUPP;
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ struct flow_match_ipv4_addrs addrs;
+
+ flow_rule_match_ipv4_addrs(rule, &addrs);
+
+ data.v4.src_addr = addrs.key->src;
+ data.v4.dst_addr = addrs.key->dst;
+
+ mtk_flow_set_ipv4_addr(&foe, &data, false);
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
+ struct flow_match_ipv6_addrs addrs;
+
+ flow_rule_match_ipv6_addrs(rule, &addrs);
+
+ data.v6.src_addr = addrs.key->src;
+ data.v6.dst_addr = addrs.key->dst;
+
+ mtk_flow_set_ipv6_addr(&foe, &data);
+ }
+
+ flow_action_for_each(i, act, &rule->action) {
+ if (act->id != FLOW_ACTION_MANGLE)
+ continue;
+
+ switch (act->mangle.htype) {
+ case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
+ case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
+ err = mtk_flow_mangle_ports(act, &data);
+ break;
+ case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
+ err = mtk_flow_mangle_ipv4(act, &data);
+ break;
+ case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
+ /* handled earlier */
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ if (err)
+ return err;
+ }
+
+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
+ err = mtk_flow_set_ipv4_addr(&foe, &data, true);
+ if (err)
+ return err;
+ }
+
+ if (data.vlan.num == 1) {
+ if (data.vlan.proto != htons(ETH_P_8021Q))
+ return -EOPNOTSUPP;
+
+ mtk_foe_entry_set_vlan(&foe, data.vlan.id);
+ }
+ if (data.pppoe.num == 1)
+ mtk_foe_entry_set_pppoe(&foe, data.pppoe.sid);
+
+ err = mtk_flow_set_output_device(eth, &foe, odev);
+ if (err)
+ return err;
+
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return -ENOMEM;
+
+ entry->cookie = f->cookie;
+ timestamp = mtk_eth_timestamp(eth);
1648+ hash = mtk_foe_entry_commit(&eth->ppe, &foe, timestamp);
1649+ if (hash < 0) {
1650+ err = hash;
developer8cb3ac72022-07-04 10:55:14 +08001651+ goto free;
developeree39bcf2023-06-16 08:03:30 +08001652+ }
developer8cb3ac72022-07-04 10:55:14 +08001653+
developeree39bcf2023-06-16 08:03:30 +08001654+ entry->hash = hash;
developer8cb3ac72022-07-04 10:55:14 +08001655+ err = rhashtable_insert_fast(&eth->flow_table, &entry->node,
1656+ mtk_flow_ht_params);
1657+ if (err < 0)
developeree39bcf2023-06-16 08:03:30 +08001658+ goto clear_flow;
developer8cb3ac72022-07-04 10:55:14 +08001659+
1660+ return 0;
developeree39bcf2023-06-16 08:03:30 +08001661+clear_flow:
1662+ mtk_foe_entry_clear(&eth->ppe, hash);
developer8cb3ac72022-07-04 10:55:14 +08001663+free:
1664+ kfree(entry);
1665+ return err;
1666+}
1667+
1668+static int
1669+mtk_flow_offload_destroy(struct mtk_eth *eth, struct flow_cls_offload *f)
1670+{
1671+ struct mtk_flow_entry *entry;
1672+
1673+ entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
1674+ mtk_flow_ht_params);
1675+ if (!entry)
1676+ return -ENOENT;
1677+
developeree39bcf2023-06-16 08:03:30 +08001678+ mtk_foe_entry_clear(&eth->ppe, entry->hash);
developer8cb3ac72022-07-04 10:55:14 +08001679+ rhashtable_remove_fast(&eth->flow_table, &entry->node,
1680+ mtk_flow_ht_params);
1681+ kfree(entry);
1682+
1683+ return 0;
1684+}
1685+
1686+static int
1687+mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f)
1688+{
1689+ struct mtk_flow_entry *entry;
developeree39bcf2023-06-16 08:03:30 +08001690+ int timestamp;
1691+ u32 idle;
developer8cb3ac72022-07-04 10:55:14 +08001692+
1693+ entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
1694+ mtk_flow_ht_params);
1695+ if (!entry)
1696+ return -ENOENT;
1697+
developeree39bcf2023-06-16 08:03:30 +08001698+ timestamp = mtk_foe_entry_timestamp(&eth->ppe, entry->hash);
1699+ if (timestamp < 0)
1700+ return -ETIMEDOUT;
1701+
1702+ idle = mtk_eth_timestamp(eth) - timestamp;
developer8cb3ac72022-07-04 10:55:14 +08001703+ f->stats.lastused = jiffies - idle * HZ;
1704+
1705+ return 0;
1706+}
1707+
1708+static DEFINE_MUTEX(mtk_flow_offload_mutex);
1709+
developeree39bcf2023-06-16 08:03:30 +08001710+static int
1711+mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
developer8cb3ac72022-07-04 10:55:14 +08001712+{
developeree39bcf2023-06-16 08:03:30 +08001713+ struct flow_cls_offload *cls = type_data;
1714+ struct net_device *dev = cb_priv;
1715+ struct mtk_mac *mac = netdev_priv(dev);
1716+ struct mtk_eth *eth = mac->hw;
developer8cb3ac72022-07-04 10:55:14 +08001717+ int err;
1718+
developeree39bcf2023-06-16 08:03:30 +08001719+ if (!tc_can_offload(dev))
1720+ return -EOPNOTSUPP;
1721+
1722+ if (type != TC_SETUP_CLSFLOWER)
1723+ return -EOPNOTSUPP;
1724+
developer8cb3ac72022-07-04 10:55:14 +08001725+ mutex_lock(&mtk_flow_offload_mutex);
1726+ switch (cls->command) {
1727+ case FLOW_CLS_REPLACE:
developeree39bcf2023-06-16 08:03:30 +08001728+ err = mtk_flow_offload_replace(eth, cls);
developer8cb3ac72022-07-04 10:55:14 +08001729+ break;
1730+ case FLOW_CLS_DESTROY:
1731+ err = mtk_flow_offload_destroy(eth, cls);
1732+ break;
1733+ case FLOW_CLS_STATS:
1734+ err = mtk_flow_offload_stats(eth, cls);
1735+ break;
1736+ default:
1737+ err = -EOPNOTSUPP;
1738+ break;
1739+ }
1740+ mutex_unlock(&mtk_flow_offload_mutex);
1741+
1742+ return err;
1743+}
1744+
1745+static int
1746+mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f)
1747+{
1748+ struct mtk_mac *mac = netdev_priv(dev);
1749+ struct mtk_eth *eth = mac->hw;
1750+ static LIST_HEAD(block_cb_list);
1751+ struct flow_block_cb *block_cb;
1752+ flow_setup_cb_t *cb;
developeree39bcf2023-06-16 08:03:30 +08001753+ int err = 0;
developer207b39d2022-10-07 15:57:16 +08001754+
developeree39bcf2023-06-16 08:03:30 +08001755+ if (!eth->ppe.foe_table)
developer8cb3ac72022-07-04 10:55:14 +08001756+ return -EOPNOTSUPP;
1757+
1758+ if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
1759+ return -EOPNOTSUPP;
1760+
1761+ cb = mtk_eth_setup_tc_block_cb;
1762+ f->driver_block_list = &block_cb_list;
1763+
1764+ switch (f->command) {
1765+ case FLOW_BLOCK_BIND:
1766+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
1767+ if (block_cb) {
1768+ flow_block_cb_incref(block_cb);
developeree39bcf2023-06-16 08:03:30 +08001769+ goto unlock;
developer8cb3ac72022-07-04 10:55:14 +08001770+ }
1771+ block_cb = flow_block_cb_alloc(cb, dev, dev, NULL);
developeree39bcf2023-06-16 08:03:30 +08001772+ if (IS_ERR(block_cb)) {
1773+ err = PTR_ERR(block_cb);
1774+ goto unlock;
1775+ }
developer8cb3ac72022-07-04 10:55:14 +08001776+
1777+ flow_block_cb_add(block_cb, f);
1778+ list_add_tail(&block_cb->driver_list, &block_cb_list);
developeree39bcf2023-06-16 08:03:30 +08001779+ break;
developer8cb3ac72022-07-04 10:55:14 +08001780+ case FLOW_BLOCK_UNBIND:
1781+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
developeree39bcf2023-06-16 08:03:30 +08001782+ if (!block_cb) {
1783+ err = -ENOENT;
1784+ goto unlock;
1785+ }
developer8cb3ac72022-07-04 10:55:14 +08001786+
developeree39bcf2023-06-16 08:03:30 +08001787+ if (flow_block_cb_decref(block_cb)) {
developer8cb3ac72022-07-04 10:55:14 +08001788+ flow_block_cb_remove(block_cb, f);
1789+ list_del(&block_cb->driver_list);
1790+ }
developeree39bcf2023-06-16 08:03:30 +08001791+ break;
developer8cb3ac72022-07-04 10:55:14 +08001792+ default:
developeree39bcf2023-06-16 08:03:30 +08001793+ err = -EOPNOTSUPP;
1794+ break;
developer8cb3ac72022-07-04 10:55:14 +08001795+ }
developeree39bcf2023-06-16 08:03:30 +08001796+
1797+unlock:
1798+ return err;
developer8cb3ac72022-07-04 10:55:14 +08001799+}
1800+
1801+int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
1802+ void *type_data)
1803+{
developeree39bcf2023-06-16 08:03:30 +08001804+ if (type == TC_SETUP_FT)
developer8cb3ac72022-07-04 10:55:14 +08001805+ return mtk_eth_setup_tc_block(dev, type_data);
developeree39bcf2023-06-16 08:03:30 +08001806+
1807+ return -EOPNOTSUPP;
developer8cb3ac72022-07-04 10:55:14 +08001808+}
1809+
1810+int mtk_eth_offload_init(struct mtk_eth *eth)
1811+{
developeree39bcf2023-06-16 08:03:30 +08001812+ if (!eth->ppe.foe_table)
1813+ return 0;
1814+
developer8cb3ac72022-07-04 10:55:14 +08001815+ return rhashtable_init(&eth->flow_table, &mtk_flow_ht_params);
1816+}
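
The TC_SETUP_FT entry point above is normally driven by the netfilter flowtable offload core rather than by tc userspace. Below is a minimal sketch of that call path; it is illustrative only and not part of the patch, and the function name example_bind_flow_block() is an assumption.

#include <linux/netdevice.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>

static int example_bind_flow_block(struct nf_flowtable *flowtable,
				   struct net_device *dev)
{
	/* Roughly what the flowtable offload core prepares before calling
	 * into the driver; the call lands in mtk_eth_setup_tc() above.
	 */
	struct flow_block_offload bo = {
		.command	= FLOW_BLOCK_BIND,
		.binder_type	= FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS,
		.net		= dev_net(dev),
		.block		= &flowtable->flow_block,
	};

	if (!dev->netdev_ops->ndo_setup_tc)
		return -EOPNOTSUPP;

	INIT_LIST_HEAD(&bo.cb_list);

	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, &bo);
}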
1817diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_regs.h b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
1818new file mode 100644
developer58aa0682023-09-18 14:02:26 +08001819index 0000000..0c45ea0
developer8cb3ac72022-07-04 10:55:14 +08001820--- /dev/null
1821+++ b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
developeree39bcf2023-06-16 08:03:30 +08001822@@ -0,0 +1,144 @@
developer8cb3ac72022-07-04 10:55:14 +08001823+// SPDX-License-Identifier: GPL-2.0-only
1824+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
1825+
1826+#ifndef __MTK_PPE_REGS_H
1827+#define __MTK_PPE_REGS_H
1828+
1829+#define MTK_PPE_GLO_CFG 0x200
1830+#define MTK_PPE_GLO_CFG_EN BIT(0)
1831+#define MTK_PPE_GLO_CFG_TSID_EN BIT(1)
1832+#define MTK_PPE_GLO_CFG_IP4_L4_CS_DROP BIT(2)
1833+#define MTK_PPE_GLO_CFG_IP4_CS_DROP BIT(3)
1834+#define MTK_PPE_GLO_CFG_TTL0_DROP BIT(4)
1835+#define MTK_PPE_GLO_CFG_PPE_BSWAP BIT(5)
1836+#define MTK_PPE_GLO_CFG_PSE_HASH_OFS BIT(6)
1837+#define MTK_PPE_GLO_CFG_MCAST_TB_EN BIT(7)
1838+#define MTK_PPE_GLO_CFG_FLOW_DROP_KA BIT(8)
1839+#define MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE BIT(9)
1840+#define MTK_PPE_GLO_CFG_UDP_LITE_EN BIT(10)
1841+#define MTK_PPE_GLO_CFG_UDP_LEN_DROP BIT(11)
1842+#define MTK_PPE_GLO_CFG_MCAST_ENTRIES		GENMASK(13, 12)
1843+#define MTK_PPE_GLO_CFG_BUSY BIT(31)
1844+
1845+#define MTK_PPE_FLOW_CFG 0x204
1846+#define MTK_PPE_FLOW_CFG_IP4_TCP_FRAG BIT(6)
1847+#define MTK_PPE_FLOW_CFG_IP4_UDP_FRAG BIT(7)
1848+#define MTK_PPE_FLOW_CFG_IP6_3T_ROUTE BIT(8)
1849+#define MTK_PPE_FLOW_CFG_IP6_5T_ROUTE BIT(9)
1850+#define MTK_PPE_FLOW_CFG_IP6_6RD BIT(10)
1851+#define MTK_PPE_FLOW_CFG_IP4_NAT BIT(12)
1852+#define MTK_PPE_FLOW_CFG_IP4_NAPT BIT(13)
1853+#define MTK_PPE_FLOW_CFG_IP4_DSLITE BIT(14)
1854+#define MTK_PPE_FLOW_CFG_L2_BRIDGE BIT(15)
1855+#define MTK_PPE_FLOW_CFG_IP_PROTO_BLACKLIST BIT(16)
1856+#define MTK_PPE_FLOW_CFG_IP4_NAT_FRAG BIT(17)
1857+#define MTK_PPE_FLOW_CFG_IP4_HASH_FLOW_LABEL BIT(18)
1858+#define MTK_PPE_FLOW_CFG_IP4_HASH_GRE_KEY BIT(19)
1859+#define MTK_PPE_FLOW_CFG_IP6_HASH_GRE_KEY BIT(20)
1860+
1861+#define MTK_PPE_IP_PROTO_CHK 0x208
1862+#define MTK_PPE_IP_PROTO_CHK_IPV4 GENMASK(15, 0)
1863+#define MTK_PPE_IP_PROTO_CHK_IPV6 GENMASK(31, 16)
1864+
1865+#define MTK_PPE_TB_CFG 0x21c
1866+#define MTK_PPE_TB_CFG_ENTRY_NUM GENMASK(2, 0)
1867+#define MTK_PPE_TB_CFG_ENTRY_80B BIT(3)
1868+#define MTK_PPE_TB_CFG_SEARCH_MISS GENMASK(5, 4)
1869+#define MTK_PPE_TB_CFG_AGE_PREBIND BIT(6)
1870+#define MTK_PPE_TB_CFG_AGE_NON_L4 BIT(7)
1871+#define MTK_PPE_TB_CFG_AGE_UNBIND BIT(8)
1872+#define MTK_PPE_TB_CFG_AGE_TCP BIT(9)
1873+#define MTK_PPE_TB_CFG_AGE_UDP BIT(10)
1874+#define MTK_PPE_TB_CFG_AGE_TCP_FIN BIT(11)
1875+#define MTK_PPE_TB_CFG_KEEPALIVE GENMASK(13, 12)
1876+#define MTK_PPE_TB_CFG_HASH_MODE GENMASK(15, 14)
1877+#define MTK_PPE_TB_CFG_SCAN_MODE GENMASK(17, 16)
1878+#define MTK_PPE_TB_CFG_HASH_DEBUG GENMASK(19, 18)
1879+
1880+enum {
1881+ MTK_PPE_SCAN_MODE_DISABLED,
1882+ MTK_PPE_SCAN_MODE_CHECK_AGE,
1883+ MTK_PPE_SCAN_MODE_KEEPALIVE_AGE,
1884+};
1885+
1886+enum {
1887+ MTK_PPE_KEEPALIVE_DISABLE,
1888+ MTK_PPE_KEEPALIVE_UNICAST_CPU,
1889+ MTK_PPE_KEEPALIVE_DUP_CPU = 3,
1890+};
1891+
1892+enum {
1893+ MTK_PPE_SEARCH_MISS_ACTION_DROP,
1894+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD = 2,
1895+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD = 3,
1896+};
1897+
1898+#define MTK_PPE_TB_BASE 0x220
1899+
1900+#define MTK_PPE_TB_USED 0x224
1901+#define MTK_PPE_TB_USED_NUM GENMASK(13, 0)
1902+
1903+#define MTK_PPE_BIND_RATE 0x228
1904+#define MTK_PPE_BIND_RATE_BIND GENMASK(15, 0)
1905+#define MTK_PPE_BIND_RATE_PREBIND GENMASK(31, 16)
1906+
1907+#define MTK_PPE_BIND_LIMIT0 0x22c
1908+#define MTK_PPE_BIND_LIMIT0_QUARTER GENMASK(13, 0)
1909+#define MTK_PPE_BIND_LIMIT0_HALF GENMASK(29, 16)
1910+
1911+#define MTK_PPE_BIND_LIMIT1 0x230
1912+#define MTK_PPE_BIND_LIMIT1_FULL GENMASK(13, 0)
1913+#define MTK_PPE_BIND_LIMIT1_NON_L4 GENMASK(23, 16)
1914+
1915+#define MTK_PPE_KEEPALIVE 0x234
1916+#define MTK_PPE_KEEPALIVE_TIME GENMASK(15, 0)
1917+#define MTK_PPE_KEEPALIVE_TIME_TCP GENMASK(23, 16)
1918+#define MTK_PPE_KEEPALIVE_TIME_UDP GENMASK(31, 24)
1919+
1920+#define MTK_PPE_UNBIND_AGE 0x238
1921+#define MTK_PPE_UNBIND_AGE_MIN_PACKETS GENMASK(31, 16)
1922+#define MTK_PPE_UNBIND_AGE_DELTA GENMASK(7, 0)
1923+
1924+#define MTK_PPE_BIND_AGE0 0x23c
1925+#define MTK_PPE_BIND_AGE0_DELTA_NON_L4 GENMASK(30, 16)
1926+#define MTK_PPE_BIND_AGE0_DELTA_UDP GENMASK(14, 0)
1927+
1928+#define MTK_PPE_BIND_AGE1 0x240
1929+#define MTK_PPE_BIND_AGE1_DELTA_TCP_FIN GENMASK(30, 16)
1930+#define MTK_PPE_BIND_AGE1_DELTA_TCP GENMASK(14, 0)
1931+
1932+#define MTK_PPE_HASH_SEED 0x244
1933+
1934+#define MTK_PPE_DEFAULT_CPU_PORT 0x248
1935+#define MTK_PPE_DEFAULT_CPU_PORT_MASK(_n) (GENMASK(2, 0) << ((_n) * 4))
1936+
1937+#define MTK_PPE_MTU_DROP 0x308
1938+
1939+#define MTK_PPE_VLAN_MTU0 0x30c
1940+#define MTK_PPE_VLAN_MTU0_NONE GENMASK(13, 0)
1941+#define MTK_PPE_VLAN_MTU0_1TAG GENMASK(29, 16)
1942+
1943+#define MTK_PPE_VLAN_MTU1 0x310
1944+#define MTK_PPE_VLAN_MTU1_2TAG GENMASK(13, 0)
1945+#define MTK_PPE_VLAN_MTU1_3TAG GENMASK(29, 16)
1946+
1947+#define MTK_PPE_VPM_TPID 0x318
1948+
1949+#define MTK_PPE_CACHE_CTL 0x320
1950+#define MTK_PPE_CACHE_CTL_EN BIT(0)
1951+#define MTK_PPE_CACHE_CTL_LOCK_CLR BIT(4)
1952+#define MTK_PPE_CACHE_CTL_REQ BIT(8)
1953+#define MTK_PPE_CACHE_CTL_CLEAR BIT(9)
1954+#define MTK_PPE_CACHE_CTL_CMD GENMASK(13, 12)
1955+
1956+#define MTK_PPE_MIB_CFG 0x334
1957+#define MTK_PPE_MIB_CFG_EN BIT(0)
1958+#define MTK_PPE_MIB_CFG_RD_CLR BIT(1)
1959+
1960+#define MTK_PPE_MIB_TB_BASE 0x338
1961+
1962+#define MTK_PPE_MIB_CACHE_CTL 0x350
1963+#define MTK_PPE_MIB_CACHE_CTL_EN BIT(0)
1964+#define MTK_PPE_MIB_CACHE_CTL_FLUSH BIT(2)
1965+
1966+#endif
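
These are plain GENMASK()/BIT() register layouts and are consumed with the <linux/bitfield.h> helpers. A minimal sketch follows, assuming a raw MMIO base pointer; the helper name example_ppe_set_table_cfg() is an assumption for illustration.

#include <linux/bitfield.h>
#include <linux/io.h>

/* Illustrative only: program the scan-mode and keep-alive fields of
 * MTK_PPE_TB_CFG using the field macros defined above.
 */
static void example_ppe_set_table_cfg(void __iomem *base)
{
	u32 val = readl(base + MTK_PPE_TB_CFG);

	val &= ~(MTK_PPE_TB_CFG_SCAN_MODE | MTK_PPE_TB_CFG_KEEPALIVE);
	val |= FIELD_PREP(MTK_PPE_TB_CFG_SCAN_MODE,
			  MTK_PPE_SCAN_MODE_KEEPALIVE_AGE);
	val |= FIELD_PREP(MTK_PPE_TB_CFG_KEEPALIVE,
			  MTK_PPE_KEEPALIVE_UNICAST_CPU);

	writel(val, base + MTK_PPE_TB_CFG);
}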
1967diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
developer58aa0682023-09-18 14:02:26 +08001968index 078c0f4..f8a98d8 100644
developer8cb3ac72022-07-04 10:55:14 +08001969--- a/drivers/net/ppp/ppp_generic.c
1970+++ b/drivers/net/ppp/ppp_generic.c
1971@@ -1378,12 +1378,34 @@ static void ppp_dev_priv_destructor(struct net_device *dev)
1972 ppp_destroy_interface(ppp);
1973 }
1974
1975+static int ppp_fill_forward_path(struct net_device_path_ctx *ctx,
1976+ struct net_device_path *path)
1977+{
1978+ struct ppp *ppp = netdev_priv(ctx->dev);
1979+ struct ppp_channel *chan;
1980+ struct channel *pch;
1981+
1982+ if (ppp->flags & SC_MULTILINK)
1983+ return -EOPNOTSUPP;
1984+
1985+ if (list_empty(&ppp->channels))
1986+ return -ENODEV;
1987+
1988+ pch = list_first_entry(&ppp->channels, struct channel, clist);
1989+ chan = pch->chan;
1990+ if (!chan->ops->fill_forward_path)
1991+ return -EOPNOTSUPP;
1992+
1993+ return chan->ops->fill_forward_path(ctx, path, chan);
1994+}
1995+
1996 static const struct net_device_ops ppp_netdev_ops = {
1997 .ndo_init = ppp_dev_init,
1998 .ndo_uninit = ppp_dev_uninit,
1999 .ndo_start_xmit = ppp_start_xmit,
2000 .ndo_do_ioctl = ppp_net_ioctl,
2001 .ndo_get_stats64 = ppp_get_stats64,
2002+ .ndo_fill_forward_path = ppp_fill_forward_path,
2003 };
2004
2005 static struct device_type ppp_type = {
2006diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
developer58aa0682023-09-18 14:02:26 +08002007index 087b016..7a8c246 100644
developer8cb3ac72022-07-04 10:55:14 +08002008--- a/drivers/net/ppp/pppoe.c
2009+++ b/drivers/net/ppp/pppoe.c
2010@@ -974,8 +974,32 @@ static int pppoe_xmit(struct ppp_channel *chan, struct sk_buff *skb)
2011 return __pppoe_xmit(sk, skb);
2012 }
2013
2014+static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx,
2015+ struct net_device_path *path,
2016+ const struct ppp_channel *chan)
2017+{
2018+ struct sock *sk = (struct sock *)chan->private;
2019+ struct pppox_sock *po = pppox_sk(sk);
2020+ struct net_device *dev = po->pppoe_dev;
2021+
2022+ if (sock_flag(sk, SOCK_DEAD) ||
2023+ !(sk->sk_state & PPPOX_CONNECTED) || !dev)
2024+ return -1;
2025+
2026+ path->type = DEV_PATH_PPPOE;
2027+ path->encap.proto = htons(ETH_P_PPP_SES);
2028+ path->encap.id = be16_to_cpu(po->num);
2029+ memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN);
2030+ memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN);
2031+ path->dev = ctx->dev;
2032+ ctx->dev = dev;
2033+
2034+ return 0;
2035+}
2036+
2037 static const struct ppp_channel_ops pppoe_chan_ops = {
2038 .start_xmit = pppoe_xmit,
2039+ .fill_forward_path = pppoe_fill_forward_path,
2040 };
2041
2042 static int pppoe_recvmsg(struct socket *sock, struct msghdr *m,
2043diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
developer58aa0682023-09-18 14:02:26 +08002044index 631d158..ef44d9a 100644
developer8cb3ac72022-07-04 10:55:14 +08002045--- a/include/linux/netdevice.h
2046+++ b/include/linux/netdevice.h
developer58aa0682023-09-18 14:02:26 +08002047@@ -838,6 +838,59 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
developer8cb3ac72022-07-04 10:55:14 +08002048 struct sk_buff *skb,
2049 struct net_device *sb_dev);
2050
2051+enum net_device_path_type {
2052+ DEV_PATH_ETHERNET = 0,
2053+ DEV_PATH_VLAN,
2054+ DEV_PATH_BRIDGE,
2055+ DEV_PATH_PPPOE,
2056+ DEV_PATH_DSA,
2057+};
2058+
2059+struct net_device_path {
2060+ enum net_device_path_type type;
2061+ const struct net_device *dev;
2062+ union {
2063+ struct {
2064+ u16 id;
2065+ __be16 proto;
2066+ u8 h_dest[ETH_ALEN];
2067+ } encap;
2068+ struct {
2069+ enum {
2070+ DEV_PATH_BR_VLAN_KEEP,
2071+ DEV_PATH_BR_VLAN_TAG,
2072+ DEV_PATH_BR_VLAN_UNTAG,
2073+ DEV_PATH_BR_VLAN_UNTAG_HW,
2074+ } vlan_mode;
2075+ u16 vlan_id;
2076+ __be16 vlan_proto;
2077+ } bridge;
2078+ struct {
2079+ int port;
2080+ u16 proto;
2081+ } dsa;
2082+ };
2083+};
2084+
2085+#define NET_DEVICE_PATH_STACK_MAX 5
2086+#define NET_DEVICE_PATH_VLAN_MAX 2
2087+
2088+struct net_device_path_stack {
2089+ int num_paths;
2090+ struct net_device_path path[NET_DEVICE_PATH_STACK_MAX];
2091+};
2092+
2093+struct net_device_path_ctx {
2094+ const struct net_device *dev;
2095+ u8 daddr[ETH_ALEN];
2096+
2097+ int num_vlans;
2098+ struct {
2099+ u16 id;
2100+ __be16 proto;
2101+ } vlan[NET_DEVICE_PATH_VLAN_MAX];
2102+};
2103+
2104 enum tc_setup_type {
2105 TC_SETUP_QDISC_MQPRIO,
2106 TC_SETUP_CLSU32,
developer58aa0682023-09-18 14:02:26 +08002107@@ -853,6 +906,7 @@ enum tc_setup_type {
developer8cb3ac72022-07-04 10:55:14 +08002108 TC_SETUP_ROOT_QDISC,
2109 TC_SETUP_QDISC_GRED,
2110 TC_SETUP_QDISC_TAPRIO,
2111+ TC_SETUP_FT,
2112 };
2113
2114 /* These structures hold the attributes of bpf state that are being passed
developer58aa0682023-09-18 14:02:26 +08002115@@ -1248,6 +1302,8 @@ struct tlsdev_ops;
developer8cb3ac72022-07-04 10:55:14 +08002116 * Get devlink port instance associated with a given netdev.
2117 * Called with a reference on the netdevice and devlink locks only,
2118 * rtnl_lock is not held.
2119+ * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path);
2120+ * Get the forwarding path to reach the real device from the HW destination address
2121 */
2122 struct net_device_ops {
2123 int (*ndo_init)(struct net_device *dev);
developer58aa0682023-09-18 14:02:26 +08002124@@ -1445,6 +1501,8 @@ struct net_device_ops {
developer8cb3ac72022-07-04 10:55:14 +08002125 int (*ndo_xsk_wakeup)(struct net_device *dev,
2126 u32 queue_id, u32 flags);
2127 struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev);
2128+ int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx,
2129+ struct net_device_path *path);
2130 };
2131
2132 /**
developer58aa0682023-09-18 14:02:26 +08002133@@ -2670,6 +2728,8 @@ void dev_remove_offload(struct packet_offload *po);
developer8cb3ac72022-07-04 10:55:14 +08002134
2135 int dev_get_iflink(const struct net_device *dev);
2136 int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
2137+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
2138+ struct net_device_path_stack *stack);
2139 struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
2140 unsigned short mask);
2141 struct net_device *dev_get_by_name(struct net *net, const char *name);
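
dev_fill_forward_path(), declared above, repeatedly invokes the ndo_fill_forward_path() callbacks added elsewhere in this patch (VLAN, bridge, PPPoE, DSA) until it reaches a device without one, recording one net_device_path per hop. A minimal sketch of a consumer follows; example_walk_forward_path() and its bookkeeping are illustrative assumptions, not part of the patch.

#include <linux/netdevice.h>

static int example_walk_forward_path(const struct net_device *dev,
				     const u8 *daddr)
{
	struct net_device_path_stack stack;
	int i, err;

	rcu_read_lock();
	err = dev_fill_forward_path(dev, daddr, &stack);
	rcu_read_unlock();
	if (err < 0)
		return err;

	for (i = 0; i < stack.num_paths; i++) {
		const struct net_device_path *path = &stack.path[i];

		switch (path->type) {
		case DEV_PATH_PPPOE:
			/* path->encap.id holds the PPPoE session id */
			break;
		case DEV_PATH_VLAN:
			/* path->encap.id / path->encap.proto describe the tag */
			break;
		case DEV_PATH_DSA:
			/* path->dsa.port is the switch port behind the CPU port */
			break;
		default:
			break;
		}
	}

	return 0;
}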
2142diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h
developer58aa0682023-09-18 14:02:26 +08002143index 9896606..91f9a92 100644
developer8cb3ac72022-07-04 10:55:14 +08002144--- a/include/linux/ppp_channel.h
2145+++ b/include/linux/ppp_channel.h
2146@@ -28,6 +28,9 @@ struct ppp_channel_ops {
2147 int (*start_xmit)(struct ppp_channel *, struct sk_buff *);
2148 /* Handle an ioctl call that has come in via /dev/ppp. */
2149 int (*ioctl)(struct ppp_channel *, unsigned int, unsigned long);
2150+ int (*fill_forward_path)(struct net_device_path_ctx *,
2151+ struct net_device_path *,
2152+ const struct ppp_channel *);
2153 };
2154
2155 struct ppp_channel {
2156diff --git a/include/net/dsa.h b/include/net/dsa.h
developer58aa0682023-09-18 14:02:26 +08002157index d29ee9e..43f65cb 100644
developer8cb3ac72022-07-04 10:55:14 +08002158--- a/include/net/dsa.h
2159+++ b/include/net/dsa.h
developer58aa0682023-09-18 14:02:26 +08002160@@ -562,6 +562,8 @@ struct dsa_switch_ops {
developer8cb3ac72022-07-04 10:55:14 +08002161 struct sk_buff *skb);
2162 };
2163
2164+struct dsa_port *dsa_port_from_netdev(struct net_device *netdev);
2165+
2166 struct dsa_switch_driver {
2167 struct list_head list;
2168 const struct dsa_switch_ops *ops;
developer58aa0682023-09-18 14:02:26 +08002169@@ -654,6 +656,14 @@ static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev,
developer8cb3ac72022-07-04 10:55:14 +08002170 #define BRCM_TAG_GET_PORT(v) ((v) >> 8)
2171 #define BRCM_TAG_GET_QUEUE(v) ((v) & 0xff)
2172
2173+#if IS_ENABLED(CONFIG_NET_DSA)
2174+bool dsa_slave_dev_check(const struct net_device *dev);
2175+#else
2176+static inline bool dsa_slave_dev_check(const struct net_device *dev)
2177+{
2178+ return false;
2179+}
2180+#endif
2181
2182 netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev);
2183 int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data);
2184diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
developer58aa0682023-09-18 14:02:26 +08002185index c6f7bd2..59b8736 100644
developer8cb3ac72022-07-04 10:55:14 +08002186--- a/include/net/flow_offload.h
2187+++ b/include/net/flow_offload.h
2188@@ -138,6 +138,7 @@ enum flow_action_id {
2189 FLOW_ACTION_MPLS_PUSH,
2190 FLOW_ACTION_MPLS_POP,
2191 FLOW_ACTION_MPLS_MANGLE,
2192+ FLOW_ACTION_PPPOE_PUSH,
2193 NUM_FLOW_ACTIONS,
2194 };
2195
2196@@ -213,6 +214,9 @@ struct flow_action_entry {
2197 u8 bos;
2198 u8 ttl;
2199 } mpls_mangle;
2200+ struct { /* FLOW_ACTION_PPPOE_PUSH */
2201+ u16 sid;
2202+ } pppoe;
2203 };
2204 };
2205
2206diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
developer58aa0682023-09-18 14:02:26 +08002207index 2c739fc..89ab8f1 100644
developer8cb3ac72022-07-04 10:55:14 +08002208--- a/include/net/ip6_route.h
2209+++ b/include/net/ip6_route.h
2210@@ -314,12 +314,13 @@ static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *
2211 !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws);
2212 }
2213
2214-static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
2215+static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst,
2216+ bool forwarding)
2217 {
2218 struct inet6_dev *idev;
2219 unsigned int mtu;
2220
2221- if (dst_metric_locked(dst, RTAX_MTU)) {
2222+ if (!forwarding || dst_metric_locked(dst, RTAX_MTU)) {
2223 mtu = dst_metric_raw(dst, RTAX_MTU);
2224 if (mtu)
2225 goto out;
2226diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
developer58aa0682023-09-18 14:02:26 +08002227index 7b3c873..e954831 100644
developer8cb3ac72022-07-04 10:55:14 +08002228--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
2229+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
2230@@ -4,7 +4,4 @@
2231
2232 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
2233
2234-#include <linux/sysctl.h>
2235-extern struct ctl_table nf_ct_ipv6_sysctl_table[];
2236-
2237 #endif /* _NF_CONNTRACK_IPV6_H*/
2238diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
developer58aa0682023-09-18 14:02:26 +08002239index 90690e3..ce0bc3e 100644
developer8cb3ac72022-07-04 10:55:14 +08002240--- a/include/net/netfilter/nf_conntrack.h
2241+++ b/include/net/netfilter/nf_conntrack.h
2242@@ -279,6 +279,18 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
2243 !nf_ct_is_dying(ct);
2244 }
2245
2246+#define NF_CT_DAY (86400 * HZ)
2247+
2248+/* Set an arbitrary timeout large enough that it never expires; this saves
2249+ * us a check for the IPS_OFFLOAD_BIT from the packet path via
2250+ * nf_ct_is_expired().
2251+ */
2252+static inline void nf_ct_offload_timeout(struct nf_conn *ct)
2253+{
2254+ if (nf_ct_expires(ct) < NF_CT_DAY / 2)
2255+ WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
2256+}
2257+
2258 struct kernel_param;
2259
2260 int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp);
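
As the comment above notes, the inflated timeout spares the packet path an IPS_OFFLOAD_BIT check; the flowtable garbage collector is expected to keep calling nf_ct_offload_timeout() while the flow stays offloaded. A minimal sketch, using the flow_offload flags introduced later in this patch; the function name and the teardown check are illustrative assumptions.

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_flow_table.h>

/* Illustrative only: refresh the conntrack timeout of a flow that is
 * still offloaded so nf_ct_is_expired() never fires for it.
 */
static void example_refresh_offloaded_ct(struct flow_offload *flow)
{
	if (!test_bit(NF_FLOW_TEARDOWN, &flow->flags))
		nf_ct_offload_timeout(flow->ct);
}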
2261diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
developer58aa0682023-09-18 14:02:26 +08002262index f7a060c..7f44a77 100644
developer8cb3ac72022-07-04 10:55:14 +08002263--- a/include/net/netfilter/nf_conntrack_acct.h
2264+++ b/include/net/netfilter/nf_conntrack_acct.h
2265@@ -65,6 +65,17 @@ static inline void nf_ct_set_acct(struct net *net, bool enable)
2266 #endif
2267 }
2268
2269+void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
2270+ unsigned int bytes);
2271+
2272+static inline void nf_ct_acct_update(struct nf_conn *ct, u32 dir,
2273+ unsigned int bytes)
2274+{
2275+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
2276+ nf_ct_acct_add(ct, dir, 1, bytes);
2277+#endif
2278+}
2279+
2280 void nf_conntrack_acct_pernet_init(struct net *net);
2281
2282 int nf_conntrack_acct_init(void);
2283diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
developer58aa0682023-09-18 14:02:26 +08002284index 68d7fc9..feac793 100644
developer8cb3ac72022-07-04 10:55:14 +08002285--- a/include/net/netfilter/nf_flow_table.h
2286+++ b/include/net/netfilter/nf_flow_table.h
2287@@ -8,31 +8,99 @@
2288 #include <linux/rcupdate.h>
2289 #include <linux/netfilter.h>
2290 #include <linux/netfilter/nf_conntrack_tuple_common.h>
2291+#include <net/flow_offload.h>
2292 #include <net/dst.h>
2293+#include <linux/if_pppox.h>
2294+#include <linux/ppp_defs.h>
2295
2296 struct nf_flowtable;
2297+struct nf_flow_rule;
2298+struct flow_offload;
2299+enum flow_offload_tuple_dir;
2300+
2301+struct nf_flow_key {
2302+ struct flow_dissector_key_meta meta;
2303+ struct flow_dissector_key_control control;
2304+ struct flow_dissector_key_control enc_control;
2305+ struct flow_dissector_key_basic basic;
2306+ struct flow_dissector_key_vlan vlan;
2307+ struct flow_dissector_key_vlan cvlan;
2308+ union {
2309+ struct flow_dissector_key_ipv4_addrs ipv4;
2310+ struct flow_dissector_key_ipv6_addrs ipv6;
2311+ };
2312+ struct flow_dissector_key_keyid enc_key_id;
2313+ union {
2314+ struct flow_dissector_key_ipv4_addrs enc_ipv4;
2315+ struct flow_dissector_key_ipv6_addrs enc_ipv6;
2316+ };
2317+ struct flow_dissector_key_tcp tcp;
2318+ struct flow_dissector_key_ports tp;
2319+} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
2320+
2321+struct nf_flow_match {
2322+ struct flow_dissector dissector;
2323+ struct nf_flow_key key;
2324+ struct nf_flow_key mask;
2325+};
2326+
2327+struct nf_flow_rule {
2328+ struct nf_flow_match match;
2329+ struct flow_rule *rule;
2330+};
2331
2332 struct nf_flowtable_type {
2333 struct list_head list;
2334 int family;
2335 int (*init)(struct nf_flowtable *ft);
2336+ int (*setup)(struct nf_flowtable *ft,
2337+ struct net_device *dev,
2338+ enum flow_block_command cmd);
2339+ int (*action)(struct net *net,
2340+ const struct flow_offload *flow,
2341+ enum flow_offload_tuple_dir dir,
2342+ struct nf_flow_rule *flow_rule);
2343 void (*free)(struct nf_flowtable *ft);
2344 nf_hookfn *hook;
2345 struct module *owner;
2346 };
2347
2348+enum nf_flowtable_flags {
2349+ NF_FLOWTABLE_HW_OFFLOAD = 0x1, /* NFT_FLOWTABLE_HW_OFFLOAD */
2350+ NF_FLOWTABLE_COUNTER = 0x2, /* NFT_FLOWTABLE_COUNTER */
2351+};
2352+
2353 struct nf_flowtable {
2354 struct list_head list;
2355 struct rhashtable rhashtable;
2356+ int priority;
2357 const struct nf_flowtable_type *type;
2358 struct delayed_work gc_work;
2359+ unsigned int flags;
2360+ struct flow_block flow_block;
2361+ struct rw_semaphore flow_block_lock; /* Guards flow_block */
2362+ possible_net_t net;
2363 };
2364
2365+static inline bool nf_flowtable_hw_offload(struct nf_flowtable *flowtable)
2366+{
2367+ return flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD;
2368+}
2369+
2370 enum flow_offload_tuple_dir {
2371 FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
2372 FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
2373- FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
2374 };
2375+#define FLOW_OFFLOAD_DIR_MAX IP_CT_DIR_MAX
2376+
2377+enum flow_offload_xmit_type {
2378+ FLOW_OFFLOAD_XMIT_UNSPEC = 0,
2379+ FLOW_OFFLOAD_XMIT_NEIGH,
2380+ FLOW_OFFLOAD_XMIT_XFRM,
2381+ FLOW_OFFLOAD_XMIT_DIRECT,
2382+};
2383+
2384+#define NF_FLOW_TABLE_ENCAP_MAX 2
2385
2386 struct flow_offload_tuple {
2387 union {
developerb7c46752022-07-04 19:51:38 +08002388@@ -52,13 +120,30 @@ struct flow_offload_tuple {
developer8cb3ac72022-07-04 10:55:14 +08002389
2390 u8 l3proto;
2391 u8 l4proto;
2392- u8 dir;
2393+ struct {
2394+ u16 id;
2395+ __be16 proto;
2396+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
2397
2398- u16 mtu;
2399+ /* All members above are keys for lookups, see flow_offload_hash(). */
2400+ struct { } __hash;
2401
developerb7c46752022-07-04 19:51:38 +08002402- struct {
2403- struct dst_entry *dst_cache;
2404- u32 dst_cookie;
developer8cb3ac72022-07-04 10:55:14 +08002405+ u8 dir:2,
2406+ xmit_type:2,
2407+ encap_num:2,
2408+ in_vlan_ingress:2;
2409+ u16 mtu;
2410+ union {
2411+ struct {
2412+ struct dst_entry *dst_cache;
2413+ u32 dst_cookie;
2414+ };
2415+ struct {
2416+ u32 ifidx;
2417+ u32 hw_ifidx;
2418+ u8 h_source[ETH_ALEN];
2419+ u8 h_dest[ETH_ALEN];
2420+ } out;
developerb7c46752022-07-04 19:51:38 +08002421 };
developer8cb3ac72022-07-04 10:55:14 +08002422 };
2423
developeree39bcf2023-06-16 08:03:30 +08002424@@ -67,52 +152,140 @@ struct flow_offload_tuple_rhash {
developer8cb3ac72022-07-04 10:55:14 +08002425 struct flow_offload_tuple tuple;
2426 };
2427
2428-#define FLOW_OFFLOAD_SNAT 0x1
2429-#define FLOW_OFFLOAD_DNAT 0x2
2430-#define FLOW_OFFLOAD_DYING 0x4
2431-#define FLOW_OFFLOAD_TEARDOWN 0x8
2432+enum nf_flow_flags {
2433+ NF_FLOW_SNAT,
2434+ NF_FLOW_DNAT,
2435+ NF_FLOW_TEARDOWN,
2436+ NF_FLOW_HW,
developeree39bcf2023-06-16 08:03:30 +08002437+ NF_FLOW_HW_ACCT_DYING,
developer8cb3ac72022-07-04 10:55:14 +08002438+ NF_FLOW_HW_DYING,
2439+ NF_FLOW_HW_DEAD,
2440+ NF_FLOW_HW_PENDING,
2441+};
2442+
2443+enum flow_offload_type {
2444+ NF_FLOW_OFFLOAD_UNSPEC = 0,
2445+ NF_FLOW_OFFLOAD_ROUTE,
2446+};
2447
2448 struct flow_offload {
2449 struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
2450- u32 flags;
2451- union {
2452- /* Your private driver data here. */
2453- u32 timeout;
2454- };
2455+ struct nf_conn *ct;
2456+ unsigned long flags;
2457+ u16 type;
2458+ u32 timeout;
2459+ struct rcu_head rcu_head;
2460 };
2461
2462 #define NF_FLOW_TIMEOUT (30 * HZ)
2463+#define nf_flowtable_time_stamp (u32)jiffies
2464+
2465+unsigned long flow_offload_get_timeout(struct flow_offload *flow);
2466+
2467+static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
2468+{
2469+ return (__s32)(timeout - nf_flowtable_time_stamp);
2470+}
2471
2472 struct nf_flow_route {
2473 struct {
2474- struct dst_entry *dst;
2475+ struct dst_entry *dst;
2476+ struct {
2477+ u32 ifindex;
2478+ struct {
2479+ u16 id;
2480+ __be16 proto;
2481+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
2482+ u8 num_encaps:2,
2483+ ingress_vlans:2;
2484+ } in;
2485+ struct {
2486+ u32 ifindex;
2487+ u32 hw_ifindex;
2488+ u8 h_source[ETH_ALEN];
2489+ u8 h_dest[ETH_ALEN];
2490+ } out;
2491+ enum flow_offload_xmit_type xmit_type;
2492 } tuple[FLOW_OFFLOAD_DIR_MAX];
2493 };
2494
2495-struct flow_offload *flow_offload_alloc(struct nf_conn *ct,
2496- struct nf_flow_route *route);
2497+struct flow_offload *flow_offload_alloc(struct nf_conn *ct);
2498 void flow_offload_free(struct flow_offload *flow);
2499
2500+static inline int
2501+nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table,
2502+ flow_setup_cb_t *cb, void *cb_priv)
2503+{
2504+ struct flow_block *block = &flow_table->flow_block;
2505+ struct flow_block_cb *block_cb;
2506+ int err = 0;
2507+
2508+ down_write(&flow_table->flow_block_lock);
2509+ block_cb = flow_block_cb_lookup(block, cb, cb_priv);
2510+ if (block_cb) {
2511+ err = -EEXIST;
2512+ goto unlock;
2513+ }
2514+
2515+ block_cb = flow_block_cb_alloc(cb, cb_priv, cb_priv, NULL);
2516+ if (IS_ERR(block_cb)) {
2517+ err = PTR_ERR(block_cb);
2518+ goto unlock;
2519+ }
2520+
2521+ list_add_tail(&block_cb->list, &block->cb_list);
2522+
2523+unlock:
2524+ up_write(&flow_table->flow_block_lock);
2525+ return err;
2526+}
2527+
2528+static inline void
2529+nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
2530+ flow_setup_cb_t *cb, void *cb_priv)
2531+{
2532+ struct flow_block *block = &flow_table->flow_block;
2533+ struct flow_block_cb *block_cb;
2534+
2535+ down_write(&flow_table->flow_block_lock);
2536+ block_cb = flow_block_cb_lookup(block, cb, cb_priv);
2537+ if (block_cb) {
2538+ list_del(&block_cb->list);
2539+ flow_block_cb_free(block_cb);
2540+ } else {
2541+ WARN_ON(true);
2542+ }
2543+ up_write(&flow_table->flow_block_lock);
2544+}
2545+
2546+int flow_offload_route_init(struct flow_offload *flow,
2547+ const struct nf_flow_route *route);
2548+
2549 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
2550+void flow_offload_refresh(struct nf_flowtable *flow_table,
2551+ struct flow_offload *flow);
2552+
2553 struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
2554 struct flow_offload_tuple *tuple);
2555+void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
2556+ struct net_device *dev);
2557 void nf_flow_table_cleanup(struct net_device *dev);
2558
2559 int nf_flow_table_init(struct nf_flowtable *flow_table);
2560 void nf_flow_table_free(struct nf_flowtable *flow_table);
2561
2562 void flow_offload_teardown(struct flow_offload *flow);
2563-static inline void flow_offload_dead(struct flow_offload *flow)
2564-{
2565- flow->flags |= FLOW_OFFLOAD_DYING;
2566-}
2567
2568-int nf_flow_snat_port(const struct flow_offload *flow,
2569- struct sk_buff *skb, unsigned int thoff,
2570- u8 protocol, enum flow_offload_tuple_dir dir);
2571-int nf_flow_dnat_port(const struct flow_offload *flow,
2572- struct sk_buff *skb, unsigned int thoff,
2573- u8 protocol, enum flow_offload_tuple_dir dir);
2574+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
2575+ void (*iter)(struct flow_offload *flow, void *data),
2576+ void *data);
2577+
2578+void nf_flow_snat_port(const struct flow_offload *flow,
2579+ struct sk_buff *skb, unsigned int thoff,
2580+ u8 protocol, enum flow_offload_tuple_dir dir);
2581+void nf_flow_dnat_port(const struct flow_offload *flow,
2582+ struct sk_buff *skb, unsigned int thoff,
2583+ u8 protocol, enum flow_offload_tuple_dir dir);
2584
2585 struct flow_ports {
2586 __be16 source, dest;
developer58aa0682023-09-18 14:02:26 +08002587@@ -126,4 +299,41 @@ unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08002588 #define MODULE_ALIAS_NF_FLOWTABLE(family) \
2589 MODULE_ALIAS("nf-flowtable-" __stringify(family))
2590
2591+void nf_flow_offload_add(struct nf_flowtable *flowtable,
2592+ struct flow_offload *flow);
2593+void nf_flow_offload_del(struct nf_flowtable *flowtable,
2594+ struct flow_offload *flow);
2595+void nf_flow_offload_stats(struct nf_flowtable *flowtable,
developeree39bcf2023-06-16 08:03:30 +08002596+ struct flow_offload *flow, bool force);
developer8cb3ac72022-07-04 10:55:14 +08002597+
2598+void nf_flow_table_offload_flush(struct nf_flowtable *flowtable);
2599+int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
2600+ struct net_device *dev,
2601+ enum flow_block_command cmd);
2602+int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
2603+ enum flow_offload_tuple_dir dir,
2604+ struct nf_flow_rule *flow_rule);
2605+int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
2606+ enum flow_offload_tuple_dir dir,
2607+ struct nf_flow_rule *flow_rule);
2608+
2609+int nf_flow_table_offload_init(void);
2610+void nf_flow_table_offload_exit(void);
2611+
2612+static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
2613+{
2614+ __be16 proto;
2615+
2616+ proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
2617+ sizeof(struct pppoe_hdr)));
2618+ switch (proto) {
2619+ case htons(PPP_IP):
2620+ return htons(ETH_P_IP);
2621+ case htons(PPP_IPV6):
2622+ return htons(ETH_P_IPV6);
2623+ }
2624+
2625+ return 0;
2626+}
2627+
2628 #endif /* _NF_FLOW_TABLE_H */
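
nf_flow_pppoe_proto() lets the IP fast path look through a PPPoE header by translating the PPP protocol field into an Ethernet protocol. A minimal sketch of the intended use, assuming the MAC header has already been pulled and validated; the helper name is illustrative.

#include <linux/if_ether.h>
#include <linux/skbuff.h>
#include <net/netfilter/nf_flow_table.h>

/* Illustrative only: map the on-wire protocol to the encapsulated L3
 * protocol before running the IPv4/IPv6 flowtable hooks.
 */
static __be16 example_encap_l3_proto(const struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_PPP_SES))
		return nf_flow_pppoe_proto(skb); /* PPP_IP -> ETH_P_IP, ... */

	return skb->protocol;
}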
2629diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
developer58aa0682023-09-18 14:02:26 +08002630index 806454e..9e3963c 100644
developer8cb3ac72022-07-04 10:55:14 +08002631--- a/include/net/netns/conntrack.h
2632+++ b/include/net/netns/conntrack.h
2633@@ -27,6 +27,9 @@ struct nf_tcp_net {
2634 int tcp_loose;
2635 int tcp_be_liberal;
2636 int tcp_max_retrans;
2637+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
2638+ unsigned int offload_timeout;
2639+#endif
2640 };
2641
2642 enum udp_conntrack {
2643@@ -37,6 +40,9 @@ enum udp_conntrack {
2644
2645 struct nf_udp_net {
2646 unsigned int timeouts[UDP_CT_MAX];
2647+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
2648+ unsigned int offload_timeout;
2649+#endif
2650 };
2651
2652 struct nf_icmp_net {
2653diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
developer58aa0682023-09-18 14:02:26 +08002654index 336014b..ae698d1 100644
developer8cb3ac72022-07-04 10:55:14 +08002655--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
2656+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
2657@@ -105,14 +105,19 @@ enum ip_conntrack_status {
2658 IPS_OFFLOAD_BIT = 14,
2659 IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT),
2660
2661+ /* Conntrack has been offloaded to hardware. */
2662+ IPS_HW_OFFLOAD_BIT = 15,
2663+ IPS_HW_OFFLOAD = (1 << IPS_HW_OFFLOAD_BIT),
2664+
2665 /* Be careful here, modifying these bits can make things messy,
2666 * so don't let users modify them directly.
2667 */
2668 IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
2669 IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
2670- IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD),
2671+ IPS_SEQ_ADJUST | IPS_TEMPLATE |
2672+ IPS_OFFLOAD | IPS_HW_OFFLOAD),
2673
2674- __IPS_MAX_BIT = 15,
2675+ __IPS_MAX_BIT = 16,
2676 };
2677
2678 /* Connection tracking event types */
2679diff --git a/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
2680new file mode 100644
developer58aa0682023-09-18 14:02:26 +08002681index 0000000..5841bbe
developer8cb3ac72022-07-04 10:55:14 +08002682--- /dev/null
2683+++ b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
2684@@ -0,0 +1,17 @@
2685+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2686+#ifndef _XT_FLOWOFFLOAD_H
2687+#define _XT_FLOWOFFLOAD_H
2688+
2689+#include <linux/types.h>
2690+
2691+enum {
2692+ XT_FLOWOFFLOAD_HW = 1 << 0,
2693+
2694+ XT_FLOWOFFLOAD_MASK = XT_FLOWOFFLOAD_HW
2695+};
2696+
2697+struct xt_flowoffload_target_info {
2698+ __u32 flags;
2699+};
2700+
2701+#endif /* _XT_FLOWOFFLOAD_H */
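
XT_FLOWOFFLOAD_MASK is the full set of flag bits userspace may pass; anything outside it should be rejected when the target options are checked. A minimal, illustrative validation helper; the name example_flowoffload_check() is an assumption, not part of the patch.

#include <linux/errno.h>
#include <linux/netfilter/xt_FLOWOFFLOAD.h>

static int example_flowoffload_check(const struct xt_flowoffload_target_info *info)
{
	/* Only XT_FLOWOFFLOAD_HW is defined; refuse unknown bits. */
	if (info->flags & ~XT_FLOWOFFLOAD_MASK)
		return -EINVAL;

	return 0;
}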
2702diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
developer58aa0682023-09-18 14:02:26 +08002703index 0a3a167..6112266 100644
developer8cb3ac72022-07-04 10:55:14 +08002704--- a/net/8021q/vlan_dev.c
2705+++ b/net/8021q/vlan_dev.c
2706@@ -747,6 +747,26 @@ static int vlan_dev_get_iflink(const struct net_device *dev)
2707 return real_dev->ifindex;
2708 }
2709
2710+static int vlan_dev_fill_forward_path(struct net_device_path_ctx *ctx,
2711+ struct net_device_path *path)
2712+{
2713+ struct vlan_dev_priv *vlan = vlan_dev_priv(ctx->dev);
2714+
2715+ path->type = DEV_PATH_VLAN;
2716+ path->encap.id = vlan->vlan_id;
2717+ path->encap.proto = vlan->vlan_proto;
2718+ path->dev = ctx->dev;
2719+ ctx->dev = vlan->real_dev;
2720+ if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
2721+ return -ENOSPC;
2722+
2723+ ctx->vlan[ctx->num_vlans].id = vlan->vlan_id;
2724+ ctx->vlan[ctx->num_vlans].proto = vlan->vlan_proto;
2725+ ctx->num_vlans++;
2726+
2727+ return 0;
2728+}
2729+
2730 static const struct ethtool_ops vlan_ethtool_ops = {
2731 .get_link_ksettings = vlan_ethtool_get_link_ksettings,
2732 .get_drvinfo = vlan_ethtool_get_drvinfo,
2733@@ -785,6 +805,7 @@ static const struct net_device_ops vlan_netdev_ops = {
2734 #endif
2735 .ndo_fix_features = vlan_dev_fix_features,
2736 .ndo_get_iflink = vlan_dev_get_iflink,
2737+ .ndo_fill_forward_path = vlan_dev_fill_forward_path,
2738 };
2739
2740 static void vlan_dev_free(struct net_device *dev)
2741diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
developer58aa0682023-09-18 14:02:26 +08002742index 501f77f..0940b44 100644
developer8cb3ac72022-07-04 10:55:14 +08002743--- a/net/bridge/br_device.c
2744+++ b/net/bridge/br_device.c
2745@@ -377,6 +377,54 @@ static int br_del_slave(struct net_device *dev, struct net_device *slave_dev)
2746 return br_del_if(br, slave_dev);
2747 }
2748
2749+static int br_fill_forward_path(struct net_device_path_ctx *ctx,
2750+ struct net_device_path *path)
2751+{
2752+ struct net_bridge_fdb_entry *f;
2753+ struct net_bridge_port *dst;
2754+ struct net_bridge *br;
2755+
2756+ if (netif_is_bridge_port(ctx->dev))
2757+ return -1;
2758+
2759+ br = netdev_priv(ctx->dev);
2760+
2761+ br_vlan_fill_forward_path_pvid(br, ctx, path);
2762+
2763+ f = br_fdb_find_rcu(br, ctx->daddr, path->bridge.vlan_id);
2764+ if (!f || !f->dst)
2765+ return -1;
2766+
2767+ dst = READ_ONCE(f->dst);
2768+ if (!dst)
2769+ return -1;
2770+
2771+ if (br_vlan_fill_forward_path_mode(br, dst, path))
2772+ return -1;
2773+
2774+ path->type = DEV_PATH_BRIDGE;
2775+ path->dev = dst->br->dev;
2776+ ctx->dev = dst->dev;
2777+
2778+ switch (path->bridge.vlan_mode) {
2779+ case DEV_PATH_BR_VLAN_TAG:
2780+ if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
2781+ return -ENOSPC;
2782+ ctx->vlan[ctx->num_vlans].id = path->bridge.vlan_id;
2783+ ctx->vlan[ctx->num_vlans].proto = path->bridge.vlan_proto;
2784+ ctx->num_vlans++;
2785+ break;
2786+ case DEV_PATH_BR_VLAN_UNTAG_HW:
2787+ case DEV_PATH_BR_VLAN_UNTAG:
2788+ ctx->num_vlans--;
2789+ break;
2790+ case DEV_PATH_BR_VLAN_KEEP:
2791+ break;
2792+ }
2793+
2794+ return 0;
2795+}
2796+
2797 static const struct ethtool_ops br_ethtool_ops = {
2798 .get_drvinfo = br_getinfo,
2799 .get_link = ethtool_op_get_link,
2800@@ -410,6 +458,7 @@ static const struct net_device_ops br_netdev_ops = {
2801 .ndo_bridge_setlink = br_setlink,
2802 .ndo_bridge_dellink = br_dellink,
2803 .ndo_features_check = passthru_features_check,
2804+ .ndo_fill_forward_path = br_fill_forward_path,
2805 };
2806
2807 static struct device_type br_type = {
2808diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
developer58aa0682023-09-18 14:02:26 +08002809index a736be8..4bd9e9b 100644
developer8cb3ac72022-07-04 10:55:14 +08002810--- a/net/bridge/br_private.h
2811+++ b/net/bridge/br_private.h
2812@@ -912,6 +912,13 @@ void br_vlan_port_event(struct net_bridge_port *p, unsigned long event);
2813 int br_vlan_bridge_event(struct net_device *dev, unsigned long event,
2814 void *ptr);
2815
2816+void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
2817+ struct net_device_path_ctx *ctx,
2818+ struct net_device_path *path);
2819+int br_vlan_fill_forward_path_mode(struct net_bridge *br,
2820+ struct net_bridge_port *dst,
2821+ struct net_device_path *path);
2822+
2823 static inline struct net_bridge_vlan_group *br_vlan_group(
2824 const struct net_bridge *br)
2825 {
2826@@ -1066,6 +1073,19 @@ static inline int nbp_get_num_vlan_infos(struct net_bridge_port *p,
2827 return 0;
2828 }
2829
2830+static inline void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
2831+ struct net_device_path_ctx *ctx,
2832+ struct net_device_path *path)
2833+{
2834+}
2835+
2836+static inline int br_vlan_fill_forward_path_mode(struct net_bridge *br,
2837+ struct net_bridge_port *dst,
2838+ struct net_device_path *path)
2839+{
2840+ return 0;
2841+}
2842+
2843 static inline struct net_bridge_vlan_group *br_vlan_group(
2844 const struct net_bridge *br)
2845 {
2846diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
developer58aa0682023-09-18 14:02:26 +08002847index 9257292..bcfd169 100644
developer8cb3ac72022-07-04 10:55:14 +08002848--- a/net/bridge/br_vlan.c
2849+++ b/net/bridge/br_vlan.c
2850@@ -1268,6 +1268,61 @@ int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid)
2851 }
2852 EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu);
2853
2854+void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
2855+ struct net_device_path_ctx *ctx,
2856+ struct net_device_path *path)
2857+{
2858+ struct net_bridge_vlan_group *vg;
2859+ int idx = ctx->num_vlans - 1;
2860+ u16 vid;
2861+
2862+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
2863+
2864+ if (!br_opt_get(br, BROPT_VLAN_ENABLED))
2865+ return;
2866+
2867+ vg = br_vlan_group(br);
2868+
2869+ if (idx >= 0 &&
2870+ ctx->vlan[idx].proto == br->vlan_proto) {
2871+ vid = ctx->vlan[idx].id;
2872+ } else {
2873+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_TAG;
2874+ vid = br_get_pvid(vg);
2875+ }
2876+
2877+ path->bridge.vlan_id = vid;
2878+ path->bridge.vlan_proto = br->vlan_proto;
2879+}
2880+
2881+int br_vlan_fill_forward_path_mode(struct net_bridge *br,
2882+ struct net_bridge_port *dst,
2883+ struct net_device_path *path)
2884+{
2885+ struct net_bridge_vlan_group *vg;
2886+ struct net_bridge_vlan *v;
2887+
2888+ if (!br_opt_get(br, BROPT_VLAN_ENABLED))
2889+ return 0;
2890+
2891+ vg = nbp_vlan_group_rcu(dst);
2892+ v = br_vlan_find(vg, path->bridge.vlan_id);
2893+ if (!v || !br_vlan_should_use(v))
2894+ return -EINVAL;
2895+
2896+ if (!(v->flags & BRIDGE_VLAN_INFO_UNTAGGED))
2897+ return 0;
2898+
2899+ if (path->bridge.vlan_mode == DEV_PATH_BR_VLAN_TAG)
2900+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
2901+ else if (v->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
2902+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG_HW;
2903+ else
2904+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG;
2905+
2906+ return 0;
2907+}
2908+
2909 int br_vlan_get_info(const struct net_device *dev, u16 vid,
2910 struct bridge_vlan_info *p_vinfo)
2911 {
2912diff --git a/net/core/dev.c b/net/core/dev.c
developer58aa0682023-09-18 14:02:26 +08002913index 54cc544..a117bd0 100644
developer8cb3ac72022-07-04 10:55:14 +08002914--- a/net/core/dev.c
2915+++ b/net/core/dev.c
2916@@ -639,6 +639,52 @@ int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
2917 }
2918 EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
2919
2920+static struct net_device_path *dev_fwd_path(struct net_device_path_stack *stack)
2921+{
2922+ int k = stack->num_paths++;
2923+
2924+ if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX))
2925+ return NULL;
2926+
2927+ return &stack->path[k];
2928+}
2929+
2930+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
2931+ struct net_device_path_stack *stack)
2932+{
2933+ const struct net_device *last_dev;
2934+ struct net_device_path_ctx ctx = {
2935+ .dev = dev,
2936+ };
2937+ struct net_device_path *path;
2938+ int ret = 0;
2939+
2940+ memcpy(ctx.daddr, daddr, sizeof(ctx.daddr));
2941+ stack->num_paths = 0;
2942+ while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
2943+ last_dev = ctx.dev;
2944+ path = dev_fwd_path(stack);
2945+ if (!path)
2946+ return -1;
2947+
2948+ memset(path, 0, sizeof(struct net_device_path));
2949+ ret = ctx.dev->netdev_ops->ndo_fill_forward_path(&ctx, path);
2950+ if (ret < 0)
2951+ return -1;
2952+
2953+ if (WARN_ON_ONCE(last_dev == ctx.dev))
2954+ return -1;
2955+ }
2956+ path = dev_fwd_path(stack);
2957+ if (!path)
2958+ return -1;
2959+ path->type = DEV_PATH_ETHERNET;
2960+ path->dev = ctx.dev;
2961+
2962+ return ret;
2963+}
2964+EXPORT_SYMBOL_GPL(dev_fill_forward_path);
2965+
2966 /**
2967 * __dev_get_by_name - find a device by its name
2968 * @net: the applicable net namespace
2969diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
developer58aa0682023-09-18 14:02:26 +08002970index ca80f86..35a1249 100644
developer8cb3ac72022-07-04 10:55:14 +08002971--- a/net/dsa/dsa.c
2972+++ b/net/dsa/dsa.c
2973@@ -329,6 +329,15 @@ int call_dsa_notifiers(unsigned long val, struct net_device *dev,
2974 }
2975 EXPORT_SYMBOL_GPL(call_dsa_notifiers);
2976
2977+struct dsa_port *dsa_port_from_netdev(struct net_device *netdev)
2978+{
2979+ if (!netdev || !dsa_slave_dev_check(netdev))
2980+ return ERR_PTR(-ENODEV);
2981+
2982+ return dsa_slave_to_port(netdev);
2983+}
2984+EXPORT_SYMBOL_GPL(dsa_port_from_netdev);
2985+
2986 static int __init dsa_init_module(void)
2987 {
2988 int rc;
2989diff --git a/net/dsa/slave.c b/net/dsa/slave.c
developer58aa0682023-09-18 14:02:26 +08002990index e2b91b3..2dfaa1e 100644
developer8cb3ac72022-07-04 10:55:14 +08002991--- a/net/dsa/slave.c
2992+++ b/net/dsa/slave.c
developer58aa0682023-09-18 14:02:26 +08002993@@ -1031,14 +1031,32 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
developer8cb3ac72022-07-04 10:55:14 +08002994 }
2995 }
2996
2997+static int dsa_slave_setup_ft_block(struct dsa_switch *ds, int port,
2998+ void *type_data)
2999+{
3000+ struct dsa_port *cpu_dp = dsa_to_port(ds, port)->cpu_dp;
3001+ struct net_device *master = cpu_dp->master;
3002+
3003+ if (!master->netdev_ops->ndo_setup_tc)
3004+ return -EOPNOTSUPP;
3005+
3006+ return master->netdev_ops->ndo_setup_tc(master, TC_SETUP_FT, type_data);
3007+}
3008+
3009 static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
3010 void *type_data)
3011 {
3012 struct dsa_port *dp = dsa_slave_to_port(dev);
3013 struct dsa_switch *ds = dp->ds;
3014
3015- if (type == TC_SETUP_BLOCK)
3016+ switch (type) {
3017+ case TC_SETUP_BLOCK:
3018 return dsa_slave_setup_tc_block(dev, type_data);
3019+ case TC_SETUP_FT:
3020+ return dsa_slave_setup_ft_block(ds, dp->index, type_data);
3021+ default:
3022+ break;
3023+ }
3024
3025 if (!ds->ops->port_setup_tc)
3026 return -EOPNOTSUPP;
developer58aa0682023-09-18 14:02:26 +08003027@@ -1224,6 +1242,21 @@ static struct devlink_port *dsa_slave_get_devlink_port(struct net_device *dev)
developer8cb3ac72022-07-04 10:55:14 +08003028 return dp->ds->devlink ? &dp->devlink_port : NULL;
3029 }
3030
3031+static int dsa_slave_fill_forward_path(struct net_device_path_ctx *ctx,
3032+ struct net_device_path *path)
3033+{
3034+ struct dsa_port *dp = dsa_slave_to_port(ctx->dev);
3035+ struct dsa_port *cpu_dp = dp->cpu_dp;
3036+
3037+ path->dev = ctx->dev;
3038+ path->type = DEV_PATH_DSA;
3039+ path->dsa.proto = cpu_dp->tag_ops->proto;
3040+ path->dsa.port = dp->index;
3041+ ctx->dev = cpu_dp->master;
3042+
3043+ return 0;
3044+}
3045+
3046 static const struct net_device_ops dsa_slave_netdev_ops = {
3047 .ndo_open = dsa_slave_open,
3048 .ndo_stop = dsa_slave_close,
developer58aa0682023-09-18 14:02:26 +08003049@@ -1248,6 +1281,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
developer8cb3ac72022-07-04 10:55:14 +08003050 .ndo_vlan_rx_add_vid = dsa_slave_vlan_rx_add_vid,
3051 .ndo_vlan_rx_kill_vid = dsa_slave_vlan_rx_kill_vid,
3052 .ndo_get_devlink_port = dsa_slave_get_devlink_port,
3053+ .ndo_fill_forward_path = dsa_slave_fill_forward_path,
3054 };
3055
3056 static struct device_type dsa_type = {
developer58aa0682023-09-18 14:02:26 +08003057@@ -1499,6 +1533,7 @@ bool dsa_slave_dev_check(const struct net_device *dev)
developer8cb3ac72022-07-04 10:55:14 +08003058 {
3059 return dev->netdev_ops == &dsa_slave_netdev_ops;
3060 }
3061+EXPORT_SYMBOL_GPL(dsa_slave_dev_check);
3062
3063 static int dsa_slave_changeupper(struct net_device *dev,
3064 struct netdev_notifier_changeupper_info *info)
3065diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
developer58aa0682023-09-18 14:02:26 +08003066index f17b402..803b92e 100644
developer8cb3ac72022-07-04 10:55:14 +08003067--- a/net/ipv4/netfilter/Kconfig
3068+++ b/net/ipv4/netfilter/Kconfig
3069@@ -56,8 +56,6 @@ config NF_TABLES_ARP
3070 help
3071 This option enables the ARP support for nf_tables.
3072
3073-endif # NF_TABLES
3074-
3075 config NF_FLOW_TABLE_IPV4
3076 tristate "Netfilter flow table IPv4 module"
3077 depends on NF_FLOW_TABLE
3078@@ -66,6 +64,8 @@ config NF_FLOW_TABLE_IPV4
3079
3080 To compile it as a module, choose M here.
3081
3082+endif # NF_TABLES
3083+
3084 config NF_DUP_IPV4
3085 tristate "Netfilter IPv4 packet duplication to alternate destination"
3086 depends on !NF_CONNTRACK || NF_CONNTRACK
3087diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
developer58aa0682023-09-18 14:02:26 +08003088index 8231a7a..7176d7f 100644
developer8cb3ac72022-07-04 10:55:14 +08003089--- a/net/ipv6/ip6_output.c
3090+++ b/net/ipv6/ip6_output.c
3091@@ -607,7 +607,7 @@ int ip6_forward(struct sk_buff *skb)
3092 }
3093 }
3094
3095- mtu = ip6_dst_mtu_forward(dst);
3096+ mtu = ip6_dst_mtu_maybe_forward(dst, true);
3097 if (mtu < IPV6_MIN_MTU)
3098 mtu = IPV6_MIN_MTU;
3099
3100diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
developer58aa0682023-09-18 14:02:26 +08003101index 69443e9..0b481d2 100644
developer8cb3ac72022-07-04 10:55:14 +08003102--- a/net/ipv6/netfilter/Kconfig
3103+++ b/net/ipv6/netfilter/Kconfig
3104@@ -45,7 +45,6 @@ config NFT_FIB_IPV6
3105 multicast or blackhole.
3106
3107 endif # NF_TABLES_IPV6
3108-endif # NF_TABLES
3109
3110 config NF_FLOW_TABLE_IPV6
3111 tristate "Netfilter flow table IPv6 module"
3112@@ -55,6 +54,8 @@ config NF_FLOW_TABLE_IPV6
3113
3114 To compile it as a module, choose M here.
3115
3116+endif # NF_TABLES
3117+
3118 config NF_DUP_IPV6
3119 tristate "Netfilter IPv6 packet duplication to alternate destination"
3120 depends on !NF_CONNTRACK || NF_CONNTRACK
3121diff --git a/net/ipv6/route.c b/net/ipv6/route.c
developer58aa0682023-09-18 14:02:26 +08003122index 43d185c..82a752c 100644
developer8cb3ac72022-07-04 10:55:14 +08003123--- a/net/ipv6/route.c
3124+++ b/net/ipv6/route.c
3125@@ -83,7 +83,7 @@ enum rt6_nud_state {
3126
3127 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
3128 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
3129-static unsigned int ip6_mtu(const struct dst_entry *dst);
3130+static unsigned int ip6_mtu(const struct dst_entry *dst);
3131 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
3132 static void ip6_dst_destroy(struct dst_entry *);
3133 static void ip6_dst_ifdown(struct dst_entry *,
3134@@ -3125,25 +3125,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
3135
3136 static unsigned int ip6_mtu(const struct dst_entry *dst)
3137 {
3138- struct inet6_dev *idev;
3139- unsigned int mtu;
3140-
3141- mtu = dst_metric_raw(dst, RTAX_MTU);
3142- if (mtu)
3143- goto out;
3144-
3145- mtu = IPV6_MIN_MTU;
3146-
3147- rcu_read_lock();
3148- idev = __in6_dev_get(dst->dev);
3149- if (idev)
3150- mtu = idev->cnf.mtu6;
3151- rcu_read_unlock();
3152-
3153-out:
3154- mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
3155-
3156- return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
3157+ return ip6_dst_mtu_maybe_forward(dst, false);
3158 }
3159
3160 /* MTU selection:
3161diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
developer58aa0682023-09-18 14:02:26 +08003162index b6e0a62..5d690ab 100644
developer8cb3ac72022-07-04 10:55:14 +08003163--- a/net/netfilter/Kconfig
3164+++ b/net/netfilter/Kconfig
developer58aa0682023-09-18 14:02:26 +08003165@@ -689,8 +689,6 @@ config NFT_FIB_NETDEV
developer8cb3ac72022-07-04 10:55:14 +08003166
3167 endif # NF_TABLES_NETDEV
3168
3169-endif # NF_TABLES
3170-
3171 config NF_FLOW_TABLE_INET
3172 tristate "Netfilter flow table mixed IPv4/IPv6 module"
3173 depends on NF_FLOW_TABLE
developer58aa0682023-09-18 14:02:26 +08003174@@ -699,11 +697,12 @@ config NF_FLOW_TABLE_INET
developer8cb3ac72022-07-04 10:55:14 +08003175
3176 To compile it as a module, choose M here.
3177
3178+endif # NF_TABLES
3179+
3180 config NF_FLOW_TABLE
3181 tristate "Netfilter flow table module"
3182 depends on NETFILTER_INGRESS
3183 depends on NF_CONNTRACK
3184- depends on NF_TABLES
3185 help
3186 This option adds the flow table core infrastructure.
3187
developer58aa0682023-09-18 14:02:26 +08003188@@ -983,6 +982,15 @@ config NETFILTER_XT_TARGET_NOTRACK
developer8cb3ac72022-07-04 10:55:14 +08003189 depends on NETFILTER_ADVANCED
3190 select NETFILTER_XT_TARGET_CT
3191
3192+config NETFILTER_XT_TARGET_FLOWOFFLOAD
3193+ tristate '"FLOWOFFLOAD" target support'
3194+ depends on NF_FLOW_TABLE
3195+ depends on NETFILTER_INGRESS
3196+ help
3197+ This option adds a `FLOWOFFLOAD' target, which uses the nf_flow_offload
3198+ module to speed up processing of packets by bypassing the usual
3199+	  netfilter chains.
3200+
3201 config NETFILTER_XT_TARGET_RATEEST
3202 tristate '"RATEEST" target support'
3203 depends on NETFILTER_ADVANCED
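A minimal usage sketch for the FLOWOFFLOAD target, assuming the matching userspace libxt_FLOWOFFLOAD extension is installed (the --hw option is assumed here to request hardware offload through the flow table, as in the OpenWrt extension):

    # software fast path for established, forwarded flows
    iptables -A FORWARD -m conntrack --ctstate RELATED,ESTABLISHED -j FLOWOFFLOAD
    # request hardware offload (e.g. the MT7622 PPE) where the driver supports it
    iptables -A FORWARD -m conntrack --ctstate RELATED,ESTABLISHED -j FLOWOFFLOAD --hw

Only established, forwarded flows are handed to the flow table; the first packets of a connection still traverse the normal netfilter chains so that conntrack state can be set up before offload.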
3204diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
developer58aa0682023-09-18 14:02:26 +08003205index 4fc075b..d93a121 100644
developer8cb3ac72022-07-04 10:55:14 +08003206--- a/net/netfilter/Makefile
3207+++ b/net/netfilter/Makefile
3208@@ -120,7 +120,8 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
3209
3210 # flow table infrastructure
3211 obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
3212-nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
3213+nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \
3214+ nf_flow_table_offload.o
3215
3216 obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
3217
3218@@ -140,6 +141,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
3219 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
3220 obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
3221 obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
3222+obj-$(CONFIG_NETFILTER_XT_TARGET_FLOWOFFLOAD) += xt_FLOWOFFLOAD.o
3223 obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
3224 obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
3225 obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
3226diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
developer58aa0682023-09-18 14:02:26 +08003227index f6ab6f4..f689e19 100644
developer8cb3ac72022-07-04 10:55:14 +08003228--- a/net/netfilter/nf_conntrack_core.c
3229+++ b/net/netfilter/nf_conntrack_core.c
developer58aa0682023-09-18 14:02:26 +08003230@@ -864,9 +864,8 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
developer8cb3ac72022-07-04 10:55:14 +08003231 }
3232 EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
3233
3234-static inline void nf_ct_acct_update(struct nf_conn *ct,
3235- enum ip_conntrack_info ctinfo,
3236- unsigned int len)
3237+void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
3238+ unsigned int bytes)
3239 {
3240 struct nf_conn_acct *acct;
3241
3242@@ -874,10 +873,11 @@ static inline void nf_ct_acct_update(struct nf_conn *ct,
3243 if (acct) {
3244 struct nf_conn_counter *counter = acct->counter;
3245
3246- atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
3247- atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes);
3248+ atomic64_add(packets, &counter[dir].packets);
3249+ atomic64_add(bytes, &counter[dir].bytes);
3250 }
3251 }
3252+EXPORT_SYMBOL_GPL(nf_ct_acct_add);
3253
3254 static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
3255 const struct nf_conn *loser_ct)
3256@@ -891,7 +891,7 @@ static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
3257
3258 /* u32 should be fine since we must have seen one packet. */
3259 bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes);
3260- nf_ct_acct_update(ct, ctinfo, bytes);
3261+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), bytes);
3262 }
3263 }
3264
3265@@ -1238,8 +1238,10 @@ static void gc_worker(struct work_struct *work)
3266
3267 tmp = nf_ct_tuplehash_to_ctrack(h);
3268
3269- if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
3270+ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
3271+ nf_ct_offload_timeout(tmp);
3272 continue;
3273+ }
3274
3275 if (nf_ct_is_expired(tmp)) {
3276 nf_ct_gc_expired(tmp);
3277@@ -1763,7 +1765,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
3278 WRITE_ONCE(ct->timeout, extra_jiffies);
3279 acct:
3280 if (do_acct)
3281- nf_ct_acct_update(ct, ctinfo, skb->len);
3282+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
3283 }
3284 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
3285
3286@@ -1771,7 +1773,7 @@ bool nf_ct_kill_acct(struct nf_conn *ct,
3287 enum ip_conntrack_info ctinfo,
3288 const struct sk_buff *skb)
3289 {
3290- nf_ct_acct_update(ct, ctinfo, skb->len);
3291+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
3292
3293 return nf_ct_delete(ct, 0, 0);
3294 }
3295diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
developer58aa0682023-09-18 14:02:26 +08003296index e219b6f..5cdc627 100644
developer8cb3ac72022-07-04 10:55:14 +08003297--- a/net/netfilter/nf_conntrack_proto_tcp.c
3298+++ b/net/netfilter/nf_conntrack_proto_tcp.c
developer58aa0682023-09-18 14:02:26 +08003299@@ -1463,6 +1463,10 @@ void nf_conntrack_tcp_init_net(struct net *net)
developer8cb3ac72022-07-04 10:55:14 +08003300 tn->tcp_loose = nf_ct_tcp_loose;
3301 tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
3302 tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
3303+
3304+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3305+ tn->offload_timeout = 30 * HZ;
3306+#endif
3307 }
3308
3309 const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
3310diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
developer58aa0682023-09-18 14:02:26 +08003311index e3a2d01..a1579d6 100644
developer8cb3ac72022-07-04 10:55:14 +08003312--- a/net/netfilter/nf_conntrack_proto_udp.c
3313+++ b/net/netfilter/nf_conntrack_proto_udp.c
3314@@ -267,6 +267,10 @@ void nf_conntrack_udp_init_net(struct net *net)
3315
3316 for (i = 0; i < UDP_CT_MAX; i++)
3317 un->timeouts[i] = udp_timeouts[i];
3318+
3319+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3320+ un->offload_timeout = 30 * HZ;
3321+#endif
3322 }
3323
3324 const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp =
3325diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
developer58aa0682023-09-18 14:02:26 +08003326index 0b600b4..a2cfafa 100644
developer8cb3ac72022-07-04 10:55:14 +08003327--- a/net/netfilter/nf_conntrack_standalone.c
3328+++ b/net/netfilter/nf_conntrack_standalone.c
3329@@ -353,7 +353,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
3330 if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
3331 goto release;
3332
3333- if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
3334+ if (test_bit(IPS_HW_OFFLOAD_BIT, &ct->status))
3335+ seq_puts(s, "[HW_OFFLOAD] ");
3336+ else if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
3337 seq_puts(s, "[OFFLOAD] ");
3338 else if (test_bit(IPS_ASSURED_BIT, &ct->status))
3339 seq_puts(s, "[ASSURED] ");
3340@@ -620,11 +622,17 @@ enum nf_ct_sysctl_index {
3341 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE,
3342 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS,
3343 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
3344+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3345+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
3346+#endif
3347 NF_SYSCTL_CT_PROTO_TCP_LOOSE,
3348 NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
3349 NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS,
3350 NF_SYSCTL_CT_PROTO_TIMEOUT_UDP,
3351 NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
3352+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3353+ NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
3354+#endif
3355 NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
3356 NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
3357 #ifdef CONFIG_NF_CT_PROTO_SCTP
developer58aa0682023-09-18 14:02:26 +08003358@@ -811,6 +819,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
developer8cb3ac72022-07-04 10:55:14 +08003359 .mode = 0644,
3360 .proc_handler = proc_dointvec_jiffies,
3361 },
3362+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3363+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD] = {
3364+ .procname = "nf_flowtable_tcp_timeout",
3365+ .maxlen = sizeof(unsigned int),
3366+ .mode = 0644,
3367+ .proc_handler = proc_dointvec_jiffies,
3368+ },
3369+#endif
3370 [NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
3371 .procname = "nf_conntrack_tcp_loose",
3372 .maxlen = sizeof(int),
developer58aa0682023-09-18 14:02:26 +08003373@@ -845,6 +861,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
developer8cb3ac72022-07-04 10:55:14 +08003374 .mode = 0644,
3375 .proc_handler = proc_dointvec_jiffies,
3376 },
3377+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3378+ [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = {
3379+ .procname = "nf_flowtable_udp_timeout",
3380+ .maxlen = sizeof(unsigned int),
3381+ .mode = 0644,
3382+ .proc_handler = proc_dointvec_jiffies,
3383+ },
3384+#endif
3385 [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
3386 .procname = "nf_conntrack_icmp_timeout",
3387 .maxlen = sizeof(unsigned int),
developer58aa0682023-09-18 14:02:26 +08003388@@ -1021,6 +1045,11 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
developer8cb3ac72022-07-04 10:55:14 +08003389 XASSIGN(LIBERAL, &tn->tcp_be_liberal);
3390 XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans);
3391 #undef XASSIGN
3392+
3393+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3394+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout;
3395+#endif
3396+
3397 }
3398
3399 static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net,
developer58aa0682023-09-18 14:02:26 +08003400@@ -1107,6 +1136,9 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
developer8cb3ac72022-07-04 10:55:14 +08003401 table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6].data = &nf_icmpv6_pernet(net)->timeout;
3402 table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP].data = &un->timeouts[UDP_CT_UNREPLIED];
3403 table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
3404+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3405+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
3406+#endif
3407
3408 nf_conntrack_standalone_init_tcp_sysctl(net, table);
3409 nf_conntrack_standalone_init_sctp_sysctl(net, table);
3410diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
developer58aa0682023-09-18 14:02:26 +08003411index f212cec..c3054af 100644
developer8cb3ac72022-07-04 10:55:14 +08003412--- a/net/netfilter/nf_flow_table_core.c
3413+++ b/net/netfilter/nf_flow_table_core.c
developerb7c46752022-07-04 19:51:38 +08003414@@ -7,43 +7,21 @@
developer8cb3ac72022-07-04 10:55:14 +08003415 #include <linux/netdevice.h>
3416 #include <net/ip.h>
3417 #include <net/ip6_route.h>
3418-#include <net/netfilter/nf_tables.h>
3419 #include <net/netfilter/nf_flow_table.h>
3420 #include <net/netfilter/nf_conntrack.h>
3421 #include <net/netfilter/nf_conntrack_core.h>
3422 #include <net/netfilter/nf_conntrack_l4proto.h>
3423 #include <net/netfilter/nf_conntrack_tuple.h>
3424
3425-struct flow_offload_entry {
3426- struct flow_offload flow;
3427- struct nf_conn *ct;
3428- struct rcu_head rcu_head;
3429-};
3430-
3431 static DEFINE_MUTEX(flowtable_lock);
3432 static LIST_HEAD(flowtables);
3433
developerb7c46752022-07-04 19:51:38 +08003434-static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
3435-{
3436- const struct rt6_info *rt;
3437-
3438- if (flow_tuple->l3proto == NFPROTO_IPV6) {
3439- rt = (const struct rt6_info *)flow_tuple->dst_cache;
3440- return rt6_get_cookie(rt);
3441- }
3442-
3443- return 0;
3444-}
3445-
developer8cb3ac72022-07-04 10:55:14 +08003446 static void
3447-flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
3448- struct nf_flow_route *route,
3449+flow_offload_fill_dir(struct flow_offload *flow,
3450 enum flow_offload_tuple_dir dir)
3451 {
3452 struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
3453- struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
3454- struct dst_entry *other_dst = route->tuple[!dir].dst;
3455- struct dst_entry *dst = route->tuple[dir].dst;
3456+ struct nf_conntrack_tuple *ctt = &flow->ct->tuplehash[dir].tuple;
3457
3458 ft->dir = dir;
3459
developerb7c46752022-07-04 19:51:38 +08003460@@ -51,12 +29,10 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
developer8cb3ac72022-07-04 10:55:14 +08003461 case NFPROTO_IPV4:
3462 ft->src_v4 = ctt->src.u3.in;
3463 ft->dst_v4 = ctt->dst.u3.in;
3464- ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
3465 break;
3466 case NFPROTO_IPV6:
3467 ft->src_v6 = ctt->src.u3.in6;
3468 ft->dst_v6 = ctt->dst.u3.in6;
3469- ft->mtu = ip6_dst_mtu_forward(dst);
3470 break;
3471 }
3472
developerb7c46752022-07-04 19:51:38 +08003473@@ -64,50 +40,32 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
developer8cb3ac72022-07-04 10:55:14 +08003474 ft->l4proto = ctt->dst.protonum;
3475 ft->src_port = ctt->src.u.tcp.port;
3476 ft->dst_port = ctt->dst.u.tcp.port;
3477-
3478- ft->iifidx = other_dst->dev->ifindex;
3479- ft->dst_cache = dst;
developerb7c46752022-07-04 19:51:38 +08003480- ft->dst_cookie = flow_offload_dst_cookie(ft);
developer8cb3ac72022-07-04 10:55:14 +08003481 }
3482
3483-struct flow_offload *
3484-flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
3485+struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
3486 {
3487- struct flow_offload_entry *entry;
3488 struct flow_offload *flow;
3489
3490 if (unlikely(nf_ct_is_dying(ct) ||
3491 !atomic_inc_not_zero(&ct->ct_general.use)))
3492 return NULL;
3493
3494- entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
3495- if (!entry)
3496+ flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
3497+ if (!flow)
3498 goto err_ct_refcnt;
3499
3500- flow = &entry->flow;
developerb7c46752022-07-04 19:51:38 +08003501-
developer8cb3ac72022-07-04 10:55:14 +08003502- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
3503- goto err_dst_cache_original;
developeree39bcf2023-06-16 08:03:30 +08003504-
developer7eb15dc2023-06-14 17:44:03 +08003505- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
3506- goto err_dst_cache_reply;
developeree39bcf2023-06-16 08:03:30 +08003507+ flow->ct = ct;
3508
developer8cb3ac72022-07-04 10:55:14 +08003509- entry->ct = ct;
3510-
3511- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
3512- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
3513+ flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
3514+ flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_REPLY);
3515
3516 if (ct->status & IPS_SRC_NAT)
3517- flow->flags |= FLOW_OFFLOAD_SNAT;
3518+ __set_bit(NF_FLOW_SNAT, &flow->flags);
3519 if (ct->status & IPS_DST_NAT)
3520- flow->flags |= FLOW_OFFLOAD_DNAT;
3521+ __set_bit(NF_FLOW_DNAT, &flow->flags);
3522
3523 return flow;
3524
3525-err_dst_cache_reply:
3526- dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
3527-err_dst_cache_original:
3528- kfree(entry);
3529 err_ct_refcnt:
3530 nf_ct_put(ct);
3531
developeree39bcf2023-06-16 08:03:30 +08003532@@ -115,40 +73,135 @@ flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
developer8cb3ac72022-07-04 10:55:14 +08003533 }
3534 EXPORT_SYMBOL_GPL(flow_offload_alloc);
3535
3536-static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
3537+static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
3538 {
3539- tcp->state = TCP_CONNTRACK_ESTABLISHED;
3540- tcp->seen[0].td_maxwin = 0;
3541- tcp->seen[1].td_maxwin = 0;
3542+ const struct rt6_info *rt;
3543+
3544+ if (flow_tuple->l3proto == NFPROTO_IPV6) {
3545+ rt = (const struct rt6_info *)flow_tuple->dst_cache;
3546+ return rt6_get_cookie(rt);
3547+ }
3548+
3549+ return 0;
3550 }
3551
3552-#define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ)
3553-#define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ)
3554+static int flow_offload_fill_route(struct flow_offload *flow,
3555+ const struct nf_flow_route *route,
3556+ enum flow_offload_tuple_dir dir)
3557+{
3558+ struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
3559+ struct dst_entry *dst = route->tuple[dir].dst;
3560+ int i, j = 0;
developeree39bcf2023-06-16 08:03:30 +08003561+
developer8cb3ac72022-07-04 10:55:14 +08003562+ switch (flow_tuple->l3proto) {
3563+ case NFPROTO_IPV4:
3564+ flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
3565+ break;
3566+ case NFPROTO_IPV6:
3567+ flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true);
3568+ break;
3569+ }
3570+
3571+ flow_tuple->iifidx = route->tuple[dir].in.ifindex;
3572+ for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {
3573+ flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
3574+ flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
3575+ if (route->tuple[dir].in.ingress_vlans & BIT(i))
3576+ flow_tuple->in_vlan_ingress |= BIT(j);
3577+ j++;
3578+ }
3579+ flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
3580+
3581+ switch (route->tuple[dir].xmit_type) {
3582+ case FLOW_OFFLOAD_XMIT_DIRECT:
3583+ memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
3584+ ETH_ALEN);
3585+ memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
3586+ ETH_ALEN);
3587+ flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
3588+ flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
3589+ break;
3590+ case FLOW_OFFLOAD_XMIT_XFRM:
3591+ case FLOW_OFFLOAD_XMIT_NEIGH:
3592+ if (!dst_hold_safe(route->tuple[dir].dst))
3593+ return -1;
3594+
3595+ flow_tuple->dst_cache = dst;
3596+ flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
3597+ break;
3598+ default:
3599+ WARN_ON_ONCE(1);
3600+ break;
3601+ }
3602+ flow_tuple->xmit_type = route->tuple[dir].xmit_type;
developerb7c46752022-07-04 19:51:38 +08003603+
developer8cb3ac72022-07-04 10:55:14 +08003604+ return 0;
3605+}
3606+
3607+static void nft_flow_dst_release(struct flow_offload *flow,
3608+ enum flow_offload_tuple_dir dir)
developeree39bcf2023-06-16 08:03:30 +08003609+{
developer8cb3ac72022-07-04 10:55:14 +08003610+ if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
3611+ flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
3612+ dst_release(flow->tuplehash[dir].tuple.dst_cache);
developeree39bcf2023-06-16 08:03:30 +08003613+}
3614+
developer8cb3ac72022-07-04 10:55:14 +08003615+int flow_offload_route_init(struct flow_offload *flow,
3616+ const struct nf_flow_route *route)
developeree39bcf2023-06-16 08:03:30 +08003617+{
developer8cb3ac72022-07-04 10:55:14 +08003618+ int err;
developeree39bcf2023-06-16 08:03:30 +08003619+
developer8cb3ac72022-07-04 10:55:14 +08003620+ err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
3621+ if (err < 0)
3622+ return err;
developeree39bcf2023-06-16 08:03:30 +08003623+
developer8cb3ac72022-07-04 10:55:14 +08003624+ err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
3625+ if (err < 0)
3626+ goto err_route_reply;
3627+
3628+ flow->type = NF_FLOW_OFFLOAD_ROUTE;
developeree39bcf2023-06-16 08:03:30 +08003629+
developer8cb3ac72022-07-04 10:55:14 +08003630+ return 0;
3631+
3632+err_route_reply:
3633+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
3634+
3635+ return err;
developeree39bcf2023-06-16 08:03:30 +08003636+}
developer8cb3ac72022-07-04 10:55:14 +08003637+EXPORT_SYMBOL_GPL(flow_offload_route_init);
developerb7c46752022-07-04 19:51:38 +08003638
developeree39bcf2023-06-16 08:03:30 +08003639-static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
developer8cb3ac72022-07-04 10:55:14 +08003640+static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
3641 {
developeree39bcf2023-06-16 08:03:30 +08003642- return (__s32)(timeout - (u32)jiffies);
3643+ tcp->state = TCP_CONNTRACK_ESTABLISHED;
developer8cb3ac72022-07-04 10:55:14 +08003644+ tcp->seen[0].td_maxwin = 0;
3645+ tcp->seen[1].td_maxwin = 0;
3646 }
3647
developeree39bcf2023-06-16 08:03:30 +08003648 static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
developer8cb3ac72022-07-04 10:55:14 +08003649 {
developeree39bcf2023-06-16 08:03:30 +08003650- const struct nf_conntrack_l4proto *l4proto;
developer8cb3ac72022-07-04 10:55:14 +08003651+ struct net *net = nf_ct_net(ct);
developeree39bcf2023-06-16 08:03:30 +08003652 int l4num = nf_ct_protonum(ct);
3653- unsigned int timeout;
developer8cb3ac72022-07-04 10:55:14 +08003654+ s32 timeout;
developeree39bcf2023-06-16 08:03:30 +08003655
3656- l4proto = nf_ct_l4proto_find(l4num);
3657- if (!l4proto)
3658- return;
developer8cb3ac72022-07-04 10:55:14 +08003659+ if (l4num == IPPROTO_TCP) {
3660+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
developeree39bcf2023-06-16 08:03:30 +08003661
3662- if (l4num == IPPROTO_TCP)
3663- timeout = NF_FLOWTABLE_TCP_PICKUP_TIMEOUT;
3664- else if (l4num == IPPROTO_UDP)
3665- timeout = NF_FLOWTABLE_UDP_PICKUP_TIMEOUT;
3666- else
3667+ timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
developer8cb3ac72022-07-04 10:55:14 +08003668+ timeout -= tn->offload_timeout;
3669+ } else if (l4num == IPPROTO_UDP) {
3670+ struct nf_udp_net *tn = nf_udp_pernet(net);
3671+
3672+ timeout = tn->timeouts[UDP_CT_REPLIED];
3673+ timeout -= tn->offload_timeout;
3674+ } else {
developeree39bcf2023-06-16 08:03:30 +08003675 return;
developer8cb3ac72022-07-04 10:55:14 +08003676+ }
3677+
3678+ if (timeout < 0)
3679+ timeout = 0;
developeree39bcf2023-06-16 08:03:30 +08003680
3681- if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
3682- ct->timeout = nfct_time_stamp + timeout;
developer8cb3ac72022-07-04 10:55:14 +08003683+ if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
3684+ WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
3685 }
3686
developeree39bcf2023-06-16 08:03:30 +08003687 static void flow_offload_fixup_ct_state(struct nf_conn *ct)
3688@@ -163,17 +216,23 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
3689 flow_offload_fixup_ct_timeout(ct);
3690 }
3691
developer8cb3ac72022-07-04 10:55:14 +08003692-void flow_offload_free(struct flow_offload *flow)
3693+static void flow_offload_route_release(struct flow_offload *flow)
3694 {
3695- struct flow_offload_entry *e;
3696+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
3697+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY);
3698+}
3699
3700- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
3701- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
3702- e = container_of(flow, struct flow_offload_entry, flow);
3703- if (flow->flags & FLOW_OFFLOAD_DYING)
3704- nf_ct_delete(e->ct, 0, 0);
3705- nf_ct_put(e->ct);
3706- kfree_rcu(e, rcu_head);
3707+void flow_offload_free(struct flow_offload *flow)
3708+{
3709+ switch (flow->type) {
3710+ case NF_FLOW_OFFLOAD_ROUTE:
3711+ flow_offload_route_release(flow);
3712+ break;
3713+ default:
3714+ break;
3715+ }
3716+ nf_ct_put(flow->ct);
3717+ kfree_rcu(flow, rcu_head);
3718 }
3719 EXPORT_SYMBOL_GPL(flow_offload_free);
3720
developeree39bcf2023-06-16 08:03:30 +08003721@@ -181,14 +240,14 @@ static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
developer8cb3ac72022-07-04 10:55:14 +08003722 {
3723 const struct flow_offload_tuple *tuple = data;
3724
3725- return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
3726+ return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed);
3727 }
3728
3729 static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
3730 {
3731 const struct flow_offload_tuple_rhash *tuplehash = data;
3732
3733- return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
3734+ return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed);
3735 }
3736
3737 static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
developeree39bcf2023-06-16 08:03:30 +08003738@@ -197,7 +256,7 @@ static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
developer8cb3ac72022-07-04 10:55:14 +08003739 const struct flow_offload_tuple *tuple = arg->key;
3740 const struct flow_offload_tuple_rhash *x = ptr;
3741
3742- if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
3743+ if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash)))
3744 return 1;
3745
3746 return 0;
developeree39bcf2023-06-16 08:03:30 +08003747@@ -211,30 +270,30 @@ static const struct rhashtable_params nf_flow_offload_rhash_params = {
developer8cb3ac72022-07-04 10:55:14 +08003748 .automatic_shrinking = true,
3749 };
3750
3751-#define DAY (86400 * HZ)
3752-
3753-/* Set an arbitrary timeout large enough not to ever expire, this save
3754- * us a check for the IPS_OFFLOAD_BIT from the packet path via
3755- * nf_ct_is_expired().
3756- */
3757-static void nf_ct_offload_timeout(struct flow_offload *flow)
3758+unsigned long flow_offload_get_timeout(struct flow_offload *flow)
3759 {
3760- struct flow_offload_entry *entry;
3761- struct nf_conn *ct;
3762+ unsigned long timeout = NF_FLOW_TIMEOUT;
3763+ struct net *net = nf_ct_net(flow->ct);
3764+ int l4num = nf_ct_protonum(flow->ct);
developeree39bcf2023-06-16 08:03:30 +08003765
3766- entry = container_of(flow, struct flow_offload_entry, flow);
3767- ct = entry->ct;
developerb7c46752022-07-04 19:51:38 +08003768+ if (l4num == IPPROTO_TCP) {
3769+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
developeree39bcf2023-06-16 08:03:30 +08003770
3771- if (nf_ct_expires(ct) < DAY / 2)
3772- ct->timeout = nfct_time_stamp + DAY;
developer8cb3ac72022-07-04 10:55:14 +08003773+ timeout = tn->offload_timeout;
3774+ } else if (l4num == IPPROTO_UDP) {
3775+ struct nf_udp_net *tn = nf_udp_pernet(net);
developeree39bcf2023-06-16 08:03:30 +08003776+
developer8cb3ac72022-07-04 10:55:14 +08003777+ timeout = tn->offload_timeout;
3778+ }
developeree39bcf2023-06-16 08:03:30 +08003779+
developer8cb3ac72022-07-04 10:55:14 +08003780+ return timeout;
3781 }
3782
3783 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
3784 {
3785 int err;
3786
3787- nf_ct_offload_timeout(flow);
3788- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
3789+ flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
3790
3791 err = rhashtable_insert_fast(&flow_table->rhashtable,
3792 &flow->tuplehash[0].node,
developeree39bcf2023-06-16 08:03:30 +08003793@@ -252,10 +311,35 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
developer8cb3ac72022-07-04 10:55:14 +08003794 return err;
3795 }
3796
3797+ nf_ct_offload_timeout(flow->ct);
3798+
3799+ if (nf_flowtable_hw_offload(flow_table)) {
3800+ __set_bit(NF_FLOW_HW, &flow->flags);
3801+ nf_flow_offload_add(flow_table, flow);
3802+ }
3803+
3804 return 0;
3805 }
3806 EXPORT_SYMBOL_GPL(flow_offload_add);
3807
3808+void flow_offload_refresh(struct nf_flowtable *flow_table,
3809+ struct flow_offload *flow)
3810+{
3811+ u32 timeout;
3812+
3813+ timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
3814+ if (timeout - READ_ONCE(flow->timeout) > HZ)
3815+ WRITE_ONCE(flow->timeout, timeout);
3816+ else
3817+ return;
3818+
3819+ if (likely(!nf_flowtable_hw_offload(flow_table)))
3820+ return;
3821+
3822+ nf_flow_offload_add(flow_table, flow);
3823+}
3824+EXPORT_SYMBOL_GPL(flow_offload_refresh);
3825+
3826 static inline bool nf_flow_has_expired(const struct flow_offload *flow)
3827 {
3828 return nf_flow_timeout_delta(flow->timeout) <= 0;
developeree39bcf2023-06-16 08:03:30 +08003829@@ -264,8 +348,6 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
developer8cb3ac72022-07-04 10:55:14 +08003830 static void flow_offload_del(struct nf_flowtable *flow_table,
3831 struct flow_offload *flow)
3832 {
3833- struct flow_offload_entry *e;
3834-
3835 rhashtable_remove_fast(&flow_table->rhashtable,
3836 &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
3837 nf_flow_offload_rhash_params);
developeree39bcf2023-06-16 08:03:30 +08003838@@ -273,28 +355,21 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003839 &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
3840 nf_flow_offload_rhash_params);
developeree39bcf2023-06-16 08:03:30 +08003841
developer8cb3ac72022-07-04 10:55:14 +08003842- e = container_of(flow, struct flow_offload_entry, flow);
3843- clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
developeree39bcf2023-06-16 08:03:30 +08003844+ clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
3845
3846 if (nf_flow_has_expired(flow))
developer8cb3ac72022-07-04 10:55:14 +08003847- flow_offload_fixup_ct(e->ct);
3848- else if (flow->flags & FLOW_OFFLOAD_TEARDOWN)
3849- flow_offload_fixup_ct_timeout(e->ct);
3850-
3851- if (!(flow->flags & FLOW_OFFLOAD_TEARDOWN))
3852- flow_offload_fixup_ct_state(e->ct);
developeree39bcf2023-06-16 08:03:30 +08003853+ flow_offload_fixup_ct(flow->ct);
3854+ else
3855+ flow_offload_fixup_ct_timeout(flow->ct);
3856
developer8cb3ac72022-07-04 10:55:14 +08003857 flow_offload_free(flow);
3858 }
3859
3860 void flow_offload_teardown(struct flow_offload *flow)
3861 {
3862- struct flow_offload_entry *e;
developerb7c46752022-07-04 19:51:38 +08003863-
3864- flow->flags |= FLOW_OFFLOAD_TEARDOWN;
developeree39bcf2023-06-16 08:03:30 +08003865+ set_bit(NF_FLOW_TEARDOWN, &flow->flags);
3866
developer8cb3ac72022-07-04 10:55:14 +08003867- e = container_of(flow, struct flow_offload_entry, flow);
3868- flow_offload_fixup_ct_state(e->ct);
developeree39bcf2023-06-16 08:03:30 +08003869+ flow_offload_fixup_ct_state(flow->ct);
developer8cb3ac72022-07-04 10:55:14 +08003870 }
3871 EXPORT_SYMBOL_GPL(flow_offload_teardown);
3872
developeree39bcf2023-06-16 08:03:30 +08003873@@ -304,7 +379,6 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003874 {
3875 struct flow_offload_tuple_rhash *tuplehash;
3876 struct flow_offload *flow;
3877- struct flow_offload_entry *e;
3878 int dir;
3879
3880 tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
developeree39bcf2023-06-16 08:03:30 +08003881@@ -314,19 +388,17 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003882
3883 dir = tuplehash->tuple.dir;
3884 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
3885- if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
3886+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags))
3887 return NULL;
3888
3889- e = container_of(flow, struct flow_offload_entry, flow);
3890- if (unlikely(nf_ct_is_dying(e->ct)))
3891+ if (unlikely(nf_ct_is_dying(flow->ct)))
3892 return NULL;
3893
3894 return tuplehash;
3895 }
3896 EXPORT_SYMBOL_GPL(flow_offload_lookup);
3897
3898-static int
3899-nf_flow_table_iterate(struct nf_flowtable *flow_table,
3900+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
3901 void (*iter)(struct flow_offload *flow, void *data),
3902 void *data)
3903 {
developeree39bcf2023-06-16 08:03:30 +08003904@@ -339,7 +411,6 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003905 rhashtable_walk_start(&hti);
3906
3907 while ((tuplehash = rhashtable_walk_next(&hti))) {
3908-
3909 if (IS_ERR(tuplehash)) {
3910 if (PTR_ERR(tuplehash) != -EAGAIN) {
3911 err = PTR_ERR(tuplehash);
developeree39bcf2023-06-16 08:03:30 +08003912@@ -359,23 +430,52 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003913
3914 return err;
3915 }
3916+EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
3917
developeree39bcf2023-06-16 08:03:30 +08003918-static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
3919+static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple)
developer8cb3ac72022-07-04 10:55:14 +08003920 {
developeree39bcf2023-06-16 08:03:30 +08003921- struct nf_flowtable *flow_table = data;
developer8cb3ac72022-07-04 10:55:14 +08003922- struct flow_offload_entry *e;
3923- bool teardown;
developeree39bcf2023-06-16 08:03:30 +08003924+ struct dst_entry *dst;
developer8cb3ac72022-07-04 10:55:14 +08003925
3926- e = container_of(flow, struct flow_offload_entry, flow);
developeree39bcf2023-06-16 08:03:30 +08003927+ if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
3928+ tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
3929+ dst = tuple->dst_cache;
3930+ if (!dst_check(dst, tuple->dst_cookie))
3931+ return true;
3932+ }
3933
developer8cb3ac72022-07-04 10:55:14 +08003934- teardown = flow->flags & (FLOW_OFFLOAD_DYING |
3935- FLOW_OFFLOAD_TEARDOWN);
developeree39bcf2023-06-16 08:03:30 +08003936+ return false;
3937+}
3938
developer8cb3ac72022-07-04 10:55:14 +08003939- if (!teardown)
3940- nf_ct_offload_timeout(flow);
developeree39bcf2023-06-16 08:03:30 +08003941+static bool nf_flow_has_stale_dst(struct flow_offload *flow)
3942+{
3943+ return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) ||
3944+ flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple);
3945+}
developer8cb3ac72022-07-04 10:55:14 +08003946
3947- if (nf_flow_has_expired(flow) || teardown)
3948- flow_offload_del(flow_table, flow);
developeree39bcf2023-06-16 08:03:30 +08003949+static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
3950+{
3951+ struct nf_flowtable *flow_table = data;
3952+
3953+ if (nf_flow_has_expired(flow) ||
3954+ nf_ct_is_dying(flow->ct) ||
3955+ nf_flow_has_stale_dst(flow))
3956+ set_bit(NF_FLOW_TEARDOWN, &flow->flags);
3957+
developer8cb3ac72022-07-04 10:55:14 +08003958+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
3959+ if (test_bit(NF_FLOW_HW, &flow->flags)) {
developeree39bcf2023-06-16 08:03:30 +08003960+ if (!test_and_set_bit(NF_FLOW_HW_ACCT_DYING, &flow->flags))
3961+ nf_flow_offload_stats(flow_table, flow, true);
3962+
developer8cb3ac72022-07-04 10:55:14 +08003963+ if (!test_bit(NF_FLOW_HW_DYING, &flow->flags))
3964+ nf_flow_offload_del(flow_table, flow);
3965+ else if (test_bit(NF_FLOW_HW_DEAD, &flow->flags))
3966+ flow_offload_del(flow_table, flow);
3967+ } else {
3968+ flow_offload_del(flow_table, flow);
3969+ }
3970+ } else if (test_bit(NF_FLOW_HW, &flow->flags)) {
developeree39bcf2023-06-16 08:03:30 +08003971+ nf_flow_offload_stats(flow_table, flow, false);
developer8cb3ac72022-07-04 10:55:14 +08003972+ }
3973 }
3974
3975 static void nf_flow_offload_work_gc(struct work_struct *work)
developer58aa0682023-09-18 14:02:26 +08003976@@ -387,30 +487,20 @@ static void nf_flow_offload_work_gc(struct work_struct *work)
developer8cb3ac72022-07-04 10:55:14 +08003977 queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
3978 }
3979
3980-static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
3981- __be16 port, __be16 new_port)
3982+static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
3983+ __be16 port, __be16 new_port)
3984 {
3985 struct tcphdr *tcph;
3986
3987- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
3988- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
3989- return -1;
3990-
3991 tcph = (void *)(skb_network_header(skb) + thoff);
3992 inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
3993-
3994- return 0;
3995 }
3996
3997-static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
3998- __be16 port, __be16 new_port)
3999+static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
4000+ __be16 port, __be16 new_port)
4001 {
4002 struct udphdr *udph;
4003
4004- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
4005- skb_try_make_writable(skb, thoff + sizeof(*udph)))
4006- return -1;
4007-
4008 udph = (void *)(skb_network_header(skb) + thoff);
4009 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
4010 inet_proto_csum_replace2(&udph->check, skb, port,
developer58aa0682023-09-18 14:02:26 +08004011@@ -418,38 +508,28 @@ static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
developer8cb3ac72022-07-04 10:55:14 +08004012 if (!udph->check)
4013 udph->check = CSUM_MANGLED_0;
4014 }
4015-
4016- return 0;
4017 }
4018
4019-static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
4020- u8 protocol, __be16 port, __be16 new_port)
4021+static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
4022+ u8 protocol, __be16 port, __be16 new_port)
4023 {
4024 switch (protocol) {
4025 case IPPROTO_TCP:
4026- if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
4027- return NF_DROP;
4028+ nf_flow_nat_port_tcp(skb, thoff, port, new_port);
4029 break;
4030 case IPPROTO_UDP:
4031- if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
4032- return NF_DROP;
4033+ nf_flow_nat_port_udp(skb, thoff, port, new_port);
4034 break;
4035 }
4036-
4037- return 0;
4038 }
4039
4040-int nf_flow_snat_port(const struct flow_offload *flow,
4041- struct sk_buff *skb, unsigned int thoff,
4042- u8 protocol, enum flow_offload_tuple_dir dir)
4043+void nf_flow_snat_port(const struct flow_offload *flow,
4044+ struct sk_buff *skb, unsigned int thoff,
4045+ u8 protocol, enum flow_offload_tuple_dir dir)
4046 {
4047 struct flow_ports *hdr;
4048 __be16 port, new_port;
4049
4050- if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
4051- skb_try_make_writable(skb, thoff + sizeof(*hdr)))
4052- return -1;
4053-
4054 hdr = (void *)(skb_network_header(skb) + thoff);
4055
4056 switch (dir) {
developer58aa0682023-09-18 14:02:26 +08004057@@ -463,25 +543,19 @@ int nf_flow_snat_port(const struct flow_offload *flow,
developer8cb3ac72022-07-04 10:55:14 +08004058 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
4059 hdr->dest = new_port;
4060 break;
4061- default:
4062- return -1;
4063 }
4064
4065- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4066+ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4067 }
4068 EXPORT_SYMBOL_GPL(nf_flow_snat_port);
4069
4070-int nf_flow_dnat_port(const struct flow_offload *flow,
4071- struct sk_buff *skb, unsigned int thoff,
4072- u8 protocol, enum flow_offload_tuple_dir dir)
4073+void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb,
4074+ unsigned int thoff, u8 protocol,
4075+ enum flow_offload_tuple_dir dir)
4076 {
4077 struct flow_ports *hdr;
4078 __be16 port, new_port;
4079
4080- if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
4081- skb_try_make_writable(skb, thoff + sizeof(*hdr)))
4082- return -1;
4083-
4084 hdr = (void *)(skb_network_header(skb) + thoff);
4085
4086 switch (dir) {
developer58aa0682023-09-18 14:02:26 +08004087@@ -495,11 +569,9 @@ int nf_flow_dnat_port(const struct flow_offload *flow,
developer8cb3ac72022-07-04 10:55:14 +08004088 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
4089 hdr->source = new_port;
4090 break;
4091- default:
4092- return -1;
4093 }
4094
4095- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4096+ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4097 }
4098 EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
4099
developer58aa0682023-09-18 14:02:26 +08004100@@ -507,7 +579,9 @@ int nf_flow_table_init(struct nf_flowtable *flowtable)
developer8cb3ac72022-07-04 10:55:14 +08004101 {
4102 int err;
4103
4104- INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
4105+ INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
4106+ flow_block_init(&flowtable->flow_block);
4107+ init_rwsem(&flowtable->flow_block_lock);
4108
4109 err = rhashtable_init(&flowtable->rhashtable,
4110 &nf_flow_offload_rhash_params);
developer58aa0682023-09-18 14:02:26 +08004111@@ -528,25 +602,24 @@ EXPORT_SYMBOL_GPL(nf_flow_table_init);
developer8cb3ac72022-07-04 10:55:14 +08004112 static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
4113 {
4114 struct net_device *dev = data;
4115- struct flow_offload_entry *e;
4116-
4117- e = container_of(flow, struct flow_offload_entry, flow);
4118
4119 if (!dev) {
4120 flow_offload_teardown(flow);
4121 return;
4122 }
4123- if (net_eq(nf_ct_net(e->ct), dev_net(dev)) &&
4124+
4125+ if (net_eq(nf_ct_net(flow->ct), dev_net(dev)) &&
4126 (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
4127 flow->tuplehash[1].tuple.iifidx == dev->ifindex))
4128- flow_offload_dead(flow);
4129+ flow_offload_teardown(flow);
4130 }
4131
4132-static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
4133- struct net_device *dev)
4134+void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
4135+ struct net_device *dev)
4136 {
4137 nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
4138 flush_delayed_work(&flowtable->gc_work);
4139+ nf_flow_table_offload_flush(flowtable);
4140 }
4141
4142 void nf_flow_table_cleanup(struct net_device *dev)
developer58aa0682023-09-18 14:02:26 +08004143@@ -555,7 +628,7 @@ void nf_flow_table_cleanup(struct net_device *dev)
developer8cb3ac72022-07-04 10:55:14 +08004144
4145 mutex_lock(&flowtable_lock);
4146 list_for_each_entry(flowtable, &flowtables, list)
4147- nf_flow_table_iterate_cleanup(flowtable, dev);
4148+ nf_flow_table_gc_cleanup(flowtable, dev);
4149 mutex_unlock(&flowtable_lock);
4150 }
4151 EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
developer58aa0682023-09-18 14:02:26 +08004152@@ -565,9 +638,14 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
developer8cb3ac72022-07-04 10:55:14 +08004153 mutex_lock(&flowtable_lock);
4154 list_del(&flow_table->list);
4155 mutex_unlock(&flowtable_lock);
4156+
4157 cancel_delayed_work_sync(&flow_table->gc_work);
4158 nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
4159 nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
4160+ nf_flow_table_offload_flush(flow_table);
4161+ if (nf_flowtable_hw_offload(flow_table))
4162+ nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step,
4163+ flow_table);
4164 rhashtable_destroy(&flow_table->rhashtable);
4165 }
4166 EXPORT_SYMBOL_GPL(nf_flow_table_free);
developer58aa0682023-09-18 14:02:26 +08004167@@ -591,12 +669,23 @@ static struct notifier_block flow_offload_netdev_notifier = {
developer8cb3ac72022-07-04 10:55:14 +08004168
4169 static int __init nf_flow_table_module_init(void)
4170 {
4171- return register_netdevice_notifier(&flow_offload_netdev_notifier);
4172+ int ret;
4173+
4174+ ret = nf_flow_table_offload_init();
4175+ if (ret)
4176+ return ret;
4177+
4178+ ret = register_netdevice_notifier(&flow_offload_netdev_notifier);
4179+ if (ret)
4180+ nf_flow_table_offload_exit();
4181+
4182+ return ret;
4183 }
4184
4185 static void __exit nf_flow_table_module_exit(void)
4186 {
4187 unregister_netdevice_notifier(&flow_offload_netdev_notifier);
4188+ nf_flow_table_offload_exit();
4189 }
4190
4191 module_init(nf_flow_table_module_init);
developer58aa0682023-09-18 14:02:26 +08004192@@ -604,3 +693,4 @@ module_exit(nf_flow_table_module_exit);
developer8cb3ac72022-07-04 10:55:14 +08004193
4194 MODULE_LICENSE("GPL");
4195 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
4196+MODULE_DESCRIPTION("Netfilter flow table module");
4197diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
developer58aa0682023-09-18 14:02:26 +08004198index 397129b..6257d87 100644
developer8cb3ac72022-07-04 10:55:14 +08004199--- a/net/netfilter/nf_flow_table_ip.c
4200+++ b/net/netfilter/nf_flow_table_ip.c
4201@@ -7,11 +7,13 @@
4202 #include <linux/ip.h>
4203 #include <linux/ipv6.h>
4204 #include <linux/netdevice.h>
4205+#include <linux/if_ether.h>
4206 #include <net/ip.h>
4207 #include <net/ipv6.h>
4208 #include <net/ip6_route.h>
4209 #include <net/neighbour.h>
4210 #include <net/netfilter/nf_flow_table.h>
4211+#include <net/netfilter/nf_conntrack_acct.h>
4212 /* For layer 4 checksum field offset. */
4213 #include <linux/tcp.h>
4214 #include <linux/udp.h>
4215@@ -24,9 +26,6 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
4216 if (proto != IPPROTO_TCP)
4217 return 0;
4218
4219- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
4220- return -1;
4221-
4222 tcph = (void *)(skb_network_header(skb) + thoff);
4223 if (unlikely(tcph->fin || tcph->rst)) {
4224 flow_offload_teardown(flow);
4225@@ -36,30 +35,20 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
4226 return 0;
4227 }
4228
4229-static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
4230- __be32 addr, __be32 new_addr)
4231+static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
4232+ __be32 addr, __be32 new_addr)
4233 {
4234 struct tcphdr *tcph;
4235
4236- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
4237- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
4238- return -1;
4239-
4240 tcph = (void *)(skb_network_header(skb) + thoff);
4241 inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
4242-
4243- return 0;
4244 }
4245
4246-static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
4247- __be32 addr, __be32 new_addr)
4248+static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
4249+ __be32 addr, __be32 new_addr)
4250 {
4251 struct udphdr *udph;
4252
4253- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
4254- skb_try_make_writable(skb, thoff + sizeof(*udph)))
4255- return -1;
4256-
4257 udph = (void *)(skb_network_header(skb) + thoff);
4258 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
4259 inet_proto_csum_replace4(&udph->check, skb, addr,
4260@@ -67,31 +56,25 @@ static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
4261 if (!udph->check)
4262 udph->check = CSUM_MANGLED_0;
4263 }
4264-
4265- return 0;
4266 }
4267
4268-static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
4269- unsigned int thoff, __be32 addr,
4270- __be32 new_addr)
4271+static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
4272+ unsigned int thoff, __be32 addr,
4273+ __be32 new_addr)
4274 {
4275 switch (iph->protocol) {
4276 case IPPROTO_TCP:
4277- if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
4278- return NF_DROP;
4279+ nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr);
4280 break;
4281 case IPPROTO_UDP:
4282- if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
4283- return NF_DROP;
4284+ nf_flow_nat_ip_udp(skb, thoff, addr, new_addr);
4285 break;
4286 }
4287-
4288- return 0;
4289 }
4290
4291-static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4292- struct iphdr *iph, unsigned int thoff,
4293- enum flow_offload_tuple_dir dir)
4294+static void nf_flow_snat_ip(const struct flow_offload *flow,
4295+ struct sk_buff *skb, struct iphdr *iph,
4296+ unsigned int thoff, enum flow_offload_tuple_dir dir)
4297 {
4298 __be32 addr, new_addr;
4299
4300@@ -106,17 +89,15 @@ static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4301 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
4302 iph->daddr = new_addr;
4303 break;
4304- default:
4305- return -1;
4306 }
4307 csum_replace4(&iph->check, addr, new_addr);
4308
4309- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4310+ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4311 }
4312
4313-static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4314- struct iphdr *iph, unsigned int thoff,
4315- enum flow_offload_tuple_dir dir)
4316+static void nf_flow_dnat_ip(const struct flow_offload *flow,
4317+ struct sk_buff *skb, struct iphdr *iph,
4318+ unsigned int thoff, enum flow_offload_tuple_dir dir)
4319 {
4320 __be32 addr, new_addr;
4321
4322@@ -131,29 +112,24 @@ static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4323 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
4324 iph->saddr = new_addr;
4325 break;
4326- default:
4327- return -1;
4328 }
4329 csum_replace4(&iph->check, addr, new_addr);
4330
4331- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4332+ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4333 }
4334
4335-static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4336- unsigned int thoff, enum flow_offload_tuple_dir dir)
4337+static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4338+ unsigned int thoff, enum flow_offload_tuple_dir dir,
4339+ struct iphdr *iph)
4340 {
4341- struct iphdr *iph = ip_hdr(skb);
4342-
4343- if (flow->flags & FLOW_OFFLOAD_SNAT &&
4344- (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
4345- nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
4346- return -1;
4347- if (flow->flags & FLOW_OFFLOAD_DNAT &&
4348- (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
4349- nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
4350- return -1;
4351-
4352- return 0;
4353+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
4354+ nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir);
4355+ nf_flow_snat_ip(flow, skb, iph, thoff, dir);
4356+ }
4357+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
4358+ nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir);
4359+ nf_flow_dnat_ip(flow, skb, iph, thoff, dir);
4360+ }
4361 }
4362
4363 static bool ip_has_options(unsigned int thoff)
4364@@ -161,35 +137,70 @@ static bool ip_has_options(unsigned int thoff)
4365 return thoff != sizeof(struct iphdr);
4366 }
4367
4368+static void nf_flow_tuple_encap(struct sk_buff *skb,
4369+ struct flow_offload_tuple *tuple)
4370+{
4371+ struct vlan_ethhdr *veth;
4372+ struct pppoe_hdr *phdr;
4373+ int i = 0;
4374+
4375+ if (skb_vlan_tag_present(skb)) {
4376+ tuple->encap[i].id = skb_vlan_tag_get(skb);
4377+ tuple->encap[i].proto = skb->vlan_proto;
4378+ i++;
4379+ }
4380+ switch (skb->protocol) {
4381+ case htons(ETH_P_8021Q):
4382+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
4383+ tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
4384+ tuple->encap[i].proto = skb->protocol;
4385+ break;
4386+ case htons(ETH_P_PPP_SES):
4387+ phdr = (struct pppoe_hdr *)skb_mac_header(skb);
4388+ tuple->encap[i].id = ntohs(phdr->sid);
4389+ tuple->encap[i].proto = skb->protocol;
4390+ break;
4391+ }
4392+}
4393+
4394 static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
4395- struct flow_offload_tuple *tuple)
4396+ struct flow_offload_tuple *tuple, u32 *hdrsize,
4397+ u32 offset)
4398 {
4399 struct flow_ports *ports;
4400 unsigned int thoff;
4401 struct iphdr *iph;
4402
4403- if (!pskb_may_pull(skb, sizeof(*iph)))
4404+ if (!pskb_may_pull(skb, sizeof(*iph) + offset))
4405 return -1;
4406
4407- iph = ip_hdr(skb);
4408- thoff = iph->ihl * 4;
4409+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
4410+ thoff = (iph->ihl * 4);
4411
4412 if (ip_is_fragment(iph) ||
4413 unlikely(ip_has_options(thoff)))
4414 return -1;
4415
4416- if (iph->protocol != IPPROTO_TCP &&
4417- iph->protocol != IPPROTO_UDP)
4418+ thoff += offset;
4419+
4420+ switch (iph->protocol) {
4421+ case IPPROTO_TCP:
4422+ *hdrsize = sizeof(struct tcphdr);
4423+ break;
4424+ case IPPROTO_UDP:
4425+ *hdrsize = sizeof(struct udphdr);
4426+ break;
4427+ default:
4428 return -1;
4429+ }
4430
4431 if (iph->ttl <= 1)
4432 return -1;
4433
4434- thoff = iph->ihl * 4;
4435- if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
4436+ if (!pskb_may_pull(skb, thoff + *hdrsize))
4437 return -1;
4438
4439- iph = ip_hdr(skb);
4440+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
4441 ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
4442
4443 tuple->src_v4.s_addr = iph->saddr;
4444@@ -199,6 +210,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
4445 tuple->l3proto = AF_INET;
4446 tuple->l4proto = iph->protocol;
4447 tuple->iifidx = dev->ifindex;
4448+ nf_flow_tuple_encap(skb, tuple);
4449
4450 return 0;
4451 }
developeree39bcf2023-06-16 08:03:30 +08004452@@ -225,6 +237,75 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08004453 return NF_STOLEN;
4454 }
4455
4456+static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
4457+ u32 *offset)
4458+{
4459+ struct vlan_ethhdr *veth;
4460+
4461+ switch (skb->protocol) {
4462+ case htons(ETH_P_8021Q):
4463+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
4464+ if (veth->h_vlan_encapsulated_proto == proto) {
4465+ *offset += VLAN_HLEN;
4466+ return true;
4467+ }
4468+ break;
4469+ case htons(ETH_P_PPP_SES):
4470+ if (nf_flow_pppoe_proto(skb) == proto) {
4471+ *offset += PPPOE_SES_HLEN;
4472+ return true;
4473+ }
4474+ break;
4475+ }
4476+
4477+ return false;
4478+}
4479+
4480+static void nf_flow_encap_pop(struct sk_buff *skb,
4481+ struct flow_offload_tuple_rhash *tuplehash)
4482+{
4483+ struct vlan_hdr *vlan_hdr;
4484+ int i;
4485+
4486+ for (i = 0; i < tuplehash->tuple.encap_num; i++) {
4487+ if (skb_vlan_tag_present(skb)) {
4488+ __vlan_hwaccel_clear_tag(skb);
4489+ continue;
4490+ }
4491+ switch (skb->protocol) {
4492+ case htons(ETH_P_8021Q):
4493+ vlan_hdr = (struct vlan_hdr *)skb->data;
4494+ __skb_pull(skb, VLAN_HLEN);
4495+ vlan_set_encap_proto(skb, vlan_hdr);
4496+ skb_reset_network_header(skb);
4497+ break;
4498+ case htons(ETH_P_PPP_SES):
4499+ skb->protocol = nf_flow_pppoe_proto(skb);
4500+ skb_pull(skb, PPPOE_SES_HLEN);
4501+ skb_reset_network_header(skb);
4502+ break;
4503+ }
4504+ }
4505+}
4506+
4507+static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
4508+ const struct flow_offload_tuple_rhash *tuplehash,
4509+ unsigned short type)
4510+{
4511+ struct net_device *outdev;
4512+
4513+ outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
4514+ if (!outdev)
4515+ return NF_DROP;
4516+
4517+ skb->dev = outdev;
4518+ dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
4519+ tuplehash->tuple.out.h_source, skb->len);
4520+ dev_queue_xmit(skb);
4521+
4522+ return NF_STOLEN;
4523+}
4524+
4525 unsigned int
4526 nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
4527 const struct nf_hook_state *state)
developeree39bcf2023-06-16 08:03:30 +08004528@@ -235,15 +316,18 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08004529 enum flow_offload_tuple_dir dir;
4530 struct flow_offload *flow;
4531 struct net_device *outdev;
4532+ u32 hdrsize, offset = 0;
4533+ unsigned int thoff, mtu;
4534 struct rtable *rt;
4535- unsigned int thoff;
4536 struct iphdr *iph;
4537 __be32 nexthop;
4538+ int ret;
4539
4540- if (skb->protocol != htons(ETH_P_IP))
4541+ if (skb->protocol != htons(ETH_P_IP) &&
4542+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset))
4543 return NF_ACCEPT;
4544
4545- if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
4546+ if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0)
4547 return NF_ACCEPT;
4548
4549 tuplehash = flow_offload_lookup(flow_table, &tuple);
developeree39bcf2023-06-16 08:03:30 +08004550@@ -252,75 +336,80 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08004551
4552 dir = tuplehash->tuple.dir;
4553 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
4554- rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
4555- outdev = rt->dst.dev;
developeree39bcf2023-06-16 08:03:30 +08004556-
developer8cb3ac72022-07-04 10:55:14 +08004557- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
developeree39bcf2023-06-16 08:03:30 +08004558- return NF_ACCEPT;
developerb7c46752022-07-04 19:51:38 +08004559
developer8cb3ac72022-07-04 10:55:14 +08004560- if (skb_try_make_writable(skb, sizeof(*iph)))
4561- return NF_DROP;
developerb7c46752022-07-04 19:51:38 +08004562-
developer8cb3ac72022-07-04 10:55:14 +08004563- thoff = ip_hdr(skb)->ihl * 4;
4564- if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
developeree39bcf2023-06-16 08:03:30 +08004565+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
4566+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
developer8cb3ac72022-07-04 10:55:14 +08004567 return NF_ACCEPT;
developer7eb15dc2023-06-14 17:44:03 +08004568
4569- if (!dst_check(&rt->dst, 0)) {
developeree39bcf2023-06-16 08:03:30 +08004570- flow_offload_teardown(flow);
4571+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
4572+ thoff = (iph->ihl * 4) + offset;
4573+ if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
developer7eb15dc2023-06-14 17:44:03 +08004574 return NF_ACCEPT;
developeree39bcf2023-06-16 08:03:30 +08004575- }
developer8cb3ac72022-07-04 10:55:14 +08004576
4577- if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
4578+ if (skb_try_make_writable(skb, thoff + hdrsize))
4579 return NF_DROP;
4580
4581- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
4582+ flow_offload_refresh(flow_table, flow);
4583+
4584+ nf_flow_encap_pop(skb, tuplehash);
4585+ thoff -= offset;
4586+
4587 iph = ip_hdr(skb);
4588+ nf_flow_nat_ip(flow, skb, thoff, dir, iph);
4589+
4590 ip_decrease_ttl(iph);
4591 skb->tstamp = 0;
4592
4593- if (unlikely(dst_xfrm(&rt->dst))) {
4594+ if (flow_table->flags & NF_FLOWTABLE_COUNTER)
4595+ nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
4596+
4597+ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
4598+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
4599 memset(skb->cb, 0, sizeof(struct inet_skb_parm));
4600 IPCB(skb)->iif = skb->dev->ifindex;
4601 IPCB(skb)->flags = IPSKB_FORWARDED;
4602 return nf_flow_xmit_xfrm(skb, state, &rt->dst);
4603 }
4604
4605- skb->dev = outdev;
4606- nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
4607- skb_dst_set_noref(skb, &rt->dst);
4608- neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
4609+ switch (tuplehash->tuple.xmit_type) {
4610+ case FLOW_OFFLOAD_XMIT_NEIGH:
4611+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
4612+ outdev = rt->dst.dev;
4613+ skb->dev = outdev;
4614+ nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
4615+ skb_dst_set_noref(skb, &rt->dst);
4616+ neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
4617+ ret = NF_STOLEN;
4618+ break;
4619+ case FLOW_OFFLOAD_XMIT_DIRECT:
4620+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
4621+ if (ret == NF_DROP)
4622+ flow_offload_teardown(flow);
4623+ break;
4624+ }
4625
4626- return NF_STOLEN;
4627+ return ret;
4628 }
4629 EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
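A note on the rewritten IPv4 hook above: the MTU check runs before nf_flow_encap_pop(), so skb->len still includes the outer VLAN/PPPoE bytes. Raising the cached route MTU by the same offset gives an equivalent comparison without touching the skb first. A standalone illustration with made-up numbers:

/* Illustration only, not kernel code: the packet still fits once the
 * 8-byte PPPoE header is popped, so the biased check accepts it.
 */
#include <stdbool.h>
#include <stdio.h>

int main(void)
{
	unsigned int mtu = 1500;	/* cached route MTU */
	unsigned int offset = 8;	/* PPPOE_SES_HLEN */
	unsigned int skb_len = 1508;	/* 1500 bytes of IP + 8 bytes PPPoE */
	bool exceeds = skb_len > mtu + offset;

	printf("exceeds=%d\n", exceeds);	/* prints 0 */
	return 0;
}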
4630
4631-static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
4632- struct in6_addr *addr,
4633- struct in6_addr *new_addr)
4634+static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
4635+ struct in6_addr *addr,
4636+ struct in6_addr *new_addr,
4637+ struct ipv6hdr *ip6h)
4638 {
4639 struct tcphdr *tcph;
4640
4641- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
4642- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
4643- return -1;
4644-
4645 tcph = (void *)(skb_network_header(skb) + thoff);
4646 inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
4647 new_addr->s6_addr32, true);
4648-
4649- return 0;
4650 }
4651
4652-static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
4653- struct in6_addr *addr,
4654- struct in6_addr *new_addr)
4655+static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
4656+ struct in6_addr *addr,
4657+ struct in6_addr *new_addr)
4658 {
4659 struct udphdr *udph;
4660
4661- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
4662- skb_try_make_writable(skb, thoff + sizeof(*udph)))
4663- return -1;
4664-
4665 udph = (void *)(skb_network_header(skb) + thoff);
4666 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
4667 inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
developeree39bcf2023-06-16 08:03:30 +08004668@@ -328,32 +417,26 @@ static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
developer8cb3ac72022-07-04 10:55:14 +08004669 if (!udph->check)
4670 udph->check = CSUM_MANGLED_0;
4671 }
4672-
4673- return 0;
4674 }
4675
4676-static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
4677- unsigned int thoff, struct in6_addr *addr,
4678- struct in6_addr *new_addr)
4679+static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
4680+ unsigned int thoff, struct in6_addr *addr,
4681+ struct in6_addr *new_addr)
4682 {
4683 switch (ip6h->nexthdr) {
4684 case IPPROTO_TCP:
4685- if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
4686- return NF_DROP;
4687+ nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h);
4688 break;
4689 case IPPROTO_UDP:
4690- if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
4691- return NF_DROP;
4692+ nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr);
4693 break;
4694 }
4695-
4696- return 0;
4697 }
4698
4699-static int nf_flow_snat_ipv6(const struct flow_offload *flow,
4700- struct sk_buff *skb, struct ipv6hdr *ip6h,
4701- unsigned int thoff,
4702- enum flow_offload_tuple_dir dir)
4703+static void nf_flow_snat_ipv6(const struct flow_offload *flow,
4704+ struct sk_buff *skb, struct ipv6hdr *ip6h,
4705+ unsigned int thoff,
4706+ enum flow_offload_tuple_dir dir)
4707 {
4708 struct in6_addr addr, new_addr;
4709
developeree39bcf2023-06-16 08:03:30 +08004710@@ -368,17 +451,15 @@ static int nf_flow_snat_ipv6(const struct flow_offload *flow,
developer8cb3ac72022-07-04 10:55:14 +08004711 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
4712 ip6h->daddr = new_addr;
4713 break;
4714- default:
4715- return -1;
4716 }
4717
4718- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4719+ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4720 }
4721
4722-static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
4723- struct sk_buff *skb, struct ipv6hdr *ip6h,
4724- unsigned int thoff,
4725- enum flow_offload_tuple_dir dir)
4726+static void nf_flow_dnat_ipv6(const struct flow_offload *flow,
4727+ struct sk_buff *skb, struct ipv6hdr *ip6h,
4728+ unsigned int thoff,
4729+ enum flow_offload_tuple_dir dir)
4730 {
4731 struct in6_addr addr, new_addr;
4732
developeree39bcf2023-06-16 08:03:30 +08004733@@ -393,56 +474,60 @@ static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
developer8cb3ac72022-07-04 10:55:14 +08004734 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
4735 ip6h->saddr = new_addr;
4736 break;
4737- default:
4738- return -1;
4739 }
4740
4741- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4742+ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4743 }
4744
4745-static int nf_flow_nat_ipv6(const struct flow_offload *flow,
4746- struct sk_buff *skb,
4747- enum flow_offload_tuple_dir dir)
4748+static void nf_flow_nat_ipv6(const struct flow_offload *flow,
4749+ struct sk_buff *skb,
4750+ enum flow_offload_tuple_dir dir,
4751+ struct ipv6hdr *ip6h)
4752 {
4753- struct ipv6hdr *ip6h = ipv6_hdr(skb);
4754 unsigned int thoff = sizeof(*ip6h);
4755
4756- if (flow->flags & FLOW_OFFLOAD_SNAT &&
4757- (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
4758- nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
4759- return -1;
4760- if (flow->flags & FLOW_OFFLOAD_DNAT &&
4761- (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
4762- nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
4763- return -1;
4764-
4765- return 0;
4766+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
4767+ nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir);
4768+ nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir);
4769+ }
4770+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
4771+ nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir);
4772+ nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir);
4773+ }
4774 }
4775
4776 static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
4777- struct flow_offload_tuple *tuple)
4778+ struct flow_offload_tuple *tuple, u32 *hdrsize,
4779+ u32 offset)
4780 {
4781 struct flow_ports *ports;
4782 struct ipv6hdr *ip6h;
4783 unsigned int thoff;
4784
4785- if (!pskb_may_pull(skb, sizeof(*ip6h)))
4786+ thoff = sizeof(*ip6h) + offset;
4787+ if (!pskb_may_pull(skb, thoff))
4788 return -1;
4789
4790- ip6h = ipv6_hdr(skb);
4791+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
4792
4793- if (ip6h->nexthdr != IPPROTO_TCP &&
4794- ip6h->nexthdr != IPPROTO_UDP)
4795+ switch (ip6h->nexthdr) {
4796+ case IPPROTO_TCP:
4797+ *hdrsize = sizeof(struct tcphdr);
4798+ break;
4799+ case IPPROTO_UDP:
4800+ *hdrsize = sizeof(struct udphdr);
4801+ break;
4802+ default:
4803 return -1;
4804+ }
4805
4806 if (ip6h->hop_limit <= 1)
4807 return -1;
4808
4809- thoff = sizeof(*ip6h);
4810- if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
4811+ if (!pskb_may_pull(skb, thoff + *hdrsize))
4812 return -1;
4813
4814- ip6h = ipv6_hdr(skb);
4815+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
4816 ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
4817
4818 tuple->src_v6 = ip6h->saddr;
developeree39bcf2023-06-16 08:03:30 +08004819@@ -452,6 +537,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
developer8cb3ac72022-07-04 10:55:14 +08004820 tuple->l3proto = AF_INET6;
4821 tuple->l4proto = ip6h->nexthdr;
4822 tuple->iifidx = dev->ifindex;
4823+ nf_flow_tuple_encap(skb, tuple);
4824
4825 return 0;
4826 }
developeree39bcf2023-06-16 08:03:30 +08004827@@ -467,13 +553,17 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08004828 const struct in6_addr *nexthop;
4829 struct flow_offload *flow;
4830 struct net_device *outdev;
4831+ unsigned int thoff, mtu;
4832+ u32 hdrsize, offset = 0;
4833 struct ipv6hdr *ip6h;
4834 struct rt6_info *rt;
4835+ int ret;
4836
4837- if (skb->protocol != htons(ETH_P_IPV6))
4838+ if (skb->protocol != htons(ETH_P_IPV6) &&
4839+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset))
4840 return NF_ACCEPT;
4841
4842- if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
4843+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0)
4844 return NF_ACCEPT;
4845
4846 tuplehash = flow_offload_lookup(flow_table, &tuple);
developeree39bcf2023-06-16 08:03:30 +08004847@@ -482,44 +572,57 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08004848
4849 dir = tuplehash->tuple.dir;
4850 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
4851- rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
4852- outdev = rt->dst.dev;
developer8cb3ac72022-07-04 10:55:14 +08004853
developerb7c46752022-07-04 19:51:38 +08004854- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
developer8cb3ac72022-07-04 10:55:14 +08004855+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
4856+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
4857 return NF_ACCEPT;
4858
developerb7c46752022-07-04 19:51:38 +08004859- if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
4860- sizeof(*ip6h)))
developer8cb3ac72022-07-04 10:55:14 +08004861+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
4862+ thoff = sizeof(*ip6h) + offset;
4863+ if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
4864 return NF_ACCEPT;
developer8cb3ac72022-07-04 10:55:14 +08004865
developerb7c46752022-07-04 19:51:38 +08004866- if (!dst_check(&rt->dst, tuplehash->tuple.dst_cookie)) {
developeree39bcf2023-06-16 08:03:30 +08004867- flow_offload_teardown(flow);
4868- return NF_ACCEPT;
4869- }
4870-
developer8cb3ac72022-07-04 10:55:14 +08004871- if (skb_try_make_writable(skb, sizeof(*ip6h)))
4872+ if (skb_try_make_writable(skb, thoff + hdrsize))
4873 return NF_DROP;
4874
4875- if (nf_flow_nat_ipv6(flow, skb, dir) < 0)
4876- return NF_DROP;
4877+ flow_offload_refresh(flow_table, flow);
4878+
4879+ nf_flow_encap_pop(skb, tuplehash);
4880
4881- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
4882 ip6h = ipv6_hdr(skb);
4883+ nf_flow_nat_ipv6(flow, skb, dir, ip6h);
4884+
4885 ip6h->hop_limit--;
4886 skb->tstamp = 0;
4887
4888- if (unlikely(dst_xfrm(&rt->dst))) {
4889+ if (flow_table->flags & NF_FLOWTABLE_COUNTER)
4890+ nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
4891+
4892+ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
4893+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
4894 memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
4895 IP6CB(skb)->iif = skb->dev->ifindex;
4896 IP6CB(skb)->flags = IP6SKB_FORWARDED;
4897 return nf_flow_xmit_xfrm(skb, state, &rt->dst);
4898 }
4899
4900- skb->dev = outdev;
4901- nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
4902- skb_dst_set_noref(skb, &rt->dst);
4903- neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
4904+ switch (tuplehash->tuple.xmit_type) {
4905+ case FLOW_OFFLOAD_XMIT_NEIGH:
4906+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
4907+ outdev = rt->dst.dev;
4908+ skb->dev = outdev;
4909+ nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
4910+ skb_dst_set_noref(skb, &rt->dst);
4911+ neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
4912+ ret = NF_STOLEN;
4913+ break;
4914+ case FLOW_OFFLOAD_XMIT_DIRECT:
4915+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
4916+ if (ret == NF_DROP)
4917+ flow_offload_teardown(flow);
4918+ break;
4919+ }
4920
4921- return NF_STOLEN;
4922+ return ret;
4923 }
4924 EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
4925diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
4926new file mode 100644
developer58aa0682023-09-18 14:02:26 +08004927index 0000000..50f2f2e
developer8cb3ac72022-07-04 10:55:14 +08004928--- /dev/null
4929+++ b/net/netfilter/nf_flow_table_offload.c
developeree39bcf2023-06-16 08:03:30 +08004930@@ -0,0 +1,1199 @@
developer8cb3ac72022-07-04 10:55:14 +08004931+#include <linux/kernel.h>
4932+#include <linux/init.h>
4933+#include <linux/module.h>
4934+#include <linux/netfilter.h>
4935+#include <linux/rhashtable.h>
4936+#include <linux/netdevice.h>
4937+#include <linux/tc_act/tc_csum.h>
4938+#include <net/flow_offload.h>
4939+#include <net/netfilter/nf_flow_table.h>
4940+#include <net/netfilter/nf_tables.h>
4941+#include <net/netfilter/nf_conntrack.h>
4942+#include <net/netfilter/nf_conntrack_acct.h>
4943+#include <net/netfilter/nf_conntrack_core.h>
4944+#include <net/netfilter/nf_conntrack_tuple.h>
4945+
4946+static struct workqueue_struct *nf_flow_offload_add_wq;
4947+static struct workqueue_struct *nf_flow_offload_del_wq;
4948+static struct workqueue_struct *nf_flow_offload_stats_wq;
4949+
4950+struct flow_offload_work {
4951+ struct list_head list;
4952+ enum flow_cls_command cmd;
4953+ int priority;
4954+ struct nf_flowtable *flowtable;
4955+ struct flow_offload *flow;
4956+ struct work_struct work;
4957+};
4958+
4959+#define NF_FLOW_DISSECTOR(__match, __type, __field) \
4960+ (__match)->dissector.offset[__type] = \
4961+ offsetof(struct nf_flow_key, __field)
4962+
4963+static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
4964+ struct ip_tunnel_info *tun_info)
4965+{
4966+ struct nf_flow_key *mask = &match->mask;
4967+ struct nf_flow_key *key = &match->key;
4968+ unsigned int enc_keys;
4969+
4970+ if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
4971+ return;
4972+
4973+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control);
4974+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
4975+ key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
4976+ mask->enc_key_id.keyid = 0xffffffff;
4977+ enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
4978+ BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);
4979+
4980+ if (ip_tunnel_info_af(tun_info) == AF_INET) {
4981+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
4982+ enc_ipv4);
4983+ key->enc_ipv4.src = tun_info->key.u.ipv4.dst;
4984+ key->enc_ipv4.dst = tun_info->key.u.ipv4.src;
4985+ if (key->enc_ipv4.src)
4986+ mask->enc_ipv4.src = 0xffffffff;
4987+ if (key->enc_ipv4.dst)
4988+ mask->enc_ipv4.dst = 0xffffffff;
4989+ enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
4990+ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
4991+ } else {
4992+ memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
4993+ sizeof(struct in6_addr));
4994+ memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src,
4995+ sizeof(struct in6_addr));
4996+ if (memcmp(&key->enc_ipv6.src, &in6addr_any,
4997+ sizeof(struct in6_addr)))
4998+ memset(&mask->enc_ipv6.src, 0xff,
4999+ sizeof(struct in6_addr));
5000+ if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
5001+ sizeof(struct in6_addr)))
5002+ memset(&mask->enc_ipv6.dst, 0xff,
5003+ sizeof(struct in6_addr));
5004+ enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
5005+ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
5006+ }
5007+
5008+ match->dissector.used_keys |= enc_keys;
5009+}
5010+
5011+static void nf_flow_rule_vlan_match(struct flow_dissector_key_vlan *key,
5012+ struct flow_dissector_key_vlan *mask,
5013+ u16 vlan_id, __be16 proto)
5014+{
5015+ key->vlan_id = vlan_id;
5016+ mask->vlan_id = VLAN_VID_MASK;
5017+ key->vlan_tpid = proto;
5018+ mask->vlan_tpid = 0xffff;
5019+}
5020+
5021+static int nf_flow_rule_match(struct nf_flow_match *match,
5022+ const struct flow_offload_tuple *tuple,
5023+ struct dst_entry *other_dst)
5024+{
5025+ struct nf_flow_key *mask = &match->mask;
5026+ struct nf_flow_key *key = &match->key;
5027+ struct ip_tunnel_info *tun_info;
5028+ bool vlan_encap = false;
5029+
5030+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
5031+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
5032+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
5033+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
5034+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
5035+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
5036+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);
5037+
5038+ if (other_dst && other_dst->lwtstate) {
5039+ tun_info = lwt_tun_info(other_dst->lwtstate);
5040+ nf_flow_rule_lwt_match(match, tun_info);
5041+ }
5042+
5043+ key->meta.ingress_ifindex = tuple->iifidx;
5044+ mask->meta.ingress_ifindex = 0xffffffff;
5045+
5046+ if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
5047+ tuple->encap[0].proto == htons(ETH_P_8021Q)) {
5048+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, vlan);
5049+ nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
5050+ tuple->encap[0].id,
5051+ tuple->encap[0].proto);
5052+ vlan_encap = true;
5053+ }
5054+
5055+ if (tuple->encap_num > 1 && !(tuple->in_vlan_ingress & BIT(1)) &&
5056+ tuple->encap[1].proto == htons(ETH_P_8021Q)) {
5057+ if (vlan_encap) {
5058+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CVLAN,
5059+ cvlan);
5060+ nf_flow_rule_vlan_match(&key->cvlan, &mask->cvlan,
5061+ tuple->encap[1].id,
5062+ tuple->encap[1].proto);
5063+ } else {
5064+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN,
5065+ vlan);
5066+ nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
5067+ tuple->encap[1].id,
5068+ tuple->encap[1].proto);
5069+ }
5070+ }
5071+
5072+ switch (tuple->l3proto) {
5073+ case AF_INET:
5074+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
5075+ key->basic.n_proto = htons(ETH_P_IP);
5076+ key->ipv4.src = tuple->src_v4.s_addr;
5077+ mask->ipv4.src = 0xffffffff;
5078+ key->ipv4.dst = tuple->dst_v4.s_addr;
5079+ mask->ipv4.dst = 0xffffffff;
5080+ break;
5081+ case AF_INET6:
5082+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
5083+ key->basic.n_proto = htons(ETH_P_IPV6);
5084+ key->ipv6.src = tuple->src_v6;
5085+ memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
5086+ key->ipv6.dst = tuple->dst_v6;
5087+ memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
5088+ break;
5089+ default:
5090+ return -EOPNOTSUPP;
5091+ }
5092+ mask->control.addr_type = 0xffff;
5093+ match->dissector.used_keys |= BIT(key->control.addr_type);
5094+ mask->basic.n_proto = 0xffff;
5095+
5096+ switch (tuple->l4proto) {
5097+ case IPPROTO_TCP:
5098+ key->tcp.flags = 0;
5099+ mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
5100+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
5101+ break;
5102+ case IPPROTO_UDP:
5103+ break;
5104+ default:
5105+ return -EOPNOTSUPP;
5106+ }
5107+
5108+ key->basic.ip_proto = tuple->l4proto;
5109+ mask->basic.ip_proto = 0xff;
5110+
5111+ key->tp.src = tuple->src_port;
5112+ mask->tp.src = 0xffff;
5113+ key->tp.dst = tuple->dst_port;
5114+ mask->tp.dst = 0xffff;
5115+
5116+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
5117+ BIT(FLOW_DISSECTOR_KEY_CONTROL) |
5118+ BIT(FLOW_DISSECTOR_KEY_BASIC) |
5119+ BIT(FLOW_DISSECTOR_KEY_PORTS);
5120+ return 0;
5121+}
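nf_flow_rule_match() expresses the connection as a flower-style exact match: every selected dissector field carries an all-ones mask, and ingress ifindex, optional VLAN tags, addresses, IP protocol and ports together pin down one direction of one flow. The key/mask semantics a driver applies when parsing such a rule are the usual masked compare, sketched below as a plain C illustration (assumed model, not code from this patch):

/* Illustration of key/mask matching: a field matches when
 * (field & mask) == (key & mask); all-ones masks mean exact match.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool match_field(uint32_t field, uint32_t key, uint32_t mask)
{
	return (field & mask) == (key & mask);
}

int main(void)
{
	uint32_t dst_ip = 0x08080808;	/* 8.8.8.8 */

	printf("%d\n", match_field(dst_ip, 0x08080808, 0xffffffffu)); /* 1 */
	printf("%d\n", match_field(dst_ip, 0x08080404, 0xffffffffu)); /* 0 */
	return 0;
}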
5122+
5123+static void flow_offload_mangle(struct flow_action_entry *entry,
5124+ enum flow_action_mangle_base htype, u32 offset,
5125+ const __be32 *value, const __be32 *mask)
5126+{
5127+ entry->id = FLOW_ACTION_MANGLE;
5128+ entry->mangle.htype = htype;
5129+ entry->mangle.offset = offset;
5130+ memcpy(&entry->mangle.mask, mask, sizeof(u32));
5131+ memcpy(&entry->mangle.val, value, sizeof(u32));
5132+}
5133+
5134+static inline struct flow_action_entry *
5135+flow_action_entry_next(struct nf_flow_rule *flow_rule)
5136+{
5137+ int i = flow_rule->rule->action.num_entries++;
5138+
5139+ return &flow_rule->rule->action.entries[i];
5140+}
5141+
5142+static int flow_offload_eth_src(struct net *net,
5143+ const struct flow_offload *flow,
5144+ enum flow_offload_tuple_dir dir,
5145+ struct nf_flow_rule *flow_rule)
5146+{
5147+ struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
5148+ struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
5149+ const struct flow_offload_tuple *other_tuple, *this_tuple;
5150+ struct net_device *dev = NULL;
5151+ const unsigned char *addr;
5152+ u32 mask, val;
5153+ u16 val16;
5154+
5155+ this_tuple = &flow->tuplehash[dir].tuple;
5156+
5157+ switch (this_tuple->xmit_type) {
5158+ case FLOW_OFFLOAD_XMIT_DIRECT:
5159+ addr = this_tuple->out.h_source;
5160+ break;
5161+ case FLOW_OFFLOAD_XMIT_NEIGH:
5162+ other_tuple = &flow->tuplehash[!dir].tuple;
5163+ dev = dev_get_by_index(net, other_tuple->iifidx);
5164+ if (!dev)
5165+ return -ENOENT;
5166+
5167+ addr = dev->dev_addr;
5168+ break;
5169+ default:
5170+ return -EOPNOTSUPP;
5171+ }
5172+
5173+ mask = ~0xffff0000;
5174+ memcpy(&val16, addr, 2);
5175+ val = val16 << 16;
5176+ flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
5177+ &val, &mask);
5178+
5179+ mask = ~0xffffffff;
5180+ memcpy(&val, addr + 2, 4);
5181+ flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
5182+ &val, &mask);
5183+
developeree39bcf2023-06-16 08:03:30 +08005184+ if (dev)
5185+ dev_put(dev);
developer8cb3ac72022-07-04 10:55:14 +08005186+
5187+ return 0;
5188+}
5189+
5190+static int flow_offload_eth_dst(struct net *net,
5191+ const struct flow_offload *flow,
5192+ enum flow_offload_tuple_dir dir,
5193+ struct nf_flow_rule *flow_rule)
5194+{
5195+ struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
5196+ struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
5197+ const struct flow_offload_tuple *other_tuple, *this_tuple;
5198+ const struct dst_entry *dst_cache;
5199+ unsigned char ha[ETH_ALEN];
5200+ struct neighbour *n;
5201+ const void *daddr;
5202+ u32 mask, val;
5203+ u8 nud_state;
5204+ u16 val16;
5205+
5206+ this_tuple = &flow->tuplehash[dir].tuple;
5207+
5208+ switch (this_tuple->xmit_type) {
5209+ case FLOW_OFFLOAD_XMIT_DIRECT:
5210+ ether_addr_copy(ha, this_tuple->out.h_dest);
5211+ break;
5212+ case FLOW_OFFLOAD_XMIT_NEIGH:
5213+ other_tuple = &flow->tuplehash[!dir].tuple;
5214+ daddr = &other_tuple->src_v4;
5215+ dst_cache = this_tuple->dst_cache;
5216+ n = dst_neigh_lookup(dst_cache, daddr);
5217+ if (!n)
5218+ return -ENOENT;
5219+
5220+ read_lock_bh(&n->lock);
5221+ nud_state = n->nud_state;
5222+ ether_addr_copy(ha, n->ha);
5223+ read_unlock_bh(&n->lock);
5224+ neigh_release(n);
5225+
5226+ if (!(nud_state & NUD_VALID))
5227+ return -ENOENT;
5228+ break;
5229+ default:
5230+ return -EOPNOTSUPP;
5231+ }
5232+
5233+ mask = ~0xffffffff;
5234+ memcpy(&val, ha, 4);
5235+ flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
5236+ &val, &mask);
5237+
5238+ mask = ~0x0000ffff;
5239+ memcpy(&val16, ha + 4, 2);
5240+ val = val16;
5241+ flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
5242+ &val, &mask);
5243+
5244+ return 0;
5245+}
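flow_offload_eth_src() and flow_offload_eth_dst() rewrite each 6-byte MAC with a pair of 32-bit FLOW_ACTION_MANGLE entries on the Ethernet header: offsets 0 and 4 for the destination, 4 and 8 for the source, with the mask on the shared word at offset 4 marking which 16 bits belong to which address. The value packing, reproduced as a small userspace program with a made-up source MAC (byte-order interpretation of the mangle words is left to the driver):

/* Reproduces the packing arithmetic of the two source-MAC mangle
 * entries above; illustrative only.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	const uint8_t addr[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
	uint16_t val16;
	uint32_t val0, val1;

	memcpy(&val16, addr, 2);
	val0 = (uint32_t)val16 << 16;	/* entry0: offset 4, mask ~0xffff0000 */
	memcpy(&val1, addr + 2, 4);	/* entry1: offset 8, mask ~0xffffffff */

	printf("offset 4: 0x%08x  offset 8: 0x%08x\n", val0, val1);
	return 0;
}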
5246+
5247+static void flow_offload_ipv4_snat(struct net *net,
5248+ const struct flow_offload *flow,
5249+ enum flow_offload_tuple_dir dir,
5250+ struct nf_flow_rule *flow_rule)
5251+{
5252+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5253+ u32 mask = ~htonl(0xffffffff);
5254+ __be32 addr;
5255+ u32 offset;
5256+
5257+ switch (dir) {
5258+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5259+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
5260+ offset = offsetof(struct iphdr, saddr);
5261+ break;
5262+ case FLOW_OFFLOAD_DIR_REPLY:
5263+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
5264+ offset = offsetof(struct iphdr, daddr);
5265+ break;
5266+ default:
5267+ return;
5268+ }
5269+
5270+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
5271+ &addr, &mask);
5272+}
5273+
5274+static void flow_offload_ipv4_dnat(struct net *net,
5275+ const struct flow_offload *flow,
5276+ enum flow_offload_tuple_dir dir,
5277+ struct nf_flow_rule *flow_rule)
5278+{
5279+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5280+ u32 mask = ~htonl(0xffffffff);
5281+ __be32 addr;
5282+ u32 offset;
5283+
5284+ switch (dir) {
5285+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5286+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
5287+ offset = offsetof(struct iphdr, daddr);
5288+ break;
5289+ case FLOW_OFFLOAD_DIR_REPLY:
5290+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
5291+ offset = offsetof(struct iphdr, saddr);
5292+ break;
5293+ default:
5294+ return;
5295+ }
5296+
5297+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
5298+ &addr, &mask);
5299+}
5300+
5301+static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
5302+ unsigned int offset,
5303+ const __be32 *addr, const __be32 *mask)
5304+{
5305+ struct flow_action_entry *entry;
developeree39bcf2023-06-16 08:03:30 +08005306+ int i, j;
developer8cb3ac72022-07-04 10:55:14 +08005307+
developeree39bcf2023-06-16 08:03:30 +08005308+ for (i = 0, j = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32), j++) {
developer8cb3ac72022-07-04 10:55:14 +08005309+ entry = flow_action_entry_next(flow_rule);
5310+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
developeree39bcf2023-06-16 08:03:30 +08005311+ offset + i, &addr[j], mask);
developer8cb3ac72022-07-04 10:55:14 +08005312+ }
5313+}
5314+
5315+static void flow_offload_ipv6_snat(struct net *net,
5316+ const struct flow_offload *flow,
5317+ enum flow_offload_tuple_dir dir,
5318+ struct nf_flow_rule *flow_rule)
5319+{
5320+ u32 mask = ~htonl(0xffffffff);
5321+ const __be32 *addr;
5322+ u32 offset;
5323+
5324+ switch (dir) {
5325+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5326+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
5327+ offset = offsetof(struct ipv6hdr, saddr);
5328+ break;
5329+ case FLOW_OFFLOAD_DIR_REPLY:
5330+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
5331+ offset = offsetof(struct ipv6hdr, daddr);
5332+ break;
5333+ default:
5334+ return;
5335+ }
5336+
5337+ flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
5338+}
5339+
5340+static void flow_offload_ipv6_dnat(struct net *net,
5341+ const struct flow_offload *flow,
5342+ enum flow_offload_tuple_dir dir,
5343+ struct nf_flow_rule *flow_rule)
5344+{
5345+ u32 mask = ~htonl(0xffffffff);
5346+ const __be32 *addr;
5347+ u32 offset;
5348+
5349+ switch (dir) {
5350+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5351+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
5352+ offset = offsetof(struct ipv6hdr, daddr);
5353+ break;
5354+ case FLOW_OFFLOAD_DIR_REPLY:
5355+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
5356+ offset = offsetof(struct ipv6hdr, saddr);
5357+ break;
5358+ default:
5359+ return;
5360+ }
5361+
5362+ flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
5363+}
5364+
5365+static int flow_offload_l4proto(const struct flow_offload *flow)
5366+{
5367+ u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
5368+ u8 type = 0;
5369+
5370+ switch (protonum) {
5371+ case IPPROTO_TCP:
5372+ type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
5373+ break;
5374+ case IPPROTO_UDP:
5375+ type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
5376+ break;
5377+ default:
5378+ break;
5379+ }
5380+
5381+ return type;
5382+}
5383+
5384+static void flow_offload_port_snat(struct net *net,
5385+ const struct flow_offload *flow,
5386+ enum flow_offload_tuple_dir dir,
5387+ struct nf_flow_rule *flow_rule)
5388+{
5389+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5390+ u32 mask, port;
5391+ u32 offset;
5392+
5393+ switch (dir) {
5394+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5395+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
5396+ offset = 0; /* offsetof(struct tcphdr, source); */
5397+ port = htonl(port << 16);
5398+ mask = ~htonl(0xffff0000);
5399+ break;
5400+ case FLOW_OFFLOAD_DIR_REPLY:
5401+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
5402+ offset = 0; /* offsetof(struct tcphdr, dest); */
5403+ port = htonl(port);
5404+ mask = ~htonl(0xffff);
5405+ break;
5406+ default:
5407+ return;
5408+ }
5409+
5410+ flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
5411+ &port, &mask);
5412+}
5413+
5414+static void flow_offload_port_dnat(struct net *net,
5415+ const struct flow_offload *flow,
5416+ enum flow_offload_tuple_dir dir,
5417+ struct nf_flow_rule *flow_rule)
5418+{
5419+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5420+ u32 mask, port;
5421+ u32 offset;
5422+
5423+ switch (dir) {
5424+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5425+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
5426+ offset = 0; /* offsetof(struct tcphdr, dest); */
5427+ port = htonl(port);
5428+ mask = ~htonl(0xffff);
5429+ break;
5430+ case FLOW_OFFLOAD_DIR_REPLY:
5431+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
5432+ offset = 0; /* offsetof(struct tcphdr, source); */
5433+ port = htonl(port << 16);
5434+ mask = ~htonl(0xffff0000);
5435+ break;
5436+ default:
5437+ return;
5438+ }
5439+
5440+ flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
5441+ &port, &mask);
5442+}
5443+
5444+static void flow_offload_ipv4_checksum(struct net *net,
5445+ const struct flow_offload *flow,
5446+ struct nf_flow_rule *flow_rule)
5447+{
5448+ u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
5449+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5450+
5451+ entry->id = FLOW_ACTION_CSUM;
5452+ entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;
5453+
5454+ switch (protonum) {
5455+ case IPPROTO_TCP:
5456+ entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
5457+ break;
5458+ case IPPROTO_UDP:
5459+ entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
5460+ break;
5461+ }
5462+}
5463+
5464+static void flow_offload_redirect(struct net *net,
5465+ const struct flow_offload *flow,
5466+ enum flow_offload_tuple_dir dir,
5467+ struct nf_flow_rule *flow_rule)
5468+{
5469+ const struct flow_offload_tuple *this_tuple, *other_tuple;
5470+ struct flow_action_entry *entry;
5471+ struct net_device *dev;
5472+ int ifindex;
5473+
5474+ this_tuple = &flow->tuplehash[dir].tuple;
5475+ switch (this_tuple->xmit_type) {
5476+ case FLOW_OFFLOAD_XMIT_DIRECT:
5477+ this_tuple = &flow->tuplehash[dir].tuple;
5478+ ifindex = this_tuple->out.hw_ifidx;
5479+ break;
5480+ case FLOW_OFFLOAD_XMIT_NEIGH:
5481+ other_tuple = &flow->tuplehash[!dir].tuple;
5482+ ifindex = other_tuple->iifidx;
5483+ break;
5484+ default:
5485+ return;
5486+ }
5487+
5488+ dev = dev_get_by_index(net, ifindex);
5489+ if (!dev)
5490+ return;
5491+
5492+ entry = flow_action_entry_next(flow_rule);
5493+ entry->id = FLOW_ACTION_REDIRECT;
5494+ entry->dev = dev;
5495+}
5496+
5497+static void flow_offload_encap_tunnel(const struct flow_offload *flow,
5498+ enum flow_offload_tuple_dir dir,
5499+ struct nf_flow_rule *flow_rule)
5500+{
5501+ const struct flow_offload_tuple *this_tuple;
5502+ struct flow_action_entry *entry;
5503+ struct dst_entry *dst;
5504+
5505+ this_tuple = &flow->tuplehash[dir].tuple;
5506+ if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
5507+ return;
5508+
5509+ dst = this_tuple->dst_cache;
5510+ if (dst && dst->lwtstate) {
5511+ struct ip_tunnel_info *tun_info;
5512+
5513+ tun_info = lwt_tun_info(dst->lwtstate);
5514+ if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
5515+ entry = flow_action_entry_next(flow_rule);
5516+ entry->id = FLOW_ACTION_TUNNEL_ENCAP;
5517+ entry->tunnel = tun_info;
5518+ }
5519+ }
5520+}
5521+
5522+static void flow_offload_decap_tunnel(const struct flow_offload *flow,
5523+ enum flow_offload_tuple_dir dir,
5524+ struct nf_flow_rule *flow_rule)
5525+{
5526+ const struct flow_offload_tuple *other_tuple;
5527+ struct flow_action_entry *entry;
5528+ struct dst_entry *dst;
5529+
5530+ other_tuple = &flow->tuplehash[!dir].tuple;
5531+ if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
5532+ return;
5533+
5534+ dst = other_tuple->dst_cache;
5535+ if (dst && dst->lwtstate) {
5536+ struct ip_tunnel_info *tun_info;
5537+
5538+ tun_info = lwt_tun_info(dst->lwtstate);
5539+ if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
5540+ entry = flow_action_entry_next(flow_rule);
5541+ entry->id = FLOW_ACTION_TUNNEL_DECAP;
5542+ }
5543+ }
5544+}
5545+
5546+static int
5547+nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
5548+ enum flow_offload_tuple_dir dir,
5549+ struct nf_flow_rule *flow_rule)
5550+{
5551+ const struct flow_offload_tuple *other_tuple;
5552+ const struct flow_offload_tuple *tuple;
5553+ int i;
5554+
5555+ flow_offload_decap_tunnel(flow, dir, flow_rule);
5556+ flow_offload_encap_tunnel(flow, dir, flow_rule);
5557+
5558+ if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
5559+ flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
5560+ return -1;
5561+
5562+ tuple = &flow->tuplehash[dir].tuple;
5563+
5564+ for (i = 0; i < tuple->encap_num; i++) {
5565+ struct flow_action_entry *entry;
5566+
5567+ if (tuple->in_vlan_ingress & BIT(i))
5568+ continue;
5569+
5570+ if (tuple->encap[i].proto == htons(ETH_P_8021Q)) {
5571+ entry = flow_action_entry_next(flow_rule);
5572+ entry->id = FLOW_ACTION_VLAN_POP;
5573+ }
5574+ }
5575+
5576+ other_tuple = &flow->tuplehash[!dir].tuple;
5577+
5578+ for (i = 0; i < other_tuple->encap_num; i++) {
5579+ struct flow_action_entry *entry;
5580+
5581+ if (other_tuple->in_vlan_ingress & BIT(i))
5582+ continue;
5583+
5584+ entry = flow_action_entry_next(flow_rule);
5585+
5586+ switch (other_tuple->encap[i].proto) {
5587+ case htons(ETH_P_PPP_SES):
5588+ entry->id = FLOW_ACTION_PPPOE_PUSH;
5589+ entry->pppoe.sid = other_tuple->encap[i].id;
5590+ break;
5591+ case htons(ETH_P_8021Q):
5592+ entry->id = FLOW_ACTION_VLAN_PUSH;
5593+ entry->vlan.vid = other_tuple->encap[i].id;
5594+ entry->vlan.proto = other_tuple->encap[i].proto;
5595+ break;
5596+ }
5597+ }
5598+
5599+ return 0;
5600+}
5601+
5602+int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
5603+ enum flow_offload_tuple_dir dir,
5604+ struct nf_flow_rule *flow_rule)
5605+{
5606+ if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
5607+ return -1;
5608+
5609+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
5610+ flow_offload_ipv4_snat(net, flow, dir, flow_rule);
5611+ flow_offload_port_snat(net, flow, dir, flow_rule);
5612+ }
5613+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
5614+ flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
5615+ flow_offload_port_dnat(net, flow, dir, flow_rule);
5616+ }
5617+ if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
5618+ test_bit(NF_FLOW_DNAT, &flow->flags))
5619+ flow_offload_ipv4_checksum(net, flow, flow_rule);
5620+
5621+ flow_offload_redirect(net, flow, dir, flow_rule);
5622+
5623+ return 0;
5624+}
5625+EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);
5626+
5627+int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
5628+ enum flow_offload_tuple_dir dir,
5629+ struct nf_flow_rule *flow_rule)
5630+{
5631+ if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
5632+ return -1;
5633+
5634+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
5635+ flow_offload_ipv6_snat(net, flow, dir, flow_rule);
5636+ flow_offload_port_snat(net, flow, dir, flow_rule);
5637+ }
5638+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
5639+ flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
5640+ flow_offload_port_dnat(net, flow, dir, flow_rule);
5641+ }
5642+
5643+ flow_offload_redirect(net, flow, dir, flow_rule);
5644+
5645+ return 0;
5646+}
5647+EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);
5648+
5649+#define NF_FLOW_RULE_ACTION_MAX 16
5650+
5651+static struct nf_flow_rule *
5652+nf_flow_offload_rule_alloc(struct net *net,
5653+ const struct flow_offload_work *offload,
5654+ enum flow_offload_tuple_dir dir)
5655+{
5656+ const struct nf_flowtable *flowtable = offload->flowtable;
5657+ const struct flow_offload_tuple *tuple, *other_tuple;
5658+ const struct flow_offload *flow = offload->flow;
5659+ struct dst_entry *other_dst = NULL;
5660+ struct nf_flow_rule *flow_rule;
5661+ int err = -ENOMEM;
5662+
5663+ flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
5664+ if (!flow_rule)
5665+ goto err_flow;
5666+
5667+ flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
5668+ if (!flow_rule->rule)
5669+ goto err_flow_rule;
5670+
5671+ flow_rule->rule->match.dissector = &flow_rule->match.dissector;
5672+ flow_rule->rule->match.mask = &flow_rule->match.mask;
5673+ flow_rule->rule->match.key = &flow_rule->match.key;
5674+
5675+ tuple = &flow->tuplehash[dir].tuple;
5676+ other_tuple = &flow->tuplehash[!dir].tuple;
5677+ if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
5678+ other_dst = other_tuple->dst_cache;
5679+
5680+ err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
5681+ if (err < 0)
5682+ goto err_flow_match;
5683+
5684+ flow_rule->rule->action.num_entries = 0;
5685+ if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
5686+ goto err_flow_match;
5687+
5688+ return flow_rule;
5689+
5690+err_flow_match:
5691+ kfree(flow_rule->rule);
5692+err_flow_rule:
5693+ kfree(flow_rule);
5694+err_flow:
5695+ return NULL;
5696+}
5697+
5698+static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
5699+{
5700+ struct flow_action_entry *entry;
5701+ int i;
5702+
5703+ for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
5704+ entry = &flow_rule->rule->action.entries[i];
5705+ if (entry->id != FLOW_ACTION_REDIRECT)
5706+ continue;
5707+
5708+ dev_put(entry->dev);
5709+ }
5710+ kfree(flow_rule->rule);
5711+ kfree(flow_rule);
5712+}
5713+
5714+static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
5715+{
5716+ int i;
5717+
5718+ for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
5719+ __nf_flow_offload_destroy(flow_rule[i]);
5720+}
5721+
5722+static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
5723+ struct nf_flow_rule *flow_rule[])
5724+{
5725+ struct net *net = read_pnet(&offload->flowtable->net);
5726+
5727+ flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
5728+ FLOW_OFFLOAD_DIR_ORIGINAL);
5729+ if (!flow_rule[0])
5730+ return -ENOMEM;
5731+
5732+ flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
5733+ FLOW_OFFLOAD_DIR_REPLY);
5734+ if (!flow_rule[1]) {
5735+ __nf_flow_offload_destroy(flow_rule[0]);
5736+ return -ENOMEM;
5737+ }
5738+
5739+ return 0;
5740+}
5741+
5742+static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
5743+ __be16 proto, int priority,
5744+ enum flow_cls_command cmd,
5745+ const struct flow_offload_tuple *tuple,
5746+ struct netlink_ext_ack *extack)
5747+{
5748+ cls_flow->common.protocol = proto;
5749+ cls_flow->common.prio = priority;
5750+ cls_flow->common.extack = extack;
5751+ cls_flow->command = cmd;
5752+ cls_flow->cookie = (unsigned long)tuple;
5753+}
5754+
5755+static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
5756+ struct flow_offload *flow,
5757+ struct nf_flow_rule *flow_rule,
5758+ enum flow_offload_tuple_dir dir,
5759+ int priority, int cmd,
5760+ struct flow_stats *stats,
5761+ struct list_head *block_cb_list)
5762+{
5763+ struct flow_cls_offload cls_flow = {};
5764+ struct flow_block_cb *block_cb;
5765+ struct netlink_ext_ack extack;
5766+ __be16 proto = ETH_P_ALL;
5767+ int err, i = 0;
5768+
5769+ nf_flow_offload_init(&cls_flow, proto, priority, cmd,
5770+ &flow->tuplehash[dir].tuple, &extack);
5771+ if (cmd == FLOW_CLS_REPLACE)
5772+ cls_flow.rule = flow_rule->rule;
5773+
developer0cc0d732023-06-07 13:52:41 +08005774+ down_read(&flowtable->flow_block_lock);
developer8cb3ac72022-07-04 10:55:14 +08005775+ list_for_each_entry(block_cb, block_cb_list, list) {
5776+ err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
5777+ block_cb->cb_priv);
5778+ if (err < 0)
5779+ continue;
5780+
5781+ i++;
5782+ }
developer0cc0d732023-06-07 13:52:41 +08005783+ up_read(&flowtable->flow_block_lock);
developer8cb3ac72022-07-04 10:55:14 +08005784+
5785+ if (cmd == FLOW_CLS_STATS)
5786+ memcpy(stats, &cls_flow.stats, sizeof(*stats));
5787+
5788+ return i;
5789+}
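nf_flow_offload_tuple() issues one flow_cls_offload per registered block callback, using the tuple pointer as the cookie: REPLACE carries the prepared rule, DESTROY carries only the cookie, and STATS copies the driver's counters back out of cls_flow.stats. A hedged sketch of what a callback on that list typically looks like; example_ft_block_cb is an illustrative name, though the MediaTek callback added elsewhere in this patch has the same shape:

/* Hedged driver-side sketch; all example_* names are assumptions. */
static int example_ft_block_cb(enum tc_setup_type type, void *type_data,
			       void *cb_priv)
{
	struct flow_cls_offload *cls = type_data;

	if (type != TC_SETUP_CLSFLOWER)
		return -EOPNOTSUPP;

	switch (cls->command) {
	case FLOW_CLS_REPLACE:
		/* parse cls->rule->match and ->action, install a HW entry
		 * keyed by cls->cookie
		 */
		return 0;
	case FLOW_CLS_DESTROY:
		/* remove the HW entry keyed by cls->cookie */
		return 0;
	case FLOW_CLS_STATS:
		/* fill cls->stats.pkts/.bytes/.lastused from HW counters */
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}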
5790+
5791+static int flow_offload_tuple_add(struct flow_offload_work *offload,
5792+ struct nf_flow_rule *flow_rule,
5793+ enum flow_offload_tuple_dir dir)
5794+{
5795+ return nf_flow_offload_tuple(offload->flowtable, offload->flow,
5796+ flow_rule, dir, offload->priority,
5797+ FLOW_CLS_REPLACE, NULL,
5798+ &offload->flowtable->flow_block.cb_list);
5799+}
5800+
5801+static void flow_offload_tuple_del(struct flow_offload_work *offload,
5802+ enum flow_offload_tuple_dir dir)
5803+{
5804+ nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
5805+ offload->priority, FLOW_CLS_DESTROY, NULL,
5806+ &offload->flowtable->flow_block.cb_list);
5807+}
5808+
5809+static int flow_offload_rule_add(struct flow_offload_work *offload,
5810+ struct nf_flow_rule *flow_rule[])
5811+{
5812+ int ok_count = 0;
5813+
5814+ ok_count += flow_offload_tuple_add(offload, flow_rule[0],
5815+ FLOW_OFFLOAD_DIR_ORIGINAL);
5816+ ok_count += flow_offload_tuple_add(offload, flow_rule[1],
5817+ FLOW_OFFLOAD_DIR_REPLY);
5818+ if (ok_count == 0)
5819+ return -ENOENT;
5820+
5821+ return 0;
5822+}
5823+
5824+static void flow_offload_work_add(struct flow_offload_work *offload)
5825+{
5826+ struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
5827+ int err;
5828+
5829+ err = nf_flow_offload_alloc(offload, flow_rule);
5830+ if (err < 0)
5831+ return;
5832+
5833+ err = flow_offload_rule_add(offload, flow_rule);
5834+ if (err < 0)
5835+ goto out;
5836+
5837+ set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
5838+
5839+out:
5840+ nf_flow_offload_destroy(flow_rule);
5841+}
5842+
5843+static void flow_offload_work_del(struct flow_offload_work *offload)
5844+{
5845+ clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
5846+ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
5847+ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
5848+ set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
5849+}
5850+
5851+static void flow_offload_tuple_stats(struct flow_offload_work *offload,
5852+ enum flow_offload_tuple_dir dir,
5853+ struct flow_stats *stats)
5854+{
5855+ nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
5856+ offload->priority, FLOW_CLS_STATS, stats,
5857+ &offload->flowtable->flow_block.cb_list);
5858+}
5859+
5860+static void flow_offload_work_stats(struct flow_offload_work *offload)
5861+{
5862+ struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
5863+ u64 lastused;
5864+
5865+ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
5866+ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);
5867+
5868+ lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
5869+ offload->flow->timeout = max_t(u64, offload->flow->timeout,
5870+ lastused + flow_offload_get_timeout(offload->flow));
5871+
5872+ if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
5873+ if (stats[0].pkts)
5874+ nf_ct_acct_add(offload->flow->ct,
5875+ FLOW_OFFLOAD_DIR_ORIGINAL,
5876+ stats[0].pkts, stats[0].bytes);
5877+ if (stats[1].pkts)
5878+ nf_ct_acct_add(offload->flow->ct,
5879+ FLOW_OFFLOAD_DIR_REPLY,
5880+ stats[1].pkts, stats[1].bytes);
5881+ }
5882+}
5883+
5884+static void flow_offload_work_handler(struct work_struct *work)
5885+{
5886+ struct flow_offload_work *offload;
5887+
5888+ offload = container_of(work, struct flow_offload_work, work);
5889+ switch (offload->cmd) {
5890+ case FLOW_CLS_REPLACE:
5891+ flow_offload_work_add(offload);
5892+ break;
5893+ case FLOW_CLS_DESTROY:
5894+ flow_offload_work_del(offload);
5895+ break;
5896+ case FLOW_CLS_STATS:
5897+ flow_offload_work_stats(offload);
5898+ break;
5899+ default:
5900+ WARN_ON_ONCE(1);
5901+ }
5902+
5903+ clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags);
5904+ kfree(offload);
5905+}
5906+
5907+static void flow_offload_queue_work(struct flow_offload_work *offload)
5908+{
5909+ if (offload->cmd == FLOW_CLS_REPLACE)
5910+ queue_work(nf_flow_offload_add_wq, &offload->work);
5911+ else if (offload->cmd == FLOW_CLS_DESTROY)
5912+ queue_work(nf_flow_offload_del_wq, &offload->work);
5913+ else
5914+ queue_work(nf_flow_offload_stats_wq, &offload->work);
5915+}
5916+
5917+static struct flow_offload_work *
5918+nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
5919+ struct flow_offload *flow, unsigned int cmd)
5920+{
5921+ struct flow_offload_work *offload;
5922+
5923+ if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags))
5924+ return NULL;
5925+
5926+ offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
5927+ if (!offload) {
5928+ clear_bit(NF_FLOW_HW_PENDING, &flow->flags);
5929+ return NULL;
5930+ }
5931+
5932+ offload->cmd = cmd;
5933+ offload->flow = flow;
5934+ offload->priority = flowtable->priority;
5935+ offload->flowtable = flowtable;
5936+ INIT_WORK(&offload->work, flow_offload_work_handler);
5937+
5938+ return offload;
5939+}
5940+
5941+
5942+void nf_flow_offload_add(struct nf_flowtable *flowtable,
5943+ struct flow_offload *flow)
5944+{
5945+ struct flow_offload_work *offload;
5946+
5947+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
5948+ if (!offload)
5949+ return;
5950+
5951+ flow_offload_queue_work(offload);
5952+}
5953+
5954+void nf_flow_offload_del(struct nf_flowtable *flowtable,
5955+ struct flow_offload *flow)
5956+{
5957+ struct flow_offload_work *offload;
5958+
5959+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY);
5960+ if (!offload)
5961+ return;
5962+
5963+ set_bit(NF_FLOW_HW_DYING, &flow->flags);
5964+ flow_offload_queue_work(offload);
5965+}
5966+
5967+void nf_flow_offload_stats(struct nf_flowtable *flowtable,
developeree39bcf2023-06-16 08:03:30 +08005968+ struct flow_offload *flow, bool force)
developer8cb3ac72022-07-04 10:55:14 +08005969+{
5970+ struct flow_offload_work *offload;
5971+ __s32 delta;
5972+
developeree39bcf2023-06-16 08:03:30 +08005973+ if (!force) {
5974+ delta = nf_flow_timeout_delta(flow->timeout);
5975+ if ((delta >= (9 * flow_offload_get_timeout(flow)) / 10))
5976+ return;
5977+ }
developer8cb3ac72022-07-04 10:55:14 +08005978+
5979+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
5980+ if (!offload)
5981+ return;
5982+
5983+ flow_offload_queue_work(offload);
5984+}
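Add, delete and stats requests go to three dedicated workqueues so deletions and accounting are not serialized behind a burst of pending additions, and the stats path skips flows whose timeout was refreshed recently unless force is set. These entry points are driven from the flowtable garbage collector; a simplified, hedged sketch of that caller, with flag names taken from the rest of this patch:

/* Hedged sketch of the gc step in nf_flow_table_core.c that drives the
 * work items queued above; simplified, not the exact code.
 */
static void example_gc_step(struct nf_flowtable *flow_table,
			    struct flow_offload *flow)
{
	if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
		if (test_bit(NF_FLOW_HW, &flow->flags) &&
		    !test_bit(NF_FLOW_HW_DYING, &flow->flags))
			nf_flow_offload_del(flow_table, flow);	/* queue DESTROY */
	} else if (test_bit(NF_FLOW_HW, &flow->flags)) {
		nf_flow_offload_stats(flow_table, flow, false);	/* queue STATS */
	}
}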
5985+
5986+void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
5987+{
5988+ if (nf_flowtable_hw_offload(flowtable)) {
5989+ flush_workqueue(nf_flow_offload_add_wq);
5990+ flush_workqueue(nf_flow_offload_del_wq);
5991+ flush_workqueue(nf_flow_offload_stats_wq);
5992+ }
5993+}
5994+
5995+static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
5996+ struct flow_block_offload *bo,
5997+ enum flow_block_command cmd)
5998+{
5999+ struct flow_block_cb *block_cb, *next;
6000+ int err = 0;
6001+
developer0cc0d732023-06-07 13:52:41 +08006002+ down_write(&flowtable->flow_block_lock);
developeree39bcf2023-06-16 08:03:30 +08006003+
developer8cb3ac72022-07-04 10:55:14 +08006004+ switch (cmd) {
6005+ case FLOW_BLOCK_BIND:
6006+ list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
6007+ break;
6008+ case FLOW_BLOCK_UNBIND:
6009+ list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
6010+ list_del(&block_cb->list);
6011+ flow_block_cb_free(block_cb);
6012+ }
6013+ break;
6014+ default:
6015+ WARN_ON_ONCE(1);
6016+ err = -EOPNOTSUPP;
6017+ }
developeree39bcf2023-06-16 08:03:30 +08006018+
developer0cc0d732023-06-07 13:52:41 +08006019+ up_write(&flowtable->flow_block_lock);
developera54478c2022-10-01 16:41:46 +08006020+
developer8cb3ac72022-07-04 10:55:14 +08006021+ return err;
6022+}
6023+
6024+static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
6025+ struct net *net,
6026+ enum flow_block_command cmd,
6027+ struct nf_flowtable *flowtable,
6028+ struct netlink_ext_ack *extack)
6029+{
6030+ memset(bo, 0, sizeof(*bo));
6031+ bo->net = net;
6032+ bo->block = &flowtable->flow_block;
6033+ bo->command = cmd;
6034+ bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
6035+ bo->extack = extack;
6036+ INIT_LIST_HEAD(&bo->cb_list);
6037+}
6038+
6039+static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
6040+ struct nf_flowtable *flowtable,
6041+ struct net_device *dev,
6042+ enum flow_block_command cmd,
6043+ struct netlink_ext_ack *extack)
6044+{
6045+ nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
6046+ extack);
6047+ flow_indr_block_call(dev, bo, cmd);
6048+
6049+ if (list_empty(&bo->cb_list))
6050+ return -EOPNOTSUPP;
6051+
6052+ return 0;
6053+}
6054+
6055+static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
6056+ struct nf_flowtable *flowtable,
6057+ struct net_device *dev,
6058+ enum flow_block_command cmd,
6059+ struct netlink_ext_ack *extack)
6060+{
6061+ int err;
6062+
6063+ nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
6064+ extack);
developer0cc0d732023-06-07 13:52:41 +08006065+ down_write(&flowtable->flow_block_lock);
developer8cb3ac72022-07-04 10:55:14 +08006066+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
developer0cc0d732023-06-07 13:52:41 +08006067+ up_write(&flowtable->flow_block_lock);
developer8cb3ac72022-07-04 10:55:14 +08006068+ if (err < 0)
6069+ return err;
6070+
6071+ return 0;
6072+}
6073+
6074+int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
6075+ struct net_device *dev,
6076+ enum flow_block_command cmd)
6077+{
6078+ struct netlink_ext_ack extack = {};
6079+ struct flow_block_offload bo;
6080+ int err;
6081+
6082+ if (!nf_flowtable_hw_offload(flowtable))
6083+ return 0;
6084+
6085+ if (dev->netdev_ops->ndo_setup_tc)
6086+ err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
6087+ &extack);
6088+ else
6089+ err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
6090+ &extack);
6091+ if (err < 0)
6092+ return err;
6093+
6094+ return nf_flow_table_block_setup(flowtable, &bo, cmd);
6095+}
6096+EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);
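nf_flow_table_offload_setup() binds or unbinds the flowtable block on a device, preferring the device's own ndo_setup_tc with TC_SETUP_FT and falling back to the indirect block path for devices whose offload is handled by another driver. A hedged sketch of the driver side of that bind; example_* names are assumptions, and the MediaTek driver in this patch wires the equivalent up in mtk_ppe_offload.c:

/* Hedged driver-side sketch of accepting the FLOW_BLOCK_BIND above;
 * example_ft_block_cb refers to the callback sketched earlier and is
 * only declared here.
 */
static int example_ft_block_cb(enum tc_setup_type type, void *type_data,
			       void *cb_priv);

static int example_setup_tc(struct net_device *dev, enum tc_setup_type type,
			    void *type_data)
{
	switch (type) {
	case TC_SETUP_BLOCK:
	case TC_SETUP_FT:
		return flow_block_cb_setup_simple(type_data, NULL,
						  example_ft_block_cb,
						  dev, dev, true);
	default:
		return -EOPNOTSUPP;
	}
}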
6097+
6098+int nf_flow_table_offload_init(void)
6099+{
6100+ nf_flow_offload_add_wq = alloc_workqueue("nf_ft_offload_add",
6101+ WQ_UNBOUND | WQ_SYSFS, 0);
6102+ if (!nf_flow_offload_add_wq)
6103+ return -ENOMEM;
6104+
6105+ nf_flow_offload_del_wq = alloc_workqueue("nf_ft_offload_del",
6106+ WQ_UNBOUND | WQ_SYSFS, 0);
6107+ if (!nf_flow_offload_del_wq)
6108+ goto err_del_wq;
6109+
6110+ nf_flow_offload_stats_wq = alloc_workqueue("nf_ft_offload_stats",
6111+ WQ_UNBOUND | WQ_SYSFS, 0);
6112+ if (!nf_flow_offload_stats_wq)
6113+ goto err_stats_wq;
6114+
6115+ return 0;
6116+
6117+err_stats_wq:
6118+ destroy_workqueue(nf_flow_offload_del_wq);
6119+err_del_wq:
6120+ destroy_workqueue(nf_flow_offload_add_wq);
6121+ return -ENOMEM;
6122+}
6123+
6124+void nf_flow_table_offload_exit(void)
6125+{
6126+ destroy_workqueue(nf_flow_offload_add_wq);
6127+ destroy_workqueue(nf_flow_offload_del_wq);
6128+ destroy_workqueue(nf_flow_offload_stats_wq);
6129+}
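The workqueues above are created from nf_flow_table_offload_init(), called once at module load from the flowtable core (modified earlier in this patch); a hedged sketch of that pairing, with example_ names standing in for the real init functions:

/* Hedged sketch of the init/exit wiring; shape follows
 * nf_flow_table_core.c, names are illustrative.
 */
static int __init example_flow_table_module_init(void)
{
	return nf_flow_table_offload_init();
}

static void __exit example_flow_table_module_exit(void)
{
	nf_flow_table_offload_exit();
}

module_init(example_flow_table_module_init);
module_exit(example_flow_table_module_exit);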
6130diff --git a/net/netfilter/xt_FLOWOFFLOAD.c b/net/netfilter/xt_FLOWOFFLOAD.c
6131new file mode 100644
developer58aa0682023-09-18 14:02:26 +08006132index 0000000..2cab008
developer8cb3ac72022-07-04 10:55:14 +08006133--- /dev/null
6134+++ b/net/netfilter/xt_FLOWOFFLOAD.c
developeree39bcf2023-06-16 08:03:30 +08006135@@ -0,0 +1,794 @@
developer8cb3ac72022-07-04 10:55:14 +08006136+/*
6137+ * Copyright (C) 2018-2021 Felix Fietkau <nbd@nbd.name>
6138+ *
6139+ * This program is free software; you can redistribute it and/or modify
6140+ * it under the terms of the GNU General Public License version 2 as
6141+ * published by the Free Software Foundation.
6142+ */
6143+#include <linux/module.h>
6144+#include <linux/init.h>
6145+#include <linux/netfilter.h>
6146+#include <linux/netfilter/xt_FLOWOFFLOAD.h>
6147+#include <linux/if_vlan.h>
6148+#include <net/ip.h>
6149+#include <net/netfilter/nf_conntrack.h>
6150+#include <net/netfilter/nf_conntrack_extend.h>
6151+#include <net/netfilter/nf_conntrack_helper.h>
6152+#include <net/netfilter/nf_flow_table.h>
6153+
6154+struct xt_flowoffload_hook {
6155+ struct hlist_node list;
6156+ struct nf_hook_ops ops;
6157+ struct net *net;
6158+ bool registered;
6159+ bool used;
6160+};
6161+
6162+struct xt_flowoffload_table {
6163+ struct nf_flowtable ft;
6164+ struct hlist_head hooks;
6165+ struct delayed_work work;
6166+};
6167+
6168+struct nf_forward_info {
6169+ const struct net_device *indev;
6170+ const struct net_device *outdev;
6171+ const struct net_device *hw_outdev;
6172+ struct id {
6173+ __u16 id;
6174+ __be16 proto;
6175+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
6176+ u8 num_encaps;
6177+ u8 ingress_vlans;
6178+ u8 h_source[ETH_ALEN];
6179+ u8 h_dest[ETH_ALEN];
6180+ enum flow_offload_xmit_type xmit_type;
6181+};
6182+
6183+static DEFINE_SPINLOCK(hooks_lock);
6184+
6185+struct xt_flowoffload_table flowtable[2];
6186+
6187+static unsigned int
6188+xt_flowoffload_net_hook(void *priv, struct sk_buff *skb,
6189+ const struct nf_hook_state *state)
6190+{
6191+ struct vlan_ethhdr *veth;
6192+ __be16 proto;
6193+
6194+ switch (skb->protocol) {
6195+ case htons(ETH_P_8021Q):
6196+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
6197+ proto = veth->h_vlan_encapsulated_proto;
6198+ break;
6199+ case htons(ETH_P_PPP_SES):
6200+ proto = nf_flow_pppoe_proto(skb);
6201+ break;
6202+ default:
6203+ proto = skb->protocol;
6204+ break;
6205+ }
6206+
6207+ switch (proto) {
6208+ case htons(ETH_P_IP):
6209+ return nf_flow_offload_ip_hook(priv, skb, state);
6210+ case htons(ETH_P_IPV6):
6211+ return nf_flow_offload_ipv6_hook(priv, skb, state);
6212+ }
6213+
6214+ return NF_ACCEPT;
6215+}
6216+
6217+static int
6218+xt_flowoffload_create_hook(struct xt_flowoffload_table *table,
6219+ struct net_device *dev)
6220+{
6221+ struct xt_flowoffload_hook *hook;
6222+ struct nf_hook_ops *ops;
6223+
6224+ hook = kzalloc(sizeof(*hook), GFP_ATOMIC);
6225+ if (!hook)
6226+ return -ENOMEM;
6227+
6228+ ops = &hook->ops;
6229+ ops->pf = NFPROTO_NETDEV;
6230+ ops->hooknum = NF_NETDEV_INGRESS;
6231+ ops->priority = 10;
6232+ ops->priv = &table->ft;
6233+ ops->hook = xt_flowoffload_net_hook;
6234+ ops->dev = dev;
6235+
6236+ hlist_add_head(&hook->list, &table->hooks);
6237+ mod_delayed_work(system_power_efficient_wq, &table->work, 0);
6238+
6239+ return 0;
6240+}
6241+
6242+static struct xt_flowoffload_hook *
6243+flow_offload_lookup_hook(struct xt_flowoffload_table *table,
6244+ struct net_device *dev)
6245+{
6246+ struct xt_flowoffload_hook *hook;
6247+
6248+ hlist_for_each_entry(hook, &table->hooks, list) {
6249+ if (hook->ops.dev == dev)
6250+ return hook;
6251+ }
6252+
6253+ return NULL;
6254+}
6255+
6256+static void
6257+xt_flowoffload_check_device(struct xt_flowoffload_table *table,
6258+ struct net_device *dev)
6259+{
6260+ struct xt_flowoffload_hook *hook;
6261+
6262+ if (!dev)
6263+ return;
6264+
6265+ spin_lock_bh(&hooks_lock);
6266+ hook = flow_offload_lookup_hook(table, dev);
6267+ if (hook)
6268+ hook->used = true;
6269+ else
6270+ xt_flowoffload_create_hook(table, dev);
6271+ spin_unlock_bh(&hooks_lock);
6272+}
6273+
6274+static void
6275+xt_flowoffload_register_hooks(struct xt_flowoffload_table *table)
6276+{
6277+ struct xt_flowoffload_hook *hook;
6278+
6279+restart:
6280+ hlist_for_each_entry(hook, &table->hooks, list) {
6281+ if (hook->registered)
6282+ continue;
6283+
6284+ hook->registered = true;
6285+ hook->net = dev_net(hook->ops.dev);
6286+ spin_unlock_bh(&hooks_lock);
6287+ nf_register_net_hook(hook->net, &hook->ops);
6288+ if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
6289+ table->ft.type->setup(&table->ft, hook->ops.dev,
6290+ FLOW_BLOCK_BIND);
6291+ spin_lock_bh(&hooks_lock);
6292+ goto restart;
6293+ }
6294+
6295+}
6296+
6297+static bool
6298+xt_flowoffload_cleanup_hooks(struct xt_flowoffload_table *table)
6299+{
6300+ struct xt_flowoffload_hook *hook;
6301+ bool active = false;
6302+
6303+restart:
6304+ spin_lock_bh(&hooks_lock);
6305+ hlist_for_each_entry(hook, &table->hooks, list) {
6306+ if (hook->used || !hook->registered) {
6307+ active = true;
6308+ continue;
6309+ }
6310+
6311+ hlist_del(&hook->list);
6312+ spin_unlock_bh(&hooks_lock);
6313+ if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
6314+ table->ft.type->setup(&table->ft, hook->ops.dev,
6315+ FLOW_BLOCK_UNBIND);
6316+ nf_unregister_net_hook(hook->net, &hook->ops);
6317+ kfree(hook);
6318+ goto restart;
6319+ }
6320+ spin_unlock_bh(&hooks_lock);
6321+
6322+ return active;
6323+}
6324+
6325+static void
6326+xt_flowoffload_check_hook(struct flow_offload *flow, void *data)
6327+{
6328+ struct xt_flowoffload_table *table = data;
6329+ struct flow_offload_tuple *tuple0 = &flow->tuplehash[0].tuple;
6330+ struct flow_offload_tuple *tuple1 = &flow->tuplehash[1].tuple;
6331+ struct xt_flowoffload_hook *hook;
6332+
6333+ spin_lock_bh(&hooks_lock);
6334+ hlist_for_each_entry(hook, &table->hooks, list) {
6335+ if (hook->ops.dev->ifindex != tuple0->iifidx &&
6336+ hook->ops.dev->ifindex != tuple1->iifidx)
6337+ continue;
6338+
6339+ hook->used = true;
6340+ }
6341+ spin_unlock_bh(&hooks_lock);
6342+}
6343+
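+/* Periodic maintenance work per flowtable: register freshly created
+ * hooks (binding the hardware flow block when NF_FLOWTABLE_HW_OFFLOAD
+ * is set), clear every hook's "used" flag, re-mark hooks that still
+ * match an active flow, and tear down the hooks nobody references any
+ * more. Re-arms itself every second while hooks remain active.
+ */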
6344+static void
6345+xt_flowoffload_hook_work(struct work_struct *work)
6346+{
6347+ struct xt_flowoffload_table *table;
6348+ struct xt_flowoffload_hook *hook;
6349+ int err;
6350+
6351+ table = container_of(work, struct xt_flowoffload_table, work.work);
6352+
6353+ spin_lock_bh(&hooks_lock);
6354+ xt_flowoffload_register_hooks(table);
6355+ hlist_for_each_entry(hook, &table->hooks, list)
6356+ hook->used = false;
6357+ spin_unlock_bh(&hooks_lock);
6358+
6359+ err = nf_flow_table_iterate(&table->ft, xt_flowoffload_check_hook,
6360+ table);
6361+ if (err && err != -EAGAIN)
6362+ goto out;
6363+
6364+ if (!xt_flowoffload_cleanup_hooks(table))
6365+ return;
6366+
6367+out:
6368+ queue_delayed_work(system_power_efficient_wq, &table->work, HZ);
6369+}
6370+
6371+static bool
6372+xt_flowoffload_skip(struct sk_buff *skb, int family)
6373+{
6374+ if (skb_sec_path(skb))
6375+ return true;
6376+
6377+ if (family == NFPROTO_IPV4) {
6378+ const struct ip_options *opt = &(IPCB(skb)->opt);
6379+
6380+ if (unlikely(opt->optlen))
6381+ return true;
6382+ }
6383+
6384+ return false;
6385+}
6386+
6387+static enum flow_offload_xmit_type nf_xmit_type(struct dst_entry *dst)
6388+{
6389+ if (dst_xfrm(dst))
6390+ return FLOW_OFFLOAD_XMIT_XFRM;
6391+
6392+ return FLOW_OFFLOAD_XMIT_NEIGH;
6393+}
6394+
6395+static void nf_default_forward_path(struct nf_flow_route *route,
6396+ struct dst_entry *dst_cache,
6397+ enum ip_conntrack_dir dir,
6398+ struct net_device **dev)
6399+{
6400+ route->tuple[!dir].in.ifindex = dst_cache->dev->ifindex;
6401+ route->tuple[dir].dst = dst_cache;
6402+ route->tuple[dir].xmit_type = nf_xmit_type(dst_cache);
6403+}
6404+
6405+static bool nf_is_valid_ether_device(const struct net_device *dev)
6406+{
6407+ if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
6408+ dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
6409+ return false;
6410+
6411+ return true;
6412+}
6413+
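+/* Flatten the path returned by dev_fill_forward_path(): record source
+ * and destination MAC addresses, collect up to NF_FLOW_TABLE_ENCAP_MAX
+ * VLAN/PPPoE encapsulation tags, honour bridge VLAN tag/untag modes and
+ * remember the lowest device as ingress. Direct xmit is used only when
+ * that device is a regular Ethernet device.
+ */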
6414+static void nf_dev_path_info(const struct net_device_path_stack *stack,
6415+ struct nf_forward_info *info,
6416+ unsigned char *ha)
6417+{
6418+ const struct net_device_path *path;
6419+ int i;
6420+
6421+ memcpy(info->h_dest, ha, ETH_ALEN);
6422+
6423+ for (i = 0; i < stack->num_paths; i++) {
6424+ path = &stack->path[i];
6425+
6426+ info->indev = path->dev;
6427+
6428+ switch (path->type) {
6429+ case DEV_PATH_ETHERNET:
6430+ case DEV_PATH_DSA:
6431+ case DEV_PATH_VLAN:
6432+ case DEV_PATH_PPPOE:
6433+ if (is_zero_ether_addr(info->h_source))
6434+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
6435+
6436+ if (path->type == DEV_PATH_ETHERNET)
6437+ break;
6438+ if (path->type == DEV_PATH_DSA) {
6439+ i = stack->num_paths;
6440+ break;
6441+ }
6442+
6443+ /* DEV_PATH_VLAN and DEV_PATH_PPPOE */
6444+ if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
6445+ info->indev = NULL;
6446+ break;
6447+ }
6448+ if (!info->outdev)
6449+ info->outdev = path->dev;
6450+ info->encap[info->num_encaps].id = path->encap.id;
6451+ info->encap[info->num_encaps].proto = path->encap.proto;
6452+ info->num_encaps++;
6453+ if (path->type == DEV_PATH_PPPOE)
6454+ memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
6455+ break;
6456+ case DEV_PATH_BRIDGE:
6457+ if (is_zero_ether_addr(info->h_source))
6458+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
6459+
6460+ switch (path->bridge.vlan_mode) {
6461+ case DEV_PATH_BR_VLAN_UNTAG_HW:
6462+ info->ingress_vlans |= BIT(info->num_encaps - 1);
6463+ break;
6464+ case DEV_PATH_BR_VLAN_TAG:
6465+ info->encap[info->num_encaps].id = path->bridge.vlan_id;
6466+ info->encap[info->num_encaps].proto = path->bridge.vlan_proto;
6467+ info->num_encaps++;
6468+ break;
6469+ case DEV_PATH_BR_VLAN_UNTAG:
6470+ info->num_encaps--;
6471+ break;
6472+ case DEV_PATH_BR_VLAN_KEEP:
6473+ break;
6474+ }
6475+ break;
6476+ default:
6477+ break;
6478+ }
6479+ }
6480+ if (!info->outdev)
6481+ info->outdev = info->indev;
6482+
6483+ info->hw_outdev = info->indev;
6484+
6485+ if (nf_is_valid_ether_device(info->indev))
6486+ info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
6487+}
6488+
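+/* Resolve the destination MAC used for the path lookup. For NATed
+ * connections it is taken from the neighbour cache (the entry must be
+ * NUD_VALID); otherwise the address supplied by the caller from the skb
+ * is kept. dev_fill_forward_path() then expands the dst device into the
+ * full transmit path.
+ */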
6489+static int nf_dev_fill_forward_path(const struct nf_flow_route *route,
6490+ const struct dst_entry *dst_cache,
6491+ const struct nf_conn *ct,
6492+ enum ip_conntrack_dir dir, u8 *ha,
6493+ struct net_device_path_stack *stack)
6494+{
6495+ const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
6496+ struct net_device *dev = dst_cache->dev;
6497+ struct neighbour *n;
6498+ u8 nud_state;
6499+
6500+ if (!nf_is_valid_ether_device(dev))
6501+ goto out;
6502+
6503+	if (ct->status & IPS_NAT_MASK) {
6504+ n = dst_neigh_lookup(dst_cache, daddr);
6505+ if (!n)
6506+ return -1;
6507+
6508+		read_lock_bh(&n->lock);
6509+ nud_state = n->nud_state;
6510+ ether_addr_copy(ha, n->ha);
6511+ read_unlock_bh(&n->lock);
6512+ neigh_release(n);
6513+
6514+		if (!(nud_state & NUD_VALID))
6515+ return -1;
6516+ }
6517+
6518+out:
6519+ return dev_fill_forward_path(dev, ha, stack);
6520+}
6521+
6522+static int nf_dev_forward_path(struct sk_buff *skb,
6523+ struct nf_flow_route *route,
6524+			       const struct nf_conn *ct,
6525+ enum ip_conntrack_dir dir,
6526+ struct net_device **devs)
6527+{
6528+ const struct dst_entry *dst = route->tuple[dir].dst;
6529+	struct ethhdr *eth;
6530+ enum ip_conntrack_dir skb_dir;
6531+	struct net_device_path_stack stack;
6532+ struct nf_forward_info info = {};
6533+ unsigned char ha[ETH_ALEN];
6534+ int i;
6535+
6536+	if (!(ct->status & IPS_NAT_MASK) && skb_mac_header_was_set(skb)) {
6537+ eth = eth_hdr(skb);
6538+ skb_dir = CTINFO2DIR(skb_get_nfct(skb) & NFCT_INFOMASK);
6539+
6540+ if (skb_dir != dir) {
6541+ memcpy(ha, eth->h_source, ETH_ALEN);
6542+ memcpy(info.h_source, eth->h_dest, ETH_ALEN);
6543+ } else {
6544+ memcpy(ha, eth->h_dest, ETH_ALEN);
6545+ memcpy(info.h_source, eth->h_source, ETH_ALEN);
6546+ }
6547+ }
6548+
6549+	if (nf_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
6550+		nf_dev_path_info(&stack, &info, ha);
6551+
6552+ devs[!dir] = (struct net_device *)info.indev;
6553+ if (!info.indev)
6554+ return -1;
6555+
6556+ route->tuple[!dir].in.ifindex = info.indev->ifindex;
6557+ for (i = 0; i < info.num_encaps; i++) {
6558+ route->tuple[!dir].in.encap[i].id = info.encap[i].id;
6559+ route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
6560+ }
6561+ route->tuple[!dir].in.num_encaps = info.num_encaps;
6562+ route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
6563+
6564+ if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
6565+ memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
6566+ memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
6567+ route->tuple[dir].out.ifindex = info.outdev->ifindex;
6568+ route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex;
6569+ route->tuple[dir].xmit_type = info.xmit_type;
6570+ }
6571+
6572+ return 0;
6573+}
6574+
6575+static int
6576+xt_flowoffload_route_dir(struct nf_flow_route *route, const struct nf_conn *ct,
6577+ enum ip_conntrack_dir dir,
6578+ const struct xt_action_param *par, int ifindex,
6579+ struct net_device **devs)
6580+{
6581+ struct dst_entry *dst = NULL;
6582+ struct flowi fl;
6583+
6584+ memset(&fl, 0, sizeof(fl));
6585+ switch (xt_family(par)) {
6586+ case NFPROTO_IPV4:
6587+ fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.src.u3.ip;
6588+ fl.u.ip4.flowi4_oif = ifindex;
6589+ break;
6590+ case NFPROTO_IPV6:
6591+ fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
6592+ fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.src.u3.in6;
6593+ fl.u.ip6.flowi6_oif = ifindex;
6594+ break;
6595+ }
6596+
6597+ nf_route(xt_net(par), &dst, &fl, false, xt_family(par));
6598+ if (!dst)
6599+ return -ENOENT;
6600+
6601+ nf_default_forward_path(route, dst, dir, devs);
6602+
6603+ return 0;
6604+}
6605+
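+/* Route setup for NATed connections: the packet's own dst covers its
+ * direction, nf_route() computes the return route for the other one,
+ * and the L2 forward path is resolved only when both directions
+ * transmit via a neighbour.
+ */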
6606+static int
6607+xt_flowoffload_route_nat(struct sk_buff *skb, const struct nf_conn *ct,
6608+ const struct xt_action_param *par,
6609+ struct nf_flow_route *route, enum ip_conntrack_dir dir,
6610+ struct net_device **devs)
6611+{
6612+ struct dst_entry *this_dst = skb_dst(skb);
6613+ struct dst_entry *other_dst = NULL;
6614+ struct flowi fl;
6615+
6616+ memset(&fl, 0, sizeof(fl));
6617+ switch (xt_family(par)) {
6618+ case NFPROTO_IPV4:
6619+ fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
6620+ fl.u.ip4.flowi4_oif = xt_in(par)->ifindex;
6621+ break;
6622+ case NFPROTO_IPV6:
6623+ fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
6624+ fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
6625+ fl.u.ip6.flowi6_oif = xt_in(par)->ifindex;
6626+ break;
6627+ }
6628+
6629+ nf_route(xt_net(par), &other_dst, &fl, false, xt_family(par));
6630+ if (!other_dst)
6631+ return -ENOENT;
6632+
6633+ nf_default_forward_path(route, this_dst, dir, devs);
6634+ nf_default_forward_path(route, other_dst, !dir, devs);
6635+
6636+	if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH &&
6637+	    route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
6638+		if (nf_dev_forward_path(skb, route, ct, dir, devs))
6639+			return -1;
6640+		if (nf_dev_forward_path(skb, route, ct, !dir, devs))
6641+			return -1;
6642+ }
6643+
6644+ return 0;
6645+}
6646+
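+/* Route setup for connections without NAT: both directions are looked
+ * up with nf_route() bound to the corresponding device, releasing the
+ * dst references again if forward-path resolution fails.
+ */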
6647+static int
6648+xt_flowoffload_route_bridge(struct sk_buff *skb, const struct nf_conn *ct,
6649+ const struct xt_action_param *par,
6650+ struct nf_flow_route *route, enum ip_conntrack_dir dir,
6651+ struct net_device **devs)
6652+{
6653+ int ret;
6654+
6655+ ret = xt_flowoffload_route_dir(route, ct, dir, par,
6656+ devs[dir]->ifindex,
6657+ devs);
6658+ if (ret)
6659+ return ret;
6660+
6661+ ret = xt_flowoffload_route_dir(route, ct, !dir, par,
6662+ devs[!dir]->ifindex,
6663+ devs);
6664+ if (ret)
6665+		goto err_route_dir1;
6666+
6667+	if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH &&
6668+	    route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
6669+		if (nf_dev_forward_path(skb, route, ct, dir, devs) ||
6670+ nf_dev_forward_path(skb, route, ct, !dir, devs)) {
6671+			ret = -1;
6672+ goto err_route_dir2;
6673+ }
6674+	}
6675+
6676+ return 0;
6677+
6678+err_route_dir2:
6679+ dst_release(route->tuple[!dir].dst);
6680+err_route_dir1:
6681+ dst_release(route->tuple[dir].dst);
6682+ return ret;
6683+}
6684+
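+/* Target entry point. Only confirmed TCP connections in the
+ * ESTABLISHED state (and not FIN/RST packets) and UDP connections
+ * without helpers or sequence adjustment are offloaded. Routes are
+ * resolved via the NAT or bridge helper above, then the flow is added
+ * to flowtable[1] (hardware) or flowtable[0] (software) depending on
+ * the XT_FLOWOFFLOAD_HW flag, and ingress hooks are ensured for both
+ * devices.
+ */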
6685+static unsigned int
6686+flowoffload_tg(struct sk_buff *skb, const struct xt_action_param *par)
6687+{
6688+ struct xt_flowoffload_table *table;
6689+ const struct xt_flowoffload_target_info *info = par->targinfo;
6690+ struct tcphdr _tcph, *tcph = NULL;
6691+ enum ip_conntrack_info ctinfo;
6692+ enum ip_conntrack_dir dir;
6693+ struct nf_flow_route route = {};
6694+ struct flow_offload *flow = NULL;
6695+ struct net_device *devs[2] = {};
6696+ struct nf_conn *ct;
6697+ struct net *net;
6698+
6699+ if (xt_flowoffload_skip(skb, xt_family(par)))
6700+ return XT_CONTINUE;
6701+
6702+ ct = nf_ct_get(skb, &ctinfo);
6703+ if (ct == NULL)
6704+ return XT_CONTINUE;
6705+
6706+ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
6707+ case IPPROTO_TCP:
6708+ if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
6709+ return XT_CONTINUE;
6710+
6711+ tcph = skb_header_pointer(skb, par->thoff,
6712+ sizeof(_tcph), &_tcph);
6713+ if (unlikely(!tcph || tcph->fin || tcph->rst))
6714+ return XT_CONTINUE;
6715+ break;
6716+ case IPPROTO_UDP:
6717+ break;
6718+ default:
6719+ return XT_CONTINUE;
6720+ }
6721+
6722+ if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
6723+ ct->status & IPS_SEQ_ADJUST)
6724+ return XT_CONTINUE;
6725+
6726+ if (!nf_ct_is_confirmed(ct))
6727+ return XT_CONTINUE;
6728+
6729+	dir = CTINFO2DIR(ctinfo);
6730+
6731+	devs[dir] = xt_out(par);
6732+	devs[!dir] = xt_in(par);
6733+
6734+	if (!devs[dir] || !devs[!dir])
6735+		return XT_CONTINUE;
6736+
6737+	if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
6738+		return XT_CONTINUE;
6739+
6740+	if (ct->status & IPS_NAT_MASK) {
6741+ if (xt_flowoffload_route_nat(skb, ct, par, &route, dir, devs) < 0)
6742+ goto err_flow_route;
6743+ } else {
6744+ if (xt_flowoffload_route_bridge(skb, ct, par, &route, dir, devs) < 0)
6745+ goto err_flow_route;
6746+ }
6747+
6748+ flow = flow_offload_alloc(ct);
6749+ if (!flow)
6750+ goto err_flow_alloc;
6751+
6752+ if (flow_offload_route_init(flow, &route) < 0)
6753+ goto err_flow_add;
6754+
6755+ if (tcph) {
6756+ ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
6757+ ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
6758+ }
6759+
6760+ table = &flowtable[!!(info->flags & XT_FLOWOFFLOAD_HW)];
6761+
6762+ net = read_pnet(&table->ft.net);
6763+ if (!net)
6764+ write_pnet(&table->ft.net, xt_net(par));
6765+
6766+ if (flow_offload_add(&table->ft, flow) < 0)
6767+ goto err_flow_add;
6768+
6769+ xt_flowoffload_check_device(table, devs[0]);
6770+ xt_flowoffload_check_device(table, devs[1]);
6771+
6772+	if (!(ct->status & IPS_NAT_MASK))
6773+ dst_release(route.tuple[dir].dst);
6774+	dst_release(route.tuple[!dir].dst);
6775+
6776+ return XT_CONTINUE;
6777+
6778+err_flow_add:
6779+ flow_offload_free(flow);
6780+err_flow_alloc:
6781+	if (!(ct->status & IPS_NAT_MASK))
6782+ dst_release(route.tuple[dir].dst);
6783+	dst_release(route.tuple[!dir].dst);
6784+err_flow_route:
6785+ clear_bit(IPS_OFFLOAD_BIT, &ct->status);
6786+
6787+ return XT_CONTINUE;
6788+}
6789+
6790+static int flowoffload_chk(const struct xt_tgchk_param *par)
6791+{
6792+ struct xt_flowoffload_target_info *info = par->targinfo;
6793+
6794+ if (info->flags & ~XT_FLOWOFFLOAD_MASK)
6795+ return -EINVAL;
6796+
6797+ return 0;
6798+}
6799+
6800+static struct xt_target offload_tg_reg __read_mostly = {
6801+ .family = NFPROTO_UNSPEC,
6802+ .name = "FLOWOFFLOAD",
6803+ .revision = 0,
6804+ .targetsize = sizeof(struct xt_flowoffload_target_info),
6805+ .usersize = sizeof(struct xt_flowoffload_target_info),
6806+ .checkentry = flowoffload_chk,
6807+ .target = flowoffload_tg,
6808+ .me = THIS_MODULE,
6809+};
6810+
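+/* Netdevice notifier: on NETDEV_UNREGISTER remove any ingress hooks
+ * attached to the device from both flowtables and flush the flows that
+ * still reference it via nf_flow_table_cleanup().
+ */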
6811+static int flow_offload_netdev_event(struct notifier_block *this,
6812+ unsigned long event, void *ptr)
6813+{
6814+ struct xt_flowoffload_hook *hook0, *hook1;
6815+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
6816+
6817+ if (event != NETDEV_UNREGISTER)
6818+ return NOTIFY_DONE;
6819+
6820+ spin_lock_bh(&hooks_lock);
6821+ hook0 = flow_offload_lookup_hook(&flowtable[0], dev);
6822+ if (hook0)
6823+ hlist_del(&hook0->list);
6824+
6825+ hook1 = flow_offload_lookup_hook(&flowtable[1], dev);
6826+ if (hook1)
6827+ hlist_del(&hook1->list);
6828+ spin_unlock_bh(&hooks_lock);
6829+
6830+ if (hook0) {
6831+ nf_unregister_net_hook(hook0->net, &hook0->ops);
6832+ kfree(hook0);
6833+ }
6834+
6835+ if (hook1) {
6836+ nf_unregister_net_hook(hook1->net, &hook1->ops);
6837+ kfree(hook1);
6838+ }
6839+
6840+ nf_flow_table_cleanup(dev);
6841+
6842+ return NOTIFY_DONE;
6843+}
6844+
6845+static struct notifier_block flow_offload_netdev_notifier = {
6846+ .notifier_call = flow_offload_netdev_event,
6847+};
6848+
6849+static int nf_flow_rule_route_inet(struct net *net,
6850+ const struct flow_offload *flow,
6851+ enum flow_offload_tuple_dir dir,
6852+ struct nf_flow_rule *flow_rule)
6853+{
6854+ const struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
6855+ int err;
6856+
6857+ switch (flow_tuple->l3proto) {
6858+ case NFPROTO_IPV4:
6859+ err = nf_flow_rule_route_ipv4(net, flow, dir, flow_rule);
6860+ break;
6861+ case NFPROTO_IPV6:
6862+ err = nf_flow_rule_route_ipv6(net, flow, dir, flow_rule);
6863+ break;
6864+ default:
6865+ err = -1;
6866+ break;
6867+ }
6868+
6869+ return err;
6870+}
6871+
6872+static struct nf_flowtable_type flowtable_inet = {
6873+ .family = NFPROTO_INET,
6874+ .init = nf_flow_table_init,
6875+ .setup = nf_flow_table_offload_setup,
6876+ .action = nf_flow_rule_route_inet,
6877+ .free = nf_flow_table_free,
6878+ .hook = xt_flowoffload_net_hook,
6879+ .owner = THIS_MODULE,
6880+};
6881+
6882+static int init_flowtable(struct xt_flowoffload_table *tbl)
6883+{
6884+ INIT_DELAYED_WORK(&tbl->work, xt_flowoffload_hook_work);
6885+ tbl->ft.type = &flowtable_inet;
6886+
6887+ return nf_flow_table_init(&tbl->ft);
6888+}
6889+
6890+static int __init xt_flowoffload_tg_init(void)
6891+{
6892+ int ret;
6893+
6894+ register_netdevice_notifier(&flow_offload_netdev_notifier);
6895+
6896+ ret = init_flowtable(&flowtable[0]);
6897+ if (ret)
6898+ return ret;
6899+
6900+ ret = init_flowtable(&flowtable[1]);
6901+ if (ret)
6902+ goto cleanup;
6903+
6904+	flowtable[1].ft.flags = NF_FLOWTABLE_HW_OFFLOAD;
6905+
6906+ ret = xt_register_target(&offload_tg_reg);
6907+ if (ret)
6908+ goto cleanup2;
6909+
6910+ return 0;
6911+
6912+cleanup2:
6913+ nf_flow_table_free(&flowtable[1].ft);
6914+cleanup:
6915+ nf_flow_table_free(&flowtable[0].ft);
6916+ return ret;
6917+}
6918+
6919+static void __exit xt_flowoffload_tg_exit(void)
6920+{
6921+ xt_unregister_target(&offload_tg_reg);
6922+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
6923+ nf_flow_table_free(&flowtable[0].ft);
6924+ nf_flow_table_free(&flowtable[1].ft);
6925+}
6926+
6927+MODULE_LICENSE("GPL");
6928+module_init(xt_flowoffload_tg_init);
6929+module_exit(xt_flowoffload_tg_exit);
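+/* Example usage (a sketch; it assumes the matching iptables userspace
+ * extension for the FLOWOFFLOAD target is installed, and the exact
+ * option spelling may differ):
+ *
+ *   iptables -A FORWARD -m conntrack --ctstate ESTABLISHED \
+ *            -j FLOWOFFLOAD --hw
+ *
+ * With the hardware flag the flow goes to flowtable[1]
+ * (NF_FLOWTABLE_HW_OFFLOAD); without it, to the software flowtable[0].
+ */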
6930--
69312.18.0
6932