From 6ad9bd65769003ab526e504577e0f747eba14287 Mon Sep 17 00:00:00 2001
From: Bo Jiao <Bo.Jiao@mediatek.com>
Date: Wed, 22 Jun 2022 09:42:19 +0800
Subject: [PATCH 1/8]
 9990-mt7622-backport-nf-hw-offload-framework-and-upstream-hnat-plus-xt-FLOWOFFLOAD-update-v2

---
 drivers/net/ethernet/mediatek/Makefile | 3 +-
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 28 +-
 drivers/net/ethernet/mediatek/mtk_eth_soc.h | 20 +-
 drivers/net/ethernet/mediatek/mtk_ppe.c | 509 +++++++
 drivers/net/ethernet/mediatek/mtk_ppe.h | 288 ++++
 .../net/ethernet/mediatek/mtk_ppe_debugfs.c | 214 +++
 .../net/ethernet/mediatek/mtk_ppe_offload.c | 526 ++++++++
 drivers/net/ethernet/mediatek/mtk_ppe_regs.h | 144 ++
 drivers/net/ppp/ppp_generic.c | 22 +
 drivers/net/ppp/pppoe.c | 24 +
 include/linux/netdevice.h | 60 +
 include/linux/ppp_channel.h | 3 +
 include/net/dsa.h | 10 +
 include/net/flow_offload.h | 4 +
 include/net/ip6_route.h | 5 +-
 .../net/netfilter/ipv6/nf_conntrack_ipv6.h | 3 -
 include/net/netfilter/nf_conntrack.h | 12 +
 include/net/netfilter/nf_conntrack_acct.h | 11 +
 include/net/netfilter/nf_flow_table.h | 264 +++-
 include/net/netns/conntrack.h | 6 +
 .../linux/netfilter/nf_conntrack_common.h | 9 +-
 include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h | 17 +
 net/8021q/vlan_dev.c | 21 +
 net/bridge/br_device.c | 49 +
 net/bridge/br_private.h | 20 +
 net/bridge/br_vlan.c | 55 +
 net/core/dev.c | 46 +
 net/dsa/dsa.c | 9 +
 net/dsa/slave.c | 41 +-
 net/ipv4/netfilter/Kconfig | 4 +-
 net/ipv6/ip6_output.c | 2 +-
 net/ipv6/netfilter/Kconfig | 3 +-
 net/ipv6/route.c | 22 +-
 net/netfilter/Kconfig | 14 +-
 net/netfilter/Makefile | 4 +-
 net/netfilter/nf_conntrack_core.c | 20 +-
 net/netfilter/nf_conntrack_proto_tcp.c | 4 +
 net/netfilter/nf_conntrack_proto_udp.c | 4 +
 net/netfilter/nf_conntrack_standalone.c | 34 +-
 net/netfilter/nf_flow_table_core.c | 446 +++---
 net/netfilter/nf_flow_table_ip.c | 455 ++++---
 net/netfilter/nf_flow_table_offload.c | 1191 +++++++++++++++++
 net/netfilter/xt_FLOWOFFLOAD.c | 719 ++++++++++
 43 files changed, 4913 insertions(+), 432 deletions(-)
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.c
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.h
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_offload.c
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_regs.h
 create mode 100644 include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
 create mode 100644 net/netfilter/nf_flow_table_offload.c
 create mode 100644 net/netfilter/xt_FLOWOFFLOAD.c

61diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile
developeree39bcf2023-06-16 08:03:30 +080062index 13c5b4e8f..0a6af99f1 100755
developer8cb3ac72022-07-04 10:55:14 +080063--- a/drivers/net/ethernet/mediatek/Makefile
64+++ b/drivers/net/ethernet/mediatek/Makefile
developeree39bcf2023-06-16 08:03:30 +080065@@ -4,5 +4,6 @@
developer8cb3ac72022-07-04 10:55:14 +080066 #
67
68 obj-$(CONFIG_NET_MEDIATEK_SOC) += mtk_eth.o
developer68838542022-10-03 23:42:21 +080069-mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_usxgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o
70+mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_usxgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o \
developer8cb3ac72022-07-04 10:55:14 +080071+ mtk_ppe.o mtk_ppe_debugfs.o mtk_ppe_offload.o
72 obj-$(CONFIG_NET_MEDIATEK_HNAT) += mtk_hnat/
73diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
developeree39bcf2023-06-16 08:03:30 +080074index 2b21f7ed0..819d8a0be 100755
developer8cb3ac72022-07-04 10:55:14 +080075--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
76+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
developeree39bcf2023-06-16 08:03:30 +080077@@ -3081,6 +3081,7 @@ static int mtk_open(struct net_device *d
developerdca0fde2022-12-14 11:40:35 +080078 struct mtk_phylink_priv *phylink_priv = &mac->phylink_priv;
79 int err, i;
80 struct device_node *phy_node;
developeree39bcf2023-06-16 08:03:30 +080081+ u32 gdm_config = MTK_GDMA_TO_PDMA;
developer8cb3ac72022-07-04 10:55:14 +080082
developeree39bcf2023-06-16 08:03:30 +080083 err = phylink_of_phy_connect(mac->phylink, mac->of_node, 0);
84 if (err) {
85@@ -3157,7 +3158,10 @@ static int mtk_open(struct net_device *d
86 if (!phy_node && eth->xgmii->regmap_sgmii[mac->id])
87 regmap_write(eth->xgmii->regmap_sgmii[mac->id], SGMSYS_QPHY_PWR_STATE_CTRL, 0);
developer8cb3ac72022-07-04 10:55:14 +080088
developerdca0fde2022-12-14 11:40:35 +080089- mtk_gdm_config(eth, mac->id, MTK_GDMA_TO_PDMA);
developeree39bcf2023-06-16 08:03:30 +080090+ if (eth->soc->offload_version && mtk_ppe_start(&eth->ppe) == 0)
91+ gdm_config = MTK_GDMA_TO_PPE;
developer8cb3ac72022-07-04 10:55:14 +080092+
developerdca0fde2022-12-14 11:40:35 +080093+ mtk_gdm_config(eth, mac->id, gdm_config);
developer8cb3ac72022-07-04 10:55:14 +080094
developerdca0fde2022-12-14 11:40:35 +080095 return 0;
96 }
developeree39bcf2023-06-16 08:03:30 +080097@@ -3238,6 +3242,9 @@ static int mtk_stop(struct net_device *d
developer8cb3ac72022-07-04 10:55:14 +080098
99 mtk_dma_free(eth);
100
developeree39bcf2023-06-16 08:03:30 +0800101+ if (eth->soc->offload_version)
102+ mtk_ppe_stop(&eth->ppe);
developer8cb3ac72022-07-04 10:55:14 +0800103+
104 return 0;
105 }
106
developeree39bcf2023-06-16 08:03:30 +0800107@@ -3915,6 +3922,7 @@ static const struct net_device_ops mtk_n
developer8cb3ac72022-07-04 10:55:14 +0800108 #ifdef CONFIG_NET_POLL_CONTROLLER
109 .ndo_poll_controller = mtk_poll_controller,
110 #endif
111+ .ndo_setup_tc = mtk_eth_setup_tc,
112 };
113
114 static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
developeree39bcf2023-06-16 08:03:30 +0800115@@ -4308,6 +4316,17 @@ static int mtk_probe(struct platform_dev
developer8cb3ac72022-07-04 10:55:14 +0800116 goto err_free_dev;
117 }
118
119+ if (eth->soc->offload_version) {
developeree39bcf2023-06-16 08:03:30 +0800120+ err = mtk_ppe_init(&eth->ppe, eth->dev,
121+ eth->base + MTK_ETH_PPE_BASE, 2);
122+ if (err)
123+ goto err_free_dev;
developer8cb3ac72022-07-04 10:55:14 +0800124+
125+ err = mtk_eth_offload_init(eth);
126+ if (err)
127+ goto err_free_dev;
128+ }
129+
130 for (i = 0; i < MTK_MAX_DEVS; i++) {
131 if (!eth->netdev[i])
132 continue;
developeree39bcf2023-06-16 08:03:30 +0800133@@ -4410,6 +4429,7 @@ static const struct mtk_soc_data mt2701_
developer8cb3ac72022-07-04 10:55:14 +0800134 .required_clks = MT7623_CLKS_BITMAP,
135 .required_pctl = true,
136 .has_sram = false,
developeree39bcf2023-06-16 08:03:30 +0800137+ .offload_version = 2,
138 .rss_num = 0,
developerdca0fde2022-12-14 11:40:35 +0800139 .txrx = {
140 .txd_size = sizeof(struct mtk_tx_dma),
developeree39bcf2023-06-16 08:03:30 +0800141@@ -4424,6 +4444,7 @@ static const struct mtk_soc_data mt7621_
developer8cb3ac72022-07-04 10:55:14 +0800142 .required_clks = MT7621_CLKS_BITMAP,
143 .required_pctl = false,
144 .has_sram = false,
developeree39bcf2023-06-16 08:03:30 +0800145+ .offload_version = 2,
146 .rss_num = 0,
developerdca0fde2022-12-14 11:40:35 +0800147 .txrx = {
148 .txd_size = sizeof(struct mtk_tx_dma),
developeree39bcf2023-06-16 08:03:30 +0800149@@ -4439,6 +4460,7 @@ static const struct mtk_soc_data mt7622_
developer8cb3ac72022-07-04 10:55:14 +0800150 .required_clks = MT7622_CLKS_BITMAP,
151 .required_pctl = false,
152 .has_sram = false,
153+ .offload_version = 2,
developeree39bcf2023-06-16 08:03:30 +0800154 .rss_num = 0,
developerdca0fde2022-12-14 11:40:35 +0800155 .txrx = {
156 .txd_size = sizeof(struct mtk_tx_dma),
developeree39bcf2023-06-16 08:03:30 +0800157@@ -4453,6 +4475,7 @@ static const struct mtk_soc_data mt7623_
developer8cb3ac72022-07-04 10:55:14 +0800158 .required_clks = MT7623_CLKS_BITMAP,
159 .required_pctl = true,
160 .has_sram = false,
developer7eb15dc2023-06-14 17:44:03 +0800161+ .offload_version = 2,
developeree39bcf2023-06-16 08:03:30 +0800162 .rss_num = 0,
developer7eb15dc2023-06-14 17:44:03 +0800163 .txrx = {
164 .txd_size = sizeof(struct mtk_tx_dma),
developer8cb3ac72022-07-04 10:55:14 +0800165diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
developeree39bcf2023-06-16 08:03:30 +0800166index b6380ffeb..349f98503 100755
developer8cb3ac72022-07-04 10:55:14 +0800167--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
168+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
169@@ -15,6 +15,8 @@
170 #include <linux/u64_stats_sync.h>
171 #include <linux/refcount.h>
172 #include <linux/phylink.h>
173+#include <linux/rhashtable.h>
174+#include "mtk_ppe.h"
175
176 #define MTK_QDMA_PAGE_SIZE 2048
177 #define MTK_MAX_RX_LENGTH 1536
developeree39bcf2023-06-16 08:03:30 +0800178@@ -37,7 +39,8 @@
developer8cb3ac72022-07-04 10:55:14 +0800179 NETIF_F_HW_VLAN_CTAG_TX | \
180 NETIF_F_SG | NETIF_F_TSO | \
181 NETIF_F_TSO6 | \
182- NETIF_F_IPV6_CSUM)
183+ NETIF_F_IPV6_CSUM |\
184+ NETIF_F_HW_TC)
185 #define MTK_SET_FEATURES (NETIF_F_LRO | \
186 NETIF_F_HW_VLAN_CTAG_RX)
187 #define MTK_HW_FEATURES_MT7628 (NETIF_F_SG | NETIF_F_RXCSUM)
developeree39bcf2023-06-16 08:03:30 +0800188@@ -107,6 +110,7 @@
189 #define MTK_GDMA_TCS_EN BIT(21)
developer8cb3ac72022-07-04 10:55:14 +0800190 #define MTK_GDMA_UCS_EN BIT(20)
191 #define MTK_GDMA_TO_PDMA 0x0
192+#define MTK_GDMA_TO_PPE 0x4444
193 #define MTK_GDMA_DROP_ALL 0x7777
194
developeree39bcf2023-06-16 08:03:30 +0800195 /* Unicast Filter MAC Address Register - Low */
196@@ -547,6 +551,12 @@
developer8cb3ac72022-07-04 10:55:14 +0800197 #define RX_DMA_TCI(_x) ((_x) & (VLAN_PRIO_MASK | VLAN_VID_MASK))
198 #define RX_DMA_VPID(_x) (((_x) >> 16) & 0xffff)
199
200+/* QDMA descriptor rxd4 */
201+#define MTK_RXD4_FOE_ENTRY GENMASK(13, 0)
202+#define MTK_RXD4_PPE_CPU_REASON GENMASK(18, 14)
203+#define MTK_RXD4_SRC_PORT GENMASK(21, 19)
204+#define MTK_RXD4_ALG GENMASK(31, 22)
205+
206 /* QDMA descriptor rxd4 */
207 #define RX_DMA_L4_VALID BIT(24)
208 #define RX_DMA_L4_VALID_PDMA BIT(30) /* when PDMA is used */
developeree39bcf2023-06-16 08:03:30 +0800209@@ -1158,6 +1168,7 @@ struct mtk_soc_data {
210 u32 caps;
211 u32 required_clks;
developer8cb3ac72022-07-04 10:55:14 +0800212 bool required_pctl;
213+ u8 offload_version;
214 netdev_features_t hw_features;
215 bool has_sram;
developeree39bcf2023-06-16 08:03:30 +0800216 };
217@@ -1271,6 +1282,9 @@ struct mtk_eth {
developer8cb3ac72022-07-04 10:55:14 +0800218 int ip_align;
219 spinlock_t syscfg0_lock;
220 struct timer_list mtk_dma_monitor_timer;
221+
developeree39bcf2023-06-16 08:03:30 +0800222+ struct mtk_ppe ppe;
developer8cb3ac72022-07-04 10:55:14 +0800223+ struct rhashtable flow_table;
224 };
225
226 /* struct mtk_mac - the structure that holds the info about the MACs of the
developeree39bcf2023-06-16 08:03:30 +0800227@@ -1319,4 +1333,7 @@ int mtk_gmac_rgmii_path_setup(struct mtk_eth *eth, int mac_id);
228 void mtk_usxgmii_reset(struct mtk_xgmii *ss, int mac_id);
developer1fb19c92023-03-07 23:45:23 +0800229 int mtk_dump_usxgmii(struct regmap *pmap, char *name, u32 offset, u32 range);
developer8cb3ac72022-07-04 10:55:14 +0800230
231+int mtk_eth_offload_init(struct mtk_eth *eth);
232+int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
233+ void *type_data);
developer1fb19c92023-03-07 23:45:23 +0800234 void mtk_eth_set_dma_device(struct mtk_eth *eth, struct device *dma_dev);
developer8cb3ac72022-07-04 10:55:14 +0800235diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
236new file mode 100644
developeree39bcf2023-06-16 08:03:30 +0800237index 000000000..66298e223
developer8cb3ac72022-07-04 10:55:14 +0800238--- /dev/null
239+++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
developercbbf1b02023-09-06 10:24:04 +0800240@@ -0,0 +1,510 @@
developer8cb3ac72022-07-04 10:55:14 +0800241+// SPDX-License-Identifier: GPL-2.0-only
242+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
243+
244+#include <linux/kernel.h>
245+#include <linux/io.h>
246+#include <linux/iopoll.h>
247+#include <linux/etherdevice.h>
248+#include <linux/platform_device.h>
249+#include "mtk_ppe.h"
250+#include "mtk_ppe_regs.h"
251+
252+static void ppe_w32(struct mtk_ppe *ppe, u32 reg, u32 val)
253+{
254+ writel(val, ppe->base + reg);
255+}
256+
257+static u32 ppe_r32(struct mtk_ppe *ppe, u32 reg)
258+{
259+ return readl(ppe->base + reg);
260+}
261+
262+static u32 ppe_m32(struct mtk_ppe *ppe, u32 reg, u32 mask, u32 set)
263+{
264+ u32 val;
265+
266+ val = ppe_r32(ppe, reg);
267+ val &= ~mask;
268+ val |= set;
269+ ppe_w32(ppe, reg, val);
270+
271+ return val;
272+}
273+
274+static u32 ppe_set(struct mtk_ppe *ppe, u32 reg, u32 val)
275+{
276+ return ppe_m32(ppe, reg, 0, val);
277+}
278+
279+static u32 ppe_clear(struct mtk_ppe *ppe, u32 reg, u32 val)
280+{
281+ return ppe_m32(ppe, reg, val, 0);
282+}
283+
284+static int mtk_ppe_wait_busy(struct mtk_ppe *ppe)
285+{
286+ int ret;
287+ u32 val;
288+
289+ ret = readl_poll_timeout(ppe->base + MTK_PPE_GLO_CFG, val,
290+ !(val & MTK_PPE_GLO_CFG_BUSY),
291+ 20, MTK_PPE_WAIT_TIMEOUT_US);
292+
293+ if (ret)
294+ dev_err(ppe->dev, "PPE table busy");
295+
296+ return ret;
297+}
298+
299+static void mtk_ppe_cache_clear(struct mtk_ppe *ppe)
300+{
301+ ppe_set(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
302+ ppe_clear(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
303+}
304+
305+static void mtk_ppe_cache_enable(struct mtk_ppe *ppe, bool enable)
306+{
307+ mtk_ppe_cache_clear(ppe);
308+
309+ ppe_m32(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_EN,
310+ enable * MTK_PPE_CACHE_CTL_EN);
311+}
312+
developeree39bcf2023-06-16 08:03:30 +0800313+static u32 mtk_ppe_hash_entry(struct mtk_foe_entry *e)
developer8cb3ac72022-07-04 10:55:14 +0800314+{
315+ u32 hv1, hv2, hv3;
316+ u32 hash;
317+
developeree39bcf2023-06-16 08:03:30 +0800318+ switch (FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, e->ib1)) {
319+ case MTK_PPE_PKT_TYPE_BRIDGE:
320+ hv1 = e->bridge.src_mac_lo;
321+ hv1 ^= ((e->bridge.src_mac_hi & 0xffff) << 16);
322+ hv2 = e->bridge.src_mac_hi >> 16;
323+ hv2 ^= e->bridge.dest_mac_lo;
324+ hv3 = e->bridge.dest_mac_hi;
325+ break;
developer8cb3ac72022-07-04 10:55:14 +0800326+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
327+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
328+ hv1 = e->ipv4.orig.ports;
329+ hv2 = e->ipv4.orig.dest_ip;
330+ hv3 = e->ipv4.orig.src_ip;
331+ break;
332+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
333+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
334+ hv1 = e->ipv6.src_ip[3] ^ e->ipv6.dest_ip[3];
335+ hv1 ^= e->ipv6.ports;
336+
337+ hv2 = e->ipv6.src_ip[2] ^ e->ipv6.dest_ip[2];
338+ hv2 ^= e->ipv6.dest_ip[0];
339+
340+ hv3 = e->ipv6.src_ip[1] ^ e->ipv6.dest_ip[1];
341+ hv3 ^= e->ipv6.src_ip[0];
342+ break;
343+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
344+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
345+ default:
346+ WARN_ON_ONCE(1);
347+ return MTK_PPE_HASH_MASK;
348+ }
349+
350+ hash = (hv1 & hv2) | ((~hv1) & hv3);
351+ hash = (hash >> 24) | ((hash & 0xffffff) << 8);
352+ hash ^= hv1 ^ hv2 ^ hv3;
353+ hash ^= hash >> 16;
developeree39bcf2023-06-16 08:03:30 +0800354+ hash <<= 1;
developer8cb3ac72022-07-04 10:55:14 +0800355+ hash &= MTK_PPE_ENTRIES - 1;
356+
357+ return hash;
358+}
359+
360+static inline struct mtk_foe_mac_info *
developeree39bcf2023-06-16 08:03:30 +0800361+mtk_foe_entry_l2(struct mtk_foe_entry *entry)
developer8cb3ac72022-07-04 10:55:14 +0800362+{
developeree39bcf2023-06-16 08:03:30 +0800363+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
developer8cb3ac72022-07-04 10:55:14 +0800364+
365+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
366+ return &entry->ipv6.l2;
367+
368+ return &entry->ipv4.l2;
369+}
370+
371+static inline u32 *
developeree39bcf2023-06-16 08:03:30 +0800372+mtk_foe_entry_ib2(struct mtk_foe_entry *entry)
developer8cb3ac72022-07-04 10:55:14 +0800373+{
developeree39bcf2023-06-16 08:03:30 +0800374+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
developer8cb3ac72022-07-04 10:55:14 +0800375+
376+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
377+ return &entry->ipv6.ib2;
378+
379+ return &entry->ipv4.ib2;
380+}
381+
developeree39bcf2023-06-16 08:03:30 +0800382+int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
383+ u8 pse_port, u8 *src_mac, u8 *dest_mac)
developer8cb3ac72022-07-04 10:55:14 +0800384+{
385+ struct mtk_foe_mac_info *l2;
386+ u32 ports_pad, val;
387+
388+ memset(entry, 0, sizeof(*entry));
389+
developeree39bcf2023-06-16 08:03:30 +0800390+ val = FIELD_PREP(MTK_FOE_IB1_STATE, MTK_FOE_STATE_BIND) |
391+ FIELD_PREP(MTK_FOE_IB1_PACKET_TYPE, type) |
392+ FIELD_PREP(MTK_FOE_IB1_UDP, l4proto == IPPROTO_UDP) |
393+ MTK_FOE_IB1_BIND_TTL |
394+ MTK_FOE_IB1_BIND_CACHE;
395+ entry->ib1 = val;
developer8cb3ac72022-07-04 10:55:14 +0800396+
developeree39bcf2023-06-16 08:03:30 +0800397+ val = FIELD_PREP(MTK_FOE_IB2_PORT_MG, 0x3f) |
398+ FIELD_PREP(MTK_FOE_IB2_PORT_AG, 0x1f) |
399+ FIELD_PREP(MTK_FOE_IB2_DEST_PORT, pse_port);
developer8cb3ac72022-07-04 10:55:14 +0800400+
401+ if (is_multicast_ether_addr(dest_mac))
developeree39bcf2023-06-16 08:03:30 +0800402+ val |= MTK_FOE_IB2_MULTICAST;
developer8cb3ac72022-07-04 10:55:14 +0800403+
404+ ports_pad = 0xa5a5a500 | (l4proto & 0xff);
405+ if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
406+ entry->ipv4.orig.ports = ports_pad;
407+ if (type == MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
408+ entry->ipv6.ports = ports_pad;
409+
developeree39bcf2023-06-16 08:03:30 +0800410+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
developer8cb3ac72022-07-04 10:55:14 +0800411+ entry->ipv6.ib2 = val;
412+ l2 = &entry->ipv6.l2;
413+ } else {
414+ entry->ipv4.ib2 = val;
415+ l2 = &entry->ipv4.l2;
416+ }
417+
418+ l2->dest_mac_hi = get_unaligned_be32(dest_mac);
419+ l2->dest_mac_lo = get_unaligned_be16(dest_mac + 4);
420+ l2->src_mac_hi = get_unaligned_be32(src_mac);
421+ l2->src_mac_lo = get_unaligned_be16(src_mac + 4);
422+
423+ if (type >= MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
424+ l2->etype = ETH_P_IPV6;
425+ else
426+ l2->etype = ETH_P_IP;
427+
428+ return 0;
429+}
430+
developeree39bcf2023-06-16 08:03:30 +0800431+int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port)
developer8cb3ac72022-07-04 10:55:14 +0800432+{
developeree39bcf2023-06-16 08:03:30 +0800433+ u32 *ib2 = mtk_foe_entry_ib2(entry);
434+ u32 val;
developer8cb3ac72022-07-04 10:55:14 +0800435+
developeree39bcf2023-06-16 08:03:30 +0800436+ val = *ib2;
437+ val &= ~MTK_FOE_IB2_DEST_PORT;
438+ val |= FIELD_PREP(MTK_FOE_IB2_DEST_PORT, port);
developer8cb3ac72022-07-04 10:55:14 +0800439+ *ib2 = val;
440+
441+ return 0;
442+}
443+
developeree39bcf2023-06-16 08:03:30 +0800444+int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool egress,
developer8cb3ac72022-07-04 10:55:14 +0800445+ __be32 src_addr, __be16 src_port,
446+ __be32 dest_addr, __be16 dest_port)
447+{
developeree39bcf2023-06-16 08:03:30 +0800448+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
developer8cb3ac72022-07-04 10:55:14 +0800449+ struct mtk_ipv4_tuple *t;
450+
451+ switch (type) {
452+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
453+ if (egress) {
454+ t = &entry->ipv4.new;
455+ break;
456+ }
457+ fallthrough;
458+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
459+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
460+ t = &entry->ipv4.orig;
461+ break;
462+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
463+ entry->ipv6_6rd.tunnel_src_ip = be32_to_cpu(src_addr);
464+ entry->ipv6_6rd.tunnel_dest_ip = be32_to_cpu(dest_addr);
465+ return 0;
466+ default:
467+ WARN_ON_ONCE(1);
468+ return -EINVAL;
469+ }
470+
471+ t->src_ip = be32_to_cpu(src_addr);
472+ t->dest_ip = be32_to_cpu(dest_addr);
473+
474+ if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
475+ return 0;
476+
477+ t->src_port = be16_to_cpu(src_port);
478+ t->dest_port = be16_to_cpu(dest_port);
479+
480+ return 0;
481+}
482+
developeree39bcf2023-06-16 08:03:30 +0800483+int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
developer8cb3ac72022-07-04 10:55:14 +0800484+ __be32 *src_addr, __be16 src_port,
485+ __be32 *dest_addr, __be16 dest_port)
486+{
developeree39bcf2023-06-16 08:03:30 +0800487+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
developer8cb3ac72022-07-04 10:55:14 +0800488+ u32 *src, *dest;
489+ int i;
490+
491+ switch (type) {
492+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
493+ src = entry->dslite.tunnel_src_ip;
494+ dest = entry->dslite.tunnel_dest_ip;
495+ break;
496+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
497+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
498+ entry->ipv6.src_port = be16_to_cpu(src_port);
499+ entry->ipv6.dest_port = be16_to_cpu(dest_port);
500+ fallthrough;
501+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
502+ src = entry->ipv6.src_ip;
503+ dest = entry->ipv6.dest_ip;
504+ break;
505+ default:
506+ WARN_ON_ONCE(1);
507+ return -EINVAL;
508+ }
509+
510+ for (i = 0; i < 4; i++)
511+ src[i] = be32_to_cpu(src_addr[i]);
512+ for (i = 0; i < 4; i++)
513+ dest[i] = be32_to_cpu(dest_addr[i]);
514+
515+ return 0;
516+}
517+
developeree39bcf2023-06-16 08:03:30 +0800518+int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port)
developer8cb3ac72022-07-04 10:55:14 +0800519+{
developeree39bcf2023-06-16 08:03:30 +0800520+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
developer8cb3ac72022-07-04 10:55:14 +0800521+
522+ l2->etype = BIT(port);
523+
developeree39bcf2023-06-16 08:03:30 +0800524+ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER))
525+ entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
developer8cb3ac72022-07-04 10:55:14 +0800526+ else
527+ l2->etype |= BIT(8);
528+
developeree39bcf2023-06-16 08:03:30 +0800529+ entry->ib1 &= ~MTK_FOE_IB1_BIND_VLAN_TAG;
developer8cb3ac72022-07-04 10:55:14 +0800530+
531+ return 0;
532+}
533+
developeree39bcf2023-06-16 08:03:30 +0800534+int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid)
developer8cb3ac72022-07-04 10:55:14 +0800535+{
developeree39bcf2023-06-16 08:03:30 +0800536+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
developer8cb3ac72022-07-04 10:55:14 +0800537+
developeree39bcf2023-06-16 08:03:30 +0800538+ switch (FIELD_GET(MTK_FOE_IB1_BIND_VLAN_LAYER, entry->ib1)) {
developer8cb3ac72022-07-04 10:55:14 +0800539+ case 0:
developeree39bcf2023-06-16 08:03:30 +0800540+ entry->ib1 |= MTK_FOE_IB1_BIND_VLAN_TAG |
541+ FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
developer8cb3ac72022-07-04 10:55:14 +0800542+ l2->vlan1 = vid;
543+ return 0;
544+ case 1:
developeree39bcf2023-06-16 08:03:30 +0800545+ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG)) {
developer8cb3ac72022-07-04 10:55:14 +0800546+ l2->vlan1 = vid;
547+ l2->etype |= BIT(8);
548+ } else {
549+ l2->vlan2 = vid;
developeree39bcf2023-06-16 08:03:30 +0800550+ entry->ib1 += FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
developer8cb3ac72022-07-04 10:55:14 +0800551+ }
552+ return 0;
553+ default:
554+ return -ENOSPC;
555+ }
556+}
557+
developeree39bcf2023-06-16 08:03:30 +0800558+int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid)
developer8cb3ac72022-07-04 10:55:14 +0800559+{
developeree39bcf2023-06-16 08:03:30 +0800560+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
developer8cb3ac72022-07-04 10:55:14 +0800561+
developeree39bcf2023-06-16 08:03:30 +0800562+ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER) ||
563+ (entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG))
developer8cb3ac72022-07-04 10:55:14 +0800564+ l2->etype = ETH_P_PPP_SES;
565+
developeree39bcf2023-06-16 08:03:30 +0800566+ entry->ib1 |= MTK_FOE_IB1_BIND_PPPOE;
developer8cb3ac72022-07-04 10:55:14 +0800567+ l2->pppoe_id = sid;
568+
569+ return 0;
570+}
571+
572+static inline bool mtk_foe_entry_usable(struct mtk_foe_entry *entry)
573+{
574+ return !(entry->ib1 & MTK_FOE_IB1_STATIC) &&
575+ FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1) != MTK_FOE_STATE_BIND;
576+}
577+
developeree39bcf2023-06-16 08:03:30 +0800578+int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
579+ u16 timestamp)
developer7eb15dc2023-06-14 17:44:03 +0800580+{
developer8cb3ac72022-07-04 10:55:14 +0800581+ struct mtk_foe_entry *hwe;
developeree39bcf2023-06-16 08:03:30 +0800582+ u32 hash;
developer7eb15dc2023-06-14 17:44:03 +0800583+
developeree39bcf2023-06-16 08:03:30 +0800584+ timestamp &= MTK_FOE_IB1_BIND_TIMESTAMP;
585+ entry->ib1 &= ~MTK_FOE_IB1_BIND_TIMESTAMP;
586+ entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_TIMESTAMP, timestamp);
developer7eb15dc2023-06-14 17:44:03 +0800587+
developeree39bcf2023-06-16 08:03:30 +0800588+ hash = mtk_ppe_hash_entry(entry);
589+ hwe = &ppe->foe_table[hash];
590+ if (!mtk_foe_entry_usable(hwe)) {
591+ hwe++;
592+ hash++;
developer7eb15dc2023-06-14 17:44:03 +0800593+
developeree39bcf2023-06-16 08:03:30 +0800594+ if (!mtk_foe_entry_usable(hwe))
595+ return -ENOSPC;
developer7eb15dc2023-06-14 17:44:03 +0800596+ }
597+
developeree39bcf2023-06-16 08:03:30 +0800598+ memcpy(&hwe->data, &entry->data, sizeof(hwe->data));
developer8cb3ac72022-07-04 10:55:14 +0800599+ wmb();
600+ hwe->ib1 = entry->ib1;
601+
602+ dma_wmb();
603+
604+ mtk_ppe_cache_clear(ppe);
developer7eb15dc2023-06-14 17:44:03 +0800605+
developeree39bcf2023-06-16 08:03:30 +0800606+ return hash;
developer7eb15dc2023-06-14 17:44:03 +0800607+}
608+
developeree39bcf2023-06-16 08:03:30 +0800609+int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
610+ int version)
developer7eb15dc2023-06-14 17:44:03 +0800611+{
developeree39bcf2023-06-16 08:03:30 +0800612+ struct mtk_foe_entry *foe;
developer8cb3ac72022-07-04 10:55:14 +0800613+
614+	/* need to allocate a separate device, since the PPE DMA access is
615+ * not coherent.
616+ */
617+ ppe->base = base;
618+ ppe->dev = dev;
developeree39bcf2023-06-16 08:03:30 +0800619+ ppe->version = version;
developer8cb3ac72022-07-04 10:55:14 +0800620+
developeree39bcf2023-06-16 08:03:30 +0800621+ foe = dmam_alloc_coherent(ppe->dev, MTK_PPE_ENTRIES * sizeof(*foe),
developer8cb3ac72022-07-04 10:55:14 +0800622+ &ppe->foe_phys, GFP_KERNEL);
623+ if (!foe)
developeree39bcf2023-06-16 08:03:30 +0800624+ return -ENOMEM;
developer8cb3ac72022-07-04 10:55:14 +0800625+
626+ ppe->foe_table = foe;
627+
developeree39bcf2023-06-16 08:03:30 +0800628+ mtk_ppe_debugfs_init(ppe);
developer7eb15dc2023-06-14 17:44:03 +0800629+
developeree39bcf2023-06-16 08:03:30 +0800630+ return 0;
developer8cb3ac72022-07-04 10:55:14 +0800631+}
632+
633+static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe)
634+{
635+ static const u8 skip[] = { 12, 25, 38, 51, 76, 89, 102 };
636+ int i, k;
637+
developeree39bcf2023-06-16 08:03:30 +0800638+ memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(*ppe->foe_table));
developer8cb3ac72022-07-04 10:55:14 +0800639+
640+ if (!IS_ENABLED(CONFIG_SOC_MT7621))
641+ return;
642+
643+ /* skip all entries that cross the 1024 byte boundary */
developeree39bcf2023-06-16 08:03:30 +0800644+ for (i = 0; i < MTK_PPE_ENTRIES; i += 128)
645+ for (k = 0; k < ARRAY_SIZE(skip); k++)
646+ ppe->foe_table[i + skip[k]].ib1 |= MTK_FOE_IB1_STATIC;
developer8cb3ac72022-07-04 10:55:14 +0800647+}
648+
developeree39bcf2023-06-16 08:03:30 +0800649+int mtk_ppe_start(struct mtk_ppe *ppe)
developer8cb3ac72022-07-04 10:55:14 +0800650+{
651+ u32 val;
652+
653+ mtk_ppe_init_foe_table(ppe);
654+ ppe_w32(ppe, MTK_PPE_TB_BASE, ppe->foe_phys);
655+
656+ val = MTK_PPE_TB_CFG_ENTRY_80B |
657+ MTK_PPE_TB_CFG_AGE_NON_L4 |
658+ MTK_PPE_TB_CFG_AGE_UNBIND |
659+ MTK_PPE_TB_CFG_AGE_TCP |
660+ MTK_PPE_TB_CFG_AGE_UDP |
661+ MTK_PPE_TB_CFG_AGE_TCP_FIN |
662+ FIELD_PREP(MTK_PPE_TB_CFG_SEARCH_MISS,
663+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD) |
664+ FIELD_PREP(MTK_PPE_TB_CFG_KEEPALIVE,
665+ MTK_PPE_KEEPALIVE_DISABLE) |
666+ FIELD_PREP(MTK_PPE_TB_CFG_HASH_MODE, 1) |
667+ FIELD_PREP(MTK_PPE_TB_CFG_SCAN_MODE,
668+ MTK_PPE_SCAN_MODE_KEEPALIVE_AGE) |
669+ FIELD_PREP(MTK_PPE_TB_CFG_ENTRY_NUM,
670+ MTK_PPE_ENTRIES_SHIFT);
671+ ppe_w32(ppe, MTK_PPE_TB_CFG, val);
672+
673+ ppe_w32(ppe, MTK_PPE_IP_PROTO_CHK,
674+ MTK_PPE_IP_PROTO_CHK_IPV4 | MTK_PPE_IP_PROTO_CHK_IPV6);
675+
676+ mtk_ppe_cache_enable(ppe, true);
677+
developeree39bcf2023-06-16 08:03:30 +0800678+ val = MTK_PPE_FLOW_CFG_IP4_TCP_FRAG |
679+ MTK_PPE_FLOW_CFG_IP4_UDP_FRAG |
680+ MTK_PPE_FLOW_CFG_IP6_3T_ROUTE |
developer8cb3ac72022-07-04 10:55:14 +0800681+ MTK_PPE_FLOW_CFG_IP6_5T_ROUTE |
682+ MTK_PPE_FLOW_CFG_IP6_6RD |
683+ MTK_PPE_FLOW_CFG_IP4_NAT |
684+ MTK_PPE_FLOW_CFG_IP4_NAPT |
685+ MTK_PPE_FLOW_CFG_IP4_DSLITE |
developeree39bcf2023-06-16 08:03:30 +0800686+ MTK_PPE_FLOW_CFG_L2_BRIDGE |
developer8cb3ac72022-07-04 10:55:14 +0800687+ MTK_PPE_FLOW_CFG_IP4_NAT_FRAG;
688+ ppe_w32(ppe, MTK_PPE_FLOW_CFG, val);
689+
690+ val = FIELD_PREP(MTK_PPE_UNBIND_AGE_MIN_PACKETS, 1000) |
691+ FIELD_PREP(MTK_PPE_UNBIND_AGE_DELTA, 3);
692+ ppe_w32(ppe, MTK_PPE_UNBIND_AGE, val);
693+
developeree39bcf2023-06-16 08:03:30 +0800694+ val = FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_UDP, 30) |
developer8cb3ac72022-07-04 10:55:14 +0800695+ FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_NON_L4, 1);
696+ ppe_w32(ppe, MTK_PPE_BIND_AGE0, val);
697+
698+ val = FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP_FIN, 1) |
developeree39bcf2023-06-16 08:03:30 +0800699+ FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP, 30);
developer8cb3ac72022-07-04 10:55:14 +0800700+ ppe_w32(ppe, MTK_PPE_BIND_AGE1, val);
701+
702+ val = MTK_PPE_BIND_LIMIT0_QUARTER | MTK_PPE_BIND_LIMIT0_HALF;
703+ ppe_w32(ppe, MTK_PPE_BIND_LIMIT0, val);
704+
705+ val = MTK_PPE_BIND_LIMIT1_FULL |
706+ FIELD_PREP(MTK_PPE_BIND_LIMIT1_NON_L4, 1);
707+ ppe_w32(ppe, MTK_PPE_BIND_LIMIT1, val);
708+
709+ val = FIELD_PREP(MTK_PPE_BIND_RATE_BIND, 30) |
710+ FIELD_PREP(MTK_PPE_BIND_RATE_PREBIND, 1);
711+ ppe_w32(ppe, MTK_PPE_BIND_RATE, val);
712+
713+ /* enable PPE */
714+ val = MTK_PPE_GLO_CFG_EN |
715+ MTK_PPE_GLO_CFG_IP4_L4_CS_DROP |
716+ MTK_PPE_GLO_CFG_IP4_CS_DROP |
developercbbf1b02023-09-06 10:24:04 +0800717+ MTK_PPE_GLO_CFG_MCAST_TB_EN |
developer8cb3ac72022-07-04 10:55:14 +0800718+ MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE;
719+ ppe_w32(ppe, MTK_PPE_GLO_CFG, val);
720+
721+ ppe_w32(ppe, MTK_PPE_DEFAULT_CPU_PORT, 0);
722+
developeree39bcf2023-06-16 08:03:30 +0800723+ return 0;
developer8cb3ac72022-07-04 10:55:14 +0800724+}
725+
726+int mtk_ppe_stop(struct mtk_ppe *ppe)
727+{
728+ u32 val;
729+ int i;
730+
developeree39bcf2023-06-16 08:03:30 +0800731+ for (i = 0; i < MTK_PPE_ENTRIES; i++)
732+ ppe->foe_table[i].ib1 = FIELD_PREP(MTK_FOE_IB1_STATE,
733+ MTK_FOE_STATE_INVALID);
developer8cb3ac72022-07-04 10:55:14 +0800734+
735+ mtk_ppe_cache_enable(ppe, false);
736+
737+ /* disable offload engine */
738+ ppe_clear(ppe, MTK_PPE_GLO_CFG, MTK_PPE_GLO_CFG_EN);
739+ ppe_w32(ppe, MTK_PPE_FLOW_CFG, 0);
740+
741+ /* disable aging */
742+ val = MTK_PPE_TB_CFG_AGE_NON_L4 |
743+ MTK_PPE_TB_CFG_AGE_UNBIND |
744+ MTK_PPE_TB_CFG_AGE_TCP |
745+ MTK_PPE_TB_CFG_AGE_UDP |
746+ MTK_PPE_TB_CFG_AGE_TCP_FIN;
747+ ppe_clear(ppe, MTK_PPE_TB_CFG, val);
748+
749+ return mtk_ppe_wait_busy(ppe);
750+}
751diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h
752new file mode 100644
developeree39bcf2023-06-16 08:03:30 +0800753index 000000000..242fb8f2a
developer8cb3ac72022-07-04 10:55:14 +0800754--- /dev/null
755+++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
developeree39bcf2023-06-16 08:03:30 +0800756@@ -0,0 +1,288 @@
developer8cb3ac72022-07-04 10:55:14 +0800757+// SPDX-License-Identifier: GPL-2.0-only
758+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
759+
760+#ifndef __MTK_PPE_H
761+#define __MTK_PPE_H
762+
763+#include <linux/kernel.h>
764+#include <linux/bitfield.h>
developeree39bcf2023-06-16 08:03:30 +0800765+
766+#define MTK_ETH_PPE_BASE 0xc00
developer8cb3ac72022-07-04 10:55:14 +0800767+
768+#define MTK_PPE_ENTRIES_SHIFT 3
769+#define MTK_PPE_ENTRIES (1024 << MTK_PPE_ENTRIES_SHIFT)
770+#define MTK_PPE_HASH_MASK (MTK_PPE_ENTRIES - 1)
771+#define MTK_PPE_WAIT_TIMEOUT_US 1000000
772+
773+#define MTK_FOE_IB1_UNBIND_TIMESTAMP GENMASK(7, 0)
774+#define MTK_FOE_IB1_UNBIND_PACKETS GENMASK(23, 8)
775+#define MTK_FOE_IB1_UNBIND_PREBIND BIT(24)
776+
777+#define MTK_FOE_IB1_BIND_TIMESTAMP GENMASK(14, 0)
778+#define MTK_FOE_IB1_BIND_KEEPALIVE BIT(15)
779+#define MTK_FOE_IB1_BIND_VLAN_LAYER GENMASK(18, 16)
780+#define MTK_FOE_IB1_BIND_PPPOE BIT(19)
781+#define MTK_FOE_IB1_BIND_VLAN_TAG BIT(20)
782+#define MTK_FOE_IB1_BIND_PKT_SAMPLE BIT(21)
783+#define MTK_FOE_IB1_BIND_CACHE BIT(22)
784+#define MTK_FOE_IB1_BIND_TUNNEL_DECAP BIT(23)
785+#define MTK_FOE_IB1_BIND_TTL BIT(24)
786+
787+#define MTK_FOE_IB1_PACKET_TYPE GENMASK(27, 25)
788+#define MTK_FOE_IB1_STATE GENMASK(29, 28)
789+#define MTK_FOE_IB1_UDP BIT(30)
790+#define MTK_FOE_IB1_STATIC BIT(31)
791+
792+enum {
793+ MTK_PPE_PKT_TYPE_IPV4_HNAPT = 0,
794+ MTK_PPE_PKT_TYPE_IPV4_ROUTE = 1,
795+ MTK_PPE_PKT_TYPE_BRIDGE = 2,
796+ MTK_PPE_PKT_TYPE_IPV4_DSLITE = 3,
797+ MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T = 4,
798+ MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T = 5,
799+ MTK_PPE_PKT_TYPE_IPV6_6RD = 7,
800+};
801+
802+#define MTK_FOE_IB2_QID GENMASK(3, 0)
803+#define MTK_FOE_IB2_PSE_QOS BIT(4)
804+#define MTK_FOE_IB2_DEST_PORT GENMASK(7, 5)
805+#define MTK_FOE_IB2_MULTICAST BIT(8)
806+
developeree39bcf2023-06-16 08:03:30 +0800807+#define MTK_FOE_IB2_WHNAT_QID2 GENMASK(13, 12)
808+#define MTK_FOE_IB2_WHNAT_DEVIDX BIT(16)
809+#define MTK_FOE_IB2_WHNAT_NAT BIT(17)
developer8cb3ac72022-07-04 10:55:14 +0800810+
811+#define MTK_FOE_IB2_PORT_MG GENMASK(17, 12)
812+
813+#define MTK_FOE_IB2_PORT_AG GENMASK(23, 18)
814+
815+#define MTK_FOE_IB2_DSCP GENMASK(31, 24)
816+
developeree39bcf2023-06-16 08:03:30 +0800817+#define MTK_FOE_VLAN2_WHNAT_BSS GENMASK(5, 0)
818+#define MTK_FOE_VLAN2_WHNAT_WCID GENMASK(13, 6)
819+#define MTK_FOE_VLAN2_WHNAT_RING GENMASK(15, 14)
developer8cb3ac72022-07-04 10:55:14 +0800820+
821+enum {
822+ MTK_FOE_STATE_INVALID,
823+ MTK_FOE_STATE_UNBIND,
824+ MTK_FOE_STATE_BIND,
825+ MTK_FOE_STATE_FIN
826+};
827+
828+struct mtk_foe_mac_info {
829+ u16 vlan1;
830+ u16 etype;
831+
832+ u32 dest_mac_hi;
833+
834+ u16 vlan2;
835+ u16 dest_mac_lo;
836+
837+ u32 src_mac_hi;
838+
839+ u16 pppoe_id;
840+ u16 src_mac_lo;
841+};
842+
843+struct mtk_foe_bridge {
developeree39bcf2023-06-16 08:03:30 +0800844+ u32 dest_mac_hi;
845+
846+ u16 src_mac_lo;
847+ u16 dest_mac_lo;
developer8cb3ac72022-07-04 10:55:14 +0800848+
developeree39bcf2023-06-16 08:03:30 +0800849+ u32 src_mac_hi;
developer8cb3ac72022-07-04 10:55:14 +0800850+
851+ u32 ib2;
852+
developeree39bcf2023-06-16 08:03:30 +0800853+ u32 _rsv[5];
854+
855+ u32 udf_tsid;
developer8cb3ac72022-07-04 10:55:14 +0800856+ struct mtk_foe_mac_info l2;
857+};
858+
859+struct mtk_ipv4_tuple {
860+ u32 src_ip;
861+ u32 dest_ip;
862+ union {
863+ struct {
864+ u16 dest_port;
865+ u16 src_port;
866+ };
867+ struct {
868+ u8 protocol;
869+ u8 _pad[3]; /* fill with 0xa5a5a5 */
870+ };
871+ u32 ports;
872+ };
873+};
874+
875+struct mtk_foe_ipv4 {
876+ struct mtk_ipv4_tuple orig;
877+
878+ u32 ib2;
879+
880+ struct mtk_ipv4_tuple new;
881+
882+ u16 timestamp;
883+ u16 _rsv0[3];
884+
885+ u32 udf_tsid;
886+
887+ struct mtk_foe_mac_info l2;
888+};
889+
890+struct mtk_foe_ipv4_dslite {
891+ struct mtk_ipv4_tuple ip4;
892+
893+ u32 tunnel_src_ip[4];
894+ u32 tunnel_dest_ip[4];
895+
896+ u8 flow_label[3];
897+ u8 priority;
898+
899+ u32 udf_tsid;
900+
901+ u32 ib2;
902+
903+ struct mtk_foe_mac_info l2;
904+};
905+
906+struct mtk_foe_ipv6 {
907+ u32 src_ip[4];
908+ u32 dest_ip[4];
909+
910+ union {
911+ struct {
912+ u8 protocol;
913+ u8 _pad[3]; /* fill with 0xa5a5a5 */
914+ }; /* 3-tuple */
915+ struct {
916+ u16 dest_port;
917+ u16 src_port;
918+ }; /* 5-tuple */
919+ u32 ports;
920+ };
921+
922+ u32 _rsv[3];
923+
924+ u32 udf;
925+
926+ u32 ib2;
927+ struct mtk_foe_mac_info l2;
928+};
929+
930+struct mtk_foe_ipv6_6rd {
931+ u32 src_ip[4];
932+ u32 dest_ip[4];
933+ u16 dest_port;
934+ u16 src_port;
935+
936+ u32 tunnel_src_ip;
937+ u32 tunnel_dest_ip;
938+
939+ u16 hdr_csum;
940+ u8 dscp;
941+ u8 ttl;
942+
943+ u8 flag;
944+ u8 pad;
945+ u8 per_flow_6rd_id;
946+ u8 pad2;
947+
948+ u32 ib2;
949+ struct mtk_foe_mac_info l2;
950+};
951+
952+struct mtk_foe_entry {
953+ u32 ib1;
954+
955+ union {
956+ struct mtk_foe_bridge bridge;
957+ struct mtk_foe_ipv4 ipv4;
958+ struct mtk_foe_ipv4_dslite dslite;
959+ struct mtk_foe_ipv6 ipv6;
960+ struct mtk_foe_ipv6_6rd ipv6_6rd;
developeree39bcf2023-06-16 08:03:30 +0800961+ u32 data[19];
developer8cb3ac72022-07-04 10:55:14 +0800962+ };
963+};
964+
965+enum {
966+ MTK_PPE_CPU_REASON_TTL_EXCEEDED = 0x02,
967+ MTK_PPE_CPU_REASON_OPTION_HEADER = 0x03,
968+ MTK_PPE_CPU_REASON_NO_FLOW = 0x07,
969+ MTK_PPE_CPU_REASON_IPV4_FRAG = 0x08,
970+ MTK_PPE_CPU_REASON_IPV4_DSLITE_FRAG = 0x09,
971+ MTK_PPE_CPU_REASON_IPV4_DSLITE_NO_TCP_UDP = 0x0a,
972+ MTK_PPE_CPU_REASON_IPV6_6RD_NO_TCP_UDP = 0x0b,
973+ MTK_PPE_CPU_REASON_TCP_FIN_SYN_RST = 0x0c,
974+ MTK_PPE_CPU_REASON_UN_HIT = 0x0d,
975+ MTK_PPE_CPU_REASON_HIT_UNBIND = 0x0e,
976+ MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED = 0x0f,
977+ MTK_PPE_CPU_REASON_HIT_BIND_TCP_FIN = 0x10,
978+ MTK_PPE_CPU_REASON_HIT_TTL_1 = 0x11,
979+ MTK_PPE_CPU_REASON_HIT_BIND_VLAN_VIOLATION = 0x12,
980+ MTK_PPE_CPU_REASON_KEEPALIVE_UC_OLD_HDR = 0x13,
981+ MTK_PPE_CPU_REASON_KEEPALIVE_MC_NEW_HDR = 0x14,
982+ MTK_PPE_CPU_REASON_KEEPALIVE_DUP_OLD_HDR = 0x15,
983+ MTK_PPE_CPU_REASON_HIT_BIND_FORCE_CPU = 0x16,
984+ MTK_PPE_CPU_REASON_TUNNEL_OPTION_HEADER = 0x17,
985+ MTK_PPE_CPU_REASON_MULTICAST_TO_CPU = 0x18,
986+ MTK_PPE_CPU_REASON_MULTICAST_TO_GMAC1_CPU = 0x19,
987+ MTK_PPE_CPU_REASON_HIT_PRE_BIND = 0x1a,
988+ MTK_PPE_CPU_REASON_PACKET_SAMPLING = 0x1b,
989+ MTK_PPE_CPU_REASON_EXCEED_MTU = 0x1c,
990+ MTK_PPE_CPU_REASON_PPE_BYPASS = 0x1e,
991+ MTK_PPE_CPU_REASON_INVALID = 0x1f,
992+};
993+
994+struct mtk_ppe {
995+ struct device *dev;
996+ void __iomem *base;
997+ int version;
998+
developeree39bcf2023-06-16 08:03:30 +0800999+ struct mtk_foe_entry *foe_table;
developer8cb3ac72022-07-04 10:55:14 +08001000+ dma_addr_t foe_phys;
1001+
1002+ void *acct_table;
1003+};
1004+
developeree39bcf2023-06-16 08:03:30 +08001005+int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
1006+ int version);
1007+int mtk_ppe_start(struct mtk_ppe *ppe);
developer8cb3ac72022-07-04 10:55:14 +08001008+int mtk_ppe_stop(struct mtk_ppe *ppe);
1009+
1010+static inline void
developeree39bcf2023-06-16 08:03:30 +08001011+mtk_foe_entry_clear(struct mtk_ppe *ppe, u16 hash)
developer8cb3ac72022-07-04 10:55:14 +08001012+{
developeree39bcf2023-06-16 08:03:30 +08001013+ ppe->foe_table[hash].ib1 = 0;
1014+ dma_wmb();
1015+}
developer8cb3ac72022-07-04 10:55:14 +08001016+
developeree39bcf2023-06-16 08:03:30 +08001017+static inline int
1018+mtk_foe_entry_timestamp(struct mtk_ppe *ppe, u16 hash)
1019+{
1020+ u32 ib1 = READ_ONCE(ppe->foe_table[hash].ib1);
developer8cb3ac72022-07-04 10:55:14 +08001021+
developeree39bcf2023-06-16 08:03:30 +08001022+ if (FIELD_GET(MTK_FOE_IB1_STATE, ib1) != MTK_FOE_STATE_BIND)
1023+ return -1;
developer7eb15dc2023-06-14 17:44:03 +08001024+
developeree39bcf2023-06-16 08:03:30 +08001025+ return FIELD_GET(MTK_FOE_IB1_BIND_TIMESTAMP, ib1);
developer8cb3ac72022-07-04 10:55:14 +08001026+}
1027+
developeree39bcf2023-06-16 08:03:30 +08001028+int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
1029+ u8 pse_port, u8 *src_mac, u8 *dest_mac);
1030+int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port);
1031+int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool orig,
developer8cb3ac72022-07-04 10:55:14 +08001032+ __be32 src_addr, __be16 src_port,
1033+ __be32 dest_addr, __be16 dest_port);
developeree39bcf2023-06-16 08:03:30 +08001034+int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
developer8cb3ac72022-07-04 10:55:14 +08001035+ __be32 *src_addr, __be16 src_port,
1036+ __be32 *dest_addr, __be16 dest_port);
developeree39bcf2023-06-16 08:03:30 +08001037+int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port);
1038+int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid);
1039+int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid);
1040+int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
1041+ u16 timestamp);
1042+int mtk_ppe_debugfs_init(struct mtk_ppe *ppe);
developer8cb3ac72022-07-04 10:55:14 +08001043+
1044+#endif
1045diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
1046new file mode 100644
developeree39bcf2023-06-16 08:03:30 +08001047index 000000000..d4b482340
developer8cb3ac72022-07-04 10:55:14 +08001048--- /dev/null
1049+++ b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
developeree39bcf2023-06-16 08:03:30 +08001050@@ -0,0 +1,214 @@
developer8cb3ac72022-07-04 10:55:14 +08001051+// SPDX-License-Identifier: GPL-2.0-only
1052+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
1053+
1054+#include <linux/kernel.h>
1055+#include <linux/debugfs.h>
1056+#include "mtk_eth_soc.h"
1057+
1058+struct mtk_flow_addr_info
1059+{
1060+ void *src, *dest;
1061+ u16 *src_port, *dest_port;
1062+ bool ipv6;
1063+};
1064+
1065+static const char *mtk_foe_entry_state_str(int state)
1066+{
1067+ static const char * const state_str[] = {
1068+ [MTK_FOE_STATE_INVALID] = "INV",
1069+ [MTK_FOE_STATE_UNBIND] = "UNB",
1070+ [MTK_FOE_STATE_BIND] = "BND",
1071+ [MTK_FOE_STATE_FIN] = "FIN",
1072+ };
1073+
1074+ if (state >= ARRAY_SIZE(state_str) || !state_str[state])
1075+ return "UNK";
1076+
1077+ return state_str[state];
1078+}
1079+
1080+static const char *mtk_foe_pkt_type_str(int type)
1081+{
1082+ static const char * const type_str[] = {
1083+ [MTK_PPE_PKT_TYPE_IPV4_HNAPT] = "IPv4 5T",
1084+ [MTK_PPE_PKT_TYPE_IPV4_ROUTE] = "IPv4 3T",
developeree39bcf2023-06-16 08:03:30 +08001085+ [MTK_PPE_PKT_TYPE_BRIDGE] = "L2",
developer8cb3ac72022-07-04 10:55:14 +08001086+ [MTK_PPE_PKT_TYPE_IPV4_DSLITE] = "DS-LITE",
1087+ [MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T] = "IPv6 3T",
1088+ [MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T] = "IPv6 5T",
1089+ [MTK_PPE_PKT_TYPE_IPV6_6RD] = "6RD",
1090+ };
1091+
1092+ if (type >= ARRAY_SIZE(type_str) || !type_str[type])
1093+ return "UNKNOWN";
1094+
1095+ return type_str[type];
1096+}
1097+
1098+static void
1099+mtk_print_addr(struct seq_file *m, u32 *addr, bool ipv6)
1100+{
1101+ u32 n_addr[4];
1102+ int i;
1103+
1104+ if (!ipv6) {
1105+ seq_printf(m, "%pI4h", addr);
1106+ return;
1107+ }
1108+
1109+ for (i = 0; i < ARRAY_SIZE(n_addr); i++)
1110+ n_addr[i] = htonl(addr[i]);
1111+ seq_printf(m, "%pI6", n_addr);
1112+}
1113+
1114+static void
1115+mtk_print_addr_info(struct seq_file *m, struct mtk_flow_addr_info *ai)
1116+{
1117+ mtk_print_addr(m, ai->src, ai->ipv6);
1118+ if (ai->src_port)
1119+ seq_printf(m, ":%d", *ai->src_port);
1120+ seq_printf(m, "->");
1121+ mtk_print_addr(m, ai->dest, ai->ipv6);
1122+ if (ai->dest_port)
1123+ seq_printf(m, ":%d", *ai->dest_port);
1124+}
1125+
1126+static int
1127+mtk_ppe_debugfs_foe_show(struct seq_file *m, void *private, bool bind)
1128+{
1129+ struct mtk_ppe *ppe = m->private;
1130+ int i;
1131+
1132+ for (i = 0; i < MTK_PPE_ENTRIES; i++) {
developeree39bcf2023-06-16 08:03:30 +08001133+ struct mtk_foe_entry *entry = &ppe->foe_table[i];
developer8cb3ac72022-07-04 10:55:14 +08001134+ struct mtk_foe_mac_info *l2;
1135+ struct mtk_flow_addr_info ai = {};
1136+ unsigned char h_source[ETH_ALEN];
1137+ unsigned char h_dest[ETH_ALEN];
1138+ int type, state;
1139+ u32 ib2;
1140+
1141+
1142+ state = FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1);
1143+ if (!state)
1144+ continue;
1145+
1146+ if (bind && state != MTK_FOE_STATE_BIND)
1147+ continue;
1148+
1149+ type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
1150+ seq_printf(m, "%05x %s %7s", i,
1151+ mtk_foe_entry_state_str(state),
1152+ mtk_foe_pkt_type_str(type));
1153+
1154+ switch (type) {
1155+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
1156+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
1157+ ai.src_port = &entry->ipv4.orig.src_port;
1158+ ai.dest_port = &entry->ipv4.orig.dest_port;
1159+ fallthrough;
1160+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
1161+ ai.src = &entry->ipv4.orig.src_ip;
1162+ ai.dest = &entry->ipv4.orig.dest_ip;
1163+ break;
1164+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
1165+ ai.src_port = &entry->ipv6.src_port;
1166+ ai.dest_port = &entry->ipv6.dest_port;
1167+ fallthrough;
1168+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
1169+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
1170+ ai.src = &entry->ipv6.src_ip;
1171+ ai.dest = &entry->ipv6.dest_ip;
1172+ ai.ipv6 = true;
1173+ break;
1174+ }
1175+
1176+ seq_printf(m, " orig=");
1177+ mtk_print_addr_info(m, &ai);
1178+
1179+ switch (type) {
1180+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
1181+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
1182+ ai.src_port = &entry->ipv4.new.src_port;
1183+ ai.dest_port = &entry->ipv4.new.dest_port;
1184+ fallthrough;
1185+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
1186+ ai.src = &entry->ipv4.new.src_ip;
1187+ ai.dest = &entry->ipv4.new.dest_ip;
1188+ seq_printf(m, " new=");
1189+ mtk_print_addr_info(m, &ai);
1190+ break;
1191+ }
1192+
1193+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
1194+ l2 = &entry->ipv6.l2;
1195+ ib2 = entry->ipv6.ib2;
1196+ } else {
1197+ l2 = &entry->ipv4.l2;
1198+ ib2 = entry->ipv4.ib2;
1199+ }
1200+
1201+ *((__be32 *)h_source) = htonl(l2->src_mac_hi);
1202+ *((__be16 *)&h_source[4]) = htons(l2->src_mac_lo);
1203+ *((__be32 *)h_dest) = htonl(l2->dest_mac_hi);
1204+ *((__be16 *)&h_dest[4]) = htons(l2->dest_mac_lo);
1205+
1206+ seq_printf(m, " eth=%pM->%pM etype=%04x"
developeree39bcf2023-06-16 08:03:30 +08001207+ " vlan=%d,%d ib1=%08x ib2=%08x\n",
developer8cb3ac72022-07-04 10:55:14 +08001208+ h_source, h_dest, ntohs(l2->etype),
developeree39bcf2023-06-16 08:03:30 +08001209+ l2->vlan1, l2->vlan2, entry->ib1, ib2);
developer8cb3ac72022-07-04 10:55:14 +08001210+ }
1211+
1212+ return 0;
1213+}
1214+
1215+static int
1216+mtk_ppe_debugfs_foe_show_all(struct seq_file *m, void *private)
1217+{
1218+ return mtk_ppe_debugfs_foe_show(m, private, false);
1219+}
1220+
1221+static int
1222+mtk_ppe_debugfs_foe_show_bind(struct seq_file *m, void *private)
1223+{
1224+ return mtk_ppe_debugfs_foe_show(m, private, true);
1225+}
1226+
1227+static int
1228+mtk_ppe_debugfs_foe_open_all(struct inode *inode, struct file *file)
1229+{
1230+ return single_open(file, mtk_ppe_debugfs_foe_show_all,
1231+ inode->i_private);
1232+}
1233+
1234+static int
1235+mtk_ppe_debugfs_foe_open_bind(struct inode *inode, struct file *file)
1236+{
1237+ return single_open(file, mtk_ppe_debugfs_foe_show_bind,
1238+ inode->i_private);
1239+}
1240+
developeree39bcf2023-06-16 08:03:30 +08001241+int mtk_ppe_debugfs_init(struct mtk_ppe *ppe)
developer8cb3ac72022-07-04 10:55:14 +08001242+{
1243+ static const struct file_operations fops_all = {
1244+ .open = mtk_ppe_debugfs_foe_open_all,
1245+ .read = seq_read,
1246+ .llseek = seq_lseek,
1247+ .release = single_release,
1248+ };
developeree39bcf2023-06-16 08:03:30 +08001249+
developer8cb3ac72022-07-04 10:55:14 +08001250+ static const struct file_operations fops_bind = {
1251+ .open = mtk_ppe_debugfs_foe_open_bind,
1252+ .read = seq_read,
1253+ .llseek = seq_lseek,
1254+ .release = single_release,
1255+ };
developer7eb15dc2023-06-14 17:44:03 +08001256+
developeree39bcf2023-06-16 08:03:30 +08001257+ struct dentry *root;
developer7eb15dc2023-06-14 17:44:03 +08001258+
developeree39bcf2023-06-16 08:03:30 +08001259+ root = debugfs_create_dir("mtk_ppe", NULL);
developer8cb3ac72022-07-04 10:55:14 +08001260+ debugfs_create_file("entries", S_IRUGO, root, ppe, &fops_all);
1261+ debugfs_create_file("bind", S_IRUGO, root, ppe, &fops_bind);
1262+
1263+ return 0;
1264+}
1265diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
1266new file mode 100644
developeree39bcf2023-06-16 08:03:30 +08001267index 000000000..4294f0c74
developer8cb3ac72022-07-04 10:55:14 +08001268--- /dev/null
1269+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
developeree39bcf2023-06-16 08:03:30 +08001270@@ -0,0 +1,535 @@
developer8cb3ac72022-07-04 10:55:14 +08001271+// SPDX-License-Identifier: GPL-2.0-only
1272+/*
1273+ * Copyright (C) 2020 Felix Fietkau <nbd@nbd.name>
1274+ */
1275+
1276+#include <linux/if_ether.h>
1277+#include <linux/rhashtable.h>
1278+#include <linux/ip.h>
1279+#include <linux/ipv6.h>
1280+#include <net/flow_offload.h>
1281+#include <net/pkt_cls.h>
1282+#include <net/dsa.h>
1283+#include "mtk_eth_soc.h"
1284+
1285+struct mtk_flow_data {
1286+ struct ethhdr eth;
1287+
1288+ union {
1289+ struct {
1290+ __be32 src_addr;
1291+ __be32 dst_addr;
1292+ } v4;
1293+
1294+ struct {
1295+ struct in6_addr src_addr;
1296+ struct in6_addr dst_addr;
1297+ } v6;
1298+ };
1299+
1300+ __be16 src_port;
1301+ __be16 dst_port;
1302+
1303+ struct {
1304+ u16 id;
1305+ __be16 proto;
1306+ u8 num;
1307+ } vlan;
1308+ struct {
1309+ u16 sid;
1310+ u8 num;
1311+ } pppoe;
1312+};
1313+
developeree39bcf2023-06-16 08:03:30 +08001314+struct mtk_flow_entry {
1315+ struct rhash_head node;
1316+ unsigned long cookie;
1317+ u16 hash;
1318+};
1319+
developer8cb3ac72022-07-04 10:55:14 +08001320+static const struct rhashtable_params mtk_flow_ht_params = {
1321+ .head_offset = offsetof(struct mtk_flow_entry, node),
1322+ .key_offset = offsetof(struct mtk_flow_entry, cookie),
1323+ .key_len = sizeof(unsigned long),
1324+ .automatic_shrinking = true,
1325+};
1326+
developeree39bcf2023-06-16 08:03:30 +08001327+static u32
1328+mtk_eth_timestamp(struct mtk_eth *eth)
1329+{
1330+ return mtk_r32(eth, 0x0010) & MTK_FOE_IB1_BIND_TIMESTAMP;
1331+}
1332+
developer8cb3ac72022-07-04 10:55:14 +08001333+static int
developeree39bcf2023-06-16 08:03:30 +08001334+mtk_flow_set_ipv4_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data,
1335+ bool egress)
developer8cb3ac72022-07-04 10:55:14 +08001336+{
developeree39bcf2023-06-16 08:03:30 +08001337+ return mtk_foe_entry_set_ipv4_tuple(foe, egress,
developer8cb3ac72022-07-04 10:55:14 +08001338+ data->v4.src_addr, data->src_port,
1339+ data->v4.dst_addr, data->dst_port);
1340+}
1341+
1342+static int
developeree39bcf2023-06-16 08:03:30 +08001343+mtk_flow_set_ipv6_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data)
developer8cb3ac72022-07-04 10:55:14 +08001344+{
developeree39bcf2023-06-16 08:03:30 +08001345+ return mtk_foe_entry_set_ipv6_tuple(foe,
developer8cb3ac72022-07-04 10:55:14 +08001346+ data->v6.src_addr.s6_addr32, data->src_port,
1347+ data->v6.dst_addr.s6_addr32, data->dst_port);
1348+}
1349+
1350+static void
1351+mtk_flow_offload_mangle_eth(const struct flow_action_entry *act, void *eth)
1352+{
1353+ void *dest = eth + act->mangle.offset;
1354+ const void *src = &act->mangle.val;
1355+
1356+ if (act->mangle.offset > 8)
1357+ return;
1358+
1359+ if (act->mangle.mask == 0xffff) {
1360+ src += 2;
1361+ dest += 2;
1362+ }
1363+
1364+ memcpy(dest, src, act->mangle.mask ? 2 : 4);
1365+}
1366+
developeree39bcf2023-06-16 08:03:30 +08001367+
developer8cb3ac72022-07-04 10:55:14 +08001368+static int
1369+mtk_flow_mangle_ports(const struct flow_action_entry *act,
1370+ struct mtk_flow_data *data)
1371+{
1372+ u32 val = ntohl(act->mangle.val);
1373+
1374+ switch (act->mangle.offset) {
1375+ case 0:
1376+ if (act->mangle.mask == ~htonl(0xffff))
1377+ data->dst_port = cpu_to_be16(val);
1378+ else
1379+ data->src_port = cpu_to_be16(val >> 16);
1380+ break;
1381+ case 2:
1382+ data->dst_port = cpu_to_be16(val);
1383+ break;
1384+ default:
1385+ return -EINVAL;
1386+ }
1387+
1388+ return 0;
1389+}
1390+
1391+static int
1392+mtk_flow_mangle_ipv4(const struct flow_action_entry *act,
1393+ struct mtk_flow_data *data)
1394+{
1395+ __be32 *dest;
1396+
1397+ switch (act->mangle.offset) {
1398+ case offsetof(struct iphdr, saddr):
1399+ dest = &data->v4.src_addr;
1400+ break;
1401+ case offsetof(struct iphdr, daddr):
1402+ dest = &data->v4.dst_addr;
1403+ break;
1404+ default:
1405+ return -EINVAL;
1406+ }
1407+
1408+ memcpy(dest, &act->mangle.val, sizeof(u32));
1409+
1410+ return 0;
1411+}
1412+
1413+static int
1414+mtk_flow_get_dsa_port(struct net_device **dev)
1415+{
1416+#if IS_ENABLED(CONFIG_NET_DSA)
1417+ struct dsa_port *dp;
1418+
1419+ dp = dsa_port_from_netdev(*dev);
1420+ if (IS_ERR(dp))
1421+ return -ENODEV;
1422+
1423+ if (dp->cpu_dp->tag_ops->proto != DSA_TAG_PROTO_MTK)
1424+ return -ENODEV;
1425+
1426+ *dev = dp->cpu_dp->master;
1427+
1428+ return dp->index;
1429+#else
1430+ return -ENODEV;
1431+#endif
1432+}
1433+
1434+static int
1435+mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
developeree39bcf2023-06-16 08:03:30 +08001436+ struct net_device *dev)
developer8cb3ac72022-07-04 10:55:14 +08001437+{
developeree39bcf2023-06-16 08:03:30 +08001438+ int pse_port, dsa_port;
developer8cb3ac72022-07-04 10:55:14 +08001439+
1440+ dsa_port = mtk_flow_get_dsa_port(&dev);
developeree39bcf2023-06-16 08:03:30 +08001441+ if (dsa_port >= 0)
1442+ mtk_foe_entry_set_dsa(foe, dsa_port);
developer8cb3ac72022-07-04 10:55:14 +08001443+
1444+ if (dev == eth->netdev[0])
developeree39bcf2023-06-16 08:03:30 +08001445+ pse_port = PSE_GDM1_PORT;
developer8cb3ac72022-07-04 10:55:14 +08001446+ else if (dev == eth->netdev[1])
developeree39bcf2023-06-16 08:03:30 +08001447+ pse_port = PSE_GDM2_PORT;
1448+ else
1449+ return -EOPNOTSUPP;
developer7eb15dc2023-06-14 17:44:03 +08001450+
developeree39bcf2023-06-16 08:03:30 +08001451+ mtk_foe_entry_set_pse_port(foe, pse_port);
developer8cb3ac72022-07-04 10:55:14 +08001452+
1453+ return 0;
1454+}
1455+
1456+static int
developeree39bcf2023-06-16 08:03:30 +08001457+mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
developer8cb3ac72022-07-04 10:55:14 +08001458+{
1459+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1460+ struct flow_action_entry *act;
1461+ struct mtk_flow_data data = {};
1462+ struct mtk_foe_entry foe;
1463+ struct net_device *odev = NULL;
1464+ struct mtk_flow_entry *entry;
1465+ int offload_type = 0;
1466+ u16 addr_type = 0;
developeree39bcf2023-06-16 08:03:30 +08001467+ u32 timestamp;
developer8cb3ac72022-07-04 10:55:14 +08001468+ u8 l4proto = 0;
1469+ int err = 0;
developeree39bcf2023-06-16 08:03:30 +08001470+ int hash;
developer8cb3ac72022-07-04 10:55:14 +08001471+ int i;
1472+
1473+ if (rhashtable_lookup(&eth->flow_table, &f->cookie, mtk_flow_ht_params))
1474+ return -EEXIST;
1475+
1476+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
1477+ struct flow_match_meta match;
1478+
1479+ flow_rule_match_meta(rule, &match);
1480+ } else {
1481+ return -EOPNOTSUPP;
1482+ }
1483+
1484+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
1485+ struct flow_match_control match;
1486+
1487+ flow_rule_match_control(rule, &match);
1488+ addr_type = match.key->addr_type;
1489+ } else {
1490+ return -EOPNOTSUPP;
1491+ }
1492+
1493+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
1494+ struct flow_match_basic match;
1495+
1496+ flow_rule_match_basic(rule, &match);
1497+ l4proto = match.key->ip_proto;
1498+ } else {
1499+ return -EOPNOTSUPP;
1500+ }
1501+
1502+ flow_action_for_each(i, act, &rule->action) {
1503+ switch (act->id) {
1504+ case FLOW_ACTION_MANGLE:
1505+ if (act->mangle.htype == FLOW_ACT_MANGLE_HDR_TYPE_ETH)
1506+ mtk_flow_offload_mangle_eth(act, &data.eth);
1507+ break;
1508+ case FLOW_ACTION_REDIRECT:
1509+ odev = act->dev;
1510+ break;
1511+ case FLOW_ACTION_CSUM:
1512+ break;
1513+ case FLOW_ACTION_VLAN_PUSH:
1514+ if (data.vlan.num == 1 ||
1515+ act->vlan.proto != htons(ETH_P_8021Q))
1516+ return -EOPNOTSUPP;
1517+
1518+ data.vlan.id = act->vlan.vid;
1519+ data.vlan.proto = act->vlan.proto;
1520+ data.vlan.num++;
1521+ break;
1522+ case FLOW_ACTION_VLAN_POP:
1523+ break;
1524+ case FLOW_ACTION_PPPOE_PUSH:
1525+ if (data.pppoe.num == 1)
1526+ return -EOPNOTSUPP;
1527+
1528+ data.pppoe.sid = act->pppoe.sid;
1529+ data.pppoe.num++;
1530+ break;
1531+ default:
1532+ return -EOPNOTSUPP;
1533+ }
1534+ }
1535+
developeree39bcf2023-06-16 08:03:30 +08001536+ switch (addr_type) {
1537+ case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
1538+ offload_type = MTK_PPE_PKT_TYPE_IPV4_HNAPT;
1539+ break;
1540+ case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
1541+ offload_type = MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T;
1542+ break;
1543+ default:
1544+ return -EOPNOTSUPP;
1545+ }
1546+
developer8cb3ac72022-07-04 10:55:14 +08001547+ if (!is_valid_ether_addr(data.eth.h_source) ||
1548+ !is_valid_ether_addr(data.eth.h_dest))
1549+ return -EINVAL;
1550+
developeree39bcf2023-06-16 08:03:30 +08001551+ err = mtk_foe_entry_prepare(&foe, offload_type, l4proto, 0,
1552+ data.eth.h_source,
1553+ data.eth.h_dest);
developer8cb3ac72022-07-04 10:55:14 +08001554+ if (err)
1555+ return err;
1556+
1557+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
1558+ struct flow_match_ports ports;
1559+
1560+ flow_rule_match_ports(rule, &ports);
1561+ data.src_port = ports.key->src;
1562+ data.dst_port = ports.key->dst;
developeree39bcf2023-06-16 08:03:30 +08001563+ } else {
developer8cb3ac72022-07-04 10:55:14 +08001564+ return -EOPNOTSUPP;
1565+ }
1566+
1567+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
1568+ struct flow_match_ipv4_addrs addrs;
1569+
1570+ flow_rule_match_ipv4_addrs(rule, &addrs);
1571+
1572+ data.v4.src_addr = addrs.key->src;
1573+ data.v4.dst_addr = addrs.key->dst;
1574+
developeree39bcf2023-06-16 08:03:30 +08001575+ mtk_flow_set_ipv4_addr(&foe, &data, false);
developer8cb3ac72022-07-04 10:55:14 +08001576+ }
1577+
1578+ if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
1579+ struct flow_match_ipv6_addrs addrs;
1580+
1581+ flow_rule_match_ipv6_addrs(rule, &addrs);
1582+
1583+ data.v6.src_addr = addrs.key->src;
1584+ data.v6.dst_addr = addrs.key->dst;
1585+
developeree39bcf2023-06-16 08:03:30 +08001586+ mtk_flow_set_ipv6_addr(&foe, &data);
developer8cb3ac72022-07-04 10:55:14 +08001587+ }
1588+
1589+ flow_action_for_each(i, act, &rule->action) {
1590+ if (act->id != FLOW_ACTION_MANGLE)
1591+ continue;
1592+
1593+ switch (act->mangle.htype) {
1594+ case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
1595+ case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
1596+ err = mtk_flow_mangle_ports(act, &data);
1597+ break;
1598+ case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
1599+ err = mtk_flow_mangle_ipv4(act, &data);
1600+ break;
1601+ case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
1602+ /* handled earlier */
1603+ break;
1604+ default:
1605+ return -EOPNOTSUPP;
1606+ }
1607+
1608+ if (err)
1609+ return err;
1610+ }
1611+
1612+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
developeree39bcf2023-06-16 08:03:30 +08001613+ err = mtk_flow_set_ipv4_addr(&foe, &data, true);
developer8cb3ac72022-07-04 10:55:14 +08001614+ if (err)
1615+ return err;
1616+ }
1617+
1618+ if (data.vlan.num == 1) {
1619+ if (data.vlan.proto != htons(ETH_P_8021Q))
1620+ return -EOPNOTSUPP;
1621+
developeree39bcf2023-06-16 08:03:30 +08001622+ mtk_foe_entry_set_vlan(&foe, data.vlan.id);
developer8cb3ac72022-07-04 10:55:14 +08001623+ }
1624+ if (data.pppoe.num == 1)
developeree39bcf2023-06-16 08:03:30 +08001625+ mtk_foe_entry_set_pppoe(&foe, data.pppoe.sid);
developer8cb3ac72022-07-04 10:55:14 +08001626+
developeree39bcf2023-06-16 08:03:30 +08001627+ err = mtk_flow_set_output_device(eth, &foe, odev);
developer8cb3ac72022-07-04 10:55:14 +08001628+ if (err)
1629+ return err;
1630+
1631+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1632+ if (!entry)
1633+ return -ENOMEM;
1634+
1635+ entry->cookie = f->cookie;
developeree39bcf2023-06-16 08:03:30 +08001636+ timestamp = mtk_eth_timestamp(eth);
1637+ hash = mtk_foe_entry_commit(&eth->ppe, &foe, timestamp);
1638+ if (hash < 0) {
1639+ err = hash;
developer8cb3ac72022-07-04 10:55:14 +08001640+ goto free;
developeree39bcf2023-06-16 08:03:30 +08001641+ }
developer8cb3ac72022-07-04 10:55:14 +08001642+
developeree39bcf2023-06-16 08:03:30 +08001643+ entry->hash = hash;
developer8cb3ac72022-07-04 10:55:14 +08001644+ err = rhashtable_insert_fast(&eth->flow_table, &entry->node,
1645+ mtk_flow_ht_params);
1646+ if (err < 0)
developeree39bcf2023-06-16 08:03:30 +08001647+ goto clear_flow;
developer8cb3ac72022-07-04 10:55:14 +08001648+
1649+ return 0;
developeree39bcf2023-06-16 08:03:30 +08001650+clear_flow:
1651+ mtk_foe_entry_clear(&eth->ppe, hash);
developer8cb3ac72022-07-04 10:55:14 +08001652+free:
1653+ kfree(entry);
1654+ return err;
1655+}
1656+
1657+static int
1658+mtk_flow_offload_destroy(struct mtk_eth *eth, struct flow_cls_offload *f)
1659+{
1660+ struct mtk_flow_entry *entry;
1661+
1662+ entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
1663+ mtk_flow_ht_params);
1664+ if (!entry)
1665+ return -ENOENT;
1666+
developeree39bcf2023-06-16 08:03:30 +08001667+ mtk_foe_entry_clear(&eth->ppe, entry->hash);
developer8cb3ac72022-07-04 10:55:14 +08001668+ rhashtable_remove_fast(&eth->flow_table, &entry->node,
1669+ mtk_flow_ht_params);
1670+ kfree(entry);
1671+
1672+ return 0;
1673+}
1674+
1675+static int
1676+mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f)
1677+{
1678+ struct mtk_flow_entry *entry;
developeree39bcf2023-06-16 08:03:30 +08001679+ int timestamp;
1680+ u32 idle;
developer8cb3ac72022-07-04 10:55:14 +08001681+
1682+ entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
1683+ mtk_flow_ht_params);
1684+ if (!entry)
1685+ return -ENOENT;
1686+
developeree39bcf2023-06-16 08:03:30 +08001687+ timestamp = mtk_foe_entry_timestamp(&eth->ppe, entry->hash);
1688+ if (timestamp < 0)
1689+ return -ETIMEDOUT;
1690+
1691+ idle = mtk_eth_timestamp(eth) - timestamp;
developer8cb3ac72022-07-04 10:55:14 +08001692+ f->stats.lastused = jiffies - idle * HZ;
1693+
1694+ return 0;
1695+}
1696+
1697+static DEFINE_MUTEX(mtk_flow_offload_mutex);
1698+
developeree39bcf2023-06-16 08:03:30 +08001699+static int
1700+mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
developer8cb3ac72022-07-04 10:55:14 +08001701+{
developeree39bcf2023-06-16 08:03:30 +08001702+ struct flow_cls_offload *cls = type_data;
1703+ struct net_device *dev = cb_priv;
1704+ struct mtk_mac *mac = netdev_priv(dev);
1705+ struct mtk_eth *eth = mac->hw;
developer8cb3ac72022-07-04 10:55:14 +08001706+ int err;
1707+
developeree39bcf2023-06-16 08:03:30 +08001708+ if (!tc_can_offload(dev))
1709+ return -EOPNOTSUPP;
1710+
1711+ if (type != TC_SETUP_CLSFLOWER)
1712+ return -EOPNOTSUPP;
1713+
developer8cb3ac72022-07-04 10:55:14 +08001714+ mutex_lock(&mtk_flow_offload_mutex);
1715+ switch (cls->command) {
1716+ case FLOW_CLS_REPLACE:
developeree39bcf2023-06-16 08:03:30 +08001717+ err = mtk_flow_offload_replace(eth, cls);
developer8cb3ac72022-07-04 10:55:14 +08001718+ break;
1719+ case FLOW_CLS_DESTROY:
1720+ err = mtk_flow_offload_destroy(eth, cls);
1721+ break;
1722+ case FLOW_CLS_STATS:
1723+ err = mtk_flow_offload_stats(eth, cls);
1724+ break;
1725+ default:
1726+ err = -EOPNOTSUPP;
1727+ break;
1728+ }
1729+ mutex_unlock(&mtk_flow_offload_mutex);
1730+
1731+ return err;
1732+}
1733+
1734+static int
1735+mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f)
1736+{
1737+ struct mtk_mac *mac = netdev_priv(dev);
1738+ struct mtk_eth *eth = mac->hw;
1739+ static LIST_HEAD(block_cb_list);
1740+ struct flow_block_cb *block_cb;
1741+ flow_setup_cb_t *cb;
developeree39bcf2023-06-16 08:03:30 +08001742+ int err = 0;
developer207b39d2022-10-07 15:57:16 +08001743+
developeree39bcf2023-06-16 08:03:30 +08001744+ if (!eth->ppe.foe_table)
developer8cb3ac72022-07-04 10:55:14 +08001745+ return -EOPNOTSUPP;
1746+
1747+ if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
1748+ return -EOPNOTSUPP;
1749+
1750+ cb = mtk_eth_setup_tc_block_cb;
1751+ f->driver_block_list = &block_cb_list;
1752+
1753+ switch (f->command) {
1754+ case FLOW_BLOCK_BIND:
1755+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
1756+ if (block_cb) {
1757+ flow_block_cb_incref(block_cb);
developeree39bcf2023-06-16 08:03:30 +08001758+ goto unlock;
developer8cb3ac72022-07-04 10:55:14 +08001759+ }
1760+ block_cb = flow_block_cb_alloc(cb, dev, dev, NULL);
developeree39bcf2023-06-16 08:03:30 +08001761+ if (IS_ERR(block_cb)) {
1762+ err = PTR_ERR(block_cb);
1763+ goto unlock;
1764+ }
developer8cb3ac72022-07-04 10:55:14 +08001765+
1766+ flow_block_cb_add(block_cb, f);
1767+ list_add_tail(&block_cb->driver_list, &block_cb_list);
developeree39bcf2023-06-16 08:03:30 +08001768+ break;
developer8cb3ac72022-07-04 10:55:14 +08001769+ case FLOW_BLOCK_UNBIND:
1770+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
developeree39bcf2023-06-16 08:03:30 +08001771+ if (!block_cb) {
1772+ err = -ENOENT;
1773+ goto unlock;
1774+ }
developer8cb3ac72022-07-04 10:55:14 +08001775+
developeree39bcf2023-06-16 08:03:30 +08001776+ if (flow_block_cb_decref(block_cb)) {
developer8cb3ac72022-07-04 10:55:14 +08001777+ flow_block_cb_remove(block_cb, f);
1778+ list_del(&block_cb->driver_list);
1779+ }
developeree39bcf2023-06-16 08:03:30 +08001780+ break;
developer8cb3ac72022-07-04 10:55:14 +08001781+ default:
developeree39bcf2023-06-16 08:03:30 +08001782+ err = -EOPNOTSUPP;
1783+ break;
developer8cb3ac72022-07-04 10:55:14 +08001784+ }
developeree39bcf2023-06-16 08:03:30 +08001785+
1786+unlock:
1787+ return err;
developer8cb3ac72022-07-04 10:55:14 +08001788+}
1789+
1790+int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
1791+ void *type_data)
1792+{
developeree39bcf2023-06-16 08:03:30 +08001793+ if (type == TC_SETUP_FT)
developer8cb3ac72022-07-04 10:55:14 +08001794+ return mtk_eth_setup_tc_block(dev, type_data);
developeree39bcf2023-06-16 08:03:30 +08001795+
1796+ return -EOPNOTSUPP;
developer8cb3ac72022-07-04 10:55:14 +08001797+}
1798+
1799+int mtk_eth_offload_init(struct mtk_eth *eth)
1800+{
developeree39bcf2023-06-16 08:03:30 +08001801+ if (!eth->ppe.foe_table)
1802+ return 0;
1803+
developer8cb3ac72022-07-04 10:55:14 +08001804+ return rhashtable_init(&eth->flow_table, &mtk_flow_ht_params);
1805+}
1806diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_regs.h b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
1807new file mode 100644
developeree39bcf2023-06-16 08:03:30 +08001808index 000000000..0c45ea090
developer8cb3ac72022-07-04 10:55:14 +08001809--- /dev/null
1810+++ b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
developeree39bcf2023-06-16 08:03:30 +08001811@@ -0,0 +1,144 @@
developer8cb3ac72022-07-04 10:55:14 +08001812+// SPDX-License-Identifier: GPL-2.0-only
1813+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
1814+
1815+#ifndef __MTK_PPE_REGS_H
1816+#define __MTK_PPE_REGS_H
1817+
1818+#define MTK_PPE_GLO_CFG 0x200
1819+#define MTK_PPE_GLO_CFG_EN BIT(0)
1820+#define MTK_PPE_GLO_CFG_TSID_EN BIT(1)
1821+#define MTK_PPE_GLO_CFG_IP4_L4_CS_DROP BIT(2)
1822+#define MTK_PPE_GLO_CFG_IP4_CS_DROP BIT(3)
1823+#define MTK_PPE_GLO_CFG_TTL0_DROP BIT(4)
1824+#define MTK_PPE_GLO_CFG_PPE_BSWAP BIT(5)
1825+#define MTK_PPE_GLO_CFG_PSE_HASH_OFS BIT(6)
1826+#define MTK_PPE_GLO_CFG_MCAST_TB_EN BIT(7)
1827+#define MTK_PPE_GLO_CFG_FLOW_DROP_KA BIT(8)
1828+#define MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE BIT(9)
1829+#define MTK_PPE_GLO_CFG_UDP_LITE_EN BIT(10)
1830+#define MTK_PPE_GLO_CFG_UDP_LEN_DROP BIT(11)
1831+#define MTK_PPE_GLO_CFG_MCAST_ENTRIES GENMASK(13, 12)
1832+#define MTK_PPE_GLO_CFG_BUSY BIT(31)
1833+
1834+#define MTK_PPE_FLOW_CFG 0x204
1835+#define MTK_PPE_FLOW_CFG_IP4_TCP_FRAG BIT(6)
1836+#define MTK_PPE_FLOW_CFG_IP4_UDP_FRAG BIT(7)
1837+#define MTK_PPE_FLOW_CFG_IP6_3T_ROUTE BIT(8)
1838+#define MTK_PPE_FLOW_CFG_IP6_5T_ROUTE BIT(9)
1839+#define MTK_PPE_FLOW_CFG_IP6_6RD BIT(10)
1840+#define MTK_PPE_FLOW_CFG_IP4_NAT BIT(12)
1841+#define MTK_PPE_FLOW_CFG_IP4_NAPT BIT(13)
1842+#define MTK_PPE_FLOW_CFG_IP4_DSLITE BIT(14)
1843+#define MTK_PPE_FLOW_CFG_L2_BRIDGE BIT(15)
1844+#define MTK_PPE_FLOW_CFG_IP_PROTO_BLACKLIST BIT(16)
1845+#define MTK_PPE_FLOW_CFG_IP4_NAT_FRAG BIT(17)
1846+#define MTK_PPE_FLOW_CFG_IP4_HASH_FLOW_LABEL BIT(18)
1847+#define MTK_PPE_FLOW_CFG_IP4_HASH_GRE_KEY BIT(19)
1848+#define MTK_PPE_FLOW_CFG_IP6_HASH_GRE_KEY BIT(20)
1849+
1850+#define MTK_PPE_IP_PROTO_CHK 0x208
1851+#define MTK_PPE_IP_PROTO_CHK_IPV4 GENMASK(15, 0)
1852+#define MTK_PPE_IP_PROTO_CHK_IPV6 GENMASK(31, 16)
1853+
1854+#define MTK_PPE_TB_CFG 0x21c
1855+#define MTK_PPE_TB_CFG_ENTRY_NUM GENMASK(2, 0)
1856+#define MTK_PPE_TB_CFG_ENTRY_80B BIT(3)
1857+#define MTK_PPE_TB_CFG_SEARCH_MISS GENMASK(5, 4)
1858+#define MTK_PPE_TB_CFG_AGE_PREBIND BIT(6)
1859+#define MTK_PPE_TB_CFG_AGE_NON_L4 BIT(7)
1860+#define MTK_PPE_TB_CFG_AGE_UNBIND BIT(8)
1861+#define MTK_PPE_TB_CFG_AGE_TCP BIT(9)
1862+#define MTK_PPE_TB_CFG_AGE_UDP BIT(10)
1863+#define MTK_PPE_TB_CFG_AGE_TCP_FIN BIT(11)
1864+#define MTK_PPE_TB_CFG_KEEPALIVE GENMASK(13, 12)
1865+#define MTK_PPE_TB_CFG_HASH_MODE GENMASK(15, 14)
1866+#define MTK_PPE_TB_CFG_SCAN_MODE GENMASK(17, 16)
1867+#define MTK_PPE_TB_CFG_HASH_DEBUG GENMASK(19, 18)
1868+
1869+enum {
1870+ MTK_PPE_SCAN_MODE_DISABLED,
1871+ MTK_PPE_SCAN_MODE_CHECK_AGE,
1872+ MTK_PPE_SCAN_MODE_KEEPALIVE_AGE,
1873+};
1874+
1875+enum {
1876+ MTK_PPE_KEEPALIVE_DISABLE,
1877+ MTK_PPE_KEEPALIVE_UNICAST_CPU,
1878+ MTK_PPE_KEEPALIVE_DUP_CPU = 3,
1879+};
1880+
1881+enum {
1882+ MTK_PPE_SEARCH_MISS_ACTION_DROP,
1883+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD = 2,
1884+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD = 3,
1885+};
1886+
1887+#define MTK_PPE_TB_BASE 0x220
1888+
1889+#define MTK_PPE_TB_USED 0x224
1890+#define MTK_PPE_TB_USED_NUM GENMASK(13, 0)
1891+
1892+#define MTK_PPE_BIND_RATE 0x228
1893+#define MTK_PPE_BIND_RATE_BIND GENMASK(15, 0)
1894+#define MTK_PPE_BIND_RATE_PREBIND GENMASK(31, 16)
1895+
1896+#define MTK_PPE_BIND_LIMIT0 0x22c
1897+#define MTK_PPE_BIND_LIMIT0_QUARTER GENMASK(13, 0)
1898+#define MTK_PPE_BIND_LIMIT0_HALF GENMASK(29, 16)
1899+
1900+#define MTK_PPE_BIND_LIMIT1 0x230
1901+#define MTK_PPE_BIND_LIMIT1_FULL GENMASK(13, 0)
1902+#define MTK_PPE_BIND_LIMIT1_NON_L4 GENMASK(23, 16)
1903+
1904+#define MTK_PPE_KEEPALIVE 0x234
1905+#define MTK_PPE_KEEPALIVE_TIME GENMASK(15, 0)
1906+#define MTK_PPE_KEEPALIVE_TIME_TCP GENMASK(23, 16)
1907+#define MTK_PPE_KEEPALIVE_TIME_UDP GENMASK(31, 24)
1908+
1909+#define MTK_PPE_UNBIND_AGE 0x238
1910+#define MTK_PPE_UNBIND_AGE_MIN_PACKETS GENMASK(31, 16)
1911+#define MTK_PPE_UNBIND_AGE_DELTA GENMASK(7, 0)
1912+
1913+#define MTK_PPE_BIND_AGE0 0x23c
1914+#define MTK_PPE_BIND_AGE0_DELTA_NON_L4 GENMASK(30, 16)
1915+#define MTK_PPE_BIND_AGE0_DELTA_UDP GENMASK(14, 0)
1916+
1917+#define MTK_PPE_BIND_AGE1 0x240
1918+#define MTK_PPE_BIND_AGE1_DELTA_TCP_FIN GENMASK(30, 16)
1919+#define MTK_PPE_BIND_AGE1_DELTA_TCP GENMASK(14, 0)
1920+
1921+#define MTK_PPE_HASH_SEED 0x244
1922+
1923+#define MTK_PPE_DEFAULT_CPU_PORT 0x248
1924+#define MTK_PPE_DEFAULT_CPU_PORT_MASK(_n) (GENMASK(2, 0) << ((_n) * 4))
1925+
1926+#define MTK_PPE_MTU_DROP 0x308
1927+
1928+#define MTK_PPE_VLAN_MTU0 0x30c
1929+#define MTK_PPE_VLAN_MTU0_NONE GENMASK(13, 0)
1930+#define MTK_PPE_VLAN_MTU0_1TAG GENMASK(29, 16)
1931+
1932+#define MTK_PPE_VLAN_MTU1 0x310
1933+#define MTK_PPE_VLAN_MTU1_2TAG GENMASK(13, 0)
1934+#define MTK_PPE_VLAN_MTU1_3TAG GENMASK(29, 16)
1935+
1936+#define MTK_PPE_VPM_TPID 0x318
1937+
1938+#define MTK_PPE_CACHE_CTL 0x320
1939+#define MTK_PPE_CACHE_CTL_EN BIT(0)
1940+#define MTK_PPE_CACHE_CTL_LOCK_CLR BIT(4)
1941+#define MTK_PPE_CACHE_CTL_REQ BIT(8)
1942+#define MTK_PPE_CACHE_CTL_CLEAR BIT(9)
1943+#define MTK_PPE_CACHE_CTL_CMD GENMASK(13, 12)
1944+
1945+#define MTK_PPE_MIB_CFG 0x334
1946+#define MTK_PPE_MIB_CFG_EN BIT(0)
1947+#define MTK_PPE_MIB_CFG_RD_CLR BIT(1)
1948+
1949+#define MTK_PPE_MIB_TB_BASE 0x338
1950+
1951+#define MTK_PPE_MIB_CACHE_CTL 0x350
1952+#define MTK_PPE_MIB_CACHE_CTL_EN BIT(0)
1953+#define MTK_PPE_MIB_CACHE_CTL_FLUSH BIT(2)
1954+
1955+#endif
1956diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
developeree39bcf2023-06-16 08:03:30 +08001957index a085213dc..813e30495 100644
developer8cb3ac72022-07-04 10:55:14 +08001958--- a/drivers/net/ppp/ppp_generic.c
1959+++ b/drivers/net/ppp/ppp_generic.c
1960@@ -1378,12 +1378,34 @@ static void ppp_dev_priv_destructor(struct net_device *dev)
1961 ppp_destroy_interface(ppp);
1962 }
1963
1964+static int ppp_fill_forward_path(struct net_device_path_ctx *ctx,
1965+ struct net_device_path *path)
1966+{
1967+ struct ppp *ppp = netdev_priv(ctx->dev);
1968+ struct ppp_channel *chan;
1969+ struct channel *pch;
1970+
1971+ if (ppp->flags & SC_MULTILINK)
1972+ return -EOPNOTSUPP;
1973+
1974+ if (list_empty(&ppp->channels))
1975+ return -ENODEV;
1976+
1977+ pch = list_first_entry(&ppp->channels, struct channel, clist);
1978+ chan = pch->chan;
1979+ if (!chan->ops->fill_forward_path)
1980+ return -EOPNOTSUPP;
1981+
1982+ return chan->ops->fill_forward_path(ctx, path, chan);
1983+}
1984+
1985 static const struct net_device_ops ppp_netdev_ops = {
1986 .ndo_init = ppp_dev_init,
1987 .ndo_uninit = ppp_dev_uninit,
1988 .ndo_start_xmit = ppp_start_xmit,
1989 .ndo_do_ioctl = ppp_net_ioctl,
1990 .ndo_get_stats64 = ppp_get_stats64,
1991+ .ndo_fill_forward_path = ppp_fill_forward_path,
1992 };
1993
1994 static struct device_type ppp_type = {
1995diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
developeree39bcf2023-06-16 08:03:30 +08001996index 087b01684..7a8c246ab 100644
developer8cb3ac72022-07-04 10:55:14 +08001997--- a/drivers/net/ppp/pppoe.c
1998+++ b/drivers/net/ppp/pppoe.c
1999@@ -974,8 +974,32 @@ static int pppoe_xmit(struct ppp_channel *chan, struct sk_buff *skb)
2000 return __pppoe_xmit(sk, skb);
2001 }
2002
2003+static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx,
2004+ struct net_device_path *path,
2005+ const struct ppp_channel *chan)
2006+{
2007+ struct sock *sk = (struct sock *)chan->private;
2008+ struct pppox_sock *po = pppox_sk(sk);
2009+ struct net_device *dev = po->pppoe_dev;
2010+
2011+ if (sock_flag(sk, SOCK_DEAD) ||
2012+ !(sk->sk_state & PPPOX_CONNECTED) || !dev)
2013+ return -1;
2014+
2015+ path->type = DEV_PATH_PPPOE;
2016+ path->encap.proto = htons(ETH_P_PPP_SES);
2017+ path->encap.id = be16_to_cpu(po->num);
2018+ memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN);
2019+ memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN);
2020+ path->dev = ctx->dev;
2021+ ctx->dev = dev;
2022+
2023+ return 0;
2024+}
2025+
2026 static const struct ppp_channel_ops pppoe_chan_ops = {
2027 .start_xmit = pppoe_xmit,
2028+ .fill_forward_path = pppoe_fill_forward_path,
2029 };
2030
2031 static int pppoe_recvmsg(struct socket *sock, struct msghdr *m,
2032diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
developeree39bcf2023-06-16 08:03:30 +08002033index 38af42bf8..9f64504ac 100644
developer8cb3ac72022-07-04 10:55:14 +08002034--- a/include/linux/netdevice.h
2035+++ b/include/linux/netdevice.h
developeree39bcf2023-06-16 08:03:30 +08002036@@ -829,6 +829,59 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
developer8cb3ac72022-07-04 10:55:14 +08002037 struct sk_buff *skb,
2038 struct net_device *sb_dev);
2039
2040+enum net_device_path_type {
2041+ DEV_PATH_ETHERNET = 0,
2042+ DEV_PATH_VLAN,
2043+ DEV_PATH_BRIDGE,
2044+ DEV_PATH_PPPOE,
2045+ DEV_PATH_DSA,
2046+};
2047+
2048+struct net_device_path {
2049+ enum net_device_path_type type;
2050+ const struct net_device *dev;
2051+ union {
2052+ struct {
2053+ u16 id;
2054+ __be16 proto;
2055+ u8 h_dest[ETH_ALEN];
2056+ } encap;
2057+ struct {
2058+ enum {
2059+ DEV_PATH_BR_VLAN_KEEP,
2060+ DEV_PATH_BR_VLAN_TAG,
2061+ DEV_PATH_BR_VLAN_UNTAG,
2062+ DEV_PATH_BR_VLAN_UNTAG_HW,
2063+ } vlan_mode;
2064+ u16 vlan_id;
2065+ __be16 vlan_proto;
2066+ } bridge;
2067+ struct {
2068+ int port;
2069+ u16 proto;
2070+ } dsa;
2071+ };
2072+};
2073+
2074+#define NET_DEVICE_PATH_STACK_MAX 5
2075+#define NET_DEVICE_PATH_VLAN_MAX 2
2076+
2077+struct net_device_path_stack {
2078+ int num_paths;
2079+ struct net_device_path path[NET_DEVICE_PATH_STACK_MAX];
2080+};
2081+
2082+struct net_device_path_ctx {
2083+ const struct net_device *dev;
2084+ u8 daddr[ETH_ALEN];
2085+
2086+ int num_vlans;
2087+ struct {
2088+ u16 id;
2089+ __be16 proto;
2090+ } vlan[NET_DEVICE_PATH_VLAN_MAX];
2091+};
2092+
2093 enum tc_setup_type {
2094 TC_SETUP_QDISC_MQPRIO,
2095 TC_SETUP_CLSU32,
developeree39bcf2023-06-16 08:03:30 +08002096@@ -844,6 +897,7 @@ enum tc_setup_type {
developer8cb3ac72022-07-04 10:55:14 +08002097 TC_SETUP_ROOT_QDISC,
2098 TC_SETUP_QDISC_GRED,
2099 TC_SETUP_QDISC_TAPRIO,
2100+ TC_SETUP_FT,
2101 };
2102
2103 /* These structures hold the attributes of bpf state that are being passed
developeree39bcf2023-06-16 08:03:30 +08002104@@ -1239,6 +1293,8 @@ struct tlsdev_ops;
developer8cb3ac72022-07-04 10:55:14 +08002105 * Get devlink port instance associated with a given netdev.
2106 * Called with a reference on the netdevice and devlink locks only,
2107 * rtnl_lock is not held.
2108+ * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path);
2109+ * Get the forwarding path to reach the real device from the HW destination address
2110 */
2111 struct net_device_ops {
2112 int (*ndo_init)(struct net_device *dev);
developeree39bcf2023-06-16 08:03:30 +08002113@@ -1436,6 +1492,8 @@ struct net_device_ops {
developer8cb3ac72022-07-04 10:55:14 +08002114 int (*ndo_xsk_wakeup)(struct net_device *dev,
2115 u32 queue_id, u32 flags);
2116 struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev);
2117+ int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx,
2118+ struct net_device_path *path);
2119 };
2120
2121 /**
developeree39bcf2023-06-16 08:03:30 +08002122@@ -2661,6 +2719,8 @@ void dev_remove_offload(struct packet_offload *po);
developer8cb3ac72022-07-04 10:55:14 +08002123
2124 int dev_get_iflink(const struct net_device *dev);
2125 int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
2126+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
2127+ struct net_device_path_stack *stack);
2128 struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
2129 unsigned short mask);
2130 struct net_device *dev_get_by_name(struct net *net, const char *name);
2131diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h
developeree39bcf2023-06-16 08:03:30 +08002132index 98966064e..91f9a9283 100644
developer8cb3ac72022-07-04 10:55:14 +08002133--- a/include/linux/ppp_channel.h
2134+++ b/include/linux/ppp_channel.h
2135@@ -28,6 +28,9 @@ struct ppp_channel_ops {
2136 int (*start_xmit)(struct ppp_channel *, struct sk_buff *);
2137 /* Handle an ioctl call that has come in via /dev/ppp. */
2138 int (*ioctl)(struct ppp_channel *, unsigned int, unsigned long);
2139+ int (*fill_forward_path)(struct net_device_path_ctx *,
2140+ struct net_device_path *,
2141+ const struct ppp_channel *);
2142 };
2143
2144 struct ppp_channel {
2145diff --git a/include/net/dsa.h b/include/net/dsa.h
developeree39bcf2023-06-16 08:03:30 +08002146index 05f66d487..cafc74218 100644
developer8cb3ac72022-07-04 10:55:14 +08002147--- a/include/net/dsa.h
2148+++ b/include/net/dsa.h
developeree39bcf2023-06-16 08:03:30 +08002149@@ -561,6 +561,8 @@ struct dsa_switch_ops {
developer8cb3ac72022-07-04 10:55:14 +08002150 struct sk_buff *skb);
2151 };
2152
2153+struct dsa_port *dsa_port_from_netdev(struct net_device *netdev);
2154+
2155 struct dsa_switch_driver {
2156 struct list_head list;
2157 const struct dsa_switch_ops *ops;
developeree39bcf2023-06-16 08:03:30 +08002158@@ -653,6 +655,14 @@ static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev,
developer8cb3ac72022-07-04 10:55:14 +08002159 #define BRCM_TAG_GET_PORT(v) ((v) >> 8)
2160 #define BRCM_TAG_GET_QUEUE(v) ((v) & 0xff)
2161
2162+#if IS_ENABLED(CONFIG_NET_DSA)
2163+bool dsa_slave_dev_check(const struct net_device *dev);
2164+#else
2165+static inline bool dsa_slave_dev_check(const struct net_device *dev)
2166+{
2167+ return false;
2168+}
2169+#endif
2170
2171 netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev);
2172 int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data);
2173diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
developeree39bcf2023-06-16 08:03:30 +08002174index c6f7bd22d..59b873653 100644
developer8cb3ac72022-07-04 10:55:14 +08002175--- a/include/net/flow_offload.h
2176+++ b/include/net/flow_offload.h
2177@@ -138,6 +138,7 @@ enum flow_action_id {
2178 FLOW_ACTION_MPLS_PUSH,
2179 FLOW_ACTION_MPLS_POP,
2180 FLOW_ACTION_MPLS_MANGLE,
2181+ FLOW_ACTION_PPPOE_PUSH,
2182 NUM_FLOW_ACTIONS,
2183 };
2184
2185@@ -213,6 +214,9 @@ struct flow_action_entry {
2186 u8 bos;
2187 u8 ttl;
2188 } mpls_mangle;
2189+ struct { /* FLOW_ACTION_PPPOE_PUSH */
2190+ u16 sid;
2191+ } pppoe;
2192 };
2193 };
2194
2195diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
developeree39bcf2023-06-16 08:03:30 +08002196index 2c739fc75..89ab8f180 100644
developer8cb3ac72022-07-04 10:55:14 +08002197--- a/include/net/ip6_route.h
2198+++ b/include/net/ip6_route.h
2199@@ -314,12 +314,13 @@ static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *
2200 !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws);
2201 }
2202
2203-static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
2204+static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst,
2205+ bool forwarding)
2206 {
2207 struct inet6_dev *idev;
2208 unsigned int mtu;
2209
2210- if (dst_metric_locked(dst, RTAX_MTU)) {
2211+ if (!forwarding || dst_metric_locked(dst, RTAX_MTU)) {
2212 mtu = dst_metric_raw(dst, RTAX_MTU);
2213 if (mtu)
2214 goto out;
2215diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
developeree39bcf2023-06-16 08:03:30 +08002216index 7b3c873f8..e95483192 100644
developer8cb3ac72022-07-04 10:55:14 +08002217--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
2218+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
2219@@ -4,7 +4,4 @@
2220
2221 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
2222
2223-#include <linux/sysctl.h>
2224-extern struct ctl_table nf_ct_ipv6_sysctl_table[];
2225-
2226 #endif /* _NF_CONNTRACK_IPV6_H*/
2227diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
developeree39bcf2023-06-16 08:03:30 +08002228index 90690e37a..ce0bc3e62 100644
developer8cb3ac72022-07-04 10:55:14 +08002229--- a/include/net/netfilter/nf_conntrack.h
2230+++ b/include/net/netfilter/nf_conntrack.h
2231@@ -279,6 +279,18 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
2232 !nf_ct_is_dying(ct);
2233 }
2234
2235+#define NF_CT_DAY (86400 * HZ)
2236+
2237+/* Set an arbitrary timeout large enough not to ever expire, this save
2238+ * us a check for the IPS_OFFLOAD_BIT from the packet path via
2239+ * nf_ct_is_expired().
2240+ */
2241+static inline void nf_ct_offload_timeout(struct nf_conn *ct)
2242+{
2243+ if (nf_ct_expires(ct) < NF_CT_DAY / 2)
2244+ WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
2245+}
2246+
2247 struct kernel_param;
2248
2249 int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp);
2250diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
developeree39bcf2023-06-16 08:03:30 +08002251index f7a060c6e..7f44a7715 100644
developer8cb3ac72022-07-04 10:55:14 +08002252--- a/include/net/netfilter/nf_conntrack_acct.h
2253+++ b/include/net/netfilter/nf_conntrack_acct.h
2254@@ -65,6 +65,17 @@ static inline void nf_ct_set_acct(struct net *net, bool enable)
2255 #endif
2256 }
2257
2258+void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
2259+ unsigned int bytes);
2260+
2261+static inline void nf_ct_acct_update(struct nf_conn *ct, u32 dir,
2262+ unsigned int bytes)
2263+{
2264+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
2265+ nf_ct_acct_add(ct, dir, 1, bytes);
2266+#endif
2267+}
2268+
2269 void nf_conntrack_acct_pernet_init(struct net *net);
2270
2271 int nf_conntrack_acct_init(void);
2272diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
developeree39bcf2023-06-16 08:03:30 +08002273index 68d7fc92..7cf89767 100644
developer8cb3ac72022-07-04 10:55:14 +08002274--- a/include/net/netfilter/nf_flow_table.h
2275+++ b/include/net/netfilter/nf_flow_table.h
2276@@ -8,31 +8,99 @@
2277 #include <linux/rcupdate.h>
2278 #include <linux/netfilter.h>
2279 #include <linux/netfilter/nf_conntrack_tuple_common.h>
2280+#include <net/flow_offload.h>
2281 #include <net/dst.h>
2282+#include <linux/if_pppox.h>
2283+#include <linux/ppp_defs.h>
2284
2285 struct nf_flowtable;
2286+struct nf_flow_rule;
2287+struct flow_offload;
2288+enum flow_offload_tuple_dir;
2289+
2290+struct nf_flow_key {
2291+ struct flow_dissector_key_meta meta;
2292+ struct flow_dissector_key_control control;
2293+ struct flow_dissector_key_control enc_control;
2294+ struct flow_dissector_key_basic basic;
2295+ struct flow_dissector_key_vlan vlan;
2296+ struct flow_dissector_key_vlan cvlan;
2297+ union {
2298+ struct flow_dissector_key_ipv4_addrs ipv4;
2299+ struct flow_dissector_key_ipv6_addrs ipv6;
2300+ };
2301+ struct flow_dissector_key_keyid enc_key_id;
2302+ union {
2303+ struct flow_dissector_key_ipv4_addrs enc_ipv4;
2304+ struct flow_dissector_key_ipv6_addrs enc_ipv6;
2305+ };
2306+ struct flow_dissector_key_tcp tcp;
2307+ struct flow_dissector_key_ports tp;
2308+} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
2309+
2310+struct nf_flow_match {
2311+ struct flow_dissector dissector;
2312+ struct nf_flow_key key;
2313+ struct nf_flow_key mask;
2314+};
2315+
2316+struct nf_flow_rule {
2317+ struct nf_flow_match match;
2318+ struct flow_rule *rule;
2319+};
2320
2321 struct nf_flowtable_type {
2322 struct list_head list;
2323 int family;
2324 int (*init)(struct nf_flowtable *ft);
2325+ int (*setup)(struct nf_flowtable *ft,
2326+ struct net_device *dev,
2327+ enum flow_block_command cmd);
2328+ int (*action)(struct net *net,
2329+ const struct flow_offload *flow,
2330+ enum flow_offload_tuple_dir dir,
2331+ struct nf_flow_rule *flow_rule);
2332 void (*free)(struct nf_flowtable *ft);
2333 nf_hookfn *hook;
2334 struct module *owner;
2335 };
2336
2337+enum nf_flowtable_flags {
2338+ NF_FLOWTABLE_HW_OFFLOAD = 0x1, /* NFT_FLOWTABLE_HW_OFFLOAD */
2339+ NF_FLOWTABLE_COUNTER = 0x2, /* NFT_FLOWTABLE_COUNTER */
2340+};
2341+
2342 struct nf_flowtable {
2343 struct list_head list;
2344 struct rhashtable rhashtable;
2345+ int priority;
2346 const struct nf_flowtable_type *type;
2347 struct delayed_work gc_work;
2348+ unsigned int flags;
2349+ struct flow_block flow_block;
2350+ struct rw_semaphore flow_block_lock; /* Guards flow_block */
2351+ possible_net_t net;
2352 };
2353
2354+static inline bool nf_flowtable_hw_offload(struct nf_flowtable *flowtable)
2355+{
2356+ return flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD;
2357+}
2358+
2359 enum flow_offload_tuple_dir {
2360 FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
2361 FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
2362- FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
2363 };
2364+#define FLOW_OFFLOAD_DIR_MAX IP_CT_DIR_MAX
2365+
2366+enum flow_offload_xmit_type {
2367+ FLOW_OFFLOAD_XMIT_UNSPEC = 0,
2368+ FLOW_OFFLOAD_XMIT_NEIGH,
2369+ FLOW_OFFLOAD_XMIT_XFRM,
2370+ FLOW_OFFLOAD_XMIT_DIRECT,
2371+};
2372+
2373+#define NF_FLOW_TABLE_ENCAP_MAX 2
2374
2375 struct flow_offload_tuple {
2376 union {
developerb7c46752022-07-04 19:51:38 +08002377@@ -52,13 +120,30 @@ struct flow_offload_tuple {
developer8cb3ac72022-07-04 10:55:14 +08002378
2379 u8 l3proto;
2380 u8 l4proto;
2381- u8 dir;
2382+ struct {
2383+ u16 id;
2384+ __be16 proto;
2385+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
2386
2387- u16 mtu;
2388+ /* All members above are keys for lookups, see flow_offload_hash(). */
2389+ struct { } __hash;
2390
developerb7c46752022-07-04 19:51:38 +08002391- struct {
2392- struct dst_entry *dst_cache;
2393- u32 dst_cookie;
developer8cb3ac72022-07-04 10:55:14 +08002394+ u8 dir:2,
2395+ xmit_type:2,
2396+ encap_num:2,
2397+ in_vlan_ingress:2;
2398+ u16 mtu;
2399+ union {
2400+ struct {
2401+ struct dst_entry *dst_cache;
2402+ u32 dst_cookie;
2403+ };
2404+ struct {
2405+ u32 ifidx;
2406+ u32 hw_ifidx;
2407+ u8 h_source[ETH_ALEN];
2408+ u8 h_dest[ETH_ALEN];
2409+ } out;
developerb7c46752022-07-04 19:51:38 +08002410 };
developer8cb3ac72022-07-04 10:55:14 +08002411 };
2412
developeree39bcf2023-06-16 08:03:30 +08002413@@ -67,52 +152,140 @@ struct flow_offload_tuple_rhash {
developer8cb3ac72022-07-04 10:55:14 +08002414 struct flow_offload_tuple tuple;
2415 };
2416
2417-#define FLOW_OFFLOAD_SNAT 0x1
2418-#define FLOW_OFFLOAD_DNAT 0x2
2419-#define FLOW_OFFLOAD_DYING 0x4
2420-#define FLOW_OFFLOAD_TEARDOWN 0x8
2421+enum nf_flow_flags {
2422+ NF_FLOW_SNAT,
2423+ NF_FLOW_DNAT,
2424+ NF_FLOW_TEARDOWN,
2425+ NF_FLOW_HW,
developeree39bcf2023-06-16 08:03:30 +08002426+ NF_FLOW_HW_ACCT_DYING,
developer8cb3ac72022-07-04 10:55:14 +08002427+ NF_FLOW_HW_DYING,
2428+ NF_FLOW_HW_DEAD,
2429+ NF_FLOW_HW_PENDING,
2430+};
2431+
2432+enum flow_offload_type {
2433+ NF_FLOW_OFFLOAD_UNSPEC = 0,
2434+ NF_FLOW_OFFLOAD_ROUTE,
2435+};
2436
2437 struct flow_offload {
2438 struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
2439- u32 flags;
2440- union {
2441- /* Your private driver data here. */
2442- u32 timeout;
2443- };
2444+ struct nf_conn *ct;
2445+ unsigned long flags;
2446+ u16 type;
2447+ u32 timeout;
2448+ struct rcu_head rcu_head;
2449 };
2450
2451 #define NF_FLOW_TIMEOUT (30 * HZ)
2452+#define nf_flowtable_time_stamp (u32)jiffies
2453+
2454+unsigned long flow_offload_get_timeout(struct flow_offload *flow);
2455+
2456+static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
2457+{
2458+ return (__s32)(timeout - nf_flowtable_time_stamp);
2459+}
2460
2461 struct nf_flow_route {
2462 struct {
2463- struct dst_entry *dst;
2464+ struct dst_entry *dst;
2465+ struct {
2466+ u32 ifindex;
2467+ struct {
2468+ u16 id;
2469+ __be16 proto;
2470+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
2471+ u8 num_encaps:2,
2472+ ingress_vlans:2;
2473+ } in;
2474+ struct {
2475+ u32 ifindex;
2476+ u32 hw_ifindex;
2477+ u8 h_source[ETH_ALEN];
2478+ u8 h_dest[ETH_ALEN];
2479+ } out;
2480+ enum flow_offload_xmit_type xmit_type;
2481 } tuple[FLOW_OFFLOAD_DIR_MAX];
2482 };
2483
2484-struct flow_offload *flow_offload_alloc(struct nf_conn *ct,
2485- struct nf_flow_route *route);
2486+struct flow_offload *flow_offload_alloc(struct nf_conn *ct);
2487 void flow_offload_free(struct flow_offload *flow);
2488
2489+static inline int
2490+nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table,
2491+ flow_setup_cb_t *cb, void *cb_priv)
2492+{
2493+ struct flow_block *block = &flow_table->flow_block;
2494+ struct flow_block_cb *block_cb;
2495+ int err = 0;
2496+
2497+ down_write(&flow_table->flow_block_lock);
2498+ block_cb = flow_block_cb_lookup(block, cb, cb_priv);
2499+ if (block_cb) {
2500+ err = -EEXIST;
2501+ goto unlock;
2502+ }
2503+
2504+ block_cb = flow_block_cb_alloc(cb, cb_priv, cb_priv, NULL);
2505+ if (IS_ERR(block_cb)) {
2506+ err = PTR_ERR(block_cb);
2507+ goto unlock;
2508+ }
2509+
2510+ list_add_tail(&block_cb->list, &block->cb_list);
2511+
2512+unlock:
2513+ up_write(&flow_table->flow_block_lock);
2514+ return err;
2515+}
2516+
2517+static inline void
2518+nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
2519+ flow_setup_cb_t *cb, void *cb_priv)
2520+{
2521+ struct flow_block *block = &flow_table->flow_block;
2522+ struct flow_block_cb *block_cb;
2523+
2524+ down_write(&flow_table->flow_block_lock);
2525+ block_cb = flow_block_cb_lookup(block, cb, cb_priv);
2526+ if (block_cb) {
2527+ list_del(&block_cb->list);
2528+ flow_block_cb_free(block_cb);
2529+ } else {
2530+ WARN_ON(true);
2531+ }
2532+ up_write(&flow_table->flow_block_lock);
2533+}
2534+
2535+int flow_offload_route_init(struct flow_offload *flow,
2536+ const struct nf_flow_route *route);
2537+
2538 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
2539+void flow_offload_refresh(struct nf_flowtable *flow_table,
2540+ struct flow_offload *flow);
2541+
2542 struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
2543 struct flow_offload_tuple *tuple);
2544+void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
2545+ struct net_device *dev);
2546 void nf_flow_table_cleanup(struct net_device *dev);
2547
2548 int nf_flow_table_init(struct nf_flowtable *flow_table);
2549 void nf_flow_table_free(struct nf_flowtable *flow_table);
2550
2551 void flow_offload_teardown(struct flow_offload *flow);
2552-static inline void flow_offload_dead(struct flow_offload *flow)
2553-{
2554- flow->flags |= FLOW_OFFLOAD_DYING;
2555-}
2556
2557-int nf_flow_snat_port(const struct flow_offload *flow,
2558- struct sk_buff *skb, unsigned int thoff,
2559- u8 protocol, enum flow_offload_tuple_dir dir);
2560-int nf_flow_dnat_port(const struct flow_offload *flow,
2561- struct sk_buff *skb, unsigned int thoff,
2562- u8 protocol, enum flow_offload_tuple_dir dir);
2563+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
2564+ void (*iter)(struct flow_offload *flow, void *data),
2565+ void *data);
2566+
2567+void nf_flow_snat_port(const struct flow_offload *flow,
2568+ struct sk_buff *skb, unsigned int thoff,
2569+ u8 protocol, enum flow_offload_tuple_dir dir);
2570+void nf_flow_dnat_port(const struct flow_offload *flow,
2571+ struct sk_buff *skb, unsigned int thoff,
2572+ u8 protocol, enum flow_offload_tuple_dir dir);
2573
2574 struct flow_ports {
2575 __be16 source, dest;
developerb7c46752022-07-04 19:51:38 +08002576@@ -126,4 +298,41 @@ unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08002577 #define MODULE_ALIAS_NF_FLOWTABLE(family) \
2578 MODULE_ALIAS("nf-flowtable-" __stringify(family))
2579
2580+void nf_flow_offload_add(struct nf_flowtable *flowtable,
2581+ struct flow_offload *flow);
2582+void nf_flow_offload_del(struct nf_flowtable *flowtable,
2583+ struct flow_offload *flow);
2584+void nf_flow_offload_stats(struct nf_flowtable *flowtable,
developeree39bcf2023-06-16 08:03:30 +08002585+ struct flow_offload *flow, bool force);
developer8cb3ac72022-07-04 10:55:14 +08002586+
2587+void nf_flow_table_offload_flush(struct nf_flowtable *flowtable);
2588+int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
2589+ struct net_device *dev,
2590+ enum flow_block_command cmd);
2591+int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
2592+ enum flow_offload_tuple_dir dir,
2593+ struct nf_flow_rule *flow_rule);
2594+int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
2595+ enum flow_offload_tuple_dir dir,
2596+ struct nf_flow_rule *flow_rule);
2597+
2598+int nf_flow_table_offload_init(void);
2599+void nf_flow_table_offload_exit(void);
2600+
2601+static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
2602+{
2603+ __be16 proto;
2604+
2605+ proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
2606+ sizeof(struct pppoe_hdr)));
2607+ switch (proto) {
2608+ case htons(PPP_IP):
2609+ return htons(ETH_P_IP);
2610+ case htons(PPP_IPV6):
2611+ return htons(ETH_P_IPV6);
2612+ }
2613+
2614+ return 0;
2615+}
2616+
2617 #endif /* _NF_FLOW_TABLE_H */
2618diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
developeree39bcf2023-06-16 08:03:30 +08002619index 806454e76..9e3963c8f 100644
developer8cb3ac72022-07-04 10:55:14 +08002620--- a/include/net/netns/conntrack.h
2621+++ b/include/net/netns/conntrack.h
2622@@ -27,6 +27,9 @@ struct nf_tcp_net {
2623 int tcp_loose;
2624 int tcp_be_liberal;
2625 int tcp_max_retrans;
2626+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
2627+ unsigned int offload_timeout;
2628+#endif
2629 };
2630
2631 enum udp_conntrack {
2632@@ -37,6 +40,9 @@ enum udp_conntrack {
2633
2634 struct nf_udp_net {
2635 unsigned int timeouts[UDP_CT_MAX];
2636+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
2637+ unsigned int offload_timeout;
2638+#endif
2639 };
2640
2641 struct nf_icmp_net {
2642diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
developeree39bcf2023-06-16 08:03:30 +08002643index 336014bf8..ae698d11c 100644
developer8cb3ac72022-07-04 10:55:14 +08002644--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
2645+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
2646@@ -105,14 +105,19 @@ enum ip_conntrack_status {
2647 IPS_OFFLOAD_BIT = 14,
2648 IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT),
2649
2650+ /* Conntrack has been offloaded to hardware. */
2651+ IPS_HW_OFFLOAD_BIT = 15,
2652+ IPS_HW_OFFLOAD = (1 << IPS_HW_OFFLOAD_BIT),
2653+
2654 /* Be careful here, modifying these bits can make things messy,
2655 * so don't let users modify them directly.
2656 */
2657 IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
2658 IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
2659- IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD),
2660+ IPS_SEQ_ADJUST | IPS_TEMPLATE |
2661+ IPS_OFFLOAD | IPS_HW_OFFLOAD),
2662
2663- __IPS_MAX_BIT = 15,
2664+ __IPS_MAX_BIT = 16,
2665 };
2666
2667 /* Connection tracking event types */
2668diff --git a/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
2669new file mode 100644
developeree39bcf2023-06-16 08:03:30 +08002670index 000000000..5841bbe0e
developer8cb3ac72022-07-04 10:55:14 +08002671--- /dev/null
2672+++ b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
2673@@ -0,0 +1,17 @@
2674+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2675+#ifndef _XT_FLOWOFFLOAD_H
2676+#define _XT_FLOWOFFLOAD_H
2677+
2678+#include <linux/types.h>
2679+
2680+enum {
2681+ XT_FLOWOFFLOAD_HW = 1 << 0,
2682+
2683+ XT_FLOWOFFLOAD_MASK = XT_FLOWOFFLOAD_HW
2684+};
2685+
2686+struct xt_flowoffload_target_info {
2687+ __u32 flags;
2688+};
2689+
2690+#endif /* _XT_FLOWOFFLOAD_H */
2691diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
developeree39bcf2023-06-16 08:03:30 +08002692index 589615ec4..444ab5fae 100644
developer8cb3ac72022-07-04 10:55:14 +08002693--- a/net/8021q/vlan_dev.c
2694+++ b/net/8021q/vlan_dev.c
2695@@ -747,6 +747,26 @@ static int vlan_dev_get_iflink(const struct net_device *dev)
2696 return real_dev->ifindex;
2697 }
2698
2699+static int vlan_dev_fill_forward_path(struct net_device_path_ctx *ctx,
2700+ struct net_device_path *path)
2701+{
2702+ struct vlan_dev_priv *vlan = vlan_dev_priv(ctx->dev);
2703+
2704+ path->type = DEV_PATH_VLAN;
2705+ path->encap.id = vlan->vlan_id;
2706+ path->encap.proto = vlan->vlan_proto;
2707+ path->dev = ctx->dev;
2708+ ctx->dev = vlan->real_dev;
2709+ if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
2710+ return -ENOSPC;
2711+
2712+ ctx->vlan[ctx->num_vlans].id = vlan->vlan_id;
2713+ ctx->vlan[ctx->num_vlans].proto = vlan->vlan_proto;
2714+ ctx->num_vlans++;
2715+
2716+ return 0;
2717+}
2718+
2719 static const struct ethtool_ops vlan_ethtool_ops = {
2720 .get_link_ksettings = vlan_ethtool_get_link_ksettings,
2721 .get_drvinfo = vlan_ethtool_get_drvinfo,
2722@@ -785,6 +805,7 @@ static const struct net_device_ops vlan_netdev_ops = {
2723 #endif
2724 .ndo_fix_features = vlan_dev_fix_features,
2725 .ndo_get_iflink = vlan_dev_get_iflink,
2726+ .ndo_fill_forward_path = vlan_dev_fill_forward_path,
2727 };
2728
2729 static void vlan_dev_free(struct net_device *dev)
2730diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
developeree39bcf2023-06-16 08:03:30 +08002731index 501f77f0f..0940b44cd 100644
developer8cb3ac72022-07-04 10:55:14 +08002732--- a/net/bridge/br_device.c
2733+++ b/net/bridge/br_device.c
2734@@ -377,6 +377,54 @@ static int br_del_slave(struct net_device *dev, struct net_device *slave_dev)
2735 return br_del_if(br, slave_dev);
2736 }
2737
2738+static int br_fill_forward_path(struct net_device_path_ctx *ctx,
2739+ struct net_device_path *path)
2740+{
2741+ struct net_bridge_fdb_entry *f;
2742+ struct net_bridge_port *dst;
2743+ struct net_bridge *br;
2744+
2745+ if (netif_is_bridge_port(ctx->dev))
2746+ return -1;
2747+
2748+ br = netdev_priv(ctx->dev);
2749+
2750+ br_vlan_fill_forward_path_pvid(br, ctx, path);
2751+
2752+ f = br_fdb_find_rcu(br, ctx->daddr, path->bridge.vlan_id);
2753+ if (!f || !f->dst)
2754+ return -1;
2755+
2756+ dst = READ_ONCE(f->dst);
2757+ if (!dst)
2758+ return -1;
2759+
2760+ if (br_vlan_fill_forward_path_mode(br, dst, path))
2761+ return -1;
2762+
2763+ path->type = DEV_PATH_BRIDGE;
2764+ path->dev = dst->br->dev;
2765+ ctx->dev = dst->dev;
2766+
2767+ switch (path->bridge.vlan_mode) {
2768+ case DEV_PATH_BR_VLAN_TAG:
2769+ if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
2770+ return -ENOSPC;
2771+ ctx->vlan[ctx->num_vlans].id = path->bridge.vlan_id;
2772+ ctx->vlan[ctx->num_vlans].proto = path->bridge.vlan_proto;
2773+ ctx->num_vlans++;
2774+ break;
2775+ case DEV_PATH_BR_VLAN_UNTAG_HW:
2776+ case DEV_PATH_BR_VLAN_UNTAG:
2777+ ctx->num_vlans--;
2778+ break;
2779+ case DEV_PATH_BR_VLAN_KEEP:
2780+ break;
2781+ }
2782+
2783+ return 0;
2784+}
2785+
2786 static const struct ethtool_ops br_ethtool_ops = {
2787 .get_drvinfo = br_getinfo,
2788 .get_link = ethtool_op_get_link,
2789@@ -410,6 +458,7 @@ static const struct net_device_ops br_netdev_ops = {
2790 .ndo_bridge_setlink = br_setlink,
2791 .ndo_bridge_dellink = br_dellink,
2792 .ndo_features_check = passthru_features_check,
2793+ .ndo_fill_forward_path = br_fill_forward_path,
2794 };
2795
2796 static struct device_type br_type = {
2797diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
developeree39bcf2023-06-16 08:03:30 +08002798index a736be8a1..4bd9e9b57 100644
developer8cb3ac72022-07-04 10:55:14 +08002799--- a/net/bridge/br_private.h
2800+++ b/net/bridge/br_private.h
2801@@ -912,6 +912,13 @@ void br_vlan_port_event(struct net_bridge_port *p, unsigned long event);
2802 int br_vlan_bridge_event(struct net_device *dev, unsigned long event,
2803 void *ptr);
2804
2805+void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
2806+ struct net_device_path_ctx *ctx,
2807+ struct net_device_path *path);
2808+int br_vlan_fill_forward_path_mode(struct net_bridge *br,
2809+ struct net_bridge_port *dst,
2810+ struct net_device_path *path);
2811+
2812 static inline struct net_bridge_vlan_group *br_vlan_group(
2813 const struct net_bridge *br)
2814 {
2815@@ -1066,6 +1073,19 @@ static inline int nbp_get_num_vlan_infos(struct net_bridge_port *p,
2816 return 0;
2817 }
2818
2819+static inline void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
2820+ struct net_device_path_ctx *ctx,
2821+ struct net_device_path *path)
2822+{
2823+}
2824+
2825+static inline int br_vlan_fill_forward_path_mode(struct net_bridge *br,
2826+ struct net_bridge_port *dst,
2827+ struct net_device_path *path)
2828+{
2829+ return 0;
2830+}
2831+
2832 static inline struct net_bridge_vlan_group *br_vlan_group(
2833 const struct net_bridge *br)
2834 {
2835diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
developeree39bcf2023-06-16 08:03:30 +08002836index 9257292bd..bcfd16924 100644
developer8cb3ac72022-07-04 10:55:14 +08002837--- a/net/bridge/br_vlan.c
2838+++ b/net/bridge/br_vlan.c
2839@@ -1268,6 +1268,61 @@ int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid)
2840 }
2841 EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu);
2842
2843+void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
2844+ struct net_device_path_ctx *ctx,
2845+ struct net_device_path *path)
2846+{
2847+ struct net_bridge_vlan_group *vg;
2848+ int idx = ctx->num_vlans - 1;
2849+ u16 vid;
2850+
2851+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
2852+
2853+ if (!br_opt_get(br, BROPT_VLAN_ENABLED))
2854+ return;
2855+
2856+ vg = br_vlan_group(br);
2857+
2858+ if (idx >= 0 &&
2859+ ctx->vlan[idx].proto == br->vlan_proto) {
2860+ vid = ctx->vlan[idx].id;
2861+ } else {
2862+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_TAG;
2863+ vid = br_get_pvid(vg);
2864+ }
2865+
2866+ path->bridge.vlan_id = vid;
2867+ path->bridge.vlan_proto = br->vlan_proto;
2868+}
2869+
2870+int br_vlan_fill_forward_path_mode(struct net_bridge *br,
2871+ struct net_bridge_port *dst,
2872+ struct net_device_path *path)
2873+{
2874+ struct net_bridge_vlan_group *vg;
2875+ struct net_bridge_vlan *v;
2876+
2877+ if (!br_opt_get(br, BROPT_VLAN_ENABLED))
2878+ return 0;
2879+
2880+ vg = nbp_vlan_group_rcu(dst);
2881+ v = br_vlan_find(vg, path->bridge.vlan_id);
2882+ if (!v || !br_vlan_should_use(v))
2883+ return -EINVAL;
2884+
2885+ if (!(v->flags & BRIDGE_VLAN_INFO_UNTAGGED))
2886+ return 0;
2887+
2888+ if (path->bridge.vlan_mode == DEV_PATH_BR_VLAN_TAG)
2889+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
2890+ else if (v->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
2891+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG_HW;
2892+ else
2893+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG;
2894+
2895+ return 0;
2896+}
2897+
2898 int br_vlan_get_info(const struct net_device *dev, u16 vid,
2899 struct bridge_vlan_info *p_vinfo)
2900 {
2901diff --git a/net/core/dev.c b/net/core/dev.c
developeree39bcf2023-06-16 08:03:30 +08002902index fe2c856b9..4f0edb218 100644
developer8cb3ac72022-07-04 10:55:14 +08002903--- a/net/core/dev.c
2904+++ b/net/core/dev.c
2905@@ -639,6 +639,52 @@ int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
2906 }
2907 EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
2908
2909+static struct net_device_path *dev_fwd_path(struct net_device_path_stack *stack)
2910+{
2911+ int k = stack->num_paths++;
2912+
2913+ if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX))
2914+ return NULL;
2915+
2916+ return &stack->path[k];
2917+}
2918+
2919+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
2920+ struct net_device_path_stack *stack)
2921+{
2922+ const struct net_device *last_dev;
2923+ struct net_device_path_ctx ctx = {
2924+ .dev = dev,
2925+ };
2926+ struct net_device_path *path;
2927+ int ret = 0;
2928+
2929+ memcpy(ctx.daddr, daddr, sizeof(ctx.daddr));
2930+ stack->num_paths = 0;
2931+ while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
2932+ last_dev = ctx.dev;
2933+ path = dev_fwd_path(stack);
2934+ if (!path)
2935+ return -1;
2936+
2937+ memset(path, 0, sizeof(struct net_device_path));
2938+ ret = ctx.dev->netdev_ops->ndo_fill_forward_path(&ctx, path);
2939+ if (ret < 0)
2940+ return -1;
2941+
2942+ if (WARN_ON_ONCE(last_dev == ctx.dev))
2943+ return -1;
2944+ }
2945+ path = dev_fwd_path(stack);
2946+ if (!path)
2947+ return -1;
2948+ path->type = DEV_PATH_ETHERNET;
2949+ path->dev = ctx.dev;
2950+
2951+ return ret;
2952+}
2953+EXPORT_SYMBOL_GPL(dev_fill_forward_path);
2954+
2955 /**
2956 * __dev_get_by_name - find a device by its name
2957 * @net: the applicable net namespace
2958diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
developeree39bcf2023-06-16 08:03:30 +08002959index ca80f8699..35a1249a9 100644
developer8cb3ac72022-07-04 10:55:14 +08002960--- a/net/dsa/dsa.c
2961+++ b/net/dsa/dsa.c
2962@@ -329,6 +329,15 @@ int call_dsa_notifiers(unsigned long val, struct net_device *dev,
2963 }
2964 EXPORT_SYMBOL_GPL(call_dsa_notifiers);
2965
2966+struct dsa_port *dsa_port_from_netdev(struct net_device *netdev)
2967+{
2968+ if (!netdev || !dsa_slave_dev_check(netdev))
2969+ return ERR_PTR(-ENODEV);
2970+
2971+ return dsa_slave_to_port(netdev);
2972+}
2973+EXPORT_SYMBOL_GPL(dsa_port_from_netdev);
2974+
2975 static int __init dsa_init_module(void)
2976 {
2977 int rc;
2978diff --git a/net/dsa/slave.c b/net/dsa/slave.c
developeree39bcf2023-06-16 08:03:30 +08002979index 036fda317..2dfaa1eac 100644
developer8cb3ac72022-07-04 10:55:14 +08002980--- a/net/dsa/slave.c
2981+++ b/net/dsa/slave.c
developeree39bcf2023-06-16 08:03:30 +08002982@@ -1033,14 +1031,32 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
developer8cb3ac72022-07-04 10:55:14 +08002983 }
2984 }
2985
2986+static int dsa_slave_setup_ft_block(struct dsa_switch *ds, int port,
2987+ void *type_data)
2988+{
2989+ struct dsa_port *cpu_dp = dsa_to_port(ds, port)->cpu_dp;
2990+ struct net_device *master = cpu_dp->master;
2991+
2992+ if (!master->netdev_ops->ndo_setup_tc)
2993+ return -EOPNOTSUPP;
2994+
2995+ return master->netdev_ops->ndo_setup_tc(master, TC_SETUP_FT, type_data);
2996+}
2997+
2998 static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
2999 void *type_data)
3000 {
3001 struct dsa_port *dp = dsa_slave_to_port(dev);
3002 struct dsa_switch *ds = dp->ds;
3003
3004- if (type == TC_SETUP_BLOCK)
3005+ switch (type) {
3006+ case TC_SETUP_BLOCK:
3007 return dsa_slave_setup_tc_block(dev, type_data);
3008+ case TC_SETUP_FT:
3009+ return dsa_slave_setup_ft_block(ds, dp->index, type_data);
3010+ default:
3011+ break;
3012+ }
3013
3014 if (!ds->ops->port_setup_tc)
3015 return -EOPNOTSUPP;
developeree39bcf2023-06-16 08:03:30 +08003016@@ -1226,6 +1242,21 @@ static struct devlink_port *dsa_slave_get_devlink_port(struct net_device *dev)
developer8cb3ac72022-07-04 10:55:14 +08003017 return dp->ds->devlink ? &dp->devlink_port : NULL;
3018 }
3019
3020+static int dsa_slave_fill_forward_path(struct net_device_path_ctx *ctx,
3021+ struct net_device_path *path)
3022+{
3023+ struct dsa_port *dp = dsa_slave_to_port(ctx->dev);
3024+ struct dsa_port *cpu_dp = dp->cpu_dp;
3025+
3026+ path->dev = ctx->dev;
3027+ path->type = DEV_PATH_DSA;
3028+ path->dsa.proto = cpu_dp->tag_ops->proto;
3029+ path->dsa.port = dp->index;
3030+ ctx->dev = cpu_dp->master;
3031+
3032+ return 0;
3033+}
3034+
3035 static const struct net_device_ops dsa_slave_netdev_ops = {
3036 .ndo_open = dsa_slave_open,
3037 .ndo_stop = dsa_slave_close,
developeree39bcf2023-06-16 08:03:30 +08003038@@ -1250,6 +1281,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
developer8cb3ac72022-07-04 10:55:14 +08003039 .ndo_vlan_rx_add_vid = dsa_slave_vlan_rx_add_vid,
3040 .ndo_vlan_rx_kill_vid = dsa_slave_vlan_rx_kill_vid,
3041 .ndo_get_devlink_port = dsa_slave_get_devlink_port,
3042+ .ndo_fill_forward_path = dsa_slave_fill_forward_path,
3043 };
3044
3045 static struct device_type dsa_type = {
developeree39bcf2023-06-16 08:03:30 +08003046@@ -1497,7 +1529,8 @@ void dsa_slave_destroy(struct net_device *slave_dev)
3047 bool dsa_slave_dev_check(const struct net_device *dev)
developer8cb3ac72022-07-04 10:55:14 +08003048 {
3049 return dev->netdev_ops == &dsa_slave_netdev_ops;
3050 }
3051+EXPORT_SYMBOL_GPL(dsa_slave_dev_check);
3052
3053 static int dsa_slave_changeupper(struct net_device *dev,
3054 struct netdev_notifier_changeupper_info *info)
3055diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
developeree39bcf2023-06-16 08:03:30 +08003056index f17b40211..803b92e4c 100644
developer8cb3ac72022-07-04 10:55:14 +08003057--- a/net/ipv4/netfilter/Kconfig
3058+++ b/net/ipv4/netfilter/Kconfig
3059@@ -56,8 +56,6 @@ config NF_TABLES_ARP
3060 help
3061 This option enables the ARP support for nf_tables.
3062
3063-endif # NF_TABLES
3064-
3065 config NF_FLOW_TABLE_IPV4
3066 tristate "Netfilter flow table IPv4 module"
3067 depends on NF_FLOW_TABLE
3068@@ -66,6 +64,8 @@ config NF_FLOW_TABLE_IPV4
3069
3070 To compile it as a module, choose M here.
3071
3072+endif # NF_TABLES
3073+
3074 config NF_DUP_IPV4
3075 tristate "Netfilter IPv4 packet duplication to alternate destination"
3076 depends on !NF_CONNTRACK || NF_CONNTRACK
3077diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
developeree39bcf2023-06-16 08:03:30 +08003078index 5585e3a94..bb76f6061 100644
developer8cb3ac72022-07-04 10:55:14 +08003079--- a/net/ipv6/ip6_output.c
3080+++ b/net/ipv6/ip6_output.c
3081@@ -607,7 +607,7 @@ int ip6_forward(struct sk_buff *skb)
3082 }
3083 }
3084
3085- mtu = ip6_dst_mtu_forward(dst);
3086+ mtu = ip6_dst_mtu_maybe_forward(dst, true);
3087 if (mtu < IPV6_MIN_MTU)
3088 mtu = IPV6_MIN_MTU;
3089
3090diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
developeree39bcf2023-06-16 08:03:30 +08003091index 69443e9a3..0b481d236 100644
developer8cb3ac72022-07-04 10:55:14 +08003092--- a/net/ipv6/netfilter/Kconfig
3093+++ b/net/ipv6/netfilter/Kconfig
3094@@ -45,7 +45,6 @@ config NFT_FIB_IPV6
3095 multicast or blackhole.
3096
3097 endif # NF_TABLES_IPV6
3098-endif # NF_TABLES
3099
3100 config NF_FLOW_TABLE_IPV6
3101 tristate "Netfilter flow table IPv6 module"
3102@@ -55,6 +54,8 @@ config NF_FLOW_TABLE_IPV6
3103
3104 To compile it as a module, choose M here.
3105
3106+endif # NF_TABLES
3107+
3108 config NF_DUP_IPV6
3109 tristate "Netfilter IPv6 packet duplication to alternate destination"
3110 depends on !NF_CONNTRACK || NF_CONNTRACK
3111diff --git a/net/ipv6/route.c b/net/ipv6/route.c
developeree39bcf2023-06-16 08:03:30 +08003112index 98aaf0b79..2b357ac71 100644
developer8cb3ac72022-07-04 10:55:14 +08003113--- a/net/ipv6/route.c
3114+++ b/net/ipv6/route.c
3115@@ -83,7 +83,7 @@ enum rt6_nud_state {
3116
3117 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
3118 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
3119-static unsigned int ip6_mtu(const struct dst_entry *dst);
3120+static unsigned int ip6_mtu(const struct dst_entry *dst);
3121 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
3122 static void ip6_dst_destroy(struct dst_entry *);
3123 static void ip6_dst_ifdown(struct dst_entry *,
3124@@ -3125,25 +3125,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
3125
3126 static unsigned int ip6_mtu(const struct dst_entry *dst)
3127 {
3128- struct inet6_dev *idev;
3129- unsigned int mtu;
3130-
3131- mtu = dst_metric_raw(dst, RTAX_MTU);
3132- if (mtu)
3133- goto out;
3134-
3135- mtu = IPV6_MIN_MTU;
3136-
3137- rcu_read_lock();
3138- idev = __in6_dev_get(dst->dev);
3139- if (idev)
3140- mtu = idev->cnf.mtu6;
3141- rcu_read_unlock();
3142-
3143-out:
3144- mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
3145-
3146- return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
3147+ return ip6_dst_mtu_maybe_forward(dst, false);
3148 }
3149
3150 /* MTU selection:
3151diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
developeree39bcf2023-06-16 08:03:30 +08003152index b967763f5..c040e713a 100644
developer8cb3ac72022-07-04 10:55:14 +08003153--- a/net/netfilter/Kconfig
3154+++ b/net/netfilter/Kconfig
developeree39bcf2023-06-16 08:03:30 +08003155@@ -690,8 +690,6 @@ config NFT_FIB_NETDEV
developer8cb3ac72022-07-04 10:55:14 +08003156
3157 endif # NF_TABLES_NETDEV
3158
3159-endif # NF_TABLES
3160-
3161 config NF_FLOW_TABLE_INET
3162 tristate "Netfilter flow table mixed IPv4/IPv6 module"
3163 depends on NF_FLOW_TABLE
developeree39bcf2023-06-16 08:03:30 +08003164@@ -700,11 +698,12 @@ config NF_FLOW_TABLE_INET
developer8cb3ac72022-07-04 10:55:14 +08003165
3166 To compile it as a module, choose M here.
3167
3168+endif # NF_TABLES
3169+
3170 config NF_FLOW_TABLE
3171 tristate "Netfilter flow table module"
3172 depends on NETFILTER_INGRESS
3173 depends on NF_CONNTRACK
3174- depends on NF_TABLES
3175 help
3176 This option adds the flow table core infrastructure.
3177
developeree39bcf2023-06-16 08:03:30 +08003178@@ -984,6 +983,15 @@ config NETFILTER_XT_TARGET_NOTRACK
developer8cb3ac72022-07-04 10:55:14 +08003179 depends on NETFILTER_ADVANCED
3180 select NETFILTER_XT_TARGET_CT
3181
3182+config NETFILTER_XT_TARGET_FLOWOFFLOAD
3183+ tristate '"FLOWOFFLOAD" target support'
3184+ depends on NF_FLOW_TABLE
3185+ depends on NETFILTER_INGRESS
3186+ help
3187+ This option adds a `FLOWOFFLOAD' target, which uses the nf_flow_offload
3188+ module to speed up processing of packets by bypassing the usual
3189+ netfilter chains.
3190+
3191 config NETFILTER_XT_TARGET_RATEEST
3192 tristate '"RATEEST" target support'
3193 depends on NETFILTER_ADVANCED
3194diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
developeree39bcf2023-06-16 08:03:30 +08003195index 4fc075b61..d93a121bc 100644
developer8cb3ac72022-07-04 10:55:14 +08003196--- a/net/netfilter/Makefile
3197+++ b/net/netfilter/Makefile
3198@@ -120,7 +120,8 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
3199
3200 # flow table infrastructure
3201 obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
3202-nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
3203+nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \
3204+ nf_flow_table_offload.o
3205
3206 obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
3207
3208@@ -140,6 +141,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
3209 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
3210 obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
3211 obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
3212+obj-$(CONFIG_NETFILTER_XT_TARGET_FLOWOFFLOAD) += xt_FLOWOFFLOAD.o
3213 obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
3214 obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
3215 obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
3216diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
developeree39bcf2023-06-16 08:03:30 +08003217index f6ab6f484..f689e19d8 100644
developer8cb3ac72022-07-04 10:55:14 +08003218--- a/net/netfilter/nf_conntrack_core.c
3219+++ b/net/netfilter/nf_conntrack_core.c
developeree39bcf2023-06-16 08:03:30 +08003220@@ -864,9 +864,8 @@ out:
developer8cb3ac72022-07-04 10:55:14 +08003221 }
3222 EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
3223
3224-static inline void nf_ct_acct_update(struct nf_conn *ct,
3225- enum ip_conntrack_info ctinfo,
3226- unsigned int len)
3227+void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
3228+ unsigned int bytes)
3229 {
3230 struct nf_conn_acct *acct;
3231
3232@@ -874,10 +873,11 @@ static inline void nf_ct_acct_update(struct nf_conn *ct,
3233 if (acct) {
3234 struct nf_conn_counter *counter = acct->counter;
3235
3236- atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
3237- atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes);
3238+ atomic64_add(packets, &counter[dir].packets);
3239+ atomic64_add(bytes, &counter[dir].bytes);
3240 }
3241 }
3242+EXPORT_SYMBOL_GPL(nf_ct_acct_add);
3243
3244 static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
3245 const struct nf_conn *loser_ct)
3246@@ -891,7 +891,7 @@ static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
3247
3248 /* u32 should be fine since we must have seen one packet. */
3249 bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes);
3250- nf_ct_acct_update(ct, ctinfo, bytes);
3251+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), bytes);
3252 }
3253 }
3254
3255@@ -1238,8 +1238,10 @@ static void gc_worker(struct work_struct *work)
3256
3257 tmp = nf_ct_tuplehash_to_ctrack(h);
3258
3259- if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
3260+ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
3261+ nf_ct_offload_timeout(tmp);
3262 continue;
3263+ }
3264
3265 if (nf_ct_is_expired(tmp)) {
3266 nf_ct_gc_expired(tmp);
3267@@ -1763,7 +1765,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
3268 WRITE_ONCE(ct->timeout, extra_jiffies);
3269 acct:
3270 if (do_acct)
3271- nf_ct_acct_update(ct, ctinfo, skb->len);
3272+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
3273 }
3274 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
3275
3276@@ -1771,7 +1773,7 @@ bool nf_ct_kill_acct(struct nf_conn *ct,
3277 enum ip_conntrack_info ctinfo,
3278 const struct sk_buff *skb)
3279 {
3280- nf_ct_acct_update(ct, ctinfo, skb->len);
3281+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
3282
3283 return nf_ct_delete(ct, 0, 0);
3284 }
3285diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
developeree39bcf2023-06-16 08:03:30 +08003286index 7204f0366..3742bae21 100644
developer8cb3ac72022-07-04 10:55:14 +08003287--- a/net/netfilter/nf_conntrack_proto_tcp.c
3288+++ b/net/netfilter/nf_conntrack_proto_tcp.c
developeree39bcf2023-06-16 08:03:30 +08003289@@ -1453,6 +1453,10 @@ void nf_conntrack_tcp_init_net(struct net *net)
developer8cb3ac72022-07-04 10:55:14 +08003290 tn->tcp_loose = nf_ct_tcp_loose;
3291 tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
3292 tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
3293+
3294+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3295+ tn->offload_timeout = 30 * HZ;
3296+#endif
3297 }
3298
3299 const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
3300diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
developeree39bcf2023-06-16 08:03:30 +08003301index e3a2d018f..a1579d6c3 100644
developer8cb3ac72022-07-04 10:55:14 +08003302--- a/net/netfilter/nf_conntrack_proto_udp.c
3303+++ b/net/netfilter/nf_conntrack_proto_udp.c
3304@@ -267,6 +267,10 @@ void nf_conntrack_udp_init_net(struct net *net)
3305
3306 for (i = 0; i < UDP_CT_MAX; i++)
3307 un->timeouts[i] = udp_timeouts[i];
3308+
3309+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3310+ un->offload_timeout = 30 * HZ;
3311+#endif
3312 }
3313
3314 const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp =
3315diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
developeree39bcf2023-06-16 08:03:30 +08003316index 9c6259c28..10d9f93ce 100644
developer8cb3ac72022-07-04 10:55:14 +08003317--- a/net/netfilter/nf_conntrack_standalone.c
3318+++ b/net/netfilter/nf_conntrack_standalone.c
3319@@ -353,7 +353,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
3320 if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
3321 goto release;
3322
3323- if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
3324+ if (test_bit(IPS_HW_OFFLOAD_BIT, &ct->status))
3325+ seq_puts(s, "[HW_OFFLOAD] ");
3326+ else if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
3327 seq_puts(s, "[OFFLOAD] ");
3328 else if (test_bit(IPS_ASSURED_BIT, &ct->status))
3329 seq_puts(s, "[ASSURED] ");
3330@@ -620,11 +622,17 @@ enum nf_ct_sysctl_index {
3331 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE,
3332 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS,
3333 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
3334+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3335+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
3336+#endif
3337 NF_SYSCTL_CT_PROTO_TCP_LOOSE,
3338 NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
3339 NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS,
3340 NF_SYSCTL_CT_PROTO_TIMEOUT_UDP,
3341 NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
3342+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3343+ NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
3344+#endif
3345 NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
3346 NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
3347 #ifdef CONFIG_NF_CT_PROTO_SCTP
developeree39bcf2023-06-16 08:03:30 +08003348@@ -812,6 +820,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
developer8cb3ac72022-07-04 10:55:14 +08003349 .mode = 0644,
3350 .proc_handler = proc_dointvec_jiffies,
3351 },
3352+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3353+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD] = {
3354+ .procname = "nf_flowtable_tcp_timeout",
3355+ .maxlen = sizeof(unsigned int),
3356+ .mode = 0644,
3357+ .proc_handler = proc_dointvec_jiffies,
3358+ },
3359+#endif
3360 [NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
3361 .procname = "nf_conntrack_tcp_loose",
3362 .maxlen = sizeof(int),
developeree39bcf2023-06-16 08:03:30 +08003363@@ -846,6 +862,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
developer8cb3ac72022-07-04 10:55:14 +08003364 .mode = 0644,
3365 .proc_handler = proc_dointvec_jiffies,
3366 },
3367+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3368+ [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = {
3369+ .procname = "nf_flowtable_udp_timeout",
3370+ .maxlen = sizeof(unsigned int),
3371+ .mode = 0644,
3372+ .proc_handler = proc_dointvec_jiffies,
3373+ },
3374+#endif
3375 [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
3376 .procname = "nf_conntrack_icmp_timeout",
3377 .maxlen = sizeof(unsigned int),
developeree39bcf2023-06-16 08:03:30 +08003378@@ -1028,6 +1052,11 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
developer8cb3ac72022-07-04 10:55:14 +08003379 XASSIGN(LIBERAL, &tn->tcp_be_liberal);
3380 XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans);
3381 #undef XASSIGN
3382+
3383+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3384+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout;
3385+#endif
3386+
3387 }
3388
3389 static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net,
developeree39bcf2023-06-16 08:03:30 +08003390@@ -1115,6 +1144,9 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
developer8cb3ac72022-07-04 10:55:14 +08003391 table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6].data = &nf_icmpv6_pernet(net)->timeout;
3392 table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP].data = &un->timeouts[UDP_CT_UNREPLIED];
3393 table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
3394+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3395+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
3396+#endif
3397
3398 nf_conntrack_standalone_init_tcp_sysctl(net, table);
3399 nf_conntrack_standalone_init_sctp_sysctl(net, table);
3400diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
developeree39bcf2023-06-16 08:03:30 +08003401index f212cec0..10365581 100644
developer8cb3ac72022-07-04 10:55:14 +08003402--- a/net/netfilter/nf_flow_table_core.c
3403+++ b/net/netfilter/nf_flow_table_core.c
developerb7c46752022-07-04 19:51:38 +08003404@@ -7,43 +7,21 @@
developer8cb3ac72022-07-04 10:55:14 +08003405 #include <linux/netdevice.h>
3406 #include <net/ip.h>
3407 #include <net/ip6_route.h>
3408-#include <net/netfilter/nf_tables.h>
3409 #include <net/netfilter/nf_flow_table.h>
3410 #include <net/netfilter/nf_conntrack.h>
3411 #include <net/netfilter/nf_conntrack_core.h>
3412 #include <net/netfilter/nf_conntrack_l4proto.h>
3413 #include <net/netfilter/nf_conntrack_tuple.h>
3414
3415-struct flow_offload_entry {
3416- struct flow_offload flow;
3417- struct nf_conn *ct;
3418- struct rcu_head rcu_head;
3419-};
3420-
3421 static DEFINE_MUTEX(flowtable_lock);
3422 static LIST_HEAD(flowtables);
3423
developerb7c46752022-07-04 19:51:38 +08003424-static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
3425-{
3426- const struct rt6_info *rt;
3427-
3428- if (flow_tuple->l3proto == NFPROTO_IPV6) {
3429- rt = (const struct rt6_info *)flow_tuple->dst_cache;
3430- return rt6_get_cookie(rt);
3431- }
3432-
3433- return 0;
3434-}
3435-
developer8cb3ac72022-07-04 10:55:14 +08003436 static void
3437-flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
3438- struct nf_flow_route *route,
3439+flow_offload_fill_dir(struct flow_offload *flow,
3440 enum flow_offload_tuple_dir dir)
3441 {
3442 struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
3443- struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
3444- struct dst_entry *other_dst = route->tuple[!dir].dst;
3445- struct dst_entry *dst = route->tuple[dir].dst;
3446+ struct nf_conntrack_tuple *ctt = &flow->ct->tuplehash[dir].tuple;
3447
3448 ft->dir = dir;
3449
developerb7c46752022-07-04 19:51:38 +08003450@@ -51,12 +29,10 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
developer8cb3ac72022-07-04 10:55:14 +08003451 case NFPROTO_IPV4:
3452 ft->src_v4 = ctt->src.u3.in;
3453 ft->dst_v4 = ctt->dst.u3.in;
3454- ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
3455 break;
3456 case NFPROTO_IPV6:
3457 ft->src_v6 = ctt->src.u3.in6;
3458 ft->dst_v6 = ctt->dst.u3.in6;
3459- ft->mtu = ip6_dst_mtu_forward(dst);
3460 break;
3461 }
3462
developerb7c46752022-07-04 19:51:38 +08003463@@ -64,50 +40,32 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
developer8cb3ac72022-07-04 10:55:14 +08003464 ft->l4proto = ctt->dst.protonum;
3465 ft->src_port = ctt->src.u.tcp.port;
3466 ft->dst_port = ctt->dst.u.tcp.port;
3467-
3468- ft->iifidx = other_dst->dev->ifindex;
3469- ft->dst_cache = dst;
developerb7c46752022-07-04 19:51:38 +08003470- ft->dst_cookie = flow_offload_dst_cookie(ft);
developer8cb3ac72022-07-04 10:55:14 +08003471 }
3472
3473-struct flow_offload *
3474-flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
3475+struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
3476 {
3477- struct flow_offload_entry *entry;
3478 struct flow_offload *flow;
3479
3480 if (unlikely(nf_ct_is_dying(ct) ||
3481 !atomic_inc_not_zero(&ct->ct_general.use)))
3482 return NULL;
3483
3484- entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
3485- if (!entry)
3486+ flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
3487+ if (!flow)
3488 goto err_ct_refcnt;
3489
3490- flow = &entry->flow;
developerb7c46752022-07-04 19:51:38 +08003491-
developer8cb3ac72022-07-04 10:55:14 +08003492- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
3493- goto err_dst_cache_original;
developeree39bcf2023-06-16 08:03:30 +08003494-
developer7eb15dc2023-06-14 17:44:03 +08003495- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
3496- goto err_dst_cache_reply;
developeree39bcf2023-06-16 08:03:30 +08003497+ flow->ct = ct;
3498
developer8cb3ac72022-07-04 10:55:14 +08003499- entry->ct = ct;
3500-
3501- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
3502- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
3503+ flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
3504+ flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_REPLY);
3505
3506 if (ct->status & IPS_SRC_NAT)
3507- flow->flags |= FLOW_OFFLOAD_SNAT;
3508+ __set_bit(NF_FLOW_SNAT, &flow->flags);
3509 if (ct->status & IPS_DST_NAT)
3510- flow->flags |= FLOW_OFFLOAD_DNAT;
3511+ __set_bit(NF_FLOW_DNAT, &flow->flags);
3512
3513 return flow;
3514
3515-err_dst_cache_reply:
3516- dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
3517-err_dst_cache_original:
3518- kfree(entry);
3519 err_ct_refcnt:
3520 nf_ct_put(ct);
3521
developeree39bcf2023-06-16 08:03:30 +08003522@@ -115,40 +73,135 @@ flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
developer8cb3ac72022-07-04 10:55:14 +08003523 }
3524 EXPORT_SYMBOL_GPL(flow_offload_alloc);
3525
3526-static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
3527+static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
3528 {
3529- tcp->state = TCP_CONNTRACK_ESTABLISHED;
3530- tcp->seen[0].td_maxwin = 0;
3531- tcp->seen[1].td_maxwin = 0;
3532+ const struct rt6_info *rt;
3533+
3534+ if (flow_tuple->l3proto == NFPROTO_IPV6) {
3535+ rt = (const struct rt6_info *)flow_tuple->dst_cache;
3536+ return rt6_get_cookie(rt);
3537+ }
3538+
3539+ return 0;
3540 }
3541
3542-#define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ)
3543-#define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ)
3544+static int flow_offload_fill_route(struct flow_offload *flow,
3545+ const struct nf_flow_route *route,
3546+ enum flow_offload_tuple_dir dir)
3547+{
3548+ struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
3549+ struct dst_entry *dst = route->tuple[dir].dst;
3550+ int i, j = 0;
developeree39bcf2023-06-16 08:03:30 +08003551+
developer8cb3ac72022-07-04 10:55:14 +08003552+ switch (flow_tuple->l3proto) {
3553+ case NFPROTO_IPV4:
3554+ flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
3555+ break;
3556+ case NFPROTO_IPV6:
3557+ flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true);
3558+ break;
3559+ }
3560+
3561+ flow_tuple->iifidx = route->tuple[dir].in.ifindex;
3562+ for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {
3563+ flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
3564+ flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
3565+ if (route->tuple[dir].in.ingress_vlans & BIT(i))
3566+ flow_tuple->in_vlan_ingress |= BIT(j);
3567+ j++;
3568+ }
3569+ flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
3570+
3571+ switch (route->tuple[dir].xmit_type) {
3572+ case FLOW_OFFLOAD_XMIT_DIRECT:
3573+ memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
3574+ ETH_ALEN);
3575+ memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
3576+ ETH_ALEN);
3577+ flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
3578+ flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
3579+ break;
3580+ case FLOW_OFFLOAD_XMIT_XFRM:
3581+ case FLOW_OFFLOAD_XMIT_NEIGH:
3582+ if (!dst_hold_safe(route->tuple[dir].dst))
3583+ return -1;
3584+
3585+ flow_tuple->dst_cache = dst;
3586+ flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
3587+ break;
3588+ default:
3589+ WARN_ON_ONCE(1);
3590+ break;
3591+ }
3592+ flow_tuple->xmit_type = route->tuple[dir].xmit_type;
developerb7c46752022-07-04 19:51:38 +08003593+
developer8cb3ac72022-07-04 10:55:14 +08003594+ return 0;
3595+}
3596+
3597+static void nft_flow_dst_release(struct flow_offload *flow,
3598+ enum flow_offload_tuple_dir dir)
developeree39bcf2023-06-16 08:03:30 +08003599+{
developer8cb3ac72022-07-04 10:55:14 +08003600+ if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
3601+ flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
3602+ dst_release(flow->tuplehash[dir].tuple.dst_cache);
developeree39bcf2023-06-16 08:03:30 +08003603+}
3604+
developer8cb3ac72022-07-04 10:55:14 +08003605+int flow_offload_route_init(struct flow_offload *flow,
3606+ const struct nf_flow_route *route)
developeree39bcf2023-06-16 08:03:30 +08003607+{
developer8cb3ac72022-07-04 10:55:14 +08003608+ int err;
developeree39bcf2023-06-16 08:03:30 +08003609+
developer8cb3ac72022-07-04 10:55:14 +08003610+ err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
3611+ if (err < 0)
3612+ return err;
developeree39bcf2023-06-16 08:03:30 +08003613+
developer8cb3ac72022-07-04 10:55:14 +08003614+ err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
3615+ if (err < 0)
3616+ goto err_route_reply;
3617+
3618+ flow->type = NF_FLOW_OFFLOAD_ROUTE;
developeree39bcf2023-06-16 08:03:30 +08003619+
developer8cb3ac72022-07-04 10:55:14 +08003620+ return 0;
3621+
3622+err_route_reply:
3623+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
3624+
3625+ return err;
developeree39bcf2023-06-16 08:03:30 +08003626+}
developer8cb3ac72022-07-04 10:55:14 +08003627+EXPORT_SYMBOL_GPL(flow_offload_route_init);
developerb7c46752022-07-04 19:51:38 +08003628
developeree39bcf2023-06-16 08:03:30 +08003629-static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
developer8cb3ac72022-07-04 10:55:14 +08003630+static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
3631 {
developeree39bcf2023-06-16 08:03:30 +08003632- return (__s32)(timeout - (u32)jiffies);
3633+ tcp->state = TCP_CONNTRACK_ESTABLISHED;
developer8cb3ac72022-07-04 10:55:14 +08003634+ tcp->seen[0].td_maxwin = 0;
3635+ tcp->seen[1].td_maxwin = 0;
3636 }
3637
developeree39bcf2023-06-16 08:03:30 +08003638 static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
developer8cb3ac72022-07-04 10:55:14 +08003639 {
developeree39bcf2023-06-16 08:03:30 +08003640- const struct nf_conntrack_l4proto *l4proto;
developer8cb3ac72022-07-04 10:55:14 +08003641+ struct net *net = nf_ct_net(ct);
developeree39bcf2023-06-16 08:03:30 +08003642 int l4num = nf_ct_protonum(ct);
3643- unsigned int timeout;
developer8cb3ac72022-07-04 10:55:14 +08003644+ s32 timeout;
developeree39bcf2023-06-16 08:03:30 +08003645
3646- l4proto = nf_ct_l4proto_find(l4num);
3647- if (!l4proto)
3648- return;
developer8cb3ac72022-07-04 10:55:14 +08003649+ if (l4num == IPPROTO_TCP) {
3650+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
developeree39bcf2023-06-16 08:03:30 +08003651
3652- if (l4num == IPPROTO_TCP)
3653- timeout = NF_FLOWTABLE_TCP_PICKUP_TIMEOUT;
3654- else if (l4num == IPPROTO_UDP)
3655- timeout = NF_FLOWTABLE_UDP_PICKUP_TIMEOUT;
3656- else
3657+ timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
developer8cb3ac72022-07-04 10:55:14 +08003658+ timeout -= tn->offload_timeout;
3659+ } else if (l4num == IPPROTO_UDP) {
3660+ struct nf_udp_net *tn = nf_udp_pernet(net);
3661+
3662+ timeout = tn->timeouts[UDP_CT_REPLIED];
3663+ timeout -= tn->offload_timeout;
3664+ } else {
developeree39bcf2023-06-16 08:03:30 +08003665 return;
developer8cb3ac72022-07-04 10:55:14 +08003666+ }
3667+
3668+ if (timeout < 0)
3669+ timeout = 0;
developeree39bcf2023-06-16 08:03:30 +08003670
3671- if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
3672- ct->timeout = nfct_time_stamp + timeout;
developer8cb3ac72022-07-04 10:55:14 +08003673+ if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
3674+ WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
3675 }
3676
developeree39bcf2023-06-16 08:03:30 +08003677 static void flow_offload_fixup_ct_state(struct nf_conn *ct)
3678@@ -163,17 +216,23 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
3679 flow_offload_fixup_ct_timeout(ct);
3680 }
3681
developer8cb3ac72022-07-04 10:55:14 +08003682-void flow_offload_free(struct flow_offload *flow)
3683+static void flow_offload_route_release(struct flow_offload *flow)
3684 {
3685- struct flow_offload_entry *e;
3686+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
3687+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY);
3688+}
3689
3690- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
3691- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
3692- e = container_of(flow, struct flow_offload_entry, flow);
3693- if (flow->flags & FLOW_OFFLOAD_DYING)
3694- nf_ct_delete(e->ct, 0, 0);
3695- nf_ct_put(e->ct);
3696- kfree_rcu(e, rcu_head);
3697+void flow_offload_free(struct flow_offload *flow)
3698+{
3699+ switch (flow->type) {
3700+ case NF_FLOW_OFFLOAD_ROUTE:
3701+ flow_offload_route_release(flow);
3702+ break;
3703+ default:
3704+ break;
3705+ }
3706+ nf_ct_put(flow->ct);
3707+ kfree_rcu(flow, rcu_head);
3708 }
3709 EXPORT_SYMBOL_GPL(flow_offload_free);
3710
developeree39bcf2023-06-16 08:03:30 +08003711@@ -181,14 +240,14 @@ static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
developer8cb3ac72022-07-04 10:55:14 +08003712 {
3713 const struct flow_offload_tuple *tuple = data;
3714
3715- return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
3716+ return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed);
3717 }
3718
3719 static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
3720 {
3721 const struct flow_offload_tuple_rhash *tuplehash = data;
3722
3723- return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
3724+ return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed);
3725 }
3726
3727 static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
developeree39bcf2023-06-16 08:03:30 +08003728@@ -197,7 +256,7 @@ static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
developer8cb3ac72022-07-04 10:55:14 +08003729 const struct flow_offload_tuple *tuple = arg->key;
3730 const struct flow_offload_tuple_rhash *x = ptr;
3731
3732- if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
3733+ if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash)))
3734 return 1;
3735
3736 return 0;
developeree39bcf2023-06-16 08:03:30 +08003737@@ -211,30 +270,30 @@ static const struct rhashtable_params nf_flow_offload_rhash_params = {
developer8cb3ac72022-07-04 10:55:14 +08003738 .automatic_shrinking = true,
3739 };
3740
3741-#define DAY (86400 * HZ)
3742-
3743-/* Set an arbitrary timeout large enough not to ever expire, this save
3744- * us a check for the IPS_OFFLOAD_BIT from the packet path via
3745- * nf_ct_is_expired().
3746- */
3747-static void nf_ct_offload_timeout(struct flow_offload *flow)
3748+unsigned long flow_offload_get_timeout(struct flow_offload *flow)
3749 {
3750- struct flow_offload_entry *entry;
3751- struct nf_conn *ct;
3752+ unsigned long timeout = NF_FLOW_TIMEOUT;
3753+ struct net *net = nf_ct_net(flow->ct);
3754+ int l4num = nf_ct_protonum(flow->ct);
developeree39bcf2023-06-16 08:03:30 +08003755
3756- entry = container_of(flow, struct flow_offload_entry, flow);
3757- ct = entry->ct;
developerb7c46752022-07-04 19:51:38 +08003758+ if (l4num == IPPROTO_TCP) {
3759+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
developeree39bcf2023-06-16 08:03:30 +08003760
3761- if (nf_ct_expires(ct) < DAY / 2)
3762- ct->timeout = nfct_time_stamp + DAY;
developer8cb3ac72022-07-04 10:55:14 +08003763+ timeout = tn->offload_timeout;
3764+ } else if (l4num == IPPROTO_UDP) {
3765+ struct nf_udp_net *tn = nf_udp_pernet(net);
developeree39bcf2023-06-16 08:03:30 +08003766+
developer8cb3ac72022-07-04 10:55:14 +08003767+ timeout = tn->offload_timeout;
3768+ }
developeree39bcf2023-06-16 08:03:30 +08003769+
developer8cb3ac72022-07-04 10:55:14 +08003770+ return timeout;
3771 }
3772
3773 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
3774 {
3775 int err;
3776
3777- nf_ct_offload_timeout(flow);
3778- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
3779+ flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
3780
3781 err = rhashtable_insert_fast(&flow_table->rhashtable,
3782 &flow->tuplehash[0].node,
developeree39bcf2023-06-16 08:03:30 +08003783@@ -252,10 +311,35 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
developer8cb3ac72022-07-04 10:55:14 +08003784 return err;
3785 }
3786
3787+ nf_ct_offload_timeout(flow->ct);
3788+
3789+ if (nf_flowtable_hw_offload(flow_table)) {
3790+ __set_bit(NF_FLOW_HW, &flow->flags);
3791+ nf_flow_offload_add(flow_table, flow);
3792+ }
3793+
3794 return 0;
3795 }
3796 EXPORT_SYMBOL_GPL(flow_offload_add);
3797
3798+void flow_offload_refresh(struct nf_flowtable *flow_table,
3799+ struct flow_offload *flow)
3800+{
3801+ u32 timeout;
3802+
3803+ timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
3804+ if (timeout - READ_ONCE(flow->timeout) > HZ)
3805+ WRITE_ONCE(flow->timeout, timeout);
3806+ else
3807+ return;
3808+
3809+ if (likely(!nf_flowtable_hw_offload(flow_table)))
3810+ return;
3811+
3812+ nf_flow_offload_add(flow_table, flow);
3813+}
3814+EXPORT_SYMBOL_GPL(flow_offload_refresh);
3815+
3816 static inline bool nf_flow_has_expired(const struct flow_offload *flow)
3817 {
3818 return nf_flow_timeout_delta(flow->timeout) <= 0;
developeree39bcf2023-06-16 08:03:30 +08003819@@ -264,8 +348,6 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
developer8cb3ac72022-07-04 10:55:14 +08003820 static void flow_offload_del(struct nf_flowtable *flow_table,
3821 struct flow_offload *flow)
3822 {
3823- struct flow_offload_entry *e;
3824-
3825 rhashtable_remove_fast(&flow_table->rhashtable,
3826 &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
3827 nf_flow_offload_rhash_params);
developeree39bcf2023-06-16 08:03:30 +08003828@@ -273,28 +355,21 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003829 &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
3830 nf_flow_offload_rhash_params);
developeree39bcf2023-06-16 08:03:30 +08003831
developer8cb3ac72022-07-04 10:55:14 +08003832- e = container_of(flow, struct flow_offload_entry, flow);
3833- clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
developeree39bcf2023-06-16 08:03:30 +08003834+ clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
3835
3836 if (nf_flow_has_expired(flow))
developer8cb3ac72022-07-04 10:55:14 +08003837- flow_offload_fixup_ct(e->ct);
3838- else if (flow->flags & FLOW_OFFLOAD_TEARDOWN)
3839- flow_offload_fixup_ct_timeout(e->ct);
3840-
3841- if (!(flow->flags & FLOW_OFFLOAD_TEARDOWN))
3842- flow_offload_fixup_ct_state(e->ct);
developeree39bcf2023-06-16 08:03:30 +08003843+ flow_offload_fixup_ct(flow->ct);
3844+ else
3845+ flow_offload_fixup_ct_timeout(flow->ct);
3846
developer8cb3ac72022-07-04 10:55:14 +08003847 flow_offload_free(flow);
3848 }
3849
3850 void flow_offload_teardown(struct flow_offload *flow)
3851 {
3852- struct flow_offload_entry *e;
developerb7c46752022-07-04 19:51:38 +08003853-
3854- flow->flags |= FLOW_OFFLOAD_TEARDOWN;
developeree39bcf2023-06-16 08:03:30 +08003855+ set_bit(NF_FLOW_TEARDOWN, &flow->flags);
3856
developer8cb3ac72022-07-04 10:55:14 +08003857- e = container_of(flow, struct flow_offload_entry, flow);
3858- flow_offload_fixup_ct_state(e->ct);
developeree39bcf2023-06-16 08:03:30 +08003859+ flow_offload_fixup_ct_state(flow->ct);
developer8cb3ac72022-07-04 10:55:14 +08003860 }
3861 EXPORT_SYMBOL_GPL(flow_offload_teardown);
3862
developeree39bcf2023-06-16 08:03:30 +08003863@@ -304,7 +379,6 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003864 {
3865 struct flow_offload_tuple_rhash *tuplehash;
3866 struct flow_offload *flow;
3867- struct flow_offload_entry *e;
3868 int dir;
3869
3870 tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
developeree39bcf2023-06-16 08:03:30 +08003871@@ -314,19 +388,17 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003872
3873 dir = tuplehash->tuple.dir;
3874 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
3875- if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
3876+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags))
3877 return NULL;
3878
3879- e = container_of(flow, struct flow_offload_entry, flow);
3880- if (unlikely(nf_ct_is_dying(e->ct)))
3881+ if (unlikely(nf_ct_is_dying(flow->ct)))
3882 return NULL;
3883
3884 return tuplehash;
3885 }
3886 EXPORT_SYMBOL_GPL(flow_offload_lookup);
3887
3888-static int
3889-nf_flow_table_iterate(struct nf_flowtable *flow_table,
3890+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
3891 void (*iter)(struct flow_offload *flow, void *data),
3892 void *data)
3893 {
developeree39bcf2023-06-16 08:03:30 +08003894@@ -339,7 +411,6 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003895 rhashtable_walk_start(&hti);
3896
3897 while ((tuplehash = rhashtable_walk_next(&hti))) {
3898-
3899 if (IS_ERR(tuplehash)) {
3900 if (PTR_ERR(tuplehash) != -EAGAIN) {
3901 err = PTR_ERR(tuplehash);
developeree39bcf2023-06-16 08:03:30 +08003902@@ -359,23 +430,52 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003903
3904 return err;
3905 }
3906+EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
3907
developeree39bcf2023-06-16 08:03:30 +08003908-static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
3909+static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple)
developer8cb3ac72022-07-04 10:55:14 +08003910 {
developeree39bcf2023-06-16 08:03:30 +08003911- struct nf_flowtable *flow_table = data;
developer8cb3ac72022-07-04 10:55:14 +08003912- struct flow_offload_entry *e;
3913- bool teardown;
developeree39bcf2023-06-16 08:03:30 +08003914+ struct dst_entry *dst;
developer8cb3ac72022-07-04 10:55:14 +08003915
3916- e = container_of(flow, struct flow_offload_entry, flow);
developeree39bcf2023-06-16 08:03:30 +08003917+ if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
3918+ tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
3919+ dst = tuple->dst_cache;
3920+ if (!dst_check(dst, tuple->dst_cookie))
3921+ return true;
3922+ }
3923
developer8cb3ac72022-07-04 10:55:14 +08003924- teardown = flow->flags & (FLOW_OFFLOAD_DYING |
3925- FLOW_OFFLOAD_TEARDOWN);
developeree39bcf2023-06-16 08:03:30 +08003926+ return false;
3927+}
3928
developer8cb3ac72022-07-04 10:55:14 +08003929- if (!teardown)
3930- nf_ct_offload_timeout(flow);
developeree39bcf2023-06-16 08:03:30 +08003931+static bool nf_flow_has_stale_dst(struct flow_offload *flow)
3932+{
3933+ return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) ||
3934+ flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple);
3935+}
developer8cb3ac72022-07-04 10:55:14 +08003936
3937- if (nf_flow_has_expired(flow) || teardown)
3938- flow_offload_del(flow_table, flow);
developeree39bcf2023-06-16 08:03:30 +08003939+static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
3940+{
3941+ struct nf_flowtable *flow_table = data;
3942+
3943+ if (nf_flow_has_expired(flow) ||
3944+ nf_ct_is_dying(flow->ct) ||
3945+ nf_flow_has_stale_dst(flow))
3946+ set_bit(NF_FLOW_TEARDOWN, &flow->flags);
3947+
developer8cb3ac72022-07-04 10:55:14 +08003948+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
3949+ if (test_bit(NF_FLOW_HW, &flow->flags)) {
developeree39bcf2023-06-16 08:03:30 +08003950+ if (!test_and_set_bit(NF_FLOW_HW_ACCT_DYING, &flow->flags))
3951+ nf_flow_offload_stats(flow_table, flow, true);
3952+
developer8cb3ac72022-07-04 10:55:14 +08003953+ if (!test_bit(NF_FLOW_HW_DYING, &flow->flags))
3954+ nf_flow_offload_del(flow_table, flow);
3955+ else if (test_bit(NF_FLOW_HW_DEAD, &flow->flags))
3956+ flow_offload_del(flow_table, flow);
3957+ } else {
3958+ flow_offload_del(flow_table, flow);
3959+ }
3960+ } else if (test_bit(NF_FLOW_HW, &flow->flags)) {
developeree39bcf2023-06-16 08:03:30 +08003961+ nf_flow_offload_stats(flow_table, flow, false);
developer8cb3ac72022-07-04 10:55:14 +08003962+ }
3963 }
3964
3965 static void nf_flow_offload_work_gc(struct work_struct *work)
developeree39bcf2023-06-16 08:03:30 +08003966@@ -387,30 +484,20 @@ static void nf_flow_offload_work_gc(struct work_struct *work)
developer8cb3ac72022-07-04 10:55:14 +08003967 queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
3968 }
3969
3970-static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
3971- __be16 port, __be16 new_port)
3972+static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
3973+ __be16 port, __be16 new_port)
3974 {
3975 struct tcphdr *tcph;
3976
3977- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
3978- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
3979- return -1;
3980-
3981 tcph = (void *)(skb_network_header(skb) + thoff);
3982 inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
3983-
3984- return 0;
3985 }
3986
3987-static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
3988- __be16 port, __be16 new_port)
3989+static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
3990+ __be16 port, __be16 new_port)
3991 {
3992 struct udphdr *udph;
3993
3994- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
3995- skb_try_make_writable(skb, thoff + sizeof(*udph)))
3996- return -1;
3997-
3998 udph = (void *)(skb_network_header(skb) + thoff);
3999 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
4000 inet_proto_csum_replace2(&udph->check, skb, port,
developeree39bcf2023-06-16 08:03:30 +08004001@@ -418,38 +505,28 @@ static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
developer8cb3ac72022-07-04 10:55:14 +08004002 if (!udph->check)
4003 udph->check = CSUM_MANGLED_0;
4004 }
4005-
4006- return 0;
4007 }
4008
4009-static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
4010- u8 protocol, __be16 port, __be16 new_port)
4011+static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
4012+ u8 protocol, __be16 port, __be16 new_port)
4013 {
4014 switch (protocol) {
4015 case IPPROTO_TCP:
4016- if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
4017- return NF_DROP;
4018+ nf_flow_nat_port_tcp(skb, thoff, port, new_port);
4019 break;
4020 case IPPROTO_UDP:
4021- if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
4022- return NF_DROP;
4023+ nf_flow_nat_port_udp(skb, thoff, port, new_port);
4024 break;
4025 }
4026-
4027- return 0;
4028 }
4029
4030-int nf_flow_snat_port(const struct flow_offload *flow,
4031- struct sk_buff *skb, unsigned int thoff,
4032- u8 protocol, enum flow_offload_tuple_dir dir)
4033+void nf_flow_snat_port(const struct flow_offload *flow,
4034+ struct sk_buff *skb, unsigned int thoff,
4035+ u8 protocol, enum flow_offload_tuple_dir dir)
4036 {
4037 struct flow_ports *hdr;
4038 __be16 port, new_port;
4039
4040- if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
4041- skb_try_make_writable(skb, thoff + sizeof(*hdr)))
4042- return -1;
4043-
4044 hdr = (void *)(skb_network_header(skb) + thoff);
4045
4046 switch (dir) {
developeree39bcf2023-06-16 08:03:30 +08004047@@ -463,25 +540,19 @@ int nf_flow_snat_port(const struct flow_offload *flow,
developer8cb3ac72022-07-04 10:55:14 +08004048 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
4049 hdr->dest = new_port;
4050 break;
4051- default:
4052- return -1;
4053 }
4054
4055- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4056+ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4057 }
4058 EXPORT_SYMBOL_GPL(nf_flow_snat_port);
4059
4060-int nf_flow_dnat_port(const struct flow_offload *flow,
4061- struct sk_buff *skb, unsigned int thoff,
4062- u8 protocol, enum flow_offload_tuple_dir dir)
4063+void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb,
4064+ unsigned int thoff, u8 protocol,
4065+ enum flow_offload_tuple_dir dir)
4066 {
4067 struct flow_ports *hdr;
4068 __be16 port, new_port;
4069
4070- if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
4071- skb_try_make_writable(skb, thoff + sizeof(*hdr)))
4072- return -1;
4073-
4074 hdr = (void *)(skb_network_header(skb) + thoff);
4075
4076 switch (dir) {
developeree39bcf2023-06-16 08:03:30 +08004077@@ -495,11 +566,9 @@ int nf_flow_dnat_port(const struct flow_offload *flow,
developer8cb3ac72022-07-04 10:55:14 +08004078 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
4079 hdr->source = new_port;
4080 break;
4081- default:
4082- return -1;
4083 }
4084
4085- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4086+ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4087 }
4088 EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
4089
developeree39bcf2023-06-16 08:03:30 +08004090@@ -507,7 +576,9 @@ int nf_flow_table_init(struct nf_flowtable *flowtable)
developer8cb3ac72022-07-04 10:55:14 +08004091 {
4092 int err;
4093
4094- INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
4095+ INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
4096+ flow_block_init(&flowtable->flow_block);
4097+ init_rwsem(&flowtable->flow_block_lock);
4098
4099 err = rhashtable_init(&flowtable->rhashtable,
4100 &nf_flow_offload_rhash_params);
developeree39bcf2023-06-16 08:03:30 +08004101@@ -528,25 +599,24 @@ EXPORT_SYMBOL_GPL(nf_flow_table_init);
developer8cb3ac72022-07-04 10:55:14 +08004102 static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
4103 {
4104 struct net_device *dev = data;
4105- struct flow_offload_entry *e;
4106-
4107- e = container_of(flow, struct flow_offload_entry, flow);
4108
4109 if (!dev) {
4110 flow_offload_teardown(flow);
4111 return;
4112 }
4113- if (net_eq(nf_ct_net(e->ct), dev_net(dev)) &&
4114+
4115+ if (net_eq(nf_ct_net(flow->ct), dev_net(dev)) &&
4116 (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
4117 flow->tuplehash[1].tuple.iifidx == dev->ifindex))
4118- flow_offload_dead(flow);
4119+ flow_offload_teardown(flow);
4120 }
4121
4122-static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
4123- struct net_device *dev)
4124+void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
4125+ struct net_device *dev)
4126 {
4127 nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
4128 flush_delayed_work(&flowtable->gc_work);
4129+ nf_flow_table_offload_flush(flowtable);
4130 }
4131
4132 void nf_flow_table_cleanup(struct net_device *dev)
developeree39bcf2023-06-16 08:03:30 +08004133@@ -555,7 +625,7 @@ void nf_flow_table_cleanup(struct net_device *dev)
developer8cb3ac72022-07-04 10:55:14 +08004134
4135 mutex_lock(&flowtable_lock);
4136 list_for_each_entry(flowtable, &flowtables, list)
4137- nf_flow_table_iterate_cleanup(flowtable, dev);
4138+ nf_flow_table_gc_cleanup(flowtable, dev);
4139 mutex_unlock(&flowtable_lock);
4140 }
4141 EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
developeree39bcf2023-06-16 08:03:30 +08004142@@ -565,9 +635,14 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
developer8cb3ac72022-07-04 10:55:14 +08004143 mutex_lock(&flowtable_lock);
4144 list_del(&flow_table->list);
4145 mutex_unlock(&flowtable_lock);
4146+
4147 cancel_delayed_work_sync(&flow_table->gc_work);
4148 nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
4149 nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
4150+ nf_flow_table_offload_flush(flow_table);
4151+ if (nf_flowtable_hw_offload(flow_table))
4152+ nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step,
4153+ flow_table);
4154 rhashtable_destroy(&flow_table->rhashtable);
4155 }
4156 EXPORT_SYMBOL_GPL(nf_flow_table_free);
developeree39bcf2023-06-16 08:03:30 +08004157@@ -591,12 +666,23 @@ static struct notifier_block flow_offload_netdev_notifier = {
developer8cb3ac72022-07-04 10:55:14 +08004158
4159 static int __init nf_flow_table_module_init(void)
4160 {
4161- return register_netdevice_notifier(&flow_offload_netdev_notifier);
4162+ int ret;
4163+
4164+ ret = nf_flow_table_offload_init();
4165+ if (ret)
4166+ return ret;
4167+
4168+ ret = register_netdevice_notifier(&flow_offload_netdev_notifier);
4169+ if (ret)
4170+ nf_flow_table_offload_exit();
4171+
4172+ return ret;
4173 }
4174
4175 static void __exit nf_flow_table_module_exit(void)
4176 {
4177 unregister_netdevice_notifier(&flow_offload_netdev_notifier);
4178+ nf_flow_table_offload_exit();
4179 }
4180
4181 module_init(nf_flow_table_module_init);
developeree39bcf2023-06-16 08:03:30 +08004182@@ -604,3 +690,4 @@ module_exit(nf_flow_table_module_exit);
developer8cb3ac72022-07-04 10:55:14 +08004183
4184 MODULE_LICENSE("GPL");
4185 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
4186+MODULE_DESCRIPTION("Netfilter flow table module");
4187diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
developeree39bcf2023-06-16 08:03:30 +08004188index 397129b2..6257d87c 100644
developer8cb3ac72022-07-04 10:55:14 +08004189--- a/net/netfilter/nf_flow_table_ip.c
4190+++ b/net/netfilter/nf_flow_table_ip.c
4191@@ -7,11 +7,13 @@
4192 #include <linux/ip.h>
4193 #include <linux/ipv6.h>
4194 #include <linux/netdevice.h>
4195+#include <linux/if_ether.h>
4196 #include <net/ip.h>
4197 #include <net/ipv6.h>
4198 #include <net/ip6_route.h>
4199 #include <net/neighbour.h>
4200 #include <net/netfilter/nf_flow_table.h>
4201+#include <net/netfilter/nf_conntrack_acct.h>
4202 /* For layer 4 checksum field offset. */
4203 #include <linux/tcp.h>
4204 #include <linux/udp.h>
4205@@ -24,9 +26,6 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
4206 if (proto != IPPROTO_TCP)
4207 return 0;
4208
4209- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
4210- return -1;
4211-
4212 tcph = (void *)(skb_network_header(skb) + thoff);
4213 if (unlikely(tcph->fin || tcph->rst)) {
4214 flow_offload_teardown(flow);
4215@@ -36,30 +35,20 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
4216 return 0;
4217 }
4218
4219-static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
4220- __be32 addr, __be32 new_addr)
4221+static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
4222+ __be32 addr, __be32 new_addr)
4223 {
4224 struct tcphdr *tcph;
4225
4226- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
4227- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
4228- return -1;
4229-
4230 tcph = (void *)(skb_network_header(skb) + thoff);
4231 inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
4232-
4233- return 0;
4234 }
4235
4236-static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
4237- __be32 addr, __be32 new_addr)
4238+static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
4239+ __be32 addr, __be32 new_addr)
4240 {
4241 struct udphdr *udph;
4242
4243- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
4244- skb_try_make_writable(skb, thoff + sizeof(*udph)))
4245- return -1;
4246-
4247 udph = (void *)(skb_network_header(skb) + thoff);
4248 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
4249 inet_proto_csum_replace4(&udph->check, skb, addr,
4250@@ -67,31 +56,25 @@ static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
4251 if (!udph->check)
4252 udph->check = CSUM_MANGLED_0;
4253 }
4254-
4255- return 0;
4256 }
4257
4258-static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
4259- unsigned int thoff, __be32 addr,
4260- __be32 new_addr)
4261+static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
4262+ unsigned int thoff, __be32 addr,
4263+ __be32 new_addr)
4264 {
4265 switch (iph->protocol) {
4266 case IPPROTO_TCP:
4267- if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
4268- return NF_DROP;
4269+ nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr);
4270 break;
4271 case IPPROTO_UDP:
4272- if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
4273- return NF_DROP;
4274+ nf_flow_nat_ip_udp(skb, thoff, addr, new_addr);
4275 break;
4276 }
4277-
4278- return 0;
4279 }
4280
4281-static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4282- struct iphdr *iph, unsigned int thoff,
4283- enum flow_offload_tuple_dir dir)
4284+static void nf_flow_snat_ip(const struct flow_offload *flow,
4285+ struct sk_buff *skb, struct iphdr *iph,
4286+ unsigned int thoff, enum flow_offload_tuple_dir dir)
4287 {
4288 __be32 addr, new_addr;
4289
4290@@ -106,17 +89,15 @@ static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4291 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
4292 iph->daddr = new_addr;
4293 break;
4294- default:
4295- return -1;
4296 }
4297 csum_replace4(&iph->check, addr, new_addr);
4298
4299- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4300+ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4301 }
4302
4303-static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4304- struct iphdr *iph, unsigned int thoff,
4305- enum flow_offload_tuple_dir dir)
4306+static void nf_flow_dnat_ip(const struct flow_offload *flow,
4307+ struct sk_buff *skb, struct iphdr *iph,
4308+ unsigned int thoff, enum flow_offload_tuple_dir dir)
4309 {
4310 __be32 addr, new_addr;
4311
4312@@ -131,29 +112,24 @@ static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4313 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
4314 iph->saddr = new_addr;
4315 break;
4316- default:
4317- return -1;
4318 }
4319 csum_replace4(&iph->check, addr, new_addr);
4320
4321- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4322+ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4323 }
4324
4325-static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4326- unsigned int thoff, enum flow_offload_tuple_dir dir)
4327+static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4328+ unsigned int thoff, enum flow_offload_tuple_dir dir,
4329+ struct iphdr *iph)
4330 {
4331- struct iphdr *iph = ip_hdr(skb);
4332-
4333- if (flow->flags & FLOW_OFFLOAD_SNAT &&
4334- (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
4335- nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
4336- return -1;
4337- if (flow->flags & FLOW_OFFLOAD_DNAT &&
4338- (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
4339- nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
4340- return -1;
4341-
4342- return 0;
4343+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
4344+ nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir);
4345+ nf_flow_snat_ip(flow, skb, iph, thoff, dir);
4346+ }
4347+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
4348+ nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir);
4349+ nf_flow_dnat_ip(flow, skb, iph, thoff, dir);
4350+ }
4351 }
4352
4353 static bool ip_has_options(unsigned int thoff)
4354@@ -161,35 +137,70 @@ static bool ip_has_options(unsigned int thoff)
4355 return thoff != sizeof(struct iphdr);
4356 }
4357
4358+static void nf_flow_tuple_encap(struct sk_buff *skb,
4359+ struct flow_offload_tuple *tuple)
4360+{
4361+ struct vlan_ethhdr *veth;
4362+ struct pppoe_hdr *phdr;
4363+ int i = 0;
4364+
4365+ if (skb_vlan_tag_present(skb)) {
4366+ tuple->encap[i].id = skb_vlan_tag_get(skb);
4367+ tuple->encap[i].proto = skb->vlan_proto;
4368+ i++;
4369+ }
4370+ switch (skb->protocol) {
4371+ case htons(ETH_P_8021Q):
4372+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
4373+ tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
4374+ tuple->encap[i].proto = skb->protocol;
4375+ break;
4376+ case htons(ETH_P_PPP_SES):
4377+ phdr = (struct pppoe_hdr *)skb_mac_header(skb);
4378+ tuple->encap[i].id = ntohs(phdr->sid);
4379+ tuple->encap[i].proto = skb->protocol;
4380+ break;
4381+ }
4382+}
4383+
4384 static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
4385- struct flow_offload_tuple *tuple)
4386+ struct flow_offload_tuple *tuple, u32 *hdrsize,
4387+ u32 offset)
4388 {
4389 struct flow_ports *ports;
4390 unsigned int thoff;
4391 struct iphdr *iph;
4392
4393- if (!pskb_may_pull(skb, sizeof(*iph)))
4394+ if (!pskb_may_pull(skb, sizeof(*iph) + offset))
4395 return -1;
4396
4397- iph = ip_hdr(skb);
4398- thoff = iph->ihl * 4;
4399+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
4400+ thoff = (iph->ihl * 4);
4401
4402 if (ip_is_fragment(iph) ||
4403 unlikely(ip_has_options(thoff)))
4404 return -1;
4405
4406- if (iph->protocol != IPPROTO_TCP &&
4407- iph->protocol != IPPROTO_UDP)
4408+ thoff += offset;
4409+
4410+ switch (iph->protocol) {
4411+ case IPPROTO_TCP:
4412+ *hdrsize = sizeof(struct tcphdr);
4413+ break;
4414+ case IPPROTO_UDP:
4415+ *hdrsize = sizeof(struct udphdr);
4416+ break;
4417+ default:
4418 return -1;
4419+ }
4420
4421 if (iph->ttl <= 1)
4422 return -1;
4423
4424- thoff = iph->ihl * 4;
4425- if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
4426+ if (!pskb_may_pull(skb, thoff + *hdrsize))
4427 return -1;
4428
4429- iph = ip_hdr(skb);
4430+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
4431 ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
4432
4433 tuple->src_v4.s_addr = iph->saddr;
4434@@ -199,6 +210,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
4435 tuple->l3proto = AF_INET;
4436 tuple->l4proto = iph->protocol;
4437 tuple->iifidx = dev->ifindex;
4438+ nf_flow_tuple_encap(skb, tuple);
4439
4440 return 0;
4441 }
developeree39bcf2023-06-16 08:03:30 +08004442@@ -225,6 +237,75 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08004443 return NF_STOLEN;
4444 }
4445
4446+static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
4447+ u32 *offset)
4448+{
4449+ struct vlan_ethhdr *veth;
4450+
4451+ switch (skb->protocol) {
4452+ case htons(ETH_P_8021Q):
4453+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
4454+ if (veth->h_vlan_encapsulated_proto == proto) {
4455+ *offset += VLAN_HLEN;
4456+ return true;
4457+ }
4458+ break;
4459+ case htons(ETH_P_PPP_SES):
4460+ if (nf_flow_pppoe_proto(skb) == proto) {
4461+ *offset += PPPOE_SES_HLEN;
4462+ return true;
4463+ }
4464+ break;
4465+ }
4466+
4467+ return false;
4468+}
4469+
4470+static void nf_flow_encap_pop(struct sk_buff *skb,
4471+ struct flow_offload_tuple_rhash *tuplehash)
4472+{
4473+ struct vlan_hdr *vlan_hdr;
4474+ int i;
4475+
4476+ for (i = 0; i < tuplehash->tuple.encap_num; i++) {
4477+ if (skb_vlan_tag_present(skb)) {
4478+ __vlan_hwaccel_clear_tag(skb);
4479+ continue;
4480+ }
4481+ switch (skb->protocol) {
4482+ case htons(ETH_P_8021Q):
4483+ vlan_hdr = (struct vlan_hdr *)skb->data;
4484+ __skb_pull(skb, VLAN_HLEN);
4485+ vlan_set_encap_proto(skb, vlan_hdr);
4486+ skb_reset_network_header(skb);
4487+ break;
4488+ case htons(ETH_P_PPP_SES):
4489+ skb->protocol = nf_flow_pppoe_proto(skb);
4490+ skb_pull(skb, PPPOE_SES_HLEN);
4491+ skb_reset_network_header(skb);
4492+ break;
4493+ }
4494+ }
4495+}
4496+
4497+static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
4498+ const struct flow_offload_tuple_rhash *tuplehash,
4499+ unsigned short type)
4500+{
4501+ struct net_device *outdev;
4502+
4503+ outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
4504+ if (!outdev)
4505+ return NF_DROP;
4506+
4507+ skb->dev = outdev;
4508+ dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
4509+ tuplehash->tuple.out.h_source, skb->len);
4510+ dev_queue_xmit(skb);
4511+
4512+ return NF_STOLEN;
4513+}
4514+
4515 unsigned int
4516 nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
4517 const struct nf_hook_state *state)
developeree39bcf2023-06-16 08:03:30 +08004518@@ -235,15 +316,18 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08004519 enum flow_offload_tuple_dir dir;
4520 struct flow_offload *flow;
4521 struct net_device *outdev;
4522+ u32 hdrsize, offset = 0;
4523+ unsigned int thoff, mtu;
4524 struct rtable *rt;
4525- unsigned int thoff;
4526 struct iphdr *iph;
4527 __be32 nexthop;
4528+ int ret;
4529
4530- if (skb->protocol != htons(ETH_P_IP))
4531+ if (skb->protocol != htons(ETH_P_IP) &&
4532+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset))
4533 return NF_ACCEPT;
4534
4535- if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
4536+ if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0)
4537 return NF_ACCEPT;
4538
4539 tuplehash = flow_offload_lookup(flow_table, &tuple);
developeree39bcf2023-06-16 08:03:30 +08004540@@ -252,75 +336,80 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08004541
4542 dir = tuplehash->tuple.dir;
4543 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
4544- rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
4545- outdev = rt->dst.dev;
developeree39bcf2023-06-16 08:03:30 +08004546-
developer8cb3ac72022-07-04 10:55:14 +08004547- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
developeree39bcf2023-06-16 08:03:30 +08004548- return NF_ACCEPT;
developerb7c46752022-07-04 19:51:38 +08004549
developer8cb3ac72022-07-04 10:55:14 +08004550- if (skb_try_make_writable(skb, sizeof(*iph)))
4551- return NF_DROP;
developerb7c46752022-07-04 19:51:38 +08004552-
developer8cb3ac72022-07-04 10:55:14 +08004553- thoff = ip_hdr(skb)->ihl * 4;
4554- if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
developeree39bcf2023-06-16 08:03:30 +08004555+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
4556+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
developer8cb3ac72022-07-04 10:55:14 +08004557 return NF_ACCEPT;
developer7eb15dc2023-06-14 17:44:03 +08004558
4559- if (!dst_check(&rt->dst, 0)) {
developeree39bcf2023-06-16 08:03:30 +08004560- flow_offload_teardown(flow);
4561+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
4562+ thoff = (iph->ihl * 4) + offset;
4563+ if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
developer7eb15dc2023-06-14 17:44:03 +08004564 return NF_ACCEPT;
developeree39bcf2023-06-16 08:03:30 +08004565- }
developer8cb3ac72022-07-04 10:55:14 +08004566
4567- if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
4568+ if (skb_try_make_writable(skb, thoff + hdrsize))
4569 return NF_DROP;
4570
4571- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
4572+ flow_offload_refresh(flow_table, flow);
4573+
4574+ nf_flow_encap_pop(skb, tuplehash);
4575+ thoff -= offset;
4576+
4577 iph = ip_hdr(skb);
4578+ nf_flow_nat_ip(flow, skb, thoff, dir, iph);
4579+
4580 ip_decrease_ttl(iph);
4581 skb->tstamp = 0;
4582
4583- if (unlikely(dst_xfrm(&rt->dst))) {
4584+ if (flow_table->flags & NF_FLOWTABLE_COUNTER)
4585+ nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
4586+
4587+ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
4588+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
4589 memset(skb->cb, 0, sizeof(struct inet_skb_parm));
4590 IPCB(skb)->iif = skb->dev->ifindex;
4591 IPCB(skb)->flags = IPSKB_FORWARDED;
4592 return nf_flow_xmit_xfrm(skb, state, &rt->dst);
4593 }
4594
4595- skb->dev = outdev;
4596- nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
4597- skb_dst_set_noref(skb, &rt->dst);
4598- neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
4599+ switch (tuplehash->tuple.xmit_type) {
4600+ case FLOW_OFFLOAD_XMIT_NEIGH:
4601+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
4602+ outdev = rt->dst.dev;
4603+ skb->dev = outdev;
4604+ nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
4605+ skb_dst_set_noref(skb, &rt->dst);
4606+ neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
4607+ ret = NF_STOLEN;
4608+ break;
4609+ case FLOW_OFFLOAD_XMIT_DIRECT:
4610+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
4611+ if (ret == NF_DROP)
4612+ flow_offload_teardown(flow);
4613+ break;
4614+ }
4615
4616- return NF_STOLEN;
4617+ return ret;
4618 }
4619 EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
4620
4621-static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
4622- struct in6_addr *addr,
4623- struct in6_addr *new_addr)
4624+static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
4625+ struct in6_addr *addr,
4626+ struct in6_addr *new_addr,
4627+ struct ipv6hdr *ip6h)
4628 {
4629 struct tcphdr *tcph;
4630
4631- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
4632- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
4633- return -1;
4634-
4635 tcph = (void *)(skb_network_header(skb) + thoff);
4636 inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
4637 new_addr->s6_addr32, true);
4638-
4639- return 0;
4640 }
4641
4642-static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
4643- struct in6_addr *addr,
4644- struct in6_addr *new_addr)
4645+static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
4646+ struct in6_addr *addr,
4647+ struct in6_addr *new_addr)
4648 {
4649 struct udphdr *udph;
4650
4651- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
4652- skb_try_make_writable(skb, thoff + sizeof(*udph)))
4653- return -1;
4654-
4655 udph = (void *)(skb_network_header(skb) + thoff);
4656 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
4657 inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
4658@@ -328,32 +417,26 @@ static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
4659 		if (!udph->check)
4660 udph->check = CSUM_MANGLED_0;
4661 }
4662-
4663- return 0;
4664 }
4665
4666-static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
4667- unsigned int thoff, struct in6_addr *addr,
4668- struct in6_addr *new_addr)
4669+static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
4670+ unsigned int thoff, struct in6_addr *addr,
4671+ struct in6_addr *new_addr)
4672 {
4673 switch (ip6h->nexthdr) {
4674 case IPPROTO_TCP:
4675- if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
4676- return NF_DROP;
4677+ nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h);
4678 break;
4679 case IPPROTO_UDP:
4680- if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
4681- return NF_DROP;
4682+ nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr);
4683 break;
4684 }
4685-
4686- return 0;
4687 }
4688
4689-static int nf_flow_snat_ipv6(const struct flow_offload *flow,
4690- struct sk_buff *skb, struct ipv6hdr *ip6h,
4691- unsigned int thoff,
4692- enum flow_offload_tuple_dir dir)
4693+static void nf_flow_snat_ipv6(const struct flow_offload *flow,
4694+ struct sk_buff *skb, struct ipv6hdr *ip6h,
4695+ unsigned int thoff,
4696+ enum flow_offload_tuple_dir dir)
4697 {
4698 struct in6_addr addr, new_addr;
4699
4700@@ -368,17 +451,15 @@ static int nf_flow_snat_ipv6(const struct flow_offload *flow,
4701 		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
4702 ip6h->daddr = new_addr;
4703 break;
4704- default:
4705- return -1;
4706 }
4707
4708- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4709+ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4710 }
4711
4712-static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
4713- struct sk_buff *skb, struct ipv6hdr *ip6h,
4714- unsigned int thoff,
4715- enum flow_offload_tuple_dir dir)
4716+static void nf_flow_dnat_ipv6(const struct flow_offload *flow,
4717+ struct sk_buff *skb, struct ipv6hdr *ip6h,
4718+ unsigned int thoff,
4719+ enum flow_offload_tuple_dir dir)
4720 {
4721 struct in6_addr addr, new_addr;
4722
4723@@ -393,56 +474,60 @@ static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
4724 		new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
4725 ip6h->saddr = new_addr;
4726 break;
4727- default:
4728- return -1;
4729 }
4730
4731- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4732+ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4733 }
4734
4735-static int nf_flow_nat_ipv6(const struct flow_offload *flow,
4736- struct sk_buff *skb,
4737- enum flow_offload_tuple_dir dir)
4738+static void nf_flow_nat_ipv6(const struct flow_offload *flow,
4739+ struct sk_buff *skb,
4740+ enum flow_offload_tuple_dir dir,
4741+ struct ipv6hdr *ip6h)
4742 {
4743- struct ipv6hdr *ip6h = ipv6_hdr(skb);
4744 unsigned int thoff = sizeof(*ip6h);
4745
4746- if (flow->flags & FLOW_OFFLOAD_SNAT &&
4747- (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
4748- nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
4749- return -1;
4750- if (flow->flags & FLOW_OFFLOAD_DNAT &&
4751- (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
4752- nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
4753- return -1;
4754-
4755- return 0;
4756+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
4757+ nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir);
4758+ nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir);
4759+ }
4760+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
4761+ nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir);
4762+ nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir);
4763+ }
4764 }
4765
4766 static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
4767- struct flow_offload_tuple *tuple)
4768+ struct flow_offload_tuple *tuple, u32 *hdrsize,
4769+ u32 offset)
4770 {
4771 struct flow_ports *ports;
4772 struct ipv6hdr *ip6h;
4773 unsigned int thoff;
4774
4775- if (!pskb_may_pull(skb, sizeof(*ip6h)))
4776+ thoff = sizeof(*ip6h) + offset;
4777+ if (!pskb_may_pull(skb, thoff))
4778 return -1;
4779
4780- ip6h = ipv6_hdr(skb);
4781+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
4782
4783- if (ip6h->nexthdr != IPPROTO_TCP &&
4784- ip6h->nexthdr != IPPROTO_UDP)
4785+ switch (ip6h->nexthdr) {
4786+ case IPPROTO_TCP:
4787+ *hdrsize = sizeof(struct tcphdr);
4788+ break;
4789+ case IPPROTO_UDP:
4790+ *hdrsize = sizeof(struct udphdr);
4791+ break;
4792+ default:
4793 return -1;
4794+ }
4795
4796 if (ip6h->hop_limit <= 1)
4797 return -1;
4798
4799- thoff = sizeof(*ip6h);
4800- if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
4801+ if (!pskb_may_pull(skb, thoff + *hdrsize))
4802 return -1;
4803
4804- ip6h = ipv6_hdr(skb);
4805+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
4806 ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
4807
4808 tuple->src_v6 = ip6h->saddr;
4809@@ -452,6 +537,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
4810 	tuple->l3proto		= AF_INET6;
4811 tuple->l4proto = ip6h->nexthdr;
4812 tuple->iifidx = dev->ifindex;
4813+ nf_flow_tuple_encap(skb, tuple);
4814
4815 return 0;
4816 }
4817@@ -467,13 +553,17 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
4818 	const struct in6_addr *nexthop;
4819 struct flow_offload *flow;
4820 struct net_device *outdev;
4821+ unsigned int thoff, mtu;
4822+ u32 hdrsize, offset = 0;
4823 struct ipv6hdr *ip6h;
4824 struct rt6_info *rt;
4825+ int ret;
4826
4827- if (skb->protocol != htons(ETH_P_IPV6))
4828+ if (skb->protocol != htons(ETH_P_IPV6) &&
4829+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset))
4830 return NF_ACCEPT;
4831
4832- if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
4833+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0)
4834 return NF_ACCEPT;
4835
4836 tuplehash = flow_offload_lookup(flow_table, &tuple);
4837@@ -482,44 +572,57 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
4838
4839 dir = tuplehash->tuple.dir;
4840 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
4841- rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
4842- outdev = rt->dst.dev;
4843
4844-	if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
4845+	mtu = flow->tuplehash[dir].tuple.mtu + offset;
4846+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
4847 return NF_ACCEPT;
4848
4849-	if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
4850- sizeof(*ip6h)))
4851+	ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
4852+ thoff = sizeof(*ip6h) + offset;
4853+ if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
4854 return NF_ACCEPT;
4855
4856-	if (!dst_check(&rt->dst, tuplehash->tuple.dst_cookie)) {
4857-		flow_offload_teardown(flow);
4858- return NF_ACCEPT;
4859- }
4860-
4861-	if (skb_try_make_writable(skb, sizeof(*ip6h)))
4862+ if (skb_try_make_writable(skb, thoff + hdrsize))
4863 return NF_DROP;
4864
4865- if (nf_flow_nat_ipv6(flow, skb, dir) < 0)
4866- return NF_DROP;
4867+ flow_offload_refresh(flow_table, flow);
4868+
4869+ nf_flow_encap_pop(skb, tuplehash);
4870
4871- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
4872 ip6h = ipv6_hdr(skb);
4873+ nf_flow_nat_ipv6(flow, skb, dir, ip6h);
4874+
4875 ip6h->hop_limit--;
4876 skb->tstamp = 0;
4877
4878- if (unlikely(dst_xfrm(&rt->dst))) {
4879+ if (flow_table->flags & NF_FLOWTABLE_COUNTER)
4880+ nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
4881+
4882+ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
4883+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
4884 memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
4885 IP6CB(skb)->iif = skb->dev->ifindex;
4886 IP6CB(skb)->flags = IP6SKB_FORWARDED;
4887 return nf_flow_xmit_xfrm(skb, state, &rt->dst);
4888 }
4889
4890- skb->dev = outdev;
4891- nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
4892- skb_dst_set_noref(skb, &rt->dst);
4893- neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
4894+ switch (tuplehash->tuple.xmit_type) {
4895+ case FLOW_OFFLOAD_XMIT_NEIGH:
4896+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
4897+ outdev = rt->dst.dev;
4898+ skb->dev = outdev;
4899+ nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
4900+ skb_dst_set_noref(skb, &rt->dst);
4901+ neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
4902+ ret = NF_STOLEN;
4903+ break;
4904+ case FLOW_OFFLOAD_XMIT_DIRECT:
4905+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
4906+ if (ret == NF_DROP)
4907+ flow_offload_teardown(flow);
4908+ break;
4909+ }
4910
4911- return NF_STOLEN;
4912+ return ret;
4913 }
4914 EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
4915diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
4916new file mode 100644
4917index 000000000..d94c6fb92
4918--- /dev/null
4919+++ b/net/netfilter/nf_flow_table_offload.c
4920@@ -0,0 +1,1199 @@
4921+#include <linux/kernel.h>
4922+#include <linux/init.h>
4923+#include <linux/module.h>
4924+#include <linux/netfilter.h>
4925+#include <linux/rhashtable.h>
4926+#include <linux/netdevice.h>
4927+#include <linux/tc_act/tc_csum.h>
4928+#include <net/flow_offload.h>
4929+#include <net/netfilter/nf_flow_table.h>
4930+#include <net/netfilter/nf_tables.h>
4931+#include <net/netfilter/nf_conntrack.h>
4932+#include <net/netfilter/nf_conntrack_acct.h>
4933+#include <net/netfilter/nf_conntrack_core.h>
4934+#include <net/netfilter/nf_conntrack_tuple.h>
4935+
4936+static struct workqueue_struct *nf_flow_offload_add_wq;
4937+static struct workqueue_struct *nf_flow_offload_del_wq;
4938+static struct workqueue_struct *nf_flow_offload_stats_wq;
4939+
4940+struct flow_offload_work {
4941+ struct list_head list;
4942+ enum flow_cls_command cmd;
4943+ int priority;
4944+ struct nf_flowtable *flowtable;
4945+ struct flow_offload *flow;
4946+ struct work_struct work;
4947+};
4948+
4949+#define NF_FLOW_DISSECTOR(__match, __type, __field) \
4950+ (__match)->dissector.offset[__type] = \
4951+ offsetof(struct nf_flow_key, __field)
4952+
4953+static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
4954+ struct ip_tunnel_info *tun_info)
4955+{
4956+ struct nf_flow_key *mask = &match->mask;
4957+ struct nf_flow_key *key = &match->key;
4958+ unsigned int enc_keys;
4959+
4960+ if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
4961+ return;
4962+
4963+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control);
4964+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
4965+ key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
4966+ mask->enc_key_id.keyid = 0xffffffff;
4967+ enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
4968+ BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);
4969+
4970+ if (ip_tunnel_info_af(tun_info) == AF_INET) {
4971+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
4972+ enc_ipv4);
4973+ key->enc_ipv4.src = tun_info->key.u.ipv4.dst;
4974+ key->enc_ipv4.dst = tun_info->key.u.ipv4.src;
4975+ if (key->enc_ipv4.src)
4976+ mask->enc_ipv4.src = 0xffffffff;
4977+ if (key->enc_ipv4.dst)
4978+ mask->enc_ipv4.dst = 0xffffffff;
4979+ enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
4980+ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
4981+ } else {
4982+ memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
4983+ sizeof(struct in6_addr));
4984+ memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src,
4985+ sizeof(struct in6_addr));
4986+ if (memcmp(&key->enc_ipv6.src, &in6addr_any,
4987+ sizeof(struct in6_addr)))
4988+ memset(&mask->enc_ipv6.src, 0xff,
4989+ sizeof(struct in6_addr));
4990+ if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
4991+ sizeof(struct in6_addr)))
4992+ memset(&mask->enc_ipv6.dst, 0xff,
4993+ sizeof(struct in6_addr));
4994+ enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
4995+ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
4996+ }
4997+
4998+ match->dissector.used_keys |= enc_keys;
4999+}
5000+
5001+static void nf_flow_rule_vlan_match(struct flow_dissector_key_vlan *key,
5002+ struct flow_dissector_key_vlan *mask,
5003+ u16 vlan_id, __be16 proto)
5004+{
5005+ key->vlan_id = vlan_id;
5006+ mask->vlan_id = VLAN_VID_MASK;
5007+ key->vlan_tpid = proto;
5008+ mask->vlan_tpid = 0xffff;
5009+}
5010+
5011+static int nf_flow_rule_match(struct nf_flow_match *match,
5012+ const struct flow_offload_tuple *tuple,
5013+ struct dst_entry *other_dst)
5014+{
5015+ struct nf_flow_key *mask = &match->mask;
5016+ struct nf_flow_key *key = &match->key;
5017+ struct ip_tunnel_info *tun_info;
5018+ bool vlan_encap = false;
5019+
5020+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
5021+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
5022+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
5023+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
5024+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
5025+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
5026+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);
5027+
5028+ if (other_dst && other_dst->lwtstate) {
5029+ tun_info = lwt_tun_info(other_dst->lwtstate);
5030+ nf_flow_rule_lwt_match(match, tun_info);
5031+ }
5032+
5033+ key->meta.ingress_ifindex = tuple->iifidx;
5034+ mask->meta.ingress_ifindex = 0xffffffff;
5035+
5036+ if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
5037+ tuple->encap[0].proto == htons(ETH_P_8021Q)) {
5038+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, vlan);
5039+ nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
5040+ tuple->encap[0].id,
5041+ tuple->encap[0].proto);
5042+ vlan_encap = true;
5043+ }
5044+
5045+ if (tuple->encap_num > 1 && !(tuple->in_vlan_ingress & BIT(1)) &&
5046+ tuple->encap[1].proto == htons(ETH_P_8021Q)) {
5047+ if (vlan_encap) {
5048+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CVLAN,
5049+ cvlan);
5050+ nf_flow_rule_vlan_match(&key->cvlan, &mask->cvlan,
5051+ tuple->encap[1].id,
5052+ tuple->encap[1].proto);
5053+ } else {
5054+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN,
5055+ vlan);
5056+ nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
5057+ tuple->encap[1].id,
5058+ tuple->encap[1].proto);
5059+ }
5060+ }
5061+
5062+ switch (tuple->l3proto) {
5063+ case AF_INET:
5064+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
5065+ key->basic.n_proto = htons(ETH_P_IP);
5066+ key->ipv4.src = tuple->src_v4.s_addr;
5067+ mask->ipv4.src = 0xffffffff;
5068+ key->ipv4.dst = tuple->dst_v4.s_addr;
5069+ mask->ipv4.dst = 0xffffffff;
5070+ break;
5071+ case AF_INET6:
5072+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
5073+ key->basic.n_proto = htons(ETH_P_IPV6);
5074+ key->ipv6.src = tuple->src_v6;
5075+ memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
5076+ key->ipv6.dst = tuple->dst_v6;
5077+ memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
5078+ break;
5079+ default:
5080+ return -EOPNOTSUPP;
5081+ }
5082+ mask->control.addr_type = 0xffff;
5083+ match->dissector.used_keys |= BIT(key->control.addr_type);
5084+ mask->basic.n_proto = 0xffff;
5085+
5086+ switch (tuple->l4proto) {
5087+ case IPPROTO_TCP:
5088+ key->tcp.flags = 0;
5089+ mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
5090+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
5091+ break;
5092+ case IPPROTO_UDP:
5093+ break;
5094+ default:
5095+ return -EOPNOTSUPP;
5096+ }
5097+
5098+ key->basic.ip_proto = tuple->l4proto;
5099+ mask->basic.ip_proto = 0xff;
5100+
5101+ key->tp.src = tuple->src_port;
5102+ mask->tp.src = 0xffff;
5103+ key->tp.dst = tuple->dst_port;
5104+ mask->tp.dst = 0xffff;
5105+
5106+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
5107+ BIT(FLOW_DISSECTOR_KEY_CONTROL) |
5108+ BIT(FLOW_DISSECTOR_KEY_BASIC) |
5109+ BIT(FLOW_DISSECTOR_KEY_PORTS);
5110+ return 0;
5111+}
5112+
5113+static void flow_offload_mangle(struct flow_action_entry *entry,
5114+ enum flow_action_mangle_base htype, u32 offset,
5115+ const __be32 *value, const __be32 *mask)
5116+{
5117+ entry->id = FLOW_ACTION_MANGLE;
5118+ entry->mangle.htype = htype;
5119+ entry->mangle.offset = offset;
5120+ memcpy(&entry->mangle.mask, mask, sizeof(u32));
5121+ memcpy(&entry->mangle.val, value, sizeof(u32));
5122+}
5123+
5124+static inline struct flow_action_entry *
5125+flow_action_entry_next(struct nf_flow_rule *flow_rule)
5126+{
5127+ int i = flow_rule->rule->action.num_entries++;
5128+
5129+ return &flow_rule->rule->action.entries[i];
5130+}
5131+
5132+static int flow_offload_eth_src(struct net *net,
5133+ const struct flow_offload *flow,
5134+ enum flow_offload_tuple_dir dir,
5135+ struct nf_flow_rule *flow_rule)
5136+{
5137+ struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
5138+ struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
5139+ const struct flow_offload_tuple *other_tuple, *this_tuple;
5140+ struct net_device *dev = NULL;
5141+ const unsigned char *addr;
5142+ u32 mask, val;
5143+ u16 val16;
5144+
5145+ this_tuple = &flow->tuplehash[dir].tuple;
5146+
5147+ switch (this_tuple->xmit_type) {
5148+ case FLOW_OFFLOAD_XMIT_DIRECT:
5149+ addr = this_tuple->out.h_source;
5150+ break;
5151+ case FLOW_OFFLOAD_XMIT_NEIGH:
5152+ other_tuple = &flow->tuplehash[!dir].tuple;
5153+ dev = dev_get_by_index(net, other_tuple->iifidx);
5154+ if (!dev)
5155+ return -ENOENT;
5156+
5157+ addr = dev->dev_addr;
5158+ break;
5159+ default:
5160+ return -EOPNOTSUPP;
5161+ }
5162+
5163+ mask = ~0xffff0000;
5164+ memcpy(&val16, addr, 2);
5165+ val = val16 << 16;
5166+ flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
5167+ &val, &mask);
5168+
5169+ mask = ~0xffffffff;
5170+ memcpy(&val, addr + 2, 4);
5171+ flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
5172+ &val, &mask);
5173+
5174+	if (dev)
5175+ dev_put(dev);
5176+
5177+ return 0;
5178+}
5179+
5180+static int flow_offload_eth_dst(struct net *net,
5181+ const struct flow_offload *flow,
5182+ enum flow_offload_tuple_dir dir,
5183+ struct nf_flow_rule *flow_rule)
5184+{
5185+ struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
5186+ struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
5187+ const struct flow_offload_tuple *other_tuple, *this_tuple;
5188+ const struct dst_entry *dst_cache;
5189+ unsigned char ha[ETH_ALEN];
5190+ struct neighbour *n;
5191+ const void *daddr;
5192+ u32 mask, val;
5193+ u8 nud_state;
5194+ u16 val16;
5195+
5196+ this_tuple = &flow->tuplehash[dir].tuple;
5197+
5198+ switch (this_tuple->xmit_type) {
5199+ case FLOW_OFFLOAD_XMIT_DIRECT:
5200+ ether_addr_copy(ha, this_tuple->out.h_dest);
5201+ break;
5202+ case FLOW_OFFLOAD_XMIT_NEIGH:
5203+ other_tuple = &flow->tuplehash[!dir].tuple;
5204+ daddr = &other_tuple->src_v4;
5205+ dst_cache = this_tuple->dst_cache;
5206+ n = dst_neigh_lookup(dst_cache, daddr);
5207+ if (!n)
5208+ return -ENOENT;
5209+
5210+ read_lock_bh(&n->lock);
5211+ nud_state = n->nud_state;
5212+ ether_addr_copy(ha, n->ha);
5213+ read_unlock_bh(&n->lock);
5214+ neigh_release(n);
5215+
5216+ if (!(nud_state & NUD_VALID))
5217+ return -ENOENT;
5218+ break;
5219+ default:
5220+ return -EOPNOTSUPP;
5221+ }
5222+
5223+ mask = ~0xffffffff;
5224+ memcpy(&val, ha, 4);
5225+ flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
5226+ &val, &mask);
5227+
5228+ mask = ~0x0000ffff;
5229+ memcpy(&val16, ha + 4, 2);
5230+ val = val16;
5231+ flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
5232+ &val, &mask);
5233+
5234+ return 0;
5235+}
5236+
5237+static void flow_offload_ipv4_snat(struct net *net,
5238+ const struct flow_offload *flow,
5239+ enum flow_offload_tuple_dir dir,
5240+ struct nf_flow_rule *flow_rule)
5241+{
5242+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5243+ u32 mask = ~htonl(0xffffffff);
5244+ __be32 addr;
5245+ u32 offset;
5246+
5247+ switch (dir) {
5248+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5249+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
5250+ offset = offsetof(struct iphdr, saddr);
5251+ break;
5252+ case FLOW_OFFLOAD_DIR_REPLY:
5253+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
5254+ offset = offsetof(struct iphdr, daddr);
5255+ break;
5256+ default:
5257+ return;
5258+ }
5259+
5260+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
5261+ &addr, &mask);
5262+}
5263+
5264+static void flow_offload_ipv4_dnat(struct net *net,
5265+ const struct flow_offload *flow,
5266+ enum flow_offload_tuple_dir dir,
5267+ struct nf_flow_rule *flow_rule)
5268+{
5269+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5270+ u32 mask = ~htonl(0xffffffff);
5271+ __be32 addr;
5272+ u32 offset;
5273+
5274+ switch (dir) {
5275+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5276+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
5277+ offset = offsetof(struct iphdr, daddr);
5278+ break;
5279+ case FLOW_OFFLOAD_DIR_REPLY:
5280+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
5281+ offset = offsetof(struct iphdr, saddr);
5282+ break;
5283+ default:
5284+ return;
5285+ }
5286+
5287+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
5288+ &addr, &mask);
5289+}
5290+
5291+static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
5292+ unsigned int offset,
5293+ const __be32 *addr, const __be32 *mask)
5294+{
5295+ struct flow_action_entry *entry;
5296+	int i, j;
5297+
5298+	for (i = 0, j = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32), j++) {
5299+		entry = flow_action_entry_next(flow_rule);
5300+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
5301+				    offset + i, &addr[j], mask);
5302+	}
5303+}
5304+
5305+static void flow_offload_ipv6_snat(struct net *net,
5306+ const struct flow_offload *flow,
5307+ enum flow_offload_tuple_dir dir,
5308+ struct nf_flow_rule *flow_rule)
5309+{
5310+ u32 mask = ~htonl(0xffffffff);
5311+ const __be32 *addr;
5312+ u32 offset;
5313+
5314+ switch (dir) {
5315+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5316+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
5317+ offset = offsetof(struct ipv6hdr, saddr);
5318+ break;
5319+ case FLOW_OFFLOAD_DIR_REPLY:
5320+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
5321+ offset = offsetof(struct ipv6hdr, daddr);
5322+ break;
5323+ default:
5324+ return;
5325+ }
5326+
5327+ flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
5328+}
5329+
5330+static void flow_offload_ipv6_dnat(struct net *net,
5331+ const struct flow_offload *flow,
5332+ enum flow_offload_tuple_dir dir,
5333+ struct nf_flow_rule *flow_rule)
5334+{
5335+ u32 mask = ~htonl(0xffffffff);
5336+ const __be32 *addr;
5337+ u32 offset;
5338+
5339+ switch (dir) {
5340+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5341+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
5342+ offset = offsetof(struct ipv6hdr, daddr);
5343+ break;
5344+ case FLOW_OFFLOAD_DIR_REPLY:
5345+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
5346+ offset = offsetof(struct ipv6hdr, saddr);
5347+ break;
5348+ default:
5349+ return;
5350+ }
5351+
5352+ flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
5353+}
5354+
5355+static int flow_offload_l4proto(const struct flow_offload *flow)
5356+{
5357+ u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
5358+ u8 type = 0;
5359+
5360+ switch (protonum) {
5361+ case IPPROTO_TCP:
5362+ type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
5363+ break;
5364+ case IPPROTO_UDP:
5365+ type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
5366+ break;
5367+ default:
5368+ break;
5369+ }
5370+
5371+ return type;
5372+}
5373+
5374+static void flow_offload_port_snat(struct net *net,
5375+ const struct flow_offload *flow,
5376+ enum flow_offload_tuple_dir dir,
5377+ struct nf_flow_rule *flow_rule)
5378+{
5379+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5380+ u32 mask, port;
5381+ u32 offset;
5382+
5383+ switch (dir) {
5384+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5385+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
5386+ offset = 0; /* offsetof(struct tcphdr, source); */
5387+ port = htonl(port << 16);
5388+ mask = ~htonl(0xffff0000);
5389+ break;
5390+ case FLOW_OFFLOAD_DIR_REPLY:
5391+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
5392+ offset = 0; /* offsetof(struct tcphdr, dest); */
5393+ port = htonl(port);
5394+ mask = ~htonl(0xffff);
5395+ break;
5396+ default:
5397+ return;
5398+ }
5399+
5400+ flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
5401+ &port, &mask);
5402+}
5403+
5404+static void flow_offload_port_dnat(struct net *net,
5405+ const struct flow_offload *flow,
5406+ enum flow_offload_tuple_dir dir,
5407+ struct nf_flow_rule *flow_rule)
5408+{
5409+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5410+ u32 mask, port;
5411+ u32 offset;
5412+
5413+ switch (dir) {
5414+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5415+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
5416+ offset = 0; /* offsetof(struct tcphdr, dest); */
5417+ port = htonl(port);
5418+ mask = ~htonl(0xffff);
5419+ break;
5420+ case FLOW_OFFLOAD_DIR_REPLY:
5421+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
5422+ offset = 0; /* offsetof(struct tcphdr, source); */
5423+ port = htonl(port << 16);
5424+ mask = ~htonl(0xffff0000);
5425+ break;
5426+ default:
5427+ return;
5428+ }
5429+
5430+ flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
5431+ &port, &mask);
5432+}
5433+
5434+static void flow_offload_ipv4_checksum(struct net *net,
5435+ const struct flow_offload *flow,
5436+ struct nf_flow_rule *flow_rule)
5437+{
5438+ u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
5439+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5440+
5441+ entry->id = FLOW_ACTION_CSUM;
5442+ entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;
5443+
5444+ switch (protonum) {
5445+ case IPPROTO_TCP:
5446+ entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
5447+ break;
5448+ case IPPROTO_UDP:
5449+ entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
5450+ break;
5451+ }
5452+}
5453+
5454+static void flow_offload_redirect(struct net *net,
5455+ const struct flow_offload *flow,
5456+ enum flow_offload_tuple_dir dir,
5457+ struct nf_flow_rule *flow_rule)
5458+{
5459+ const struct flow_offload_tuple *this_tuple, *other_tuple;
5460+ struct flow_action_entry *entry;
5461+ struct net_device *dev;
5462+ int ifindex;
5463+
5464+ this_tuple = &flow->tuplehash[dir].tuple;
5465+ switch (this_tuple->xmit_type) {
5466+ case FLOW_OFFLOAD_XMIT_DIRECT:
5467+ this_tuple = &flow->tuplehash[dir].tuple;
5468+ ifindex = this_tuple->out.hw_ifidx;
5469+ break;
5470+ case FLOW_OFFLOAD_XMIT_NEIGH:
5471+ other_tuple = &flow->tuplehash[!dir].tuple;
5472+ ifindex = other_tuple->iifidx;
5473+ break;
5474+ default:
5475+ return;
5476+ }
5477+
5478+ dev = dev_get_by_index(net, ifindex);
5479+ if (!dev)
5480+ return;
5481+
5482+ entry = flow_action_entry_next(flow_rule);
5483+ entry->id = FLOW_ACTION_REDIRECT;
5484+ entry->dev = dev;
5485+}
5486+
5487+static void flow_offload_encap_tunnel(const struct flow_offload *flow,
5488+ enum flow_offload_tuple_dir dir,
5489+ struct nf_flow_rule *flow_rule)
5490+{
5491+ const struct flow_offload_tuple *this_tuple;
5492+ struct flow_action_entry *entry;
5493+ struct dst_entry *dst;
5494+
5495+ this_tuple = &flow->tuplehash[dir].tuple;
5496+ if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
5497+ return;
5498+
5499+ dst = this_tuple->dst_cache;
5500+ if (dst && dst->lwtstate) {
5501+ struct ip_tunnel_info *tun_info;
5502+
5503+ tun_info = lwt_tun_info(dst->lwtstate);
5504+ if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
5505+ entry = flow_action_entry_next(flow_rule);
5506+ entry->id = FLOW_ACTION_TUNNEL_ENCAP;
5507+ entry->tunnel = tun_info;
5508+ }
5509+ }
5510+}
5511+
5512+static void flow_offload_decap_tunnel(const struct flow_offload *flow,
5513+ enum flow_offload_tuple_dir dir,
5514+ struct nf_flow_rule *flow_rule)
5515+{
5516+ const struct flow_offload_tuple *other_tuple;
5517+ struct flow_action_entry *entry;
5518+ struct dst_entry *dst;
5519+
5520+ other_tuple = &flow->tuplehash[!dir].tuple;
5521+ if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
5522+ return;
5523+
5524+ dst = other_tuple->dst_cache;
5525+ if (dst && dst->lwtstate) {
5526+ struct ip_tunnel_info *tun_info;
5527+
5528+ tun_info = lwt_tun_info(dst->lwtstate);
5529+ if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
5530+ entry = flow_action_entry_next(flow_rule);
5531+ entry->id = FLOW_ACTION_TUNNEL_DECAP;
5532+ }
5533+ }
5534+}
5535+
5536+static int
5537+nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
5538+ enum flow_offload_tuple_dir dir,
5539+ struct nf_flow_rule *flow_rule)
5540+{
5541+ const struct flow_offload_tuple *other_tuple;
5542+ const struct flow_offload_tuple *tuple;
5543+ int i;
5544+
5545+ flow_offload_decap_tunnel(flow, dir, flow_rule);
5546+ flow_offload_encap_tunnel(flow, dir, flow_rule);
5547+
5548+ if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
5549+ flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
5550+ return -1;
5551+
5552+ tuple = &flow->tuplehash[dir].tuple;
5553+
5554+ for (i = 0; i < tuple->encap_num; i++) {
5555+ struct flow_action_entry *entry;
5556+
5557+ if (tuple->in_vlan_ingress & BIT(i))
5558+ continue;
5559+
5560+ if (tuple->encap[i].proto == htons(ETH_P_8021Q)) {
5561+ entry = flow_action_entry_next(flow_rule);
5562+ entry->id = FLOW_ACTION_VLAN_POP;
5563+ }
5564+ }
5565+
5566+ other_tuple = &flow->tuplehash[!dir].tuple;
5567+
5568+ for (i = 0; i < other_tuple->encap_num; i++) {
5569+ struct flow_action_entry *entry;
5570+
5571+ if (other_tuple->in_vlan_ingress & BIT(i))
5572+ continue;
5573+
5574+ entry = flow_action_entry_next(flow_rule);
5575+
5576+ switch (other_tuple->encap[i].proto) {
5577+ case htons(ETH_P_PPP_SES):
5578+ entry->id = FLOW_ACTION_PPPOE_PUSH;
5579+ entry->pppoe.sid = other_tuple->encap[i].id;
5580+ break;
5581+ case htons(ETH_P_8021Q):
5582+ entry->id = FLOW_ACTION_VLAN_PUSH;
5583+ entry->vlan.vid = other_tuple->encap[i].id;
5584+ entry->vlan.proto = other_tuple->encap[i].proto;
5585+ break;
5586+ }
5587+ }
5588+
5589+ return 0;
5590+}
5591+
5592+int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
5593+ enum flow_offload_tuple_dir dir,
5594+ struct nf_flow_rule *flow_rule)
5595+{
5596+ if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
5597+ return -1;
5598+
5599+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
5600+ flow_offload_ipv4_snat(net, flow, dir, flow_rule);
5601+ flow_offload_port_snat(net, flow, dir, flow_rule);
5602+ }
5603+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
5604+ flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
5605+ flow_offload_port_dnat(net, flow, dir, flow_rule);
5606+ }
5607+ if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
5608+ test_bit(NF_FLOW_DNAT, &flow->flags))
5609+ flow_offload_ipv4_checksum(net, flow, flow_rule);
5610+
5611+ flow_offload_redirect(net, flow, dir, flow_rule);
5612+
5613+ return 0;
5614+}
5615+EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);
5616+
5617+int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
5618+ enum flow_offload_tuple_dir dir,
5619+ struct nf_flow_rule *flow_rule)
5620+{
5621+ if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
5622+ return -1;
5623+
5624+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
5625+ flow_offload_ipv6_snat(net, flow, dir, flow_rule);
5626+ flow_offload_port_snat(net, flow, dir, flow_rule);
5627+ }
5628+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
5629+ flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
5630+ flow_offload_port_dnat(net, flow, dir, flow_rule);
5631+ }
5632+
5633+ flow_offload_redirect(net, flow, dir, flow_rule);
5634+
5635+ return 0;
5636+}
5637+EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);
5638+
5639+#define NF_FLOW_RULE_ACTION_MAX 16
5640+
5641+static struct nf_flow_rule *
5642+nf_flow_offload_rule_alloc(struct net *net,
5643+ const struct flow_offload_work *offload,
5644+ enum flow_offload_tuple_dir dir)
5645+{
5646+ const struct nf_flowtable *flowtable = offload->flowtable;
5647+ const struct flow_offload_tuple *tuple, *other_tuple;
5648+ const struct flow_offload *flow = offload->flow;
5649+ struct dst_entry *other_dst = NULL;
5650+ struct nf_flow_rule *flow_rule;
5651+ int err = -ENOMEM;
5652+
5653+ flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
5654+ if (!flow_rule)
5655+ goto err_flow;
5656+
5657+ flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
5658+ if (!flow_rule->rule)
5659+ goto err_flow_rule;
5660+
5661+ flow_rule->rule->match.dissector = &flow_rule->match.dissector;
5662+ flow_rule->rule->match.mask = &flow_rule->match.mask;
5663+ flow_rule->rule->match.key = &flow_rule->match.key;
5664+
5665+ tuple = &flow->tuplehash[dir].tuple;
5666+ other_tuple = &flow->tuplehash[!dir].tuple;
5667+ if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
5668+ other_dst = other_tuple->dst_cache;
5669+
5670+ err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
5671+ if (err < 0)
5672+ goto err_flow_match;
5673+
5674+ flow_rule->rule->action.num_entries = 0;
5675+ if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
5676+ goto err_flow_match;
5677+
5678+ return flow_rule;
5679+
5680+err_flow_match:
5681+ kfree(flow_rule->rule);
5682+err_flow_rule:
5683+ kfree(flow_rule);
5684+err_flow:
5685+ return NULL;
5686+}
5687+
5688+static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
5689+{
5690+ struct flow_action_entry *entry;
5691+ int i;
5692+
5693+ for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
5694+ entry = &flow_rule->rule->action.entries[i];
5695+ if (entry->id != FLOW_ACTION_REDIRECT)
5696+ continue;
5697+
5698+ dev_put(entry->dev);
5699+ }
5700+ kfree(flow_rule->rule);
5701+ kfree(flow_rule);
5702+}
5703+
5704+static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
5705+{
5706+ int i;
5707+
5708+ for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
5709+ __nf_flow_offload_destroy(flow_rule[i]);
5710+}
5711+
5712+static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
5713+ struct nf_flow_rule *flow_rule[])
5714+{
5715+ struct net *net = read_pnet(&offload->flowtable->net);
5716+
5717+ flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
5718+ FLOW_OFFLOAD_DIR_ORIGINAL);
5719+ if (!flow_rule[0])
5720+ return -ENOMEM;
5721+
5722+ flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
5723+ FLOW_OFFLOAD_DIR_REPLY);
5724+ if (!flow_rule[1]) {
5725+ __nf_flow_offload_destroy(flow_rule[0]);
5726+ return -ENOMEM;
5727+ }
5728+
5729+ return 0;
5730+}
5731+
5732+static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
5733+ __be16 proto, int priority,
5734+ enum flow_cls_command cmd,
5735+ const struct flow_offload_tuple *tuple,
5736+ struct netlink_ext_ack *extack)
5737+{
5738+ cls_flow->common.protocol = proto;
5739+ cls_flow->common.prio = priority;
5740+ cls_flow->common.extack = extack;
5741+ cls_flow->command = cmd;
5742+ cls_flow->cookie = (unsigned long)tuple;
5743+}
5744+
5745+static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
5746+ struct flow_offload *flow,
5747+ struct nf_flow_rule *flow_rule,
5748+ enum flow_offload_tuple_dir dir,
5749+ int priority, int cmd,
5750+ struct flow_stats *stats,
5751+ struct list_head *block_cb_list)
5752+{
5753+ struct flow_cls_offload cls_flow = {};
5754+ struct flow_block_cb *block_cb;
5755+ struct netlink_ext_ack extack;
5756+ __be16 proto = ETH_P_ALL;
5757+ int err, i = 0;
5758+
5759+ nf_flow_offload_init(&cls_flow, proto, priority, cmd,
5760+ &flow->tuplehash[dir].tuple, &extack);
5761+ if (cmd == FLOW_CLS_REPLACE)
5762+ cls_flow.rule = flow_rule->rule;
5763+
5764+	down_read(&flowtable->flow_block_lock);
5765+	list_for_each_entry(block_cb, block_cb_list, list) {
5766+ err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
5767+ block_cb->cb_priv);
5768+ if (err < 0)
5769+ continue;
5770+
5771+ i++;
5772+ }
5773+	up_read(&flowtable->flow_block_lock);
5774+
5775+ if (cmd == FLOW_CLS_STATS)
5776+ memcpy(stats, &cls_flow.stats, sizeof(*stats));
5777+
5778+ return i;
5779+}
5780+
5781+static int flow_offload_tuple_add(struct flow_offload_work *offload,
5782+ struct nf_flow_rule *flow_rule,
5783+ enum flow_offload_tuple_dir dir)
5784+{
5785+ return nf_flow_offload_tuple(offload->flowtable, offload->flow,
5786+ flow_rule, dir, offload->priority,
5787+ FLOW_CLS_REPLACE, NULL,
5788+ &offload->flowtable->flow_block.cb_list);
5789+}
5790+
5791+static void flow_offload_tuple_del(struct flow_offload_work *offload,
5792+ enum flow_offload_tuple_dir dir)
5793+{
5794+ nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
5795+ offload->priority, FLOW_CLS_DESTROY, NULL,
5796+ &offload->flowtable->flow_block.cb_list);
5797+}
5798+
5799+static int flow_offload_rule_add(struct flow_offload_work *offload,
5800+ struct nf_flow_rule *flow_rule[])
5801+{
5802+ int ok_count = 0;
5803+
5804+ ok_count += flow_offload_tuple_add(offload, flow_rule[0],
5805+ FLOW_OFFLOAD_DIR_ORIGINAL);
5806+ ok_count += flow_offload_tuple_add(offload, flow_rule[1],
5807+ FLOW_OFFLOAD_DIR_REPLY);
5808+ if (ok_count == 0)
5809+ return -ENOENT;
5810+
5811+ return 0;
5812+}
5813+
5814+static void flow_offload_work_add(struct flow_offload_work *offload)
5815+{
5816+ struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
5817+ int err;
5818+
5819+ err = nf_flow_offload_alloc(offload, flow_rule);
5820+ if (err < 0)
5821+ return;
5822+
5823+ err = flow_offload_rule_add(offload, flow_rule);
5824+ if (err < 0)
5825+ goto out;
5826+
5827+ set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
5828+
5829+out:
5830+ nf_flow_offload_destroy(flow_rule);
5831+}
5832+
5833+static void flow_offload_work_del(struct flow_offload_work *offload)
5834+{
5835+ clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
5836+ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
5837+ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
5838+ set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
5839+}
5840+
5841+static void flow_offload_tuple_stats(struct flow_offload_work *offload,
5842+ enum flow_offload_tuple_dir dir,
5843+ struct flow_stats *stats)
5844+{
5845+ nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
5846+ offload->priority, FLOW_CLS_STATS, stats,
5847+ &offload->flowtable->flow_block.cb_list);
5848+}
5849+
5850+static void flow_offload_work_stats(struct flow_offload_work *offload)
5851+{
5852+ struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
5853+ u64 lastused;
5854+
5855+ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
5856+ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);
5857+
5858+ lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
5859+ offload->flow->timeout = max_t(u64, offload->flow->timeout,
5860+ lastused + flow_offload_get_timeout(offload->flow));
5861+
5862+ if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
5863+ if (stats[0].pkts)
5864+ nf_ct_acct_add(offload->flow->ct,
5865+ FLOW_OFFLOAD_DIR_ORIGINAL,
5866+ stats[0].pkts, stats[0].bytes);
5867+ if (stats[1].pkts)
5868+ nf_ct_acct_add(offload->flow->ct,
5869+ FLOW_OFFLOAD_DIR_REPLY,
5870+ stats[1].pkts, stats[1].bytes);
5871+ }
5872+}
5873+
5874+static void flow_offload_work_handler(struct work_struct *work)
5875+{
5876+ struct flow_offload_work *offload;
5877+
5878+ offload = container_of(work, struct flow_offload_work, work);
5879+ switch (offload->cmd) {
5880+ case FLOW_CLS_REPLACE:
5881+ flow_offload_work_add(offload);
5882+ break;
5883+ case FLOW_CLS_DESTROY:
5884+ flow_offload_work_del(offload);
5885+ break;
5886+ case FLOW_CLS_STATS:
5887+ flow_offload_work_stats(offload);
5888+ break;
5889+ default:
5890+ WARN_ON_ONCE(1);
5891+ }
5892+
5893+ clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags);
5894+ kfree(offload);
5895+}
5896+
5897+static void flow_offload_queue_work(struct flow_offload_work *offload)
5898+{
5899+ if (offload->cmd == FLOW_CLS_REPLACE)
5900+ queue_work(nf_flow_offload_add_wq, &offload->work);
5901+ else if (offload->cmd == FLOW_CLS_DESTROY)
5902+ queue_work(nf_flow_offload_del_wq, &offload->work);
5903+ else
5904+ queue_work(nf_flow_offload_stats_wq, &offload->work);
5905+}
5906+
5907+static struct flow_offload_work *
5908+nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
5909+ struct flow_offload *flow, unsigned int cmd)
5910+{
5911+ struct flow_offload_work *offload;
5912+
5913+ if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags))
5914+ return NULL;
5915+
5916+ offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
5917+ if (!offload) {
5918+ clear_bit(NF_FLOW_HW_PENDING, &flow->flags);
5919+ return NULL;
5920+ }
5921+
5922+ offload->cmd = cmd;
5923+ offload->flow = flow;
5924+ offload->priority = flowtable->priority;
5925+ offload->flowtable = flowtable;
5926+ INIT_WORK(&offload->work, flow_offload_work_handler);
5927+
5928+ return offload;
5929+}
5930+
5931+
5932+void nf_flow_offload_add(struct nf_flowtable *flowtable,
5933+ struct flow_offload *flow)
5934+{
5935+ struct flow_offload_work *offload;
5936+
5937+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
5938+ if (!offload)
5939+ return;
5940+
5941+ flow_offload_queue_work(offload);
5942+}
5943+
5944+void nf_flow_offload_del(struct nf_flowtable *flowtable,
5945+ struct flow_offload *flow)
5946+{
5947+ struct flow_offload_work *offload;
5948+
5949+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY);
5950+ if (!offload)
5951+ return;
5952+
5953+ set_bit(NF_FLOW_HW_DYING, &flow->flags);
5954+ flow_offload_queue_work(offload);
5955+}
5956+
5957+void nf_flow_offload_stats(struct nf_flowtable *flowtable,
5958+			   struct flow_offload *flow, bool force)
5959+{
5960+ struct flow_offload_work *offload;
5961+ __s32 delta;
5962+
5963+	if (!force) {
5964+ delta = nf_flow_timeout_delta(flow->timeout);
5965+ if ((delta >= (9 * flow_offload_get_timeout(flow)) / 10))
5966+ return;
5967+ }
5968+
5969+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
5970+ if (!offload)
5971+ return;
5972+
5973+ flow_offload_queue_work(offload);
5974+}
5975+
5976+void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
5977+{
5978+ if (nf_flowtable_hw_offload(flowtable)) {
5979+ flush_workqueue(nf_flow_offload_add_wq);
5980+ flush_workqueue(nf_flow_offload_del_wq);
5981+ flush_workqueue(nf_flow_offload_stats_wq);
5982+ }
5983+}
5984+
5985+static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
5986+ struct flow_block_offload *bo,
5987+ enum flow_block_command cmd)
5988+{
5989+ struct flow_block_cb *block_cb, *next;
5990+ int err = 0;
5991+
5992+	down_write(&flowtable->flow_block_lock);
5993+
5994+	switch (cmd) {
5995+ case FLOW_BLOCK_BIND:
5996+ list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
5997+ break;
5998+ case FLOW_BLOCK_UNBIND:
5999+ list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
6000+ list_del(&block_cb->list);
6001+ flow_block_cb_free(block_cb);
6002+ }
6003+ break;
6004+ default:
6005+ WARN_ON_ONCE(1);
6006+ err = -EOPNOTSUPP;
6007+ }
6008+
6009+	up_write(&flowtable->flow_block_lock);
6010+
6011+	return err;
6012+}
6013+
6014+static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
6015+ struct net *net,
6016+ enum flow_block_command cmd,
6017+ struct nf_flowtable *flowtable,
6018+ struct netlink_ext_ack *extack)
6019+{
6020+ memset(bo, 0, sizeof(*bo));
6021+ bo->net = net;
6022+ bo->block = &flowtable->flow_block;
6023+ bo->command = cmd;
6024+ bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
6025+ bo->extack = extack;
6026+ INIT_LIST_HEAD(&bo->cb_list);
6027+}
6028+
6029+static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
6030+ struct nf_flowtable *flowtable,
6031+ struct net_device *dev,
6032+ enum flow_block_command cmd,
6033+ struct netlink_ext_ack *extack)
6034+{
6035+ nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
6036+ extack);
6037+ flow_indr_block_call(dev, bo, cmd);
6038+
6039+ if (list_empty(&bo->cb_list))
6040+ return -EOPNOTSUPP;
6041+
6042+ return 0;
6043+}
6044+
6045+static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
6046+ struct nf_flowtable *flowtable,
6047+ struct net_device *dev,
6048+ enum flow_block_command cmd,
6049+ struct netlink_ext_ack *extack)
6050+{
6051+ int err;
6052+
6053+ nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
6054+ extack);
6055+	down_write(&flowtable->flow_block_lock);
6056+	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
6057+	up_write(&flowtable->flow_block_lock);
6058+	if (err < 0)
6059+ return err;
6060+
6061+ return 0;
6062+}
6063+
6064+int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
6065+ struct net_device *dev,
6066+ enum flow_block_command cmd)
6067+{
6068+ struct netlink_ext_ack extack = {};
6069+ struct flow_block_offload bo;
6070+ int err;
6071+
6072+ if (!nf_flowtable_hw_offload(flowtable))
6073+ return 0;
6074+
6075+ if (dev->netdev_ops->ndo_setup_tc)
6076+ err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
6077+ &extack);
6078+ else
6079+ err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
6080+ &extack);
6081+ if (err < 0)
6082+ return err;
6083+
6084+ return nf_flow_table_block_setup(flowtable, &bo, cmd);
6085+}
6086+EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);
6087+
6088+int nf_flow_table_offload_init(void)
6089+{
6090+ nf_flow_offload_add_wq = alloc_workqueue("nf_ft_offload_add",
6091+ WQ_UNBOUND | WQ_SYSFS, 0);
6092+ if (!nf_flow_offload_add_wq)
6093+ return -ENOMEM;
6094+
6095+ nf_flow_offload_del_wq = alloc_workqueue("nf_ft_offload_del",
6096+ WQ_UNBOUND | WQ_SYSFS, 0);
6097+ if (!nf_flow_offload_del_wq)
6098+ goto err_del_wq;
6099+
6100+ nf_flow_offload_stats_wq = alloc_workqueue("nf_ft_offload_stats",
6101+ WQ_UNBOUND | WQ_SYSFS, 0);
6102+ if (!nf_flow_offload_stats_wq)
6103+ goto err_stats_wq;
6104+
6105+ return 0;
6106+
6107+err_stats_wq:
6108+ destroy_workqueue(nf_flow_offload_del_wq);
6109+err_del_wq:
6110+ destroy_workqueue(nf_flow_offload_add_wq);
6111+ return -ENOMEM;
6112+}
6113+
6114+void nf_flow_table_offload_exit(void)
6115+{
6116+ destroy_workqueue(nf_flow_offload_add_wq);
6117+ destroy_workqueue(nf_flow_offload_del_wq);
6118+ destroy_workqueue(nf_flow_offload_stats_wq);
6119+}
6120diff --git a/net/netfilter/xt_FLOWOFFLOAD.c b/net/netfilter/xt_FLOWOFFLOAD.c
6121new file mode 100644
6122index 0000000..12f067c
6123--- /dev/null
6124+++ b/net/netfilter/xt_FLOWOFFLOAD.c
6125@@ -0,0 +1,794 @@
6126+/*
6127+ * Copyright (C) 2018-2021 Felix Fietkau <nbd@nbd.name>
6128+ *
6129+ * This program is free software; you can redistribute it and/or modify
6130+ * it under the terms of the GNU General Public License version 2 as
6131+ * published by the Free Software Foundation.
6132+ */
6133+#include <linux/module.h>
6134+#include <linux/init.h>
6135+#include <linux/netfilter.h>
6136+#include <linux/netfilter/xt_FLOWOFFLOAD.h>
6137+#include <linux/if_vlan.h>
6138+#include <net/ip.h>
6139+#include <net/netfilter/nf_conntrack.h>
6140+#include <net/netfilter/nf_conntrack_extend.h>
6141+#include <net/netfilter/nf_conntrack_helper.h>
6142+#include <net/netfilter/nf_flow_table.h>
6143+
6144+struct xt_flowoffload_hook {
6145+ struct hlist_node list;
6146+ struct nf_hook_ops ops;
6147+ struct net *net;
6148+ bool registered;
6149+ bool used;
6150+};
6151+
6152+struct xt_flowoffload_table {
6153+ struct nf_flowtable ft;
6154+ struct hlist_head hooks;
6155+ struct delayed_work work;
6156+};
6157+
6158+struct nf_forward_info {
6159+ const struct net_device *indev;
6160+ const struct net_device *outdev;
6161+ const struct net_device *hw_outdev;
6162+ struct id {
6163+ __u16 id;
6164+ __be16 proto;
6165+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
6166+ u8 num_encaps;
6167+ u8 ingress_vlans;
6168+ u8 h_source[ETH_ALEN];
6169+ u8 h_dest[ETH_ALEN];
6170+ enum flow_offload_xmit_type xmit_type;
6171+};
6172+
6173+static DEFINE_SPINLOCK(hooks_lock);
6174+
6175+struct xt_flowoffload_table flowtable[2];
6176+
6177+static unsigned int
6178+xt_flowoffload_net_hook(void *priv, struct sk_buff *skb,
6179+ const struct nf_hook_state *state)
6180+{
6181+ struct vlan_ethhdr *veth;
6182+ __be16 proto;
6183+
6184+ switch (skb->protocol) {
6185+ case htons(ETH_P_8021Q):
6186+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
6187+ proto = veth->h_vlan_encapsulated_proto;
6188+ break;
6189+ case htons(ETH_P_PPP_SES):
6190+ proto = nf_flow_pppoe_proto(skb);
6191+ break;
6192+ default:
6193+ proto = skb->protocol;
6194+ break;
6195+ }
6196+
6197+ switch (proto) {
6198+ case htons(ETH_P_IP):
6199+ return nf_flow_offload_ip_hook(priv, skb, state);
6200+ case htons(ETH_P_IPV6):
6201+ return nf_flow_offload_ipv6_hook(priv, skb, state);
6202+ }
6203+
6204+ return NF_ACCEPT;
6205+}
6206+
6207+static int
6208+xt_flowoffload_create_hook(struct xt_flowoffload_table *table,
6209+ struct net_device *dev)
6210+{
6211+ struct xt_flowoffload_hook *hook;
6212+ struct nf_hook_ops *ops;
6213+
6214+ hook = kzalloc(sizeof(*hook), GFP_ATOMIC);
6215+ if (!hook)
6216+ return -ENOMEM;
6217+
6218+ ops = &hook->ops;
6219+ ops->pf = NFPROTO_NETDEV;
6220+ ops->hooknum = NF_NETDEV_INGRESS;
6221+ ops->priority = 10;
6222+ ops->priv = &table->ft;
6223+ ops->hook = xt_flowoffload_net_hook;
6224+ ops->dev = dev;
6225+
6226+ hlist_add_head(&hook->list, &table->hooks);
6227+ mod_delayed_work(system_power_efficient_wq, &table->work, 0);
6228+
6229+ return 0;
6230+}
6231+
6232+static struct xt_flowoffload_hook *
6233+flow_offload_lookup_hook(struct xt_flowoffload_table *table,
6234+ struct net_device *dev)
6235+{
6236+ struct xt_flowoffload_hook *hook;
6237+
6238+ hlist_for_each_entry(hook, &table->hooks, list) {
6239+ if (hook->ops.dev == dev)
6240+ return hook;
6241+ }
6242+
6243+ return NULL;
6244+}
6245+
6246+static void
6247+xt_flowoffload_check_device(struct xt_flowoffload_table *table,
6248+ struct net_device *dev)
6249+{
6250+ struct xt_flowoffload_hook *hook;
6251+
6252+ if (!dev)
6253+ return;
6254+
6255+ spin_lock_bh(&hooks_lock);
6256+ hook = flow_offload_lookup_hook(table, dev);
6257+ if (hook)
6258+ hook->used = true;
6259+ else
6260+ xt_flowoffload_create_hook(table, dev);
6261+ spin_unlock_bh(&hooks_lock);
6262+}
6263+
6264+static void
6265+xt_flowoffload_register_hooks(struct xt_flowoffload_table *table)
6266+{
6267+ struct xt_flowoffload_hook *hook;
6268+
6269+restart:
6270+ hlist_for_each_entry(hook, &table->hooks, list) {
6271+ if (hook->registered)
6272+ continue;
6273+
6274+ hook->registered = true;
6275+ hook->net = dev_net(hook->ops.dev);
6276+ spin_unlock_bh(&hooks_lock);
6277+ nf_register_net_hook(hook->net, &hook->ops);
6278+ if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
6279+ table->ft.type->setup(&table->ft, hook->ops.dev,
6280+ FLOW_BLOCK_BIND);
6281+ spin_lock_bh(&hooks_lock);
6282+ goto restart;
6283+ }
6284+
6285+}
6286+
6287+static bool
6288+xt_flowoffload_cleanup_hooks(struct xt_flowoffload_table *table)
6289+{
6290+ struct xt_flowoffload_hook *hook;
6291+ bool active = false;
6292+
6293+restart:
6294+ spin_lock_bh(&hooks_lock);
6295+ hlist_for_each_entry(hook, &table->hooks, list) {
6296+ if (hook->used || !hook->registered) {
6297+ active = true;
6298+ continue;
6299+ }
6300+
6301+ hlist_del(&hook->list);
6302+ spin_unlock_bh(&hooks_lock);
6303+ if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
6304+ table->ft.type->setup(&table->ft, hook->ops.dev,
6305+ FLOW_BLOCK_UNBIND);
6306+ nf_unregister_net_hook(hook->net, &hook->ops);
6307+ kfree(hook);
6308+ goto restart;
6309+ }
6310+ spin_unlock_bh(&hooks_lock);
6311+
6312+ return active;
6313+}
6314+
6315+static void
6316+xt_flowoffload_check_hook(struct flow_offload *flow, void *data)
6317+{
6318+ struct xt_flowoffload_table *table = data;
6319+ struct flow_offload_tuple *tuple0 = &flow->tuplehash[0].tuple;
6320+ struct flow_offload_tuple *tuple1 = &flow->tuplehash[1].tuple;
6321+ struct xt_flowoffload_hook *hook;
6322+
6323+ spin_lock_bh(&hooks_lock);
6324+ hlist_for_each_entry(hook, &table->hooks, list) {
6325+ if (hook->ops.dev->ifindex != tuple0->iifidx &&
6326+ hook->ops.dev->ifindex != tuple1->iifidx)
6327+ continue;
6328+
6329+ hook->used = true;
6330+ }
6331+ spin_unlock_bh(&hooks_lock);
6332+}
6333+
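+/*
+ * Delayed work: register any newly queued hooks, then walk the
+ * flowtable to re-mark hooks that still serve active flows and remove
+ * the ones that do not. Re-arms itself while any hook remains active.
+ */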
6334+static void
6335+xt_flowoffload_hook_work(struct work_struct *work)
6336+{
6337+ struct xt_flowoffload_table *table;
6338+ struct xt_flowoffload_hook *hook;
6339+ int err;
6340+
6341+ table = container_of(work, struct xt_flowoffload_table, work.work);
6342+
6343+ spin_lock_bh(&hooks_lock);
6344+ xt_flowoffload_register_hooks(table);
6345+ hlist_for_each_entry(hook, &table->hooks, list)
6346+ hook->used = false;
6347+ spin_unlock_bh(&hooks_lock);
6348+
6349+ err = nf_flow_table_iterate(&table->ft, xt_flowoffload_check_hook,
6350+ table);
6351+ if (err && err != -EAGAIN)
6352+ goto out;
6353+
6354+ if (!xt_flowoffload_cleanup_hooks(table))
6355+ return;
6356+
6357+out:
6358+ queue_delayed_work(system_power_efficient_wq, &table->work, HZ);
6359+}
6360+
6361+static bool
6362+xt_flowoffload_skip(struct sk_buff *skb, int family)
6363+{
6364+ if (skb_sec_path(skb))
6365+ return true;
6366+
6367+ if (family == NFPROTO_IPV4) {
6368+ const struct ip_options *opt = &(IPCB(skb)->opt);
6369+
6370+ if (unlikely(opt->optlen))
6371+ return true;
6372+ }
6373+
6374+ return false;
6375+}
6376+
6377+static enum flow_offload_xmit_type nf_xmit_type(struct dst_entry *dst)
6378+{
6379+ if (dst_xfrm(dst))
6380+ return FLOW_OFFLOAD_XMIT_XFRM;
6381+
6382+ return FLOW_OFFLOAD_XMIT_NEIGH;
6383+}
6384+
6385+static void nf_default_forward_path(struct nf_flow_route *route,
6386+ struct dst_entry *dst_cache,
6387+ enum ip_conntrack_dir dir,
6388+ struct net_device **dev)
6389+{
6390+ route->tuple[!dir].in.ifindex = dst_cache->dev->ifindex;
6391+ route->tuple[dir].dst = dst_cache;
6392+ route->tuple[dir].xmit_type = nf_xmit_type(dst_cache);
6393+}
6394+
6395+static bool nf_is_valid_ether_device(const struct net_device *dev)
6396+{
6397+ if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
6398+ dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
6399+ return false;
6400+
6401+ return true;
6402+}
6403+
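+/*
+ * Flatten the device path reported by dev_fill_forward_path() into
+ * nf_forward_info: record source/destination MAC addresses, VLAN and
+ * PPPoE encapsulations and the real egress device, and select
+ * FLOW_OFFLOAD_XMIT_DIRECT when the path ends on a valid Ethernet device.
+ */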
6404+static void nf_dev_path_info(const struct net_device_path_stack *stack,
6405+ struct nf_forward_info *info,
6406+ unsigned char *ha)
6407+{
6408+ const struct net_device_path *path;
6409+ int i;
6410+
6411+ memcpy(info->h_dest, ha, ETH_ALEN);
6412+
6413+ for (i = 0; i < stack->num_paths; i++) {
6414+ path = &stack->path[i];
6415+
6416+ info->indev = path->dev;
6417+
6418+ switch (path->type) {
6419+ case DEV_PATH_ETHERNET:
6420+ case DEV_PATH_DSA:
6421+ case DEV_PATH_VLAN:
6422+ case DEV_PATH_PPPOE:
6423+ if (is_zero_ether_addr(info->h_source))
6424+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
6425+
6426+ if (path->type == DEV_PATH_ETHERNET)
6427+ break;
6428+ if (path->type == DEV_PATH_DSA) {
6429+ i = stack->num_paths;
6430+ break;
6431+ }
6432+
6433+ /* DEV_PATH_VLAN and DEV_PATH_PPPOE */
6434+ if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
6435+ info->indev = NULL;
6436+ break;
6437+ }
6438+ if (!info->outdev)
6439+ info->outdev = path->dev;
6440+ info->encap[info->num_encaps].id = path->encap.id;
6441+ info->encap[info->num_encaps].proto = path->encap.proto;
6442+ info->num_encaps++;
6443+ if (path->type == DEV_PATH_PPPOE)
6444+ memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
6445+ break;
6446+ case DEV_PATH_BRIDGE:
6447+ if (is_zero_ether_addr(info->h_source))
6448+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
6449+
6450+ switch (path->bridge.vlan_mode) {
6451+ case DEV_PATH_BR_VLAN_UNTAG_HW:
6452+ info->ingress_vlans |= BIT(info->num_encaps - 1);
6453+ break;
6454+ case DEV_PATH_BR_VLAN_TAG:
6455+ info->encap[info->num_encaps].id = path->bridge.vlan_id;
6456+ info->encap[info->num_encaps].proto = path->bridge.vlan_proto;
6457+ info->num_encaps++;
6458+ break;
6459+ case DEV_PATH_BR_VLAN_UNTAG:
6460+ info->num_encaps--;
6461+ break;
6462+ case DEV_PATH_BR_VLAN_KEEP:
6463+ break;
6464+ }
6465+ break;
6466+ default:
6467+ break;
6468+ }
6469+ }
6470+ if (!info->outdev)
6471+ info->outdev = info->indev;
6472+
6473+ info->hw_outdev = info->indev;
6474+
6475+ if (nf_is_valid_ether_device(info->indev))
6476+ info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
6477+}
6478+
6479+static int nf_dev_fill_forward_path(const struct nf_flow_route *route,
6480+ const struct dst_entry *dst_cache,
6481+ const struct nf_conn *ct,
6482+ enum ip_conntrack_dir dir, u8 *ha,
6483+ struct net_device_path_stack *stack)
6484+{
6485+ const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
6486+ struct net_device *dev = dst_cache->dev;
6487+ struct neighbour *n;
6488+ u8 nud_state;
6489+
6490+ if (!nf_is_valid_ether_device(dev))
6491+ goto out;
6492+
developer9fdc0e82023-05-12 14:21:17 +08006493+ if (ct->status & IPS_NAT_MASK) {
6494+ n = dst_neigh_lookup(dst_cache, daddr);
6495+ if (!n)
6496+ return -1;
developer8cb3ac72022-07-04 10:55:14 +08006497+
developer9fdc0e82023-05-12 14:21:17 +08006498+ read_lock_bh(&n->lock);
6499+ nud_state = n->nud_state;
6500+ ether_addr_copy(ha, n->ha);
6501+ read_unlock_bh(&n->lock);
6502+ neigh_release(n);
developer8cb3ac72022-07-04 10:55:14 +08006503+
developer9fdc0e82023-05-12 14:21:17 +08006504+ if (!(nud_state & NUD_VALID))
6505+ return -1;
6506+ }
developer64db8532023-04-28 13:56:00 +08006507+
developer8cb3ac72022-07-04 10:55:14 +08006508+out:
6509+ return dev_fill_forward_path(dev, ha, stack);
6510+}
6511+
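+/*
+ * Resolve the forwarding path for one flow direction and fill in the
+ * corresponding route tuple: ingress ifindex, encapsulations and, for
+ * direct transmission, the egress device and MAC addresses.
+ */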
developer9fdc0e82023-05-12 14:21:17 +08006512+static int nf_dev_forward_path(struct sk_buff *skb,
6513+ struct nf_flow_route *route,
developer8cb3ac72022-07-04 10:55:14 +08006514+ const struct nf_conn *ct,
6515+ enum ip_conntrack_dir dir,
6516+ struct net_device **devs)
6517+{
6518+ const struct dst_entry *dst = route->tuple[dir].dst;
developer9fdc0e82023-05-12 14:21:17 +08006519+ struct ethhdr *eth;
6520+ enum ip_conntrack_dir skb_dir;
developer8cb3ac72022-07-04 10:55:14 +08006521+ struct net_device_path_stack stack;
6522+ struct nf_forward_info info = {};
6523+ unsigned char ha[ETH_ALEN];
6524+ int i;
6525+
developer9fdc0e82023-05-12 14:21:17 +08006526+ if (!(ct->status & IPS_NAT_MASK) && skb_mac_header_was_set(skb)) {
6527+ eth = eth_hdr(skb);
6528+ skb_dir = CTINFO2DIR(skb_get_nfct(skb) & NFCT_INFOMASK);
6529+
6530+ if (skb_dir != dir) {
6531+ memcpy(ha, eth->h_source, ETH_ALEN);
6532+ memcpy(info.h_source, eth->h_dest, ETH_ALEN);
6533+ } else {
6534+ memcpy(ha, eth->h_dest, ETH_ALEN);
6535+ memcpy(info.h_source, eth->h_source, ETH_ALEN);
6536+ }
6537+ }
6538+
developer7e533772023-04-27 05:59:30 +08006539+ if (nf_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
developer8cb3ac72022-07-04 10:55:14 +08006540+ nf_dev_path_info(&stack, &info, ha);
6541+
6542+ devs[!dir] = (struct net_device *)info.indev;
6543+ if (!info.indev)
6544+ return -1;
6545+
6546+ route->tuple[!dir].in.ifindex = info.indev->ifindex;
6547+ for (i = 0; i < info.num_encaps; i++) {
6548+ route->tuple[!dir].in.encap[i].id = info.encap[i].id;
6549+ route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
6550+ }
6551+ route->tuple[!dir].in.num_encaps = info.num_encaps;
6552+ route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
6553+
6554+ if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
6555+ memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
6556+ memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
6557+ route->tuple[dir].out.ifindex = info.outdev->ifindex;
6558+ route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex;
6559+ route->tuple[dir].xmit_type = info.xmit_type;
6560+ }
6561+
6562+ return 0;
6563+}
6564+
6565+static int
6566+xt_flowoffload_route_dir(struct nf_flow_route *route, const struct nf_conn *ct,
6567+ enum ip_conntrack_dir dir,
6568+ const struct xt_action_param *par, int ifindex,
6569+ struct net_device **devs)
6570+{
6571+ struct dst_entry *dst = NULL;
6572+ struct flowi fl;
6573+
6574+ memset(&fl, 0, sizeof(fl));
6575+ switch (xt_family(par)) {
6576+ case NFPROTO_IPV4:
6577+ fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.src.u3.ip;
6578+ fl.u.ip4.flowi4_oif = ifindex;
6579+ break;
6580+ case NFPROTO_IPV6:
6581+ fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
6582+ fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.src.u3.in6;
6583+ fl.u.ip6.flowi6_oif = ifindex;
6584+ break;
6585+ }
6586+
6587+ nf_route(xt_net(par), &dst, &fl, false, xt_family(par));
6588+ if (!dst)
6589+ return -ENOENT;
6590+
6591+ nf_default_forward_path(route, dst, dir, devs);
6592+
6593+ return 0;
6594+}
6595+
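+/*
+ * Route setup for NATed connections: reuse the skb's dst for the
+ * current direction, look up a route for the reply direction and, if
+ * both directions use neighbour xmit, resolve the full forwarding paths.
+ */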
6596+static int
developer480c5d52022-12-28 14:48:14 +08006597+xt_flowoffload_route_nat(struct sk_buff *skb, const struct nf_conn *ct,
6598+ const struct xt_action_param *par,
6599+ struct nf_flow_route *route, enum ip_conntrack_dir dir,
6600+ struct net_device **devs)
6601+{
6602+ struct dst_entry *this_dst = skb_dst(skb);
6603+ struct dst_entry *other_dst = NULL;
6604+ struct flowi fl;
6605+
6606+ memset(&fl, 0, sizeof(fl));
6607+ switch (xt_family(par)) {
6608+ case NFPROTO_IPV4:
6609+ fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
6610+ fl.u.ip4.flowi4_oif = xt_in(par)->ifindex;
6611+ break;
6612+ case NFPROTO_IPV6:
6613+ fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
6614+ fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
6615+ fl.u.ip6.flowi6_oif = xt_in(par)->ifindex;
6616+ break;
6617+ }
6618+
6619+ nf_route(xt_net(par), &other_dst, &fl, false, xt_family(par));
6620+ if (!other_dst)
6621+ return -ENOENT;
6622+
6623+ nf_default_forward_path(route, this_dst, dir, devs);
6624+ nf_default_forward_path(route, other_dst, !dir, devs);
6625+
developer7e533772023-04-27 05:59:30 +08006626+ if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH &&
developer480c5d52022-12-28 14:48:14 +08006627+ route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
developer9fdc0e82023-05-12 14:21:17 +08006628+ if (nf_dev_forward_path(skb, route, ct, dir, devs))
developer480c5d52022-12-28 14:48:14 +08006629+ return -1;
developer9fdc0e82023-05-12 14:21:17 +08006630+ if (nf_dev_forward_path(skb, route, ct, !dir, devs))
developer480c5d52022-12-28 14:48:14 +08006631+ return -1;
6632+ }
6633+
6634+ return 0;
6635+}
6636+
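+/*
+ * Route setup for non-NAT connections: perform a route lookup for each
+ * direction, then resolve the forwarding paths; on failure the dsts
+ * acquired so far are released.
+ */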
6637+static int
6638+xt_flowoffload_route_bridge(struct sk_buff *skb, const struct nf_conn *ct,
6639+ const struct xt_action_param *par,
6640+ struct nf_flow_route *route, enum ip_conntrack_dir dir,
6641+ struct net_device **devs)
developer8cb3ac72022-07-04 10:55:14 +08006642+{
6643+ int ret;
6644+
6645+ ret = xt_flowoffload_route_dir(route, ct, dir, par,
6646+ devs[dir]->ifindex,
6647+ devs);
6648+ if (ret)
6649+ return ret;
6650+
6651+ ret = xt_flowoffload_route_dir(route, ct, !dir, par,
6652+ devs[!dir]->ifindex,
6653+ devs);
6654+ if (ret)
developer67bbcc02022-07-08 09:04:01 +08006655+ goto err_route_dir1;
developer8cb3ac72022-07-04 10:55:14 +08006656+
developer7e533772023-04-27 05:59:30 +08006657+ if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH &&
developer8cb3ac72022-07-04 10:55:14 +08006658+ route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
developer9fdc0e82023-05-12 14:21:17 +08006659+ if (nf_dev_forward_path(skb, route, ct, dir, devs) ||
6660+ nf_dev_forward_path(skb, route, ct, !dir, devs)) {
developer67bbcc02022-07-08 09:04:01 +08006661+ ret = -1;
6662+ goto err_route_dir2;
6663+ }
developer8cb3ac72022-07-04 10:55:14 +08006664+ }
6665+
6666+ return 0;
developer67bbcc02022-07-08 09:04:01 +08006667+
6668+err_route_dir2:
6669+ dst_release(route->tuple[!dir].dst);
6670+err_route_dir1:
6671+ dst_release(route->tuple[dir].dst);
6672+ return ret;
developer8cb3ac72022-07-04 10:55:14 +08006673+}
6674+
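+/*
+ * FLOWOFFLOAD target: for confirmed TCP connections in the ESTABLISHED
+ * state and for UDP connections, without helpers or sequence
+ * adjustment, build the flow route for both directions and add the
+ * connection to the selected software or hardware flowtable.
+ */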
6675+static unsigned int
6676+flowoffload_tg(struct sk_buff *skb, const struct xt_action_param *par)
6677+{
6678+ struct xt_flowoffload_table *table;
6679+ const struct xt_flowoffload_target_info *info = par->targinfo;
6680+ struct tcphdr _tcph, *tcph = NULL;
6681+ enum ip_conntrack_info ctinfo;
6682+ enum ip_conntrack_dir dir;
6683+ struct nf_flow_route route = {};
6684+ struct flow_offload *flow = NULL;
6685+ struct net_device *devs[2] = {};
6686+ struct nf_conn *ct;
6687+ struct net *net;
6688+
6689+ if (xt_flowoffload_skip(skb, xt_family(par)))
6690+ return XT_CONTINUE;
6691+
6692+ ct = nf_ct_get(skb, &ctinfo);
6693+ if (ct == NULL)
6694+ return XT_CONTINUE;
6695+
6696+ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
6697+ case IPPROTO_TCP:
6698+ if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
6699+ return XT_CONTINUE;
6700+
6701+ tcph = skb_header_pointer(skb, par->thoff,
6702+ sizeof(_tcph), &_tcph);
6703+ if (unlikely(!tcph || tcph->fin || tcph->rst))
6704+ return XT_CONTINUE;
6705+ break;
6706+ case IPPROTO_UDP:
6707+ break;
6708+ default:
6709+ return XT_CONTINUE;
6710+ }
6711+
6712+ if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
6713+ ct->status & IPS_SEQ_ADJUST)
6714+ return XT_CONTINUE;
6715+
6716+ if (!nf_ct_is_confirmed(ct))
6717+ return XT_CONTINUE;
6718+
6719+ dir = CTINFO2DIR(ctinfo);
6720+
6721+ devs[dir] = xt_out(par);
6722+ devs[!dir] = xt_in(par);
6723+
6724+ if (!devs[dir] || !devs[!dir])
6725+ return XT_CONTINUE;
6726+
6727+ if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
6728+ return XT_CONTINUE;
6729+
developer480c5d52022-12-28 14:48:14 +08006730+ if (ct->status & IPS_NAT_MASK) {
6731+ if (xt_flowoffload_route_nat(skb, ct, par, &route, dir, devs) < 0)
6732+ goto err_flow_route;
6733+ } else {
6734+ if (xt_flowoffload_route_bridge(skb, ct, par, &route, dir, devs) < 0)
6735+ goto err_flow_route;
6736+ }
developer8cb3ac72022-07-04 10:55:14 +08006737+
6738+ flow = flow_offload_alloc(ct);
6739+ if (!flow)
6740+ goto err_flow_alloc;
6741+
6742+ if (flow_offload_route_init(flow, &route) < 0)
6743+ goto err_flow_add;
6744+
6745+ if (tcph) {
6746+ ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
6747+ ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
6748+ }
6749+
6750+ table = &flowtable[!!(info->flags & XT_FLOWOFFLOAD_HW)];
6751+
6752+ net = read_pnet(&table->ft.net);
6753+ if (!net)
6754+ write_pnet(&table->ft.net, xt_net(par));
6755+
6756+ if (flow_offload_add(&table->ft, flow) < 0)
6757+ goto err_flow_add;
6758+
6759+ xt_flowoffload_check_device(table, devs[0]);
6760+ xt_flowoffload_check_device(table, devs[1]);
6761+
developer480c5d52022-12-28 14:48:14 +08006762+ if (!(ct->status & IPS_NAT_MASK))
6763+ dst_release(route.tuple[dir].dst);
developer8cb3ac72022-07-04 10:55:14 +08006764+ dst_release(route.tuple[!dir].dst);
6765+
6766+ return XT_CONTINUE;
6767+
6768+err_flow_add:
6769+ flow_offload_free(flow);
6770+err_flow_alloc:
developer480c5d52022-12-28 14:48:14 +08006771+ if (!(ct->status & IPS_NAT_MASK))
6772+ dst_release(route.tuple[dir].dst);
developer8cb3ac72022-07-04 10:55:14 +08006773+ dst_release(route.tuple[!dir].dst);
6774+err_flow_route:
6775+ clear_bit(IPS_OFFLOAD_BIT, &ct->status);
6776+
6777+ return XT_CONTINUE;
6778+}
6779+
6780+static int flowoffload_chk(const struct xt_tgchk_param *par)
6781+{
6782+ struct xt_flowoffload_target_info *info = par->targinfo;
6783+
6784+ if (info->flags & ~XT_FLOWOFFLOAD_MASK)
6785+ return -EINVAL;
6786+
6787+ return 0;
6788+}
6789+
6790+static struct xt_target offload_tg_reg __read_mostly = {
6791+ .family = NFPROTO_UNSPEC,
6792+ .name = "FLOWOFFLOAD",
6793+ .revision = 0,
6794+ .targetsize = sizeof(struct xt_flowoffload_target_info),
6795+ .usersize = sizeof(struct xt_flowoffload_target_info),
6796+ .checkentry = flowoffload_chk,
6797+ .target = flowoffload_tg,
6798+ .me = THIS_MODULE,
6799+};
6800+
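+/*
+ * On NETDEV_UNREGISTER, drop any ingress hooks installed on the device
+ * from both flowtables and flush its offloaded flows.
+ */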
6801+static int flow_offload_netdev_event(struct notifier_block *this,
6802+ unsigned long event, void *ptr)
6803+{
6804+ struct xt_flowoffload_hook *hook0, *hook1;
6805+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
6806+
6807+ if (event != NETDEV_UNREGISTER)
6808+ return NOTIFY_DONE;
6809+
6810+ spin_lock_bh(&hooks_lock);
6811+ hook0 = flow_offload_lookup_hook(&flowtable[0], dev);
6812+ if (hook0)
6813+ hlist_del(&hook0->list);
6814+
6815+ hook1 = flow_offload_lookup_hook(&flowtable[1], dev);
6816+ if (hook1)
6817+ hlist_del(&hook1->list);
6818+ spin_unlock_bh(&hooks_lock);
6819+
6820+ if (hook0) {
6821+ nf_unregister_net_hook(hook0->net, &hook0->ops);
6822+ kfree(hook0);
6823+ }
6824+
6825+ if (hook1) {
6826+ nf_unregister_net_hook(hook1->net, &hook1->ops);
6827+ kfree(hook1);
6828+ }
6829+
6830+ nf_flow_table_cleanup(dev);
6831+
6832+ return NOTIFY_DONE;
6833+}
6834+
6835+static struct notifier_block flow_offload_netdev_notifier = {
6836+ .notifier_call = flow_offload_netdev_event,
6837+};
6838+
6839+static int nf_flow_rule_route_inet(struct net *net,
6840+ const struct flow_offload *flow,
6841+ enum flow_offload_tuple_dir dir,
6842+ struct nf_flow_rule *flow_rule)
6843+{
6844+ const struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
6845+ int err;
6846+
6847+ switch (flow_tuple->l3proto) {
6848+ case NFPROTO_IPV4:
6849+ err = nf_flow_rule_route_ipv4(net, flow, dir, flow_rule);
6850+ break;
6851+ case NFPROTO_IPV6:
6852+ err = nf_flow_rule_route_ipv6(net, flow, dir, flow_rule);
6853+ break;
6854+ default:
6855+ err = -1;
6856+ break;
6857+ }
6858+
6859+ return err;
6860+}
6861+
6862+static struct nf_flowtable_type flowtable_inet = {
6863+ .family = NFPROTO_INET,
6864+ .init = nf_flow_table_init,
6865+ .setup = nf_flow_table_offload_setup,
6866+ .action = nf_flow_rule_route_inet,
6867+ .free = nf_flow_table_free,
6868+ .hook = xt_flowoffload_net_hook,
6869+ .owner = THIS_MODULE,
6870+};
6871+
6872+static int init_flowtable(struct xt_flowoffload_table *tbl)
6873+{
6874+ INIT_DELAYED_WORK(&tbl->work, xt_flowoffload_hook_work);
6875+ tbl->ft.type = &flowtable_inet;
6876+
6877+ return nf_flow_table_init(&tbl->ft);
6878+}
6879+
6880+static int __init xt_flowoffload_tg_init(void)
6881+{
6882+ int ret;
6883+
6884+ register_netdevice_notifier(&flow_offload_netdev_notifier);
6885+
6886+ ret = init_flowtable(&flowtable[0]);
6887+ if (ret)
6888+ goto cleanup_notifier;
6889+
6890+ ret = init_flowtable(&flowtable[1]);
6891+ if (ret)
6892+ goto cleanup;
6893+
developeree39bcf2023-06-16 08:03:30 +08006894+ flowtable[1].ft.flags = NF_FLOWTABLE_HW_OFFLOAD;
developer8cb3ac72022-07-04 10:55:14 +08006895+
6896+ ret = xt_register_target(&offload_tg_reg);
6897+ if (ret)
6898+ goto cleanup2;
6899+
6900+ return 0;
6901+
6902+cleanup2:
6903+ nf_flow_table_free(&flowtable[1].ft);
6904+cleanup:
6905+ nf_flow_table_free(&flowtable[0].ft);
+cleanup_notifier:
+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
6906+ return ret;
6907+}
6908+
6909+static void __exit xt_flowoffload_tg_exit(void)
6910+{
6911+ xt_unregister_target(&offload_tg_reg);
6912+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
6913+ nf_flow_table_free(&flowtable[0].ft);
6914+ nf_flow_table_free(&flowtable[1].ft);
6915+}
6916+
6917+MODULE_LICENSE("GPL");
6918+module_init(xt_flowoffload_tg_init);
6919+module_exit(xt_flowoffload_tg_exit);
6920--
69212.18.0
6922