From 6ad9bd65769003ab526e504577e0f747eba14287 Mon Sep 17 00:00:00 2001
From: Bo Jiao <Bo.Jiao@mediatek.com>
Date: Wed, 22 Jun 2022 09:42:19 +0800
Subject: [PATCH 1/8]
 9990-mt7622-backport-nf-hw-offload-framework-and-upstream-hnat-plus-xt-FLOWOFFLOAD-update-v2

---
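Usage note (kept below the "---" marker, so it is not part of the commit
message): once this series is applied, established connections can be pushed
into the MediaTek PPE either via the upstream nftables flowtable
hardware-offload path (served by mtk_ppe_offload.c through ndo_setup_tc) or
via the xt_FLOWOFFLOAD target added in net/netfilter/xt_FLOWOFFLOAD.c. A
minimal sketch, assuming traffic is forwarded between eth0 and eth1
(interface names, chain placement and the --hw option of the userspace
iptables extension are illustrative assumptions, not mandated by this patch):

  # iptables: offload established flows; --hw additionally requests PPE offload
  iptables -I FORWARD -m conntrack --ctstate ESTABLISHED,RELATED -j FLOWOFFLOAD --hw

  # nftables: flowtable with the "offload" flag, loaded with nft -f
  table inet fastpath {
          flowtable ft {
                  hook ingress priority 0
                  devices = { eth0, eth1 }
                  flags offload
          }
          chain forward {
                  type filter hook forward priority 0; policy accept;
                  meta l4proto { tcp, udp } flow add @ft
          }
  }

Bound and unbound FOE entries can then be inspected through the debugfs files
("bind" and "entries") created by mtk_ppe_debugfs.c.
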
 drivers/net/ethernet/mediatek/Makefile | 3 +-
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 28 +-
 drivers/net/ethernet/mediatek/mtk_eth_soc.h | 20 +-
 drivers/net/ethernet/mediatek/mtk_ppe.c | 509 +++++++
 drivers/net/ethernet/mediatek/mtk_ppe.h | 288 ++++
 .../net/ethernet/mediatek/mtk_ppe_debugfs.c | 214 +++
 .../net/ethernet/mediatek/mtk_ppe_offload.c | 526 ++++++++
 drivers/net/ethernet/mediatek/mtk_ppe_regs.h | 144 ++
 drivers/net/ppp/ppp_generic.c | 22 +
 drivers/net/ppp/pppoe.c | 24 +
 include/linux/netdevice.h | 60 +
 include/linux/ppp_channel.h | 3 +
 include/net/dsa.h | 10 +
 include/net/flow_offload.h | 4 +
 include/net/ip6_route.h | 5 +-
 .../net/netfilter/ipv6/nf_conntrack_ipv6.h | 3 -
 include/net/netfilter/nf_conntrack.h | 12 +
 include/net/netfilter/nf_conntrack_acct.h | 11 +
 include/net/netfilter/nf_flow_table.h | 264 +++-
 include/net/netns/conntrack.h | 6 +
 .../linux/netfilter/nf_conntrack_common.h | 9 +-
 include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h | 17 +
 net/8021q/vlan_dev.c | 21 +
 net/bridge/br_device.c | 49 +
 net/bridge/br_private.h | 20 +
 net/bridge/br_vlan.c | 55 +
 net/core/dev.c | 46 +
 net/dsa/dsa.c | 9 +
 net/dsa/slave.c | 41 +-
 net/ipv4/netfilter/Kconfig | 4 +-
 net/ipv6/ip6_output.c | 2 +-
 net/ipv6/netfilter/Kconfig | 3 +-
 net/ipv6/route.c | 22 +-
 net/netfilter/Kconfig | 14 +-
 net/netfilter/Makefile | 4 +-
 net/netfilter/nf_conntrack_core.c | 20 +-
 net/netfilter/nf_conntrack_proto_tcp.c | 4 +
 net/netfilter/nf_conntrack_proto_udp.c | 4 +
 net/netfilter/nf_conntrack_standalone.c | 34 +-
 net/netfilter/nf_flow_table_core.c | 446 +++---
 net/netfilter/nf_flow_table_ip.c | 455 ++++---
 net/netfilter/nf_flow_table_offload.c | 1191 +++++++++++++++++
 net/netfilter/xt_FLOWOFFLOAD.c | 719 ++++++++++
 43 files changed, 4913 insertions(+), 432 deletions(-)
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.c
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.h
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_offload.c
 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_regs.h
 create mode 100644 include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
 create mode 100644 net/netfilter/nf_flow_table_offload.c
 create mode 100644 net/netfilter/xt_FLOWOFFLOAD.c

61diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile
developeree39bcf2023-06-16 08:03:30 +080062index 13c5b4e8f..0a6af99f1 100755
developer8cb3ac72022-07-04 10:55:14 +080063--- a/drivers/net/ethernet/mediatek/Makefile
64+++ b/drivers/net/ethernet/mediatek/Makefile
developeree39bcf2023-06-16 08:03:30 +080065@@ -4,5 +4,6 @@
developer8cb3ac72022-07-04 10:55:14 +080066 #
67
68 obj-$(CONFIG_NET_MEDIATEK_SOC) += mtk_eth.o
developer68838542022-10-03 23:42:21 +080069-mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_usxgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o
70+mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_usxgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o \
developer8cb3ac72022-07-04 10:55:14 +080071+ mtk_ppe.o mtk_ppe_debugfs.o mtk_ppe_offload.o
72 obj-$(CONFIG_NET_MEDIATEK_HNAT) += mtk_hnat/
73diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
developeree39bcf2023-06-16 08:03:30 +080074index 2b21f7ed0..819d8a0be 100755
developer8cb3ac72022-07-04 10:55:14 +080075--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
76+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
developeree39bcf2023-06-16 08:03:30 +080077@@ -3081,6 +3081,7 @@ static int mtk_open(struct net_device *d
developerdca0fde2022-12-14 11:40:35 +080078 struct mtk_phylink_priv *phylink_priv = &mac->phylink_priv;
79 int err, i;
80 struct device_node *phy_node;
developeree39bcf2023-06-16 08:03:30 +080081+ u32 gdm_config = MTK_GDMA_TO_PDMA;
developer8cb3ac72022-07-04 10:55:14 +080082
developeree39bcf2023-06-16 08:03:30 +080083 err = phylink_of_phy_connect(mac->phylink, mac->of_node, 0);
84 if (err) {
85@@ -3157,7 +3158,10 @@ static int mtk_open(struct net_device *d
86 if (!phy_node && eth->xgmii->regmap_sgmii[mac->id])
87 regmap_write(eth->xgmii->regmap_sgmii[mac->id], SGMSYS_QPHY_PWR_STATE_CTRL, 0);
developer8cb3ac72022-07-04 10:55:14 +080088
developerdca0fde2022-12-14 11:40:35 +080089- mtk_gdm_config(eth, mac->id, MTK_GDMA_TO_PDMA);
developeree39bcf2023-06-16 08:03:30 +080090+ if (eth->soc->offload_version && mtk_ppe_start(&eth->ppe) == 0)
91+ gdm_config = MTK_GDMA_TO_PPE;
developer8cb3ac72022-07-04 10:55:14 +080092+
developerdca0fde2022-12-14 11:40:35 +080093+ mtk_gdm_config(eth, mac->id, gdm_config);
developer8cb3ac72022-07-04 10:55:14 +080094
developerdca0fde2022-12-14 11:40:35 +080095 return 0;
96 }
developeree39bcf2023-06-16 08:03:30 +080097@@ -3238,6 +3242,9 @@ static int mtk_stop(struct net_device *d
developer8cb3ac72022-07-04 10:55:14 +080098
99 mtk_dma_free(eth);
100
developeree39bcf2023-06-16 08:03:30 +0800101+ if (eth->soc->offload_version)
102+ mtk_ppe_stop(&eth->ppe);
developer8cb3ac72022-07-04 10:55:14 +0800103+
104 return 0;
105 }
106
developeree39bcf2023-06-16 08:03:30 +0800107@@ -3915,6 +3922,7 @@ static const struct net_device_ops mtk_n
developer8cb3ac72022-07-04 10:55:14 +0800108 #ifdef CONFIG_NET_POLL_CONTROLLER
109 .ndo_poll_controller = mtk_poll_controller,
110 #endif
111+ .ndo_setup_tc = mtk_eth_setup_tc,
112 };
113
114 static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
developeree39bcf2023-06-16 08:03:30 +0800115@@ -4308,6 +4316,17 @@ static int mtk_probe(struct platform_dev
developer8cb3ac72022-07-04 10:55:14 +0800116 goto err_free_dev;
117 }
118
119+ if (eth->soc->offload_version) {
developeree39bcf2023-06-16 08:03:30 +0800120+ err = mtk_ppe_init(&eth->ppe, eth->dev,
121+ eth->base + MTK_ETH_PPE_BASE, 2);
122+ if (err)
123+ goto err_free_dev;
developer8cb3ac72022-07-04 10:55:14 +0800124+
125+ err = mtk_eth_offload_init(eth);
126+ if (err)
127+ goto err_free_dev;
128+ }
129+
130 for (i = 0; i < MTK_MAX_DEVS; i++) {
131 if (!eth->netdev[i])
132 continue;
developeree39bcf2023-06-16 08:03:30 +0800133@@ -4410,6 +4429,7 @@ static const struct mtk_soc_data mt2701_
developer8cb3ac72022-07-04 10:55:14 +0800134 .required_clks = MT7623_CLKS_BITMAP,
135 .required_pctl = true,
136 .has_sram = false,
developeree39bcf2023-06-16 08:03:30 +0800137+ .offload_version = 2,
138 .rss_num = 0,
developerdca0fde2022-12-14 11:40:35 +0800139 .txrx = {
140 .txd_size = sizeof(struct mtk_tx_dma),
developeree39bcf2023-06-16 08:03:30 +0800141@@ -4424,6 +4444,7 @@ static const struct mtk_soc_data mt7621_
developer8cb3ac72022-07-04 10:55:14 +0800142 .required_clks = MT7621_CLKS_BITMAP,
143 .required_pctl = false,
144 .has_sram = false,
developeree39bcf2023-06-16 08:03:30 +0800145+ .offload_version = 2,
146 .rss_num = 0,
developerdca0fde2022-12-14 11:40:35 +0800147 .txrx = {
148 .txd_size = sizeof(struct mtk_tx_dma),
developeree39bcf2023-06-16 08:03:30 +0800149@@ -4439,6 +4460,7 @@ static const struct mtk_soc_data mt7622_
developer8cb3ac72022-07-04 10:55:14 +0800150 .required_clks = MT7622_CLKS_BITMAP,
151 .required_pctl = false,
152 .has_sram = false,
153+ .offload_version = 2,
developeree39bcf2023-06-16 08:03:30 +0800154 .rss_num = 0,
developerdca0fde2022-12-14 11:40:35 +0800155 .txrx = {
156 .txd_size = sizeof(struct mtk_tx_dma),
developeree39bcf2023-06-16 08:03:30 +0800157@@ -4453,6 +4475,7 @@ static const struct mtk_soc_data mt7623_
developer8cb3ac72022-07-04 10:55:14 +0800158 .required_clks = MT7623_CLKS_BITMAP,
159 .required_pctl = true,
160 .has_sram = false,
developer7eb15dc2023-06-14 17:44:03 +0800161+ .offload_version = 2,
developeree39bcf2023-06-16 08:03:30 +0800162 .rss_num = 0,
developer7eb15dc2023-06-14 17:44:03 +0800163 .txrx = {
164 .txd_size = sizeof(struct mtk_tx_dma),
developer8cb3ac72022-07-04 10:55:14 +0800165diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
developeree39bcf2023-06-16 08:03:30 +0800166index b6380ffeb..349f98503 100755
developer8cb3ac72022-07-04 10:55:14 +0800167--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
168+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
169@@ -15,6 +15,8 @@
170 #include <linux/u64_stats_sync.h>
171 #include <linux/refcount.h>
172 #include <linux/phylink.h>
173+#include <linux/rhashtable.h>
174+#include "mtk_ppe.h"
175
176 #define MTK_QDMA_PAGE_SIZE 2048
177 #define MTK_MAX_RX_LENGTH 1536
developeree39bcf2023-06-16 08:03:30 +0800178@@ -37,7 +39,8 @@
developer8cb3ac72022-07-04 10:55:14 +0800179 NETIF_F_HW_VLAN_CTAG_TX | \
180 NETIF_F_SG | NETIF_F_TSO | \
181 NETIF_F_TSO6 | \
182- NETIF_F_IPV6_CSUM)
183+ NETIF_F_IPV6_CSUM |\
184+ NETIF_F_HW_TC)
185 #define MTK_SET_FEATURES (NETIF_F_LRO | \
186 NETIF_F_HW_VLAN_CTAG_RX)
187 #define MTK_HW_FEATURES_MT7628 (NETIF_F_SG | NETIF_F_RXCSUM)
developeree39bcf2023-06-16 08:03:30 +0800188@@ -107,6 +110,7 @@
189 #define MTK_GDMA_TCS_EN BIT(21)
developer8cb3ac72022-07-04 10:55:14 +0800190 #define MTK_GDMA_UCS_EN BIT(20)
191 #define MTK_GDMA_TO_PDMA 0x0
192+#define MTK_GDMA_TO_PPE 0x4444
193 #define MTK_GDMA_DROP_ALL 0x7777
194
developeree39bcf2023-06-16 08:03:30 +0800195 /* Unicast Filter MAC Address Register - Low */
196@@ -547,6 +551,12 @@
developer8cb3ac72022-07-04 10:55:14 +0800197 #define RX_DMA_TCI(_x) ((_x) & (VLAN_PRIO_MASK | VLAN_VID_MASK))
198 #define RX_DMA_VPID(_x) (((_x) >> 16) & 0xffff)
199
200+/* QDMA descriptor rxd4 */
201+#define MTK_RXD4_FOE_ENTRY GENMASK(13, 0)
202+#define MTK_RXD4_PPE_CPU_REASON GENMASK(18, 14)
203+#define MTK_RXD4_SRC_PORT GENMASK(21, 19)
204+#define MTK_RXD4_ALG GENMASK(31, 22)
205+
206 /* QDMA descriptor rxd4 */
207 #define RX_DMA_L4_VALID BIT(24)
208 #define RX_DMA_L4_VALID_PDMA BIT(30) /* when PDMA is used */
developeree39bcf2023-06-16 08:03:30 +0800209@@ -1158,6 +1168,7 @@ struct mtk_soc_data {
210 u32 caps;
211 u32 required_clks;
developer8cb3ac72022-07-04 10:55:14 +0800212 bool required_pctl;
213+ u8 offload_version;
214 netdev_features_t hw_features;
215 bool has_sram;
developeree39bcf2023-06-16 08:03:30 +0800216 };
217@@ -1271,6 +1282,9 @@ struct mtk_eth {
developer8cb3ac72022-07-04 10:55:14 +0800218 int ip_align;
219 spinlock_t syscfg0_lock;
220 struct timer_list mtk_dma_monitor_timer;
221+
developeree39bcf2023-06-16 08:03:30 +0800222+ struct mtk_ppe ppe;
developer8cb3ac72022-07-04 10:55:14 +0800223+ struct rhashtable flow_table;
224 };
225
226 /* struct mtk_mac - the structure that holds the info about the MACs of the
developeree39bcf2023-06-16 08:03:30 +0800227@@ -1319,4 +1333,7 @@ int mtk_gmac_rgmii_path_setup(struct mtk_eth *eth, int mac_id);
228 void mtk_usxgmii_reset(struct mtk_xgmii *ss, int mac_id);
developer1fb19c92023-03-07 23:45:23 +0800229 int mtk_dump_usxgmii(struct regmap *pmap, char *name, u32 offset, u32 range);
developer8cb3ac72022-07-04 10:55:14 +0800230
231+int mtk_eth_offload_init(struct mtk_eth *eth);
232+int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
233+ void *type_data);
developer1fb19c92023-03-07 23:45:23 +0800234 void mtk_eth_set_dma_device(struct mtk_eth *eth, struct device *dma_dev);
developer8cb3ac72022-07-04 10:55:14 +0800235diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
236new file mode 100644
developeree39bcf2023-06-16 08:03:30 +0800237index 000000000..66298e223
developer8cb3ac72022-07-04 10:55:14 +0800238--- /dev/null
239+++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
developeree39bcf2023-06-16 08:03:30 +0800240@@ -0,0 +1,509 @@
developer8cb3ac72022-07-04 10:55:14 +0800241+// SPDX-License-Identifier: GPL-2.0-only
242+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
243+
244+#include <linux/kernel.h>
245+#include <linux/io.h>
246+#include <linux/iopoll.h>
247+#include <linux/etherdevice.h>
248+#include <linux/platform_device.h>
249+#include "mtk_ppe.h"
250+#include "mtk_ppe_regs.h"
251+
252+static void ppe_w32(struct mtk_ppe *ppe, u32 reg, u32 val)
253+{
254+ writel(val, ppe->base + reg);
255+}
256+
257+static u32 ppe_r32(struct mtk_ppe *ppe, u32 reg)
258+{
259+ return readl(ppe->base + reg);
260+}
261+
262+static u32 ppe_m32(struct mtk_ppe *ppe, u32 reg, u32 mask, u32 set)
263+{
264+ u32 val;
265+
266+ val = ppe_r32(ppe, reg);
267+ val &= ~mask;
268+ val |= set;
269+ ppe_w32(ppe, reg, val);
270+
271+ return val;
272+}
273+
274+static u32 ppe_set(struct mtk_ppe *ppe, u32 reg, u32 val)
275+{
276+ return ppe_m32(ppe, reg, 0, val);
277+}
278+
279+static u32 ppe_clear(struct mtk_ppe *ppe, u32 reg, u32 val)
280+{
281+ return ppe_m32(ppe, reg, val, 0);
282+}
283+
284+static int mtk_ppe_wait_busy(struct mtk_ppe *ppe)
285+{
286+ int ret;
287+ u32 val;
288+
289+ ret = readl_poll_timeout(ppe->base + MTK_PPE_GLO_CFG, val,
290+ !(val & MTK_PPE_GLO_CFG_BUSY),
291+ 20, MTK_PPE_WAIT_TIMEOUT_US);
292+
293+ if (ret)
294+ dev_err(ppe->dev, "PPE table busy");
295+
296+ return ret;
297+}
298+
299+static void mtk_ppe_cache_clear(struct mtk_ppe *ppe)
300+{
301+ ppe_set(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
302+ ppe_clear(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
303+}
304+
305+static void mtk_ppe_cache_enable(struct mtk_ppe *ppe, bool enable)
306+{
307+ mtk_ppe_cache_clear(ppe);
308+
309+ ppe_m32(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_EN,
310+ enable * MTK_PPE_CACHE_CTL_EN);
311+}
312+
developeree39bcf2023-06-16 08:03:30 +0800313+static u32 mtk_ppe_hash_entry(struct mtk_foe_entry *e)
developer8cb3ac72022-07-04 10:55:14 +0800314+{
315+ u32 hv1, hv2, hv3;
316+ u32 hash;
317+
developeree39bcf2023-06-16 08:03:30 +0800318+ switch (FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, e->ib1)) {
319+ case MTK_PPE_PKT_TYPE_BRIDGE:
320+ hv1 = e->bridge.src_mac_lo;
321+ hv1 ^= ((e->bridge.src_mac_hi & 0xffff) << 16);
322+ hv2 = e->bridge.src_mac_hi >> 16;
323+ hv2 ^= e->bridge.dest_mac_lo;
324+ hv3 = e->bridge.dest_mac_hi;
325+ break;
developer8cb3ac72022-07-04 10:55:14 +0800326+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
327+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
328+ hv1 = e->ipv4.orig.ports;
329+ hv2 = e->ipv4.orig.dest_ip;
330+ hv3 = e->ipv4.orig.src_ip;
331+ break;
332+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
333+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
334+ hv1 = e->ipv6.src_ip[3] ^ e->ipv6.dest_ip[3];
335+ hv1 ^= e->ipv6.ports;
336+
337+ hv2 = e->ipv6.src_ip[2] ^ e->ipv6.dest_ip[2];
338+ hv2 ^= e->ipv6.dest_ip[0];
339+
340+ hv3 = e->ipv6.src_ip[1] ^ e->ipv6.dest_ip[1];
341+ hv3 ^= e->ipv6.src_ip[0];
342+ break;
343+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
344+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
345+ default:
346+ WARN_ON_ONCE(1);
347+ return MTK_PPE_HASH_MASK;
348+ }
349+
350+ hash = (hv1 & hv2) | ((~hv1) & hv3);
351+ hash = (hash >> 24) | ((hash & 0xffffff) << 8);
352+ hash ^= hv1 ^ hv2 ^ hv3;
353+ hash ^= hash >> 16;
developeree39bcf2023-06-16 08:03:30 +0800354+ hash <<= 1;
developer8cb3ac72022-07-04 10:55:14 +0800355+ hash &= MTK_PPE_ENTRIES - 1;
356+
357+ return hash;
358+}
359+
360+static inline struct mtk_foe_mac_info *
developeree39bcf2023-06-16 08:03:30 +0800361+mtk_foe_entry_l2(struct mtk_foe_entry *entry)
developer8cb3ac72022-07-04 10:55:14 +0800362+{
developeree39bcf2023-06-16 08:03:30 +0800363+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
developer8cb3ac72022-07-04 10:55:14 +0800364+
365+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
366+ return &entry->ipv6.l2;
367+
368+ return &entry->ipv4.l2;
369+}
370+
371+static inline u32 *
developeree39bcf2023-06-16 08:03:30 +0800372+mtk_foe_entry_ib2(struct mtk_foe_entry *entry)
developer8cb3ac72022-07-04 10:55:14 +0800373+{
developeree39bcf2023-06-16 08:03:30 +0800374+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
developer8cb3ac72022-07-04 10:55:14 +0800375+
376+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
377+ return &entry->ipv6.ib2;
378+
379+ return &entry->ipv4.ib2;
380+}
381+
developeree39bcf2023-06-16 08:03:30 +0800382+int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
383+ u8 pse_port, u8 *src_mac, u8 *dest_mac)
developer8cb3ac72022-07-04 10:55:14 +0800384+{
385+ struct mtk_foe_mac_info *l2;
386+ u32 ports_pad, val;
387+
388+ memset(entry, 0, sizeof(*entry));
389+
developeree39bcf2023-06-16 08:03:30 +0800390+ val = FIELD_PREP(MTK_FOE_IB1_STATE, MTK_FOE_STATE_BIND) |
391+ FIELD_PREP(MTK_FOE_IB1_PACKET_TYPE, type) |
392+ FIELD_PREP(MTK_FOE_IB1_UDP, l4proto == IPPROTO_UDP) |
393+ MTK_FOE_IB1_BIND_TTL |
394+ MTK_FOE_IB1_BIND_CACHE;
395+ entry->ib1 = val;
developer8cb3ac72022-07-04 10:55:14 +0800396+
developeree39bcf2023-06-16 08:03:30 +0800397+ val = FIELD_PREP(MTK_FOE_IB2_PORT_MG, 0x3f) |
398+ FIELD_PREP(MTK_FOE_IB2_PORT_AG, 0x1f) |
399+ FIELD_PREP(MTK_FOE_IB2_DEST_PORT, pse_port);
developer8cb3ac72022-07-04 10:55:14 +0800400+
401+ if (is_multicast_ether_addr(dest_mac))
developeree39bcf2023-06-16 08:03:30 +0800402+ val |= MTK_FOE_IB2_MULTICAST;
developer8cb3ac72022-07-04 10:55:14 +0800403+
404+ ports_pad = 0xa5a5a500 | (l4proto & 0xff);
405+ if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
406+ entry->ipv4.orig.ports = ports_pad;
407+ if (type == MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
408+ entry->ipv6.ports = ports_pad;
409+
developeree39bcf2023-06-16 08:03:30 +0800410+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
developer8cb3ac72022-07-04 10:55:14 +0800411+ entry->ipv6.ib2 = val;
412+ l2 = &entry->ipv6.l2;
413+ } else {
414+ entry->ipv4.ib2 = val;
415+ l2 = &entry->ipv4.l2;
416+ }
417+
418+ l2->dest_mac_hi = get_unaligned_be32(dest_mac);
419+ l2->dest_mac_lo = get_unaligned_be16(dest_mac + 4);
420+ l2->src_mac_hi = get_unaligned_be32(src_mac);
421+ l2->src_mac_lo = get_unaligned_be16(src_mac + 4);
422+
423+ if (type >= MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
424+ l2->etype = ETH_P_IPV6;
425+ else
426+ l2->etype = ETH_P_IP;
427+
428+ return 0;
429+}
430+
developeree39bcf2023-06-16 08:03:30 +0800431+int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port)
developer8cb3ac72022-07-04 10:55:14 +0800432+{
developeree39bcf2023-06-16 08:03:30 +0800433+ u32 *ib2 = mtk_foe_entry_ib2(entry);
434+ u32 val;
developer8cb3ac72022-07-04 10:55:14 +0800435+
developeree39bcf2023-06-16 08:03:30 +0800436+ val = *ib2;
437+ val &= ~MTK_FOE_IB2_DEST_PORT;
438+ val |= FIELD_PREP(MTK_FOE_IB2_DEST_PORT, port);
developer8cb3ac72022-07-04 10:55:14 +0800439+ *ib2 = val;
440+
441+ return 0;
442+}
443+
developeree39bcf2023-06-16 08:03:30 +0800444+int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool egress,
developer8cb3ac72022-07-04 10:55:14 +0800445+ __be32 src_addr, __be16 src_port,
446+ __be32 dest_addr, __be16 dest_port)
447+{
developeree39bcf2023-06-16 08:03:30 +0800448+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
developer8cb3ac72022-07-04 10:55:14 +0800449+ struct mtk_ipv4_tuple *t;
450+
451+ switch (type) {
452+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
453+ if (egress) {
454+ t = &entry->ipv4.new;
455+ break;
456+ }
457+ fallthrough;
458+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
459+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
460+ t = &entry->ipv4.orig;
461+ break;
462+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
463+ entry->ipv6_6rd.tunnel_src_ip = be32_to_cpu(src_addr);
464+ entry->ipv6_6rd.tunnel_dest_ip = be32_to_cpu(dest_addr);
465+ return 0;
466+ default:
467+ WARN_ON_ONCE(1);
468+ return -EINVAL;
469+ }
470+
471+ t->src_ip = be32_to_cpu(src_addr);
472+ t->dest_ip = be32_to_cpu(dest_addr);
473+
474+ if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
475+ return 0;
476+
477+ t->src_port = be16_to_cpu(src_port);
478+ t->dest_port = be16_to_cpu(dest_port);
479+
480+ return 0;
481+}
482+
developeree39bcf2023-06-16 08:03:30 +0800483+int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
developer8cb3ac72022-07-04 10:55:14 +0800484+ __be32 *src_addr, __be16 src_port,
485+ __be32 *dest_addr, __be16 dest_port)
486+{
developeree39bcf2023-06-16 08:03:30 +0800487+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
developer8cb3ac72022-07-04 10:55:14 +0800488+ u32 *src, *dest;
489+ int i;
490+
491+ switch (type) {
492+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
493+ src = entry->dslite.tunnel_src_ip;
494+ dest = entry->dslite.tunnel_dest_ip;
495+ break;
496+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
497+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
498+ entry->ipv6.src_port = be16_to_cpu(src_port);
499+ entry->ipv6.dest_port = be16_to_cpu(dest_port);
500+ fallthrough;
501+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
502+ src = entry->ipv6.src_ip;
503+ dest = entry->ipv6.dest_ip;
504+ break;
505+ default:
506+ WARN_ON_ONCE(1);
507+ return -EINVAL;
508+ }
509+
510+ for (i = 0; i < 4; i++)
511+ src[i] = be32_to_cpu(src_addr[i]);
512+ for (i = 0; i < 4; i++)
513+ dest[i] = be32_to_cpu(dest_addr[i]);
514+
515+ return 0;
516+}
517+
developeree39bcf2023-06-16 08:03:30 +0800518+int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port)
developer8cb3ac72022-07-04 10:55:14 +0800519+{
developeree39bcf2023-06-16 08:03:30 +0800520+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
developer8cb3ac72022-07-04 10:55:14 +0800521+
522+ l2->etype = BIT(port);
523+
developeree39bcf2023-06-16 08:03:30 +0800524+ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER))
525+ entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
developer8cb3ac72022-07-04 10:55:14 +0800526+ else
527+ l2->etype |= BIT(8);
528+
developeree39bcf2023-06-16 08:03:30 +0800529+ entry->ib1 &= ~MTK_FOE_IB1_BIND_VLAN_TAG;
developer8cb3ac72022-07-04 10:55:14 +0800530+
531+ return 0;
532+}
533+
developeree39bcf2023-06-16 08:03:30 +0800534+int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid)
developer8cb3ac72022-07-04 10:55:14 +0800535+{
developeree39bcf2023-06-16 08:03:30 +0800536+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
developer8cb3ac72022-07-04 10:55:14 +0800537+
developeree39bcf2023-06-16 08:03:30 +0800538+ switch (FIELD_GET(MTK_FOE_IB1_BIND_VLAN_LAYER, entry->ib1)) {
developer8cb3ac72022-07-04 10:55:14 +0800539+ case 0:
developeree39bcf2023-06-16 08:03:30 +0800540+ entry->ib1 |= MTK_FOE_IB1_BIND_VLAN_TAG |
541+ FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
developer8cb3ac72022-07-04 10:55:14 +0800542+ l2->vlan1 = vid;
543+ return 0;
544+ case 1:
developeree39bcf2023-06-16 08:03:30 +0800545+ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG)) {
developer8cb3ac72022-07-04 10:55:14 +0800546+ l2->vlan1 = vid;
547+ l2->etype |= BIT(8);
548+ } else {
549+ l2->vlan2 = vid;
developeree39bcf2023-06-16 08:03:30 +0800550+ entry->ib1 += FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
developer8cb3ac72022-07-04 10:55:14 +0800551+ }
552+ return 0;
553+ default:
554+ return -ENOSPC;
555+ }
556+}
557+
developeree39bcf2023-06-16 08:03:30 +0800558+int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid)
developer8cb3ac72022-07-04 10:55:14 +0800559+{
developeree39bcf2023-06-16 08:03:30 +0800560+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
developer8cb3ac72022-07-04 10:55:14 +0800561+
developeree39bcf2023-06-16 08:03:30 +0800562+ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER) ||
563+ (entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG))
developer8cb3ac72022-07-04 10:55:14 +0800564+ l2->etype = ETH_P_PPP_SES;
565+
developeree39bcf2023-06-16 08:03:30 +0800566+ entry->ib1 |= MTK_FOE_IB1_BIND_PPPOE;
developer8cb3ac72022-07-04 10:55:14 +0800567+ l2->pppoe_id = sid;
568+
569+ return 0;
570+}
571+
572+static inline bool mtk_foe_entry_usable(struct mtk_foe_entry *entry)
573+{
574+ return !(entry->ib1 & MTK_FOE_IB1_STATIC) &&
575+ FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1) != MTK_FOE_STATE_BIND;
576+}
577+
developeree39bcf2023-06-16 08:03:30 +0800578+int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
579+ u16 timestamp)
developer7eb15dc2023-06-14 17:44:03 +0800580+{
developer8cb3ac72022-07-04 10:55:14 +0800581+ struct mtk_foe_entry *hwe;
developeree39bcf2023-06-16 08:03:30 +0800582+ u32 hash;
developer7eb15dc2023-06-14 17:44:03 +0800583+
developeree39bcf2023-06-16 08:03:30 +0800584+ timestamp &= MTK_FOE_IB1_BIND_TIMESTAMP;
585+ entry->ib1 &= ~MTK_FOE_IB1_BIND_TIMESTAMP;
586+ entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_TIMESTAMP, timestamp);
developer7eb15dc2023-06-14 17:44:03 +0800587+
developeree39bcf2023-06-16 08:03:30 +0800588+ hash = mtk_ppe_hash_entry(entry);
589+ hwe = &ppe->foe_table[hash];
590+ if (!mtk_foe_entry_usable(hwe)) {
591+ hwe++;
592+ hash++;
developer7eb15dc2023-06-14 17:44:03 +0800593+
developeree39bcf2023-06-16 08:03:30 +0800594+ if (!mtk_foe_entry_usable(hwe))
595+ return -ENOSPC;
developer7eb15dc2023-06-14 17:44:03 +0800596+ }
597+
developeree39bcf2023-06-16 08:03:30 +0800598+ memcpy(&hwe->data, &entry->data, sizeof(hwe->data));
developer8cb3ac72022-07-04 10:55:14 +0800599+ wmb();
600+ hwe->ib1 = entry->ib1;
601+
602+ dma_wmb();
603+
604+ mtk_ppe_cache_clear(ppe);
developer7eb15dc2023-06-14 17:44:03 +0800605+
developeree39bcf2023-06-16 08:03:30 +0800606+ return hash;
developer7eb15dc2023-06-14 17:44:03 +0800607+}
608+
developeree39bcf2023-06-16 08:03:30 +0800609+int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
610+ int version)
developer7eb15dc2023-06-14 17:44:03 +0800611+{
developeree39bcf2023-06-16 08:03:30 +0800612+ struct mtk_foe_entry *foe;
developer8cb3ac72022-07-04 10:55:14 +0800613+
614+ /* need to allocate a separate device, since the PPE DMA access is
615+ * not coherent.
616+ */
617+ ppe->base = base;
618+ ppe->dev = dev;
developeree39bcf2023-06-16 08:03:30 +0800619+ ppe->version = version;
developer8cb3ac72022-07-04 10:55:14 +0800620+
developeree39bcf2023-06-16 08:03:30 +0800621+ foe = dmam_alloc_coherent(ppe->dev, MTK_PPE_ENTRIES * sizeof(*foe),
developer8cb3ac72022-07-04 10:55:14 +0800622+ &ppe->foe_phys, GFP_KERNEL);
623+ if (!foe)
developeree39bcf2023-06-16 08:03:30 +0800624+ return -ENOMEM;
developer8cb3ac72022-07-04 10:55:14 +0800625+
626+ ppe->foe_table = foe;
627+
developeree39bcf2023-06-16 08:03:30 +0800628+ mtk_ppe_debugfs_init(ppe);
developer7eb15dc2023-06-14 17:44:03 +0800629+
developeree39bcf2023-06-16 08:03:30 +0800630+ return 0;
developer8cb3ac72022-07-04 10:55:14 +0800631+}
632+
633+static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe)
634+{
635+ static const u8 skip[] = { 12, 25, 38, 51, 76, 89, 102 };
636+ int i, k;
637+
developeree39bcf2023-06-16 08:03:30 +0800638+ memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(*ppe->foe_table));
developer8cb3ac72022-07-04 10:55:14 +0800639+
640+ if (!IS_ENABLED(CONFIG_SOC_MT7621))
641+ return;
642+
643+ /* skip all entries that cross the 1024-byte boundary */
developeree39bcf2023-06-16 08:03:30 +0800644+ for (i = 0; i < MTK_PPE_ENTRIES; i += 128)
645+ for (k = 0; k < ARRAY_SIZE(skip); k++)
646+ ppe->foe_table[i + skip[k]].ib1 |= MTK_FOE_IB1_STATIC;
developer8cb3ac72022-07-04 10:55:14 +0800647+}
648+
developeree39bcf2023-06-16 08:03:30 +0800649+int mtk_ppe_start(struct mtk_ppe *ppe)
developer8cb3ac72022-07-04 10:55:14 +0800650+{
651+ u32 val;
652+
653+ mtk_ppe_init_foe_table(ppe);
654+ ppe_w32(ppe, MTK_PPE_TB_BASE, ppe->foe_phys);
655+
656+ val = MTK_PPE_TB_CFG_ENTRY_80B |
657+ MTK_PPE_TB_CFG_AGE_NON_L4 |
658+ MTK_PPE_TB_CFG_AGE_UNBIND |
659+ MTK_PPE_TB_CFG_AGE_TCP |
660+ MTK_PPE_TB_CFG_AGE_UDP |
661+ MTK_PPE_TB_CFG_AGE_TCP_FIN |
662+ FIELD_PREP(MTK_PPE_TB_CFG_SEARCH_MISS,
663+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD) |
664+ FIELD_PREP(MTK_PPE_TB_CFG_KEEPALIVE,
665+ MTK_PPE_KEEPALIVE_DISABLE) |
666+ FIELD_PREP(MTK_PPE_TB_CFG_HASH_MODE, 1) |
667+ FIELD_PREP(MTK_PPE_TB_CFG_SCAN_MODE,
668+ MTK_PPE_SCAN_MODE_KEEPALIVE_AGE) |
669+ FIELD_PREP(MTK_PPE_TB_CFG_ENTRY_NUM,
670+ MTK_PPE_ENTRIES_SHIFT);
671+ ppe_w32(ppe, MTK_PPE_TB_CFG, val);
672+
673+ ppe_w32(ppe, MTK_PPE_IP_PROTO_CHK,
674+ MTK_PPE_IP_PROTO_CHK_IPV4 | MTK_PPE_IP_PROTO_CHK_IPV6);
675+
676+ mtk_ppe_cache_enable(ppe, true);
677+
developeree39bcf2023-06-16 08:03:30 +0800678+ val = MTK_PPE_FLOW_CFG_IP4_TCP_FRAG |
679+ MTK_PPE_FLOW_CFG_IP4_UDP_FRAG |
680+ MTK_PPE_FLOW_CFG_IP6_3T_ROUTE |
developer8cb3ac72022-07-04 10:55:14 +0800681+ MTK_PPE_FLOW_CFG_IP6_5T_ROUTE |
682+ MTK_PPE_FLOW_CFG_IP6_6RD |
683+ MTK_PPE_FLOW_CFG_IP4_NAT |
684+ MTK_PPE_FLOW_CFG_IP4_NAPT |
685+ MTK_PPE_FLOW_CFG_IP4_DSLITE |
developeree39bcf2023-06-16 08:03:30 +0800686+ MTK_PPE_FLOW_CFG_L2_BRIDGE |
developer8cb3ac72022-07-04 10:55:14 +0800687+ MTK_PPE_FLOW_CFG_IP4_NAT_FRAG;
688+ ppe_w32(ppe, MTK_PPE_FLOW_CFG, val);
689+
690+ val = FIELD_PREP(MTK_PPE_UNBIND_AGE_MIN_PACKETS, 1000) |
691+ FIELD_PREP(MTK_PPE_UNBIND_AGE_DELTA, 3);
692+ ppe_w32(ppe, MTK_PPE_UNBIND_AGE, val);
693+
developeree39bcf2023-06-16 08:03:30 +0800694+ val = FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_UDP, 30) |
developer8cb3ac72022-07-04 10:55:14 +0800695+ FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_NON_L4, 1);
696+ ppe_w32(ppe, MTK_PPE_BIND_AGE0, val);
697+
698+ val = FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP_FIN, 1) |
developeree39bcf2023-06-16 08:03:30 +0800699+ FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP, 30);
developer8cb3ac72022-07-04 10:55:14 +0800700+ ppe_w32(ppe, MTK_PPE_BIND_AGE1, val);
701+
702+ val = MTK_PPE_BIND_LIMIT0_QUARTER | MTK_PPE_BIND_LIMIT0_HALF;
703+ ppe_w32(ppe, MTK_PPE_BIND_LIMIT0, val);
704+
705+ val = MTK_PPE_BIND_LIMIT1_FULL |
706+ FIELD_PREP(MTK_PPE_BIND_LIMIT1_NON_L4, 1);
707+ ppe_w32(ppe, MTK_PPE_BIND_LIMIT1, val);
708+
709+ val = FIELD_PREP(MTK_PPE_BIND_RATE_BIND, 30) |
710+ FIELD_PREP(MTK_PPE_BIND_RATE_PREBIND, 1);
711+ ppe_w32(ppe, MTK_PPE_BIND_RATE, val);
712+
713+ /* enable PPE */
714+ val = MTK_PPE_GLO_CFG_EN |
715+ MTK_PPE_GLO_CFG_IP4_L4_CS_DROP |
716+ MTK_PPE_GLO_CFG_IP4_CS_DROP |
717+ MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE;
718+ ppe_w32(ppe, MTK_PPE_GLO_CFG, val);
719+
720+ ppe_w32(ppe, MTK_PPE_DEFAULT_CPU_PORT, 0);
721+
developeree39bcf2023-06-16 08:03:30 +0800722+ return 0;
developer8cb3ac72022-07-04 10:55:14 +0800723+}
724+
725+int mtk_ppe_stop(struct mtk_ppe *ppe)
726+{
727+ u32 val;
728+ int i;
729+
developeree39bcf2023-06-16 08:03:30 +0800730+ for (i = 0; i < MTK_PPE_ENTRIES; i++)
731+ ppe->foe_table[i].ib1 = FIELD_PREP(MTK_FOE_IB1_STATE,
732+ MTK_FOE_STATE_INVALID);
developer8cb3ac72022-07-04 10:55:14 +0800733+
734+ mtk_ppe_cache_enable(ppe, false);
735+
736+ /* disable offload engine */
737+ ppe_clear(ppe, MTK_PPE_GLO_CFG, MTK_PPE_GLO_CFG_EN);
738+ ppe_w32(ppe, MTK_PPE_FLOW_CFG, 0);
739+
740+ /* disable aging */
741+ val = MTK_PPE_TB_CFG_AGE_NON_L4 |
742+ MTK_PPE_TB_CFG_AGE_UNBIND |
743+ MTK_PPE_TB_CFG_AGE_TCP |
744+ MTK_PPE_TB_CFG_AGE_UDP |
745+ MTK_PPE_TB_CFG_AGE_TCP_FIN;
746+ ppe_clear(ppe, MTK_PPE_TB_CFG, val);
747+
748+ return mtk_ppe_wait_busy(ppe);
749+}
750diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h
751new file mode 100644
developeree39bcf2023-06-16 08:03:30 +0800752index 000000000..242fb8f2a
developer8cb3ac72022-07-04 10:55:14 +0800753--- /dev/null
754+++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
developeree39bcf2023-06-16 08:03:30 +0800755@@ -0,0 +1,288 @@
developer8cb3ac72022-07-04 10:55:14 +0800756+// SPDX-License-Identifier: GPL-2.0-only
757+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
758+
759+#ifndef __MTK_PPE_H
760+#define __MTK_PPE_H
761+
762+#include <linux/kernel.h>
763+#include <linux/bitfield.h>
developeree39bcf2023-06-16 08:03:30 +0800764+
765+#define MTK_ETH_PPE_BASE 0xc00
developer8cb3ac72022-07-04 10:55:14 +0800766+
767+#define MTK_PPE_ENTRIES_SHIFT 3
768+#define MTK_PPE_ENTRIES (1024 << MTK_PPE_ENTRIES_SHIFT)
769+#define MTK_PPE_HASH_MASK (MTK_PPE_ENTRIES - 1)
770+#define MTK_PPE_WAIT_TIMEOUT_US 1000000
771+
772+#define MTK_FOE_IB1_UNBIND_TIMESTAMP GENMASK(7, 0)
773+#define MTK_FOE_IB1_UNBIND_PACKETS GENMASK(23, 8)
774+#define MTK_FOE_IB1_UNBIND_PREBIND BIT(24)
775+
776+#define MTK_FOE_IB1_BIND_TIMESTAMP GENMASK(14, 0)
777+#define MTK_FOE_IB1_BIND_KEEPALIVE BIT(15)
778+#define MTK_FOE_IB1_BIND_VLAN_LAYER GENMASK(18, 16)
779+#define MTK_FOE_IB1_BIND_PPPOE BIT(19)
780+#define MTK_FOE_IB1_BIND_VLAN_TAG BIT(20)
781+#define MTK_FOE_IB1_BIND_PKT_SAMPLE BIT(21)
782+#define MTK_FOE_IB1_BIND_CACHE BIT(22)
783+#define MTK_FOE_IB1_BIND_TUNNEL_DECAP BIT(23)
784+#define MTK_FOE_IB1_BIND_TTL BIT(24)
785+
786+#define MTK_FOE_IB1_PACKET_TYPE GENMASK(27, 25)
787+#define MTK_FOE_IB1_STATE GENMASK(29, 28)
788+#define MTK_FOE_IB1_UDP BIT(30)
789+#define MTK_FOE_IB1_STATIC BIT(31)
790+
791+enum {
792+ MTK_PPE_PKT_TYPE_IPV4_HNAPT = 0,
793+ MTK_PPE_PKT_TYPE_IPV4_ROUTE = 1,
794+ MTK_PPE_PKT_TYPE_BRIDGE = 2,
795+ MTK_PPE_PKT_TYPE_IPV4_DSLITE = 3,
796+ MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T = 4,
797+ MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T = 5,
798+ MTK_PPE_PKT_TYPE_IPV6_6RD = 7,
799+};
800+
801+#define MTK_FOE_IB2_QID GENMASK(3, 0)
802+#define MTK_FOE_IB2_PSE_QOS BIT(4)
803+#define MTK_FOE_IB2_DEST_PORT GENMASK(7, 5)
804+#define MTK_FOE_IB2_MULTICAST BIT(8)
805+
developeree39bcf2023-06-16 08:03:30 +0800806+#define MTK_FOE_IB2_WHNAT_QID2 GENMASK(13, 12)
807+#define MTK_FOE_IB2_WHNAT_DEVIDX BIT(16)
808+#define MTK_FOE_IB2_WHNAT_NAT BIT(17)
developer8cb3ac72022-07-04 10:55:14 +0800809+
810+#define MTK_FOE_IB2_PORT_MG GENMASK(17, 12)
811+
812+#define MTK_FOE_IB2_PORT_AG GENMASK(23, 18)
813+
814+#define MTK_FOE_IB2_DSCP GENMASK(31, 24)
815+
developeree39bcf2023-06-16 08:03:30 +0800816+#define MTK_FOE_VLAN2_WHNAT_BSS GENMASK(5, 0)
817+#define MTK_FOE_VLAN2_WHNAT_WCID GENMASK(13, 6)
818+#define MTK_FOE_VLAN2_WHNAT_RING GENMASK(15, 14)
developer8cb3ac72022-07-04 10:55:14 +0800819+
820+enum {
821+ MTK_FOE_STATE_INVALID,
822+ MTK_FOE_STATE_UNBIND,
823+ MTK_FOE_STATE_BIND,
824+ MTK_FOE_STATE_FIN
825+};
826+
827+struct mtk_foe_mac_info {
828+ u16 vlan1;
829+ u16 etype;
830+
831+ u32 dest_mac_hi;
832+
833+ u16 vlan2;
834+ u16 dest_mac_lo;
835+
836+ u32 src_mac_hi;
837+
838+ u16 pppoe_id;
839+ u16 src_mac_lo;
840+};
841+
842+struct mtk_foe_bridge {
developeree39bcf2023-06-16 08:03:30 +0800843+ u32 dest_mac_hi;
844+
845+ u16 src_mac_lo;
846+ u16 dest_mac_lo;
developer8cb3ac72022-07-04 10:55:14 +0800847+
developeree39bcf2023-06-16 08:03:30 +0800848+ u32 src_mac_hi;
developer8cb3ac72022-07-04 10:55:14 +0800849+
850+ u32 ib2;
851+
developeree39bcf2023-06-16 08:03:30 +0800852+ u32 _rsv[5];
853+
854+ u32 udf_tsid;
developer8cb3ac72022-07-04 10:55:14 +0800855+ struct mtk_foe_mac_info l2;
856+};
857+
858+struct mtk_ipv4_tuple {
859+ u32 src_ip;
860+ u32 dest_ip;
861+ union {
862+ struct {
863+ u16 dest_port;
864+ u16 src_port;
865+ };
866+ struct {
867+ u8 protocol;
868+ u8 _pad[3]; /* fill with 0xa5a5a5 */
869+ };
870+ u32 ports;
871+ };
872+};
873+
874+struct mtk_foe_ipv4 {
875+ struct mtk_ipv4_tuple orig;
876+
877+ u32 ib2;
878+
879+ struct mtk_ipv4_tuple new;
880+
881+ u16 timestamp;
882+ u16 _rsv0[3];
883+
884+ u32 udf_tsid;
885+
886+ struct mtk_foe_mac_info l2;
887+};
888+
889+struct mtk_foe_ipv4_dslite {
890+ struct mtk_ipv4_tuple ip4;
891+
892+ u32 tunnel_src_ip[4];
893+ u32 tunnel_dest_ip[4];
894+
895+ u8 flow_label[3];
896+ u8 priority;
897+
898+ u32 udf_tsid;
899+
900+ u32 ib2;
901+
902+ struct mtk_foe_mac_info l2;
903+};
904+
905+struct mtk_foe_ipv6 {
906+ u32 src_ip[4];
907+ u32 dest_ip[4];
908+
909+ union {
910+ struct {
911+ u8 protocol;
912+ u8 _pad[3]; /* fill with 0xa5a5a5 */
913+ }; /* 3-tuple */
914+ struct {
915+ u16 dest_port;
916+ u16 src_port;
917+ }; /* 5-tuple */
918+ u32 ports;
919+ };
920+
921+ u32 _rsv[3];
922+
923+ u32 udf;
924+
925+ u32 ib2;
926+ struct mtk_foe_mac_info l2;
927+};
928+
929+struct mtk_foe_ipv6_6rd {
930+ u32 src_ip[4];
931+ u32 dest_ip[4];
932+ u16 dest_port;
933+ u16 src_port;
934+
935+ u32 tunnel_src_ip;
936+ u32 tunnel_dest_ip;
937+
938+ u16 hdr_csum;
939+ u8 dscp;
940+ u8 ttl;
941+
942+ u8 flag;
943+ u8 pad;
944+ u8 per_flow_6rd_id;
945+ u8 pad2;
946+
947+ u32 ib2;
948+ struct mtk_foe_mac_info l2;
949+};
950+
951+struct mtk_foe_entry {
952+ u32 ib1;
953+
954+ union {
955+ struct mtk_foe_bridge bridge;
956+ struct mtk_foe_ipv4 ipv4;
957+ struct mtk_foe_ipv4_dslite dslite;
958+ struct mtk_foe_ipv6 ipv6;
959+ struct mtk_foe_ipv6_6rd ipv6_6rd;
developeree39bcf2023-06-16 08:03:30 +0800960+ u32 data[19];
developer8cb3ac72022-07-04 10:55:14 +0800961+ };
962+};
963+
964+enum {
965+ MTK_PPE_CPU_REASON_TTL_EXCEEDED = 0x02,
966+ MTK_PPE_CPU_REASON_OPTION_HEADER = 0x03,
967+ MTK_PPE_CPU_REASON_NO_FLOW = 0x07,
968+ MTK_PPE_CPU_REASON_IPV4_FRAG = 0x08,
969+ MTK_PPE_CPU_REASON_IPV4_DSLITE_FRAG = 0x09,
970+ MTK_PPE_CPU_REASON_IPV4_DSLITE_NO_TCP_UDP = 0x0a,
971+ MTK_PPE_CPU_REASON_IPV6_6RD_NO_TCP_UDP = 0x0b,
972+ MTK_PPE_CPU_REASON_TCP_FIN_SYN_RST = 0x0c,
973+ MTK_PPE_CPU_REASON_UN_HIT = 0x0d,
974+ MTK_PPE_CPU_REASON_HIT_UNBIND = 0x0e,
975+ MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED = 0x0f,
976+ MTK_PPE_CPU_REASON_HIT_BIND_TCP_FIN = 0x10,
977+ MTK_PPE_CPU_REASON_HIT_TTL_1 = 0x11,
978+ MTK_PPE_CPU_REASON_HIT_BIND_VLAN_VIOLATION = 0x12,
979+ MTK_PPE_CPU_REASON_KEEPALIVE_UC_OLD_HDR = 0x13,
980+ MTK_PPE_CPU_REASON_KEEPALIVE_MC_NEW_HDR = 0x14,
981+ MTK_PPE_CPU_REASON_KEEPALIVE_DUP_OLD_HDR = 0x15,
982+ MTK_PPE_CPU_REASON_HIT_BIND_FORCE_CPU = 0x16,
983+ MTK_PPE_CPU_REASON_TUNNEL_OPTION_HEADER = 0x17,
984+ MTK_PPE_CPU_REASON_MULTICAST_TO_CPU = 0x18,
985+ MTK_PPE_CPU_REASON_MULTICAST_TO_GMAC1_CPU = 0x19,
986+ MTK_PPE_CPU_REASON_HIT_PRE_BIND = 0x1a,
987+ MTK_PPE_CPU_REASON_PACKET_SAMPLING = 0x1b,
988+ MTK_PPE_CPU_REASON_EXCEED_MTU = 0x1c,
989+ MTK_PPE_CPU_REASON_PPE_BYPASS = 0x1e,
990+ MTK_PPE_CPU_REASON_INVALID = 0x1f,
991+};
992+
993+struct mtk_ppe {
994+ struct device *dev;
995+ void __iomem *base;
996+ int version;
997+
developeree39bcf2023-06-16 08:03:30 +0800998+ struct mtk_foe_entry *foe_table;
developer8cb3ac72022-07-04 10:55:14 +0800999+ dma_addr_t foe_phys;
1000+
1001+ void *acct_table;
1002+};
1003+
developeree39bcf2023-06-16 08:03:30 +08001004+int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
1005+ int version);
1006+int mtk_ppe_start(struct mtk_ppe *ppe);
developer8cb3ac72022-07-04 10:55:14 +08001007+int mtk_ppe_stop(struct mtk_ppe *ppe);
1008+
1009+static inline void
developeree39bcf2023-06-16 08:03:30 +08001010+mtk_foe_entry_clear(struct mtk_ppe *ppe, u16 hash)
developer8cb3ac72022-07-04 10:55:14 +08001011+{
developeree39bcf2023-06-16 08:03:30 +08001012+ ppe->foe_table[hash].ib1 = 0;
1013+ dma_wmb();
1014+}
developer8cb3ac72022-07-04 10:55:14 +08001015+
developeree39bcf2023-06-16 08:03:30 +08001016+static inline int
1017+mtk_foe_entry_timestamp(struct mtk_ppe *ppe, u16 hash)
1018+{
1019+ u32 ib1 = READ_ONCE(ppe->foe_table[hash].ib1);
developer8cb3ac72022-07-04 10:55:14 +08001020+
developeree39bcf2023-06-16 08:03:30 +08001021+ if (FIELD_GET(MTK_FOE_IB1_STATE, ib1) != MTK_FOE_STATE_BIND)
1022+ return -1;
developer7eb15dc2023-06-14 17:44:03 +08001023+
developeree39bcf2023-06-16 08:03:30 +08001024+ return FIELD_GET(MTK_FOE_IB1_BIND_TIMESTAMP, ib1);
developer8cb3ac72022-07-04 10:55:14 +08001025+}
1026+
developeree39bcf2023-06-16 08:03:30 +08001027+int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
1028+ u8 pse_port, u8 *src_mac, u8 *dest_mac);
1029+int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port);
1030+int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool orig,
developer8cb3ac72022-07-04 10:55:14 +08001031+ __be32 src_addr, __be16 src_port,
1032+ __be32 dest_addr, __be16 dest_port);
developeree39bcf2023-06-16 08:03:30 +08001033+int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
developer8cb3ac72022-07-04 10:55:14 +08001034+ __be32 *src_addr, __be16 src_port,
1035+ __be32 *dest_addr, __be16 dest_port);
developeree39bcf2023-06-16 08:03:30 +08001036+int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port);
1037+int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid);
1038+int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid);
1039+int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
1040+ u16 timestamp);
1041+int mtk_ppe_debugfs_init(struct mtk_ppe *ppe);
developer8cb3ac72022-07-04 10:55:14 +08001042+
1043+#endif
1044diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
1045new file mode 100644
developeree39bcf2023-06-16 08:03:30 +08001046index 000000000..d4b482340
developer8cb3ac72022-07-04 10:55:14 +08001047--- /dev/null
1048+++ b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
developeree39bcf2023-06-16 08:03:30 +08001049@@ -0,0 +1,214 @@
developer8cb3ac72022-07-04 10:55:14 +08001050+// SPDX-License-Identifier: GPL-2.0-only
1051+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
1052+
1053+#include <linux/kernel.h>
1054+#include <linux/debugfs.h>
1055+#include "mtk_eth_soc.h"
1056+
1057+struct mtk_flow_addr_info
1058+{
1059+ void *src, *dest;
1060+ u16 *src_port, *dest_port;
1061+ bool ipv6;
1062+};
1063+
1064+static const char *mtk_foe_entry_state_str(int state)
1065+{
1066+ static const char * const state_str[] = {
1067+ [MTK_FOE_STATE_INVALID] = "INV",
1068+ [MTK_FOE_STATE_UNBIND] = "UNB",
1069+ [MTK_FOE_STATE_BIND] = "BND",
1070+ [MTK_FOE_STATE_FIN] = "FIN",
1071+ };
1072+
1073+ if (state >= ARRAY_SIZE(state_str) || !state_str[state])
1074+ return "UNK";
1075+
1076+ return state_str[state];
1077+}
1078+
1079+static const char *mtk_foe_pkt_type_str(int type)
1080+{
1081+ static const char * const type_str[] = {
1082+ [MTK_PPE_PKT_TYPE_IPV4_HNAPT] = "IPv4 5T",
1083+ [MTK_PPE_PKT_TYPE_IPV4_ROUTE] = "IPv4 3T",
developeree39bcf2023-06-16 08:03:30 +08001084+ [MTK_PPE_PKT_TYPE_BRIDGE] = "L2",
developer8cb3ac72022-07-04 10:55:14 +08001085+ [MTK_PPE_PKT_TYPE_IPV4_DSLITE] = "DS-LITE",
1086+ [MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T] = "IPv6 3T",
1087+ [MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T] = "IPv6 5T",
1088+ [MTK_PPE_PKT_TYPE_IPV6_6RD] = "6RD",
1089+ };
1090+
1091+ if (type >= ARRAY_SIZE(type_str) || !type_str[type])
1092+ return "UNKNOWN";
1093+
1094+ return type_str[type];
1095+}
1096+
1097+static void
1098+mtk_print_addr(struct seq_file *m, u32 *addr, bool ipv6)
1099+{
1100+ u32 n_addr[4];
1101+ int i;
1102+
1103+ if (!ipv6) {
1104+ seq_printf(m, "%pI4h", addr);
1105+ return;
1106+ }
1107+
1108+ for (i = 0; i < ARRAY_SIZE(n_addr); i++)
1109+ n_addr[i] = htonl(addr[i]);
1110+ seq_printf(m, "%pI6", n_addr);
1111+}
1112+
1113+static void
1114+mtk_print_addr_info(struct seq_file *m, struct mtk_flow_addr_info *ai)
1115+{
1116+ mtk_print_addr(m, ai->src, ai->ipv6);
1117+ if (ai->src_port)
1118+ seq_printf(m, ":%d", *ai->src_port);
1119+ seq_printf(m, "->");
1120+ mtk_print_addr(m, ai->dest, ai->ipv6);
1121+ if (ai->dest_port)
1122+ seq_printf(m, ":%d", *ai->dest_port);
1123+}
1124+
1125+static int
1126+mtk_ppe_debugfs_foe_show(struct seq_file *m, void *private, bool bind)
1127+{
1128+ struct mtk_ppe *ppe = m->private;
1129+ int i;
1130+
1131+ for (i = 0; i < MTK_PPE_ENTRIES; i++) {
developeree39bcf2023-06-16 08:03:30 +08001132+ struct mtk_foe_entry *entry = &ppe->foe_table[i];
developer8cb3ac72022-07-04 10:55:14 +08001133+ struct mtk_foe_mac_info *l2;
1134+ struct mtk_flow_addr_info ai = {};
1135+ unsigned char h_source[ETH_ALEN];
1136+ unsigned char h_dest[ETH_ALEN];
1137+ int type, state;
1138+ u32 ib2;
1139+
1140+
1141+ state = FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1);
1142+ if (!state)
1143+ continue;
1144+
1145+ if (bind && state != MTK_FOE_STATE_BIND)
1146+ continue;
1147+
1148+ type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
1149+ seq_printf(m, "%05x %s %7s", i,
1150+ mtk_foe_entry_state_str(state),
1151+ mtk_foe_pkt_type_str(type));
1152+
1153+ switch (type) {
1154+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
1155+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
1156+ ai.src_port = &entry->ipv4.orig.src_port;
1157+ ai.dest_port = &entry->ipv4.orig.dest_port;
1158+ fallthrough;
1159+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
1160+ ai.src = &entry->ipv4.orig.src_ip;
1161+ ai.dest = &entry->ipv4.orig.dest_ip;
1162+ break;
1163+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
1164+ ai.src_port = &entry->ipv6.src_port;
1165+ ai.dest_port = &entry->ipv6.dest_port;
1166+ fallthrough;
1167+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
1168+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
1169+ ai.src = &entry->ipv6.src_ip;
1170+ ai.dest = &entry->ipv6.dest_ip;
1171+ ai.ipv6 = true;
1172+ break;
1173+ }
1174+
1175+ seq_printf(m, " orig=");
1176+ mtk_print_addr_info(m, &ai);
1177+
1178+ switch (type) {
1179+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
1180+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
1181+ ai.src_port = &entry->ipv4.new.src_port;
1182+ ai.dest_port = &entry->ipv4.new.dest_port;
1183+ fallthrough;
1184+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
1185+ ai.src = &entry->ipv4.new.src_ip;
1186+ ai.dest = &entry->ipv4.new.dest_ip;
1187+ seq_printf(m, " new=");
1188+ mtk_print_addr_info(m, &ai);
1189+ break;
1190+ }
1191+
1192+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
1193+ l2 = &entry->ipv6.l2;
1194+ ib2 = entry->ipv6.ib2;
1195+ } else {
1196+ l2 = &entry->ipv4.l2;
1197+ ib2 = entry->ipv4.ib2;
1198+ }
1199+
1200+ *((__be32 *)h_source) = htonl(l2->src_mac_hi);
1201+ *((__be16 *)&h_source[4]) = htons(l2->src_mac_lo);
1202+ *((__be32 *)h_dest) = htonl(l2->dest_mac_hi);
1203+ *((__be16 *)&h_dest[4]) = htons(l2->dest_mac_lo);
1204+
1205+ seq_printf(m, " eth=%pM->%pM etype=%04x"
developeree39bcf2023-06-16 08:03:30 +08001206+ " vlan=%d,%d ib1=%08x ib2=%08x\n",
developer8cb3ac72022-07-04 10:55:14 +08001207+ h_source, h_dest, ntohs(l2->etype),
developeree39bcf2023-06-16 08:03:30 +08001208+ l2->vlan1, l2->vlan2, entry->ib1, ib2);
developer8cb3ac72022-07-04 10:55:14 +08001209+ }
1210+
1211+ return 0;
1212+}
1213+
1214+static int
1215+mtk_ppe_debugfs_foe_show_all(struct seq_file *m, void *private)
1216+{
1217+ return mtk_ppe_debugfs_foe_show(m, private, false);
1218+}
1219+
1220+static int
1221+mtk_ppe_debugfs_foe_show_bind(struct seq_file *m, void *private)
1222+{
1223+ return mtk_ppe_debugfs_foe_show(m, private, true);
1224+}
1225+
1226+static int
1227+mtk_ppe_debugfs_foe_open_all(struct inode *inode, struct file *file)
1228+{
1229+ return single_open(file, mtk_ppe_debugfs_foe_show_all,
1230+ inode->i_private);
1231+}
1232+
1233+static int
1234+mtk_ppe_debugfs_foe_open_bind(struct inode *inode, struct file *file)
1235+{
1236+ return single_open(file, mtk_ppe_debugfs_foe_show_bind,
1237+ inode->i_private);
1238+}
1239+
developeree39bcf2023-06-16 08:03:30 +08001240+int mtk_ppe_debugfs_init(struct mtk_ppe *ppe)
developer8cb3ac72022-07-04 10:55:14 +08001241+{
1242+ static const struct file_operations fops_all = {
1243+ .open = mtk_ppe_debugfs_foe_open_all,
1244+ .read = seq_read,
1245+ .llseek = seq_lseek,
1246+ .release = single_release,
1247+ };
developeree39bcf2023-06-16 08:03:30 +08001248+
developer8cb3ac72022-07-04 10:55:14 +08001249+ static const struct file_operations fops_bind = {
1250+ .open = mtk_ppe_debugfs_foe_open_bind,
1251+ .read = seq_read,
1252+ .llseek = seq_lseek,
1253+ .release = single_release,
1254+ };
developer7eb15dc2023-06-14 17:44:03 +08001255+
developeree39bcf2023-06-16 08:03:30 +08001256+ struct dentry *root;
developer7eb15dc2023-06-14 17:44:03 +08001257+
developeree39bcf2023-06-16 08:03:30 +08001258+ root = debugfs_create_dir("mtk_ppe", NULL);
developer8cb3ac72022-07-04 10:55:14 +08001259+ debugfs_create_file("entries", S_IRUGO, root, ppe, &fops_all);
1260+ debugfs_create_file("bind", S_IRUGO, root, ppe, &fops_bind);
1261+
1262+ return 0;
1263+}
1264diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
1265new file mode 100644
developeree39bcf2023-06-16 08:03:30 +08001266index 000000000..4294f0c74
developer8cb3ac72022-07-04 10:55:14 +08001267--- /dev/null
1268+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
developeree39bcf2023-06-16 08:03:30 +08001269@@ -0,0 +1,535 @@
developer8cb3ac72022-07-04 10:55:14 +08001270+// SPDX-License-Identifier: GPL-2.0-only
1271+/*
1272+ * Copyright (C) 2020 Felix Fietkau <nbd@nbd.name>
1273+ */
1274+
1275+#include <linux/if_ether.h>
1276+#include <linux/rhashtable.h>
1277+#include <linux/ip.h>
1278+#include <linux/ipv6.h>
1279+#include <net/flow_offload.h>
1280+#include <net/pkt_cls.h>
1281+#include <net/dsa.h>
1282+#include "mtk_eth_soc.h"
1283+
1284+struct mtk_flow_data {
1285+ struct ethhdr eth;
1286+
1287+ union {
1288+ struct {
1289+ __be32 src_addr;
1290+ __be32 dst_addr;
1291+ } v4;
1292+
1293+ struct {
1294+ struct in6_addr src_addr;
1295+ struct in6_addr dst_addr;
1296+ } v6;
1297+ };
1298+
1299+ __be16 src_port;
1300+ __be16 dst_port;
1301+
1302+ struct {
1303+ u16 id;
1304+ __be16 proto;
1305+ u8 num;
1306+ } vlan;
1307+ struct {
1308+ u16 sid;
1309+ u8 num;
1310+ } pppoe;
1311+};
1312+
developeree39bcf2023-06-16 08:03:30 +08001313+struct mtk_flow_entry {
1314+ struct rhash_head node;
1315+ unsigned long cookie;
1316+ u16 hash;
1317+};
1318+
developer8cb3ac72022-07-04 10:55:14 +08001319+static const struct rhashtable_params mtk_flow_ht_params = {
1320+ .head_offset = offsetof(struct mtk_flow_entry, node),
1321+ .key_offset = offsetof(struct mtk_flow_entry, cookie),
1322+ .key_len = sizeof(unsigned long),
1323+ .automatic_shrinking = true,
1324+};
1325+
developeree39bcf2023-06-16 08:03:30 +08001326+static u32
1327+mtk_eth_timestamp(struct mtk_eth *eth)
1328+{
1329+ return mtk_r32(eth, 0x0010) & MTK_FOE_IB1_BIND_TIMESTAMP;
1330+}
1331+
developer8cb3ac72022-07-04 10:55:14 +08001332+static int
developeree39bcf2023-06-16 08:03:30 +08001333+mtk_flow_set_ipv4_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data,
1334+ bool egress)
developer8cb3ac72022-07-04 10:55:14 +08001335+{
developeree39bcf2023-06-16 08:03:30 +08001336+ return mtk_foe_entry_set_ipv4_tuple(foe, egress,
developer8cb3ac72022-07-04 10:55:14 +08001337+ data->v4.src_addr, data->src_port,
1338+ data->v4.dst_addr, data->dst_port);
1339+}
1340+
1341+static int
developeree39bcf2023-06-16 08:03:30 +08001342+mtk_flow_set_ipv6_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data)
developer8cb3ac72022-07-04 10:55:14 +08001343+{
developeree39bcf2023-06-16 08:03:30 +08001344+ return mtk_foe_entry_set_ipv6_tuple(foe,
developer8cb3ac72022-07-04 10:55:14 +08001345+ data->v6.src_addr.s6_addr32, data->src_port,
1346+ data->v6.dst_addr.s6_addr32, data->dst_port);
1347+}
1348+
1349+static void
1350+mtk_flow_offload_mangle_eth(const struct flow_action_entry *act, void *eth)
1351+{
1352+ void *dest = eth + act->mangle.offset;
1353+ const void *src = &act->mangle.val;
1354+
1355+ if (act->mangle.offset > 8)
1356+ return;
1357+
1358+ if (act->mangle.mask == 0xffff) {
1359+ src += 2;
1360+ dest += 2;
1361+ }
1362+
1363+ memcpy(dest, src, act->mangle.mask ? 2 : 4);
1364+}
1365+
developeree39bcf2023-06-16 08:03:30 +08001366+
developer8cb3ac72022-07-04 10:55:14 +08001367+static int
1368+mtk_flow_mangle_ports(const struct flow_action_entry *act,
1369+ struct mtk_flow_data *data)
1370+{
1371+ u32 val = ntohl(act->mangle.val);
1372+
1373+ switch (act->mangle.offset) {
1374+ case 0:
1375+ if (act->mangle.mask == ~htonl(0xffff))
1376+ data->dst_port = cpu_to_be16(val);
1377+ else
1378+ data->src_port = cpu_to_be16(val >> 16);
1379+ break;
1380+ case 2:
1381+ data->dst_port = cpu_to_be16(val);
1382+ break;
1383+ default:
1384+ return -EINVAL;
1385+ }
1386+
1387+ return 0;
1388+}
1389+
1390+static int
1391+mtk_flow_mangle_ipv4(const struct flow_action_entry *act,
1392+ struct mtk_flow_data *data)
1393+{
1394+ __be32 *dest;
1395+
1396+ switch (act->mangle.offset) {
1397+ case offsetof(struct iphdr, saddr):
1398+ dest = &data->v4.src_addr;
1399+ break;
1400+ case offsetof(struct iphdr, daddr):
1401+ dest = &data->v4.dst_addr;
1402+ break;
1403+ default:
1404+ return -EINVAL;
1405+ }
1406+
1407+ memcpy(dest, &act->mangle.val, sizeof(u32));
1408+
1409+ return 0;
1410+}
1411+
1412+static int
1413+mtk_flow_get_dsa_port(struct net_device **dev)
1414+{
1415+#if IS_ENABLED(CONFIG_NET_DSA)
1416+ struct dsa_port *dp;
1417+
1418+ dp = dsa_port_from_netdev(*dev);
1419+ if (IS_ERR(dp))
1420+ return -ENODEV;
1421+
1422+ if (dp->cpu_dp->tag_ops->proto != DSA_TAG_PROTO_MTK)
1423+ return -ENODEV;
1424+
1425+ *dev = dp->cpu_dp->master;
1426+
1427+ return dp->index;
1428+#else
1429+ return -ENODEV;
1430+#endif
1431+}
1432+
1433+static int
1434+mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
developeree39bcf2023-06-16 08:03:30 +08001435+ struct net_device *dev)
developer8cb3ac72022-07-04 10:55:14 +08001436+{
developeree39bcf2023-06-16 08:03:30 +08001437+ int pse_port, dsa_port;
developer8cb3ac72022-07-04 10:55:14 +08001438+
1439+ dsa_port = mtk_flow_get_dsa_port(&dev);
developeree39bcf2023-06-16 08:03:30 +08001440+ if (dsa_port >= 0)
1441+ mtk_foe_entry_set_dsa(foe, dsa_port);
developer8cb3ac72022-07-04 10:55:14 +08001442+
1443+ if (dev == eth->netdev[0])
developeree39bcf2023-06-16 08:03:30 +08001444+ pse_port = PSE_GDM1_PORT;
developer8cb3ac72022-07-04 10:55:14 +08001445+ else if (dev == eth->netdev[1])
developeree39bcf2023-06-16 08:03:30 +08001446+ pse_port = PSE_GDM2_PORT;
1447+ else
1448+ return -EOPNOTSUPP;
developer7eb15dc2023-06-14 17:44:03 +08001449+
developeree39bcf2023-06-16 08:03:30 +08001450+ mtk_foe_entry_set_pse_port(foe, pse_port);
developer8cb3ac72022-07-04 10:55:14 +08001451+
1452+ return 0;
1453+}
1454+
1455+static int
developeree39bcf2023-06-16 08:03:30 +08001456+mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
developer8cb3ac72022-07-04 10:55:14 +08001457+{
1458+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1459+ struct flow_action_entry *act;
1460+ struct mtk_flow_data data = {};
1461+ struct mtk_foe_entry foe;
1462+ struct net_device *odev = NULL;
1463+ struct mtk_flow_entry *entry;
1464+ int offload_type = 0;
1465+ u16 addr_type = 0;
developeree39bcf2023-06-16 08:03:30 +08001466+ u32 timestamp;
developer8cb3ac72022-07-04 10:55:14 +08001467+ u8 l4proto = 0;
1468+ int err = 0;
developeree39bcf2023-06-16 08:03:30 +08001469+ int hash;
developer8cb3ac72022-07-04 10:55:14 +08001470+ int i;
1471+
1472+ if (rhashtable_lookup(&eth->flow_table, &f->cookie, mtk_flow_ht_params))
1473+ return -EEXIST;
1474+
1475+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
1476+ struct flow_match_meta match;
1477+
1478+ flow_rule_match_meta(rule, &match);
1479+ } else {
1480+ return -EOPNOTSUPP;
1481+ }
1482+
1483+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
1484+ struct flow_match_control match;
1485+
1486+ flow_rule_match_control(rule, &match);
1487+ addr_type = match.key->addr_type;
1488+ } else {
1489+ return -EOPNOTSUPP;
1490+ }
1491+
1492+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
1493+ struct flow_match_basic match;
1494+
1495+ flow_rule_match_basic(rule, &match);
1496+ l4proto = match.key->ip_proto;
1497+ } else {
1498+ return -EOPNOTSUPP;
1499+ }
1500+
1501+ flow_action_for_each(i, act, &rule->action) {
1502+ switch (act->id) {
1503+ case FLOW_ACTION_MANGLE:
1504+ if (act->mangle.htype == FLOW_ACT_MANGLE_HDR_TYPE_ETH)
1505+ mtk_flow_offload_mangle_eth(act, &data.eth);
1506+ break;
1507+ case FLOW_ACTION_REDIRECT:
1508+ odev = act->dev;
1509+ break;
1510+ case FLOW_ACTION_CSUM:
1511+ break;
1512+ case FLOW_ACTION_VLAN_PUSH:
1513+ if (data.vlan.num == 1 ||
1514+ act->vlan.proto != htons(ETH_P_8021Q))
1515+ return -EOPNOTSUPP;
1516+
1517+ data.vlan.id = act->vlan.vid;
1518+ data.vlan.proto = act->vlan.proto;
1519+ data.vlan.num++;
1520+ break;
1521+ case FLOW_ACTION_VLAN_POP:
1522+ break;
1523+ case FLOW_ACTION_PPPOE_PUSH:
1524+ if (data.pppoe.num == 1)
1525+ return -EOPNOTSUPP;
1526+
1527+ data.pppoe.sid = act->pppoe.sid;
1528+ data.pppoe.num++;
1529+ break;
1530+ default:
1531+ return -EOPNOTSUPP;
1532+ }
1533+ }
1534+
developeree39bcf2023-06-16 08:03:30 +08001535+ switch (addr_type) {
1536+ case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
1537+ offload_type = MTK_PPE_PKT_TYPE_IPV4_HNAPT;
1538+ break;
1539+ case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
1540+ offload_type = MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T;
1541+ break;
1542+ default:
1543+ return -EOPNOTSUPP;
1544+ }
1545+
developer8cb3ac72022-07-04 10:55:14 +08001546+ if (!is_valid_ether_addr(data.eth.h_source) ||
1547+ !is_valid_ether_addr(data.eth.h_dest))
1548+ return -EINVAL;
1549+
developeree39bcf2023-06-16 08:03:30 +08001550+ err = mtk_foe_entry_prepare(&foe, offload_type, l4proto, 0,
1551+ data.eth.h_source,
1552+ data.eth.h_dest);
developer8cb3ac72022-07-04 10:55:14 +08001553+ if (err)
1554+ return err;
1555+
1556+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
1557+ struct flow_match_ports ports;
1558+
1559+ flow_rule_match_ports(rule, &ports);
1560+ data.src_port = ports.key->src;
1561+ data.dst_port = ports.key->dst;
developeree39bcf2023-06-16 08:03:30 +08001562+ } else {
developer8cb3ac72022-07-04 10:55:14 +08001563+ return -EOPNOTSUPP;
1564+ }
1565+
1566+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
1567+ struct flow_match_ipv4_addrs addrs;
1568+
1569+ flow_rule_match_ipv4_addrs(rule, &addrs);
1570+
1571+ data.v4.src_addr = addrs.key->src;
1572+ data.v4.dst_addr = addrs.key->dst;
1573+
developeree39bcf2023-06-16 08:03:30 +08001574+ mtk_flow_set_ipv4_addr(&foe, &data, false);
developer8cb3ac72022-07-04 10:55:14 +08001575+ }
1576+
1577+ if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
1578+ struct flow_match_ipv6_addrs addrs;
1579+
1580+ flow_rule_match_ipv6_addrs(rule, &addrs);
1581+
1582+ data.v6.src_addr = addrs.key->src;
1583+ data.v6.dst_addr = addrs.key->dst;
1584+
developeree39bcf2023-06-16 08:03:30 +08001585+ mtk_flow_set_ipv6_addr(&foe, &data);
developer8cb3ac72022-07-04 10:55:14 +08001586+ }
1587+
1588+ flow_action_for_each(i, act, &rule->action) {
1589+ if (act->id != FLOW_ACTION_MANGLE)
1590+ continue;
1591+
1592+ switch (act->mangle.htype) {
1593+ case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
1594+ case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
1595+ err = mtk_flow_mangle_ports(act, &data);
1596+ break;
1597+ case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
1598+ err = mtk_flow_mangle_ipv4(act, &data);
1599+ break;
1600+ case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
1601+ /* handled earlier */
1602+ break;
1603+ default:
1604+ return -EOPNOTSUPP;
1605+ }
1606+
1607+ if (err)
1608+ return err;
1609+ }
1610+
1611+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
developeree39bcf2023-06-16 08:03:30 +08001612+ err = mtk_flow_set_ipv4_addr(&foe, &data, true);
developer8cb3ac72022-07-04 10:55:14 +08001613+ if (err)
1614+ return err;
1615+ }
1616+
1617+ if (data.vlan.num == 1) {
1618+ if (data.vlan.proto != htons(ETH_P_8021Q))
1619+ return -EOPNOTSUPP;
1620+
developeree39bcf2023-06-16 08:03:30 +08001621+ mtk_foe_entry_set_vlan(&foe, data.vlan.id);
developer8cb3ac72022-07-04 10:55:14 +08001622+ }
1623+ if (data.pppoe.num == 1)
developeree39bcf2023-06-16 08:03:30 +08001624+ mtk_foe_entry_set_pppoe(&foe, data.pppoe.sid);
developer8cb3ac72022-07-04 10:55:14 +08001625+
developeree39bcf2023-06-16 08:03:30 +08001626+ err = mtk_flow_set_output_device(eth, &foe, odev);
developer8cb3ac72022-07-04 10:55:14 +08001627+ if (err)
1628+ return err;
1629+
1630+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1631+ if (!entry)
1632+ return -ENOMEM;
1633+
1634+ entry->cookie = f->cookie;
developeree39bcf2023-06-16 08:03:30 +08001635+ timestamp = mtk_eth_timestamp(eth);
1636+ hash = mtk_foe_entry_commit(&eth->ppe, &foe, timestamp);
1637+ if (hash < 0) {
1638+ err = hash;
developer8cb3ac72022-07-04 10:55:14 +08001639+ goto free;
developeree39bcf2023-06-16 08:03:30 +08001640+ }
developer8cb3ac72022-07-04 10:55:14 +08001641+
developeree39bcf2023-06-16 08:03:30 +08001642+ entry->hash = hash;
developer8cb3ac72022-07-04 10:55:14 +08001643+ err = rhashtable_insert_fast(&eth->flow_table, &entry->node,
1644+ mtk_flow_ht_params);
1645+ if (err < 0)
developeree39bcf2023-06-16 08:03:30 +08001646+ goto clear_flow;
developer8cb3ac72022-07-04 10:55:14 +08001647+
1648+ return 0;
developeree39bcf2023-06-16 08:03:30 +08001649+clear_flow:
1650+ mtk_foe_entry_clear(&eth->ppe, hash);
developer8cb3ac72022-07-04 10:55:14 +08001651+free:
1652+ kfree(entry);
1653+ return err;
1654+}
1655+
1656+static int
1657+mtk_flow_offload_destroy(struct mtk_eth *eth, struct flow_cls_offload *f)
1658+{
1659+ struct mtk_flow_entry *entry;
1660+
1661+ entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
1662+ mtk_flow_ht_params);
1663+ if (!entry)
1664+ return -ENOENT;
1665+
developeree39bcf2023-06-16 08:03:30 +08001666+ mtk_foe_entry_clear(&eth->ppe, entry->hash);
developer8cb3ac72022-07-04 10:55:14 +08001667+ rhashtable_remove_fast(&eth->flow_table, &entry->node,
1668+ mtk_flow_ht_params);
1669+ kfree(entry);
1670+
1671+ return 0;
1672+}
1673+
1674+static int
1675+mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f)
1676+{
1677+ struct mtk_flow_entry *entry;
developeree39bcf2023-06-16 08:03:30 +08001678+ int timestamp;
1679+ u32 idle;
developer8cb3ac72022-07-04 10:55:14 +08001680+
1681+ entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
1682+ mtk_flow_ht_params);
1683+ if (!entry)
1684+ return -ENOENT;
1685+
developeree39bcf2023-06-16 08:03:30 +08001686+ timestamp = mtk_foe_entry_timestamp(&eth->ppe, entry->hash);
1687+ if (timestamp < 0)
1688+ return -ETIMEDOUT;
1689+
1690+ idle = mtk_eth_timestamp(eth) - timestamp;
developer8cb3ac72022-07-04 10:55:14 +08001691+ f->stats.lastused = jiffies - idle * HZ;
1692+
1693+ return 0;
1694+}
1695+
1696+static DEFINE_MUTEX(mtk_flow_offload_mutex);
1697+
developeree39bcf2023-06-16 08:03:30 +08001698+static int
1699+mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
developer8cb3ac72022-07-04 10:55:14 +08001700+{
developeree39bcf2023-06-16 08:03:30 +08001701+ struct flow_cls_offload *cls = type_data;
1702+ struct net_device *dev = cb_priv;
1703+ struct mtk_mac *mac = netdev_priv(dev);
1704+ struct mtk_eth *eth = mac->hw;
developer8cb3ac72022-07-04 10:55:14 +08001705+ int err;
1706+
developeree39bcf2023-06-16 08:03:30 +08001707+ if (!tc_can_offload(dev))
1708+ return -EOPNOTSUPP;
1709+
1710+ if (type != TC_SETUP_CLSFLOWER)
1711+ return -EOPNOTSUPP;
1712+
developer8cb3ac72022-07-04 10:55:14 +08001713+ mutex_lock(&mtk_flow_offload_mutex);
1714+ switch (cls->command) {
1715+ case FLOW_CLS_REPLACE:
developeree39bcf2023-06-16 08:03:30 +08001716+ err = mtk_flow_offload_replace(eth, cls);
developer8cb3ac72022-07-04 10:55:14 +08001717+ break;
1718+ case FLOW_CLS_DESTROY:
1719+ err = mtk_flow_offload_destroy(eth, cls);
1720+ break;
1721+ case FLOW_CLS_STATS:
1722+ err = mtk_flow_offload_stats(eth, cls);
1723+ break;
1724+ default:
1725+ err = -EOPNOTSUPP;
1726+ break;
1727+ }
1728+ mutex_unlock(&mtk_flow_offload_mutex);
1729+
1730+ return err;
1731+}
1732+
1733+static int
1734+mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f)
1735+{
1736+ struct mtk_mac *mac = netdev_priv(dev);
1737+ struct mtk_eth *eth = mac->hw;
1738+ static LIST_HEAD(block_cb_list);
1739+ struct flow_block_cb *block_cb;
1740+ flow_setup_cb_t *cb;
developeree39bcf2023-06-16 08:03:30 +08001741+ int err = 0;
developer207b39d2022-10-07 15:57:16 +08001742+
developeree39bcf2023-06-16 08:03:30 +08001743+ if (!eth->ppe.foe_table)
developer8cb3ac72022-07-04 10:55:14 +08001744+ return -EOPNOTSUPP;
1745+
1746+ if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
1747+ return -EOPNOTSUPP;
1748+
1749+ cb = mtk_eth_setup_tc_block_cb;
1750+ f->driver_block_list = &block_cb_list;
1751+
1752+ switch (f->command) {
1753+ case FLOW_BLOCK_BIND:
1754+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
1755+ if (block_cb) {
1756+ flow_block_cb_incref(block_cb);
developeree39bcf2023-06-16 08:03:30 +08001757+ goto unlock;
developer8cb3ac72022-07-04 10:55:14 +08001758+ }
1759+ block_cb = flow_block_cb_alloc(cb, dev, dev, NULL);
developeree39bcf2023-06-16 08:03:30 +08001760+ if (IS_ERR(block_cb)) {
1761+ err = PTR_ERR(block_cb);
1762+ goto unlock;
1763+ }
developer8cb3ac72022-07-04 10:55:14 +08001764+
1765+ flow_block_cb_add(block_cb, f);
1766+ list_add_tail(&block_cb->driver_list, &block_cb_list);
developeree39bcf2023-06-16 08:03:30 +08001767+ break;
developer8cb3ac72022-07-04 10:55:14 +08001768+ case FLOW_BLOCK_UNBIND:
1769+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
developeree39bcf2023-06-16 08:03:30 +08001770+ if (!block_cb) {
1771+ err = -ENOENT;
1772+ goto unlock;
1773+ }
developer8cb3ac72022-07-04 10:55:14 +08001774+
developeree39bcf2023-06-16 08:03:30 +08001775+ if (flow_block_cb_decref(block_cb)) {
developer8cb3ac72022-07-04 10:55:14 +08001776+ flow_block_cb_remove(block_cb, f);
1777+ list_del(&block_cb->driver_list);
1778+ }
developeree39bcf2023-06-16 08:03:30 +08001779+ break;
developer8cb3ac72022-07-04 10:55:14 +08001780+ default:
developeree39bcf2023-06-16 08:03:30 +08001781+ err = -EOPNOTSUPP;
1782+ break;
developer8cb3ac72022-07-04 10:55:14 +08001783+ }
developeree39bcf2023-06-16 08:03:30 +08001784+
1785+unlock:
1786+ return err;
developer8cb3ac72022-07-04 10:55:14 +08001787+}
1788+
1789+int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
1790+ void *type_data)
1791+{
developeree39bcf2023-06-16 08:03:30 +08001792+ if (type == TC_SETUP_FT)
developer8cb3ac72022-07-04 10:55:14 +08001793+ return mtk_eth_setup_tc_block(dev, type_data);
developeree39bcf2023-06-16 08:03:30 +08001794+
1795+ return -EOPNOTSUPP;
developer8cb3ac72022-07-04 10:55:14 +08001796+}
1797+
1798+int mtk_eth_offload_init(struct mtk_eth *eth)
1799+{
developeree39bcf2023-06-16 08:03:30 +08001800+ if (!eth->ppe.foe_table)
1801+ return 0;
1802+
developer8cb3ac72022-07-04 10:55:14 +08001803+ return rhashtable_init(&eth->flow_table, &mtk_flow_ht_params);
1804+}
1805diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_regs.h b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
1806new file mode 100644
developeree39bcf2023-06-16 08:03:30 +08001807index 000000000..0c45ea090
developer8cb3ac72022-07-04 10:55:14 +08001808--- /dev/null
1809+++ b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
developeree39bcf2023-06-16 08:03:30 +08001810@@ -0,0 +1,144 @@
developer8cb3ac72022-07-04 10:55:14 +08001811+// SPDX-License-Identifier: GPL-2.0-only
1812+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
1813+
1814+#ifndef __MTK_PPE_REGS_H
1815+#define __MTK_PPE_REGS_H
1816+
1817+#define MTK_PPE_GLO_CFG 0x200
1818+#define MTK_PPE_GLO_CFG_EN BIT(0)
1819+#define MTK_PPE_GLO_CFG_TSID_EN BIT(1)
1820+#define MTK_PPE_GLO_CFG_IP4_L4_CS_DROP BIT(2)
1821+#define MTK_PPE_GLO_CFG_IP4_CS_DROP BIT(3)
1822+#define MTK_PPE_GLO_CFG_TTL0_DROP BIT(4)
1823+#define MTK_PPE_GLO_CFG_PPE_BSWAP BIT(5)
1824+#define MTK_PPE_GLO_CFG_PSE_HASH_OFS BIT(6)
1825+#define MTK_PPE_GLO_CFG_MCAST_TB_EN BIT(7)
1826+#define MTK_PPE_GLO_CFG_FLOW_DROP_KA BIT(8)
1827+#define MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE BIT(9)
1828+#define MTK_PPE_GLO_CFG_UDP_LITE_EN BIT(10)
1829+#define MTK_PPE_GLO_CFG_UDP_LEN_DROP BIT(11)
1830+#define MTK_PPE_GLO_CFG_MCAST_ENTRIES GENMASK(13, 12)
1831+#define MTK_PPE_GLO_CFG_BUSY BIT(31)
1832+
1833+#define MTK_PPE_FLOW_CFG 0x204
1834+#define MTK_PPE_FLOW_CFG_IP4_TCP_FRAG BIT(6)
1835+#define MTK_PPE_FLOW_CFG_IP4_UDP_FRAG BIT(7)
1836+#define MTK_PPE_FLOW_CFG_IP6_3T_ROUTE BIT(8)
1837+#define MTK_PPE_FLOW_CFG_IP6_5T_ROUTE BIT(9)
1838+#define MTK_PPE_FLOW_CFG_IP6_6RD BIT(10)
1839+#define MTK_PPE_FLOW_CFG_IP4_NAT BIT(12)
1840+#define MTK_PPE_FLOW_CFG_IP4_NAPT BIT(13)
1841+#define MTK_PPE_FLOW_CFG_IP4_DSLITE BIT(14)
1842+#define MTK_PPE_FLOW_CFG_L2_BRIDGE BIT(15)
1843+#define MTK_PPE_FLOW_CFG_IP_PROTO_BLACKLIST BIT(16)
1844+#define MTK_PPE_FLOW_CFG_IP4_NAT_FRAG BIT(17)
1845+#define MTK_PPE_FLOW_CFG_IP4_HASH_FLOW_LABEL BIT(18)
1846+#define MTK_PPE_FLOW_CFG_IP4_HASH_GRE_KEY BIT(19)
1847+#define MTK_PPE_FLOW_CFG_IP6_HASH_GRE_KEY BIT(20)
1848+
1849+#define MTK_PPE_IP_PROTO_CHK 0x208
1850+#define MTK_PPE_IP_PROTO_CHK_IPV4 GENMASK(15, 0)
1851+#define MTK_PPE_IP_PROTO_CHK_IPV6 GENMASK(31, 16)
1852+
1853+#define MTK_PPE_TB_CFG 0x21c
1854+#define MTK_PPE_TB_CFG_ENTRY_NUM GENMASK(2, 0)
1855+#define MTK_PPE_TB_CFG_ENTRY_80B BIT(3)
1856+#define MTK_PPE_TB_CFG_SEARCH_MISS GENMASK(5, 4)
1857+#define MTK_PPE_TB_CFG_AGE_PREBIND BIT(6)
1858+#define MTK_PPE_TB_CFG_AGE_NON_L4 BIT(7)
1859+#define MTK_PPE_TB_CFG_AGE_UNBIND BIT(8)
1860+#define MTK_PPE_TB_CFG_AGE_TCP BIT(9)
1861+#define MTK_PPE_TB_CFG_AGE_UDP BIT(10)
1862+#define MTK_PPE_TB_CFG_AGE_TCP_FIN BIT(11)
1863+#define MTK_PPE_TB_CFG_KEEPALIVE GENMASK(13, 12)
1864+#define MTK_PPE_TB_CFG_HASH_MODE GENMASK(15, 14)
1865+#define MTK_PPE_TB_CFG_SCAN_MODE GENMASK(17, 16)
1866+#define MTK_PPE_TB_CFG_HASH_DEBUG GENMASK(19, 18)
1867+
1868+enum {
1869+ MTK_PPE_SCAN_MODE_DISABLED,
1870+ MTK_PPE_SCAN_MODE_CHECK_AGE,
1871+ MTK_PPE_SCAN_MODE_KEEPALIVE_AGE,
1872+};
1873+
1874+enum {
1875+ MTK_PPE_KEEPALIVE_DISABLE,
1876+ MTK_PPE_KEEPALIVE_UNICAST_CPU,
1877+ MTK_PPE_KEEPALIVE_DUP_CPU = 3,
1878+};
1879+
1880+enum {
1881+ MTK_PPE_SEARCH_MISS_ACTION_DROP,
1882+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD = 2,
1883+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD = 3,
1884+};
1885+
1886+#define MTK_PPE_TB_BASE 0x220
1887+
1888+#define MTK_PPE_TB_USED 0x224
1889+#define MTK_PPE_TB_USED_NUM GENMASK(13, 0)
1890+
1891+#define MTK_PPE_BIND_RATE 0x228
1892+#define MTK_PPE_BIND_RATE_BIND GENMASK(15, 0)
1893+#define MTK_PPE_BIND_RATE_PREBIND GENMASK(31, 16)
1894+
1895+#define MTK_PPE_BIND_LIMIT0 0x22c
1896+#define MTK_PPE_BIND_LIMIT0_QUARTER GENMASK(13, 0)
1897+#define MTK_PPE_BIND_LIMIT0_HALF GENMASK(29, 16)
1898+
1899+#define MTK_PPE_BIND_LIMIT1 0x230
1900+#define MTK_PPE_BIND_LIMIT1_FULL GENMASK(13, 0)
1901+#define MTK_PPE_BIND_LIMIT1_NON_L4 GENMASK(23, 16)
1902+
1903+#define MTK_PPE_KEEPALIVE 0x234
1904+#define MTK_PPE_KEEPALIVE_TIME GENMASK(15, 0)
1905+#define MTK_PPE_KEEPALIVE_TIME_TCP GENMASK(23, 16)
1906+#define MTK_PPE_KEEPALIVE_TIME_UDP GENMASK(31, 24)
1907+
1908+#define MTK_PPE_UNBIND_AGE 0x238
1909+#define MTK_PPE_UNBIND_AGE_MIN_PACKETS GENMASK(31, 16)
1910+#define MTK_PPE_UNBIND_AGE_DELTA GENMASK(7, 0)
1911+
1912+#define MTK_PPE_BIND_AGE0 0x23c
1913+#define MTK_PPE_BIND_AGE0_DELTA_NON_L4 GENMASK(30, 16)
1914+#define MTK_PPE_BIND_AGE0_DELTA_UDP GENMASK(14, 0)
1915+
1916+#define MTK_PPE_BIND_AGE1 0x240
1917+#define MTK_PPE_BIND_AGE1_DELTA_TCP_FIN GENMASK(30, 16)
1918+#define MTK_PPE_BIND_AGE1_DELTA_TCP GENMASK(14, 0)
1919+
1920+#define MTK_PPE_HASH_SEED 0x244
1921+
1922+#define MTK_PPE_DEFAULT_CPU_PORT 0x248
1923+#define MTK_PPE_DEFAULT_CPU_PORT_MASK(_n) (GENMASK(2, 0) << ((_n) * 4))
1924+
1925+#define MTK_PPE_MTU_DROP 0x308
1926+
1927+#define MTK_PPE_VLAN_MTU0 0x30c
1928+#define MTK_PPE_VLAN_MTU0_NONE GENMASK(13, 0)
1929+#define MTK_PPE_VLAN_MTU0_1TAG GENMASK(29, 16)
1930+
1931+#define MTK_PPE_VLAN_MTU1 0x310
1932+#define MTK_PPE_VLAN_MTU1_2TAG GENMASK(13, 0)
1933+#define MTK_PPE_VLAN_MTU1_3TAG GENMASK(29, 16)
1934+
1935+#define MTK_PPE_VPM_TPID 0x318
1936+
1937+#define MTK_PPE_CACHE_CTL 0x320
1938+#define MTK_PPE_CACHE_CTL_EN BIT(0)
1939+#define MTK_PPE_CACHE_CTL_LOCK_CLR BIT(4)
1940+#define MTK_PPE_CACHE_CTL_REQ BIT(8)
1941+#define MTK_PPE_CACHE_CTL_CLEAR BIT(9)
1942+#define MTK_PPE_CACHE_CTL_CMD GENMASK(13, 12)
1943+
1944+#define MTK_PPE_MIB_CFG 0x334
1945+#define MTK_PPE_MIB_CFG_EN BIT(0)
1946+#define MTK_PPE_MIB_CFG_RD_CLR BIT(1)
1947+
1948+#define MTK_PPE_MIB_TB_BASE 0x338
1949+
1950+#define MTK_PPE_MIB_CACHE_CTL 0x350
1951+#define MTK_PPE_MIB_CACHE_CTL_EN BIT(0)
1952+#define MTK_PPE_MIB_CACHE_CTL_FLUSH BIT(2)
1953+
1954+#endif
1955diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
developeree39bcf2023-06-16 08:03:30 +08001956index a085213dc..813e30495 100644
developer8cb3ac72022-07-04 10:55:14 +08001957--- a/drivers/net/ppp/ppp_generic.c
1958+++ b/drivers/net/ppp/ppp_generic.c
1959@@ -1378,12 +1378,34 @@ static void ppp_dev_priv_destructor(struct net_device *dev)
1960 ppp_destroy_interface(ppp);
1961 }
1962
1963+static int ppp_fill_forward_path(struct net_device_path_ctx *ctx,
1964+ struct net_device_path *path)
1965+{
1966+ struct ppp *ppp = netdev_priv(ctx->dev);
1967+ struct ppp_channel *chan;
1968+ struct channel *pch;
1969+
1970+ if (ppp->flags & SC_MULTILINK)
1971+ return -EOPNOTSUPP;
1972+
1973+ if (list_empty(&ppp->channels))
1974+ return -ENODEV;
1975+
1976+ pch = list_first_entry(&ppp->channels, struct channel, clist);
1977+ chan = pch->chan;
1978+ if (!chan->ops->fill_forward_path)
1979+ return -EOPNOTSUPP;
1980+
1981+ return chan->ops->fill_forward_path(ctx, path, chan);
1982+}
1983+
1984 static const struct net_device_ops ppp_netdev_ops = {
1985 .ndo_init = ppp_dev_init,
1986 .ndo_uninit = ppp_dev_uninit,
1987 .ndo_start_xmit = ppp_start_xmit,
1988 .ndo_do_ioctl = ppp_net_ioctl,
1989 .ndo_get_stats64 = ppp_get_stats64,
1990+ .ndo_fill_forward_path = ppp_fill_forward_path,
1991 };
1992
1993 static struct device_type ppp_type = {
1994diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
developeree39bcf2023-06-16 08:03:30 +08001995index 087b01684..7a8c246ab 100644
developer8cb3ac72022-07-04 10:55:14 +08001996--- a/drivers/net/ppp/pppoe.c
1997+++ b/drivers/net/ppp/pppoe.c
1998@@ -974,8 +974,32 @@ static int pppoe_xmit(struct ppp_channel *chan, struct sk_buff *skb)
1999 return __pppoe_xmit(sk, skb);
2000 }
2001
2002+static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx,
2003+ struct net_device_path *path,
2004+ const struct ppp_channel *chan)
2005+{
2006+ struct sock *sk = (struct sock *)chan->private;
2007+ struct pppox_sock *po = pppox_sk(sk);
2008+ struct net_device *dev = po->pppoe_dev;
2009+
2010+ if (sock_flag(sk, SOCK_DEAD) ||
2011+ !(sk->sk_state & PPPOX_CONNECTED) || !dev)
2012+ return -1;
2013+
2014+ path->type = DEV_PATH_PPPOE;
2015+ path->encap.proto = htons(ETH_P_PPP_SES);
2016+ path->encap.id = be16_to_cpu(po->num);
2017+ memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN);
2018+ memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN);
2019+ path->dev = ctx->dev;
2020+ ctx->dev = dev;
2021+
2022+ return 0;
2023+}
2024+
2025 static const struct ppp_channel_ops pppoe_chan_ops = {
2026 .start_xmit = pppoe_xmit,
2027+ .fill_forward_path = pppoe_fill_forward_path,
2028 };
2029
2030 static int pppoe_recvmsg(struct socket *sock, struct msghdr *m,
2031diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
developeree39bcf2023-06-16 08:03:30 +08002032index 38af42bf8..9f64504ac 100644
developer8cb3ac72022-07-04 10:55:14 +08002033--- a/include/linux/netdevice.h
2034+++ b/include/linux/netdevice.h
developeree39bcf2023-06-16 08:03:30 +08002035@@ -829,6 +829,59 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
developer8cb3ac72022-07-04 10:55:14 +08002036 struct sk_buff *skb,
2037 struct net_device *sb_dev);
2038
2039+enum net_device_path_type {
2040+ DEV_PATH_ETHERNET = 0,
2041+ DEV_PATH_VLAN,
2042+ DEV_PATH_BRIDGE,
2043+ DEV_PATH_PPPOE,
2044+ DEV_PATH_DSA,
2045+};
2046+
2047+struct net_device_path {
2048+ enum net_device_path_type type;
2049+ const struct net_device *dev;
2050+ union {
2051+ struct {
2052+ u16 id;
2053+ __be16 proto;
2054+ u8 h_dest[ETH_ALEN];
2055+ } encap;
2056+ struct {
2057+ enum {
2058+ DEV_PATH_BR_VLAN_KEEP,
2059+ DEV_PATH_BR_VLAN_TAG,
2060+ DEV_PATH_BR_VLAN_UNTAG,
2061+ DEV_PATH_BR_VLAN_UNTAG_HW,
2062+ } vlan_mode;
2063+ u16 vlan_id;
2064+ __be16 vlan_proto;
2065+ } bridge;
2066+ struct {
2067+ int port;
2068+ u16 proto;
2069+ } dsa;
2070+ };
2071+};
2072+
2073+#define NET_DEVICE_PATH_STACK_MAX 5
2074+#define NET_DEVICE_PATH_VLAN_MAX 2
2075+
2076+struct net_device_path_stack {
2077+ int num_paths;
2078+ struct net_device_path path[NET_DEVICE_PATH_STACK_MAX];
2079+};
2080+
2081+struct net_device_path_ctx {
2082+ const struct net_device *dev;
2083+ u8 daddr[ETH_ALEN];
2084+
2085+ int num_vlans;
2086+ struct {
2087+ u16 id;
2088+ __be16 proto;
2089+ } vlan[NET_DEVICE_PATH_VLAN_MAX];
2090+};
2091+
2092 enum tc_setup_type {
2093 TC_SETUP_QDISC_MQPRIO,
2094 TC_SETUP_CLSU32,
developeree39bcf2023-06-16 08:03:30 +08002095@@ -844,6 +897,7 @@ enum tc_setup_type {
developer8cb3ac72022-07-04 10:55:14 +08002096 TC_SETUP_ROOT_QDISC,
2097 TC_SETUP_QDISC_GRED,
2098 TC_SETUP_QDISC_TAPRIO,
2099+ TC_SETUP_FT,
2100 };
2101
2102 /* These structures hold the attributes of bpf state that are being passed
developeree39bcf2023-06-16 08:03:30 +08002103@@ -1239,6 +1293,8 @@ struct tlsdev_ops;
developer8cb3ac72022-07-04 10:55:14 +08002104 * Get devlink port instance associated with a given netdev.
2105 * Called with a reference on the netdevice and devlink locks only,
2106 * rtnl_lock is not held.
2107+ * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path);
2108+ * Get the forwarding path to reach the real device from the HW destination address
2109 */
2110 struct net_device_ops {
2111 int (*ndo_init)(struct net_device *dev);
developeree39bcf2023-06-16 08:03:30 +08002112@@ -1436,6 +1492,8 @@ struct net_device_ops {
developer8cb3ac72022-07-04 10:55:14 +08002113 int (*ndo_xsk_wakeup)(struct net_device *dev,
2114 u32 queue_id, u32 flags);
2115 struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev);
2116+ int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx,
2117+ struct net_device_path *path);
2118 };
2119
2120 /**
developeree39bcf2023-06-16 08:03:30 +08002121@@ -2661,6 +2719,8 @@ void dev_remove_offload(struct packet_offload *po);
developer8cb3ac72022-07-04 10:55:14 +08002122
2123 int dev_get_iflink(const struct net_device *dev);
2124 int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
2125+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
2126+ struct net_device_path_stack *stack);
2127 struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
2128 unsigned short mask);
2129 struct net_device *dev_get_by_name(struct net *net, const char *name);
2130diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h
developeree39bcf2023-06-16 08:03:30 +08002131index 98966064e..91f9a9283 100644
developer8cb3ac72022-07-04 10:55:14 +08002132--- a/include/linux/ppp_channel.h
2133+++ b/include/linux/ppp_channel.h
2134@@ -28,6 +28,9 @@ struct ppp_channel_ops {
2135 int (*start_xmit)(struct ppp_channel *, struct sk_buff *);
2136 /* Handle an ioctl call that has come in via /dev/ppp. */
2137 int (*ioctl)(struct ppp_channel *, unsigned int, unsigned long);
2138+ int (*fill_forward_path)(struct net_device_path_ctx *,
2139+ struct net_device_path *,
2140+ const struct ppp_channel *);
2141 };
2142
2143 struct ppp_channel {
2144diff --git a/include/net/dsa.h b/include/net/dsa.h
developeree39bcf2023-06-16 08:03:30 +08002145index 05f66d487..cafc74218 100644
developer8cb3ac72022-07-04 10:55:14 +08002146--- a/include/net/dsa.h
2147+++ b/include/net/dsa.h
developeree39bcf2023-06-16 08:03:30 +08002148@@ -561,6 +561,8 @@ struct dsa_switch_ops {
developer8cb3ac72022-07-04 10:55:14 +08002149 struct sk_buff *skb);
2150 };
2151
2152+struct dsa_port *dsa_port_from_netdev(struct net_device *netdev);
2153+
2154 struct dsa_switch_driver {
2155 struct list_head list;
2156 const struct dsa_switch_ops *ops;
developeree39bcf2023-06-16 08:03:30 +08002157@@ -653,6 +655,14 @@ static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev,
developer8cb3ac72022-07-04 10:55:14 +08002158 #define BRCM_TAG_GET_PORT(v) ((v) >> 8)
2159 #define BRCM_TAG_GET_QUEUE(v) ((v) & 0xff)
2160
2161+#if IS_ENABLED(CONFIG_NET_DSA)
2162+bool dsa_slave_dev_check(const struct net_device *dev);
2163+#else
2164+static inline bool dsa_slave_dev_check(const struct net_device *dev)
2165+{
2166+ return false;
2167+}
2168+#endif
2169
2170 netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev);
2171 int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data);
2172diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
developeree39bcf2023-06-16 08:03:30 +08002173index c6f7bd22d..59b873653 100644
developer8cb3ac72022-07-04 10:55:14 +08002174--- a/include/net/flow_offload.h
2175+++ b/include/net/flow_offload.h
2176@@ -138,6 +138,7 @@ enum flow_action_id {
2177 FLOW_ACTION_MPLS_PUSH,
2178 FLOW_ACTION_MPLS_POP,
2179 FLOW_ACTION_MPLS_MANGLE,
2180+ FLOW_ACTION_PPPOE_PUSH,
2181 NUM_FLOW_ACTIONS,
2182 };
2183
2184@@ -213,6 +214,9 @@ struct flow_action_entry {
2185 u8 bos;
2186 u8 ttl;
2187 } mpls_mangle;
2188+ struct { /* FLOW_ACTION_PPPOE_PUSH */
2189+ u16 sid;
2190+ } pppoe;
2191 };
2192 };
2193
2194diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
developeree39bcf2023-06-16 08:03:30 +08002195index 2c739fc75..89ab8f180 100644
developer8cb3ac72022-07-04 10:55:14 +08002196--- a/include/net/ip6_route.h
2197+++ b/include/net/ip6_route.h
2198@@ -314,12 +314,13 @@ static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *
2199 !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws);
2200 }
2201
2202-static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
2203+static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst,
2204+ bool forwarding)
2205 {
2206 struct inet6_dev *idev;
2207 unsigned int mtu;
2208
2209- if (dst_metric_locked(dst, RTAX_MTU)) {
2210+ if (!forwarding || dst_metric_locked(dst, RTAX_MTU)) {
2211 mtu = dst_metric_raw(dst, RTAX_MTU);
2212 if (mtu)
2213 goto out;
2214diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
developeree39bcf2023-06-16 08:03:30 +08002215index 7b3c873f8..e95483192 100644
developer8cb3ac72022-07-04 10:55:14 +08002216--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
2217+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
2218@@ -4,7 +4,4 @@
2219
2220 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
2221
2222-#include <linux/sysctl.h>
2223-extern struct ctl_table nf_ct_ipv6_sysctl_table[];
2224-
2225 #endif /* _NF_CONNTRACK_IPV6_H*/
2226diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
developeree39bcf2023-06-16 08:03:30 +08002227index 90690e37a..ce0bc3e62 100644
developer8cb3ac72022-07-04 10:55:14 +08002228--- a/include/net/netfilter/nf_conntrack.h
2229+++ b/include/net/netfilter/nf_conntrack.h
2230@@ -279,6 +279,18 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
2231 !nf_ct_is_dying(ct);
2232 }
2233
2234+#define NF_CT_DAY (86400 * HZ)
2235+
2236+/* Set an arbitrary timeout large enough not to ever expire; this saves
2237+ * us a check for the IPS_OFFLOAD_BIT from the packet path via
2238+ * nf_ct_is_expired().
2239+ */
2240+static inline void nf_ct_offload_timeout(struct nf_conn *ct)
2241+{
2242+ if (nf_ct_expires(ct) < NF_CT_DAY / 2)
2243+ WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
2244+}
2245+
2246 struct kernel_param;
2247
2248 int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp);
2249diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
developeree39bcf2023-06-16 08:03:30 +08002250index f7a060c6e..7f44a7715 100644
developer8cb3ac72022-07-04 10:55:14 +08002251--- a/include/net/netfilter/nf_conntrack_acct.h
2252+++ b/include/net/netfilter/nf_conntrack_acct.h
2253@@ -65,6 +65,17 @@ static inline void nf_ct_set_acct(struct net *net, bool enable)
2254 #endif
2255 }
2256
2257+void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
2258+ unsigned int bytes);
2259+
2260+static inline void nf_ct_acct_update(struct nf_conn *ct, u32 dir,
2261+ unsigned int bytes)
2262+{
2263+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
2264+ nf_ct_acct_add(ct, dir, 1, bytes);
2265+#endif
2266+}
2267+
2268 void nf_conntrack_acct_pernet_init(struct net *net);
2269
2270 int nf_conntrack_acct_init(void);
2271diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
developeree39bcf2023-06-16 08:03:30 +08002272index 68d7fc92..7cf89767 100644
developer8cb3ac72022-07-04 10:55:14 +08002273--- a/include/net/netfilter/nf_flow_table.h
2274+++ b/include/net/netfilter/nf_flow_table.h
2275@@ -8,31 +8,99 @@
2276 #include <linux/rcupdate.h>
2277 #include <linux/netfilter.h>
2278 #include <linux/netfilter/nf_conntrack_tuple_common.h>
2279+#include <net/flow_offload.h>
2280 #include <net/dst.h>
2281+#include <linux/if_pppox.h>
2282+#include <linux/ppp_defs.h>
2283
2284 struct nf_flowtable;
2285+struct nf_flow_rule;
2286+struct flow_offload;
2287+enum flow_offload_tuple_dir;
2288+
2289+struct nf_flow_key {
2290+ struct flow_dissector_key_meta meta;
2291+ struct flow_dissector_key_control control;
2292+ struct flow_dissector_key_control enc_control;
2293+ struct flow_dissector_key_basic basic;
2294+ struct flow_dissector_key_vlan vlan;
2295+ struct flow_dissector_key_vlan cvlan;
2296+ union {
2297+ struct flow_dissector_key_ipv4_addrs ipv4;
2298+ struct flow_dissector_key_ipv6_addrs ipv6;
2299+ };
2300+ struct flow_dissector_key_keyid enc_key_id;
2301+ union {
2302+ struct flow_dissector_key_ipv4_addrs enc_ipv4;
2303+ struct flow_dissector_key_ipv6_addrs enc_ipv6;
2304+ };
2305+ struct flow_dissector_key_tcp tcp;
2306+ struct flow_dissector_key_ports tp;
2307+} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
2308+
2309+struct nf_flow_match {
2310+ struct flow_dissector dissector;
2311+ struct nf_flow_key key;
2312+ struct nf_flow_key mask;
2313+};
2314+
2315+struct nf_flow_rule {
2316+ struct nf_flow_match match;
2317+ struct flow_rule *rule;
2318+};
2319
2320 struct nf_flowtable_type {
2321 struct list_head list;
2322 int family;
2323 int (*init)(struct nf_flowtable *ft);
2324+ int (*setup)(struct nf_flowtable *ft,
2325+ struct net_device *dev,
2326+ enum flow_block_command cmd);
2327+ int (*action)(struct net *net,
2328+ const struct flow_offload *flow,
2329+ enum flow_offload_tuple_dir dir,
2330+ struct nf_flow_rule *flow_rule);
2331 void (*free)(struct nf_flowtable *ft);
2332 nf_hookfn *hook;
2333 struct module *owner;
2334 };
2335
2336+enum nf_flowtable_flags {
2337+ NF_FLOWTABLE_HW_OFFLOAD = 0x1, /* NFT_FLOWTABLE_HW_OFFLOAD */
2338+ NF_FLOWTABLE_COUNTER = 0x2, /* NFT_FLOWTABLE_COUNTER */
2339+};
2340+
2341 struct nf_flowtable {
2342 struct list_head list;
2343 struct rhashtable rhashtable;
2344+ int priority;
2345 const struct nf_flowtable_type *type;
2346 struct delayed_work gc_work;
2347+ unsigned int flags;
2348+ struct flow_block flow_block;
2349+ struct rw_semaphore flow_block_lock; /* Guards flow_block */
2350+ possible_net_t net;
2351 };
2352
2353+static inline bool nf_flowtable_hw_offload(struct nf_flowtable *flowtable)
2354+{
2355+ return flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD;
2356+}
2357+
2358 enum flow_offload_tuple_dir {
2359 FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
2360 FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
2361- FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
2362 };
2363+#define FLOW_OFFLOAD_DIR_MAX IP_CT_DIR_MAX
2364+
2365+enum flow_offload_xmit_type {
2366+ FLOW_OFFLOAD_XMIT_UNSPEC = 0,
2367+ FLOW_OFFLOAD_XMIT_NEIGH,
2368+ FLOW_OFFLOAD_XMIT_XFRM,
2369+ FLOW_OFFLOAD_XMIT_DIRECT,
2370+};
2371+
2372+#define NF_FLOW_TABLE_ENCAP_MAX 2
2373
2374 struct flow_offload_tuple {
2375 union {
developerb7c46752022-07-04 19:51:38 +08002376@@ -52,13 +120,30 @@ struct flow_offload_tuple {
developer8cb3ac72022-07-04 10:55:14 +08002377
2378 u8 l3proto;
2379 u8 l4proto;
2380- u8 dir;
2381+ struct {
2382+ u16 id;
2383+ __be16 proto;
2384+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
2385
2386- u16 mtu;
2387+ /* All members above are keys for lookups, see flow_offload_hash(). */
2388+ struct { } __hash;
2389
developerb7c46752022-07-04 19:51:38 +08002390- struct {
2391- struct dst_entry *dst_cache;
2392- u32 dst_cookie;
developer8cb3ac72022-07-04 10:55:14 +08002393+ u8 dir:2,
2394+ xmit_type:2,
2395+ encap_num:2,
2396+ in_vlan_ingress:2;
2397+ u16 mtu;
2398+ union {
2399+ struct {
2400+ struct dst_entry *dst_cache;
2401+ u32 dst_cookie;
2402+ };
2403+ struct {
2404+ u32 ifidx;
2405+ u32 hw_ifidx;
2406+ u8 h_source[ETH_ALEN];
2407+ u8 h_dest[ETH_ALEN];
2408+ } out;
developerb7c46752022-07-04 19:51:38 +08002409 };
developer8cb3ac72022-07-04 10:55:14 +08002410 };
2411
developeree39bcf2023-06-16 08:03:30 +08002412@@ -67,52 +152,140 @@ struct flow_offload_tuple_rhash {
developer8cb3ac72022-07-04 10:55:14 +08002413 struct flow_offload_tuple tuple;
2414 };
2415
2416-#define FLOW_OFFLOAD_SNAT 0x1
2417-#define FLOW_OFFLOAD_DNAT 0x2
2418-#define FLOW_OFFLOAD_DYING 0x4
2419-#define FLOW_OFFLOAD_TEARDOWN 0x8
2420+enum nf_flow_flags {
2421+ NF_FLOW_SNAT,
2422+ NF_FLOW_DNAT,
2423+ NF_FLOW_TEARDOWN,
2424+ NF_FLOW_HW,
developeree39bcf2023-06-16 08:03:30 +08002425+ NF_FLOW_HW_ACCT_DYING,
developer8cb3ac72022-07-04 10:55:14 +08002426+ NF_FLOW_HW_DYING,
2427+ NF_FLOW_HW_DEAD,
2428+ NF_FLOW_HW_PENDING,
2429+};
2430+
2431+enum flow_offload_type {
2432+ NF_FLOW_OFFLOAD_UNSPEC = 0,
2433+ NF_FLOW_OFFLOAD_ROUTE,
2434+};
2435
2436 struct flow_offload {
2437 struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
2438- u32 flags;
2439- union {
2440- /* Your private driver data here. */
2441- u32 timeout;
2442- };
2443+ struct nf_conn *ct;
2444+ unsigned long flags;
2445+ u16 type;
2446+ u32 timeout;
2447+ struct rcu_head rcu_head;
2448 };
2449
2450 #define NF_FLOW_TIMEOUT (30 * HZ)
2451+#define nf_flowtable_time_stamp (u32)jiffies
2452+
2453+unsigned long flow_offload_get_timeout(struct flow_offload *flow);
2454+
2455+static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
2456+{
2457+ return (__s32)(timeout - nf_flowtable_time_stamp);
2458+}
2459
2460 struct nf_flow_route {
2461 struct {
2462- struct dst_entry *dst;
2463+ struct dst_entry *dst;
2464+ struct {
2465+ u32 ifindex;
2466+ struct {
2467+ u16 id;
2468+ __be16 proto;
2469+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
2470+ u8 num_encaps:2,
2471+ ingress_vlans:2;
2472+ } in;
2473+ struct {
2474+ u32 ifindex;
2475+ u32 hw_ifindex;
2476+ u8 h_source[ETH_ALEN];
2477+ u8 h_dest[ETH_ALEN];
2478+ } out;
2479+ enum flow_offload_xmit_type xmit_type;
2480 } tuple[FLOW_OFFLOAD_DIR_MAX];
2481 };
2482
2483-struct flow_offload *flow_offload_alloc(struct nf_conn *ct,
2484- struct nf_flow_route *route);
2485+struct flow_offload *flow_offload_alloc(struct nf_conn *ct);
2486 void flow_offload_free(struct flow_offload *flow);
2487
2488+static inline int
2489+nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table,
2490+ flow_setup_cb_t *cb, void *cb_priv)
2491+{
2492+ struct flow_block *block = &flow_table->flow_block;
2493+ struct flow_block_cb *block_cb;
2494+ int err = 0;
2495+
2496+ down_write(&flow_table->flow_block_lock);
2497+ block_cb = flow_block_cb_lookup(block, cb, cb_priv);
2498+ if (block_cb) {
2499+ err = -EEXIST;
2500+ goto unlock;
2501+ }
2502+
2503+ block_cb = flow_block_cb_alloc(cb, cb_priv, cb_priv, NULL);
2504+ if (IS_ERR(block_cb)) {
2505+ err = PTR_ERR(block_cb);
2506+ goto unlock;
2507+ }
2508+
2509+ list_add_tail(&block_cb->list, &block->cb_list);
2510+
2511+unlock:
2512+ up_write(&flow_table->flow_block_lock);
2513+ return err;
2514+}
2515+
2516+static inline void
2517+nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
2518+ flow_setup_cb_t *cb, void *cb_priv)
2519+{
2520+ struct flow_block *block = &flow_table->flow_block;
2521+ struct flow_block_cb *block_cb;
2522+
2523+ down_write(&flow_table->flow_block_lock);
2524+ block_cb = flow_block_cb_lookup(block, cb, cb_priv);
2525+ if (block_cb) {
2526+ list_del(&block_cb->list);
2527+ flow_block_cb_free(block_cb);
2528+ } else {
2529+ WARN_ON(true);
2530+ }
2531+ up_write(&flow_table->flow_block_lock);
2532+}
2533+
2534+int flow_offload_route_init(struct flow_offload *flow,
2535+ const struct nf_flow_route *route);
2536+
2537 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
2538+void flow_offload_refresh(struct nf_flowtable *flow_table,
2539+ struct flow_offload *flow);
2540+
2541 struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
2542 struct flow_offload_tuple *tuple);
2543+void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
2544+ struct net_device *dev);
2545 void nf_flow_table_cleanup(struct net_device *dev);
2546
2547 int nf_flow_table_init(struct nf_flowtable *flow_table);
2548 void nf_flow_table_free(struct nf_flowtable *flow_table);
2549
2550 void flow_offload_teardown(struct flow_offload *flow);
2551-static inline void flow_offload_dead(struct flow_offload *flow)
2552-{
2553- flow->flags |= FLOW_OFFLOAD_DYING;
2554-}
2555
2556-int nf_flow_snat_port(const struct flow_offload *flow,
2557- struct sk_buff *skb, unsigned int thoff,
2558- u8 protocol, enum flow_offload_tuple_dir dir);
2559-int nf_flow_dnat_port(const struct flow_offload *flow,
2560- struct sk_buff *skb, unsigned int thoff,
2561- u8 protocol, enum flow_offload_tuple_dir dir);
2562+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
2563+ void (*iter)(struct flow_offload *flow, void *data),
2564+ void *data);
2565+
2566+void nf_flow_snat_port(const struct flow_offload *flow,
2567+ struct sk_buff *skb, unsigned int thoff,
2568+ u8 protocol, enum flow_offload_tuple_dir dir);
2569+void nf_flow_dnat_port(const struct flow_offload *flow,
2570+ struct sk_buff *skb, unsigned int thoff,
2571+ u8 protocol, enum flow_offload_tuple_dir dir);
2572
2573 struct flow_ports {
2574 __be16 source, dest;
developerb7c46752022-07-04 19:51:38 +08002575@@ -126,4 +298,41 @@ unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08002576 #define MODULE_ALIAS_NF_FLOWTABLE(family) \
2577 MODULE_ALIAS("nf-flowtable-" __stringify(family))
2578
2579+void nf_flow_offload_add(struct nf_flowtable *flowtable,
2580+ struct flow_offload *flow);
2581+void nf_flow_offload_del(struct nf_flowtable *flowtable,
2582+ struct flow_offload *flow);
2583+void nf_flow_offload_stats(struct nf_flowtable *flowtable,
developeree39bcf2023-06-16 08:03:30 +08002584+ struct flow_offload *flow, bool force);
developer8cb3ac72022-07-04 10:55:14 +08002585+
2586+void nf_flow_table_offload_flush(struct nf_flowtable *flowtable);
2587+int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
2588+ struct net_device *dev,
2589+ enum flow_block_command cmd);
2590+int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
2591+ enum flow_offload_tuple_dir dir,
2592+ struct nf_flow_rule *flow_rule);
2593+int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
2594+ enum flow_offload_tuple_dir dir,
2595+ struct nf_flow_rule *flow_rule);
2596+
2597+int nf_flow_table_offload_init(void);
2598+void nf_flow_table_offload_exit(void);
2599+
2600+static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
2601+{
2602+ __be16 proto;
2603+
2604+ proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
2605+ sizeof(struct pppoe_hdr)));
2606+ switch (proto) {
2607+ case htons(PPP_IP):
2608+ return htons(ETH_P_IP);
2609+ case htons(PPP_IPV6):
2610+ return htons(ETH_P_IPV6);
2611+ }
2612+
2613+ return 0;
2614+}
2615+
2616 #endif /* _NF_FLOW_TABLE_H */
2617diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
developeree39bcf2023-06-16 08:03:30 +08002618index 806454e76..9e3963c8f 100644
developer8cb3ac72022-07-04 10:55:14 +08002619--- a/include/net/netns/conntrack.h
2620+++ b/include/net/netns/conntrack.h
2621@@ -27,6 +27,9 @@ struct nf_tcp_net {
2622 int tcp_loose;
2623 int tcp_be_liberal;
2624 int tcp_max_retrans;
2625+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
2626+ unsigned int offload_timeout;
2627+#endif
2628 };
2629
2630 enum udp_conntrack {
2631@@ -37,6 +40,9 @@ enum udp_conntrack {
2632
2633 struct nf_udp_net {
2634 unsigned int timeouts[UDP_CT_MAX];
2635+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
2636+ unsigned int offload_timeout;
2637+#endif
2638 };
2639
2640 struct nf_icmp_net {
2641diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
developeree39bcf2023-06-16 08:03:30 +08002642index 336014bf8..ae698d11c 100644
developer8cb3ac72022-07-04 10:55:14 +08002643--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
2644+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
2645@@ -105,14 +105,19 @@ enum ip_conntrack_status {
2646 IPS_OFFLOAD_BIT = 14,
2647 IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT),
2648
2649+ /* Conntrack has been offloaded to hardware. */
2650+ IPS_HW_OFFLOAD_BIT = 15,
2651+ IPS_HW_OFFLOAD = (1 << IPS_HW_OFFLOAD_BIT),
2652+
2653 /* Be careful here, modifying these bits can make things messy,
2654 * so don't let users modify them directly.
2655 */
2656 IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
2657 IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
2658- IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD),
2659+ IPS_SEQ_ADJUST | IPS_TEMPLATE |
2660+ IPS_OFFLOAD | IPS_HW_OFFLOAD),
2661
2662- __IPS_MAX_BIT = 15,
2663+ __IPS_MAX_BIT = 16,
2664 };
2665
2666 /* Connection tracking event types */
2667diff --git a/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
2668new file mode 100644
developeree39bcf2023-06-16 08:03:30 +08002669index 000000000..5841bbe0e
developer8cb3ac72022-07-04 10:55:14 +08002670--- /dev/null
2671+++ b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
2672@@ -0,0 +1,17 @@
2673+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2674+#ifndef _XT_FLOWOFFLOAD_H
2675+#define _XT_FLOWOFFLOAD_H
2676+
2677+#include <linux/types.h>
2678+
2679+enum {
2680+ XT_FLOWOFFLOAD_HW = 1 << 0,
2681+
2682+ XT_FLOWOFFLOAD_MASK = XT_FLOWOFFLOAD_HW
2683+};
2684+
2685+struct xt_flowoffload_target_info {
2686+ __u32 flags;
2687+};
2688+
2689+#endif /* _XT_FLOWOFFLOAD_H */
2690diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
developeree39bcf2023-06-16 08:03:30 +08002691index 589615ec4..444ab5fae 100644
developer8cb3ac72022-07-04 10:55:14 +08002692--- a/net/8021q/vlan_dev.c
2693+++ b/net/8021q/vlan_dev.c
2694@@ -747,6 +747,26 @@ static int vlan_dev_get_iflink(const struct net_device *dev)
2695 return real_dev->ifindex;
2696 }
2697
2698+static int vlan_dev_fill_forward_path(struct net_device_path_ctx *ctx,
2699+ struct net_device_path *path)
2700+{
2701+ struct vlan_dev_priv *vlan = vlan_dev_priv(ctx->dev);
2702+
2703+ path->type = DEV_PATH_VLAN;
2704+ path->encap.id = vlan->vlan_id;
2705+ path->encap.proto = vlan->vlan_proto;
2706+ path->dev = ctx->dev;
2707+ ctx->dev = vlan->real_dev;
2708+ if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
2709+ return -ENOSPC;
2710+
2711+ ctx->vlan[ctx->num_vlans].id = vlan->vlan_id;
2712+ ctx->vlan[ctx->num_vlans].proto = vlan->vlan_proto;
2713+ ctx->num_vlans++;
2714+
2715+ return 0;
2716+}
2717+
2718 static const struct ethtool_ops vlan_ethtool_ops = {
2719 .get_link_ksettings = vlan_ethtool_get_link_ksettings,
2720 .get_drvinfo = vlan_ethtool_get_drvinfo,
2721@@ -785,6 +805,7 @@ static const struct net_device_ops vlan_netdev_ops = {
2722 #endif
2723 .ndo_fix_features = vlan_dev_fix_features,
2724 .ndo_get_iflink = vlan_dev_get_iflink,
2725+ .ndo_fill_forward_path = vlan_dev_fill_forward_path,
2726 };
2727
2728 static void vlan_dev_free(struct net_device *dev)
2729diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
developeree39bcf2023-06-16 08:03:30 +08002730index 501f77f0f..0940b44cd 100644
developer8cb3ac72022-07-04 10:55:14 +08002731--- a/net/bridge/br_device.c
2732+++ b/net/bridge/br_device.c
2733@@ -377,6 +377,54 @@ static int br_del_slave(struct net_device *dev, struct net_device *slave_dev)
2734 return br_del_if(br, slave_dev);
2735 }
2736
2737+static int br_fill_forward_path(struct net_device_path_ctx *ctx,
2738+ struct net_device_path *path)
2739+{
2740+ struct net_bridge_fdb_entry *f;
2741+ struct net_bridge_port *dst;
2742+ struct net_bridge *br;
2743+
2744+ if (netif_is_bridge_port(ctx->dev))
2745+ return -1;
2746+
2747+ br = netdev_priv(ctx->dev);
2748+
2749+ br_vlan_fill_forward_path_pvid(br, ctx, path);
2750+
2751+ f = br_fdb_find_rcu(br, ctx->daddr, path->bridge.vlan_id);
2752+ if (!f || !f->dst)
2753+ return -1;
2754+
2755+ dst = READ_ONCE(f->dst);
2756+ if (!dst)
2757+ return -1;
2758+
2759+ if (br_vlan_fill_forward_path_mode(br, dst, path))
2760+ return -1;
2761+
2762+ path->type = DEV_PATH_BRIDGE;
2763+ path->dev = dst->br->dev;
2764+ ctx->dev = dst->dev;
2765+
2766+ switch (path->bridge.vlan_mode) {
2767+ case DEV_PATH_BR_VLAN_TAG:
2768+ if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
2769+ return -ENOSPC;
2770+ ctx->vlan[ctx->num_vlans].id = path->bridge.vlan_id;
2771+ ctx->vlan[ctx->num_vlans].proto = path->bridge.vlan_proto;
2772+ ctx->num_vlans++;
2773+ break;
2774+ case DEV_PATH_BR_VLAN_UNTAG_HW:
2775+ case DEV_PATH_BR_VLAN_UNTAG:
2776+ ctx->num_vlans--;
2777+ break;
2778+ case DEV_PATH_BR_VLAN_KEEP:
2779+ break;
2780+ }
2781+
2782+ return 0;
2783+}
2784+
2785 static const struct ethtool_ops br_ethtool_ops = {
2786 .get_drvinfo = br_getinfo,
2787 .get_link = ethtool_op_get_link,
2788@@ -410,6 +458,7 @@ static const struct net_device_ops br_netdev_ops = {
2789 .ndo_bridge_setlink = br_setlink,
2790 .ndo_bridge_dellink = br_dellink,
2791 .ndo_features_check = passthru_features_check,
2792+ .ndo_fill_forward_path = br_fill_forward_path,
2793 };
2794
2795 static struct device_type br_type = {
2796diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
developeree39bcf2023-06-16 08:03:30 +08002797index a736be8a1..4bd9e9b57 100644
developer8cb3ac72022-07-04 10:55:14 +08002798--- a/net/bridge/br_private.h
2799+++ b/net/bridge/br_private.h
2800@@ -912,6 +912,13 @@ void br_vlan_port_event(struct net_bridge_port *p, unsigned long event);
2801 int br_vlan_bridge_event(struct net_device *dev, unsigned long event,
2802 void *ptr);
2803
2804+void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
2805+ struct net_device_path_ctx *ctx,
2806+ struct net_device_path *path);
2807+int br_vlan_fill_forward_path_mode(struct net_bridge *br,
2808+ struct net_bridge_port *dst,
2809+ struct net_device_path *path);
2810+
2811 static inline struct net_bridge_vlan_group *br_vlan_group(
2812 const struct net_bridge *br)
2813 {
2814@@ -1066,6 +1073,19 @@ static inline int nbp_get_num_vlan_infos(struct net_bridge_port *p,
2815 return 0;
2816 }
2817
2818+static inline void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
2819+ struct net_device_path_ctx *ctx,
2820+ struct net_device_path *path)
2821+{
2822+}
2823+
2824+static inline int br_vlan_fill_forward_path_mode(struct net_bridge *br,
2825+ struct net_bridge_port *dst,
2826+ struct net_device_path *path)
2827+{
2828+ return 0;
2829+}
2830+
2831 static inline struct net_bridge_vlan_group *br_vlan_group(
2832 const struct net_bridge *br)
2833 {
2834diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
developeree39bcf2023-06-16 08:03:30 +08002835index 9257292bd..bcfd16924 100644
developer8cb3ac72022-07-04 10:55:14 +08002836--- a/net/bridge/br_vlan.c
2837+++ b/net/bridge/br_vlan.c
2838@@ -1268,6 +1268,61 @@ int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid)
2839 }
2840 EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu);
2841
2842+void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
2843+ struct net_device_path_ctx *ctx,
2844+ struct net_device_path *path)
2845+{
2846+ struct net_bridge_vlan_group *vg;
2847+ int idx = ctx->num_vlans - 1;
2848+ u16 vid;
2849+
2850+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
2851+
2852+ if (!br_opt_get(br, BROPT_VLAN_ENABLED))
2853+ return;
2854+
2855+ vg = br_vlan_group(br);
2856+
2857+ if (idx >= 0 &&
2858+ ctx->vlan[idx].proto == br->vlan_proto) {
2859+ vid = ctx->vlan[idx].id;
2860+ } else {
2861+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_TAG;
2862+ vid = br_get_pvid(vg);
2863+ }
2864+
2865+ path->bridge.vlan_id = vid;
2866+ path->bridge.vlan_proto = br->vlan_proto;
2867+}
2868+
2869+int br_vlan_fill_forward_path_mode(struct net_bridge *br,
2870+ struct net_bridge_port *dst,
2871+ struct net_device_path *path)
2872+{
2873+ struct net_bridge_vlan_group *vg;
2874+ struct net_bridge_vlan *v;
2875+
2876+ if (!br_opt_get(br, BROPT_VLAN_ENABLED))
2877+ return 0;
2878+
2879+ vg = nbp_vlan_group_rcu(dst);
2880+ v = br_vlan_find(vg, path->bridge.vlan_id);
2881+ if (!v || !br_vlan_should_use(v))
2882+ return -EINVAL;
2883+
2884+ if (!(v->flags & BRIDGE_VLAN_INFO_UNTAGGED))
2885+ return 0;
2886+
2887+ if (path->bridge.vlan_mode == DEV_PATH_BR_VLAN_TAG)
2888+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
2889+ else if (v->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
2890+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG_HW;
2891+ else
2892+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG;
2893+
2894+ return 0;
2895+}
2896+
2897 int br_vlan_get_info(const struct net_device *dev, u16 vid,
2898 struct bridge_vlan_info *p_vinfo)
2899 {
2900diff --git a/net/core/dev.c b/net/core/dev.c
developeree39bcf2023-06-16 08:03:30 +08002901index fe2c856b9..4f0edb218 100644
developer8cb3ac72022-07-04 10:55:14 +08002902--- a/net/core/dev.c
2903+++ b/net/core/dev.c
2904@@ -639,6 +639,52 @@ int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
2905 }
2906 EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
2907
2908+static struct net_device_path *dev_fwd_path(struct net_device_path_stack *stack)
2909+{
2910+ int k = stack->num_paths++;
2911+
2912+ if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX))
2913+ return NULL;
2914+
2915+ return &stack->path[k];
2916+}
2917+
2918+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
2919+ struct net_device_path_stack *stack)
2920+{
2921+ const struct net_device *last_dev;
2922+ struct net_device_path_ctx ctx = {
2923+ .dev = dev,
2924+ };
2925+ struct net_device_path *path;
2926+ int ret = 0;
2927+
2928+ memcpy(ctx.daddr, daddr, sizeof(ctx.daddr));
2929+ stack->num_paths = 0;
2930+ while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
2931+ last_dev = ctx.dev;
2932+ path = dev_fwd_path(stack);
2933+ if (!path)
2934+ return -1;
2935+
2936+ memset(path, 0, sizeof(struct net_device_path));
2937+ ret = ctx.dev->netdev_ops->ndo_fill_forward_path(&ctx, path);
2938+ if (ret < 0)
2939+ return -1;
2940+
2941+ if (WARN_ON_ONCE(last_dev == ctx.dev))
2942+ return -1;
2943+ }
2944+ path = dev_fwd_path(stack);
2945+ if (!path)
2946+ return -1;
2947+ path->type = DEV_PATH_ETHERNET;
2948+ path->dev = ctx.dev;
2949+
2950+ return ret;
2951+}
2952+EXPORT_SYMBOL_GPL(dev_fill_forward_path);
2953+
2954 /**
2955 * __dev_get_by_name - find a device by its name
2956 * @net: the applicable net namespace
2957diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
developeree39bcf2023-06-16 08:03:30 +08002958index ca80f8699..35a1249a9 100644
developer8cb3ac72022-07-04 10:55:14 +08002959--- a/net/dsa/dsa.c
2960+++ b/net/dsa/dsa.c
2961@@ -329,6 +329,15 @@ int call_dsa_notifiers(unsigned long val, struct net_device *dev,
2962 }
2963 EXPORT_SYMBOL_GPL(call_dsa_notifiers);
2964
2965+struct dsa_port *dsa_port_from_netdev(struct net_device *netdev)
2966+{
2967+ if (!netdev || !dsa_slave_dev_check(netdev))
2968+ return ERR_PTR(-ENODEV);
2969+
2970+ return dsa_slave_to_port(netdev);
2971+}
2972+EXPORT_SYMBOL_GPL(dsa_port_from_netdev);
2973+
2974 static int __init dsa_init_module(void)
2975 {
2976 int rc;
2977diff --git a/net/dsa/slave.c b/net/dsa/slave.c
developeree39bcf2023-06-16 08:03:30 +08002978index 036fda317..2dfaa1eac 100644
developer8cb3ac72022-07-04 10:55:14 +08002979--- a/net/dsa/slave.c
2980+++ b/net/dsa/slave.c
developeree39bcf2023-06-16 08:03:30 +08002981@@ -1033,14 +1031,32 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
developer8cb3ac72022-07-04 10:55:14 +08002982 }
2983 }
2984
2985+static int dsa_slave_setup_ft_block(struct dsa_switch *ds, int port,
2986+ void *type_data)
2987+{
2988+ struct dsa_port *cpu_dp = dsa_to_port(ds, port)->cpu_dp;
2989+ struct net_device *master = cpu_dp->master;
2990+
2991+ if (!master->netdev_ops->ndo_setup_tc)
2992+ return -EOPNOTSUPP;
2993+
2994+ return master->netdev_ops->ndo_setup_tc(master, TC_SETUP_FT, type_data);
2995+}
2996+
2997 static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
2998 void *type_data)
2999 {
3000 struct dsa_port *dp = dsa_slave_to_port(dev);
3001 struct dsa_switch *ds = dp->ds;
3002
3003- if (type == TC_SETUP_BLOCK)
3004+ switch (type) {
3005+ case TC_SETUP_BLOCK:
3006 return dsa_slave_setup_tc_block(dev, type_data);
3007+ case TC_SETUP_FT:
3008+ return dsa_slave_setup_ft_block(ds, dp->index, type_data);
3009+ default:
3010+ break;
3011+ }
3012
3013 if (!ds->ops->port_setup_tc)
3014 return -EOPNOTSUPP;
developeree39bcf2023-06-16 08:03:30 +08003015@@ -1226,6 +1242,21 @@ static struct devlink_port *dsa_slave_get_devlink_port(struct net_device *dev)
developer8cb3ac72022-07-04 10:55:14 +08003016 return dp->ds->devlink ? &dp->devlink_port : NULL;
3017 }
3018
3019+static int dsa_slave_fill_forward_path(struct net_device_path_ctx *ctx,
3020+ struct net_device_path *path)
3021+{
3022+ struct dsa_port *dp = dsa_slave_to_port(ctx->dev);
3023+ struct dsa_port *cpu_dp = dp->cpu_dp;
3024+
3025+ path->dev = ctx->dev;
3026+ path->type = DEV_PATH_DSA;
3027+ path->dsa.proto = cpu_dp->tag_ops->proto;
3028+ path->dsa.port = dp->index;
3029+ ctx->dev = cpu_dp->master;
3030+
3031+ return 0;
3032+}
3033+
3034 static const struct net_device_ops dsa_slave_netdev_ops = {
3035 .ndo_open = dsa_slave_open,
3036 .ndo_stop = dsa_slave_close,
developeree39bcf2023-06-16 08:03:30 +08003037@@ -1250,6 +1281,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
developer8cb3ac72022-07-04 10:55:14 +08003038 .ndo_vlan_rx_add_vid = dsa_slave_vlan_rx_add_vid,
3039 .ndo_vlan_rx_kill_vid = dsa_slave_vlan_rx_kill_vid,
3040 .ndo_get_devlink_port = dsa_slave_get_devlink_port,
3041+ .ndo_fill_forward_path = dsa_slave_fill_forward_path,
3042 };
3043
3044 static struct device_type dsa_type = {
developeree39bcf2023-06-16 08:03:30 +08003045@@ -1497,7 +1529,8 @@ void dsa_slave_destroy(struct net_device *slave_dev)
3046 bool dsa_slave_dev_check(const struct net_device *dev)
developer8cb3ac72022-07-04 10:55:14 +08003047 {
3048 return dev->netdev_ops == &dsa_slave_netdev_ops;
3049 }
3050+EXPORT_SYMBOL_GPL(dsa_slave_dev_check);
3051
3052 static int dsa_slave_changeupper(struct net_device *dev,
3053 struct netdev_notifier_changeupper_info *info)
3054diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
developeree39bcf2023-06-16 08:03:30 +08003055index f17b40211..803b92e4c 100644
developer8cb3ac72022-07-04 10:55:14 +08003056--- a/net/ipv4/netfilter/Kconfig
3057+++ b/net/ipv4/netfilter/Kconfig
3058@@ -56,8 +56,6 @@ config NF_TABLES_ARP
3059 help
3060 This option enables the ARP support for nf_tables.
3061
3062-endif # NF_TABLES
3063-
3064 config NF_FLOW_TABLE_IPV4
3065 tristate "Netfilter flow table IPv4 module"
3066 depends on NF_FLOW_TABLE
3067@@ -66,6 +64,8 @@ config NF_FLOW_TABLE_IPV4
3068
3069 To compile it as a module, choose M here.
3070
3071+endif # NF_TABLES
3072+
3073 config NF_DUP_IPV4
3074 tristate "Netfilter IPv4 packet duplication to alternate destination"
3075 depends on !NF_CONNTRACK || NF_CONNTRACK
3076diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
developeree39bcf2023-06-16 08:03:30 +08003077index 5585e3a94..bb76f6061 100644
developer8cb3ac72022-07-04 10:55:14 +08003078--- a/net/ipv6/ip6_output.c
3079+++ b/net/ipv6/ip6_output.c
3080@@ -607,7 +607,7 @@ int ip6_forward(struct sk_buff *skb)
3081 }
3082 }
3083
3084- mtu = ip6_dst_mtu_forward(dst);
3085+ mtu = ip6_dst_mtu_maybe_forward(dst, true);
3086 if (mtu < IPV6_MIN_MTU)
3087 mtu = IPV6_MIN_MTU;
3088
3089diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
developeree39bcf2023-06-16 08:03:30 +08003090index 69443e9a3..0b481d236 100644
developer8cb3ac72022-07-04 10:55:14 +08003091--- a/net/ipv6/netfilter/Kconfig
3092+++ b/net/ipv6/netfilter/Kconfig
3093@@ -45,7 +45,6 @@ config NFT_FIB_IPV6
3094 multicast or blackhole.
3095
3096 endif # NF_TABLES_IPV6
3097-endif # NF_TABLES
3098
3099 config NF_FLOW_TABLE_IPV6
3100 tristate "Netfilter flow table IPv6 module"
3101@@ -55,6 +54,8 @@ config NF_FLOW_TABLE_IPV6
3102
3103 To compile it as a module, choose M here.
3104
3105+endif # NF_TABLES
3106+
3107 config NF_DUP_IPV6
3108 tristate "Netfilter IPv6 packet duplication to alternate destination"
3109 depends on !NF_CONNTRACK || NF_CONNTRACK
3110diff --git a/net/ipv6/route.c b/net/ipv6/route.c
developeree39bcf2023-06-16 08:03:30 +08003111index 98aaf0b79..2b357ac71 100644
developer8cb3ac72022-07-04 10:55:14 +08003112--- a/net/ipv6/route.c
3113+++ b/net/ipv6/route.c
3114@@ -83,7 +83,7 @@ enum rt6_nud_state {
3115
3116 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
3117 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
3118-static unsigned int ip6_mtu(const struct dst_entry *dst);
3119+static unsigned int ip6_mtu(const struct dst_entry *dst);
3120 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
3121 static void ip6_dst_destroy(struct dst_entry *);
3122 static void ip6_dst_ifdown(struct dst_entry *,
3123@@ -3125,25 +3125,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
3124
3125 static unsigned int ip6_mtu(const struct dst_entry *dst)
3126 {
3127- struct inet6_dev *idev;
3128- unsigned int mtu;
3129-
3130- mtu = dst_metric_raw(dst, RTAX_MTU);
3131- if (mtu)
3132- goto out;
3133-
3134- mtu = IPV6_MIN_MTU;
3135-
3136- rcu_read_lock();
3137- idev = __in6_dev_get(dst->dev);
3138- if (idev)
3139- mtu = idev->cnf.mtu6;
3140- rcu_read_unlock();
3141-
3142-out:
3143- mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
3144-
3145- return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
3146+ return ip6_dst_mtu_maybe_forward(dst, false);
3147 }
3148
3149 /* MTU selection:
3150diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
developeree39bcf2023-06-16 08:03:30 +08003151index b967763f5..c040e713a 100644
developer8cb3ac72022-07-04 10:55:14 +08003152--- a/net/netfilter/Kconfig
3153+++ b/net/netfilter/Kconfig
developeree39bcf2023-06-16 08:03:30 +08003154@@ -690,8 +690,6 @@ config NFT_FIB_NETDEV
developer8cb3ac72022-07-04 10:55:14 +08003155
3156 endif # NF_TABLES_NETDEV
3157
3158-endif # NF_TABLES
3159-
3160 config NF_FLOW_TABLE_INET
3161 tristate "Netfilter flow table mixed IPv4/IPv6 module"
3162 depends on NF_FLOW_TABLE
developeree39bcf2023-06-16 08:03:30 +08003163@@ -700,11 +698,12 @@ config NF_FLOW_TABLE_INET
developer8cb3ac72022-07-04 10:55:14 +08003164
3165 To compile it as a module, choose M here.
3166
3167+endif # NF_TABLES
3168+
3169 config NF_FLOW_TABLE
3170 tristate "Netfilter flow table module"
3171 depends on NETFILTER_INGRESS
3172 depends on NF_CONNTRACK
3173- depends on NF_TABLES
3174 help
3175 This option adds the flow table core infrastructure.
3176
developeree39bcf2023-06-16 08:03:30 +08003177@@ -984,6 +983,15 @@ config NETFILTER_XT_TARGET_NOTRACK
developer8cb3ac72022-07-04 10:55:14 +08003178 depends on NETFILTER_ADVANCED
3179 select NETFILTER_XT_TARGET_CT
3180
3181+config NETFILTER_XT_TARGET_FLOWOFFLOAD
3182+ tristate '"FLOWOFFLOAD" target support'
3183+ depends on NF_FLOW_TABLE
3184+ depends on NETFILTER_INGRESS
3185+ help
3186+ This option adds a `FLOWOFFLOAD' target, which uses the nf_flow_offload
3187+ module to speed up processing of packets by bypassing the usual
3188+ netfilter chains.
3189+
3190 config NETFILTER_XT_TARGET_RATEEST
3191 tristate '"RATEEST" target support'
3192 depends on NETFILTER_ADVANCED
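The new NETFILTER_XT_TARGET_FLOWOFFLOAD entry builds the xt_FLOWOFFLOAD target added later in this patch, and its userspace ABI lives in the new include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h (17 lines in the diffstat). The sketch below shows a plausible layout for that header; only the file's existence is confirmed in this section, so the flag name and struct layout are assumptions:

/* Assumed uapi header for the FLOWOFFLOAD target: a single flags word
 * whose HW bit requests hardware offload instead of the pure software
 * fast path.
 */
#ifndef _XT_FLOWOFFLOAD_H
#define _XT_FLOWOFFLOAD_H

#include <linux/types.h>

enum {
	XT_FLOWOFFLOAD_HW	= 1 << 0,

	XT_FLOWOFFLOAD_MASK	= XT_FLOWOFFLOAD_HW
};

struct xt_flowoffload_target_info {
	__u32 flags;
};

#endif /* _XT_FLOWOFFLOAD_H */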
3193diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
developeree39bcf2023-06-16 08:03:30 +08003194index 4fc075b61..d93a121bc 100644
developer8cb3ac72022-07-04 10:55:14 +08003195--- a/net/netfilter/Makefile
3196+++ b/net/netfilter/Makefile
3197@@ -120,7 +120,8 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
3198
3199 # flow table infrastructure
3200 obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
3201-nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
3202+nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \
3203+ nf_flow_table_offload.o
3204
3205 obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
3206
3207@@ -140,6 +141,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
3208 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
3209 obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
3210 obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
3211+obj-$(CONFIG_NETFILTER_XT_TARGET_FLOWOFFLOAD) += xt_FLOWOFFLOAD.o
3212 obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
3213 obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
3214 obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
3215diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
developeree39bcf2023-06-16 08:03:30 +08003216index f6ab6f484..f689e19d8 100644
developer8cb3ac72022-07-04 10:55:14 +08003217--- a/net/netfilter/nf_conntrack_core.c
3218+++ b/net/netfilter/nf_conntrack_core.c
developeree39bcf2023-06-16 08:03:30 +08003219@@ -864,9 +864,8 @@ out:
developer8cb3ac72022-07-04 10:55:14 +08003220 }
3221 EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
3222
3223-static inline void nf_ct_acct_update(struct nf_conn *ct,
3224- enum ip_conntrack_info ctinfo,
3225- unsigned int len)
3226+void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
3227+ unsigned int bytes)
3228 {
3229 struct nf_conn_acct *acct;
3230
3231@@ -874,10 +873,11 @@ static inline void nf_ct_acct_update(struct nf_conn *ct,
3232 if (acct) {
3233 struct nf_conn_counter *counter = acct->counter;
3234
3235- atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
3236- atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes);
3237+ atomic64_add(packets, &counter[dir].packets);
3238+ atomic64_add(bytes, &counter[dir].bytes);
3239 }
3240 }
3241+EXPORT_SYMBOL_GPL(nf_ct_acct_add);
3242
3243 static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
3244 const struct nf_conn *loser_ct)
3245@@ -891,7 +891,7 @@ static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
3246
3247 /* u32 should be fine since we must have seen one packet. */
3248 bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes);
3249- nf_ct_acct_update(ct, ctinfo, bytes);
3250+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), bytes);
3251 }
3252 }
3253
3254@@ -1238,8 +1238,10 @@ static void gc_worker(struct work_struct *work)
3255
3256 tmp = nf_ct_tuplehash_to_ctrack(h);
3257
3258- if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
3259+ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
3260+ nf_ct_offload_timeout(tmp);
3261 continue;
3262+ }
3263
3264 if (nf_ct_is_expired(tmp)) {
3265 nf_ct_gc_expired(tmp);
3266@@ -1763,7 +1765,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
3267 WRITE_ONCE(ct->timeout, extra_jiffies);
3268 acct:
3269 if (do_acct)
3270- nf_ct_acct_update(ct, ctinfo, skb->len);
3271+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
3272 }
3273 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
3274
3275@@ -1771,7 +1773,7 @@ bool nf_ct_kill_acct(struct nf_conn *ct,
3276 enum ip_conntrack_info ctinfo,
3277 const struct sk_buff *skb)
3278 {
3279- nf_ct_acct_update(ct, ctinfo, skb->len);
3280+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
3281
3282 return nf_ct_delete(ct, 0, 0);
3283 }
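With the accounting helper generalized into the exported nf_ct_acct_add(), the callers patched above now pass a direction and a byte count to nf_ct_acct_update(). The wrapper that makes those call sites compile is added to include/net/netfilter/nf_conntrack_acct.h elsewhere in this patch (11 lines in the diffstat); a sketch of the assumed wrapper:

/* Assumed additions to include/net/netfilter/nf_conntrack_acct.h:
 * nf_ct_acct_update() becomes a one-packet convenience wrapper around
 * the exported nf_ct_acct_add() shown above.
 */
void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
		    unsigned int bytes);

static inline void nf_ct_acct_update(struct nf_conn *ct, u32 dir,
				     unsigned int bytes)
{
	nf_ct_acct_add(ct, dir, 1, bytes);
}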
3284diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
developeree39bcf2023-06-16 08:03:30 +08003285index 7204f0366..3742bae21 100644
developer8cb3ac72022-07-04 10:55:14 +08003286--- a/net/netfilter/nf_conntrack_proto_tcp.c
3287+++ b/net/netfilter/nf_conntrack_proto_tcp.c
developeree39bcf2023-06-16 08:03:30 +08003288@@ -1453,6 +1453,10 @@ void nf_conntrack_tcp_init_net(struct net *net)
developer8cb3ac72022-07-04 10:55:14 +08003289 tn->tcp_loose = nf_ct_tcp_loose;
3290 tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
3291 tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
3292+
3293+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3294+ tn->offload_timeout = 30 * HZ;
3295+#endif
3296 }
3297
3298 const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
3299diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
developeree39bcf2023-06-16 08:03:30 +08003300index e3a2d018f..a1579d6c3 100644
developer8cb3ac72022-07-04 10:55:14 +08003301--- a/net/netfilter/nf_conntrack_proto_udp.c
3302+++ b/net/netfilter/nf_conntrack_proto_udp.c
3303@@ -267,6 +267,10 @@ void nf_conntrack_udp_init_net(struct net *net)
3304
3305 for (i = 0; i < UDP_CT_MAX; i++)
3306 un->timeouts[i] = udp_timeouts[i];
3307+
3308+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3309+ un->offload_timeout = 30 * HZ;
3310+#endif
3311 }
3312
3313 const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp =
3314diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
developeree39bcf2023-06-16 08:03:30 +08003315index 9c6259c28..10d9f93ce 100644
developer8cb3ac72022-07-04 10:55:14 +08003316--- a/net/netfilter/nf_conntrack_standalone.c
3317+++ b/net/netfilter/nf_conntrack_standalone.c
3318@@ -353,7 +353,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
3319 if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
3320 goto release;
3321
3322- if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
3323+ if (test_bit(IPS_HW_OFFLOAD_BIT, &ct->status))
3324+ seq_puts(s, "[HW_OFFLOAD] ");
3325+ else if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
3326 seq_puts(s, "[OFFLOAD] ");
3327 else if (test_bit(IPS_ASSURED_BIT, &ct->status))
3328 seq_puts(s, "[ASSURED] ");
3329@@ -620,11 +622,17 @@ enum nf_ct_sysctl_index {
3330 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE,
3331 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS,
3332 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
3333+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3334+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
3335+#endif
3336 NF_SYSCTL_CT_PROTO_TCP_LOOSE,
3337 NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
3338 NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS,
3339 NF_SYSCTL_CT_PROTO_TIMEOUT_UDP,
3340 NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
3341+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3342+ NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
3343+#endif
3344 NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
3345 NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
3346 #ifdef CONFIG_NF_CT_PROTO_SCTP
developeree39bcf2023-06-16 08:03:30 +08003347@@ -812,6 +820,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
developer8cb3ac72022-07-04 10:55:14 +08003348 .mode = 0644,
3349 .proc_handler = proc_dointvec_jiffies,
3350 },
3351+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3352+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD] = {
3353+ .procname = "nf_flowtable_tcp_timeout",
3354+ .maxlen = sizeof(unsigned int),
3355+ .mode = 0644,
3356+ .proc_handler = proc_dointvec_jiffies,
3357+ },
3358+#endif
3359 [NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
3360 .procname = "nf_conntrack_tcp_loose",
3361 .maxlen = sizeof(int),
developeree39bcf2023-06-16 08:03:30 +08003362@@ -846,6 +862,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
developer8cb3ac72022-07-04 10:55:14 +08003363 .mode = 0644,
3364 .proc_handler = proc_dointvec_jiffies,
3365 },
3366+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3367+ [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = {
3368+ .procname = "nf_flowtable_udp_timeout",
3369+ .maxlen = sizeof(unsigned int),
3370+ .mode = 0644,
3371+ .proc_handler = proc_dointvec_jiffies,
3372+ },
3373+#endif
3374 [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
3375 .procname = "nf_conntrack_icmp_timeout",
3376 .maxlen = sizeof(unsigned int),
developeree39bcf2023-06-16 08:03:30 +08003377@@ -1028,6 +1052,11 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
developer8cb3ac72022-07-04 10:55:14 +08003378 XASSIGN(LIBERAL, &tn->tcp_be_liberal);
3379 XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans);
3380 #undef XASSIGN
3381+
3382+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3383+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout;
3384+#endif
3385+
3386 }
3387
3388 static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net,
developeree39bcf2023-06-16 08:03:30 +08003389@@ -1115,6 +1144,9 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
developer8cb3ac72022-07-04 10:55:14 +08003390 table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6].data = &nf_icmpv6_pernet(net)->timeout;
3391 table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP].data = &un->timeouts[UDP_CT_UNREPLIED];
3392 table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
3393+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3394+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
3395+#endif
3396
3397 nf_conntrack_standalone_init_tcp_sysctl(net, table);
3398 nf_conntrack_standalone_init_sctp_sysctl(net, table);
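The nf_flowtable_tcp_timeout and nf_flowtable_udp_timeout sysctls above point at new offload_timeout fields, and the TCP/UDP init hunks earlier default them to 30 * HZ. The fields themselves are added to the per-netns protocol state in include/net/netns/conntrack.h (6 lines in the diffstat); a sketch of the assumed additions:

/* Assumed additions to include/net/netns/conntrack.h backing the new
 * sysctls; only the offload_timeout members are new, the surrounding
 * fields already exist.
 */
struct nf_tcp_net {
	unsigned int timeouts[TCP_CONNTRACK_TIMEOUT_MAX];
	/* ... existing tcp_loose / tcp_be_liberal / tcp_max_retrans ... */
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
	unsigned int offload_timeout;
#endif
};

struct nf_udp_net {
	unsigned int timeouts[UDP_CT_MAX];
#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
	unsigned int offload_timeout;
#endif
};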
3399diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
developeree39bcf2023-06-16 08:03:30 +08003400index f212cec0..10365581 100644
developer8cb3ac72022-07-04 10:55:14 +08003401--- a/net/netfilter/nf_flow_table_core.c
3402+++ b/net/netfilter/nf_flow_table_core.c
developerb7c46752022-07-04 19:51:38 +08003403@@ -7,43 +7,21 @@
developer8cb3ac72022-07-04 10:55:14 +08003404 #include <linux/netdevice.h>
3405 #include <net/ip.h>
3406 #include <net/ip6_route.h>
3407-#include <net/netfilter/nf_tables.h>
3408 #include <net/netfilter/nf_flow_table.h>
3409 #include <net/netfilter/nf_conntrack.h>
3410 #include <net/netfilter/nf_conntrack_core.h>
3411 #include <net/netfilter/nf_conntrack_l4proto.h>
3412 #include <net/netfilter/nf_conntrack_tuple.h>
3413
3414-struct flow_offload_entry {
3415- struct flow_offload flow;
3416- struct nf_conn *ct;
3417- struct rcu_head rcu_head;
3418-};
3419-
3420 static DEFINE_MUTEX(flowtable_lock);
3421 static LIST_HEAD(flowtables);
3422
developerb7c46752022-07-04 19:51:38 +08003423-static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
3424-{
3425- const struct rt6_info *rt;
3426-
3427- if (flow_tuple->l3proto == NFPROTO_IPV6) {
3428- rt = (const struct rt6_info *)flow_tuple->dst_cache;
3429- return rt6_get_cookie(rt);
3430- }
3431-
3432- return 0;
3433-}
3434-
developer8cb3ac72022-07-04 10:55:14 +08003435 static void
3436-flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
3437- struct nf_flow_route *route,
3438+flow_offload_fill_dir(struct flow_offload *flow,
3439 enum flow_offload_tuple_dir dir)
3440 {
3441 struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
3442- struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
3443- struct dst_entry *other_dst = route->tuple[!dir].dst;
3444- struct dst_entry *dst = route->tuple[dir].dst;
3445+ struct nf_conntrack_tuple *ctt = &flow->ct->tuplehash[dir].tuple;
3446
3447 ft->dir = dir;
3448
developerb7c46752022-07-04 19:51:38 +08003449@@ -51,12 +29,10 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
developer8cb3ac72022-07-04 10:55:14 +08003450 case NFPROTO_IPV4:
3451 ft->src_v4 = ctt->src.u3.in;
3452 ft->dst_v4 = ctt->dst.u3.in;
3453- ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
3454 break;
3455 case NFPROTO_IPV6:
3456 ft->src_v6 = ctt->src.u3.in6;
3457 ft->dst_v6 = ctt->dst.u3.in6;
3458- ft->mtu = ip6_dst_mtu_forward(dst);
3459 break;
3460 }
3461
developerb7c46752022-07-04 19:51:38 +08003462@@ -64,50 +40,32 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
developer8cb3ac72022-07-04 10:55:14 +08003463 ft->l4proto = ctt->dst.protonum;
3464 ft->src_port = ctt->src.u.tcp.port;
3465 ft->dst_port = ctt->dst.u.tcp.port;
3466-
3467- ft->iifidx = other_dst->dev->ifindex;
3468- ft->dst_cache = dst;
developerb7c46752022-07-04 19:51:38 +08003469- ft->dst_cookie = flow_offload_dst_cookie(ft);
developer8cb3ac72022-07-04 10:55:14 +08003470 }
3471
3472-struct flow_offload *
3473-flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
3474+struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
3475 {
3476- struct flow_offload_entry *entry;
3477 struct flow_offload *flow;
3478
3479 if (unlikely(nf_ct_is_dying(ct) ||
3480 !atomic_inc_not_zero(&ct->ct_general.use)))
3481 return NULL;
3482
3483- entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
3484- if (!entry)
3485+ flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
3486+ if (!flow)
3487 goto err_ct_refcnt;
3488
3489- flow = &entry->flow;
developerb7c46752022-07-04 19:51:38 +08003490-
developer8cb3ac72022-07-04 10:55:14 +08003491- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
3492- goto err_dst_cache_original;
developeree39bcf2023-06-16 08:03:30 +08003493-
developer7eb15dc2023-06-14 17:44:03 +08003494- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
3495- goto err_dst_cache_reply;
developeree39bcf2023-06-16 08:03:30 +08003496+ flow->ct = ct;
3497
developer8cb3ac72022-07-04 10:55:14 +08003498- entry->ct = ct;
3499-
3500- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
3501- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
3502+ flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
3503+ flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_REPLY);
3504
3505 if (ct->status & IPS_SRC_NAT)
3506- flow->flags |= FLOW_OFFLOAD_SNAT;
3507+ __set_bit(NF_FLOW_SNAT, &flow->flags);
3508 if (ct->status & IPS_DST_NAT)
3509- flow->flags |= FLOW_OFFLOAD_DNAT;
3510+ __set_bit(NF_FLOW_DNAT, &flow->flags);
3511
3512 return flow;
3513
3514-err_dst_cache_reply:
3515- dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
3516-err_dst_cache_original:
3517- kfree(entry);
3518 err_ct_refcnt:
3519 nf_ct_put(ct);
3520
developeree39bcf2023-06-16 08:03:30 +08003521@@ -115,40 +73,135 @@ flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
developer8cb3ac72022-07-04 10:55:14 +08003522 }
3523 EXPORT_SYMBOL_GPL(flow_offload_alloc);
3524
3525-static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
3526+static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
3527 {
3528- tcp->state = TCP_CONNTRACK_ESTABLISHED;
3529- tcp->seen[0].td_maxwin = 0;
3530- tcp->seen[1].td_maxwin = 0;
3531+ const struct rt6_info *rt;
3532+
3533+ if (flow_tuple->l3proto == NFPROTO_IPV6) {
3534+ rt = (const struct rt6_info *)flow_tuple->dst_cache;
3535+ return rt6_get_cookie(rt);
3536+ }
3537+
3538+ return 0;
3539 }
3540
3541-#define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ)
3542-#define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ)
3543+static int flow_offload_fill_route(struct flow_offload *flow,
3544+ const struct nf_flow_route *route,
3545+ enum flow_offload_tuple_dir dir)
3546+{
3547+ struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
3548+ struct dst_entry *dst = route->tuple[dir].dst;
3549+ int i, j = 0;
developeree39bcf2023-06-16 08:03:30 +08003550+
developer8cb3ac72022-07-04 10:55:14 +08003551+ switch (flow_tuple->l3proto) {
3552+ case NFPROTO_IPV4:
3553+ flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
3554+ break;
3555+ case NFPROTO_IPV6:
3556+ flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true);
3557+ break;
3558+ }
3559+
3560+ flow_tuple->iifidx = route->tuple[dir].in.ifindex;
3561+ for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {
3562+ flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
3563+ flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
3564+ if (route->tuple[dir].in.ingress_vlans & BIT(i))
3565+ flow_tuple->in_vlan_ingress |= BIT(j);
3566+ j++;
3567+ }
3568+ flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
3569+
3570+ switch (route->tuple[dir].xmit_type) {
3571+ case FLOW_OFFLOAD_XMIT_DIRECT:
3572+ memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
3573+ ETH_ALEN);
3574+ memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
3575+ ETH_ALEN);
3576+ flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
3577+ flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
3578+ break;
3579+ case FLOW_OFFLOAD_XMIT_XFRM:
3580+ case FLOW_OFFLOAD_XMIT_NEIGH:
3581+ if (!dst_hold_safe(route->tuple[dir].dst))
3582+ return -1;
3583+
3584+ flow_tuple->dst_cache = dst;
3585+ flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
3586+ break;
3587+ default:
3588+ WARN_ON_ONCE(1);
3589+ break;
3590+ }
3591+ flow_tuple->xmit_type = route->tuple[dir].xmit_type;
developerb7c46752022-07-04 19:51:38 +08003592+
developer8cb3ac72022-07-04 10:55:14 +08003593+ return 0;
3594+}
3595+
3596+static void nft_flow_dst_release(struct flow_offload *flow,
3597+ enum flow_offload_tuple_dir dir)
developeree39bcf2023-06-16 08:03:30 +08003598+{
developer8cb3ac72022-07-04 10:55:14 +08003599+ if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
3600+ flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
3601+ dst_release(flow->tuplehash[dir].tuple.dst_cache);
developeree39bcf2023-06-16 08:03:30 +08003602+}
3603+
developer8cb3ac72022-07-04 10:55:14 +08003604+int flow_offload_route_init(struct flow_offload *flow,
3605+ const struct nf_flow_route *route)
developeree39bcf2023-06-16 08:03:30 +08003606+{
developer8cb3ac72022-07-04 10:55:14 +08003607+ int err;
developeree39bcf2023-06-16 08:03:30 +08003608+
developer8cb3ac72022-07-04 10:55:14 +08003609+ err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
3610+ if (err < 0)
3611+ return err;
developeree39bcf2023-06-16 08:03:30 +08003612+
developer8cb3ac72022-07-04 10:55:14 +08003613+ err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
3614+ if (err < 0)
3615+ goto err_route_reply;
3616+
3617+ flow->type = NF_FLOW_OFFLOAD_ROUTE;
developeree39bcf2023-06-16 08:03:30 +08003618+
developer8cb3ac72022-07-04 10:55:14 +08003619+ return 0;
3620+
3621+err_route_reply:
3622+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
3623+
3624+ return err;
developeree39bcf2023-06-16 08:03:30 +08003625+}
developer8cb3ac72022-07-04 10:55:14 +08003626+EXPORT_SYMBOL_GPL(flow_offload_route_init);
developerb7c46752022-07-04 19:51:38 +08003627
developeree39bcf2023-06-16 08:03:30 +08003628-static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
developer8cb3ac72022-07-04 10:55:14 +08003629+static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
3630 {
developeree39bcf2023-06-16 08:03:30 +08003631- return (__s32)(timeout - (u32)jiffies);
3632+ tcp->state = TCP_CONNTRACK_ESTABLISHED;
developer8cb3ac72022-07-04 10:55:14 +08003633+ tcp->seen[0].td_maxwin = 0;
3634+ tcp->seen[1].td_maxwin = 0;
3635 }
3636
developeree39bcf2023-06-16 08:03:30 +08003637 static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
developer8cb3ac72022-07-04 10:55:14 +08003638 {
developeree39bcf2023-06-16 08:03:30 +08003639- const struct nf_conntrack_l4proto *l4proto;
developer8cb3ac72022-07-04 10:55:14 +08003640+ struct net *net = nf_ct_net(ct);
developeree39bcf2023-06-16 08:03:30 +08003641 int l4num = nf_ct_protonum(ct);
3642- unsigned int timeout;
developer8cb3ac72022-07-04 10:55:14 +08003643+ s32 timeout;
developeree39bcf2023-06-16 08:03:30 +08003644
3645- l4proto = nf_ct_l4proto_find(l4num);
3646- if (!l4proto)
3647- return;
developer8cb3ac72022-07-04 10:55:14 +08003648+ if (l4num == IPPROTO_TCP) {
3649+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
developeree39bcf2023-06-16 08:03:30 +08003650
3651- if (l4num == IPPROTO_TCP)
3652- timeout = NF_FLOWTABLE_TCP_PICKUP_TIMEOUT;
3653- else if (l4num == IPPROTO_UDP)
3654- timeout = NF_FLOWTABLE_UDP_PICKUP_TIMEOUT;
3655- else
3656+ timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
developer8cb3ac72022-07-04 10:55:14 +08003657+ timeout -= tn->offload_timeout;
3658+ } else if (l4num == IPPROTO_UDP) {
3659+ struct nf_udp_net *tn = nf_udp_pernet(net);
3660+
3661+ timeout = tn->timeouts[UDP_CT_REPLIED];
3662+ timeout -= tn->offload_timeout;
3663+ } else {
developeree39bcf2023-06-16 08:03:30 +08003664 return;
developer8cb3ac72022-07-04 10:55:14 +08003665+ }
3666+
3667+ if (timeout < 0)
3668+ timeout = 0;
developeree39bcf2023-06-16 08:03:30 +08003669
3670- if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
3671- ct->timeout = nfct_time_stamp + timeout;
developer8cb3ac72022-07-04 10:55:14 +08003672+ if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
3673+ WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
3674 }
3675
developeree39bcf2023-06-16 08:03:30 +08003676 static void flow_offload_fixup_ct_state(struct nf_conn *ct)
3677@@ -163,17 +216,23 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
3678 flow_offload_fixup_ct_timeout(ct);
3679 }
3680
developer8cb3ac72022-07-04 10:55:14 +08003681-void flow_offload_free(struct flow_offload *flow)
3682+static void flow_offload_route_release(struct flow_offload *flow)
3683 {
3684- struct flow_offload_entry *e;
3685+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
3686+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY);
3687+}
3688
3689- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
3690- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
3691- e = container_of(flow, struct flow_offload_entry, flow);
3692- if (flow->flags & FLOW_OFFLOAD_DYING)
3693- nf_ct_delete(e->ct, 0, 0);
3694- nf_ct_put(e->ct);
3695- kfree_rcu(e, rcu_head);
3696+void flow_offload_free(struct flow_offload *flow)
3697+{
3698+ switch (flow->type) {
3699+ case NF_FLOW_OFFLOAD_ROUTE:
3700+ flow_offload_route_release(flow);
3701+ break;
3702+ default:
3703+ break;
3704+ }
3705+ nf_ct_put(flow->ct);
3706+ kfree_rcu(flow, rcu_head);
3707 }
3708 EXPORT_SYMBOL_GPL(flow_offload_free);
3709
developeree39bcf2023-06-16 08:03:30 +08003710@@ -181,14 +240,14 @@ static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
developer8cb3ac72022-07-04 10:55:14 +08003711 {
3712 const struct flow_offload_tuple *tuple = data;
3713
3714- return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
3715+ return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed);
3716 }
3717
3718 static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
3719 {
3720 const struct flow_offload_tuple_rhash *tuplehash = data;
3721
3722- return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
3723+ return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed);
3724 }
3725
3726 static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
developeree39bcf2023-06-16 08:03:30 +08003727@@ -197,7 +256,7 @@ static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
developer8cb3ac72022-07-04 10:55:14 +08003728 const struct flow_offload_tuple *tuple = arg->key;
3729 const struct flow_offload_tuple_rhash *x = ptr;
3730
3731- if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
3732+ if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash)))
3733 return 1;
3734
3735 return 0;
developeree39bcf2023-06-16 08:03:30 +08003736@@ -211,30 +270,30 @@ static const struct rhashtable_params nf_flow_offload_rhash_params = {
developer8cb3ac72022-07-04 10:55:14 +08003737 .automatic_shrinking = true,
3738 };
3739
3740-#define DAY (86400 * HZ)
3741-
3742-/* Set an arbitrary timeout large enough not to ever expire, this save
3743- * us a check for the IPS_OFFLOAD_BIT from the packet path via
3744- * nf_ct_is_expired().
3745- */
3746-static void nf_ct_offload_timeout(struct flow_offload *flow)
3747+unsigned long flow_offload_get_timeout(struct flow_offload *flow)
3748 {
3749- struct flow_offload_entry *entry;
3750- struct nf_conn *ct;
3751+ unsigned long timeout = NF_FLOW_TIMEOUT;
3752+ struct net *net = nf_ct_net(flow->ct);
3753+ int l4num = nf_ct_protonum(flow->ct);
developeree39bcf2023-06-16 08:03:30 +08003754
3755- entry = container_of(flow, struct flow_offload_entry, flow);
3756- ct = entry->ct;
developerb7c46752022-07-04 19:51:38 +08003757+ if (l4num == IPPROTO_TCP) {
3758+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
developeree39bcf2023-06-16 08:03:30 +08003759
3760- if (nf_ct_expires(ct) < DAY / 2)
3761- ct->timeout = nfct_time_stamp + DAY;
developer8cb3ac72022-07-04 10:55:14 +08003762+ timeout = tn->offload_timeout;
3763+ } else if (l4num == IPPROTO_UDP) {
3764+ struct nf_udp_net *tn = nf_udp_pernet(net);
developeree39bcf2023-06-16 08:03:30 +08003765+
developer8cb3ac72022-07-04 10:55:14 +08003766+ timeout = tn->offload_timeout;
3767+ }
developeree39bcf2023-06-16 08:03:30 +08003768+
developer8cb3ac72022-07-04 10:55:14 +08003769+ return timeout;
3770 }
3771
3772 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
3773 {
3774 int err;
3775
3776- nf_ct_offload_timeout(flow);
3777- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
3778+ flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
3779
3780 err = rhashtable_insert_fast(&flow_table->rhashtable,
3781 &flow->tuplehash[0].node,
developeree39bcf2023-06-16 08:03:30 +08003782@@ -252,10 +311,35 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
developer8cb3ac72022-07-04 10:55:14 +08003783 return err;
3784 }
3785
3786+ nf_ct_offload_timeout(flow->ct);
3787+
3788+ if (nf_flowtable_hw_offload(flow_table)) {
3789+ __set_bit(NF_FLOW_HW, &flow->flags);
3790+ nf_flow_offload_add(flow_table, flow);
3791+ }
3792+
3793 return 0;
3794 }
3795 EXPORT_SYMBOL_GPL(flow_offload_add);
3796
3797+void flow_offload_refresh(struct nf_flowtable *flow_table,
3798+ struct flow_offload *flow)
3799+{
3800+ u32 timeout;
3801+
3802+ timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
3803+ if (timeout - READ_ONCE(flow->timeout) > HZ)
3804+ WRITE_ONCE(flow->timeout, timeout);
3805+ else
3806+ return;
3807+
3808+ if (likely(!nf_flowtable_hw_offload(flow_table)))
3809+ return;
3810+
3811+ nf_flow_offload_add(flow_table, flow);
3812+}
3813+EXPORT_SYMBOL_GPL(flow_offload_refresh);
3814+
3815 static inline bool nf_flow_has_expired(const struct flow_offload *flow)
3816 {
3817 return nf_flow_timeout_delta(flow->timeout) <= 0;
developeree39bcf2023-06-16 08:03:30 +08003818@@ -264,8 +348,6 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
developer8cb3ac72022-07-04 10:55:14 +08003819 static void flow_offload_del(struct nf_flowtable *flow_table,
3820 struct flow_offload *flow)
3821 {
3822- struct flow_offload_entry *e;
3823-
3824 rhashtable_remove_fast(&flow_table->rhashtable,
3825 &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
3826 nf_flow_offload_rhash_params);
developeree39bcf2023-06-16 08:03:30 +08003827@@ -273,28 +355,21 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003828 &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
3829 nf_flow_offload_rhash_params);
developeree39bcf2023-06-16 08:03:30 +08003830
developer8cb3ac72022-07-04 10:55:14 +08003831- e = container_of(flow, struct flow_offload_entry, flow);
3832- clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
developeree39bcf2023-06-16 08:03:30 +08003833+ clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
3834
3835 if (nf_flow_has_expired(flow))
developer8cb3ac72022-07-04 10:55:14 +08003836- flow_offload_fixup_ct(e->ct);
3837- else if (flow->flags & FLOW_OFFLOAD_TEARDOWN)
3838- flow_offload_fixup_ct_timeout(e->ct);
3839-
3840- if (!(flow->flags & FLOW_OFFLOAD_TEARDOWN))
3841- flow_offload_fixup_ct_state(e->ct);
developeree39bcf2023-06-16 08:03:30 +08003842+ flow_offload_fixup_ct(flow->ct);
3843+ else
3844+ flow_offload_fixup_ct_timeout(flow->ct);
3845
developer8cb3ac72022-07-04 10:55:14 +08003846 flow_offload_free(flow);
3847 }
3848
3849 void flow_offload_teardown(struct flow_offload *flow)
3850 {
3851- struct flow_offload_entry *e;
developerb7c46752022-07-04 19:51:38 +08003852-
3853- flow->flags |= FLOW_OFFLOAD_TEARDOWN;
developeree39bcf2023-06-16 08:03:30 +08003854+ set_bit(NF_FLOW_TEARDOWN, &flow->flags);
3855
developer8cb3ac72022-07-04 10:55:14 +08003856- e = container_of(flow, struct flow_offload_entry, flow);
3857- flow_offload_fixup_ct_state(e->ct);
developeree39bcf2023-06-16 08:03:30 +08003858+ flow_offload_fixup_ct_state(flow->ct);
developer8cb3ac72022-07-04 10:55:14 +08003859 }
3860 EXPORT_SYMBOL_GPL(flow_offload_teardown);
3861
developeree39bcf2023-06-16 08:03:30 +08003862@@ -304,7 +379,6 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003863 {
3864 struct flow_offload_tuple_rhash *tuplehash;
3865 struct flow_offload *flow;
3866- struct flow_offload_entry *e;
3867 int dir;
3868
3869 tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
developeree39bcf2023-06-16 08:03:30 +08003870@@ -314,19 +388,17 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003871
3872 dir = tuplehash->tuple.dir;
3873 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
3874- if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
3875+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags))
3876 return NULL;
3877
3878- e = container_of(flow, struct flow_offload_entry, flow);
3879- if (unlikely(nf_ct_is_dying(e->ct)))
3880+ if (unlikely(nf_ct_is_dying(flow->ct)))
3881 return NULL;
3882
3883 return tuplehash;
3884 }
3885 EXPORT_SYMBOL_GPL(flow_offload_lookup);
3886
3887-static int
3888-nf_flow_table_iterate(struct nf_flowtable *flow_table,
3889+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
3890 void (*iter)(struct flow_offload *flow, void *data),
3891 void *data)
3892 {
developeree39bcf2023-06-16 08:03:30 +08003893@@ -339,7 +411,6 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003894 rhashtable_walk_start(&hti);
3895
3896 while ((tuplehash = rhashtable_walk_next(&hti))) {
3897-
3898 if (IS_ERR(tuplehash)) {
3899 if (PTR_ERR(tuplehash) != -EAGAIN) {
3900 err = PTR_ERR(tuplehash);
developeree39bcf2023-06-16 08:03:30 +08003901@@ -359,23 +430,52 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003902
3903 return err;
3904 }
3905+EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
3906
developeree39bcf2023-06-16 08:03:30 +08003907-static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
3908+static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple)
developer8cb3ac72022-07-04 10:55:14 +08003909 {
developeree39bcf2023-06-16 08:03:30 +08003910- struct nf_flowtable *flow_table = data;
developer8cb3ac72022-07-04 10:55:14 +08003911- struct flow_offload_entry *e;
3912- bool teardown;
developeree39bcf2023-06-16 08:03:30 +08003913+ struct dst_entry *dst;
developer8cb3ac72022-07-04 10:55:14 +08003914
3915- e = container_of(flow, struct flow_offload_entry, flow);
developeree39bcf2023-06-16 08:03:30 +08003916+ if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
3917+ tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
3918+ dst = tuple->dst_cache;
3919+ if (!dst_check(dst, tuple->dst_cookie))
3920+ return true;
3921+ }
3922
developer8cb3ac72022-07-04 10:55:14 +08003923- teardown = flow->flags & (FLOW_OFFLOAD_DYING |
3924- FLOW_OFFLOAD_TEARDOWN);
developeree39bcf2023-06-16 08:03:30 +08003925+ return false;
3926+}
3927
developer8cb3ac72022-07-04 10:55:14 +08003928- if (!teardown)
3929- nf_ct_offload_timeout(flow);
developeree39bcf2023-06-16 08:03:30 +08003930+static bool nf_flow_has_stale_dst(struct flow_offload *flow)
3931+{
3932+ return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) ||
3933+ flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple);
3934+}
developer8cb3ac72022-07-04 10:55:14 +08003935
3936- if (nf_flow_has_expired(flow) || teardown)
3937- flow_offload_del(flow_table, flow);
developeree39bcf2023-06-16 08:03:30 +08003938+static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
3939+{
3940+ struct nf_flowtable *flow_table = data;
3941+
3942+ if (nf_flow_has_expired(flow) ||
3943+ nf_ct_is_dying(flow->ct) ||
3944+ nf_flow_has_stale_dst(flow))
3945+ set_bit(NF_FLOW_TEARDOWN, &flow->flags);
3946+
developer8cb3ac72022-07-04 10:55:14 +08003947+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
3948+ if (test_bit(NF_FLOW_HW, &flow->flags)) {
developeree39bcf2023-06-16 08:03:30 +08003949+ if (!test_and_set_bit(NF_FLOW_HW_ACCT_DYING, &flow->flags))
3950+ nf_flow_offload_stats(flow_table, flow, true);
3951+
developer8cb3ac72022-07-04 10:55:14 +08003952+ if (!test_bit(NF_FLOW_HW_DYING, &flow->flags))
3953+ nf_flow_offload_del(flow_table, flow);
3954+ else if (test_bit(NF_FLOW_HW_DEAD, &flow->flags))
3955+ flow_offload_del(flow_table, flow);
3956+ } else {
3957+ flow_offload_del(flow_table, flow);
3958+ }
3959+ } else if (test_bit(NF_FLOW_HW, &flow->flags)) {
developeree39bcf2023-06-16 08:03:30 +08003960+ nf_flow_offload_stats(flow_table, flow, false);
developer8cb3ac72022-07-04 10:55:14 +08003961+ }
3962 }
3963
3964 static void nf_flow_offload_work_gc(struct work_struct *work)
developeree39bcf2023-06-16 08:03:30 +08003965@@ -387,30 +484,20 @@ static void nf_flow_offload_work_gc(struct work_struct *work)
developer8cb3ac72022-07-04 10:55:14 +08003966 queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
3967 }
3968
3969-static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
3970- __be16 port, __be16 new_port)
3971+static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
3972+ __be16 port, __be16 new_port)
3973 {
3974 struct tcphdr *tcph;
3975
3976- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
3977- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
3978- return -1;
3979-
3980 tcph = (void *)(skb_network_header(skb) + thoff);
3981 inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
3982-
3983- return 0;
3984 }
3985
3986-static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
3987- __be16 port, __be16 new_port)
3988+static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
3989+ __be16 port, __be16 new_port)
3990 {
3991 struct udphdr *udph;
3992
3993- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
3994- skb_try_make_writable(skb, thoff + sizeof(*udph)))
3995- return -1;
3996-
3997 udph = (void *)(skb_network_header(skb) + thoff);
3998 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
3999 inet_proto_csum_replace2(&udph->check, skb, port,
developeree39bcf2023-06-16 08:03:30 +08004000@@ -418,38 +505,28 @@ static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
developer8cb3ac72022-07-04 10:55:14 +08004001 if (!udph->check)
4002 udph->check = CSUM_MANGLED_0;
4003 }
4004-
4005- return 0;
4006 }
4007
4008-static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
4009- u8 protocol, __be16 port, __be16 new_port)
4010+static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
4011+ u8 protocol, __be16 port, __be16 new_port)
4012 {
4013 switch (protocol) {
4014 case IPPROTO_TCP:
4015- if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
4016- return NF_DROP;
4017+ nf_flow_nat_port_tcp(skb, thoff, port, new_port);
4018 break;
4019 case IPPROTO_UDP:
4020- if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
4021- return NF_DROP;
4022+ nf_flow_nat_port_udp(skb, thoff, port, new_port);
4023 break;
4024 }
4025-
4026- return 0;
4027 }
4028
4029-int nf_flow_snat_port(const struct flow_offload *flow,
4030- struct sk_buff *skb, unsigned int thoff,
4031- u8 protocol, enum flow_offload_tuple_dir dir)
4032+void nf_flow_snat_port(const struct flow_offload *flow,
4033+ struct sk_buff *skb, unsigned int thoff,
4034+ u8 protocol, enum flow_offload_tuple_dir dir)
4035 {
4036 struct flow_ports *hdr;
4037 __be16 port, new_port;
4038
4039- if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
4040- skb_try_make_writable(skb, thoff + sizeof(*hdr)))
4041- return -1;
4042-
4043 hdr = (void *)(skb_network_header(skb) + thoff);
4044
4045 switch (dir) {
developeree39bcf2023-06-16 08:03:30 +08004046@@ -463,25 +540,19 @@ int nf_flow_snat_port(const struct flow_offload *flow,
developer8cb3ac72022-07-04 10:55:14 +08004047 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
4048 hdr->dest = new_port;
4049 break;
4050- default:
4051- return -1;
4052 }
4053
4054- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4055+ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4056 }
4057 EXPORT_SYMBOL_GPL(nf_flow_snat_port);
4058
4059-int nf_flow_dnat_port(const struct flow_offload *flow,
4060- struct sk_buff *skb, unsigned int thoff,
4061- u8 protocol, enum flow_offload_tuple_dir dir)
4062+void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb,
4063+ unsigned int thoff, u8 protocol,
4064+ enum flow_offload_tuple_dir dir)
4065 {
4066 struct flow_ports *hdr;
4067 __be16 port, new_port;
4068
4069- if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
4070- skb_try_make_writable(skb, thoff + sizeof(*hdr)))
4071- return -1;
4072-
4073 hdr = (void *)(skb_network_header(skb) + thoff);
4074
4075 switch (dir) {
developeree39bcf2023-06-16 08:03:30 +08004076@@ -495,11 +566,9 @@ int nf_flow_dnat_port(const struct flow_offload *flow,
developer8cb3ac72022-07-04 10:55:14 +08004077 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
4078 hdr->source = new_port;
4079 break;
4080- default:
4081- return -1;
4082 }
4083
4084- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4085+ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4086 }
4087 EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
4088
developeree39bcf2023-06-16 08:03:30 +08004089@@ -507,7 +576,9 @@ int nf_flow_table_init(struct nf_flowtable *flowtable)
developer8cb3ac72022-07-04 10:55:14 +08004090 {
4091 int err;
4092
4093- INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
4094+ INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
4095+ flow_block_init(&flowtable->flow_block);
4096+ init_rwsem(&flowtable->flow_block_lock);
4097
4098 err = rhashtable_init(&flowtable->rhashtable,
4099 &nf_flow_offload_rhash_params);
developeree39bcf2023-06-16 08:03:30 +08004100@@ -528,25 +599,24 @@ EXPORT_SYMBOL_GPL(nf_flow_table_init);
developer8cb3ac72022-07-04 10:55:14 +08004101 static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
4102 {
4103 struct net_device *dev = data;
4104- struct flow_offload_entry *e;
4105-
4106- e = container_of(flow, struct flow_offload_entry, flow);
4107
4108 if (!dev) {
4109 flow_offload_teardown(flow);
4110 return;
4111 }
4112- if (net_eq(nf_ct_net(e->ct), dev_net(dev)) &&
4113+
4114+ if (net_eq(nf_ct_net(flow->ct), dev_net(dev)) &&
4115 (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
4116 flow->tuplehash[1].tuple.iifidx == dev->ifindex))
4117- flow_offload_dead(flow);
4118+ flow_offload_teardown(flow);
4119 }
4120
4121-static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
4122- struct net_device *dev)
4123+void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
4124+ struct net_device *dev)
4125 {
4126 nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
4127 flush_delayed_work(&flowtable->gc_work);
4128+ nf_flow_table_offload_flush(flowtable);
4129 }
4130
4131 void nf_flow_table_cleanup(struct net_device *dev)
developeree39bcf2023-06-16 08:03:30 +08004132@@ -555,7 +625,7 @@ void nf_flow_table_cleanup(struct net_device *dev)
developer8cb3ac72022-07-04 10:55:14 +08004133
4134 mutex_lock(&flowtable_lock);
4135 list_for_each_entry(flowtable, &flowtables, list)
4136- nf_flow_table_iterate_cleanup(flowtable, dev);
4137+ nf_flow_table_gc_cleanup(flowtable, dev);
4138 mutex_unlock(&flowtable_lock);
4139 }
4140 EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
developeree39bcf2023-06-16 08:03:30 +08004141@@ -565,9 +635,14 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
developer8cb3ac72022-07-04 10:55:14 +08004142 mutex_lock(&flowtable_lock);
4143 list_del(&flow_table->list);
4144 mutex_unlock(&flowtable_lock);
4145+
4146 cancel_delayed_work_sync(&flow_table->gc_work);
4147 nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
4148 nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
4149+ nf_flow_table_offload_flush(flow_table);
4150+ if (nf_flowtable_hw_offload(flow_table))
4151+ nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step,
4152+ flow_table);
4153 rhashtable_destroy(&flow_table->rhashtable);
4154 }
4155 EXPORT_SYMBOL_GPL(nf_flow_table_free);
developeree39bcf2023-06-16 08:03:30 +08004156@@ -591,12 +666,23 @@ static struct notifier_block flow_offload_netdev_notifier = {
developer8cb3ac72022-07-04 10:55:14 +08004157
4158 static int __init nf_flow_table_module_init(void)
4159 {
4160- return register_netdevice_notifier(&flow_offload_netdev_notifier);
4161+ int ret;
4162+
4163+ ret = nf_flow_table_offload_init();
4164+ if (ret)
4165+ return ret;
4166+
4167+ ret = register_netdevice_notifier(&flow_offload_netdev_notifier);
4168+ if (ret)
4169+ nf_flow_table_offload_exit();
4170+
4171+ return ret;
4172 }
4173
4174 static void __exit nf_flow_table_module_exit(void)
4175 {
4176 unregister_netdevice_notifier(&flow_offload_netdev_notifier);
4177+ nf_flow_table_offload_exit();
4178 }
4179
4180 module_init(nf_flow_table_module_init);
developeree39bcf2023-06-16 08:03:30 +08004181@@ -604,3 +690,4 @@ module_exit(nf_flow_table_module_exit);
developer8cb3ac72022-07-04 10:55:14 +08004182
4183 MODULE_LICENSE("GPL");
4184 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
4185+MODULE_DESCRIPTION("Netfilter flow table module");
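Taken together, the nf_flow_table_core.c changes above split flow setup into three steps: flow_offload_alloc() now takes only the conntrack entry, route and encapsulation data are filled in by flow_offload_route_init(), and flow_offload_add() arms the timeout and, on flowtables with hardware offload enabled, queues the NF_FLOW_HW add. A minimal caller sketch of the new API; populating the nf_flow_route and setting IPS_OFFLOAD_BIT are caller-specific and elided here (the real users are the flowtable front ends added elsewhere in this patch):

/* Sketch of the three-step API exposed by the hunks above.  The caller
 * is expected to have set IPS_OFFLOAD_BIT on the conntrack and to have
 * filled "route" (dst, ifindexes, encap/VLAN stack, xmit_type).
 */
static int example_offload_flow(struct nf_flowtable *flowtable,
				struct nf_conn *ct,
				struct nf_flow_route *route)
{
	struct flow_offload *flow;
	int err;

	flow = flow_offload_alloc(ct);			/* takes a reference on ct */
	if (!flow)
		return -ENOMEM;

	err = flow_offload_route_init(flow, route);	/* grabs dst references */
	if (err < 0)
		goto err_free;

	err = flow_offload_add(flowtable, flow);	/* hash insert + HW add */
	if (err < 0)
		goto err_free;

	return 0;

err_free:
	flow_offload_free(flow);			/* drops the ct reference */
	return err;
}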
4186diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
developeree39bcf2023-06-16 08:03:30 +08004187index 397129b2..6257d87c 100644
developer8cb3ac72022-07-04 10:55:14 +08004188--- a/net/netfilter/nf_flow_table_ip.c
4189+++ b/net/netfilter/nf_flow_table_ip.c
4190@@ -7,11 +7,13 @@
4191 #include <linux/ip.h>
4192 #include <linux/ipv6.h>
4193 #include <linux/netdevice.h>
4194+#include <linux/if_ether.h>
4195 #include <net/ip.h>
4196 #include <net/ipv6.h>
4197 #include <net/ip6_route.h>
4198 #include <net/neighbour.h>
4199 #include <net/netfilter/nf_flow_table.h>
4200+#include <net/netfilter/nf_conntrack_acct.h>
4201 /* For layer 4 checksum field offset. */
4202 #include <linux/tcp.h>
4203 #include <linux/udp.h>
4204@@ -24,9 +26,6 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
4205 if (proto != IPPROTO_TCP)
4206 return 0;
4207
4208- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
4209- return -1;
4210-
4211 tcph = (void *)(skb_network_header(skb) + thoff);
4212 if (unlikely(tcph->fin || tcph->rst)) {
4213 flow_offload_teardown(flow);
4214@@ -36,30 +35,20 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
4215 return 0;
4216 }
4217
4218-static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
4219- __be32 addr, __be32 new_addr)
4220+static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
4221+ __be32 addr, __be32 new_addr)
4222 {
4223 struct tcphdr *tcph;
4224
4225- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
4226- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
4227- return -1;
4228-
4229 tcph = (void *)(skb_network_header(skb) + thoff);
4230 inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
4231-
4232- return 0;
4233 }
4234
4235-static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
4236- __be32 addr, __be32 new_addr)
4237+static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
4238+ __be32 addr, __be32 new_addr)
4239 {
4240 struct udphdr *udph;
4241
4242- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
4243- skb_try_make_writable(skb, thoff + sizeof(*udph)))
4244- return -1;
4245-
4246 udph = (void *)(skb_network_header(skb) + thoff);
4247 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
4248 inet_proto_csum_replace4(&udph->check, skb, addr,
4249@@ -67,31 +56,25 @@ static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
4250 if (!udph->check)
4251 udph->check = CSUM_MANGLED_0;
4252 }
4253-
4254- return 0;
4255 }
4256
4257-static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
4258- unsigned int thoff, __be32 addr,
4259- __be32 new_addr)
4260+static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
4261+ unsigned int thoff, __be32 addr,
4262+ __be32 new_addr)
4263 {
4264 switch (iph->protocol) {
4265 case IPPROTO_TCP:
4266- if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
4267- return NF_DROP;
4268+ nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr);
4269 break;
4270 case IPPROTO_UDP:
4271- if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
4272- return NF_DROP;
4273+ nf_flow_nat_ip_udp(skb, thoff, addr, new_addr);
4274 break;
4275 }
4276-
4277- return 0;
4278 }
4279
4280-static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4281- struct iphdr *iph, unsigned int thoff,
4282- enum flow_offload_tuple_dir dir)
4283+static void nf_flow_snat_ip(const struct flow_offload *flow,
4284+ struct sk_buff *skb, struct iphdr *iph,
4285+ unsigned int thoff, enum flow_offload_tuple_dir dir)
4286 {
4287 __be32 addr, new_addr;
4288
4289@@ -106,17 +89,15 @@ static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4290 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
4291 iph->daddr = new_addr;
4292 break;
4293- default:
4294- return -1;
4295 }
4296 csum_replace4(&iph->check, addr, new_addr);
4297
4298- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4299+ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4300 }
4301
4302-static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4303- struct iphdr *iph, unsigned int thoff,
4304- enum flow_offload_tuple_dir dir)
4305+static void nf_flow_dnat_ip(const struct flow_offload *flow,
4306+ struct sk_buff *skb, struct iphdr *iph,
4307+ unsigned int thoff, enum flow_offload_tuple_dir dir)
4308 {
4309 __be32 addr, new_addr;
4310
4311@@ -131,29 +112,24 @@ static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4312 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
4313 iph->saddr = new_addr;
4314 break;
4315- default:
4316- return -1;
4317 }
4318 csum_replace4(&iph->check, addr, new_addr);
4319
4320- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4321+ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4322 }
4323
4324-static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4325- unsigned int thoff, enum flow_offload_tuple_dir dir)
4326+static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4327+ unsigned int thoff, enum flow_offload_tuple_dir dir,
4328+ struct iphdr *iph)
4329 {
4330- struct iphdr *iph = ip_hdr(skb);
4331-
4332- if (flow->flags & FLOW_OFFLOAD_SNAT &&
4333- (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
4334- nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
4335- return -1;
4336- if (flow->flags & FLOW_OFFLOAD_DNAT &&
4337- (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
4338- nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
4339- return -1;
4340-
4341- return 0;
4342+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
4343+ nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir);
4344+ nf_flow_snat_ip(flow, skb, iph, thoff, dir);
4345+ }
4346+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
4347+ nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir);
4348+ nf_flow_dnat_ip(flow, skb, iph, thoff, dir);
4349+ }
4350 }
4351
4352 static bool ip_has_options(unsigned int thoff)
4353@@ -161,35 +137,70 @@ static bool ip_has_options(unsigned int thoff)
4354 return thoff != sizeof(struct iphdr);
4355 }
4356
4357+static void nf_flow_tuple_encap(struct sk_buff *skb,
4358+ struct flow_offload_tuple *tuple)
4359+{
4360+ struct vlan_ethhdr *veth;
4361+ struct pppoe_hdr *phdr;
4362+ int i = 0;
4363+
4364+ if (skb_vlan_tag_present(skb)) {
4365+ tuple->encap[i].id = skb_vlan_tag_get(skb);
4366+ tuple->encap[i].proto = skb->vlan_proto;
4367+ i++;
4368+ }
4369+ switch (skb->protocol) {
4370+ case htons(ETH_P_8021Q):
4371+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
4372+ tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
4373+ tuple->encap[i].proto = skb->protocol;
4374+ break;
4375+ case htons(ETH_P_PPP_SES):
4376+ phdr = (struct pppoe_hdr *)skb_mac_header(skb);
4377+ tuple->encap[i].id = ntohs(phdr->sid);
4378+ tuple->encap[i].proto = skb->protocol;
4379+ break;
4380+ }
4381+}
4382+
4383 static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
4384- struct flow_offload_tuple *tuple)
4385+ struct flow_offload_tuple *tuple, u32 *hdrsize,
4386+ u32 offset)
4387 {
4388 struct flow_ports *ports;
4389 unsigned int thoff;
4390 struct iphdr *iph;
4391
4392- if (!pskb_may_pull(skb, sizeof(*iph)))
4393+ if (!pskb_may_pull(skb, sizeof(*iph) + offset))
4394 return -1;
4395
4396- iph = ip_hdr(skb);
4397- thoff = iph->ihl * 4;
4398+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
4399+ thoff = (iph->ihl * 4);
4400
4401 if (ip_is_fragment(iph) ||
4402 unlikely(ip_has_options(thoff)))
4403 return -1;
4404
4405- if (iph->protocol != IPPROTO_TCP &&
4406- iph->protocol != IPPROTO_UDP)
4407+ thoff += offset;
4408+
4409+ switch (iph->protocol) {
4410+ case IPPROTO_TCP:
4411+ *hdrsize = sizeof(struct tcphdr);
4412+ break;
4413+ case IPPROTO_UDP:
4414+ *hdrsize = sizeof(struct udphdr);
4415+ break;
4416+ default:
4417 return -1;
4418+ }
4419
4420 if (iph->ttl <= 1)
4421 return -1;
4422
4423- thoff = iph->ihl * 4;
4424- if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
4425+ if (!pskb_may_pull(skb, thoff + *hdrsize))
4426 return -1;
4427
4428- iph = ip_hdr(skb);
4429+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
4430 ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
4431
4432 tuple->src_v4.s_addr = iph->saddr;
4433@@ -199,6 +210,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
4434 tuple->l3proto = AF_INET;
4435 tuple->l4proto = iph->protocol;
4436 tuple->iifidx = dev->ifindex;
4437+ nf_flow_tuple_encap(skb, tuple);
4438
4439 return 0;
4440 }
developeree39bcf2023-06-16 08:03:30 +08004441@@ -225,6 +237,75 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08004442 return NF_STOLEN;
4443 }
4444
4445+static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
4446+ u32 *offset)
4447+{
4448+ struct vlan_ethhdr *veth;
4449+
4450+ switch (skb->protocol) {
4451+ case htons(ETH_P_8021Q):
4452+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
4453+ if (veth->h_vlan_encapsulated_proto == proto) {
4454+ *offset += VLAN_HLEN;
4455+ return true;
4456+ }
4457+ break;
4458+ case htons(ETH_P_PPP_SES):
4459+ if (nf_flow_pppoe_proto(skb) == proto) {
4460+ *offset += PPPOE_SES_HLEN;
4461+ return true;
4462+ }
4463+ break;
4464+ }
4465+
4466+ return false;
4467+}
4468+
4469+static void nf_flow_encap_pop(struct sk_buff *skb,
4470+ struct flow_offload_tuple_rhash *tuplehash)
4471+{
4472+ struct vlan_hdr *vlan_hdr;
4473+ int i;
4474+
4475+ for (i = 0; i < tuplehash->tuple.encap_num; i++) {
4476+ if (skb_vlan_tag_present(skb)) {
4477+ __vlan_hwaccel_clear_tag(skb);
4478+ continue;
4479+ }
4480+ switch (skb->protocol) {
4481+ case htons(ETH_P_8021Q):
4482+ vlan_hdr = (struct vlan_hdr *)skb->data;
4483+ __skb_pull(skb, VLAN_HLEN);
4484+ vlan_set_encap_proto(skb, vlan_hdr);
4485+ skb_reset_network_header(skb);
4486+ break;
4487+ case htons(ETH_P_PPP_SES):
4488+ skb->protocol = nf_flow_pppoe_proto(skb);
4489+ skb_pull(skb, PPPOE_SES_HLEN);
4490+ skb_reset_network_header(skb);
4491+ break;
4492+ }
4493+ }
4494+}
4495+
4496+static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
4497+ const struct flow_offload_tuple_rhash *tuplehash,
4498+ unsigned short type)
4499+{
4500+ struct net_device *outdev;
4501+
4502+ outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
4503+ if (!outdev)
4504+ return NF_DROP;
4505+
4506+ skb->dev = outdev;
4507+ dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
4508+ tuplehash->tuple.out.h_source, skb->len);
4509+ dev_queue_xmit(skb);
4510+
4511+ return NF_STOLEN;
4512+}
4513+
4514 unsigned int
4515 nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
4516 const struct nf_hook_state *state)
developeree39bcf2023-06-16 08:03:30 +08004517@@ -235,15 +316,18 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08004518 enum flow_offload_tuple_dir dir;
4519 struct flow_offload *flow;
4520 struct net_device *outdev;
4521+ u32 hdrsize, offset = 0;
4522+ unsigned int thoff, mtu;
4523 struct rtable *rt;
4524- unsigned int thoff;
4525 struct iphdr *iph;
4526 __be32 nexthop;
4527+ int ret;
4528
4529- if (skb->protocol != htons(ETH_P_IP))
4530+ if (skb->protocol != htons(ETH_P_IP) &&
4531+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset))
4532 return NF_ACCEPT;
4533
4534- if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
4535+ if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0)
4536 return NF_ACCEPT;
4537
4538 tuplehash = flow_offload_lookup(flow_table, &tuple);
developeree39bcf2023-06-16 08:03:30 +08004539@@ -252,75 +336,80 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08004540
4541 dir = tuplehash->tuple.dir;
4542 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
4543- rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
4544- outdev = rt->dst.dev;
developeree39bcf2023-06-16 08:03:30 +08004545-
developer8cb3ac72022-07-04 10:55:14 +08004546- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
developeree39bcf2023-06-16 08:03:30 +08004547- return NF_ACCEPT;
developerb7c46752022-07-04 19:51:38 +08004548
developer8cb3ac72022-07-04 10:55:14 +08004549- if (skb_try_make_writable(skb, sizeof(*iph)))
4550- return NF_DROP;
developerb7c46752022-07-04 19:51:38 +08004551-
developer8cb3ac72022-07-04 10:55:14 +08004552- thoff = ip_hdr(skb)->ihl * 4;
4553- if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
developeree39bcf2023-06-16 08:03:30 +08004554+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
4555+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
developer8cb3ac72022-07-04 10:55:14 +08004556 return NF_ACCEPT;
developer7eb15dc2023-06-14 17:44:03 +08004557
4558- if (!dst_check(&rt->dst, 0)) {
developeree39bcf2023-06-16 08:03:30 +08004559- flow_offload_teardown(flow);
4560+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
4561+ thoff = (iph->ihl * 4) + offset;
4562+ if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
developer7eb15dc2023-06-14 17:44:03 +08004563 return NF_ACCEPT;
developeree39bcf2023-06-16 08:03:30 +08004564- }
developer8cb3ac72022-07-04 10:55:14 +08004565
4566- if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
4567+ if (skb_try_make_writable(skb, thoff + hdrsize))
4568 return NF_DROP;
4569
4570- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
4571+ flow_offload_refresh(flow_table, flow);
4572+
4573+ nf_flow_encap_pop(skb, tuplehash);
4574+ thoff -= offset;
4575+
4576 iph = ip_hdr(skb);
4577+ nf_flow_nat_ip(flow, skb, thoff, dir, iph);
4578+
4579 ip_decrease_ttl(iph);
4580 skb->tstamp = 0;
4581
4582- if (unlikely(dst_xfrm(&rt->dst))) {
4583+ if (flow_table->flags & NF_FLOWTABLE_COUNTER)
4584+ nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
4585+
4586+ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
4587+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
4588 memset(skb->cb, 0, sizeof(struct inet_skb_parm));
4589 IPCB(skb)->iif = skb->dev->ifindex;
4590 IPCB(skb)->flags = IPSKB_FORWARDED;
4591 return nf_flow_xmit_xfrm(skb, state, &rt->dst);
4592 }
4593
4594- skb->dev = outdev;
4595- nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
4596- skb_dst_set_noref(skb, &rt->dst);
4597- neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
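+	/* Deliver the packet according to the cached transmit path: via the
+	 * neighbour layer for routed (NEIGH) flows, or directly to the egress
+	 * device for DIRECT flows; a failed direct xmit tears down the flow.
+	 */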
4598+ switch (tuplehash->tuple.xmit_type) {
4599+ case FLOW_OFFLOAD_XMIT_NEIGH:
4600+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
4601+ outdev = rt->dst.dev;
4602+ skb->dev = outdev;
4603+ nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
4604+ skb_dst_set_noref(skb, &rt->dst);
4605+ neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
4606+ ret = NF_STOLEN;
4607+ break;
4608+ case FLOW_OFFLOAD_XMIT_DIRECT:
4609+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
4610+ if (ret == NF_DROP)
4611+ flow_offload_teardown(flow);
4612+ break;
4613+ }
4614
4615- return NF_STOLEN;
4616+ return ret;
4617 }
4618 EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
4619
4620-static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
4621- struct in6_addr *addr,
4622- struct in6_addr *new_addr)
4623+static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
4624+ struct in6_addr *addr,
4625+ struct in6_addr *new_addr,
4626+ struct ipv6hdr *ip6h)
4627 {
4628 struct tcphdr *tcph;
4629
4630- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
4631- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
4632- return -1;
4633-
4634 tcph = (void *)(skb_network_header(skb) + thoff);
4635 inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
4636 new_addr->s6_addr32, true);
4637-
4638- return 0;
4639 }
4640
4641-static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
4642- struct in6_addr *addr,
4643- struct in6_addr *new_addr)
4644+static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
4645+ struct in6_addr *addr,
4646+ struct in6_addr *new_addr)
4647 {
4648 struct udphdr *udph;
4649
4650- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
4651- skb_try_make_writable(skb, thoff + sizeof(*udph)))
4652- return -1;
4653-
4654 udph = (void *)(skb_network_header(skb) + thoff);
4655 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
4656 inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
developeree39bcf2023-06-16 08:03:30 +08004657@@ -328,32 +417,26 @@ static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
developer8cb3ac72022-07-04 10:55:14 +08004658 if (!udph->check)
4659 udph->check = CSUM_MANGLED_0;
4660 }
4661-
4662- return 0;
4663 }
4664
4665-static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
4666- unsigned int thoff, struct in6_addr *addr,
4667- struct in6_addr *new_addr)
4668+static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
4669+ unsigned int thoff, struct in6_addr *addr,
4670+ struct in6_addr *new_addr)
4671 {
4672 switch (ip6h->nexthdr) {
4673 case IPPROTO_TCP:
4674- if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
4675- return NF_DROP;
4676+ nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h);
4677 break;
4678 case IPPROTO_UDP:
4679- if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
4680- return NF_DROP;
4681+ nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr);
4682 break;
4683 }
4684-
4685- return 0;
4686 }
4687
4688-static int nf_flow_snat_ipv6(const struct flow_offload *flow,
4689- struct sk_buff *skb, struct ipv6hdr *ip6h,
4690- unsigned int thoff,
4691- enum flow_offload_tuple_dir dir)
4692+static void nf_flow_snat_ipv6(const struct flow_offload *flow,
4693+ struct sk_buff *skb, struct ipv6hdr *ip6h,
4694+ unsigned int thoff,
4695+ enum flow_offload_tuple_dir dir)
4696 {
4697 struct in6_addr addr, new_addr;
4698
developeree39bcf2023-06-16 08:03:30 +08004699@@ -368,17 +451,15 @@ static int nf_flow_snat_ipv6(const struct flow_offload *flow,
developer8cb3ac72022-07-04 10:55:14 +08004700 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
4701 ip6h->daddr = new_addr;
4702 break;
4703- default:
4704- return -1;
4705 }
4706
4707- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4708+ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4709 }
4710
4711-static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
4712- struct sk_buff *skb, struct ipv6hdr *ip6h,
4713- unsigned int thoff,
4714- enum flow_offload_tuple_dir dir)
4715+static void nf_flow_dnat_ipv6(const struct flow_offload *flow,
4716+ struct sk_buff *skb, struct ipv6hdr *ip6h,
4717+ unsigned int thoff,
4718+ enum flow_offload_tuple_dir dir)
4719 {
4720 struct in6_addr addr, new_addr;
4721
developeree39bcf2023-06-16 08:03:30 +08004722@@ -393,56 +474,60 @@ static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
developer8cb3ac72022-07-04 10:55:14 +08004723 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
4724 ip6h->saddr = new_addr;
4725 break;
4726- default:
4727- return -1;
4728 }
4729
4730- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4731+ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4732 }
4733
4734-static int nf_flow_nat_ipv6(const struct flow_offload *flow,
4735- struct sk_buff *skb,
4736- enum flow_offload_tuple_dir dir)
4737+static void nf_flow_nat_ipv6(const struct flow_offload *flow,
4738+ struct sk_buff *skb,
4739+ enum flow_offload_tuple_dir dir,
4740+ struct ipv6hdr *ip6h)
4741 {
4742- struct ipv6hdr *ip6h = ipv6_hdr(skb);
4743 unsigned int thoff = sizeof(*ip6h);
4744
4745- if (flow->flags & FLOW_OFFLOAD_SNAT &&
4746- (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
4747- nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
4748- return -1;
4749- if (flow->flags & FLOW_OFFLOAD_DNAT &&
4750- (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
4751- nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
4752- return -1;
4753-
4754- return 0;
4755+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
4756+ nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir);
4757+ nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir);
4758+ }
4759+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
4760+ nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir);
4761+ nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir);
4762+ }
4763 }
4764
4765 static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
4766- struct flow_offload_tuple *tuple)
4767+ struct flow_offload_tuple *tuple, u32 *hdrsize,
4768+ u32 offset)
4769 {
4770 struct flow_ports *ports;
4771 struct ipv6hdr *ip6h;
4772 unsigned int thoff;
4773
4774- if (!pskb_may_pull(skb, sizeof(*ip6h)))
4775+ thoff = sizeof(*ip6h) + offset;
4776+ if (!pskb_may_pull(skb, thoff))
4777 return -1;
4778
4779- ip6h = ipv6_hdr(skb);
4780+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
4781
4782- if (ip6h->nexthdr != IPPROTO_TCP &&
4783- ip6h->nexthdr != IPPROTO_UDP)
4784+ switch (ip6h->nexthdr) {
4785+ case IPPROTO_TCP:
4786+ *hdrsize = sizeof(struct tcphdr);
4787+ break;
4788+ case IPPROTO_UDP:
4789+ *hdrsize = sizeof(struct udphdr);
4790+ break;
4791+ default:
4792 return -1;
4793+ }
4794
4795 if (ip6h->hop_limit <= 1)
4796 return -1;
4797
4798- thoff = sizeof(*ip6h);
4799- if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
4800+ if (!pskb_may_pull(skb, thoff + *hdrsize))
4801 return -1;
4802
4803- ip6h = ipv6_hdr(skb);
4804+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
4805 ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
4806
4807 tuple->src_v6 = ip6h->saddr;
developeree39bcf2023-06-16 08:03:30 +08004808@@ -452,6 +537,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
developer8cb3ac72022-07-04 10:55:14 +08004809 tuple->l3proto = AF_INET6;
4810 tuple->l4proto = ip6h->nexthdr;
4811 tuple->iifidx = dev->ifindex;
4812+ nf_flow_tuple_encap(skb, tuple);
4813
4814 return 0;
4815 }
developeree39bcf2023-06-16 08:03:30 +08004816@@ -467,13 +553,17 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08004817 const struct in6_addr *nexthop;
4818 struct flow_offload *flow;
4819 struct net_device *outdev;
4820+ unsigned int thoff, mtu;
4821+ u32 hdrsize, offset = 0;
4822 struct ipv6hdr *ip6h;
4823 struct rt6_info *rt;
4824+ int ret;
4825
4826- if (skb->protocol != htons(ETH_P_IPV6))
4827+ if (skb->protocol != htons(ETH_P_IPV6) &&
4828+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset))
4829 return NF_ACCEPT;
4830
4831- if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
4832+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0)
4833 return NF_ACCEPT;
4834
4835 tuplehash = flow_offload_lookup(flow_table, &tuple);
developeree39bcf2023-06-16 08:03:30 +08004836@@ -482,44 +572,57 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08004837
4838 dir = tuplehash->tuple.dir;
4839 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
4840- rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
4841- outdev = rt->dst.dev;
developer8cb3ac72022-07-04 10:55:14 +08004842
developerb7c46752022-07-04 19:51:38 +08004843- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
developer8cb3ac72022-07-04 10:55:14 +08004844+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
4845+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
4846 return NF_ACCEPT;
4847
developerb7c46752022-07-04 19:51:38 +08004848- if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
4849- sizeof(*ip6h)))
developer8cb3ac72022-07-04 10:55:14 +08004850+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
4851+ thoff = sizeof(*ip6h) + offset;
4852+ if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
4853 return NF_ACCEPT;
developer8cb3ac72022-07-04 10:55:14 +08004854
developerb7c46752022-07-04 19:51:38 +08004855- if (!dst_check(&rt->dst, tuplehash->tuple.dst_cookie)) {
developeree39bcf2023-06-16 08:03:30 +08004856- flow_offload_teardown(flow);
4857- return NF_ACCEPT;
4858- }
4859-
developer8cb3ac72022-07-04 10:55:14 +08004860- if (skb_try_make_writable(skb, sizeof(*ip6h)))
4861+ if (skb_try_make_writable(skb, thoff + hdrsize))
4862 return NF_DROP;
4863
4864- if (nf_flow_nat_ipv6(flow, skb, dir) < 0)
4865- return NF_DROP;
4866+ flow_offload_refresh(flow_table, flow);
4867+
4868+ nf_flow_encap_pop(skb, tuplehash);
4869
4870- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
4871 ip6h = ipv6_hdr(skb);
4872+ nf_flow_nat_ipv6(flow, skb, dir, ip6h);
4873+
4874 ip6h->hop_limit--;
4875 skb->tstamp = 0;
4876
4877- if (unlikely(dst_xfrm(&rt->dst))) {
4878+ if (flow_table->flags & NF_FLOWTABLE_COUNTER)
4879+ nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
4880+
4881+ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
4882+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
4883 memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
4884 IP6CB(skb)->iif = skb->dev->ifindex;
4885 IP6CB(skb)->flags = IP6SKB_FORWARDED;
4886 return nf_flow_xmit_xfrm(skb, state, &rt->dst);
4887 }
4888
4889- skb->dev = outdev;
4890- nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
4891- skb_dst_set_noref(skb, &rt->dst);
4892- neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
4893+ switch (tuplehash->tuple.xmit_type) {
4894+ case FLOW_OFFLOAD_XMIT_NEIGH:
4895+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
4896+ outdev = rt->dst.dev;
4897+ skb->dev = outdev;
4898+ nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
4899+ skb_dst_set_noref(skb, &rt->dst);
4900+ neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
4901+ ret = NF_STOLEN;
4902+ break;
4903+ case FLOW_OFFLOAD_XMIT_DIRECT:
4904+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
4905+ if (ret == NF_DROP)
4906+ flow_offload_teardown(flow);
4907+ break;
4908+ }
4909
4910- return NF_STOLEN;
4911+ return ret;
4912 }
4913 EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
4914diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
4915new file mode 100644
developeree39bcf2023-06-16 08:03:30 +08004916index 000000000..d94c6fb92
developer8cb3ac72022-07-04 10:55:14 +08004917--- /dev/null
4918+++ b/net/netfilter/nf_flow_table_offload.c
developeree39bcf2023-06-16 08:03:30 +08004919@@ -0,0 +1,1199 @@
developer8cb3ac72022-07-04 10:55:14 +08004920+#include <linux/kernel.h>
4921+#include <linux/init.h>
4922+#include <linux/module.h>
4923+#include <linux/netfilter.h>
4924+#include <linux/rhashtable.h>
4925+#include <linux/netdevice.h>
4926+#include <linux/tc_act/tc_csum.h>
4927+#include <net/flow_offload.h>
4928+#include <net/netfilter/nf_flow_table.h>
4929+#include <net/netfilter/nf_tables.h>
4930+#include <net/netfilter/nf_conntrack.h>
4931+#include <net/netfilter/nf_conntrack_acct.h>
4932+#include <net/netfilter/nf_conntrack_core.h>
4933+#include <net/netfilter/nf_conntrack_tuple.h>
4934+
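+/* Hardware offload requests are served asynchronously: add, delete and
+ * stats commands are queued on dedicated workqueues and pushed to the
+ * flow block callbacks from process context.
+ */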
4935+static struct workqueue_struct *nf_flow_offload_add_wq;
4936+static struct workqueue_struct *nf_flow_offload_del_wq;
4937+static struct workqueue_struct *nf_flow_offload_stats_wq;
4938+
4939+struct flow_offload_work {
4940+ struct list_head list;
4941+ enum flow_cls_command cmd;
4942+ int priority;
4943+ struct nf_flowtable *flowtable;
4944+ struct flow_offload *flow;
4945+ struct work_struct work;
4946+};
4947+
4948+#define NF_FLOW_DISSECTOR(__match, __type, __field) \
4949+ (__match)->dissector.offset[__type] = \
4950+ offsetof(struct nf_flow_key, __field)
4951+
4952+static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
4953+ struct ip_tunnel_info *tun_info)
4954+{
4955+ struct nf_flow_key *mask = &match->mask;
4956+ struct nf_flow_key *key = &match->key;
4957+ unsigned int enc_keys;
4958+
4959+ if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
4960+ return;
4961+
4962+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control);
4963+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
4964+ key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
4965+ mask->enc_key_id.keyid = 0xffffffff;
4966+ enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
4967+ BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);
4968+
4969+ if (ip_tunnel_info_af(tun_info) == AF_INET) {
4970+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
4971+ enc_ipv4);
4972+ key->enc_ipv4.src = tun_info->key.u.ipv4.dst;
4973+ key->enc_ipv4.dst = tun_info->key.u.ipv4.src;
4974+ if (key->enc_ipv4.src)
4975+ mask->enc_ipv4.src = 0xffffffff;
4976+ if (key->enc_ipv4.dst)
4977+ mask->enc_ipv4.dst = 0xffffffff;
4978+ enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
4979+ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
4980+ } else {
4981+ memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
4982+ sizeof(struct in6_addr));
4983+ memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src,
4984+ sizeof(struct in6_addr));
4985+ if (memcmp(&key->enc_ipv6.src, &in6addr_any,
4986+ sizeof(struct in6_addr)))
4987+ memset(&mask->enc_ipv6.src, 0xff,
4988+ sizeof(struct in6_addr));
4989+ if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
4990+ sizeof(struct in6_addr)))
4991+ memset(&mask->enc_ipv6.dst, 0xff,
4992+ sizeof(struct in6_addr));
4993+ enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
4994+ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
4995+ }
4996+
4997+ match->dissector.used_keys |= enc_keys;
4998+}
4999+
5000+static void nf_flow_rule_vlan_match(struct flow_dissector_key_vlan *key,
5001+ struct flow_dissector_key_vlan *mask,
5002+ u16 vlan_id, __be16 proto)
5003+{
5004+ key->vlan_id = vlan_id;
5005+ mask->vlan_id = VLAN_VID_MASK;
5006+ key->vlan_tpid = proto;
5007+ mask->vlan_tpid = 0xffff;
5008+}
5009+
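+/* Build the flow_dissector key/mask pair describing one direction of the
+ * flow: ingress ifindex, optional VLAN/QinQ and tunnel metadata, L3
+ * addresses and the L4 protocol and ports, as consumed by flower-style
+ * drivers.
+ */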
5010+static int nf_flow_rule_match(struct nf_flow_match *match,
5011+ const struct flow_offload_tuple *tuple,
5012+ struct dst_entry *other_dst)
5013+{
5014+ struct nf_flow_key *mask = &match->mask;
5015+ struct nf_flow_key *key = &match->key;
5016+ struct ip_tunnel_info *tun_info;
5017+ bool vlan_encap = false;
5018+
5019+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
5020+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
5021+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
5022+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
5023+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
5024+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
5025+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);
5026+
5027+ if (other_dst && other_dst->lwtstate) {
5028+ tun_info = lwt_tun_info(other_dst->lwtstate);
5029+ nf_flow_rule_lwt_match(match, tun_info);
5030+ }
5031+
5032+ key->meta.ingress_ifindex = tuple->iifidx;
5033+ mask->meta.ingress_ifindex = 0xffffffff;
5034+
5035+ if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
5036+ tuple->encap[0].proto == htons(ETH_P_8021Q)) {
5037+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, vlan);
5038+ nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
5039+ tuple->encap[0].id,
5040+ tuple->encap[0].proto);
5041+ vlan_encap = true;
5042+ }
5043+
5044+ if (tuple->encap_num > 1 && !(tuple->in_vlan_ingress & BIT(1)) &&
5045+ tuple->encap[1].proto == htons(ETH_P_8021Q)) {
5046+ if (vlan_encap) {
5047+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CVLAN,
5048+ cvlan);
5049+ nf_flow_rule_vlan_match(&key->cvlan, &mask->cvlan,
5050+ tuple->encap[1].id,
5051+ tuple->encap[1].proto);
5052+ } else {
5053+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN,
5054+ vlan);
5055+ nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
5056+ tuple->encap[1].id,
5057+ tuple->encap[1].proto);
5058+ }
5059+ }
5060+
5061+ switch (tuple->l3proto) {
5062+ case AF_INET:
5063+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
5064+ key->basic.n_proto = htons(ETH_P_IP);
5065+ key->ipv4.src = tuple->src_v4.s_addr;
5066+ mask->ipv4.src = 0xffffffff;
5067+ key->ipv4.dst = tuple->dst_v4.s_addr;
5068+ mask->ipv4.dst = 0xffffffff;
5069+ break;
5070+ case AF_INET6:
5071+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
5072+ key->basic.n_proto = htons(ETH_P_IPV6);
5073+ key->ipv6.src = tuple->src_v6;
5074+ memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
5075+ key->ipv6.dst = tuple->dst_v6;
5076+ memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
5077+ break;
5078+ default:
5079+ return -EOPNOTSUPP;
5080+ }
5081+ mask->control.addr_type = 0xffff;
5082+ match->dissector.used_keys |= BIT(key->control.addr_type);
5083+ mask->basic.n_proto = 0xffff;
5084+
5085+ switch (tuple->l4proto) {
5086+ case IPPROTO_TCP:
5087+ key->tcp.flags = 0;
5088+ mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
5089+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
5090+ break;
5091+ case IPPROTO_UDP:
5092+ break;
5093+ default:
5094+ return -EOPNOTSUPP;
5095+ }
5096+
5097+ key->basic.ip_proto = tuple->l4proto;
5098+ mask->basic.ip_proto = 0xff;
5099+
5100+ key->tp.src = tuple->src_port;
5101+ mask->tp.src = 0xffff;
5102+ key->tp.dst = tuple->dst_port;
5103+ mask->tp.dst = 0xffff;
5104+
5105+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
5106+ BIT(FLOW_DISSECTOR_KEY_CONTROL) |
5107+ BIT(FLOW_DISSECTOR_KEY_BASIC) |
5108+ BIT(FLOW_DISSECTOR_KEY_PORTS);
5109+ return 0;
5110+}
5111+
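+/* Header rewrites (MAC, NAT addresses and ports) are encoded as 32-bit
+ * masked FLOW_ACTION_MANGLE entries appended to the rule's action list.
+ */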
5112+static void flow_offload_mangle(struct flow_action_entry *entry,
5113+ enum flow_action_mangle_base htype, u32 offset,
5114+ const __be32 *value, const __be32 *mask)
5115+{
5116+ entry->id = FLOW_ACTION_MANGLE;
5117+ entry->mangle.htype = htype;
5118+ entry->mangle.offset = offset;
5119+ memcpy(&entry->mangle.mask, mask, sizeof(u32));
5120+ memcpy(&entry->mangle.val, value, sizeof(u32));
5121+}
5122+
5123+static inline struct flow_action_entry *
5124+flow_action_entry_next(struct nf_flow_rule *flow_rule)
5125+{
5126+ int i = flow_rule->rule->action.num_entries++;
5127+
5128+ return &flow_rule->rule->action.entries[i];
5129+}
5130+
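+/* Rewrite the Ethernet source address: taken from the cached out.h_source
+ * for direct-xmit flows, or from the egress device's own MAC address
+ * (looked up via the reverse tuple's ifindex) for neigh-xmit flows.
+ */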
5131+static int flow_offload_eth_src(struct net *net,
5132+ const struct flow_offload *flow,
5133+ enum flow_offload_tuple_dir dir,
5134+ struct nf_flow_rule *flow_rule)
5135+{
5136+ struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
5137+ struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
5138+ const struct flow_offload_tuple *other_tuple, *this_tuple;
5139+ struct net_device *dev = NULL;
5140+ const unsigned char *addr;
5141+ u32 mask, val;
5142+ u16 val16;
5143+
5144+ this_tuple = &flow->tuplehash[dir].tuple;
5145+
5146+ switch (this_tuple->xmit_type) {
5147+ case FLOW_OFFLOAD_XMIT_DIRECT:
5148+ addr = this_tuple->out.h_source;
5149+ break;
5150+ case FLOW_OFFLOAD_XMIT_NEIGH:
5151+ other_tuple = &flow->tuplehash[!dir].tuple;
5152+ dev = dev_get_by_index(net, other_tuple->iifidx);
5153+ if (!dev)
5154+ return -ENOENT;
5155+
5156+ addr = dev->dev_addr;
5157+ break;
5158+ default:
5159+ return -EOPNOTSUPP;
5160+ }
5161+
5162+ mask = ~0xffff0000;
5163+ memcpy(&val16, addr, 2);
5164+ val = val16 << 16;
5165+ flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
5166+ &val, &mask);
5167+
5168+ mask = ~0xffffffff;
5169+ memcpy(&val, addr + 2, 4);
5170+ flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
5171+ &val, &mask);
5172+
developeree39bcf2023-06-16 08:03:30 +08005173+ if (dev)
5174+ dev_put(dev);
developer8cb3ac72022-07-04 10:55:14 +08005175+
5176+ return 0;
5177+}
5178+
5179+static int flow_offload_eth_dst(struct net *net,
5180+ const struct flow_offload *flow,
5181+ enum flow_offload_tuple_dir dir,
5182+ struct nf_flow_rule *flow_rule)
5183+{
5184+ struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
5185+ struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
5186+ const struct flow_offload_tuple *other_tuple, *this_tuple;
5187+ const struct dst_entry *dst_cache;
5188+ unsigned char ha[ETH_ALEN];
5189+ struct neighbour *n;
5190+ const void *daddr;
5191+ u32 mask, val;
5192+ u8 nud_state;
5193+ u16 val16;
5194+
5195+ this_tuple = &flow->tuplehash[dir].tuple;
5196+
5197+ switch (this_tuple->xmit_type) {
5198+ case FLOW_OFFLOAD_XMIT_DIRECT:
5199+ ether_addr_copy(ha, this_tuple->out.h_dest);
5200+ break;
5201+ case FLOW_OFFLOAD_XMIT_NEIGH:
5202+ other_tuple = &flow->tuplehash[!dir].tuple;
5203+ daddr = &other_tuple->src_v4;
5204+ dst_cache = this_tuple->dst_cache;
5205+ n = dst_neigh_lookup(dst_cache, daddr);
5206+ if (!n)
5207+ return -ENOENT;
5208+
5209+ read_lock_bh(&n->lock);
5210+ nud_state = n->nud_state;
5211+ ether_addr_copy(ha, n->ha);
5212+ read_unlock_bh(&n->lock);
5213+ neigh_release(n);
5214+
5215+ if (!(nud_state & NUD_VALID))
5216+ return -ENOENT;
5217+ break;
5218+ default:
5219+ return -EOPNOTSUPP;
5220+ }
5221+
5222+ mask = ~0xffffffff;
5223+ memcpy(&val, ha, 4);
5224+ flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
5225+ &val, &mask);
5226+
5227+ mask = ~0x0000ffff;
5228+ memcpy(&val16, ha + 4, 2);
5229+ val = val16;
5230+ flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
5231+ &val, &mask);
5232+
5233+ return 0;
5234+}
5235+
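+/* IPv4 SNAT/DNAT: mangle saddr/daddr with the translated address taken
+ * from the opposite direction's tuple, depending on flow direction.
+ */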
5236+static void flow_offload_ipv4_snat(struct net *net,
5237+ const struct flow_offload *flow,
5238+ enum flow_offload_tuple_dir dir,
5239+ struct nf_flow_rule *flow_rule)
5240+{
5241+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5242+ u32 mask = ~htonl(0xffffffff);
5243+ __be32 addr;
5244+ u32 offset;
5245+
5246+ switch (dir) {
5247+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5248+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
5249+ offset = offsetof(struct iphdr, saddr);
5250+ break;
5251+ case FLOW_OFFLOAD_DIR_REPLY:
5252+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
5253+ offset = offsetof(struct iphdr, daddr);
5254+ break;
5255+ default:
5256+ return;
5257+ }
5258+
5259+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
5260+ &addr, &mask);
5261+}
5262+
5263+static void flow_offload_ipv4_dnat(struct net *net,
5264+ const struct flow_offload *flow,
5265+ enum flow_offload_tuple_dir dir,
5266+ struct nf_flow_rule *flow_rule)
5267+{
5268+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5269+ u32 mask = ~htonl(0xffffffff);
5270+ __be32 addr;
5271+ u32 offset;
5272+
5273+ switch (dir) {
5274+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5275+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
5276+ offset = offsetof(struct iphdr, daddr);
5277+ break;
5278+ case FLOW_OFFLOAD_DIR_REPLY:
5279+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
5280+ offset = offsetof(struct iphdr, saddr);
5281+ break;
5282+ default:
5283+ return;
5284+ }
5285+
5286+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
5287+ &addr, &mask);
5288+}
5289+
5290+static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
5291+ unsigned int offset,
5292+ const __be32 *addr, const __be32 *mask)
5293+{
5294+ struct flow_action_entry *entry;
developeree39bcf2023-06-16 08:03:30 +08005295+ int i, j;
developer8cb3ac72022-07-04 10:55:14 +08005296+
developeree39bcf2023-06-16 08:03:30 +08005297+ for (i = 0, j = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32), j++) {
developer8cb3ac72022-07-04 10:55:14 +08005298+ entry = flow_action_entry_next(flow_rule);
5299+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
developeree39bcf2023-06-16 08:03:30 +08005300+ offset + i, &addr[j], mask);
developer8cb3ac72022-07-04 10:55:14 +08005301+ }
5302+}
5303+
5304+static void flow_offload_ipv6_snat(struct net *net,
5305+ const struct flow_offload *flow,
5306+ enum flow_offload_tuple_dir dir,
5307+ struct nf_flow_rule *flow_rule)
5308+{
5309+ u32 mask = ~htonl(0xffffffff);
5310+ const __be32 *addr;
5311+ u32 offset;
5312+
5313+ switch (dir) {
5314+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5315+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
5316+ offset = offsetof(struct ipv6hdr, saddr);
5317+ break;
5318+ case FLOW_OFFLOAD_DIR_REPLY:
5319+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
5320+ offset = offsetof(struct ipv6hdr, daddr);
5321+ break;
5322+ default:
5323+ return;
5324+ }
5325+
5326+ flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
5327+}
5328+
5329+static void flow_offload_ipv6_dnat(struct net *net,
5330+ const struct flow_offload *flow,
5331+ enum flow_offload_tuple_dir dir,
5332+ struct nf_flow_rule *flow_rule)
5333+{
5334+ u32 mask = ~htonl(0xffffffff);
5335+ const __be32 *addr;
5336+ u32 offset;
5337+
5338+ switch (dir) {
5339+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5340+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
5341+ offset = offsetof(struct ipv6hdr, daddr);
5342+ break;
5343+ case FLOW_OFFLOAD_DIR_REPLY:
5344+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
5345+ offset = offsetof(struct ipv6hdr, saddr);
5346+ break;
5347+ default:
5348+ return;
5349+ }
5350+
5351+ flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
5352+}
5353+
5354+static int flow_offload_l4proto(const struct flow_offload *flow)
5355+{
5356+ u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
5357+ u8 type = 0;
5358+
5359+ switch (protonum) {
5360+ case IPPROTO_TCP:
5361+ type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
5362+ break;
5363+ case IPPROTO_UDP:
5364+ type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
5365+ break;
5366+ default:
5367+ break;
5368+ }
5369+
5370+ return type;
5371+}
5372+
5373+static void flow_offload_port_snat(struct net *net,
5374+ const struct flow_offload *flow,
5375+ enum flow_offload_tuple_dir dir,
5376+ struct nf_flow_rule *flow_rule)
5377+{
5378+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5379+ u32 mask, port;
5380+ u32 offset;
5381+
5382+ switch (dir) {
5383+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5384+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
5385+ offset = 0; /* offsetof(struct tcphdr, source); */
5386+ port = htonl(port << 16);
5387+ mask = ~htonl(0xffff0000);
5388+ break;
5389+ case FLOW_OFFLOAD_DIR_REPLY:
5390+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
5391+ offset = 0; /* offsetof(struct tcphdr, dest); */
5392+ port = htonl(port);
5393+ mask = ~htonl(0xffff);
5394+ break;
5395+ default:
5396+ return;
5397+ }
5398+
5399+ flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
5400+ &port, &mask);
5401+}
5402+
5403+static void flow_offload_port_dnat(struct net *net,
5404+ const struct flow_offload *flow,
5405+ enum flow_offload_tuple_dir dir,
5406+ struct nf_flow_rule *flow_rule)
5407+{
5408+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5409+ u32 mask, port;
5410+ u32 offset;
5411+
5412+ switch (dir) {
5413+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5414+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
5415+ offset = 0; /* offsetof(struct tcphdr, dest); */
5416+ port = htonl(port);
5417+ mask = ~htonl(0xffff);
5418+ break;
5419+ case FLOW_OFFLOAD_DIR_REPLY:
5420+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
5421+ offset = 0; /* offsetof(struct tcphdr, source); */
5422+ port = htonl(port << 16);
5423+ mask = ~htonl(0xffff0000);
5424+ break;
5425+ default:
5426+ return;
5427+ }
5428+
5429+ flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
5430+ &port, &mask);
5431+}
5432+
5433+static void flow_offload_ipv4_checksum(struct net *net,
5434+ const struct flow_offload *flow,
5435+ struct nf_flow_rule *flow_rule)
5436+{
5437+ u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
5438+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5439+
5440+ entry->id = FLOW_ACTION_CSUM;
5441+ entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;
5442+
5443+ switch (protonum) {
5444+ case IPPROTO_TCP:
5445+ entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
5446+ break;
5447+ case IPPROTO_UDP:
5448+ entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
5449+ break;
5450+ }
5451+}
5452+
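+/* Emit the final FLOW_ACTION_REDIRECT towards the egress device, i.e. the
+ * cached hw_ifidx for direct-xmit flows or the reverse tuple's ingress
+ * device for neigh-xmit flows; the device reference taken here is released
+ * in __nf_flow_offload_destroy().
+ */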
5453+static void flow_offload_redirect(struct net *net,
5454+ const struct flow_offload *flow,
5455+ enum flow_offload_tuple_dir dir,
5456+ struct nf_flow_rule *flow_rule)
5457+{
5458+ const struct flow_offload_tuple *this_tuple, *other_tuple;
5459+ struct flow_action_entry *entry;
5460+ struct net_device *dev;
5461+ int ifindex;
5462+
5463+ this_tuple = &flow->tuplehash[dir].tuple;
5464+ switch (this_tuple->xmit_type) {
5465+ case FLOW_OFFLOAD_XMIT_DIRECT:
5466+ this_tuple = &flow->tuplehash[dir].tuple;
5467+ ifindex = this_tuple->out.hw_ifidx;
5468+ break;
5469+ case FLOW_OFFLOAD_XMIT_NEIGH:
5470+ other_tuple = &flow->tuplehash[!dir].tuple;
5471+ ifindex = other_tuple->iifidx;
5472+ break;
5473+ default:
5474+ return;
5475+ }
5476+
5477+ dev = dev_get_by_index(net, ifindex);
5478+ if (!dev)
5479+ return;
5480+
5481+ entry = flow_action_entry_next(flow_rule);
5482+ entry->id = FLOW_ACTION_REDIRECT;
5483+ entry->dev = dev;
5484+}
5485+
5486+static void flow_offload_encap_tunnel(const struct flow_offload *flow,
5487+ enum flow_offload_tuple_dir dir,
5488+ struct nf_flow_rule *flow_rule)
5489+{
5490+ const struct flow_offload_tuple *this_tuple;
5491+ struct flow_action_entry *entry;
5492+ struct dst_entry *dst;
5493+
5494+ this_tuple = &flow->tuplehash[dir].tuple;
5495+ if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
5496+ return;
5497+
5498+ dst = this_tuple->dst_cache;
5499+ if (dst && dst->lwtstate) {
5500+ struct ip_tunnel_info *tun_info;
5501+
5502+ tun_info = lwt_tun_info(dst->lwtstate);
5503+ if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
5504+ entry = flow_action_entry_next(flow_rule);
5505+ entry->id = FLOW_ACTION_TUNNEL_ENCAP;
5506+ entry->tunnel = tun_info;
5507+ }
5508+ }
5509+}
5510+
5511+static void flow_offload_decap_tunnel(const struct flow_offload *flow,
5512+ enum flow_offload_tuple_dir dir,
5513+ struct nf_flow_rule *flow_rule)
5514+{
5515+ const struct flow_offload_tuple *other_tuple;
5516+ struct flow_action_entry *entry;
5517+ struct dst_entry *dst;
5518+
5519+ other_tuple = &flow->tuplehash[!dir].tuple;
5520+ if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
5521+ return;
5522+
5523+ dst = other_tuple->dst_cache;
5524+ if (dst && dst->lwtstate) {
5525+ struct ip_tunnel_info *tun_info;
5526+
5527+ tun_info = lwt_tun_info(dst->lwtstate);
5528+ if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
5529+ entry = flow_action_entry_next(flow_rule);
5530+ entry->id = FLOW_ACTION_TUNNEL_DECAP;
5531+ }
5532+ }
5533+}
5534+
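+/* Actions shared by the IPv4 and IPv6 rule builders: tunnel decap/encap,
+ * source and destination MAC rewrite, VLAN pop for tagged ingress and
+ * VLAN/PPPoE push for the egress encapsulation of the reverse tuple.
+ */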
5535+static int
5536+nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
5537+ enum flow_offload_tuple_dir dir,
5538+ struct nf_flow_rule *flow_rule)
5539+{
5540+ const struct flow_offload_tuple *other_tuple;
5541+ const struct flow_offload_tuple *tuple;
5542+ int i;
5543+
5544+ flow_offload_decap_tunnel(flow, dir, flow_rule);
5545+ flow_offload_encap_tunnel(flow, dir, flow_rule);
5546+
5547+ if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
5548+ flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
5549+ return -1;
5550+
5551+ tuple = &flow->tuplehash[dir].tuple;
5552+
5553+ for (i = 0; i < tuple->encap_num; i++) {
5554+ struct flow_action_entry *entry;
5555+
5556+ if (tuple->in_vlan_ingress & BIT(i))
5557+ continue;
5558+
5559+ if (tuple->encap[i].proto == htons(ETH_P_8021Q)) {
5560+ entry = flow_action_entry_next(flow_rule);
5561+ entry->id = FLOW_ACTION_VLAN_POP;
5562+ }
5563+ }
5564+
5565+ other_tuple = &flow->tuplehash[!dir].tuple;
5566+
5567+ for (i = 0; i < other_tuple->encap_num; i++) {
5568+ struct flow_action_entry *entry;
5569+
5570+ if (other_tuple->in_vlan_ingress & BIT(i))
5571+ continue;
5572+
5573+ entry = flow_action_entry_next(flow_rule);
5574+
5575+ switch (other_tuple->encap[i].proto) {
5576+ case htons(ETH_P_PPP_SES):
5577+ entry->id = FLOW_ACTION_PPPOE_PUSH;
5578+ entry->pppoe.sid = other_tuple->encap[i].id;
5579+ break;
5580+ case htons(ETH_P_8021Q):
5581+ entry->id = FLOW_ACTION_VLAN_PUSH;
5582+ entry->vlan.vid = other_tuple->encap[i].id;
5583+ entry->vlan.proto = other_tuple->encap[i].proto;
5584+ break;
5585+ }
5586+ }
5587+
5588+ return 0;
5589+}
5590+
5591+int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
5592+ enum flow_offload_tuple_dir dir,
5593+ struct nf_flow_rule *flow_rule)
5594+{
5595+ if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
5596+ return -1;
5597+
5598+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
5599+ flow_offload_ipv4_snat(net, flow, dir, flow_rule);
5600+ flow_offload_port_snat(net, flow, dir, flow_rule);
5601+ }
5602+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
5603+ flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
5604+ flow_offload_port_dnat(net, flow, dir, flow_rule);
5605+ }
5606+ if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
5607+ test_bit(NF_FLOW_DNAT, &flow->flags))
5608+ flow_offload_ipv4_checksum(net, flow, flow_rule);
5609+
5610+ flow_offload_redirect(net, flow, dir, flow_rule);
5611+
5612+ return 0;
5613+}
5614+EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);
5615+
5616+int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
5617+ enum flow_offload_tuple_dir dir,
5618+ struct nf_flow_rule *flow_rule)
5619+{
5620+ if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
5621+ return -1;
5622+
5623+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
5624+ flow_offload_ipv6_snat(net, flow, dir, flow_rule);
5625+ flow_offload_port_snat(net, flow, dir, flow_rule);
5626+ }
5627+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
5628+ flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
5629+ flow_offload_port_dnat(net, flow, dir, flow_rule);
5630+ }
5631+
5632+ flow_offload_redirect(net, flow, dir, flow_rule);
5633+
5634+ return 0;
5635+}
5636+EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);
5637+
5638+#define NF_FLOW_RULE_ACTION_MAX 16
5639+
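+/* Allocate and populate one nf_flow_rule (match plus up to
+ * NF_FLOW_RULE_ACTION_MAX actions) for the given flow direction, using the
+ * flowtable type's ->action() callback to fill in the action list.
+ */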
5640+static struct nf_flow_rule *
5641+nf_flow_offload_rule_alloc(struct net *net,
5642+ const struct flow_offload_work *offload,
5643+ enum flow_offload_tuple_dir dir)
5644+{
5645+ const struct nf_flowtable *flowtable = offload->flowtable;
5646+ const struct flow_offload_tuple *tuple, *other_tuple;
5647+ const struct flow_offload *flow = offload->flow;
5648+ struct dst_entry *other_dst = NULL;
5649+ struct nf_flow_rule *flow_rule;
5650+ int err = -ENOMEM;
5651+
5652+ flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
5653+ if (!flow_rule)
5654+ goto err_flow;
5655+
5656+ flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
5657+ if (!flow_rule->rule)
5658+ goto err_flow_rule;
5659+
5660+ flow_rule->rule->match.dissector = &flow_rule->match.dissector;
5661+ flow_rule->rule->match.mask = &flow_rule->match.mask;
5662+ flow_rule->rule->match.key = &flow_rule->match.key;
5663+
5664+ tuple = &flow->tuplehash[dir].tuple;
5665+ other_tuple = &flow->tuplehash[!dir].tuple;
5666+ if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
5667+ other_dst = other_tuple->dst_cache;
5668+
5669+ err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
5670+ if (err < 0)
5671+ goto err_flow_match;
5672+
5673+ flow_rule->rule->action.num_entries = 0;
5674+ if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
5675+ goto err_flow_match;
5676+
5677+ return flow_rule;
5678+
5679+err_flow_match:
5680+ kfree(flow_rule->rule);
5681+err_flow_rule:
5682+ kfree(flow_rule);
5683+err_flow:
5684+ return NULL;
5685+}
5686+
5687+static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
5688+{
5689+ struct flow_action_entry *entry;
5690+ int i;
5691+
5692+ for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
5693+ entry = &flow_rule->rule->action.entries[i];
5694+ if (entry->id != FLOW_ACTION_REDIRECT)
5695+ continue;
5696+
5697+ dev_put(entry->dev);
5698+ }
5699+ kfree(flow_rule->rule);
5700+ kfree(flow_rule);
5701+}
5702+
5703+static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
5704+{
5705+ int i;
5706+
5707+ for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
5708+ __nf_flow_offload_destroy(flow_rule[i]);
5709+}
5710+
5711+static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
5712+ struct nf_flow_rule *flow_rule[])
5713+{
5714+ struct net *net = read_pnet(&offload->flowtable->net);
5715+
5716+ flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
5717+ FLOW_OFFLOAD_DIR_ORIGINAL);
5718+ if (!flow_rule[0])
5719+ return -ENOMEM;
5720+
5721+ flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
5722+ FLOW_OFFLOAD_DIR_REPLY);
5723+ if (!flow_rule[1]) {
5724+ __nf_flow_offload_destroy(flow_rule[0]);
5725+ return -ENOMEM;
5726+ }
5727+
5728+ return 0;
5729+}
5730+
5731+static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
5732+ __be16 proto, int priority,
5733+ enum flow_cls_command cmd,
5734+ const struct flow_offload_tuple *tuple,
5735+ struct netlink_ext_ack *extack)
5736+{
5737+ cls_flow->common.protocol = proto;
5738+ cls_flow->common.prio = priority;
5739+ cls_flow->common.extack = extack;
5740+ cls_flow->command = cmd;
5741+ cls_flow->cookie = (unsigned long)tuple;
5742+}
5743+
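+/* Push a FLOW_CLS_REPLACE/DESTROY/STATS command for one flow direction to
+ * every callback registered on the flowtable's flow block; returns the
+ * number of callbacks that accepted the command.
+ */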
5744+static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
5745+ struct flow_offload *flow,
5746+ struct nf_flow_rule *flow_rule,
5747+ enum flow_offload_tuple_dir dir,
5748+ int priority, int cmd,
5749+ struct flow_stats *stats,
5750+ struct list_head *block_cb_list)
5751+{
5752+ struct flow_cls_offload cls_flow = {};
5753+ struct flow_block_cb *block_cb;
5754+ struct netlink_ext_ack extack;
5755+ __be16 proto = ETH_P_ALL;
5756+ int err, i = 0;
5757+
5758+ nf_flow_offload_init(&cls_flow, proto, priority, cmd,
5759+ &flow->tuplehash[dir].tuple, &extack);
5760+ if (cmd == FLOW_CLS_REPLACE)
5761+ cls_flow.rule = flow_rule->rule;
5762+
developer0cc0d732023-06-07 13:52:41 +08005763+ down_read(&flowtable->flow_block_lock);
developer8cb3ac72022-07-04 10:55:14 +08005764+ list_for_each_entry(block_cb, block_cb_list, list) {
5765+ err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
5766+ block_cb->cb_priv);
5767+ if (err < 0)
5768+ continue;
5769+
5770+ i++;
5771+ }
developer0cc0d732023-06-07 13:52:41 +08005772+ up_read(&flowtable->flow_block_lock);
developer8cb3ac72022-07-04 10:55:14 +08005773+
5774+ if (cmd == FLOW_CLS_STATS)
5775+ memcpy(stats, &cls_flow.stats, sizeof(*stats));
5776+
5777+ return i;
5778+}
5779+
5780+static int flow_offload_tuple_add(struct flow_offload_work *offload,
5781+ struct nf_flow_rule *flow_rule,
5782+ enum flow_offload_tuple_dir dir)
5783+{
5784+ return nf_flow_offload_tuple(offload->flowtable, offload->flow,
5785+ flow_rule, dir, offload->priority,
5786+ FLOW_CLS_REPLACE, NULL,
5787+ &offload->flowtable->flow_block.cb_list);
5788+}
5789+
5790+static void flow_offload_tuple_del(struct flow_offload_work *offload,
5791+ enum flow_offload_tuple_dir dir)
5792+{
5793+ nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
5794+ offload->priority, FLOW_CLS_DESTROY, NULL,
5795+ &offload->flowtable->flow_block.cb_list);
5796+}
5797+
5798+static int flow_offload_rule_add(struct flow_offload_work *offload,
5799+ struct nf_flow_rule *flow_rule[])
5800+{
5801+ int ok_count = 0;
5802+
5803+ ok_count += flow_offload_tuple_add(offload, flow_rule[0],
5804+ FLOW_OFFLOAD_DIR_ORIGINAL);
5805+ ok_count += flow_offload_tuple_add(offload, flow_rule[1],
5806+ FLOW_OFFLOAD_DIR_REPLY);
5807+ if (ok_count == 0)
5808+ return -ENOENT;
5809+
5810+ return 0;
5811+}
5812+
5813+static void flow_offload_work_add(struct flow_offload_work *offload)
5814+{
5815+ struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
5816+ int err;
5817+
5818+ err = nf_flow_offload_alloc(offload, flow_rule);
5819+ if (err < 0)
5820+ return;
5821+
5822+ err = flow_offload_rule_add(offload, flow_rule);
5823+ if (err < 0)
5824+ goto out;
5825+
5826+ set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
5827+
5828+out:
5829+ nf_flow_offload_destroy(flow_rule);
5830+}
5831+
5832+static void flow_offload_work_del(struct flow_offload_work *offload)
5833+{
5834+ clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
5835+ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
5836+ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
5837+ set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
5838+}
5839+
5840+static void flow_offload_tuple_stats(struct flow_offload_work *offload,
5841+ enum flow_offload_tuple_dir dir,
5842+ struct flow_stats *stats)
5843+{
5844+ nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
5845+ offload->priority, FLOW_CLS_STATS, stats,
5846+ &offload->flowtable->flow_block.cb_list);
5847+}
5848+
5849+static void flow_offload_work_stats(struct flow_offload_work *offload)
5850+{
5851+ struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
5852+ u64 lastused;
5853+
5854+ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
5855+ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);
5856+
5857+ lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
5858+ offload->flow->timeout = max_t(u64, offload->flow->timeout,
5859+ lastused + flow_offload_get_timeout(offload->flow));
5860+
5861+ if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
5862+ if (stats[0].pkts)
5863+ nf_ct_acct_add(offload->flow->ct,
5864+ FLOW_OFFLOAD_DIR_ORIGINAL,
5865+ stats[0].pkts, stats[0].bytes);
5866+ if (stats[1].pkts)
5867+ nf_ct_acct_add(offload->flow->ct,
5868+ FLOW_OFFLOAD_DIR_REPLY,
5869+ stats[1].pkts, stats[1].bytes);
5870+ }
5871+}
5872+
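+/* Work item handler: run the queued add/del/stats command for a flow and
+ * clear NF_FLOW_HW_PENDING so that a new command may be scheduled.
+ */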
5873+static void flow_offload_work_handler(struct work_struct *work)
5874+{
5875+ struct flow_offload_work *offload;
5876+
5877+ offload = container_of(work, struct flow_offload_work, work);
5878+ switch (offload->cmd) {
5879+ case FLOW_CLS_REPLACE:
5880+ flow_offload_work_add(offload);
5881+ break;
5882+ case FLOW_CLS_DESTROY:
5883+ flow_offload_work_del(offload);
5884+ break;
5885+ case FLOW_CLS_STATS:
5886+ flow_offload_work_stats(offload);
5887+ break;
5888+ default:
5889+ WARN_ON_ONCE(1);
5890+ }
5891+
5892+ clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags);
5893+ kfree(offload);
5894+}
5895+
5896+static void flow_offload_queue_work(struct flow_offload_work *offload)
5897+{
5898+ if (offload->cmd == FLOW_CLS_REPLACE)
5899+ queue_work(nf_flow_offload_add_wq, &offload->work);
5900+ else if (offload->cmd == FLOW_CLS_DESTROY)
5901+ queue_work(nf_flow_offload_del_wq, &offload->work);
5902+ else
5903+ queue_work(nf_flow_offload_stats_wq, &offload->work);
5904+}
5905+
5906+static struct flow_offload_work *
5907+nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
5908+ struct flow_offload *flow, unsigned int cmd)
5909+{
5910+ struct flow_offload_work *offload;
5911+
5912+ if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags))
5913+ return NULL;
5914+
5915+ offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
5916+ if (!offload) {
5917+ clear_bit(NF_FLOW_HW_PENDING, &flow->flags);
5918+ return NULL;
5919+ }
5920+
5921+ offload->cmd = cmd;
5922+ offload->flow = flow;
5923+ offload->priority = flowtable->priority;
5924+ offload->flowtable = flowtable;
5925+ INIT_WORK(&offload->work, flow_offload_work_handler);
5926+
5927+ return offload;
5928+}
5929+
5930+
5931+void nf_flow_offload_add(struct nf_flowtable *flowtable,
5932+ struct flow_offload *flow)
5933+{
5934+ struct flow_offload_work *offload;
5935+
5936+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
5937+ if (!offload)
5938+ return;
5939+
5940+ flow_offload_queue_work(offload);
5941+}
5942+
5943+void nf_flow_offload_del(struct nf_flowtable *flowtable,
5944+ struct flow_offload *flow)
5945+{
5946+ struct flow_offload_work *offload;
5947+
5948+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY);
5949+ if (!offload)
5950+ return;
5951+
5952+ set_bit(NF_FLOW_HW_DYING, &flow->flags);
5953+ flow_offload_queue_work(offload);
5954+}
5955+
5956+void nf_flow_offload_stats(struct nf_flowtable *flowtable,
developeree39bcf2023-06-16 08:03:30 +08005957+ struct flow_offload *flow, bool force)
developer8cb3ac72022-07-04 10:55:14 +08005958+{
5959+ struct flow_offload_work *offload;
5960+ __s32 delta;
5961+
developeree39bcf2023-06-16 08:03:30 +08005962+ if (!force) {
5963+ delta = nf_flow_timeout_delta(flow->timeout);
5964+ if ((delta >= (9 * flow_offload_get_timeout(flow)) / 10))
5965+ return;
5966+ }
developer8cb3ac72022-07-04 10:55:14 +08005967+
5968+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
5969+ if (!offload)
5970+ return;
5971+
5972+ flow_offload_queue_work(offload);
5973+}
5974+
5975+void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
5976+{
5977+ if (nf_flowtable_hw_offload(flowtable)) {
5978+ flush_workqueue(nf_flow_offload_add_wq);
5979+ flush_workqueue(nf_flow_offload_del_wq);
5980+ flush_workqueue(nf_flow_offload_stats_wq);
5981+ }
5982+}
5983+
5984+static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
5985+ struct flow_block_offload *bo,
5986+ enum flow_block_command cmd)
5987+{
5988+ struct flow_block_cb *block_cb, *next;
5989+ int err = 0;
5990+
developer0cc0d732023-06-07 13:52:41 +08005991+ down_write(&flowtable->flow_block_lock);
developeree39bcf2023-06-16 08:03:30 +08005992+
developer8cb3ac72022-07-04 10:55:14 +08005993+ switch (cmd) {
5994+ case FLOW_BLOCK_BIND:
5995+ list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
5996+ break;
5997+ case FLOW_BLOCK_UNBIND:
5998+ list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
5999+ list_del(&block_cb->list);
6000+ flow_block_cb_free(block_cb);
6001+ }
6002+ break;
6003+ default:
6004+ WARN_ON_ONCE(1);
6005+ err = -EOPNOTSUPP;
6006+ }
developeree39bcf2023-06-16 08:03:30 +08006007+
developer0cc0d732023-06-07 13:52:41 +08006008+ up_write(&flowtable->flow_block_lock);
developera54478c2022-10-01 16:41:46 +08006009+
developer8cb3ac72022-07-04 10:55:14 +08006010+ return err;
6011+}
6012+
6013+static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
6014+ struct net *net,
6015+ enum flow_block_command cmd,
6016+ struct nf_flowtable *flowtable,
6017+ struct netlink_ext_ack *extack)
6018+{
6019+ memset(bo, 0, sizeof(*bo));
6020+ bo->net = net;
6021+ bo->block = &flowtable->flow_block;
6022+ bo->command = cmd;
6023+ bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
6024+ bo->extack = extack;
6025+ INIT_LIST_HEAD(&bo->cb_list);
6026+}
6027+
6028+static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
6029+ struct nf_flowtable *flowtable,
6030+ struct net_device *dev,
6031+ enum flow_block_command cmd,
6032+ struct netlink_ext_ack *extack)
6033+{
6034+ nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
6035+ extack);
6036+ flow_indr_block_call(dev, bo, cmd);
6037+
6038+ if (list_empty(&bo->cb_list))
6039+ return -EOPNOTSUPP;
6040+
6041+ return 0;
6042+}
6043+
6044+static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
6045+ struct nf_flowtable *flowtable,
6046+ struct net_device *dev,
6047+ enum flow_block_command cmd,
6048+ struct netlink_ext_ack *extack)
6049+{
6050+ int err;
6051+
6052+ nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
6053+ extack);
developer0cc0d732023-06-07 13:52:41 +08006054+ down_write(&flowtable->flow_block_lock);
developer8cb3ac72022-07-04 10:55:14 +08006055+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
developer0cc0d732023-06-07 13:52:41 +08006056+ up_write(&flowtable->flow_block_lock);
developer8cb3ac72022-07-04 10:55:14 +08006057+ if (err < 0)
6058+ return err;
6059+
6060+ return 0;
6061+}
6062+
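+/* Bind or unbind the flowtable's flow block on a device, preferring the
+ * device's own ndo_setup_tc(TC_SETUP_FT) and falling back to indirect
+ * block callbacks for devices without one.
+ */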
6063+int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
6064+ struct net_device *dev,
6065+ enum flow_block_command cmd)
6066+{
6067+ struct netlink_ext_ack extack = {};
6068+ struct flow_block_offload bo;
6069+ int err;
6070+
6071+ if (!nf_flowtable_hw_offload(flowtable))
6072+ return 0;
6073+
6074+ if (dev->netdev_ops->ndo_setup_tc)
6075+ err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
6076+ &extack);
6077+ else
6078+ err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
6079+ &extack);
6080+ if (err < 0)
6081+ return err;
6082+
6083+ return nf_flow_table_block_setup(flowtable, &bo, cmd);
6084+}
6085+EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);
6086+
6087+int nf_flow_table_offload_init(void)
6088+{
6089+ nf_flow_offload_add_wq = alloc_workqueue("nf_ft_offload_add",
6090+ WQ_UNBOUND | WQ_SYSFS, 0);
6091+ if (!nf_flow_offload_add_wq)
6092+ return -ENOMEM;
6093+
6094+ nf_flow_offload_del_wq = alloc_workqueue("nf_ft_offload_del",
6095+ WQ_UNBOUND | WQ_SYSFS, 0);
6096+ if (!nf_flow_offload_del_wq)
6097+ goto err_del_wq;
6098+
6099+ nf_flow_offload_stats_wq = alloc_workqueue("nf_ft_offload_stats",
6100+ WQ_UNBOUND | WQ_SYSFS, 0);
6101+ if (!nf_flow_offload_stats_wq)
6102+ goto err_stats_wq;
6103+
6104+ return 0;
6105+
6106+err_stats_wq:
6107+ destroy_workqueue(nf_flow_offload_del_wq);
6108+err_del_wq:
6109+ destroy_workqueue(nf_flow_offload_add_wq);
6110+ return -ENOMEM;
6111+}
6112+
6113+void nf_flow_table_offload_exit(void)
6114+{
6115+ destroy_workqueue(nf_flow_offload_add_wq);
6116+ destroy_workqueue(nf_flow_offload_del_wq);
6117+ destroy_workqueue(nf_flow_offload_stats_wq);
6118+}
6119diff --git a/net/netfilter/xt_FLOWOFFLOAD.c b/net/netfilter/xt_FLOWOFFLOAD.c
6120new file mode 100644
developeree39bcf2023-06-16 08:03:30 +08006121index 0000000..12f067c
developer8cb3ac72022-07-04 10:55:14 +08006122--- /dev/null
6123+++ b/net/netfilter/xt_FLOWOFFLOAD.c
developeree39bcf2023-06-16 08:03:30 +08006124@@ -0,0 +1,794 @@
developer8cb3ac72022-07-04 10:55:14 +08006125+/*
6126+ * Copyright (C) 2018-2021 Felix Fietkau <nbd@nbd.name>
6127+ *
6128+ * This program is free software; you can redistribute it and/or modify
6129+ * it under the terms of the GNU General Public License version 2 as
6130+ * published by the Free Software Foundation.
6131+ */
6132+#include <linux/module.h>
6133+#include <linux/init.h>
6134+#include <linux/netfilter.h>
6135+#include <linux/netfilter/xt_FLOWOFFLOAD.h>
6136+#include <linux/if_vlan.h>
6137+#include <net/ip.h>
6138+#include <net/netfilter/nf_conntrack.h>
6139+#include <net/netfilter/nf_conntrack_extend.h>
6140+#include <net/netfilter/nf_conntrack_helper.h>
6141+#include <net/netfilter/nf_flow_table.h>
6142+
6143+struct xt_flowoffload_hook {
6144+ struct hlist_node list;
6145+ struct nf_hook_ops ops;
6146+ struct net *net;
6147+ bool registered;
6148+ bool used;
6149+};
6150+
6151+struct xt_flowoffload_table {
6152+ struct nf_flowtable ft;
6153+ struct hlist_head hooks;
6154+ struct delayed_work work;
6155+};
6156+
6157+struct nf_forward_info {
6158+ const struct net_device *indev;
6159+ const struct net_device *outdev;
6160+ const struct net_device *hw_outdev;
6161+ struct id {
6162+ __u16 id;
6163+ __be16 proto;
6164+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
6165+ u8 num_encaps;
6166+ u8 ingress_vlans;
6167+ u8 h_source[ETH_ALEN];
6168+ u8 h_dest[ETH_ALEN];
6169+ enum flow_offload_xmit_type xmit_type;
6170+};
6171+
6172+static DEFINE_SPINLOCK(hooks_lock);
6173+
6174+struct xt_flowoffload_table flowtable[2];
6175+
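+/* netdev ingress hook: peek through an outer VLAN or PPPoE header to find
+ * the encapsulated protocol and dispatch to the IPv4 or IPv6 flowtable
+ * fast path.
+ */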
6176+static unsigned int
6177+xt_flowoffload_net_hook(void *priv, struct sk_buff *skb,
6178+ const struct nf_hook_state *state)
6179+{
6180+ struct vlan_ethhdr *veth;
6181+ __be16 proto;
6182+
6183+ switch (skb->protocol) {
6184+ case htons(ETH_P_8021Q):
6185+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
6186+ proto = veth->h_vlan_encapsulated_proto;
6187+ break;
6188+ case htons(ETH_P_PPP_SES):
6189+ proto = nf_flow_pppoe_proto(skb);
6190+ break;
6191+ default:
6192+ proto = skb->protocol;
6193+ break;
6194+ }
6195+
6196+ switch (proto) {
6197+ case htons(ETH_P_IP):
6198+ return nf_flow_offload_ip_hook(priv, skb, state);
6199+ case htons(ETH_P_IPV6):
6200+ return nf_flow_offload_ipv6_hook(priv, skb, state);
6201+ }
6202+
6203+ return NF_ACCEPT;
6204+}
6205+
6206+static int
6207+xt_flowoffload_create_hook(struct xt_flowoffload_table *table,
6208+ struct net_device *dev)
6209+{
6210+ struct xt_flowoffload_hook *hook;
6211+ struct nf_hook_ops *ops;
6212+
6213+ hook = kzalloc(sizeof(*hook), GFP_ATOMIC);
6214+ if (!hook)
6215+ return -ENOMEM;
6216+
6217+ ops = &hook->ops;
6218+ ops->pf = NFPROTO_NETDEV;
6219+ ops->hooknum = NF_NETDEV_INGRESS;
6220+ ops->priority = 10;
6221+ ops->priv = &table->ft;
6222+ ops->hook = xt_flowoffload_net_hook;
6223+ ops->dev = dev;
6224+
6225+ hlist_add_head(&hook->list, &table->hooks);
6226+ mod_delayed_work(system_power_efficient_wq, &table->work, 0);
6227+
6228+ return 0;
6229+}
6230+
6231+static struct xt_flowoffload_hook *
6232+flow_offload_lookup_hook(struct xt_flowoffload_table *table,
6233+ struct net_device *dev)
6234+{
6235+ struct xt_flowoffload_hook *hook;
6236+
6237+ hlist_for_each_entry(hook, &table->hooks, list) {
6238+ if (hook->ops.dev == dev)
6239+ return hook;
6240+ }
6241+
6242+ return NULL;
6243+}
6244+
6245+static void
6246+xt_flowoffload_check_device(struct xt_flowoffload_table *table,
6247+ struct net_device *dev)
6248+{
6249+ struct xt_flowoffload_hook *hook;
6250+
6251+ if (!dev)
6252+ return;
6253+
6254+ spin_lock_bh(&hooks_lock);
6255+ hook = flow_offload_lookup_hook(table, dev);
6256+ if (hook)
6257+ hook->used = true;
6258+ else
6259+ xt_flowoffload_create_hook(table, dev);
6260+ spin_unlock_bh(&hooks_lock);
6261+}
6262+
6263+static void
6264+xt_flowoffload_register_hooks(struct xt_flowoffload_table *table)
6265+{
6266+ struct xt_flowoffload_hook *hook;
6267+
6268+restart:
6269+ hlist_for_each_entry(hook, &table->hooks, list) {
6270+ if (hook->registered)
6271+ continue;
6272+
6273+ hook->registered = true;
6274+ hook->net = dev_net(hook->ops.dev);
6275+ spin_unlock_bh(&hooks_lock);
6276+ nf_register_net_hook(hook->net, &hook->ops);
6277+ if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
6278+ table->ft.type->setup(&table->ft, hook->ops.dev,
6279+ FLOW_BLOCK_BIND);
6280+ spin_lock_bh(&hooks_lock);
6281+ goto restart;
6282+ }
6283+
6284+}
6285+
6286+static bool
6287+xt_flowoffload_cleanup_hooks(struct xt_flowoffload_table *table)
6288+{
6289+ struct xt_flowoffload_hook *hook;
6290+ bool active = false;
6291+
6292+restart:
6293+ spin_lock_bh(&hooks_lock);
6294+ hlist_for_each_entry(hook, &table->hooks, list) {
6295+ if (hook->used || !hook->registered) {
6296+ active = true;
6297+ continue;
6298+ }
6299+
6300+ hlist_del(&hook->list);
6301+ spin_unlock_bh(&hooks_lock);
6302+ if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
6303+ table->ft.type->setup(&table->ft, hook->ops.dev,
6304+ FLOW_BLOCK_UNBIND);
6305+ nf_unregister_net_hook(hook->net, &hook->ops);
6306+ kfree(hook);
6307+ goto restart;
6308+ }
6309+ spin_unlock_bh(&hooks_lock);
6310+
6311+ return active;
6312+}
6313+
6314+static void
6315+xt_flowoffload_check_hook(struct flow_offload *flow, void *data)
6316+{
6317+ struct xt_flowoffload_table *table = data;
6318+ struct flow_offload_tuple *tuple0 = &flow->tuplehash[0].tuple;
6319+ struct flow_offload_tuple *tuple1 = &flow->tuplehash[1].tuple;
6320+ struct xt_flowoffload_hook *hook;
6321+
6322+ spin_lock_bh(&hooks_lock);
6323+ hlist_for_each_entry(hook, &table->hooks, list) {
6324+ if (hook->ops.dev->ifindex != tuple0->iifidx &&
6325+ hook->ops.dev->ifindex != tuple1->iifidx)
6326+ continue;
6327+
6328+ hook->used = true;
6329+ }
6330+ spin_unlock_bh(&hooks_lock);
6331+}
6332+
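+/*
+ * Periodic maintenance work: register pending hooks, clear the used
+ * flags, re-mark hooks referenced by live flows via nf_flow_table_iterate()
+ * and reap unused ones; reschedules itself while hooks remain active.
+ */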
6333+static void
6334+xt_flowoffload_hook_work(struct work_struct *work)
6335+{
6336+ struct xt_flowoffload_table *table;
6337+ struct xt_flowoffload_hook *hook;
6338+ int err;
6339+
6340+ table = container_of(work, struct xt_flowoffload_table, work.work);
6341+
6342+ spin_lock_bh(&hooks_lock);
6343+ xt_flowoffload_register_hooks(table);
6344+ hlist_for_each_entry(hook, &table->hooks, list)
6345+ hook->used = false;
6346+ spin_unlock_bh(&hooks_lock);
6347+
6348+ err = nf_flow_table_iterate(&table->ft, xt_flowoffload_check_hook,
6349+ table);
6350+ if (err && err != -EAGAIN)
6351+ goto out;
6352+
6353+ if (!xt_flowoffload_cleanup_hooks(table))
6354+ return;
6355+
6356+out:
6357+ queue_delayed_work(system_power_efficient_wq, &table->work, HZ);
6358+}
6359+
6360+static bool
6361+xt_flowoffload_skip(struct sk_buff *skb, int family)
6362+{
6363+ if (skb_sec_path(skb))
6364+ return true;
6365+
6366+ if (family == NFPROTO_IPV4) {
6367+ const struct ip_options *opt = &(IPCB(skb)->opt);
6368+
6369+ if (unlikely(opt->optlen))
6370+ return true;
6371+ }
6372+
6373+ return false;
6374+}
6375+
6376+static enum flow_offload_xmit_type nf_xmit_type(struct dst_entry *dst)
6377+{
6378+ if (dst_xfrm(dst))
6379+ return FLOW_OFFLOAD_XMIT_XFRM;
6380+
6381+ return FLOW_OFFLOAD_XMIT_NEIGH;
6382+}
6383+
6384+static void nf_default_forward_path(struct nf_flow_route *route,
6385+ struct dst_entry *dst_cache,
6386+ enum ip_conntrack_dir dir,
6387+ struct net_device **dev)
6388+{
6389+ route->tuple[!dir].in.ifindex = dst_cache->dev->ifindex;
6390+ route->tuple[dir].dst = dst_cache;
6391+ route->tuple[dir].xmit_type = nf_xmit_type(dst_cache);
6392+}
6393+
6394+static bool nf_is_valid_ether_device(const struct net_device *dev)
6395+{
6396+ if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
6397+ dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
6398+ return false;
6399+
6400+ return true;
6401+}
6402+
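+/*
+ * Flatten the device path resolved by dev_fill_forward_path() into
+ * nf_forward_info: record the source/destination MAC addresses, any
+ * VLAN/PPPoE encapsulation and the underlying output device, and switch
+ * to FLOW_OFFLOAD_XMIT_DIRECT when the resolved device is a plain
+ * Ethernet one.
+ */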
6403+static void nf_dev_path_info(const struct net_device_path_stack *stack,
6404+ struct nf_forward_info *info,
6405+ unsigned char *ha)
6406+{
6407+ const struct net_device_path *path;
6408+ int i;
6409+
6410+ memcpy(info->h_dest, ha, ETH_ALEN);
6411+
6412+ for (i = 0; i < stack->num_paths; i++) {
6413+ path = &stack->path[i];
6414+
6415+ info->indev = path->dev;
6416+
6417+ switch (path->type) {
6418+ case DEV_PATH_ETHERNET:
6419+ case DEV_PATH_DSA:
6420+ case DEV_PATH_VLAN:
6421+ case DEV_PATH_PPPOE:
6422+ if (is_zero_ether_addr(info->h_source))
6423+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
6424+
6425+ if (path->type == DEV_PATH_ETHERNET)
6426+ break;
6427+ if (path->type == DEV_PATH_DSA) {
6428+ i = stack->num_paths;
6429+ break;
6430+ }
6431+
6432+ /* DEV_PATH_VLAN and DEV_PATH_PPPOE */
6433+ if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
6434+ info->indev = NULL;
6435+ break;
6436+ }
6437+ if (!info->outdev)
6438+ info->outdev = path->dev;
6439+ info->encap[info->num_encaps].id = path->encap.id;
6440+ info->encap[info->num_encaps].proto = path->encap.proto;
6441+ info->num_encaps++;
6442+ if (path->type == DEV_PATH_PPPOE)
6443+ memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
6444+ break;
6445+ case DEV_PATH_BRIDGE:
6446+ if (is_zero_ether_addr(info->h_source))
6447+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
6448+
6449+ switch (path->bridge.vlan_mode) {
6450+ case DEV_PATH_BR_VLAN_UNTAG_HW:
6451+ info->ingress_vlans |= BIT(info->num_encaps - 1);
6452+ break;
6453+ case DEV_PATH_BR_VLAN_TAG:
6454+ info->encap[info->num_encaps].id = path->bridge.vlan_id;
6455+ info->encap[info->num_encaps].proto = path->bridge.vlan_proto;
6456+ info->num_encaps++;
6457+ break;
6458+ case DEV_PATH_BR_VLAN_UNTAG:
6459+ info->num_encaps--;
6460+ break;
6461+ case DEV_PATH_BR_VLAN_KEEP:
6462+ break;
6463+ }
6464+ break;
6465+ default:
6466+ break;
6467+ }
6468+ }
6469+ if (!info->outdev)
6470+ info->outdev = info->indev;
6471+
6472+ info->hw_outdev = info->indev;
6473+
6474+ if (nf_is_valid_ether_device(info->indev))
6475+ info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
6476+}
6477+
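+/*
+ * For NATed connections, resolve the next-hop MAC via a neighbour lookup
+ * on the dst (bailing out unless the entry is NUD_VALID); then walk the
+ * device stack with dev_fill_forward_path() starting from the dst device.
+ */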
6478+static int nf_dev_fill_forward_path(const struct nf_flow_route *route,
6479+ const struct dst_entry *dst_cache,
6480+ const struct nf_conn *ct,
6481+ enum ip_conntrack_dir dir, u8 *ha,
6482+ struct net_device_path_stack *stack)
6483+{
6484+ const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
6485+ struct net_device *dev = dst_cache->dev;
6486+ struct neighbour *n;
6487+ u8 nud_state;
6488+
6489+ if (!nf_is_valid_ether_device(dev))
6490+ goto out;
6491+
6492+	if (ct->status & IPS_NAT_MASK) {
6493+ n = dst_neigh_lookup(dst_cache, daddr);
6494+ if (!n)
6495+ return -1;
6496+
6497+		read_lock_bh(&n->lock);
6498+ nud_state = n->nud_state;
6499+ ether_addr_copy(ha, n->ha);
6500+ read_unlock_bh(&n->lock);
6501+ neigh_release(n);
6502+
6503+		if (!(nud_state & NUD_VALID))
6504+ return -1;
6505+ }
6506+
6507+out:
6508+ return dev_fill_forward_path(dev, ha, stack);
6509+}
6510+
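+/*
+ * Fill in the forward path for one direction of the flow. For non-NAT
+ * (bridged) traffic the MAC addresses are taken from the skb itself; for
+ * NAT they come from the neighbour lookup above. On success the ingress
+ * encapsulation and, for direct xmit, the egress device and MAC
+ * addresses are written into @route.
+ */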
6511+static int nf_dev_forward_path(struct sk_buff *skb,
6512+			       struct nf_flow_route *route,
6513+			       const struct nf_conn *ct,
6514+ enum ip_conntrack_dir dir,
6515+ struct net_device **devs)
6516+{
6517+ const struct dst_entry *dst = route->tuple[dir].dst;
6518+	struct ethhdr *eth;
6519+	enum ip_conntrack_dir skb_dir;
6520+	struct net_device_path_stack stack;
6521+ struct nf_forward_info info = {};
6522+ unsigned char ha[ETH_ALEN];
6523+ int i;
6524+
6525+	if (!(ct->status & IPS_NAT_MASK) && skb_mac_header_was_set(skb)) {
6526+ eth = eth_hdr(skb);
6527+ skb_dir = CTINFO2DIR(skb_get_nfct(skb) & NFCT_INFOMASK);
6528+
6529+ if (skb_dir != dir) {
6530+ memcpy(ha, eth->h_source, ETH_ALEN);
6531+ memcpy(info.h_source, eth->h_dest, ETH_ALEN);
6532+ } else {
6533+ memcpy(ha, eth->h_dest, ETH_ALEN);
6534+ memcpy(info.h_source, eth->h_source, ETH_ALEN);
6535+ }
6536+ }
6537+
6538+	if (nf_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
6539+		nf_dev_path_info(&stack, &info, ha);
6540+
6541+ devs[!dir] = (struct net_device *)info.indev;
6542+ if (!info.indev)
6543+ return -1;
6544+
6545+ route->tuple[!dir].in.ifindex = info.indev->ifindex;
6546+ for (i = 0; i < info.num_encaps; i++) {
6547+ route->tuple[!dir].in.encap[i].id = info.encap[i].id;
6548+ route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
6549+ }
6550+ route->tuple[!dir].in.num_encaps = info.num_encaps;
6551+ route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
6552+
6553+ if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
6554+ memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
6555+ memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
6556+ route->tuple[dir].out.ifindex = info.outdev->ifindex;
6557+ route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex;
6558+ route->tuple[dir].xmit_type = info.xmit_type;
6559+ }
6560+
6561+ return 0;
6562+}
6563+
6564+static int
6565+xt_flowoffload_route_dir(struct nf_flow_route *route, const struct nf_conn *ct,
6566+ enum ip_conntrack_dir dir,
6567+ const struct xt_action_param *par, int ifindex,
6568+ struct net_device **devs)
6569+{
6570+ struct dst_entry *dst = NULL;
6571+ struct flowi fl;
6572+
6573+ memset(&fl, 0, sizeof(fl));
6574+ switch (xt_family(par)) {
6575+ case NFPROTO_IPV4:
6576+ fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.src.u3.ip;
6577+ fl.u.ip4.flowi4_oif = ifindex;
6578+ break;
6579+ case NFPROTO_IPV6:
6580+ fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
6581+ fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.src.u3.in6;
6582+ fl.u.ip6.flowi6_oif = ifindex;
6583+ break;
6584+ }
6585+
6586+ nf_route(xt_net(par), &dst, &fl, false, xt_family(par));
6587+ if (!dst)
6588+ return -ENOENT;
6589+
6590+ nf_default_forward_path(route, dst, dir, devs);
6591+
6592+ return 0;
6593+}
6594+
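+/*
+ * Build the flow route for NATed traffic: reuse the skb's dst for the
+ * original direction, look up the reply-direction route with nf_route(),
+ * and resolve full forward paths when both directions use neighbour xmit.
+ */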
6595+static int
6596+xt_flowoffload_route_nat(struct sk_buff *skb, const struct nf_conn *ct,
6597+ const struct xt_action_param *par,
6598+ struct nf_flow_route *route, enum ip_conntrack_dir dir,
6599+ struct net_device **devs)
6600+{
6601+ struct dst_entry *this_dst = skb_dst(skb);
6602+ struct dst_entry *other_dst = NULL;
6603+ struct flowi fl;
6604+
6605+ memset(&fl, 0, sizeof(fl));
6606+ switch (xt_family(par)) {
6607+ case NFPROTO_IPV4:
6608+ fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
6609+ fl.u.ip4.flowi4_oif = xt_in(par)->ifindex;
6610+ break;
6611+ case NFPROTO_IPV6:
6612+ fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
6613+ fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
6614+ fl.u.ip6.flowi6_oif = xt_in(par)->ifindex;
6615+ break;
6616+ }
6617+
6618+ nf_route(xt_net(par), &other_dst, &fl, false, xt_family(par));
6619+ if (!other_dst)
6620+ return -ENOENT;
6621+
6622+ nf_default_forward_path(route, this_dst, dir, devs);
6623+ nf_default_forward_path(route, other_dst, !dir, devs);
6624+
6625+	if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH &&
6626+	    route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
6627+		if (nf_dev_forward_path(skb, route, ct, dir, devs))
6628+			return -1;
6629+		if (nf_dev_forward_path(skb, route, ct, !dir, devs))
6630+			return -1;
6631+ }
6632+
6633+ return 0;
6634+}
6635+
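+/*
+ * Build the flow route for non-NAT traffic: look up a route per direction
+ * via the corresponding device, then resolve the forward paths; on error
+ * the dst references already taken are released again.
+ */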
6636+static int
6637+xt_flowoffload_route_bridge(struct sk_buff *skb, const struct nf_conn *ct,
6638+ const struct xt_action_param *par,
6639+ struct nf_flow_route *route, enum ip_conntrack_dir dir,
6640+ struct net_device **devs)
6641+{
6642+ int ret;
6643+
6644+ ret = xt_flowoffload_route_dir(route, ct, dir, par,
6645+ devs[dir]->ifindex,
6646+ devs);
6647+ if (ret)
6648+ return ret;
6649+
6650+ ret = xt_flowoffload_route_dir(route, ct, !dir, par,
6651+ devs[!dir]->ifindex,
6652+ devs);
6653+ if (ret)
6654+		goto err_route_dir1;
6655+
6656+	if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH &&
6657+	    route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
6658+		if (nf_dev_forward_path(skb, route, ct, dir, devs) ||
6659+ nf_dev_forward_path(skb, route, ct, !dir, devs)) {
6660+			ret = -1;
6661+ goto err_route_dir2;
6662+ }
6663+	}
6664+
6665+ return 0;
6666+
6667+err_route_dir2:
6668+ dst_release(route->tuple[!dir].dst);
6669+err_route_dir1:
6670+ dst_release(route->tuple[dir].dst);
6671+ return ret;
6672+}
6673+
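+/*
+ * Target entry point: offload established TCP/UDP conntrack entries.
+ * Connections with helpers, sequence adjustment or unconfirmed state are
+ * skipped. A flow_offload entry is created from the resolved route and
+ * added to the software or hardware flowtable depending on the
+ * XT_FLOWOFFLOAD_HW flag.
+ */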
6674+static unsigned int
6675+flowoffload_tg(struct sk_buff *skb, const struct xt_action_param *par)
6676+{
6677+ struct xt_flowoffload_table *table;
6678+ const struct xt_flowoffload_target_info *info = par->targinfo;
6679+ struct tcphdr _tcph, *tcph = NULL;
6680+ enum ip_conntrack_info ctinfo;
6681+ enum ip_conntrack_dir dir;
6682+ struct nf_flow_route route = {};
6683+ struct flow_offload *flow = NULL;
6684+ struct net_device *devs[2] = {};
6685+ struct nf_conn *ct;
6686+ struct net *net;
6687+
6688+ if (xt_flowoffload_skip(skb, xt_family(par)))
6689+ return XT_CONTINUE;
6690+
6691+ ct = nf_ct_get(skb, &ctinfo);
6692+ if (ct == NULL)
6693+ return XT_CONTINUE;
6694+
6695+ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
6696+ case IPPROTO_TCP:
6697+ if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
6698+ return XT_CONTINUE;
6699+
6700+ tcph = skb_header_pointer(skb, par->thoff,
6701+ sizeof(_tcph), &_tcph);
6702+ if (unlikely(!tcph || tcph->fin || tcph->rst))
6703+ return XT_CONTINUE;
6704+ break;
6705+ case IPPROTO_UDP:
6706+ break;
6707+ default:
6708+ return XT_CONTINUE;
6709+ }
6710+
6711+ if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
6712+ ct->status & IPS_SEQ_ADJUST)
6713+ return XT_CONTINUE;
6714+
6715+ if (!nf_ct_is_confirmed(ct))
6716+ return XT_CONTINUE;
6717+
6718+	dir = CTINFO2DIR(ctinfo);
6719+
6720+	devs[dir] = xt_out(par);
6721+	devs[!dir] = xt_in(par);
6722+
6723+	if (!devs[dir] || !devs[!dir])
6724+		return XT_CONTINUE;
6725+
6726+	if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
6727+		return XT_CONTINUE;
6728+
6729+	if (ct->status & IPS_NAT_MASK) {
6730+ if (xt_flowoffload_route_nat(skb, ct, par, &route, dir, devs) < 0)
6731+ goto err_flow_route;
6732+ } else {
6733+ if (xt_flowoffload_route_bridge(skb, ct, par, &route, dir, devs) < 0)
6734+ goto err_flow_route;
6735+ }
6736+
6737+ flow = flow_offload_alloc(ct);
6738+ if (!flow)
6739+ goto err_flow_alloc;
6740+
6741+ if (flow_offload_route_init(flow, &route) < 0)
6742+ goto err_flow_add;
6743+
6744+ if (tcph) {
6745+ ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
6746+ ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
6747+ }
6748+
6749+ table = &flowtable[!!(info->flags & XT_FLOWOFFLOAD_HW)];
6750+
6751+ net = read_pnet(&table->ft.net);
6752+ if (!net)
6753+ write_pnet(&table->ft.net, xt_net(par));
6754+
6755+ if (flow_offload_add(&table->ft, flow) < 0)
6756+ goto err_flow_add;
6757+
6758+ xt_flowoffload_check_device(table, devs[0]);
6759+ xt_flowoffload_check_device(table, devs[1]);
6760+
6761+	if (!(ct->status & IPS_NAT_MASK))
6762+ dst_release(route.tuple[dir].dst);
6763+	dst_release(route.tuple[!dir].dst);
6764+
6765+ return XT_CONTINUE;
6766+
6767+err_flow_add:
6768+ flow_offload_free(flow);
6769+err_flow_alloc:
6770+	if (!(ct->status & IPS_NAT_MASK))
6771+ dst_release(route.tuple[dir].dst);
6772+	dst_release(route.tuple[dir].dst);
6773+err_flow_route:
6774+ clear_bit(IPS_OFFLOAD_BIT, &ct->status);
6775+
6776+ return XT_CONTINUE;
6777+}
6778+
6779+static int flowoffload_chk(const struct xt_tgchk_param *par)
6780+{
6781+ struct xt_flowoffload_target_info *info = par->targinfo;
6782+
6783+ if (info->flags & ~XT_FLOWOFFLOAD_MASK)
6784+ return -EINVAL;
6785+
6786+ return 0;
6787+}
6788+
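+/*
+ * xtables registration of the "FLOWOFFLOAD" target. With the matching
+ * userspace extension (not part of this patch) a typical rule is expected
+ * to look roughly like:
+ *   iptables -A FORWARD -m conntrack --ctstate RELATED,ESTABLISHED \
+ *            -j FLOWOFFLOAD --hw
+ * where the hardware option sets XT_FLOWOFFLOAD_HW and selects the
+ * hardware-offload flowtable.
+ */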
6789+static struct xt_target offload_tg_reg __read_mostly = {
6790+ .family = NFPROTO_UNSPEC,
6791+ .name = "FLOWOFFLOAD",
6792+ .revision = 0,
6793+ .targetsize = sizeof(struct xt_flowoffload_target_info),
6794+ .usersize = sizeof(struct xt_flowoffload_target_info),
6795+ .checkentry = flowoffload_chk,
6796+ .target = flowoffload_tg,
6797+ .me = THIS_MODULE,
6798+};
6799+
6800+static int flow_offload_netdev_event(struct notifier_block *this,
6801+ unsigned long event, void *ptr)
6802+{
6803+ struct xt_flowoffload_hook *hook0, *hook1;
6804+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
6805+
6806+ if (event != NETDEV_UNREGISTER)
6807+ return NOTIFY_DONE;
6808+
6809+ spin_lock_bh(&hooks_lock);
6810+ hook0 = flow_offload_lookup_hook(&flowtable[0], dev);
6811+ if (hook0)
6812+ hlist_del(&hook0->list);
6813+
6814+ hook1 = flow_offload_lookup_hook(&flowtable[1], dev);
6815+ if (hook1)
6816+ hlist_del(&hook1->list);
6817+ spin_unlock_bh(&hooks_lock);
6818+
6819+ if (hook0) {
6820+ nf_unregister_net_hook(hook0->net, &hook0->ops);
6821+ kfree(hook0);
6822+ }
6823+
6824+ if (hook1) {
6825+ nf_unregister_net_hook(hook1->net, &hook1->ops);
6826+ kfree(hook1);
6827+ }
6828+
6829+ nf_flow_table_cleanup(dev);
6830+
6831+ return NOTIFY_DONE;
6832+}
6833+
6834+static struct notifier_block flow_offload_netdev_notifier = {
6835+ .notifier_call = flow_offload_netdev_event,
6836+};
6837+
6838+static int nf_flow_rule_route_inet(struct net *net,
6839+ const struct flow_offload *flow,
6840+ enum flow_offload_tuple_dir dir,
6841+ struct nf_flow_rule *flow_rule)
6842+{
6843+ const struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
6844+ int err;
6845+
6846+ switch (flow_tuple->l3proto) {
6847+ case NFPROTO_IPV4:
6848+ err = nf_flow_rule_route_ipv4(net, flow, dir, flow_rule);
6849+ break;
6850+ case NFPROTO_IPV6:
6851+ err = nf_flow_rule_route_ipv6(net, flow, dir, flow_rule);
6852+ break;
6853+ default:
6854+ err = -1;
6855+ break;
6856+ }
6857+
6858+ return err;
6859+}
6860+
6861+static struct nf_flowtable_type flowtable_inet = {
6862+ .family = NFPROTO_INET,
6863+ .init = nf_flow_table_init,
6864+ .setup = nf_flow_table_offload_setup,
6865+ .action = nf_flow_rule_route_inet,
6866+ .free = nf_flow_table_free,
6867+ .hook = xt_flowoffload_net_hook,
6868+ .owner = THIS_MODULE,
6869+};
6870+
6871+static int init_flowtable(struct xt_flowoffload_table *tbl)
6872+{
6873+ INIT_DELAYED_WORK(&tbl->work, xt_flowoffload_hook_work);
6874+ tbl->ft.type = &flowtable_inet;
6875+
6876+ return nf_flow_table_init(&tbl->ft);
6877+}
6878+
6879+static int __init xt_flowoffload_tg_init(void)
6880+{
6881+ int ret;
6882+
6883+ register_netdevice_notifier(&flow_offload_netdev_notifier);
6884+
6885+ ret = init_flowtable(&flowtable[0]);
6886+ if (ret)
6887+ return ret;
6888+
6889+ ret = init_flowtable(&flowtable[1]);
6890+ if (ret)
6891+ goto cleanup;
6892+
6893+	flowtable[1].ft.flags = NF_FLOWTABLE_HW_OFFLOAD;
6894+
6895+ ret = xt_register_target(&offload_tg_reg);
6896+ if (ret)
6897+ goto cleanup2;
6898+
6899+ return 0;
6900+
6901+cleanup2:
6902+ nf_flow_table_free(&flowtable[1].ft);
6903+cleanup:
6904+ nf_flow_table_free(&flowtable[0].ft);
6905+ return ret;
6906+}
6907+
6908+static void __exit xt_flowoffload_tg_exit(void)
6909+{
6910+ xt_unregister_target(&offload_tg_reg);
6911+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
6912+ nf_flow_table_free(&flowtable[0].ft);
6913+ nf_flow_table_free(&flowtable[1].ft);
6914+}
6915+
6916+MODULE_LICENSE("GPL");
6917+module_init(xt_flowoffload_tg_init);
6918+module_exit(xt_flowoffload_tg_exit);
6919--
69202.18.0
6921