1From 6ad9bd65769003ab526e504577e0f747eba14287 Mon Sep 17 00:00:00 2001
2From: Bo Jiao <Bo.Jiao@mediatek.com>
3Date: Wed, 22 Jun 2022 09:42:19 +0800
4Subject: [PATCH 1/8]
5 9990-mt7622-backport-nf-hw-offload-framework-and-upstream-hnat-plus-xt-FLOWOFFLOAD-update-v2
6
7---
8 drivers/net/ethernet/mediatek/Makefile | 3 +-
9 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 28 +-
10 drivers/net/ethernet/mediatek/mtk_eth_soc.h | 20 +-
11 drivers/net/ethernet/mediatek/mtk_ppe.c | 509 +++++++
12 drivers/net/ethernet/mediatek/mtk_ppe.h | 288 ++++
13 .../net/ethernet/mediatek/mtk_ppe_debugfs.c | 214 +++
14 .../net/ethernet/mediatek/mtk_ppe_offload.c | 526 ++++++++
15 drivers/net/ethernet/mediatek/mtk_ppe_regs.h | 144 ++
16 drivers/net/ppp/ppp_generic.c | 22 +
17 drivers/net/ppp/pppoe.c | 24 +
18 include/linux/netdevice.h | 60 +
19 include/linux/ppp_channel.h | 3 +
20 include/net/dsa.h | 10 +
21 include/net/flow_offload.h | 4 +
22 include/net/ip6_route.h | 5 +-
23 .../net/netfilter/ipv6/nf_conntrack_ipv6.h | 3 -
24 include/net/netfilter/nf_conntrack.h | 12 +
25 include/net/netfilter/nf_conntrack_acct.h | 11 +
26 include/net/netfilter/nf_flow_table.h | 264 +++-
27 include/net/netns/conntrack.h | 6 +
28 .../linux/netfilter/nf_conntrack_common.h | 9 +-
29 include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h | 17 +
30 net/8021q/vlan_dev.c | 21 +
31 net/bridge/br_device.c | 49 +
32 net/bridge/br_private.h | 20 +
33 net/bridge/br_vlan.c | 55 +
34 net/core/dev.c | 46 +
35 net/dsa/dsa.c | 9 +
36 net/dsa/slave.c | 41 +-
37 net/ipv4/netfilter/Kconfig | 4 +-
38 net/ipv6/ip6_output.c | 2 +-
39 net/ipv6/netfilter/Kconfig | 3 +-
40 net/ipv6/route.c | 22 +-
41 net/netfilter/Kconfig | 14 +-
42 net/netfilter/Makefile | 4 +-
43 net/netfilter/nf_conntrack_core.c | 20 +-
44 net/netfilter/nf_conntrack_proto_tcp.c | 4 +
45 net/netfilter/nf_conntrack_proto_udp.c | 4 +
46 net/netfilter/nf_conntrack_standalone.c | 34 +-
47 net/netfilter/nf_flow_table_core.c | 446 +++---
48 net/netfilter/nf_flow_table_ip.c | 455 ++++---
49 net/netfilter/nf_flow_table_offload.c | 1191 +++++++++++++++++
50 net/netfilter/xt_FLOWOFFLOAD.c | 719 ++++++++++
51 43 files changed, 4913 insertions(+), 432 deletions(-)
52 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.c
53 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.h
54 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
55 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_offload.c
56 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_regs.h
57 create mode 100644 include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
58 create mode 100644 net/netfilter/nf_flow_table_offload.c
59 create mode 100644 net/netfilter/xt_FLOWOFFLOAD.c
60
61diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile
62index 13c5b4e8f..0a6af99f1 100755
63--- a/drivers/net/ethernet/mediatek/Makefile
64+++ b/drivers/net/ethernet/mediatek/Makefile
65@@ -4,5 +4,6 @@
66 #
67
68 obj-$(CONFIG_NET_MEDIATEK_SOC) += mtk_eth.o
69-mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_usxgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o
70+mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_usxgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o \
71+ mtk_ppe.o mtk_ppe_debugfs.o mtk_ppe_offload.o
72 obj-$(CONFIG_NET_MEDIATEK_HNAT) += mtk_hnat/
73diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
74index 2b21f7ed0..819d8a0be 100755
75--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
76+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
77@@ -3081,6 +3081,7 @@ static int mtk_open(struct net_device *d
78 struct mtk_phylink_priv *phylink_priv = &mac->phylink_priv;
79 int err, i;
80 struct device_node *phy_node;
81+ u32 gdm_config = MTK_GDMA_TO_PDMA;
82
83 err = phylink_of_phy_connect(mac->phylink, mac->of_node, 0);
84 if (err) {
85@@ -3157,7 +3158,10 @@ static int mtk_open(struct net_device *d
86 if (!phy_node && eth->xgmii->regmap_sgmii[mac->id])
87 regmap_write(eth->xgmii->regmap_sgmii[mac->id], SGMSYS_QPHY_PWR_STATE_CTRL, 0);
88
89- mtk_gdm_config(eth, mac->id, MTK_GDMA_TO_PDMA);
90+ if (eth->soc->offload_version && mtk_ppe_start(&eth->ppe) == 0)
91+ gdm_config = MTK_GDMA_TO_PPE;
92+
93+ mtk_gdm_config(eth, mac->id, gdm_config);
94
95 return 0;
96 }
97@@ -3238,6 +3242,9 @@ static int mtk_stop(struct net_device *d
98
99 mtk_dma_free(eth);
100
101+ if (eth->soc->offload_version)
102+ mtk_ppe_stop(&eth->ppe);
103+
104 return 0;
105 }
106
107@@ -3915,6 +3922,7 @@ static const struct net_device_ops mtk_n
108 #ifdef CONFIG_NET_POLL_CONTROLLER
109 .ndo_poll_controller = mtk_poll_controller,
110 #endif
111+ .ndo_setup_tc = mtk_eth_setup_tc,
112 };
113
114 static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
115@@ -4308,6 +4316,17 @@ static int mtk_probe(struct platform_dev
116 goto err_free_dev;
117 }
118
119+ if (eth->soc->offload_version) {
120+ err = mtk_ppe_init(&eth->ppe, eth->dev,
121+ eth->base + MTK_ETH_PPE_BASE, 2);
122+ if (err)
123+ goto err_free_dev;
124+
125+ err = mtk_eth_offload_init(eth);
126+ if (err)
127+ goto err_free_dev;
128+ }
129+
130 for (i = 0; i < MTK_MAX_DEVS; i++) {
131 if (!eth->netdev[i])
132 continue;
133@@ -4410,6 +4429,7 @@ static const struct mtk_soc_data mt2701_
134 .required_clks = MT7623_CLKS_BITMAP,
135 .required_pctl = true,
136 .has_sram = false,
137+ .offload_version = 2,
138 .txrx = {
139 .txd_size = sizeof(struct mtk_tx_dma),
140 .rxd_size = sizeof(struct mtk_rx_dma),
141@@ -4424,6 +4444,7 @@ static const struct mtk_soc_data mt7621_
developer8cb3ac72022-07-04 10:55:14 +0800142 .required_clks = MT7621_CLKS_BITMAP,
143 .required_pctl = false,
144 .has_sram = false,
145+ .offload_version = 2,
146 .txrx = {
147 .txd_size = sizeof(struct mtk_tx_dma),
148 .rxd_size = sizeof(struct mtk_rx_dma),
149@@ -4439,6 +4460,7 @@ static const struct mtk_soc_data mt7622_
developer8cb3ac72022-07-04 10:55:14 +0800150 .required_clks = MT7622_CLKS_BITMAP,
151 .required_pctl = false,
152 .has_sram = false,
153+ .offload_version = 2,
154 .txrx = {
155 .txd_size = sizeof(struct mtk_tx_dma),
156 .rxd_size = sizeof(struct mtk_rx_dma),
157@@ -4453,6 +4475,7 @@ static const struct mtk_soc_data mt7623_
developer8cb3ac72022-07-04 10:55:14 +0800158 .required_clks = MT7623_CLKS_BITMAP,
159 .required_pctl = true,
160 .has_sram = false,
161+ .offload_version = 2,
162 .txrx = {
163 .txd_size = sizeof(struct mtk_tx_dma),
164 .rxd_size = sizeof(struct mtk_rx_dma),
165diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
166index b6380ffeb..349f98503 100755
167--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
168+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
169@@ -15,6 +15,8 @@
170 #include <linux/u64_stats_sync.h>
171 #include <linux/refcount.h>
172 #include <linux/phylink.h>
173+#include <linux/rhashtable.h>
174+#include "mtk_ppe.h"
175
176 #define MTK_QDMA_PAGE_SIZE 2048
177 #define MTK_MAX_RX_LENGTH 1536
178@@ -37,7 +39,8 @@
179 NETIF_F_HW_VLAN_CTAG_TX | \
180 NETIF_F_SG | NETIF_F_TSO | \
181 NETIF_F_TSO6 | \
182- NETIF_F_IPV6_CSUM)
183+ NETIF_F_IPV6_CSUM |\
184+ NETIF_F_HW_TC)
185 #define MTK_SET_FEATURES (NETIF_F_LRO | \
186 NETIF_F_HW_VLAN_CTAG_RX)
187 #define MTK_HW_FEATURES_MT7628 (NETIF_F_SG | NETIF_F_RXCSUM)
188@@ -107,6 +110,7 @@
189 #define MTK_GDMA_TCS_EN BIT(21)
190 #define MTK_GDMA_UCS_EN BIT(20)
191 #define MTK_GDMA_TO_PDMA 0x0
192+#define MTK_GDMA_TO_PPE 0x4444
193 #define MTK_GDMA_DROP_ALL 0x7777
194
195 /* Unicast Filter MAC Address Register - Low */
196@@ -547,6 +551,12 @@
197 #define RX_DMA_TCI(_x) ((_x) & (VLAN_PRIO_MASK | VLAN_VID_MASK))
198 #define RX_DMA_VPID(_x) (((_x) >> 16) & 0xffff)
199
200+/* QDMA descriptor rxd4 */
201+#define MTK_RXD4_FOE_ENTRY GENMASK(13, 0)
202+#define MTK_RXD4_PPE_CPU_REASON GENMASK(18, 14)
203+#define MTK_RXD4_SRC_PORT GENMASK(21, 19)
204+#define MTK_RXD4_ALG GENMASK(31, 22)
205+
206 /* QDMA descriptor rxd4 */
207 #define RX_DMA_L4_VALID BIT(24)
208 #define RX_DMA_L4_VALID_PDMA BIT(30) /* when PDMA is used */
209@@ -1158,6 +1168,7 @@ struct mtk_soc_data {
210 u32 caps;
211 u32 required_clks;
212 bool required_pctl;
213+ u8 offload_version;
214 netdev_features_t hw_features;
215 bool has_sram;
216 };
217@@ -1271,6 +1282,9 @@ struct mtk_eth {
218 int ip_align;
219 spinlock_t syscfg0_lock;
220 struct timer_list mtk_dma_monitor_timer;
221+
222+ struct mtk_ppe ppe;
223+ struct rhashtable flow_table;
224 };
225
226 /* struct mtk_mac - the structure that holds the info about the MACs of the
227@@ -1319,4 +1333,7 @@ int mtk_gmac_rgmii_path_setup(struct mtk_eth *eth, int mac_id);
228 void mtk_usxgmii_reset(struct mtk_xgmii *ss, int mac_id);
229 int mtk_dump_usxgmii(struct regmap *pmap, char *name, u32 offset, u32 range);
230
231+int mtk_eth_offload_init(struct mtk_eth *eth);
232+int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
233+ void *type_data);
234 void mtk_eth_set_dma_device(struct mtk_eth *eth, struct device *dma_dev);
235diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
236new file mode 100644
237index 000000000..66298e223
238--- /dev/null
239+++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
240@@ -0,0 +1,509 @@
241+// SPDX-License-Identifier: GPL-2.0-only
242+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
243+
244+#include <linux/kernel.h>
245+#include <linux/io.h>
246+#include <linux/iopoll.h>
247+#include <linux/etherdevice.h>
248+#include <linux/platform_device.h>
249+#include "mtk_ppe.h"
250+#include "mtk_ppe_regs.h"
251+
252+static void ppe_w32(struct mtk_ppe *ppe, u32 reg, u32 val)
253+{
254+ writel(val, ppe->base + reg);
255+}
256+
257+static u32 ppe_r32(struct mtk_ppe *ppe, u32 reg)
258+{
259+ return readl(ppe->base + reg);
260+}
261+
262+static u32 ppe_m32(struct mtk_ppe *ppe, u32 reg, u32 mask, u32 set)
263+{
264+ u32 val;
265+
266+ val = ppe_r32(ppe, reg);
267+ val &= ~mask;
268+ val |= set;
269+ ppe_w32(ppe, reg, val);
270+
271+ return val;
272+}
273+
274+static u32 ppe_set(struct mtk_ppe *ppe, u32 reg, u32 val)
275+{
276+ return ppe_m32(ppe, reg, 0, val);
277+}
278+
279+static u32 ppe_clear(struct mtk_ppe *ppe, u32 reg, u32 val)
280+{
281+ return ppe_m32(ppe, reg, val, 0);
282+}
283+
284+static int mtk_ppe_wait_busy(struct mtk_ppe *ppe)
285+{
286+ int ret;
287+ u32 val;
288+
289+ ret = readl_poll_timeout(ppe->base + MTK_PPE_GLO_CFG, val,
290+ !(val & MTK_PPE_GLO_CFG_BUSY),
291+ 20, MTK_PPE_WAIT_TIMEOUT_US);
292+
293+ if (ret)
294+ dev_err(ppe->dev, "PPE table busy");
295+
296+ return ret;
297+}
298+
299+static void mtk_ppe_cache_clear(struct mtk_ppe *ppe)
300+{
301+ ppe_set(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
302+ ppe_clear(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
303+}
304+
305+static void mtk_ppe_cache_enable(struct mtk_ppe *ppe, bool enable)
306+{
307+ mtk_ppe_cache_clear(ppe);
308+
309+ ppe_m32(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_EN,
310+ enable * MTK_PPE_CACHE_CTL_EN);
311+}
312+
313+static u32 mtk_ppe_hash_entry(struct mtk_foe_entry *e)
314+{
315+ u32 hv1, hv2, hv3;
316+ u32 hash;
317+
318+ switch (FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, e->ib1)) {
319+ case MTK_PPE_PKT_TYPE_BRIDGE:
320+ hv1 = e->bridge.src_mac_lo;
321+ hv1 ^= ((e->bridge.src_mac_hi & 0xffff) << 16);
322+ hv2 = e->bridge.src_mac_hi >> 16;
323+ hv2 ^= e->bridge.dest_mac_lo;
324+ hv3 = e->bridge.dest_mac_hi;
325+ break;
326+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
327+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
328+ hv1 = e->ipv4.orig.ports;
329+ hv2 = e->ipv4.orig.dest_ip;
330+ hv3 = e->ipv4.orig.src_ip;
331+ break;
332+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
333+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
334+ hv1 = e->ipv6.src_ip[3] ^ e->ipv6.dest_ip[3];
335+ hv1 ^= e->ipv6.ports;
336+
337+ hv2 = e->ipv6.src_ip[2] ^ e->ipv6.dest_ip[2];
338+ hv2 ^= e->ipv6.dest_ip[0];
339+
340+ hv3 = e->ipv6.src_ip[1] ^ e->ipv6.dest_ip[1];
341+ hv3 ^= e->ipv6.src_ip[0];
342+ break;
343+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
344+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
345+ default:
346+ WARN_ON_ONCE(1);
347+ return MTK_PPE_HASH_MASK;
348+ }
349+
350+ hash = (hv1 & hv2) | ((~hv1) & hv3);
351+ hash = (hash >> 24) | ((hash & 0xffffff) << 8);
352+ hash ^= hv1 ^ hv2 ^ hv3;
353+ hash ^= hash >> 16;
354+ hash <<= 1;
355+ hash &= MTK_PPE_ENTRIES - 1;
356+
357+ return hash;
358+}
359+
360+static inline struct mtk_foe_mac_info *
361+mtk_foe_entry_l2(struct mtk_foe_entry *entry)
362+{
363+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
364+
365+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
366+ return &entry->ipv6.l2;
367+
368+ return &entry->ipv4.l2;
369+}
370+
371+static inline u32 *
372+mtk_foe_entry_ib2(struct mtk_foe_entry *entry)
373+{
374+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
375+
376+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
377+ return &entry->ipv6.ib2;
378+
379+ return &entry->ipv4.ib2;
380+}
381+
382+int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
383+ u8 pse_port, u8 *src_mac, u8 *dest_mac)
384+{
385+ struct mtk_foe_mac_info *l2;
386+ u32 ports_pad, val;
387+
388+ memset(entry, 0, sizeof(*entry));
389+
390+ val = FIELD_PREP(MTK_FOE_IB1_STATE, MTK_FOE_STATE_BIND) |
391+ FIELD_PREP(MTK_FOE_IB1_PACKET_TYPE, type) |
392+ FIELD_PREP(MTK_FOE_IB1_UDP, l4proto == IPPROTO_UDP) |
393+ MTK_FOE_IB1_BIND_TTL |
394+ MTK_FOE_IB1_BIND_CACHE;
395+ entry->ib1 = val;
396+
397+ val = FIELD_PREP(MTK_FOE_IB2_PORT_MG, 0x3f) |
398+ FIELD_PREP(MTK_FOE_IB2_PORT_AG, 0x1f) |
399+ FIELD_PREP(MTK_FOE_IB2_DEST_PORT, pse_port);
400+
401+ if (is_multicast_ether_addr(dest_mac))
402+ val |= MTK_FOE_IB2_MULTICAST;
403+
404+ ports_pad = 0xa5a5a500 | (l4proto & 0xff);
405+ if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
406+ entry->ipv4.orig.ports = ports_pad;
407+ if (type == MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
408+ entry->ipv6.ports = ports_pad;
409+
410+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
411+ entry->ipv6.ib2 = val;
412+ l2 = &entry->ipv6.l2;
413+ } else {
414+ entry->ipv4.ib2 = val;
415+ l2 = &entry->ipv4.l2;
416+ }
417+
418+ l2->dest_mac_hi = get_unaligned_be32(dest_mac);
419+ l2->dest_mac_lo = get_unaligned_be16(dest_mac + 4);
420+ l2->src_mac_hi = get_unaligned_be32(src_mac);
421+ l2->src_mac_lo = get_unaligned_be16(src_mac + 4);
422+
423+ if (type >= MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
424+ l2->etype = ETH_P_IPV6;
425+ else
426+ l2->etype = ETH_P_IP;
427+
428+ return 0;
429+}
430+
431+int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port)
432+{
433+ u32 *ib2 = mtk_foe_entry_ib2(entry);
434+ u32 val;
435+
436+ val = *ib2;
437+ val &= ~MTK_FOE_IB2_DEST_PORT;
438+ val |= FIELD_PREP(MTK_FOE_IB2_DEST_PORT, port);
439+ *ib2 = val;
440+
441+ return 0;
442+}
443+
444+int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool egress,
445+ __be32 src_addr, __be16 src_port,
446+ __be32 dest_addr, __be16 dest_port)
447+{
448+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
449+ struct mtk_ipv4_tuple *t;
450+
451+ switch (type) {
452+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
453+ if (egress) {
454+ t = &entry->ipv4.new;
455+ break;
456+ }
457+ fallthrough;
458+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
459+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
460+ t = &entry->ipv4.orig;
461+ break;
462+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
463+ entry->ipv6_6rd.tunnel_src_ip = be32_to_cpu(src_addr);
464+ entry->ipv6_6rd.tunnel_dest_ip = be32_to_cpu(dest_addr);
465+ return 0;
466+ default:
467+ WARN_ON_ONCE(1);
468+ return -EINVAL;
469+ }
470+
471+ t->src_ip = be32_to_cpu(src_addr);
472+ t->dest_ip = be32_to_cpu(dest_addr);
473+
474+ if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
475+ return 0;
476+
477+ t->src_port = be16_to_cpu(src_port);
478+ t->dest_port = be16_to_cpu(dest_port);
479+
480+ return 0;
481+}
482+
483+int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
484+ __be32 *src_addr, __be16 src_port,
485+ __be32 *dest_addr, __be16 dest_port)
486+{
487+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
488+ u32 *src, *dest;
489+ int i;
490+
491+ switch (type) {
492+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
493+ src = entry->dslite.tunnel_src_ip;
494+ dest = entry->dslite.tunnel_dest_ip;
495+ break;
496+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
497+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
498+ entry->ipv6.src_port = be16_to_cpu(src_port);
499+ entry->ipv6.dest_port = be16_to_cpu(dest_port);
500+ fallthrough;
501+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
502+ src = entry->ipv6.src_ip;
503+ dest = entry->ipv6.dest_ip;
504+ break;
505+ default:
506+ WARN_ON_ONCE(1);
507+ return -EINVAL;
508+ }
509+
510+ for (i = 0; i < 4; i++)
511+ src[i] = be32_to_cpu(src_addr[i]);
512+ for (i = 0; i < 4; i++)
513+ dest[i] = be32_to_cpu(dest_addr[i]);
514+
515+ return 0;
516+}
517+
518+int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port)
519+{
520+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
521+
522+ l2->etype = BIT(port);
523+
524+ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER))
525+ entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
526+ else
527+ l2->etype |= BIT(8);
528+
529+ entry->ib1 &= ~MTK_FOE_IB1_BIND_VLAN_TAG;
530+
531+ return 0;
532+}
533+
534+int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid)
535+{
536+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
537+
538+ switch (FIELD_GET(MTK_FOE_IB1_BIND_VLAN_LAYER, entry->ib1)) {
539+ case 0:
540+ entry->ib1 |= MTK_FOE_IB1_BIND_VLAN_TAG |
541+ FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
542+ l2->vlan1 = vid;
543+ return 0;
544+ case 1:
545+ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG)) {
546+ l2->vlan1 = vid;
547+ l2->etype |= BIT(8);
548+ } else {
549+ l2->vlan2 = vid;
550+ entry->ib1 += FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
551+ }
552+ return 0;
553+ default:
554+ return -ENOSPC;
555+ }
556+}
557+
558+int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid)
559+{
560+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
561+
562+ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER) ||
563+ (entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG))
564+ l2->etype = ETH_P_PPP_SES;
565+
566+ entry->ib1 |= MTK_FOE_IB1_BIND_PPPOE;
567+ l2->pppoe_id = sid;
568+
569+ return 0;
570+}
571+
572+static inline bool mtk_foe_entry_usable(struct mtk_foe_entry *entry)
573+{
574+ return !(entry->ib1 & MTK_FOE_IB1_STATIC) &&
575+ FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1) != MTK_FOE_STATE_BIND;
576+}
577+
578+int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
579+ u16 timestamp)
580+{
581+ struct mtk_foe_entry *hwe;
582+ u32 hash;
583+
584+ timestamp &= MTK_FOE_IB1_BIND_TIMESTAMP;
585+ entry->ib1 &= ~MTK_FOE_IB1_BIND_TIMESTAMP;
586+ entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_TIMESTAMP, timestamp);
587+
588+ hash = mtk_ppe_hash_entry(entry);
589+ hwe = &ppe->foe_table[hash];
590+ if (!mtk_foe_entry_usable(hwe)) {
591+ hwe++;
592+ hash++;
593+
594+ if (!mtk_foe_entry_usable(hwe))
595+ return -ENOSPC;
596+ }
597+
598+ memcpy(&hwe->data, &entry->data, sizeof(hwe->data));
599+ wmb();
600+ hwe->ib1 = entry->ib1;
601+
602+ dma_wmb();
603+
604+ mtk_ppe_cache_clear(ppe);
605+
606+ return hash;
607+}
608+
609+int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
610+ int version)
611+{
612+ struct mtk_foe_entry *foe;
613+
614+ /* need to allocate a separate device, since the PPE DMA access is
615+ * not coherent.
616+ */
617+ ppe->base = base;
618+ ppe->dev = dev;
619+ ppe->version = version;
620+
621+ foe = dmam_alloc_coherent(ppe->dev, MTK_PPE_ENTRIES * sizeof(*foe),
622+ &ppe->foe_phys, GFP_KERNEL);
623+ if (!foe)
624+ return -ENOMEM;
625+
626+ ppe->foe_table = foe;
627+
628+ mtk_ppe_debugfs_init(ppe);
629+
630+ return 0;
631+}
632+
633+static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe)
634+{
635+ static const u8 skip[] = { 12, 25, 38, 51, 76, 89, 102 };
636+ int i, k;
637+
638+ memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(*ppe->foe_table));
639+
640+ if (!IS_ENABLED(CONFIG_SOC_MT7621))
641+ return;
642+
643+ /* skip all entries that cross the 1024 byte boundary */
644+ for (i = 0; i < MTK_PPE_ENTRIES; i += 128)
645+ for (k = 0; k < ARRAY_SIZE(skip); k++)
646+ ppe->foe_table[i + skip[k]].ib1 |= MTK_FOE_IB1_STATIC;
647+}
648+
649+int mtk_ppe_start(struct mtk_ppe *ppe)
650+{
651+ u32 val;
652+
653+ mtk_ppe_init_foe_table(ppe);
654+ ppe_w32(ppe, MTK_PPE_TB_BASE, ppe->foe_phys);
655+
656+ val = MTK_PPE_TB_CFG_ENTRY_80B |
657+ MTK_PPE_TB_CFG_AGE_NON_L4 |
658+ MTK_PPE_TB_CFG_AGE_UNBIND |
659+ MTK_PPE_TB_CFG_AGE_TCP |
660+ MTK_PPE_TB_CFG_AGE_UDP |
661+ MTK_PPE_TB_CFG_AGE_TCP_FIN |
662+ FIELD_PREP(MTK_PPE_TB_CFG_SEARCH_MISS,
663+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD) |
664+ FIELD_PREP(MTK_PPE_TB_CFG_KEEPALIVE,
665+ MTK_PPE_KEEPALIVE_DISABLE) |
666+ FIELD_PREP(MTK_PPE_TB_CFG_HASH_MODE, 1) |
667+ FIELD_PREP(MTK_PPE_TB_CFG_SCAN_MODE,
668+ MTK_PPE_SCAN_MODE_KEEPALIVE_AGE) |
669+ FIELD_PREP(MTK_PPE_TB_CFG_ENTRY_NUM,
670+ MTK_PPE_ENTRIES_SHIFT);
671+ ppe_w32(ppe, MTK_PPE_TB_CFG, val);
672+
673+ ppe_w32(ppe, MTK_PPE_IP_PROTO_CHK,
674+ MTK_PPE_IP_PROTO_CHK_IPV4 | MTK_PPE_IP_PROTO_CHK_IPV6);
675+
676+ mtk_ppe_cache_enable(ppe, true);
677+
678+ val = MTK_PPE_FLOW_CFG_IP4_TCP_FRAG |
679+ MTK_PPE_FLOW_CFG_IP4_UDP_FRAG |
680+ MTK_PPE_FLOW_CFG_IP6_3T_ROUTE |
681+ MTK_PPE_FLOW_CFG_IP6_5T_ROUTE |
682+ MTK_PPE_FLOW_CFG_IP6_6RD |
683+ MTK_PPE_FLOW_CFG_IP4_NAT |
684+ MTK_PPE_FLOW_CFG_IP4_NAPT |
685+ MTK_PPE_FLOW_CFG_IP4_DSLITE |
686+ MTK_PPE_FLOW_CFG_L2_BRIDGE |
687+ MTK_PPE_FLOW_CFG_IP4_NAT_FRAG;
688+ ppe_w32(ppe, MTK_PPE_FLOW_CFG, val);
689+
690+ val = FIELD_PREP(MTK_PPE_UNBIND_AGE_MIN_PACKETS, 1000) |
691+ FIELD_PREP(MTK_PPE_UNBIND_AGE_DELTA, 3);
692+ ppe_w32(ppe, MTK_PPE_UNBIND_AGE, val);
693+
694+ val = FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_UDP, 30) |
695+ FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_NON_L4, 1);
696+ ppe_w32(ppe, MTK_PPE_BIND_AGE0, val);
697+
698+ val = FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP_FIN, 1) |
699+ FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP, 30);
700+ ppe_w32(ppe, MTK_PPE_BIND_AGE1, val);
701+
702+ val = MTK_PPE_BIND_LIMIT0_QUARTER | MTK_PPE_BIND_LIMIT0_HALF;
703+ ppe_w32(ppe, MTK_PPE_BIND_LIMIT0, val);
704+
705+ val = MTK_PPE_BIND_LIMIT1_FULL |
706+ FIELD_PREP(MTK_PPE_BIND_LIMIT1_NON_L4, 1);
707+ ppe_w32(ppe, MTK_PPE_BIND_LIMIT1, val);
708+
709+ val = FIELD_PREP(MTK_PPE_BIND_RATE_BIND, 30) |
710+ FIELD_PREP(MTK_PPE_BIND_RATE_PREBIND, 1);
711+ ppe_w32(ppe, MTK_PPE_BIND_RATE, val);
712+
713+ /* enable PPE */
714+ val = MTK_PPE_GLO_CFG_EN |
715+ MTK_PPE_GLO_CFG_IP4_L4_CS_DROP |
716+ MTK_PPE_GLO_CFG_IP4_CS_DROP |
717+ MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE;
718+ ppe_w32(ppe, MTK_PPE_GLO_CFG, val);
719+
720+ ppe_w32(ppe, MTK_PPE_DEFAULT_CPU_PORT, 0);
721+
722+ return 0;
723+}
724+
725+int mtk_ppe_stop(struct mtk_ppe *ppe)
726+{
727+ u32 val;
728+ int i;
729+
730+ for (i = 0; i < MTK_PPE_ENTRIES; i++)
731+ ppe->foe_table[i].ib1 = FIELD_PREP(MTK_FOE_IB1_STATE,
732+ MTK_FOE_STATE_INVALID);
733+
734+ mtk_ppe_cache_enable(ppe, false);
735+
736+ /* disable offload engine */
737+ ppe_clear(ppe, MTK_PPE_GLO_CFG, MTK_PPE_GLO_CFG_EN);
738+ ppe_w32(ppe, MTK_PPE_FLOW_CFG, 0);
739+
740+ /* disable aging */
741+ val = MTK_PPE_TB_CFG_AGE_NON_L4 |
742+ MTK_PPE_TB_CFG_AGE_UNBIND |
743+ MTK_PPE_TB_CFG_AGE_TCP |
744+ MTK_PPE_TB_CFG_AGE_UDP |
745+ MTK_PPE_TB_CFG_AGE_TCP_FIN;
746+ ppe_clear(ppe, MTK_PPE_TB_CFG, val);
747+
748+ return mtk_ppe_wait_busy(ppe);
749+}
750diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h
751new file mode 100644
752index 000000000..242fb8f2a
753--- /dev/null
754+++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
755@@ -0,0 +1,288 @@
756+// SPDX-License-Identifier: GPL-2.0-only
757+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
758+
759+#ifndef __MTK_PPE_H
760+#define __MTK_PPE_H
761+
762+#include <linux/kernel.h>
763+#include <linux/bitfield.h>
764+
765+#define MTK_ETH_PPE_BASE 0xc00
766+
767+#define MTK_PPE_ENTRIES_SHIFT 3
768+#define MTK_PPE_ENTRIES (1024 << MTK_PPE_ENTRIES_SHIFT)
769+#define MTK_PPE_HASH_MASK (MTK_PPE_ENTRIES - 1)
770+#define MTK_PPE_WAIT_TIMEOUT_US 1000000
771+
772+#define MTK_FOE_IB1_UNBIND_TIMESTAMP GENMASK(7, 0)
773+#define MTK_FOE_IB1_UNBIND_PACKETS GENMASK(23, 8)
774+#define MTK_FOE_IB1_UNBIND_PREBIND BIT(24)
775+
776+#define MTK_FOE_IB1_BIND_TIMESTAMP GENMASK(14, 0)
777+#define MTK_FOE_IB1_BIND_KEEPALIVE BIT(15)
778+#define MTK_FOE_IB1_BIND_VLAN_LAYER GENMASK(18, 16)
779+#define MTK_FOE_IB1_BIND_PPPOE BIT(19)
780+#define MTK_FOE_IB1_BIND_VLAN_TAG BIT(20)
781+#define MTK_FOE_IB1_BIND_PKT_SAMPLE BIT(21)
782+#define MTK_FOE_IB1_BIND_CACHE BIT(22)
783+#define MTK_FOE_IB1_BIND_TUNNEL_DECAP BIT(23)
784+#define MTK_FOE_IB1_BIND_TTL BIT(24)
785+
786+#define MTK_FOE_IB1_PACKET_TYPE GENMASK(27, 25)
787+#define MTK_FOE_IB1_STATE GENMASK(29, 28)
788+#define MTK_FOE_IB1_UDP BIT(30)
789+#define MTK_FOE_IB1_STATIC BIT(31)
790+
791+enum {
792+ MTK_PPE_PKT_TYPE_IPV4_HNAPT = 0,
793+ MTK_PPE_PKT_TYPE_IPV4_ROUTE = 1,
794+ MTK_PPE_PKT_TYPE_BRIDGE = 2,
795+ MTK_PPE_PKT_TYPE_IPV4_DSLITE = 3,
796+ MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T = 4,
797+ MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T = 5,
798+ MTK_PPE_PKT_TYPE_IPV6_6RD = 7,
799+};
800+
801+#define MTK_FOE_IB2_QID GENMASK(3, 0)
802+#define MTK_FOE_IB2_PSE_QOS BIT(4)
803+#define MTK_FOE_IB2_DEST_PORT GENMASK(7, 5)
804+#define MTK_FOE_IB2_MULTICAST BIT(8)
805+
806+#define MTK_FOE_IB2_WHNAT_QID2 GENMASK(13, 12)
807+#define MTK_FOE_IB2_WHNAT_DEVIDX BIT(16)
808+#define MTK_FOE_IB2_WHNAT_NAT BIT(17)
809+
810+#define MTK_FOE_IB2_PORT_MG GENMASK(17, 12)
811+
812+#define MTK_FOE_IB2_PORT_AG GENMASK(23, 18)
813+
814+#define MTK_FOE_IB2_DSCP GENMASK(31, 24)
815+
816+#define MTK_FOE_VLAN2_WHNAT_BSS GENMASK(5, 0)
817+#define MTK_FOE_VLAN2_WHNAT_WCID GENMASK(13, 6)
818+#define MTK_FOE_VLAN2_WHNAT_RING GENMASK(15, 14)
819+
820+enum {
821+ MTK_FOE_STATE_INVALID,
822+ MTK_FOE_STATE_UNBIND,
823+ MTK_FOE_STATE_BIND,
824+ MTK_FOE_STATE_FIN
825+};
826+
827+struct mtk_foe_mac_info {
828+ u16 vlan1;
829+ u16 etype;
830+
831+ u32 dest_mac_hi;
832+
833+ u16 vlan2;
834+ u16 dest_mac_lo;
835+
836+ u32 src_mac_hi;
837+
838+ u16 pppoe_id;
839+ u16 src_mac_lo;
840+};
841+
842+struct mtk_foe_bridge {
843+ u32 dest_mac_hi;
844+
845+ u16 src_mac_lo;
846+ u16 dest_mac_lo;
847+
848+ u32 src_mac_hi;
849+
850+ u32 ib2;
851+
852+ u32 _rsv[5];
853+
854+ u32 udf_tsid;
855+ struct mtk_foe_mac_info l2;
856+};
857+
858+struct mtk_ipv4_tuple {
859+ u32 src_ip;
860+ u32 dest_ip;
861+ union {
862+ struct {
863+ u16 dest_port;
864+ u16 src_port;
865+ };
866+ struct {
867+ u8 protocol;
868+ u8 _pad[3]; /* fill with 0xa5a5a5 */
869+ };
870+ u32 ports;
871+ };
872+};
873+
874+struct mtk_foe_ipv4 {
875+ struct mtk_ipv4_tuple orig;
876+
877+ u32 ib2;
878+
879+ struct mtk_ipv4_tuple new;
880+
881+ u16 timestamp;
882+ u16 _rsv0[3];
883+
884+ u32 udf_tsid;
885+
886+ struct mtk_foe_mac_info l2;
887+};
888+
889+struct mtk_foe_ipv4_dslite {
890+ struct mtk_ipv4_tuple ip4;
891+
892+ u32 tunnel_src_ip[4];
893+ u32 tunnel_dest_ip[4];
894+
895+ u8 flow_label[3];
896+ u8 priority;
897+
898+ u32 udf_tsid;
899+
900+ u32 ib2;
901+
902+ struct mtk_foe_mac_info l2;
903+};
904+
905+struct mtk_foe_ipv6 {
906+ u32 src_ip[4];
907+ u32 dest_ip[4];
908+
909+ union {
910+ struct {
911+ u8 protocol;
912+ u8 _pad[3]; /* fill with 0xa5a5a5 */
913+ }; /* 3-tuple */
914+ struct {
915+ u16 dest_port;
916+ u16 src_port;
917+ }; /* 5-tuple */
918+ u32 ports;
919+ };
920+
921+ u32 _rsv[3];
922+
923+ u32 udf;
924+
925+ u32 ib2;
926+ struct mtk_foe_mac_info l2;
927+};
928+
929+struct mtk_foe_ipv6_6rd {
930+ u32 src_ip[4];
931+ u32 dest_ip[4];
932+ u16 dest_port;
933+ u16 src_port;
934+
935+ u32 tunnel_src_ip;
936+ u32 tunnel_dest_ip;
937+
938+ u16 hdr_csum;
939+ u8 dscp;
940+ u8 ttl;
941+
942+ u8 flag;
943+ u8 pad;
944+ u8 per_flow_6rd_id;
945+ u8 pad2;
946+
947+ u32 ib2;
948+ struct mtk_foe_mac_info l2;
949+};
950+
951+struct mtk_foe_entry {
952+ u32 ib1;
953+
954+ union {
955+ struct mtk_foe_bridge bridge;
956+ struct mtk_foe_ipv4 ipv4;
957+ struct mtk_foe_ipv4_dslite dslite;
958+ struct mtk_foe_ipv6 ipv6;
959+ struct mtk_foe_ipv6_6rd ipv6_6rd;
960+ u32 data[19];
961+ };
962+};
963+
964+enum {
965+ MTK_PPE_CPU_REASON_TTL_EXCEEDED = 0x02,
966+ MTK_PPE_CPU_REASON_OPTION_HEADER = 0x03,
967+ MTK_PPE_CPU_REASON_NO_FLOW = 0x07,
968+ MTK_PPE_CPU_REASON_IPV4_FRAG = 0x08,
969+ MTK_PPE_CPU_REASON_IPV4_DSLITE_FRAG = 0x09,
970+ MTK_PPE_CPU_REASON_IPV4_DSLITE_NO_TCP_UDP = 0x0a,
971+ MTK_PPE_CPU_REASON_IPV6_6RD_NO_TCP_UDP = 0x0b,
972+ MTK_PPE_CPU_REASON_TCP_FIN_SYN_RST = 0x0c,
973+ MTK_PPE_CPU_REASON_UN_HIT = 0x0d,
974+ MTK_PPE_CPU_REASON_HIT_UNBIND = 0x0e,
975+ MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED = 0x0f,
976+ MTK_PPE_CPU_REASON_HIT_BIND_TCP_FIN = 0x10,
977+ MTK_PPE_CPU_REASON_HIT_TTL_1 = 0x11,
978+ MTK_PPE_CPU_REASON_HIT_BIND_VLAN_VIOLATION = 0x12,
979+ MTK_PPE_CPU_REASON_KEEPALIVE_UC_OLD_HDR = 0x13,
980+ MTK_PPE_CPU_REASON_KEEPALIVE_MC_NEW_HDR = 0x14,
981+ MTK_PPE_CPU_REASON_KEEPALIVE_DUP_OLD_HDR = 0x15,
982+ MTK_PPE_CPU_REASON_HIT_BIND_FORCE_CPU = 0x16,
983+ MTK_PPE_CPU_REASON_TUNNEL_OPTION_HEADER = 0x17,
984+ MTK_PPE_CPU_REASON_MULTICAST_TO_CPU = 0x18,
985+ MTK_PPE_CPU_REASON_MULTICAST_TO_GMAC1_CPU = 0x19,
986+ MTK_PPE_CPU_REASON_HIT_PRE_BIND = 0x1a,
987+ MTK_PPE_CPU_REASON_PACKET_SAMPLING = 0x1b,
988+ MTK_PPE_CPU_REASON_EXCEED_MTU = 0x1c,
989+ MTK_PPE_CPU_REASON_PPE_BYPASS = 0x1e,
990+ MTK_PPE_CPU_REASON_INVALID = 0x1f,
991+};
992+
993+struct mtk_ppe {
994+ struct device *dev;
995+ void __iomem *base;
996+ int version;
997+
998+ struct mtk_foe_entry *foe_table;
999+ dma_addr_t foe_phys;
1000+
1001+ void *acct_table;
1002+};
1003+
1004+int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
1005+ int version);
1006+int mtk_ppe_start(struct mtk_ppe *ppe);
1007+int mtk_ppe_stop(struct mtk_ppe *ppe);
1008+
1009+static inline void
1010+mtk_foe_entry_clear(struct mtk_ppe *ppe, u16 hash)
1011+{
1012+ ppe->foe_table[hash].ib1 = 0;
1013+ dma_wmb();
1014+}
1015+
1016+static inline int
1017+mtk_foe_entry_timestamp(struct mtk_ppe *ppe, u16 hash)
1018+{
1019+ u32 ib1 = READ_ONCE(ppe->foe_table[hash].ib1);
1020+
1021+ if (FIELD_GET(MTK_FOE_IB1_STATE, ib1) != MTK_FOE_STATE_BIND)
1022+ return -1;
1023+
1024+ return FIELD_GET(MTK_FOE_IB1_BIND_TIMESTAMP, ib1);
1025+}
1026+
1027+int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
1028+ u8 pse_port, u8 *src_mac, u8 *dest_mac);
1029+int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port);
1030+int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool orig,
1031+ __be32 src_addr, __be16 src_port,
1032+ __be32 dest_addr, __be16 dest_port);
1033+int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
1034+ __be32 *src_addr, __be16 src_port,
1035+ __be32 *dest_addr, __be16 dest_port);
1036+int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port);
1037+int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid);
1038+int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid);
1039+int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
1040+ u16 timestamp);
1041+int mtk_ppe_debugfs_init(struct mtk_ppe *ppe);
1042+
1043+#endif
1044diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
1045new file mode 100644
1046index 000000000..d4b482340
1047--- /dev/null
1048+++ b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
1049@@ -0,0 +1,214 @@
1050+// SPDX-License-Identifier: GPL-2.0-only
1051+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
1052+
1053+#include <linux/kernel.h>
1054+#include <linux/debugfs.h>
1055+#include "mtk_eth_soc.h"
1056+
1057+struct mtk_flow_addr_info
1058+{
1059+ void *src, *dest;
1060+ u16 *src_port, *dest_port;
1061+ bool ipv6;
1062+};
1063+
1064+static const char *mtk_foe_entry_state_str(int state)
1065+{
1066+ static const char * const state_str[] = {
1067+ [MTK_FOE_STATE_INVALID] = "INV",
1068+ [MTK_FOE_STATE_UNBIND] = "UNB",
1069+ [MTK_FOE_STATE_BIND] = "BND",
1070+ [MTK_FOE_STATE_FIN] = "FIN",
1071+ };
1072+
1073+ if (state >= ARRAY_SIZE(state_str) || !state_str[state])
1074+ return "UNK";
1075+
1076+ return state_str[state];
1077+}
1078+
1079+static const char *mtk_foe_pkt_type_str(int type)
1080+{
1081+ static const char * const type_str[] = {
1082+ [MTK_PPE_PKT_TYPE_IPV4_HNAPT] = "IPv4 5T",
1083+ [MTK_PPE_PKT_TYPE_IPV4_ROUTE] = "IPv4 3T",
1084+ [MTK_PPE_PKT_TYPE_BRIDGE] = "L2",
1085+ [MTK_PPE_PKT_TYPE_IPV4_DSLITE] = "DS-LITE",
1086+ [MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T] = "IPv6 3T",
1087+ [MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T] = "IPv6 5T",
1088+ [MTK_PPE_PKT_TYPE_IPV6_6RD] = "6RD",
1089+ };
1090+
1091+ if (type >= ARRAY_SIZE(type_str) || !type_str[type])
1092+ return "UNKNOWN";
1093+
1094+ return type_str[type];
1095+}
1096+
1097+static void
1098+mtk_print_addr(struct seq_file *m, u32 *addr, bool ipv6)
1099+{
1100+ u32 n_addr[4];
1101+ int i;
1102+
1103+ if (!ipv6) {
1104+ seq_printf(m, "%pI4h", addr);
1105+ return;
1106+ }
1107+
1108+ for (i = 0; i < ARRAY_SIZE(n_addr); i++)
1109+ n_addr[i] = htonl(addr[i]);
1110+ seq_printf(m, "%pI6", n_addr);
1111+}
1112+
1113+static void
1114+mtk_print_addr_info(struct seq_file *m, struct mtk_flow_addr_info *ai)
1115+{
1116+ mtk_print_addr(m, ai->src, ai->ipv6);
1117+ if (ai->src_port)
1118+ seq_printf(m, ":%d", *ai->src_port);
1119+ seq_printf(m, "->");
1120+ mtk_print_addr(m, ai->dest, ai->ipv6);
1121+ if (ai->dest_port)
1122+ seq_printf(m, ":%d", *ai->dest_port);
1123+}
1124+
1125+static int
1126+mtk_ppe_debugfs_foe_show(struct seq_file *m, void *private, bool bind)
1127+{
1128+ struct mtk_ppe *ppe = m->private;
1129+ int i;
1130+
1131+ for (i = 0; i < MTK_PPE_ENTRIES; i++) {
1132+ struct mtk_foe_entry *entry = &ppe->foe_table[i];
1133+ struct mtk_foe_mac_info *l2;
1134+ struct mtk_flow_addr_info ai = {};
1135+ unsigned char h_source[ETH_ALEN];
1136+ unsigned char h_dest[ETH_ALEN];
1137+ int type, state;
1138+ u32 ib2;
1139+
1140+
1141+ state = FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1);
1142+ if (!state)
1143+ continue;
1144+
1145+ if (bind && state != MTK_FOE_STATE_BIND)
1146+ continue;
1147+
1148+ type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
1149+ seq_printf(m, "%05x %s %7s", i,
1150+ mtk_foe_entry_state_str(state),
1151+ mtk_foe_pkt_type_str(type));
1152+
1153+ switch (type) {
1154+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
1155+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
1156+ ai.src_port = &entry->ipv4.orig.src_port;
1157+ ai.dest_port = &entry->ipv4.orig.dest_port;
1158+ fallthrough;
1159+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
1160+ ai.src = &entry->ipv4.orig.src_ip;
1161+ ai.dest = &entry->ipv4.orig.dest_ip;
1162+ break;
1163+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
1164+ ai.src_port = &entry->ipv6.src_port;
1165+ ai.dest_port = &entry->ipv6.dest_port;
1166+ fallthrough;
1167+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
1168+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
1169+ ai.src = &entry->ipv6.src_ip;
1170+ ai.dest = &entry->ipv6.dest_ip;
1171+ ai.ipv6 = true;
1172+ break;
1173+ }
1174+
1175+ seq_printf(m, " orig=");
1176+ mtk_print_addr_info(m, &ai);
1177+
1178+ switch (type) {
1179+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
1180+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
1181+ ai.src_port = &entry->ipv4.new.src_port;
1182+ ai.dest_port = &entry->ipv4.new.dest_port;
1183+ fallthrough;
1184+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
1185+ ai.src = &entry->ipv4.new.src_ip;
1186+ ai.dest = &entry->ipv4.new.dest_ip;
1187+ seq_printf(m, " new=");
1188+ mtk_print_addr_info(m, &ai);
1189+ break;
1190+ }
1191+
1192+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
1193+ l2 = &entry->ipv6.l2;
1194+ ib2 = entry->ipv6.ib2;
1195+ } else {
1196+ l2 = &entry->ipv4.l2;
1197+ ib2 = entry->ipv4.ib2;
1198+ }
1199+
1200+ *((__be32 *)h_source) = htonl(l2->src_mac_hi);
1201+ *((__be16 *)&h_source[4]) = htons(l2->src_mac_lo);
1202+ *((__be32 *)h_dest) = htonl(l2->dest_mac_hi);
1203+ *((__be16 *)&h_dest[4]) = htons(l2->dest_mac_lo);
1204+
1205+ seq_printf(m, " eth=%pM->%pM etype=%04x"
1206+ " vlan=%d,%d ib1=%08x ib2=%08x\n",
1207+ h_source, h_dest, ntohs(l2->etype),
1208+ l2->vlan1, l2->vlan2, entry->ib1, ib2);
1209+ }
1210+
1211+ return 0;
1212+}
1213+
1214+static int
1215+mtk_ppe_debugfs_foe_show_all(struct seq_file *m, void *private)
1216+{
1217+ return mtk_ppe_debugfs_foe_show(m, private, false);
1218+}
1219+
1220+static int
1221+mtk_ppe_debugfs_foe_show_bind(struct seq_file *m, void *private)
1222+{
1223+ return mtk_ppe_debugfs_foe_show(m, private, true);
1224+}
1225+
1226+static int
1227+mtk_ppe_debugfs_foe_open_all(struct inode *inode, struct file *file)
1228+{
1229+ return single_open(file, mtk_ppe_debugfs_foe_show_all,
1230+ inode->i_private);
1231+}
1232+
1233+static int
1234+mtk_ppe_debugfs_foe_open_bind(struct inode *inode, struct file *file)
1235+{
1236+ return single_open(file, mtk_ppe_debugfs_foe_show_bind,
1237+ inode->i_private);
1238+}
1239+
1240+int mtk_ppe_debugfs_init(struct mtk_ppe *ppe)
1241+{
1242+ static const struct file_operations fops_all = {
1243+ .open = mtk_ppe_debugfs_foe_open_all,
1244+ .read = seq_read,
1245+ .llseek = seq_lseek,
1246+ .release = single_release,
1247+ };
1248+
1249+ static const struct file_operations fops_bind = {
1250+ .open = mtk_ppe_debugfs_foe_open_bind,
1251+ .read = seq_read,
1252+ .llseek = seq_lseek,
1253+ .release = single_release,
1254+ };
1255+
1256+ struct dentry *root;
1257+
1258+ root = debugfs_create_dir("mtk_ppe", NULL);
1259+ debugfs_create_file("entries", S_IRUGO, root, ppe, &fops_all);
1260+ debugfs_create_file("bind", S_IRUGO, root, ppe, &fops_bind);
1261+
1262+ return 0;
1263+}
1264diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
1265new file mode 100644
1266index 000000000..4294f0c74
1267--- /dev/null
1268+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
1269@@ -0,0 +1,541 @@
1270+// SPDX-License-Identifier: GPL-2.0-only
1271+/*
1272+ * Copyright (C) 2020 Felix Fietkau <nbd@nbd.name>
1273+ */
1274+
1275+#include <linux/if_ether.h>
1276+#include <linux/rhashtable.h>
1277+#include <linux/ip.h>
1278+#include <linux/ipv6.h>
1279+#include <net/flow_offload.h>
1280+#include <net/pkt_cls.h>
1281+#include <net/dsa.h>
1282+#include "mtk_eth_soc.h"
1283+
1284+struct mtk_flow_data {
1285+ struct ethhdr eth;
1286+
1287+ union {
1288+ struct {
1289+ __be32 src_addr;
1290+ __be32 dst_addr;
1291+ } v4;
1292+
1293+ struct {
1294+ struct in6_addr src_addr;
1295+ struct in6_addr dst_addr;
1296+ } v6;
1297+ };
1298+
1299+ __be16 src_port;
1300+ __be16 dst_port;
1301+
1302+ struct {
1303+ u16 id;
1304+ __be16 proto;
1305+ u8 num;
1306+ } vlan;
1307+ struct {
1308+ u16 sid;
1309+ u8 num;
1310+ } pppoe;
1311+};
1312+
1313+struct mtk_flow_entry {
1314+ struct rhash_head node;
1315+ unsigned long cookie;
1316+ u16 hash;
1317+};
1318+
1319+static const struct rhashtable_params mtk_flow_ht_params = {
1320+ .head_offset = offsetof(struct mtk_flow_entry, node),
1321+ .key_offset = offsetof(struct mtk_flow_entry, cookie),
1322+ .key_len = sizeof(unsigned long),
1323+ .automatic_shrinking = true,
1324+};
1325+
1326+static u32
1327+mtk_eth_timestamp(struct mtk_eth *eth)
1328+{
1329+ return mtk_r32(eth, 0x0010) & MTK_FOE_IB1_BIND_TIMESTAMP;
1330+}
1331+
1332+static int
1333+mtk_flow_set_ipv4_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data,
1334+ bool egress)
1335+{
1336+ return mtk_foe_entry_set_ipv4_tuple(foe, egress,
1337+ data->v4.src_addr, data->src_port,
1338+ data->v4.dst_addr, data->dst_port);
1339+}
1340+
1341+static int
1342+mtk_flow_set_ipv6_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data)
1343+{
1344+ return mtk_foe_entry_set_ipv6_tuple(foe,
1345+ data->v6.src_addr.s6_addr32, data->src_port,
1346+ data->v6.dst_addr.s6_addr32, data->dst_port);
1347+}
1348+
1349+static void
1350+mtk_flow_offload_mangle_eth(const struct flow_action_entry *act, void *eth)
1351+{
1352+ void *dest = eth + act->mangle.offset;
1353+ const void *src = &act->mangle.val;
1354+
1355+ if (act->mangle.offset > 8)
1356+ return;
1357+
1358+ if (act->mangle.mask == 0xffff) {
1359+ src += 2;
1360+ dest += 2;
1361+ }
1362+
1363+ memcpy(dest, src, act->mangle.mask ? 2 : 4);
1364+}
1365+
1366+
1367+static int
1368+mtk_flow_mangle_ports(const struct flow_action_entry *act,
1369+ struct mtk_flow_data *data)
1370+{
1371+ u32 val = ntohl(act->mangle.val);
1372+
1373+ switch (act->mangle.offset) {
1374+ case 0:
1375+ if (act->mangle.mask == ~htonl(0xffff))
1376+ data->dst_port = cpu_to_be16(val);
1377+ else
1378+ data->src_port = cpu_to_be16(val >> 16);
1379+ break;
1380+ case 2:
1381+ data->dst_port = cpu_to_be16(val);
1382+ break;
1383+ default:
1384+ return -EINVAL;
1385+ }
1386+
1387+ return 0;
1388+}
1389+
1390+static int
1391+mtk_flow_mangle_ipv4(const struct flow_action_entry *act,
1392+ struct mtk_flow_data *data)
1393+{
1394+ __be32 *dest;
1395+
1396+ switch (act->mangle.offset) {
1397+ case offsetof(struct iphdr, saddr):
1398+ dest = &data->v4.src_addr;
1399+ break;
1400+ case offsetof(struct iphdr, daddr):
1401+ dest = &data->v4.dst_addr;
1402+ break;
1403+ default:
1404+ return -EINVAL;
1405+ }
1406+
1407+ memcpy(dest, &act->mangle.val, sizeof(u32));
1408+
1409+ return 0;
1410+}
1411+
1412+static int
1413+mtk_flow_get_dsa_port(struct net_device **dev)
1414+{
1415+#if IS_ENABLED(CONFIG_NET_DSA)
1416+ struct dsa_port *dp;
1417+
1418+ dp = dsa_port_from_netdev(*dev);
1419+ if (IS_ERR(dp))
1420+ return -ENODEV;
1421+
1422+ if (dp->cpu_dp->tag_ops->proto != DSA_TAG_PROTO_MTK)
1423+ return -ENODEV;
1424+
1425+ *dev = dp->cpu_dp->master;
1426+
1427+ return dp->index;
1428+#else
1429+ return -ENODEV;
1430+#endif
1431+}
1432+
1433+static int
1434+mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
1435+ struct net_device *dev)
1436+{
1437+ int pse_port, dsa_port;
1438+
1439+ dsa_port = mtk_flow_get_dsa_port(&dev);
1440+ if (dsa_port >= 0)
1441+ mtk_foe_entry_set_dsa(foe, dsa_port);
1442+
1443+ if (dev == eth->netdev[0])
1444+ pse_port = PSE_GDM1_PORT;
1445+ else if (dev == eth->netdev[1])
1446+ pse_port = PSE_GDM2_PORT;
1447+ else
1448+ return -EOPNOTSUPP;
1449+
1450+ mtk_foe_entry_set_pse_port(foe, pse_port);
1451+
1452+ return 0;
1453+}
1454+
1455+static int
1456+mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
1457+{
1458+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1459+ struct flow_action_entry *act;
1460+ struct mtk_flow_data data = {};
1461+ struct mtk_foe_entry foe;
1462+ struct net_device *odev = NULL;
1463+ struct mtk_flow_entry *entry;
1464+ int offload_type = 0;
1465+ u16 addr_type = 0;
1466+ u32 timestamp;
1467+ u8 l4proto = 0;
1468+ int err = 0;
1469+ int hash;
1470+ int i;
1471+
1472+ if (rhashtable_lookup(&eth->flow_table, &f->cookie, mtk_flow_ht_params))
1473+ return -EEXIST;
1474+
1475+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
1476+ struct flow_match_meta match;
1477+
1478+ flow_rule_match_meta(rule, &match);
1479+ } else {
1480+ return -EOPNOTSUPP;
1481+ }
1482+
1483+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
1484+ struct flow_match_control match;
1485+
1486+ flow_rule_match_control(rule, &match);
1487+ addr_type = match.key->addr_type;
1488+ } else {
1489+ return -EOPNOTSUPP;
1490+ }
1491+
1492+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
1493+ struct flow_match_basic match;
1494+
1495+ flow_rule_match_basic(rule, &match);
1496+ l4proto = match.key->ip_proto;
1497+ } else {
1498+ return -EOPNOTSUPP;
1499+ }
1500+
1501+ flow_action_for_each(i, act, &rule->action) {
1502+ switch (act->id) {
1503+ case FLOW_ACTION_MANGLE:
1504+ if (act->mangle.htype == FLOW_ACT_MANGLE_HDR_TYPE_ETH)
1505+ mtk_flow_offload_mangle_eth(act, &data.eth);
1506+ break;
1507+ case FLOW_ACTION_REDIRECT:
1508+ odev = act->dev;
1509+ break;
1510+ case FLOW_ACTION_CSUM:
1511+ break;
1512+ case FLOW_ACTION_VLAN_PUSH:
1513+ if (data.vlan.num == 1 ||
1514+ act->vlan.proto != htons(ETH_P_8021Q))
1515+ return -EOPNOTSUPP;
1516+
1517+ data.vlan.id = act->vlan.vid;
1518+ data.vlan.proto = act->vlan.proto;
1519+ data.vlan.num++;
1520+ break;
1521+ case FLOW_ACTION_VLAN_POP:
1522+ break;
1523+ case FLOW_ACTION_PPPOE_PUSH:
1524+ if (data.pppoe.num == 1)
1525+ return -EOPNOTSUPP;
1526+
1527+ data.pppoe.sid = act->pppoe.sid;
1528+ data.pppoe.num++;
1529+ break;
1530+ default:
1531+ return -EOPNOTSUPP;
1532+ }
1533+ }
1534+
1535+ switch (addr_type) {
1536+ case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
1537+ offload_type = MTK_PPE_PKT_TYPE_IPV4_HNAPT;
1538+ break;
1539+ case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
1540+ offload_type = MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T;
1541+ break;
1542+ default:
1543+ return -EOPNOTSUPP;
1544+ }
1545+
1546+ if (!is_valid_ether_addr(data.eth.h_source) ||
1547+ !is_valid_ether_addr(data.eth.h_dest))
1548+ return -EINVAL;
1549+
1550+ err = mtk_foe_entry_prepare(&foe, offload_type, l4proto, 0,
1551+ data.eth.h_source,
1552+ data.eth.h_dest);
1553+ if (err)
1554+ return err;
1555+
1556+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
1557+ struct flow_match_ports ports;
1558+
1559+ flow_rule_match_ports(rule, &ports);
1560+ data.src_port = ports.key->src;
1561+ data.dst_port = ports.key->dst;
1562+ } else {
1563+ return -EOPNOTSUPP;
1564+ }
1565+
1566+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
1567+ struct flow_match_ipv4_addrs addrs;
1568+
1569+ flow_rule_match_ipv4_addrs(rule, &addrs);
1570+
1571+ data.v4.src_addr = addrs.key->src;
1572+ data.v4.dst_addr = addrs.key->dst;
1573+
1574+ mtk_flow_set_ipv4_addr(&foe, &data, false);
1575+ }
1576+
1577+ if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
1578+ struct flow_match_ipv6_addrs addrs;
1579+
1580+ flow_rule_match_ipv6_addrs(rule, &addrs);
1581+
1582+ data.v6.src_addr = addrs.key->src;
1583+ data.v6.dst_addr = addrs.key->dst;
1584+
1585+ mtk_flow_set_ipv6_addr(&foe, &data);
1586+ }
1587+
1588+ flow_action_for_each(i, act, &rule->action) {
1589+ if (act->id != FLOW_ACTION_MANGLE)
1590+ continue;
1591+
1592+ switch (act->mangle.htype) {
1593+ case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
1594+ case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
1595+ err = mtk_flow_mangle_ports(act, &data);
1596+ break;
1597+ case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
1598+ err = mtk_flow_mangle_ipv4(act, &data);
1599+ break;
1600+ case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
1601+ /* handled earlier */
1602+ break;
1603+ default:
1604+ return -EOPNOTSUPP;
1605+ }
1606+
1607+ if (err)
1608+ return err;
1609+ }
1610+
1611+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
1612+ err = mtk_flow_set_ipv4_addr(&foe, &data, true);
1613+ if (err)
1614+ return err;
1615+ }
1616+
1617+ if (data.vlan.num == 1) {
1618+ if (data.vlan.proto != htons(ETH_P_8021Q))
1619+ return -EOPNOTSUPP;
1620+
1621+ mtk_foe_entry_set_vlan(&foe, data.vlan.id);
1622+ }
1623+ if (data.pppoe.num == 1)
1624+ mtk_foe_entry_set_pppoe(&foe, data.pppoe.sid);
1625+
1626+ err = mtk_flow_set_output_device(eth, &foe, odev);
1627+ if (err)
1628+ return err;
1629+
1630+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1631+ if (!entry)
1632+ return -ENOMEM;
1633+
1634+ entry->cookie = f->cookie;
1635+ timestamp = mtk_eth_timestamp(eth);
1636+ hash = mtk_foe_entry_commit(&eth->ppe, &foe, timestamp);
1637+ if (hash < 0) {
1638+ err = hash;
1639+ goto free;
1640+ }
1641+
1642+ entry->hash = hash;
1643+ err = rhashtable_insert_fast(&eth->flow_table, &entry->node,
1644+ mtk_flow_ht_params);
1645+ if (err < 0)
1646+ goto clear_flow;
1647+
1648+ return 0;
1649+clear_flow:
1650+ mtk_foe_entry_clear(&eth->ppe, hash);
1651+free:
1652+ kfree(entry);
1653+ return err;
1654+}
1655+
1656+static int
1657+mtk_flow_offload_destroy(struct mtk_eth *eth, struct flow_cls_offload *f)
1658+{
1659+ struct mtk_flow_entry *entry;
1660+
1661+ entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
1662+ mtk_flow_ht_params);
1663+ if (!entry)
1664+ return -ENOENT;
1665+
1666+ mtk_foe_entry_clear(&eth->ppe, entry->hash);
1667+ rhashtable_remove_fast(&eth->flow_table, &entry->node,
1668+ mtk_flow_ht_params);
1669+ kfree(entry);
1670+
1671+ return 0;
1672+}
1673+
1674+static int
1675+mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f)
1676+{
1677+ struct mtk_flow_entry *entry;
1678+ int timestamp;
1679+ u32 idle;
1680+
1681+ entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
1682+ mtk_flow_ht_params);
1683+ if (!entry)
1684+ return -ENOENT;
1685+
1686+ timestamp = mtk_foe_entry_timestamp(&eth->ppe, entry->hash);
1687+ if (timestamp < 0)
1688+ return -ETIMEDOUT;
1689+
1690+ idle = mtk_eth_timestamp(eth) - timestamp;
1691+ f->stats.lastused = jiffies - idle * HZ;
1692+
1693+ return 0;
1694+}
1695+
1696+static DEFINE_MUTEX(mtk_flow_offload_mutex);
1697+
1698+static int
1699+mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
1700+{
1701+ struct flow_cls_offload *cls = type_data;
1702+ struct net_device *dev = cb_priv;
1703+ struct mtk_mac *mac = netdev_priv(dev);
1704+ struct mtk_eth *eth = mac->hw;
1705+ int err;
1706+
1707+ if (!tc_can_offload(dev))
1708+ return -EOPNOTSUPP;
1709+
1710+ if (type != TC_SETUP_CLSFLOWER)
1711+ return -EOPNOTSUPP;
1712+
1713+ mutex_lock(&mtk_flow_offload_mutex);
1714+ switch (cls->command) {
1715+ case FLOW_CLS_REPLACE:
1716+ err = mtk_flow_offload_replace(eth, cls);
1717+ break;
1718+ case FLOW_CLS_DESTROY:
1719+ err = mtk_flow_offload_destroy(eth, cls);
1720+ break;
1721+ case FLOW_CLS_STATS:
1722+ err = mtk_flow_offload_stats(eth, cls);
1723+ break;
1724+ default:
1725+ err = -EOPNOTSUPP;
1726+ break;
1727+ }
1728+ mutex_unlock(&mtk_flow_offload_mutex);
1729+
1730+ return err;
1731+}
1732+
1733+static int
1734+mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f)
1735+{
1736+ struct mtk_mac *mac = netdev_priv(dev);
1737+ struct mtk_eth *eth = mac->hw;
1738+ struct nf_flowtable *flowtable;
1739+ static LIST_HEAD(block_cb_list);
1740+ struct flow_block_cb *block_cb;
1741+ flow_setup_cb_t *cb;
1742+ int err = 0;
1743+
1744+ flowtable = container_of(f->block, struct nf_flowtable, flow_block);
1745+
1746+ if (!eth->ppe.foe_table)
1747+ return -EOPNOTSUPP;
1748+
1749+ if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
1750+ return -EOPNOTSUPP;
1751+
1752+ cb = mtk_eth_setup_tc_block_cb;
1753+ f->driver_block_list = &block_cb_list;
1754+
1755+ down_write(&flowtable->flow_block_lock);
1756+
1757+ switch (f->command) {
1758+ case FLOW_BLOCK_BIND:
1759+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
1760+ if (block_cb) {
1761+ flow_block_cb_incref(block_cb);
1762+ goto unlock;
1763+ }
1764+ block_cb = flow_block_cb_alloc(cb, dev, dev, NULL);
1765+ if (IS_ERR(block_cb)) {
1766+ err = PTR_ERR(block_cb);
1767+ goto unlock;
1768+ }
1769+
1770+ flow_block_cb_add(block_cb, f);
1771+ list_add_tail(&block_cb->driver_list, &block_cb_list);
1772+ break;
1773+ case FLOW_BLOCK_UNBIND:
1774+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
1775+ if (!block_cb) {
1776+ err = -ENOENT;
1777+ goto unlock;
1778+ }
1779+
1780+ if (flow_block_cb_decref(block_cb)) {
1781+ flow_block_cb_remove(block_cb, f);
1782+ list_del(&block_cb->driver_list);
1783+ }
1784+ break;
1785+ default:
1786+ err = -EOPNOTSUPP;
1787+ break;
1788+ }
1789+
1790+unlock:
1791+ up_write(&flowtable->flow_block_lock);
1792+ return err;
1793+}
1794+
1795+int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
1796+ void *type_data)
1797+{
1798+ if (type == TC_SETUP_FT)
1799+ return mtk_eth_setup_tc_block(dev, type_data);
1800+
1801+ return -EOPNOTSUPP;
1802+}
1803+
1804+int mtk_eth_offload_init(struct mtk_eth *eth)
1805+{
1806+ if (!eth->ppe.foe_table)
1807+ return 0;
1808+
1809+ return rhashtable_init(&eth->flow_table, &mtk_flow_ht_params);
1810+}
1811diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_regs.h b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
1812new file mode 100644
1813index 000000000..0c45ea090
1814--- /dev/null
1815+++ b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
1816@@ -0,0 +1,144 @@
1817+// SPDX-License-Identifier: GPL-2.0-only
1818+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
1819+
1820+#ifndef __MTK_PPE_REGS_H
1821+#define __MTK_PPE_REGS_H
1822+
1823+#define MTK_PPE_GLO_CFG 0x200
1824+#define MTK_PPE_GLO_CFG_EN BIT(0)
1825+#define MTK_PPE_GLO_CFG_TSID_EN BIT(1)
1826+#define MTK_PPE_GLO_CFG_IP4_L4_CS_DROP BIT(2)
1827+#define MTK_PPE_GLO_CFG_IP4_CS_DROP BIT(3)
1828+#define MTK_PPE_GLO_CFG_TTL0_DROP BIT(4)
1829+#define MTK_PPE_GLO_CFG_PPE_BSWAP BIT(5)
1830+#define MTK_PPE_GLO_CFG_PSE_HASH_OFS BIT(6)
1831+#define MTK_PPE_GLO_CFG_MCAST_TB_EN BIT(7)
1832+#define MTK_PPE_GLO_CFG_FLOW_DROP_KA BIT(8)
1833+#define MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE BIT(9)
1834+#define MTK_PPE_GLO_CFG_UDP_LITE_EN BIT(10)
1835+#define MTK_PPE_GLO_CFG_UDP_LEN_DROP BIT(11)
1836+#define MTK_PPE_GLO_CFG_MCAST_ENTRIES GENMASK(13, 12)
1837+#define MTK_PPE_GLO_CFG_BUSY BIT(31)
1838+
1839+#define MTK_PPE_FLOW_CFG 0x204
1840+#define MTK_PPE_FLOW_CFG_IP4_TCP_FRAG BIT(6)
1841+#define MTK_PPE_FLOW_CFG_IP4_UDP_FRAG BIT(7)
1842+#define MTK_PPE_FLOW_CFG_IP6_3T_ROUTE BIT(8)
1843+#define MTK_PPE_FLOW_CFG_IP6_5T_ROUTE BIT(9)
1844+#define MTK_PPE_FLOW_CFG_IP6_6RD BIT(10)
1845+#define MTK_PPE_FLOW_CFG_IP4_NAT BIT(12)
1846+#define MTK_PPE_FLOW_CFG_IP4_NAPT BIT(13)
1847+#define MTK_PPE_FLOW_CFG_IP4_DSLITE BIT(14)
1848+#define MTK_PPE_FLOW_CFG_L2_BRIDGE BIT(15)
1849+#define MTK_PPE_FLOW_CFG_IP_PROTO_BLACKLIST BIT(16)
1850+#define MTK_PPE_FLOW_CFG_IP4_NAT_FRAG BIT(17)
1851+#define MTK_PPE_FLOW_CFG_IP4_HASH_FLOW_LABEL BIT(18)
1852+#define MTK_PPE_FLOW_CFG_IP4_HASH_GRE_KEY BIT(19)
1853+#define MTK_PPE_FLOW_CFG_IP6_HASH_GRE_KEY BIT(20)
1854+
1855+#define MTK_PPE_IP_PROTO_CHK 0x208
1856+#define MTK_PPE_IP_PROTO_CHK_IPV4 GENMASK(15, 0)
1857+#define MTK_PPE_IP_PROTO_CHK_IPV6 GENMASK(31, 16)
1858+
1859+#define MTK_PPE_TB_CFG 0x21c
1860+#define MTK_PPE_TB_CFG_ENTRY_NUM GENMASK(2, 0)
1861+#define MTK_PPE_TB_CFG_ENTRY_80B BIT(3)
1862+#define MTK_PPE_TB_CFG_SEARCH_MISS GENMASK(5, 4)
1863+#define MTK_PPE_TB_CFG_AGE_PREBIND BIT(6)
1864+#define MTK_PPE_TB_CFG_AGE_NON_L4 BIT(7)
1865+#define MTK_PPE_TB_CFG_AGE_UNBIND BIT(8)
1866+#define MTK_PPE_TB_CFG_AGE_TCP BIT(9)
1867+#define MTK_PPE_TB_CFG_AGE_UDP BIT(10)
1868+#define MTK_PPE_TB_CFG_AGE_TCP_FIN BIT(11)
1869+#define MTK_PPE_TB_CFG_KEEPALIVE GENMASK(13, 12)
1870+#define MTK_PPE_TB_CFG_HASH_MODE GENMASK(15, 14)
1871+#define MTK_PPE_TB_CFG_SCAN_MODE GENMASK(17, 16)
1872+#define MTK_PPE_TB_CFG_HASH_DEBUG GENMASK(19, 18)
1873+
1874+enum {
1875+ MTK_PPE_SCAN_MODE_DISABLED,
1876+ MTK_PPE_SCAN_MODE_CHECK_AGE,
1877+ MTK_PPE_SCAN_MODE_KEEPALIVE_AGE,
1878+};
1879+
1880+enum {
1881+ MTK_PPE_KEEPALIVE_DISABLE,
1882+ MTK_PPE_KEEPALIVE_UNICAST_CPU,
1883+ MTK_PPE_KEEPALIVE_DUP_CPU = 3,
1884+};
1885+
1886+enum {
1887+ MTK_PPE_SEARCH_MISS_ACTION_DROP,
1888+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD = 2,
1889+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD = 3,
1890+};
1891+
1892+#define MTK_PPE_TB_BASE 0x220
1893+
1894+#define MTK_PPE_TB_USED 0x224
1895+#define MTK_PPE_TB_USED_NUM GENMASK(13, 0)
1896+
1897+#define MTK_PPE_BIND_RATE 0x228
1898+#define MTK_PPE_BIND_RATE_BIND GENMASK(15, 0)
1899+#define MTK_PPE_BIND_RATE_PREBIND GENMASK(31, 16)
1900+
1901+#define MTK_PPE_BIND_LIMIT0 0x22c
1902+#define MTK_PPE_BIND_LIMIT0_QUARTER GENMASK(13, 0)
1903+#define MTK_PPE_BIND_LIMIT0_HALF GENMASK(29, 16)
1904+
1905+#define MTK_PPE_BIND_LIMIT1 0x230
1906+#define MTK_PPE_BIND_LIMIT1_FULL GENMASK(13, 0)
1907+#define MTK_PPE_BIND_LIMIT1_NON_L4 GENMASK(23, 16)
1908+
1909+#define MTK_PPE_KEEPALIVE 0x234
1910+#define MTK_PPE_KEEPALIVE_TIME GENMASK(15, 0)
1911+#define MTK_PPE_KEEPALIVE_TIME_TCP GENMASK(23, 16)
1912+#define MTK_PPE_KEEPALIVE_TIME_UDP GENMASK(31, 24)
1913+
1914+#define MTK_PPE_UNBIND_AGE 0x238
1915+#define MTK_PPE_UNBIND_AGE_MIN_PACKETS GENMASK(31, 16)
1916+#define MTK_PPE_UNBIND_AGE_DELTA GENMASK(7, 0)
1917+
1918+#define MTK_PPE_BIND_AGE0 0x23c
1919+#define MTK_PPE_BIND_AGE0_DELTA_NON_L4 GENMASK(30, 16)
1920+#define MTK_PPE_BIND_AGE0_DELTA_UDP GENMASK(14, 0)
1921+
1922+#define MTK_PPE_BIND_AGE1 0x240
1923+#define MTK_PPE_BIND_AGE1_DELTA_TCP_FIN GENMASK(30, 16)
1924+#define MTK_PPE_BIND_AGE1_DELTA_TCP GENMASK(14, 0)
1925+
1926+#define MTK_PPE_HASH_SEED 0x244
1927+
1928+#define MTK_PPE_DEFAULT_CPU_PORT 0x248
1929+#define MTK_PPE_DEFAULT_CPU_PORT_MASK(_n) (GENMASK(2, 0) << ((_n) * 4))
1930+
1931+#define MTK_PPE_MTU_DROP 0x308
1932+
1933+#define MTK_PPE_VLAN_MTU0 0x30c
1934+#define MTK_PPE_VLAN_MTU0_NONE GENMASK(13, 0)
1935+#define MTK_PPE_VLAN_MTU0_1TAG GENMASK(29, 16)
1936+
1937+#define MTK_PPE_VLAN_MTU1 0x310
1938+#define MTK_PPE_VLAN_MTU1_2TAG GENMASK(13, 0)
1939+#define MTK_PPE_VLAN_MTU1_3TAG GENMASK(29, 16)
1940+
1941+#define MTK_PPE_VPM_TPID 0x318
1942+
1943+#define MTK_PPE_CACHE_CTL 0x320
1944+#define MTK_PPE_CACHE_CTL_EN BIT(0)
1945+#define MTK_PPE_CACHE_CTL_LOCK_CLR BIT(4)
1946+#define MTK_PPE_CACHE_CTL_REQ BIT(8)
1947+#define MTK_PPE_CACHE_CTL_CLEAR BIT(9)
1948+#define MTK_PPE_CACHE_CTL_CMD GENMASK(13, 12)
1949+
1950+#define MTK_PPE_MIB_CFG 0x334
1951+#define MTK_PPE_MIB_CFG_EN BIT(0)
1952+#define MTK_PPE_MIB_CFG_RD_CLR BIT(1)
1953+
1954+#define MTK_PPE_MIB_TB_BASE 0x338
1955+
1956+#define MTK_PPE_MIB_CACHE_CTL 0x350
1957+#define MTK_PPE_MIB_CACHE_CTL_EN BIT(0)
1958+#define MTK_PPE_MIB_CACHE_CTL_FLUSH BIT(2)
1959+
1960+#endif
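The register field macros above are plain GENMASK() definitions; when the PPE is programmed they are normally combined with FIELD_PREP() from <linux/bitfield.h>. A minimal sketch, assuming a hypothetical mtk_ppe_w32() register-write helper that is not part of this patch:

    #include <linux/bitfield.h>
    #include <linux/io.h>

    /* Hypothetical helper: write one 32-bit PPE register at offset 'reg'. */
    static void mtk_ppe_w32(void __iomem *base, u32 reg, u32 val)
    {
            writel(val, base + reg);
    }

    /* Sketch: duplicate keep-alive packets to the CPU, age entries by scan,
     * and forward missed lookups while a new entry is being built.
     */
    static void mtk_ppe_tb_cfg_sketch(void __iomem *base)
    {
            u32 val;

            val = FIELD_PREP(MTK_PPE_TB_CFG_KEEPALIVE, MTK_PPE_KEEPALIVE_DUP_CPU) |
                  FIELD_PREP(MTK_PPE_TB_CFG_SCAN_MODE, MTK_PPE_SCAN_MODE_KEEPALIVE_AGE) |
                  FIELD_PREP(MTK_PPE_TB_CFG_SEARCH_MISS, MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD);
            mtk_ppe_w32(base, MTK_PPE_TB_CFG, val);
    }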
1961diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
1962index a085213dc..813e30495 100644
1963--- a/drivers/net/ppp/ppp_generic.c
1964+++ b/drivers/net/ppp/ppp_generic.c
1965@@ -1378,12 +1378,34 @@ static void ppp_dev_priv_destructor(struct net_device *dev)
1966 ppp_destroy_interface(ppp);
1967 }
1968
1969+static int ppp_fill_forward_path(struct net_device_path_ctx *ctx,
1970+ struct net_device_path *path)
1971+{
1972+ struct ppp *ppp = netdev_priv(ctx->dev);
1973+ struct ppp_channel *chan;
1974+ struct channel *pch;
1975+
1976+ if (ppp->flags & SC_MULTILINK)
1977+ return -EOPNOTSUPP;
1978+
1979+ if (list_empty(&ppp->channels))
1980+ return -ENODEV;
1981+
1982+ pch = list_first_entry(&ppp->channels, struct channel, clist);
1983+ chan = pch->chan;
1984+ if (!chan->ops->fill_forward_path)
1985+ return -EOPNOTSUPP;
1986+
1987+ return chan->ops->fill_forward_path(ctx, path, chan);
1988+}
1989+
1990 static const struct net_device_ops ppp_netdev_ops = {
1991 .ndo_init = ppp_dev_init,
1992 .ndo_uninit = ppp_dev_uninit,
1993 .ndo_start_xmit = ppp_start_xmit,
1994 .ndo_do_ioctl = ppp_net_ioctl,
1995 .ndo_get_stats64 = ppp_get_stats64,
1996+ .ndo_fill_forward_path = ppp_fill_forward_path,
1997 };
1998
1999 static struct device_type ppp_type = {
2000diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
2001index 087b01684..7a8c246ab 100644
2002--- a/drivers/net/ppp/pppoe.c
2003+++ b/drivers/net/ppp/pppoe.c
2004@@ -974,8 +974,32 @@ static int pppoe_xmit(struct ppp_channel *chan, struct sk_buff *skb)
2005 return __pppoe_xmit(sk, skb);
2006 }
2007
2008+static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx,
2009+ struct net_device_path *path,
2010+ const struct ppp_channel *chan)
2011+{
2012+ struct sock *sk = (struct sock *)chan->private;
2013+ struct pppox_sock *po = pppox_sk(sk);
2014+ struct net_device *dev = po->pppoe_dev;
2015+
2016+ if (sock_flag(sk, SOCK_DEAD) ||
2017+ !(sk->sk_state & PPPOX_CONNECTED) || !dev)
2018+ return -1;
2019+
2020+ path->type = DEV_PATH_PPPOE;
2021+ path->encap.proto = htons(ETH_P_PPP_SES);
2022+ path->encap.id = be16_to_cpu(po->num);
2023+ memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN);
2024+ memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN);
2025+ path->dev = ctx->dev;
2026+ ctx->dev = dev;
2027+
2028+ return 0;
2029+}
2030+
2031 static const struct ppp_channel_ops pppoe_chan_ops = {
2032 .start_xmit = pppoe_xmit,
2033+ .fill_forward_path = pppoe_fill_forward_path,
2034 };
2035
2036 static int pppoe_recvmsg(struct socket *sock, struct msghdr *m,
2037diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
2038index 38af42bf8..9f64504ac 100644
2039--- a/include/linux/netdevice.h
2040+++ b/include/linux/netdevice.h
2041@@ -829,6 +829,59 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
2042 struct sk_buff *skb,
2043 struct net_device *sb_dev);
2044
2045+enum net_device_path_type {
2046+ DEV_PATH_ETHERNET = 0,
2047+ DEV_PATH_VLAN,
2048+ DEV_PATH_BRIDGE,
2049+ DEV_PATH_PPPOE,
2050+ DEV_PATH_DSA,
2051+};
2052+
2053+struct net_device_path {
2054+ enum net_device_path_type type;
2055+ const struct net_device *dev;
2056+ union {
2057+ struct {
2058+ u16 id;
2059+ __be16 proto;
2060+ u8 h_dest[ETH_ALEN];
2061+ } encap;
2062+ struct {
2063+ enum {
2064+ DEV_PATH_BR_VLAN_KEEP,
2065+ DEV_PATH_BR_VLAN_TAG,
2066+ DEV_PATH_BR_VLAN_UNTAG,
2067+ DEV_PATH_BR_VLAN_UNTAG_HW,
2068+ } vlan_mode;
2069+ u16 vlan_id;
2070+ __be16 vlan_proto;
2071+ } bridge;
2072+ struct {
2073+ int port;
2074+ u16 proto;
2075+ } dsa;
2076+ };
2077+};
2078+
2079+#define NET_DEVICE_PATH_STACK_MAX 5
2080+#define NET_DEVICE_PATH_VLAN_MAX 2
2081+
2082+struct net_device_path_stack {
2083+ int num_paths;
2084+ struct net_device_path path[NET_DEVICE_PATH_STACK_MAX];
2085+};
2086+
2087+struct net_device_path_ctx {
2088+ const struct net_device *dev;
2089+ u8 daddr[ETH_ALEN];
2090+
2091+ int num_vlans;
2092+ struct {
2093+ u16 id;
2094+ __be16 proto;
2095+ } vlan[NET_DEVICE_PATH_VLAN_MAX];
2096+};
2097+
2098 enum tc_setup_type {
2099 TC_SETUP_QDISC_MQPRIO,
2100 TC_SETUP_CLSU32,
2101@@ -844,6 +897,7 @@ enum tc_setup_type {
2102 TC_SETUP_ROOT_QDISC,
2103 TC_SETUP_QDISC_GRED,
2104 TC_SETUP_QDISC_TAPRIO,
2105+ TC_SETUP_FT,
2106 };
2107
2108 /* These structures hold the attributes of bpf state that are being passed
2109@@ -1239,6 +1293,8 @@ struct tlsdev_ops;
2110 * Get devlink port instance associated with a given netdev.
2111 * Called with a reference on the netdevice and devlink locks only,
2112 * rtnl_lock is not held.
2113+ * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path);
2114+ * Get the forwarding path to reach the real device from the HW destination address
2115 */
2116 struct net_device_ops {
2117 int (*ndo_init)(struct net_device *dev);
2118@@ -1436,6 +1492,8 @@ struct net_device_ops {
2119 int (*ndo_xsk_wakeup)(struct net_device *dev,
2120 u32 queue_id, u32 flags);
2121 struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev);
2122+ int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx,
2123+ struct net_device_path *path);
2124 };
2125
2126 /**
2127@@ -2661,6 +2719,8 @@ void dev_remove_offload(struct packet_offload *po);
2128
2129 int dev_get_iflink(const struct net_device *dev);
2130 int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
2131+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
2132+ struct net_device_path_stack *stack);
2133 struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
2134 unsigned short mask);
2135 struct net_device *dev_get_by_name(struct net *net, const char *name);
2136diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h
2137index 98966064e..91f9a9283 100644
2138--- a/include/linux/ppp_channel.h
2139+++ b/include/linux/ppp_channel.h
2140@@ -28,6 +28,9 @@ struct ppp_channel_ops {
2141 int (*start_xmit)(struct ppp_channel *, struct sk_buff *);
2142 /* Handle an ioctl call that has come in via /dev/ppp. */
2143 int (*ioctl)(struct ppp_channel *, unsigned int, unsigned long);
2144+ int (*fill_forward_path)(struct net_device_path_ctx *,
2145+ struct net_device_path *,
2146+ const struct ppp_channel *);
2147 };
2148
2149 struct ppp_channel {
2150diff --git a/include/net/dsa.h b/include/net/dsa.h
2151index 05f66d487..cafc74218 100644
2152--- a/include/net/dsa.h
2153+++ b/include/net/dsa.h
2154@@ -561,6 +561,8 @@ struct dsa_switch_ops {
2155 struct sk_buff *skb);
2156 };
2157
2158+struct dsa_port *dsa_port_from_netdev(struct net_device *netdev);
2159+
2160 struct dsa_switch_driver {
2161 struct list_head list;
2162 const struct dsa_switch_ops *ops;
2163@@ -653,6 +655,14 @@ static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev,
2164 #define BRCM_TAG_GET_PORT(v) ((v) >> 8)
2165 #define BRCM_TAG_GET_QUEUE(v) ((v) & 0xff)
2166
2167+#if IS_ENABLED(CONFIG_NET_DSA)
2168+bool dsa_slave_dev_check(const struct net_device *dev);
2169+#else
2170+static inline bool dsa_slave_dev_check(const struct net_device *dev)
2171+{
2172+ return false;
2173+}
2174+#endif
2175
2176 netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev);
2177 int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data);
2178diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
2179index c6f7bd22d..59b873653 100644
2180--- a/include/net/flow_offload.h
2181+++ b/include/net/flow_offload.h
2182@@ -138,6 +138,7 @@ enum flow_action_id {
2183 FLOW_ACTION_MPLS_PUSH,
2184 FLOW_ACTION_MPLS_POP,
2185 FLOW_ACTION_MPLS_MANGLE,
2186+ FLOW_ACTION_PPPOE_PUSH,
2187 NUM_FLOW_ACTIONS,
2188 };
2189
2190@@ -213,6 +214,9 @@ struct flow_action_entry {
2191 u8 bos;
2192 u8 ttl;
2193 } mpls_mangle;
2194+ struct { /* FLOW_ACTION_PPPOE_PUSH */
2195+ u16 sid;
2196+ } pppoe;
2197 };
2198 };
2199
2200diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
2201index 2c739fc75..89ab8f180 100644
2202--- a/include/net/ip6_route.h
2203+++ b/include/net/ip6_route.h
2204@@ -314,12 +314,13 @@ static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *
2205 !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws);
2206 }
2207
2208-static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
2209+static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst,
2210+ bool forwarding)
2211 {
2212 struct inet6_dev *idev;
2213 unsigned int mtu;
2214
2215- if (dst_metric_locked(dst, RTAX_MTU)) {
2216+ if (!forwarding || dst_metric_locked(dst, RTAX_MTU)) {
2217 mtu = dst_metric_raw(dst, RTAX_MTU);
2218 if (mtu)
2219 goto out;
2220diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
2221index 7b3c873f8..e95483192 100644
2222--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
2223+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
2224@@ -4,7 +4,4 @@
2225
2226 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
2227
2228-#include <linux/sysctl.h>
2229-extern struct ctl_table nf_ct_ipv6_sysctl_table[];
2230-
2231 #endif /* _NF_CONNTRACK_IPV6_H*/
2232diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
2233index 90690e37a..ce0bc3e62 100644
2234--- a/include/net/netfilter/nf_conntrack.h
2235+++ b/include/net/netfilter/nf_conntrack.h
2236@@ -279,6 +279,18 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
2237 !nf_ct_is_dying(ct);
2238 }
2239
2240+#define NF_CT_DAY (86400 * HZ)
2241+
2242+/* Set an arbitrary timeout large enough not to ever expire, this saves
2243+ * us a check for the IPS_OFFLOAD_BIT from the packet path via
2244+ * nf_ct_is_expired().
2245+ */
2246+static inline void nf_ct_offload_timeout(struct nf_conn *ct)
2247+{
2248+ if (nf_ct_expires(ct) < NF_CT_DAY / 2)
2249+ WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
2250+}
2251+
2252 struct kernel_param;
2253
2254 int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp);
2255diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
2256index f7a060c6e..7f44a7715 100644
2257--- a/include/net/netfilter/nf_conntrack_acct.h
2258+++ b/include/net/netfilter/nf_conntrack_acct.h
2259@@ -65,6 +65,17 @@ static inline void nf_ct_set_acct(struct net *net, bool enable)
2260 #endif
2261 }
2262
2263+void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
2264+ unsigned int bytes);
2265+
2266+static inline void nf_ct_acct_update(struct nf_conn *ct, u32 dir,
2267+ unsigned int bytes)
2268+{
2269+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
2270+ nf_ct_acct_add(ct, dir, 1, bytes);
2271+#endif
2272+}
2273+
2274 void nf_conntrack_acct_pernet_init(struct net *net);
2275
2276 int nf_conntrack_acct_init(void);
2277diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
developerb7c46752022-07-04 19:51:38 +08002278index 68d7fc92..7cf89767 100644
developer8cb3ac72022-07-04 10:55:14 +08002279--- a/include/net/netfilter/nf_flow_table.h
2280+++ b/include/net/netfilter/nf_flow_table.h
2281@@ -8,31 +8,99 @@
2282 #include <linux/rcupdate.h>
2283 #include <linux/netfilter.h>
2284 #include <linux/netfilter/nf_conntrack_tuple_common.h>
2285+#include <net/flow_offload.h>
2286 #include <net/dst.h>
2287+#include <linux/if_pppox.h>
2288+#include <linux/ppp_defs.h>
2289
2290 struct nf_flowtable;
2291+struct nf_flow_rule;
2292+struct flow_offload;
2293+enum flow_offload_tuple_dir;
2294+
2295+struct nf_flow_key {
2296+ struct flow_dissector_key_meta meta;
2297+ struct flow_dissector_key_control control;
2298+ struct flow_dissector_key_control enc_control;
2299+ struct flow_dissector_key_basic basic;
2300+ struct flow_dissector_key_vlan vlan;
2301+ struct flow_dissector_key_vlan cvlan;
2302+ union {
2303+ struct flow_dissector_key_ipv4_addrs ipv4;
2304+ struct flow_dissector_key_ipv6_addrs ipv6;
2305+ };
2306+ struct flow_dissector_key_keyid enc_key_id;
2307+ union {
2308+ struct flow_dissector_key_ipv4_addrs enc_ipv4;
2309+ struct flow_dissector_key_ipv6_addrs enc_ipv6;
2310+ };
2311+ struct flow_dissector_key_tcp tcp;
2312+ struct flow_dissector_key_ports tp;
2313+} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
2314+
2315+struct nf_flow_match {
2316+ struct flow_dissector dissector;
2317+ struct nf_flow_key key;
2318+ struct nf_flow_key mask;
2319+};
2320+
2321+struct nf_flow_rule {
2322+ struct nf_flow_match match;
2323+ struct flow_rule *rule;
2324+};
2325
2326 struct nf_flowtable_type {
2327 struct list_head list;
2328 int family;
2329 int (*init)(struct nf_flowtable *ft);
2330+ int (*setup)(struct nf_flowtable *ft,
2331+ struct net_device *dev,
2332+ enum flow_block_command cmd);
2333+ int (*action)(struct net *net,
2334+ const struct flow_offload *flow,
2335+ enum flow_offload_tuple_dir dir,
2336+ struct nf_flow_rule *flow_rule);
2337 void (*free)(struct nf_flowtable *ft);
2338 nf_hookfn *hook;
2339 struct module *owner;
2340 };
2341
2342+enum nf_flowtable_flags {
2343+ NF_FLOWTABLE_HW_OFFLOAD = 0x1, /* NFT_FLOWTABLE_HW_OFFLOAD */
2344+ NF_FLOWTABLE_COUNTER = 0x2, /* NFT_FLOWTABLE_COUNTER */
2345+};
2346+
2347 struct nf_flowtable {
2348 struct list_head list;
2349 struct rhashtable rhashtable;
2350+ int priority;
2351 const struct nf_flowtable_type *type;
2352 struct delayed_work gc_work;
2353+ unsigned int flags;
2354+ struct flow_block flow_block;
2355+ struct rw_semaphore flow_block_lock; /* Guards flow_block */
2356+ possible_net_t net;
2357 };
2358
2359+static inline bool nf_flowtable_hw_offload(struct nf_flowtable *flowtable)
2360+{
2361+ return flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD;
2362+}
2363+
2364 enum flow_offload_tuple_dir {
2365 FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
2366 FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
2367- FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
2368 };
2369+#define FLOW_OFFLOAD_DIR_MAX IP_CT_DIR_MAX
2370+
2371+enum flow_offload_xmit_type {
2372+ FLOW_OFFLOAD_XMIT_UNSPEC = 0,
2373+ FLOW_OFFLOAD_XMIT_NEIGH,
2374+ FLOW_OFFLOAD_XMIT_XFRM,
2375+ FLOW_OFFLOAD_XMIT_DIRECT,
2376+};
2377+
2378+#define NF_FLOW_TABLE_ENCAP_MAX 2
2379
2380 struct flow_offload_tuple {
2381 union {
developerb7c46752022-07-04 19:51:38 +08002382@@ -52,13 +120,30 @@ struct flow_offload_tuple {
developer8cb3ac72022-07-04 10:55:14 +08002383
2384 u8 l3proto;
2385 u8 l4proto;
2386- u8 dir;
2387+ struct {
2388+ u16 id;
2389+ __be16 proto;
2390+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
2391
2392- u16 mtu;
2393+ /* All members above are keys for lookups, see flow_offload_hash(). */
2394+ struct { } __hash;
2395
developerb7c46752022-07-04 19:51:38 +08002396- struct {
2397- struct dst_entry *dst_cache;
2398- u32 dst_cookie;
developer8cb3ac72022-07-04 10:55:14 +08002399+ u8 dir:2,
2400+ xmit_type:2,
2401+ encap_num:2,
2402+ in_vlan_ingress:2;
2403+ u16 mtu;
2404+ union {
2405+ struct {
2406+ struct dst_entry *dst_cache;
2407+ u32 dst_cookie;
2408+ };
2409+ struct {
2410+ u32 ifidx;
2411+ u32 hw_ifidx;
2412+ u8 h_source[ETH_ALEN];
2413+ u8 h_dest[ETH_ALEN];
2414+ } out;
developerb7c46752022-07-04 19:51:38 +08002415 };
developer8cb3ac72022-07-04 10:55:14 +08002416 };
2417
developerec862f42023-03-23 13:08:45 +08002418@@ -67,52 +152,140 @@ struct flow_offload_tuple_rhash {
developer8cb3ac72022-07-04 10:55:14 +08002419 struct flow_offload_tuple tuple;
2420 };
2421
2422-#define FLOW_OFFLOAD_SNAT 0x1
2423-#define FLOW_OFFLOAD_DNAT 0x2
2424-#define FLOW_OFFLOAD_DYING 0x4
2425-#define FLOW_OFFLOAD_TEARDOWN 0x8
2426+enum nf_flow_flags {
2427+ NF_FLOW_SNAT,
2428+ NF_FLOW_DNAT,
2429+ NF_FLOW_TEARDOWN,
2430+ NF_FLOW_HW,
developerec862f42023-03-23 13:08:45 +08002431+ NF_FLOW_HW_ACCT_DYING,
developer8cb3ac72022-07-04 10:55:14 +08002432+ NF_FLOW_HW_DYING,
2433+ NF_FLOW_HW_DEAD,
2434+ NF_FLOW_HW_PENDING,
2435+};
2436+
2437+enum flow_offload_type {
2438+ NF_FLOW_OFFLOAD_UNSPEC = 0,
2439+ NF_FLOW_OFFLOAD_ROUTE,
2440+};
2441
2442 struct flow_offload {
2443 struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
2444- u32 flags;
2445- union {
2446- /* Your private driver data here. */
2447- u32 timeout;
2448- };
2449+ struct nf_conn *ct;
2450+ unsigned long flags;
2451+ u16 type;
2452+ u32 timeout;
2453+ struct rcu_head rcu_head;
2454 };
2455
2456 #define NF_FLOW_TIMEOUT (30 * HZ)
2457+#define nf_flowtable_time_stamp (u32)jiffies
2458+
2459+unsigned long flow_offload_get_timeout(struct flow_offload *flow);
2460+
2461+static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
2462+{
2463+ return (__s32)(timeout - nf_flowtable_time_stamp);
2464+}
2465
2466 struct nf_flow_route {
2467 struct {
2468- struct dst_entry *dst;
2469+ struct dst_entry *dst;
2470+ struct {
2471+ u32 ifindex;
2472+ struct {
2473+ u16 id;
2474+ __be16 proto;
2475+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
2476+ u8 num_encaps:2,
2477+ ingress_vlans:2;
2478+ } in;
2479+ struct {
2480+ u32 ifindex;
2481+ u32 hw_ifindex;
2482+ u8 h_source[ETH_ALEN];
2483+ u8 h_dest[ETH_ALEN];
2484+ } out;
2485+ enum flow_offload_xmit_type xmit_type;
2486 } tuple[FLOW_OFFLOAD_DIR_MAX];
2487 };
2488
2489-struct flow_offload *flow_offload_alloc(struct nf_conn *ct,
2490- struct nf_flow_route *route);
2491+struct flow_offload *flow_offload_alloc(struct nf_conn *ct);
2492 void flow_offload_free(struct flow_offload *flow);
2493
2494+static inline int
2495+nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table,
2496+ flow_setup_cb_t *cb, void *cb_priv)
2497+{
2498+ struct flow_block *block = &flow_table->flow_block;
2499+ struct flow_block_cb *block_cb;
2500+ int err = 0;
2501+
2502+ down_write(&flow_table->flow_block_lock);
2503+ block_cb = flow_block_cb_lookup(block, cb, cb_priv);
2504+ if (block_cb) {
2505+ err = -EEXIST;
2506+ goto unlock;
2507+ }
2508+
2509+ block_cb = flow_block_cb_alloc(cb, cb_priv, cb_priv, NULL);
2510+ if (IS_ERR(block_cb)) {
2511+ err = PTR_ERR(block_cb);
2512+ goto unlock;
2513+ }
2514+
2515+ list_add_tail(&block_cb->list, &block->cb_list);
2516+
2517+unlock:
2518+ up_write(&flow_table->flow_block_lock);
2519+ return err;
2520+}
2521+
2522+static inline void
2523+nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
2524+ flow_setup_cb_t *cb, void *cb_priv)
2525+{
2526+ struct flow_block *block = &flow_table->flow_block;
2527+ struct flow_block_cb *block_cb;
2528+
2529+ down_write(&flow_table->flow_block_lock);
2530+ block_cb = flow_block_cb_lookup(block, cb, cb_priv);
2531+ if (block_cb) {
2532+ list_del(&block_cb->list);
2533+ flow_block_cb_free(block_cb);
2534+ } else {
2535+ WARN_ON(true);
2536+ }
2537+ up_write(&flow_table->flow_block_lock);
2538+}
2539+
2540+int flow_offload_route_init(struct flow_offload *flow,
2541+ const struct nf_flow_route *route);
2542+
2543 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
2544+void flow_offload_refresh(struct nf_flowtable *flow_table,
2545+ struct flow_offload *flow);
2546+
2547 struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
2548 struct flow_offload_tuple *tuple);
2549+void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
2550+ struct net_device *dev);
2551 void nf_flow_table_cleanup(struct net_device *dev);
2552
2553 int nf_flow_table_init(struct nf_flowtable *flow_table);
2554 void nf_flow_table_free(struct nf_flowtable *flow_table);
2555
2556 void flow_offload_teardown(struct flow_offload *flow);
2557-static inline void flow_offload_dead(struct flow_offload *flow)
2558-{
2559- flow->flags |= FLOW_OFFLOAD_DYING;
2560-}
2561
2562-int nf_flow_snat_port(const struct flow_offload *flow,
2563- struct sk_buff *skb, unsigned int thoff,
2564- u8 protocol, enum flow_offload_tuple_dir dir);
2565-int nf_flow_dnat_port(const struct flow_offload *flow,
2566- struct sk_buff *skb, unsigned int thoff,
2567- u8 protocol, enum flow_offload_tuple_dir dir);
2568+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
2569+ void (*iter)(struct flow_offload *flow, void *data),
2570+ void *data);
2571+
2572+void nf_flow_snat_port(const struct flow_offload *flow,
2573+ struct sk_buff *skb, unsigned int thoff,
2574+ u8 protocol, enum flow_offload_tuple_dir dir);
2575+void nf_flow_dnat_port(const struct flow_offload *flow,
2576+ struct sk_buff *skb, unsigned int thoff,
2577+ u8 protocol, enum flow_offload_tuple_dir dir);
2578
2579 struct flow_ports {
2580 __be16 source, dest;
developerb7c46752022-07-04 19:51:38 +08002581@@ -126,4 +298,41 @@ unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08002582 #define MODULE_ALIAS_NF_FLOWTABLE(family) \
2583 MODULE_ALIAS("nf-flowtable-" __stringify(family))
2584
2585+void nf_flow_offload_add(struct nf_flowtable *flowtable,
2586+ struct flow_offload *flow);
2587+void nf_flow_offload_del(struct nf_flowtable *flowtable,
2588+ struct flow_offload *flow);
2589+void nf_flow_offload_stats(struct nf_flowtable *flowtable,
developerec862f42023-03-23 13:08:45 +08002590+ struct flow_offload *flow, bool force);
developer8cb3ac72022-07-04 10:55:14 +08002591+
2592+void nf_flow_table_offload_flush(struct nf_flowtable *flowtable);
2593+int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
2594+ struct net_device *dev,
2595+ enum flow_block_command cmd);
2596+int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
2597+ enum flow_offload_tuple_dir dir,
2598+ struct nf_flow_rule *flow_rule);
2599+int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
2600+ enum flow_offload_tuple_dir dir,
2601+ struct nf_flow_rule *flow_rule);
2602+
2603+int nf_flow_table_offload_init(void);
2604+void nf_flow_table_offload_exit(void);
2605+
2606+static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
2607+{
2608+ __be16 proto;
2609+
2610+ proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
2611+ sizeof(struct pppoe_hdr)));
2612+ switch (proto) {
2613+ case htons(PPP_IP):
2614+ return htons(ETH_P_IP);
2615+ case htons(PPP_IPV6):
2616+ return htons(ETH_P_IPV6);
2617+ }
2618+
2619+ return 0;
2620+}
2621+
2622 #endif /* _NF_FLOW_TABLE_H */
2623diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
2624index 806454e76..9e3963c8f 100644
2625--- a/include/net/netns/conntrack.h
2626+++ b/include/net/netns/conntrack.h
2627@@ -27,6 +27,9 @@ struct nf_tcp_net {
2628 int tcp_loose;
2629 int tcp_be_liberal;
2630 int tcp_max_retrans;
2631+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
2632+ unsigned int offload_timeout;
2633+#endif
2634 };
2635
2636 enum udp_conntrack {
2637@@ -37,6 +40,9 @@ enum udp_conntrack {
2638
2639 struct nf_udp_net {
2640 unsigned int timeouts[UDP_CT_MAX];
2641+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
2642+ unsigned int offload_timeout;
2643+#endif
2644 };
2645
2646 struct nf_icmp_net {
2647diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
2648index 336014bf8..ae698d11c 100644
2649--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
2650+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
2651@@ -105,14 +105,19 @@ enum ip_conntrack_status {
2652 IPS_OFFLOAD_BIT = 14,
2653 IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT),
2654
2655+ /* Conntrack has been offloaded to hardware. */
2656+ IPS_HW_OFFLOAD_BIT = 15,
2657+ IPS_HW_OFFLOAD = (1 << IPS_HW_OFFLOAD_BIT),
2658+
2659 /* Be careful here, modifying these bits can make things messy,
2660 * so don't let users modify them directly.
2661 */
2662 IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
2663 IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
2664- IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD),
2665+ IPS_SEQ_ADJUST | IPS_TEMPLATE |
2666+ IPS_OFFLOAD | IPS_HW_OFFLOAD),
2667
2668- __IPS_MAX_BIT = 15,
2669+ __IPS_MAX_BIT = 16,
2670 };
2671
2672 /* Connection tracking event types */
2673diff --git a/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
2674new file mode 100644
2675index 000000000..5841bbe0e
2676--- /dev/null
2677+++ b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
2678@@ -0,0 +1,17 @@
2679+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2680+#ifndef _XT_FLOWOFFLOAD_H
2681+#define _XT_FLOWOFFLOAD_H
2682+
2683+#include <linux/types.h>
2684+
2685+enum {
2686+ XT_FLOWOFFLOAD_HW = 1 << 0,
2687+
2688+ XT_FLOWOFFLOAD_MASK = XT_FLOWOFFLOAD_HW
2689+};
2690+
2691+struct xt_flowoffload_target_info {
2692+ __u32 flags;
2693+};
2694+
2695+#endif /* _XT_FLOWOFFLOAD_H */
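The structure above is the whole kernel/userspace ABI for the target; the matching iptables extension passes it verbatim as target data. A minimal sketch of how the flags field is filled (illustrative only, not taken from this patch):

    #include <linux/netfilter/xt_FLOWOFFLOAD.h>

    /* Illustrative only: request the hardware offload path for matched flows. */
    static const struct xt_flowoffload_target_info tginfo = {
            .flags = XT_FLOWOFFLOAD_HW,
    };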
2696diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
2697index 589615ec4..444ab5fae 100644
2698--- a/net/8021q/vlan_dev.c
2699+++ b/net/8021q/vlan_dev.c
2700@@ -747,6 +747,26 @@ static int vlan_dev_get_iflink(const struct net_device *dev)
2701 return real_dev->ifindex;
2702 }
2703
2704+static int vlan_dev_fill_forward_path(struct net_device_path_ctx *ctx,
2705+ struct net_device_path *path)
2706+{
2707+ struct vlan_dev_priv *vlan = vlan_dev_priv(ctx->dev);
2708+
2709+ path->type = DEV_PATH_VLAN;
2710+ path->encap.id = vlan->vlan_id;
2711+ path->encap.proto = vlan->vlan_proto;
2712+ path->dev = ctx->dev;
2713+ ctx->dev = vlan->real_dev;
2714+ if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
2715+ return -ENOSPC;
2716+
2717+ ctx->vlan[ctx->num_vlans].id = vlan->vlan_id;
2718+ ctx->vlan[ctx->num_vlans].proto = vlan->vlan_proto;
2719+ ctx->num_vlans++;
2720+
2721+ return 0;
2722+}
2723+
2724 static const struct ethtool_ops vlan_ethtool_ops = {
2725 .get_link_ksettings = vlan_ethtool_get_link_ksettings,
2726 .get_drvinfo = vlan_ethtool_get_drvinfo,
2727@@ -785,6 +805,7 @@ static const struct net_device_ops vlan_netdev_ops = {
2728 #endif
2729 .ndo_fix_features = vlan_dev_fix_features,
2730 .ndo_get_iflink = vlan_dev_get_iflink,
2731+ .ndo_fill_forward_path = vlan_dev_fill_forward_path,
2732 };
2733
2734 static void vlan_dev_free(struct net_device *dev)
2735diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
2736index 501f77f0f..0940b44cd 100644
2737--- a/net/bridge/br_device.c
2738+++ b/net/bridge/br_device.c
2739@@ -377,6 +377,54 @@ static int br_del_slave(struct net_device *dev, struct net_device *slave_dev)
2740 return br_del_if(br, slave_dev);
2741 }
2742
2743+static int br_fill_forward_path(struct net_device_path_ctx *ctx,
2744+ struct net_device_path *path)
2745+{
2746+ struct net_bridge_fdb_entry *f;
2747+ struct net_bridge_port *dst;
2748+ struct net_bridge *br;
2749+
2750+ if (netif_is_bridge_port(ctx->dev))
2751+ return -1;
2752+
2753+ br = netdev_priv(ctx->dev);
2754+
2755+ br_vlan_fill_forward_path_pvid(br, ctx, path);
2756+
2757+ f = br_fdb_find_rcu(br, ctx->daddr, path->bridge.vlan_id);
2758+ if (!f || !f->dst)
2759+ return -1;
2760+
2761+ dst = READ_ONCE(f->dst);
2762+ if (!dst)
2763+ return -1;
2764+
2765+ if (br_vlan_fill_forward_path_mode(br, dst, path))
2766+ return -1;
2767+
2768+ path->type = DEV_PATH_BRIDGE;
2769+ path->dev = dst->br->dev;
2770+ ctx->dev = dst->dev;
2771+
2772+ switch (path->bridge.vlan_mode) {
2773+ case DEV_PATH_BR_VLAN_TAG:
2774+ if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
2775+ return -ENOSPC;
2776+ ctx->vlan[ctx->num_vlans].id = path->bridge.vlan_id;
2777+ ctx->vlan[ctx->num_vlans].proto = path->bridge.vlan_proto;
2778+ ctx->num_vlans++;
2779+ break;
2780+ case DEV_PATH_BR_VLAN_UNTAG_HW:
2781+ case DEV_PATH_BR_VLAN_UNTAG:
2782+ ctx->num_vlans--;
2783+ break;
2784+ case DEV_PATH_BR_VLAN_KEEP:
2785+ break;
2786+ }
2787+
2788+ return 0;
2789+}
2790+
2791 static const struct ethtool_ops br_ethtool_ops = {
2792 .get_drvinfo = br_getinfo,
2793 .get_link = ethtool_op_get_link,
2794@@ -410,6 +458,7 @@ static const struct net_device_ops br_netdev_ops = {
2795 .ndo_bridge_setlink = br_setlink,
2796 .ndo_bridge_dellink = br_dellink,
2797 .ndo_features_check = passthru_features_check,
2798+ .ndo_fill_forward_path = br_fill_forward_path,
2799 };
2800
2801 static struct device_type br_type = {
2802diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
2803index a736be8a1..4bd9e9b57 100644
2804--- a/net/bridge/br_private.h
2805+++ b/net/bridge/br_private.h
2806@@ -912,6 +912,13 @@ void br_vlan_port_event(struct net_bridge_port *p, unsigned long event);
2807 int br_vlan_bridge_event(struct net_device *dev, unsigned long event,
2808 void *ptr);
2809
2810+void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
2811+ struct net_device_path_ctx *ctx,
2812+ struct net_device_path *path);
2813+int br_vlan_fill_forward_path_mode(struct net_bridge *br,
2814+ struct net_bridge_port *dst,
2815+ struct net_device_path *path);
2816+
2817 static inline struct net_bridge_vlan_group *br_vlan_group(
2818 const struct net_bridge *br)
2819 {
2820@@ -1066,6 +1073,19 @@ static inline int nbp_get_num_vlan_infos(struct net_bridge_port *p,
2821 return 0;
2822 }
2823
2824+static inline void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
2825+ struct net_device_path_ctx *ctx,
2826+ struct net_device_path *path)
2827+{
2828+}
2829+
2830+static inline int br_vlan_fill_forward_path_mode(struct net_bridge *br,
2831+ struct net_bridge_port *dst,
2832+ struct net_device_path *path)
2833+{
2834+ return 0;
2835+}
2836+
2837 static inline struct net_bridge_vlan_group *br_vlan_group(
2838 const struct net_bridge *br)
2839 {
2840diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
2841index 9257292bd..bcfd16924 100644
2842--- a/net/bridge/br_vlan.c
2843+++ b/net/bridge/br_vlan.c
2844@@ -1268,6 +1268,61 @@ int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid)
2845 }
2846 EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu);
2847
2848+void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
2849+ struct net_device_path_ctx *ctx,
2850+ struct net_device_path *path)
2851+{
2852+ struct net_bridge_vlan_group *vg;
2853+ int idx = ctx->num_vlans - 1;
2854+ u16 vid;
2855+
2856+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
2857+
2858+ if (!br_opt_get(br, BROPT_VLAN_ENABLED))
2859+ return;
2860+
2861+ vg = br_vlan_group(br);
2862+
2863+ if (idx >= 0 &&
2864+ ctx->vlan[idx].proto == br->vlan_proto) {
2865+ vid = ctx->vlan[idx].id;
2866+ } else {
2867+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_TAG;
2868+ vid = br_get_pvid(vg);
2869+ }
2870+
2871+ path->bridge.vlan_id = vid;
2872+ path->bridge.vlan_proto = br->vlan_proto;
2873+}
2874+
2875+int br_vlan_fill_forward_path_mode(struct net_bridge *br,
2876+ struct net_bridge_port *dst,
2877+ struct net_device_path *path)
2878+{
2879+ struct net_bridge_vlan_group *vg;
2880+ struct net_bridge_vlan *v;
2881+
2882+ if (!br_opt_get(br, BROPT_VLAN_ENABLED))
2883+ return 0;
2884+
2885+ vg = nbp_vlan_group_rcu(dst);
2886+ v = br_vlan_find(vg, path->bridge.vlan_id);
2887+ if (!v || !br_vlan_should_use(v))
2888+ return -EINVAL;
2889+
2890+ if (!(v->flags & BRIDGE_VLAN_INFO_UNTAGGED))
2891+ return 0;
2892+
2893+ if (path->bridge.vlan_mode == DEV_PATH_BR_VLAN_TAG)
2894+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
2895+ else if (v->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
2896+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG_HW;
2897+ else
2898+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG;
2899+
2900+ return 0;
2901+}
2902+
2903 int br_vlan_get_info(const struct net_device *dev, u16 vid,
2904 struct bridge_vlan_info *p_vinfo)
2905 {
2906diff --git a/net/core/dev.c b/net/core/dev.c
2907index fe2c856b9..4f0edb218 100644
2908--- a/net/core/dev.c
2909+++ b/net/core/dev.c
2910@@ -639,6 +639,52 @@ int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
2911 }
2912 EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
2913
2914+static struct net_device_path *dev_fwd_path(struct net_device_path_stack *stack)
2915+{
2916+ int k = stack->num_paths++;
2917+
2918+ if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX))
2919+ return NULL;
2920+
2921+ return &stack->path[k];
2922+}
2923+
2924+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
2925+ struct net_device_path_stack *stack)
2926+{
2927+ const struct net_device *last_dev;
2928+ struct net_device_path_ctx ctx = {
2929+ .dev = dev,
2930+ };
2931+ struct net_device_path *path;
2932+ int ret = 0;
2933+
2934+ memcpy(ctx.daddr, daddr, sizeof(ctx.daddr));
2935+ stack->num_paths = 0;
2936+ while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
2937+ last_dev = ctx.dev;
2938+ path = dev_fwd_path(stack);
2939+ if (!path)
2940+ return -1;
2941+
2942+ memset(path, 0, sizeof(struct net_device_path));
2943+ ret = ctx.dev->netdev_ops->ndo_fill_forward_path(&ctx, path);
2944+ if (ret < 0)
2945+ return -1;
2946+
2947+ if (WARN_ON_ONCE(last_dev == ctx.dev))
2948+ return -1;
2949+ }
2950+ path = dev_fwd_path(stack);
2951+ if (!path)
2952+ return -1;
2953+ path->type = DEV_PATH_ETHERNET;
2954+ path->dev = ctx.dev;
2955+
2956+ return ret;
2957+}
2958+EXPORT_SYMBOL_GPL(dev_fill_forward_path);
2959+
2960 /**
2961 * __dev_get_by_name - find a device by its name
2962 * @net: the applicable net namespace
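dev_fill_forward_path() above repeatedly invokes the new ndo_fill_forward_path() callbacks (VLAN, bridge, PPPoE and DSA in this patch) until it reaches a device without one, recording one net_device_path per hop. A hedged caller sketch, purely illustrative and not part of this patch:

    #include <linux/netdevice.h>
    #include <linux/printk.h>

    /* Illustrative only: resolve the transmit path for daddr via dev and
     * print every hop recorded in the path stack.
     */
    static void dump_forward_path(const struct net_device *dev, const u8 *daddr)
    {
            struct net_device_path_stack stack;
            int i;

            if (dev_fill_forward_path(dev, daddr, &stack) < 0)
                    return;

            for (i = 0; i < stack.num_paths; i++) {
                    const struct net_device_path *path = &stack.path[i];

                    pr_debug("hop %d: dev %s type %d\n", i,
                             path->dev ? path->dev->name : "(none)", path->type);
            }
    }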
2963diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
2964index ca80f8699..35a1249a9 100644
2965--- a/net/dsa/dsa.c
2966+++ b/net/dsa/dsa.c
2967@@ -329,6 +329,15 @@ int call_dsa_notifiers(unsigned long val, struct net_device *dev,
2968 }
2969 EXPORT_SYMBOL_GPL(call_dsa_notifiers);
2970
2971+struct dsa_port *dsa_port_from_netdev(struct net_device *netdev)
2972+{
2973+ if (!netdev || !dsa_slave_dev_check(netdev))
2974+ return ERR_PTR(-ENODEV);
2975+
2976+ return dsa_slave_to_port(netdev);
2977+}
2978+EXPORT_SYMBOL_GPL(dsa_port_from_netdev);
2979+
2980 static int __init dsa_init_module(void)
2981 {
2982 int rc;
2983diff --git a/net/dsa/slave.c b/net/dsa/slave.c
2984index 036fda317..2dfaa1eac 100644
2985--- a/net/dsa/slave.c
2986+++ b/net/dsa/slave.c
developer8cb3ac72022-07-04 10:55:14 +08002987@@ -1033,14 +1031,32 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
2988 }
2989 }
2990
2991+static int dsa_slave_setup_ft_block(struct dsa_switch *ds, int port,
2992+ void *type_data)
2993+{
2994+ struct dsa_port *cpu_dp = dsa_to_port(ds, port)->cpu_dp;
2995+ struct net_device *master = cpu_dp->master;
2996+
2997+ if (!master->netdev_ops->ndo_setup_tc)
2998+ return -EOPNOTSUPP;
2999+
3000+ return master->netdev_ops->ndo_setup_tc(master, TC_SETUP_FT, type_data);
3001+}
3002+
3003 static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
3004 void *type_data)
3005 {
3006 struct dsa_port *dp = dsa_slave_to_port(dev);
3007 struct dsa_switch *ds = dp->ds;
3008
3009- if (type == TC_SETUP_BLOCK)
3010+ switch (type) {
3011+ case TC_SETUP_BLOCK:
3012 return dsa_slave_setup_tc_block(dev, type_data);
3013+ case TC_SETUP_FT:
3014+ return dsa_slave_setup_ft_block(ds, dp->index, type_data);
3015+ default:
3016+ break;
3017+ }
3018
3019 if (!ds->ops->port_setup_tc)
3020 return -EOPNOTSUPP;
3021@@ -1226,6 +1242,21 @@ static struct devlink_port *dsa_slave_get_devlink_port(struct net_device *dev)
3022 return dp->ds->devlink ? &dp->devlink_port : NULL;
3023 }
3024
3025+static int dsa_slave_fill_forward_path(struct net_device_path_ctx *ctx,
3026+ struct net_device_path *path)
3027+{
3028+ struct dsa_port *dp = dsa_slave_to_port(ctx->dev);
3029+ struct dsa_port *cpu_dp = dp->cpu_dp;
3030+
3031+ path->dev = ctx->dev;
3032+ path->type = DEV_PATH_DSA;
3033+ path->dsa.proto = cpu_dp->tag_ops->proto;
3034+ path->dsa.port = dp->index;
3035+ ctx->dev = cpu_dp->master;
3036+
3037+ return 0;
3038+}
3039+
3040 static const struct net_device_ops dsa_slave_netdev_ops = {
3041 .ndo_open = dsa_slave_open,
3042 .ndo_stop = dsa_slave_close,
3043@@ -1250,6 +1281,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
3044 .ndo_vlan_rx_add_vid = dsa_slave_vlan_rx_add_vid,
3045 .ndo_vlan_rx_kill_vid = dsa_slave_vlan_rx_kill_vid,
3046 .ndo_get_devlink_port = dsa_slave_get_devlink_port,
3047+ .ndo_fill_forward_path = dsa_slave_fill_forward_path,
3048 };
3049
3050 static struct device_type dsa_type = {
developer91c043c2022-12-08 18:35:53 +08003051@@ -1497,7 +1529,8 @@ void dsa_slave_destroy(struct net_device *slave_dev)
3052 bool dsa_slave_dev_check(const struct net_device *dev)
developer8cb3ac72022-07-04 10:55:14 +08003053 {
3054 return dev->netdev_ops == &dsa_slave_netdev_ops;
3055 }
3056+EXPORT_SYMBOL_GPL(dsa_slave_dev_check);
3057
3058 static int dsa_slave_changeupper(struct net_device *dev,
3059 struct netdev_notifier_changeupper_info *info)
3060diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
3061index f17b40211..803b92e4c 100644
3062--- a/net/ipv4/netfilter/Kconfig
3063+++ b/net/ipv4/netfilter/Kconfig
3064@@ -56,8 +56,6 @@ config NF_TABLES_ARP
3065 help
3066 This option enables the ARP support for nf_tables.
3067
3068-endif # NF_TABLES
3069-
3070 config NF_FLOW_TABLE_IPV4
3071 tristate "Netfilter flow table IPv4 module"
3072 depends on NF_FLOW_TABLE
3073@@ -66,6 +64,8 @@ config NF_FLOW_TABLE_IPV4
3074
3075 To compile it as a module, choose M here.
3076
3077+endif # NF_TABLES
3078+
3079 config NF_DUP_IPV4
3080 tristate "Netfilter IPv4 packet duplication to alternate destination"
3081 depends on !NF_CONNTRACK || NF_CONNTRACK
3082diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
3083index 5585e3a94..bb76f6061 100644
3084--- a/net/ipv6/ip6_output.c
3085+++ b/net/ipv6/ip6_output.c
3086@@ -607,7 +607,7 @@ int ip6_forward(struct sk_buff *skb)
3087 }
3088 }
3089
3090- mtu = ip6_dst_mtu_forward(dst);
3091+ mtu = ip6_dst_mtu_maybe_forward(dst, true);
3092 if (mtu < IPV6_MIN_MTU)
3093 mtu = IPV6_MIN_MTU;
3094
3095diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
3096index 69443e9a3..0b481d236 100644
3097--- a/net/ipv6/netfilter/Kconfig
3098+++ b/net/ipv6/netfilter/Kconfig
3099@@ -45,7 +45,6 @@ config NFT_FIB_IPV6
3100 multicast or blackhole.
3101
3102 endif # NF_TABLES_IPV6
3103-endif # NF_TABLES
3104
3105 config NF_FLOW_TABLE_IPV6
3106 tristate "Netfilter flow table IPv6 module"
3107@@ -55,6 +54,8 @@ config NF_FLOW_TABLE_IPV6
3108
3109 To compile it as a module, choose M here.
3110
3111+endif # NF_TABLES
3112+
3113 config NF_DUP_IPV6
3114 tristate "Netfilter IPv6 packet duplication to alternate destination"
3115 depends on !NF_CONNTRACK || NF_CONNTRACK
3116diff --git a/net/ipv6/route.c b/net/ipv6/route.c
3117index 98aaf0b79..2b357ac71 100644
3118--- a/net/ipv6/route.c
3119+++ b/net/ipv6/route.c
3120@@ -83,7 +83,7 @@ enum rt6_nud_state {
3121
3122 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
3123 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
3124-static unsigned int ip6_mtu(const struct dst_entry *dst);
3125+static unsigned int ip6_mtu(const struct dst_entry *dst);
3126 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
3127 static void ip6_dst_destroy(struct dst_entry *);
3128 static void ip6_dst_ifdown(struct dst_entry *,
3129@@ -3125,25 +3125,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
3130
3131 static unsigned int ip6_mtu(const struct dst_entry *dst)
3132 {
3133- struct inet6_dev *idev;
3134- unsigned int mtu;
3135-
3136- mtu = dst_metric_raw(dst, RTAX_MTU);
3137- if (mtu)
3138- goto out;
3139-
3140- mtu = IPV6_MIN_MTU;
3141-
3142- rcu_read_lock();
3143- idev = __in6_dev_get(dst->dev);
3144- if (idev)
3145- mtu = idev->cnf.mtu6;
3146- rcu_read_unlock();
3147-
3148-out:
3149- mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
3150-
3151- return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
3152+ return ip6_dst_mtu_maybe_forward(dst, false);
3153 }
3154
3155 /* MTU selection:
3156diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
3157index b967763f5..c040e713a 100644
3158--- a/net/netfilter/Kconfig
3159+++ b/net/netfilter/Kconfig
3160@@ -690,8 +690,6 @@ config NFT_FIB_NETDEV
3161
3162 endif # NF_TABLES_NETDEV
3163
3164-endif # NF_TABLES
3165-
3166 config NF_FLOW_TABLE_INET
3167 tristate "Netfilter flow table mixed IPv4/IPv6 module"
3168 depends on NF_FLOW_TABLE
3169@@ -700,11 +698,12 @@ config NF_FLOW_TABLE_INET
3170
3171 To compile it as a module, choose M here.
3172
3173+endif # NF_TABLES
3174+
3175 config NF_FLOW_TABLE
3176 tristate "Netfilter flow table module"
3177 depends on NETFILTER_INGRESS
3178 depends on NF_CONNTRACK
3179- depends on NF_TABLES
3180 help
3181 This option adds the flow table core infrastructure.
3182
3183@@ -984,6 +983,15 @@ config NETFILTER_XT_TARGET_NOTRACK
3184 depends on NETFILTER_ADVANCED
3185 select NETFILTER_XT_TARGET_CT
3186
3187+config NETFILTER_XT_TARGET_FLOWOFFLOAD
3188+ tristate '"FLOWOFFLOAD" target support'
3189+ depends on NF_FLOW_TABLE
3190+ depends on NETFILTER_INGRESS
3191+ help
3192+ This option adds a `FLOWOFFLOAD' target, which uses the nf_flow_offload
3193+ module to speed up processing of packets by bypassing the usual
3194+ netfilter chains.
3195+
3196 config NETFILTER_XT_TARGET_RATEEST
3197 tristate '"RATEEST" target support'
3198 depends on NETFILTER_ADVANCED
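Usage note for the FLOWOFFLOAD option above: with the target built, a forwarding rule along the lines of iptables -A FORWARD -m conntrack --ctstate RELATED,ESTABLISHED -j FLOWOFFLOAD --hw (exact syntax depends on the matching userspace iptables extension, so treat this as an assumed example) hands established connections to the flow table, and --hw maps to XT_FLOWOFFLOAD_HW to request the hardware path.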
3199diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
3200index 4fc075b61..d93a121bc 100644
3201--- a/net/netfilter/Makefile
3202+++ b/net/netfilter/Makefile
3203@@ -120,7 +120,8 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
3204
3205 # flow table infrastructure
3206 obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
3207-nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
3208+nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \
3209+ nf_flow_table_offload.o
3210
3211 obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
3212
3213@@ -140,6 +141,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
3214 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
3215 obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
3216 obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
3217+obj-$(CONFIG_NETFILTER_XT_TARGET_FLOWOFFLOAD) += xt_FLOWOFFLOAD.o
3218 obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
3219 obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
3220 obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
3221diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
3222index f6ab6f484..f689e19d8 100644
3223--- a/net/netfilter/nf_conntrack_core.c
3224+++ b/net/netfilter/nf_conntrack_core.c
3225@@ -864,9 +864,8 @@ out:
3226 }
3227 EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
3228
3229-static inline void nf_ct_acct_update(struct nf_conn *ct,
3230- enum ip_conntrack_info ctinfo,
3231- unsigned int len)
3232+void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
3233+ unsigned int bytes)
3234 {
3235 struct nf_conn_acct *acct;
3236
3237@@ -874,10 +873,11 @@ static inline void nf_ct_acct_update(struct nf_conn *ct,
3238 if (acct) {
3239 struct nf_conn_counter *counter = acct->counter;
3240
3241- atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
3242- atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes);
3243+ atomic64_add(packets, &counter[dir].packets);
3244+ atomic64_add(bytes, &counter[dir].bytes);
3245 }
3246 }
3247+EXPORT_SYMBOL_GPL(nf_ct_acct_add);
3248
3249 static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
3250 const struct nf_conn *loser_ct)
3251@@ -891,7 +891,7 @@ static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
3252
3253 /* u32 should be fine since we must have seen one packet. */
3254 bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes);
3255- nf_ct_acct_update(ct, ctinfo, bytes);
3256+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), bytes);
3257 }
3258 }
3259
3260@@ -1238,8 +1238,10 @@ static void gc_worker(struct work_struct *work)
3261
3262 tmp = nf_ct_tuplehash_to_ctrack(h);
3263
3264- if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
3265+ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
3266+ nf_ct_offload_timeout(tmp);
3267 continue;
3268+ }
3269
3270 if (nf_ct_is_expired(tmp)) {
3271 nf_ct_gc_expired(tmp);
3272@@ -1763,7 +1765,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
3273 WRITE_ONCE(ct->timeout, extra_jiffies);
3274 acct:
3275 if (do_acct)
3276- nf_ct_acct_update(ct, ctinfo, skb->len);
3277+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
3278 }
3279 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
3280
3281@@ -1771,7 +1773,7 @@ bool nf_ct_kill_acct(struct nf_conn *ct,
3282 enum ip_conntrack_info ctinfo,
3283 const struct sk_buff *skb)
3284 {
3285- nf_ct_acct_update(ct, ctinfo, skb->len);
3286+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
3287
3288 return nf_ct_delete(ct, 0, 0);
3289 }
3290diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
3291index 7204f0366..3742bae21 100644
3292--- a/net/netfilter/nf_conntrack_proto_tcp.c
3293+++ b/net/netfilter/nf_conntrack_proto_tcp.c
3294@@ -1453,6 +1453,10 @@ void nf_conntrack_tcp_init_net(struct net *net)
3295 tn->tcp_loose = nf_ct_tcp_loose;
3296 tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
3297 tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
3298+
3299+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3300+ tn->offload_timeout = 30 * HZ;
3301+#endif
3302 }
3303
3304 const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
3305diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
3306index e3a2d018f..a1579d6c3 100644
3307--- a/net/netfilter/nf_conntrack_proto_udp.c
3308+++ b/net/netfilter/nf_conntrack_proto_udp.c
3309@@ -267,6 +267,10 @@ void nf_conntrack_udp_init_net(struct net *net)
3310
3311 for (i = 0; i < UDP_CT_MAX; i++)
3312 un->timeouts[i] = udp_timeouts[i];
3313+
3314+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3315+ un->offload_timeout = 30 * HZ;
3316+#endif
3317 }
3318
3319 const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp =
3320diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
3321index 9c6259c28..10d9f93ce 100644
3322--- a/net/netfilter/nf_conntrack_standalone.c
3323+++ b/net/netfilter/nf_conntrack_standalone.c
3324@@ -353,7 +353,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
3325 if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
3326 goto release;
3327
3328- if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
3329+ if (test_bit(IPS_HW_OFFLOAD_BIT, &ct->status))
3330+ seq_puts(s, "[HW_OFFLOAD] ");
3331+ else if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
3332 seq_puts(s, "[OFFLOAD] ");
3333 else if (test_bit(IPS_ASSURED_BIT, &ct->status))
3334 seq_puts(s, "[ASSURED] ");
3335@@ -620,11 +622,17 @@ enum nf_ct_sysctl_index {
3336 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE,
3337 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS,
3338 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
3339+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3340+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
3341+#endif
3342 NF_SYSCTL_CT_PROTO_TCP_LOOSE,
3343 NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
3344 NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS,
3345 NF_SYSCTL_CT_PROTO_TIMEOUT_UDP,
3346 NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
3347+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3348+ NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
3349+#endif
3350 NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
3351 NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
3352 #ifdef CONFIG_NF_CT_PROTO_SCTP
3353@@ -812,6 +820,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
3354 .mode = 0644,
3355 .proc_handler = proc_dointvec_jiffies,
3356 },
3357+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3358+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD] = {
3359+ .procname = "nf_flowtable_tcp_timeout",
3360+ .maxlen = sizeof(unsigned int),
3361+ .mode = 0644,
3362+ .proc_handler = proc_dointvec_jiffies,
3363+ },
3364+#endif
3365 [NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
3366 .procname = "nf_conntrack_tcp_loose",
3367 .maxlen = sizeof(int),
3368@@ -846,6 +862,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
3369 .mode = 0644,
3370 .proc_handler = proc_dointvec_jiffies,
3371 },
3372+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3373+ [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = {
3374+ .procname = "nf_flowtable_udp_timeout",
3375+ .maxlen = sizeof(unsigned int),
3376+ .mode = 0644,
3377+ .proc_handler = proc_dointvec_jiffies,
3378+ },
3379+#endif
3380 [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
3381 .procname = "nf_conntrack_icmp_timeout",
3382 .maxlen = sizeof(unsigned int),
3383@@ -1028,6 +1052,11 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
3384 XASSIGN(LIBERAL, &tn->tcp_be_liberal);
3385 XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans);
3386 #undef XASSIGN
3387+
3388+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3389+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout;
3390+#endif
3391+
3392 }
3393
3394 static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net,
3395@@ -1115,6 +1144,9 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
3396 table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6].data = &nf_icmpv6_pernet(net)->timeout;
3397 table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP].data = &un->timeouts[UDP_CT_UNREPLIED];
3398 table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
3399+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3400+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
3401+#endif
3402
3403 nf_conntrack_standalone_init_tcp_sysctl(net, table);
3404 nf_conntrack_standalone_init_sctp_sysctl(net, table);
3405diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
developerb7c46752022-07-04 19:51:38 +08003406index f212cec0..10365581 100644
developer8cb3ac72022-07-04 10:55:14 +08003407--- a/net/netfilter/nf_flow_table_core.c
3408+++ b/net/netfilter/nf_flow_table_core.c
developerb7c46752022-07-04 19:51:38 +08003409@@ -7,43 +7,21 @@
developer8cb3ac72022-07-04 10:55:14 +08003410 #include <linux/netdevice.h>
3411 #include <net/ip.h>
3412 #include <net/ip6_route.h>
3413-#include <net/netfilter/nf_tables.h>
3414 #include <net/netfilter/nf_flow_table.h>
3415 #include <net/netfilter/nf_conntrack.h>
3416 #include <net/netfilter/nf_conntrack_core.h>
3417 #include <net/netfilter/nf_conntrack_l4proto.h>
3418 #include <net/netfilter/nf_conntrack_tuple.h>
3419
3420-struct flow_offload_entry {
3421- struct flow_offload flow;
3422- struct nf_conn *ct;
3423- struct rcu_head rcu_head;
3424-};
3425-
3426 static DEFINE_MUTEX(flowtable_lock);
3427 static LIST_HEAD(flowtables);
3428
developerb7c46752022-07-04 19:51:38 +08003429-static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
3430-{
3431- const struct rt6_info *rt;
3432-
3433- if (flow_tuple->l3proto == NFPROTO_IPV6) {
3434- rt = (const struct rt6_info *)flow_tuple->dst_cache;
3435- return rt6_get_cookie(rt);
3436- }
3437-
3438- return 0;
3439-}
3440-
developer8cb3ac72022-07-04 10:55:14 +08003441 static void
3442-flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
3443- struct nf_flow_route *route,
3444+flow_offload_fill_dir(struct flow_offload *flow,
3445 enum flow_offload_tuple_dir dir)
3446 {
3447 struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
3448- struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
3449- struct dst_entry *other_dst = route->tuple[!dir].dst;
3450- struct dst_entry *dst = route->tuple[dir].dst;
3451+ struct nf_conntrack_tuple *ctt = &flow->ct->tuplehash[dir].tuple;
3452
3453 ft->dir = dir;
3454
developerb7c46752022-07-04 19:51:38 +08003455@@ -51,12 +29,10 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
developer8cb3ac72022-07-04 10:55:14 +08003456 case NFPROTO_IPV4:
3457 ft->src_v4 = ctt->src.u3.in;
3458 ft->dst_v4 = ctt->dst.u3.in;
3459- ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
3460 break;
3461 case NFPROTO_IPV6:
3462 ft->src_v6 = ctt->src.u3.in6;
3463 ft->dst_v6 = ctt->dst.u3.in6;
3464- ft->mtu = ip6_dst_mtu_forward(dst);
3465 break;
3466 }
3467
developerb7c46752022-07-04 19:51:38 +08003468@@ -64,50 +40,32 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
developer8cb3ac72022-07-04 10:55:14 +08003469 ft->l4proto = ctt->dst.protonum;
3470 ft->src_port = ctt->src.u.tcp.port;
3471 ft->dst_port = ctt->dst.u.tcp.port;
3472-
3473- ft->iifidx = other_dst->dev->ifindex;
3474- ft->dst_cache = dst;
developerb7c46752022-07-04 19:51:38 +08003475- ft->dst_cookie = flow_offload_dst_cookie(ft);
developer8cb3ac72022-07-04 10:55:14 +08003476 }
3477
3478-struct flow_offload *
3479-flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
3480+struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
3481 {
3482- struct flow_offload_entry *entry;
3483 struct flow_offload *flow;
3484
3485 if (unlikely(nf_ct_is_dying(ct) ||
3486 !atomic_inc_not_zero(&ct->ct_general.use)))
3487 return NULL;
3488
3489- entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
3490- if (!entry)
3491+ flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
3492+ if (!flow)
3493 goto err_ct_refcnt;
3494
3495- flow = &entry->flow;
developerb7c46752022-07-04 19:51:38 +08003496-
developer8cb3ac72022-07-04 10:55:14 +08003497- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
3498- goto err_dst_cache_original;
3499-
3500- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
3501- goto err_dst_cache_reply;
developerb7c46752022-07-04 19:51:38 +08003502+ flow->ct = ct;
3503
developer8cb3ac72022-07-04 10:55:14 +08003504- entry->ct = ct;
3505-
3506- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
3507- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
3508+ flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
3509+ flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_REPLY);
3510
3511 if (ct->status & IPS_SRC_NAT)
3512- flow->flags |= FLOW_OFFLOAD_SNAT;
3513+ __set_bit(NF_FLOW_SNAT, &flow->flags);
3514 if (ct->status & IPS_DST_NAT)
3515- flow->flags |= FLOW_OFFLOAD_DNAT;
3516+ __set_bit(NF_FLOW_DNAT, &flow->flags);
3517
3518 return flow;
3519
3520-err_dst_cache_reply:
3521- dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
3522-err_dst_cache_original:
3523- kfree(entry);
3524 err_ct_refcnt:
3525 nf_ct_put(ct);
3526
developerb7c46752022-07-04 19:51:38 +08003527@@ -115,40 +73,135 @@ flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
developer8cb3ac72022-07-04 10:55:14 +08003528 }
3529 EXPORT_SYMBOL_GPL(flow_offload_alloc);
3530
3531-static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
3532+static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
3533 {
3534- tcp->state = TCP_CONNTRACK_ESTABLISHED;
3535- tcp->seen[0].td_maxwin = 0;
3536- tcp->seen[1].td_maxwin = 0;
3537+ const struct rt6_info *rt;
3538+
3539+ if (flow_tuple->l3proto == NFPROTO_IPV6) {
3540+ rt = (const struct rt6_info *)flow_tuple->dst_cache;
3541+ return rt6_get_cookie(rt);
3542+ }
3543+
3544+ return 0;
3545 }
3546
3547-#define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ)
3548-#define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ)
3549+static int flow_offload_fill_route(struct flow_offload *flow,
3550+ const struct nf_flow_route *route,
3551+ enum flow_offload_tuple_dir dir)
3552+{
3553+ struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
3554+ struct dst_entry *dst = route->tuple[dir].dst;
3555+ int i, j = 0;
3556+
3557+ switch (flow_tuple->l3proto) {
3558+ case NFPROTO_IPV4:
3559+ flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
3560+ break;
3561+ case NFPROTO_IPV6:
3562+ flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true);
3563+ break;
3564+ }
3565+
3566+ flow_tuple->iifidx = route->tuple[dir].in.ifindex;
3567+ for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {
3568+ flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
3569+ flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
3570+ if (route->tuple[dir].in.ingress_vlans & BIT(i))
3571+ flow_tuple->in_vlan_ingress |= BIT(j);
3572+ j++;
3573+ }
3574+ flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
3575+
3576+ switch (route->tuple[dir].xmit_type) {
3577+ case FLOW_OFFLOAD_XMIT_DIRECT:
3578+ memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
3579+ ETH_ALEN);
3580+ memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
3581+ ETH_ALEN);
3582+ flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
3583+ flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
3584+ break;
3585+ case FLOW_OFFLOAD_XMIT_XFRM:
3586+ case FLOW_OFFLOAD_XMIT_NEIGH:
3587+ if (!dst_hold_safe(route->tuple[dir].dst))
3588+ return -1;
3589+
3590+ flow_tuple->dst_cache = dst;
3591+ flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
3592+ break;
3593+ default:
3594+ WARN_ON_ONCE(1);
3595+ break;
3596+ }
3597+ flow_tuple->xmit_type = route->tuple[dir].xmit_type;
developerb7c46752022-07-04 19:51:38 +08003598+
developer8cb3ac72022-07-04 10:55:14 +08003599+ return 0;
3600+}
3601+
3602+static void nft_flow_dst_release(struct flow_offload *flow,
3603+ enum flow_offload_tuple_dir dir)
3604+{
3605+ if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
3606+ flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
3607+ dst_release(flow->tuplehash[dir].tuple.dst_cache);
3608+}
3609+
3610+int flow_offload_route_init(struct flow_offload *flow,
3611+ const struct nf_flow_route *route)
3612+{
3613+ int err;
3614+
3615+ err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
3616+ if (err < 0)
3617+ return err;
3618+
3619+ err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
3620+ if (err < 0)
3621+ goto err_route_reply;
3622+
3623+ flow->type = NF_FLOW_OFFLOAD_ROUTE;
3624+
3625+ return 0;
3626+
3627+err_route_reply:
3628+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
3629+
3630+ return err;
3631+}
3632+EXPORT_SYMBOL_GPL(flow_offload_route_init);
developerb7c46752022-07-04 19:51:38 +08003633
3634-static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
developer8cb3ac72022-07-04 10:55:14 +08003635+static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
3636 {
3637- return (__s32)(timeout - (u32)jiffies);
3638+ tcp->state = TCP_CONNTRACK_ESTABLISHED;
3639+ tcp->seen[0].td_maxwin = 0;
3640+ tcp->seen[1].td_maxwin = 0;
3641 }
3642
3643 static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
3644 {
3645- const struct nf_conntrack_l4proto *l4proto;
3646+ struct net *net = nf_ct_net(ct);
3647 int l4num = nf_ct_protonum(ct);
3648- unsigned int timeout;
3649+ s32 timeout;
3650
3651- l4proto = nf_ct_l4proto_find(l4num);
3652- if (!l4proto)
3653- return;
3654+ if (l4num == IPPROTO_TCP) {
3655+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
3656
3657- if (l4num == IPPROTO_TCP)
3658- timeout = NF_FLOWTABLE_TCP_PICKUP_TIMEOUT;
3659- else if (l4num == IPPROTO_UDP)
3660- timeout = NF_FLOWTABLE_UDP_PICKUP_TIMEOUT;
3661- else
3662+ timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
3663+ timeout -= tn->offload_timeout;
3664+ } else if (l4num == IPPROTO_UDP) {
3665+ struct nf_udp_net *tn = nf_udp_pernet(net);
3666+
3667+ timeout = tn->timeouts[UDP_CT_REPLIED];
3668+ timeout -= tn->offload_timeout;
3669+ } else {
3670 return;
3671+ }
3672+
3673+ if (timeout < 0)
3674+ timeout = 0;
3675
3676- if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
3677- ct->timeout = nfct_time_stamp + timeout;
3678+ if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
3679+ WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
3680 }
3681
3682 static void flow_offload_fixup_ct_state(struct nf_conn *ct)
developerb7c46752022-07-04 19:51:38 +08003683@@ -163,17 +216,23 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
developer8cb3ac72022-07-04 10:55:14 +08003684 flow_offload_fixup_ct_timeout(ct);
3685 }
3686
3687-void flow_offload_free(struct flow_offload *flow)
3688+static void flow_offload_route_release(struct flow_offload *flow)
3689 {
3690- struct flow_offload_entry *e;
3691+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
3692+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY);
3693+}
3694
3695- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
3696- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
3697- e = container_of(flow, struct flow_offload_entry, flow);
3698- if (flow->flags & FLOW_OFFLOAD_DYING)
3699- nf_ct_delete(e->ct, 0, 0);
3700- nf_ct_put(e->ct);
3701- kfree_rcu(e, rcu_head);
3702+void flow_offload_free(struct flow_offload *flow)
3703+{
3704+ switch (flow->type) {
3705+ case NF_FLOW_OFFLOAD_ROUTE:
3706+ flow_offload_route_release(flow);
3707+ break;
3708+ default:
3709+ break;
3710+ }
3711+ nf_ct_put(flow->ct);
3712+ kfree_rcu(flow, rcu_head);
3713 }
3714 EXPORT_SYMBOL_GPL(flow_offload_free);
3715
developerb7c46752022-07-04 19:51:38 +08003716@@ -181,14 +240,14 @@ static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
developer8cb3ac72022-07-04 10:55:14 +08003717 {
3718 const struct flow_offload_tuple *tuple = data;
3719
3720- return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
3721+ return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed);
3722 }
3723
3724 static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
3725 {
3726 const struct flow_offload_tuple_rhash *tuplehash = data;
3727
3728- return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
3729+ return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed);
3730 }
3731
3732 static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
developerb7c46752022-07-04 19:51:38 +08003733@@ -197,7 +256,7 @@ static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
developer8cb3ac72022-07-04 10:55:14 +08003734 const struct flow_offload_tuple *tuple = arg->key;
3735 const struct flow_offload_tuple_rhash *x = ptr;
3736
3737- if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
3738+ if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash)))
3739 return 1;
3740
3741 return 0;
developerb7c46752022-07-04 19:51:38 +08003742@@ -211,30 +270,30 @@ static const struct rhashtable_params nf_flow_offload_rhash_params = {
developer8cb3ac72022-07-04 10:55:14 +08003743 .automatic_shrinking = true,
3744 };
3745
3746-#define DAY (86400 * HZ)
3747-
3748-/* Set an arbitrary timeout large enough not to ever expire, this save
3749- * us a check for the IPS_OFFLOAD_BIT from the packet path via
3750- * nf_ct_is_expired().
3751- */
3752-static void nf_ct_offload_timeout(struct flow_offload *flow)
3753+unsigned long flow_offload_get_timeout(struct flow_offload *flow)
3754 {
3755- struct flow_offload_entry *entry;
3756- struct nf_conn *ct;
3757+ unsigned long timeout = NF_FLOW_TIMEOUT;
3758+ struct net *net = nf_ct_net(flow->ct);
3759+ int l4num = nf_ct_protonum(flow->ct);
developer8cb3ac72022-07-04 10:55:14 +08003760
3761- entry = container_of(flow, struct flow_offload_entry, flow);
3762- ct = entry->ct;
developerb7c46752022-07-04 19:51:38 +08003763+ if (l4num == IPPROTO_TCP) {
3764+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
3765
3766- if (nf_ct_expires(ct) < DAY / 2)
3767- ct->timeout = nfct_time_stamp + DAY;
developer8cb3ac72022-07-04 10:55:14 +08003768+ timeout = tn->offload_timeout;
3769+ } else if (l4num == IPPROTO_UDP) {
3770+ struct nf_udp_net *tn = nf_udp_pernet(net);
3771+
3772+ timeout = tn->offload_timeout;
3773+ }
developerb7c46752022-07-04 19:51:38 +08003774+
developer8cb3ac72022-07-04 10:55:14 +08003775+ return timeout;
3776 }
3777
3778 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
3779 {
3780 int err;
3781
3782- nf_ct_offload_timeout(flow);
3783- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
3784+ flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
3785
3786 err = rhashtable_insert_fast(&flow_table->rhashtable,
3787 &flow->tuplehash[0].node,
developerb7c46752022-07-04 19:51:38 +08003788@@ -252,10 +311,35 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
developer8cb3ac72022-07-04 10:55:14 +08003789 return err;
3790 }
3791
3792+ nf_ct_offload_timeout(flow->ct);
3793+
3794+ if (nf_flowtable_hw_offload(flow_table)) {
3795+ __set_bit(NF_FLOW_HW, &flow->flags);
3796+ nf_flow_offload_add(flow_table, flow);
3797+ }
3798+
3799 return 0;
3800 }
3801 EXPORT_SYMBOL_GPL(flow_offload_add);
3802
3803+void flow_offload_refresh(struct nf_flowtable *flow_table,
3804+ struct flow_offload *flow)
3805+{
3806+ u32 timeout;
3807+
3808+ timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
3809+ if (timeout - READ_ONCE(flow->timeout) > HZ)
3810+ WRITE_ONCE(flow->timeout, timeout);
3811+ else
3812+ return;
3813+
3814+ if (likely(!nf_flowtable_hw_offload(flow_table)))
3815+ return;
3816+
3817+ nf_flow_offload_add(flow_table, flow);
3818+}
3819+EXPORT_SYMBOL_GPL(flow_offload_refresh);
3820+
3821 static inline bool nf_flow_has_expired(const struct flow_offload *flow)
3822 {
3823 return nf_flow_timeout_delta(flow->timeout) <= 0;
developerb7c46752022-07-04 19:51:38 +08003824@@ -264,8 +348,6 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
developer8cb3ac72022-07-04 10:55:14 +08003825 static void flow_offload_del(struct nf_flowtable *flow_table,
3826 struct flow_offload *flow)
3827 {
3828- struct flow_offload_entry *e;
3829-
3830 rhashtable_remove_fast(&flow_table->rhashtable,
3831 &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
3832 nf_flow_offload_rhash_params);
developerb7c46752022-07-04 19:51:38 +08003833@@ -273,28 +355,21 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003834 &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
3835 nf_flow_offload_rhash_params);
3836
3837- e = container_of(flow, struct flow_offload_entry, flow);
3838- clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
3839+ clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
3840
3841 if (nf_flow_has_expired(flow))
3842- flow_offload_fixup_ct(e->ct);
3843- else if (flow->flags & FLOW_OFFLOAD_TEARDOWN)
3844- flow_offload_fixup_ct_timeout(e->ct);
3845-
3846- if (!(flow->flags & FLOW_OFFLOAD_TEARDOWN))
3847- flow_offload_fixup_ct_state(e->ct);
3848+ flow_offload_fixup_ct(flow->ct);
3849+ else
3850+ flow_offload_fixup_ct_timeout(flow->ct);
3851
3852 flow_offload_free(flow);
3853 }
3854
3855 void flow_offload_teardown(struct flow_offload *flow)
3856 {
3857- struct flow_offload_entry *e;
developerb7c46752022-07-04 19:51:38 +08003858-
3859- flow->flags |= FLOW_OFFLOAD_TEARDOWN;
developer8cb3ac72022-07-04 10:55:14 +08003860+ set_bit(NF_FLOW_TEARDOWN, &flow->flags);
3861
developer8cb3ac72022-07-04 10:55:14 +08003862- e = container_of(flow, struct flow_offload_entry, flow);
3863- flow_offload_fixup_ct_state(e->ct);
3864+ flow_offload_fixup_ct_state(flow->ct);
3865 }
3866 EXPORT_SYMBOL_GPL(flow_offload_teardown);
3867
developerb7c46752022-07-04 19:51:38 +08003868@@ -304,7 +379,6 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003869 {
3870 struct flow_offload_tuple_rhash *tuplehash;
3871 struct flow_offload *flow;
3872- struct flow_offload_entry *e;
3873 int dir;
3874
3875 tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
developerb7c46752022-07-04 19:51:38 +08003876@@ -314,19 +388,17 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003877
3878 dir = tuplehash->tuple.dir;
3879 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
3880- if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
3881+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags))
3882 return NULL;
3883
3884- e = container_of(flow, struct flow_offload_entry, flow);
3885- if (unlikely(nf_ct_is_dying(e->ct)))
3886+ if (unlikely(nf_ct_is_dying(flow->ct)))
3887 return NULL;
3888
3889 return tuplehash;
3890 }
3891 EXPORT_SYMBOL_GPL(flow_offload_lookup);
3892
3893-static int
3894-nf_flow_table_iterate(struct nf_flowtable *flow_table,
3895+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
3896 void (*iter)(struct flow_offload *flow, void *data),
3897 void *data)
3898 {
developerb7c46752022-07-04 19:51:38 +08003899@@ -339,7 +411,6 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003900 rhashtable_walk_start(&hti);
3901
3902 while ((tuplehash = rhashtable_walk_next(&hti))) {
3903-
3904 if (IS_ERR(tuplehash)) {
3905 if (PTR_ERR(tuplehash) != -EAGAIN) {
3906 err = PTR_ERR(tuplehash);
developerec862f42023-03-23 13:08:45 +08003907@@ -359,23 +430,52 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
developer8cb3ac72022-07-04 10:55:14 +08003908
3909 return err;
3910 }
3911+EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
3912
3913-static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
3914+static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple)
3915 {
3916- struct nf_flowtable *flow_table = data;
3917- struct flow_offload_entry *e;
3918- bool teardown;
3919+ struct dst_entry *dst;
3920
3921- e = container_of(flow, struct flow_offload_entry, flow);
3922+ if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
3923+ tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
3924+ dst = tuple->dst_cache;
3925+ if (!dst_check(dst, tuple->dst_cookie))
3926+ return true;
3927+ }
3928
3929- teardown = flow->flags & (FLOW_OFFLOAD_DYING |
3930- FLOW_OFFLOAD_TEARDOWN);
3931+ return false;
3932+}
3933
3934- if (!teardown)
3935- nf_ct_offload_timeout(flow);
3936+static bool nf_flow_has_stale_dst(struct flow_offload *flow)
3937+{
3938+ return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) ||
3939+ flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple);
3940+}
3941
3942- if (nf_flow_has_expired(flow) || teardown)
3943- flow_offload_del(flow_table, flow);
3944+static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
3945+{
3946+ struct nf_flowtable *flow_table = data;
3947+
3948+ if (nf_flow_has_expired(flow) ||
3949+ nf_ct_is_dying(flow->ct) ||
3950+ nf_flow_has_stale_dst(flow))
3951+ set_bit(NF_FLOW_TEARDOWN, &flow->flags);
3952+
3953+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
3954+ if (test_bit(NF_FLOW_HW, &flow->flags)) {
developerec862f42023-03-23 13:08:45 +08003955+ if (!test_and_set_bit(NF_FLOW_HW_ACCT_DYING, &flow->flags))
3956+ nf_flow_offload_stats(flow_table, flow, true);
3957+
developer8cb3ac72022-07-04 10:55:14 +08003958+ if (!test_bit(NF_FLOW_HW_DYING, &flow->flags))
3959+ nf_flow_offload_del(flow_table, flow);
3960+ else if (test_bit(NF_FLOW_HW_DEAD, &flow->flags))
3961+ flow_offload_del(flow_table, flow);
3962+ } else {
3963+ flow_offload_del(flow_table, flow);
3964+ }
3965+ } else if (test_bit(NF_FLOW_HW, &flow->flags)) {
developerec862f42023-03-23 13:08:45 +08003966+ nf_flow_offload_stats(flow_table, flow, false);
developer8cb3ac72022-07-04 10:55:14 +08003967+ }
3968 }
3969
3970 static void nf_flow_offload_work_gc(struct work_struct *work)
developerb7c46752022-07-04 19:51:38 +08003971@@ -387,30 +484,20 @@ static void nf_flow_offload_work_gc(struct work_struct *work)
developer8cb3ac72022-07-04 10:55:14 +08003972 queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
3973 }
3974
3975-static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
3976- __be16 port, __be16 new_port)
3977+static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
3978+ __be16 port, __be16 new_port)
3979 {
3980 struct tcphdr *tcph;
3981
3982- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
3983- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
3984- return -1;
3985-
3986 tcph = (void *)(skb_network_header(skb) + thoff);
3987 inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
3988-
3989- return 0;
3990 }
3991
3992-static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
3993- __be16 port, __be16 new_port)
3994+static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
3995+ __be16 port, __be16 new_port)
3996 {
3997 struct udphdr *udph;
3998
3999- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
4000- skb_try_make_writable(skb, thoff + sizeof(*udph)))
4001- return -1;
4002-
4003 udph = (void *)(skb_network_header(skb) + thoff);
4004 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
4005 inet_proto_csum_replace2(&udph->check, skb, port,
developerb7c46752022-07-04 19:51:38 +08004006@@ -418,38 +505,28 @@ static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
developer8cb3ac72022-07-04 10:55:14 +08004007 if (!udph->check)
4008 udph->check = CSUM_MANGLED_0;
4009 }
4010-
4011- return 0;
4012 }
4013
4014-static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
4015- u8 protocol, __be16 port, __be16 new_port)
4016+static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
4017+ u8 protocol, __be16 port, __be16 new_port)
4018 {
4019 switch (protocol) {
4020 case IPPROTO_TCP:
4021- if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
4022- return NF_DROP;
4023+ nf_flow_nat_port_tcp(skb, thoff, port, new_port);
4024 break;
4025 case IPPROTO_UDP:
4026- if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
4027- return NF_DROP;
4028+ nf_flow_nat_port_udp(skb, thoff, port, new_port);
4029 break;
4030 }
4031-
4032- return 0;
4033 }
4034
4035-int nf_flow_snat_port(const struct flow_offload *flow,
4036- struct sk_buff *skb, unsigned int thoff,
4037- u8 protocol, enum flow_offload_tuple_dir dir)
4038+void nf_flow_snat_port(const struct flow_offload *flow,
4039+ struct sk_buff *skb, unsigned int thoff,
4040+ u8 protocol, enum flow_offload_tuple_dir dir)
4041 {
4042 struct flow_ports *hdr;
4043 __be16 port, new_port;
4044
4045- if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
4046- skb_try_make_writable(skb, thoff + sizeof(*hdr)))
4047- return -1;
4048-
4049 hdr = (void *)(skb_network_header(skb) + thoff);
4050
4051 switch (dir) {
developerb7c46752022-07-04 19:51:38 +08004052@@ -463,25 +540,19 @@ int nf_flow_snat_port(const struct flow_offload *flow,
developer8cb3ac72022-07-04 10:55:14 +08004053 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
4054 hdr->dest = new_port;
4055 break;
4056- default:
4057- return -1;
4058 }
4059
4060- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4061+ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4062 }
4063 EXPORT_SYMBOL_GPL(nf_flow_snat_port);
4064
4065-int nf_flow_dnat_port(const struct flow_offload *flow,
4066- struct sk_buff *skb, unsigned int thoff,
4067- u8 protocol, enum flow_offload_tuple_dir dir)
4068+void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb,
4069+ unsigned int thoff, u8 protocol,
4070+ enum flow_offload_tuple_dir dir)
4071 {
4072 struct flow_ports *hdr;
4073 __be16 port, new_port;
4074
4075- if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
4076- skb_try_make_writable(skb, thoff + sizeof(*hdr)))
4077- return -1;
4078-
4079 hdr = (void *)(skb_network_header(skb) + thoff);
4080
4081 switch (dir) {
developerb7c46752022-07-04 19:51:38 +08004082@@ -495,11 +566,9 @@ int nf_flow_dnat_port(const struct flow_offload *flow,
developer8cb3ac72022-07-04 10:55:14 +08004083 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
4084 hdr->source = new_port;
4085 break;
4086- default:
4087- return -1;
4088 }
4089
4090- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4091+ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4092 }
4093 EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
4094
developerb7c46752022-07-04 19:51:38 +08004095@@ -507,7 +576,9 @@ int nf_flow_table_init(struct nf_flowtable *flowtable)
developer8cb3ac72022-07-04 10:55:14 +08004096 {
4097 int err;
4098
4099- INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
4100+ INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
4101+ flow_block_init(&flowtable->flow_block);
4102+ init_rwsem(&flowtable->flow_block_lock);
4103
4104 err = rhashtable_init(&flowtable->rhashtable,
4105 &nf_flow_offload_rhash_params);
developerb7c46752022-07-04 19:51:38 +08004106@@ -528,25 +599,24 @@ EXPORT_SYMBOL_GPL(nf_flow_table_init);
developer8cb3ac72022-07-04 10:55:14 +08004107 static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
4108 {
4109 struct net_device *dev = data;
4110- struct flow_offload_entry *e;
4111-
4112- e = container_of(flow, struct flow_offload_entry, flow);
4113
4114 if (!dev) {
4115 flow_offload_teardown(flow);
4116 return;
4117 }
4118- if (net_eq(nf_ct_net(e->ct), dev_net(dev)) &&
4119+
4120+ if (net_eq(nf_ct_net(flow->ct), dev_net(dev)) &&
4121 (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
4122 flow->tuplehash[1].tuple.iifidx == dev->ifindex))
4123- flow_offload_dead(flow);
4124+ flow_offload_teardown(flow);
4125 }
4126
4127-static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
4128- struct net_device *dev)
4129+void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
4130+ struct net_device *dev)
4131 {
4132 nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
4133 flush_delayed_work(&flowtable->gc_work);
4134+ nf_flow_table_offload_flush(flowtable);
4135 }
4136
4137 void nf_flow_table_cleanup(struct net_device *dev)
developerb7c46752022-07-04 19:51:38 +08004138@@ -555,7 +625,7 @@ void nf_flow_table_cleanup(struct net_device *dev)
developer8cb3ac72022-07-04 10:55:14 +08004139
4140 mutex_lock(&flowtable_lock);
4141 list_for_each_entry(flowtable, &flowtables, list)
4142- nf_flow_table_iterate_cleanup(flowtable, dev);
4143+ nf_flow_table_gc_cleanup(flowtable, dev);
4144 mutex_unlock(&flowtable_lock);
4145 }
4146 EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
developerb7c46752022-07-04 19:51:38 +08004147@@ -565,9 +635,14 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
developer8cb3ac72022-07-04 10:55:14 +08004148 mutex_lock(&flowtable_lock);
4149 list_del(&flow_table->list);
4150 mutex_unlock(&flowtable_lock);
4151+
4152 cancel_delayed_work_sync(&flow_table->gc_work);
4153 nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
4154 nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
4155+ nf_flow_table_offload_flush(flow_table);
4156+ if (nf_flowtable_hw_offload(flow_table))
4157+ nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step,
4158+ flow_table);
4159 rhashtable_destroy(&flow_table->rhashtable);
4160 }
4161 EXPORT_SYMBOL_GPL(nf_flow_table_free);
developerb7c46752022-07-04 19:51:38 +08004162@@ -591,12 +666,23 @@ static struct notifier_block flow_offload_netdev_notifier = {
developer8cb3ac72022-07-04 10:55:14 +08004163
4164 static int __init nf_flow_table_module_init(void)
4165 {
4166- return register_netdevice_notifier(&flow_offload_netdev_notifier);
4167+ int ret;
4168+
4169+ ret = nf_flow_table_offload_init();
4170+ if (ret)
4171+ return ret;
4172+
4173+ ret = register_netdevice_notifier(&flow_offload_netdev_notifier);
4174+ if (ret)
4175+ nf_flow_table_offload_exit();
4176+
4177+ return ret;
4178 }
4179
4180 static void __exit nf_flow_table_module_exit(void)
4181 {
4182 unregister_netdevice_notifier(&flow_offload_netdev_notifier);
4183+ nf_flow_table_offload_exit();
4184 }
4185
4186 module_init(nf_flow_table_module_init);
developerb7c46752022-07-04 19:51:38 +08004187@@ -604,3 +690,4 @@ module_exit(nf_flow_table_module_exit);
developer8cb3ac72022-07-04 10:55:14 +08004188
4189 MODULE_LICENSE("GPL");
4190 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
4191+MODULE_DESCRIPTION("Netfilter flow table module");
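[Editor's note -- illustration only, not part of the patch.] The nf_flow_table_core.c hunks above split flow allocation from route setup: flow_offload_alloc() now takes only the conntrack entry, and the dst/ifindex/encap state is attached afterwards through flow_offload_route_init(). A minimal sketch of how a caller (e.g. the xt_FLOWOFFLOAD target added later in this patch) is expected to drive the reworked API, assuming ct, route and flow_table have already been prepared; the function name example_offload_flow is made up for illustration:

	static int example_offload_flow(struct nf_flowtable *flow_table,
					struct nf_conn *ct,
					struct nf_flow_route *route)
	{
		struct flow_offload *flow;
		int err;

		flow = flow_offload_alloc(ct);	/* grabs a ct reference */
		if (!flow)
			return -ENOMEM;

		/* copy MTU, iifidx, encap and dst/xmit info for both directions */
		err = flow_offload_route_init(flow, route);
		if (err < 0)
			goto err_free;

		/* insert into the rhashtable; when hardware offload is enabled
		 * for this flowtable, this also queues nf_flow_offload_add() */
		err = flow_offload_add(flow_table, flow);
		if (err < 0)
			goto err_free;

		return 0;

	err_free:
		/* releases the dst references (route type) and the ct reference */
		flow_offload_free(flow);
		return err;
	}
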
4192diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
developerb7c46752022-07-04 19:51:38 +08004193index 397129b2..6257d87c 100644
developer8cb3ac72022-07-04 10:55:14 +08004194--- a/net/netfilter/nf_flow_table_ip.c
4195+++ b/net/netfilter/nf_flow_table_ip.c
4196@@ -7,11 +7,13 @@
4197 #include <linux/ip.h>
4198 #include <linux/ipv6.h>
4199 #include <linux/netdevice.h>
4200+#include <linux/if_ether.h>
4201 #include <net/ip.h>
4202 #include <net/ipv6.h>
4203 #include <net/ip6_route.h>
4204 #include <net/neighbour.h>
4205 #include <net/netfilter/nf_flow_table.h>
4206+#include <net/netfilter/nf_conntrack_acct.h>
4207 /* For layer 4 checksum field offset. */
4208 #include <linux/tcp.h>
4209 #include <linux/udp.h>
4210@@ -24,9 +26,6 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
4211 if (proto != IPPROTO_TCP)
4212 return 0;
4213
4214- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
4215- return -1;
4216-
4217 tcph = (void *)(skb_network_header(skb) + thoff);
4218 if (unlikely(tcph->fin || tcph->rst)) {
4219 flow_offload_teardown(flow);
4220@@ -36,30 +35,20 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
4221 return 0;
4222 }
4223
4224-static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
4225- __be32 addr, __be32 new_addr)
4226+static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
4227+ __be32 addr, __be32 new_addr)
4228 {
4229 struct tcphdr *tcph;
4230
4231- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
4232- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
4233- return -1;
4234-
4235 tcph = (void *)(skb_network_header(skb) + thoff);
4236 inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
4237-
4238- return 0;
4239 }
4240
4241-static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
4242- __be32 addr, __be32 new_addr)
4243+static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
4244+ __be32 addr, __be32 new_addr)
4245 {
4246 struct udphdr *udph;
4247
4248- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
4249- skb_try_make_writable(skb, thoff + sizeof(*udph)))
4250- return -1;
4251-
4252 udph = (void *)(skb_network_header(skb) + thoff);
4253 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
4254 inet_proto_csum_replace4(&udph->check, skb, addr,
4255@@ -67,31 +56,25 @@ static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
4256 if (!udph->check)
4257 udph->check = CSUM_MANGLED_0;
4258 }
4259-
4260- return 0;
4261 }
4262
4263-static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
4264- unsigned int thoff, __be32 addr,
4265- __be32 new_addr)
4266+static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
4267+ unsigned int thoff, __be32 addr,
4268+ __be32 new_addr)
4269 {
4270 switch (iph->protocol) {
4271 case IPPROTO_TCP:
4272- if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
4273- return NF_DROP;
4274+ nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr);
4275 break;
4276 case IPPROTO_UDP:
4277- if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
4278- return NF_DROP;
4279+ nf_flow_nat_ip_udp(skb, thoff, addr, new_addr);
4280 break;
4281 }
4282-
4283- return 0;
4284 }
4285
4286-static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4287- struct iphdr *iph, unsigned int thoff,
4288- enum flow_offload_tuple_dir dir)
4289+static void nf_flow_snat_ip(const struct flow_offload *flow,
4290+ struct sk_buff *skb, struct iphdr *iph,
4291+ unsigned int thoff, enum flow_offload_tuple_dir dir)
4292 {
4293 __be32 addr, new_addr;
4294
4295@@ -106,17 +89,15 @@ static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4296 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
4297 iph->daddr = new_addr;
4298 break;
4299- default:
4300- return -1;
4301 }
4302 csum_replace4(&iph->check, addr, new_addr);
4303
4304- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4305+ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4306 }
4307
4308-static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4309- struct iphdr *iph, unsigned int thoff,
4310- enum flow_offload_tuple_dir dir)
4311+static void nf_flow_dnat_ip(const struct flow_offload *flow,
4312+ struct sk_buff *skb, struct iphdr *iph,
4313+ unsigned int thoff, enum flow_offload_tuple_dir dir)
4314 {
4315 __be32 addr, new_addr;
4316
4317@@ -131,29 +112,24 @@ static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4318 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
4319 iph->saddr = new_addr;
4320 break;
4321- default:
4322- return -1;
4323 }
4324 csum_replace4(&iph->check, addr, new_addr);
4325
4326- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4327+ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4328 }
4329
4330-static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4331- unsigned int thoff, enum flow_offload_tuple_dir dir)
4332+static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4333+ unsigned int thoff, enum flow_offload_tuple_dir dir,
4334+ struct iphdr *iph)
4335 {
4336- struct iphdr *iph = ip_hdr(skb);
4337-
4338- if (flow->flags & FLOW_OFFLOAD_SNAT &&
4339- (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
4340- nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
4341- return -1;
4342- if (flow->flags & FLOW_OFFLOAD_DNAT &&
4343- (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
4344- nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
4345- return -1;
4346-
4347- return 0;
4348+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
4349+ nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir);
4350+ nf_flow_snat_ip(flow, skb, iph, thoff, dir);
4351+ }
4352+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
4353+ nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir);
4354+ nf_flow_dnat_ip(flow, skb, iph, thoff, dir);
4355+ }
4356 }
4357
4358 static bool ip_has_options(unsigned int thoff)
4359@@ -161,35 +137,70 @@ static bool ip_has_options(unsigned int thoff)
4360 return thoff != sizeof(struct iphdr);
4361 }
4362
4363+static void nf_flow_tuple_encap(struct sk_buff *skb,
4364+ struct flow_offload_tuple *tuple)
4365+{
4366+ struct vlan_ethhdr *veth;
4367+ struct pppoe_hdr *phdr;
4368+ int i = 0;
4369+
4370+ if (skb_vlan_tag_present(skb)) {
4371+ tuple->encap[i].id = skb_vlan_tag_get(skb);
4372+ tuple->encap[i].proto = skb->vlan_proto;
4373+ i++;
4374+ }
4375+ switch (skb->protocol) {
4376+ case htons(ETH_P_8021Q):
4377+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
4378+ tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
4379+ tuple->encap[i].proto = skb->protocol;
4380+ break;
4381+ case htons(ETH_P_PPP_SES):
4382+ phdr = (struct pppoe_hdr *)skb_mac_header(skb);
4383+ tuple->encap[i].id = ntohs(phdr->sid);
4384+ tuple->encap[i].proto = skb->protocol;
4385+ break;
4386+ }
4387+}
4388+
4389 static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
4390- struct flow_offload_tuple *tuple)
4391+ struct flow_offload_tuple *tuple, u32 *hdrsize,
4392+ u32 offset)
4393 {
4394 struct flow_ports *ports;
4395 unsigned int thoff;
4396 struct iphdr *iph;
4397
4398- if (!pskb_may_pull(skb, sizeof(*iph)))
4399+ if (!pskb_may_pull(skb, sizeof(*iph) + offset))
4400 return -1;
4401
4402- iph = ip_hdr(skb);
4403- thoff = iph->ihl * 4;
4404+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
4405+ thoff = (iph->ihl * 4);
4406
4407 if (ip_is_fragment(iph) ||
4408 unlikely(ip_has_options(thoff)))
4409 return -1;
4410
4411- if (iph->protocol != IPPROTO_TCP &&
4412- iph->protocol != IPPROTO_UDP)
4413+ thoff += offset;
4414+
4415+ switch (iph->protocol) {
4416+ case IPPROTO_TCP:
4417+ *hdrsize = sizeof(struct tcphdr);
4418+ break;
4419+ case IPPROTO_UDP:
4420+ *hdrsize = sizeof(struct udphdr);
4421+ break;
4422+ default:
4423 return -1;
4424+ }
4425
4426 if (iph->ttl <= 1)
4427 return -1;
4428
4429- thoff = iph->ihl * 4;
4430- if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
4431+ if (!pskb_may_pull(skb, thoff + *hdrsize))
4432 return -1;
4433
4434- iph = ip_hdr(skb);
4435+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
4436 ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
4437
4438 tuple->src_v4.s_addr = iph->saddr;
4439@@ -199,6 +210,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
4440 tuple->l3proto = AF_INET;
4441 tuple->l4proto = iph->protocol;
4442 tuple->iifidx = dev->ifindex;
4443+ nf_flow_tuple_encap(skb, tuple);
4444
4445 return 0;
4446 }
developerb7c46752022-07-04 19:51:38 +08004447@@ -225,6 +237,75 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08004448 return NF_STOLEN;
4449 }
4450
4451+static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
4452+ u32 *offset)
4453+{
4454+ struct vlan_ethhdr *veth;
4455+
4456+ switch (skb->protocol) {
4457+ case htons(ETH_P_8021Q):
4458+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
4459+ if (veth->h_vlan_encapsulated_proto == proto) {
4460+ *offset += VLAN_HLEN;
4461+ return true;
4462+ }
4463+ break;
4464+ case htons(ETH_P_PPP_SES):
4465+ if (nf_flow_pppoe_proto(skb) == proto) {
4466+ *offset += PPPOE_SES_HLEN;
4467+ return true;
4468+ }
4469+ break;
4470+ }
4471+
4472+ return false;
4473+}
4474+
4475+static void nf_flow_encap_pop(struct sk_buff *skb,
4476+ struct flow_offload_tuple_rhash *tuplehash)
4477+{
4478+ struct vlan_hdr *vlan_hdr;
4479+ int i;
4480+
4481+ for (i = 0; i < tuplehash->tuple.encap_num; i++) {
4482+ if (skb_vlan_tag_present(skb)) {
4483+ __vlan_hwaccel_clear_tag(skb);
4484+ continue;
4485+ }
4486+ switch (skb->protocol) {
4487+ case htons(ETH_P_8021Q):
4488+ vlan_hdr = (struct vlan_hdr *)skb->data;
4489+ __skb_pull(skb, VLAN_HLEN);
4490+ vlan_set_encap_proto(skb, vlan_hdr);
4491+ skb_reset_network_header(skb);
4492+ break;
4493+ case htons(ETH_P_PPP_SES):
4494+ skb->protocol = nf_flow_pppoe_proto(skb);
4495+ skb_pull(skb, PPPOE_SES_HLEN);
4496+ skb_reset_network_header(skb);
4497+ break;
4498+ }
4499+ }
4500+}
4501+
4502+static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
4503+ const struct flow_offload_tuple_rhash *tuplehash,
4504+ unsigned short type)
4505+{
4506+ struct net_device *outdev;
4507+
4508+ outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
4509+ if (!outdev)
4510+ return NF_DROP;
4511+
4512+ skb->dev = outdev;
4513+ dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
4514+ tuplehash->tuple.out.h_source, skb->len);
4515+ dev_queue_xmit(skb);
4516+
4517+ return NF_STOLEN;
4518+}
4519+
4520 unsigned int
4521 nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
4522 const struct nf_hook_state *state)
developerb7c46752022-07-04 19:51:38 +08004523@@ -235,15 +316,18 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08004524 enum flow_offload_tuple_dir dir;
4525 struct flow_offload *flow;
4526 struct net_device *outdev;
4527+ u32 hdrsize, offset = 0;
4528+ unsigned int thoff, mtu;
4529 struct rtable *rt;
4530- unsigned int thoff;
4531 struct iphdr *iph;
4532 __be32 nexthop;
4533+ int ret;
4534
4535- if (skb->protocol != htons(ETH_P_IP))
4536+ if (skb->protocol != htons(ETH_P_IP) &&
4537+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset))
4538 return NF_ACCEPT;
4539
4540- if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
4541+ if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0)
4542 return NF_ACCEPT;
4543
4544 tuplehash = flow_offload_lookup(flow_table, &tuple);
developerb7c46752022-07-04 19:51:38 +08004545@@ -252,75 +336,80 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08004546
4547 dir = tuplehash->tuple.dir;
4548 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
4549- rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
4550- outdev = rt->dst.dev;
4551-
4552- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
4553- return NF_ACCEPT;
developerb7c46752022-07-04 19:51:38 +08004554
developer8cb3ac72022-07-04 10:55:14 +08004555- if (skb_try_make_writable(skb, sizeof(*iph)))
4556- return NF_DROP;
developerb7c46752022-07-04 19:51:38 +08004557-
developer8cb3ac72022-07-04 10:55:14 +08004558- thoff = ip_hdr(skb)->ihl * 4;
4559- if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
4560+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
4561+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
4562 return NF_ACCEPT;
4563
developerb7c46752022-07-04 19:51:38 +08004564- if (!dst_check(&rt->dst, 0)) {
developer8cb3ac72022-07-04 10:55:14 +08004565- flow_offload_teardown(flow);
4566+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
4567+ thoff = (iph->ihl * 4) + offset;
4568+ if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
4569 return NF_ACCEPT;
4570- }
4571
4572- if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
4573+ if (skb_try_make_writable(skb, thoff + hdrsize))
4574 return NF_DROP;
4575
4576- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
4577+ flow_offload_refresh(flow_table, flow);
4578+
4579+ nf_flow_encap_pop(skb, tuplehash);
4580+ thoff -= offset;
4581+
4582 iph = ip_hdr(skb);
4583+ nf_flow_nat_ip(flow, skb, thoff, dir, iph);
4584+
4585 ip_decrease_ttl(iph);
4586 skb->tstamp = 0;
4587
4588- if (unlikely(dst_xfrm(&rt->dst))) {
4589+ if (flow_table->flags & NF_FLOWTABLE_COUNTER)
4590+ nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
4591+
4592+ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
4593+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
4594 memset(skb->cb, 0, sizeof(struct inet_skb_parm));
4595 IPCB(skb)->iif = skb->dev->ifindex;
4596 IPCB(skb)->flags = IPSKB_FORWARDED;
4597 return nf_flow_xmit_xfrm(skb, state, &rt->dst);
4598 }
4599
4600- skb->dev = outdev;
4601- nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
4602- skb_dst_set_noref(skb, &rt->dst);
4603- neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
4604+ switch (tuplehash->tuple.xmit_type) {
4605+ case FLOW_OFFLOAD_XMIT_NEIGH:
4606+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
4607+ outdev = rt->dst.dev;
4608+ skb->dev = outdev;
4609+ nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
4610+ skb_dst_set_noref(skb, &rt->dst);
4611+ neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
4612+ ret = NF_STOLEN;
4613+ break;
4614+ case FLOW_OFFLOAD_XMIT_DIRECT:
4615+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
4616+ if (ret == NF_DROP)
4617+ flow_offload_teardown(flow);
4618+ break;
4619+ }
4620
4621- return NF_STOLEN;
4622+ return ret;
4623 }
4624 EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
4625
4626-static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
4627- struct in6_addr *addr,
4628- struct in6_addr *new_addr)
4629+static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
4630+ struct in6_addr *addr,
4631+ struct in6_addr *new_addr,
4632+ struct ipv6hdr *ip6h)
4633 {
4634 struct tcphdr *tcph;
4635
4636- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
4637- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
4638- return -1;
4639-
4640 tcph = (void *)(skb_network_header(skb) + thoff);
4641 inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
4642 new_addr->s6_addr32, true);
4643-
4644- return 0;
4645 }
4646
4647-static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
4648- struct in6_addr *addr,
4649- struct in6_addr *new_addr)
4650+static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
4651+ struct in6_addr *addr,
4652+ struct in6_addr *new_addr)
4653 {
4654 struct udphdr *udph;
4655
4656- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
4657- skb_try_make_writable(skb, thoff + sizeof(*udph)))
4658- return -1;
4659-
4660 udph = (void *)(skb_network_header(skb) + thoff);
4661 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
4662 inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
developerb7c46752022-07-04 19:51:38 +08004663@@ -328,32 +417,26 @@ static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
developer8cb3ac72022-07-04 10:55:14 +08004664 if (!udph->check)
4665 udph->check = CSUM_MANGLED_0;
4666 }
4667-
4668- return 0;
4669 }
4670
4671-static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
4672- unsigned int thoff, struct in6_addr *addr,
4673- struct in6_addr *new_addr)
4674+static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
4675+ unsigned int thoff, struct in6_addr *addr,
4676+ struct in6_addr *new_addr)
4677 {
4678 switch (ip6h->nexthdr) {
4679 case IPPROTO_TCP:
4680- if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
4681- return NF_DROP;
4682+ nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h);
4683 break;
4684 case IPPROTO_UDP:
4685- if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
4686- return NF_DROP;
4687+ nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr);
4688 break;
4689 }
4690-
4691- return 0;
4692 }
4693
4694-static int nf_flow_snat_ipv6(const struct flow_offload *flow,
4695- struct sk_buff *skb, struct ipv6hdr *ip6h,
4696- unsigned int thoff,
4697- enum flow_offload_tuple_dir dir)
4698+static void nf_flow_snat_ipv6(const struct flow_offload *flow,
4699+ struct sk_buff *skb, struct ipv6hdr *ip6h,
4700+ unsigned int thoff,
4701+ enum flow_offload_tuple_dir dir)
4702 {
4703 struct in6_addr addr, new_addr;
4704
developerb7c46752022-07-04 19:51:38 +08004705@@ -368,17 +451,15 @@ static int nf_flow_snat_ipv6(const struct flow_offload *flow,
developer8cb3ac72022-07-04 10:55:14 +08004706 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
4707 ip6h->daddr = new_addr;
4708 break;
4709- default:
4710- return -1;
4711 }
4712
4713- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4714+ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4715 }
4716
4717-static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
4718- struct sk_buff *skb, struct ipv6hdr *ip6h,
4719- unsigned int thoff,
4720- enum flow_offload_tuple_dir dir)
4721+static void nf_flow_dnat_ipv6(const struct flow_offload *flow,
4722+ struct sk_buff *skb, struct ipv6hdr *ip6h,
4723+ unsigned int thoff,
4724+ enum flow_offload_tuple_dir dir)
4725 {
4726 struct in6_addr addr, new_addr;
4727
developerb7c46752022-07-04 19:51:38 +08004728@@ -393,56 +474,60 @@ static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
developer8cb3ac72022-07-04 10:55:14 +08004729 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
4730 ip6h->saddr = new_addr;
4731 break;
4732- default:
4733- return -1;
4734 }
4735
4736- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4737+ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4738 }
4739
4740-static int nf_flow_nat_ipv6(const struct flow_offload *flow,
4741- struct sk_buff *skb,
4742- enum flow_offload_tuple_dir dir)
4743+static void nf_flow_nat_ipv6(const struct flow_offload *flow,
4744+ struct sk_buff *skb,
4745+ enum flow_offload_tuple_dir dir,
4746+ struct ipv6hdr *ip6h)
4747 {
4748- struct ipv6hdr *ip6h = ipv6_hdr(skb);
4749 unsigned int thoff = sizeof(*ip6h);
4750
4751- if (flow->flags & FLOW_OFFLOAD_SNAT &&
4752- (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
4753- nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
4754- return -1;
4755- if (flow->flags & FLOW_OFFLOAD_DNAT &&
4756- (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
4757- nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
4758- return -1;
4759-
4760- return 0;
4761+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
4762+ nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir);
4763+ nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir);
4764+ }
4765+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
4766+ nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir);
4767+ nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir);
4768+ }
4769 }
4770
4771 static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
4772- struct flow_offload_tuple *tuple)
4773+ struct flow_offload_tuple *tuple, u32 *hdrsize,
4774+ u32 offset)
4775 {
4776 struct flow_ports *ports;
4777 struct ipv6hdr *ip6h;
4778 unsigned int thoff;
4779
4780- if (!pskb_may_pull(skb, sizeof(*ip6h)))
4781+ thoff = sizeof(*ip6h) + offset;
4782+ if (!pskb_may_pull(skb, thoff))
4783 return -1;
4784
4785- ip6h = ipv6_hdr(skb);
4786+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
4787
4788- if (ip6h->nexthdr != IPPROTO_TCP &&
4789- ip6h->nexthdr != IPPROTO_UDP)
4790+ switch (ip6h->nexthdr) {
4791+ case IPPROTO_TCP:
4792+ *hdrsize = sizeof(struct tcphdr);
4793+ break;
4794+ case IPPROTO_UDP:
4795+ *hdrsize = sizeof(struct udphdr);
4796+ break;
4797+ default:
4798 return -1;
4799+ }
4800
4801 if (ip6h->hop_limit <= 1)
4802 return -1;
4803
4804- thoff = sizeof(*ip6h);
4805- if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
4806+ if (!pskb_may_pull(skb, thoff + *hdrsize))
4807 return -1;
4808
4809- ip6h = ipv6_hdr(skb);
4810+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
4811 ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
4812
4813 tuple->src_v6 = ip6h->saddr;
developerb7c46752022-07-04 19:51:38 +08004814@@ -452,6 +537,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
developer8cb3ac72022-07-04 10:55:14 +08004815 tuple->l3proto = AF_INET6;
4816 tuple->l4proto = ip6h->nexthdr;
4817 tuple->iifidx = dev->ifindex;
4818+ nf_flow_tuple_encap(skb, tuple);
4819
4820 return 0;
4821 }
developerb7c46752022-07-04 19:51:38 +08004822@@ -467,13 +553,17 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08004823 const struct in6_addr *nexthop;
4824 struct flow_offload *flow;
4825 struct net_device *outdev;
4826+ unsigned int thoff, mtu;
4827+ u32 hdrsize, offset = 0;
4828 struct ipv6hdr *ip6h;
4829 struct rt6_info *rt;
4830+ int ret;
4831
4832- if (skb->protocol != htons(ETH_P_IPV6))
4833+ if (skb->protocol != htons(ETH_P_IPV6) &&
4834+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset))
4835 return NF_ACCEPT;
4836
4837- if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
4838+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0)
4839 return NF_ACCEPT;
4840
4841 tuplehash = flow_offload_lookup(flow_table, &tuple);
developerb7c46752022-07-04 19:51:38 +08004842@@ -482,44 +572,57 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
developer8cb3ac72022-07-04 10:55:14 +08004843
4844 dir = tuplehash->tuple.dir;
4845 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
4846- rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
4847- outdev = rt->dst.dev;
developer8cb3ac72022-07-04 10:55:14 +08004848
developerb7c46752022-07-04 19:51:38 +08004849- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
developer8cb3ac72022-07-04 10:55:14 +08004850+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
4851+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
4852 return NF_ACCEPT;
4853
developerb7c46752022-07-04 19:51:38 +08004854- if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
4855- sizeof(*ip6h)))
developer8cb3ac72022-07-04 10:55:14 +08004856+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
4857+ thoff = sizeof(*ip6h) + offset;
4858+ if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
4859 return NF_ACCEPT;
developer8cb3ac72022-07-04 10:55:14 +08004860
developerb7c46752022-07-04 19:51:38 +08004861- if (!dst_check(&rt->dst, tuplehash->tuple.dst_cookie)) {
4862- flow_offload_teardown(flow);
4863- return NF_ACCEPT;
4864- }
4865-
developer8cb3ac72022-07-04 10:55:14 +08004866- if (skb_try_make_writable(skb, sizeof(*ip6h)))
4867+ if (skb_try_make_writable(skb, thoff + hdrsize))
4868 return NF_DROP;
4869
4870- if (nf_flow_nat_ipv6(flow, skb, dir) < 0)
4871- return NF_DROP;
4872+ flow_offload_refresh(flow_table, flow);
4873+
4874+ nf_flow_encap_pop(skb, tuplehash);
4875
4876- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
4877 ip6h = ipv6_hdr(skb);
4878+ nf_flow_nat_ipv6(flow, skb, dir, ip6h);
4879+
4880 ip6h->hop_limit--;
4881 skb->tstamp = 0;
4882
4883- if (unlikely(dst_xfrm(&rt->dst))) {
4884+ if (flow_table->flags & NF_FLOWTABLE_COUNTER)
4885+ nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
4886+
4887+ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
4888+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
4889 memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
4890 IP6CB(skb)->iif = skb->dev->ifindex;
4891 IP6CB(skb)->flags = IP6SKB_FORWARDED;
4892 return nf_flow_xmit_xfrm(skb, state, &rt->dst);
4893 }
4894
4895- skb->dev = outdev;
4896- nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
4897- skb_dst_set_noref(skb, &rt->dst);
4898- neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
4899+ switch (tuplehash->tuple.xmit_type) {
4900+ case FLOW_OFFLOAD_XMIT_NEIGH:
4901+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
4902+ outdev = rt->dst.dev;
4903+ skb->dev = outdev;
4904+ nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
4905+ skb_dst_set_noref(skb, &rt->dst);
4906+ neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
4907+ ret = NF_STOLEN;
4908+ break;
4909+ case FLOW_OFFLOAD_XMIT_DIRECT:
4910+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
4911+ if (ret == NF_DROP)
4912+ flow_offload_teardown(flow);
4913+ break;
4914+ }
4915
4916- return NF_STOLEN;
4917+ return ret;
4918 }
4919 EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
4920diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
4921new file mode 100644
4922index 000000000..d94c6fb92
4923--- /dev/null
4924+++ b/net/netfilter/nf_flow_table_offload.c
developerec862f42023-03-23 13:08:45 +08004925@@ -0,0 +1,1197 @@
developer8cb3ac72022-07-04 10:55:14 +08004926+#include <linux/kernel.h>
4927+#include <linux/init.h>
4928+#include <linux/module.h>
4929+#include <linux/netfilter.h>
4930+#include <linux/rhashtable.h>
4931+#include <linux/netdevice.h>
4932+#include <linux/tc_act/tc_csum.h>
4933+#include <net/flow_offload.h>
4934+#include <net/netfilter/nf_flow_table.h>
4935+#include <net/netfilter/nf_tables.h>
4936+#include <net/netfilter/nf_conntrack.h>
4937+#include <net/netfilter/nf_conntrack_acct.h>
4938+#include <net/netfilter/nf_conntrack_core.h>
4939+#include <net/netfilter/nf_conntrack_tuple.h>
4940+
4941+static struct workqueue_struct *nf_flow_offload_add_wq;
4942+static struct workqueue_struct *nf_flow_offload_del_wq;
4943+static struct workqueue_struct *nf_flow_offload_stats_wq;
4944+
4945+struct flow_offload_work {
4946+ struct list_head list;
4947+ enum flow_cls_command cmd;
4948+ int priority;
4949+ struct nf_flowtable *flowtable;
4950+ struct flow_offload *flow;
4951+ struct work_struct work;
4952+};
4953+
4954+#define NF_FLOW_DISSECTOR(__match, __type, __field) \
4955+ (__match)->dissector.offset[__type] = \
4956+ offsetof(struct nf_flow_key, __field)
4957+
4958+static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
4959+ struct ip_tunnel_info *tun_info)
4960+{
4961+ struct nf_flow_key *mask = &match->mask;
4962+ struct nf_flow_key *key = &match->key;
4963+ unsigned int enc_keys;
4964+
4965+ if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
4966+ return;
4967+
4968+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control);
4969+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
4970+ key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
4971+ mask->enc_key_id.keyid = 0xffffffff;
4972+ enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
4973+ BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);
4974+
4975+ if (ip_tunnel_info_af(tun_info) == AF_INET) {
4976+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
4977+ enc_ipv4);
4978+ key->enc_ipv4.src = tun_info->key.u.ipv4.dst;
4979+ key->enc_ipv4.dst = tun_info->key.u.ipv4.src;
4980+ if (key->enc_ipv4.src)
4981+ mask->enc_ipv4.src = 0xffffffff;
4982+ if (key->enc_ipv4.dst)
4983+ mask->enc_ipv4.dst = 0xffffffff;
4984+ enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
4985+ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
4986+ } else {
4987+ memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
4988+ sizeof(struct in6_addr));
4989+ memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src,
4990+ sizeof(struct in6_addr));
4991+ if (memcmp(&key->enc_ipv6.src, &in6addr_any,
4992+ sizeof(struct in6_addr)))
4993+ memset(&mask->enc_ipv6.src, 0xff,
4994+ sizeof(struct in6_addr));
4995+ if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
4996+ sizeof(struct in6_addr)))
4997+ memset(&mask->enc_ipv6.dst, 0xff,
4998+ sizeof(struct in6_addr));
4999+ enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
5000+ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
5001+ }
5002+
5003+ match->dissector.used_keys |= enc_keys;
5004+}
5005+
5006+static void nf_flow_rule_vlan_match(struct flow_dissector_key_vlan *key,
5007+ struct flow_dissector_key_vlan *mask,
5008+ u16 vlan_id, __be16 proto)
5009+{
5010+ key->vlan_id = vlan_id;
5011+ mask->vlan_id = VLAN_VID_MASK;
5012+ key->vlan_tpid = proto;
5013+ mask->vlan_tpid = 0xffff;
5014+}
5015+
5016+static int nf_flow_rule_match(struct nf_flow_match *match,
5017+ const struct flow_offload_tuple *tuple,
5018+ struct dst_entry *other_dst)
5019+{
5020+ struct nf_flow_key *mask = &match->mask;
5021+ struct nf_flow_key *key = &match->key;
5022+ struct ip_tunnel_info *tun_info;
5023+ bool vlan_encap = false;
5024+
5025+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
5026+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
5027+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
5028+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
5029+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
5030+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
5031+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);
5032+
5033+ if (other_dst && other_dst->lwtstate) {
5034+ tun_info = lwt_tun_info(other_dst->lwtstate);
5035+ nf_flow_rule_lwt_match(match, tun_info);
5036+ }
5037+
5038+ key->meta.ingress_ifindex = tuple->iifidx;
5039+ mask->meta.ingress_ifindex = 0xffffffff;
5040+
5041+ if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
5042+ tuple->encap[0].proto == htons(ETH_P_8021Q)) {
5043+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, vlan);
5044+ nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
5045+ tuple->encap[0].id,
5046+ tuple->encap[0].proto);
5047+ vlan_encap = true;
5048+ }
5049+
5050+ if (tuple->encap_num > 1 && !(tuple->in_vlan_ingress & BIT(1)) &&
5051+ tuple->encap[1].proto == htons(ETH_P_8021Q)) {
5052+ if (vlan_encap) {
5053+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CVLAN,
5054+ cvlan);
5055+ nf_flow_rule_vlan_match(&key->cvlan, &mask->cvlan,
5056+ tuple->encap[1].id,
5057+ tuple->encap[1].proto);
5058+ } else {
5059+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN,
5060+ vlan);
5061+ nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
5062+ tuple->encap[1].id,
5063+ tuple->encap[1].proto);
5064+ }
5065+ }
5066+
5067+ switch (tuple->l3proto) {
5068+ case AF_INET:
5069+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
5070+ key->basic.n_proto = htons(ETH_P_IP);
5071+ key->ipv4.src = tuple->src_v4.s_addr;
5072+ mask->ipv4.src = 0xffffffff;
5073+ key->ipv4.dst = tuple->dst_v4.s_addr;
5074+ mask->ipv4.dst = 0xffffffff;
5075+ break;
5076+ case AF_INET6:
5077+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
5078+ key->basic.n_proto = htons(ETH_P_IPV6);
5079+ key->ipv6.src = tuple->src_v6;
5080+ memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
5081+ key->ipv6.dst = tuple->dst_v6;
5082+ memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
5083+ break;
5084+ default:
5085+ return -EOPNOTSUPP;
5086+ }
5087+ mask->control.addr_type = 0xffff;
5088+ match->dissector.used_keys |= BIT(key->control.addr_type);
5089+ mask->basic.n_proto = 0xffff;
5090+
5091+ switch (tuple->l4proto) {
5092+ case IPPROTO_TCP:
5093+ key->tcp.flags = 0;
5094+ mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
5095+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
5096+ break;
5097+ case IPPROTO_UDP:
5098+ break;
5099+ default:
5100+ return -EOPNOTSUPP;
5101+ }
5102+
5103+ key->basic.ip_proto = tuple->l4proto;
5104+ mask->basic.ip_proto = 0xff;
5105+
5106+ key->tp.src = tuple->src_port;
5107+ mask->tp.src = 0xffff;
5108+ key->tp.dst = tuple->dst_port;
5109+ mask->tp.dst = 0xffff;
5110+
5111+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
5112+ BIT(FLOW_DISSECTOR_KEY_CONTROL) |
5113+ BIT(FLOW_DISSECTOR_KEY_BASIC) |
5114+ BIT(FLOW_DISSECTOR_KEY_PORTS);
5115+ return 0;
5116+}
5117+
5118+static void flow_offload_mangle(struct flow_action_entry *entry,
5119+ enum flow_action_mangle_base htype, u32 offset,
5120+ const __be32 *value, const __be32 *mask)
5121+{
5122+ entry->id = FLOW_ACTION_MANGLE;
5123+ entry->mangle.htype = htype;
5124+ entry->mangle.offset = offset;
5125+ memcpy(&entry->mangle.mask, mask, sizeof(u32));
5126+ memcpy(&entry->mangle.val, value, sizeof(u32));
5127+}
5128+
5129+static inline struct flow_action_entry *
5130+flow_action_entry_next(struct nf_flow_rule *flow_rule)
5131+{
5132+ int i = flow_rule->rule->action.num_entries++;
5133+
5134+ return &flow_rule->rule->action.entries[i];
5135+}
5136+
5137+static int flow_offload_eth_src(struct net *net,
5138+ const struct flow_offload *flow,
5139+ enum flow_offload_tuple_dir dir,
5140+ struct nf_flow_rule *flow_rule)
5141+{
5142+ struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
5143+ struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
5144+ const struct flow_offload_tuple *other_tuple, *this_tuple;
5145+ struct net_device *dev = NULL;
5146+ const unsigned char *addr;
5147+ u32 mask, val;
5148+ u16 val16;
5149+
5150+ this_tuple = &flow->tuplehash[dir].tuple;
5151+
5152+ switch (this_tuple->xmit_type) {
5153+ case FLOW_OFFLOAD_XMIT_DIRECT:
5154+ addr = this_tuple->out.h_source;
5155+ break;
5156+ case FLOW_OFFLOAD_XMIT_NEIGH:
5157+ other_tuple = &flow->tuplehash[!dir].tuple;
5158+ dev = dev_get_by_index(net, other_tuple->iifidx);
5159+ if (!dev)
5160+ return -ENOENT;
5161+
5162+ addr = dev->dev_addr;
5163+ break;
5164+ default:
5165+ return -EOPNOTSUPP;
5166+ }
5167+
5168+ mask = ~0xffff0000;
5169+ memcpy(&val16, addr, 2);
5170+ val = val16 << 16;
5171+ flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
5172+ &val, &mask);
5173+
5174+ mask = ~0xffffffff;
5175+ memcpy(&val, addr + 2, 4);
5176+ flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
5177+ &val, &mask);
5178+
5179+ if (dev)
5180+ dev_put(dev);
5181+
5182+ return 0;
5183+}
5184+
5185+static int flow_offload_eth_dst(struct net *net,
5186+ const struct flow_offload *flow,
5187+ enum flow_offload_tuple_dir dir,
5188+ struct nf_flow_rule *flow_rule)
5189+{
5190+ struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
5191+ struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
5192+ const struct flow_offload_tuple *other_tuple, *this_tuple;
5193+ const struct dst_entry *dst_cache;
5194+ unsigned char ha[ETH_ALEN];
5195+ struct neighbour *n;
5196+ const void *daddr;
5197+ u32 mask, val;
5198+ u8 nud_state;
5199+ u16 val16;
5200+
5201+ this_tuple = &flow->tuplehash[dir].tuple;
5202+
5203+ switch (this_tuple->xmit_type) {
5204+ case FLOW_OFFLOAD_XMIT_DIRECT:
5205+ ether_addr_copy(ha, this_tuple->out.h_dest);
5206+ break;
5207+ case FLOW_OFFLOAD_XMIT_NEIGH:
5208+ other_tuple = &flow->tuplehash[!dir].tuple;
5209+ daddr = &other_tuple->src_v4;
5210+ dst_cache = this_tuple->dst_cache;
5211+ n = dst_neigh_lookup(dst_cache, daddr);
5212+ if (!n)
5213+ return -ENOENT;
5214+
5215+ read_lock_bh(&n->lock);
5216+ nud_state = n->nud_state;
5217+ ether_addr_copy(ha, n->ha);
5218+ read_unlock_bh(&n->lock);
5219+ neigh_release(n);
5220+
5221+ if (!(nud_state & NUD_VALID))
5222+ return -ENOENT;
5223+ break;
5224+ default:
5225+ return -EOPNOTSUPP;
5226+ }
5227+
5228+ mask = ~0xffffffff;
5229+ memcpy(&val, ha, 4);
5230+ flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
5231+ &val, &mask);
5232+
5233+ mask = ~0x0000ffff;
5234+ memcpy(&val16, ha + 4, 2);
5235+ val = val16;
5236+ flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
5237+ &val, &mask);
5238+
5239+ return 0;
5240+}
5241+
5242+static void flow_offload_ipv4_snat(struct net *net,
5243+ const struct flow_offload *flow,
5244+ enum flow_offload_tuple_dir dir,
5245+ struct nf_flow_rule *flow_rule)
5246+{
5247+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5248+ u32 mask = ~htonl(0xffffffff);
5249+ __be32 addr;
5250+ u32 offset;
5251+
5252+ switch (dir) {
5253+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5254+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
5255+ offset = offsetof(struct iphdr, saddr);
5256+ break;
5257+ case FLOW_OFFLOAD_DIR_REPLY:
5258+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
5259+ offset = offsetof(struct iphdr, daddr);
5260+ break;
5261+ default:
5262+ return;
5263+ }
5264+
5265+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
5266+ &addr, &mask);
5267+}
5268+
5269+static void flow_offload_ipv4_dnat(struct net *net,
5270+ const struct flow_offload *flow,
5271+ enum flow_offload_tuple_dir dir,
5272+ struct nf_flow_rule *flow_rule)
5273+{
5274+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5275+ u32 mask = ~htonl(0xffffffff);
5276+ __be32 addr;
5277+ u32 offset;
5278+
5279+ switch (dir) {
5280+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5281+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
5282+ offset = offsetof(struct iphdr, daddr);
5283+ break;
5284+ case FLOW_OFFLOAD_DIR_REPLY:
5285+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
5286+ offset = offsetof(struct iphdr, saddr);
5287+ break;
5288+ default:
5289+ return;
5290+ }
5291+
5292+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
5293+ &addr, &mask);
5294+}
5295+
5296+static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
5297+ unsigned int offset,
5298+ const __be32 *addr, const __be32 *mask)
5299+{
5300+ struct flow_action_entry *entry;
5301+ int i, j;
5302+
5303+ for (i = 0, j = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32), j++) {
5304+ entry = flow_action_entry_next(flow_rule);
5305+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
5306+ offset + i, &addr[j], mask);
5307+ }
5308+}
5309+
5310+static void flow_offload_ipv6_snat(struct net *net,
5311+ const struct flow_offload *flow,
5312+ enum flow_offload_tuple_dir dir,
5313+ struct nf_flow_rule *flow_rule)
5314+{
5315+ u32 mask = ~htonl(0xffffffff);
5316+ const __be32 *addr;
5317+ u32 offset;
5318+
5319+ switch (dir) {
5320+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5321+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
5322+ offset = offsetof(struct ipv6hdr, saddr);
5323+ break;
5324+ case FLOW_OFFLOAD_DIR_REPLY:
5325+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
5326+ offset = offsetof(struct ipv6hdr, daddr);
5327+ break;
5328+ default:
5329+ return;
5330+ }
5331+
5332+ flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
5333+}
5334+
5335+static void flow_offload_ipv6_dnat(struct net *net,
5336+ const struct flow_offload *flow,
5337+ enum flow_offload_tuple_dir dir,
5338+ struct nf_flow_rule *flow_rule)
5339+{
5340+ u32 mask = ~htonl(0xffffffff);
5341+ const __be32 *addr;
5342+ u32 offset;
5343+
5344+ switch (dir) {
5345+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5346+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
5347+ offset = offsetof(struct ipv6hdr, daddr);
5348+ break;
5349+ case FLOW_OFFLOAD_DIR_REPLY:
5350+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
5351+ offset = offsetof(struct ipv6hdr, saddr);
5352+ break;
5353+ default:
5354+ return;
5355+ }
5356+
5357+ flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
5358+}
5359+
5360+static int flow_offload_l4proto(const struct flow_offload *flow)
5361+{
5362+ u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
5363+ u8 type = 0;
5364+
5365+ switch (protonum) {
5366+ case IPPROTO_TCP:
5367+ type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
5368+ break;
5369+ case IPPROTO_UDP:
5370+ type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
5371+ break;
5372+ default:
5373+ break;
5374+ }
5375+
5376+ return type;
5377+}
5378+
5379+static void flow_offload_port_snat(struct net *net,
5380+ const struct flow_offload *flow,
5381+ enum flow_offload_tuple_dir dir,
5382+ struct nf_flow_rule *flow_rule)
5383+{
5384+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5385+ u32 mask, port;
5386+ u32 offset;
5387+
5388+ switch (dir) {
5389+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5390+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
5391+ offset = 0; /* offsetof(struct tcphdr, source); */
5392+ port = htonl(port << 16);
5393+ mask = ~htonl(0xffff0000);
5394+ break;
5395+ case FLOW_OFFLOAD_DIR_REPLY:
5396+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
5397+ offset = 0; /* offsetof(struct tcphdr, dest); */
5398+ port = htonl(port);
5399+ mask = ~htonl(0xffff);
5400+ break;
5401+ default:
5402+ return;
5403+ }
5404+
5405+ flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
5406+ &port, &mask);
5407+}
5408+
5409+static void flow_offload_port_dnat(struct net *net,
5410+ const struct flow_offload *flow,
5411+ enum flow_offload_tuple_dir dir,
5412+ struct nf_flow_rule *flow_rule)
5413+{
5414+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5415+ u32 mask, port;
5416+ u32 offset;
5417+
5418+ switch (dir) {
5419+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5420+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
5421+ offset = 0; /* offsetof(struct tcphdr, dest); */
5422+ port = htonl(port);
5423+ mask = ~htonl(0xffff);
5424+ break;
5425+ case FLOW_OFFLOAD_DIR_REPLY:
5426+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
5427+ offset = 0; /* offsetof(struct tcphdr, source); */
5428+ port = htonl(port << 16);
5429+ mask = ~htonl(0xffff0000);
5430+ break;
5431+ default:
5432+ return;
5433+ }
5434+
5435+ flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
5436+ &port, &mask);
5437+}
5438+
5439+static void flow_offload_ipv4_checksum(struct net *net,
5440+ const struct flow_offload *flow,
5441+ struct nf_flow_rule *flow_rule)
5442+{
5443+ u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
5444+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5445+
5446+ entry->id = FLOW_ACTION_CSUM;
5447+ entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;
5448+
5449+ switch (protonum) {
5450+ case IPPROTO_TCP:
5451+ entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
5452+ break;
5453+ case IPPROTO_UDP:
5454+ entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
5455+ break;
5456+ }
5457+}
5458+
5459+static void flow_offload_redirect(struct net *net,
5460+ const struct flow_offload *flow,
5461+ enum flow_offload_tuple_dir dir,
5462+ struct nf_flow_rule *flow_rule)
5463+{
5464+ const struct flow_offload_tuple *this_tuple, *other_tuple;
5465+ struct flow_action_entry *entry;
5466+ struct net_device *dev;
5467+ int ifindex;
5468+
5469+ this_tuple = &flow->tuplehash[dir].tuple;
5470+ switch (this_tuple->xmit_type) {
5471+ case FLOW_OFFLOAD_XMIT_DIRECT:
5472+ this_tuple = &flow->tuplehash[dir].tuple;
5473+ ifindex = this_tuple->out.hw_ifidx;
5474+ break;
5475+ case FLOW_OFFLOAD_XMIT_NEIGH:
5476+ other_tuple = &flow->tuplehash[!dir].tuple;
5477+ ifindex = other_tuple->iifidx;
5478+ break;
5479+ default:
5480+ return;
5481+ }
5482+
5483+ dev = dev_get_by_index(net, ifindex);
5484+ if (!dev)
5485+ return;
5486+
5487+ entry = flow_action_entry_next(flow_rule);
5488+ entry->id = FLOW_ACTION_REDIRECT;
5489+ entry->dev = dev;
5490+}
5491+
5492+static void flow_offload_encap_tunnel(const struct flow_offload *flow,
5493+ enum flow_offload_tuple_dir dir,
5494+ struct nf_flow_rule *flow_rule)
5495+{
5496+ const struct flow_offload_tuple *this_tuple;
5497+ struct flow_action_entry *entry;
5498+ struct dst_entry *dst;
5499+
5500+ this_tuple = &flow->tuplehash[dir].tuple;
5501+ if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
5502+ return;
5503+
5504+ dst = this_tuple->dst_cache;
5505+ if (dst && dst->lwtstate) {
5506+ struct ip_tunnel_info *tun_info;
5507+
5508+ tun_info = lwt_tun_info(dst->lwtstate);
5509+ if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
5510+ entry = flow_action_entry_next(flow_rule);
5511+ entry->id = FLOW_ACTION_TUNNEL_ENCAP;
5512+ entry->tunnel = tun_info;
5513+ }
5514+ }
5515+}
5516+
5517+static void flow_offload_decap_tunnel(const struct flow_offload *flow,
5518+ enum flow_offload_tuple_dir dir,
5519+ struct nf_flow_rule *flow_rule)
5520+{
5521+ const struct flow_offload_tuple *other_tuple;
5522+ struct flow_action_entry *entry;
5523+ struct dst_entry *dst;
5524+
5525+ other_tuple = &flow->tuplehash[!dir].tuple;
5526+ if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
5527+ return;
5528+
5529+ dst = other_tuple->dst_cache;
5530+ if (dst && dst->lwtstate) {
5531+ struct ip_tunnel_info *tun_info;
5532+
5533+ tun_info = lwt_tun_info(dst->lwtstate);
5534+ if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
5535+ entry = flow_action_entry_next(flow_rule);
5536+ entry->id = FLOW_ACTION_TUNNEL_DECAP;
5537+ }
5538+ }
5539+}
5540+
5541+static int
5542+nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
5543+ enum flow_offload_tuple_dir dir,
5544+ struct nf_flow_rule *flow_rule)
5545+{
5546+ const struct flow_offload_tuple *other_tuple;
5547+ const struct flow_offload_tuple *tuple;
5548+ int i;
5549+
5550+ flow_offload_decap_tunnel(flow, dir, flow_rule);
5551+ flow_offload_encap_tunnel(flow, dir, flow_rule);
5552+
5553+ if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
5554+ flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
5555+ return -1;
5556+
5557+ tuple = &flow->tuplehash[dir].tuple;
5558+
5559+ for (i = 0; i < tuple->encap_num; i++) {
5560+ struct flow_action_entry *entry;
5561+
5562+ if (tuple->in_vlan_ingress & BIT(i))
5563+ continue;
5564+
5565+ if (tuple->encap[i].proto == htons(ETH_P_8021Q)) {
5566+ entry = flow_action_entry_next(flow_rule);
5567+ entry->id = FLOW_ACTION_VLAN_POP;
5568+ }
5569+ }
5570+
5571+ other_tuple = &flow->tuplehash[!dir].tuple;
5572+
5573+ for (i = 0; i < other_tuple->encap_num; i++) {
5574+ struct flow_action_entry *entry;
5575+
5576+ if (other_tuple->in_vlan_ingress & BIT(i))
5577+ continue;
5578+
5579+ entry = flow_action_entry_next(flow_rule);
5580+
5581+ switch (other_tuple->encap[i].proto) {
5582+ case htons(ETH_P_PPP_SES):
5583+ entry->id = FLOW_ACTION_PPPOE_PUSH;
5584+ entry->pppoe.sid = other_tuple->encap[i].id;
5585+ break;
5586+ case htons(ETH_P_8021Q):
5587+ entry->id = FLOW_ACTION_VLAN_PUSH;
5588+ entry->vlan.vid = other_tuple->encap[i].id;
5589+ entry->vlan.proto = other_tuple->encap[i].proto;
5590+ break;
5591+ }
5592+ }
5593+
5594+ return 0;
5595+}
5596+
5597+int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
5598+ enum flow_offload_tuple_dir dir,
5599+ struct nf_flow_rule *flow_rule)
5600+{
5601+ if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
5602+ return -1;
5603+
5604+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
5605+ flow_offload_ipv4_snat(net, flow, dir, flow_rule);
5606+ flow_offload_port_snat(net, flow, dir, flow_rule);
5607+ }
5608+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
5609+ flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
5610+ flow_offload_port_dnat(net, flow, dir, flow_rule);
5611+ }
5612+ if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
5613+ test_bit(NF_FLOW_DNAT, &flow->flags))
5614+ flow_offload_ipv4_checksum(net, flow, flow_rule);
5615+
5616+ flow_offload_redirect(net, flow, dir, flow_rule);
5617+
5618+ return 0;
5619+}
5620+EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);
5621+
5622+int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
5623+ enum flow_offload_tuple_dir dir,
5624+ struct nf_flow_rule *flow_rule)
5625+{
5626+ if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
5627+ return -1;
5628+
5629+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
5630+ flow_offload_ipv6_snat(net, flow, dir, flow_rule);
5631+ flow_offload_port_snat(net, flow, dir, flow_rule);
5632+ }
5633+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
5634+ flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
5635+ flow_offload_port_dnat(net, flow, dir, flow_rule);
5636+ }
5637+
5638+ flow_offload_redirect(net, flow, dir, flow_rule);
5639+
5640+ return 0;
5641+}
5642+EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);
5643+
5644+#define NF_FLOW_RULE_ACTION_MAX 16
5645+
5646+static struct nf_flow_rule *
5647+nf_flow_offload_rule_alloc(struct net *net,
5648+ const struct flow_offload_work *offload,
5649+ enum flow_offload_tuple_dir dir)
5650+{
5651+ const struct nf_flowtable *flowtable = offload->flowtable;
5652+ const struct flow_offload_tuple *tuple, *other_tuple;
5653+ const struct flow_offload *flow = offload->flow;
5654+ struct dst_entry *other_dst = NULL;
5655+ struct nf_flow_rule *flow_rule;
5656+ int err = -ENOMEM;
5657+
5658+ flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
5659+ if (!flow_rule)
5660+ goto err_flow;
5661+
5662+ flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
5663+ if (!flow_rule->rule)
5664+ goto err_flow_rule;
5665+
5666+ flow_rule->rule->match.dissector = &flow_rule->match.dissector;
5667+ flow_rule->rule->match.mask = &flow_rule->match.mask;
5668+ flow_rule->rule->match.key = &flow_rule->match.key;
5669+
5670+ tuple = &flow->tuplehash[dir].tuple;
5671+ other_tuple = &flow->tuplehash[!dir].tuple;
5672+ if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
5673+ other_dst = other_tuple->dst_cache;
5674+
5675+ err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
5676+ if (err < 0)
5677+ goto err_flow_match;
5678+
5679+ flow_rule->rule->action.num_entries = 0;
5680+ if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
5681+ goto err_flow_match;
5682+
5683+ return flow_rule;
5684+
5685+err_flow_match:
5686+ kfree(flow_rule->rule);
5687+err_flow_rule:
5688+ kfree(flow_rule);
5689+err_flow:
5690+ return NULL;
5691+}
5692+
5693+static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
5694+{
5695+ struct flow_action_entry *entry;
5696+ int i;
5697+
5698+ for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
5699+ entry = &flow_rule->rule->action.entries[i];
5700+ if (entry->id != FLOW_ACTION_REDIRECT)
5701+ continue;
5702+
5703+ dev_put(entry->dev);
5704+ }
5705+ kfree(flow_rule->rule);
5706+ kfree(flow_rule);
5707+}
5708+
5709+static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
5710+{
5711+ int i;
5712+
5713+ for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
5714+ __nf_flow_offload_destroy(flow_rule[i]);
5715+}
5716+
5717+static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
5718+ struct nf_flow_rule *flow_rule[])
5719+{
5720+ struct net *net = read_pnet(&offload->flowtable->net);
5721+
5722+ flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
5723+ FLOW_OFFLOAD_DIR_ORIGINAL);
5724+ if (!flow_rule[0])
5725+ return -ENOMEM;
5726+
5727+ flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
5728+ FLOW_OFFLOAD_DIR_REPLY);
5729+ if (!flow_rule[1]) {
5730+ __nf_flow_offload_destroy(flow_rule[0]);
5731+ return -ENOMEM;
5732+ }
5733+
5734+ return 0;
5735+}
5736+
5737+static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
5738+ __be16 proto, int priority,
5739+ enum flow_cls_command cmd,
5740+ const struct flow_offload_tuple *tuple,
5741+ struct netlink_ext_ack *extack)
5742+{
5743+ cls_flow->common.protocol = proto;
5744+ cls_flow->common.prio = priority;
5745+ cls_flow->common.extack = extack;
5746+ cls_flow->command = cmd;
5747+ cls_flow->cookie = (unsigned long)tuple;
5748+}
5749+
5750+static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
5751+ struct flow_offload *flow,
5752+ struct nf_flow_rule *flow_rule,
5753+ enum flow_offload_tuple_dir dir,
5754+ int priority, int cmd,
5755+ struct flow_stats *stats,
5756+ struct list_head *block_cb_list)
5757+{
5758+ struct flow_cls_offload cls_flow = {};
5759+ struct flow_block_cb *block_cb;
5760+ struct netlink_ext_ack extack;
5761+ __be16 proto = ETH_P_ALL;
5762+ int err, i = 0;
5763+
5764+ nf_flow_offload_init(&cls_flow, proto, priority, cmd,
5765+ &flow->tuplehash[dir].tuple, &extack);
5766+ if (cmd == FLOW_CLS_REPLACE)
5767+ cls_flow.rule = flow_rule->rule;
5768+
developer207b39d2022-10-07 15:57:16 +08005769+ down_write(&flowtable->flow_block_lock);
developer8cb3ac72022-07-04 10:55:14 +08005770+ list_for_each_entry(block_cb, block_cb_list, list) {
5771+ err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
5772+ block_cb->cb_priv);
5773+ if (err < 0)
5774+ continue;
5775+
5776+ i++;
5777+ }
developer207b39d2022-10-07 15:57:16 +08005778+ up_write(&flowtable->flow_block_lock);
developer8cb3ac72022-07-04 10:55:14 +08005779+
5780+ if (cmd == FLOW_CLS_STATS)
5781+ memcpy(stats, &cls_flow.stats, sizeof(*stats));
5782+
5783+ return i;
5784+}
5785+
5786+static int flow_offload_tuple_add(struct flow_offload_work *offload,
5787+ struct nf_flow_rule *flow_rule,
5788+ enum flow_offload_tuple_dir dir)
5789+{
5790+ return nf_flow_offload_tuple(offload->flowtable, offload->flow,
5791+ flow_rule, dir, offload->priority,
5792+ FLOW_CLS_REPLACE, NULL,
5793+ &offload->flowtable->flow_block.cb_list);
5794+}
5795+
5796+static void flow_offload_tuple_del(struct flow_offload_work *offload,
5797+ enum flow_offload_tuple_dir dir)
5798+{
5799+ nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
5800+ offload->priority, FLOW_CLS_DESTROY, NULL,
5801+ &offload->flowtable->flow_block.cb_list);
5802+}
5803+
5804+static int flow_offload_rule_add(struct flow_offload_work *offload,
5805+ struct nf_flow_rule *flow_rule[])
5806+{
5807+ int ok_count = 0;
5808+
5809+ ok_count += flow_offload_tuple_add(offload, flow_rule[0],
5810+ FLOW_OFFLOAD_DIR_ORIGINAL);
5811+ ok_count += flow_offload_tuple_add(offload, flow_rule[1],
5812+ FLOW_OFFLOAD_DIR_REPLY);
5813+ if (ok_count == 0)
5814+ return -ENOENT;
5815+
5816+ return 0;
5817+}
5818+
5819+static void flow_offload_work_add(struct flow_offload_work *offload)
5820+{
5821+ struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
5822+ int err;
5823+
5824+ err = nf_flow_offload_alloc(offload, flow_rule);
5825+ if (err < 0)
5826+ return;
5827+
5828+ err = flow_offload_rule_add(offload, flow_rule);
5829+ if (err < 0)
5830+ goto out;
5831+
5832+ set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
5833+
5834+out:
5835+ nf_flow_offload_destroy(flow_rule);
5836+}
5837+
5838+static void flow_offload_work_del(struct flow_offload_work *offload)
5839+{
5840+ clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
5841+ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
5842+ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
5843+ set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
5844+}
5845+
5846+static void flow_offload_tuple_stats(struct flow_offload_work *offload,
5847+ enum flow_offload_tuple_dir dir,
5848+ struct flow_stats *stats)
5849+{
5850+ nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
5851+ offload->priority, FLOW_CLS_STATS, stats,
5852+ &offload->flowtable->flow_block.cb_list);
5853+}
5854+
5855+static void flow_offload_work_stats(struct flow_offload_work *offload)
5856+{
5857+ struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
5858+ u64 lastused;
5859+
5860+ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
5861+ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);
5862+
5863+ lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
5864+ offload->flow->timeout = max_t(u64, offload->flow->timeout,
5865+ lastused + flow_offload_get_timeout(offload->flow));
5866+
5867+ if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
5868+ if (stats[0].pkts)
5869+ nf_ct_acct_add(offload->flow->ct,
5870+ FLOW_OFFLOAD_DIR_ORIGINAL,
5871+ stats[0].pkts, stats[0].bytes);
5872+ if (stats[1].pkts)
5873+ nf_ct_acct_add(offload->flow->ct,
5874+ FLOW_OFFLOAD_DIR_REPLY,
5875+ stats[1].pkts, stats[1].bytes);
5876+ }
5877+}
5878+
5879+static void flow_offload_work_handler(struct work_struct *work)
5880+{
5881+ struct flow_offload_work *offload;
5882+
5883+ offload = container_of(work, struct flow_offload_work, work);
5884+ switch (offload->cmd) {
5885+ case FLOW_CLS_REPLACE:
5886+ flow_offload_work_add(offload);
5887+ break;
5888+ case FLOW_CLS_DESTROY:
5889+ flow_offload_work_del(offload);
5890+ break;
5891+ case FLOW_CLS_STATS:
5892+ flow_offload_work_stats(offload);
5893+ break;
5894+ default:
5895+ WARN_ON_ONCE(1);
5896+ }
5897+
5898+ clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags);
5899+ kfree(offload);
5900+}
5901+
5902+static void flow_offload_queue_work(struct flow_offload_work *offload)
5903+{
5904+ if (offload->cmd == FLOW_CLS_REPLACE)
5905+ queue_work(nf_flow_offload_add_wq, &offload->work);
5906+ else if (offload->cmd == FLOW_CLS_DESTROY)
5907+ queue_work(nf_flow_offload_del_wq, &offload->work);
5908+ else
5909+ queue_work(nf_flow_offload_stats_wq, &offload->work);
5910+}
5911+
5912+static struct flow_offload_work *
5913+nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
5914+ struct flow_offload *flow, unsigned int cmd)
5915+{
5916+ struct flow_offload_work *offload;
5917+
5918+ if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags))
5919+ return NULL;
5920+
5921+ offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
5922+ if (!offload) {
5923+ clear_bit(NF_FLOW_HW_PENDING, &flow->flags);
5924+ return NULL;
5925+ }
5926+
5927+ offload->cmd = cmd;
5928+ offload->flow = flow;
5929+ offload->priority = flowtable->priority;
5930+ offload->flowtable = flowtable;
5931+ INIT_WORK(&offload->work, flow_offload_work_handler);
5932+
5933+ return offload;
5934+}
5935+
5936+
5937+void nf_flow_offload_add(struct nf_flowtable *flowtable,
5938+ struct flow_offload *flow)
5939+{
5940+ struct flow_offload_work *offload;
5941+
5942+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
5943+ if (!offload)
5944+ return;
5945+
5946+ flow_offload_queue_work(offload);
5947+}
5948+
5949+void nf_flow_offload_del(struct nf_flowtable *flowtable,
5950+ struct flow_offload *flow)
5951+{
5952+ struct flow_offload_work *offload;
5953+
5954+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY);
5955+ if (!offload)
5956+ return;
5957+
5958+ set_bit(NF_FLOW_HW_DYING, &flow->flags);
5959+ flow_offload_queue_work(offload);
5960+}
5961+
5962+void nf_flow_offload_stats(struct nf_flowtable *flowtable,
developerec862f42023-03-23 13:08:45 +08005963+ struct flow_offload *flow, bool force)
developer8cb3ac72022-07-04 10:55:14 +08005964+{
5965+ struct flow_offload_work *offload;
5966+ __s32 delta;
5967+
developerec862f42023-03-23 13:08:45 +08005968+ if (!force) {
5969+ delta = nf_flow_timeout_delta(flow->timeout);
5970+ if ((delta >= (9 * flow_offload_get_timeout(flow)) / 10))
5971+ return;
5972+ }
developer8cb3ac72022-07-04 10:55:14 +08005973+
5974+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
5975+ if (!offload)
5976+ return;
5977+
5978+ flow_offload_queue_work(offload);
5979+}
5980+
5981+void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
5982+{
5983+ if (nf_flowtable_hw_offload(flowtable)) {
5984+ flush_workqueue(nf_flow_offload_add_wq);
5985+ flush_workqueue(nf_flow_offload_del_wq);
5986+ flush_workqueue(nf_flow_offload_stats_wq);
5987+ }
5988+}
5989+
5990+static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
5991+ struct flow_block_offload *bo,
5992+ enum flow_block_command cmd)
5993+{
5994+ struct flow_block_cb *block_cb, *next;
5995+ int err = 0;
5996+
developera54478c2022-10-01 16:41:46 +08005997+ down_read(&flowtable->flow_block_lock);
5998+
developer8cb3ac72022-07-04 10:55:14 +08005999+ switch (cmd) {
6000+ case FLOW_BLOCK_BIND:
6001+ list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
6002+ break;
6003+ case FLOW_BLOCK_UNBIND:
6004+ list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
6005+ list_del(&block_cb->list);
6006+ flow_block_cb_free(block_cb);
6007+ }
6008+ break;
6009+ default:
6010+ WARN_ON_ONCE(1);
6011+ err = -EOPNOTSUPP;
6012+ }
6013+
developera54478c2022-10-01 16:41:46 +08006014+ up_read(&flowtable->flow_block_lock);
6015+
developer8cb3ac72022-07-04 10:55:14 +08006016+ return err;
6017+}
6018+
6019+static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
6020+ struct net *net,
6021+ enum flow_block_command cmd,
6022+ struct nf_flowtable *flowtable,
6023+ struct netlink_ext_ack *extack)
6024+{
6025+ memset(bo, 0, sizeof(*bo));
6026+ bo->net = net;
6027+ bo->block = &flowtable->flow_block;
6028+ bo->command = cmd;
6029+ bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
6030+ bo->extack = extack;
6031+ INIT_LIST_HEAD(&bo->cb_list);
6032+}
6033+
6034+static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
6035+ struct nf_flowtable *flowtable,
6036+ struct net_device *dev,
6037+ enum flow_block_command cmd,
6038+ struct netlink_ext_ack *extack)
6039+{
6040+ nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
6041+ extack);
6042+ flow_indr_block_call(dev, bo, cmd);
6043+
6044+ if (list_empty(&bo->cb_list))
6045+ return -EOPNOTSUPP;
6046+
6047+ return 0;
6048+}
6049+
6050+static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
6051+ struct nf_flowtable *flowtable,
6052+ struct net_device *dev,
6053+ enum flow_block_command cmd,
6054+ struct netlink_ext_ack *extack)
6055+{
6056+ int err;
6057+
6058+ nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
6059+ extack);
6060+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
6061+ if (err < 0)
6062+ return err;
6063+
6064+ return 0;
6065+}
6066+
6067+int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
6068+ struct net_device *dev,
6069+ enum flow_block_command cmd)
6070+{
6071+ struct netlink_ext_ack extack = {};
6072+ struct flow_block_offload bo;
6073+ int err;
6074+
6075+ if (!nf_flowtable_hw_offload(flowtable))
6076+ return 0;
6077+
6078+ if (dev->netdev_ops->ndo_setup_tc)
6079+ err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
6080+ &extack);
6081+ else
6082+ err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
6083+ &extack);
6084+ if (err < 0)
6085+ return err;
6086+
6087+ return nf_flow_table_block_setup(flowtable, &bo, cmd);
6088+}
6089+EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);
6090+
6091+int nf_flow_table_offload_init(void)
6092+{
6093+ nf_flow_offload_add_wq = alloc_workqueue("nf_ft_offload_add",
6094+ WQ_UNBOUND | WQ_SYSFS, 0);
6095+ if (!nf_flow_offload_add_wq)
6096+ return -ENOMEM;
6097+
6098+ nf_flow_offload_del_wq = alloc_workqueue("nf_ft_offload_del",
6099+ WQ_UNBOUND | WQ_SYSFS, 0);
6100+ if (!nf_flow_offload_del_wq)
6101+ goto err_del_wq;
6102+
6103+ nf_flow_offload_stats_wq = alloc_workqueue("nf_ft_offload_stats",
6104+ WQ_UNBOUND | WQ_SYSFS, 0);
6105+ if (!nf_flow_offload_stats_wq)
6106+ goto err_stats_wq;
6107+
6108+ return 0;
6109+
6110+err_stats_wq:
6111+ destroy_workqueue(nf_flow_offload_del_wq);
6112+err_del_wq:
6113+ destroy_workqueue(nf_flow_offload_add_wq);
6114+ return -ENOMEM;
6115+}
6116+
6117+void nf_flow_table_offload_exit(void)
6118+{
6119+ destroy_workqueue(nf_flow_offload_add_wq);
6120+ destroy_workqueue(nf_flow_offload_del_wq);
6121+ destroy_workqueue(nf_flow_offload_stats_wq);
6122+}
6123diff --git a/net/netfilter/xt_FLOWOFFLOAD.c b/net/netfilter/xt_FLOWOFFLOAD.c
6124new file mode 100644
developer9fdc0e82023-05-12 14:21:17 +08006125index 0000000..12f067c
developer8cb3ac72022-07-04 10:55:14 +08006126--- /dev/null
6127+++ b/net/netfilter/xt_FLOWOFFLOAD.c
developer9fdc0e82023-05-12 14:21:17 +08006128@@ -0,0 +1,794 @@
developer8cb3ac72022-07-04 10:55:14 +08006129+/*
6130+ * Copyright (C) 2018-2021 Felix Fietkau <nbd@nbd.name>
6131+ *
6132+ * This program is free software; you can redistribute it and/or modify
6133+ * it under the terms of the GNU General Public License version 2 as
6134+ * published by the Free Software Foundation.
6135+ */
6136+#include <linux/module.h>
6137+#include <linux/init.h>
6138+#include <linux/netfilter.h>
6139+#include <linux/netfilter/xt_FLOWOFFLOAD.h>
6140+#include <linux/if_vlan.h>
6141+#include <net/ip.h>
6142+#include <net/netfilter/nf_conntrack.h>
6143+#include <net/netfilter/nf_conntrack_extend.h>
6144+#include <net/netfilter/nf_conntrack_helper.h>
6145+#include <net/netfilter/nf_flow_table.h>
6146+
6147+struct xt_flowoffload_hook {
6148+ struct hlist_node list;
6149+ struct nf_hook_ops ops;
6150+ struct net *net;
6151+ bool registered;
6152+ bool used;
6153+};
6154+
6155+struct xt_flowoffload_table {
6156+ struct nf_flowtable ft;
6157+ struct hlist_head hooks;
6158+ struct delayed_work work;
6159+};
6160+
6161+struct nf_forward_info {
6162+ const struct net_device *indev;
6163+ const struct net_device *outdev;
6164+ const struct net_device *hw_outdev;
6165+ struct id {
6166+ __u16 id;
6167+ __be16 proto;
6168+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
6169+ u8 num_encaps;
6170+ u8 ingress_vlans;
6171+ u8 h_source[ETH_ALEN];
6172+ u8 h_dest[ETH_ALEN];
6173+ enum flow_offload_xmit_type xmit_type;
6174+};
6175+
6176+static DEFINE_SPINLOCK(hooks_lock);
6177+
6178+struct xt_flowoffload_table flowtable[2];
6179+
6180+static unsigned int
6181+xt_flowoffload_net_hook(void *priv, struct sk_buff *skb,
6182+ const struct nf_hook_state *state)
6183+{
6184+ struct vlan_ethhdr *veth;
6185+ __be16 proto;
6186+
6187+ switch (skb->protocol) {
6188+ case htons(ETH_P_8021Q):
6189+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
6190+ proto = veth->h_vlan_encapsulated_proto;
6191+ break;
6192+ case htons(ETH_P_PPP_SES):
6193+ proto = nf_flow_pppoe_proto(skb);
6194+ break;
6195+ default:
6196+ proto = skb->protocol;
6197+ break;
6198+ }
6199+
6200+ switch (proto) {
6201+ case htons(ETH_P_IP):
6202+ return nf_flow_offload_ip_hook(priv, skb, state);
6203+ case htons(ETH_P_IPV6):
6204+ return nf_flow_offload_ipv6_hook(priv, skb, state);
6205+ }
6206+
6207+ return NF_ACCEPT;
6208+}
6209+
6210+static int
6211+xt_flowoffload_create_hook(struct xt_flowoffload_table *table,
6212+ struct net_device *dev)
6213+{
6214+ struct xt_flowoffload_hook *hook;
6215+ struct nf_hook_ops *ops;
6216+
6217+ hook = kzalloc(sizeof(*hook), GFP_ATOMIC);
6218+ if (!hook)
6219+ return -ENOMEM;
6220+
6221+ ops = &hook->ops;
6222+ ops->pf = NFPROTO_NETDEV;
6223+ ops->hooknum = NF_NETDEV_INGRESS;
6224+ ops->priority = 10;
6225+ ops->priv = &table->ft;
6226+ ops->hook = xt_flowoffload_net_hook;
6227+ ops->dev = dev;
6228+
6229+ hlist_add_head(&hook->list, &table->hooks);
6230+ mod_delayed_work(system_power_efficient_wq, &table->work, 0);
6231+
6232+ return 0;
6233+}
6234+
6235+static struct xt_flowoffload_hook *
6236+flow_offload_lookup_hook(struct xt_flowoffload_table *table,
6237+ struct net_device *dev)
6238+{
6239+ struct xt_flowoffload_hook *hook;
6240+
6241+ hlist_for_each_entry(hook, &table->hooks, list) {
6242+ if (hook->ops.dev == dev)
6243+ return hook;
6244+ }
6245+
6246+ return NULL;
6247+}
6248+
6249+static void
6250+xt_flowoffload_check_device(struct xt_flowoffload_table *table,
6251+ struct net_device *dev)
6252+{
6253+ struct xt_flowoffload_hook *hook;
6254+
6255+ if (!dev)
6256+ return;
6257+
6258+ spin_lock_bh(&hooks_lock);
6259+ hook = flow_offload_lookup_hook(table, dev);
6260+ if (hook)
6261+ hook->used = true;
6262+ else
6263+ xt_flowoffload_create_hook(table, dev);
6264+ spin_unlock_bh(&hooks_lock);
6265+}
6266+
6267+static void
6268+xt_flowoffload_register_hooks(struct xt_flowoffload_table *table)
6269+{
6270+ struct xt_flowoffload_hook *hook;
6271+
6272+restart:
6273+ hlist_for_each_entry(hook, &table->hooks, list) {
6274+ if (hook->registered)
6275+ continue;
6276+
6277+ hook->registered = true;
6278+ hook->net = dev_net(hook->ops.dev);
6279+ spin_unlock_bh(&hooks_lock);
6280+ nf_register_net_hook(hook->net, &hook->ops);
6281+ if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
6282+ table->ft.type->setup(&table->ft, hook->ops.dev,
6283+ FLOW_BLOCK_BIND);
6284+ spin_lock_bh(&hooks_lock);
6285+ goto restart;
6286+ }
6287+
6288+}
6289+
6290+static bool
6291+xt_flowoffload_cleanup_hooks(struct xt_flowoffload_table *table)
6292+{
6293+ struct xt_flowoffload_hook *hook;
6294+ bool active = false;
6295+
6296+restart:
6297+ spin_lock_bh(&hooks_lock);
6298+ hlist_for_each_entry(hook, &table->hooks, list) {
6299+ if (hook->used || !hook->registered) {
6300+ active = true;
6301+ continue;
6302+ }
6303+
6304+ hlist_del(&hook->list);
6305+ spin_unlock_bh(&hooks_lock);
6306+ if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
6307+ table->ft.type->setup(&table->ft, hook->ops.dev,
6308+ FLOW_BLOCK_UNBIND);
6309+ nf_unregister_net_hook(hook->net, &hook->ops);
6310+ kfree(hook);
6311+ goto restart;
6312+ }
6313+ spin_unlock_bh(&hooks_lock);
6314+
6315+ return active;
6316+}
6317+
6318+static void
6319+xt_flowoffload_check_hook(struct flow_offload *flow, void *data)
6320+{
6321+ struct xt_flowoffload_table *table = data;
6322+ struct flow_offload_tuple *tuple0 = &flow->tuplehash[0].tuple;
6323+ struct flow_offload_tuple *tuple1 = &flow->tuplehash[1].tuple;
6324+ struct xt_flowoffload_hook *hook;
6325+
6326+ spin_lock_bh(&hooks_lock);
6327+ hlist_for_each_entry(hook, &table->hooks, list) {
6328+ if (hook->ops.dev->ifindex != tuple0->iifidx &&
6329+ hook->ops.dev->ifindex != tuple1->iifidx)
6330+ continue;
6331+
6332+ hook->used = true;
6333+ }
6334+ spin_unlock_bh(&hooks_lock);
6335+}
6336+
6337+static void
6338+xt_flowoffload_hook_work(struct work_struct *work)
6339+{
6340+ struct xt_flowoffload_table *table;
6341+ struct xt_flowoffload_hook *hook;
6342+ int err;
6343+
6344+ table = container_of(work, struct xt_flowoffload_table, work.work);
6345+
6346+ spin_lock_bh(&hooks_lock);
6347+ xt_flowoffload_register_hooks(table);
6348+ hlist_for_each_entry(hook, &table->hooks, list)
6349+ hook->used = false;
6350+ spin_unlock_bh(&hooks_lock);
6351+
6352+ err = nf_flow_table_iterate(&table->ft, xt_flowoffload_check_hook,
6353+ table);
6354+ if (err && err != -EAGAIN)
6355+ goto out;
6356+
6357+ if (!xt_flowoffload_cleanup_hooks(table))
6358+ return;
6359+
6360+out:
6361+ queue_delayed_work(system_power_efficient_wq, &table->work, HZ);
6362+}
6363+
6364+static bool
6365+xt_flowoffload_skip(struct sk_buff *skb, int family)
6366+{
6367+ if (skb_sec_path(skb))
6368+ return true;
6369+
6370+ if (family == NFPROTO_IPV4) {
6371+ const struct ip_options *opt = &(IPCB(skb)->opt);
6372+
6373+ if (unlikely(opt->optlen))
6374+ return true;
6375+ }
6376+
6377+ return false;
6378+}
6379+
6380+static enum flow_offload_xmit_type nf_xmit_type(struct dst_entry *dst)
6381+{
6382+ if (dst_xfrm(dst))
6383+ return FLOW_OFFLOAD_XMIT_XFRM;
6384+
6385+ return FLOW_OFFLOAD_XMIT_NEIGH;
6386+}
6387+
6388+static void nf_default_forward_path(struct nf_flow_route *route,
6389+ struct dst_entry *dst_cache,
6390+ enum ip_conntrack_dir dir,
6391+ struct net_device **dev)
6392+{
6393+ route->tuple[!dir].in.ifindex = dst_cache->dev->ifindex;
6394+ route->tuple[dir].dst = dst_cache;
6395+ route->tuple[dir].xmit_type = nf_xmit_type(dst_cache);
6396+}
6397+
6398+static bool nf_is_valid_ether_device(const struct net_device *dev)
6399+{
6400+ if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
6401+ dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
6402+ return false;
6403+
6404+ return true;
6405+}
6406+
6407+static void nf_dev_path_info(const struct net_device_path_stack *stack,
6408+ struct nf_forward_info *info,
6409+ unsigned char *ha)
6410+{
6411+ const struct net_device_path *path;
6412+ int i;
6413+
6414+ memcpy(info->h_dest, ha, ETH_ALEN);
6415+
6416+ for (i = 0; i < stack->num_paths; i++) {
6417+ path = &stack->path[i];
6418+
6419+ info->indev = path->dev;
6420+
6421+ switch (path->type) {
6422+ case DEV_PATH_ETHERNET:
6423+ case DEV_PATH_DSA:
6424+ case DEV_PATH_VLAN:
6425+ case DEV_PATH_PPPOE:
6426+ if (is_zero_ether_addr(info->h_source))
6427+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
6428+
6429+ if (path->type == DEV_PATH_ETHERNET)
6430+ break;
6431+ if (path->type == DEV_PATH_DSA) {
6432+ i = stack->num_paths;
6433+ break;
6434+ }
6435+
6436+ /* DEV_PATH_VLAN and DEV_PATH_PPPOE */
6437+ if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
6438+ info->indev = NULL;
6439+ break;
6440+ }
6441+ if (!info->outdev)
6442+ info->outdev = path->dev;
6443+ info->encap[info->num_encaps].id = path->encap.id;
6444+ info->encap[info->num_encaps].proto = path->encap.proto;
6445+ info->num_encaps++;
6446+ if (path->type == DEV_PATH_PPPOE)
6447+ memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
6448+ break;
6449+ case DEV_PATH_BRIDGE:
6450+ if (is_zero_ether_addr(info->h_source))
6451+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
6452+
6453+ switch (path->bridge.vlan_mode) {
6454+ case DEV_PATH_BR_VLAN_UNTAG_HW:
6455+ info->ingress_vlans |= BIT(info->num_encaps - 1);
6456+ break;
6457+ case DEV_PATH_BR_VLAN_TAG:
6458+ info->encap[info->num_encaps].id = path->bridge.vlan_id;
6459+ info->encap[info->num_encaps].proto = path->bridge.vlan_proto;
6460+ info->num_encaps++;
6461+ break;
6462+ case DEV_PATH_BR_VLAN_UNTAG:
6463+ info->num_encaps--;
6464+ break;
6465+ case DEV_PATH_BR_VLAN_KEEP:
6466+ break;
6467+ }
6468+ break;
6469+ default:
6470+ break;
6471+ }
6472+ }
6473+ if (!info->outdev)
6474+ info->outdev = info->indev;
6475+
6476+ info->hw_outdev = info->indev;
6477+
6478+ if (nf_is_valid_ether_device(info->indev))
6479+ info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
6480+}
6481+
6482+static int nf_dev_fill_forward_path(const struct nf_flow_route *route,
6483+ const struct dst_entry *dst_cache,
6484+ const struct nf_conn *ct,
6485+ enum ip_conntrack_dir dir, u8 *ha,
6486+ struct net_device_path_stack *stack)
6487+{
6488+ const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
6489+ struct net_device *dev = dst_cache->dev;
6490+ struct neighbour *n;
6491+ u8 nud_state;
6492+
6493+ if (!nf_is_valid_ether_device(dev))
6494+ goto out;
6495+
developer9fdc0e82023-05-12 14:21:17 +08006496+ if (ct->status & IPS_NAT_MASK) {
6497+ n = dst_neigh_lookup(dst_cache, daddr);
6498+ if (!n)
6499+ return -1;
developer8cb3ac72022-07-04 10:55:14 +08006500+
developer9fdc0e82023-05-12 14:21:17 +08006501+ read_lock_bh(&n->lock);
6502+ nud_state = n->nud_state;
6503+ ether_addr_copy(ha, n->ha);
6504+ read_unlock_bh(&n->lock);
6505+ neigh_release(n);
developer8cb3ac72022-07-04 10:55:14 +08006506+
developer9fdc0e82023-05-12 14:21:17 +08006507+ if (!(nud_state & NUD_VALID))
6508+ return -1;
6509+ }
developer64db8532023-04-28 13:56:00 +08006510+
developer8cb3ac72022-07-04 10:55:14 +08006511+out:
6512+ return dev_fill_forward_path(dev, ha, stack);
6513+}
6514+
developer9fdc0e82023-05-12 14:21:17 +08006515+static int nf_dev_forward_path(struct sk_buff *skb,
6516+ struct nf_flow_route *route,
developer8cb3ac72022-07-04 10:55:14 +08006517+ const struct nf_conn *ct,
6518+ enum ip_conntrack_dir dir,
6519+ struct net_device **devs)
6520+{
6521+ const struct dst_entry *dst = route->tuple[dir].dst;
developer9fdc0e82023-05-12 14:21:17 +08006522+ struct ethhdr *eth;
6523+ enum ip_conntrack_dir skb_dir;
developer8cb3ac72022-07-04 10:55:14 +08006524+ struct net_device_path_stack stack;
6525+ struct nf_forward_info info = {};
6526+ unsigned char ha[ETH_ALEN];
6527+ int i;
6528+
developer9fdc0e82023-05-12 14:21:17 +08006529+ if (!(ct->status & IPS_NAT_MASK) && skb_mac_header_was_set(skb)) {
6530+ eth = eth_hdr(skb);
6531+ skb_dir = CTINFO2DIR(skb_get_nfct(skb) & NFCT_INFOMASK);
6532+
6533+ if (skb_dir != dir) {
6534+ memcpy(ha, eth->h_source, ETH_ALEN);
6535+ memcpy(info.h_source, eth->h_dest, ETH_ALEN);
6536+ } else {
6537+ memcpy(ha, eth->h_dest, ETH_ALEN);
6538+ memcpy(info.h_source, eth->h_source, ETH_ALEN);
6539+ }
6540+ }
6541+
developer7e533772023-04-27 05:59:30 +08006542+ if (nf_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
developer8cb3ac72022-07-04 10:55:14 +08006543+ nf_dev_path_info(&stack, &info, ha);
6544+
6545+ devs[!dir] = (struct net_device *)info.indev;
6546+ if (!info.indev)
6547+ return -1;
6548+
6549+ route->tuple[!dir].in.ifindex = info.indev->ifindex;
6550+ for (i = 0; i < info.num_encaps; i++) {
6551+ route->tuple[!dir].in.encap[i].id = info.encap[i].id;
6552+ route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
6553+ }
6554+ route->tuple[!dir].in.num_encaps = info.num_encaps;
6555+ route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
6556+
6557+ if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
6558+ memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
6559+ memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
6560+ route->tuple[dir].out.ifindex = info.outdev->ifindex;
6561+ route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex;
6562+ route->tuple[dir].xmit_type = info.xmit_type;
6563+ }
6564+
6565+ return 0;
6566+}
6567+
6568+static int
6569+xt_flowoffload_route_dir(struct nf_flow_route *route, const struct nf_conn *ct,
6570+ enum ip_conntrack_dir dir,
6571+ const struct xt_action_param *par, int ifindex,
6572+ struct net_device **devs)
6573+{
6574+ struct dst_entry *dst = NULL;
6575+ struct flowi fl;
6576+
6577+ memset(&fl, 0, sizeof(fl));
6578+ switch (xt_family(par)) {
6579+ case NFPROTO_IPV4:
6580+ fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.src.u3.ip;
6581+ fl.u.ip4.flowi4_oif = ifindex;
6582+ break;
6583+ case NFPROTO_IPV6:
6584+ fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
6585+ fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.src.u3.in6;
6586+ fl.u.ip6.flowi6_oif = ifindex;
6587+ break;
6588+ }
6589+
6590+ nf_route(xt_net(par), &dst, &fl, false, xt_family(par));
6591+ if (!dst)
6592+ return -ENOENT;
6593+
6594+ nf_default_forward_path(route, dst, dir, devs);
6595+
6596+ return 0;
6597+}
6598+
6599+static int
developer480c5d52022-12-28 14:48:14 +08006600+xt_flowoffload_route_nat(struct sk_buff *skb, const struct nf_conn *ct,
6601+ const struct xt_action_param *par,
6602+ struct nf_flow_route *route, enum ip_conntrack_dir dir,
6603+ struct net_device **devs)
6604+{
6605+ struct dst_entry *this_dst = skb_dst(skb);
6606+ struct dst_entry *other_dst = NULL;
6607+ struct flowi fl;
6608+
6609+ memset(&fl, 0, sizeof(fl));
6610+ switch (xt_family(par)) {
6611+ case NFPROTO_IPV4:
6612+ fl.u.ip4.daddr = ct->tuplehash[dir].tuple.src.u3.ip;
6613+ fl.u.ip4.flowi4_oif = xt_in(par)->ifindex;
6614+ break;
6615+ case NFPROTO_IPV6:
6616+ fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
6617+ fl.u.ip6.daddr = ct->tuplehash[dir].tuple.src.u3.in6;
6618+ fl.u.ip6.flowi6_oif = xt_in(par)->ifindex;
6619+ break;
6620+ }
6621+
6622+ nf_route(xt_net(par), &other_dst, &fl, false, xt_family(par));
6623+ if (!other_dst)
6624+ return -ENOENT;
6625+
6626+ nf_default_forward_path(route, this_dst, dir, devs);
6627+ nf_default_forward_path(route, other_dst, !dir, devs);
6628+
developer7e533772023-04-27 05:59:30 +08006629+ if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH &&
developer480c5d52022-12-28 14:48:14 +08006630+ route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
developer9fdc0e82023-05-12 14:21:17 +08006631+ if (nf_dev_forward_path(skb, route, ct, dir, devs))
developer480c5d52022-12-28 14:48:14 +08006632+ return -1;
developer9fdc0e82023-05-12 14:21:17 +08006633+ if (nf_dev_forward_path(skb, route, ct, !dir, devs))
developer480c5d52022-12-28 14:48:14 +08006634+ return -1;
6635+ }
6636+
6637+ return 0;
6638+}
6639+
6640+static int
6641+xt_flowoffload_route_bridge(struct sk_buff *skb, const struct nf_conn *ct,
6642+ const struct xt_action_param *par,
6643+ struct nf_flow_route *route, enum ip_conntrack_dir dir,
6644+ struct net_device **devs)
developer8cb3ac72022-07-04 10:55:14 +08006645+{
6646+ int ret;
6647+
6648+ ret = xt_flowoffload_route_dir(route, ct, dir, par,
6649+ devs[dir]->ifindex,
6650+ devs);
6651+ if (ret)
6652+ return ret;
6653+
6654+ ret = xt_flowoffload_route_dir(route, ct, !dir, par,
6655+ devs[!dir]->ifindex,
6656+ devs);
6657+ if (ret)
developer67bbcc02022-07-08 09:04:01 +08006658+ goto err_route_dir1;
developer8cb3ac72022-07-04 10:55:14 +08006659+
developer7e533772023-04-27 05:59:30 +08006660+ if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH &&
developer8cb3ac72022-07-04 10:55:14 +08006661+ route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
developer9fdc0e82023-05-12 14:21:17 +08006662+ if (nf_dev_forward_path(skb, route, ct, dir, devs) ||
6663+ nf_dev_forward_path(skb, route, ct, !dir, devs)) {
developer67bbcc02022-07-08 09:04:01 +08006664+ ret = -1;
6665+ goto err_route_dir2;
6666+ }
developer8cb3ac72022-07-04 10:55:14 +08006667+ }
6668+
6669+ return 0;
developer67bbcc02022-07-08 09:04:01 +08006670+
6671+err_route_dir2:
6672+ dst_release(route->tuple[!dir].dst);
6673+err_route_dir1:
6674+ dst_release(route->tuple[dir].dst);
6675+ return ret;
developer8cb3ac72022-07-04 10:55:14 +08006676+}
6677+
6678+static unsigned int
6679+flowoffload_tg(struct sk_buff *skb, const struct xt_action_param *par)
6680+{
6681+ struct xt_flowoffload_table *table;
6682+ const struct xt_flowoffload_target_info *info = par->targinfo;
6683+ struct tcphdr _tcph, *tcph = NULL;
6684+ enum ip_conntrack_info ctinfo;
6685+ enum ip_conntrack_dir dir;
6686+ struct nf_flow_route route = {};
6687+ struct flow_offload *flow = NULL;
6688+ struct net_device *devs[2] = {};
6689+ struct nf_conn *ct;
6690+ struct net *net;
6691+
6692+ if (xt_flowoffload_skip(skb, xt_family(par)))
6693+ return XT_CONTINUE;
6694+
6695+ ct = nf_ct_get(skb, &ctinfo);
6696+ if (ct == NULL)
6697+ return XT_CONTINUE;
6698+
6699+ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
6700+ case IPPROTO_TCP:
6701+ if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
6702+ return XT_CONTINUE;
6703+
6704+ tcph = skb_header_pointer(skb, par->thoff,
6705+ sizeof(_tcph), &_tcph);
6706+ if (unlikely(!tcph || tcph->fin || tcph->rst))
6707+ return XT_CONTINUE;
6708+ break;
6709+ case IPPROTO_UDP:
6710+ break;
6711+ default:
6712+ return XT_CONTINUE;
6713+ }
6714+
6715+ if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
6716+ ct->status & IPS_SEQ_ADJUST)
6717+ return XT_CONTINUE;
6718+
6719+ if (!nf_ct_is_confirmed(ct))
6720+ return XT_CONTINUE;
6721+
6722+	dir = CTINFO2DIR(ctinfo);
6723+
6724+	devs[dir] = xt_out(par);
6725+	devs[!dir] = xt_in(par);
6726+
6727+	if (!devs[dir] || !devs[!dir])
6728+		return XT_CONTINUE;
6729+
6730+	if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
6731+		return XT_CONTINUE;
6732+
developer480c5d52022-12-28 14:48:14 +08006733+ if (ct->status & IPS_NAT_MASK) {
6734+ if (xt_flowoffload_route_nat(skb, ct, par, &route, dir, devs) < 0)
6735+ goto err_flow_route;
6736+ } else {
6737+ if (xt_flowoffload_route_bridge(skb, ct, par, &route, dir, devs) < 0)
6738+ goto err_flow_route;
6739+ }
developer8cb3ac72022-07-04 10:55:14 +08006740+
6741+ flow = flow_offload_alloc(ct);
6742+ if (!flow)
6743+ goto err_flow_alloc;
6744+
6745+ if (flow_offload_route_init(flow, &route) < 0)
6746+ goto err_flow_add;
6747+
6748+ if (tcph) {
6749+ ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
6750+ ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
6751+ }
6752+
6753+ table = &flowtable[!!(info->flags & XT_FLOWOFFLOAD_HW)];
6754+
6755+ net = read_pnet(&table->ft.net);
6756+ if (!net)
6757+ write_pnet(&table->ft.net, xt_net(par));
6758+
6759+ if (flow_offload_add(&table->ft, flow) < 0)
6760+ goto err_flow_add;
6761+
6762+ xt_flowoffload_check_device(table, devs[0]);
6763+ xt_flowoffload_check_device(table, devs[1]);
6764+
developer480c5d52022-12-28 14:48:14 +08006765+ if (!(ct->status & IPS_NAT_MASK))
6766+ dst_release(route.tuple[dir].dst);
developer8cb3ac72022-07-04 10:55:14 +08006767+ dst_release(route.tuple[!dir].dst);
6768+
6769+ return XT_CONTINUE;
6770+
6771+err_flow_add:
6772+ flow_offload_free(flow);
6773+err_flow_alloc:
developer480c5d52022-12-28 14:48:14 +08006774+ if (!(ct->status & IPS_NAT_MASK))
6775+ dst_release(route.tuple[dir].dst);
developer8cb3ac72022-07-04 10:55:14 +08006776+ dst_release(route.tuple[!dir].dst);
6777+err_flow_route:
6778+ clear_bit(IPS_OFFLOAD_BIT, &ct->status);
6779+
6780+ return XT_CONTINUE;
6781+}
6782+
6783+static int flowoffload_chk(const struct xt_tgchk_param *par)
6784+{
6785+ struct xt_flowoffload_target_info *info = par->targinfo;
6786+
6787+ if (info->flags & ~XT_FLOWOFFLOAD_MASK)
6788+ return -EINVAL;
6789+
6790+ return 0;
6791+}
6792+
6793+static struct xt_target offload_tg_reg __read_mostly = {
6794+ .family = NFPROTO_UNSPEC,
6795+ .name = "FLOWOFFLOAD",
6796+ .revision = 0,
6797+ .targetsize = sizeof(struct xt_flowoffload_target_info),
6798+ .usersize = sizeof(struct xt_flowoffload_target_info),
6799+ .checkentry = flowoffload_chk,
6800+ .target = flowoffload_tg,
6801+ .me = THIS_MODULE,
6802+};
6803+
6804+static int flow_offload_netdev_event(struct notifier_block *this,
6805+ unsigned long event, void *ptr)
6806+{
6807+ struct xt_flowoffload_hook *hook0, *hook1;
6808+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
6809+
6810+ if (event != NETDEV_UNREGISTER)
6811+ return NOTIFY_DONE;
6812+
6813+ spin_lock_bh(&hooks_lock);
6814+ hook0 = flow_offload_lookup_hook(&flowtable[0], dev);
6815+ if (hook0)
6816+ hlist_del(&hook0->list);
6817+
6818+ hook1 = flow_offload_lookup_hook(&flowtable[1], dev);
6819+ if (hook1)
6820+ hlist_del(&hook1->list);
6821+ spin_unlock_bh(&hooks_lock);
6822+
6823+ if (hook0) {
6824+ nf_unregister_net_hook(hook0->net, &hook0->ops);
6825+ kfree(hook0);
6826+ }
6827+
6828+ if (hook1) {
6829+ nf_unregister_net_hook(hook1->net, &hook1->ops);
6830+ kfree(hook1);
6831+ }
6832+
6833+ nf_flow_table_cleanup(dev);
6834+
6835+ return NOTIFY_DONE;
6836+}
6837+
6838+static struct notifier_block flow_offload_netdev_notifier = {
6839+ .notifier_call = flow_offload_netdev_event,
6840+};
6841+
6842+static int nf_flow_rule_route_inet(struct net *net,
6843+ const struct flow_offload *flow,
6844+ enum flow_offload_tuple_dir dir,
6845+ struct nf_flow_rule *flow_rule)
6846+{
6847+ const struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
6848+ int err;
6849+
6850+ switch (flow_tuple->l3proto) {
6851+ case NFPROTO_IPV4:
6852+ err = nf_flow_rule_route_ipv4(net, flow, dir, flow_rule);
6853+ break;
6854+ case NFPROTO_IPV6:
6855+ err = nf_flow_rule_route_ipv6(net, flow, dir, flow_rule);
6856+ break;
6857+ default:
6858+ err = -1;
6859+ break;
6860+ }
6861+
6862+ return err;
6863+}
6864+
6865+static struct nf_flowtable_type flowtable_inet = {
6866+ .family = NFPROTO_INET,
6867+ .init = nf_flow_table_init,
6868+ .setup = nf_flow_table_offload_setup,
6869+ .action = nf_flow_rule_route_inet,
6870+ .free = nf_flow_table_free,
6871+ .hook = xt_flowoffload_net_hook,
6872+ .owner = THIS_MODULE,
6873+};
6874+
6875+static int init_flowtable(struct xt_flowoffload_table *tbl)
6876+{
6877+ INIT_DELAYED_WORK(&tbl->work, xt_flowoffload_hook_work);
6878+ tbl->ft.type = &flowtable_inet;
6879+
6880+ return nf_flow_table_init(&tbl->ft);
6881+}
6882+
6883+static int __init xt_flowoffload_tg_init(void)
6884+{
6885+ int ret;
6886+
6887+ register_netdevice_notifier(&flow_offload_netdev_notifier);
6888+
6889+ ret = init_flowtable(&flowtable[0]);
6890+ if (ret)
6891+ return ret;
6892+
6893+ ret = init_flowtable(&flowtable[1]);
6894+ if (ret)
6895+ goto cleanup;
6896+
6897+ flowtable[1].ft.flags = NF_FLOWTABLE_HW_OFFLOAD;
6898+
6899+ ret = xt_register_target(&offload_tg_reg);
6900+ if (ret)
6901+ goto cleanup2;
6902+
6903+ return 0;
6904+
6905+cleanup2:
6906+ nf_flow_table_free(&flowtable[1].ft);
6907+cleanup:
6908+ nf_flow_table_free(&flowtable[0].ft);
6909+ return ret;
6910+}
6911+
6912+static void __exit xt_flowoffload_tg_exit(void)
6913+{
6914+ xt_unregister_target(&offload_tg_reg);
6915+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
6916+ nf_flow_table_free(&flowtable[0].ft);
6917+ nf_flow_table_free(&flowtable[1].ft);
6918+}
6919+
6920+MODULE_LICENSE("GPL");
6921+module_init(xt_flowoffload_tg_init);
6922+module_exit(xt_flowoffload_tg_exit);
6923--
69242.18.0
6925