1From 6ad9bd65769003ab526e504577e0f747eba14287 Mon Sep 17 00:00:00 2001
2From: Bo Jiao <Bo.Jiao@mediatek.com>
3Date: Wed, 22 Jun 2022 09:42:19 +0800
4Subject: [PATCH 1/8]
5 9990-mt7622-backport-nf-hw-offload-framework-and-upstream-hnat-plus-xt-FLOWOFFLOAD-update-v2
6
7---
8 drivers/net/ethernet/mediatek/Makefile | 3 +-
9 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 28 +-
10 drivers/net/ethernet/mediatek/mtk_eth_soc.h | 20 +-
11 drivers/net/ethernet/mediatek/mtk_ppe.c | 509 +++++++
12 drivers/net/ethernet/mediatek/mtk_ppe.h | 288 ++++
13 .../net/ethernet/mediatek/mtk_ppe_debugfs.c | 214 +++
14 .../net/ethernet/mediatek/mtk_ppe_offload.c | 526 ++++++++
15 drivers/net/ethernet/mediatek/mtk_ppe_regs.h | 144 ++
16 drivers/net/ppp/ppp_generic.c | 22 +
17 drivers/net/ppp/pppoe.c | 24 +
18 include/linux/netdevice.h | 60 +
19 include/linux/ppp_channel.h | 3 +
20 include/net/dsa.h | 10 +
21 include/net/flow_offload.h | 4 +
22 include/net/ip6_route.h | 5 +-
23 .../net/netfilter/ipv6/nf_conntrack_ipv6.h | 3 -
24 include/net/netfilter/nf_conntrack.h | 12 +
25 include/net/netfilter/nf_conntrack_acct.h | 11 +
26 include/net/netfilter/nf_flow_table.h | 264 +++-
27 include/net/netns/conntrack.h | 6 +
28 .../linux/netfilter/nf_conntrack_common.h | 9 +-
29 include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h | 17 +
30 net/8021q/vlan_dev.c | 21 +
31 net/bridge/br_device.c | 49 +
32 net/bridge/br_private.h | 20 +
33 net/bridge/br_vlan.c | 55 +
34 net/core/dev.c | 46 +
35 net/dsa/dsa.c | 9 +
36 net/dsa/slave.c | 41 +-
37 net/ipv4/netfilter/Kconfig | 4 +-
38 net/ipv6/ip6_output.c | 2 +-
39 net/ipv6/netfilter/Kconfig | 3 +-
40 net/ipv6/route.c | 22 +-
41 net/netfilter/Kconfig | 14 +-
42 net/netfilter/Makefile | 4 +-
43 net/netfilter/nf_conntrack_core.c | 20 +-
44 net/netfilter/nf_conntrack_proto_tcp.c | 4 +
45 net/netfilter/nf_conntrack_proto_udp.c | 4 +
46 net/netfilter/nf_conntrack_standalone.c | 34 +-
47 net/netfilter/nf_flow_table_core.c | 446 +++---
48 net/netfilter/nf_flow_table_ip.c | 455 ++++---
49 net/netfilter/nf_flow_table_offload.c | 1191 +++++++++++++++++
50 net/netfilter/xt_FLOWOFFLOAD.c | 719 ++++++++++
51 43 files changed, 4913 insertions(+), 432 deletions(-)
52 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.c
53 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.h
54 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
55 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_offload.c
56 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_regs.h
57 create mode 100644 include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
58 create mode 100644 net/netfilter/nf_flow_table_offload.c
59 create mode 100644 net/netfilter/xt_FLOWOFFLOAD.c
60
61diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile
62index 13c5b4e8f..0a6af99f1 100755
63--- a/drivers/net/ethernet/mediatek/Makefile
64+++ b/drivers/net/ethernet/mediatek/Makefile
65@@ -4,5 +4,6 @@
66 #
67
68 obj-$(CONFIG_NET_MEDIATEK_SOC) += mtk_eth.o
69-mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_usxgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o
70+mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_usxgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o \
71+ mtk_ppe.o mtk_ppe_debugfs.o mtk_ppe_offload.o
72 obj-$(CONFIG_NET_MEDIATEK_HNAT) += mtk_hnat/
73diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
74index 2b21f7ed0..819d8a0be 100755
75--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
76+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
77@@ -2654,12 +2654,17 @@ static int mtk_open(struct net_device *dev)
78
79 /* we run 2 netdevs on the same dma ring so we only bring it up once */
80 if (!refcount_read(&eth->dma_refcnt)) {
81- int err = mtk_start_dma(eth);
82+ u32 gdm_config = MTK_GDMA_TO_PDMA;
83+ int err;
84
85+ err = mtk_start_dma(eth);
86 if (err)
87 return err;
88
89- mtk_gdm_config(eth, MTK_GDMA_TO_PDMA);
90+ if (eth->soc->offload_version && mtk_ppe_start(&eth->ppe) == 0)
91+ gdm_config = MTK_GDMA_TO_PPE;
92+
93+ mtk_gdm_config(eth, gdm_config);
94
95 /* Indicates CDM to parse the MTK special tag from CPU */
96 if (netdev_uses_dsa(dev)) {
97@@ -2772,6 +2777,9 @@ static int mtk_stop(struct net_device *dev)
98
99 mtk_dma_free(eth);
100
101+ if (eth->soc->offload_version)
102+ mtk_ppe_stop(&eth->ppe);
103+
104 return 0;
105 }
106
107@@ -3391,6 +3399,7 @@ static const struct net_device_ops mtk_netdev_ops = {
108 #ifdef CONFIG_NET_POLL_CONTROLLER
109 .ndo_poll_controller = mtk_poll_controller,
110 #endif
111+ .ndo_setup_tc = mtk_eth_setup_tc,
112 };
113
114 static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
115@@ -3682,6 +3691,17 @@ static int mtk_probe(struct platform_device *pdev)
116 goto err_free_dev;
117 }
118
119+ if (eth->soc->offload_version) {
120+ err = mtk_ppe_init(&eth->ppe, eth->dev,
121+ eth->base + MTK_ETH_PPE_BASE, 2);
122+ if (err)
123+ goto err_free_dev;
124+
125+ err = mtk_eth_offload_init(eth);
126+ if (err)
127+ goto err_free_dev;
128+ }
129+
130 for (i = 0; i < MTK_MAX_DEVS; i++) {
131 if (!eth->netdev[i])
132 continue;
133@@ -3781,12 +3801,13 @@ static const struct mtk_soc_data mt2701_data = {
134 .required_clks = MT7623_CLKS_BITMAP,
135 .required_pctl = true,
136 .has_sram = false,
137+ .offload_version = 2,
138 .txrx = {
139 .txd_size = sizeof(struct mtk_tx_dma),
140 .rxd_size = sizeof(struct mtk_rx_dma),
141 .dma_max_len = MTK_TX_DMA_BUF_LEN,
142 .dma_len_offset = MTK_TX_DMA_BUF_SHIFT,
143 },
144 };
145
146 static const struct mtk_soc_data mt7621_data = {
147@@ -3789,12 +3810,13 @@ static const struct mtk_soc_data mt7621_data = {
148 .required_clks = MT7621_CLKS_BITMAP,
149 .required_pctl = false,
150 .has_sram = false,
151+ .offload_version = 2,
152 .txrx = {
153 .txd_size = sizeof(struct mtk_tx_dma),
154 .rxd_size = sizeof(struct mtk_rx_dma),
155 .dma_max_len = MTK_TX_DMA_BUF_LEN,
156 .dma_len_offset = MTK_TX_DMA_BUF_SHIFT,
157 },
158 };
159
160 static const struct mtk_soc_data mt7622_data = {
161@@ -3798,12 +3820,13 @@ static const struct mtk_soc_data mt7622_data = {
162 .required_clks = MT7622_CLKS_BITMAP,
163 .required_pctl = false,
164 .has_sram = false,
165+ .offload_version = 2,
166 .txrx = {
167 .txd_size = sizeof(struct mtk_tx_dma),
168 .rxd_size = sizeof(struct mtk_rx_dma),
169 .dma_max_len = MTK_TX_DMA_BUF_LEN,
170 .dma_len_offset = MTK_TX_DMA_BUF_SHIFT,
171 },
172 };
173
174 static const struct mtk_soc_data mt7623_data = {
175@@ -3806,12 +3829,13 @@ static const struct mtk_soc_data mt7623_data = {
176 .required_clks = MT7623_CLKS_BITMAP,
177 .required_pctl = true,
178 .has_sram = false,
179+ .offload_version = 2,
180 .txrx = {
181 .txd_size = sizeof(struct mtk_tx_dma),
182 .rxd_size = sizeof(struct mtk_rx_dma),
183 .dma_max_len = MTK_TX_DMA_BUF_LEN,
184 .dma_len_offset = MTK_TX_DMA_BUF_SHIFT,
185 },
186 };
187
188 static const struct mtk_soc_data mt7629_data = {
189diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
190index b6380ffeb..349f98503 100755
191--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
192+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
193@@ -15,6 +15,8 @@
194 #include <linux/u64_stats_sync.h>
195 #include <linux/refcount.h>
196 #include <linux/phylink.h>
197+#include <linux/rhashtable.h>
198+#include "mtk_ppe.h"
199
200 #define MTK_QDMA_PAGE_SIZE 2048
201 #define MTK_MAX_RX_LENGTH 1536
202@@ -37,7 +39,8 @@
203 NETIF_F_HW_VLAN_CTAG_TX | \
204 NETIF_F_SG | NETIF_F_TSO | \
205 NETIF_F_TSO6 | \
206- NETIF_F_IPV6_CSUM)
207+ NETIF_F_IPV6_CSUM |\
208+ NETIF_F_HW_TC)
209 #define MTK_SET_FEATURES (NETIF_F_LRO | \
210 NETIF_F_HW_VLAN_CTAG_RX)
211 #define MTK_HW_FEATURES_MT7628 (NETIF_F_SG | NETIF_F_RXCSUM)
212@@ -107,6 +110,7 @@
213 #define MTK_GDMA_TCS_EN BIT(21)
214 #define MTK_GDMA_UCS_EN BIT(20)
215 #define MTK_GDMA_TO_PDMA 0x0
216+#define MTK_GDMA_TO_PPE 0x4444
217 #define MTK_GDMA_DROP_ALL 0x7777
218
219 /* Unicast Filter MAC Address Register - Low */
220@@ -547,6 +551,12 @@
221 #define RX_DMA_TCI(_x) ((_x) & (VLAN_PRIO_MASK | VLAN_VID_MASK))
222 #define RX_DMA_VPID(_x) (((_x) >> 16) & 0xffff)
223
224+/* QDMA descriptor rxd4 */
225+#define MTK_RXD4_FOE_ENTRY GENMASK(13, 0)
226+#define MTK_RXD4_PPE_CPU_REASON GENMASK(18, 14)
227+#define MTK_RXD4_SRC_PORT GENMASK(21, 19)
228+#define MTK_RXD4_ALG GENMASK(31, 22)
229+
230 /* QDMA descriptor rxd4 */
231 #define RX_DMA_L4_VALID BIT(24)
232 #define RX_DMA_L4_VALID_PDMA BIT(30) /* when PDMA is used */
233@@ -1158,6 +1168,7 @@ struct mtk_soc_data {
234 u32 caps;
235 u32 required_clks;
236 bool required_pctl;
237+ u8 offload_version;
238 netdev_features_t hw_features;
239 bool has_sram;
240 };
241@@ -1271,6 +1282,9 @@ struct mtk_eth {
242 int ip_align;
243 spinlock_t syscfg0_lock;
244 struct timer_list mtk_dma_monitor_timer;
245+
246+ struct mtk_ppe ppe;
247+ struct rhashtable flow_table;
248 };
249
250 /* struct mtk_mac - the structure that holds the info about the MACs of the
251@@ -1319,4 +1333,8 @@ int mtk_gmac_rgmii_path_setup(struct mtk_eth *eth, int mac_id);
252 void mtk_gdm_config(struct mtk_eth *eth, u32 config);
253 void ethsys_reset(struct mtk_eth *eth, u32 reset_bits);
254
255+int mtk_eth_offload_init(struct mtk_eth *eth);
256+int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
257+ void *type_data);
258+
259 int mtk_mac2xgmii_id(struct mtk_eth *eth, int mac_id);
260diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
261new file mode 100644
262index 000000000..66298e223
263--- /dev/null
264+++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
265@@ -0,0 +1,509 @@
266+// SPDX-License-Identifier: GPL-2.0-only
267+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
268+
269+#include <linux/kernel.h>
270+#include <linux/io.h>
271+#include <linux/iopoll.h>
272+#include <linux/etherdevice.h>
273+#include <linux/platform_device.h>
274+#include "mtk_ppe.h"
275+#include "mtk_ppe_regs.h"
276+
277+static void ppe_w32(struct mtk_ppe *ppe, u32 reg, u32 val)
278+{
279+ writel(val, ppe->base + reg);
280+}
281+
282+static u32 ppe_r32(struct mtk_ppe *ppe, u32 reg)
283+{
284+ return readl(ppe->base + reg);
285+}
286+
287+static u32 ppe_m32(struct mtk_ppe *ppe, u32 reg, u32 mask, u32 set)
288+{
289+ u32 val;
290+
291+ val = ppe_r32(ppe, reg);
292+ val &= ~mask;
293+ val |= set;
294+ ppe_w32(ppe, reg, val);
295+
296+ return val;
297+}
298+
299+static u32 ppe_set(struct mtk_ppe *ppe, u32 reg, u32 val)
300+{
301+ return ppe_m32(ppe, reg, 0, val);
302+}
303+
304+static u32 ppe_clear(struct mtk_ppe *ppe, u32 reg, u32 val)
305+{
306+ return ppe_m32(ppe, reg, val, 0);
307+}
308+
309+static int mtk_ppe_wait_busy(struct mtk_ppe *ppe)
310+{
311+ int ret;
312+ u32 val;
313+
314+ ret = readl_poll_timeout(ppe->base + MTK_PPE_GLO_CFG, val,
315+ !(val & MTK_PPE_GLO_CFG_BUSY),
316+ 20, MTK_PPE_WAIT_TIMEOUT_US);
317+
318+ if (ret)
319+ dev_err(ppe->dev, "PPE table busy");
320+
321+ return ret;
322+}
323+
324+static void mtk_ppe_cache_clear(struct mtk_ppe *ppe)
325+{
326+ ppe_set(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
327+ ppe_clear(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
328+}
329+
330+static void mtk_ppe_cache_enable(struct mtk_ppe *ppe, bool enable)
331+{
332+ mtk_ppe_cache_clear(ppe);
333+
334+ ppe_m32(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_EN,
335+ enable * MTK_PPE_CACHE_CTL_EN);
336+}
337+
338+static u32 mtk_ppe_hash_entry(struct mtk_foe_entry *e)
339+{
340+ u32 hv1, hv2, hv3;
341+ u32 hash;
342+
343+ switch (FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, e->ib1)) {
344+ case MTK_PPE_PKT_TYPE_BRIDGE:
345+ hv1 = e->bridge.src_mac_lo;
346+ hv1 ^= ((e->bridge.src_mac_hi & 0xffff) << 16);
347+ hv2 = e->bridge.src_mac_hi >> 16;
348+ hv2 ^= e->bridge.dest_mac_lo;
349+ hv3 = e->bridge.dest_mac_hi;
350+ break;
351+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
352+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
353+ hv1 = e->ipv4.orig.ports;
354+ hv2 = e->ipv4.orig.dest_ip;
355+ hv3 = e->ipv4.orig.src_ip;
356+ break;
357+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
358+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
359+ hv1 = e->ipv6.src_ip[3] ^ e->ipv6.dest_ip[3];
360+ hv1 ^= e->ipv6.ports;
361+
362+ hv2 = e->ipv6.src_ip[2] ^ e->ipv6.dest_ip[2];
363+ hv2 ^= e->ipv6.dest_ip[0];
364+
365+ hv3 = e->ipv6.src_ip[1] ^ e->ipv6.dest_ip[1];
366+ hv3 ^= e->ipv6.src_ip[0];
367+ break;
368+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
369+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
370+ default:
371+ WARN_ON_ONCE(1);
372+ return MTK_PPE_HASH_MASK;
373+ }
374+
375+ hash = (hv1 & hv2) | ((~hv1) & hv3);
376+ hash = (hash >> 24) | ((hash & 0xffffff) << 8);
377+ hash ^= hv1 ^ hv2 ^ hv3;
378+ hash ^= hash >> 16;
379+ hash <<= 1;
380+ hash &= MTK_PPE_ENTRIES - 1;
381+
382+ return hash;
383+}
384+
385+static inline struct mtk_foe_mac_info *
386+mtk_foe_entry_l2(struct mtk_foe_entry *entry)
387+{
388+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
389+
390+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
391+ return &entry->ipv6.l2;
392+
393+ return &entry->ipv4.l2;
394+}
395+
396+static inline u32 *
397+mtk_foe_entry_ib2(struct mtk_foe_entry *entry)
398+{
399+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
400+
401+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
402+ return &entry->ipv6.ib2;
403+
404+ return &entry->ipv4.ib2;
405+}
406+
407+int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
408+ u8 pse_port, u8 *src_mac, u8 *dest_mac)
409+{
410+ struct mtk_foe_mac_info *l2;
411+ u32 ports_pad, val;
412+
413+ memset(entry, 0, sizeof(*entry));
414+
415+ val = FIELD_PREP(MTK_FOE_IB1_STATE, MTK_FOE_STATE_BIND) |
416+ FIELD_PREP(MTK_FOE_IB1_PACKET_TYPE, type) |
417+ FIELD_PREP(MTK_FOE_IB1_UDP, l4proto == IPPROTO_UDP) |
418+ MTK_FOE_IB1_BIND_TTL |
419+ MTK_FOE_IB1_BIND_CACHE;
420+ entry->ib1 = val;
421+
422+ val = FIELD_PREP(MTK_FOE_IB2_PORT_MG, 0x3f) |
423+ FIELD_PREP(MTK_FOE_IB2_PORT_AG, 0x1f) |
424+ FIELD_PREP(MTK_FOE_IB2_DEST_PORT, pse_port);
425+
426+ if (is_multicast_ether_addr(dest_mac))
427+ val |= MTK_FOE_IB2_MULTICAST;
428+
429+ ports_pad = 0xa5a5a500 | (l4proto & 0xff);
430+ if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
431+ entry->ipv4.orig.ports = ports_pad;
432+ if (type == MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
433+ entry->ipv6.ports = ports_pad;
434+
435+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
436+ entry->ipv6.ib2 = val;
437+ l2 = &entry->ipv6.l2;
438+ } else {
439+ entry->ipv4.ib2 = val;
440+ l2 = &entry->ipv4.l2;
441+ }
442+
443+ l2->dest_mac_hi = get_unaligned_be32(dest_mac);
444+ l2->dest_mac_lo = get_unaligned_be16(dest_mac + 4);
445+ l2->src_mac_hi = get_unaligned_be32(src_mac);
446+ l2->src_mac_lo = get_unaligned_be16(src_mac + 4);
447+
448+ if (type >= MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
449+ l2->etype = ETH_P_IPV6;
450+ else
451+ l2->etype = ETH_P_IP;
452+
453+ return 0;
454+}
455+
456+int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port)
457+{
458+ u32 *ib2 = mtk_foe_entry_ib2(entry);
459+ u32 val;
460+
461+ val = *ib2;
462+ val &= ~MTK_FOE_IB2_DEST_PORT;
463+ val |= FIELD_PREP(MTK_FOE_IB2_DEST_PORT, port);
464+ *ib2 = val;
465+
466+ return 0;
467+}
468+
469+int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool egress,
470+ __be32 src_addr, __be16 src_port,
471+ __be32 dest_addr, __be16 dest_port)
472+{
473+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
474+ struct mtk_ipv4_tuple *t;
475+
476+ switch (type) {
477+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
478+ if (egress) {
479+ t = &entry->ipv4.new;
480+ break;
481+ }
482+ fallthrough;
483+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
484+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
485+ t = &entry->ipv4.orig;
486+ break;
487+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
488+ entry->ipv6_6rd.tunnel_src_ip = be32_to_cpu(src_addr);
489+ entry->ipv6_6rd.tunnel_dest_ip = be32_to_cpu(dest_addr);
490+ return 0;
491+ default:
492+ WARN_ON_ONCE(1);
493+ return -EINVAL;
494+ }
495+
496+ t->src_ip = be32_to_cpu(src_addr);
497+ t->dest_ip = be32_to_cpu(dest_addr);
498+
499+ if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
500+ return 0;
501+
502+ t->src_port = be16_to_cpu(src_port);
503+ t->dest_port = be16_to_cpu(dest_port);
504+
505+ return 0;
506+}
507+
508+int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
509+ __be32 *src_addr, __be16 src_port,
510+ __be32 *dest_addr, __be16 dest_port)
511+{
512+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
513+ u32 *src, *dest;
514+ int i;
515+
516+ switch (type) {
517+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
518+ src = entry->dslite.tunnel_src_ip;
519+ dest = entry->dslite.tunnel_dest_ip;
520+ break;
521+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
522+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
523+ entry->ipv6.src_port = be16_to_cpu(src_port);
524+ entry->ipv6.dest_port = be16_to_cpu(dest_port);
525+ fallthrough;
526+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
527+ src = entry->ipv6.src_ip;
528+ dest = entry->ipv6.dest_ip;
529+ break;
530+ default:
531+ WARN_ON_ONCE(1);
532+ return -EINVAL;
533+ }
534+
535+ for (i = 0; i < 4; i++)
536+ src[i] = be32_to_cpu(src_addr[i]);
537+ for (i = 0; i < 4; i++)
538+ dest[i] = be32_to_cpu(dest_addr[i]);
539+
540+ return 0;
541+}
542+
543+int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port)
544+{
545+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
546+
547+ l2->etype = BIT(port);
548+
549+ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER))
550+ entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
551+ else
552+ l2->etype |= BIT(8);
553+
554+ entry->ib1 &= ~MTK_FOE_IB1_BIND_VLAN_TAG;
555+
556+ return 0;
557+}
558+
559+int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid)
560+{
561+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
562+
563+ switch (FIELD_GET(MTK_FOE_IB1_BIND_VLAN_LAYER, entry->ib1)) {
564+ case 0:
565+ entry->ib1 |= MTK_FOE_IB1_BIND_VLAN_TAG |
566+ FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
567+ l2->vlan1 = vid;
568+ return 0;
569+ case 1:
570+ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG)) {
571+ l2->vlan1 = vid;
572+ l2->etype |= BIT(8);
573+ } else {
574+ l2->vlan2 = vid;
575+ entry->ib1 += FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
576+ }
577+ return 0;
578+ default:
579+ return -ENOSPC;
580+ }
581+}
582+
583+int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid)
584+{
585+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
586+
587+ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER) ||
588+ (entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG))
589+ l2->etype = ETH_P_PPP_SES;
590+
591+ entry->ib1 |= MTK_FOE_IB1_BIND_PPPOE;
592+ l2->pppoe_id = sid;
593+
594+ return 0;
595+}
596+
597+static inline bool mtk_foe_entry_usable(struct mtk_foe_entry *entry)
598+{
599+ return !(entry->ib1 & MTK_FOE_IB1_STATIC) &&
600+ FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1) != MTK_FOE_STATE_BIND;
601+}
602+
603+int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
604+ u16 timestamp)
605+{
606+ struct mtk_foe_entry *hwe;
607+ u32 hash;
608+
609+ timestamp &= MTK_FOE_IB1_BIND_TIMESTAMP;
610+ entry->ib1 &= ~MTK_FOE_IB1_BIND_TIMESTAMP;
611+ entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_TIMESTAMP, timestamp);
612+
613+ hash = mtk_ppe_hash_entry(entry);
614+ hwe = &ppe->foe_table[hash];
615+ if (!mtk_foe_entry_usable(hwe)) {
616+ hwe++;
617+ hash++;
618+
619+ if (!mtk_foe_entry_usable(hwe))
620+ return -ENOSPC;
621+ }
622+
623+ memcpy(&hwe->data, &entry->data, sizeof(hwe->data));
624+ wmb();
625+ hwe->ib1 = entry->ib1;
626+
627+ dma_wmb();
628+
629+ mtk_ppe_cache_clear(ppe);
630+
631+ return hash;
632+}
633+
634+int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
635+ int version)
636+{
637+ struct mtk_foe_entry *foe;
638+
639+ /* need to allocate a separate device, since the PPE DMA access is
640+ * not coherent.
641+ */
642+ ppe->base = base;
643+ ppe->dev = dev;
644+ ppe->version = version;
645+
646+ foe = dmam_alloc_coherent(ppe->dev, MTK_PPE_ENTRIES * sizeof(*foe),
647+ &ppe->foe_phys, GFP_KERNEL);
648+ if (!foe)
649+ return -ENOMEM;
650+
651+ ppe->foe_table = foe;
652+
653+ mtk_ppe_debugfs_init(ppe);
654+
655+ return 0;
656+}
657+
658+static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe)
659+{
660+ static const u8 skip[] = { 12, 25, 38, 51, 76, 89, 102 };
661+ int i, k;
662+
663+ memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(*ppe->foe_table));
664+
665+ if (!IS_ENABLED(CONFIG_SOC_MT7621))
666+ return;
667+
668+ /* skip all entries that cross the 1024 byte boundary */
669+ for (i = 0; i < MTK_PPE_ENTRIES; i += 128)
670+ for (k = 0; k < ARRAY_SIZE(skip); k++)
671+ ppe->foe_table[i + skip[k]].ib1 |= MTK_FOE_IB1_STATIC;
672+}
673+
674+int mtk_ppe_start(struct mtk_ppe *ppe)
675+{
676+ u32 val;
677+
678+ mtk_ppe_init_foe_table(ppe);
679+ ppe_w32(ppe, MTK_PPE_TB_BASE, ppe->foe_phys);
680+
681+ val = MTK_PPE_TB_CFG_ENTRY_80B |
682+ MTK_PPE_TB_CFG_AGE_NON_L4 |
683+ MTK_PPE_TB_CFG_AGE_UNBIND |
684+ MTK_PPE_TB_CFG_AGE_TCP |
685+ MTK_PPE_TB_CFG_AGE_UDP |
686+ MTK_PPE_TB_CFG_AGE_TCP_FIN |
687+ FIELD_PREP(MTK_PPE_TB_CFG_SEARCH_MISS,
688+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD) |
689+ FIELD_PREP(MTK_PPE_TB_CFG_KEEPALIVE,
690+ MTK_PPE_KEEPALIVE_DISABLE) |
691+ FIELD_PREP(MTK_PPE_TB_CFG_HASH_MODE, 1) |
692+ FIELD_PREP(MTK_PPE_TB_CFG_SCAN_MODE,
693+ MTK_PPE_SCAN_MODE_KEEPALIVE_AGE) |
694+ FIELD_PREP(MTK_PPE_TB_CFG_ENTRY_NUM,
695+ MTK_PPE_ENTRIES_SHIFT);
696+ ppe_w32(ppe, MTK_PPE_TB_CFG, val);
697+
698+ ppe_w32(ppe, MTK_PPE_IP_PROTO_CHK,
699+ MTK_PPE_IP_PROTO_CHK_IPV4 | MTK_PPE_IP_PROTO_CHK_IPV6);
700+
701+ mtk_ppe_cache_enable(ppe, true);
702+
703+ val = MTK_PPE_FLOW_CFG_IP4_TCP_FRAG |
704+ MTK_PPE_FLOW_CFG_IP4_UDP_FRAG |
705+ MTK_PPE_FLOW_CFG_IP6_3T_ROUTE |
706+ MTK_PPE_FLOW_CFG_IP6_5T_ROUTE |
707+ MTK_PPE_FLOW_CFG_IP6_6RD |
708+ MTK_PPE_FLOW_CFG_IP4_NAT |
709+ MTK_PPE_FLOW_CFG_IP4_NAPT |
710+ MTK_PPE_FLOW_CFG_IP4_DSLITE |
711+ MTK_PPE_FLOW_CFG_L2_BRIDGE |
712+ MTK_PPE_FLOW_CFG_IP4_NAT_FRAG;
713+ ppe_w32(ppe, MTK_PPE_FLOW_CFG, val);
714+
715+ val = FIELD_PREP(MTK_PPE_UNBIND_AGE_MIN_PACKETS, 1000) |
716+ FIELD_PREP(MTK_PPE_UNBIND_AGE_DELTA, 3);
717+ ppe_w32(ppe, MTK_PPE_UNBIND_AGE, val);
718+
719+ val = FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_UDP, 12) |
720+ FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_NON_L4, 1);
721+ ppe_w32(ppe, MTK_PPE_BIND_AGE0, val);
722+
723+ val = FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP_FIN, 1) |
724+ FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP, 7);
725+ ppe_w32(ppe, MTK_PPE_BIND_AGE1, val);
726+
727+ val = MTK_PPE_BIND_LIMIT0_QUARTER | MTK_PPE_BIND_LIMIT0_HALF;
728+ ppe_w32(ppe, MTK_PPE_BIND_LIMIT0, val);
729+
730+ val = MTK_PPE_BIND_LIMIT1_FULL |
731+ FIELD_PREP(MTK_PPE_BIND_LIMIT1_NON_L4, 1);
732+ ppe_w32(ppe, MTK_PPE_BIND_LIMIT1, val);
733+
734+ val = FIELD_PREP(MTK_PPE_BIND_RATE_BIND, 30) |
735+ FIELD_PREP(MTK_PPE_BIND_RATE_PREBIND, 1);
736+ ppe_w32(ppe, MTK_PPE_BIND_RATE, val);
737+
738+ /* enable PPE */
739+ val = MTK_PPE_GLO_CFG_EN |
740+ MTK_PPE_GLO_CFG_IP4_L4_CS_DROP |
741+ MTK_PPE_GLO_CFG_IP4_CS_DROP |
742+ MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE;
743+ ppe_w32(ppe, MTK_PPE_GLO_CFG, val);
744+
745+ ppe_w32(ppe, MTK_PPE_DEFAULT_CPU_PORT, 0);
746+
747+ return 0;
748+}
749+
750+int mtk_ppe_stop(struct mtk_ppe *ppe)
751+{
752+ u32 val;
753+ int i;
754+
755+ for (i = 0; i < MTK_PPE_ENTRIES; i++)
756+ ppe->foe_table[i].ib1 = FIELD_PREP(MTK_FOE_IB1_STATE,
757+ MTK_FOE_STATE_INVALID);
758+
759+ mtk_ppe_cache_enable(ppe, false);
760+
761+ /* disable offload engine */
762+ ppe_clear(ppe, MTK_PPE_GLO_CFG, MTK_PPE_GLO_CFG_EN);
763+ ppe_w32(ppe, MTK_PPE_FLOW_CFG, 0);
764+
765+ /* disable aging */
766+ val = MTK_PPE_TB_CFG_AGE_NON_L4 |
767+ MTK_PPE_TB_CFG_AGE_UNBIND |
768+ MTK_PPE_TB_CFG_AGE_TCP |
769+ MTK_PPE_TB_CFG_AGE_UDP |
770+ MTK_PPE_TB_CFG_AGE_TCP_FIN;
771+ ppe_clear(ppe, MTK_PPE_TB_CFG, val);
772+
773+ return mtk_ppe_wait_busy(ppe);
774+}
775diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h
776new file mode 100644
777index 000000000..242fb8f2a
778--- /dev/null
779+++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
780@@ -0,0 +1,288 @@
781+// SPDX-License-Identifier: GPL-2.0-only
782+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
783+
784+#ifndef __MTK_PPE_H
785+#define __MTK_PPE_H
786+
787+#include <linux/kernel.h>
788+#include <linux/bitfield.h>
789+
790+#define MTK_ETH_PPE_BASE 0xc00
791+
792+#define MTK_PPE_ENTRIES_SHIFT 3
793+#define MTK_PPE_ENTRIES (1024 << MTK_PPE_ENTRIES_SHIFT)
794+#define MTK_PPE_HASH_MASK (MTK_PPE_ENTRIES - 1)
795+#define MTK_PPE_WAIT_TIMEOUT_US 1000000
796+
797+#define MTK_FOE_IB1_UNBIND_TIMESTAMP GENMASK(7, 0)
798+#define MTK_FOE_IB1_UNBIND_PACKETS GENMASK(23, 8)
799+#define MTK_FOE_IB1_UNBIND_PREBIND BIT(24)
800+
801+#define MTK_FOE_IB1_BIND_TIMESTAMP GENMASK(14, 0)
802+#define MTK_FOE_IB1_BIND_KEEPALIVE BIT(15)
803+#define MTK_FOE_IB1_BIND_VLAN_LAYER GENMASK(18, 16)
804+#define MTK_FOE_IB1_BIND_PPPOE BIT(19)
805+#define MTK_FOE_IB1_BIND_VLAN_TAG BIT(20)
806+#define MTK_FOE_IB1_BIND_PKT_SAMPLE BIT(21)
807+#define MTK_FOE_IB1_BIND_CACHE BIT(22)
808+#define MTK_FOE_IB1_BIND_TUNNEL_DECAP BIT(23)
809+#define MTK_FOE_IB1_BIND_TTL BIT(24)
810+
811+#define MTK_FOE_IB1_PACKET_TYPE GENMASK(27, 25)
812+#define MTK_FOE_IB1_STATE GENMASK(29, 28)
813+#define MTK_FOE_IB1_UDP BIT(30)
814+#define MTK_FOE_IB1_STATIC BIT(31)
815+
816+enum {
817+ MTK_PPE_PKT_TYPE_IPV4_HNAPT = 0,
818+ MTK_PPE_PKT_TYPE_IPV4_ROUTE = 1,
819+ MTK_PPE_PKT_TYPE_BRIDGE = 2,
820+ MTK_PPE_PKT_TYPE_IPV4_DSLITE = 3,
821+ MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T = 4,
822+ MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T = 5,
823+ MTK_PPE_PKT_TYPE_IPV6_6RD = 7,
824+};
825+
826+#define MTK_FOE_IB2_QID GENMASK(3, 0)
827+#define MTK_FOE_IB2_PSE_QOS BIT(4)
828+#define MTK_FOE_IB2_DEST_PORT GENMASK(7, 5)
829+#define MTK_FOE_IB2_MULTICAST BIT(8)
830+
831+#define MTK_FOE_IB2_WHNAT_QID2 GENMASK(13, 12)
832+#define MTK_FOE_IB2_WHNAT_DEVIDX BIT(16)
833+#define MTK_FOE_IB2_WHNAT_NAT BIT(17)
834+
835+#define MTK_FOE_IB2_PORT_MG GENMASK(17, 12)
836+
837+#define MTK_FOE_IB2_PORT_AG GENMASK(23, 18)
838+
839+#define MTK_FOE_IB2_DSCP GENMASK(31, 24)
840+
841+#define MTK_FOE_VLAN2_WHNAT_BSS GENMASK(5, 0)
842+#define MTK_FOE_VLAN2_WHNAT_WCID GENMASK(13, 6)
843+#define MTK_FOE_VLAN2_WHNAT_RING GENMASK(15, 14)
844+
845+enum {
846+ MTK_FOE_STATE_INVALID,
847+ MTK_FOE_STATE_UNBIND,
848+ MTK_FOE_STATE_BIND,
849+ MTK_FOE_STATE_FIN
850+};
851+
852+struct mtk_foe_mac_info {
853+ u16 vlan1;
854+ u16 etype;
855+
856+ u32 dest_mac_hi;
857+
858+ u16 vlan2;
859+ u16 dest_mac_lo;
860+
861+ u32 src_mac_hi;
862+
863+ u16 pppoe_id;
864+ u16 src_mac_lo;
865+};
866+
867+struct mtk_foe_bridge {
868+ u32 dest_mac_hi;
869+
870+ u16 src_mac_lo;
871+ u16 dest_mac_lo;
872+
873+ u32 src_mac_hi;
874+
875+ u32 ib2;
876+
877+ u32 _rsv[5];
878+
879+ u32 udf_tsid;
880+ struct mtk_foe_mac_info l2;
881+};
882+
883+struct mtk_ipv4_tuple {
884+ u32 src_ip;
885+ u32 dest_ip;
886+ union {
887+ struct {
888+ u16 dest_port;
889+ u16 src_port;
890+ };
891+ struct {
892+ u8 protocol;
893+ u8 _pad[3]; /* fill with 0xa5a5a5 */
894+ };
895+ u32 ports;
896+ };
897+};
898+
899+struct mtk_foe_ipv4 {
900+ struct mtk_ipv4_tuple orig;
901+
902+ u32 ib2;
903+
904+ struct mtk_ipv4_tuple new;
905+
906+ u16 timestamp;
907+ u16 _rsv0[3];
908+
909+ u32 udf_tsid;
910+
911+ struct mtk_foe_mac_info l2;
912+};
913+
914+struct mtk_foe_ipv4_dslite {
915+ struct mtk_ipv4_tuple ip4;
916+
917+ u32 tunnel_src_ip[4];
918+ u32 tunnel_dest_ip[4];
919+
920+ u8 flow_label[3];
921+ u8 priority;
922+
923+ u32 udf_tsid;
924+
925+ u32 ib2;
926+
927+ struct mtk_foe_mac_info l2;
928+};
929+
930+struct mtk_foe_ipv6 {
931+ u32 src_ip[4];
932+ u32 dest_ip[4];
933+
934+ union {
935+ struct {
936+ u8 protocol;
937+ u8 _pad[3]; /* fill with 0xa5a5a5 */
938+ }; /* 3-tuple */
939+ struct {
940+ u16 dest_port;
941+ u16 src_port;
942+ }; /* 5-tuple */
943+ u32 ports;
944+ };
945+
946+ u32 _rsv[3];
947+
948+ u32 udf;
949+
950+ u32 ib2;
951+ struct mtk_foe_mac_info l2;
952+};
953+
954+struct mtk_foe_ipv6_6rd {
955+ u32 src_ip[4];
956+ u32 dest_ip[4];
957+ u16 dest_port;
958+ u16 src_port;
959+
960+ u32 tunnel_src_ip;
961+ u32 tunnel_dest_ip;
962+
963+ u16 hdr_csum;
964+ u8 dscp;
965+ u8 ttl;
966+
967+ u8 flag;
968+ u8 pad;
969+ u8 per_flow_6rd_id;
970+ u8 pad2;
971+
972+ u32 ib2;
973+ struct mtk_foe_mac_info l2;
974+};
975+
976+struct mtk_foe_entry {
977+ u32 ib1;
978+
979+ union {
980+ struct mtk_foe_bridge bridge;
981+ struct mtk_foe_ipv4 ipv4;
982+ struct mtk_foe_ipv4_dslite dslite;
983+ struct mtk_foe_ipv6 ipv6;
984+ struct mtk_foe_ipv6_6rd ipv6_6rd;
985+ u32 data[19];
986+ };
987+};
988+
989+enum {
990+ MTK_PPE_CPU_REASON_TTL_EXCEEDED = 0x02,
991+ MTK_PPE_CPU_REASON_OPTION_HEADER = 0x03,
992+ MTK_PPE_CPU_REASON_NO_FLOW = 0x07,
993+ MTK_PPE_CPU_REASON_IPV4_FRAG = 0x08,
994+ MTK_PPE_CPU_REASON_IPV4_DSLITE_FRAG = 0x09,
995+ MTK_PPE_CPU_REASON_IPV4_DSLITE_NO_TCP_UDP = 0x0a,
996+ MTK_PPE_CPU_REASON_IPV6_6RD_NO_TCP_UDP = 0x0b,
997+ MTK_PPE_CPU_REASON_TCP_FIN_SYN_RST = 0x0c,
998+ MTK_PPE_CPU_REASON_UN_HIT = 0x0d,
999+ MTK_PPE_CPU_REASON_HIT_UNBIND = 0x0e,
1000+ MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED = 0x0f,
1001+ MTK_PPE_CPU_REASON_HIT_BIND_TCP_FIN = 0x10,
1002+ MTK_PPE_CPU_REASON_HIT_TTL_1 = 0x11,
1003+ MTK_PPE_CPU_REASON_HIT_BIND_VLAN_VIOLATION = 0x12,
1004+ MTK_PPE_CPU_REASON_KEEPALIVE_UC_OLD_HDR = 0x13,
1005+ MTK_PPE_CPU_REASON_KEEPALIVE_MC_NEW_HDR = 0x14,
1006+ MTK_PPE_CPU_REASON_KEEPALIVE_DUP_OLD_HDR = 0x15,
1007+ MTK_PPE_CPU_REASON_HIT_BIND_FORCE_CPU = 0x16,
1008+ MTK_PPE_CPU_REASON_TUNNEL_OPTION_HEADER = 0x17,
1009+ MTK_PPE_CPU_REASON_MULTICAST_TO_CPU = 0x18,
1010+ MTK_PPE_CPU_REASON_MULTICAST_TO_GMAC1_CPU = 0x19,
1011+ MTK_PPE_CPU_REASON_HIT_PRE_BIND = 0x1a,
1012+ MTK_PPE_CPU_REASON_PACKET_SAMPLING = 0x1b,
1013+ MTK_PPE_CPU_REASON_EXCEED_MTU = 0x1c,
1014+ MTK_PPE_CPU_REASON_PPE_BYPASS = 0x1e,
1015+ MTK_PPE_CPU_REASON_INVALID = 0x1f,
1016+};
1017+
1018+struct mtk_ppe {
1019+ struct device *dev;
1020+ void __iomem *base;
1021+ int version;
1022+
1023+ struct mtk_foe_entry *foe_table;
1024+ dma_addr_t foe_phys;
1025+
1026+ void *acct_table;
1027+};
1028+
1029+int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
1030+ int version);
1031+int mtk_ppe_start(struct mtk_ppe *ppe);
1032+int mtk_ppe_stop(struct mtk_ppe *ppe);
1033+
1034+static inline void
1035+mtk_foe_entry_clear(struct mtk_ppe *ppe, u16 hash)
1036+{
1037+ ppe->foe_table[hash].ib1 = 0;
1038+ dma_wmb();
1039+}
1040+
1041+static inline int
1042+mtk_foe_entry_timestamp(struct mtk_ppe *ppe, u16 hash)
1043+{
1044+ u32 ib1 = READ_ONCE(ppe->foe_table[hash].ib1);
1045+
1046+ if (FIELD_GET(MTK_FOE_IB1_STATE, ib1) != MTK_FOE_STATE_BIND)
1047+ return -1;
1048+
1049+ return FIELD_GET(MTK_FOE_IB1_BIND_TIMESTAMP, ib1);
1050+}
1051+
1052+int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
1053+ u8 pse_port, u8 *src_mac, u8 *dest_mac);
1054+int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port);
1055+int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool orig,
1056+ __be32 src_addr, __be16 src_port,
1057+ __be32 dest_addr, __be16 dest_port);
1058+int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
1059+ __be32 *src_addr, __be16 src_port,
1060+ __be32 *dest_addr, __be16 dest_port);
1061+int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port);
1062+int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid);
1063+int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid);
1064+int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
1065+ u16 timestamp);
1066+int mtk_ppe_debugfs_init(struct mtk_ppe *ppe);
1067+
1068+#endif
1069diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
1070new file mode 100644
1071index 000000000..d4b482340
1072--- /dev/null
1073+++ b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
1074@@ -0,0 +1,214 @@
1075+// SPDX-License-Identifier: GPL-2.0-only
1076+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
1077+
1078+#include <linux/kernel.h>
1079+#include <linux/debugfs.h>
1080+#include "mtk_eth_soc.h"
1081+
1082+struct mtk_flow_addr_info
1083+{
1084+ void *src, *dest;
1085+ u16 *src_port, *dest_port;
1086+ bool ipv6;
1087+};
1088+
1089+static const char *mtk_foe_entry_state_str(int state)
1090+{
1091+ static const char * const state_str[] = {
1092+ [MTK_FOE_STATE_INVALID] = "INV",
1093+ [MTK_FOE_STATE_UNBIND] = "UNB",
1094+ [MTK_FOE_STATE_BIND] = "BND",
1095+ [MTK_FOE_STATE_FIN] = "FIN",
1096+ };
1097+
1098+ if (state >= ARRAY_SIZE(state_str) || !state_str[state])
1099+ return "UNK";
1100+
1101+ return state_str[state];
1102+}
1103+
1104+static const char *mtk_foe_pkt_type_str(int type)
1105+{
1106+ static const char * const type_str[] = {
1107+ [MTK_PPE_PKT_TYPE_IPV4_HNAPT] = "IPv4 5T",
1108+ [MTK_PPE_PKT_TYPE_IPV4_ROUTE] = "IPv4 3T",
1109+ [MTK_PPE_PKT_TYPE_BRIDGE] = "L2",
1110+ [MTK_PPE_PKT_TYPE_IPV4_DSLITE] = "DS-LITE",
1111+ [MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T] = "IPv6 3T",
1112+ [MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T] = "IPv6 5T",
1113+ [MTK_PPE_PKT_TYPE_IPV6_6RD] = "6RD",
1114+ };
1115+
1116+ if (type >= ARRAY_SIZE(type_str) || !type_str[type])
1117+ return "UNKNOWN";
1118+
1119+ return type_str[type];
1120+}
1121+
1122+static void
1123+mtk_print_addr(struct seq_file *m, u32 *addr, bool ipv6)
1124+{
1125+ u32 n_addr[4];
1126+ int i;
1127+
1128+ if (!ipv6) {
1129+ seq_printf(m, "%pI4h", addr);
1130+ return;
1131+ }
1132+
1133+ for (i = 0; i < ARRAY_SIZE(n_addr); i++)
1134+ n_addr[i] = htonl(addr[i]);
1135+ seq_printf(m, "%pI6", n_addr);
1136+}
1137+
1138+static void
1139+mtk_print_addr_info(struct seq_file *m, struct mtk_flow_addr_info *ai)
1140+{
1141+ mtk_print_addr(m, ai->src, ai->ipv6);
1142+ if (ai->src_port)
1143+ seq_printf(m, ":%d", *ai->src_port);
1144+ seq_printf(m, "->");
1145+ mtk_print_addr(m, ai->dest, ai->ipv6);
1146+ if (ai->dest_port)
1147+ seq_printf(m, ":%d", *ai->dest_port);
1148+}
1149+
1150+static int
1151+mtk_ppe_debugfs_foe_show(struct seq_file *m, void *private, bool bind)
1152+{
1153+ struct mtk_ppe *ppe = m->private;
1154+ int i;
1155+
1156+ for (i = 0; i < MTK_PPE_ENTRIES; i++) {
1157+ struct mtk_foe_entry *entry = &ppe->foe_table[i];
1158+ struct mtk_foe_mac_info *l2;
1159+ struct mtk_flow_addr_info ai = {};
1160+ unsigned char h_source[ETH_ALEN];
1161+ unsigned char h_dest[ETH_ALEN];
1162+ int type, state;
1163+ u32 ib2;
1164+
1165+
1166+ state = FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1);
1167+ if (!state)
1168+ continue;
1169+
1170+ if (bind && state != MTK_FOE_STATE_BIND)
1171+ continue;
1172+
1173+ type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
1174+ seq_printf(m, "%05x %s %7s", i,
1175+ mtk_foe_entry_state_str(state),
1176+ mtk_foe_pkt_type_str(type));
1177+
1178+ switch (type) {
1179+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
1180+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
1181+ ai.src_port = &entry->ipv4.orig.src_port;
1182+ ai.dest_port = &entry->ipv4.orig.dest_port;
1183+ fallthrough;
1184+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
1185+ ai.src = &entry->ipv4.orig.src_ip;
1186+ ai.dest = &entry->ipv4.orig.dest_ip;
1187+ break;
1188+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
1189+ ai.src_port = &entry->ipv6.src_port;
1190+ ai.dest_port = &entry->ipv6.dest_port;
1191+ fallthrough;
1192+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
1193+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
1194+ ai.src = &entry->ipv6.src_ip;
1195+ ai.dest = &entry->ipv6.dest_ip;
1196+ ai.ipv6 = true;
1197+ break;
1198+ }
1199+
1200+ seq_printf(m, " orig=");
1201+ mtk_print_addr_info(m, &ai);
1202+
1203+ switch (type) {
1204+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
1205+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
1206+ ai.src_port = &entry->ipv4.new.src_port;
1207+ ai.dest_port = &entry->ipv4.new.dest_port;
1208+ fallthrough;
1209+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
1210+ ai.src = &entry->ipv4.new.src_ip;
1211+ ai.dest = &entry->ipv4.new.dest_ip;
1212+ seq_printf(m, " new=");
1213+ mtk_print_addr_info(m, &ai);
1214+ break;
1215+ }
1216+
1217+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
1218+ l2 = &entry->ipv6.l2;
1219+ ib2 = entry->ipv6.ib2;
1220+ } else {
1221+ l2 = &entry->ipv4.l2;
1222+ ib2 = entry->ipv4.ib2;
1223+ }
1224+
1225+ *((__be32 *)h_source) = htonl(l2->src_mac_hi);
1226+ *((__be16 *)&h_source[4]) = htons(l2->src_mac_lo);
1227+ *((__be32 *)h_dest) = htonl(l2->dest_mac_hi);
1228+ *((__be16 *)&h_dest[4]) = htons(l2->dest_mac_lo);
1229+
1230+ seq_printf(m, " eth=%pM->%pM etype=%04x"
1231+ " vlan=%d,%d ib1=%08x ib2=%08x\n",
1232+ h_source, h_dest, ntohs(l2->etype),
1233+ l2->vlan1, l2->vlan2, entry->ib1, ib2);
1234+ }
1235+
1236+ return 0;
1237+}
1238+
1239+static int
1240+mtk_ppe_debugfs_foe_show_all(struct seq_file *m, void *private)
1241+{
1242+ return mtk_ppe_debugfs_foe_show(m, private, false);
1243+}
1244+
1245+static int
1246+mtk_ppe_debugfs_foe_show_bind(struct seq_file *m, void *private)
1247+{
1248+ return mtk_ppe_debugfs_foe_show(m, private, true);
1249+}
1250+
1251+static int
1252+mtk_ppe_debugfs_foe_open_all(struct inode *inode, struct file *file)
1253+{
1254+ return single_open(file, mtk_ppe_debugfs_foe_show_all,
1255+ inode->i_private);
1256+}
1257+
1258+static int
1259+mtk_ppe_debugfs_foe_open_bind(struct inode *inode, struct file *file)
1260+{
1261+ return single_open(file, mtk_ppe_debugfs_foe_show_bind,
1262+ inode->i_private);
1263+}
1264+
1265+int mtk_ppe_debugfs_init(struct mtk_ppe *ppe)
1266+{
1267+ static const struct file_operations fops_all = {
1268+ .open = mtk_ppe_debugfs_foe_open_all,
1269+ .read = seq_read,
1270+ .llseek = seq_lseek,
1271+ .release = single_release,
1272+ };
1273+
1274+ static const struct file_operations fops_bind = {
1275+ .open = mtk_ppe_debugfs_foe_open_bind,
1276+ .read = seq_read,
1277+ .llseek = seq_lseek,
1278+ .release = single_release,
1279+ };
1280+
1281+ struct dentry *root;
1282+
1283+ root = debugfs_create_dir("mtk_ppe", NULL);
1284+ debugfs_create_file("entries", S_IRUGO, root, ppe, &fops_all);
1285+ debugfs_create_file("bind", S_IRUGO, root, ppe, &fops_bind);
1286+
1287+ return 0;
1288+}
1289diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
1290new file mode 100644
1291index 000000000..4294f0c74
1292--- /dev/null
1293+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
1294@@ -0,0 +1,541 @@
1295+// SPDX-License-Identifier: GPL-2.0-only
1296+/*
1297+ * Copyright (C) 2020 Felix Fietkau <nbd@nbd.name>
1298+ */
1299+
1300+#include <linux/if_ether.h>
1301+#include <linux/rhashtable.h>
1302+#include <linux/ip.h>
1303+#include <linux/ipv6.h>
1304+#include <net/flow_offload.h>
1305+#include <net/pkt_cls.h>
1306+#include <net/dsa.h>
1307+#include "mtk_eth_soc.h"
1308+
1309+struct mtk_flow_data {
1310+ struct ethhdr eth;
1311+
1312+ union {
1313+ struct {
1314+ __be32 src_addr;
1315+ __be32 dst_addr;
1316+ } v4;
1317+
1318+ struct {
1319+ struct in6_addr src_addr;
1320+ struct in6_addr dst_addr;
1321+ } v6;
1322+ };
1323+
1324+ __be16 src_port;
1325+ __be16 dst_port;
1326+
1327+ struct {
1328+ u16 id;
1329+ __be16 proto;
1330+ u8 num;
1331+ } vlan;
1332+ struct {
1333+ u16 sid;
1334+ u8 num;
1335+ } pppoe;
1336+};
1337+
1338+struct mtk_flow_entry {
1339+ struct rhash_head node;
1340+ unsigned long cookie;
1341+ u16 hash;
1342+};
1343+
1344+static const struct rhashtable_params mtk_flow_ht_params = {
1345+ .head_offset = offsetof(struct mtk_flow_entry, node),
1346+ .key_offset = offsetof(struct mtk_flow_entry, cookie),
1347+ .key_len = sizeof(unsigned long),
1348+ .automatic_shrinking = true,
1349+};
1350+
1351+static u32
1352+mtk_eth_timestamp(struct mtk_eth *eth)
1353+{
1354+ return mtk_r32(eth, 0x0010) & MTK_FOE_IB1_BIND_TIMESTAMP;
1355+}
1356+
1357+static int
1358+mtk_flow_set_ipv4_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data,
1359+ bool egress)
1360+{
1361+ return mtk_foe_entry_set_ipv4_tuple(foe, egress,
1362+ data->v4.src_addr, data->src_port,
1363+ data->v4.dst_addr, data->dst_port);
1364+}
1365+
1366+static int
1367+mtk_flow_set_ipv6_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data)
1368+{
1369+ return mtk_foe_entry_set_ipv6_tuple(foe,
1370+ data->v6.src_addr.s6_addr32, data->src_port,
1371+ data->v6.dst_addr.s6_addr32, data->dst_port);
1372+}
1373+
1374+static void
1375+mtk_flow_offload_mangle_eth(const struct flow_action_entry *act, void *eth)
1376+{
1377+ void *dest = eth + act->mangle.offset;
1378+ const void *src = &act->mangle.val;
1379+
1380+ if (act->mangle.offset > 8)
1381+ return;
1382+
1383+ if (act->mangle.mask == 0xffff) {
1384+ src += 2;
1385+ dest += 2;
1386+ }
1387+
1388+ memcpy(dest, src, act->mangle.mask ? 2 : 4);
1389+}
1390+
1391+
1392+static int
1393+mtk_flow_mangle_ports(const struct flow_action_entry *act,
1394+ struct mtk_flow_data *data)
1395+{
1396+ u32 val = ntohl(act->mangle.val);
1397+
1398+ switch (act->mangle.offset) {
1399+ case 0:
1400+ if (act->mangle.mask == ~htonl(0xffff))
1401+ data->dst_port = cpu_to_be16(val);
1402+ else
1403+ data->src_port = cpu_to_be16(val >> 16);
1404+ break;
1405+ case 2:
1406+ data->dst_port = cpu_to_be16(val);
1407+ break;
1408+ default:
1409+ return -EINVAL;
1410+ }
1411+
1412+ return 0;
1413+}
1414+
1415+static int
1416+mtk_flow_mangle_ipv4(const struct flow_action_entry *act,
1417+ struct mtk_flow_data *data)
1418+{
1419+ __be32 *dest;
1420+
1421+ switch (act->mangle.offset) {
1422+ case offsetof(struct iphdr, saddr):
1423+ dest = &data->v4.src_addr;
1424+ break;
1425+ case offsetof(struct iphdr, daddr):
1426+ dest = &data->v4.dst_addr;
1427+ break;
1428+ default:
1429+ return -EINVAL;
1430+ }
1431+
1432+ memcpy(dest, &act->mangle.val, sizeof(u32));
1433+
1434+ return 0;
1435+}
1436+
1437+static int
1438+mtk_flow_get_dsa_port(struct net_device **dev)
1439+{
1440+#if IS_ENABLED(CONFIG_NET_DSA)
1441+ struct dsa_port *dp;
1442+
1443+ dp = dsa_port_from_netdev(*dev);
1444+ if (IS_ERR(dp))
1445+ return -ENODEV;
1446+
1447+ if (dp->cpu_dp->tag_ops->proto != DSA_TAG_PROTO_MTK)
1448+ return -ENODEV;
1449+
1450+ *dev = dp->cpu_dp->master;
1451+
1452+ return dp->index;
1453+#else
1454+ return -ENODEV;
1455+#endif
1456+}
1457+
1458+static int
1459+mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
1460+ struct net_device *dev)
1461+{
1462+ int pse_port, dsa_port;
1463+
1464+ dsa_port = mtk_flow_get_dsa_port(&dev);
1465+ if (dsa_port >= 0)
1466+ mtk_foe_entry_set_dsa(foe, dsa_port);
1467+
1468+ if (dev == eth->netdev[0])
1469+ pse_port = PSE_GDM1_PORT;
1470+ else if (dev == eth->netdev[1])
1471+ pse_port = PSE_GDM2_PORT;
1472+ else
1473+ return -EOPNOTSUPP;
1474+
1475+ mtk_foe_entry_set_pse_port(foe, pse_port);
1476+
1477+ return 0;
1478+}
1479+
1480+static int
1481+mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
1482+{
1483+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1484+ struct flow_action_entry *act;
1485+ struct mtk_flow_data data = {};
1486+ struct mtk_foe_entry foe;
1487+ struct net_device *odev = NULL;
1488+ struct mtk_flow_entry *entry;
1489+ int offload_type = 0;
1490+ u16 addr_type = 0;
1491+ u32 timestamp;
1492+ u8 l4proto = 0;
1493+ int err = 0;
1494+ int hash;
1495+ int i;
1496+
1497+ if (rhashtable_lookup(&eth->flow_table, &f->cookie, mtk_flow_ht_params))
1498+ return -EEXIST;
1499+
1500+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
1501+ struct flow_match_meta match;
1502+
1503+ flow_rule_match_meta(rule, &match);
1504+ } else {
1505+ return -EOPNOTSUPP;
1506+ }
1507+
1508+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
1509+ struct flow_match_control match;
1510+
1511+ flow_rule_match_control(rule, &match);
1512+ addr_type = match.key->addr_type;
1513+ } else {
1514+ return -EOPNOTSUPP;
1515+ }
1516+
1517+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
1518+ struct flow_match_basic match;
1519+
1520+ flow_rule_match_basic(rule, &match);
1521+ l4proto = match.key->ip_proto;
1522+ } else {
1523+ return -EOPNOTSUPP;
1524+ }
1525+
1526+ flow_action_for_each(i, act, &rule->action) {
1527+ switch (act->id) {
1528+ case FLOW_ACTION_MANGLE:
1529+ if (act->mangle.htype == FLOW_ACT_MANGLE_HDR_TYPE_ETH)
1530+ mtk_flow_offload_mangle_eth(act, &data.eth);
1531+ break;
1532+ case FLOW_ACTION_REDIRECT:
1533+ odev = act->dev;
1534+ break;
1535+ case FLOW_ACTION_CSUM:
1536+ break;
1537+ case FLOW_ACTION_VLAN_PUSH:
1538+ if (data.vlan.num == 1 ||
1539+ act->vlan.proto != htons(ETH_P_8021Q))
1540+ return -EOPNOTSUPP;
1541+
1542+ data.vlan.id = act->vlan.vid;
1543+ data.vlan.proto = act->vlan.proto;
1544+ data.vlan.num++;
1545+ break;
1546+ case FLOW_ACTION_VLAN_POP:
1547+ break;
1548+ case FLOW_ACTION_PPPOE_PUSH:
1549+ if (data.pppoe.num == 1)
1550+ return -EOPNOTSUPP;
1551+
1552+ data.pppoe.sid = act->pppoe.sid;
1553+ data.pppoe.num++;
1554+ break;
1555+ default:
1556+ return -EOPNOTSUPP;
1557+ }
1558+ }
1559+
1560+ switch (addr_type) {
1561+ case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
1562+ offload_type = MTK_PPE_PKT_TYPE_IPV4_HNAPT;
1563+ break;
1564+ case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
1565+ offload_type = MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T;
1566+ break;
1567+ default:
1568+ return -EOPNOTSUPP;
1569+ }
1570+
1571+ if (!is_valid_ether_addr(data.eth.h_source) ||
1572+ !is_valid_ether_addr(data.eth.h_dest))
1573+ return -EINVAL;
1574+
1575+ err = mtk_foe_entry_prepare(&foe, offload_type, l4proto, 0,
1576+ data.eth.h_source,
1577+ data.eth.h_dest);
1578+ if (err)
1579+ return err;
1580+
1581+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
1582+ struct flow_match_ports ports;
1583+
1584+ flow_rule_match_ports(rule, &ports);
1585+ data.src_port = ports.key->src;
1586+ data.dst_port = ports.key->dst;
1587+ } else {
1588+ return -EOPNOTSUPP;
1589+ }
1590+
1591+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
1592+ struct flow_match_ipv4_addrs addrs;
1593+
1594+ flow_rule_match_ipv4_addrs(rule, &addrs);
1595+
1596+ data.v4.src_addr = addrs.key->src;
1597+ data.v4.dst_addr = addrs.key->dst;
1598+
1599+ mtk_flow_set_ipv4_addr(&foe, &data, false);
1600+ }
1601+
1602+ if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
1603+ struct flow_match_ipv6_addrs addrs;
1604+
1605+ flow_rule_match_ipv6_addrs(rule, &addrs);
1606+
1607+ data.v6.src_addr = addrs.key->src;
1608+ data.v6.dst_addr = addrs.key->dst;
1609+
1610+ mtk_flow_set_ipv6_addr(&foe, &data);
1611+ }
1612+
1613+ flow_action_for_each(i, act, &rule->action) {
1614+ if (act->id != FLOW_ACTION_MANGLE)
1615+ continue;
1616+
1617+ switch (act->mangle.htype) {
1618+ case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
1619+ case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
1620+ err = mtk_flow_mangle_ports(act, &data);
1621+ break;
1622+ case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
1623+ err = mtk_flow_mangle_ipv4(act, &data);
1624+ break;
1625+ case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
1626+ /* handled earlier */
1627+ break;
1628+ default:
1629+ return -EOPNOTSUPP;
1630+ }
1631+
1632+ if (err)
1633+ return err;
1634+ }
1635+
1636+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
1637+ err = mtk_flow_set_ipv4_addr(&foe, &data, true);
1638+ if (err)
1639+ return err;
1640+ }
1641+
1642+ if (data.vlan.num == 1) {
1643+ if (data.vlan.proto != htons(ETH_P_8021Q))
1644+ return -EOPNOTSUPP;
1645+
1646+ mtk_foe_entry_set_vlan(&foe, data.vlan.id);
1647+ }
1648+ if (data.pppoe.num == 1)
1649+ mtk_foe_entry_set_pppoe(&foe, data.pppoe.sid);
1650+
1651+ err = mtk_flow_set_output_device(eth, &foe, odev);
1652+ if (err)
1653+ return err;
1654+
1655+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1656+ if (!entry)
1657+ return -ENOMEM;
1658+
1659+ entry->cookie = f->cookie;
1660+ timestamp = mtk_eth_timestamp(eth);
1661+ hash = mtk_foe_entry_commit(&eth->ppe, &foe, timestamp);
1662+ if (hash < 0) {
1663+ err = hash;
1664+ goto free;
1665+ }
1666+
1667+ entry->hash = hash;
1668+ err = rhashtable_insert_fast(&eth->flow_table, &entry->node,
1669+ mtk_flow_ht_params);
1670+ if (err < 0)
1671+ goto clear_flow;
1672+
1673+ return 0;
1674+clear_flow:
1675+ mtk_foe_entry_clear(&eth->ppe, hash);
1676+free:
1677+ kfree(entry);
1678+ return err;
1679+}
1680+
1681+static int
1682+mtk_flow_offload_destroy(struct mtk_eth *eth, struct flow_cls_offload *f)
1683+{
1684+ struct mtk_flow_entry *entry;
1685+
1686+ entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
1687+ mtk_flow_ht_params);
1688+ if (!entry)
1689+ return -ENOENT;
1690+
1691+ mtk_foe_entry_clear(&eth->ppe, entry->hash);
1692+ rhashtable_remove_fast(&eth->flow_table, &entry->node,
1693+ mtk_flow_ht_params);
1694+ kfree(entry);
1695+
1696+ return 0;
1697+}
1698+
1699+static int
1700+mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f)
1701+{
1702+ struct mtk_flow_entry *entry;
1703+ int timestamp;
1704+ u32 idle;
1705+
1706+ entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
1707+ mtk_flow_ht_params);
1708+ if (!entry)
1709+ return -ENOENT;
1710+
1711+ timestamp = mtk_foe_entry_timestamp(&eth->ppe, entry->hash);
1712+ if (timestamp < 0)
1713+ return -ETIMEDOUT;
1714+
1715+ idle = mtk_eth_timestamp(eth) - timestamp;
1716+ f->stats.lastused = jiffies - idle * HZ;
1717+
1718+ return 0;
1719+}
1720+
1721+static DEFINE_MUTEX(mtk_flow_offload_mutex);
1722+
1723+static int
1724+mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
1725+{
1726+ struct flow_cls_offload *cls = type_data;
1727+ struct net_device *dev = cb_priv;
1728+ struct mtk_mac *mac = netdev_priv(dev);
1729+ struct mtk_eth *eth = mac->hw;
1730+ int err;
1731+
1732+ if (!tc_can_offload(dev))
1733+ return -EOPNOTSUPP;
1734+
1735+ if (type != TC_SETUP_CLSFLOWER)
1736+ return -EOPNOTSUPP;
1737+
1738+ mutex_lock(&mtk_flow_offload_mutex);
1739+ switch (cls->command) {
1740+ case FLOW_CLS_REPLACE:
1741+ err = mtk_flow_offload_replace(eth, cls);
1742+ break;
1743+ case FLOW_CLS_DESTROY:
1744+ err = mtk_flow_offload_destroy(eth, cls);
1745+ break;
1746+ case FLOW_CLS_STATS:
1747+ err = mtk_flow_offload_stats(eth, cls);
1748+ break;
1749+ default:
1750+ err = -EOPNOTSUPP;
1751+ break;
1752+ }
1753+ mutex_unlock(&mtk_flow_offload_mutex);
1754+
1755+ return err;
1756+}
1757+
1758+static int
1759+mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f)
1760+{
1761+ struct mtk_mac *mac = netdev_priv(dev);
1762+ struct mtk_eth *eth = mac->hw;
1763+ struct nf_flowtable *flowtable;
1764+ static LIST_HEAD(block_cb_list);
1765+ struct flow_block_cb *block_cb;
1766+ flow_setup_cb_t *cb;
1767+ int err = 0;
1768+
1769+ flowtable = container_of(f->block, struct nf_flowtable, flow_block);
1770+
1771+ if (!eth->ppe.foe_table)
1772+ return -EOPNOTSUPP;
1773+
1774+ if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
1775+ return -EOPNOTSUPP;
1776+
1777+ cb = mtk_eth_setup_tc_block_cb;
1778+ f->driver_block_list = &block_cb_list;
1779+
1780+ down_write(&flowtable->flow_block_lock);
1781+
1782+ switch (f->command) {
1783+ case FLOW_BLOCK_BIND:
1784+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
1785+ if (block_cb) {
1786+ flow_block_cb_incref(block_cb);
1787+ goto unlock;
1788+ }
1789+ block_cb = flow_block_cb_alloc(cb, dev, dev, NULL);
1790+ if (IS_ERR(block_cb)) {
1791+ err = PTR_ERR(block_cb);
1792+ goto unlock;
1793+ }
1794+
1795+ flow_block_cb_add(block_cb, f);
1796+ list_add_tail(&block_cb->driver_list, &block_cb_list);
1797+ break;
1798+ case FLOW_BLOCK_UNBIND:
1799+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
1800+ if (!block_cb) {
1801+ err = -ENOENT;
1802+ goto unlock;
1803+ }
1804+
1805+ if (flow_block_cb_decref(block_cb)) {
1806+ flow_block_cb_remove(block_cb, f);
1807+ list_del(&block_cb->driver_list);
1808+ }
1809+ break;
1810+ default:
1811+ err = -EOPNOTSUPP;
1812+ break;
1813+ }
1814+
1815+unlock:
1816+ up_write(&flowtable->flow_block_lock);
1817+ return err;
1818+}
1819+
1820+int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
1821+ void *type_data)
1822+{
1823+ if (type == TC_SETUP_FT)
1824+ return mtk_eth_setup_tc_block(dev, type_data);
1825+
1826+ return -EOPNOTSUPP;
1827+}
1828+
1829+int mtk_eth_offload_init(struct mtk_eth *eth)
1830+{
1831+ if (!eth->ppe.foe_table)
1832+ return 0;
1833+
1834+ return rhashtable_init(&eth->flow_table, &mtk_flow_ht_params);
1835+}
1836diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_regs.h b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
1837new file mode 100644
1838index 000000000..0c45ea090
1839--- /dev/null
1840+++ b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
1841@@ -0,0 +1,144 @@
1842+// SPDX-License-Identifier: GPL-2.0-only
1843+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
1844+
1845+#ifndef __MTK_PPE_REGS_H
1846+#define __MTK_PPE_REGS_H
1847+
1848+#define MTK_PPE_GLO_CFG 0x200
1849+#define MTK_PPE_GLO_CFG_EN BIT(0)
1850+#define MTK_PPE_GLO_CFG_TSID_EN BIT(1)
1851+#define MTK_PPE_GLO_CFG_IP4_L4_CS_DROP BIT(2)
1852+#define MTK_PPE_GLO_CFG_IP4_CS_DROP BIT(3)
1853+#define MTK_PPE_GLO_CFG_TTL0_DROP BIT(4)
1854+#define MTK_PPE_GLO_CFG_PPE_BSWAP BIT(5)
1855+#define MTK_PPE_GLO_CFG_PSE_HASH_OFS BIT(6)
1856+#define MTK_PPE_GLO_CFG_MCAST_TB_EN BIT(7)
1857+#define MTK_PPE_GLO_CFG_FLOW_DROP_KA BIT(8)
1858+#define MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE BIT(9)
1859+#define MTK_PPE_GLO_CFG_UDP_LITE_EN BIT(10)
1860+#define MTK_PPE_GLO_CFG_UDP_LEN_DROP BIT(11)
1861+#define MTK_PPE_GLO_CFG_MCAST_ENTRIES GENMASK(13, 12)
1862+#define MTK_PPE_GLO_CFG_BUSY BIT(31)
1863+
1864+#define MTK_PPE_FLOW_CFG 0x204
1865+#define MTK_PPE_FLOW_CFG_IP4_TCP_FRAG BIT(6)
1866+#define MTK_PPE_FLOW_CFG_IP4_UDP_FRAG BIT(7)
1867+#define MTK_PPE_FLOW_CFG_IP6_3T_ROUTE BIT(8)
1868+#define MTK_PPE_FLOW_CFG_IP6_5T_ROUTE BIT(9)
1869+#define MTK_PPE_FLOW_CFG_IP6_6RD BIT(10)
1870+#define MTK_PPE_FLOW_CFG_IP4_NAT BIT(12)
1871+#define MTK_PPE_FLOW_CFG_IP4_NAPT BIT(13)
1872+#define MTK_PPE_FLOW_CFG_IP4_DSLITE BIT(14)
1873+#define MTK_PPE_FLOW_CFG_L2_BRIDGE BIT(15)
1874+#define MTK_PPE_FLOW_CFG_IP_PROTO_BLACKLIST BIT(16)
1875+#define MTK_PPE_FLOW_CFG_IP4_NAT_FRAG BIT(17)
1876+#define MTK_PPE_FLOW_CFG_IP4_HASH_FLOW_LABEL BIT(18)
1877+#define MTK_PPE_FLOW_CFG_IP4_HASH_GRE_KEY BIT(19)
1878+#define MTK_PPE_FLOW_CFG_IP6_HASH_GRE_KEY BIT(20)
1879+
1880+#define MTK_PPE_IP_PROTO_CHK 0x208
1881+#define MTK_PPE_IP_PROTO_CHK_IPV4 GENMASK(15, 0)
1882+#define MTK_PPE_IP_PROTO_CHK_IPV6 GENMASK(31, 16)
1883+
1884+#define MTK_PPE_TB_CFG 0x21c
1885+#define MTK_PPE_TB_CFG_ENTRY_NUM GENMASK(2, 0)
1886+#define MTK_PPE_TB_CFG_ENTRY_80B BIT(3)
1887+#define MTK_PPE_TB_CFG_SEARCH_MISS GENMASK(5, 4)
1888+#define MTK_PPE_TB_CFG_AGE_PREBIND BIT(6)
1889+#define MTK_PPE_TB_CFG_AGE_NON_L4 BIT(7)
1890+#define MTK_PPE_TB_CFG_AGE_UNBIND BIT(8)
1891+#define MTK_PPE_TB_CFG_AGE_TCP BIT(9)
1892+#define MTK_PPE_TB_CFG_AGE_UDP BIT(10)
1893+#define MTK_PPE_TB_CFG_AGE_TCP_FIN BIT(11)
1894+#define MTK_PPE_TB_CFG_KEEPALIVE GENMASK(13, 12)
1895+#define MTK_PPE_TB_CFG_HASH_MODE GENMASK(15, 14)
1896+#define MTK_PPE_TB_CFG_SCAN_MODE GENMASK(17, 16)
1897+#define MTK_PPE_TB_CFG_HASH_DEBUG GENMASK(19, 18)
1898+
1899+enum {
1900+ MTK_PPE_SCAN_MODE_DISABLED,
1901+ MTK_PPE_SCAN_MODE_CHECK_AGE,
1902+ MTK_PPE_SCAN_MODE_KEEPALIVE_AGE,
1903+};
1904+
1905+enum {
1906+ MTK_PPE_KEEPALIVE_DISABLE,
1907+ MTK_PPE_KEEPALIVE_UNICAST_CPU,
1908+ MTK_PPE_KEEPALIVE_DUP_CPU = 3,
1909+};
1910+
1911+enum {
1912+ MTK_PPE_SEARCH_MISS_ACTION_DROP,
1913+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD = 2,
1914+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD = 3,
1915+};
1916+
1917+#define MTK_PPE_TB_BASE 0x220
1918+
1919+#define MTK_PPE_TB_USED 0x224
1920+#define MTK_PPE_TB_USED_NUM GENMASK(13, 0)
1921+
1922+#define MTK_PPE_BIND_RATE 0x228
1923+#define MTK_PPE_BIND_RATE_BIND GENMASK(15, 0)
1924+#define MTK_PPE_BIND_RATE_PREBIND GENMASK(31, 16)
1925+
1926+#define MTK_PPE_BIND_LIMIT0 0x22c
1927+#define MTK_PPE_BIND_LIMIT0_QUARTER GENMASK(13, 0)
1928+#define MTK_PPE_BIND_LIMIT0_HALF GENMASK(29, 16)
1929+
1930+#define MTK_PPE_BIND_LIMIT1 0x230
1931+#define MTK_PPE_BIND_LIMIT1_FULL GENMASK(13, 0)
1932+#define MTK_PPE_BIND_LIMIT1_NON_L4 GENMASK(23, 16)
1933+
1934+#define MTK_PPE_KEEPALIVE 0x234
1935+#define MTK_PPE_KEEPALIVE_TIME GENMASK(15, 0)
1936+#define MTK_PPE_KEEPALIVE_TIME_TCP GENMASK(23, 16)
1937+#define MTK_PPE_KEEPALIVE_TIME_UDP GENMASK(31, 24)
1938+
1939+#define MTK_PPE_UNBIND_AGE 0x238
1940+#define MTK_PPE_UNBIND_AGE_MIN_PACKETS GENMASK(31, 16)
1941+#define MTK_PPE_UNBIND_AGE_DELTA GENMASK(7, 0)
1942+
1943+#define MTK_PPE_BIND_AGE0 0x23c
1944+#define MTK_PPE_BIND_AGE0_DELTA_NON_L4 GENMASK(30, 16)
1945+#define MTK_PPE_BIND_AGE0_DELTA_UDP GENMASK(14, 0)
1946+
1947+#define MTK_PPE_BIND_AGE1 0x240
1948+#define MTK_PPE_BIND_AGE1_DELTA_TCP_FIN GENMASK(30, 16)
1949+#define MTK_PPE_BIND_AGE1_DELTA_TCP GENMASK(14, 0)
1950+
1951+#define MTK_PPE_HASH_SEED 0x244
1952+
1953+#define MTK_PPE_DEFAULT_CPU_PORT 0x248
1954+#define MTK_PPE_DEFAULT_CPU_PORT_MASK(_n) (GENMASK(2, 0) << ((_n) * 4))
1955+
1956+#define MTK_PPE_MTU_DROP 0x308
1957+
1958+#define MTK_PPE_VLAN_MTU0 0x30c
1959+#define MTK_PPE_VLAN_MTU0_NONE GENMASK(13, 0)
1960+#define MTK_PPE_VLAN_MTU0_1TAG GENMASK(29, 16)
1961+
1962+#define MTK_PPE_VLAN_MTU1 0x310
1963+#define MTK_PPE_VLAN_MTU1_2TAG GENMASK(13, 0)
1964+#define MTK_PPE_VLAN_MTU1_3TAG GENMASK(29, 16)
1965+
1966+#define MTK_PPE_VPM_TPID 0x318
1967+
1968+#define MTK_PPE_CACHE_CTL 0x320
1969+#define MTK_PPE_CACHE_CTL_EN BIT(0)
1970+#define MTK_PPE_CACHE_CTL_LOCK_CLR BIT(4)
1971+#define MTK_PPE_CACHE_CTL_REQ BIT(8)
1972+#define MTK_PPE_CACHE_CTL_CLEAR BIT(9)
1973+#define MTK_PPE_CACHE_CTL_CMD GENMASK(13, 12)
1974+
1975+#define MTK_PPE_MIB_CFG 0x334
1976+#define MTK_PPE_MIB_CFG_EN BIT(0)
1977+#define MTK_PPE_MIB_CFG_RD_CLR BIT(1)
1978+
1979+#define MTK_PPE_MIB_TB_BASE 0x338
1980+
1981+#define MTK_PPE_MIB_CACHE_CTL 0x350
1982+#define MTK_PPE_MIB_CACHE_CTL_EN BIT(0)
1983+#define MTK_PPE_MIB_CACHE_CTL_FLUSH BIT(2)
1984+
1985+#endif
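The register map above is consumed with the kernel's standard bitfield helpers. As a rough, illustrative sketch (not taken from this patch: the function name and the ppe_base argument are hypothetical, and the real driver wraps register access in its own helpers), programming one of the GENMASK()-defined fields in MTK_PPE_TB_CFG looks like this:

#include <linux/bitfield.h>
#include <linux/io.h>

/* Illustrative read-modify-write of MTK_PPE_TB_CFG using FIELD_PREP()
 * on the GENMASK()-based fields defined above.
 */
static void example_ppe_set_scan_mode(void __iomem *ppe_base)
{
	u32 val = readl(ppe_base + MTK_PPE_TB_CFG);

	val &= ~(MTK_PPE_TB_CFG_SCAN_MODE | MTK_PPE_TB_CFG_KEEPALIVE);
	val |= FIELD_PREP(MTK_PPE_TB_CFG_SCAN_MODE,
			  MTK_PPE_SCAN_MODE_KEEPALIVE_AGE);
	val |= FIELD_PREP(MTK_PPE_TB_CFG_KEEPALIVE,
			  MTK_PPE_KEEPALIVE_DISABLE);
	writel(val, ppe_base + MTK_PPE_TB_CFG);
}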
1986diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
1987index a085213dc..813e30495 100644
1988--- a/drivers/net/ppp/ppp_generic.c
1989+++ b/drivers/net/ppp/ppp_generic.c
1990@@ -1378,12 +1378,34 @@ static void ppp_dev_priv_destructor(struct net_device *dev)
1991 ppp_destroy_interface(ppp);
1992 }
1993
1994+static int ppp_fill_forward_path(struct net_device_path_ctx *ctx,
1995+ struct net_device_path *path)
1996+{
1997+ struct ppp *ppp = netdev_priv(ctx->dev);
1998+ struct ppp_channel *chan;
1999+ struct channel *pch;
2000+
2001+ if (ppp->flags & SC_MULTILINK)
2002+ return -EOPNOTSUPP;
2003+
2004+ if (list_empty(&ppp->channels))
2005+ return -ENODEV;
2006+
2007+ pch = list_first_entry(&ppp->channels, struct channel, clist);
2008+ chan = pch->chan;
2009+ if (!chan->ops->fill_forward_path)
2010+ return -EOPNOTSUPP;
2011+
2012+ return chan->ops->fill_forward_path(ctx, path, chan);
2013+}
2014+
2015 static const struct net_device_ops ppp_netdev_ops = {
2016 .ndo_init = ppp_dev_init,
2017 .ndo_uninit = ppp_dev_uninit,
2018 .ndo_start_xmit = ppp_start_xmit,
2019 .ndo_do_ioctl = ppp_net_ioctl,
2020 .ndo_get_stats64 = ppp_get_stats64,
2021+ .ndo_fill_forward_path = ppp_fill_forward_path,
2022 };
2023
2024 static struct device_type ppp_type = {
2025diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
2026index 087b01684..7a8c246ab 100644
2027--- a/drivers/net/ppp/pppoe.c
2028+++ b/drivers/net/ppp/pppoe.c
2029@@ -974,8 +974,32 @@ static int pppoe_xmit(struct ppp_channel *chan, struct sk_buff *skb)
2030 return __pppoe_xmit(sk, skb);
2031 }
2032
2033+static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx,
2034+ struct net_device_path *path,
2035+ const struct ppp_channel *chan)
2036+{
2037+ struct sock *sk = (struct sock *)chan->private;
2038+ struct pppox_sock *po = pppox_sk(sk);
2039+ struct net_device *dev = po->pppoe_dev;
2040+
2041+ if (sock_flag(sk, SOCK_DEAD) ||
2042+ !(sk->sk_state & PPPOX_CONNECTED) || !dev)
2043+ return -1;
2044+
2045+ path->type = DEV_PATH_PPPOE;
2046+ path->encap.proto = htons(ETH_P_PPP_SES);
2047+ path->encap.id = be16_to_cpu(po->num);
2048+ memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN);
2049+ memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN);
2050+ path->dev = ctx->dev;
2051+ ctx->dev = dev;
2052+
2053+ return 0;
2054+}
2055+
2056 static const struct ppp_channel_ops pppoe_chan_ops = {
2057 .start_xmit = pppoe_xmit,
2058+ .fill_forward_path = pppoe_fill_forward_path,
2059 };
2060
2061 static int pppoe_recvmsg(struct socket *sock, struct msghdr *m,
2062diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
2063index 38af42bf8..9f64504ac 100644
2064--- a/include/linux/netdevice.h
2065+++ b/include/linux/netdevice.h
2066@@ -829,6 +829,59 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
2067 struct sk_buff *skb,
2068 struct net_device *sb_dev);
2069
2070+enum net_device_path_type {
2071+ DEV_PATH_ETHERNET = 0,
2072+ DEV_PATH_VLAN,
2073+ DEV_PATH_BRIDGE,
2074+ DEV_PATH_PPPOE,
2075+ DEV_PATH_DSA,
2076+};
2077+
2078+struct net_device_path {
2079+ enum net_device_path_type type;
2080+ const struct net_device *dev;
2081+ union {
2082+ struct {
2083+ u16 id;
2084+ __be16 proto;
2085+ u8 h_dest[ETH_ALEN];
2086+ } encap;
2087+ struct {
2088+ enum {
2089+ DEV_PATH_BR_VLAN_KEEP,
2090+ DEV_PATH_BR_VLAN_TAG,
2091+ DEV_PATH_BR_VLAN_UNTAG,
2092+ DEV_PATH_BR_VLAN_UNTAG_HW,
2093+ } vlan_mode;
2094+ u16 vlan_id;
2095+ __be16 vlan_proto;
2096+ } bridge;
2097+ struct {
2098+ int port;
2099+ u16 proto;
2100+ } dsa;
2101+ };
2102+};
2103+
2104+#define NET_DEVICE_PATH_STACK_MAX 5
2105+#define NET_DEVICE_PATH_VLAN_MAX 2
2106+
2107+struct net_device_path_stack {
2108+ int num_paths;
2109+ struct net_device_path path[NET_DEVICE_PATH_STACK_MAX];
2110+};
2111+
2112+struct net_device_path_ctx {
2113+ const struct net_device *dev;
2114+ u8 daddr[ETH_ALEN];
2115+
2116+ int num_vlans;
2117+ struct {
2118+ u16 id;
2119+ __be16 proto;
2120+ } vlan[NET_DEVICE_PATH_VLAN_MAX];
2121+};
2122+
2123 enum tc_setup_type {
2124 TC_SETUP_QDISC_MQPRIO,
2125 TC_SETUP_CLSU32,
2126@@ -844,6 +897,7 @@ enum tc_setup_type {
2127 TC_SETUP_ROOT_QDISC,
2128 TC_SETUP_QDISC_GRED,
2129 TC_SETUP_QDISC_TAPRIO,
2130+ TC_SETUP_FT,
2131 };
2132
2133 /* These structures hold the attributes of bpf state that are being passed
2134@@ -1239,6 +1293,8 @@ struct tlsdev_ops;
2135 * Get devlink port instance associated with a given netdev.
2136 * Called with a reference on the netdevice and devlink locks only,
2137 * rtnl_lock is not held.
2138+ * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path);
2139+ * Get the forwarding path to reach the real device from the HW destination address
2140 */
2141 struct net_device_ops {
2142 int (*ndo_init)(struct net_device *dev);
2143@@ -1436,6 +1492,8 @@ struct net_device_ops {
2144 int (*ndo_xsk_wakeup)(struct net_device *dev,
2145 u32 queue_id, u32 flags);
2146 struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev);
2147+ int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx,
2148+ struct net_device_path *path);
2149 };
2150
2151 /**
2152@@ -2661,6 +2719,8 @@ void dev_remove_offload(struct packet_offload *po);
2153
2154 int dev_get_iflink(const struct net_device *dev);
2155 int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
2156+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
2157+ struct net_device_path_stack *stack);
2158 struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
2159 unsigned short mask);
2160 struct net_device *dev_get_by_name(struct net *net, const char *name);
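dev_fill_forward_path() (declared above, implemented in net/core/dev.c later in this patch) walks each stacked device's ndo_fill_forward_path() callback and records one net_device_path per hop, terminating with a DEV_PATH_ETHERNET entry for the lowest device. A minimal consumer sketch, with a hypothetical helper name; real callers such as the flowtable offload code run this from an RCU read-side section:

#include <linux/netdevice.h>

/* Illustrative: resolve the lowest (real) device for a destination MAC
 * by walking the forward-path stack filled in by the core.
 */
static const struct net_device *
example_resolve_real_dev(const struct net_device *dev, const u8 *daddr)
{
	struct net_device_path_stack stack;
	int i;

	if (dev_fill_forward_path(dev, daddr, &stack) < 0)
		return dev;	/* no path information, keep the top device */

	for (i = 0; i < stack.num_paths; i++) {
		if (stack.path[i].type == DEV_PATH_ETHERNET)
			return stack.path[i].dev;
	}

	return dev;
}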
2161diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h
2162index 98966064e..91f9a9283 100644
2163--- a/include/linux/ppp_channel.h
2164+++ b/include/linux/ppp_channel.h
2165@@ -28,6 +28,9 @@ struct ppp_channel_ops {
2166 int (*start_xmit)(struct ppp_channel *, struct sk_buff *);
2167 /* Handle an ioctl call that has come in via /dev/ppp. */
2168 int (*ioctl)(struct ppp_channel *, unsigned int, unsigned long);
2169+ int (*fill_forward_path)(struct net_device_path_ctx *,
2170+ struct net_device_path *,
2171+ const struct ppp_channel *);
2172 };
2173
2174 struct ppp_channel {
2175diff --git a/include/net/dsa.h b/include/net/dsa.h
2176index 05f66d487..cafc74218 100644
2177--- a/include/net/dsa.h
2178+++ b/include/net/dsa.h
2179@@ -561,6 +561,8 @@ struct dsa_switch_ops {
2180 struct sk_buff *skb);
2181 };
2182
2183+struct dsa_port *dsa_port_from_netdev(struct net_device *netdev);
2184+
2185 struct dsa_switch_driver {
2186 struct list_head list;
2187 const struct dsa_switch_ops *ops;
2188@@ -653,6 +655,14 @@ static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev,
2189 #define BRCM_TAG_GET_PORT(v) ((v) >> 8)
2190 #define BRCM_TAG_GET_QUEUE(v) ((v) & 0xff)
2191
2192+#if IS_ENABLED(CONFIG_NET_DSA)
2193+bool dsa_slave_dev_check(const struct net_device *dev);
2194+#else
2195+static inline bool dsa_slave_dev_check(const struct net_device *dev)
2196+{
2197+ return false;
2198+}
2199+#endif
2200
2201 netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev);
2202 int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data);
2203diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
2204index c6f7bd22d..59b873653 100644
2205--- a/include/net/flow_offload.h
2206+++ b/include/net/flow_offload.h
2207@@ -138,6 +138,7 @@ enum flow_action_id {
2208 FLOW_ACTION_MPLS_PUSH,
2209 FLOW_ACTION_MPLS_POP,
2210 FLOW_ACTION_MPLS_MANGLE,
2211+ FLOW_ACTION_PPPOE_PUSH,
2212 NUM_FLOW_ACTIONS,
2213 };
2214
2215@@ -213,6 +214,9 @@ struct flow_action_entry {
2216 u8 bos;
2217 u8 ttl;
2218 } mpls_mangle;
2219+ struct { /* FLOW_ACTION_PPPOE_PUSH */
2220+ u16 sid;
2221+ } pppoe;
2222 };
2223 };
2224
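The new FLOW_ACTION_PPPOE_PUSH entry only carries the PPPoE session id; offload code that builds a flow rule fills it through the pppoe member. A small illustrative sketch (the helper name is invented; the flowtable offload code uses its own action-list helpers):

#include <net/flow_offload.h>

/* Illustrative: append a PPPoE push action to a flow_action list. */
static void example_add_pppoe_push(struct flow_action *action, u16 sid)
{
	struct flow_action_entry *entry;

	entry = &action->entries[action->num_entries++];
	entry->id = FLOW_ACTION_PPPOE_PUSH;
	entry->pppoe.sid = sid;
}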
2225diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
2226index 2c739fc75..89ab8f180 100644
2227--- a/include/net/ip6_route.h
2228+++ b/include/net/ip6_route.h
2229@@ -314,12 +314,13 @@ static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *
2230 !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws);
2231 }
2232
2233-static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
2234+static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst,
2235+ bool forwarding)
2236 {
2237 struct inet6_dev *idev;
2238 unsigned int mtu;
2239
2240- if (dst_metric_locked(dst, RTAX_MTU)) {
2241+ if (!forwarding || dst_metric_locked(dst, RTAX_MTU)) {
2242 mtu = dst_metric_raw(dst, RTAX_MTU);
2243 if (mtu)
2244 goto out;
2245diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
2246index 7b3c873f8..e95483192 100644
2247--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
2248+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
2249@@ -4,7 +4,4 @@
2250
2251 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
2252
2253-#include <linux/sysctl.h>
2254-extern struct ctl_table nf_ct_ipv6_sysctl_table[];
2255-
2256 #endif /* _NF_CONNTRACK_IPV6_H*/
2257diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
2258index 90690e37a..ce0bc3e62 100644
2259--- a/include/net/netfilter/nf_conntrack.h
2260+++ b/include/net/netfilter/nf_conntrack.h
2261@@ -279,6 +279,18 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
2262 !nf_ct_is_dying(ct);
2263 }
2264
2265+#define NF_CT_DAY (86400 * HZ)
2266+
2267+/* Set an arbitrary timeout large enough not to ever expire, this save
2268+ * us a check for the IPS_OFFLOAD_BIT from the packet path via
2269+ * nf_ct_is_expired().
2270+ */
2271+static inline void nf_ct_offload_timeout(struct nf_conn *ct)
2272+{
2273+ if (nf_ct_expires(ct) < NF_CT_DAY / 2)
2274+ WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
2275+}
2276+
2277 struct kernel_param;
2278
2279 int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp);
2280diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
2281index f7a060c6e..7f44a7715 100644
2282--- a/include/net/netfilter/nf_conntrack_acct.h
2283+++ b/include/net/netfilter/nf_conntrack_acct.h
2284@@ -65,6 +65,17 @@ static inline void nf_ct_set_acct(struct net *net, bool enable)
2285 #endif
2286 }
2287
2288+void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
2289+ unsigned int bytes);
2290+
2291+static inline void nf_ct_acct_update(struct nf_conn *ct, u32 dir,
2292+ unsigned int bytes)
2293+{
2294+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
2295+ nf_ct_acct_add(ct, dir, 1, bytes);
2296+#endif
2297+}
2298+
2299 void nf_conntrack_acct_pernet_init(struct net *net);
2300
2301 int nf_conntrack_acct_init(void);
2302diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
2303index 68d7fc92..7cf89767 100644
2304--- a/include/net/netfilter/nf_flow_table.h
2305+++ b/include/net/netfilter/nf_flow_table.h
2306@@ -8,31 +8,99 @@
2307 #include <linux/rcupdate.h>
2308 #include <linux/netfilter.h>
2309 #include <linux/netfilter/nf_conntrack_tuple_common.h>
2310+#include <net/flow_offload.h>
2311 #include <net/dst.h>
2312+#include <linux/if_pppox.h>
2313+#include <linux/ppp_defs.h>
2314
2315 struct nf_flowtable;
2316+struct nf_flow_rule;
2317+struct flow_offload;
2318+enum flow_offload_tuple_dir;
2319+
2320+struct nf_flow_key {
2321+ struct flow_dissector_key_meta meta;
2322+ struct flow_dissector_key_control control;
2323+ struct flow_dissector_key_control enc_control;
2324+ struct flow_dissector_key_basic basic;
2325+ struct flow_dissector_key_vlan vlan;
2326+ struct flow_dissector_key_vlan cvlan;
2327+ union {
2328+ struct flow_dissector_key_ipv4_addrs ipv4;
2329+ struct flow_dissector_key_ipv6_addrs ipv6;
2330+ };
2331+ struct flow_dissector_key_keyid enc_key_id;
2332+ union {
2333+ struct flow_dissector_key_ipv4_addrs enc_ipv4;
2334+ struct flow_dissector_key_ipv6_addrs enc_ipv6;
2335+ };
2336+ struct flow_dissector_key_tcp tcp;
2337+ struct flow_dissector_key_ports tp;
2338+} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
2339+
2340+struct nf_flow_match {
2341+ struct flow_dissector dissector;
2342+ struct nf_flow_key key;
2343+ struct nf_flow_key mask;
2344+};
2345+
2346+struct nf_flow_rule {
2347+ struct nf_flow_match match;
2348+ struct flow_rule *rule;
2349+};
2350
2351 struct nf_flowtable_type {
2352 struct list_head list;
2353 int family;
2354 int (*init)(struct nf_flowtable *ft);
2355+ int (*setup)(struct nf_flowtable *ft,
2356+ struct net_device *dev,
2357+ enum flow_block_command cmd);
2358+ int (*action)(struct net *net,
2359+ const struct flow_offload *flow,
2360+ enum flow_offload_tuple_dir dir,
2361+ struct nf_flow_rule *flow_rule);
2362 void (*free)(struct nf_flowtable *ft);
2363 nf_hookfn *hook;
2364 struct module *owner;
2365 };
2366
2367+enum nf_flowtable_flags {
2368+ NF_FLOWTABLE_HW_OFFLOAD = 0x1, /* NFT_FLOWTABLE_HW_OFFLOAD */
2369+ NF_FLOWTABLE_COUNTER = 0x2, /* NFT_FLOWTABLE_COUNTER */
2370+};
2371+
2372 struct nf_flowtable {
2373 struct list_head list;
2374 struct rhashtable rhashtable;
2375+ int priority;
2376 const struct nf_flowtable_type *type;
2377 struct delayed_work gc_work;
2378+ unsigned int flags;
2379+ struct flow_block flow_block;
2380+ struct rw_semaphore flow_block_lock; /* Guards flow_block */
2381+ possible_net_t net;
2382 };
2383
2384+static inline bool nf_flowtable_hw_offload(struct nf_flowtable *flowtable)
2385+{
2386+ return flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD;
2387+}
2388+
2389 enum flow_offload_tuple_dir {
2390 FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
2391 FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
2392- FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
2393 };
2394+#define FLOW_OFFLOAD_DIR_MAX IP_CT_DIR_MAX
2395+
2396+enum flow_offload_xmit_type {
2397+ FLOW_OFFLOAD_XMIT_UNSPEC = 0,
2398+ FLOW_OFFLOAD_XMIT_NEIGH,
2399+ FLOW_OFFLOAD_XMIT_XFRM,
2400+ FLOW_OFFLOAD_XMIT_DIRECT,
2401+};
2402+
2403+#define NF_FLOW_TABLE_ENCAP_MAX 2
2404
2405 struct flow_offload_tuple {
2406 union {
2407@@ -52,13 +120,30 @@ struct flow_offload_tuple {
2408
2409 u8 l3proto;
2410 u8 l4proto;
2411- u8 dir;
2412+ struct {
2413+ u16 id;
2414+ __be16 proto;
2415+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
2416
2417- u16 mtu;
2418+ /* All members above are keys for lookups, see flow_offload_hash(). */
2419+ struct { } __hash;
2420
2421-	struct {
2422- struct dst_entry *dst_cache;
2423- u32 dst_cookie;
2424+	u8				dir:2,
2425+ xmit_type:2,
2426+ encap_num:2,
2427+ in_vlan_ingress:2;
2428+ u16 mtu;
2429+ union {
2430+ struct {
2431+ struct dst_entry *dst_cache;
2432+ u32 dst_cookie;
2433+ };
2434+ struct {
2435+ u32 ifidx;
2436+ u32 hw_ifidx;
2437+ u8 h_source[ETH_ALEN];
2438+ u8 h_dest[ETH_ALEN];
2439+ } out;
2440 	};
2441 };
2442
2443@@ -67,52 +152,139 @@ struct flow_offload_tuple_rhash {
2444 	struct flow_offload_tuple tuple;
2445 };
2446
2447-#define FLOW_OFFLOAD_SNAT 0x1
2448-#define FLOW_OFFLOAD_DNAT 0x2
2449-#define FLOW_OFFLOAD_DYING 0x4
2450-#define FLOW_OFFLOAD_TEARDOWN 0x8
2451+enum nf_flow_flags {
2452+ NF_FLOW_SNAT,
2453+ NF_FLOW_DNAT,
2454+ NF_FLOW_TEARDOWN,
2455+ NF_FLOW_HW,
2456+ NF_FLOW_HW_DYING,
2457+ NF_FLOW_HW_DEAD,
2458+ NF_FLOW_HW_PENDING,
2459+};
2460+
2461+enum flow_offload_type {
2462+ NF_FLOW_OFFLOAD_UNSPEC = 0,
2463+ NF_FLOW_OFFLOAD_ROUTE,
2464+};
2465
2466 struct flow_offload {
2467 struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
2468- u32 flags;
2469- union {
2470- /* Your private driver data here. */
2471- u32 timeout;
2472- };
2473+ struct nf_conn *ct;
2474+ unsigned long flags;
2475+ u16 type;
2476+ u32 timeout;
2477+ struct rcu_head rcu_head;
2478 };
2479
2480 #define NF_FLOW_TIMEOUT (30 * HZ)
2481+#define nf_flowtable_time_stamp (u32)jiffies
2482+
2483+unsigned long flow_offload_get_timeout(struct flow_offload *flow);
2484+
2485+static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
2486+{
2487+ return (__s32)(timeout - nf_flowtable_time_stamp);
2488+}
2489
2490 struct nf_flow_route {
2491 struct {
2492- struct dst_entry *dst;
2493+ struct dst_entry *dst;
2494+ struct {
2495+ u32 ifindex;
2496+ struct {
2497+ u16 id;
2498+ __be16 proto;
2499+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
2500+ u8 num_encaps:2,
2501+ ingress_vlans:2;
2502+ } in;
2503+ struct {
2504+ u32 ifindex;
2505+ u32 hw_ifindex;
2506+ u8 h_source[ETH_ALEN];
2507+ u8 h_dest[ETH_ALEN];
2508+ } out;
2509+ enum flow_offload_xmit_type xmit_type;
2510 } tuple[FLOW_OFFLOAD_DIR_MAX];
2511 };
2512
2513-struct flow_offload *flow_offload_alloc(struct nf_conn *ct,
2514- struct nf_flow_route *route);
2515+struct flow_offload *flow_offload_alloc(struct nf_conn *ct);
2516 void flow_offload_free(struct flow_offload *flow);
2517
2518+static inline int
2519+nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table,
2520+ flow_setup_cb_t *cb, void *cb_priv)
2521+{
2522+ struct flow_block *block = &flow_table->flow_block;
2523+ struct flow_block_cb *block_cb;
2524+ int err = 0;
2525+
2526+ down_write(&flow_table->flow_block_lock);
2527+ block_cb = flow_block_cb_lookup(block, cb, cb_priv);
2528+ if (block_cb) {
2529+ err = -EEXIST;
2530+ goto unlock;
2531+ }
2532+
2533+ block_cb = flow_block_cb_alloc(cb, cb_priv, cb_priv, NULL);
2534+ if (IS_ERR(block_cb)) {
2535+ err = PTR_ERR(block_cb);
2536+ goto unlock;
2537+ }
2538+
2539+ list_add_tail(&block_cb->list, &block->cb_list);
2540+
2541+unlock:
2542+ up_write(&flow_table->flow_block_lock);
2543+ return err;
2544+}
2545+
2546+static inline void
2547+nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
2548+ flow_setup_cb_t *cb, void *cb_priv)
2549+{
2550+ struct flow_block *block = &flow_table->flow_block;
2551+ struct flow_block_cb *block_cb;
2552+
2553+ down_write(&flow_table->flow_block_lock);
2554+ block_cb = flow_block_cb_lookup(block, cb, cb_priv);
2555+ if (block_cb) {
2556+ list_del(&block_cb->list);
2557+ flow_block_cb_free(block_cb);
2558+ } else {
2559+ WARN_ON(true);
2560+ }
2561+ up_write(&flow_table->flow_block_lock);
2562+}
2563+
2564+int flow_offload_route_init(struct flow_offload *flow,
2565+ const struct nf_flow_route *route);
2566+
2567 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
2568+void flow_offload_refresh(struct nf_flowtable *flow_table,
2569+ struct flow_offload *flow);
2570+
2571 struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
2572 struct flow_offload_tuple *tuple);
2573+void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
2574+ struct net_device *dev);
2575 void nf_flow_table_cleanup(struct net_device *dev);
2576
2577 int nf_flow_table_init(struct nf_flowtable *flow_table);
2578 void nf_flow_table_free(struct nf_flowtable *flow_table);
2579
2580 void flow_offload_teardown(struct flow_offload *flow);
2581-static inline void flow_offload_dead(struct flow_offload *flow)
2582-{
2583- flow->flags |= FLOW_OFFLOAD_DYING;
2584-}
2585
2586-int nf_flow_snat_port(const struct flow_offload *flow,
2587- struct sk_buff *skb, unsigned int thoff,
2588- u8 protocol, enum flow_offload_tuple_dir dir);
2589-int nf_flow_dnat_port(const struct flow_offload *flow,
2590- struct sk_buff *skb, unsigned int thoff,
2591- u8 protocol, enum flow_offload_tuple_dir dir);
2592+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
2593+ void (*iter)(struct flow_offload *flow, void *data),
2594+ void *data);
2595+
2596+void nf_flow_snat_port(const struct flow_offload *flow,
2597+ struct sk_buff *skb, unsigned int thoff,
2598+ u8 protocol, enum flow_offload_tuple_dir dir);
2599+void nf_flow_dnat_port(const struct flow_offload *flow,
2600+ struct sk_buff *skb, unsigned int thoff,
2601+ u8 protocol, enum flow_offload_tuple_dir dir);
2602
2603 struct flow_ports {
2604 __be16 source, dest;
2605@@ -126,4 +298,41 @@ unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
2606 #define MODULE_ALIAS_NF_FLOWTABLE(family)	\
2607 MODULE_ALIAS("nf-flowtable-" __stringify(family))
2608
2609+void nf_flow_offload_add(struct nf_flowtable *flowtable,
2610+ struct flow_offload *flow);
2611+void nf_flow_offload_del(struct nf_flowtable *flowtable,
2612+ struct flow_offload *flow);
2613+void nf_flow_offload_stats(struct nf_flowtable *flowtable,
2614+ struct flow_offload *flow);
2615+
2616+void nf_flow_table_offload_flush(struct nf_flowtable *flowtable);
2617+int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
2618+ struct net_device *dev,
2619+ enum flow_block_command cmd);
2620+int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
2621+ enum flow_offload_tuple_dir dir,
2622+ struct nf_flow_rule *flow_rule);
2623+int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
2624+ enum flow_offload_tuple_dir dir,
2625+ struct nf_flow_rule *flow_rule);
2626+
2627+int nf_flow_table_offload_init(void);
2628+void nf_flow_table_offload_exit(void);
2629+
2630+static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
2631+{
2632+ __be16 proto;
2633+
2634+ proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
2635+ sizeof(struct pppoe_hdr)));
2636+ switch (proto) {
2637+ case htons(PPP_IP):
2638+ return htons(ETH_P_IP);
2639+ case htons(PPP_IPV6):
2640+ return htons(ETH_P_IPV6);
2641+ }
2642+
2643+ return 0;
2644+}
2645+
2646 #endif /* _NF_FLOW_TABLE_H */
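With the reworked header, installing a flow (as done by the xt_FLOWOFFLOAD target and nft flow offload on top of this patch) is a three-step sequence: allocate the flow from the conntrack entry, attach the route/forward-path information, then insert it into the flowtable. A condensed sketch with error handling trimmed and an invented function name:

#include <net/netfilter/nf_flow_table.h>

static int example_install_flow(struct nf_flowtable *flowtable,
				struct nf_conn *ct,
				struct nf_flow_route *route)
{
	struct flow_offload *flow;
	int err;

	flow = flow_offload_alloc(ct);	/* takes a reference on ct */
	if (!flow)
		return -ENOMEM;

	err = flow_offload_route_init(flow, route);
	if (err < 0)
		goto err_free;

	err = flow_offload_add(flowtable, flow);
	if (err < 0)
		goto err_free;

	return 0;

err_free:
	flow_offload_free(flow);	/* drops the ct reference */
	return err;
}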
2647diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
2648index 806454e76..9e3963c8f 100644
2649--- a/include/net/netns/conntrack.h
2650+++ b/include/net/netns/conntrack.h
2651@@ -27,6 +27,9 @@ struct nf_tcp_net {
2652 int tcp_loose;
2653 int tcp_be_liberal;
2654 int tcp_max_retrans;
2655+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
2656+ unsigned int offload_timeout;
2657+#endif
2658 };
2659
2660 enum udp_conntrack {
2661@@ -37,6 +40,9 @@ enum udp_conntrack {
2662
2663 struct nf_udp_net {
2664 unsigned int timeouts[UDP_CT_MAX];
2665+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
2666+ unsigned int offload_timeout;
2667+#endif
2668 };
2669
2670 struct nf_icmp_net {
2671diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
2672index 336014bf8..ae698d11c 100644
2673--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
2674+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
2675@@ -105,14 +105,19 @@ enum ip_conntrack_status {
2676 IPS_OFFLOAD_BIT = 14,
2677 IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT),
2678
2679+ /* Conntrack has been offloaded to hardware. */
2680+ IPS_HW_OFFLOAD_BIT = 15,
2681+ IPS_HW_OFFLOAD = (1 << IPS_HW_OFFLOAD_BIT),
2682+
2683 /* Be careful here, modifying these bits can make things messy,
2684 * so don't let users modify them directly.
2685 */
2686 IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
2687 IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
2688- IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD),
2689+ IPS_SEQ_ADJUST | IPS_TEMPLATE |
2690+ IPS_OFFLOAD | IPS_HW_OFFLOAD),
2691
2692- __IPS_MAX_BIT = 15,
2693+ __IPS_MAX_BIT = 16,
2694 };
2695
2696 /* Connection tracking event types */
2697diff --git a/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
2698new file mode 100644
2699index 000000000..5841bbe0e
2700--- /dev/null
2701+++ b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
2702@@ -0,0 +1,17 @@
2703+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2704+#ifndef _XT_FLOWOFFLOAD_H
2705+#define _XT_FLOWOFFLOAD_H
2706+
2707+#include <linux/types.h>
2708+
2709+enum {
2710+ XT_FLOWOFFLOAD_HW = 1 << 0,
2711+
2712+ XT_FLOWOFFLOAD_MASK = XT_FLOWOFFLOAD_HW
2713+};
2714+
2715+struct xt_flowoffload_target_info {
2716+ __u32 flags;
2717+};
2718+
2719+#endif /* _XT_FLOWOFFLOAD_H */
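Userspace selects hardware offload per rule through the flags field above. A tiny sketch of how the target module can test it (the helper name is illustrative only):

#include <linux/types.h>
#include <linux/netfilter/xt_FLOWOFFLOAD.h>

/* Illustrative: does this rule ask for hardware offload? */
static bool example_rule_wants_hw(const struct xt_flowoffload_target_info *info)
{
	return info->flags & XT_FLOWOFFLOAD_HW;
}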
2720diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
2721index 589615ec4..444ab5fae 100644
2722--- a/net/8021q/vlan_dev.c
2723+++ b/net/8021q/vlan_dev.c
2724@@ -747,6 +747,26 @@ static int vlan_dev_get_iflink(const struct net_device *dev)
2725 return real_dev->ifindex;
2726 }
2727
2728+static int vlan_dev_fill_forward_path(struct net_device_path_ctx *ctx,
2729+ struct net_device_path *path)
2730+{
2731+ struct vlan_dev_priv *vlan = vlan_dev_priv(ctx->dev);
2732+
2733+ path->type = DEV_PATH_VLAN;
2734+ path->encap.id = vlan->vlan_id;
2735+ path->encap.proto = vlan->vlan_proto;
2736+ path->dev = ctx->dev;
2737+ ctx->dev = vlan->real_dev;
2738+ if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
2739+ return -ENOSPC;
2740+
2741+ ctx->vlan[ctx->num_vlans].id = vlan->vlan_id;
2742+ ctx->vlan[ctx->num_vlans].proto = vlan->vlan_proto;
2743+ ctx->num_vlans++;
2744+
2745+ return 0;
2746+}
2747+
2748 static const struct ethtool_ops vlan_ethtool_ops = {
2749 .get_link_ksettings = vlan_ethtool_get_link_ksettings,
2750 .get_drvinfo = vlan_ethtool_get_drvinfo,
2751@@ -785,6 +805,7 @@ static const struct net_device_ops vlan_netdev_ops = {
2752 #endif
2753 .ndo_fix_features = vlan_dev_fix_features,
2754 .ndo_get_iflink = vlan_dev_get_iflink,
2755+ .ndo_fill_forward_path = vlan_dev_fill_forward_path,
2756 };
2757
2758 static void vlan_dev_free(struct net_device *dev)
2759diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
2760index 501f77f0f..0940b44cd 100644
2761--- a/net/bridge/br_device.c
2762+++ b/net/bridge/br_device.c
2763@@ -377,6 +377,54 @@ static int br_del_slave(struct net_device *dev, struct net_device *slave_dev)
2764 return br_del_if(br, slave_dev);
2765 }
2766
2767+static int br_fill_forward_path(struct net_device_path_ctx *ctx,
2768+ struct net_device_path *path)
2769+{
2770+ struct net_bridge_fdb_entry *f;
2771+ struct net_bridge_port *dst;
2772+ struct net_bridge *br;
2773+
2774+ if (netif_is_bridge_port(ctx->dev))
2775+ return -1;
2776+
2777+ br = netdev_priv(ctx->dev);
2778+
2779+ br_vlan_fill_forward_path_pvid(br, ctx, path);
2780+
2781+ f = br_fdb_find_rcu(br, ctx->daddr, path->bridge.vlan_id);
2782+ if (!f || !f->dst)
2783+ return -1;
2784+
2785+ dst = READ_ONCE(f->dst);
2786+ if (!dst)
2787+ return -1;
2788+
2789+ if (br_vlan_fill_forward_path_mode(br, dst, path))
2790+ return -1;
2791+
2792+ path->type = DEV_PATH_BRIDGE;
2793+ path->dev = dst->br->dev;
2794+ ctx->dev = dst->dev;
2795+
2796+ switch (path->bridge.vlan_mode) {
2797+ case DEV_PATH_BR_VLAN_TAG:
2798+ if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
2799+ return -ENOSPC;
2800+ ctx->vlan[ctx->num_vlans].id = path->bridge.vlan_id;
2801+ ctx->vlan[ctx->num_vlans].proto = path->bridge.vlan_proto;
2802+ ctx->num_vlans++;
2803+ break;
2804+ case DEV_PATH_BR_VLAN_UNTAG_HW:
2805+ case DEV_PATH_BR_VLAN_UNTAG:
2806+ ctx->num_vlans--;
2807+ break;
2808+ case DEV_PATH_BR_VLAN_KEEP:
2809+ break;
2810+ }
2811+
2812+ return 0;
2813+}
2814+
2815 static const struct ethtool_ops br_ethtool_ops = {
2816 .get_drvinfo = br_getinfo,
2817 .get_link = ethtool_op_get_link,
2818@@ -410,6 +458,7 @@ static const struct net_device_ops br_netdev_ops = {
2819 .ndo_bridge_setlink = br_setlink,
2820 .ndo_bridge_dellink = br_dellink,
2821 .ndo_features_check = passthru_features_check,
2822+ .ndo_fill_forward_path = br_fill_forward_path,
2823 };
2824
2825 static struct device_type br_type = {
2826diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
2827index a736be8a1..4bd9e9b57 100644
2828--- a/net/bridge/br_private.h
2829+++ b/net/bridge/br_private.h
2830@@ -912,6 +912,13 @@ void br_vlan_port_event(struct net_bridge_port *p, unsigned long event);
2831 int br_vlan_bridge_event(struct net_device *dev, unsigned long event,
2832 void *ptr);
2833
2834+void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
2835+ struct net_device_path_ctx *ctx,
2836+ struct net_device_path *path);
2837+int br_vlan_fill_forward_path_mode(struct net_bridge *br,
2838+ struct net_bridge_port *dst,
2839+ struct net_device_path *path);
2840+
2841 static inline struct net_bridge_vlan_group *br_vlan_group(
2842 const struct net_bridge *br)
2843 {
2844@@ -1066,6 +1073,19 @@ static inline int nbp_get_num_vlan_infos(struct net_bridge_port *p,
2845 return 0;
2846 }
2847
2848+static inline void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
2849+ struct net_device_path_ctx *ctx,
2850+ struct net_device_path *path)
2851+{
2852+}
2853+
2854+static inline int br_vlan_fill_forward_path_mode(struct net_bridge *br,
2855+ struct net_bridge_port *dst,
2856+ struct net_device_path *path)
2857+{
2858+ return 0;
2859+}
2860+
2861 static inline struct net_bridge_vlan_group *br_vlan_group(
2862 const struct net_bridge *br)
2863 {
2864diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
2865index 9257292bd..bcfd16924 100644
2866--- a/net/bridge/br_vlan.c
2867+++ b/net/bridge/br_vlan.c
2868@@ -1268,6 +1268,61 @@ int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid)
2869 }
2870 EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu);
2871
2872+void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
2873+ struct net_device_path_ctx *ctx,
2874+ struct net_device_path *path)
2875+{
2876+ struct net_bridge_vlan_group *vg;
2877+ int idx = ctx->num_vlans - 1;
2878+ u16 vid;
2879+
2880+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
2881+
2882+ if (!br_opt_get(br, BROPT_VLAN_ENABLED))
2883+ return;
2884+
2885+ vg = br_vlan_group(br);
2886+
2887+ if (idx >= 0 &&
2888+ ctx->vlan[idx].proto == br->vlan_proto) {
2889+ vid = ctx->vlan[idx].id;
2890+ } else {
2891+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_TAG;
2892+ vid = br_get_pvid(vg);
2893+ }
2894+
2895+ path->bridge.vlan_id = vid;
2896+ path->bridge.vlan_proto = br->vlan_proto;
2897+}
2898+
2899+int br_vlan_fill_forward_path_mode(struct net_bridge *br,
2900+ struct net_bridge_port *dst,
2901+ struct net_device_path *path)
2902+{
2903+ struct net_bridge_vlan_group *vg;
2904+ struct net_bridge_vlan *v;
2905+
2906+ if (!br_opt_get(br, BROPT_VLAN_ENABLED))
2907+ return 0;
2908+
2909+ vg = nbp_vlan_group_rcu(dst);
2910+ v = br_vlan_find(vg, path->bridge.vlan_id);
2911+ if (!v || !br_vlan_should_use(v))
2912+ return -EINVAL;
2913+
2914+ if (!(v->flags & BRIDGE_VLAN_INFO_UNTAGGED))
2915+ return 0;
2916+
2917+ if (path->bridge.vlan_mode == DEV_PATH_BR_VLAN_TAG)
2918+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
2919+ else if (v->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
2920+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG_HW;
2921+ else
2922+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG;
2923+
2924+ return 0;
2925+}
2926+
2927 int br_vlan_get_info(const struct net_device *dev, u16 vid,
2928 struct bridge_vlan_info *p_vinfo)
2929 {
2930diff --git a/net/core/dev.c b/net/core/dev.c
2931index fe2c856b9..4f0edb218 100644
2932--- a/net/core/dev.c
2933+++ b/net/core/dev.c
2934@@ -639,6 +639,52 @@ int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
2935 }
2936 EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
2937
2938+static struct net_device_path *dev_fwd_path(struct net_device_path_stack *stack)
2939+{
2940+ int k = stack->num_paths++;
2941+
2942+ if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX))
2943+ return NULL;
2944+
2945+ return &stack->path[k];
2946+}
2947+
2948+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
2949+ struct net_device_path_stack *stack)
2950+{
2951+ const struct net_device *last_dev;
2952+ struct net_device_path_ctx ctx = {
2953+ .dev = dev,
2954+ };
2955+ struct net_device_path *path;
2956+ int ret = 0;
2957+
2958+ memcpy(ctx.daddr, daddr, sizeof(ctx.daddr));
2959+ stack->num_paths = 0;
2960+ while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
2961+ last_dev = ctx.dev;
2962+ path = dev_fwd_path(stack);
2963+ if (!path)
2964+ return -1;
2965+
2966+ memset(path, 0, sizeof(struct net_device_path));
2967+ ret = ctx.dev->netdev_ops->ndo_fill_forward_path(&ctx, path);
2968+ if (ret < 0)
2969+ return -1;
2970+
2971+ if (WARN_ON_ONCE(last_dev == ctx.dev))
2972+ return -1;
2973+ }
2974+ path = dev_fwd_path(stack);
2975+ if (!path)
2976+ return -1;
2977+ path->type = DEV_PATH_ETHERNET;
2978+ path->dev = ctx.dev;
2979+
2980+ return ret;
2981+}
2982+EXPORT_SYMBOL_GPL(dev_fill_forward_path);
2983+
2984 /**
2985 * __dev_get_by_name - find a device by its name
2986 * @net: the applicable net namespace
2987diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
2988index ca80f8699..35a1249a9 100644
2989--- a/net/dsa/dsa.c
2990+++ b/net/dsa/dsa.c
2991@@ -329,6 +329,15 @@ int call_dsa_notifiers(unsigned long val, struct net_device *dev,
2992 }
2993 EXPORT_SYMBOL_GPL(call_dsa_notifiers);
2994
2995+struct dsa_port *dsa_port_from_netdev(struct net_device *netdev)
2996+{
2997+ if (!netdev || !dsa_slave_dev_check(netdev))
2998+ return ERR_PTR(-ENODEV);
2999+
3000+ return dsa_slave_to_port(netdev);
3001+}
3002+EXPORT_SYMBOL_GPL(dsa_port_from_netdev);
3003+
3004 static int __init dsa_init_module(void)
3005 {
3006 int rc;
3007diff --git a/net/dsa/slave.c b/net/dsa/slave.c
3008index 036fda317..2dfaa1eac 100644
3009--- a/net/dsa/slave.c
3010+++ b/net/dsa/slave.c
3011@@ -22,8 +22,6 @@
3012
3013 #include "dsa_priv.h"
3014
3015-static bool dsa_slave_dev_check(const struct net_device *dev);
3016-
3017 /* slave mii_bus handling ***************************************************/
3018 static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg)
3019 {
3020@@ -1033,14 +1031,32 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
3021 }
3022 }
3023
3024+static int dsa_slave_setup_ft_block(struct dsa_switch *ds, int port,
3025+ void *type_data)
3026+{
3027+ struct dsa_port *cpu_dp = dsa_to_port(ds, port)->cpu_dp;
3028+ struct net_device *master = cpu_dp->master;
3029+
3030+ if (!master->netdev_ops->ndo_setup_tc)
3031+ return -EOPNOTSUPP;
3032+
3033+ return master->netdev_ops->ndo_setup_tc(master, TC_SETUP_FT, type_data);
3034+}
3035+
3036 static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
3037 void *type_data)
3038 {
3039 struct dsa_port *dp = dsa_slave_to_port(dev);
3040 struct dsa_switch *ds = dp->ds;
3041
3042- if (type == TC_SETUP_BLOCK)
3043+ switch (type) {
3044+ case TC_SETUP_BLOCK:
3045 return dsa_slave_setup_tc_block(dev, type_data);
3046+ case TC_SETUP_FT:
3047+ return dsa_slave_setup_ft_block(ds, dp->index, type_data);
3048+ default:
3049+ break;
3050+ }
3051
3052 if (!ds->ops->port_setup_tc)
3053 return -EOPNOTSUPP;
3054@@ -1226,6 +1242,21 @@ static struct devlink_port *dsa_slave_get_devlink_port(struct net_device *dev)
3055 return dp->ds->devlink ? &dp->devlink_port : NULL;
3056 }
3057
3058+static int dsa_slave_fill_forward_path(struct net_device_path_ctx *ctx,
3059+ struct net_device_path *path)
3060+{
3061+ struct dsa_port *dp = dsa_slave_to_port(ctx->dev);
3062+ struct dsa_port *cpu_dp = dp->cpu_dp;
3063+
3064+ path->dev = ctx->dev;
3065+ path->type = DEV_PATH_DSA;
3066+ path->dsa.proto = cpu_dp->tag_ops->proto;
3067+ path->dsa.port = dp->index;
3068+ ctx->dev = cpu_dp->master;
3069+
3070+ return 0;
3071+}
3072+
3073 static const struct net_device_ops dsa_slave_netdev_ops = {
3074 .ndo_open = dsa_slave_open,
3075 .ndo_stop = dsa_slave_close,
3076@@ -1250,6 +1281,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
3077 .ndo_vlan_rx_add_vid = dsa_slave_vlan_rx_add_vid,
3078 .ndo_vlan_rx_kill_vid = dsa_slave_vlan_rx_kill_vid,
3079 .ndo_get_devlink_port = dsa_slave_get_devlink_port,
3080+ .ndo_fill_forward_path = dsa_slave_fill_forward_path,
3081 };
3082
3083 static struct device_type dsa_type = {
3084@@ -1497,10 +1529,11 @@ void dsa_slave_destroy(struct net_device *slave_dev)
3085 free_netdev(slave_dev);
3086 }
3087
3088-static bool dsa_slave_dev_check(const struct net_device *dev)
3089+bool dsa_slave_dev_check(const struct net_device *dev)
3090 {
3091 return dev->netdev_ops == &dsa_slave_netdev_ops;
3092 }
3093+EXPORT_SYMBOL_GPL(dsa_slave_dev_check);
3094
3095 static int dsa_slave_changeupper(struct net_device *dev,
3096 struct netdev_notifier_changeupper_info *info)
3097diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
3098index f17b40211..803b92e4c 100644
3099--- a/net/ipv4/netfilter/Kconfig
3100+++ b/net/ipv4/netfilter/Kconfig
3101@@ -56,8 +56,6 @@ config NF_TABLES_ARP
3102 help
3103 This option enables the ARP support for nf_tables.
3104
3105-endif # NF_TABLES
3106-
3107 config NF_FLOW_TABLE_IPV4
3108 tristate "Netfilter flow table IPv4 module"
3109 depends on NF_FLOW_TABLE
3110@@ -66,6 +64,8 @@ config NF_FLOW_TABLE_IPV4
3111
3112 To compile it as a module, choose M here.
3113
3114+endif # NF_TABLES
3115+
3116 config NF_DUP_IPV4
3117 tristate "Netfilter IPv4 packet duplication to alternate destination"
3118 depends on !NF_CONNTRACK || NF_CONNTRACK
3119diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
3120index 5585e3a94..bb76f6061 100644
3121--- a/net/ipv6/ip6_output.c
3122+++ b/net/ipv6/ip6_output.c
3123@@ -607,7 +607,7 @@ int ip6_forward(struct sk_buff *skb)
3124 }
3125 }
3126
3127- mtu = ip6_dst_mtu_forward(dst);
3128+ mtu = ip6_dst_mtu_maybe_forward(dst, true);
3129 if (mtu < IPV6_MIN_MTU)
3130 mtu = IPV6_MIN_MTU;
3131
3132diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
3133index 69443e9a3..0b481d236 100644
3134--- a/net/ipv6/netfilter/Kconfig
3135+++ b/net/ipv6/netfilter/Kconfig
3136@@ -45,7 +45,6 @@ config NFT_FIB_IPV6
3137 multicast or blackhole.
3138
3139 endif # NF_TABLES_IPV6
3140-endif # NF_TABLES
3141
3142 config NF_FLOW_TABLE_IPV6
3143 tristate "Netfilter flow table IPv6 module"
3144@@ -55,6 +54,8 @@ config NF_FLOW_TABLE_IPV6
3145
3146 To compile it as a module, choose M here.
3147
3148+endif # NF_TABLES
3149+
3150 config NF_DUP_IPV6
3151 tristate "Netfilter IPv6 packet duplication to alternate destination"
3152 depends on !NF_CONNTRACK || NF_CONNTRACK
3153diff --git a/net/ipv6/route.c b/net/ipv6/route.c
3154index 98aaf0b79..2b357ac71 100644
3155--- a/net/ipv6/route.c
3156+++ b/net/ipv6/route.c
3157@@ -83,7 +83,7 @@ enum rt6_nud_state {
3158
3159 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
3160 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
3161-static unsigned int ip6_mtu(const struct dst_entry *dst);
3162+static unsigned int ip6_mtu(const struct dst_entry *dst);
3163 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
3164 static void ip6_dst_destroy(struct dst_entry *);
3165 static void ip6_dst_ifdown(struct dst_entry *,
3166@@ -3125,25 +3125,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
3167
3168 static unsigned int ip6_mtu(const struct dst_entry *dst)
3169 {
3170- struct inet6_dev *idev;
3171- unsigned int mtu;
3172-
3173- mtu = dst_metric_raw(dst, RTAX_MTU);
3174- if (mtu)
3175- goto out;
3176-
3177- mtu = IPV6_MIN_MTU;
3178-
3179- rcu_read_lock();
3180- idev = __in6_dev_get(dst->dev);
3181- if (idev)
3182- mtu = idev->cnf.mtu6;
3183- rcu_read_unlock();
3184-
3185-out:
3186- mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
3187-
3188- return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
3189+ return ip6_dst_mtu_maybe_forward(dst, false);
3190 }
3191
3192 /* MTU selection:
3193diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
3194index b967763f5..c040e713a 100644
3195--- a/net/netfilter/Kconfig
3196+++ b/net/netfilter/Kconfig
3197@@ -690,8 +690,6 @@ config NFT_FIB_NETDEV
3198
3199 endif # NF_TABLES_NETDEV
3200
3201-endif # NF_TABLES
3202-
3203 config NF_FLOW_TABLE_INET
3204 tristate "Netfilter flow table mixed IPv4/IPv6 module"
3205 depends on NF_FLOW_TABLE
3206@@ -700,11 +698,12 @@ config NF_FLOW_TABLE_INET
3207
3208 To compile it as a module, choose M here.
3209
3210+endif # NF_TABLES
3211+
3212 config NF_FLOW_TABLE
3213 tristate "Netfilter flow table module"
3214 depends on NETFILTER_INGRESS
3215 depends on NF_CONNTRACK
3216- depends on NF_TABLES
3217 help
3218 This option adds the flow table core infrastructure.
3219
3220@@ -984,6 +983,15 @@ config NETFILTER_XT_TARGET_NOTRACK
3221 depends on NETFILTER_ADVANCED
3222 select NETFILTER_XT_TARGET_CT
3223
3224+config NETFILTER_XT_TARGET_FLOWOFFLOAD
3225+ tristate '"FLOWOFFLOAD" target support'
3226+ depends on NF_FLOW_TABLE
3227+ depends on NETFILTER_INGRESS
3228+ help
3229+ This option adds a `FLOWOFFLOAD' target, which uses the nf_flow_offload
3230+ module to speed up processing of packets by bypassing the usual
3231+	  netfilter chains.
3232+
3233 config NETFILTER_XT_TARGET_RATEEST
3234 tristate '"RATEEST" target support'
3235 depends on NETFILTER_ADVANCED
3236diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
3237index 4fc075b61..d93a121bc 100644
3238--- a/net/netfilter/Makefile
3239+++ b/net/netfilter/Makefile
3240@@ -120,7 +120,8 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
3241
3242 # flow table infrastructure
3243 obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
3244-nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
3245+nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \
3246+ nf_flow_table_offload.o
3247
3248 obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
3249
3250@@ -140,6 +141,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
3251 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
3252 obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
3253 obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
3254+obj-$(CONFIG_NETFILTER_XT_TARGET_FLOWOFFLOAD) += xt_FLOWOFFLOAD.o
3255 obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
3256 obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
3257 obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
3258diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
3259index f6ab6f484..f689e19d8 100644
3260--- a/net/netfilter/nf_conntrack_core.c
3261+++ b/net/netfilter/nf_conntrack_core.c
3262@@ -864,9 +864,8 @@ out:
3263 }
3264 EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
3265
3266-static inline void nf_ct_acct_update(struct nf_conn *ct,
3267- enum ip_conntrack_info ctinfo,
3268- unsigned int len)
3269+void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
3270+ unsigned int bytes)
3271 {
3272 struct nf_conn_acct *acct;
3273
3274@@ -874,10 +873,11 @@ static inline void nf_ct_acct_update(struct nf_conn *ct,
3275 if (acct) {
3276 struct nf_conn_counter *counter = acct->counter;
3277
3278- atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
3279- atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes);
3280+ atomic64_add(packets, &counter[dir].packets);
3281+ atomic64_add(bytes, &counter[dir].bytes);
3282 }
3283 }
3284+EXPORT_SYMBOL_GPL(nf_ct_acct_add);
3285
3286 static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
3287 const struct nf_conn *loser_ct)
3288@@ -891,7 +891,7 @@ static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
3289
3290 /* u32 should be fine since we must have seen one packet. */
3291 bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes);
3292- nf_ct_acct_update(ct, ctinfo, bytes);
3293+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), bytes);
3294 }
3295 }
3296
3297@@ -1238,8 +1238,10 @@ static void gc_worker(struct work_struct *work)
3298
3299 tmp = nf_ct_tuplehash_to_ctrack(h);
3300
3301- if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
3302+ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
3303+ nf_ct_offload_timeout(tmp);
3304 continue;
3305+ }
3306
3307 if (nf_ct_is_expired(tmp)) {
3308 nf_ct_gc_expired(tmp);
3309@@ -1763,7 +1765,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
3310 WRITE_ONCE(ct->timeout, extra_jiffies);
3311 acct:
3312 if (do_acct)
3313- nf_ct_acct_update(ct, ctinfo, skb->len);
3314+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
3315 }
3316 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
3317
3318@@ -1771,7 +1773,7 @@ bool nf_ct_kill_acct(struct nf_conn *ct,
3319 enum ip_conntrack_info ctinfo,
3320 const struct sk_buff *skb)
3321 {
3322- nf_ct_acct_update(ct, ctinfo, skb->len);
3323+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
3324
3325 return nf_ct_delete(ct, 0, 0);
3326 }
3327diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
3328index 7204f0366..3742bae21 100644
3329--- a/net/netfilter/nf_conntrack_proto_tcp.c
3330+++ b/net/netfilter/nf_conntrack_proto_tcp.c
3331@@ -1453,6 +1453,10 @@ void nf_conntrack_tcp_init_net(struct net *net)
3332 tn->tcp_loose = nf_ct_tcp_loose;
3333 tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
3334 tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
3335+
3336+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3337+ tn->offload_timeout = 30 * HZ;
3338+#endif
3339 }
3340
3341 const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
3342diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
3343index e3a2d018f..a1579d6c3 100644
3344--- a/net/netfilter/nf_conntrack_proto_udp.c
3345+++ b/net/netfilter/nf_conntrack_proto_udp.c
3346@@ -267,6 +267,10 @@ void nf_conntrack_udp_init_net(struct net *net)
3347
3348 for (i = 0; i < UDP_CT_MAX; i++)
3349 un->timeouts[i] = udp_timeouts[i];
3350+
3351+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3352+ un->offload_timeout = 30 * HZ;
3353+#endif
3354 }
3355
3356 const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp =
3357diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
3358index 9c6259c28..10d9f93ce 100644
3359--- a/net/netfilter/nf_conntrack_standalone.c
3360+++ b/net/netfilter/nf_conntrack_standalone.c
3361@@ -353,7 +353,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
3362 if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
3363 goto release;
3364
3365- if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
3366+ if (test_bit(IPS_HW_OFFLOAD_BIT, &ct->status))
3367+ seq_puts(s, "[HW_OFFLOAD] ");
3368+ else if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
3369 seq_puts(s, "[OFFLOAD] ");
3370 else if (test_bit(IPS_ASSURED_BIT, &ct->status))
3371 seq_puts(s, "[ASSURED] ");
3372@@ -620,11 +622,17 @@ enum nf_ct_sysctl_index {
3373 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE,
3374 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS,
3375 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
3376+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3377+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
3378+#endif
3379 NF_SYSCTL_CT_PROTO_TCP_LOOSE,
3380 NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
3381 NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS,
3382 NF_SYSCTL_CT_PROTO_TIMEOUT_UDP,
3383 NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
3384+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3385+ NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
3386+#endif
3387 NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
3388 NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
3389 #ifdef CONFIG_NF_CT_PROTO_SCTP
3390@@ -812,6 +820,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
3391 .mode = 0644,
3392 .proc_handler = proc_dointvec_jiffies,
3393 },
3394+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3395+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD] = {
3396+ .procname = "nf_flowtable_tcp_timeout",
3397+ .maxlen = sizeof(unsigned int),
3398+ .mode = 0644,
3399+ .proc_handler = proc_dointvec_jiffies,
3400+ },
3401+#endif
3402 [NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
3403 .procname = "nf_conntrack_tcp_loose",
3404 .maxlen = sizeof(int),
3405@@ -846,6 +862,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
3406 .mode = 0644,
3407 .proc_handler = proc_dointvec_jiffies,
3408 },
3409+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3410+ [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = {
3411+ .procname = "nf_flowtable_udp_timeout",
3412+ .maxlen = sizeof(unsigned int),
3413+ .mode = 0644,
3414+ .proc_handler = proc_dointvec_jiffies,
3415+ },
3416+#endif
3417 [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
3418 .procname = "nf_conntrack_icmp_timeout",
3419 .maxlen = sizeof(unsigned int),
3420@@ -1028,6 +1052,11 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
3421 XASSIGN(LIBERAL, &tn->tcp_be_liberal);
3422 XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans);
3423 #undef XASSIGN
3424+
3425+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3426+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout;
3427+#endif
3428+
3429 }
3430
3431 static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net,
3432@@ -1115,6 +1144,9 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
3433 table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6].data = &nf_icmpv6_pernet(net)->timeout;
3434 table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP].data = &un->timeouts[UDP_CT_UNREPLIED];
3435 table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
3436+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3437+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
3438+#endif
3439
3440 nf_conntrack_standalone_init_tcp_sysctl(net, table);
3441 nf_conntrack_standalone_init_sctp_sysctl(net, table);
3442diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
3443index f212cec0..10365581 100644
3444--- a/net/netfilter/nf_flow_table_core.c
3445+++ b/net/netfilter/nf_flow_table_core.c
3446@@ -7,43 +7,21 @@
3447 #include <linux/netdevice.h>
3448 #include <net/ip.h>
3449 #include <net/ip6_route.h>
3450-#include <net/netfilter/nf_tables.h>
3451 #include <net/netfilter/nf_flow_table.h>
3452 #include <net/netfilter/nf_conntrack.h>
3453 #include <net/netfilter/nf_conntrack_core.h>
3454 #include <net/netfilter/nf_conntrack_l4proto.h>
3455 #include <net/netfilter/nf_conntrack_tuple.h>
3456
3457-struct flow_offload_entry {
3458- struct flow_offload flow;
3459- struct nf_conn *ct;
3460- struct rcu_head rcu_head;
3461-};
3462-
3463 static DEFINE_MUTEX(flowtable_lock);
3464 static LIST_HEAD(flowtables);
3465
3466-static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
3467-{
3468- const struct rt6_info *rt;
3469-
3470- if (flow_tuple->l3proto == NFPROTO_IPV6) {
3471- rt = (const struct rt6_info *)flow_tuple->dst_cache;
3472- return rt6_get_cookie(rt);
3473- }
3474-
3475- return 0;
3476-}
3477-
3478 static void
3479-flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
3480- struct nf_flow_route *route,
3481+flow_offload_fill_dir(struct flow_offload *flow,
3482 enum flow_offload_tuple_dir dir)
3483 {
3484 struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
3485- struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
3486- struct dst_entry *other_dst = route->tuple[!dir].dst;
3487- struct dst_entry *dst = route->tuple[dir].dst;
3488+ struct nf_conntrack_tuple *ctt = &flow->ct->tuplehash[dir].tuple;
3489
3490 ft->dir = dir;
3491
3492@@ -51,12 +29,10 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
3493 	case NFPROTO_IPV4:
3494 ft->src_v4 = ctt->src.u3.in;
3495 ft->dst_v4 = ctt->dst.u3.in;
3496- ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
3497 break;
3498 case NFPROTO_IPV6:
3499 ft->src_v6 = ctt->src.u3.in6;
3500 ft->dst_v6 = ctt->dst.u3.in6;
3501- ft->mtu = ip6_dst_mtu_forward(dst);
3502 break;
3503 }
3504
3505@@ -64,50 +40,32 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
3506 	ft->l4proto = ctt->dst.protonum;
3507 ft->src_port = ctt->src.u.tcp.port;
3508 ft->dst_port = ctt->dst.u.tcp.port;
3509-
3510- ft->iifidx = other_dst->dev->ifindex;
3511- ft->dst_cache = dst;
3512-	ft->dst_cookie = flow_offload_dst_cookie(ft);
3513 }
3514
3515-struct flow_offload *
3516-flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
3517+struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
3518 {
3519- struct flow_offload_entry *entry;
3520 struct flow_offload *flow;
3521
3522 if (unlikely(nf_ct_is_dying(ct) ||
3523 !atomic_inc_not_zero(&ct->ct_general.use)))
3524 return NULL;
3525
3526- entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
3527- if (!entry)
3528+ flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
3529+ if (!flow)
3530 goto err_ct_refcnt;
3531
3532- flow = &entry->flow;
3533-
3534-	if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
3535- goto err_dst_cache_original;
3536-
3537- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
3538- goto err_dst_cache_reply;
3539+	flow->ct = ct;
3540
3541-	entry->ct = ct;
3542-
3543- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
3544- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
3545+ flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
3546+ flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_REPLY);
3547
3548 if (ct->status & IPS_SRC_NAT)
3549- flow->flags |= FLOW_OFFLOAD_SNAT;
3550+ __set_bit(NF_FLOW_SNAT, &flow->flags);
3551 if (ct->status & IPS_DST_NAT)
3552- flow->flags |= FLOW_OFFLOAD_DNAT;
3553+ __set_bit(NF_FLOW_DNAT, &flow->flags);
3554
3555 return flow;
3556
3557-err_dst_cache_reply:
3558- dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
3559-err_dst_cache_original:
3560- kfree(entry);
3561 err_ct_refcnt:
3562 nf_ct_put(ct);
3563
3564@@ -115,40 +73,135 @@ flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
3565 }
3566 EXPORT_SYMBOL_GPL(flow_offload_alloc);
3567
3568-static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
3569+static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
3570 {
3571- tcp->state = TCP_CONNTRACK_ESTABLISHED;
3572- tcp->seen[0].td_maxwin = 0;
3573- tcp->seen[1].td_maxwin = 0;
3574+ const struct rt6_info *rt;
3575+
3576+ if (flow_tuple->l3proto == NFPROTO_IPV6) {
3577+ rt = (const struct rt6_info *)flow_tuple->dst_cache;
3578+ return rt6_get_cookie(rt);
3579+ }
3580+
3581+ return 0;
3582 }
3583
3584-#define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ)
3585-#define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ)
3586+static int flow_offload_fill_route(struct flow_offload *flow,
3587+ const struct nf_flow_route *route,
3588+ enum flow_offload_tuple_dir dir)
3589+{
3590+ struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
3591+ struct dst_entry *dst = route->tuple[dir].dst;
3592+ int i, j = 0;
3593+
3594+ switch (flow_tuple->l3proto) {
3595+ case NFPROTO_IPV4:
3596+ flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
3597+ break;
3598+ case NFPROTO_IPV6:
3599+ flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true);
3600+ break;
3601+ }
3602+
3603+ flow_tuple->iifidx = route->tuple[dir].in.ifindex;
3604+ for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {
3605+ flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
3606+ flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
3607+ if (route->tuple[dir].in.ingress_vlans & BIT(i))
3608+ flow_tuple->in_vlan_ingress |= BIT(j);
3609+ j++;
3610+ }
3611+ flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
3612+
3613+ switch (route->tuple[dir].xmit_type) {
3614+ case FLOW_OFFLOAD_XMIT_DIRECT:
3615+ memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
3616+ ETH_ALEN);
3617+ memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
3618+ ETH_ALEN);
3619+ flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
3620+ flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
3621+ break;
3622+ case FLOW_OFFLOAD_XMIT_XFRM:
3623+ case FLOW_OFFLOAD_XMIT_NEIGH:
3624+ if (!dst_hold_safe(route->tuple[dir].dst))
3625+ return -1;
3626+
3627+ flow_tuple->dst_cache = dst;
3628+ flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
3629+ break;
3630+ default:
3631+ WARN_ON_ONCE(1);
3632+ break;
3633+ }
3634+ flow_tuple->xmit_type = route->tuple[dir].xmit_type;
3635+
3636+ return 0;
3637+}
3638+
3639+static void nft_flow_dst_release(struct flow_offload *flow,
3640+ enum flow_offload_tuple_dir dir)
3641+{
3642+ if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
3643+ flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
3644+ dst_release(flow->tuplehash[dir].tuple.dst_cache);
3645+}
3646+
3647+int flow_offload_route_init(struct flow_offload *flow,
3648+ const struct nf_flow_route *route)
3649+{
3650+ int err;
3651+
3652+ err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
3653+ if (err < 0)
3654+ return err;
3655+
3656+ err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
3657+ if (err < 0)
3658+ goto err_route_reply;
3659+
3660+ flow->type = NF_FLOW_OFFLOAD_ROUTE;
3661+
3662+ return 0;
3663+
3664+err_route_reply:
3665+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
3666+
3667+ return err;
3668+}
3669+EXPORT_SYMBOL_GPL(flow_offload_route_init);
3670
3671-static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
3672+static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
3673 {
3674- return (__s32)(timeout - (u32)jiffies);
3675+ tcp->state = TCP_CONNTRACK_ESTABLISHED;
3676+ tcp->seen[0].td_maxwin = 0;
3677+ tcp->seen[1].td_maxwin = 0;
3678 }
3679
3680 static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
3681 {
3682- const struct nf_conntrack_l4proto *l4proto;
3683+ struct net *net = nf_ct_net(ct);
3684 int l4num = nf_ct_protonum(ct);
3685- unsigned int timeout;
3686+ s32 timeout;
3687
3688- l4proto = nf_ct_l4proto_find(l4num);
3689- if (!l4proto)
3690- return;
3691+ if (l4num == IPPROTO_TCP) {
3692+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
3693
3694- if (l4num == IPPROTO_TCP)
3695- timeout = NF_FLOWTABLE_TCP_PICKUP_TIMEOUT;
3696- else if (l4num == IPPROTO_UDP)
3697- timeout = NF_FLOWTABLE_UDP_PICKUP_TIMEOUT;
3698- else
3699+ timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
3700+ timeout -= tn->offload_timeout;
3701+ } else if (l4num == IPPROTO_UDP) {
3702+ struct nf_udp_net *tn = nf_udp_pernet(net);
3703+
3704+ timeout = tn->timeouts[UDP_CT_REPLIED];
3705+ timeout -= tn->offload_timeout;
3706+ } else {
3707 return;
3708+ }
3709+
3710+ if (timeout < 0)
3711+ timeout = 0;
3712
3713- if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
3714- ct->timeout = nfct_time_stamp + timeout;
3715+ if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
3716+ WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
3717 }
3718
3719 static void flow_offload_fixup_ct_state(struct nf_conn *ct)
3720@@ -163,17 +216,23 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
3721 flow_offload_fixup_ct_timeout(ct);
3722 }
3723
3724-void flow_offload_free(struct flow_offload *flow)
3725+static void flow_offload_route_release(struct flow_offload *flow)
3726 {
3727- struct flow_offload_entry *e;
3728+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
3729+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY);
3730+}
3731
3732- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
3733- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
3734- e = container_of(flow, struct flow_offload_entry, flow);
3735- if (flow->flags & FLOW_OFFLOAD_DYING)
3736- nf_ct_delete(e->ct, 0, 0);
3737- nf_ct_put(e->ct);
3738- kfree_rcu(e, rcu_head);
3739+void flow_offload_free(struct flow_offload *flow)
3740+{
3741+ switch (flow->type) {
3742+ case NF_FLOW_OFFLOAD_ROUTE:
3743+ flow_offload_route_release(flow);
3744+ break;
3745+ default:
3746+ break;
3747+ }
3748+ nf_ct_put(flow->ct);
3749+ kfree_rcu(flow, rcu_head);
3750 }
3751 EXPORT_SYMBOL_GPL(flow_offload_free);
3752
3753@@ -181,14 +240,14 @@ static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
3754 {
3755 const struct flow_offload_tuple *tuple = data;
3756
3757- return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
3758+ return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed);
3759 }
3760
3761 static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
3762 {
3763 const struct flow_offload_tuple_rhash *tuplehash = data;
3764
3765- return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
3766+ return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed);
3767 }
3768
3769 static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
3770@@ -197,7 +256,7 @@ static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
3771 const struct flow_offload_tuple *tuple = arg->key;
3772 const struct flow_offload_tuple_rhash *x = ptr;
3773
3774- if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
3775+ if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash)))
3776 return 1;
3777
3778 return 0;
3779@@ -211,30 +270,30 @@ static const struct rhashtable_params nf_flow_offload_rhash_params = {
3780 .automatic_shrinking = true,
3781 };
3782
3783-#define DAY (86400 * HZ)
3784-
3785-/* Set an arbitrary timeout large enough not to ever expire, this save
3786- * us a check for the IPS_OFFLOAD_BIT from the packet path via
3787- * nf_ct_is_expired().
3788- */
3789-static void nf_ct_offload_timeout(struct flow_offload *flow)
3790+unsigned long flow_offload_get_timeout(struct flow_offload *flow)
3791 {
3792- struct flow_offload_entry *entry;
3793- struct nf_conn *ct;
3794+ unsigned long timeout = NF_FLOW_TIMEOUT;
3795+ struct net *net = nf_ct_net(flow->ct);
3796+ int l4num = nf_ct_protonum(flow->ct);
3797
3798- entry = container_of(flow, struct flow_offload_entry, flow);
3799- ct = entry->ct;
3800+ if (l4num == IPPROTO_TCP) {
3801+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
3802
3803- if (nf_ct_expires(ct) < DAY / 2)
3804- ct->timeout = nfct_time_stamp + DAY;
3805+ timeout = tn->offload_timeout;
3806+ } else if (l4num == IPPROTO_UDP) {
3807+ struct nf_udp_net *tn = nf_udp_pernet(net);
3808+
3809+ timeout = tn->offload_timeout;
3810+ }
3811+
3812+ return timeout;
3813 }
3814
3815 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
3816 {
3817 int err;
3818
3819- nf_ct_offload_timeout(flow);
3820- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
3821+ flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
3822
3823 err = rhashtable_insert_fast(&flow_table->rhashtable,
3824 &flow->tuplehash[0].node,
3825@@ -252,10 +311,35 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
3826 return err;
3827 }
3828
3829+ nf_ct_offload_timeout(flow->ct);
3830+
3831+ if (nf_flowtable_hw_offload(flow_table)) {
3832+ __set_bit(NF_FLOW_HW, &flow->flags);
3833+ nf_flow_offload_add(flow_table, flow);
3834+ }
3835+
3836 return 0;
3837 }
3838 EXPORT_SYMBOL_GPL(flow_offload_add);
3839
3840+void flow_offload_refresh(struct nf_flowtable *flow_table,
3841+ struct flow_offload *flow)
3842+{
3843+ u32 timeout;
3844+
3845+ timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
3846+ if (timeout - READ_ONCE(flow->timeout) > HZ)
3847+ WRITE_ONCE(flow->timeout, timeout);
3848+ else
3849+ return;
3850+
3851+ if (likely(!nf_flowtable_hw_offload(flow_table)))
3852+ return;
3853+
3854+ nf_flow_offload_add(flow_table, flow);
3855+}
3856+EXPORT_SYMBOL_GPL(flow_offload_refresh);
3857+
3858 static inline bool nf_flow_has_expired(const struct flow_offload *flow)
3859 {
3860 return nf_flow_timeout_delta(flow->timeout) <= 0;
3861@@ -264,8 +348,6 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
3862 static void flow_offload_del(struct nf_flowtable *flow_table,
3863 struct flow_offload *flow)
3864 {
3865- struct flow_offload_entry *e;
3866-
3867 rhashtable_remove_fast(&flow_table->rhashtable,
3868 &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
3869 nf_flow_offload_rhash_params);
3870@@ -273,28 +355,21 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
3871 &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
3872 nf_flow_offload_rhash_params);
3873
3874- e = container_of(flow, struct flow_offload_entry, flow);
3875- clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
3876+ clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
3877
3878 if (nf_flow_has_expired(flow))
3879- flow_offload_fixup_ct(e->ct);
3880- else if (flow->flags & FLOW_OFFLOAD_TEARDOWN)
3881- flow_offload_fixup_ct_timeout(e->ct);
3882-
3883- if (!(flow->flags & FLOW_OFFLOAD_TEARDOWN))
3884- flow_offload_fixup_ct_state(e->ct);
3885+ flow_offload_fixup_ct(flow->ct);
3886+ else
3887+ flow_offload_fixup_ct_timeout(flow->ct);
3888
3889 flow_offload_free(flow);
3890 }
3891
3892 void flow_offload_teardown(struct flow_offload *flow)
3893 {
3894- struct flow_offload_entry *e;
3895-
3896- flow->flags |= FLOW_OFFLOAD_TEARDOWN;
3897+ set_bit(NF_FLOW_TEARDOWN, &flow->flags);
3898
3899- e = container_of(flow, struct flow_offload_entry, flow);
3900- flow_offload_fixup_ct_state(e->ct);
3901+ flow_offload_fixup_ct_state(flow->ct);
3902 }
3903 EXPORT_SYMBOL_GPL(flow_offload_teardown);
3904
3905@@ -304,7 +379,6 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
3906 {
3907 struct flow_offload_tuple_rhash *tuplehash;
3908 struct flow_offload *flow;
3909- struct flow_offload_entry *e;
3910 int dir;
3911
3912 tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
3913@@ -314,19 +388,17 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
3914
3915 dir = tuplehash->tuple.dir;
3916 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
3917- if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
3918+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags))
3919 return NULL;
3920
3921- e = container_of(flow, struct flow_offload_entry, flow);
3922- if (unlikely(nf_ct_is_dying(e->ct)))
3923+ if (unlikely(nf_ct_is_dying(flow->ct)))
3924 return NULL;
3925
3926 return tuplehash;
3927 }
3928 EXPORT_SYMBOL_GPL(flow_offload_lookup);
3929
3930-static int
3931-nf_flow_table_iterate(struct nf_flowtable *flow_table,
3932+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
3933 void (*iter)(struct flow_offload *flow, void *data),
3934 void *data)
3935 {
3936@@ -339,7 +411,6 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
3937 rhashtable_walk_start(&hti);
3938
3939 while ((tuplehash = rhashtable_walk_next(&hti))) {
3940-
3941 if (IS_ERR(tuplehash)) {
3942 if (PTR_ERR(tuplehash) != -EAGAIN) {
3943 err = PTR_ERR(tuplehash);
3944@@ -359,23 +430,49 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
3945
3946 return err;
3947 }
3948+EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
3949
3950-static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
3951+static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple)
3952 {
3953- struct nf_flowtable *flow_table = data;
3954- struct flow_offload_entry *e;
3955- bool teardown;
3956+ struct dst_entry *dst;
3957
3958- e = container_of(flow, struct flow_offload_entry, flow);
3959+ if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
3960+ tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
3961+ dst = tuple->dst_cache;
3962+ if (!dst_check(dst, tuple->dst_cookie))
3963+ return true;
3964+ }
3965
3966- teardown = flow->flags & (FLOW_OFFLOAD_DYING |
3967- FLOW_OFFLOAD_TEARDOWN);
3968+ return false;
3969+}
3970
3971- if (!teardown)
3972- nf_ct_offload_timeout(flow);
3973+static bool nf_flow_has_stale_dst(struct flow_offload *flow)
3974+{
3975+ return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) ||
3976+ flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple);
3977+}
3978
3979- if (nf_flow_has_expired(flow) || teardown)
3980- flow_offload_del(flow_table, flow);
3981+static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
3982+{
3983+ struct nf_flowtable *flow_table = data;
3984+
3985+ if (nf_flow_has_expired(flow) ||
3986+ nf_ct_is_dying(flow->ct) ||
3987+ nf_flow_has_stale_dst(flow))
3988+ set_bit(NF_FLOW_TEARDOWN, &flow->flags);
3989+
3990+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
3991+ if (test_bit(NF_FLOW_HW, &flow->flags)) {
3992+ if (!test_bit(NF_FLOW_HW_DYING, &flow->flags))
3993+ nf_flow_offload_del(flow_table, flow);
3994+ else if (test_bit(NF_FLOW_HW_DEAD, &flow->flags))
3995+ flow_offload_del(flow_table, flow);
3996+ } else {
3997+ flow_offload_del(flow_table, flow);
3998+ }
3999+ } else if (test_bit(NF_FLOW_HW, &flow->flags)) {
4000+ nf_flow_offload_stats(flow_table, flow);
4001+ }
4002 }
4003
4004 static void nf_flow_offload_work_gc(struct work_struct *work)
4005@@ -387,30 +484,20 @@ static void nf_flow_offload_work_gc(struct work_struct *work)
4006 queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
4007 }
4008
4009-static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
4010- __be16 port, __be16 new_port)
4011+static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
4012+ __be16 port, __be16 new_port)
4013 {
4014 struct tcphdr *tcph;
4015
4016- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
4017- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
4018- return -1;
4019-
4020 tcph = (void *)(skb_network_header(skb) + thoff);
4021 inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
4022-
4023- return 0;
4024 }
4025
4026-static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
4027- __be16 port, __be16 new_port)
4028+static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
4029+ __be16 port, __be16 new_port)
4030 {
4031 struct udphdr *udph;
4032
4033- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
4034- skb_try_make_writable(skb, thoff + sizeof(*udph)))
4035- return -1;
4036-
4037 udph = (void *)(skb_network_header(skb) + thoff);
4038 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
4039 inet_proto_csum_replace2(&udph->check, skb, port,
4040@@ -418,38 +505,28 @@ static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
4041 if (!udph->check)
4042 udph->check = CSUM_MANGLED_0;
4043 }
4044-
4045- return 0;
4046 }
4047
4048-static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
4049- u8 protocol, __be16 port, __be16 new_port)
4050+static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
4051+ u8 protocol, __be16 port, __be16 new_port)
4052 {
4053 switch (protocol) {
4054 case IPPROTO_TCP:
4055- if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
4056- return NF_DROP;
4057+ nf_flow_nat_port_tcp(skb, thoff, port, new_port);
4058 break;
4059 case IPPROTO_UDP:
4060- if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
4061- return NF_DROP;
4062+ nf_flow_nat_port_udp(skb, thoff, port, new_port);
4063 break;
4064 }
4065-
4066- return 0;
4067 }
4068
4069-int nf_flow_snat_port(const struct flow_offload *flow,
4070- struct sk_buff *skb, unsigned int thoff,
4071- u8 protocol, enum flow_offload_tuple_dir dir)
4072+void nf_flow_snat_port(const struct flow_offload *flow,
4073+ struct sk_buff *skb, unsigned int thoff,
4074+ u8 protocol, enum flow_offload_tuple_dir dir)
4075 {
4076 struct flow_ports *hdr;
4077 __be16 port, new_port;
4078
4079- if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
4080- skb_try_make_writable(skb, thoff + sizeof(*hdr)))
4081- return -1;
4082-
4083 hdr = (void *)(skb_network_header(skb) + thoff);
4084
4085 switch (dir) {
4086@@ -463,25 +540,19 @@ int nf_flow_snat_port(const struct flow_offload *flow,
4087 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
4088 hdr->dest = new_port;
4089 break;
4090- default:
4091- return -1;
4092 }
4093
4094- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4095+ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4096 }
4097 EXPORT_SYMBOL_GPL(nf_flow_snat_port);
4098
4099-int nf_flow_dnat_port(const struct flow_offload *flow,
4100- struct sk_buff *skb, unsigned int thoff,
4101- u8 protocol, enum flow_offload_tuple_dir dir)
4102+void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb,
4103+ unsigned int thoff, u8 protocol,
4104+ enum flow_offload_tuple_dir dir)
4105 {
4106 struct flow_ports *hdr;
4107 __be16 port, new_port;
4108
4109- if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
4110- skb_try_make_writable(skb, thoff + sizeof(*hdr)))
4111- return -1;
4112-
4113 hdr = (void *)(skb_network_header(skb) + thoff);
4114
4115 switch (dir) {
4116@@ -495,11 +566,9 @@ int nf_flow_dnat_port(const struct flow_offload *flow,
4117 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
4118 hdr->source = new_port;
4119 break;
4120- default:
4121- return -1;
4122 }
4123
4124- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4125+ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4126 }
4127 EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
4128
4129@@ -507,7 +576,9 @@ int nf_flow_table_init(struct nf_flowtable *flowtable)
4130 {
4131 int err;
4132
4133- INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
4134+ INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
4135+ flow_block_init(&flowtable->flow_block);
4136+ init_rwsem(&flowtable->flow_block_lock);
4137
4138 err = rhashtable_init(&flowtable->rhashtable,
4139 &nf_flow_offload_rhash_params);
4140@@ -528,25 +599,24 @@ EXPORT_SYMBOL_GPL(nf_flow_table_init);
4141 static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
4142 {
4143 struct net_device *dev = data;
4144- struct flow_offload_entry *e;
4145-
4146- e = container_of(flow, struct flow_offload_entry, flow);
4147
4148 if (!dev) {
4149 flow_offload_teardown(flow);
4150 return;
4151 }
4152- if (net_eq(nf_ct_net(e->ct), dev_net(dev)) &&
4153+
4154+ if (net_eq(nf_ct_net(flow->ct), dev_net(dev)) &&
4155 (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
4156 flow->tuplehash[1].tuple.iifidx == dev->ifindex))
4157- flow_offload_dead(flow);
4158+ flow_offload_teardown(flow);
4159 }
4160
4161-static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
4162- struct net_device *dev)
4163+void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
4164+ struct net_device *dev)
4165 {
4166 nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
4167 flush_delayed_work(&flowtable->gc_work);
4168+ nf_flow_table_offload_flush(flowtable);
4169 }
4170
4171 void nf_flow_table_cleanup(struct net_device *dev)
4172@@ -555,7 +625,7 @@ void nf_flow_table_cleanup(struct net_device *dev)
4173
4174 mutex_lock(&flowtable_lock);
4175 list_for_each_entry(flowtable, &flowtables, list)
4176- nf_flow_table_iterate_cleanup(flowtable, dev);
4177+ nf_flow_table_gc_cleanup(flowtable, dev);
4178 mutex_unlock(&flowtable_lock);
4179 }
4180 EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
4181@@ -565,9 +635,14 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
4182 mutex_lock(&flowtable_lock);
4183 list_del(&flow_table->list);
4184 mutex_unlock(&flowtable_lock);
4185+
4186 cancel_delayed_work_sync(&flow_table->gc_work);
4187 nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
4188 nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
4189+ nf_flow_table_offload_flush(flow_table);
4190+ if (nf_flowtable_hw_offload(flow_table))
4191+ nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step,
4192+ flow_table);
4193 rhashtable_destroy(&flow_table->rhashtable);
4194 }
4195 EXPORT_SYMBOL_GPL(nf_flow_table_free);
4196@@ -591,12 +666,23 @@ static struct notifier_block flow_offload_netdev_notifier = {
4197
4198 static int __init nf_flow_table_module_init(void)
4199 {
4200- return register_netdevice_notifier(&flow_offload_netdev_notifier);
4201+ int ret;
4202+
4203+ ret = nf_flow_table_offload_init();
4204+ if (ret)
4205+ return ret;
4206+
4207+ ret = register_netdevice_notifier(&flow_offload_netdev_notifier);
4208+ if (ret)
4209+ nf_flow_table_offload_exit();
4210+
4211+ return ret;
4212 }
4213
4214 static void __exit nf_flow_table_module_exit(void)
4215 {
4216 unregister_netdevice_notifier(&flow_offload_netdev_notifier);
4217+ nf_flow_table_offload_exit();
4218 }
4219
4220 module_init(nf_flow_table_module_init);
4221@@ -604,3 +690,4 @@ module_exit(nf_flow_table_module_exit);
4222
4223 MODULE_LICENSE("GPL");
4224 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
4225+MODULE_DESCRIPTION("Netfilter flow table module");
4226diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
4227index 397129b2..6257d87c 100644
4228--- a/net/netfilter/nf_flow_table_ip.c
4229+++ b/net/netfilter/nf_flow_table_ip.c
4230@@ -7,11 +7,13 @@
4231 #include <linux/ip.h>
4232 #include <linux/ipv6.h>
4233 #include <linux/netdevice.h>
4234+#include <linux/if_ether.h>
4235 #include <net/ip.h>
4236 #include <net/ipv6.h>
4237 #include <net/ip6_route.h>
4238 #include <net/neighbour.h>
4239 #include <net/netfilter/nf_flow_table.h>
4240+#include <net/netfilter/nf_conntrack_acct.h>
4241 /* For layer 4 checksum field offset. */
4242 #include <linux/tcp.h>
4243 #include <linux/udp.h>
4244@@ -24,9 +26,6 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
4245 if (proto != IPPROTO_TCP)
4246 return 0;
4247
4248- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
4249- return -1;
4250-
4251 tcph = (void *)(skb_network_header(skb) + thoff);
4252 if (unlikely(tcph->fin || tcph->rst)) {
4253 flow_offload_teardown(flow);
4254@@ -36,30 +35,20 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
4255 return 0;
4256 }
4257
4258-static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
4259- __be32 addr, __be32 new_addr)
4260+static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
4261+ __be32 addr, __be32 new_addr)
4262 {
4263 struct tcphdr *tcph;
4264
4265- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
4266- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
4267- return -1;
4268-
4269 tcph = (void *)(skb_network_header(skb) + thoff);
4270 inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
4271-
4272- return 0;
4273 }
4274
4275-static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
4276- __be32 addr, __be32 new_addr)
4277+static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
4278+ __be32 addr, __be32 new_addr)
4279 {
4280 struct udphdr *udph;
4281
4282- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
4283- skb_try_make_writable(skb, thoff + sizeof(*udph)))
4284- return -1;
4285-
4286 udph = (void *)(skb_network_header(skb) + thoff);
4287 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
4288 inet_proto_csum_replace4(&udph->check, skb, addr,
4289@@ -67,31 +56,25 @@ static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
4290 if (!udph->check)
4291 udph->check = CSUM_MANGLED_0;
4292 }
4293-
4294- return 0;
4295 }
4296
4297-static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
4298- unsigned int thoff, __be32 addr,
4299- __be32 new_addr)
4300+static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
4301+ unsigned int thoff, __be32 addr,
4302+ __be32 new_addr)
4303 {
4304 switch (iph->protocol) {
4305 case IPPROTO_TCP:
4306- if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
4307- return NF_DROP;
4308+ nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr);
4309 break;
4310 case IPPROTO_UDP:
4311- if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
4312- return NF_DROP;
4313+ nf_flow_nat_ip_udp(skb, thoff, addr, new_addr);
4314 break;
4315 }
4316-
4317- return 0;
4318 }
4319
4320-static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4321- struct iphdr *iph, unsigned int thoff,
4322- enum flow_offload_tuple_dir dir)
4323+static void nf_flow_snat_ip(const struct flow_offload *flow,
4324+ struct sk_buff *skb, struct iphdr *iph,
4325+ unsigned int thoff, enum flow_offload_tuple_dir dir)
4326 {
4327 __be32 addr, new_addr;
4328
4329@@ -106,17 +89,15 @@ static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4330 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
4331 iph->daddr = new_addr;
4332 break;
4333- default:
4334- return -1;
4335 }
4336 csum_replace4(&iph->check, addr, new_addr);
4337
4338- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4339+ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4340 }
4341
4342-static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4343- struct iphdr *iph, unsigned int thoff,
4344- enum flow_offload_tuple_dir dir)
4345+static void nf_flow_dnat_ip(const struct flow_offload *flow,
4346+ struct sk_buff *skb, struct iphdr *iph,
4347+ unsigned int thoff, enum flow_offload_tuple_dir dir)
4348 {
4349 __be32 addr, new_addr;
4350
4351@@ -131,29 +112,24 @@ static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4352 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
4353 iph->saddr = new_addr;
4354 break;
4355- default:
4356- return -1;
4357 }
4358 csum_replace4(&iph->check, addr, new_addr);
4359
4360- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4361+ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4362 }
4363
4364-static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4365- unsigned int thoff, enum flow_offload_tuple_dir dir)
4366+static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4367+ unsigned int thoff, enum flow_offload_tuple_dir dir,
4368+ struct iphdr *iph)
4369 {
4370- struct iphdr *iph = ip_hdr(skb);
4371-
4372- if (flow->flags & FLOW_OFFLOAD_SNAT &&
4373- (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
4374- nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
4375- return -1;
4376- if (flow->flags & FLOW_OFFLOAD_DNAT &&
4377- (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
4378- nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
4379- return -1;
4380-
4381- return 0;
4382+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
4383+ nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir);
4384+ nf_flow_snat_ip(flow, skb, iph, thoff, dir);
4385+ }
4386+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
4387+ nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir);
4388+ nf_flow_dnat_ip(flow, skb, iph, thoff, dir);
4389+ }
4390 }
4391
4392 static bool ip_has_options(unsigned int thoff)
4393@@ -161,35 +137,70 @@ static bool ip_has_options(unsigned int thoff)
4394 return thoff != sizeof(struct iphdr);
4395 }
4396
4397+static void nf_flow_tuple_encap(struct sk_buff *skb,
4398+ struct flow_offload_tuple *tuple)
4399+{
4400+ struct vlan_ethhdr *veth;
4401+ struct pppoe_hdr *phdr;
4402+ int i = 0;
4403+
4404+ if (skb_vlan_tag_present(skb)) {
4405+ tuple->encap[i].id = skb_vlan_tag_get(skb);
4406+ tuple->encap[i].proto = skb->vlan_proto;
4407+ i++;
4408+ }
4409+ switch (skb->protocol) {
4410+ case htons(ETH_P_8021Q):
4411+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
4412+ tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
4413+ tuple->encap[i].proto = skb->protocol;
4414+ break;
4415+ case htons(ETH_P_PPP_SES):
4416+ phdr = (struct pppoe_hdr *)skb_mac_header(skb);
4417+ tuple->encap[i].id = ntohs(phdr->sid);
4418+ tuple->encap[i].proto = skb->protocol;
4419+ break;
4420+ }
4421+}
4422+
4423 static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
4424- struct flow_offload_tuple *tuple)
4425+ struct flow_offload_tuple *tuple, u32 *hdrsize,
4426+ u32 offset)
4427 {
4428 struct flow_ports *ports;
4429 unsigned int thoff;
4430 struct iphdr *iph;
4431
4432- if (!pskb_may_pull(skb, sizeof(*iph)))
4433+ if (!pskb_may_pull(skb, sizeof(*iph) + offset))
4434 return -1;
4435
4436- iph = ip_hdr(skb);
4437- thoff = iph->ihl * 4;
4438+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
4439+ thoff = (iph->ihl * 4);
4440
4441 if (ip_is_fragment(iph) ||
4442 unlikely(ip_has_options(thoff)))
4443 return -1;
4444
4445- if (iph->protocol != IPPROTO_TCP &&
4446- iph->protocol != IPPROTO_UDP)
4447+ thoff += offset;
4448+
4449+ switch (iph->protocol) {
4450+ case IPPROTO_TCP:
4451+ *hdrsize = sizeof(struct tcphdr);
4452+ break;
4453+ case IPPROTO_UDP:
4454+ *hdrsize = sizeof(struct udphdr);
4455+ break;
4456+ default:
4457 return -1;
4458+ }
4459
4460 if (iph->ttl <= 1)
4461 return -1;
4462
4463- thoff = iph->ihl * 4;
4464- if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
4465+ if (!pskb_may_pull(skb, thoff + *hdrsize))
4466 return -1;
4467
4468- iph = ip_hdr(skb);
4469+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
4470 ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
4471
4472 tuple->src_v4.s_addr = iph->saddr;
4473@@ -199,6 +210,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
4474 tuple->l3proto = AF_INET;
4475 tuple->l4proto = iph->protocol;
4476 tuple->iifidx = dev->ifindex;
4477+ nf_flow_tuple_encap(skb, tuple);
4478
4479 return 0;
4480 }
4481@@ -225,6 +237,75 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
4482 return NF_STOLEN;
4483 }
4484
4485+static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
4486+ u32 *offset)
4487+{
4488+ struct vlan_ethhdr *veth;
4489+
4490+ switch (skb->protocol) {
4491+ case htons(ETH_P_8021Q):
4492+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
4493+ if (veth->h_vlan_encapsulated_proto == proto) {
4494+ *offset += VLAN_HLEN;
4495+ return true;
4496+ }
4497+ break;
4498+ case htons(ETH_P_PPP_SES):
4499+ if (nf_flow_pppoe_proto(skb) == proto) {
4500+ *offset += PPPOE_SES_HLEN;
4501+ return true;
4502+ }
4503+ break;
4504+ }
4505+
4506+ return false;
4507+}
4508+
4509+static void nf_flow_encap_pop(struct sk_buff *skb,
4510+ struct flow_offload_tuple_rhash *tuplehash)
4511+{
4512+ struct vlan_hdr *vlan_hdr;
4513+ int i;
4514+
4515+ for (i = 0; i < tuplehash->tuple.encap_num; i++) {
4516+ if (skb_vlan_tag_present(skb)) {
4517+ __vlan_hwaccel_clear_tag(skb);
4518+ continue;
4519+ }
4520+ switch (skb->protocol) {
4521+ case htons(ETH_P_8021Q):
4522+ vlan_hdr = (struct vlan_hdr *)skb->data;
4523+ __skb_pull(skb, VLAN_HLEN);
4524+ vlan_set_encap_proto(skb, vlan_hdr);
4525+ skb_reset_network_header(skb);
4526+ break;
4527+ case htons(ETH_P_PPP_SES):
4528+ skb->protocol = nf_flow_pppoe_proto(skb);
4529+ skb_pull(skb, PPPOE_SES_HLEN);
4530+ skb_reset_network_header(skb);
4531+ break;
4532+ }
4533+ }
4534+}
4535+
4536+static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
4537+ const struct flow_offload_tuple_rhash *tuplehash,
4538+ unsigned short type)
4539+{
4540+ struct net_device *outdev;
4541+
4542+ outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
4543+ if (!outdev)
4544+ return NF_DROP;
4545+
4546+ skb->dev = outdev;
4547+ dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
4548+ tuplehash->tuple.out.h_source, skb->len);
4549+ dev_queue_xmit(skb);
4550+
4551+ return NF_STOLEN;
4552+}
4553+
4554 unsigned int
4555 nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
4556 const struct nf_hook_state *state)
4557@@ -235,15 +316,18 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
4558 enum flow_offload_tuple_dir dir;
4559 struct flow_offload *flow;
4560 struct net_device *outdev;
4561+ u32 hdrsize, offset = 0;
4562+ unsigned int thoff, mtu;
4563 struct rtable *rt;
4564- unsigned int thoff;
4565 struct iphdr *iph;
4566 __be32 nexthop;
4567+ int ret;
4568
4569- if (skb->protocol != htons(ETH_P_IP))
4570+ if (skb->protocol != htons(ETH_P_IP) &&
4571+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset))
4572 return NF_ACCEPT;
4573
4574- if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
4575+ if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0)
4576 return NF_ACCEPT;
4577
4578 tuplehash = flow_offload_lookup(flow_table, &tuple);
4579@@ -252,75 +336,80 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
4580
4581 dir = tuplehash->tuple.dir;
4582 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
4583- rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
4584- outdev = rt->dst.dev;
4585-
4586- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
4587- return NF_ACCEPT;
4588
4589- if (skb_try_make_writable(skb, sizeof(*iph)))
4590- return NF_DROP;
4591-
4592- thoff = ip_hdr(skb)->ihl * 4;
4593- if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
4594+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
4595+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
4596 return NF_ACCEPT;
4597
4598- if (!dst_check(&rt->dst, 0)) {
4599- flow_offload_teardown(flow);
4600+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
4601+ thoff = (iph->ihl * 4) + offset;
4602+ if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
4603 return NF_ACCEPT;
4604- }
4605
4606- if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
4607+ if (skb_try_make_writable(skb, thoff + hdrsize))
4608 return NF_DROP;
4609
4610- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
4611+ flow_offload_refresh(flow_table, flow);
4612+
4613+ nf_flow_encap_pop(skb, tuplehash);
4614+ thoff -= offset;
4615+
4616 iph = ip_hdr(skb);
4617+ nf_flow_nat_ip(flow, skb, thoff, dir, iph);
4618+
4619 ip_decrease_ttl(iph);
4620 skb->tstamp = 0;
4621
4622- if (unlikely(dst_xfrm(&rt->dst))) {
4623+ if (flow_table->flags & NF_FLOWTABLE_COUNTER)
4624+ nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
4625+
4626+ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
4627+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
4628 memset(skb->cb, 0, sizeof(struct inet_skb_parm));
4629 IPCB(skb)->iif = skb->dev->ifindex;
4630 IPCB(skb)->flags = IPSKB_FORWARDED;
4631 return nf_flow_xmit_xfrm(skb, state, &rt->dst);
4632 }
4633
4634- skb->dev = outdev;
4635- nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
4636- skb_dst_set_noref(skb, &rt->dst);
4637- neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
4638+ switch (tuplehash->tuple.xmit_type) {
4639+ case FLOW_OFFLOAD_XMIT_NEIGH:
4640+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
4641+ outdev = rt->dst.dev;
4642+ skb->dev = outdev;
4643+ nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
4644+ skb_dst_set_noref(skb, &rt->dst);
4645+ neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
4646+ ret = NF_STOLEN;
4647+ break;
4648+ case FLOW_OFFLOAD_XMIT_DIRECT:
4649+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
4650+ if (ret == NF_DROP)
4651+ flow_offload_teardown(flow);
4652+ break;
4653+ }
4654
4655- return NF_STOLEN;
4656+ return ret;
4657 }
4658 EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
4659
4660-static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
4661- struct in6_addr *addr,
4662- struct in6_addr *new_addr)
4663+static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
4664+ struct in6_addr *addr,
4665+ struct in6_addr *new_addr,
4666+ struct ipv6hdr *ip6h)
4667 {
4668 struct tcphdr *tcph;
4669
4670- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
4671- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
4672- return -1;
4673-
4674 tcph = (void *)(skb_network_header(skb) + thoff);
4675 inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
4676 new_addr->s6_addr32, true);
4677-
4678- return 0;
4679 }
4680
4681-static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
4682- struct in6_addr *addr,
4683- struct in6_addr *new_addr)
4684+static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
4685+ struct in6_addr *addr,
4686+ struct in6_addr *new_addr)
4687 {
4688 struct udphdr *udph;
4689
4690- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
4691- skb_try_make_writable(skb, thoff + sizeof(*udph)))
4692- return -1;
4693-
4694 udph = (void *)(skb_network_header(skb) + thoff);
4695 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
4696 inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
4697@@ -328,32 +417,26 @@ static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
4698 if (!udph->check)
4699 udph->check = CSUM_MANGLED_0;
4700 }
4701-
4702- return 0;
4703 }
4704
4705-static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
4706- unsigned int thoff, struct in6_addr *addr,
4707- struct in6_addr *new_addr)
4708+static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
4709+ unsigned int thoff, struct in6_addr *addr,
4710+ struct in6_addr *new_addr)
4711 {
4712 switch (ip6h->nexthdr) {
4713 case IPPROTO_TCP:
4714- if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
4715- return NF_DROP;
4716+ nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h);
4717 break;
4718 case IPPROTO_UDP:
4719- if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
4720- return NF_DROP;
4721+ nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr);
4722 break;
4723 }
4724-
4725- return 0;
4726 }
4727
4728-static int nf_flow_snat_ipv6(const struct flow_offload *flow,
4729- struct sk_buff *skb, struct ipv6hdr *ip6h,
4730- unsigned int thoff,
4731- enum flow_offload_tuple_dir dir)
4732+static void nf_flow_snat_ipv6(const struct flow_offload *flow,
4733+ struct sk_buff *skb, struct ipv6hdr *ip6h,
4734+ unsigned int thoff,
4735+ enum flow_offload_tuple_dir dir)
4736 {
4737 struct in6_addr addr, new_addr;
4738
4739@@ -368,17 +451,15 @@ static int nf_flow_snat_ipv6(const struct flow_offload *flow,
4740 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
4741 ip6h->daddr = new_addr;
4742 break;
4743- default:
4744- return -1;
4745 }
4746
4747- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4748+ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4749 }
4750
4751-static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
4752- struct sk_buff *skb, struct ipv6hdr *ip6h,
4753- unsigned int thoff,
4754- enum flow_offload_tuple_dir dir)
4755+static void nf_flow_dnat_ipv6(const struct flow_offload *flow,
4756+ struct sk_buff *skb, struct ipv6hdr *ip6h,
4757+ unsigned int thoff,
4758+ enum flow_offload_tuple_dir dir)
4759 {
4760 struct in6_addr addr, new_addr;
4761
4762@@ -393,56 +474,60 @@ static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
4763 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
4764 ip6h->saddr = new_addr;
4765 break;
4766- default:
4767- return -1;
4768 }
4769
4770- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4771+ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4772 }
4773
4774-static int nf_flow_nat_ipv6(const struct flow_offload *flow,
4775- struct sk_buff *skb,
4776- enum flow_offload_tuple_dir dir)
4777+static void nf_flow_nat_ipv6(const struct flow_offload *flow,
4778+ struct sk_buff *skb,
4779+ enum flow_offload_tuple_dir dir,
4780+ struct ipv6hdr *ip6h)
4781 {
4782- struct ipv6hdr *ip6h = ipv6_hdr(skb);
4783 unsigned int thoff = sizeof(*ip6h);
4784
4785- if (flow->flags & FLOW_OFFLOAD_SNAT &&
4786- (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
4787- nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
4788- return -1;
4789- if (flow->flags & FLOW_OFFLOAD_DNAT &&
4790- (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
4791- nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
4792- return -1;
4793-
4794- return 0;
4795+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
4796+ nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir);
4797+ nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir);
4798+ }
4799+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
4800+ nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir);
4801+ nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir);
4802+ }
4803 }
4804
4805 static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
4806- struct flow_offload_tuple *tuple)
4807+ struct flow_offload_tuple *tuple, u32 *hdrsize,
4808+ u32 offset)
4809 {
4810 struct flow_ports *ports;
4811 struct ipv6hdr *ip6h;
4812 unsigned int thoff;
4813
4814- if (!pskb_may_pull(skb, sizeof(*ip6h)))
4815+ thoff = sizeof(*ip6h) + offset;
4816+ if (!pskb_may_pull(skb, thoff))
4817 return -1;
4818
4819- ip6h = ipv6_hdr(skb);
4820+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
4821
4822- if (ip6h->nexthdr != IPPROTO_TCP &&
4823- ip6h->nexthdr != IPPROTO_UDP)
4824+ switch (ip6h->nexthdr) {
4825+ case IPPROTO_TCP:
4826+ *hdrsize = sizeof(struct tcphdr);
4827+ break;
4828+ case IPPROTO_UDP:
4829+ *hdrsize = sizeof(struct udphdr);
4830+ break;
4831+ default:
4832 return -1;
4833+ }
4834
4835 if (ip6h->hop_limit <= 1)
4836 return -1;
4837
4838- thoff = sizeof(*ip6h);
4839- if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
4840+ if (!pskb_may_pull(skb, thoff + *hdrsize))
4841 return -1;
4842
4843- ip6h = ipv6_hdr(skb);
4844+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
4845 ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
4846
4847 tuple->src_v6 = ip6h->saddr;
4848@@ -452,6 +537,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
4849 tuple->l3proto = AF_INET6;
4850 tuple->l4proto = ip6h->nexthdr;
4851 tuple->iifidx = dev->ifindex;
4852+ nf_flow_tuple_encap(skb, tuple);
4853
4854 return 0;
4855 }
4856@@ -467,13 +553,17 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
4857 const struct in6_addr *nexthop;
4858 struct flow_offload *flow;
4859 struct net_device *outdev;
4860+ unsigned int thoff, mtu;
4861+ u32 hdrsize, offset = 0;
4862 struct ipv6hdr *ip6h;
4863 struct rt6_info *rt;
4864+ int ret;
4865
4866- if (skb->protocol != htons(ETH_P_IPV6))
4867+ if (skb->protocol != htons(ETH_P_IPV6) &&
4868+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset))
4869 return NF_ACCEPT;
4870
4871- if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
4872+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0)
4873 return NF_ACCEPT;
4874
4875 tuplehash = flow_offload_lookup(flow_table, &tuple);
4876@@ -482,44 +572,57 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
4877
4878 dir = tuplehash->tuple.dir;
4879 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
4880- rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
4881- outdev = rt->dst.dev;
4882
4883- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
4884+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
4885+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
4886 return NF_ACCEPT;
4887
4888- if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
4889- sizeof(*ip6h)))
4890+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
4891+ thoff = sizeof(*ip6h) + offset;
4892+ if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
4893 return NF_ACCEPT;
4894
4895- if (!dst_check(&rt->dst, tuplehash->tuple.dst_cookie)) {
4896- flow_offload_teardown(flow);
4897- return NF_ACCEPT;
4898- }
4899-
4900- if (skb_try_make_writable(skb, sizeof(*ip6h)))
4901+ if (skb_try_make_writable(skb, thoff + hdrsize))
4902 return NF_DROP;
4903
4904- if (nf_flow_nat_ipv6(flow, skb, dir) < 0)
4905- return NF_DROP;
4906+ flow_offload_refresh(flow_table, flow);
4907+
4908+ nf_flow_encap_pop(skb, tuplehash);
4909
4910- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
4911 ip6h = ipv6_hdr(skb);
4912+ nf_flow_nat_ipv6(flow, skb, dir, ip6h);
4913+
4914 ip6h->hop_limit--;
4915 skb->tstamp = 0;
4916
4917- if (unlikely(dst_xfrm(&rt->dst))) {
4918+ if (flow_table->flags & NF_FLOWTABLE_COUNTER)
4919+ nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
4920+
4921+ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
4922+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
4923 memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
4924 IP6CB(skb)->iif = skb->dev->ifindex;
4925 IP6CB(skb)->flags = IP6SKB_FORWARDED;
4926 return nf_flow_xmit_xfrm(skb, state, &rt->dst);
4927 }
4928
4929- skb->dev = outdev;
4930- nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
4931- skb_dst_set_noref(skb, &rt->dst);
4932- neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
4933+ switch (tuplehash->tuple.xmit_type) {
4934+ case FLOW_OFFLOAD_XMIT_NEIGH:
4935+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
4936+ outdev = rt->dst.dev;
4937+ skb->dev = outdev;
4938+ nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
4939+ skb_dst_set_noref(skb, &rt->dst);
4940+ neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
4941+ ret = NF_STOLEN;
4942+ break;
4943+ case FLOW_OFFLOAD_XMIT_DIRECT:
4944+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
4945+ if (ret == NF_DROP)
4946+ flow_offload_teardown(flow);
4947+ break;
4948+ }
4949
4950- return NF_STOLEN;
4951+ return ret;
4952 }
4953 EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
4954diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
4955new file mode 100644
4956index 000000000..d94c6fb92
4957--- /dev/null
4958+++ b/net/netfilter/nf_flow_table_offload.c
4959@@ -0,0 +1,1195 @@
4960+#include <linux/kernel.h>
4961+#include <linux/init.h>
4962+#include <linux/module.h>
4963+#include <linux/netfilter.h>
4964+#include <linux/rhashtable.h>
4965+#include <linux/netdevice.h>
4966+#include <linux/tc_act/tc_csum.h>
4967+#include <net/flow_offload.h>
4968+#include <net/netfilter/nf_flow_table.h>
4969+#include <net/netfilter/nf_tables.h>
4970+#include <net/netfilter/nf_conntrack.h>
4971+#include <net/netfilter/nf_conntrack_acct.h>
4972+#include <net/netfilter/nf_conntrack_core.h>
4973+#include <net/netfilter/nf_conntrack_tuple.h>
4974+
4975+static struct workqueue_struct *nf_flow_offload_add_wq;
4976+static struct workqueue_struct *nf_flow_offload_del_wq;
4977+static struct workqueue_struct *nf_flow_offload_stats_wq;
4978+
4979+struct flow_offload_work {
4980+ struct list_head list;
4981+ enum flow_cls_command cmd;
4982+ int priority;
4983+ struct nf_flowtable *flowtable;
4984+ struct flow_offload *flow;
4985+ struct work_struct work;
4986+};
4987+
4988+#define NF_FLOW_DISSECTOR(__match, __type, __field) \
4989+ (__match)->dissector.offset[__type] = \
4990+ offsetof(struct nf_flow_key, __field)
4991+
4992+static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
4993+ struct ip_tunnel_info *tun_info)
4994+{
4995+ struct nf_flow_key *mask = &match->mask;
4996+ struct nf_flow_key *key = &match->key;
4997+ unsigned int enc_keys;
4998+
4999+ if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
5000+ return;
5001+
5002+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control);
5003+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
5004+ key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
5005+ mask->enc_key_id.keyid = 0xffffffff;
5006+ enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
5007+ BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);
5008+
5009+ if (ip_tunnel_info_af(tun_info) == AF_INET) {
5010+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
5011+ enc_ipv4);
5012+ key->enc_ipv4.src = tun_info->key.u.ipv4.dst;
5013+ key->enc_ipv4.dst = tun_info->key.u.ipv4.src;
5014+ if (key->enc_ipv4.src)
5015+ mask->enc_ipv4.src = 0xffffffff;
5016+ if (key->enc_ipv4.dst)
5017+ mask->enc_ipv4.dst = 0xffffffff;
5018+ enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
5019+ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
5020+ } else {
5021+ memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
5022+ sizeof(struct in6_addr));
5023+ memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src,
5024+ sizeof(struct in6_addr));
5025+ if (memcmp(&key->enc_ipv6.src, &in6addr_any,
5026+ sizeof(struct in6_addr)))
5027+ memset(&mask->enc_ipv6.src, 0xff,
5028+ sizeof(struct in6_addr));
5029+ if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
5030+ sizeof(struct in6_addr)))
5031+ memset(&mask->enc_ipv6.dst, 0xff,
5032+ sizeof(struct in6_addr));
5033+ enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
5034+ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
5035+ }
5036+
5037+ match->dissector.used_keys |= enc_keys;
5038+}
5039+
5040+static void nf_flow_rule_vlan_match(struct flow_dissector_key_vlan *key,
5041+ struct flow_dissector_key_vlan *mask,
5042+ u16 vlan_id, __be16 proto)
5043+{
5044+ key->vlan_id = vlan_id;
5045+ mask->vlan_id = VLAN_VID_MASK;
5046+ key->vlan_tpid = proto;
5047+ mask->vlan_tpid = 0xffff;
5048+}
5049+
5050+static int nf_flow_rule_match(struct nf_flow_match *match,
5051+ const struct flow_offload_tuple *tuple,
5052+ struct dst_entry *other_dst)
5053+{
5054+ struct nf_flow_key *mask = &match->mask;
5055+ struct nf_flow_key *key = &match->key;
5056+ struct ip_tunnel_info *tun_info;
5057+ bool vlan_encap = false;
5058+
5059+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
5060+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
5061+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
5062+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
5063+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
5064+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
5065+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);
5066+
5067+ if (other_dst && other_dst->lwtstate) {
5068+ tun_info = lwt_tun_info(other_dst->lwtstate);
5069+ nf_flow_rule_lwt_match(match, tun_info);
5070+ }
5071+
5072+ key->meta.ingress_ifindex = tuple->iifidx;
5073+ mask->meta.ingress_ifindex = 0xffffffff;
5074+
5075+ if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
5076+ tuple->encap[0].proto == htons(ETH_P_8021Q)) {
5077+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, vlan);
5078+ nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
5079+ tuple->encap[0].id,
5080+ tuple->encap[0].proto);
5081+ vlan_encap = true;
5082+ }
5083+
5084+ if (tuple->encap_num > 1 && !(tuple->in_vlan_ingress & BIT(1)) &&
5085+ tuple->encap[1].proto == htons(ETH_P_8021Q)) {
5086+ if (vlan_encap) {
5087+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CVLAN,
5088+ cvlan);
5089+ nf_flow_rule_vlan_match(&key->cvlan, &mask->cvlan,
5090+ tuple->encap[1].id,
5091+ tuple->encap[1].proto);
5092+ } else {
5093+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN,
5094+ vlan);
5095+ nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
5096+ tuple->encap[1].id,
5097+ tuple->encap[1].proto);
5098+ }
5099+ }
5100+
5101+ switch (tuple->l3proto) {
5102+ case AF_INET:
5103+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
5104+ key->basic.n_proto = htons(ETH_P_IP);
5105+ key->ipv4.src = tuple->src_v4.s_addr;
5106+ mask->ipv4.src = 0xffffffff;
5107+ key->ipv4.dst = tuple->dst_v4.s_addr;
5108+ mask->ipv4.dst = 0xffffffff;
5109+ break;
5110+ case AF_INET6:
5111+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
5112+ key->basic.n_proto = htons(ETH_P_IPV6);
5113+ key->ipv6.src = tuple->src_v6;
5114+ memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
5115+ key->ipv6.dst = tuple->dst_v6;
5116+ memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
5117+ break;
5118+ default:
5119+ return -EOPNOTSUPP;
5120+ }
5121+ mask->control.addr_type = 0xffff;
5122+ match->dissector.used_keys |= BIT(key->control.addr_type);
5123+ mask->basic.n_proto = 0xffff;
5124+
5125+ switch (tuple->l4proto) {
5126+ case IPPROTO_TCP:
5127+ key->tcp.flags = 0;
5128+ mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
5129+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
5130+ break;
5131+ case IPPROTO_UDP:
5132+ break;
5133+ default:
5134+ return -EOPNOTSUPP;
5135+ }
5136+
5137+ key->basic.ip_proto = tuple->l4proto;
5138+ mask->basic.ip_proto = 0xff;
5139+
5140+ key->tp.src = tuple->src_port;
5141+ mask->tp.src = 0xffff;
5142+ key->tp.dst = tuple->dst_port;
5143+ mask->tp.dst = 0xffff;
5144+
5145+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
5146+ BIT(FLOW_DISSECTOR_KEY_CONTROL) |
5147+ BIT(FLOW_DISSECTOR_KEY_BASIC) |
5148+ BIT(FLOW_DISSECTOR_KEY_PORTS);
5149+ return 0;
5150+}
5151+
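+/* Fill one FLOW_ACTION_MANGLE entry: a masked 32-bit rewrite at @offset
+ * within the header selected by @htype (Ethernet, IPv4/IPv6, TCP or UDP).
+ */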
5152+static void flow_offload_mangle(struct flow_action_entry *entry,
5153+ enum flow_action_mangle_base htype, u32 offset,
5154+ const __be32 *value, const __be32 *mask)
5155+{
5156+ entry->id = FLOW_ACTION_MANGLE;
5157+ entry->mangle.htype = htype;
5158+ entry->mangle.offset = offset;
5159+ memcpy(&entry->mangle.mask, mask, sizeof(u32));
5160+ memcpy(&entry->mangle.val, value, sizeof(u32));
5161+}
5162+
5163+static inline struct flow_action_entry *
5164+flow_action_entry_next(struct nf_flow_rule *flow_rule)
5165+{
5166+ int i = flow_rule->rule->action.num_entries++;
5167+
5168+ return &flow_rule->rule->action.entries[i];
5169+}
5170+
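+/* Rewrite the Ethernet source MAC: taken from out.h_source for direct
+ * xmit, otherwise from the device behind the peer tuple's iifidx, and
+ * applied as two masked 32-bit mangles at offsets 4 and 8 of the header.
+ */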
5171+static int flow_offload_eth_src(struct net *net,
5172+ const struct flow_offload *flow,
5173+ enum flow_offload_tuple_dir dir,
5174+ struct nf_flow_rule *flow_rule)
5175+{
5176+ struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
5177+ struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
5178+ const struct flow_offload_tuple *other_tuple, *this_tuple;
5179+ struct net_device *dev = NULL;
5180+ const unsigned char *addr;
5181+ u32 mask, val;
5182+ u16 val16;
5183+
5184+ this_tuple = &flow->tuplehash[dir].tuple;
5185+
5186+ switch (this_tuple->xmit_type) {
5187+ case FLOW_OFFLOAD_XMIT_DIRECT:
5188+ addr = this_tuple->out.h_source;
5189+ break;
5190+ case FLOW_OFFLOAD_XMIT_NEIGH:
5191+ other_tuple = &flow->tuplehash[!dir].tuple;
5192+ dev = dev_get_by_index(net, other_tuple->iifidx);
5193+ if (!dev)
5194+ return -ENOENT;
5195+
5196+ addr = dev->dev_addr;
5197+ break;
5198+ default:
5199+ return -EOPNOTSUPP;
5200+ }
5201+
5202+ mask = ~0xffff0000;
5203+ memcpy(&val16, addr, 2);
5204+ val = val16 << 16;
5205+ flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
5206+ &val, &mask);
5207+
5208+ mask = ~0xffffffff;
5209+ memcpy(&val, addr + 2, 4);
5210+ flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
5211+ &val, &mask);
5212+
5213+ if (dev)
5214+ dev_put(dev);
5215+
5216+ return 0;
5217+}
5218+
5219+static int flow_offload_eth_dst(struct net *net,
5220+ const struct flow_offload *flow,
5221+ enum flow_offload_tuple_dir dir,
5222+ struct nf_flow_rule *flow_rule)
5223+{
5224+ struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
5225+ struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
5226+ const struct flow_offload_tuple *other_tuple, *this_tuple;
5227+ const struct dst_entry *dst_cache;
5228+ unsigned char ha[ETH_ALEN];
5229+ struct neighbour *n;
5230+ const void *daddr;
5231+ u32 mask, val;
5232+ u8 nud_state;
5233+ u16 val16;
5234+
5235+ this_tuple = &flow->tuplehash[dir].tuple;
5236+
5237+ switch (this_tuple->xmit_type) {
5238+ case FLOW_OFFLOAD_XMIT_DIRECT:
5239+ ether_addr_copy(ha, this_tuple->out.h_dest);
5240+ break;
5241+ case FLOW_OFFLOAD_XMIT_NEIGH:
5242+ other_tuple = &flow->tuplehash[!dir].tuple;
5243+ daddr = &other_tuple->src_v4;
5244+ dst_cache = this_tuple->dst_cache;
5245+ n = dst_neigh_lookup(dst_cache, daddr);
5246+ if (!n)
5247+ return -ENOENT;
5248+
5249+ read_lock_bh(&n->lock);
5250+ nud_state = n->nud_state;
5251+ ether_addr_copy(ha, n->ha);
5252+ read_unlock_bh(&n->lock);
5253+ neigh_release(n);
5254+
5255+ if (!(nud_state & NUD_VALID))
5256+ return -ENOENT;
5257+ break;
5258+ default:
5259+ return -EOPNOTSUPP;
5260+ }
5261+
5262+ mask = ~0xffffffff;
5263+ memcpy(&val, ha, 4);
5264+ flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
5265+ &val, &mask);
5266+
5267+ mask = ~0x0000ffff;
5268+ memcpy(&val16, ha + 4, 2);
5269+ val = val16;
5270+ flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
5271+ &val, &mask);
5272+
5273+ return 0;
5274+}
5275+
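+/* The SNAT/DNAT helpers below always fetch the translated address or port
+ * from the opposite direction's tuple, so the same code serves both the
+ * original and the reply direction of the flow.
+ */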
5276+static void flow_offload_ipv4_snat(struct net *net,
5277+ const struct flow_offload *flow,
5278+ enum flow_offload_tuple_dir dir,
5279+ struct nf_flow_rule *flow_rule)
5280+{
5281+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5282+ u32 mask = ~htonl(0xffffffff);
5283+ __be32 addr;
5284+ u32 offset;
5285+
5286+ switch (dir) {
5287+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5288+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
5289+ offset = offsetof(struct iphdr, saddr);
5290+ break;
5291+ case FLOW_OFFLOAD_DIR_REPLY:
5292+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
5293+ offset = offsetof(struct iphdr, daddr);
5294+ break;
5295+ default:
5296+ return;
5297+ }
5298+
5299+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
5300+ &addr, &mask);
5301+}
5302+
5303+static void flow_offload_ipv4_dnat(struct net *net,
5304+ const struct flow_offload *flow,
5305+ enum flow_offload_tuple_dir dir,
5306+ struct nf_flow_rule *flow_rule)
5307+{
5308+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5309+ u32 mask = ~htonl(0xffffffff);
5310+ __be32 addr;
5311+ u32 offset;
5312+
5313+ switch (dir) {
5314+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5315+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
5316+ offset = offsetof(struct iphdr, daddr);
5317+ break;
5318+ case FLOW_OFFLOAD_DIR_REPLY:
5319+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
5320+ offset = offsetof(struct iphdr, saddr);
5321+ break;
5322+ default:
5323+ return;
5324+ }
5325+
5326+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
5327+ &addr, &mask);
5328+}
5329+
5330+static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
5331+ unsigned int offset,
5332+ const __be32 *addr, const __be32 *mask)
5333+{
5334+ struct flow_action_entry *entry;
5335+ int i, j;
5336+
5337+ for (i = 0, j = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32), j++) {
5338+ entry = flow_action_entry_next(flow_rule);
5339+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
5340+ offset + i, &addr[j], mask);
5341+ }
5342+}
5343+
5344+static void flow_offload_ipv6_snat(struct net *net,
5345+ const struct flow_offload *flow,
5346+ enum flow_offload_tuple_dir dir,
5347+ struct nf_flow_rule *flow_rule)
5348+{
5349+ u32 mask = ~htonl(0xffffffff);
5350+ const __be32 *addr;
5351+ u32 offset;
5352+
5353+ switch (dir) {
5354+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5355+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
5356+ offset = offsetof(struct ipv6hdr, saddr);
5357+ break;
5358+ case FLOW_OFFLOAD_DIR_REPLY:
5359+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
5360+ offset = offsetof(struct ipv6hdr, daddr);
5361+ break;
5362+ default:
5363+ return;
5364+ }
5365+
5366+ flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
5367+}
5368+
5369+static void flow_offload_ipv6_dnat(struct net *net,
5370+ const struct flow_offload *flow,
5371+ enum flow_offload_tuple_dir dir,
5372+ struct nf_flow_rule *flow_rule)
5373+{
5374+ u32 mask = ~htonl(0xffffffff);
5375+ const __be32 *addr;
5376+ u32 offset;
5377+
5378+ switch (dir) {
5379+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5380+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
5381+ offset = offsetof(struct ipv6hdr, daddr);
5382+ break;
5383+ case FLOW_OFFLOAD_DIR_REPLY:
5384+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
5385+ offset = offsetof(struct ipv6hdr, saddr);
5386+ break;
5387+ default:
5388+ return;
5389+ }
5390+
5391+ flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
5392+}
5393+
5394+static int flow_offload_l4proto(const struct flow_offload *flow)
5395+{
5396+ u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
5397+ u8 type = 0;
5398+
5399+ switch (protonum) {
5400+ case IPPROTO_TCP:
5401+ type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
5402+ break;
5403+ case IPPROTO_UDP:
5404+ type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
5405+ break;
5406+ default:
5407+ break;
5408+ }
5409+
5410+ return type;
5411+}
5412+
5413+static void flow_offload_port_snat(struct net *net,
5414+ const struct flow_offload *flow,
5415+ enum flow_offload_tuple_dir dir,
5416+ struct nf_flow_rule *flow_rule)
5417+{
5418+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5419+ u32 mask, port;
5420+ u32 offset;
5421+
5422+ switch (dir) {
5423+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5424+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
5425+ offset = 0; /* offsetof(struct tcphdr, source); */
5426+ port = htonl(port << 16);
5427+ mask = ~htonl(0xffff0000);
5428+ break;
5429+ case FLOW_OFFLOAD_DIR_REPLY:
5430+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
5431+ offset = 0; /* offsetof(struct tcphdr, dest); */
5432+ port = htonl(port);
5433+ mask = ~htonl(0xffff);
5434+ break;
5435+ default:
5436+ return;
5437+ }
5438+
5439+ flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
5440+ &port, &mask);
5441+}
5442+
5443+static void flow_offload_port_dnat(struct net *net,
5444+ const struct flow_offload *flow,
5445+ enum flow_offload_tuple_dir dir,
5446+ struct nf_flow_rule *flow_rule)
5447+{
5448+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5449+ u32 mask, port;
5450+ u32 offset;
5451+
5452+ switch (dir) {
5453+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5454+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
5455+ offset = 0; /* offsetof(struct tcphdr, dest); */
5456+ port = htonl(port);
5457+ mask = ~htonl(0xffff);
5458+ break;
5459+ case FLOW_OFFLOAD_DIR_REPLY:
5460+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
5461+ offset = 0; /* offsetof(struct tcphdr, source); */
5462+ port = htonl(port << 16);
5463+ mask = ~htonl(0xffff0000);
5464+ break;
5465+ default:
5466+ return;
5467+ }
5468+
5469+ flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
5470+ &port, &mask);
5471+}
5472+
5473+static void flow_offload_ipv4_checksum(struct net *net,
5474+ const struct flow_offload *flow,
5475+ struct nf_flow_rule *flow_rule)
5476+{
5477+ u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
5478+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5479+
5480+ entry->id = FLOW_ACTION_CSUM;
5481+ entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;
5482+
5483+ switch (protonum) {
5484+ case IPPROTO_TCP:
5485+ entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
5486+ break;
5487+ case IPPROTO_UDP:
5488+ entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
5489+ break;
5490+ }
5491+}
5492+
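+/* Emit FLOW_ACTION_REDIRECT towards the egress device: out.hw_ifidx for
+ * direct xmit, otherwise the peer tuple's input interface. The device
+ * reference taken here is released in __nf_flow_offload_destroy().
+ */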
5493+static void flow_offload_redirect(struct net *net,
5494+ const struct flow_offload *flow,
5495+ enum flow_offload_tuple_dir dir,
5496+ struct nf_flow_rule *flow_rule)
5497+{
5498+ const struct flow_offload_tuple *this_tuple, *other_tuple;
5499+ struct flow_action_entry *entry;
5500+ struct net_device *dev;
5501+ int ifindex;
5502+
5503+ this_tuple = &flow->tuplehash[dir].tuple;
5504+ switch (this_tuple->xmit_type) {
5505+ case FLOW_OFFLOAD_XMIT_DIRECT:
5506+ this_tuple = &flow->tuplehash[dir].tuple;
5507+ ifindex = this_tuple->out.hw_ifidx;
5508+ break;
5509+ case FLOW_OFFLOAD_XMIT_NEIGH:
5510+ other_tuple = &flow->tuplehash[!dir].tuple;
5511+ ifindex = other_tuple->iifidx;
5512+ break;
5513+ default:
5514+ return;
5515+ }
5516+
5517+ dev = dev_get_by_index(net, ifindex);
5518+ if (!dev)
5519+ return;
5520+
5521+ entry = flow_action_entry_next(flow_rule);
5522+ entry->id = FLOW_ACTION_REDIRECT;
5523+ entry->dev = dev;
5524+}
5525+
5526+static void flow_offload_encap_tunnel(const struct flow_offload *flow,
5527+ enum flow_offload_tuple_dir dir,
5528+ struct nf_flow_rule *flow_rule)
5529+{
5530+ const struct flow_offload_tuple *this_tuple;
5531+ struct flow_action_entry *entry;
5532+ struct dst_entry *dst;
5533+
5534+ this_tuple = &flow->tuplehash[dir].tuple;
5535+ if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
5536+ return;
5537+
5538+ dst = this_tuple->dst_cache;
5539+ if (dst && dst->lwtstate) {
5540+ struct ip_tunnel_info *tun_info;
5541+
5542+ tun_info = lwt_tun_info(dst->lwtstate);
5543+ if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
5544+ entry = flow_action_entry_next(flow_rule);
5545+ entry->id = FLOW_ACTION_TUNNEL_ENCAP;
5546+ entry->tunnel = tun_info;
5547+ }
5548+ }
5549+}
5550+
5551+static void flow_offload_decap_tunnel(const struct flow_offload *flow,
5552+ enum flow_offload_tuple_dir dir,
5553+ struct nf_flow_rule *flow_rule)
5554+{
5555+ const struct flow_offload_tuple *other_tuple;
5556+ struct flow_action_entry *entry;
5557+ struct dst_entry *dst;
5558+
5559+ other_tuple = &flow->tuplehash[!dir].tuple;
5560+ if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
5561+ return;
5562+
5563+ dst = other_tuple->dst_cache;
5564+ if (dst && dst->lwtstate) {
5565+ struct ip_tunnel_info *tun_info;
5566+
5567+ tun_info = lwt_tun_info(dst->lwtstate);
5568+ if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
5569+ entry = flow_action_entry_next(flow_rule);
5570+ entry->id = FLOW_ACTION_TUNNEL_DECAP;
5571+ }
5572+ }
5573+}
5574+
5575+static int
5576+nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
5577+ enum flow_offload_tuple_dir dir,
5578+ struct nf_flow_rule *flow_rule)
5579+{
5580+ const struct flow_offload_tuple *other_tuple;
5581+ const struct flow_offload_tuple *tuple;
5582+ int i;
5583+
5584+ flow_offload_decap_tunnel(flow, dir, flow_rule);
5585+ flow_offload_encap_tunnel(flow, dir, flow_rule);
5586+
5587+ if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
5588+ flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
5589+ return -1;
5590+
5591+ tuple = &flow->tuplehash[dir].tuple;
5592+
5593+ for (i = 0; i < tuple->encap_num; i++) {
5594+ struct flow_action_entry *entry;
5595+
5596+ if (tuple->in_vlan_ingress & BIT(i))
5597+ continue;
5598+
5599+ if (tuple->encap[i].proto == htons(ETH_P_8021Q)) {
5600+ entry = flow_action_entry_next(flow_rule);
5601+ entry->id = FLOW_ACTION_VLAN_POP;
5602+ }
5603+ }
5604+
5605+ other_tuple = &flow->tuplehash[!dir].tuple;
5606+
5607+ for (i = 0; i < other_tuple->encap_num; i++) {
5608+ struct flow_action_entry *entry;
5609+
5610+ if (other_tuple->in_vlan_ingress & BIT(i))
5611+ continue;
5612+
5613+ entry = flow_action_entry_next(flow_rule);
5614+
5615+ switch (other_tuple->encap[i].proto) {
5616+ case htons(ETH_P_PPP_SES):
5617+ entry->id = FLOW_ACTION_PPPOE_PUSH;
5618+ entry->pppoe.sid = other_tuple->encap[i].id;
5619+ break;
5620+ case htons(ETH_P_8021Q):
5621+ entry->id = FLOW_ACTION_VLAN_PUSH;
5622+ entry->vlan.vid = other_tuple->encap[i].id;
5623+ entry->vlan.proto = other_tuple->encap[i].proto;
5624+ break;
5625+ }
5626+ }
5627+
5628+ return 0;
5629+}
5630+
5631+int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
5632+ enum flow_offload_tuple_dir dir,
5633+ struct nf_flow_rule *flow_rule)
5634+{
5635+ if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
5636+ return -1;
5637+
5638+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
5639+ flow_offload_ipv4_snat(net, flow, dir, flow_rule);
5640+ flow_offload_port_snat(net, flow, dir, flow_rule);
5641+ }
5642+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
5643+ flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
5644+ flow_offload_port_dnat(net, flow, dir, flow_rule);
5645+ }
5646+ if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
5647+ test_bit(NF_FLOW_DNAT, &flow->flags))
5648+ flow_offload_ipv4_checksum(net, flow, flow_rule);
5649+
5650+ flow_offload_redirect(net, flow, dir, flow_rule);
5651+
5652+ return 0;
5653+}
5654+EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);
5655+
5656+int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
5657+ enum flow_offload_tuple_dir dir,
5658+ struct nf_flow_rule *flow_rule)
5659+{
5660+ if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
5661+ return -1;
5662+
5663+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
5664+ flow_offload_ipv6_snat(net, flow, dir, flow_rule);
5665+ flow_offload_port_snat(net, flow, dir, flow_rule);
5666+ }
5667+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
5668+ flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
5669+ flow_offload_port_dnat(net, flow, dir, flow_rule);
5670+ }
5671+
5672+ flow_offload_redirect(net, flow, dir, flow_rule);
5673+
5674+ return 0;
5675+}
5676+EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);
5677+
5678+#define NF_FLOW_RULE_ACTION_MAX 16
5679+
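+/* Build the flow_rule for one direction: allocate room for up to
+ * NF_FLOW_RULE_ACTION_MAX actions, fill the match from the tuple and let
+ * the flowtable type's ->action() callback append the action entries.
+ */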
5680+static struct nf_flow_rule *
5681+nf_flow_offload_rule_alloc(struct net *net,
5682+ const struct flow_offload_work *offload,
5683+ enum flow_offload_tuple_dir dir)
5684+{
5685+ const struct nf_flowtable *flowtable = offload->flowtable;
5686+ const struct flow_offload_tuple *tuple, *other_tuple;
5687+ const struct flow_offload *flow = offload->flow;
5688+ struct dst_entry *other_dst = NULL;
5689+ struct nf_flow_rule *flow_rule;
5690+ int err = -ENOMEM;
5691+
5692+ flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
5693+ if (!flow_rule)
5694+ goto err_flow;
5695+
5696+ flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
5697+ if (!flow_rule->rule)
5698+ goto err_flow_rule;
5699+
5700+ flow_rule->rule->match.dissector = &flow_rule->match.dissector;
5701+ flow_rule->rule->match.mask = &flow_rule->match.mask;
5702+ flow_rule->rule->match.key = &flow_rule->match.key;
5703+
5704+ tuple = &flow->tuplehash[dir].tuple;
5705+ other_tuple = &flow->tuplehash[!dir].tuple;
5706+ if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
5707+ other_dst = other_tuple->dst_cache;
5708+
5709+ err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
5710+ if (err < 0)
5711+ goto err_flow_match;
5712+
5713+ flow_rule->rule->action.num_entries = 0;
5714+ if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
5715+ goto err_flow_match;
5716+
5717+ return flow_rule;
5718+
5719+err_flow_match:
5720+ kfree(flow_rule->rule);
5721+err_flow_rule:
5722+ kfree(flow_rule);
5723+err_flow:
5724+ return NULL;
5725+}
5726+
5727+static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
5728+{
5729+ struct flow_action_entry *entry;
5730+ int i;
5731+
5732+ for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
5733+ entry = &flow_rule->rule->action.entries[i];
5734+ if (entry->id != FLOW_ACTION_REDIRECT)
5735+ continue;
5736+
5737+ dev_put(entry->dev);
5738+ }
5739+ kfree(flow_rule->rule);
5740+ kfree(flow_rule);
5741+}
5742+
5743+static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
5744+{
5745+ int i;
5746+
5747+ for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
5748+ __nf_flow_offload_destroy(flow_rule[i]);
5749+}
5750+
5751+static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
5752+ struct nf_flow_rule *flow_rule[])
5753+{
5754+ struct net *net = read_pnet(&offload->flowtable->net);
5755+
5756+ flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
5757+ FLOW_OFFLOAD_DIR_ORIGINAL);
5758+ if (!flow_rule[0])
5759+ return -ENOMEM;
5760+
5761+ flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
5762+ FLOW_OFFLOAD_DIR_REPLY);
5763+ if (!flow_rule[1]) {
5764+ __nf_flow_offload_destroy(flow_rule[0]);
5765+ return -ENOMEM;
5766+ }
5767+
5768+ return 0;
5769+}
5770+
5771+static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
5772+ __be16 proto, int priority,
5773+ enum flow_cls_command cmd,
5774+ const struct flow_offload_tuple *tuple,
5775+ struct netlink_ext_ack *extack)
5776+{
5777+ cls_flow->common.protocol = proto;
5778+ cls_flow->common.prio = priority;
5779+ cls_flow->common.extack = extack;
5780+ cls_flow->command = cmd;
5781+ cls_flow->cookie = (unsigned long)tuple;
5782+}
5783+
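+/* Push one FLOW_CLS_* command to every callback on the flowtable's block
+ * list (under flow_block_lock) and return how many callbacks accepted it;
+ * FLOW_CLS_STATS also copies the collected counters back to @stats.
+ */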
5784+static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
5785+ struct flow_offload *flow,
5786+ struct nf_flow_rule *flow_rule,
5787+ enum flow_offload_tuple_dir dir,
5788+ int priority, int cmd,
5789+ struct flow_stats *stats,
5790+ struct list_head *block_cb_list)
5791+{
5792+ struct flow_cls_offload cls_flow = {};
5793+ struct flow_block_cb *block_cb;
5794+ struct netlink_ext_ack extack;
5795+ __be16 proto = ETH_P_ALL;
5796+ int err, i = 0;
5797+
5798+ nf_flow_offload_init(&cls_flow, proto, priority, cmd,
5799+ &flow->tuplehash[dir].tuple, &extack);
5800+ if (cmd == FLOW_CLS_REPLACE)
5801+ cls_flow.rule = flow_rule->rule;
5802+
5803+ down_write(&flowtable->flow_block_lock);
5804+ list_for_each_entry(block_cb, block_cb_list, list) {
5805+ err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
5806+ block_cb->cb_priv);
5807+ if (err < 0)
5808+ continue;
5809+
5810+ i++;
5811+ }
5812+ up_write(&flowtable->flow_block_lock);
5813+
5814+ if (cmd == FLOW_CLS_STATS)
5815+ memcpy(stats, &cls_flow.stats, sizeof(*stats));
5816+
5817+ return i;
5818+}
5819+
5820+static int flow_offload_tuple_add(struct flow_offload_work *offload,
5821+ struct nf_flow_rule *flow_rule,
5822+ enum flow_offload_tuple_dir dir)
5823+{
5824+ return nf_flow_offload_tuple(offload->flowtable, offload->flow,
5825+ flow_rule, dir, offload->priority,
5826+ FLOW_CLS_REPLACE, NULL,
5827+ &offload->flowtable->flow_block.cb_list);
5828+}
5829+
5830+static void flow_offload_tuple_del(struct flow_offload_work *offload,
5831+ enum flow_offload_tuple_dir dir)
5832+{
5833+ nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
5834+ offload->priority, FLOW_CLS_DESTROY, NULL,
5835+ &offload->flowtable->flow_block.cb_list);
5836+}
5837+
5838+static int flow_offload_rule_add(struct flow_offload_work *offload,
5839+ struct nf_flow_rule *flow_rule[])
5840+{
5841+ int ok_count = 0;
5842+
5843+ ok_count += flow_offload_tuple_add(offload, flow_rule[0],
5844+ FLOW_OFFLOAD_DIR_ORIGINAL);
5845+ ok_count += flow_offload_tuple_add(offload, flow_rule[1],
5846+ FLOW_OFFLOAD_DIR_REPLY);
5847+ if (ok_count == 0)
5848+ return -ENOENT;
5849+
5850+ return 0;
5851+}
5852+
5853+static void flow_offload_work_add(struct flow_offload_work *offload)
5854+{
5855+ struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
5856+ int err;
5857+
5858+ err = nf_flow_offload_alloc(offload, flow_rule);
5859+ if (err < 0)
5860+ return;
5861+
5862+ err = flow_offload_rule_add(offload, flow_rule);
5863+ if (err < 0)
5864+ goto out;
5865+
5866+ set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
5867+
5868+out:
5869+ nf_flow_offload_destroy(flow_rule);
5870+}
5871+
5872+static void flow_offload_work_del(struct flow_offload_work *offload)
5873+{
5874+ clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
5875+ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
5876+ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
5877+ set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
5878+}
5879+
5880+static void flow_offload_tuple_stats(struct flow_offload_work *offload,
5881+ enum flow_offload_tuple_dir dir,
5882+ struct flow_stats *stats)
5883+{
5884+ nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
5885+ offload->priority, FLOW_CLS_STATS, stats,
5886+ &offload->flowtable->flow_block.cb_list);
5887+}
5888+
5889+static void flow_offload_work_stats(struct flow_offload_work *offload)
5890+{
5891+ struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
5892+ u64 lastused;
5893+
5894+ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
5895+ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);
5896+
5897+ lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
5898+ offload->flow->timeout = max_t(u64, offload->flow->timeout,
5899+ lastused + flow_offload_get_timeout(offload->flow));
5900+
5901+ if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
5902+ if (stats[0].pkts)
5903+ nf_ct_acct_add(offload->flow->ct,
5904+ FLOW_OFFLOAD_DIR_ORIGINAL,
5905+ stats[0].pkts, stats[0].bytes);
5906+ if (stats[1].pkts)
5907+ nf_ct_acct_add(offload->flow->ct,
5908+ FLOW_OFFLOAD_DIR_REPLY,
5909+ stats[1].pkts, stats[1].bytes);
5910+ }
5911+}
5912+
5913+static void flow_offload_work_handler(struct work_struct *work)
5914+{
5915+ struct flow_offload_work *offload;
5916+
5917+ offload = container_of(work, struct flow_offload_work, work);
5918+ switch (offload->cmd) {
5919+ case FLOW_CLS_REPLACE:
5920+ flow_offload_work_add(offload);
5921+ break;
5922+ case FLOW_CLS_DESTROY:
5923+ flow_offload_work_del(offload);
5924+ break;
5925+ case FLOW_CLS_STATS:
5926+ flow_offload_work_stats(offload);
5927+ break;
5928+ default:
5929+ WARN_ON_ONCE(1);
5930+ }
5931+
5932+ clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags);
5933+ kfree(offload);
5934+}
5935+
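+/* Dispatch the request to the matching workqueue: add, delete and stats
+ * work each run on their own unbound workqueue, see
+ * nf_flow_table_offload_init() below.
+ */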
5936+static void flow_offload_queue_work(struct flow_offload_work *offload)
5937+{
5938+ if (offload->cmd == FLOW_CLS_REPLACE)
5939+ queue_work(nf_flow_offload_add_wq, &offload->work);
5940+ else if (offload->cmd == FLOW_CLS_DESTROY)
5941+ queue_work(nf_flow_offload_del_wq, &offload->work);
5942+ else
5943+ queue_work(nf_flow_offload_stats_wq, &offload->work);
5944+}
5945+
5946+static struct flow_offload_work *
5947+nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
5948+ struct flow_offload *flow, unsigned int cmd)
5949+{
5950+ struct flow_offload_work *offload;
5951+
5952+ if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags))
5953+ return NULL;
5954+
5955+ offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
5956+ if (!offload) {
5957+ clear_bit(NF_FLOW_HW_PENDING, &flow->flags);
5958+ return NULL;
5959+ }
5960+
5961+ offload->cmd = cmd;
5962+ offload->flow = flow;
5963+ offload->priority = flowtable->priority;
5964+ offload->flowtable = flowtable;
5965+ INIT_WORK(&offload->work, flow_offload_work_handler);
5966+
5967+ return offload;
5968+}
5969+
5970+
5971+void nf_flow_offload_add(struct nf_flowtable *flowtable,
5972+ struct flow_offload *flow)
5973+{
5974+ struct flow_offload_work *offload;
5975+
5976+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
5977+ if (!offload)
5978+ return;
5979+
5980+ flow_offload_queue_work(offload);
5981+}
5982+
5983+void nf_flow_offload_del(struct nf_flowtable *flowtable,
5984+ struct flow_offload *flow)
5985+{
5986+ struct flow_offload_work *offload;
5987+
5988+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY);
5989+ if (!offload)
5990+ return;
5991+
5992+ set_bit(NF_FLOW_HW_DYING, &flow->flags);
5993+ flow_offload_queue_work(offload);
5994+}
5995+
5996+void nf_flow_offload_stats(struct nf_flowtable *flowtable,
5997+ struct flow_offload *flow)
5998+{
5999+ struct flow_offload_work *offload;
6000+ __s32 delta;
6001+
6002+ delta = nf_flow_timeout_delta(flow->timeout);
6003+ if ((delta >= (9 * flow_offload_get_timeout(flow)) / 10))
6004+ return;
6005+
6006+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
6007+ if (!offload)
6008+ return;
6009+
6010+ flow_offload_queue_work(offload);
6011+}
6012+
6013+void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
6014+{
6015+ if (nf_flowtable_hw_offload(flowtable)) {
6016+ flush_workqueue(nf_flow_offload_add_wq);
6017+ flush_workqueue(nf_flow_offload_del_wq);
6018+ flush_workqueue(nf_flow_offload_stats_wq);
6019+ }
6020+}
6021+
6022+static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
6023+ struct flow_block_offload *bo,
6024+ enum flow_block_command cmd)
6025+{
6026+ struct flow_block_cb *block_cb, *next;
6027+ int err = 0;
6028+
6029+ down_read(&flowtable->flow_block_lock);
6030+
6031+ switch (cmd) {
6032+ case FLOW_BLOCK_BIND:
6033+ list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
6034+ break;
6035+ case FLOW_BLOCK_UNBIND:
6036+ list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
6037+ list_del(&block_cb->list);
6038+ flow_block_cb_free(block_cb);
6039+ }
6040+ break;
6041+ default:
6042+ WARN_ON_ONCE(1);
6043+ err = -EOPNOTSUPP;
6044+ }
6045+
6046+ up_read(&flowtable->flow_block_lock);
6047+
6048+ return err;
6049+}
6050+
6051+static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
6052+ struct net *net,
6053+ enum flow_block_command cmd,
6054+ struct nf_flowtable *flowtable,
6055+ struct netlink_ext_ack *extack)
6056+{
6057+ memset(bo, 0, sizeof(*bo));
6058+ bo->net = net;
6059+ bo->block = &flowtable->flow_block;
6060+ bo->command = cmd;
6061+ bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
6062+ bo->extack = extack;
6063+ INIT_LIST_HEAD(&bo->cb_list);
6064+}
6065+
6066+static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
6067+ struct nf_flowtable *flowtable,
6068+ struct net_device *dev,
6069+ enum flow_block_command cmd,
6070+ struct netlink_ext_ack *extack)
6071+{
6072+ nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
6073+ extack);
6074+ flow_indr_block_call(dev, bo, cmd);
6075+
6076+ if (list_empty(&bo->cb_list))
6077+ return -EOPNOTSUPP;
6078+
6079+ return 0;
6080+}
6081+
6082+static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
6083+ struct nf_flowtable *flowtable,
6084+ struct net_device *dev,
6085+ enum flow_block_command cmd,
6086+ struct netlink_ext_ack *extack)
6087+{
6088+ int err;
6089+
6090+ nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
6091+ extack);
6092+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
6093+ if (err < 0)
6094+ return err;
6095+
6096+ return 0;
6097+}
6098+
6099+int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
6100+ struct net_device *dev,
6101+ enum flow_block_command cmd)
6102+{
6103+ struct netlink_ext_ack extack = {};
6104+ struct flow_block_offload bo;
6105+ int err;
6106+
6107+ if (!nf_flowtable_hw_offload(flowtable))
6108+ return 0;
6109+
6110+ if (dev->netdev_ops->ndo_setup_tc)
6111+ err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
6112+ &extack);
6113+ else
6114+ err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
6115+ &extack);
6116+ if (err < 0)
6117+ return err;
6118+
6119+ return nf_flow_table_block_setup(flowtable, &bo, cmd);
6120+}
6121+EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);
6122+
6123+int nf_flow_table_offload_init(void)
6124+{
6125+ nf_flow_offload_add_wq = alloc_workqueue("nf_ft_offload_add",
6126+ WQ_UNBOUND | WQ_SYSFS, 0);
6127+ if (!nf_flow_offload_add_wq)
6128+ return -ENOMEM;
6129+
6130+ nf_flow_offload_del_wq = alloc_workqueue("nf_ft_offload_del",
6131+ WQ_UNBOUND | WQ_SYSFS, 0);
6132+ if (!nf_flow_offload_del_wq)
6133+ goto err_del_wq;
6134+
6135+ nf_flow_offload_stats_wq = alloc_workqueue("nf_ft_offload_stats",
6136+ WQ_UNBOUND | WQ_SYSFS, 0);
6137+ if (!nf_flow_offload_stats_wq)
6138+ goto err_stats_wq;
6139+
6140+ return 0;
6141+
6142+err_stats_wq:
6143+ destroy_workqueue(nf_flow_offload_del_wq);
6144+err_del_wq:
6145+ destroy_workqueue(nf_flow_offload_add_wq);
6146+ return -ENOMEM;
6147+}
6148+
6149+void nf_flow_table_offload_exit(void)
6150+{
6151+ destroy_workqueue(nf_flow_offload_add_wq);
6152+ destroy_workqueue(nf_flow_offload_del_wq);
6153+ destroy_workqueue(nf_flow_offload_stats_wq);
6154+}
6155diff --git a/net/netfilter/xt_FLOWOFFLOAD.c b/net/netfilter/xt_FLOWOFFLOAD.c
6156new file mode 100644
6157index 000000000..ae1eb2656
6158--- /dev/null
6159+++ b/net/netfilter/xt_FLOWOFFLOAD.c
6160@@ -0,0 +1,728 @@
6161+/*
6162+ * Copyright (C) 2018-2021 Felix Fietkau <nbd@nbd.name>
6163+ *
6164+ * This program is free software; you can redistribute it and/or modify
6165+ * it under the terms of the GNU General Public License version 2 as
6166+ * published by the Free Software Foundation.
6167+ */
6168+#include <linux/module.h>
6169+#include <linux/init.h>
6170+#include <linux/netfilter.h>
6171+#include <linux/netfilter/xt_FLOWOFFLOAD.h>
6172+#include <linux/if_vlan.h>
6173+#include <net/ip.h>
6174+#include <net/netfilter/nf_conntrack.h>
6175+#include <net/netfilter/nf_conntrack_extend.h>
6176+#include <net/netfilter/nf_conntrack_helper.h>
6177+#include <net/netfilter/nf_flow_table.h>
6178+
6179+struct xt_flowoffload_hook {
6180+ struct hlist_node list;
6181+ struct nf_hook_ops ops;
6182+ struct net *net;
6183+ bool registered;
6184+ bool used;
6185+};
6186+
6187+struct xt_flowoffload_table {
6188+ struct nf_flowtable ft;
6189+ struct hlist_head hooks;
6190+ struct delayed_work work;
6191+};
6192+
6193+struct nf_forward_info {
6194+ const struct net_device *indev;
6195+ const struct net_device *outdev;
6196+ const struct net_device *hw_outdev;
6197+ struct id {
6198+ __u16 id;
6199+ __be16 proto;
6200+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
6201+ u8 num_encaps;
6202+ u8 ingress_vlans;
6203+ u8 h_source[ETH_ALEN];
6204+ u8 h_dest[ETH_ALEN];
6205+ enum flow_offload_xmit_type xmit_type;
6206+};
6207+
6208+static DEFINE_SPINLOCK(hooks_lock);
6209+
6210+struct xt_flowoffload_table flowtable[2];
6211+
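+/* netdev ingress hook: peel an outer VLAN or PPPoE header to find the
+ * encapsulated protocol, then hand IPv4/IPv6 packets to the generic
+ * flowtable hooks.
+ */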
6212+static unsigned int
6213+xt_flowoffload_net_hook(void *priv, struct sk_buff *skb,
6214+ const struct nf_hook_state *state)
6215+{
6216+ struct vlan_ethhdr *veth;
6217+ __be16 proto;
6218+
6219+ switch (skb->protocol) {
6220+ case htons(ETH_P_8021Q):
6221+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
6222+ proto = veth->h_vlan_encapsulated_proto;
6223+ break;
6224+ case htons(ETH_P_PPP_SES):
6225+ proto = nf_flow_pppoe_proto(skb);
6226+ break;
6227+ default:
6228+ proto = skb->protocol;
6229+ break;
6230+ }
6231+
6232+ switch (proto) {
6233+ case htons(ETH_P_IP):
6234+ return nf_flow_offload_ip_hook(priv, skb, state);
6235+ case htons(ETH_P_IPV6):
6236+ return nf_flow_offload_ipv6_hook(priv, skb, state);
6237+ }
6238+
6239+ return NF_ACCEPT;
6240+}
6241+
6242+static int
6243+xt_flowoffload_create_hook(struct xt_flowoffload_table *table,
6244+ struct net_device *dev)
6245+{
6246+ struct xt_flowoffload_hook *hook;
6247+ struct nf_hook_ops *ops;
6248+
6249+ hook = kzalloc(sizeof(*hook), GFP_ATOMIC);
6250+ if (!hook)
6251+ return -ENOMEM;
6252+
6253+ ops = &hook->ops;
6254+ ops->pf = NFPROTO_NETDEV;
6255+ ops->hooknum = NF_NETDEV_INGRESS;
6256+ ops->priority = 10;
6257+ ops->priv = &table->ft;
6258+ ops->hook = xt_flowoffload_net_hook;
6259+ ops->dev = dev;
6260+
6261+ hlist_add_head(&hook->list, &table->hooks);
6262+ mod_delayed_work(system_power_efficient_wq, &table->work, 0);
6263+
6264+ return 0;
6265+}
6266+
6267+static struct xt_flowoffload_hook *
6268+flow_offload_lookup_hook(struct xt_flowoffload_table *table,
6269+ struct net_device *dev)
6270+{
6271+ struct xt_flowoffload_hook *hook;
6272+
6273+ hlist_for_each_entry(hook, &table->hooks, list) {
6274+ if (hook->ops.dev == dev)
6275+ return hook;
6276+ }
6277+
6278+ return NULL;
6279+}
6280+
6281+static void
6282+xt_flowoffload_check_device(struct xt_flowoffload_table *table,
6283+ struct net_device *dev)
6284+{
6285+ struct xt_flowoffload_hook *hook;
6286+
6287+ if (!dev)
6288+ return;
6289+
6290+ spin_lock_bh(&hooks_lock);
6291+ hook = flow_offload_lookup_hook(table, dev);
6292+ if (hook)
6293+ hook->used = true;
6294+ else
6295+ xt_flowoffload_create_hook(table, dev);
6296+ spin_unlock_bh(&hooks_lock);
6297+}
6298+
6299+static void
6300+xt_flowoffload_register_hooks(struct xt_flowoffload_table *table)
6301+{
6302+ struct xt_flowoffload_hook *hook;
6303+
6304+restart:
6305+ hlist_for_each_entry(hook, &table->hooks, list) {
6306+ if (hook->registered)
6307+ continue;
6308+
6309+ hook->registered = true;
6310+ hook->net = dev_net(hook->ops.dev);
6311+ spin_unlock_bh(&hooks_lock);
6312+ nf_register_net_hook(hook->net, &hook->ops);
6313+ if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
6314+ table->ft.type->setup(&table->ft, hook->ops.dev,
6315+ FLOW_BLOCK_BIND);
6316+ spin_lock_bh(&hooks_lock);
6317+ goto restart;
6318+ }
6319+
6320+}
6321+
6322+static bool
6323+xt_flowoffload_cleanup_hooks(struct xt_flowoffload_table *table)
6324+{
6325+ struct xt_flowoffload_hook *hook;
6326+ bool active = false;
6327+
6328+restart:
6329+ spin_lock_bh(&hooks_lock);
6330+ hlist_for_each_entry(hook, &table->hooks, list) {
6331+ if (hook->used || !hook->registered) {
6332+ active = true;
6333+ continue;
6334+ }
6335+
6336+ hlist_del(&hook->list);
6337+ spin_unlock_bh(&hooks_lock);
6338+ if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
6339+ table->ft.type->setup(&table->ft, hook->ops.dev,
6340+ FLOW_BLOCK_UNBIND);
6341+ nf_unregister_net_hook(hook->net, &hook->ops);
6342+ kfree(hook);
6343+ goto restart;
6344+ }
6345+ spin_unlock_bh(&hooks_lock);
6346+
6347+ return active;
6348+}
6349+
6350+static void
6351+xt_flowoffload_check_hook(struct flow_offload *flow, void *data)
6352+{
6353+ struct xt_flowoffload_table *table = data;
6354+ struct flow_offload_tuple *tuple0 = &flow->tuplehash[0].tuple;
6355+ struct flow_offload_tuple *tuple1 = &flow->tuplehash[1].tuple;
6356+ struct xt_flowoffload_hook *hook;
6357+
6358+ spin_lock_bh(&hooks_lock);
6359+ hlist_for_each_entry(hook, &table->hooks, list) {
6360+ if (hook->ops.dev->ifindex != tuple0->iifidx &&
6361+ hook->ops.dev->ifindex != tuple1->iifidx)
6362+ continue;
6363+
6364+ hook->used = true;
6365+ }
6366+ spin_unlock_bh(&hooks_lock);
6367+}
6368+
6369+static void
6370+xt_flowoffload_hook_work(struct work_struct *work)
6371+{
6372+ struct xt_flowoffload_table *table;
6373+ struct xt_flowoffload_hook *hook;
6374+ int err;
6375+
6376+ table = container_of(work, struct xt_flowoffload_table, work.work);
6377+
6378+ spin_lock_bh(&hooks_lock);
6379+ xt_flowoffload_register_hooks(table);
6380+ hlist_for_each_entry(hook, &table->hooks, list)
6381+ hook->used = false;
6382+ spin_unlock_bh(&hooks_lock);
6383+
6384+ err = nf_flow_table_iterate(&table->ft, xt_flowoffload_check_hook,
6385+ table);
6386+ if (err && err != -EAGAIN)
6387+ goto out;
6388+
6389+ if (!xt_flowoffload_cleanup_hooks(table))
6390+ return;
6391+
6392+out:
6393+ queue_delayed_work(system_power_efficient_wq, &table->work, HZ);
6394+}
6395+
6396+static bool
6397+xt_flowoffload_skip(struct sk_buff *skb, int family)
6398+{
6399+ if (skb_sec_path(skb))
6400+ return true;
6401+
6402+ if (family == NFPROTO_IPV4) {
6403+ const struct ip_options *opt = &(IPCB(skb)->opt);
6404+
6405+ if (unlikely(opt->optlen))
6406+ return true;
6407+ }
6408+
6409+ return false;
6410+}
6411+
6412+static enum flow_offload_xmit_type nf_xmit_type(struct dst_entry *dst)
6413+{
6414+ if (dst_xfrm(dst))
6415+ return FLOW_OFFLOAD_XMIT_XFRM;
6416+
6417+ return FLOW_OFFLOAD_XMIT_NEIGH;
6418+}
6419+
6420+static void nf_default_forward_path(struct nf_flow_route *route,
6421+ struct dst_entry *dst_cache,
6422+ enum ip_conntrack_dir dir,
6423+ struct net_device **dev)
6424+{
6425+ route->tuple[!dir].in.ifindex = dst_cache->dev->ifindex;
6426+ route->tuple[dir].dst = dst_cache;
6427+ route->tuple[dir].xmit_type = nf_xmit_type(dst_cache);
6428+}
6429+
6430+static bool nf_is_valid_ether_device(const struct net_device *dev)
6431+{
6432+ if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
6433+ dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
6434+ return false;
6435+
6436+ return true;
6437+}
6438+
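+/* Condense the dev_fill_forward_path() stack into nf_forward_info: record
+ * VLAN/PPPoE encap tags, bridge VLAN handling and the MAC addresses, and
+ * pick the real egress device for the direct xmit path.
+ */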
6439+static void nf_dev_path_info(const struct net_device_path_stack *stack,
6440+ struct nf_forward_info *info,
6441+ unsigned char *ha)
6442+{
6443+ const struct net_device_path *path;
6444+ int i;
6445+
6446+ memcpy(info->h_dest, ha, ETH_ALEN);
6447+
6448+ for (i = 0; i < stack->num_paths; i++) {
6449+ path = &stack->path[i];
6450+
6451+ info->indev = path->dev;
6452+
6453+ switch (path->type) {
6454+ case DEV_PATH_ETHERNET:
6455+ case DEV_PATH_DSA:
6456+ case DEV_PATH_VLAN:
6457+ case DEV_PATH_PPPOE:
6458+ if (is_zero_ether_addr(info->h_source))
6459+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
6460+
6461+ if (path->type == DEV_PATH_ETHERNET)
6462+ break;
6463+ if (path->type == DEV_PATH_DSA) {
6464+ i = stack->num_paths;
6465+ break;
6466+ }
6467+
6468+ /* DEV_PATH_VLAN and DEV_PATH_PPPOE */
6469+ if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
6470+ info->indev = NULL;
6471+ break;
6472+ }
6473+ if (!info->outdev)
6474+ info->outdev = path->dev;
6475+ info->encap[info->num_encaps].id = path->encap.id;
6476+ info->encap[info->num_encaps].proto = path->encap.proto;
6477+ info->num_encaps++;
6478+ if (path->type == DEV_PATH_PPPOE)
6479+ memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
6480+ break;
6481+ case DEV_PATH_BRIDGE:
6482+ if (is_zero_ether_addr(info->h_source))
6483+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
6484+
6485+ switch (path->bridge.vlan_mode) {
6486+ case DEV_PATH_BR_VLAN_UNTAG_HW:
6487+ info->ingress_vlans |= BIT(info->num_encaps - 1);
6488+ break;
6489+ case DEV_PATH_BR_VLAN_TAG:
6490+ info->encap[info->num_encaps].id = path->bridge.vlan_id;
6491+ info->encap[info->num_encaps].proto = path->bridge.vlan_proto;
6492+ info->num_encaps++;
6493+ break;
6494+ case DEV_PATH_BR_VLAN_UNTAG:
6495+ info->num_encaps--;
6496+ break;
6497+ case DEV_PATH_BR_VLAN_KEEP:
6498+ break;
6499+ }
6500+ break;
6501+ default:
6502+ break;
6503+ }
6504+ }
6505+ if (!info->outdev)
6506+ info->outdev = info->indev;
6507+
6508+ info->hw_outdev = info->indev;
6509+
6510+ if (nf_is_valid_ether_device(info->indev))
6511+ info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
6512+}
6513+
6514+static int nf_dev_fill_forward_path(const struct nf_flow_route *route,
6515+ const struct dst_entry *dst_cache,
6516+ const struct nf_conn *ct,
6517+ enum ip_conntrack_dir dir, u8 *ha,
6518+ struct net_device_path_stack *stack)
6519+{
6520+ const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
6521+ struct net_device *dev = dst_cache->dev;
6522+ struct neighbour *n;
6523+ u8 nud_state;
6524+
6525+ if (!nf_is_valid_ether_device(dev))
6526+ goto out;
6527+
6528+ n = dst_neigh_lookup(dst_cache, daddr);
6529+ if (!n)
6530+ return -1;
6531+
6532+ read_lock_bh(&n->lock);
6533+ nud_state = n->nud_state;
6534+ ether_addr_copy(ha, n->ha);
6535+ read_unlock_bh(&n->lock);
6536+ neigh_release(n);
6537+
6538+ if (!(nud_state & NUD_VALID))
6539+ return -1;
6540+
6541+out:
6542+ return dev_fill_forward_path(dev, ha, stack);
6543+}
6544+
6545+static int nf_dev_forward_path(struct nf_flow_route *route,
6546+ const struct nf_conn *ct,
6547+ enum ip_conntrack_dir dir,
6548+ struct net_device **devs)
6549+{
6550+ const struct dst_entry *dst = route->tuple[dir].dst;
6551+ struct net_device_path_stack stack;
6552+ struct nf_forward_info info = {};
6553+ unsigned char ha[ETH_ALEN];
6554+ int i;
6555+
6556+ if (nf_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
6557+ nf_dev_path_info(&stack, &info, ha);
6558+
6559+ devs[!dir] = (struct net_device *)info.indev;
6560+ if (!info.indev)
6561+ return -1;
6562+
6563+ route->tuple[!dir].in.ifindex = info.indev->ifindex;
6564+ for (i = 0; i < info.num_encaps; i++) {
6565+ route->tuple[!dir].in.encap[i].id = info.encap[i].id;
6566+ route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
6567+ }
6568+ route->tuple[!dir].in.num_encaps = info.num_encaps;
6569+ route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
6570+
6571+ if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
6572+ memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
6573+ memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
6574+ route->tuple[dir].out.ifindex = info.outdev->ifindex;
6575+ route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex;
6576+ route->tuple[dir].xmit_type = info.xmit_type;
6577+ }
6578+
6579+ return 0;
6580+}
6581+
6582+static int
6583+xt_flowoffload_route_dir(struct nf_flow_route *route, const struct nf_conn *ct,
6584+ enum ip_conntrack_dir dir,
6585+ const struct xt_action_param *par, int ifindex,
6586+ struct net_device **devs)
6587+{
6588+ struct dst_entry *dst = NULL;
6589+ struct flowi fl;
6590+
6591+ memset(&fl, 0, sizeof(fl));
6592+ switch (xt_family(par)) {
6593+ case NFPROTO_IPV4:
6594+ fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.src.u3.ip;
6595+ fl.u.ip4.flowi4_oif = ifindex;
6596+ break;
6597+ case NFPROTO_IPV6:
6598+ fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
6599+ fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.src.u3.in6;
6600+ fl.u.ip6.flowi6_oif = ifindex;
6601+ break;
6602+ }
6603+
6604+ nf_route(xt_net(par), &dst, &fl, false, xt_family(par));
6605+ if (!dst)
6606+ return -ENOENT;
6607+
6608+ nf_default_forward_path(route, dst, dir, devs);
6609+
6610+ return 0;
6611+}
6612+
6613+static int
6614+xt_flowoffload_route(struct sk_buff *skb, const struct nf_conn *ct,
6615+ const struct xt_action_param *par,
6616+ struct nf_flow_route *route, enum ip_conntrack_dir dir,
6617+ struct net_device **devs)
6618+{
6619+ int ret;
6620+
6621+ ret = xt_flowoffload_route_dir(route, ct, dir, par,
6622+ devs[dir]->ifindex,
6623+ devs);
6624+ if (ret)
6625+ return ret;
6626+
6627+ ret = xt_flowoffload_route_dir(route, ct, !dir, par,
6628+ devs[!dir]->ifindex,
6629+ devs);
6630+ if (ret)
6631+ goto err_route_dir1;
6632+
6633+ if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH &&
6634+ route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
6635+ if (nf_dev_forward_path(route, ct, dir, devs) ||
6636+ nf_dev_forward_path(route, ct, !dir, devs)) {
6637+ ret = -1;
6638+ goto err_route_dir2;
6639+ }
6640+ }
6641+
6642+ return 0;
6643+
6644+err_route_dir2:
6645+ dst_release(route->tuple[!dir].dst);
6646+err_route_dir1:
6647+ dst_release(route->tuple[dir].dst);
6648+ return ret;
6649+}
6650+
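+/* FLOWOFFLOAD target: offload established TCP (no FIN/RST seen) and UDP
+ * conntrack entries that use no helper and no sequence adjustment. Routes
+ * are resolved for both directions and the flow is added to the software
+ * or hardware flowtable, depending on XT_FLOWOFFLOAD_HW.
+ */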
6651+static unsigned int
6652+flowoffload_tg(struct sk_buff *skb, const struct xt_action_param *par)
6653+{
6654+ struct xt_flowoffload_table *table;
6655+ const struct xt_flowoffload_target_info *info = par->targinfo;
6656+ struct tcphdr _tcph, *tcph = NULL;
6657+ enum ip_conntrack_info ctinfo;
6658+ enum ip_conntrack_dir dir;
6659+ struct nf_flow_route route = {};
6660+ struct flow_offload *flow = NULL;
6661+ struct net_device *devs[2] = {};
6662+ struct nf_conn *ct;
6663+ struct net *net;
6664+
6665+ if (xt_flowoffload_skip(skb, xt_family(par)))
6666+ return XT_CONTINUE;
6667+
6668+ ct = nf_ct_get(skb, &ctinfo);
6669+ if (ct == NULL)
6670+ return XT_CONTINUE;
6671+
6672+ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
6673+ case IPPROTO_TCP:
6674+ if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
6675+ return XT_CONTINUE;
6676+
6677+ tcph = skb_header_pointer(skb, par->thoff,
6678+ sizeof(_tcph), &_tcph);
6679+ if (unlikely(!tcph || tcph->fin || tcph->rst))
6680+ return XT_CONTINUE;
6681+ break;
6682+ case IPPROTO_UDP:
6683+ break;
6684+ default:
6685+ return XT_CONTINUE;
6686+ }
6687+
6688+ if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
6689+ ct->status & IPS_SEQ_ADJUST)
6690+ return XT_CONTINUE;
6691+
6692+ if (!nf_ct_is_confirmed(ct))
6693+ return XT_CONTINUE;
6694+
6695+ dir = CTINFO2DIR(ctinfo);
6696+ devs[dir] = xt_out(par);
6697+ devs[!dir] = xt_in(par);
6698+
6699+ if (!devs[dir] || !devs[!dir])
6700+ return XT_CONTINUE;
6701+
6702+ if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
6703+ return XT_CONTINUE;
6704+
6705+
6706+ if (xt_flowoffload_route(skb, ct, par, &route, dir, devs) < 0)
6707+ goto err_flow_route;
6708+
6709+ flow = flow_offload_alloc(ct);
6710+ if (!flow)
6711+ goto err_flow_alloc;
6712+
6713+ if (flow_offload_route_init(flow, &route) < 0)
6714+ goto err_flow_add;
6715+
6716+ if (tcph) {
6717+ ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
6718+ ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
6719+ }
6720+
6721+ table = &flowtable[!!(info->flags & XT_FLOWOFFLOAD_HW)];
6722+
6723+ net = read_pnet(&table->ft.net);
6724+ if (!net)
6725+ write_pnet(&table->ft.net, xt_net(par));
6726+
6727+ if (flow_offload_add(&table->ft, flow) < 0)
6728+ goto err_flow_add;
6729+
6730+ xt_flowoffload_check_device(table, devs[0]);
6731+ xt_flowoffload_check_device(table, devs[1]);
6732+
6733+ dst_release(route.tuple[dir].dst);
6734+ dst_release(route.tuple[!dir].dst);
6735+
6736+ return XT_CONTINUE;
6737+
6738+err_flow_add:
6739+ flow_offload_free(flow);
6740+err_flow_alloc:
6741+ dst_release(route.tuple[dir].dst);
6742+ dst_release(route.tuple[!dir].dst);
6743+err_flow_route:
6744+ clear_bit(IPS_OFFLOAD_BIT, &ct->status);
6745+
6746+ return XT_CONTINUE;
6747+}
6748+
6749+static int flowoffload_chk(const struct xt_tgchk_param *par)
6750+{
6751+ struct xt_flowoffload_target_info *info = par->targinfo;
6752+
6753+ if (info->flags & ~XT_FLOWOFFLOAD_MASK)
6754+ return -EINVAL;
6755+
6756+ return 0;
6757+}
6758+
6759+static struct xt_target offload_tg_reg __read_mostly = {
6760+ .family = NFPROTO_UNSPEC,
6761+ .name = "FLOWOFFLOAD",
6762+ .revision = 0,
6763+ .targetsize = sizeof(struct xt_flowoffload_target_info),
6764+ .usersize = sizeof(struct xt_flowoffload_target_info),
6765+ .checkentry = flowoffload_chk,
6766+ .target = flowoffload_tg,
6767+ .me = THIS_MODULE,
6768+};
6769+
6770+static int flow_offload_netdev_event(struct notifier_block *this,
6771+ unsigned long event, void *ptr)
6772+{
6773+ struct xt_flowoffload_hook *hook0, *hook1;
6774+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
6775+
6776+ if (event != NETDEV_UNREGISTER)
6777+ return NOTIFY_DONE;
6778+
6779+ spin_lock_bh(&hooks_lock);
6780+ hook0 = flow_offload_lookup_hook(&flowtable[0], dev);
6781+ if (hook0)
6782+ hlist_del(&hook0->list);
6783+
6784+ hook1 = flow_offload_lookup_hook(&flowtable[1], dev);
6785+ if (hook1)
6786+ hlist_del(&hook1->list);
6787+ spin_unlock_bh(&hooks_lock);
6788+
6789+ if (hook0) {
6790+ nf_unregister_net_hook(hook0->net, &hook0->ops);
6791+ kfree(hook0);
6792+ }
6793+
6794+ if (hook1) {
6795+ nf_unregister_net_hook(hook1->net, &hook1->ops);
6796+ kfree(hook1);
6797+ }
6798+
6799+ nf_flow_table_cleanup(dev);
6800+
6801+ return NOTIFY_DONE;
6802+}
6803+
6804+static struct notifier_block flow_offload_netdev_notifier = {
6805+ .notifier_call = flow_offload_netdev_event,
6806+};
6807+
6808+static int nf_flow_rule_route_inet(struct net *net,
6809+ const struct flow_offload *flow,
6810+ enum flow_offload_tuple_dir dir,
6811+ struct nf_flow_rule *flow_rule)
6812+{
6813+ const struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
6814+ int err;
6815+
6816+ switch (flow_tuple->l3proto) {
6817+ case NFPROTO_IPV4:
6818+ err = nf_flow_rule_route_ipv4(net, flow, dir, flow_rule);
6819+ break;
6820+ case NFPROTO_IPV6:
6821+ err = nf_flow_rule_route_ipv6(net, flow, dir, flow_rule);
6822+ break;
6823+ default:
6824+ err = -1;
6825+ break;
6826+ }
6827+
6828+ return err;
6829+}
6830+
6831+static struct nf_flowtable_type flowtable_inet = {
6832+ .family = NFPROTO_INET,
6833+ .init = nf_flow_table_init,
6834+ .setup = nf_flow_table_offload_setup,
6835+ .action = nf_flow_rule_route_inet,
6836+ .free = nf_flow_table_free,
6837+ .hook = xt_flowoffload_net_hook,
6838+ .owner = THIS_MODULE,
6839+};
6840+
6841+static int init_flowtable(struct xt_flowoffload_table *tbl)
6842+{
6843+ INIT_DELAYED_WORK(&tbl->work, xt_flowoffload_hook_work);
6844+ tbl->ft.type = &flowtable_inet;
6845+
6846+ return nf_flow_table_init(&tbl->ft);
6847+}
6848+
6849+static int __init xt_flowoffload_tg_init(void)
6850+{
6851+ int ret;
6852+
6853+ register_netdevice_notifier(&flow_offload_netdev_notifier);
6854+
6855+ ret = init_flowtable(&flowtable[0]);
6856+ if (ret)
6857+ return ret;
6858+
6859+ ret = init_flowtable(&flowtable[1]);
6860+ if (ret)
6861+ goto cleanup;
6862+
6863+ flowtable[1].ft.flags = NF_FLOWTABLE_HW_OFFLOAD;
6864+
6865+ ret = xt_register_target(&offload_tg_reg);
6866+ if (ret)
6867+ goto cleanup2;
6868+
6869+ return 0;
6870+
6871+cleanup2:
6872+ nf_flow_table_free(&flowtable[1].ft);
6873+cleanup:
6874+ nf_flow_table_free(&flowtable[0].ft);
6875+ return ret;
6876+}
6877+
6878+static void __exit xt_flowoffload_tg_exit(void)
6879+{
6880+ xt_unregister_target(&offload_tg_reg);
6881+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
6882+ nf_flow_table_free(&flowtable[0].ft);
6883+ nf_flow_table_free(&flowtable[1].ft);
6884+}
6885+
6886+MODULE_LICENSE("GPL");
6887+module_init(xt_flowoffload_tg_init);
6888+module_exit(xt_flowoffload_tg_exit);
6889--
68902.18.0
6891