1From 6ad9bd65769003ab526e504577e0f747eba14287 Mon Sep 17 00:00:00 2001
2From: Bo Jiao <Bo.Jiao@mediatek.com>
3Date: Wed, 22 Jun 2022 09:42:19 +0800
4Subject: [PATCH 1/8]
5 9990-mt7622-backport-nf-hw-offload-framework-and-upstream-hnat-plus-xt-FLOWOFFLOAD-update-v2
6
7---
8 drivers/net/ethernet/mediatek/Makefile | 3 +-
9 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 28 +-
10 drivers/net/ethernet/mediatek/mtk_eth_soc.h | 20 +-
11 drivers/net/ethernet/mediatek/mtk_ppe.c | 509 +++++++
12 drivers/net/ethernet/mediatek/mtk_ppe.h | 288 ++++
13 .../net/ethernet/mediatek/mtk_ppe_debugfs.c | 214 +++
14 .../net/ethernet/mediatek/mtk_ppe_offload.c | 526 ++++++++
15 drivers/net/ethernet/mediatek/mtk_ppe_regs.h | 144 ++
16 drivers/net/ppp/ppp_generic.c | 22 +
17 drivers/net/ppp/pppoe.c | 24 +
18 include/linux/netdevice.h | 60 +
19 include/linux/ppp_channel.h | 3 +
20 include/net/dsa.h | 10 +
21 include/net/flow_offload.h | 4 +
22 include/net/ip6_route.h | 5 +-
23 .../net/netfilter/ipv6/nf_conntrack_ipv6.h | 3 -
24 include/net/netfilter/nf_conntrack.h | 12 +
25 include/net/netfilter/nf_conntrack_acct.h | 11 +
26 include/net/netfilter/nf_flow_table.h | 264 +++-
27 include/net/netns/conntrack.h | 6 +
28 .../linux/netfilter/nf_conntrack_common.h | 9 +-
29 include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h | 17 +
30 net/8021q/vlan_dev.c | 21 +
31 net/bridge/br_device.c | 49 +
32 net/bridge/br_private.h | 20 +
33 net/bridge/br_vlan.c | 55 +
34 net/core/dev.c | 46 +
35 net/dsa/dsa.c | 9 +
36 net/dsa/slave.c | 41 +-
37 net/ipv4/netfilter/Kconfig | 4 +-
38 net/ipv6/ip6_output.c | 2 +-
39 net/ipv6/netfilter/Kconfig | 3 +-
40 net/ipv6/route.c | 22 +-
41 net/netfilter/Kconfig | 14 +-
42 net/netfilter/Makefile | 4 +-
43 net/netfilter/nf_conntrack_core.c | 20 +-
44 net/netfilter/nf_conntrack_proto_tcp.c | 4 +
45 net/netfilter/nf_conntrack_proto_udp.c | 4 +
46 net/netfilter/nf_conntrack_standalone.c | 34 +-
47 net/netfilter/nf_flow_table_core.c | 446 +++---
48 net/netfilter/nf_flow_table_ip.c | 455 ++++---
49 net/netfilter/nf_flow_table_offload.c | 1191 +++++++++++++++++
50 net/netfilter/xt_FLOWOFFLOAD.c | 719 ++++++++++
51 43 files changed, 4913 insertions(+), 432 deletions(-)
52 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.c
53 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe.h
54 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
55 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_offload.c
56 create mode 100644 drivers/net/ethernet/mediatek/mtk_ppe_regs.h
57 create mode 100644 include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
58 create mode 100644 net/netfilter/nf_flow_table_offload.c
59 create mode 100644 net/netfilter/xt_FLOWOFFLOAD.c
60
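Illustrative sketch (not applied by `git am`, since it sits below the diffstat): how the FOE entry helpers introduced in mtk_ppe.h below are chained for an IPv4 NAPT flow, roughly mirroring mtk_flow_offload_replace() in mtk_ppe_offload.c. The src_mac/dest_mac, IP/port variables and PSE port 1 are placeholders, not values taken from the patch.

	struct mtk_foe_entry foe;
	int hash;

	/* base entry: IPv4 HNAPT (NAPT) type, TCP, egress via PSE port 1 (GMAC1),
	 * carrying the resolved source/destination MAC addresses
	 */
	mtk_foe_entry_prepare(&foe, MTK_PPE_PKT_TYPE_IPV4_HNAPT, IPPROTO_TCP, 1,
			      src_mac, dest_mac);

	/* tuple as seen on ingress (before NAT) */
	mtk_foe_entry_set_ipv4_tuple(&foe, false, src_ip, src_port, dst_ip, dst_port);

	/* translated tuple used on egress (after NAT) */
	mtk_foe_entry_set_ipv4_tuple(&foe, true, new_src_ip, new_src_port, dst_ip, dst_port);

	/* write the entry into the hardware FOE table; returns the table index */
	hash = mtk_foe_entry_commit(&eth->ppe, &foe, mtk_eth_timestamp(eth));
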
61diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile
62index 13c5b4e8f..0a6af99f1 100755
63--- a/drivers/net/ethernet/mediatek/Makefile
64+++ b/drivers/net/ethernet/mediatek/Makefile
65@@ -4,5 +4,6 @@
66 #
67
68 obj-$(CONFIG_NET_MEDIATEK_SOC) += mtk_eth.o
69-mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o
70+mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o mtk_eth_dbg.o mtk_eth_reset.o \
71+ mtk_ppe.o mtk_ppe_debugfs.o mtk_ppe_offload.o
72 obj-$(CONFIG_NET_MEDIATEK_HNAT) += mtk_hnat/
73diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
74index 2b21f7ed0..819d8a0be 100755
75--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
76+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
77@@ -2654,12 +2654,17 @@ static int mtk_open(struct net_device *dev)
78
79 /* we run 2 netdevs on the same dma ring so we only bring it up once */
80 if (!refcount_read(&eth->dma_refcnt)) {
81- int err = mtk_start_dma(eth);
82+ u32 gdm_config = MTK_GDMA_TO_PDMA;
83+ int err;
84
85+ err = mtk_start_dma(eth);
86 if (err)
87 return err;
88
89- mtk_gdm_config(eth, MTK_GDMA_TO_PDMA);
90+ if (eth->soc->offload_version && mtk_ppe_start(&eth->ppe) == 0)
91+ gdm_config = MTK_GDMA_TO_PPE;
92+
93+ mtk_gdm_config(eth, gdm_config);
94
95 /* Indicates CDM to parse the MTK special tag from CPU */
96 if (netdev_uses_dsa(dev)) {
97@@ -2772,6 +2777,9 @@ static int mtk_stop(struct net_device *dev)
98
99 mtk_dma_free(eth);
100
101+ if (eth->soc->offload_version)
102+ mtk_ppe_stop(&eth->ppe);
103+
104 return 0;
105 }
106
107@@ -3391,6 +3399,7 @@ static const struct net_device_ops mtk_netdev_ops = {
108 #ifdef CONFIG_NET_POLL_CONTROLLER
109 .ndo_poll_controller = mtk_poll_controller,
110 #endif
111+ .ndo_setup_tc = mtk_eth_setup_tc,
112 };
113
114 static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
115@@ -3682,6 +3691,17 @@ static int mtk_probe(struct platform_device *pdev)
116 goto err_free_dev;
117 }
118
119+ if (eth->soc->offload_version) {
120+ err = mtk_ppe_init(&eth->ppe, eth->dev,
121+ eth->base + MTK_ETH_PPE_BASE, 2);
122+ if (err)
123+ goto err_free_dev;
124+
125+ err = mtk_eth_offload_init(eth);
126+ if (err)
127+ goto err_free_dev;
128+ }
129+
130 for (i = 0; i < MTK_MAX_DEVS; i++) {
131 if (!eth->netdev[i])
132 continue;
133@@ -3781,6 +3801,7 @@ static const struct mtk_soc_data mt2701_data = {
134 .required_clks = MT7623_CLKS_BITMAP,
135 .required_pctl = true,
136 .has_sram = false,
137+ .offload_version = 2,
138 };
139
140 static const struct mtk_soc_data mt7621_data = {
141@@ -3789,6 +3810,7 @@ static const struct mtk_soc_data mt7621_data = {
142 .required_clks = MT7621_CLKS_BITMAP,
143 .required_pctl = false,
144 .has_sram = false,
145+ .offload_version = 2,
146 };
147
148 static const struct mtk_soc_data mt7622_data = {
149@@ -3798,6 +3820,7 @@ static const struct mtk_soc_data mt7622_data = {
150 .required_clks = MT7622_CLKS_BITMAP,
151 .required_pctl = false,
152 .has_sram = false,
153+ .offload_version = 2,
154 };
155
156 static const struct mtk_soc_data mt7623_data = {
157@@ -3806,6 +3829,7 @@ static const struct mtk_soc_data mt7623_data = {
158 .required_clks = MT7623_CLKS_BITMAP,
159 .required_pctl = true,
160 .has_sram = false,
161+ .offload_version = 2,
162 };
163
164 static const struct mtk_soc_data mt7629_data = {
165diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
166index b6380ffeb..349f98503 100755
167--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
168+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
169@@ -15,6 +15,8 @@
170 #include <linux/u64_stats_sync.h>
171 #include <linux/refcount.h>
172 #include <linux/phylink.h>
173+#include <linux/rhashtable.h>
174+#include "mtk_ppe.h"
175
176 #define MTK_QDMA_PAGE_SIZE 2048
177 #define MTK_MAX_RX_LENGTH 1536
178@@ -37,7 +39,8 @@
179 NETIF_F_HW_VLAN_CTAG_TX | \
180 NETIF_F_SG | NETIF_F_TSO | \
181 NETIF_F_TSO6 | \
182- NETIF_F_IPV6_CSUM)
183+ NETIF_F_IPV6_CSUM |\
184+ NETIF_F_HW_TC)
185 #define MTK_SET_FEATURES (NETIF_F_LRO | \
186 NETIF_F_HW_VLAN_CTAG_RX)
187 #define MTK_HW_FEATURES_MT7628 (NETIF_F_SG | NETIF_F_RXCSUM)
188@@ -107,6 +110,7 @@
189 #define MTK_GDMA_TCS_EN BIT(21)
190 #define MTK_GDMA_UCS_EN BIT(20)
191 #define MTK_GDMA_TO_PDMA 0x0
192+#define MTK_GDMA_TO_PPE 0x4444
193 #define MTK_GDMA_DROP_ALL 0x7777
194
195 /* Unicast Filter MAC Address Register - Low */
196@@ -547,6 +551,12 @@
197 #define RX_DMA_TCI(_x) ((_x) & (VLAN_PRIO_MASK | VLAN_VID_MASK))
198 #define RX_DMA_VPID(_x) (((_x) >> 16) & 0xffff)
199
200+/* QDMA descriptor rxd4 */
201+#define MTK_RXD4_FOE_ENTRY GENMASK(13, 0)
202+#define MTK_RXD4_PPE_CPU_REASON GENMASK(18, 14)
203+#define MTK_RXD4_SRC_PORT GENMASK(21, 19)
204+#define MTK_RXD4_ALG GENMASK(31, 22)
205+
206 /* QDMA descriptor rxd4 */
207 #define RX_DMA_L4_VALID BIT(24)
208 #define RX_DMA_L4_VALID_PDMA BIT(30) /* when PDMA is used */
209@@ -1158,6 +1168,7 @@ struct mtk_soc_data {
210 u32 caps;
211 u32 required_clks;
212 bool required_pctl;
213+ u8 offload_version;
214 netdev_features_t hw_features;
215 bool has_sram;
216 };
217@@ -1271,6 +1282,9 @@ struct mtk_eth {
218 int ip_align;
219 spinlock_t syscfg0_lock;
220 struct timer_list mtk_dma_monitor_timer;
221+
222+ struct mtk_ppe ppe;
223+ struct rhashtable flow_table;
224 };
225
226 /* struct mtk_mac - the structure that holds the info about the MACs of the
227@@ -1319,4 +1333,8 @@ int mtk_gmac_rgmii_path_setup(struct mtk_eth *eth, int mac_id);
228 void mtk_gdm_config(struct mtk_eth *eth, u32 config);
229 void ethsys_reset(struct mtk_eth *eth, u32 reset_bits);
230
231+int mtk_eth_offload_init(struct mtk_eth *eth);
232+int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
233+ void *type_data);
234+
235 #endif /* MTK_ETH_H */
236diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.c b/drivers/net/ethernet/mediatek/mtk_ppe.c
237new file mode 100644
238index 000000000..66298e223
239--- /dev/null
240+++ b/drivers/net/ethernet/mediatek/mtk_ppe.c
241@@ -0,0 +1,509 @@
242+// SPDX-License-Identifier: GPL-2.0-only
243+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
244+
245+#include <linux/kernel.h>
246+#include <linux/io.h>
247+#include <linux/iopoll.h>
248+#include <linux/etherdevice.h>
249+#include <linux/platform_device.h>
250+#include "mtk_ppe.h"
251+#include "mtk_ppe_regs.h"
252+
253+static void ppe_w32(struct mtk_ppe *ppe, u32 reg, u32 val)
254+{
255+ writel(val, ppe->base + reg);
256+}
257+
258+static u32 ppe_r32(struct mtk_ppe *ppe, u32 reg)
259+{
260+ return readl(ppe->base + reg);
261+}
262+
263+static u32 ppe_m32(struct mtk_ppe *ppe, u32 reg, u32 mask, u32 set)
264+{
265+ u32 val;
266+
267+ val = ppe_r32(ppe, reg);
268+ val &= ~mask;
269+ val |= set;
270+ ppe_w32(ppe, reg, val);
271+
272+ return val;
273+}
274+
275+static u32 ppe_set(struct mtk_ppe *ppe, u32 reg, u32 val)
276+{
277+ return ppe_m32(ppe, reg, 0, val);
278+}
279+
280+static u32 ppe_clear(struct mtk_ppe *ppe, u32 reg, u32 val)
281+{
282+ return ppe_m32(ppe, reg, val, 0);
283+}
284+
285+static int mtk_ppe_wait_busy(struct mtk_ppe *ppe)
286+{
287+ int ret;
288+ u32 val;
289+
290+ ret = readl_poll_timeout(ppe->base + MTK_PPE_GLO_CFG, val,
291+ !(val & MTK_PPE_GLO_CFG_BUSY),
292+ 20, MTK_PPE_WAIT_TIMEOUT_US);
293+
294+ if (ret)
295+ dev_err(ppe->dev, "PPE table busy");
296+
297+ return ret;
298+}
299+
300+static void mtk_ppe_cache_clear(struct mtk_ppe *ppe)
301+{
302+ ppe_set(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
303+ ppe_clear(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_CLEAR);
304+}
305+
306+static void mtk_ppe_cache_enable(struct mtk_ppe *ppe, bool enable)
307+{
308+ mtk_ppe_cache_clear(ppe);
309+
310+ ppe_m32(ppe, MTK_PPE_CACHE_CTL, MTK_PPE_CACHE_CTL_EN,
311+ enable * MTK_PPE_CACHE_CTL_EN);
312+}
313+
314+static u32 mtk_ppe_hash_entry(struct mtk_foe_entry *e)
315+{
316+ u32 hv1, hv2, hv3;
317+ u32 hash;
318+
319+ switch (FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, e->ib1)) {
320+ case MTK_PPE_PKT_TYPE_BRIDGE:
321+ hv1 = e->bridge.src_mac_lo;
322+ hv1 ^= ((e->bridge.src_mac_hi & 0xffff) << 16);
323+ hv2 = e->bridge.src_mac_hi >> 16;
324+ hv2 ^= e->bridge.dest_mac_lo;
325+ hv3 = e->bridge.dest_mac_hi;
326+ break;
327+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
328+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
329+ hv1 = e->ipv4.orig.ports;
330+ hv2 = e->ipv4.orig.dest_ip;
331+ hv3 = e->ipv4.orig.src_ip;
332+ break;
333+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
334+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
335+ hv1 = e->ipv6.src_ip[3] ^ e->ipv6.dest_ip[3];
336+ hv1 ^= e->ipv6.ports;
337+
338+ hv2 = e->ipv6.src_ip[2] ^ e->ipv6.dest_ip[2];
339+ hv2 ^= e->ipv6.dest_ip[0];
340+
341+ hv3 = e->ipv6.src_ip[1] ^ e->ipv6.dest_ip[1];
342+ hv3 ^= e->ipv6.src_ip[0];
343+ break;
344+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
345+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
346+ default:
347+ WARN_ON_ONCE(1);
348+ return MTK_PPE_HASH_MASK;
349+ }
350+
351+ hash = (hv1 & hv2) | ((~hv1) & hv3);
352+ hash = (hash >> 24) | ((hash & 0xffffff) << 8);
353+ hash ^= hv1 ^ hv2 ^ hv3;
354+ hash ^= hash >> 16;
355+ hash <<= 1;
356+ hash &= MTK_PPE_ENTRIES - 1;
357+
358+ return hash;
359+}
360+
361+static inline struct mtk_foe_mac_info *
362+mtk_foe_entry_l2(struct mtk_foe_entry *entry)
363+{
364+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
365+
366+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
367+ return &entry->ipv6.l2;
368+
369+ return &entry->ipv4.l2;
370+}
371+
372+static inline u32 *
373+mtk_foe_entry_ib2(struct mtk_foe_entry *entry)
374+{
375+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
376+
377+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE)
378+ return &entry->ipv6.ib2;
379+
380+ return &entry->ipv4.ib2;
381+}
382+
383+int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
384+ u8 pse_port, u8 *src_mac, u8 *dest_mac)
385+{
386+ struct mtk_foe_mac_info *l2;
387+ u32 ports_pad, val;
388+
389+ memset(entry, 0, sizeof(*entry));
390+
391+ val = FIELD_PREP(MTK_FOE_IB1_STATE, MTK_FOE_STATE_BIND) |
392+ FIELD_PREP(MTK_FOE_IB1_PACKET_TYPE, type) |
393+ FIELD_PREP(MTK_FOE_IB1_UDP, l4proto == IPPROTO_UDP) |
394+ MTK_FOE_IB1_BIND_TTL |
395+ MTK_FOE_IB1_BIND_CACHE;
396+ entry->ib1 = val;
397+
398+ val = FIELD_PREP(MTK_FOE_IB2_PORT_MG, 0x3f) |
399+ FIELD_PREP(MTK_FOE_IB2_PORT_AG, 0x1f) |
400+ FIELD_PREP(MTK_FOE_IB2_DEST_PORT, pse_port);
401+
402+ if (is_multicast_ether_addr(dest_mac))
403+ val |= MTK_FOE_IB2_MULTICAST;
404+
405+ ports_pad = 0xa5a5a500 | (l4proto & 0xff);
406+ if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
407+ entry->ipv4.orig.ports = ports_pad;
408+ if (type == MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
409+ entry->ipv6.ports = ports_pad;
410+
411+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
412+ entry->ipv6.ib2 = val;
413+ l2 = &entry->ipv6.l2;
414+ } else {
415+ entry->ipv4.ib2 = val;
416+ l2 = &entry->ipv4.l2;
417+ }
418+
419+ l2->dest_mac_hi = get_unaligned_be32(dest_mac);
420+ l2->dest_mac_lo = get_unaligned_be16(dest_mac + 4);
421+ l2->src_mac_hi = get_unaligned_be32(src_mac);
422+ l2->src_mac_lo = get_unaligned_be16(src_mac + 4);
423+
424+ if (type >= MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T)
425+ l2->etype = ETH_P_IPV6;
426+ else
427+ l2->etype = ETH_P_IP;
428+
429+ return 0;
430+}
431+
432+int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port)
433+{
434+ u32 *ib2 = mtk_foe_entry_ib2(entry);
435+ u32 val;
436+
437+ val = *ib2;
438+ val &= ~MTK_FOE_IB2_DEST_PORT;
439+ val |= FIELD_PREP(MTK_FOE_IB2_DEST_PORT, port);
440+ *ib2 = val;
441+
442+ return 0;
443+}
444+
445+int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool egress,
446+ __be32 src_addr, __be16 src_port,
447+ __be32 dest_addr, __be16 dest_port)
448+{
449+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
450+ struct mtk_ipv4_tuple *t;
451+
452+ switch (type) {
453+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
454+ if (egress) {
455+ t = &entry->ipv4.new;
456+ break;
457+ }
458+ fallthrough;
459+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
460+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
461+ t = &entry->ipv4.orig;
462+ break;
463+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
464+ entry->ipv6_6rd.tunnel_src_ip = be32_to_cpu(src_addr);
465+ entry->ipv6_6rd.tunnel_dest_ip = be32_to_cpu(dest_addr);
466+ return 0;
467+ default:
468+ WARN_ON_ONCE(1);
469+ return -EINVAL;
470+ }
471+
472+ t->src_ip = be32_to_cpu(src_addr);
473+ t->dest_ip = be32_to_cpu(dest_addr);
474+
475+ if (type == MTK_PPE_PKT_TYPE_IPV4_ROUTE)
476+ return 0;
477+
478+ t->src_port = be16_to_cpu(src_port);
479+ t->dest_port = be16_to_cpu(dest_port);
480+
481+ return 0;
482+}
483+
484+int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
485+ __be32 *src_addr, __be16 src_port,
486+ __be32 *dest_addr, __be16 dest_port)
487+{
488+ int type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
489+ u32 *src, *dest;
490+ int i;
491+
492+ switch (type) {
493+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
494+ src = entry->dslite.tunnel_src_ip;
495+ dest = entry->dslite.tunnel_dest_ip;
496+ break;
497+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
498+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
499+ entry->ipv6.src_port = be16_to_cpu(src_port);
500+ entry->ipv6.dest_port = be16_to_cpu(dest_port);
501+ fallthrough;
502+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
503+ src = entry->ipv6.src_ip;
504+ dest = entry->ipv6.dest_ip;
505+ break;
506+ default:
507+ WARN_ON_ONCE(1);
508+ return -EINVAL;
509+ }
510+
511+ for (i = 0; i < 4; i++)
512+ src[i] = be32_to_cpu(src_addr[i]);
513+ for (i = 0; i < 4; i++)
514+ dest[i] = be32_to_cpu(dest_addr[i]);
515+
516+ return 0;
517+}
518+
519+int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port)
520+{
521+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
522+
523+ l2->etype = BIT(port);
524+
525+ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER))
526+ entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
527+ else
528+ l2->etype |= BIT(8);
529+
530+ entry->ib1 &= ~MTK_FOE_IB1_BIND_VLAN_TAG;
531+
532+ return 0;
533+}
534+
535+int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid)
536+{
537+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
538+
539+ switch (FIELD_GET(MTK_FOE_IB1_BIND_VLAN_LAYER, entry->ib1)) {
540+ case 0:
541+ entry->ib1 |= MTK_FOE_IB1_BIND_VLAN_TAG |
542+ FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
543+ l2->vlan1 = vid;
544+ return 0;
545+ case 1:
546+ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG)) {
547+ l2->vlan1 = vid;
548+ l2->etype |= BIT(8);
549+ } else {
550+ l2->vlan2 = vid;
551+ entry->ib1 += FIELD_PREP(MTK_FOE_IB1_BIND_VLAN_LAYER, 1);
552+ }
553+ return 0;
554+ default:
555+ return -ENOSPC;
556+ }
557+}
558+
559+int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid)
560+{
561+ struct mtk_foe_mac_info *l2 = mtk_foe_entry_l2(entry);
562+
563+ if (!(entry->ib1 & MTK_FOE_IB1_BIND_VLAN_LAYER) ||
564+ (entry->ib1 & MTK_FOE_IB1_BIND_VLAN_TAG))
565+ l2->etype = ETH_P_PPP_SES;
566+
567+ entry->ib1 |= MTK_FOE_IB1_BIND_PPPOE;
568+ l2->pppoe_id = sid;
569+
570+ return 0;
571+}
572+
573+static inline bool mtk_foe_entry_usable(struct mtk_foe_entry *entry)
574+{
575+ return !(entry->ib1 & MTK_FOE_IB1_STATIC) &&
576+ FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1) != MTK_FOE_STATE_BIND;
577+}
578+
579+int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
580+ u16 timestamp)
581+{
582+ struct mtk_foe_entry *hwe;
583+ u32 hash;
584+
585+ timestamp &= MTK_FOE_IB1_BIND_TIMESTAMP;
586+ entry->ib1 &= ~MTK_FOE_IB1_BIND_TIMESTAMP;
587+ entry->ib1 |= FIELD_PREP(MTK_FOE_IB1_BIND_TIMESTAMP, timestamp);
588+
589+ hash = mtk_ppe_hash_entry(entry);
590+ hwe = &ppe->foe_table[hash];
591+ if (!mtk_foe_entry_usable(hwe)) {
592+ hwe++;
593+ hash++;
594+
595+ if (!mtk_foe_entry_usable(hwe))
596+ return -ENOSPC;
597+ }
598+
599+ memcpy(&hwe->data, &entry->data, sizeof(hwe->data));
600+ wmb();
601+ hwe->ib1 = entry->ib1;
602+
603+ dma_wmb();
604+
605+ mtk_ppe_cache_clear(ppe);
606+
607+ return hash;
608+}
609+
610+int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
611+ int version)
612+{
613+ struct mtk_foe_entry *foe;
614+
615+	/* need to allocate a separate device, since the PPE DMA access is
616+ * not coherent.
617+ */
618+ ppe->base = base;
619+ ppe->dev = dev;
620+ ppe->version = version;
621+
622+ foe = dmam_alloc_coherent(ppe->dev, MTK_PPE_ENTRIES * sizeof(*foe),
623+ &ppe->foe_phys, GFP_KERNEL);
624+ if (!foe)
625+ return -ENOMEM;
626+
627+ ppe->foe_table = foe;
628+
629+ mtk_ppe_debugfs_init(ppe);
630+
631+ return 0;
632+}
633+
634+static void mtk_ppe_init_foe_table(struct mtk_ppe *ppe)
635+{
636+ static const u8 skip[] = { 12, 25, 38, 51, 76, 89, 102 };
637+ int i, k;
638+
639+ memset(ppe->foe_table, 0, MTK_PPE_ENTRIES * sizeof(*ppe->foe_table));
640+
641+ if (!IS_ENABLED(CONFIG_SOC_MT7621))
642+ return;
643+
644+ /* skip all entries that cross the 1024 byte boundary */
645+ for (i = 0; i < MTK_PPE_ENTRIES; i += 128)
646+ for (k = 0; k < ARRAY_SIZE(skip); k++)
647+ ppe->foe_table[i + skip[k]].ib1 |= MTK_FOE_IB1_STATIC;
648+}
649+
650+int mtk_ppe_start(struct mtk_ppe *ppe)
651+{
652+ u32 val;
653+
654+ mtk_ppe_init_foe_table(ppe);
655+ ppe_w32(ppe, MTK_PPE_TB_BASE, ppe->foe_phys);
656+
657+ val = MTK_PPE_TB_CFG_ENTRY_80B |
658+ MTK_PPE_TB_CFG_AGE_NON_L4 |
659+ MTK_PPE_TB_CFG_AGE_UNBIND |
660+ MTK_PPE_TB_CFG_AGE_TCP |
661+ MTK_PPE_TB_CFG_AGE_UDP |
662+ MTK_PPE_TB_CFG_AGE_TCP_FIN |
663+ FIELD_PREP(MTK_PPE_TB_CFG_SEARCH_MISS,
664+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD) |
665+ FIELD_PREP(MTK_PPE_TB_CFG_KEEPALIVE,
666+ MTK_PPE_KEEPALIVE_DISABLE) |
667+ FIELD_PREP(MTK_PPE_TB_CFG_HASH_MODE, 1) |
668+ FIELD_PREP(MTK_PPE_TB_CFG_SCAN_MODE,
669+ MTK_PPE_SCAN_MODE_KEEPALIVE_AGE) |
670+ FIELD_PREP(MTK_PPE_TB_CFG_ENTRY_NUM,
671+ MTK_PPE_ENTRIES_SHIFT);
672+ ppe_w32(ppe, MTK_PPE_TB_CFG, val);
673+
674+ ppe_w32(ppe, MTK_PPE_IP_PROTO_CHK,
675+ MTK_PPE_IP_PROTO_CHK_IPV4 | MTK_PPE_IP_PROTO_CHK_IPV6);
676+
677+ mtk_ppe_cache_enable(ppe, true);
678+
679+ val = MTK_PPE_FLOW_CFG_IP4_TCP_FRAG |
680+ MTK_PPE_FLOW_CFG_IP4_UDP_FRAG |
681+ MTK_PPE_FLOW_CFG_IP6_3T_ROUTE |
682+ MTK_PPE_FLOW_CFG_IP6_5T_ROUTE |
683+ MTK_PPE_FLOW_CFG_IP6_6RD |
684+ MTK_PPE_FLOW_CFG_IP4_NAT |
685+ MTK_PPE_FLOW_CFG_IP4_NAPT |
686+ MTK_PPE_FLOW_CFG_IP4_DSLITE |
687+ MTK_PPE_FLOW_CFG_L2_BRIDGE |
688+ MTK_PPE_FLOW_CFG_IP4_NAT_FRAG;
689+ ppe_w32(ppe, MTK_PPE_FLOW_CFG, val);
690+
691+ val = FIELD_PREP(MTK_PPE_UNBIND_AGE_MIN_PACKETS, 1000) |
692+ FIELD_PREP(MTK_PPE_UNBIND_AGE_DELTA, 3);
693+ ppe_w32(ppe, MTK_PPE_UNBIND_AGE, val);
694+
695+ val = FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_UDP, 12) |
696+ FIELD_PREP(MTK_PPE_BIND_AGE0_DELTA_NON_L4, 1);
697+ ppe_w32(ppe, MTK_PPE_BIND_AGE0, val);
698+
699+ val = FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP_FIN, 1) |
700+ FIELD_PREP(MTK_PPE_BIND_AGE1_DELTA_TCP, 7);
701+ ppe_w32(ppe, MTK_PPE_BIND_AGE1, val);
702+
703+ val = MTK_PPE_BIND_LIMIT0_QUARTER | MTK_PPE_BIND_LIMIT0_HALF;
704+ ppe_w32(ppe, MTK_PPE_BIND_LIMIT0, val);
705+
706+ val = MTK_PPE_BIND_LIMIT1_FULL |
707+ FIELD_PREP(MTK_PPE_BIND_LIMIT1_NON_L4, 1);
708+ ppe_w32(ppe, MTK_PPE_BIND_LIMIT1, val);
709+
710+ val = FIELD_PREP(MTK_PPE_BIND_RATE_BIND, 30) |
711+ FIELD_PREP(MTK_PPE_BIND_RATE_PREBIND, 1);
712+ ppe_w32(ppe, MTK_PPE_BIND_RATE, val);
713+
714+ /* enable PPE */
715+ val = MTK_PPE_GLO_CFG_EN |
716+ MTK_PPE_GLO_CFG_IP4_L4_CS_DROP |
717+ MTK_PPE_GLO_CFG_IP4_CS_DROP |
718+ MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE;
719+ ppe_w32(ppe, MTK_PPE_GLO_CFG, val);
720+
721+ ppe_w32(ppe, MTK_PPE_DEFAULT_CPU_PORT, 0);
722+
723+ return 0;
724+}
725+
726+int mtk_ppe_stop(struct mtk_ppe *ppe)
727+{
728+ u32 val;
729+ int i;
730+
731+ for (i = 0; i < MTK_PPE_ENTRIES; i++)
732+ ppe->foe_table[i].ib1 = FIELD_PREP(MTK_FOE_IB1_STATE,
733+ MTK_FOE_STATE_INVALID);
734+
735+ mtk_ppe_cache_enable(ppe, false);
736+
737+ /* disable offload engine */
738+ ppe_clear(ppe, MTK_PPE_GLO_CFG, MTK_PPE_GLO_CFG_EN);
739+ ppe_w32(ppe, MTK_PPE_FLOW_CFG, 0);
740+
741+ /* disable aging */
742+ val = MTK_PPE_TB_CFG_AGE_NON_L4 |
743+ MTK_PPE_TB_CFG_AGE_UNBIND |
744+ MTK_PPE_TB_CFG_AGE_TCP |
745+ MTK_PPE_TB_CFG_AGE_UDP |
746+ MTK_PPE_TB_CFG_AGE_TCP_FIN;
747+ ppe_clear(ppe, MTK_PPE_TB_CFG, val);
748+
749+ return mtk_ppe_wait_busy(ppe);
750+}
751diff --git a/drivers/net/ethernet/mediatek/mtk_ppe.h b/drivers/net/ethernet/mediatek/mtk_ppe.h
752new file mode 100644
753index 000000000..242fb8f2a
754--- /dev/null
755+++ b/drivers/net/ethernet/mediatek/mtk_ppe.h
756@@ -0,0 +1,288 @@
757+// SPDX-License-Identifier: GPL-2.0-only
758+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
759+
760+#ifndef __MTK_PPE_H
761+#define __MTK_PPE_H
762+
763+#include <linux/kernel.h>
764+#include <linux/bitfield.h>
765+
766+#define MTK_ETH_PPE_BASE 0xc00
767+
768+#define MTK_PPE_ENTRIES_SHIFT 3
769+#define MTK_PPE_ENTRIES (1024 << MTK_PPE_ENTRIES_SHIFT)
770+#define MTK_PPE_HASH_MASK (MTK_PPE_ENTRIES - 1)
771+#define MTK_PPE_WAIT_TIMEOUT_US 1000000
772+
773+#define MTK_FOE_IB1_UNBIND_TIMESTAMP GENMASK(7, 0)
774+#define MTK_FOE_IB1_UNBIND_PACKETS GENMASK(23, 8)
775+#define MTK_FOE_IB1_UNBIND_PREBIND BIT(24)
776+
777+#define MTK_FOE_IB1_BIND_TIMESTAMP GENMASK(14, 0)
778+#define MTK_FOE_IB1_BIND_KEEPALIVE BIT(15)
779+#define MTK_FOE_IB1_BIND_VLAN_LAYER GENMASK(18, 16)
780+#define MTK_FOE_IB1_BIND_PPPOE BIT(19)
781+#define MTK_FOE_IB1_BIND_VLAN_TAG BIT(20)
782+#define MTK_FOE_IB1_BIND_PKT_SAMPLE BIT(21)
783+#define MTK_FOE_IB1_BIND_CACHE BIT(22)
784+#define MTK_FOE_IB1_BIND_TUNNEL_DECAP BIT(23)
785+#define MTK_FOE_IB1_BIND_TTL BIT(24)
786+
787+#define MTK_FOE_IB1_PACKET_TYPE GENMASK(27, 25)
788+#define MTK_FOE_IB1_STATE GENMASK(29, 28)
789+#define MTK_FOE_IB1_UDP BIT(30)
790+#define MTK_FOE_IB1_STATIC BIT(31)
791+
792+enum {
793+ MTK_PPE_PKT_TYPE_IPV4_HNAPT = 0,
794+ MTK_PPE_PKT_TYPE_IPV4_ROUTE = 1,
795+ MTK_PPE_PKT_TYPE_BRIDGE = 2,
796+ MTK_PPE_PKT_TYPE_IPV4_DSLITE = 3,
797+ MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T = 4,
798+ MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T = 5,
799+ MTK_PPE_PKT_TYPE_IPV6_6RD = 7,
800+};
801+
802+#define MTK_FOE_IB2_QID GENMASK(3, 0)
803+#define MTK_FOE_IB2_PSE_QOS BIT(4)
804+#define MTK_FOE_IB2_DEST_PORT GENMASK(7, 5)
805+#define MTK_FOE_IB2_MULTICAST BIT(8)
806+
807+#define MTK_FOE_IB2_WHNAT_QID2 GENMASK(13, 12)
808+#define MTK_FOE_IB2_WHNAT_DEVIDX BIT(16)
809+#define MTK_FOE_IB2_WHNAT_NAT BIT(17)
810+
811+#define MTK_FOE_IB2_PORT_MG GENMASK(17, 12)
812+
813+#define MTK_FOE_IB2_PORT_AG GENMASK(23, 18)
814+
815+#define MTK_FOE_IB2_DSCP GENMASK(31, 24)
816+
817+#define MTK_FOE_VLAN2_WHNAT_BSS GENMASK(5, 0)
818+#define MTK_FOE_VLAN2_WHNAT_WCID GENMASK(13, 6)
819+#define MTK_FOE_VLAN2_WHNAT_RING GENMASK(15, 14)
820+
821+enum {
822+ MTK_FOE_STATE_INVALID,
823+ MTK_FOE_STATE_UNBIND,
824+ MTK_FOE_STATE_BIND,
825+ MTK_FOE_STATE_FIN
826+};
827+
828+struct mtk_foe_mac_info {
829+ u16 vlan1;
830+ u16 etype;
831+
832+ u32 dest_mac_hi;
833+
834+ u16 vlan2;
835+ u16 dest_mac_lo;
836+
837+ u32 src_mac_hi;
838+
839+ u16 pppoe_id;
840+ u16 src_mac_lo;
841+};
842+
843+struct mtk_foe_bridge {
844+ u32 dest_mac_hi;
845+
846+ u16 src_mac_lo;
847+ u16 dest_mac_lo;
848+
849+ u32 src_mac_hi;
850+
851+ u32 ib2;
852+
853+ u32 _rsv[5];
854+
855+ u32 udf_tsid;
856+ struct mtk_foe_mac_info l2;
857+};
858+
859+struct mtk_ipv4_tuple {
860+ u32 src_ip;
861+ u32 dest_ip;
862+ union {
863+ struct {
864+ u16 dest_port;
865+ u16 src_port;
866+ };
867+ struct {
868+ u8 protocol;
869+ u8 _pad[3]; /* fill with 0xa5a5a5 */
870+ };
871+ u32 ports;
872+ };
873+};
874+
875+struct mtk_foe_ipv4 {
876+ struct mtk_ipv4_tuple orig;
877+
878+ u32 ib2;
879+
880+ struct mtk_ipv4_tuple new;
881+
882+ u16 timestamp;
883+ u16 _rsv0[3];
884+
885+ u32 udf_tsid;
886+
887+ struct mtk_foe_mac_info l2;
888+};
889+
890+struct mtk_foe_ipv4_dslite {
891+ struct mtk_ipv4_tuple ip4;
892+
893+ u32 tunnel_src_ip[4];
894+ u32 tunnel_dest_ip[4];
895+
896+ u8 flow_label[3];
897+ u8 priority;
898+
899+ u32 udf_tsid;
900+
901+ u32 ib2;
902+
903+ struct mtk_foe_mac_info l2;
904+};
905+
906+struct mtk_foe_ipv6 {
907+ u32 src_ip[4];
908+ u32 dest_ip[4];
909+
910+ union {
911+ struct {
912+ u8 protocol;
913+ u8 _pad[3]; /* fill with 0xa5a5a5 */
914+ }; /* 3-tuple */
915+ struct {
916+ u16 dest_port;
917+ u16 src_port;
918+ }; /* 5-tuple */
919+ u32 ports;
920+ };
921+
922+ u32 _rsv[3];
923+
924+ u32 udf;
925+
926+ u32 ib2;
927+ struct mtk_foe_mac_info l2;
928+};
929+
930+struct mtk_foe_ipv6_6rd {
931+ u32 src_ip[4];
932+ u32 dest_ip[4];
933+ u16 dest_port;
934+ u16 src_port;
935+
936+ u32 tunnel_src_ip;
937+ u32 tunnel_dest_ip;
938+
939+ u16 hdr_csum;
940+ u8 dscp;
941+ u8 ttl;
942+
943+ u8 flag;
944+ u8 pad;
945+ u8 per_flow_6rd_id;
946+ u8 pad2;
947+
948+ u32 ib2;
949+ struct mtk_foe_mac_info l2;
950+};
951+
952+struct mtk_foe_entry {
953+ u32 ib1;
954+
955+ union {
956+ struct mtk_foe_bridge bridge;
957+ struct mtk_foe_ipv4 ipv4;
958+ struct mtk_foe_ipv4_dslite dslite;
959+ struct mtk_foe_ipv6 ipv6;
960+ struct mtk_foe_ipv6_6rd ipv6_6rd;
961+ u32 data[19];
962+ };
963+};
964+
965+enum {
966+ MTK_PPE_CPU_REASON_TTL_EXCEEDED = 0x02,
967+ MTK_PPE_CPU_REASON_OPTION_HEADER = 0x03,
968+ MTK_PPE_CPU_REASON_NO_FLOW = 0x07,
969+ MTK_PPE_CPU_REASON_IPV4_FRAG = 0x08,
970+ MTK_PPE_CPU_REASON_IPV4_DSLITE_FRAG = 0x09,
971+ MTK_PPE_CPU_REASON_IPV4_DSLITE_NO_TCP_UDP = 0x0a,
972+ MTK_PPE_CPU_REASON_IPV6_6RD_NO_TCP_UDP = 0x0b,
973+ MTK_PPE_CPU_REASON_TCP_FIN_SYN_RST = 0x0c,
974+ MTK_PPE_CPU_REASON_UN_HIT = 0x0d,
975+ MTK_PPE_CPU_REASON_HIT_UNBIND = 0x0e,
976+ MTK_PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED = 0x0f,
977+ MTK_PPE_CPU_REASON_HIT_BIND_TCP_FIN = 0x10,
978+ MTK_PPE_CPU_REASON_HIT_TTL_1 = 0x11,
979+ MTK_PPE_CPU_REASON_HIT_BIND_VLAN_VIOLATION = 0x12,
980+ MTK_PPE_CPU_REASON_KEEPALIVE_UC_OLD_HDR = 0x13,
981+ MTK_PPE_CPU_REASON_KEEPALIVE_MC_NEW_HDR = 0x14,
982+ MTK_PPE_CPU_REASON_KEEPALIVE_DUP_OLD_HDR = 0x15,
983+ MTK_PPE_CPU_REASON_HIT_BIND_FORCE_CPU = 0x16,
984+ MTK_PPE_CPU_REASON_TUNNEL_OPTION_HEADER = 0x17,
985+ MTK_PPE_CPU_REASON_MULTICAST_TO_CPU = 0x18,
986+ MTK_PPE_CPU_REASON_MULTICAST_TO_GMAC1_CPU = 0x19,
987+ MTK_PPE_CPU_REASON_HIT_PRE_BIND = 0x1a,
988+ MTK_PPE_CPU_REASON_PACKET_SAMPLING = 0x1b,
989+ MTK_PPE_CPU_REASON_EXCEED_MTU = 0x1c,
990+ MTK_PPE_CPU_REASON_PPE_BYPASS = 0x1e,
991+ MTK_PPE_CPU_REASON_INVALID = 0x1f,
992+};
993+
994+struct mtk_ppe {
995+ struct device *dev;
996+ void __iomem *base;
997+ int version;
998+
999+ struct mtk_foe_entry *foe_table;
1000+ dma_addr_t foe_phys;
1001+
1002+ void *acct_table;
1003+};
1004+
1005+int mtk_ppe_init(struct mtk_ppe *ppe, struct device *dev, void __iomem *base,
1006+ int version);
1007+int mtk_ppe_start(struct mtk_ppe *ppe);
1008+int mtk_ppe_stop(struct mtk_ppe *ppe);
1009+
1010+static inline void
1011+mtk_foe_entry_clear(struct mtk_ppe *ppe, u16 hash)
1012+{
1013+ ppe->foe_table[hash].ib1 = 0;
1014+ dma_wmb();
1015+}
1016+
1017+static inline int
1018+mtk_foe_entry_timestamp(struct mtk_ppe *ppe, u16 hash)
1019+{
1020+ u32 ib1 = READ_ONCE(ppe->foe_table[hash].ib1);
1021+
1022+ if (FIELD_GET(MTK_FOE_IB1_STATE, ib1) != MTK_FOE_STATE_BIND)
1023+ return -1;
1024+
1025+ return FIELD_GET(MTK_FOE_IB1_BIND_TIMESTAMP, ib1);
1026+}
1027+
1028+int mtk_foe_entry_prepare(struct mtk_foe_entry *entry, int type, int l4proto,
1029+ u8 pse_port, u8 *src_mac, u8 *dest_mac);
1030+int mtk_foe_entry_set_pse_port(struct mtk_foe_entry *entry, u8 port);
1031+int mtk_foe_entry_set_ipv4_tuple(struct mtk_foe_entry *entry, bool orig,
1032+ __be32 src_addr, __be16 src_port,
1033+ __be32 dest_addr, __be16 dest_port);
1034+int mtk_foe_entry_set_ipv6_tuple(struct mtk_foe_entry *entry,
1035+ __be32 *src_addr, __be16 src_port,
1036+ __be32 *dest_addr, __be16 dest_port);
1037+int mtk_foe_entry_set_dsa(struct mtk_foe_entry *entry, int port);
1038+int mtk_foe_entry_set_vlan(struct mtk_foe_entry *entry, int vid);
1039+int mtk_foe_entry_set_pppoe(struct mtk_foe_entry *entry, int sid);
1040+int mtk_foe_entry_commit(struct mtk_ppe *ppe, struct mtk_foe_entry *entry,
1041+ u16 timestamp);
1042+int mtk_ppe_debugfs_init(struct mtk_ppe *ppe);
1043+
1044+#endif
1045diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
1046new file mode 100644
1047index 000000000..d4b482340
1048--- /dev/null
1049+++ b/drivers/net/ethernet/mediatek/mtk_ppe_debugfs.c
1050@@ -0,0 +1,214 @@
1051+// SPDX-License-Identifier: GPL-2.0-only
1052+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
1053+
1054+#include <linux/kernel.h>
1055+#include <linux/debugfs.h>
1056+#include "mtk_eth_soc.h"
1057+
1058+struct mtk_flow_addr_info
1059+{
1060+ void *src, *dest;
1061+ u16 *src_port, *dest_port;
1062+ bool ipv6;
1063+};
1064+
1065+static const char *mtk_foe_entry_state_str(int state)
1066+{
1067+ static const char * const state_str[] = {
1068+ [MTK_FOE_STATE_INVALID] = "INV",
1069+ [MTK_FOE_STATE_UNBIND] = "UNB",
1070+ [MTK_FOE_STATE_BIND] = "BND",
1071+ [MTK_FOE_STATE_FIN] = "FIN",
1072+ };
1073+
1074+ if (state >= ARRAY_SIZE(state_str) || !state_str[state])
1075+ return "UNK";
1076+
1077+ return state_str[state];
1078+}
1079+
1080+static const char *mtk_foe_pkt_type_str(int type)
1081+{
1082+ static const char * const type_str[] = {
1083+ [MTK_PPE_PKT_TYPE_IPV4_HNAPT] = "IPv4 5T",
1084+ [MTK_PPE_PKT_TYPE_IPV4_ROUTE] = "IPv4 3T",
1085+ [MTK_PPE_PKT_TYPE_BRIDGE] = "L2",
1086+ [MTK_PPE_PKT_TYPE_IPV4_DSLITE] = "DS-LITE",
1087+ [MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T] = "IPv6 3T",
1088+ [MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T] = "IPv6 5T",
1089+ [MTK_PPE_PKT_TYPE_IPV6_6RD] = "6RD",
1090+ };
1091+
1092+ if (type >= ARRAY_SIZE(type_str) || !type_str[type])
1093+ return "UNKNOWN";
1094+
1095+ return type_str[type];
1096+}
1097+
1098+static void
1099+mtk_print_addr(struct seq_file *m, u32 *addr, bool ipv6)
1100+{
1101+ u32 n_addr[4];
1102+ int i;
1103+
1104+ if (!ipv6) {
1105+ seq_printf(m, "%pI4h", addr);
1106+ return;
1107+ }
1108+
1109+ for (i = 0; i < ARRAY_SIZE(n_addr); i++)
1110+ n_addr[i] = htonl(addr[i]);
1111+ seq_printf(m, "%pI6", n_addr);
1112+}
1113+
1114+static void
1115+mtk_print_addr_info(struct seq_file *m, struct mtk_flow_addr_info *ai)
1116+{
1117+ mtk_print_addr(m, ai->src, ai->ipv6);
1118+ if (ai->src_port)
1119+ seq_printf(m, ":%d", *ai->src_port);
1120+ seq_printf(m, "->");
1121+ mtk_print_addr(m, ai->dest, ai->ipv6);
1122+ if (ai->dest_port)
1123+ seq_printf(m, ":%d", *ai->dest_port);
1124+}
1125+
1126+static int
1127+mtk_ppe_debugfs_foe_show(struct seq_file *m, void *private, bool bind)
1128+{
1129+ struct mtk_ppe *ppe = m->private;
1130+ int i;
1131+
1132+ for (i = 0; i < MTK_PPE_ENTRIES; i++) {
1133+ struct mtk_foe_entry *entry = &ppe->foe_table[i];
1134+ struct mtk_foe_mac_info *l2;
1135+ struct mtk_flow_addr_info ai = {};
1136+ unsigned char h_source[ETH_ALEN];
1137+ unsigned char h_dest[ETH_ALEN];
1138+ int type, state;
1139+ u32 ib2;
1140+
1141+
1142+ state = FIELD_GET(MTK_FOE_IB1_STATE, entry->ib1);
1143+ if (!state)
1144+ continue;
1145+
1146+ if (bind && state != MTK_FOE_STATE_BIND)
1147+ continue;
1148+
1149+ type = FIELD_GET(MTK_FOE_IB1_PACKET_TYPE, entry->ib1);
1150+ seq_printf(m, "%05x %s %7s", i,
1151+ mtk_foe_entry_state_str(state),
1152+ mtk_foe_pkt_type_str(type));
1153+
1154+ switch (type) {
1155+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
1156+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
1157+ ai.src_port = &entry->ipv4.orig.src_port;
1158+ ai.dest_port = &entry->ipv4.orig.dest_port;
1159+ fallthrough;
1160+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
1161+ ai.src = &entry->ipv4.orig.src_ip;
1162+ ai.dest = &entry->ipv4.orig.dest_ip;
1163+ break;
1164+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T:
1165+ ai.src_port = &entry->ipv6.src_port;
1166+ ai.dest_port = &entry->ipv6.dest_port;
1167+ fallthrough;
1168+ case MTK_PPE_PKT_TYPE_IPV6_ROUTE_3T:
1169+ case MTK_PPE_PKT_TYPE_IPV6_6RD:
1170+ ai.src = &entry->ipv6.src_ip;
1171+ ai.dest = &entry->ipv6.dest_ip;
1172+ ai.ipv6 = true;
1173+ break;
1174+ }
1175+
1176+ seq_printf(m, " orig=");
1177+ mtk_print_addr_info(m, &ai);
1178+
1179+ switch (type) {
1180+ case MTK_PPE_PKT_TYPE_IPV4_HNAPT:
1181+ case MTK_PPE_PKT_TYPE_IPV4_DSLITE:
1182+ ai.src_port = &entry->ipv4.new.src_port;
1183+ ai.dest_port = &entry->ipv4.new.dest_port;
1184+ fallthrough;
1185+ case MTK_PPE_PKT_TYPE_IPV4_ROUTE:
1186+ ai.src = &entry->ipv4.new.src_ip;
1187+ ai.dest = &entry->ipv4.new.dest_ip;
1188+ seq_printf(m, " new=");
1189+ mtk_print_addr_info(m, &ai);
1190+ break;
1191+ }
1192+
1193+ if (type >= MTK_PPE_PKT_TYPE_IPV4_DSLITE) {
1194+ l2 = &entry->ipv6.l2;
1195+ ib2 = entry->ipv6.ib2;
1196+ } else {
1197+ l2 = &entry->ipv4.l2;
1198+ ib2 = entry->ipv4.ib2;
1199+ }
1200+
1201+ *((__be32 *)h_source) = htonl(l2->src_mac_hi);
1202+ *((__be16 *)&h_source[4]) = htons(l2->src_mac_lo);
1203+ *((__be32 *)h_dest) = htonl(l2->dest_mac_hi);
1204+ *((__be16 *)&h_dest[4]) = htons(l2->dest_mac_lo);
1205+
1206+ seq_printf(m, " eth=%pM->%pM etype=%04x"
1207+ " vlan=%d,%d ib1=%08x ib2=%08x\n",
1208+ h_source, h_dest, ntohs(l2->etype),
1209+ l2->vlan1, l2->vlan2, entry->ib1, ib2);
1210+ }
1211+
1212+ return 0;
1213+}
1214+
1215+static int
1216+mtk_ppe_debugfs_foe_show_all(struct seq_file *m, void *private)
1217+{
1218+ return mtk_ppe_debugfs_foe_show(m, private, false);
1219+}
1220+
1221+static int
1222+mtk_ppe_debugfs_foe_show_bind(struct seq_file *m, void *private)
1223+{
1224+ return mtk_ppe_debugfs_foe_show(m, private, true);
1225+}
1226+
1227+static int
1228+mtk_ppe_debugfs_foe_open_all(struct inode *inode, struct file *file)
1229+{
1230+ return single_open(file, mtk_ppe_debugfs_foe_show_all,
1231+ inode->i_private);
1232+}
1233+
1234+static int
1235+mtk_ppe_debugfs_foe_open_bind(struct inode *inode, struct file *file)
1236+{
1237+ return single_open(file, mtk_ppe_debugfs_foe_show_bind,
1238+ inode->i_private);
1239+}
1240+
1241+int mtk_ppe_debugfs_init(struct mtk_ppe *ppe)
1242+{
1243+ static const struct file_operations fops_all = {
1244+ .open = mtk_ppe_debugfs_foe_open_all,
1245+ .read = seq_read,
1246+ .llseek = seq_lseek,
1247+ .release = single_release,
1248+ };
1249+
1250+ static const struct file_operations fops_bind = {
1251+ .open = mtk_ppe_debugfs_foe_open_bind,
1252+ .read = seq_read,
1253+ .llseek = seq_lseek,
1254+ .release = single_release,
1255+ };
1256+
1257+ struct dentry *root;
1258+
1259+ root = debugfs_create_dir("mtk_ppe", NULL);
1260+ debugfs_create_file("entries", S_IRUGO, root, ppe, &fops_all);
1261+ debugfs_create_file("bind", S_IRUGO, root, ppe, &fops_bind);
1262+
1263+ return 0;
1264+}
1265diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_offload.c b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
1266new file mode 100644
1267index 000000000..4294f0c74
1268--- /dev/null
1269+++ b/drivers/net/ethernet/mediatek/mtk_ppe_offload.c
1270@@ -0,0 +1,526 @@
1271+// SPDX-License-Identifier: GPL-2.0-only
1272+/*
1273+ * Copyright (C) 2020 Felix Fietkau <nbd@nbd.name>
1274+ */
1275+
1276+#include <linux/if_ether.h>
1277+#include <linux/rhashtable.h>
1278+#include <linux/ip.h>
1279+#include <linux/ipv6.h>
1280+#include <net/flow_offload.h>
1281+#include <net/pkt_cls.h>
1282+#include <net/dsa.h>
1283+#include "mtk_eth_soc.h"
1284+
1285+struct mtk_flow_data {
1286+ struct ethhdr eth;
1287+
1288+ union {
1289+ struct {
1290+ __be32 src_addr;
1291+ __be32 dst_addr;
1292+ } v4;
1293+
1294+ struct {
1295+ struct in6_addr src_addr;
1296+ struct in6_addr dst_addr;
1297+ } v6;
1298+ };
1299+
1300+ __be16 src_port;
1301+ __be16 dst_port;
1302+
1303+ struct {
1304+ u16 id;
1305+ __be16 proto;
1306+ u8 num;
1307+ } vlan;
1308+ struct {
1309+ u16 sid;
1310+ u8 num;
1311+ } pppoe;
1312+};
1313+
1314+struct mtk_flow_entry {
1315+ struct rhash_head node;
1316+ unsigned long cookie;
1317+ u16 hash;
1318+};
1319+
1320+static const struct rhashtable_params mtk_flow_ht_params = {
1321+ .head_offset = offsetof(struct mtk_flow_entry, node),
1322+ .key_offset = offsetof(struct mtk_flow_entry, cookie),
1323+ .key_len = sizeof(unsigned long),
1324+ .automatic_shrinking = true,
1325+};
1326+
1327+static u32
1328+mtk_eth_timestamp(struct mtk_eth *eth)
1329+{
1330+ return mtk_r32(eth, 0x0010) & MTK_FOE_IB1_BIND_TIMESTAMP;
1331+}
1332+
1333+static int
1334+mtk_flow_set_ipv4_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data,
1335+ bool egress)
1336+{
1337+ return mtk_foe_entry_set_ipv4_tuple(foe, egress,
1338+ data->v4.src_addr, data->src_port,
1339+ data->v4.dst_addr, data->dst_port);
1340+}
1341+
1342+static int
1343+mtk_flow_set_ipv6_addr(struct mtk_foe_entry *foe, struct mtk_flow_data *data)
1344+{
1345+ return mtk_foe_entry_set_ipv6_tuple(foe,
1346+ data->v6.src_addr.s6_addr32, data->src_port,
1347+ data->v6.dst_addr.s6_addr32, data->dst_port);
1348+}
1349+
1350+static void
1351+mtk_flow_offload_mangle_eth(const struct flow_action_entry *act, void *eth)
1352+{
1353+ void *dest = eth + act->mangle.offset;
1354+ const void *src = &act->mangle.val;
1355+
1356+ if (act->mangle.offset > 8)
1357+ return;
1358+
1359+ if (act->mangle.mask == 0xffff) {
1360+ src += 2;
1361+ dest += 2;
1362+ }
1363+
1364+ memcpy(dest, src, act->mangle.mask ? 2 : 4);
1365+}
1366+
1367+
1368+static int
1369+mtk_flow_mangle_ports(const struct flow_action_entry *act,
1370+ struct mtk_flow_data *data)
1371+{
1372+ u32 val = ntohl(act->mangle.val);
1373+
1374+ switch (act->mangle.offset) {
1375+ case 0:
1376+ if (act->mangle.mask == ~htonl(0xffff))
1377+ data->dst_port = cpu_to_be16(val);
1378+ else
1379+ data->src_port = cpu_to_be16(val >> 16);
1380+ break;
1381+ case 2:
1382+ data->dst_port = cpu_to_be16(val);
1383+ break;
1384+ default:
1385+ return -EINVAL;
1386+ }
1387+
1388+ return 0;
1389+}
1390+
1391+static int
1392+mtk_flow_mangle_ipv4(const struct flow_action_entry *act,
1393+ struct mtk_flow_data *data)
1394+{
1395+ __be32 *dest;
1396+
1397+ switch (act->mangle.offset) {
1398+ case offsetof(struct iphdr, saddr):
1399+ dest = &data->v4.src_addr;
1400+ break;
1401+ case offsetof(struct iphdr, daddr):
1402+ dest = &data->v4.dst_addr;
1403+ break;
1404+ default:
1405+ return -EINVAL;
1406+ }
1407+
1408+ memcpy(dest, &act->mangle.val, sizeof(u32));
1409+
1410+ return 0;
1411+}
1412+
1413+static int
1414+mtk_flow_get_dsa_port(struct net_device **dev)
1415+{
1416+#if IS_ENABLED(CONFIG_NET_DSA)
1417+ struct dsa_port *dp;
1418+
1419+ dp = dsa_port_from_netdev(*dev);
1420+ if (IS_ERR(dp))
1421+ return -ENODEV;
1422+
1423+ if (dp->cpu_dp->tag_ops->proto != DSA_TAG_PROTO_MTK)
1424+ return -ENODEV;
1425+
1426+ *dev = dp->cpu_dp->master;
1427+
1428+ return dp->index;
1429+#else
1430+ return -ENODEV;
1431+#endif
1432+}
1433+
1434+static int
1435+mtk_flow_set_output_device(struct mtk_eth *eth, struct mtk_foe_entry *foe,
1436+ struct net_device *dev)
1437+{
1438+ int pse_port, dsa_port;
1439+
1440+ dsa_port = mtk_flow_get_dsa_port(&dev);
1441+ if (dsa_port >= 0)
1442+ mtk_foe_entry_set_dsa(foe, dsa_port);
1443+
1444+ if (dev == eth->netdev[0])
1445+ pse_port = 1;
1446+ else if (dev == eth->netdev[1])
1447+ pse_port = 2;
1448+ else
1449+ return -EOPNOTSUPP;
1450+
1451+ mtk_foe_entry_set_pse_port(foe, pse_port);
1452+
1453+ return 0;
1454+}
1455+
1456+static int
1457+mtk_flow_offload_replace(struct mtk_eth *eth, struct flow_cls_offload *f)
1458+{
1459+ struct flow_rule *rule = flow_cls_offload_flow_rule(f);
1460+ struct flow_action_entry *act;
1461+ struct mtk_flow_data data = {};
1462+ struct mtk_foe_entry foe;
1463+ struct net_device *odev = NULL;
1464+ struct mtk_flow_entry *entry;
1465+ int offload_type = 0;
1466+ u16 addr_type = 0;
1467+ u32 timestamp;
1468+ u8 l4proto = 0;
1469+ int err = 0;
1470+ int hash;
1471+ int i;
1472+
1473+ if (rhashtable_lookup(&eth->flow_table, &f->cookie, mtk_flow_ht_params))
1474+ return -EEXIST;
1475+
1476+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
1477+ struct flow_match_meta match;
1478+
1479+ flow_rule_match_meta(rule, &match);
1480+ } else {
1481+ return -EOPNOTSUPP;
1482+ }
1483+
1484+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
1485+ struct flow_match_control match;
1486+
1487+ flow_rule_match_control(rule, &match);
1488+ addr_type = match.key->addr_type;
1489+ } else {
1490+ return -EOPNOTSUPP;
1491+ }
1492+
1493+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
1494+ struct flow_match_basic match;
1495+
1496+ flow_rule_match_basic(rule, &match);
1497+ l4proto = match.key->ip_proto;
1498+ } else {
1499+ return -EOPNOTSUPP;
1500+ }
1501+
1502+ flow_action_for_each(i, act, &rule->action) {
1503+ switch (act->id) {
1504+ case FLOW_ACTION_MANGLE:
1505+ if (act->mangle.htype == FLOW_ACT_MANGLE_HDR_TYPE_ETH)
1506+ mtk_flow_offload_mangle_eth(act, &data.eth);
1507+ break;
1508+ case FLOW_ACTION_REDIRECT:
1509+ odev = act->dev;
1510+ break;
1511+ case FLOW_ACTION_CSUM:
1512+ break;
1513+ case FLOW_ACTION_VLAN_PUSH:
1514+ if (data.vlan.num == 1 ||
1515+ act->vlan.proto != htons(ETH_P_8021Q))
1516+ return -EOPNOTSUPP;
1517+
1518+ data.vlan.id = act->vlan.vid;
1519+ data.vlan.proto = act->vlan.proto;
1520+ data.vlan.num++;
1521+ break;
1522+ case FLOW_ACTION_VLAN_POP:
1523+ break;
1524+ case FLOW_ACTION_PPPOE_PUSH:
1525+ if (data.pppoe.num == 1)
1526+ return -EOPNOTSUPP;
1527+
1528+ data.pppoe.sid = act->pppoe.sid;
1529+ data.pppoe.num++;
1530+ break;
1531+ default:
1532+ return -EOPNOTSUPP;
1533+ }
1534+ }
1535+
1536+ switch (addr_type) {
1537+ case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
1538+ offload_type = MTK_PPE_PKT_TYPE_IPV4_HNAPT;
1539+ break;
1540+ case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
1541+ offload_type = MTK_PPE_PKT_TYPE_IPV6_ROUTE_5T;
1542+ break;
1543+ default:
1544+ return -EOPNOTSUPP;
1545+ }
1546+
1547+ if (!is_valid_ether_addr(data.eth.h_source) ||
1548+ !is_valid_ether_addr(data.eth.h_dest))
1549+ return -EINVAL;
1550+
1551+ err = mtk_foe_entry_prepare(&foe, offload_type, l4proto, 0,
1552+ data.eth.h_source,
1553+ data.eth.h_dest);
1554+ if (err)
1555+ return err;
1556+
1557+ if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
1558+ struct flow_match_ports ports;
1559+
1560+ flow_rule_match_ports(rule, &ports);
1561+ data.src_port = ports.key->src;
1562+ data.dst_port = ports.key->dst;
1563+ } else {
1564+ return -EOPNOTSUPP;
1565+ }
1566+
1567+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
1568+ struct flow_match_ipv4_addrs addrs;
1569+
1570+ flow_rule_match_ipv4_addrs(rule, &addrs);
1571+
1572+ data.v4.src_addr = addrs.key->src;
1573+ data.v4.dst_addr = addrs.key->dst;
1574+
1575+ mtk_flow_set_ipv4_addr(&foe, &data, false);
1576+ }
1577+
1578+ if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
1579+ struct flow_match_ipv6_addrs addrs;
1580+
1581+ flow_rule_match_ipv6_addrs(rule, &addrs);
1582+
1583+ data.v6.src_addr = addrs.key->src;
1584+ data.v6.dst_addr = addrs.key->dst;
1585+
1586+ mtk_flow_set_ipv6_addr(&foe, &data);
1587+ }
1588+
1589+ flow_action_for_each(i, act, &rule->action) {
1590+ if (act->id != FLOW_ACTION_MANGLE)
1591+ continue;
1592+
1593+ switch (act->mangle.htype) {
1594+ case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
1595+ case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
1596+ err = mtk_flow_mangle_ports(act, &data);
1597+ break;
1598+ case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
1599+ err = mtk_flow_mangle_ipv4(act, &data);
1600+ break;
1601+ case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
1602+ /* handled earlier */
1603+ break;
1604+ default:
1605+ return -EOPNOTSUPP;
1606+ }
1607+
1608+ if (err)
1609+ return err;
1610+ }
1611+
1612+ if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
1613+ err = mtk_flow_set_ipv4_addr(&foe, &data, true);
1614+ if (err)
1615+ return err;
1616+ }
1617+
1618+ if (data.vlan.num == 1) {
1619+ if (data.vlan.proto != htons(ETH_P_8021Q))
1620+ return -EOPNOTSUPP;
1621+
1622+ mtk_foe_entry_set_vlan(&foe, data.vlan.id);
1623+ }
1624+ if (data.pppoe.num == 1)
1625+ mtk_foe_entry_set_pppoe(&foe, data.pppoe.sid);
1626+
1627+ err = mtk_flow_set_output_device(eth, &foe, odev);
1628+ if (err)
1629+ return err;
1630+
1631+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
1632+ if (!entry)
1633+ return -ENOMEM;
1634+
1635+ entry->cookie = f->cookie;
1636+ timestamp = mtk_eth_timestamp(eth);
1637+ hash = mtk_foe_entry_commit(&eth->ppe, &foe, timestamp);
1638+ if (hash < 0) {
1639+ err = hash;
1640+ goto free;
1641+ }
1642+
1643+ entry->hash = hash;
1644+ err = rhashtable_insert_fast(&eth->flow_table, &entry->node,
1645+ mtk_flow_ht_params);
1646+ if (err < 0)
1647+ goto clear_flow;
1648+
1649+ return 0;
1650+clear_flow:
1651+ mtk_foe_entry_clear(&eth->ppe, hash);
1652+free:
1653+ kfree(entry);
1654+ return err;
1655+}
1656+
1657+static int
1658+mtk_flow_offload_destroy(struct mtk_eth *eth, struct flow_cls_offload *f)
1659+{
1660+ struct mtk_flow_entry *entry;
1661+
1662+ entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
1663+ mtk_flow_ht_params);
1664+ if (!entry)
1665+ return -ENOENT;
1666+
1667+ mtk_foe_entry_clear(&eth->ppe, entry->hash);
1668+ rhashtable_remove_fast(&eth->flow_table, &entry->node,
1669+ mtk_flow_ht_params);
1670+ kfree(entry);
1671+
1672+ return 0;
1673+}
1674+
1675+static int
1676+mtk_flow_offload_stats(struct mtk_eth *eth, struct flow_cls_offload *f)
1677+{
1678+ struct mtk_flow_entry *entry;
1679+ int timestamp;
1680+ u32 idle;
1681+
1682+ entry = rhashtable_lookup(&eth->flow_table, &f->cookie,
1683+ mtk_flow_ht_params);
1684+ if (!entry)
1685+ return -ENOENT;
1686+
1687+ timestamp = mtk_foe_entry_timestamp(&eth->ppe, entry->hash);
1688+ if (timestamp < 0)
1689+ return -ETIMEDOUT;
1690+
1691+ idle = mtk_eth_timestamp(eth) - timestamp;
1692+ f->stats.lastused = jiffies - idle * HZ;
1693+
1694+ return 0;
1695+}
1696+
1697+static DEFINE_MUTEX(mtk_flow_offload_mutex);
1698+
1699+static int
1700+mtk_eth_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
1701+{
1702+ struct flow_cls_offload *cls = type_data;
1703+ struct net_device *dev = cb_priv;
1704+ struct mtk_mac *mac = netdev_priv(dev);
1705+ struct mtk_eth *eth = mac->hw;
1706+ int err;
1707+
1708+ if (!tc_can_offload(dev))
1709+ return -EOPNOTSUPP;
1710+
1711+ if (type != TC_SETUP_CLSFLOWER)
1712+ return -EOPNOTSUPP;
1713+
1714+ mutex_lock(&mtk_flow_offload_mutex);
1715+ switch (cls->command) {
1716+ case FLOW_CLS_REPLACE:
1717+ err = mtk_flow_offload_replace(eth, cls);
1718+ break;
1719+ case FLOW_CLS_DESTROY:
1720+ err = mtk_flow_offload_destroy(eth, cls);
1721+ break;
1722+ case FLOW_CLS_STATS:
1723+ err = mtk_flow_offload_stats(eth, cls);
1724+ break;
1725+ default:
1726+ err = -EOPNOTSUPP;
1727+ break;
1728+ }
1729+ mutex_unlock(&mtk_flow_offload_mutex);
1730+
1731+ return err;
1732+}
1733+
1734+static int
1735+mtk_eth_setup_tc_block(struct net_device *dev, struct flow_block_offload *f)
1736+{
1737+ struct mtk_mac *mac = netdev_priv(dev);
1738+ struct mtk_eth *eth = mac->hw;
1739+ static LIST_HEAD(block_cb_list);
1740+ struct flow_block_cb *block_cb;
1741+ flow_setup_cb_t *cb;
1742+
1743+ if (!eth->ppe.foe_table)
1744+ return -EOPNOTSUPP;
1745+
1746+ if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
1747+ return -EOPNOTSUPP;
1748+
1749+ cb = mtk_eth_setup_tc_block_cb;
1750+ f->driver_block_list = &block_cb_list;
1751+
1752+ switch (f->command) {
1753+ case FLOW_BLOCK_BIND:
1754+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
1755+ if (block_cb) {
1756+ flow_block_cb_incref(block_cb);
1757+ return 0;
1758+ }
1759+ block_cb = flow_block_cb_alloc(cb, dev, dev, NULL);
1760+ if (IS_ERR(block_cb))
1761+ return PTR_ERR(block_cb);
1762+
1763+ flow_block_cb_add(block_cb, f);
1764+ list_add_tail(&block_cb->driver_list, &block_cb_list);
1765+ return 0;
1766+ case FLOW_BLOCK_UNBIND:
1767+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
1768+ if (!block_cb)
1769+ return -ENOENT;
1770+
1771+ if (flow_block_cb_decref(block_cb)) {
1772+ flow_block_cb_remove(block_cb, f);
1773+ list_del(&block_cb->driver_list);
1774+ }
1775+ return 0;
1776+ default:
1777+ return -EOPNOTSUPP;
1778+ }
1779+}
1780+
1781+int mtk_eth_setup_tc(struct net_device *dev, enum tc_setup_type type,
1782+ void *type_data)
1783+{
1784+ if (type == TC_SETUP_FT)
1785+ return mtk_eth_setup_tc_block(dev, type_data);
1786+
1787+ return -EOPNOTSUPP;
1788+}
1789+
1790+int mtk_eth_offload_init(struct mtk_eth *eth)
1791+{
1792+ if (!eth->ppe.foe_table)
1793+ return 0;
1794+
1795+ return rhashtable_init(&eth->flow_table, &mtk_flow_ht_params);
1796+}
1797diff --git a/drivers/net/ethernet/mediatek/mtk_ppe_regs.h b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
1798new file mode 100644
1799index 000000000..0c45ea090
1800--- /dev/null
1801+++ b/drivers/net/ethernet/mediatek/mtk_ppe_regs.h
1802@@ -0,0 +1,144 @@
1803+// SPDX-License-Identifier: GPL-2.0-only
1804+/* Copyright (C) 2020 Felix Fietkau <nbd@nbd.name> */
1805+
1806+#ifndef __MTK_PPE_REGS_H
1807+#define __MTK_PPE_REGS_H
1808+
1809+#define MTK_PPE_GLO_CFG 0x200
1810+#define MTK_PPE_GLO_CFG_EN BIT(0)
1811+#define MTK_PPE_GLO_CFG_TSID_EN BIT(1)
1812+#define MTK_PPE_GLO_CFG_IP4_L4_CS_DROP BIT(2)
1813+#define MTK_PPE_GLO_CFG_IP4_CS_DROP BIT(3)
1814+#define MTK_PPE_GLO_CFG_TTL0_DROP BIT(4)
1815+#define MTK_PPE_GLO_CFG_PPE_BSWAP BIT(5)
1816+#define MTK_PPE_GLO_CFG_PSE_HASH_OFS BIT(6)
1817+#define MTK_PPE_GLO_CFG_MCAST_TB_EN BIT(7)
1818+#define MTK_PPE_GLO_CFG_FLOW_DROP_KA BIT(8)
1819+#define MTK_PPE_GLO_CFG_FLOW_DROP_UPDATE BIT(9)
1820+#define MTK_PPE_GLO_CFG_UDP_LITE_EN BIT(10)
1821+#define MTK_PPE_GLO_CFG_UDP_LEN_DROP BIT(11)
1822+#define MTK_PPE_GLO_CFG_MCAST_ENTRIES GENMASK(13, 12)
1823+#define MTK_PPE_GLO_CFG_BUSY BIT(31)
1824+
1825+#define MTK_PPE_FLOW_CFG 0x204
1826+#define MTK_PPE_FLOW_CFG_IP4_TCP_FRAG BIT(6)
1827+#define MTK_PPE_FLOW_CFG_IP4_UDP_FRAG BIT(7)
1828+#define MTK_PPE_FLOW_CFG_IP6_3T_ROUTE BIT(8)
1829+#define MTK_PPE_FLOW_CFG_IP6_5T_ROUTE BIT(9)
1830+#define MTK_PPE_FLOW_CFG_IP6_6RD BIT(10)
1831+#define MTK_PPE_FLOW_CFG_IP4_NAT BIT(12)
1832+#define MTK_PPE_FLOW_CFG_IP4_NAPT BIT(13)
1833+#define MTK_PPE_FLOW_CFG_IP4_DSLITE BIT(14)
1834+#define MTK_PPE_FLOW_CFG_L2_BRIDGE BIT(15)
1835+#define MTK_PPE_FLOW_CFG_IP_PROTO_BLACKLIST BIT(16)
1836+#define MTK_PPE_FLOW_CFG_IP4_NAT_FRAG BIT(17)
1837+#define MTK_PPE_FLOW_CFG_IP4_HASH_FLOW_LABEL BIT(18)
1838+#define MTK_PPE_FLOW_CFG_IP4_HASH_GRE_KEY BIT(19)
1839+#define MTK_PPE_FLOW_CFG_IP6_HASH_GRE_KEY BIT(20)
1840+
1841+#define MTK_PPE_IP_PROTO_CHK 0x208
1842+#define MTK_PPE_IP_PROTO_CHK_IPV4 GENMASK(15, 0)
1843+#define MTK_PPE_IP_PROTO_CHK_IPV6 GENMASK(31, 16)
1844+
1845+#define MTK_PPE_TB_CFG 0x21c
1846+#define MTK_PPE_TB_CFG_ENTRY_NUM GENMASK(2, 0)
1847+#define MTK_PPE_TB_CFG_ENTRY_80B BIT(3)
1848+#define MTK_PPE_TB_CFG_SEARCH_MISS GENMASK(5, 4)
1849+#define MTK_PPE_TB_CFG_AGE_PREBIND BIT(6)
1850+#define MTK_PPE_TB_CFG_AGE_NON_L4 BIT(7)
1851+#define MTK_PPE_TB_CFG_AGE_UNBIND BIT(8)
1852+#define MTK_PPE_TB_CFG_AGE_TCP BIT(9)
1853+#define MTK_PPE_TB_CFG_AGE_UDP BIT(10)
1854+#define MTK_PPE_TB_CFG_AGE_TCP_FIN BIT(11)
1855+#define MTK_PPE_TB_CFG_KEEPALIVE GENMASK(13, 12)
1856+#define MTK_PPE_TB_CFG_HASH_MODE GENMASK(15, 14)
1857+#define MTK_PPE_TB_CFG_SCAN_MODE GENMASK(17, 16)
1858+#define MTK_PPE_TB_CFG_HASH_DEBUG GENMASK(19, 18)
1859+
1860+enum {
1861+ MTK_PPE_SCAN_MODE_DISABLED,
1862+ MTK_PPE_SCAN_MODE_CHECK_AGE,
1863+ MTK_PPE_SCAN_MODE_KEEPALIVE_AGE,
1864+};
1865+
1866+enum {
1867+ MTK_PPE_KEEPALIVE_DISABLE,
1868+ MTK_PPE_KEEPALIVE_UNICAST_CPU,
1869+ MTK_PPE_KEEPALIVE_DUP_CPU = 3,
1870+};
1871+
1872+enum {
1873+ MTK_PPE_SEARCH_MISS_ACTION_DROP,
1874+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD = 2,
1875+ MTK_PPE_SEARCH_MISS_ACTION_FORWARD_BUILD = 3,
1876+};
1877+
1878+#define MTK_PPE_TB_BASE 0x220
1879+
1880+#define MTK_PPE_TB_USED 0x224
1881+#define MTK_PPE_TB_USED_NUM GENMASK(13, 0)
1882+
1883+#define MTK_PPE_BIND_RATE 0x228
1884+#define MTK_PPE_BIND_RATE_BIND GENMASK(15, 0)
1885+#define MTK_PPE_BIND_RATE_PREBIND GENMASK(31, 16)
1886+
1887+#define MTK_PPE_BIND_LIMIT0 0x22c
1888+#define MTK_PPE_BIND_LIMIT0_QUARTER GENMASK(13, 0)
1889+#define MTK_PPE_BIND_LIMIT0_HALF GENMASK(29, 16)
1890+
1891+#define MTK_PPE_BIND_LIMIT1 0x230
1892+#define MTK_PPE_BIND_LIMIT1_FULL GENMASK(13, 0)
1893+#define MTK_PPE_BIND_LIMIT1_NON_L4 GENMASK(23, 16)
1894+
1895+#define MTK_PPE_KEEPALIVE 0x234
1896+#define MTK_PPE_KEEPALIVE_TIME GENMASK(15, 0)
1897+#define MTK_PPE_KEEPALIVE_TIME_TCP GENMASK(23, 16)
1898+#define MTK_PPE_KEEPALIVE_TIME_UDP GENMASK(31, 24)
1899+
1900+#define MTK_PPE_UNBIND_AGE 0x238
1901+#define MTK_PPE_UNBIND_AGE_MIN_PACKETS GENMASK(31, 16)
1902+#define MTK_PPE_UNBIND_AGE_DELTA GENMASK(7, 0)
1903+
1904+#define MTK_PPE_BIND_AGE0 0x23c
1905+#define MTK_PPE_BIND_AGE0_DELTA_NON_L4 GENMASK(30, 16)
1906+#define MTK_PPE_BIND_AGE0_DELTA_UDP GENMASK(14, 0)
1907+
1908+#define MTK_PPE_BIND_AGE1 0x240
1909+#define MTK_PPE_BIND_AGE1_DELTA_TCP_FIN GENMASK(30, 16)
1910+#define MTK_PPE_BIND_AGE1_DELTA_TCP GENMASK(14, 0)
1911+
1912+#define MTK_PPE_HASH_SEED 0x244
1913+
1914+#define MTK_PPE_DEFAULT_CPU_PORT 0x248
1915+#define MTK_PPE_DEFAULT_CPU_PORT_MASK(_n) (GENMASK(2, 0) << ((_n) * 4))
1916+
1917+#define MTK_PPE_MTU_DROP 0x308
1918+
1919+#define MTK_PPE_VLAN_MTU0 0x30c
1920+#define MTK_PPE_VLAN_MTU0_NONE GENMASK(13, 0)
1921+#define MTK_PPE_VLAN_MTU0_1TAG GENMASK(29, 16)
1922+
1923+#define MTK_PPE_VLAN_MTU1 0x310
1924+#define MTK_PPE_VLAN_MTU1_2TAG GENMASK(13, 0)
1925+#define MTK_PPE_VLAN_MTU1_3TAG GENMASK(29, 16)
1926+
1927+#define MTK_PPE_VPM_TPID 0x318
1928+
1929+#define MTK_PPE_CACHE_CTL 0x320
1930+#define MTK_PPE_CACHE_CTL_EN BIT(0)
1931+#define MTK_PPE_CACHE_CTL_LOCK_CLR BIT(4)
1932+#define MTK_PPE_CACHE_CTL_REQ BIT(8)
1933+#define MTK_PPE_CACHE_CTL_CLEAR BIT(9)
1934+#define MTK_PPE_CACHE_CTL_CMD GENMASK(13, 12)
1935+
1936+#define MTK_PPE_MIB_CFG 0x334
1937+#define MTK_PPE_MIB_CFG_EN BIT(0)
1938+#define MTK_PPE_MIB_CFG_RD_CLR BIT(1)
1939+
1940+#define MTK_PPE_MIB_TB_BASE 0x338
1941+
1942+#define MTK_PPE_MIB_CACHE_CTL 0x350
1943+#define MTK_PPE_MIB_CACHE_CTL_EN BIT(0)
1944+#define MTK_PPE_MIB_CACHE_CTL_FLUSH BIT(2)
1945+
1946+#endif
1947diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
1948index a085213dc..813e30495 100644
1949--- a/drivers/net/ppp/ppp_generic.c
1950+++ b/drivers/net/ppp/ppp_generic.c
1951@@ -1378,12 +1378,34 @@ static void ppp_dev_priv_destructor(struct net_device *dev)
1952 ppp_destroy_interface(ppp);
1953 }
1954
1955+static int ppp_fill_forward_path(struct net_device_path_ctx *ctx,
1956+ struct net_device_path *path)
1957+{
1958+ struct ppp *ppp = netdev_priv(ctx->dev);
1959+ struct ppp_channel *chan;
1960+ struct channel *pch;
1961+
1962+ if (ppp->flags & SC_MULTILINK)
1963+ return -EOPNOTSUPP;
1964+
1965+ if (list_empty(&ppp->channels))
1966+ return -ENODEV;
1967+
1968+ pch = list_first_entry(&ppp->channels, struct channel, clist);
1969+ chan = pch->chan;
1970+ if (!chan->ops->fill_forward_path)
1971+ return -EOPNOTSUPP;
1972+
1973+ return chan->ops->fill_forward_path(ctx, path, chan);
1974+}
1975+
1976 static const struct net_device_ops ppp_netdev_ops = {
1977 .ndo_init = ppp_dev_init,
1978 .ndo_uninit = ppp_dev_uninit,
1979 .ndo_start_xmit = ppp_start_xmit,
1980 .ndo_do_ioctl = ppp_net_ioctl,
1981 .ndo_get_stats64 = ppp_get_stats64,
1982+ .ndo_fill_forward_path = ppp_fill_forward_path,
1983 };
1984
1985 static struct device_type ppp_type = {
1986diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
1987index 087b01684..7a8c246ab 100644
1988--- a/drivers/net/ppp/pppoe.c
1989+++ b/drivers/net/ppp/pppoe.c
1990@@ -974,8 +974,32 @@ static int pppoe_xmit(struct ppp_channel *chan, struct sk_buff *skb)
1991 return __pppoe_xmit(sk, skb);
1992 }
1993
1994+static int pppoe_fill_forward_path(struct net_device_path_ctx *ctx,
1995+ struct net_device_path *path,
1996+ const struct ppp_channel *chan)
1997+{
1998+ struct sock *sk = (struct sock *)chan->private;
1999+ struct pppox_sock *po = pppox_sk(sk);
2000+ struct net_device *dev = po->pppoe_dev;
2001+
2002+ if (sock_flag(sk, SOCK_DEAD) ||
2003+ !(sk->sk_state & PPPOX_CONNECTED) || !dev)
2004+ return -1;
2005+
2006+ path->type = DEV_PATH_PPPOE;
2007+ path->encap.proto = htons(ETH_P_PPP_SES);
2008+ path->encap.id = be16_to_cpu(po->num);
2009+ memcpy(path->encap.h_dest, po->pppoe_pa.remote, ETH_ALEN);
2010+ memcpy(ctx->daddr, po->pppoe_pa.remote, ETH_ALEN);
2011+ path->dev = ctx->dev;
2012+ ctx->dev = dev;
2013+
2014+ return 0;
2015+}
2016+
2017 static const struct ppp_channel_ops pppoe_chan_ops = {
2018 .start_xmit = pppoe_xmit,
2019+ .fill_forward_path = pppoe_fill_forward_path,
2020 };
2021
2022 static int pppoe_recvmsg(struct socket *sock, struct msghdr *m,
2023diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
2024index 38af42bf8..9f64504ac 100644
2025--- a/include/linux/netdevice.h
2026+++ b/include/linux/netdevice.h
2027@@ -829,6 +829,59 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
2028 struct sk_buff *skb,
2029 struct net_device *sb_dev);
2030
2031+enum net_device_path_type {
2032+ DEV_PATH_ETHERNET = 0,
2033+ DEV_PATH_VLAN,
2034+ DEV_PATH_BRIDGE,
2035+ DEV_PATH_PPPOE,
2036+ DEV_PATH_DSA,
2037+};
2038+
2039+struct net_device_path {
2040+ enum net_device_path_type type;
2041+ const struct net_device *dev;
2042+ union {
2043+ struct {
2044+ u16 id;
2045+ __be16 proto;
2046+ u8 h_dest[ETH_ALEN];
2047+ } encap;
2048+ struct {
2049+ enum {
2050+ DEV_PATH_BR_VLAN_KEEP,
2051+ DEV_PATH_BR_VLAN_TAG,
2052+ DEV_PATH_BR_VLAN_UNTAG,
2053+ DEV_PATH_BR_VLAN_UNTAG_HW,
2054+ } vlan_mode;
2055+ u16 vlan_id;
2056+ __be16 vlan_proto;
2057+ } bridge;
2058+ struct {
2059+ int port;
2060+ u16 proto;
2061+ } dsa;
2062+ };
2063+};
2064+
2065+#define NET_DEVICE_PATH_STACK_MAX 5
2066+#define NET_DEVICE_PATH_VLAN_MAX 2
2067+
2068+struct net_device_path_stack {
2069+ int num_paths;
2070+ struct net_device_path path[NET_DEVICE_PATH_STACK_MAX];
2071+};
2072+
2073+struct net_device_path_ctx {
2074+ const struct net_device *dev;
2075+ u8 daddr[ETH_ALEN];
2076+
2077+ int num_vlans;
2078+ struct {
2079+ u16 id;
2080+ __be16 proto;
2081+ } vlan[NET_DEVICE_PATH_VLAN_MAX];
2082+};
2083+
2084 enum tc_setup_type {
2085 TC_SETUP_QDISC_MQPRIO,
2086 TC_SETUP_CLSU32,
2087@@ -844,6 +897,7 @@ enum tc_setup_type {
2088 TC_SETUP_ROOT_QDISC,
2089 TC_SETUP_QDISC_GRED,
2090 TC_SETUP_QDISC_TAPRIO,
2091+ TC_SETUP_FT,
2092 };
2093
2094 /* These structures hold the attributes of bpf state that are being passed
2095@@ -1239,6 +1293,8 @@ struct tlsdev_ops;
2096 * Get devlink port instance associated with a given netdev.
2097 * Called with a reference on the netdevice and devlink locks only,
2098 * rtnl_lock is not held.
2099+ * int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx, struct net_device_path *path);
2100+ * Get the forwarding path to reach the real device from the HW destination address
2101 */
2102 struct net_device_ops {
2103 int (*ndo_init)(struct net_device *dev);
2104@@ -1436,6 +1492,8 @@ struct net_device_ops {
2105 int (*ndo_xsk_wakeup)(struct net_device *dev,
2106 u32 queue_id, u32 flags);
2107 struct devlink_port * (*ndo_get_devlink_port)(struct net_device *dev);
2108+ int (*ndo_fill_forward_path)(struct net_device_path_ctx *ctx,
2109+ struct net_device_path *path);
2110 };
2111
2112 /**
2113@@ -2661,6 +2719,8 @@ void dev_remove_offload(struct packet_offload *po);
2114
2115 int dev_get_iflink(const struct net_device *dev);
2116 int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
2117+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
2118+ struct net_device_path_stack *stack);
2119 struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
2120 unsigned short mask);
2121 struct net_device *dev_get_by_name(struct net *net, const char *name);
2122diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h
2123index 98966064e..91f9a9283 100644
2124--- a/include/linux/ppp_channel.h
2125+++ b/include/linux/ppp_channel.h
2126@@ -28,6 +28,9 @@ struct ppp_channel_ops {
2127 int (*start_xmit)(struct ppp_channel *, struct sk_buff *);
2128 /* Handle an ioctl call that has come in via /dev/ppp. */
2129 int (*ioctl)(struct ppp_channel *, unsigned int, unsigned long);
2130+ int (*fill_forward_path)(struct net_device_path_ctx *,
2131+ struct net_device_path *,
2132+ const struct ppp_channel *);
2133 };
2134
2135 struct ppp_channel {
2136diff --git a/include/net/dsa.h b/include/net/dsa.h
2137index 05f66d487..cafc74218 100644
2138--- a/include/net/dsa.h
2139+++ b/include/net/dsa.h
2140@@ -561,6 +561,8 @@ struct dsa_switch_ops {
2141 struct sk_buff *skb);
2142 };
2143
2144+struct dsa_port *dsa_port_from_netdev(struct net_device *netdev);
2145+
2146 struct dsa_switch_driver {
2147 struct list_head list;
2148 const struct dsa_switch_ops *ops;
2149@@ -653,6 +655,14 @@ static inline int call_dsa_notifiers(unsigned long val, struct net_device *dev,
2150 #define BRCM_TAG_GET_PORT(v) ((v) >> 8)
2151 #define BRCM_TAG_GET_QUEUE(v) ((v) & 0xff)
2152
2153+#if IS_ENABLED(CONFIG_NET_DSA)
2154+bool dsa_slave_dev_check(const struct net_device *dev);
2155+#else
2156+static inline bool dsa_slave_dev_check(const struct net_device *dev)
2157+{
2158+ return false;
2159+}
2160+#endif
2161
2162 netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev);
2163 int dsa_port_get_phy_strings(struct dsa_port *dp, uint8_t *data);
2164diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
2165index c6f7bd22d..59b873653 100644
2166--- a/include/net/flow_offload.h
2167+++ b/include/net/flow_offload.h
2168@@ -138,6 +138,7 @@ enum flow_action_id {
2169 FLOW_ACTION_MPLS_PUSH,
2170 FLOW_ACTION_MPLS_POP,
2171 FLOW_ACTION_MPLS_MANGLE,
2172+ FLOW_ACTION_PPPOE_PUSH,
2173 NUM_FLOW_ACTIONS,
2174 };
2175
2176@@ -213,6 +214,9 @@ struct flow_action_entry {
2177 u8 bos;
2178 u8 ttl;
2179 } mpls_mangle;
2180+ struct { /* FLOW_ACTION_PPPOE_PUSH */
2181+ u16 sid;
2182+ } pppoe;
2183 };
2184 };
2185
2186diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
2187index 2c739fc75..89ab8f180 100644
2188--- a/include/net/ip6_route.h
2189+++ b/include/net/ip6_route.h
2190@@ -314,12 +314,13 @@ static inline bool rt6_duplicate_nexthop(struct fib6_info *a, struct fib6_info *
2191 !lwtunnel_cmp_encap(nha->fib_nh_lws, nhb->fib_nh_lws);
2192 }
2193
2194-static inline unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
2195+static inline unsigned int ip6_dst_mtu_maybe_forward(const struct dst_entry *dst,
2196+ bool forwarding)
2197 {
2198 struct inet6_dev *idev;
2199 unsigned int mtu;
2200
2201- if (dst_metric_locked(dst, RTAX_MTU)) {
2202+ if (!forwarding || dst_metric_locked(dst, RTAX_MTU)) {
2203 mtu = dst_metric_raw(dst, RTAX_MTU);
2204 if (mtu)
2205 goto out;
2206diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
2207index 7b3c873f8..e95483192 100644
2208--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
2209+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
2210@@ -4,7 +4,4 @@
2211
2212 extern const struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
2213
2214-#include <linux/sysctl.h>
2215-extern struct ctl_table nf_ct_ipv6_sysctl_table[];
2216-
2217 #endif /* _NF_CONNTRACK_IPV6_H*/
2218diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
2219index 90690e37a..ce0bc3e62 100644
2220--- a/include/net/netfilter/nf_conntrack.h
2221+++ b/include/net/netfilter/nf_conntrack.h
2222@@ -279,6 +279,18 @@ static inline bool nf_ct_should_gc(const struct nf_conn *ct)
2223 !nf_ct_is_dying(ct);
2224 }
2225
2226+#define NF_CT_DAY (86400 * HZ)
2227+
2228+/* Set an arbitrary timeout large enough not to ever expire, this saves
2229+ * us a check for the IPS_OFFLOAD_BIT from the packet path via
2230+ * nf_ct_is_expired().
2231+ */
2232+static inline void nf_ct_offload_timeout(struct nf_conn *ct)
2233+{
2234+ if (nf_ct_expires(ct) < NF_CT_DAY / 2)
2235+ WRITE_ONCE(ct->timeout, nfct_time_stamp + NF_CT_DAY);
2236+}
2237+
2238 struct kernel_param;
2239
2240 int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp);
2241diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
2242index f7a060c6e..7f44a7715 100644
2243--- a/include/net/netfilter/nf_conntrack_acct.h
2244+++ b/include/net/netfilter/nf_conntrack_acct.h
2245@@ -65,6 +65,17 @@ static inline void nf_ct_set_acct(struct net *net, bool enable)
2246 #endif
2247 }
2248
2249+void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
2250+ unsigned int bytes);
2251+
2252+static inline void nf_ct_acct_update(struct nf_conn *ct, u32 dir,
2253+ unsigned int bytes)
2254+{
2255+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
2256+ nf_ct_acct_add(ct, dir, 1, bytes);
2257+#endif
2258+}
2259+
2260 void nf_conntrack_acct_pernet_init(struct net *net);
2261
2262 int nf_conntrack_acct_init(void);
2263diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
2264index b37a7d608..7cf897677 100644
2265--- a/include/net/netfilter/nf_flow_table.h
2266+++ b/include/net/netfilter/nf_flow_table.h
2267@@ -8,31 +8,99 @@
2268 #include <linux/rcupdate.h>
2269 #include <linux/netfilter.h>
2270 #include <linux/netfilter/nf_conntrack_tuple_common.h>
2271+#include <net/flow_offload.h>
2272 #include <net/dst.h>
2273+#include <linux/if_pppox.h>
2274+#include <linux/ppp_defs.h>
2275
2276 struct nf_flowtable;
2277+struct nf_flow_rule;
2278+struct flow_offload;
2279+enum flow_offload_tuple_dir;
2280+
2281+struct nf_flow_key {
2282+ struct flow_dissector_key_meta meta;
2283+ struct flow_dissector_key_control control;
2284+ struct flow_dissector_key_control enc_control;
2285+ struct flow_dissector_key_basic basic;
2286+ struct flow_dissector_key_vlan vlan;
2287+ struct flow_dissector_key_vlan cvlan;
2288+ union {
2289+ struct flow_dissector_key_ipv4_addrs ipv4;
2290+ struct flow_dissector_key_ipv6_addrs ipv6;
2291+ };
2292+ struct flow_dissector_key_keyid enc_key_id;
2293+ union {
2294+ struct flow_dissector_key_ipv4_addrs enc_ipv4;
2295+ struct flow_dissector_key_ipv6_addrs enc_ipv6;
2296+ };
2297+ struct flow_dissector_key_tcp tcp;
2298+ struct flow_dissector_key_ports tp;
2299+} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
2300+
2301+struct nf_flow_match {
2302+ struct flow_dissector dissector;
2303+ struct nf_flow_key key;
2304+ struct nf_flow_key mask;
2305+};
2306+
2307+struct nf_flow_rule {
2308+ struct nf_flow_match match;
2309+ struct flow_rule *rule;
2310+};
2311
2312 struct nf_flowtable_type {
2313 struct list_head list;
2314 int family;
2315 int (*init)(struct nf_flowtable *ft);
2316+ int (*setup)(struct nf_flowtable *ft,
2317+ struct net_device *dev,
2318+ enum flow_block_command cmd);
2319+ int (*action)(struct net *net,
2320+ const struct flow_offload *flow,
2321+ enum flow_offload_tuple_dir dir,
2322+ struct nf_flow_rule *flow_rule);
2323 void (*free)(struct nf_flowtable *ft);
2324 nf_hookfn *hook;
2325 struct module *owner;
2326 };
2327
2328+enum nf_flowtable_flags {
2329+ NF_FLOWTABLE_HW_OFFLOAD = 0x1, /* NFT_FLOWTABLE_HW_OFFLOAD */
2330+ NF_FLOWTABLE_COUNTER = 0x2, /* NFT_FLOWTABLE_COUNTER */
2331+};
2332+
2333 struct nf_flowtable {
2334 struct list_head list;
2335 struct rhashtable rhashtable;
2336+ int priority;
2337 const struct nf_flowtable_type *type;
2338 struct delayed_work gc_work;
2339+ unsigned int flags;
2340+ struct flow_block flow_block;
2341+ struct rw_semaphore flow_block_lock; /* Guards flow_block */
2342+ possible_net_t net;
2343 };
2344
2345+static inline bool nf_flowtable_hw_offload(struct nf_flowtable *flowtable)
2346+{
2347+ return flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD;
2348+}
2349+
2350 enum flow_offload_tuple_dir {
2351 FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
2352 FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
2353- FLOW_OFFLOAD_DIR_MAX = IP_CT_DIR_MAX
2354 };
2355+#define FLOW_OFFLOAD_DIR_MAX IP_CT_DIR_MAX
2356+
2357+enum flow_offload_xmit_type {
2358+ FLOW_OFFLOAD_XMIT_UNSPEC = 0,
2359+ FLOW_OFFLOAD_XMIT_NEIGH,
2360+ FLOW_OFFLOAD_XMIT_XFRM,
2361+ FLOW_OFFLOAD_XMIT_DIRECT,
2362+};
2363+
2364+#define NF_FLOW_TABLE_ENCAP_MAX 2
2365
2366 struct flow_offload_tuple {
2367 union {
2368@@ -52,11 +120,31 @@ struct flow_offload_tuple {
2369
2370 u8 l3proto;
2371 u8 l4proto;
2372- u8 dir;
2373+ struct {
2374+ u16 id;
2375+ __be16 proto;
2376+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
2377
2378- u16 mtu;
2379+ /* All members above are keys for lookups, see flow_offload_hash(). */
2380+ struct { } __hash;
2381
2382- struct dst_entry *dst_cache;
2383+ u8 dir:2,
2384+ xmit_type:2,
2385+ encap_num:2,
2386+ in_vlan_ingress:2;
2387+ u16 mtu;
2388+ union {
2389+ struct {
2390+ struct dst_entry *dst_cache;
2391+ u32 dst_cookie;
2392+ };
2393+ struct {
2394+ u32 ifidx;
2395+ u32 hw_ifidx;
2396+ u8 h_source[ETH_ALEN];
2397+ u8 h_dest[ETH_ALEN];
2398+ } out;
2399+ };
2400 };
2401
2402 struct flow_offload_tuple_rhash {
2403@@ -64,52 +152,139 @@ struct flow_offload_tuple_rhash {
2404 struct flow_offload_tuple tuple;
2405 };
2406
2407-#define FLOW_OFFLOAD_SNAT 0x1
2408-#define FLOW_OFFLOAD_DNAT 0x2
2409-#define FLOW_OFFLOAD_DYING 0x4
2410-#define FLOW_OFFLOAD_TEARDOWN 0x8
2411+enum nf_flow_flags {
2412+ NF_FLOW_SNAT,
2413+ NF_FLOW_DNAT,
2414+ NF_FLOW_TEARDOWN,
2415+ NF_FLOW_HW,
2416+ NF_FLOW_HW_DYING,
2417+ NF_FLOW_HW_DEAD,
2418+ NF_FLOW_HW_PENDING,
2419+};
2420+
2421+enum flow_offload_type {
2422+ NF_FLOW_OFFLOAD_UNSPEC = 0,
2423+ NF_FLOW_OFFLOAD_ROUTE,
2424+};
2425
2426 struct flow_offload {
2427 struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];
2428- u32 flags;
2429- union {
2430- /* Your private driver data here. */
2431- u32 timeout;
2432- };
2433+ struct nf_conn *ct;
2434+ unsigned long flags;
2435+ u16 type;
2436+ u32 timeout;
2437+ struct rcu_head rcu_head;
2438 };
2439
2440 #define NF_FLOW_TIMEOUT (30 * HZ)
2441+#define nf_flowtable_time_stamp (u32)jiffies
2442+
2443+unsigned long flow_offload_get_timeout(struct flow_offload *flow);
2444+
2445+static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
2446+{
2447+ return (__s32)(timeout - nf_flowtable_time_stamp);
2448+}
2449
2450 struct nf_flow_route {
2451 struct {
2452- struct dst_entry *dst;
2453+ struct dst_entry *dst;
2454+ struct {
2455+ u32 ifindex;
2456+ struct {
2457+ u16 id;
2458+ __be16 proto;
2459+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
2460+ u8 num_encaps:2,
2461+ ingress_vlans:2;
2462+ } in;
2463+ struct {
2464+ u32 ifindex;
2465+ u32 hw_ifindex;
2466+ u8 h_source[ETH_ALEN];
2467+ u8 h_dest[ETH_ALEN];
2468+ } out;
2469+ enum flow_offload_xmit_type xmit_type;
2470 } tuple[FLOW_OFFLOAD_DIR_MAX];
2471 };
2472
2473-struct flow_offload *flow_offload_alloc(struct nf_conn *ct,
2474- struct nf_flow_route *route);
2475+struct flow_offload *flow_offload_alloc(struct nf_conn *ct);
2476 void flow_offload_free(struct flow_offload *flow);
2477
2478+static inline int
2479+nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table,
2480+ flow_setup_cb_t *cb, void *cb_priv)
2481+{
2482+ struct flow_block *block = &flow_table->flow_block;
2483+ struct flow_block_cb *block_cb;
2484+ int err = 0;
2485+
2486+ down_write(&flow_table->flow_block_lock);
2487+ block_cb = flow_block_cb_lookup(block, cb, cb_priv);
2488+ if (block_cb) {
2489+ err = -EEXIST;
2490+ goto unlock;
2491+ }
2492+
2493+ block_cb = flow_block_cb_alloc(cb, cb_priv, cb_priv, NULL);
2494+ if (IS_ERR(block_cb)) {
2495+ err = PTR_ERR(block_cb);
2496+ goto unlock;
2497+ }
2498+
2499+ list_add_tail(&block_cb->list, &block->cb_list);
2500+
2501+unlock:
2502+ up_write(&flow_table->flow_block_lock);
2503+ return err;
2504+}
2505+
2506+static inline void
2507+nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
2508+ flow_setup_cb_t *cb, void *cb_priv)
2509+{
2510+ struct flow_block *block = &flow_table->flow_block;
2511+ struct flow_block_cb *block_cb;
2512+
2513+ down_write(&flow_table->flow_block_lock);
2514+ block_cb = flow_block_cb_lookup(block, cb, cb_priv);
2515+ if (block_cb) {
2516+ list_del(&block_cb->list);
2517+ flow_block_cb_free(block_cb);
2518+ } else {
2519+ WARN_ON(true);
2520+ }
2521+ up_write(&flow_table->flow_block_lock);
2522+}
2523+
2524+int flow_offload_route_init(struct flow_offload *flow,
2525+ const struct nf_flow_route *route);
2526+
2527 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
2528+void flow_offload_refresh(struct nf_flowtable *flow_table,
2529+ struct flow_offload *flow);
2530+
2531 struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
2532 struct flow_offload_tuple *tuple);
2533+void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
2534+ struct net_device *dev);
2535 void nf_flow_table_cleanup(struct net_device *dev);
2536
2537 int nf_flow_table_init(struct nf_flowtable *flow_table);
2538 void nf_flow_table_free(struct nf_flowtable *flow_table);
2539
2540 void flow_offload_teardown(struct flow_offload *flow);
2541-static inline void flow_offload_dead(struct flow_offload *flow)
2542-{
2543- flow->flags |= FLOW_OFFLOAD_DYING;
2544-}
2545
2546-int nf_flow_snat_port(const struct flow_offload *flow,
2547- struct sk_buff *skb, unsigned int thoff,
2548- u8 protocol, enum flow_offload_tuple_dir dir);
2549-int nf_flow_dnat_port(const struct flow_offload *flow,
2550- struct sk_buff *skb, unsigned int thoff,
2551- u8 protocol, enum flow_offload_tuple_dir dir);
2552+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
2553+ void (*iter)(struct flow_offload *flow, void *data),
2554+ void *data);
2555+
2556+void nf_flow_snat_port(const struct flow_offload *flow,
2557+ struct sk_buff *skb, unsigned int thoff,
2558+ u8 protocol, enum flow_offload_tuple_dir dir);
2559+void nf_flow_dnat_port(const struct flow_offload *flow,
2560+ struct sk_buff *skb, unsigned int thoff,
2561+ u8 protocol, enum flow_offload_tuple_dir dir);
2562
2563 struct flow_ports {
2564 __be16 source, dest;
2565@@ -123,4 +298,41 @@ unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
2566 #define MODULE_ALIAS_NF_FLOWTABLE(family) \
2567 MODULE_ALIAS("nf-flowtable-" __stringify(family))
2568
2569+void nf_flow_offload_add(struct nf_flowtable *flowtable,
2570+ struct flow_offload *flow);
2571+void nf_flow_offload_del(struct nf_flowtable *flowtable,
2572+ struct flow_offload *flow);
2573+void nf_flow_offload_stats(struct nf_flowtable *flowtable,
2574+ struct flow_offload *flow);
2575+
2576+void nf_flow_table_offload_flush(struct nf_flowtable *flowtable);
2577+int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
2578+ struct net_device *dev,
2579+ enum flow_block_command cmd);
2580+int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
2581+ enum flow_offload_tuple_dir dir,
2582+ struct nf_flow_rule *flow_rule);
2583+int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
2584+ enum flow_offload_tuple_dir dir,
2585+ struct nf_flow_rule *flow_rule);
2586+
2587+int nf_flow_table_offload_init(void);
2588+void nf_flow_table_offload_exit(void);
2589+
2590+static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
2591+{
2592+ __be16 proto;
2593+
2594+ proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
2595+ sizeof(struct pppoe_hdr)));
2596+ switch (proto) {
2597+ case htons(PPP_IP):
2598+ return htons(ETH_P_IP);
2599+ case htons(PPP_IPV6):
2600+ return htons(ETH_P_IPV6);
2601+ }
2602+
2603+ return 0;
2604+}
2605+
2606 #endif /* _NF_FLOW_TABLE_H */
2607diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
2608index 806454e76..9e3963c8f 100644
2609--- a/include/net/netns/conntrack.h
2610+++ b/include/net/netns/conntrack.h
2611@@ -27,6 +27,9 @@ struct nf_tcp_net {
2612 int tcp_loose;
2613 int tcp_be_liberal;
2614 int tcp_max_retrans;
2615+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
2616+ unsigned int offload_timeout;
2617+#endif
2618 };
2619
2620 enum udp_conntrack {
2621@@ -37,6 +40,9 @@ enum udp_conntrack {
2622
2623 struct nf_udp_net {
2624 unsigned int timeouts[UDP_CT_MAX];
2625+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
2626+ unsigned int offload_timeout;
2627+#endif
2628 };
2629
2630 struct nf_icmp_net {
2631diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
2632index 336014bf8..ae698d11c 100644
2633--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
2634+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
2635@@ -105,14 +105,19 @@ enum ip_conntrack_status {
2636 IPS_OFFLOAD_BIT = 14,
2637 IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT),
2638
2639+ /* Conntrack has been offloaded to hardware. */
2640+ IPS_HW_OFFLOAD_BIT = 15,
2641+ IPS_HW_OFFLOAD = (1 << IPS_HW_OFFLOAD_BIT),
2642+
2643 /* Be careful here, modifying these bits can make things messy,
2644 * so don't let users modify them directly.
2645 */
2646 IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
2647 IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
2648- IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD),
2649+ IPS_SEQ_ADJUST | IPS_TEMPLATE |
2650+ IPS_OFFLOAD | IPS_HW_OFFLOAD),
2651
2652- __IPS_MAX_BIT = 15,
2653+ __IPS_MAX_BIT = 16,
2654 };
2655
2656 /* Connection tracking event types */
2657diff --git a/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
2658new file mode 100644
2659index 000000000..5841bbe0e
2660--- /dev/null
2661+++ b/include/uapi/linux/netfilter/xt_FLOWOFFLOAD.h
2662@@ -0,0 +1,17 @@
2663+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2664+#ifndef _XT_FLOWOFFLOAD_H
2665+#define _XT_FLOWOFFLOAD_H
2666+
2667+#include <linux/types.h>
2668+
2669+enum {
2670+ XT_FLOWOFFLOAD_HW = 1 << 0,
2671+
2672+ XT_FLOWOFFLOAD_MASK = XT_FLOWOFFLOAD_HW
2673+};
2674+
2675+struct xt_flowoffload_target_info {
2676+ __u32 flags;
2677+};
2678+
2679+#endif /* _XT_FLOWOFFLOAD_H */
2680diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
2681index 589615ec4..444ab5fae 100644
2682--- a/net/8021q/vlan_dev.c
2683+++ b/net/8021q/vlan_dev.c
2684@@ -747,6 +747,26 @@ static int vlan_dev_get_iflink(const struct net_device *dev)
2685 return real_dev->ifindex;
2686 }
2687
2688+static int vlan_dev_fill_forward_path(struct net_device_path_ctx *ctx,
2689+ struct net_device_path *path)
2690+{
2691+ struct vlan_dev_priv *vlan = vlan_dev_priv(ctx->dev);
2692+
2693+ path->type = DEV_PATH_VLAN;
2694+ path->encap.id = vlan->vlan_id;
2695+ path->encap.proto = vlan->vlan_proto;
2696+ path->dev = ctx->dev;
2697+ ctx->dev = vlan->real_dev;
2698+ if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
2699+ return -ENOSPC;
2700+
2701+ ctx->vlan[ctx->num_vlans].id = vlan->vlan_id;
2702+ ctx->vlan[ctx->num_vlans].proto = vlan->vlan_proto;
2703+ ctx->num_vlans++;
2704+
2705+ return 0;
2706+}
2707+
2708 static const struct ethtool_ops vlan_ethtool_ops = {
2709 .get_link_ksettings = vlan_ethtool_get_link_ksettings,
2710 .get_drvinfo = vlan_ethtool_get_drvinfo,
2711@@ -785,6 +805,7 @@ static const struct net_device_ops vlan_netdev_ops = {
2712 #endif
2713 .ndo_fix_features = vlan_dev_fix_features,
2714 .ndo_get_iflink = vlan_dev_get_iflink,
2715+ .ndo_fill_forward_path = vlan_dev_fill_forward_path,
2716 };
2717
2718 static void vlan_dev_free(struct net_device *dev)
2719diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
2720index 501f77f0f..0940b44cd 100644
2721--- a/net/bridge/br_device.c
2722+++ b/net/bridge/br_device.c
2723@@ -377,6 +377,54 @@ static int br_del_slave(struct net_device *dev, struct net_device *slave_dev)
2724 return br_del_if(br, slave_dev);
2725 }
2726
2727+static int br_fill_forward_path(struct net_device_path_ctx *ctx,
2728+ struct net_device_path *path)
2729+{
2730+ struct net_bridge_fdb_entry *f;
2731+ struct net_bridge_port *dst;
2732+ struct net_bridge *br;
2733+
2734+ if (netif_is_bridge_port(ctx->dev))
2735+ return -1;
2736+
2737+ br = netdev_priv(ctx->dev);
2738+
2739+ br_vlan_fill_forward_path_pvid(br, ctx, path);
2740+
2741+ f = br_fdb_find_rcu(br, ctx->daddr, path->bridge.vlan_id);
2742+ if (!f || !f->dst)
2743+ return -1;
2744+
2745+ dst = READ_ONCE(f->dst);
2746+ if (!dst)
2747+ return -1;
2748+
2749+ if (br_vlan_fill_forward_path_mode(br, dst, path))
2750+ return -1;
2751+
2752+ path->type = DEV_PATH_BRIDGE;
2753+ path->dev = dst->br->dev;
2754+ ctx->dev = dst->dev;
2755+
2756+ switch (path->bridge.vlan_mode) {
2757+ case DEV_PATH_BR_VLAN_TAG:
2758+ if (ctx->num_vlans >= ARRAY_SIZE(ctx->vlan))
2759+ return -ENOSPC;
2760+ ctx->vlan[ctx->num_vlans].id = path->bridge.vlan_id;
2761+ ctx->vlan[ctx->num_vlans].proto = path->bridge.vlan_proto;
2762+ ctx->num_vlans++;
2763+ break;
2764+ case DEV_PATH_BR_VLAN_UNTAG_HW:
2765+ case DEV_PATH_BR_VLAN_UNTAG:
2766+ ctx->num_vlans--;
2767+ break;
2768+ case DEV_PATH_BR_VLAN_KEEP:
2769+ break;
2770+ }
2771+
2772+ return 0;
2773+}
2774+
2775 static const struct ethtool_ops br_ethtool_ops = {
2776 .get_drvinfo = br_getinfo,
2777 .get_link = ethtool_op_get_link,
2778@@ -410,6 +458,7 @@ static const struct net_device_ops br_netdev_ops = {
2779 .ndo_bridge_setlink = br_setlink,
2780 .ndo_bridge_dellink = br_dellink,
2781 .ndo_features_check = passthru_features_check,
2782+ .ndo_fill_forward_path = br_fill_forward_path,
2783 };
2784
2785 static struct device_type br_type = {
2786diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
2787index a736be8a1..4bd9e9b57 100644
2788--- a/net/bridge/br_private.h
2789+++ b/net/bridge/br_private.h
2790@@ -912,6 +912,13 @@ void br_vlan_port_event(struct net_bridge_port *p, unsigned long event);
2791 int br_vlan_bridge_event(struct net_device *dev, unsigned long event,
2792 void *ptr);
2793
2794+void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
2795+ struct net_device_path_ctx *ctx,
2796+ struct net_device_path *path);
2797+int br_vlan_fill_forward_path_mode(struct net_bridge *br,
2798+ struct net_bridge_port *dst,
2799+ struct net_device_path *path);
2800+
2801 static inline struct net_bridge_vlan_group *br_vlan_group(
2802 const struct net_bridge *br)
2803 {
2804@@ -1066,6 +1073,19 @@ static inline int nbp_get_num_vlan_infos(struct net_bridge_port *p,
2805 return 0;
2806 }
2807
2808+static inline void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
2809+ struct net_device_path_ctx *ctx,
2810+ struct net_device_path *path)
2811+{
2812+}
2813+
2814+static inline int br_vlan_fill_forward_path_mode(struct net_bridge *br,
2815+ struct net_bridge_port *dst,
2816+ struct net_device_path *path)
2817+{
2818+ return 0;
2819+}
2820+
2821 static inline struct net_bridge_vlan_group *br_vlan_group(
2822 const struct net_bridge *br)
2823 {
2824diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
2825index 9257292bd..bcfd16924 100644
2826--- a/net/bridge/br_vlan.c
2827+++ b/net/bridge/br_vlan.c
2828@@ -1268,6 +1268,61 @@ int br_vlan_get_pvid_rcu(const struct net_device *dev, u16 *p_pvid)
2829 }
2830 EXPORT_SYMBOL_GPL(br_vlan_get_pvid_rcu);
2831
2832+void br_vlan_fill_forward_path_pvid(struct net_bridge *br,
2833+ struct net_device_path_ctx *ctx,
2834+ struct net_device_path *path)
2835+{
2836+ struct net_bridge_vlan_group *vg;
2837+ int idx = ctx->num_vlans - 1;
2838+ u16 vid;
2839+
2840+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
2841+
2842+ if (!br_opt_get(br, BROPT_VLAN_ENABLED))
2843+ return;
2844+
2845+ vg = br_vlan_group(br);
2846+
2847+ if (idx >= 0 &&
2848+ ctx->vlan[idx].proto == br->vlan_proto) {
2849+ vid = ctx->vlan[idx].id;
2850+ } else {
2851+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_TAG;
2852+ vid = br_get_pvid(vg);
2853+ }
2854+
2855+ path->bridge.vlan_id = vid;
2856+ path->bridge.vlan_proto = br->vlan_proto;
2857+}
2858+
2859+int br_vlan_fill_forward_path_mode(struct net_bridge *br,
2860+ struct net_bridge_port *dst,
2861+ struct net_device_path *path)
2862+{
2863+ struct net_bridge_vlan_group *vg;
2864+ struct net_bridge_vlan *v;
2865+
2866+ if (!br_opt_get(br, BROPT_VLAN_ENABLED))
2867+ return 0;
2868+
2869+ vg = nbp_vlan_group_rcu(dst);
2870+ v = br_vlan_find(vg, path->bridge.vlan_id);
2871+ if (!v || !br_vlan_should_use(v))
2872+ return -EINVAL;
2873+
2874+ if (!(v->flags & BRIDGE_VLAN_INFO_UNTAGGED))
2875+ return 0;
2876+
2877+ if (path->bridge.vlan_mode == DEV_PATH_BR_VLAN_TAG)
2878+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP;
2879+ else if (v->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV)
2880+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG_HW;
2881+ else
2882+ path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG;
2883+
2884+ return 0;
2885+}
2886+
2887 int br_vlan_get_info(const struct net_device *dev, u16 vid,
2888 struct bridge_vlan_info *p_vinfo)
2889 {
2890diff --git a/net/core/dev.c b/net/core/dev.c
2891index fe2c856b9..4f0edb218 100644
2892--- a/net/core/dev.c
2893+++ b/net/core/dev.c
2894@@ -639,6 +639,52 @@ int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
2895 }
2896 EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
2897
2898+static struct net_device_path *dev_fwd_path(struct net_device_path_stack *stack)
2899+{
2900+ int k = stack->num_paths++;
2901+
2902+ if (WARN_ON_ONCE(k >= NET_DEVICE_PATH_STACK_MAX))
2903+ return NULL;
2904+
2905+ return &stack->path[k];
2906+}
2907+
2908+int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
2909+ struct net_device_path_stack *stack)
2910+{
2911+ const struct net_device *last_dev;
2912+ struct net_device_path_ctx ctx = {
2913+ .dev = dev,
2914+ };
2915+ struct net_device_path *path;
2916+ int ret = 0;
2917+
2918+ memcpy(ctx.daddr, daddr, sizeof(ctx.daddr));
2919+ stack->num_paths = 0;
2920+ while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
2921+ last_dev = ctx.dev;
2922+ path = dev_fwd_path(stack);
2923+ if (!path)
2924+ return -1;
2925+
2926+ memset(path, 0, sizeof(struct net_device_path));
2927+ ret = ctx.dev->netdev_ops->ndo_fill_forward_path(&ctx, path);
2928+ if (ret < 0)
2929+ return -1;
2930+
2931+ if (WARN_ON_ONCE(last_dev == ctx.dev))
2932+ return -1;
2933+ }
2934+ path = dev_fwd_path(stack);
2935+ if (!path)
2936+ return -1;
2937+ path->type = DEV_PATH_ETHERNET;
2938+ path->dev = ctx.dev;
2939+
2940+ return ret;
2941+}
2942+EXPORT_SYMBOL_GPL(dev_fill_forward_path);
2943+
2944 /**
2945 * __dev_get_by_name - find a device by its name
2946 * @net: the applicable net namespace
2947diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
2948index ca80f8699..35a1249a9 100644
2949--- a/net/dsa/dsa.c
2950+++ b/net/dsa/dsa.c
2951@@ -329,6 +329,15 @@ int call_dsa_notifiers(unsigned long val, struct net_device *dev,
2952 }
2953 EXPORT_SYMBOL_GPL(call_dsa_notifiers);
2954
2955+struct dsa_port *dsa_port_from_netdev(struct net_device *netdev)
2956+{
2957+ if (!netdev || !dsa_slave_dev_check(netdev))
2958+ return ERR_PTR(-ENODEV);
2959+
2960+ return dsa_slave_to_port(netdev);
2961+}
2962+EXPORT_SYMBOL_GPL(dsa_port_from_netdev);
2963+
2964 static int __init dsa_init_module(void)
2965 {
2966 int rc;
2967diff --git a/net/dsa/slave.c b/net/dsa/slave.c
2968index 036fda317..2dfaa1eac 100644
2969--- a/net/dsa/slave.c
2970+++ b/net/dsa/slave.c
2971@@ -22,8 +22,6 @@
2972
2973 #include "dsa_priv.h"
2974
2975-static bool dsa_slave_dev_check(const struct net_device *dev);
2976-
2977 /* slave mii_bus handling ***************************************************/
2978 static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg)
2979 {
2980@@ -1033,14 +1031,32 @@ static int dsa_slave_setup_tc_block(struct net_device *dev,
2981 }
2982 }
2983
2984+static int dsa_slave_setup_ft_block(struct dsa_switch *ds, int port,
2985+ void *type_data)
2986+{
2987+ struct dsa_port *cpu_dp = dsa_to_port(ds, port)->cpu_dp;
2988+ struct net_device *master = cpu_dp->master;
2989+
2990+ if (!master->netdev_ops->ndo_setup_tc)
2991+ return -EOPNOTSUPP;
2992+
2993+ return master->netdev_ops->ndo_setup_tc(master, TC_SETUP_FT, type_data);
2994+}
2995+
2996 static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type,
2997 void *type_data)
2998 {
2999 struct dsa_port *dp = dsa_slave_to_port(dev);
3000 struct dsa_switch *ds = dp->ds;
3001
3002- if (type == TC_SETUP_BLOCK)
3003+ switch (type) {
3004+ case TC_SETUP_BLOCK:
3005 return dsa_slave_setup_tc_block(dev, type_data);
3006+ case TC_SETUP_FT:
3007+ return dsa_slave_setup_ft_block(ds, dp->index, type_data);
3008+ default:
3009+ break;
3010+ }
3011
3012 if (!ds->ops->port_setup_tc)
3013 return -EOPNOTSUPP;
3014@@ -1226,6 +1242,21 @@ static struct devlink_port *dsa_slave_get_devlink_port(struct net_device *dev)
3015 return dp->ds->devlink ? &dp->devlink_port : NULL;
3016 }
3017
3018+static int dsa_slave_fill_forward_path(struct net_device_path_ctx *ctx,
3019+ struct net_device_path *path)
3020+{
3021+ struct dsa_port *dp = dsa_slave_to_port(ctx->dev);
3022+ struct dsa_port *cpu_dp = dp->cpu_dp;
3023+
3024+ path->dev = ctx->dev;
3025+ path->type = DEV_PATH_DSA;
3026+ path->dsa.proto = cpu_dp->tag_ops->proto;
3027+ path->dsa.port = dp->index;
3028+ ctx->dev = cpu_dp->master;
3029+
3030+ return 0;
3031+}
3032+
3033 static const struct net_device_ops dsa_slave_netdev_ops = {
3034 .ndo_open = dsa_slave_open,
3035 .ndo_stop = dsa_slave_close,
3036@@ -1250,6 +1281,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
3037 .ndo_vlan_rx_add_vid = dsa_slave_vlan_rx_add_vid,
3038 .ndo_vlan_rx_kill_vid = dsa_slave_vlan_rx_kill_vid,
3039 .ndo_get_devlink_port = dsa_slave_get_devlink_port,
3040+ .ndo_fill_forward_path = dsa_slave_fill_forward_path,
3041 };
3042
3043 static struct device_type dsa_type = {
3044@@ -1497,10 +1529,11 @@ void dsa_slave_destroy(struct net_device *slave_dev)
3045 free_netdev(slave_dev);
3046 }
3047
3048-static bool dsa_slave_dev_check(const struct net_device *dev)
3049+bool dsa_slave_dev_check(const struct net_device *dev)
3050 {
3051 return dev->netdev_ops == &dsa_slave_netdev_ops;
3052 }
3053+EXPORT_SYMBOL_GPL(dsa_slave_dev_check);
3054
3055 static int dsa_slave_changeupper(struct net_device *dev,
3056 struct netdev_notifier_changeupper_info *info)
3057diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
3058index f17b40211..803b92e4c 100644
3059--- a/net/ipv4/netfilter/Kconfig
3060+++ b/net/ipv4/netfilter/Kconfig
3061@@ -56,8 +56,6 @@ config NF_TABLES_ARP
3062 help
3063 This option enables the ARP support for nf_tables.
3064
3065-endif # NF_TABLES
3066-
3067 config NF_FLOW_TABLE_IPV4
3068 tristate "Netfilter flow table IPv4 module"
3069 depends on NF_FLOW_TABLE
3070@@ -66,6 +64,8 @@ config NF_FLOW_TABLE_IPV4
3071
3072 To compile it as a module, choose M here.
3073
3074+endif # NF_TABLES
3075+
3076 config NF_DUP_IPV4
3077 tristate "Netfilter IPv4 packet duplication to alternate destination"
3078 depends on !NF_CONNTRACK || NF_CONNTRACK
3079diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
3080index 5585e3a94..bb76f6061 100644
3081--- a/net/ipv6/ip6_output.c
3082+++ b/net/ipv6/ip6_output.c
3083@@ -607,7 +607,7 @@ int ip6_forward(struct sk_buff *skb)
3084 }
3085 }
3086
3087- mtu = ip6_dst_mtu_forward(dst);
3088+ mtu = ip6_dst_mtu_maybe_forward(dst, true);
3089 if (mtu < IPV6_MIN_MTU)
3090 mtu = IPV6_MIN_MTU;
3091
3092diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
3093index 69443e9a3..0b481d236 100644
3094--- a/net/ipv6/netfilter/Kconfig
3095+++ b/net/ipv6/netfilter/Kconfig
3096@@ -45,7 +45,6 @@ config NFT_FIB_IPV6
3097 multicast or blackhole.
3098
3099 endif # NF_TABLES_IPV6
3100-endif # NF_TABLES
3101
3102 config NF_FLOW_TABLE_IPV6
3103 tristate "Netfilter flow table IPv6 module"
3104@@ -55,6 +54,8 @@ config NF_FLOW_TABLE_IPV6
3105
3106 To compile it as a module, choose M here.
3107
3108+endif # NF_TABLES
3109+
3110 config NF_DUP_IPV6
3111 tristate "Netfilter IPv6 packet duplication to alternate destination"
3112 depends on !NF_CONNTRACK || NF_CONNTRACK
3113diff --git a/net/ipv6/route.c b/net/ipv6/route.c
3114index 98aaf0b79..2b357ac71 100644
3115--- a/net/ipv6/route.c
3116+++ b/net/ipv6/route.c
3117@@ -83,7 +83,7 @@ enum rt6_nud_state {
3118
3119 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
3120 static unsigned int ip6_default_advmss(const struct dst_entry *dst);
3121-static unsigned int ip6_mtu(const struct dst_entry *dst);
3122+static unsigned int ip6_mtu(const struct dst_entry *dst);
3123 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
3124 static void ip6_dst_destroy(struct dst_entry *);
3125 static void ip6_dst_ifdown(struct dst_entry *,
3126@@ -3125,25 +3125,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
3127
3128 static unsigned int ip6_mtu(const struct dst_entry *dst)
3129 {
3130- struct inet6_dev *idev;
3131- unsigned int mtu;
3132-
3133- mtu = dst_metric_raw(dst, RTAX_MTU);
3134- if (mtu)
3135- goto out;
3136-
3137- mtu = IPV6_MIN_MTU;
3138-
3139- rcu_read_lock();
3140- idev = __in6_dev_get(dst->dev);
3141- if (idev)
3142- mtu = idev->cnf.mtu6;
3143- rcu_read_unlock();
3144-
3145-out:
3146- mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
3147-
3148- return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
3149+ return ip6_dst_mtu_maybe_forward(dst, false);
3150 }
3151
3152 /* MTU selection:
3153diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
3154index b967763f5..c040e713a 100644
3155--- a/net/netfilter/Kconfig
3156+++ b/net/netfilter/Kconfig
3157@@ -690,8 +690,6 @@ config NFT_FIB_NETDEV
3158
3159 endif # NF_TABLES_NETDEV
3160
3161-endif # NF_TABLES
3162-
3163 config NF_FLOW_TABLE_INET
3164 tristate "Netfilter flow table mixed IPv4/IPv6 module"
3165 depends on NF_FLOW_TABLE
3166@@ -700,11 +698,12 @@ config NF_FLOW_TABLE_INET
3167
3168 To compile it as a module, choose M here.
3169
3170+endif # NF_TABLES
3171+
3172 config NF_FLOW_TABLE
3173 tristate "Netfilter flow table module"
3174 depends on NETFILTER_INGRESS
3175 depends on NF_CONNTRACK
3176- depends on NF_TABLES
3177 help
3178 This option adds the flow table core infrastructure.
3179
3180@@ -984,6 +983,15 @@ config NETFILTER_XT_TARGET_NOTRACK
3181 depends on NETFILTER_ADVANCED
3182 select NETFILTER_XT_TARGET_CT
3183
3184+config NETFILTER_XT_TARGET_FLOWOFFLOAD
3185+ tristate '"FLOWOFFLOAD" target support'
3186+ depends on NF_FLOW_TABLE
3187+ depends on NETFILTER_INGRESS
3188+ help
3189+ This option adds a `FLOWOFFLOAD' target, which uses the nf_flow_offload
3190+ module to speed up processing of packets by bypassing the usual
3191+ netfilter chains.
3192+
3193 config NETFILTER_XT_TARGET_RATEEST
3194 tristate '"RATEEST" target support'
3195 depends on NETFILTER_ADVANCED
3196diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
3197index 4fc075b61..d93a121bc 100644
3198--- a/net/netfilter/Makefile
3199+++ b/net/netfilter/Makefile
3200@@ -120,7 +120,8 @@ obj-$(CONFIG_NFT_FWD_NETDEV) += nft_fwd_netdev.o
3201
3202 # flow table infrastructure
3203 obj-$(CONFIG_NF_FLOW_TABLE) += nf_flow_table.o
3204-nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o
3205+nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o \
3206+ nf_flow_table_offload.o
3207
3208 obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o
3209
3210@@ -140,6 +141,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
3211 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
3212 obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
3213 obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
3214+obj-$(CONFIG_NETFILTER_XT_TARGET_FLOWOFFLOAD) += xt_FLOWOFFLOAD.o
3215 obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
3216 obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
3217 obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
3218diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
3219index f6ab6f484..f689e19d8 100644
3220--- a/net/netfilter/nf_conntrack_core.c
3221+++ b/net/netfilter/nf_conntrack_core.c
3222@@ -864,9 +864,8 @@ out:
3223 }
3224 EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
3225
3226-static inline void nf_ct_acct_update(struct nf_conn *ct,
3227- enum ip_conntrack_info ctinfo,
3228- unsigned int len)
3229+void nf_ct_acct_add(struct nf_conn *ct, u32 dir, unsigned int packets,
3230+ unsigned int bytes)
3231 {
3232 struct nf_conn_acct *acct;
3233
3234@@ -874,10 +873,11 @@ static inline void nf_ct_acct_update(struct nf_conn *ct,
3235 if (acct) {
3236 struct nf_conn_counter *counter = acct->counter;
3237
3238- atomic64_inc(&counter[CTINFO2DIR(ctinfo)].packets);
3239- atomic64_add(len, &counter[CTINFO2DIR(ctinfo)].bytes);
3240+ atomic64_add(packets, &counter[dir].packets);
3241+ atomic64_add(bytes, &counter[dir].bytes);
3242 }
3243 }
3244+EXPORT_SYMBOL_GPL(nf_ct_acct_add);
3245
3246 static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
3247 const struct nf_conn *loser_ct)
3248@@ -891,7 +891,7 @@ static void nf_ct_acct_merge(struct nf_conn *ct, enum ip_conntrack_info ctinfo,
3249
3250 /* u32 should be fine since we must have seen one packet. */
3251 bytes = atomic64_read(&counter[CTINFO2DIR(ctinfo)].bytes);
3252- nf_ct_acct_update(ct, ctinfo, bytes);
3253+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), bytes);
3254 }
3255 }
3256
3257@@ -1238,8 +1238,10 @@ static void gc_worker(struct work_struct *work)
3258
3259 tmp = nf_ct_tuplehash_to_ctrack(h);
3260
3261- if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
3262+ if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
3263+ nf_ct_offload_timeout(tmp);
3264 continue;
3265+ }
3266
3267 if (nf_ct_is_expired(tmp)) {
3268 nf_ct_gc_expired(tmp);
3269@@ -1763,7 +1765,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
3270 WRITE_ONCE(ct->timeout, extra_jiffies);
3271 acct:
3272 if (do_acct)
3273- nf_ct_acct_update(ct, ctinfo, skb->len);
3274+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
3275 }
3276 EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
3277
3278@@ -1771,7 +1773,7 @@ bool nf_ct_kill_acct(struct nf_conn *ct,
3279 enum ip_conntrack_info ctinfo,
3280 const struct sk_buff *skb)
3281 {
3282- nf_ct_acct_update(ct, ctinfo, skb->len);
3283+ nf_ct_acct_update(ct, CTINFO2DIR(ctinfo), skb->len);
3284
3285 return nf_ct_delete(ct, 0, 0);
3286 }
3287diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
3288index 7204f0366..3742bae21 100644
3289--- a/net/netfilter/nf_conntrack_proto_tcp.c
3290+++ b/net/netfilter/nf_conntrack_proto_tcp.c
3291@@ -1453,6 +1453,10 @@ void nf_conntrack_tcp_init_net(struct net *net)
3292 tn->tcp_loose = nf_ct_tcp_loose;
3293 tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
3294 tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
3295+
3296+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3297+ tn->offload_timeout = 30 * HZ;
3298+#endif
3299 }
3300
3301 const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp =
3302diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
3303index e3a2d018f..a1579d6c3 100644
3304--- a/net/netfilter/nf_conntrack_proto_udp.c
3305+++ b/net/netfilter/nf_conntrack_proto_udp.c
3306@@ -267,6 +267,10 @@ void nf_conntrack_udp_init_net(struct net *net)
3307
3308 for (i = 0; i < UDP_CT_MAX; i++)
3309 un->timeouts[i] = udp_timeouts[i];
3310+
3311+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3312+ un->offload_timeout = 30 * HZ;
3313+#endif
3314 }
3315
3316 const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp =
3317diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
3318index 9c6259c28..10d9f93ce 100644
3319--- a/net/netfilter/nf_conntrack_standalone.c
3320+++ b/net/netfilter/nf_conntrack_standalone.c
3321@@ -353,7 +353,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
3322 if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
3323 goto release;
3324
3325- if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
3326+ if (test_bit(IPS_HW_OFFLOAD_BIT, &ct->status))
3327+ seq_puts(s, "[HW_OFFLOAD] ");
3328+ else if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
3329 seq_puts(s, "[OFFLOAD] ");
3330 else if (test_bit(IPS_ASSURED_BIT, &ct->status))
3331 seq_puts(s, "[ASSURED] ");
3332@@ -620,11 +622,17 @@ enum nf_ct_sysctl_index {
3333 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_CLOSE,
3334 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_RETRANS,
3335 NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_UNACK,
3336+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3337+ NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD,
3338+#endif
3339 NF_SYSCTL_CT_PROTO_TCP_LOOSE,
3340 NF_SYSCTL_CT_PROTO_TCP_LIBERAL,
3341 NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS,
3342 NF_SYSCTL_CT_PROTO_TIMEOUT_UDP,
3343 NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM,
3344+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3345+ NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD,
3346+#endif
3347 NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP,
3348 NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6,
3349 #ifdef CONFIG_NF_CT_PROTO_SCTP
3350@@ -812,6 +820,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
3351 .mode = 0644,
3352 .proc_handler = proc_dointvec_jiffies,
3353 },
3354+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3355+ [NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD] = {
3356+ .procname = "nf_flowtable_tcp_timeout",
3357+ .maxlen = sizeof(unsigned int),
3358+ .mode = 0644,
3359+ .proc_handler = proc_dointvec_jiffies,
3360+ },
3361+#endif
3362 [NF_SYSCTL_CT_PROTO_TCP_LOOSE] = {
3363 .procname = "nf_conntrack_tcp_loose",
3364 .maxlen = sizeof(int),
3365@@ -846,6 +862,14 @@ static struct ctl_table nf_ct_sysctl_table[] = {
3366 .mode = 0644,
3367 .proc_handler = proc_dointvec_jiffies,
3368 },
3369+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3370+ [NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD] = {
3371+ .procname = "nf_flowtable_udp_timeout",
3372+ .maxlen = sizeof(unsigned int),
3373+ .mode = 0644,
3374+ .proc_handler = proc_dointvec_jiffies,
3375+ },
3376+#endif
3377 [NF_SYSCTL_CT_PROTO_TIMEOUT_ICMP] = {
3378 .procname = "nf_conntrack_icmp_timeout",
3379 .maxlen = sizeof(unsigned int),
3380@@ -1028,6 +1052,11 @@ static void nf_conntrack_standalone_init_tcp_sysctl(struct net *net,
3381 XASSIGN(LIBERAL, &tn->tcp_be_liberal);
3382 XASSIGN(MAX_RETRANS, &tn->tcp_max_retrans);
3383 #undef XASSIGN
3384+
3385+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3386+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_TCP_OFFLOAD].data = &tn->offload_timeout;
3387+#endif
3388+
3389 }
3390
3391 static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net,
3392@@ -1115,6 +1144,9 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
3393 table[NF_SYSCTL_CT_PROTO_TIMEOUT_ICMPV6].data = &nf_icmpv6_pernet(net)->timeout;
3394 table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP].data = &un->timeouts[UDP_CT_UNREPLIED];
3395 table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_STREAM].data = &un->timeouts[UDP_CT_REPLIED];
3396+#if IS_ENABLED(CONFIG_NF_FLOW_TABLE)
3397+ table[NF_SYSCTL_CT_PROTO_TIMEOUT_UDP_OFFLOAD].data = &un->offload_timeout;
3398+#endif
3399
3400 nf_conntrack_standalone_init_tcp_sysctl(net, table);
3401 nf_conntrack_standalone_init_sctp_sysctl(net, table);
3402diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
3403index e1ffc66b8..103655813 100644
3404--- a/net/netfilter/nf_flow_table_core.c
3405+++ b/net/netfilter/nf_flow_table_core.c
3406@@ -7,31 +7,21 @@
3407 #include <linux/netdevice.h>
3408 #include <net/ip.h>
3409 #include <net/ip6_route.h>
3410-#include <net/netfilter/nf_tables.h>
3411 #include <net/netfilter/nf_flow_table.h>
3412 #include <net/netfilter/nf_conntrack.h>
3413 #include <net/netfilter/nf_conntrack_core.h>
3414 #include <net/netfilter/nf_conntrack_l4proto.h>
3415 #include <net/netfilter/nf_conntrack_tuple.h>
3416
3417-struct flow_offload_entry {
3418- struct flow_offload flow;
3419- struct nf_conn *ct;
3420- struct rcu_head rcu_head;
3421-};
3422-
3423 static DEFINE_MUTEX(flowtable_lock);
3424 static LIST_HEAD(flowtables);
3425
3426 static void
3427-flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
3428- struct nf_flow_route *route,
3429+flow_offload_fill_dir(struct flow_offload *flow,
3430 enum flow_offload_tuple_dir dir)
3431 {
3432 struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
3433- struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
3434- struct dst_entry *other_dst = route->tuple[!dir].dst;
3435- struct dst_entry *dst = route->tuple[dir].dst;
3436+ struct nf_conntrack_tuple *ctt = &flow->ct->tuplehash[dir].tuple;
3437
3438 ft->dir = dir;
3439
3440@@ -39,12 +29,10 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
3441 case NFPROTO_IPV4:
3442 ft->src_v4 = ctt->src.u3.in;
3443 ft->dst_v4 = ctt->dst.u3.in;
3444- ft->mtu = ip_dst_mtu_maybe_forward(dst, true);
3445 break;
3446 case NFPROTO_IPV6:
3447 ft->src_v6 = ctt->src.u3.in6;
3448 ft->dst_v6 = ctt->dst.u3.in6;
3449- ft->mtu = ip6_dst_mtu_forward(dst);
3450 break;
3451 }
3452
3453@@ -52,49 +40,32 @@ flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
3454 ft->l4proto = ctt->dst.protonum;
3455 ft->src_port = ctt->src.u.tcp.port;
3456 ft->dst_port = ctt->dst.u.tcp.port;
3457-
3458- ft->iifidx = other_dst->dev->ifindex;
3459- ft->dst_cache = dst;
3460 }
3461
3462-struct flow_offload *
3463-flow_offload_alloc(struct nf_conn *ct, struct nf_flow_route *route)
3464+struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
3465 {
3466- struct flow_offload_entry *entry;
3467 struct flow_offload *flow;
3468
3469 if (unlikely(nf_ct_is_dying(ct) ||
3470 !atomic_inc_not_zero(&ct->ct_general.use)))
3471 return NULL;
3472
3473- entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
3474- if (!entry)
3475+ flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
3476+ if (!flow)
3477 goto err_ct_refcnt;
3478
3479- flow = &entry->flow;
3480+ flow->ct = ct;
3481
3482- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst))
3483- goto err_dst_cache_original;
3484-
3485- if (!dst_hold_safe(route->tuple[FLOW_OFFLOAD_DIR_REPLY].dst))
3486- goto err_dst_cache_reply;
3487-
3488- entry->ct = ct;
3489-
3490- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_ORIGINAL);
3491- flow_offload_fill_dir(flow, ct, route, FLOW_OFFLOAD_DIR_REPLY);
3492+ flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
3493+ flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_REPLY);
3494
3495 if (ct->status & IPS_SRC_NAT)
3496- flow->flags |= FLOW_OFFLOAD_SNAT;
3497+ __set_bit(NF_FLOW_SNAT, &flow->flags);
3498 if (ct->status & IPS_DST_NAT)
3499- flow->flags |= FLOW_OFFLOAD_DNAT;
3500+ __set_bit(NF_FLOW_DNAT, &flow->flags);
3501
3502 return flow;
3503
3504-err_dst_cache_reply:
3505- dst_release(route->tuple[FLOW_OFFLOAD_DIR_ORIGINAL].dst);
3506-err_dst_cache_original:
3507- kfree(entry);
3508 err_ct_refcnt:
3509 nf_ct_put(ct);
3510
3511@@ -102,40 +73,135 @@ err_ct_refcnt:
3512 }
3513 EXPORT_SYMBOL_GPL(flow_offload_alloc);
3514
3515-static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
3516+static u32 flow_offload_dst_cookie(struct flow_offload_tuple *flow_tuple)
3517 {
3518- tcp->state = TCP_CONNTRACK_ESTABLISHED;
3519- tcp->seen[0].td_maxwin = 0;
3520- tcp->seen[1].td_maxwin = 0;
3521+ const struct rt6_info *rt;
3522+
3523+ if (flow_tuple->l3proto == NFPROTO_IPV6) {
3524+ rt = (const struct rt6_info *)flow_tuple->dst_cache;
3525+ return rt6_get_cookie(rt);
3526+ }
3527+
3528+ return 0;
3529 }
3530
3531-#define NF_FLOWTABLE_TCP_PICKUP_TIMEOUT (120 * HZ)
3532-#define NF_FLOWTABLE_UDP_PICKUP_TIMEOUT (30 * HZ)
3533+static int flow_offload_fill_route(struct flow_offload *flow,
3534+ const struct nf_flow_route *route,
3535+ enum flow_offload_tuple_dir dir)
3536+{
3537+ struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
3538+ struct dst_entry *dst = route->tuple[dir].dst;
3539+ int i, j = 0;
3540+
3541+ switch (flow_tuple->l3proto) {
3542+ case NFPROTO_IPV4:
3543+ flow_tuple->mtu = ip_dst_mtu_maybe_forward(dst, true);
3544+ break;
3545+ case NFPROTO_IPV6:
3546+ flow_tuple->mtu = ip6_dst_mtu_maybe_forward(dst, true);
3547+ break;
3548+ }
3549+
3550+ flow_tuple->iifidx = route->tuple[dir].in.ifindex;
3551+ for (i = route->tuple[dir].in.num_encaps - 1; i >= 0; i--) {
3552+ flow_tuple->encap[j].id = route->tuple[dir].in.encap[i].id;
3553+ flow_tuple->encap[j].proto = route->tuple[dir].in.encap[i].proto;
3554+ if (route->tuple[dir].in.ingress_vlans & BIT(i))
3555+ flow_tuple->in_vlan_ingress |= BIT(j);
3556+ j++;
3557+ }
3558+ flow_tuple->encap_num = route->tuple[dir].in.num_encaps;
3559+
3560+ switch (route->tuple[dir].xmit_type) {
3561+ case FLOW_OFFLOAD_XMIT_DIRECT:
3562+ memcpy(flow_tuple->out.h_dest, route->tuple[dir].out.h_dest,
3563+ ETH_ALEN);
3564+ memcpy(flow_tuple->out.h_source, route->tuple[dir].out.h_source,
3565+ ETH_ALEN);
3566+ flow_tuple->out.ifidx = route->tuple[dir].out.ifindex;
3567+ flow_tuple->out.hw_ifidx = route->tuple[dir].out.hw_ifindex;
3568+ break;
3569+ case FLOW_OFFLOAD_XMIT_XFRM:
3570+ case FLOW_OFFLOAD_XMIT_NEIGH:
3571+ if (!dst_hold_safe(route->tuple[dir].dst))
3572+ return -1;
3573+
3574+ flow_tuple->dst_cache = dst;
3575+ flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
3576+ break;
3577+ default:
3578+ WARN_ON_ONCE(1);
3579+ break;
3580+ }
3581+ flow_tuple->xmit_type = route->tuple[dir].xmit_type;
3582
3583-static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
3584+ return 0;
3585+}
3586+
3587+static void nft_flow_dst_release(struct flow_offload *flow,
3588+ enum flow_offload_tuple_dir dir)
3589+{
3590+ if (flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
3591+ flow->tuplehash[dir].tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)
3592+ dst_release(flow->tuplehash[dir].tuple.dst_cache);
3593+}
3594+
3595+int flow_offload_route_init(struct flow_offload *flow,
3596+ const struct nf_flow_route *route)
3597+{
3598+ int err;
3599+
3600+ err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
3601+ if (err < 0)
3602+ return err;
3603+
3604+ err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
3605+ if (err < 0)
3606+ goto err_route_reply;
3607+
3608+ flow->type = NF_FLOW_OFFLOAD_ROUTE;
3609+
3610+ return 0;
3611+
3612+err_route_reply:
3613+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
3614+
3615+ return err;
3616+}
3617+EXPORT_SYMBOL_GPL(flow_offload_route_init);
3618+
3619+static void flow_offload_fixup_tcp(struct ip_ct_tcp *tcp)
3620 {
3621- return (__s32)(timeout - (u32)jiffies);
3622+ tcp->state = TCP_CONNTRACK_ESTABLISHED;
3623+ tcp->seen[0].td_maxwin = 0;
3624+ tcp->seen[1].td_maxwin = 0;
3625 }
3626
3627 static void flow_offload_fixup_ct_timeout(struct nf_conn *ct)
3628 {
3629- const struct nf_conntrack_l4proto *l4proto;
3630+ struct net *net = nf_ct_net(ct);
3631 int l4num = nf_ct_protonum(ct);
3632- unsigned int timeout;
3633+ s32 timeout;
3634
3635- l4proto = nf_ct_l4proto_find(l4num);
3636- if (!l4proto)
3637- return;
3638+ if (l4num == IPPROTO_TCP) {
3639+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
3640
3641- if (l4num == IPPROTO_TCP)
3642- timeout = NF_FLOWTABLE_TCP_PICKUP_TIMEOUT;
3643- else if (l4num == IPPROTO_UDP)
3644- timeout = NF_FLOWTABLE_UDP_PICKUP_TIMEOUT;
3645- else
3646+ timeout = tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
3647+ timeout -= tn->offload_timeout;
3648+ } else if (l4num == IPPROTO_UDP) {
3649+ struct nf_udp_net *tn = nf_udp_pernet(net);
3650+
3651+ timeout = tn->timeouts[UDP_CT_REPLIED];
3652+ timeout -= tn->offload_timeout;
3653+ } else {
3654 return;
3655+ }
3656+
3657+ if (timeout < 0)
3658+ timeout = 0;
3659
3660- if (nf_flow_timeout_delta(ct->timeout) > (__s32)timeout)
3661- ct->timeout = nfct_time_stamp + timeout;
3662+ if (nf_flow_timeout_delta(READ_ONCE(ct->timeout)) > (__s32)timeout)
3663+ WRITE_ONCE(ct->timeout, nfct_time_stamp + timeout);
3664 }
3665
3666 static void flow_offload_fixup_ct_state(struct nf_conn *ct)
3667@@ -150,17 +216,23 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
3668 flow_offload_fixup_ct_timeout(ct);
3669 }
3670
3671-void flow_offload_free(struct flow_offload *flow)
3672+static void flow_offload_route_release(struct flow_offload *flow)
3673 {
3674- struct flow_offload_entry *e;
3675+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
3676+ nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_REPLY);
3677+}
3678
3679- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
3680- dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
3681- e = container_of(flow, struct flow_offload_entry, flow);
3682- if (flow->flags & FLOW_OFFLOAD_DYING)
3683- nf_ct_delete(e->ct, 0, 0);
3684- nf_ct_put(e->ct);
3685- kfree_rcu(e, rcu_head);
3686+void flow_offload_free(struct flow_offload *flow)
3687+{
3688+ switch (flow->type) {
3689+ case NF_FLOW_OFFLOAD_ROUTE:
3690+ flow_offload_route_release(flow);
3691+ break;
3692+ default:
3693+ break;
3694+ }
3695+ nf_ct_put(flow->ct);
3696+ kfree_rcu(flow, rcu_head);
3697 }
3698 EXPORT_SYMBOL_GPL(flow_offload_free);
3699
3700@@ -168,14 +240,14 @@ static u32 flow_offload_hash(const void *data, u32 len, u32 seed)
3701 {
3702 const struct flow_offload_tuple *tuple = data;
3703
3704- return jhash(tuple, offsetof(struct flow_offload_tuple, dir), seed);
3705+ return jhash(tuple, offsetof(struct flow_offload_tuple, __hash), seed);
3706 }
3707
3708 static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
3709 {
3710 const struct flow_offload_tuple_rhash *tuplehash = data;
3711
3712- return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, dir), seed);
3713+ return jhash(&tuplehash->tuple, offsetof(struct flow_offload_tuple, __hash), seed);
3714 }
3715
3716 static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
3717@@ -184,7 +256,7 @@ static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
3718 const struct flow_offload_tuple *tuple = arg->key;
3719 const struct flow_offload_tuple_rhash *x = ptr;
3720
3721- if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, dir)))
3722+ if (memcmp(&x->tuple, tuple, offsetof(struct flow_offload_tuple, __hash)))
3723 return 1;
3724
3725 return 0;
3726@@ -198,30 +270,30 @@ static const struct rhashtable_params nf_flow_offload_rhash_params = {
3727 .automatic_shrinking = true,
3728 };
3729
3730-#define DAY (86400 * HZ)
3731-
3732-/* Set an arbitrary timeout large enough not to ever expire, this save
3733- * us a check for the IPS_OFFLOAD_BIT from the packet path via
3734- * nf_ct_is_expired().
3735- */
3736-static void nf_ct_offload_timeout(struct flow_offload *flow)
3737+unsigned long flow_offload_get_timeout(struct flow_offload *flow)
3738 {
3739- struct flow_offload_entry *entry;
3740- struct nf_conn *ct;
3741+ unsigned long timeout = NF_FLOW_TIMEOUT;
3742+ struct net *net = nf_ct_net(flow->ct);
3743+ int l4num = nf_ct_protonum(flow->ct);
3744+
3745+ if (l4num == IPPROTO_TCP) {
3746+ struct nf_tcp_net *tn = nf_tcp_pernet(net);
3747
3748- entry = container_of(flow, struct flow_offload_entry, flow);
3749- ct = entry->ct;
3750+ timeout = tn->offload_timeout;
3751+ } else if (l4num == IPPROTO_UDP) {
3752+ struct nf_udp_net *tn = nf_udp_pernet(net);
3753+
3754+ timeout = tn->offload_timeout;
3755+ }
3756
3757- if (nf_ct_expires(ct) < DAY / 2)
3758- ct->timeout = nfct_time_stamp + DAY;
3759+ return timeout;
3760 }
3761
3762 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
3763 {
3764 int err;
3765
3766- nf_ct_offload_timeout(flow);
3767- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
3768+ flow->timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
3769
3770 err = rhashtable_insert_fast(&flow_table->rhashtable,
3771 &flow->tuplehash[0].node,
3772@@ -239,10 +311,35 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
3773 return err;
3774 }
3775
3776+ nf_ct_offload_timeout(flow->ct);
3777+
3778+ if (nf_flowtable_hw_offload(flow_table)) {
3779+ __set_bit(NF_FLOW_HW, &flow->flags);
3780+ nf_flow_offload_add(flow_table, flow);
3781+ }
3782+
3783 return 0;
3784 }
3785 EXPORT_SYMBOL_GPL(flow_offload_add);
3786
3787+void flow_offload_refresh(struct nf_flowtable *flow_table,
3788+ struct flow_offload *flow)
3789+{
3790+ u32 timeout;
3791+
3792+ timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
3793+ if (timeout - READ_ONCE(flow->timeout) > HZ)
3794+ WRITE_ONCE(flow->timeout, timeout);
3795+ else
3796+ return;
3797+
3798+ if (likely(!nf_flowtable_hw_offload(flow_table)))
3799+ return;
3800+
3801+ nf_flow_offload_add(flow_table, flow);
3802+}
3803+EXPORT_SYMBOL_GPL(flow_offload_refresh);
3804+
3805 static inline bool nf_flow_has_expired(const struct flow_offload *flow)
3806 {
3807 return nf_flow_timeout_delta(flow->timeout) <= 0;
3808@@ -251,8 +348,6 @@ static inline bool nf_flow_has_expired(const struct flow_offload *flow)
3809 static void flow_offload_del(struct nf_flowtable *flow_table,
3810 struct flow_offload *flow)
3811 {
3812- struct flow_offload_entry *e;
3813-
3814 rhashtable_remove_fast(&flow_table->rhashtable,
3815 &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
3816 nf_flow_offload_rhash_params);
3817@@ -260,28 +355,21 @@ static void flow_offload_del(struct nf_flowtable *flow_table,
3818 &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
3819 nf_flow_offload_rhash_params);
3820
3821- e = container_of(flow, struct flow_offload_entry, flow);
3822- clear_bit(IPS_OFFLOAD_BIT, &e->ct->status);
3823+ clear_bit(IPS_OFFLOAD_BIT, &flow->ct->status);
3824
3825 if (nf_flow_has_expired(flow))
3826- flow_offload_fixup_ct(e->ct);
3827- else if (flow->flags & FLOW_OFFLOAD_TEARDOWN)
3828- flow_offload_fixup_ct_timeout(e->ct);
3829-
3830- if (!(flow->flags & FLOW_OFFLOAD_TEARDOWN))
3831- flow_offload_fixup_ct_state(e->ct);
3832+ flow_offload_fixup_ct(flow->ct);
3833+ else
3834+ flow_offload_fixup_ct_timeout(flow->ct);
3835
3836 flow_offload_free(flow);
3837 }
3838
3839 void flow_offload_teardown(struct flow_offload *flow)
3840 {
3841- struct flow_offload_entry *e;
3842+ set_bit(NF_FLOW_TEARDOWN, &flow->flags);
3843
3844- flow->flags |= FLOW_OFFLOAD_TEARDOWN;
3845-
3846- e = container_of(flow, struct flow_offload_entry, flow);
3847- flow_offload_fixup_ct_state(e->ct);
3848+ flow_offload_fixup_ct_state(flow->ct);
3849 }
3850 EXPORT_SYMBOL_GPL(flow_offload_teardown);
3851
3852@@ -291,7 +379,6 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
3853 {
3854 struct flow_offload_tuple_rhash *tuplehash;
3855 struct flow_offload *flow;
3856- struct flow_offload_entry *e;
3857 int dir;
3858
3859 tuplehash = rhashtable_lookup(&flow_table->rhashtable, tuple,
3860@@ -301,19 +388,17 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
3861
3862 dir = tuplehash->tuple.dir;
3863 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
3864- if (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN))
3865+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags))
3866 return NULL;
3867
3868- e = container_of(flow, struct flow_offload_entry, flow);
3869- if (unlikely(nf_ct_is_dying(e->ct)))
3870+ if (unlikely(nf_ct_is_dying(flow->ct)))
3871 return NULL;
3872
3873 return tuplehash;
3874 }
3875 EXPORT_SYMBOL_GPL(flow_offload_lookup);
3876
3877-static int
3878-nf_flow_table_iterate(struct nf_flowtable *flow_table,
3879+int nf_flow_table_iterate(struct nf_flowtable *flow_table,
3880 void (*iter)(struct flow_offload *flow, void *data),
3881 void *data)
3882 {
3883@@ -326,7 +411,6 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
3884 rhashtable_walk_start(&hti);
3885
3886 while ((tuplehash = rhashtable_walk_next(&hti))) {
3887-
3888 if (IS_ERR(tuplehash)) {
3889 if (PTR_ERR(tuplehash) != -EAGAIN) {
3890 err = PTR_ERR(tuplehash);
3891@@ -346,23 +430,49 @@ nf_flow_table_iterate(struct nf_flowtable *flow_table,
3892
3893 return err;
3894 }
3895+EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
3896
3897-static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
3898+static bool flow_offload_stale_dst(struct flow_offload_tuple *tuple)
3899 {
3900- struct nf_flowtable *flow_table = data;
3901- struct flow_offload_entry *e;
3902- bool teardown;
3903+ struct dst_entry *dst;
3904
3905- e = container_of(flow, struct flow_offload_entry, flow);
3906+ if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH ||
3907+ tuple->xmit_type == FLOW_OFFLOAD_XMIT_XFRM) {
3908+ dst = tuple->dst_cache;
3909+ if (!dst_check(dst, tuple->dst_cookie))
3910+ return true;
3911+ }
3912
3913- teardown = flow->flags & (FLOW_OFFLOAD_DYING |
3914- FLOW_OFFLOAD_TEARDOWN);
3915+ return false;
3916+}
3917
3918- if (!teardown)
3919- nf_ct_offload_timeout(flow);
3920+static bool nf_flow_has_stale_dst(struct flow_offload *flow)
3921+{
3922+ return flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple) ||
3923+ flow_offload_stale_dst(&flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple);
3924+}
3925
3926- if (nf_flow_has_expired(flow) || teardown)
3927- flow_offload_del(flow_table, flow);
3928+static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
3929+{
3930+ struct nf_flowtable *flow_table = data;
3931+
3932+ if (nf_flow_has_expired(flow) ||
3933+ nf_ct_is_dying(flow->ct) ||
3934+ nf_flow_has_stale_dst(flow))
3935+ set_bit(NF_FLOW_TEARDOWN, &flow->flags);
3936+
3937+ if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
3938+ if (test_bit(NF_FLOW_HW, &flow->flags)) {
3939+ if (!test_bit(NF_FLOW_HW_DYING, &flow->flags))
3940+ nf_flow_offload_del(flow_table, flow);
3941+ else if (test_bit(NF_FLOW_HW_DEAD, &flow->flags))
3942+ flow_offload_del(flow_table, flow);
3943+ } else {
3944+ flow_offload_del(flow_table, flow);
3945+ }
3946+ } else if (test_bit(NF_FLOW_HW, &flow->flags)) {
3947+ nf_flow_offload_stats(flow_table, flow);
3948+ }
3949 }
3950
3951 static void nf_flow_offload_work_gc(struct work_struct *work)
3952@@ -374,30 +484,20 @@ static void nf_flow_offload_work_gc(struct work_struct *work)
3953 queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
3954 }
3955
3956-static int nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
3957- __be16 port, __be16 new_port)
3958+static void nf_flow_nat_port_tcp(struct sk_buff *skb, unsigned int thoff,
3959+ __be16 port, __be16 new_port)
3960 {
3961 struct tcphdr *tcph;
3962
3963- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
3964- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
3965- return -1;
3966-
3967 tcph = (void *)(skb_network_header(skb) + thoff);
3968 inet_proto_csum_replace2(&tcph->check, skb, port, new_port, false);
3969-
3970- return 0;
3971 }
3972
3973-static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
3974- __be16 port, __be16 new_port)
3975+static void nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
3976+ __be16 port, __be16 new_port)
3977 {
3978 struct udphdr *udph;
3979
3980- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
3981- skb_try_make_writable(skb, thoff + sizeof(*udph)))
3982- return -1;
3983-
3984 udph = (void *)(skb_network_header(skb) + thoff);
3985 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
3986 inet_proto_csum_replace2(&udph->check, skb, port,
3987@@ -405,38 +505,28 @@ static int nf_flow_nat_port_udp(struct sk_buff *skb, unsigned int thoff,
3988 if (!udph->check)
3989 udph->check = CSUM_MANGLED_0;
3990 }
3991-
3992- return 0;
3993 }
3994
3995-static int nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
3996- u8 protocol, __be16 port, __be16 new_port)
3997+static void nf_flow_nat_port(struct sk_buff *skb, unsigned int thoff,
3998+ u8 protocol, __be16 port, __be16 new_port)
3999 {
4000 switch (protocol) {
4001 case IPPROTO_TCP:
4002- if (nf_flow_nat_port_tcp(skb, thoff, port, new_port) < 0)
4003- return NF_DROP;
4004+ nf_flow_nat_port_tcp(skb, thoff, port, new_port);
4005 break;
4006 case IPPROTO_UDP:
4007- if (nf_flow_nat_port_udp(skb, thoff, port, new_port) < 0)
4008- return NF_DROP;
4009+ nf_flow_nat_port_udp(skb, thoff, port, new_port);
4010 break;
4011 }
4012-
4013- return 0;
4014 }
4015
4016-int nf_flow_snat_port(const struct flow_offload *flow,
4017- struct sk_buff *skb, unsigned int thoff,
4018- u8 protocol, enum flow_offload_tuple_dir dir)
4019+void nf_flow_snat_port(const struct flow_offload *flow,
4020+ struct sk_buff *skb, unsigned int thoff,
4021+ u8 protocol, enum flow_offload_tuple_dir dir)
4022 {
4023 struct flow_ports *hdr;
4024 __be16 port, new_port;
4025
4026- if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
4027- skb_try_make_writable(skb, thoff + sizeof(*hdr)))
4028- return -1;
4029-
4030 hdr = (void *)(skb_network_header(skb) + thoff);
4031
4032 switch (dir) {
4033@@ -450,25 +540,19 @@ int nf_flow_snat_port(const struct flow_offload *flow,
4034 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port;
4035 hdr->dest = new_port;
4036 break;
4037- default:
4038- return -1;
4039 }
4040
4041- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4042+ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4043 }
4044 EXPORT_SYMBOL_GPL(nf_flow_snat_port);
4045
4046-int nf_flow_dnat_port(const struct flow_offload *flow,
4047- struct sk_buff *skb, unsigned int thoff,
4048- u8 protocol, enum flow_offload_tuple_dir dir)
4049+void nf_flow_dnat_port(const struct flow_offload *flow, struct sk_buff *skb,
4050+ unsigned int thoff, u8 protocol,
4051+ enum flow_offload_tuple_dir dir)
4052 {
4053 struct flow_ports *hdr;
4054 __be16 port, new_port;
4055
4056- if (!pskb_may_pull(skb, thoff + sizeof(*hdr)) ||
4057- skb_try_make_writable(skb, thoff + sizeof(*hdr)))
4058- return -1;
4059-
4060 hdr = (void *)(skb_network_header(skb) + thoff);
4061
4062 switch (dir) {
4063@@ -482,11 +566,9 @@ int nf_flow_dnat_port(const struct flow_offload *flow,
4064 new_port = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port;
4065 hdr->source = new_port;
4066 break;
4067- default:
4068- return -1;
4069 }
4070
4071- return nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4072+ nf_flow_nat_port(skb, thoff, protocol, port, new_port);
4073 }
4074 EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
4075
4076@@ -494,7 +576,9 @@ int nf_flow_table_init(struct nf_flowtable *flowtable)
4077 {
4078 int err;
4079
4080- INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
4081+ INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
4082+ flow_block_init(&flowtable->flow_block);
4083+ init_rwsem(&flowtable->flow_block_lock);
4084
4085 err = rhashtable_init(&flowtable->rhashtable,
4086 &nf_flow_offload_rhash_params);
4087@@ -515,25 +599,24 @@ EXPORT_SYMBOL_GPL(nf_flow_table_init);
4088 static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
4089 {
4090 struct net_device *dev = data;
4091- struct flow_offload_entry *e;
4092-
4093- e = container_of(flow, struct flow_offload_entry, flow);
4094
4095 if (!dev) {
4096 flow_offload_teardown(flow);
4097 return;
4098 }
4099- if (net_eq(nf_ct_net(e->ct), dev_net(dev)) &&
4100+
4101+ if (net_eq(nf_ct_net(flow->ct), dev_net(dev)) &&
4102 (flow->tuplehash[0].tuple.iifidx == dev->ifindex ||
4103 flow->tuplehash[1].tuple.iifidx == dev->ifindex))
4104- flow_offload_dead(flow);
4105+ flow_offload_teardown(flow);
4106 }
4107
4108-static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
4109- struct net_device *dev)
4110+void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
4111+ struct net_device *dev)
4112 {
4113 nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
4114 flush_delayed_work(&flowtable->gc_work);
4115+ nf_flow_table_offload_flush(flowtable);
4116 }
4117
4118 void nf_flow_table_cleanup(struct net_device *dev)
4119@@ -542,7 +625,7 @@ void nf_flow_table_cleanup(struct net_device *dev)
4120
4121 mutex_lock(&flowtable_lock);
4122 list_for_each_entry(flowtable, &flowtables, list)
4123- nf_flow_table_iterate_cleanup(flowtable, dev);
4124+ nf_flow_table_gc_cleanup(flowtable, dev);
4125 mutex_unlock(&flowtable_lock);
4126 }
4127 EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
4128@@ -552,9 +635,14 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
4129 mutex_lock(&flowtable_lock);
4130 list_del(&flow_table->list);
4131 mutex_unlock(&flowtable_lock);
4132+
4133 cancel_delayed_work_sync(&flow_table->gc_work);
4134 nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
4135 nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
4136+ nf_flow_table_offload_flush(flow_table);
4137+ if (nf_flowtable_hw_offload(flow_table))
4138+ nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step,
4139+ flow_table);
4140 rhashtable_destroy(&flow_table->rhashtable);
4141 }
4142 EXPORT_SYMBOL_GPL(nf_flow_table_free);
4143@@ -578,12 +666,23 @@ static struct notifier_block flow_offload_netdev_notifier = {
4144
4145 static int __init nf_flow_table_module_init(void)
4146 {
4147- return register_netdevice_notifier(&flow_offload_netdev_notifier);
4148+ int ret;
4149+
4150+ ret = nf_flow_table_offload_init();
4151+ if (ret)
4152+ return ret;
4153+
4154+ ret = register_netdevice_notifier(&flow_offload_netdev_notifier);
4155+ if (ret)
4156+ nf_flow_table_offload_exit();
4157+
4158+ return ret;
4159 }
4160
4161 static void __exit nf_flow_table_module_exit(void)
4162 {
4163 unregister_netdevice_notifier(&flow_offload_netdev_notifier);
4164+ nf_flow_table_offload_exit();
4165 }
4166
4167 module_init(nf_flow_table_module_init);
4168@@ -591,3 +690,4 @@ module_exit(nf_flow_table_module_exit);
4169
4170 MODULE_LICENSE("GPL");
4171 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
4172+MODULE_DESCRIPTION("Netfilter flow table module");
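The hunks above rework the flowtable core API: flow_offload_alloc() now pins the conntrack entry in flow->ct, flow_offload_route_init() fills both tuple directions (holding a dst reference for the NEIGH/XFRM transmit paths), and flow_offload_add() arms the per-protocol offload timeout and queues a hardware add when the table has hardware offload enabled. A minimal sketch of the expected caller sequence, as used by front ends such as nft_flow_offload or the xt_FLOWOFFLOAD target added later in this patch; the signatures are assumed from the symbols exported above, and the helper name and error handling are illustrative only:

	#include <net/netfilter/nf_flow_table.h>

	/* Illustrative only, not part of the diff.  Real callers also
	 * test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status) first and fill
	 * struct nf_flow_route from the packet's cached routes.
	 */
	static int example_offload_ct(struct nf_flowtable *ft, struct nf_conn *ct,
				      struct nf_flow_route *route)
	{
		struct flow_offload *flow;
		int err;

		flow = flow_offload_alloc(ct);		/* takes a ct reference */
		if (!flow)
			return -ENOMEM;

		err = flow_offload_route_init(flow, route); /* both directions */
		if (err < 0)
			goto err_free;

		err = flow_offload_add(ft, flow);	/* rhashtable insert + HW add */
		if (err < 0)
			goto err_free;

		return 0;

	err_free:
		flow_offload_free(flow);		/* releases dsts (if any) and ct */
		return err;
	}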
4173diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
4174index e92aa6b7e..6257d87c3 100644
4175--- a/net/netfilter/nf_flow_table_ip.c
4176+++ b/net/netfilter/nf_flow_table_ip.c
4177@@ -7,11 +7,13 @@
4178 #include <linux/ip.h>
4179 #include <linux/ipv6.h>
4180 #include <linux/netdevice.h>
4181+#include <linux/if_ether.h>
4182 #include <net/ip.h>
4183 #include <net/ipv6.h>
4184 #include <net/ip6_route.h>
4185 #include <net/neighbour.h>
4186 #include <net/netfilter/nf_flow_table.h>
4187+#include <net/netfilter/nf_conntrack_acct.h>
4188 /* For layer 4 checksum field offset. */
4189 #include <linux/tcp.h>
4190 #include <linux/udp.h>
4191@@ -24,9 +26,6 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
4192 if (proto != IPPROTO_TCP)
4193 return 0;
4194
4195- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)))
4196- return -1;
4197-
4198 tcph = (void *)(skb_network_header(skb) + thoff);
4199 if (unlikely(tcph->fin || tcph->rst)) {
4200 flow_offload_teardown(flow);
4201@@ -36,30 +35,20 @@ static int nf_flow_state_check(struct flow_offload *flow, int proto,
4202 return 0;
4203 }
4204
4205-static int nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
4206- __be32 addr, __be32 new_addr)
4207+static void nf_flow_nat_ip_tcp(struct sk_buff *skb, unsigned int thoff,
4208+ __be32 addr, __be32 new_addr)
4209 {
4210 struct tcphdr *tcph;
4211
4212- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
4213- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
4214- return -1;
4215-
4216 tcph = (void *)(skb_network_header(skb) + thoff);
4217 inet_proto_csum_replace4(&tcph->check, skb, addr, new_addr, true);
4218-
4219- return 0;
4220 }
4221
4222-static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
4223- __be32 addr, __be32 new_addr)
4224+static void nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
4225+ __be32 addr, __be32 new_addr)
4226 {
4227 struct udphdr *udph;
4228
4229- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
4230- skb_try_make_writable(skb, thoff + sizeof(*udph)))
4231- return -1;
4232-
4233 udph = (void *)(skb_network_header(skb) + thoff);
4234 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
4235 inet_proto_csum_replace4(&udph->check, skb, addr,
4236@@ -67,31 +56,25 @@ static int nf_flow_nat_ip_udp(struct sk_buff *skb, unsigned int thoff,
4237 if (!udph->check)
4238 udph->check = CSUM_MANGLED_0;
4239 }
4240-
4241- return 0;
4242 }
4243
4244-static int nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
4245- unsigned int thoff, __be32 addr,
4246- __be32 new_addr)
4247+static void nf_flow_nat_ip_l4proto(struct sk_buff *skb, struct iphdr *iph,
4248+ unsigned int thoff, __be32 addr,
4249+ __be32 new_addr)
4250 {
4251 switch (iph->protocol) {
4252 case IPPROTO_TCP:
4253- if (nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr) < 0)
4254- return NF_DROP;
4255+ nf_flow_nat_ip_tcp(skb, thoff, addr, new_addr);
4256 break;
4257 case IPPROTO_UDP:
4258- if (nf_flow_nat_ip_udp(skb, thoff, addr, new_addr) < 0)
4259- return NF_DROP;
4260+ nf_flow_nat_ip_udp(skb, thoff, addr, new_addr);
4261 break;
4262 }
4263-
4264- return 0;
4265 }
4266
4267-static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4268- struct iphdr *iph, unsigned int thoff,
4269- enum flow_offload_tuple_dir dir)
4270+static void nf_flow_snat_ip(const struct flow_offload *flow,
4271+ struct sk_buff *skb, struct iphdr *iph,
4272+ unsigned int thoff, enum flow_offload_tuple_dir dir)
4273 {
4274 __be32 addr, new_addr;
4275
4276@@ -106,17 +89,15 @@ static int nf_flow_snat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4277 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
4278 iph->daddr = new_addr;
4279 break;
4280- default:
4281- return -1;
4282 }
4283 csum_replace4(&iph->check, addr, new_addr);
4284
4285- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4286+ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4287 }
4288
4289-static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4290- struct iphdr *iph, unsigned int thoff,
4291- enum flow_offload_tuple_dir dir)
4292+static void nf_flow_dnat_ip(const struct flow_offload *flow,
4293+ struct sk_buff *skb, struct iphdr *iph,
4294+ unsigned int thoff, enum flow_offload_tuple_dir dir)
4295 {
4296 __be32 addr, new_addr;
4297
4298@@ -131,29 +112,24 @@ static int nf_flow_dnat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4299 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
4300 iph->saddr = new_addr;
4301 break;
4302- default:
4303- return -1;
4304 }
4305 csum_replace4(&iph->check, addr, new_addr);
4306
4307- return nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4308+ nf_flow_nat_ip_l4proto(skb, iph, thoff, addr, new_addr);
4309 }
4310
4311-static int nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4312- unsigned int thoff, enum flow_offload_tuple_dir dir)
4313+static void nf_flow_nat_ip(const struct flow_offload *flow, struct sk_buff *skb,
4314+ unsigned int thoff, enum flow_offload_tuple_dir dir,
4315+ struct iphdr *iph)
4316 {
4317- struct iphdr *iph = ip_hdr(skb);
4318-
4319- if (flow->flags & FLOW_OFFLOAD_SNAT &&
4320- (nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
4321- nf_flow_snat_ip(flow, skb, iph, thoff, dir) < 0))
4322- return -1;
4323- if (flow->flags & FLOW_OFFLOAD_DNAT &&
4324- (nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir) < 0 ||
4325- nf_flow_dnat_ip(flow, skb, iph, thoff, dir) < 0))
4326- return -1;
4327-
4328- return 0;
4329+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
4330+ nf_flow_snat_port(flow, skb, thoff, iph->protocol, dir);
4331+ nf_flow_snat_ip(flow, skb, iph, thoff, dir);
4332+ }
4333+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
4334+ nf_flow_dnat_port(flow, skb, thoff, iph->protocol, dir);
4335+ nf_flow_dnat_ip(flow, skb, iph, thoff, dir);
4336+ }
4337 }
4338
4339 static bool ip_has_options(unsigned int thoff)
4340@@ -161,35 +137,70 @@ static bool ip_has_options(unsigned int thoff)
4341 return thoff != sizeof(struct iphdr);
4342 }
4343
4344+static void nf_flow_tuple_encap(struct sk_buff *skb,
4345+ struct flow_offload_tuple *tuple)
4346+{
4347+ struct vlan_ethhdr *veth;
4348+ struct pppoe_hdr *phdr;
4349+ int i = 0;
4350+
4351+ if (skb_vlan_tag_present(skb)) {
4352+ tuple->encap[i].id = skb_vlan_tag_get(skb);
4353+ tuple->encap[i].proto = skb->vlan_proto;
4354+ i++;
4355+ }
4356+ switch (skb->protocol) {
4357+ case htons(ETH_P_8021Q):
4358+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
4359+ tuple->encap[i].id = ntohs(veth->h_vlan_TCI);
4360+ tuple->encap[i].proto = skb->protocol;
4361+ break;
4362+ case htons(ETH_P_PPP_SES):
4363+ phdr = (struct pppoe_hdr *)skb_mac_header(skb);
4364+ tuple->encap[i].id = ntohs(phdr->sid);
4365+ tuple->encap[i].proto = skb->protocol;
4366+ break;
4367+ }
4368+}
4369+
4370 static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
4371- struct flow_offload_tuple *tuple)
4372+ struct flow_offload_tuple *tuple, u32 *hdrsize,
4373+ u32 offset)
4374 {
4375 struct flow_ports *ports;
4376 unsigned int thoff;
4377 struct iphdr *iph;
4378
4379- if (!pskb_may_pull(skb, sizeof(*iph)))
4380+ if (!pskb_may_pull(skb, sizeof(*iph) + offset))
4381 return -1;
4382
4383- iph = ip_hdr(skb);
4384- thoff = iph->ihl * 4;
4385+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
4386+ thoff = (iph->ihl * 4);
4387
4388 if (ip_is_fragment(iph) ||
4389 unlikely(ip_has_options(thoff)))
4390 return -1;
4391
4392- if (iph->protocol != IPPROTO_TCP &&
4393- iph->protocol != IPPROTO_UDP)
4394+ thoff += offset;
4395+
4396+ switch (iph->protocol) {
4397+ case IPPROTO_TCP:
4398+ *hdrsize = sizeof(struct tcphdr);
4399+ break;
4400+ case IPPROTO_UDP:
4401+ *hdrsize = sizeof(struct udphdr);
4402+ break;
4403+ default:
4404 return -1;
4405+ }
4406
4407 if (iph->ttl <= 1)
4408 return -1;
4409
4410- thoff = iph->ihl * 4;
4411- if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
4412+ if (!pskb_may_pull(skb, thoff + *hdrsize))
4413 return -1;
4414
4415- iph = ip_hdr(skb);
4416+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
4417 ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
4418
4419 tuple->src_v4.s_addr = iph->saddr;
4420@@ -199,6 +210,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
4421 tuple->l3proto = AF_INET;
4422 tuple->l4proto = iph->protocol;
4423 tuple->iifidx = dev->ifindex;
4424+ nf_flow_tuple_encap(skb, tuple);
4425
4426 return 0;
4427 }
4428@@ -215,14 +227,6 @@ static bool nf_flow_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
4429 return true;
4430 }
4431
4432-static int nf_flow_offload_dst_check(struct dst_entry *dst)
4433-{
4434- if (unlikely(dst_xfrm(dst)))
4435- return dst_check(dst, 0) ? 0 : -1;
4436-
4437- return 0;
4438-}
4439-
4440 static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
4441 const struct nf_hook_state *state,
4442 struct dst_entry *dst)
4443@@ -233,6 +237,75 @@ static unsigned int nf_flow_xmit_xfrm(struct sk_buff *skb,
4444 return NF_STOLEN;
4445 }
4446
4447+static bool nf_flow_skb_encap_protocol(const struct sk_buff *skb, __be16 proto,
4448+ u32 *offset)
4449+{
4450+ struct vlan_ethhdr *veth;
4451+
4452+ switch (skb->protocol) {
4453+ case htons(ETH_P_8021Q):
4454+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
4455+ if (veth->h_vlan_encapsulated_proto == proto) {
4456+ *offset += VLAN_HLEN;
4457+ return true;
4458+ }
4459+ break;
4460+ case htons(ETH_P_PPP_SES):
4461+ if (nf_flow_pppoe_proto(skb) == proto) {
4462+ *offset += PPPOE_SES_HLEN;
4463+ return true;
4464+ }
4465+ break;
4466+ }
4467+
4468+ return false;
4469+}
4470+
4471+static void nf_flow_encap_pop(struct sk_buff *skb,
4472+ struct flow_offload_tuple_rhash *tuplehash)
4473+{
4474+ struct vlan_hdr *vlan_hdr;
4475+ int i;
4476+
4477+ for (i = 0; i < tuplehash->tuple.encap_num; i++) {
4478+ if (skb_vlan_tag_present(skb)) {
4479+ __vlan_hwaccel_clear_tag(skb);
4480+ continue;
4481+ }
4482+ switch (skb->protocol) {
4483+ case htons(ETH_P_8021Q):
4484+ vlan_hdr = (struct vlan_hdr *)skb->data;
4485+ __skb_pull(skb, VLAN_HLEN);
4486+ vlan_set_encap_proto(skb, vlan_hdr);
4487+ skb_reset_network_header(skb);
4488+ break;
4489+ case htons(ETH_P_PPP_SES):
4490+ skb->protocol = nf_flow_pppoe_proto(skb);
4491+ skb_pull(skb, PPPOE_SES_HLEN);
4492+ skb_reset_network_header(skb);
4493+ break;
4494+ }
4495+ }
4496+}
4497+
4498+static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
4499+ const struct flow_offload_tuple_rhash *tuplehash,
4500+ unsigned short type)
4501+{
4502+ struct net_device *outdev;
4503+
4504+ outdev = dev_get_by_index_rcu(net, tuplehash->tuple.out.ifidx);
4505+ if (!outdev)
4506+ return NF_DROP;
4507+
4508+ skb->dev = outdev;
4509+ dev_hard_header(skb, skb->dev, type, tuplehash->tuple.out.h_dest,
4510+ tuplehash->tuple.out.h_source, skb->len);
4511+ dev_queue_xmit(skb);
4512+
4513+ return NF_STOLEN;
4514+}
4515+
4516 unsigned int
4517 nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
4518 const struct nf_hook_state *state)
4519@@ -243,15 +316,18 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
4520 enum flow_offload_tuple_dir dir;
4521 struct flow_offload *flow;
4522 struct net_device *outdev;
4523+ u32 hdrsize, offset = 0;
4524+ unsigned int thoff, mtu;
4525 struct rtable *rt;
4526- unsigned int thoff;
4527 struct iphdr *iph;
4528 __be32 nexthop;
4529+ int ret;
4530
4531- if (skb->protocol != htons(ETH_P_IP))
4532+ if (skb->protocol != htons(ETH_P_IP) &&
4533+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset))
4534 return NF_ACCEPT;
4535
4536- if (nf_flow_tuple_ip(skb, state->in, &tuple) < 0)
4537+ if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0)
4538 return NF_ACCEPT;
4539
4540 tuplehash = flow_offload_lookup(flow_table, &tuple);
4541@@ -260,75 +336,80 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
4542
4543 dir = tuplehash->tuple.dir;
4544 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
4545- rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
4546- outdev = rt->dst.dev;
4547-
4548- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
4549- return NF_ACCEPT;
4550-
4551- if (skb_try_make_writable(skb, sizeof(*iph)))
4552- return NF_DROP;
4553
4554- thoff = ip_hdr(skb)->ihl * 4;
4555- if (nf_flow_state_check(flow, ip_hdr(skb)->protocol, skb, thoff))
4556+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
4557+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
4558 return NF_ACCEPT;
4559
4560- if (nf_flow_offload_dst_check(&rt->dst)) {
4561- flow_offload_teardown(flow);
4562+ iph = (struct iphdr *)(skb_network_header(skb) + offset);
4563+ thoff = (iph->ihl * 4) + offset;
4564+ if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
4565 return NF_ACCEPT;
4566- }
4567
4568- if (nf_flow_nat_ip(flow, skb, thoff, dir) < 0)
4569+ if (skb_try_make_writable(skb, thoff + hdrsize))
4570 return NF_DROP;
4571
4572- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
4573+ flow_offload_refresh(flow_table, flow);
4574+
4575+ nf_flow_encap_pop(skb, tuplehash);
4576+ thoff -= offset;
4577+
4578 iph = ip_hdr(skb);
4579+ nf_flow_nat_ip(flow, skb, thoff, dir, iph);
4580+
4581 ip_decrease_ttl(iph);
4582 skb->tstamp = 0;
4583
4584- if (unlikely(dst_xfrm(&rt->dst))) {
4585+ if (flow_table->flags & NF_FLOWTABLE_COUNTER)
4586+ nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
4587+
4588+ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
4589+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
4590 memset(skb->cb, 0, sizeof(struct inet_skb_parm));
4591 IPCB(skb)->iif = skb->dev->ifindex;
4592 IPCB(skb)->flags = IPSKB_FORWARDED;
4593 return nf_flow_xmit_xfrm(skb, state, &rt->dst);
4594 }
4595
4596- skb->dev = outdev;
4597- nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
4598- skb_dst_set_noref(skb, &rt->dst);
4599- neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
4600+ switch (tuplehash->tuple.xmit_type) {
4601+ case FLOW_OFFLOAD_XMIT_NEIGH:
4602+ rt = (struct rtable *)tuplehash->tuple.dst_cache;
4603+ outdev = rt->dst.dev;
4604+ skb->dev = outdev;
4605+ nexthop = rt_nexthop(rt, flow->tuplehash[!dir].tuple.src_v4.s_addr);
4606+ skb_dst_set_noref(skb, &rt->dst);
4607+ neigh_xmit(NEIGH_ARP_TABLE, outdev, &nexthop, skb);
4608+ ret = NF_STOLEN;
4609+ break;
4610+ case FLOW_OFFLOAD_XMIT_DIRECT:
4611+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IP);
4612+ if (ret == NF_DROP)
4613+ flow_offload_teardown(flow);
4614+ break;
4615+ }
4616
4617- return NF_STOLEN;
4618+ return ret;
4619 }
4620 EXPORT_SYMBOL_GPL(nf_flow_offload_ip_hook);
4621
4622-static int nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
4623- struct in6_addr *addr,
4624- struct in6_addr *new_addr)
4625+static void nf_flow_nat_ipv6_tcp(struct sk_buff *skb, unsigned int thoff,
4626+ struct in6_addr *addr,
4627+ struct in6_addr *new_addr,
4628+ struct ipv6hdr *ip6h)
4629 {
4630 struct tcphdr *tcph;
4631
4632- if (!pskb_may_pull(skb, thoff + sizeof(*tcph)) ||
4633- skb_try_make_writable(skb, thoff + sizeof(*tcph)))
4634- return -1;
4635-
4636 tcph = (void *)(skb_network_header(skb) + thoff);
4637 inet_proto_csum_replace16(&tcph->check, skb, addr->s6_addr32,
4638 new_addr->s6_addr32, true);
4639-
4640- return 0;
4641 }
4642
4643-static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
4644- struct in6_addr *addr,
4645- struct in6_addr *new_addr)
4646+static void nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
4647+ struct in6_addr *addr,
4648+ struct in6_addr *new_addr)
4649 {
4650 struct udphdr *udph;
4651
4652- if (!pskb_may_pull(skb, thoff + sizeof(*udph)) ||
4653- skb_try_make_writable(skb, thoff + sizeof(*udph)))
4654- return -1;
4655-
4656 udph = (void *)(skb_network_header(skb) + thoff);
4657 if (udph->check || skb->ip_summed == CHECKSUM_PARTIAL) {
4658 inet_proto_csum_replace16(&udph->check, skb, addr->s6_addr32,
4659@@ -336,32 +417,26 @@ static int nf_flow_nat_ipv6_udp(struct sk_buff *skb, unsigned int thoff,
4660 if (!udph->check)
4661 udph->check = CSUM_MANGLED_0;
4662 }
4663-
4664- return 0;
4665 }
4666
4667-static int nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
4668- unsigned int thoff, struct in6_addr *addr,
4669- struct in6_addr *new_addr)
4670+static void nf_flow_nat_ipv6_l4proto(struct sk_buff *skb, struct ipv6hdr *ip6h,
4671+ unsigned int thoff, struct in6_addr *addr,
4672+ struct in6_addr *new_addr)
4673 {
4674 switch (ip6h->nexthdr) {
4675 case IPPROTO_TCP:
4676- if (nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr) < 0)
4677- return NF_DROP;
4678+ nf_flow_nat_ipv6_tcp(skb, thoff, addr, new_addr, ip6h);
4679 break;
4680 case IPPROTO_UDP:
4681- if (nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr) < 0)
4682- return NF_DROP;
4683+ nf_flow_nat_ipv6_udp(skb, thoff, addr, new_addr);
4684 break;
4685 }
4686-
4687- return 0;
4688 }
4689
4690-static int nf_flow_snat_ipv6(const struct flow_offload *flow,
4691- struct sk_buff *skb, struct ipv6hdr *ip6h,
4692- unsigned int thoff,
4693- enum flow_offload_tuple_dir dir)
4694+static void nf_flow_snat_ipv6(const struct flow_offload *flow,
4695+ struct sk_buff *skb, struct ipv6hdr *ip6h,
4696+ unsigned int thoff,
4697+ enum flow_offload_tuple_dir dir)
4698 {
4699 struct in6_addr addr, new_addr;
4700
4701@@ -376,17 +451,15 @@ static int nf_flow_snat_ipv6(const struct flow_offload *flow,
4702 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6;
4703 ip6h->daddr = new_addr;
4704 break;
4705- default:
4706- return -1;
4707 }
4708
4709- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4710+ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4711 }
4712
4713-static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
4714- struct sk_buff *skb, struct ipv6hdr *ip6h,
4715- unsigned int thoff,
4716- enum flow_offload_tuple_dir dir)
4717+static void nf_flow_dnat_ipv6(const struct flow_offload *flow,
4718+ struct sk_buff *skb, struct ipv6hdr *ip6h,
4719+ unsigned int thoff,
4720+ enum flow_offload_tuple_dir dir)
4721 {
4722 struct in6_addr addr, new_addr;
4723
4724@@ -401,56 +474,60 @@ static int nf_flow_dnat_ipv6(const struct flow_offload *flow,
4725 new_addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6;
4726 ip6h->saddr = new_addr;
4727 break;
4728- default:
4729- return -1;
4730 }
4731
4732- return nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4733+ nf_flow_nat_ipv6_l4proto(skb, ip6h, thoff, &addr, &new_addr);
4734 }
4735
4736-static int nf_flow_nat_ipv6(const struct flow_offload *flow,
4737- struct sk_buff *skb,
4738- enum flow_offload_tuple_dir dir)
4739+static void nf_flow_nat_ipv6(const struct flow_offload *flow,
4740+ struct sk_buff *skb,
4741+ enum flow_offload_tuple_dir dir,
4742+ struct ipv6hdr *ip6h)
4743 {
4744- struct ipv6hdr *ip6h = ipv6_hdr(skb);
4745 unsigned int thoff = sizeof(*ip6h);
4746
4747- if (flow->flags & FLOW_OFFLOAD_SNAT &&
4748- (nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
4749- nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
4750- return -1;
4751- if (flow->flags & FLOW_OFFLOAD_DNAT &&
4752- (nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir) < 0 ||
4753- nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir) < 0))
4754- return -1;
4755-
4756- return 0;
4757+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
4758+ nf_flow_snat_port(flow, skb, thoff, ip6h->nexthdr, dir);
4759+ nf_flow_snat_ipv6(flow, skb, ip6h, thoff, dir);
4760+ }
4761+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
4762+ nf_flow_dnat_port(flow, skb, thoff, ip6h->nexthdr, dir);
4763+ nf_flow_dnat_ipv6(flow, skb, ip6h, thoff, dir);
4764+ }
4765 }
4766
4767 static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
4768- struct flow_offload_tuple *tuple)
4769+ struct flow_offload_tuple *tuple, u32 *hdrsize,
4770+ u32 offset)
4771 {
4772 struct flow_ports *ports;
4773 struct ipv6hdr *ip6h;
4774 unsigned int thoff;
4775
4776- if (!pskb_may_pull(skb, sizeof(*ip6h)))
4777+ thoff = sizeof(*ip6h) + offset;
4778+ if (!pskb_may_pull(skb, thoff))
4779 return -1;
4780
4781- ip6h = ipv6_hdr(skb);
4782+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
4783
4784- if (ip6h->nexthdr != IPPROTO_TCP &&
4785- ip6h->nexthdr != IPPROTO_UDP)
4786+ switch (ip6h->nexthdr) {
4787+ case IPPROTO_TCP:
4788+ *hdrsize = sizeof(struct tcphdr);
4789+ break;
4790+ case IPPROTO_UDP:
4791+ *hdrsize = sizeof(struct udphdr);
4792+ break;
4793+ default:
4794 return -1;
4795+ }
4796
4797 if (ip6h->hop_limit <= 1)
4798 return -1;
4799
4800- thoff = sizeof(*ip6h);
4801- if (!pskb_may_pull(skb, thoff + sizeof(*ports)))
4802+ if (!pskb_may_pull(skb, thoff + *hdrsize))
4803 return -1;
4804
4805- ip6h = ipv6_hdr(skb);
4806+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
4807 ports = (struct flow_ports *)(skb_network_header(skb) + thoff);
4808
4809 tuple->src_v6 = ip6h->saddr;
4810@@ -460,6 +537,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
4811 tuple->l3proto = AF_INET6;
4812 tuple->l4proto = ip6h->nexthdr;
4813 tuple->iifidx = dev->ifindex;
4814+ nf_flow_tuple_encap(skb, tuple);
4815
4816 return 0;
4817 }
4818@@ -475,13 +553,17 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
4819 const struct in6_addr *nexthop;
4820 struct flow_offload *flow;
4821 struct net_device *outdev;
4822+ unsigned int thoff, mtu;
4823+ u32 hdrsize, offset = 0;
4824 struct ipv6hdr *ip6h;
4825 struct rt6_info *rt;
4826+ int ret;
4827
4828- if (skb->protocol != htons(ETH_P_IPV6))
4829+ if (skb->protocol != htons(ETH_P_IPV6) &&
4830+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset))
4831 return NF_ACCEPT;
4832
4833- if (nf_flow_tuple_ipv6(skb, state->in, &tuple) < 0)
4834+ if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0)
4835 return NF_ACCEPT;
4836
4837 tuplehash = flow_offload_lookup(flow_table, &tuple);
4838@@ -490,44 +572,57 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
4839
4840 dir = tuplehash->tuple.dir;
4841 flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
4842- rt = (struct rt6_info *)flow->tuplehash[dir].tuple.dst_cache;
4843- outdev = rt->dst.dev;
4844-
4845- if (unlikely(nf_flow_exceeds_mtu(skb, flow->tuplehash[dir].tuple.mtu)))
4846- return NF_ACCEPT;
4847
4848- if (nf_flow_state_check(flow, ipv6_hdr(skb)->nexthdr, skb,
4849- sizeof(*ip6h)))
4850+ mtu = flow->tuplehash[dir].tuple.mtu + offset;
4851+ if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
4852 return NF_ACCEPT;
4853
4854- if (nf_flow_offload_dst_check(&rt->dst)) {
4855- flow_offload_teardown(flow);
4856+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
4857+ thoff = sizeof(*ip6h) + offset;
4858+ if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
4859 return NF_ACCEPT;
4860- }
4861
4862- if (skb_try_make_writable(skb, sizeof(*ip6h)))
4863+ if (skb_try_make_writable(skb, thoff + hdrsize))
4864 return NF_DROP;
4865
4866- if (nf_flow_nat_ipv6(flow, skb, dir) < 0)
4867- return NF_DROP;
4868+ flow_offload_refresh(flow_table, flow);
4869+
4870+ nf_flow_encap_pop(skb, tuplehash);
4871
4872- flow->timeout = (u32)jiffies + NF_FLOW_TIMEOUT;
4873 ip6h = ipv6_hdr(skb);
4874+ nf_flow_nat_ipv6(flow, skb, dir, ip6h);
4875+
4876 ip6h->hop_limit--;
4877 skb->tstamp = 0;
4878
4879- if (unlikely(dst_xfrm(&rt->dst))) {
4880+ if (flow_table->flags & NF_FLOWTABLE_COUNTER)
4881+ nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
4882+
4883+ if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
4884+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
4885 memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
4886 IP6CB(skb)->iif = skb->dev->ifindex;
4887 IP6CB(skb)->flags = IP6SKB_FORWARDED;
4888 return nf_flow_xmit_xfrm(skb, state, &rt->dst);
4889 }
4890
4891- skb->dev = outdev;
4892- nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
4893- skb_dst_set_noref(skb, &rt->dst);
4894- neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
4895+ switch (tuplehash->tuple.xmit_type) {
4896+ case FLOW_OFFLOAD_XMIT_NEIGH:
4897+ rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
4898+ outdev = rt->dst.dev;
4899+ skb->dev = outdev;
4900+ nexthop = rt6_nexthop(rt, &flow->tuplehash[!dir].tuple.src_v6);
4901+ skb_dst_set_noref(skb, &rt->dst);
4902+ neigh_xmit(NEIGH_ND_TABLE, outdev, nexthop, skb);
4903+ ret = NF_STOLEN;
4904+ break;
4905+ case FLOW_OFFLOAD_XMIT_DIRECT:
4906+ ret = nf_flow_queue_xmit(state->net, skb, tuplehash, ETH_P_IPV6);
4907+ if (ret == NF_DROP)
4908+ flow_offload_teardown(flow);
4909+ break;
4910+ }
4911
4912- return NF_STOLEN;
4913+ return ret;
4914 }
4915 EXPORT_SYMBOL_GPL(nf_flow_offload_ipv6_hook);
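nf_flow_skb_encap_protocol() and nf_flow_encap_pop() above depend on nf_flow_pppoe_proto(), which this patch adds to include/net/netfilter/nf_flow_table.h outside the hunks shown here. For reference, a sketch of what that helper is assumed to look like: it reads the PPP protocol field that follows the PPPoE session header and maps it to the corresponding Ethernet protocol, returning 0 for anything other than IPv4/IPv6:

	#include <linux/if_ether.h>
	#include <linux/if_pppox.h>
	#include <linux/ppp_defs.h>
	#include <linux/skbuff.h>

	/* Assumed definition, shown only because the IP hooks above call it. */
	static inline __be16 nf_flow_pppoe_proto(const struct sk_buff *skb)
	{
		__be16 proto;

		proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
				     sizeof(struct pppoe_hdr)));
		switch (proto) {
		case htons(PPP_IP):
			return htons(ETH_P_IP);
		case htons(PPP_IPV6):
			return htons(ETH_P_IPV6);
		}

		return 0;
	}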
4916diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
4917new file mode 100644
4918index 000000000..d94c6fb92
4919--- /dev/null
4920+++ b/net/netfilter/nf_flow_table_offload.c
4921@@ -0,0 +1,1191 @@
4922+#include <linux/kernel.h>
4923+#include <linux/init.h>
4924+#include <linux/module.h>
4925+#include <linux/netfilter.h>
4926+#include <linux/rhashtable.h>
4927+#include <linux/netdevice.h>
4928+#include <linux/tc_act/tc_csum.h>
4929+#include <net/flow_offload.h>
4930+#include <net/netfilter/nf_flow_table.h>
4931+#include <net/netfilter/nf_tables.h>
4932+#include <net/netfilter/nf_conntrack.h>
4933+#include <net/netfilter/nf_conntrack_acct.h>
4934+#include <net/netfilter/nf_conntrack_core.h>
4935+#include <net/netfilter/nf_conntrack_tuple.h>
4936+
4937+static struct workqueue_struct *nf_flow_offload_add_wq;
4938+static struct workqueue_struct *nf_flow_offload_del_wq;
4939+static struct workqueue_struct *nf_flow_offload_stats_wq;
4940+
4941+struct flow_offload_work {
4942+ struct list_head list;
4943+ enum flow_cls_command cmd;
4944+ int priority;
4945+ struct nf_flowtable *flowtable;
4946+ struct flow_offload *flow;
4947+ struct work_struct work;
4948+};
4949+
4950+#define NF_FLOW_DISSECTOR(__match, __type, __field) \
4951+ (__match)->dissector.offset[__type] = \
4952+ offsetof(struct nf_flow_key, __field)
4953+
4954+static void nf_flow_rule_lwt_match(struct nf_flow_match *match,
4955+ struct ip_tunnel_info *tun_info)
4956+{
4957+ struct nf_flow_key *mask = &match->mask;
4958+ struct nf_flow_key *key = &match->key;
4959+ unsigned int enc_keys;
4960+
4961+ if (!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX))
4962+ return;
4963+
4964+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_CONTROL, enc_control);
4965+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_KEYID, enc_key_id);
4966+ key->enc_key_id.keyid = tunnel_id_to_key32(tun_info->key.tun_id);
4967+ mask->enc_key_id.keyid = 0xffffffff;
4968+ enc_keys = BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
4969+ BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL);
4970+
4971+ if (ip_tunnel_info_af(tun_info) == AF_INET) {
4972+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
4973+ enc_ipv4);
4974+ key->enc_ipv4.src = tun_info->key.u.ipv4.dst;
4975+ key->enc_ipv4.dst = tun_info->key.u.ipv4.src;
4976+ if (key->enc_ipv4.src)
4977+ mask->enc_ipv4.src = 0xffffffff;
4978+ if (key->enc_ipv4.dst)
4979+ mask->enc_ipv4.dst = 0xffffffff;
4980+ enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
4981+ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
4982+ } else {
4983+ memcpy(&key->enc_ipv6.src, &tun_info->key.u.ipv6.dst,
4984+ sizeof(struct in6_addr));
4985+ memcpy(&key->enc_ipv6.dst, &tun_info->key.u.ipv6.src,
4986+ sizeof(struct in6_addr));
4987+ if (memcmp(&key->enc_ipv6.src, &in6addr_any,
4988+ sizeof(struct in6_addr)))
4989+ memset(&mask->enc_ipv6.src, 0xff,
4990+ sizeof(struct in6_addr));
4991+ if (memcmp(&key->enc_ipv6.dst, &in6addr_any,
4992+ sizeof(struct in6_addr)))
4993+ memset(&mask->enc_ipv6.dst, 0xff,
4994+ sizeof(struct in6_addr));
4995+ enc_keys |= BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS);
4996+ key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
4997+ }
4998+
4999+ match->dissector.used_keys |= enc_keys;
5000+}
5001+
5002+static void nf_flow_rule_vlan_match(struct flow_dissector_key_vlan *key,
5003+ struct flow_dissector_key_vlan *mask,
5004+ u16 vlan_id, __be16 proto)
5005+{
5006+ key->vlan_id = vlan_id;
5007+ mask->vlan_id = VLAN_VID_MASK;
5008+ key->vlan_tpid = proto;
5009+ mask->vlan_tpid = 0xffff;
5010+}
5011+
5012+static int nf_flow_rule_match(struct nf_flow_match *match,
5013+ const struct flow_offload_tuple *tuple,
5014+ struct dst_entry *other_dst)
5015+{
5016+ struct nf_flow_key *mask = &match->mask;
5017+ struct nf_flow_key *key = &match->key;
5018+ struct ip_tunnel_info *tun_info;
5019+ bool vlan_encap = false;
5020+
5021+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_META, meta);
5022+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CONTROL, control);
5023+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_BASIC, basic);
5024+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
5025+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
5026+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_TCP, tcp);
5027+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_PORTS, tp);
5028+
5029+ if (other_dst && other_dst->lwtstate) {
5030+ tun_info = lwt_tun_info(other_dst->lwtstate);
5031+ nf_flow_rule_lwt_match(match, tun_info);
5032+ }
5033+
5034+ key->meta.ingress_ifindex = tuple->iifidx;
5035+ mask->meta.ingress_ifindex = 0xffffffff;
5036+
5037+ if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
5038+ tuple->encap[0].proto == htons(ETH_P_8021Q)) {
5039+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN, vlan);
5040+ nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
5041+ tuple->encap[0].id,
5042+ tuple->encap[0].proto);
5043+ vlan_encap = true;
5044+ }
5045+
5046+ if (tuple->encap_num > 1 && !(tuple->in_vlan_ingress & BIT(1)) &&
5047+ tuple->encap[1].proto == htons(ETH_P_8021Q)) {
5048+ if (vlan_encap) {
5049+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_CVLAN,
5050+ cvlan);
5051+ nf_flow_rule_vlan_match(&key->cvlan, &mask->cvlan,
5052+ tuple->encap[1].id,
5053+ tuple->encap[1].proto);
5054+ } else {
5055+ NF_FLOW_DISSECTOR(match, FLOW_DISSECTOR_KEY_VLAN,
5056+ vlan);
5057+ nf_flow_rule_vlan_match(&key->vlan, &mask->vlan,
5058+ tuple->encap[1].id,
5059+ tuple->encap[1].proto);
5060+ }
5061+ }
5062+
5063+ switch (tuple->l3proto) {
5064+ case AF_INET:
5065+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
5066+ key->basic.n_proto = htons(ETH_P_IP);
5067+ key->ipv4.src = tuple->src_v4.s_addr;
5068+ mask->ipv4.src = 0xffffffff;
5069+ key->ipv4.dst = tuple->dst_v4.s_addr;
5070+ mask->ipv4.dst = 0xffffffff;
5071+ break;
5072+ case AF_INET6:
5073+ key->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
5074+ key->basic.n_proto = htons(ETH_P_IPV6);
5075+ key->ipv6.src = tuple->src_v6;
5076+ memset(&mask->ipv6.src, 0xff, sizeof(mask->ipv6.src));
5077+ key->ipv6.dst = tuple->dst_v6;
5078+ memset(&mask->ipv6.dst, 0xff, sizeof(mask->ipv6.dst));
5079+ break;
5080+ default:
5081+ return -EOPNOTSUPP;
5082+ }
5083+ mask->control.addr_type = 0xffff;
5084+ match->dissector.used_keys |= BIT(key->control.addr_type);
5085+ mask->basic.n_proto = 0xffff;
5086+
5087+ switch (tuple->l4proto) {
5088+ case IPPROTO_TCP:
5089+ key->tcp.flags = 0;
5090+ mask->tcp.flags = cpu_to_be16(be32_to_cpu(TCP_FLAG_RST | TCP_FLAG_FIN) >> 16);
5091+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_TCP);
5092+ break;
5093+ case IPPROTO_UDP:
5094+ break;
5095+ default:
5096+ return -EOPNOTSUPP;
5097+ }
5098+
5099+ key->basic.ip_proto = tuple->l4proto;
5100+ mask->basic.ip_proto = 0xff;
5101+
5102+ key->tp.src = tuple->src_port;
5103+ mask->tp.src = 0xffff;
5104+ key->tp.dst = tuple->dst_port;
5105+ mask->tp.dst = 0xffff;
5106+
5107+ match->dissector.used_keys |= BIT(FLOW_DISSECTOR_KEY_META) |
5108+ BIT(FLOW_DISSECTOR_KEY_CONTROL) |
5109+ BIT(FLOW_DISSECTOR_KEY_BASIC) |
5110+ BIT(FLOW_DISSECTOR_KEY_PORTS);
5111+ return 0;
5112+}
5113+
5114+static void flow_offload_mangle(struct flow_action_entry *entry,
5115+ enum flow_action_mangle_base htype, u32 offset,
5116+ const __be32 *value, const __be32 *mask)
5117+{
5118+ entry->id = FLOW_ACTION_MANGLE;
5119+ entry->mangle.htype = htype;
5120+ entry->mangle.offset = offset;
5121+ memcpy(&entry->mangle.mask, mask, sizeof(u32));
5122+ memcpy(&entry->mangle.val, value, sizeof(u32));
5123+}
5124+
5125+static inline struct flow_action_entry *
5126+flow_action_entry_next(struct nf_flow_rule *flow_rule)
5127+{
5128+ int i = flow_rule->rule->action.num_entries++;
5129+
5130+ return &flow_rule->rule->action.entries[i];
5131+}
5132+
5133+static int flow_offload_eth_src(struct net *net,
5134+ const struct flow_offload *flow,
5135+ enum flow_offload_tuple_dir dir,
5136+ struct nf_flow_rule *flow_rule)
5137+{
5138+ struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
5139+ struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
5140+ const struct flow_offload_tuple *other_tuple, *this_tuple;
5141+ struct net_device *dev = NULL;
5142+ const unsigned char *addr;
5143+ u32 mask, val;
5144+ u16 val16;
5145+
5146+ this_tuple = &flow->tuplehash[dir].tuple;
5147+
5148+ switch (this_tuple->xmit_type) {
5149+ case FLOW_OFFLOAD_XMIT_DIRECT:
5150+ addr = this_tuple->out.h_source;
5151+ break;
5152+ case FLOW_OFFLOAD_XMIT_NEIGH:
5153+ other_tuple = &flow->tuplehash[!dir].tuple;
5154+ dev = dev_get_by_index(net, other_tuple->iifidx);
5155+ if (!dev)
5156+ return -ENOENT;
5157+
5158+ addr = dev->dev_addr;
5159+ break;
5160+ default:
5161+ return -EOPNOTSUPP;
5162+ }
5163+
5164+ mask = ~0xffff0000;
5165+ memcpy(&val16, addr, 2);
5166+ val = val16 << 16;
5167+ flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
5168+ &val, &mask);
5169+
5170+ mask = ~0xffffffff;
5171+ memcpy(&val, addr + 2, 4);
5172+ flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 8,
5173+ &val, &mask);
5174+
5175+ if (dev)
5176+ dev_put(dev);
5177+
5178+ return 0;
5179+}
5180+
5181+static int flow_offload_eth_dst(struct net *net,
5182+ const struct flow_offload *flow,
5183+ enum flow_offload_tuple_dir dir,
5184+ struct nf_flow_rule *flow_rule)
5185+{
5186+ struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
5187+ struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
5188+ const struct flow_offload_tuple *other_tuple, *this_tuple;
5189+ const struct dst_entry *dst_cache;
5190+ unsigned char ha[ETH_ALEN];
5191+ struct neighbour *n;
5192+ const void *daddr;
5193+ u32 mask, val;
5194+ u8 nud_state;
5195+ u16 val16;
5196+
5197+ this_tuple = &flow->tuplehash[dir].tuple;
5198+
5199+ switch (this_tuple->xmit_type) {
5200+ case FLOW_OFFLOAD_XMIT_DIRECT:
5201+ ether_addr_copy(ha, this_tuple->out.h_dest);
5202+ break;
5203+ case FLOW_OFFLOAD_XMIT_NEIGH:
5204+ other_tuple = &flow->tuplehash[!dir].tuple;
5205+ daddr = &other_tuple->src_v4;
5206+ dst_cache = this_tuple->dst_cache;
5207+ n = dst_neigh_lookup(dst_cache, daddr);
5208+ if (!n)
5209+ return -ENOENT;
5210+
5211+ read_lock_bh(&n->lock);
5212+ nud_state = n->nud_state;
5213+ ether_addr_copy(ha, n->ha);
5214+ read_unlock_bh(&n->lock);
5215+ neigh_release(n);
5216+
5217+ if (!(nud_state & NUD_VALID))
5218+ return -ENOENT;
5219+ break;
5220+ default:
5221+ return -EOPNOTSUPP;
5222+ }
5223+
5224+ mask = ~0xffffffff;
5225+ memcpy(&val, ha, 4);
5226+ flow_offload_mangle(entry0, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 0,
5227+ &val, &mask);
5228+
5229+ mask = ~0x0000ffff;
5230+ memcpy(&val16, ha + 4, 2);
5231+ val = val16;
5232+ flow_offload_mangle(entry1, FLOW_ACT_MANGLE_HDR_TYPE_ETH, 4,
5233+ &val, &mask);
5234+
5235+ return 0;
5236+}
5237+
5238+static void flow_offload_ipv4_snat(struct net *net,
5239+ const struct flow_offload *flow,
5240+ enum flow_offload_tuple_dir dir,
5241+ struct nf_flow_rule *flow_rule)
5242+{
5243+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5244+ u32 mask = ~htonl(0xffffffff);
5245+ __be32 addr;
5246+ u32 offset;
5247+
5248+ switch (dir) {
5249+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5250+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v4.s_addr;
5251+ offset = offsetof(struct iphdr, saddr);
5252+ break;
5253+ case FLOW_OFFLOAD_DIR_REPLY:
5254+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v4.s_addr;
5255+ offset = offsetof(struct iphdr, daddr);
5256+ break;
5257+ default:
5258+ return;
5259+ }
5260+
5261+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
5262+ &addr, &mask);
5263+}
5264+
5265+static void flow_offload_ipv4_dnat(struct net *net,
5266+ const struct flow_offload *flow,
5267+ enum flow_offload_tuple_dir dir,
5268+ struct nf_flow_rule *flow_rule)
5269+{
5270+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5271+ u32 mask = ~htonl(0xffffffff);
5272+ __be32 addr;
5273+ u32 offset;
5274+
5275+ switch (dir) {
5276+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5277+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v4.s_addr;
5278+ offset = offsetof(struct iphdr, daddr);
5279+ break;
5280+ case FLOW_OFFLOAD_DIR_REPLY:
5281+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v4.s_addr;
5282+ offset = offsetof(struct iphdr, saddr);
5283+ break;
5284+ default:
5285+ return;
5286+ }
5287+
5288+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP4, offset,
5289+ &addr, &mask);
5290+}
5291+
5292+static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
5293+ unsigned int offset,
5294+ const __be32 *addr, const __be32 *mask)
5295+{
5296+ struct flow_action_entry *entry;
5297+ int i, j;
5298+
5299+ for (i = 0, j = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32), j++) {
5300+ entry = flow_action_entry_next(flow_rule);
5301+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
5302+ offset + i, &addr[j], mask);
5303+ }
5304+}
5305+
5306+static void flow_offload_ipv6_snat(struct net *net,
5307+ const struct flow_offload *flow,
5308+ enum flow_offload_tuple_dir dir,
5309+ struct nf_flow_rule *flow_rule)
5310+{
5311+ u32 mask = ~htonl(0xffffffff);
5312+ const __be32 *addr;
5313+ u32 offset;
5314+
5315+ switch (dir) {
5316+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5317+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr32;
5318+ offset = offsetof(struct ipv6hdr, saddr);
5319+ break;
5320+ case FLOW_OFFLOAD_DIR_REPLY:
5321+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr32;
5322+ offset = offsetof(struct ipv6hdr, daddr);
5323+ break;
5324+ default:
5325+ return;
5326+ }
5327+
5328+ flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
5329+}
5330+
5331+static void flow_offload_ipv6_dnat(struct net *net,
5332+ const struct flow_offload *flow,
5333+ enum flow_offload_tuple_dir dir,
5334+ struct nf_flow_rule *flow_rule)
5335+{
5336+ u32 mask = ~htonl(0xffffffff);
5337+ const __be32 *addr;
5338+ u32 offset;
5339+
5340+ switch (dir) {
5341+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5342+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr32;
5343+ offset = offsetof(struct ipv6hdr, daddr);
5344+ break;
5345+ case FLOW_OFFLOAD_DIR_REPLY:
5346+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr32;
5347+ offset = offsetof(struct ipv6hdr, saddr);
5348+ break;
5349+ default:
5350+ return;
5351+ }
5352+
5353+ flow_offload_ipv6_mangle(flow_rule, offset, addr, &mask);
5354+}
5355+
5356+static int flow_offload_l4proto(const struct flow_offload *flow)
5357+{
5358+ u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
5359+ u8 type = 0;
5360+
5361+ switch (protonum) {
5362+ case IPPROTO_TCP:
5363+ type = FLOW_ACT_MANGLE_HDR_TYPE_TCP;
5364+ break;
5365+ case IPPROTO_UDP:
5366+ type = FLOW_ACT_MANGLE_HDR_TYPE_UDP;
5367+ break;
5368+ default:
5369+ break;
5370+ }
5371+
5372+ return type;
5373+}
5374+
5375+static void flow_offload_port_snat(struct net *net,
5376+ const struct flow_offload *flow,
5377+ enum flow_offload_tuple_dir dir,
5378+ struct nf_flow_rule *flow_rule)
5379+{
5380+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5381+ u32 mask, port;
5382+ u32 offset;
5383+
5384+ switch (dir) {
5385+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5386+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_port);
5387+ offset = 0; /* offsetof(struct tcphdr, source); */
5388+ port = htonl(port << 16);
5389+ mask = ~htonl(0xffff0000);
5390+ break;
5391+ case FLOW_OFFLOAD_DIR_REPLY:
5392+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_port);
5393+ offset = 0; /* offsetof(struct tcphdr, dest); */
5394+ port = htonl(port);
5395+ mask = ~htonl(0xffff);
5396+ break;
5397+ default:
5398+ return;
5399+ }
5400+
5401+ flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
5402+ &port, &mask);
5403+}
5404+
5405+static void flow_offload_port_dnat(struct net *net,
5406+ const struct flow_offload *flow,
5407+ enum flow_offload_tuple_dir dir,
5408+ struct nf_flow_rule *flow_rule)
5409+{
5410+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5411+ u32 mask, port;
5412+ u32 offset;
5413+
5414+ switch (dir) {
5415+ case FLOW_OFFLOAD_DIR_ORIGINAL:
5416+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_port);
5417+ offset = 0; /* offsetof(struct tcphdr, dest); */
5418+ port = htonl(port);
5419+ mask = ~htonl(0xffff);
5420+ break;
5421+ case FLOW_OFFLOAD_DIR_REPLY:
5422+ port = ntohs(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_port);
5423+ offset = 0; /* offsetof(struct tcphdr, source); */
5424+ port = htonl(port << 16);
5425+ mask = ~htonl(0xffff0000);
5426+ break;
5427+ default:
5428+ return;
5429+ }
5430+
5431+ flow_offload_mangle(entry, flow_offload_l4proto(flow), offset,
5432+ &port, &mask);
5433+}
5434+
5435+static void flow_offload_ipv4_checksum(struct net *net,
5436+ const struct flow_offload *flow,
5437+ struct nf_flow_rule *flow_rule)
5438+{
5439+ u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
5440+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
5441+
5442+ entry->id = FLOW_ACTION_CSUM;
5443+ entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;
5444+
5445+ switch (protonum) {
5446+ case IPPROTO_TCP:
5447+ entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_TCP;
5448+ break;
5449+ case IPPROTO_UDP:
5450+ entry->csum_flags |= TCA_CSUM_UPDATE_FLAG_UDP;
5451+ break;
5452+ }
5453+}
5454+
5455+static void flow_offload_redirect(struct net *net,
5456+ const struct flow_offload *flow,
5457+ enum flow_offload_tuple_dir dir,
5458+ struct nf_flow_rule *flow_rule)
5459+{
5460+ const struct flow_offload_tuple *this_tuple, *other_tuple;
5461+ struct flow_action_entry *entry;
5462+ struct net_device *dev;
5463+ int ifindex;
5464+
5465+ this_tuple = &flow->tuplehash[dir].tuple;
5466+ switch (this_tuple->xmit_type) {
5467+ case FLOW_OFFLOAD_XMIT_DIRECT:
5468+ this_tuple = &flow->tuplehash[dir].tuple;
5469+ ifindex = this_tuple->out.hw_ifidx;
5470+ break;
5471+ case FLOW_OFFLOAD_XMIT_NEIGH:
5472+ other_tuple = &flow->tuplehash[!dir].tuple;
5473+ ifindex = other_tuple->iifidx;
5474+ break;
5475+ default:
5476+ return;
5477+ }
5478+
5479+ dev = dev_get_by_index(net, ifindex);
5480+ if (!dev)
5481+ return;
5482+
5483+ entry = flow_action_entry_next(flow_rule);
5484+ entry->id = FLOW_ACTION_REDIRECT;
5485+ entry->dev = dev;
5486+}
5487+
5488+static void flow_offload_encap_tunnel(const struct flow_offload *flow,
5489+ enum flow_offload_tuple_dir dir,
5490+ struct nf_flow_rule *flow_rule)
5491+{
5492+ const struct flow_offload_tuple *this_tuple;
5493+ struct flow_action_entry *entry;
5494+ struct dst_entry *dst;
5495+
5496+ this_tuple = &flow->tuplehash[dir].tuple;
5497+ if (this_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
5498+ return;
5499+
5500+ dst = this_tuple->dst_cache;
5501+ if (dst && dst->lwtstate) {
5502+ struct ip_tunnel_info *tun_info;
5503+
5504+ tun_info = lwt_tun_info(dst->lwtstate);
5505+ if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
5506+ entry = flow_action_entry_next(flow_rule);
5507+ entry->id = FLOW_ACTION_TUNNEL_ENCAP;
5508+ entry->tunnel = tun_info;
5509+ }
5510+ }
5511+}
5512+
5513+static void flow_offload_decap_tunnel(const struct flow_offload *flow,
5514+ enum flow_offload_tuple_dir dir,
5515+ struct nf_flow_rule *flow_rule)
5516+{
5517+ const struct flow_offload_tuple *other_tuple;
5518+ struct flow_action_entry *entry;
5519+ struct dst_entry *dst;
5520+
5521+ other_tuple = &flow->tuplehash[!dir].tuple;
5522+ if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_DIRECT)
5523+ return;
5524+
5525+ dst = other_tuple->dst_cache;
5526+ if (dst && dst->lwtstate) {
5527+ struct ip_tunnel_info *tun_info;
5528+
5529+ tun_info = lwt_tun_info(dst->lwtstate);
5530+ if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX)) {
5531+ entry = flow_action_entry_next(flow_rule);
5532+ entry->id = FLOW_ACTION_TUNNEL_DECAP;
5533+ }
5534+ }
5535+}
5536+
5537+static int
5538+nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
5539+ enum flow_offload_tuple_dir dir,
5540+ struct nf_flow_rule *flow_rule)
5541+{
5542+ const struct flow_offload_tuple *other_tuple;
5543+ const struct flow_offload_tuple *tuple;
5544+ int i;
5545+
5546+ flow_offload_decap_tunnel(flow, dir, flow_rule);
5547+ flow_offload_encap_tunnel(flow, dir, flow_rule);
5548+
5549+ if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
5550+ flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
5551+ return -1;
5552+
5553+ tuple = &flow->tuplehash[dir].tuple;
5554+
5555+ for (i = 0; i < tuple->encap_num; i++) {
5556+ struct flow_action_entry *entry;
5557+
5558+ if (tuple->in_vlan_ingress & BIT(i))
5559+ continue;
5560+
5561+ if (tuple->encap[i].proto == htons(ETH_P_8021Q)) {
5562+ entry = flow_action_entry_next(flow_rule);
5563+ entry->id = FLOW_ACTION_VLAN_POP;
5564+ }
5565+ }
5566+
5567+ other_tuple = &flow->tuplehash[!dir].tuple;
5568+
5569+ for (i = 0; i < other_tuple->encap_num; i++) {
5570+ struct flow_action_entry *entry;
5571+
5572+ if (other_tuple->in_vlan_ingress & BIT(i))
5573+ continue;
5574+
5575+ entry = flow_action_entry_next(flow_rule);
5576+
5577+ switch (other_tuple->encap[i].proto) {
5578+ case htons(ETH_P_PPP_SES):
5579+ entry->id = FLOW_ACTION_PPPOE_PUSH;
5580+ entry->pppoe.sid = other_tuple->encap[i].id;
5581+ break;
5582+ case htons(ETH_P_8021Q):
5583+ entry->id = FLOW_ACTION_VLAN_PUSH;
5584+ entry->vlan.vid = other_tuple->encap[i].id;
5585+ entry->vlan.proto = other_tuple->encap[i].proto;
5586+ break;
5587+ }
5588+ }
5589+
5590+ return 0;
5591+}
5592+
5593+int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
5594+ enum flow_offload_tuple_dir dir,
5595+ struct nf_flow_rule *flow_rule)
5596+{
5597+ if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
5598+ return -1;
5599+
5600+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
5601+ flow_offload_ipv4_snat(net, flow, dir, flow_rule);
5602+ flow_offload_port_snat(net, flow, dir, flow_rule);
5603+ }
5604+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
5605+ flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
5606+ flow_offload_port_dnat(net, flow, dir, flow_rule);
5607+ }
5608+ if (test_bit(NF_FLOW_SNAT, &flow->flags) ||
5609+ test_bit(NF_FLOW_DNAT, &flow->flags))
5610+ flow_offload_ipv4_checksum(net, flow, flow_rule);
5611+
5612+ flow_offload_redirect(net, flow, dir, flow_rule);
5613+
5614+ return 0;
5615+}
5616+EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);
5617+
5618+int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
5619+ enum flow_offload_tuple_dir dir,
5620+ struct nf_flow_rule *flow_rule)
5621+{
5622+ if (nf_flow_rule_route_common(net, flow, dir, flow_rule) < 0)
5623+ return -1;
5624+
5625+ if (test_bit(NF_FLOW_SNAT, &flow->flags)) {
5626+ flow_offload_ipv6_snat(net, flow, dir, flow_rule);
5627+ flow_offload_port_snat(net, flow, dir, flow_rule);
5628+ }
5629+ if (test_bit(NF_FLOW_DNAT, &flow->flags)) {
5630+ flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
5631+ flow_offload_port_dnat(net, flow, dir, flow_rule);
5632+ }
5633+
5634+ flow_offload_redirect(net, flow, dir, flow_rule);
5635+
5636+ return 0;
5637+}
5638+EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);
5639+
5640+#define NF_FLOW_RULE_ACTION_MAX 16
5641+
5642+static struct nf_flow_rule *
5643+nf_flow_offload_rule_alloc(struct net *net,
5644+ const struct flow_offload_work *offload,
5645+ enum flow_offload_tuple_dir dir)
5646+{
5647+ const struct nf_flowtable *flowtable = offload->flowtable;
5648+ const struct flow_offload_tuple *tuple, *other_tuple;
5649+ const struct flow_offload *flow = offload->flow;
5650+ struct dst_entry *other_dst = NULL;
5651+ struct nf_flow_rule *flow_rule;
5652+ int err = -ENOMEM;
5653+
5654+ flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
5655+ if (!flow_rule)
5656+ goto err_flow;
5657+
5658+ flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
5659+ if (!flow_rule->rule)
5660+ goto err_flow_rule;
5661+
5662+ flow_rule->rule->match.dissector = &flow_rule->match.dissector;
5663+ flow_rule->rule->match.mask = &flow_rule->match.mask;
5664+ flow_rule->rule->match.key = &flow_rule->match.key;
5665+
5666+ tuple = &flow->tuplehash[dir].tuple;
5667+ other_tuple = &flow->tuplehash[!dir].tuple;
5668+ if (other_tuple->xmit_type == FLOW_OFFLOAD_XMIT_NEIGH)
5669+ other_dst = other_tuple->dst_cache;
5670+
5671+ err = nf_flow_rule_match(&flow_rule->match, tuple, other_dst);
5672+ if (err < 0)
5673+ goto err_flow_match;
5674+
5675+ flow_rule->rule->action.num_entries = 0;
5676+ if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
5677+ goto err_flow_match;
5678+
5679+ return flow_rule;
5680+
5681+err_flow_match:
5682+ kfree(flow_rule->rule);
5683+err_flow_rule:
5684+ kfree(flow_rule);
5685+err_flow:
5686+ return NULL;
5687+}
5688+
5689+static void __nf_flow_offload_destroy(struct nf_flow_rule *flow_rule)
5690+{
5691+ struct flow_action_entry *entry;
5692+ int i;
5693+
5694+ for (i = 0; i < flow_rule->rule->action.num_entries; i++) {
5695+ entry = &flow_rule->rule->action.entries[i];
5696+ if (entry->id != FLOW_ACTION_REDIRECT)
5697+ continue;
5698+
5699+ dev_put(entry->dev);
5700+ }
5701+ kfree(flow_rule->rule);
5702+ kfree(flow_rule);
5703+}
5704+
5705+static void nf_flow_offload_destroy(struct nf_flow_rule *flow_rule[])
5706+{
5707+ int i;
5708+
5709+ for (i = 0; i < FLOW_OFFLOAD_DIR_MAX; i++)
5710+ __nf_flow_offload_destroy(flow_rule[i]);
5711+}
5712+
5713+static int nf_flow_offload_alloc(const struct flow_offload_work *offload,
5714+ struct nf_flow_rule *flow_rule[])
5715+{
5716+ struct net *net = read_pnet(&offload->flowtable->net);
5717+
5718+ flow_rule[0] = nf_flow_offload_rule_alloc(net, offload,
5719+ FLOW_OFFLOAD_DIR_ORIGINAL);
5720+ if (!flow_rule[0])
5721+ return -ENOMEM;
5722+
5723+ flow_rule[1] = nf_flow_offload_rule_alloc(net, offload,
5724+ FLOW_OFFLOAD_DIR_REPLY);
5725+ if (!flow_rule[1]) {
5726+ __nf_flow_offload_destroy(flow_rule[0]);
5727+ return -ENOMEM;
5728+ }
5729+
5730+ return 0;
5731+}
5732+
5733+static void nf_flow_offload_init(struct flow_cls_offload *cls_flow,
5734+ __be16 proto, int priority,
5735+ enum flow_cls_command cmd,
5736+ const struct flow_offload_tuple *tuple,
5737+ struct netlink_ext_ack *extack)
5738+{
5739+ cls_flow->common.protocol = proto;
5740+ cls_flow->common.prio = priority;
5741+ cls_flow->common.extack = extack;
5742+ cls_flow->command = cmd;
5743+ cls_flow->cookie = (unsigned long)tuple;
5744+}
5745+
5746+static int nf_flow_offload_tuple(struct nf_flowtable *flowtable,
5747+ struct flow_offload *flow,
5748+ struct nf_flow_rule *flow_rule,
5749+ enum flow_offload_tuple_dir dir,
5750+ int priority, int cmd,
5751+ struct flow_stats *stats,
5752+ struct list_head *block_cb_list)
5753+{
5754+ struct flow_cls_offload cls_flow = {};
5755+ struct flow_block_cb *block_cb;
5756+ struct netlink_ext_ack extack;
5757+ __be16 proto = ETH_P_ALL;
5758+ int err, i = 0;
5759+
5760+ nf_flow_offload_init(&cls_flow, proto, priority, cmd,
5761+ &flow->tuplehash[dir].tuple, &extack);
5762+ if (cmd == FLOW_CLS_REPLACE)
5763+ cls_flow.rule = flow_rule->rule;
5764+
5765+ down_read(&flowtable->flow_block_lock);
5766+ list_for_each_entry(block_cb, block_cb_list, list) {
5767+ err = block_cb->cb(TC_SETUP_CLSFLOWER, &cls_flow,
5768+ block_cb->cb_priv);
5769+ if (err < 0)
5770+ continue;
5771+
5772+ i++;
5773+ }
5774+ up_read(&flowtable->flow_block_lock);
5775+
5776+ if (cmd == FLOW_CLS_STATS)
5777+ memcpy(stats, &cls_flow.stats, sizeof(*stats));
5778+
5779+ return i;
5780+}
5781+
5782+static int flow_offload_tuple_add(struct flow_offload_work *offload,
5783+ struct nf_flow_rule *flow_rule,
5784+ enum flow_offload_tuple_dir dir)
5785+{
5786+ return nf_flow_offload_tuple(offload->flowtable, offload->flow,
5787+ flow_rule, dir, offload->priority,
5788+ FLOW_CLS_REPLACE, NULL,
5789+ &offload->flowtable->flow_block.cb_list);
5790+}
5791+
5792+static void flow_offload_tuple_del(struct flow_offload_work *offload,
5793+ enum flow_offload_tuple_dir dir)
5794+{
5795+ nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
5796+ offload->priority, FLOW_CLS_DESTROY, NULL,
5797+ &offload->flowtable->flow_block.cb_list);
5798+}
5799+
5800+static int flow_offload_rule_add(struct flow_offload_work *offload,
5801+ struct nf_flow_rule *flow_rule[])
5802+{
5803+ int ok_count = 0;
5804+
5805+ ok_count += flow_offload_tuple_add(offload, flow_rule[0],
5806+ FLOW_OFFLOAD_DIR_ORIGINAL);
5807+ ok_count += flow_offload_tuple_add(offload, flow_rule[1],
5808+ FLOW_OFFLOAD_DIR_REPLY);
5809+ if (ok_count == 0)
5810+ return -ENOENT;
5811+
5812+ return 0;
5813+}
5814+
5815+static void flow_offload_work_add(struct flow_offload_work *offload)
5816+{
5817+ struct nf_flow_rule *flow_rule[FLOW_OFFLOAD_DIR_MAX];
5818+ int err;
5819+
5820+ err = nf_flow_offload_alloc(offload, flow_rule);
5821+ if (err < 0)
5822+ return;
5823+
5824+ err = flow_offload_rule_add(offload, flow_rule);
5825+ if (err < 0)
5826+ goto out;
5827+
5828+ set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
5829+
5830+out:
5831+ nf_flow_offload_destroy(flow_rule);
5832+}
5833+
5834+static void flow_offload_work_del(struct flow_offload_work *offload)
5835+{
5836+ clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
5837+ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
5838+ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
5839+ set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
5840+}
5841+
5842+static void flow_offload_tuple_stats(struct flow_offload_work *offload,
5843+ enum flow_offload_tuple_dir dir,
5844+ struct flow_stats *stats)
5845+{
5846+ nf_flow_offload_tuple(offload->flowtable, offload->flow, NULL, dir,
5847+ offload->priority, FLOW_CLS_STATS, stats,
5848+ &offload->flowtable->flow_block.cb_list);
5849+}
5850+
5851+static void flow_offload_work_stats(struct flow_offload_work *offload)
5852+{
5853+ struct flow_stats stats[FLOW_OFFLOAD_DIR_MAX] = {};
5854+ u64 lastused;
5855+
5856+ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
5857+ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);
5858+
5859+ lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
5860+ offload->flow->timeout = max_t(u64, offload->flow->timeout,
5861+ lastused + flow_offload_get_timeout(offload->flow));
5862+
5863+ if (offload->flowtable->flags & NF_FLOWTABLE_COUNTER) {
5864+ if (stats[0].pkts)
5865+ nf_ct_acct_add(offload->flow->ct,
5866+ FLOW_OFFLOAD_DIR_ORIGINAL,
5867+ stats[0].pkts, stats[0].bytes);
5868+ if (stats[1].pkts)
5869+ nf_ct_acct_add(offload->flow->ct,
5870+ FLOW_OFFLOAD_DIR_REPLY,
5871+ stats[1].pkts, stats[1].bytes);
5872+ }
5873+}
5874+
5875+static void flow_offload_work_handler(struct work_struct *work)
5876+{
5877+ struct flow_offload_work *offload;
5878+
5879+ offload = container_of(work, struct flow_offload_work, work);
5880+ switch (offload->cmd) {
5881+ case FLOW_CLS_REPLACE:
5882+ flow_offload_work_add(offload);
5883+ break;
5884+ case FLOW_CLS_DESTROY:
5885+ flow_offload_work_del(offload);
5886+ break;
5887+ case FLOW_CLS_STATS:
5888+ flow_offload_work_stats(offload);
5889+ break;
5890+ default:
5891+ WARN_ON_ONCE(1);
5892+ }
5893+
5894+ clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags);
5895+ kfree(offload);
5896+}
5897+
5898+static void flow_offload_queue_work(struct flow_offload_work *offload)
5899+{
5900+ if (offload->cmd == FLOW_CLS_REPLACE)
5901+ queue_work(nf_flow_offload_add_wq, &offload->work);
5902+ else if (offload->cmd == FLOW_CLS_DESTROY)
5903+ queue_work(nf_flow_offload_del_wq, &offload->work);
5904+ else
5905+ queue_work(nf_flow_offload_stats_wq, &offload->work);
5906+}
5907+
5908+static struct flow_offload_work *
5909+nf_flow_offload_work_alloc(struct nf_flowtable *flowtable,
5910+ struct flow_offload *flow, unsigned int cmd)
5911+{
5912+ struct flow_offload_work *offload;
5913+
5914+ if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags))
5915+ return NULL;
5916+
5917+ offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC);
5918+ if (!offload) {
5919+ clear_bit(NF_FLOW_HW_PENDING, &flow->flags);
5920+ return NULL;
5921+ }
5922+
5923+ offload->cmd = cmd;
5924+ offload->flow = flow;
5925+ offload->priority = flowtable->priority;
5926+ offload->flowtable = flowtable;
5927+ INIT_WORK(&offload->work, flow_offload_work_handler);
5928+
5929+ return offload;
5930+}
5931+
5932+
5933+void nf_flow_offload_add(struct nf_flowtable *flowtable,
5934+ struct flow_offload *flow)
5935+{
5936+ struct flow_offload_work *offload;
5937+
5938+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_REPLACE);
5939+ if (!offload)
5940+ return;
5941+
5942+ flow_offload_queue_work(offload);
5943+}
5944+
5945+void nf_flow_offload_del(struct nf_flowtable *flowtable,
5946+ struct flow_offload *flow)
5947+{
5948+ struct flow_offload_work *offload;
5949+
5950+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_DESTROY);
5951+ if (!offload)
5952+ return;
5953+
5954+ set_bit(NF_FLOW_HW_DYING, &flow->flags);
5955+ flow_offload_queue_work(offload);
5956+}
5957+
5958+void nf_flow_offload_stats(struct nf_flowtable *flowtable,
5959+ struct flow_offload *flow)
5960+{
5961+ struct flow_offload_work *offload;
5962+ __s32 delta;
5963+
5964+ delta = nf_flow_timeout_delta(flow->timeout);
5965+ if ((delta >= (9 * flow_offload_get_timeout(flow)) / 10))
5966+ return;
5967+
5968+ offload = nf_flow_offload_work_alloc(flowtable, flow, FLOW_CLS_STATS);
5969+ if (!offload)
5970+ return;
5971+
5972+ flow_offload_queue_work(offload);
5973+}
5974+
5975+void nf_flow_table_offload_flush(struct nf_flowtable *flowtable)
5976+{
5977+ if (nf_flowtable_hw_offload(flowtable)) {
5978+ flush_workqueue(nf_flow_offload_add_wq);
5979+ flush_workqueue(nf_flow_offload_del_wq);
5980+ flush_workqueue(nf_flow_offload_stats_wq);
5981+ }
5982+}
5983+
5984+static int nf_flow_table_block_setup(struct nf_flowtable *flowtable,
5985+ struct flow_block_offload *bo,
5986+ enum flow_block_command cmd)
5987+{
5988+ struct flow_block_cb *block_cb, *next;
5989+ int err = 0;
5990+
5991+ switch (cmd) {
5992+ case FLOW_BLOCK_BIND:
5993+ list_splice(&bo->cb_list, &flowtable->flow_block.cb_list);
5994+ break;
5995+ case FLOW_BLOCK_UNBIND:
5996+ list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
5997+ list_del(&block_cb->list);
5998+ flow_block_cb_free(block_cb);
5999+ }
6000+ break;
6001+ default:
6002+ WARN_ON_ONCE(1);
6003+ err = -EOPNOTSUPP;
6004+ }
6005+
6006+ return err;
6007+}
6008+
6009+static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
6010+ struct net *net,
6011+ enum flow_block_command cmd,
6012+ struct nf_flowtable *flowtable,
6013+ struct netlink_ext_ack *extack)
6014+{
6015+ memset(bo, 0, sizeof(*bo));
6016+ bo->net = net;
6017+ bo->block = &flowtable->flow_block;
6018+ bo->command = cmd;
6019+ bo->binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
6020+ bo->extack = extack;
6021+ INIT_LIST_HEAD(&bo->cb_list);
6022+}
6023+
6024+static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
6025+ struct nf_flowtable *flowtable,
6026+ struct net_device *dev,
6027+ enum flow_block_command cmd,
6028+ struct netlink_ext_ack *extack)
6029+{
6030+ nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
6031+ extack);
6032+ flow_indr_block_call(dev, bo, cmd);
6033+
6034+ if (list_empty(&bo->cb_list))
6035+ return -EOPNOTSUPP;
6036+
6037+ return 0;
6038+}
6039+
6040+static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
6041+ struct nf_flowtable *flowtable,
6042+ struct net_device *dev,
6043+ enum flow_block_command cmd,
6044+ struct netlink_ext_ack *extack)
6045+{
6046+ int err;
6047+
6048+ nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
6049+ extack);
6050+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_FT, bo);
6051+ if (err < 0)
6052+ return err;
6053+
6054+ return 0;
6055+}
6056+
6057+int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
6058+ struct net_device *dev,
6059+ enum flow_block_command cmd)
6060+{
6061+ struct netlink_ext_ack extack = {};
6062+ struct flow_block_offload bo;
6063+ int err;
6064+
6065+ if (!nf_flowtable_hw_offload(flowtable))
6066+ return 0;
6067+
6068+ if (dev->netdev_ops->ndo_setup_tc)
6069+ err = nf_flow_table_offload_cmd(&bo, flowtable, dev, cmd,
6070+ &extack);
6071+ else
6072+ err = nf_flow_table_indr_offload_cmd(&bo, flowtable, dev, cmd,
6073+ &extack);
6074+ if (err < 0)
6075+ return err;
6076+
6077+ return nf_flow_table_block_setup(flowtable, &bo, cmd);
6078+}
6079+EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);
6080+
6081+int nf_flow_table_offload_init(void)
6082+{
6083+ nf_flow_offload_add_wq = alloc_workqueue("nf_ft_offload_add",
6084+ WQ_UNBOUND | WQ_SYSFS, 0);
6085+ if (!nf_flow_offload_add_wq)
6086+ return -ENOMEM;
6087+
6088+ nf_flow_offload_del_wq = alloc_workqueue("nf_ft_offload_del",
6089+ WQ_UNBOUND | WQ_SYSFS, 0);
6090+ if (!nf_flow_offload_del_wq)
6091+ goto err_del_wq;
6092+
6093+ nf_flow_offload_stats_wq = alloc_workqueue("nf_ft_offload_stats",
6094+ WQ_UNBOUND | WQ_SYSFS, 0);
6095+ if (!nf_flow_offload_stats_wq)
6096+ goto err_stats_wq;
6097+
6098+ return 0;
6099+
6100+err_stats_wq:
6101+ destroy_workqueue(nf_flow_offload_del_wq);
6102+err_del_wq:
6103+ destroy_workqueue(nf_flow_offload_add_wq);
6104+ return -ENOMEM;
6105+}
6106+
6107+void nf_flow_table_offload_exit(void)
6108+{
6109+ destroy_workqueue(nf_flow_offload_add_wq);
6110+ destroy_workqueue(nf_flow_offload_del_wq);
6111+ destroy_workqueue(nf_flow_offload_stats_wq);
6112+}
6113diff --git a/net/netfilter/xt_FLOWOFFLOAD.c b/net/netfilter/xt_FLOWOFFLOAD.c
6114new file mode 100644
6115index 000000000..ae1eb2656
6116--- /dev/null
6117+++ b/net/netfilter/xt_FLOWOFFLOAD.c
6118@@ -0,0 +1,719 @@
6119+/*
6120+ * Copyright (C) 2018-2021 Felix Fietkau <nbd@nbd.name>
6121+ *
6122+ * This program is free software; you can redistribute it and/or modify
6123+ * it under the terms of the GNU General Public License version 2 as
6124+ * published by the Free Software Foundation.
6125+ */
6126+#include <linux/module.h>
6127+#include <linux/init.h>
6128+#include <linux/netfilter.h>
6129+#include <linux/netfilter/xt_FLOWOFFLOAD.h>
6130+#include <linux/if_vlan.h>
6131+#include <net/ip.h>
6132+#include <net/netfilter/nf_conntrack.h>
6133+#include <net/netfilter/nf_conntrack_extend.h>
6134+#include <net/netfilter/nf_conntrack_helper.h>
6135+#include <net/netfilter/nf_flow_table.h>
6136+
6137+struct xt_flowoffload_hook {
6138+ struct hlist_node list;
6139+ struct nf_hook_ops ops;
6140+ struct net *net;
6141+ bool registered;
6142+ bool used;
6143+};
6144+
6145+struct xt_flowoffload_table {
6146+ struct nf_flowtable ft;
6147+ struct hlist_head hooks;
6148+ struct delayed_work work;
6149+};
6150+
6151+struct nf_forward_info {
6152+ const struct net_device *indev;
6153+ const struct net_device *outdev;
6154+ const struct net_device *hw_outdev;
6155+ struct id {
6156+ __u16 id;
6157+ __be16 proto;
6158+ } encap[NF_FLOW_TABLE_ENCAP_MAX];
6159+ u8 num_encaps;
6160+ u8 ingress_vlans;
6161+ u8 h_source[ETH_ALEN];
6162+ u8 h_dest[ETH_ALEN];
6163+ enum flow_offload_xmit_type xmit_type;
6164+};
6165+
6166+static DEFINE_SPINLOCK(hooks_lock);
6167+
6168+struct xt_flowoffload_table flowtable[2];
6169+
6170+static unsigned int
6171+xt_flowoffload_net_hook(void *priv, struct sk_buff *skb,
6172+ const struct nf_hook_state *state)
6173+{
6174+ struct vlan_ethhdr *veth;
6175+ __be16 proto;
6176+
6177+ switch (skb->protocol) {
6178+ case htons(ETH_P_8021Q):
6179+ veth = (struct vlan_ethhdr *)skb_mac_header(skb);
6180+ proto = veth->h_vlan_encapsulated_proto;
6181+ break;
6182+ case htons(ETH_P_PPP_SES):
6183+ proto = nf_flow_pppoe_proto(skb);
6184+ break;
6185+ default:
6186+ proto = skb->protocol;
6187+ break;
6188+ }
6189+
6190+ switch (proto) {
6191+ case htons(ETH_P_IP):
6192+ return nf_flow_offload_ip_hook(priv, skb, state);
6193+ case htons(ETH_P_IPV6):
6194+ return nf_flow_offload_ipv6_hook(priv, skb, state);
6195+ }
6196+
6197+ return NF_ACCEPT;
6198+}
6199+
6200+static int
6201+xt_flowoffload_create_hook(struct xt_flowoffload_table *table,
6202+ struct net_device *dev)
6203+{
6204+ struct xt_flowoffload_hook *hook;
6205+ struct nf_hook_ops *ops;
6206+
6207+ hook = kzalloc(sizeof(*hook), GFP_ATOMIC);
6208+ if (!hook)
6209+ return -ENOMEM;
6210+
6211+ ops = &hook->ops;
6212+ ops->pf = NFPROTO_NETDEV;
6213+ ops->hooknum = NF_NETDEV_INGRESS;
6214+ ops->priority = 10;
6215+ ops->priv = &table->ft;
6216+ ops->hook = xt_flowoffload_net_hook;
6217+ ops->dev = dev;
6218+
6219+ hlist_add_head(&hook->list, &table->hooks);
6220+ mod_delayed_work(system_power_efficient_wq, &table->work, 0);
6221+
6222+ return 0;
6223+}
6224+
6225+static struct xt_flowoffload_hook *
6226+flow_offload_lookup_hook(struct xt_flowoffload_table *table,
6227+ struct net_device *dev)
6228+{
6229+ struct xt_flowoffload_hook *hook;
6230+
6231+ hlist_for_each_entry(hook, &table->hooks, list) {
6232+ if (hook->ops.dev == dev)
6233+ return hook;
6234+ }
6235+
6236+ return NULL;
6237+}
6238+
6239+static void
6240+xt_flowoffload_check_device(struct xt_flowoffload_table *table,
6241+ struct net_device *dev)
6242+{
6243+ struct xt_flowoffload_hook *hook;
6244+
6245+ if (!dev)
6246+ return;
6247+
6248+ spin_lock_bh(&hooks_lock);
6249+ hook = flow_offload_lookup_hook(table, dev);
6250+ if (hook)
6251+ hook->used = true;
6252+ else
6253+ xt_flowoffload_create_hook(table, dev);
6254+ spin_unlock_bh(&hooks_lock);
6255+}
6256+
6257+static void
6258+xt_flowoffload_register_hooks(struct xt_flowoffload_table *table)
6259+{
6260+ struct xt_flowoffload_hook *hook;
6261+
6262+restart:
6263+ hlist_for_each_entry(hook, &table->hooks, list) {
6264+ if (hook->registered)
6265+ continue;
6266+
6267+ hook->registered = true;
6268+ hook->net = dev_net(hook->ops.dev);
6269+ spin_unlock_bh(&hooks_lock);
6270+ nf_register_net_hook(hook->net, &hook->ops);
6271+ if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
6272+ table->ft.type->setup(&table->ft, hook->ops.dev,
6273+ FLOW_BLOCK_BIND);
6274+ spin_lock_bh(&hooks_lock);
6275+ goto restart;
6276+ }
6277+
6278+}
6279+
6280+static bool
6281+xt_flowoffload_cleanup_hooks(struct xt_flowoffload_table *table)
6282+{
6283+ struct xt_flowoffload_hook *hook;
6284+ bool active = false;
6285+
6286+restart:
6287+ spin_lock_bh(&hooks_lock);
6288+ hlist_for_each_entry(hook, &table->hooks, list) {
6289+ if (hook->used || !hook->registered) {
6290+ active = true;
6291+ continue;
6292+ }
6293+
6294+ hlist_del(&hook->list);
6295+ spin_unlock_bh(&hooks_lock);
6296+ if (table->ft.flags & NF_FLOWTABLE_HW_OFFLOAD)
6297+ table->ft.type->setup(&table->ft, hook->ops.dev,
6298+ FLOW_BLOCK_UNBIND);
6299+ nf_unregister_net_hook(hook->net, &hook->ops);
6300+ kfree(hook);
6301+ goto restart;
6302+ }
6303+ spin_unlock_bh(&hooks_lock);
6304+
6305+ return active;
6306+}
6307+
6308+static void
6309+xt_flowoffload_check_hook(struct flow_offload *flow, void *data)
6310+{
6311+ struct xt_flowoffload_table *table = data;
6312+ struct flow_offload_tuple *tuple0 = &flow->tuplehash[0].tuple;
6313+ struct flow_offload_tuple *tuple1 = &flow->tuplehash[1].tuple;
6314+ struct xt_flowoffload_hook *hook;
6315+
6316+ spin_lock_bh(&hooks_lock);
6317+ hlist_for_each_entry(hook, &table->hooks, list) {
6318+ if (hook->ops.dev->ifindex != tuple0->iifidx &&
6319+ hook->ops.dev->ifindex != tuple1->iifidx)
6320+ continue;
6321+
6322+ hook->used = true;
6323+ }
6324+ spin_unlock_bh(&hooks_lock);
6325+}
6326+
6327+static void
6328+xt_flowoffload_hook_work(struct work_struct *work)
6329+{
6330+ struct xt_flowoffload_table *table;
6331+ struct xt_flowoffload_hook *hook;
6332+ int err;
6333+
6334+ table = container_of(work, struct xt_flowoffload_table, work.work);
6335+
6336+ spin_lock_bh(&hooks_lock);
6337+ xt_flowoffload_register_hooks(table);
6338+ hlist_for_each_entry(hook, &table->hooks, list)
6339+ hook->used = false;
6340+ spin_unlock_bh(&hooks_lock);
6341+
6342+ err = nf_flow_table_iterate(&table->ft, xt_flowoffload_check_hook,
6343+ table);
6344+ if (err && err != -EAGAIN)
6345+ goto out;
6346+
6347+ if (!xt_flowoffload_cleanup_hooks(table))
6348+ return;
6349+
6350+out:
6351+ queue_delayed_work(system_power_efficient_wq, &table->work, HZ);
6352+}
6353+
6354+static bool
6355+xt_flowoffload_skip(struct sk_buff *skb, int family)
6356+{
6357+ if (skb_sec_path(skb))
6358+ return true;
6359+
6360+ if (family == NFPROTO_IPV4) {
6361+ const struct ip_options *opt = &(IPCB(skb)->opt);
6362+
6363+ if (unlikely(opt->optlen))
6364+ return true;
6365+ }
6366+
6367+ return false;
6368+}
6369+
6370+static enum flow_offload_xmit_type nf_xmit_type(struct dst_entry *dst)
6371+{
6372+ if (dst_xfrm(dst))
6373+ return FLOW_OFFLOAD_XMIT_XFRM;
6374+
6375+ return FLOW_OFFLOAD_XMIT_NEIGH;
6376+}
6377+
6378+static void nf_default_forward_path(struct nf_flow_route *route,
6379+ struct dst_entry *dst_cache,
6380+ enum ip_conntrack_dir dir,
6381+ struct net_device **dev)
6382+{
6383+ route->tuple[!dir].in.ifindex = dst_cache->dev->ifindex;
6384+ route->tuple[dir].dst = dst_cache;
6385+ route->tuple[dir].xmit_type = nf_xmit_type(dst_cache);
6386+}
6387+
6388+static bool nf_is_valid_ether_device(const struct net_device *dev)
6389+{
6390+ if (!dev || (dev->flags & IFF_LOOPBACK) || dev->type != ARPHRD_ETHER ||
6391+ dev->addr_len != ETH_ALEN || !is_valid_ether_addr(dev->dev_addr))
6392+ return false;
6393+
6394+ return true;
6395+}
6396+
6397+static void nf_dev_path_info(const struct net_device_path_stack *stack,
6398+ struct nf_forward_info *info,
6399+ unsigned char *ha)
6400+{
6401+ const struct net_device_path *path;
6402+ int i;
6403+
6404+ memcpy(info->h_dest, ha, ETH_ALEN);
6405+
6406+ for (i = 0; i < stack->num_paths; i++) {
6407+ path = &stack->path[i];
6408+
6409+ info->indev = path->dev;
6410+
6411+ switch (path->type) {
6412+ case DEV_PATH_ETHERNET:
6413+ case DEV_PATH_DSA:
6414+ case DEV_PATH_VLAN:
6415+ case DEV_PATH_PPPOE:
6416+ if (is_zero_ether_addr(info->h_source))
6417+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
6418+
6419+ if (path->type == DEV_PATH_ETHERNET)
6420+ break;
6421+ if (path->type == DEV_PATH_DSA) {
6422+ i = stack->num_paths;
6423+ break;
6424+ }
6425+
6426+ /* DEV_PATH_VLAN and DEV_PATH_PPPOE */
6427+ if (info->num_encaps >= NF_FLOW_TABLE_ENCAP_MAX) {
6428+ info->indev = NULL;
6429+ break;
6430+ }
6431+ if (!info->outdev)
6432+ info->outdev = path->dev;
6433+ info->encap[info->num_encaps].id = path->encap.id;
6434+ info->encap[info->num_encaps].proto = path->encap.proto;
6435+ info->num_encaps++;
6436+ if (path->type == DEV_PATH_PPPOE)
6437+ memcpy(info->h_dest, path->encap.h_dest, ETH_ALEN);
6438+ break;
6439+ case DEV_PATH_BRIDGE:
6440+ if (is_zero_ether_addr(info->h_source))
6441+ memcpy(info->h_source, path->dev->dev_addr, ETH_ALEN);
6442+
6443+ switch (path->bridge.vlan_mode) {
6444+ case DEV_PATH_BR_VLAN_UNTAG_HW:
6445+ info->ingress_vlans |= BIT(info->num_encaps - 1);
6446+ break;
6447+ case DEV_PATH_BR_VLAN_TAG:
6448+ info->encap[info->num_encaps].id = path->bridge.vlan_id;
6449+ info->encap[info->num_encaps].proto = path->bridge.vlan_proto;
6450+ info->num_encaps++;
6451+ break;
6452+ case DEV_PATH_BR_VLAN_UNTAG:
6453+ info->num_encaps--;
6454+ break;
6455+ case DEV_PATH_BR_VLAN_KEEP:
6456+ break;
6457+ }
6458+ break;
6459+ default:
6460+ break;
6461+ }
6462+ }
6463+ if (!info->outdev)
6464+ info->outdev = info->indev;
6465+
6466+ info->hw_outdev = info->indev;
6467+
6468+ if (nf_is_valid_ether_device(info->indev))
6469+ info->xmit_type = FLOW_OFFLOAD_XMIT_DIRECT;
6470+}
6471+
6472+static int nf_dev_fill_forward_path(const struct nf_flow_route *route,
6473+ const struct dst_entry *dst_cache,
6474+ const struct nf_conn *ct,
6475+ enum ip_conntrack_dir dir, u8 *ha,
6476+ struct net_device_path_stack *stack)
6477+{
6478+ const void *daddr = &ct->tuplehash[!dir].tuple.src.u3;
6479+ struct net_device *dev = dst_cache->dev;
6480+ struct neighbour *n;
6481+ u8 nud_state;
6482+
6483+ if (!nf_is_valid_ether_device(dev))
6484+ goto out;
6485+
6486+ n = dst_neigh_lookup(dst_cache, daddr);
6487+ if (!n)
6488+ return -1;
6489+
6490+ read_lock_bh(&n->lock);
6491+ nud_state = n->nud_state;
6492+ ether_addr_copy(ha, n->ha);
6493+ read_unlock_bh(&n->lock);
6494+ neigh_release(n);
6495+
6496+ if (!(nud_state & NUD_VALID))
6497+ return -1;
6498+
6499+out:
6500+ return dev_fill_forward_path(dev, ha, stack);
6501+}
6502+
6503+static int nf_dev_forward_path(struct nf_flow_route *route,
6504+ const struct nf_conn *ct,
6505+ enum ip_conntrack_dir dir,
6506+ struct net_device **devs)
6507+{
6508+ const struct dst_entry *dst = route->tuple[dir].dst;
6509+ struct net_device_path_stack stack;
6510+ struct nf_forward_info info = {};
6511+ unsigned char ha[ETH_ALEN];
6512+ int i;
6513+
6514+ if (nf_dev_fill_forward_path(route, dst, ct, dir, ha, &stack) >= 0)
6515+ nf_dev_path_info(&stack, &info, ha);
6516+
6517+ devs[!dir] = (struct net_device *)info.indev;
6518+ if (!info.indev)
6519+ return -1;
6520+
6521+ route->tuple[!dir].in.ifindex = info.indev->ifindex;
6522+ for (i = 0; i < info.num_encaps; i++) {
6523+ route->tuple[!dir].in.encap[i].id = info.encap[i].id;
6524+ route->tuple[!dir].in.encap[i].proto = info.encap[i].proto;
6525+ }
6526+ route->tuple[!dir].in.num_encaps = info.num_encaps;
6527+ route->tuple[!dir].in.ingress_vlans = info.ingress_vlans;
6528+
6529+ if (info.xmit_type == FLOW_OFFLOAD_XMIT_DIRECT) {
6530+ memcpy(route->tuple[dir].out.h_source, info.h_source, ETH_ALEN);
6531+ memcpy(route->tuple[dir].out.h_dest, info.h_dest, ETH_ALEN);
6532+ route->tuple[dir].out.ifindex = info.outdev->ifindex;
6533+ route->tuple[dir].out.hw_ifindex = info.hw_outdev->ifindex;
6534+ route->tuple[dir].xmit_type = info.xmit_type;
6535+ }
6536+
6537+ return 0;
6538+}
6539+
6540+static int
6541+xt_flowoffload_route_dir(struct nf_flow_route *route, const struct nf_conn *ct,
6542+ enum ip_conntrack_dir dir,
6543+ const struct xt_action_param *par, int ifindex,
6544+ struct net_device **devs)
6545+{
6546+ struct dst_entry *dst = NULL;
6547+ struct flowi fl;
6548+
6549+ memset(&fl, 0, sizeof(fl));
6550+ switch (xt_family(par)) {
6551+ case NFPROTO_IPV4:
6552+ fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.src.u3.ip;
6553+ fl.u.ip4.flowi4_oif = ifindex;
6554+ break;
6555+ case NFPROTO_IPV6:
6556+ fl.u.ip6.saddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
6557+ fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.src.u3.in6;
6558+ fl.u.ip6.flowi6_oif = ifindex;
6559+ break;
6560+ }
6561+
6562+ nf_route(xt_net(par), &dst, &fl, false, xt_family(par));
6563+ if (!dst)
6564+ return -ENOENT;
6565+
6566+ nf_default_forward_path(route, dst, dir, devs);
6567+
6568+ return 0;
6569+}
6570+
6571+static int
6572+xt_flowoffload_route(struct sk_buff *skb, const struct nf_conn *ct,
6573+ const struct xt_action_param *par,
6574+ struct nf_flow_route *route, enum ip_conntrack_dir dir,
6575+ struct net_device **devs)
6576+{
6577+ int ret;
6578+
6579+ ret = xt_flowoffload_route_dir(route, ct, dir, par,
6580+ devs[dir]->ifindex,
6581+ devs);
6582+ if (ret)
6583+ return ret;
6584+
6585+ ret = xt_flowoffload_route_dir(route, ct, !dir, par,
6586+ devs[!dir]->ifindex,
6587+ devs);
6588+ if (ret)
6589+ return ret;
6590+
6591+ if (route->tuple[dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH &&
6592+ route->tuple[!dir].xmit_type == FLOW_OFFLOAD_XMIT_NEIGH) {
6593+ if (nf_dev_forward_path(route, ct, dir, devs))
6594+ return -1;
6595+ if (nf_dev_forward_path(route, ct, !dir, devs))
6596+ return -1;
6597+ }
6598+
6599+ return 0;
6600+}
6601+
6602+static unsigned int
6603+flowoffload_tg(struct sk_buff *skb, const struct xt_action_param *par)
6604+{
6605+ struct xt_flowoffload_table *table;
6606+ const struct xt_flowoffload_target_info *info = par->targinfo;
6607+ struct tcphdr _tcph, *tcph = NULL;
6608+ enum ip_conntrack_info ctinfo;
6609+ enum ip_conntrack_dir dir;
6610+ struct nf_flow_route route = {};
6611+ struct flow_offload *flow = NULL;
6612+ struct net_device *devs[2] = {};
6613+ struct nf_conn *ct;
6614+ struct net *net;
6615+
6616+ if (xt_flowoffload_skip(skb, xt_family(par)))
6617+ return XT_CONTINUE;
6618+
6619+ ct = nf_ct_get(skb, &ctinfo);
6620+ if (ct == NULL)
6621+ return XT_CONTINUE;
6622+
6623+ switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
6624+ case IPPROTO_TCP:
6625+ if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
6626+ return XT_CONTINUE;
6627+
6628+ tcph = skb_header_pointer(skb, par->thoff,
6629+ sizeof(_tcph), &_tcph);
6630+ if (unlikely(!tcph || tcph->fin || tcph->rst))
6631+ return XT_CONTINUE;
6632+ break;
6633+ case IPPROTO_UDP:
6634+ break;
6635+ default:
6636+ return XT_CONTINUE;
6637+ }
6638+
6639+ if (nf_ct_ext_exist(ct, NF_CT_EXT_HELPER) ||
6640+ ct->status & IPS_SEQ_ADJUST)
6641+ return XT_CONTINUE;
6642+
6643+ if (!nf_ct_is_confirmed(ct))
6644+ return XT_CONTINUE;
6645+
6646+ dir = CTINFO2DIR(ctinfo);
6647+
6648+ devs[dir] = xt_out(par);
6649+ devs[!dir] = xt_in(par);
6650+
6651+ if (!devs[dir] || !devs[!dir])
6652+ return XT_CONTINUE;
6653+
6654+ if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
6655+ return XT_CONTINUE;
6656+
6657+ if (xt_flowoffload_route(skb, ct, par, &route, dir, devs) < 0)
6658+ goto err_flow_route;
6659+
6660+ flow = flow_offload_alloc(ct);
6661+ if (!flow)
6662+ goto err_flow_alloc;
6663+
6664+ if (flow_offload_route_init(flow, &route) < 0)
6665+ goto err_flow_add;
6666+
6667+ if (tcph) {
6668+ ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
6669+ ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
6670+ }
6671+
6672+ table = &flowtable[!!(info->flags & XT_FLOWOFFLOAD_HW)];
6673+
6674+ net = read_pnet(&table->ft.net);
6675+ if (!net)
6676+ write_pnet(&table->ft.net, xt_net(par));
6677+
6678+ if (flow_offload_add(&table->ft, flow) < 0)
6679+ goto err_flow_add;
6680+
6681+ xt_flowoffload_check_device(table, devs[0]);
6682+ xt_flowoffload_check_device(table, devs[1]);
6683+
6684+ dst_release(route.tuple[!dir].dst);
6685+
6686+ return XT_CONTINUE;
6687+
6688+err_flow_add:
6689+ flow_offload_free(flow);
6690+err_flow_alloc:
6691+ dst_release(route.tuple[!dir].dst);
6692+err_flow_route:
6693+ clear_bit(IPS_OFFLOAD_BIT, &ct->status);
6694+
6695+ return XT_CONTINUE;
6696+}
6697+
6698+static int flowoffload_chk(const struct xt_tgchk_param *par)
6699+{
6700+ struct xt_flowoffload_target_info *info = par->targinfo;
6701+
6702+ if (info->flags & ~XT_FLOWOFFLOAD_MASK)
6703+ return -EINVAL;
6704+
6705+ return 0;
6706+}
6707+
6708+static struct xt_target offload_tg_reg __read_mostly = {
6709+ .family = NFPROTO_UNSPEC,
6710+ .name = "FLOWOFFLOAD",
6711+ .revision = 0,
6712+ .targetsize = sizeof(struct xt_flowoffload_target_info),
6713+ .usersize = sizeof(struct xt_flowoffload_target_info),
6714+ .checkentry = flowoffload_chk,
6715+ .target = flowoffload_tg,
6716+ .me = THIS_MODULE,
6717+};
6718+
6719+static int flow_offload_netdev_event(struct notifier_block *this,
6720+ unsigned long event, void *ptr)
6721+{
6722+ struct xt_flowoffload_hook *hook0, *hook1;
6723+ struct net_device *dev = netdev_notifier_info_to_dev(ptr);
6724+
6725+ if (event != NETDEV_UNREGISTER)
6726+ return NOTIFY_DONE;
6727+
6728+ spin_lock_bh(&hooks_lock);
6729+ hook0 = flow_offload_lookup_hook(&flowtable[0], dev);
6730+ if (hook0)
6731+ hlist_del(&hook0->list);
6732+
6733+ hook1 = flow_offload_lookup_hook(&flowtable[1], dev);
6734+ if (hook1)
6735+ hlist_del(&hook1->list);
6736+ spin_unlock_bh(&hooks_lock);
6737+
6738+ if (hook0) {
6739+ nf_unregister_net_hook(hook0->net, &hook0->ops);
6740+ kfree(hook0);
6741+ }
6742+
6743+ if (hook1) {
6744+ nf_unregister_net_hook(hook1->net, &hook1->ops);
6745+ kfree(hook1);
6746+ }
6747+
6748+ nf_flow_table_cleanup(dev);
6749+
6750+ return NOTIFY_DONE;
6751+}
6752+
6753+static struct notifier_block flow_offload_netdev_notifier = {
6754+ .notifier_call = flow_offload_netdev_event,
6755+};
6756+
6757+static int nf_flow_rule_route_inet(struct net *net,
6758+ const struct flow_offload *flow,
6759+ enum flow_offload_tuple_dir dir,
6760+ struct nf_flow_rule *flow_rule)
6761+{
6762+ const struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
6763+ int err;
6764+
6765+ switch (flow_tuple->l3proto) {
6766+ case NFPROTO_IPV4:
6767+ err = nf_flow_rule_route_ipv4(net, flow, dir, flow_rule);
6768+ break;
6769+ case NFPROTO_IPV6:
6770+ err = nf_flow_rule_route_ipv6(net, flow, dir, flow_rule);
6771+ break;
6772+ default:
6773+ err = -1;
6774+ break;
6775+ }
6776+
6777+ return err;
6778+}
6779+
6780+static struct nf_flowtable_type flowtable_inet = {
6781+ .family = NFPROTO_INET,
6782+ .init = nf_flow_table_init,
6783+ .setup = nf_flow_table_offload_setup,
6784+ .action = nf_flow_rule_route_inet,
6785+ .free = nf_flow_table_free,
6786+ .hook = xt_flowoffload_net_hook,
6787+ .owner = THIS_MODULE,
6788+};
6789+
6790+static int init_flowtable(struct xt_flowoffload_table *tbl)
6791+{
6792+ INIT_DELAYED_WORK(&tbl->work, xt_flowoffload_hook_work);
6793+ tbl->ft.type = &flowtable_inet;
6794+
6795+ return nf_flow_table_init(&tbl->ft);
6796+}
6797+
6798+static int __init xt_flowoffload_tg_init(void)
6799+{
6800+ int ret;
6801+
6802+ register_netdevice_notifier(&flow_offload_netdev_notifier);
6803+
6804+ ret = init_flowtable(&flowtable[0]);
6805+ if (ret)
6806+ return ret;
6807+
6808+ ret = init_flowtable(&flowtable[1]);
6809+ if (ret)
6810+ goto cleanup;
6811+
6812+ flowtable[1].ft.flags = NF_FLOWTABLE_HW_OFFLOAD;
6813+
6814+ ret = xt_register_target(&offload_tg_reg);
6815+ if (ret)
6816+ goto cleanup2;
6817+
6818+ return 0;
6819+
6820+cleanup2:
6821+ nf_flow_table_free(&flowtable[1].ft);
6822+cleanup:
6823+ nf_flow_table_free(&flowtable[0].ft);
6824+ return ret;
6825+}
6826+
6827+static void __exit xt_flowoffload_tg_exit(void)
6828+{
6829+ xt_unregister_target(&offload_tg_reg);
6830+ unregister_netdevice_notifier(&flow_offload_netdev_notifier);
6831+ nf_flow_table_free(&flowtable[0].ft);
6832+ nf_flow_table_free(&flowtable[1].ft);
6833+}
6834+
6835+MODULE_LICENSE("GPL");
6836+module_init(xt_flowoffload_tg_init);
6837+module_exit(xt_flowoffload_tg_exit);
6838--
68392.18.0
6840